c# - Return a finite set matching a regex expression -


something similar http://regexio.com/prototype.html, i'm trying set matching particular regex.

basically, need parse regular expression , then, instead of reading input while walking parsed expression, output variants.

i have hacked following program doing need very simple regular expression (only alternate options using |, iteration using *, grouping using (), , escaping using \ supported). note iteration done 0–5 times, conversion possibly infinite iteration left exercise reader ;-).

i have used straightforward recursive-descent parser building abstract syntax tree in memory; tree in end walked , possible sets built. solution not optimal @ all, works. enjoy:

public class testprg {     static void main()     {         var expression = new regexparser("a(b|c)*d").parse();          foreach (var item in expression.generate())         {             console.writeline(item);         }     } }  public static class enumerableextensions {     // build cartesian product of sequence of sequences     // code eric lippert, copied <http://blogs.msdn.com/b/ericlippert/archive/2010/06/28/computing-a-cartesian-product-with-linq.aspx>     public static ienumerable<ienumerable<t>> cartesianproduct<t>(this ienumerable<ienumerable<t>> sequences)     {         ienumerable<ienumerable<t>> emptyproduct = new[] { enumerable.empty<t>() };         return sequences.aggregate(             emptyproduct,             (accumulator, sequence) =>             accseq in accumulator             item in sequence             select accseq.concat(new[] { item }));     } }  public class regexparser {     private const char eof = '\x0000';     private readonly string str;     private char curr;     private int pos;      public regexparser(string s)     {         str = s;     }      public regexpression parse()     {         pos = -1;         read();         return parseexpression();     }      private void read()     {         ++pos;         curr = pos < str.length ? str[pos] : eof;     }      private regexpression parseexpression()     {         var term = parseterm();         if (curr == '|')         {             read();             var secondexpr = parseexpression();             return new variants(term, secondexpr);         }         else         {             return term;         }     }      private regexpression parseterm()     {         var factor = parsefactor();         if (curr != '|' && curr != '+' && curr != '*' && curr != ')' && curr != eof)         {             var secondterm = parseterm();             return new concatenation(factor, secondterm);         }         else         {             return factor;         }     }      private regexpression parsefactor()     {         var element = parseelement();         if (curr == '*')         {             read();             return new repeat(element);         }         else         {             return element;         }     }      private regexpression parseelement()     {         switch (curr)         {             case '(':                 read();                 var expr = parseexpression();                 if (curr != ')') throw new formatexception("closing paren expected");                 read();                 return expr;              case '\\':                 read();                 var escapedchar = curr;                 read();                 return new literal(escapedchar);              default:                 var literal = curr;                 read();                 return new literal(literal);         }     } }  public abstract class regexpression {     protected static ienumerable<regexpression> merge<t>(regexpression head, regexpression tail, func<t, ienumerable<regexpression>> selector)         t : regexpression     {         var other = tail t;         if (other != null)         {             return new[] { head }.concat(selector(other));         }         else         {             return new[] { head, tail };         }     }      public abstract ienumerable<string> generate(); }  public class variants : regexpression {     public ienumerable<regexpression> subexpressions { get; private set; }      public variants(regexpression term, regexpression rest)     {         subexpressions = merge<variants>(term, rest, c => c.subexpressions);     }      public override ienumerable<string> generate()     {         return subexpressions.selectmany(sub => sub.generate());     } }  public class concatenation : regexpression {     public ienumerable<regexpression> subexpressions { get; private set; }      public concatenation(regexpression factor, regexpression rest)     {         subexpressions = merge<concatenation>(factor, rest, c => c.subexpressions);     }      public override ienumerable<string> generate()     {         foreach (var variant in subexpressions.select(sub => sub.generate()).cartesianproduct())         {             var builder = new stringbuilder();             foreach (var item in variant) builder.append(item);             yield return builder.tostring();         }     } }  public class repeat : regexpression {     public regexpression expr { get; private set; }      public repeat(regexpression expr)     {         expr = expr;     }      public override ienumerable<string> generate()     {         foreach (var subexpr in expr.generate())         {             (int cnt = 0; cnt < 5; ++cnt)             {                 var builder = new stringbuilder(subexpr.length * cnt);                 (int = 0; < cnt; ++i) builder.append(subexpr);                 yield return builder.tostring();             }         }     } }  public class literal : regexpression {     public char ch { get; private set; }      public literal(char c)     {         ch = c;     }      public override ienumerable<string> generate()     {         yield return new string(ch, 1);     } } 

Comments

Popular posts from this blog

apache - Add omitted ? to URLs -

redirect - bbPress Forum - rewrite to wwww.mysite prohibits login -

php - How can I stop spam on my custom forum/blog? -