c# - Return a finite set matching a regex expression -
something similar http://regexio.com/prototype.html, i'm trying set matching particular regex.
basically, need parse regular expression , then, instead of reading input while walking parsed expression, output variants.
i have hacked following program doing need very simple regular expression (only alternate options using |
, iteration using *
, grouping using ()
, , escaping using \
supported). note iteration done 0–5 times, conversion possibly infinite iteration left exercise reader ;-).
i have used straightforward recursive-descent parser building abstract syntax tree in memory; tree in end walked , possible sets built. solution not optimal @ all, works. enjoy:
public class testprg { static void main() { var expression = new regexparser("a(b|c)*d").parse(); foreach (var item in expression.generate()) { console.writeline(item); } } } public static class enumerableextensions { // build cartesian product of sequence of sequences // code eric lippert, copied <http://blogs.msdn.com/b/ericlippert/archive/2010/06/28/computing-a-cartesian-product-with-linq.aspx> public static ienumerable<ienumerable<t>> cartesianproduct<t>(this ienumerable<ienumerable<t>> sequences) { ienumerable<ienumerable<t>> emptyproduct = new[] { enumerable.empty<t>() }; return sequences.aggregate( emptyproduct, (accumulator, sequence) => accseq in accumulator item in sequence select accseq.concat(new[] { item })); } } public class regexparser { private const char eof = '\x0000'; private readonly string str; private char curr; private int pos; public regexparser(string s) { str = s; } public regexpression parse() { pos = -1; read(); return parseexpression(); } private void read() { ++pos; curr = pos < str.length ? str[pos] : eof; } private regexpression parseexpression() { var term = parseterm(); if (curr == '|') { read(); var secondexpr = parseexpression(); return new variants(term, secondexpr); } else { return term; } } private regexpression parseterm() { var factor = parsefactor(); if (curr != '|' && curr != '+' && curr != '*' && curr != ')' && curr != eof) { var secondterm = parseterm(); return new concatenation(factor, secondterm); } else { return factor; } } private regexpression parsefactor() { var element = parseelement(); if (curr == '*') { read(); return new repeat(element); } else { return element; } } private regexpression parseelement() { switch (curr) { case '(': read(); var expr = parseexpression(); if (curr != ')') throw new formatexception("closing paren expected"); read(); return expr; case '\\': read(); var escapedchar = curr; read(); return new literal(escapedchar); default: var literal = curr; read(); return new literal(literal); } } } public abstract class regexpression { protected static ienumerable<regexpression> merge<t>(regexpression head, regexpression tail, func<t, ienumerable<regexpression>> selector) t : regexpression { var other = tail t; if (other != null) { return new[] { head }.concat(selector(other)); } else { return new[] { head, tail }; } } public abstract ienumerable<string> generate(); } public class variants : regexpression { public ienumerable<regexpression> subexpressions { get; private set; } public variants(regexpression term, regexpression rest) { subexpressions = merge<variants>(term, rest, c => c.subexpressions); } public override ienumerable<string> generate() { return subexpressions.selectmany(sub => sub.generate()); } } public class concatenation : regexpression { public ienumerable<regexpression> subexpressions { get; private set; } public concatenation(regexpression factor, regexpression rest) { subexpressions = merge<concatenation>(factor, rest, c => c.subexpressions); } public override ienumerable<string> generate() { foreach (var variant in subexpressions.select(sub => sub.generate()).cartesianproduct()) { var builder = new stringbuilder(); foreach (var item in variant) builder.append(item); yield return builder.tostring(); } } } public class repeat : regexpression { public regexpression expr { get; private set; } public repeat(regexpression expr) { expr = expr; } public override ienumerable<string> generate() { foreach (var subexpr in expr.generate()) { (int cnt = 0; cnt < 5; ++cnt) { var builder = new stringbuilder(subexpr.length * cnt); (int = 0; < cnt; ++i) builder.append(subexpr); yield return builder.tostring(); } } } } public class literal : regexpression { public char ch { get; private set; } public literal(char c) { ch = c; } public override ienumerable<string> generate() { yield return new string(ch, 1); } }
Comments
Post a Comment