{- Enumerate the strings for a regular expression Order by length, and lexicographically within length. enum "a(c|b)|d" = ["d","ab","ac"] enum "(b|ab*a)*" = ["","b","aa","bb","aab","aba","aab","baa","bbb"... -} import Char (isAlpha) type Lang = [String] alt :: Lang -> Lang -> Lang -- alternation (merge languages) cat :: Lang -> Lang -> Lang -- catenation (product of languages) clo :: Lang -> Lang -- Kleene closure enum :: String -> Lang alt xs@(x:xt) ys@(y:yt) = case compare (length x, x) (length y, y) of LT -> x : alt xt ys EQ -> x : alt xt yt GT -> y : alt xs yt alt xs ys = xs ++ ys cat (x:xt) ys@(y:yt) = (x++y) : alt (cat [x] yt) (cat xt ys) cat _ _ = [] clo [] = [""] clo ("":xs) = clo xs clo xs = "" : cat xs (clo xs) enum s = parse [] s {- Shift-reduce parser of regular expressions parse k s: k is current stack; s is tail of reg exp. Each stack symbol carries its language. Stack initally empty, finally holds one alternation. -} data StkSym = P Lang -- Primary ::= letter | "()" | "(" A ")" | C Lang -- Catenation ::= P | P "*" | C C | A Lang -- Alternation ::= C | A "|" A | L -- "(" type Stack = [StkSym] -- head of list is top of stack parse :: Stack -> String -> Lang parse (P(x):z) ('*':s) = parse (C(clo x):z) s parse (P(x):z) s = parse (C(x):z) s parse (C(y):C(x):z) s = parse (C(cat x y):z) s parse (C(x):z) ('|':s) = parse (A(x):z) s parse (C(x):z) s@(')':_) = parse (A(x):z) s parse (C(x):z) s@"" = parse (A(x):z) s parse (A(y):A(x):z) s = parse (A(alt x y):z) s parse (A(x):L:z) (')':s) = parse (P(x):z) s parse (L:z) (')':s) = parse (P[""]:z) s parse z ('(':s) = parse (L:z) s parse z (c:s) | isAlpha c = parse (P[[c]]:z) s parse [A(x)] "" = x parse _ s = error ("suffix where parse failed: \""++s++"\"")