-- Parser example -- by Chris Bailey-Kellogg, based on an example written by Lennart Andersson. -- modified by Scot Drysdale 11/1/07 and 2/12/08 module Parser(Parser, (#), (-#), (#-), (#:), (!), (>->), (?), lit, letter, space, char, takeParse, takeWhileParse, returnParse, failParse, module Data.Char) where import Data.Char -- for isSpace, isAlpha. isDigit might also be useful. -- Consider the following simple version of a tree: data SimpleTree = Leaf | Branch SimpleTree SimpleTree deriving Show -- an example: tree1 = Branch (Branch Leaf (Branch Leaf Leaf)) Leaf -- A function to return a String containing out a parenthsized representation -- of a SimpleTree, with leaves represented by "*" and the two trees within a -- Branch surrounded by "( .... )" parenthesizeTree :: SimpleTree -> String parenthesizeTree Leaf = "*" parenthesizeTree (Branch t1 t2) = "(" ++ parenthesizeTree t1 ++ parenthesizeTree t2 ++ ")" -- Parsing goes the other direction - given a string, reconstruct the tree. -- "Maybe" because the string may not represent a valid tree. -- A pair because you need to return not only the tree found, but the -- remaining input string parseTree :: String -> Maybe (SimpleTree, String) parseTree [] = Nothing parseTree (c:cs) = if c == '*' then Just (Leaf, cs) else if c == '(' then case parseTree cs of Nothing -> Nothing Just (t1, cs1) -> case parseTree cs1 of Nothing -> Nothing Just (t2, cs2) -> if null cs2 then Nothing else if head cs2 == ')' then Just (Branch t1 t2, tail cs2) else Nothing else Nothing ------------------------------------------------------------------------ -- Based on a book chapter by Lennart Andersson -- Parse head of the string, producing an "a" and the remaining string type Parser a = String -> Maybe (a,String) -- Parse a '(' parseOpenParen :: Parser Char parseOpenParen [] = Nothing parseOpenParen (c:cs) = if c=='(' then Just (c,cs) else Nothing -- Parse a ')' parseCloseParen :: Parser Char parseCloseParen [] = Nothing parseCloseParen (c:cs) = if c==')' then Just (c,cs) else Nothing -- Capturing the common pattern of a literal character -- Parse a single character passed as parameter c lit :: Char -> Parser Char lit c [] = Nothing lit c (c2:cs) = if c2==c then Just (c,cs) else Nothing -- Example parseOpenParen2 = lit '(' -------------------- -- The tree parser, modified to use lit. Changes some "if" statements to -- case statements. Not yet clear that this is a win! parseBinary1 :: Parser SimpleTree parseBinary1 [] = Nothing parseBinary1 cs = case lit '*' cs of Just (_,cs1) -> Just (Leaf,cs1) Nothing -> case lit '(' cs of Nothing -> Nothing Just (_,cs1) -> case parseBinary1 cs1 of Nothing -> Nothing Just (l,cs2) -> case parseBinary1 cs2 of Nothing -> Nothing Just (r,cs3) -> case lit ')' cs3 of Just (_,cs4) -> Just (Branch l r, cs4) Nothing -> Nothing ------------------------------------------------------------------------ -- Extract some common code into patterns, -- so can build up parser from components -- Note that we are writing functions that take functions and RETURN functions! -- Sequence operation - Takes two parsers, recognizes whether a string -- starts with a substring recognized by the first followed by a string -- recognized by the second. Returns ordered pair if succeeds. infixl 6 # (#) :: Parser a -> Parser b -> Parser (a,b) (m # n) cs = case m cs of Nothing -> Nothing Just (p,cs1) -> case n cs1 of Nothing -> Nothing Just (q,cs2) -> Just ((p,q),cs2) -- An example parseOC cs = (lit '(' # lit ')') cs parseOC2 = lit '(' # lit ')' -- currying allows us simplify -- Transforms the output of a parse by applying a function infix 5 >-> (>->) :: Parser a -> (a->b) -> Parser b (m >-> f) cs = case m cs of Nothing -> Nothing Just (a,cs1) -> Just (f a,cs1) -- An example parseTreeDup = parseTree >-> (\t -> Branch t t) -- Sequence throwing away first or second part (whichever has '-') infixl 6 -# (-#) :: Parser a -> Parser b -> Parser b m -# n = m # n >-> snd infixl 6 #- (#-) :: Parser a -> Parser b -> Parser a m #- n = m # n >-> fst -- An example parseParen2 = lit '(' -# lit ')' -- Alternatives - Finds if either succeeds. If both succeed, choose first. infixl 3 ! (!) :: Parser a -> Parser a -> Parser a (m ! n) cs = case m cs of Nothing -> n cs mcs -> mcs -- An example parseParen = lit '(' ! lit ')' ------------------------------------------------------------------------ -- Now our nice clean parser, built with these components parseBinary2 :: Parser SimpleTree parseBinary2 = lit '*' >-> (\a -> Leaf) ! lit '(' -# parseBinary2 # parseBinary2 #- lit ')' >-> (\(a,b) -> Branch a b) ------------------------------------------------------------------------ -- Moving on to "natural" language (parse the sentences we generated) -- Parses a single alphabetical character letter :: Parser Char letter [] = Nothing letter (c:cs) = if isAlpha c then Just (c,cs) else Nothing -- Parses a single whitespace character (space, return, tab) space :: Parser Char space [] = Nothing space (c:cs) = if isSpace c then Just (c,cs) else Nothing -- Extracting the common pattern -- Parses using m, but only succeeds if m succeeds and p is true. infix 7 ? (?) :: Parser a -> (a -> Bool) -> Parser a (m ? p) cs = case m cs of Nothing -> Nothing Just (a,cs) -> if p a then Just (a,cs) else Nothing -- Parses a single character char :: Parser Char char [] = Nothing char (c:cs) = Just (c,cs) -- examples letter2 = (char ? isAlpha) space2 = (char ? isSpace) -- Parses two things and conses their results into one list infixl 6 #: (#:) :: Parser a -> Parser [a] -> Parser [a] m #: n = m # n >-> (\ (x,xs) -> x : xs) -- Like take -- apply parser m i times takeParse :: Parser a -> Int -> Parser [a] takeParse m 0 = returnParse [] takeParse m i = m #: takeParse m (i-1) -- Like takeWhile -- apply parser m as long as it succeeds takeWhileParse :: Parser a -> Parser [a] takeWhileParse m = m #: takeWhileParse m ! returnParse [] -- Convenience functions to package things up -- Returns a parser that always succeeds, returning a fixed value. returnParse :: a -> Parser a returnParse v cs = Just (v,cs) -- A parser that always fails failParse :: Parser a failParse cs = Nothing -- Now we can extract a single specified word (and eliminate the following whitespace) word :: String -> Parser String word w = (takeParse letter (length w) ? (==w)) #- (takeWhileParse space) -------------------- data Parse = S Parse Parse | NP Parse Parse Parse | VP Parse Parse | Name String | Det String | Adj String | Noun String | IVerb String | TVerb String deriving Show -- Each parses the thing named (where s is sentence) s = np # vp >-> (\(np,vp) -> S np vp) np = name ! (det # adj # noun >-> (\((d,a),n) -> NP d a n)) name = wordChoice ["scot", "chris", "theha"] >-> Name det = wordChoice ["a", "the"] >-> Det adj = wordChoice ["happy", "hungry", "blue"] >-> Adj noun = wordChoice ["person", "cat"] >-> Noun vp = iverb ! (tverb # np) >-> (\(t, n) -> VP t n) iverb = wordChoice ["sits", "jogs"] >-> IVerb tverb = wordChoice ["eats", "watches"] >-> TVerb -- Match the first word in the list wordChoice :: [String] -> Parser String wordChoice [] = failParse wordChoice (w:ws) = (word w ! wordChoice ws)