-- Simple Monadic Parser Abstraction -- by Taylor Campbell 3/2008 -- modified by Scot Drysdale 3/2009 to demonstrate using List monad -- instead of Maybe mondad to parse ambiguous grammars. module ParserST where import StateT import Monad import Data.Char -- for isSpace, isAlpha. isDigit might also be useful. type ReturnMonad = [] type Parser a = StateT String ReturnMonad a -- In other words, -- -- data Parser a = Parser (String -> ReturnMonad (a, String)), -- -- as we had before, but with a data wrapper around the function. -- Recall that the old definition was -- -- type Parser a = String -> Maybe (a, string) runParser :: Parser a -> String -> ReturnMonad (a, String) runParser parser string = runStateT parser string -- This version returns only the parse, not the remaining string. parse :: Parser a -> String -> ReturnMonad a parse parser string = evalStateT parser string -- The following are equivalent to the same functions defined in -- ParserX. See comments there for their use. char :: Parser Char char = do string <- get case string of (c : cs) -> do put cs; return c _ -> fail "No more characters in string!" infixl 6 # (#) :: Parser a -> Parser b -> Parser (a, b) m # n = do a <- m b <- n return (a, b) infixl 6 -# (-#) :: Parser a -> Parser b -> Parser b m -# n = do m n infixl 6 #- (#-) :: Parser a -> Parser b -> Parser a m #- n = do a <- m n return a infixl 6 #: (#:) :: Parser a -> Parser [a] -> Parser [a] m #: n = do a <- m b <- n return (a:b) infixl 6 #++ (#++) :: Parser [a] -> Parser [a] -> Parser [a] m #++ n = do a <- m b <- n return (a ++ b) infix 5 >-> (>->) :: Parser a -> (a -> b) -> Parser b m >-> f = do a <- m return (f a) (?) :: Parser a -> (a -> Bool) -> Parser a m ? p = do a <- m if p a then return a else fail "Parsing failure!" infixl 3 ! (!) :: Parser a -> Parser a -> Parser a m ! n = m `mplus` n -- *** -- Sentence-parsing example -- *** -- Like take -- apply parser m i times takeParse :: Parser a -> Int -> Parser [a] takeParse m 0 = return [] takeParse m i = m #: takeParse m (i-1) -- Like takeWhile -- apply parser m as long as it succeeds takeWhileParse :: Parser a -> Parser [a] takeWhileParse m = m #: takeWhileParse m ! return [] -- Repeatedly parsed using m while it succeeds, -- but guarantees at least one successful parse takeRepeatParse :: Parser a -> Parser [a] takeRepeatParse m = m #: takeWhileParse m -- Parse a single character passed as parameter c lit :: Char -> Parser Char lit c = char ? (== c) -- Parses a whitespace character space :: Parser Char space = char ? isSpace -- Parses a letter letter :: Parser Char letter = char ? isAlpha -- Parses a maximal contiguous whitespace block of 0 or more characters parseSpaces :: Parser String parseSpaces = takeWhileParse space #- atEndOf isSpace -- Parse that succeeds only if the string is empty or the first -- character fails the boolean test -- Consumes NO characters from the input string. atEndOf :: (Char -> Bool) -> Parser String atEndOf test = do string <- get case string of "" -> return "" -- Succeed if string empty (c:cs) -> if test c then fail "More valid characters" else return "" -- Eliminates white space before and after the parse of m -- Not required, but useful. elimSpacesAround :: Parser a -> Parser a elimSpacesAround m = parseSpaces -# m #- parseSpaces -- Parses a consecutive sequence of one or more alphabetic characters. -- Eliminates leading and trailing spaces. parseWord :: Parser String parseWord = elimSpacesAround (takeRepeatParse letter #- atEndOf isAlpha) -- Extract a single word (and surrounding whitespace), but fail if -- does not match w. word :: String -> Parser String word w = (parseWord ? (==w)) -------------------- data Parse = S Parse Parse | NP Parse Parse Parse | VP Parse Parse | Name String | Det String | Adj String | Noun String | UnmodNoun String | IVerb String | TVerb String | ImpVerb String deriving Show -- Each parses the thing named (where s is sentence) s = parseSpaces -# ( np # vp >-> (\(np,vp) -> S np vp) ! impVerb # np >-> \(impV, np) -> S impV np) np = name ! (det # adj # noun >-> (\((d,a),n) -> NP d a n)) ! unmodNoun name = wordChoice ["scot", "chris", "grace", "prudence"] >-> Name det = wordChoice ["a", "the"] >-> Det adj = wordChoice ["happy", "hungry", "blue"] >-> Adj noun = wordChoice ["person", "cat"] >-> Noun unmodNoun = wordChoice ["time", "grace", "prudence", "flies"] >-> UnmodNoun vp = iverb ! (tverb # np) >-> (\(t, n) -> VP t n) iverb = wordChoice ["sits", "jogs", "flies"] >-> IVerb tverb = wordChoice ["eats", "watches"] >-> TVerb impVerb = wordChoice ["kiss", "time", "eat"] >-> ImpVerb -- Match the first word in the list wordChoice :: [String] -> Parser String wordChoice [] = fail "None left" wordChoice (w:ws) = (word w ! wordChoice ws) -- Test data -- sentence1 = "thehappypersonwatchesahungrycat" sentence2 = " the happy person watches a hungry cat " sentence3 = "time flies" sentence4 = "grace sits" sentence5 = "kiss grace" sentence6 = "grace watches prudence" -- Tree-parsing example data Tree a = Leaf a | Branch (Tree a) (Tree a) deriving Show -- Parses a Tree, where leaves have string data parseTree :: Parser (Tree String) parseTree = parseWord >-> Leaf ! elimSpacesAround (lit '(' -# parseTree # parseTree #- lit ')') >-> (\(a,b) -> Branch a b) -- Tree test data stockData = " ( ( CLX FDC ) ( UPS ( ASN HLT ) ) ) " stockDataLong = "(AW (((A (FPL (ED WMI))) ((CLX FDC) (UPS (ASN HLT))))" ++ "((DGX (RTN (GE (MO (CTAS (BCR KO)))))) (IP TMO))))"