-- Parser example -- by Chris Bailey-Kellogg, based on an example written by Lennart Andersson. -- modified by Scot Drysdale 11/1/07 and converted to use a Monad on 2/28/08, -- incorporating a number of suggestions made by Taylor Campbell. -- This version is modified to use a state Monad to deal with the state -- (the current input string). The goal is to show what changes are needed -- to convert the parser to a monad (a modification of a State monad). -- These are: -- 1) returnParse becomes monad return and failParse becomes monad fail. -- 2) the (#), (?), and (>->) operators are defined in terms of (>>=) (using "do") -- 3) the (!) function is implemented using `mplus` from MonadPlus -- 4) Parser must become a data type, with a constructor "Parser", rather than -- just a function. The function runParser runs a Parser on a String. module ParserM(Parser (Parser), (#), (-#), (#-), (#:), (#++), (!), (>->), (?), lit, letter, space, char, runParser, takeParse, takeWhileParse, takeRepeatParse, parseSpaces, parseWord, wordChoice, elimSpacesAround, module Data.Char) where import Data.Char -- for isSpace, isAlpha. isDigit might also be useful. import Monad -- for mplus -- Parse head of the string, producing an "a" and the remaining string -- We did this as a "type" before, but can't use a "type" synonym in -- an "instance" declaration for a type class. data Parser a = Parser (String -> Maybe (a, String)) -- Takes a parser and a string and returns the result of running the -- parser on the string. runParser :: Parser a -> String -> Maybe (a, String) runParser (Parser parseFunction) str = parseFunction str instance Monad Parser where return value -- equivalent to returnParse in Parser = Parser (\str -> Just (value, str)) fail string -- equivalent to failParse in Parser = Parser (\str -> Nothing) parser >>= function = Parser $ \ firstString -> case runParser parser firstString of Nothing -> Nothing Just (intermediateValue, intermediateString) -> runParser (function intermediateValue) intermediateString instance MonadPlus Parser where mzero = Parser (\str -> Nothing) x `mplus` y = Parser (\str -> runParser x str `mplus` runParser y str) -- Sequence operation - Takes two parsers, recognizes whether a string -- starts with a substring recognized by the first followed by a string -- recognized by the second. Returns ordered pair if succeeds. infixl 6 # (#) :: Parser a -> Parser b -> Parser (a,b) m # n = do a <- m b <- n return (a,b) -- Transforms the output of a parse by applying a function infix 5 >-> (>->) :: Parser a -> (a->b) -> Parser b m >-> f = do a <- m return (f a) -- Parses using m, but only succeeds if m succeeds and p is true. -- Note that the way >>= is defined if m returns Nothing the whole do -- will return Nothing, so it is safe to try to evaluate (p a). infix 7 ? (?) :: Parser a -> (a -> Bool) -> Parser a m ? p = do a <- m if p a then return a else fail "predicate false" -- Alternatives - Finds if either succeeds. If both succeed, choose first. -- This uses "mplus" MonadPlus infixl 3 ! (!) :: Parser a -> Parser a -> Parser a m ! n = m `mplus` n -- Sequence throwing away first or second part (whichever has '-') -- (Old definitions using >-> would also work.) infixl 6 -# (-#) :: Parser a -> Parser b -> Parser b m -# n = do m n infixl 6 #- (#-) :: Parser a -> Parser b -> Parser a m #- n = do a <- m n return a -- Parses two things and conses their results into one list infixl 6 #: (#:) :: Parser a -> Parser [a] -> Parser [a] m #: n = do a <- m b <- n return (a:b) -- Parses two things and appends their results into one list infixl 6 #++ (#++) :: Parser [a] -> Parser [a] -> Parser [a] m #++ n = do a <- m b <- n return (a ++ b) -- Parses a single character char :: Parser Char char = Parser $ \cs -> case cs of "" -> Nothing (c:cs) -> Just (c,cs) -- Parse a single character passed as parameter c lit :: Char -> Parser Char lit c = char ? (== c) ------------------------------------------------------------------------ data Tree a = Leaf a | Branch (Tree a) (Tree a) deriving Show -- Now our nice clean parser, built with these components -- Parses a Tree, where leaves have string data parseTree :: Parser (Tree String) parseTree = parseWord >-> Leaf ! elimSpacesAround (lit '(' -# parseTree # parseTree #- lit ')') >-> (\(a,b) -> Branch a b) ------------------------------------------------------------------------ -- Moving on to "natural" language (parse the sentences we generated) letter = (char ? isAlpha) space = (char ? isSpace) -- Like take -- apply parser m i times takeParse :: Parser a -> Int -> Parser [a] takeParse m 0 = return [] takeParse m i = m #: takeParse m (i-1) -- Like takeWhile -- apply parser m as long as it succeeds takeWhileParse :: Parser a -> Parser [a] takeWhileParse m = m #: takeWhileParse m ! return [] -- Repeatedly parse using m while it succeeds, -- but guarantees at least one successful parse takeRepeatParse :: Parser a -> Parser [a] takeRepeatParse m = m #: takeWhileParse m -- Parses a maximal contiguous whitespace block of 0 or more characters parseSpaces :: Parser String parseSpaces = takeWhileParse space -- Eliminates white space before and after the parse of m elimSpacesAround :: Parser a -> Parser a elimSpacesAround m = parseSpaces -# m #- parseSpaces -- Parses a consecutive sequence of one or more alphabetic characters, -- eliminating whitespace before and after. parseWord :: Parser String parseWord = elimSpacesAround (takeRepeatParse letter) -- Extract a single word (and surrounding whitespace), but fail if does not match w. word :: String -> Parser String word w = (parseWord ? (==w)) -------------------- data Parse = S Parse Parse | NP Parse Parse Parse | VP Parse Parse | Name String | Det String | Adj String | Noun String | IVerb String | TVerb String deriving Show -- Each parses the thing named (where s is sentence) s = np # vp >-> \(np,vp) -> S np vp np = name ! (det # adj # noun >-> (\((d,a),n) -> NP d a n)) name = wordChoice ["scot", "chris"] >-> (\n -> Name n) det = wordChoice ["a", "the"] >-> (\n -> Det n) adj = wordChoice ["happy", "hungry", "blue"] >-> (\n -> Adj n) noun = wordChoice ["person", "cat"] >-> (\n -> Noun n) vp = iverb ! (tverb # np) >-> (\(t, n) -> VP t n) iverb = wordChoice ["sits", "jogs"] >-> (\iv -> IVerb iv) tverb = wordChoice ["eats", "watches"] >-> (\tv -> TVerb tv) -- Match the first word in the list wordChoice :: [String] -> Parser String wordChoice [] = fail "empty" wordChoice (w:ws) = (word w ! wordChoice ws) -- Test data -- stockData = " ( ( CLX FDC ) ( UPS ( ASN HLT ) ) ) " stockTree = runParser parseTree stockData stockDataLong = "(AW (((A (FPL (ED WMI))) ((CLX FDC) (UPS (ASN HLT))))" ++ "((DGX (RTN (GE (MO (CTAS (BCR KO)))))) (IP TMO))))" stockLongTree = runParser parseTree stockDataLong sentence1 = "thehappypersonwatchesahungrycat" sentence1parsed = runParser s sentence1 sentence2 = " the happy person watches a hungry cat " sentence2parsed = runParser s sentence2