-- Data base implementation -- by Chris Bailey-Kellogg 10/07 -- modified by Scot Drysdale to add join, sortOnCol, getKeys on 10/26/07 module Database (Table, ColumnName, Row, Value, Key, GetCol, SelectPred, makeTable, putTable, lookupKey, lookupKeyCol, select, project, innerJoin, join, sortOnCol, getColNames, getRows, getKeyCol, getKeys) where import qualified Data.Map as Map import Data.List import Data.Ord import Data.Maybe (mapMaybe) import Control.Monad (mapM_) -------------------- -- Table definition data Table = Table {colNames ::[ColumnName], rows ::[Row], key :: (Maybe Key)} deriving Show type ColumnName = String type Row = [Value] type Value = String type Key = (String, Map.Map Value Row) -- Create a table from list of column headers, list of rows, and maybe a key makeTable :: [ColumnName] -> [Row] -> (Maybe String) -> Table makeTable cols rows Nothing = Table cols rows Nothing makeTable cols rows (Just keyCol) = case elemIndex keyCol cols of Nothing -> Table cols rows Nothing Just p -> Table cols rows (Just (keyCol, Map.fromList $ zip (map (!! p) rows) rows)) -------------------- -- Accessor functions -- Get all the column headers from a table getColNames :: Table -> [ColumnName] getColNames = colNames -- Get all data, row by row, from a table getRows :: Table -> [Row] getRows = rows -- Given a Key, get the column name that the table is keyed on getKeyCol :: Maybe Key -> Maybe ColumnName getKeyCol Nothing = Nothing getKeyCol (Just (kc,_)) = Just kc -- Given a Table, get a list of all keys in the Table getKeys :: Table -> Maybe [Value] getKeys (Table _ _ Nothing) = Nothing getKeys (Table _ _ (Just (_,mp))) = Just (Map.keys mp) -------------------- -- Printing -- Print out, with columns lined up -- (Could instead set up a Show instance, but that's a bit more complicated.) putTable :: Table -> IO () putTable (Table cols rows _) = let lens = map (maximum . map length) (transpose (cols:rows)) spaces = repeat ' ' tabify row = do mapM_ (\(x,l) -> putStr $ take (l+2) (x ++ spaces)) (zip row lens) putChar '\n' in mapM_ tabify (cols:rows) -------------------- -- lookup in keyed Table -- Use the key to get a specific row lookupKey :: Value -> Table -> Maybe Row lookupKey key (Table _ _ (Just (keyCol,mp))) = Map.lookup key mp lookupKey _ _ = Nothing -- Use key and column name to get specific value lookupKeyCol :: Value -> ColumnName -> Table -> Maybe Value lookupKeyCol key col table@(Table cols _ _) = case lookupKey key table of Nothing -> Nothing (Just row) -> Just (row !! (posIn col cols)) -------------------- -- Select rows satisfying a predicate -- The predicate takes a row and a column accessor -- a function that takes -- a row and column name and returns the value type GetCol = Row -> ColumnName -> Value type SelectPred = Row -> GetCol -> Bool select :: SelectPred -> Table -> Table select pred (Table cols rows key) = makeTable cols srows (getKeyCol key) where getCol r c = r !! (posIn c cols) srows = filter (\r -> pred r getCol) rows -- Position of an column name in a list of names. -- Function is general, but error message is specific. posIn :: ColumnName -> [ColumnName] -> Int posIn col cols = case elemIndex col cols of Just p -> p Nothing -> error ("Column name " ++ col ++ " does not exist in this table") -------------------- -- Project rows onto particular columns project :: [ColumnName] -> Table -> Table project pcols (Table cols rows key) = makeTable pcols prows (getKeyCol key) where prows = map (getElemsAt (mapMaybe (\c -> elemIndex c cols) pcols)) rows -- Members of xs at positions is getElemsAt :: [Int] -> [a] -> [a] getElemsAt is xs = map (xs !!) is -------------------- -- Sorts the rows of the table according to the values in a given column sortOnCol :: ColumnName -> Table -> Table sortOnCol col (Table cols rows key) = Table cols (sortBy (comparing (!! p)) rows) key where p = posIn col cols -------------------- -- Two varieties of join -- Join the first table with the second, using the specified column -- name from the first column to access the key in the second innerJoin :: ColumnName -> Table -> Table -> Table innerJoin colName1 (Table cols1 rows1 key1) t2@(Table cols2 _ _) = let cols = cols1 ++ cols2 getForeignKey r = r !! (posIn colName1 cols1) rows = mapMaybe (\r -> case lookupKey (getForeignKey r) t2 of Nothing -> Nothing Just r2 -> Just (r ++ r2)) rows1 in makeTable cols rows (getKeyCol key1) -- Join on any two columns. Takes cartesian product of all lines with matching -- values in the two specified columns in the two tables. -- The function first sorts the tables to speed up the processing by merging join :: ColumnName -> ColumnName -> Table -> Table -> Table join colName1 colName2 t1 t2 = let (Table cols1 rows1 _) = sortOnCol colName1 t1 (Table cols2 rows2 _) = sortOnCol colName2 t2 pos1 = posIn colName1 cols1 pos2 = posIn colName2 cols2 joinRows :: [Row] -> [Row] -> [Row] joinRows rw1@(r1:rs1) rw2@(r2:rs2) | r1 !! pos1 < r2 !! pos2 = joinRows (dropWhile (\r -> r1 !! pos1 == r !! pos1) rs1) rw2 | r1 !! pos1 > r2 !! pos2 = joinRows rw1 (dropWhile (\r -> r2 !! pos2 == r !! pos2) rs2) | otherwise = let (eq1, rest1) = span (\r -> r1 !! pos1 == r !! pos1) rw1 (eq2, rest2) = span (\r -> r2 !! pos2 == r !! pos2) rw2 in [x ++ y | x <- eq1, y <- eq2] ++ joinRows rest1 rest2 joinRows _ _ = [] -- When either list empty, join is also in makeTable (cols1 ++ cols2) (joinRows rows1 rows2) Nothing ------------------ Examples and Test Data -------------------------- -- Basic example peopleCols = ["personId","name","gender","bday","city","state"] peopleRows = [["1","Alice","F","2000-01-01","Wonderland","??"], ["2","Elvis","M","1935-01-08","Memphis","TN"], ["3","Delilah","F","1988-04-03","Hanover","NH"], ["4","Charlie","M","1900-07-14","Strafford","VT"], ["5","Bob","M","1999-12-31","WRJ","VT"], ["6","Flora","F","1905-05-01","Stowe","VT"]] people = makeTable peopleCols peopleRows (Just "personId") -------------------- -- Lookup examples elvis = lookupKey "2" people elvisCity = lookupKeyCol "2" "city" people -------------------- -- Projection and join interests = makeTable ["interestId","personId","interest"] [["1","1","rabbit holes"], ["2","1","tea"], ["3","1","playing cards"], ["4","2","singing"], ["5","2","donuts"], ["6","4","dairy"], ["7","4","fiddle"]] (Just "interestId") peopleInterests = project ["name","interest"] $ innerJoin "personId" interests people -------------------- -- Selection -- Note - the second argument in the anonymous function is a function called -- "." which is used as an infix operator. oldVermonters = select (\r (.) -> (r . "state") == "VT" && (r . "bday") < show(2007-100)) people oldVermontContacts = project ["name","city"] oldVermonters aliceInterests1 = select (\r (.) -> r . "name" == "Alice") peopleInterests -- vs. aliceInterests2 = project ["name","interest"] $ innerJoin "personId" interests $ select (\r (.) -> r . "name" == "Alice") people -------------------- -- Join examples networks = makeTable ["networkId","network"] [["1","musicians"],["2","food lovers"]] (Just "networkId") membership = makeTable ["personId","networkId"] [["2","1"],["4","1"],["1","2"],["2","2"]] Nothing musicians = project ["name"] $ innerJoin "personId" (innerJoin "networkId" membership $ select (\r (.) -> r . "network" == "musicians") networks) people elvisNetworks = innerJoin "networkId" (innerJoin "personId" membership $ select (\r (.) -> r . "name" == "Elvis") people) networks elvisNetworkNames = project ["network"] elvisNetworks elvisNetworkIds = map head $ getRows $ project ["networkId"] elvisNetworks elvisId = head $ head $ getRows $ project ["personId"] elvisNetworks friendPred r (.) = (elem (r . "networkId") elvisNetworkIds) && ((r . "personId") /= elvisId) elvisFriends = project ["name","network"] $ innerJoin "networkId" (innerJoin "personId" (select friendPred membership) people) networks interests2 = project ["personId", "interest"] interests interests3 = makeTable ["personId2", "interest2"] (getRows interests2) (getKeyCol (key interests2)) commonInterests = join "personId" "personId2" interests2 interests3 aliceInterests = select (\r (.) -> r . "personId" == "1") interests2 commonIntAlice = join "personId" "personId2" aliceInterests interests3