HXT/Practical/Simple2

From HaskellWiki
< HXT‎ | Practical
Revision as of 01:05, 5 August 2007 by Mrd (talk | contribs) (simple2 -- more involved example)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

{-# OPTIONS -farrows -fno-monomorphism-restriction #-} import Text.XML.HXT.Arrow -- This example demonstrates a more complex XML parse, -- involving multiple levels, attributes, inner lists, -- and dealing with optional data. -- Example data drawn from: -- http://www.ibiblio.org/xml/books/bible/examples/05/5-1.xml -- save as: simple2.xml data Team = Team { teamName, division, league, city :: String, players :: [Player] } deriving (Show, Eq) data Player = Player { firstName, lastName, position :: String, atBats, hits :: Maybe Int, era :: Maybe Float } deriving (Show, Eq) parseXML file = readDocument [(a_validate,v_0)] file atTag tag = deep (isElem >>> hasName tag) -- Incremental development of the getTeams function: -- First, list the teams. -- Try it out in GHCi: -- Main> runX (parseXML "simple2.xml" >>> getTeams1) getTeams1 = atTag "LEAGUE" >>> proc l -> do leagName <- getAttrValue "NAME" -< l divi <- atTag "DIVISION" -< l diviName <- getAttrValue "NAME" -< divi team <- atTag "TEAM" -< divi teamName <- getAttrValue "NAME" -< team returnA -< (leagName, diviName, teamName) -- getTeams2 also lists the players. -- But there is a catch; now teams without players -- are being left out. (This behavior is familiar to -- users of the List monad) getTeams2 = atTag "LEAGUE" >>> proc l -> do leagName <- getAttrValue "NAME" -< l divi <- atTag "DIVISION" -< l diviName <- getAttrValue "NAME" -< divi team <- atTag "TEAM" -< divi teamName <- getAttrValue "NAME" -< team player <- atTag "PLAYER" -< team fName <- getAttrValue "GIVEN_NAME" -< player lName <- getAttrValue "SURNAME" -< player returnA -< (leagName, diviName, teamName, fName, lName) -- What we really want is to capture the players in a list -- at this level; and if there are no players then the -- empty list will suffice. listA is used for this purpose. getPlayer1 = atTag "PLAYER" >>> proc p -> do fName <- getAttrValue "GIVEN_NAME" -< p lName <- getAttrValue "SURNAME" -< p returnA -< (fName, lName) getTeams3 = atTag "LEAGUE" >>> proc l -> do leagName <- getAttrValue "NAME" -< l divi <- atTag "DIVISION" -< l diviName <- getAttrValue "NAME" -< divi team <- atTag "TEAM" -< divi teamName <- getAttrValue "NAME" -< team players <- listA getPlayer1 -< team returnA -< (leagName, diviName, teamName, players) -- Try capturing some statistics about the players significant = not . all (`elem` " \n\r\t") -- Use our definition of "significant" strings to -- capture the value; or else nothing. defaultAttrStat attr = (getAttrValue attr >>> isA significant >>> arr Just) `orElse` (constA Nothing) getPlayer2 = atTag "PLAYER" >>> proc p -> do fName <- getAttrValue "GIVEN_NAME" -< p lName <- getAttrValue "SURNAME" -< p position <- getAttrValue "POSITION" -< p hits <- defaultAttrStat "HITS" -< p atBats <- defaultAttrStat "AT_BATS" -< p era <- defaultAttrStat "ERA" -< p returnA -< Player { firstName = fName, lastName = lName, position = position, hits = read `fmap` hits, atBats = read `fmap` atBats, era = read `fmap` era } getTeams4 = atTag "LEAGUE" >>> proc l -> do leagName <- getAttrValue "NAME" -< l divi <- atTag "DIVISION" -< l diviName <- getAttrValue "NAME" -< divi team <- atTag "TEAM" -< divi teamName <- getAttrValue "NAME" -< team city <- getAttrValue "CITY" -< team players <- listA getPlayer2 -< team returnA -< Team { league = leagName, division = diviName, teamName = teamName, city = city, players = players } -- Our final choices getPlayer = getPlayer2 getTeams = getTeams4 main = do teams <- runX (parseXML "simple2.xml" >>> getTeams) print teams