HXT/Practical/Simple2

From HaskellWiki
< HXT‎ | Practical
Revision as of 21:32, 11 August 2007 by Mrd (talk | contribs) (use language pragma)
Jump to navigation Jump to search
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

{-# LANGUAGE Arrows, NoMonomorphismRestriction #-} import Text.XML.HXT.Arrow -- This example demonstrates a more complex XML parse, -- involving multiple levels, attributes, inner lists, -- and dealing with optional data. -- Example data drawn from: -- http://www.ibiblio.org/xml/books/bible/examples/05/5-1.xml -- save as: simple2.xml data Team = Team { teamName, division, league, city :: String, players :: [Player] } deriving (Show, Eq) data Player = Player { firstName, lastName, position :: String, atBats, hits :: Maybe Int, era :: Maybe Float } deriving (Show, Eq) parseXML file = readDocument [(a_validate,v_0)] file atTag tag = deep (isElem >>> hasName tag) -- Incremental development of the getTeams function: -- First, list the teams. -- Try it out in GHCi: -- Main> runX (parseXML "simple2.xml" >>> getTeams1) getTeams1 = atTag "LEAGUE" >>> proc l -> do leagName <- getAttrValue "NAME" -< l divi <- atTag "DIVISION" -< l diviName <- getAttrValue "NAME" -< divi team <- atTag "TEAM" -< divi teamName <- getAttrValue "NAME" -< team returnA -< (leagName, diviName, teamName) -- getTeams2 also lists the players. -- But there is a catch; now teams without players -- are being left out. (This behavior is familiar to -- users of the List monad) getTeams2 = atTag "LEAGUE" >>> proc l -> do leagName <- getAttrValue "NAME" -< l divi <- atTag "DIVISION" -< l diviName <- getAttrValue "NAME" -< divi team <- atTag "TEAM" -< divi teamName <- getAttrValue "NAME" -< team player <- atTag "PLAYER" -< team fName <- getAttrValue "GIVEN_NAME" -< player lName <- getAttrValue "SURNAME" -< player returnA -< (leagName, diviName, teamName, fName, lName) -- What we really want is to capture the players in a list -- at this level; and if there are no players then the -- empty list will suffice. listA is used for this purpose. getPlayer1 = atTag "PLAYER" >>> proc p -> do fName <- getAttrValue "GIVEN_NAME" -< p lName <- getAttrValue "SURNAME" -< p returnA -< (fName, lName) getTeams3 = atTag "LEAGUE" >>> proc l -> do leagName <- getAttrValue "NAME" -< l divi <- atTag "DIVISION" -< l diviName <- getAttrValue "NAME" -< divi team <- atTag "TEAM" -< divi teamName <- getAttrValue "NAME" -< team players <- listA getPlayer1 -< team returnA -< (leagName, diviName, teamName, players) -- Try capturing some statistics about the players significant = not . all (`elem` " \n\r\t") -- Use our definition of "significant" strings to -- capture the value; or else nothing. getStat attr = (getAttrValue attr >>> isA significant >>> arr Just) `orElse` (constA Nothing) getPlayer2 = atTag "PLAYER" >>> proc p -> do fName <- getAttrValue "GIVEN_NAME" -< p lName <- getAttrValue "SURNAME" -< p position <- getAttrValue "POSITION" -< p hits <- getStat "HITS" -< p atBats <- getStat "AT_BATS" -< p era <- getStat "ERA" -< p returnA -< Player { firstName = fName, lastName = lName, position = position, hits = read `fmap` hits, atBats = read `fmap` atBats, era = read `fmap` era } getTeams4 = atTag "LEAGUE" >>> proc l -> do leagName <- getAttrValue "NAME" -< l divi <- atTag "DIVISION" -< l diviName <- getAttrValue "NAME" -< divi team <- atTag "TEAM" -< divi teamName <- getAttrValue "NAME" -< team city <- getAttrValue "CITY" -< team players <- listA getPlayer2 -< team returnA -< Team { league = leagName, division = diviName, teamName = teamName, city = city, players = players } -- Our final choices getPlayer = getPlayer2 getTeams = getTeams4 main = do teams <- runX (parseXML "simple2.xml" >>> getTeams) print teams

The Data

Included here for convenience.

<SEASON YEAR="1998">
<LEAGUE NAME="National League">
  <DIVISION NAME="East">

  <TEAM CITY="Atlanta" NAME="Braves">
<PLAYER GIVEN_NAME="Marty" SURNAME="Malloy" POSITION="Second Base" GAMES="11" GAMES_STARTED="8" AT_BATS="28" RUNS="3" HITS="5" DOUBLES="1" TRIPLES="0" HOME_RUNS="1" RBI="1" STEALS="0" CAUGHT_STEALING="0" SACRIFICE_HITS="0" SACRIFICE_FLIES="0" ERRORS="0" WALKS="2" STRUCK_OUT="2" HIT_BY_PITCH="0">
        </PLAYER>
<PLAYER GIVEN_NAME="Ozzie" SURNAME="Guillen" POSITION="Shortstop" GAMES="83" GAMES_STARTED="59" AT_BATS="264" RUNS="35" HITS="73" DOUBLES="15" TRIPLES="1" HOME_RUNS="1" RBI="22" STEALS="1" CAUGHT_STEALING="4" SACRIFICE_HITS="4" SACRIFICE_FLIES="2" ERRORS="6" WALKS="24" STRUCK_OUT="25" HIT_BY_PITCH="1">
        </PLAYER>
<PLAYER GIVEN_NAME="Danny" SURNAME="Bautista" POSITION="Outfield" GAMES="82" GAMES_STARTED="27" AT_BATS="144" RUNS="17" HITS="36" DOUBLES="11" TRIPLES="0" HOME_RUNS="3" RBI="17" STEALS="1" CAUGHT_STEALING="0" SACRIFICE_HITS="3" SACRIFICE_FLIES="2" ERRORS="2" WALKS="7" STRUCK_OUT="21" HIT_BY_PITCH="0">
        </PLAYER>
<PLAYER GIVEN_NAME="Gerald" SURNAME="Williams" POSITION="Outfield" GAMES="129" GAMES_STARTED="51" AT_BATS="266" RUNS="46" HITS="81" DOUBLES="18" TRIPLES="3" HOME_RUNS="10" RBI="44" STEALS="11" CAUGHT_STEALING="5" SACRIFICE_HITS="2" SACRIFICE_FLIES="1" ERRORS="5" WALKS="17" STRUCK_OUT="48" HIT_BY_PITCH="3">
        </PLAYER>
<PLAYER GIVEN_NAME="Tom" SURNAME="Glavine" POSITION="Starting Pitcher" GAMES="33" GAMES_STARTED="33" WINS="20" LOSSES="6" SAVES="0" COMPLETE_GAMES="4" SHUT_OUTS="3" ERA="2.47" INNINGS="229.1" HOME_RUNS_AGAINST="13" RUNS_AGAINST="67" EARNED_RUNS="63" HIT_BATTER="2" WILD_PITCHES="3" BALK="0" WALKED_BATTER="74" STRUCK_OUT_BATTER="157">
        </PLAYER>
<PLAYER GIVEN_NAME="Javier" SURNAME="Lopez" POSITION="Catcher" GAMES="133" GAMES_STARTED="124" AT_BATS="489" RUNS="73" HITS="139" DOUBLES="21" TRIPLES="1" HOME_RUNS="34" RBI="106" STEALS="5" CAUGHT_STEALING="3" SACRIFICE_HITS="1" SACRIFICE_FLIES="8" ERRORS="5" WALKS="30" STRUCK_OUT="85" HIT_BY_PITCH="6">
        </PLAYER>
<PLAYER GIVEN_NAME="Ryan" SURNAME="Klesko" POSITION="Outfield" GAMES="129" GAMES_STARTED="124" AT_BATS="427" RUNS="69" HITS="117" DOUBLES="29" TRIPLES="1" HOME_RUNS="18" RBI="70" STEALS="5" CAUGHT_STEALING="3" SACRIFICE_HITS="0" SACRIFICE_FLIES="4" ERRORS="2" WALKS="56" STRUCK_OUT="66" HIT_BY_PITCH="3">
        </PLAYER>
<PLAYER GIVEN_NAME="Andres" SURNAME="Galarraga" POSITION="First Base" GAMES="153" GAMES_STARTED="151" AT_BATS="555" RUNS="103" HITS="169" DOUBLES="27" TRIPLES="1" HOME_RUNS="44" RBI="121" STEALS="7" CAUGHT_STEALING="6" SACRIFICE_HITS="0" SACRIFICE_FLIES="5" ERRORS="11" WALKS="63" STRUCK_OUT="146" HIT_BY_PITCH="25">
        </PLAYER>
<PLAYER GIVEN_NAME="Wes" SURNAME="Helms" POSITION="Third Base" GAMES="7" GAMES_STARTED="2" AT_BATS="13" RUNS="2" HITS="4" DOUBLES="1" TRIPLES="0" HOME_RUNS="1" RBI="2" STEALS="0" CAUGHT_STEALING="0" SACRIFICE_HITS="0" SACRIFICE_FLIES="0" ERRORS="1" WALKS="0" STRUCK_OUT="4" HIT_BY_PITCH="0">
        </PLAYER>
</TEAM>
<TEAM CITY="Florida" NAME="Marlins">
      </TEAM>
<TEAM CITY="Montreal" NAME="Expos">
      </TEAM>
<TEAM CITY="New York" NAME="Mets">
      </TEAM>
<TEAM CITY="Philadelphia" NAME="Phillies">
      </TEAM>
</DIVISION>

  <DIVISION NAME="Central">
<TEAM CITY="Chicago" NAME="Cubs">
      </TEAM>
<TEAM CITY="Cincinnati" NAME="Reds">
      </TEAM>
<TEAM CITY="Houston" NAME="Astros">
      </TEAM>
<TEAM CITY="Milwaukee" NAME="Brewers">
      </TEAM>
<TEAM CITY="Pittsburgh" NAME="Pirates">
      </TEAM>
<TEAM CITY="St. Louis" NAME="Cardinals">
      </TEAM>
</DIVISION>

  <DIVISION NAME="West">
<TEAM CITY="Arizona" NAME="Diamondbacks">
      </TEAM>
<TEAM CITY="Colorado" NAME="Rockies">
      </TEAM>
<TEAM CITY="Los Angeles" NAME="Dodgers">
      </TEAM>
<TEAM CITY="San Diego" NAME="Padres">
      </TEAM>
<TEAM CITY="San Francisco" NAME="Giants">
      </TEAM>
</DIVISION>
</LEAGUE>

  <LEAGUE NAME="American League">

  <DIVISION NAME="East">
<TEAM CITY="Baltimore" NAME="Orioles">
      </TEAM>
<TEAM CITY="Boston" NAME="Red Sox">
      </TEAM>
<TEAM CITY="New York" NAME="Yankees">
      </TEAM>
<TEAM CITY="Tampa Bay" NAME="Devil Rays">
      </TEAM>
<TEAM CITY="Toronto" NAME="Blue Jays">
      </TEAM>
</DIVISION>

  <DIVISION NAME="Central">
<TEAM CITY="Chicago" NAME="White Sox">
      </TEAM>
<TEAM CITY="Kansas City" NAME="Royals">
      </TEAM>
<TEAM CITY="Detroit" NAME="Tigers">
      </TEAM>
<TEAM CITY="Cleveland" NAME="Indians">
      </TEAM>
<TEAM CITY="Minnesota" NAME="Twins">
      </TEAM>
</DIVISION>

  <DIVISION NAME="West">
<TEAM CITY="Anaheim" NAME="Angels">
      </TEAM>
<TEAM CITY="Oakland" NAME="Athletics">
      </TEAM>
<TEAM CITY="Seattle" NAME="Mariners">
      </TEAM>
<TEAM CITY="Texas" NAME="Rangers">
      </TEAM>
</DIVISION>
</LEAGUE>
</SEASON>