Difference between revisions of "HXT/Practical/Simple2"

From HaskellWiki
< HXT‎ | Practical
Jump to navigation Jump to search
Line 1: Line 1:
 
<hask>
 
<hask>
 
{-# LANGUAGE Arrows, NoMonomorphismRestriction #-}
 
{-# LANGUAGE Arrows, NoMonomorphismRestriction #-}
import Text.XML.HXT.Arrow
+
import Text.XML.HXT.Core
   
 
-- This example demonstrates a more complex XML parse,
 
-- This example demonstrates a more complex XML parse,

Revision as of 17:46, 24 January 2011

{-# LANGUAGE Arrows, NoMonomorphismRestriction #-} import Text.XML.HXT.Core -- This example demonstrates a more complex XML parse, -- involving multiple levels, attributes, inner lists, -- and dealing with optional data. -- Example data drawn from: -- http://www.ibiblio.org/xml/books/bible/examples/05/5-1.xml -- save as: simple2.xml data Team = Team { teamName, division, league, city :: String, players :: [Player] } deriving (Show, Eq) data Player = Player { firstName, lastName, position :: String, atBats, hits :: Maybe Int, era :: Maybe Float } deriving (Show, Eq) parseXML file = readDocument [ withValidate no , withRemoveWS yes -- throw away formating WS ] file atTag tag = deep (isElem >>> hasName tag) -- Incremental development of the getTeams function: -- First, list the teams. -- Try it out in GHCi: -- Main> runX (parseXML "simple2.xml" >>> getTeams1) getTeams1 = atTag "LEAGUE" >>> proc l -> do leagName <- getAttrValue "NAME" -< l divi <- atTag "DIVISION" -< l diviName <- getAttrValue "NAME" -< divi team <- atTag "TEAM" -< divi teamName <- getAttrValue "NAME" -< team returnA -< (leagName, diviName, teamName) -- getTeams2 also lists the players. -- But there is a catch; now teams without players -- are being left out. (This behavior is familiar to -- users of the List monad) getTeams2 = atTag "LEAGUE" >>> proc l -> do leagName <- getAttrValue "NAME" -< l divi <- atTag "DIVISION" -< l diviName <- getAttrValue "NAME" -< divi team <- atTag "TEAM" -< divi teamName <- getAttrValue "NAME" -< team player <- atTag "PLAYER" -< team fName <- getAttrValue "GIVEN_NAME" -< player lName <- getAttrValue "SURNAME" -< player returnA -< (leagName, diviName, teamName, fName, lName) -- What we really want is to capture the players in a list -- at this level; and if there are no players then the -- empty list will suffice. listA is used for this purpose. getPlayer1 = atTag "PLAYER" >>> proc p -> do fName <- getAttrValue "GIVEN_NAME" -< p lName <- getAttrValue "SURNAME" -< p returnA -< (fName, lName) getTeams3 = atTag "LEAGUE" >>> proc l -> do leagName <- getAttrValue "NAME" -< l divi <- atTag "DIVISION" -< l diviName <- getAttrValue "NAME" -< divi team <- atTag "TEAM" -< divi teamName <- getAttrValue "NAME" -< team players <- listA getPlayer1 -< team returnA -< (leagName, diviName, teamName, players) -- Try capturing some statistics about the players significant = not . all (`elem` " \n\r\t") -- Use our definition of "significant" strings to -- capture the value; or else nothing. getStat attr = (getAttrValue attr >>> isA significant >>> arr Just) `orElse` (constA Nothing) getPlayer2 = atTag "PLAYER" >>> proc p -> do fName <- getAttrValue "GIVEN_NAME" -< p lName <- getAttrValue "SURNAME" -< p position <- getAttrValue "POSITION" -< p hits <- getStat "HITS" -< p atBats <- getStat "AT_BATS" -< p era <- getStat "ERA" -< p returnA -< Player { firstName = fName, lastName = lName, position = position, hits = read `fmap` hits, atBats = read `fmap` atBats, era = read `fmap` era } getTeams4 = atTag "LEAGUE" >>> proc l -> do leagName <- getAttrValue "NAME" -< l divi <- atTag "DIVISION" -< l diviName <- getAttrValue "NAME" -< divi team <- atTag "TEAM" -< divi teamName <- getAttrValue "NAME" -< team city <- getAttrValue "CITY" -< team players <- listA getPlayer2 -< team returnA -< Team { league = leagName, division = diviName, teamName = teamName, city = city, players = players } -- Our final choices getPlayer = getPlayer2 getTeams = getTeams4 main = do teams <- runX (parseXML "simple2.xml" >>> getTeams) print teams

The Data

Included here for convenience.

<SEASON YEAR="1998">
<LEAGUE NAME="National League">
  <DIVISION NAME="East">

  <TEAM CITY="Atlanta" NAME="Braves">
<PLAYER GIVEN_NAME="Marty" SURNAME="Malloy" POSITION="Second Base" GAMES="11" GAMES_STARTED="8" AT_BATS="28" RUNS="3" HITS="5" DOUBLES="1" TRIPLES="0" HOME_RUNS="1" RBI="1" STEALS="0" CAUGHT_STEALING="0" SACRIFICE_HITS="0" SACRIFICE_FLIES="0" ERRORS="0" WALKS="2" STRUCK_OUT="2" HIT_BY_PITCH="0">
        </PLAYER>
<PLAYER GIVEN_NAME="Ozzie" SURNAME="Guillen" POSITION="Shortstop" GAMES="83" GAMES_STARTED="59" AT_BATS="264" RUNS="35" HITS="73" DOUBLES="15" TRIPLES="1" HOME_RUNS="1" RBI="22" STEALS="1" CAUGHT_STEALING="4" SACRIFICE_HITS="4" SACRIFICE_FLIES="2" ERRORS="6" WALKS="24" STRUCK_OUT="25" HIT_BY_PITCH="1">
        </PLAYER>
<PLAYER GIVEN_NAME="Danny" SURNAME="Bautista" POSITION="Outfield" GAMES="82" GAMES_STARTED="27" AT_BATS="144" RUNS="17" HITS="36" DOUBLES="11" TRIPLES="0" HOME_RUNS="3" RBI="17" STEALS="1" CAUGHT_STEALING="0" SACRIFICE_HITS="3" SACRIFICE_FLIES="2" ERRORS="2" WALKS="7" STRUCK_OUT="21" HIT_BY_PITCH="0">
        </PLAYER>
<PLAYER GIVEN_NAME="Gerald" SURNAME="Williams" POSITION="Outfield" GAMES="129" GAMES_STARTED="51" AT_BATS="266" RUNS="46" HITS="81" DOUBLES="18" TRIPLES="3" HOME_RUNS="10" RBI="44" STEALS="11" CAUGHT_STEALING="5" SACRIFICE_HITS="2" SACRIFICE_FLIES="1" ERRORS="5" WALKS="17" STRUCK_OUT="48" HIT_BY_PITCH="3">
        </PLAYER>
<PLAYER GIVEN_NAME="Tom" SURNAME="Glavine" POSITION="Starting Pitcher" GAMES="33" GAMES_STARTED="33" WINS="20" LOSSES="6" SAVES="0" COMPLETE_GAMES="4" SHUT_OUTS="3" ERA="2.47" INNINGS="229.1" HOME_RUNS_AGAINST="13" RUNS_AGAINST="67" EARNED_RUNS="63" HIT_BATTER="2" WILD_PITCHES="3" BALK="0" WALKED_BATTER="74" STRUCK_OUT_BATTER="157">
        </PLAYER>
<PLAYER GIVEN_NAME="Javier" SURNAME="Lopez" POSITION="Catcher" GAMES="133" GAMES_STARTED="124" AT_BATS="489" RUNS="73" HITS="139" DOUBLES="21" TRIPLES="1" HOME_RUNS="34" RBI="106" STEALS="5" CAUGHT_STEALING="3" SACRIFICE_HITS="1" SACRIFICE_FLIES="8" ERRORS="5" WALKS="30" STRUCK_OUT="85" HIT_BY_PITCH="6">
        </PLAYER>
<PLAYER GIVEN_NAME="Ryan" SURNAME="Klesko" POSITION="Outfield" GAMES="129" GAMES_STARTED="124" AT_BATS="427" RUNS="69" HITS="117" DOUBLES="29" TRIPLES="1" HOME_RUNS="18" RBI="70" STEALS="5" CAUGHT_STEALING="3" SACRIFICE_HITS="0" SACRIFICE_FLIES="4" ERRORS="2" WALKS="56" STRUCK_OUT="66" HIT_BY_PITCH="3">
        </PLAYER>
<PLAYER GIVEN_NAME="Andres" SURNAME="Galarraga" POSITION="First Base" GAMES="153" GAMES_STARTED="151" AT_BATS="555" RUNS="103" HITS="169" DOUBLES="27" TRIPLES="1" HOME_RUNS="44" RBI="121" STEALS="7" CAUGHT_STEALING="6" SACRIFICE_HITS="0" SACRIFICE_FLIES="5" ERRORS="11" WALKS="63" STRUCK_OUT="146" HIT_BY_PITCH="25">
        </PLAYER>
<PLAYER GIVEN_NAME="Wes" SURNAME="Helms" POSITION="Third Base" GAMES="7" GAMES_STARTED="2" AT_BATS="13" RUNS="2" HITS="4" DOUBLES="1" TRIPLES="0" HOME_RUNS="1" RBI="2" STEALS="0" CAUGHT_STEALING="0" SACRIFICE_HITS="0" SACRIFICE_FLIES="0" ERRORS="1" WALKS="0" STRUCK_OUT="4" HIT_BY_PITCH="0">
        </PLAYER>
</TEAM>
<TEAM CITY="Florida" NAME="Marlins">
      </TEAM>
<TEAM CITY="Montreal" NAME="Expos">
      </TEAM>
<TEAM CITY="New York" NAME="Mets">
      </TEAM>
<TEAM CITY="Philadelphia" NAME="Phillies">
      </TEAM>
</DIVISION>

  <DIVISION NAME="Central">
<TEAM CITY="Chicago" NAME="Cubs">
      </TEAM>
<TEAM CITY="Cincinnati" NAME="Reds">
      </TEAM>
<TEAM CITY="Houston" NAME="Astros">
      </TEAM>
<TEAM CITY="Milwaukee" NAME="Brewers">
      </TEAM>
<TEAM CITY="Pittsburgh" NAME="Pirates">
      </TEAM>
<TEAM CITY="St. Louis" NAME="Cardinals">
      </TEAM>
</DIVISION>

  <DIVISION NAME="West">
<TEAM CITY="Arizona" NAME="Diamondbacks">
      </TEAM>
<TEAM CITY="Colorado" NAME="Rockies">
      </TEAM>
<TEAM CITY="Los Angeles" NAME="Dodgers">
      </TEAM>
<TEAM CITY="San Diego" NAME="Padres">
      </TEAM>
<TEAM CITY="San Francisco" NAME="Giants">
      </TEAM>
</DIVISION>
</LEAGUE>

  <LEAGUE NAME="American League">

  <DIVISION NAME="East">
<TEAM CITY="Baltimore" NAME="Orioles">
      </TEAM>
<TEAM CITY="Boston" NAME="Red Sox">
      </TEAM>
<TEAM CITY="New York" NAME="Yankees">
      </TEAM>
<TEAM CITY="Tampa Bay" NAME="Devil Rays">
      </TEAM>
<TEAM CITY="Toronto" NAME="Blue Jays">
      </TEAM>
</DIVISION>

  <DIVISION NAME="Central">
<TEAM CITY="Chicago" NAME="White Sox">
      </TEAM>
<TEAM CITY="Kansas City" NAME="Royals">
      </TEAM>
<TEAM CITY="Detroit" NAME="Tigers">
      </TEAM>
<TEAM CITY="Cleveland" NAME="Indians">
      </TEAM>
<TEAM CITY="Minnesota" NAME="Twins">
      </TEAM>
</DIVISION>

  <DIVISION NAME="West">
<TEAM CITY="Anaheim" NAME="Angels">
      </TEAM>
<TEAM CITY="Oakland" NAME="Athletics">
      </TEAM>
<TEAM CITY="Seattle" NAME="Mariners">
      </TEAM>
<TEAM CITY="Texas" NAME="Rangers">
      </TEAM>
</DIVISION>
</LEAGUE>
</SEASON>