Difference between revisions of "HXT/Practical/Simple2"

From HaskellWiki
< HXT‎ | Practical
Jump to navigation Jump to search
(Use block markup for multiline code)
 
Line 1: Line 1:
<hask>
+
<haskell>
 
{-# LANGUAGE Arrows, NoMonomorphismRestriction #-}
 
{-# LANGUAGE Arrows, NoMonomorphismRestriction #-}
 
import Text.XML.HXT.Core
 
import Text.XML.HXT.Core
Line 132: Line 132:
 
teams <- runX (parseXML "simple2.xml" >>> getTeams)
 
teams <- runX (parseXML "simple2.xml" >>> getTeams)
 
print teams
 
print teams
</hask>
+
</haskell>
   
 
=== The Data ===
 
=== The Data ===

Latest revision as of 16:05, 11 October 2011

{-# LANGUAGE Arrows, NoMonomorphismRestriction #-}
import Text.XML.HXT.Core

-- This example demonstrates a more complex XML parse,
-- involving multiple levels, attributes, inner lists,
-- and dealing with optional data.

-- Example data drawn from:
-- http://www.ibiblio.org/xml/books/bible/examples/05/5-1.xml
-- save as: simple2.xml

-- A team, the league and division it belongs to, its home city and
-- its roster.  The field order is kept as-is because the derived
-- Show instance prints fields in declaration order.
data Team = Team
  { teamName :: String
  , division :: String
  , league   :: String
  , city     :: String
  , players  :: [Player]
  } deriving (Show, Eq)

-- A single player.  The three statistics are optional: a field is
-- Nothing when the corresponding value is not available.
data Player = Player
  { firstName :: String
  , lastName  :: String
  , position  :: String
  , atBats    :: Maybe Int
  , hits      :: Maybe Int
  , era       :: Maybe Float
  } deriving (Show, Eq)

-- Read the XML document stored in `file`, using the reader options
-- listed below.
parseXML file = readDocument options file
  where
    options =
      [ withValidate no   -- do not validate the document
      , withRemoveWS yes  -- throw away formatting whitespace
      ]

-- Select the elements named `tag` found by a `deep` search of the
-- input tree.
atTag tag = deep matching
  where
    -- must be an element node, and must carry the requested name
    matching = isElem >>> hasName tag

-- Incremental development of the getTeams function:

-- First, list the teams.
-- Try it out in GHCi: 
-- Main> runX (parseXML "simple2.xml" >>> getTeams1)

-- Walk LEAGUE -> DIVISION -> TEAM and emit one
-- (league name, division name, team name) triple per team found.
getTeams1 = atTag "LEAGUE" >>>
  proc leagueNode -> do
    leagueLabel   <- getAttrValue "NAME" -< leagueNode
    divisionNode  <- atTag "DIVISION"    -< leagueNode
    divisionLabel <- getAttrValue "NAME" -< divisionNode
    teamNode      <- atTag "TEAM"        -< divisionNode
    teamLabel     <- getAttrValue "NAME" -< teamNode
    returnA -< (leagueLabel, divisionLabel, teamLabel)

-- getTeams2 also lists the players, but there is a catch: teams
-- without players are now left out of the results.  (This behavior
-- will be familiar to users of the List monad.)

-- Like getTeams1, but descends one level further and emits one
-- tuple per PLAYER.  A team with no PLAYER children therefore
-- produces no output at all.
getTeams2 = atTag "LEAGUE" >>>
  proc leagueNode -> do
    leagueLabel   <- getAttrValue "NAME"       -< leagueNode
    divisionNode  <- atTag "DIVISION"          -< leagueNode
    divisionLabel <- getAttrValue "NAME"       -< divisionNode
    teamNode      <- atTag "TEAM"              -< divisionNode
    teamLabel     <- getAttrValue "NAME"       -< teamNode
    playerNode    <- atTag "PLAYER"            -< teamNode
    givenName     <- getAttrValue "GIVEN_NAME" -< playerNode
    surname       <- getAttrValue "SURNAME"    -< playerNode
    returnA -< (leagueLabel, divisionLabel, teamLabel, givenName, surname)

-- What we really want is to capture the players in a list
-- at this level; and if there are no players then the
-- empty list will suffice.  listA is used for this purpose.

-- For every PLAYER element, pair up its given name and surname.
-- Both attribute lookups are fed the same player node, so the fan-out
-- combinator (&&&) expresses this directly without a proc block.
getPlayer1 =
  atTag "PLAYER" >>>
    (getAttrValue "GIVEN_NAME" &&& getAttrValue "SURNAME")
    
-- One result per team.  The players are gathered with listA into a
-- single list, so a team without players still shows up (with an
-- empty roster) instead of vanishing from the output.
getTeams3 = atTag "LEAGUE" >>>
  proc leagueNode -> do
    leagueLabel   <- getAttrValue "NAME" -< leagueNode
    divisionNode  <- atTag "DIVISION"    -< leagueNode
    divisionLabel <- getAttrValue "NAME" -< divisionNode
    teamNode      <- atTag "TEAM"        -< divisionNode
    teamLabel     <- getAttrValue "NAME" -< teamNode
    roster        <- listA getPlayer1    -< teamNode
    returnA -< (leagueLabel, divisionLabel, teamLabel, roster)

-- Try capturing some statistics about the players

-- A string is significant when at least one of its characters is
-- something other than a space, newline, carriage return or tab.
-- The empty string is therefore not significant.
significant = any (`notElem` " \n\r\t")

-- Use our definition of "significant" strings to capture the
-- attribute value as Just the string, or else yield Nothing.

-- Fetch attribute `attr` as an optional string: Just the value when
-- it passes the `significant` test, Nothing otherwise.
getStat attr = present `orElse` absent
  where
    -- succeeds only when the attribute text is significant
    present = getAttrValue attr >>> isA significant >>> arr Just
    -- fallback used when `present` produces no result
    absent  = constA Nothing

-- Build a Player record from every PLAYER element.
--
-- The name and position attributes are taken verbatim.  HITS, AT_BATS
-- and ERA are fetched with getStat, so each arrives as Maybe String,
-- and is then converted to a number by parseStat.
--
-- The conversion is total: text that does not parse as a number
-- yields Nothing.  (Using `read` here would instead raise
-- "Prelude.read: no parse" when the field is eventually forced,
-- e.g. by `print`.)
getPlayer2 = atTag "PLAYER" >>>
  proc p -> do
    fName     <- getAttrValue "GIVEN_NAME" -< p
    lName     <- getAttrValue "SURNAME"    -< p
    pos       <- getAttrValue "POSITION"   -< p
    hitsTxt   <- getStat "HITS"            -< p
    atBatsTxt <- getStat "AT_BATS"         -< p
    eraTxt    <- getStat "ERA"             -< p
    returnA -< Player
      { firstName = fName
      , lastName  = lName
      , position  = pos
      , hits      = hitsTxt   >>= parseStat
      , atBats    = atBatsTxt >>= parseStat
      , era       = eraTxt    >>= parseStat
      }
  where
    -- Total counterpart of `read`: accept exactly one complete parse,
    -- where only whitespace may follow the value (checked with `lex`,
    -- the same rule `read` applies); anything else gives Nothing.
    parseStat s = case [v | (v, rest) <- reads s, ("", "") <- lex rest] of
      [v] -> Just v
      _   -> Nothing
 
-- Final form: one Team record per TEAM element, carrying its league
-- and division names, its city, and the list of players collected by
-- listA (empty when the team has no PLAYER children).
getTeams4 = atTag "LEAGUE" >>>
  proc leagueNode -> do
    leagueLabel   <- getAttrValue "NAME" -< leagueNode
    divisionNode  <- atTag "DIVISION"    -< leagueNode
    divisionLabel <- getAttrValue "NAME" -< divisionNode
    teamNode      <- atTag "TEAM"        -< divisionNode
    teamLabel     <- getAttrValue "NAME" -< teamNode
    homeCity      <- getAttrValue "CITY" -< teamNode
    roster        <- listA getPlayer2    -< teamNode
    returnA -< Team
      { league   = leagueLabel
      , division = divisionLabel
      , teamName = teamLabel
      , city     = homeCity
      , players  = roster
      }

-- Our final choices

-- getPlayer names the record-building player arrow.  Nothing else in
-- this example refers to it; getTeams4 calls getPlayer2 directly.
getPlayer = getPlayer2
-- getTeams names the final team arrow; it is the one that main runs.
getTeams  = getTeams4

-- Parse simple2.xml, extract every team with getTeams, and print the
-- resulting list.
main =
  runX (parseXML "simple2.xml" >>> getTeams) >>= print

The Data

Included here for convenience.

<SEASON YEAR="1998">
<LEAGUE NAME="National League">
  <DIVISION NAME="East">

  <TEAM CITY="Atlanta" NAME="Braves">
<PLAYER GIVEN_NAME="Marty" SURNAME="Malloy" POSITION="Second Base" GAMES="11" GAMES_STARTED="8" AT_BATS="28" RUNS="3" HITS="5" DOUBLES="1" TRIPLES="0" HOME_RUNS="1" RBI="1" STEALS="0" CAUGHT_STEALING="0" SACRIFICE_HITS="0" SACRIFICE_FLIES="0" ERRORS="0" WALKS="2" STRUCK_OUT="2" HIT_BY_PITCH="0">
        </PLAYER>
<PLAYER GIVEN_NAME="Ozzie" SURNAME="Guillen" POSITION="Shortstop" GAMES="83" GAMES_STARTED="59" AT_BATS="264" RUNS="35" HITS="73" DOUBLES="15" TRIPLES="1" HOME_RUNS="1" RBI="22" STEALS="1" CAUGHT_STEALING="4" SACRIFICE_HITS="4" SACRIFICE_FLIES="2" ERRORS="6" WALKS="24" STRUCK_OUT="25" HIT_BY_PITCH="1">
        </PLAYER>
<PLAYER GIVEN_NAME="Danny" SURNAME="Bautista" POSITION="Outfield" GAMES="82" GAMES_STARTED="27" AT_BATS="144" RUNS="17" HITS="36" DOUBLES="11" TRIPLES="0" HOME_RUNS="3" RBI="17" STEALS="1" CAUGHT_STEALING="0" SACRIFICE_HITS="3" SACRIFICE_FLIES="2" ERRORS="2" WALKS="7" STRUCK_OUT="21" HIT_BY_PITCH="0">
        </PLAYER>
<PLAYER GIVEN_NAME="Gerald" SURNAME="Williams" POSITION="Outfield" GAMES="129" GAMES_STARTED="51" AT_BATS="266" RUNS="46" HITS="81" DOUBLES="18" TRIPLES="3" HOME_RUNS="10" RBI="44" STEALS="11" CAUGHT_STEALING="5" SACRIFICE_HITS="2" SACRIFICE_FLIES="1" ERRORS="5" WALKS="17" STRUCK_OUT="48" HIT_BY_PITCH="3">
        </PLAYER>
<PLAYER GIVEN_NAME="Tom" SURNAME="Glavine" POSITION="Starting Pitcher" GAMES="33" GAMES_STARTED="33" WINS="20" LOSSES="6" SAVES="0" COMPLETE_GAMES="4" SHUT_OUTS="3" ERA="2.47" INNINGS="229.1" HOME_RUNS_AGAINST="13" RUNS_AGAINST="67" EARNED_RUNS="63" HIT_BATTER="2" WILD_PITCHES="3" BALK="0" WALKED_BATTER="74" STRUCK_OUT_BATTER="157">
        </PLAYER>
<PLAYER GIVEN_NAME="Javier" SURNAME="Lopez" POSITION="Catcher" GAMES="133" GAMES_STARTED="124" AT_BATS="489" RUNS="73" HITS="139" DOUBLES="21" TRIPLES="1" HOME_RUNS="34" RBI="106" STEALS="5" CAUGHT_STEALING="3" SACRIFICE_HITS="1" SACRIFICE_FLIES="8" ERRORS="5" WALKS="30" STRUCK_OUT="85" HIT_BY_PITCH="6">
        </PLAYER>
<PLAYER GIVEN_NAME="Ryan" SURNAME="Klesko" POSITION="Outfield" GAMES="129" GAMES_STARTED="124" AT_BATS="427" RUNS="69" HITS="117" DOUBLES="29" TRIPLES="1" HOME_RUNS="18" RBI="70" STEALS="5" CAUGHT_STEALING="3" SACRIFICE_HITS="0" SACRIFICE_FLIES="4" ERRORS="2" WALKS="56" STRUCK_OUT="66" HIT_BY_PITCH="3">
        </PLAYER>
<PLAYER GIVEN_NAME="Andres" SURNAME="Galarraga" POSITION="First Base" GAMES="153" GAMES_STARTED="151" AT_BATS="555" RUNS="103" HITS="169" DOUBLES="27" TRIPLES="1" HOME_RUNS="44" RBI="121" STEALS="7" CAUGHT_STEALING="6" SACRIFICE_HITS="0" SACRIFICE_FLIES="5" ERRORS="11" WALKS="63" STRUCK_OUT="146" HIT_BY_PITCH="25">
        </PLAYER>
<PLAYER GIVEN_NAME="Wes" SURNAME="Helms" POSITION="Third Base" GAMES="7" GAMES_STARTED="2" AT_BATS="13" RUNS="2" HITS="4" DOUBLES="1" TRIPLES="0" HOME_RUNS="1" RBI="2" STEALS="0" CAUGHT_STEALING="0" SACRIFICE_HITS="0" SACRIFICE_FLIES="0" ERRORS="1" WALKS="0" STRUCK_OUT="4" HIT_BY_PITCH="0">
        </PLAYER>
</TEAM>
<TEAM CITY="Florida" NAME="Marlins">
      </TEAM>
<TEAM CITY="Montreal" NAME="Expos">
      </TEAM>
<TEAM CITY="New York" NAME="Mets">
      </TEAM>
<TEAM CITY="Philadelphia" NAME="Phillies">
      </TEAM>
</DIVISION>

  <DIVISION NAME="Central">
<TEAM CITY="Chicago" NAME="Cubs">
      </TEAM>
<TEAM CITY="Cincinnati" NAME="Reds">
      </TEAM>
<TEAM CITY="Houston" NAME="Astros">
      </TEAM>
<TEAM CITY="Milwaukee" NAME="Brewers">
      </TEAM>
<TEAM CITY="Pittsburgh" NAME="Pirates">
      </TEAM>
<TEAM CITY="St. Louis" NAME="Cardinals">
      </TEAM>
</DIVISION>

  <DIVISION NAME="West">
<TEAM CITY="Arizona" NAME="Diamondbacks">
      </TEAM>
<TEAM CITY="Colorado" NAME="Rockies">
      </TEAM>
<TEAM CITY="Los Angeles" NAME="Dodgers">
      </TEAM>
<TEAM CITY="San Diego" NAME="Padres">
      </TEAM>
<TEAM CITY="San Francisco" NAME="Giants">
      </TEAM>
</DIVISION>
</LEAGUE>

  <LEAGUE NAME="American League">

  <DIVISION NAME="East">
<TEAM CITY="Baltimore" NAME="Orioles">
      </TEAM>
<TEAM CITY="Boston" NAME="Red Sox">
      </TEAM>
<TEAM CITY="New York" NAME="Yankees">
      </TEAM>
<TEAM CITY="Tampa Bay" NAME="Devil Rays">
      </TEAM>
<TEAM CITY="Toronto" NAME="Blue Jays">
      </TEAM>
</DIVISION>

  <DIVISION NAME="Central">
<TEAM CITY="Chicago" NAME="White Sox">
      </TEAM>
<TEAM CITY="Kansas City" NAME="Royals">
      </TEAM>
<TEAM CITY="Detroit" NAME="Tigers">
      </TEAM>
<TEAM CITY="Cleveland" NAME="Indians">
      </TEAM>
<TEAM CITY="Minnesota" NAME="Twins">
      </TEAM>
</DIVISION>

  <DIVISION NAME="West">
<TEAM CITY="Anaheim" NAME="Angels">
      </TEAM>
<TEAM CITY="Oakland" NAME="Athletics">
      </TEAM>
<TEAM CITY="Seattle" NAME="Mariners">
      </TEAM>
<TEAM CITY="Texas" NAME="Rangers">
      </TEAM>
</DIVISION>
</LEAGUE>
</SEASON>