Personal tools

HXT/Practical/Simple2

From HaskellWiki

< HXT | Practical
Revision as of 01:05, 5 August 2007 by Mrd (Talk | contribs)

(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search
{-# OPTIONS -farrows -fno-monomorphism-restriction #-}
import Text.XML.HXT.Arrow

-- This example demonstrates a more complex XML parse,
-- involving multiple levels, attributes, inner lists,
-- and dealing with optional data.

-- Example data drawn from:
-- http://www.ibiblio.org/xml/books/bible/examples/05/5-1.xml
-- save as: simple2.xml

data Team = Team
  { teamName, division, league, city :: String,
    players :: [Player] }
  deriving (Show, Eq)

data Player = Player
  { firstName, lastName, position :: String,
   
    atBats, hits :: Maybe Int,
    era          :: Maybe Float }
  deriving (Show, Eq)

parseXML file = readDocument [(a_validate,v_0)] file

atTag tag = deep (isElem >>> hasName tag)

-- Incremental development of the getTeams function:

-- First, list the teams.
-- Try it out in GHCi:
-- Main> runX (parseXML "simple2.xml" >>> getTeams1)

getTeams1 = atTag "LEAGUE" >>>
  proc l -> do
    leagName <- getAttrValue "NAME" -< l
    divi     <- atTag "DIVISION"    -< l
    diviName <- getAttrValue "NAME" -< divi
    team     <- atTag "TEAM"        -< divi
    teamName <- getAttrValue "NAME" -< team
    returnA -< (leagName, diviName, teamName)

-- getTeams2 also lists the players.
-- But there is a catch; now teams without players
-- are being left out.  (This behavior is familiar to
-- users of the List monad)

getTeams2 = atTag "LEAGUE" >>>
  proc l -> do
    leagName <- getAttrValue "NAME"       -< l
    divi     <- atTag "DIVISION"          -< l
    diviName <- getAttrValue "NAME"       -< divi
    team     <- atTag "TEAM"              -< divi
    teamName <- getAttrValue "NAME"       -< team
    player   <- atTag "PLAYER"            -< team
    fName    <- getAttrValue "GIVEN_NAME" -< player
    lName    <- getAttrValue "SURNAME"    -< player
    returnA -< (leagName, diviName, teamName, fName, lName)

-- What we really want is to capture the players in a list
-- at this level; and if there are no players then the
-- empty list will suffice.  listA is used for this purpose.

getPlayer1 = atTag "PLAYER" >>>
  proc p -> do
    fName    <- getAttrValue "GIVEN_NAME" -< p
    lName    <- getAttrValue "SURNAME"    -< p
    returnA -< (fName, lName)
   
getTeams3 = atTag "LEAGUE" >>>
  proc l -> do
    leagName <- getAttrValue "NAME" -< l
    divi     <- atTag "DIVISION"    -< l
    diviName <- getAttrValue "NAME" -< divi
    team     <- atTag "TEAM"        -< divi
    teamName <- getAttrValue "NAME" -< team
    players  <- listA getPlayer1    -< team
    returnA -< (leagName, diviName, teamName, players)

-- Try capturing some statistics about the players

significant = not . all (`elem` " \n\r\t")

-- Use our definition of "significant" strings to
-- capture the value; or else nothing.

defaultAttrStat attr =
  (getAttrValue attr >>> isA significant >>> arr Just)
    `orElse` (constA Nothing)

getPlayer2 = atTag "PLAYER" >>>
  proc p -> do
    fName    <- getAttrValue "GIVEN_NAME" -< p
    lName    <- getAttrValue "SURNAME"    -< p
    position <- getAttrValue "POSITION"   -< p
    hits     <- defaultAttrStat "HITS"    -< p
    atBats   <- defaultAttrStat "AT_BATS" -< p
    era      <- defaultAttrStat "ERA"     -< p
    returnA -< Player
      { firstName = fName,
        lastName  = lName,
        position  = position,
        hits      = read `fmap` hits,
        atBats    = read `fmap` atBats,
        era       = read `fmap` era }
 
getTeams4 = atTag "LEAGUE" >>>
  proc l -> do
    leagName <- getAttrValue "NAME" -< l
    divi     <- atTag "DIVISION"    -< l
    diviName <- getAttrValue "NAME" -< divi
    team     <- atTag "TEAM"        -< divi
    teamName <- getAttrValue "NAME" -< team
    city     <- getAttrValue "CITY" -< team
    players  <- listA getPlayer2    -< team
    returnA -< Team
      { league   = leagName,
        division = diviName,
        teamName = teamName,
        city     = city,
        players  = players }

-- Our final choices

getPlayer = getPlayer2
getTeams  = getTeams4

main = do
  teams <- runX (parseXML "simple2.xml" >>> getTeams)
  print teams