HaskellWiki

Haskell | Wiki community | Recent changes
Random page | Special pages

 

Not logged in
Log in | Help

HXT/Practical/Ebay1

< HXT | Practical

{-# LANGUAGE Arrows, NoMonomorphismRestriction, ParallelListComp #-}
module Main where
 
import Text.XML.HXT.Arrow  hiding (deep)
import Data.List (nub,sort,isPrefixOf,transpose,groupBy)

-- Local redefinition of HXT's 'deep' (the library version is hidden in the
-- import list) so that it gets a broader inferred signature.
-- Tries @f@ on the current node first; the alternative given to 'orElse'
-- moves to the children and repeats the same search there.
deep f = orElse f descend
  where
    -- Recursive step: apply the same search to every child node.
    descend = getChildren >>> deep f

-- Split a path into its segments at every '/'.
-- The empty string gives no segments, and a single trailing '/' does not
-- produce a trailing empty segment; a leading or doubled '/' does produce
-- an empty segment (e.g. "a//b" gives ["a", "", "b"]).
split :: String -> [String]
split path
  | null path = []
  | otherwise = segment : split (drop 1 rest)
  where
    -- 'rest' still starts with the separator (if any); 'drop 1' removes it.
    (segment, rest) = span (/= '/') path

-- Follow a '/'-separated path of element names, starting from the children
-- of the current node: each segment becomes a 'hasName' filter and the
-- filters are chained with '/>'.
-- The path must contain at least one segment: 'foldr1' fails on the empty
-- list that 'split' returns for "".
through path = getChildren >>> foldr1 (/>) nameFilters
  where
    nameFilters = map hasName (split path)
-- contains =  (getChildren >>>). foldr1 (</). (map hasName)

-- Build one report entry: a "p" element with an empty attribute list whose
-- content is the given items, each wrapped as a constant arrow.
mkReport items = mkelem "p" [] (map constA items)
{- The data we are munging is unstructured.
 - Every feedback spans two contiguous rows of one big table.
 - We cannot capture all of the data in a single match, so we use listA to
 - collect two separate lists (one per kind of row) and then zip them back
 - together to rebuild the data.
 -
 - Steps:
 -   1. select a "table" element whose "class" attribute equals "fbOuter"
 -      and move to its "tbody" children;
 -   2. collect every node reached by the path tr/td/img;
 -   3. collect every child of tr/td accepted by hasText (isPrefixOf "EUR");
 -   4. transpose the two lists so that the i-th entry of each ends up in the
 -      same group (text first, then image), and emit one report element per
 -      group via mkReport.
 -}

getFeedbackAndValue = hasName "table" >>> outerTableBody >>> pairRows
  where
    -- The "fbOuter" table's tbody children.
    outerTableBody = hasAttrValue "class" (== "fbOuter") /> hasName "tbody"

    -- All images found under tr/td, gathered into a single list.
    images = listA (through "tr/td/img")

    -- All "EUR..." matches directly under tr/td, gathered into a single list.
    eurTexts = listA (through "tr/td" /> hasText (isPrefixOf "EUR"))

    -- Run both collectors on the same tbody, then build the reports.
    -- The arrow passed to -<< depends on the collected lists, hence -<<
    -- rather than -<.
    pairRows = proc tbody -> do
      imgs  <- images   -< tbody
      texts <- eurTexts -< tbody
      catA (map mkReport (transpose [texts, imgs])) -<< ()

-- Name of the document handed to readDocument in main.
src :: String
src = "feedback.example.html"

-- Name of the document handed to writeDocument in main.
dst :: String
dst = "feedback.report.html"

-- Option pair shared by the read and write steps in main:
-- sets a_encoding to unicodeString.
unicoding = (a_encoding, unicodeString)

-- Option pair shared by the read and write steps in main:
-- sets a_issue_warnings to v_0.
nowarnings = (a_issue_warnings, v_0)

-- Read 'src' with a_parse_html set to v_1, wrap everything
-- 'deep getFeedbackAndValue' produces in a fresh root node, and write the
-- result to 'dst' with a_indent set to v_1.
main = runX pipeline
  where
    readOptions  = [(a_parse_html, v_1), unicoding, nowarnings]
    writeOptions = [(a_indent, v_1), unicoding, nowarnings]

    pipeline =
      readDocument readOptions src
        >>> root [] [deep getFeedbackAndValue]
        >>> writeDocument writeOptions dst

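Note: the 'split' shown above is defined with 'break'; 'groupBy' is imported but not used in the code shown. Defining 'split' with 'groupBy' instead would abuse implementation details of 'groupBy' that are not guaranteed by its definition in the Haskell 98 standard report.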

Retrieved from "http://www.haskell.org/haskellwiki/HXT/Practical/Ebay1"

This page has been accessed 712 times. This page was last modified 06:38, 21 August 2007. Recent content is available under a simple permissive license.