HXT/Practical/Ebay1

From HaskellWiki
< HXT‎ | Practical
Jump to navigation Jump to search
{-# LANGUAGE Arrows, NoMonomorphismRestriction, ParallelListComp #-}
module Main where
 
import Text.XML.HXT.Core  hiding (deep)
import Data.List (nub,sort,isPrefixOf,transpose,groupBy) 

deep f = f `orElse` (getChildren >>> deep f)  -- deep redefinition to allow a broader signature

split "" = []
split xs = a : split (drop 1 b) where (a,b) = break (=='/') xs

through =  (getChildren >>>) . foldr1 (/>). map hasName . split  
-- contains =  (getChildren >>>). foldr1 (</). (map hasName)

mkReport  =  mkelem "p" [] . map constA
{- The datas we are munging is unstructured
 - Every feedback is spanned on two contigous rows of a big table
 - We cannot catch the all data in a match, so we use listA to have the two single-row lists
 - and then zip them to rebuild the data.
 -}
getFeedbackAndValue = 
  hasName "table" 
  >>> hasAttrValue "class" (=="fbOuter") 
  /> hasName "tbody" 
  >>> proc table -> do  
        feedbacks <- listA (through "tr/td/img")                          -< table
        values    <- listA (through "tr/td" /> hasText (isPrefixOf "EUR")) -< table
        catA (map mkReport $ transpose [values,feedbacks]) -<< ()

src = "feedback.example.html"
dst = "feedback.report.html"

main = runX ( readDocument [ withParseHTML yes
                           , withInputEncoding unicodeString
                           , withWarnings no
                           ] src 
              >>> root [] [deep getFeedbackAndValue]  
              >>> writeDocument
                           [ withIndent yes
                           , withOutputEncoding unicodeString
                           ] dst 
              )

Note that the use of groupBy in defining 'split' abuses the implementation details of 'groupBy' which are not guaranteed by its definition in the Haskell 98 standard report.