Difference between revisions of "HXT/Practical/Ebay1"

From HaskellWiki
< HXT‎ | Practical
Jump to navigation Jump to search
(Munging from an ebay feedback page)
 
 
(3 intermediate revisions by 3 users not shown)
Line 1: Line 1:
<hask>
+
<haskell>
 
{-# LANGUAGE Arrows, NoMonomorphismRestriction, ParallelListComp #-}
 
{-# LANGUAGE Arrows, NoMonomorphismRestriction, ParallelListComp #-}
 
module Main where
 
module Main where
 
 
import Text.XML.HXT.Arrow hiding (deep)
+
import Text.XML.HXT.Core hiding (deep)
 
import Data.List (nub,sort,isPrefixOf,transpose,groupBy)
 
import Data.List (nub,sort,isPrefixOf,transpose,groupBy)
   
 
deep f = f `orElse` (getChildren >>> deep f) -- deep redefinition to allow a broader signature
 
deep f = f `orElse` (getChildren >>> deep f) -- deep redefinition to allow a broader signature
   
  +
split "" = []
split = map (dropWhile p) . groupBy (const (not . p)) where p = (=='/')
 
  +
split xs = a : split (drop 1 b) where (a,b) = break (=='/') xs
  +
 
through = (getChildren >>>) . foldr1 (/>). map hasName . split
 
through = (getChildren >>>) . foldr1 (/>). map hasName . split
 
-- contains = (getChildren >>>). foldr1 (</). (map hasName)
 
-- contains = (getChildren >>>). foldr1 (</). (map hasName)
Line 30: Line 32:
 
dst = "feedback.report.html"
 
dst = "feedback.report.html"
   
  +
main = runX ( readDocument [ withParseHTML yes
unicoding= (a_encoding, unicodeString)
 
  +
, withInputEncoding unicodeString
nowarnings = (a_issue_warnings,v_0)
 
  +
, withWarnings no
 
  +
] src
main = runX ( readDocument [(a_parse_html, v_1),unicoding,nowarnings] src
 
 
>>> root [] [deep getFeedbackAndValue]
 
>>> root [] [deep getFeedbackAndValue]
>>> writeDocument [(a_indent,v_1),unicoding,nowarnings] dst
+
>>> writeDocument
  +
[ withIndent yes
  +
, withOutputEncoding unicodeString
  +
] dst
 
)
 
)
</hask>
+
</haskell>
  +
  +
Note that the use of groupBy in defining 'split' abuses the implementation details of 'groupBy' which are not guaranteed by its definition in the Haskell 98 standard report.

Latest revision as of 17:09, 11 October 2011

{-# LANGUAGE Arrows, NoMonomorphismRestriction, ParallelListComp #-}
module Main where
 
import Text.XML.HXT.Core  hiding (deep)
import Data.List (nub,sort,isPrefixOf,transpose,groupBy) 

-- | Top-down search: try @f@ on the current node and, via 'orElse', fall back
-- to searching the children recursively.
-- Redefined locally (the library's 'deep' is hidden on import) to allow a
-- broader signature.
deep f = f `orElse` searchBelow
  where
    searchBelow = getChildren >>> deep f

-- | Break a '/'-separated path into its segments.
-- The empty string yields no segments, and a single trailing '/' does not
-- produce a trailing empty segment; leading or doubled slashes do produce
-- empty segments.
split :: String -> [String]
split path
  | null path = []
  | otherwise = segment : split (drop 1 rest)
  where
    -- 'rest' is either empty or starts with the separator, hence 'drop 1'
    (segment, rest) = break (== '/') path

-- | Follow a '/'-separated path of element names, starting from the children
-- of the current node, e.g. @through "tr/td/img"@.
-- The path must contain at least one segment: 'foldr1' is partial and fails
-- on the empty list that 'split' returns for an empty path.
through path = getChildren >>> foldr1 (/>) (map hasName (split path))
-- contains =  (getChildren >>>). foldr1 (</). (map hasName)

-- | Build a @<p>@ element without attributes whose children are the given,
-- already constructed, nodes (each one lifted into an arrow with 'constA').
mkReport nodes = mkelem "p" [] (map constA nodes)
{- The data we are munging is unstructured.
 - Every feedback spans two contiguous rows of a big table.
 - We cannot catch all the data in a single match, so we use listA to collect
 - two separate lists (the feedback images and the "EUR" values)
 - and then transpose them to rebuild the data.
 -}
getFeedbackAndValue = 
  -- select the <tbody> child of a <table class="fbOuter">
  hasName "table" 
  >>> hasAttrValue "class" (=="fbOuter") 
  /> hasName "tbody" 
  >>> proc table -> do  
        -- every <img> reached through tr/td below the tbody
        feedbacks <- listA (through "tr/td/img")                          -< table
        -- every text child of a tr/td cell whose text starts with "EUR"
        values    <- listA (through "tr/td" /> hasText (isPrefixOf "EUR")) -< table
        -- transpose pairs the i-th value with the i-th feedback; each pair becomes one <p>
        catA (map mkReport $ transpose [values,feedbacks]) -<< ()

-- | Input and output locations used by 'main'.
src, dst :: String
-- the saved feedback page that is read
src = "feedback.example.html"
-- the report document that is written
dst = "feedback.report.html"

-- | Read 'src' as HTML, collect the feedback/value reports under a fresh
-- root node and write the result to 'dst'.
main = runX (loadPage >>> buildReport >>> savePage)
  where
    -- parse the input leniently as HTML, without warnings
    loadPage =
      readDocument
        [ withParseHTML yes
        , withInputEncoding unicodeString
        , withWarnings no
        ]
        src
    -- new root whose content is whatever 'getFeedbackAndValue' finds
    buildReport = root [] [deep getFeedbackAndValue]
    -- indented output
    savePage =
      writeDocument
        [ withIndent yes
        , withOutputEncoding unicodeString
        ]
        dst

Note that an earlier revision defined 'split' using groupBy, which abused implementation details of 'groupBy' that are not guaranteed by its definition in the Haskell 98 standard report; the definition of 'split' shown above uses 'break' instead.