Haskell Quiz/GEDCOM/Solution Abhinav
From HaskellWiki
(Difference between revisions)
(New page: <haskell> -- A GEDCOM to XML converter written using Parsec as a -- solution for rubyquiz 6 (http://rubyquiz.com/quiz6.html). -- Example GEDCOM document at -- http://cpansearch.perl.org/sr...) |
|||
| Line 4: | Line 4: | ||
-- Example GEDCOM document at | -- Example GEDCOM document at | ||
-- http://cpansearch.perl.org/src/PJCJ/Gedcom-1.16/royal.ged | -- http://cpansearch.perl.org/src/PJCJ/Gedcom-1.16/royal.ged | ||
| + | -- Copyright (C) 2012 Abhinav Sarkar | ||
{-# LANGUAGE NoMonomorphismRestriction, RecordWildCards #-} | {-# LANGUAGE NoMonomorphismRestriction, RecordWildCards #-} | ||
Revision as of 16:58, 4 August 2012
-- A GEDCOM to XML converter written using Parsec as a -- solution for rubyquiz 6 (http://rubyquiz.com/quiz6.html). -- Example GEDCOM document at -- http://cpansearch.perl.org/src/PJCJ/Gedcom-1.16/royal.ged -- Copyright (C) 2012 Abhinav Sarkar {-# LANGUAGE NoMonomorphismRestriction, RecordWildCards #-} module GedcomParser where import Text.Parsec hiding (spaces, Line) import System.IO -- a line in a GEDCOM document data Line = Line { lineLevel :: Int, lineTag :: String, lineValue :: Maybe String, lineId :: Maybe String } deriving (Show) -- an element in a GEDCOM document data Elem = Elem { elemTag :: String, elemValue :: Maybe String, elemId :: Maybe String, elemChildren :: [Elem] } deriving (Show) indent n = concat . (replicate n) $ " " trimValue value = case value of Nothing -> Nothing Just v | v == "" -> Nothing | otherwise -> Just v normalizeValue = maybe "" id spaces = many (char ' ' <|> tab) whitespaces = many (char ' ' <|> tab <|> newline) -- parses a line line level = do string (show level) spaces id <- optionMaybe $ between (char '@') (char '@') (many1 alphaNum) spaces tag <- many1 upper spaces value <- fmap trimValue $ optionMaybe $ manyTill (anyChar) newline return $ Line level tag value id -- parses an element element level = do ml <- optionMaybe $ line level case ml of Nothing -> fail ("invalid level " ++ show level) Just Line{..} -> do children <- many (element $ level + 1) return $ Elem lineTag lineValue lineId children -- parses a document document = (element 0) `endBy` whitespaces -- normalizes an element by merging values of CONC and CONT -- elements with parent element value normalizeElem element = let conChildren = filter concOrCont $ elemChildren element text = foldl (\t el -> t ++ (if elemTag el == "CONC" then "\n" else " ") ++ normalizeValue (elemValue el)) "" conChildren nonConChildren = filter (not . concOrCont) $ elemChildren element in element { elemValue = trimValue $ Just (normalizeValue (elemValue element) ++ text), elemChildren = map normalizeElem nonConChildren } where concOrCont el = elemTag el `elem` ["CONC", "CONT"] -- normalizes a document normalizeDoc = map normalizeElem -- converts an element to XML elemToXml indentation Elem{..} = indent indentation ++ "<" ++ elemTag ++ maybe "" (\i -> " id=\"@" ++ i ++ "@\"") elemId ++ case elemChildren of [] -> ">" ++ normalizeValue elemValue ++ "</" ++ elemTag ++ ">" _ -> maybe "" (\v -> " value=\"" ++ v ++ "\"") elemValue ++ ">\n" ++ unlines (map (elemToXml (indentation + 1)) elemChildren) ++ indent indentation ++ "</" ++ elemTag ++ ">" -- converts a document to XML documentToXml doc = "<DOCUMENT>\n" ++ (unlines . map (elemToXml 1) $ doc) ++ "</DOCUMENT>" -- converts a GEDCOM document supplied through STDIN into XML -- and prints to STDOUT main = do text <- getContents case parse document "GEDCOM Parser" text of Right [] -> return () Right doc -> putStrLn $ documentToXml (normalizeDoc doc) Left e -> print e
Description: This solution uses Parsec to parse a GEDCOM file and then converts the parsed data to XML.
Source: https://github.com/abhin4v/rubyquiz/blob/master/GedcomParser.hs
