Haskell Quiz/GEDCOM/Solution Abhinav
< Haskell Quiz | GEDCOM
Jump to navigation
Jump to search
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
-- A GEDCOM to XML converter written using Parsec as a
-- solution for rubyquiz 6 (http://rubyquiz.com/quiz6.html).
-- Example GEDCOM document at
-- http://cpansearch.perl.org/src/PJCJ/Gedcom-1.16/royal.ged
-- Copyright (C) 2012 Abhinav Sarkar
{-# LANGUAGE NoMonomorphismRestriction, RecordWildCards #-}
module GedcomParser where
import Text.Parsec hiding (spaces, Line)
import System.IO
-- a line in a GEDCOM document
data Line = Line {
lineLevel :: Int,
lineTag :: String,
lineValue :: Maybe String,
lineId :: Maybe String
} deriving (Show)
-- an element in a GEDCOM document
data Elem = Elem {
elemTag :: String,
elemValue :: Maybe String,
elemId :: Maybe String,
elemChildren :: [Elem]
} deriving (Show)
indent n = concat . (replicate n) $ " "
trimValue value = case value of
Nothing -> Nothing
Just v
| v == "" -> Nothing
| otherwise -> Just v
normalizeValue = maybe "" id
spaces = many (char ' ' <|> tab)
whitespaces = many (char ' ' <|> tab <|> newline)
-- parses a line
line level = do
string (show level)
spaces
id <- optionMaybe $ between (char '@') (char '@') (many1 alphaNum)
spaces
tag <- many1 upper
spaces
value <- fmap trimValue $ optionMaybe $ manyTill (anyChar) newline
return $ Line level tag value id
-- parses an element
element level = do
ml <- optionMaybe $ line level
case ml of
Nothing -> fail ("invalid level " ++ show level)
Just Line{..} -> do
children <- many (element $ level + 1)
return $ Elem lineTag lineValue lineId children
-- parses a document
document = (element 0) `endBy` whitespaces
-- normalizes an element by merging values of CONC and CONT
-- elements with parent element value
normalizeElem element =
let
conChildren = filter concOrCont $ elemChildren element
text = foldl (\t el -> t
++ (if elemTag el == "CONC" then "\n" else " ")
++ normalizeValue (elemValue el))
"" conChildren
nonConChildren = filter (not . concOrCont) $ elemChildren element
in
element { elemValue = trimValue $
Just (normalizeValue (elemValue element) ++ text),
elemChildren = map normalizeElem nonConChildren }
where
concOrCont el = elemTag el `elem` ["CONC", "CONT"]
-- normalizes a document
normalizeDoc = map normalizeElem
-- converts an element to XML
elemToXml indentation Elem{..} =
indent indentation
++ "<" ++ elemTag
++ maybe "" (\i -> " id=\"@" ++ i ++ "@\"") elemId
++ case elemChildren of
[] -> ">" ++ normalizeValue elemValue ++ "</" ++ elemTag ++ ">"
_ -> maybe "" (\v -> " value=\"" ++ v ++ "\"") elemValue ++ ">\n"
++ unlines (map (elemToXml (indentation + 1)) elemChildren)
++ indent indentation ++ "</" ++ elemTag ++ ">"
-- converts a document to XML
documentToXml doc = "<DOCUMENT>\n"
++ (unlines . map (elemToXml 1) $ doc)
++ "</DOCUMENT>"
-- converts a GEDCOM document supplied through STDIN into XML
-- and prints to STDOUT
main = do
text <- getContents
case parse document "GEDCOM Parser" text of
Right [] -> return ()
Right doc -> putStrLn $ documentToXml (normalizeDoc doc)
Left e -> print e
Description: This solution uses Parsec to parse a GEDCOM file and then converts the parsed data to XML.
Source: https://github.com/abhin4v/rubyquiz/blob/master/GedcomParser.hs
[Category:Code]]