Parsing CSV files

Shawn P. Garbett listman@garbett.org
Tue, 29 Jul 2003 10:57:18 -0500


Nevermind the previous version, I've solved a few bugs in it (like unquoted 
numbers and correctly handling blank fields).

Here's my current version:
------------------------------------------------------------------------
import Parsec
import System.IO

-- Sometimes unquoted numbers appear
number    :: Parser String
number     = do
               f  <- (char '-' <|> digit)
               fs <- many digit
               return (f:fs)

-- A cell can be a quoted value, a number or empty
cell      :: Parser String
cell       = do { char '\"';
                  w <- (many1 (noneOf "\""));
                  char '\"' <?> "end of cell";
                  return w;
                }
             <|> number
             <|> return "" -- Empty cell

-- A comma of course
separator :: Parser Char
separator  = char ','

-- Group of cells with a separator
cells     :: Parser [String]
cells      = sepBy1 cell separator

-- For extracting comma delimited values of a cell
contents  :: Parser [String]
contents   = sepBy1 (many (noneOf ",")) separator

-- Rows are a set of cells followed by a newline,
-- This is followed by more rows or nothing
rows      :: Parser [[String]]
rows       = do c <- cells
                newline
                do { cs <- rows; return (c:cs); } <|> return [c]

-- Comman Separated Values, set of rows followed by eof
csv       :: Parser [[String]]
csv        = do r <- rows
                eof
                return r

-- Main routine, for testing
main :: IO ()
main  = do result <- parseFromFile csv "sb.txt"
           case (result) of
             Left err  -> print err
             Right sb  -> print sb