{- From the Dragon Book Section 3.9 - From a Regular Expression to a DFA -} import Data.Set import Char import Monad -- Normal Regular Expression type data RE = Epsilon | Leaf Char | Selection RE RE | Sequence RE RE | Kleene RE | Optional RE | End instance Show RE where show re = case re of Leaf c -> "(Leaf '" ++ [c] ++ "')" Selection re1 re2 -> "(Selection " ++ show re1 ++ " " ++ show re2 ++ ")" Sequence re1 re2 -> "(Sequence " ++ show re1 ++ " " ++ show re2 ++ ")" Kleene re -> "(Kleene " ++ show re ++ ")" Optional re -> "(Optional " ++ show re ++ ")" End -> "(End)" -- basic Haskell Parser infixr 5 +++ newtype Parser a = P (String -> [(a,String)]) instance Monad Parser where return v = P (\inp -> [(v,inp)]) p >>= f = P (\inp -> case parse p inp of [] -> [] [(v,out)] -> parse (f v) out) instance MonadPlus Parser where mzero = P (\inp -> []) p `mplus` q = P (\inp -> case parse p inp of [] -> parse q inp [(v,out)] -> [(v,out)]) -- Basic parsers failure :: Parser a failure = mzero item :: Parser Char item = P (\inp -> case inp of [] -> [] (x:xs) -> [(x,xs)]) parse :: Parser a -> String -> [(a,String)] parse (P p) inp = p inp -- Choice (+++) :: Parser a -> Parser a -> Parser a p +++ q = p `mplus` q -- Derived primitives sat :: (Char -> Bool) -> Parser Char sat p = do x <- item if p x then return x else failure digit :: Parser Char digit = sat isDigit lower :: Parser Char lower = sat isLower upper :: Parser Char upper = sat isUpper letter :: Parser Char letter = sat isAlpha alphanum :: Parser Char alphanum = sat isAlphaNum char :: Char -> Parser Char char x = sat (== x) string :: String -> Parser String string [] = return [] string (x:xs) = do char x string xs return (x:xs) many :: Parser a -> Parser [a] many p = many1 p +++ return [] many1 :: Parser a -> Parser [a] many1 p = do v <- p vs <- many p return (v:vs) ident :: Parser String ident = do x <- lower xs <- many alphanum return (x:xs) nat :: Parser Int nat = do xs <- many1 digit return (read xs) int :: Parser Int int = do char '-' n <- nat return (-n) +++ nat space :: Parser () space = do many (sat isSpace) return () -- Ignoring spacing token :: Parser a -> Parser a token p = do space v <- p space return v identifier :: Parser String identifier = token ident natural :: Parser Int natural = token nat integer :: Parser Int integer = token int symbol :: String -> Parser String symbol xs = token (string xs) -- Parse a Regular Expression from a String to an RE type parseRE :: String -> [(RE, String)] parseRE s = (parse begin s) begin :: Parser RE begin = do s <- sel return (Sequence s End) sel :: Parser RE sel = do s <- xeq do char '|' t <- sel return (Selection s t) +++ return s xeq :: Parser RE xeq = do s <- rep do t <- xeq return (Sequence s t) +++ return s rep :: Parser RE rep = do b <- block do char '*' return (Kleene b) +++ do char '+' return (Sequence b (Kleene b)) +++ do char '?' return (Optional b) +++ return b block :: Parser RE block = do char '(' s <- sel symbol ")" return s +++ do a <- alphanum return (Leaf a) -- extractRE :: [(RE, String)] -> RE extractRE x = re where (re, s) = x !! 0 -- Augmented RE type with indexed states type State = Int data RE' = Epsilon' | Leaf' Char State | Selection' RE' RE' | Sequence' RE' RE' | Kleene' RE' | Optional' RE' | End' State instance Show RE' where show re' = case re' of Leaf' c s -> "(Leaf' '" ++ [c] ++ "' " ++ show s ++ ")" Selection' re1 re2 -> "(Selection' " ++ show re1 ++ " " ++ show re2 ++ ")" Sequence' re1 re2 -> "(Sequence' " ++ show re1 ++ " " ++ show re2 ++ ")" Kleene' re -> "(Kleene' " ++ show re ++ ")" Optional' re -> "(Optional' " ++ show re ++ ")" End' s -> "(End' " ++ show s ++ ")" -- Create a augmented RE with indexed states from a normal RE indexStates :: RE -> RE' indexStates re = re2 where (re2, _) = indexStates' (re, 1) -- indexStates' :: (RE, State) -> (RE', State) indexStates' (Leaf c, state) = (Leaf' c state, state + 1) indexStates' (Selection re1 re2, state) = (Selection' re1' re2', s2') where (re1', s1') = indexStates' (re1, state) (re2', s2') = indexStates' (re2, s1') indexStates' (Sequence re1 re2, state) = (Sequence' re1' re2', s2') where (re1', s1') = indexStates' (re1, state) (re2', s2') = indexStates' (re2, s1') indexStates' (Kleene re, state) = (Kleene' re', s') where (re', s') = indexStates'( re, state) indexStates' (Optional re, state) = (Optional' re', s') where (re', s') = indexStates'( re, state) indexStates' (End, state) = (End' state, state + 1) -- -- Nullable nodes nullable :: RE' -> Bool nullable Epsilon' = True nullable (Leaf' c s) = False nullable (Selection' re1 re2) = nullable re1 || nullable re2 nullable (Sequence' re1 re2) = nullable re1 && nullable re2 nullable (Kleene' re) = True nullable (Optional' re) = True nullable (End' n) = False -- firstpos, lastpos and followpos type Pos = Set Int firstpos :: RE' -> Pos firstpos Epsilon' = empty firstpos (Leaf' c s) = singleton s firstpos (Selection' re1 re2) = union (firstpos re1) (firstpos re2) firstpos (Sequence' re1 re2) = if (nullable re1) then (union (firstpos re1) (firstpos re2)) else (firstpos re1) firstpos (Kleene' re) = firstpos re firstpos (Optional' re) = firstpos re firstpos (End' n) = singleton n lastpos :: RE' -> Pos lastpos Epsilon' = empty lastpos (Leaf' c s) = singleton s lastpos (Selection' re1 re2) = union (lastpos re1) (lastpos re2) lastpos (Sequence' re1 re2) = if (nullable re2) then (union (lastpos re1) (lastpos re2)) else (lastpos re2) lastpos (Kleene' re) = lastpos re lastpos (Optional' re) = lastpos re lastpos (End' n) = singleton n --- Augmented RE type with indexed states and followPos sets data RE'' = Epsilon'' | Leaf'' Char State Pos | Selection'' RE'' RE'' | Sequence'' RE'' RE'' | Kleene'' RE'' | Optional'' RE'' | End'' State instance Show RE'' where show re' = case re' of Leaf'' c s pos -> "(Leaf'' '" ++ [c] ++ "' " ++ show s ++ ", " ++ show pos ++")" Selection'' re1 re2 -> "(Selection'' " ++ show re1 ++ " " ++ show re2 ++ ")" Sequence'' re1 re2 -> "(Sequence'' " ++ show re1 ++ " " ++ show re2 ++ ")" Kleene'' re -> "(Kleene'' " ++ show re ++ ")" Optional'' re -> "(Optional'' " ++ show re ++ ")" End'' s -> "(End'' " ++ show s ++ ")" -- followPos :: RE' -> [RE'] -> RE'' followPos (Sequence' re1 re2) res = (Sequence'' re1' re2') where re1' = followPos re1 res' re2' = followPos re2 res res' = if ( nullable re2) then re2 : res else [re2] followPos (Selection' re1 re2) res = (Selection'' re1' re2') where re1' = followPos re1 res re2' = followPos re2 res followPos (Kleene' re) res = (Kleene'' re') where re' = followPos re res' res' = re : res followPos (Optional' re) res = (Optional'' re') where re' = followPos re res followPos (Leaf' char state) res = (Leaf'' char state pos) where pos = foldr union empty (Prelude.map firstpos res) followPos (End' state) res = (End'' state) -- parseREs2RE :: [String] -> RE parseREs2RE (re:res) | Prelude.null res = extractRE (parseRE re) | otherwise = Selection (extractRE (parseRE re)) (parseREs2RE res) --- testREs = [ "(a|b)*abb", "(c|d)+abb", "e?"] t = parseREs2RE testREs t' = indexStates t t'firstpos = firstpos t' t'lastpos = lastpos t' t''followPos = followPos t' []