Personal tools

Embedding context free grammars

From HaskellWiki

(Difference between revisions)
Jump to: navigation, search
 
Line 52: Line 52:
 
top' = Fix ((Tok 'c' :& Ref) :| Tok 'c')
 
top' = Fix ((Tok 'c' :& Ref) :| Tok 'c')
 
print $ parse' "cccccc" top'
 
print $ parse' "cccccc" top'
  +
> True
  +
</haskell>
  +
  +
Grammar for even parentheses surrounding the letter c
  +
  +
<haskell>
  +
top' = Fix (Tok '(' :& Ref :& Tok ')' :| Tok 'c')
  +
print $ parse' "(((c)))" top'
 
> True
 
> True
 
</haskell>
 
</haskell>

Revision as of 18:55, 16 November 2006

Here's how to embed a context free grammar parser into haskell:

data Grammar b where
    Ref :: Grammar c
    Tok :: Char -> Grammar c
    Check :: (Char -> Bool) -> Grammar c
    Fix :: Grammar a -> Grammar c
    (:|) :: Grammar b -> Grammar b -> Grammar b
    (:&) :: Grammar b -> Grammar b -> Grammar b
    Push :: Char -> Grammar a -> Grammar a
    NullParser :: Grammar b
infixl 6 :|
infixl 7 :&
 
 
parse:: [Char] -> Grammar b -> Grammar b -> Bool
parse x Ref t = parse x t t
parse [c] (Tok c') _ = c == c'
parse [c] (Check y) _ = y c
parse _ (Tok _) _ = False
parse _ (Check _) _ = False
parse x (Fix g) _ = parse x g (Fix g)
parse x (g :| g') t = parse x g t || parse x g' t  --cool little trick!
parse (x:xs) (g :& g') t = parse xs ((Push x g) :& g') t || (parse [] g t && parse (x:xs) g' t)
parse x (Push c g) t = parse (c:x) g t
parse _ NullParser _ = False
parse [] _ _ = False
 
parse' x g = parse x g NullParser

and here's a lambda calculus parser written in this embedded language

var = Check (\x -> x <= 'z' && x >= 'a')
app = term :& term
term = var :| abstraction :| parenedTerm  
parenedTerm = Tok '(' :& term :& Tok ')'
abstraction = Tok '\\' :& var :& Tok '.' :& term
top = term :& Tok ';'
<haskell>
 
let's see the results
<haskell>
print $ parse' "\\x.x;" top 
> True

Let's check out a recursive grammar of all strings containing only c's of length at least 1

top' = Fix ((Tok 'c' :& Ref) :| Tok 'c')
print $ parse' "cccccc" top' 
> True

Grammar for even parentheses surrounding the letter c

top' = Fix (Tok '(' :& Ref :& Tok ')' :| Tok 'c')
print $ parse' "(((c)))" top' 
> True