Difference between revisions of "Haskell Quiz/Bytecode Compiler/Solution Justin Bailey"

From HaskellWiki
Jump to navigation Jump to search
(Found bug and documented)
m
 
(5 intermediate revisions by 4 users not shown)
Line 1: Line 1:
  +
[[Category:Haskell Quiz solutions|Bytecode Compiler]]
This solution isn't the cleanest or quite tested, but if you load it in hugs and run:
 
  +
This solution should work correctly. All test strings from the quiz evaluate to the correct values. To see it for yourself, execute the <hask>interpret_tests</hask> function.
   
  +
To see the (symbolic) byte codes generated, run <hask>generate_tests</hask>. To see the actual byte codes, run <hask>compile_tests</hask>. To see that the values produced by each expression match those expected, run <hask>eval_tests</hask>. The last actually evaluates the AST, without generating any bytescodes. The tests are contained in the variables <hask>test1,test2, ..., test6</hask>, which correspond to the six "test_n" methods found in the quiz's test program.
<haskell>
 
compile $ parse $ tokenize $ stmt1
 
compile $ parse $ tokenize $ stmt2
 
...
 
compile $ parse $ tokenize $ stmt8
 
</haskell>
 
 
You'll see the byte codes generated. I didn't implement any optimizations (i.e. using SWAP).
 
 
Sadly, the parsing algorithm does not preserve precedence correctly. For example, parsing "2*2+2" produces
 
 
<haskell>
 
> parse $ tokenize "2*2+2"
 
Statement Mult (Statement Plus (Val 2) (Val 2)) (Val 2)
 
</haskell>
 
   
  +
The byte codes aren't optimized. For example, SWAP is never used. However, they should produce correct results (even for negative and LCONST/CONST values).
Which is incorrect. Any suggestions?
 
   
 
The code below is literate Haskell.
 
The code below is literate Haskell.
Line 23: Line 10:
 
<haskell>
 
<haskell>
 
\begin{code}
 
\begin{code}
  +
import Text.ParserCombinators.Parsec hiding (parse)
import Data.Char(isAlpha, isDigit, isSeparator)
 
  +
import qualified Text.ParserCombinators.Parsec as P (parse)
  +
import Text.ParserCombinators.Parsec.Expr
  +
import Data.Bits
  +
import Data.Int
   
  +
-- Represents various operations that can be applied
data Op = Plus | Minus | Mult | Div | Pow | Mod
 
  +
-- to expressions.
deriving (Show, Eq)
 
  +
data Op = Plus | Minus | Mult | Div | Pow | Mod | Neg
 
data Token = RightParen
 
| LeftParen
 
| Number Integer
 
| Operator Op
 
 
deriving (Show, Eq)
 
deriving (Show, Eq)
   
  +
-- Represents expression we can build - either numbers or expressions
  +
-- connected by operators. This structure is the basis of the AST built
  +
-- when parsing
 
data Expression = Statement Op Expression Expression
 
data Expression = Statement Op Expression Expression
 
| Val Integer
 
| Val Integer
Line 39: Line 29:
 
deriving (Show)
 
deriving (Show)
   
  +
-- Define the byte codes that can be generated.
 
data Bytecode = NOOP | CONST Integer | LCONST Integer
 
data Bytecode = NOOP | CONST Integer | LCONST Integer
 
| ADD
 
| ADD
Line 49: Line 40:
 
deriving (Show)
 
deriving (Show)
   
expr1 = Statement Plus (Val 1) (Val 2)
 
expr2 = Statement Mult (Statement Plus (Val 1) (Val 2)) (Val 3)
 
   
  +
-- Using imported Parsec.Expr library, build a parser for expressions.
-- Take a statement and evaluate
 
  +
expr :: Parser Expression
  +
expr =
  +
buildExpressionParser table factor
  +
<?> "expression"
  +
where
  +
-- Recognizes a factor in an expression
  +
factor =
  +
do{ char '('
  +
; x <- expr
  +
; char ')'
  +
; return x
  +
}
  +
<|> number
  +
<?> "simple expression"
  +
-- Recognizes a number
  +
number :: Parser Expression
  +
number = do{ ds <- many1 digit
  +
; return (Val (read ds))
  +
}
  +
<?> "number"
  +
-- Specifies operator, associativity, precendence, and constructor to execute
  +
-- and built AST with.
  +
table =
  +
[[prefix "-" (Statement Mult (Val (-1)))],
  +
[binary "^" (Statement Pow) AssocRight],
  +
[binary "*" (Statement Mult) AssocLeft, binary "/" (Statement Div) AssocLeft, binary "%" (Statement Mod) AssocLeft],
  +
[binary "+" (Statement Plus) AssocLeft, binary "-" (Statement Minus) AssocLeft]
  +
]
  +
where
  +
binary s f assoc
  +
= Infix (do{ string s; return f}) assoc
  +
prefix s f
  +
= Prefix (do{ string s; return f})
  +
  +
-- Parses a string into an AST, using the parser defined above
  +
parse s = case P.parse expr "" s of
  +
Right ast -> ast
  +
Left e -> error $ show e
  +
  +
-- Take AST and evaluate (mostly for testing)
 
eval (Val n) = n
 
eval (Val n) = n
 
eval (Statement op left right)
 
eval (Statement op left right)
Line 62: Line 91:
 
| op == Mod = eval left `mod` eval right
 
| op == Mod = eval left `mod` eval right
   
-- Take a string and tokenize it
+
-- Takes an AST and turns it into a byte code list
  +
generate stmt = generate' stmt []
tokenize [] = []
 
  +
where
tokenize ('(':xs) = LeftParen : tokenize xs
 
  +
generate' (Statement op left right) instr =
tokenize (')':xs) = RightParen : tokenize xs
 
  +
let
tokenize (x:xs) | isDigit x = let (val, remaining) = span isDigit (x:xs)
 
in Number (read val) : tokenize remaining
+
li = generate' left instr
| isOperator x = toOperator x : tokenize xs
+
ri = generate' right instr
| isSeparator x = tokenize (dropWhile isSeparator xs)
+
lri = li ++ ri
where
+
in case op of
makeOp '+' = Just $ Operator Plus
+
Plus -> lri ++ [ADD]
makeOp '-' = Just $ Operator Minus
+
Minus -> lri ++ [SUB]
makeOp '*' = Just $ Operator Mult
+
Mult -> lri ++ [MUL]
makeOp '/' = Just $ Operator Div
+
Div -> lri ++ [DIV]
makeOp '^' = Just $ Operator Pow
+
Mod -> lri ++ [MOD]
makeOp '%' = Just $ Operator Mod
+
Pow -> lri ++ [POW]
makeOp _ = Nothing
+
generate' (Val n) instr
toOperator x = case makeOp x of
+
| abs n > 32768 = LCONST n : instr
Just x -> x
+
| otherwise = CONST n : instr
_ -> error "Bad operator."
 
isOperator x = case makeOp x of
 
Just x -> True
 
_ -> False
 
   
  +
-- Takes a statement and converts it into a list of actual bytes to
stmt1 = "1 + 2" -- 3
 
  +
-- be interpreted
stmt2 = "1 + 2 * 3" -- 7
 
  +
compile s = toBytes (generate $ parse s)
stmt3 = "(1 + 2) * 3" -- 9
 
stmt4 = "4 - 5 * 2" -- -6
 
stmt5 = "5 * (2 - 4)" -- -10
 
stmt6 = "(1*3)*4*(5*6)" -- 360
 
stmt7 = "2^(2+(3/2)^2)" -- 8
 
stmt8 = "(10%3)*(2+2)" -- 4
 
   
  +
-- Convert a list of byte codes to a list of integer codes. If LCONST or CONST
{-
 
  +
-- instruction are seen, correct byte representantion is produced
Based on http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm
 
  +
toBytes ((NOOP):xs) = 0 : toBytes xs
  +
toBytes ((CONST n):xs) = 1 : (toConstBytes (fromInteger n)) ++ toBytes xs
  +
toBytes ((LCONST n):xs) = 2 : (toLConstBytes (fromInteger n)) ++ toBytes xs
  +
toBytes ((ADD):xs) = 0x0a : toBytes xs
  +
toBytes ((SUB):xs) = 0x0b : toBytes xs
  +
toBytes ((MUL):xs) = 0x0c : toBytes xs
  +
toBytes ((POW):xs) = 0x0d : toBytes xs
  +
toBytes ((DIV):xs) = 0x0e : toBytes xs
  +
toBytes ((MOD):xs) = 0x0f : toBytes xs
  +
toBytes ((SWAP):xs) = 0x0a : toBytes xs
  +
toBytes [] = []
   
  +
-- Convert number to CONST representation (2 element list)
E -> E + E
 
  +
toConstBytes n = toByteList 2 n
E -> E * E
 
  +
toLConstBytes n = toByteList 4 n
E -> E / E
 
E -> E - E
 
E -> E % E
 
E -> E ^ E
 
E -> ( E )
 
E -> n
 
 
Transform to
 
 
E --> Exp(0)
 
Exp(p) --> P {B Exp(q)}
 
P --> "(" E ")" | v
 
B --> "+" | "-" | "*" |"/" | "^" | "%"
 
   
  +
-- Convert a number into a list of 8-bit bytes (big-endian/network byte order).
Precedence
 
  +
-- Make sure final list is size elements long
 
  +
toByteList :: Bits Int => Int -> Int -> [Int]
^ 3
 
  +
toByteList size n = reverse $ take size (toByteList' n)
*, /, % 2
 
+, - 1
+
where
  +
toByteList' a = (a .&. 255) : toByteList' (a `shiftR` 8)
   
  +
-- All tests defined by the quiz, with the associated values they should evaluate to.
-}
 
  +
test1 = [(2+2, "2+2"), (2-2, "2-2"), (2*2, "2*2"), (2^2, "2^2"), (2 `div` 2, "2/2"),
  +
(2 `mod` 2, "2%2"), (3 `mod` 2, "3%2")]
   
  +
test2 = [(2+2+2, "2+2+2"), (2-2-2, "2-2-2"), (2*2*2, "2*2*2"), (2^2^2, "2^2^2"), (4 `div` 2 `div` 2, "4/2/2"),
-- define precdence of operators
 
  +
(7`mod`2`mod`1, "7%2%1")]
precedence Plus = 1
 
  +
precedence Minus = 1
 
  +
test3 = [(2+2-2, "2+2-2"), (2-2+2, "2-2+2"), (2*2+2, "2*2+2"), (2^2+2, "2^2+2"),
precedence Mult = 2
 
  +
(4 `div` 2+2, "4/2+2"), (7`mod`2+1, "7%2+1")]
precedence Div = 3
 
precedence Mod = 2
 
precedence Pow = 3
 
   
  +
test4 = [(2+(2-2), "2+(2-2)"), (2-(2+2), "2-(2+2)"), (2+(2*2), "2+(2*2)"), (2*(2+2), "2*(2+2)"),
-- Precedence comparison - gets precedence of
 
  +
(2^(2+2), "2^(2+2)"), (4 `div` (2+2), "4/(2+2)"), (7`mod`(2+1), "7%(2+1)")]
-- given operator and determines if its greater than the value given.
 
(>*) op val = precedence op >= val
 
   
  +
test5 = [(-2+(2-2), "-2+(2-2)"), (2-(-2+2), "2-(-2+2)"), (2+(2 * -2), "2+(2*-2)")]
-- Precedence addition - for left associative operators,
 
-- return its precedence + 1. For righ associative, just return the operators
 
-- precedence.
 
prec_add p@(Pow) = precedence p
 
prec_add p = 1 + precedence p
 
   
  +
test6 = [((3 `div` 3)+(8-2), "(3/3)+(8-2)"), ((1+3) `div` (2 `div` 2)*(10-8), "(1+3)/(2/2)*(10-8)"),
parse [] = error "Can't parse empty list of tokens"
 
  +
((1*3)*4*(5*6), "(1*3)*4*(5*6)"), ((10`mod`3)*(2+2), "(10%3)*(2+2)"), (2^(2+(3 `div` 2)^2), "2^(2+(3/2)^2)"),
parse tokens = fst $ parseE tokens 0
 
  +
((10 `div` (2+3)*4), "(10/(2+3)*4)"), (5+((5*4)`mod`(2+1)), "5+((5*4)%(2+1))")]
where parseE tokens prec
 
= let (p, remaining) = parseP tokens prec
 
in
 
if remaining == []
 
then (p, remaining)
 
else case head remaining of
 
Operator op ->
 
if op >* prec
 
then
 
let (right, rest) = parseE (tail remaining) $ prec_add op
 
in (Statement op p right, rest)
 
else let (left, rest) = parseE (tail remaining) $ prec
 
in (Statement op left p, rest)
 
_ -> (p, remaining)
 
parseP ((Number n):ts) prec = (Val n, ts)
 
parseP ((LeftParen):ts) prec
 
= let (e, remaining) = parseE ts 0
 
in (e, tail remaining)
 
   
  +
-- Evaluates the tests and makes sure the expressions match the expected values
compile stmt = compile' stmt []
 
  +
eval_tests = concatMap eval_tests [test1, test2, test3, test4, test5, test6]
  +
where
  +
eval_tests ((val, stmt):ts) =
  +
let eval_val = eval $ parse stmt
  +
in
  +
if val == eval_val
  +
then ("Passed: " ++ stmt) : eval_tests ts
  +
else ("Failed: " ++ stmt ++ "(" ++ show eval_val ++ ")") : eval_tests ts
  +
eval_tests [] = []
   
  +
-- Takes all the tests and displays symbolic bytes codes for each
compile' (Statement op left right) instr =
 
  +
generate_tests = concatMap generate_all [test1,test2,test3,test4,test5,test6]
let li = compile' left instr
 
  +
where generate_all = map (\(val, stmt) -> (stmt, generate (parse stmt)))
ri = compile' right instr
 
lri = li ++ ri
+
  +
-- Takes all tests and generates a list of bytes representing them
in case op of
 
  +
compile_tests = concatMap compile_all [test1,test2,test3,test4,test5,test6]
Plus -> lri ++ [ADD]
 
  +
where compile_all = map (\(val, stmt) -> (stmt, compile stmt))
Minus -> lri ++ [SUB]
 
Mult -> lri ++ [MUL]
 
Div -> lri ++ [DIV]
 
Mod -> lri ++ [MOD]
 
Pow -> lri ++ [POW]
 
compile' (Val n) instr = instr ++ [LCONST n]
 
   
  +
interpret_tests = concatMap f' [test1, test2, test3, test4, test5, test6]
\end{code}
 
  +
where
  +
f' = map f''
  +
f'' (expected, stmt) =
  +
let value = fromIntegral $ interpret [] $ compile stmt
  +
in
  +
if value == expected
  +
then "Passed: " ++ stmt
  +
else "Failed: " ++ stmt ++ "(" ++ (show value) ++ ")"
  +
  +
fromBytes n xs =
  +
let int16 = fromIntegral (fromIntegral int32 :: Int16) :: Int
  +
int32 = byte xs
  +
byte xs = foldl (\accum byte -> (accum `shiftL` 8) .|. (byte)) (head xs) (take (n - 1) (tail xs))
  +
in
  +
if n == 2
  +
then int16
  +
else int32
  +
  +
interpret [] [] = error "no result produced"
  +
interpret (s1:s) [] = s1
  +
interpret s (o:xs) | o < 10 = interpret ((fromBytes (o*2) xs):s) (drop (o*2) xs)
  +
interpret (s1:s2:s) (o:xs)
  +
| o == 16 = interpret (s2:s1:s) xs
  +
| otherwise = interpret (((case o of 10 -> (+); 11 -> (-); 12 -> (*); 13 -> (^); 14 -> div; 15 -> mod) s2 s1):s) xs
  +
  +
\end{code}
 
</haskell>
 
</haskell>

Latest revision as of 02:32, 19 February 2010

This solution should work correctly. All test strings from the quiz evaluate to the correct values. To see it for yourself, execute the interpret_tests function.

To see the (symbolic) byte codes generated, run generate_tests. To see the actual byte codes, run compile_tests. To see that the values produced by each expression match those expected, run eval_tests. The last actually evaluates the AST, without generating any bytescodes. The tests are contained in the variables test1,test2, ..., test6, which correspond to the six "test_n" methods found in the quiz's test program.

The byte codes aren't optimized. For example, SWAP is never used. However, they should produce correct results (even for negative and LCONST/CONST values).

The code below is literate Haskell.

\begin{code}
import Text.ParserCombinators.Parsec hiding (parse)
import qualified Text.ParserCombinators.Parsec as P (parse)
import Text.ParserCombinators.Parsec.Expr
import Data.Bits
import Data.Int

-- Represents various operations that can be applied
-- to expressions.
data Op = Plus | Minus | Mult | Div | Pow | Mod | Neg
  deriving (Show, Eq)

-- Represents expression we can build - either numbers or expressions
-- connected by operators. This structure is the basis of the AST built
-- when parsing
data Expression = Statement Op Expression Expression
           | Val Integer
           | Empty
  deriving (Show)

-- Define the byte codes that can be generated. 
data Bytecode = NOOP | CONST Integer | LCONST Integer
            | ADD
            | SUB
            | MUL
            | POW
            | DIV
            | MOD
            | SWAP
  deriving (Show)


-- Using imported Parsec.Expr library, build a parser for expressions.
expr :: Parser Expression
expr =
  buildExpressionParser table factor
  <?> "expression"
  where
  -- Recognizes a factor in an expression
  factor  =
    do{ char '('
          ; x <- expr
          ; char ')'
          ; return x 
          }
      <|> number
      <?> "simple expression"
  -- Recognizes a number
  number  :: Parser Expression 
  number  = do{ ds <- many1 digit
              ; return (Val (read ds))
              }
          <?> "number"
  -- Specifies operator, associativity, precendence, and constructor to execute
  -- and built AST with.
  table =
    [[prefix "-" (Statement Mult (Val (-1)))],
      [binary "^" (Statement Pow) AssocRight],
      [binary "*" (Statement Mult) AssocLeft, binary "/" (Statement Div) AssocLeft, binary "%" (Statement Mod) AssocLeft],
      [binary "+" (Statement Plus) AssocLeft, binary "-" (Statement Minus) AssocLeft]
       ]          
    where
      binary s f assoc
         = Infix (do{ string s; return f}) assoc
      prefix s f 
         = Prefix (do{ string s; return f})

-- Parses a string into an AST, using the parser defined above
parse s = case P.parse expr "" s of
  Right ast -> ast
  Left e -> error $ show e
  
-- Take AST and evaluate (mostly for testing)
eval (Val n) = n
eval (Statement op left right)
        | op == Mult = eval left * eval right
        | op == Minus = eval left - eval right
        | op == Plus = eval left + eval right
        | op == Div = eval left `div` eval right
        | op == Pow = eval left ^ eval right
        | op == Mod = eval left `mod` eval right

-- Takes an AST and turns it into a byte code list
generate stmt = generate' stmt []
       where
               generate' (Statement op left right) instr =
                       let
                               li = generate' left instr
                               ri = generate' right instr
                               lri = li ++ ri
                       in case op of
                               Plus -> lri ++ [ADD]
                               Minus -> lri ++ [SUB]
                               Mult -> lri ++ [MUL]
                               Div -> lri ++ [DIV]
                               Mod -> lri ++ [MOD]
                               Pow -> lri ++ [POW]
               generate' (Val n) instr
                | abs n > 32768 = LCONST n : instr  
                | otherwise     = CONST n : instr

-- Takes a statement and converts it into a list of actual bytes to
-- be interpreted
compile s = toBytes (generate $ parse s)

-- Convert a list of byte codes to a list of integer codes. If LCONST or CONST
-- instruction are seen, correct byte representantion is produced
toBytes ((NOOP):xs) = 0 : toBytes xs 
toBytes ((CONST n):xs) = 1 : (toConstBytes (fromInteger n)) ++ toBytes xs
toBytes ((LCONST n):xs) = 2 : (toLConstBytes (fromInteger n)) ++ toBytes xs
toBytes ((ADD):xs) = 0x0a : toBytes xs
toBytes ((SUB):xs) = 0x0b : toBytes xs
toBytes ((MUL):xs) = 0x0c : toBytes xs
toBytes ((POW):xs) = 0x0d : toBytes xs
toBytes ((DIV):xs) = 0x0e : toBytes xs
toBytes ((MOD):xs) = 0x0f : toBytes xs
toBytes ((SWAP):xs) = 0x0a : toBytes xs
toBytes [] = []

-- Convert number to CONST representation (2 element list)
toConstBytes n = toByteList 2 n 
toLConstBytes n = toByteList 4 n 

-- Convert a number into a list of 8-bit bytes (big-endian/network byte order).
-- Make sure final list is size elements long
toByteList ::  Bits Int => Int -> Int -> [Int]
toByteList size n = reverse $ take size (toByteList' n)
    where
      toByteList' a = (a .&. 255) : toByteList' (a `shiftR` 8)

-- All tests defined by the quiz, with the associated values they should evaluate to.
test1 = [(2+2, "2+2"), (2-2, "2-2"), (2*2, "2*2"), (2^2, "2^2"), (2 `div` 2, "2/2"),
  (2 `mod` 2, "2%2"), (3 `mod` 2, "3%2")]

test2 = [(2+2+2, "2+2+2"), (2-2-2, "2-2-2"), (2*2*2, "2*2*2"), (2^2^2, "2^2^2"), (4 `div` 2 `div` 2, "4/2/2"),
  (7`mod`2`mod`1, "7%2%1")]
  
test3 = [(2+2-2, "2+2-2"), (2-2+2, "2-2+2"), (2*2+2, "2*2+2"), (2^2+2, "2^2+2"),
  (4 `div` 2+2, "4/2+2"), (7`mod`2+1, "7%2+1")]

test4 = [(2+(2-2), "2+(2-2)"), (2-(2+2), "2-(2+2)"), (2+(2*2), "2+(2*2)"), (2*(2+2), "2*(2+2)"),
  (2^(2+2), "2^(2+2)"), (4 `div` (2+2), "4/(2+2)"), (7`mod`(2+1), "7%(2+1)")]

test5 = [(-2+(2-2), "-2+(2-2)"), (2-(-2+2), "2-(-2+2)"), (2+(2 * -2), "2+(2*-2)")]

test6 = [((3 `div` 3)+(8-2), "(3/3)+(8-2)"), ((1+3) `div` (2 `div` 2)*(10-8), "(1+3)/(2/2)*(10-8)"), 
    ((1*3)*4*(5*6), "(1*3)*4*(5*6)"), ((10`mod`3)*(2+2), "(10%3)*(2+2)"), (2^(2+(3 `div` 2)^2), "2^(2+(3/2)^2)"),
    ((10 `div` (2+3)*4), "(10/(2+3)*4)"), (5+((5*4)`mod`(2+1)), "5+((5*4)%(2+1))")]

-- Evaluates the tests and makes sure the expressions match the expected values
eval_tests = concatMap eval_tests [test1, test2, test3, test4, test5, test6]
  where
    eval_tests ((val, stmt):ts) =
      let eval_val = eval $ parse stmt 
      in
        if val == eval_val 
        then ("Passed: " ++ stmt) : eval_tests ts
        else ("Failed: " ++ stmt ++ "(" ++ show eval_val ++ ")") : eval_tests ts
    eval_tests [] = []

-- Takes all the tests and displays symbolic bytes codes for each
generate_tests = concatMap generate_all [test1,test2,test3,test4,test5,test6]
  where generate_all = map (\(val, stmt) -> (stmt, generate (parse stmt)))
        
-- Takes all tests and generates a list of bytes representing them
compile_tests = concatMap compile_all [test1,test2,test3,test4,test5,test6]
  where compile_all = map (\(val, stmt) -> (stmt, compile stmt))

interpret_tests = concatMap f' [test1, test2, test3, test4, test5, test6]
  where
    f' = map f''
    f'' (expected, stmt) =
      let value = fromIntegral $ interpret [] $ compile stmt
      in
        if value == expected
        then "Passed: " ++ stmt
        else "Failed: " ++ stmt ++ "(" ++ (show value) ++ ")"

fromBytes n xs =
  let int16 = fromIntegral (fromIntegral int32 :: Int16) :: Int
      int32 = byte xs
      byte xs = foldl (\accum byte -> (accum `shiftL` 8) .|. (byte)) (head xs) (take (n - 1) (tail xs))
  in
    if n == 2
    then int16 
    else int32 
   
interpret [] [] = error "no result produced"
interpret (s1:s) [] = s1
interpret s (o:xs) | o < 10 = interpret ((fromBytes (o*2) xs):s) (drop (o*2) xs)
interpret (s1:s2:s) (o:xs)
  | o == 16 = interpret (s2:s1:s) xs
  | otherwise = interpret (((case o of 10 -> (+); 11 -> (-); 12 -> (*); 13 -> (^); 14 -> div; 15 -> mod) s2 s1):s) xs
 
\end{code}