Haskell Quiz/Bytecode Compiler/Solution Justin Bailey
From HaskellWiki
This solution should work correctly. I was unable to test the byte codes generated, for obvious reasons. However, all test strings from the quiz do evaluate to the correct values.
To see the (symbolic) byte codes generated, rungenerate_tests
compile_tests
eval_tests
test1,test2, ..., test6
The byte codes aren't optimized. For example, SWAP is never used. However, they should produce correct results (even for negative and LCONST/CONST values).
The code below is literate Haskell.
\begin{code} import Text.ParserCombinators.Parsec hiding (parse) import qualified Text.ParserCombinators.Parsec as P (parse) import Text.ParserCombinators.Parsec.Expr import Data.Bits -- Represents various operations that can be applied -- to expressions. data Op = Plus | Minus | Mult | Div | Pow | Mod | Neg deriving (Show, Eq) -- Represents expression we can build - either numbers or expressions -- connected by operators. data Expression = Statement Op Expression Expression | Val Integer | Empty deriving (Show) -- Define the byte codes that can be generated. data Bytecode = NOOP | CONST Integer | LCONST Integer | ADD | SUB | MUL | POW | DIV | MOD | SWAP deriving (Show) -- Using imported Parsec.Expr library, build a parser for expressions. expr :: Parser Expression expr = buildExpressionParser table factor <?> "expression" where -- Recognizes a factor in an expression factor = do{ char '(' ; x <- expr ; char ')' ; return x } <|> number <?> "simple expression" -- Recognizes a number number :: Parser Expression number = do{ ds <- many1 digit ; return (Val (read ds)) } <?> "number" -- Specifies operator, associativity, precendence, and constructor to execute -- and built AST with. table = [[prefix "-" (Statement Mult (Val (-1)))], [binary "^" (Statement Pow) AssocRight], [binary "*" (Statement Mult) AssocLeft, binary "/" (Statement Div) AssocLeft, binary "%" (Statement Mod) AssocLeft], [binary "+" (Statement Plus) AssocLeft, binary "-" (Statement Minus) AssocLeft] ] where binary s f assoc = Infix (do{ string s; return f}) assoc prefix s f = Prefix (do{ string s; return f}) -- Parses a string into an AST, using the parser defined above parse s = case P.parse expr "" s of Right ast -> ast Left e -> error $ show e -- Take AST and evaluate (mostly for testing) eval (Val n) = n eval (Statement op left right) | op == Mult = eval left * eval right | op == Minus = eval left - eval right | op == Plus = eval left + eval right | op == Div = eval left `div` eval right | op == Pow = eval left ^ eval right | op == Mod = eval left `mod` eval right -- Takes an AST and turns it into a byte code list generate stmt = generate' stmt [] where generate' (Statement op left right) instr = let li = generate' left instr ri = generate' right instr lri = li ++ ri in case op of Plus -> lri ++ [ADD] Minus -> lri ++ [SUB] Mult -> lri ++ [MUL] Div -> lri ++ [DIV] Mod -> lri ++ [MOD] Pow -> lri ++ [POW] generate' (Val n) instr = if abs(n) > 32768 then instr ++ [LCONST n] else instr ++ [CONST n] -- Takes a statement and converts it into a list of actual bytes to -- be interpreted compile s = toBytes (generate $ parse s) -- Convert a list of byte codes to a list of integer codes. If LCONST or CONST -- instruction are seen, correct byte representantion is produced toBytes ((NOOP):xs) = 0 : toBytes xs toBytes ((CONST n):xs) = 1 : (toConstBytes (fromInteger n)) ++ toBytes xs toBytes ((LCONST n):xs) = 2 : (toLConstBytes (fromInteger n)) ++ toBytes xs toBytes ((ADD):xs) = 0x0a : toBytes xs toBytes ((SUB):xs) = 0x0b : toBytes xs toBytes ((MUL):xs) = 0x0c : toBytes xs toBytes ((POW):xs) = 0x0d : toBytes xs toBytes ((DIV):xs) = 0x0e : toBytes xs toBytes ((MOD):xs) = 0x0f : toBytes xs toBytes ((SWAP):xs) = 0x0a : toBytes xs toBytes [] = [] -- Convert number to CONST representation (2 element list) toConstBytes n = toByteList 2 n toLConstBytes n = toByteList 4 n -- Convert a number into a list of 8-bit bytes (big-endian/network byte order). -- Make sure final list is size elements long toByteList :: Bits Int => Int -> Int -> [Int] toByteList size n = if (length bytes) < size then (replicate (size - (length bytes)) 0) ++ bytes else bytes where bytes = reverse $ toByteList' n -- for negative, and with signed bit and remove negative. Then continue recursion. toByteList' 0 = [] toByteList' a | a < 0 = (a .&. 511) : toByteList' (abs(a) `shiftR` 8) | otherwise = (a .&. 255) : toByteList' (a `shiftR` 8) -- All tests defined by the quiz, with the associated values they should evaluate to. test1 = [(2+2, "2+2"), (2-2, "2-2"), (2*2, "2*2"), (2^2, "2^2"), (2 `div` 2, "2/2"), (2 `mod` 2, "2%2"), (3 `mod` 2, "3%2")] test2 = [(2+2+2, "2+2+2"), (2-2-2, "2-2-2"), (2*2*2, "2*2*2"), (2^2^2, "2^2^2"), (4 `div` 2 `div` 2, "4/2/2"), (7`mod`2`mod`1, "7%2%1")] test3 = [(2+2-2, "2+2-2"), (2-2+2, "2-2+2"), (2*2+2, "2*2+2"), (2^2+2, "2^2+2"), (4 `div` 2+2, "4/2+2"), (7`mod`2+1, "7%2+1")] test4 = [(2+(2-2), "2+(2-2)"), (2-(2+2), "2-(2+2)"), (2+(2*2), "2+(2*2)"), (2*(2+2), "2*(2+2)"), (2^(2+2), "2^(2+2)"), (4 `div` (2+2), "4/(2+2)"), (7`mod`(2+1), "7%(2+1)")] test5 = [(-2+(2-2), "-2+(2-2)"), (2-(-2+2), "2-(-2+2)"), (2+(2 * -2), "2+(2*-2)")] test6 = [((3 `div` 3)+(8-2), "(3/3)+(8-2)"), ((1+3) `div` (2 `div` 2)*(10-8), "(1+3)/(2/2)*(10-8)"), ((1*3)*4*(5*6), "(1*3)*4*(5*6)"), ((10`mod`3)*(2+2), "(10%3)*(2+2)"), (2^(2+(3 `div` 2)^2), "2^(2+(3/2)^2)"), ((10 `div` (2+3)*4), "(10/(2+3)*4)"), (5+((5*4)`mod`(2+1)), "5+((5*4)%(2+1))")] -- Evaluates the tests and makes sure the expressions match the expected values eval_tests = map eval_tests [test1, test2, test3, test4, test5, test6] where eval_tests ((val, stmt):ts) = let eval_val = eval $ parse stmt in if val == eval_val then "True" : eval_tests ts else (stmt ++ " evaluated incorrectly to " ++ show eval_val ++ " instead of " ++ show val) : eval_tests ts eval_tests [] = [] -- Takes all the tests and displays symbolic bytes codes for each generate_tests = map generate_all [test1,test2,test3,test4,test5,test6] where generate_all ((val, stmt):ts) = generate (parse stmt) : generate_all ts generate_all [] = [] -- Takes all tests and generates a list of bytes representing them compile_tests = map compile_all [test1,test2,test3,test4,test5,test6] where compile_all ((val, stmt):ts) = compile stmt : compile_all ts compile_all [] = [] \end{code}
