# Haskell Quiz/Bytecode Compiler/Solution Justin Bailey

### From HaskellWiki

< Haskell Quiz | Bytecode Compiler(Difference between revisions)

(Updated to corrrect code) |
m |
||

(3 intermediate revisions by 3 users not shown) | |||

Line 1: | Line 1: | ||

− | [[Category:Code]] |
+ | [[Category:Haskell Quiz solutions|Bytecode Compiler]] |

− | This solution should work correctly. I was unable to test the byte codes generated, for obvious reasons. However, all test strings from the quiz do evaluate to the correct values. |
+ | This solution should work correctly. All test strings from the quiz evaluate to the correct values. To see it for yourself, execute the <hask>interpret_tests</hask> function. |

− | To see the (symbolic) byte codes generated, run <hask>generate_tests</hask>. To see the actual byte codes, run <hask>compile_tests</hask>. To see that the values produced by each expression match those expected, run <hask>eval_tests</hask>. The tests are contained in the variables <hask>test1,test2, ..., test6</hask>, which correspond to the six "test_n" methods fouind in the quiz's test program. |
+ | To see the (symbolic) byte codes generated, run <hask>generate_tests</hask>. To see the actual byte codes, run <hask>compile_tests</hask>. To see that the values produced by each expression match those expected, run <hask>eval_tests</hask>. The last actually evaluates the AST, without generating any bytescodes. The tests are contained in the variables <hask>test1,test2, ..., test6</hask>, which correspond to the six "test_n" methods found in the quiz's test program. |

The byte codes aren't optimized. For example, SWAP is never used. However, they should produce correct results (even for negative and LCONST/CONST values). |
The byte codes aren't optimized. For example, SWAP is never used. However, they should produce correct results (even for negative and LCONST/CONST values). |
||

Line 14: | Line 14: | ||

import Text.ParserCombinators.Parsec.Expr |
import Text.ParserCombinators.Parsec.Expr |
||

import Data.Bits |
import Data.Bits |
||

+ | import Data.Int |
||

-- Represents various operations that can be applied |
-- Represents various operations that can be applied |
||

Line 21: | Line 22: | ||

-- Represents expression we can build - either numbers or expressions |
-- Represents expression we can build - either numbers or expressions |
||

− | -- connected by operators. |
+ | -- connected by operators. This structure is the basis of the AST built |

+ | -- when parsing |
||

data Expression = Statement Op Expression Expression |
data Expression = Statement Op Expression Expression |
||

| Val Integer |
| Val Integer |
||

Line 73: | Line 74: | ||

prefix s f |
prefix s f |
||

= Prefix (do{ string s; return f}) |
= Prefix (do{ string s; return f}) |
||

− | |||

-- Parses a string into an AST, using the parser defined above |
-- Parses a string into an AST, using the parser defined above |
||

Line 105: | Line 105: | ||

Mod -> lri ++ [MOD] |
Mod -> lri ++ [MOD] |
||

Pow -> lri ++ [POW] |
Pow -> lri ++ [POW] |
||

− | generate' (Val n) instr = |
+ | generate' (Val n) instr |

− | if abs(n) > 32768 |
+ | | abs n > 32768 = LCONST n : instr |

− | then instr ++ [LCONST n] |
+ | | otherwise = CONST n : instr |

− | else instr ++ [CONST n] |
||

-- Takes a statement and converts it into a list of actual bytes to |
-- Takes a statement and converts it into a list of actual bytes to |
||

Line 134: | Line 134: | ||

-- Make sure final list is size elements long |
-- Make sure final list is size elements long |
||

toByteList :: Bits Int => Int -> Int -> [Int] |
toByteList :: Bits Int => Int -> Int -> [Int] |
||

− | toByteList size n = |
+ | toByteList size n = reverse $ take size (toByteList' n) |

− | if (length bytes) < size |
||

− | then (replicate (size - (length bytes)) 0) ++ bytes |
||

− | else bytes |
||

where |
where |
||

− | bytes = reverse $ toByteList' n |
+ | toByteList' a = (a .&. 255) : toByteList' (a `shiftR` 8) |

− | -- for negative, and with signed bit and remove negative. Then continue recursion. |
||

− | toByteList' 0 = [] |
||

− | toByteList' a | a < 0 = (a .&. 511) : toByteList' (abs(a) `shiftR` 8) |
||

− | | otherwise = (a .&. 255) : toByteList' (a `shiftR` 8) |
||

-- All tests defined by the quiz, with the associated values they should evaluate to. |
-- All tests defined by the quiz, with the associated values they should evaluate to. |
||

Line 158: | Line 158: | ||

-- Evaluates the tests and makes sure the expressions match the expected values |
-- Evaluates the tests and makes sure the expressions match the expected values |
||

− | eval_tests = map eval_tests [test1, test2, test3, test4, test5, test6] |
+ | eval_tests = concatMap eval_tests [test1, test2, test3, test4, test5, test6] |

where |
where |
||

eval_tests ((val, stmt):ts) = |
eval_tests ((val, stmt):ts) = |
||

Line 164: | Line 164: | ||

in |
in |
||

if val == eval_val |
if val == eval_val |
||

− | then "True" : eval_tests ts |
+ | then ("Passed: " ++ stmt) : eval_tests ts |

− | else (stmt ++ " evaluated incorrectly to " ++ show eval_val ++ " instead of " ++ show val) : eval_tests ts |
+ | else ("Failed: " ++ stmt ++ "(" ++ show eval_val ++ ")") : eval_tests ts |

eval_tests [] = [] |
eval_tests [] = [] |
||

-- Takes all the tests and displays symbolic bytes codes for each |
-- Takes all the tests and displays symbolic bytes codes for each |
||

− | generate_tests = map generate_all [test1,test2,test3,test4,test5,test6] |
+ | generate_tests = concatMap generate_all [test1,test2,test3,test4,test5,test6] |

− | where generate_all ((val, stmt):ts) = generate (parse stmt) : generate_all ts |
+ | where generate_all = map (\(val, stmt) -> (stmt, generate (parse stmt))) |

− | generate_all [] = [] |
||

-- Takes all tests and generates a list of bytes representing them |
-- Takes all tests and generates a list of bytes representing them |
||

− | compile_tests = map compile_all [test1,test2,test3,test4,test5,test6] |
+ | compile_tests = concatMap compile_all [test1,test2,test3,test4,test5,test6] |

− | where compile_all ((val, stmt):ts) = compile stmt : compile_all ts |
+ | where compile_all = map (\(val, stmt) -> (stmt, compile stmt)) |

− | compile_all [] = [] |
+ | |

+ | interpret_tests = concatMap f' [test1, test2, test3, test4, test5, test6] |
||

+ | where |
||

+ | f' = map f'' |
||

+ | f'' (expected, stmt) = |
||

+ | let value = fromIntegral $ interpret [] $ compile stmt |
||

+ | in |
||

+ | if value == expected |
||

+ | then "Passed: " ++ stmt |
||

+ | else "Failed: " ++ stmt ++ "(" ++ (show value) ++ ")" |
||

+ | fromBytes n xs = |
||

+ | let int16 = fromIntegral (fromIntegral int32 :: Int16) :: Int |
||

+ | int32 = byte xs |
||

+ | byte xs = foldl (\accum byte -> (accum `shiftL` 8) .|. (byte)) (head xs) (take (n - 1) (tail xs)) |
||

+ | in |
||

+ | if n == 2 |
||

+ | then int16 |
||

+ | else int32 |
||

+ | |||

+ | interpret [] [] = error "no result produced" |
||

+ | interpret (s1:s) [] = s1 |
||

+ | interpret s (o:xs) | o < 10 = interpret ((fromBytes (o*2) xs):s) (drop (o*2) xs) |
||

+ | interpret (s1:s2:s) (o:xs) |
||

+ | | o == 16 = interpret (s2:s1:s) xs |
||

+ | | otherwise = interpret (((case o of 10 -> (+); 11 -> (-); 12 -> (*); 13 -> (^); 14 -> div; 15 -> mod) s2 s1):s) xs |
||

+ | |||

\end{code} |
\end{code} |
||

</haskell> |
</haskell> |

## Latest revision as of 02:32, 19 February 2010

This solution should work correctly. All test strings from the quiz evaluate to the correct values. To see it for yourself, execute theinterpret_tests

generate_tests

compile_tests

eval_tests

test1,test2, ..., test6

The byte codes aren't optimized. For example, SWAP is never used. However, they should produce correct results (even for negative and LCONST/CONST values).

The code below is literate Haskell.

\begin{code} import Text.ParserCombinators.Parsec hiding (parse) import qualified Text.ParserCombinators.Parsec as P (parse) import Text.ParserCombinators.Parsec.Expr import Data.Bits import Data.Int -- Represents various operations that can be applied -- to expressions. data Op = Plus | Minus | Mult | Div | Pow | Mod | Neg deriving (Show, Eq) -- Represents expression we can build - either numbers or expressions -- connected by operators. This structure is the basis of the AST built -- when parsing data Expression = Statement Op Expression Expression | Val Integer | Empty deriving (Show) -- Define the byte codes that can be generated. data Bytecode = NOOP | CONST Integer | LCONST Integer | ADD | SUB | MUL | POW | DIV | MOD | SWAP deriving (Show) -- Using imported Parsec.Expr library, build a parser for expressions. expr :: Parser Expression expr = buildExpressionParser table factor <?> "expression" where -- Recognizes a factor in an expression factor = do{ char '(' ; x <- expr ; char ')' ; return x } <|> number <?> "simple expression" -- Recognizes a number number :: Parser Expression number = do{ ds <- many1 digit ; return (Val (read ds)) } <?> "number" -- Specifies operator, associativity, precendence, and constructor to execute -- and built AST with. table = [[prefix "-" (Statement Mult (Val (-1)))], [binary "^" (Statement Pow) AssocRight], [binary "*" (Statement Mult) AssocLeft, binary "/" (Statement Div) AssocLeft, binary "%" (Statement Mod) AssocLeft], [binary "+" (Statement Plus) AssocLeft, binary "-" (Statement Minus) AssocLeft] ] where binary s f assoc = Infix (do{ string s; return f}) assoc prefix s f = Prefix (do{ string s; return f}) -- Parses a string into an AST, using the parser defined above parse s = case P.parse expr "" s of Right ast -> ast Left e -> error $ show e -- Take AST and evaluate (mostly for testing) eval (Val n) = n eval (Statement op left right) | op == Mult = eval left * eval right | op == Minus = eval left - eval right | op == Plus = eval left + eval right | op == Div = eval left `div` eval right | op == Pow = eval left ^ eval right | op == Mod = eval left `mod` eval right -- Takes an AST and turns it into a byte code list generate stmt = generate' stmt [] where generate' (Statement op left right) instr = let li = generate' left instr ri = generate' right instr lri = li ++ ri in case op of Plus -> lri ++ [ADD] Minus -> lri ++ [SUB] Mult -> lri ++ [MUL] Div -> lri ++ [DIV] Mod -> lri ++ [MOD] Pow -> lri ++ [POW] generate' (Val n) instr | abs n > 32768 = LCONST n : instr | otherwise = CONST n : instr -- Takes a statement and converts it into a list of actual bytes to -- be interpreted compile s = toBytes (generate $ parse s) -- Convert a list of byte codes to a list of integer codes. If LCONST or CONST -- instruction are seen, correct byte representantion is produced toBytes ((NOOP):xs) = 0 : toBytes xs toBytes ((CONST n):xs) = 1 : (toConstBytes (fromInteger n)) ++ toBytes xs toBytes ((LCONST n):xs) = 2 : (toLConstBytes (fromInteger n)) ++ toBytes xs toBytes ((ADD):xs) = 0x0a : toBytes xs toBytes ((SUB):xs) = 0x0b : toBytes xs toBytes ((MUL):xs) = 0x0c : toBytes xs toBytes ((POW):xs) = 0x0d : toBytes xs toBytes ((DIV):xs) = 0x0e : toBytes xs toBytes ((MOD):xs) = 0x0f : toBytes xs toBytes ((SWAP):xs) = 0x0a : toBytes xs toBytes [] = [] -- Convert number to CONST representation (2 element list) toConstBytes n = toByteList 2 n toLConstBytes n = toByteList 4 n -- Convert a number into a list of 8-bit bytes (big-endian/network byte order). -- Make sure final list is size elements long toByteList :: Bits Int => Int -> Int -> [Int] toByteList size n = reverse $ take size (toByteList' n) where toByteList' a = (a .&. 255) : toByteList' (a `shiftR` 8) -- All tests defined by the quiz, with the associated values they should evaluate to. test1 = [(2+2, "2+2"), (2-2, "2-2"), (2*2, "2*2"), (2^2, "2^2"), (2 `div` 2, "2/2"), (2 `mod` 2, "2%2"), (3 `mod` 2, "3%2")] test2 = [(2+2+2, "2+2+2"), (2-2-2, "2-2-2"), (2*2*2, "2*2*2"), (2^2^2, "2^2^2"), (4 `div` 2 `div` 2, "4/2/2"), (7`mod`2`mod`1, "7%2%1")] test3 = [(2+2-2, "2+2-2"), (2-2+2, "2-2+2"), (2*2+2, "2*2+2"), (2^2+2, "2^2+2"), (4 `div` 2+2, "4/2+2"), (7`mod`2+1, "7%2+1")] test4 = [(2+(2-2), "2+(2-2)"), (2-(2+2), "2-(2+2)"), (2+(2*2), "2+(2*2)"), (2*(2+2), "2*(2+2)"), (2^(2+2), "2^(2+2)"), (4 `div` (2+2), "4/(2+2)"), (7`mod`(2+1), "7%(2+1)")] test5 = [(-2+(2-2), "-2+(2-2)"), (2-(-2+2), "2-(-2+2)"), (2+(2 * -2), "2+(2*-2)")] test6 = [((3 `div` 3)+(8-2), "(3/3)+(8-2)"), ((1+3) `div` (2 `div` 2)*(10-8), "(1+3)/(2/2)*(10-8)"), ((1*3)*4*(5*6), "(1*3)*4*(5*6)"), ((10`mod`3)*(2+2), "(10%3)*(2+2)"), (2^(2+(3 `div` 2)^2), "2^(2+(3/2)^2)"), ((10 `div` (2+3)*4), "(10/(2+3)*4)"), (5+((5*4)`mod`(2+1)), "5+((5*4)%(2+1))")] -- Evaluates the tests and makes sure the expressions match the expected values eval_tests = concatMap eval_tests [test1, test2, test3, test4, test5, test6] where eval_tests ((val, stmt):ts) = let eval_val = eval $ parse stmt in if val == eval_val then ("Passed: " ++ stmt) : eval_tests ts else ("Failed: " ++ stmt ++ "(" ++ show eval_val ++ ")") : eval_tests ts eval_tests [] = [] -- Takes all the tests and displays symbolic bytes codes for each generate_tests = concatMap generate_all [test1,test2,test3,test4,test5,test6] where generate_all = map (\(val, stmt) -> (stmt, generate (parse stmt))) -- Takes all tests and generates a list of bytes representing them compile_tests = concatMap compile_all [test1,test2,test3,test4,test5,test6] where compile_all = map (\(val, stmt) -> (stmt, compile stmt)) interpret_tests = concatMap f' [test1, test2, test3, test4, test5, test6] where f' = map f'' f'' (expected, stmt) = let value = fromIntegral $ interpret [] $ compile stmt in if value == expected then "Passed: " ++ stmt else "Failed: " ++ stmt ++ "(" ++ (show value) ++ ")" fromBytes n xs = let int16 = fromIntegral (fromIntegral int32 :: Int16) :: Int int32 = byte xs byte xs = foldl (\accum byte -> (accum `shiftL` 8) .|. (byte)) (head xs) (take (n - 1) (tail xs)) in if n == 2 then int16 else int32 interpret [] [] = error "no result produced" interpret (s1:s) [] = s1 interpret s (o:xs) | o < 10 = interpret ((fromBytes (o*2) xs):s) (drop (o*2) xs) interpret (s1:s2:s) (o:xs) | o == 16 = interpret (s2:s1:s) xs | otherwise = interpret (((case o of 10 -> (+); 11 -> (-); 12 -> (*); 13 -> (^); 14 -> div; 15 -> mod) s2 s1):s) xs \end{code}