Modular Monadic Compilers for Programming Languages
From HaskellWiki
Primele luni ale anului 2011 s-au dovedit prolifice pentru realizatorii de compilatoare:
1 . Din cercetarile comunitatii Haskell internationaler
1. http://www.cs.nott.ac.uk/~gmh/bib.html#mod-comp
2 . Din cercetarile de la Bacau
-- A modular monadic compiler built using pseudoconstructors -- over monadic values, by Dan Popa. "V.Alecsandri" Univ. of Bacau. module MCOMP where import Monad import Data.Char -- MCOMP, 20 iunie 2011, Dan Popa -- v02. 21 iunie 2011 - corectat while-ul care genera cod fara salt inapoi --- [... deleted] -- v08 30 iunie 2011 -- Tipul starilor , aici adresele de inceput/sfarsit de cod type S = Int -- Monada starilor data SM a = SM (S -> (a,S)) instance Monad SM where SM c1 >>= fc2 = SM (\s0 -> let (r,s1) = c1 s0 SM c2 = fc2 r in c2 s1 ) return k = SM (\s -> (k,s)) readSM :: SM S readSM = SM (\s -> (s,s)) updateSM :: (S -> S) -> SM S updateSM f = SM (\s -> (s, f s)) {-- fupdateSM :: (S -> S) -> SM S fupdateSM f = SM (\s -> (s, f s)) --} writeSM :: S -> SM S writeSM a = SM (\s -> (s,a)) allocSM :: S -> SM S allocSM l = SM (\s -> (s, s+l)) runSM :: S -> SM a -> (a,S) runSM s0 (SM c) = c s0 compile :: (Show t, Show t1) => SM (t, [Instr t1]) -> IO () compile arb = putStr . prettyprint $ runSM 0000 arb ----- INSTRUCTIONS ------------------------ data Instr a = Instr String a deriving (Show,Read,Eq) ---- PRETTY PRINTER-ul ------------------- prettyprint ((a,l),b) = "\n" ++ "Length of the code:" ++ show a ++ myprintl 0 l myprintl nr [] = "\n" ++ show nr myprintl nr ((Instr a b) : l) = "\n" ++ show nr ++ "\t" ++ a ++ show b ++ myprintl (nr+1) l -- Fara putStr nu apar efectele de la CR si TAB-uri mainA0 = compile (iif (gt (constant 10) (constant 20)) (attr 'x' (constant 45)) (attr 'x' (constant 50)) ) {-- *MCOMP> mainA0 Length of the code:9 0 LD_INT 10 1 LD_INT 20 2 GT 0 3 JZ 7 4 LD_INT 45 5 STORE 120 6 JP 9 7 LD_INT 50 8 STORE 120 9*MCOMP> --} -- ---------------------COMPILER-------------------------- -- Compilarea constantei -- return pruimeste o pereche cu doua argumente -- lungimea = 1 a codului si codul masina generat constant nr = do { a0 <- readSM; let a1 = a0 +1 in do { writeSM a1; return (1, [Instr "LD_INT " nr] ) } } -- Sub GHC: mainA1 = compile (constant 10) {-- *MCOMP> mainA1 Length of the code:1 0 LD_INT 10 1*MCOMP> --} -- Daca printam direct runSM (constant 10) -- numarul dinaintea codului este lungimea acestuia !! {-- 1*MCOMP> runSM 0 (constant 10) ((1,[Instr "LD_INT " 10]),1) *MCOMP> --} -- Tabela de simboluri dummy symtable x = 000 + ord x -- Nu compilati (plus 10 20) in loc de (plus (constant 10) (constant 20)) -- altfel sistemul va pretinde sa existe o anume declaratie de instanta: -- instance Num SM (Integer,[Instr Int]) -- La fel, compilati (attr 's' (constant 10)) -----------------COMPILER------------------------------------ -- Compilarea variabilelor variable s = do { a0 <- readSM; let a1 = a0 +1 adr = symtable (s) in do { writeSM a1; return (1, [Instr "LD_VAR " adr] ) } } -- Compilarea variabilelor mainB1 = compile (variable 'a') {--*MCOMP> mainB1 Length of the code:1 0 LD_VAR 97 --} mainB2 = compile (variable 'A') {-- MCOMP> mainB2 Length of the code:1 0 LD_VAR 65 --} -- Compilarea declaratiilor -- nr indica numarul de variabile dintr-un limbaj monotip -- pt n variabile se aloca locatiile 0,1,...n-1 datas n = do { a0 <- readSM; let a1 = a0 + 1 arg = n -1 in do { writeSM a1; return (1, [Instr "DATA " arg] ) } } mainC1= compile (datas 10) {-- Length of the code:1 0 DATA 9 1*MCOMP> --} -- {-- -- Compilarea instructiunii skip skip :: SM (Int,[Instr Int]) skip = do { a0 <- readSM; let a1 = a0 + 0 -- pt a se conforma sablonului in do { writeSM a1; return (0, [] ) } } --} skip :: SM (Int,[Instr Int]) skip = return (0, [] ) mainD1 = compile skip {--compile (iif (variable 'x') (skip) (skip)) --} -- Compilarea operatiei de intrare read readv s -- read este utilizat = do { a0 <- readSM; let a1 = a0 +1 adr = symtable (s) in do { writeSM a1; return (1, [Instr "IN_INT " adr] ) } } mainD2 = compile (readv 'x') mainD3 = compile (readv 'y') {-- Length of the code:1 0 IN_INT 120 1 *MCOMP> mainD3 Length of the code:1 0 IN_INT 121 1 *MCOMP> --} -- Compilarea scrierilor seamana cu cea a atribuirilor write exp = do { a0 <-readSM; (l1,cod1) <- exp; let a1 = a0 + l1 a2 = a1 + 1 in do { writeSM a2; return (l1 + 1, concat [cod1, [Instr "OUT_INT " 0] ] ) } } mainE1 =compile (write (variable 'x')) {-- Length of the code:2 0 LD_VAR 120 1 OUT_INT 0 2 *MCOMP> mainE2 --} mainE2 = compile (write (plus (constant 10) (constant 20)) ) {-- *MCOMP> mainE2 Length of the code:4 0 LD_INT 10 1 LD_INT 20 2 ADD 0 3 OUT_INT 0 4 *MCOMP> --} mainE3 = compile (while (gt (constant 10) (constant 20)) (attr 'x' (constant 45))) {-- Length of the code:7 0 LD_INT 10 1 LD_INT 20 2 GT 0 3 JZ 7 4 LD_INT 45 5 STORE 120 6 JP 0 7 *MCOMP> --} -- Compilarea atribuirilor attr s exp = do { a0 <-readSM; (l1,cod1) <- exp; let a1 = a0 + l1 a2 = a1 + 1 adr = symtable s in do { writeSM a2; return (l1 + 1, concat [cod1, [Instr "STORE " adr] ] ) } } -- compilarea sumelor plus exp1 exp2 = do { a0 <-readSM; (l1,cod1) <- exp1; writeSM (a0+l1); (l2,cod2) <- exp2; let a3 = a0 + l1 + l2 + 1 in do { writeSM a3; return (l1+l2+1, concat [cod1, cod2, [Instr "ADD " 0 ] ] ) } } -- Si similar a celorlalte operatii minus exp1 exp2 = do { a0 <-readSM; (l1,cod1) <- exp1; writeSM (a0+l1); (l2,cod2) <- exp2; let a3 = a0 + l1 + l2 + 1 in do { writeSM a3; return (l1+l2+1, concat [cod1, cod2, [Instr "SUB " 0 ] ] ) } } mult exp1 exp2 = do { a0 <-readSM; (l1,cod1) <- exp1; writeSM (a0+l1); (l2,cod2) <- exp2; let a3 = a0 + l1 + l2 + 1 in do { writeSM a3; return (l1+l2+1, concat [cod1, cod2, [Instr "MULT " 0 ] ] ) } } div exp1 exp2 = do { a0 <-readSM; (l1,cod1) <- exp1; writeSM (a0+l1); (l2,cod2) <- exp2; let a3 = a0 + l1 + l2 + 1 in do { writeSM a3; return (l1+l2+1, concat [cod1, cod2, [Instr "DIV " 0 ] ] ) } } -- La fel si pentru comparatii eq exp1 exp2 = do { a0 <-readSM; (l1,cod1) <- exp1; writeSM (a0+l1); (l2,cod2) <- exp2; let a3 = a0 + l1 + l2 + 1 in do { writeSM a3; return (l1+l2+1, concat [cod1, cod2, [Instr "EQ " 0 ] ] ) } } -- Si similar, celelalte comparatii lt exp1 exp2 = do { a0 <-readSM; (l1,cod1) <- exp1; writeSM (a0+l1); (l2,cod2) <- exp2; let a3 = a0 + l1 + l2 + 1 in do { writeSM a3; return (l1+l2+1, concat [cod1, cod2, [Instr "LT " 0 ] ] ) } } gt exp1 exp2 = do { a0 <-readSM; (l1,cod1) <- exp1; writeSM (a0+l1); (l2,cod2) <- exp2; let a3 = a0 + l1 + l2 + 1 in do { writeSM a3; return (l1+l2+1, concat [cod1, cod2, [Instr "GT " 0 ] ] ) } } -- nu este neaparat nevoie de un NEQ el este inlocuibil cu un not de EQ -- adaugat -- Poate si lungimea totala trebuia stocata in monada nu numai adresa ? iif cond s1 s2 = do { a0 <-readSM; (l1,cod1) <- cond; writeSM (a0+l1+1); (l2,cod2) <- s1; writeSM (a0 + l1 + 1 + l2 + 1) ; (l3,cod3) <- s2; writeSM (a0 + l1 + 1 + l2 + 1 +l3); return (l1+1+l2+1+l3 , concat [cod1, [Instr "JZ " (a0 + l1 + 1 + l2 + 1) ], cod2, [Instr "JP " (a0 + l1 + 1 + l2 + 1 + l3) ], cod3 ] ) } -- Cod generat de instructiunea if, mainI1 = compile (iif (gt (constant 10) (constant 20)) (attr 'x' (constant 45)) (attr 'x' (constant 50)) ) {-- Length of the code:9 0 LD_INT 10 1 LD_INT 20 2 GT 0 3 JZ 7 4 LD_INT 45 5 STORE 120 6 JP 9 7 LD_INT 50 8 STORE 120 9 *MCOMP> --} -- Alt cod generat de instructiunea iif, corect mainI2 = compile (iif (variable 'x') (attr 'x' (constant 1)) (attr 'x' (constant 2 )) ) {-- *MCOMP> mainI2 Length of the code:7 0 LD_VAR 120 1 JZ 5 2 LD_INT 1 3 STORE 120 4 JP 7 5 LD_INT 2 6 STORE 120 7 *MCOMP> --} -- Alt cod generat de instructiunea iif, corect mainI3 = compile (iif (gt (variable 'x')(constant 0)) (attr 'x' (constant 1)) (attr 'x' (constant 2 )) ) {-- *MCOMP> mainI2 Length of the code:9 0 LD_VAR 120 1 LD_INT 0 2 GT 0 3 JZ 7 4 LD_INT 1 5 STORE 120 6 JP 9 7 LD_INT 2 8 STORE 120 9 *MCOMP> --} -- Compilarea secventelor -- Adresa intermediara de dupa cod 1 trebuie salvata in stare sequ s1 s2 -- seq e rezervat = do { a0 <-readSM; (l1,cod1) <- s1; let a2 = a0 +l1 in do { writeSM a2; -- the begining of the second code should be stored in state a2 <-readSM; -- putin redundant aici (l2,cod2) <- s2; let a4 = a2 + l2 in do { writeSM a4; return (l1+l2 , concat [cod1, cod2] ) } } } mainW = compile (sequ (attr 'x' (constant 45)) (attr 'x' (constant 50)) ) {--*MCOMP> mainW Length of the code:4 0 LD_INT 45 1 STORE 120 2 LD_INT 50 3 STORE 120 4*MCOMP> --} mainW2 = compile (sequ (attr 'x' (constant 45)) (attr 'y' (constant 50)) ) {--*MCOMP> mainW2 Length of the code:4 0 LD_INT 45 1 STORE 120 2 LD_INT 50 3 STORE 121 4*MCOMP> --} -- Bucla While -- Instructiunile JP generate de while sunt corecte while cond s1 = do { a0 <-readSM; (l1,cod1) <- cond; writeSM (a0+l1+1); (l2,cod2) <- s1; let a2 = a0 + l1 + 1 + l2+1 in do { writeSM a2; return (l1+l2+2, concat [cod1, [Instr "JZ " a2 ], cod2, [Instr "JP " a0]] ) } } -- Cod generat de o instr. while mainW0 = compile (while (gt (constant 10) (constant 20)) (attr 'x' (constant 45))) {-- Length of the code:7 0 LD_INT 10 1 LD_INT 20 2 GT 0 3 JZ 7 4 LD_INT 45 5 STORE 120 6 JP 0 7 *MCOMP> --} -- Alt cod generat de o instructiune while mainW1 = compile (while (gt (variable 'x') (constant 0)) (attr 'x' (minus (variable 'x') (constant 1)) )) {-- Length of the code:9 0 LD_VAR 120 1 LD_INT 0 2 GT 0 3 JZ 9 4 LD_VAR 120 5 LD_INT 1 6 SUB 0 7 STORE 120 8 JP 0 9 *MCOMP> --} -- Chiar daca instructiunea se scrie do, -- nu scrieti do, confuzionati parserul care recunoaste do-notatia -- am scris dowhile (do, instruction) dowhile s1 cond = do { a0 <-readSM; (l1,cod1) <- s1; writeSM (a0+l1); -- era fara +1,ok (l2,cod2) <- cond; let a2 = a0 + l1 + l2 + 1 -- erau inversate dar ok in do { writeSM a2; return (l1+l2+1, concat [cod1, cod2, [Instr "JNZ " a0 ] ] ) } } -- Cod generat de o instructiune do-while, OK mainDW1 = compile (dowhile (attr 'x' (constant 45)) (gt (constant 10) (constant 20) ) ) {-- *MCOMP> mainDW1 Length of the code:6 0 LD_INT 45 1 STORE 120 2 LD_INT 10 3 LD_INT 20 4 GT 0 5 JNZ 0 6*MCOMP> --} -- Alt cod generat de dowhile, OK mainDW2 = compile (dowhile (attr 'x' (minus (variable 'x') (constant 1)) ) (gt (variable 'x') (constant 0)) ) {-- Length of the code:8 0 LD_VAR 120 1 LD_INT 1 2 SUB 0 3 STORE 120 4 LD_VAR 120 5 LD_INT 0 6 GT 0 7 JNZ 0 8 *MCOMP> --} mainDW3 = compile (dowhile (sequ (attr 'x' (minus (variable 'x') (constant 1)) ) (while (gt (variable 'y') (constant 0)) (attr 'y' (minus (variable 'y') (constant 1)) ))) (gt (variable 'x') (constant 0)) ) {-- *MCOMP> mainDW3 Length of the code:17 0 LD_VAR 120 -- 'x' 1 LD_INT 1 -- 1 2 SUB 0 -- x-1 3 STORE 120 -- x:=x-1 4 LD_VAR 121 -- y -- intrare in bucala interioara !! -- optimizare posibila ? 5 LD_INT 0 -- 0 6 GT 0 -- y > 0 7 JZ 13 -- fals -> iese din bucla la adr 13 8 LD_VAR 121 9 LD_INT 1 10 SUB 0 11 STORE 121 12 JP 4 13 LD_VAR 120 14 LD_INT 0 15 GT 0 16 JNZ 0 17*MCOMP> --} -- Nota: diferenta intre cele 2 variante de writeSm (a0+l1 ) si writeSm (a0+l1 ) -- s-ar vedea numai daca insesi conditia lui dop-while ar fi o alta bucla while !! -- pentru ca aici, imediat dupa codul din do-while, ar trebui sa revina bucla interioara -- a conditiei !! mainDW4 = compile (dowhile (attr 'x' (minus (variable 'x') (constant 1)) ) (while (gt (variable 'y') (constant 0)) (attr 'y' (minus (variable 'y') (constant 1)) )) ) {-- Era gresit cu: +1 *MCOMP> mainDW4 Length of the code:14 0 LD_VAR 120 -- x:=x-1 1 LD_INT 1 2 SUB 0 3 STORE 120 -- final atribuire 4 LD_VAR 121 5 LD_INT 0 -- punct de intrare gresit in mijlocul comparatiei 6 GT 0 7 JZ 14 -- ar fi fost 13 daca uitam pe +1 8 LD_VAR 121 9 LD_INT 1 10 SUB 0 11 STORE 121 12 JP 5 -- ar fi fost 4 daca uitam pe +1 13 JNZ 0 14*MCOMP> --} {-- fara +1 era bine !! *MCOMP> mainDW4 Length of the code:14 0 LD_VAR 120 1 LD_INT 1 2 SUB 0 3 STORE 120 4 LD_VAR 121 -- punct de intrare, inceput al comparatiei 5 LD_INT 0 6 GT 0 7 JZ 13 -- era 14 cu +1 8 LD_VAR 121 9 LD_INT 1 10 SUB 0 11 STORE 121 12 JP 4 -- era 5 cu +1 13 JNZ 0 14*MCOMP> --} -- Compilarea programului e la fel ca a secventei -- deci cel mai simplu e sa o copiem de la sequ program s1 s2 = sequ s1 s2 -- OK v04 si v05 mainX = compile (program (datas 2) (sequ (attr 'x' (constant 45)) (attr 'x' (constant 50)) ) ) {-- *MCOMP> mainX Length of the code:5 0 DATA 1 1 LD_INT 45 2 STORE 120 3 LD_INT 50 4 STORE 120 5*MCOMP> --} -- Compiling an I/O program: OK v04 si v05 main0 = compile (program (datas 2) (sequ (readv 'x') (write (variable 'x') ) ) ) {-- *MCOMP> main0 Length of the code:4 0 DATA 1 1 IN_INT 120 2 LD_VAR 120 3 OUT_INT 0 4*MCOMP> --} -- De compilat main = compile (program (datas 2) (sequ (readv 'n') (sequ (iif (gt (variable 'n') (constant 10) ) (attr 'x' (constant 1)) (skip) ) (skip) )) ) {-- ---- Length of the code:9 0 DATA 1 1 IN_INT 110 2 LD_VAR 110 3 LD_INT 10 4 GT 0 5 JZ 9 6 LD_INT 1 7 STORE 120 8 JP 9 9*MCOMP> --} -- OK main1 = compile (iif (gt (variable 'n') (constant 10) ) (attr 'x' (constant 1)) (skip) ) {-- 7*MCOMP> main1 Length of the code:7 0 LD_VAR 110 1 LD_INT 10 2 GT 0 3 JZ 7 4 LD_INT 1 5 STORE 120 6 JP 7 7*MCOMP> --} -- Ok main2 = compile (sequ (iif (gt (variable 'n') (constant 10) ) (attr 'x' (constant 1)) (skip) ) (skip) ) {-- Length of the code:7 0 LD_VAR 110 1 LD_INT 10 2 GT 0 3 JZ 7 4 LD_INT 1 5 STORE 120 6 JP 7 7*MCOMP> --} -- ?? main3 = compile (sequ (readv 'x') (sequ (iif (gt (variable 'n') (constant 10) ) (attr 'x' (constant 1)) (skip) ) (skip) )) {---- Length of the code:8 0 IN_INT 120 1 LD_VAR 110 2 LD_INT 10 3 GT 0 4 JZ 8 5 LD_INT 1 6 STORE 120 7 JP 8 8*MCOMP> --} -- ? main3b = compile (sequ (constant 111) (sequ (iif (gt (variable 'n') (constant 10) ) (attr 'x' (constant 1)) (skip) ) (skip) )) {-- 8*MCOMP> main3b Length of the code:8 0 LD_INT 111 1 LD_VAR 110 2 LD_INT 10 3 GT 0 4 JZ 8 5 LD_INT 1 6 STORE 120 7 JP 8 8*MCOMP> --} -- Programul din cartea Compiler Construction using Flex and Bison, fig 7.1 -- pp 75 la editia 2006 Edusoft main4 = putStr . prettyprint $ runSM 0 (program (datas 2) (sequ (readv 'n') (sequ (iif (lt (variable 'n') (constant 10) ) (attr 'x' (constant 1)) (skip) ) (while (lt (variable 'n') (constant 10)) (sequ (attr 'x' (mult (constant 5)(variable 'x')) ) (attr 'n' (plus (variable 'n') (constant 1)) ) ) ) ) ) ) {-- *MCOMP> main4 Length of the code:22 0 DATA 1 1 IN_INT 110 2 LD_VAR 110 3 LD_INT 10 4 LT 0 5 JZ 9 6 LD_INT 1 7 STORE 120 8 JP 9 9 LD_VAR 110 10 LD_INT 10 11 LT 0 12 JZ 22 13 LD_INT 5 14 LD_VAR 120 15 MULT 0 16 STORE 120 17 LD_VAR 110 18 LD_INT 1 19 ADD 0 20 STORE 110 21 JP 9 22*MCOMP> --}
