Personal tools

Modular Monadic Compilers for Programming Languages

From HaskellWiki

Revision as of 13:06, 12 July 2011 by Ha$kell (Talk | contribs)

Jump to: navigation, search

Primele luni ale anului 2011 s-au dovedit prolifice pentru realizatorii de compilatoare:

1 . Din cercetarile comunitatii Haskell internationale

Prof. Graham Hutton lucreaza cu unul dintre doctoranzii (Laurence Day) sai la compilatoarele monadice modulare.

http://www.cs.nott.ac.uk/~gmh/bib.html#mod-comp

2 . Din cercetarile de la univ. "V.Alecsandri" din Bacau

Dan Popa isi continua cercetarile din teza de doctorat studiind aplicatiile acelor "Pseodoconstructors over monadic values" la realizarea compilatoarelor.

Vom reveni cu un Techincal Report.

-- A modular monadic compiler built using pseudoconstructors 
-- over monadic values, by Dan Popa. "V.Alecsandri" Univ. of Bacau.
 
module MCOMP where
import Monad
import Data.Char  
-- MCOMP, 20 iunie 2011, Dan Popa
-- v02. 21 iunie 2011 - corectat while-ul care genera cod fara salt inapoi
--- [... deleted]
-- v08 30 iunie 2011
 
-- Tipul starilor , aici adresele de inceput/sfarsit de cod
type S = Int
 
-- Monada starilor
data  SM a = SM (S -> (a,S))
 
instance Monad SM where
  SM c1 >>= fc2 = SM (\s0 -> let (r,s1) = c1  s0 
                                 SM c2  = fc2 r
                             in c2 s1 )
  return k 	= SM (\s -> (k,s))
 
readSM :: SM S
readSM = SM (\s -> (s,s))
 
updateSM :: (S -> S) -> SM S
updateSM f = SM (\s -> (s, f s)) 
 
{--
fupdateSM :: (S -> S) -> SM S
fupdateSM f = SM (\s -> (s, f s))
--}
 
writeSM :: S -> SM S 
writeSM a  = SM (\s -> (s,a))
 
allocSM :: S -> SM S
allocSM l = SM (\s -> (s, s+l))
 
runSM :: S -> SM a -> (a,S)
runSM s0 (SM c) = c s0
 
compile :: (Show t, Show t1) => SM (t, [Instr t1]) -> IO ()
compile arb = putStr . prettyprint $ runSM 0000 arb
 
----- INSTRUCTIONS ------------------------
 
data  Instr a = Instr String a
             deriving (Show,Read,Eq)
 
 
 
---- PRETTY PRINTER-ul -------------------
 
prettyprint ((a,l),b)
  = "\n" ++ "Length of the code:" ++ 
     show a ++ myprintl 0 l
 
myprintl nr [] 
  = "\n" ++ show nr 
myprintl nr ((Instr a b) : l) 
  =  "\n" ++ show nr ++ "\t" ++
     a ++ show b ++ myprintl (nr+1) l
 
-- Fara putStr nu apar efectele de la CR si TAB-uri
mainA0 = compile (iif (gt (constant  10) (constant 20)) (attr 'x' (constant 45))  (attr 'x' (constant 50))  )
 
{-- *MCOMP> mainA0
 
Length of the code:9
0	LD_INT 10
1	LD_INT 20
2	GT 0
3	JZ 7
4	LD_INT 45
5	STORE 120
6	JP 9
7	LD_INT 50
8	STORE 120
9*MCOMP> 
--}
 
--
 
---------------------COMPILER--------------------------
 
-- Compilarea constantei
-- return pruimeste  o pereche cu doua argumente
-- lungimea = 1 a codului si codul masina generat
 
constant nr 
  = do { a0 <- readSM;
         let a1 = a0 +1 
         in do { 
                 writeSM a1;
	         return (1, [Instr "LD_INT " nr] )
                }
       }
 
-- Sub GHC:
 
mainA1 = compile  (constant 10)
 
{--
*MCOMP> mainA1
 
Length of the code:1
0	LD_INT 10
1*MCOMP> 
--}
-- Daca printam direct runSM (constant 10)
-- numarul dinaintea codului este lungimea acestuia !!
{-- 1*MCOMP> runSM 0  (constant 10)
((1,[Instr "LD_INT " 10]),1)
*MCOMP> 
--}
 
-- Tabela de simboluri dummy
symtable x  = 000 + ord x
 
-- Nu compilati (plus 10 20) in loc de (plus (constant 10) (constant 20)) 
-- altfel sistemul va pretinde sa existe o anume declaratie de instanta:
-- instance Num  SM (Integer,[Instr Int]) 
-- La fel, compilati (attr 's' (constant 10)) 
 
-----------------COMPILER------------------------------------
-- Compilarea variabilelor
 
variable s
  = do { a0 <- readSM;
         let a1 = a0 +1 
             adr = symtable (s)   
         in do { 
                 writeSM a1;
	         return (1, [Instr "LD_VAR " adr] )
                }
       }
 
-- Compilarea variabilelor
 
mainB1 = compile  (variable 'a')
 
{--*MCOMP> mainB1
 
Length of the code:1
0	LD_VAR 97
--}
 
mainB2 = compile  (variable 'A')
 
{-- MCOMP> mainB2
 
Length of the code:1
0	LD_VAR 65
 
--}
 
 
-- Compilarea declaratiilor
-- nr indica numarul de variabile dintr-un limbaj monotip
-- pt n variabile se aloca locatiile 0,1,...n-1
datas n 
  = do { a0 <- readSM;
         let a1 = a0 + 1 
             arg = n -1
         in do { 
                 writeSM a1;
	         return (1, [Instr "DATA " arg] )   
                } 
       }
 
mainC1= compile  (datas 10)
 
{--
Length of the code:1
0	DATA 9
1*MCOMP> 
 
--}
 
 
 
--
{--
-- Compilarea instructiunii skip
skip ::  SM (Int,[Instr Int])
skip 
  = do { a0 <- readSM;
         let a1 = a0 + 0  -- pt a se conforma sablonului
         in do { 
                 writeSM a1;
	         return (0, [] )
                } 
       }
 
--}
skip ::  SM (Int,[Instr Int])
skip  = return (0, [] )
 
mainD1 = compile skip
 
 
{--compile (iif (variable 'x') (skip) (skip))
 
--}
 
-- Compilarea operatiei de intrare read
 
 
readv s                        -- read este utilizat
  = do { a0 <- readSM;
         let a1 = a0 +1 
             adr = symtable (s)   
         in do { 
                 writeSM a1;
	         return (1, [Instr "IN_INT " adr] )
                }
       }
 
 
mainD2 = compile (readv 'x')
 
mainD3 = compile (readv 'y')
 
{--
Length of the code:1
0	IN_INT 120
1
 
*MCOMP> mainD3
 
Length of the code:1
0	IN_INT 121
1
 
*MCOMP> 
--}
 
-- Compilarea scrierilor seamana cu cea a atribuirilor
write exp 
  = do { a0 <-readSM;
         (l1,cod1) <- exp;
         let   a1 = a0 + l1
               a2 = a1 + 1
         in do {  writeSM a2;
                  return (l1 + 1, concat [cod1,
                                         [Instr "OUT_INT " 0] ] )
               }
        }
 
 
mainE1 =compile (write (variable 'x'))
 
{--
Length of the code:2
0	LD_VAR 120
1	OUT_INT 0
2
*MCOMP> mainE2
--}
 
 
mainE2 = compile (write (plus (constant  10) (constant 20)) )
{--
*MCOMP> mainE2
 
Length of the code:4
0	LD_INT 10
1	LD_INT 20
2	ADD 0
3	OUT_INT 0
4
 
*MCOMP> 
--} 
mainE3 = compile (while (gt (constant  10) (constant 20)) (attr 'x' (constant 45)))
 
{--
Length of the code:7
0	LD_INT 10
1	LD_INT 20
2	GT 0
3	JZ 7
4	LD_INT 45
5	STORE 120
6	JP 0
7
*MCOMP> 
 
--}
 
 
-- Compilarea atribuirilor
attr s exp 
  = do { a0 <-readSM;
         (l1,cod1) <- exp;
         let   a1 = a0 + l1
               a2 = a1 + 1
               adr = symtable s 
         in do {  writeSM a2;
                  return (l1 + 1, concat [cod1,
                                         [Instr "STORE " adr] ] )
               }
        }
 
-- compilarea sumelor
plus exp1 exp2 
  = do { a0 <-readSM;
         (l1,cod1) <- exp1;
         writeSM (a0+l1);
         (l2,cod2) <- exp2;
         let   a3 = a0 + l1 + l2 + 1
         in do {  writeSM a3;
                  return (l1+l2+1, concat [cod1,
                                           cod2, 
                                          [Instr "ADD " 0 ] ] )
               }
        }
 
-- Si similar a celorlalte operatii
 
minus exp1 exp2 
  = do { a0 <-readSM;
         (l1,cod1) <- exp1;
         writeSM (a0+l1);
         (l2,cod2) <- exp2;
         let   a3 = a0 + l1 + l2 + 1
         in do {  writeSM a3;
                  return (l1+l2+1, concat [cod1,
                                           cod2,
                                           [Instr "SUB " 0 ] ] )
               }
        }
 
mult exp1 exp2 
  = do { a0 <-readSM;
         (l1,cod1) <- exp1;
         writeSM (a0+l1);
         (l2,cod2) <- exp2;
         let   a3 = a0 + l1 + l2 + 1
         in do {  writeSM a3;
                  return (l1+l2+1, concat [cod1,
                                           cod2,
                                          [Instr "MULT " 0 ] ] )
               }
        }
 
div exp1 exp2 
  = do { a0 <-readSM;
         (l1,cod1) <- exp1;
         writeSM (a0+l1);
         (l2,cod2) <- exp2;
         let   a3 = a0 + l1 + l2 + 1
         in do {  writeSM a3;
                  return (l1+l2+1, concat [cod1,
                                           cod2,
                                          [Instr "DIV " 0 ] ] )
               }
        }
 
-- La fel si pentru comparatii 
eq exp1 exp2 
  = do { a0 <-readSM;
         (l1,cod1) <- exp1;
         writeSM (a0+l1);
         (l2,cod2) <- exp2;
         let   a3 = a0 + l1 + l2 + 1
         in do {  writeSM a3;
                  return (l1+l2+1, concat [cod1, 
                                           cod2,
                                          [Instr "EQ " 0 ] ] )
               }
        }
 
-- Si similar, celelalte comparatii
 
lt exp1 exp2 
  = do { a0 <-readSM;
         (l1,cod1) <- exp1;
         writeSM (a0+l1);
         (l2,cod2) <- exp2;
         let   a3 = a0 + l1 + l2 + 1
         in do {  writeSM a3;
                  return (l1+l2+1, concat [cod1,
                                           cod2,
                                          [Instr "LT " 0 ] ] )
               }
        }
 
gt exp1 exp2 
  = do { a0 <-readSM;
         (l1,cod1) <- exp1;
         writeSM (a0+l1);
         (l2,cod2) <- exp2;
         let   a3 = a0 + l1 + l2 + 1
         in do {  writeSM a3;
                  return (l1+l2+1, concat [cod1,
                                           cod2,
                                          [Instr "GT " 0 ] ] )
               }
        }
 
-- nu este neaparat nevoie de un NEQ el este inlocuibil cu un not de EQ
--  adaugat
 
 
-- Poate si lungimea totala trebuia stocata in monada nu numai adresa ?
 
iif  cond s1 s2  
  = do { a0 <-readSM;
         (l1,cod1) <- cond;
         writeSM (a0+l1+1);
         (l2,cod2) <- s1;
         writeSM  (a0 + l1 + 1 + l2 + 1) ;
         (l3,cod3) <- s2;
	 writeSM  (a0 + l1 + 1 + l2 + 1 +l3);
         return (l1+1+l2+1+l3 , concat [cod1, 
	                                [Instr "JZ " (a0 + l1 + 1 + l2 + 1)  ],
                                        cod2,
                                        [Instr "JP " (a0 + l1 + 1 + l2 + 1 + l3) ],
 					cod3 ] )
 
        }
 
 
-- Cod generat de instructiunea if, 
mainI1 = compile (iif (gt (constant  10) (constant 20)) (attr 'x' (constant 45))  (attr 'x' (constant 50))  )
 
{--
Length of the code:9
0	LD_INT 10
1	LD_INT 20
2	GT 0
3	JZ 7
4	LD_INT 45
5	STORE 120
6	JP 9
7	LD_INT 50
8	STORE 120
9
*MCOMP> 
--}
 
-- Alt cod generat de instructiunea iif, corect
mainI2 = compile (iif (variable 'x') (attr 'x' (constant 1)) (attr 'x' (constant 2 )) ) 
 
{--
*MCOMP> mainI2
 
Length of the code:7
0	LD_VAR 120
1	JZ 5
2	LD_INT 1
3	STORE 120
4	JP 7
5	LD_INT 2
6	STORE 120
7
*MCOMP> 
--}
 
-- Alt cod generat de instructiunea iif, corect
mainI3 = compile (iif (gt (variable 'x')(constant 0)) (attr 'x' (constant 1)) (attr 'x' (constant 2 )) ) 
 
{--
*MCOMP> mainI2
Length of the code:9
0	LD_VAR 120
1	LD_INT 0
2	GT 0
3	JZ 7
4	LD_INT 1
5	STORE 120
6	JP 9
7	LD_INT 2
8	STORE 120
9
*MCOMP> 
--}
 
 
-- Compilarea secventelor
-- Adresa intermediara de dupa cod 1 trebuie salvata in stare
 
sequ s1 s2                      -- seq e rezervat
  = do { a0 <-readSM;
         (l1,cod1) <- s1;
         let a2 = a0 +l1
         in do  
              { writeSM a2;    -- the begining of the second code should be stored in state
                a2 <-readSM;   -- putin redundant aici
		(l2,cod2) <- s2;
                let   a4 = a2 +  l2 
                in do {  writeSM a4;
                      return (l1+l2 , concat [cod1, cod2] )
                      }
              }
        }
 
 
mainW = compile (sequ (attr 'x' (constant 45))  (attr 'x' (constant 50))  )
 
{--*MCOMP> mainW
 
Length of the code:4
0	LD_INT 45
1	STORE 120
2	LD_INT 50
3	STORE 120
4*MCOMP> 
--}
 
mainW2 = compile (sequ (attr 'x' (constant 45))  (attr 'y' (constant 50))  )
 
{--*MCOMP> mainW2
 
Length of the code:4
0	LD_INT 45
1	STORE 120
2	LD_INT 50
3	STORE 121
4*MCOMP> 
--}
 
-- Bucla While
-- Instructiunile JP generate de while sunt corecte
 
while cond s1 
  = do { a0 <-readSM;
         (l1,cod1) <- cond;
         writeSM (a0+l1+1);
         (l2,cod2) <- s1;
         let   a2 = a0 + l1  + 1 + l2+1
         in do {  writeSM a2;
                  return (l1+l2+2, concat [cod1,
                                          [Instr "JZ " a2 ],
                                           cod2,
                                          [Instr "JP " a0]] )
               }
        }
 
-- Cod generat de o instr. while
mainW0 = compile (while (gt (constant  10) (constant 20)) (attr 'x' (constant 45)))
{--
Length of the code:7
0	LD_INT 10
1	LD_INT 20
2	GT 0
3	JZ 7
4	LD_INT 45
5	STORE 120
6	JP 0
7
*MCOMP> 
--}
 
-- Alt cod generat de o instructiune while
mainW1 = compile (while (gt (variable 'x') (constant 0)) (attr 'x' (minus (variable 'x') (constant 1)) ))
{--
Length of the code:9
0	LD_VAR 120
1	LD_INT 0
2	GT 0
3	JZ 9
4	LD_VAR 120
5	LD_INT 1
6	SUB 0
7	STORE 120
8	JP 0
9
*MCOMP> 
--}
 
 
-- Chiar daca instructiunea se scrie do,
-- nu scrieti do, confuzionati parserul care recunoaste do-notatia
-- am scris dowhile (do, instruction)
dowhile s1 cond 
  = do { a0 <-readSM;
         (l1,cod1) <- s1;
         writeSM (a0+l1);  -- era fara +1,ok
         (l2,cod2) <- cond;
         let   a2 = a0 + l1  + l2 + 1  -- erau inversate dar ok
         in do {  writeSM a2;
                  return (l1+l2+1, concat [cod1,
                                           cod2,
                                           [Instr "JNZ " a0 ] ] )
               }
        }
 
-- Cod generat de o instructiune do-while, OK
mainDW1 = compile (dowhile  (attr   'x' (constant 45))  (gt (constant  10) (constant 20) ) )
{--
*MCOMP> mainDW1 
 
Length of the code:6
0	LD_INT 45
1	STORE 120
2	LD_INT 10
3	LD_INT 20
4	GT 0
5	JNZ 0
6*MCOMP> 
--}
 
-- Alt cod generat de dowhile, OK
mainDW2 = compile (dowhile  (attr 'x' (minus (variable 'x') (constant 1)) ) (gt (variable 'x') (constant 0)) )
{--
Length of the code:8
0	LD_VAR 120
1	LD_INT 1
2	SUB 0
3	STORE 120
4	LD_VAR 120
5	LD_INT 0
6	GT 0
7	JNZ 0
8
*MCOMP> 
--}
 
mainDW3 
  = compile 
  (dowhile  (sequ (attr 'x' (minus (variable 'x') (constant 1)) ) 
                  (while (gt (variable 'y') (constant 0)) (attr 'y' (minus (variable 'y') (constant 1)) )))
            (gt (variable 'x') (constant 0)) )
{--
*MCOMP> mainDW3
 
Length of the code:17
0	LD_VAR 120     -- 'x'     
1	LD_INT 1       -- 1
2	SUB 0          -- x-1 
3	STORE 120      -- x:=x-1              
4	LD_VAR 121     -- y  -- intrare in bucala interioara !!    -- optimizare posibila ?
5	LD_INT 0       -- 0
6	GT 0           -- y > 0
7	JZ 13          -- fals -> iese din bucla la adr 13 
8	LD_VAR 121
9	LD_INT 1
10	SUB 0
11	STORE 121
12	JP 4
13	LD_VAR 120
14	LD_INT 0
15	GT 0
16	JNZ 0
17*MCOMP> 
 
--}
 
-- Nota: diferenta intre cele 2 variante de writeSm (a0+l1 ) si  writeSm (a0+l1 )
-- s-ar vedea numai daca insesi conditia lui dop-while ar fi o alta bucla while !!
-- pentru ca aici, imediat dupa codul din do-while, ar trebui sa revina bucla interioara
-- a conditiei !!
 
mainDW4 
  = compile 
  (dowhile  (attr 'x' (minus (variable 'x') (constant 1)) ) 
            (while (gt (variable 'y') (constant 0)) (attr 'y' (minus (variable 'y') (constant 1)) )) )
 
{-- Era gresit cu: +1
*MCOMP> mainDW4
 
Length of the code:14
0	LD_VAR 120   -- x:=x-1
1	LD_INT 1
2	SUB 0
3	STORE 120    -- final atribuire
4	LD_VAR 121   
5	LD_INT 0     -- punct de intrare gresit in mijlocul comparatiei
6	GT 0
7	JZ 14        -- ar fi fost 13 daca uitam pe +1
8	LD_VAR 121
9	LD_INT 1
10	SUB 0
11	STORE 121
12	JP 5         -- ar fi fost 4 daca uitam pe +1
13	JNZ 0
14*MCOMP> 
--}
 
{-- fara +1 era bine !!
*MCOMP> mainDW4
 
Length of the code:14
0	LD_VAR 120
1	LD_INT 1
2	SUB 0
3	STORE 120
4	LD_VAR 121         -- punct de intrare, inceput al comparatiei 
5	LD_INT 0
6	GT 0
7	JZ 13              -- era 14 cu +1
8	LD_VAR 121
9	LD_INT 1
10	SUB 0
11	STORE 121
12	JP 4               -- era 5 cu +1
13	JNZ 0
14*MCOMP> 
 
 
--}
 
-- Compilarea programului e la fel ca a secventei
-- deci cel mai simplu e sa o copiem de la sequ
program s1 s2 = sequ  s1 s2 
 
 
--  OK v04 si v05
mainX = compile 
   (program (datas 2) 
            (sequ (attr 'x' (constant 45))  
                  (attr 'x' (constant 50))  
            )
   )
 
{--
*MCOMP> mainX
 
Length of the code:5
0	DATA 1
1	LD_INT 45
2	STORE 120
3	LD_INT 50
4	STORE 120
5*MCOMP> 
 
--}
 
-- Compiling an I/O program: OK v04 si v05
 
main0 = compile 
   (program (datas 2) 
            (sequ (readv 'x')
                  (write (variable 'x')  )  
            )
   )
 
{-- 
*MCOMP> main0
 
Length of the code:4
0	DATA 1
1	IN_INT 120
2	LD_VAR 120
3	OUT_INT 0
4*MCOMP>
--}
 
--   De compilat 
main = compile 
   (program (datas 2) 
            (sequ (readv 'n')
             (sequ (iif (gt (variable 'n') (constant 10) )
                        (attr 'x' (constant 1))
                        (skip) 
                   )
                   (skip)     
            ))
   )
 
 
 
{-- ----
Length of the code:9
0	DATA 1
1	IN_INT 110
2	LD_VAR 110
3	LD_INT 10
4	GT 0
5	JZ 9
6	LD_INT 1
7	STORE 120
8	JP 9
9*MCOMP> 
 
--}
 
-- OK
main1 = compile 
                   (iif (gt (variable 'n') (constant 10) )
                        (attr 'x' (constant 1))
                        (skip) 
                   )
{--
7*MCOMP> main1
 
Length of the code:7
0	LD_VAR 110
1	LD_INT 10
2	GT 0
3	JZ 7
4	LD_INT 1
5	STORE 120
6	JP 7
7*MCOMP> 
--}
 
 
-- Ok 
main2 = compile 
             (sequ (iif (gt (variable 'n') (constant 10) )
                        (attr 'x' (constant 1))
                        (skip) 
                   )
                   (skip)     
             )
 
{--
Length of the code:7
0	LD_VAR 110
1	LD_INT 10
2	GT 0
3	JZ 7
4	LD_INT 1
5	STORE 120
6	JP 7
7*MCOMP> 
 
--}
 
-- ??
main3 = compile 
            (sequ (readv 'x')
             (sequ (iif (gt (variable 'n') (constant 10) )
                        (attr 'x' (constant 1))
                        (skip) 
                   )
                   (skip)     
            ))
{----
Length of the code:8
0	IN_INT 120
1	LD_VAR 110
2	LD_INT 10
3	GT 0
4	JZ 8
5	LD_INT 1
6	STORE 120
7	JP 8
8*MCOMP> 
 
--}
 
 
-- ?
main3b = compile 
            (sequ (constant 111)
             (sequ (iif (gt (variable 'n') (constant 10) )
                        (attr 'x' (constant 1))
                        (skip) 
                   )
                   (skip)     
            ))
{--
8*MCOMP> main3b
 
Length of the code:8
0	LD_INT 111
1	LD_VAR 110
2	LD_INT 10
3	GT 0
4	JZ 8
5	LD_INT 1
6	STORE 120
7	JP 8
8*MCOMP> 
--}
 
 
 -- Programul din cartea Compiler Construction using Flex and Bison, fig 7.1
 -- pp 75 la editia 2006 Edusoft
main4 = putStr . prettyprint $ runSM 0
   (program (datas 2) 
            (sequ (readv 'n')
             (sequ (iif (lt (variable 'n') (constant 10) )
                        (attr 'x' (constant 1))
                        (skip) 
                   )
                   (while (lt (variable 'n')  (constant 10))
                          (sequ (attr 'x' (mult (constant 5)(variable 'x')) 
                                )    
                                (attr 'n' (plus (variable 'n') (constant 1))
                                ) 
                          )  
                   )   
             )
            )
 
 
   )
 
{--
*MCOMP> main4
 
Length of the code:22
0	DATA 1
1	IN_INT 110
2	LD_VAR 110
3	LD_INT 10
4	LT 0
5	JZ 9
6	LD_INT 1
7	STORE 120
8	JP 9
9	LD_VAR 110
10	LD_INT 10
11	LT 0
12	JZ 22
13	LD_INT 5
14	LD_VAR 120
15	MULT 0
16	STORE 120
17	LD_VAR 110
18	LD_INT 1
19	ADD 0
20	STORE 110
21	JP 9
22*MCOMP> 
 
 
--}