diff --git a/.gitignore b/.gitignore index 5112877..193a11d 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,4 @@ dist-newstyle *.x *.bak src/Grammar -/language +language diff --git a/Grammar.cf b/Grammar.cf index b37d589..9ed9924 100644 --- a/Grammar.cf +++ b/Grammar.cf @@ -1,5 +1,3 @@ - - Program. Program ::= [Bind]; EId. Exp3 ::= Ident; @@ -9,14 +7,11 @@ EApp. Exp2 ::= Exp2 Exp3; EAdd. Exp1 ::= Exp1 "+" Exp2; EAbs. Exp ::= "\\" Ident "." Exp; -EId. Exp3 ::= Ident ; -EInt. Exp3 ::= Integer ; -EApp. Exp2 ::= Exp2 Exp3 ; -EAdd. Exp1 ::= Exp1 "+" Exp2 ; -EAbs. Exp ::= "\\" Ident "->" Exp ; +Bind. Bind ::= Ident [Ident] "=" Exp; +separator Bind ";"; +separator Ident " "; -coercions Exp 3 ; - -comment "--" ; -comment "{-" "-}" ; +coercions Exp 3; +comment "--"; +comment "{-" "-}"; \ No newline at end of file diff --git a/cabal.project.local b/cabal.project.local new file mode 100644 index 0000000..0432756 --- /dev/null +++ b/cabal.project.local @@ -0,0 +1,2 @@ +ignore-project: False +tests: True diff --git a/language b/language deleted file mode 120000 index 8c068e4..0000000 --- a/language +++ /dev/null @@ -1 +0,0 @@ -/home/samuel/.cabal/store/ghc-9.4.4/language-0.1.0.0-e-language-b2dd0f5c425bcebb6d06c37d2359fc74433c8d1d898fd217ad1cdc50c9035ad3/bin/language \ No newline at end of file diff --git a/language.cabal b/language.cabal index fb56076..2fd2bbd 100644 --- a/language.cabal +++ b/language.cabal @@ -1,4 +1,4 @@ -cabal-version: 3.0 +cabal-version: 3.4 name: language @@ -20,19 +20,52 @@ extra-source-files: common warnings ghc-options: -W -executable language - import: warnings - - main-is: Main.hs - +Test-Suite sample-programs + Type: exitcode-stdio-1.0 + main-is: Main.hs + Build-depends: + base >=4.16 + , QuickCheck + , hspec + , directory + , process + , mtl + , containers + , either + , array + , extra + hs-source-dirs: src, sample-programs + default-language: GHC2021 other-modules: Grammar.Abs Grammar.Lex Grammar.Par Grammar.Print Grammar.Skel + Grammar.ErrM + LambdaLifter + Auxiliary Interpreter + Compiler.Compiler + Compiler.LLVMIr +executable language + import: warnings + + main-is: Main.hs + + other-modules: + Grammar.Abs + Grammar.Lex + Grammar.Par + Grammar.Print + Grammar.Skel + Grammar.ErrM + LambdaLifter + Auxiliary + Interpreter + Compiler.Compiler + Compiler.LLVMIr hs-source-dirs: src build-depends: @@ -42,5 +75,4 @@ executable language , either , array , extra - default-language: GHC2021 diff --git a/sample-programs/Main.hs b/sample-programs/Main.hs new file mode 100644 index 0000000..54cdad3 --- /dev/null +++ b/sample-programs/Main.hs @@ -0,0 +1,60 @@ +module Main where +import Compiler.Compiler (compile) +import Control.Exception (IOException, catch, + evaluate) +import Data.List (isSuffixOf, sort) +import Data.List.Extra (trim) +import GHC.IO.Handle (BufferMode (NoBuffering), + hSetBuffering) +import Grammar.ErrM (Err) +import Grammar.Par (myLexer, pProgram) +import LambdaLifter (lambdaLift) +import System.Directory (getDirectoryContents) +import System.Directory.Internal.Prelude (exitFailure) +import System.IO (hPrint) +import System.IO.Extra (stderr) +import System.Process (CreateProcess (std_in), + StdStream (CreatePipe), + createProcess, proc, + readCreateProcess, shell, + waitForProcess) +import Test.Hspec (hspec, it, shouldSatisfy) +import Text.Printf (hPrintf) + + +path :: String +path = "sample-programs/sample-programs" + +main :: IO () +main = do + dir <- getDirectoryContents path + -- this is not a good way to grab tests + -- ideally one would grab all .sf files, remove the sf extension + -- and and res instead. Going to fix that soon + let tests = sort $ filter (".sf" `isSuffixOf`) dir + let results = sort $ filter (".res" `isSuffixOf`) dir + let combined = zip tests results + mapM_ (uncurry test) combined + +fromErr :: Err a -> IO a +fromErr (Left a) = do + hPrint stderr a + exitFailure +fromErr (Right a) = pure a + +comp :: String -> IO String +comp file = do + parsed <- fromErr . pProgram $ myLexer file + let lifted = lambdaLift parsed + fromErr $ compile lifted + +test :: String -> String -> IO () +test t r = do + test <- readFile (path <> "/" <> t) + expectedRes <- trim <$> readFile (path <> "/" <> r) + hspec $ it t $ do + compiled <- catch (comp test) + (const $ return "term" :: IOException -> IO String ) + ev <- catch (trim <$> readCreateProcess (shell "lli") compiled) + (\e -> return $ show (e :: IOException)) + ev `shouldSatisfy` (expectedRes ==) diff --git a/sample-programs/basic-4 b/sample-programs/basic-4 deleted file mode 100644 index 1de7a8c..0000000 --- a/sample-programs/basic-4 +++ /dev/null @@ -1,2 +0,0 @@ - -f x = let g = (\y. y+1) in g (g x) diff --git a/sample-programs/basic-6 b/sample-programs/basic-6 deleted file mode 100644 index 511ae10..0000000 --- a/sample-programs/basic-6 +++ /dev/null @@ -1,3 +0,0 @@ - - -f = \x.\y. x+y diff --git a/sample-programs/basic-8 b/sample-programs/basic-8 deleted file mode 100644 index 59abdac..0000000 --- a/sample-programs/basic-8 +++ /dev/null @@ -1,2 +0,0 @@ - -f x = let double = \y. y+y in (\x. x+y) 4; diff --git a/sample-programs/sample-programs/basic-1.res b/sample-programs/sample-programs/basic-1.res new file mode 100644 index 0000000..ed4df3c --- /dev/null +++ b/sample-programs/sample-programs/basic-1.res @@ -0,0 +1 @@ +666 \ No newline at end of file diff --git a/sample-programs/basic-1 b/sample-programs/sample-programs/basic-1.sf similarity index 53% rename from sample-programs/basic-1 rename to sample-programs/sample-programs/basic-1.sf index f109950..1d843d2 100644 --- a/sample-programs/basic-1 +++ b/sample-programs/sample-programs/basic-1.sf @@ -1,2 +1,3 @@ f = \x. x+1; +main = f 665 \ No newline at end of file diff --git a/sample-programs/sample-programs/basic-2.res b/sample-programs/sample-programs/basic-2.res new file mode 100644 index 0000000..2edeafb --- /dev/null +++ b/sample-programs/sample-programs/basic-2.res @@ -0,0 +1 @@ +20 \ No newline at end of file diff --git a/sample-programs/basic-2 b/sample-programs/sample-programs/basic-2.sf similarity index 100% rename from sample-programs/basic-2 rename to sample-programs/sample-programs/basic-2.sf diff --git a/sample-programs/sample-programs/basic-3.res b/sample-programs/sample-programs/basic-3.res new file mode 100644 index 0000000..c793025 --- /dev/null +++ b/sample-programs/sample-programs/basic-3.res @@ -0,0 +1 @@ +7 \ No newline at end of file diff --git a/sample-programs/basic-3 b/sample-programs/sample-programs/basic-3.sf similarity index 100% rename from sample-programs/basic-3 rename to sample-programs/sample-programs/basic-3.sf diff --git a/sample-programs/sample-programs/basic-4.res b/sample-programs/sample-programs/basic-4.res new file mode 100644 index 0000000..8bd100f --- /dev/null +++ b/sample-programs/sample-programs/basic-4.res @@ -0,0 +1 @@ +4507 \ No newline at end of file diff --git a/sample-programs/sample-programs/basic-4.sf b/sample-programs/sample-programs/basic-4.sf new file mode 100644 index 0000000..d7761c8 --- /dev/null +++ b/sample-programs/sample-programs/basic-4.sf @@ -0,0 +1,3 @@ + +f x = let g = (\y. y+1) in g (g x); +main = f 4505 \ No newline at end of file diff --git a/sample-programs/sample-programs/basic-5.res b/sample-programs/sample-programs/basic-5.res new file mode 100644 index 0000000..301160a --- /dev/null +++ b/sample-programs/sample-programs/basic-5.res @@ -0,0 +1 @@ +8 \ No newline at end of file diff --git a/sample-programs/basic-5 b/sample-programs/sample-programs/basic-5.sf similarity index 59% rename from sample-programs/basic-5 rename to sample-programs/sample-programs/basic-5.sf index 9984ddd..54512a5 100644 --- a/sample-programs/basic-5 +++ b/sample-programs/sample-programs/basic-5.sf @@ -6,4 +6,4 @@ double n = n + n; apply f x = \y. f x y; -main = apply (id add) ((\x. x + 1) 1) (double 3); +main = apply (id add) ((\x. x + 1) 1) (double 3); \ No newline at end of file diff --git a/sample-programs/sample-programs/basic-6.res b/sample-programs/sample-programs/basic-6.res new file mode 100644 index 0000000..c5b431b --- /dev/null +++ b/sample-programs/sample-programs/basic-6.res @@ -0,0 +1 @@ +50 \ No newline at end of file diff --git a/sample-programs/sample-programs/basic-6.sf b/sample-programs/sample-programs/basic-6.sf new file mode 100644 index 0000000..00b7091 --- /dev/null +++ b/sample-programs/sample-programs/basic-6.sf @@ -0,0 +1,5 @@ + + +f = \x.\y. x+y; + +main = f 20 30 diff --git a/sample-programs/sample-programs/basic-7.res b/sample-programs/sample-programs/basic-7.res new file mode 100644 index 0000000..9a03714 --- /dev/null +++ b/sample-programs/sample-programs/basic-7.res @@ -0,0 +1 @@ +10 \ No newline at end of file diff --git a/sample-programs/basic-7 b/sample-programs/sample-programs/basic-7.sf similarity index 60% rename from sample-programs/basic-7 rename to sample-programs/sample-programs/basic-7.sf index b3769b9..7709e94 100644 --- a/sample-programs/basic-7 +++ b/sample-programs/sample-programs/basic-7.sf @@ -2,4 +2,4 @@ add x y = x + y; apply f x = f x; -main = apply (add 4) 6; +main = apply (add 4) 6; \ No newline at end of file diff --git a/sample-programs/sample-programs/basic-8.res b/sample-programs/sample-programs/basic-8.res new file mode 100644 index 0000000..d2e1cef --- /dev/null +++ b/sample-programs/sample-programs/basic-8.res @@ -0,0 +1 @@ +44 \ No newline at end of file diff --git a/sample-programs/sample-programs/basic-8.sf b/sample-programs/sample-programs/basic-8.sf new file mode 100644 index 0000000..39a4b80 --- /dev/null +++ b/sample-programs/sample-programs/basic-8.sf @@ -0,0 +1,4 @@ +f z = let double = \y. y+y in (\x. x+z) 4; +-- f y = let double = \y. y+y in (\x. x+y) 4; +-- ^ this produces a bug in the renamer +main = f 40 \ No newline at end of file diff --git a/sample-programs/sample-programs/basic-9.res b/sample-programs/sample-programs/basic-9.res new file mode 100644 index 0000000..2edeafb --- /dev/null +++ b/sample-programs/sample-programs/basic-9.res @@ -0,0 +1 @@ +20 \ No newline at end of file diff --git a/sample-programs/basic-9 b/sample-programs/sample-programs/basic-9.sf similarity index 91% rename from sample-programs/basic-9 rename to sample-programs/sample-programs/basic-9.sf index ba9ebdc..582ccaa 100644 --- a/sample-programs/basic-9 +++ b/sample-programs/sample-programs/basic-9.sf @@ -1,4 +1 @@ - - - -main = (\f.\x.\y. f x + f y) (\x. x+x) ((\x. x+1) ((\x. x+3) 2)) 4 +main = (\f.\x.\y. f x + f y) (\x. x+x) ((\x. x+1) ((\x. x+3) 2)) 4 \ No newline at end of file diff --git a/src/Compiler/Compiler.hs b/src/Compiler/Compiler.hs index a09f502..4d5141a 100644 --- a/src/Compiler/Compiler.hs +++ b/src/Compiler/Compiler.hs @@ -1,183 +1,257 @@ -module Compiler.Compiler where +module Compiler.Compiler (compile) where -import Compiler.LLVMIr (LLVMIr (..), LLVMType (..), - Value (..), llvmIrToString) -import Compiler.StandardLLVMLibrary (standardLLVMLibrary) -import Control.Monad.State (StateT, execStateT, gets, modify) -import Grammar.Abs (Def (..), Exp (..), Ident (..), - Program (..), Type (..)) -import Grammar.ErrM (Err) -import Grammar.Print (printTree) +import Compiler.LLVMIr ( + LLVMComp (..), + LLVMIr (..), + LLVMType (..), + LLVMValue (..), + llvmIrToString, + ) +import Control.Monad.State (StateT, execStateT, gets, modify) +import Data.Map (Map) +import Data.Map qualified as Map +import Grammar.Abs ( + Bind (..), + Exp (..), + Ident (..), + Program (..), + ) +import Grammar.ErrM (Err) +import Grammar.Print (printTree) -- | The record used as the code generator state data CodeGenerator = CodeGenerator - { instructions :: [LLVMIr] - , methods :: [Ident] - , variableCount :: Integer } + { instructions :: [LLVMIr] + , functions :: Map Ident FunctionInfo + , variableCount :: Integer + } + +-- | A state type synonym type CompilerState a = StateT CodeGenerator Err a --- | An empty instance of CodeGenerator -defaultCodeGenerator :: CodeGenerator -defaultCodeGenerator = CodeGenerator - { instructions = [] - , methods = [] - , variableCount = 0 } +data FunctionInfo = FunctionInfo + { numArgs :: Int + , arguments :: [Ident] + } -- | Adds a instruction to the CodeGenerator state emit :: LLVMIr -> CompilerState () -emit l = modify (\t -> t {instructions = instructions t ++ [l]}) +emit l = modify (\t -> t{instructions = instructions t ++ [l]}) --- | Increases the variable counter in the Codegenerator state +-- | Increases the variable counter in the CodeGenerator state increaseVarCount :: CompilerState () -increaseVarCount = modify (\t -> t {variableCount = variableCount t + 1}) +increaseVarCount = modify (\t -> t{variableCount = variableCount t + 1}) +-- | Returns the variable count from the CodeGenerator state +getVarCount :: CompilerState Integer +getVarCount = gets variableCount + +-- | Increases the variable count and returns it from the CodeGenerator state +getNewVar :: CompilerState Integer +getNewVar = increaseVarCount >> getVarCount + +{- | Produces a map of functions infos from a list of binds, + which contains useful data for code generation. +-} +getFunctions :: [Bind] -> Map Ident FunctionInfo +getFunctions xs = + Map.fromList $ + map + ( \(Bind id args _) -> + ( id + , FunctionInfo + { numArgs = length args + , arguments = args + } + ) + ) + xs + +{- | Compiles an AST and produces a LLVM Ir string. + An easy way to actually "compile" this output is to + Simply pipe it to LLI +-} compile :: Program -> Err String compile (Program prg) = do - let s = defaultCodeGenerator {instructions = [ - Comment (show $ printTree (Program prg)), - UnsafeRaw $ standardLLVMLibrary <> "\n" - ]} - fin <- execStateT (goDef prg) s - let ins = instructions fin - pure $ concatMap llvmIrToString ins - where - mainContent var = - [ SetVariable (Ident "print_res") - , Call (Array 21 I8) (Ident "i64ToString") [(I64, VIdent $ Ident $ show var)] - , SetVariable (Ident "print_ptr"), Alloca (Array 21 I8) - , Store (Array 21 I8) (Ident "print_res") (Ref (Array 21 I8)) (Ident "print_ptr") - , SetVariable (Ident "printable"), Bitcast (Ref (Array 21 I8)) (Ident "print_ptr") (Ref I8) - , Call I32 (Ident "puts") [(Ref I8, VIdent (Ident "printable"))] - , Ret I64 (VInteger 0) - ] + let s = + CodeGenerator + { instructions = defaultStart + , functions = getFunctions prg + , variableCount = 0 + } + ins <- instructions <$> execStateT (goDef prg) s + pure $ llvmIrToString ins + where + mainContent :: LLVMValue -> [LLVMIr] + mainContent var = + [ UnsafeRaw $ + "call i32 (ptr, ...) @printf(ptr noundef @.str, i64 noundef " <> show var <> ")\n" + , -- , SetVariable (Ident "p") (Icmp LLEq I64 (VInteger 2) (VInteger 2)) + -- , BrCond (VIdent (Ident "p")) (Ident "b_1") (Ident "b_2") + -- , Label (Ident "b_1") + -- , UnsafeRaw + -- "call i32 (ptr, ...) @printf(ptr noundef @.str, i64 noundef 1)\n" + -- , Br (Ident "end") + -- , Label (Ident "b_2") + -- , UnsafeRaw + -- "call i32 (ptr, ...) @printf(ptr noundef @.str, i64 noundef 2)\n" + -- , Br (Ident "end") + -- , Label (Ident "end") + Ret I64 (VInteger 0) + ] - goDef :: [Def] -> CompilerState () - goDef [] = return () - goDef (DExp id@(Ident str) t _id2 args exp : xs) = do - let (rt, argTypes) = flattenFuncType t - emit $ Comment $ show str <> ": " <> show (rt, argTypes) - emit $ Define rt id (zip argTypes args) -- //TODO parse args - go exp - varNum <- gets variableCount - if str == "main" then mapM_ emit (mainContent varNum) - else emit $ Ret rt (VIdent (Ident (show varNum))) - emit DefineEnd - modify (\s -> s {variableCount = 0}) - goDef xs + defaultStart :: [LLVMIr] + defaultStart = + [ Comment (show $ printTree (Program prg)) + , UnsafeRaw "@.str = private unnamed_addr constant [3 x i8] c\"%i\n\", align 1\n" + , UnsafeRaw "declare i32 @printf(ptr noalias nocapture, ...)\n" + ] - go :: Exp -> CompilerState () - go (EInt int) = emitInt int - go (EAdd e1 e2) = emitAdd e1 e2 - go (ESub e1 e2) = emitSub e1 e2 - go (EMul e1 e2) = emitMul e1 e2 - go (EDiv e1 e2) = emitDiv e1 e2 - go (EMod e1 e2) = emitMod e1 e2 - go (EId id) = emitArg id - go (EApp e1 e2) = emitApp e1 e2 - go (EAbs id t e) = emitAbs id t e + goDef :: [Bind] -> CompilerState () + goDef [] = return () + goDef (Bind id@(Ident str) args exp : xs) = do + emit $ UnsafeRaw "\n" + emit $ Comment $ show str <> ": " <> show exp + emit $ Define I64 id (map (I64,) args) + functionBody <- exprToValue exp + if str == "main" + then mapM_ emit (mainContent functionBody) + else emit $ Ret I64 functionBody + emit DefineEnd + modify (\s -> s{variableCount = 0}) + goDef xs - --- aux functions --- - emitAbs :: Ident -> Type -> Exp -> CompilerState () - emitAbs id t e = do - emit $ Comment $ concat [ "EAbs (", show id, ", ", show t, ", " - , show e, ") is not implemented!"] + go :: Exp -> CompilerState () + go (EInt int) = emitInt int + go (EAdd e1 e2) = emitAdd e1 e2 + -- go (ESub e1 e2) = emitSub e1 e2 + -- go (EMul e1 e2) = emitMul e1 e2 + -- go (EDiv e1 e2) = emitDiv e1 e2 + -- go (EMod e1 e2) = emitMod e1 e2 + go (EId id) = emitIdent id + go (EApp e1 e2) = emitApp e1 e2 + go (EAbs id e) = emitAbs id e + go (ELet xs e) = emitLet xs e - emitApp :: Exp -> Exp -> CompilerState () - emitApp e1 e2 = do - emit $ Comment $ concat [ "EApp (", show e1, ", ", show e2 - , ") is not implemented!"] + --- aux functions --- + emitAbs :: Ident -> Exp -> CompilerState () + emitAbs id e = do + emit $ + Comment $ + concat + [ "EAbs (" + , show id + , ", " + , show I64 + , ", " + , show e + , ") is not implemented!" + ] + emitLet :: [Bind] -> Exp -> CompilerState () + emitLet xs e = do + emit $ + Comment $ + concat + [ "ELet (" + , show xs + , " = " + , show e + , ") is not implemented!" + ] - emitArg :: Ident -> CompilerState () - emitArg id = do - -- !!this should never happen!! - increaseVarCount - varCount <- gets variableCount - emit $ SetVariable (Ident $ show varCount) - emit $ Add I64 (VIdent id) (VInteger 0) + emitApp :: Exp -> Exp -> CompilerState () + emitApp e1 e2 = appEmitter e1 e2 [] + where + appEmitter :: Exp -> Exp -> [Exp] -> CompilerState () + appEmitter e1 e2 stack = do + let newStack = e2 : stack + case e1 of + EApp e1' e2' -> appEmitter e1' e2' newStack + EId id -> do + args <- traverse exprToValue newStack + vs <- getNewVar + emit $ SetVariable (Ident $ show vs) (Call I64 id (map (I64,) args)) + x -> do + emit . Comment $ "The unspeakable happened: " + emit . Comment $ show x - emitInt :: Integer -> CompilerState () - emitInt i = do - -- !!this should never happen!! - increaseVarCount - varCount <- gets variableCount - emit $ SetVariable $ Ident (show varCount) - emit $ Add I64 (VInteger i) (VInteger 0) + emitIdent :: Ident -> CompilerState () + emitIdent id = do + -- !!this should never happen!! + emit $ Comment "This should not have happened!" + emit $ Variable id + emit $ UnsafeRaw "\n" - emitAdd :: Exp -> Exp -> CompilerState () - emitAdd e1 e2 = do - (v1,v2) <- binExprToValues e1 e2 - increaseVarCount - v <- gets variableCount - emit $ SetVariable $ Ident $ show v - emit $ Add I64 v1 v2 + emitInt :: Integer -> CompilerState () + emitInt i = do + -- !!this should never happen!! + varCount <- getNewVar + emit $ Comment "This should not have happened!" + emit $ SetVariable (Ident (show varCount)) (Add I64 (VInteger i) (VInteger 0)) - emitMul :: Exp -> Exp -> CompilerState () - emitMul e1 e2 = do - (v1,v2) <- binExprToValues e1 e2 - increaseVarCount - v <- gets variableCount - emit $ SetVariable $ Ident $ show v - emit $ Mul I64 v1 v2 + emitAdd :: Exp -> Exp -> CompilerState () + emitAdd e1 e2 = do + v1 <- exprToValue e1 + v2 <- exprToValue e2 + v <- getNewVar + emit $ SetVariable (Ident $ show v) (Add I64 v1 v2) - emitMod :: Exp -> Exp -> CompilerState () - emitMod e1 e2 = do - -- `let m a b = rem (abs $ b + a) b` - (v1,v2) <- binExprToValues e1 e2 - increaseVarCount - vadd <- gets variableCount - emit $ SetVariable $ Ident $ show vadd - emit $ Add I64 v1 v2 + -- emitMul :: Exp -> Exp -> CompilerState () + -- emitMul e1 e2 = do + -- (v1,v2) <- binExprToValues e1 e2 + -- increaseVarCount + -- v <- gets variableCount + -- emit $ SetVariable $ Ident $ show v + -- emit $ Mul I64 v1 v2 - increaseVarCount - vabs <- gets variableCount - emit $ SetVariable $ Ident $ show vabs - emit $ Call I64 (Ident "llvm.abs.i64") - [ (I64, VIdent (Ident $ show vadd)) - , (I1, VInteger 1) - ] - increaseVarCount - v <- gets variableCount - emit $ SetVariable $ Ident $ show v - emit $ Srem I64 (VIdent (Ident $ show vabs)) v2 + -- emitMod :: Exp -> Exp -> CompilerState () + -- emitMod e1 e2 = do + -- -- `let m a b = rem (abs $ b + a) b` + -- (v1,v2) <- binExprToValues e1 e2 + -- increaseVarCount + -- vadd <- gets variableCount + -- emit $ SetVariable $ Ident $ show vadd + -- emit $ Add I64 v1 v2 + -- + -- increaseVarCount + -- vabs <- gets variableCount + -- emit $ SetVariable $ Ident $ show vabs + -- emit $ Call I64 (Ident "llvm.abs.i64") + -- [ (I64, VIdent (Ident $ show vadd)) + -- , (I1, VInteger 1) + -- ] + -- increaseVarCount + -- v <- gets variableCount + -- emit $ SetVariable $ Ident $ show v + -- emit $ Srem I64 (VIdent (Ident $ show vabs)) v2 - emitDiv :: Exp -> Exp -> CompilerState () - emitDiv e1 e2 = do - (v1,v2) <- binExprToValues e1 e2 - increaseVarCount - v <- gets variableCount - emit $ SetVariable $ Ident $ show v - emit $ Div I64 v1 v2 + -- emitDiv :: Exp -> Exp -> CompilerState () + -- emitDiv e1 e2 = do + -- (v1,v2) <- binExprToValues e1 e2 + -- increaseVarCount + -- v <- gets variableCount + -- emit $ SetVariable $ Ident $ show v + -- emit $ Div I64 v1 v2 - emitSub :: Exp -> Exp -> CompilerState () - emitSub e1 e2 = do - (v1,v2) <- binExprToValues e1 e2 - increaseVarCount - v <- gets variableCount - emit $ SetVariable $ Ident $ show v - emit $ Sub I64 v1 v2 + -- emitSub :: Exp -> Exp -> CompilerState () + -- emitSub e1 e2 = do + -- (v1,v2) <- binExprToValues e1 e2 + -- increaseVarCount + -- v <- gets variableCount + -- emit $ SetVariable $ Ident $ show v + -- emit $ Sub I64 v1 v2 - exprToValue :: Exp -> CompilerState Value - exprToValue (EInt i) = return $ VInteger i - exprToValue (EId i) = return $ VIdent i - exprToValue e = do - go e - v <- gets variableCount - return $ VIdent $ Ident $ show v - - binExprToValues :: Exp -> Exp -> CompilerState (Value, Value) - binExprToValues e1 e2 = do - v1 <- exprToValue e1 - v2 <- exprToValue e2 - return (v1,v2) - - --- | A pretty nasty function to flatten out function types, --- as they are currently represented by a recursive data type. -flattenFuncType :: Type -> (LLVMType, [LLVMType]) -flattenFuncType xs = do - let res = go xs - (last res, init res) - where - go TInt = [I64] - go (TPol id) = [CustomType id] - go (TFun t1 t2) = go t1 ++ go t2 + exprToValue :: Exp -> CompilerState LLVMValue + exprToValue (EInt i) = return $ VInteger i + exprToValue (EId id) = do + funcs <- gets functions + case Map.lookup id funcs of + Just _ -> do + vc <- getNewVar + emit $ SetVariable (Ident $ show vc) (Call I64 id []) + return $ VIdent (Ident $ show vc) + Nothing -> return $ VIdent id + exprToValue e = do + go e + v <- getVarCount + return . VIdent . Ident $ show v diff --git a/src/Compiler/LLVMIr.hs b/src/Compiler/LLVMIr.hs index c503b61..65397bb 100644 --- a/src/Compiler/LLVMIr.hs +++ b/src/Compiler/LLVMIr.hs @@ -1,91 +1,188 @@ {-# LANGUAGE LambdaCase #-} -module Compiler.LLVMIr where -import Data.List (intercalate) -import Grammar.Abs (Ident (Ident)) + +module Compiler.LLVMIr (LLVMType (..), LLVMIr (..), llvmIrToString, LLVMValue (..), LLVMComp (..)) where + +import Data.List (intercalate) +import Grammar.Abs (Ident (Ident)) -- | A datatype which represents some basic LLVM types -data LLVMType = I1 | I8 | I32 | I64 | Ptr - | Ref LLVMType | Array Integer LLVMType | CustomType Ident +data LLVMType + = I1 + | I8 + | I32 + | I64 + | Ptr + | Ref LLVMType + | Array Integer LLVMType + | CustomType Ident instance Show LLVMType where show :: LLVMType -> String - show t = case t of - I1 -> "i1" - I8 -> "i8" - I32 -> "i32" - I64 -> "i64" - Ptr -> "ptr" - Ref ty -> show ty <> "*" - Array n ty -> concat ["[", show n, " x ", show ty, "]"] + show = \case + I1 -> "i1" + I8 -> "i8" + I32 -> "i32" + I64 -> "i64" + Ptr -> "ptr" + Ref ty -> show ty <> "*" + Array n ty -> concat ["[", show n, " x ", show ty, "]"] CustomType (Ident ty) -> ty --- | Represents a LLVM "value", as in an integer, a register variable, --- or a string contstant -data Value = VInteger Integer | VIdent Ident | VConstant String -instance Show Value where - show :: Value -> String +data LLVMComp + = LLEq + | LLNe + | LLUgt + | LLUge + | LLUlt + | LLUle + | LLSgt + | LLSge + | LLSlt + | LLSle +instance Show LLVMComp where + show :: LLVMComp -> String + show = \case + LLEq -> "eq" + LLNe -> "ne" + LLUgt -> "ugt" + LLUge -> "uge" + LLUlt -> "ult" + LLUle -> "ule" + LLSgt -> "sgt" + LLSge -> "sge" + LLSlt -> "slt" + LLSle -> "sle" + +{- | Represents a LLVM "value", as in an integer, a register variable, + or a string contstant +-} +data LLVMValue = VInteger Integer | VIdent Ident | VConstant String + +instance Show LLVMValue where + show :: LLVMValue -> String show v = case v of - VInteger i -> show i + VInteger i -> show i VIdent (Ident i) -> "%" <> i - VConstant s -> "c" <> show s + VConstant s -> "c" <> show s type Params = [(LLVMType, Ident)] -type Args = [(LLVMType, Value)] +type Args = [(LLVMType, LLVMValue)] -- | A datatype which represents different instructions in LLVM -data LLVMIr = Define LLVMType Ident Params - | DefineEnd - | Declare LLVMType Ident Params - | SetVariable Ident - | Add LLVMType Value Value - | Sub LLVMType Value Value - | Div LLVMType Value Value - | Mul LLVMType Value Value - | Srem LLVMType Value Value - | Call LLVMType Ident Args - | Alloca LLVMType - | Store LLVMType Ident LLVMType Ident - | Bitcast LLVMType Ident LLVMType - | Ret LLVMType Value - | Comment String - | UnsafeRaw String -- This should generally be avoided, and proper - -- instructions should be used in its place +data LLVMIr + = Define LLVMType Ident Params + | DefineEnd + | Declare LLVMType Ident Params + | SetVariable Ident LLVMIr + | Variable Ident + | Add LLVMType LLVMValue LLVMValue + | Sub LLVMType LLVMValue LLVMValue + | Div LLVMType LLVMValue LLVMValue + | Mul LLVMType LLVMValue LLVMValue + | Srem LLVMType LLVMValue LLVMValue + | Icmp LLVMComp LLVMType LLVMValue LLVMValue + | Br Ident + | BrCond LLVMValue Ident Ident + | Label Ident + | Call LLVMType Ident Args + | Alloca LLVMType + | Store LLVMType Ident LLVMType Ident + | Bitcast LLVMType Ident LLVMType + | Ret LLVMType LLVMValue + | Comment String + | UnsafeRaw String -- This should generally be avoided, and proper + -- instructions should be used in its place deriving (Show) --- | Converts a LLVM inststruction to a String, allowing for printing etc. -llvmIrToString :: LLVMIr -> String -llvmIrToString = \case - (Define t (Ident i) params) -> concat ["define ", show t, " @", i, "(" - , intercalate "," (fmap (\(x,Ident y) -> unwords [show x, "%"<>y]) params) - ,") {\n"] - DefineEnd -> "}\n" - (Declare _t (Ident _i) _params) -> undefined - (SetVariable (Ident i)) -> concat ["%", i, " = "] - (Add t v1 v2) -> concat ["add ", show t, " " - , show v1, ", ", show v2 - , "\n"] - (Sub t v1 v2) -> concat ["sub ", show t, " " - , show v1, ", " - , show v2, "\n"] - (Div t v1 v2) -> concat ["sdiv ", show t, " " - , show v1, ", " - , show v2, "\n"] - (Mul t v1 v2) -> concat ["mul ", show t, " " - , show v1, ", " - , show v2, "\n"] - (Srem t v1 v2) -> concat ["srem ", show t, " " - , show v1, ", " - , show v2, "\n"] - (Call t (Ident i) arg) -> concat ["call ", show t, " @", i, "(" - , intercalate ", " $ Prelude.map (\(x,y) -> show x <> " " <> show y) arg - , ")\n"] - (Alloca t) -> unwords ["alloca", show t, "\n"] - (Store t1 (Ident id1) t2 (Ident id2)) -> concat ["store ", show t1, " %" - , id1, ", ", show t2, " %" - , id2, "\n"] - (Bitcast t1 (Ident i) t2) -> concat ["bitcast ", show t1, " %" - , i, " to ", show t2, "\n"] - (Ret t v) -> concat ["ret ", show t - , " ", show v, "\n"] - (UnsafeRaw s) -> s - (Comment s) -> "; " <> s <> "\n" +-- | Converts a list of LLVMIr instructions to a string +llvmIrToString :: [LLVMIr] -> String +llvmIrToString = go 0 + where + go :: Int -> [LLVMIr] -> String + go _ [] = mempty + go i (x : xs) = do + let (i', n) = case x of + Define{} -> (i + 1, 0) + DefineEnd -> (i - 1, 0) + _ -> (i, i) + insToString n x <> go i' xs + +{- | Converts a LLVM inststruction to a String, allowing for printing etc. + The integer represents the indentation +-} +{- FOURMOLU_DISABLE -} + insToString :: Int -> LLVMIr -> String + insToString i l = + replicate i '\t' <> case l of + (Define t (Ident i) params) -> + concat + [ "define ", show t, " @", i + , "(", intercalate ", " (fmap (\(x, Ident y) -> unwords [show x, "%" <> y]) params) + , ") {\n" + ] + DefineEnd -> "}\n" + (Declare _t (Ident _i) _params) -> undefined + (SetVariable (Ident i) ir) -> concat ["%", i, " = ", insToString 0 ir] + (Add t v1 v2) -> + concat + [ "add ", show t, " ", show v1 + , ", ", show v2, "\n" + ] + (Sub t v1 v2) -> + concat + [ "sub ", show t, " ", show v1, ", " + , show v2, "\n" + ] + (Div t v1 v2) -> + concat + [ "sdiv ", show t, " ", show v1, ", " + , show v2, "\n" + ] + (Mul t v1 v2) -> + concat + [ "mul ", show t, " ", show v1 + , ", ", show v2, "\n" + ] + (Srem t v1 v2) -> + concat + [ "srem ", show t, " ", show v1, ", " + , show v2, "\n" + ] + (Call t (Ident i) arg) -> + concat + [ "call ", show t, " @", i, "(" + , intercalate ", " $ Prelude.map (\(x, y) -> show x <> " " <> show y) arg + , ")\n" + ] + (Alloca t) -> unwords ["alloca", show t, "\n"] + (Store t1 (Ident id1) t2 (Ident id2)) -> + concat + [ "store ", show t1, " %", id1 + , ", ", show t2 , " %", id2, "\n" + ] + (Bitcast t1 (Ident i) t2) -> + concat + [ "bitcast ", show t1, " %" + , i, " to ", show t2, "\n" + ] + (Icmp comp t v1 v2) -> + concat + [ "icmp ", show comp, " ", show t + , " ", show v1, ", ", show v2, "\n" + ] + (Ret t v) -> + concat + [ "ret ", show t, " " + , show v, "\n" + ] + (UnsafeRaw s) -> s + (Label (Ident s)) -> "\nlabel_" <> s <> ":\n" + (Br (Ident s)) -> "br label %label_" <> s <> "\n" + (BrCond val (Ident s1) (Ident s2)) -> + concat + [ "br i1 ", show val, ", ", "label %" + , "label_", s1, ", ", "label %", "label_", s2, "\n" + ] + (Comment s) -> "; " <> s <> "\n" + (Variable (Ident id)) -> "%" <> id +{- FOURMOLU_ENABLE -} diff --git a/src/Compiler/StandardLLVMLibrary.hs b/src/Compiler/StandardLLVMLibrary.hs deleted file mode 100644 index 01388dd..0000000 --- a/src/Compiler/StandardLLVMLibrary.hs +++ /dev/null @@ -1,10 +0,0 @@ -{-# LANGUAGE TemplateHaskell #-} -module Compiler.StandardLLVMLibrary where - -import Compiler.TH (includeStr) - --- | Uses Template Haskell to load our "standard library", which is written in --- LLVM IR. This library simply includes functions to generate strings from --- i64s and to print ints. -standardLLVMLibrary :: String -standardLLVMLibrary = $(includeStr "src/Compiler/standard_library.ll") diff --git a/src/Compiler/TH.hs b/src/Compiler/TH.hs deleted file mode 100644 index bb9b7c5..0000000 --- a/src/Compiler/TH.hs +++ /dev/null @@ -1,14 +0,0 @@ -{-# LANGUAGE TemplateHaskellQuotes #-} -module Compiler.TH where -import Language.Haskell.TH (Exp, Q) -import System.IO.Unsafe (unsafePerformIO) - --- While this is hacky (specifically the use of unsafePerformIO) --- in this case I think it is fine, as if an invalid string --- is passed to the function it will fail to compile, --- which is the intended behavior. This allows us to --- import strings (such as our "standard LLVM library") --- during compile time, removing the need to ship the source for --- that with the compiler. -includeStr :: String -> Q Exp -includeStr file = [|unsafePerformIO $ readFile file|] diff --git a/src/Compiler/standard_library.ll b/src/Compiler/standard_library.ll deleted file mode 100644 index 59cd2a6..0000000 --- a/src/Compiler/standard_library.ll +++ /dev/null @@ -1,118 +0,0 @@ -declare i64 @llvm.abs.i64(i64, i1 immarg) -declare i64 @llvm.powi.i64.i64(i64, i64) -declare i32 @puts(i8* nocapture) nounwind - - -define [21 x i8] @i64ToString(i64 %val_org) { - %val = alloca i64 - store i64 %val_org, i64* %val - %val_copy = add i64 0, %val_org - ; https://stackoverflow.com/a/7123710 - ; an algorithm for translating ints to strings - ; s = '' - ; sign = '' - ; if i < 0: - ; sign = '-' - ; i = -i - ; while True: - ; remainder = i % 10 - ; i = i / 10 - ; s = chr(ord('0') + remainder) + s - ; if i == 0: - ; break - ; return sign + s - - ; allocate memory for the string, and store the temp variable into it - %string_ptr = alloca [21 x i8] - store [21 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [21 x i8]* %string_ptr - - ; create a pointer to the array - %array_index_ptr = alloca i32 - store i32 0, i32* %array_index_ptr - - br label %while_point - while_point: - %val.tmp = load i64, i64* %val; load i64, i64* %val - %val.1 = call i64 @llvm.abs.i64(i64 %val.tmp, i1 true) - %tmp = load i32, i32* %array_index_ptr - %array_pointer.1 = getelementptr [21 x i8], [21 x i8]* %string_ptr, i32 0, i32 %tmp - %array_index_ptr.1 = add i32 %tmp, 1 - store i32 %array_index_ptr.1, i32* %array_index_ptr - - ; this should not work, but it does - %remainder = srem i64 %val.1, 10 - %remainder_tmp = alloca i64 - store i64 %remainder, i64* %remainder_tmp - %remainder_tmp8 = bitcast i64* %remainder_tmp to i8* - %remainder8 = load i8, i8* %remainder_tmp8 - - %i = sdiv i64 %val.1, 10 - %char = add i8 48, %remainder8 - store i8 %char, i8* %array_pointer.1 ; update string! - store i64 %i, i64* %val - %while_condition = icmp eq i64 %i, 0 - - br i1 %while_condition, label %while_break, label %while_point - while_break: - - ; get last_pointer - %array_index_ptr.0 = load i32, i32* %array_index_ptr - %array_pointer.0 = getelementptr [21 x i8], [21 x i8]* %string_ptr, i32 0, i32 %array_index_ptr.0 - - ; check if p is below 0 - %condition = icmp slt i64 %val_copy, 0 - br i1 %condition, label %negative_check_true, label %negative_check_false - negative_check_true: - store i8 45, i8* %array_pointer.0 - br label %negative_check_done - negative_check_false: - ; store i8 43, i8* %array_pointer.0 - br label %negative_check_done - negative_check_done: - %noop = add i32 0, 0 - - ; load the result and return it - call void @reverseI64String([21 x i8]* %string_ptr) - %res = load [21 x i8],[21 x i8]* %string_ptr - ret [21 x i8] %res -} - -define void @reverseI64String([21 x i8]* %string) { - %new_string = alloca [21 x i8] - store [21 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [21 x i8]* %new_string - %reverse_index = alloca i32 - store i32 20, i32* %reverse_index - - %new_index = alloca i32 - store i32 0, i32* %new_index - - br label %while_start - while_start: - %reverse_index.0 = load i32, i32* %reverse_index - %reverse_ptr = getelementptr [21 x i8], [21 x i8]* %string, i32 0, i32 %reverse_index.0 - %reverse_index.1 = sub i32 %reverse_index.0, 1 - store i32 %reverse_index.1, i32* %reverse_index - - %new_index.0 = load i32, i32* %new_index - %new_ptr = getelementptr [21 x i8], [21 x i8]* %new_string, i32 0, i32 %new_index.0 - - %break_condition = icmp slt i32 %reverse_index.0, 0 - br i1 %break_condition, label %break, label %ok - ok: - %value = load i8, i8* %reverse_ptr - %condition = icmp eq i8 %value, 0 - br i1 %condition, label %while_start, label %not_zero - not_zero: - store i8 %value, i8* %new_ptr - %new_index.1 = add i32 %new_index.0, 1 - store i32 %new_index.1, i32* %new_index - - br label %while_start - break: - %copy = load [21 x i8], [21 x i8]* %new_string - store [21 x i8] %copy, [21 x i8]* %string - ; start iterating over the original string in reverse - ; if i == '\00' ignore - ; else put value into new_string at new i, then new i + 1 - ret void -} \ No newline at end of file diff --git a/src/Main.hs b/src/Main.hs index 41379fc..4e82d29 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,6 +1,8 @@ {-# LANGUAGE LambdaCase #-} module Main where +import Compiler.Compiler (compile) +import GHC.IO.Handle.Text (hPutStrLn) import Grammar.ErrM (Err) import Grammar.Par (myLexer, pProgram) import Grammar.Print (printTree) @@ -8,6 +10,7 @@ import Interpreter (interpret) import LambdaLifter (abstract, freeVars, lambdaLift) import System.Environment (getArgs) import System.Exit (exitFailure, exitSuccess) +import System.IO (stderr) main :: IO () main = getArgs >>= \case @@ -18,13 +21,18 @@ main' :: String -> IO () main' s = do file <- readFile s - putStrLn "\n-- parse" + printToErr "-- Parse Tree -- " parsed <- fromSyntaxErr . pProgram $ myLexer file - putStrLn $ printTree parsed + printToErr $ printTree parsed - putStrLn "\n-- Lambda Lifter" + printToErr "\n-- Lambda Lifter --" let lifted = lambdaLift parsed - putStrLn $ printTree lifted + printToErr $ printTree lifted + + --putStrLn "\n-- Compiler" + printToErr "\n -- Printing compiler output to stdout --" + compiled <- fromCompilerErr $ compile lifted + putStrLn compiled -- interpred <- fromInterpreterErr $ interpret lifted -- putStrLn "\n-- interpret" @@ -32,6 +40,16 @@ main' s = do exitSuccess +printToErr :: String -> IO () +printToErr = hPutStrLn stderr + +fromCompilerErr :: Err a -> IO a +fromCompilerErr = either + (\err -> do + putStrLn "\nCOMPILER ERROR" + putStrLn err + exitFailure) + pure fromSyntaxErr :: Err a -> IO a fromSyntaxErr = either diff --git a/test/helloworld.ll b/test/helloworld.ll deleted file mode 100644 index 16a03fd..0000000 --- a/test/helloworld.ll +++ /dev/null @@ -1,20 +0,0 @@ -; Copied directly from the documentation -; Declare the string constant as a global constant. -@.hello = private unnamed_addr constant [13 x i8] c"hello world\0A\00" - -; External declaration of the puts function -declare i32 @puts(i8* nocapture) nounwind - -; Definition of main function -define i32 @main() { ; i32()* - ; Convert [13 x i8]* to i8 *... - %cast210 = getelementptr [13 x i8],[13 x i8]* @.hello, i64 0, i64 0 - - ; Call puts function to write out the string to stdout. - call i32 @puts(i8* %cast210) - ret i32 0 -} - -; Named metadata -!0 = !{i32 42, null, !"string"} -!foo = !{!0} \ No newline at end of file diff --git a/test/simple.sf b/test/simple.sf deleted file mode 100644 index afb2251..0000000 --- a/test/simple.sf +++ /dev/null @@ -1,5 +0,0 @@ -tripplet : Int -> (Int -> (Int -> Int)) -tripplet x y z = x + y + z; - -main : Int -main = {-(((2 * (123 + 4214 % (1230)) - 1231) / 2) * 412412) +-} tripplet 5 1 2; \ No newline at end of file diff --git a/test/simple_goal.ll b/test/simple_goal.ll deleted file mode 100644 index adc0420..0000000 --- a/test/simple_goal.ll +++ /dev/null @@ -1,18 +0,0 @@ -; External declaration of the puts function -declare i32 @puts(i8* nocapture) nounwind -declare [21 x i8] @i64ToString(i64) - -; Definition of main function -define i32 @main() { ; i32()* - ; %val = add i64 -123456789, 0 - %val = add i64 -133780085, 0 - - %print_res = call [21 x i8] @i64ToString(i64 %val) - %ptr = alloca [21 x i8] - store [21 x i8] %print_res, [21 x i8]* %ptr - - %printable = bitcast [21 x i8]* %ptr to i8* - call i32 @puts(i8* %printable) - - ret i32 0 -}