Fixed some merge errors that occured when merging with main

Started implementing EApp.

Simple function calls now work.

Removed the LLVM standard library as it was not needed.

Limited functionality.

Got EApp working!!

Fixed arguments being inserted in the wrong order.

Updated the showing of `Call` so that it inserts a space inbetween arguments.

Removed some unused code from the generated main functions.

Force removed language.

Expressions that are simply just calling a constant now work properly.

Fixed constants being called inside nested expressions.

Cleaned up the compiler a bit.

Added a test suite, testing that the output from programs is correct, "evaluating the whole process".

Fixed a typo in basic-4.sf

Fixed basic-8 to work without a renamer.

Fixed some nicer output in the main function.

Made the outputted LLVM Ir somewhat nicer.

Fixed a typo and updated how SetVariable works.

Formatted and added documentation.

Added instructions allowing for branching.
This commit is contained in:
Samuel Hammersberg 2023-02-12 13:30:45 +01:00 committed by Martin Fredin
parent 7c1e1d57a0
commit 8d698cecf9
36 changed files with 557 additions and 453 deletions

2
.gitignore vendored
View file

@ -3,4 +3,4 @@ dist-newstyle
*.x *.x
*.bak *.bak
src/Grammar src/Grammar
/language language

View file

@ -1,5 +1,3 @@
Program. Program ::= [Bind]; Program. Program ::= [Bind];
EId. Exp3 ::= Ident; EId. Exp3 ::= Ident;
@ -9,14 +7,11 @@ EApp. Exp2 ::= Exp2 Exp3;
EAdd. Exp1 ::= Exp1 "+" Exp2; EAdd. Exp1 ::= Exp1 "+" Exp2;
EAbs. Exp ::= "\\" Ident "." Exp; EAbs. Exp ::= "\\" Ident "." Exp;
EId. Exp3 ::= Ident ; Bind. Bind ::= Ident [Ident] "=" Exp;
EInt. Exp3 ::= Integer ; separator Bind ";";
EApp. Exp2 ::= Exp2 Exp3 ; separator Ident " ";
EAdd. Exp1 ::= Exp1 "+" Exp2 ;
EAbs. Exp ::= "\\" Ident "->" Exp ;
coercions Exp 3 ; coercions Exp 3;
comment "--" ;
comment "{-" "-}" ;
comment "--";
comment "{-" "-}";

2
cabal.project.local Normal file
View file

@ -0,0 +1,2 @@
ignore-project: False
tests: True

View file

@ -1 +0,0 @@
/home/samuel/.cabal/store/ghc-9.4.4/language-0.1.0.0-e-language-b2dd0f5c425bcebb6d06c37d2359fc74433c8d1d898fd217ad1cdc50c9035ad3/bin/language

View file

@ -1,4 +1,4 @@
cabal-version: 3.0 cabal-version: 3.4
name: language name: language
@ -20,6 +20,35 @@ extra-source-files:
common warnings common warnings
ghc-options: -W ghc-options: -W
Test-Suite sample-programs
Type: exitcode-stdio-1.0
main-is: Main.hs
Build-depends:
base >=4.16
, QuickCheck
, hspec
, directory
, process
, mtl
, containers
, either
, array
, extra
hs-source-dirs: src, sample-programs
default-language: GHC2021
other-modules:
Grammar.Abs
Grammar.Lex
Grammar.Par
Grammar.Print
Grammar.Skel
Grammar.ErrM
LambdaLifter
Auxiliary
Interpreter
Compiler.Compiler
Compiler.LLVMIr
executable language executable language
import: warnings import: warnings
@ -31,8 +60,12 @@ executable language
Grammar.Par Grammar.Par
Grammar.Print Grammar.Print
Grammar.Skel Grammar.Skel
Grammar.ErrM
LambdaLifter
Auxiliary
Interpreter Interpreter
Compiler.Compiler
Compiler.LLVMIr
hs-source-dirs: src hs-source-dirs: src
build-depends: build-depends:
@ -42,5 +75,4 @@ executable language
, either , either
, array , array
, extra , extra
default-language: GHC2021 default-language: GHC2021

60
sample-programs/Main.hs Normal file
View file

@ -0,0 +1,60 @@
module Main where
import Compiler.Compiler (compile)
import Control.Exception (IOException, catch,
evaluate)
import Data.List (isSuffixOf, sort)
import Data.List.Extra (trim)
import GHC.IO.Handle (BufferMode (NoBuffering),
hSetBuffering)
import Grammar.ErrM (Err)
import Grammar.Par (myLexer, pProgram)
import LambdaLifter (lambdaLift)
import System.Directory (getDirectoryContents)
import System.Directory.Internal.Prelude (exitFailure)
import System.IO (hPrint)
import System.IO.Extra (stderr)
import System.Process (CreateProcess (std_in),
StdStream (CreatePipe),
createProcess, proc,
readCreateProcess, shell,
waitForProcess)
import Test.Hspec (hspec, it, shouldSatisfy)
import Text.Printf (hPrintf)
path :: String
path = "sample-programs/sample-programs"
main :: IO ()
main = do
dir <- getDirectoryContents path
-- this is not a good way to grab tests
-- ideally one would grab all .sf files, remove the sf extension
-- and and res instead. Going to fix that soon
let tests = sort $ filter (".sf" `isSuffixOf`) dir
let results = sort $ filter (".res" `isSuffixOf`) dir
let combined = zip tests results
mapM_ (uncurry test) combined
fromErr :: Err a -> IO a
fromErr (Left a) = do
hPrint stderr a
exitFailure
fromErr (Right a) = pure a
comp :: String -> IO String
comp file = do
parsed <- fromErr . pProgram $ myLexer file
let lifted = lambdaLift parsed
fromErr $ compile lifted
test :: String -> String -> IO ()
test t r = do
test <- readFile (path <> "/" <> t)
expectedRes <- trim <$> readFile (path <> "/" <> r)
hspec $ it t $ do
compiled <- catch (comp test)
(const $ return "term" :: IOException -> IO String )
ev <- catch (trim <$> readCreateProcess (shell "lli") compiled)
(\e -> return $ show (e :: IOException))
ev `shouldSatisfy` (expectedRes ==)

View file

@ -1,2 +0,0 @@
f x = let g = (\y. y+1) in g (g x)

View file

@ -1,3 +0,0 @@
f = \x.\y. x+y

View file

@ -1,2 +0,0 @@
f x = let double = \y. y+y in (\x. x+y) 4;

View file

@ -0,0 +1 @@
666

View file

@ -1,2 +1,3 @@
f = \x. x+1; f = \x. x+1;
main = f 665

View file

@ -0,0 +1 @@
20

View file

@ -0,0 +1 @@
7

View file

@ -0,0 +1 @@
4507

View file

@ -0,0 +1,3 @@
f x = let g = (\y. y+1) in g (g x);
main = f 4505

View file

@ -0,0 +1 @@
8

View file

@ -0,0 +1 @@
50

View file

@ -0,0 +1,5 @@
f = \x.\y. x+y;
main = f 20 30

View file

@ -0,0 +1 @@
10

View file

@ -0,0 +1 @@
44

View file

@ -0,0 +1,4 @@
f z = let double = \y. y+y in (\x. x+z) 4;
-- f y = let double = \y. y+y in (\x. x+y) 4;
-- ^ this produces a bug in the renamer
main = f 40

View file

@ -0,0 +1 @@
20

View file

@ -1,4 +1 @@
main = (\f.\x.\y. f x + f y) (\x. x+x) ((\x. x+1) ((\x. x+3) 2)) 4 main = (\f.\x.\y. f x + f y) (\x. x+x) ((\x. x+1) ((\x. x+3) 2)) 4

View file

@ -1,183 +1,257 @@
module Compiler.Compiler where module Compiler.Compiler (compile) where
import Compiler.LLVMIr (LLVMIr (..), LLVMType (..), import Compiler.LLVMIr (
Value (..), llvmIrToString) LLVMComp (..),
import Compiler.StandardLLVMLibrary (standardLLVMLibrary) LLVMIr (..),
import Control.Monad.State (StateT, execStateT, gets, modify) LLVMType (..),
import Grammar.Abs (Def (..), Exp (..), Ident (..), LLVMValue (..),
Program (..), Type (..)) llvmIrToString,
import Grammar.ErrM (Err) )
import Grammar.Print (printTree) import Control.Monad.State (StateT, execStateT, gets, modify)
import Data.Map (Map)
import Data.Map qualified as Map
import Grammar.Abs (
Bind (..),
Exp (..),
Ident (..),
Program (..),
)
import Grammar.ErrM (Err)
import Grammar.Print (printTree)
-- | The record used as the code generator state -- | The record used as the code generator state
data CodeGenerator = CodeGenerator data CodeGenerator = CodeGenerator
{ instructions :: [LLVMIr] { instructions :: [LLVMIr]
, methods :: [Ident] , functions :: Map Ident FunctionInfo
, variableCount :: Integer } , variableCount :: Integer
}
-- | A state type synonym
type CompilerState a = StateT CodeGenerator Err a type CompilerState a = StateT CodeGenerator Err a
-- | An empty instance of CodeGenerator data FunctionInfo = FunctionInfo
defaultCodeGenerator :: CodeGenerator { numArgs :: Int
defaultCodeGenerator = CodeGenerator , arguments :: [Ident]
{ instructions = [] }
, methods = []
, variableCount = 0 }
-- | Adds a instruction to the CodeGenerator state -- | Adds a instruction to the CodeGenerator state
emit :: LLVMIr -> CompilerState () emit :: LLVMIr -> CompilerState ()
emit l = modify (\t -> t {instructions = instructions t ++ [l]}) emit l = modify (\t -> t{instructions = instructions t ++ [l]})
-- | Increases the variable counter in the Codegenerator state -- | Increases the variable counter in the CodeGenerator state
increaseVarCount :: CompilerState () increaseVarCount :: CompilerState ()
increaseVarCount = modify (\t -> t {variableCount = variableCount t + 1}) increaseVarCount = modify (\t -> t{variableCount = variableCount t + 1})
-- | Returns the variable count from the CodeGenerator state
getVarCount :: CompilerState Integer
getVarCount = gets variableCount
-- | Increases the variable count and returns it from the CodeGenerator state
getNewVar :: CompilerState Integer
getNewVar = increaseVarCount >> getVarCount
{- | Produces a map of functions infos from a list of binds,
which contains useful data for code generation.
-}
getFunctions :: [Bind] -> Map Ident FunctionInfo
getFunctions xs =
Map.fromList $
map
( \(Bind id args _) ->
( id
, FunctionInfo
{ numArgs = length args
, arguments = args
}
)
)
xs
{- | Compiles an AST and produces a LLVM Ir string.
An easy way to actually "compile" this output is to
Simply pipe it to LLI
-}
compile :: Program -> Err String compile :: Program -> Err String
compile (Program prg) = do compile (Program prg) = do
let s = defaultCodeGenerator {instructions = [ let s =
Comment (show $ printTree (Program prg)), CodeGenerator
UnsafeRaw $ standardLLVMLibrary <> "\n" { instructions = defaultStart
]} , functions = getFunctions prg
fin <- execStateT (goDef prg) s , variableCount = 0
let ins = instructions fin }
pure $ concatMap llvmIrToString ins ins <- instructions <$> execStateT (goDef prg) s
where pure $ llvmIrToString ins
mainContent var = where
[ SetVariable (Ident "print_res") mainContent :: LLVMValue -> [LLVMIr]
, Call (Array 21 I8) (Ident "i64ToString") [(I64, VIdent $ Ident $ show var)] mainContent var =
, SetVariable (Ident "print_ptr"), Alloca (Array 21 I8) [ UnsafeRaw $
, Store (Array 21 I8) (Ident "print_res") (Ref (Array 21 I8)) (Ident "print_ptr") "call i32 (ptr, ...) @printf(ptr noundef @.str, i64 noundef " <> show var <> ")\n"
, SetVariable (Ident "printable"), Bitcast (Ref (Array 21 I8)) (Ident "print_ptr") (Ref I8) , -- , SetVariable (Ident "p") (Icmp LLEq I64 (VInteger 2) (VInteger 2))
, Call I32 (Ident "puts") [(Ref I8, VIdent (Ident "printable"))] -- , BrCond (VIdent (Ident "p")) (Ident "b_1") (Ident "b_2")
, Ret I64 (VInteger 0) -- , Label (Ident "b_1")
] -- , UnsafeRaw
-- "call i32 (ptr, ...) @printf(ptr noundef @.str, i64 noundef 1)\n"
-- , Br (Ident "end")
-- , Label (Ident "b_2")
-- , UnsafeRaw
-- "call i32 (ptr, ...) @printf(ptr noundef @.str, i64 noundef 2)\n"
-- , Br (Ident "end")
-- , Label (Ident "end")
Ret I64 (VInteger 0)
]
goDef :: [Def] -> CompilerState () defaultStart :: [LLVMIr]
goDef [] = return () defaultStart =
goDef (DExp id@(Ident str) t _id2 args exp : xs) = do [ Comment (show $ printTree (Program prg))
let (rt, argTypes) = flattenFuncType t , UnsafeRaw "@.str = private unnamed_addr constant [3 x i8] c\"%i\n\", align 1\n"
emit $ Comment $ show str <> ": " <> show (rt, argTypes) , UnsafeRaw "declare i32 @printf(ptr noalias nocapture, ...)\n"
emit $ Define rt id (zip argTypes args) -- //TODO parse args ]
go exp
varNum <- gets variableCount
if str == "main" then mapM_ emit (mainContent varNum)
else emit $ Ret rt (VIdent (Ident (show varNum)))
emit DefineEnd
modify (\s -> s {variableCount = 0})
goDef xs
go :: Exp -> CompilerState () goDef :: [Bind] -> CompilerState ()
go (EInt int) = emitInt int goDef [] = return ()
go (EAdd e1 e2) = emitAdd e1 e2 goDef (Bind id@(Ident str) args exp : xs) = do
go (ESub e1 e2) = emitSub e1 e2 emit $ UnsafeRaw "\n"
go (EMul e1 e2) = emitMul e1 e2 emit $ Comment $ show str <> ": " <> show exp
go (EDiv e1 e2) = emitDiv e1 e2 emit $ Define I64 id (map (I64,) args)
go (EMod e1 e2) = emitMod e1 e2 functionBody <- exprToValue exp
go (EId id) = emitArg id if str == "main"
go (EApp e1 e2) = emitApp e1 e2 then mapM_ emit (mainContent functionBody)
go (EAbs id t e) = emitAbs id t e else emit $ Ret I64 functionBody
emit DefineEnd
modify (\s -> s{variableCount = 0})
goDef xs
--- aux functions --- go :: Exp -> CompilerState ()
emitAbs :: Ident -> Type -> Exp -> CompilerState () go (EInt int) = emitInt int
emitAbs id t e = do go (EAdd e1 e2) = emitAdd e1 e2
emit $ Comment $ concat [ "EAbs (", show id, ", ", show t, ", " -- go (ESub e1 e2) = emitSub e1 e2
, show e, ") is not implemented!"] -- go (EMul e1 e2) = emitMul e1 e2
-- go (EDiv e1 e2) = emitDiv e1 e2
-- go (EMod e1 e2) = emitMod e1 e2
go (EId id) = emitIdent id
go (EApp e1 e2) = emitApp e1 e2
go (EAbs id e) = emitAbs id e
go (ELet xs e) = emitLet xs e
emitApp :: Exp -> Exp -> CompilerState () --- aux functions ---
emitApp e1 e2 = do emitAbs :: Ident -> Exp -> CompilerState ()
emit $ Comment $ concat [ "EApp (", show e1, ", ", show e2 emitAbs id e = do
, ") is not implemented!"] emit $
Comment $
concat
[ "EAbs ("
, show id
, ", "
, show I64
, ", "
, show e
, ") is not implemented!"
]
emitLet :: [Bind] -> Exp -> CompilerState ()
emitLet xs e = do
emit $
Comment $
concat
[ "ELet ("
, show xs
, " = "
, show e
, ") is not implemented!"
]
emitArg :: Ident -> CompilerState () emitApp :: Exp -> Exp -> CompilerState ()
emitArg id = do emitApp e1 e2 = appEmitter e1 e2 []
-- !!this should never happen!! where
increaseVarCount appEmitter :: Exp -> Exp -> [Exp] -> CompilerState ()
varCount <- gets variableCount appEmitter e1 e2 stack = do
emit $ SetVariable (Ident $ show varCount) let newStack = e2 : stack
emit $ Add I64 (VIdent id) (VInteger 0) case e1 of
EApp e1' e2' -> appEmitter e1' e2' newStack
EId id -> do
args <- traverse exprToValue newStack
vs <- getNewVar
emit $ SetVariable (Ident $ show vs) (Call I64 id (map (I64,) args))
x -> do
emit . Comment $ "The unspeakable happened: "
emit . Comment $ show x
emitInt :: Integer -> CompilerState () emitIdent :: Ident -> CompilerState ()
emitInt i = do emitIdent id = do
-- !!this should never happen!! -- !!this should never happen!!
increaseVarCount emit $ Comment "This should not have happened!"
varCount <- gets variableCount emit $ Variable id
emit $ SetVariable $ Ident (show varCount) emit $ UnsafeRaw "\n"
emit $ Add I64 (VInteger i) (VInteger 0)
emitAdd :: Exp -> Exp -> CompilerState () emitInt :: Integer -> CompilerState ()
emitAdd e1 e2 = do emitInt i = do
(v1,v2) <- binExprToValues e1 e2 -- !!this should never happen!!
increaseVarCount varCount <- getNewVar
v <- gets variableCount emit $ Comment "This should not have happened!"
emit $ SetVariable $ Ident $ show v emit $ SetVariable (Ident (show varCount)) (Add I64 (VInteger i) (VInteger 0))
emit $ Add I64 v1 v2
emitMul :: Exp -> Exp -> CompilerState () emitAdd :: Exp -> Exp -> CompilerState ()
emitMul e1 e2 = do emitAdd e1 e2 = do
(v1,v2) <- binExprToValues e1 e2 v1 <- exprToValue e1
increaseVarCount v2 <- exprToValue e2
v <- gets variableCount v <- getNewVar
emit $ SetVariable $ Ident $ show v emit $ SetVariable (Ident $ show v) (Add I64 v1 v2)
emit $ Mul I64 v1 v2
emitMod :: Exp -> Exp -> CompilerState () -- emitMul :: Exp -> Exp -> CompilerState ()
emitMod e1 e2 = do -- emitMul e1 e2 = do
-- `let m a b = rem (abs $ b + a) b` -- (v1,v2) <- binExprToValues e1 e2
(v1,v2) <- binExprToValues e1 e2 -- increaseVarCount
increaseVarCount -- v <- gets variableCount
vadd <- gets variableCount -- emit $ SetVariable $ Ident $ show v
emit $ SetVariable $ Ident $ show vadd -- emit $ Mul I64 v1 v2
emit $ Add I64 v1 v2
increaseVarCount -- emitMod :: Exp -> Exp -> CompilerState ()
vabs <- gets variableCount -- emitMod e1 e2 = do
emit $ SetVariable $ Ident $ show vabs -- -- `let m a b = rem (abs $ b + a) b`
emit $ Call I64 (Ident "llvm.abs.i64") -- (v1,v2) <- binExprToValues e1 e2
[ (I64, VIdent (Ident $ show vadd)) -- increaseVarCount
, (I1, VInteger 1) -- vadd <- gets variableCount
] -- emit $ SetVariable $ Ident $ show vadd
increaseVarCount -- emit $ Add I64 v1 v2
v <- gets variableCount --
emit $ SetVariable $ Ident $ show v -- increaseVarCount
emit $ Srem I64 (VIdent (Ident $ show vabs)) v2 -- vabs <- gets variableCount
-- emit $ SetVariable $ Ident $ show vabs
-- emit $ Call I64 (Ident "llvm.abs.i64")
-- [ (I64, VIdent (Ident $ show vadd))
-- , (I1, VInteger 1)
-- ]
-- increaseVarCount
-- v <- gets variableCount
-- emit $ SetVariable $ Ident $ show v
-- emit $ Srem I64 (VIdent (Ident $ show vabs)) v2
emitDiv :: Exp -> Exp -> CompilerState () -- emitDiv :: Exp -> Exp -> CompilerState ()
emitDiv e1 e2 = do -- emitDiv e1 e2 = do
(v1,v2) <- binExprToValues e1 e2 -- (v1,v2) <- binExprToValues e1 e2
increaseVarCount -- increaseVarCount
v <- gets variableCount -- v <- gets variableCount
emit $ SetVariable $ Ident $ show v -- emit $ SetVariable $ Ident $ show v
emit $ Div I64 v1 v2 -- emit $ Div I64 v1 v2
emitSub :: Exp -> Exp -> CompilerState () -- emitSub :: Exp -> Exp -> CompilerState ()
emitSub e1 e2 = do -- emitSub e1 e2 = do
(v1,v2) <- binExprToValues e1 e2 -- (v1,v2) <- binExprToValues e1 e2
increaseVarCount -- increaseVarCount
v <- gets variableCount -- v <- gets variableCount
emit $ SetVariable $ Ident $ show v -- emit $ SetVariable $ Ident $ show v
emit $ Sub I64 v1 v2 -- emit $ Sub I64 v1 v2
exprToValue :: Exp -> CompilerState Value exprToValue :: Exp -> CompilerState LLVMValue
exprToValue (EInt i) = return $ VInteger i exprToValue (EInt i) = return $ VInteger i
exprToValue (EId i) = return $ VIdent i exprToValue (EId id) = do
exprToValue e = do funcs <- gets functions
go e case Map.lookup id funcs of
v <- gets variableCount Just _ -> do
return $ VIdent $ Ident $ show v vc <- getNewVar
emit $ SetVariable (Ident $ show vc) (Call I64 id [])
binExprToValues :: Exp -> Exp -> CompilerState (Value, Value) return $ VIdent (Ident $ show vc)
binExprToValues e1 e2 = do Nothing -> return $ VIdent id
v1 <- exprToValue e1 exprToValue e = do
v2 <- exprToValue e2 go e
return (v1,v2) v <- getVarCount
return . VIdent . Ident $ show v
-- | A pretty nasty function to flatten out function types,
-- as they are currently represented by a recursive data type.
flattenFuncType :: Type -> (LLVMType, [LLVMType])
flattenFuncType xs = do
let res = go xs
(last res, init res)
where
go TInt = [I64]
go (TPol id) = [CustomType id]
go (TFun t1 t2) = go t1 ++ go t2

View file

@ -1,91 +1,188 @@
{-# LANGUAGE LambdaCase #-} {-# LANGUAGE LambdaCase #-}
module Compiler.LLVMIr where
import Data.List (intercalate) module Compiler.LLVMIr (LLVMType (..), LLVMIr (..), llvmIrToString, LLVMValue (..), LLVMComp (..)) where
import Grammar.Abs (Ident (Ident))
import Data.List (intercalate)
import Grammar.Abs (Ident (Ident))
-- | A datatype which represents some basic LLVM types -- | A datatype which represents some basic LLVM types
data LLVMType = I1 | I8 | I32 | I64 | Ptr data LLVMType
| Ref LLVMType | Array Integer LLVMType | CustomType Ident = I1
| I8
| I32
| I64
| Ptr
| Ref LLVMType
| Array Integer LLVMType
| CustomType Ident
instance Show LLVMType where instance Show LLVMType where
show :: LLVMType -> String show :: LLVMType -> String
show t = case t of show = \case
I1 -> "i1" I1 -> "i1"
I8 -> "i8" I8 -> "i8"
I32 -> "i32" I32 -> "i32"
I64 -> "i64" I64 -> "i64"
Ptr -> "ptr" Ptr -> "ptr"
Ref ty -> show ty <> "*" Ref ty -> show ty <> "*"
Array n ty -> concat ["[", show n, " x ", show ty, "]"] Array n ty -> concat ["[", show n, " x ", show ty, "]"]
CustomType (Ident ty) -> ty CustomType (Ident ty) -> ty
-- | Represents a LLVM "value", as in an integer, a register variable, data LLVMComp
-- or a string contstant = LLEq
data Value = VInteger Integer | VIdent Ident | VConstant String | LLNe
instance Show Value where | LLUgt
show :: Value -> String | LLUge
| LLUlt
| LLUle
| LLSgt
| LLSge
| LLSlt
| LLSle
instance Show LLVMComp where
show :: LLVMComp -> String
show = \case
LLEq -> "eq"
LLNe -> "ne"
LLUgt -> "ugt"
LLUge -> "uge"
LLUlt -> "ult"
LLUle -> "ule"
LLSgt -> "sgt"
LLSge -> "sge"
LLSlt -> "slt"
LLSle -> "sle"
{- | Represents a LLVM "value", as in an integer, a register variable,
or a string contstant
-}
data LLVMValue = VInteger Integer | VIdent Ident | VConstant String
instance Show LLVMValue where
show :: LLVMValue -> String
show v = case v of show v = case v of
VInteger i -> show i VInteger i -> show i
VIdent (Ident i) -> "%" <> i VIdent (Ident i) -> "%" <> i
VConstant s -> "c" <> show s VConstant s -> "c" <> show s
type Params = [(LLVMType, Ident)] type Params = [(LLVMType, Ident)]
type Args = [(LLVMType, Value)] type Args = [(LLVMType, LLVMValue)]
-- | A datatype which represents different instructions in LLVM -- | A datatype which represents different instructions in LLVM
data LLVMIr = Define LLVMType Ident Params data LLVMIr
| DefineEnd = Define LLVMType Ident Params
| Declare LLVMType Ident Params | DefineEnd
| SetVariable Ident | Declare LLVMType Ident Params
| Add LLVMType Value Value | SetVariable Ident LLVMIr
| Sub LLVMType Value Value | Variable Ident
| Div LLVMType Value Value | Add LLVMType LLVMValue LLVMValue
| Mul LLVMType Value Value | Sub LLVMType LLVMValue LLVMValue
| Srem LLVMType Value Value | Div LLVMType LLVMValue LLVMValue
| Call LLVMType Ident Args | Mul LLVMType LLVMValue LLVMValue
| Alloca LLVMType | Srem LLVMType LLVMValue LLVMValue
| Store LLVMType Ident LLVMType Ident | Icmp LLVMComp LLVMType LLVMValue LLVMValue
| Bitcast LLVMType Ident LLVMType | Br Ident
| Ret LLVMType Value | BrCond LLVMValue Ident Ident
| Comment String | Label Ident
| UnsafeRaw String -- This should generally be avoided, and proper | Call LLVMType Ident Args
-- instructions should be used in its place | Alloca LLVMType
| Store LLVMType Ident LLVMType Ident
| Bitcast LLVMType Ident LLVMType
| Ret LLVMType LLVMValue
| Comment String
| UnsafeRaw String -- This should generally be avoided, and proper
-- instructions should be used in its place
deriving (Show) deriving (Show)
-- | Converts a LLVM inststruction to a String, allowing for printing etc. -- | Converts a list of LLVMIr instructions to a string
llvmIrToString :: LLVMIr -> String llvmIrToString :: [LLVMIr] -> String
llvmIrToString = \case llvmIrToString = go 0
(Define t (Ident i) params) -> concat ["define ", show t, " @", i, "(" where
, intercalate "," (fmap (\(x,Ident y) -> unwords [show x, "%"<>y]) params) go :: Int -> [LLVMIr] -> String
,") {\n"] go _ [] = mempty
DefineEnd -> "}\n" go i (x : xs) = do
(Declare _t (Ident _i) _params) -> undefined let (i', n) = case x of
(SetVariable (Ident i)) -> concat ["%", i, " = "] Define{} -> (i + 1, 0)
(Add t v1 v2) -> concat ["add ", show t, " " DefineEnd -> (i - 1, 0)
, show v1, ", ", show v2 _ -> (i, i)
, "\n"] insToString n x <> go i' xs
(Sub t v1 v2) -> concat ["sub ", show t, " "
, show v1, ", " {- | Converts a LLVM inststruction to a String, allowing for printing etc.
, show v2, "\n"] The integer represents the indentation
(Div t v1 v2) -> concat ["sdiv ", show t, " " -}
, show v1, ", " {- FOURMOLU_DISABLE -}
, show v2, "\n"] insToString :: Int -> LLVMIr -> String
(Mul t v1 v2) -> concat ["mul ", show t, " " insToString i l =
, show v1, ", " replicate i '\t' <> case l of
, show v2, "\n"] (Define t (Ident i) params) ->
(Srem t v1 v2) -> concat ["srem ", show t, " " concat
, show v1, ", " [ "define ", show t, " @", i
, show v2, "\n"] , "(", intercalate ", " (fmap (\(x, Ident y) -> unwords [show x, "%" <> y]) params)
(Call t (Ident i) arg) -> concat ["call ", show t, " @", i, "(" , ") {\n"
, intercalate ", " $ Prelude.map (\(x,y) -> show x <> " " <> show y) arg ]
, ")\n"] DefineEnd -> "}\n"
(Alloca t) -> unwords ["alloca", show t, "\n"] (Declare _t (Ident _i) _params) -> undefined
(Store t1 (Ident id1) t2 (Ident id2)) -> concat ["store ", show t1, " %" (SetVariable (Ident i) ir) -> concat ["%", i, " = ", insToString 0 ir]
, id1, ", ", show t2, " %" (Add t v1 v2) ->
, id2, "\n"] concat
(Bitcast t1 (Ident i) t2) -> concat ["bitcast ", show t1, " %" [ "add ", show t, " ", show v1
, i, " to ", show t2, "\n"] , ", ", show v2, "\n"
(Ret t v) -> concat ["ret ", show t ]
, " ", show v, "\n"] (Sub t v1 v2) ->
(UnsafeRaw s) -> s concat
(Comment s) -> "; " <> s <> "\n" [ "sub ", show t, " ", show v1, ", "
, show v2, "\n"
]
(Div t v1 v2) ->
concat
[ "sdiv ", show t, " ", show v1, ", "
, show v2, "\n"
]
(Mul t v1 v2) ->
concat
[ "mul ", show t, " ", show v1
, ", ", show v2, "\n"
]
(Srem t v1 v2) ->
concat
[ "srem ", show t, " ", show v1, ", "
, show v2, "\n"
]
(Call t (Ident i) arg) ->
concat
[ "call ", show t, " @", i, "("
, intercalate ", " $ Prelude.map (\(x, y) -> show x <> " " <> show y) arg
, ")\n"
]
(Alloca t) -> unwords ["alloca", show t, "\n"]
(Store t1 (Ident id1) t2 (Ident id2)) ->
concat
[ "store ", show t1, " %", id1
, ", ", show t2 , " %", id2, "\n"
]
(Bitcast t1 (Ident i) t2) ->
concat
[ "bitcast ", show t1, " %"
, i, " to ", show t2, "\n"
]
(Icmp comp t v1 v2) ->
concat
[ "icmp ", show comp, " ", show t
, " ", show v1, ", ", show v2, "\n"
]
(Ret t v) ->
concat
[ "ret ", show t, " "
, show v, "\n"
]
(UnsafeRaw s) -> s
(Label (Ident s)) -> "\nlabel_" <> s <> ":\n"
(Br (Ident s)) -> "br label %label_" <> s <> "\n"
(BrCond val (Ident s1) (Ident s2)) ->
concat
[ "br i1 ", show val, ", ", "label %"
, "label_", s1, ", ", "label %", "label_", s2, "\n"
]
(Comment s) -> "; " <> s <> "\n"
(Variable (Ident id)) -> "%" <> id
{- FOURMOLU_ENABLE -}

View file

@ -1,10 +0,0 @@
{-# LANGUAGE TemplateHaskell #-}
module Compiler.StandardLLVMLibrary where
import Compiler.TH (includeStr)
-- | Uses Template Haskell to load our "standard library", which is written in
-- LLVM IR. This library simply includes functions to generate strings from
-- i64s and to print ints.
standardLLVMLibrary :: String
standardLLVMLibrary = $(includeStr "src/Compiler/standard_library.ll")

View file

@ -1,14 +0,0 @@
{-# LANGUAGE TemplateHaskellQuotes #-}
module Compiler.TH where
import Language.Haskell.TH (Exp, Q)
import System.IO.Unsafe (unsafePerformIO)
-- While this is hacky (specifically the use of unsafePerformIO)
-- in this case I think it is fine, as if an invalid string
-- is passed to the function it will fail to compile,
-- which is the intended behavior. This allows us to
-- import strings (such as our "standard LLVM library")
-- during compile time, removing the need to ship the source for
-- that with the compiler.
includeStr :: String -> Q Exp
includeStr file = [|unsafePerformIO $ readFile file|]

View file

@ -1,118 +0,0 @@
declare i64 @llvm.abs.i64(i64, i1 immarg)
declare i64 @llvm.powi.i64.i64(i64, i64)
declare i32 @puts(i8* nocapture) nounwind
define [21 x i8] @i64ToString(i64 %val_org) {
%val = alloca i64
store i64 %val_org, i64* %val
%val_copy = add i64 0, %val_org
; https://stackoverflow.com/a/7123710
; an algorithm for translating ints to strings
; s = ''
; sign = ''
; if i < 0:
; sign = '-'
; i = -i
; while True:
; remainder = i % 10
; i = i / 10
; s = chr(ord('0') + remainder) + s
; if i == 0:
; break
; return sign + s
; allocate memory for the string, and store the temp variable into it
%string_ptr = alloca [21 x i8]
store [21 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [21 x i8]* %string_ptr
; create a pointer to the array
%array_index_ptr = alloca i32
store i32 0, i32* %array_index_ptr
br label %while_point
while_point:
%val.tmp = load i64, i64* %val; load i64, i64* %val
%val.1 = call i64 @llvm.abs.i64(i64 %val.tmp, i1 true)
%tmp = load i32, i32* %array_index_ptr
%array_pointer.1 = getelementptr [21 x i8], [21 x i8]* %string_ptr, i32 0, i32 %tmp
%array_index_ptr.1 = add i32 %tmp, 1
store i32 %array_index_ptr.1, i32* %array_index_ptr
; this should not work, but it does
%remainder = srem i64 %val.1, 10
%remainder_tmp = alloca i64
store i64 %remainder, i64* %remainder_tmp
%remainder_tmp8 = bitcast i64* %remainder_tmp to i8*
%remainder8 = load i8, i8* %remainder_tmp8
%i = sdiv i64 %val.1, 10
%char = add i8 48, %remainder8
store i8 %char, i8* %array_pointer.1 ; update string!
store i64 %i, i64* %val
%while_condition = icmp eq i64 %i, 0
br i1 %while_condition, label %while_break, label %while_point
while_break:
; get last_pointer
%array_index_ptr.0 = load i32, i32* %array_index_ptr
%array_pointer.0 = getelementptr [21 x i8], [21 x i8]* %string_ptr, i32 0, i32 %array_index_ptr.0
; check if p is below 0
%condition = icmp slt i64 %val_copy, 0
br i1 %condition, label %negative_check_true, label %negative_check_false
negative_check_true:
store i8 45, i8* %array_pointer.0
br label %negative_check_done
negative_check_false:
; store i8 43, i8* %array_pointer.0
br label %negative_check_done
negative_check_done:
%noop = add i32 0, 0
; load the result and return it
call void @reverseI64String([21 x i8]* %string_ptr)
%res = load [21 x i8],[21 x i8]* %string_ptr
ret [21 x i8] %res
}
define void @reverseI64String([21 x i8]* %string) {
%new_string = alloca [21 x i8]
store [21 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [21 x i8]* %new_string
%reverse_index = alloca i32
store i32 20, i32* %reverse_index
%new_index = alloca i32
store i32 0, i32* %new_index
br label %while_start
while_start:
%reverse_index.0 = load i32, i32* %reverse_index
%reverse_ptr = getelementptr [21 x i8], [21 x i8]* %string, i32 0, i32 %reverse_index.0
%reverse_index.1 = sub i32 %reverse_index.0, 1
store i32 %reverse_index.1, i32* %reverse_index
%new_index.0 = load i32, i32* %new_index
%new_ptr = getelementptr [21 x i8], [21 x i8]* %new_string, i32 0, i32 %new_index.0
%break_condition = icmp slt i32 %reverse_index.0, 0
br i1 %break_condition, label %break, label %ok
ok:
%value = load i8, i8* %reverse_ptr
%condition = icmp eq i8 %value, 0
br i1 %condition, label %while_start, label %not_zero
not_zero:
store i8 %value, i8* %new_ptr
%new_index.1 = add i32 %new_index.0, 1
store i32 %new_index.1, i32* %new_index
br label %while_start
break:
%copy = load [21 x i8], [21 x i8]* %new_string
store [21 x i8] %copy, [21 x i8]* %string
; start iterating over the original string in reverse
; if i == '\00' ignore
; else put value into new_string at new i, then new i + 1
ret void
}

View file

@ -1,6 +1,8 @@
{-# LANGUAGE LambdaCase #-} {-# LANGUAGE LambdaCase #-}
module Main where module Main where
import Compiler.Compiler (compile)
import GHC.IO.Handle.Text (hPutStrLn)
import Grammar.ErrM (Err) import Grammar.ErrM (Err)
import Grammar.Par (myLexer, pProgram) import Grammar.Par (myLexer, pProgram)
import Grammar.Print (printTree) import Grammar.Print (printTree)
@ -8,6 +10,7 @@ import Interpreter (interpret)
import LambdaLifter (abstract, freeVars, lambdaLift) import LambdaLifter (abstract, freeVars, lambdaLift)
import System.Environment (getArgs) import System.Environment (getArgs)
import System.Exit (exitFailure, exitSuccess) import System.Exit (exitFailure, exitSuccess)
import System.IO (stderr)
main :: IO () main :: IO ()
main = getArgs >>= \case main = getArgs >>= \case
@ -18,13 +21,18 @@ main' :: String -> IO ()
main' s = do main' s = do
file <- readFile s file <- readFile s
putStrLn "\n-- parse" printToErr "-- Parse Tree -- "
parsed <- fromSyntaxErr . pProgram $ myLexer file parsed <- fromSyntaxErr . pProgram $ myLexer file
putStrLn $ printTree parsed printToErr $ printTree parsed
putStrLn "\n-- Lambda Lifter" printToErr "\n-- Lambda Lifter --"
let lifted = lambdaLift parsed let lifted = lambdaLift parsed
putStrLn $ printTree lifted printToErr $ printTree lifted
--putStrLn "\n-- Compiler"
printToErr "\n -- Printing compiler output to stdout --"
compiled <- fromCompilerErr $ compile lifted
putStrLn compiled
-- interpred <- fromInterpreterErr $ interpret lifted -- interpred <- fromInterpreterErr $ interpret lifted
-- putStrLn "\n-- interpret" -- putStrLn "\n-- interpret"
@ -32,6 +40,16 @@ main' s = do
exitSuccess exitSuccess
printToErr :: String -> IO ()
printToErr = hPutStrLn stderr
fromCompilerErr :: Err a -> IO a
fromCompilerErr = either
(\err -> do
putStrLn "\nCOMPILER ERROR"
putStrLn err
exitFailure)
pure
fromSyntaxErr :: Err a -> IO a fromSyntaxErr :: Err a -> IO a
fromSyntaxErr = either fromSyntaxErr = either

View file

@ -1,20 +0,0 @@
; Copied directly from the documentation
; Declare the string constant as a global constant.
@.hello = private unnamed_addr constant [13 x i8] c"hello world\0A\00"
; External declaration of the puts function
declare i32 @puts(i8* nocapture) nounwind
; Definition of main function
define i32 @main() { ; i32()*
; Convert [13 x i8]* to i8 *...
%cast210 = getelementptr [13 x i8],[13 x i8]* @.hello, i64 0, i64 0
; Call puts function to write out the string to stdout.
call i32 @puts(i8* %cast210)
ret i32 0
}
; Named metadata
!0 = !{i32 42, null, !"string"}
!foo = !{!0}

View file

@ -1,5 +0,0 @@
tripplet : Int -> (Int -> (Int -> Int))
tripplet x y z = x + y + z;
main : Int
main = {-(((2 * (123 + 4214 % (1230)) - 1231) / 2) * 412412) +-} tripplet 5 1 2;

View file

@ -1,18 +0,0 @@
; External declaration of the puts function
declare i32 @puts(i8* nocapture) nounwind
declare [21 x i8] @i64ToString(i64)
; Definition of main function
define i32 @main() { ; i32()*
; %val = add i64 -123456789, 0
%val = add i64 -133780085, 0
%print_res = call [21 x i8] @i64ToString(i64 %val)
%ptr = alloca [21 x i8]
store [21 x i8] %print_res, [21 x i8]* %ptr
%printable = bitcast [21 x i8]* %ptr to i8*
call i32 @puts(i8* %printable)
ret i32 0
}