Fixed some merge errors that occured when merging with main

Started implementing EApp.

Simple function calls now work.

Removed the LLVM standard library as it was not needed.

Limited functionality.

Got EApp working!!

Fixed arguments being inserted in the wrong order.

Updated the showing of `Call` so that it inserts a space inbetween arguments.

Removed some unused code from the generated main functions.

Force removed language.

Expressions that are simply just calling a constant now work properly.

Fixed constants being called inside nested expressions.

Cleaned up the compiler a bit.

Added a test suite, testing that the output from programs is correct, "evaluating the whole process".

Fixed a typo in basic-4.sf

Fixed basic-8 to work without a renamer.

Fixed some nicer output in the main function.

Made the outputted LLVM Ir somewhat nicer.

Fixed a typo and updated how SetVariable works.

Formatted and added documentation.

Added instructions allowing for branching.
This commit is contained in:
Samuel Hammersberg 2023-02-12 13:30:45 +01:00 committed by Martin Fredin
parent 7c1e1d57a0
commit 8d698cecf9
36 changed files with 557 additions and 453 deletions

2
.gitignore vendored
View file

@ -3,4 +3,4 @@ dist-newstyle
*.x
*.bak
src/Grammar
/language
language

View file

@ -1,5 +1,3 @@
Program. Program ::= [Bind];
EId. Exp3 ::= Ident;
@ -9,14 +7,11 @@ EApp. Exp2 ::= Exp2 Exp3;
EAdd. Exp1 ::= Exp1 "+" Exp2;
EAbs. Exp ::= "\\" Ident "." Exp;
EId. Exp3 ::= Ident ;
EInt. Exp3 ::= Integer ;
EApp. Exp2 ::= Exp2 Exp3 ;
EAdd. Exp1 ::= Exp1 "+" Exp2 ;
EAbs. Exp ::= "\\" Ident "->" Exp ;
Bind. Bind ::= Ident [Ident] "=" Exp;
separator Bind ";";
separator Ident " ";
coercions Exp 3 ;
comment "--" ;
comment "{-" "-}" ;
coercions Exp 3;
comment "--";
comment "{-" "-}";

2
cabal.project.local Normal file
View file

@ -0,0 +1,2 @@
ignore-project: False
tests: True

View file

@ -1 +0,0 @@
/home/samuel/.cabal/store/ghc-9.4.4/language-0.1.0.0-e-language-b2dd0f5c425bcebb6d06c37d2359fc74433c8d1d898fd217ad1cdc50c9035ad3/bin/language

View file

@ -1,4 +1,4 @@
cabal-version: 3.0
cabal-version: 3.4
name: language
@ -20,19 +20,52 @@ extra-source-files:
common warnings
ghc-options: -W
executable language
import: warnings
main-is: Main.hs
Test-Suite sample-programs
Type: exitcode-stdio-1.0
main-is: Main.hs
Build-depends:
base >=4.16
, QuickCheck
, hspec
, directory
, process
, mtl
, containers
, either
, array
, extra
hs-source-dirs: src, sample-programs
default-language: GHC2021
other-modules:
Grammar.Abs
Grammar.Lex
Grammar.Par
Grammar.Print
Grammar.Skel
Grammar.ErrM
LambdaLifter
Auxiliary
Interpreter
Compiler.Compiler
Compiler.LLVMIr
executable language
import: warnings
main-is: Main.hs
other-modules:
Grammar.Abs
Grammar.Lex
Grammar.Par
Grammar.Print
Grammar.Skel
Grammar.ErrM
LambdaLifter
Auxiliary
Interpreter
Compiler.Compiler
Compiler.LLVMIr
hs-source-dirs: src
build-depends:
@ -42,5 +75,4 @@ executable language
, either
, array
, extra
default-language: GHC2021

60
sample-programs/Main.hs Normal file
View file

@ -0,0 +1,60 @@
module Main where
import Compiler.Compiler (compile)
import Control.Exception (IOException, catch,
evaluate)
import Data.List (isSuffixOf, sort)
import Data.List.Extra (trim)
import GHC.IO.Handle (BufferMode (NoBuffering),
hSetBuffering)
import Grammar.ErrM (Err)
import Grammar.Par (myLexer, pProgram)
import LambdaLifter (lambdaLift)
import System.Directory (getDirectoryContents)
import System.Directory.Internal.Prelude (exitFailure)
import System.IO (hPrint)
import System.IO.Extra (stderr)
import System.Process (CreateProcess (std_in),
StdStream (CreatePipe),
createProcess, proc,
readCreateProcess, shell,
waitForProcess)
import Test.Hspec (hspec, it, shouldSatisfy)
import Text.Printf (hPrintf)
path :: String
path = "sample-programs/sample-programs"
main :: IO ()
main = do
dir <- getDirectoryContents path
-- this is not a good way to grab tests
-- ideally one would grab all .sf files, remove the sf extension
-- and and res instead. Going to fix that soon
let tests = sort $ filter (".sf" `isSuffixOf`) dir
let results = sort $ filter (".res" `isSuffixOf`) dir
let combined = zip tests results
mapM_ (uncurry test) combined
fromErr :: Err a -> IO a
fromErr (Left a) = do
hPrint stderr a
exitFailure
fromErr (Right a) = pure a
comp :: String -> IO String
comp file = do
parsed <- fromErr . pProgram $ myLexer file
let lifted = lambdaLift parsed
fromErr $ compile lifted
test :: String -> String -> IO ()
test t r = do
test <- readFile (path <> "/" <> t)
expectedRes <- trim <$> readFile (path <> "/" <> r)
hspec $ it t $ do
compiled <- catch (comp test)
(const $ return "term" :: IOException -> IO String )
ev <- catch (trim <$> readCreateProcess (shell "lli") compiled)
(\e -> return $ show (e :: IOException))
ev `shouldSatisfy` (expectedRes ==)

View file

@ -1,2 +0,0 @@
f x = let g = (\y. y+1) in g (g x)

View file

@ -1,3 +0,0 @@
f = \x.\y. x+y

View file

@ -1,2 +0,0 @@
f x = let double = \y. y+y in (\x. x+y) 4;

View file

@ -0,0 +1 @@
666

View file

@ -1,2 +1,3 @@
f = \x. x+1;
main = f 665

View file

@ -0,0 +1 @@
20

View file

@ -0,0 +1 @@
7

View file

@ -0,0 +1 @@
4507

View file

@ -0,0 +1,3 @@
f x = let g = (\y. y+1) in g (g x);
main = f 4505

View file

@ -0,0 +1 @@
8

View file

@ -6,4 +6,4 @@ double n = n + n;
apply f x = \y. f x y;
main = apply (id add) ((\x. x + 1) 1) (double 3);
main = apply (id add) ((\x. x + 1) 1) (double 3);

View file

@ -0,0 +1 @@
50

View file

@ -0,0 +1,5 @@
f = \x.\y. x+y;
main = f 20 30

View file

@ -0,0 +1 @@
10

View file

@ -2,4 +2,4 @@ add x y = x + y;
apply f x = f x;
main = apply (add 4) 6;
main = apply (add 4) 6;

View file

@ -0,0 +1 @@
44

View file

@ -0,0 +1,4 @@
f z = let double = \y. y+y in (\x. x+z) 4;
-- f y = let double = \y. y+y in (\x. x+y) 4;
-- ^ this produces a bug in the renamer
main = f 40

View file

@ -0,0 +1 @@
20

View file

@ -1,4 +1 @@
main = (\f.\x.\y. f x + f y) (\x. x+x) ((\x. x+1) ((\x. x+3) 2)) 4
main = (\f.\x.\y. f x + f y) (\x. x+x) ((\x. x+1) ((\x. x+3) 2)) 4

View file

@ -1,183 +1,257 @@
module Compiler.Compiler where
module Compiler.Compiler (compile) where
import Compiler.LLVMIr (LLVMIr (..), LLVMType (..),
Value (..), llvmIrToString)
import Compiler.StandardLLVMLibrary (standardLLVMLibrary)
import Control.Monad.State (StateT, execStateT, gets, modify)
import Grammar.Abs (Def (..), Exp (..), Ident (..),
Program (..), Type (..))
import Grammar.ErrM (Err)
import Grammar.Print (printTree)
import Compiler.LLVMIr (
LLVMComp (..),
LLVMIr (..),
LLVMType (..),
LLVMValue (..),
llvmIrToString,
)
import Control.Monad.State (StateT, execStateT, gets, modify)
import Data.Map (Map)
import Data.Map qualified as Map
import Grammar.Abs (
Bind (..),
Exp (..),
Ident (..),
Program (..),
)
import Grammar.ErrM (Err)
import Grammar.Print (printTree)
-- | The record used as the code generator state
data CodeGenerator = CodeGenerator
{ instructions :: [LLVMIr]
, methods :: [Ident]
, variableCount :: Integer }
{ instructions :: [LLVMIr]
, functions :: Map Ident FunctionInfo
, variableCount :: Integer
}
-- | A state type synonym
type CompilerState a = StateT CodeGenerator Err a
-- | An empty instance of CodeGenerator
defaultCodeGenerator :: CodeGenerator
defaultCodeGenerator = CodeGenerator
{ instructions = []
, methods = []
, variableCount = 0 }
data FunctionInfo = FunctionInfo
{ numArgs :: Int
, arguments :: [Ident]
}
-- | Adds a instruction to the CodeGenerator state
emit :: LLVMIr -> CompilerState ()
emit l = modify (\t -> t {instructions = instructions t ++ [l]})
emit l = modify (\t -> t{instructions = instructions t ++ [l]})
-- | Increases the variable counter in the Codegenerator state
-- | Increases the variable counter in the CodeGenerator state
increaseVarCount :: CompilerState ()
increaseVarCount = modify (\t -> t {variableCount = variableCount t + 1})
increaseVarCount = modify (\t -> t{variableCount = variableCount t + 1})
-- | Returns the variable count from the CodeGenerator state
getVarCount :: CompilerState Integer
getVarCount = gets variableCount
-- | Increases the variable count and returns it from the CodeGenerator state
getNewVar :: CompilerState Integer
getNewVar = increaseVarCount >> getVarCount
{- | Produces a map of functions infos from a list of binds,
which contains useful data for code generation.
-}
getFunctions :: [Bind] -> Map Ident FunctionInfo
getFunctions xs =
Map.fromList $
map
( \(Bind id args _) ->
( id
, FunctionInfo
{ numArgs = length args
, arguments = args
}
)
)
xs
{- | Compiles an AST and produces a LLVM Ir string.
An easy way to actually "compile" this output is to
Simply pipe it to LLI
-}
compile :: Program -> Err String
compile (Program prg) = do
let s = defaultCodeGenerator {instructions = [
Comment (show $ printTree (Program prg)),
UnsafeRaw $ standardLLVMLibrary <> "\n"
]}
fin <- execStateT (goDef prg) s
let ins = instructions fin
pure $ concatMap llvmIrToString ins
where
mainContent var =
[ SetVariable (Ident "print_res")
, Call (Array 21 I8) (Ident "i64ToString") [(I64, VIdent $ Ident $ show var)]
, SetVariable (Ident "print_ptr"), Alloca (Array 21 I8)
, Store (Array 21 I8) (Ident "print_res") (Ref (Array 21 I8)) (Ident "print_ptr")
, SetVariable (Ident "printable"), Bitcast (Ref (Array 21 I8)) (Ident "print_ptr") (Ref I8)
, Call I32 (Ident "puts") [(Ref I8, VIdent (Ident "printable"))]
, Ret I64 (VInteger 0)
]
let s =
CodeGenerator
{ instructions = defaultStart
, functions = getFunctions prg
, variableCount = 0
}
ins <- instructions <$> execStateT (goDef prg) s
pure $ llvmIrToString ins
where
mainContent :: LLVMValue -> [LLVMIr]
mainContent var =
[ UnsafeRaw $
"call i32 (ptr, ...) @printf(ptr noundef @.str, i64 noundef " <> show var <> ")\n"
, -- , SetVariable (Ident "p") (Icmp LLEq I64 (VInteger 2) (VInteger 2))
-- , BrCond (VIdent (Ident "p")) (Ident "b_1") (Ident "b_2")
-- , Label (Ident "b_1")
-- , UnsafeRaw
-- "call i32 (ptr, ...) @printf(ptr noundef @.str, i64 noundef 1)\n"
-- , Br (Ident "end")
-- , Label (Ident "b_2")
-- , UnsafeRaw
-- "call i32 (ptr, ...) @printf(ptr noundef @.str, i64 noundef 2)\n"
-- , Br (Ident "end")
-- , Label (Ident "end")
Ret I64 (VInteger 0)
]
goDef :: [Def] -> CompilerState ()
goDef [] = return ()
goDef (DExp id@(Ident str) t _id2 args exp : xs) = do
let (rt, argTypes) = flattenFuncType t
emit $ Comment $ show str <> ": " <> show (rt, argTypes)
emit $ Define rt id (zip argTypes args) -- //TODO parse args
go exp
varNum <- gets variableCount
if str == "main" then mapM_ emit (mainContent varNum)
else emit $ Ret rt (VIdent (Ident (show varNum)))
emit DefineEnd
modify (\s -> s {variableCount = 0})
goDef xs
defaultStart :: [LLVMIr]
defaultStart =
[ Comment (show $ printTree (Program prg))
, UnsafeRaw "@.str = private unnamed_addr constant [3 x i8] c\"%i\n\", align 1\n"
, UnsafeRaw "declare i32 @printf(ptr noalias nocapture, ...)\n"
]
go :: Exp -> CompilerState ()
go (EInt int) = emitInt int
go (EAdd e1 e2) = emitAdd e1 e2
go (ESub e1 e2) = emitSub e1 e2
go (EMul e1 e2) = emitMul e1 e2
go (EDiv e1 e2) = emitDiv e1 e2
go (EMod e1 e2) = emitMod e1 e2
go (EId id) = emitArg id
go (EApp e1 e2) = emitApp e1 e2
go (EAbs id t e) = emitAbs id t e
goDef :: [Bind] -> CompilerState ()
goDef [] = return ()
goDef (Bind id@(Ident str) args exp : xs) = do
emit $ UnsafeRaw "\n"
emit $ Comment $ show str <> ": " <> show exp
emit $ Define I64 id (map (I64,) args)
functionBody <- exprToValue exp
if str == "main"
then mapM_ emit (mainContent functionBody)
else emit $ Ret I64 functionBody
emit DefineEnd
modify (\s -> s{variableCount = 0})
goDef xs
--- aux functions ---
emitAbs :: Ident -> Type -> Exp -> CompilerState ()
emitAbs id t e = do
emit $ Comment $ concat [ "EAbs (", show id, ", ", show t, ", "
, show e, ") is not implemented!"]
go :: Exp -> CompilerState ()
go (EInt int) = emitInt int
go (EAdd e1 e2) = emitAdd e1 e2
-- go (ESub e1 e2) = emitSub e1 e2
-- go (EMul e1 e2) = emitMul e1 e2
-- go (EDiv e1 e2) = emitDiv e1 e2
-- go (EMod e1 e2) = emitMod e1 e2
go (EId id) = emitIdent id
go (EApp e1 e2) = emitApp e1 e2
go (EAbs id e) = emitAbs id e
go (ELet xs e) = emitLet xs e
emitApp :: Exp -> Exp -> CompilerState ()
emitApp e1 e2 = do
emit $ Comment $ concat [ "EApp (", show e1, ", ", show e2
, ") is not implemented!"]
--- aux functions ---
emitAbs :: Ident -> Exp -> CompilerState ()
emitAbs id e = do
emit $
Comment $
concat
[ "EAbs ("
, show id
, ", "
, show I64
, ", "
, show e
, ") is not implemented!"
]
emitLet :: [Bind] -> Exp -> CompilerState ()
emitLet xs e = do
emit $
Comment $
concat
[ "ELet ("
, show xs
, " = "
, show e
, ") is not implemented!"
]
emitArg :: Ident -> CompilerState ()
emitArg id = do
-- !!this should never happen!!
increaseVarCount
varCount <- gets variableCount
emit $ SetVariable (Ident $ show varCount)
emit $ Add I64 (VIdent id) (VInteger 0)
emitApp :: Exp -> Exp -> CompilerState ()
emitApp e1 e2 = appEmitter e1 e2 []
where
appEmitter :: Exp -> Exp -> [Exp] -> CompilerState ()
appEmitter e1 e2 stack = do
let newStack = e2 : stack
case e1 of
EApp e1' e2' -> appEmitter e1' e2' newStack
EId id -> do
args <- traverse exprToValue newStack
vs <- getNewVar
emit $ SetVariable (Ident $ show vs) (Call I64 id (map (I64,) args))
x -> do
emit . Comment $ "The unspeakable happened: "
emit . Comment $ show x
emitInt :: Integer -> CompilerState ()
emitInt i = do
-- !!this should never happen!!
increaseVarCount
varCount <- gets variableCount
emit $ SetVariable $ Ident (show varCount)
emit $ Add I64 (VInteger i) (VInteger 0)
emitIdent :: Ident -> CompilerState ()
emitIdent id = do
-- !!this should never happen!!
emit $ Comment "This should not have happened!"
emit $ Variable id
emit $ UnsafeRaw "\n"
emitAdd :: Exp -> Exp -> CompilerState ()
emitAdd e1 e2 = do
(v1,v2) <- binExprToValues e1 e2
increaseVarCount
v <- gets variableCount
emit $ SetVariable $ Ident $ show v
emit $ Add I64 v1 v2
emitInt :: Integer -> CompilerState ()
emitInt i = do
-- !!this should never happen!!
varCount <- getNewVar
emit $ Comment "This should not have happened!"
emit $ SetVariable (Ident (show varCount)) (Add I64 (VInteger i) (VInteger 0))
emitMul :: Exp -> Exp -> CompilerState ()
emitMul e1 e2 = do
(v1,v2) <- binExprToValues e1 e2
increaseVarCount
v <- gets variableCount
emit $ SetVariable $ Ident $ show v
emit $ Mul I64 v1 v2
emitAdd :: Exp -> Exp -> CompilerState ()
emitAdd e1 e2 = do
v1 <- exprToValue e1
v2 <- exprToValue e2
v <- getNewVar
emit $ SetVariable (Ident $ show v) (Add I64 v1 v2)
emitMod :: Exp -> Exp -> CompilerState ()
emitMod e1 e2 = do
-- `let m a b = rem (abs $ b + a) b`
(v1,v2) <- binExprToValues e1 e2
increaseVarCount
vadd <- gets variableCount
emit $ SetVariable $ Ident $ show vadd
emit $ Add I64 v1 v2
-- emitMul :: Exp -> Exp -> CompilerState ()
-- emitMul e1 e2 = do
-- (v1,v2) <- binExprToValues e1 e2
-- increaseVarCount
-- v <- gets variableCount
-- emit $ SetVariable $ Ident $ show v
-- emit $ Mul I64 v1 v2
increaseVarCount
vabs <- gets variableCount
emit $ SetVariable $ Ident $ show vabs
emit $ Call I64 (Ident "llvm.abs.i64")
[ (I64, VIdent (Ident $ show vadd))
, (I1, VInteger 1)
]
increaseVarCount
v <- gets variableCount
emit $ SetVariable $ Ident $ show v
emit $ Srem I64 (VIdent (Ident $ show vabs)) v2
-- emitMod :: Exp -> Exp -> CompilerState ()
-- emitMod e1 e2 = do
-- -- `let m a b = rem (abs $ b + a) b`
-- (v1,v2) <- binExprToValues e1 e2
-- increaseVarCount
-- vadd <- gets variableCount
-- emit $ SetVariable $ Ident $ show vadd
-- emit $ Add I64 v1 v2
--
-- increaseVarCount
-- vabs <- gets variableCount
-- emit $ SetVariable $ Ident $ show vabs
-- emit $ Call I64 (Ident "llvm.abs.i64")
-- [ (I64, VIdent (Ident $ show vadd))
-- , (I1, VInteger 1)
-- ]
-- increaseVarCount
-- v <- gets variableCount
-- emit $ SetVariable $ Ident $ show v
-- emit $ Srem I64 (VIdent (Ident $ show vabs)) v2
emitDiv :: Exp -> Exp -> CompilerState ()
emitDiv e1 e2 = do
(v1,v2) <- binExprToValues e1 e2
increaseVarCount
v <- gets variableCount
emit $ SetVariable $ Ident $ show v
emit $ Div I64 v1 v2
-- emitDiv :: Exp -> Exp -> CompilerState ()
-- emitDiv e1 e2 = do
-- (v1,v2) <- binExprToValues e1 e2
-- increaseVarCount
-- v <- gets variableCount
-- emit $ SetVariable $ Ident $ show v
-- emit $ Div I64 v1 v2
emitSub :: Exp -> Exp -> CompilerState ()
emitSub e1 e2 = do
(v1,v2) <- binExprToValues e1 e2
increaseVarCount
v <- gets variableCount
emit $ SetVariable $ Ident $ show v
emit $ Sub I64 v1 v2
-- emitSub :: Exp -> Exp -> CompilerState ()
-- emitSub e1 e2 = do
-- (v1,v2) <- binExprToValues e1 e2
-- increaseVarCount
-- v <- gets variableCount
-- emit $ SetVariable $ Ident $ show v
-- emit $ Sub I64 v1 v2
exprToValue :: Exp -> CompilerState Value
exprToValue (EInt i) = return $ VInteger i
exprToValue (EId i) = return $ VIdent i
exprToValue e = do
go e
v <- gets variableCount
return $ VIdent $ Ident $ show v
binExprToValues :: Exp -> Exp -> CompilerState (Value, Value)
binExprToValues e1 e2 = do
v1 <- exprToValue e1
v2 <- exprToValue e2
return (v1,v2)
-- | A pretty nasty function to flatten out function types,
-- as they are currently represented by a recursive data type.
flattenFuncType :: Type -> (LLVMType, [LLVMType])
flattenFuncType xs = do
let res = go xs
(last res, init res)
where
go TInt = [I64]
go (TPol id) = [CustomType id]
go (TFun t1 t2) = go t1 ++ go t2
exprToValue :: Exp -> CompilerState LLVMValue
exprToValue (EInt i) = return $ VInteger i
exprToValue (EId id) = do
funcs <- gets functions
case Map.lookup id funcs of
Just _ -> do
vc <- getNewVar
emit $ SetVariable (Ident $ show vc) (Call I64 id [])
return $ VIdent (Ident $ show vc)
Nothing -> return $ VIdent id
exprToValue e = do
go e
v <- getVarCount
return . VIdent . Ident $ show v

View file

@ -1,91 +1,188 @@
{-# LANGUAGE LambdaCase #-}
module Compiler.LLVMIr where
import Data.List (intercalate)
import Grammar.Abs (Ident (Ident))
module Compiler.LLVMIr (LLVMType (..), LLVMIr (..), llvmIrToString, LLVMValue (..), LLVMComp (..)) where
import Data.List (intercalate)
import Grammar.Abs (Ident (Ident))
-- | A datatype which represents some basic LLVM types
data LLVMType = I1 | I8 | I32 | I64 | Ptr
| Ref LLVMType | Array Integer LLVMType | CustomType Ident
data LLVMType
= I1
| I8
| I32
| I64
| Ptr
| Ref LLVMType
| Array Integer LLVMType
| CustomType Ident
instance Show LLVMType where
show :: LLVMType -> String
show t = case t of
I1 -> "i1"
I8 -> "i8"
I32 -> "i32"
I64 -> "i64"
Ptr -> "ptr"
Ref ty -> show ty <> "*"
Array n ty -> concat ["[", show n, " x ", show ty, "]"]
show = \case
I1 -> "i1"
I8 -> "i8"
I32 -> "i32"
I64 -> "i64"
Ptr -> "ptr"
Ref ty -> show ty <> "*"
Array n ty -> concat ["[", show n, " x ", show ty, "]"]
CustomType (Ident ty) -> ty
-- | Represents a LLVM "value", as in an integer, a register variable,
-- or a string contstant
data Value = VInteger Integer | VIdent Ident | VConstant String
instance Show Value where
show :: Value -> String
data LLVMComp
= LLEq
| LLNe
| LLUgt
| LLUge
| LLUlt
| LLUle
| LLSgt
| LLSge
| LLSlt
| LLSle
instance Show LLVMComp where
show :: LLVMComp -> String
show = \case
LLEq -> "eq"
LLNe -> "ne"
LLUgt -> "ugt"
LLUge -> "uge"
LLUlt -> "ult"
LLUle -> "ule"
LLSgt -> "sgt"
LLSge -> "sge"
LLSlt -> "slt"
LLSle -> "sle"
{- | Represents a LLVM "value", as in an integer, a register variable,
or a string contstant
-}
data LLVMValue = VInteger Integer | VIdent Ident | VConstant String
instance Show LLVMValue where
show :: LLVMValue -> String
show v = case v of
VInteger i -> show i
VInteger i -> show i
VIdent (Ident i) -> "%" <> i
VConstant s -> "c" <> show s
VConstant s -> "c" <> show s
type Params = [(LLVMType, Ident)]
type Args = [(LLVMType, Value)]
type Args = [(LLVMType, LLVMValue)]
-- | A datatype which represents different instructions in LLVM
data LLVMIr = Define LLVMType Ident Params
| DefineEnd
| Declare LLVMType Ident Params
| SetVariable Ident
| Add LLVMType Value Value
| Sub LLVMType Value Value
| Div LLVMType Value Value
| Mul LLVMType Value Value
| Srem LLVMType Value Value
| Call LLVMType Ident Args
| Alloca LLVMType
| Store LLVMType Ident LLVMType Ident
| Bitcast LLVMType Ident LLVMType
| Ret LLVMType Value
| Comment String
| UnsafeRaw String -- This should generally be avoided, and proper
-- instructions should be used in its place
data LLVMIr
= Define LLVMType Ident Params
| DefineEnd
| Declare LLVMType Ident Params
| SetVariable Ident LLVMIr
| Variable Ident
| Add LLVMType LLVMValue LLVMValue
| Sub LLVMType LLVMValue LLVMValue
| Div LLVMType LLVMValue LLVMValue
| Mul LLVMType LLVMValue LLVMValue
| Srem LLVMType LLVMValue LLVMValue
| Icmp LLVMComp LLVMType LLVMValue LLVMValue
| Br Ident
| BrCond LLVMValue Ident Ident
| Label Ident
| Call LLVMType Ident Args
| Alloca LLVMType
| Store LLVMType Ident LLVMType Ident
| Bitcast LLVMType Ident LLVMType
| Ret LLVMType LLVMValue
| Comment String
| UnsafeRaw String -- This should generally be avoided, and proper
-- instructions should be used in its place
deriving (Show)
-- | Converts a LLVM inststruction to a String, allowing for printing etc.
llvmIrToString :: LLVMIr -> String
llvmIrToString = \case
(Define t (Ident i) params) -> concat ["define ", show t, " @", i, "("
, intercalate "," (fmap (\(x,Ident y) -> unwords [show x, "%"<>y]) params)
,") {\n"]
DefineEnd -> "}\n"
(Declare _t (Ident _i) _params) -> undefined
(SetVariable (Ident i)) -> concat ["%", i, " = "]
(Add t v1 v2) -> concat ["add ", show t, " "
, show v1, ", ", show v2
, "\n"]
(Sub t v1 v2) -> concat ["sub ", show t, " "
, show v1, ", "
, show v2, "\n"]
(Div t v1 v2) -> concat ["sdiv ", show t, " "
, show v1, ", "
, show v2, "\n"]
(Mul t v1 v2) -> concat ["mul ", show t, " "
, show v1, ", "
, show v2, "\n"]
(Srem t v1 v2) -> concat ["srem ", show t, " "
, show v1, ", "
, show v2, "\n"]
(Call t (Ident i) arg) -> concat ["call ", show t, " @", i, "("
, intercalate ", " $ Prelude.map (\(x,y) -> show x <> " " <> show y) arg
, ")\n"]
(Alloca t) -> unwords ["alloca", show t, "\n"]
(Store t1 (Ident id1) t2 (Ident id2)) -> concat ["store ", show t1, " %"
, id1, ", ", show t2, " %"
, id2, "\n"]
(Bitcast t1 (Ident i) t2) -> concat ["bitcast ", show t1, " %"
, i, " to ", show t2, "\n"]
(Ret t v) -> concat ["ret ", show t
, " ", show v, "\n"]
(UnsafeRaw s) -> s
(Comment s) -> "; " <> s <> "\n"
-- | Converts a list of LLVMIr instructions to a string
llvmIrToString :: [LLVMIr] -> String
llvmIrToString = go 0
where
go :: Int -> [LLVMIr] -> String
go _ [] = mempty
go i (x : xs) = do
let (i', n) = case x of
Define{} -> (i + 1, 0)
DefineEnd -> (i - 1, 0)
_ -> (i, i)
insToString n x <> go i' xs
{- | Converts a LLVM inststruction to a String, allowing for printing etc.
The integer represents the indentation
-}
{- FOURMOLU_DISABLE -}
insToString :: Int -> LLVMIr -> String
insToString i l =
replicate i '\t' <> case l of
(Define t (Ident i) params) ->
concat
[ "define ", show t, " @", i
, "(", intercalate ", " (fmap (\(x, Ident y) -> unwords [show x, "%" <> y]) params)
, ") {\n"
]
DefineEnd -> "}\n"
(Declare _t (Ident _i) _params) -> undefined
(SetVariable (Ident i) ir) -> concat ["%", i, " = ", insToString 0 ir]
(Add t v1 v2) ->
concat
[ "add ", show t, " ", show v1
, ", ", show v2, "\n"
]
(Sub t v1 v2) ->
concat
[ "sub ", show t, " ", show v1, ", "
, show v2, "\n"
]
(Div t v1 v2) ->
concat
[ "sdiv ", show t, " ", show v1, ", "
, show v2, "\n"
]
(Mul t v1 v2) ->
concat
[ "mul ", show t, " ", show v1
, ", ", show v2, "\n"
]
(Srem t v1 v2) ->
concat
[ "srem ", show t, " ", show v1, ", "
, show v2, "\n"
]
(Call t (Ident i) arg) ->
concat
[ "call ", show t, " @", i, "("
, intercalate ", " $ Prelude.map (\(x, y) -> show x <> " " <> show y) arg
, ")\n"
]
(Alloca t) -> unwords ["alloca", show t, "\n"]
(Store t1 (Ident id1) t2 (Ident id2)) ->
concat
[ "store ", show t1, " %", id1
, ", ", show t2 , " %", id2, "\n"
]
(Bitcast t1 (Ident i) t2) ->
concat
[ "bitcast ", show t1, " %"
, i, " to ", show t2, "\n"
]
(Icmp comp t v1 v2) ->
concat
[ "icmp ", show comp, " ", show t
, " ", show v1, ", ", show v2, "\n"
]
(Ret t v) ->
concat
[ "ret ", show t, " "
, show v, "\n"
]
(UnsafeRaw s) -> s
(Label (Ident s)) -> "\nlabel_" <> s <> ":\n"
(Br (Ident s)) -> "br label %label_" <> s <> "\n"
(BrCond val (Ident s1) (Ident s2)) ->
concat
[ "br i1 ", show val, ", ", "label %"
, "label_", s1, ", ", "label %", "label_", s2, "\n"
]
(Comment s) -> "; " <> s <> "\n"
(Variable (Ident id)) -> "%" <> id
{- FOURMOLU_ENABLE -}

View file

@ -1,10 +0,0 @@
{-# LANGUAGE TemplateHaskell #-}
module Compiler.StandardLLVMLibrary where
import Compiler.TH (includeStr)
-- | Uses Template Haskell to load our "standard library", which is written in
-- LLVM IR. This library simply includes functions to generate strings from
-- i64s and to print ints.
standardLLVMLibrary :: String
standardLLVMLibrary = $(includeStr "src/Compiler/standard_library.ll")

View file

@ -1,14 +0,0 @@
{-# LANGUAGE TemplateHaskellQuotes #-}
module Compiler.TH where
import Language.Haskell.TH (Exp, Q)
import System.IO.Unsafe (unsafePerformIO)
-- While this is hacky (specifically the use of unsafePerformIO)
-- in this case I think it is fine, as if an invalid string
-- is passed to the function it will fail to compile,
-- which is the intended behavior. This allows us to
-- import strings (such as our "standard LLVM library")
-- during compile time, removing the need to ship the source for
-- that with the compiler.
includeStr :: String -> Q Exp
includeStr file = [|unsafePerformIO $ readFile file|]

View file

@ -1,118 +0,0 @@
declare i64 @llvm.abs.i64(i64, i1 immarg)
declare i64 @llvm.powi.i64.i64(i64, i64)
declare i32 @puts(i8* nocapture) nounwind
define [21 x i8] @i64ToString(i64 %val_org) {
%val = alloca i64
store i64 %val_org, i64* %val
%val_copy = add i64 0, %val_org
; https://stackoverflow.com/a/7123710
; an algorithm for translating ints to strings
; s = ''
; sign = ''
; if i < 0:
; sign = '-'
; i = -i
; while True:
; remainder = i % 10
; i = i / 10
; s = chr(ord('0') + remainder) + s
; if i == 0:
; break
; return sign + s
; allocate memory for the string, and store the temp variable into it
%string_ptr = alloca [21 x i8]
store [21 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [21 x i8]* %string_ptr
; create a pointer to the array
%array_index_ptr = alloca i32
store i32 0, i32* %array_index_ptr
br label %while_point
while_point:
%val.tmp = load i64, i64* %val; load i64, i64* %val
%val.1 = call i64 @llvm.abs.i64(i64 %val.tmp, i1 true)
%tmp = load i32, i32* %array_index_ptr
%array_pointer.1 = getelementptr [21 x i8], [21 x i8]* %string_ptr, i32 0, i32 %tmp
%array_index_ptr.1 = add i32 %tmp, 1
store i32 %array_index_ptr.1, i32* %array_index_ptr
; this should not work, but it does
%remainder = srem i64 %val.1, 10
%remainder_tmp = alloca i64
store i64 %remainder, i64* %remainder_tmp
%remainder_tmp8 = bitcast i64* %remainder_tmp to i8*
%remainder8 = load i8, i8* %remainder_tmp8
%i = sdiv i64 %val.1, 10
%char = add i8 48, %remainder8
store i8 %char, i8* %array_pointer.1 ; update string!
store i64 %i, i64* %val
%while_condition = icmp eq i64 %i, 0
br i1 %while_condition, label %while_break, label %while_point
while_break:
; get last_pointer
%array_index_ptr.0 = load i32, i32* %array_index_ptr
%array_pointer.0 = getelementptr [21 x i8], [21 x i8]* %string_ptr, i32 0, i32 %array_index_ptr.0
; check if p is below 0
%condition = icmp slt i64 %val_copy, 0
br i1 %condition, label %negative_check_true, label %negative_check_false
negative_check_true:
store i8 45, i8* %array_pointer.0
br label %negative_check_done
negative_check_false:
; store i8 43, i8* %array_pointer.0
br label %negative_check_done
negative_check_done:
%noop = add i32 0, 0
; load the result and return it
call void @reverseI64String([21 x i8]* %string_ptr)
%res = load [21 x i8],[21 x i8]* %string_ptr
ret [21 x i8] %res
}
define void @reverseI64String([21 x i8]* %string) {
%new_string = alloca [21 x i8]
store [21 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [21 x i8]* %new_string
%reverse_index = alloca i32
store i32 20, i32* %reverse_index
%new_index = alloca i32
store i32 0, i32* %new_index
br label %while_start
while_start:
%reverse_index.0 = load i32, i32* %reverse_index
%reverse_ptr = getelementptr [21 x i8], [21 x i8]* %string, i32 0, i32 %reverse_index.0
%reverse_index.1 = sub i32 %reverse_index.0, 1
store i32 %reverse_index.1, i32* %reverse_index
%new_index.0 = load i32, i32* %new_index
%new_ptr = getelementptr [21 x i8], [21 x i8]* %new_string, i32 0, i32 %new_index.0
%break_condition = icmp slt i32 %reverse_index.0, 0
br i1 %break_condition, label %break, label %ok
ok:
%value = load i8, i8* %reverse_ptr
%condition = icmp eq i8 %value, 0
br i1 %condition, label %while_start, label %not_zero
not_zero:
store i8 %value, i8* %new_ptr
%new_index.1 = add i32 %new_index.0, 1
store i32 %new_index.1, i32* %new_index
br label %while_start
break:
%copy = load [21 x i8], [21 x i8]* %new_string
store [21 x i8] %copy, [21 x i8]* %string
; start iterating over the original string in reverse
; if i == '\00' ignore
; else put value into new_string at new i, then new i + 1
ret void
}

View file

@ -1,6 +1,8 @@
{-# LANGUAGE LambdaCase #-}
module Main where
import Compiler.Compiler (compile)
import GHC.IO.Handle.Text (hPutStrLn)
import Grammar.ErrM (Err)
import Grammar.Par (myLexer, pProgram)
import Grammar.Print (printTree)
@ -8,6 +10,7 @@ import Interpreter (interpret)
import LambdaLifter (abstract, freeVars, lambdaLift)
import System.Environment (getArgs)
import System.Exit (exitFailure, exitSuccess)
import System.IO (stderr)
main :: IO ()
main = getArgs >>= \case
@ -18,13 +21,18 @@ main' :: String -> IO ()
main' s = do
file <- readFile s
putStrLn "\n-- parse"
printToErr "-- Parse Tree -- "
parsed <- fromSyntaxErr . pProgram $ myLexer file
putStrLn $ printTree parsed
printToErr $ printTree parsed
putStrLn "\n-- Lambda Lifter"
printToErr "\n-- Lambda Lifter --"
let lifted = lambdaLift parsed
putStrLn $ printTree lifted
printToErr $ printTree lifted
--putStrLn "\n-- Compiler"
printToErr "\n -- Printing compiler output to stdout --"
compiled <- fromCompilerErr $ compile lifted
putStrLn compiled
-- interpred <- fromInterpreterErr $ interpret lifted
-- putStrLn "\n-- interpret"
@ -32,6 +40,16 @@ main' s = do
exitSuccess
printToErr :: String -> IO ()
printToErr = hPutStrLn stderr
fromCompilerErr :: Err a -> IO a
fromCompilerErr = either
(\err -> do
putStrLn "\nCOMPILER ERROR"
putStrLn err
exitFailure)
pure
fromSyntaxErr :: Err a -> IO a
fromSyntaxErr = either

View file

@ -1,20 +0,0 @@
; Copied directly from the documentation
; Declare the string constant as a global constant.
@.hello = private unnamed_addr constant [13 x i8] c"hello world\0A\00"
; External declaration of the puts function
declare i32 @puts(i8* nocapture) nounwind
; Definition of main function
define i32 @main() { ; i32()*
; Convert [13 x i8]* to i8 *...
%cast210 = getelementptr [13 x i8],[13 x i8]* @.hello, i64 0, i64 0
; Call puts function to write out the string to stdout.
call i32 @puts(i8* %cast210)
ret i32 0
}
; Named metadata
!0 = !{i32 42, null, !"string"}
!foo = !{!0}

View file

@ -1,5 +0,0 @@
tripplet : Int -> (Int -> (Int -> Int))
tripplet x y z = x + y + z;
main : Int
main = {-(((2 * (123 + 4214 % (1230)) - 1231) / 2) * 412412) +-} tripplet 5 1 2;

View file

@ -1,18 +0,0 @@
; External declaration of the puts function
declare i32 @puts(i8* nocapture) nounwind
declare [21 x i8] @i64ToString(i64)
; Definition of main function
define i32 @main() { ; i32()*
; %val = add i64 -123456789, 0
%val = add i64 -133780085, 0
%print_res = call [21 x i8] @i64ToString(i64 %val)
%ptr = alloca [21 x i8]
store [21 x i8] %print_res, [21 x i8]* %ptr
%printable = bitcast [21 x i8]* %ptr to i8*
call i32 @puts(i8* %printable)
ret i32 0
}