diff --git a/.gitignore b/.gitignore index 53eb51e..897dce2 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,7 @@ dist-newstyle Grammar.tex src/Grammar -churf +language llvm.ll /language .vscode/ diff --git a/Grammar.pdf b/Grammar.pdf index ead9695..f7f7a70 100644 Binary files a/Grammar.pdf and b/Grammar.pdf differ diff --git a/Justfile b/Justfile index 16be91b..96be864 100644 --- a/Justfile +++ b/Justfile @@ -6,7 +6,7 @@ build: # clean the generated directories clean: rm -r src/Grammar - rm churf + rm language rm -r dist-newstyle/ # run all tests @@ -14,46 +14,46 @@ test: cabal test debug FILE: - cabal run churf -- -d {{FILE}} + cabal run language -- -d {{FILE}} hm FILE: - cabal run churf -- -t hm {{FILE}} + cabal run language -- -t hm {{FILE}} bi FILE: - cabal run churf -- -t bi {{FILE}} + cabal run language -- -t bi {{FILE}} hml FILE: - cabal run churf -- -l -t hm {{FILE}} + cabal run language -- -l -t hm {{FILE}} bil FILE: - cabal run churf -- -l -t bi {{FILE}} + cabal run language -- -l -t bi {{FILE}} hmd FILE: - cabal run churf -- -d -t hm {{FILE}} + cabal run language -- -d -t hm {{FILE}} bid FILE: - cabal run churf -- -d -t bi {{FILE}} + cabal run language -- -d -t bi {{FILE}} hmdm FILE: - cabal run churf -- -d -t hm -m {{FILE}} + cabal run language -- -d -t hm -m {{FILE}} bidm FILE: - cabal run churf -- -d -t bi -m {{FILE}} + cabal run language -- -d -t bi -m {{FILE}} hmp FILE: - cabal run churf -- -t hm -p {{FILE}} + cabal run language -- -t hm -p {{FILE}} bip FILE: - cabal run churf -- -t bi -p {{FILE}} + cabal run language -- -t bi -p {{FILE}} hmdp FILE: - cabal run churf -- -t hm -d -p {{FILE}} + cabal run language -- -t hm -d -p {{FILE}} bidp FILE: - cabal run churf -- -t bi -d -p {{FILE}} + cabal run language -- -t bi -d -p {{FILE}} quicksort: - cabal run churf -- -t bi sample-programs/working/quicksort.crf + cabal run language -- -t bi sample-programs/working/quicksort.crf lc: - cabal run churf -- -t bi sample-programs/working/lambda_calculus-2.crf + cabal run language -- -t bi sample-programs/working/lambda_calculus-2.crf diff --git a/Makefile b/Makefile index e48386b..6c1ebde 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ pdf : Grammar.pdf clean : rm -r src/Grammar - rm churf + rm language rm -rf dist-newstyles rm Grammar.aux Grammar.fdb_latexmk Grammar.fls Grammar.log Grammar.synctex.gz Grammar.tex diff --git a/README.md b/README.md index 395da33..af09ebd 100644 --- a/README.md +++ b/README.md @@ -3,40 +3,246 @@ The branch [thesis](https://github.com/bachelor-group-66-systemf/churf/tree/thesis) contain the state of the project when the thesis report was submitted (2023-05-15). # Build +First generate the parser using [BNFC](https://bnfc.digitalgrammars.com/), +this is done using the command `bnfc -o src -d Grammar.cf` -Using [make](https://www.gnu.org/software/make/) the entire thing can be built by running `make` +Churf can then be built using `cabal install` + +Using the tool [make](https://www.gnu.org/software/make/) the entire thing can be built by running `make` +or using [just](https://github.com/casey/just), `just build` + +# Dependencies +If you have Nix installed, simply run `nix-shell --pure shell.nix` to get into an environment +with the right versions of packages. Then run `make` and the compiler should build. # Compiling a program -Using the Hindley-Milner type checker: `./churf -t hm ` +Using the Hindley-Milner type checker: `./language -t hm example.crf` -Using the bidirectional type checker: `./churf -t bi ` +Using the bidirectional type checker: `./language -t bi example.crf` -Running `./churf` will display a help message for the different available flags +The program to compile has to have the file extension `.crf` +# Syntax and quirks -# Syntax +See Grammar.pdf for the full syntax. + +The syntactic requirements differ a bit using the different type checkers. +The bidirectional type checker require explicit `forall` everywhere a type +forall quantified type variable is declared. In the Hindley-Milner type checker +all type variables are assumed to be forall quantified. + +Currently for the code generator and monomorphizer to work correctly it is +expected that the function `main` exist with either explicitly given type `Int` +or inferrable. Single line comments are written using `--` Multi line comments are written using `{-` and `-}` -The syntax of Churf can be read in [Grammar.pdf](https://github.com/bachelor-group-66-systemf/churf/blob/main/Grammar.pdf) +Braches and semicolons are optional. -Here is an example program in Churf +## Program + +A program is a list of defs separated by semicolons, which in turn is either a bind, a signature, or a data types +`Program ::= [Def]` ```hs -main = case odd (sum 123) of - True => printStr "odd!" - False => printStr "even!" - -sum = \x. case x of - 0 => 0 - n => n + (sum (n - 1)) - -odd x = case x of - 0 => False - n => even (n - 1) - -even x = case x of - 0 => True - n => odd (n - 1) +data Test () where + Test : Test () +test : Int +test = 0 ``` + +## Bind + +A bind is a name followed by a white space separated list of arguments, then an equal sign followed by an expression. +Both name and arguments have to start with lower case letters + +`Bind ::= LIdent [LIdent] "=" Exp` + +```hs +example x y = x + y +``` + +## Signature +A signature is a name followed by a colon and then the type +The name has to start with a lowe case letter + +`Sig ::= LIdent ":" Type` + +```hs +const : a -> b -> a +``` + +## Data type +A data type is declared as follows + +`Data ::= "data" Type "where" "{" [Inj] "}"` + +The words in quotes are necessary keywords +The type can be any type for parsing, but only `TData` will type check. + +The list of Inj is separated by white space. Using new lines is recommended for ones own sanity. + +```hs +data Maybe (a) where + Nothing : Maybe (a) + Just : a -> Maybe (a) +``` +The parens are necessary for every data type to make the grammar unambiguous. +Thus in `data Bool () where ...` the parens *do* *not* represent Unit + +### Inj +An inj is a constructor for the data type + +It is declared like a signature, except the name has to start with a lower case letter. +The return type of the constructor also has match the type of the data type to type check. + +`Inj ::= UIdent ":" Type` + +## Type + +A type can be either a type literal, type variable, function type, explicit forall quantified type or a type representing a data type +A type literal have to start with an upper case letter, type variables have to start with a lower case letter, +data types have to start with an upper case letter, a function type is two types separated by an arrow (arrows right associative), +and foralls take one type variable followed by a type. + +`TLit ::= UIdent` + +`TVar ::= LIdent` + +`TData ::= UIdent "(" [Type] ")"` + +`TFun ::= Type "->" Type` + +`TAll ::= "forall" LIdent "." Type` + +```hs +exampleLit : Int +exampleVar : a +exampleData : Maybe (a) +exampleFun : Int -> a +exampleAll : forall a. forall b. a -> b +``` + +## Expressions + +There are a couple different expressions, probably best explained by their rules + +Type annotated expression + +`EAnn ::= "(" Exp ":" Type ")"` + +Variable + +`EVar ::= LIdent` +```hs +x +``` + +Constructor + +`EInj ::= UIdent` +```hs +Just +``` + +Literal + +`ELit ::= Lit` +```hs +0 +``` + +Function application + +`EApp ::= Exp2 Exp3` +```hs +f 0 +``` + +Addition + +`EAdd ::= Exp1 "+" Exp2` +```hs +3 + 5 +``` + +Let expression + +`ELet ::= "let" Bind "in" Exp ` +```hs +let f x = x in f 0 +``` + +Abstraction, known as lambda or closure + +`EAbs ::= "\\" LIdent "." Exp` +```hs +\x. x +``` + +Case expression consist of a list semicolon separated list of Branches + +`ECase ::= "case" Exp "of" "{" [Branch] "}"` + +```hs +case xs of + Cons x xs => 1 + Nil => 0 +``` + +### Branch +A branch is a pattern followed by the fat arrow and then an expression + +`Branch ::= Pattern "=>" Exp` + +### Pattern +A pattern can be either a variable, literal, a wildcard represented by `_`, an enum constructor (constructor with zero arguments) +, or a constructor followed by a recursive list of patterns. + +Variable match + +`PVar ::= LIdent` + +The x in the following example +```hs +x => 0 +``` +Literal match + +`PLit ::= Lit` + +The 1 in the following example +```hs +1 => 0 +``` +A wildcard match + +`PCatch ::= "_"` + +The underscore in the following example +```hs +_ => 0 +``` +A constructor without arguments + +`PEnum ::= UIdent` + +The Nothing in the following example +```hs +Nothing => 0 +``` +The recursive match on a constructor + +`PInj ::= UIdent [Pattern1]` + +The outer Just represents the UIdent and the rest is the recursive match +```hs +Just (Just 0) => 1 +``` + +For simplicity sake a user does not need to consider these last two cases as different in parsing. +We allow arbitrarily deep pattern matching. + +## Literal +We currently allow two different literals: Integer and Char diff --git a/benchmark.py b/benchmark.py new file mode 100755 index 0000000..40f0a15 --- /dev/null +++ b/benchmark.py @@ -0,0 +1,21 @@ +#!/bin/env/python3 + +import sys +import os +import time + +if __name__ == "__main__": + args = sys.argv + if len(args) == 1: + print ("first arg is number of loops second is exe") + else: + total = 0 + iter = int(args[1]) + for i in range(iter): + time_pre = time.time() + os.system("./" + args[2] + "> /dev/null") + time_post = time.time() + calc = time_post - time_pre + total += calc + + print ("File: " + args[2] + ", " + str(iter) + " runs gave average: " + str(total / iter) + "s") diff --git a/benchmark.txt b/benchmark.txt new file mode 100644 index 0000000..c12461e --- /dev/null +++ b/benchmark.txt @@ -0,0 +1,9 @@ +# Full optimization Churf +File: output/hello_world, 100 runs gave average: 0.025261127948760988s + +# O2 Haskell +File: ./Bench, 100 runs gave average: 0.05629507303237915s + +# 03 Haskell +File: ./Bench, 100 runs gave average: 0.05490849256515503s +File: ./Bench, 100 runs gave average: 0.05323728561401367s diff --git a/churfy.png b/churfy.png deleted file mode 100644 index fbe57fa..0000000 Binary files a/churfy.png and /dev/null differ diff --git a/demo/lambda_calculus b/demo/lambda_calculus new file mode 100755 index 0000000..26c97b7 Binary files /dev/null and b/demo/lambda_calculus differ diff --git a/demo/lambda_calculus.crf b/demo/lambda_calculus.crf index e0f6377..f483cb2 100644 --- a/demo/lambda_calculus.crf +++ b/demo/lambda_calculus.crf @@ -24,20 +24,12 @@ insert : Char -> Val -> Cxt -> Cxt insert x v cxt = case cxt of Cxt ps => Cxt (Cons (Pair x v) ps) - --- (λx. x + x) 200 -exp = EApp - (EAbs 'x' - (EAdd - (EVar 'x') - (EVar 'x'))) - (EInt 200) - eval : Cxt -> Exp -> Val eval cxt exp = case exp of - EAbs x e => VClosure cxt x e EVar x => case lookup x cxt of + VInt i => VInt i VClosure delta x e => eval delta e + EAbs x e => VClosure cxt x e EApp e1 e2 => case eval cxt e1 of VClosure delta x f => let v = VClosure cxt x e2 in @@ -48,6 +40,14 @@ eval cxt exp = case exp of let i2 = case eval cxt e2 of { VInt i => i } in VInt (i1 + i2) +-- (λx. x + x) 200 +exp = EApp + (EAbs 'x' + (EAdd + (EVar 'x') + (EVar 'x'))) + (EInt 200) + main : Int main = case eval (Cxt Nil) exp of VInt i => i diff --git a/demo/lambda_calculus.sh b/demo/lambda_calculus.sh deleted file mode 100755 index 8509baa..0000000 --- a/demo/lambda_calculus.sh +++ /dev/null @@ -1,2 +0,0 @@ -cd .. -./language -t bi demo/lambda_calculus.crf diff --git a/demo/quicksort b/demo/quicksort new file mode 100755 index 0000000..3917ce2 Binary files /dev/null and b/demo/quicksort differ diff --git a/demo/quicksort.crf b/demo/quicksort.crf index 3dd1677..e97af3a 100644 --- a/demo/quicksort.crf +++ b/demo/quicksort.crf @@ -23,110 +23,6 @@ filter p xs = case xs of True => Cons x (filter p xs) False => filter p xs -.++ : List a -> List a -> List a .++ list1 list2 = case list1 of Nil => list2 Cons x xs => Cons x (xs ++ list2) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -printListH : List Int -> Unit -printListH xs = case xs of - Cons a as => flipConst (printInt a) (printListHH as) - Nil => Unit - -printListHH : List Int -> Unit -printListHH xs = case xs of - Nil => Unit - Cons a as => flipConst (printChar ',') (flipConst (printInt a) (printListHH as)) - -printList : List Int -> Unit -printList xs = case Cons (printChar '[') (Cons (printListH xs) (Cons (printChar ']') Nil)) of - _ => Unit - -flipConst x y = y - - -printInt : Int -> Unit -printInt xs = Unit - -printChar : Char -> Unit -printChar = \x. Unit - -data Unit where - Unit : Unit diff --git a/demo/quicksort.sh b/demo/quicksort.sh deleted file mode 100755 index bfc145d..0000000 --- a/demo/quicksort.sh +++ /dev/null @@ -1,2 +0,0 @@ -cd .. -./language -t bi demo/quicksort.crf diff --git a/fourmolu.yaml b/fourmolu.yaml new file mode 100644 index 0000000..8b96b58 --- /dev/null +++ b/fourmolu.yaml @@ -0,0 +1 @@ +indent-wheres: false diff --git a/churf.cabal b/language.cabal similarity index 96% rename from churf.cabal rename to language.cabal index 27496ce..fa7841a 100644 --- a/churf.cabal +++ b/language.cabal @@ -1,6 +1,6 @@ cabal-version: 3.4 -name: churf +name: language version: 0.1.0.0 license: MIT @@ -18,7 +18,7 @@ extra-source-files: common warnings ghc-options: -w -executable churf +executable language import: warnings main-is: Main.hs @@ -72,11 +72,10 @@ executable churf , QuickCheck , directory , process - , filepath default-language: GHC2021 -Test-suite churf-testsuite +Test-suite language-testsuite type: exitcode-stdio-1.0 main-is: Main.hs diff --git a/pipeline.txt b/pipeline.txt new file mode 100644 index 0000000..1872562 --- /dev/null +++ b/pipeline.txt @@ -0,0 +1,27 @@ + + Parser + | + ReportForall Report unnecessary foralls. Hm: report rank>2 foralls + | + AnnotateForall Annotate all unbound type variables with foralls + | + Renamer Rename type variables and term variables + | + / \ + / \ + TypeCheckHm TypeCheckBi + \ / + \ / + | + ReportTEVar Report type existential variables and change type AST + | + RemoveForall RemoveForall and change type AST + | + Monomorpher + | + Desugar + | + CodeGen + + + diff --git a/spec.txt b/spec.txt new file mode 100644 index 0000000..2273846 --- /dev/null +++ b/spec.txt @@ -0,0 +1,121 @@ +--------------------------------------------------------------------------- +-- * Parser +--------------------------------------------------------------------------- + +data Program = Program [Def] + +data Def = DSig Ident Type | DBind Bind + +data Bind = Bind Ident [Ident] Exp + +data Exp + = EId Ident + | ELit Lit + | EAnn Exp Type + | ELet Ident Exp Exp + | EApp Exp Exp + | EAdd Exp Exp + | EAbs Ident Exp + +data Lit = LInt Integer + | LChar Character + +data Type + = TLit Ident -- τ + | TVar TVar -- α + | TFun Type Type -- A → A + | TAll TVar Type -- ∀α. A + | TEVar TEVar -- ά (internal) + +data TVar = MkTVar Ident +data TEVar = MkTEVar Ident + +--------------------------------------------------------------------------- +-- * Type checker +--------------------------------------------------------------------------- + +-- • Def and DSig are removed in favor on just Bind +-- • Typed expressions +-- • TEVar is removed (NOT IMPLEMENTED) + +newtype Program = Program [Bind] + +data Bind = Bind Id [Id] ExpT + +data Exp + = EId Ident + | ELit Lit + | ELet Bind ExpT + | EApp ExpT ExpT + | EAdd ExpT ExpT + | EAbs Ident ExpT + +type Id = (Ident, Type) +type ExpT = (Exp, Type) + + +data Lit = LInt Integer + | LChar Character + +data Type + = TLit Ident -- τ + | TVar TVar -- α + | TFun Type Type -- A → A + | TAll TVar Type -- ∀α. A + +data TVar = MkTVar Ident + +--------------------------------------------------------------------------- +-- * Lambda lifter +--------------------------------------------------------------------------- +-- • EAbs are removed (NOT IMPLEMENTED) +-- • ELet only allow constant expressions (NOT IMPLEMENTED) + +newtype Program = Program [Bind] + +data Bind = Bind Id [Id] ExpT + +data Exp + = EId Ident + | ELit Lit + | ELet Ident ExpT ExpT + | EApp ExpT ExpT + | EAdd ExpT ExpT + +type Id = (Ident, Type) +type ExpT = (Exp, Type) + +data Lit = LInt Integer + | LChar Character + +data Type + = TLit Ident -- τ + | TVar TVar -- α + | TFun Type Type -- A → A + | TAll TVar Type -- ∀α. A + +data TVar = MkTVar Ident + +--------------------------------------------------------------------------- +-- * Monomorpher +--------------------------------------------------------------------------- +-- • Polymorphic types are removed (NOT IMPLEMENTED) + +newtype Program = Program [Bind] + +data Bind = Bind Id [Id] ExpT + +data Exp + = EId Ident + | ELit Lit + | ELet Ident ExpT ExpT + | EApp ExpT ExpT + | EAdd ExpT ExpT + +type Id = (Ident, Type) +type ExpT = (Exp, Type) + +data Lit = LInt Integer + | LChar Character + +data Type = Type Ident diff --git a/src/Compiler.hs b/src/Compiler.hs index 4abe422..6f7fb24 100644 --- a/src/Compiler.hs +++ b/src/Compiler.hs @@ -7,8 +7,8 @@ import System.Process.Extra (readCreateProcess, shell) optimize :: String -> IO String optimize = readCreateProcess (shell "opt --O3 --tailcallopt -S") -compileClang :: String -> Bool -> String -> IO String -compileClang name False = +compileClang :: Bool -> String -> IO String +compileClang False = readCreateProcess . shell $ unwords [ "clang++" -- , "-Lsrc/GC/lib/", "-l:libgcoll.a" @@ -16,10 +16,10 @@ compileClang name False = , "-x" , "ir" -- , "-Lsrc/GC/lib -l:gcoll.a" , "-o" - , "output/" <> name + , "output/hello_world" , "-" ] -compileClang name True = +compileClang True = readCreateProcess . shell $ unwords [ "clang++" -- , "-Lsrc/GC/lib/", "-l:libgcoll.a" @@ -36,9 +36,9 @@ compileClang name True = , "-x" , "ir" -- , "-Lsrc/GC/lib -l:gcoll.a" , "-o" - , "output/" <> name + , "output/hello_world" , "-" ] -compile :: String -> String -> Bool -> IO String -compile name s addGc = optimize s >>= compileClang name addGc +compile :: String -> Bool -> IO String +compile s addGc = optimize s >>= compileClang addGc diff --git a/src/Main.hs b/src/Main.hs index 4864901..98900b8 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -8,7 +8,6 @@ import Compiler (compile) import Control.Monad (when, (<=<)) import Data.List.Extra (isSuffixOf) import Data.Maybe (fromJust, isNothing) -import Data.Tuple.Extra (uncurry3) import Desugar.Desugar (desugar) -- import Expander (expand) import GHC.IO.Handle.Text (hPutStrLn) @@ -33,30 +32,27 @@ import System.Environment (getArgs) import System.Exit (ExitCode (ExitFailure), exitFailure, exitSuccess, exitWith) -import System.FilePath.Posix (takeFileName, dropExtensions) import System.IO (stderr) import System.Process (spawnCommand, waitForProcess) import TypeChecker.TypeChecker (TypeChecker (Bi, Hm), typecheck) main :: IO () -main = getArgs >>= parseArgs >>= uncurry3 main' +main = getArgs >>= parseArgs >>= uncurry main' -parseArgs :: [String] -> IO (Options, String, String) +parseArgs :: [String] -> IO (Options, String) parseArgs argv = case getOpt RequireOrder flags argv of - (os, f : xs, []) + (os, f : _, []) | opts.help || isNothing opts.typechecker -> do hPutStrLn stderr (usageInfo header flags) exitSuccess - | otherwise -> do - let name = dropExtensions $ takeFileName f - pure (opts, name, f) + | otherwise -> pure (opts, f) where opts = foldr ($) initOpts os (_, _, errs) -> do hPutStrLn stderr (concat errs ++ usageInfo header flags) exitWith (ExitFailure 1) where - header = "Usage: churf [--help] [-l|--log-intermediate] [-d|--debug] [-m|--disable-gc] [-t|--type-checker bi/hm] [-p|--disable-prelude] \n" + header = "Usage: language [--help] [-l|--log-intermediate] [-d|--debug] [-m|--disable-gc] [-t|--type-checker bi/hm] [-p|--disable-prelude] \n" flags :: [OptDescr (Options -> Options)] flags = @@ -112,8 +108,8 @@ data Options = Options , logIL :: Bool } -main' :: Options -> String -> String -> IO () -main' opts name s = +main' :: Options -> String -> IO () +main' opts s = let log :: (Print a, Show a) => a -> IO () log = printToErr . if opts.debug then show else printTree @@ -153,6 +149,7 @@ main' opts name s = generatedCode <- fromErr $ generateCode monomorphized (gc opts) + -- generatedCode <- fromErr $ generateCode monomorphized False check <- doesPathExist "output" when check (removeDirectoryRecursive "output") @@ -162,11 +159,13 @@ main' opts name s = when opts.debug $ do printToErr "\n -- Compiler --" writeFile "output/llvm.ll" generatedCode + --debugDotViz - compile name generatedCode (gc opts) + compile generatedCode (gc opts) + -- compile generatedCode False printToErr "Compilation done!" printToErr "\n-- Program output --" - print =<< spawnWait ("./output/" <> name) + print =<< spawnWait "./output/hello_world" exitSuccess diff --git a/test_program.crf b/test_program.crf new file mode 100644 index 0000000..ac089a3 --- /dev/null +++ b/test_program.crf @@ -0,0 +1 @@ +main = let f x = x in f 123