diff --git a/language.cabal b/language.cabal index 255f75c..9785d75 100644 --- a/language.cabal +++ b/language.cabal @@ -43,6 +43,10 @@ executable language Codegen.Codegen Codegen.LlvmIr Compiler + Renamer.Renamer + --Codegen.Codegen + --Codegen.LlvmIr + TreeConverter hs-source-dirs: src @@ -75,6 +79,8 @@ Test-suite language-testsuite Grammar.Skel Grammar.ErrM Auxiliary + Monomorphizer.Monomorphizer + Monomorphizer.MonomorphizerIr Renamer.Renamer TypeChecker.TypeChecker TypeChecker.TypeCheckerHm @@ -96,6 +102,7 @@ Test-suite language-testsuite , QuickCheck , process , bytestring + , hspec default-language: GHC2021 diff --git a/llvm.ll b/llvm.ll new file mode 100644 index 0000000..cd6b190 --- /dev/null +++ b/llvm.ll @@ -0,0 +1,10 @@ +@.str = private unnamed_addr constant [3 x i8] c"%i +", align 1 +declare i32 @printf(ptr noalias nocapture, ...) + +; Ident "main": EAdd (TMono (Ident "Int")) (ELit (TMono (Ident "Int")) (LInt 3)) (ELit (TMono (Ident "Int")) (LInt 3)) +define i64 @main() { + %1 = add i64 3, 3 + call i32 (ptr, ...) @printf(ptr noundef @.str, i64 noundef %1) + ret i64 0 +} diff --git a/sample-programs/good1 b/sample-programs/good1 new file mode 100644 index 0000000..b7aff4b --- /dev/null +++ b/sample-programs/good1 @@ -0,0 +1,6 @@ +main : _Int ; +main = (id : _Int -> _Int) 5 ; + +id : 'a -> 'a ; +id x = (x : 'a); + diff --git a/sample-programs/mono.crf b/sample-programs/mono.crf new file mode 100644 index 0000000..e682b7d --- /dev/null +++ b/sample-programs/mono.crf @@ -0,0 +1,5 @@ +const x y = x; + +f x = (const x 'c'); + +main = f 5; diff --git a/src/Main.hs b/src/Main.hs index 99cd84b..1864a17 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,5 +1,4 @@ {-# LANGUAGE OverloadedRecordDot #-} - module Main where import Control.Monad (when) @@ -20,7 +19,6 @@ import System.Exit (ExitCode (ExitFailure), exitFailure, exitSuccess, exitWith) import System.IO (stderr) - import Codegen.Codegen (generateCode) import Compiler (compile) import Grammar.ErrM (Err) @@ -105,6 +103,10 @@ main' opts s = do --let lifted = lambdaLift typechecked --printToErr $ printTree lifted + printToErr "\n -- Compiler --" + let monomorphized = monomorphize typechecked + printToErr $ show monomorphized + -- printToErr "\n-- Lambda Lifter --" -- let lifted = lambdaLift typechecked -- printToErr $ printTree lifted @@ -122,6 +124,19 @@ main' opts s = do compile generatedCode spawnWait "./output/hello_world" + --printToErr "\n -- Compiler --" + --generatedCode <- fromCompilerErr $ generateCode (monomorphize typechecked) + --putStrLn generatedCode + + --check <- doesPathExist "output" + --when check (removeDirectoryRecursive "output") + --createDirectory "output" + --when debug $ do + -- writeFile "output/llvm.ll" generatedCode + -- debugDotViz + + --compile generatedCode + --spawnWait "./hello_world" -- interpred <- fromInterpreterErr $ interpret lifted -- putStrLn "\n-- interpret" -- print interpred diff --git a/src/Monomorphizer/Monomorphizer.hs b/src/Monomorphizer/Monomorphizer.hs index 01cc4a4..17994c0 100644 --- a/src/Monomorphizer/Monomorphizer.hs +++ b/src/Monomorphizer/Monomorphizer.hs @@ -1,78 +1,217 @@ -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE OverloadedStrings #-} - -module Monomorphizer.Monomorphizer (monomorphize) where - -import Data.Coerce (coerce) +-- | For now, converts polymorphic functions to concrete ones based on usage. +-- Assumes lambdas are lifted. +-- +-- This step of compilation is as follows: +-- +-- Split all function bindings into monomorphic and polymorphic binds. The +-- monomorphic bindings will be part of this compilation step. +-- Apply the following monomorphization function on all monomorphic binds, with +-- their type as an additional argument. +-- +-- The function that transforms Binds operates on both monomorphic and +-- polymorphic functions, creates a context in which all possible polymorphic types +-- are mapped to concrete types, created using the additional argument. +-- Expressions are then recursively processed. The type of these expressions +-- are changed to using the mapped generic types. The expected type provided +-- in the recursion is changed depending on the different nodes. +-- +-- When an external bind is encountered (with EId), it is checked whether it +-- exists in outputed binds or not. If it does, nothing further is evaluated. +-- If not, the bind transformer function is called on it with the +-- expected type in this context. The result of this computation (a monomorphic +-- bind) is added to the resulting set of binds. + +{-# LANGUAGE LambdaCase #-} +module Monomorphizer.Monomorphizer (monomorphize, morphExp, morphBind) where +import qualified TypeChecker.TypeCheckerIr as T +import TypeChecker.TypeCheckerIr (Ident (Ident)) import qualified Monomorphizer.MonomorphizerIr as M -import qualified TypeChecker.TypeCheckerIr as T -import TypeChecker.TypeCheckerIr (Ident (..)) +import Debug.Trace +import Control.Monad.State (MonadState, gets, modify, StateT (runStateT)) +import qualified Data.Map as Map +import qualified Data.Set as Set +import Data.Maybe (fromJust) +import Control.Monad.Reader (Reader, MonadReader (local, ask), asks, runReader) +import Data.Coerce (coerce) + +-- | State Monad wrapper for "Env". +newtype EnvM a = EnvM (StateT Output (Reader Env) a) + deriving (Functor, Applicative, Monad, MonadState Output, MonadReader Env) + +type Output = Map.Map Ident Outputted +-- When a bind is being processed, it is Incomplete in the state, also +-- called marked. +data Outputted = Incomplete | Complete M.Bind + +-- Static environment +data Env = Env { + -- | All binds in the program. + input :: Map.Map Ident T.Bind, + -- | Maps polymorphic identifiers with concrete types. + polys :: Map.Map Ident M.Type, + -- | Local variables + locals :: Set.Set Ident +} + +runEnvM :: Output -> Env -> EnvM () -> Output +runEnvM o env (EnvM stateM) = snd $ runReader (runStateT stateM o) env + +-- | Creates the environment based on the input binds. +createEnv :: [T.Bind] -> Env +createEnv binds = Env { input = Map.fromList kvPairs, + polys = Map.empty, + locals = Set.empty } + where + kvPairs :: [(Ident, T.Bind)] + kvPairs = map (\b@(T.Bind (ident, _) _ _) -> (ident, b)) binds + +localExists :: Ident -> EnvM Bool +localExists ident = asks (Set.member ident . locals) + +-- | Gets a polymorphic bind from an id. +getInputBind :: Ident -> EnvM (Maybe T.Bind) +getInputBind ident = asks (Map.lookup ident . input) + +-- | Add monomorphic function derived from a polymorphic one, to env. +addOutputBind :: M.Bind -> EnvM () +addOutputBind b@(M.Bind (ident, _) _ _) = modify (Map.insert ident (Complete b)) + +-- | Marks a global bind as being processed, meaning that when encountered again, +-- it should not be recursively processed. +markBind :: Ident -> EnvM () +markBind ident = modify (Map.insert ident Incomplete) + +-- | Check if bind has been touched or not. +isBindMarked :: Ident -> EnvM Bool +isBindMarked ident = gets (Map.member ident) + +-- | Finds main bind +getMain :: EnvM T.Bind +getMain = asks (\env -> fromJust $ Map.lookup (T.Ident "main") (input env)) + +-- NOTE: could make this function more optimized +-- | Makes a kv pair list of polymorphic to monomorphic mappings, throws runtime +-- error when encountering different structures between the two arguments. +mapTypes :: T.Type -> M.Type -> [(Ident, M.Type)] +mapTypes (T.TLit _) (M.TLit _) = [] +mapTypes (T.TVar (T.MkTVar i1)) tm = [(i1, tm)] +mapTypes (T.TFun pt1 pt2) (M.TFun mt1 mt2) = mapTypes pt1 mt1 ++ + mapTypes pt2 mt2 +mapTypes _ _ = error "structure of types not the same!" + +-- | Gets the mapped monomorphic type of a polymorphic type in the current context. +getMonoFromPoly :: T.Type -> EnvM M.Type +getMonoFromPoly t = do env <- ask + return $ getMono (polys env) t + where + getMono :: Map.Map Ident M.Type -> T.Type -> M.Type + getMono polys t = case t of + (T.TLit ident) -> M.TLit (coerce ident) + (T.TFun t1 t2) -> M.TFun (getMono polys t1) (getMono polys t2) + (T.TVar (T.MkTVar ident)) -> case Map.lookup ident polys of + Just concrete -> concrete + Nothing -> error $ + "type not found! type: " ++ show ident ++ ", error in previous compilation steps" + _ -> error "Not implemented" + +-- | If ident not already in env's output, morphed bind to output +-- (and all referenced binds within this bind). +-- Returns the annotated bind name. +-- TODO: Redundancy? btype and t should always be the same. +morphBind :: M.Type -> T.Bind -> EnvM Ident +morphBind expectedType b@(T.Bind (Ident _, btype) args (exp, t)) = + local (\env -> env { locals = Set.fromList (map fst args), + polys = Map.fromList (mapTypes btype expectedType) + }) $ do + -- The "new name" is used to find out if it is already marked or not. + let name' = newName expectedType b + bindMarked <- isBindMarked (coerce name') + -- Return with right name if already marked + if bindMarked then return name' else do + -- Mark so that this bind will not be processed in recursive or cyclic + -- function calls + markBind (coerce name') + exp' <- morphExp expectedType exp + addOutputBind $ M.Bind (coerce name', expectedType) + [] (exp', expectedType) + return name' + +-- Morphs function applications, such as EApp and EAdd +morphApp :: M.Type -> T.ExpT -> T.ExpT -> EnvM M.Exp +morphApp expectedType (e1, t1) (e2, t2)= do + t1' <- getMonoFromPoly t1 + t2' <- getMonoFromPoly t2 + e2' <- morphExp t2' e2 + e1' <- morphExp (M.TFun t2' expectedType) e1 + return $ M.EApp (e1', t1') (e2', t2') + +-- TODO: Change in tree so that these are the same. +-- Converts Lit +convertLit :: T.Lit -> M.Lit +convertLit (T.LInt v) = M.LInt v +convertLit (T.LChar v) = M.LChar v + +morphExp :: M.Type -> T.Exp -> EnvM M.Exp +morphExp expectedType exp = case exp of + T.ELit lit -> return $ M.ELit (convertLit lit) + T.EApp e1 e2 -> do + morphApp expectedType e1 e2 + T.EAdd e1 e2 -> do + morphApp expectedType e1 e2 + T.EAbs ident (exp, t) -> local (\env -> env { locals = Set.insert ident (locals env) }) $ do + t' <- getMonoFromPoly t + morphExp t' exp + T.EVar ident@(Ident str) -> do + isLocal <- localExists ident + if isLocal then do + return $ M.EVar (coerce ident) + else do + bind <- getInputBind ident + case bind of + Nothing -> + error $ "bind of name: " ++ str ++ " not found, bug in previous compilation steps" + Just bind' -> do + -- New bind to process + newBindName <- morphBind expectedType bind' + return $ M.EVar (coerce newBindName) + + T.ELet (T.Bind {}) _ -> error "lets not possible yet" + + _ -> error "Not implemented yet" + +-- Creates a new identifier for a function with an assigned type +newName :: M.Type -> T.Bind -> Ident +newName t (T.Bind (Ident bindName, _) _ _) = Ident (bindName ++ "$" ++ newName' t) + where + newName' :: M.Type -> String + newName' (M.TLit (Ident str)) = str + newName' (M.TFun t1 t2) = newName' t1 ++ "_" ++ newName' t2 + +-- Monomorphization step monomorphize :: T.Program -> M.Program -monomorphize (T.Program ds) = M.Program $ monoDefs ds +monomorphize (T.Program defs) = M.Program $ (getDefsFromBinds . getBindsFromOutput) + (runEnvM Map.empty (createEnv $ getBindsFromDefs defs) monomorphize') + where + monomorphize' :: EnvM () + monomorphize' = do + main <- getMain + morphBind (M.TLit $ Ident "Int") main + return () -monoDefs :: [T.Def] -> [M.Def] -monoDefs = map monoDef +getBindsFromOutput :: Output -> [M.Bind] +getBindsFromOutput outputMap = (map snd . Map.toList) $ fmap + (\case + Incomplete -> error "Internal bug in monomorphizer" + Complete b -> b ) + outputMap -monoDef :: T.Def -> M.Def -monoDef (T.DBind bind) = M.DBind $ monoBind bind -monoDef (T.DData d) = M.DData $ monoData d +getBindsFromDefs :: [T.Def] -> [T.Bind] +getBindsFromDefs = foldl (\bs -> \case + T.DBind b -> b:bs + T.DData _ -> bs + ) [] +getDefsFromBinds :: [M.Bind] -> [M.Def] +getDefsFromBinds = foldl (\ds b -> M.DBind b : ds) [] -monoBind :: T.Bind -> M.Bind -monoBind (T.Bind name args (e, t)) = M.Bind (monoId name) (map monoId args) (monoExpr e, monoType t) - -monoData :: T.Data -> M.Data -monoData (T.Data id cs) = M.Data (monoType id) (map monoConstructor cs) - -monoConstructor :: T.Inj -> M.Inj -monoConstructor (T.Inj (Ident i) t) = M.Inj (T.Ident i) (monoType t) - -monoExpr :: T.Exp -> M.Exp -monoExpr = \case - T.EVar i -> M.EVar i - T.ELit lit -> M.ELit $ monoLit lit - T.ELet bind expt -> M.ELet (monoBind bind) (monoexpt expt) - T.EApp expt1 expt2 -> M.EApp (monoexpt expt1) (monoexpt expt2) - T.EAdd expt1 expt2 -> M.EAdd (monoexpt expt1) (monoexpt expt2) - T.EAbs _i _expt -> error "BUG" - T.ECase expt injs -> M.ECase (monoexpt expt) (monoInjs injs) - T.EInj i -> M.EVar i - -monoAbsType :: T.Type -> M.Type -monoAbsType (T.TLit u) = M.TLit (coerce u) -monoAbsType (T.TVar _v) = M.TLit "Int" -monoAbsType (T.TAll _v _t) = error "NOT ALL TYPES" -monoAbsType (T.TFun t1 t2) = M.TFun (monoAbsType t1) (monoAbsType t2) -monoAbsType (T.TData _ _) = error "NOT INDEXED TYPES" - -monoType :: T.Type -> M.Type -monoType (T.TAll _ t) = monoType t -monoType (T.TVar (T.MkTVar i)) = M.TLit "Int" -monoType (T.TLit (Ident i)) = M.TLit (T.Ident i) -monoType (T.TFun t1 t2) = M.TFun (monoType t1) (monoType t2) -monoType (T.TData (Ident n) t) = M.TLit (T.Ident (n ++ concatMap show t)) - -monoexpt :: T.ExpT -> M.ExpT -monoexpt (e, t) = (monoExpr e, monoType t) - -monoId :: T.Id -> M.Id -monoId (n, t) = (coerce n, monoType t) - -monoLit :: T.Lit -> M.Lit -monoLit (T.LInt i) = M.LInt i -monoLit (T.LChar c) = M.LChar c - -monoInjs :: [T.Branch] -> [M.Branch] -monoInjs = map monoInj - -monoInj :: T.Branch -> M.Branch -monoInj (T.Branch (patt, t) expt) = M.Branch (monoPattern patt, monoType t) (monoexpt expt) - -monoPattern :: T.Pattern -> M.Pattern -monoPattern (T.PVar (id, t)) = M.PVar (id, monoType t) -monoPattern (T.PLit (lit, t)) = M.PLit (monoLit lit, monoType t) -monoPattern (T.PInj id ps) = M.PInj (coerce id) (map monoPattern ps) --- DO NOT DO THIS FOR REAL THOUGH -monoPattern (T.PEnum (Ident i)) = M.PInj (T.Ident i) [] -monoPattern T.PCatch = M.PCatch diff --git a/src/TreeConverter.hs b/src/TreeConverter.hs new file mode 100644 index 0000000..2dfa7d2 --- /dev/null +++ b/src/TreeConverter.hs @@ -0,0 +1,13 @@ +module TreeConverter where + +--import qualified Grammar.Abs as G +--import qualified TypeChecker.TypeCheckerIr as T +-- +--convertToTypecheckerIR :: G.Program -> Either String T.Program +--convertToTypecheckerIR (G.Program defs) = T.Program (map convertDef defs) +-- +--convertDef :: G.Bind -> T.Bind +--convertDef (G.Bind name t _ args exp) = T.Bind (name, t) (map (\i -> (i, T.TMono "Int"))) (convertExp exp) +-- +-- + diff --git a/src/TypeChecker/TypeCheckerIr.hs b/src/TypeChecker/TypeCheckerIr.hs index f8216c5..c307ffe 100644 --- a/src/TypeChecker/TypeCheckerIr.hs +++ b/src/TypeChecker/TypeCheckerIr.hs @@ -7,7 +7,7 @@ module TypeChecker.TypeCheckerIr ( ) where import Data.String (IsString) -import Grammar.Abs (Lit (..), TVar (..)) +import Grammar.Abs (Lit (..)) import Grammar.Print import Prelude import qualified Prelude as C (Eq, Ord, Read, Show) @@ -56,6 +56,9 @@ data Exp' t | ECase (ExpT' t) [Branch' t] deriving (C.Eq, C.Ord, C.Show, C.Read) +data TVar = MkTVar Ident + deriving (C.Eq, C.Ord, C.Show, C.Read) + type Id' t = (Ident, t) type ExpT' t = (Exp' t, t) diff --git a/test_program b/test_program new file mode 100644 index 0000000..751a976 --- /dev/null +++ b/test_program @@ -0,0 +1,6 @@ +main : _Int ; +main = double 3 ; + +double : _Int -> _Int ; +double x = x + x ; + diff --git a/tests/Tests.hs b/tests/Tests.hs index 7bcb0af..43aecca 100644 --- a/tests/Tests.hs +++ b/tests/Tests.hs @@ -1,4 +1,3 @@ - module Main where import Test.Hspec @@ -8,3 +7,4 @@ import TestTypeCheckerHm (testTypeCheckerHm) main = hspec $ do testTypeCheckerBidir testTypeCheckerHm +