diff --git a/language.cabal b/language.cabal index 9783156..38f5ef5 100644 --- a/language.cabal +++ b/language.cabal @@ -38,6 +38,7 @@ executable language Renamer.Renamer Codegen.Codegen Codegen.LlvmIr + TreeConverter hs-source-dirs: src @@ -69,6 +70,8 @@ Test-suite language-testsuite Auxiliary TypeChecker.TypeChecker TypeChecker.TypeCheckerIr + Monomorpher.Monomorpher + Monomorpher.MonomorpherIr Renamer.Renamer Compiler @@ -85,5 +88,7 @@ Test-suite language-testsuite , QuickCheck , process , bytestring + , hspec default-language: GHC2021 + diff --git a/llvm.ll b/llvm.ll new file mode 100644 index 0000000..cd6b190 --- /dev/null +++ b/llvm.ll @@ -0,0 +1,10 @@ +@.str = private unnamed_addr constant [3 x i8] c"%i +", align 1 +declare i32 @printf(ptr noalias nocapture, ...) + +; Ident "main": EAdd (TMono (Ident "Int")) (ELit (TMono (Ident "Int")) (LInt 3)) (ELit (TMono (Ident "Int")) (LInt 3)) +define i64 @main() { + %1 = add i64 3, 3 + call i32 (ptr, ...) @printf(ptr noundef @.str, i64 noundef %1) + ret i64 0 +} diff --git a/sample-programs/good1 b/sample-programs/good1 new file mode 100644 index 0000000..b7aff4b --- /dev/null +++ b/sample-programs/good1 @@ -0,0 +1,6 @@ +main : _Int ; +main = (id : _Int -> _Int) 5 ; + +id : 'a -> 'a ; +id x = (x : 'a); + diff --git a/src/Monomorphizer/Monomorphizer.hs b/src/Monomorphizer/Monomorphizer.hs index 7062b79..6267f39 100644 --- a/src/Monomorphizer/Monomorphizer.hs +++ b/src/Monomorphizer/Monomorphizer.hs @@ -1,76 +1,223 @@ +-- | For now, converts polymorphic functions to concrete ones based on usage. +-- Assumes lambdas are lifted. +-- +-- This step of compilation is as follows: +-- +-- Split all function bindings into monomorphic and polymorphic binds. The +-- monomorphic bindings will be part of this compilation step. +-- Apply the following monomorphization function on all monomorphic binds, with +-- their type as an additional argument. +-- +-- The function that transforms Binds operates on both monomorphic and +-- polymorphic functions, creates a context in which all possible polymorphic types +-- are mapped to concrete types, created using the additional argument. +-- Expressions are then recursively processed. The type of these expressions +-- are changed to using the mapped generic types. The expected type provided +-- in the recursion is changed depending on the different nodes. +-- +-- When an external bind is encountered (with EId), it is checked whether it +-- exists in outputed binds or not. If it does, nothing further is evaluated. +-- If not, the bind transformer function is called on it with the +-- expected type in this context. The result of this computation (a monomorphic +-- bind) is added to the resulting set of binds. + {-# LANGUAGE LambdaCase #-} -{-# LANGUAGE OverloadedStrings #-} -module Monomorphizer.Monomorphizer (monomorphize) where +module Monomorphizer.Monomorphizer (monomorphize, morphExp, morphBind) where +import qualified TypeChecker.TypeCheckerIr as T +import TypeChecker.TypeCheckerIr (Ident (Ident)) +import qualified Monomorphizer.MonomorphizerIr as M + +import Debug.Trace +import Control.Monad.State (MonadState, gets, modify, StateT (runStateT)) +import qualified Data.Map as Map +import qualified Data.Set as Set +import Data.Maybe (fromJust) +import Control.Monad.Reader (Reader, MonadReader (local, ask), asks, runReader) import Data.Coerce (coerce) -import Monomorphizer.MonomorphizerIr qualified as M -import TypeChecker.TypeCheckerIr qualified as T +-- | State Monad wrapper for "Env". +newtype EnvM a = EnvM (StateT Output (Reader Env) a) + deriving (Functor, Applicative, Monad, MonadState Output, MonadReader Env) +type Output = Map.Map M.Ident Outputted +-- When a bind is being processed, it is Incomplete in the state, also +-- called marked. +data Outputted = Incomplete | Complete M.Bind + +-- Static environment +data Env = Env { + -- | All binds in the program. + input :: Map.Map Ident T.Bind, + -- | Maps polymorphic identifiers with concrete types. + polys :: Map.Map Ident M.Type, + -- | Local variables + locals :: Set.Set Ident +} + +runEnvM :: Output -> Env -> EnvM () -> Output +runEnvM o env (EnvM stateM) = snd $ runReader (runStateT stateM o) env + + + +-- | Creates the environment based on the input binds. +createEnv :: [T.Bind] -> Env +createEnv binds = Env { input = Map.fromList kvPairs, + polys = Map.empty, + locals = Set.empty } + where + kvPairs :: [(Ident, T.Bind)] + kvPairs = map (\b@(T.Bind (ident, _) _ _) -> (ident, b)) binds + +localExists :: Ident -> EnvM Bool +localExists ident = asks (Set.member ident . locals) + +-- | Gets a polymorphic bind from an id. +getInputBind :: Ident -> EnvM (Maybe T.Bind) +getInputBind ident = asks (Map.lookup ident . input) + +-- | Add monomorphic function derived from a polymorphic one, to env. +addOutputBind :: M.Bind -> EnvM () +addOutputBind b@(M.Bind (ident, _) _ _) = modify (Map.insert ident (Complete b)) + +-- | Marks a global bind as being processed, meaning that when encountered again, +-- it should not be recursively processed. +markBind :: M.Ident -> EnvM () +markBind ident = modify (Map.insert ident Incomplete) + +-- | Check if bind has been touched or not. +isBindMarked :: M.Ident -> EnvM Bool +isBindMarked ident = gets (Map.member ident) + +-- | Finds main bind +getMain :: EnvM T.Bind +getMain = asks (\env -> fromJust $ Map.lookup (T.Ident "main") (input env)) + +-- NOTE: could make this function more optimized +-- | Makes a kv pair list of polymorphic to monomorphic mappings, throws runtime +-- error when encountering different structures between the two arguments. +mapTypes :: T.Type -> M.Type -> [(Ident, M.Type)] +mapTypes (T.TLit _) (M.TLit _) = [] +mapTypes (T.TVar (T.MkTVar i1)) tm = [(i1, tm)] +mapTypes (T.TFun pt1 pt2) (M.TFun mt1 mt2) = mapTypes pt1 mt1 ++ + mapTypes pt2 mt2 +mapTypes _ _ = error "structure of types not the same!" + +-- | Gets the mapped monomorphic type of a polymorphic type in the current context. +getMonoFromPoly :: T.Type -> EnvM M.Type +getMonoFromPoly t = do env <- ask + return $ getMono (polys env) t + where + getMono :: Map.Map Ident M.Type -> T.Type -> M.Type + getMono polys t = case t of + (T.TLit ident) -> M.TLit (convertIdent ident) + (T.TFun t1 t2) -> M.TFun (getMono polys t1) (getMono polys t2) + (T.TVar (T.MkTVar ident)) -> case Map.lookup ident polys of + Just concrete -> concrete + Nothing -> error $ + "type not found! type: " ++ show ident ++ ", error in previous compilation steps" + _ -> error "Not implemented" + +-- | If ident not already in env's output, morphed bind to output +-- (and all referenced binds within this bind). +-- Returns the annotated bind name. +-- TODO: Redundancy? btype and t should always be the same. +morphBind :: M.Type -> T.Bind -> EnvM Ident +morphBind expectedType b@(T.Bind (Ident _, btype) args (exp, t)) = + local (\env -> env { locals = Set.fromList (map fst args), + polys = Map.fromList (mapTypes btype expectedType) + }) $ do + -- The "new name" is used to find out if it is already marked or not. + let name' = newName expectedType b + bindMarked <- isBindMarked (convertIdent name') + -- Return with right name if already marked + if bindMarked then return name' else do + -- Mark so that this bind will not be processed in recursive or cyclic + -- function calls + markBind (coerce name') + exp' <- morphExp expectedType exp + addOutputBind $ M.Bind (convertIdent name', expectedType) + [] (exp', expectedType) + return name' + +-- Morphs function applications, such as EApp and EAdd +morphApp :: M.Type -> T.ExpT -> T.ExpT -> EnvM M.Exp +morphApp expectedType (e1, t1) (e2, t2)= do + t1' <- getMonoFromPoly t1 + t2' <- getMonoFromPoly t2 + e2' <- morphExp t2' e2 + e1' <- morphExp (M.TFun t2' expectedType) e1 + return $ M.EApp (e1', t1') (e2', t2') + +-- TODO: Change in tree so that these are the same. +-- Converts Lit +convertLit :: T.Lit -> M.Lit +convertLit (T.LInt v) = M.LInt v +convertLit (T.LChar v) = M.LChar v +-- Converts Ident +convertIdent :: T.Ident -> M.Ident +convertIdent (T.Ident str) = M.Ident str + +morphExp :: M.Type -> T.Exp -> EnvM M.Exp +morphExp expectedType exp = case exp of + T.ELit lit -> return $ M.ELit (convertLit lit) + T.EApp e1 e2 -> do + morphApp expectedType e1 e2 + T.EAdd e1 e2 -> do + morphApp expectedType e1 e2 + T.EAbs _ _ -> do + error "EAbs found in Monomorpher, not implemented" + T.EId ident@(Ident str) -> do + isLocal <- localExists ident + if isLocal then do + return $ M.EId (convertIdent ident) + else do + bind <- getInputBind ident + case bind of + Nothing -> + error $ "bind of name: " ++ str ++ " not found, bug in previous compilation steps" + Just bind' -> do + -- New bind to process + newBindName <- morphBind expectedType bind' + return $ M.EId (convertIdent newBindName) + + T.ELet (T.Bind {}) _ -> error "lets not possible yet" + + _ -> error "Not implemented yet" + +-- Creates a new identifier for a function with an assigned type +newName :: M.Type -> T.Bind -> Ident +newName t (T.Bind (Ident bindName, _) _ _) = Ident (bindName ++ "$" ++ newName' t) + where + newName' :: M.Type -> String + newName' (M.TLit (M.Ident str)) = str + newName' (M.TFun t1 t2) = newName' t1 ++ "_" ++ newName' t2 + +-- Monomorphization step monomorphize :: T.Program -> M.Program -monomorphize (T.Program ds) = M.Program $ monoDefs ds +monomorphize (T.Program defs) = M.Program $ (getDefsFromBinds . getBindsFromOutput) + (runEnvM Map.empty (createEnv $ getBindsFromDefs defs) monomorphize') + where + monomorphize' :: EnvM () + monomorphize' = do + main <- getMain + morphBind (M.TLit $ M.Ident "Int") main + return () -monoDefs :: [T.Def] -> [M.Def] -monoDefs = map monoDef +getBindsFromOutput :: Output -> [M.Bind] +getBindsFromOutput outputMap = (map snd . Map.toList) $ fmap + (\case + Incomplete -> error "Internal bug in monomorphizer" + Complete b -> b ) + outputMap -monoDef :: T.Def -> M.Def -monoDef (T.DBind bind) = M.DBind $ monoBind bind -monoDef (T.DData d) = M.DData $ monoData d +getBindsFromDefs :: [T.Def] -> [T.Bind] +getBindsFromDefs = foldl (\bs -> \case + T.DBind b -> b:bs + T.DData _ -> bs + ) [] -monoBind :: T.Bind -> M.Bind -monoBind (T.Bind name args (e, t)) = M.Bind (monoId name) (map monoId args) (monoExpr e, monoType t) +getDefsFromBinds :: [M.Bind] -> [M.Def] +getDefsFromBinds = foldl (\ds b -> M.DBind b : ds) [] -monoData :: T.Data -> M.Data -monoData (T.Data (T.Ident id) cs) = M.Data (M.TLit (M.Ident id)) (map monoConstructor cs) - -monoConstructor :: T.Constructor -> M.Constructor -monoConstructor (T.Constructor (T.Ident i) t) = M.Constructor (M.Ident i) (monoType t) - -monoExpr :: T.Exp -> M.Exp -monoExpr = \case - T.EId (T.Ident i) -> M.EId (M.Ident i) - T.ELit lit -> M.ELit $ monoLit lit - T.ELet bind expt -> M.ELet (monoBind bind) (monoexpt expt) - T.EApp expt1 expt2 -> M.EApp (monoexpt expt1) (monoexpt expt2) - T.EAdd expt1 expt2 -> M.EAdd (monoexpt expt1) (monoexpt expt2) - T.EAbs _i _expt -> error "BUG" - T.ECase expt injs -> M.ECase (monoexpt expt) (monoInjs injs) - -monoAbsType :: T.Type -> M.Type -monoAbsType (T.TLit u) = M.TLit (coerce u) -monoAbsType (T.TVar _v) = M.TLit "Int" -monoAbsType (T.TAll _v _t) = error "NOT ALL TYPES" -monoAbsType (T.TFun t1 t2) = M.TFun (monoAbsType t1) (monoAbsType t2) -monoAbsType (T.TData _ _) = error "NOT INDEXED TYPES" - -monoType :: T.Type -> M.Type -monoType (T.TAll _ t) = monoType t -monoType (T.TVar (T.MkTVar i)) = M.TLit "Int" -monoType (T.TLit (T.Ident i)) = M.TLit (M.Ident i) -monoType (T.TFun t1 t2) = M.TFun (monoType t1) (monoType t2) -monoType (T.TData (T.Ident n) t) = M.TLit (M.Ident (n ++ concatMap show t)) - -monoexpt :: T.ExpT -> M.ExpT -monoexpt (e, t) = (monoExpr e, monoType t) - -monoId :: T.Id -> M.Id -monoId (n, t) = (coerce n, monoType t) - -monoLit :: T.Lit -> M.Lit -monoLit (T.LInt i) = M.LInt i -monoLit (T.LChar c) = M.LChar c - -monoInjs :: [T.Branch] -> [M.Branch] -monoInjs = map monoInj - -monoInj :: T.Branch -> M.Branch -monoInj (T.Branch (init, t) expt) = M.Branch (monoInit init, monoType t) (monoexpt expt) - -monoInit :: T.Pattern -> M.Pattern -monoInit (T.PVar (id, t)) = M.PVar (coerce id, monoType t) -monoInit (T.PLit (lit, t)) = M.PLit (monoLit lit, monoType t) -monoInit (T.PInj id ps) = M.PInj (coerce id) (monoInit <$> ps) --- DO NOT DO THIS FOR REAL THOUGH -monoInit (T.PEnum (T.Ident i)) = M.PInj (M.Ident i) [] -monoInit T.PCatch = M.PCatch diff --git a/src/TreeConverter.hs b/src/TreeConverter.hs new file mode 100644 index 0000000..2dfa7d2 --- /dev/null +++ b/src/TreeConverter.hs @@ -0,0 +1,13 @@ +module TreeConverter where + +--import qualified Grammar.Abs as G +--import qualified TypeChecker.TypeCheckerIr as T +-- +--convertToTypecheckerIR :: G.Program -> Either String T.Program +--convertToTypecheckerIR (G.Program defs) = T.Program (map convertDef defs) +-- +--convertDef :: G.Bind -> T.Bind +--convertDef (G.Bind name t _ args exp) = T.Bind (name, t) (map (\i -> (i, T.TMono "Int"))) (convertExp exp) +-- +-- + diff --git a/test_program b/test_program new file mode 100644 index 0000000..751a976 --- /dev/null +++ b/test_program @@ -0,0 +1,6 @@ +main : _Int ; +main = double 3 ; + +double : _Int -> _Int ; +double x = x + x ; + diff --git a/tests/Tests.hs b/tests/Tests.hs new file mode 100644 index 0000000..edfd90b --- /dev/null +++ b/tests/Tests.hs @@ -0,0 +1,112 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-} +{-# HLINT ignore "Use <$>" #-} + +module Main where +import Grammar.Abs (Ident (Ident), Literal (LInt)) +import qualified TypeChecker.TypeCheckerIr as T + +import Monomorpher.Monomorpher (monomorphize) +import Grammar.Print (printTree) +import System.IO (stderr) +import GHC.IO.Handle.Text (hPutStrLn) + + +printToErr :: String -> IO () +printToErr = hPutStrLn stderr + +main :: IO () +main = do + -- Only demonstrations for now, will fail if error is thrown. + simpleDemo + +-- A simple demo +simpleDemo = do + demo "main = f 5" $ simpleProgram [f] + (mainApp (T.EId ("f", typeIntToInt)) lit5) + demo "main = bigId 5" $ simpleProgram [bigId] + (mainApp (T.EId ("bigId", typeIntToInt)) lit5) + demo "main = g 5" $ simpleProgram [g, bigId] + (mainApp (T.EId ("g", typeIntToInt)) lit5) + demo "main = (bigConst 5) ((bigConst 5) True)" $ simpleProgram [bigConst] + (T.EApp typeInt + -- (bigConst 5) + (T.EApp typeIntToInt (T.EId ("bigConst", typeIntToIntToInt)) lit5) + -- ((bigConst 5) True) + (T.EApp typeInt + (T.EApp typeBoolToInt + (T.EId ("bigConst", typeIntToBoolToInt)) + lit5 + ) + litTrue + ) + ) + +-- Nice demo 👍 +demo :: String -> T.Program -> IO () +demo label prg = do + printToErr $ "#### " ++ label ++ " ####" + printToErr " * Lifted Tree * " + printToErr $ printTree prg + printToErr " * Monomorphized Tree * " + printToErr $ printTree (monomorphize prg) + printToErr "##########\n" + +-- Programs in the form of "main = 'func' 'x'" +simpleProgram :: [T.Bind] -> T.Exp -> T.Program +simpleProgram binds input = T.Program (T.Bind ("main", typeInt) [] input:binds) + +-- Applies two expressions, has type Int +mainApp :: T.Exp -> T.Exp -> T.Exp +mainApp = T.EApp typeInt + +-- f :: Int -> Int +-- f x = x + x +f = T.Bind ("f", typeIntToInt) [("x", typeInt)] fExp +fExp = T.EAdd typeInt (T.EId ("x", typeInt)) (T.EId ("x", typeInt)) + +-- bigId :: a -> a +-- bigId x = x +bigId = T.Bind (Ident "bigId", typeAToA) [(Ident "x", typeA)] bigIdExp +bigIdExp = T.EAdd typeA (T.EId (Ident "x", typeA)) (T.EId ("x", typeA)) + +-- bigConst :: a -> a -> a +-- bigConst x y = x +bigConst = T.Bind ("bigConst", typeAToAToA) [("x", typeA), ("y", typeA)] bigConstExp +bigConstExp = T.EId ("x", typeA) + +-- g :: a -> a +-- g x = x + (bigId x) +g = T.Bind ("g", typeAToA) [("x", typeA)] gExp +gExp = T.EAdd typeA (T.EId ("x", typeA)) (T.EApp typeA (T.EId ("bigId", typeAToA)) (T.EId ("x", typeA))) + +-- | Reusable test constructs for Monomorpher. +typeInt = T.TMono "Int" + +typeIntToInt = T.TArr typeInt typeInt + +typeIntToIntToInt = T.TArr typeInt typeIntToInt + + +typeA = T.TPol $ Ident "a" + +typeAToA = T.TArr typeA typeA + +typeAToAToA = T.TArr typeA typeAToA + + +typeBool = T.TMono "Bool" + +typeBoolToBool = T.TArr typeBool typeBool + +typeBoolToBoolToBool = T.TArr typeBool typeBoolToBool + + +lit5 = T.ELit typeInt $ T.LInt 5 + +litTrue = T.ELit typeBool T.LBool + + +typeBoolToInt = T.TArr typeBool typeInt +typeIntToBoolToInt = T.TArr typeInt typeBoolToInt +