This commit is contained in:
sebastianselander 2023-04-27 12:49:29 +02:00
parent e9852079ab
commit 55fd35d661

View file

@ -1,72 +1,84 @@
-- | For now, converts polymorphic functions to concrete ones based on usage.
-- Assumes lambdas are lifted.
--
-- This step of compilation is as follows:
--
-- Split all function bindings into monomorphic and polymorphic binds. The
-- monomorphic bindings will be part of this compilation step.
-- Apply the following monomorphization function on all monomorphic binds, with
-- their type as an additional argument.
--
-- The function that transforms Binds operates on both monomorphic and
-- polymorphic functions, creates a context in which all possible polymorphic types
-- are mapped to concrete types, created using the additional argument.
-- Expressions are then recursively processed. The type of these expressions
-- are changed to using the mapped generic types. The expected type provided
-- in the recursion is changed depending on the different nodes.
--
-- When an external bind is encountered (with EId), it is checked whether it
-- exists in outputed binds or not. If it does, nothing further is evaluated.
-- If not, the bind transformer function is called on it with the
-- expected type in this context. The result of this computation (a monomorphic
-- bind) is added to the resulting set of binds.
{-# LANGUAGE LambdaCase #-} {-# LANGUAGE LambdaCase #-}
{- | For now, converts polymorphic functions to concrete ones based on usage.
Assumes lambdas are lifted.
This step of compilation is as follows:
Split all function bindings into monomorphic and polymorphic binds. The
monomorphic bindings will be part of this compilation step.
Apply the following monomorphization function on all monomorphic binds, with
their type as an additional argument.
The function that transforms Binds operates on both monomorphic and
polymorphic functions, creates a context in which all possible polymorphic types
are mapped to concrete types, created using the additional argument.
Expressions are then recursively processed. The type of these expressions
are changed to using the mapped generic types. The expected type provided
in the recursion is changed depending on the different nodes.
When an external bind is encountered (with EId), it is checked whether it
exists in outputed binds or not. If it does, nothing further is evaluated.
If not, the bind transformer function is called on it with the
expected type in this context. The result of this computation (a monomorphic
bind) is added to the resulting set of binds.
-}
module Monomorphizer.Monomorphizer (monomorphize, morphExp, morphBind) where module Monomorphizer.Monomorphizer (monomorphize, morphExp, morphBind) where
import Monomorphizer.DataTypeRemover (removeDataTypes) import Monomorphizer.DataTypeRemover (removeDataTypes)
import qualified Monomorphizer.MonomorphizerIr as O import Monomorphizer.MonomorphizerIr qualified as O
import qualified Monomorphizer.MorbIr as M import Monomorphizer.MorbIr qualified as M
import qualified TypeChecker.TypeCheckerIr as T
import TypeChecker.TypeCheckerIr (Ident (Ident)) import TypeChecker.TypeCheckerIr (Ident (Ident))
import TypeChecker.TypeCheckerIr qualified as T
import Control.Monad.Reader (MonadReader (ask, local), import Control.Monad.Reader (
Reader, asks, runReader, when) MonadReader (ask, local),
import Control.Monad.State (MonadState, StateT (runStateT), Reader,
gets, modify) asks,
runReader,
when,
)
import Control.Monad.State (
MonadState,
StateT (runStateT),
gets,
modify,
)
import Data.Coerce (coerce) import Data.Coerce (coerce)
import qualified Data.Map as Map import Data.Map qualified as Map
import Data.Maybe (fromJust) import Data.Maybe (fromJust)
import qualified Data.Set as Set import Data.Set qualified as Set
import Debug.Trace import Debug.Trace
import Grammar.Print (printTree) import Grammar.Print (printTree)
-- | EnvM is the monad containing the read-only state as well as the {- | EnvM is the monad containing the read-only state as well as the
-- output state containing monomorphized functions and to-be monomorphized output state containing monomorphized functions and to-be monomorphized
-- data type declarations. data type declarations.
-}
newtype EnvM a = EnvM (StateT Output (Reader Env) a) newtype EnvM a = EnvM (StateT Output (Reader Env) a)
deriving (Functor, Applicative, Monad, MonadState Output, MonadReader Env) deriving (Functor, Applicative, Monad, MonadState Output, MonadReader Env)
type Output = Map.Map Ident Outputted type Output = Map.Map Ident Outputted
-- | Data structure describing outputted top-level information, that is {- | Data structure describing outputted top-level information, that is
-- Binds, Polymorphic Data types (monomorphized in a later step) and Binds, Polymorphic Data types (monomorphized in a later step) and
-- Marked bind, which means that it is in the process of monomorphization Marked bind, which means that it is in the process of monomorphization
-- and should not be monomorphized again. and should not be monomorphized again.
-}
data Outputted = Marked | Complete M.Bind | Data M.Type T.Data data Outputted = Marked | Complete M.Bind | Data M.Type T.Data
-- | Static environment. -- | Static environment.
data Env = Env { data Env = Env
-- | All binds in the program. { input :: Map.Map Ident T.Bind
input :: Map.Map Ident T.Bind, -- ^ All binds in the program.
-- | All constructors mapped to their respective polymorphic data def , dataDefs :: Map.Map Ident T.Data
-- ^ All constructors mapped to their respective polymorphic data def
-- which includes all other constructors. -- which includes all other constructors.
dataDefs :: Map.Map Ident T.Data, , polys :: Map.Map Ident M.Type
-- | Maps polymorphic identifiers with concrete types. -- ^ Maps polymorphic identifiers with concrete types.
polys :: Map.Map Ident M.Type, , locals :: Set.Set Ident
-- | Local variables. -- ^ Local variables.
locals :: Set.Set Ident }
}
-- | Determines if the identifier describes a local variable in the given context. -- | Determines if the identifier describes a local variable in the given context.
localExists :: Ident -> EnvM Bool localExists :: Ident -> EnvM Bool
@ -80,8 +92,9 @@ getInputBind ident = asks (Map.lookup ident . input)
addOutputBind :: M.Bind -> EnvM () addOutputBind :: M.Bind -> EnvM ()
addOutputBind b@(M.Bind (ident, _) _ _) = modify (Map.insert ident (Complete b)) addOutputBind b@(M.Bind (ident, _) _ _) = modify (Map.insert ident (Complete b))
-- | Marks a global bind as being processed, meaning that when encountered again, {- | Marks a global bind as being processed, meaning that when encountered again,
-- it should not be recursively processed. it should not be recursively processed.
-}
markBind :: Ident -> EnvM () markBind :: Ident -> EnvM ()
markBind ident = modify (Map.insert ident Marked) markBind ident = modify (Map.insert ident Marked)
@ -93,21 +106,25 @@ isBindMarked ident = gets (Map.member ident)
getMain :: EnvM T.Bind getMain :: EnvM T.Bind
getMain = asks (\env -> fromJust $ Map.lookup (T.Ident "main") (input env)) getMain = asks (\env -> fromJust $ Map.lookup (T.Ident "main") (input env))
-- | Makes a kv pair list of polymorphic to monomorphic mappings, throws runtime {- | Makes a kv pair list of polymorphic to monomorphic mappings, throws runtime
-- error when encountering different structures between the two arguments. error when encountering different structures between the two arguments.
-}
mapTypes :: T.Type -> M.Type -> [(Ident, M.Type)] mapTypes :: T.Type -> M.Type -> [(Ident, M.Type)]
mapTypes (T.TLit _) (M.TLit _) = [] mapTypes (T.TLit _) (M.TLit _) = []
mapTypes (T.TVar (T.MkTVar i1)) tm = [(i1, tm)] mapTypes (T.TVar (T.MkTVar i1)) tm = [(i1, tm)]
mapTypes (T.TFun pt1 pt2) (M.TFun mt1 mt2) = mapTypes pt1 mt1 ++ mapTypes (T.TFun pt1 pt2) (M.TFun mt1 mt2) =
mapTypes pt2 mt2 mapTypes pt1 mt1
mapTypes (T.TData tIdent pTs) (M.TData mIdent mTs) = if tIdent /= mIdent ++ mapTypes pt2 mt2
mapTypes (T.TData tIdent pTs) (M.TData mIdent mTs) =
if tIdent /= mIdent
then error "the data type names of monomorphic and polymorphic data types does not match" then error "the data type names of monomorphic and polymorphic data types does not match"
else foldl (\xs (p, m) -> mapTypes p m ++ xs) [] (zip pTs mTs) else foldl (\xs (p, m) -> mapTypes p m ++ xs) [] (zip pTs mTs)
mapTypes t1 t2 = error $ "structure of types not the same: '" ++ printTree t1 ++ "', '" ++ printTree t2 ++ "'" mapTypes t1 t2 = error $ "structure of types not the same: '" ++ printTree t1 ++ "', '" ++ printTree t2 ++ "'"
-- | Gets the mapped monomorphic type of a polymorphic type in the current context. -- | Gets the mapped monomorphic type of a polymorphic type in the current context.
getMonoFromPoly :: T.Type -> EnvM M.Type getMonoFromPoly :: T.Type -> EnvM M.Type
getMonoFromPoly t = do env <- ask getMonoFromPoly t = do
env <- ask
return $ getMono (polys env) t return $ getMono (polys env) t
where where
getMono :: Map.Map Ident M.Type -> T.Type -> M.Type getMono :: Map.Map Ident M.Type -> T.Type -> M.Type
@ -117,22 +134,30 @@ getMonoFromPoly t = do env <- ask
(T.TVar (T.MkTVar ident)) -> case Map.lookup ident polys of (T.TVar (T.MkTVar ident)) -> case Map.lookup ident polys of
Just concrete -> concrete Just concrete -> concrete
Nothing -> M.TLit (Ident "void") Nothing -> M.TLit (Ident "void")
--error $ "type not found! type: " ++ show ident ++ ", error in previous compilation steps" -- error $ "type not found! type: " ++ show ident ++ ", error in previous compilation steps"
(T.TData ident args) -> M.TData ident (map (getMono polys) args) (T.TData ident args) -> M.TData ident (map (getMono polys) args)
-- | If ident not already in env's output, morphed bind to output {- | If ident not already in env's output, morphed bind to output
-- (and all referenced binds within this bind). (and all referenced binds within this bind).
-- Returns the annotated bind name. Returns the annotated bind name.
-}
morphBind :: M.Type -> T.Bind -> EnvM Ident morphBind :: M.Type -> T.Bind -> EnvM Ident
morphBind expectedType b@(T.Bind (Ident str, btype) args (exp, expt)) = morphBind expectedType b@(T.Bind (Ident str, btype) args (exp, expt)) =
local (\env -> env { locals = Set.fromList (map fst args), local
polys = Map.fromList (mapTypes btype expectedType) ( \env ->
}) $ do env
{ locals = Set.fromList (map fst args)
, polys = Map.fromList (mapTypes btype expectedType)
}
)
$ do
-- The "new name" is used to find out if it is already marked or not. -- The "new name" is used to find out if it is already marked or not.
let name' = newFuncName expectedType b let name' = newFuncName expectedType b
bindMarked <- isBindMarked (coerce name') bindMarked <- isBindMarked (coerce name')
-- Return with right name if already marked -- Return with right name if already marked
if bindMarked then return name' else do if bindMarked
then return name'
else do
-- Mark so that this bind will not be processed in recursive or cyclic -- Mark so that this bind will not be processed in recursive or cyclic
-- function calls -- function calls
markBind (coerce name') markBind (coerce name')
@ -140,29 +165,35 @@ morphBind expectedType b@(T.Bind (Ident str, btype) args (exp, expt)) =
exp' <- morphExp expt' exp exp' <- morphExp expt' exp
-- Get monomorphic type sof args -- Get monomorphic type sof args
args' <- mapM morphArg args args' <- mapM morphArg args
addOutputBind $ M.Bind (coerce name', expectedType) addOutputBind $
args' (exp', expt') M.Bind
(coerce name', expectedType)
args'
(exp', expt')
return name' return name'
-- | Monomorphizes arguments of a bind. -- | Monomorphizes arguments of a bind.
morphArg :: (Ident, T.Type) -> EnvM (Ident, M.Type) morphArg :: (Ident, T.Type) -> EnvM (Ident, M.Type)
morphArg (ident, t) = do t' <- getMonoFromPoly t morphArg (ident, t) = do
t' <- getMonoFromPoly t
return (ident, t') return (ident, t')
-- | Gets the data bind from the name of a constructor. -- | Gets the data bind from the name of a constructor.
getInputData :: Ident -> EnvM (Maybe T.Data) getInputData :: Ident -> EnvM (Maybe T.Data)
getInputData ident = do env <- ask getInputData ident = do
env <- ask
return $ Map.lookup ident (dataDefs env) return $ Map.lookup ident (dataDefs env)
-- | Monomorphize a constructor using it's global name. Constructors may {- | Monomorphize a constructor using it's global name. Constructors may
-- appear as expressions in the tree, or as patterns in case-expressions. appear as expressions in the tree, or as patterns in case-expressions.
-}
morphCons :: M.Type -> Ident -> EnvM () morphCons :: M.Type -> Ident -> EnvM ()
morphCons expectedType ident = do morphCons expectedType ident = do
maybeD <- getInputData ident maybeD <- getInputData ident
case maybeD of case maybeD of
Nothing -> error $ "identifier '" ++ show ident ++ "' not found" Nothing -> error $ "identifier '" ++ show ident ++ "' not found"
Just d -> do Just d -> do
modify (\output -> Map.insert ident (Data expectedType d) output ) modify (\output -> Map.insert ident (Data expectedType d) output)
-- | Converts literals from input to output tree. -- | Converts literals from input to output tree.
convertLit :: T.Lit -> M.Lit convertLit :: T.Lit -> M.Lit
@ -187,7 +218,7 @@ morphExp expectedType exp = case exp of
e1' <- morphExp t1' e1 e1' <- morphExp t1' e1
e2' <- morphExp t2' e2 e2' <- morphExp t2' e2
return $ M.EAdd (e1', expectedType) (e2', expectedType) return $ M.EAdd (e1', expectedType) (e2', expectedType)
T.EAbs ident (exp, t) -> local (\env -> env { locals = Set.insert ident (locals env) }) $ do T.EAbs ident (exp, t) -> local (\env -> env{locals = Set.insert ident (locals env)}) $ do
t' <- getMonoFromPoly t t' <- getMonoFromPoly t
morphExp t' exp morphExp t' exp
T.ECase (exp, t) bs -> do T.ECase (exp, t) bs -> do
@ -197,7 +228,8 @@ morphExp expectedType exp = case exp of
return $ M.ECase (exp', t') bs' return $ M.ECase (exp', t') bs'
T.EVar ident -> do T.EVar ident -> do
isLocal <- localExists ident isLocal <- localExists ident
if isLocal then do if isLocal
then do
return $ M.EVar (coerce ident) return $ M.EVar (coerce ident)
else do else do
bind <- getInputBind ident bind <- getInputBind ident
@ -210,8 +242,7 @@ morphExp expectedType exp = case exp of
-- New bind to process -- New bind to process
newBindName <- morphBind expectedType bind' newBindName <- morphBind expectedType bind'
return $ M.EVar (coerce newBindName) return $ M.EVar (coerce newBindName)
T.ELet (T.Bind{}) _ -> error "lets not possible yet"
T.ELet (T.Bind {}) _ -> error "lets not possible yet"
-- | Monomorphizes case-of branches. -- | Monomorphizes case-of branches.
morphBranch :: T.Branch -> EnvM M.Branch morphBranch :: T.Branch -> EnvM M.Branch
@ -220,27 +251,31 @@ morphBranch (T.Branch (p, pt) (e, et)) = do
et' <- getMonoFromPoly et et' <- getMonoFromPoly et
env <- ask env <- ask
(p', newLocals) <- morphPattern pt' (locals env) (p, pt) (p', newLocals) <- morphPattern pt' (locals env) (p, pt)
local (const env { locals = newLocals }) $ do local (const env{locals = newLocals}) $ do
e' <- morphExp et' e e' <- morphExp et' e
return $ M.Branch (p', pt') (e', et') return $ M.Branch (p', pt') (e', et')
-- | Morphs pattern (pattern => expression), gives the newly bound local variables. -- | Morphs pattern (pattern => expression), gives the newly bound local variables.
morphPattern :: M.Type -> Set.Set Ident -> (T.Pattern, T.Type) -> EnvM (M.Pattern, Set.Set Ident) morphPattern :: M.Type -> Set.Set Ident -> (T.Pattern, T.Type) -> EnvM (M.Pattern, Set.Set Ident)
morphPattern expectedType ls (p, t) = case p of morphPattern expectedType ls (p, t) = case p of
T.PVar ident -> do t' <- getMonoFromPoly t T.PVar ident -> do
t' <- getMonoFromPoly t
return (M.PVar (ident, t'), Set.insert ident ls) return (M.PVar (ident, t'), Set.insert ident ls)
T.PLit lit -> do t' <- getMonoFromPoly t T.PLit lit -> do
t' <- getMonoFromPoly t
return (M.PLit (convertLit lit, t'), ls) return (M.PLit (convertLit lit, t'), ls)
T.PCatch -> return (M.PCatch, ls) T.PCatch -> return (M.PCatch, ls)
-- Constructor ident -- Constructor ident
T.PEnum ident -> do morphCons expectedType ident T.PEnum ident -> do
morphCons expectedType ident
return (M.PEnum ident, ls) return (M.PEnum ident, ls)
T.PInj ident ps -> do morphCons expectedType ident T.PInj ident ps -> do
morphCons expectedType ident
let (M.TData tIdent ts) = expectedType let (M.TData tIdent ts) = expectedType
-- TODO: this is wrong! -- TODO: this is wrong!
pairs <- mapM (\(pat, patT) -> morphPattern patT ls pat) (zip ps ts) pairs <- mapM (\(pat, patT) -> morphPattern patT ls pat) (zip ps ts)
if length ts == length ps then if length ts == length ps
return (M.PCatch, Set.singleton $ Ident "$1y") then return (M.PCatch, Set.singleton $ Ident "$1y")
else return (M.PInj ident (map fst pairs), Set.unions (map snd pairs)) else return (M.PInj ident (map fst pairs), Set.unions (map snd pairs))
-- | Creates a new identifier for a function with an assigned type. -- | Creates a new identifier for a function with an assigned type.
@ -260,8 +295,12 @@ newName t (Ident str) = Ident $ str ++ "$" ++ newName' t
-- | Monomorphization step. -- | Monomorphization step.
monomorphize :: T.Program -> O.Program monomorphize :: T.Program -> O.Program
monomorphize (T.Program defs) = removeDataTypes $ M.Program (getDefsFromOutput monomorphize (T.Program defs) =
(runEnvM Map.empty (createEnv defs) monomorphize')) removeDataTypes $
M.Program
( getDefsFromOutput
(runEnvM Map.empty (createEnv defs) monomorphize')
)
where where
monomorphize' :: EnvM () monomorphize' :: EnvM ()
monomorphize' = do monomorphize' = do
@ -275,10 +314,13 @@ runEnvM o env (EnvM stateM) = snd $ runReader (runStateT stateM o) env
-- | Creates the environment based on the input binds. -- | Creates the environment based on the input binds.
createEnv :: [T.Def] -> Env createEnv :: [T.Def] -> Env
createEnv defs = Env { input = Map.fromList bindPairs, createEnv defs =
dataDefs = Map.fromList dataPairs, Env
polys = Map.empty, { input = Map.fromList bindPairs
locals = Set.empty } , dataDefs = Map.fromList dataPairs
, polys = Map.empty
, locals = Set.empty
}
where where
bindPairs = (map (\b -> (getBindName b, b)) . getBindsFromDefs) defs bindPairs = (map (\b -> (getBindName b, b)) . getBindsFromDefs) defs
dataPairs :: [(Ident, T.Data)] dataPairs :: [(Ident, T.Data)]
@ -291,44 +333,61 @@ getBindName (T.Bind (ident, _) _ _) = ident
-- Helper functions -- Helper functions
-- Gets custom data declarations form defs. -- Gets custom data declarations form defs.
getDataFromDefs :: [T.Def] -> [T.Data] getDataFromDefs :: [T.Def] -> [T.Data]
getDataFromDefs = foldl (\bs -> \case getDataFromDefs =
foldl
( \bs -> \case
T.DBind _ -> bs T.DBind _ -> bs
T.DData d -> d:bs) [] T.DData d -> d : bs
)
[]
getConsName :: T.Inj -> Ident getConsName :: T.Inj -> Ident
getConsName (T.Inj ident _) = ident getConsName (T.Inj ident _) = ident
getBindsFromDefs :: [T.Def] -> [T.Bind] getBindsFromDefs :: [T.Def] -> [T.Bind]
getBindsFromDefs = foldl (\bs -> \case getBindsFromDefs =
T.DBind b -> b:bs foldl
T.DData _ -> bs) [] ( \bs -> \case
T.DBind b -> b : bs
T.DData _ -> bs
)
[]
getDefsFromOutput :: Output -> [M.Def] getDefsFromOutput :: Output -> [M.Def]
getDefsFromOutput o = getDefsFromOutput o =
map M.DBind binds ++ map M.DBind binds
(map (M.DData . snd) . Map.toList) (createNewData dataInput Map.empty) ++ (map (M.DData . snd) . Map.toList) (createNewData dataInput Map.empty)
where where
(binds, dataInput) = splitBindsAndData o (binds, dataInput) = splitBindsAndData o
-- | Splits the output into binds and data declaration components (used in createNewData) -- | Splits the output into binds and data declaration components (used in createNewData)
splitBindsAndData :: Output -> ([M.Bind], [(Ident, M.Type, T.Data)]) splitBindsAndData :: Output -> ([M.Bind], [(Ident, M.Type, T.Data)])
splitBindsAndData output = foldl splitBindsAndData output =
(\(oBinds, oData) (ident, o) -> case o of foldl
( \(oBinds, oData) (ident, o) -> case o of
Marked -> error "internal bug in monomorphizer" Marked -> error "internal bug in monomorphizer"
Complete b -> (b:oBinds, oData) Complete b -> (b : oBinds, oData)
Data t d -> (oBinds, (ident, t, d):oData)) Data t d -> (oBinds, (ident, t, d) : oData)
)
([], []) ([], [])
(Map.toList output) (Map.toList output)
-- | Converts all found constructors to monomorphic data declarations. -- | Converts all found constructors to monomorphic data declarations.
createNewData :: [(Ident, M.Type, T.Data)] -> Map.Map Ident M.Data -> Map.Map Ident M.Data createNewData :: [(Ident, M.Type, T.Data)] -> Map.Map Ident M.Data -> Map.Map Ident M.Data
createNewData [] o = o createNewData [] o = o
createNewData ((consIdent, consType, polyData):input) o = createNewData ((consIdent, consType, polyData) : input) o =
createNewData input $ createNewData input $
Map.insertWith (\_ (M.Data _ cs) -> M.Data newDataType (newCons:cs)) Map.insertWith
newDataName (M.Data newDataType [newCons]) o (\_ (M.Data _ cs) -> M.Data newDataType (newCons : cs))
newDataName
(M.Data newDataType [newCons])
o
where where
T.Data (T.TData polyDataIdent _) _ = polyData polyDataIdent = case polyData of
T.Data (T.TData i _) _ -> i
T.Data (T.TLit i) _ -> i
t -> error $ "Data type is :" ++ show t ++ " which should be impossible"
newDataType = getDataType consType newDataType = getDataType consType
newDataName = newName newDataType polyDataIdent newDataName = newName newDataType polyDataIdent
newCons = M.Inj consIdent consType newCons = M.Inj consIdent consType
@ -338,4 +397,3 @@ getDataType :: M.Type -> M.Type
getDataType (M.TFun t1 t2) = getDataType t2 getDataType (M.TFun t1 t2) = getDataType t2
getDataType tData@(M.TData _ _) = tData getDataType tData@(M.TData _ _) = tData
getDataType _ = error "???" getDataType _ = error "???"