churf/src/TypeChecker/TypeChecker.hs

{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}

-- | A module for type checking and inference using algorithm W, Hindley-Milner
module TypeChecker.TypeChecker where

import Control.Monad.Except
import Control.Monad.Reader
import Control.Monad.State
import Data.Foldable (traverse_)
import Data.Functor.Identity (runIdentity)
import Data.List (foldl')
import Data.Map (Map)
import Data.Map qualified as M
import Data.Set (Set)
import Data.Set qualified as S
import Debug.Trace (trace)
import Grammar.Abs
import Grammar.Print (printTree)
import TypeChecker.TypeCheckerIr (
    Ctx (..),
    Env (..),
    Error,
    Infer,
    Poly (..),
    Subst,
 )
import TypeChecker.TypeCheckerIr qualified as T

initCtx = Ctx mempty

initEnv = Env 0 mempty mempty

runPretty :: Exp -> Either Error String
runPretty = fmap (printTree . fst) . run . inferExp

run :: Infer a -> Either Error a
run = runC initEnv initCtx

runC :: Env -> Ctx -> Infer a -> Either Error a
runC e c = runIdentity . runExceptT . flip runReaderT c . flip evalStateT e

typecheck :: Program -> Either Error T.Program
typecheck = run . checkPrg

{- | Start by freshening the type variable of data types to avoid clash with
other user defined polymorphic types
-}
freshenData :: Data -> Infer Data
freshenData (Data (Constr name ts) constrs) = do
    fr <- fresh
    let fr' = case fr of
            TPol a -> a
            -- Meh, this part assumes fresh generates a polymorphic type
            _ ->
                error
                    "Bug: implementation of \
                    \ fresh and freshenData are not compatible"
    let new_ts = map (freshenType fr') ts
    let new_constrs = map (freshenConstr fr') constrs
    return $ Data (Constr name new_ts) new_constrs
  where
    freshenType :: Ident -> Type -> Type
    freshenType iden = \case
        (TPol _) -> TPol iden
        (TArr a b) -> TArr (freshenType iden a) (freshenType iden b)
        (TConstr (Constr a ts)) ->
            TConstr (Constr a (map (freshenType iden) ts))
        rest -> rest

    freshenConstr :: Ident -> Constructor -> Constructor
    freshenConstr iden (Constructor name t) =
        Constructor name (freshenType iden t)

checkData :: Data -> Infer ()
checkData d = do
    d' <- freshenData d
    case d' of
        (Data typ@(Constr name ts) constrs) -> do
            unless
                (all isPoly ts)
                (throwError $ unwords ["Data type incorrectly declared"])
            traverse_
                ( \(Constructor name' t') ->
                    if TConstr typ == retType t'
                        then insertConstr name' t'
                        else
                            throwError $
                                unwords
                                    [ "return type of constructor:"
                                    , printTree name
                                    , "with type:"
                                    , printTree (retType t')
                                    , "does not match data: "
                                    , printTree typ
                                    ]
                )
                constrs

retType :: Type -> Type
retType (TArr _ t2) = retType t2
retType a = a

checkPrg :: Program -> Infer T.Program
checkPrg (Program bs) = do
    preRun bs
    T.Program <$> checkDef bs
  where
    preRun :: [Def] -> Infer ()
    preRun [] = return ()
    preRun (x : xs) = case x of
        DBind (Bind n t _ _ _) -> insertSig n t >> preRun xs
        DData d@(Data _ _) -> checkData d >> preRun xs

    checkDef :: [Def] -> Infer [T.Def]
    checkDef [] = return []
    checkDef (x : xs) = case x of
        (DBind b) -> do
            b' <- checkBind b
            fmap (T.DBind b' :) (checkDef xs)
        (DData d) -> fmap (T.DData d :) (checkDef xs)

checkBind :: Bind -> Infer T.Bind
checkBind (Bind n t _ args e) = do
    (t', e') <- inferExp $ makeLambda e (reverse args)
    s <- unify t t'
    let t'' = apply s t
    unless
        (t `typeEq` t'')
        ( throwError $
            unwords
                [ "Top level signature"
                , printTree t
                , "does not match body with inferred type:"
                , printTree t''
                ]
        )
    return $ T.Bind (n, t) e'
  where
    makeLambda :: Exp -> [Ident] -> Exp
    makeLambda = foldl (flip EAbs)

{- | Check if two types are considered equal
  For the purpose of the algorithm two polymorphic types are always considered
  equal
-}
typeEq :: Type -> Type -> Bool
typeEq (TArr l r) (TArr l' r') = typeEq l l' && typeEq r r'
typeEq (TMono a) (TMono b) = a == b
typeEq (TConstr (Constr name a)) (TConstr (Constr name' b)) =
    length a == length b
        && name == name'
        && and (zipWith typeEq a b)
typeEq (TPol _) (TPol _) = True
typeEq _ _ = False

isMoreSpecificOrEq :: Type -> Type -> Bool
isMoreSpecificOrEq _ (TPol _) = True
isMoreSpecificOrEq (TArr a b) (TArr c d) =
    isMoreSpecificOrEq a c && isMoreSpecificOrEq b d
isMoreSpecificOrEq (TConstr (Constr n1 ts1)) (TConstr (Constr n2 ts2)) =
    n1 == n2
        && length ts1 == length ts2
        && and (zipWith isMoreSpecificOrEq ts1 ts2)
isMoreSpecificOrEq a b = a == b

isPoly :: Type -> Bool
isPoly (TPol _) = True
isPoly _ = False

inferExp :: Exp -> Infer (Type, T.Exp)
inferExp e = do
    (s, t, e') <- algoW e
    let subbed = apply s t
    return (subbed, replace subbed e')

replace :: Type -> T.Exp -> T.Exp
replace t = \case
    T.ELit _ e -> T.ELit t e
    T.EId (n, _) -> T.EId (n, t)
    T.EAbs _ name e -> T.EAbs t name e
    T.EApp _ e1 e2 -> T.EApp t e1 e2
    T.EAdd _ e1 e2 -> T.EAdd t e1 e2
    T.ELet (T.Bind (n, _) e1) e2 -> T.ELet (T.Bind (n, t) e1) e2
    T.ECase _ expr injs -> T.ECase t expr injs

algoW :: Exp -> Infer (Subst, Type, T.Exp)
algoW = \case
    -- \| TODO: More testing need to be done. Unsure of the correctness of this
    EAnn e t -> do
        (s1, t', e') <- algoW e
        unless
            (t `isMoreSpecificOrEq` t')
            ( throwError $
                unwords
                    [ "Annotated type:"
                    , printTree t
                    , "does not match inferred type:"
                    , printTree t'
                    ]
            )
        applySt s1 $ do
            s2 <- unify t t'
            return (s2 `compose` s1, t, e')

    -- \| ------------------
    -- \|   Γ ⊢ i : Int, ∅

    ELit (LInt n) ->
        return (nullSubst, TMono "Int", T.ELit (TMono "Int") (LInt n))
    ELit a -> error $ "NOT IMPLEMENTED YET: ELit " ++ show a
    -- \| x : σ ∈ Γ   τ = inst(σ)
    -- \| ----------------------
    -- \|     Γ ⊢ x : τ, ∅

    EId i -> do
        var <- asks vars
        case M.lookup i var of
            Just t -> inst t >>= \x -> return (nullSubst, x, T.EId (i, x))
            Nothing -> do
                sig <- gets sigs
                case M.lookup i sig of
                    Just t -> return (nullSubst, t, T.EId (i, t))
                    Nothing -> do
                        constr <- gets constructors
                        case M.lookup i constr of
                            Just t -> return (nullSubst, t, T.EId (i, t))
                            Nothing ->
                                throwError $
                                    "Unbound variable: " ++ show i

    -- \| τ = newvar   Γ, x : τ ⊢ e : τ', S
    -- \| ---------------------------------
    -- \|     Γ ⊢ w λx. e : Sτ → τ', S

    EAbs name e -> do
        fr <- fresh
        withBinding name (Forall [] fr) $ do
            (s1, t', e') <- algoW e
            let varType = apply s1 fr
            let newArr = TArr varType t'
            return (s1, newArr, T.EAbs newArr (name, varType) e')

    -- \| Γ ⊢ e₀ : τ₀, S₀    S₀Γ ⊢ e₁ : τ₁, S₁
    -- \| s₂ = mgu(s₁τ₀, Int)    s₃ = mgu(s₂τ₁, Int)
    -- \| ------------------------------------------
    -- \|        Γ ⊢ e₀ + e₁ : Int, S₃S₂S₁S₀
    -- This might be wrong

    EAdd e0 e1 -> do
        (s1, t0, e0') <- algoW e0
        applySt s1 $ do
            (s2, t1, e1') <- algoW e1
            -- applySt s2 $ do
            s3 <- unify (apply s2 t0) (TMono "Int")
            s4 <- unify (apply s3 t1) (TMono "Int")
            return
                ( s4 `compose` s3 `compose` s2 `compose` s1
                , TMono "Int"
                , T.EAdd (TMono "Int") e0' e1'
                )

    -- \| Γ ⊢ e₀ : τ₀, S₀    S₀Γ ⊢ e₁ : τ₁, S1
    -- \| τ' = newvar    S₂ = mgu(S₁τ₀, τ₁ → τ')
    -- \| --------------------------------------
    -- \|       Γ ⊢ e₀ e₁ : S₂τ', S₂S₁S₀

    EApp e0 e1 -> do
        fr <- fresh
        (s0, t0, e0') <- algoW e0
        applySt s0 $ do
            (s1, t1, e1') <- algoW e1
            -- applySt s1 $ do
            s2 <- unify (apply s1 t0) (TArr t1 fr)
            let t = apply s2 fr
            return (s2 `compose` s1 `compose` s0, t, T.EApp t e0' e1')

    -- \| Γ ⊢ e₀ : τ, S₀     S₀Γ, x : S̅₀Γ̅(τ) ⊢ e₁ : τ', S₁
    -- \| ----------------------------------------------
    -- \|        Γ ⊢ let x = e₀ in e₁ : τ', S₁S₀

    -- The bar over S₀ and Γ means "generalize"

    ELet name e0 e1 -> do
        (s1, t1, e0') <- algoW e0
        env <- asks vars
        let t' = generalize (apply s1 env) t1
        withBinding name t' $ do
            (s2, t2, e1') <- algoW e1
            return (s2 `compose` s1, t2, T.ELet (T.Bind (name, t2) e0') e1')
    ECase caseExpr injs -> do
        (_, t0, e0') <- algoW caseExpr
        (injs', ts) <- mapAndUnzipM (checkInj t0) injs
        case ts of
            [] -> throwError "Case expression missing any matches"
            ts -> do
                unified <- zipWithM unify ts (tail ts)
                let unified' = foldl' compose mempty unified
                let typ = apply unified' (head ts)
                return (unified', typ, T.ECase typ e0' injs')

-- | Unify two types producing a new substitution
unify :: Type -> Type -> Infer Subst
unify t0 t1 = do
    trace ("t0: " ++ show t0) return ()
    trace ("t1: " ++ show t1) return ()
    case (t0, t1) of
        (TArr a b, TArr c d) -> do
            s1 <- unify a c
            s2 <- unify (apply s1 b) (apply s1 d)
            return $ s1 `compose` s2
        (TPol a, b) -> occurs a b
        (a, TPol b) -> occurs b a
        (TMono a, TMono b) ->
            if a == b then return M.empty else throwError "Types do not unify"
        -- \| TODO: Figure out a cleaner way to express the same thing
        (TConstr (Constr name t), TConstr (Constr name' t')) ->
            if name == name' && length t == length t'
                then do
                    xs <- zipWithM unify t t'
                    return $ foldr compose nullSubst xs
                else
                    throwError $
                        unwords
                            [ "Type constructor:"
                            , printTree name
                            , "(" ++ printTree t ++ ")"
                            , "does not match with:"
                            , printTree name'
                            , "(" ++ printTree t' ++ ")"
                            ]
        (a, b) ->
            throwError . unwords $
                [ "Type:"
                , printTree a
                , "can't be unified with:"
                , printTree b
                ]

{- | Check if a type is contained in another type.
I.E. { a = a -> b } is an unsolvable constraint since there is no substitution
such that these are equal
-}
occurs :: Ident -> Type -> Infer Subst
occurs _ (TPol _) = return nullSubst
occurs i t =
    if S.member i (free t)
        then
            throwError $
                unwords
                    [ "Occurs check failed, can't unify"
                    , printTree (TPol i)
                    , "with"
                    , printTree t
                    ]
        else return $ M.singleton i t

-- | Generalize a type over all free variables in the substitution set
generalize :: Map Ident Poly -> Type -> Poly
generalize env t = Forall (S.toList $ free t S.\\ free env) t

{- | Instantiate a polymorphic type. The free type variables are substituted
with fresh ones.
-}
inst :: Poly -> Infer Type
inst (Forall xs t) = do
    xs' <- mapM (const fresh) xs
    let s = M.fromList $ zip xs xs'
    return $ apply s t

-- | Compose two substitution sets
compose :: Subst -> Subst -> Subst
compose m1 m2 = M.map (apply m1) m2 `M.union` m1

-- | A class representing free variables functions
class FreeVars t where
    -- | Get all free variables from t
    free :: t -> Set Ident

    -- | Apply a substitution to t
    apply :: Subst -> t -> t

instance FreeVars Type where
    free :: Type -> Set Ident
    free (TPol a) = S.singleton a
    free (TMono _) = mempty
    free (TArr a b) = free a `S.union` free b
    -- \| Not guaranteed to be correct
    free (TConstr (Constr _ a)) =
        foldl' (\acc x -> free x `S.union` acc) S.empty a

    apply :: Subst -> Type -> Type
    apply sub t = do
        case t of
            TMono a -> TMono a
            TPol a -> case M.lookup a sub of
                Nothing -> TPol a
                Just t -> t
            TArr a b -> TArr (apply sub a) (apply sub b)
            TConstr (Constr name a) -> TConstr (Constr name (map (apply sub) a))

instance FreeVars Poly where
    free :: Poly -> Set Ident
    free (Forall xs t) = free t S.\\ S.fromList xs
    apply :: Subst -> Poly -> Poly
    apply s (Forall xs t) = Forall xs (apply (foldr M.delete s xs) t)

instance FreeVars (Map Ident Poly) where
    free :: Map Ident Poly -> Set Ident
    free m = foldl' S.union S.empty (map free $ M.elems m)
    apply :: Subst -> Map Ident Poly -> Map Ident Poly
    apply s = M.map (apply s)

-- | Apply substitutions to the environment.
applySt :: Subst -> Infer a -> Infer a
applySt s = local (\st -> st{vars = apply s (vars st)})

-- | Represents the empty substition set
nullSubst :: Subst
nullSubst = M.empty

-- | Generate a new fresh variable and increment the state counter
fresh :: Infer Type
fresh = do
    n <- gets count
    modify (\st -> st{count = n + 1})
    return . TPol . Ident $ show n

-- | Run the monadic action with an additional binding
withBinding :: (Monad m, MonadReader Ctx m) => Ident -> Poly -> m a -> m a
withBinding i p = local (\st -> st{vars = M.insert i p (vars st)})

-- | Insert a function signature into the environment
insertSig :: Ident -> Type -> Infer ()
insertSig i t = modify (\st -> st{sigs = M.insert i t (sigs st)})

-- | Insert a constructor with its data type
insertConstr :: Ident -> Type -> Infer ()
insertConstr i t =
    modify (\st -> st{constructors = M.insert i t (constructors st)})

-------- PATTERN MATCHING ---------

-- "case expr of", the type of 'expr' is caseType
checkInj :: Type -> Inj -> Infer (T.Inj, Type)
checkInj caseType (Inj it expr) = do
    (args, t') <- initType caseType it
    (_, t, e') <- local (\st -> st{vars = args `M.union` vars st}) (algoW expr)
    return (T.Inj (it, t') e', t)

initType :: Type -> Init -> Infer (Map Ident Poly, Type)
initType expected = \case
    InitLit lit ->
        let returnType = litType lit
         in if expected == returnType
                then return (mempty, expected)
                else
                    throwError $
                        unwords
                            [ "Inferred type"
                            , printTree returnType
                            , "does not match expected type:"
                            , printTree expected
                            ]
    InitConstr c args -> do
        st <- gets constructors
        case M.lookup c st of
            Nothing ->
                throwError $
                    unwords
                        [ "Constructor:"
                        , printTree c
                        , "does not exist"
                        ]
            Just t -> do
                let flat = flattenType t
                let returnType = last flat
                case ( length (init flat) == length args
                     , returnType `isMoreSpecificOrEq` expected
                     ) of
                    (True, True) ->
                        return
                            ( M.fromList $ zip args (map (Forall []) flat)
                            , expected
                            )
                    (False, _) ->
                        throwError $
                            "Can't partially match on the constructor: "
                                ++ printTree c
                    (_, False) ->
                        throwError $
                            unwords
                                [ "Inferred type"
                                , printTree returnType
                                , "does not match expected type:"
                                , printTree expected
                                ]
    InitCatch -> return (mempty, expected)

flattenType :: Type -> [Type]
flattenType (TArr a b) = flattenType a ++ flattenType b
flattenType a = [a]

litType :: Literal -> Type
litType (LInt _) = TMono "Int"