Search code examples
ocamldsllambda-calculusgadt

Simple lambda calculus DSL using GADTs in OCaml


How do you define a simple lambda calculus-like DSL in OCaml using GADTs? Specifically, I can't figure out how to properly define the type checker to translate from an untyped AST to a typed AST nor can I figure out the correct type for the context and environment.

Here's some code for a simple lambda calculus-like language using the traditional approach in OCaml

(* Here's a traditional implementation of a lambda calculus like language *)

type typ =
| Boolean
| Integer
| Arrow of typ*typ

type exp =
| Add of exp*exp
| And of exp*exp
| App of exp*exp
| Lam of string*typ*exp
| Var of string
| Int of int
| Bol of bool

let e1=Add(Int 1,Add(Int 2,Int 3))
let e2=Add(Int 1,Add(Int 2,Bol false)) (* Type error *)
let e3=App(Lam("x",Integer,Add(Var "x",Var "x")),Int 4)

let rec typecheck con e =
    match e with
    | Add(e1,e2) ->
        let t1=typecheck con e1 in
        let t2=typecheck con e2 in
        begin match (t1,t2) with 
        | (Integer,Integer) -> Integer
        | _ -> failwith "Tried to add with something other than Integers"
        end
    | And(e1,e2) ->
        let t1=typecheck con e1 in
        let t2=typecheck con e2 in
        begin match (t1,t2) with 
        | (Boolean,Boolean) -> Boolean 
        | _ -> failwith "Tried to and with something other than Booleans"
        end
    | App(e1,e2) ->
        let t1=typecheck con e1 in
        let t2=typecheck con e2 in
        begin match t1 with 
        | Arrow(t11,t12) ->
            if t11 <> t2 then
                failwith "Mismatch of types on a function application"
            else
                t12
        | _ -> failwith "Tried to apply a non-arrow type" 
        end
    | Lam(x,t,e) ->
        Arrow (t,typecheck ((x,t)::con) e)
    | Var x  ->
        let (y,t) = List.find (fun (y,t)->y=x) con in
        t
    | Int _ -> Integer
    | Bol _ -> Boolean

let t1 = typecheck [] e1
(* let t2 = typecheck [] e2 *)
let t3 = typecheck [] e3

type value = 
| VBoolean of bool
| VInteger of int
| VArrow of ((string*value) list -> value -> value)

let rec eval env e = 
    match e with
    | Add(e1,e2) ->
        let v1=eval env e1 in
        let v2=eval env e2 in
        begin match (v1,v2) with 
        | (VInteger i1,VInteger i2) -> VInteger (i1+i2) 
        | _ -> failwith "Tried to add with something other than Integers"
        end
    | And(e1,e2) ->
        let v1=eval env e1 in
        let v2=eval env e2 in
        begin match (v1,v2) with 
        | (VBoolean b1,VBoolean b2) -> VBoolean (b1 && b2) 
        | _ -> failwith "Tried to and with something other than Booleans"
        end
    | App(e1,e2) ->
        let v1=eval env e1 in
        let v2=eval env e2 in
        begin match v1 with 
        | VArrow a1 -> a1  env v2 
        | _ -> failwith "Tried to apply a non-arrow type" 
        end
    | Lam(x,t,e) ->
        VArrow (fun env' v' -> eval ((x,v')::env') e) 
    | Var x  ->
        let (y,v) = List.find (fun (y,t)->y=x) env in
        v 
    | Int i -> VInteger i 
    | Bol b -> VBoolean b

let v1 = eval [] e1
let v3 = eval [] e3

Now, I'm trying to translate this into something that uses GADTs. Here's my start

(* Now, we try to GADT the process *) 

type exp =
| Add of exp*exp
| And of exp*exp
| App of exp*exp
| Lam of string*typ*exp
| Var of string
| Int of int
| Bol of bool

let e1=Add(Int 1,Add(Int 2,Int 3))
let e2=Add(Int 1,Add(Int 2,Bol false))
let e3=App(Lam("x",Integer,Add(Var "x",Var "x")),Int 4)

type _ texp =
| TAdd : int texp * int texp -> int texp
| TAnd : bool texp * bool texp -> bool texp
| TApp : ('a -> 'b) texp * 'a texp -> 'b texp
| TLam : string*'b texp -> ('a -> 'b) texp
| TVar : string -> 'a texp
| TInt : int -> int texp
| TBol : bool -> bool texp

let te1 = TAdd(TInt 1,TAdd(TInt 2,TInt 3))

let rec typecheck : type a. exp -> a texp = fun e ->
    match e with
    | Add(e1,e2) ->
        let te1 = typecheck e1 in
        let te2 = typecheck e2 in
        TAdd (te1,te2)
    | _ -> failwith "todo"

Here's the problem. First, I'm not sure how to define the correct type for TLam and TVar in the type texp. Generally, I would provide the type with the variable name, but I'm not sure how to do that in this context. Second, I don't know the correct type for the context in the function typecheck. Before, I used some kind of list, but now I'm sure sure of the type of the list. Third, after leaving out the context, the typecheck function doesn't type check itself. It fails with the message

File "test03.ml", line 32, characters 8-22:
Error: This expression has type int texp
       but an expression was expected of type a texp
       Type int is not compatible with type a 

which makes complete sense. This is more of an issue of that I'm not sure what the correct type for typecheck should be.

In any case, how do you go about fixing these functions?


Edit 1

Here's a possible type for the context or environment

type _ ctx =
| Empty : unit ctx
| Item :  string * 'a * 'b ctx -> ('a*'b) ctx

Edit 2

The trick with the environment is to make sure that the type of the environment is embedded into the type of the expression. Otherwise, there's not enough information in order to make things type safe. Here's a completed interpreter. At the moment, I do not have a valid type checker to move from untyped expressions to typed expressions.

type (_,_) texp =
| TAdd : ('e,int) texp * ('e,int) texp -> ('e,int) texp
| TAnd : ('e,bool) texp * ('e,bool) texp -> ('e,bool) texp
| TApp : ('e,('a -> 'b)) texp * ('e,'a) texp -> ('e,'b) texp
| TLam : (('a*'e),'b) texp -> ('e,('a -> 'b)) texp
| TVar0 : (('a*'e),'a) texp
| TVarS : ('e,'a) texp -> (('b*'e),'a) texp
| TInt : int -> ('e,int) texp
| TBol : bool -> ('e,bool) texp

let te1 = TAdd(TInt 1,TAdd(TInt 2,TInt 3))
(*let te2 = TAdd(TInt 1,TAdd(TInt 2,TBol false))*)
let te3 = TApp(TLam(TAdd(TVar0,TVar0)),TInt 4)
let te4 = TApp(TApp(TLam(TLam(TAdd(TVar0,TVarS(TVar0)))),TInt 4),TInt 5)
let te5 = TLam(TLam(TVarS(TVar0)))

let rec eval : type e t. e -> (e,t) texp -> t = fun env e -> 
    match e with
    | TAdd (e1,e2) ->
        let v1 = eval env e1 in
        let v2 = eval env e2 in
        v1 + v2
    | TAnd (e1,e2) ->
        let v1 = eval env e1 in
        let v2 = eval env e2 in
        v1 && v2
    | TApp (e1,e2) ->
        let v1 = eval env e1 in
        let v2 = eval env e2 in
        v1 v2
    | TLam e ->
        fun x -> eval (x,env) e 
    | TVar0 ->
        let (v,vs)=env in
        v
    | TVarS e ->
        let (v,vs)=env in
        eval vs e 
    | TInt i -> i
    | TBol b -> b

Then, we have

# eval () te1;;
- : int = 6
# eval () te3;;
- : int = 8
# eval () te5;;
- : '_a -> '_b -> '_a = <fun>
# eval () te4;;
- : int = 9

Solution

  • Alright, so I finally worked things out. Since I may not be the only one who finds this interesting, here's a complete set of code that does both type checking and evaluation:

    type (_,_) texp =
    | TAdd : ('gamma,int) texp * ('gamma,int) texp -> ('gamma,int) texp
    | TAnd : ('gamma,bool) texp * ('gamma,bool) texp -> ('gamma,bool) texp
    | TApp : ('gamma,('t1 -> 't2)) texp * ('gamma,'t1) texp -> ('gamma,'t2) texp
    | TLam : (('gamma*'t1),'t2) texp -> ('gamma,('t1 -> 't2)) texp
    | TVar0 : (('gamma*'t),'t) texp
    | TVarS : ('gamma,'t1) texp -> (('gamma*'t2),'t1) texp
    | TInt : int -> ('gamma,int) texp
    | TBol : bool -> ('gamma,bool) texp
    
    type _ typ =
    | Integer : int typ
    | Boolean : bool typ
    | Arrow : 'a typ * 'b typ -> ('a -> 'b) typ
    
    type (_,_) iseq = IsEqual : ('a,'a) iseq
    let rec is_equal : type a b. a typ -> b typ -> (a,b) iseq option = fun a b ->
        match a, b with
        | Integer, Integer -> Some IsEqual
        | Boolean, Boolean -> Some IsEqual
        | Arrow(t1,t2), Arrow(u1,u2) ->
            begin match is_equal t1 u1, is_equal t2 u2 with
            | Some IsEqual, Some IsEqual -> Some IsEqual
            | _ -> None
            end
        | _ -> None
    
    type _ isint = IsInt : int isint
    let is_integer : type a. a typ -> a isint option = fun a -> 
        match a with
        | Integer -> Some IsInt
        | _ -> None
    
    type _ isbool = IsBool : bool isbool
    let is_boolean : type a. a typ -> a isbool option = fun a -> 
        match a with
        | Boolean -> Some IsBool 
        | _ -> None
    
    type _ context =
    | CEmpty : unit context 
    | CVar : 'a context * 't typ -> ('a*'t) context 
    
    type exp =
    | Add of exp*exp
    | And of exp*exp
    | App of exp*exp
    | Lam : 'a typ * exp -> exp
    | Var0
    | VarS of exp
    | Int of int
    | Bol of bool
    
    type _ exists_texp =
    | Exists : ('gamma,'t) texp * 't typ -> 'gamma exists_texp
    
    let rec typecheck
        : type gamma t. gamma context -> exp -> gamma exists_texp =
    fun ctx e ->
        match e with
        | Int i -> Exists ((TInt i) , Integer)
        | Bol b -> Exists ((TBol b) , Boolean)
        | Var0 ->
            begin match ctx with
            | CEmpty -> failwith "Tried to grab a nonexistent variable"
            | CVar(ctx,t) -> Exists (TVar0 , t)
            end
        | VarS e ->
            begin match ctx with
            | CEmpty -> failwith "Tried to grab a nonexistent variable"
            | CVar(ctx,_) ->
                let tet = typecheck ctx e in
                begin match tet with
                | Exists (te,t) -> Exists ((TVarS te) , t)
                end
            end
        | Lam(t1,e) ->
            let tet2 = typecheck (CVar (ctx,t1)) e in
            begin match tet2 with
            | Exists (te,t2) -> Exists ((TLam te) , (Arrow(t1,t2)))
            end
        | App(e1,e2) ->
            let te1t1 = typecheck ctx e1 in
            let te2t2 = typecheck ctx e2 in
            begin match te1t1,te2t2 with
            | Exists (te1,t1),Exists (te2,t2) ->
                begin match t1 with
                | Arrow(t11,t12) ->
                    let p = is_equal t11 t2 in
                    begin match p with
                    | Some IsEqual -> 
                        Exists ((TApp (te1,te2)) , t12)
                    | None -> 
                        failwith "Mismatch of types on a function application"
                    end
                | _ -> failwith "Tried to apply a non-arrow type" 
                end
            end
        | Add(e1,e2) ->
            let te1t1 = typecheck ctx e1 in
            let te2t2 = typecheck ctx e2 in
            begin match te1t1,te2t2 with
            | Exists (te1,t1),Exists (te2,t2) ->
                let p = is_equal t1 t2 in
                let q = is_integer t1 in
                begin match p,q with
                | Some IsEqual, Some IsInt ->
                    Exists ((TAdd (te1,te2)) , t1)
                | _ ->
                    failwith "Tried to add with something other than Integers"
                end
            end
        | And(e1,e2) ->
            let te1t1 = typecheck ctx e1 in
            let te2t2 = typecheck ctx e2 in
            begin match te1t1,te2t2 with
            | Exists (te1,t1),Exists (te2,t2) ->
                let p = is_equal t1 t2 in
                let q = is_boolean t1 in
                begin match p,q with
                | Some IsEqual, Some IsBool ->
                    Exists ((TAnd (te1,te2)) , t1)
                | _ ->
                    failwith "Tried to and with something other than Booleans"
                end
            end
    
    let e1 = Add(Int 1,Add(Int 2,Int 3))
    let e2 = Add(Int 1,Add(Int 2,Bol false))
    let e3 = App(Lam(Integer,Add(Var0,Var0)),Int 4)
    let e4 = App(App(Lam(Integer,Lam(Integer,Add(Var0,VarS(Var0)))),Int 4),Int 5)
    let e5 = Lam(Integer,Lam(Integer,VarS(Var0)))
    let e6 = App(Lam(Integer,Var0),Int 1)
    let e7 = App(Lam(Integer,Lam(Integer,Var0)),Int 1)
    let e8 = Lam(Integer,Var0)
    let e9 = Lam(Integer,Lam(Integer,Var0))
    
    let tet1 = typecheck CEmpty e1
    (*let tet2 = typecheck CEmpty e2*)
    let tet3 = typecheck CEmpty e3
    let tet4 = typecheck CEmpty e4
    let tet5 = typecheck CEmpty e5
    let tet6 = typecheck CEmpty e6
    let tet7 = typecheck CEmpty e7
    let tet8 = typecheck CEmpty e8
    let tet9 = typecheck CEmpty e9
    
    let rec eval : type gamma t. gamma -> (gamma,t) texp -> t = fun env e -> 
        match e with
        | TAdd (e1,e2) ->
            let v1 = eval env e1 in
            let v2 = eval env e2 in
            v1 + v2
        | TAnd (e1,e2) ->
            let v1 = eval env e1 in
            let v2 = eval env e2 in
            v1 && v2
        | TApp (e1,e2) ->
            let v1 = eval env e1 in
            let v2 = eval env e2 in
            v1 v2
        | TLam e ->
            fun x -> eval (env,x) e 
        | TVar0 ->
            let (env,x)=env in
            x
        | TVarS e ->
            let (env,x)=env in
            eval env e 
        | TInt i -> i
        | TBol b -> b
    
    type exists_v =
    | ExistsV : 't -> exists_v
    
    let typecheck_eval e =
        let tet = typecheck CEmpty e in
        match tet with
        | Exists (te,t) -> ExistsV (eval () te)
    
    let v1 = typecheck_eval e1
    let v3 = typecheck_eval e3
    let v4 = typecheck_eval e4
    let v5 = typecheck_eval e5
    let v6 = typecheck_eval e6
    let v7 = typecheck_eval e7
    let v8 = typecheck_eval e8
    let v9 = typecheck_eval e9
    

    Here are the pieces I had trouble with and how I managed to resolve them

    1. In order to correctly type the typed expressions texp, the type of the environment needed to be built into the type of texp. This implies, as gasche correctly noted, that we needed some sort of De Bruijin notation. The easiest was just Var0 and VarS. In order to use variable names, we'd just have to preprocess the AST.
    2. The type of the expression, typ, needed to include both variant types to match on as well as the type we use in the typed expression. In other words, that also needed to be a GADT.
    3. We require three proofs in order to ferret out the correct types in the type checker. These are is_equal, is_integer, and is_bool. The code for is_equal is actually in the OCaml manual under Advanced examples. Specifically, look at the definition of eq_type.
    4. The type exp, for the untyped AST, actually needs to be a GADT also. The lambda abstraction needs access to typ, which is a GADT.
    5. The type checker returns an existential type of both a typed expression as well as the type. We need both to get the program to check type. Also, we need the existential because the untyped expression may or may not have a type.
    6. The existential type, exists_texp, exposes the type of the environment/context, but not the type. We need this type exposed in order to type check properly.
    7. Once everything is setup, the evaluator follows the type rules exactly.
    8. The result of combining the type checker with the evaluator must be another existential type. A priori, we don't know the resulting type, so we have to hide it in an existential package.