Search code examples
ocamlhashtabletrie

Trying to understand how to implement Tries with hash tables in OCaml


I'm trying to understand how to implement Tries with hash tables in Ocaml. It's the excercise W06 04 from the MOOC "Introduction to Functional Programming in OCaml" in https://www.fun-mooc.fr/.

I really appreciate if someone can help me to understand how to implement the recursive Tries using the imperative hash tables.

The course has ended, I just want to understand.

This is what I'm trying (module GenericTrie and the template of module Trie was given, we had to instantiate Hashtbl.Make functor and implement empty, lookup and insert):

module type GenericTrie = sig
  type 'a char_table
  type 'a trie = Trie of 'a option * 'a trie char_table
  val empty : unit -> 'a trie
  val insert : 'a trie -> string -> 'a -> 'a trie
  val lookup : 'a trie -> string -> 'a option
end

module CharHashedType = struct
  type t = char
  let equal a b = a = b
  let hash a = Hashtbl.hash a
end

module CharHashtbl  = Hashtbl.Make(CharHashedType)

module Trie : GenericTrie with type 'a char_table = 'a CharHashtbl.t =
  struct
    type 'a char_table = 'a CharHashtbl.t
    type 'a trie = Trie of 'a option * 'a trie char_table

    let empty () =
      Trie (None, CharHashtbl.create 10)
    ;;

    let lookup trie w =
      let len = String.length w in
      let rec lookup' trie' idx =
        if idx >= len then let Trie (x, _) = trie' in x
        else
          let Trie (_, table) = trie' in
          if CharHashtbl.mem table w.[idx] then
            lookup' (CharHashtbl.find table w.[idx]) (idx + 1)
          else raise Not_found
      in
      lookup' trie 0
    ;;

    let insert trie w v =

    ;;
  end

Solution

  • According to the answer I gave here, the only difference is that instead of having a Map you have a Hashtbl

    So, from what I understand, your trie associates to a string an 'a option which means that you can store the resulting string or a boolean or anything else but we don't care about that.

    First, I would have written

    type 'a trie = {value : 'a option;
                    children : 'a trie char_table}
    

    Because I find it strange to use a constructor if you just need one and the record will help for the future :

    module Trie : GenericTrie with type 'a char_table = 'a CharHashtbl.t = struct
      type 'a char_table = 'a CharHashtbl.t
      type 'a trie = {value : 'a option;
                      children : 'a trie char_table}
    
      let empty () =
        {value = None; children = CharHashtbl.create 10}
      ;;
    
    
      let lookup trie w =
        let len = String.length w in
        let rec lookup' {value; children} idx =
          if idx >= len then value
          else
            let char = w.[idx] in
            let child = CharHashtbl.find children char in
            lookup' child (idx + 1)
        in
        lookup' trie 0
      ;;
    

    I simplified lookup (note that writing if Module.mem ... then Module.find ... else raise Not_found is strictly equivalent to Module.find ...)

    Then, what about insert? The algorithm is pretty simple :

    • if I reached the last letter of my string, I associate to it the value
    • if not, either there's one child associated to the current letter and I recursively go through this branch or there's not and I need to create this branch.

    Which, in OCaml, gives :

      let insert trie w v =
        let len = String.length w in
        let rec aux trie idx =
          if idx >= len then {trie with value = Some v}
          else
            let char = w.[idx] in
            let child = try CharHashtbl.find trie.children char
              with Not_found -> empty () in
            let child' = aux child (idx + 1) in
            CharHashtbl.add trie.children char child';
            trie
        in aux trie 0
      ;;
    

    As a side note, I'd like to point out the fact that it's really strange to mix persistent and mutable datatypes. My preference, in this case, would be this :

    module type GenericTrie = sig
      type 'a char_table
      type 'a trie = {mutable value : 'a option;
                      children : 'a trie char_table}
      val empty : unit -> 'a trie
      val insert : 'a trie -> string -> 'a -> unit
      val lookup : 'a trie -> string -> 'a option
    end;;
    
    module CharHashedType = struct
      type t = char
      let equal a b = a = b
      let hash a = Hashtbl.hash a
    end;;
    
    module CharHashtbl  = Hashtbl.Make(CharHashedType)
    
    module Trie : GenericTrie with type 'a char_table = 'a CharHashtbl.t = struct
      type 'a char_table = 'a CharHashtbl.t
      type 'a trie = {mutable value : 'a option;
                      children : 'a trie char_table}
    
      let empty () =
        {value = None; children = CharHashtbl.create 10}
      ;;
    
      let lookup trie w =
        let len = String.length w in
        let rec lookup' {value; children} idx =
          if idx >= len then value
          else
            let char = w.[idx] in
            let child = CharHashtbl.find children char in
            lookup' child (idx + 1)
        in
        lookup' trie 0
      ;;
    
      let insert trie w v =
        let len = String.length w in
        let rec aux trie idx =
          if idx >= len then trie.value <- Some v
          else
            let char = w.[idx] in
            let child = try CharHashtbl.find trie.children char
              with Not_found ->
                let child = empty () in
                CharHashtbl.add trie.children char child;
                child
            in
            aux child (idx + 1)
        in aux trie 0
      ;;
    

    In both cases, let's print it with this function :

    let (t : string Trie.trie) = Trie.empty ();;
    
    let rec pp fmt trie =
      let open Trie in
      let {value; children} = trie in
      Format.fprintf fmt "@[<v 1>";
      (match value with
        | None -> Format.fprintf fmt "None"
        | Some s -> Format.fprintf fmt "Some %s" s
      );
      CharHashtbl.iter (fun char trie ->
        Format.fprintf fmt "@ %c@ %a" char pp trie
      ) children;
      Format.fprintf fmt "@]"
    
    let () = 
      Trie.insert t "foo" "foo";
      Trie.insert t "fol" "fol";
      Trie.insert t "far" "far";
      Format.printf "%a@." pp t
    

    I'll have this output :

    None
     f
     None
      o
      None
       o
       Some foo
       l
       Some fol
      a
      None
       r
       Some far