I'm trying to understand how to implement Tries with hash tables in Ocaml. It's the excercise W06 04 from the MOOC "Introduction to Functional Programming in OCaml" in https://www.fun-mooc.fr/.
I really appreciate if someone can help me to understand how to implement the recursive Tries using the imperative hash tables.
The course has ended, I just want to understand.
This is what I'm trying (module GenericTrie and the template of module Trie was given, we had to instantiate Hashtbl.Make functor and implement empty, lookup and insert):
module type GenericTrie = sig
type 'a char_table
type 'a trie = Trie of 'a option * 'a trie char_table
val empty : unit -> 'a trie
val insert : 'a trie -> string -> 'a -> 'a trie
val lookup : 'a trie -> string -> 'a option
end
module CharHashedType = struct
type t = char
let equal a b = a = b
let hash a = Hashtbl.hash a
end
module CharHashtbl = Hashtbl.Make(CharHashedType)
module Trie : GenericTrie with type 'a char_table = 'a CharHashtbl.t =
struct
type 'a char_table = 'a CharHashtbl.t
type 'a trie = Trie of 'a option * 'a trie char_table
let empty () =
Trie (None, CharHashtbl.create 10)
;;
let lookup trie w =
let len = String.length w in
let rec lookup' trie' idx =
if idx >= len then let Trie (x, _) = trie' in x
else
let Trie (_, table) = trie' in
if CharHashtbl.mem table w.[idx] then
lookup' (CharHashtbl.find table w.[idx]) (idx + 1)
else raise Not_found
in
lookup' trie 0
;;
let insert trie w v =
;;
end
According to the answer I gave here, the only difference is that instead of having a Map
you have a Hashtbl
So, from what I understand, your trie associates to a string
an 'a option
which means that you can store the resulting string or a boolean or anything else but we don't care about that.
First, I would have written
type 'a trie = {value : 'a option;
children : 'a trie char_table}
Because I find it strange to use a constructor if you just need one and the record will help for the future :
module Trie : GenericTrie with type 'a char_table = 'a CharHashtbl.t = struct
type 'a char_table = 'a CharHashtbl.t
type 'a trie = {value : 'a option;
children : 'a trie char_table}
let empty () =
{value = None; children = CharHashtbl.create 10}
;;
let lookup trie w =
let len = String.length w in
let rec lookup' {value; children} idx =
if idx >= len then value
else
let char = w.[idx] in
let child = CharHashtbl.find children char in
lookup' child (idx + 1)
in
lookup' trie 0
;;
I simplified lookup
(note that writing if Module.mem ... then Module.find ... else raise Not_found
is strictly equivalent to Module.find ...
)
Then, what about insert
? The algorithm is pretty simple :
Which, in OCaml, gives :
let insert trie w v =
let len = String.length w in
let rec aux trie idx =
if idx >= len then {trie with value = Some v}
else
let char = w.[idx] in
let child = try CharHashtbl.find trie.children char
with Not_found -> empty () in
let child' = aux child (idx + 1) in
CharHashtbl.add trie.children char child';
trie
in aux trie 0
;;
As a side note, I'd like to point out the fact that it's really strange to mix persistent and mutable datatypes. My preference, in this case, would be this :
module type GenericTrie = sig
type 'a char_table
type 'a trie = {mutable value : 'a option;
children : 'a trie char_table}
val empty : unit -> 'a trie
val insert : 'a trie -> string -> 'a -> unit
val lookup : 'a trie -> string -> 'a option
end;;
module CharHashedType = struct
type t = char
let equal a b = a = b
let hash a = Hashtbl.hash a
end;;
module CharHashtbl = Hashtbl.Make(CharHashedType)
module Trie : GenericTrie with type 'a char_table = 'a CharHashtbl.t = struct
type 'a char_table = 'a CharHashtbl.t
type 'a trie = {mutable value : 'a option;
children : 'a trie char_table}
let empty () =
{value = None; children = CharHashtbl.create 10}
;;
let lookup trie w =
let len = String.length w in
let rec lookup' {value; children} idx =
if idx >= len then value
else
let char = w.[idx] in
let child = CharHashtbl.find children char in
lookup' child (idx + 1)
in
lookup' trie 0
;;
let insert trie w v =
let len = String.length w in
let rec aux trie idx =
if idx >= len then trie.value <- Some v
else
let char = w.[idx] in
let child = try CharHashtbl.find trie.children char
with Not_found ->
let child = empty () in
CharHashtbl.add trie.children char child;
child
in
aux child (idx + 1)
in aux trie 0
;;
In both cases, let's print it with this function :
let (t : string Trie.trie) = Trie.empty ();;
let rec pp fmt trie =
let open Trie in
let {value; children} = trie in
Format.fprintf fmt "@[<v 1>";
(match value with
| None -> Format.fprintf fmt "None"
| Some s -> Format.fprintf fmt "Some %s" s
);
CharHashtbl.iter (fun char trie ->
Format.fprintf fmt "@ %c@ %a" char pp trie
) children;
Format.fprintf fmt "@]"
let () =
Trie.insert t "foo" "foo";
Trie.insert t "fol" "fol";
Trie.insert t "far" "far";
Format.printf "%a@." pp t
I'll have this output :
None
f
None
o
None
o
Some foo
l
Some fol
a
None
r
Some far