I have a list of pairs that simulate a dictionary, like [("key1",32);("key2",54);...]
and I need a function that can filter the list and return a new list, remove the pair that have duplicate keys.
Example: a list like [("key",32);("key",78);("anotherKey",12)]
has to be filtered as [("key",32);("anotherKey",12)]
.
I've achieved this using Hashtable, but I prefer a solution that use List instead. In the code below I can't find a way to substitute hashtable with a list, because in the else
branch i don't know how to proper update the seen
list.
let getKey (k,v) = k;;
(* Hashtable *)
let filter (list : (string * int list) : (string * int) list =
let seen = Hashtbl.create (List.length list) in
List.filter
(fun pair ->
let exists = not (Hashtbl.mem seen (getKey pair)) in
Hashtbl.replace seen (getKey pair) ();
exists)
list
;;
(* List *)
let filter (list : (string * int) list) : (string * int) list =
let seen = [] in
List.filter
(fun pair ->
let exists = List.mem (getKey pair) seen in
if exists then failwith("duplicate")
else (* what to do here? *);
true)
list
;;
Not sure I understand what is the problem. As for me, the solution can be expressed quite clearly:
The implementation just follows the description:
let rec is_member key = function
| [] -> false
| (k, _)::tail ->
if k = key then true
else is_member key tail
;;
let dedup list =
let rec aux acc = function
| [] -> List.rev acc
| ((k, v) as hd)::tl ->
if is_member k acc then aux acc tl
else aux (hd::acc) tl
in aux [] list
;;
utop # dedup [("key",32);("key",78);("anotherKey",12)];;
- : (string * int) list = [("key", 32); ("anotherKey", 12)]
Regarding the standard library (and List in particular) you cannot achieve your goal using List.filter
- because this function doesn't accumulate the result but just iterates over list elements one by one. If you need an accumulated result you should use smth. like fold_left
instead, for example:
let dedup_list list =
List.fold_left (fun acc ((k, v) as el) -> if is_member k acc then acc else el::acc) [] list
|> List.rev
;;
utop # dedup_list [("key",32);("key",78);("anotherKey",12)];;
- : (string * int) list = [("key", 32); ("anotherKey", 12)]