Skip to content

Commit

Permalink
Addition of Hashtbl.merge and merge_all
Browse files Browse the repository at this point in the history
Closes #891
  • Loading branch information
rixed committed Jul 29, 2019
1 parent 89f324d commit abdd546
Show file tree
Hide file tree
Showing 3 changed files with 226 additions and 0 deletions.
4 changes: 4 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ Changelog
(Gabriel Scherer, report by Marcel Hark)
#886, #887

- added BatHashtbl.merge and merge_all
__#891
__(Cedric Cellier, Francois Berenger, Gabriel Scherer)

## v2.9.0 (minor release)

This minor release adds support for OCaml 4.07.0, as well as a certain
Expand Down
44 changes: 44 additions & 0 deletions src/batHashtbl.mli
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,29 @@ val filter_map_inplace: ('key -> 'a -> 'a option) -> ('key, 'a) t -> unit
(** [filter_map_inplace f m] performs like filter_map but modify [m]
inplace instead of creating a new Hashtbl. *)

val merge: ('a -> 'b option -> 'c option -> 'd option) ->
('a, 'b) t -> ('a, 'c) t -> ('a, 'd) t
(** [merge f a b] returns a new Hashtbl which is build from the bindings of
[a] and [b] according to the function [f], that is given all defined keys
one by one, along with the value from [a] (if defined) and the value from
[b] (if defined), and has to return the (optional) resulting value.
It is assumed that each key is bound at most once in [a] and [b].
See [merge_all] for a more general alternative if this is not the case.
@since NEXT_RELEASE
*)

val merge_all: ('a -> 'b list -> 'c list -> 'd list) ->
('a, 'b) t -> ('a, 'c) t -> ('a, 'd) t
(** [merge_all f a b] is similar to [merge], but passes to [f] all bindings
for a key (most recent first, as returned by [find_all]). [f] must then
return all the new bindings of the merged hashtable (or an empty list if
that key should not be bound in the resulting hashtable). Those new
bindings will be inserted in reverse, so that the head of the list will
become the most recent binding in the merged hashtable.
@since NEXT_RELEASE
*)

(** {6 The polymorphic hash primitive}*)

val hash : 'a -> int
Expand Down Expand Up @@ -336,6 +359,10 @@ sig
val modify : key:'a -> f:('b -> 'b) -> ('a, 'b) t -> unit
val modify_def : default:'b -> key:'a -> f:('b -> 'b) -> ('a, 'b) t -> unit
val modify_opt : key:'a -> f:('b option -> 'b option) -> ('a, 'b) t -> unit
val merge: f:('a -> 'b option -> 'c option -> 'd option) ->
left:('a, 'b) t -> right:('a, 'c) t -> ('a, 'd) t
val merge_all: f:('a -> 'b list -> 'c list -> 'd list) ->
left:('a, 'b) t -> right:('a, 'c) t -> ('a, 'd) t
end

(** {6 Functorial interface} *)
Expand Down Expand Up @@ -392,6 +419,10 @@ sig
val modify : key -> ('a -> 'a) -> 'a t -> unit
val modify_def : 'a -> key -> ('a -> 'a) -> 'a t -> unit
val modify_opt : key -> ('a option -> 'a option) -> 'a t -> unit
val merge : (key -> 'a option -> 'b option -> 'c option) ->
'a t -> 'b t -> 'c t
val merge_all : (key -> 'a list -> 'b list -> 'c list) ->
'a t -> 'b t -> 'c t
val keys : 'a t -> key BatEnum.t
val values : 'a t -> 'a BatEnum.t
val enum : 'a t -> (key * 'a) BatEnum.t
Expand Down Expand Up @@ -464,6 +495,10 @@ sig
val modify : key:key -> f:('a -> 'a) -> 'a t -> unit
val modify_def : default:'a -> key:key -> f:('a -> 'a) -> 'a t -> unit
val modify_opt : key:key -> f:('a option -> 'a option) -> 'a t -> unit
val merge : f:(key -> 'a option -> 'b option -> 'c option) ->
left:'a t -> right:'b t -> 'c t
val merge_all : f:(key -> 'a list -> 'b list -> 'c list) ->
left:'a t -> right:'b t -> 'c t
end

end
Expand Down Expand Up @@ -554,6 +589,10 @@ sig
val filteri_inplace : ('key -> 'a -> bool) -> ('key, 'a, [>`Write]) t -> unit
val filter_map : ('key -> 'a -> 'b option) -> ('key, 'a, [>`Read]) t -> ('key, 'b, _) t
val filter_map_inplace : ('key -> 'a -> 'a option) -> ('key, 'a, [>`Write]) t -> unit
val merge : ('key -> 'a option -> 'b option -> 'c option) ->
('key, 'a, [>`Read]) t -> ('key, 'b, [>`Read]) t -> ('key, 'c, _) t
val merge_all : ('key -> 'a list -> 'b list -> 'c list) ->
('key, 'a, [>`Read]) t -> ('key, 'b, [>`Read]) t -> ('key, 'c, _) t

(**{6 Conversions}*)

Expand Down Expand Up @@ -597,6 +636,11 @@ sig
val filter_map : f:(key:'key -> data:'a -> 'b option) -> ('key, 'a, [>`Read]) t -> ('key, 'b, _) t
val filter_map_inplace : f:(key:'key -> data:'a -> 'a option) -> ('key, 'a, [>`Write]) t -> unit
val fold : f:(key:'a -> data:'b -> 'c -> 'c) -> ('a, 'b, [>`Read]) t -> init:'c -> 'c
val merge : f:('key -> 'a option -> 'b option -> 'c option) ->
left:('key, 'a, [>`Read]) t -> right:('key, 'b, [>`Read]) t -> ('key, 'c, _) t
val merge_all : f:('key -> 'a list -> 'b list -> 'c list) ->
left:('key, 'a, [>`Read]) t -> right:('key, 'b, [>`Read]) t -> ('key, 'c, _) t

end

end (* Cap module *)
178 changes: 178 additions & 0 deletions src/batHashtbl.mlv
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,130 @@ let filter_map_inplace f h =
filter_map_inplace (fun _ x -> if x>3 then Some (x+1) else None) h ; \
to_sorted_list h) [5; 6]
*)


let merge f h1 h2 =
let res = create (max (length h1) (length h2)) in
let may_add_res k v1 v2 =
BatOption.may (add res k) (f k v1 v2) in
iter (fun k v1 ->
may_add_res k (Some v1) (find_option h2 k)
) h1 ;
iter (fun k v2 ->
match find h1 k with
| exception Not_found ->
may_add_res k None (Some v2)
| _ -> () (* done above *)
) h2 ;
res

(*$inject
let union = merge (fun _ l r -> if l = None then r else l)
let inter = merge (fun _ l r -> if l = None then l else r)
let equal h1 h2 = to_sorted_list h1 = to_sorted_list h2
let empty = create 0
let h_1_5 = Enum.combine (1 -- 5, 1 -- 5) |> of_enum
let h_1_3 = Enum.combine (1 -- 3, 1 -- 3) |> of_enum
let h_3_5 = Enum.combine (3 -- 5, 3 -- 5) |> of_enum
let of_uniq_list l = List.unique l |> List.map (fun i -> i, i) |> of_list
*)
(*$= merge & ~printer
[] \
(merge (fun k _ _ -> Some k) empty empty |> to_sorted_list)
[1; 2; 3; 4; 5] \
(merge (fun _ l _ -> l) h_1_5 empty |> to_sorted_list)
[] \
(merge (fun _ _ r -> r) h_1_5 empty |> to_sorted_list)
[] \
(merge (fun _ l _ -> l) empty h_1_5 |> to_sorted_list)
[1; 2; 3; 4; 5] \
(merge (fun _ _ r -> r) empty h_1_5 |> to_sorted_list)
[1; 2; 3] \
(let h = Enum.combine (3 -- 6, 13 -- 15) |> of_enum in \
merge (fun _ l _ -> l) h_1_3 h |> to_sorted_list)
[13; 14; 15] \
(let h = Enum.combine (3 -- 5, 13 -- 15) |> of_enum in \
merge (fun _ _ r -> r) h_1_3 h |> to_sorted_list)
[] \
(merge (fun _ _ _ -> None) h_1_3 h_3_5 |> to_sorted_list)
*)
(*$= union & ~printer
[1; 2; 3; 4; 5] \
(union h_1_3 h_3_5 |> to_sorted_list)
*)
(*$= inter & ~printer
[3] \
(inter h_1_3 h_3_5 |> to_sorted_list)
*)
(*$Q equal
(Q.list Q.small_int) (fun l -> \
let h = of_uniq_list l in \
equal (inter h h) h)
(Q.list Q.small_int) (fun l -> \
let h = of_uniq_list l in \
equal (union h h) h)
(Q.list Q.small_int) (fun l -> \
let h = of_uniq_list l in \
equal (union h empty) h)
(Q.list Q.small_int) (fun l -> \
let h = of_uniq_list l in \
equal (inter h empty) empty)
(Q.pair (Q.list Q.small_int) (Q.list Q.small_int)) (fun (l1, l2) -> \
let h1 = of_uniq_list l1 and h2 = of_uniq_list l2 in \
equal (inter h1 h2) (inter h2 h1))
(Q.pair (Q.list Q.small_int) (Q.list Q.small_int)) (fun (l1, l2) -> \
let h1 = of_uniq_list l1 and h2 = of_uniq_list l2 in \
equal (union h1 h2) (union h2 h1))
*)

let merge_all f h1 h2 =
let res = create (max (length h1) (length h2)) in
let may_add_res k v1 v2 =
List.iter (add res k) (f k v1 v2 |> List.rev) in
iter (fun k _ ->
let l1 = find_all h1 k
and l2 = find_all h2 k in
may_add_res k l1 l2
) h1 ;
iter (fun k _ ->
match find_all h1 k with
| [] ->
let l2 = find_all h2 k in
may_add_res k [] l2
| _ -> () (* done above *)
) h2 ;
res

(*$= merge_all & ~printer
[] \
(let h1 = create 0 and h2 = create 0 in \
merge_all (fun k _ _ -> [k]) h1 h2 |> to_sorted_list)
[1; 2; 3; 4; 5] \
(let h = create 0 in \
merge_all (fun _ l _ -> l) h_1_5 h |> to_sorted_list)
[] \
(let h = create 0 in \
merge_all (fun _ _ r -> r) h_1_5 h |> to_sorted_list)
[] \
(let h = create 0 in \
merge_all (fun _ l _ -> l) h h_1_5 |> to_sorted_list)
[1; 2; 3; 4; 5] \
(let h = create 0 in \
merge_all (fun _ _ r -> r) h h_1_5 |> to_sorted_list)
[1; 2; 3] \
(let h = Enum.combine (3 -- 6, 13 -- 15) |> of_enum in \
merge_all (fun _ l _ -> l) h_1_3 h |> to_sorted_list)
[13; 14; 15] \
(let h = Enum.combine (3 -- 5, 13 -- 15) |> of_enum in \
merge_all (fun _ _ r -> r) h_1_3 h |> to_sorted_list)
[] \
(merge_all (fun _ _ _ -> []) h_1_3 h_3_5 |> to_sorted_list)
[2; 1] \
(let h1 = of_list [1, 1] in \
let h2 = copy h1 in \
Hashtbl.add h2 1 2 ;\
let h = merge_all (fun _ _ r -> r) h1 h2 in \
find_all h 1)
*)


Expand Down Expand Up @@ -400,6 +524,8 @@ struct
let modify ~key ~f = modify key f
let modify_def ~default ~key ~f = modify_def default key f
let modify_opt ~key ~f = modify_opt key f
let merge ~f ~left ~right = merge f left right
let merge_all ~f ~left ~right = merge_all f left right
end

module type HashedType = Hashtbl.HashedType
Expand Down Expand Up @@ -435,6 +561,10 @@ sig
val modify : key -> ('a -> 'a) -> 'a t -> unit
val modify_def : 'a -> key -> ('a -> 'a) -> 'a t -> unit
val modify_opt : key -> ('a option -> 'a option) -> 'a t -> unit
val merge : (key -> 'a option -> 'b option -> 'c option) ->
'a t -> 'b t -> 'c t
val merge_all : (key -> 'a list -> 'b list -> 'c list) ->
'a t -> 'b t -> 'c t
val keys : 'a t -> key BatEnum.t
val values : 'a t -> 'a BatEnum.t
val enum : 'a t -> (key * 'a) BatEnum.t
Expand Down Expand Up @@ -495,6 +625,10 @@ sig
val modify : key:key -> f:('a -> 'a) -> 'a t -> unit
val modify_def : default:'a -> key:key -> f:('a -> 'a) -> 'a t -> unit
val modify_opt : key:key -> f:('a option -> 'a option) -> 'a t -> unit
val merge : f:(key -> 'a option -> 'b option -> 'c option) ->
left:'a t -> right:'b t -> 'c t
val merge_all : f:(key -> 'a list -> 'b list -> 'c list) ->
left:'a t -> right:'b t -> 'c t
end

end
Expand Down Expand Up @@ -718,6 +852,44 @@ struct
in
modify_opt key f' h

let merge f a b =
let res = create (max (length a) (length b)) in
let may_add_res k v1 v2 =
BatOption.may (add res k) (f k v1 v2) in
iter (fun k v1 ->
match find b k with
| exception Not_found ->
may_add_res k (Some v1) None
| v2 ->
may_add_res k (Some v1) (Some v2)
) a ;
iter (fun k v2 ->
match find a k with
| exception Not_found ->
may_add_res k None (Some v2)
| _ -> () (* done above *)
) b ;
res

let merge_all f a b =
let res = create (max (length a) (length b)) in
let may_add_res k v1 v2 =
List.iter (add res k) (f k v1 v2 |> List.rev) in
iter (fun k _ ->
let l1 = find_all a k
and l2 = find_all b k in
may_add_res k l1 l2
) a ;
iter (fun k _ ->
match find_all a k with
| [] ->
let l2 = find_all b k in
may_add_res k [] l2
| _ -> () (* done above *)
) b ;
res


module Labels =
struct
let label f = fun key data -> f ~key ~data
Expand All @@ -736,6 +908,8 @@ struct
let modify ~key ~f = modify key f
let modify_def ~default ~key ~f = modify_def default key f
let modify_opt ~key ~f = modify_opt key f
let merge ~f ~left ~right = merge f left right
let merge_all ~f ~left ~right = merge_all f left right
end

module Exceptionless =
Expand Down Expand Up @@ -797,6 +971,8 @@ struct
let filter = filter
let filteri = filteri
let filter_map = filter_map
let merge = merge
let merge_all = merge_all
module Labels =
struct
let label f = fun key data -> f ~key ~data
Expand All @@ -815,6 +991,8 @@ struct
let modify ~key ~f = modify key f
let modify_def ~default ~key ~f = modify_def default key f
let modify_opt ~key ~f = modify_opt key f
let merge ~f ~left ~right = merge f left right
let merge_all ~f ~left ~right = merge_all f left right
end

module Exceptionless =
Expand Down

0 comments on commit abdd546

Please sign in to comment.