From abdd546ae006f7d2833e4d3a51636c49f748e3f2 Mon Sep 17 00:00:00 2001 From: Cedric Cellier Date: Fri, 19 Apr 2019 17:17:53 +0200 Subject: [PATCH] Addition of Hashtbl.merge and merge_all Closes #891 --- ChangeLog | 4 + src/batHashtbl.mli | 44 +++++++++++ src/batHashtbl.mlv | 178 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 226 insertions(+) diff --git a/ChangeLog b/ChangeLog index ffd23a213..66d6d6913 100644 --- a/ChangeLog +++ b/ChangeLog @@ -13,6 +13,10 @@ Changelog (Gabriel Scherer, report by Marcel Hark) #886, #887 +- added BatHashtbl.merge and merge_all +__#891 +__(Cedric Cellier, Francois Berenger, Gabriel Scherer) + ## v2.9.0 (minor release) This minor release adds support for OCaml 4.07.0, as well as a certain diff --git a/src/batHashtbl.mli b/src/batHashtbl.mli index 92c0e301d..eaf789107 100644 --- a/src/batHashtbl.mli +++ b/src/batHashtbl.mli @@ -241,6 +241,29 @@ val filter_map_inplace: ('key -> 'a -> 'a option) -> ('key, 'a) t -> unit (** [filter_map_inplace f m] performs like filter_map but modify [m] inplace instead of creating a new Hashtbl. *) +val merge: ('a -> 'b option -> 'c option -> 'd option) -> + ('a, 'b) t -> ('a, 'c) t -> ('a, 'd) t +(** [merge f a b] returns a new Hashtbl which is build from the bindings of + [a] and [b] according to the function [f], that is given all defined keys + one by one, along with the value from [a] (if defined) and the value from + [b] (if defined), and has to return the (optional) resulting value. + + It is assumed that each key is bound at most once in [a] and [b]. + See [merge_all] for a more general alternative if this is not the case. + @since NEXT_RELEASE +*) + +val merge_all: ('a -> 'b list -> 'c list -> 'd list) -> + ('a, 'b) t -> ('a, 'c) t -> ('a, 'd) t +(** [merge_all f a b] is similar to [merge], but passes to [f] all bindings + for a key (most recent first, as returned by [find_all]). [f] must then + return all the new bindings of the merged hashtable (or an empty list if + that key should not be bound in the resulting hashtable). Those new + bindings will be inserted in reverse, so that the head of the list will + become the most recent binding in the merged hashtable. + @since NEXT_RELEASE +*) + (** {6 The polymorphic hash primitive}*) val hash : 'a -> int @@ -336,6 +359,10 @@ sig val modify : key:'a -> f:('b -> 'b) -> ('a, 'b) t -> unit val modify_def : default:'b -> key:'a -> f:('b -> 'b) -> ('a, 'b) t -> unit val modify_opt : key:'a -> f:('b option -> 'b option) -> ('a, 'b) t -> unit + val merge: f:('a -> 'b option -> 'c option -> 'd option) -> + left:('a, 'b) t -> right:('a, 'c) t -> ('a, 'd) t + val merge_all: f:('a -> 'b list -> 'c list -> 'd list) -> + left:('a, 'b) t -> right:('a, 'c) t -> ('a, 'd) t end (** {6 Functorial interface} *) @@ -392,6 +419,10 @@ sig val modify : key -> ('a -> 'a) -> 'a t -> unit val modify_def : 'a -> key -> ('a -> 'a) -> 'a t -> unit val modify_opt : key -> ('a option -> 'a option) -> 'a t -> unit + val merge : (key -> 'a option -> 'b option -> 'c option) -> + 'a t -> 'b t -> 'c t + val merge_all : (key -> 'a list -> 'b list -> 'c list) -> + 'a t -> 'b t -> 'c t val keys : 'a t -> key BatEnum.t val values : 'a t -> 'a BatEnum.t val enum : 'a t -> (key * 'a) BatEnum.t @@ -464,6 +495,10 @@ sig val modify : key:key -> f:('a -> 'a) -> 'a t -> unit val modify_def : default:'a -> key:key -> f:('a -> 'a) -> 'a t -> unit val modify_opt : key:key -> f:('a option -> 'a option) -> 'a t -> unit + val merge : f:(key -> 'a option -> 'b option -> 'c option) -> + left:'a t -> right:'b t -> 'c t + val merge_all : f:(key -> 'a list -> 'b list -> 'c list) -> + left:'a t -> right:'b t -> 'c t end end @@ -554,6 +589,10 @@ sig val filteri_inplace : ('key -> 'a -> bool) -> ('key, 'a, [>`Write]) t -> unit val filter_map : ('key -> 'a -> 'b option) -> ('key, 'a, [>`Read]) t -> ('key, 'b, _) t val filter_map_inplace : ('key -> 'a -> 'a option) -> ('key, 'a, [>`Write]) t -> unit + val merge : ('key -> 'a option -> 'b option -> 'c option) -> + ('key, 'a, [>`Read]) t -> ('key, 'b, [>`Read]) t -> ('key, 'c, _) t + val merge_all : ('key -> 'a list -> 'b list -> 'c list) -> + ('key, 'a, [>`Read]) t -> ('key, 'b, [>`Read]) t -> ('key, 'c, _) t (**{6 Conversions}*) @@ -597,6 +636,11 @@ sig val filter_map : f:(key:'key -> data:'a -> 'b option) -> ('key, 'a, [>`Read]) t -> ('key, 'b, _) t val filter_map_inplace : f:(key:'key -> data:'a -> 'a option) -> ('key, 'a, [>`Write]) t -> unit val fold : f:(key:'a -> data:'b -> 'c -> 'c) -> ('a, 'b, [>`Read]) t -> init:'c -> 'c + val merge : f:('key -> 'a option -> 'b option -> 'c option) -> + left:('key, 'a, [>`Read]) t -> right:('key, 'b, [>`Read]) t -> ('key, 'c, _) t + val merge_all : f:('key -> 'a list -> 'b list -> 'c list) -> + left:('key, 'a, [>`Read]) t -> right:('key, 'b, [>`Read]) t -> ('key, 'c, _) t + end end (* Cap module *) diff --git a/src/batHashtbl.mlv b/src/batHashtbl.mlv index 3e83ba071..90c2fcd18 100644 --- a/src/batHashtbl.mlv +++ b/src/batHashtbl.mlv @@ -367,6 +367,130 @@ let filter_map_inplace f h = filter_map_inplace (fun _ x -> if x>3 then Some (x+1) else None) h ; \ to_sorted_list h) [5; 6] *) + + +let merge f h1 h2 = + let res = create (max (length h1) (length h2)) in + let may_add_res k v1 v2 = + BatOption.may (add res k) (f k v1 v2) in + iter (fun k v1 -> + may_add_res k (Some v1) (find_option h2 k) + ) h1 ; + iter (fun k v2 -> + match find h1 k with + | exception Not_found -> + may_add_res k None (Some v2) + | _ -> () (* done above *) + ) h2 ; + res + +(*$inject + let union = merge (fun _ l r -> if l = None then r else l) + let inter = merge (fun _ l r -> if l = None then l else r) + let equal h1 h2 = to_sorted_list h1 = to_sorted_list h2 + let empty = create 0 + let h_1_5 = Enum.combine (1 -- 5, 1 -- 5) |> of_enum + let h_1_3 = Enum.combine (1 -- 3, 1 -- 3) |> of_enum + let h_3_5 = Enum.combine (3 -- 5, 3 -- 5) |> of_enum + let of_uniq_list l = List.unique l |> List.map (fun i -> i, i) |> of_list +*) +(*$= merge & ~printer + [] \ + (merge (fun k _ _ -> Some k) empty empty |> to_sorted_list) + [1; 2; 3; 4; 5] \ + (merge (fun _ l _ -> l) h_1_5 empty |> to_sorted_list) + [] \ + (merge (fun _ _ r -> r) h_1_5 empty |> to_sorted_list) + [] \ + (merge (fun _ l _ -> l) empty h_1_5 |> to_sorted_list) + [1; 2; 3; 4; 5] \ + (merge (fun _ _ r -> r) empty h_1_5 |> to_sorted_list) + [1; 2; 3] \ + (let h = Enum.combine (3 -- 6, 13 -- 15) |> of_enum in \ + merge (fun _ l _ -> l) h_1_3 h |> to_sorted_list) + [13; 14; 15] \ + (let h = Enum.combine (3 -- 5, 13 -- 15) |> of_enum in \ + merge (fun _ _ r -> r) h_1_3 h |> to_sorted_list) + [] \ + (merge (fun _ _ _ -> None) h_1_3 h_3_5 |> to_sorted_list) +*) +(*$= union & ~printer + [1; 2; 3; 4; 5] \ + (union h_1_3 h_3_5 |> to_sorted_list) +*) +(*$= inter & ~printer + [3] \ + (inter h_1_3 h_3_5 |> to_sorted_list) +*) +(*$Q equal + (Q.list Q.small_int) (fun l -> \ + let h = of_uniq_list l in \ + equal (inter h h) h) + (Q.list Q.small_int) (fun l -> \ + let h = of_uniq_list l in \ + equal (union h h) h) + (Q.list Q.small_int) (fun l -> \ + let h = of_uniq_list l in \ + equal (union h empty) h) + (Q.list Q.small_int) (fun l -> \ + let h = of_uniq_list l in \ + equal (inter h empty) empty) + (Q.pair (Q.list Q.small_int) (Q.list Q.small_int)) (fun (l1, l2) -> \ + let h1 = of_uniq_list l1 and h2 = of_uniq_list l2 in \ + equal (inter h1 h2) (inter h2 h1)) + (Q.pair (Q.list Q.small_int) (Q.list Q.small_int)) (fun (l1, l2) -> \ + let h1 = of_uniq_list l1 and h2 = of_uniq_list l2 in \ + equal (union h1 h2) (union h2 h1)) +*) + +let merge_all f h1 h2 = + let res = create (max (length h1) (length h2)) in + let may_add_res k v1 v2 = + List.iter (add res k) (f k v1 v2 |> List.rev) in + iter (fun k _ -> + let l1 = find_all h1 k + and l2 = find_all h2 k in + may_add_res k l1 l2 + ) h1 ; + iter (fun k _ -> + match find_all h1 k with + | [] -> + let l2 = find_all h2 k in + may_add_res k [] l2 + | _ -> () (* done above *) + ) h2 ; + res + +(*$= merge_all & ~printer + [] \ + (let h1 = create 0 and h2 = create 0 in \ + merge_all (fun k _ _ -> [k]) h1 h2 |> to_sorted_list) + [1; 2; 3; 4; 5] \ + (let h = create 0 in \ + merge_all (fun _ l _ -> l) h_1_5 h |> to_sorted_list) + [] \ + (let h = create 0 in \ + merge_all (fun _ _ r -> r) h_1_5 h |> to_sorted_list) + [] \ + (let h = create 0 in \ + merge_all (fun _ l _ -> l) h h_1_5 |> to_sorted_list) + [1; 2; 3; 4; 5] \ + (let h = create 0 in \ + merge_all (fun _ _ r -> r) h h_1_5 |> to_sorted_list) + [1; 2; 3] \ + (let h = Enum.combine (3 -- 6, 13 -- 15) |> of_enum in \ + merge_all (fun _ l _ -> l) h_1_3 h |> to_sorted_list) + [13; 14; 15] \ + (let h = Enum.combine (3 -- 5, 13 -- 15) |> of_enum in \ + merge_all (fun _ _ r -> r) h_1_3 h |> to_sorted_list) + [] \ + (merge_all (fun _ _ _ -> []) h_1_3 h_3_5 |> to_sorted_list) + [2; 1] \ + (let h1 = of_list [1, 1] in \ + let h2 = copy h1 in \ + Hashtbl.add h2 1 2 ;\ + let h = merge_all (fun _ _ r -> r) h1 h2 in \ + find_all h 1) *) @@ -400,6 +524,8 @@ struct let modify ~key ~f = modify key f let modify_def ~default ~key ~f = modify_def default key f let modify_opt ~key ~f = modify_opt key f + let merge ~f ~left ~right = merge f left right + let merge_all ~f ~left ~right = merge_all f left right end module type HashedType = Hashtbl.HashedType @@ -435,6 +561,10 @@ sig val modify : key -> ('a -> 'a) -> 'a t -> unit val modify_def : 'a -> key -> ('a -> 'a) -> 'a t -> unit val modify_opt : key -> ('a option -> 'a option) -> 'a t -> unit + val merge : (key -> 'a option -> 'b option -> 'c option) -> + 'a t -> 'b t -> 'c t + val merge_all : (key -> 'a list -> 'b list -> 'c list) -> + 'a t -> 'b t -> 'c t val keys : 'a t -> key BatEnum.t val values : 'a t -> 'a BatEnum.t val enum : 'a t -> (key * 'a) BatEnum.t @@ -495,6 +625,10 @@ sig val modify : key:key -> f:('a -> 'a) -> 'a t -> unit val modify_def : default:'a -> key:key -> f:('a -> 'a) -> 'a t -> unit val modify_opt : key:key -> f:('a option -> 'a option) -> 'a t -> unit + val merge : f:(key -> 'a option -> 'b option -> 'c option) -> + left:'a t -> right:'b t -> 'c t + val merge_all : f:(key -> 'a list -> 'b list -> 'c list) -> + left:'a t -> right:'b t -> 'c t end end @@ -718,6 +852,44 @@ struct in modify_opt key f' h + let merge f a b = + let res = create (max (length a) (length b)) in + let may_add_res k v1 v2 = + BatOption.may (add res k) (f k v1 v2) in + iter (fun k v1 -> + match find b k with + | exception Not_found -> + may_add_res k (Some v1) None + | v2 -> + may_add_res k (Some v1) (Some v2) + ) a ; + iter (fun k v2 -> + match find a k with + | exception Not_found -> + may_add_res k None (Some v2) + | _ -> () (* done above *) + ) b ; + res + + let merge_all f a b = + let res = create (max (length a) (length b)) in + let may_add_res k v1 v2 = + List.iter (add res k) (f k v1 v2 |> List.rev) in + iter (fun k _ -> + let l1 = find_all a k + and l2 = find_all b k in + may_add_res k l1 l2 + ) a ; + iter (fun k _ -> + match find_all a k with + | [] -> + let l2 = find_all b k in + may_add_res k [] l2 + | _ -> () (* done above *) + ) b ; + res + + module Labels = struct let label f = fun key data -> f ~key ~data @@ -736,6 +908,8 @@ struct let modify ~key ~f = modify key f let modify_def ~default ~key ~f = modify_def default key f let modify_opt ~key ~f = modify_opt key f + let merge ~f ~left ~right = merge f left right + let merge_all ~f ~left ~right = merge_all f left right end module Exceptionless = @@ -797,6 +971,8 @@ struct let filter = filter let filteri = filteri let filter_map = filter_map + let merge = merge + let merge_all = merge_all module Labels = struct let label f = fun key data -> f ~key ~data @@ -815,6 +991,8 @@ struct let modify ~key ~f = modify key f let modify_def ~default ~key ~f = modify_def default key f let modify_opt ~key ~f = modify_opt key f + let merge ~f ~left ~right = merge f left right + let merge_all ~f ~left ~right = merge_all f left right end module Exceptionless =