From 05912518a5b650b032ca6bac4ff9f265875ec35d Mon Sep 17 00:00:00 2001 From: Craig Ferguson Date: Fri, 26 Jun 2020 14:29:24 +0200 Subject: [PATCH] Handle sharing of Index instances explicitly --- CHANGES.md | 8 +++++ README.md | 5 +-- bench/bench.ml | 8 +++-- index.opam | 4 +-- src/cache.ml | 52 ++++++++++++++++++++++++++++++ src/index.ml | 71 ++++++++++++++++++++++------------------- src/index_intf.ml | 51 ++++++++++++++++++++++------- src/unix/index_unix.mli | 4 +-- test/cache.ml | 31 ++++++++++++++++++ test/dune | 2 +- test/main.ml | 2 ++ test/search.ml | 18 ++++------- test/unix/common.ml | 17 ++++++---- test/unix/common.mli | 3 +- test/unix/main.ml | 28 +++++++++------- 15 files changed, 220 insertions(+), 84 deletions(-) create mode 100644 src/cache.ml create mode 100644 test/cache.ml create mode 100644 test/main.ml diff --git a/CHANGES.md b/CHANGES.md index 1f475c63..e9c12826 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,11 @@ +# Unreleased + +## Changed + +- Caching of `Index` instances is now explicit: `Index.Make` requires a cache + implementation, and `Index.v` may be passed a cache to be used for instance + sharing. The default behaviour is _not_ to share instances. (#188) + # 1.2.1 (2020-06-24) ## Added diff --git a/README.md b/README.md index 545bc2f6..a24a53a6 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,9 @@ Index is a scalable implementation of persistent indices in OCaml. It takes an arbitrary IO implementation and user-supplied content types and supplies a standard key-value interface for persistent storage. -Index provides instance sharing by default: -each OCaml run-time shares a common singleton instance. + +Index supports instance sharing: +each OCaml runtime can share a common singleton instance. Index supports multiple-reader/single-writer access. Concurrent access is safely managed using lock files. diff --git a/bench/bench.ml b/bench/bench.ml index 94791a0f..a7eac165 100644 --- a/bench/bench.ml +++ b/bench/bench.ml @@ -151,7 +151,8 @@ let absent_bindings_pool = ref [||] let sorted_bindings_pool = ref [||] module Index = struct - module Index = Index_unix.Private.Make (Context.Key) (Context.Value) + module Index = + Index_unix.Private.Make (Context.Key) (Context.Value) (Index.Cache.Noop) let add_metrics = let no_tags x = x in @@ -211,7 +212,10 @@ module Index = struct read_absent ~with_metrics !absent_bindings_pool t let run ~with_metrics ~nb_entries ~log_size ~root ~name ~fresh ~readonly b = - let index = Index.v ~fresh ~readonly ~log_size (root // name) in + let index = + Index.v ~cache:(Index.empty_cache ()) ~fresh ~readonly ~log_size + (root // name) + in let result = Benchmark.run ~nb_entries (b ~with_metrics index) in Index.close index; result diff --git a/index.opam b/index.opam index 03bba221..5bc882a8 100644 --- a/index.opam +++ b/index.opam @@ -35,8 +35,8 @@ Index is a scalable implementation of persistent indices in OCaml. It takes an arbitrary IO implementation and user-supplied content types and supplies a standard key-value interface for persistent -storage. Index provides instance sharing by default: each OCaml -run-time shares a common singleton instance. +storage. Index provides instance sharing: each OCaml +run-time can share a common singleton instance. Index supports multiple-reader/single-writer access. Concurrent access is safely managed using lock files.""" diff --git a/src/cache.ml b/src/cache.ml new file mode 100644 index 00000000..ce82246a --- /dev/null +++ b/src/cache.ml @@ -0,0 +1,52 @@ +(* The MIT License + +Copyright (c) 2019 Craig Ferguson + Thomas Gazagnaire + Ioana Cristescu + Clément Pascutto + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. *) + +module type S = sig + type ('k, 'v) t + (** A cache of values of type ['v], indexed by keys of type ['k]. *) + + val create : unit -> (_, _) t + + val add : ('k, 'v) t -> 'k -> 'v -> unit + + val find : ('k, 'v) t -> 'k -> 'v option + + val remove : ('k, _) t -> 'k -> unit +end + +(** Cache implementation that always misses. *) +module Noop : S = struct + type (_, _) t = unit + + let create () = () + + let add () _ _ = () + + let find () _ = None + + let remove () _ = () +end + +(** Cache implementation that always finds previously-added values, and grows + indefinitely. *) +module Unbounded : S = struct + include Hashtbl + + let create () = create 0 + + let find = find_opt +end diff --git a/src/index.ml b/src/index.ml index 35338bcd..ffb902b9 100644 --- a/src/index.ml +++ b/src/index.ml @@ -17,6 +17,7 @@ all copies or substantial portions of the Software. *) include Index_intf module Stats = Stats +module Cache = Cache let may f = function None -> () | Some bf -> f bf @@ -31,7 +32,8 @@ module Make_private (V : Value) (IO : Io.S) (Mutex : MUTEX) - (Thread : THREAD) = + (Thread : THREAD) + (Cache : Cache.S) = struct type 'a async = 'a Thread.t @@ -219,36 +221,9 @@ struct Int64.of_float rounded end) - let with_cache ~v ~clear = - let roots = Hashtbl.create 0 in - let f ?(fresh = false) ?(readonly = false) ~log_size root = - Log.info (fun l -> - l "[%s] v fresh=%b readonly=%b log_size=%d" (Filename.basename root) - fresh readonly log_size); - try - if not (Sys.file_exists (index_dir root)) then ( - Log.debug (fun l -> - l "[%s] does not exist anymore, cleaning up the fd cache" - (Filename.basename root)); - Hashtbl.remove roots (root, true); - Hashtbl.remove roots (root, false); - raise Not_found ); - let t = Hashtbl.find roots (root, readonly) in - if t.open_instances <> 0 then ( - Log.debug (fun l -> l "[%s] found in cache" (Filename.basename root)); - t.open_instances <- t.open_instances + 1; - let t = ref (Some t) in - if fresh then clear t; - t ) - else ( - Hashtbl.remove roots (root, readonly); - raise Not_found ) - with Not_found -> - let instance = v ~fresh ~readonly ~log_size root in - Hashtbl.add roots (root, readonly) instance; - ref (Some instance) - in - `Staged f + type cache = (string * bool, instance) Cache.t + + let empty_cache = Cache.create let v_no_cache ~fresh ~readonly ~log_size root = Log.debug (fun l -> @@ -330,7 +305,39 @@ struct pending_cancel = false; } - let (`Staged v) = with_cache ~v:v_no_cache ~clear + let v ?(cache = empty_cache ()) ?(fresh = false) ?(readonly = false) ~log_size + root = + let new_instance () = + let instance = v_no_cache ~fresh ~readonly ~log_size root in + Cache.add cache (root, readonly) instance; + ref (Some instance) + in + Log.info (fun l -> + l "[%s] v fresh=%b readonly=%b log_size=%d" (Filename.basename root) + fresh readonly log_size); + match + (Cache.find cache (root, readonly), Sys.file_exists (index_dir root)) + with + | None, _ -> new_instance () + | Some _, false -> + Log.debug (fun l -> + l "[%s] does not exist anymore, cleaning up the fd cache" + (Filename.basename root)); + Cache.remove cache (root, true); + Cache.remove cache (root, false); + new_instance () + | Some t, true -> ( + match t.open_instances with + | 0 -> + Cache.remove cache (root, readonly); + new_instance () + | _ -> + Log.debug (fun l -> + l "[%s] found in cache" (Filename.basename root)); + t.open_instances <- t.open_instances + 1; + let t = ref (Some t) in + if fresh then clear t; + t ) let interpolation_search index key = let hashed_key = K.hash key in diff --git a/src/index_intf.ml b/src/index_intf.ml index c842c4b6..b7047afa 100644 --- a/src/index_intf.ml +++ b/src/index_intf.ml @@ -114,9 +114,17 @@ module type S = sig type value (** The type for values. *) - val v : ?fresh:bool -> ?readonly:bool -> log_size:int -> string -> t + type cache + (** The type for caches of index instances. *) + + val empty_cache : unit -> cache + (** Construct a new empty cache of index instances. *) + + val v : + ?cache:cache -> ?fresh:bool -> ?readonly:bool -> log_size:int -> string -> t (** The constructor for indexes. + @param cache a cache instance to use for instance sharing. @param fresh whether an existing index should be overwritten. @param read_only whether read-only mode is enabled for this index. @param log_size the maximum number of bindings in the `log` IO. *) @@ -162,7 +170,7 @@ module type S = sig end module type Index = sig - (** The input of [Make] for keys. *) + (** The input of {!Make} for keys. *) module type Key = sig (* N.B. We use [sig ... end] redirections to avoid linking to the [_intf] file in the generated docs. Once Odoc 2 is released, this can be @@ -172,12 +180,8 @@ module type Index = sig (** @inline *) end - module Stats : sig - include module type of Stats - (** @inline *) - end - - (** The input of [Make] for values. The same requirements as for [Key] apply. *) + (** The input of {!Make} for values. The same requirements as for {!Key} + apply. *) module type Value = sig include Value (** @inline *) @@ -198,6 +202,13 @@ module type Index = sig (** @inline *) end + (** Signatures and implementations of caches. {!Make} requires a cache in + order to provide instance sharing. *) + module Cache : sig + include module type of Cache + (** @inline *) + end + (** Index module signature. *) module type S = sig include S @@ -212,8 +223,19 @@ module type Index = sig (** The exception raised when any operation is attempted on a closed index, except for [close], which is idempotent. *) - module Make (K : Key) (V : Value) (IO : IO) (M : MUTEX) (T : THREAD) : - S with type key = K.t and type value = V.t + module Make + (K : Key) + (V : Value) + (IO : IO) + (M : MUTEX) + (T : THREAD) + (C : Cache.S) : S with type key = K.t and type value = V.t + + (** Run-time metric tracking for index instances. *) + module Stats : sig + include module type of Stats + (** @inline *) + end (** These modules should not be used. They are exposed purely for testing purposes. *) @@ -263,7 +285,12 @@ module type Index = sig is timed. *) end - module Make (K : Key) (V : Value) (IO : IO) (M : MUTEX) (T : THREAD) : - S with type key = K.t and type value = V.t + module Make + (K : Key) + (V : Value) + (_ : IO) + (_ : MUTEX) + (_ : THREAD) + (_ : Cache.S) : S with type key = K.t and type value = V.t end end diff --git a/src/unix/index_unix.mli b/src/unix/index_unix.mli index 14ddafd7..0880813f 100644 --- a/src/unix/index_unix.mli +++ b/src/unix/index_unix.mli @@ -15,7 +15,7 @@ furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. *) -module Make (K : Index.Key) (V : Index.Value) : +module Make (K : Index.Key) (V : Index.Value) (C : Index.Cache.S) : Index.S with type key = K.t and type value = V.t module Syscalls = Syscalls @@ -28,6 +28,6 @@ module Private : sig module Raw = Raw - module Make (K : Index.Key) (V : Index.Value) : + module Make (K : Index.Key) (V : Index.Value) (C : Index.Cache.S) : Index.Private.S with type key = K.t and type value = V.t end diff --git a/test/cache.ml b/test/cache.ml new file mode 100644 index 00000000..9401f64d --- /dev/null +++ b/test/cache.ml @@ -0,0 +1,31 @@ +let check_none msg = Alcotest.(check (option reject)) msg None + +let check_some msg x = Alcotest.(check (option int)) msg (Some x) + +let test_noop () = + let open Index.Cache.Noop in + (* Test that added entries are never found. *) + let c = create () in + find c "not-added" |> check_none "Cannot find non-existent value"; + add c "added" 1; + find c "added" |> check_none "Cannot find added value"; + remove c "added"; + find c "added" |> check_none "Can't find added value after remove"; + () + +let test_unbounded () = + let open Index.Cache.Unbounded in + (* Test that added entries are always found. *) + let c = create () in + find c "not-added" |> check_none "Cannot find non-existent value"; + add c "added" 1; + find c "added" |> check_some "Can find added value" 1; + remove c "added"; + find c "added" |> check_none "Can't find added value after remove"; + () + +let tests = + [ + Alcotest.test_case "noop" `Quick test_noop; + Alcotest.test_case "unbounded" `Quick test_unbounded; + ] diff --git a/test/dune b/test/dune index 44fa7657..a1aa39a4 100644 --- a/test/dune +++ b/test/dune @@ -1,4 +1,4 @@ (test - (name search) + (name main) (package index) (libraries index alcotest)) diff --git a/test/main.ml b/test/main.ml new file mode 100644 index 00000000..b358bd93 --- /dev/null +++ b/test/main.ml @@ -0,0 +1,2 @@ +let () = + Alcotest.run "index" [ ("cache", Cache.tests); ("search", Search.tests) ] diff --git a/test/search.ml b/test/search.ml index b9a023b5..c49993ce 100644 --- a/test/search.ml +++ b/test/search.ml @@ -98,14 +98,10 @@ let interpolation_constant_metric () = |> Alcotest.(check string) "" v) array -let () = - Random.self_init (); - Alcotest.run "search" - [ - ( "interpolation", - [ - Alcotest.test_case "unique" `Quick interpolation_unique; - Alcotest.test_case "constant metric" `Quick - interpolation_constant_metric; - ] ); - ] +let tests = + [ + Alcotest.test_case "unique" `Quick interpolation_unique; + Alcotest.test_case "constant metric" `Quick interpolation_constant_metric; + ] + +let () = Random.self_init () diff --git a/test/unix/common.ml b/test/unix/common.ml index eee2cfc2..0b6bd7f2 100644 --- a/test/unix/common.ml +++ b/test/unix/common.ml @@ -64,7 +64,7 @@ module Value = struct let pp s = Fmt.fmt "%s" s end -module Index = Index_unix.Private.Make (Key) (Value) +module Index = Index_unix.Private.Make (Key) (Value) (Index.Cache.Unbounded) module Make_context (Config : sig val root : string @@ -82,21 +82,24 @@ struct type t = { rw : Index.t; tbl : (string, string) Hashtbl.t; + cache : Index.cache; clone : ?fresh:bool -> readonly:bool -> unit -> Index.t; } let empty_index () = let name = fresh_name "empty_index" in - let rw = Index.v ~fresh:true ~log_size:4 name in + let cache = Index.empty_cache () in + let rw = Index.v ~cache ~fresh:true ~log_size:4 name in let tbl = Hashtbl.create 0 in let clone ?(fresh = false) ~readonly () = - Index.v ~fresh ~log_size:4 ~readonly name + Index.v ~cache ~fresh ~log_size:4 ~readonly name in - { rw; tbl; clone } + { rw; tbl; clone; cache } let full_index ?(size = 103) () = let name = fresh_name "full_index" in - let t = Index.v ~fresh:true ~log_size:4 name in + let cache = Index.empty_cache () in + let t = Index.v ~cache ~fresh:true ~log_size:4 name in let tbl = Hashtbl.create 0 in for _ = 1 to size do let k = Key.v () in @@ -106,9 +109,9 @@ struct done; Index.flush t; let clone ?(fresh = false) ~readonly () = - Index.v ~fresh ~log_size:4 ~readonly name + Index.v ~cache ~fresh ~log_size:4 ~readonly name in - { rw = t; tbl; clone } + { rw = t; tbl; clone; cache } end let ignore_value (_ : Value.t) = () diff --git a/test/unix/common.mli b/test/unix/common.mli index 52e23388..cf8e58cf 100644 --- a/test/unix/common.mli +++ b/test/unix/common.mli @@ -22,9 +22,10 @@ module Index : Index.Private.S with type key = Key.t and type value = Value.t module Make_context (Config : sig val root : string end) : sig - type t = { + type t = private { rw : Index.t; tbl : (string, string) Hashtbl.t; + cache : Index.cache; clone : ?fresh:bool -> readonly:bool -> unit -> Index.t; } diff --git a/test/unix/main.ml b/test/unix/main.ml index 33ddaecd..0ca78a88 100644 --- a/test/unix/main.ml +++ b/test/unix/main.ml @@ -159,14 +159,14 @@ end (* Tests of behaviour after restarting the index *) module DuplicateInstance = struct let find_present () = - let Context.{ rw; tbl; clone } = Context.full_index () in + let Context.{ rw; tbl; clone; _ } = Context.full_index () in let rw2 = clone ~readonly:false () in check_equivalence rw tbl; Index.close rw; Index.close rw2 let find_absent () = - let Context.{ rw; tbl; clone } = Context.full_index () in + let Context.{ rw; tbl; clone; _ } = Context.full_index () in let rw2 = clone ~readonly:false () in test_find_absent rw tbl; Index.close rw; @@ -180,7 +180,7 @@ module DuplicateInstance = struct Index.close rw2 let membership () = - let Context.{ tbl; clone; rw } = Context.full_index () in + let Context.{ rw; tbl; clone; _ } = Context.full_index () in let rw2 = clone ~readonly:false () in check_equivalence_mem rw2 tbl; Index.close rw; @@ -188,10 +188,14 @@ module DuplicateInstance = struct let fail_restart_ro_fresh () = let reuse_name = Context.fresh_name "empty_index" in - let rw = Index.v ~fresh:true ~readonly:false ~log_size:4 reuse_name in + let cache = Index.empty_cache () in + let rw = + Index.v ~cache ~fresh:true ~readonly:false ~log_size:4 reuse_name + in let exn = I.RO_not_allowed in Alcotest.check_raises "Index readonly cannot be fresh." exn (fun () -> - ignore_index (Index.v ~fresh:true ~readonly:true ~log_size:4 reuse_name)); + ignore_index + (Index.v ~cache ~fresh:true ~readonly:true ~log_size:4 reuse_name)); Index.close rw let sync () = @@ -241,7 +245,7 @@ module Readonly = struct Index.close rw let readonly_clear () = - let Context.{ rw; tbl; clone } = Context.full_index () in + let Context.{ rw; tbl; clone; _ } = Context.full_index () in let ro = clone ~readonly:true () in check_equivalence ro tbl; Index.clear rw; @@ -344,7 +348,7 @@ end (* Tests of {Index.close} *) module Close = struct let close_reopen_rw () = - let Context.{ rw; tbl; clone } = Context.full_index () in + let Context.{ rw; tbl; clone; _ } = Context.full_index () in Index.close rw; let w = clone ~readonly:false () in check_equivalence w tbl; @@ -365,14 +369,14 @@ module Close = struct Index.close rw let open_readonly_close_rw () = - let Context.{ rw; tbl; clone } = Context.full_index () in + let Context.{ rw; tbl; clone; _ } = Context.full_index () in let ro = clone ~readonly:true () in Index.close rw; check_equivalence ro tbl; Index.close ro let close_reopen_readonly () = - let Context.{ rw; tbl; clone } = Context.full_index () in + let Context.{ rw; tbl; clone; _ } = Context.full_index () in Index.close rw; let ro = clone ~readonly:true () in check_equivalence ro tbl; @@ -517,7 +521,7 @@ module Filter = struct (** Test that the results of [filter] are propagated to a clone which was created before. *) let clone_then_filter () = - let Context.{ rw; tbl; clone } = Context.full_index () in + let Context.{ rw; tbl; clone; _ } = Context.full_index () in let k = random_existing_key tbl in Hashtbl.remove tbl k; let rw2 = clone ~readonly:false () in @@ -530,7 +534,7 @@ module Filter = struct (** Test that the results of [filter] are propagated to a clone which was created after. *) let filter_then_clone () = - let Context.{ rw; tbl; clone } = Context.full_index () in + let Context.{ rw; tbl; clone; _ } = Context.full_index () in let k = random_existing_key tbl in Hashtbl.remove tbl k; Index.filter rw (fun (k', _) -> not (String.equal k k')); @@ -543,7 +547,7 @@ module Filter = struct (** Test that using [filter] doesn't affect fresh clones created later at the same path. *) let empty_after_filter_and_fresh () = - let Context.{ rw; tbl; clone } = Context.full_index () in + let Context.{ rw; tbl; clone; _ } = Context.full_index () in let k = random_existing_key tbl in Hashtbl.remove tbl k; Index.filter rw (fun (k', _) -> not (String.equal k k'));