Skip to content

Commit

Permalink
Remove uses of Cstruct
Browse files Browse the repository at this point in the history
This avoids unecessary copies now that hashes are stored in strings (thanks to
digestif). The only possible issue is we need to copy (possibly large) file
that has been mmaped.
  • Loading branch information
samoht committed Sep 25, 2018
1 parent e7dc0ca commit a01efa2
Show file tree
Hide file tree
Showing 15 changed files with 59 additions and 190 deletions.
2 changes: 0 additions & 2 deletions irmin.opam
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@ depends: [
"result"
"fmt" {>= "0.8.0"}
"uri" {>= "1.3.12"}
"cstruct" {>= "1.6.0"}
"jsonm" {>= "1.0.0"}
"lwt" {>= "2.4.7"}
"digestif"
"ocamlgraph"
"hex" {>= "0.2.0"}
"logs" {>= "0.5.0"}
"astring"
]
Expand Down
2 changes: 1 addition & 1 deletion src/irmin-chunk/dune
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
(library
(name irmin_chunk)
(public_name irmin-chunk)
(libraries irmin cstruct))
(libraries irmin))
20 changes: 10 additions & 10 deletions src/irmin-fs/irmin_fs.ml
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ module type IO = sig
type path = string
val rec_files: path -> string list Lwt.t
val file_exists: path -> bool Lwt.t
val read_file: path -> Cstruct.t option Lwt.t
val read_file: path -> string option Lwt.t
val mkdir: path -> unit Lwt.t
type lock
val lock_file: string -> lock
val write_file: ?temp_dir:path -> ?lock:lock ->
path -> Cstruct.t -> unit Lwt.t
path -> string -> unit Lwt.t
val test_and_set_file: ?temp_dir:path -> lock:lock ->
string -> test:Cstruct.t option -> set:Cstruct.t option -> bool Lwt.t
string -> test:string option -> set:string option -> bool Lwt.t
val remove_file: ?lock:lock -> path -> unit Lwt.t
end

Expand Down Expand Up @@ -81,7 +81,7 @@ struct
IO.file_exists file

let value v =
match Irmin.Type.decode_cstruct V.t v with
match Irmin.Type.decode_string V.t v with
| Ok v -> Some v
| Error (`Msg e) ->
Log.err (fun l -> l "Irmin_fs.value %s" e);
Expand Down Expand Up @@ -124,8 +124,8 @@ struct

let add t value =
Log.debug (fun f -> f "add");
let value = Irmin.Type.encode_cstruct V.t value in
let key = K.digest Irmin.Type.cstruct value in
let value = Irmin.Type.encode_string V.t value in
let key = K.digest Irmin.Type.string value in
let file = file_of_key t key in
let temp_dir = temp_dir t in
(IO.file_exists file >>= function
Expand All @@ -148,7 +148,7 @@ module Link_ext (IO: IO) (S: Config) (K:Irmin.Hash.S) = struct
let add t index key =
Log.debug (fun f -> f "add link");
let file = file_of_key t index in
let value = Irmin.Type.encode_cstruct K.t key in
let value = Irmin.Type.encode_string K.t key in
let temp_dir = temp_dir t in
IO.file_exists file >>= function
| true -> Lwt.return_unit
Expand Down Expand Up @@ -217,7 +217,7 @@ struct
stop () >>= fun () ->
W.unwatch t.w id

let raw_value v = Irmin.Type.encode_cstruct V.t v
let raw_value v = Irmin.Type.encode_string V.t v

let set t key value =
Log.debug (fun f -> f "update");
Expand Down Expand Up @@ -337,7 +337,7 @@ module IO_mem = struct

type t = {
watches: (string, string -> unit Lwt.t) Hashtbl.t;
files : (string, Cstruct.t) Hashtbl.t;
files : (string, string) Hashtbl.t;
}

let t = {
Expand Down Expand Up @@ -404,7 +404,7 @@ module IO_mem = struct

let equal x y = match x, y with
| None , None -> true
| Some x, Some y -> Cstruct.equal x y
| Some x, Some y -> String.equal x y
| _ -> false

let test_and_set_file ?temp_dir:_ ~lock file ~test ~set =
Expand Down
6 changes: 3 additions & 3 deletions src/irmin-fs/irmin_fs.mli
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ module type IO = sig
val file_exists: path -> bool Lwt.t
(** [file_exist f] is true if [f] exists. *)

val read_file: path -> Cstruct.t option Lwt.t
val read_file: path -> string option Lwt.t
(** Read the contents of a file using mmap. *)

(** {2 Write Operations} *)
Expand All @@ -53,11 +53,11 @@ module type IO = sig
(** [lock_file f] is the lock associated to the file [f]. *)

val write_file: ?temp_dir:path -> ?lock:lock ->
path -> Cstruct.t -> unit Lwt.t
path -> string -> unit Lwt.t
(** Atomic writes. *)

val test_and_set_file: ?temp_dir:string -> lock:lock ->
path ->test:Cstruct.t option -> set:Cstruct.t option -> bool Lwt.t
path ->test:string option -> set:string option -> bool Lwt.t
(** Test and set. *)

val remove_file: ?lock:lock -> path -> unit Lwt.t
Expand Down
21 changes: 11 additions & 10 deletions src/irmin-unix/fs.ml
Original file line number Diff line number Diff line change
Expand Up @@ -153,15 +153,15 @@ module IO = struct
try not (Sys.is_directory f) with Sys_error _ -> false
) dir

let write_cstruct fd b =
let write_string fd b =
let rec rwrite fd buf ofs len =
Lwt_bytes.write fd buf ofs len >>= fun n ->
Lwt_unix.write_string fd buf ofs len >>= fun n ->
if len = 0 then Lwt.fail End_of_file
else if n < len then rwrite fd buf (ofs + n) (len - n)
else Lwt.return_unit in
match Cstruct.len b with
match String.length b with
| 0 -> Lwt.return_unit
| len -> rwrite fd (Cstruct.to_bigarray b) 0 len
| len -> rwrite fd b 0 len

let delays = Array.init 20 (fun i -> 0.1 *. (float i) ** 2.)

Expand Down Expand Up @@ -238,19 +238,19 @@ module IO = struct

let read_file_with_read file size =
let chunk_size = max 4096 (min size 0x100000) in
let buf = Cstruct.create size in
let buf = Bytes.create size in
let flags = [Unix.O_RDONLY] in
let perm = 0o0 in
Lwt_unix.openfile file flags perm >>= fun fd ->
let rec aux off =
let read_size = min chunk_size (size - off) in
Lwt_bytes.read fd buf.Cstruct.buffer off read_size >>= fun read ->
Lwt_unix.read fd buf off read_size >>= fun read ->
(* It should test for read = 0 in case size is larger than the
real size of the file. This may happen for instance if the
file was truncated while reading. *)
let off = off + read in
if off >= size then
Lwt.return buf
Lwt.return (Bytes.unsafe_to_string buf)
else
aux off
in
Expand All @@ -261,7 +261,8 @@ module IO = struct
let fd = Unix.(openfile file [O_RDONLY; O_NONBLOCK] 0o644) in
let ba = Lwt_bytes.map_file ~fd ~shared:false () in
Unix.close fd;
Lwt.return (Cstruct.of_bigarray ba)
(* XXX(samoht): ideally we should not do a copy here. *)
Lwt.return (Lwt_bytes.to_string ba)

let read_file file =
Lwt.catch (fun () ->
Expand All @@ -280,7 +281,7 @@ module IO = struct

let write_file ?temp_dir ?lock file b =
let write () =
with_write_file file ?temp_dir (fun fd -> write_cstruct fd b)
with_write_file file ?temp_dir (fun fd -> write_string fd b)
in
Lock.with_lock lock (fun () ->
Lwt.catch write (function
Expand All @@ -294,7 +295,7 @@ module IO = struct
read_file file >>= fun v ->
let equal = match test, v with
| None , None -> true
| Some x, Some y -> Cstruct.equal x y
| Some x, Some y -> String.equal x y
| _ -> false
in
if not equal then Lwt.return false
Expand Down
6 changes: 3 additions & 3 deletions src/irmin-unix/resolver.ml
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ module Contents = struct
type t = (module Irmin.Contents.S)

let all = ref [
"string" , (module Irmin.Contents.String: Irmin.Contents.S);
"cstruct", (module Irmin.Contents.Cstruct);
"json", (module Irmin.Contents.Json);
"string", (module Irmin.Contents.String: Irmin.Contents.S);
"bytes" , (module Irmin.Contents.Bytes);
"json" , (module Irmin.Contents.Json);
]
let default = ref (module Irmin.Contents.String: Irmin.Contents.S)

Expand Down
10 changes: 5 additions & 5 deletions src/irmin/contents.ml
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ module String = struct
let of_string s = Ok s
end

module Cstruct = struct
type t = Cstruct.t
let t = Type.cstruct
module Bytes = struct
type t = bytes
let t = Type.bytes
let merge = Merge.idempotent Type.(option t)
let pp ppf b = Fmt.string ppf (Cstruct.to_string b)
let of_string s = Ok (Cstruct.of_string s)
let pp ppf b = Fmt.string ppf (Bytes.unsafe_to_string b)
let of_string s = Ok (Bytes.unsafe_of_string s)
end

let lexeme e x = ignore (Jsonm.encode e (`Lexeme x))
Expand Down
2 changes: 1 addition & 1 deletion src/irmin/contents.mli
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ type json = [
]

module String: S.CONTENTS with type t = string
module Cstruct: S.CONTENTS with type t = Cstruct.t
module Bytes: S.CONTENTS with type t = bytes
module Json: S.CONTENTS with type t = (string * json) list
module Json_value: S.CONTENTS with type t = json
module Json_tree(Store: S.STORE with type contents = json): sig
Expand Down
4 changes: 1 addition & 3 deletions src/irmin/dot.ml
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,7 @@ module Make (S: S.STORE) = struct
let s =
if String.length s <= 10 then s
else String.with_range s ~len:10 in
let s =
if is_valid_utf8 s then s
else (let `Hex s = Hex.of_string s in s) in
let s = if is_valid_utf8 s then s else "<blob>" in
s in
let label_of_node k _ =
let s =
Expand Down
3 changes: 1 addition & 2 deletions src/irmin/dune
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
(library
(name irmin)
(public_name irmin)
(libraries result fmt uri cstruct jsonm lwt ocamlgraph hex logs astring
digestif))
(libraries result fmt uri jsonm lwt ocamlgraph logs astring digestif))
57 changes: 20 additions & 37 deletions src/irmin/irmin.mli
Original file line number Diff line number Diff line change
Expand Up @@ -97,18 +97,12 @@ module Type: sig
val bytes: bytes t
(** [bytes] is a representation of the [bytes] type. *)

val cstruct: Cstruct.t t
(** [cstruct] is a representation of the [Cstruct.t] type. *)

val string_of: len -> string t
(** Like {!string} but add control to the size. *)

val bytes_of: len -> bytes t
(** Like {!bytes} but add control to the size. *)

val cstruct_of: len -> Cstruct.t t
(** Like {!cstruct} but add control to the size. *)

val list: ?len:len -> 'a t -> 'a list t
(** [list t] is a representation of list of values of type [t]. *)

Expand Down Expand Up @@ -389,35 +383,28 @@ module Type: sig
(** [decode_json_lexemes] is similar to {!decode_json} but use an
already decoded list of JSON lexemes instead of a decoder. *)

val encode_cstruct: ?buf:Cstruct.t -> 'a t -> 'a -> Cstruct.t
(** [encode_cstruct t e] encodes [t] into a `Cstruct.t`. The size of
the returned buffer is precomputed and the buffer is allocated at
once.
val encode_string: ?buf:bytes -> 'a t -> 'a -> string
(** [encode_string t e] encodes [t] into a [string] buffer. The size
of the returned buffer is precomputed and the buffer is allocated
at once or it can be passed using the optional argument [buf].
{b NOTE:} There is a special case when the parameter [t] is a
single buffer (of type [cstruct], [bytes] or [string]): the
original value is returned as is, without being copied. *)
single buffer (of type [bytes] or [string]): the original value
is returned as is, without being copied. *)

val decode_cstruct: ?exact:bool -> 'a t ->
Cstruct.t -> ('a, [`Msg of string]) result
(** [decode_cstruct t buf] decodes values of type [t] as produced by
[encode_cstruct t v].
val decode_string: ?exact:bool -> 'a t -> string -> ('a, [`Msg of string]) result
(** [decode_string t buf] decodes values of type [t] as produced by
[encode_string t v].
{b NOTE:} When the parameter [t] is a single buffer (of type
[cstruct], [bytes] or [string]) the original buffer is returned
as is, otherwise sub-[cstruct] are copied. *)
[bytes] or [string]) the original buffer is returned without
being copied. *)

val encode_bytes: ?buf:bytes -> 'a t -> 'a -> bytes
(** Same as {!encode_bytes} but using a string. *)
(** Same as {!encode_string} but using a [bytes] buffer. *)

val decode_bytes: ?exact:bool -> 'a t -> bytes -> ('a, [`Msg of string]) result
(** Same as {!decode_bytes} but using a string. *)

val encode_string: ?buf:bytes -> 'a t -> 'a -> string
(** Same as {!encode_cstruct} but using a string. *)

val decode_string: ?exact:bool -> 'a t -> string -> ('a, [`Msg of string]) result
(** Same as {!decode_cstruct} but using a string. *)
(** Same as {!decode_string} but using a [bytes] buffer. *)

val size_of: 'a t -> 'a -> int
end
Expand Down Expand Up @@ -1036,7 +1023,7 @@ end
}
Default contents for idempotent {{!Contents.String}string}
and {{!Contents.Cstruct}C-buffers like} values are provided. *)
and {{!Contents.Bytes}bytes} buffers are provided. *)
module Contents: sig

module type S0 = sig
Expand Down Expand Up @@ -1095,16 +1082,12 @@ module Contents: sig
end

module String: S with type t = string
(** String values where only the last modified value is kept on
merge. If the value has been modified concurrently, the [merge]
function conflicts. Assume that update operations are
idempotent. *)

module Cstruct: S with type t = Cstruct.t
(** Cstruct values where only the last modified value is kept on
merge. If the value has been modified concurrently, the [merge]
function conflicts. Assume that update operations are
idempotent. *)
(** Contents of type [string], with the default 3-way merge
strategy: assume that update operations are idempotent and
conflict iff values are modified concurrently. *)

module Bytes: S with type t = bytes
(** Same as {!String} but for values of type [bytes]. *)

type json = [
| `Null
Expand Down
Loading

0 comments on commit a01efa2

Please sign in to comment.