Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[git] Correctly call git ls-tree so unicode files are not quoted #3879

Merged
merged 2 commits into from
Oct 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ Unreleased
makes it possible to use the build info module inside the preprocessor.
(#3848, fix #3848, @rgrinberg)

- Correctly call `git ls-tree` so unicode files are not quoted, this
fixes problems with `dune subst` in the presence of unicode
files. Fixes #3219 (#3879, @ejgallego)

2.7.1 (2/09/2020)
-----------------

Expand Down
3 changes: 3 additions & 0 deletions src/dune_engine/process.ml
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,9 @@ let run_capture = run_capture_gen ~f:Stdune.Io.read_file

let run_capture_lines = run_capture_gen ~f:Stdune.Io.lines_of_file

let run_capture_zero_separated =
run_capture_gen ~f:Stdune.Io.zero_strings_of_file

let run_capture_line ?dir ?stderr_to ?stdin_from ?env ?(purpose = Internal_job)
fail_mode prog args =
run_capture_gen ?dir ?stderr_to ?stdin_from ?env ~purpose fail_mode prog args
Expand Down
11 changes: 11 additions & 0 deletions src/dune_engine/process.mli
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,14 @@ val run_capture_lines :
-> Path.t
-> string list
-> 'a Fiber.t

val run_capture_zero_separated :
?dir:Path.t
-> ?stderr_to:Io.output Io.t
-> ?stdin_from:Io.input Io.t
-> ?env:Env.t
-> ?purpose:purpose
-> (string list, 'a) failure_mode
-> Path.t
-> string list
-> 'a Fiber.t
11 changes: 6 additions & 5 deletions src/dune_engine/vcs.ml
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,9 @@ let run t args =
in
String.trim s

let run_lines t args =
Process.run_capture_lines Strict (prog t) args ~dir:t.root ~env:Env.initial
let run_zero_separated t args =
Process.run_capture_zero_separated Strict (prog t) args ~dir:t.root
~env:Env.initial

let hg_describe t =
let open Fiber.O in
Expand Down Expand Up @@ -130,7 +131,7 @@ let commit_id =
let files =
let f args t =
let open Fiber.O in
let+ l = run_lines t args in
let+ l = run_zero_separated t args in
List.map l ~f:Path.in_source
in
Staged.unstage
Expand All @@ -142,5 +143,5 @@ let files =

let to_dyn = Dyn.Encoder.list Path.to_dyn
end ))
~git:(f [ "ls-tree"; "-r"; "--name-only"; "HEAD" ])
~hg:(f [ "files" ])
~git:(f [ "ls-tree"; "-z"; "-r"; "--name-only"; "HEAD" ])
~hg:(f [ "files"; "-0" ])
47 changes: 47 additions & 0 deletions src/stdune/io.ml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,50 @@ let input_lines =
in
fun ic -> loop ic []

let input_zero_from_buffer from buf =
match String.index_from_opt buf from '\x00' with
| None -> None
| Some eos -> Some (String.sub buf ~pos:from ~len:(eos - from), eos + 1)

(* Note, the complexity of this function will be bad if the zero-separated
elements are much larger than the current input buffer *)
let input_zero_separated =
(* Take all the \0-terminated strings from [buf], return the scanned list and
the remainder *)
let rec scan_inputs_buf from buf acc =
(* note that from is untouched if input_zero_from_buffer returns None *)
match input_zero_from_buffer from buf with
| Some (istr, from) -> scan_inputs_buf from buf (istr :: acc)
| None ->
let total_len = String.length buf in
if total_len > from then
let rest = String.sub buf ~pos:from ~len:(total_len - from) in
(Some rest, acc)
else
(None, acc)
in
let ibuf_size = 65536 in
let ibuf = Bytes.create ibuf_size in
let rec input_loop ic rem acc =
let res = input ic ibuf 0 ibuf_size in
if res = 0 then
(* end of file, check if there is a remainder, and return the results *)
match rem with
| Some rem -> List.rev (rem :: acc)
| None -> List.rev acc
else
(* new input, append remainder and scan it *)
let actual_input = Bytes.sub_string ibuf ~pos:0 ~len:res in
let actual_input =
match rem with
| None -> actual_input
| Some rem -> rem ^ actual_input
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we wanted to improve the worst-case scenario of repeatedly concatenating a string here, rem could be a string list instead (the function scan_inputs_buf would have to be adapted in turn).

But I understand that this is not relevant for the case at hand of dealing with filenames.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's improve this indeed.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After giving it a go IMHO the extra complexity of the code was not worth it; I've added a comment instead. We can improve it later if we want.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense, thanks.

in
let rem, acc = scan_inputs_buf 0 actual_input acc in
input_loop ic rem acc
in
fun ic -> input_loop ic None []

let copy_channels =
let buf_len = 65536 in
let buf = Bytes.create buf_len in
Expand Down Expand Up @@ -137,6 +181,9 @@ struct

let lines_of_file fn = with_file_in fn ~f:input_lines ~binary:false

let zero_strings_of_file fn =
with_file_in fn ~f:input_zero_separated ~binary:true

let write_file ?binary fn data =
with_file_out ?binary fn ~f:(fun oc -> output_string oc data)

Expand Down
3 changes: 3 additions & 0 deletions src/stdune/io_intf.ml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ module type S = sig

val lines_of_file : path -> string list

(** Reads zero-separated strings from a file *)
val zero_strings_of_file : path -> string list

val read_file : ?binary:bool -> path -> string

val write_file : ?binary:bool -> path -> string -> unit
Expand Down
37 changes: 37 additions & 0 deletions test/blackbox-tests/test-cases/subst.t/run.t
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,40 @@ And without an opam file preset.
(package (name foo) (authors "John Doe <john@doe.com>"))

$ rm -rf .git

Test subst and files with unicode (#3879)
-----------------------------------------

$ rm -f foo.opam

$ cat > dune-project <<EOF
> (lang dune 2.0)
> (name foo)
> (package (name foo) (authors "John Doe <john@doe.com>"))
> EOF

$ X=%%; cat > α-term.ml <<EOF
> let name = "${X}NAME${X}"
> let authors = "${X}PKG_AUTHORS${X}"
> let version = "${X}VERSION${X}"
> EOF

$ git init --quiet
$ git add .
$ git commit -am _ --quiet
$ git tag -a 1.0 -m 1.0

$ dune subst

$ cat α-term.ml
let name = "foo"
let authors = "John Doe <john@doe.com>"
let version = "1.0"

$ cat dune-project
(lang dune 2.0)
(name foo)
(version 1.0)
(package (name foo) (authors "John Doe <john@doe.com>"))

$ rm -rf .git