Skip to content

Commit

Permalink
Refactor/rename shell_escape_winsomely() and add escaping function fo…
Browse files Browse the repository at this point in the history
…r CMD.EXE syntax (#38352)

* refactor/rename shell_escape_winsomely(), add shell_escape_wincmd()

* Using `escape_raw_string()` makes `shell_escape_winsomely()` much easier to understand.

* Name of `shell_escape_winsomely()` changed to `escape_microsoft_c_args()`, as this function has nothing to do with any “shell”.

* Added `shell_escape_wincmd()` to escape metacharacters of `CMD.EXE`

* remove shell_escape_winsomely()
  • Loading branch information
mgkuhn authored Nov 19, 2020
1 parent b602577 commit 72e67d7
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 72 deletions.
2 changes: 1 addition & 1 deletion base/Base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -201,9 +201,9 @@ include("iobuffer.jl")
# strings & printing
include("intfuncs.jl")
include("strings/strings.jl")
include("regex.jl")
include("parse.jl")
include("shell.jl")
include("regex.jl")
include("show.jl")
include("arrayshow.jl")
include("methodshow.jl")
Expand Down
6 changes: 4 additions & 2 deletions base/cmd.jl
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,10 @@ shell_escape(cmd::Cmd; special::AbstractString="") =
shell_escape(cmd.exec..., special=special)
shell_escape_posixly(cmd::Cmd) =
shell_escape_posixly(cmd.exec...)
shell_escape_winsomely(cmd::Cmd) =
shell_escape_winsomely(cmd.exec...)
# `Cmd` convenience methods: splat the command's `exec` field into the
# `escape_microsoft_c_args` methods that take the arguments individually.
# The first form returns whatever the splatted call returns; the second
# passes the caller-supplied `io` through as the first argument.
function escape_microsoft_c_args(cmd::Cmd)
    return escape_microsoft_c_args(cmd.exec...)
end
function escape_microsoft_c_args(io::IO, cmd::Cmd)
    return escape_microsoft_c_args(io::IO, cmd.exec...)
end

function show(io::IO, cmd::Cmd)
print_env = cmd.env !== nothing
Expand Down
137 changes: 90 additions & 47 deletions base/shell.jl
Original file line number Diff line number Diff line change
Expand Up @@ -252,60 +252,103 @@ shell_escape_posixly(args::AbstractString...) =
sprint(print_shell_escaped_posixly, args...)


function print_shell_escaped_winsomely(io::IO, args::AbstractString...)
first = true
for arg in args
first || write(io, ' ')
first = false
# Quote any arg that contains a whitespace (' ' or '\t') or a double quote mark '"'.
# It's also valid to quote an arg with just a whitespace,
# but the following may be 'safer', and both implementations are valid anyways.
quotes = any(c -> c in (' ', '\t', '"'), arg) || isempty(arg)
quotes && write(io, '"')
backslashes = 0
for c in arg
if c == '\\'
backslashes += 1
"""
shell_escape_wincmd(s::AbstractString)
shell_escape_wincmd(io::IO, s::AbstractString)
The unexported `shell_escape_wincmd` function escapes Windows
`cmd.exe` shell meta characters. It escapes `()!^<>&|` by placing a
`^` in front. An `@` is only escaped at the start of the string. Pairs
of `"` characters and the strings they enclose are passed through
unescaped. Any remaining `"` is escaped with `^` to ensure that the
number of unescaped `"` characters in the result remains even.
Since `cmd.exe` substitutes variable references (like `%USER%`)
_before_ processing the escape characters `^` and `"`, this function
makes no attempt to escape the percent sign (`%`).
Input strings with ASCII control characters that cannot be escaped
(NUL, CR, LF) will cause an `ArgumentError` exception.
With an I/O stream parameter `io`, the result will be written there,
rather than returned as a string.
See also: [`escape_microsoft_c_args`](@ref), [`shell_escape_posixly`](@ref)
# Example
```jldoctest
julia> Base.shell_escape_wincmd("a^\\"^o\\"^u\\"")
"a^^\\"^o\\"^^u^\\""
```
"""
function shell_escape_wincmd(io::IO, s::AbstractString)
# https://stackoverflow.com/a/4095133/1990689
occursin(r"[\r\n\0]", s) &&
throw(ArgumentError("control character unsupported by CMD.EXE"))
i = 1
len = ncodeunits(s)
if len > 0 && s[1] == '@'
write(io, '^')
end
while i <= len
c = s[i]
if c == '"' && (j = findnext('"', s, nextind(s,i))) !== nothing
write(io, SubString(s,i,j))
i = j
else
if c in ('"', '(', ')', '!', '^', '<', '>', '&', '|')
write(io, '^', c)
else
# escape all backslashes and the following double quote
c == '"' && (backslashes = backslashes * 2 + 1)
for j = 1:backslashes
# backslashes aren't special here
write(io, '\\')
end
backslashes = 0
write(io, c)
end
end
# escape all backslashes, letting the terminating double quote we add below to then be interpreted as a special char
quotes && (backslashes *= 2)
for j = 1:backslashes
write(io, '\\')
end
quotes && write(io, '"')
i = nextind(s,i)
end
return nothing
end

# String-returning convenience method: run the `io`-writing method of
# `shell_escape_wincmd` on `s` through `sprint` and return the collected text.
function shell_escape_wincmd(s::AbstractString)
    # Size hint for the output buffer: twice the byte size of the input.
    return sprint(shell_escape_wincmd, s; sizehint = 2*sizeof(s))
end

"""
shell_escaped_winsomely(args::Union{Cmd,AbstractString...})::String
Convert the collection of strings `args` into single string suitable for passing as the argument
string for a Windows command line. Windows passes the entire command line as a single string to
the application (unlike POSIX systems, where the list of arguments are passed separately).
Many Windows API applications (including julia.exe), use the conventions of the [Microsoft C
runtime](https://docs.microsoft.com/en-us/cpp/c-language/parsing-c-command-line-arguments) to
split that command line into a list of strings. This function implements the inverse of such a
C runtime command-line parser. It joins command-line arguments to be passed to a Windows console
application into a command line, escaping or quoting meta characters such as space,
double quotes and backslash where needed. This may be useful in concert with the `windows_verbatim`
flag to [`Cmd`](@ref) when constructing process pipelines.
escape_microsoft_c_args(args::Union{Cmd,AbstractString...})
escape_microsoft_c_args(io::IO, args::Union{Cmd,AbstractString...})
# Example
```jldoctest
julia> println(shell_escaped_winsomely("A B\\", "C"))
"A B\\" C
Convert a collection of string arguments into a string that can be
passed to many Windows command-line applications.
Microsoft Windows passes the entire command line as a single string to
the application (unlike POSIX systems, where the shell splits the
command line into a list of arguments). Many Windows API applications
(including julia.exe), use the conventions of the [Microsoft C/C++
runtime](https://docs.microsoft.com/en-us/cpp/c-language/parsing-c-command-line-arguments)
to split that command line into a list of strings.
This function implements an inverse for a parser compatible with these rules.
It joins command-line arguments to be passed to a Windows
C/C++/Julia application into a command line, escaping or quoting the
meta characters space, TAB, double quote and backslash where needed.
See also: [`shell_escape_wincmd`](@ref), [`escape_raw_string`](@ref)
"""
shell_escape_winsomely(args::AbstractString...) =
sprint(print_shell_escaped_winsomely, args..., sizehint=(sum(length, args)) + 3*length(args))
# Write `args` to `io` as a single command line, separating the arguments
# with one space each. An argument is wrapped in double quotes (with its
# content passed through `escape_raw_string`) only when it is empty or
# contains a space, TAB or double quote; otherwise it is written verbatim.
# Returns `nothing`.
function escape_microsoft_c_args(io::IO, args::AbstractString...)
    # http://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES
    first = true
    for arg in args
        if first
            first = false
        else
            write(io, ' ') # separator
        end
        if isempty(arg) || occursin(r"[ \t\"]", arg)
            # Julia raw strings happen to use the same escaping convention
            # as the argv[] parser in Microsoft's C runtime library.
            write(io, '"')
            escape_raw_string(io, arg)
            write(io, '"')
        else
            write(io, arg)
        end
    end
    return nothing
end

# String-returning variant: collects the output of the `io` method via
# `sprint`. The size hint is the total byte size of all arguments plus three
# bytes per argument. `mapreduce` with `init = 0` is used instead of
# `sum(sizeof.(args))` so that a call with no arguments yields "" rather
# than failing on the reduction of an empty tuple.
escape_microsoft_c_args(args::AbstractString...) =
    sprint(escape_microsoft_c_args, args...;
           sizehint = mapreduce(sizeof, +, args; init = 0) + 3*length(args))
67 changes: 45 additions & 22 deletions test/spawn.jl
Original file line number Diff line number Diff line change
Expand Up @@ -720,86 +720,109 @@ if Sys.iswindows()
end


# shell escaping on Windows
@testset "shell_escape_winsomely" begin
@testset "shell escaping on Windows" begin
# Note argument A can be parsed both as A or "A".
# We do not test that the parsing satisfies either of these conditions.
# In other words, tests may fail even for valid parsing.
# This is done to avoid overly verbose tests.

# input :
# output: ""
@test Base.shell_escape_winsomely("") == "\"\""
@test Base.escape_microsoft_c_args("") == "\"\""

@test Base.shell_escape_winsomely("A") == "A"
@test Base.escape_microsoft_c_args("A") == "A"

@test Base.shell_escape_winsomely(`A`) == "A"
@test Base.escape_microsoft_c_args(`A`) == "A"

# input : hello world
# output: "hello world"
@test Base.shell_escape_winsomely("hello world") == "\"hello world\""
@test Base.escape_microsoft_c_args("hello world") == "\"hello world\""

# input : hello<TAB>world
# output: "hello<TAB>world"
@test Base.shell_escape_winsomely("hello\tworld") == "\"hello\tworld\""
@test Base.escape_microsoft_c_args("hello\tworld") == "\"hello\tworld\""

# input : hello"world
# output: "hello\"world" (also valid) hello\"world
@test Base.shell_escape_winsomely("hello\"world") == "\"hello\\\"world\""
@test Base.escape_microsoft_c_args("hello\"world") == "\"hello\\\"world\""

# input : hello""world
# output: "hello\"\"world" (also valid) hello\"\"world
@test Base.shell_escape_winsomely("hello\"\"world") == "\"hello\\\"\\\"world\""
@test Base.escape_microsoft_c_args("hello\"\"world") == "\"hello\\\"\\\"world\""

# input : hello\world
# output: hello\world
@test Base.shell_escape_winsomely("hello\\world") == "hello\\world"
@test Base.escape_microsoft_c_args("hello\\world") == "hello\\world"

# input : hello\\world
# output: hello\\world
@test Base.shell_escape_winsomely("hello\\\\world") == "hello\\\\world"
@test Base.escape_microsoft_c_args("hello\\\\world") == "hello\\\\world"

# input : hello\"world
# output: "hello\\\"world" (also valid) hello\\\"world
@test Base.shell_escape_winsomely("hello\\\"world") == "\"hello\\\\\\\"world\""
@test Base.escape_microsoft_c_args("hello\\\"world") == "\"hello\\\\\\\"world\""

# input : hello\\"world
# output: "hello\\\\\"world" (also valid) hello\\\\\"world
@test Base.shell_escape_winsomely("hello\\\\\"world") == "\"hello\\\\\\\\\\\"world\""
@test Base.escape_microsoft_c_args("hello\\\\\"world") == "\"hello\\\\\\\\\\\"world\""

# input : hello world\
# output: "hello world\\"
@test Base.shell_escape_winsomely("hello world\\") == "\"hello world\\\\\""
@test Base.escape_microsoft_c_args("hello world\\") == "\"hello world\\\\\""

# input : A\B
# output: A\B
@test Base.shell_escape_winsomely("A\\B") == "A\\B"
@test Base.escape_microsoft_c_args("A\\B") == "A\\B"

# input : [A\, B]
# output: A\ B
@test Base.shell_escape_winsomely("A\\", "B") == "A\\ B"
@test Base.escape_microsoft_c_args("A\\", "B") == "A\\ B"

# input : A"B
# output: "A\"B"
@test Base.shell_escape_winsomely("A\"B") == "\"A\\\"B\""
@test Base.escape_microsoft_c_args("A\"B") == "\"A\\\"B\""

# input : [A B\, C]
# output: "A B\\" C
@test Base.shell_escape_winsomely("A B\\", "C") == "\"A B\\\\\" C"
@test Base.escape_microsoft_c_args("A B\\", "C") == "\"A B\\\\\" C"

# input : [A "B, C]
# output: "A \"B" C
@test Base.shell_escape_winsomely("A \"B", "C") == "\"A \\\"B\" C"
@test Base.escape_microsoft_c_args("A \"B", "C") == "\"A \\\"B\" C"

# input : [A B\, C]
# output: "A B\\" C
@test Base.shell_escape_winsomely("A B\\", "C") == "\"A B\\\\\" C"
@test Base.escape_microsoft_c_args("A B\\", "C") == "\"A B\\\\\" C"

# input :[A\ B\, C]
# output: "A\ B\\" C
@test Base.shell_escape_winsomely("A\\ B\\", "C") == "\"A\\ B\\\\\" C"
@test Base.escape_microsoft_c_args("A\\ B\\", "C") == "\"A\\ B\\\\\" C"

# input : [A\ B\, C, D K]
# output: "A\ B\\" C "D K"
@test Base.shell_escape_winsomely("A\\ B\\", "C", "D K") == "\"A\\ B\\\\\" C \"D K\""
@test Base.escape_microsoft_c_args("A\\ B\\", "C", "D K") == "\"A\\ B\\\\\" C \"D K\""

# shell_escape_wincmd
@test Base.shell_escape_wincmd("") == ""
@test Base.shell_escape_wincmd("\"") == "^\""
@test Base.shell_escape_wincmd("\"\"") == "\"\""
@test Base.shell_escape_wincmd("\"\"\"") == "\"\"^\""
@test Base.shell_escape_wincmd("\"\"\"\"") == "\"\"\"\""
@test Base.shell_escape_wincmd("a^\"^o\"^u\"") == "a^^\"^o\"^^u^\""
@test Base.shell_escape_wincmd("ä^\"^ö\"^ü\"") == "ä^^\"^ö\"^^ü^\""
@test Base.shell_escape_wincmd("@@()!^<>&|\"") == "^@@^(^)^!^^^<^>^&^|^\""
@test_throws ArgumentError Base.shell_escape_wincmd("\0")
@test_throws ArgumentError Base.shell_escape_wincmd("\r")
@test_throws ArgumentError Base.shell_escape_wincmd("\n")

# combined tests of shell_escape_wincmd and escape_microsoft_c_args
@test Base.shell_escape_wincmd(Base.escape_microsoft_c_args(
"julia", "-e", "println(ARGS)", raw"He said \"a^2+b^2=c^2\"!" )) ==
"julia -e println^(ARGS^) \"He said \\\"a^^2+b^^2=c^^2\\\"!\""

ascii95 = String(range(' ',stop='~')); # all printable ASCII characters
args = ["ab ^` c", " \" ", "\"", ascii95, ascii95,
"\"\\\"\\", "", "|", "&&", ";"];
@test Base.shell_escape_wincmd(Base.escape_microsoft_c_args(args...)) == "\"ab ^` c\" \" \\\" \" \"\\\"\" \" !\\\"#\$%^&'^(^)*+,-./0123456789:;^<=^>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^^_`abcdefghijklmnopqrstuvwxyz{^|}~\" \" ^!\\\"#\$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\" \"\\\"\\\\\\\"\\\\\" \"\" ^| ^&^& ;"

end

0 comments on commit 72e67d7

Please sign in to comment.