Skip to content

Commit

Permalink
Merge pull request #26093 from JuliaLang/nl/String
Browse files Browse the repository at this point in the history
Always resize input to zero length in String(::Vector{UInt8})
  • Loading branch information
JeffBezanson authored Mar 14, 2018
2 parents 4f70460 + 63a72d1 commit 4be51b7
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 19 deletions.
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,11 @@ Library improvements
* `Char` is now a subtype of `AbstractChar`, and most of the functions that
take character arguments now accept any `AbstractChar` ([#26286]).

* `String(array)` now accepts an arbitrary `AbstractVector{UInt8}`. For `Vector`
inputs, it "steals" the memory buffer, leaving them with an empty buffer which
is guaranteed not to be shared with the `String` object. For other types of vectors
(in particular immutable vectors), a copy is made and the input is not truncated ([#26093]).

* `Irrational` is now a subtype of `AbstractIrrational` ([#24245]).

* Introduced the `empty` function, the functional pair to `empty!` which returns a new,
Expand Down Expand Up @@ -1398,6 +1403,7 @@ Command-line option changes
[#26009]: https://github.com/JuliaLang/julia/issues/26009
[#26071]: https://github.com/JuliaLang/julia/issues/26071
[#26080]: https://github.com/JuliaLang/julia/issues/26080
[#26093]: https://github.com/JuliaLang/julia/issues/26093
[#26149]: https://github.com/JuliaLang/julia/issues/26149
[#26154]: https://github.com/JuliaLang/julia/issues/26154
[#26156]: https://github.com/JuliaLang/julia/issues/26156
Expand Down
2 changes: 1 addition & 1 deletion base/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -942,7 +942,7 @@ function resize!(a::Vector, nl::Integer)
l = length(a)
if nl > l
ccall(:jl_array_grow_end, Cvoid, (Any, UInt), a, nl-l)
else
elseif nl != l
if nl < 0
throw(ArgumentError("new length must be ≥ 0"))
end
Expand Down
29 changes: 15 additions & 14 deletions base/strings/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,22 @@ const ByteArray = Union{Vector{UInt8},Vector{Int8}}
# String constructor docstring from boot.jl, workaround for #16730
# and the unavailability of @doc in boot.jl context.
"""
String(v::Vector{UInt8})
Create a new `String` from a vector `v` of bytes containing
UTF-8 encoded characters. This function takes "ownership" of
the array, which means that you should not subsequently modify
`v` (since strings are supposed to be immutable in Julia) for
as long as the string exists.
If you need to subsequently modify `v`, use `String(copy(v))` instead.
String(v::AbstractVector{UInt8})
Create a new `String` object from a byte vector `v` containing UTF-8 encoded
characters. If `v` is `Vector{UInt8}` it will be truncated to zero length and
future modification of `v` cannot affect the contents of the resulting string.
To avoid truncation use `String(copy(v))`.
When possible, the memory of `v` will be used without copying when the `String`
object is created. This is guaranteed to be the case for byte vectors returned
by [`take!`](@ref) on a writable [`IOBuffer`](@ref) and by calls to
[`read(io, nb)`](@ref). This allows zero-copy conversion of I/O data to strings.
In other cases, `Vector{UInt8}` data may be copied, but `v` is truncated anyway
to guarantee consistent behavior.
"""
function String(v::Array{UInt8,1})
ccall(:jl_array_to_string, Ref{String}, (Any,), v)
end
String(v::AbstractVector{UInt8}) = String(copyto!(StringVector(length(v)), v))
String(v::Vector{UInt8}) = ccall(:jl_array_to_string, Ref{String}, (Any,), v)

"""
unsafe_string(p::Ptr{UInt8}, [length::Integer])
Expand Down Expand Up @@ -64,8 +67,6 @@ unsafe_wrap(::Type{Vector{UInt8}}, s::String) = ccall(:jl_string_to_array, Ref{V

(::Type{Vector{UInt8}})(s::CodeUnits{UInt8,String}) = copyto!(Vector{UInt8}(undef, length(s)), s)

String(a::AbstractVector{UInt8}) = String(copyto!(StringVector(length(a)), a))

String(s::CodeUnits{UInt8,String}) = s.s

## low-level functions ##
Expand Down
16 changes: 13 additions & 3 deletions src/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -434,13 +434,14 @@ JL_DLLEXPORT jl_array_t *jl_pchar_to_array(const char *str, size_t len)

JL_DLLEXPORT jl_value_t *jl_array_to_string(jl_array_t *a)
{
size_t len = jl_array_len(a);
if (a->flags.how == 3 && a->offset == 0 && a->elsize == 1 &&
(jl_array_ndims(a) != 1 ||
((a->maxsize + sizeof(void*) + 1 <= GC_MAX_SZCLASS) == (jl_array_len(a) + sizeof(void*) + 1 <= GC_MAX_SZCLASS)))) {
((a->maxsize + sizeof(void*) + 1 <= GC_MAX_SZCLASS) == (len + sizeof(void*) + 1 <= GC_MAX_SZCLASS)))) {
jl_value_t *o = jl_array_data_owner(a);
if (jl_is_string(o)) {
a->flags.isshared = 1;
*(size_t*)o = jl_array_len(a);
*(size_t*)o = len;
a->nrows = 0;
#ifdef STORE_ARRAY_LEN
a->length = 0;
Expand All @@ -449,7 +450,12 @@ JL_DLLEXPORT jl_value_t *jl_array_to_string(jl_array_t *a)
return o;
}
}
return jl_pchar_to_string((const char*)jl_array_data(a), jl_array_len(a));
a->nrows = 0;
#ifdef STORE_ARRAY_LEN
a->length = 0;
#endif
a->maxsize = 0;
return jl_pchar_to_string((const char*)jl_array_data(a), len);
}

JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len)
Expand Down Expand Up @@ -1016,6 +1022,8 @@ JL_DLLEXPORT void jl_array_del_beg(jl_array_t *a, size_t dec)
jl_bounds_error_int((jl_value_t*)a, dec);
if (__unlikely(a->flags.isshared))
array_try_unshare(a);
if (dec == 0)
return;
jl_array_del_at_beg(a, 0, dec, n);
}

Expand All @@ -1026,6 +1034,8 @@ JL_DLLEXPORT void jl_array_del_end(jl_array_t *a, size_t dec)
jl_bounds_error_int((jl_value_t*)a, 0);
if (__unlikely(a->flags.isshared))
array_try_unshare(a);
if (dec == 0)
return;
jl_array_del_at_end(a, n - dec, dec, n);
}

Expand Down
15 changes: 14 additions & 1 deletion test/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,21 @@
using Random

@testset "constructors" begin
@test String([0x61,0x62,0x63,0x21]) == "abc!"
v = [0x61,0x62,0x63,0x21]
@test String(v) == "abc!" && isempty(v)
@test String("abc!") == "abc!"
@test String(0x61:0x63) == "abc"

# Check that resizing empty source vector does not corrupt string
b = IOBuffer()
write(b, "ab")
x = take!(b)
s = String(x)
resize!(x, 0)
empty!(x) # Another method which must be tested
@test s == "ab"
resize!(x, 1)
@test s == "ab"

@test isempty(string())
@test eltype(GenericString) == Char
Expand Down

1 comment on commit 4be51b7

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily benchmark build, I will reply here when finished:

@nanosoldier runbenchmarks(ALL, isdaily = true)

Please sign in to comment.