Skip to content

Commit

Permalink
Always resize input to zero length in String(::Vector{UInt8})
Browse files Browse the repository at this point in the history
This makes the behavior more predictable than only resizing Vector{UInt8}
inputs when they have been allocated via StringVector, as the caller may have
obtained them from other code without knowing how they were created. This ensures
code will not rely on the fact that a copy is made in many common cases.
The behavior is also simpler to document.
  • Loading branch information
nalimilan committed Feb 20, 2018
1 parent b0303ca commit d1eb31c
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 18 deletions.
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,11 @@ Library improvements
* The function `thisind(s::AbstractString, i::Integer)` returns the largest valid index
less or equal than `i` in the string `s` or `0` if no such index exists ([#24414]).

* `String(array)` now accepts an arbitrary `AbstractVector{UInt8}` and "steals" the
memory buffer of mutable arrays, leaving the byte vector with an empty buffer which
is guaranteed not to be shared with the `String` object; if the byte vector is
immutable, it simply shares memory with the string and is not truncated ([#26093]).

* `Irrational` is now a subtype of `AbstractIrrational` ([#24245]).

* Introduced the `empty` function, the functional pair to `empty!` which returns a new,
Expand Down Expand Up @@ -1324,3 +1329,4 @@ Command-line option changes
[#25745]: https://github.com/JuliaLang/julia/issues/25745
[#25896]: https://github.com/JuliaLang/julia/issues/25896
[#25998]: https://github.com/JuliaLang/julia/issues/25998
[#26093]: https://github.com/JuliaLang/julia/issues/26093
29 changes: 15 additions & 14 deletions base/strings/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,22 @@ const ByteArray = Union{Vector{UInt8},Vector{Int8}}
# String constructor docstring from boot.jl, workaround for #16730
# and the unavailability of @doc in boot.jl context.
"""
String(v::Vector{UInt8})
Create a new `String` from a vector `v` of bytes containing
UTF-8 encoded characters. This function takes "ownership" of
the array, which means that you should not subsequently modify
`v` (since strings are supposed to be immutable in Julia) for
as long as the string exists.
If you need to subsequently modify `v`, use `String(copy(v))` instead.
String(v::AbstractVector{UInt8})
Create a new `String` object from a byte vector `v` containing UTF-8 encoded
characters. If `v` is `Vector{UInt8}` it will be truncated to zero length and
future modification of `v` cannot affect the contents of the resulting string.
To avoid truncation use `String(copy(v))`.
When possible, the memory of `v` will be used without copying when the `String`
object is created. This is guaranteed to be the case for byte vectors returned
by [`take!`](@ref) on a writable [`IOBuffer`](@ref) and by calls to
[`read(io, nb)`](@ref). This allows zero-copy conversion of I/O data to strings.
In other cases, `Vector{UInt8}` data may be copied, but `v` is truncated anyway
to guarantee consistent behavior.
"""
function String(v::Array{UInt8,1})
ccall(:jl_array_to_string, Ref{String}, (Any,), v)
end
String(v::AbstractVector{UInt8}) = String(copyto!(StringVector(length(v)), v))
String(v::Vector{UInt8}) = ccall(:jl_array_to_string, Ref{String}, (Any,), v)

"""
unsafe_string(p::Ptr{UInt8}, [length::Integer])
Expand Down Expand Up @@ -64,8 +67,6 @@ unsafe_wrap(::Type{Vector{UInt8}}, s::String) = ccall(:jl_string_to_array, Ref{V

(::Type{Vector{UInt8}})(s::CodeUnits{UInt8,String}) = copyto!(Vector{UInt8}(uninitialized, length(s)), s)

String(a::AbstractVector{UInt8}) = String(copyto!(StringVector(length(a)), a))

String(s::CodeUnits{UInt8,String}) = s.s

## low-level functions ##
Expand Down
12 changes: 9 additions & 3 deletions src/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -434,13 +434,14 @@ JL_DLLEXPORT jl_array_t *jl_pchar_to_array(const char *str, size_t len)

JL_DLLEXPORT jl_value_t *jl_array_to_string(jl_array_t *a)
{
size_t len = jl_array_len(a);
if (a->flags.how == 3 && a->offset == 0 && a->elsize == 1 &&
(jl_array_ndims(a) != 1 ||
((a->maxsize + sizeof(void*) + 1 <= GC_MAX_SZCLASS) == (jl_array_len(a) + sizeof(void*) + 1 <= GC_MAX_SZCLASS)))) {
((a->maxsize + sizeof(void*) + 1 <= GC_MAX_SZCLASS) == (len + sizeof(void*) + 1 <= GC_MAX_SZCLASS)))) {
jl_value_t *o = jl_array_data_owner(a);
if (jl_is_string(o)) {
a->flags.isshared = 1;
*(size_t*)o = jl_array_len(a);
*(size_t*)o = len;
a->nrows = 0;
#ifdef STORE_ARRAY_LEN
a->length = 0;
Expand All @@ -449,7 +450,12 @@ JL_DLLEXPORT jl_value_t *jl_array_to_string(jl_array_t *a)
return o;
}
}
return jl_pchar_to_string((const char*)jl_array_data(a), jl_array_len(a));
a->nrows = 0;
#ifdef STORE_ARRAY_LEN
a->length = 0;
#endif
a->maxsize = 0;
return jl_pchar_to_string((const char*)jl_array_data(a), len);
}

JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len)
Expand Down
4 changes: 3 additions & 1 deletion test/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
using Random

@testset "constructors" begin
@test String([0x61,0x62,0x63,0x21]) == "abc!"
v = [0x61,0x62,0x63,0x21]
@test String(v) == "abc!" && isempty(v)
@test String("abc!") == "abc!"
@test String(0x61:0x63) == "abc"

@test isempty(string())
@test eltype(GenericString) == Char
Expand Down

0 comments on commit d1eb31c

Please sign in to comment.