Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Widen type signature of bytes2hex #39710

Merged
merged 6 commits into from
Apr 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ New library features
--------------------

* The optional keyword argument `context` of `sprint` can now be set to a tuple of `:key => value` pairs to specify multiple attributes. ([#39381])
* `bytes2hex` and `hex2bytes` are no longer limited to arguments of type `Union{String,AbstractVector{UInt8}}`; they now only require that the argument is iterable and has a length. ([#39710])

Standard library changes
------------------------
Expand Down
99 changes: 62 additions & 37 deletions base/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -595,15 +595,20 @@ replace(s::AbstractString, pat_f::Pair; count=typemax(Int)) =
# hex <-> bytes conversion

"""
hex2bytes(s::Union{AbstractString,AbstractVector{UInt8}})
hex2bytes(itr)

Given a string or array `s` of ASCII codes for a sequence of hexadecimal digits, returns a
Given an iterable `itr` of ASCII codes for a sequence of hexadecimal digits, returns a
`Vector{UInt8}` of bytes corresponding to the binary representation: each successive pair
of hexadecimal digits in `s` gives the value of one byte in the return vector.
of hexadecimal digits in `itr` gives the value of one byte in the return vector.

The length of `s` must be even, and the returned array has half of the length of `s`.
The length of `itr` must be even, and the returned array has half of the length of `itr`.
See also [`hex2bytes!`](@ref) for an in-place version, and [`bytes2hex`](@ref) for the inverse.

!!! compat "Julia 1.7"
Calling hex2bytes with iterables producing UInt8 requires
version 1.7. In earlier versions, you can collect the iterable
before calling instead.

# Examples
```jldoctest
julia> s = string(12345, base = 16)
Expand Down Expand Up @@ -632,46 +637,64 @@ julia> hex2bytes(a)
"""
function hex2bytes end

hex2bytes(s::AbstractString) = hex2bytes(String(s))
hex2bytes(s::Union{String,AbstractVector{UInt8}}) = hex2bytes!(Vector{UInt8}(undef, length(s) >> 1), s)
function hex2bytes(s)
    # Two hexadecimal digits encode one byte, so the destination is
    # allocated with half as many elements as the input has.
    dest = Vector{UInt8}(undef, length(s) >> 1)
    return hex2bytes!(dest, s)
end

# special case - valid bytes are checked in the generic implementation
function hex2bytes!(dest::AbstractArray{UInt8}, s::String)
sizeof(s) != length(s) && throw(ArgumentError("input string must consist of hexadecimal characters only"))

_firstbyteidx(s::String) = 1
_firstbyteidx(s::AbstractVector{UInt8}) = first(eachindex(s))
_lastbyteidx(s::String) = sizeof(s)
_lastbyteidx(s::AbstractVector{UInt8}) = lastindex(s)
hex2bytes!(dest, transcode(UInt8, s))
end

"""
hex2bytes!(d::AbstractVector{UInt8}, s::Union{String,AbstractVector{UInt8}})
hex2bytes!(dest::AbstractVector{UInt8}, itr)

Convert an array `s` of bytes representing a hexadecimal string to its binary
Convert an iterable `itr` of bytes representing a hexadecimal string to its binary
representation, similar to [`hex2bytes`](@ref) except that the output is written in-place
in `d`. The length of `s` must be exactly twice the length of `d`.
"""
function hex2bytes!(d::AbstractVector{UInt8}, s::Union{String,AbstractVector{UInt8}})
if 2length(d) != sizeof(s)
isodd(sizeof(s)) && throw(ArgumentError("input hex array must have even length"))
throw(ArgumentError("output array must be half length of input array"))
to `dest`. The length of `dest` must be half the length of `itr`.

!!! compat "Julia 1.7"
Calling hex2bytes! with iterators producing UInt8 requires
version 1.7. In earlier versions, you can collect the iterable
before calling instead.
"""
function hex2bytes!(dest::AbstractArray{UInt8}, itr)
isodd(length(itr)) && throw(ArgumentError("length of iterable must be even"))
@boundscheck 2*length(dest) != length(itr) && throw(ArgumentError("length of output array must be half of the length of input iterable"))
iszero(length(itr)) && return dest

next = iterate(itr)
@inbounds for i in eachindex(dest)
x,state = next
y,state = iterate(itr, state)
next = iterate(itr, state)
dest[i] = number_from_hex(x) << 4 + number_from_hex(y)
end
j = first(eachindex(d)) - 1
for i = _firstbyteidx(s):2:_lastbyteidx(s)
@inbounds d[j += 1] = number_from_hex(_nthbyte(s,i)) << 4 + number_from_hex(_nthbyte(s,i+1))
end
return d

return dest
end

@inline number_from_hex(c) =
(UInt8('0') <= c <= UInt8('9')) ? c - UInt8('0') :
(UInt8('A') <= c <= UInt8('F')) ? c - (UInt8('A') - 0x0a) :
(UInt8('a') <= c <= UInt8('f')) ? c - (UInt8('a') - 0x0a) :
@inline number_from_hex(c::AbstractChar) = number_from_hex(Char(c))
@inline number_from_hex(c::Char) = number_from_hex(UInt8(c))
# Convert one ASCII hexadecimal digit, given as a byte, to its numeric value
# (0x00-0x0f). Throws an `ArgumentError` for any byte that is not one of
# '0'-'9', 'a'-'f' or 'A'-'F'.
@inline function number_from_hex(c::UInt8)
    # Decimal digits map directly onto 0-9.
    if UInt8('0') <= c <= UInt8('9')
        return c - UInt8('0')
    end
    # Setting bit 0x20 folds ASCII 'A'-'F' onto 'a'-'f', so a single range
    # test covers both letter cases.
    lowered = c | 0x20
    if UInt8('a') <= lowered <= UInt8('f')
        return lowered - UInt8('a') + 0x0a
    end
    throw(ArgumentError("byte is not an ASCII hexadecimal digit"))
end

"""
bytes2hex(a::AbstractArray{UInt8}) -> String
bytes2hex(io::IO, a::AbstractArray{UInt8})
bytes2hex(itr) -> String
bytes2hex(io::IO, itr)

Convert an iterator `itr` of bytes to its hexadecimal string representation, either
returning a `String` via `bytes2hex(itr)` or writing the string to an `io` stream
via `bytes2hex(io, itr)`. The hexadecimal characters are all lowercase.

Convert an array `a` of bytes to its hexadecimal string representation, either
returning a `String` via `bytes2hex(a)` or writing the string to an `io` stream
via `bytes2hex(io, a)`. The hexadecimal characters are all lowercase.
!!! compat "Julia 1.7"
Calling bytes2hex with iterators producing UInt8 requires
version 1.7. In earlier versions, you can collect the iterable
before calling instead.

# Examples
```jldoctest
Expand All @@ -689,17 +712,19 @@ julia> bytes2hex(b)
"""
function bytes2hex end

function bytes2hex(a::Union{Tuple{Vararg{UInt8}}, AbstractArray{UInt8}})
b = Base.StringVector(2*length(a))
@inbounds for (i, x) in enumerate(a)
function bytes2hex(itr)
    if eltype(itr) !== UInt8
        throw(ArgumentError("eltype of iterator not UInt8"))
    end
    # Every input byte produces two output characters.
    out = Base.StringVector(2*length(itr))
    @inbounds for (n, byte) in enumerate(itr)
        out[2n - 1] = hex_chars[1 + byte >> 4]    # looked up by the upper four bits
        out[2n] = hex_chars[1 + byte & 0xf]       # looked up by the lower four bits
    end
    return String(out)
end

function bytes2hex(io::IO, a::Union{Tuple{Vararg{UInt8}}, AbstractArray{UInt8}})
for x in a
function bytes2hex(io::IO, itr)
    if eltype(itr) !== UInt8
        throw(ArgumentError("eltype of iterator not UInt8"))
    end
    # Stream two characters per byte straight to `io`; no intermediate
    # string is built.
    for byte in itr
        upper = Char(hex_chars[1 + byte >> 4])
        lower = Char(hex_chars[1 + byte & 0xf])
        print(io, upper, lower)
    end
end
Expand Down
5 changes: 5 additions & 0 deletions test/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,11 @@ end
#non-hex characters
@test_throws ArgumentError hex2bytes(b"0123456789abcdefABCDEFGH")
end

@testset "Issue 39284" begin
@test "efcdabefcdab8967452301" == bytes2hex(Iterators.reverse(hex2bytes("0123456789abcdefABCDEF")))
@test hex2bytes(Iterators.reverse(b"CE1A85EECc")) == UInt8[0xcc, 0xee, 0x58, 0xa1, 0xec]
end
end

# b"" should be immutable
Expand Down