Skip to content

Commit

Permalink
Widen type signature of bytes2hex (JuliaLang#39710)
Browse files Browse the repository at this point in the history
and make it slightly faster!

Also improves the error message of hex2bytes! when passing a non-ASCII string,
adds a compat notice, and makes the implementation more generic with regard to AbstractString.
  • Loading branch information
Seelengrab authored and johanmon committed Jul 5, 2021
1 parent 4696a4f commit ea24004
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 37 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ New library features
--------------------

* The optional keyword argument `context` of `sprint` can now be set to a tuple of `:key => value` pairs to specify multiple attributes. ([#39381])
* `bytes2hex` and `hex2bytes` are no longer limited to arguments of type `Union{String,AbstractVector{UInt8}}` and now only require that they're iterable and have a length. ([#39710])

Standard library changes
------------------------
Expand Down
99 changes: 62 additions & 37 deletions base/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -595,15 +595,20 @@ replace(s::AbstractString, pat_f::Pair; count=typemax(Int)) =
# hex <-> bytes conversion

"""
hex2bytes(s::Union{AbstractString,AbstractVector{UInt8}})
hex2bytes(itr)
Given a string or array `s` of ASCII codes for a sequence of hexadecimal digits, returns a
Given an iterable `itr` of ASCII codes for a sequence of hexadecimal digits, returns a
`Vector{UInt8}` of bytes corresponding to the binary representation: each successive pair
of hexadecimal digits in `s` gives the value of one byte in the return vector.
of hexadecimal digits in `itr` gives the value of one byte in the return vector.
The length of `s` must be even, and the returned array has half of the length of `s`.
The length of `itr` must be even, and the returned array has half of the length of `itr`.
See also [`hex2bytes!`](@ref) for an in-place version, and [`bytes2hex`](@ref) for the inverse.
!!! compat "Julia 1.7"
Calling hex2bytes with iterables producing UInt8 requires
version 1.7. In earlier versions, you can collect the iterable
before calling instead.
# Examples
```jldoctest
julia> s = string(12345, base = 16)
Expand Down Expand Up @@ -632,46 +637,64 @@ julia> hex2bytes(a)
"""
function hex2bytes end

hex2bytes(s::AbstractString) = hex2bytes(String(s))
hex2bytes(s::Union{String,AbstractVector{UInt8}}) = hex2bytes!(Vector{UInt8}(undef, length(s) >> 1), s)
# Allocate an output buffer holding one byte per pair of input hex digits and
# let `hex2bytes!` fill (and validate) it.
function hex2bytes(s)
    out = Vector{UInt8}(undef, length(s) >> 1)
    return hex2bytes!(out, s)
end

# special case - valid bytes are checked in the generic implementation
# `String` method: rejects strings in which the byte count and the character
# count differ, then calls `hex2bytes!` again with `transcode(UInt8, s)`.
function hex2bytes!(dest::AbstractArray{UInt8}, s::String)
# `sizeof(s)` counts bytes and `length(s)` counts characters, so a mismatch
# means at least one character is not a single byte.
sizeof(s) != length(s) && throw(ArgumentError("input string must consist of hexadecimal characters only"))

# NOTE(review): the four `_firstbyteidx`/`_lastbyteidx` definitions below are
# not used by any statement in this body — presumably leftovers from the
# previous index-based implementation; confirm before relying on them.
_firstbyteidx(s::String) = 1
_firstbyteidx(s::AbstractVector{UInt8}) = first(eachindex(s))
_lastbyteidx(s::String) = sizeof(s)
_lastbyteidx(s::AbstractVector{UInt8}) = lastindex(s)
# Re-dispatch on the transcoded bytes; the per-digit validation happens there.
hex2bytes!(dest, transcode(UInt8, s))
end

"""
hex2bytes!(d::AbstractVector{UInt8}, s::Union{String,AbstractVector{UInt8}})
hex2bytes!(dest::AbstractVector{UInt8}, itr)
Convert an array `s` of bytes representing a hexadecimal string to its binary
Convert an iterable `itr` of bytes representing a hexadecimal string to its binary
representation, similar to [`hex2bytes`](@ref) except that the output is written in-place
in `d`. The length of `s` must be exactly twice the length of `d`.
"""
function hex2bytes!(d::AbstractVector{UInt8}, s::Union{String,AbstractVector{UInt8}})
if 2length(d) != sizeof(s)
isodd(sizeof(s)) && throw(ArgumentError("input hex array must have even length"))
throw(ArgumentError("output array must be half length of input array"))
to `dest`. The length of `dest` must be half the length of `itr`.
!!! compat "Julia 1.7"
Calling hex2bytes! with iterators producing UInt8 requires
version 1.7. In earlier versions, you can collect the iterable
before calling instead.
"""
# Generic method: consumes `itr` two elements at a time and writes one decoded
# byte per pair into `dest`. Throws `ArgumentError` when `length(itr)` is odd or
# is not exactly twice `length(dest)`; returns `dest`.
function hex2bytes!(dest::AbstractArray{UInt8}, itr)
isodd(length(itr)) && throw(ArgumentError("length of iterable must be even"))
# This size check is wrapped in `@boundscheck`, unlike the parity check above.
@boundscheck 2*length(dest) != length(itr) && throw(ArgumentError("length of output array must be half of the length of input iterable"))
# Empty input: nothing to decode, and `iterate(itr)` below would not yield a pair.
iszero(length(itr)) && return dest

# `next` always holds the pending `(element, state)` result for the high digit.
next = iterate(itr)
@inbounds for i in eachindex(dest)
# x = high hex digit of the pair, y = low hex digit.
x,state = next
y,state = iterate(itr, state)
# Fetch the following pair's first element ahead of the next loop iteration.
next = iterate(itr, state)
# `<<` binds tighter than `+`, so this is (high << 4) + low.
dest[i] = number_from_hex(x) << 4 + number_from_hex(y)
end
# NOTE(review): the statements from here to `return d` reference `d`, `s`,
# `_firstbyteidx`, `_lastbyteidx` and `_nthbyte`, none of which are arguments or
# locals of this method — presumably leftovers from the previous index-based
# implementation; confirm whether they should be removed.
j = first(eachindex(d)) - 1
for i = _firstbyteidx(s):2:_lastbyteidx(s)
@inbounds d[j += 1] = number_from_hex(_nthbyte(s,i)) << 4 + number_from_hex(_nthbyte(s,i+1))
end
return d

return dest
end

@inline number_from_hex(c) =
(UInt8('0') <= c <= UInt8('9')) ? c - UInt8('0') :
(UInt8('A') <= c <= UInt8('F')) ? c - (UInt8('A') - 0x0a) :
(UInt8('a') <= c <= UInt8('f')) ? c - (UInt8('a') - 0x0a) :
# Convert one ASCII hexadecimal digit to its numeric value (0x00 through 0x0f).
# Accepts the digit as a `UInt8` code or as a character. Anything that is not
# one of 0-9, a-f or A-F throws `ArgumentError`.
@inline number_from_hex(c::AbstractChar) = number_from_hex(Char(c))
@inline function number_from_hex(c::Char)
    # `UInt8(c)` throws `InexactError` for code points above 0xff. Reject
    # non-ASCII characters first so every invalid digit is reported with the
    # same `ArgumentError` as an invalid byte.
    isascii(c) || throw(ArgumentError("byte is not an ASCII hexadecimal digit"))
    return number_from_hex(UInt8(c))
end
@inline function number_from_hex(c::UInt8)
    UInt8('0') <= c <= UInt8('9') && return c - UInt8('0')
    # Setting bit 0x20 folds 'A'-'F' onto 'a'-'f', so one range test covers both.
    c |= 0b0100000
    UInt8('a') <= c <= UInt8('f') && return c - UInt8('a') + 0x0a
    throw(ArgumentError("byte is not an ASCII hexadecimal digit"))
end

"""
bytes2hex(a::AbstractArray{UInt8}) -> String
bytes2hex(io::IO, a::AbstractArray{UInt8})
bytes2hex(itr) -> String
bytes2hex(io::IO, itr)
Convert an iterator `itr` of bytes to its hexadecimal string representation, either
returning a `String` via `bytes2hex(itr)` or writing the string to an `io` stream
via `bytes2hex(io, itr)`. The hexadecimal characters are all lowercase.
Convert an array `a` of bytes to its hexadecimal string representation, either
returning a `String` via `bytes2hex(a)` or writing the string to an `io` stream
via `bytes2hex(io, a)`. The hexadecimal characters are all lowercase.
!!! compat "Julia 1.7"
Calling bytes2hex with iterators producing UInt8 requires
version 1.7. In earlier versions, you can collect the iterable
before calling instead.
# Examples
```jldoctest
Expand All @@ -689,17 +712,19 @@ julia> bytes2hex(b)
"""
function bytes2hex end

function bytes2hex(a::Union{Tuple{Vararg{UInt8}}, AbstractArray{UInt8}})
b = Base.StringVector(2*length(a))
@inbounds for (i, x) in enumerate(a)
# Build the lowercase hexadecimal string for an iterable of `UInt8` values.
# Throws `ArgumentError` when `eltype(itr)` is not `UInt8`.
function bytes2hex(itr)
    if eltype(itr) !== UInt8
        throw(ArgumentError("eltype of iterator not UInt8"))
    end
    # Two output characters per input byte.
    out = Base.StringVector(2 * length(itr))
    pos = 0
    @inbounds for byte in itr
        out[pos += 1] = hex_chars[(byte >> 4) + 1]    # high nibble
        out[pos += 1] = hex_chars[(byte & 0xf) + 1]   # low nibble
    end
    return String(out)
end

function bytes2hex(io::IO, a::Union{Tuple{Vararg{UInt8}}, AbstractArray{UInt8}})
for x in a
# Write the lowercase hexadecimal representation of an iterable of `UInt8`
# values to `io`, two characters per byte. Throws `ArgumentError` when
# `eltype(itr)` is not `UInt8`.
function bytes2hex(io::IO, itr)
    if eltype(itr) !== UInt8
        throw(ArgumentError("eltype of iterator not UInt8"))
    end
    for byte in itr
        hi = Char(hex_chars[1 + (byte >> 4)])
        lo = Char(hex_chars[1 + (byte & 0xf)])
        print(io, hi, lo)
    end
end
Expand Down
5 changes: 5 additions & 0 deletions test/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,11 @@ end
#non-hex characters
@test_throws ArgumentError hex2bytes(b"0123456789abcdefABCDEFGH")
end

@testset "Issue 39284" begin
@test "efcdabefcdab8967452301" == bytes2hex(Iterators.reverse(hex2bytes("0123456789abcdefABCDEF")))
@test hex2bytes(Iterators.reverse(b"CE1A85EECc")) == UInt8[0xcc, 0xee, 0x58, 0xa1, 0xec]
end
end

# b"" should be immutable
Expand Down

0 comments on commit ea24004

Please sign in to comment.