From 62de4725a5e5c89ba0a8a680451cf3207173f521 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?=
Date: Wed, 12 Sep 2018 23:27:56 +0200
Subject: [PATCH] move codelen and first_utf8_byte to Char.jl (#28894)

---
 base/char.jl           | 3 +++
 base/strings/string.jl | 7 -------
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/base/char.jl b/base/char.jl
index 700c61db27c77..bec67add95b71 100644
--- a/base/char.jl
+++ b/base/char.jl
@@ -196,6 +196,9 @@ isless(x::Char, y::Char) = reinterpret(UInt32, x) < reinterpret(UInt32, y)
 hash(x::Char, h::UInt) =
     hash_uint64(((reinterpret(UInt32, x) + UInt64(0xd4d64234)) << 32) ⊻ UInt64(h))
 
+first_utf8_byte(c::Char) = (reinterpret(UInt32, c) >> 24) % UInt8
+codelen(c::Char) = 4 - (trailing_zeros(0xff000000 | reinterpret(UInt32, c)) >> 3)
+
 # fallbacks:
 isless(x::AbstractChar, y::AbstractChar) = isless(Char(x), Char(y))
 ==(x::AbstractChar, y::AbstractChar) = Char(x) == Char(y)
diff --git a/base/strings/string.jl b/base/strings/string.jl
index 7c9172496a425..148659a8ee737 100644
--- a/base/strings/string.jl
+++ b/base/strings/string.jl
@@ -293,17 +293,10 @@ length(s::String) = length(s, 1, ncodeunits(s), ncodeunits(s))
     end
 end
 
-# TODO: delete or move to char.jl
-first_utf8_byte(c::Char) = (reinterpret(UInt32, c) >> 24) % UInt8
-
 ## overload methods for efficiency ##
 
 isvalid(s::String, i::Int) = checkbounds(Bool, s, i) && thisind(s, i) == i
 
-# UTF-8 encoding length of a character
-# TODO: delete or move to char.jl
-codelen(c::Char) = 4 - (trailing_zeros(0xff000000 | reinterpret(UInt32, c)) >> 3)
-
 """
     repeat(c::AbstractChar, r::Integer) -> String
 