From e04b7dc2328dc206ace65beaeac43c07e90c1a13 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Sat, 9 Dec 2017 21:54:56 +0100 Subject: [PATCH] Deprecate isnumber(), is_assigned_char() and normalize_string() isnumeric() is consistent with Python and Rust (but not Go), and less easy to confuse with isdigit(). Improve documentation to make confusion less easy. Also fix a few uses where isdigit() is more appropriate than isnumber(). --- NEWS.md | 6 +- base/client.jl | 2 +- base/distributed/Distributed.jl | 2 +- base/precompile.jl | 2 +- base/regex.jl | 6 +- base/strings/utf8proc.jl | 40 ++++++----- doc/src/manual/faq.md | 6 +- stdlib/Unicode/docs/src/index.md | 6 +- stdlib/Unicode/src/Unicode.jl | 16 +++-- stdlib/Unicode/test/runtests.jl | 118 +++++++++++++++---------------- test/strings/basic.jl | 2 +- 11 files changed, 112 insertions(+), 94 deletions(-) diff --git a/NEWS.md b/NEWS.md index 4d90d1b4d78dc6..6a771985095124 100644 --- a/NEWS.md +++ b/NEWS.md @@ -744,6 +744,10 @@ Deprecated or removed `isdigit`, `isxdigit`, `isnumber`, `isalnum`, `iscntrl`, `ispunct`, `isspace`, `isprint`, `isgraph`, `lowercase`, `uppercase`, `titlecase`, `lcfirst` and `ucfirst`. + * `isnumber` has been deprecated in favor of `isnumeric`, `is_assigned_char` + in favor of `isassigned` and `normalize_string` in favor of `normalize`, all three + in the new `Unicode` standard library module ([#25021]). 
+ Command-line option changes --------------------------- @@ -1708,7 +1712,7 @@ Command-line option changes [#24221]: https://github.com/JuliaLang/julia/issues/24221 [#24240]: https://github.com/JuliaLang/julia/issues/24240 [#24245]: https://github.com/JuliaLang/julia/issues/24245 -[#24250]: https://github.com/JuliaLang/julia/issues/24250 +[#24250]: https://github.com/JuliaLang/julia/issues/24250 [#24263]: https://github.com/JuliaLang/julia/issues/24263 [#24279]: https://github.com/JuliaLang/julia/issues/24279 [#24281]: https://github.com/JuliaLang/julia/issues/24281 diff --git a/base/client.jl b/base/client.jl index df6f5df0c9d3d7..88914e1d49f2f0 100644 --- a/base/client.jl +++ b/base/client.jl @@ -361,7 +361,7 @@ function load_machine_file(path::AbstractString) s = split(line, '*'; keep = false) map!(strip, s, s) if length(s) > 1 - cnt = isnumber(s[1]) ? parse(Int,s[1]) : Symbol(s[1]) + cnt = all(isdigit, s[1]) ? parse(Int,s[1]) : Symbol(s[1]) push!(machines,(s[2], cnt)) else push!(machines,line) diff --git a/base/distributed/Distributed.jl b/base/distributed/Distributed.jl index 3697d528216bd1..e70522f4025fd8 100644 --- a/base/distributed/Distributed.jl +++ b/base/distributed/Distributed.jl @@ -15,7 +15,7 @@ using Base: Process, Semaphore, JLOptions, AnyDict, buffer_writes, wait_connecte binding_module, notify_error, atexit, julia_exename, julia_cmd, AsyncGenerator, display_error, acquire, release, invokelatest, warn_once, shell_escape_posixly, uv_error -using Base.UTF8proc: isascii, isdigit, isnumber +using Base.UTF8proc: isascii, isdigit, isnumeric # NOTE: clusterserialize.jl imports additional symbols from Base.Serializer for use diff --git a/base/precompile.jl b/base/precompile.jl index f0be54c9270af2..79c1864aca2eac 100644 --- a/base/precompile.jl +++ b/base/precompile.jl @@ -68,7 +68,7 @@ precompile(Tuple{typeof(Base.lstrip), Base.SubString{String}, Array{Char, 1}}) precompile(Tuple{getfield(Base, Symbol("#kw##split")), Array{Any, 1}, typeof(Base.split), 
String, Char}) precompile(Tuple{getfield(Base, Symbol("#kw##split")), Array{Any, 1}, typeof(Base.split), Base.SubString{String}, Char}) precompile(Tuple{typeof(Base.map!), typeof(Base.strip), Array{Base.SubString{String}, 1}, Array{Base.SubString{String}, 1}}) -precompile(Tuple{typeof(Base.UTF8proc.isnumber), Base.SubString{String}}) +precompile(Tuple{typeof(Base.UTF8proc.isnumeric), Base.SubString{String}}) precompile(Tuple{Type{Core.Inference.Generator{I, F} where F where I}, Type{Core.Inference.Const}, Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}}) precompile(Tuple{Type{Core.Inference.Generator{Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}, Type{Core.Inference.Const}}}, Type{Core.Inference.Const}, Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}}) precompile(Tuple{typeof(Core.Inference.convert), Type{Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}}, Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}}) diff --git a/base/regex.jl b/base/regex.jl index 8f8a5a36fcff9a..466cf09d229e16 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -338,11 +338,11 @@ function _replace(io, repl_s::SubstitutionString, str, r, re) if repl[next_i] == SUB_CHAR write(io, SUB_CHAR) i = nextind(repl, next_i) - elseif UTF8proc.isnumber(repl[next_i]) + elseif UTF8proc.isdigit(repl[next_i]) group = parse(Int, repl[next_i]) i = nextind(repl, next_i) while i <= e - if UTF8proc.isnumber(repl[i]) + if UTF8proc.isdigit(repl[i]) group = 10group + parse(Int, repl[i]) i = nextind(repl, i) else @@ -364,7 +364,7 @@ function _replace(io, repl_s::SubstitutionString, str, r, re) end # TODO: avoid this allocation groupname = SubString(repl, groupstart, prevind(repl, i)) - if all(UTF8proc.isnumber,groupname) + if all(UTF8proc.isdigit, groupname) _write_capture(io, re, parse(Int, groupname)) else group = PCRE.substring_number_from_name(re.regex, groupname) diff --git 
a/base/strings/utf8proc.jl b/base/strings/utf8proc.jl index 20fb360eba0fac..270f41510ee52d 100644 --- a/base/strings/utf8proc.jl +++ b/base/strings/utf8proc.jl @@ -148,7 +148,7 @@ end utf8proc_map(s::AbstractString, flags::Integer) = utf8proc_map(String(s), flags) -function normalize_string(s::AbstractString; stable::Bool=false, compat::Bool=false, compose::Bool=true, decompose::Bool=false, stripignore::Bool=false, rejectna::Bool=false, newline2ls::Bool=false, newline2ps::Bool=false, newline2lf::Bool=false, stripcc::Bool=false, casefold::Bool=false, lump::Bool=false, stripmark::Bool=false) +function normalize(s::AbstractString; stable::Bool=false, compat::Bool=false, compose::Bool=true, decompose::Bool=false, stripignore::Bool=false, rejectna::Bool=false, newline2ls::Bool=false, newline2ps::Bool=false, newline2lf::Bool=false, stripcc::Bool=false, casefold::Bool=false, lump::Bool=false, stripmark::Bool=false) flags = 0 stable && (flags = flags | UTF8PROC_STABLE) compat && (flags = flags | UTF8PROC_COMPAT) @@ -173,7 +173,7 @@ function normalize_string(s::AbstractString; stable::Bool=false, compat::Bool=fa end """ - normalize_string(s::AbstractString, normalform::Symbol) + normalize(s::AbstractString, normalform::Symbol) Normalize the string `s` according to one of the four "normal forms" of the Unicode standard: `normalform` can be `:NFC`, `:NFD`, `:NFKC`, or `:NFKD`. Normal forms C @@ -185,7 +185,7 @@ canonical choice (e.g. they expand ligatures into the individual characters), wi being more compact. 
Alternatively, finer control and additional transformations may be be obtained by calling -`normalize_string(s; keywords...)`, where any number of the following boolean keywords +`normalize(s; keywords...)`, where any number of the following boolean keywords options (which all default to `false` except for `compose`) are specified: * `compose=false`: do not perform canonical composition @@ -209,17 +209,17 @@ For example, NFKC corresponds to the options `compose=true, compat=true, stable= # Examples ```jldoctest -julia> "μ" == normalize_string("µ", compat=true) #LHS: Unicode U+03bc, RHS: Unicode U+00b5 +julia> "μ" == normalize("µ", compat=true) #LHS: Unicode U+03bc, RHS: Unicode U+00b5 true -julia> normalize_string("JuLiA", casefold=true) +julia> normalize("JuLiA", casefold=true) "julia" -julia> normalize_string("JúLiA", stripmark=true) +julia> normalize("JúLiA", stripmark=true) "JuLiA" ``` """ -function normalize_string(s::AbstractString, nf::Symbol) +function normalize(s::AbstractString, nf::Symbol) utf8proc_map(s, nf == :NFC ? (UTF8PROC_STABLE | UTF8PROC_COMPOSE) : nf == :NFD ? (UTF8PROC_STABLE | UTF8PROC_DECOMPOSE) : nf == :NFKC ? (UTF8PROC_STABLE | UTF8PROC_COMPOSE @@ -275,20 +275,20 @@ category_abbrev(c) = unsafe_string(ccall(:utf8proc_category_string, Cstring, (UI category_string(c) = category_strings[category_code(c)+1] """ - is_assigned_char(c) -> Bool + isassigned(c) -> Bool Returns `true` if the given char or integer is an assigned Unicode code point. # Examples ```jldoctest -julia> is_assigned_char(101) +julia> isassigned(101) true -julia> is_assigned_char('\\x01') +julia> isassigned('\\x01') true ``` """ -is_assigned_char(c) = category_code(c) != UTF8PROC_CATEGORY_CN +isassigned(c) = category_code(c) != UTF8PROC_CATEGORY_CN ## libc character class predicates ## @@ -342,7 +342,7 @@ end """ isdigit(c::Char) -> Bool -Tests whether a character is a numeric digit (0-9). +Tests whether a character is a decimal digit (0-9). 
# Examples ```jldoctest @@ -380,25 +380,31 @@ false isalpha(c::Char) = (UTF8PROC_CATEGORY_LU <= category_code(c) <= UTF8PROC_CATEGORY_LO) """ - isnumber(c::Char) -> Bool + isnumeric(c::Char) -> Bool Tests whether a character is numeric. A character is classified as numeric if it belongs to the Unicode general category Number, i.e. a character whose category code begins with 'N'. +Note that this broad category includes characters such as ¾ and ௰. +Use [`isdigit`](@ref) to check whether a character is a decimal digit between 0 and 9. + # Examples ```jldoctest -julia> isnumber('9') +julia> isnumeric('௰') +true + +julia> isnumeric('9') true -julia> isnumber('α') +julia> isnumeric('α') false -julia> isnumber('❤') +julia> isnumeric('❤') false ``` """ -isnumber(c::Char) = (UTF8PROC_CATEGORY_ND <= category_code(c) <= UTF8PROC_CATEGORY_NO) +isnumeric(c::Char) = (UTF8PROC_CATEGORY_ND <= category_code(c) <= UTF8PROC_CATEGORY_NO) """ isalnum(c::Char) -> Bool diff --git a/doc/src/manual/faq.md b/doc/src/manual/faq.md index ebcae376c8446d..119777b3434764 100644 --- a/doc/src/manual/faq.md +++ b/doc/src/manual/faq.md @@ -617,8 +617,8 @@ all/many future usages of the other functions in module Foo that depend on calli Unlike many languages (for example, C and Java), Julia does not have a "null" value. When a reference (variable, object field, or array element) is uninitialized, accessing it will immediately throw -an error. This situation can be detected using the [`isdefined`](@ref) or [`isassigned`](@ref) -functions. +an error. This situation can be detected using the [`isdefined`](@ref) or +[`isassigned`](@ref Base.isassigned) functions. Some functions are used only for their side effects, and do not need to return a value. In these cases, the convention is to return the value `nothing`, which is just a singleton object of type @@ -627,7 +627,7 @@ this convention, and that the REPL does not print anything for it. 
Some language would not otherwise have a value also yield `nothing`, for example `if false; end`. To represent missing data in the statistical sense (`NA` in R or `NULL` in SQL), use the -[`missing`](@ref) object. See the [`Missing Values|](@ref missing) section for more details. +[`missing`](@ref) object. See the [`Missing Values`](@ref missing) section for more details. The empty tuple (`()`) is another form of nothingness. But, it should not really be thought of as nothing but rather a tuple of zero values. diff --git a/stdlib/Unicode/docs/src/index.md b/stdlib/Unicode/docs/src/index.md index 86ab0d7383256d..4f519aa0bc05e5 100644 --- a/stdlib/Unicode/docs/src/index.md +++ b/stdlib/Unicode/docs/src/index.md @@ -1,8 +1,8 @@ # Unicode ```@docs -Unicode.is_assigned_char -Unicode.normalize_string +Unicode.isassigned +Unicode.normalize Unicode.graphemes Unicode.uppercase Unicode.lowercase @@ -16,7 +16,7 @@ Unicode.iscntrl Unicode.isdigit Unicode.isgraph Unicode.islower -Unicode.isnumber +Unicode.isnumeric Unicode.isprint Unicode.ispunct Unicode.isspace diff --git a/stdlib/Unicode/src/Unicode.jl b/stdlib/Unicode/src/Unicode.jl index ff6e19f96d0ddc..1923da047defe7 100644 --- a/stdlib/Unicode/src/Unicode.jl +++ b/stdlib/Unicode/src/Unicode.jl @@ -4,14 +4,22 @@ __precompile__(true) module Unicode -using Base.UTF8proc: normalize_string, graphemes, is_assigned_char, textwidth, isvalid, - islower, isupper, isalpha, isdigit, isxdigit, isnumber, isalnum, +using Base.UTF8proc: normalize, graphemes, isassigned, textwidth, isvalid, + islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum, iscntrl, ispunct, isspace, isprint, isgraph, lowercase, uppercase, titlecase, lcfirst, ucfirst -export normalize_string, graphemes, is_assigned_char, textwidth, isvalid, - islower, isupper, isalpha, isdigit, isxdigit, isnumber, isalnum, +export normalize, graphemes, isassigned, textwidth, isvalid, + islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum, iscntrl, ispunct, 
isspace, isprint, isgraph, lowercase, uppercase, titlecase, lcfirst, ucfirst +# BEGIN 0.7 deprecations + +@deprecate isnumber(c::Char) Unicode.isnumeric(c) +@deprecate is_assigned_char(c::Char) Unicode.isassigned(c) +@deprecate normalize_string(s::AbstractString, nf::Symbol; kwargs...) Unicode.normalize(s, nf; kwargs...) + +# END 0.7 deprecations + end diff --git a/stdlib/Unicode/test/runtests.jl b/stdlib/Unicode/test/runtests.jl index c77d19d5cf6762..64ea7918064b0b 100644 --- a/stdlib/Unicode/test/runtests.jl +++ b/stdlib/Unicode/test/runtests.jl @@ -4,25 +4,25 @@ using Test using Unicode @testset "string normalization" begin - # normalize_string (Unicode normalization etc.): - @test normalize_string("\u006e\u0303", :NFC) == "\u00f1" - @test "\u006e\u0303" == normalize_string("\u00f1", :NFD) - @test normalize_string("\ufb00", :NFC) != "ff" - @test normalize_string("\ufb00", :NFKC) == "ff" - @test normalize_string("\u006e\u0303\ufb00", :NFKC) == "\u00f1"*"ff" - @test normalize_string("\u00f1\ufb00", :NFKD) == "\u006e\u0303"*"ff" - @test normalize_string("\u006e\u0303", compose=true) == "\u00f1" - @test "\u006e\u0303" == normalize_string("\u00f1", decompose=true) - @test normalize_string("\u006e\u0303\u00b5",compat=true) == "\u00f1\u03bc" - @test normalize_string("Σσς",casefold=true) == "σσσ" - @test normalize_string("∕⁄", lump=true) == "//" - @test normalize_string("\ua\n\r\r\ua", newline2lf=true) == "\ua\ua\ua\ua" - @test normalize_string("\ua\n\r\r\ua", newline2ls=true) == "\u2028\u2028\u2028\u2028" - @test normalize_string("\ua\n\r\r\ua", newline2ps=true) == "\u2029\u2029\u2029\u2029" - @test normalize_string("\u00f1", stripmark=true) == "n" - @test isempty(normalize_string("\u00ad", stripignore=true)) - @test normalize_string("\t\r", stripcc=true) == " " - @test normalize_string("\t\r", stripcc=true, newline2ls=true) == " \u2028" + # normalize (Unicode normalization etc.): + @test normalize("\u006e\u0303", :NFC) == "\u00f1" + @test "\u006e\u0303" == 
normalize("\u00f1", :NFD) + @test normalize("\ufb00", :NFC) != "ff" + @test normalize("\ufb00", :NFKC) == "ff" + @test normalize("\u006e\u0303\ufb00", :NFKC) == "\u00f1"*"ff" + @test normalize("\u00f1\ufb00", :NFKD) == "\u006e\u0303"*"ff" + @test normalize("\u006e\u0303", compose=true) == "\u00f1" + @test "\u006e\u0303" == normalize("\u00f1", decompose=true) + @test normalize("\u006e\u0303\u00b5",compat=true) == "\u00f1\u03bc" + @test normalize("Σσς",casefold=true) == "σσσ" + @test normalize("∕⁄", lump=true) == "//" + @test normalize("\ua\n\r\r\ua", newline2lf=true) == "\ua\ua\ua\ua" + @test normalize("\ua\n\r\r\ua", newline2ls=true) == "\u2028\u2028\u2028\u2028" + @test normalize("\ua\n\r\r\ua", newline2ps=true) == "\u2029\u2029\u2029\u2029" + @test normalize("\u00f1", stripmark=true) == "n" + @test isempty(normalize("\u00ad", stripignore=true)) + @test normalize("\t\r", stripcc=true) == " " + @test normalize("\t\r", stripcc=true, newline2ls=true) == " \u2028" end @testset "unicode sa#15" begin @@ -30,7 +30,7 @@ end #http://www.unicode.org/reports/tr15/ @testset "canonical equivalence" begin - let ==(a::Array{Char},b::Array{Char}) = normalize_string(string(a...), :NFC)==normalize_string(string(b...), :NFC) + let ==(a::Array{Char},b::Array{Char}) = normalize(string(a...), :NFC)==normalize(string(b...), :NFC) ==(a,b) = Base.:(==)(a,b) @test ['C', '̧'] == ['Ç'] @test ['q', '̇', '̣'] == ['q', '̣', '̇'] @@ -40,7 +40,7 @@ end end @testset "compatibility equivalence" begin - let ==(a::Array{Char},b::Array{Char}) = normalize_string(string(a...), :NFKC)==normalize_string(string(b...), :NFKC) + let ==(a::Array{Char},b::Array{Char}) = normalize(string(a...), :NFKC)==normalize(string(b...), :NFKC) ==(a,b) = Base.:(==)(a,b) @test ['ℌ'] == ['ℍ'] == ['H'] @test ['ﻨ'] == ['ﻧ'] == ['ﻦ'] == ['ﻥ'] @@ -55,36 +55,36 @@ end end @testset "singletons" begin - @test normalize_string("\U212b", :NFD) == "A\U030a" - @test normalize_string("\U212b", :NFC) == "\U00c5" - @test 
normalize_string("\U2126", :NFC) == normalize_string("\U2126", :NFD) == "\U03a9" + @test normalize("\U212b", :NFD) == "A\U030a" + @test normalize("\U212b", :NFC) == "\U00c5" + @test normalize("\U2126", :NFC) == normalize("\U2126", :NFD) == "\U03a9" end @testset "canonical composites" begin - @test normalize_string("\U00c5", :NFC) == "\U00c5" - @test normalize_string("\U00c5", :NFD) == "A\U030a" - @test normalize_string("\U00f4", :NFC) == "\U00f4" - @test normalize_string("\U00f4", :NFD) == "o\U0302" + @test normalize("\U00c5", :NFC) == "\U00c5" + @test normalize("\U00c5", :NFD) == "A\U030a" + @test normalize("\U00f4", :NFC) == "\U00f4" + @test normalize("\U00f4", :NFD) == "o\U0302" end @testset "multiple combining marks" begin - @test normalize_string("\U1e69", :NFD) == "s\U0323\U0307" - @test normalize_string("\U1e69", :NFC) == "\U1e69" - @test normalize_string("\U1e0b\U0323", :NFD) == "d\U0323\U0307" - @test normalize_string("\U1e0b\U0323", :NFC) == "\U1e0d\U0307" - @test normalize_string("q\U0307\U0323", :NFC) == "q\U0323\U0307" - @test normalize_string("q\U0307\U0323", :NFD) == "q\U0323\U0307" + @test normalize("\U1e69", :NFD) == "s\U0323\U0307" + @test normalize("\U1e69", :NFC) == "\U1e69" + @test normalize("\U1e0b\U0323", :NFD) == "d\U0323\U0307" + @test normalize("\U1e0b\U0323", :NFC) == "\U1e0d\U0307" + @test normalize("q\U0307\U0323", :NFC) == "q\U0323\U0307" + @test normalize("q\U0307\U0323", :NFD) == "q\U0323\U0307" end @testset "compatibility composites" begin - @test normalize_string("\Ufb01", :NFD) == normalize_string("\Ufb01", :NFC) == "\Ufb01" - @test normalize_string("\Ufb01", :NFKD) == normalize_string("\Ufb01", :NFKC) == "fi" - @test normalize_string("2\U2075", :NFD) == normalize_string("2\U2075", :NFC) == "2\U2075" - @test normalize_string("2\U2075", :NFKD) == normalize_string("2\U2075", :NFKC) == "25" - @test normalize_string("\U1e9b\U0323", :NFD) == "\U017f\U0323\U0307" - @test normalize_string("\U1e9b\U0323", :NFC) == "\U1e9b\U0323" - @test 
normalize_string("\U1e9b\U0323", :NFKD) == "s\U0323\U0307" - @test normalize_string("\U1e9b\U0323", :NFKC) == "\U1e69" + @test normalize("\Ufb01", :NFD) == normalize("\Ufb01", :NFC) == "\Ufb01" + @test normalize("\Ufb01", :NFKD) == normalize("\Ufb01", :NFKC) == "fi" + @test normalize("2\U2075", :NFD) == normalize("2\U2075", :NFC) == "2\U2075" + @test normalize("2\U2075", :NFKD) == normalize("2\U2075", :NFKC) == "25" + @test normalize("\U1e9b\U0323", :NFD) == "\U017f\U0323\U0307" + @test normalize("\U1e9b\U0323", :NFC) == "\U1e9b\U0323" + @test normalize("\U1e9b\U0323", :NFKD) == "s\U0323\U0307" + @test normalize("\U1e9b\U0323", :NFKC) == "\U1e69" end end @@ -95,7 +95,7 @@ end @test islower(c) == true @test isupper(c) == false @test isdigit(c) == false - @test isnumber(c) == false + @test isnumeric(c) == false end aupper=['A', 'D', 'J', 'Y', 'Z'] @@ -105,7 +105,7 @@ end @test islower(c) == false @test isupper(c) == true @test isdigit(c) == false - @test isnumber(c) == false + @test isnumeric(c) == false end nocase=['א','ﺵ'] @@ -113,7 +113,7 @@ end for c in alphas @test isalpha(c) == true - @test isnumber(c) == false + @test isnumeric(c) == false end anumber=['0', '1', '5', '9'] @@ -121,11 +121,11 @@ end for c in anumber @test isdigit(c) == true - @test isnumber(c) == true + @test isnumeric(c) == true end for c in unumber @test isdigit(c) == false - @test isnumber(c) == true + @test isnumeric(c) == true end alnums=vcat(alphas,anumber,unumber) @@ -200,7 +200,7 @@ end @test !all(isgraph," \t \n \r ") @test !all(isprint," \t \n \r ") @test !all(isalpha," \t \n \r ") - @test !all(isnumber," \t \n \r ") + @test !all(isnumeric," \t \n \r ") @test !all(ispunct," \t \n \r ") @test !all(isspace,"ΣβΣβ") @@ -209,11 +209,11 @@ end @test all(isprint,"ΣβΣβ") @test !all(isupper,"ΣβΣβ") @test !all(islower,"ΣβΣβ") - @test !all(isnumber,"ΣβΣβ") + @test !all(isnumeric,"ΣβΣβ") @test !all(iscntrl,"ΣβΣβ") @test !all(ispunct,"ΣβΣβ") - @test all(isnumber,"23435") + @test 
all(isnumeric,"23435") @test all(isdigit,"23435") @test all(isalnum,"23435") @test !all(isalpha,"23435") @@ -249,8 +249,8 @@ end for T in (String,GenericString) for nf in (:NFC, :NFD) for (s, g) in grphtest - s_ = T(normalize_string(s, nf)) - g_ = map(s -> normalize_string(s, nf), g) + s_ = T(normalize(s, nf)) + g_ = map(s -> normalize(s, nf), g) # #9261 if length(s_) > 0 @test typeof(first(graphemes(s_))) == SubString{typeof(s_)} @@ -260,7 +260,7 @@ end @test grph == g_ @test length(graphemes(s_)) == length(grph) end - S = [T(normalize_string(s)) for (s,g) in grphtest] + S = [T(normalize(s)) for (s,g) in grphtest] G = map(graphemes, S) @test map(graphemes, sort!(S)) == sort!(G) end @@ -280,23 +280,23 @@ end @testset "#10958 handling of embedded NUL chars" begin @test length("\0w") == length("\0α") == 2 @test textwidth("\0w") == textwidth("\0α") == 1 - @test normalize_string("\0W", casefold=true) == "\0w" + @test normalize("\0W", casefold=true) == "\0w" end @testset "ut8proc_map with GenericString" begin - @test normalize_string(GenericString("\u006e\u0303"), :NFC) == "\u00f1" + @test normalize(GenericString("\u006e\u0303"), :NFC) == "\u00f1" end -@testset "normalize_string keywords" begin - @test_throws ArgumentError normalize_string("\u006e\u0303", compose=false, compat=true) - @test_throws ArgumentError normalize_string("\u006e\u0303", compose=false, stripmark=true) +@testset "normalize keywords" begin + @test_throws ArgumentError normalize("\u006e\u0303", compose=false, compat=true) + @test_throws ArgumentError normalize("\u006e\u0303", compose=false, stripmark=true) end @testset "fastplus" begin @test lowercase('A') == 'a' @test uppercase('a') == 'A' - @test is_assigned_char('A') + @test isassigned('A') end @testset "isspace" begin diff --git a/test/strings/basic.jl b/test/strings/basic.jl index b5f6e69446fde4..5e00ccdd88e9a5 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -278,7 +278,7 @@ end for T in [BigInt, Int8, UInt8, Int16, UInt16, 
Int32, UInt32, Int64, UInt64, Int128, UInt128, Float64, Float32] @test isnull(tryparse(T, "1\0")) end - let s = Base.UTF8proc.normalize_string("tést",:NFKC) + let s = Base.UTF8proc.normalize("tést",:NFKC) @test unsafe_string(Base.unsafe_convert(Cstring, Base.cconvert(Cstring, s))) == s @test unsafe_string(convert(Cstring, Symbol(s))) == s end