Skip to content

Commit

Permalink
Deprecate isnumber(), is_assigned_char() and normalize_string()
Browse files Browse the repository at this point in the history
isnumeric() is consistent with Python and Rust (but not Go), and less easy to confuse
with isdigit(). Improve documentation to make confusion less easy. Also fix a few uses
where isdigit() is more appropriate than isnumber().
  • Loading branch information
nalimilan committed Dec 12, 2017
1 parent c076efa commit 742918f
Show file tree
Hide file tree
Showing 11 changed files with 114 additions and 94 deletions.
6 changes: 5 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,10 @@ Deprecated or removed
`isdigit`, `isxdigit`, `isnumber`, `isalnum`, `iscntrl`, `ispunct`, `isspace`,
`isprint`, `isgraph`, `lowercase`, `uppercase`, `titlecase`, `lcfirst` and `ucfirst`.

* `isnumber` has been deprecated in favor of `isnumeric`, `is_assigned_char`
in favor of `isassigned` and `normalize_string` in favor of `normalize`, all three
in the new `Unicode` standard library module ([#25021]).

Command-line option changes
---------------------------

Expand Down Expand Up @@ -1708,7 +1712,7 @@ Command-line option changes
[#24221]: https://github.com/JuliaLang/julia/issues/24221
[#24240]: https://github.com/JuliaLang/julia/issues/24240
[#24245]: https://github.com/JuliaLang/julia/issues/24245
[#24250]: https://github.com/JuliaLang/julia/issues/24250
[#24250]: https://github.com/JuliaLang/julia/issues/24250
[#24263]: https://github.com/JuliaLang/julia/issues/24263
[#24279]: https://github.com/JuliaLang/julia/issues/24279
[#24281]: https://github.com/JuliaLang/julia/issues/24281
Expand Down
2 changes: 1 addition & 1 deletion base/client.jl
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ function load_machine_file(path::AbstractString)
s = split(line, '*'; keep = false)
map!(strip, s, s)
if length(s) > 1
cnt = isnumber(s[1]) ? parse(Int,s[1]) : Symbol(s[1])
cnt = all(isdigit, s[1]) ? parse(Int,s[1]) : Symbol(s[1])
push!(machines,(s[2], cnt))
else
push!(machines,line)
Expand Down
2 changes: 1 addition & 1 deletion base/distributed/Distributed.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ using Base: Process, Semaphore, JLOptions, AnyDict, buffer_writes, wait_connecte
binding_module, notify_error, atexit, julia_exename, julia_cmd,
AsyncGenerator, display_error, acquire, release, invokelatest, warn_once,
shell_escape_posixly, uv_error
using Base.UTF8proc: isascii, isdigit, isnumber
using Base.UTF8proc: isascii, isdigit, isnumeric

# NOTE: clusterserialize.jl imports additional symbols from Base.Serializer for use

Expand Down
2 changes: 1 addition & 1 deletion base/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ precompile(Tuple{typeof(Base.lstrip), Base.SubString{String}, Array{Char, 1}})
precompile(Tuple{getfield(Base, Symbol("#kw##split")), Array{Any, 1}, typeof(Base.split), String, Char})
precompile(Tuple{getfield(Base, Symbol("#kw##split")), Array{Any, 1}, typeof(Base.split), Base.SubString{String}, Char})
precompile(Tuple{typeof(Base.map!), typeof(Base.strip), Array{Base.SubString{String}, 1}, Array{Base.SubString{String}, 1}})
precompile(Tuple{typeof(Base.UTF8proc.isnumber), Base.SubString{String}})
precompile(Tuple{typeof(Base.UTF8proc.isnumeric), Base.SubString{String}})
precompile(Tuple{Type{Core.Inference.Generator{I, F} where F where I}, Type{Core.Inference.Const}, Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}})
precompile(Tuple{Type{Core.Inference.Generator{Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}, Type{Core.Inference.Const}}}, Type{Core.Inference.Const}, Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}})
precompile(Tuple{typeof(Core.Inference.convert), Type{Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}}, Tuple{Tuple{Base.DevNullStream, Base.DevNullStream, Base.DevNullStream}}})
Expand Down
6 changes: 3 additions & 3 deletions base/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -338,11 +338,11 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
if repl[next_i] == SUB_CHAR
write(io, SUB_CHAR)
i = nextind(repl, next_i)
elseif UTF8proc.isnumber(repl[next_i])
elseif UTF8proc.isdigit(repl[next_i])
group = parse(Int, repl[next_i])
i = nextind(repl, next_i)
while i <= e
if UTF8proc.isnumber(repl[i])
if UTF8proc.isdigit(repl[i])
group = 10group + parse(Int, repl[i])
i = nextind(repl, i)
else
Expand All @@ -364,7 +364,7 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
end
# TODO: avoid this allocation
groupname = SubString(repl, groupstart, prevind(repl, i))
if all(UTF8proc.isnumber,groupname)
if all(UTF8proc.isdigit, groupname)
_write_capture(io, re, parse(Int, groupname))
else
group = PCRE.substring_number_from_name(re.regex, groupname)
Expand Down
40 changes: 23 additions & 17 deletions base/strings/utf8proc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ end

utf8proc_map(s::AbstractString, flags::Integer) = utf8proc_map(String(s), flags)

function normalize_string(s::AbstractString; stable::Bool=false, compat::Bool=false, compose::Bool=true, decompose::Bool=false, stripignore::Bool=false, rejectna::Bool=false, newline2ls::Bool=false, newline2ps::Bool=false, newline2lf::Bool=false, stripcc::Bool=false, casefold::Bool=false, lump::Bool=false, stripmark::Bool=false)
function normalize(s::AbstractString; stable::Bool=false, compat::Bool=false, compose::Bool=true, decompose::Bool=false, stripignore::Bool=false, rejectna::Bool=false, newline2ls::Bool=false, newline2ps::Bool=false, newline2lf::Bool=false, stripcc::Bool=false, casefold::Bool=false, lump::Bool=false, stripmark::Bool=false)
flags = 0
stable && (flags = flags | UTF8PROC_STABLE)
compat && (flags = flags | UTF8PROC_COMPAT)
Expand All @@ -173,7 +173,7 @@ function normalize_string(s::AbstractString; stable::Bool=false, compat::Bool=fa
end

"""
normalize_string(s::AbstractString, normalform::Symbol)
normalize(s::AbstractString, normalform::Symbol)
Normalize the string `s` according to one of the four "normal forms" of the Unicode
standard: `normalform` can be `:NFC`, `:NFD`, `:NFKC`, or `:NFKD`. Normal forms C
Expand All @@ -185,7 +185,7 @@ canonical choice (e.g. they expand ligatures into the individual characters), wi
being more compact.
Alternatively, finer control and additional transformations may be obtained by calling
`normalize_string(s; keywords...)`, where any number of the following boolean keywords
`normalize(s; keywords...)`, where any number of the following boolean keywords
options (which all default to `false` except for `compose`) are specified:
* `compose=false`: do not perform canonical composition
Expand All @@ -209,17 +209,17 @@ For example, NFKC corresponds to the options `compose=true, compat=true, stable=
# Examples
```jldoctest
julia> "μ" == normalize_string("µ", compat=true) #LHS: Unicode U+03bc, RHS: Unicode U+00b5
julia> "μ" == normalize("µ", compat=true) #LHS: Unicode U+03bc, RHS: Unicode U+00b5
true
julia> normalize_string("JuLiA", casefold=true)
julia> normalize("JuLiA", casefold=true)
"julia"
julia> normalize_string("JúLiA", stripmark=true)
julia> normalize("JúLiA", stripmark=true)
"JuLiA"
```
"""
function normalize_string(s::AbstractString, nf::Symbol)
function normalize(s::AbstractString, nf::Symbol)
utf8proc_map(s, nf == :NFC ? (UTF8PROC_STABLE | UTF8PROC_COMPOSE) :
nf == :NFD ? (UTF8PROC_STABLE | UTF8PROC_DECOMPOSE) :
nf == :NFKC ? (UTF8PROC_STABLE | UTF8PROC_COMPOSE
Expand Down Expand Up @@ -275,20 +275,20 @@ category_abbrev(c) = unsafe_string(ccall(:utf8proc_category_string, Cstring, (UI
category_string(c) = category_strings[category_code(c)+1]

"""
is_assigned_char(c) -> Bool
isassigned(c) -> Bool
Returns `true` if the given char or integer is an assigned Unicode code point.
# Examples
```jldoctest
julia> is_assigned_char(101)
julia> isassigned(101)
true
julia> is_assigned_char('\\x01')
julia> isassigned('\\x01')
true
```
"""
is_assigned_char(c) = category_code(c) != UTF8PROC_CATEGORY_CN
isassigned(c) = category_code(c) != UTF8PROC_CATEGORY_CN

## libc character class predicates ##

Expand Down Expand Up @@ -342,7 +342,7 @@ end
"""
isdigit(c::Char) -> Bool
Tests whether a character is a numeric digit (0-9).
Tests whether a character is a decimal digit (0-9).
# Examples
```jldoctest
Expand Down Expand Up @@ -380,25 +380,31 @@ false
isalpha(c::Char) = (UTF8PROC_CATEGORY_LU <= category_code(c) <= UTF8PROC_CATEGORY_LO)

"""
isnumber(c::Char) -> Bool
isnumeric(c::Char) -> Bool
Tests whether a character is numeric.
A character is classified as numeric if it belongs to the Unicode general category Number,
i.e. a character whose category code begins with 'N'.
Note that this broad category includes characters such as ¾ and ௰.
Use [`isdigit`](@ref) to check whether a character is a decimal digit between 0 and 9.
# Examples
```jldoctest
julia> isnumber('9')
julia> isnumeric('௰')
true
julia> isnumeric('9')
true
julia> isnumber('α')
julia> isnumeric('α')
false
julia> isnumber('❤')
julia> isnumeric('❤')
false
```
"""
isnumber(c::Char) = (UTF8PROC_CATEGORY_ND <= category_code(c) <= UTF8PROC_CATEGORY_NO)
isnumeric(c::Char) = (UTF8PROC_CATEGORY_ND <= category_code(c) <= UTF8PROC_CATEGORY_NO)

"""
isalnum(c::Char) -> Bool
Expand Down
6 changes: 3 additions & 3 deletions doc/src/manual/faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -617,8 +617,8 @@ all/many future usages of the other functions in module Foo that depend on calli

Unlike many languages (for example, C and Java), Julia does not have a "null" value. When a reference
(variable, object field, or array element) is uninitialized, accessing it will immediately throw
an error. This situation can be detected using the [`isdefined`](@ref) or [`isassigned`](@ref)
functions.
an error. This situation can be detected using the [`isdefined`](@ref) or
[`isassigned`](@ref Base.isassigned) functions.

Some functions are used only for their side effects, and do not need to return a value. In these
cases, the convention is to return the value `nothing`, which is just a singleton object of type
Expand All @@ -627,7 +627,7 @@ this convention, and that the REPL does not print anything for it. Some language
would not otherwise have a value also yield `nothing`, for example `if false; end`.

To represent missing data in the statistical sense (`NA` in R or `NULL` in SQL), use the
[`missing`](@ref) object. See the [`Missing Values|](@ref missing) section for more details.
[`missing`](@ref) object. See the [`Missing Values`](@ref missing) section for more details.

The empty tuple (`()`) is another form of nothingness. But, it should not really be thought of
as nothing but rather a tuple of zero values.
Expand Down
6 changes: 3 additions & 3 deletions stdlib/Unicode/docs/src/index.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Unicode

```@docs
Unicode.is_assigned_char
Unicode.normalize_string
Unicode.isassigned
Unicode.normalize
Unicode.graphemes
Unicode.uppercase
Unicode.lowercase
Expand All @@ -16,7 +16,7 @@ Unicode.iscntrl
Unicode.isdigit
Unicode.isgraph
Unicode.islower
Unicode.isnumber
Unicode.isnumeric
Unicode.isprint
Unicode.ispunct
Unicode.isspace
Expand Down
17 changes: 13 additions & 4 deletions stdlib/Unicode/src/Unicode.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,23 @@ __precompile__(true)

module Unicode

using Base.UTF8proc: normalize_string, graphemes, is_assigned_char, textwidth, isvalid,
islower, isupper, isalpha, isdigit, isxdigit, isnumber, isalnum,
using Base.UTF8proc: normalize, graphemes, isassigned, textwidth, isvalid,
islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum,
iscntrl, ispunct, isspace, isprint, isgraph,
lowercase, uppercase, titlecase, lcfirst, ucfirst

export normalize_string, graphemes, is_assigned_char, textwidth, isvalid,
islower, isupper, isalpha, isdigit, isxdigit, isnumber, isalnum,
export normalize, graphemes, isassigned, textwidth, isvalid,
islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum,
iscntrl, ispunct, isspace, isprint, isgraph,
lowercase, uppercase, titlecase, lcfirst, ucfirst

# BEGIN 0.7 deprecations

@deprecate isnumber(c::Char) Unicode.isnumeric(c)
@deprecate is_assigned_char(c::Char) Unicode.isassigned(c)
@deprecate normalize_string(s::AbstractString, nf::Symbol; kwargs...) Unicode.normalize(s, nf; kwargs...)
@deprecate normalize_string(s::AbstractString; kwargs...) Unicode.normalize(s; kwargs...)

# END 0.7 deprecations

end
Loading

0 comments on commit 742918f

Please sign in to comment.