Skip to content

Commit

Permalink
Merge pull request #23393 from JuliaLang/rf/titlecase
Browse files Browse the repository at this point in the history
titlecase: chars not starting a word can be converted to lowercase
  • Loading branch information
JeffBezanson authored Jan 8, 2018
2 parents df91458 + f94ab0a commit 8245356
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 10 deletions.
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,15 @@ This section lists changes that do not have deprecation warnings.
* `findn(x::AbstractVector)` now returns a 1-tuple with the vector of indices, to be
consistent with higher-order arrays ([#25365]).

* the default behavior of `titlecase` has changed in two ways ([#23393]):
+ characters not starting a word are converted to lowercase;
a new keyword argument `strict` is added, which
restores the old behavior when set to `false`.
+ any non-letter character is considered a word separator;
to get the old behavior (only "space" characters are considered
word separators), use the keyword `wordsep=isspace`.


Library improvements
--------------------

Expand Down Expand Up @@ -918,6 +927,7 @@ Deprecated or removed

* `findin(a, b)` has been deprecated in favor of `find(occursin(b), a)` ([#24673]).


Command-line option changes
---------------------------

Expand Down
36 changes: 30 additions & 6 deletions base/strings/unicode.jl
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,19 @@ function isupper(c::Char)
cat == UTF8PROC_CATEGORY_LU || cat == UTF8PROC_CATEGORY_LT
end

"""
    iscased(c::Char) -> Bool

Test whether `c` is a cased character, that is, whether its category code is one of
the lowercase (`LL`), uppercase (`LU`) or titlecase (`LT`) letter categories.
"""
function iscased(c::Char)
    code = category_code(c)
    # Membership in the tuple compares with `==`, one candidate category at a time.
    return code in (UTF8PROC_CATEGORY_LL, UTF8PROC_CATEGORY_LU, UTF8PROC_CATEGORY_LT)
end


"""
isdigit(c::Char) -> Bool
Expand Down Expand Up @@ -649,27 +662,38 @@ julia> lowercase("STRINGS AND THINGS")
lowercase(s::AbstractString) = map(lowercase, s)

"""
titlecase(s::AbstractString) -> String
titlecase(s::AbstractString; [wordsep::Function], strict::Bool=true) -> String
Capitalize the first character of each word in `s`.
Capitalize the first character of each word in `s`;
if `strict` is true, every other character is
converted to lowercase, otherwise they are left unchanged.
By default, all non-letters are considered as word separators;
a predicate can be passed as the `wordsep` keyword to determine
which characters should be considered as word separators.
See also [`ucfirst`](@ref) to capitalize only the first
character in `s`.
# Examples
```jldoctest
julia> titlecase("the Julia programming language")
julia> titlecase("the JULIA programming language")
"The Julia Programming Language"
julia> titlecase("ISS - international space station", strict=false)
"ISS - International Space Station"
julia> titlecase("a-a b-b", wordsep = c->c==' ')
"A-a B-b"
```
"""
function titlecase(s::AbstractString)
function titlecase(s::AbstractString; wordsep::Function = !iscased, strict::Bool=true)
startword = true
b = IOBuffer()
for c in s
if isspace(c)
if wordsep(c)
print(b, c)
startword = true
else
print(b, startword ? titlecase(c) : c)
print(b, startword ? titlecase(c) : strict ? lowercase(c) : c)
startword = false
end
end
Expand Down
2 changes: 1 addition & 1 deletion stdlib/Unicode/src/Unicode.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ module Unicode
using Base.Unicode: normalize, graphemes, isassigned, textwidth, isvalid,
islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum,
iscntrl, ispunct, isspace, isprint, isgraph,
lowercase, uppercase, titlecase, lcfirst, ucfirst
lowercase, uppercase, titlecase, lcfirst, ucfirst, iscased

export graphemes, textwidth, isvalid,
islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum,
Expand Down
12 changes: 9 additions & 3 deletions stdlib/Unicode/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

using Test
using Unicode
using Unicode: normalize, isassigned
using Unicode: normalize, isassigned, iscased

@testset "string normalization" begin
# normalize (Unicode normalization etc.):
Expand Down Expand Up @@ -366,8 +366,14 @@ end
@testset "titlecase" begin
@test titlecase('lj') == 'Lj'
@test titlecase("ljubljana") == "Ljubljana"
@test titlecase("aBc ABC") == "ABc ABC"
@test titlecase("abcD EFG\n\thij") == "AbcD EFG\n\tHij"
@test titlecase("aBc ABC") == "Abc Abc"
@test titlecase("aBc ABC", strict=true) == "Abc Abc"
@test titlecase("aBc ABC", strict=false) == "ABc ABC"
@test titlecase("abcD EFG\n\thij", strict=true) == "Abcd Efg\n\tHij"
@test titlecase("abcD EFG\n\thij", strict=false) == "AbcD EFG\n\tHij"
@test titlecase("abc-def") == "Abc-Def"
@test titlecase("abc-def", wordsep = !iscased) == "Abc-Def"
@test titlecase("abc-def", wordsep = isspace) == "Abc-def"
end
end

Expand Down

0 comments on commit 8245356

Please sign in to comment.