diff --git a/NEWS.md b/NEWS.md index b512e2c1b363b..dfe7f1e3d4e76 100644 --- a/NEWS.md +++ b/NEWS.md @@ -365,6 +365,15 @@ This section lists changes that do not have deprecation warnings. * `findn(x::AbstractVector)` now return a 1-tuple with the vector of indices, to be consistent with higher order arrays ([#25365]). + * the default behavior of `titlecase` is changed in two ways ([#23393]): + + characters not starting a word are converted to lowercase; + a new keyword argument `strict` is added; + setting it to `false` restores the old behavior. + + any non-letter character is considered as a word separator; + to get the old behavior (only "space" characters are considered as + word separators), use the keyword `wordsep=isspace`. + + Library improvements -------------------- @@ -918,6 +927,7 @@ Deprecated or removed * `findin(a, b)` has been deprecated in favor of `find(occursin(b), a)` ([#24673]). + Command-line option changes --------------------------- diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl index 96eaa0d65342d..8447a125601b1 100644 --- a/base/strings/unicode.jl +++ b/base/strings/unicode.jl @@ -384,6 +384,19 @@ function isupper(c::Char) cat == UTF8PROC_CATEGORY_LU || cat == UTF8PROC_CATEGORY_LT end +""" + iscased(c::Char) -> Bool + +Tests whether a character is cased, i.e. is lower-, upper- or title-cased. +""" +function iscased(c::Char) + cat = category_code(c) + return cat == UTF8PROC_CATEGORY_LU || + cat == UTF8PROC_CATEGORY_LT || + cat == UTF8PROC_CATEGORY_LL +end + + """ isdigit(c::Char) -> Bool @@ -649,27 +662,38 @@ julia> lowercase("STRINGS AND THINGS") lowercase(s::AbstractString) = map(lowercase, s) """ - titlecase(s::AbstractString) -> String + titlecase(s::AbstractString; [wordsep::Function], strict::Bool=true) -> String -Capitalize the first character of each word in `s`. 
+Capitalize the first character of each word in `s`; +if `strict` is true, every other character is +converted to lowercase, otherwise they are left unchanged. +By default, every character that is not a cased letter is considered a word separator; +a predicate can be passed as the `wordsep` keyword to determine +which characters should be considered as word separators. See also [`ucfirst`](@ref) to capitalize only the first character in `s`. # Examples ```jldoctest -julia> titlecase("the Julia programming language") +julia> titlecase("the JULIA programming language") "The Julia Programming Language" + +julia> titlecase("ISS - international space station", strict=false) +"ISS - International Space Station" + +julia> titlecase("a-a b-b", wordsep = c->c==' ') +"A-a B-b" ``` """ -function titlecase(s::AbstractString) +function titlecase(s::AbstractString; wordsep::Function = !iscased, strict::Bool=true) startword = true b = IOBuffer() for c in s - if isspace(c) + if wordsep(c) print(b, c) startword = true else - print(b, startword ? titlecase(c) : c) + print(b, startword ? titlecase(c) : strict ? 
lowercase(c) : c) startword = false end end diff --git a/stdlib/Unicode/src/Unicode.jl b/stdlib/Unicode/src/Unicode.jl index 59077acb6a79c..e55a8f6cc39ef 100644 --- a/stdlib/Unicode/src/Unicode.jl +++ b/stdlib/Unicode/src/Unicode.jl @@ -7,7 +7,7 @@ module Unicode using Base.Unicode: normalize, graphemes, isassigned, textwidth, isvalid, islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum, iscntrl, ispunct, isspace, isprint, isgraph, - lowercase, uppercase, titlecase, lcfirst, ucfirst + lowercase, uppercase, titlecase, lcfirst, ucfirst, iscased export graphemes, textwidth, isvalid, islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum, diff --git a/stdlib/Unicode/test/runtests.jl b/stdlib/Unicode/test/runtests.jl index dacf266ccbaef..5a5b83eb12b87 100644 --- a/stdlib/Unicode/test/runtests.jl +++ b/stdlib/Unicode/test/runtests.jl @@ -2,7 +2,7 @@ using Test using Unicode -using Unicode: normalize, isassigned +using Unicode: normalize, isassigned, iscased @testset "string normalization" begin # normalize (Unicode normalization etc.): @@ -366,8 +366,14 @@ end @testset "titlecase" begin @test titlecase('lj') == 'Lj' @test titlecase("ljubljana") == "Ljubljana" - @test titlecase("aBc ABC") == "ABc ABC" - @test titlecase("abcD EFG\n\thij") == "AbcD EFG\n\tHij" + @test titlecase("aBc ABC") == "Abc Abc" + @test titlecase("aBc ABC", strict=true) == "Abc Abc" + @test titlecase("aBc ABC", strict=false) == "ABc ABC" + @test titlecase("abcD EFG\n\thij", strict=true) == "Abcd Efg\n\tHij" + @test titlecase("abcD EFG\n\thij", strict=false) == "AbcD EFG\n\tHij" + @test titlecase("abc-def") == "Abc-Def" + @test titlecase("abc-def", wordsep = !iscased) == "Abc-Def" + @test titlecase("abc-def", wordsep = isspace) == "Abc-def" end end