Skip to content

Commit

Permalink
titlecase: all non-letters are considered word-separators
Browse files Browse the repository at this point in the history
This is to be consistent with `istitle`.
  • Loading branch information
rfourquet committed Aug 22, 2017
1 parent 637c623 commit 7ff1b18
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 7 deletions.
2 changes: 1 addition & 1 deletion base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1709,7 +1709,7 @@ export hex2num
@deprecate diagm(A::SparseMatrixCSC) spdiagm(sparsevec(A))

# PR #23393
# The trailing `true` is the `compat` flag of the three-argument method: it keeps the
# whitespace-only word splitting for callers of the deprecated one-argument form.
@deprecate titlecase(s::AbstractString) titlecase(s, false, true)

# END 0.7 deprecations

Expand Down
16 changes: 11 additions & 5 deletions base/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -423,17 +423,23 @@ julia> titlecase("ISS - international space station", false)
"ISS - International Space Station"
```
"""
function titlecase(s::AbstractString, strict::Bool, compat=false)
    # Build a copy of `s` in which the first character of every word is title-cased.
    #
    # Arguments:
    #   s      - the input string; it is iterated character by character.
    #   strict - when true, characters that are not at the start of a word are
    #            lower-cased; when false they are copied unchanged.
    #   compat - when true, only whitespace separates words; when false (the default)
    #            every character for which `iscased` is false separates words.
    #
    # Returns a new `String`.
    startword = true
    b = IOBuffer()
    for c in s
        if compat ? isspace(c) : !iscased(c)
            # Separators are copied verbatim and mark the next character as a word start.
            print(b, c)
            startword = true
        else
            print(b, startword ? titlecase(c) : strict ? lowercase(c) : c)
            startword = false
        end
    end
    return String(take!(b))
end
Expand Down
15 changes: 14 additions & 1 deletion base/strings/utf8proc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ module UTF8proc

import Base: show, ==, hash, string, Symbol, isless, length, eltype, start, next, done, convert, isvalid, lowercase, uppercase, titlecase

export isgraphemebreak, category_code, category_abbrev, category_string
export isgraphemebreak, category_code, category_abbrev, category_string, iscased

# also exported by Base:
export normalize_string, graphemes, is_assigned_char, charwidth, isvalid,
Expand Down Expand Up @@ -333,6 +333,19 @@ false
"""
function isalpha(c::Char)
    # True when the category code of `c` lies in the inclusive range
    # UTF8PROC_CATEGORY_LU .. UTF8PROC_CATEGORY_LO.
    code = category_code(c)
    return UTF8PROC_CATEGORY_LU <= code <= UTF8PROC_CATEGORY_LO
end

"""
iscased(c::Char) -> Bool
Tests whether a character is cased, i.e. is lower-, upper- or title-cased.
"""
function iscased(c::Char)
    # A character counts as cased when its category code is one of the three
    # letter-case categories: upper (LU), title (LT) or lower (LL).
    cased_categories = (UTF8PROC_CATEGORY_LU, UTF8PROC_CATEGORY_LT, UTF8PROC_CATEGORY_LL)
    return category_code(c) in cased_categories
end


"""
isnumber(c::Char) -> Bool
Expand Down

0 comments on commit 7ff1b18

Please sign in to comment.