Skip to content

Commit

Permalink
Deprecate matchall (#26071)
Browse files Browse the repository at this point in the history
It is not obvious that returning matching SubStrings rather than RegexMatch objects is a good idea.
  • Loading branch information
nalimilan authored Feb 22, 2018
1 parent f82e845 commit da98033
Show file tree
Hide file tree
Showing 8 changed files with 26 additions and 82 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -1001,6 +1001,8 @@ Deprecated or removed

* `ismatch(regex, str)` has been deprecated in favor of `contains(str, regex)` ([#24673]).

* `matchall` has been deprecated in favor of `collect(m.match for m in eachmatch(r, s))` ([#26071]).

* `similar(::Associative)` has been deprecated in favor of `empty(::Associative)`, and
`similar(::Associative, ::Pair{K, V})` has been deprecated in favour of
`empty(::Associative, K, V)` ([#24390]).
Expand Down Expand Up @@ -1359,3 +1361,4 @@ Command-line option changes
[#25990]: https://github.com/JuliaLang/julia/issues/25990
[#25998]: https://github.com/JuliaLang/julia/issues/25998
[#26009]: https://github.com/JuliaLang/julia/issues/26009
[#26071]: https://github.com/JuliaLang/julia/issues/26071
6 changes: 5 additions & 1 deletion base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1345,7 +1345,7 @@ end
@deprecate names(m, all) names(m, all = all)
@deprecate names(m, all, imported) names(m, all = all, imported = imported)
@deprecate eachmatch(re, str, overlap) eachmatch(re, str, overlap = overlap)
@deprecate matchall(re, str, overlap) matchall(re, str, overlap = overlap)
@deprecate matchall(re, str, overlap) collect(m.match for m in eachmatch(re, str, overlap = overlap))
@deprecate chop(s, head) chop(s, head = head)
@deprecate chop(s, head, tail) chop(s, head = head, tail = tail)
@deprecate tryparse(T::Type{<:Integer}, s, base) tryparse(T, s, base = base)
Expand Down Expand Up @@ -1405,6 +1405,10 @@ end
Base.@deprecate log Base.log
end

# PR 26071
@deprecate(matchall(r::Regex, s::AbstractString; overlap::Bool = false),
collect(m.match for m in eachmatch(r, s, overlap = overlap)))

# END 0.7 deprecations

# BEGIN 1.0 deprecations
Expand Down
1 change: 0 additions & 1 deletion base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,6 @@ export
findprev,
occursin,
match,
matchall,
searchsorted,
searchsortedfirst,
searchsortedlast,
Expand Down
60 changes: 0 additions & 60 deletions base/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -205,66 +205,6 @@ match(r::Regex, s::AbstractString, i::Integer) = throw(ArgumentError(
"regex matching is only available for the String type; use String(s) to convert"
))

"""
matchall(r::Regex, s::AbstractString; overlap::Bool = false]) -> Vector{AbstractString}
Return a vector of the matching substrings from [`eachmatch`](@ref).
# Examples
```jldoctest
julia> rx = r"a.a"
r"a.a"
julia> matchall(rx, "a1a2a3a")
2-element Array{SubString{String},1}:
"a1a"
"a3a"
julia> matchall(rx, "a1a2a3a", overlap = true)
3-element Array{SubString{String},1}:
"a1a"
"a2a"
"a3a"
```
"""
function matchall(re::Regex, str::String; overlap::Bool = false)
regex = compile(re).regex
n = sizeof(str)
matches = SubString{String}[]
offset = UInt32(0)
opts = re.match_options
opts_nonempty = opts | PCRE.ANCHORED | PCRE.NOTEMPTY_ATSTART
prevempty = false
ovec = re.ovec
while true
result = PCRE.exec(regex, str, offset, prevempty ? opts_nonempty : opts, re.match_data)
if !result
if prevempty && offset < n
offset = UInt32(nextind(str, offset + 1) - 1)
prevempty = false
continue
else
break
end
end

push!(matches, SubString(str, ovec[1]+1, ovec[2]))
prevempty = offset == ovec[2]
if overlap
if !prevempty
offset = UInt32(ovec[1]+1)
end
else
offset = ovec[2]
end
end
matches
end

matchall(re::Regex, str::SubString; overlap::Bool = false) =
matchall(re, String(str), overlap = overlap)

# TODO: return only start index and update deprecation
function findnext(re::Regex, str::Union{String,SubString}, idx::Integer)
if idx > nextind(str,lastindex(str))
Expand Down
1 change: 0 additions & 1 deletion doc/src/base/strings.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ Base.isvalid(::Any, ::Any)
Base.isvalid(::AbstractString, ::Integer)
Base.match
Base.eachmatch
Base.matchall
Base.isless(::AbstractString, ::AbstractString)
Base.:(==)(::AbstractString, ::AbstractString)
Base.cmp(::AbstractString, ::AbstractString)
Expand Down
4 changes: 2 additions & 2 deletions stdlib/Markdown/src/render/plain.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ end
function plain(io::IO, code::Code)
# If the code includes a fenced block this will break parsing,
# so it must be enclosed by a longer ````-sequence.
n = mapreduce(length, max, 2, matchall(r"^`+"m, code.code)) + 1
n = mapreduce(m -> length(m.match), max, 2, eachmatch(r"^`+"m, code.code)) + 1
println(io, "`" ^ n, code.language)
println(io, code.code)
println(io, "`" ^ n)
Expand Down Expand Up @@ -121,7 +121,7 @@ plaininline(io::IO, md::Italic) = plaininline(io, "*", md.text, "*")

function plaininline(io::IO, md::Code)
if contains(md.code, "`")
n = maximum(length(m) for m in matchall(r"(`+)", md.code))
n = maximum(length(m.match) for m in eachmatch(r"(`+)", md.code))
s = "`"^((iseven(n) ? 1 : 2) + n)
print(io, s, Base.startswith(md.code, "`") ? " " : "")
print(io, md.code, endswith(md.code, "`") ? " " : "", s)
Expand Down
2 changes: 1 addition & 1 deletion stdlib/REPL/src/REPLCompletions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ function complete_path(path::AbstractString, pos; use_envpath=false)
end

matchList = String[replace(s, r"\s" => "\\ ") for s in matches]
startpos = pos - lastindex(prefix) + 1 - length(matchall(r" ", prefix))
startpos = pos - lastindex(prefix) + 1 - count(equalto(' '), prefix)
# The pos - lastindex(prefix) + 1 is correct due to `lastindex(prefix)-lastindex(prefix)==0`,
# hence we need to add one to get the first index. This is also correct when considering
# pos, because pos is the `lastindex` a larger string which `endswith(path)==true`.
Expand Down
31 changes: 15 additions & 16 deletions test/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,21 @@ function collect_eachmatch(re, str; overlap=false)
[m.match for m in collect(eachmatch(re, str, overlap = overlap))]
end

for f in [matchall, collect_eachmatch]
@test f(r"a?b?", "asbd") == ["a","","b","",""] == f(r"""a?b?""", "asbd")
@test f(r"a?b?", "asbd", overlap=true) == ["a","","b","",""]
@test f(r"\w+", "hello", overlap=true) == ["hello","ello","llo","lo","o"]
@test f(r".\s", "x \u2200 x \u2203 y") == ["x ", "", "x ", ""]
@test f(r"(\w+)(\s*)", "The dark side of the moon") ==
["The ", "dark ", "side ", "of ", "the ", "moon"]
@test f(r"", "") == [""]
@test f(r"", "", overlap=true) == [""]
@test f(r"aa", "aaaa") == ["aa", "aa"]
@test f(r"aa", "aaaa", overlap=true) == ["aa", "aa", "aa"]
@test f(r"", "aaa") == ["", "", "", ""]
@test f(r"", "aaa", overlap=true) == ["", "", "", ""]
@test f(r"GCG","GCGCG") == ["GCG"]
@test f(r"GCG","GCGCG",overlap=true) == ["GCG","GCG"]
end
@test collect_eachmatch(r"a?b?", "asbd") == ["a","","b","",""] ==
collect_eachmatch(r"""a?b?""", "asbd")
@test collect_eachmatch(r"a?b?", "asbd", overlap=true) == ["a","","b","",""]
@test collect_eachmatch(r"\w+", "hello", overlap=true) == ["hello","ello","llo","lo","o"]
@test collect_eachmatch(r".\s", "x \u2200 x \u2203 y") == ["x ", "", "x ", ""]
@test collect_eachmatch(r"(\w+)(\s*)", "The dark side of the moon") ==
["The ", "dark ", "side ", "of ", "the ", "moon"]
@test collect_eachmatch(r"", "") == [""]
@test collect_eachmatch(r"", "", overlap=true) == [""]
@test collect_eachmatch(r"aa", "aaaa") == ["aa", "aa"]
@test collect_eachmatch(r"aa", "aaaa", overlap=true) == ["aa", "aa", "aa"]
@test collect_eachmatch(r"", "aaa") == ["", "", "", ""]
@test collect_eachmatch(r"", "aaa", overlap=true) == ["", "", "", ""]
@test collect_eachmatch(r"GCG","GCGCG") == ["GCG"]
@test collect_eachmatch(r"GCG","GCGCG",overlap=true) == ["GCG","GCG"]

# Issue 8278
target = """71.163.72.113 - - [30/Jul/2014:16:40:55 -0700] "GET emptymind.org/thevacantwall/wp-content/uploads/2013/02/DSC_006421.jpg HTTP/1.1" 200 492513 "http://images.search.yahoo.com/images/view;_ylt=AwrB8py9gdlTGEwADcSjzbkF;_ylu=X3oDMTI2cGZrZTA5BHNlYwNmcC1leHAEc2xrA2V4cARvaWQDNTA3NTRiMzYzY2E5OTEwNjBiMjc2YWJhMjkxMTEzY2MEZ3BvcwM0BGl0A2Jpbmc-?back=http%3A%2F%2Fus.yhs4.search.yahoo.com%2Fyhs%2Fsearch%3Fei%3DUTF-8%26p%3Dapartheid%2Bwall%2Bin%2Bpalestine%26type%3Dgrvydef%26param1%3D1%26param2%3Dsid%253Db01676f9c26355f014f8a9db87545d61%2526b%253DChrome%2526ip%253D71.163.72.113%2526p%253Dgroovorio%2526x%253DAC811262A746D3CD%2526dt%253DS940%2526f%253D7%2526a%253Dgrv_tuto1_14_30%26hsimp%3Dyhs-fullyhosted_003%26hspart%3Dironsource&w=588&h=387&imgurl=occupiedpalestine.files.wordpress.com%2F2012%2F08%2F5-peeking-through-the-wall.jpg%3Fw%3D588%26h%3D387&rurl=http%3A%2F%2Fwww.stopdebezetting.com%2Fwereldpers%2Fcompare-the-berlin-wall-vs-israel-s-apartheid-wall-in-palestine.html&size=49.0KB&name=...+%3Cb%3EApartheid+wall+in+Palestine%3C%2Fb%3E...+%7C+Or+you+go+peeking+through+the+%3Cb%3Ewall%3C%2Fb%3E&p=apartheid+wall+in+palestine&oid=50754b363ca991060b276aba291113cc&fr2=&fr=&tt=...+%3Cb%3EApartheid+wall+in+Palestine%3C%2Fb%3E...+%7C+Or+you+go+peeking+through+the+%3Cb%3Ewall%3C%2Fb%3E&b=0&ni=21&no=4&ts=&tab=organic&sigr=13evdtqdq&sigb=19k7nsjvb&sigi=12o2la1db&sigt=12lia2m0j&sign=12lia2m0j&.crumb=.yUtKgFI6DE&hsimp=yhs-fullyhosted_003&hspart=ironsource" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36"""
Expand Down

0 comments on commit da98033

Please sign in to comment.