From 149f43b02309579d761a6fd4d7524a56e05fbba1 Mon Sep 17 00:00:00 2001 From: Moelf Date: Sat, 29 Aug 2020 22:15:12 -0400 Subject: [PATCH 01/25] expose findfirst findnext for UInt8 vector --- base/strings/search.jl | 57 +++++++++++++++++++++++++++++++++++++----- test/strings/search.jl | 10 ++++++++ 2 files changed, 61 insertions(+), 6 deletions(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index b1908ac99c860..b776d9344d99e 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -123,6 +123,26 @@ true """ findfirst(ch::AbstractChar, string::AbstractString) = findfirst(==(ch), string) +""" + findfirst(pattern::AbstractVector{T}, A::AbstractVector{T}) where {T<:Union{Int8,UInt8}} + +Find the first occurrence of `pattern` in `ary`. + +!!! compat "Julia 1.6" + This method requires at least Julia 1.6. + +# Examples +```jldoctest +julia> findfirst([0x52, 0x62], [0x40, 0x52, 0x62, 0x63]) +2:3 +``` +""" +function findfirst(pattern::AbstractVector{T}, A::AbstractVector{T}) where {T<:Union{Int8,UInt8}} + _search(A, pattern, firstindex(A)) +end + + + # AbstractString implementation of the generic findnext interface function findnext(testf::Function, s::AbstractString, i::Integer) i = Int(i) @@ -174,9 +194,12 @@ function _searchindex(s::String, t::String, i::Integer) _searchindex(unsafe_wrap(Vector{UInt8},s), unsafe_wrap(Vector{UInt8},t), i) end -function _searchindex(s::ByteArray, t::ByteArray, i::Integer) - n = sizeof(t) - m = sizeof(s) +function _searchindex(s::AbstractVector{T}, + t::AbstractVector{T}, + i::Integer) where T <:Union{Int8,UInt8} + + n = length(t) + m = length(s) if n == 0 return 1 <= i <= m+1 ? max(1, i) : 0 @@ -194,7 +217,7 @@ function _searchindex(s::ByteArray, t::ByteArray, i::Integer) bloom_mask = UInt64(0) skip = n - 1 tlast = _nthbyte(t,n) - for j in 1:n + for j in firstindex(s):n bloom_mask |= _search_bloom_mask(_nthbyte(t,j)) if _nthbyte(t,j) == tlast && j < n skip = n - j - 1 @@ -235,8 +258,8 @@ function _searchindex(s::ByteArray, t::ByteArray, i::Integer) 0 end -function _search(s::Union{AbstractString,ByteArray}, - t::Union{AbstractString,AbstractChar,Int8,UInt8}, +function _search(s::Union{AbstractString,AbstractVector{<:Union{Int8,UInt8}}}, + t::Union{AbstractString,AbstractChar,AbstractVector{<:Union{Int8,UInt8}}}, i::Integer) idx = _searchindex(s,t,i) if isempty(t) @@ -296,6 +319,28 @@ julia> findnext('o', "Hello to the world", 6) findnext(ch::AbstractChar, string::AbstractString, ind::Integer) = findnext(==(ch), string, ind) +""" +findnext(pattern::AbstractVector{T}, A::AbstractVector{T}, start::Integer) where T<:Union{Int8,UInt8} + +Find the next occurrence of `pattern` in `A` starting at position `start`. + +!!! compat "Julia 1.6" + This method requires at least Julia 1.6. + +# Examples +```jldoctest +julia> findnext([0x52, 0x62], [0x52, 0x62, 0x72], 5) === nothing +true + +julia> findnext([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3) +4:5 +``` +""" +function findnext(pattern::AbstractVector{T}, + A::AbstractVector{T}, ind::Integer) where T<:Union{Int8,UInt8} + _search(A, pattern, ind) +end + """ findlast(pattern::AbstractString, string::AbstractString) diff --git a/test/strings/search.jl b/test/strings/search.jl index 8a7abaec50309..5d7b5479430f7 100644 --- a/test/strings/search.jl +++ b/test/strings/search.jl @@ -408,3 +408,13 @@ for T = (UInt, BigInt) @test findprev(isletter, astr, T(x)) isa Int end end + +# issue 37280 +let A = [0x40, 0x52, 0x62, 0x52, 0x62] + @test findfirst([0x99], A) === nothing + @test findfirst([0x52], A) == 2:2 + @test findfirst([0x52, 0x62], A) == 2:3 + @test findnext([0x52, 0x62], A, 2) == 2:3 + @test findnext([0x52, 0x62], A, 3) == 4:5 + @test findnext([0x52, 0x62], A, 5) === nothing +end From d1055eb65d58b80253508e895a7411d0b58b4c2d Mon Sep 17 00:00:00 2001 From: Jerry Ling Date: Sun, 30 Aug 2020 10:59:43 -0400 Subject: [PATCH 02/25] Update base/strings/search.jl Co-authored-by: Milan Bouchet-Valat --- base/strings/search.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index b776d9344d99e..0c87c994070e2 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -320,7 +320,7 @@ findnext(ch::AbstractChar, string::AbstractString, ind::Integer) = findnext(==(ch), string, ind) """ -findnext(pattern::AbstractVector{T}, A::AbstractVector{T}, start::Integer) where T<:Union{Int8,UInt8} + findnext(pattern::AbstractVector{T}, A::AbstractVector{T}, start::Integer) where T<:Union{Int8,UInt8} Find the next occurrence of `pattern` in `A` starting at position `start`. From acc37600ef4afe4dc53c86d104cdae02b026e740 Mon Sep 17 00:00:00 2001 From: Jerry Ling Date: Sun, 30 Aug 2020 10:59:53 -0400 Subject: [PATCH 03/25] Update base/strings/search.jl Co-authored-by: Milan Bouchet-Valat --- base/strings/search.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index 0c87c994070e2..80a77b101d372 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -322,7 +322,7 @@ findnext(ch::AbstractChar, string::AbstractString, ind::Integer) = """ findnext(pattern::AbstractVector{T}, A::AbstractVector{T}, start::Integer) where T<:Union{Int8,UInt8} -Find the next occurrence of `pattern` in `A` starting at position `start`. +Find the next occurrence of the sequence `pattern` in vector `A` starting at position `start`. !!! compat "Julia 1.6" This method requires at least Julia 1.6. From b1acc167bb3f7114c935d9f946ce91ca12d57ba6 Mon Sep 17 00:00:00 2001 From: Jerry Ling Date: Sun, 30 Aug 2020 11:00:08 -0400 Subject: [PATCH 04/25] Update base/strings/search.jl Co-authored-by: Milan Bouchet-Valat --- base/strings/search.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index 80a77b101d372..9b7a7cf9f18c9 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -137,9 +137,8 @@ julia> findfirst([0x52, 0x62], [0x40, 0x52, 0x62, 0x63]) 2:3 ``` """ -function findfirst(pattern::AbstractVector{T}, A::AbstractVector{T}) where {T<:Union{Int8,UInt8}} +findfirst(pattern::AbstractVector{T}, A::AbstractVector{T}) where {T<:Union{Int8,UInt8}} = _search(A, pattern, firstindex(A)) -end From 1338f8e889945b84937caa2383ee7d9f42076800 Mon Sep 17 00:00:00 2001 From: Jerry Ling Date: Sun, 30 Aug 2020 11:00:57 -0400 Subject: [PATCH 05/25] Update base/strings/search.jl Co-authored-by: Milan Bouchet-Valat --- base/strings/search.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index 9b7a7cf9f18c9..69ad4e42951e8 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -335,10 +335,8 @@ julia> findnext([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3) 4:5 ``` """ -function findnext(pattern::AbstractVector{T}, - A::AbstractVector{T}, ind::Integer) where T<:Union{Int8,UInt8} +findnext(pattern::AbstractVector{T}, A::AbstractVector{T}, ind::Integer) where T<:Union{Int8,UInt8} = _search(A, pattern, ind) -end """ findlast(pattern::AbstractString, string::AbstractString) From 5f05214bf7ac6072e269c5c523567dfa1b5318df Mon Sep 17 00:00:00 2001 From: Jerry Ling Date: Sun, 30 Aug 2020 11:01:04 -0400 Subject: [PATCH 06/25] Update base/strings/search.jl Co-authored-by: Milan Bouchet-Valat --- base/strings/search.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index 69ad4e42951e8..2c668794b5665 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -126,7 +126,7 @@ findfirst(ch::AbstractChar, string::AbstractString) = findfirst(==(ch), string) """ findfirst(pattern::AbstractVector{T}, A::AbstractVector{T}) where {T<:Union{Int8,UInt8}} -Find the first occurrence of `pattern` in `ary`. +Find the first occurrence of sequence `pattern` in vector `A`. !!! compat "Julia 1.6" This method requires at least Julia 1.6. From 2d3913c6f3940fc057a040c21c06f6c38525ec00 Mon Sep 17 00:00:00 2001 From: Jerry Ling Date: Sun, 30 Aug 2020 11:01:34 -0400 Subject: [PATCH 07/25] Update test/strings/search.jl Co-authored-by: Milan Bouchet-Valat --- test/strings/search.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/strings/search.jl b/test/strings/search.jl index 5d7b5479430f7..e79bbed6b3db3 100644 --- a/test/strings/search.jl +++ b/test/strings/search.jl @@ -412,9 +412,9 @@ end # issue 37280 let A = [0x40, 0x52, 0x62, 0x52, 0x62] @test findfirst([0x99], A) === nothing - @test findfirst([0x52], A) == 2:2 - @test findfirst([0x52, 0x62], A) == 2:3 - @test findnext([0x52, 0x62], A, 2) == 2:3 - @test findnext([0x52, 0x62], A, 3) == 4:5 + @test findfirst([0x52], A) === 2:2 + @test findfirst([0x52, 0x62], A) === 2:3 + @test findnext([0x52, 0x62], A, 2) === 2:3 + @test findnext([0x52, 0x62], A, 3) === 4:5 @test findnext([0x52, 0x62], A, 5) === nothing end From eba8833d386cb1515cd8835950aab7a5d8b9f933 Mon Sep 17 00:00:00 2001 From: Jerry Ling Date: Sun, 30 Aug 2020 11:01:54 -0400 Subject: [PATCH 08/25] Update base/strings/search.jl Co-authored-by: Milan Bouchet-Valat --- base/strings/search.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index 2c668794b5665..bc2511926909a 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -196,7 +196,6 @@ end function _searchindex(s::AbstractVector{T}, t::AbstractVector{T}, i::Integer) where T <:Union{Int8,UInt8} - n = length(t) m = length(s) From bd63ef461e94366bbf4f6158d15ec156765b9a83 Mon Sep 17 00:00:00 2001 From: Moelf Date: Sun, 30 Aug 2020 11:29:46 -0400 Subject: [PATCH 09/25] address comments --- base/strings/search.jl | 34 +++++++++++++++++----------------- test/strings/search.jl | 25 +++++++++++++++---------- 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index bc2511926909a..38aa32a2c8716 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -193,18 +193,18 @@ function _searchindex(s::String, t::String, i::Integer) _searchindex(unsafe_wrap(Vector{UInt8},s), unsafe_wrap(Vector{UInt8},t), i) end -function _searchindex(s::AbstractVector{T}, - t::AbstractVector{T}, - i::Integer) where T <:Union{Int8,UInt8} +function _searchindex(s::T, + t::T, + i::Integer) where T<:AbstractVector{<:Union{Int8,UInt8}} + n = length(t) m = length(s) - if n == 0 return 1 <= i <= m+1 ? max(1, i) : 0 elseif m == 0 return 0 elseif n == 1 - return something(findnext(isequal(_nthbyte(t,1)), s, i), 0) + return something(findnext(isequal(t[1]), s, i), 0) end w = m - n @@ -214,21 +214,21 @@ function _searchindex(s::AbstractVector{T}, bloom_mask = UInt64(0) skip = n - 1 - tlast = _nthbyte(t,n) + tlast = t[n] for j in firstindex(s):n - bloom_mask |= _search_bloom_mask(_nthbyte(t,j)) - if _nthbyte(t,j) == tlast && j < n + bloom_mask |= _search_bloom_mask(t[j]) + if t[j]== tlast && j < n skip = n - j - 1 end end i -= 1 while i <= w - if _nthbyte(s,i+n) == tlast + if s[i+n]== tlast # check candidate j = 0 while j < n - 1 - if _nthbyte(s,i+j+1) != _nthbyte(t,j+1) + if s[i+j+1] != t[j+1] break end j += 1 @@ -240,13 +240,13 @@ function _searchindex(s::AbstractVector{T}, end # no match, try to rule out the next character - if i < w && bloom_mask & _search_bloom_mask(_nthbyte(s,i+n+1)) == 0 + if i < w && bloom_mask & _search_bloom_mask(s[i+n+1]) == 0 i += n else i += skip end elseif i < w - if bloom_mask & _search_bloom_mask(_nthbyte(s,i+n+1)) == 0 + if bloom_mask & _search_bloom_mask(s[i+n+1]) == 0 i += n end end @@ -295,7 +295,7 @@ julia> findnext("Lang", "JuliaLang", 2) 6:9 ``` """ -findnext(t::AbstractString, s::AbstractString, i::Integer) = _search(s, t, Int(i)) +findnext(t::AbstractString, s::AbstractString, start::Integer) = _search(s, t, Int(start)) """ findnext(ch::AbstractChar, string::AbstractString, start::Integer) @@ -314,8 +314,8 @@ julia> findnext('o', "Hello to the world", 6) 8 ``` """ -findnext(ch::AbstractChar, string::AbstractString, ind::Integer) = - findnext(==(ch), string, ind) +findnext(ch::AbstractChar, string::AbstractString, start::Integer) = + findnext(==(ch), string, start) """ findnext(pattern::AbstractVector{T}, A::AbstractVector{T}, start::Integer) where T<:Union{Int8,UInt8} @@ -334,8 +334,8 @@ julia> findnext([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3) 4:5 ``` """ -findnext(pattern::AbstractVector{T}, A::AbstractVector{T}, ind::Integer) where T<:Union{Int8,UInt8} = - _search(A, pattern, ind) +findnext(pattern::AbstractVector{T}, A::AbstractVector{T}, start::Integer) where T<:Union{Int8,UInt8} = + _search(A, pattern, start) """ findlast(pattern::AbstractString, string::AbstractString) diff --git a/test/strings/search.jl b/test/strings/search.jl index e79bbed6b3db3..c3120f52d2791 100644 --- a/test/strings/search.jl +++ b/test/strings/search.jl @@ -390,6 +390,21 @@ s_18109 = "fooα🐨βcd3" @test findall("aa", "aaaaaa", overlap=true) == [1:2, 2:3, 3:4, 4:5, 5:6] end +# issue 37280 +@testset "UInt8, Int8 vector" begin + for VT in [Int8, UInt8] + A = VT[0x40, 0x52, 0x62, 0x52, 0x62] + @test findfirst(VT[0x30], A) === nothing + @test findfirst(VT[0x52], A) === 2:2 + pattern = VT[0x52, 0x62] + @test findfirst(pattern, A) === 2:3 + @test findnext(pattern, A, 2) === 2:3 + @test findnext(pattern, A, 3) === 4:5 + @test findnext(pattern, A, 5) === nothing + @test findnext(pattern, A, 99) === nothing + end +end + # issue 32568 for T = (UInt, BigInt) for x = (4, 5) @@ -408,13 +423,3 @@ for T = (UInt, BigInt) @test findprev(isletter, astr, T(x)) isa Int end end - -# issue 37280 -let A = [0x40, 0x52, 0x62, 0x52, 0x62] - @test findfirst([0x99], A) === nothing - @test findfirst([0x52], A) === 2:2 - @test findfirst([0x52, 0x62], A) === 2:3 - @test findnext([0x52, 0x62], A, 2) === 2:3 - @test findnext([0x52, 0x62], A, 3) === 4:5 - @test findnext([0x52, 0x62], A, 5) === nothing -end From cc6d364b87de4fda770d0499d03743484b18681d Mon Sep 17 00:00:00 2001 From: Moelf Date: Sun, 30 Aug 2020 13:57:14 -0400 Subject: [PATCH 10/25] address comments add OffsetArray test --- base/strings/search.jl | 52 ++++++++++++++++++++++++------------------ test/offsetarray.jl | 16 +++++++++++++ test/strings/search.jl | 1 + 3 files changed, 47 insertions(+), 22 deletions(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index 38aa32a2c8716..ee615ff32c148 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -124,7 +124,8 @@ true findfirst(ch::AbstractChar, string::AbstractString) = findfirst(==(ch), string) """ - findfirst(pattern::AbstractVector{T}, A::AbstractVector{T}) where {T<:Union{Int8,UInt8}} + findfirst(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}}) where {T<:Union{Int8,UInt8}} Find the first occurrence of sequence `pattern` in vector `A`. @@ -137,8 +138,10 @@ julia> findfirst([0x52, 0x62], [0x40, 0x52, 0x62, 0x63]) 2:3 ``` """ -findfirst(pattern::AbstractVector{T}, A::AbstractVector{T}) where {T<:Union{Int8,UInt8}} = +function findfirst(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}}) _search(A, pattern, firstindex(A)) +end @@ -185,7 +188,8 @@ function _search_bloom_mask(c) end _nthbyte(s::String, i) = codeunit(s, i) -_nthbyte(a::Union{AbstractVector{UInt8},AbstractVector{Int8}}, i) = a[i] +_nthbyte(a::Union{Vector{UInt8},Vector{Int8}}, i) = a[i] +_nthbyte(t::AbstractVector, index) = t[firstindex(t) + (index-1)] function _searchindex(s::String, t::String, i::Integer) # Check for fast case of a single byte @@ -193,42 +197,44 @@ function _searchindex(s::String, t::String, i::Integer) _searchindex(unsafe_wrap(Vector{UInt8},s), unsafe_wrap(Vector{UInt8},t), i) end -function _searchindex(s::T, - t::T, - i::Integer) where T<:AbstractVector{<:Union{Int8,UInt8}} - +function _searchindex(s::AbstractVector{<:Union{Int8,UInt8}}, + t::AbstractVector{<:Union{Int8,UInt8}}, + i::Integer) where T <:Union{Int8,UInt8} n = length(t) m = length(s) + f_s = firstindex(s) + i < f_s && throw(BoundsError(s, i)) + if n == 0 - return 1 <= i <= m+1 ? max(1, i) : 0 + return f_s <= i <= m+1 ? max(f_s, i) : 0 elseif m == 0 return 0 elseif n == 1 - return something(findnext(isequal(t[1]), s, i), 0) + return something(findnext(isequal(_nthbyte(t,1)), s, i), 0) end w = m - n - if w < 0 || i - 1 > w + if w < 0 || i - f_s > w return 0 end bloom_mask = UInt64(0) - skip = n - 1 - tlast = t[n] - for j in firstindex(s):n - bloom_mask |= _search_bloom_mask(t[j]) - if t[j]== tlast && j < n + skip = n - f_s + tlast = _nthbyte(t,n) + for j in eachindex(t) + bloom_mask |= _search_bloom_mask(_nthbyte(t,j)) + if _nthbyte(t,j) == tlast && j < n skip = n - j - 1 end end i -= 1 while i <= w - if s[i+n]== tlast + if _nthbyte(s,i+n) == tlast # check candidate j = 0 while j < n - 1 - if s[i+j+1] != t[j+1] + if _nthbyte(s,i+j+1) != _nthbyte(t,j+1) break end j += 1 @@ -236,17 +242,17 @@ function _searchindex(s::T, # match found if j == n - 1 - return i+1 + return i+f_s end # no match, try to rule out the next character - if i < w && bloom_mask & _search_bloom_mask(s[i+n+1]) == 0 + if i < w && bloom_mask & _search_bloom_mask(_nthbyte(s,i+n+1)) == 0 i += n else i += skip end elseif i < w - if bloom_mask & _search_bloom_mask(s[i+n+1]) == 0 + if bloom_mask & _search_bloom_mask(_nthbyte(s,i+n+1)) == 0 i += n end end @@ -318,7 +324,7 @@ findnext(ch::AbstractChar, string::AbstractString, start::Integer) = findnext(==(ch), string, start) """ - findnext(pattern::AbstractVector{T}, A::AbstractVector{T}, start::Integer) where T<:Union{Int8,UInt8} + findnext(pattern::AbstractVector{<:Union{Int8,UInt8}}, A::AbstractVector{<:Union{Int8,UInt8}}, start::Integer) Find the next occurrence of the sequence `pattern` in vector `A` starting at position `start`. @@ -334,7 +340,9 @@ julia> findnext([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3) 4:5 ``` """ -findnext(pattern::AbstractVector{T}, A::AbstractVector{T}, start::Integer) where T<:Union{Int8,UInt8} = +findnext(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}}, + start::Integer) where T<:Union{Int8,UInt8} = _search(A, pattern, start) """ diff --git a/test/offsetarray.jl b/test/offsetarray.jl index cd5c5bc848ace..41ff61cf97e63 100644 --- a/test/offsetarray.jl +++ b/test/offsetarray.jl @@ -630,3 +630,19 @@ end @test last(v, 100) !== v @test last(v, 1) == [v[end]] end + +@testset "findfirst findnext of U/Int8 Offset Array" begin + for VT in [Int8, UInt8] + OA = OffsetArray(VT[0x40,0x52,0x62,0x52,0x62], 1) + for PT in [Int8, UInt8] + pattern = PT[0x52, 0x62] + @test findfirst(pattern, OA) === 3:4 + @test findnext(pattern, OA, 2) === 3:4 + @test findnext(pattern, OA, 4) === 5:6 + @test findnext(pattern, OA, 6) === nothing + @test findnext(pattern, OA, 7) === nothing + @test_throws BoundsError findnext(pattern, OA, 1) + end + end +end + diff --git a/test/strings/search.jl b/test/strings/search.jl index c3120f52d2791..3b40bff0fd59b 100644 --- a/test/strings/search.jl +++ b/test/strings/search.jl @@ -402,6 +402,7 @@ end @test findnext(pattern, A, 3) === 4:5 @test findnext(pattern, A, 5) === nothing @test findnext(pattern, A, 99) === nothing + @test_throws BoundsError findnext(pattern, A, -3) end end From fdb1d9ee263b95f73a62a129b0a697420c1b5805 Mon Sep 17 00:00:00 2001 From: Moelf Date: Sun, 30 Aug 2020 15:38:51 -0400 Subject: [PATCH 11/25] add findlast findprev --- base/strings/search.jl | 66 ++++++++++++++++++++++++++++++++---------- test/offsetarray.jl | 11 +++++++ test/strings/search.jl | 12 ++++++++ 3 files changed, 74 insertions(+), 15 deletions(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index ee615ff32c148..55d42d40922a4 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -125,7 +125,7 @@ findfirst(ch::AbstractChar, string::AbstractString) = findfirst(==(ch), string) """ findfirst(pattern::AbstractVector{<:Union{Int8,UInt8}}, - A::AbstractVector{<:Union{Int8,UInt8}}) where {T<:Union{Int8,UInt8}} + A::AbstractVector{<:Union{Int8,UInt8}}) Find the first occurrence of sequence `pattern` in vector `A`. @@ -199,7 +199,7 @@ end function _searchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector{<:Union{Int8,UInt8}}, - i::Integer) where T <:Union{Int8,UInt8} + i::Integer) n = length(t) m = length(s) f_s = firstindex(s) @@ -342,7 +342,7 @@ julia> findnext([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3) """ findnext(pattern::AbstractVector{<:Union{Int8,UInt8}}, A::AbstractVector{<:Union{Int8,UInt8}}, - start::Integer) where T<:Union{Int8,UInt8} = + start::Integer) = _search(A, pattern, start) """ @@ -363,6 +363,22 @@ julia> findfirst("Julia", "JuliaLang") findlast(pattern::AbstractString, string::AbstractString) = findprev(pattern, string, lastindex(string)) +""" + findlast(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}}) + +Find the last occurrence of `pattern` in array `A`. Equivalent to +[`findprev(pattern, A, lastindex(A))`](@ref). + +# Examples +```jldoctest +julia> findlast([0x52, 0x62], [0x52, 0x62, 0x52, 0x62]) +3:4 +``` +""" +findlast(pattern::AbstractVector{<:Union{Int8,UInt8}}, +A::AbstractVector{<:Union{Int8,UInt8}}) = + findprev(pattern, A, lastindex(A)) """ findlast(ch::AbstractChar, string::AbstractString) @@ -436,12 +452,14 @@ function _rsearchindex(s::String, t::String, i::Integer) end end -function _rsearchindex(s::ByteArray, t::ByteArray, k::Integer) - n = sizeof(t) - m = sizeof(s) +function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector{<:Union{Int8,UInt8}}, k::Integer) + n = length(t) + m = length(s) + f_s = firstindex(s) + k < f_s && throw(BoundsError(s, k)) if n == 0 - return 0 <= k <= m ? max(k, 1) : 0 + return 0 <= k <= m ? max(f_s, k) : 0 elseif m == 0 return 0 elseif n == 1 @@ -449,14 +467,14 @@ function _rsearchindex(s::ByteArray, t::ByteArray, k::Integer) end w = m - n - if w < 0 || k <= 0 + if w < 0 || k <= f_s return 0 end bloom_mask = UInt64(0) skip = n - 1 tfirst = _nthbyte(t,1) - for j in n:-1:1 + for j in reverse(eachindex(t)) bloom_mask |= _search_bloom_mask(_nthbyte(t,j)) if _nthbyte(t,j) == tfirst && j > 1 skip = j - 2 @@ -477,7 +495,7 @@ function _rsearchindex(s::ByteArray, t::ByteArray, k::Integer) # match found if j == n - return i + return i + f_s - 1 end # no match, try to rule out the next character @@ -497,9 +515,9 @@ function _rsearchindex(s::ByteArray, t::ByteArray, k::Integer) 0 end -function _rsearch(s::Union{AbstractString,ByteArray}, - t::Union{AbstractString,AbstractChar,Int8,UInt8}, - i::Integer) +function _rsearch(s::Union{AbstractString,AbstractVector{<:Union{Int8,UInt8}}}, + t::Union{AbstractString,AbstractChar,AbstractVector{<:Union{Int8,UInt8}}}, + i::Integer) idx = _rsearchindex(s,t,i) if isempty(t) idx:idx-1 @@ -552,9 +570,27 @@ julia> findprev('o', "Hello to the world", 18) 15 ``` """ -findprev(ch::AbstractChar, string::AbstractString, ind::Integer) = - findprev(==(ch), string, ind) +findprev(ch::AbstractChar, string::AbstractString, start::Integer) = + findprev(==(ch), string, start) +""" + findprev(pattern::AbstractVector{<:Union{Int8,UInt8}}, A::AbstractVector{<:Union{Int8,UInt8}}, start::Integer) + +Find the previous occurrence of the sequence `pattern` in vector `A` starting at position `start`. + +!!! compat "Julia 1.6" + This method requires at least Julia 1.6. + +# Examples +```jldoctest +julia> findprev([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3) +2:3 +``` +""" +findprev(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}}, + start::Integer) = + _rsearch(A, pattern, start) """ occursin(needle::Union{AbstractString,Regex,AbstractChar}, haystack::AbstractString) diff --git a/test/offsetarray.jl b/test/offsetarray.jl index 41ff61cf97e63..086acd78c9c77 100644 --- a/test/offsetarray.jl +++ b/test/offsetarray.jl @@ -641,7 +641,18 @@ end @test findnext(pattern, OA, 4) === 5:6 @test findnext(pattern, OA, 6) === nothing @test findnext(pattern, OA, 7) === nothing + @test findnext(pattern, OA, 2) === 3:4 + @test findnext(pattern, OA, 4) === 5:6 + @test findnext(pattern, OA, 6) === nothing + @test findnext(pattern, OA, 99) === nothing @test_throws BoundsError findnext(pattern, OA, 1) + + @test findlast(pattern, OA) === 5:6 + @test findprev(pattern, OA, 2) === nothing + @test findprev(pattern, OA, 4) === 3:4 + @test findprev(pattern, OA, 6) === 5:6 + @test findprev(pattern, OA, 99) === findlast(pattern, OA) + @test_throws BoundsError findprev(pattern, OA, 1) end end end diff --git a/test/strings/search.jl b/test/strings/search.jl index 3b40bff0fd59b..f3d7caeaefa5d 100644 --- a/test/strings/search.jl +++ b/test/strings/search.jl @@ -394,15 +394,27 @@ end @testset "UInt8, Int8 vector" begin for VT in [Int8, UInt8] A = VT[0x40, 0x52, 0x62, 0x52, 0x62] + @test findfirst(VT[0x30], A) === nothing @test findfirst(VT[0x52], A) === 2:2 + @test findlast(VT[0x30], A) === nothing + @test findlast(VT[0x52], A) === 4:4 + pattern = VT[0x52, 0x62] + @test findfirst(pattern, A) === 2:3 @test findnext(pattern, A, 2) === 2:3 @test findnext(pattern, A, 3) === 4:5 @test findnext(pattern, A, 5) === nothing @test findnext(pattern, A, 99) === nothing @test_throws BoundsError findnext(pattern, A, -3) + + @test findlast(pattern, A) === 4:5 + @test findprev(pattern, A, 3) === 2:3 + @test findprev(pattern, A, 5) === 4:5 + @test findprev(pattern, A, 2) === nothing + @test findprev(pattern, A, 99) === findlast(pattern, A) + @test_throws BoundsError findprev(pattern, A, -2) end end From 114e380b5c6ac5fef94a9e4ba0cf7f2f61f34bb6 Mon Sep 17 00:00:00 2001 From: Moelf Date: Sun, 30 Aug 2020 18:26:25 -0400 Subject: [PATCH 12/25] implement comments --- base/strings/search.jl | 59 +++++++++++++++++++++++------------------- test/offsetarray.jl | 10 ++++--- test/strings/search.jl | 10 ++++--- 3 files changed, 45 insertions(+), 34 deletions(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index 55d42d40922a4..f35a8d6e1beb5 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -188,8 +188,7 @@ function _search_bloom_mask(c) end _nthbyte(s::String, i) = codeunit(s, i) -_nthbyte(a::Union{Vector{UInt8},Vector{Int8}}, i) = a[i] -_nthbyte(t::AbstractVector, index) = t[firstindex(t) + (index-1)] +_nthbyte(t::AbstractVector, index) = t[index + (firstindex(t)-1)] function _searchindex(s::String, t::String, i::Integer) # Check for fast case of a single byte @@ -199,14 +198,14 @@ end function _searchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector{<:Union{Int8,UInt8}}, - i::Integer) + _i::Integer) n = length(t) m = length(s) - f_s = firstindex(s) - i < f_s && throw(BoundsError(s, i)) + i = Int(_i) - (firstindex(s) - 1) + i < 1 && throw(BoundsError(s, _i)) if n == 0 - return f_s <= i <= m+1 ? max(f_s, i) : 0 + return 1 <= i <= m+1 ? max(1, i) : 0 elseif m == 0 return 0 elseif n == 1 @@ -214,14 +213,14 @@ function _searchindex(s::AbstractVector{<:Union{Int8,UInt8}}, end w = m - n - if w < 0 || i - f_s > w + if w < 0 || i - 1 > w return 0 end bloom_mask = UInt64(0) - skip = n - f_s + skip = n - 1 tlast = _nthbyte(t,n) - for j in eachindex(t) + for j in 1:n bloom_mask |= _search_bloom_mask(_nthbyte(t,j)) if _nthbyte(t,j) == tlast && j < n skip = n - j - 1 @@ -242,7 +241,8 @@ function _searchindex(s::AbstractVector{<:Union{Int8,UInt8}}, # match found if j == n - 1 - return i+f_s + # restore in case `s` is an OffSetArray + return i+firstindex(s) end # no match, try to rule out the next character @@ -333,17 +333,20 @@ Find the next occurrence of the sequence `pattern` in vector `A` starting at pos # Examples ```jldoctest -julia> findnext([0x52, 0x62], [0x52, 0x62, 0x72], 5) === nothing +julia> findnext([0x52, 0x62], [0x52, 0x62, 0x72], 3) === nothing true julia> findnext([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3) 4:5 ``` """ -findnext(pattern::AbstractVector{<:Union{Int8,UInt8}}, - A::AbstractVector{<:Union{Int8,UInt8}}, - start::Integer) = +function findnext(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}}, + start::Integer) + (start == (lastindex(A)+1)) && return nothing + (start > (lastindex(A)+1)) && throw(BoundsError(A, start)) _search(A, pattern, start) +end """ findlast(pattern::AbstractString, string::AbstractString) @@ -376,9 +379,10 @@ julia> findlast([0x52, 0x62], [0x52, 0x62, 0x52, 0x62]) 3:4 ``` """ -findlast(pattern::AbstractVector{<:Union{Int8,UInt8}}, -A::AbstractVector{<:Union{Int8,UInt8}}) = +function findlast(pattern::AbstractVector{<:Union{Int8,UInt8}}, +A::AbstractVector{<:Union{Int8,UInt8}}) findprev(pattern, A, lastindex(A)) +end """ findlast(ch::AbstractChar, string::AbstractString) @@ -452,14 +456,14 @@ function _rsearchindex(s::String, t::String, i::Integer) end end -function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector{<:Union{Int8,UInt8}}, k::Integer) +function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector{<:Union{Int8,UInt8}}, _k::Integer) n = length(t) m = length(s) - f_s = firstindex(s) - k < f_s && throw(BoundsError(s, k)) + k = Int(_k) - (firstindex(s) - 1) + k < 1 && throw(BoundsError(s, _k)) if n == 0 - return 0 <= k <= m ? max(f_s, k) : 0 + return 0 <= k <= m ? max(k, 1) : 0 elseif m == 0 return 0 elseif n == 1 @@ -467,14 +471,14 @@ function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector end w = m - n - if w < 0 || k <= f_s + if w < 0 || k <= 0 return 0 end bloom_mask = UInt64(0) skip = n - 1 tfirst = _nthbyte(t,1) - for j in reverse(eachindex(t)) + for j in n:-1:1 bloom_mask |= _search_bloom_mask(_nthbyte(t,j)) if _nthbyte(t,j) == tfirst && j > 1 skip = j - 2 @@ -495,7 +499,7 @@ function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector # match found if j == n - return i + f_s - 1 + return i - 1 + firstindex(s) end # no match, try to rule out the next character @@ -587,10 +591,13 @@ julia> findprev([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3) 2:3 ``` """ -findprev(pattern::AbstractVector{<:Union{Int8,UInt8}}, - A::AbstractVector{<:Union{Int8,UInt8}}, - start::Integer) = +function findprev(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}}, + start::Integer) + (start == (lastindex(A)+1)) && return nothing + (start > (lastindex(A)+1)) && throw(BoundsError(A, start)) _rsearch(A, pattern, start) +end """ occursin(needle::Union{AbstractString,Regex,AbstractChar}, haystack::AbstractString) diff --git a/test/offsetarray.jl b/test/offsetarray.jl index 086acd78c9c77..93512cc4978c6 100644 --- a/test/offsetarray.jl +++ b/test/offsetarray.jl @@ -636,6 +636,7 @@ end OA = OffsetArray(VT[0x40,0x52,0x62,0x52,0x62], 1) for PT in [Int8, UInt8] pattern = PT[0x52, 0x62] + l_OA = lastindex(OA) @test findfirst(pattern, OA) === 3:4 @test findnext(pattern, OA, 2) === 3:4 @test findnext(pattern, OA, 4) === 5:6 @@ -643,17 +644,18 @@ end @test findnext(pattern, OA, 7) === nothing @test findnext(pattern, OA, 2) === 3:4 @test findnext(pattern, OA, 4) === 5:6 - @test findnext(pattern, OA, 6) === nothing - @test findnext(pattern, OA, 99) === nothing + # 1 idx too far is allowed + @test findnext(pattern, OA, l_OA+1) === nothing + @test_throws BoundsError findnext(pattern, OA, l_OA+2) @test_throws BoundsError findnext(pattern, OA, 1) @test findlast(pattern, OA) === 5:6 @test findprev(pattern, OA, 2) === nothing @test findprev(pattern, OA, 4) === 3:4 @test findprev(pattern, OA, 6) === 5:6 - @test findprev(pattern, OA, 99) === findlast(pattern, OA) + @test findnext(pattern, OA, l_OA+1) === nothing + @test_throws BoundsError findnext(pattern, OA, l_OA+2) @test_throws BoundsError findprev(pattern, OA, 1) end end end - diff --git a/test/strings/search.jl b/test/strings/search.jl index f3d7caeaefa5d..4b56a4198feac 100644 --- a/test/strings/search.jl +++ b/test/strings/search.jl @@ -405,16 +405,18 @@ end @test findfirst(pattern, A) === 2:3 @test findnext(pattern, A, 2) === 2:3 @test findnext(pattern, A, 3) === 4:5 - @test findnext(pattern, A, 5) === nothing - @test findnext(pattern, A, 99) === nothing + # 1 idx too long is allowed + @test findnext(pattern, A, length(A)+1) === nothing @test_throws BoundsError findnext(pattern, A, -3) + @test_throws BoundsError findnext(pattern, A, length(A)+2) @test findlast(pattern, A) === 4:5 @test findprev(pattern, A, 3) === 2:3 @test findprev(pattern, A, 5) === 4:5 @test findprev(pattern, A, 2) === nothing - @test findprev(pattern, A, 99) === findlast(pattern, A) - @test_throws BoundsError findprev(pattern, A, -2) + @test findprev(pattern, A, length(A)+1) === nothing + @test_throws BoundsError findprev(pattern, A, -3) + @test_throws BoundsError findprev(pattern, A, length(A)+2) end end From 5e1c0e3ead9f871a17e9593b23c7a210d5840596 Mon Sep 17 00:00:00 2001 From: Moelf Date: Sun, 30 Aug 2020 21:47:45 -0400 Subject: [PATCH 13/25] let _(r)searchindex handle exception --- base/strings/search.jl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index f35a8d6e1beb5..456ff399a0c7a 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -202,7 +202,7 @@ function _searchindex(s::AbstractVector{<:Union{Int8,UInt8}}, n = length(t) m = length(s) i = Int(_i) - (firstindex(s) - 1) - i < 1 && throw(BoundsError(s, _i)) + (i < 1 || i > m+1) && throw(BoundsError(s, _i)) if n == 0 return 1 <= i <= m+1 ? max(1, i) : 0 @@ -344,7 +344,6 @@ function findnext(pattern::AbstractVector{<:Union{Int8,UInt8}}, A::AbstractVector{<:Union{Int8,UInt8}}, start::Integer) (start == (lastindex(A)+1)) && return nothing - (start > (lastindex(A)+1)) && throw(BoundsError(A, start)) _search(A, pattern, start) end @@ -460,7 +459,7 @@ function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector n = length(t) m = length(s) k = Int(_k) - (firstindex(s) - 1) - k < 1 && throw(BoundsError(s, _k)) + (k < 1 || k > m+1) && throw(BoundsError(s, _k)) if n == 0 return 0 <= k <= m ? max(k, 1) : 0 @@ -497,9 +496,9 @@ function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector j += 1 end - # match found + # match found, restore in case `s` is an OffsetArray if j == n - return i - 1 + firstindex(s) + return i + (firstindex(s) - 1) end # no match, try to rule out the next character @@ -595,7 +594,6 @@ function findprev(pattern::AbstractVector{<:Union{Int8,UInt8}}, A::AbstractVector{<:Union{Int8,UInt8}}, start::Integer) (start == (lastindex(A)+1)) && return nothing - (start > (lastindex(A)+1)) && throw(BoundsError(A, start)) _rsearch(A, pattern, start) end """ From c28246a70950a809ff797831abc2efc76b663d3b Mon Sep 17 00:00:00 2001 From: Moelf Date: Sun, 30 Aug 2020 22:21:38 -0400 Subject: [PATCH 14/25] fix _rsearchindex special behavior --- base/strings/search.jl | 14 +++++--------- test/offsetarray.jl | 4 ++-- test/strings/search.jl | 4 ++-- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index 456ff399a0c7a..61eead5fafd92 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -340,12 +340,10 @@ julia> findnext([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3) 4:5 ``` """ -function findnext(pattern::AbstractVector{<:Union{Int8,UInt8}}, +findnext(pattern::AbstractVector{<:Union{Int8,UInt8}}, A::AbstractVector{<:Union{Int8,UInt8}}, - start::Integer) - (start == (lastindex(A)+1)) && return nothing + start::Integer)= _search(A, pattern, start) -end """ findlast(pattern::AbstractString, string::AbstractString) @@ -459,7 +457,7 @@ function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector n = length(t) m = length(s) k = Int(_k) - (firstindex(s) - 1) - (k < 1 || k > m+1) && throw(BoundsError(s, _k)) + k < 1 && throw(BoundsError(s, _k)) if n == 0 return 0 <= k <= m ? max(k, 1) : 0 @@ -590,12 +588,10 @@ julia> findprev([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3) 2:3 ``` """ -function findprev(pattern::AbstractVector{<:Union{Int8,UInt8}}, +findprev(pattern::AbstractVector{<:Union{Int8,UInt8}}, A::AbstractVector{<:Union{Int8,UInt8}}, - start::Integer) - (start == (lastindex(A)+1)) && return nothing + start::Integer) = _rsearch(A, pattern, start) -end """ occursin(needle::Union{AbstractString,Regex,AbstractChar}, haystack::AbstractString) diff --git a/test/offsetarray.jl b/test/offsetarray.jl index 93512cc4978c6..dac8080b1fee1 100644 --- a/test/offsetarray.jl +++ b/test/offsetarray.jl @@ -653,8 +653,8 @@ end @test findprev(pattern, OA, 2) === nothing @test findprev(pattern, OA, 4) === 3:4 @test findprev(pattern, OA, 6) === 5:6 - @test findnext(pattern, OA, l_OA+1) === nothing - @test_throws BoundsError findnext(pattern, OA, l_OA+2) + @test findprev(pattern, OA, l_OA+1) == findlast(pattern, OA) + @test findprev(pattern, OA, l_OA+2) == findlast(pattern, OA) @test_throws BoundsError findprev(pattern, OA, 1) end end diff --git a/test/strings/search.jl b/test/strings/search.jl index 4b56a4198feac..6bc8d107a55fe 100644 --- a/test/strings/search.jl +++ b/test/strings/search.jl @@ -414,9 +414,9 @@ end @test findprev(pattern, A, 3) === 2:3 @test findprev(pattern, A, 5) === 4:5 @test findprev(pattern, A, 2) === nothing - @test findprev(pattern, A, length(A)+1) === nothing + @test findprev(pattern, A, length(A)+1) == findlast(pattern, A) + @test findprev(pattern, A, length(A)+2) == findlast(pattern, A) @test_throws BoundsError findprev(pattern, A, -3) - @test_throws BoundsError findprev(pattern, A, length(A)+2) end end From 68df4cf51816e338872171ed8964c3bd55955fde Mon Sep 17 00:00:00 2001 From: Moelf Date: Sun, 30 Aug 2020 22:24:26 -0400 Subject: [PATCH 15/25] style fix --- base/strings/search.jl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index 61eead5fafd92..b82c1b9646433 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -138,10 +138,9 @@ julia> findfirst([0x52, 0x62], [0x40, 0x52, 0x62, 0x63]) 2:3 ``` """ -function findfirst(pattern::AbstractVector{<:Union{Int8,UInt8}}, - A::AbstractVector{<:Union{Int8,UInt8}}) +findfirst(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}})= _search(A, pattern, firstindex(A)) -end @@ -376,10 +375,10 @@ julia> findlast([0x52, 0x62], [0x52, 0x62, 0x52, 0x62]) 3:4 ``` """ -function findlast(pattern::AbstractVector{<:Union{Int8,UInt8}}, -A::AbstractVector{<:Union{Int8,UInt8}}) +findlast(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}})= findprev(pattern, A, lastindex(A)) -end + """ findlast(ch::AbstractChar, string::AbstractString) From 13bd2fa853fe0b89fd83afd10a5631285e0d58e6 Mon Sep 17 00:00:00 2001 From: Moelf Date: Sun, 30 Aug 2020 22:57:35 -0400 Subject: [PATCH 16/25] restirct to 1-indexed array --- base/strings/search.jl | 2 ++ test/offsetarray.jl | 29 ----------------------------- 2 files changed, 2 insertions(+), 29 deletions(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index b82c1b9646433..0a1cd2d681e88 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -198,6 +198,7 @@ end function _searchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector{<:Union{Int8,UInt8}}, _i::Integer) + require_one_based_indexing(s) n = length(t) m = length(s) i = Int(_i) - (firstindex(s) - 1) @@ -453,6 +454,7 @@ function _rsearchindex(s::String, t::String, i::Integer) end function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector{<:Union{Int8,UInt8}}, _k::Integer) + require_one_based_indexing(s) n = length(t) m = length(s) k = Int(_k) - (firstindex(s) - 1) diff --git a/test/offsetarray.jl b/test/offsetarray.jl index dac8080b1fee1..cd5c5bc848ace 100644 --- a/test/offsetarray.jl +++ b/test/offsetarray.jl @@ -630,32 +630,3 @@ end @test last(v, 100) !== v @test last(v, 1) == [v[end]] end - -@testset "findfirst findnext of U/Int8 Offset Array" begin - for VT in [Int8, UInt8] - OA = OffsetArray(VT[0x40,0x52,0x62,0x52,0x62], 1) - for PT in [Int8, UInt8] - pattern = PT[0x52, 0x62] - l_OA = lastindex(OA) - @test findfirst(pattern, OA) === 3:4 - @test findnext(pattern, OA, 2) === 3:4 - @test findnext(pattern, OA, 4) === 5:6 - @test findnext(pattern, OA, 6) === nothing - @test findnext(pattern, OA, 7) === nothing - @test findnext(pattern, OA, 2) === 3:4 - @test findnext(pattern, OA, 4) === 5:6 - # 1 idx too far is allowed - @test findnext(pattern, OA, l_OA+1) === nothing - @test_throws BoundsError findnext(pattern, OA, l_OA+2) - @test_throws BoundsError findnext(pattern, OA, 1) - - @test findlast(pattern, OA) === 5:6 - @test findprev(pattern, OA, 2) === nothing - @test findprev(pattern, OA, 4) === 3:4 - @test findprev(pattern, OA, 6) === 5:6 - @test findprev(pattern, OA, l_OA+1) == findlast(pattern, OA) - @test findprev(pattern, OA, l_OA+2) == findlast(pattern, OA) - @test_throws BoundsError findprev(pattern, OA, 1) - end - end -end From 54b1d9d5ebfa9b363e307beefe44948f712fa685 Mon Sep 17 00:00:00 2001 From: Jerry Ling Date: Mon, 31 Aug 2020 11:41:30 -0400 Subject: [PATCH 17/25] Update test/strings/search.jl Co-authored-by: Milan Bouchet-Valat --- test/strings/search.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/strings/search.jl b/test/strings/search.jl index 6bc8d107a55fe..b7e04772c04b2 100644 --- a/test/strings/search.jl +++ b/test/strings/search.jl @@ -392,8 +392,8 @@ end # issue 37280 @testset "UInt8, Int8 vector" begin - for VT in [Int8, UInt8] - A = VT[0x40, 0x52, 0x62, 0x52, 0x62] + for T in [Int8, UInt8], VT in [Int8, UInt8] + A = T[0x40, 0x52, 0x62, 0x52, 0x62] @test findfirst(VT[0x30], A) === nothing @test findfirst(VT[0x52], A) === 2:2 From 1bff73058cd0529f7faa9c21b2dcd0025346aaa3 Mon Sep 17 00:00:00 2001 From: Jerry Ling Date: Mon, 31 Aug 2020 11:41:37 -0400 Subject: [PATCH 18/25] Update base/strings/search.jl Co-authored-by: Milan Bouchet-Valat --- base/strings/search.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index 0a1cd2d681e88..ae824d972acc7 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -576,7 +576,9 @@ findprev(ch::AbstractChar, string::AbstractString, start::Integer) = findprev(==(ch), string, start) """ - findprev(pattern::AbstractVector{<:Union{Int8,UInt8}}, A::AbstractVector{<:Union{Int8,UInt8}}, start::Integer) + findprev(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}}, + start::Integer) Find the previous occurrence of the sequence `pattern` in vector `A` starting at position `start`. From ed2496114ef37152c14e2ed2ad67e8496069a4b6 Mon Sep 17 00:00:00 2001 From: Jerry Ling Date: Mon, 31 Aug 2020 11:42:00 -0400 Subject: [PATCH 19/25] Update base/strings/search.jl Co-authored-by: Milan Bouchet-Valat --- base/strings/search.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index ae824d972acc7..dd3ffc27de731 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -377,7 +377,7 @@ julia> findlast([0x52, 0x62], [0x52, 0x62, 0x52, 0x62]) ``` """ findlast(pattern::AbstractVector{<:Union{Int8,UInt8}}, - A::AbstractVector{<:Union{Int8,UInt8}})= + A::AbstractVector{<:Union{Int8,UInt8}}) = findprev(pattern, A, lastindex(A)) """ From 1d64047f3ab18c87fe6971c841717ef0be080557 Mon Sep 17 00:00:00 2001 From: Jerry Ling Date: Mon, 31 Aug 2020 11:42:11 -0400 Subject: [PATCH 20/25] Update base/strings/search.jl Co-authored-by: Milan Bouchet-Valat --- base/strings/search.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index dd3ffc27de731..92e5a230616ed 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -142,8 +142,6 @@ findfirst(pattern::AbstractVector{<:Union{Int8,UInt8}}, A::AbstractVector{<:Union{Int8,UInt8}})= _search(A, pattern, firstindex(A)) - - # AbstractString implementation of the generic findnext interface function findnext(testf::Function, s::AbstractString, i::Integer) i = Int(i) From ec9fdc490fc4777a31205c44082355d0952da65c Mon Sep 17 00:00:00 2001 From: Jerry Ling Date: Mon, 31 Aug 2020 11:43:04 -0400 Subject: [PATCH 21/25] Update base/strings/search.jl Co-authored-by: Milan Bouchet-Valat --- base/strings/search.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index 92e5a230616ed..77271eeb1bf69 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -322,7 +322,9 @@ findnext(ch::AbstractChar, string::AbstractString, start::Integer) = findnext(==(ch), string, start) """ - findnext(pattern::AbstractVector{<:Union{Int8,UInt8}}, A::AbstractVector{<:Union{Int8,UInt8}}, start::Integer) + findnext(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}}, + start::Integer) Find the next occurrence of the sequence `pattern` in vector `A` starting at position `start`. From e30cef9d084a5852374f20b3f32873fb44fe1694 Mon Sep 17 00:00:00 2001 From: Moelf Date: Mon, 31 Aug 2020 11:44:14 -0400 Subject: [PATCH 22/25] address comments --- base/strings/search.jl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index 77271eeb1bf69..e82287339e2fd 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -125,7 +125,7 @@ findfirst(ch::AbstractChar, string::AbstractString) = findfirst(==(ch), string) """ findfirst(pattern::AbstractVector{<:Union{Int8,UInt8}}, - A::AbstractVector{<:Union{Int8,UInt8}}) + A::AbstractVector{<:Union{Int8,UInt8}}) Find the first occurrence of sequence `pattern` in vector `A`. @@ -139,7 +139,7 @@ julia> findfirst([0x52, 0x62], [0x40, 0x52, 0x62, 0x63]) ``` """ findfirst(pattern::AbstractVector{<:Union{Int8,UInt8}}, - A::AbstractVector{<:Union{Int8,UInt8}})= + A::AbstractVector{<:Union{Int8,UInt8}}) = _search(A, pattern, firstindex(A)) # AbstractString implementation of the generic findnext interface @@ -341,8 +341,8 @@ julia> findnext([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3) ``` """ findnext(pattern::AbstractVector{<:Union{Int8,UInt8}}, - A::AbstractVector{<:Union{Int8,UInt8}}, - start::Integer)= + A::AbstractVector{<:Union{Int8,UInt8}}, + start::Integer) = _search(A, pattern, start) """ @@ -365,7 +365,7 @@ findlast(pattern::AbstractString, string::AbstractString) = """ findlast(pattern::AbstractVector{<:Union{Int8,UInt8}}, - A::AbstractVector{<:Union{Int8,UInt8}}) + A::AbstractVector{<:Union{Int8,UInt8}}) Find the last occurrence of `pattern` in array `A`. Equivalent to [`findprev(pattern, A, lastindex(A))`](@ref). @@ -518,8 +518,8 @@ function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector end function _rsearch(s::Union{AbstractString,AbstractVector{<:Union{Int8,UInt8}}}, - t::Union{AbstractString,AbstractChar,AbstractVector{<:Union{Int8,UInt8}}}, - i::Integer) + t::Union{AbstractString,AbstractChar,AbstractVector{<:Union{Int8,UInt8}}}, + i::Integer) idx = _rsearchindex(s,t,i) if isempty(t) idx:idx-1 @@ -592,8 +592,8 @@ julia> findprev([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3) ``` """ findprev(pattern::AbstractVector{<:Union{Int8,UInt8}}, - A::AbstractVector{<:Union{Int8,UInt8}}, - start::Integer) = + A::AbstractVector{<:Union{Int8,UInt8}}, + start::Integer) = _rsearch(A, pattern, start) """ occursin(needle::Union{AbstractString,Regex,AbstractChar}, haystack::AbstractString) From cb012682694fdc2a03a85eb55d8d00c597ae50a4 Mon Sep 17 00:00:00 2001 From: Moelf Date: Mon, 31 Aug 2020 18:44:55 -0400 Subject: [PATCH 23/25] change sentinel value to firstindex - 1 --- base/strings/search.jl | 34 ++++++++++++++++----------------- test/offsetarray.jl | 43 ++++++++++++++++++++++++++++++++++++++++++ test/strings/search.jl | 2 +- 3 files changed, 61 insertions(+), 18 deletions(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index e82287339e2fd..e3e45cc9889af 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -196,23 +196,23 @@ end function _searchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector{<:Union{Int8,UInt8}}, _i::Integer) - require_one_based_indexing(s) + sentinel = firstindex(s) - 1 n = length(t) m = length(s) - i = Int(_i) - (firstindex(s) - 1) + i = Int(_i) - sentinel (i < 1 || i > m+1) && throw(BoundsError(s, _i)) if n == 0 - return 1 <= i <= m+1 ? max(1, i) : 0 + return 1 <= i <= m+1 ? max(1, i) : sentinel elseif m == 0 - return 0 + return sentinel elseif n == 1 - return something(findnext(isequal(_nthbyte(t,1)), s, i), 0) + return something(findnext(isequal(_nthbyte(t,1)), s, i), sentinel) end w = m - n if w < 0 || i - 1 > w - return 0 + return sentinel end bloom_mask = UInt64(0) @@ -257,7 +257,7 @@ function _searchindex(s::AbstractVector{<:Union{Int8,UInt8}}, i += 1 end - 0 + sentinel end function _search(s::Union{AbstractString,AbstractVector{<:Union{Int8,UInt8}}}, @@ -266,7 +266,7 @@ function _search(s::Union{AbstractString,AbstractVector{<:Union{Int8,UInt8}}}, idx = _searchindex(s,t,i) if isempty(t) idx:idx-1 - elseif idx > 0 + elseif idx > firstindex(s) - 1 idx:(idx + lastindex(t) - 1) else nothing @@ -454,23 +454,23 @@ function _rsearchindex(s::String, t::String, i::Integer) end function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector{<:Union{Int8,UInt8}}, _k::Integer) - require_one_based_indexing(s) + sentinel = firstindex(s) - 1 n = length(t) m = length(s) - k = Int(_k) - (firstindex(s) - 1) + k = Int(_k) - sentinel k < 1 && throw(BoundsError(s, _k)) if n == 0 - return 0 <= k <= m ? max(k, 1) : 0 + return 0 <= k <= m ? max(k, 1) : sentinel elseif m == 0 - return 0 + return sentinel elseif n == 1 - return something(findprev(isequal(_nthbyte(t,1)), s, k), 0) + return something(findprev(isequal(_nthbyte(t,1)), s, k), sentinel) end w = m - n if w < 0 || k <= 0 - return 0 + return sentinel end bloom_mask = UInt64(0) @@ -497,7 +497,7 @@ function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector # match found, restore in case `s` is an OffsetArray if j == n - return i + (firstindex(s) - 1) + return i + sentinel end # no match, try to rule out the next character @@ -514,7 +514,7 @@ function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector i -= 1 end - 0 + sentinel end function _rsearch(s::Union{AbstractString,AbstractVector{<:Union{Int8,UInt8}}}, @@ -523,7 +523,7 @@ function _rsearch(s::Union{AbstractString,AbstractVector{<:Union{Int8,UInt8}}}, idx = _rsearchindex(s,t,i) if isempty(t) idx:idx-1 - elseif idx > 0 + elseif idx > firstindex(s) - 1 idx:(idx + lastindex(t) - 1) else nothing diff --git a/test/offsetarray.jl b/test/offsetarray.jl index cd5c5bc848ace..24c7e3d5e3b1a 100644 --- a/test/offsetarray.jl +++ b/test/offsetarray.jl @@ -630,3 +630,46 @@ end @test last(v, 100) !== v @test last(v, 1) == [v[end]] end + +@testset "findfirst findnext of U/Int8 Offset Array" begin + for VT in [Int8, UInt8] + OA = OffsetArray(VT[0x40,0x52,0x62,0x52,0x62], 1) + OB = OffsetArray(VT[0x40,0x52,0x62,0x52,0x62], -2) + for PT in [Int8, UInt8] + pattern = PT[0x52, 0x62] + l_OA = lastindex(OA) + l_OB = lastindex(OB) + @test findfirst(pattern, OA) === 3:4 + @test findnext(pattern, OA, 2) === 3:4 + @test findnext(pattern, OA, 4) === 5:6 + @test findnext(pattern, OA, 6) === nothing + @test findnext(pattern, OA, 7) === nothing + @test findnext(pattern, OA, 2) === 3:4 + @test findnext(pattern, OA, 4) === 5:6 + # negatively Offset array + @test findfirst(pattern, OB) === 0:1 + @test findnext(pattern, OB, -1) === 0:1 + @test findnext(pattern, OB, 1) === 2:3 + @test findnext(pattern, OB, 3) === nothing + @test findnext(pattern, OB, 4) === nothing + @test findnext(pattern, OB, -1) === 0:1 + @test findnext(pattern, OB, 1) === 2:3 + # 1 idx too far is allowed + @test findnext(pattern, OA, l_OA+1) === nothing + @test_throws BoundsError findnext(pattern, OA, l_OA+2) + @test_throws BoundsError findnext(pattern, OA, 1) + @test findlast(pattern, OA) === 5:6 + @test findprev(pattern, OA, 2) === nothing + @test findprev(pattern, OA, 4) === 3:4 + @test findprev(pattern, OA, 6) === 5:6 + @test findprev(pattern, OA, l_OA+1) == findlast(pattern, OA) + @test findprev(pattern, OA, l_OA+2) == findlast(pattern, OA) + @test findlast(pattern, OB) === 2:3 + @test findprev(pattern, OB, -1) === nothing + @test findprev(pattern, OB, 1) === 0:1 + @test findprev(pattern, OB, 3) === 2:3 + @test findprev(pattern, OB, l_OB+1) == findlast(pattern, OB) + @test findprev(pattern, OB, l_OB+2) == findlast(pattern, OB) + end + end +end diff --git a/test/strings/search.jl b/test/strings/search.jl index b7e04772c04b2..6b0080abea02d 100644 --- a/test/strings/search.jl +++ b/test/strings/search.jl @@ -405,7 +405,7 @@ end @test findfirst(pattern, A) === 2:3 @test findnext(pattern, A, 2) === 2:3 @test findnext(pattern, A, 3) === 4:5 - # 1 idx too long is allowed + # 1 idx too far is allowed @test findnext(pattern, A, length(A)+1) === nothing @test_throws BoundsError findnext(pattern, A, -3) @test_throws BoundsError findnext(pattern, A, length(A)+2) From ff679b0cc887f9c5c4dd1d7cfb0e832b7699a9f5 Mon Sep 17 00:00:00 2001 From: Jerry Ling Date: Wed, 2 Sep 2020 10:33:42 -0400 Subject: [PATCH 24/25] NEWS for find* on Vector of U/Int8 --- NEWS.md | 2 ++ base/strings/search.jl | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 9942996ef4633..a0792b3c697a0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,6 +13,8 @@ New language features * The library name passed to `ccall` or `@ccall` can now be an expression involving global variables and function calls. The expression will be evaluated the first time the `ccall` executes ([#36458]). +* `findfirst`, `findnext`, `findlast`, and `findall` now support arguments `(pattern, array)` + where `pattern` and `array` are `AbstractVector{<:Union{Int8,UInt8}}` (mix allowed) ([#37283]) Language changes ---------------- diff --git a/base/strings/search.jl b/base/strings/search.jl index e3e45cc9889af..dca4d808e10ba 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -266,7 +266,7 @@ function _search(s::Union{AbstractString,AbstractVector{<:Union{Int8,UInt8}}}, idx = _searchindex(s,t,i) if isempty(t) idx:idx-1 - elseif idx > firstindex(s) - 1 + elseif idx >= firstindex(s) idx:(idx + lastindex(t) - 1) else nothing From 653515891001cc5b93c18c9b443adea469d40b18 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 26 Oct 2020 17:18:35 -0400 Subject: [PATCH 25/25] Update NEWS.md shorten NEWS --- NEWS.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 46b1d5311dabc..23cf4eed7cf0c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,8 +13,7 @@ New language features * The library name passed to `ccall` or `@ccall` can now be an expression involving global variables and function calls. The expression will be evaluated the first time the `ccall` executes ([#36458]). -* `findfirst`, `findnext`, `findlast`, and `findall` now support arguments `(pattern, array)` - where `pattern` and `array` are `AbstractVector{<:Union{Int8,UInt8}}` (mix allowed) ([#37283]) +* `findfirst`, `findnext`, `findlast`, and `findall` now support `AbstractVector{<:Union{Int8,UInt8}}` (pattern, array) arguments ([#37283]). * `ꜛ` (U+A71B), `ꜜ` (U+A71C) and `ꜝ` (U+A71D) can now also be used as operator suffixes. They can be tab-completed from `\^uparrow`, `\^downarrow` and `\^!` in the REPL ([#37542]).