From feb1f6827124dcaed1f734a310bf5f43837a104b Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Mon, 11 Dec 2017 18:24:03 -0500 Subject: [PATCH] bounds checks on string length(s, i, j) --- base/strings/basic.jl | 39 +++++++++++++++++++++++---------------- base/strings/string.jl | 18 ++++++++---------- test/lineedit.jl | 2 +- test/strings/basic.jl | 8 ++++---- test/strings/types.jl | 6 ++++-- 5 files changed, 40 insertions(+), 33 deletions(-) diff --git a/base/strings/basic.jl b/base/strings/basic.jl index a9dcb14f54cde..fa607b6003d32 100644 --- a/base/strings/basic.jl +++ b/base/strings/basic.jl @@ -313,15 +313,17 @@ isless(a::Symbol, b::Symbol) = cmp(a, b) < 0 ## character index arithmetic ## """ - length(s::AbstractString, lo::Integer=1, hi::Integer=ncodeunits(s)) -> Integer + length(s::AbstractString) -> Int + length(s::AbstractString, i::Integer, j::Integer) -> Int -The number of characters in string `s` from indices `lo` through `hi`. This is -computed as the number of code unit indices from `lo` to `hi` which are valid +The number of characters in string `s` from indices `i` through `j`. This is +computed as the number of code unit indices from `i` to `j` which are valid character indices. Without only a single string argument, this computes the -number of characters in the entire string. With `lo` and `hi` arguments it computes -the number of indices between `lo` and `hi` inclusive that are valid indices in -the string `s`. Note that the trailing character may include code units past `hi` -and still be counted. +number of characters in the entire string. With `i` and `j` arguments it +computes the number of indices between `i` and `j` inclusive that are valid +indices in the string `s`. In addition to in-bounds values, `i` may take the +out-of-bounds value `ncodeunits(s) + 1` and `j` may take the out-of-bounds +value `0`. See also: [`isvalid`](@ref), [`ncodeunits`](@ref), [`endof`](@ref), [`thisind`](@ref), [`nextind`](@ref), [`prevind`](@ref) @@ -332,18 +334,23 @@ julia> length("jμΛIα") 5 ``` """ -function length(s::AbstractString, lo::Integer=1, hi::Integer=ncodeunits(s)) - lo ≤ hi || return 0 - z = ncodeunits(s) - a = Int(max(1, min(z, lo))) - b = Int(min(z, max(1, hi))) - n = a - b - for i = a:b - n += isvalid(s, i) +length(s::AbstractString) = @inbounds return length(s, 1, ncodeunits(s)) + +function length(s::AbstractString, i::Int, j::Int) + @boundscheck begin + 0 < i ≤ ncodeunits(s)+1 || throw(BoundsError(s, i)) + 0 ≤ j < ncodeunits(s)+1 || throw(BoundsError(s, j)) end - return n + hi - lo + n = 0 + for k = i:j + @inbounds n += isvalid(s, k) + end + return n end +@propagate_inbounds length(s::AbstractString, i::Integer, j::Integer) = + length(s, Int(i), Int(j)) + """ thisind(s::AbstractString, i::Integer) -> Int diff --git a/base/strings/string.jl b/base/strings/string.jl index b1d365bb35b17..2cc20a714ea69 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -232,18 +232,16 @@ function getindex(s::String, r::UnitRange{Int}) return ss end -function length(s::String, lo::Int, hi::Int) - i, n = lo, hi - c = max(0, hi - lo + 1) +function length(s::String, i::Int, j::Int) @boundscheck begin - z = ncodeunits(s) - i = Int(max(1, min(z, lo))) - n = Int(min(z, max(1, hi))) + 0 < i ≤ ncodeunits(s)+1 || throw(BoundsError(s, i)) + 0 ≤ j < ncodeunits(s)+1 || throw(BoundsError(s, j)) end - i < n || return c - @inbounds i, j = thisind(s, i), i - c -= i < j - _length(s, i, n, c) + j < i && return 0 + c = j - i + 1 + @inbounds i, k = thisind(s, i), i + c -= i < k + _length(s, i, j, c) end length(s::String) = _length(s, 1, ncodeunits(s), ncodeunits(s)) diff --git a/test/lineedit.jl b/test/lineedit.jl index 61e1d3bd42562..cb870b8842422 100644 --- a/test/lineedit.jl +++ b/test/lineedit.jl @@ -17,7 +17,7 @@ function new_state() end charseek(buf, i) = seek(buf, nextind(content(buf), 0, i+1)-1) -charpos(buf, pos=position(buf)) = length(content(buf), 1, pos+1)-1 +charpos(buf, pos=position(buf)) = length(content(buf), 1, pos) function transform!(f, s, i = -1) # i is char-based (not bytes) buffer position buf = buffer(s) diff --git a/test/strings/basic.jl b/test/strings/basic.jl index 49df4fe48252d..512eee29e0866 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -99,13 +99,13 @@ end end @testset "issue #7248" begin - @test length("hello", 1, -1) == 0 + @test_throws BoundsError length("hello", 1, -1) == 0 @test prevind("hello", 0, 1) == -1 - @test length("hellø", 1, -1) == 0 + @test_throws BoundsError length("hellø", 1, -1) == 0 @test prevind("hellø", 0, 1) == -1 - @test length("hello", 1, 10) == 10 + @test_throws BoundsError length("hello", 1, 10) == 10 @test nextind("hello", 0, 10) == 10 - @test length("hellø", 1, 10) == 9 + @test_throws BoundsError length("hellø", 1, 10) == 9 @test nextind("hellø", 0, 10) == 11 @test_throws BoundsError checkbounds("hello", 0) @test_throws BoundsError checkbounds("hello", 6) diff --git a/test/strings/types.jl b/test/strings/types.jl index 0af2713349465..f3c549ba6b36a 100644 --- a/test/strings/types.jl +++ b/test/strings/types.jl @@ -205,8 +205,10 @@ let s = "Σx + βz - 2" end let ss = SubString("hello", 1, 5) - @test length(ss, 1, -1) == 0 - @test length(ss, 1, 10) == 10 + @test length(ss, 1, 0) == 0 + @test_throws BoundsError length(ss, 1, -1) == 0 + @test_throws BoundsError length(ss, 1, 6) + @test_throws BoundsError length(ss, 1, 10) @test prevind(ss, 0, 1) == -1 @test nextind(ss, 0, 10) == 10 end