From 8de25f5ac6c8a8ef9a8872f2d9aaaee9ddbf6bf7 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Tue, 12 Dec 2017 16:31:25 -0500 Subject: [PATCH] bounds check thisind, nextind and prevind as well --- base/repl/REPL.jl | 6 +- base/strings/basic.jl | 30 ++++--- base/strings/search.jl | 3 +- base/strings/string.jl | 11 ++- base/strings/substring.jl | 23 +++++- base/strings/util.jl | 29 +++---- test/strings/basic.jl | 160 ++++++++++++++++++++------------------ test/strings/types.jl | 24 ++++-- 8 files changed, 168 insertions(+), 118 deletions(-) diff --git a/base/repl/REPL.jl b/base/repl/REPL.jl index f7585feaa2a05..c22010c168503 100644 --- a/base/repl/REPL.jl +++ b/base/repl/REPL.jl @@ -609,7 +609,11 @@ function history_search(hist::REPLHistoryProvider, query_buffer::IOBuffer, respo # Alright, first try to see if the current match still works a = position(response_buffer) + 1 # position is zero-indexed - b = min(endof(response_str), prevind(response_str, a + sizeof(searchdata))) # ensure that b is valid + # FIXME: I'm pretty sure this is broken since it uses an index + # into the search data to index into the response string + b = a + sizeof(searchdata) + b = b ≤ ncodeunits(response_str) ? prevind(response_str, b) : b-1 + b = min(endof(response_str), b) # ensure that b is valid !skip_current && searchdata == response_str[a:b] && return true diff --git a/base/strings/basic.jl b/base/strings/basic.jl index fa607b6003d32..407ebb3638e7b 100644 --- a/base/strings/basic.jl +++ b/base/strings/basic.jl @@ -383,8 +383,12 @@ julia> thisind("αβγdef", 10) julia> thisind("αβγdef", 20) 20 """ -function thisind(s::AbstractString, i::Integer) - i ≤ ncodeunits(s) || return i +thisind(s::AbstractString, i::Integer) = thisind(s, Int(i)) + +function thisind(s::AbstractString, i::Int) + z = ncodeunits(s) + 1 + i == z && return i + @boundscheck 0 ≤ i ≤ z || throw(BoundsError(s, i)) @inbounds while 1 < i && !isvalid(s, i) i -= 1 end @@ -415,13 +419,14 @@ julia> prevind("αβγdef", 3, 2) 0 ``` """ -function prevind(s::AbstractString, i::Integer, n::Integer=1) +prevind(s::AbstractString, i::Integer, n::Integer) = prevind(s, Int(i), Int(n)) +prevind(s::AbstractString, i::Integer) = prevind(s, Int(i)) +prevind(s::AbstractString, i::Int) = prevind(s, i, 1) + +function prevind(s::AbstractString, i::Int, n::Int) n < 0 && throw(ArgumentError("n cannot be negative: $n")) z = ncodeunits(s) + 1 - if i > z - n -= i - z - i = z - end + @boundscheck 0 < i ≤ z || throw(BoundsError(s, i)) while n > 0 && 1 < i @inbounds n -= isvalid(s, i -= 1) end @@ -452,13 +457,14 @@ julia> nextind(str, 9) 10 ``` """ -function nextind(s::AbstractString, i::Integer, n::Integer=1) +nextind(s::AbstractString, i::Integer, n::Integer) = nextind(s, Int(i), Int(n)) +nextind(s::AbstractString, i::Integer) = nextind(s, Int(i)) +nextind(s::AbstractString, i::Int) = nextind(s, i, 1) + +function nextind(s::AbstractString, i::Int, n::Int) n < 0 && throw(ArgumentError("n cannot be negative: $n")) - if i < 1 - n += i - 1 - i = 1 - end z = ncodeunits(s) + @boundscheck 0 ≤ i ≤ z || throw(BoundsError(s, i)) while n > 0 && i < z @inbounds n -= isvalid(s, i += 1) end diff --git a/base/strings/search.jl b/base/strings/search.jl index 5b0fa167a783f..4b108e4e0a0b5 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -412,7 +412,8 @@ function rsearchindex(s::String, t::String, i::Integer) if endof(t) == 1 rsearch(s, t[1], i) elseif endof(t) != 0 - _rsearchindex(s, t, nextind(s, i)-1) + j = i ≤ ncodeunits(s) ? nextind(s, i)-1 : i + _rsearchindex(s, t, j) elseif i > sizeof(s) return 0 elseif i == 0 diff --git a/base/strings/string.jl b/base/strings/string.jl index 2cc20a714ea69..223f0fc817b63 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -92,14 +92,12 @@ function ==(a::String, b::String) al == sizeof(b) && 0 == ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), a, b, al) end -## thisind, nextind, prevind ## - -thisind(s::String, i::Integer) = oftype(i, thisind(s, Int(i))) -nextind(s::String, i::Integer) = oftype(i, nextind(s, Int(i))) +## thisind, prevind, nextind ## function thisind(s::String, i::Int) n = ncodeunits(s) - between(i, 2, n) || return i + i == n + 1 && return i + @boundscheck between(i, 0, n) || throw(BoundsError(s, i)) @inbounds b = codeunit(s, i) b & 0xc0 == 0x80 || return i @inbounds b = codeunit(s, i-1) @@ -114,8 +112,9 @@ function thisind(s::String, i::Int) end function nextind(s::String, i::Int) + i == 0 && return 1 n = ncodeunits(s) - between(i, 1, n-1) || return i+1 + @boundscheck between(i, 1, n) || throw(BoundsError(s, i)) @inbounds l = codeunit(s, i) (l < 0x80) | (0xf8 ≤ l) && return i+1 if l < 0xc0 diff --git a/base/strings/substring.jl b/base/strings/substring.jl index e389de6518b49..f7abe7dfb1e4f 100644 --- a/base/strings/substring.jl +++ b/base/strings/substring.jl @@ -78,9 +78,26 @@ function isvalid(s::SubString, i::Integer) @inbounds return ib && isvalid(s.string, s.offset + i) end -thisind(s::SubString, i::Integer) = thisind(s.string, s.offset + i) - s.offset -nextind(s::SubString, i::Integer) = nextind(s.string, s.offset + i) - s.offset -prevind(s::SubString, i::Integer) = prevind(s.string, s.offset + i) - s.offset +function thisind(s::SubString, i::Int) + @boundscheck 0 ≤ i ≤ ncodeunits(s)+1 || throw(BoundsError(s, i)) + @inbounds return thisind(s.string, s.offset + i) - s.offset +end +function nextind(s::SubString, i::Int, n::Int) + @boundscheck 0 ≤ i < ncodeunits(s)+1 || throw(BoundsError(s, i)) + @inbounds return nextind(s.string, s.offset + i, n) - s.offset +end +function nextind(s::SubString, i::Int) + @boundscheck 0 ≤ i < ncodeunits(s)+1 || throw(BoundsError(s, i)) + @inbounds return nextind(s.string, s.offset + i) - s.offset +end +function prevind(s::SubString, i::Int, n::Int) + @boundscheck 0 < i ≤ ncodeunits(s)+1 || throw(BoundsError(s, i)) + @inbounds return prevind(s.string, s.offset + i, n) - s.offset +end +function prevind(s::SubString, i::Int) + @boundscheck 0 < i ≤ ncodeunits(s)+1 || throw(BoundsError(s, i)) + @inbounds return prevind(s.string, s.offset + i) - s.offset +end function cmp(a::SubString{String}, b::SubString{String}) na = sizeof(a) diff --git a/base/strings/util.jl b/base/strings/util.jl index d92969de12ee1..43e96fc1a8b22 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -281,17 +281,20 @@ function _split(str::AbstractString, splitter, limit::Integer, keep_empty::Bool, i = start(str) n = endof(str) r = search(str,splitter,i) - j, k = first(r), nextind(str,last(r)) - while 0 < j <= n && length(strs) != limit-1 - if i < k - if keep_empty || i < j - push!(strs, SubString(str,i,prevind(str,j))) + if r != 0:-1 + j, k = first(r), nextind(str,last(r)) + while 0 < j <= n && length(strs) != limit-1 + if i < k + if keep_empty || i < j + push!(strs, SubString(str,i,prevind(str,j))) + end + i = k end - i = k + (k <= j) && (k = nextind(str,j)) + r = search(str,splitter,k) + r == 0:-1 && break + j, k = first(r), nextind(str,last(r)) end - (k <= j) && (k = nextind(str,j)) - r = search(str,splitter,k) - j, k = first(r), nextind(str,last(r)) end if keep_empty || !done(str,i) push!(strs, SubString(str,i)) @@ -377,18 +380,16 @@ function replace_new(str::String, pattern, repl, count::Integer) unsafe_write(out, pointer(str, i), UInt(j-i)) _replace(out, repl, str, r, pattern) end - if k e && break k = nextind(str, j) else i = k = nextind(str, k) end - if j > e - break - end r = search(str,pattern,k) + r == 0:-1 || n == count && break j, k = first(r), last(r) - n == count && break n += 1 end write(out, SubString(str,i)) diff --git a/test/strings/basic.jl b/test/strings/basic.jl index 512eee29e0866..512bbd0805943 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -99,11 +99,11 @@ end end @testset "issue #7248" begin - @test_throws BoundsError length("hello", 1, -1) == 0 - @test prevind("hello", 0, 1) == -1 - @test_throws BoundsError length("hellø", 1, -1) == 0 - @test prevind("hellø", 0, 1) == -1 - @test_throws BoundsError length("hello", 1, 10) == 10 + @test_throws BoundsError length("hello", 1, -1) + @test_throws BoundsError prevind("hello", 0, 1) + @test_throws BoundsError length("hellø", 1, -1) + @test_throws BoundsError prevind("hellø", 0, 1) + @test_throws BoundsError length("hello", 1, 10) @test nextind("hello", 0, 10) == 10 @test_throws BoundsError length("hellø", 1, 10) == 9 @test nextind("hellø", 0, 10) == 11 @@ -512,7 +512,8 @@ end SubString("123∀α>β:α+1>β123", 4, 18), SubString(s"123∀α>β:α+1>β123", 4, 18)] for s in strs - @test thisind(s, -2) == -2 + @test_throws BoundsError thisind(s, -2) + @test_throws BoundsError thisind(s, -1) @test thisind(s, 0) == 0 @test thisind(s, 1) == 1 @test thisind(s, 2) == 1 @@ -523,86 +524,97 @@ end @test thisind(s, 15) == 15 @test thisind(s, 16) == 15 @test thisind(s, 17) == 17 - @test thisind(s, 30) == 30 + @test_throws BoundsError thisind(s, 18) + @test_throws BoundsError thisind(s, 19) end end let strs = Any["", s"", SubString("123", 2, 1), SubString(s"123", 2, 1)] - for s in strs, i in -2:2 - @test thisind(s, i) == i + for s in strs + @test_throws BoundsError thisind(s, -1) + @test thisind(s, 0) == 0 + @test thisind(s, 1) == 1 + @test_throws BoundsError thisind(s, 2) end end end @testset "prevind and nextind" begin - let strs = Any["∀α>β:α+1>β", GenericString("∀α>β:α+1>β")] - for i in 1:2 - @test prevind(strs[i], 1) == 0 - @test prevind(strs[i], 1, 1) == 0 - @test prevind(strs[i], 2) == 1 - @test prevind(strs[i], 2, 1) == 1 - @test prevind(strs[i], 4) == 1 - @test prevind(strs[i], 4, 1) == 1 - @test prevind(strs[i], 5) == 4 - @test prevind(strs[i], 5, 1) == 4 - @test prevind(strs[i], 5, 2) == 1 - @test prevind(strs[i], 5, 3) == 0 - @test prevind(strs[i], 15) == 14 - @test prevind(strs[i], 15, 1) == 14 - @test prevind(strs[i], 15, 2) == 13 - @test prevind(strs[i], 15, 3) == 12 - @test prevind(strs[i], 15, 4) == 10 - @test prevind(strs[i], 15, 10) == 0 - @test prevind(strs[i], 15, 9) == 1 - @test prevind(strs[i], 16) == 15 - @test prevind(strs[i], 16, 1) == 15 - @test prevind(strs[i], 16, 2) == 14 - @test prevind(strs[i], 20) == 19 - @test prevind(strs[i], 20, 1) == 19 - @test prevind(strs[i], 20, 10) == 7 - @test prevind(strs[i], 20, 0) == 20 - - @test nextind(strs[i], -1) == 0 - @test nextind(strs[i], -1, 1) == 0 - @test nextind(strs[i], -1, 2) == 1 - @test nextind(strs[i], -1, 3) == 4 - @test nextind(strs[i], 0, 2) == 4 - @test nextind(strs[i], 0, 20) == 26 - @test nextind(strs[i], 0, 10) == 15 - @test nextind(strs[i], 1) == 4 - @test nextind(strs[i], 1, 1) == 4 - @test nextind(strs[i], 1, 2) == 6 - @test nextind(strs[i], 1, 9) == 15 - @test nextind(strs[i], 1, 10) == 17 - @test nextind(strs[i], 2) == 4 - @test nextind(strs[i], 2, 1) == 4 - @test nextind(strs[i], 3) == 4 - @test nextind(strs[i], 3, 1) == 4 - @test nextind(strs[i], 4) == 6 - @test nextind(strs[i], 4, 1) == 6 - @test nextind(strs[i], 14) == 15 - @test nextind(strs[i], 14, 1) == 15 - @test nextind(strs[i], 15) == 17 - @test nextind(strs[i], 15, 1) == 17 - @test nextind(strs[i], 20) == 21 - @test nextind(strs[i], 20, 1) == 21 - @test nextind(strs[i], 20, 0) == 20 - - for x in -10:20 - n = p = x - for j in 1:40 - p = prevind(strs[i], p) - @test prevind(strs[i], x, j) == p - n = nextind(strs[i], n) - @test nextind(strs[i], x, j) == n + for s in Any["∀α>β:α+1>β", GenericString("∀α>β:α+1>β")] + @test_throws BoundsError prevind(s, 0) + @test_throws BoundsError prevind(s, 0, 0) + @test_throws BoundsError prevind(s, 0, 1) + @test prevind(s, 1) == 0 + @test prevind(s, 1, 1) == 0 + @test prevind(s, 1, 0) == 1 + @test prevind(s, 2) == 1 + @test prevind(s, 2, 1) == 1 + @test prevind(s, 4) == 1 + @test prevind(s, 4, 1) == 1 + @test prevind(s, 5) == 4 + @test prevind(s, 5, 1) == 4 + @test prevind(s, 5, 2) == 1 + @test prevind(s, 5, 3) == 0 + @test prevind(s, 15) == 14 + @test prevind(s, 15, 1) == 14 + @test prevind(s, 15, 2) == 13 + @test prevind(s, 15, 3) == 12 + @test prevind(s, 15, 4) == 10 + @test prevind(s, 15, 10) == 0 + @test prevind(s, 15, 9) == 1 + @test prevind(s, 16) == 15 + @test prevind(s, 16, 1) == 15 + @test prevind(s, 16, 2) == 14 + @test prevind(s, 17) == 15 + @test prevind(s, 17, 1) == 15 + @test prevind(s, 17, 2) == 14 + @test_throws BoundsError prevind(s, 18) + @test_throws BoundsError prevind(s, 18, 0) + @test_throws BoundsError prevind(s, 18, 1) + + @test_throws BoundsError nextind(s, -1) + @test_throws BoundsError nextind(s, -1, 0) + @test_throws BoundsError nextind(s, -1, 1) + @test nextind(s, 0, 2) == 4 + @test nextind(s, 0, 20) == 26 + @test nextind(s, 0, 10) == 15 + @test nextind(s, 1) == 4 + @test nextind(s, 1, 1) == 4 + @test nextind(s, 1, 2) == 6 + @test nextind(s, 1, 9) == 15 + @test nextind(s, 1, 10) == 17 + @test nextind(s, 2) == 4 + @test nextind(s, 2, 1) == 4 + @test nextind(s, 3) == 4 + @test nextind(s, 3, 1) == 4 + @test nextind(s, 4) == 6 + @test nextind(s, 4, 1) == 6 + @test nextind(s, 14) == 15 + @test nextind(s, 14, 1) == 15 + @test nextind(s, 15) == 17 + @test nextind(s, 15, 1) == 17 + @test nextind(s, 15, 2) == 18 + @test nextind(s, 16) == 17 + @test nextind(s, 16, 1) == 17 + @test nextind(s, 16, 2) == 18 + @test nextind(s, 16, 3) == 19 + @test_throws BoundsError nextind(s, 17) + @test_throws BoundsError nextind(s, 17, 0) + @test_throws BoundsError nextind(s, 17, 1) + + for x in 0:ncodeunits(s)+1 + n = p = x + for j in 1:40 + if 1 ≤ p + p = prevind(s, p) + @test prevind(s, x, j) == p + end + if n ≤ ncodeunits(s) + n = nextind(s, n) + @test nextind(s, x, j) == n end end end - @test prevind(strs[1], -1) == -2 - @test prevind(strs[1], -1, 1) == -2 - - @test prevind(strs[2], -1) == -2 - @test prevind(strs[2], -1, 1) == -2 end end diff --git a/test/strings/types.jl b/test/strings/types.jl index f3c549ba6b36a..b849ddac07573 100644 --- a/test/strings/types.jl +++ b/test/strings/types.jl @@ -146,7 +146,7 @@ end @test prevind(SubString("{var}",2,4),4) == 3 # issue #4183 -@test split(SubString("x", 2, 0), "y") == AbstractString[""] +@test split(SubString("x", 2, 0), "y") == [""] # issue #6772 @test parse(Float64, SubString("10",1,1)) === 1.0 @@ -157,7 +157,7 @@ end @test !ismatch(Regex("aa"), SubString("",1,0)) @test ismatch(Regex(""), SubString("",1,0)) -# isvalid(), formerly length() and nextind() for SubString{String} +# isvalid, length, prevind, nextind for SubString{String} let s = "lorem ipsum", sdict = Dict( SubString(s, 1, 11) => "lorem ipsum", SubString(s, 1, 6) => "lorem ", @@ -177,10 +177,14 @@ let s = "lorem ipsum", sdict = Dict( end for (ss, s) in sdict @test length(ss) == length(s) - for i in 0:length(ss)+1, j = 0:length(ss)+1 + for i in 0:ncodeunits(ss), j = 0:length(ss)+1 + @test prevind(ss, i+1, j) == prevind(s, i+1, j) @test nextind(ss, i, j) == nextind(s, i, j) - @test prevind(ss, i, j) == prevind(s, i, j) end + @test_throws BoundsError prevind(s, 0) + @test_throws BoundsError prevind(ss, 0) + @test_throws BoundsError nextind(s, ncodeunits(ss)+1) + @test_throws BoundsError nextind(ss, ncodeunits(ss)+1) end end @@ -206,11 +210,17 @@ end let ss = SubString("hello", 1, 5) @test length(ss, 1, 0) == 0 - @test_throws BoundsError length(ss, 1, -1) == 0 + @test_throws BoundsError length(ss, 1, -1) @test_throws BoundsError length(ss, 1, 6) @test_throws BoundsError length(ss, 1, 10) - @test prevind(ss, 0, 1) == -1 - @test nextind(ss, 0, 10) == 10 + @test_throws BoundsError prevind(ss, 0, 1) + @test prevind(ss, 1, 1) == 0 + @test prevind(ss, 6, 1) == 5 + @test_throws BoundsError prevind(ss, 7, 1) + @test_throws BoundsError nextind(ss, -1, 1) + @test nextind(ss, 0, 1) == 1 + @test nextind(ss, 5, 1) == 6 + @test_throws BoundsError nextind(ss, 6, 1) end # length(SubString{String}) performance specialization