From e79bcd0ce7b3956bb00e30d9d9a77cf126c61bb5 Mon Sep 17 00:00:00 2001 From: nhz2 Date: Mon, 13 May 2024 18:37:20 -0400 Subject: [PATCH 1/5] rename String's Memory method to _unsafe_takestring! --- base/gmp.jl | 2 +- base/intfuncs.jl | 12 ++++++------ base/strings/string.jl | 19 +++++++++++++------ base/strings/util.jl | 2 +- base/uuid.jl | 2 +- stdlib/FileWatching/src/pidfile.jl | 2 +- test/strings/basic.jl | 11 +++++++++++ 7 files changed, 34 insertions(+), 16 deletions(-) diff --git a/base/gmp.jl b/base/gmp.jl index 1f5b64a80f63b..8b3d8095232d5 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -758,7 +758,7 @@ function string(n::BigInt; base::Integer = 10, pad::Integer = 1) sv[i] = '0' % UInt8 end isneg(n) && (sv[1] = '-' % UInt8) - String(sv) + Base._unsafe_takestring!(sv) end function digits!(a::AbstractVector{T}, n::BigInt; base::Integer = 10) where {T<:Integer} diff --git a/base/intfuncs.jl b/base/intfuncs.jl index d37122b0494b4..c492434da19b5 100644 --- a/base/intfuncs.jl +++ b/base/intfuncs.jl @@ -764,7 +764,7 @@ function bin(x::Unsigned, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + _unsafe_takestring!(a) end function oct(x::Unsigned, pad::Int, neg::Bool) @@ -778,7 +778,7 @@ function oct(x::Unsigned, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + _unsafe_takestring!(a) end # 2-digit decimal characters ("00":"99") @@ -848,7 +848,7 @@ function dec(x::Unsigned, pad::Int, neg::Bool) a = StringMemory(n) append_c_digits_fast(n, x, a, 1) neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + _unsafe_takestring!(a) end function hex(x::Unsigned, pad::Int, neg::Bool) @@ -869,7 +869,7 @@ function hex(x::Unsigned, pad::Int, neg::Bool) @inbounds a[i] = d + ifelse(d > 0x9, 0x57, 0x30) end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + _unsafe_takestring!(a) end const base36digits = UInt8['0':'9';'a':'z'] @@ -894,7 +894,7 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + _unsafe_takestring!(a) end split_sign(n::Integer) = unsigned(abs(n)), n < 0 @@ -970,7 +970,7 @@ function bitstring(x::T) where {T} x = lshr_int(x, 4) i -= 4 end - return String(str) + return _unsafe_takestring!(str) end """ diff --git a/base/strings/string.jl b/base/strings/string.jl index d091baeb6c663..3d8dc4bfdc67a 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -63,12 +63,7 @@ by [`take!`](@ref) on a writable [`IOBuffer`](@ref) and by calls to In other cases, `Vector{UInt8}` data may be copied, but `v` is truncated anyway to guarantee consistent behavior. """ -String(v::AbstractVector{UInt8}) = String(copyto!(StringMemory(length(v)), v)) -function String(v::Memory{UInt8}) - len = length(v) - len == 0 && return "" - return ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), v, len) -end +String(v::AbstractVector{UInt8}) = _unsafe_takestring!(copyto!(StringMemory(length(v)), v)) function String(v::Vector{UInt8}) #return ccall(:jl_array_to_string, Ref{String}, (Any,), v) len = length(v) @@ -85,6 +80,18 @@ function String(v::Vector{UInt8}) return str end +""" + _unsafe_takestring!(v::Memory{UInt8}) + +Create a new `String` object using the data buffer from byte vector `v`, and leaves `v` in an inconsistent state. This should only be used internally for performance-critical +`String` routines that immediately discard `v` afterwards. +""" +function _unsafe_takestring!(v::Memory{UInt8}) + len = length(v) + len == 0 && return "" + return ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), v, len) +end + """ unsafe_string(p::Ptr{UInt8}, [length::Integer]) diff --git a/base/strings/util.jl b/base/strings/util.jl index 4b701001a8676..4704148686ca9 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -1066,7 +1066,7 @@ function bytes2hex(itr) b[2i - 1] = hex_chars[1 + x >> 4] b[2i ] = hex_chars[1 + x & 0xf] end - return String(b) + return Base._unsafe_takestring!(b) end function bytes2hex(io::IO, itr) diff --git a/base/uuid.jl b/base/uuid.jl index 9b2da3c6409db..3a45131fb1c59 100644 --- a/base/uuid.jl +++ b/base/uuid.jl @@ -96,7 +96,7 @@ let groupings = [36:-1:25; 23:-1:20; 18:-1:15; 13:-1:10; 8:-1:1] u >>= 4 end @inbounds a[24] = a[19] = a[14] = a[9] = '-' - return String(a) + return Base._unsafe_takestring!(a) end end diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl index 4c821a3d897e4..e35b5530ce0fe 100644 --- a/stdlib/FileWatching/src/pidfile.jl +++ b/stdlib/FileWatching/src/pidfile.jl @@ -285,7 +285,7 @@ function _rand_filename(len::Int=4) # modified from Base.Libc for i = 1:len slug[i] = chars[(Libc.rand() % length(chars)) + 1] end - return String(slug) + return Base._unsafe_takestring!(slug) end function tryrmopenfile(path::String) diff --git a/test/strings/basic.jl b/test/strings/basic.jl index 87d812c5bf201..80eea8a70777c 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -1093,6 +1093,17 @@ let v = [0x40,0x41,0x42] @test String(view(v, 2:3)) == "AB" end +# issue #54369 +let v = Base.StringMemory(3) + v .= [0x40,0x41,0x42] + s = String(v) + @test s == "ABC" + @test v == [0x40,0x41,0x42] + v[1] = 0x42 + @test s == "ABC" + @test v == [0x42,0x41,0x42] +end + # make sure length for identical String and AbstractString return the same value, PR #25533 let rng = MersenneTwister(1), strs = ["∀εa∀aε"*String(rand(rng, UInt8, 100))*"∀εa∀aε", String(rand(rng, UInt8, 200))] From 7dffd2e1b39ce2f5f159155a304f6d77c1aa05dd Mon Sep 17 00:00:00 2001 From: nhz2 Date: Mon, 13 May 2024 18:47:02 -0400 Subject: [PATCH 2/5] fix test typo --- test/strings/basic.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/strings/basic.jl b/test/strings/basic.jl index 80eea8a70777c..35804b9d576ea 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -1095,13 +1095,13 @@ end # issue #54369 let v = Base.StringMemory(3) - v .= [0x40,0x41,0x42] + v .= [0x41,0x42,0x43] s = String(v) @test s == "ABC" - @test v == [0x40,0x41,0x42] - v[1] = 0x42 + @test v == [0x41,0x42,0x43] + v[1] = 0x43 @test s == "ABC" - @test v == [0x42,0x41,0x42] + @test v == [0x43,0x42,0x43] end # make sure length for identical String and AbstractString return the same value, PR #25533 From e77015a57310bc36fb4b1c5cdfe6e6fc126dd7de Mon Sep 17 00:00:00 2001 From: nhz2 Date: Mon, 13 May 2024 19:36:18 -0400 Subject: [PATCH 3/5] avoid _unsafe_takestring! outside of Base --- stdlib/FileWatching/src/pidfile.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl index e35b5530ce0fe..0ee86e70f8465 100644 --- a/stdlib/FileWatching/src/pidfile.jl +++ b/stdlib/FileWatching/src/pidfile.jl @@ -280,12 +280,12 @@ function open_exclusive(path::String; end function _rand_filename(len::Int=4) # modified from Base.Libc - slug = Base.StringMemory(len) + slug = Base.StringVector(len) chars = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" for i = 1:len slug[i] = chars[(Libc.rand() % length(chars)) + 1] end - return Base._unsafe_takestring!(slug) + return String(slug) end function tryrmopenfile(path::String) From 956959a1e6c38cea6e637d9ae71fa406d9d1a70d Mon Sep 17 00:00:00 2001 From: nhz2 Date: Tue, 8 Oct 2024 14:49:17 -0400 Subject: [PATCH 4/5] replace `_unsafe_takestring!` with `unsafe_takestring` to match `takestring!` PR --- base/gmp.jl | 4 ++-- base/intfuncs.jl | 12 ++++++------ base/strings/string.jl | 16 +++++----------- base/strings/util.jl | 2 +- base/uuid.jl | 2 +- 5 files changed, 15 insertions(+), 21 deletions(-) diff --git a/base/gmp.jl b/base/gmp.jl index 38e8695c7f45d..df0d9fee49348 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -11,7 +11,7 @@ import .Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), xor, bin, oct, dec, hex, isequal, invmod, _prevpow2, _nextpow2, ndigits0zpb, widen, signed, unsafe_trunc, trunc, iszero, isone, big, flipsign, signbit, sign, hastypemax, isodd, iseven, digits!, hash, hash_integer, top_set_bit, - clamp + clamp, unsafe_takestring if Clong == Int32 const ClongMax = Union{Int8, Int16, Int32} @@ -761,7 +761,7 @@ function string(n::BigInt; base::Integer = 10, pad::Integer = 1) sv[i] = '0' % UInt8 end isneg(n) && (sv[1] = '-' % UInt8) - Base._unsafe_takestring!(sv) + unsafe_takestring(sv) end function digits!(a::AbstractVector{T}, n::BigInt; base::Integer = 10) where {T<:Integer} diff --git a/base/intfuncs.jl b/base/intfuncs.jl index 9e99644b8bfc6..1515c2f763d7e 100644 --- a/base/intfuncs.jl +++ b/base/intfuncs.jl @@ -766,7 +766,7 @@ function bin(x::Unsigned, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - _unsafe_takestring!(a) + unsafe_takestring(a) end function oct(x::Unsigned, pad::Int, neg::Bool) @@ -780,7 +780,7 @@ function oct(x::Unsigned, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - _unsafe_takestring!(a) + unsafe_takestring(a) end # 2-digit decimal characters ("00":"99") @@ -850,7 +850,7 @@ function dec(x::Unsigned, pad::Int, neg::Bool) a = StringMemory(n) append_c_digits_fast(n, x, a, 1) neg && (@inbounds a[1] = 0x2d) # UInt8('-') - _unsafe_takestring!(a) + unsafe_takestring(a) end function hex(x::Unsigned, pad::Int, neg::Bool) @@ -871,7 +871,7 @@ function hex(x::Unsigned, pad::Int, neg::Bool) @inbounds a[i] = d + ifelse(d > 0x9, 0x57, 0x30) end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - _unsafe_takestring!(a) + unsafe_takestring(a) end const base36digits = UInt8['0':'9';'a':'z'] @@ -896,7 +896,7 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - _unsafe_takestring!(a) + unsafe_takestring(a) end split_sign(n::Integer) = unsigned(abs(n)), n < 0 @@ -972,7 +972,7 @@ function bitstring(x::T) where {T} x = lshr_int(x, 4) i -= 4 end - return _unsafe_takestring!(str) + return unsafe_takestring(str) end """ diff --git a/base/strings/string.jl b/base/strings/string.jl index 514f8657a5d64..68f4753568b01 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -61,7 +61,7 @@ by [`take!`](@ref) on a writable [`IOBuffer`](@ref) and by calls to In other cases, `Vector{UInt8}` data may be copied, but `v` is truncated anyway to guarantee consistent behavior. """ -String(v::AbstractVector{UInt8}) = _unsafe_takestring!(copyto!(StringMemory(length(v)), v)) +String(v::AbstractVector{UInt8}) = unsafe_takestring(copyto!(StringMemory(length(v)), v)) function String(v::Vector{UInt8}) #return ccall(:jl_array_to_string, Ref{String}, (Any,), v) len = length(v) @@ -78,16 +78,10 @@ function String(v::Vector{UInt8}) return str end -""" - _unsafe_takestring!(v::Memory{UInt8}) - -Create a new `String` object using the data buffer from byte vector `v`, and leaves `v` in an inconsistent state. This should only be used internally for performance-critical -`String` routines that immediately discard `v` afterwards. -""" -function _unsafe_takestring!(v::Memory{UInt8}) - len = length(v) - len == 0 && return "" - return ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), v, len) +"Create a string re-using the memory, if possible. +Mutating or reading the memory after calling this function is undefined behaviour." +function unsafe_takestring(m::Memory{UInt8}) + isempty(m) ? "" : ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), m, length(m)) end """ diff --git a/base/strings/util.jl b/base/strings/util.jl index 0227e3908376a..4f9aecec2738a 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -1214,7 +1214,7 @@ function bytes2hex(itr) b[2i - 1] = hex_chars[1 + x >> 4] b[2i ] = hex_chars[1 + x & 0xf] end - return Base._unsafe_takestring!(b) + return Base.unsafe_takestring(b) end function bytes2hex(io::IO, itr) diff --git a/base/uuid.jl b/base/uuid.jl index 3a45131fb1c59..217c0b55adf7d 100644 --- a/base/uuid.jl +++ b/base/uuid.jl @@ -96,7 +96,7 @@ let groupings = [36:-1:25; 23:-1:20; 18:-1:15; 13:-1:10; 8:-1:1] u >>= 4 end @inbounds a[24] = a[19] = a[14] = a[9] = '-' - return Base._unsafe_takestring!(a) + return unsafe_takestring(a) end end From b08831fc418051ab3fe6e2f4cf4c3dc929b67244 Mon Sep 17 00:00:00 2001 From: nhz2 Date: Wed, 9 Oct 2024 10:49:32 -0400 Subject: [PATCH 5/5] remove redundant `Base.` --- base/strings/util.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index 4f9aecec2738a..f9e75df5dcd85 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -1214,7 +1214,7 @@ function bytes2hex(itr) b[2i - 1] = hex_chars[1 + x >> 4] b[2i ] = hex_chars[1 + x & 0xf] end - return Base.unsafe_takestring(b) + return unsafe_takestring(b) end function bytes2hex(io::IO, itr)