From 953902d66b7b77e6268dd042214f45d53d1a396d Mon Sep 17 00:00:00 2001 From: Nathan Zimmerberg <39104088+nhz2@users.noreply.github.com> Date: Fri, 25 Oct 2024 11:49:12 -0400 Subject: [PATCH] Make `String(::Memory)` copy (#54457) A more targeted fix of #54369 than #54372 Preserves the performance improvements added in #53962 by creating a new internal `_unsafe_takestring!(v::Memory{UInt8})` function that does what `String(::Memory{UInt8})` used to do. --- base/gmp.jl | 4 ++-- base/intfuncs.jl | 12 ++++++------ base/strings/string.jl | 13 +++++++------ base/strings/util.jl | 2 +- base/uuid.jl | 2 +- stdlib/FileWatching/src/pidfile.jl | 2 +- test/strings/basic.jl | 11 +++++++++++ 7 files changed, 29 insertions(+), 17 deletions(-) diff --git a/base/gmp.jl b/base/gmp.jl index 1eaa20d6baecf2..df0d9fee493482 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -11,7 +11,7 @@ import .Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), xor, bin, oct, dec, hex, isequal, invmod, _prevpow2, _nextpow2, ndigits0zpb, widen, signed, unsafe_trunc, trunc, iszero, isone, big, flipsign, signbit, sign, hastypemax, isodd, iseven, digits!, hash, hash_integer, top_set_bit, - clamp + clamp, unsafe_takestring if Clong == Int32 const ClongMax = Union{Int8, Int16, Int32} @@ -761,7 +761,7 @@ function string(n::BigInt; base::Integer = 10, pad::Integer = 1) sv[i] = '0' % UInt8 end isneg(n) && (sv[1] = '-' % UInt8) - String(sv) + unsafe_takestring(sv) end function digits!(a::AbstractVector{T}, n::BigInt; base::Integer = 10) where {T<:Integer} diff --git a/base/intfuncs.jl b/base/intfuncs.jl index ec450aff2dff2d..e8d4b65305be7d 100644 --- a/base/intfuncs.jl +++ b/base/intfuncs.jl @@ -792,7 +792,7 @@ function bin(x::Unsigned, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + unsafe_takestring(a) end function oct(x::Unsigned, pad::Int, neg::Bool) @@ -806,7 +806,7 @@ function oct(x::Unsigned, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + unsafe_takestring(a) end # 2-digit decimal characters ("00":"99") @@ -876,7 +876,7 @@ function dec(x::Unsigned, pad::Int, neg::Bool) a = StringMemory(n) append_c_digits_fast(n, x, a, 1) neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + unsafe_takestring(a) end function hex(x::Unsigned, pad::Int, neg::Bool) @@ -897,7 +897,7 @@ function hex(x::Unsigned, pad::Int, neg::Bool) @inbounds a[i] = d + ifelse(d > 0x9, 0x57, 0x30) end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + unsafe_takestring(a) end const base36digits = UInt8['0':'9';'a':'z'] @@ -922,7 +922,7 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool) i -= 1 end neg && (@inbounds a[1] = 0x2d) # UInt8('-') - String(a) + unsafe_takestring(a) end split_sign(n::Integer) = unsigned(abs(n)), n < 0 @@ -998,7 +998,7 @@ function bitstring(x::T) where {T} x = lshr_int(x, 4) i -= 4 end - return String(str) + return unsafe_takestring(str) end """ diff --git a/base/strings/string.jl b/base/strings/string.jl index a46ee60e4f0231..9f3c3d00e4b81e 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -61,12 +61,7 @@ by [`take!`](@ref) on a writable [`IOBuffer`](@ref) and by calls to In other cases, `Vector{UInt8}` data may be copied, but `v` is truncated anyway to guarantee consistent behavior. """ -String(v::AbstractVector{UInt8}) = String(copyto!(StringMemory(length(v)), v)) -function String(v::Memory{UInt8}) - len = length(v) - len == 0 && return "" - return ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), v, len) -end +String(v::AbstractVector{UInt8}) = unsafe_takestring(copyto!(StringMemory(length(v)), v)) function String(v::Vector{UInt8}) #return ccall(:jl_array_to_string, Ref{String}, (Any,), v) len = length(v) @@ -83,6 +78,12 @@ function String(v::Vector{UInt8}) return str end +"Create a string re-using the memory, if possible. +Mutating or reading the memory after calling this function is undefined behaviour." +function unsafe_takestring(m::Memory{UInt8}) + isempty(m) ? "" : ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), m, length(m)) +end + """ unsafe_string(p::Ptr{UInt8}, [length::Integer]) diff --git a/base/strings/util.jl b/base/strings/util.jl index 04d451a4fd2883..fcccb9babadfdc 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -1217,7 +1217,7 @@ function bytes2hex(itr) b[2i - 1] = hex_chars[1 + x >> 4] b[2i ] = hex_chars[1 + x & 0xf] end - return String(b) + return unsafe_takestring(b) end function bytes2hex(io::IO, itr) diff --git a/base/uuid.jl b/base/uuid.jl index 56f3a6aa417e7c..4b9bae863d9263 100644 --- a/base/uuid.jl +++ b/base/uuid.jl @@ -98,7 +98,7 @@ let groupings = [36:-1:25; 23:-1:20; 18:-1:15; 13:-1:10; 8:-1:1] u >>= 4 end @inbounds a[24] = a[19] = a[14] = a[9] = '-' - return String(a) + return unsafe_takestring(a) end end diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl index 95b8f20face291..6862aaa9f8453c 100644 --- a/stdlib/FileWatching/src/pidfile.jl +++ b/stdlib/FileWatching/src/pidfile.jl @@ -304,7 +304,7 @@ function open_exclusive(path::String; end function _rand_filename(len::Int=4) # modified from Base.Libc - slug = Base.StringMemory(len) + slug = Base.StringVector(len) chars = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" for i = 1:len slug[i] = chars[(Libc.rand() % length(chars)) + 1] diff --git a/test/strings/basic.jl b/test/strings/basic.jl index de04055d047af3..ee92995bd2e11a 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -1093,6 +1093,17 @@ let v = [0x40,0x41,0x42] @test String(view(v, 2:3)) == "AB" end +# issue #54369 +let v = Base.StringMemory(3) + v .= [0x41,0x42,0x43] + s = String(v) + @test s == "ABC" + @test v == [0x41,0x42,0x43] + v[1] = 0x43 + @test s == "ABC" + @test v == [0x43,0x42,0x43] +end + # make sure length for identical String and AbstractString return the same value, PR #25533 let rng = MersenneTwister(1), strs = ["∀εa∀aε"*String(rand(rng, UInt8, 100))*"∀εa∀aε", String(rand(rng, UInt8, 200))]