Skip to content

Commit

Permalink
Make String(::Memory) copy (#54457)
Browse files Browse the repository at this point in the history
A more targeted fix of #54369 than #54372

Preserves the performance improvements added in #53962 by creating a new
internal `_unsafe_takestring!(v::Memory{UInt8})` function that does what
`String(::Memory{UInt8})` used to do.
  • Loading branch information
nhz2 authored and pull[bot] committed Dec 15, 2024
1 parent ba7e3ea commit 953902d
Show file tree
Hide file tree
Showing 7 changed files with 29 additions and 17 deletions.
4 changes: 2 additions & 2 deletions base/gmp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import .Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), xor,
bin, oct, dec, hex, isequal, invmod, _prevpow2, _nextpow2, ndigits0zpb,
widen, signed, unsafe_trunc, trunc, iszero, isone, big, flipsign, signbit,
sign, hastypemax, isodd, iseven, digits!, hash, hash_integer, top_set_bit,
clamp
clamp, unsafe_takestring

if Clong == Int32
const ClongMax = Union{Int8, Int16, Int32}
Expand Down Expand Up @@ -761,7 +761,7 @@ function string(n::BigInt; base::Integer = 10, pad::Integer = 1)
sv[i] = '0' % UInt8
end
isneg(n) && (sv[1] = '-' % UInt8)
String(sv)
unsafe_takestring(sv)
end

function digits!(a::AbstractVector{T}, n::BigInt; base::Integer = 10) where {T<:Integer}
Expand Down
12 changes: 6 additions & 6 deletions base/intfuncs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -792,7 +792,7 @@ function bin(x::Unsigned, pad::Int, neg::Bool)
i -= 1
end
neg && (@inbounds a[1] = 0x2d) # UInt8('-')
String(a)
unsafe_takestring(a)
end

function oct(x::Unsigned, pad::Int, neg::Bool)
Expand All @@ -806,7 +806,7 @@ function oct(x::Unsigned, pad::Int, neg::Bool)
i -= 1
end
neg && (@inbounds a[1] = 0x2d) # UInt8('-')
String(a)
unsafe_takestring(a)
end

# 2-digit decimal characters ("00":"99")
Expand Down Expand Up @@ -876,7 +876,7 @@ function dec(x::Unsigned, pad::Int, neg::Bool)
a = StringMemory(n)
append_c_digits_fast(n, x, a, 1)
neg && (@inbounds a[1] = 0x2d) # UInt8('-')
String(a)
unsafe_takestring(a)
end

function hex(x::Unsigned, pad::Int, neg::Bool)
Expand All @@ -897,7 +897,7 @@ function hex(x::Unsigned, pad::Int, neg::Bool)
@inbounds a[i] = d + ifelse(d > 0x9, 0x57, 0x30)
end
neg && (@inbounds a[1] = 0x2d) # UInt8('-')
String(a)
unsafe_takestring(a)
end

const base36digits = UInt8['0':'9';'a':'z']
Expand All @@ -922,7 +922,7 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool)
i -= 1
end
neg && (@inbounds a[1] = 0x2d) # UInt8('-')
String(a)
unsafe_takestring(a)
end

split_sign(n::Integer) = unsigned(abs(n)), n < 0
Expand Down Expand Up @@ -998,7 +998,7 @@ function bitstring(x::T) where {T}
x = lshr_int(x, 4)
i -= 4
end
return String(str)
return unsafe_takestring(str)
end

"""
Expand Down
13 changes: 7 additions & 6 deletions base/strings/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,7 @@ by [`take!`](@ref) on a writable [`IOBuffer`](@ref) and by calls to
In other cases, `Vector{UInt8}` data may be copied, but `v` is truncated anyway
to guarantee consistent behavior.
"""
String(v::AbstractVector{UInt8}) = String(copyto!(StringMemory(length(v)), v))
function String(v::Memory{UInt8})
len = length(v)
len == 0 && return ""
return ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), v, len)
end
String(v::AbstractVector{UInt8}) = unsafe_takestring(copyto!(StringMemory(length(v)), v))
function String(v::Vector{UInt8})
#return ccall(:jl_array_to_string, Ref{String}, (Any,), v)
len = length(v)
Expand All @@ -83,6 +78,12 @@ function String(v::Vector{UInt8})
return str
end

"Create a string re-using the memory, if possible.
Mutating or reading the memory after calling this function is undefined behaviour."
function unsafe_takestring(m::Memory{UInt8})
isempty(m) ? "" : ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), m, length(m))
end

"""
unsafe_string(p::Ptr{UInt8}, [length::Integer])
Expand Down
2 changes: 1 addition & 1 deletion base/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1217,7 +1217,7 @@ function bytes2hex(itr)
b[2i - 1] = hex_chars[1 + x >> 4]
b[2i ] = hex_chars[1 + x & 0xf]
end
return String(b)
return unsafe_takestring(b)
end

function bytes2hex(io::IO, itr)
Expand Down
2 changes: 1 addition & 1 deletion base/uuid.jl
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ let groupings = [36:-1:25; 23:-1:20; 18:-1:15; 13:-1:10; 8:-1:1]
u >>= 4
end
@inbounds a[24] = a[19] = a[14] = a[9] = '-'
return String(a)
return unsafe_takestring(a)
end
end

Expand Down
2 changes: 1 addition & 1 deletion stdlib/FileWatching/src/pidfile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ function open_exclusive(path::String;
end

function _rand_filename(len::Int=4) # modified from Base.Libc
slug = Base.StringMemory(len)
slug = Base.StringVector(len)
chars = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
for i = 1:len
slug[i] = chars[(Libc.rand() % length(chars)) + 1]
Expand Down
11 changes: 11 additions & 0 deletions test/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1093,6 +1093,17 @@ let v = [0x40,0x41,0x42]
@test String(view(v, 2:3)) == "AB"
end

# issue #54369
let v = Base.StringMemory(3)
v .= [0x41,0x42,0x43]
s = String(v)
@test s == "ABC"
@test v == [0x41,0x42,0x43]
v[1] = 0x43
@test s == "ABC"
@test v == [0x43,0x42,0x43]
end

# make sure length for identical String and AbstractString return the same value, PR #25533
let rng = MersenneTwister(1), strs = ["∀εa∀aε"*String(rand(rng, UInt8, 100))*"∀εa∀aε",
String(rand(rng, UInt8, 200))]
Expand Down

0 comments on commit 953902d

Please sign in to comment.