From d7b9ac8281cd988ffb5da9b0e7deed23b8d5cb28 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 13 Feb 2024 10:32:15 -0500 Subject: [PATCH] change IOBuffer to use Memory internally (#53192) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An Array is often still allocated on output, but this gives the compiler a chance to potentially elide that in certain cases. For measurement, it seems about 10% faster as a string builder: ``` julia> @btime repr("hello\nworld"^10); 1.096 μs (10 allocations: 640 bytes) # master 973.000 ns (9 allocations: 608 bytes) # PR 994.000 ns (8 allocations: 576 bytes) # also PR, after Revise-ing Base.wrap ``` --- base/array.jl | 34 +++-- base/iobuffer.jl | 194 ++++++++++++++++++----------- base/iostream.jl | 22 ++-- base/stream.jl | 6 +- base/strings/annotated.jl | 17 +-- base/strings/string.jl | 27 +++- base/subarray.jl | 3 + src/array.c | 15 --- src/genericmemory.c | 2 + src/jl_exported_funcs.inc | 1 - stdlib/REPL/src/LineEdit.jl | 39 +++--- stdlib/REPL/test/precompilation.jl | 2 +- test/iobuffer.jl | 1 + test/show.jl | 4 +- 14 files changed, 230 insertions(+), 137 deletions(-) diff --git a/base/array.jl b/base/array.jl index 600ae73d2338e..e1a1ee0ecf450 100644 --- a/base/array.jl +++ b/base/array.jl @@ -3067,7 +3067,8 @@ of [`unsafe_wrap`](@ref) utilizing `Memory` or `MemoryRef` instead of raw pointe """ function wrap end -@eval @propagate_inbounds function wrap(::Type{Array}, ref::MemoryRef{T}, dims::NTuple{N, Integer}) where {T, N} +# validity checking for _wrap calls, separate from allocation of Array so that it can be more likely to inline into the caller +function _wrap(ref::MemoryRef{T}, dims::NTuple{N, Int}) where {T, N} mem = ref.mem mem_len = length(mem) + 1 - memoryrefoffset(ref) len = Core.checked_dims(dims...) 
@@ -3076,18 +3077,36 @@ function wrap end
         mem = ccall(:jl_genericmemory_slice, Memory{T}, (Any, Ptr{Cvoid}, Int), mem, ref.ptr_or_offset, len)
         ref = MemoryRef(mem)
     end
-    $(Expr(:new, :(Array{T, N}), :ref, :dims))
+    return ref
 end
 
 @noinline invalid_wrap_err(len, dims, proddims) = throw(DimensionMismatch(
     "Attempted to wrap a MemoryRef of length $len with an Array of size dims=$dims, which is invalid because prod(dims) = $proddims > $len, so that the array would have more elements than the underlying memory can store."))
 
-function wrap(::Type{Array}, m::Memory{T}, dims::NTuple{N, Integer}) where {T, N}
-    wrap(Array, MemoryRef(m), dims)
+@eval @propagate_inbounds function wrap(::Type{Array}, m::MemoryRef{T}, dims::NTuple{N, Integer}) where {T, N}
+    dims = convert(Dims, dims)
+    ref = _wrap(m, dims)
+    $(Expr(:new, :(Array{T, N}), :ref, :dims))
+end
+
+@eval @propagate_inbounds function wrap(::Type{Array}, m::Memory{T}, dims::NTuple{N, Integer}) where {T, N}
+    dims = convert(Dims, dims)
+    ref = _wrap(MemoryRef(m), dims)
+    $(Expr(:new, :(Array{T, N}), :ref, :dims))
+end
+@eval @propagate_inbounds function wrap(::Type{Array}, m::MemoryRef{T}, l::Integer) where {T}
+    dims = (Int(l),)
+    ref = _wrap(m, dims)
+    $(Expr(:new, :(Array{T, 1}), :ref, :dims))
 end
-function wrap(::Type{Array}, m::MemoryRef{T}, l::Integer) where {T}
-    wrap(Array, m, (l,))
+@eval @propagate_inbounds function wrap(::Type{Array}, m::Memory{T}, l::Integer) where {T}
+    dims = (Int(l),)
+    # pass the Int-converted dims: _wrap only accepts NTuple{N, Int}, so (l,) fails for non-Int l
+    ref = _wrap(MemoryRef(m), dims)
+    $(Expr(:new, :(Array{T, 1}), :ref, :dims))
 end
-function wrap(::Type{Array}, m::Memory{T}, l::Integer) where {T}
-    wrap(Array, MemoryRef(m), (l,))
+@eval @propagate_inbounds function wrap(::Type{Array}, m::Memory{T}) where {T}
+    ref = MemoryRef(m)
+    dims = (length(m),)
+    $(Expr(:new, :(Array{T, 1}), :ref, :dims))
 end
diff --git a/base/iobuffer.jl b/base/iobuffer.jl
index 895205549bc7e..dadb13e1f1e6a 100644
--- a/base/iobuffer.jl
+++ b/base/iobuffer.jl
@@ -4,32 +4,45 @@
 
 # Stateful string
mutable struct GenericIOBuffer{T<:AbstractVector{UInt8}} <: IO - data::T # T should support: getindex, setindex!, length, copyto!, and resize! + data::T # T should support: getindex, setindex!, length, copyto!, similar, and (optionally) resize! reinit::Bool # if true, data needs to be re-allocated (after take!) readable::Bool writable::Bool seekable::Bool # if not seekable, implementation is free to destroy (compact) past read data append::Bool # add data at end instead of at pointer - size::Int # end pointer (and write pointer if append == true) + size::Int # end pointer (and write pointer if append == true) + offset maxsize::Int # fixed array size (typically pre-allocated) - ptr::Int # read (and maybe write) pointer + ptr::Int # read (and maybe write) pointer + offset + offset::Int # offset of ptr and size from actual start of data and actual size mark::Int # reset mark location for ptr (or <0 for no mark) function GenericIOBuffer{T}(data::T, readable::Bool, writable::Bool, seekable::Bool, append::Bool, maxsize::Integer) where T<:AbstractVector{UInt8} require_one_based_indexing(data) - new(data,false,readable,writable,seekable,append,length(data),maxsize,1,-1) + return new(data, false, readable, writable, seekable, append, length(data), maxsize, 1, 0, -1) end end -const IOBuffer = GenericIOBuffer{Vector{UInt8}} + +const IOBuffer = GenericIOBuffer{Memory{UInt8}} function GenericIOBuffer(data::T, readable::Bool, writable::Bool, seekable::Bool, append::Bool, maxsize::Integer) where T<:AbstractVector{UInt8} GenericIOBuffer{T}(data, readable, writable, seekable, append, maxsize) end +function GenericIOBuffer(data::Vector{UInt8}, readable::Bool, writable::Bool, seekable::Bool, append::Bool, + maxsize::Integer) + ref = data.ref + buf = GenericIOBuffer(ref.mem, readable, writable, seekable, append, maxsize) + offset = memoryrefoffset(ref) - 1 + buf.ptr += offset + buf.size = length(data) + offset + buf.offset = offset + return buf +end # allocate Vector{UInt8}s for 
IOBuffer storage that can efficiently become Strings -StringVector(n::Integer) = unsafe_wrap(Vector{UInt8}, _string_n(n)) +StringMemory(n::Integer) = unsafe_wrap(Memory{UInt8}, _string_n(n)) +StringVector(n::Integer) = wrap(Array, StringMemory(n)) # IOBuffers behave like Files. They are typically readable and writable. They are seekable. (They can be appendable). @@ -98,7 +111,7 @@ function IOBuffer( flags = open_flags(read=read, write=write, append=append, truncate=truncate) buf = GenericIOBuffer(data, flags.read, flags.write, true, flags.append, Int(maxsize)) if flags.truncate - buf.size = 0 + buf.size = buf.offset end return buf end @@ -113,7 +126,7 @@ function IOBuffer(; size = sizehint !== nothing ? Int(sizehint) : maxsize != typemax(Int) ? Int(maxsize) : 32 flags = open_flags(read=read, write=write, append=append, truncate=truncate) buf = IOBuffer( - StringVector(size), + StringMemory(size), read=flags.read, write=flags.write, append=flags.append, @@ -135,12 +148,12 @@ See [`IOBuffer`](@ref) for the available constructors. If `data` is given, creates a `PipeBuffer` to operate on a data vector, optionally specifying a size beyond which the underlying `Array` may not be grown. """ -PipeBuffer(data::AbstractVector{UInt8}=UInt8[]; maxsize::Int = typemax(Int)) = +PipeBuffer(data::AbstractVector{UInt8}=Memory{UInt8}(); maxsize::Int = typemax(Int)) = GenericIOBuffer(data, true, true, false, true, maxsize) -PipeBuffer(maxsize::Integer) = (x = PipeBuffer(StringVector(maxsize), maxsize = maxsize); x.size=0; x) +PipeBuffer(maxsize::Integer) = (x = PipeBuffer(StringMemory(maxsize), maxsize = maxsize); x.size = 0; x) _similar_data(b::GenericIOBuffer, len::Int) = similar(b.data, len) -_similar_data(b::IOBuffer, len::Int) = StringVector(len) +_similar_data(b::IOBuffer, len::Int) = StringMemory(len) function copy(b::GenericIOBuffer) ret = typeof(b)(b.reinit ? _similar_data(b, 0) : b.writable ? 
@@ -148,6 +161,8 @@ function copy(b::GenericIOBuffer) b.readable, b.writable, b.seekable, b.append, b.maxsize) ret.size = b.size ret.ptr = b.ptr + ret.mark = b.mark + ret.offset = b.offset return ret end @@ -156,9 +171,9 @@ show(io::IO, b::GenericIOBuffer) = print(io, "IOBuffer(data=UInt8[...], ", "writable=", b.writable, ", ", "seekable=", b.seekable, ", ", "append=", b.append, ", ", - "size=", b.size, ", ", + "size=", b.size - b.offset, ", ", "maxsize=", b.maxsize == typemax(Int) ? "Inf" : b.maxsize, ", ", - "ptr=", b.ptr, ", ", + "ptr=", b.ptr - b.offset, ", ", "mark=", b.mark, ")") @noinline function _throw_not_readable() @@ -240,11 +255,9 @@ read(from::GenericIOBuffer, ::Type{Ptr{T}}) where {T} = convert(Ptr{T}, read(fro isreadable(io::GenericIOBuffer) = io.readable iswritable(io::GenericIOBuffer) = io.writable -# TODO: GenericIOBuffer is not iterable, so doesn't really have a length. -# This should maybe be sizeof() instead. -#length(io::GenericIOBuffer) = (io.seekable ? io.size : bytesavailable(io)) +filesize(io::GenericIOBuffer) = (io.seekable ? 
io.size - io.offset : bytesavailable(io)) bytesavailable(io::GenericIOBuffer) = io.size - io.ptr + 1 -position(io::GenericIOBuffer) = io.ptr-1 +position(io::GenericIOBuffer) = io.ptr - io.offset - 1 function skip(io::GenericIOBuffer, n::Integer) seekto = io.ptr + n @@ -262,7 +275,7 @@ function seek(io::GenericIOBuffer, n::Integer) # of an GenericIOBuffer), so that would need to be fixed in order to throw an error here #(n < 0 || n > io.size) && throw(ArgumentError("Attempted to seek outside IOBuffer boundaries.")) #io.ptr = n+1 - io.ptr = max(min(n+1, io.size+1), 1) + io.ptr = min(max(0, n)+io.offset, io.size)+1 return io end @@ -271,32 +284,66 @@ function seekend(io::GenericIOBuffer) return io end +# choose a resize strategy based on whether `resize!` is defined: +# for a Vector, we use `resize!`, but for most other types, +# this calls `similar`+copy +function _resize!(io::GenericIOBuffer, sz::Int) + a = io.data + offset = io.offset + if applicable(resize!, a, sz) + if offset != 0 + size = io.size + size > offset && copyto!(a, 1, a, offset + 1, min(sz, size - offset)) + io.ptr -= offset + io.size -= offset + io.offset = 0 + end + resize!(a, sz) + else + size = io.size + if size >= sz && sz != 0 + b = a + else + b = _similar_data(io, sz == 0 ? 
0 : max(overallocation(size - io.offset), sz)) + end + size > offset && copyto!(b, 1, a, offset + 1, min(sz, size - offset)) + io.data = b + io.ptr -= offset + io.size -= offset + io.offset = 0 + end + return io +end + function truncate(io::GenericIOBuffer, n::Integer) io.writable || throw(ArgumentError("truncate failed, IOBuffer is not writeable")) io.seekable || throw(ArgumentError("truncate failed, IOBuffer is not seekable")) n < 0 && throw(ArgumentError("truncate failed, n bytes must be ≥ 0, got $n")) n > io.maxsize && throw(ArgumentError("truncate failed, $(n) bytes is exceeds IOBuffer maxsize $(io.maxsize)")) + n = Int(n) if io.reinit io.data = _similar_data(io, n) io.reinit = false - elseif n > length(io.data) - resize!(io.data, n) + elseif n > length(io.data) + io.offset + _resize!(io, n) end + ismarked(io) && io.mark > n && unmark(io) + n += io.offset io.data[io.size+1:n] .= 0 io.size = n io.ptr = min(io.ptr, n+1) - ismarked(io) && io.mark > n && unmark(io) return io end function compact(io::GenericIOBuffer) io.writable || throw(ArgumentError("compact failed, IOBuffer is not writeable")) io.seekable && throw(ArgumentError("compact failed, IOBuffer is seekable")) + io.reinit && return local ptr::Int, bytes_to_move::Int - if ismarked(io) && io.mark < io.ptr - if io.mark == 0 return end - ptr = io.mark - bytes_to_move = bytesavailable(io) + (io.ptr-io.mark) + if ismarked(io) && io.mark < position(io) + io.mark == 0 && return + ptr = io.mark + io.offset + bytes_to_move = bytesavailable(io) + (io.ptr - ptr) else ptr = io.ptr bytes_to_move = bytesavailable(io) @@ -304,19 +351,24 @@ function compact(io::GenericIOBuffer) copyto!(io.data, 1, io.data, ptr, bytes_to_move) io.size -= ptr - 1 io.ptr -= ptr - 1 - io.mark -= ptr - 1 - return io + io.offset = 0 + return end @noinline function ensureroom_slowpath(io::GenericIOBuffer, nshort::UInt) io.writable || throw(ArgumentError("ensureroom failed, IOBuffer is not writeable")) + if io.reinit + io.data = 
_similar_data(io, nshort % Int) + io.reinit = false + end if !io.seekable - if !ismarked(io) && io.ptr > 1 && io.size <= io.ptr - 1 + if !ismarked(io) && io.ptr > io.offset+1 && io.size <= io.ptr - 1 io.ptr = 1 io.size = 0 + io.offset = 0 else - datastart = ismarked(io) ? io.mark : io.ptr - if (io.size+nshort > io.maxsize) || + datastart = (ismarked(io) ? io.mark : io.ptr - io.offset) + if (io.size-io.offset+nshort > io.maxsize) || (datastart > 4096 && datastart > io.size - io.ptr) || (datastart > 262144) # apply somewhat arbitrary heuristics to decide when to destroy @@ -330,23 +382,18 @@ end @inline ensureroom(io::GenericIOBuffer, nshort::Int) = ensureroom(io, UInt(nshort)) @inline function ensureroom(io::GenericIOBuffer, nshort::UInt) - if !io.writable || (!io.seekable && io.ptr > 1) + if !io.writable || (!io.seekable && io.ptr > io.offset+1) || io.reinit ensureroom_slowpath(io, nshort) end - n = min((nshort % Int) + (io.append ? io.size : io.ptr-1), io.maxsize) - if io.reinit - io.data = _similar_data(io, n) - io.reinit = false - else - l = length(io.data) - if n > l - _growend!(io.data, (n - l) % UInt) - end + n = min((nshort % Int) + (io.append ? 
io.size : io.ptr-1) - io.offset, io.maxsize)
+    l = length(io.data) + io.offset
+    if n > l
+        _resize!(io, Int(n))
     end
     return io
 end
 
-eof(io::GenericIOBuffer) = (io.ptr-1 == io.size)
+eof(io::GenericIOBuffer) = (io.ptr - 1 >= io.size)
 
 function closewrite(io::GenericIOBuffer)
     io.writable = false
@@ -358,11 +405,12 @@ end
     io.writable = false
     io.seekable = false
     io.size = 0
+    io.offset = 0
     io.maxsize = 0
     io.ptr = 1
     io.mark = -1
-    if io.writable
-        resize!(io.data, 0)
+    if io.writable && !io.reinit
+        io.data = _resize!(io, 0)
     end
     nothing
 end
@@ -388,45 +436,45 @@ julia> String(take!(io))
 function take!(io::GenericIOBuffer)
     ismarked(io) && unmark(io)
     if io.seekable
-        nbytes = io.size
-        data = copyto!(StringVector(nbytes), 1, io.data, 1, nbytes)
+        nbytes = io.size - io.offset
+        data = copyto!(StringVector(nbytes), 1, io.data, io.offset + 1, nbytes)
     else
         nbytes = bytesavailable(io)
-        data = read!(io,StringVector(nbytes))
+        data = read!(io, StringVector(nbytes))
     end
     if io.writable
         io.ptr = 1
         io.size = 0
+        io.offset = 0
     end
     return data
 end
 function take!(io::IOBuffer)
     ismarked(io) && unmark(io)
     if io.seekable
-        if io.writable
-            if io.reinit
-                data = StringVector(0)
-            else
-                data = resize!(io.data, io.size)
-                io.reinit = true
-            end
+        nbytes = filesize(io)
+        if nbytes == 0 || io.reinit
+            data = StringVector(0)
+        elseif io.writable
+            data = wrap(Array, MemoryRef(io.data, io.offset + 1), nbytes)
         else
-            data = copyto!(StringVector(io.size), 1, io.data, 1, io.size)
+            data = copyto!(StringVector(nbytes), 1, io.data, io.offset + 1, nbytes) # io.size includes offset; allocate only nbytes
         end
     else
         nbytes = bytesavailable(io)
-        if io.writable
-            data = io.data
-            io.reinit = true
-            _deletebeg!(data, io.ptr-1)
-            resize!(data, nbytes)
+        if nbytes == 0
+            data = StringVector(0)
+        elseif io.writable
+            data = wrap(Array, MemoryRef(io.data, io.ptr), nbytes)
         else
             data = read!(io, StringVector(nbytes))
         end
     end
     if io.writable
+        io.reinit = true
         io.ptr = 1
         io.size = 0
+        io.offset = 0
     end
     return data
 end
@@ -440,17
+488,23 @@ state. This should only be used internally for performance-critical `String` routines that immediately discard `io` afterwards, and it *assumes* that `io` is writable and seekable. -It saves no allocations compared to `take!`, it just omits some checks. +It might save an allocation compared to `take!` (if the compiler elides the +Array allocation), as well as omits some checks. """ -_unsafe_take!(io::IOBuffer) = resize!(io.data, io.size) +_unsafe_take!(io::IOBuffer) = + wrap(Array, io.size == io.offset ? + MemoryRef(Memory{UInt8}()) : + MemoryRef(io.data, io.offset + 1), + io.size - io.offset) function write(to::IO, from::GenericIOBuffer) + written::Int = bytesavailable(from) if to === from from.ptr = from.size + 1 - return 0 + else + written = GC.@preserve from unsafe_write(to, pointer(from.data, from.ptr), UInt(written)) + from.ptr += written end - written::Int = GC.@preserve from unsafe_write(to, pointer(from.data, from.ptr), UInt(bytesavailable(from))) - from.ptr += written return written end @@ -497,13 +551,13 @@ function readbytes!(io::GenericIOBuffer, b::Array{UInt8}, nb::Int) read_sub(io, b, 1, nr) return nr end -read(io::GenericIOBuffer) = read!(io,StringVector(bytesavailable(io))) +read(io::GenericIOBuffer) = read!(io, StringVector(bytesavailable(io))) readavailable(io::GenericIOBuffer) = read(io) -read(io::GenericIOBuffer, nb::Integer) = read!(io,StringVector(min(nb, bytesavailable(io)))) +read(io::GenericIOBuffer, nb::Integer) = read!(io, StringVector(min(nb, bytesavailable(io)))) function occursin(delim::UInt8, buf::IOBuffer) p = pointer(buf.data, buf.ptr) - q = GC.@preserve buf ccall(:memchr,Ptr{UInt8},(Ptr{UInt8},Int32,Csize_t),p,delim,bytesavailable(buf)) + q = GC.@preserve buf ccall(:memchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p, delim, bytesavailable(buf)) return q != C_NULL end @@ -532,8 +586,8 @@ end function copyline(out::GenericIOBuffer, s::IO; keep::Bool=false) copyuntil(out, s, 0x0a, keep=true) line = out.data - i = out.size 
- if keep || i == 0 || line[i] != 0x0a + i = out.size # XXX: this is only correct for appended data. if the data was inserted, only ptr should change + if keep || i == out.offset || line[i] != 0x0a return out elseif i < 2 || line[i-1] != 0x0d i -= 1 diff --git a/base/iostream.jl b/base/iostream.jl index ba422cd692fcd..5d972945e00e0 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -455,26 +455,24 @@ end function copyuntil(out::IOBuffer, s::IOStream, delim::UInt8; keep::Bool=false) ensureroom(out, 1) # make sure we can read at least 1 byte, for iszero(n) check below - ptr = (out.append ? out.size+1 : out.ptr) - d = out.data - len = length(d) while true + d = out.data + len = length(d) + ptr = (out.append ? out.size+1 : out.ptr) GC.@preserve d @_lock_ios s n= Int(ccall(:jl_readuntil_buf, Csize_t, (Ptr{Cvoid}, UInt8, Ptr{UInt8}, Csize_t), s.ios, delim, pointer(d, ptr), (len - ptr + 1) % Csize_t)) iszero(n) && break ptr += n - if d[ptr-1] == delim - keep || (ptr -= 1) - break - end + found = (d[ptr - 1] == delim) + found && !keep && (ptr -= 1) + out.size = max(out.size, ptr - 1) + out.append || (out.ptr = ptr) + found && break (eof(s) || len == out.maxsize) && break len = min(2len + 64, out.maxsize) - resize!(d, len) - end - out.size = max(out.size, ptr - 1) - if !out.append - out.ptr = ptr + ensureroom(out, len) + @assert length(out.data) >= len end return out end diff --git a/base/stream.jl b/base/stream.jl index 14621c464ce1b..3de61181e978d 100644 --- a/base/stream.jl +++ b/base/stream.jl @@ -608,7 +608,7 @@ end function alloc_request(buffer::IOBuffer, recommended_size::UInt) ensureroom(buffer, Int(recommended_size)) ptr = buffer.append ? 
buffer.size + 1 : buffer.ptr - nb = min(length(buffer.data), buffer.maxsize) - ptr + 1 + nb = min(length(buffer.data)-buffer.offset, buffer.maxsize) + buffer.offset - ptr + 1 return (Ptr{Cvoid}(pointer(buffer.data, ptr)), nb) end @@ -932,7 +932,7 @@ function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb::Int) nread = readbytes!(sbuf, a, nb) else newbuf = PipeBuffer(a, maxsize=nb) - newbuf.size = 0 # reset the write pointer to the beginning + newbuf.size = newbuf.offset # reset the write pointer to the beginning nread = try s.buffer = newbuf write(newbuf, sbuf) @@ -979,7 +979,7 @@ function unsafe_read(s::LibuvStream, p::Ptr{UInt8}, nb::UInt) unsafe_read(sbuf, p, nb) else newbuf = PipeBuffer(unsafe_wrap(Array, p, nb), maxsize=Int(nb)) - newbuf.size = 0 # reset the write pointer to the beginning + newbuf.size = newbuf.offset # reset the write pointer to the beginning try s.buffer = newbuf write(newbuf, sbuf) diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl index 86790d169420e..1eeaaa668d9ee 100644 --- a/base/strings/annotated.jl +++ b/base/strings/annotated.jl @@ -200,34 +200,36 @@ julia> annotatedstring(AnnotatedString("annotated", [(1:9, :label => 1)]), ", an function annotatedstring(xs...) 
isempty(xs) && return AnnotatedString("") size = mapreduce(_str_sizehint, +, xs) - s = IOContext(IOBuffer(sizehint=size), :color => true) + buf = IOBuffer(sizehint=size) + s = IOContext(buf, :color => true) annotations = Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}() for x in xs + size = filesize(s.io) if x isa AnnotatedString for (region, annot) in x.annotations - push!(annotations, (s.io.size .+ (region), annot)) + push!(annotations, (size .+ (region), annot)) end print(s, x.string) elseif x isa SubString{<:AnnotatedString} for (region, annot) in x.string.annotations start, stop = first(region), last(region) if start <= x.offset + x.ncodeunits && stop > x.offset - rstart = s.io.size + max(0, start - x.offset - 1) + 1 - rstop = s.io.size + min(stop, x.offset + x.ncodeunits) - x.offset + rstart = size + max(0, start - x.offset - 1) + 1 + rstop = size + min(stop, x.offset + x.ncodeunits) - x.offset push!(annotations, (rstart:rstop, annot)) end end print(s, SubString(x.string.string, x.offset, x.ncodeunits, Val(:noshift))) elseif x isa AnnotatedChar for annot in x.annotations - push!(annotations, (1+s.io.size:1+s.io.size, annot)) + push!(annotations, (1+size:1+size, annot)) end print(s, x.char) else print(s, x) end end - str = String(resize!(s.io.data, s.io.size)) + str = String(take!(buf)) AnnotatedString(str, annotations) end @@ -400,7 +402,8 @@ AnnotatedIOBuffer() = AnnotatedIOBuffer(IOBuffer()) function show(io::IO, aio::AnnotatedIOBuffer) show(io, AnnotatedIOBuffer) - print(io, '(', aio.io.size, " byte", ifelse(aio.io.size == 1, "", "s"), ", ", + size = filesize(aio.io) + print(io, '(', size, " byte", ifelse(size == 1, "", "s"), ", ", length(aio.annotations), " annotation", ifelse(length(aio.annotations) == 1, "", "s"), ")") end diff --git a/base/strings/string.jl b/base/strings/string.jl index 29216ae97aa37..b2afce897a937 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -63,8 +63,27 @@ by [`take!`](@ref) on a writable [`IOBuffer`](@ref) 
and by calls to In other cases, `Vector{UInt8}` data may be copied, but `v` is truncated anyway to guarantee consistent behavior. """ -String(v::AbstractVector{UInt8}) = String(copyto!(StringVector(length(v)), v)) -String(v::Vector{UInt8}) = ccall(:jl_array_to_string, Ref{String}, (Any,), v) +String(v::AbstractVector{UInt8}) = String(copyto!(StringMemory(length(v)), v)) +function String(v::Memory{UInt8}) + len = length(v) + len == 0 && return "" + return ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), v, len) +end +function String(v::Vector{UInt8}) + #return ccall(:jl_array_to_string, Ref{String}, (Any,), v) + len = length(v) + len == 0 && return "" + ref = v.ref + if ref.ptr_or_offset == ref.mem.ptr + str = ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), ref.mem, len) + else + str = ccall(:jl_pchar_to_string, Ref{String}, (Ptr{UInt8}, Int), ref, len) + end + # optimized empty!(v); sizehint!(v, 0) calls + setfield!(v, :size, (0,)) + setfield!(v, :ref, MemoryRef(Memory{UInt8}())) + return str +end """ unsafe_string(p::Ptr{UInt8}, [length::Integer]) @@ -97,8 +116,8 @@ Create a new `String` from an existing `AbstractString`. 
String(s::AbstractString) = print_to_string(s) @assume_effects :total String(s::Symbol) = unsafe_string(unsafe_convert(Ptr{UInt8}, s)) -unsafe_wrap(::Type{Vector{UInt8}}, s::String) = ccall(:jl_string_to_array, Ref{Vector{UInt8}}, (Any,), s) -unsafe_wrap(::Type{Vector{UInt8}}, s::FastContiguousSubArray{UInt8,1,Vector{UInt8}}) = unsafe_wrap(Vector{UInt8}, pointer(s), size(s)) +unsafe_wrap(::Type{Memory{UInt8}}, s::String) = ccall(:jl_string_to_genericmemory, Ref{Memory{UInt8}}, (Any,), s) +unsafe_wrap(::Type{Vector{UInt8}}, s::String) = wrap(Array, unsafe_wrap(Memory{UInt8}, s)) Vector{UInt8}(s::CodeUnits{UInt8,String}) = copyto!(Vector{UInt8}(undef, length(s)), s) Vector{UInt8}(s::String) = Vector{UInt8}(codeunits(s)) diff --git a/base/subarray.jl b/base/subarray.jl index 7b7e913332aaf..eca06fa3eacff 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -543,3 +543,6 @@ end function replace_in_print_matrix(S::SubArray{<:Any,1,<:AbstractVector}, i::Integer, j::Integer, s::AbstractString) replace_in_print_matrix(S.parent, to_indices(S.parent, reindex(S.indices, (i,)))..., j, s) end + +# XXX: this is considerably more unsafe than the other similarly named methods +unsafe_wrap(::Type{Vector{UInt8}}, s::FastContiguousSubArray{UInt8,1,Vector{UInt8}}) = unsafe_wrap(Vector{UInt8}, pointer(s), size(s)) diff --git a/src/array.c b/src/array.c index 198215831497d..979772e649727 100644 --- a/src/array.c +++ b/src/array.c @@ -99,21 +99,6 @@ jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t is JL_DLLEXPORT jl_genericmemory_t *jl_string_to_genericmemory(jl_value_t *str); -JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str) -{ - jl_task_t *ct = jl_current_task; - jl_genericmemory_t *mem = jl_string_to_genericmemory(str); - JL_GC_PUSH1(&mem); - int ndimwords = 1; - int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t); - jl_array_t *a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, jl_array_uint8_type); - a->ref.mem = mem; - a->ref.ptr_or_offset = 
mem->ptr; - a->dimsize[0] = mem->length; - JL_GC_POP(); - return a; -} - JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data, size_t nel, int own_buffer) { diff --git a/src/genericmemory.c b/src/genericmemory.c index 0bd4db30fd690..f0e7b695f1122 100644 --- a/src/genericmemory.c +++ b/src/genericmemory.c @@ -101,6 +101,8 @@ JL_DLLEXPORT jl_genericmemory_t *jl_alloc_genericmemory(jl_value_t *mtype, size_ JL_DLLEXPORT jl_genericmemory_t *jl_string_to_genericmemory(jl_value_t *str) { + if (jl_string_len(str) == 0) + return (jl_genericmemory_t*)((jl_datatype_t*)jl_memory_uint8_type)->instance; jl_task_t *ct = jl_current_task; int tsz = sizeof(jl_genericmemory_t) + sizeof(void*); jl_genericmemory_t *m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tsz, jl_memory_uint8_type); diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index a72058f10f42d..91f989f611b22 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -436,7 +436,6 @@ XX(jl_stdout_stream) \ XX(jl_stored_inline) \ XX(jl_string_ptr) \ - XX(jl_string_to_array) \ XX(jl_subtype) \ XX(jl_subtype_env) \ XX(jl_subtype_env_size) \ diff --git a/stdlib/REPL/src/LineEdit.jl b/stdlib/REPL/src/LineEdit.jl index 01daef9d66899..9f1dd76b168af 100644 --- a/stdlib/REPL/src/LineEdit.jl +++ b/stdlib/REPL/src/LineEdit.jl @@ -813,9 +813,9 @@ end # returns the removed portion as a String function edit_splice!(s::BufferLike, r::Region=region(s), ins::String = ""; rigid_mark::Bool=true) A, B = first(r), last(r) - A >= B && isempty(ins) && return String(ins) + A >= B && isempty(ins) && return ins buf = buffer(s) - pos = position(buf) + pos = position(buf) # n.b. 
position(), etc, are 0-indexed adjust_pos = true if A <= pos < B seek(buf, A) @@ -824,18 +824,29 @@ function edit_splice!(s::BufferLike, r::Region=region(s), ins::String = ""; rigi else adjust_pos = false end - if A < buf.mark < B || A == buf.mark == B - # rigid_mark is used only if the mark is strictly "inside" - # the region, or the region is empty and the mark is at the boundary - buf.mark = rigid_mark ? A : A + sizeof(ins) - elseif buf.mark >= B - buf.mark += sizeof(ins) - B + A - end - ensureroom(buf, B) # handle !buf.reinit from take! - ret = splice!(buf.data, A+1:B, codeunits(String(ins))) # position(), etc, are 0-indexed - buf.size = buf.size + sizeof(ins) - B + A - adjust_pos && seek(buf, position(buf) + sizeof(ins)) - return String(copy(ret)) + mark = buf.mark + if mark != -1 + if A < mark < B || A == mark == B + # rigid_mark is used only if the mark is strictly "inside" + # the region, or the region is empty and the mark is at the boundary + mark = rigid_mark ? A : A + sizeof(ins) + elseif mark >= B + mark += sizeof(ins) - B + A + end + buf.mark = -1 + end + # Implement ret = splice!(buf.data, A+1:B, codeunits(ins)) for a stream + pos = position(buf) + seek(buf, A) + ret = read(buf, A >= B ? 0 : B - A) + trail = read(buf) + seek(buf, A) + write(buf, ins) + write(buf, trail) + truncate(buf, position(buf)) + seek(buf, pos + (adjust_pos ? 
sizeof(ins) : 0)) + buf.mark = mark + return String(ret) end edit_splice!(s::MIState, ins::AbstractString) = edit_splice!(s, region(s), ins) diff --git a/stdlib/REPL/test/precompilation.jl b/stdlib/REPL/test/precompilation.jl index bf0c4e924a3c0..2dcf78c114d9a 100644 --- a/stdlib/REPL/test/precompilation.jl +++ b/stdlib/REPL/test/precompilation.jl @@ -27,7 +27,7 @@ if !Sys.iswindows() tracecompile_out = read(f, String) close(ptm) # close after reading so we don't get precompiles from error shutdown - expected_precompiles = 0 + expected_precompiles = 1 n_precompiles = count(r"precompile\(", tracecompile_out) diff --git a/test/iobuffer.jl b/test/iobuffer.jl index ec77903b4a5b8..6151f90f297ee 100644 --- a/test/iobuffer.jl +++ b/test/iobuffer.jl @@ -120,6 +120,7 @@ end Base.compact(io) @test position(io) == 0 @test ioslength(io) == 0 + Base._resize!(io,0) Base.ensureroom(io,50) @test position(io) == 0 @test ioslength(io) == 0 diff --git a/test/show.jl b/test/show.jl index 3e67155d0acb7..d6a691029d60a 100644 --- a/test/show.jl +++ b/test/show.jl @@ -1250,12 +1250,12 @@ end @testset "PR 17117: print_array" begin s = IOBuffer(Vector{UInt8}(), read=true, write=true) Base.print_array(s, [1, 2, 3]) - @test String(resize!(s.data, s.size)) == " 1\n 2\n 3" + @test String(take!(s)) == " 1\n 2\n 3" close(s) s2 = IOBuffer(Vector{UInt8}(), read=true, write=true) z = zeros(0,0,0,0,0,0,0,0) Base.print_array(s2, z) - @test String(resize!(s2.data, s2.size)) == "" + @test String(take!(s2)) == "" close(s2) end