From 2730f29e6d5c1d0f4dabfc21693aab057aae941b Mon Sep 17 00:00:00 2001 From: TEC Date: Sat, 21 Oct 2023 19:48:18 +0800 Subject: [PATCH 1/8] Introduce AnnotatedIOBuffer This allows for styled content to be constructed incrementally, without resorting to repeated concatenation. It operates very similarly to IOContext, just with a special `write` method and specifically wrapping an IOBuffer. --- base/strings/annotated.jl | 50 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl index c0f3623f41696..05bd366a80782 100644 --- a/base/strings/annotated.jl +++ b/base/strings/annotated.jl @@ -386,3 +386,53 @@ annotations(s::SubString{<:AnnotatedString}, pos::UnitRange{<:Integer}) = Get all annotations of `chr`. """ annotations(c::AnnotatedChar) = c.annotations + +## AnnotatedIOBuffer + +struct AnnotatedIOBuffer <: IO + io::IOBuffer + annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}} +end + +AnnotatedIOBuffer(io::IOBuffer) = AnnotatedIOBuffer(io, Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}()) +AnnotatedIOBuffer() = AnnotatedIOBuffer(IOBuffer()) + +function show(io::IO, annio::AnnotatedIOBuffer) + show(io, AnnotatedIOBuffer) + print(io, '(', annio.io.size, " bytes)") +end + +position(io::AnnotatedIOBuffer) = position(io.io) +lock(io::AnnotatedIOBuffer) = lock(io.io) +unlock(io::AnnotatedIOBuffer) = unlock(io.io) + +function write(io::AnnotatedIOBuffer, astr::Union{AnnotatedString, SubString{<:AnnotatedString}}) + astr = AnnotatedString(astr) + offset = position(io.io) + for (region, annot) in astr.annotations + start, stop = first(region), last(region) + push!(io.annotations, (start+offset:stop+offset, annot)) + end + write(io.io, String(astr)) +end +write(io::AnnotatedIOBuffer, c::AnnotatedChar) = write(io, AnnotatedString(c)) +write(io::AnnotatedIOBuffer, x::AbstractString) = write(io.io, x) +write(io::AnnotatedIOBuffer, s::Union{SubString{String}, String}) = write(io.io, 
s) +write(io::AnnotatedIOBuffer, x::UInt8) = write(io.io, x) + +""" + read(io::AnnotatedIOBuffer, AnnotatedString) + +Read the entirety of `io`, as an `AnnotatedString`. This preserves the +annotations of any `AnnotatedString`s written to `io` and otherwise acts like +`read(io::IO, String)`. +""" +function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{String}}) + str = String(take!(io.io)) + annots = copy(io.annotations) + empty!(io.annotations) + seekstart(io.io) + AnnotatedString(str, annots) +end +read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{AbstractString}}) = read(io, AnnotatedString{String}) +read(io::AnnotatedIOBuffer, ::Type{AnnotatedString}) = read(io, AnnotatedString{String}) From 311553e4987e0dd426fc9d0bc9e8587f04b89281 Mon Sep 17 00:00:00 2001 From: TEC Date: Thu, 14 Dec 2023 23:09:51 +0800 Subject: [PATCH 2/8] Make AnnotatedIOBuffer behave more like IOBuffer A few extra methods help a lot. It also turns out that we don't actually need to implement lock/unlock, the fallback implementations are enough. 
--- base/strings/annotated.jl | 56 ++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl index 05bd366a80782..4a23fd037e62e 100644 --- a/base/strings/annotated.jl +++ b/base/strings/annotated.jl @@ -397,14 +397,27 @@ end AnnotatedIOBuffer(io::IOBuffer) = AnnotatedIOBuffer(io, Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}()) AnnotatedIOBuffer() = AnnotatedIOBuffer(IOBuffer()) -function show(io::IO, annio::AnnotatedIOBuffer) +function show(io::IO, aio::AnnotatedIOBuffer) show(io, AnnotatedIOBuffer) - print(io, '(', annio.io.size, " bytes)") + print(io, '(', aio.io.size, " byte", ifelse(aio.io.size == 1, "", "s"), ", ", + length(aio.annotations), " annotation", ifelse(length(aio.annotations) == 1, "", "s"), ")") end +isopen(io::AnnotatedIOBuffer) = isopen(io.io) +close(io::AnnotatedIOBuffer) = close(io.io) +closewrite(io::AnnotatedIOBuffer) = closewrite(io.io) +eof(io::AnnotatedIOBuffer) = eof(io.io) +peek(io::AnnotatedIOBuffer) = peek(io.io) position(io::AnnotatedIOBuffer) = position(io.io) -lock(io::AnnotatedIOBuffer) = lock(io.io) -unlock(io::AnnotatedIOBuffer) = unlock(io.io) +seek(io::AnnotatedIOBuffer, n::Integer) = (seek(io.io, n); io) +skip(io::AnnotatedIOBuffer, n::Integer) = (skip(io.io, n); io) +mark(io::AnnotatedIOBuffer) = mark(io.io) +reset(io::AnnotatedIOBuffer) = reset(io.io) +unmark(io::AnnotatedIOBuffer) = unmark(io.io) +ismarked(io::AnnotatedIOBuffer) = ismarked(io.io) +copy(io::AnnotatedIOBuffer) = AnnotatedIOBuffer(copy(io.io), copy(io.annotations)) +unsafe_write(io::AnnotatedIOBuffer, b::Ptr{UInt8}, len::UInt) = unsafe_write(io.io, b, len) +unsafe_read(io::AnnotatedIOBuffer, b::Ptr{UInt8}, len::UInt) = unsafe_read(io.io, b, len) function write(io::AnnotatedIOBuffer, astr::Union{AnnotatedString, SubString{<:AnnotatedString}}) astr = AnnotatedString(astr) @@ -418,21 +431,38 @@ end write(io::AnnotatedIOBuffer, c::AnnotatedChar) = write(io, 
AnnotatedString(c)) write(io::AnnotatedIOBuffer, x::AbstractString) = write(io.io, x) write(io::AnnotatedIOBuffer, s::Union{SubString{String}, String}) = write(io.io, s) -write(io::AnnotatedIOBuffer, x::UInt8) = write(io.io, x) +write(io::AnnotatedIOBuffer, b::UInt8) = write(io.io, b) """ read(io::AnnotatedIOBuffer, AnnotatedString) -Read the entirety of `io`, as an `AnnotatedString`. This preserves the -annotations of any `AnnotatedString`s written to `io` and otherwise acts like -`read(io::IO, String)`. +Read `io` as an `AnnotatedString`, jumping to the start if `eof(io)` holds. +This preserves the annotations of any `AnnotatedString`s written to `io` and +otherwise acts like `read(io::IO, String)`. """ function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{String}}) - str = String(take!(io.io)) - annots = copy(io.annotations) - empty!(io.annotations) - seekstart(io.io) - AnnotatedString(str, annots) + if eof(io) + seekstart(io.io) + AnnotatedString(read(io.io, String), copy(io.annotations)) + else + start = position(io.io) + annots = map(((range, val),) -> (max(1, first(range) - start):last(range)-start, val), + filter(((range, _),) -> last(range) > start, + io.annotations)) + AnnotatedString(read(io.io, String), annots) + end end read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{AbstractString}}) = read(io, AnnotatedString{String}) read(io::AnnotatedIOBuffer, ::Type{AnnotatedString}) = read(io, AnnotatedString{String}) +read(io::AnnotatedIOBuffer, x) = read(io.io, x) +# Avoid method ambiguity +read(io::AnnotatedIOBuffer, s::Type{String}) = read(io.io, s) +read(io::AnnotatedIOBuffer, b::Type{UInt8}) = read(io.io, b) + +function truncate(io::AnnotatedIOBuffer, size::Integer) + truncate(io.io, size) + filter!(((range, _),) -> first(range) <= size, io.annotations) + map!(((range, val),) -> (first(range):min(size, last(range)), val), + io.annotations, io.annotations) + io +end From c2b441b8c67a619020c33b18097c95fa4b9c4413 Mon Sep 17 00:00:00 2001 From: TEC 
Date: Thu, 28 Dec 2023 18:00:36 +0800 Subject: [PATCH 3/8] Make AnnotatedIOBuffer reading more generic While `String` is the only concrete type for which `read(::IOBuffer, ::Type{<:AbstractString})` is defined, it is entirely conceivable that some other custom string type could define a similar `read` method. Since making reading an `AnnotatedString` from an `AnnotatedIOBuffer` more generic is as easy as replacing the hardcoded `String` with a type parameter, we may as well do so. --- base/strings/annotated.jl | 86 +++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 34 deletions(-) diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl index 4a23fd037e62e..c1de401300327 100644 --- a/base/strings/annotated.jl +++ b/base/strings/annotated.jl @@ -389,7 +389,7 @@ annotations(c::AnnotatedChar) = c.annotations ## AnnotatedIOBuffer -struct AnnotatedIOBuffer <: IO +struct AnnotatedIOBuffer <: AbstractPipe io::IOBuffer annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}} end @@ -403,61 +403,79 @@ function show(io::IO, aio::AnnotatedIOBuffer) length(aio.annotations), " annotation", ifelse(length(aio.annotations) == 1, "", "s"), ")") end -isopen(io::AnnotatedIOBuffer) = isopen(io.io) -close(io::AnnotatedIOBuffer) = close(io.io) -closewrite(io::AnnotatedIOBuffer) = closewrite(io.io) -eof(io::AnnotatedIOBuffer) = eof(io.io) -peek(io::AnnotatedIOBuffer) = peek(io.io) +pipe_reader(io::AnnotatedIOBuffer) = io.io +pipe_writer(io::AnnotatedIOBuffer) = io.io + +# Useful `IOBuffer` methods that we don't get from `AbstractPipe` position(io::AnnotatedIOBuffer) = position(io.io) seek(io::AnnotatedIOBuffer, n::Integer) = (seek(io.io, n); io) +seekend(io::AnnotatedIOBuffer) = seekend(io.io) skip(io::AnnotatedIOBuffer, n::Integer) = (skip(io.io, n); io) -mark(io::AnnotatedIOBuffer) = mark(io.io) -reset(io::AnnotatedIOBuffer) = reset(io.io) -unmark(io::AnnotatedIOBuffer) = unmark(io.io) -ismarked(io::AnnotatedIOBuffer) = ismarked(io.io) 
copy(io::AnnotatedIOBuffer) = AnnotatedIOBuffer(copy(io.io), copy(io.annotations)) -unsafe_write(io::AnnotatedIOBuffer, b::Ptr{UInt8}, len::UInt) = unsafe_write(io.io, b, len) -unsafe_read(io::AnnotatedIOBuffer, b::Ptr{UInt8}, len::UInt) = unsafe_read(io.io, b, len) + +annotations(io::AnnotatedIOBuffer) = io.annotations function write(io::AnnotatedIOBuffer, astr::Union{AnnotatedString, SubString{<:AnnotatedString}}) astr = AnnotatedString(astr) offset = position(io.io) - for (region, annot) in astr.annotations - start, stop = first(region), last(region) - push!(io.annotations, (start+offset:stop+offset, annot)) + if !eof(io) + # If we are overwriting an existing span in the AnnotatedIOBuffer, + # clear out any overlapping pre-existing annotations. + span = offset+1:offset+ncodeunits(astr) + filter!(((range, _),) -> first(range) < first(span) || last(range) > last(span), io.annotations) + extras = Tuple{UnitRange{Int}, Pair{Symbol, Any}}[] + for i in eachindex(io.annotations) + range, annot = io.annotations[i] + # Test for partial overlap + if first(range) <= first(span) <= last(range) || first(range) <= last(span) <= last(range) + io.annotations[i] = (if first(range) < first(span) + first(range):first(span)-1 + else last(span)+1:last(range) end, annot) + # If `span` fits exactly within `range`, then we've only copied over + # the beginning overhang, but also need to conserve the end overhang. 
+ if first(range) < first(span) && last(span) < last(range) + push!(extras, (last(span)+1:last(range), annot)) + end + end + # Insert any extra entries in the appropriate position + for entry in extras + indices = searchsorted(io.annotations, (first(entry),), by=first) + splice!(io.annotations, indices, Tuple{UnitRange{Int}, Pair{Symbol, Any}}[entry]) + end + end + # Fill in the annotations from `astr`, accounting for `offset` + for (region, annot) in astr.annotations + region = first(region)+offset:last(region)+offset + indices = searchsorted(io.annotations, (region,), by=first) + splice!(io.annotations, indices, Tuple{UnitRange{Int}, Pair{Symbol, Any}}[(region, annot)]) + end + else + # Fill in the annotations from `astr`, accounting for `offset` + for (region, annot) in astr.annotations + region = first(region)+offset:last(region)+offset + push!(io.annotations, (region, annot)) + end end write(io.io, String(astr)) end + write(io::AnnotatedIOBuffer, c::AnnotatedChar) = write(io, AnnotatedString(c)) write(io::AnnotatedIOBuffer, x::AbstractString) = write(io.io, x) write(io::AnnotatedIOBuffer, s::Union{SubString{String}, String}) = write(io.io, s) write(io::AnnotatedIOBuffer, b::UInt8) = write(io.io, b) -""" - read(io::AnnotatedIOBuffer, AnnotatedString) -Read `io` as an `AnnotatedString`, jumping to the start if `eof(io)` holds. -This preserves the annotations of any `AnnotatedString`s written to `io` and -otherwise acts like `read(io::IO, String)`. 
-""" -function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{String}}) - if eof(io) - seekstart(io.io) - AnnotatedString(read(io.io, String), copy(io.annotations)) +function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{T}}) where {T <: AbstractString} + if (start = position(io)) == 0 + AnnotatedString(read(io.io, T), copy(io.annotations)) else - start = position(io.io) - annots = map(((range, val),) -> (max(1, first(range) - start):last(range)-start, val), - filter(((range, _),) -> last(range) > start, - io.annotations)) - AnnotatedString(read(io.io, String), annots) + annots = [(max(1, first(region) - start):last(region)-start, val) + for (region, val) in io.annotations if last(region) > start] + AnnotatedString(read(io.io, T), annots) end end read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{AbstractString}}) = read(io, AnnotatedString{String}) read(io::AnnotatedIOBuffer, ::Type{AnnotatedString}) = read(io, AnnotatedString{String}) -read(io::AnnotatedIOBuffer, x) = read(io.io, x) -# Avoid method ambiguity -read(io::AnnotatedIOBuffer, s::Type{String}) = read(io.io, s) -read(io::AnnotatedIOBuffer, b::Type{UInt8}) = read(io.io, b) function truncate(io::AnnotatedIOBuffer, size::Integer) truncate(io.io, size) From f4c5e13a6ec85bbab83d29601342c33a44f787b2 Mon Sep 17 00:00:00 2001 From: TEC Date: Thu, 28 Dec 2023 18:00:48 +0800 Subject: [PATCH 4/8] Also read AnnotatedChars from an AnnotatedIOBuffer The `read(::AnnotatedIOBuffer, AnnotatedString)` method is intended as an approximate analogue to `read(::IOBuffer, String)`. In the same sense, it makes sense to define `read(::AnnotatedIOBuffer, AnnotatedChar)` as an analogue to `read(::IOBuffer, Char)`. 
--- base/strings/annotated.jl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl index c1de401300327..ee75bb923b742 100644 --- a/base/strings/annotated.jl +++ b/base/strings/annotated.jl @@ -477,6 +477,15 @@ end read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{AbstractString}}) = read(io, AnnotatedString{String}) read(io::AnnotatedIOBuffer, ::Type{AnnotatedString}) = read(io, AnnotatedString{String}) +function read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{T}}) where {T <: AbstractChar} + pos = position(io) + char = read(io.io, T) + annots = [annot for (range, annot) in io.annotations if pos+1 in range] + AnnotatedChar(char, annots) +end +read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{AbstractChar}}) = read(io, AnnotatedChar{Char}) +read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar}) = read(io, AnnotatedChar{Char}) + function truncate(io::AnnotatedIOBuffer, size::Integer) truncate(io.io, size) filter!(((range, _),) -> first(range) <= size, io.annotations) From d49ba4f912848b0a0dcf71e6f3602f2533ffff27 Mon Sep 17 00:00:00 2001 From: TEC Date: Tue, 23 Jan 2024 18:04:00 +0800 Subject: [PATCH 5/8] Writing from one AnnotatedIOBuffer to another --- base/strings/annotated.jl | 84 +++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl index ee75bb923b742..10d5c43addf52 100644 --- a/base/strings/annotated.jl +++ b/base/strings/annotated.jl @@ -418,53 +418,69 @@ annotations(io::AnnotatedIOBuffer) = io.annotations function write(io::AnnotatedIOBuffer, astr::Union{AnnotatedString, SubString{<:AnnotatedString}}) astr = AnnotatedString(astr) offset = position(io.io) - if !eof(io) - # If we are overwriting an existing span in the AnnotatedIOBuffer, - # clear out any overlapping pre-existing annotations. 
- span = offset+1:offset+ncodeunits(astr) - filter!(((range, _),) -> first(range) < first(span) || last(range) > last(span), io.annotations) - extras = Tuple{UnitRange{Int}, Pair{Symbol, Any}}[] - for i in eachindex(io.annotations) - range, annot = io.annotations[i] - # Test for partial overlap - if first(range) <= first(span) <= last(range) || first(range) <= last(span) <= last(range) - io.annotations[i] = (if first(range) < first(span) - first(range):first(span)-1 - else last(span)+1:last(range) end, annot) - # If `span` fits exactly within `range`, then we've only copied over - # the beginning overhang, but also need to conserve the end overhang. - if first(range) < first(span) && last(span) < last(range) - push!(extras, (last(span)+1:last(range), annot)) - end - end - # Insert any extra entries in the appropriate position - for entry in extras - indices = searchsorted(io.annotations, (first(entry),), by=first) - splice!(io.annotations, indices, Tuple{UnitRange{Int}, Pair{Symbol, Any}}[entry]) + eof(io) || _clear_annotations_in_region!(io.annotations, offset+1:offset+ncodeunits(astr)) + _insert_annotations!(io, astr.annotations) + write(io.io, String(astr)) +end + +write(io::AnnotatedIOBuffer, c::AnnotatedChar) = write(io, AnnotatedString(c)) +write(io::AnnotatedIOBuffer, x::AbstractString) = write(io.io, x) +write(io::AnnotatedIOBuffer, s::Union{SubString{String}, String}) = write(io.io, s) +write(io::AnnotatedIOBuffer, b::UInt8) = write(io.io, b) + +function write(dest::AnnotatedIOBuffer, src::AnnotatedIOBuffer) + destpos = position(dest) + isappending = eof(dest) + srcpos = position(src) + nb = write(dest.io, src.io) + isappending || _clear_annotations_in_region!(dest.annotations, destpos:destpos+nb) + srcannots = [(max(1 + srcpos, first(region)):last(region), annot) + for (region, annot) in src.annotations if first(region) >= srcpos] + _insert_annotations!(dest, srcannots, destpos - srcpos) + nb +end + +function 
_clear_annotations_in_region!(annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, span::UnitRange{Int}) + # Clear out any overlapping pre-existing annotations. + filter!(((region, _),) -> first(region) < first(span) || last(region) > last(span), annotations) + extras = Tuple{UnitRange{Int}, Pair{Symbol, Any}}[] + for i in eachindex(annotations) + region, annot = annotations[i] + # Test for partial overlap + if first(region) <= first(span) <= last(region) || first(region) <= last(span) <= last(region) + annotations[i] = (if first(region) < first(span) + first(region):first(span)-1 + else last(span)+1:last(region) end, annot) + # If `span` fits exactly within `region`, then we've only copied over + # the beginning overhang, but also need to conserve the end overhang. + if first(region) < first(span) && last(span) < last(region) + push!(extras, (last(span)+1:last(region), annot)) end end - # Fill in the annotations from `astr`, accounting for `offset` - for (region, annot) in astr.annotations + # Insert any extra entries in the appropriate position + for entry in extras + indices = searchsorted(annotations, (first(entry),), by=first) + splice!(annotations, indices, Tuple{UnitRange{Int}, Pair{Symbol, Any}}[entry]) + end + end + annotations +end + +function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, offset::Int = position(io)) + if !eof(io) + for (region, annot) in annotations region = first(region)+offset:last(region)+offset indices = searchsorted(io.annotations, (region,), by=first) splice!(io.annotations, indices, Tuple{UnitRange{Int}, Pair{Symbol, Any}}[(region, annot)]) end else - # Fill in the annotations from `astr`, accounting for `offset` - for (region, annot) in astr.annotations + for (region, annot) in annotations region = first(region)+offset:last(region)+offset push!(io.annotations, (region, annot)) end end - write(io.io, String(astr)) end -write(io::AnnotatedIOBuffer, 
c::AnnotatedChar) = write(io, AnnotatedString(c)) -write(io::AnnotatedIOBuffer, x::AbstractString) = write(io.io, x) -write(io::AnnotatedIOBuffer, s::Union{SubString{String}, String}) = write(io.io, s) -write(io::AnnotatedIOBuffer, b::UInt8) = write(io.io, b) - - function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{T}}) where {T <: AbstractString} if (start = position(io)) == 0 AnnotatedString(read(io.io, T), copy(io.annotations)) From 2e6ebb94d083ae21c310dc842762b824a8a88b80 Mon Sep 17 00:00:00 2001 From: TEC Date: Sat, 16 Dec 2023 02:13:57 +0800 Subject: [PATCH 6/8] Tests for AnnotatedIOBuffer --- test/strings/annotated.jl | 54 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/test/strings/annotated.jl b/test/strings/annotated.jl index 70f490f4787ca..2a9fd4a702465 100644 --- a/test/strings/annotated.jl +++ b/test/strings/annotated.jl @@ -107,3 +107,57 @@ end @test reverse(str1) == Base.AnnotatedString("tset", [(1:4, :label => 5)]) @test reverse(str2) == Base.AnnotatedString("esac", [(2:3, :label => "oomph")]) end + +@testset "AnnotatedIOBuffer" begin + aio = Base.AnnotatedIOBuffer() + # Append-only writing + @test write(aio, Base.AnnotatedString("hello", [(1:5, :tag => 1)])) == 5 + @test write(aio, ' ') == 1 + @test write(aio, Base.AnnotatedString("world", [(1:5, :tag => 2)])) == 5 + @test Base.annotations(aio) == [(1:5, :tag => 1), (7:11, :tag => 2)] + # Reading + @test read(seekstart(deepcopy(aio.io)), String) == "hello world" + @test read(seekstart(deepcopy(aio)), String) == "hello world" + @test read(seek(aio, 0), Base.AnnotatedString) == Base.AnnotatedString("hello world", [(1:5, :tag => 1), (7:11, :tag => 2)]) + @test read(seek(aio, 1), Base.AnnotatedString) == Base.AnnotatedString("ello world", [(1:4, :tag => 1), (6:10, :tag => 2)]) + @test read(seek(aio, 4), Base.AnnotatedString) == Base.AnnotatedString("o world", [(1:1, :tag => 1), (3:7, :tag => 2)]) + @test read(seek(aio, 5), Base.AnnotatedString) == 
Base.AnnotatedString(" world", [(2:6, :tag => 2)]) + @test read(seekstart(truncate(deepcopy(aio), 5)), Base.AnnotatedString) == Base.AnnotatedString("hello", [(1:5, :tag => 1)]) + @test read(seekstart(truncate(deepcopy(aio), 6)), Base.AnnotatedString) == Base.AnnotatedString("hello ", [(1:5, :tag => 1)]) + @test read(seekstart(truncate(deepcopy(aio), 7)), Base.AnnotatedString) == Base.AnnotatedString("hello w", [(1:5, :tag => 1), (7:7, :tag => 2)]) + @test read(seek(aio, 0), Base.AnnotatedChar) == Base.AnnotatedChar('h', [:tag => 1]) + @test read(seek(aio, 5), Base.AnnotatedChar) == Base.AnnotatedChar(' ', Pair{Symbol, Any}[]) + @test read(seek(aio, 6), Base.AnnotatedChar) == Base.AnnotatedChar('w', [:tag => 2]) + # Check method compatibility with IOBuffer + @test position(aio) == 7 + @test seek(aio, 4) === aio + @test skip(aio, 2) === aio + @test Base.annotations(copy(aio)) == Base.annotations(aio) + @test take!(copy(aio).io) == take!(copy(aio.io)) + # Writing into the middle of the buffer + @test write(seek(aio, 6), "alice") == 5 # Replace 'world' with 'alice' + @test read(seekstart(aio), String) == "hello alice" + @test Base.annotations(aio) == [(1:5, :tag => 1), (7:11, :tag => 2)] # Should be unchanged + @test write(seek(aio, 0), Base.AnnotatedString("hey-o", [(1:5, :hey => 'o')])) == 5 + @test read(seekstart(aio), String) == "hey-o alice" + @test Base.annotations(aio) == [(1:5, :hey => 'o'), (7:11, :tag => 2)] # First annotation should have been entirely replaced + @test write(seek(aio, 7), Base.AnnotatedString("bbi", [(1:3, :hey => 'a')])) == 3 # a[lic => bbi]e ('alice' => 'abbie') + @test read(seekstart(aio), String) == "hey-o abbie" + @test Base.annotations(aio) == [(1:5, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)] + @test write(seek(aio, 0), Base.AnnotatedString("ab")) == 2 # Check first annotation's region is adjusted correctly + @test read(seekstart(aio), String) == "aby-o abbie" + @test Base.annotations(aio) == [(3:5, :hey 
=> 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)] + @test write(seek(aio, 3), Base.AnnotatedString("ss")) == 2 + @test read(seekstart(aio), String) == "abyss abbie" + @test Base.annotations(aio) == [(3:3, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)] + # Writing one buffer to another + newaio = Base.AnnotatedIOBuffer() + @test write(newaio, seekstart(aio)) == 11 + @test read(seekstart(newaio), String) == "abyss abbie" + @test Base.annotations(newaio) == Base.annotations(aio) + @test write(seek(newaio, 5), seek(aio, 5)) == 6 + @test Base.annotations(newaio) == Base.annotations(aio) + @test write(newaio, seek(aio, 5)) == 6 + @test read(seekstart(newaio), String) == "abyss abbie abbie" + @test Base.annotations(newaio) == vcat(Base.annotations(aio), [(13:13, :tag => 2), (14:16, :hey => 'a'), (17:17, :tag => 2)]) +end From 39839acf5363dc90f5253688c964a8f9a07e8efd Mon Sep 17 00:00:00 2001 From: TEC Date: Thu, 28 Dec 2023 18:35:15 +0800 Subject: [PATCH 7/8] Update the mention of annotated features in NEWS --- NEWS.md | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 6eb7e6509c69f..5dd6481a4f0b2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,11 +11,20 @@ New language features * The new macro `Base.Cartesian.@ncallkw` is analogous to `Base.Cartesian.@ncall`, but allows to add keyword arguments to the function call ([#51501]). * Support for Unicode 15.1 ([#51799]). -* A new `AbstractString` type, `AnnotatedString`, is introduced that allows for - regional annotations to be attached to an underlying string. This type is - particularly useful for holding styling information, and is used extensively - in the new `StyledStrings` standard library. There is also a new `AnnotatedChar` - type, that is the equivalent new `AbstractChar` type. +* Three new types around the idea of text with "annotations" (`Pair{Symbol, Any}` + entries, e.g. `:lang => "en"` or `:face => :magenta`). 
These annotations + are preserved across operations (e.g. string concatenation with `*`) when + possible. + * `AnnotatedString` is a new `AbstractString` type. It wraps an underlying + string and allows for annotations to be attached to regions of the string. + This type is used extensively in the new `StyledStrings` standard library to + hold styling information. + * `AnnotatedChar` is a new `AbstractChar` type. It wraps another char and + holds a list of annotations that apply to it. + * `AnnotatedIOBuffer` is a new `IO` type that mimics an `IOBuffer`, but has + specialised `read`/`write` methods for annotated content. This can be + thought of both as a "string builder" of sorts and also as glue between + annotated and unannotated content. * `Manifest.toml` files can now be renamed in the format `Manifest-v{major}.{minor}.toml` to be preferentially picked up by the given julia version. i.e. in the same folder, a `Manifest-v1.11.toml` would be used by v1.11 and `Manifest.toml` by every other julia From 2b9839b14aed9c56477cdda61edcc959c2d76caa Mon Sep 17 00:00:00 2001 From: TEC Date: Wed, 31 Jan 2024 16:22:45 +0800 Subject: [PATCH 8/8] Use insert! not splice! for new annotations Co-authored-by: Jameson Nash --- base/strings/annotated.jl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl index 10d5c43addf52..20ad53b78a757 100644 --- a/base/strings/annotated.jl +++ b/base/strings/annotated.jl @@ -323,14 +323,15 @@ To remove existing `label` annotations, use a value of `nothing`. 
""" function annotate!(s::AnnotatedString, range::UnitRange{Int}, @nospecialize(labelval::Pair{Symbol, <:Any})) label, val = labelval - indices = searchsorted(s.annotations, (range,), by=first) if val === nothing + indices = searchsorted(s.annotations, (range,), by=first) labelindex = filter(i -> first(s.annotations[i][2]) === label, indices) for index in Iterators.reverse(labelindex) deleteat!(s.annotations, index) end else - splice!(s.annotations, indices, [(range, Pair{Symbol, Any}(label, val))]) + sortedindex = searchsortedlast(s.annotations, (range,), by=first) + 1 + insert!(s.annotations, sortedindex, (range, Pair{Symbol, Any}(label, val))) end s end @@ -459,8 +460,8 @@ function _clear_annotations_in_region!(annotations::Vector{Tuple{UnitRange{Int}, end # Insert any extra entries in the appropriate position for entry in extras - indices = searchsorted(annotations, (first(entry),), by=first) - splice!(annotations, indices, Tuple{UnitRange{Int}, Pair{Symbol, Any}}[entry]) + sortedindex = searchsortedlast(annotations, (first(entry),), by=first) + 1 + insert!(annotations, sortedindex, entry) end end annotations @@ -470,8 +471,8 @@ function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{Tuple{U if !eof(io) for (region, annot) in annotations region = first(region)+offset:last(region)+offset - indices = searchsorted(io.annotations, (region,), by=first) - splice!(io.annotations, indices, Tuple{UnitRange{Int}, Pair{Symbol, Any}}[(region, annot)]) + sortedindex = searchsortedlast(io.annotations, (region,), by=first) + 1 + insert!(io.annotations, sortedindex, (region, annot)) end else for (region, annot) in annotations