Skip to content

Commit

Permalink
Introduce AnnotatedIOBuffer (JuliaLang#51807)
Browse files Browse the repository at this point in the history
This allows for styled content to be constructed incrementally, without
resorting to repeated concatenation. It operates very similarly to
IOContext, just with a special `write` method and specifically wrapping
an IOBuffer.
  • Loading branch information
vtjnash authored Feb 1, 2024
2 parents c16472b + 2b9839b commit f117a50
Show file tree
Hide file tree
Showing 3 changed files with 194 additions and 7 deletions.
19 changes: 14 additions & 5 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,20 @@ New language features
* The new macro `Base.Cartesian.@ncallkw` is analogous to `Base.Cartesian.@ncall`,
but also allows keyword arguments to be added to the function call ([#51501]).
* Support for Unicode 15.1 ([#51799]).
* A new `AbstractString` type, `AnnotatedString`, is introduced that allows for
regional annotations to be attached to an underlying string. This type is
particularly useful for holding styling information, and is used extensively
in the new `StyledStrings` standard library. There is also a new `AnnotatedChar`
type, that is the equivalent new `AbstractChar` type.
* Three new types around the idea of text with "annotations" (`Pair{Symbol, Any}`
entries, e.g. `:lang => "en"` or `:face => :magenta`). These annotations
are preserved across operations (e.g. string concatenation with `*`) when
possible.
* `AnnotatedString` is a new `AbstractString` type. It wraps an underlying
string and allows for annotations to be attached to regions of the string.
This type is used extensively in the new `StyledStrings` standard library to
hold styling information.
* `AnnotatedChar` is a new `AbstractChar` type. It wraps another char and
holds a list of annotations that apply to it.
* `AnnotatedIOBuffer` is a new `IO` type that mimics an `IOBuffer`, but has
specialised `read`/`write` methods for annotated content. This can be
thought of both as a "string builder" of sorts and also as glue between
annotated and unannotated content.
* `Manifest.toml` files can now be renamed in the format `Manifest-v{major}.{minor}.toml`
to be preferentially picked up by the given julia version. i.e. in the same folder,
a `Manifest-v1.11.toml` would be used by v1.11 and `Manifest.toml` by every other julia
Expand Down
128 changes: 126 additions & 2 deletions base/strings/annotated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -323,14 +323,15 @@ To remove existing `label` annotations, use a value of `nothing`.
"""
function annotate!(s::AnnotatedString, range::UnitRange{Int}, @nospecialize(labelval::Pair{Symbol, <:Any}))
    # `labelval` is a `label => value` pair. A value of `nothing` requests
    # removal of existing `label` annotations on exactly `range`; any other
    # value adds a new annotation. Returns `s`.
    label, val = labelval
    if val === nothing
        # Only annotations whose region equals `range` are candidates.
        indices = searchsorted(s.annotations, (range,), by=first)
        labelindex = filter(i -> first(s.annotations[i][2]) === label, indices)
        # Delete back-to-front so earlier indices stay valid.
        for index in Iterators.reverse(labelindex)
            deleteat!(s.annotations, index)
        end
    else
        # Insert after any existing entries with the same region. This keeps
        # `s.annotations` ordered by region and leaves the existing
        # annotations on `range` in place (the old `splice!` replaced them).
        sortedindex = searchsortedlast(s.annotations, (range,), by=first) + 1
        insert!(s.annotations, sortedindex, (range, Pair{Symbol, Any}(label, val)))
    end
    s
end
Expand Down Expand Up @@ -386,3 +387,126 @@ annotations(s::SubString{<:AnnotatedString}, pos::UnitRange{<:Integer}) =
Get all annotations of `chr`.
"""
annotations(c::AnnotatedChar) = c.annotations

## AnnotatedIOBuffer

# An IO type that pairs an `IOBuffer` with a list of annotations.
# Each annotation is a `(region, label => value)` tuple, where `region` is a
# `UnitRange{Int}` and the label/value are stored as a `Pair{Symbol, Any}`.
struct AnnotatedIOBuffer <: AbstractPipe
# The wrapped buffer holding the raw content.
io::IOBuffer
# Annotations attached to regions of the buffer content.
annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}
end

# Wrap an existing `IOBuffer`, starting with an empty annotation list.
function AnnotatedIOBuffer(io::IOBuffer)
    return AnnotatedIOBuffer(io, Tuple{UnitRange{Int}, Pair{Symbol, Any}}[])
end

# Create an `AnnotatedIOBuffer` around a fresh, empty `IOBuffer`.
function AnnotatedIOBuffer()
    return AnnotatedIOBuffer(IOBuffer())
end

# Print a compact summary: the type name followed by the byte count of the
# wrapped buffer and the number of annotations, each pluralised as needed.
function show(io::IO, aio::AnnotatedIOBuffer)
    nbytes = aio.io.size
    nannots = length(aio.annotations)
    show(io, AnnotatedIOBuffer)
    print(io, '(', nbytes, " byte", nbytes == 1 ? "" : "s", ", ",
          nannots, " annotation", nannots == 1 ? "" : "s", ")")
end

# Both ends of the `AbstractPipe` interface are the wrapped `IOBuffer`.
function pipe_reader(io::AnnotatedIOBuffer)
    return io.io
end

function pipe_writer(io::AnnotatedIOBuffer)
    return io.io
end

# Useful `IOBuffer` methods that we don't get from `AbstractPipe`.
# The seeking methods act on the wrapped buffer and return the
# `AnnotatedIOBuffer` itself, so calls can be chained without losing the
# annotations (e.g. `read(seek(aio, 0), AnnotatedString)`).
position(io::AnnotatedIOBuffer) = position(io.io)
seek(io::AnnotatedIOBuffer, n::Integer) = (seek(io.io, n); io)
# Return the wrapper, like `seek` and `skip`, rather than the inner `IOBuffer`.
seekend(io::AnnotatedIOBuffer) = (seekend(io.io); io)
skip(io::AnnotatedIOBuffer, n::Integer) = (skip(io.io, n); io)
# Copies both the buffer and the annotation vector, so the copy can be
# modified independently of the original.
copy(io::AnnotatedIOBuffer) = AnnotatedIOBuffer(copy(io.io), copy(io.annotations))

# Return the annotation vector of `io` (the stored vector itself, not a copy).
function annotations(io::AnnotatedIOBuffer)
    return io.annotations
end

# Write annotated content at the current position of `io`.
# When not at the end of the buffer, annotations in the region about to be
# overwritten are cleared first; then the annotations of `astr` are recorded
# (shifted by the current position) and the plain text is written to the
# wrapped buffer. Returns the result of the underlying `write`.
function write(io::AnnotatedIOBuffer, astr::Union{AnnotatedString, SubString{<:AnnotatedString}})
    content = AnnotatedString(astr)
    startpos = position(io.io)
    if !eof(io)
        _clear_annotations_in_region!(io.annotations, startpos+1:startpos+ncodeunits(content))
    end
    _insert_annotations!(io, content.annotations)
    return write(io.io, String(content))
end

# An annotated character is written as a one-character annotated string.
function write(io::AnnotatedIOBuffer, c::AnnotatedChar)
    return write(io, AnnotatedString(c))
end

# Unannotated content is forwarded directly to the wrapped buffer.
function write(io::AnnotatedIOBuffer, x::AbstractString)
    return write(io.io, x)
end

function write(io::AnnotatedIOBuffer, s::Union{SubString{String}, String})
    return write(io.io, s)
end

function write(io::AnnotatedIOBuffer, b::UInt8)
    return write(io.io, b)
end

# Write the remaining content of `src` (from its current position) into
# `dest` at `dest`'s current position, carrying the annotations across.
# Returns the number of bytes written.
function write(dest::AnnotatedIOBuffer, src::AnnotatedIOBuffer)
    destpos = position(dest)
    # Must be checked before writing, since the write moves the position.
    isappending = eof(dest)
    srcpos = position(src)
    nb = write(dest.io, src.io)
    # Positions are 0-based byte offsets while annotation regions are 1-based,
    # so the bytes just overwritten are `destpos+1:destpos+nb`.
    if !isappending && nb > 0
        _clear_annotations_in_region!(dest.annotations, destpos+1:destpos+nb)
    end
    # Keep every annotation that reaches past the read position, clamping its
    # start to the first byte actually copied. The typed comprehension
    # guarantees the element type even when nothing matches.
    srcannots = Tuple{UnitRange{Int}, Pair{Symbol, Any}}[
        (max(1 + srcpos, first(region)):last(region), annot)
        for (region, annot) in src.annotations if last(region) > srcpos]
    # Shift from `src` coordinates into `dest` coordinates.
    _insert_annotations!(dest, srcannots, destpos - srcpos)
    nb
end

# Remove the parts of `annotations` that fall within `span`, in place.
#
# - Annotations lying entirely within `span` are deleted.
# - Annotations overlapping one edge of `span` are trimmed to the part outside.
# - Annotations strictly containing `span` are split into the part before
#   and the part after it.
#
# Returns the (mutated) `annotations` vector.
function _clear_annotations_in_region!(annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, span::UnitRange{Int})
    # Clear out any pre-existing annotations fully covered by `span`.
    filter!(((region, _),) -> first(region) < first(span) || last(region) > last(span), annotations)
    extras = Tuple{UnitRange{Int}, Pair{Symbol, Any}}[]
    for i in eachindex(annotations)
        region, annot = annotations[i]
        # Test for partial overlap
        if first(region) <= first(span) <= last(region) || first(region) <= last(span) <= last(region)
            annotations[i] = (if first(region) < first(span)
                                  first(region):first(span)-1
                              else
                                  last(span)+1:last(region)
                              end, annot)
            # If `span` fits strictly within `region`, then we've only kept
            # the beginning overhang, but also need to conserve the end overhang.
            if first(region) < first(span) && last(span) < last(region)
                push!(extras, (last(span)+1:last(region), annot))
            end
        end
    end
    # Insert the end overhangs only after the scan has finished. Doing it
    # inside the loop changes the vector during iteration and re-inserts the
    # same entry on each later iteration.
    for entry in extras
        sortedindex = searchsortedlast(annotations, (first(entry),), by=first) + 1
        insert!(annotations, sortedindex, entry)
    end
    annotations
end

# Add `annotations` to `io`, shifting every region by `offset` (by default
# the current position of `io`).
# When `io` is at its end the shifted entries are appended; otherwise each
# entry is placed by a sorted search on its region.
function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, offset::Int = position(io))
    # Decide once, up front, which insertion strategy applies.
    appending = eof(io)
    for (region, annot) in annotations
        shifted = first(region)+offset:last(region)+offset
        if appending
            push!(io.annotations, (shifted, annot))
        else
            slot = searchsortedlast(io.annotations, (shifted,), by=first) + 1
            insert!(io.annotations, slot, (shifted, annot))
        end
    end
end

# Read the rest of `io` as an `AnnotatedString{T}`.
# Reading from position 0 reuses a copy of all annotations. Otherwise only
# annotations reaching past the current position are kept, with their regions
# shifted to be relative to the returned string.
function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{T}}) where {T <: AbstractString}
    start = position(io)
    if start == 0
        return AnnotatedString(read(io.io, T), copy(io.annotations))
    end
    shifted = [(max(1, first(span) - start):last(span)-start, value)
               for (span, value) in io.annotations if last(span) > start]
    return AnnotatedString(read(io.io, T), shifted)
end

# Unparameterised and abstract requests default to a `String`-backed result.
function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{AbstractString}})
    return read(io, AnnotatedString{String})
end

function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString})
    return read(io, AnnotatedString{String})
end

# Read one character of type `T` from `io`, together with every annotation
# whose region contains the index of the character's first byte.
function read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{T}}) where {T <: AbstractChar}
    # Annotation regions are 1-based, positions are 0-based.
    charindex = position(io) + 1
    chr = read(io.io, T)
    applicable = [annot for (region, annot) in io.annotations if charindex in region]
    return AnnotatedChar(chr, applicable)
end

# Unparameterised and abstract requests default to `Char`.
function read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{AbstractChar}})
    return read(io, AnnotatedChar{Char})
end

function read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar})
    return read(io, AnnotatedChar{Char})
end

# Truncate the wrapped buffer to `size` bytes and adjust the annotations to
# match: entries starting beyond `size` are dropped and the remaining regions
# are clipped to end at `size`. Returns `io`.
function truncate(io::AnnotatedIOBuffer, size::Integer)
    truncate(io.io, size)
    annots = io.annotations
    filter!(entry -> first(entry[1]) <= size, annots)
    for i in eachindex(annots)
        region, val = annots[i]
        annots[i] = (first(region):min(size, last(region)), val)
    end
    return io
end
54 changes: 54 additions & 0 deletions test/strings/annotated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,57 @@ end
@test reverse(str1) == Base.AnnotatedString("tset", [(1:4, :label => 5)])
@test reverse(str2) == Base.AnnotatedString("esac", [(2:3, :label => "oomph")])
end

# Exercises `Base.AnnotatedIOBuffer`: appending, reading from various
# positions, truncation, IOBuffer-style seeking, overwriting in the middle of
# the buffer, and copying between annotated buffers.
# NOTE: the checks below share and mutate `aio`, so their order matters.
@testset "AnnotatedIOBuffer" begin
aio = Base.AnnotatedIOBuffer()
# Append-only writing
@test write(aio, Base.AnnotatedString("hello", [(1:5, :tag => 1)])) == 5
@test write(aio, ' ') == 1
@test write(aio, Base.AnnotatedString("world", [(1:5, :tag => 2)])) == 5
@test Base.annotations(aio) == [(1:5, :tag => 1), (7:11, :tag => 2)]
# Reading
# Reading from a non-zero position shifts the annotation regions so they are
# relative to the returned string.
@test read(seekstart(deepcopy(aio.io)), String) == "hello world"
@test read(seekstart(deepcopy(aio)), String) == "hello world"
@test read(seek(aio, 0), Base.AnnotatedString) == Base.AnnotatedString("hello world", [(1:5, :tag => 1), (7:11, :tag => 2)])
@test read(seek(aio, 1), Base.AnnotatedString) == Base.AnnotatedString("ello world", [(1:4, :tag => 1), (6:10, :tag => 2)])
@test read(seek(aio, 4), Base.AnnotatedString) == Base.AnnotatedString("o world", [(1:1, :tag => 1), (3:7, :tag => 2)])
@test read(seek(aio, 5), Base.AnnotatedString) == Base.AnnotatedString(" world", [(2:6, :tag => 2)])
# Truncation drops or clips annotations extending past the new size.
@test read(seekstart(truncate(deepcopy(aio), 5)), Base.AnnotatedString) == Base.AnnotatedString("hello", [(1:5, :tag => 1)])
@test read(seekstart(truncate(deepcopy(aio), 6)), Base.AnnotatedString) == Base.AnnotatedString("hello ", [(1:5, :tag => 1)])
@test read(seekstart(truncate(deepcopy(aio), 7)), Base.AnnotatedString) == Base.AnnotatedString("hello w", [(1:5, :tag => 1), (7:7, :tag => 2)])
# Reading a single character picks up the annotations covering it.
@test read(seek(aio, 0), Base.AnnotatedChar) == Base.AnnotatedChar('h', [:tag => 1])
@test read(seek(aio, 5), Base.AnnotatedChar) == Base.AnnotatedChar(' ', Pair{Symbol, Any}[])
@test read(seek(aio, 6), Base.AnnotatedChar) == Base.AnnotatedChar('w', [:tag => 2])
# Check method compatibility with IOBuffer
# (the position is 7 because the previous read consumed one byte from offset 6)
@test position(aio) == 7
@test seek(aio, 4) === aio
@test skip(aio, 2) === aio
@test Base.annotations(copy(aio)) == Base.annotations(aio)
@test take!(copy(aio).io) == take!(copy(aio.io))
# Writing into the middle of the buffer
@test write(seek(aio, 6), "alice") == 5 # Replace 'world' with 'alice'
@test read(seekstart(aio), String) == "hello alice"
@test Base.annotations(aio) == [(1:5, :tag => 1), (7:11, :tag => 2)] # Should be unchanged
@test write(seek(aio, 0), Base.AnnotatedString("hey-o", [(1:5, :hey => 'o')])) == 5
@test read(seekstart(aio), String) == "hey-o alice"
@test Base.annotations(aio) == [(1:5, :hey => 'o'), (7:11, :tag => 2)] # First annotation should have been entirely replaced
@test write(seek(aio, 7), Base.AnnotatedString("bbi", [(1:3, :hey => 'a')])) == 3 # a[lic => bbi]e ('alice' => 'abbie')
@test read(seekstart(aio), String) == "hey-o abbie"
@test Base.annotations(aio) == [(1:5, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)]
@test write(seek(aio, 0), Base.AnnotatedString("ab")) == 2 # Check first annotation's region is adjusted correctly
@test read(seekstart(aio), String) == "aby-o abbie"
@test Base.annotations(aio) == [(3:5, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)]
@test write(seek(aio, 3), Base.AnnotatedString("ss")) == 2
@test read(seekstart(aio), String) == "abyss abbie"
@test Base.annotations(aio) == [(3:3, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)]
# Writing one buffer to another
# First a full copy into an empty buffer, then an overwrite of the tail, then
# an append of the tail (whose annotations are shifted to the new offset).
newaio = Base.AnnotatedIOBuffer()
@test write(newaio, seekstart(aio)) == 11
@test read(seekstart(newaio), String) == "abyss abbie"
@test Base.annotations(newaio) == Base.annotations(aio)
@test write(seek(newaio, 5), seek(aio, 5)) == 6
@test Base.annotations(newaio) == Base.annotations(aio)
@test write(newaio, seek(aio, 5)) == 6
@test read(seekstart(newaio), String) == "abyss abbie abbie"
@test Base.annotations(newaio) == vcat(Base.annotations(aio), [(13:13, :tag => 2), (14:16, :hey => 'a'), (17:17, :tag => 2)])
end

0 comments on commit f117a50

Please sign in to comment.