Skip to content

Commit

Permalink
Merge pull request #25646 from JuliaLang/jb/readuntilchomp
Browse files Browse the repository at this point in the history
exclude delimiter by default in `readuntil`; rename `chomp` option to `keep`
  • Loading branch information
JeffBezanson authored Jan 22, 2018
2 parents bea2fb7 + 7d3c538 commit 92bf8fd
Show file tree
Hide file tree
Showing 25 changed files with 223 additions and 167 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,9 @@ Breaking changes

This section lists changes that do not have deprecation warnings.

* `readuntil` now does *not* include the delimiter in its result, matching the
behavior of `readline`. Pass `keep=true` to get the old behavior ([#25633]).

* `getindex(s::String, r::UnitRange{Int})` now throws `UnicodeError` if `last(r)`
is not a valid index into `s` ([#22572]).

Expand Down
2 changes: 1 addition & 1 deletion base/client.jl
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ parse_input_line(s::AbstractString) = parse_input_line(String(s))
function parse_input_line(io::IO)
s = ""
while !eof(io)
s *= readline(io, chomp=false)
s *= readline(io, keep=true)
e = parse_input_line(s)
if !(isa(e,Expr) && e.head === :incomplete)
return e
Expand Down
2 changes: 1 addition & 1 deletion base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ end

@deprecate read(cmd::AbstractCmd, stdin::Redirectable) read(pipeline(stdin, cmd))
@deprecate readstring(cmd::AbstractCmd, stdin::Redirectable) readstring(pipeline(stdin, cmd))
@deprecate eachline(cmd::AbstractCmd, stdin; chomp::Bool=true) eachline(pipeline(stdin, cmd), chomp=chomp)
@deprecate eachline(cmd::AbstractCmd, stdin; kw...) eachline(pipeline(stdin, cmd), kw...)

@deprecate showall(x) show(x)
@deprecate showall(io, x) show(IOContext(io, :limit => false), x)
Expand Down
151 changes: 91 additions & 60 deletions base/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,10 @@ flush(io::AbstractPipe) = flush(pipe_writer(io))
read(io::AbstractPipe, byte::Type{UInt8}) = read(pipe_reader(io), byte)
unsafe_read(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_read(pipe_reader(io), p, nb)
read(io::AbstractPipe) = read(pipe_reader(io))
readuntil(io::AbstractPipe, arg::UInt8) = readuntil(pipe_reader(io), arg)
readuntil(io::AbstractPipe, arg::Char) = readuntil(pipe_reader(io), arg)
readuntil(io::AbstractPipe, arg::UInt8; kw...) = readuntil(pipe_reader(io), arg; kw...)
readuntil(io::AbstractPipe, arg::Char; kw...) = readuntil(pipe_reader(io), arg; kw...)
readuntil(io::AbstractPipe, arg::AbstractString; kw...) = readuntil(pipe_reader(io), arg; kw...)
readuntil(io::AbstractPipe, arg::AbstractVector; kw...) = readuntil(pipe_reader(io), arg; kw...)
readuntil_indexable(io::AbstractPipe, target#=::Indexable{T}=#, out) = readuntil_indexable(pipe_reader(io), target, out)

readavailable(io::AbstractPipe) = readavailable(pipe_reader(io))
Expand Down Expand Up @@ -297,10 +299,12 @@ function read! end
read!(filename::AbstractString, a) = open(io->read!(io, a), filename)

"""
readuntil(stream::IO, delim)
readuntil(filename::AbstractString, delim)
readuntil(stream::IO, delim; keep::Bool = false)
readuntil(filename::AbstractString, delim; keep::Bool = false)
Read a string from an I/O stream or a file, up to and including the given delimiter byte.
Read a string from an I/O stream or a file, up to the given delimiter.
The delimiter can be a `UInt8`, `Char`, string, or vector.
Keyword argument `keep` controls whether the delimiter is included in the result.
The text is assumed to be encoded in UTF-8.
# Examples
Expand All @@ -319,17 +323,17 @@ julia> readuntil("my_file.txt", '.')
julia> rm("my_file.txt")
```
"""
readuntil(filename::AbstractString, args...) = open(io->readuntil(io, args...), filename)
readuntil(filename::AbstractString, args...; kw...) = open(io->readuntil(io, args...; kw...), filename)

"""
readline(io::IO=STDIN; chomp::Bool=true)
readline(filename::AbstractString; chomp::Bool=true)
readline(io::IO=STDIN; keep::Bool=false)
readline(filename::AbstractString; keep::Bool=false)
Read a single line of text from the given I/O stream or file (defaults to `STDIN`).
When reading from a file, the text is assumed to be encoded in UTF-8. Lines in the
input end with `'\\n'` or `"\\r\\n"` or the end of an input stream. When `chomp` is
true (as it is by default), these trailing newline characters are removed from the
line before it is returned. When `chomp` is false, they are returned as part of the
input end with `'\\n'` or `"\\r\\n"` or the end of an input stream. When `keep` is
false (as it is by default), these trailing newline characters are removed from the
line before it is returned. When `keep` is true, they are returned as part of the
line.
# Examples
Expand All @@ -342,22 +346,30 @@ julia> open("my_file.txt", "w") do io
julia> readline("my_file.txt")
"JuliaLang is a GitHub organization."
julia> readline("my_file.txt", chomp=false)
julia> readline("my_file.txt", keep=true)
"JuliaLang is a GitHub organization.\\n"
julia> rm("my_file.txt")
```
"""
function readline(filename::AbstractString; chomp::Bool=true)
function readline(filename::AbstractString; chomp=nothing, keep::Bool=false)
if chomp !== nothing
keep = !chomp
depwarn("The `chomp=$chomp` argument to `readline` is deprecated in favor of `keep=$keep`.", :readline)
end
open(filename) do f
readline(f, chomp=chomp)
readline(f, keep=keep)
end
end

function readline(s::IO=STDIN; chomp::Bool=true)
line = readuntil(s, 0x0a)
function readline(s::IO=STDIN; chomp=nothing, keep::Bool=false)
if chomp !== nothing
keep = !chomp
depwarn("The `chomp=$chomp` argument to `readline` is deprecated in favor of `keep=$keep`.", :readline)
end
line = readuntil(s, 0x0a, keep=true)
i = length(line)
if !chomp || i == 0 || line[i] != 0x0a
if keep || i == 0 || line[i] != 0x0a
return String(line)
elseif i < 2 || line[i-1] != 0x0d
return String(resize!(line,i-1))
Expand All @@ -367,8 +379,8 @@ function readline(s::IO=STDIN; chomp::Bool=true)
end

"""
readlines(io::IO=STDIN; chomp::Bool=true)
readlines(filename::AbstractString; chomp::Bool=true)
readlines(io::IO=STDIN; keep::Bool=false)
readlines(filename::AbstractString; keep::Bool=false)
Read all lines of an I/O stream or a file as a vector of strings. Behavior is
equivalent to saving the result of reading [`readline`](@ref) repeatedly with the same
Expand All @@ -386,20 +398,20 @@ julia> readlines("my_file.txt")
"JuliaLang is a GitHub organization."
"It has many members."
julia> readlines("my_file.txt", chomp=false)
julia> readlines("my_file.txt", keep=true)
2-element Array{String,1}:
"JuliaLang is a GitHub organization.\\n"
"It has many members.\\n"
julia> rm("my_file.txt")
```
"""
function readlines(filename::AbstractString; chomp::Bool=true)
function readlines(filename::AbstractString; kw...)
open(filename) do f
readlines(f, chomp=chomp)
readlines(f; kw...)
end
end
readlines(s=STDIN; chomp::Bool=true) = collect(eachline(s, chomp=chomp))
readlines(s=STDIN; kw...) = collect(eachline(s; kw...))

## byte-order mark, ntoh & hton ##

Expand Down Expand Up @@ -618,41 +630,44 @@ end

# readuntil_string is useful below since it has
# an optimized method for s::IOStream
readuntil_string(s::IO, delim::UInt8) = String(readuntil(s, delim))
readuntil_string(s::IO, delim::UInt8, keep::Bool) = String(readuntil(s, delim, keep=keep))

function readuntil(s::IO, delim::Char)
function readuntil(s::IO, delim::Char; keep::Bool=false)
if delim '\x7f'
return readuntil_string(s, delim % UInt8)
return readuntil_string(s, delim % UInt8, keep)
end
out = IOBuffer()
while !eof(s)
c = read(s, Char)
write(out, c)
if c == delim
keep && write(out, c)
break
end
write(out, c)
end
return String(take!(out))
end

function readuntil(s::IO, delim::T) where T
function readuntil(s::IO, delim::T; keep::Bool=false) where T
out = (T === UInt8 ? StringVector(0) : Vector{T}())
while !eof(s)
c = read(s, T)
push!(out, c)
if c == delim
keep && push!(out, c)
break
end
push!(out, c)
end
return out
end

# requires that indices for target are small ordered integers bounded by start and endof
# returns whether the delimiter was matched
function readuntil_indexable(io::IO, target#=::Indexable{T}=#, out)
T = eltype(target)
first = start(target)
if done(target, first)
return
return true
end
len = endof(target)
local cache # will be lazy initialized when needed
Expand Down Expand Up @@ -693,39 +708,45 @@ function readuntil_indexable(io::IO, target#=::Indexable{T}=#, out)
pos = cache[pos] + first
end
end
done(target, pos) && break
done(target, pos) && return true
end
return false
end

function readuntil(io::IO, target::AbstractString)
function readuntil(io::IO, target::AbstractString; keep::Bool=false)
# small-string target optimizations
i = start(target)
done(target, i) && return ""
c, i = next(target, start(target))
if done(target, i) && c <= '\x7f'
return readuntil_string(io, c % UInt8)
return readuntil_string(io, c % UInt8, keep)
end
# decide how we can index target
if target isa String
# convert String to a utf8-byte-iterator
# convert String to a utf8-byte-iterator
if target isa String || target isa SubString{String}
target = codeunits(target)
#elseif applicable(codeunit, target)
# TODO: a more general version of above optimization
# would be to permit accessing any string via codeunit
# target = CodeUnitVector(target)
elseif !(target isa SubString{String})
# type with unknown indexing behavior: convert to array
target = collect(target)
end
out = (eltype(target) === UInt8 ? StringVector(0) : IOBuffer())
readuntil_indexable(io, target, out)
out = isa(out, IO) ? take!(out) : out
else
target = codeunits(String(target))
end
out = StringVector(0)
found = readuntil_indexable(io, target, out)
if !keep && found
lo, lt = length(out), length(target)
if lt <= lo
resize!(out, lo - lt)
end
end
return String(out)
end

function readuntil(io::IO, target::AbstractVector{T}) where T
function readuntil(io::IO, target::AbstractVector{T}; keep::Bool=false) where T
out = (T === UInt8 ? StringVector(0) : Vector{T}())
readuntil_indexable(io, target, out)
found = readuntil_indexable(io, target, out)
if !keep && found
lo, lt = length(out), length(target)
if lt <= lo
resize!(out, lo - lt)
end
end
return out
end

Expand Down Expand Up @@ -797,20 +818,20 @@ read(s::IO, T::Type) = error("The IO stream does not support reading objects of
mutable struct EachLine
stream::IO
ondone::Function
chomp::Bool
keep::Bool

EachLine(stream::IO=STDIN; ondone::Function=()->nothing, chomp::Bool=true) =
new(stream, ondone, chomp)
EachLine(stream::IO=STDIN; ondone::Function=()->nothing, keep::Bool=false) =
new(stream, ondone, keep)
end

"""
eachline(io::IO=STDIN; chomp::Bool=true)
eachline(filename::AbstractString; chomp::Bool=true)
eachline(io::IO=STDIN; keep::Bool=false)
eachline(filename::AbstractString; keep::Bool=false)
Create an iterable `EachLine` object that will yield each line from an I/O stream
or a file. Iteration calls [`readline`](@ref) on the stream argument repeatedly with
`chomp` passed through, determining whether trailing end-of-line characters are
removed. When called with a file name, the file is opened once at the beginning of
`keep` passed through, determining whether trailing end-of-line characters are
retained. When called with a file name, the file is opened once at the beginning of
iteration and closed at the end. If iteration is interrupted, the file will be
closed when the `EachLine` object is garbage collected.
Expand All @@ -828,11 +849,21 @@ JuliaLang is a GitHub organization. It has many members.
julia> rm("my_file.txt");
```
"""
eachline(stream::IO=STDIN; chomp::Bool=true) = EachLine(stream, chomp=chomp)::EachLine
function eachline(stream::IO=STDIN; chomp=nothing, keep::Bool=false)
if chomp !== nothing
keep = !chomp
depwarn("The `chomp=$chomp` argument to `eachline` is deprecated in favor of `keep=$keep`.", :eachline)
end
EachLine(stream, keep=keep)::EachLine
end

function eachline(filename::AbstractString; chomp::Bool=true)
function eachline(filename::AbstractString; chomp=nothing, keep::Bool=false)
if chomp !== nothing
keep = !chomp
depwarn("The `chomp=$chomp` argument to `eachline` is deprecated in favor of `keep=$keep`.", :eachline)
end
s = open(filename)
EachLine(s, ondone=()->close(s), chomp=chomp)::EachLine
EachLine(s, ondone=()->close(s), keep=keep)::EachLine
end

start(itr::EachLine) = nothing
Expand All @@ -841,7 +872,7 @@ function done(itr::EachLine, ::Nothing)
itr.ondone()
true
end
next(itr::EachLine, ::Nothing) = (readline(itr.stream, chomp=itr.chomp), nothing)
next(itr::EachLine, ::Nothing) = (readline(itr.stream, keep=itr.keep), nothing)

eltype(::Type{EachLine}) = String

Expand Down
26 changes: 15 additions & 11 deletions base/iobuffer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -444,26 +444,30 @@ function findfirst(delim::EqualTo{UInt8}, buf::GenericIOBuffer)
return nothing
end

function readuntil(io::GenericIOBuffer, delim::UInt8)
function readuntil(io::GenericIOBuffer, delim::UInt8; keep::Bool=false)
lb = 70
A = StringVector(lb)
n = 0
nread = 0
nout = 0
data = io.data
for i = io.ptr : io.size
n += 1
if n > lb
lb = n*2
resize!(A, lb)
end
@inbounds b = data[i]
@inbounds A[n] = b
nread += 1
if keep || b != delim
nout += 1
if nout > lb
lb = nout*2
resize!(A, lb)
end
@inbounds A[nout] = b
end
if b == delim
break
end
end
io.ptr += n
if lb != n
resize!(A, n)
io.ptr += nread
if lb != nout
resize!(A, nout)
end
A
end
Expand Down
Loading

0 comments on commit 92bf8fd

Please sign in to comment.