Skip to content

Commit

Permalink
Merge pull request #15 from carlobaldassi/check_desc
Browse files Browse the repository at this point in the history
Add check_description flag
  • Loading branch information
carlobaldassi authored Aug 30, 2024
2 parents 41292c4 + 6315667 commit cc84728
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 14 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,6 @@ jobs:
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v3
- uses: codecov/codecov-action@v4
with:
files: lcov.info
2 changes: 2 additions & 0 deletions codecov.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
codecov:
token: b60713dd-5429-4e88-8a1e-d3a1f45a34f6
4 changes: 2 additions & 2 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ The FASTA format which is assumed by this module is as follows:
at the beginning or end of the description
5. Empty lines are ignored (note however that lines containing whitespace will still trigger an error)

When writing, description lines longer than 80 characters will trigger a warning message; sequence data is
formatted in lines of 80 characters each; extra whitespace is silently discarded.
When writing, description lines longer than 80 characters will trigger a warning message (this can be optionally
disabled); sequence data is formatted in lines of 80 characters each; extra whitespace is silently discarded.
No other restriction is put on the content of the sequence data, except that the `>` character is
forbidden.

Expand Down
28 changes: 18 additions & 10 deletions src/FastaIO.jl
Original file line number Diff line number Diff line change
Expand Up @@ -307,14 +307,15 @@ mutable struct FastaWriter
entry::Int
own_f::Bool
at_start::Bool
check_description::Bool
function FastaWriter(io::IO)
fw = new(io, false, 0, 0, false, 0, 1, false, true)
fw = new(io, false, 0, 0, false, 0, 1, false, true, true)
finalizer(close, fw)
return fw
end
function FastaWriter(filename::AbstractString, mode::AbstractString = "w")
fopen = endswith(filename, ".gz") ? gzopen : open
fw = new(fopen(filename, mode), false, 0, 0, false, 0, 1, true, true)
fw = new(fopen(filename, mode), false, 0, 0, false, 0, 1, true, true, true)
finalizer(close, fw)
return fw
end
Expand Down Expand Up @@ -351,6 +352,9 @@ file.
The `FastaWriter` object has an `entry::Int` field which stores the number of the entry which is
currently being written.
After creating the object, you can set the `check_description` field to `false` to disable the warning
given when description lines are too long.
"""
function FastaWriter(f::Function, args...)
fw = FastaWriter(args...)
Expand Down Expand Up @@ -442,7 +446,7 @@ function write(fw::FastaWriter, c)
error("character '>' not allowed in sequence data (entry $(fw.entry) of FASTA input)")
end
if fw.pos == 80
if !fw.in_seq
if !fw.in_seq && fw.check_description
@warn("description line longer than 80 characters (entry $(fw.entry) of FASTA input)")
else
write(fw.f, '\n')
Expand Down Expand Up @@ -553,11 +557,13 @@ function writefastaseq(io::IO, seq, entry::Int, nl::Bool = true)
end

"""
writefasta([io::IO = stdout], data)
writefasta([io::IO = stdout], data; check_description=true)
This version of the function writes to an already opened `IO` stream, defaulting to `stdout`.
Set the keyword `check_description=false` to disable the warning message given when description lines are too long.
"""
function writefasta(io::IO, data)
function writefasta(io::IO, data; check_description::Bool=true)
entry = 0
for (desc, seq) in data
entry += 1
Expand All @@ -567,18 +573,18 @@ function writefasta(io::IO, data)
if findfirst(==('\n'), desc) !== nothing
error("newlines are not allowed within description (entry $entry of FASTA input)")
end
if length(desc) > 79
if length(desc) > 79 && check_description
@warn("description line longer than 80 characters (entry $entry of FASTA input)")
end
println(io, ">", desc)
entry_chars = writefastaseq(io, seq, entry)
entry_chars > 0 || error("empty sequence data (entry $entry of FASTA input)")
end
end
writefasta(data) = writefasta(stdout, data)
writefasta(data; kw...) = writefasta(stdout, data; kw...)

"""
writefasta(filename::String, data, [mode::String = "w"])
writefasta(filename::String, data, [mode::String = "w"]; check_description=true)
This function dumps data to a FASTA file, auto-formatting it so as to follow the specifications detailed in
the section titled [The FASTA format](@ref). The `data` can be anything which is iterable and which produces
Expand All @@ -597,11 +603,13 @@ If the `filename` ends with `.gz`, the result will be a gzip-compressed file.
The `mode` flag determines how the `filename` is opened; use `"a"` to append the data to an existing
file.
Set the keyword `check_description=false` to disable the warning message given when description lines are too long.
"""
function writefasta(filename::AbstractString, data, mode::AbstractString = "w")
function writefasta(filename::AbstractString, data, mode::AbstractString = "w"; check_description=true)
fopen = endswith(filename, ".gz") ? gzopen : open
fopen(filename, mode) do f
writefasta(f, data)
writefasta(f, data; check_description)
end
end

Expand Down
3 changes: 2 additions & 1 deletion test/Project.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
[deps]
GZip = "92fee26a-97fe-5a0c-ad85-20a5f3185b63"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
60 changes: 60 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module FastaTests
using FastaIO
using GZip
using Test
using Logging

const fastadata_ascii = Any[
("A0ADS9_STRAM/3-104",
Expand Down Expand Up @@ -202,4 +203,63 @@ end
end
end

outfile = joinpath(@__DIR__, "long_desc_test_out.fasta.gz")

# Checks that an over-long description line produces log output by default, and
# that no log output at all is produced once `check_description` is turned off.
# Covers the three writing entry points: `FastaWriter`, `writefasta` on an IO
# stream, and `writefasta` on a file name.
@testset "desc length checks" begin
    longdesc = ">" * "X"^100
    data = "DATA"
    entries = [(longdesc, data)]

    # Run `action` with a `SimpleLogger` (minimum level `Debug`) writing into a
    # fresh buffer, and return whatever text was logged.
    function logged_output(action)
        sink = IOBuffer()
        with_logger(action, SimpleLogger(sink, Logging.Debug))
        return String(take!(sink))
    end

    # Incremental writer: the flag is a mutable field toggled after construction.
    FastaWriter(devnull) do fw
        emit_entry() = (write(fw, longdesc); write(fw, data))

        @test !isempty(logged_output(emit_entry))

        fw.check_description = false
        @test isempty(logged_output(emit_entry))
    end

    # One-shot writer on an already opened stream: the flag is a keyword argument.
    @test !isempty(logged_output(() -> writefasta(devnull, entries)))
    @test isempty(logged_output(() -> writefasta(devnull, entries; check_description=false)))

    # One-shot writer on a file name; the output file is removed afterwards even
    # if one of the calls throws.
    try
        @test !isempty(logged_output(() -> writefasta(outfile, entries)))
        @test isempty(logged_output(() -> writefasta(outfile, entries; check_description=false)))
    finally
        isfile(outfile) && rm(outfile)
    end
end

end # module FastaTests

0 comments on commit cc84728

Please sign in to comment.