Skip to content

Commit

Permalink
[skip ci] wip lots of failures
Browse files Browse the repository at this point in the history
  • Loading branch information
marcelm committed Feb 1, 2024
1 parent 0b52a98 commit 3881609
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 54 deletions.
57 changes: 32 additions & 25 deletions src/cutadapt/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,24 @@ def make_pipeline_from_args( # noqa: C901
if paired:
pair_filter_mode = "any" if args.pair_filter is None else args.pair_filter

def make_filter(predicate1, predicate2, path1, path2):
record_writer = None
if path1 or path2:
paths = [path1, path2] if paired else [path1]
if paired and path2 is None:
interleaved = True
paths = paths[:1]
else:
interleaved = False
record_writer = outfiles.open_record_writer(*paths, interleaved=interleaved)
if paired:
step = PairedEndFilter(
predicate1, predicate2, record_writer, pair_filter_mode=pair_filter_mode
)
else:
step = SingleEndFilter(predicate1, record_writer)
return step

adapter_names: List[Optional[str]] = [a.name for a in adapters]
adapter_names2: List[Optional[str]] = [a.name for a in adapters2]

Expand Down Expand Up @@ -737,25 +755,7 @@ def make_pipeline_from_args( # noqa: C901
else:
predicate2 = None

record_writer = None
if path1 or path2:
paths = [path1, path2] if paired else [path1]
if paired and path2 is None:
interleaved = True
paths = paths[:1]
else:
interleaved = False
FIXME
if path1:
record_writer = outfiles.open_record_writer(*paths)

if paired:
step = PairedEndFilter(
predicate1, predicate2, record_writer, pair_filter_mode=pair_filter_mode
)
else:
step = SingleEndFilter(predicate1, record_writer)
steps.append(step)
steps.append(make_filter(predicate1, predicate2, path1, path2))

if args.max_n is not None:
predicate = TooManyN(args.max_n)
Expand Down Expand Up @@ -918,11 +918,16 @@ def open_writer(file, file2):
step = SingleEndFilter(predicate, None)
steps.append(step)
elif args.untrimmed_output or args.untrimmed_paired_output:
files = [outfiles.untrimmed]
if paired:
files += [outfiles.untrimmed2]
untrimmed_writer = self._open_writer(*files)
steps.append(self._make_untrimmed_filter(untrimmed_writer))
predicate1 = DiscardUntrimmed()
predicate2 = DiscardUntrimmed()
steps.append(
make_filter(
predicate1,
predicate2 if paired else None,
args.untrimmed_output,
args.untrimmed_paired_output,
)
)

if paired:
steps.append(
Expand All @@ -933,7 +938,9 @@ def open_writer(file, file2):
else:
if args.output is None:
out = outfiles.open_record_writer_from_binary_io(
default_outfile, interleaved=paired and args.interleaved, force_fasta=args.fasta
default_outfile,
interleaved=paired and args.interleaved,
force_fasta=args.fasta,
)
else:
out = outfiles.open_record_writer(args.output, force_fasta=args.fasta)
Expand Down
44 changes: 20 additions & 24 deletions src/cutadapt/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,8 @@ def __init__(
interleaved: bool,
):
self._file_opener = file_opener
self._binary_files: List[BinaryIO] = []
# TODO do these actually have to be dicts?
self._binary_files: Dict[str, BinaryIO] = {}
self._text_files: Dict[str, TextIO] = {}
self._writers: Dict = {}
self._proxy_files: List[ProxyWriter] = []
Expand All @@ -217,14 +217,13 @@ def __init__(
self._interleaved = interleaved

def open_text(self, path):
assert path not in self._binary_files # TODO
# TODO
# - serial runner needs only text_file
# - parallel runner needs binary_file and proxy_file
# split into SerialOutputFiles and ParallelOutputFiles?
if self._proxied:
binary_file = self._file_opener.xopen(path, "wb")
self._binary_files[path] = binary_file
self._binary_files.append(binary_file)
proxy_file = ProxyTextFile()
self._proxy_files.append(proxy_file)
return proxy_file
Expand All @@ -233,23 +232,26 @@ def open_text(self, path):
self._text_files[path] = text_file
return text_file

def open_record_writer(self, *paths, force_fasta: bool = False):
kwargs = dict(qualities=self._qualities)
def open_record_writer(
self, *paths, interleaved: bool = False, force_fasta: bool = False
):
kwargs = dict(qualities=self._qualities, interleaved=interleaved)
if len(paths) not in (1, 2):
raise ValueError("Expected one or two paths")
if len(paths) == 2 and paths[1] is None:
paths = paths[:1]
kwargs["interleaved"] = True
if interleaved and len(paths) != 1:
raise ValueError("Cannot write to two files when interleaved is True")
# if len(paths) == 2 and paths[1] is None:
# paths = paths[:1]
# kwargs["interleaved"] = True
if len(paths) == 1 and paths[0] == "-" and force_fasta:
kwargs["fileformat"] = "fasta"
for path in paths:
assert path is not None
assert path not in self._binary_files # TODO
binary_files = []
for path in paths:
binary_file = self._file_opener.xopen(path, "wb")
binary_files.append(binary_file)
self._binary_files[path] = binary_file
self._binary_files.append(binary_file)
if self._proxied:
proxy_writer = ProxyRecordWriter(len(paths), **kwargs)
self._proxy_files.append(proxy_writer)
Expand All @@ -259,42 +261,36 @@ def open_record_writer(self, *paths, force_fasta: bool = False):
self._writers[paths] = writer
return writer

def open_record_writer_from_binary_io(self, file: BinaryIO, interleaved: bool = False, force_fasta: bool = False):
self._binary_files["fake\0path"] = file # TODO
def open_record_writer_from_binary_io(
self, file: BinaryIO, interleaved: bool = False, force_fasta: bool = False
):
self._binary_files.append(file)
kwargs = dict(qualities=self._qualities, interleaved=interleaved)
if force_fasta and file is sys.stdout.buffer:
kwargs["fileformat"] = "fasta"
if self._proxied:
proxy_writer = ProxyRecordWriter(
1, **kwargs
)
proxy_writer = ProxyRecordWriter(1, **kwargs)
self._proxy_files.append(proxy_writer)
return proxy_writer
else:
writer = self._file_opener.dnaio_open(
file, mode="w", **kwargs
)
writer = self._file_opener.dnaio_open(file, mode="w", **kwargs)
self._writers["fake\0path"] = writer
return writer

def binary_files(self):
return list(self._binary_files.values())
return self._binary_files[:]

def proxy_files(self) -> List[ProxyWriter]:
return self._proxy_files

def __iter__(self):
yield from self._binary_files.values()

def close(self) -> None:
"""Close all output files that are not stdout"""
# TODO ... that *are not* stdout
if not self._proxied:
for f in self._text_files.values():
f.close()
for f in self._writers.values():
f.close()
for f in self._binary_files.values():
for f in self._binary_files:
if f is not sys.stdout.buffer:
f.close()

Expand Down
9 changes: 5 additions & 4 deletions src/cutadapt/runners.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,7 @@ def run(self):
infiles = InputFiles(*files, interleaved=self._interleaved_input)
(n, bp1, bp2) = self._pipeline.process_reads(infiles)
self._pipeline.flush()
cur_stats = Statistics().collect(n, bp1, bp2, [], self._pipeline._steps)
stats += cur_stats
stats += Statistics().collect(n, bp1, bp2, [], [])
self._send_outfiles(chunk_index, n)
self._pipeline.close()

Expand All @@ -200,7 +199,7 @@ def run(self):
0,
0 if self._pipeline.paired else None,
[],
self._pipeline._static_steps,
self._pipeline._steps,
)
self._write_pipe.send(-1)
self._write_pipe.send(stats)
Expand Down Expand Up @@ -441,7 +440,9 @@ def close(self):
def input_file_format(self) -> FileFormat:
detected = detect_file_format(self._infiles._files[0])
if detected is None:
raise dnaio.exceptions.UnknownFileFormat(f"Format of input file '{self._infiles._files[0].name}' not recognized.")
raise dnaio.exceptions.UnknownFileFormat(
f"Format of input file '{self._infiles._files[0].name}' not recognized."
)
return detected


Expand Down
2 changes: 1 addition & 1 deletion tests/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def test_interleaved_record_writer(self, tmp_path):
interleaved=True,
)
path = tmp_path / "out.1.fastq"
f = o.open_record_writer(path, None)
f = o.open_record_writer(path, interleaved=True)
f.write(
SequenceRecord("r", "AACC", "####"), SequenceRecord("r", "GGTT", "####")
)
Expand Down

0 comments on commit 3881609

Please sign in to comment.