Skip to content

Commit

Permalink
zstd (#23)
Browse files Browse the repository at this point in the history
Use zstd compression instead of xz and gzip to be consistent with:

* nextstrain/ncov-ingest#345
  • Loading branch information
Jennifer Chang authored Oct 14, 2022
1 parent 56b5f71 commit 306ce64
Showing 1 changed file with 8 additions and 8 deletions.
16 changes: 8 additions & 8 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ files = rules.files.params
rule download:
message: "Downloading sequences and metadata from data.nextstrain.org"
output:
- sequences = "data/sequences.fasta.xz",
- metadata = "data/metadata.tsv.gz"
+ sequences = "data/sequences.fasta.zst",
+ metadata = "data/metadata.tsv.zst"
params:
- sequences_url = "https://data.nextstrain.org/files/zika/sequences.fasta.xz",
- metadata_url = "https://data.nextstrain.org/files/zika/metadata.tsv.gz"
+ sequences_url = "https://data.nextstrain.org/files/zika/sequences.fasta.zst",
+ metadata_url = "https://data.nextstrain.org/files/zika/metadata.tsv.zst"
shell:
"""
curl -fsSL --compressed {params.sequences_url:q} --output {output.sequences}
Expand All @@ -30,15 +30,15 @@ rule download:
rule decompress:
message: "Decompressing sequences and metadata"
input:
- sequences = "data/sequences.fasta.xz",
- metadata = "data/metadata.tsv.gz"
+ sequences = "data/sequences.fasta.zst",
+ metadata = "data/metadata.tsv.zst"
output:
sequences = "data/sequences.fasta",
metadata = "data/metadata.tsv"
shell:
"""
- gzip --decompress --keep {input.metadata}
- xz --decompress --keep {input.sequences}
+ zstd -d -c {input.sequences} > {output.sequences}
+ zstd -d -c {input.metadata} > {output.metadata}
"""

rule filter:
Expand Down

0 comments on commit 306ce64

Please sign in to comment.