Skip to content

Commit

Permalink
Handle case where VCF filter is not defined in the header
Browse files Browse the repository at this point in the history
  • Loading branch information
tomwhite authored and mergify[bot] committed Nov 24, 2021
1 parent 34d640a commit 8f04f0b
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 3 deletions.
8 changes: 5 additions & 3 deletions sgkit/io/vcf/vcf_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,9 +456,11 @@ def vcf_to_zarr_sequential(
variant_quality[i] = (
variant.QUAL if variant.QUAL is not None else FLOAT32_MISSING
)
for f in variant.FILTERS:
variant_filter[i][filters.index(f)] = True

try:
for f in variant.FILTERS:
variant_filter[i][filters.index(f)] = True
except ValueError:
raise ValueError(f"Filter '{f}' is not defined in the header.")
for field_handler in field_handlers:
field_handler.add_variant(i, variant)

Expand Down
14 changes: 14 additions & 0 deletions sgkit/tests/io/vcf/data/no_filter_defined.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##fileDate=20201009
##source=.
##reference=./simple.fasta
##contig=<ID=CHR1,length=60>
##contig=<ID=CHR2,length=60>
##contig=<ID=CHR3,length=60>
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 SAMPLE2 SAMPLE3
CHR1 2 . A T 60 PASS NS=3;AC=3 GT 0/0 0/0 0/0
CHR1 7 . A C 60 FAIL NS=3;AC=4 GT 0/0 0/1 0/1
11 changes: 11 additions & 0 deletions sgkit/tests/io/vcf/test_vcf_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,17 @@ def test_vcf_to_zarr__contig_not_defined_in_header(shared_datadir, tmp_path):
vcf_to_zarr(path, output)


def test_vcf_to_zarr__filter_not_defined_in_header(shared_datadir, tmp_path):
    """Check that a FILTER value absent from the VCF header is rejected.

    The ``no_filter_defined.vcf`` fixture declares only the ``PASS`` filter
    in its header, yet one of its records carries ``FAIL`` in the FILTER
    column. Converting it must raise a ``ValueError`` naming that filter.
    """
    path = path_for_test(shared_datadir, "no_filter_defined.vcf")
    output = tmp_path.joinpath("vcf.zarr").as_posix()

    # ``match`` is applied as a regular expression, so the trailing period
    # is escaped to require a literal "." instead of any character.
    with pytest.raises(
        ValueError,
        match=r"Filter 'FAIL' is not defined in the header\.",
    ):
        vcf_to_zarr(path, output)


def test_vcf_to_zarr__large_number_of_contigs(shared_datadir, tmp_path):
path = path_for_test(shared_datadir, "Homo_sapiens_assembly38.headerOnly.vcf.gz")
output = tmp_path.joinpath("vcf.zarr").as_posix()
Expand Down

0 comments on commit 8f04f0b

Please sign in to comment.