Add a chunked PGEN-output test for simgenotype and reword the --chunk-size
warning in the simgenotype docs.

The new test invokes simgenotype with "--chunk-size 1", asserts a zero exit
code and that the .bp, .pgen, .pvar and .psam outputs exist, then deletes them.

NOTE(review): this patch was re-flowed from a copy whose line breaks and
indentation had been collapsed. Indentation and blank context lines below are
reconstructed (assumed, not verified), and the :doc: link target on the
"All files used in these examples" context line is missing from that copy.
Confirm against the working tree, apply with --ignore-whitespace, or
regenerate the patch before relying on it.

NOTE(review): the "--chunk-size 1" argument is now a plain string with a
trailing comma (it was an f-string without placeholders), so the post-image
blob hash on the tests "index" line no longer matches the added content.

diff --git a/docs/commands/simgenotype.rst b/docs/commands/simgenotype.rst
index 30137694..9a09bcf5 100644
--- a/docs/commands/simgenotype.rst
+++ b/docs/commands/simgenotype.rst
@@ -77,7 +77,7 @@ If speed is important, it's generally faster to use PGEN files than VCFs.
 
 .. warning::
     Writing PGEN files will require more memory than writing VCFs. The memory will depend on the number of simulated samples and variants.
-    You can reduce the memory required by this step by writing the variants in chunks. Just specify a ``--chunk-size`` value.
+    You can reduce the memory required for this step by writing the variants in chunks. Just specify a ``--chunk-size`` value.
 
 All files used in these examples are described :doc:`here `.
 
diff --git a/tests/test_simgenotype.py b/tests/test_simgenotype.py
index 8a29eb45..92f07f73 100644
--- a/tests/test_simgenotype.py
+++ b/tests/test_simgenotype.py
@@ -74,3 +74,38 @@ def test_pgen_output(capfd):
     prefix.unlink()
     prefix.with_suffix(".pvar").unlink()
     prefix.with_suffix(".psam").unlink()
+
+
+def test_pgen_output_chunked(capfd):
+    prefix = DATADIR / "example_simgenotype.pgen"
+    dat_file = DATADIR / "outvcf_gen.dat"
+    map_dir = DATADIR / "map"
+    ref_vcf_file = DATADIR / "outvcf_test.pgen"
+    samp_info_file = DATADIR / "outvcf_info.tab"
+
+    cmd = " ".join(
+        [
+            "simgenotype",
+            f"--model {dat_file}",
+            f"--mapdir {map_dir}",
+            "--region 1:1-83000",
+            f"--ref_vcf {ref_vcf_file}",
+            f"--sample_info {samp_info_file}",
+            f"--out {prefix}",
+            "--chunk-size 1",
+        ]
+    )
+    runner = CliRunner()
+    result = runner.invoke(main, cmd.split(" "), catch_exceptions=False)
+    captured = capfd.readouterr()
+    assert result.exit_code == 0
+    assert prefix.with_suffix(".bp").exists()
+    assert prefix.exists()
+    assert prefix.with_suffix(".pvar").exists()
+    assert prefix.with_suffix(".psam").exists()
+
+    # delete the files and directory we just created
+    prefix.with_suffix(".bp").unlink()
+    prefix.unlink()
+    prefix.with_suffix(".pvar").unlink()
+    prefix.with_suffix(".psam").unlink()