Skip to content

Commit

Permalink
Use dict representation for compress and decompress
Browse files Browse the repository at this point in the history
  • Loading branch information
benjeffery committed Apr 22, 2021
1 parent 55e7fb4 commit f5d20ee
Show file tree
Hide file tree
Showing 9 changed files with 161 additions and 167 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
--------------------
[0.1.1] - 2021-04-XX
--------------------

- Add support for new columns in tskit (benjeffery, #39, #42).

--------------------
[0.1.0] - 2019-05-10
--------------------
Expand Down
1 change: 1 addition & 0 deletions requirements/CI-tests-conda.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
humanize==3.4.1
h5py==3.2.1
msprime==1.0.0
pytest==6.2.3
pytest-cov==2.11.1
Expand Down
1 change: 1 addition & 0 deletions requirements/development.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
codecov
coverage
flake8
h5py
pre-commit
pytest
pytest-cov
Expand Down
Binary file added tests/files/1.0.0.trees
Binary file not shown.
Binary file added tests/files/1.0.0.trees.tsz
Binary file not shown.
14 changes: 3 additions & 11 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@

import msprime
import numpy as np
import pytest
import tskit

import tszip
Expand All @@ -43,6 +42,9 @@ class TestException(Exception):
Custom exception we can throw for testing.
"""

# The class name starts with "Test", so tell pytest not to collect it as a test class
__test__ = False


def capture_output(func, *args, **kwargs):
"""
Expand Down Expand Up @@ -189,7 +191,6 @@ def setUp(self):
def tearDown(self):
    # Remove the tmpdir attribute from the test instance after each test.
    # NOTE(review): presumably this is a temporary-directory object created
    # in setUp whose cleanup runs once it is released -- confirm.
    del self.tmpdir

@pytest.mark.xfail
def test_simple(self):
self.assertTrue(self.trees_path.exists())
self.run_tszip([str(self.trees_path)])
Expand All @@ -199,7 +200,6 @@ def test_simple(self):
ts = tszip.decompress(outpath)
self.assertEqual(ts.tables, self.ts.tables)

@pytest.mark.xfail
def test_suffix(self):
self.assertTrue(self.trees_path.exists())
self.run_tszip([str(self.trees_path), "-S", ".XYZasdf"])
Expand All @@ -221,7 +221,6 @@ def test_variants_only(self):
G2 = self.ts.genotype_matrix()
self.assertTrue(np.array_equal(G1, G2))

@pytest.mark.xfail
def test_keep(self):
self.assertTrue(self.trees_path.exists())
self.run_tszip([str(self.trees_path), "--keep"])
Expand All @@ -231,7 +230,6 @@ def test_keep(self):
ts = tszip.decompress(outpath)
self.assertEqual(ts.tables, self.ts.tables)

@pytest.mark.xfail
def test_overwrite(self):
self.assertTrue(self.trees_path.exists())
outpath = pathlib.Path(str(self.trees_path) + ".tsz")
Expand All @@ -255,7 +253,6 @@ def test_no_overwrite(self):
f"'{outpath}' already exists; use --force to overwrite"
)

@pytest.mark.xfail
def test_bad_file_format(self):
self.assertTrue(self.trees_path.exists())
with open(str(self.trees_path), "w") as f:
Expand Down Expand Up @@ -283,7 +280,6 @@ def setUp(self):
def tearDown(self):
    # Remove the tmpdir attribute from the test instance after each test.
    # NOTE(review): presumably this is a temporary-directory object created
    # in setUp whose cleanup runs once it is released -- confirm.
    del self.tmpdir

@pytest.mark.xfail
def test_simple(self):
self.assertTrue(self.compressed_path.exists())
self.run_decompress([str(self.compressed_path)])
Expand All @@ -293,7 +289,6 @@ def test_simple(self):
ts = tskit.load(str(outpath))
self.assertEqual(ts.tables, self.ts.tables)

@pytest.mark.xfail
def test_suffix(self):
suffix = ".XYGsdf"
self.compressed_path = self.compressed_path.with_suffix(suffix)
Expand All @@ -306,7 +301,6 @@ def test_suffix(self):
ts = tskit.load(str(outpath))
self.assertEqual(ts.tables, self.ts.tables)

@pytest.mark.xfail
def test_keep(self):
self.assertTrue(self.compressed_path.exists())
self.run_decompress([str(self.compressed_path), "--keep"])
Expand All @@ -316,7 +310,6 @@ def test_keep(self):
ts = tskit.load(str(outpath))
self.assertEqual(ts.tables, self.ts.tables)

@pytest.mark.xfail
def test_overwrite(self):
self.assertTrue(self.compressed_path.exists())
outpath = self.trees_path
Expand Down Expand Up @@ -346,7 +339,6 @@ def test_decompress_bad_suffix(self):
"Compressed file must have 'asdf' suffix"
)

@pytest.mark.xfail
def test_bad_file_format(self):
self.assertTrue(self.compressed_path.exists())
with open(str(self.compressed_path), "w") as f:
Expand Down
38 changes: 31 additions & 7 deletions tests/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@

import msprime
import numpy as np
import pytest
import tskit
import zarr

Expand Down Expand Up @@ -106,20 +105,17 @@ class RoundTripMixin:
Set of example tree sequences that we should be able to round trip.
"""

@pytest.mark.xfail
def test_small_msprime_no_recomb(self):
    # Simulate ten samples with mutations but no recombination rate given,
    # check the result has more than two sites, then round trip it.
    simulated = msprime.simulate(10, random_seed=2, mutation_rate=2)
    site_count = simulated.num_sites
    self.assertGreater(site_count, 2)
    self.verify(simulated)

@pytest.mark.xfail
def test_small_msprime_recomb(self):
    # Simulate ten samples with both recombination and mutation, check the
    # result has more than two sites and more than two trees, then round
    # trip it.
    sim_options = dict(recombination_rate=2, mutation_rate=2, random_seed=2)
    simulated = msprime.simulate(10, **sim_options)
    for observed in (simulated.num_sites, simulated.num_trees):
        self.assertGreater(observed, 2)
    self.verify(simulated)

@pytest.mark.xfail
def test_small_msprime_migration(self):
ts = msprime.simulate(
population_configurations=[
Expand All @@ -137,7 +133,6 @@ def test_small_msprime_migration(self):
self.assertGreater(ts.num_trees, 2)
self.verify(ts)

@pytest.mark.xfail
def test_small_msprime_top_level_metadata(self):
ts = msprime.simulate(10, recombination_rate=2, mutation_rate=2, random_seed=2)
self.assertGreater(ts.num_sites, 2)
Expand All @@ -151,15 +146,16 @@ def test_small_msprime_top_level_metadata(self):
tables.metadata = {"my_int": 1234}
self.verify(tables.tree_sequence())

@pytest.mark.xfail
def test_small_msprime_individuals_metadata(self):
ts = msprime.simulate(10, recombination_rate=1, mutation_rate=2, random_seed=2)
self.assertGreater(ts.num_sites, 2)
self.assertGreater(ts.num_trees, 2)
tables = ts.dump_tables()
tables.nodes.clear()
for j, node in enumerate(ts.nodes()):
tables.individuals.add_row(flags=j, location=[j] * j, metadata=b"x" * j)
tables.individuals.add_row(
flags=j, location=[j] * j, parents=[j - 1] * j, metadata=b"x" * j
)
tables.nodes.add_row(
flags=node.flags,
population=node.population,
Expand All @@ -169,6 +165,7 @@ def test_small_msprime_individuals_metadata(self):
)
tables.populations.clear()
tables.populations.add_row(metadata=b"X" * 1024)
tables.sort()
self.verify(tables.tree_sequence())

def test_small_msprime_complex_mutations(self):
Expand Down Expand Up @@ -200,6 +197,33 @@ def test_mutation_parent_example(self):
tables.mutations.add_row(site=0, node=0, parent=0, derived_state="A")
self.verify(tables.tree_sequence())

def test_all_fields(self):
    # Build a three-population demography in which A and B split from the
    # ancestral population C at time 1000.
    demography = msprime.Demography()
    for pop_name, pop_size in (("A", 10_000), ("B", 5_000), ("C", 1_000)):
        demography.add_population(name=pop_name, initial_size=pop_size)
    demography.add_population_split(time=1000, derived=["A", "B"], ancestral="C")

    # Simulate ancestry for one sample from each of A and B, then add
    # mutations on top.
    ancestry = msprime.sim_ancestry(
        samples={"A": 1, "B": 1}, demography=demography, random_seed=42
    )
    mutated = msprime.sim_mutations(ancestry, rate=1, random_seed=42)
    tables = mutated.dump_tables()

    # Give every table except provenances and edges a JSON metadata schema
    # and one JSON metadata value per row.
    for name, table in tables.name_map.items():
        if name in ["provenances", "edges"]:
            continue
        table.metadata_schema = tskit.MetadataSchema({"codec": "json"})
        row_metadata = []
        for u in range(len(table)):
            row_metadata.append(f'{{"foo":"n_{name}_{u}"}}')
        packed, packed_offset = tskit.pack_strings(row_metadata)
        # Re-set all columns unchanged, overriding only the metadata pair.
        columns = dict(
            table.asdict(), metadata=packed, metadata_offset=packed_offset
        )
        table.set_columns(**columns)

    # Top-level metadata also uses the JSON codec.
    tables.metadata_schema = tskit.MetadataSchema({"codec": "json"})
    tables.metadata = "Test metadata"
    self.verify(tables.tree_sequence())


class TestGenotypeRoundTrip(unittest.TestCase, RoundTripMixin):
"""
Expand Down
37 changes: 37 additions & 0 deletions tests/test_legacy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# MIT License
#
# Copyright (c) 2021 Tskit Developers
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""
Tests for files from previous releases.
"""
import pathlib

import tskit

import tszip


class Testv1:
    """
    Checks that a compressed file kept under tests/files can still be read.
    """

    def test_decompress(self):
        # Both the compressed file and the uncompressed reference live in
        # the "files" directory next to this test module.
        data_dir = pathlib.Path(__file__).parent.joinpath("files")
        compressed_path = data_dir / "1.0.0.trees.tsz"
        reference_path = data_dir / "1.0.0.trees"
        decompressed = tszip.decompress(compressed_path)
        reference = tskit.load(reference_path)
        assert decompressed == reference
Loading

0 comments on commit f5d20ee

Please sign in to comment.