Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use dict representation for compress and decompress #42

Merged
merged 1 commit into from
Apr 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
--------------------
[0.1.1] - 2021-04-XX
--------------------

- Add support for new columns in tskit. (benjeffery, #39, #42).

--------------------
[0.1.0] - 2019-05-10
--------------------
Expand Down
1 change: 1 addition & 0 deletions requirements/CI-tests-conda.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
humanize==3.4.1
h5py==3.2.1
msprime==1.0.0
pytest==6.2.3
pytest-cov==2.11.1
Expand Down
1 change: 1 addition & 0 deletions requirements/development.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
codecov
coverage
flake8
h5py
pre-commit
pytest
pytest-cov
Expand Down
Binary file added tests/files/1.0.0.trees
Binary file not shown.
Binary file added tests/files/1.0.0.trees.tsz
Binary file not shown.
14 changes: 3 additions & 11 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@

import msprime
import numpy as np
import pytest
import tskit

import tszip
Expand All @@ -43,6 +42,9 @@ class TestException(Exception):
Custom exception we can throw for testing.
"""

# We don't want pytest to use this as a class to test
__test__ = False
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's happening here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Telling pytest that this isn't a test class. Added a comment.



def capture_output(func, *args, **kwargs):
"""
Expand Down Expand Up @@ -189,7 +191,6 @@ def setUp(self):
def tearDown(self):
del self.tmpdir

@pytest.mark.xfail
def test_simple(self):
self.assertTrue(self.trees_path.exists())
self.run_tszip([str(self.trees_path)])
Expand All @@ -199,7 +200,6 @@ def test_simple(self):
ts = tszip.decompress(outpath)
self.assertEqual(ts.tables, self.ts.tables)

@pytest.mark.xfail
def test_suffix(self):
self.assertTrue(self.trees_path.exists())
self.run_tszip([str(self.trees_path), "-S", ".XYZasdf"])
Expand All @@ -221,7 +221,6 @@ def test_variants_only(self):
G2 = self.ts.genotype_matrix()
self.assertTrue(np.array_equal(G1, G2))

@pytest.mark.xfail
def test_keep(self):
self.assertTrue(self.trees_path.exists())
self.run_tszip([str(self.trees_path), "--keep"])
Expand All @@ -231,7 +230,6 @@ def test_keep(self):
ts = tszip.decompress(outpath)
self.assertEqual(ts.tables, self.ts.tables)

@pytest.mark.xfail
def test_overwrite(self):
self.assertTrue(self.trees_path.exists())
outpath = pathlib.Path(str(self.trees_path) + ".tsz")
Expand All @@ -255,7 +253,6 @@ def test_no_overwrite(self):
f"'{outpath}' already exists; use --force to overwrite"
)

@pytest.mark.xfail
def test_bad_file_format(self):
self.assertTrue(self.trees_path.exists())
with open(str(self.trees_path), "w") as f:
Expand Down Expand Up @@ -283,7 +280,6 @@ def setUp(self):
def tearDown(self):
del self.tmpdir

@pytest.mark.xfail
def test_simple(self):
self.assertTrue(self.compressed_path.exists())
self.run_decompress([str(self.compressed_path)])
Expand All @@ -293,7 +289,6 @@ def test_simple(self):
ts = tskit.load(str(outpath))
self.assertEqual(ts.tables, self.ts.tables)

@pytest.mark.xfail
def test_suffix(self):
suffix = ".XYGsdf"
self.compressed_path = self.compressed_path.with_suffix(suffix)
Expand All @@ -306,7 +301,6 @@ def test_suffix(self):
ts = tskit.load(str(outpath))
self.assertEqual(ts.tables, self.ts.tables)

@pytest.mark.xfail
def test_keep(self):
self.assertTrue(self.compressed_path.exists())
self.run_decompress([str(self.compressed_path), "--keep"])
Expand All @@ -316,7 +310,6 @@ def test_keep(self):
ts = tskit.load(str(outpath))
self.assertEqual(ts.tables, self.ts.tables)

@pytest.mark.xfail
def test_overwrite(self):
self.assertTrue(self.compressed_path.exists())
outpath = self.trees_path
Expand Down Expand Up @@ -346,7 +339,6 @@ def test_decompress_bad_suffix(self):
"Compressed file must have 'asdf' suffix"
)

@pytest.mark.xfail
def test_bad_file_format(self):
self.assertTrue(self.compressed_path.exists())
with open(str(self.compressed_path), "w") as f:
Expand Down
41 changes: 34 additions & 7 deletions tests/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@

import msprime
import numpy as np
import pytest
import tskit
import zarr

Expand Down Expand Up @@ -106,20 +105,17 @@ class RoundTripMixin:
Set of example tree sequences that we should be able to round trip.
"""

@pytest.mark.xfail
def test_small_msprime_no_recomb(self):
    """
    Round trip a small msprime simulation that has mutations but no
    recombination rate set.
    """
    simulated = msprime.simulate(10, mutation_rate=2, random_seed=2)
    # Make sure the example actually carries some sites before verifying.
    self.assertGreater(simulated.num_sites, 2)
    self.verify(simulated)

@pytest.mark.xfail
def test_small_msprime_recomb(self):
    """
    Round trip a small msprime simulation with both recombination and
    mutation enabled.
    """
    simulated = msprime.simulate(
        10,
        recombination_rate=2,
        mutation_rate=2,
        random_seed=2,
    )
    # The example is only useful if it has several sites and several trees.
    self.assertGreater(simulated.num_sites, 2)
    self.assertGreater(simulated.num_trees, 2)
    self.verify(simulated)

@pytest.mark.xfail
def test_small_msprime_migration(self):
ts = msprime.simulate(
population_configurations=[
Expand All @@ -137,7 +133,6 @@ def test_small_msprime_migration(self):
self.assertGreater(ts.num_trees, 2)
self.verify(ts)

@pytest.mark.xfail
def test_small_msprime_top_level_metadata(self):
ts = msprime.simulate(10, recombination_rate=2, mutation_rate=2, random_seed=2)
self.assertGreater(ts.num_sites, 2)
Expand All @@ -151,15 +146,16 @@ def test_small_msprime_top_level_metadata(self):
tables.metadata = {"my_int": 1234}
self.verify(tables.tree_sequence())

@pytest.mark.xfail
def test_small_msprime_individuals_metadata(self):
ts = msprime.simulate(10, recombination_rate=1, mutation_rate=2, random_seed=2)
self.assertGreater(ts.num_sites, 2)
self.assertGreater(ts.num_trees, 2)
tables = ts.dump_tables()
tables.nodes.clear()
for j, node in enumerate(ts.nodes()):
tables.individuals.add_row(flags=j, location=[j] * j, metadata=b"x" * j)
tables.individuals.add_row(
flags=j, location=[j] * j, parents=[j - 1] * j, metadata=b"x" * j
)
tables.nodes.add_row(
flags=node.flags,
population=node.population,
Expand All @@ -169,6 +165,7 @@ def test_small_msprime_individuals_metadata(self):
)
tables.populations.clear()
tables.populations.add_row(metadata=b"X" * 1024)
tables.sort()
self.verify(tables.tree_sequence())

def test_small_msprime_complex_mutations(self):
Expand Down Expand Up @@ -200,6 +197,36 @@ def test_mutation_parent_example(self):
tables.mutations.add_row(site=0, node=0, parent=0, derived_state="A")
self.verify(tables.tree_sequence())

def test_all_fields(self):
    """
    Round trip a tree sequence in which every table except "provenances"
    and "edges" has a JSON metadata schema and per-row metadata, and the
    table collection itself has top-level metadata.
    """
    # Three populations: A and B split from the ancestral population C.
    demography = msprime.Demography()
    for pop_name, pop_size in (("A", 10_000), ("B", 5_000), ("C", 1_000)):
        demography.add_population(name=pop_name, initial_size=pop_size)
    demography.add_population_split(time=1000, derived=["A", "B"], ancestral="C")

    ancestry = msprime.sim_ancestry(
        samples={"A": 1, "B": 1},
        demography=demography,
        random_seed=42,
        record_migrations=True,
    )
    mutated = msprime.sim_mutations(ancestry, rate=1, random_seed=42)
    tables = mutated.dump_tables()

    for name, table in tables.name_map.items():
        if name in ["provenances", "edges"]:
            continue
        table.metadata_schema = tskit.MetadataSchema({"codec": "json"})
        # One small JSON document per row, tagged with the table name and
        # the row index.
        metadatas = [f'{{"foo":"n_{name}_{u}"}}' for u in range(len(table))]
        packed, packed_offset = tskit.pack_strings(metadatas)
        # Rewrite the table with its existing columns, swapping in the
        # newly packed metadata.
        columns = {**table.asdict()}
        columns["metadata"] = packed
        columns["metadata_offset"] = packed_offset
        table.set_columns(**columns)

    # Top-level metadata on the table collection itself.
    tables.metadata_schema = tskit.MetadataSchema({"codec": "json"})
    tables.metadata = "Test metadata"
    self.verify(tables.tree_sequence())


class TestGenotypeRoundTrip(unittest.TestCase, RoundTripMixin):
"""
Expand Down
37 changes: 37 additions & 0 deletions tests/test_legacy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# MIT License
#
# Copyright (c) 2021 Tskit Developers
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""
Tests for files from previous releases.
"""
import pathlib

import tskit

import tszip


class Testv1:
    """
    Checks the stored ``1.0.0`` example files that live in the ``files``
    directory next to this test module.
    """

    def test_decompress(self):
        """
        Decompressing ``1.0.0.trees.tsz`` must give a tree sequence equal
        to the one loaded from ``1.0.0.trees``.
        """
        data_dir = pathlib.Path(__file__).parent / "files"
        decompressed = tszip.decompress(data_dir / "1.0.0.trees.tsz")
        reference = tskit.load(data_dir / "1.0.0.trees")
        assert decompressed == reference
Loading