Skip to content

Commit

Permalink
TST: Reconcile tests with rm-ing empty features
Browse files Browse the repository at this point in the history
Closes #171.

It turns out that the reason the main.js outputs for q2_moving_pictures
and sleep_apnea were different is that removing empty features slightly
changed the order of features in their rank plot JSONs (the actual data
was exactly the same, though, since these datasets didn't have any
empty features). I think it was just the use of .filter().

In any case, that's ok with me. We're done here!

Now we can move on to #58.
  • Loading branch information
fedarko committed Jul 6, 2019
1 parent bbe0a3c commit 9c1ea69
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 13 deletions.
4 changes: 2 additions & 2 deletions docs/demos/byrd/main.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/demos/q2_moving_pictures/main.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/demos/sleep_apnea/main.js

Large diffs are not rendered by default.

43 changes: 34 additions & 9 deletions qurro/tests/testing_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,11 @@
import qurro.scripts._plot as rrvp
from qurro._rank_utils import read_rank_file
from qurro._metadata_utils import read_metadata_file
from qurro._df_utils import replace_nan
from qurro._df_utils import (
replace_nan,
match_table_and_data,
biom_table_to_sparse_df,
)
from qurro._json_utils import get_jsons


Expand Down Expand Up @@ -204,7 +208,7 @@ def validate_main_js(out_dir, rloc, tloc, sloc, validate_jsons=True):

# Validate plot JSONs
if validate_jsons:
validate_rank_plot_json(rloc, rank_json)
validate_rank_plot_json(tloc, sloc, rloc, rank_json)
validate_sample_plot_json(tloc, sloc, sample_json, count_json)

return rank_json, sample_json, count_json
Expand Down Expand Up @@ -242,25 +246,34 @@ def basic_vegalite_json_validation(json_obj):
assert json_obj["$schema"].endswith(".json")


def validate_rank_plot_json(input_ranks_loc, rank_json):
def validate_rank_plot_json(
biom_table_loc, metadata_loc, input_ranks_loc, rank_json
):
"""Ensure that the rank plot JSON makes sense."""

# TODO check that feature metadata annotations were properly applied to the
# features. Will need the feature metadata file location to be passed here

ref_feature_ranks = read_rank_file(input_ranks_loc)

# Load the table as a Sparse DF, and then match it up with the sample
# metadata. This is needed in order to ensure that the table only describes
# samples in the sample metadata.
# (And the reason we do *that* is so that, when we're trying to figure out
# if a feature is "empty," we can just compute the sum of that feature's
# row in the table -- which we couldn't do if the table contained samples
# that would be filtered out in Qurro.)
table = biom_table_to_sparse_df(load_table(biom_table_loc))
sample_metadata = read_metadata_file(metadata_loc)
table, _ = match_table_and_data(table, ref_feature_ranks, sample_metadata)

# Validate some basic properties of the plot
# (This is all handled by Altair, so these property tests aren't
# exhaustive; they're mainly intended to verify that a general plot
# matching our specs is being created)
assert rank_json["mark"] == "bar"
assert rank_json["title"] == "Feature Ranks"
basic_vegalite_json_validation(rank_json)
dn = rank_json["data"]["name"]
# Check that we have the same count of ranked features as in the
# input ranks file (no ranked features should be dropped during the
# generation process unless -x is passed, which it isn't in any of these
# integration tests)
assert len(rank_json["datasets"][dn]) == len(ref_feature_ranks)

# Loop over every feature in the reference feature ranks. Check that each
# feature's corresponding rank data in the rank plot JSON matches.
Expand All @@ -270,6 +283,13 @@ def validate_rank_plot_json(input_ranks_loc, rank_json):
)

for ref_feature_id in ref_feature_ranks.index:
# If this feature is empty, it should have been filtered!
if sum(table.loc[ref_feature_id]) == 0:
assert ref_feature_id not in rank_json_feature_data
continue
# ...If this feature isn't empty, though, it shouldn't have been
# filtered. (We assume that the user didn't pass in -x in this test.)
#
# Check to make sure that this feature ID is actually in the rank plot
# JSON
assert ref_feature_id in rank_json_feature_data
Expand Down Expand Up @@ -305,6 +325,11 @@ def validate_sample_plot_json(

# Check that each sample's metadata in the sample plot JSON matches with
# its actual metadata.
# NOTE: here we make the assumption that all samples are non-empty.
# If we start using integration test data with empty samples, then we'll
# need to revise this function to do something akin to what
# validate_rank_plot_json() does above for empty features, in order to
# ensure that empty samples are filtered out.
sample_metadata = replace_nan(read_metadata_file(metadata_loc))
for sample in sample_json["datasets"][dn]:

Expand Down

0 comments on commit 9c1ea69

Please sign in to comment.