Skip to content

Commit

Permalink
Merge pull request #206 from nextstrain/feat/warn-dataset-ref-mismatch
Browse files Browse the repository at this point in the history
  • Loading branch information
ivan-aksamentov authored Jun 4, 2024
2 parents 588ba59 + 5608714 commit 138c448
Showing 1 changed file with 15 additions and 0 deletions.
15 changes: 15 additions & 0 deletions scripts/rebuild
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def index_one_dataset(args, pathogen_json_path: str, dataset: object, tag: str,
path = relpath(dataset_dir, args.input_dir)

ref = get_ref_seq(pathogen_json, dataset_dir)
check_ref_seq_mismatch(ref, pathogen_json, dataset_dir)

versions, last_version = dataset_get_versions(dataset)

Expand Down Expand Up @@ -163,6 +164,20 @@ def get_ref_seq(pathogen_json, dataset_dir):
raise ValueError(f"When reading reference sequence") from e


def check_ref_seq_mismatch(standalone_ref, pathogen_json, dataset_dir):
    """Warn when the standalone reference differs from the tree's root sequence.

    Looks up the tree file name under ``files.treeJson`` in ``pathogen_json``,
    reads that file from ``dataset_dir`` and compares its
    ``root_sequence.nuc`` entry with ``standalone_ref.seq``.

    The check is skipped without any message when no tree file is declared,
    when the declared file is not present on disk, or when the tree carries no
    ``root_sequence.nuc`` entry. A mismatch is only logged as a warning;
    nothing is raised or returned by the comparison itself.

    Args:
        standalone_ref: Reference sequence record; only its ``seq`` attribute
            is read.
        pathogen_json: Parsed pathogen JSON of the dataset.
        dataset_dir: Directory that contains the dataset files.
    """
    tree_filename = dict_get(pathogen_json, ["files", "treeJson"])
    if not tree_filename:
        return

    tree_json_path = join(dataset_dir, tree_filename)
    if not isfile(tree_json_path):
        return

    tree_ref = dict_get(json_read(tree_json_path), ["root_sequence", "nuc"])
    if tree_ref is None:
        return

    if standalone_ref.seq != tree_ref:
        # Name the offending file: this runs once per dataset, and a warning
        # without a location cannot be traced back in a multi-dataset run.
        # NOTE(review): if `l` is a stdlib logging.Logger, `warn` is a
        # deprecated alias of `warning` — confirm and switch.
        l.warn(
            f"Reference sequence provided does not exactly match reference (root) sequence in Auspice JSON "
            f"('{tree_json_path}'). This warning signals that there is a potential for failures if the mismatch "
            f"is not intended."
        )


def get_new_dataset_order(datasets, dataset_order):
paths = list(map(lambda d: d["path"], datasets))

Expand Down

0 comments on commit 138c448

Please sign in to comment.