From 69ff7494fa1416ac635684e9c7d05ecb1708614c Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Wed, 16 Aug 2023 16:10:01 -0700 Subject: [PATCH] Export clock rate covariance matrix and std dev Exports the clock rate covariance matrix and std dev from the phylogenetic regression performed by TreeTime in augur refine. This information was already available in a TreeTime data structure and it is useful to users who do not know the std dev of their pathogen's clock rate already. Adds functional tests for behavior when no clock rate is provided by the user and when one is. In the latter case, we shouldn't have a covariance matrix. --- augur/refine.py | 6 ++++ tests/functional/refine/cram/timetree.t | 20 +++++++++++++ .../cram/timetree_with_fixed_clock_rate.t | 29 +++++++++++++++++++ 3 files changed, 55 insertions(+) create mode 100644 tests/functional/refine/cram/timetree_with_fixed_clock_rate.t diff --git a/augur/refine.py b/augur/refine.py index f5fcc2a17..ea0cf2eb3 100644 --- a/augur/refine.py +++ b/augur/refine.py @@ -259,6 +259,12 @@ def run(args): node_data['clock'] = {'rate': tt.date2dist.clock_rate, 'intercept': tt.date2dist.intercept, 'rtt_Tmrca': -tt.date2dist.intercept/tt.date2dist.clock_rate} + # Include the standard deviation of the clock rate, if the covariance + # matrix is available. + if hasattr(tt.date2dist, "cov") and tt.date2dist.cov is not None: + node_data["clock"]["cov"] = tt.date2dist.cov + node_data["clock"]["rate_std"] = np.sqrt(tt.date2dist.cov[0, 0]) + if args.coalescent=='skyline': try: skyline, conf = tt.merger_model.skyline_inferred(gen=args.gen_per_year, confidence=2) diff --git a/tests/functional/refine/cram/timetree.t b/tests/functional/refine/cram/timetree.t index 52fb8afbc..f59c35313 100644 --- a/tests/functional/refine/cram/timetree.t +++ b/tests/functional/refine/cram/timetree.t @@ -21,3 +21,23 @@ Confirm that TreeTime trees match expected topology and branch lengths. $ python3 "$TESTDIR/../../../../scripts/diff_trees.py" "$TESTDIR/../data/tree.nwk" tree.nwk --significant-digits 2 {} + +Confirm that JSON output includes details about the clock rate. + + $ grep -A 15 '\"clock\"' branch_lengths.json + "clock": { + "cov": [ + [ + .*, (re) + .* (re) + ], + [ + .*, (re) + .* (re) + ] + ], + "intercept": .*, (re) + "rate": .*, (re) + "rate_std": .*, (re) + "rtt_Tmrca": .* (re) + }, diff --git a/tests/functional/refine/cram/timetree_with_fixed_clock_rate.t b/tests/functional/refine/cram/timetree_with_fixed_clock_rate.t new file mode 100644 index 000000000..90f7461a0 --- /dev/null +++ b/tests/functional/refine/cram/timetree_with_fixed_clock_rate.t @@ -0,0 +1,29 @@ +Setup + + $ source "$TESTDIR"/_setup.sh + +Try building a time tree with a fixed clock rate and clock std dev. + + $ ${AUGUR} refine \ + > --tree "$TESTDIR/../data/tree_raw.nwk" \ + > --alignment "$TESTDIR/../data/aligned.fasta" \ + > --metadata "$TESTDIR/../data/metadata.tsv" \ + > --output-tree tree.nwk \ + > --output-node-data branch_lengths.json \ + > --timetree \ + > --coalescent opt \ + > --date-confidence \ + > --date-inference marginal \ + > --clock-rate 0.0012 \ + > --clock-std-dev 0.0002 \ + > --clock-filter-iqd 4 \ + > --seed 314159 &> /dev/null + +Confirm that JSON output does not include information about the clock rate std dev, since it was provided by the user. + + $ grep -A 4 '\"clock\"' branch_lengths.json + "clock": { + "intercept": .*, (re) + "rate": .*, (re) + "rtt_Tmrca": .* (re) + },