Skip to content

Commit

Permalink
Tests for protein topology optimisation with pip
Browse files Browse the repository at this point in the history
  • Loading branch information
junniest committed Sep 5, 2024
1 parent 5eca20a commit 447d912
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 10 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
((Baboon:0.0028331378161231516,(Rhes_cDNA:0.010200173725342486,((((DLangur:0.005187934607208908,Colobus:0.006744877884078086):0.002991818596992287,(((Orangutan:0.02425757797579703,Gibbon:0.050333222484462474):0.00349324814218369,(Gorilla:0.0031112135547833836,(Chimp:0.005196405317244636,Human:0.013661102534226385):0.003794699786919942):0.007165702458849481):0.02944066124362487,(((Tamarin:0.037556885677381205,PMarmoset:0.01702895311693103):0.030028435125174696,Squirrel:0.07201131924471216):0.007156514183315316,(((Woolly:0.034418086513793425,Spider:0.02149518903513873):0.011309226332883714,Howler:0.06122186419988194):0.031165940180073442,(Saki:0.04168710939376183,Titi:0.023512300435974267):0.02081749712142146):0.006198375436005013):0.1917050548350012):0.038848677493702605):0.009117045612009347,Patas:0.02060922344024596):0.006367394359273745,(Tant_cDNA:0.001975023706313592,AGM_cDNA:0.004967765570187601):0.008190743679022382):0.0006802670634388848):0.0005627185167904883):0);
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
((Rhes_cDNA:0.005270697439151051,((((DLangur:0.0067324631560195265,Colobus:0.00521530524332267):0.002427192075502467,(((((Chimp:0.004062536454896269,Human:0.01066970288442119):0.0042616661126785435,Gorilla:0.003799987371551049):0.00939396737272837,Orangutan:0.020925588919919923):0.0042656572453531995,Gibbon:0.04549271228044225):0.025773788804886694,(((Woolly:0.02982911938015733,Spider:0.022092190400064618):0.011455489526599595,Howler:0.07603516089516084):0.10366013657195779,(((Tamarin:0.03164671167745268,PMarmoset:0.014916060359693298):0.026753727600658195,Squirrel:0.06705296485480587):0.007093494673525226,(Saki:0.03143866735106935,Titi:0.02058319641790653):0.01626675366805705):0.004670562059845509):0.17980011911442895):0.033267957441090075):0.007258518763365157,Patas:0.020042200482060055):0.01042259615650974,(Baboon:0.005263494182438327,(Tant_cDNA:0.0037526472256948387,AGM_cDNA:0.006482057922501004):0.03573474143751188):0.000016573653357281707):0.002646554869484882):0);
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
(((Tant_cDNA:0.0048018833811211675,AGM_cDNA:0.008339926875757585):0.04534128950416323,((((DLangur:0.008610868011143124,Colobus:0.006650819970918761):0.0031357126228166673,(((((Chimp:0.005215728948279252,Human:0.013682329995014232):0.005375585838951918,Gorilla:0.004959685843646604):0.012277692848934589,Orangutan:0.026495869011378525):0.005123154657377931,Gibbon:0.05895426364325162):0.032567290566178554,(((Woolly:0.03832426472150371,Spider:0.028961294295631365):0.014783978988486978,Howler:0.10023618083180653):0.13718895712242302,((Saki:0.04156451712084586,Titi:0.02555269910901685):0.020523346478912786,((Tamarin:0.04001931431119561,PMarmoset:0.019981458986083438):0.03338174082271472,Squirrel:0.08828899905103033):0.009173650693663014):0.004952774943586081):0.24177792859569436):0.04399012471581886):0.009241754766755407,Patas:0.025643509869867926):0.013364539018112975,(Rhes_cDNA:0.010134761142363087,Baboon:0.006736993384623528):0.000016088731852021455):0.0007497685155102595):0);
96 changes: 86 additions & 10 deletions phylo/src/optimisers/topo_optimiser_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,14 @@ use approx::assert_relative_eq;
use bio::io::fasta::Record;

use crate::alignment::Sequences;
use crate::evolutionary_models::ProteinModelType;
use crate::evolutionary_models::{DNAModelType::*, EvoModel, ProteinModelType::*};
use crate::likelihood::PhyloCostFunction;
use crate::optimisers::PhyloOptimiser;
// use crate::optimisers::{EvoModelOptimiser, FrequencyOptimisation::*, ModelOptimiser};
use crate::optimisers::{PhyloOptimiser, TopologyOptimiser};
use crate::phylo_info::PhyloInfoBuilder;
use crate::pip_model::PIPModel;
use crate::substitution_models::{DNASubstModel, ProteinSubstModel};
use crate::tree::tree_parser::from_newick_string;
use crate::{
evolutionary_models::{DNAModelType::*, EvoModel},
optimisers::TopologyOptimiser,
};

#[test]
fn primate_topology_from_right_tree() {
Expand Down Expand Up @@ -119,7 +116,7 @@ fn protein_topology_optimisation_good_start() {
// )
// .build()
// .unwrap();
// let model = ProteinSubstModel::new(ProteinModelType::WAG, &[]).unwrap();
// let model = ProteinSubstModel::new(WAG, &[]).unwrap();
// let unopt_logl = model.cost(&info);
// let o = TopologyOptimiser::new(&model, &info).run().unwrap();
// assert!(o.final_logl >= unopt_logl);
Expand All @@ -132,7 +129,7 @@ fn protein_topology_optimisation_good_start() {
)
.build()
.unwrap();
let model = ProteinSubstModel::new(ProteinModelType::WAG, &[]).unwrap();
let model = ProteinSubstModel::new(WAG, &[]).unwrap();
let logl = model.cost(&info);

// compare the tree height and logl to the output of PhyML
Expand All @@ -145,7 +142,7 @@ fn protein_topology_optimisation_nj_start() {
// let info = PhyloInfoBuilder::new(PathBuf::from("./data/phyml_protein_example/nogap_seqs.fasta"))
// .build()
// .unwrap();
// let model = ProteinSubstModel::new(ProteinModelType::WAG, &[]).unwrap();
// let model = ProteinSubstModel::new(WAG, &[]).unwrap();
// let unopt_logl = model.cost(&info);
// let o = TopologyOptimiser::new(&model, &info).run().unwrap();
// assert!(o.final_logl >= unopt_logl);
Expand All @@ -158,7 +155,7 @@ fn protein_topology_optimisation_nj_start() {
)
.build()
.unwrap();
let model = ProteinSubstModel::new(ProteinModelType::WAG, &[]).unwrap();
let model = ProteinSubstModel::new(WAG, &[]).unwrap();
let logl = model.cost(&info);

// compare the tree height and logl to the output of PhyML
Expand All @@ -184,3 +181,82 @@ fn pip_vs_subst_dna_tree() {
assert_relative_eq!(o_k80.initial_logl, initial_logl);
assert_eq!(o_pip.i.tree.robinson_foulds(&o_k80.i.tree), 0);
}

#[test]
fn pip_vs_subst_protein_tree_nogaps() {
// let info = PhyloInfoBuilder::new(PathBuf::from(
// "./data/phyml_protein_example/nogap_seqs.fasta",
// ))
// .build()
// .unwrap();
// let model = PIPModel::<ProteinSubstModel>::new(WAG, &[50.0, 0.1]).unwrap();
// let unopt_logl = model.cost(&info);
// let o = TopologyOptimiser::new(&model, &info).run().unwrap();
// assert!(o.final_logl >= unopt_logl);
// The above ran in 308.93s

let pip_info = PhyloInfoBuilder::with_attrs(
PathBuf::from("./data/phyml_protein_example/nogap_seqs.fasta"),
PathBuf::from("./data/phyml_protein_example/optimisation_pip_nj_start.newick"),
)
.build()
.unwrap();

let subst_info = PhyloInfoBuilder::with_attrs(
PathBuf::from("./data/phyml_protein_example/nogap_seqs.fasta"),
PathBuf::from("./data/phyml_protein_example/optimisation_nj_start.newick"),
)
.build()
.unwrap();
// compare the tree created with a substitution model to the one with PIP
// the trees are not identical, but very close
assert!(pip_info.tree.robinson_foulds(&subst_info.tree) <= 2);
}

#[test]
fn protein_optimise_model_tree() {
// let info = PhyloInfoBuilder::new(PathBuf::from("./data/phyml_protein_example/seqs.fasta"))
// .build()
// .unwrap();
// let model = PIPModel::<ProteinSubstModel>::new(WAG, &[1.4, 0.5]).unwrap();
// let unopt_logl = model.cost(&info);
// let o = ModelOptimiser::new(&model, &info, Empirical).run().unwrap();
// let model_opt_logl = o.final_logl;
// assert!(model_opt_logl >= unopt_logl);
// let o = TopologyOptimiser::new(&o.model, &info).run().unwrap();
// assert!(model_opt_logl >= unopt_logl);
// assert!(o.final_logl >= unopt_logl);
// // The above ran in 703.87s

// let info = PhyloInfoBuilder::new(PathBuf::from("./data/phyml_protein_example/seqs.fasta"))
// .build()
// .unwrap();
// let model = PIPModel::<ProteinSubstModel>::new(WAG, &[1.4, 0.5]).unwrap();
// let unopt_logl = model.cost(&info);
// let o = TopologyOptimiser::new(&model, &info).run().unwrap();
// assert!(o.final_logl >= unopt_logl);
// The above ran in 497.22s

let optim_model = PIPModel::<ProteinSubstModel>::new(WAG, &[49.56941, 0.09352]).unwrap();
let with_model_optim = PhyloInfoBuilder::with_attrs(
PathBuf::from("./data/phyml_protein_example/seqs.fasta"),
PathBuf::from(
"./data/phyml_protein_example/optimisation_pip_nj_start_model_optim_gaps.newick",
),
)
.build()
.unwrap();

let model = ProteinSubstModel::new(WAG, &[1.4, 0.5]).unwrap();
let wo_model_optim = PhyloInfoBuilder::with_attrs(
PathBuf::from("./data/phyml_protein_example/seqs.fasta"),
PathBuf::from(
"./data/phyml_protein_example/optimisation_pip_nj_start_model_optim_gaps.newick",
),
)
.build()
.unwrap();

assert!(with_model_optim.tree.robinson_foulds(&wo_model_optim.tree) == 0);
assert!(optim_model.cost(&with_model_optim) > model.cost(&wo_model_optim));
}

0 comments on commit 447d912

Please sign in to comment.