Skip to content

Commit

Permalink
restructure output of phylotax()
Browse files Browse the repository at this point in the history
  • Loading branch information
brendanf committed Oct 15, 2020
1 parent b9fba0e commit c5d5f67
Show file tree
Hide file tree
Showing 14 changed files with 160 additions and 67 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: phylotax
Type: Package
Title: Refine taxonomic assignment of environmental sequences using a taxonomic tree
Version: 0.0.2.1
Version: 0.0.3
Authors@R: person(given = "Brendan", family = "Furneaux",
email = "brendan.furneaux@gmail.com",
role = c("aut", "cre"))
Expand Down
17 changes: 17 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
# phlotax 0.0.3

* **BREAKING CHANGE** `phylotax` returns taxonomic tables in four categories;
* "`tip_taxa`" is the assignments which PHYLOTAX has made itself.
* "`rejected`" are primary assignments which PHYLOTAX has rejected.
* "`retained`" are primary assignments which PHYLOTAX has not rejected;
however some of them may still be ambiguous.
* "`missing`" are primary assignments whose labels are not present in the
tree, so PHYLOTAX has not done anything with them. (But note that this will
be empty if no tree was given).
* `phylotax()` now returns an S3 object of class "`phylotax`". This should not
break anything, and it allows the possibility of nice improvements in the
future.
* `phylotax()$node_taxa` now includes a "`label`" column, and populates it with
node labels if they exist, or just the numbers if they don't. Node labels are
also used in trace output.

# phylotax 0.0.2.1

* Two quick bugfix, applying to errors in `taxonomy_sintax()` and
Expand Down
75 changes: 49 additions & 26 deletions R/taxonomy.R
Original file line number Diff line number Diff line change
Expand Up @@ -541,46 +541,54 @@ new_phylotax_env <- function(tree, taxa, parent = parent.frame()) {
.parent = parent,
node_taxa = tibble::tibble(
node = integer(),
label = NULL,
rank = taxa$rank[FALSE],
taxon = character()
),
tip_taxa = dplyr::filter(taxa, .data$label %in% tree$tip.label),
tip_taxa = dplyr::filter(taxa, FALSE),
retained = dplyr::filter(taxa, .data$label %in% tree$tip.label),
rejected = dplyr::filter(taxa, FALSE),
missing = dplyr::filter(taxa, !.data$label %in% tree$tip.label),
tree = tree
)
}


phylotax_ <- function(tree, taxa, node, ranks, method, e) {
if (length(ranks) == 0) return()

nodelabel <- if (!is.null(tree$node.label)) {
tree$node.label[node - ape::Ntip(tree)]
} else {
as.character(node)
}
parents <- phangorn::Ancestors(tree, node, type = "all")
for (r in ranks) {
if (is.ordered(ranks)) r <- ordered(r, levels = levels(ranks))
if (any(e$node_taxa$node %in% parents & e$node_taxa$rank == r)) next
taxon <- clade_taxon(tree, e$tip_taxa, node, r)
taxon <- clade_taxon(tree, e$retained, node, r)
if (is.na(taxon)) {
futile.logger::flog.debug("Could not assign a %s to node %d.", r, node)
futile.logger::flog.debug("Could not assign a %s to node %s.", r, nodelabel)
for (n in phangorn::Children(tree, node)) {
phylotax_(tree, e$tip_taxa, n, ranks, method, e)
phylotax_(tree, e$retained, n, ranks, method, e)
}
break
} else {
children <- phangorn::Children(tree, node)
if (length(children) > 0) {
futile.logger::flog.info(
"Assigned node %d and its %d children to %s %s.",
node, length(children), as.character(r), taxon)
"Assigned node %s and its %d children to %s %s.",
nodelabel, length(children), as.character(r), taxon)
} else {
futile.logger::flog.info("Assigned node %d to %s %s.", node,
futile.logger::flog.info("Assigned node %s to %s %s.", nodelabel,
as.character(r), taxon)
}
ranks <- ranks[-1]
e$node_taxa <- dplyr::bind_rows(
e$node_taxa,
tibble::tibble(node = node, rank = r, taxon = taxon)
tibble::tibble(node = node, label = nodelabel, rank = r, taxon = taxon)
)
tips <- tree$tip.label[phangorn::Descendants(tree, node, type = "tips")[[1]]]
wrongTaxa <- e$tip_taxa %>%
wrongTaxa <- e$retained %>%
dplyr::filter(
.data$label %in% tips,
.data$rank == r,
Expand All @@ -596,11 +604,16 @@ phylotax_ <- function(tree, taxa, node, ranks, method, e) {
newAssign[[n]] <- unname(method[n])
}
# remove assignments which are not consistent with the one we just chose
e$tip_taxa <- dplyr::bind_rows(
dplyr::filter(e$tip_taxa, .data$rank < r),
dplyr::filter(e$tip_taxa, .data$rank >= r) %>%
dplyr::anti_join(wrongTaxa, by = names(wrongTaxa)),
newAssign
e$tip_taxa <- dplyr::bind_rows(e$tip_taxa, newAssign)
e$rejected <- dplyr::bind_rows(
e$rejected,
dplyr::filter(e$retained, .data$rank >= r) %>%
dplyr::semi_join(wrongTaxa, by = names(wrongTaxa))
)
e$retained <- dplyr::bind_rows(
dplyr::filter(e$retained, .data$rank < r),
dplyr::filter(e$retained, .data$rank >= r) %>%
dplyr::anti_join(wrongTaxa, by = names(wrongTaxa))
)
}
}
Expand Down Expand Up @@ -647,14 +660,20 @@ phylotax_ <- function(tree, taxa, node, ranks, method, e) {
#' treat each unique combination of values in these columns as a distinct
#' method.
#'
#' @return a list with two elements, "tip_taxa" and "node_taxa". "tip_taxa" is
#' a `tibble::tibble()` with the same format as `taxa`, in
#' which assignments which are inconsistent with the phylogeny have been
#' removed, and new assignments deduced or confirmed from the phylogeny.
#' These are identified by the value "phylotax" in the "method" column,
#' which is created if it does not already exist. "node_taxa" has columns
#' "node", "rank" and "taxon", giving taxonomic assignments for the nodes of
#' the tree.
#' @return an S3 object with class "`phylotax`", with five elements:
#' * "`tip_taxa` a `tibble::tibble()` with the same format as `taxa`, containing
#' taxonomy assignments made by PHYLOTAX to tips.
#' * "`node_taxa`" a `tibble::tibble()` with columns "`node`", "`label`",
#' "`rank`" and "`taxon`" giving taxonomy assignments made by PHYLOTAX to
#' internal nodes.
#' * "`rejected`" a `tibble::tibble()` with the same format as `taxa` giving
#' primary assignments which have been rejected by PHYLOTAX.
#' * "`retained`" a `tibble::tibble()` with the same format as `taxa` giving
#' primary assignments which have not been rehected by PHYLOTAX. These may
#' contain inconsistencies that PHYLOTAX was unable to resolve.
#' * "`missing`" a `tibble::tibble()` with the same format as `taxa`, giving the
#' primary assignments which have not been assessed by PHULOTAX because they
#' have labels which are not present on the tree.
#'
#' @export
phylotax <- function(
Expand All @@ -670,9 +689,13 @@ phylotax <- function(
e <- new_phylotax_env(tree, count_assignments(taxa), ranks)
ranks <- sort(unique(taxa$rank))
phylotax_(tree, taxa, phangorn::getRoot(tree), ranks, method, e)
e$tip_taxa$n_tot <- NULL
e$tip_taxa$n_diff <- NULL
as.list(e)
for (member in c("missing", "retained", "rejected", "tip_taxa"))
for (n in c("n_tot", "n_diff"))
e[[member]][[n]] <- NULL
structure(
as.list(e),
class = "phylotax"
)
}

#' Simple phylogenetic tree for use in examples
Expand Down
15 changes: 13 additions & 2 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,24 @@ supports it.
phylotax_out <- phylotax(tree = example_tree(), taxa = example_taxa())
```

PHYLOTAX returns a list containing the tree, taxa assigned to tips,
and taxa assigned to nodes. Let's look at the tip taxa assignments.
PHYLOTAX returns a list of class "`phylotax`" containing the tree,
taxa assignments for tips and internal nodes, as well as tables dividing the
primary assignments into those which were rejected, those which were retained,
and those which were missing from the input tree.

```{r}
phylotax_out$tip_taxa
```

```{r}
phylotax_out$retained
```

```{r}
phylotax_out$rejected
```


Phylotax has used the following logic:

1. It's not possible to decide what the root (node 1) is, because one of its
Expand Down
78 changes: 49 additions & 29 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@

<!-- README.md is generated from README.Rmd. Please edit that file -->

# phylotax
phylotax
========

<!-- badges: start -->

Expand All @@ -11,7 +12,8 @@ status](https://travis-ci.com/brendanf/phylotax.svg?branch=master)](https://trav
coverage](https://codecov.io/gh/brendanf/phylotax/branch/master/graph/badge.svg)](https://codecov.io/gh/brendanf/phylotax?branch=master)
<!-- badges: end -->

## Installation
Installation
------------

Install the development version from [GitHub](https://github.com/) with:

Expand All @@ -20,7 +22,8 @@ Install the development version from [GitHub](https://github.com/) with:
devtools::install_github("brendanf/phylotax")
```

## Usage
Usage
-----

The PHYLOTAX algorithm takes as input taxonomic annotations from one or
more primary taxonomic assignment algoirthms, and refines them using a
Expand All @@ -40,15 +43,14 @@ plot(example_tree(), show.node.label = TRUE)

And here is a set of taxonomic assignments for the tips of the tree,
based on two hypothetical primary assignment algorithms “XTAX” and
“YTAX”. Some of the tips have been assigned to two genera: “Tax1”
and “Tax2”. The `phylotax` package includes the function `taxtable()`
which can generate a table of this type based on the output of various
primary assignment algorithms, but all that’s important is that it
contains the columns “label”, “rank”, and “taxon”. The ranks need to be
one of “rootrank”, “domain”, “kingdom”, “phylum”, “class”, “order”,
“family”, “genus”, and “species”. Our example also has a “method”
column, which PHYLOTAX uses to identify which assignments come from the
same source.
“YTAX”. Some of the tips have been assigned to two genera: “Tax1” and
“Tax2”. The `phylotax` package includes the function `taxtable()` which
can generate a table of this type based on the output of various primary
assignment algorithms, but all that’s important is that it contains the
columns “label”, “rank”, and “taxon”. The ranks need to be one of
“rootrank”, “domain”, “kingdom”, “phylum”, “class”, “order”, “family”,
“genus”, and “species”. Our example also has a “method” column, which
PHYLOTAX uses to identify which assignments come from the same source.

``` r
example_taxa()
Expand Down Expand Up @@ -87,28 +89,46 @@ tree supports it.

``` r
phylotax_out <- phylotax(tree = example_tree(), taxa = example_taxa())
#> INFO [2020-10-14 17:35:34] Assigned node 9 and its 2 children to genus Tax2.
#> INFO [2020-10-14 17:35:34] Assigned node 10 and its 2 children to genus Tax1.
#> INFO [2020-10-15 15:02:50] Assigned node 3 and its 2 children to genus Tax2.
#> INFO [2020-10-15 15:02:50] Assigned node 4 and its 2 children to genus Tax1.
```

PHYLOTAX returns a list containing the tree, taxa assigned to tips, and
taxa assigned to nodes. Let’s look at the tip taxa assignments.
PHYLOTAX returns a list of class “`phylotax`” containing the tree, taxa
assignments for tips and internal nodes, as well as tables dividing the
primary assignments into those which were rejected, those which were
retained, and those which were missing from the input tree.

``` r
phylotax_out$tip_taxa
#> # A tibble: 10 x 4
#> label method rank taxon
#> <chr> <chr> <ord> <chr>
#> 1 C XTAX genus Tax2
#> 2 B YTAX genus Tax2
#> 3 C YTAX genus Tax2
#> 4 D YTAX genus Tax1
#> 5 F YTAX genus Tax1
#> 6 B PHYLOTAX genus Tax2
#> 7 C PHYLOTAX genus Tax2
#> 8 E PHYLOTAX genus Tax1
#> 9 F PHYLOTAX genus Tax1
#> 10 D PHYLOTAX genus Tax1
#> # A tibble: 5 x 4
#> label method rank taxon
#> <chr> <chr> <ord> <chr>
#> 1 B PHYLOTAX genus Tax2
#> 2 C PHYLOTAX genus Tax2
#> 3 E PHYLOTAX genus Tax1
#> 4 F PHYLOTAX genus Tax1
#> 5 D PHYLOTAX genus Tax1
```

``` r
phylotax_out$retained
#> # A tibble: 5 x 4
#> label method rank taxon
#> <chr> <chr> <ord> <chr>
#> 1 C XTAX genus Tax2
#> 2 B YTAX genus Tax2
#> 3 C YTAX genus Tax2
#> 4 D YTAX genus Tax1
#> 5 F YTAX genus Tax1
```

``` r
phylotax_out$rejected
#> # A tibble: 2 x 4
#> label method rank taxon
#> <chr> <chr> <ord> <chr>
#> 1 B XTAX genus Tax1
#> 2 D XTAX genus Tax2
```

Phylotax has used the following logic:
Expand Down
Binary file modified man/figures/README-unnamed-chunk-2-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
24 changes: 16 additions & 8 deletions man/phylotax.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file added tests/testthat/node_taxa.rds
Binary file not shown.
Binary file removed tests/testthat/phylotax.rds
Binary file not shown.
Binary file added tests/testthat/rejected.rds
Binary file not shown.
Binary file added tests/testthat/retained.rds
Binary file not shown.
9 changes: 9 additions & 0 deletions tests/testthat/test-consensus.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
consensus_test <- tibble::tribble(
~label, ~method, ~rank, ~taxon,
"A", "XTAX", "genus", "G1",
"A", "XTAX", "species", "G1 s1",
"A", "YTAX", "genus", "G2")

test_that("consensus is not assumed for lower taxa when higher taxa are inconsistent", {
expect_equal(length(phylotax(taxa = consensus_test)$tip_taxa$method), 0)
})
7 changes: 6 additions & 1 deletion tests/testthat/test-phylotax.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
phylotax_out <- phylotax(tree = example_tree(), taxa = example_taxa())

test_that("phylotax does not revert", {
expect_known_value(phylotax(tree = example_tree(), taxa = example_taxa()), "phylotax.rds", update = FALSE)
expect_known_value(phylotax_out$rejected, "rejected.rds", update = FALSE)
expect_known_value(phylotax_out$retained, "retained.rds", update = FALSE)
expect_known_value(phylotax_out$tip_taxa, "tip_taxa.rds", update = FALSE)
expect_known_value(phylotax_out$node_taxa, "node_taxa.rds", update = FALSE)
})
Binary file added tests/testthat/tip_taxa.rds
Binary file not shown.

0 comments on commit c5d5f67

Please sign in to comment.