-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
Parse taxonomy file to dataframe
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,5 +3,6 @@ | |
export("%>%") | ||
export(.data) | ||
export(read_dist) | ||
export(read_tax) | ||
importFrom(dplyr,"%>%") | ||
importFrom(rlang,.data) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
#' Convert taxonomy strings into dataframe of labels based on taxonomic classification
#'
#' Reads a tab-separated taxonomy file with a header row and the columns
#' \code{OTU}, \code{Size} and \code{Taxonomy}. The \code{Taxonomy} string is
#' split on \code{;} (dropping any bootstrap confidence such as \code{(100)})
#' into one column per taxonomic level, underscores are replaced by spaces
#' and the \code{Size} column is removed. A bare \code{"unclassified"} entry
#' is prefixed with the lowest classified level of the same OTU.
#'
#' @param taxonomy_filename filename of taxonomy file
#'
#' @return dataframe of taxonomic labels with the columns \code{OTU},
#'   \code{Kingdom}, \code{Phylum}, \code{Class}, \code{Order}, \code{Family},
#'   \code{Genus}, \code{tax_otu_label} (genus followed by the OTU label in
#'   parentheses) and \code{otu_label} (e.g. \code{"OTU 1"} for
#'   \code{"Otu0001"})
#' @export
#' @author Nick Lesniak, \email{nlesniak@@umich.edu}
#'
#' @examples
#' taxonomy_filepath <- system.file("extdata",
#'   "test.taxonomy",
#'   package = "mothuR"
#' )
#' taxonomy_tbl <- read_tax(taxonomy_filepath)
#' head(taxonomy_tbl)
read_tax <- function(taxonomy_filename) {
  levels <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus")
  taxonomy_df <- utils::read.table(taxonomy_filename,
    sep = "\t",
    header = TRUE,
    stringsAsFactors = FALSE
  ) %>%
    dplyr::mutate(Taxonomy = gsub("_", " ", .data[["Taxonomy"]])) %>%
    # split on ";" and discard an optional "(<confidence>)" of any width, so
    # single-digit scores and files written without scores are handled too;
    # the empty piece after the trailing ";" is removed by extra = "drop"
    tidyr::separate("Taxonomy", levels,
      sep = "(\\(\\d+\\))?;",
      extra = "drop"
    ) %>%
    dplyr::select(-dplyr::all_of("Size"))
  # in older version of mothur unclassified are listed as unclassified
  # without information from higher level classification
  # for those cases, append with lowest identified classification
  # (na.rm keeps the condition TRUE/FALSE when Genus contains NA)
  if (any(taxonomy_df[["Genus"]] == "unclassified", na.rm = TRUE)) {
    long_df <- taxonomy_df %>%
      tidyr::pivot_longer(
        cols = -dplyr::all_of("OTU"),
        names_to = "Level",
        values_to = "Classification"
      ) %>%
      # order classification level
      dplyr::mutate(Level = factor(.data[["Level"]], levels))
    # one row per OTU holding its lowest level that is not "unclassified"
    lowest_df <- long_df %>%
      dplyr::filter(.data[["Classification"]] != "unclassified") %>%
      dplyr::group_by(.data[["OTU"]]) %>%
      dplyr::filter(
        as.integer(.data[["Level"]]) == max(as.integer(.data[["Level"]]))
      ) %>%
      dplyr::ungroup() %>%
      dplyr::select("OTU", Lowest_classified = "Classification")
    taxonomy_df <- long_df %>%
      dplyr::left_join(lowest_df, by = "OTU") %>%
      dplyr::mutate(Classification = ifelse(
        # an OTU that is unclassified at every level has no match in
        # lowest_df; leave it as plain "unclassified" instead of pasting
        # the NA from the join into "NA unclassified"
        .data[["Classification"]] %in% "unclassified" &
          !is.na(.data[["Lowest_classified"]]),
        # append unclassified with lowest classification
        paste(.data[["Lowest_classified"]], .data[["Classification"]],
          sep = " "
        ),
        .data[["Classification"]]
      )) %>%
      dplyr::select(-dplyr::all_of("Lowest_classified")) %>%
      tidyr::pivot_wider(
        names_from = "Level",
        values_from = "Classification"
      )
  }
  # create label options for OTU and lowest taxonomic classification with the OTU
  taxonomy_df %>%
    dplyr::mutate(
      tax_otu_label = paste0(
        .data[["Genus"]], " (", gsub("tu0*", "TU ", .data[["OTU"]]), ")"
      ),
      tax_otu_label = gsub(" unclassified", "", .data[["tax_otu_label"]]),
      otu_label = gsub("tu0*", "TU ", .data[["OTU"]])
    )
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#' dplyr pipe | ||
#'
#' Re-exports the pipe operator imported from dplyr, so that it is available
#' to code that attaches this package.
#' @importFrom dplyr %>% | ||
#' @export | ||
dplyr::`%>%` | ||
|
||
## make R CMD CHECK shut up about the dot `.` | ||
## `.` is registered as a known global name because it appears as a bare
## symbol (the `%>%` placeholder) in piped calls within the package.
## See: \url{https://github.com/tidyverse/magrittr/issues/29} | ||
utils::globalVariables(c(".")) |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
pandoc: 2.7.3 | ||
pkgdown: 1.5.1 | ||
pkgdown: 1.6.1 | ||
pkgdown_sha: ~ | ||
articles: | ||
introduction: introduction.html | ||
last_built: 2020-09-03T20:18Z | ||
last_built: 2020-10-06T16:27Z | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.