diff --git a/R/abd_species.R b/R/abd_species.R index dec5164..de7ae8c 100644 --- a/R/abd_species.R +++ b/R/abd_species.R @@ -1,19 +1,20 @@ #' Abundances of Communities -#' +#' #' Utilities for community abundances (objects of class "abundances"). -#' +#' #' @returns `abd_species()` returns a tibble containing the species abundance columns only, #' to simplify numeric operations. -#' +#' #' `prob_species()` returns the same tibble but values are probabilities. -#' +#' #' `abd_sum()` returns the sample sizes of the communities in a numeric vector. #' @inheritParams check_divent_args #' #' @examples #' abd_species(paracou_6_abd) +#' prob_species(paracou_6_abd) #' abd_sum(paracou_6_abd) -#' +#' #' @name abd_species NULL @@ -22,11 +23,11 @@ NULL #' #' @export abd_species <- function( - abundances, + abundances, check_arguments = TRUE) { if (any(check_arguments)) check_divent_args() - + return( abundances[, !colnames(abundances) %in% non_species_columns] ) @@ -37,12 +38,12 @@ abd_species <- function( #' #' @export abd_sum <- function( - abundances, - as_numeric = FALSE, + abundances, + as_numeric = FALSE, check_arguments = TRUE) { if (any(check_arguments)) check_divent_args() - + the_abd_sum <- rowSums( abundances[, !colnames(abundances) %in% non_species_columns] ) @@ -65,13 +66,13 @@ abd_sum <- function( #' #' @export prob_species <- function( - species_distribution, + species_distribution, check_arguments = TRUE) { - + if (any(check_arguments)) check_divent_args() - + abundances <- species_distribution[ - , + , !colnames(species_distribution) %in% non_species_columns ] sample_sizes <- rowSums(abundances) diff --git a/R/accum_div_phylo.R b/R/accum_div_phylo.R index 995245a..902663e 100644 --- a/R/accum_div_phylo.R +++ b/R/accum_div_phylo.R @@ -1,24 +1,24 @@ #' Phylogenetic Diversity Accumulation of a Community -#' -#' Diversity and Entropy Accumulation Curves represent the accumulation of +#' +#' Diversity and Entropy Accumulation Curves represent the accumulation of #' entropy with respect to the sample size. -#' -#' `accum_ent_phylo()` or `accum_div_phylo()` estimate the phylogenetic +#' +#' `accum_ent_phylo()` or `accum_div_phylo()` estimate the phylogenetic #' diversity or entropy accumulation curve of a distribution. #' See [ent_tsallis] for details about the computation of entropy at each level #' of interpolation and extrapolation. -#' -#' In accumulation curves, extrapolation if done by estimating the asymptotic -#' distribution of the community and estimating entropy at different levels -#' by interpolation. -#' -#' Interpolation and extrapolation of integer orders of diversity are from +#' +#' In accumulation curves, extrapolation if done by estimating the asymptotic +#' distribution of the community and estimating entropy at different levels +#' by interpolation. +#' +#' Interpolation and extrapolation of integer orders of diversity are from #' \insertCite{Chao2014;textual}{divent}. -#' The asymptotic richness is adjusted so that the extrapolated part of the +#' The asymptotic richness is adjusted so that the extrapolated part of the #' accumulation joins the observed value at the sample size. -#' +#' #' "accumulation" objects can be plotted. -#' They generalize the classical Species Accumulation Curves (SAC) which are +#' They generalize the classical Species Accumulation Curves (SAC) which are #' diversity accumulation of order \eqn{q=0}. #' #' @inheritParams check_divent_args @@ -31,14 +31,14 @@ #' #' @references #' \insertAllCited{} -#' +#' #' @examples -#' # Richness accumulation up to the sample size. +#' # Richness accumulation up to the sample size. #' # 100 simulations only to save time. #' autoplot( #' accum_div_phylo(mock_3sp_abd, tree = mock_3sp_tree, n_simulations = 100) #' ) -#' +#' #' @name accum_div_phylo NULL @@ -53,20 +53,20 @@ accum_ent_phylo <- function(x, ...) { #' @rdname accum_div_phylo #' -#' @param levels The levels, i.e. the sample sizes of interpolation or +#' @param levels The levels, i.e. the sample sizes of interpolation or #' extrapolation: a vector of integer values. -#' +#' #' @export accum_ent_phylo.numeric <- function( x, tree, q = 0, normalize = TRUE, - levels = NULL, + levels = NULL, probability_estimator = c("Chao2015", "Chao2013","ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), richness_estimator = c("rarefy", "jackknife", "iChao1", "Chao1", "naive"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), n_simulations = 0, @@ -74,7 +74,7 @@ accum_ent_phylo.numeric <- function( show_progress = TRUE, ..., check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") @@ -84,7 +84,7 @@ accum_ent_phylo.numeric <- function( col_names <- colnames(x) species_names <- col_names[!col_names %in% non_species_columns] if (length(setdiff(species_names, rownames(tree$phylo_groups))) != 0) { - stop("Some species are missing in the tree.") + stop("Some species are missing in the tree.") } # Set levels if needed if (is.null(levels)) { @@ -92,25 +92,25 @@ accum_ent_phylo.numeric <- function( levels <- seq_len(sample_size) } } - probability_estimator <- match.arg(probability_estimator) - unveiling <- match.arg(unveiling) - richness_estimator <- match.arg(richness_estimator) - coverage_estimator <- match.arg(coverage_estimator) - + probability_estimator <- match.arg(probability_estimator) + unveiling <- match.arg(unveiling) + richness_estimator <- match.arg(richness_estimator) + coverage_estimator <- match.arg(coverage_estimator) + # Make a species_distribution the_species_distribution <- as_species_distribution(x) - + # Entropy accumulation the_entropy <- accum_ent_phylo.abundances( x = the_species_distribution, tree = tree, q = q, normalize = normalize, - levels = levels, + levels = levels, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, - jack_alpha = jack_alpha, + jack_alpha = jack_alpha, jack_max = jack_max, coverage_estimator = coverage_estimator, gamma = FALSE, @@ -133,11 +133,11 @@ accum_ent_phylo.abundances <- function( tree, q = 0, normalize = TRUE, - levels = NULL, + levels = NULL, probability_estimator = c("Chao2015", "Chao2013","ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), richness_estimator = c("rarefy", "jackknife", "iChao1", "Chao1", "naive"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), gamma = FALSE, @@ -146,7 +146,7 @@ accum_ent_phylo.abundances <- function( show_progress = TRUE, ..., check_arguments = TRUE) { - + # Species names col_names <- colnames(x) species_names <- col_names[!col_names %in% non_species_columns] @@ -158,7 +158,7 @@ accum_ent_phylo.abundances <- function( tree <- as_phylo_divent(tree) # Check species names if (length(setdiff(species_names, rownames(tree$phylo_groups))) != 0) { - stop("Some species are missing in the tree.") + stop("Some species are missing in the tree.") } # Set levels if needed if (is.null(levels)) { @@ -170,21 +170,21 @@ accum_ent_phylo.abundances <- function( levels <- seq_len(sample_size) } } - probability_estimator <- match.arg(probability_estimator) - unveiling <- match.arg(unveiling) - richness_estimator <- match.arg(richness_estimator) - coverage_estimator <- match.arg(coverage_estimator) - + probability_estimator <- match.arg(probability_estimator) + unveiling <- match.arg(unveiling) + richness_estimator <- match.arg(richness_estimator) + coverage_estimator <- match.arg(coverage_estimator) + # Calculate abundances along the tree, that are a list of matrices if (gamma) { the_phylo_abd <- phylo_abd(abundances = metacommunity(x), tree = tree) } else { the_phylo_abd <- phylo_abd(abundances = x, tree = tree) } - + # Prepare arrays to store entropy (3 dimensions: x, y, z) # and simulated entropies (4 dimensions : x, y, z, t) - # x are tree intervals, y are communities, z are levels, + # x are tree intervals, y are communities, z are levels, # t are simulations. # Add an array to store simulation envelopes, where # t are quantiles of simulations, inf and sup. @@ -207,10 +207,10 @@ accum_ent_phylo.abundances <- function( # Prepare the progress bar if (show_progress & interactive()) { cli::cli_progress_bar( - "Computing entropy", + "Computing entropy", total = (length(the_phylo_abd) * n_communities) * (1 + n_simulations)) } - + # Calculate entropy along the tree for (x_interval in seq_along(the_phylo_abd)) { if (n_simulations > 0) { @@ -233,8 +233,8 @@ accum_ent_phylo.abundances <- function( # Max number of species in simulated communities sp_sim <- max( vapply( - comm_sim.list, - FUN = dim, + comm_sim.list, + FUN = dim, FUN.VALUE = c(0L, 0L) )[2, ] ) @@ -245,7 +245,7 @@ accum_ent_phylo.abundances <- function( # Move the simulations from the list to the array for (simulation in seq_len(n_simulations)) { for (community in seq_along(comm_sim.list)) { - # Number of species in the simulation + # Number of species in the simulation # (= number of columns - 2 for site and weight) sp_sim <- dim(comm_sim.list[[community]])[2] - 2 # Pick a simulation. Store simulated species, let extra cols = 0 @@ -256,18 +256,18 @@ accum_ent_phylo.abundances <- function( } } } - + for (y_community in seq_len(n_communities)) { # Calculate the profile of each community # Actual data ent_phylo_abd[x_interval, y_community, ] <- accum_tsallis.numeric( - x = the_phylo_abd[[x_interval]][, y_community], + x = the_phylo_abd[[x_interval]][, y_community], q = q, - levels = levels, + levels = levels, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, - jack_alpha = jack_alpha, + jack_alpha = jack_alpha, jack_max = jack_max, coverage_estimator = coverage_estimator, n_simulations = 0, @@ -275,17 +275,17 @@ accum_ent_phylo.abundances <- function( show_progress = FALSE, check_arguments = FALSE )$entropy - + for (t_simulation in seq_len(n_simulations)) { # Entropy of simulated communities ent_phylo_sim[x_interval, y_community, , t_simulation] <- accum_tsallis.numeric( x = comm_sim[, y_community, t_simulation], q = q, - levels = levels, + levels = levels, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, - jack_alpha = jack_alpha, + jack_alpha = jack_alpha, jack_max = jack_max, coverage_estimator = coverage_estimator, n_simulations = 0, @@ -301,10 +301,10 @@ accum_ent_phylo.abundances <- function( for (z_level in seq_along(levels)) { # Quantiles, recentered ent_phylo_envelope[x_interval, y_community, z_level, ] <- stats::quantile( - ent_phylo_sim[x_interval, y_community, z_level, ], + ent_phylo_sim[x_interval, y_community, z_level, ], probs = c(alpha / 2, 1 - alpha / 2), na.rm = TRUE - ) - mean(ent_phylo_sim[x_interval, y_community, z_level, ]) + + ) - mean(ent_phylo_sim[x_interval, y_community, z_level, ]) + ent_phylo_abd[x_interval, y_community, z_level] } } @@ -315,7 +315,7 @@ accum_ent_phylo.abundances <- function( } } if (show_progress & interactive()) cli::cli_progress_done() - + # Average entropy # Actual data ent_community <- apply( @@ -335,18 +335,18 @@ accum_ent_phylo.abundances <- function( w = tree$intervals ) } - + # Format the result the_profile_phylo <- ent.tibble( - ent.matrix = ent_community, - x = x, + ent.matrix = ent_community, + x = x, levels = levels ) # Add the estimator sample_sizes <- rowSums(x[, species_names]) names(sample_sizes) = x$site the_profile_phylo <- dplyr::mutate( - the_profile_phylo, + the_profile_phylo, estimator = dplyr::case_when( .data$level == sample_sizes[.data$site] ~ "Sample", .data$level < sample_sizes[.data$site] ~ "Interpolation", @@ -357,13 +357,13 @@ accum_ent_phylo.abundances <- function( # Add simulation columns if (n_simulations > 0) { ent_inf <- ent.tibble( - ent.matrix = ent_quantiles[, , 1], - x = x, + ent.matrix = ent_quantiles[, , 1], + x = x, levels = levels ) ent_sup <- ent.tibble( - ent.matrix = ent_quantiles[, , 2], - x = x, + ent.matrix = ent_quantiles[, , 2], + x = x, levels = levels ) the_profile_phylo <- dplyr::bind_cols( @@ -372,7 +372,7 @@ accum_ent_phylo.abundances <- function( sup = ent_sup$entropy ) } - + class(the_profile_phylo) <- c("accumulation", class(the_profile_phylo)) return(the_profile_phylo) } @@ -394,11 +394,11 @@ accum_div_phylo.numeric <- function( tree, q = 0, normalize = TRUE, - levels = NULL, + levels = NULL, probability_estimator = c("Chao2015", "Chao2013","ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), richness_estimator = c("rarefy", "jackknife", "iChao1", "Chao1", "naive"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), n_simulations = 0, @@ -406,7 +406,7 @@ accum_div_phylo.numeric <- function( show_progress = TRUE, ..., check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") @@ -416,7 +416,7 @@ accum_div_phylo.numeric <- function( col_names <- colnames(x) species_names <- col_names[!col_names %in% non_species_columns] if (length(setdiff(species_names, rownames(tree$phylo_groups))) != 0) { - stop("Some species are missing in the tree.") + stop("Some species are missing in the tree.") } # Set levels if needed if (is.null(levels)) { @@ -424,25 +424,25 @@ accum_div_phylo.numeric <- function( levels <- seq_len(sample_size) } } - probability_estimator <- match.arg(probability_estimator) - unveiling <- match.arg(unveiling) - richness_estimator <- match.arg(richness_estimator) - coverage_estimator <- match.arg(coverage_estimator) - + probability_estimator <- match.arg(probability_estimator) + unveiling <- match.arg(unveiling) + richness_estimator <- match.arg(richness_estimator) + coverage_estimator <- match.arg(coverage_estimator) + # Make a the_species_distribution the_species_distribution <- as_species_distribution(x) - + # Diversity accumulation the_diversity <- accum_div_phylo.abundances( x = the_species_distribution, tree = tree, q = q, normalize = normalize, - levels = levels, + levels = levels, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, - jack_alpha = jack_alpha, + jack_alpha = jack_alpha, jack_max = jack_max, coverage_estimator = coverage_estimator, gamma = FALSE, @@ -451,7 +451,7 @@ accum_div_phylo.numeric <- function( show_progress = show_progress, check_arguments = FALSE ) - + # Return return(the_diversity) } @@ -465,11 +465,11 @@ accum_div_phylo.abundances <- function( tree, q = 0, normalize = TRUE, - levels = NULL, + levels = NULL, probability_estimator = c("Chao2015", "Chao2013","ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), richness_estimator = c("rarefy", "jackknife", "iChao1", "Chao1", "naive"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), gamma = FALSE, @@ -478,7 +478,7 @@ accum_div_phylo.abundances <- function( show_progress = TRUE, ..., check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") @@ -488,7 +488,7 @@ accum_div_phylo.abundances <- function( col_names <- colnames(x) species_names <- col_names[!col_names %in% non_species_columns] if (length(setdiff(species_names, rownames(tree$phylo_groups))) != 0) { - stop("Some species are missing in the tree.") + stop("Some species are missing in the tree.") } # Set levels if needed if (is.null(levels)) { @@ -500,17 +500,17 @@ accum_div_phylo.abundances <- function( levels <- seq_len(sample_size) } } - probability_estimator <- match.arg(probability_estimator) - unveiling <- match.arg(unveiling) + probability_estimator <- match.arg(probability_estimator) + unveiling <- match.arg(unveiling) richness_estimator <- match.arg(richness_estimator) coverage_estimator <- match.arg(coverage_estimator) - + the_entropy <- accum_ent_phylo.abundances( x, tree = tree, q = q, normalize = normalize, - levels = levels, + levels = levels, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, @@ -523,10 +523,10 @@ accum_div_phylo.abundances <- function( show_progress = show_progress, check_arguments = FALSE ) - + # Calculate diversity the_diversity <- dplyr::mutate( - the_entropy, + the_entropy, diversity = exp_q(.data$entropy, q = q), .keep = "unused" ) @@ -542,10 +542,10 @@ accum_div_phylo.abundances <- function( #' Make a long tibble of entropy with a matrix of entropy -#' +#' #' Utility for [accum_ent_phylo.abundances] #' -#' @param ent.matrix The matrix of entropies. +#' @param ent.matrix The matrix of entropies. #' Rows are communities, columns are orders of entropy. #' @param x The species distribution. #' @param levels The levels of interpolation and extrapolation. @@ -554,7 +554,7 @@ accum_div_phylo.abundances <- function( #' @noRd #' ent.tibble <- function(ent.matrix, x, levels) { - + if (!is.matrix(ent.matrix)) { # ent.matrix may be a numeric vector (single community / min and max) ent.matrix <- t(as.matrix(ent.matrix)) diff --git a/R/accum_hill.R b/R/accum_hill.R index 027122e..592f178 100644 --- a/R/accum_hill.R +++ b/R/accum_hill.R @@ -1,24 +1,24 @@ #' Diversity Accumulation of a Community -#' -#' Diversity and Entropy Accumulation Curves represent the accumulation of +#' +#' Diversity and Entropy Accumulation Curves represent the accumulation of #' entropy and diversity with respect to the sample size. -#' -#' `accum_hill()` or `accum_tsallis()` estimate the diversity or entropy accumulation +#' +#' `accum_hill()` or `accum_tsallis()` estimate the diversity or entropy accumulation #' curve of a distribution. #' See [ent_tsallis] for details about the computation of entropy at each level #' of interpolation and extrapolation. -#' -#' In accumulation curves, extrapolation is done by estimating the asymptotic -#' distribution of the community and estimating entropy at different levels -#' by interpolation. -#' -#' Interpolation and extrapolation of integer orders of diversity are from +#' +#' In accumulation curves, extrapolation is done by estimating the asymptotic +#' distribution of the community and estimating entropy at different levels +#' by interpolation. +#' +#' Interpolation and extrapolation of integer orders of diversity are from #' \insertCite{Chao2014;textual}{divent}. -#' The asymptotic richness is adjusted so that the extrapolated part of the +#' The asymptotic richness is adjusted so that the extrapolated part of the #' accumulation joins the observed value at the sample size. -#' +#' #' "accumulation" objects can be plotted. -#' They generalize the classical Species Accumulation Curves (SAC) which are +#' They generalize the classical Species Accumulation Curves (SAC) which are #' diversity accumulation of order \eqn{q=0}. #' #' @inheritParams check_divent_args @@ -31,11 +31,11 @@ #' #' @references #' \insertAllCited{} -#' +#' #' @examples #' # Paracou 6 subplot 1 #' autoplot(accum_hill(paracou_6_abd[1, ])) -#' +#' #' @name accum_hill NULL @@ -50,18 +50,18 @@ accum_tsallis <- function(x, ...) { #' @rdname accum_hill #' -#' @param levels The levels, i.e. the sample sizes of interpolation or +#' @param levels The levels, i.e. the sample sizes of interpolation or #' extrapolation: a vector of integer values. -#' +#' #' @export accum_tsallis.numeric <- function( - x, + x, q = 0, - levels = seq_len(sum(x)), + levels = seq_len(sum(x)), probability_estimator = c("Chao2015", "Chao2013","ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), richness_estimator = c("rarefy", "jackknife", "iChao1", "Chao1", "naive"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), n_simulations = 0, @@ -74,18 +74,18 @@ accum_tsallis.numeric <- function( check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") } - probability_estimator <- match.arg(probability_estimator) - unveiling <- match.arg(unveiling) - richness_estimator <- match.arg(richness_estimator) - coverage_estimator <- match.arg(coverage_estimator) - + probability_estimator <- match.arg(probability_estimator) + unveiling <- match.arg(unveiling) + richness_estimator <- match.arg(richness_estimator) + coverage_estimator <- match.arg(coverage_estimator) + if (!is_integer_values(x)) { warning( "Integer abundance values are required to estimate community probabilities. Abundances have been rounded." ) x <- round(x) } - + # Eliminate 0 abd <- x[x > 0] # Sample size @@ -94,7 +94,7 @@ accum_tsallis.numeric <- function( prob <- abd / sample_size # Number of observed species s_obs <- length(abd) - + # Prepare the vector of results ent_level <- numeric(length(levels)) ent_estimator <- character(length(levels)) @@ -104,7 +104,7 @@ accum_tsallis.numeric <- function( } # i must be initialized if the accumulation contains extrapolation only i <- 0 - + # Interpolation ---- levels_interp <- levels[levels < sample_size] # Calculate entropy at each level @@ -112,21 +112,21 @@ accum_tsallis.numeric <- function( # Calculate Entropy i <- which(levels == level) ent_level[i] <- ent_tsallis.numeric( - abd, - q = q, - level = level, + abd, + q = q, + level = level, as_numeric = TRUE, check_arguments = FALSE ) ent_estimator[i] <- "Interpolation" if (show_progress & interactive()) cli::cli_progress_update(set = i) } - + # level == Sample Size ---- if (any(levels == sample_size)) { i <- which(levels == sample_size) ent_level[i] <- ent_tsallis.numeric( - prob, + prob, q = q, as_numeric = TRUE, check_arguments = FALSE @@ -134,7 +134,7 @@ accum_tsallis.numeric <- function( ent_estimator[i] <- "Sample" if (show_progress & interactive()) cli::cli_progress_update(set = i) } - + # Extrapolation ---- # Don't use ent_tsallis for speed: probability extrapolation should be run once only. levels_extrap <- levels[levels > sample_size] @@ -145,8 +145,8 @@ accum_tsallis.numeric <- function( abd, estimator = probability_estimator, unveiling = unveiling, - richness_estimator = richness_estimator, - jack_alpha = 0.05, + richness_estimator = richness_estimator, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = coverage_estimator, q = q, @@ -161,7 +161,7 @@ accum_tsallis.numeric <- function( s_obs <- sum(abd > 0) s_0 <- length(prob_unv) - s_obs # Extrapolate richness (the vector is levels_extrap) - ent_level[(i + 1):length(levels)] <- s_obs + + ent_level[(i + 1):length(levels)] <- s_obs + s_0 * (1 - (1 - s_1 / (sample_size * s_0 + s_1))^(levels_extrap - sample_size)) - 1 } else { # No singleton @@ -179,7 +179,7 @@ accum_tsallis.numeric <- function( # Estimate observed entropy ent_obs <- -sum(prob * log(prob)) # Interpolation (the vector is levels_extrap) - ent_level[(i + 1):length(levels)] <- sample_size / levels_extrap * ent_obs + + ent_level[(i + 1):length(levels)] <- sample_size / levels_extrap * ent_obs + (levels_extrap - sample_size) / levels_extrap * ent_est ent_estimator[(i + 1):length(levels)] <- richness_estimator if (show_progress & interactive()) { @@ -197,7 +197,7 @@ accum_tsallis.numeric <- function( } } else { # Valid extrapolation (the vector is levels_extrap) - ent_level[(i + 1):length(levels)] <- 1 - 1 / levels_extrap - + ent_level[(i + 1):length(levels)] <- 1 - 1 / levels_extrap - (1 - 1 / levels_extrap) * sum(abd * (abd - 1)) / sample_size / (sample_size - 1) } ent_estimator[(i + 1):length(levels)] <- "Chao2014" @@ -209,14 +209,14 @@ accum_tsallis.numeric <- function( for (level in levels_extrap) { # Abundance frequence count at level (Chao et al., 2014, eq. 5) s_nu <- vapply( - seq_len(level), + seq_len(level), function(nu) { sum( exp( lchoose(level, nu) + nu * log(prob_unv) + (level - nu) * log(1 - prob_unv) ) ) - }, + }, FUN.VALUE = 0.0 ) # Estimate entropy (Chao et al., 2014, eq. 6) @@ -248,8 +248,8 @@ accum_tsallis.numeric <- function( abd, estimator = probability_estimator, unveiling = unveiling, - richness_estimator = richness_estimator, - jack_alpha = 0.05, + richness_estimator = richness_estimator, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = coverage_estimator, q = q, @@ -264,17 +264,17 @@ accum_tsallis.numeric <- function( communities <- communities / level # Calculate entropy ent_sim <- apply( - communities, - MARGIN = 2, - FUN = ent_tsallis.numeric, + communities, + MARGIN = 2, + FUN = ent_tsallis.numeric, q = q, - as_numeric = TRUE, + as_numeric = TRUE, check_arguments = FALSE ) i <- which(levels == level) # Store quantiles ent_sim_quantiles[i, ] <- stats::quantile( - ent_sim, + ent_sim, probs = c(alpha / 2, 1 - alpha / 2) ) if (show_progress & interactive()) cli::cli_progress_update(set = i) @@ -298,7 +298,7 @@ accum_tsallis.numeric <- function( ) } class(the_accum_tsallis) <- c("accumulation", class(the_accum_tsallis)) - + return(the_accum_tsallis) } @@ -309,11 +309,11 @@ accum_tsallis.numeric <- function( accum_tsallis.abundances <- function( x, q = 0, - levels = NULL, + levels = NULL, probability_estimator = c("Chao2015", "Chao2013","ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), richness_estimator = c("rarefy", "jackknife", "iChao1", "Chao1", "naive"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), n_simulations = 0, @@ -326,9 +326,9 @@ accum_tsallis.abundances <- function( check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") } - probability_estimator <- match.arg(probability_estimator) - unveiling <- match.arg(unveiling) - richness_estimator <- match.arg(richness_estimator) + probability_estimator <- match.arg(probability_estimator) + unveiling <- match.arg(unveiling) + richness_estimator <- match.arg(richness_estimator) coverage_estimator <- match.arg(coverage_estimator) # Set levels if needed @@ -343,17 +343,17 @@ accum_tsallis.abundances <- function( # Apply accum_tsallis.numeric() to each site accum_tsallis_list <- apply( # Eliminate site and weight columns - x[, !colnames(x) %in% non_species_columns], + x[, !colnames(x) %in% non_species_columns], # Apply to each row MARGIN = 1, FUN = accum_tsallis.numeric, # Arguments q = q, - levels = levels, + levels = levels, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, - jack_alpha = jack_alpha, + jack_alpha = jack_alpha, jack_max = jack_max, coverage_estimator = coverage_estimator, n_simulations = n_simulations, @@ -361,14 +361,14 @@ accum_tsallis.abundances <- function( show_progress = show_progress, check_arguments = FALSE ) - + # Add site names if needed if ("site" %in% colnames(x)) { site_names <- x$site } else { site_names <- paste("site", seq_len(nrow(x)), sep = "_") } - + # Make a tibble with site, level and entropy the_accum_tsallis <- tibble::tibble( site = rep(site_names, each = length(levels)), @@ -376,7 +376,7 @@ accum_tsallis.abundances <- function( do.call(rbind.data.frame, accum_tsallis_list) ) class(the_accum_tsallis) <- c("accumulation", class(the_accum_tsallis)) - + return(the_accum_tsallis) } @@ -393,13 +393,13 @@ accum_hill <- function(x, ...) { #' #' @export accum_hill.numeric <- function( - x, + x, q = 0, - levels = seq_len(sum(x)), + levels = seq_len(sum(x)), probability_estimator = c("Chao2015", "Chao2013","ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), richness_estimator = c("rarefy", "jackknife", "iChao1", "Chao1", "naive"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), n_simulations = 0, @@ -407,25 +407,25 @@ accum_hill.numeric <- function( show_progress = TRUE, ..., check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") } - probability_estimator <- match.arg(probability_estimator) - unveiling <- match.arg(unveiling) - richness_estimator <- match.arg(richness_estimator) + probability_estimator <- match.arg(probability_estimator) + unveiling <- match.arg(unveiling) + richness_estimator <- match.arg(richness_estimator) coverage_estimator <- match.arg(coverage_estimator) # Accumulate entropy the_accum_tsallis <- accum_tsallis.numeric( x, q = q, - levels = levels, + levels = levels, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, - jack_alpha = jack_alpha, + jack_alpha = jack_alpha, jack_max = jack_max, coverage_estimator = coverage_estimator, n_simulations = n_simulations, @@ -433,7 +433,7 @@ accum_hill.numeric <- function( show_progress = show_progress, check_arguments = FALSE ) - + # Calculate diversity the_accum_hill <- dplyr::mutate( the_accum_tsallis, @@ -458,11 +458,11 @@ accum_hill.numeric <- function( accum_hill.abundances <- function( x, q = 0, - levels = NULL, + levels = NULL, probability_estimator = c("Chao2015", "Chao2013","ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), richness_estimator = c("rarefy", "jackknife", "iChao1", "Chao1", "naive"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), n_simulations = 0, @@ -470,14 +470,14 @@ accum_hill.abundances <- function( show_progress = TRUE, ..., check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") } - probability_estimator <- match.arg(probability_estimator) - unveiling <- match.arg(unveiling) - richness_estimator <- match.arg(richness_estimator) + probability_estimator <- match.arg(probability_estimator) + unveiling <- match.arg(unveiling) + richness_estimator <- match.arg(richness_estimator) coverage_estimator <- match.arg(coverage_estimator) # Set levels if needed @@ -492,17 +492,17 @@ accum_hill.abundances <- function( # Apply accum_tsallis.numeric() to each site accum_hill_list <- apply( # Eliminate site and weight columns - x[, !colnames(x) %in% non_species_columns], + x[, !colnames(x) %in% non_species_columns], # Apply to each row MARGIN = 1, FUN = accum_hill.numeric, # Arguments q = q, - levels = levels, + levels = levels, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, - jack_alpha = jack_alpha, + jack_alpha = jack_alpha, jack_max = jack_max, coverage_estimator = coverage_estimator, n_simulations = n_simulations, @@ -510,14 +510,14 @@ accum_hill.abundances <- function( show_progress = show_progress, check_arguments = FALSE ) - + # Add site names if needed if ("site" %in% colnames(x)) { site_names <- x$site } else { site_names <- paste("site", seq_len(nrow(x)), sep = "_") } - + # Make a tibble with site, level and diversity the_accum_hill <- tibble::tibble( site = rep(site_names, each = length(levels)), @@ -525,6 +525,6 @@ accum_hill.abundances <- function( do.call(rbind.data.frame, accum_hill_list) ) class(the_accum_hill) <- c("accumulation", class(the_accum_hill)) - + return(the_accum_hill) } diff --git a/R/accum_sp.R b/R/accum_sp.R index 2ec4f9c..b34525d 100644 --- a/R/accum_sp.R +++ b/R/accum_sp.R @@ -1,33 +1,33 @@ #' Spatial Accumulation of Diversity #' #' A spatial accumulation is a measure of diversity with respect to the distance from individuals. -#' -#' Objects of class `accum_sp` contain the value of diversity +#' +#' Objects of class `accum_sp` contain the value of diversity #' (`accum_sp_diversity` objects), entropy (`accum_sp_entropy` objects) or #' mixing (`accum_sp_mixing` objects) at distances from the individuals. -#' -#' These objects are lists: -#' +#' +#' These objects are lists: +#' #' - `X` contains the [dbmss::wmppp] point pattern, #' - `accumulation` is a 3-dimensional array, with orders of diveristy in rows, #' neighborhood size (number of points or distance) in columns and a single slice #' for the observed entropy, diversity or mixing. #' - `neighborhoods` is a similar 3-dimensional array with one slice per point #' of `X`. -#' +#' #' They can be plotted or mapped. #' @aliases accum_sp accum_sp_entropy accum_sp_diversity accum_sp_mixing -#' +#' #' @inheritParams check_divent_args -#' +#' #' @name accum_sp NULL #' @rdname accum_sp -#' +#' #' @param x an `accum_sp` object. #' @param ... Additional arguments to be passed to [plot], or, in `plot_map()`, -#' to [spatstat.explore::bw.smoothppp] and [spatstat.explore::density.ppp] to +#' to [spatstat.explore::bw.smoothppp] and [spatstat.explore::density.ppp] to #' control the kernel smoothing and to [spatstat.geom::plot.im] to plot the image. #' @param type plotting parameter. Default is "l". #' @param main main title of the plot. @@ -38,77 +38,77 @@ NULL #' @param line_width width of the Diversity Accumulation Curve line. #' @param col_shade The color of the shaded confidence envelope. #' @param col_border The color of the borders of the confidence envelope. -#' +#' #' @returns `plot.accum_sp()` returns `NULL`. #' @export #' plot.accum_sp <- function( - x, - ..., - q = dimnames(x$accumulation)$q[1], - type = "l", - main = "accumulation of ...", - xlab = "Sample size...", - ylab = "Diversity...", + x, + ..., + q = dimnames(x$accumulation)$q[1], + type = "l", + main = "accumulation of ...", + xlab = "Sample size...", + ylab = "Diversity...", ylim = NULL, - show_h0 = TRUE, - line_width = 2, - col_shade = "grey75", + show_h0 = TRUE, + line_width = 2, + col_shade = "grey75", col_border = "red") { - + # Prepare the parameters h <- accum_sp_plot_helper(x, q, main, xlab, ylab, ylim) - + # Prepare the plot plot( - x = dimnames(x$accumulation)[[2]], - y = as.numeric(x$accumulation[h$q_row, , 1]), + x = dimnames(x$accumulation)[[2]], + y = as.numeric(x$accumulation[h$q_row, , 1]), ylim = c(h$ymin, h$ymax), - type = h$type, - main = h$main, - xlab = h$xlab, + type = h$type, + main = h$main, + xlab = h$xlab, ylab = h$ylab ) - + if (dim(x$accumulation)[3] == 4) { # Confidence envelope is available graphics::polygon( - x = c(rev(dimnames(x$accumulation)[[2]]), dimnames(x$accumulation)[[2]]), - y = c(rev(x$accumulation[h$q_row, , 4]), x$accumulation[h$q_row, , 3]), - col = col_shade, + x = c(rev(dimnames(x$accumulation)[[2]]), dimnames(x$accumulation)[[2]]), + y = c(rev(x$accumulation[h$q_row, , 4]), x$accumulation[h$q_row, , 3]), + col = col_shade, border = FALSE ) # Add red lines on borders of polygon graphics::lines( - x = dimnames(x$accumulation)[[2]], - y = x$accumulation[h$q_row, , 4], - col = col_border, + x = dimnames(x$accumulation)[[2]], + y = x$accumulation[h$q_row, , 4], + col = col_border, lty = 2 ) graphics::lines( - x = dimnames(x$accumulation)[[2]], - y = x$accumulation[h$q_row, , 3], - col = col_border, + x = dimnames(x$accumulation)[[2]], + y = x$accumulation[h$q_row, , 3], + col = col_border, lty = 2 ) # Redraw the SAC graphics::lines( - x = dimnames(x$accumulation)[[2]], - y = x$accumulation[h$q_row, , 1], - lwd = line_width, + x = dimnames(x$accumulation)[[2]], + y = x$accumulation[h$q_row, , 1], + lwd = line_width, ... ) - + # H0 if (show_h0) { graphics::lines( - x = dimnames(x$accumulation)[[2]], - y = x$accumulation[h$q_row, , 2], + x = dimnames(x$accumulation)[[2]], + y = x$accumulation[h$q_row, , 2], lty = 2 - ) - } + ) + } } - + return(invisible(NULL)) } @@ -121,23 +121,23 @@ plot.accum_sp <- function( #' @export #' autoplot.accum_sp <- function( - object, - ..., + object, + ..., q = dimnames(object$accumulation)$q[1], - main = "Accumulation of ...", - xlab = "Sample size...", - ylab = "Diversity...", - ylim = NULL, - show_h0 = TRUE, - col_shade = "grey75", + main = "Accumulation of ...", + xlab = "Sample size...", + ylab = "Diversity...", + ylim = NULL, + show_h0 = TRUE, + col_shade = "grey75", col_border = "red") { - + # Prepare the parameters h <- accum_sp_plot_helper(object, q, main, xlab, ylab, ylim) - + # Prepare the data df <- data.frame( - x = as.numeric(dimnames(object$accumulation)[[2]]), + x = as.numeric(dimnames(object$accumulation)[[2]]), y = object$accumulation[h$q_row, , 1] ) if (dim(object$accumulation)[3] == 4) { @@ -146,36 +146,36 @@ autoplot.accum_sp <- function( df$high <- object$accumulation[h$q_row, , 4] if (show_h0) df$H0 <- object$accumulation[h$q_row, , 2] } - + # Prepare the plot the_plot <- ggplot2::ggplot( - data = df, + data = df, ggplot2::aes(x = .data$x, y = .data$y) ) + ggplot2::geom_line() + ggplot2::labs(title = h$main, x = h$xlab, y = h$ylab) - + if (dim(object$accumulation)[3] == 4) { the_plot <- the_plot + ggplot2::geom_ribbon( ggplot2::aes( - ymin = .data$low, + ymin = .data$low, ymax = .data$high - ), - fill = col_shade, + ), + fill = col_shade, alpha = 0.5) + # Add red lines on borders of polygon ggplot2::geom_line( - ggplot2::aes(y = .data$low), - colour = col_border, + ggplot2::aes(y = .data$low), + colour = col_border, linetype = 2 ) + ggplot2::geom_line( - ggplot2::aes(y = .data$high), - colour = col_border, + ggplot2::aes(y = .data$high), + colour = col_border, linetype = 2 ) - + # H0 if (show_h0) { the_plot <- the_plot + @@ -187,18 +187,18 @@ autoplot.accum_sp <- function( #' @rdname accum_sp -#' +#' #' @param accum an object to map. #' @param neighborhood The neighborhood size, i.e. the number of neighbors or the distance to consider. -#' @param sigma the smoothing bandwidth. -#' The standard deviation of the isotropic smoothing kernel. +#' @param sigma the smoothing bandwidth. +#' The standard deviation of the isotropic smoothing kernel. #' Either a numerical value, or a function that computes an appropriate value of sigma. #' @param allow_jitter if `TRUE`, duplicated points are jittered to avoid their #' elimination by the smoothing procedure. #' @param weighted if `TRUE`, the weight of the points is used by the smoothing #' procedure. -#' @param adjust force the automatically selected bandwidth to be multiplied -#' by `adjust`. +#' @param adjust force the automatically selected bandwidth to be multiplied +#' by `adjust`. #' Setting it to values lower than one (1/2 for example) will sharpen the estimation. #' @param dim_x the number of columns (pixels) of the resulting map, 128 by default. #' @param dim_y the number of rows (pixels) of the resulting map, 128 by default. @@ -211,12 +211,12 @@ autoplot.accum_sp <- function( #' @param point_col the color of the points. #' Standard base graphic arguments such as `main` can be used. #' @param suppress_margins if `TRUE`, the map has reduced margins. -#' -#' @returns `plot_map` returns a [spatstat.geom::im] object that can be used to produce +#' +#' @returns `plot_map` returns a [spatstat.geom::im] object that can be used to produce #' alternative maps. -# +# #' @export -#' +#' #' @examples #' # Generate a random community #' X <- rspcommunity(1, size = 50, species_number = 10) @@ -224,36 +224,36 @@ autoplot.accum_sp <- function( #' accum <- accum_sp_hill(X, orders = 0, r = c(0, 0.2), individual = TRUE) #' # Plot the local richness at distance = 0.2 #' plot_map(accum, q = 0, neighborhood = 0.2) -#' +#' plot_map <- function( - accum, - q = dimnames(accum$accumulation)$q[1], - neighborhood = dplyr::last(colnames(accum$neighborhoods)), - sigma = spatstat.explore::bw.scott(accum$X, isotropic = TRUE), + accum, + q = dimnames(accum$accumulation)$q[1], + neighborhood = dplyr::last(colnames(accum$neighborhoods)), + sigma = spatstat.explore::bw.scott(accum$X, isotropic = TRUE), allow_jitter = TRUE, - weighted = FALSE, - adjust = 1, - dim_x = 128, - dim_y = 128, - main = "", - col = grDevices::terrain.colors(256), + weighted = FALSE, + adjust = 1, + dim_x = 128, + dim_y = 128, + main = "", + col = grDevices::terrain.colors(256), contour = TRUE, - contour_levels = 10, + contour_levels = 10, contour_col = "dark red", - points = FALSE, - pch = 20, + points = FALSE, + pch = 20, point_col = "black", suppress_margins = TRUE, - ..., + ..., check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() } if (is.null(dim(accum$neighborhoods))) { stop("The Accumulation object does not contain individual data to plot.") } - + # Jitter if (allow_jitter) { # Find duplicates @@ -266,7 +266,7 @@ plot_map <- function( accum$X$y[the_dups] <- the_dups.wmppp$y } } - + # Convert numeric values of q and Neighborhood into their index q_row <- which(as.numeric(rownames(accum$neighborhoods)) == q) nbd_col <- which(as.numeric(colnames(accum$neighborhoods)) == neighborhood) @@ -276,60 +276,60 @@ plot_map <- function( stop("Incorrect q.") } if (length(accum$neighborhoods[, nbd_col, ]) == 0) { - stop("Incorrect neighborhood.") + stop("Incorrect neighborhood.") } - + # Detect points with NA values is_not_na <- !is.na(accum$neighborhoods[q_row, nbd_col, ]) - + # Weights if (weighted) { the_weights <- spatstat.geom::marks(accum$X)[is_not_na] } else { the_weights <- rep(1, spatstat.geom::npoints(accum$X)) } - + # Prepare the ppp to plot the_ppp <- spatstat.geom::unmark(accum$X) spatstat.geom::marks(the_ppp) <- accum$neighborhoods[q_row, nbd_col, ] the_ppp <- the_ppp[is_not_na] class(the_ppp) <- "ppp" - + # Image if (suppress_margins) { par_old <- graphics::par("mar") graphics::par(mar = c(0, 0, 2, 0)) } the_image <- spatstat.explore::Smooth.ppp( - the_ppp, - sigma = sigma, - ..., - weights = the_weights, + the_ppp, + sigma = sigma, + ..., + weights = the_weights, adjust = adjust, dimyx = c(dim_y, dim_x) ) - + plot(the_image, main = main, col = col, ...) if (contour) { graphics::contour( - the_image, - add = TRUE, - nlevels = contour_levels, + the_image, + add = TRUE, + nlevels = contour_levels, col = contour_col ) } if(points) { graphics::points( - x = the_ppp$x, - y = the_ppp$y, - pch = pch, + x = the_ppp$x, + y = the_ppp$y, + pch = pch, col = point_col ) } if (suppress_margins) { graphics::par("mar" = par_old) } - + # Return the image for further processing return(invisible(the_image)) } @@ -348,13 +348,13 @@ plot_map <- function( #' @noRd #' accum_sp_plot_helper <- function(x, q, main, xlab, ylab, ylim) { - + # Find the row in the accumulation table q_row <- which(dimnames(x$accumulation)$q == q) if (length(q_row) != 1) { stop("The value of q does not correspond to any accumulation curve.") } - + if (is.null(ylim)) { # Evaluate ylim if not set by an argument ymin <- min(x$accumulation[q_row, , ]) @@ -363,7 +363,7 @@ accum_sp_plot_helper <- function(x, q, main, xlab, ylab, ylim) { ymin <- ylim[1] ymax <- ylim[2] } - + if (main == "Accumulation of ...") { # Prepare the main title if (inherits(x, "accum_sp_entropy")) { @@ -378,7 +378,7 @@ accum_sp_plot_helper <- function(x, q, main, xlab, ylab, ylim) { } if (inherits(x, "accum_sp_mixing")) main <- paste("Mixing index of order", q) } - + if (xlab == "Sample size...") { if (names(dimnames(x$accumulation)[2]) == "n") { xlab <- "Number of individuals" @@ -386,7 +386,7 @@ accum_sp_plot_helper <- function(x, q, main, xlab, ylab, ylim) { xlab <- "Distance from the central individual" } } - + if (ylab == "Diversity...") { # Prepare Y-axis if (inherits(x, "accum_sp_entropy")) { @@ -404,11 +404,11 @@ accum_sp_plot_helper <- function(x, q, main, xlab, ylab, ylim) { } return( list( - q_row = q_row, - ymin = ymin, - ymax = ymax, - main = main, - xlab = xlab, + q_row = q_row, + ymin = ymin, + ymax = ymax, + main = main, + xlab = xlab, ylab = ylab ) ) diff --git a/R/accum_sp_hill.R b/R/accum_sp_hill.R index 573bb9c..4109c3a 100644 --- a/R/accum_sp_hill.R +++ b/R/accum_sp_hill.R @@ -547,7 +547,3 @@ accum_mixing <- function( class(the_mixing) <- c("accum_sp_mixing", "accum_sp", class(the_mixing)) return(the_mixing) } - - -#' @rdname accum_sp_hill -#' diff --git a/R/autoplot.accumulation.R b/R/autoplot.accumulation.R index ebb403d..ea1f9a7 100644 --- a/R/autoplot.accumulation.R +++ b/R/autoplot.accumulation.R @@ -1,6 +1,6 @@ #' Plot Accumulation Objects -#' -#' Plot objects of class "accumulation" produced by [accum_hill] and other +#' +#' Plot objects of class "accumulation" produced by [accum_hill] and other #' accumulation functions. #' #' @param object An object of class "accumulation". @@ -19,10 +19,10 @@ #' @examples #' # Species accumulation curve #' autoplot(accum_hill(mock_3sp_abd)) -#' +#' autoplot.accumulation <- function( - object, - ..., + object, + ..., main = NULL, xlab = "Sample Size", ylab = NULL, @@ -30,48 +30,48 @@ autoplot.accumulation <- function( alpha = 0.3, lty = ggplot2::GeomLine$default_aes$linetype, lwd = ggplot2::GeomLine$default_aes$linewidth){ - + # Add a site column if needed if (!"site" %in% colnames(object)) { object <- dplyr::mutate(object, site = "Unique site") } - + # Build the plot if ("diversity" %in% colnames(object)) { the_plot <- ggplot2::ggplot( - object, + object, ggplot2::aes( - x = .data$level, + x = .data$level, y = .data$diversity, col = .data$site ) ) } else { the_plot <- ggplot2::ggplot( - object, + object, ggplot2::aes( - x = .data$level, + x = .data$level, y = .data$entropy, color = .data$site ) ) } - + # Confidence envelope if ("sup" %in% colnames(object) & "inf" %in% colnames(object)) { the_plot <- the_plot + ggplot2::geom_ribbon( ggplot2::aes( - ymin = .data$inf, + ymin = .data$inf, ymax = .data$sup, lty = .data$site - ), + ), fill = shade_color, alpha = alpha, - col = shade_color + col = shade_color ) } - + # y-axis label if (is.null(ylab)) { if ("diversity" %in% colnames(object)) { @@ -80,37 +80,37 @@ autoplot.accumulation <- function( ylab <- "Entropy" } } - + # Accumulations the_plot <- the_plot + ggplot2::geom_line(linetype = lty, linewidth = lwd) + ggplot2::labs(title = main, x = xlab, y = ylab) - + # Actual value actual <- dplyr::filter(object, .data$estimator == "Sample") if (nrow(actual) > 0) { if ("diversity" %in% colnames(object)) { the_plot <- the_plot + ggplot2::geom_hline( - data = actual, - mapping = ggplot2::aes(yintercept = .data$diversity, color = .data$site), + data = actual, + mapping = ggplot2::aes(yintercept = .data$diversity, color = .data$site), linetype = 2 ) } else { the_plot <- the_plot + ggplot2::geom_hline( - data = actual, - mapping = ggplot2::aes(yintercept = .data$entropy, color = .data$site), + data = actual, + mapping = ggplot2::aes(yintercept = .data$entropy, color = .data$site), linetype = 2 ) } the_plot <- the_plot + ggplot2::geom_vline( - data = actual, - mapping = ggplot2::aes(xintercept = .data$level, color = .data$site), + data = actual, + mapping = ggplot2::aes(xintercept = .data$level, color = .data$site), linetype = 2 - ) + ) } - + return(the_plot) } diff --git a/R/autoplot.profile.R b/R/autoplot.profile.R index fa10625..b3ed81c 100644 --- a/R/autoplot.profile.R +++ b/R/autoplot.profile.R @@ -1,6 +1,6 @@ #' Plot Profile Objects -#' -#' Plot objects of class "profile" produced by [profile_hill] and other +#' +#' Plot objects of class "profile" produced by [profile_hill] and other #' profile functions. #' #' @param object An object of class "profile". @@ -19,10 +19,10 @@ #' @examples #' # Diversity profile curve #' autoplot(profile_hill(mock_3sp_abd)) -#' +#' autoplot.profile <- function( - object, - ..., + object, + ..., main = NULL, xlab = "Order of Diversity", ylab = "Diversity", @@ -30,41 +30,41 @@ autoplot.profile <- function( alpha = 0.3, lty = ggplot2::GeomLine$default_aes$linetype, lwd = ggplot2::GeomLine$default_aes$linewidth){ - + # Add a site column if needed if (!"site" %in% colnames(object)) { object <- dplyr::mutate(object, site = "Unique site") } - + # Build the plot the_plot <- ggplot2::ggplot( - object, + object, ggplot2::aes( - x = .data$order, + x = .data$order, y = .data$diversity, col = .data$site ) ) - + # Confidence envelope if ("sup" %in% colnames(object) & "inf" %in% colnames(object)) { the_plot <- the_plot + ggplot2::geom_ribbon( ggplot2::aes( - ymin = .data$inf, + ymin = .data$inf, ymax = .data$sup, lty = .data$site - ), - fill = shade_color, + ), + fill = shade_color, alpha = alpha, col = shade_color ) } - + # Profiles the_plot <- the_plot + ggplot2::geom_line(linetype = lty, linewidth = lwd) + ggplot2::labs(title = main, x = xlab, y = ylab) - + return(the_plot) } diff --git a/R/coverage.R b/R/coverage.R index 7d14cdb..4573726 100644 --- a/R/coverage.R +++ b/R/coverage.R @@ -1,44 +1,44 @@ #' Sample Coverage of a Community -#' -#' `coverage()` calculates an estimator of the sample coverage of a community -#' described by its abundance vector. -#' `coverage_to_size()` estimates the sample size corresponding to the chosen +#' +#' `coverage()` calculates an estimator of the sample coverage of a community +#' described by its abundance vector. +#' `coverage_to_size()` estimates the sample size corresponding to the chosen #' sample coverage. -#' -#' The sample coverage \eqn{C} of a community is the total probability of -#' occurrence of the species observed in the sample. -#' \eqn{1-C} is the probability for an individual of the whole community to +#' +#' The sample coverage \eqn{C} of a community is the total probability of +#' occurrence of the species observed in the sample. +#' \eqn{1-C} is the probability for an individual of the whole community to #' belong to a species that has not been sampled. -#' -#' The historical estimator is due to Turing \insertCite{Good1953}{divent}. -#' It only relies on singletons (species observed only once). -#' Chao's \insertCite{Chao2010a}{divent} estimator uses doubletons too and -#' Zhang-Huang's \insertCite{Chao1988,Zhang2007}{divent} uses the whole +#' +#' The historical estimator is due to Turing \insertCite{Good1953}{divent}. +#' It only relies on singletons (species observed only once). +#' Chao's \insertCite{Chao2010a}{divent} estimator uses doubletons too and +#' Zhang-Huang's \insertCite{Chao1988,Zhang2007}{divent} uses the whole #' distribution. -#' -#' If `level` is not `NULL`, the sample coverage is interpolated or extrapolated. -#' Interpolation by the Good estimator relies on the equality between sampling -#' deficit and the generalized Simpson entropy \insertCite{Good1953}{divent}. -#' The \insertCite{Chao2014;textual}{divent} estimator allows extrapolation, +#' +#' If `level` is not `NULL`, the sample coverage is interpolated or extrapolated. +#' Interpolation by the Good estimator relies on the equality between sampling +#' deficit and the generalized Simpson entropy \insertCite{Good1953}{divent}. +#' The \insertCite{Chao2014;textual}{divent} estimator allows extrapolation, #' reliable up a level equal to the double size of the sample. #' #' @inheritParams check_divent_args #' @param x An object. #' @param ... Unused. -#' +#' #' @returns `coverage()` returns a named number equal to the calculated sample coverage. #' The name is that of the estimator used. -#' +#' #' `coverage_to_size()` returns a number equal to the sample size corresponding #' to the chosen sample coverage. -#' +#' #' @references #' \insertAllCited{} -#' +#' #' @examples #' coverage(paracou_6_abd) #' coverage_to_size(paracou_6_abd, sample_coverage = 0.9) -#' +#' #' @name coverage NULL @@ -51,28 +51,28 @@ coverage <- function(x, ...) { } #' @rdname coverage -#' +#' #' @param estimator An estimator of the sample coverage. -#' "ZhangHuang" is the most accurate but does not allow choosing a `level`. -#' "Good"'s estimator only allows interpolation, i.e. estimation of the coverage -#' of a subsample. +#' "ZhangHuang" is the most accurate but does not allow choosing a `level`. +#' "Good"'s estimator only allows interpolation, i.e. estimation of the coverage +#' of a subsample. #' "Chao" allows estimation at any `level`, including extrapolation. -#' "Turing" is the simplest estimator, equal to 1 minus the number of singletons +#' "Turing" is the simplest estimator, equal to 1 minus the number of singletons #' divided by the sample size. #' @param level The level of interpolation or extrapolation, i.e. an abundance. #' #' @export coverage.numeric <- function( - x, + x, estimator = c("ZhangHuang", "Chao", "Turing", "Good"), - level = NULL, + level = NULL, as_numeric = FALSE, ..., check_arguments = TRUE) { if (any(check_arguments)) check_divent_args() - estimator <- match.arg(estimator) - + estimator <- match.arg(estimator) + # Round values abd <- as.integer(round(x)) # Eliminate zeros @@ -82,11 +82,11 @@ coverage.numeric <- function( # singletons. Convert named number to number. s_1 <- as.numeric(abd_distribution["1"]) sample_size <- sum(abd) - + # Coverage at the observed level ---- if (is.null(level)) { # Estimate coverage at the observed level - + ## No singletons: coverage=1 ---- if (is.na(s_1)) { if (as_numeric) { @@ -94,28 +94,28 @@ coverage.numeric <- function( } else { return( tibble::tibble_row( - estimator = "No singleton", + estimator = "No singleton", coverage = 1 ) - ) + ) } } - + ## Singletons only: coverage=0 ---- if (s_1 == sample_size) { - warning ("Sample coverage is 0, most estimators will return NaN.") + warning("Sample coverage is 0, most estimators will return NaN.") if (as_numeric) { return(0) } else { return( tibble::tibble_row( - estimator = "Singletons only", + estimator = "Singletons only", coverage = 0 ) - ) + ) } } - + ## Zhang & Huang's estimator ---- if (estimator == "ZhangHuang") { prob <- abd/sample_size @@ -130,29 +130,29 @@ coverage.numeric <- function( } else { return( tibble::tibble_row( - estimator = estimator, + estimator = estimator, coverage = the_coverage ) - ) + ) } - } + } } - + ## Chao's estimator ---- if (estimator == "Chao") { - the_coverage <- 1 - s_1 / sample_size * (1- chao_A(abd)) + the_coverage <- 1 - s_1 / sample_size * (1 - chao_A(abd)) if (as_numeric) { return(the_coverage) } else { return( tibble::tibble_row( - estimator = estimator, + estimator = estimator, coverage = the_coverage ) - ) + ) } } - + ## Turing's estimator ---- if (estimator == "Turing" | estimator == "Good") { the_coverage <- 1 - s_1 / sample_size @@ -161,25 +161,25 @@ coverage.numeric <- function( } else { return( tibble::tibble_row( - estimator = estimator, + estimator = estimator, coverage = the_coverage ) - ) + ) } } } - + # Coverage at a chosen level ---- if (!is.null(level)) { # level must be an integer. check_divent_args() may have accepted a value between 0 and 1 - if (level <=1) stop("level must be an integer > 1.") - + if (level <= 1) stop("level must be an integer > 1.") + ## Turing or Zhang & Huang's estimator ---- if (estimator == "Turing" | estimator == "ZhangHuang") { warning("Turing and ZhangHuang estimators do not allow interpolation or extrapolation. Chao's estimator is used.") estimator <- "Chao" } - + ## Good's estimator ---- if (estimator == "Good") { if (level >= sample_size) { @@ -196,20 +196,20 @@ coverage.numeric <- function( level = level, coverage = the_coverage ) - ) + ) } } } - + ## Chao's estimator ---- if (estimator == "Chao") { if (level < sample_size) { ### Interpolation ---- abd_restricted <- abd[(sample_size - abd) >= level] the_coverage <- 1 - sum( - abd_restricted / sample_size * - exp(lgamma(sample_size - abd_restricted + 1) - - lgamma(sample_size - abd_restricted - level + 1) - + abd_restricted / sample_size * + exp(lgamma(sample_size - abd_restricted + 1) - + lgamma(sample_size - abd_restricted - level + 1) - lgamma(sample_size) + lgamma(sample_size - level)) ) } else { @@ -221,15 +221,15 @@ coverage.numeric <- function( } else { return( tibble::tibble_row( - estimator = "No singleton", + estimator = "No singleton", level = level, coverage = 1 ) - ) + ) } } else { - the_coverage <- 1 - - s_1 / sample_size * + the_coverage <- 1 - + s_1 / sample_size * (1 - chao_A(abd))^(level - sample_size + 1) } } @@ -238,11 +238,11 @@ coverage.numeric <- function( } else { return( tibble::tibble_row( - estimator = estimator, + estimator = estimator, level = level, coverage = the_coverage ) - ) + ) } } } @@ -250,22 +250,22 @@ coverage.numeric <- function( #' @rdname coverage -#' +#' #' @export coverage.abundances <- function( - x, + x, estimator = c("ZhangHuang", "Chao", "Turing", "Good"), - level = NULL, + level = NULL, ..., check_arguments = TRUE) { - + if (any(check_arguments)) check_divent_args() - estimator <- match.arg(estimator) - + estimator <- match.arg(estimator) + # Apply coverage.numeric() to each site coverage_list <- apply( # Eliminate site and weight columns - x[, !colnames(x) %in% non_species_columns], + x[, !colnames(x) %in% non_species_columns], # Apply to each row MARGIN = 1, FUN = coverage.numeric, @@ -274,7 +274,7 @@ coverage.abundances <- function( level = level, check_arguments = FALSE ) - + return( # Make a tibble with site, estimator and sample-coverage tibble::tibble( @@ -285,8 +285,8 @@ coverage.abundances <- function( ) ) } - - + + #' @rdname coverage #' #' @export @@ -300,16 +300,16 @@ coverage_to_size <- function(x, ...) { #' #' @export coverage_to_size.numeric <- function( - x, + x, sample_coverage, estimator = c("ZhangHuang", "Chao", "Turing", "Good"), as_numeric = FALSE, ..., check_arguments = TRUE) { - + if (any(check_arguments)) check_divent_args() - estimator <- match.arg(estimator) - + estimator <- match.arg(estimator) + # Round values abd <- as.integer(round(x)) # Eliminate zeros @@ -318,30 +318,30 @@ coverage_to_size.numeric <- function( abd_distribution <- tapply(abd, INDEX = abd, FUN = length) # Singletons. Convert named number to number. if (is.na(abd_distribution["1"])) { - s_1 <- 0 + s_1 <- 0 } else { s_1 <- as.numeric(abd_distribution["1"]) } sample_size <- sum(abd) - + # Singletons only if (s_1 == sample_size) { stop("Sample coverage is 0.") } - + # Actual coverage sample_coverage_actual <- coverage.numeric( - abd, + abd, estimator = estimator, as_numeric = TRUE, check_arguments = FALSE ) - + if (sample_coverage >= sample_coverage_actual) { # Extrapolation the_size <- round( - sample_size + - (log(sample_size / s_1) + log(1 - sample_coverage)) / + sample_size + + (log(sample_size / s_1) + log(1 - sample_coverage)) / log(1 - chao_A(abd) ) - 1 ) @@ -351,43 +351,43 @@ coverage_to_size.numeric <- function( stats::optimize( chao_delta, abd = abd, - target_coverage = sample_coverage, - lower = 1, + target_coverage = sample_coverage, + lower = 1, upper = sample_size )$minimum ) } - + if (as_numeric) { return(the_size) } else { return( tibble::tibble_row( - sample_coverage = sample_coverage, + sample_coverage = sample_coverage, size = the_size ) - ) + ) } } #' @rdname coverage -#' +#' #' @export coverage_to_size.abundances <- function( - x, + x, sample_coverage, estimator = c("ZhangHuang", "Chao", "Turing", "Good"), ..., check_arguments = TRUE) { - + if (any(check_arguments)) check_divent_args() - estimator <- match.arg(estimator) - + estimator <- match.arg(estimator) + # Apply coverage_to_size.numeric() to each site size_list <- apply( # Eliminate site and weight columns - x[, !colnames(x) %in% non_species_columns], + x[, !colnames(x) %in% non_species_columns], # Apply to each row MARGIN = 1, FUN = coverage_to_size.numeric, @@ -397,7 +397,7 @@ coverage_to_size.abundances <- function( as_numeric = FALSE, check_arguments = FALSE ) - + return( # Make a tibble with site, estimator and sample-coverage tibble::tibble( @@ -411,9 +411,9 @@ coverage_to_size.abundances <- function( #' Departure of actual sample coverage from target coverage -#' +#' #' Helper for [coverage_2_size]. -#' +#' #' @param size The size of the sample. Adjusted to minimize `delta()`. #' @param target_coverage The sample coverage to reach by adjusting size. #' @@ -421,14 +421,14 @@ coverage_to_size.abundances <- function( #' @noRd #' chao_delta <- function( - abd, - size, + abd, + size, target_coverage) { abs( coverage.numeric( - x = abd, - estimator = "Chao", - level = size, + x = abd, + estimator = "Chao", + level = size, as_numeric = TRUE, check_arguments = FALSE ) - target_coverage diff --git a/R/div_hurlbert.R b/R/div_hurlbert.R index feadd8b..9fe1000 100644 --- a/R/div_hurlbert.R +++ b/R/div_hurlbert.R @@ -1,16 +1,16 @@ #' Hurlbert Diversity of a Community -#' -#' Estimate the diversity sensu stricto, i.e. the effective number of species -#' number of species \insertCite{Dauby2012;textual}{divent} +#' +#' Estimate the diversity sensu stricto, i.e. the effective number of species +#' number of species \insertCite{Dauby2012;textual}{divent} #' from abundance or probability data. -#' +#' #' Several estimators are available to deal with incomplete sampling. -#' -#' Bias correction requires the number of individuals. -#' +#' +#' Bias correction requires the number of individuals. +#' #' Estimation techniques are from \insertCite{Hurlbert1971;textual}{divent}. -#' -#' Hurlbert's diversity cannot be estimated at a specified level of interpolation or +#' +#' Hurlbert's diversity cannot be estimated at a specified level of interpolation or #' extrapolation, and diversity partioning is not available. #' #' @inheritParams check_divent_args @@ -22,11 +22,11 @@ #' #' @references #' \insertAllCited{} -#' +#' #' @examples #' # Diversity of each community #' div_hurlbert(paracou_6_abd, k = 2) -#' +#' #' @name div_hurlbert NULL @@ -42,11 +42,11 @@ div_hurlbert <- function(x, k = 1, ...) { #' @rdname div_hurlbert #' #' @param estimator An estimator of asymptotic diversity. -#' +#' #' @export div_hurlbert.numeric <- function( - x, - k = 2, + x, + k = 2, estimator = c("Hurlbert", "naive"), as_numeric = FALSE, ..., @@ -56,21 +56,21 @@ div_hurlbert.numeric <- function( check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") } - estimator <- match.arg(estimator) + estimator <- match.arg(estimator) the_entropy <- ent_hurlbert.numeric( - x, - k = k, + x, + k = k, estimator = estimator, check_arguments = FALSE ) # Calculate diversity the_diversity <- dplyr::mutate( - the_entropy, + the_entropy, diversity = hurlbert_ent2div(.data$entropy, k = k), .keep = "unused" ) - + # return the diversity if (as_numeric) { return(the_diversity$diversity) @@ -84,8 +84,8 @@ div_hurlbert.numeric <- function( #' #' @export div_hurlbert.species_distribution <- function( - x, - k = 2, + x, + k = 2, estimator = c("Hurlbert", "naive"), as_numeric = FALSE, ..., @@ -95,11 +95,11 @@ div_hurlbert.species_distribution <- function( check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") } - estimator <- match.arg(estimator) + estimator <- match.arg(estimator) the_entropy <- ent_hurlbert.species_distribution( - x, - k = k, + x, + k = k, estimator = estimator, as_numeric = as_numeric, check_arguments = FALSE @@ -109,7 +109,7 @@ div_hurlbert.species_distribution <- function( the_diversity = hurlbert_ent2div(the_entropy, k = k) } else { the_diversity <- dplyr::mutate( - the_entropy, + the_entropy, diversity = hurlbert_ent2div(.data$entropy, k = k), .keep = "unused" ) @@ -120,7 +120,7 @@ div_hurlbert.species_distribution <- function( #' Compute Hurlbert's diversity from its entropy -#' +#' #' Find the effective number of species numerically #' #' @param hurlbert_entropy The entropy. @@ -128,7 +128,7 @@ div_hurlbert.species_distribution <- function( #' #' @returns Hurlbert's effective number of species. #' @noRd -#' +#' hurlbert_ent2div <- function(hurlbert_entropy, k) { # Relation between diversity and entropy # (D for diversity, S for entropy, k is the parameter) @@ -136,12 +136,12 @@ hurlbert_ent2div <- function(hurlbert_entropy, k) { # Minimize it return( vapply( - hurlbert_entropy, + hurlbert_entropy, FUN = function(S) { stats::uniroot( - f = f, - interval = c(1, 1E+7), - S = S, + f = f, + interval = c(1, 1E+7), + S = S, k = k )$root }, diff --git a/R/div_part.R b/R/div_part.R index 35da480..e7a8a18 100644 --- a/R/div_part.R +++ b/R/div_part.R @@ -1,8 +1,8 @@ #' Diversity partition -#' +#' #' Calculate \eqn{\gamma}, \eqn{\beta} and \eqn{\alpha} diversities of a metacommunity. -#' -#' The function computes \eqn{\gamma} diversity after building a metacommunity +#' +#' The function computes \eqn{\gamma} diversity after building a metacommunity #' from local communities according to their weight \insertCite{Marcon2014a}{divent}. #' \eqn{\alpha} entropy is the weighted mean local entropy, converted into Hill #' numbers to obtain \eqn{\alpha} diversity. @@ -13,36 +13,36 @@ #' #' @returns A tibble with diversity values at each scale. #' @export -#' +#' #' @references #' \insertAllCited{} #' #' @examples #' div_part(paracou_6_abd) -#' +#' div_part <- function( - abundances, - q = 1, - estimator = c("UnveilJ", "ChaoJost", "ChaoShen", "GenCov", "Grassberger", + abundances, + q = 1, + estimator = c("UnveilJ", "ChaoJost", "ChaoShen", "GenCov", "Grassberger", "Holste", "Marcon", "UnveilC", "UnveiliC", "ZhangGrabchak"), - level = NULL, + level = NULL, probability_estimator = c("Chao2015", "Chao2013", "ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), richness_estimator = c("jackknife", "iChao1", "Chao1", "naive"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), q_threshold = 10, check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() if (any(abundances < 0)) stop("Species probabilities or abundances must be positive.") } - estimator <- match.arg(estimator) - probability_estimator <- match.arg(probability_estimator) - unveiling <- match.arg(unveiling) - richness_estimator <- match.arg(richness_estimator) + estimator <- match.arg(estimator) + probability_estimator <- match.arg(probability_estimator) + unveiling <- match.arg(unveiling) + richness_estimator <- match.arg(richness_estimator) coverage_estimator <- match.arg(coverage_estimator) # Gamma diversity ---- @@ -51,7 +51,7 @@ div_part <- function( species_distribution = abundances, q = q, estimator = estimator, - level = level, + level = level, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, @@ -62,7 +62,7 @@ div_part <- function( ) # Calculate diversity div_gamma <- dplyr::mutate( - ent_gamma, + ent_gamma, diversity = exp_q(.data$entropy, q = q), .keep = "unused" ) @@ -73,19 +73,19 @@ div_part <- function( # Apply div_hill.numeric() to each site ent_sites <- apply( # Eliminate site and weight columns - abundances[, !colnames(abundances) %in% non_species_columns], + abundances[, !colnames(abundances) %in% non_species_columns], # Apply to each row MARGIN = 1, FUN = ent_tsallis.numeric, # Arguments q = q, estimator = estimator, - level = level, + level = level, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, - jack_alpha = jack_alpha, - jack_max = jack_max, + jack_alpha = jack_alpha, + jack_max = jack_max, coverage_estimator = coverage_estimator, as_numeric = TRUE, check_arguments = FALSE @@ -108,17 +108,17 @@ div_part <- function( order = q, diversity = div_gamma$diversity / div_alpha$diversity ) - + # Summarize div_metacommunity <- dplyr::bind_cols( site = "Metacommunity", dplyr::bind_rows( - dplyr::select(div_gamma, -.data$site), - div_beta, + dplyr::select(div_gamma, -.data$site), + div_beta, div_alpha ) ) - + # Site diversity div_sites <- div_hill.species_distribution( x = abundances, @@ -128,21 +128,21 @@ div_part <- function( probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, - jack_alpha = jack_alpha, - jack_max = jack_max, + jack_alpha = jack_alpha, + jack_max = jack_max, coverage_estimator = coverage_estimator, q_threshold = q_threshold, gamma = FALSE, check_arguments = FALSE ) - + # Bind everything the_divpart <- dplyr::bind_rows( - div_metacommunity, + div_metacommunity, dplyr::bind_cols(scale = "community", div_sites) ) # Weight of the metacommunity the_divpart[the_divpart$scale == "gamma", "weight"] <- sum(abundances$weight) - + return(the_divpart) } diff --git a/R/div_phylo.R b/R/div_phylo.R index 9d6e4e5..0a40900 100644 --- a/R/div_phylo.R +++ b/R/div_phylo.R @@ -1,37 +1,37 @@ #' Phylogenetic Diversity of a Community -#' +#' #' Estimate the diversity of species from abundance or probability data #' and a phylogenetic tree. #' Several estimators are available to deal with incomplete sampling. -#' -#' Bias correction requires the number of individuals. +#' +#' Bias correction requires the number of individuals. #' See [div_hill] for estimators. -#' -#' Entropy can be estimated at a specified level of interpolation or -#' extrapolation, either a chosen sample size or sample coverage +#' +#' Entropy can be estimated at a specified level of interpolation or +#' extrapolation, either a chosen sample size or sample coverage #' \insertCite{Chao2014}{divent}, rather than its asymptotic value. #' See [accum_tsallis] for details. #' #' @inheritParams check_divent_args -#' @param x An object, that may be a numeric vector containing abundances +#' @param x An object, that may be a numeric vector containing abundances #' or probabilities, or an object of class [abundances] or [probabilities]. #' @param ... Unused. #' -#' @returns A tibble with the site names, the estimators used and the estimated +#' @returns A tibble with the site names, the estimators used and the estimated #' diversity #' #' @references #' \insertAllCited{} -#' +#' #' @examples #' div_phylo(paracou_6_abd, tree = paracou_6_taxo, q = 2) -#' +#' #' # At 80% coverage #' div_phylo(paracou_6_abd, tree = paracou_6_taxo, q = 2, level = 0.8) -#' +#' #' # Gamma entropy #' div_phylo(paracou_6_abd, tree = paracou_6_taxo, q = 2, gamma = TRUE) -#' +#' #' @name div_phylo NULL @@ -47,27 +47,27 @@ div_phylo <- function(x, tree, q = 1, ...) { #' @rdname div_phylo #' #' @param estimator An estimator of asymptotic diversity. -#' +#' #' @export div_phylo.numeric <- function( - x, + x, tree, - q = 1, + q = 1, normalize = TRUE, - estimator = c("UnveilJ", "ChaoJost", "ChaoShen", "GenCov", "Grassberger", + estimator = c("UnveilJ", "ChaoJost", "ChaoShen", "GenCov", "Grassberger", "Marcon", "UnveilC", "UnveiliC", "ZhangGrabchak", "naive", "Bonachela", "Holste"), - level = NULL, + level = NULL, probability_estimator = c("Chao2015", "Chao2013", "ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), richness_estimator = c("jackknife", "iChao1", "Chao1", "naive"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), as_numeric = FALSE, ..., check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") @@ -77,26 +77,26 @@ div_phylo.numeric <- function( col_names <- colnames(x) species_names <- col_names[!col_names %in% non_species_columns] if (length(setdiff(species_names, rownames(tree$phylo_groups))) != 0) { - stop("Some species are missing in the tree.") + stop("Some species are missing in the tree.") } } - estimator <- match.arg(estimator) - probability_estimator <- match.arg(probability_estimator) - unveiling <- match.arg(unveiling) - richness_estimator <- match.arg(richness_estimator) + estimator <- match.arg(estimator) + probability_estimator <- match.arg(probability_estimator) + unveiling <- match.arg(unveiling) + richness_estimator <- match.arg(richness_estimator) coverage_estimator <- match.arg(coverage_estimator) the_entropy <- ent_phylo.numeric( - x, + x, tree = tree, - q = q, + q = q, normalize = normalize, estimator = estimator, - level = level, + level = level, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, - jack_alpha = jack_alpha, + jack_alpha = jack_alpha, jack_max = jack_max, coverage_estimator = coverage_estimator, as_numeric = FALSE, @@ -104,7 +104,7 @@ div_phylo.numeric <- function( ) # Calculate diversity the_diversity <- dplyr::mutate( - the_entropy, + the_entropy, diversity = exp_q(.data$entropy, q = q), .keep = "unused" ) @@ -121,25 +121,25 @@ div_phylo.numeric <- function( #' #' @export div_phylo.species_distribution <- function( - x, + x, tree, - q = 1, + q = 1, normalize = TRUE, - estimator = c("UnveilJ", "ChaoJost", "ChaoShen", "GenCov", "Grassberger", + estimator = c("UnveilJ", "ChaoJost", "ChaoShen", "GenCov", "Grassberger", "Marcon", "UnveilC", "UnveiliC", "ZhangGrabchak", "naive", "Bonachela", "Holste"), - level = NULL, + level = NULL, probability_estimator = c("Chao2015", "Chao2013", "ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), richness_estimator = c("jackknife", "iChao1", "Chao1", "naive"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), gamma = FALSE, as_numeric = FALSE, ..., check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") @@ -149,13 +149,13 @@ div_phylo.species_distribution <- function( col_names <- colnames(x) species_names <- col_names[!col_names %in% non_species_columns] if (length(setdiff(species_names, rownames(tree$phylo_groups))) != 0) { - stop("Some species are missing in the tree.") + stop("Some species are missing in the tree.") } } - estimator <- match.arg(estimator) - probability_estimator <- match.arg(probability_estimator) - unveiling <- match.arg(unveiling) - richness_estimator <- match.arg(richness_estimator) + estimator <- match.arg(estimator) + probability_estimator <- match.arg(probability_estimator) + unveiling <- match.arg(unveiling) + richness_estimator <- match.arg(richness_estimator) coverage_estimator <- match.arg(coverage_estimator) the_entropy <- ent_phylo.species_distribution( @@ -164,7 +164,7 @@ div_phylo.species_distribution <- function( normalize = TRUE, q = q, estimator = estimator, - level = level, + level = level, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, @@ -177,7 +177,7 @@ div_phylo.species_distribution <- function( ) # Calculate diversity the_diversity <- dplyr::mutate( - the_entropy, + the_entropy, diversity = exp_q(.data$entropy, q = q), .keep = "unused" ) diff --git a/R/div_richness.R b/R/div_richness.R index ee6a7b1..6676a60 100644 --- a/R/div_richness.R +++ b/R/div_richness.R @@ -305,7 +305,7 @@ div_richness.numeric <- function( coe <- stats::qnorm(1 - jack_alpha/2, 0, 1) # Which orders pass the test? orders <- (gene[2:(k + 1), 5] < coe) - if (sum(orders, na.rm=TRUE) == 0) { + if (sum(orders, na.rm = TRUE) == 0) { # If none, keep the max value of k (+1 because jack1 is in line 2) k_smallest <- k + 1 s_jack <- gene[k_smallest, 1] @@ -388,7 +388,7 @@ div_richness.numeric <- function( # Don't unveil the asymptotic distribution, use the asymptotic estimator s_0 <- div_richness.numeric( abd, - estimator=estimator, + estimator = estimator, jack_alpha = jack_alpha, jack_max = jack_max, as_numeric = TRUE, diff --git a/R/div_similarity.R b/R/div_similarity.R index a3cad15..78f6a0a 100644 --- a/R/div_similarity.R +++ b/R/div_similarity.R @@ -1,16 +1,16 @@ #' Similarity-Based Diversity of a Community -#' +#' #' Estimate the diversity of species from abundance or probability data and a #' similarity matrix between species. -#' Several estimators are available to deal with incomplete sampling. +#' Several estimators are available to deal with incomplete sampling. #' Bias correction requires the number of individuals. -#' -#' All species of the `species_distribution` must be found in the matrix of +#' +#' All species of the `species_distribution` must be found in the matrix of #' `similarities` if it is named. #' If it is not, its size must equal the number of species. #' Then, the order of species is assumed to be the same as that of the #' `species_distribution`. -#' +#' #' Similarity-Based diversity can't be interpolated of extrapolated as of the #' state of the art. #' @@ -30,7 +30,7 @@ #' div_similarity(paracou_6_abd, similarities = Z, q = 2) #' # gamma diversity #' div_similarity(paracou_6_abd, similarities = Z, q = 2, gamma = TRUE) -#' +#' #' @name div_similarity NULL @@ -46,42 +46,42 @@ div_similarity <- function(x, similarities, q = 1, ...) { #' @rdname div_similarity #' #' @param estimator An estimator of asymptotic diversity. -#' +#' #' @export div_similarity.numeric <- function( - x, + x, similarities = diag(length(x)), - q = 1, - estimator = c("UnveilJ", "Max", "ChaoShen", "MarconZhang", + q = 1, + estimator = c("UnveilJ", "Max", "ChaoShen", "MarconZhang", "UnveilC", "UnveiliC", "naive"), probability_estimator = c("Chao2015", "Chao2013", "ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), sample_coverage = NULL, as_numeric = FALSE, ..., check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") similarities <- checked_matrix(similarities, x) } - estimator <- match.arg(estimator) - probability_estimator <- match.arg(probability_estimator) - unveiling <- match.arg(unveiling) - coverage_estimator <- match.arg(coverage_estimator) + estimator <- match.arg(estimator) + probability_estimator <- match.arg(probability_estimator) + unveiling <- match.arg(unveiling) + coverage_estimator <- match.arg(coverage_estimator) the_entropy <- ent_similarity.numeric( - x, + x, similarities = similarities, - q = q, + q = q, estimator = estimator, probability_estimator = probability_estimator, unveiling = unveiling, - jack_alpha = jack_alpha, + jack_alpha = jack_alpha, jack_max = jack_max, coverage_estimator = coverage_estimator, sample_coverage = sample_coverage, @@ -90,7 +90,7 @@ div_similarity.numeric <- function( ) # Calculate diversity the_diversity <- dplyr::mutate( - the_entropy, + the_entropy, diversity = exp_q(.data$entropy, q = q), .keep = "unused" ) @@ -107,39 +107,39 @@ div_similarity.numeric <- function( #' #' @export div_similarity.species_distribution <- function( - x, + x, similarities = diag(sum(!colnames(x) %in% non_species_columns)), - q = 1, - estimator = c("UnveilJ", "Max", "ChaoShen", "MarconZhang", + q = 1, + estimator = c("UnveilJ", "Max", "ChaoShen", "MarconZhang", "UnveilC", "UnveiliC", "naive"), probability_estimator = c("Chao2015", "Chao2013", "ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), gamma = FALSE, as_numeric = FALSE, ..., check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") similarities <- checked_matrix(similarities, x) } - estimator <- match.arg(estimator) - probability_estimator <- match.arg(probability_estimator) - unveiling <- match.arg(unveiling) - coverage_estimator <- match.arg(coverage_estimator) + estimator <- match.arg(estimator) + probability_estimator <- match.arg(probability_estimator) + unveiling <- match.arg(unveiling) + coverage_estimator <- match.arg(coverage_estimator) the_entropy <- ent_similarity.species_distribution( - x, + x, similarities = similarities, - q = q, + q = q, estimator = estimator, probability_estimator = probability_estimator, unveiling = unveiling, - jack_alpha = jack_alpha, + jack_alpha = jack_alpha, jack_max = jack_max, coverage_estimator = coverage_estimator, as_numeric = FALSE, @@ -147,16 +147,16 @@ div_similarity.species_distribution <- function( ) # Calculate diversity the_diversity <- dplyr::mutate( - the_entropy, + the_entropy, diversity = exp_q(.data$entropy, q = q), .keep = "unused" ) - + # Return the diversity if (as_numeric) { return(the_diversity$diversity) } else { return(the_diversity) } - + } diff --git a/R/e_n_q.R b/R/e_n_q.R index e858a6d..863e41e 100644 --- a/R/e_n_q.R +++ b/R/e_n_q.R @@ -1,21 +1,21 @@ #' Grassberger's expectation of n^q -#' -#' Expected value of \eqn{n^q} when \eqn{n} follows a Poisson distribution +#' +#' Expected value of \eqn{n^q} when \eqn{n} follows a Poisson distribution #' of parameter \eqn{n}. -#' -#' The expectation of \eqn{n^q} when \eqn{n} follows a Poisson distribution +#' +#' The expectation of \eqn{n^q} when \eqn{n} follows a Poisson distribution #' was derived by \insertCite{Grassberger1988;textual}{divent}. -#' +#' #' It is computed using the [beta] function. -#' Its value is 0 for \eqn{n-q+1<0}, and close to 0 when \eqn{n=q}, -#' which is not a correct estimate: it should not be used when \eqn{q > n}. +#' Its value is 0 for \eqn{n-q+1<0}, and close to 0 when \eqn{n=q}, +#' which is not a correct estimate: it should not be used when \eqn{q > n}. #' #' @param n A positive integer vector. #' @param q A positive number. #' #' @returns A vector of the same length as n containing the transformed values. #' @export -#' +#' #' @references #' \insertAllCited{} #' @@ -28,19 +28,19 @@ #' mean(rpois(1000, lambda = n)^q) #' # and (naive estimation) #' n^q -#' +#' e_n_q <- function(n, q) { if (q == 0) { return(rep(1, length(n))) } else { - # beta cannot be computed for n - q + 1 < 0 (so warnings must be suppressed) + # beta cannot be computed for n - q + 1 < 0 (so warnings must be suppressed) # but the value is 0 then beta_value <- suppressWarnings(gamma(q) / beta(n - q + 1, q)) beta_value[n - q + 1 < 0] <- 0 - # (-1)^n is problematic for long vectors (returns NA for large values). + # (-1)^n is problematic for long vectors (returns NA for large values). # It is replaced by 1 - n %% 2 * 2 (n is rounded if is not an integer) - the_e_n_q <- beta_value - - (1 - round(n) %% 2 * 2) * gamma(1 + q) * sin(pi * q) / pi / (n + 1) + the_e_n_q <- beta_value - + (1 - round(n) %% 2 * 2) * gamma(1 + q) * sin(pi * q) / pi / (n + 1) return(the_e_n_q) } -} +} diff --git a/R/ent_hurlbert.R b/R/ent_hurlbert.R index 600c764..daaefdd 100644 --- a/R/ent_hurlbert.R +++ b/R/ent_hurlbert.R @@ -1,12 +1,12 @@ #' Hurlbert Entropy of a Community -#' +#' #' Estimate the Hurlbert entropy \insertCite{Hurlbert1971}{divent} of species from abundance or probability data. #' Several estimators are available to deal with incomplete sampling. -#' -#' Bias correction requires the number of individuals. +#' +#' Bias correction requires the number of individuals. #' See [div_hurlbert] for estimators. -#' -#' Hurlbert's entropy cannot be estimated at a specified level of interpolation or +#' +#' Hurlbert's entropy cannot be estimated at a specified level of interpolation or #' extrapolation, and entropy partitioning is not available. #' #' @inheritParams check_divent_args @@ -18,11 +18,11 @@ #' #' @references #' \insertAllCited{} -#' +#' #' @examples #' # Entropy of each community #' ent_hurlbert(paracou_6_abd, k = 2) -#' +#' #' @name ent_hurlbert NULL @@ -37,12 +37,12 @@ ent_hurlbert <- function(x, k = 2, ...) { #' @rdname ent_hurlbert #' -#' @param estimator An estimator of entropy. -#' +#' @param estimator An estimator of entropy. +#' #' @export ent_hurlbert.numeric <- function( - x, - k = 2, + x, + k = 2, estimator = c("Hurlbert", "naive"), as_numeric = FALSE, ..., @@ -52,7 +52,7 @@ ent_hurlbert.numeric <- function( check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") } - estimator <- match.arg(estimator) + estimator <- match.arg(estimator) # Entropy of a vector of probabilities ---- if (abs(sum(x) - 1) < length(x) * .Machine$double.eps) { @@ -62,14 +62,14 @@ ent_hurlbert.numeric <- function( } else { return( tibble::tibble_row( - estimator = "naive", + estimator = "naive", order = k, entropy = the_entropy ) - ) + ) } } - + # Eliminate 0 abd <- x[x > 0] # Sample size @@ -79,7 +79,7 @@ ent_hurlbert.numeric <- function( } # Number of observed species s_obs <- length(abd) - + # Entropy of a vector of abundances ---- if (!is_integer_values(abd)) { warning("The estimator can't be applied to non-integer values.") @@ -94,11 +94,11 @@ ent_hurlbert.numeric <- function( } else { return( tibble::tibble_row( - estimator = "naive", + estimator = "naive", order = k, entropy = the_entropy ) - ) + ) } } # Hurlbert estimator @@ -111,7 +111,7 @@ ent_hurlbert.numeric <- function( } else { return( tibble::tibble_row( - estimator = "Hurlbert", + estimator = "Hurlbert", order = k, entropy = the_entropy ) @@ -122,15 +122,15 @@ ent_hurlbert.numeric <- function( warning("estimator was not recognized") return(NA) } - + #' @rdname ent_hurlbert #' #' @export ent_hurlbert.species_distribution <- function( - x, - k = 2, + x, + k = 2, estimator = c("Hurlbert", "naive"), as_numeric = FALSE, ..., @@ -140,12 +140,12 @@ ent_hurlbert.species_distribution <- function( check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") } - estimator <- match.arg(estimator) + estimator <- match.arg(estimator) # Apply ent_hurlbert.numeric() to each site ent_hurlbert_sites <- apply( # Eliminate site and weight columns - x[, !colnames(x) %in% non_species_columns], + x[, !colnames(x) %in% non_species_columns], # Apply to each row MARGIN = 1, FUN = ent_hurlbert.numeric, diff --git a/R/ent_rao.R b/R/ent_rao.R index 74b1801..3349a99 100644 --- a/R/ent_rao.R +++ b/R/ent_rao.R @@ -1,20 +1,20 @@ #' Rao's Quadratic Entropy of a Community -#' -#' Estimate the quadratic entropy \insertCite{Rao1982}{divent} of species from +#' +#' Estimate the quadratic entropy \insertCite{Rao1982}{divent} of species from #' abundance or probability data. -#' An estimator \insertCite{Lande1996}{divent} is available to deal with +#' An estimator \insertCite{Lande1996}{divent} is available to deal with #' incomplete sampling. -#' +#' #' Rao's entropy is phylogenetic or similarity-based entropy of order 2. #' [ent_phylo] and [ent_similarity] with argument `q = 2` provide more estimators #' and allow estimating entropy at a chosen level. #' -#' All species of the `species_distribution` must be found in the matrix of +#' All species of the `species_distribution` must be found in the matrix of #' `distances` if it is named. #' If it is not or if `x` is numeric, its size must equal the number of species. #' Then, the order of species is assumed to be the same as that of the #' `species_distribution` or its numeric equivalent. -#' +#' #' @inheritParams check_divent_args #' @param x An object, that may be a numeric vector containing abundances or probabilities, #' or an object of class [abundances] or [probabilities]. @@ -24,19 +24,19 @@ #' #' @references #' \insertAllCited{} -#' +#' #' @examples #' # Entropy of each community #' ent_rao(paracou_6_abd, tree = paracou_6_taxo) -#' # Similar to (but estimators are not the same) +#' # Similar to (but estimators are not the same) #' ent_phylo(paracou_6_abd, tree = paracou_6_taxo, q = 2) -#' +#' #' # Functional entropy #' ent_rao(paracou_6_abd, distances = paracou_6_fundist) -#' +#' #' # gamma entropy #' ent_rao(paracou_6_abd, tree = paracou_6_taxo, gamma = TRUE) -#' +#' #' @name ent_rao NULL @@ -52,10 +52,10 @@ ent_rao <- function(x, ...) { #' @rdname ent_rao #' #' @param estimator An estimator of entropy. -#' +#' #' @export ent_rao.numeric <- function( - x, + x, distances = NULL, tree = NULL, normalize = TRUE, @@ -63,7 +63,7 @@ ent_rao.numeric <- function( as_numeric = FALSE, ..., check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") @@ -77,7 +77,7 @@ ent_rao.numeric <- function( if (is.null(distances)) { # Check species in the tree if (length(setdiff(species_names, rownames(tree$phylo_groups))) != 0) { - stop("Some species are missing in the tree.") + stop("Some species are missing in the tree.") } } else { # Check species in the distance matrix @@ -87,13 +87,13 @@ ent_rao.numeric <- function( } if (!is.null(colnames(distances))) { if (length(setdiff(species_names, colnames(distances))) != 0) { - stop("Some species are missing in the distance matrix") + stop("Some species are missing in the distance matrix") } } } } - estimator <- match.arg(estimator) - + estimator <- match.arg(estimator) + # Prepare the distance matrix if (is.null(distances)) { # Calculate distances from tree @@ -106,7 +106,7 @@ ent_rao.numeric <- function( if (normalize) { distances <- distances / max(distances) } - + # Entropy of a vector of probabilities ---- if (abs(sum(x) - 1) < length(x) * .Machine$double.eps) { # Probabilities sum to 1, allowing rounding error @@ -116,20 +116,20 @@ ent_rao.numeric <- function( } else { return( tibble::tibble_row( - estimator = "naive", + estimator = "naive", order = 2, entropy = the_entropy ) - ) + ) } } - + # Eliminate 0 abd <- as.numeric(x[x > 0]) distances <- distances[x > 0, x > 0] # Sample size sample_size <- sum(abd) - + ## Exit if x contains no or a single species ---- if (length(abd) < 2) { if (length(abd) == 0) { @@ -138,11 +138,11 @@ ent_rao.numeric <- function( } else { return( tibble::tibble_row( - estimator = "No Species", + estimator = "No Species", order = 2, entropy = NA ) - ) + ) } } else { if (as_numeric) { @@ -150,11 +150,11 @@ ent_rao.numeric <- function( } else { return( tibble::tibble_row( - estimator = "Single Species", + estimator = "Single Species", order = 2, entropy = 0 ) - ) + ) } } } else { @@ -164,10 +164,10 @@ ent_rao.numeric <- function( estimator <- "naive" } } - + # Probabilities prob <- abd / sum(abd) - + if (estimator == "naive") { # Naive estimator ---- the_entropy <- mean(distances %*% prob) @@ -175,19 +175,19 @@ ent_rao.numeric <- function( # Lande's estimator ---- the_entropy <- mean(distances %*% prob) * sample_size / (sample_size - 1) } - + # Return if (as_numeric) { return(the_entropy) } else { return( tibble::tibble_row( - estimator = estimator, + estimator = estimator, order = 2, entropy = the_entropy ) - ) - } + ) + } } @@ -204,7 +204,7 @@ ent_rao.species_distribution <- function( as_numeric = FALSE, ..., check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") @@ -219,7 +219,7 @@ ent_rao.species_distribution <- function( if (is.null(distances)) { # Check species in the tree if (length(setdiff(species_names, rownames(tree$phylo_groups))) != 0) { - stop("Some species are missing in the tree.") + stop("Some species are missing in the tree.") } } else { # Check species in the distance matrix @@ -229,23 +229,23 @@ ent_rao.species_distribution <- function( } if (!is.null(colnames(distances))) { if (length(setdiff(species_names, colnames(distances))) != 0) { - stop("Some species are missing in the distance matrix") + stop("Some species are missing in the distance matrix") } } } } estimator <- match.arg(estimator) - + if (!is.null(distances)) { # Check species names and reorder the matrix to fit the names distances <- checked_matrix(distances, x) } - + if (gamma) { # Build the metacommunity abd <- metacommunity.abundances( - x, - as_numeric = TRUE, + x, + as_numeric = TRUE, check_arguments = FALSE ) the_entropy <- ent_rao.numeric( @@ -269,7 +269,7 @@ ent_rao.species_distribution <- function( # Apply ent_rao.numeric() to each site ent_rao_list <- apply( # Eliminate site and weight columns - x[, !colnames(x) %in% non_species_columns], + x[, !colnames(x) %in% non_species_columns], # Apply to each row MARGIN = 1, FUN = ent_rao.numeric, diff --git a/R/ent_similarity.R b/R/ent_similarity.R index 6444ec6..94db3aa 100644 --- a/R/ent_similarity.R +++ b/R/ent_similarity.R @@ -1,16 +1,16 @@ #' Similarity-Based Entropy of a Community -#' +#' #' Estimate the entropy of species from abundance or probability data and a #' similarity matrix between species. -#' Several estimators are available to deal with incomplete sampling. +#' Several estimators are available to deal with incomplete sampling. #' Bias correction requires the number of individuals. -#' -#' All species of the `species_distribution` must be found in the matrix of +#' +#' All species of the `species_distribution` must be found in the matrix of #' `similarities` if it is named. #' If it is not or if `x` is numeric, its size must equal the number of species. #' Then, the order of species is assumed to be the same as that of the #' `species_distribution` or its numeric equivalent. -#' +#' #' Similarity-Based entropy can't be interpolated of extrapolated as of the #' state of the art. #' @@ -30,7 +30,7 @@ #' ent_similarity(paracou_6_abd, similarities = Z, q = 2) #' # gamma diversity #' ent_similarity(paracou_6_abd, similarities = Z, q = 2, gamma = TRUE) -#' +#' #' @name ent_similarity NULL @@ -45,18 +45,18 @@ ent_similarity <- function(x, similarities, q = 1, ...) { #' @rdname ent_similarity #' -#' @param estimator An estimator of entropy. -#' +#' @param estimator An estimator of entropy. +#' #' @export ent_similarity.numeric <- function( - x, + x, similarities = diag(length(x)), - q = 1, - estimator = c("UnveilJ", "Max", "ChaoShen", "MarconZhang", + q = 1, + estimator = c("UnveilJ", "Max", "ChaoShen", "MarconZhang", "UnveilC", "UnveiliC", "naive"), probability_estimator = c("Chao2015", "Chao2013", "ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), sample_coverage = NULL, @@ -69,9 +69,9 @@ ent_similarity.numeric <- function( if (any(x < 0)) stop("Species probabilities or abundances must be positive.") similarities <- checked_matrix(similarities, x) } - estimator <- match.arg(estimator) - probability_estimator <- match.arg(probability_estimator) - unveiling <- match.arg(unveiling) + estimator <- match.arg(estimator) + probability_estimator <- match.arg(probability_estimator) + unveiling <- match.arg(unveiling) coverage_estimator <- match.arg(coverage_estimator) # Check that dimensions correspond @@ -93,14 +93,14 @@ ent_similarity.numeric <- function( } else { return( tibble::tibble_row( - estimator = "naive", + estimator = "naive", order = q, entropy = the_entropy ) - ) + ) } } - + # Eliminate 0 abd <- x[x > 0] similarities <- similarities[x > 0, x > 0] @@ -111,8 +111,8 @@ ent_similarity.numeric <- function( ordinariness <- as.numeric(similarities %*% prob) # Number of observed species s_obs <- length(abd) - - + + # Entropy of a vector of abundances ---- ## Exit if x contains no or a single species ---- if (length(abd) < 2) { @@ -122,11 +122,11 @@ ent_similarity.numeric <- function( } else { return( tibble::tibble_row( - estimator = "No Species", + estimator = "No Species", order = q, entropy = NA ) - ) + ) } } else { if (as_numeric) { @@ -134,11 +134,11 @@ ent_similarity.numeric <- function( } else { return( tibble::tibble_row( - estimator = "Single Species", + estimator = "Single Species", order = q, entropy = 0 ) - ) + ) } } } else { @@ -148,7 +148,7 @@ ent_similarity.numeric <- function( estimator <- "naive" } } - + ## Metacommunity estimation ---- if (!is.null(sample_coverage)) { # Force estimator to ChaoShen @@ -156,13 +156,13 @@ ent_similarity.numeric <- function( } else { # Calculate sample coverage sample_coverage <- coverage.numeric( - abd, + abd, estimator = coverage_estimator, as_numeric = TRUE, check_arguments = FALSE ) } - + ## Naive estimator ---- if (!is_integer_values(abd)) { warning("The estimator can't be applied to non-integer values.") @@ -180,14 +180,14 @@ ent_similarity.numeric <- function( } else { return( tibble::tibble_row( - estimator = estimator, + estimator = estimator, order = q, entropy = the_entropy ) - ) + ) } } - + ## Unveiled estimator ---- if (estimator %in% c("UnveilJ", "UnveilC", "UnveiliC")) { prob_unv <- probabilities.numeric( @@ -196,10 +196,10 @@ ent_similarity.numeric <- function( unveiling = unveiling, richness_estimator = switch( estimator, - UnveilJ = "jackknife", - UnveilC = "Chao1", + UnveilJ = "jackknife", + UnveilC = "Chao1", UnveiliC = "iChao1" - ), + ), jack_alpha = jack_alpha, jack_max = jack_max, coverage_estimator = coverage_estimator, @@ -209,7 +209,7 @@ ent_similarity.numeric <- function( ) s_est <- length(prob_unv) } - + # Calculate the average similarity Z <- similarities diag(Z) <- NA @@ -218,13 +218,13 @@ ent_similarity.numeric <- function( prob_cov <- prob * sample_coverage # Tune the ordinariness of observed species, i.e. # similarity with observed species is multiplied by coverage - # and unobserved species add (1 - coverage) * average similarity - Z_p <- as.vector(similarities %*% prob_cov) + + # and unobserved species add (1 - coverage) * average similarity + Z_p <- as.vector(similarities %*% prob_cov) + rep(sim_mean * (1 - sample_coverage), length(prob_cov)) - + if (estimator %in% c("UnveilJ", "UnveilC", "UnveiliC")) { # concatenate the ordinariness of unobserved species, i.e. - # their probability + sim_mean * the other probabilities + # their probability + sim_mean * the other probabilities Z_p <- c( Z_p, prob_unv[-(1:s_obs)] + (1 - prob_unv[-(1:s_obs)]) * sim_mean @@ -241,24 +241,24 @@ ent_similarity.numeric <- function( } else { return( tibble::tibble_row( - estimator = estimator, + estimator = estimator, order = q, entropy = the_entropy ) - ) + ) } } - + ## MarconZhang ---- if (estimator == "MarconZhang" | estimator == "Max") { V <- seq_len(sample_size - 1) - # p_V_Ns is an array, containing (1 - (n_s-1)/(n-j)) for each species (lines) + # p_V_Ns is an array, containing (1 - (n_s-1)/(n-j)) for each species (lines) # and all j from 1 to n-1 p_V_Ns <- outer(abd, V, function(n_s, j) 1 - (n_s - 1) / (sample_size - j)) # Useful values are products from j=1 to v, so prepare cumulative products p_V_Ns <- apply(p_V_Ns, 1, cumprod) } - + if (estimator == "ChaoShen" | estimator == "Max") { # Horvitz-Thomson multiplier HT_C_P <- prob_cov / (1 - (1 - prob_cov)^sample_size) @@ -267,7 +267,7 @@ ent_similarity.numeric <- function( Z_p[Z_p == 0] <- 1 ent_chao_shen <- (sum(HT_C_P * ln_q(1 / Z_p, q = q))) } - + if ((estimator == "MarconZhang" | estimator == "Max") & (q != 1)) { Zpqm1 <- Z_p^(q - 1) # Force 0^(q-1) = 0 @@ -279,8 +279,8 @@ ent_similarity.numeric <- function( w_v <- cumprod(w_vi) Taylor <- 1 + sum( prob * vapply( - seq_len(s_obs), - FUN = S_v, + seq_len(s_obs), + FUN = S_v, # Arguments abd = abd, sample_size = sample_size, @@ -294,15 +294,15 @@ ent_similarity.numeric <- function( U <- Taylor - sum(FirstTerms) ent_marcon_zhang <- ((K + U - 1) / (1 - q)) } - + if ((estimator == "MarconZhang" | estimator == "Max") & (q == 1)) { L <- -sum(prob_cov * log(Z_p)) # Weights w_v <- ((1 - sim_mean)^V) / V Taylor <- 1 + sum( prob * vapply( - seq_len(s_obs), - FUN = S_v, + seq_len(s_obs), + FUN = S_v, # Arguments abd = abd, sample_size = sample_size, @@ -316,25 +316,25 @@ ent_similarity.numeric <- function( X <- Taylor - sum(FirstTerms) ent_marcon_zhang <- L + X } - - the_entropy <- switch ( + + the_entropy <- switch( estimator, ChaoShen = ent_chao_shen, MarconZhang = ent_marcon_zhang, Max = max(ent_chao_shen, ent_marcon_zhang) ) - + # Return if (as_numeric) { return(the_entropy) } else { return( tibble::tibble_row( - estimator = estimator, + estimator = estimator, order = q, entropy = the_entropy ) - ) + ) } } @@ -343,30 +343,30 @@ ent_similarity.numeric <- function( #' #' @export ent_similarity.species_distribution <- function( - x, + x, similarities = diag(sum(!colnames(x) %in% non_species_columns)), - q = 1, - estimator = c("UnveilJ", "Max", "ChaoShen", "MarconZhang", + q = 1, + estimator = c("UnveilJ", "Max", "ChaoShen", "MarconZhang", "UnveilC", "UnveiliC", "naive"), probability_estimator = c("Chao2015", "Chao2013", "ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), gamma = FALSE, as_numeric = FALSE, ..., check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") } - estimator <- match.arg(estimator) - probability_estimator <- match.arg(probability_estimator) - unveiling <- match.arg(unveiling) + estimator <- match.arg(estimator) + probability_estimator <- match.arg(probability_estimator) + unveiling <- match.arg(unveiling) coverage_estimator <- match.arg(coverage_estimator) - + # Check species names and reorder the matrix to fit the names similarities <- checked_matrix(similarities, x) @@ -389,7 +389,7 @@ ent_similarity.species_distribution <- function( # Apply ent_similarity.numeric() to each site ent_similarity_list <- apply( # Eliminate site and weight columns - x[, !colnames(x) %in% non_species_columns], + x[, !colnames(x) %in% non_species_columns], # Apply to each row MARGIN = 1, FUN = ent_similarity.numeric, @@ -399,8 +399,8 @@ ent_similarity.species_distribution <- function( estimator = estimator, probability_estimator = probability_estimator, unveiling = unveiling, - jack_alpha = jack_alpha, - jack_max = jack_max, + jack_alpha = jack_alpha, + jack_max = jack_max, as_numeric = FALSE, check_arguments = FALSE ) @@ -421,7 +421,7 @@ ent_similarity.species_distribution <- function( #' Sum of Products Weighted by w_v -#' +#' #' Utility for the Marcon-Zhang estimator of similarity-based entropy. #' #' @param species_index The species to consider (from 1 to `s_obs`) @@ -445,15 +445,15 @@ S_v <- function( #' Similarity-Based Gamma entropy of a metacommunity -#' +#' #' Build the metacommunity and check that abundances are integers. -#' +#' #' See [ent_gamma_tsallis] for details. #' @param species_distribution An object of class [species_distribution]. -#' +#' #' @returns A tibble with the estimator used and the estimated entropy. #' @noRd -#' +#' ent_gamma_similarity <- function( species_distribution, similarities, @@ -465,18 +465,18 @@ ent_gamma_similarity <- function( jack_max, coverage_estimator, as_numeric) { - + # Build the metacommunity abd <- metacommunity.abundances( - species_distribution, - as_numeric = TRUE, + species_distribution, + as_numeric = TRUE, check_arguments = FALSE ) if (is_integer_values(abd)) { # Sample coverage is useless sample_coverage <- NULL } else { - # Non-integer values in the metacommunity. + # Non-integer values in the metacommunity. # Calculate the sample coverage and change the estimator. sample_coverage <- coverage.numeric( colSums( @@ -492,7 +492,7 @@ ent_gamma_similarity <- function( estimator <- "Marcon" } } - + # Compute the entropy. the_entropy <- ent_similarity.numeric( abd, @@ -507,7 +507,7 @@ ent_gamma_similarity <- function( as_numeric = as_numeric, check_arguments = FALSE ) - + # Return if (as_numeric) { return(the_entropy) diff --git a/R/ent_simpson.R b/R/ent_simpson.R index 48d2a91..7275ad9 100644 --- a/R/ent_simpson.R +++ b/R/ent_simpson.R @@ -1,16 +1,16 @@ #' Simpson's Entropy of a Community -#' +#' #' Estimate the entropy \insertCite{Simpson1949}{divent} of species from abundance #' or probability data. #' Several estimators are available to deal with incomplete sampling. -#' -#' Bias correction requires the number of individuals. +#' +#' Bias correction requires the number of individuals. #' See [div_hill] for non-specific estimators. -#' +#' #' Simpson-specific estimator is from \insertCite{Lande1996;textual}{divent}. -#' -#' Entropy can be estimated at a specified level of interpolation or -#' extrapolation, either a chosen sample size or sample coverage +#' +#' Entropy can be estimated at a specified level of interpolation or +#' extrapolation, either a chosen sample size or sample coverage #' \insertCite{Chao2014}{divent}, rather than its asymptotic value. #' See [accum_tsallis] for details. #' @@ -23,16 +23,16 @@ #' #' @references #' \insertAllCited{} -#' +#' #' @examples #' # Entropy of each community #' ent_simpson(paracou_6_abd) #' # gamma entropy #' ent_simpson(paracou_6_abd, gamma = TRUE) -#' +#' #' # At 80% coverage #' ent_simpson(paracou_6_abd, level = 0.8) -#' +#' #' @name ent_simpson NULL @@ -48,19 +48,19 @@ ent_simpson <- function(x, ...) { #' @rdname ent_simpson #' #' @param estimator An estimator of entropy. -#' +#' #' @export ent_simpson.numeric <- function( - x, + x, estimator = c("Lande", - "UnveilJ", "ChaoJost", "ChaoShen", "GenCov", "Grassberger", + "UnveilJ", "ChaoJost", "ChaoShen", "GenCov", "Grassberger", "Marcon", "UnveilC", "UnveiliC", "ZhangGrabchak", "naive", "Bonachela", "Holste"), - level = NULL, + level = NULL, probability_estimator = c("Chao2015", "Chao2013", "ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), richness_estimator = c("jackknife", "iChao1", "Chao1", "naive"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), as_numeric = FALSE, @@ -71,10 +71,10 @@ ent_simpson.numeric <- function( check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") } - estimator <- match.arg(estimator) - probability_estimator <- match.arg(probability_estimator) - unveiling <- match.arg(unveiling) - richness_estimator <- match.arg(richness_estimator) + estimator <- match.arg(estimator) + probability_estimator <- match.arg(probability_estimator) + unveiling <- match.arg(unveiling) + richness_estimator <- match.arg(richness_estimator) coverage_estimator <- match.arg(coverage_estimator) # Entropy of a vector of probabilities ---- @@ -87,14 +87,14 @@ ent_simpson.numeric <- function( } else { return( tibble::tibble_row( - estimator = "naive", + estimator = "naive", order = 2, entropy = the_entropy ) - ) + ) } } - + # Eliminate 0 abd <- x[x > 0] # Sample size @@ -110,11 +110,11 @@ ent_simpson.numeric <- function( } else { return( tibble::tibble_row( - estimator = "No Species", + estimator = "No Species", order = 2, entropy = NA ) - ) + ) } } else { if (as_numeric) { @@ -122,11 +122,11 @@ ent_simpson.numeric <- function( } else { return( tibble::tibble_row( - estimator = "Single Species", + estimator = "Single Species", order = 2, entropy = 0 ) - ) + ) } } } else { @@ -136,7 +136,7 @@ ent_simpson.numeric <- function( estimator <- "naive" } } - + if (estimator == "Lande") { # Lande's estimator ---- the_entropy <- 1 - sum(abd * (abd - 1) / sample_size / (sample_size - 1)) @@ -145,24 +145,24 @@ ent_simpson.numeric <- function( } else { return( tibble::tibble_row( - estimator = estimator, + estimator = estimator, order = 2, entropy = the_entropy ) - ) - } + ) + } } else { ## Tsallis entropy if estimator != "Lande" ---- return( ent_tsallis( abd, - q = 2, + q = 2, estimator = estimator, - level = NULL, + level = NULL, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, - jack_alpha = jack_alpha, + jack_alpha = jack_alpha, jack_max = jack_max, coverage_estimator = coverage_estimator, as_numeric = as_numeric, @@ -176,7 +176,7 @@ ent_simpson.numeric <- function( # If level is coverage, get size if (level < 1) { level <- coverage_to_size.numeric( - abd, + abd, sample_coverage = level, estimator = coverage_estimator, as_numeric = TRUE, @@ -191,28 +191,28 @@ ent_simpson.numeric <- function( } else { return( tibble::tibble_row( - estimator = "Sample", + estimator = "Sample", order = 2, level = level, entropy = the_entropy ) - ) + ) } } ## Valid interpolation and extrapolation ---- - the_entropy <- 1 - 1 / level - + the_entropy <- 1 - 1 / level - (1 - 1 / level) * sum(abd * (abd - 1)) / sample_size / (sample_size - 1) if (as_numeric) { return(the_entropy) } else { return( tibble::tibble_row( - estimator = "Chao2014", + estimator = "Chao2014", order = 2, level = level, entropy = the_entropy ) - ) + ) } } @@ -223,14 +223,14 @@ ent_simpson.numeric <- function( ent_simpson.species_distribution <- function( x, estimator = c("Lande", - "UnveilJ", "ChaoJost", "ChaoShen", "GenCov", "Grassberger", + "UnveilJ", "ChaoJost", "ChaoShen", "GenCov", "Grassberger", "Marcon", "UnveilC", "UnveiliC", "ZhangGrabchak", "naive", "Bonachela", "Holste"), - level = NULL, + level = NULL, probability_estimator = c("Chao2015", "Chao2013", "ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), richness_estimator = c("jackknife", "iChao1", "Chao1", "naive"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), gamma = FALSE, @@ -242,10 +242,10 @@ ent_simpson.species_distribution <- function( check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") } - estimator <- match.arg(estimator) - probability_estimator <- match.arg(probability_estimator) - unveiling <- match.arg(unveiling) - richness_estimator <- match.arg(richness_estimator) + estimator <- match.arg(estimator) + probability_estimator <- match.arg(probability_estimator) + unveiling <- match.arg(unveiling) + richness_estimator <- match.arg(richness_estimator) coverage_estimator <- match.arg(coverage_estimator) if (gamma) { @@ -254,7 +254,7 @@ ent_simpson.species_distribution <- function( species_distribution = x, q = 2, estimator = estimator, - level = level, + level = level, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, @@ -268,18 +268,18 @@ ent_simpson.species_distribution <- function( # Apply ent_simpson.numeric() to each site ent_simpson_sites <- apply( # Eliminate site and weight columns - x[, !colnames(x) %in% non_species_columns], + x[, !colnames(x) %in% non_species_columns], # Apply to each row MARGIN = 1, FUN = ent_simpson.numeric, # Arguments estimator = estimator, - level = level, + level = level, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, - jack_alpha = jack_alpha, - jack_max = jack_max, + jack_alpha = jack_alpha, + jack_max = jack_max, coverage_estimator = coverage_estimator, as_numeric = as_numeric, check_arguments = FALSE diff --git a/R/ent_sp_simpson.R b/R/ent_sp_simpson.R index dc64cf6..2a938e7 100644 --- a/R/ent_sp_simpson.R +++ b/R/ent_sp_simpson.R @@ -1,13 +1,13 @@ #' Spatially Explicit Simpson's Entropy -#' -#' Simpson's entropy of the neighborhood of individuals, +#' +#' Simpson's entropy of the neighborhood of individuals, #' up to a distance \insertCite{Shimatani2001}{divent}. #' #' @inheritParams check_divent_args #' @param correction the edge-effect correction to apply when estimating #' the number of neighbors or the *K* function with [spatstat.explore::Kest]. #' Default is "isotropic". -#' +#' #' @name ent_sp_simpson #' @references #' \insertAllCited{} @@ -15,10 +15,10 @@ NULL #' @rdname ent_sp_simpson -#' @returns `ent_sp_simpson` returns an object of class `fv`, +#' @returns `ent_sp_simpson` returns an object of class `fv`, #' see [spatstat.explore::fv.object]. #' There are methods to print and plot this class. -#' It contains the value of the spatially explicit Simpson's entropy +#' It contains the value of the spatially explicit Simpson's entropy #' for each distance in `r`. #' @export #' @@ -27,81 +27,81 @@ NULL #' X <- rspcommunity(1, size = 1000, species_number = 3) #' # Calculate the entropy and plot it #' autoplot(ent_sp_simpson(X)) -#' +#' ent_sp_simpson <- function( - X, - r = NULL, + X, + r = NULL, correction = c("isotropic", "translate", "none"), check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() } - correction <- match.arg(correction) - + correction <- match.arg(correction) + # Summary abd <- tapply( - spatstat.geom::marks(X)$PointType, - spatstat.geom::marks(X)$PointType, + spatstat.geom::marks(X)$PointType, + spatstat.geom::marks(X)$PointType, length) abd <- abd[!is.na(abd)] sample_size <- sum(abd) prob <- abd / sample_size - + # r if (is.null(r)) { r_max <- spatstat.geom::diameter(X$window) # Default values, as in dbmss - r <- r_max * + r <- r_max * c( - 0, - 1:20, - seq(22, 40, 2), - seq(45, 100, 5), - seq(110, 200, 10), + 0, + 1:20, + seq(22, 40, 2), + seq(45, 100, 5), + seq(110, 200, 10), seq(220, 400, 20) ) / 800 } # K all points K_all <- correction_fv( spatstat.explore::Kest( - X, - r = r, + X, + r = r, correction = correction ), correction ) - + # The point pattern is split into a list of ppp for each mark ppp.list <- split(X, as.factor(spatstat.geom::marks(X)$PointType)) # K for each ppp K.list <- lapply( - ppp.list, - FUN = spatstat.explore::Kest, + ppp.list, + FUN = spatstat.explore::Kest, r = r, correction = correction ) # Extract the values into a dataframe, each column is a PointType K_PointType <- as.data.frame( lapply( - K.list, - FUN = correction_fv, + K.list, + FUN = correction_fv, correction = correction ) ) # K_PointType is NA for species with a a single point. Should be 0 K_PointType[is.na(K_PointType)] <- 0 - + # Result: Shilmatani's function of r Shi_r <- (1 - rowSums( - (K_PointType * rep(abd * (abd - 1), each = dim(K_PointType)[1])) / + (K_PointType * rep(abd * (abd - 1), each = dim(K_PointType)[1])) / (K_all * sample_size * (sample_size - 1))) ) * (sample_size - 1) / sample_size - + # Build a dataframe with r, theoretical value and S(r) Shi.df <- data.frame( - r = r, - Simpson = ent_simpson.numeric(abd, as_numeric = TRUE), + r = r, + Simpson = ent_simpson.numeric(abd, as_numeric = TRUE), S_r = Shi_r ) @@ -109,15 +109,15 @@ ent_sp_simpson <- function( labl <- c("r", "hat(%s)", "hat(%s)(r)") desc <- c("Distance argument r", "Asymptotic %s", "Estimated %s") the_entropy <- spatstat.explore::fv( - Shi.df, - argu = "r", - ylab = quote(Shimatani(r)), - valu = "S_r", - fmla = ". ~ r", - alim = c(0, max(r)), - labl = labl, - desc = desc, - unitname = X$window$unit, + Shi.df, + argu = "r", + ylab = quote(Shimatani(r)), + valu = "S_r", + fmla = ". ~ r", + alim = c(0, max(r)), + labl = labl, + desc = desc, + unitname = X$window$unit, fname = "Simpson's Entropy") spatstat.explore::fvnames(the_entropy, ".") <- colnames(Shi.df)[-1] return(the_entropy) @@ -126,13 +126,13 @@ ent_sp_simpson <- function( #' @rdname ent_sp_simpson -#' @param h0 A string describing the null hypothesis to simulate. +#' @param h0 A string describing the null hypothesis to simulate. #' The null hypothesis may be "RandomPosition": points are drawn in a Poisson process (default) #' or "RandomLabeling": randomizes point types, keeping locations unchanged. #' -#' @return `ent_sp_simpsonEnvelope` returns an envelope object [spatstat.explore::envelope]. +#' @return `ent_sp_simpsonEnvelope` returns an envelope object [spatstat.explore::envelope]. #' There are methods to print and plot this class. -#' It contains the observed value of the function, +#' It contains the observed value of the function, #' its average simulated value and the confidence envelope. #' @export #' @@ -141,23 +141,23 @@ ent_sp_simpson <- function( #' X <- rspcommunity(1, size = 1000, species_number = 3) #' # Calculate the entropy and plot it #' autoplot(ent_sp_simpsonEnvelope(X, n_simulations = 10)) -#' +#' ent_sp_simpsonEnvelope <- function( - X, - r = NULL, + X, + r = NULL, n_simulations = 100, - alpha = 0.05, + alpha = 0.05, correction = c("isotropic", "translate", "none"), - h0 = c("RandomPosition", "RandomLabeling"), - global = FALSE, + h0 = c("RandomPosition", "RandomLabeling"), + global = FALSE, check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() } - correction <- match.arg(correction) - h0 <- match.arg(h0) - + correction <- match.arg(correction) + h0 <- match.arg(h0) + # Choose the null hypothesis X_sim <- switch( h0, @@ -169,14 +169,14 @@ ent_sp_simpsonEnvelope <- function( } # local envelope, keep extreme values for lo and hi (nrank=1) the_envelope <- spatstat.explore::envelope( - X, - fun = ent_sp_simpson, - nsim = n_simulations, + X, + fun = ent_sp_simpson, + nsim = n_simulations, nrank = 1, - r = r, + r = r, correction = correction, check_arguments = FALSE, - simulate = X_sim, + simulate = X_sim, savefuns = TRUE ) attr(the_envelope, "einfo")$H0 <- switch( @@ -195,7 +195,7 @@ ent_sp_simpsonEnvelope <- function( #' according to an edge-effect correction #' #' @param fv the function value object, see [spatstat.explore::fv.object]. -#' @param correction the edge-effect correction: +#' @param correction the edge-effect correction: #' "isotropic", "translate" or "none" #' #' @returns a vector with the function values diff --git a/R/exp_q.R b/R/exp_q.R index eab452e..b4fd1d2 100644 --- a/R/exp_q.R +++ b/R/exp_q.R @@ -1,14 +1,14 @@ #' Deformed exponential -#' +#' #' Calculate the deformed exponential of order *q*. -#' +#' #' The deformed exponential is the reciprocal of the deformed logarithm #' \insertCite{Tsallis1994}{divent}, see [ln_q]. #' It is defined as \eqn{(x(1-q)+1)^{\frac{1}{(1-q)}}}. -#' -#' For \eqn{q>1}, \eqn{\ln_q{(+\infty)}=\frac{1}{(q-1)}} +#' +#' For \eqn{q>1}, \eqn{\ln_q{(+\infty)}=\frac{1}{(q-1)}} #' so \eqn{\exp_q{(x)}} is not defined for \eqn{x>\frac{1}{(q-1)}}. -#' When `x` is very close to this value, the exponential is severely subject +#' When `x` is very close to this value, the exponential is severely subject #' to rounding errors. #' #' @param x A numeric vector or array. @@ -24,16 +24,16 @@ #' curve(exp_q(x, q = 0), from = -5, to = 0, lty = 2) #' curve(exp(x), from = -5, to = 0, lty= 1, add = TRUE) #' curve(exp_q(x, q = 2), from = -5, to = 0, lty = 3, add = TRUE) -#' legend("bottomright", +#' legend("bottomright", #' legend = c( #' expression(exp[0](x)), #' expression(exp(x)), #' expression(exp[2](x)) #' ), -#' lty = c(2, 1, 3), +#' lty = c(2, 1, 3), #' inset = 0.02 #' ) -#' +#' exp_q <- function(x, q) { # Explicit recycling if (length(x) > length(q)) { @@ -41,8 +41,8 @@ exp_q <- function(x, q) { } else if (length(x) < length(q)) { x <- rep_len(x, length(q)) } - - # General formula. Rather 1 - x * q + x than x * (1 - q) + 1 + + # General formula. Rather 1 - x * q + x than x * (1 - q) + 1 # to limit rounding errors the_exp_q <- (1 - x * q + x)^(1 / (1 - q)) # returns 1 if q==1: calculate exp(x) @@ -51,8 +51,8 @@ exp_q <- function(x, q) { the_exp_q[(q > 1) & (x > 1 / (q - 1))] <- NaN # Rounding error: x values that can't be distinguished from the max defined # Substract the rounding error to x and recompute. Far from perfect. - the_exp_q[(q > 1) & (abs(1 - x * q + x) < .Machine$double.eps)] <- + the_exp_q[(q > 1) & (abs(1 - x * q + x) < .Machine$double.eps)] <- (1 - x * q + x + q * .Machine$double.eps - .Machine$double.eps)^(1 / (1 - q)) - + return(the_exp_q) } diff --git a/R/fit_rac.R b/R/fit_rac.R index 4867cb1..dc09cee 100644 --- a/R/fit_rac.R +++ b/R/fit_rac.R @@ -1,14 +1,14 @@ #' Fit a distribution -#' +#' #' Fit a well-known distribution to a species distribution. -#' -#' [abundances] can be used to fit rank-abundance curves (RAC) of classical -#' distributions: -#' +#' +#' [abundances] can be used to fit rank-abundance curves (RAC) of classical +#' distributions: +#' #' - "lnorm" for log-normal \insertCite{Preston1948}{divent}. #' - "lseries" for log-series \insertCite{Fisher1943}{divent}. #' - "geom" for geometric \insertCite{Motomura1932}{divent}. -#' - "bstick" for broken stick \insertCite{MacArthur1957}{divent}. +#' - "bstick" for broken stick \insertCite{MacArthur1957}{divent}. #' It has no parameter, so the maximum abundance is returned. #' #' @inheritParams check_divent_args @@ -16,13 +16,13 @@ #' @param ... Unused. #' #' @returns A tibble with the sites and the estimated distribution parameters. -#' +#' #' @references #' \insertAllCited{} #' #' @examples #' fit_rac(paracou_6_abd, distribution = "lnorm") -#' +#' #' @name fit_rac NULL @@ -37,19 +37,19 @@ fit_rac <- function(x, ...) { #' @rdname fit_rac #' -#' +#' #' @export fit_rac.numeric <- function( x, distribution = c("lnorm", "lseries", "geom", "bstick"), ..., check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") } - distribution <- match.arg(distribution) + distribution <- match.arg(distribution) # Eliminate zeros and sort. abd <- sort(x[x > 0], decreasing = TRUE) @@ -58,7 +58,7 @@ fit_rac.numeric <- function( sample_size <- sum(abd) # Unique values nu <- unique(abd) - + if (distribution == "lnorm") { # Fit a lognormal distribution log_abd <- log(abd) @@ -68,38 +68,38 @@ fit_rac.numeric <- function( return( list( rac = tibble::tibble( - rank = rank, + rank = rank, abundance = nu - ), + ), parameters = tibble::tibble( - mu=mu, - sigma=sigma + mu = mu, + sigma = sigma ) ) ) } - + if (distribution == "lseries") { # Evaluate alpha alpha <- vegan::fisher.alpha(abd) # May (1975) Ecology and Evolution of Communities, Harvard University Press. sei <- function(t) exp(-t)/t rank <- vapply( - nu, + nu, function(x) { n <- x * log(1 + alpha / sample_size) f <- stats::integrate(sei, n, Inf) fv <- f[["value"]] return(alpha * fv) - }, + }, FUN.VALUE = 0 ) return( list( rac = tibble::tibble( - rank = rank, + rank = rank, abundance = nu - ), + ), parameters = tibble::tibble( alpha = alpha ) @@ -115,16 +115,16 @@ fit_rac.numeric <- function( return( list( rac = tibble::tibble( - rank = rank, + rank = rank, abundance = exp(reg$coefficients[1] + reg$coefficients[2] * rank) - ), + ), parameters = tibble::tibble( prob = as.numeric(-reg$coefficients[2]) ) ) ) } - + if (distribution == "bstick") { # Fit a broken stick f1 <- sort(cumsum(1 / (s_obs:1)), decreasing = TRUE) @@ -132,9 +132,9 @@ fit_rac.numeric <- function( return( list( rac = tibble::tibble( - rank = seq_len(s_obs), + rank = seq_len(s_obs), abundance = nu - ), + ), parameters = tibble::tibble( max = max(nu) ) @@ -157,12 +157,12 @@ fit_rac.species_distribution <- function( check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") } - distribution <- match.arg(distribution) + distribution <- match.arg(distribution) # Apply probabilities.numeric() to each site rac_list <- apply( # Eliminate site and weight columns - x[, !colnames(x) %in% non_species_columns], + x[, !colnames(x) %in% non_species_columns], # Apply to each row MARGIN = 1, FUN = fit_rac.numeric, @@ -170,7 +170,7 @@ fit_rac.species_distribution <- function( distribution = distribution, check_arguments = FALSE ) - + # Bind the rows of the parameters tibble racs <- dplyr::bind_rows(lapply(rac_list, function(x) x$parameters)) # Restore the site names diff --git a/R/fun_similarity.R b/R/fun_similarity.R index 6e2abac..798791d 100644 --- a/R/fun_similarity.R +++ b/R/fun_similarity.R @@ -1,13 +1,13 @@ #' Functional similarity -#' -#' Transform a distance matrix into a similarity matrix +#' +#' Transform a distance matrix into a similarity matrix #' \insertCite{Leinster2012}{divent}. -#' Similarity between two species is defined either by a negative exponential -#' function of their distance or by the complement to 1 of their normalized +#' Similarity between two species is defined either by a negative exponential +#' function of their distance or by the complement to 1 of their normalized #' distance (such that the most distant species are 1 apart). #' #' @inheritParams check_divent_args -#' @param exponential If `TRUE`, similarity is \eqn{e^{-r \delta}}, +#' @param exponential If `TRUE`, similarity is \eqn{e^{-r \delta}}, #' where \eqn{r} is argument `rate`. #' If `FALSE`, it is \eqn{1 - \delta / \max(\delta)}. #' @@ -16,17 +16,17 @@ #' #' @references #' \insertAllCited{} -#' +#' #' @examples #' # Similarity between Paracou 6 species #' hist(fun_similarity(paracou_6_fundist)) -#' -fun_similarity <- function ( +#' +fun_similarity <- function( distances, exponential = TRUE, rate = 1, check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() # Names @@ -34,7 +34,7 @@ fun_similarity <- function ( stop("Row and column names must be identical in 'distances'.") } } - + if (inherits(distances, "dist")) { # dist objects are supported but the remainder assumes a matrix distances <- as.matrix(distances) @@ -43,7 +43,7 @@ fun_similarity <- function ( if (is.null(colnames(distances))) { ### Add default species names such as sp_1 colnames(x) <- paste( - "sp", + "sp", formatC(seq_len(ncol(x)), width = ceiling(log10(ncol(x))), flag = "0"), sep = "_" ) @@ -51,13 +51,13 @@ fun_similarity <- function ( } else if (!identical(colnames(distances), rownames(distances))) { stop("Row and column names must be identical in 'distances'.") } - + if (exponential) { the_similarity <- exp(-rate * distances) } else { the_similarity <- 1 - distances / max(distances) } - + class(the_similarity) <- c("similarity", class(the_similarity)) return(the_similarity) } diff --git a/R/ln_q.R b/R/ln_q.R index 0f43197..fe097e6 100644 --- a/R/ln_q.R +++ b/R/ln_q.R @@ -1,13 +1,13 @@ #' Deformed logarithm -#' +#' #' Calculate the deformed logarithm of order *q*. -#' -#' The deformed logarithm \insertCite{Tsallis1994}{divent} is defined as +#' +#' The deformed logarithm \insertCite{Tsallis1994}{divent} is defined as #' \eqn{\ln_q{x}=\frac{(x^{(1-q)}-1)}{(1-q)}}. -#' +#' #' The shape of the deformed logarithm is similar to that of the regular one. #' \eqn{\ln_1{x}=\log{x}}. -#' +#' #' For \eqn{q>1}, \eqn{\ln_q{(+\infty)}=\frac{1}{(q-1)}}. #' #' @param x A numeric vector or array. @@ -15,7 +15,7 @@ #' #' @returns A vector of the same length as `x` containing the transformed values. #' @export -#' +#' #' @references #' \insertAllCited{} #' @@ -23,16 +23,16 @@ #' curve(ln_q(1/ x, q = 0), 0, 1, lty = 2, ylab = "Logarithm", ylim = c(0, 10)) #' curve(log(1 / x), 0, 1, lty = 1, n =1E4, add = TRUE) #' curve(ln_q(1 / x, q = 2), 0, 1, lty = 3, add = TRUE) -#' legend("topright", +#' legend("topright", #' legend = c( #' expression(ln[0](1/x)), #' expression(log(1/x)), #' expression(ln[2](1/x)) #' ), -#' lty = c(2, 1, 3), +#' lty = c(2, 1, 3), #' inset = 0.02 #' ) -#' +#' ln_q <- function(x, q) { # Explicit recycling if (length(x) > length(q)) { @@ -40,13 +40,13 @@ ln_q <- function(x, q) { } else if (length(x) < length(q)) { x <- rep_len(x, length(q)) } - + # General formula the_ln_q <- (x^(1 - q) - 1) / (1 - q) # returns NaN if q==1: calculate log(x) the_ln_q[q == 1] <- log(x)[q == 1] # Force NaN for negative x the_ln_q[x < 0] <- NaN - - return (the_ln_q) + + return(the_ln_q) } diff --git a/R/phylo_divent.R b/R/phylo_divent.R index dfeec6a..998d05a 100644 --- a/R/phylo_divent.R +++ b/R/phylo_divent.R @@ -1,7 +1,7 @@ #' Class phylo_divent -#' +#' #' Methods for dendrograms of class "phylo_divent". -#' +#' #' `as_phylo_divent` calculates cuts and intervals of a phylogenetic tree and makes #' it available both in [stats::hclust] and [ape::phylo] formats. #' The conversion preprocesses the tree: it calculates cuts so that the tree @@ -10,31 +10,31 @@ #' @inheritParams check_divent_args #' @param x An object of class "phylo_divent". #' -#' @returns `as_phylo_divent` returns a phylogenetic tree that is an object of +#' @returns `as_phylo_divent` returns a phylogenetic tree that is an object of #' class "phylo_divent". #' #' @examples #' # Paracou plot 6 species taxonomy #' tree <- as_phylo_divent(mock_3sp_tree) #' plot(tree) -#' +#' #' @name phylo_divent NULL #' @rdname phylo_divent -#' +#' #' @export as_phylo_divent <- function(tree) { # The tree may be NULL or already processed - if (is.null(tree) | inherits(tree, "phylo_divent")) return (tree) - + if (is.null(tree) | inherits(tree, "phylo_divent")) return(tree) + # Convert tree to phylo and hclust---- # tree must be either a phylog, phylo or a hclust object if (inherits(tree, "phylog")) { - # Build an hclust object to use cutree later. + # Build an hclust object to use cutree later. # Distances in $Wdist are actually sqrt(2*distance) - # Caution: Distances in hclust count full branch lengths between species, + # Caution: Distances in hclust count full branch lengths between species, # i.e. twice the ultrametric distance. See ?as.phylo.hclust tree.hclust <- stats::hclust(tree$Wdist^2 / 2, "average") # build a phylo object @@ -44,7 +44,7 @@ as_phylo_divent <- function(tree) { } else if (inherits(tree, "phylo")) { tree.phylo <- tree # Build an hclust object to use cutree later. - # Edge lengths are multiplied by 2 during the conversion. + # Edge lengths are multiplied by 2 during the conversion. # Divide by 2 before that. tree$edge.length <- tree$edge.length / 2 tree.hclust <- ape::as.hclust.phylo(tree) @@ -59,28 +59,28 @@ as_phylo_divent <- function(tree) { } else { stop("tree must be an object of class phylo, phylog or hclust") } - + # The tree must be ultrametric ---- if (!ape::is.ultrametric(tree.phylo)) { stop("The tree must be ultrametric.") } - + # Calculate distances between nodes and leaves ---- dist_from_leaves <- ape::branching.times(tree.phylo) # Get a sorted list of cuts (eliminate leaves) cuts <- sort(unique(dist_from_leaves)) # Calculate intervals between cuts (add 0 to cuts to get the first interval) intervals <- diff(c(0, cuts)) - # Eliminate 0 intervals (happen when a node contains more than 2 tips), + # Eliminate 0 intervals (happen when a node contains more than 2 tips), # including rounding errors rounding_error <- max(dist_from_leaves) * 10 * .Machine$double.eps cuts <- cuts[intervals > rounding_error] intervals <- intervals[intervals > rounding_error] - + # Derive phylogenetic groups, i.e. how to group species along the tree ---- - # Rounding errors in cutree: is.unsorted(hclust$height) may return TRUE + # Rounding errors in cutree: is.unsorted(hclust$height) may return TRUE # even though height is sorted by construction - # Values are may not be sorted properly because of rounding errors, + # Values are may not be sorted properly because of rounding errors, # e.g. 4e-16 (2 * .Machine$double.eps) in a taxonomy where cuts contains (1,2,3) heights_original <- tree.hclust$height # Run sort so that is.unsorted returns FALSE. @@ -88,10 +88,10 @@ as_phylo_divent <- function(tree) { # If there is no rounding error, add one: # (10 * .Machine$double.eps times the tree height) or cutree will miss some nodes. rounding_error <- max(tree.hclust$height) * 10 * .Machine$double.eps - # Cut the tree at each node (eliminate the root). + # Cut the tree at each node (eliminate the root). # Cut at the values + RoundingError or many nodes will be missed. phylo_groups <- stats::cutree( - tree.hclust, + tree.hclust, h = c(0, cuts[-length(cuts)]) + rounding_error ) # Eliminate the rounding error @@ -114,6 +114,6 @@ as_phylo_divent <- function(tree) { #' @rdname phylo_divent #' #' @export -is_phylo_divent <- function (x) { +is_phylo_divent <- function(x) { inherits(x, "phylo_divent") } diff --git a/R/plot.phylo_divent.R b/R/plot.phylo_divent.R index 54e1fa4..84c6068 100644 --- a/R/plot.phylo_divent.R +++ b/R/plot.phylo_divent.R @@ -1,5 +1,5 @@ #' Plot phylo_divent Objects -#' +#' #' Plot objects of class "phylo_divent" produced by [as_phylo_divent], that are #' phylogenetic trees. #' @@ -7,15 +7,15 @@ #' @param ... Arguments passed to [stats::plot.dendrogram]. #' #' @returns `NULL`. Called for side effects. -#' +#' #' @export #' #' @examples #' # Paracou plot 6 species taxonomy #' tree <- as_phylo_divent(paracou_6_taxo) #' plot(tree, leaflab = "none") -#' -plot.phylo_divent <- function (x, ...) { +#' +plot.phylo_divent <- function(x, ...) { # Plot the hclust object as a dendrogram plot(stats::as.dendrogram(x$hclust), ...) } diff --git a/R/plot.species_distribution.R b/R/plot.species_distribution.R index db1c876..5f4c102 100644 --- a/R/plot.species_distribution.R +++ b/R/plot.species_distribution.R @@ -1,8 +1,8 @@ #' Plot Profile Objects -#' -#' Plot objects of class "species_distribution" produced by +#' +#' Plot objects of class "species_distribution" produced by #' [species_distribution] and similar functions. -#' +#' #' @param x An object. #' @param ... Additional arguments to be passed to [plot]. Unused elsewhere. #' @@ -12,12 +12,12 @@ NULL #' @rdname plot.species_distribution #' #' @param type The type of plot. "RAC" (Rank-abundance curve, or Whittaker plot) -#' or "Metacommunity" to represent species abundances of each community along +#' or "Metacommunity" to represent species abundances of each community along #' with those of the metacommunity. #' @param fit_rac If `TRUE`, estimate a theoretical distribution and fit the data with it. #' RAC plot only. #' @param distribution The distribution of species abundances. -#' May be "lnorm" (log-normal), "lseries" (log-series), "geom" (geometric) or +#' May be "lnorm" (log-normal), "lseries" (log-series), "geom" (geometric) or #' "bstick" (broken stick). #' RAC plot only. #' @param ylog If `TRUE`, the Y-axis is in log-scale. @@ -30,58 +30,58 @@ NULL #' RAC plot only. #' #' @returns `NULL`. Called for side effects. -#' +#' #' @export plot.species_distribution <- function( - x, + x, type = c("RAC", "Metacommunity"), - ..., + ..., fit_rac = FALSE, distribution = c("lnorm", "lseries", "geom", "bstick"), - ylog = "y", - main = NULL, - xlab = "Rank", + ylog = "y", + main = NULL, + xlab = "Rank", ylab = NULL, palette = "Set1") { - + type <- match.arg(type) - + if (type == "RAC") { # Whittaker plot ---- - + # Prepare ylab if (is.null(ylab)) { if (is_probabilities(x)) { ylab <- "Probability" } else { - ylab <- "Abundance" + ylab <- "Abundance" } } - + # Find the max number of species in all communities - abundances <- x[, !colnames(x) %in% c("site", "weight")] + abundances <- x[, !colnames(x) %in% c("site", "weight")] s_obs_max <- max(rowSums(abundances > 0)) - + # Prepare the plot: X and Y ranges base::plot( x = 1:s_obs_max, y = seq(from = 1, to = max(abundances), length.out = s_obs_max), type = "n", - log = ylog, - main = main, - xlab = xlab, - ylab = ylab, - axes = FALSE, + log = ylog, + main = main, + xlab = xlab, + ylab = ylab, + axes = FALSE, ... ) # X axis ticks must start from 1 graphics::axis(1, graphics::axTicks(1) + 1) graphics::axis(2) graphics::box() - + # Color palette, min number of colors is 3 cols <- RColorBrewer::brewer.pal(max(nrow(x), 3), name = palette) - + # Loop in communities to build the plot for (community in seq_len(nrow(x))) { # Extract the abundances of the community @@ -90,29 +90,29 @@ plot.species_distribution <- function( abd <- sort(abd[abd > 0], decreasing = TRUE) sample_size <- sum(abd) s_obs <- length(abd) - + # Draw the species abundances graphics::points( - x = seq_len(s_obs), - y = abd, + x = seq_len(s_obs), + y = abd, col = cols[community] ) - + # Draw the fitted models if (fit_rac) { rac_fitted <- fit_rac( - abd, - distribution = distribution, + abd, + distribution = distribution, check_arguments = FALSE ) graphics::lines( - x = rac_fitted$rac$rank, - y = rac_fitted$rac$abundance, + x = rac_fitted$rac$rank, + y = rac_fitted$rac$abundance, col = cols[community] ) } } - + # Legend if several communities if (nrow(x) > 1) { graphics::legend( @@ -126,15 +126,15 @@ plot.species_distribution <- function( } } else if (type == "Metacommunity") { # Metacommunity plot ---- - + # Prepare ylab if (is.null(ylab)) { ylab <- "Species frequencies" } # Prepare data: community probabilities x.probabilities <- probabilities.abundances( - x, - estimator = "naive", + x, + estimator = "naive", check_arguments = FALSE ) prob_communities <- t( @@ -148,13 +148,13 @@ plot.species_distribution <- function( x.metacommunity[1, !colnames(x.metacommunity) %in% non_species_columns] ) prob_metacommunity <- abd_metacommunity / sum(abd_metacommunity) - + # Plot graphics::barplot( cbind( prob_communities, rep(0, length(abd_metacommunity)), - prob_metacommunity + prob_metacommunity ), beside = FALSE, width = c(weights, .5, 1), @@ -164,7 +164,7 @@ plot.species_distribution <- function( ... ) } - + } @@ -178,45 +178,45 @@ plot.species_distribution <- function( #' @importFrom rlang .data #' @export autoplot.species_distribution <- function( - object, - ..., + object, + ..., fit_rac = FALSE, distribution = c("lnorm", "lseries", "geom", "bstick"), - ylog = TRUE, - main = NULL, - xlab = "Rank", - ylab = NULL, + ylog = TRUE, + main = NULL, + xlab = "Rank", + ylab = NULL, pch = ggplot2::GeomPoint$default_aes$shape, cex = ggplot2::GeomPoint$default_aes$size) { - + # Prepare ylab if (is.null(ylab)) { if (is_probabilities(object)) { ylab <- "Probability" } else { - ylab <- "Abundance" + ylab <- "Abundance" } } - + # Find the max number of species in all communities s_obs_max <- max( rowSums( object[, !colnames(object) %in% non_species_columns] > 0 ) ) - + # Prepare the plot the_plot <- ggplot2::ggplot() + # Make X axis start at 1 ggplot2::scale_x_continuous(labels = rlang::as_function(~ .x + 1)) + ggplot2::labs(title = main, x = xlab, y = ylab) - + # Prepare the data to plot the_data <- data.frame(site = NULL, rank = NULL, abd = NULL) if (fit_rac) { the_model <- data.frame(site = NULL, rank = NULL, abd = NULL) } - + # Loop in communities to build the plot for (community in seq_len(nrow(object))) { # Extract the abundances of the community @@ -225,22 +225,22 @@ autoplot.species_distribution <- function( abd <- sort(abd[abd > 0], decreasing = TRUE) sample_size <- sum(abd) s_obs <- length(abd) - + # Transform data into dataframe the_data <- rbind( - the_data, + the_data, data.frame( - site = object[community, "site"], - rank = seq_len(s_obs), + site = object[community, "site"], + rank = seq_len(s_obs), abundance = abd ) ) - + # Fitted model if (fit_rac) { rac_fitted <- fit_rac( - abd, - distribution = distribution, + abd, + distribution = distribution, check_arguments = FALSE ) the_model <- rbind( @@ -252,32 +252,32 @@ autoplot.species_distribution <- function( ) } } - + # Plot the_plot <- the_plot + ggplot2::geom_point( - data = the_data, + data = the_data, mapping = ggplot2::aes(x = .data$rank, y = .data$abundance, col = .data$site), shape = pch, size = cex - ) - + ) + # Fitted models if (fit_rac) { - the_plot <- the_plot + + the_plot <- the_plot + ggplot2::geom_line( data = the_model, mapping = ggplot2::aes(x = .data$rank, y = .data$abundance, col = .data$site) ) } - + # Log Y-axis - if (ylog) the_plot <- the_plot + ggplot2::scale_y_log10() - + if (ylog) the_plot <- the_plot + ggplot2::scale_y_log10() + # No legend if single community if (nrow(object) == 1) { the_plot <- the_plot + ggplot2::theme(legend.position = "none") } - + return(the_plot) } diff --git a/R/profile_hill.R b/R/profile_hill.R index 41f94c6..524d817 100644 --- a/R/profile_hill.R +++ b/R/profile_hill.R @@ -1,14 +1,14 @@ #' Diversity Profile of a Community -#' -#' Calculate the diversity profile of a community, i.e. diversity (Hill numbers) +#' +#' Calculate the diversity profile of a community, i.e. diversity (Hill numbers) #' against its order. -#' -#' A bootstrap confidence interval can be produced by simulating communities -#' (their number is `n_simulations`) with [rcommunity] and calculating their profiles. -#' Simulating communities implies a downward bias in the estimation: -#' rare species of the actual community may have abundance zero in simulated communities. +#' +#' A bootstrap confidence interval can be produced by simulating communities +#' (their number is `n_simulations`) with [rcommunity] and calculating their profiles. +#' Simulating communities implies a downward bias in the estimation: +#' rare species of the actual community may have abundance zero in simulated communities. #' Simulated diversity values are recentered so that their mean is that of the actual community. -#' +#' #' @inheritParams check_divent_args #' @param x An object, that may be a numeric vector containing abundances or probabilities, #' or an object of class [abundances] or [probabilities]. @@ -19,10 +19,10 @@ #' #' @returns A tibble with the site names, the estimators used and the estimated diversity at each order. #' This is an object of class "profile" that can be plotted. -#' +#' #' @references #' \insertAllCited{} -#' +#' #' @name profile_hill NULL @@ -31,8 +31,8 @@ NULL #' #' @export profile_hill <- function( - x, - orders = seq(from = 0, to = 2, by = 0.1), + x, + orders = seq(from = 0, to = 2, by = 0.1), ...) { UseMethod("profile_hill") } @@ -41,22 +41,22 @@ profile_hill <- function( #' @rdname profile_hill #' #' @param orders The orders of diversity used to build the profile. -#' @param estimator An estimator of entropy. +#' @param estimator An estimator of entropy. #' @param n_simulations The number of simulations used to estimate the confidence envelope of the profile. #' @param alpha The risk level, 5% by default, of the confidence envelope of the profile. -#' +#' #' @export profile_hill.numeric <- function( - x, - orders = seq(from = 0, to = 2, by = 0.1), - estimator = c("UnveilJ", "ChaoJost", "ChaoShen", "GenCov", "Grassberger", + x, + orders = seq(from = 0, to = 2, by = 0.1), + estimator = c("UnveilJ", "ChaoJost", "ChaoShen", "GenCov", "Grassberger", "Holste", "Marcon", "UnveilC", "UnveiliC", "ZhangGrabchak", "naive"), - level = NULL, + level = NULL, probability_estimator = c("Chao2015", "Chao2013", "ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), richness_estimator = c("jackknife", "iChao1", "Chao1", "naive"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), q_threshold = 10, @@ -90,12 +90,12 @@ profile_hill.numeric <- function( x, q = q, estimator = estimator, - level = level, + level = level, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, - jack_alpha = jack_alpha, - jack_max = jack_max, + jack_alpha = jack_alpha, + jack_max = jack_max, coverage_estimator = coverage_estimator, q_threshold = q_threshold, as_numeric = TRUE, @@ -105,8 +105,8 @@ profile_hill.numeric <- function( FUN.VALUE = 0 ) return(the_profile_hill) - } - + } + # Regular output, simulations are allowed ---- the_profile_hill <- lapply( orders, @@ -115,12 +115,12 @@ profile_hill.numeric <- function( x, q = q, estimator = estimator, - level = level, + level = level, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, - jack_alpha = jack_alpha, - jack_max = jack_max, + jack_alpha = jack_alpha, + jack_max = jack_max, coverage_estimator = coverage_estimator, q_threshold = q_threshold, as_numeric = FALSE, @@ -130,7 +130,7 @@ profile_hill.numeric <- function( ) # Make a tibble with the list the_profile_hill <- do.call(rbind.data.frame, the_profile_hill) - + if (n_simulations > 0) { # Simulations ---- if (!is_integer_values(x)) { @@ -154,18 +154,18 @@ profile_hill.numeric <- function( for (i in seq_len(n_simulations)) { # Parallelize. Do not allow more forks. profiles_list <- parallel::mclapply( - orders, + orders, FUN = function(q) { div_hill.numeric( - communities[i, !colnames(communities) %in% non_species_columns], + communities[i, !colnames(communities) %in% non_species_columns], q = q, estimator = estimator, - level = level, + level = level, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, - jack_alpha = jack_alpha, - jack_max = jack_max, + jack_alpha = jack_alpha, + jack_max = jack_max, coverage_estimator = coverage_estimator, q_threshold = q_threshold, as_numeric = TRUE, @@ -184,12 +184,12 @@ profile_hill.numeric <- function( ) # Quantiles div_quantiles <- apply( - profile_hills, - MARGIN = 2, + profile_hills, + MARGIN = 2, FUN = stats::quantile, probs = c(alpha / 2, 1 - alpha / 2) ) - # Format the result + # Format the result the_profile_hill <- tibble::tibble( the_profile_hill, inf = div_quantiles[1, ], @@ -197,7 +197,7 @@ profile_hill.numeric <- function( ) } class(the_profile_hill) <- c("profile", class(the_profile_hill)) - + return(the_profile_hill) } @@ -206,16 +206,16 @@ profile_hill.numeric <- function( #' #' @export profile_hill.species_distribution <- function( - x, - orders = seq(from = 0, to = 2, by = 0.1), - estimator = c("UnveilJ", "ChaoJost", "ChaoShen", "GenCov", "Grassberger", + x, + orders = seq(from = 0, to = 2, by = 0.1), + estimator = c("UnveilJ", "ChaoJost", "ChaoShen", "GenCov", "Grassberger", "Holste", "Marcon", "UnveilC", "UnveiliC", "ZhangGrabchak", "naive"), - level = NULL, + level = NULL, probability_estimator = c("Chao2015", "Chao2013", "ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), richness_estimator = c("jackknife", "iChao1", "Chao1", "naive"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), q_threshold = 10, @@ -226,7 +226,7 @@ profile_hill.species_distribution <- function( show_progress = TRUE, ..., check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") @@ -241,19 +241,19 @@ profile_hill.species_distribution <- function( if (gamma) { the_profile_hill <- profile_hill.numeric( metacommunity.abundances( - x = x, - as_numeric = TRUE, + x = x, + as_numeric = TRUE, check_arguments = FALSE ), # Arguments q = q, estimator = estimator, - level = level, + level = level, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, - jack_alpha = jack_alpha, - jack_max = jack_max, + jack_alpha = jack_alpha, + jack_max = jack_max, coverage_estimator = coverage_estimator, q_threshold = q_threshold, as_numeric = FALSE, @@ -267,19 +267,19 @@ profile_hill.species_distribution <- function( # Apply profile_hill.numeric() to each site profile_hill_list <- apply( # Eliminate site and weight columns - x[, !colnames(x) %in% non_species_columns], + x[, !colnames(x) %in% non_species_columns], # Apply to each row MARGIN = 1, FUN = profile_hill.numeric, # Arguments orders = orders, estimator = estimator, - level = level, + level = level, probability_estimator = probability_estimator, unveiling = unveiling, richness_estimator = richness_estimator, - jack_alpha = jack_alpha, - jack_max = jack_max, + jack_alpha = jack_alpha, + jack_max = jack_max, coverage_estimator = coverage_estimator, q_threshold = q_threshold, as_numeric = FALSE, @@ -297,6 +297,6 @@ profile_hill.species_distribution <- function( ) } class(the_profile_hill) <- c("profile", class(the_profile_hill)) - + return(the_profile_hill) } diff --git a/R/profile_phylo.R b/R/profile_phylo.R index 60f47c1..9a98e64 100644 --- a/R/profile_phylo.R +++ b/R/profile_phylo.R @@ -90,7 +90,7 @@ profile_phylo.numeric <- function( coverage_estimator <- match.arg(coverage_estimator) bootstrap <- match.arg(bootstrap) if (as_numeric && n_simulations > 0) { - stop ("No simulations are allowed if a numeric vector is expected ('as_numeric = TRUE').") + stop("No simulations are allowed if a numeric vector is expected ('as_numeric = TRUE').") } # Call the .species_distribution method diff --git a/R/profile_similarity.R b/R/profile_similarity.R index e55033a..6b7def8 100644 --- a/R/profile_similarity.R +++ b/R/profile_similarity.R @@ -1,14 +1,14 @@ #' Similarity-Based Diversity Profile of a Community -#' +#' #' Calculate the diversity profile of a community, i.e. its similarity-based diversity #' against its order. -#' -#' A bootstrap confidence interval can be produced by simulating communities -#' (their number is `n_simulations`) with [rcommunity] and calculating their profiles. -#' Simulating communities implies a downward bias in the estimation: -#' rare species of the actual community may have abundance zero in simulated communities. +#' +#' A bootstrap confidence interval can be produced by simulating communities +#' (their number is `n_simulations`) with [rcommunity] and calculating their profiles. +#' Simulating communities implies a downward bias in the estimation: +#' rare species of the actual community may have abundance zero in simulated communities. #' Simulated diversity values are recentered so that their mean is that of the actual community. -#' +#' #' @inheritParams check_divent_args #' @param x An object, that may be a numeric vector containing abundances or probabilities, #' or an object of class [abundances] or [probabilities]. @@ -22,10 +22,10 @@ #' #' @returns A tibble with the site names, the estimators used and the estimated diversity at each order. #' This is an object of class "profile" that can be plotted. -#' +#' #' @references #' \insertAllCited{} -#' +#' #' @name profile_similarity NULL @@ -34,9 +34,9 @@ NULL #' #' @export profile_similarity <- function( - x, + x, similarities, - orders = seq(from = 0, to = 2, by = 0.1), + orders = seq(from = 0, to = 2, by = 0.1), ...) { UseMethod("profile_similarity") } @@ -45,21 +45,21 @@ profile_similarity <- function( #' @rdname profile_similarity #' #' @param orders The orders of diversity used to build the profile. -#' @param estimator An estimator of entropy. +#' @param estimator An estimator of entropy. #' @param n_simulations The number of simulations used to estimate the confidence envelope of the profile. #' @param alpha The risk level, 5% by default, of the confidence envelope of the profile. -#' +#' #' @export profile_similarity.numeric <- function( - x, + x, similarities = diag(length(x)), - orders = seq(from = 0, to = 2, by = 0.1), - estimator = c("UnveilJ", "Max", "ChaoShen", "MarconZhang", + orders = seq(from = 0, to = 2, by = 0.1), + estimator = c("UnveilJ", "Max", "ChaoShen", "MarconZhang", "UnveilC", "UnveiliC", "naive"), probability_estimator = c("Chao2015", "Chao2013", "ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), richness_estimator = c("jackknife", "iChao1", "Chao1", "naive"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), sample_coverage = NULL, @@ -70,7 +70,7 @@ profile_similarity.numeric <- function( show_progress = TRUE, ..., check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") @@ -81,10 +81,12 @@ profile_similarity.numeric <- function( unveiling <- match.arg(unveiling) coverage_estimator <- match.arg(coverage_estimator) bootstrap <- match.arg(bootstrap) - + # Numeric vector, no simulation ---- if (as_numeric) { - if (n_simulations > 0) stop ("No simulations are allowed if a numeric vector is expected ('as_numeric = TRUE').") + if (n_simulations > 0) { + stop("No simulations are allowed if a numeric vector is expected ('as_numeric = TRUE').") + } the_profile_similarity <- vapply( orders, FUN = function(q) { @@ -95,8 +97,8 @@ profile_similarity.numeric <- function( estimator = estimator, probability_estimator = probability_estimator, unveiling = unveiling, - jack_alpha = jack_alpha, - jack_max = jack_max, + jack_alpha = jack_alpha, + jack_max = jack_max, coverage_estimator = coverage_estimator, sample_coverage = sample_coverage, as_numeric = TRUE, @@ -106,8 +108,8 @@ profile_similarity.numeric <- function( FUN.VALUE = 0 ) return(the_profile_similarity) - } - + } + # Regular output, simulations are allowed ---- the_profile_similarity <- lapply( orders, @@ -119,8 +121,8 @@ profile_similarity.numeric <- function( estimator = estimator, probability_estimator = probability_estimator, unveiling = unveiling, - jack_alpha = jack_alpha, - jack_max = jack_max, + jack_alpha = jack_alpha, + jack_max = jack_max, coverage_estimator = coverage_estimator, as_numeric = FALSE, check_arguments = FALSE @@ -129,7 +131,7 @@ profile_similarity.numeric <- function( ) # Make a tibble with the list the_profile_similarity <- do.call(rbind.data.frame, the_profile_similarity) - + if (n_simulations > 0) { # Simulations ---- if (!is_integer_values(x)) { @@ -153,17 +155,17 @@ profile_similarity.numeric <- function( for (i in seq_len(n_simulations)) { # Parallelize. Do not allow more forks. profiles_list <- parallel::mclapply( - orders, + orders, FUN = function(q) { div_similarity.numeric( - communities[i, !colnames(communities) %in% non_species_columns], + communities[i, !colnames(communities) %in% non_species_columns], similarities = similarities, - q = q, + q = q, estimator = estimator, probability_estimator = probability_estimator, unveiling = unveiling, - jack_alpha = jack_alpha, - jack_max = jack_max, + jack_alpha = jack_alpha, + jack_max = jack_max, coverage_estimator = coverage_estimator, sample_coverage = sample_coverage, as_numeric = TRUE, @@ -182,12 +184,12 @@ profile_similarity.numeric <- function( ) # Quantiles div_quantiles <- apply( - profile_similarities, - MARGIN = 2, + profile_similarities, + MARGIN = 2, FUN = stats::quantile, probs = c(alpha / 2, 1 - alpha / 2) ) - # Format the result + # Format the result the_profile_similarity <- tibble::tibble( the_profile_similarity, inf = div_quantiles[1, ], @@ -195,7 +197,7 @@ profile_similarity.numeric <- function( ) } class(the_profile_similarity) <- c("profile", class(the_profile_similarity)) - + return(the_profile_similarity) } @@ -204,14 +206,14 @@ profile_similarity.numeric <- function( #' #' @export profile_similarity.species_distribution <- function( - x, + x, similarities = diag(sum(!colnames(x) %in% non_species_columns)), - orders = seq(from = 0, to = 2, by = 0.1), - estimator = c("UnveilJ", "Max", "ChaoShen", "MarconZhang", + orders = seq(from = 0, to = 2, by = 0.1), + estimator = c("UnveilJ", "Max", "ChaoShen", "MarconZhang", "UnveilC", "UnveiliC", "naive"), probability_estimator = c("Chao2015", "Chao2013", "ChaoShen", "naive"), unveiling = c("geometric", "uniform", "none"), - jack_alpha = 0.05, + jack_alpha = 0.05, jack_max = 10, coverage_estimator = c("ZhangHuang", "Chao", "Turing", "Good"), gamma = FALSE, @@ -221,7 +223,7 @@ profile_similarity.species_distribution <- function( show_progress = TRUE, ..., check_arguments = TRUE) { - + if (any(check_arguments)) { check_divent_args() if (any(x < 0)) stop("Species probabilities or abundances must be positive.") @@ -232,22 +234,22 @@ profile_similarity.species_distribution <- function( unveiling <- match.arg(unveiling) coverage_estimator <- match.arg(coverage_estimator) bootstrap <- match.arg(bootstrap) - + if (gamma) { the_profile_similarity <- profile_similarity.numeric( metacommunity.abundances( - x = x, - as_numeric = TRUE, + x = x, + as_numeric = TRUE, check_arguments = FALSE ), # Arguments similarities = similarities, - orders = orders, + orders = orders, estimator = estimator, probability_estimator = probability_estimator, unveiling = unveiling, - jack_alpha = jack_alpha, - jack_max = jack_max, + jack_alpha = jack_alpha, + jack_max = jack_max, coverage_estimator = coverage_estimator, as_numeric = FALSE, n_simulations = n_simulations, @@ -260,17 +262,17 @@ profile_similarity.species_distribution <- function( # Apply profile_similarity.numeric() to each site profile_similarity_list <- apply( # Eliminate site and weight columns - x[, !colnames(x) %in% non_species_columns], + x[, !colnames(x) %in% non_species_columns], # Apply to each row MARGIN = 1, FUN = profile_similarity.numeric, # Arguments similarities = similarities, - orders = orders, + orders = orders, probability_estimator = probability_estimator, unveiling = unveiling, - jack_alpha = jack_alpha, - jack_max = jack_max, + jack_alpha = jack_alpha, + jack_max = jack_max, coverage_estimator = coverage_estimator, as_numeric = FALSE, n_simulations = n_simulations, @@ -287,6 +289,6 @@ profile_similarity.species_distribution <- function( ) } class(the_profile_similarity) <- c("profile", class(the_profile_similarity)) - + return(the_profile_similarity) } diff --git a/R/rlseries.R b/R/rlseries.R index 39ba7e1..87c3a52 100644 --- a/R/rlseries.R +++ b/R/rlseries.R @@ -1,13 +1,13 @@ #' Log-Series Distribution -#' +#' #' Random generation for the log-series distribution. -#' +#' #' Fast implementation of the random generation of a log-series distribution #' \insertCite{Fisher1943}{divent}. -#' +#' #' The complete set of functions (including density, distribution function and quantiles) #' can be found in package _sads_ but this implementation of the random generation is much faster. -#' +#' #' If `size` is too large, i.e. `size` + 1 can't be distinguished from `size` due to rounding, #' then an error is raised. #' @@ -30,10 +30,10 @@ #' # rcommunity() may be a better choice here #' autoplot(rcommunity(1, size = 1E4, fisher_alpha = 40, distribution = "lseries")) rlseries <- function( - n, - size, - fisher_alpha, - show_progress = TRUE, + n, + size, + fisher_alpha, + show_progress = TRUE, check_arguments = TRUE) { # adapted from Dan Lunn, http://www.stats.ox.ac.uk/~dlunn/BS1_05/BS1_Rcode.pdf # Uncomment to limit to integer value