From 31ed5531932e92464c71584f304d7ac260bc7d7c Mon Sep 17 00:00:00 2001 From: Gianmarco Alberti Date: Thu, 19 Apr 2018 18:14:33 +0200 Subject: [PATCH] v0.22 --- DESCRIPTION | 2 +- R/sig_dim_perm.R | 13 ++++--------- R/sig_dim_perm_scree.R | 19 +++++-------------- README.md | 10 +++++++--- man/CAinterprTools-package.Rd | 2 +- man/sig.dim.perm.Rd | 2 +- man/sig.dim.perm.scree.Rd | 2 +- 7 files changed, 20 insertions(+), 30 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f6b120c..9af9366 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: CAinterprTools Title: Package for graphical aid in Correspondence Analysis interpretation and significance testing -Version: 0.21 +Version: 0.22 Authors@R: "Gianmarco ALberti [aut, cre]" Description: A number of interesting packages are available to perform Correspondence Analysis in R. At the best of my knowledge, they lack diff --git a/R/sig_dim_perm.R b/R/sig_dim_perm.R index 1855361..0946022 100644 --- a/R/sig_dim_perm.R +++ b/R/sig_dim_perm.R @@ -12,7 +12,7 @@ #' data(greenacre_data) #' sig.dim.perm(greenacre_data, 1,2) #Returns a scatterplot of the permuted inertia of the 1 CA dimension against the permuted inertia of the 2 CA dimension. Observed inertia of the selected dimensions and 95th percentile of the permuted inertias are also displayed for testing the significance of the observed inertias. #' -sig.dim.perm <- function(data, x=1, y=2, B=999) { +sig.dim.perm <- function(data, x=1, y=2, B=1000) { nIter <- B+1 numb.dim.cols <- ncol(data) - 1 numb.dim.rows <- nrow(data) - 1 @@ -21,19 +21,14 @@ sig.dim.perm <- function(data, x=1, y=2, B=999) { res <- CA(data, graph=FALSE) d[1,]<- rbind(res$eig[,1]) pb <- txtProgressBar(min = 0, max = nIter, style = 3) #set the progress bar to be used inside the loop - for (i in 2:nIter){ rand.table <- as.data.frame(r2dtable(1, apply(data, 1,sum), apply(data, 2, sum))) res <- CA(rand.table, graph=FALSE) d[i,] <- rbind(res$eig[,1]) setTxtProgressBar(pb, i) } - - perm.pvalues <- round(1 + colSums(d[-1,] > d[1,][col(d[-1,])]) / (B+1), 4) - pvalues.toreport <- ifelse(perm.pvalues < 0.001, "< 0.001", - ifelse(perm.pvalues < 0.01, "< 0.01", - ifelse(perm.pvalues < 0.05, "< 0.05", - round(perm.pvalues, 3)))) + perm.pvalues <- round(colSums(d[-1,] > d[1,][col(d[-1,])]) / B, 4) + pvalues.toreport <- ifelse(perm.pvalues < 0.001, "< 0.001", ifelse(perm.pvalues < 0.01, "< 0.01", ifelse(perm.pvalues < 0.05, "< 0.05",round(perm.pvalues, 3)))) plot(d[,x], d[,y], main=" Scatterplot of permuted dimensions' inertia", sub="large red dot: observed inertia", @@ -44,4 +39,4 @@ sig.dim.perm <- function(data, x=1, y=2, B=999) { col="#00000088") # hex code for 'black'; last two digits set the transparency par(new=TRUE) plot(d[1,x], d[1,y], xlim=c(min(d[,x]), max(d[,x])), ylim=c(min(d[,y]), max(d[,y])), pch=20, cex=1.5, col="red", xaxt = "n", xlab = "", ylab = "", sub = "") #add the observed inertia as a large red dot - } \ No newline at end of file +} \ No newline at end of file diff --git a/R/sig_dim_perm_scree.R b/R/sig_dim_perm_scree.R index 8333f80..3bb1b4b 100644 --- a/R/sig_dim_perm_scree.R +++ b/R/sig_dim_perm_scree.R @@ -14,7 +14,7 @@ #' data(greenacre_data) #' sig.dim.perm.scree(greenacre_data, 9999) #' -sig.dim.perm.scree <- function(data, B=999, cex=0.7, pos=4, offset=0.5){ +sig.dim.perm.scree <- function(data, B=1000, cex=0.7, pos=4, offset=0.5){ options(scipen = 999) nIter <- B+1 numb.dim.cols <- ncol(data) - 1 @@ -24,33 +24,24 @@ sig.dim.perm.scree <- function(data, B=999, cex=0.7, pos=4, offset=0.5){ res <- CA(data, graph=FALSE) d[1,]<- rbind(res$eig[,1]) pb <- txtProgressBar(min = 0, max = nIter, style = 3) #set the progress bar to be used inside the loop - for (i in 2:nIter){ rand.table <- as.data.frame(r2dtable(1, apply(data, 1,sum), apply(data, 2, sum))) res <- CA(rand.table, graph=FALSE) d[i,] <- rbind(res$eig[,1]) setTxtProgressBar(pb, i) } - target.percent <- apply(d[-c(1),],2, quantile, probs = 0.95) #calculate the 95th percentile of the randomized eigenvalues, excluding the first row (which store the observed eigenvalues) - max.y.lim <- max(d[1,], target.percent) obs.eig <- as.matrix(d[1,]) obs.eig.to.plot <- melt(obs.eig) #requires reshape2 - - perm.p.values <- round(1 + colSums(d[-1,] > d[1,][col(d[-1,])]) / (B+1), 4) - perm.pvalues.to.report <- ifelse(perm.p.values < 0.001, "< 0.001", - ifelse(perm.p.values < 0.01, "< 0.01", - ifelse(perm.p.values < 0.05, "< 0.05", - round(perm.p.values, 3)))) - + perm.p.values <- round(colSums(d[-1,] > d[1,][col(d[-1,])]) / B, 4) + perm.pvalues.to.report <- ifelse(perm.p.values < 0.001, "< 0.001", ifelse(perm.p.values < 0.01, "< 0.01", ifelse(perm.p.values < 0.05, "< 0.05", round(perm.p.values, 3)))) plot(obs.eig.to.plot$value, type = "o", ylim = c(0, max.y.lim), xaxt = "n", xlab = "Dimensions", ylab = "Eigenvalue", pch=20) text(obs.eig.to.plot$value, labels = perm.pvalues.to.report, cex = cex, pos = pos, offset = offset) axis(1, at = 1:table.dim) - title(main = "Correspondence Analysis: \nscree-plot of observed and permuted eigenvalues", - sub = paste0("Black dots=observed eigenvalues; blue dots=95th percentile of the permutated eigenvalues' distribution. Number of permutations: ", B), - cex.sub = 0.8) + title(main = "Correspondence Analysis: \nscree-plot of observed and permuted eigenvalues", sub = paste0("Black dots=observed eigenvalues; blue dots=95th percentile of the permutated eigenvalues' distribution. Number of permutations: ", B), cex.sub = 0.8) par(new = TRUE) percentile.to.plot <- melt(target.percent) plot(percentile.to.plot$value, type = "o", lty = 2, col = "blue", ylim = c(0, max.y.lim), xaxt = "n", xlab = "", ylab = "", sub = "") + #return(d) } \ No newline at end of file diff --git a/README.md b/README.md index 5b7936f..37f748c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # CAinterprTools -vers 0.21 +vers 0.22 A number of interesting packages are available to perform Correspondence Analysis in R. At the best of my knowledge, however, they lack some tools to help users to eyeball some critical CA aspects (e.g., contribution of rows/cols categories to the principal axes, quality of the display,correlation of rows/cols categories with dimensions, etc). Besides providing those facilities, this package allows calculating the significance of the CA dimensions by means of the 'Average Rule', the Malinvaud test, and by permutation test. Further, it allows to also calculate the permuted significance of the CA total inertia. @@ -307,7 +307,11 @@ improvements and typos fixes to the help documentation; improvements to the char New in `version 0.21`: -improvements and typos fixes to the help documentation; adjustments and improvements to the permuted p-values calculation in the `sig.dim.perm()`, `sig.dim.perm.scree()`, and `sig.tot.inertia.perm()` functions. +improvements and typos fixes to the help documentation; adjustments and improvements to the permuted p-values calculation in the `sig.dim.perm()`, `sig.dim.perm.scree()`, and `sig.tot.inertia.perm()` functions. + +New in `version 0.22`: + +improvements and typos fixes to the help documentation; under-the-hood improvements of performance; error fix in the calculation of p-values in the `sig.dim.perm()` and `sig.dim.perm.scree()` functions (error introduced in v0.21). ## Installation To install the package in R, just follow the few steps listed below: @@ -322,7 +326,7 @@ library(devtools) ``` 3) download the 'CAinterprTools' package from GitHub via the 'devtools''s command: ```r -install_github("gianmarcoalberti/CAinterprTools@v0.21") +install_github("gianmarcoalberti/CAinterprTools@v0.22") ``` 4) load the package: ```r diff --git a/man/CAinterprTools-package.Rd b/man/CAinterprTools-package.Rd index 0666e17..4e932c8 100644 --- a/man/CAinterprTools-package.Rd +++ b/man/CAinterprTools-package.Rd @@ -20,7 +20,7 @@ A number of interesting packages are available to perform \tabular{ll}{ Package: \tab CAinterprTools\cr Type: \tab Package\cr -Version: \tab 0.21\cr +Version: \tab 0.22\cr Date: \tab 2018-04\cr License: \tab GPL\cr } diff --git a/man/sig.dim.perm.Rd b/man/sig.dim.perm.Rd index 3153303..3942e97 100644 --- a/man/sig.dim.perm.Rd +++ b/man/sig.dim.perm.Rd @@ -4,7 +4,7 @@ \alias{sig.dim.perm} \title{Permuted significance of CA dimensions} \usage{ -sig.dim.perm(data, x = 1, y = 2, B = 999) +sig.dim.perm(data, x = 1, y = 2, B = 1000) } \arguments{ \item{data:}{name of the dataset (must be in dataframe format).} diff --git a/man/sig.dim.perm.scree.Rd b/man/sig.dim.perm.scree.Rd index 144747b..7ccf647 100644 --- a/man/sig.dim.perm.scree.Rd +++ b/man/sig.dim.perm.scree.Rd @@ -4,7 +4,7 @@ \alias{sig.dim.perm.scree} \title{Scree plot to test the significance of CA dimensions by means of a randomized procedure} \usage{ -sig.dim.perm.scree(data, B = 999, cex = 0.7, pos = 4, offset = 0.5) +sig.dim.perm.scree(data, B = 1000, cex = 0.7, pos = 4, offset = 0.5) } \arguments{ \item{data:}{name of the contingency table (must be in dataframe format).}