Skip to content

Commit

Permalink
Merge pull request #4 from leahkemp/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
leahkemp authored Jan 27, 2022
2 parents c6e9fd2 + ee44c32 commit 45d2f76
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 24 deletions.
19 changes: 13 additions & 6 deletions diff_expression/diff_expression.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,13 @@ mirna_smrnaseq_data <- mirna_smrnaseq_data %>%
# make all count data numeric
dplyr::mutate_all(as.numeric)
# replace "." with "-" so RNA names are the same
# replace "." with "-" so RNA names are the same
base::row.names(mirna_smrnaseq_data) <- base::gsub("\\.", "\\-", base::row.names(mirna_smrnaseq_data))
# remove weird tRNA row
trna_excerpt_data <- trna_excerpt_data %>%
dplyr::filter(!(rownames(trna_excerpt_data) == "tRNA"))
# create a list defining which code chunks to analyse (that are set to TRUE for both the datasets and contrasts to analyse) based on the yaml user configuration file
to_eval_chunk <- config[c("mirna_smrnaseq",
"mirna_excerpt",
Expand All @@ -132,6 +136,9 @@ to_eval_chunk <- config[c("mirna_smrnaseq",
"circrna_excerpt",
"gencode_excerpt")] %>%
rep(times = 6)
# make everything true before conditionally making false
to_eval_chunk <- base::replace(to_eval_chunk, 1:base::length(to_eval_chunk), TRUE)
names(to_eval_chunk)[1:6] <- paste(names(to_eval_chunk)[1:6], "1", sep = "_")
names(to_eval_chunk)[7:12] <- paste(names(to_eval_chunk)[7:12], "2", sep = "_")
Expand All @@ -140,7 +147,7 @@ names(to_eval_chunk)[19:24] <- paste(names(to_eval_chunk)[19:24], "4", sep = "_"
names(to_eval_chunk)[25:30] <- paste(names(to_eval_chunk)[25:30], "5", sep = "_")
names(to_eval_chunk)[31:36] <- paste(names(to_eval_chunk)[31:36], "6", sep = "_")
n_contrasts <- length(config$contrasts)
n_contrasts <- base::length(config$contrasts)
to_eval_chunk <- if(n_contrasts == 1) {
replace(to_eval_chunk, 7:36, FALSE)
Expand Down Expand Up @@ -249,7 +256,7 @@ ordered_treatments <- treatments %>%
# make sure the samples (columns) in all the datasets are in the same order for specifying the treatments downstream
# (that depends on the columns being in the correct order) (loop over all count datasets)
count_datasets<- base::lapply(count_datasets, function(x) {
count_datasets <- base::lapply(count_datasets, function(x) {
x[ , gtools::mixedsort(names(x))]
Expand Down Expand Up @@ -279,7 +286,7 @@ plotSA_interactive <- function(data) {
y[allzero] <- NA
}
if (length(data$df.prior) > 1L) {
if (base::length(data$df.prior) > 1L) {
df2 <- base::max(data$df.prior)
s2 <- data$sigma^2/fit$s2.prior
pdn <- stats::pf(s2, df1 = data$df.residual, df2 = df2)
Expand Down Expand Up @@ -432,7 +439,7 @@ base::cat(base::paste0("- ", config$contrasts, "\n"))

```{r, results = "asis"}
# print the number of samples analysed
base::cat(base::paste0("Total number of samples: ", length(unique(metadata$sample))))
base::cat(base::paste0("Total number of samples: ", base::length(base::unique(metadata$sample))))
```

Number of samples in each treatment group:
Expand All @@ -449,7 +456,7 @@ base::cat(base::paste0("- ", n_samples_by_treatment$treatment, ": ", n_samples_b
## limma/voom

```{r, results = "asis"}
base::cat(base::paste0("The data was filtered using the [filterByExpr](https://rdrr.io/bioc/edgeR/man/filterByExpr.html) function. The filtering keeps RNA's that have count-per-million (CPM) above min.count (", config$min_count, ")", " in n samples (", length(unique(metadata$sample)), "). In addition, each kept RNA is required to have at least min.total.count reads (", config$min_total_count, ") across all the samples. From a statistical point of view, removing low count RNA's allows the mean-variance relationship in the data to be estimated with greater reliability ([Law et al., (2018)](https://f1000research.com/articles/5-1408))."))
base::cat(base::paste0("The data was filtered using the [filterByExpr](https://rdrr.io/bioc/edgeR/man/filterByExpr.html) function. The filtering keeps RNA's that have count-per-million (CPM) above min.count (", config$min_count, ")", " in n samples (", base::length(base::unique(metadata$sample)), "). In addition, each kept RNA is required to have at least min.total.count reads (", config$min_total_count, ") across all the samples. From a statistical point of view, removing low count RNA's allows the mean-variance relationship in the data to be estimated with greater reliability ([Law et al., (2018)](https://f1000research.com/articles/5-1408))."))
```

```{r, results = "asis"}
Expand Down
79 changes: 61 additions & 18 deletions heatmaps/heatmaps.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,18 @@ config <- yaml::yaml.load_file("../config/config.yaml")
metadata <- utils::read.csv(file.path(config$metadata))
# read in differential expression results
diff_expr_results <- utils::read.table("../diff_expression/diff_expr_results/diff_expr_results.tsv",
diff_expr_results_model_1 <- utils::read.table("../diff_expression/diff_expr_results/diff_expr_results_model_1.tsv",
header = TRUE)
diff_expr_results_model_2 <- utils::read.table("../diff_expression/diff_expr_results/diff_expr_results_model_2.tsv",
header = TRUE)
diff_expr_results <- rbind(diff_expr_results_model_1, diff_expr_results_model_2)
# write differential expression results to file
diff_expr_results %>%
utils::write.table(file = "../diff_expression/diff_expr_results/diff_expr_results.tsv", row.names=FALSE, sep="\t")
# load all the count datasets
mirna_excerpt_data <- utils::read.table(base::file.path(config$excerpt_merged_results_dir,
"exceRpt_miRNA_ReadCounts.txt"),
Expand Down Expand Up @@ -122,6 +131,10 @@ mirna_smrnaseq_data <- mirna_smrnaseq_data %>%
# replace "." with "-" so RNA names are the same
base::row.names(mirna_smrnaseq_data) <- base::gsub("\\.", "\\-", base::row.names(mirna_smrnaseq_data))
# remove weird tRNA row
trna_excerpt_data <- trna_excerpt_data %>%
dplyr::filter(!(rownames(trna_excerpt_data) == "tRNA"))
# create a vector defining the count datasets to analyse (that are set to TRUE) based on the yaml user configuration file
to_analyse <- config[c("mirna_smrnaseq",
"mirna_excerpt",
Expand Down Expand Up @@ -205,17 +218,26 @@ log_counts_per_million <- base::lapply(log_counts_per_million, function(x)
)
# setup a string that scales the widths of the subplots for each heatmap based on the number of treatment groups
num_treatments <- base::length(base::unique(metadata$treatment))
widths <- 1/num_treatments
all_widths <- base::rep(widths, num_treatments)
all_widths <- metadata %>%
group_by(treatment) %>%
dplyr::summarise(n_samples = n()) %>%
dplyr::mutate(n_samples = n_samples/base::length(metadata$treatment)) %>%
dplyr::pull(n_samples)
# set heatmap figure heights to automatically scale to how many rows are present in the data
fig_height_mirna_smrnaseq <- base::nrow(log_counts_per_million$mirna_smrnaseq) / 2
fig_height_mirna_excerpt <- base::nrow(log_counts_per_million$mirna_excerpt) / 2
fig_height_pirna_excerpt <- base::nrow(log_counts_per_million$pirna_excerpt) / 2
fig_height_trna_excerpt <- base::nrow(log_counts_per_million$trna_excerpt) / 2
fig_height_circrna_excerpt <- base::nrow(log_counts_per_million$circrna_excerpt) / 2
fig_height_gencode_excerpt <- base::nrow(log_counts_per_million$gencode_excerpt) / 2
fig_height_mirna_smrnaseq <- base::nrow(log_counts_per_million$mirna_smrnaseq) / 6
fig_height_mirna_excerpt <- base::nrow(log_counts_per_million$mirna_excerpt) / 6
fig_height_pirna_excerpt <- base::nrow(log_counts_per_million$pirna_excerpt) / 6
fig_height_trna_excerpt <- base::nrow(log_counts_per_million$trna_excerpt) / 6
fig_height_circrna_excerpt <- base::nrow(log_counts_per_million$circrna_excerpt) / 6
fig_height_gencode_excerpt <- base::nrow(log_counts_per_million$gencode_excerpt) / 6
fig_height_mirna_smrnaseq_free <- base::nrow(log_counts_per_million$mirna_smrnaseq) / 4
fig_height_mirna_excerpt_free <- base::nrow(log_counts_per_million$mirna_excerpt) / 4
fig_height_pirna_excerpt_free <- base::nrow(log_counts_per_million$pirna_excerpt) / 4
fig_height_trna_excerpt_free <- base::nrow(log_counts_per_million$trna_excerpt) / 4
fig_height_circrna_excerpt_free <- base::nrow(log_counts_per_million$circrna_excerpt) / 4
fig_height_gencode_excerpt_free <- base::nrow(log_counts_per_million$gencode_excerpt) / 4
# need to set the figure height to NOT ZERO when the figure height ends up being less than 1 so rmarkdown doesn't error out on knitting
fig_height_mirna_smrnaseq[fig_height_mirna_smrnaseq <1 ] <- 1
Expand All @@ -225,6 +247,13 @@ fig_height_trna_excerpt[fig_height_trna_excerpt <1 ] <- 1
fig_height_circrna_excerpt[fig_height_circrna_excerpt <1 ] <- 1
fig_height_gencode_excerpt[fig_height_gencode_excerpt <1 ] <- 1
fig_height_mirna_smrnaseq_free[fig_height_mirna_smrnaseq_free <1 ] <- 1
fig_height_mirna_excerpt_free[fig_height_mirna_excerpt_free <1 ] <- 1
fig_height_pirna_excerpt_free[fig_height_pirna_excerpt_free <1 ] <- 1
fig_height_trna_excerpt_free[fig_height_trna_excerpt_free <1 ] <- 1
fig_height_circrna_excerpt_free[fig_height_circrna_excerpt_free <1 ] <- 1
fig_height_gencode_excerpt_free[fig_height_gencode_excerpt_free <1 ] <- 1
# also need to set the figure height to NOT ZERO when there are no rows of data so rmarkdown doesn't error out on knitting
fig_height_mirna_smrnaseq[is.na(fig_height_mirna_smrnaseq[1])] <- 1
fig_height_mirna_excerpt[is.na(fig_height_mirna_excerpt[1])] <- 1
Expand All @@ -233,6 +262,13 @@ fig_height_trna_excerpt[is.na(fig_height_trna_excerpt[1])] <- 1
fig_height_circrna_excerpt[is.na(fig_height_circrna_excerpt[1])] <- 1
fig_height_gencode_excerpt[is.na(fig_height_gencode_excerpt[1])] <- 1
fig_height_mirna_smrnaseq_free[is.na(fig_height_mirna_smrnaseq_free[1])] <- 1
fig_height_mirna_excerpt_free[is.na(fig_height_mirna_excerpt_free[1])] <- 1
fig_height_pirna_excerpt_free[is.na(fig_height_pirna_excerpt_free[1])] <- 1
fig_height_trna_excerpt_free[is.na(fig_height_trna_excerpt_free[1])] <- 1
fig_height_circrna_excerpt_free[is.na(fig_height_circrna_excerpt_free[1])] <- 1
fig_height_gencode_excerpt_free[is.na(fig_height_gencode_excerpt_free[1])] <- 1
# specify treatments by creating a string of conditions that match the order of the columns/samples in the count data
# get the treatments and samples names from the metadata file
treatments <- metadata %>%
Expand All @@ -259,11 +295,18 @@ base::cat(base::paste0(" - mirna smrnaseq: ", config$mirna_smrnaseq, "\n",
" - gencode excerpt: ", config$gencode_excerpt, "\n"))
```

Treatment comparisons:
Treatment comparisons (model 1):

```{r, results = "asis"}
# print the treatment group comparisons the user has chosen to analyse
base::cat(base::paste0("- ", config$contrasts, "\n"))
base::cat(base::paste0("- ", config$contrasts_model_1, "\n"))
```

Treatment comparisons (model 2):

```{r, results = "asis"}
# print the treatment group comparisons the user has chosen to analyse
base::cat(base::paste0("- ", config$contrasts_model_2, "\n"))
```

```{r, results = "asis"}
Expand Down Expand Up @@ -911,7 +954,7 @@ if (all(is.na(lcpm_gencode_excerpt)) == FALSE & nrow(lcpm_gencode_excerpt) > 3 &
base::cat("#### miRNA's (smrnaseq)")
```

```{r, eval = config$mirna_smrnaseq, fig.height = fig_height_mirna_smrnaseq, out.width = "100%"}
```{r, eval = config$mirna_smrnaseq, fig.height = fig_height_mirna_smrnaseq_free, out.width = "100%"}
# extract datasets
lcpm_mirna_smrnaseq <- log_counts_per_million$mirna_smrnaseq
Expand Down Expand Up @@ -975,7 +1018,7 @@ if (all(is.na(lcpm_mirna_smrnaseq)) == FALSE & nrow(lcpm_mirna_smrnaseq) > 3 & "
base::cat("#### miRNA's (excerpt)")
```

```{r, eval = config$mirna_excerpt, fig.height = fig_height_mirna_excerpt, out.width = "100%"}
```{r, eval = config$mirna_excerpt, fig.height = fig_height_mirna_excerpt_free, out.width = "100%"}
# extract datasets
lcpm_mirna_excerpt <- log_counts_per_million$mirna_excerpt
Expand Down Expand Up @@ -1039,7 +1082,7 @@ if (all(is.na(lcpm_mirna_excerpt)) == FALSE & nrow(lcpm_mirna_excerpt) > 3 & "mi
base::cat("#### piRNA's (excerpt)")
```

```{r, eval = config$pirna_excerpt, fig.height = fig_height_pirna_excerpt, out.width = "100%"}
```{r, eval = config$pirna_excerpt, fig.height = fig_height_pirna_excerpt_free, out.width = "100%"}
# extract datasets
lcpm_pirna_excerpt <- log_counts_per_million$pirna_excerpt
Expand Down Expand Up @@ -1103,7 +1146,7 @@ if (all(is.na(lcpm_pirna_excerpt)) == FALSE & nrow(lcpm_pirna_excerpt) > 3 & "pi
base::cat("#### tRNA's (excerpt)")
```

```{r, eval = config$trna_excerpt, fig.height = fig_height_trna_excerpt, out.width = "100%"}
```{r, eval = config$trna_excerpt, fig.height = fig_height_trna_excerpt_free, out.width = "100%"}
# extract datasets
lcpm_trna_excerpt <- log_counts_per_million$trna_excerpt
Expand Down Expand Up @@ -1167,7 +1210,7 @@ if (all(is.na(lcpm_trna_excerpt)) == FALSE & nrow(lcpm_trna_excerpt) > 3 & "trna
base::cat("#### circRNA's (excerpt)")
```

```{r, eval = config$circrna_excerpt, fig.height = fig_height_circrna_excerpt, out.width = "100%"}
```{r, eval = config$circrna_excerpt, fig.height = fig_height_circrna_excerpt_free, out.width = "100%"}
# extract datasets
lcpm_circrna_excerpt <- log_counts_per_million$circrna_excerpt
Expand Down Expand Up @@ -1231,7 +1274,7 @@ if (all(is.na(lcpm_circrna_excerpt)) == FALSE & nrow(lcpm_circrna_excerpt) > 3 &
base::cat("#### gencode's (excerpt)")
```

```{r, eval = config$gencode_excerpt, fig.height = fig_height_gencode_excerpt, out.width = "100%"}
```{r, eval = config$gencode_excerpt, fig.height = fig_height_gencode_excerpt_free, out.width = "100%"}
# extract datasets
lcpm_gencode_excerpt <- log_counts_per_million$gencode_excerpt
Expand Down
4 changes: 4 additions & 0 deletions prepare_counts/prepare_counts.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ mirna_smrnaseq_data <- mirna_smrnaseq_data %>%
# replace "." with "-" so RNA names are the same
base::row.names(mirna_smrnaseq_data) <- base::gsub("\\.", "\\-", base::row.names(mirna_smrnaseq_data))

# remove weird tRNA row
trna_excerpt_data <- trna_excerpt_data %>%
dplyr::filter(!(rownames(trna_excerpt_data) == "tRNA"))

# create a vector defining the count datasets to analyse (that are set to TRUE) based on the yaml user configuration file
to_analyse <- config[c("mirna_smrnaseq",
"mirna_excerpt",
Expand Down

0 comments on commit 45d2f76

Please sign in to comment.