Skip to content

Commit

Permalink
updates
Browse files Browse the repository at this point in the history
  • Loading branch information
KJeynesCupper committed Oct 30, 2023
1 parent b3f85b9 commit d11222b
Show file tree
Hide file tree
Showing 13 changed files with 600,081 additions and 76 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ importFrom(DESeq2,"estimateSizeFactors")
importFrom(DESeq2,"plotPCA")
importFrom(DESeq2,"results")
importFrom(DESeq2,"rlog")
importFrom(GenomeInfoDb,"seqlevels")
importFrom(GenomeInfoDb,"seqnames")
importFrom(GenomicRanges,"GRanges")
importFrom(GenomicRanges,"GRangesList")
Expand Down
34 changes: 18 additions & 16 deletions R/RNAimport.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@
#'@param annotation path; directory to genome annotation (GFF) file used for
#'alignment.
#'
#'
#'@param idattr character; GFF attribute to be used as feature ID containing
#'mRNA names. Several GFF lines with the same feature ID will be considered as
#'parts of the same feature. The feature ID is used to identify the counts in
#'the output table. Default is "Name".
#'
#'@return
#'**For sRNAseq:**
Expand Down Expand Up @@ -130,6 +133,7 @@ RNAimport <- function(input = c("sRNA", "mRNA"),
samples,
analysisType = "mobile",
annotation,
idattr = "Name",
FPKM = FALSE) {
if (base::missing(input) || !input %in% c("sRNA", "mRNA")) {
stop("Please state the data-type to the `input` paramter.")
Expand Down Expand Up @@ -320,27 +324,25 @@ RNAimport <- function(input = c("sRNA", "mRNA"),
genes <- lapply(sample_data, "[", , "mRNA")
genes_all <- unique(Reduce(merge,genes))

# add gene length
annotation_file <- rtracklayer::import(annotation, format = "gff3")
# select column with cells contain "gene", but if they contain "gene" then nothing else.
gene_columns <- which(sapply(annotation_file, function(x) any(grepl("^mRNA$", x))))
gene_col_name <- names(annotation_file)[gene_columns]
# select genes
genes_info <- subset(annotation_file, type == "mRNA")
width <- paste0(as.numeric(genes_info$end)-as.numeric(genes_info$start))
# add mRNA locus and width etc
annotation_file <- rtracklayer::import(annotation)
gene_columns <- which(sapply(elementMetadata(annotation_file) , function(x) any(grepl("^mRNA$", x))))
gene_col_name <- names(elementMetadata(annotation_file))[gene_columns]
genes_info <- as.data.frame(subset(annotation_file, type == "mRNA"))
Locus <- paste0(genes_info$seqname, ":",genes_info$start,"-",
genes_info$end)

genes_info <- cbind(Locus, genes_info,width)

genes_info <- cbind(Locus, genes_info)
colnames(genes_info)[colnames(genes_info) %in% idattr] <- "mRNA"
# merge gene list with annotation info.
merged_gene_info <- merge(genes_all, genes_info, by = "Locus", all.x = TRUE)
#merged_gene_info <- merged_gene_info[stats::complete.cases(merged_gene_info), ]
merged_gene_info <- merge(genes_all, genes_info, by = "mRNA", all.x = TRUE) %>%
select(mRNA, Locus, seqnames, start, end, width, strand, type)%>%
rename(chr = seqnames)
gene_widths <- merged_gene_info$width

# ADDs sample information to the genes_all object
for (i in seq_along(sample_data)){
matches <- merged_gene_info[sample_data[[i]], on = "Locus", nomatch = 0]
matches <- merged_gene_info[sample_data[[i]], on = "mRNA", nomatch = 0]
matches_values <- matches[, .(Count=sum(Count)),by = "mRNA"]

# Rename the aggregated columns
Expand All @@ -358,14 +360,14 @@ RNAimport <- function(input = c("sRNA", "mRNA"),
~tidyr::replace_na(.,0)))
# set genes as rownames
fpkm <- apply(X = subset(mRNA_information,
select = c(-Locus, -mRNA, -chr, -start, -end, -width)),
select = c(-Locus, -mRNA, -chr, -start, -end, -width, -strand, -type)),
MARGIN = 2,
FUN = function(x) {
sum_x <- sum(as.numeric(x))
if (sum_x == 0) {
t <- 0
} else {
t <- 10^9 * x / gene_widths / sum_x
t <- 10^9 * x / as.numeric(gene_widths) / sum_x
}
t
})
Expand Down
10 changes: 5 additions & 5 deletions R/invisible.functions.mobileRNA.R
Original file line number Diff line number Diff line change
Expand Up @@ -158,19 +158,19 @@ gff_import <- function(gff_file, nrows = -1) {
################### convert character to factor in GRanges #####

# Normalise the metadata columns of a ranges object.
#
# Two passes are made over the element metadata of `gr`:
#   1. every character column is converted to a factor;
#   2. every column that is a CompressedCharacterList is dropped.
#
# @param gr an object whose element metadata can be read and replaced with
#   S4Vectors::elementMetadata() (presumably a GRanges -- confirm with callers).
# @return `gr` with its metadata columns updated as described above.
convertChar2Factor <- function(gr) {
  gr_metadata <- S4Vectors::elementMetadata(gr)

  # vapply() guarantees a logical vector even when there are no columns.
  charCols <- vapply(gr_metadata, is.character, logical(1))
  if (any(charCols)) {
    gr_metadata[charCols] <- lapply(gr_metadata[charCols], as.factor)
    S4Vectors::elementMetadata(gr) <- gr_metadata
  }

  metadata_cols <- S4Vectors::elementMetadata(gr)
  # inherits() yields exactly one TRUE/FALSE per column; comparing class()
  # with `==` returns several values for columns with a multi-element class
  # vector, which breaks the logical index below.
  charlist_cols <- vapply(
    metadata_cols,
    function(col) inherits(col, "CompressedCharacterList"),
    logical(1)
  )
  if (any(charlist_cols)) {
    S4Vectors::elementMetadata(gr) <- metadata_cols[!charlist_cols]
  }

  return(gr)
}
Expand Down
2 changes: 1 addition & 1 deletion R/mapRNA.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@
#' @param type character; feature type (3rd column in GFF file) to be used,
#' all features of other type are ignored. Default is "mRNA".
#'
#' @param idattr character: GFF attribute to be used as feature ID. Several GFF
#' @param idattr character; GFF attribute to be used as feature ID. Several GFF
#' lines with the same feature ID will be considered as parts of the same
#' feature. The feature ID is used to identify the counts in the output table.
#' Default is "Name".
Expand Down
Binary file added data/mRNA_data.rda
Binary file not shown.
Loading

0 comments on commit d11222b

Please sign in to comment.