From b45ce513a3f31832ea1420856ab6bd6ff413993a Mon Sep 17 00:00:00 2001 From: ytakemon Date: Thu, 3 Mar 2022 09:20:43 -0800 Subject: [PATCH] related to #24 #25 remove data documents --- GINIR_data_document.tar.gz | 3 +++ R/CCLE_exp.R | 10 -------- R/CCLE_exp_annot.R | 14 ----------- R/copy_num.R | 10 -------- R/copy_num_annot.R | 16 ------------ R/dep.R | 9 ------- R/dep_annot.R | 14 ----------- R/essential_genes.R | 13 ---------- R/gene_effect.R | 9 ------- R/list_available_cancer_type.R | 12 +++++---- R/mut_calls.R | 46 ---------------------------------- R/nonessential_genes.R | 13 ---------- R/protein.R | 9 ------- R/protein_annot.R | 12 --------- R/protein_nodup.R | 9 ------- R/sample_ARID1A_KO_screen.R | 9 ------- R/sample_annot.R | 34 ------------------------- 17 files changed, 10 insertions(+), 232 deletions(-) create mode 100644 GINIR_data_document.tar.gz delete mode 100644 R/CCLE_exp.R delete mode 100644 R/CCLE_exp_annot.R delete mode 100644 R/copy_num.R delete mode 100644 R/copy_num_annot.R delete mode 100644 R/dep.R delete mode 100644 R/dep_annot.R delete mode 100644 R/essential_genes.R delete mode 100644 R/gene_effect.R delete mode 100644 R/mut_calls.R delete mode 100644 R/nonessential_genes.R delete mode 100644 R/protein.R delete mode 100644 R/protein_annot.R delete mode 100644 R/protein_nodup.R delete mode 100644 R/sample_ARID1A_KO_screen.R delete mode 100644 R/sample_annot.R diff --git a/GINIR_data_document.tar.gz b/GINIR_data_document.tar.gz new file mode 100644 index 0000000..c6ea7fe --- /dev/null +++ b/GINIR_data_document.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69939720fd02b1a2f4ac48c1bf63adc55556774e4e396fa8836d918e7c7fbd5f +size 2667 diff --git a/R/CCLE_exp.R b/R/CCLE_exp.R deleted file mode 100644 index 63f82eb..0000000 --- a/R/CCLE_exp.R +++ /dev/null @@ -1,10 +0,0 @@ -#' @title DepMap 20Q1: Cancer cell line RNA-seq expression -#' -#' @description A data set containing the RNA-seq expression in transcripts per million (TPM) of 1279 cancer cell lines. -#' Data was generated by the Cancer Cell Line Encyclopedia (CCLE) and distributed by DepMap. -#' Details on data generation can be found in Ghandi, M., et al (2019) (https://www.nature.com/articles/s41586-019-1186-3). -#' The data was obtained from 'CCLE_expression.csv' (see source url). -#' -#' @format A data frame with 1279 rows and 19145 variables: -#' @source \url{https://figshare.com/articles/dataset/DepMap_20Q1_Public/11791698} -"CCLE_exp" diff --git a/R/CCLE_exp_annot.R b/R/CCLE_exp_annot.R deleted file mode 100644 index cc4e4e3..0000000 --- a/R/CCLE_exp_annot.R +++ /dev/null @@ -1,14 +0,0 @@ -#' @title DepMap 20Q1: Cancer cell line RNA-seq expression annotation -#' -#' @description A data set containing gene name column headers from the "CCLE_exp" data frame and -#' its various forms that exist in the DepMap data set platform. -#' -#' @format A data frame with 19145 rows and 4 variables: -#' \describe{ -#' \item{\code{names}}{character "CCLE_exp" column names} -#' \item{\code{GeneNames}}{character Hugo symbols} -#' \item{\code{GeneID}}{character NCBI gene IDs} -#' \item{\code{GeneNameID}}{character Hugo symbol separated by NCBI gene ID} -#'} -#' @source \url{https://figshare.com/articles/dataset/DepMap_20Q1_Public/11791698} -"CCLE_exp_annot" diff --git a/R/copy_num.R b/R/copy_num.R deleted file mode 100644 index 51888cb..0000000 --- a/R/copy_num.R +++ /dev/null @@ -1,10 +0,0 @@ -#' @title DepMap 20Q1: Cancer cell line copy number -#' -#' @description A data set containing the copy number of 27640 genes in 1713 cancer cell lines. -#' Data was generated by the Cancer Cell Line Encyclopedia (CCLE) and distributed by DepMap. -#' Details on data generation can be found in Ghandi, M., et al (2019) (https://www.nature.com/articles/s41586-019-1186-3). -#' The data was obtained from 'CCLE_expression.csv' (see source url). -#' -#' @format A data frame with 1713 rows and 27640 variables: -#' @source \url{https://figshare.com/articles/dataset/DepMap_20Q1_Public/11791698} -"copy_num" diff --git a/R/copy_num_annot.R b/R/copy_num_annot.R deleted file mode 100644 index aedabdc..0000000 --- a/R/copy_num_annot.R +++ /dev/null @@ -1,16 +0,0 @@ -#' @title DepMap 20Q1: Cancer cell line copy number annotations -#' -#' @description A data set containing the copy number of 27640 genes in 1713 cancer cell lines. -#' Data was generated by the Cancer Cell Line Encyclopedia (CCLE) and distributed by DepMap. -#' Details on data generation can be found in Ghandi, M., et al (2019) (https://www.nature.com/articles/s41586-019-1186-3). -#' The data was obtained from 'CCLE_expression.csv' (see source url). -#' -#' @format A data frame with 27640 rows and 4 variables: -#' \describe{ -#' \item{\code{names}}{character "copy_num"} -#' \item{\code{GeneNames}}{character Hugo symbols} -#' \item{\code{GeneID}}{character NCBI gene IDs} -#' \item{\code{GeneNameID}}{character Hugo symbol separated by NCBI gene ID} -#'} -#' @source \url{https://figshare.com/articles/dataset/DepMap_20Q1_Public/11791698} -"copy_num_annot" diff --git a/R/dep.R b/R/dep.R deleted file mode 100644 index a6d310b..0000000 --- a/R/dep.R +++ /dev/null @@ -1,9 +0,0 @@ -#' @title DepMap 20Q1: Cancer cell line gene dependency probabilities -#' -#' @description A data set containing the gene dependency probabilities of 18334 genes in 739 cancer cell lines. -#' Details on data generation can be found in Meyers, RM., et al (2017). -#' The data was obtained from 'Achilles_gene_dependency.csv' (see source url). -#' -#' @format A data frame with 19145 rows and 4 variables: -#' @source \url{https://figshare.com/articles/dataset/DepMap_20Q1_Public/11791698} -"dep" diff --git a/R/dep_annot.R b/R/dep_annot.R deleted file mode 100644 index 7b648d1..0000000 --- a/R/dep_annot.R +++ /dev/null @@ -1,14 +0,0 @@ -#' @title DepMap 20Q1: Cancer cell line gene dependency probabilities annotation -#' -#' @description A data set containing gene name column headers from the "dep" data frame and -#' its various forms that exist in the DepMap data set platform. -#' -#' @format A data frame with 18334 rows and 4 variables: -#' \describe{ -#' \item{\code{names}}{character "dep" column names} -#' \item{\code{GeneNames}}{character Hugo symbols} -#' \item{\code{GeneID}}{character NCBI gene IDs} -#' \item{\code{GeneNameID}}{character Hugo symbol separated by NCBI gene ID} -#'} -#' @source \url{https://figshare.com/articles/dataset/DepMap_20Q1_Public/11791698} -"dep_annot" diff --git a/R/essential_genes.R b/R/essential_genes.R deleted file mode 100644 index 052eb8a..0000000 --- a/R/essential_genes.R +++ /dev/null @@ -1,13 +0,0 @@ -#' @title DepMap 20Q1: Gold standard essential genes list -#' -#' @description A data set containing gold standard essential genes as described in Meyers, RM., et al (2017). -#' The data was obtained from 'common_essentials.csv' (see source url). -#' -#' @format A data frame with 2290 rows and 3 variables: -#' \describe{ -#' \item{\code{GeneNameID}}{character character Hugo symbol separated by NCBI gene ID} -#' \item{\code{GeneNames}}{character Hugo symbols} -#' \item{\code{GeneID}}{character NCBI gene IDs} -#'} -#' @source \url{https://figshare.com/articles/dataset/DepMap_20Q1_Public/11791698} -"essential_genes" \ No newline at end of file diff --git a/R/gene_effect.R b/R/gene_effect.R deleted file mode 100644 index a89a85f..0000000 --- a/R/gene_effect.R +++ /dev/null @@ -1,9 +0,0 @@ -#' @title DepMap 20Q1: Cancer cell line gene KO effects -#' -#' @description A data set containing the gene KO effect of 18334 genes in 739 cancer cell lines. -#' Details on data generation can be found in Meyers, RM., et al (2017). -#' The data was obtained from 'Achilles_gene_effect.csv' (see source url). -#' -#' @format A data frame with 739 rows and 18334 variables: -#' @source \url{https://figshare.com/articles/dataset/DepMap_20Q1_Public/11791698} -"gene_effect" diff --git a/R/list_available_cancer_type.R b/R/list_available_cancer_type.R index 1b08e93..2392b5d 100644 --- a/R/list_available_cancer_type.R +++ b/R/list_available_cancer_type.R @@ -4,7 +4,8 @@ #' `list_available_cancer_types()` and `list_available_cancer_subtypes()` provide tools for identifying cancer (sub)types that are available in DepMap. #' #' @return string A vector containing unique cancer types available -#' + +#' @param data_dir string Path to GINIR_data #' @import rlang #' @import dplyr #' @import utils @@ -12,10 +13,10 @@ #' @export #' @examples #' list_available_cancer_types() -list_available_cancer_types <- function(){ +list_available_cancer_types <- function(data_dir){ # Load necessary data sample_annot <- NULL # see: https://support.bioconductor.org/p/24756/ - load(paste0(system.file(package = "GINIR"), "/data/sample_annot.rda"), envir = environment()) + load(paste0(data_dir, "/sample_annot.rda"), envir = environment()) # Main sample_annot %>% @@ -25,14 +26,15 @@ list_available_cancer_types <- function(){ #' @describeIn list_available_cancer_types List cancer subtypes that are available #' #' @param input_disease string A vector of unique with one or more cancer types listed in `list_available_cancer_types()` +#' @param data_dir string Path to GINIR_data #' @importFrom rlang .data #' @export #' @examples #' list_available_cancer_subtypes("Lung Cancer") -list_available_cancer_subtypes <- function(input_disease){ +list_available_cancer_subtypes <- function(input_disease, data_dir){ # Load necessary data sample_annot <- NULL # see: https://support.bioconductor.org/p/24756/ - load(paste0(system.file(package = "GINIR"), "/data/sample_annot.rda"), envir = environment()) + load(paste0(data_dir, "/sample_annot.rda"), envir = environment()) # Main sample_annot %>% diff --git a/R/mut_calls.R b/R/mut_calls.R deleted file mode 100644 index bc842df..0000000 --- a/R/mut_calls.R +++ /dev/null @@ -1,46 +0,0 @@ -#' @title DepMap 20Q1: Cancer cell line RNA-seq expression -#' -#' @description A data set single nucleotide variations (SNVs), insertions and deletions (Indels) found in of 1697 cancer cell lines. -#' Data was generated by the Cancer Cell Line Encyclopedia (CCLE) and distributed by DepMap. -#' Details on data generation can be found in Ghandi, M., et al (2019) (https://www.nature.com/articles/s41586-019-1186-3). -#' The data was obtained from 'CCLE_expression.csv' (see source url). -#' -#' @format A data frame with 1279923 rows and 34 variables: -#' \describe{ -#' \item{\code{Hugo_Symbol}}{character} -#' \item{\code{Entrez_Gene_Id}}{double} -#' \item{\code{NCBI_Build}}{double} -#' \item{\code{Chromosome}}{character} -#' \item{\code{Start_position}}{double} -#' \item{\code{End_position}}{double} -#' \item{\code{Strand}}{character} -#' \item{\code{Variant_Classification}}{character} -#' \item{\code{Variant_Type}}{character} -#' \item{\code{Reference_Allele}}{character} -#' \item{\code{Tumor_Seq_Allele1}}{character} -#' \item{\code{dbSNP_RS}}{character} -#' \item{\code{dbSNP_Val_Status}}{character} -#' \item{\code{Genome_Change}}{character} -#' \item{\code{Annotation_Transcript}}{character} -#' \item{\code{Tumor_Sample_Barcode}}{character} -#' \item{\code{cDNA_Change}}{character} -#' \item{\code{Codon_Change}}{character} -#' \item{\code{Protein_Change}}{character} -#' \item{\code{isDeleterious}}{logical} -#' \item{\code{isTCGAhotspot}}{logical} -#' \item{\code{TCGAhsCnt}}{double} -#' \item{\code{isCOSMIChotspot}}{logical} -#' \item{\code{COSMIChsCnt}}{double} -#' \item{\code{ExAC_AF}}{character} -#' \item{\code{CGA_WES_AC}}{character} -#' \item{\code{SangerWES_AC}}{character} -#' \item{\code{SangerRecalibWES_AC}}{character} -#' \item{\code{RNAseq_AC}}{character} -#' \item{\code{HC_AC}}{character} -#' \item{\code{RD_AC}}{character} -#' \item{\code{WGS_AC}}{character} -#' \item{\code{Variant_annotation}}{character} -#' \item{\code{DepMap_ID}}{character} -#'} -#' @details DETAILS -"mut_calls" \ No newline at end of file diff --git a/R/nonessential_genes.R b/R/nonessential_genes.R deleted file mode 100644 index 9b6ecf2..0000000 --- a/R/nonessential_genes.R +++ /dev/null @@ -1,13 +0,0 @@ -#' @title DepMap 20Q1: Common non-essential genes list -#' -#' @description A data set containing common non-essential genes as described in Meyers, RM., et al (2017). -#' The data was obtained from 'nonessentials.csv' (see source url). -#' -#' @format A data frame with 2290 rows and 3 variables: -#' \describe{ -#' \item{\code{GeneNameID}}{character character Hugo symbol separated by NCBI gene ID} -#' \item{\code{GeneNames}}{character Hugo symbols} -#' \item{\code{GeneID}}{character NCBI gene IDs} -#'} -#' @source \url{https://figshare.com/articles/dataset/DepMap_20Q1_Public/11791698} -"nonessential_genes" \ No newline at end of file diff --git a/R/protein.R b/R/protein.R deleted file mode 100644 index ecab1de..0000000 --- a/R/protein.R +++ /dev/null @@ -1,9 +0,0 @@ -#' @title DepMap 20Q1: Cancer cell line protein expression -#' -#' @description A data set containing the protein expression of 426 cancer cell lines. -#' Details on data generation can be found in Nusinow, DP., et al (2020) (https://www.sciencedirect.com/science/article/pii/S0092867419313856). -#' The data was obtained from 'protein_quant_current_normalized.csv' (see source url). -#' -#' @format A data frame with 1279 rows and 19145 variables: -#' @source \url{https://figshare.com/articles/dataset/DepMap_20Q1_Public/11791698} -"protein" diff --git a/R/protein_annot.R b/R/protein_annot.R deleted file mode 100644 index aa0597a..0000000 --- a/R/protein_annot.R +++ /dev/null @@ -1,12 +0,0 @@ -#' @title DepMap 20Q1: Cancer cell line protein expression annotation -#' -#' @description A data set containing cell line column headers from the "protein" data frame and -#' its various forms that exist in the DepMap data set platform. -#' -#' @format A data frame with 426 rows and 2 variables: -#' \describe{ -#' \item{\code{GygiNames}}{character "protein" data frame cell line column names} -#' \item{\code{DepMap_ID}}{character Corresponding DepMap_ID} -#'} -#' @source \url{https://figshare.com/articles/dataset/DepMap_20Q1_Public/11791698} -"protein_annot" diff --git a/R/protein_nodup.R b/R/protein_nodup.R deleted file mode 100644 index d119937..0000000 --- a/R/protein_nodup.R +++ /dev/null @@ -1,9 +0,0 @@ -#' @title DepMap 20Q1: Cancer cell line protein expression with only the major isoform -#' -#' @description A data set containing the major protein isoform expression of 426 cancer cell lines. -#' Details on data generation can be found in Nusinow, DP., et al (2020) (https://www.sciencedirect.com/science/article/pii/S0092867419313856). -#' The data was obtained from 'protein_quant_current_normalized.csv' (see source url). -#' -#' @format A data frame with 4893 rows and 426 variables: -#' @source \url{https://figshare.com/articles/dataset/DepMap_20Q1_Public/11791698} -"protein_nodup" diff --git a/R/sample_ARID1A_KO_screen.R b/R/sample_ARID1A_KO_screen.R deleted file mode 100644 index 250a31f..0000000 --- a/R/sample_ARID1A_KO_screen.R +++ /dev/null @@ -1,9 +0,0 @@ -#' @title Sample GINI screen result for ARID1A KO pan cancer cell lines -#' -#' @description A sample data set containing the results of a screen conducted in cancer cell lines with -#' loss-of-function alterations in ARID1A (HomDel) generated using `GINI_screen()`. As reported in previous literature -#' ARID1A and ARID1B have synthetic lethal interaction (Helming et al.; doi: 10.1038/nm.3480; PMID:24562383), -#' thus gene with the highest interaction score is ARID1B. -#' -#' @format A data frame with 18,333 rows and 17 variables: -"sample_ARID1A_KO_screen" diff --git a/R/sample_annot.R b/R/sample_annot.R deleted file mode 100644 index 184279d..0000000 --- a/R/sample_annot.R +++ /dev/null @@ -1,34 +0,0 @@ -#' @title DepMap 20Q1: Cancer cell line sample annotations -#' -#' @description A data set containing the cancer cell lines information generated by the Cancer Cell Line Encyclopedia (CCLE) and distributed by DepMap. -#' The data was obtained from 'sample_info.csv' (see source url) and details on data generation can be found in Ghandi, M., et al (2019) (https://www.nature.com/articles/s41586-019-1186-3). -#' -#' @format A data frame with 1775 rows and 24 variables: -#' \describe{ -#' \item{\code{DepMap_ID}}{DepMap designated IDs used as a use key} -#' \item{\code{stripped_cell_line_name}}{character} -#' \item{\code{CCLE_Name}}{character} -#' \item{\code{alias}}{character} -#' \item{\code{COSMIC_ID}}{double} -#' \item{\code{lineage}}{character} -#' \item{\code{lineage_subtype}}{character} -#' \item{\code{lineage_sub_subtype}}{character} -#' \item{\code{lineage_molecular_subtype}}{character} -#' \item{\code{sex}}{character} -#' \item{\code{source}}{character} -#' \item{\code{Achilles_n_replicates}}{double} -#' \item{\code{cell_line_NNMD}}{double} -#' \item{\code{culture_type}}{character} -#' \item{\code{culture_medium}}{character} -#' \item{\code{cas9_activity}}{character} -#' \item{\code{RRID}}{character} -#' \item{\code{sample_collection_site}}{character} -#' \item{\code{primary_or_metastasis}}{character} -#' \item{\code{disease}}{character} -#' \item{\code{disease_subtype}}{character} -#' \item{\code{age}}{double} -#' \item{\code{Sanger_model_ID}}{character} -#' \item{\code{additional_info}}{character} -#' } -#' @source \url{https://figshare.com/articles/dataset/DepMap_20Q1_Public/11791698} -"sample_annot"