-
Notifications
You must be signed in to change notification settings - Fork 13
/
db_meta.toml
463 lines (385 loc) · 77.3 KB
/
db_meta.toml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
# Document-level title; lives in the root table because it precedes any header.
Title = "Meta information of databases"

# Locations of the per-database config files.
# The key spelling `avaliable_cfg` is intentional to keep: renaming a key
# changes the parsed data that consumers look up.
[db.cfg_meta]
avaliable_cfg = [
    "db_annovar.toml",
    "db_blast.toml",
    "db_main.toml",
]
prefix_url = "https://raw.githubusercontent.com/JhuangLab/BioInstaller/master/inst/extdata/config/db/"
# The value wraps an R call in @>@ ... @<@ markers; presumably the loader
# evaluates it when the file is read — confirm against the consumer.
cfg_dir = "@>@system.file('extdata', 'config/db', package = 'BioInstaller')@<@"
# BLAST database collection, addressed by an FTP URL.
# Unlike most entries visible in this section, this one carries a `title`
# key and has no `publication` key.
[db.item.blast]
title = "Basic Local Alignment Search Tool Databases"
description = "All of blast required databases"
url = "ftp://ftp.ncbi.nih.gov/blast/db/"
# CSCD: cancer-specific circular RNA database (see `publication`).
[db.item.cscd]
url = "http://gb.whu.edu.cn/CSCD/"
description = "Circular RNA (circRNA) is a large group of RNA family extensively existed in cells and tissues. High-throughput sequencing provides a way to view circRNAs across different samples, especially in various diseases. However, there is still no comprehensive database for exploring the cancer-specific circRNAs. Researchers at Wuhan University collected 228 total RNA or polyA(-) RNA-seq samples from both cancer and normal cell lines, and identified 272 152 cancer-specific circRNAs. A total of 950 962 circRNAs were identified in normal samples only, and 170 909 circRNAs were identified in both tumor and normal samples, which could be further used as non-tumor background. The researchers constructed a cancer-specific circRNA database. To understand the functional effects of circRNAs, they predicted the microRNA response element sites and RNA binding protein sites for each circRNA. They further predicted potential open reading frames to highlight translatable circRNAs. To understand the association between the linear splicing and the back-splicing, the researchers also predicted the splicing events in linear transcripts of each circRNA. As the first comprehensive cancer-specific circRNA database, they believe CSCD could significantly contribute to the research for the function and regulation of cancer-associated circRNAs."
# First author was previously garbled as "XiaS , Feng J".
publication = "Xia S, Feng J, Chen K, Ma Y, Gong J, Cai FF, Jin Y, Gao Y, Xia L, Chang H, Wei L, Han L, He C. (2017) CSCD: a database for cancer-specific circular RNAs. Nucleic Acids Research"
# sRNAnalyzer: small RNA sequencing analysis pipeline (per `description`).
[db.item.srnanalyzer]
url = "http://srnanalyzer.systemsbiology.net/"
description = "sRNAnalyzer is a flexible, modular pipeline for the analysis of small RNA sequencing data."
publication = "Wu X, Kim TK, Baxter D, Scherler K, Gordon A, Fong O, Etheridge A, Galas DJ, Wang K. (2017) sRNAnalyzer—a flexible and customizable small RNA sequencing data analysis pipeline. Nucleic Acids Research"
# SEECancer: stage-specific somatic events in cancer genome evolution.
# The reference is identified by DOI rather than a full author list.
[db.item.seecancer]
url = "http://biocc.hrbmu.edu.cn/SEECancer"
description = "Cancer is driven by accumulating somatic alterations which confer normal cells fitness advantage to evolve from a premalignant status to malignant tumor. The SEECancer database presents the comprehensive cancer evolutionary stage-specific somatic events (including early-specific, late-specific, relapse-specific, metastasis-specific, drug-resistant and drug-induced genomic events) and their temporal orders."
publication = "(Zhang and Luo, 2017) SEECancer: a resource for somatic events in evolution of cancer genome. DOI: 10.1093/nar/gkx964"
# DiseaseEnhancer: manually curated disease-associated enhancers.
# `description` is a multi-line basic string; the line break inside it is
# part of the value.
[db.item.diseaseenhancer]
url = "http://biocc.hrbmu.edu.cn/DiseaseEnhancer/"
description = """Genetic alterations/variants of enhancers make an essential contribution to disease progression. And more than 3 million of enhancers generated by international consortiums indicated that disease-associated enhancers will open a brand new view of pathophysiology.
DiseaseEnhancer provides a comprehensive map of manually curated disease-associated enhancers, which includes 847 disease-associated enhancers in 143 human diseases, involving 896 unique enhancer-gene interactions. We also manually collected their dysregulated target genes and mechanistic-related information, such as the associated variant types (including single nucleotide variant, somatic mutation, indel and copy number alteration) and affected transcription factor bindings. Additional genome data were also integrated into DiseaseEnhancer to help characterize disease-associated enhancers."""
publication = "Zhang G, Shi J, Zhu S, et al. DiseaseEnhancer: a resource of human disease-associated enhancer catalog[J]. Nucleic Acids Research, 2017."
# PancanQTL: cis- and trans-eQTLs across 33 TCGA cancer types.
[db.item.pancanqtl]
url = "http://bioinfo.life.hust.edu.cn/PancanQTL"
description = "Expression quantitative trait loci (eQTLs) are regions of the genome containing DNA sequence variants that influence the expression level of one or more genes. PancanQTL aims to comprehensively provide cis-eQTLs (SNPs affect local gene expression) and trans-eQTLs (SNPs affect distant gene expression) in 33 cancer types from The Cancer Genome Atlas (TCGA)."
publication = "Gong J, Mei S, Liu C, et al. PancanQTL: systematic identification of cis-eQTLs and trans-eQTLs in 33 cancer types[J]. Nucleic Acids Research, 2017."
# MSDD: miRNA / SNP / human disease associations (per `publication`).
# Note the `description` text describes the site's download maps rather
# than the database contents.
[db.item.msdd]
url = "http://www.bio-bigdata.com/msdd"
description = "MSDD provides two maps that enable users to download data by clicking on the appropriate area. The left map classifies data according to the organ and the right map displays the hotspot data."
publication = "Yue M, Zhou D, Zhi H, et al. MSDD: a manually curated database of experimentally supported associations among miRNAs, SNPs and human diseases[J]. Nucleic Acids Research, 2017."
# MNDR v2.0: ncRNA-disease associations in mammals.
[db.item.mndr]
url = "http://www.rna-society.org/mndr"
description = "Accumulated evidences suggest diverse non-coding RNAs (ncRNAs) involved in a wide variety of diseases progression. Hence, we have updated the MNDR v2.0 database by integrating experimental and prediction diverse ncRNA-disease associations from manual literatures curation and other resources under one common framework. The new developments in MNDR v2.0 include (1) over 220-fold ncRNA-disease associations enhancement than previous version (including lncRNA, miRNA, piRNA, snoRNA and more than 1,400 diseases); (2) integrating experimental and prediction evidence from 14 resources and prediction algorithms for each ncRNA-disease association; (3) mapping disease name to the Disease Ontology and Medical Subject Headings (MeSH); (4) providing a confidence score for each ncRNA-disease association; and (5) an increase of species coverage to 6 mammals."
# First reference: "MNDR v2. 0" (citation-export artifact) corrected to
# "MNDR v2.0", matching the spelling used in `description`.
publication = [
    "Cui T, Zhang L, Huang Y, et al. MNDR v2.0: an updated resource of ncRNA–disease associations in mammals[J]. Nucleic Acids Research, 2017.",
    "Wang Y, Chen L, Chen B, et al. Mammalian ncRNA-disease repository: a global view of ncRNA-mediated disease network[J]. Cell death & disease, 2013, 4(8): e765.",
]
# rSNPBase 3.0: SNP-related regulatory elements and networks.
[db.item.rsnp3]
url = "http://rsnp3.psych.ac.cn/index.do"
description = "SNP related regulatory elements, element-gene pairs & SNP-based regulatory network"
publication = "Guo L, Wang J. rSNPBase 3.0: an updated database of SNP-related regulatory elements, element-gene pairs and SNP-based gene regulatory networks[J]. Nucleic Acids Research, 2017."
# ECOdrug: evolutionary conservation of human drug targets across species.
[db.item.ecodrug]
url = "http://www.ecodrug.org/"
# A missing full stop between "species" and "The interface" was restored.
description = "The ECOdrug database contains information on the Evolutionary Conservation Of human Drug targets in over 600 eukaryotic species. The interface allows users to identify human drug targets to 1000+ legacy drugs and explore integrated orthologue predictions for the drug targets, transparently showing the confidence in the predictions both across methods and taxonomic groups."
publication = "Verbruggen B, Gunnarsson L, Kristiansson E, et al. ECOdrug: a database connecting drugs and conservation of their targets across species[J]. Nucleic Acids Research, 2017."
# MeDReaders: transcription factors that bind methylated DNA.
[db.item.medreaders]
url = "http://medreader.org"
description = "MeDReaders: A database for transcription factors that bind to methylated DNA"
publication = "Wang G, Luo X, Wang J, et al. MeDReaders: a database for transcription factors that bind to methylated DNA[J]. Nucleic Acids Research, 2017."
# SuperDRUG2: resource for approved/marketed drugs.
[db.item.superdrug2]
url = "http://cheminfo.charite.de/superdrug2"
# Literal (single-quoted) string: the text contains double quotes.
description = 'SuperDRUG2 database is a unique, one-stop resource for approved/marketed drugs, containing more than 4,500 active pharmaceutical ingredients. We annotated drugs with regulatory details, chemical structures (2D and 3D), dosage, biological targets, physicochemical properties, external identifiers, side-effects and pharmacokinetic data. Different search mechanisms allow navigation through the chemical space of approved drugs. A 2D chemical structure search is provided in addition to a 3D superposition feature that superposes a drug with ligands already known to be found in the experimentally determined protein-ligand complexes. For the first time, we introduced simulation of "physiologically-based" pharmacokinetics of drugs. Our interaction check feature not only identifies potential drug-drug interactions but also provides alternative recommendations for elderly patients.'
# The stray "GB/T 7714" prefix (a citation-style label, not part of the
# reference) was removed.
publication = "Siramshetty V B, Eckert O A, Gohlke B O, et al. SuperDRUG2: a one stop resource for approved/marketed drugs[J]. Nucleic Acids Research, 2017."
# VarCards: genetic and clinical database for human coding variants.
[db.item.varcards]
url = "http://varcards.biols.ac.cn"
description = "VarCards: an integrated genetic and clinical database for coding variants in the human genome"
publication = "Li J, Shi L, Zhang K, et al. VarCards: an integrated genetic and clinical database for coding variants in the human genome[J]. Nucleic Acids Research, 2017."
# CIViC: Clinical Interpretation of Variants in Cancer.
# `publication` is a literal (single-quoted) string because the citation
# itself contains double quotes.
[db.item.civic]
url = "https://civic.genome.wustl.edu/home"
description = "Realizing precision medicine will require this information to be centralized, debated and interpreted for application in the clinic. CIViC is an open access, open source, community-driven web resource for Clinical Interpretation of Variants in Cancer. Our goal is to enable precision medicine by providing an educational forum for dissemination of knowledge and active discussion of the clinical significance of cancer genome alterations. For more details refer to the 2017 CIViC publication in Nature Genetics."
publication = 'Griffith, Malachi, et al. "CIViC is a community knowledgebase for expert crowdsourcing the clinical interpretation of variants in cancer." Nature genetics 49.2 (2017): 170-174.'
# Expression Atlas: gene and protein expression across species and conditions.
# The description uses typographic quotes (‘ ’), so the file must stay UTF-8.
[db.item.expression_atlas]
url = "https://www.ebi.ac.uk/gxa/home/"
description = "Expression Atlas is an open science resource that gives users a powerful way to find information about gene and protein expression across species and biological conditions such as different tissues, cell types, developmental stages and diseases among others. Expression Atlas aims to help answering questions such as ‘where is a certain gene expressed?’ or ‘how does its expression change in a disease?’"
publication = "Papatheodorou, I., et al. Expression Atlas: gene and protein expression across multiple studies and organisms. Nucleic Acids Res 2017."
# ReMap: atlas of regulatory regions from integrated ChIP-seq experiments.
[db.item.remap2]
url = "http://tagc.univ-mrs.fr/remap/"
# Typo "consits" corrected to "consists".
description = "ReMap, an integrative analysis of transcriptional regulators ChIP-seq experiments from both Public and Encode datasets. The ReMap atlas consists of 80 million peaks from 485 transcription factors (TFs), transcription coactivators (TCAs) and chromatin-remodeling factors (CRFs). The atlas is available to browse or download either for a given TF or cell line, or for the entire dataset. "
# First reference: the missing space between the title and the author list
# ("landscape.Griffon") was restored.
publication = [
    "Integrative analysis of public ChIP-seq experiments reveals a complex multi-cell regulatory landscape. Griffon, A., Barbier, Q., Dalino, J., van Helden, J., Spicuglia, S., Ballester, B. Nucleic Acids Research, Volume 43, Issue 4, 27 February 2015 ",
    "ReMap 2018: An updated regulatory regions atlas from an integrative analysis of DNA-binding ChIP-seq experiments. Cheneby J., Gheorghe M., Artufel M., Mathelier A., Ballester, B. Nucleic Acids Research, gkx1092, https://doi.org/10.1093/nar/gkx1092",
]
# FunCoup: genome-wide functional association networks.
[db.item.funcoup]
url = "http://funcoup.sbc.su.se/search/"
description = "This release of the FunCoup database (http://funcoup.sbc.su.se) is the fourth generation of one of the most comprehensive databases for genome-wide functional association networks. These functional associations are inferred via integrating various data types using a naive Bayesian algorithm and orthology based information transfer across different species. This approach provides high coverage of the included genomes as well as high quality of inferred interactions. In this update of FunCoup we introduce four new eukaryotic species: Schizosaccharomyces pombe, Plasmodium falciparum, Bos taurus, Oryza sativa and open the database to the prokaryotic domain by including networks for Escherichia coli and Bacillus subtilis. The latter allows us to also introduce a new class of functional association between genes - co-occurrence in the same operon. We also supplemented the existing classes of functional association: metabolic, signaling, complex and physical protein interaction with up-to-date information. In this release we switched to InParanoid v8 as the source of orthology and base for calculation of phylogenetic profiles. While populating all other evidence types with new data we introduce a new evidence type based on quantitative mass spectrometry data. Finally, the new JavaScript based network viewer provides the user an intuitive and responsive platform to further evaluate the results."
# References are listed newest first.
# NOTE(review): the 2012 entry's author list was truncated ("Schmitt, T.,
# E. L."); it has been restored from the published citation — confirm.
publication = [
    "Ogris, C., et al. FunCoup 4: new species, data, and visualization. Nucleic Acids Res 2017.",
    "Schmitt, T., Ogris, C., & Sonnhammer, E. L. (2013). FunCoup 3.0: database of genome-wide functional coupling networks. Nucleic Acids Research, 42(Database issue), D380-8",
    "Alexeyenko, A., Schmitt, T., Tjärnberg, A., Guala, D., Frings, O., & Sonnhammer, E. L. (2012). Comparative interactomics with Funcoup 2.0. Nucleic Acids Research, 40(Database issue), D821-8",
    "Alexeyenko, A., & Sonnhammer, E. L. (2009). Global networks of functional coupling in eukaryotes from comprehensive data integration. Genome Research, 19(6), 1107-1116",
]
# Human Protein Atlas: Tissue, Cell and Pathology atlases.
[db.item.proteinatlas]
url = "https://www.proteinatlas.org/"
# "with the aim to map of all" corrected to "with the aim to map all".
description = "The Human Protein Atlas (HPA) is a Swedish-based program started in 2003 with the aim to map all the human proteins in cells, tissues and organs using integration of various omics technologies, including antibody-based imaging, mass spectrometry-based proteomics, transcriptomics and systems biology. All the data in the knowledge resource is open access to allow scientists both in academia and industry to freely access the data for exploration of the human proteome. The Human Protein Atlas consists of three separate parts, each focusing on a particular aspect of the genome-wide analysis of the human proteins; the Tissue Atlas showing the distribution of the proteins across all major tissues and organs in the human body, the Cell Atlas showing the subcellular localization of proteins in single cells, and finally the Pathology Atlas showing the impact of protein levels for survival of patients with cancer. The Human Protein Atlas program has already contributed to several thousands of publications in the field of human biology and disease and it is selected by the organization ELIXIR (www.elixir-europe.org) as a European core resource due to its fundamental importance for a wider life science community. The HPA consortium is funded by the Knut and Alice Wallenberg Foundation."
# First reference: the garbled author "U..M" was corrected to "Uhlen M"
# (the spelling used by the third reference), and the missing full stop
# after "Science" was added to match the other two entries.
publication = [
    "Uhlen M et al, 2015. Tissue-based map of the human proteome. Science. PubMed: 25613900 DOI: 10.1126/science.1260419",
    "Thul PJ et al, 2017. A subcellular map of the human proteome. Science. PubMed: 28495876 DOI: 10.1126/science.aal3321",
    "Uhlen M et al, 2017. A pathology atlas of the human cancer transcriptome. Science. PubMed: 28818916 DOI: 10.1126/science.aan2507",
]
# DGIdb: Drug-Gene Interaction database.
# Note: the `publication` value ends with a trailing space.
[db.item.dgidb]
url = "http://dgidb.org/"
description = "The Drug-Gene Interaction database (DGIdb) mines existing resources that generate hypotheses about how mutated genes might be targeted therapeutically or prioritized for drug development. It provides an interface for searching lists of genes against a compendium of drug-gene interactions and potentially ‘druggable’ genes. DGIdb can be accessed at http://dgidb.org/."
publication = "Griffith, M., et al. DGIdb: mining the druggable genome. Nat Methods 2013;10(12):1209-1210. "
# DrugBank: drugs and drug targets.
[db.item.drugbank]
url = "https://www.drugbank.ca"
description = "The DrugBank database is a comprehensive, freely accessible, online database containing information on drugs and drug targets. As both a bioinformatics and a cheminformatics resource, DrugBank combines detailed drug (i.e. chemical, pharmacological and pharmaceutical) data with comprehensive drug target (i.e. sequence, structure, and pathway) information. Because of its broad scope, comprehensive referencing and unusually detailed data descriptions, DrugBank is more akin to a drug encyclopedia than a drug database. As a result, links to DrugBank are maintained for nearly all drugs listed in Wikipedia. DrugBank is widely used by the drug industry, medicinal chemists, pharmacists, physicians, students and the general public. Its extensive drug and drug-target data has enabled the discovery and repurposing of a number of existing drugs to treat rare and newly identified illnesses."
# One reference per DrugBank release, oldest first (2006 through 5.0).
publication = [
    "Wishart D S, Knox C, Guo A C, et al. DrugBank: a comprehensive resource for in silico drug discovery and exploration[J]. Nucleic acids research, 2006, 34(suppl_1): D668-D672.",
    "Wishart D S, Knox C, Guo A C, et al. DrugBank: a knowledgebase for drugs, drug actions and drug targets[J]. Nucleic acids research, 2007, 36(suppl_1): D901-D906.",
    "Knox C, Law V, Jewison T, et al. DrugBank 3.0: a comprehensive resource for ‘omics’ research on drugs[J]. Nucleic acids research, 2010, 39(suppl_1): D1035-D1041.",
    "Law V, Knox C, Djoumbou Y, et al. DrugBank 4.0: shedding new light on drug metabolism[J]. Nucleic acids research, 2013, 42(D1): D1091-D1097.",
    "Wishart DS, Feunang YD, Guo AC, Lo EJ, Marcu A, Grant JR, Sajed T, Johnson D, Li C, Sayeeda Z, Assempour N, Iynkkaran I, Liu Y, Maciejewski A, Gale N, Wilson A, Chin L, Cummings R, Le D, Pon A, Knox C, Wilson M. DrugBank 5.0: a major update to the DrugBank database for 2018. Nucleic Acids Res. 2017 Nov 8. doi: 10.1093/nar/gkx1037.",
]
# InterPro: protein family classification and domain/site prediction.
[db.item.interpro]
url = "http://www.ebi.ac.uk/interpro"
description = "InterPro provides functional analysis of proteins by classifying them into families and predicting domains and important sites. We combine protein signatures from a number of member databases into a single searchable resource, capitalising on their individual strengths to produce a powerful integrated database and diagnostic tool."
# References cover both the InterPro database and the InterProScan tool.
publication = [
    "Apweiler R, Attwood T K, Bairoch A, et al. The InterPro database, an integrated documentation resource for protein families, domains and functional sites[J]. Nucleic acids research, 2001, 29(1): 37-40.",
    "Mulder N, Apweiler R. InterPro and InterProScan: tools for protein sequence classification and comparison[J]. Comparative genomics, 2007: 59-70.",
    "Jones P, Binns D, Chang H Y, et al. InterProScan 5: genome-scale protein function classification[J]. Bioinformatics, 2014, 30(9): 1236-1240.",
]
# InBio Map: protein-protein interaction platform maintained by Intomics A/S.
# The description contains the ™ sign, so the file must stay UTF-8.
[db.item.inbiomap]
url = "https://www.intomics.com/inbio/map"
description = "InBio Map™ is a high coverage, high quality, convenient and transparent platform for investigating and visualizing protein-protein interactions. InBio Map™ and the corresponding InWeb_InBioMap PPI database are developed, owned and continuously maintained by Intomics A/S"
publication = "Li, T., et al. A scored human protein-protein interaction network to catalyze genomic interpretation. Nat Methods 2017;14(1):61-64."
# OMIM: Online Mendelian Inheritance in Man.
# `description` is a multi-line basic string; its line breaks are part of
# the value.
[db.item.omim]
url = "https://omim.org/"
description = """OMIM is a comprehensive, authoritative compendium of human genes and genetic phenotypes that is freely available and updated daily. The full-text, referenced overviews in OMIM contain information on all known mendelian disorders and over 15,000 genes. OMIM focuses on the relationship between phenotype and genotype. It is updated daily, and the entries contain copious links to other genetics resources.
This database was initiated in the early 1960s by Dr. Victor A. McKusick as a catalog of mendelian traits and disorders, entitled Mendelian Inheritance in Man (MIM). Twelve book editions of MIM were published between 1966 and 1998. The online version, OMIM, was created in 1985 by a collaboration between the National Library of Medicine and the William H. Welch Medical Library at Johns Hopkins. It was made generally available on the internet starting in 1987. In 1995, OMIM was developed for the World Wide Web by NCBI, the National Center for Biotechnology Information.
OMIM is authored and edited at the McKusick-Nathans Institute of Genetic Medicine, Johns Hopkins University School of Medicine, under the direction of Dr. Ada Hamosh."""
# Third reference: "OMIM. org" (citation-export artifact) corrected to
# "OMIM.org".
publication = [
    "Hamosh A, Scott A F, Amberger J S, et al. Online Mendelian Inheritance in Man (OMIM), a knowledgebase of human genes and genetic disorders[J]. Nucleic acids research, 2005, 33(suppl_1): D514-D517.",
    "Amberger J, Bocchini C A, Scott A F, et al. McKusick's online Mendelian inheritance in man (OMIM®)[J]. Nucleic acids research, 2008, 37(suppl_1): D793-D796.",
    "Amberger J S, Bocchini C A, Schiettecatte F, et al. OMIM.org: Online Mendelian Inheritance in Man (OMIM®), an online catalog of human genes and genetic disorders[J]. Nucleic acids research, 2014, 43(D1): D789-D798.",
]
# NCBI BioSystems: pathways and other biosystem records.
# Fixes applied to `description`:
#   - "desiged" -> "designed", "accomodate" -> "accommodate",
#     "the those" -> "those";
#   - image alt-text residue "FLink icon" removed before "FLink tool";
#   - the garbled span "throughout the EntrezBI BioSystems record" was
#     repaired. NOTE(review): the "(3) facilitate computation on biosystems
#     data" clause and the sentence opening were restored from the
#     database's published abstract — confirm wording against the NCBI page.
[db.item.biosystems]
url = "https://www.ncbi.nlm.nih.gov/biosystems"
description = """A biosystem, or biological system, is a group of molecules that interact in a biological system. One type of biosystem is a biological pathway, which can consist of interacting genes, proteins, and small molecules. Another type of biosystem is a disease, which can involve components such as genes, biomarkers, and drugs. A number of databases provide diagrams showing the components and products of biological pathways along with corresponding annotations and links to literature. The NCBI BioSystems Database was developed as a complementary project to (1) serve as a centralized repository of data; (2) connect the biosystem records with associated literature, molecular, and chemical data throughout the Entrez system; and (3) facilitate computation on biosystems data. The NCBI BioSystems record for arachidonic acid metabolism, for example, displays the name and description of the biosystem along with a thumbnail image of the pathway diagram that links to the full size illustration on the source database's web site. In addition, the BioSystems record lists and categorizes the genes, proteins, and small molecules involved in the biological system, along with related biosystems and citations, and allows instant retrieval of those data sets through a wide range of Links. Integrating the data in this way makes it possible to search across all the pathways to answer broad questions such as the "how to" examples shown below. The companion FLink tool, in turn, allows you to input a list of proteins, genes, or small molecules and retrieve a ranked list of biosystems. The NCBI BioSystems Database currently contains records from several source databases: KEGG, BioCyc (including its Tier 1 EcoCyc and MetaCyc databases, and its Tier 2 databases), Reactome, the National Cancer Institute's Pathway Interaction Database, WikiPathways, and Gene Ontology (GO). 
The BioSystems database includes several types of records such as pathways, structural complexes, and functional sets, and is designed to accommodate other record types, such as diseases, as data become available. Through these collaborations, the BioSystems database facilitates access to, and provides the ability to compute on, a wide range of biosystems data. Detailed diagrams and annotations for individual biosystems are then available on the web sites of the source databases."""
publication = "Geer L Y, Marchler-Bauer A, Geer R C, et al. The NCBI biosystems database[J]. Nucleic acids research, 2009, 38(suppl_1): D492-D496."
# denovo-db: germline de novo variants in the human genome.
# The description contains "(see figure to right)", which refers to a figure
# that is not part of this file.
[db.item.denovo_db]
url = "http://denovo-db.gs.washington.edu/denovo-db"
description = "denovo-db is a collection of germline de novo variants identified in the human genome. de novo variants are those present in children but not their parents (see figure to right). With the advancements in whole-exome and whole-genome sequencing we are now able to assess 1000s of these variants. To provide a landing place for de novo variation we created denovo-db, which has been assembled using the published literature. Many large exome and genome studies have focused on neurodevelopmental disorders and while we are very interested in these disorders we have not limited our database to only these phenotypes. The information types present in denovo-db have been refined to include what we think is highly relevant for genetic studies (for example basic functional annotation, CADD scores, and validation status). Our goal is to provide a compendium of all de novo variants to benefit the larger researcher community and to allow researchers to ask various scientific questions such as: 1. Which sites in the human genome have de novo mutations? 2. Which sites are highly mutable to de novo mutation? 3. What are features of de novo variants generally and in disease? 4. What kinds of phenotypes are represented by de novo variants?"
publication = "Turner T N, Yi Q, Krumm N, et al. denovo-db: a compendium of human de novo variants[J]. Nucleic acids research, 2017, 45(D1): D804-D811."
# HPO: Human Phenotype Ontology.
# Term and annotation counts in the description are a snapshot ("still
# growing" per the text itself).
[db.item.hpo]
url = "http://human-phenotype-ontology.github.io"
description = "The Human Phenotype Ontology (HPO) aims to provide a standardized vocabulary of phenotypic abnormalities encountered in human disease. Each term in the HPO describes a phenotypic abnormality, such as atrial septal defect. The HPO is currently being developed using the medical literature, Orphanet, DECIPHER, and OMIM. HPO currently contains approximately 11,000 terms (still growing) and over 115,000 annotations to hereditary diseases. The HPO also provides a large set of HPO annotations to approximately 4000 common diseases."
publication = "Kohler, S., et al. The Human Phenotype Ontology project: linking molecular biology and disease through phenotype data. Nucleic Acids Res 2014;42(Database issue):D966-974."
# TumorFusions: gene fusions detected in TCGA cancer and normal samples.
[db.item.tumorfusions]
url = "http://www.tumorfusions.org"
description = "Gene fusion represents a class of molecular aberrations in cancer and has been exploited for therapeutic purposes. In this paper we describe TumorFusions, a data portal that catalogues 20 731 gene fusions detected in 9966 well characterized cancer samples and 648 normal specimens from The Cancer Genome Atlas (TCGA). The portal spans 33 cancer types in TCGA. Fusion transcripts were identified via a uniform pipeline, including filtering against a list of 3838 transcript fusions detected in a panel of 648 non-neoplastic samples. Fusions were mapped to somatic DNA rearrangements identified using whole genome sequencing data from 561 cancer samples as a means of validation. We observed that 65% of transcript fusions were associated with a chromosomal alteration, which is annotated in the portal. Other features of the portal include links to SNP array-based copy number levels and mutational patterns, exon and transcript level expressions of the partner genes, and a network-based centrality score for prioritizing functional fusions. Our portal aims to be a broadly applicable and user friendly resource for cancer gene annotation and is publicly available at http://www.tumorfusions.org."
publication = "Hu X, Wang Q, Tang M, et al. TumorFusions: an integrative resource for cancer-associated transcript fusions[J]. Nucleic Acids Research, 2017."
# GTEx: Genotype-Tissue Expression project (eQTL resource).
[db.item.gtex]
url = "https://www.gtexportal.org"
# The text spans two lines. A single-line basic string ("...") may not
# contain a raw newline, so this value uses a multi-line basic string
# ("""..."""). The value itself, including the line break, is unchanged.
description = """Correlations between genotype and tissue-specific gene expression levels will help identify regions of the genome that influence whether and how much a gene is expressed. GTEx will help researchers to understand inherited susceptibility to disease and will be a resource database and tissue bank for many studies in the future. The Genotype-Tissue Expression (GTEx) project aims to provide to the scientific community a resource with which to study human gene expression and regulation and its relationship to genetic variation. This project will collect and analyze multiple human tissues from donors who are also densely genotyped, to assess genetic variation within their genomes. By analyzing global RNA expression within individual tissues and treating the expression levels of genes as quantitative traits, variations in gene expression that are highly correlated with genetic variation can be identified as expression quantitative trait loci, or eQTLs. Despite the rapid progress achieved using genome-wide association studies (GWAS; See: http://www.genome.gov/26525384 ) to identify genetic changes associated with common human diseases, such as heart disease, cancer, diabetes, asthma, and stroke, a large majority of these genetic changes lies outside of the protein-coding regions of genes and often even outside of the genes themselves, making it difficult to discern which genes are affected and by what mechanism. The comprehensive identification of human eQTLs will greatly help to identify genes whose expression is affected by genetic variation, and will provide a valuable basis on which to study the mechanism of that gene regulation. The project will also involve consultation and research into the ethical, legal and social issues raised by the research, support for statistical methods development, and creation of a database to house existing and GTEx-generated eQTL data . 
The database will allow users to view and download computed eQTL results and provide a controlled access system for de-identified individual-level genotype, expression, and clinical data. The associated tissue repository will also serve as a resource for many additional kinds of analyses."""
publication = [
    "Consortium G. Human genomics. The Genotype-Tissue Expression (GTEx) pilot analysis: multitissue gene regulation in humans[J]. Science, 2015, 348(6235):648-60.",
    "Consortium G, Battle A, Brown C D, et al. Genetic effects on gene expression across human tissues[J]. Nature, 2017, 550(7675):204.",
]
# HGNC: approved symbols and names for human loci (genenames.org).
[db.item.hgnc]
url = "https://www.genenames.org/"
description = "HGNC is responsible for approving unique symbols and names for human loci, including protein coding genes, ncRNA genes and pseudogenes, to allow unambiguous scientific communication. genenames.org is a curated online repository of HGNC-approved gene nomenclature, gene families and associated resources including links to genomic, proteomic and phenotypic information."
# Two entries: the journal article, then the database's own attribution line.
publication = [
    "Gray KA, Yates B, Seal RL, Wright MW, Bruford EA. genenames.org: the HGNC resources in 2015. Nucleic Acids Res. 2015 Jan;43(Database issue):D1079-85. doi: 10.1093/nar/gku1071. PMID:25361968",
    "HGNC Database, HUGO Gene Nomenclature Committee (HGNC), EMBL Outstation - Hinxton, European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridgeshire, CB10 1SD, UK www.genenames.org.",
]
[db.item.atcircdb]
# AtCircDB: predicted and validated Arabidopsis circular RNAs.
url = "http://genome.sdau.edu.cn/circRNA/index.php"
# Two paragraphs; the line break between them is part of the value.
description = """
Circular RNA not only functions as a potential competitive target for miRNA, but also regulates transcription and interacts with RNA-binding proteins. \
Because of the structural stability of the circular form, these molecules are promising candidates for intervening in a number of biological pathways, and may be a high value tool for pharmaceutical research in human and photosynthesis in plant.
Based on our previous research, we systematically investigated 622 RNA-Seq samples from 87 independent studies hosted at NCBI SRA, and extracted all related circular RNAs. \
To improve the prediction accuracy, we not only applied a straightforward metric to screen and rank the circular RNA, but also incorporated exon boundaries as well as circular RNA candidates from previous studies into this resource to provide robust evidence for experimental biologists. \
In regards of the interaction between miRNA and circular RNAs, we utilized psRNAtarget and TAPIR to evaluate the statistical significance. \
Together, this database will host all predicted and validated Arabidopsis circular RNAs, and provide valuable and comprehensive information for studying this newly emerging non-coding RNA.\
"""
publication = [
    "Ye J, Wang L, Li S, Zhang Q, Zhang Q, Tang W, Wang K, Song K, Sablok G, Sun X*, Zhao H*; AtCircDB: a tissue-specific database for Arabidopsis circular RNAs. Brief Bioinform 2017 bbx089. doi: 10.1093/bib/bbx089.",
    "Sun X, Wang L, Ding J, Wang Y, Wang J, Zhang X, Che Y, Liu Z, Zhang X, Ye J, Wang J, Sablok G, Deng Z, Zhao H. Integrative analysis of Arabidopsis thaliana transcriptomics reveals intuitive splicing mechanism for circular RNA. FEBS Lett. 2016. 590(20):3510-3516. ",
]
[db.item.circnet]
# CircNet: tissue-specific circRNA expression profiles and circRNA-miRNA-gene networks.
url = "http://circnet.mbc.nctu.edu.tw/"
description = """\
Circular RNAs (circRNAs) represent a new type of regulatory noncoding RNA that only recently has been identified and cataloged. \
Emerging evidence indicates that circRNAs exert a new layer of post-transcriptional regulation of gene expression. \
In this study, we utilized transcriptome sequencing datasets to systematically identify the expression of circRNAs (including known and newly identified ones by our pipeline) in 464 RNA-seq samples, and then constructed the CircNet database (http://circnet.mbc.nctu.edu.tw/) that provides the following resources: (i) novel circRNAs, (ii) integrated miRNA-target networks, (iii) expression profiles of circRNA isoforms, (iv) genomic annotations of circRNA isoforms (e.g., 282,948 exon positions), and (v) sequences of circRNA isoforms. \
The CircNet database is to our knowledge the first public database that provides tissue-specific circRNA expression profiles and circRNA-miRNA-gene regulatory networks. \
It not only extends the most up to date catalog of circRNAs but also provides a thorough expression analysis of both previously reported and novel circRNAs. \
Furthermore, it generates an integrated regulatory network that illustrates the regulation between circRNAs, miRNAs and genes.\
"""
publication = """\
Liu Y C, Li J R, Sun C H, et al. \
CircNet: a database of circular RNAs derived from transcriptome sequencing data[J]. \
Nucleic acids research, 2016, 44(D1): D209-D215.\
"""
[db.item.circbase]
# circBase: merged and unified circRNA data sets with supporting expression evidence.
url = "http://circrna.org/"
description = """\
Recently, several laboratories have reported thousands of circular RNAs (circRNAs) in animals. \
Numerous circRNAs are highly stable and have specific spatiotemporal expression patterns. \
Even though a function for circRNAs is unknown, these features make circRNAs an interesting class of RNAs as possible biomarkers and for further research. \
We developed a database and website, “circBase,” where merged and unified data sets of circRNAs and the evidence supporting their expression can be accessed, downloaded, and browsed within the genomic context. \
circBase also provides scripts to identify known and novel circRNAs in sequencing data. \
The database is freely accessible through the web server at http://www.circbase.org/.\
"""
publication = """\
Glažar P, Papavasileiou P, Rajewsky N. \
circBase: a database for circular RNAs[J]. \
Rna, 2014, 20(11): 1666-1670.\
"""
[db.item.circrnadb]
# circRNADb: human circular RNAs with protein-coding annotations.
url = "http://202.195.183.4:8000/circrnadb/circRNADb.php"
description = """\
circRNADb (version1.0.0), circular RNA (or circRNA) Database, is a comprehensive database for human circular RNAs with protein-coding annotations. \
It is freely available for non-commercial use. \
The latest version of this circRNA database contains 32,914 exonic circRNAs with 16,328 protein-coding annotations, of which 46 circRNAs from 37 genes were found to have their corresponding proteins expressed according mass spectrometry. \
circRNADb can be a valuable resource for large-scale studies of circRNA in humans.\
"""
publication = """\
Chen X, Han P, Zhou T, et al. \
circRNADb: a comprehensive database for human circular RNAs with protein-coding annotations[J]. \
Scientific reports, 2016, 6.\
"""
[db.item.exorbase]
# exoRBase: circRNA, lncRNA and mRNA from RNA-seq of human blood exosomes.
url = "http://www.exorbase.org/exoRBase/toIndex"
# Three paragraphs; the line breaks between them are part of the value.
description = """
exoRBase is a repository of circular RNA (circRNA), long non-coding RNA (lncRNA) and messenger RNA (mRNA) derived from RNA-seq data analyses of human blood exosomes. \
Experimental validations from published literature are also included.
exoRBase features the integration and visualization of RNA expression profiles based on normalized RNA-seq data spanning both normal individuals and patients with different diseases.
exoRBase aims to collect and characterize all long RNA species in human blood exosomes. \
The annotation, expression level and possible original tissues are provided. \
exoRBase will aid researchers in identifying molecular signatures in blood exosomes and will trigger new circulating biomarker discovery and functional implication for human diseases.\
"""
publication = """\
Li S, Li Y, Chen B, et al. \
exoRBase: a database of circRNA, lncRNA and mRNA in human blood exosomes[J]. \
Nucleic Acids Research, 2017.\
"""
[db.item.exsnp]
# ExSNP: consensus (gold-standard) eQTL sets integrated across populations and cell types.
url = "http://www.exsnp.org"
description = """\
Genome-wide association studies (GWAS) of human complex disease have identified a large number of disease associated genetic loci, distinguished by an altered frequency of specific single nucleotide polymorphisms (SNPs) among individuals with a particular disease, compared to controls. \
However, most of these risk loci do not provide direct information on the biological basis of a disease or on the underlying mechanisms. \
Recent genome-wide expression quantitative trait loci (eQTLs) association studies have provided information on genetic factors, especially SNPs, associated with gene expression variation. \
These eQTLs likely contribute to phenotype diversity and disease susceptibility, but interpretation is handicapped by low reproducibility of the expression results. \
Our primary goal is to establish a gold-standard list of consensus eQTLs by integrating publicly available data for specific human populations and cell types, so as to efficiently prioritize functional SNPs. \
We used linkage disequilibrium data from Hapmap and the 1000 Genome Project to integrate the results of eQTL studies. \
Separate gold-standard sets for various populations allowed us to investigate eQTLs which contribute to population-specific expression variation. \
Additionally, tissue-specific eQTL associations were identified by comparing eQTL data from six cell types: LCLs, B cells, Monocytes, Brain, Liver, and Skin. \
Moreover, to discover the role of these eQTLs play in human common diseases, we have integrated the current gold standard data with SNPs in disease risk loci from GWA studies of seven common human diseases.\
"""
publication = [
    "Yu CH, Pal LR, & Moult J. (2016). Consensus Genome-Wide Expression Quantitative Trait Loci and Their Relationship with Human Complex Trait Disease. OMICS, 20(7):400-14. PMID: 27428252",
    "Pal LR, Yu CH, Mount SM, & Moult J. (2015). Insights from GWAS: emerging landscape of mechanisms underlying complex trait disease. BMC Genomics, 16 Suppl 8:S4 PMID: 26110739",
]
[db.item.rvarbase]
# rVarBase: regulatory-feature annotation of human variants (successor of rSNPBase).
url = "http://rv.psych.ac.cn"
description = """\
rVarBase annotates variant's regulatory feature in three fields: chromatin state of the region surrounding variant, regulatory elements overlapped with variant, and variant's potential target genes. \
It also provides optioned extended annotation for variants, including: LD-proxies of known SNP, SNP/CNV that is overlapped with or located in queried variant, traits (disease and expression quantitative trait) associated with variant. \
rVarBase is an updated version of the database rSNPBase, it is consistent with the old version in utilizing experimentally supported regulatory elements from ENCODE and other data resources to make relevant annotation (such as involved regulatory manner and potential target gene).\
"""
publication = """\
Guo, L., Du, Y., Qu, S., & Wang, J. (2015). \
rVarBase: an updated database for regulatory features of human variants. \
Nucleic acids research, gkv1107 PMID:26503253\
"""
[db.item.seeqtl]
# seeQTL: searchable eQTL database including a meta-analysis of HapMap eQTL data.
url = "http://www.bios.unc.edu/research/genomic_software/seeQTL/"
description = """\
seeQTL is a comprehensive and versatile eQTL database, including various eQTL studies and a meta-analysis of HapMap eQTL information. \
The database presents eQTL association results in a convenient browser, using both segmented local-association plots and genome-wide Manhattan plots.\
"""
publication = """\
Xia K, Shabalin A A, Huang S, et al. \
seeQTL: a searchable database for human eQTLs[J]. \
Bioinformatics, 2011, 28(3): 451-452. PMID:22171328\
"""
[db.item.fantom]
# FANTOM: RIKEN-led consortium for functional annotation of transcriptomes.
url = "http://fantom.gsc.riken.jp"
description = """\
FANTOM is an international research consortium established by Dr. Hayashizaki and his colleagues in 2000 to assign functional annotations to the full-length cDNAs that were collected during the Mouse Encyclopedia Project at RIKEN. \
FANTOM has since developed and expanded over time to encompass the fields of transcriptome analysis. \
The object of the project is moving steadily up the layers in the system of life, progressing thus from an understanding of the ‘elements’ - the transcripts - to an understanding of the ‘system’ - the transcriptional regulatory network, in other words the ‘system’ of an individual life form.\
"""
# One array element per cited paper.
publication = [
    "Andersson R, Gebhard C, Miguel-Escalada I, et al. An atlas of active enhancers across human cell types and tissues[J]. Nature, 2014, 507(7493): 455-461.",
    "Fantom Consortium. A promoter-level mammalian expression atlas[J]. Nature, 2014, 507(7493): 462-470.",
]
[db.item.snipa3]
# SNiPA: functional annotation and linkage-disequilibrium data for bi-allelic variants.
url = "http://snipa.helmholtz-muenchen.de/snipa3"
description = """\
SNiPA offers both functional annotations and linkage disequilibrium information for bi-allelic genomic variants (SNPs and SNVs). \
SNiPA combines LD data based on the 1000 Genomes Project with various annotation layers, such as gene annotations, phenotypic trait associations, and expression-/metabolic quantitative trait loci. \
See the documentation for all data sources integrated into SNiPA. \
For information on updates and new releases, see the Release Notes.\
"""
publication = """\
Arnold, M., Raffler, J., Pfeufer, A., Suhre, K., & Kastenmüller, G. (2014). \
SNiPA: an interactive, genetic variant-centered annotation browser. \
Bioinformatics, 31(8), 1334-1336.\
"""
[db.item.rddpred]
# RDDpred: prior data for a condition-specific RNA-editing prediction model.
url = "http://epigenomics.snu.ac.kr/RDDpred/prior_data"
# Line-oriented text: every line break, including the final one, is part of the value.
description = """
RDDpred: A condition-specific RNA-editing prediction model from RNA-seq data
1) RDDpred deduces condition-specific training examples without any experimental validations to construct a predictor.
2) As far as we know, RDDpred is the very first machine-learning based automated pipeline for RNA-editing prediction.
3) RDDpred successfully reproduced the results of two previous studies (95%, 90%),
with showing significant NPV (84%, 75%) and the prediction procedures are finished in reasonable time (18 hrs).
"""
publication = """\
Kim M, Hur B, Kim S. \
RDDpred: a condition-specific RNA-editing prediction model from RNA-seq data[J]. \
BMC genomics, 2016, 17(Suppl 1).\
"""
[db.item.lncediting]
# LNCediting: functional prediction of RNA editing in long noncoding RNAs.
url = "http://bioinfo.life.hust.edu.cn/LNCediting"
description = """\
RNA editing is a widespread post-transcriptional mechanism that can make discrete changes to specific nucleotide sequences within a RNA transcripts. \
RNA editing events can result in missense codon changes in mRNA, modulation of alternative splicing in mRNA, or modification of regulatory RNAs and their binding sites in small noncoding RNA, such as miRNA. \
Recent studies have developed computational methods to accurately detect more than 2 million A-to-I RNA editing from next-generation sequencing data in different species. \
However, the vast majority of these RNA sites are in noncoding regions of the genome and have unknown functional relevance. \
LNCediting provides a comprehensive resource for the functional prediction of RNA editing in long noncoding RNAs (lncRNAs).\
"""
publication = """\
Jing Gong†, Chunjie Liu†, Wei Liu, Yu Xiang, Lixia Diao, An-Yuan Guo∗ and Leng Han∗. \
Nucl. Acids Res. (2016). doi: 10.1093/nar/gkw835.\
"""
[db.item.eggnog]
# eggNOG: Orthologous Groups of proteins with summarized functional annotations.
url = "http://eggnogdb.embl.de/#/app/home"
description = """\
eggNOG is a public resource that provides Orthologous Groups (OGs) of proteins at different taxonomic levels, each with integrated and summarized functional annotations. \
Developments since the latest public release include changes to the algorithm for creating OGs across taxonomic levels, making nested groups hierarchically consistent. \
This allows for a better propagation of functional terms across nested OGs and led to the novel annotation of 95 890 previously uncharacterized OGs, increasing overall annotation coverage from 67% to 72%. \
The functional annotations of OGs have been expanded to also provide Gene Ontology terms, KEGG pathways and SMART/Pfam domains for each group. \
Moreover, eggNOG now provides pairwise orthology relationships within OGs based on analysis of phylogenetic trees. \
We have also incorporated a framework for quickly mapping novel sequences to OGs based on precomputed HMM profiles. \
Finally, eggNOG version 4.5 incorporates a novel data set spanning 2605 viral OGs, covering 5228 proteins from 352 viral proteomes. \
All data are accessible for bulk downloading, as a web-service, and through a completely redesigned web interface. \
The new access points provide faster searches and a number of new browsing and visualization capabilities, facilitating the needs of both experts and less experienced users. \
eggNOG v4.5 is available at http://eggnog.embl.de.\
"""
publication = """\
eggNOG 4.5: a hierarchical orthology framework with improved functional annotations for eukaryotic, prokaryotic and viral sequences. \
Jaime Huerta-Cepas, Damian Szklarczyk, Kristoffer Forslund, Helen Cook, Davide Heller, Mathias C. Walter, Thomas Rattei, Daniel R. Mende, Shinichi Sunagawa, Michael Kuhn, Lars Juhl Jensen, Christian von Mering, and Peer Bork. \
Nucl. Acids Res. (04 January 2016) 44 (D1): D286-D293. doi: 10.1093/nar/gkv1248\
"""
[db.item.rbp_var]
# RBP-Var: functional variants that may influence RNA-protein interactions.
title = "RBP-Var2: A platform for exploring functional variants involved in post-transcriptional regulation mediated by RNA-binding proteins"
description = """\
RBP-Var is a database for annotation of functional variants which potentially influence RNA-protein interactions by changing RNA structure in the H. sapiens genome. \
It contains dbSNPs and RNA editing events in RBP binding sites (rbSNVs), the change of RNA secondary structure induced by rbSNV, the rbSNV-induced gain/loss of binding sites of miRNA and potential functional rbSNVs which could impact RBP binding. \
In addition, RBP-Var also integrates GWAS data, eQTL data, ClinVar data, RNA expression and COSMIC data into selection of functional SNVs for genetic association studies.\
"""
publication = """\
Mao F, Xiao L, Li X, et al. \
RBP-Var: a database of functional variants involved in regulation mediated by RNA-binding proteins[J]. \
Nucleic Acids Research, 2016, 44(Database issue):D154-D163.\
"""
tag = ["NGS", "database"]
[db.item.docm]
# DoCM: curated list of known, disease-causing mutations with source citations.
title = "the Database of Curated Mutations"
description = """\
DoCM, the Database of Curated Mutations, is a highly curated database of known, disease-causing mutations \
that provides easily explorable variant lists with direct links to source citations for easy verification.\
"""
publication = """\
A correspondence describing DoCM has been published in Nature Methods: \
DoCM: a database of curated mutations in cancer. \
Nature Methods (2016) doi:10.1038/nmeth.4000.\
"""
tag = ["NGS", "database"]
[db.item.cancer_hotspots]
# Cancer Hotspots: statistically significant recurrent mutations in cancer genomics data.
title = "A RESOURCE FOR STATISTICALLY SIGNIFICANT MUTATIONS IN CANCER"
description = """\
This resource is maintained by the Kravis Center for Molecular Oncology at Memorial Sloan Kettering Cancer Center. \
It provides information about statistically significantly recurrent mutations identified in large scale cancer genomics data.\
"""
publication = [
    "Chang et al., Accelerating discovery of functional mutant alleles in cancer. Cancer Discovery, 10.1158/2159-8290.CD-17-0321 (2017)",
    "Chang et al., Identifying recurrent mutations in cancer reveals widespread lineage diversity and mutational specificity. Nature Biotechnology 34, 155–163 (2016)",
]
tag = ["NGS", "database"]
[db.item.intogen]
# IntOGen: somatic mutations, genes and pathways involved in tumorigenesis.
title = "Integrative Onco Genomics"
description = """\
IntOGen-mutations platform (http://www.intogen.org/mutations/) summarizes somatic mutations, genes and pathways involved in tumorigenesis. \
It identifies and visualizes cancer drivers, analyzing 4,623 exomes from 13 cancer sites. \
It provides support to cancer researchers, aids the identification of drivers across tumor cohorts and helps rank mutations for better clinical decision-making.\
"""
# One array element per cited paper. The second entry carries the article's
# full title, separated from the author list and the journal by periods.
publication = [
    "Rubio-Perez, C., Tamborero, D., Schroeder, MP., Antolín, AA., Deu-Pons,J., Perez-Llamas, C., Mestres, J., Gonzalez-Perez, A., Lopez-Bigas, N. In silico prescription of anticancer drugs to cohorts of 28 tumor types reveals novel targeting opportunities. Cancer Cell 27 (2015), pp. 382-396",
    "Gonzalez-Perez A, Perez-Llamas C, Deu-Pons J, Tamborero D, Schroeder MP, Jene-Sanz A, Santos A & Lopez-Bigas N. IntOGen-mutations identifies cancer drivers across tumor types. Nature Methods 2013; doi:10.1038/nmeth.2642",
]
tag = ["NGS", "database"]
[db.item.disgenet]
# DisGeNET: genes and variants associated with human diseases.
title = "a database of gene-disease associations"
# Two paragraphs; the line break between them is part of the value.
description = """
DisGeNET is a discovery platform containing one of the largest publicly available collections of genes and variants associated to human diseases. \
DisGeNET integrates data from expert curated repositories, GWAS catalogues, animal models and the scientific literature. \
DisGeNET data are homogeneously annotated with controlled vocabularies and community-driven ontologies. \
Additionally, several original metrics are provided to assist the prioritization of genotype–phenotype relationships.
The current version of DisGeNET (v5.0) contains 561,119 gene-disease associations (GDAs), between 17,074 genes and 20,370 diseases, disorders, traits, and clinical or abnormal human phenotypes, \
and 135,588 variant-disease associations (VDAs), between 83,002 SNPs and 9,169 diseases and phenotypes.\
"""
publication = [
    "DisGeNET: a comprehensive platform integrating information on human disease-associated genes and variants, Nucleic Acids Research, Volume 45, Issue D1, 4 January 2017, Pages D833–D839, https://doi.org/10.1093/nar/gkw943",
]
tag = ["NGS", "database"]
[db.item.cgi]
# CGI: identifies driver and therapeutically actionable tumor alterations.
title = "Cancer Genome Interpreter"
# Two paragraphs; the line break between them is part of the value.
description = """
Cancer Genome Interpreter is designed to support the identification of tumor alterations that drive the disease and detect those that may be therapeutically actionable. \
CGI relies on existing knowledge collected from several resources and on computational methods that annotate the alterations in a tumor according to distinct levels of evidence.
With a list of genomic alterations and the cancer type as input, the CGI identifies validated driver alterations and annotates and classifies the remaining variants of unknown significance. \
Then, alterations that are biomarkers of drug response or interact with existing chemical compounds are identified according to current knowledge.\
"""
publication = """\
Cancer Genome Interpreter Annotates The Biological And Clinical Relevance Of Tumor Alterations. \
bioRxiv 140475; doi: https://doi.org/10.1101/140475\
"""
tag = ["NGS", "database"]
[db.item.oncokb]
# OncoKB: annotated mutations, fusions and copy number alterations in cancer genes.
title = "Precision Oncology Knowledge Base"
# The hard line breaks below are part of the value.
description = """
To date, > 3,000 unique mutations, fusions, and copy number alterations in 418 cancer-associated
genes have been annotated. To test the utility of OncoKB, we annotated all genomic
events in 5,983 primary tumor samples in 19 cancer types. Forty-one percent of samples harbored
at least one potentially actionable alteration, of which 7.5% were predictive of clinical
benefit from a standard treatment. OncoKB annotations are available through a public Web
resource (http://oncokb.org) and are incorporated into the cBioPortal for Cancer Genomics to
facilitate the interpretation of genomic alterations by physicians and researchers."""
publication = "Chakravarty D, Gao J, Phillips S M, et al. OncoKB: A Precision Oncology Knowledge Base.[J]. Jco Precision Oncology, 2017, 2017."
[db.item.mirdb]
# miRDB: predicted miRNA targets (MirTarget) and functional annotations.
title = "miRDB: an online resource for microRNA target prediction and functional annotations"
description = """\
miRDB is an online database for miRNA target prediction and functional annotations. \
All the targets in miRDB were predicted by a bioinformatics tool, MirTarget, which was developed by analyzing thousands of miRNA-target interactions from high-throughput sequencing experiments. \
Common features associated with miRNA target binding have been identified and used to predict miRNA targets with machine learning methods. \
miRDB hosts predicted miRNA targets in five species: human, mouse, rat, dog and chicken. \
As a recent update, users may provide their own sequences for customized target prediction. \
In addition, through combined computational analyses and literature mining, functionally active miRNAs in humans and mice were identified. \
These miRNAs, as well as associated functional annotations, are presented in the FuncMir Collection in miRDB.\
"""
publication = """\
Nathan Wong and Xiaowei Wang (2015) \
miRDB: an online resource for microRNA target prediction and functional annotations. \
Nucleic Acids Research. 43(D1):D146-152.\
"""
[db.item.mirtarbase]
# miRTarBase: manually collected, experimentally validated miRNA-target interactions.
title = "miRTarBase: the experimentally validated microRNA-target interactions database"
# The line break after "three hundred and" is part of the value.
description = """
As a database, miRTarBase has accumulated more than three hundred and
sixty thousand miRNA-target interactions (MTIs), \
which are collected by manually surveying pertinent literature after NLP of the text systematically \
to filter research articles related to functional studies of miRNAs.\
"""
publication = """\
miRTarBase update 2018: a resource for experimentally validated microRNA-target interactions. \
(2018) Nucleic Acids Research. PMID:29126174\
"""
[db.item.mirnest]
# miRNEST: integrative collection of animal, plant and virus microRNA data.
title = "miRNEST is an integrative collection of animal, plant and virus microRNA data."
# Line-oriented list: every line break, including the final one, is part of the value.
description = """
The database provides you with:
a) microRNAs from our high-throughput predictions as well as from external databases
b) predicted targets for plant candidates and experimental target support
c) integrated data from 15 external databases, which includes e.g. sequences, polymorphism, expression, promoters.
d) mirtrons, miRNA gene structures, degradome data and more!
"""
# The trailing space inside the string is kept from the original value.
publication = "Szczesniak MW, Makalowska I (2014) miRNEST 2.0: a database of plant and animal microRNAs. Nucleic Acids Res. 42:D74-D77. "
[db.item.rbpdb]
# RBPDB: RNA-binding proteins linked to curated observations of RNA binding.
title = "RBPDB, the database of RNA-binding protein specificities"
# The line break after "description of the database," is part of the value.
description = """
RBPDB is a collection of RNA-binding proteins linked to a curated database of published observations of RNA binding. \
For a description of the database,
including types of proteins and experiments represented, data sources, and curation methodology, \
please refer to the RBPDB paper at Nucleic Acids Research.\
"""
publication = """\
RBPDB: a database of RNA-binding specificities. \
2010 Nucleic Acids Research. doi:10.1093/nar/gkq1069\
"""
[db.item.appris]
# APPRIS: annotations of alternative splice isoforms and principal isoforms.
title = "APPRIS, annotating principal splice isoforms"
# Plain-text name here, matching how the title and publication spell it.
description = """\
APPRIS is a system that deploys a range of computational methods to provide annotations of alternative splice isoforms \
and identify principal isoforms for vertebrate species.\
"""
publication = """\
APPRIS 2017: principal isoforms for multiple gene sets. \
Rodriguez JM, Rodriguez-Rivas J, Domenico TD, Vázquez J, Valencia A, and Tress ML. \
Nucleic Acids Res. Database issue; 2017 Oct 23. DOI:10.1093/nar/gkx997\
"""
[db.item.lncipedia]
# LNCipedia: public database of lncRNA sequence and annotation.
title = "A comprehensive compendium of human long non-coding RNAs"
description = """\
LNCipedia is a public database for long non-coding RNA (lncRNA) sequence and annotation. \
The current release contains 127,802 transcripts and 56,946 genes.\
"""
publication = """\
Pieter-Jan Volders; Kenneth Verheggen; Gerben Menschaert; Klaas Vandepoele; Lennart Martens; Jo Vandesompele and Pieter Mestdagh; \
Nucleic Acids Research 2015 doi:10.1093/nar/gku1060\
"""
[db.item.msigdb]
# MSigDB: annotated gene sets for use with GSEA software.
title = "Molecular Signatures Database"
description = """\
MSigDB (Molecular Signatures Database), \
The Molecular Signatures Database (MSigDB) is a collection of annotated gene sets for use with GSEA software.\
"""
publication = """\
Gene set enrichment analysis: A knowledge-based approach for interpreting genome-wide expression profiles, \
2005, PNAS, doi:10.1073/pnas.0506580102\
"""
[db.item.mircancer]
# miRCancer: text-mined miRNA expression profiles in human cancers.
title = "miRCancer : microRNA Cancer Association Database"
description = """\
miRCancer provides comprehensive collection of microRNA (miRNA) expression profiles in various human cancers which are automatically extracted from published literatures in PubMed. \
It utilizes text mining techniques for information collection. \
Manual revision is applied after auto-extraction to provide 100% precision.\
"""
publication = """\
miRCancer: a microRNA-cancer association database constructed by text mining on literature \
Boya Xie; Qin Ding; Hongjin Han; Di Wu \
Bioinformatics, Vol. 29, Issue 5, pp.638-644, 2013\
"""
[db.item.dcdb]
# DCDB: drug combinations, approved and investigational.
title = "Drug Combination Database"
description = """\
DCDB (Drug Combination Database), Accumulating scientific and clinical evidences have suggested the use of drug combinations as a safe and effective approach, to treat complicated and refractory diseases. \
The Drug Combination Database (DCDB) is devoted to the research and development of multi-component drugs. \
The current version of DCDB collected 1363 drug combinations (330 approved and 1033 investigational, including 237 unsuccessful usages), involving 904 individual drugs, 805 targets.\
"""
publication = """\
DCDB 2.0: a major update of the drug combination database. \
Yanbin Liu, Qiang Wei, et.al, \
Database (2014) 2014 : bau124 doi: 10.1093/database/bau124\
"""
[db.item.oncomirdb]
# OncomiRDB: experimentally verified oncogenic and tumor-suppressive miRNAs.
title = "A Database for Oncogenic & Tumor-Suppressive MicroRNAs"
description = """\
OncomiRDB, aiming at annotating the experimentally verified oncogenic \
and tumor-suppressive miRNAs from literature.\
"""
publication = """\
Dongfang Wang, Jin Gu#, Ting Wang, Zijian Ding. \
OncomiRDB: a database for the experimentally verified oncogenic and tumor-suppressive microRNAs. \
Bioinformatics 2014, 30(15):2237-2238.\
"""
[db.item.islandviewer]
# IslandViewer: predicted genomic islands for sequenced bacterial and archaeal genomes.
title = "An integrated interface for computational identification and visualization of genomic islands"
description = """\
This web site was developed so that researchers could easily view and download genomic islands \
for all published sequenced bacterial and archaeal genomes \
that have been predicted using the currently most accurate GI prediction methods.\
"""
publication = """\
Bertelli, C. et al. 2017. \
'IslandViewer 4: Expanded prediction of genomic islands for larger-scale datasets' \
Nucleic Acids Research. 2017 May 2. doi: 10.1093/nar/gkx343\
"""
[db.item.hpdi]
# hPDI: experimental human protein-DNA interactions from protein microarray assays.
title = "Human Protein-DNA Interactome (hPDI)"
description = """\
hPDI (Human Protein-DNA Interactome), The hPDI database holds experimental protein-DNA interaction data for humans identified by protein microarray assays. \
The current release of hPDI contains 17,718 protein-DNA interactions for 1013 human DNA-binding proteins. \
These DNA-binding proteins include 493 human transcription factors (TFs) and 520 unconventional DNA binding proteins (uDBPs).\
"""
# One array element per cited paper, like the other multi-paper entries in
# this file (e.g. db.item.hgnc), rather than a single ";"-joined string.
publication = [
    "Xie, Z., Hu, S.H., Blackshaw, S., Zhu, H. and Qian, J. (2009) hPDI: a database of experimental human protein-DNA interactions, Bioinformatics.",
    "Hu, S.H., Xie, Z., Onishi, A., Yu, X.P., Jiang, L.Z., Lin, J., Rho, H.S., Woodard, C., Wang, H., Jeong, J.S., Long, S.Y., He, X.F., Blackshaw, S., Qian, Q. and Zhu, H. (2009) Profiling the Human Protein-DNA Interactome Reveals ERK2 as a Transcriptional Repressor of Interferon Signalling, Cell, 139, 610-622.",
]
[db.item.dbsno]
# dbSNO: database of cysteine S-nitrosylation.
title = "dbSNO: Database of Cysteine S-NitrOsylation"
description = """\
dbSNO, Protein S-nitrosylation (SNO) is a reversible post-translational modification (PTM) and involves the covalent attachment of nitric oxide (NO) to the thiol group of cysteine (Cys) residues. \
Given the increasing number of proteins reported to be regulated by this modification, S-nitrosylation is considered to act, in a manner analogous to phosphorylation, as a pleiotropic regulator that elicits dual effects to regulate diverse pathophysiological processes by altering protein function, stability, and conformation change in various cancers and human disorders.\
"""
publication = """\
Yi-Ju Chen, Wei-Chieh Ching, Jinn-Shiun Chen, Tzong-Yi Lee, Cheng-Tsung Lu, Hsiao-Chiao Chou, Pei-Yi Lin, Kay-Hooi Khoo, Jenn-Han Chen, and Yu-Ju Chen (2014) \
'Decoding the S-nitrosoproteomic Atlas in Individualized Human Colorectal Cancer Tissues Using a Label-free Quantitation Strategy', \
Journal of Proteome Research, DOI: 10.1021/pr5002675\
"""
[db.item.phosphonetworks]
# PhosphoNetworks: map of human phosphorylation (kinase-substrate) networks.
title = "A database for experimentally determined kinase-substrate relationships"
description = """\
PhosphoNetworks, a combined bioinformatics and protein microarray-based strategy \
to construct a high-resolution map of the human phosphorylation networks.\
"""
publication = """\
Hu, J., Rho H., Newman, R., Zhang, J., Zhu, Heng., Qian, J. \
'PhosphoNetworks: A Database for Human Phosphorylation Networks'. \
Bioinformatics. doi: 10.1093/bioinformatics/btt627. (2013)\
"""
[db.item.consensuspathdb]
# ConsensusPathDB-human: integrated interaction networks and biochemical pathways.
title = "Interaction networks in Homo sapiens"
description = """\
ConsensusPathDB-human integrates interaction networks in Homo sapiens \
including binary and complex protein-protein, genetic, metabolic, signaling, gene regulatory and drug-target interactions, \
as well as biochemical pathways.\
"""
# The trailing space inside the string is kept from the original value.
publication = "Kamburov, A. et al. (2013) The ConsensusPathDB interaction database: 2013 update. Nucleic Acids Res. "
[db.item.instruct]
# INstruct: protein interactome networks annotated to 3D structural resolution.
title = "A database of 3D protein interactome networks with structural resolution"
description = """\
INstruct, a database of high-quality protein interactome networks annotated to 3D structural resolution. \
We currently catalogue 6585 human, 644 A. thaliana, 120 C. elegans, 166 D. melanogaster, 119 M. musculus, 1273 S. cerevisiae, and 37 S. pombe structurally resolved interactions. \
The interactions shown on this site have been curated from some of the most popular interaction databases and filtered to reflect only binary interactions that meet our strict quality criteria. \
The schematic below shows how we are then able to reconstruct 3D interaction interfaces for our high-quality set by using available co-crystal structure\
"""
publication = """\
INstruct: a database of high-quality 3D structurally resolved protein interactome networks. \
Bioinformatics. 2013 Jun 15;29(12):1577-9. doi: 10.1093/bioinformatics/btt181\
"""
[db.item.redoxdb]
# RedoxDB: manually curated, experimentally verified protein oxidative modifications.
title = "RedoxDB: a curated database of protein oxidative modification"
description = """\
RedoxDB, a manually curated database of experimentally verified protein oxidative modification. \
RedoxDB mainly consists of two types of data: dataset (A) includes redox proteins for which the modified Cys have been experimentally verified, and dataset (B) includes redox proteins that the modified Cys have not been determined yet. \
When searching or blasting RedoxDB, user can decide to include dataset(B) or not.\
"""
publication = """\
Sun M., Wang Y., Cheng H., Zhang Q., Ge W., Guo D. \
RedoxDB - a curated database of experimentally verified protein redox modification. \
Bioinformatics, 28(19):2551-2552 (PMID: 22833525).\
"""
# Meta information for the SM2miR database entry.
# Long values are written as multi-line basic strings; each line-ending
# backslash joins the next line without inserting a newline.
[db.item.sm2mir]
title = "SM2miR: a database of the experimentally validated small molecules' effects on microRNA expression"
description = """\
SM2miR, a manual curated database which collects and incorporates the experimentally validated small molecules' effects on miRNA expression in 20 species from the published papers. \
Each entry contains the detailed information about small molecules, miRNAs and their relationships, including species, small molecule name, DrugBank Accession number, PubChem CID, approved by FDA or not, miRNA name, miRBase Accession number, expression pattern of miRNA, experimental detection method, tissues or conditions for detection, evidences in the reference, PubMed ID and the published year of the reference."""
publication = """\
Liu X, Wang S, Meng F, Wang J, Zhang Y, Dai E, Yu X, Li X, Jiang W. \
SM2miR: a database of the experimentally validated small molecules' effects on microRNA expression. \
Bioinformatics. 2013 Feb 1; 29(3): 409-11. doi: 10.1093/bioinformatics/bts698. Epub 2012 Dec 5."""
# Meta information for the Human Metabolome Database (HMDB) entry.
# The publication value holds two citations separated by ";".
[db.item.hmdb]
title = "HMDB: The Human Metabolome Database"
description = "HMDB is an online database of small molecule metabolites found in the human body, which facilitates human metabolomics research including the identification and characterization of human metabolites using NMR and MS."
publication = "Wishart DS, Knox C, Guo AC, et al., HMDB: a knowledgebase for the human metabolome. Nucleic Acids Res. 2009 37(Database issue):D603-610. 18953024 ; Wishart DS, Feunang YD, Marcu A, Guo AC, Liang K, et al., HMDB 4.0 — The Human Metabolome Database for 2018. Nucleic Acids Res. 2018. Jan 4;46(D1):D608-17. 29140435"
# Meta information for the AWESOME database entry.
# Long values are written as multi-line basic strings; each line-ending
# backslash joins the next line without inserting a newline.
[db.item.awesome]
title = "AWESOME, a database of SNPs that affect protein post-translational modifications"
description = """\
Protein post-translational modifications (PTMs), including phosphorylation, ubiquitination, methylation, acetylation, glycosylation et al, are very important biological processes. \
PTM changes in some critical genes, which may be induced by base-pair substitution, are shown to affect the risk of diseases. \
Recently, large-scale exome-wide association studies found that missense single nucleotide polymorphisms (SNPs) play an important role in the susceptibility for complex diseases or traits. \
One of the functional mechanisms of missense SNPs is that they may affect PTMs and leads to a protein dysfunction and its downstream signaling pathway disorder. \
Here, we constructed a database named AWESOME (A Website Exhibits SNP On Modification Event, http://www.awesome-hust.com), which is an interactive web-based analysis tool that systematically evaluates the role of SNPs on nearly all kinds of PTMs based on 20 available tools. \
We also provided a well-designed scoring system to compare the performance of different PTM prediction tools and help users to get a better interpretation of results. \
Users can search SNPs, genes or position of interest, filter with specific modifications or prediction methods, to get a comprehensive PTM change induced by SNPs. \
In summary, our database provides a convenient way to detect PTM-related SNPs, which may potentially be pathogenic factors or therapeutic targets."""
publication = """\
AWESOME: a database of SNPs that affect protein post-translational modifications. \
Nucleic Acids Res. 2018 Sep 12. doi: 10.1093/nar/gky821."""
# Meta information for the CellMarker database entry.
[db.item.cellmarker]
title = "CellMarker: a manually curated resource of cell markers in human and mouse."
description = "One of the most fundamental questions in biology is what types of cells form different tissues and organs in a functionally coordinated fashion. Larger-scale single-cell sequencing and biology experiment studies are now rapidly opening up new ways to track this question by revealing substantial cell markers for distinguishing different cell types in tissues. Here, we developed the CellMarker database (http://biocc.hrbmu.edu.cn/CellMarker/ or http://bio-bigdata.hrbmu.edu.cn/CellMarker/), aiming to provide a comprehensive and accurate resource of cell markers for various cell types in tissues of human and mouse. By manually curating over 100 000 published papers, 4124 entries including the cell marker information, tissue type, cell type, cancer information and source, were recorded. At last, 13 605 cell markers of 467 cell types in 158 human tissues/sub-tissues and 9148 cell markers of 389 cell types in 81 mouse tissues/sub-tissues were collected and deposited in CellMarker. CellMarker provides a user-friendly interface for browsing, searching and downloading markers of diverse cell types of different tissues. Furthermore, a summarized marker prevalence in each cell type is graphically and intuitively presented through a vivid statistical graph. We believe that CellMarker is a comprehensive and valuable resource for cell researches in precisely identifying and characterizing cells, especially at the single-cell level."
publication = "CellMarker: a manually curated resource of cell markers in human and mouse. Nucleic Acids Res. 2018 Oct 5. doi: 10.1093/nar/gky900."
# Meta information for the LncRNADisease 2.0 database entry.
# Long values are written as multi-line basic strings; each line-ending
# backslash joins the next line without inserting a newline.
[db.item.lncrnadisease]
title = "LncRNADisease 2.0: an updated database of long non-coding RNA-associated diseases."
description = """\
Mounting evidence suggested that dysfunction of long non-coding RNAs (lncRNAs) is involved in a wide variety of diseases. \
A knowledgebase with systematic collection and curation of lncRNA-disease associations is critically important for further examining their underlying molecular mechanisms. \
In 2013, we presented the first release of LncRNADisease, representing a database for collection of experimental supported lncRNA-disease associations. \
Here, we describe an update of the database. \
The new developments in LncRNADisease 2.0 include (i) an over 40-fold lncRNA-disease association enhancement compared with the previous version; (ii) providing the transcriptional regulatory relationships among lncRNA, mRNA and miRNA; (iii) providing a confidence score for each lncRNA-disease association; (iv) integrating experimentally supported circular RNA disease associations. \
LncRNADisease 2.0 documents more than 200 000 lncRNA-disease associations. \
We expect that this database will continue to serve as a valuable source for potential clinical application related to lncRNAs. \
LncRNADisease 2.0 is freely available at http://www.rnanut.net/lncrnadisease/."""
publication = """\
LncRNADisease 2.0: an updated database of long non-coding RNA-associated diseases. \
Nucleic Acids Res. 2018 Oct 4. doi: 10.1093/nar/gky905."""
# Meta information for the EWASdb database entry.
# Long values are written as multi-line basic strings; each line-ending
# backslash joins the next line without inserting a newline.
[db.item.ewasdb]
title = "EWASdb: epigenome-wide association study database."
description = """\
DNA methylation, the most intensively studied epigenetic modification, plays an important role in understanding the molecular basis of diseases. \
Furthermore, epigenome-wide association study (EWAS) provides a systematic approach to identify epigenetic variants underlying common diseases/phenotypes. \
However, there is no comprehensive database to archive the results of EWASs. \
To fill this gap, we developed the EWASdb, which is a part of 'The EWAS Project', to store the epigenetic association results of DNA methylation from EWASs. \
In its current version (v 1.0, up to July 2018), the EWASdb has curated 1319 EWASs associated with 302 diseases/phenotypes. \
There are three types of EWAS results curated in this database: (i) EWAS for single marker; (ii) EWAS for KEGG pathway and (iii) EWAS for GO (Gene Ontology) category. \
As the first comprehensive EWAS database, EWASdb has been searched or downloaded by researchers from 43 countries to date. \
We believe that EWASdb will become a valuable resource and significantly contribute to the epigenetic research of diseases/phenotypes and have potential clinical applications. \
EWASdb is freely available at http://www.ewas.org.cn/ewasdb or http://www.bioapp.org/ewasdb."""
publication = """\
EWASdb: epigenome-wide association study database. \
Nucleic Acids Res. 2018 Oct 13. doi: 10.1093/nar/gky942."""
# Meta information for the CancerSplicingQTL database entry.
[db.item.cancersplicingqtl]
title = "CancerSplicingQTL: a database for genome-wide identification of splicing QTLs in human cancer."
description = "Alternative splicing (AS) is a widespread process that increases structural transcript variation and proteome diversity. Aberrant splicing patterns are frequently observed in cancer initiation, progress, prognosis and therapy. Increasing evidence has demonstrated that AS events could undergo modulation by genetic variants. The identification of splicing quantitative trait loci (sQTLs), genetic variants that affect AS events, might represent an important step toward fully understanding the contribution of genetic variants in disease development. However, no database has yet been developed to systematically analyze sQTLs across multiple cancer types. Using genotype data from The Cancer Genome Atlas and corresponding AS values calculated by TCGASpliceSeq, we developed a computational pipeline to identify sQTLs from 9 026 tumor samples in 33 cancer types. We totally identified 4 599 598 sQTLs across all cancer types. We further performed survival analyses and identified 17 072 sQTLs associated with patient overall survival times. Furthermore, using genome-wide association study (GWAS) catalog data, we identified 1 180 132 sQTLs overlapping with known GWAS linkage disequilibrium regions. Finally, we constructed a user-friendly database, CancerSplicingQTL (http://www.cancersplicingqtl-hust.com/) for users to conveniently browse, search and download data of interest. This database provides an informative sQTL resource for further characterizing the potential functional roles of SNPs that control transcript isoforms in human cancer."
publication = "CancerSplicingQTL: a database for genome-wide identification of splicing QTLs in human cancer. Nucleic Acids Res. 2018 Oct 17. doi: 10.1093/nar/gky954."
# Meta information for the Precision Medicine Knowledge Base (PMKB) entry.
# The description is a multi-line basic string: TOML trims the newline right
# after the opening delimiter, and every other line break (around the
# OBJECTIVE / MATERIALS AND METHODS / RESULTS / DISCUSSION / CONCLUSION
# headings) is part of the value.
[db.item.pmkb]
title = "The cancer precision medicine knowledge base for structured clinical-grade mutations and interpretations."
description = """
OBJECTIVE:
This paper describes the Precision Medicine Knowledge Base (PMKB; https://pmkb.weill.cornell.edu ), an interactive online application for collaborative editing, maintenance, and sharing of structured clinical-grade cancer mutation interpretations.
MATERIALS AND METHODS:
PMKB was built using the Ruby on Rails Web application framework. Leveraging existing standards such as the Human Genome Variation Society variant description format, we implemented a data model that links variants to tumor-specific and tissue-specific interpretations. Key features of PMKB include support for all major variant types, standardized authentication, distinct user roles including high-level approvers, and detailed activity history. A REpresentational State Transfer (REST) application-programming interface (API) was implemented to query the PMKB programmatically.
RESULTS:
At the time of writing, PMKB contains 457 variant descriptions with 281 clinical-grade interpretations. The EGFR, BRAF, KRAS, and KIT genes are associated with the largest numbers of interpretable variants. PMKB's interpretations have been used in over 1500 AmpliSeq tests and 750 whole-exome sequencing tests. The interpretations are accessed either directly via the Web interface or programmatically via the existing API.
DISCUSSION:
An accurate and up-to-date knowledge base of genomic alterations of clinical significance is critical to the success of precision medicine programs. The open-access, programmatically accessible PMKB represents an important attempt at creating such a resource in the field of oncology.
CONCLUSION:
The PMKB was designed to help collect and maintain clinical-grade mutation interpretations and facilitate reporting for clinical cancer genomic testing. The PMKB was also designed to enable the creation of clinical cancer genomics automated reporting pipelines via an API."""
publication = "The cancer precision medicine knowledge base for structured clinical-grade mutations and interpretations. J Am Med Inform Assoc. 2017 May 1;24(3):513-519. doi: 10.1093/jamia/ocw148 (IF: 4.27)."
# Meta information for the Lnc2Cancer 2.0 database entry.
[db.item.lnc2cancer]
title = "Lnc2Cancer v2.0: updated database of experimentally supported long non-coding RNAs in human cancers"
description = """Lnc2Cancer 2.0 (http://www.bio-bigdata.net/lnc2cancer) is an updated database that provides comprehensive experimentally supported associations between lncRNAs and human cancers. In Lnc2Cancer 2.0, we have updated the database with more data and several new features, including (i) exceeding a 4-fold increase over the previous version, recruiting 4989 lncRNA-cancer associations between 1614 lncRNAs and 165 cancer subtypes. (ii) newly adding about 800 experimentally supported circulating, drug-resistant and prognostic-related lncRNAs in various cancers. (iii) appending the regulatory mechanism of lncRNA in cancer, including microRNA (miRNA), transcription factor (TF), variant and methylation regulation. (iv) increasing more than 70 high-throughput experiments (microarray and next-generation sequencing) of lncRNAs in cancers. (v) Scoring the associations between lncRNA and cancer to evaluate the correlations. (vi) updating the annotation information of lncRNAs (version 28) and containing more detailed descriptions for lncRNAs and cancers. Moreover, a newly designed, user-friendly interface was also developed to provide a convenient platform for users. In particular, the functions of browsing data by cancer primary organ, biomarker type and regulatory mechanism, advanced search following several features and filtering the data by LncRNA-Cancer score were enhanced. Lnc2Cancer 2.0 will be a useful resource platform for further understanding the associations between lncRNA and human cancer."""
publication = "Lnc2Cancer v2.0: updated database of experimentally supported long non-coding RNAs in human cancers. Nucleic Acids Res. 2018 Nov 8. doi: 10.1093/nar/gky1096."
# Meta information for the FusionGDB (Fusion Gene annotation DataBase) entry.
[db.item.fusiongdb]
title = "FusionGDB: fusion gene annotation DataBase"
description = """Gene fusion is one of the hallmarks of cancer genome via chromosomal rearrangement initiated by DNA double-strand breakage. To date, many fusion genes (FGs) have been established as important biomarkers and therapeutic targets in multiple cancer types. To better understand the function of FGs in cancer types and to promote the discovery of clinically relevant FGs, we built FusionGDB (Fusion Gene annotation DataBase) available at https://ccsm.uth.edu/FusionGDB. We collected 48 117 FGs across pan-cancer from three representative fusion gene resources: the improved database of chimeric transcripts and RNA-seq data (ChiTaRS 3.1), an integrative resource for cancer-associated transcript fusions (TumorFusions), and The Cancer Genome Atlas (TCGA) fusions by Gao et al. For these ∼48K FGs, we performed functional annotations including gene assessment across pan-cancer fusion genes, open reading frame (ORF) assignment, and retention search of 39 protein features based on gene structures of multiple isoforms with different breakpoints. We also provided the fusion transcript and amino acid sequences according to multiple breakpoints and transcript isoforms. Our analyses identified 331, 303 and 667 in-frame FGs with retaining kinase, DNA-binding, and epigenetic factor domains, respectively, as well as 976 FGs lost protein-protein interaction. FusionGDB provides six categories of annotations: FusionGeneSummary, FusionProtFeature, FusionGeneSequence, FusionGenePPI, RelatedDrug and RelatedDisease."""
publication = "FusionGDB: fusion gene annotation DataBase. Nucleic Acids Res. 2018 Nov 8. doi: 10.1093/nar/gky1067."