If a record was (or more than one were) successfully stored in genetic_alteration, return true ; else false
- *
- *
- * During the import of any single profile data file, at most one record per Entrez_Gene_Id will be successfuly imported to genetic_alteration.
- * Each attempt to import is done through a call to the function storeGeneticAlterations().
- * That function will check an instance variable importSetOfGenes, and if the gene has been previously imported, no new attempt is made (failure).
- * Each time a gene is successfully imported, it is added to importSetOfGenes.
- *
- * MicroRNA are treated specially because of the possible presence of constructed combination forms (such as 'MIR-100/100*' and 'MIR-100/100').
- * In these cases a Hugo_Symbol such as 'hsa-mir-100' may be expected to match the (fake) Entrez_Gene_Id for both of these combination forms.
- * In that case, we want to import several copies of the genetic alteration profile line .. one for each matched gene of type 'miRNA'.
- * This allows the visualization of both CNA event profiles for the microRNA precursor with expression profiles for the microRNA mature form.
- *
- * The current implementation of this code does not attempt to "merge" / "unify" lines in the profile data file which have duplicated Entrez_Gene_Id.
- * Instead, the first encountered line which maps to the Entrez_Gene_Id will be stored as a record in genetic_alteration (returns true).
- * Later lines which attempt to store a record with that Entrez_Gene_Id will not be stored as a record in genetic_alteration (returns false).
- * For microRNA gene aliases it is possible that complex interactions will occur, where an earlier line in the data file stores a record under several Entrez_Gene_Ids, and a later line in the file fails to store records under some of those previously 'used' Entrez_Gene_Ids, but succeeds in storing a record under one or more not previously used Entrez_Gene_Ids. So a microRNA line from the file may be imported "partially successfully" (returns true).
- *
- * Examples Cases:
- * Gene records are P1, P2, P3, P4 (protein coding), M1, M2, M3 (microRNA).
- * Gene_Symbol AMA is gene_alias for M1 and M2, Gene_Symbol AMB is gene_alias for M2 and M3, Gene_Symbol AAMBIG is gene_alias for P3 and P4. Gene_Symbol AMIXED is gene_alias for P1 and M3.
- *
- * Case_1 (the last two lines will be skipped and logged like "Gene P1 (#) found to be duplicated in your file. Duplicated row will be ignored!")
- *
- * Hugo_Symbol | Sample1 | ...
- * |
---|
P1 | 0 | ...
- * |
P2 | 0 | ...
- * |
P1 | 0 | ...
- * |
P1 | 0 | ...
- * |
- *
- * Case_2 (the last line will be skipped and logged like "Gene M1 (#) (given as alias in your file as: AMA) found to be duplicated in your file. Duplicated row will be ignored!" , "Gene M2 (#) (given as alias in your file as: AMA) found to be duplicated in your file. Duplicated row will be ignored!" , "Could not store microRNA or RPPA data" )
- *
- * Hugo_Symbol | Sample1 | ...
- * |
---|
AMA | 0 | ...
- * |
AMA | 0 | ...
- * |
- *
- * Case_3 (the last line in the file will fail with log messages like "Gene symbol AAMBIG found to be ambiguous. Record will be skipped for this gene.")
- *
- * Hugo_Symbol | Sample1 | ...
- * |
---|
P1 | 0 | ...
- * |
P2 | 0 | ...
- * |
AAMBIG | 0 | ...
- * |
- *
- * Case_4 (the second to last line will partially succeed, storing a record in genetic_alteration for gene M3 but failing for M2 with a log message like "Gene M2 (#) (given as alias in your file as: AMB) found to be duplicated in your file. Duplicated row will be ignored!" ; the last line in the file will fail with log messages like "Gene M3 (#) (given as alias in your file as: AMIXED) found to be duplicated in your file. Duplicated row will be ignored!" , "Gene symbol AMIXED found to be ambiguous (a mixture of microRNA and other types). Record will be skipped for this gene.")
- *
- * Hugo_Symbol | Sample1 | ...
- * |
---|
AMA | 0 | ...
- * |
AMB | 0 | ...
- * |
AMIXED | 0 | ...
- * |
- *
- * @param line the line from the profile data file to be parsed
- * @param nrColumns the number of columns, defined by the header line
- * @param sampleStartIndex the index of the first column with a sample name in the header field
- * @param hugoSymbolIndex the index of the column Hugo_Symbol
- * @param entrezGeneIdIndex the index of the column Entrez_Gene_Id
- * @param rppaGeneRefIndex the index of the column Composite.Element.Ref
- * @param isRppaProfile true if this is an rppa profile (i.e. alteration type is PROTEIN_LEVEL and the first column is Composite.Element.Ref)
- * @param isDiscretizedCnaProfile true if this is a discretized CNA profile (i.e. alteration type COPY_NUMBER_ALTERATION and showProfileInAnalysisTab is true)
- * @param daoGene an instance of DaoGeneOptimized ... for use in resolving gene symbols
- * @param orderedSampleList a list of the internal sample ids corresponding to the sample names in the header line
- * @param existingCnaEvents a collection of CnaEvents, to be added to or updated during parsing of individual lines
- * @param daoGeneticAlteration in instance of DaoGeneticAlteration ... for use in storing records in the genetic_alteration table
- * @return true if any record was stored in genetic_alteration, else false
- * @throws DaoException if any DaoException is thrown while using daoGene or daoGeneticAlteration
- */
- private boolean parseLine(String line, int nrColumns, int sampleStartIndex,
- int hugoSymbolIndex, int entrezGeneIdIndex, int rppaGeneRefIndex,
- boolean isRppaProfile, boolean isDiscretizedCnaProfile,
- DaoGeneOptimized daoGene,
- List filteredSampleIndices, List orderedSampleList,
- Map existingCnaEvents, DaoGeneticAlteration daoGeneticAlteration
- ) throws DaoException {
-
+ * Attempt to create a genetic_alteration record based on the current line read from a profile data file.
+ *
+ * - Commented out lines and blank lines are always skipped (returns false)
+ *
- The line is split into columns by the tab delimiter
+ *
- The involved genes (list of entrez_gene_ids) are determined:
+ *
+ * - Hugo_Symbol and Entrez_Gene_Id column indices are read and validated
+ *
- if neither are available, the line is skipped
+ *
- if Hugo_Symbol contains '///' or '---', the line is skipped
+ *
- rppaProfile parsing has special rules for determining the involved genes
+ *
- if Entrez_Gene_Id is available, use that to determine the involved genes
+ *
- if Hugo_Symbol is available, use that to determine the involved genes (truncate symbols with '|' in them)
+ *
- if the involved genes list is still empty, the line is skipped (returns false)
+ *
+ * - Both gene_alias and gene records are examined to see how many genes of type 'miRNA' are matched
+ *
- If any matched record is of type 'miRNA':
+ *
+ * - Loop through each gene or gene_alias of type 'miRNA' and attempt to store the record under that gene in genetic_alteration
+ *
- If no records were successfully stored in genetic_alteration, log the failure
+ *
+ * - If no matched record is of type 'miRNA':
+ *
- if there is exactly 1 involved gene (using only the gene table if sufficient, or gene_alias if necessary):
+ *
+ * - if this is a 'discretizedCnaProfile', normalize the CNA values and create a list of cnaEvents to be added
+ *
- attempt to store the record in genetic_alteration
+ *
- if the record is successfully stored (not duplicated), create (or update) records in sample_cna_event for the created list of cnaEvents (if any)
+ *
+ * - if there are several involved genes and the profile is an rppaProfile, loop through the genes; for each one:
+ *
+ * - attempt to store the record under that gene in genetic_alteration
+ *
- count the number of successfully imported records (for logging)
+ *
+ *
+ * - after looping through all involved genes, check whether any records were successfully stored in genetic_alteration - if not log the failure
+ *
+ * - if there are several involved genes and the profile is not an rppaProfile, log a failure to import the current line due to ambiguous gene symbol
+ *
+ * If a record was (or more than one were) successfully stored in genetic_alteration, return true ; else false
+ *
+ *
+ * During the import of any single profile data file, at most one record per Entrez_Gene_Id will be successfully imported to genetic_alteration.
+ * Each attempt to import is done through a call to the function storeGeneticAlterations().
+ * That function will check an instance variable importSetOfGenes, and if the gene has been previously imported, no new attempt is made (failure).
+ * Each time a gene is successfully imported, it is added to importSetOfGenes.
+ *
+ * MicroRNA are treated specially because of the possible presence of constructed combination forms (such as 'MIR-100/100*' and 'MIR-100/100').
+ * In these cases a Hugo_Symbol such as 'hsa-mir-100' may be expected to match the (fake) Entrez_Gene_Id for both of these combination forms.
+ * In that case, we want to import several copies of the genetic alteration profile line .. one for each matched gene of type 'miRNA'.
+ * This allows the visualization of both CNA event profiles for the microRNA precursor with expression profiles for the microRNA mature form.
+ *
+ * The current implementation of this code does not attempt to "merge" / "unify" lines in the profile data file which have duplicated Entrez_Gene_Id.
+ * Instead, the first encountered line which maps to the Entrez_Gene_Id will be stored as a record in genetic_alteration (returns true).
+ * Later lines which attempt to store a record with that Entrez_Gene_Id will not be stored as a record in genetic_alteration (returns false).
+ * For microRNA gene aliases it is possible that complex interactions will occur, where an earlier line in the data file stores a record under several Entrez_Gene_Ids, and a later line in the file fails to store records under some of those previously 'used' Entrez_Gene_Ids, but succeeds in storing a record under one or more not previously used Entrez_Gene_Ids. So a microRNA line from the file may be imported "partially successfully" (returns true).
+ *
+ * Example Cases:
+ * Gene records are P1, P2, P3, P4 (protein coding), M1, M2, M3 (microRNA).
+ * Gene_Symbol AMA is gene_alias for M1 and M2, Gene_Symbol AMB is gene_alias for M2 and M3, Gene_Symbol AAMBIG is gene_alias for P3 and P4. Gene_Symbol AMIXED is gene_alias for P1 and M3.
+ *
+ * Case_1 (the last two lines will be skipped and logged like "Gene P1 (#) found to be duplicated in your file. Duplicated row will be ignored!")
+ *
+ * Hugo_Symbol | Sample1 | ...
+ * |
---|
P1 | 0 | ...
+ * |
P2 | 0 | ...
+ * |
P1 | 0 | ...
+ * |
P1 | 0 | ...
+ * |
+ *
+ * Case_2 (the last line will be skipped and logged like "Gene M1 (#) (given as alias in your file as: AMA) found to be duplicated in your file. Duplicated row will be ignored!" , "Gene M2 (#) (given as alias in your file as: AMA) found to be duplicated in your file. Duplicated row will be ignored!" , "Could not store microRNA or RPPA data" )
+ *
+ * Hugo_Symbol | Sample1 | ...
+ * |
---|
AMA | 0 | ...
+ * |
AMA | 0 | ...
+ * |
+ *
+ * Case_3 (the last line in the file will fail with log messages like "Gene symbol AAMBIG found to be ambiguous. Record will be skipped for this gene.")
+ *
+ * Hugo_Symbol | Sample1 | ...
+ * |
---|
P1 | 0 | ...
+ * |
P2 | 0 | ...
+ * |
AAMBIG | 0 | ...
+ * |
+ *
+ * Case_4 (the second to last line will partially succeed, storing a record in genetic_alteration for gene M3 but failing for M2 with a log message like "Gene M2 (#) (given as alias in your file as: AMB) found to be duplicated in your file. Duplicated row will be ignored!" ; the last line in the file will fail with log messages like "Gene M3 (#) (given as alias in your file as: AMIXED) found to be duplicated in your file. Duplicated row will be ignored!" , "Gene symbol AMIXED found to be ambiguous (a mixture of microRNA and other types). Record will be skipped for this gene.")
+ *
+ * Hugo_Symbol | Sample1 | ...
+ * |
---|
AMA | 0 | ...
+ * |
AMB | 0 | ...
+ * |
AMIXED | 0 | ...
+ * |
+ *
+ * @param line the line from the profile data file to be parsed
+ * @param nrColumns the number of columns, defined by the header line
+ * @param sampleStartIndex the index of the first column with a sample name in the header field
+ * @param hugoSymbolIndex the index of the column Hugo_Symbol
+ * @param entrezGeneIdIndex the index of the column Entrez_Gene_Id
+ * @param rppaGeneRefIndex the index of the column Composite.Element.Ref
+ * @param isRppaProfile true if this is an rppa profile (i.e. alteration type is PROTEIN_LEVEL and the first column is Composite.Element.Ref)
+ * @param isDiscretizedCnaProfile true if this is a discretized CNA profile (i.e. alteration type COPY_NUMBER_ALTERATION and showProfileInAnalysisTab is true)
+ * @param daoGene an instance of DaoGeneOptimized ... for use in resolving gene symbols
+ * @param orderedSampleList a list of the internal sample ids corresponding to the sample names in the header line
+ * @param existingCnaEvents a collection of CnaEvents, to be added to or updated during parsing of individual lines
+ * @param daoGeneticAlteration an instance of DaoGeneticAlteration ... for use in storing records in the genetic_alteration table
+ * @return true if any record was stored in genetic_alteration, else false
+ * @throws DaoException if any DaoException is thrown while using daoGene or daoGeneticAlteration
+ */
+ private boolean parseLine(String line, int nrColumns, int sampleStartIndex,
+ int hugoSymbolIndex, int entrezGeneIdIndex, int rppaGeneRefIndex,
+ boolean isRppaProfile, boolean isDiscretizedCnaProfile,
+ DaoGeneOptimized daoGene,
+ List filteredSampleIndices, List orderedSampleList,
+ Map existingCnaEvents, DaoGeneticAlteration daoGeneticAlteration
+ ) throws DaoException {
+
//TODO: refactor this entire function - split functionality into smaller units / subroutines
- boolean recordStored = false;
-
+ boolean recordStored = false;
+
// Ignore lines starting with #
if (!line.startsWith("#") && line.trim().length() > 0) {
String[] parts = line.split("\t",-1);
-
+
if (parts.length>nrColumns) {
if (line.split("\t").length>nrColumns) {
ProgressMonitor.logWarning("Ignoring line with more fields (" + parts.length
- + ") than specified in the headers(" + nrColumns + "): \n"+parts[0]);
+ + ") than specified in the headers(" + nrColumns + "): \n"+parts[0]);
return false;
}
}
@@ -545,9 +534,9 @@ else if (!entrez.matches("[0-9]+")) {
//TODO - would be better to give an exception in some cases, like negative Entrez values
ProgressMonitor.logWarning("Ignoring line with invalid Entrez_Id " + entrez);
return false;
- }
+ }
}
-
+
//If all are empty, skip line:
if (geneSymbol == null && entrez == null) {
ProgressMonitor.logWarning("Ignoring line with no Hugo_Symbol or Entrez_Id value");
@@ -571,7 +560,7 @@ else if (!entrez.matches("[0-9]+")) {
//will be null when there is a parse error in this case, so we
//can return here and avoid duplicated messages:
return false;
- }
+ }
if (genes.isEmpty()) {
String gene = (geneSymbol != null) ? geneSymbol : entrez;
ProgressMonitor.logWarning("Gene not found for: [" + gene
@@ -648,16 +637,16 @@ else if (!entrez.matches("[0-9]+")) {
// none of the matched genes are type "miRNA"
if (genes.size() == 1) {
List cnaEventsToAdd = new ArrayList();
-
+
if (isDiscretizedCnaProfile) {
long entrezGeneId = genes.get(0).getEntrezGeneId();
for (int i = 0; i < values.length; i++) {
-
+
// temporary solution -- change partial deletion back to full deletion.
if (values[i].equals(CNA_VALUE_PARTIAL_DELETION)) {
values[i] = CNA_VALUE_HOMOZYGOUS_DELETION;
}
- if (values[i].equals(CNA_VALUE_AMPLIFICATION)
+ if (values[i].equals(CNA_VALUE_AMPLIFICATION)
// || values[i].equals(CNA_VALUE_GAIN) >> skipping GAIN, ZERO, HEMIZYGOUS_DELETION to minimize size of dataset in DB
// || values[i].equals(CNA_VALUE_ZERO)
// || values[i].equals(CNA_VALUE_HEMIZYGOUS_DELETION)
@@ -683,7 +672,7 @@ else if (!entrez.matches("[0-9]+")) {
//only add extra CNA related records if the step above worked, otherwise skip:
if (recordStored) {
CnaUtil.storeCnaEvents(existingCnaEvents, cnaEventsToAdd);
- }
+ }
} else {
if (isRppaProfile) { // for protein data, duplicate the data
for (CanonicalGene gene : genes) {
@@ -714,7 +703,7 @@ else if (!entrez.matches("[0-9]+")) {
}
return recordStored;
}
-
+
/**
* Parses line for gene set record and stores record in 'genetic_alteration' table.
* @param line
@@ -724,33 +713,33 @@ else if (!entrez.matches("[0-9]+")) {
* @param filteredSampleIndices
* @param daoGeneticAlteration
* @return
- * @throws DaoException
+ * @throws DaoException
*/
private boolean parseGenesetLine(String line, int nrColumns, int sampleStartIndex, int genesetIdIndex,
- List filteredSampleIndices, DaoGeneticAlteration daoGeneticAlteration) throws DaoException {
+ List filteredSampleIndices, DaoGeneticAlteration daoGeneticAlteration) throws DaoException {
boolean storedRecord = false;
-
+
if (!line.startsWith("#") && line.trim().length() > 0) {
String[] parts = line.split("\t",-1);
if (parts.length>nrColumns) {
if (line.split("\t").length>nrColumns) {
ProgressMonitor.logWarning("Ignoring line with more fields (" + parts.length
- + ") than specified in the headers(" + nrColumns + "): \n"+parts[0]);
+ + ") than specified in the headers(" + nrColumns + "): \n"+parts[0]);
return false;
}
}
-
+
String values[] = (String[]) ArrayUtils.subarray(parts, sampleStartIndex, parts.length>nrColumns?nrColumns:parts.length);
// trim whitespace from values
values = Stream.of(values).map(String::trim).toArray(String[]::new);
values = filterOutNormalValues(filteredSampleIndices, values);
-
+
Geneset geneset = DaoGeneset.getGenesetByExternalId(parts[genesetIdIndex]);
if (geneset != null) {
- storedRecord = storeGeneticEntityGeneticAlterations(values, daoGeneticAlteration, geneset.getGeneticEntityId(),
- EntityType.GENESET, geneset.getExternalId());
+ storedRecord = storeGeneticEntityGeneticAlterations(values, daoGeneticAlteration, geneset.getGeneticEntityId(),
+ EntityType.GENESET, geneset.getExternalId());
}
else {
ProgressMonitor.logWarning("Geneset " + parts[genesetIdIndex] + " not found in DB. Record will be skipped.");
@@ -768,39 +757,39 @@ private boolean parseGenesetLine(String line, int nrColumns, int sampleStartInde
* @param filteredSampleIndices
* @param daoGeneticAlteration
* @return
- * @throws DaoException
+ * @throws DaoException
*/
private boolean parseGenericAssayLine(String line, int nrColumns, int sampleStartIndex, int genericAssayIdIndex,
- List filteredSampleIndices, DaoGeneticAlteration daoGeneticAlteration, Map genericAssayStableIdToEntityIdMap) throws DaoException {
+ List filteredSampleIndices, DaoGeneticAlteration daoGeneticAlteration, Map genericAssayStableIdToEntityIdMap) throws DaoException {
boolean recordIsStored = false;
-
+
if (!line.startsWith("#") && line.trim().length() > 0) {
String[] parts = line.split("\t", -1);
if (parts.length > nrColumns) {
if (line.split("\t").length > nrColumns) {
ProgressMonitor.logWarning("Ignoring line with more fields (" + parts.length
- + ") than specified in the headers(" + nrColumns + "): \n"+parts[0]);
+ + ") than specified in the headers(" + nrColumns + "): \n"+parts[0]);
return false;
}
}
-
+
String values[] = (String[]) ArrayUtils.subarray(parts, sampleStartIndex, parts.length>nrColumns?nrColumns:parts.length);
// trim whitespace from values
values = Stream.of(values).map(String::trim).toArray(String[]::new);
values = filterOutNormalValues(filteredSampleIndices, values);
-
+
String stableId = parts[genericAssayIdIndex];
Integer entityId = genericAssayStableIdToEntityIdMap.getOrDefault(stableId, null);
-
+
if (entityId == null) {
ProgressMonitor.logWarning("Generic Assay entity " + parts[genericAssayIdIndex] + " not found in DB. Record will be skipped.");
} else {
- recordIsStored = storeGeneticEntityGeneticAlterations(values, daoGeneticAlteration, entityId,
- EntityType.GENERIC_ASSAY, stableId);
+ recordIsStored = storeGeneticEntityGeneticAlterations(values, daoGeneticAlteration, entityId,
+ EntityType.GENERIC_ASSAY, stableId);
}
return recordIsStored;
@@ -819,14 +808,14 @@ private boolean parseGenericAssayLine(String line, int nrColumns, int sampleStar
* @return boolean indicating if record was stored successfully or not
*/
private boolean storeGeneticEntityGeneticAlterations(String[] values, DaoGeneticAlteration daoGeneticAlteration,
- Integer geneticEntityId, EntityType geneticEntityType, String geneticEntityName) {
+ Integer geneticEntityId, EntityType geneticEntityType, String geneticEntityName) {
try {
if (importedGeneticEntitySet.add(geneticEntityId)) {
daoGeneticAlteration.addGeneticAlterationsForGeneticEntity(geneticProfile.getGeneticProfileId(), geneticEntityId, values);
return true;
}
else {
- ProgressMonitor.logWarning("Data for genetic entity " + geneticEntityName
+ ProgressMonitor.logWarning("Data for genetic entity " + geneticEntityName
+ " [" + geneticEntityType +"] already imported from file. Record will be skipped.");
return false;
}
@@ -838,7 +827,7 @@ private boolean storeGeneticEntityGeneticAlterations(String[] values, DaoGenetic
/**
* Tries to parse the genes and look them up in DaoGeneOptimized
- *
+ *
* @param antibodyWithGene
* @return returns null if something was wrong, e.g. could not parse the antibodyWithGene string; returns
* a list with 0 or more elements otherwise.
@@ -886,7 +875,7 @@ private List parseRPPAGenes(String antibodyWithGene) throws DaoEx
for (String symbol : symbolsNotFound) {
ProgressMonitor.logWarning("Gene " + symbol + " not found in DB. Record will be skipped for this gene.");
}
-
+
Pattern p = Pattern.compile("(p[STY][0-9]+(?:_[STY][0-9]+)*)");
Matcher m = p.matcher(arrayId);
String residue;
@@ -899,7 +888,7 @@ private List parseRPPAGenes(String antibodyWithGene) throws DaoEx
return importPhosphoGene(genes, residue);
}
}
-
+
private List importPhosphoGene(List genes, String residue) throws DaoException {
DaoGeneOptimized daoGene = DaoGeneOptimized.getInstance();
List phosphoGenes = new ArrayList();
@@ -921,7 +910,7 @@ private List importPhosphoGene(List genes, String
return phosphoGenes;
}
-
+
// returns index for geneset id column
private int getGenesetIdIndex(String[] headers) {
return getColIndexByName(headers, "geneset_id");
@@ -930,19 +919,19 @@ private int getGenesetIdIndex(String[] headers) {
private int getGenericAssayIdIndex(String[] headers) {
return getColIndexByName(headers, "ENTITY_STABLE_ID");
}
-
+
private int getHugoSymbolIndex(String[] headers) {
return getColIndexByName(headers, "Hugo_Symbol");
}
-
+
private int getEntrezGeneIdIndex(String[] headers) {
return getColIndexByName(headers, "Entrez_Gene_Id");
}
-
+
private int getRppaGeneRefIndex(String[] headers) {
return getColIndexByName(headers, "Composite.Element.Ref");
}
-
+
// helper function for finding the index of a column by name
private int getColIndexByName(String[] headers, String colName) {
for (int i=0; i featureColNames = new ArrayList();
featureColNames.add("Gene Symbol");
@@ -977,7 +966,7 @@ private int getStartIndex(String[] headers, int ...featureColIds) {
}
int startIndex = -1;
-
+
for (int i=0; i e.getAlteration().getDescription())
.collect(toList());
assertEquals(2, cnaEvents.size());
- assertEquals("Amplified,Homozygously deleted", String.join(",", cnaEvents));
+ assertTrue(newArrayList("Amplified", "Homozygously deleted").containsAll(cnaEvents));
// Test gene with partial deletion and amplification has two cna events:
List convertedCnaEvents = resultCnaEvents
@@ -141,7 +142,7 @@ public void testImportCnaDiscreteLongDataAddsCnaEvents() throws Exception {
.map(e -> e.getAlteration().getDescription())
.collect(toList());
assertEquals(2, cnaEvents.size());
- assertEquals("Amplified,Homozygously deleted", String.join(",", cnaEvents));
+ assertTrue( newArrayList("Amplified", "Homozygously deleted").containsAll(cnaEvents));
// Test gene with homozygous deletion and amplification has no cna events:
List skippedCnaEvents = resultCnaEvents
diff --git a/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportCopyNumberSegmentData.java b/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportCopyNumberSegmentData.java
index 53b5db61b50..dc8e4239385 100644
--- a/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportCopyNumberSegmentData.java
+++ b/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportCopyNumberSegmentData.java
@@ -19,7 +19,7 @@
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
- */
+*/
package org.mskcc.cbio.portal.scripts;
@@ -62,45 +62,45 @@
@Transactional
public class TestImportCopyNumberSegmentData {
- @Autowired
- ApplicationContext applicationContext;
-
- //To use in test cases where we expect an exception:
- @Rule
- public ExpectedException exception = ExpectedException.none();
-
-
- @Before
- public void setUp() throws DaoException
- {
- //set it, to avoid this being set to the runtime (not for testing) application context:
- SpringUtil.setApplicationContext(applicationContext);
- }
-
- /**
+ @Autowired
+ ApplicationContext applicationContext;
+
+ //To use in test cases where we expect an exception:
+ @Rule
+ public ExpectedException exception = ExpectedException.none();
+
+
+ @Before
+ public void setUp() throws DaoException
+ {
+ //set it, to avoid this being set to the runtime (not for testing) application context:
+ SpringUtil.setApplicationContext(applicationContext);
+ }
+
+ /**
* Test importing of Clinical Data File.
*
* @throws DaoException Database Access Error.
* @throws IOException IO Error.
*/
- @Test
+ @Test
public void testImportSegmentDataNewStudy() throws Exception {
- //new dummy study to simulate importing clinical data in empty study:
- CancerStudy cancerStudy = new CancerStudy("testnewseg","testnewseg","testnewseg","brca",true);
+ //new dummy study to simulate importing clinical data in empty study:
+ CancerStudy cancerStudy = new CancerStudy("testnewseg","testnewseg","testnewseg","brca",true);
cancerStudy.setReferenceGenome("hg19");
- DaoCancerStudy.addCancerStudy(cancerStudy);
+ DaoCancerStudy.addCancerStudy(cancerStudy);
addTestPatientAndSampleRecords(new File("src/test/resources/segment/data_cna_hg19.seg"), cancerStudy);
String[] args = {
- "--data","src/test/resources/segment/data_cna_hg19.seg",
- "--meta","src/test/resources/segment/meta_cna_hg19_seg.txt",
- "--loadMode", "bulkLoad"
- };
+ "--data","src/test/resources/segment/data_cna_hg19.seg",
+ "--meta","src/test/resources/segment/meta_cna_hg19_seg.txt",
+ "--loadMode", "bulkLoad"
+ };
ImportCopyNumberSegmentData runner = new ImportCopyNumberSegmentData(args);
- runner.run();
+ runner.run();
//TODO : fix test to actually store data and add some checks
-
- }
+
+ }
private void addTestPatientAndSampleRecords(File file, CancerStudy cancerStudy) throws FileNotFoundException, IOException, DaoException {
// extract sample ids from first column
@@ -124,4 +124,4 @@ private void addTestPatientAndSampleRecords(File file, CancerStudy cancerStudy)
}
MySQLbulkLoader.flushAll();
}
-}
\ No newline at end of file
+}
diff --git a/db-scripts/src/main/resources/cgds.sql b/db-scripts/src/main/resources/cgds.sql
index 797bfc0b45b..f28432e323d 100644
--- a/db-scripts/src/main/resources/cgds.sql
+++ b/db-scripts/src/main/resources/cgds.sql
@@ -590,7 +590,6 @@ CREATE TABLE `cna_event` (
`CNA_EVENT_ID` int(255) NOT NULL auto_increment,
`ENTREZ_GENE_ID` int(11) NOT NULL,
`ALTERATION` tinyint NOT NULL,
- `ANNOTATION_JSON` JSON,
PRIMARY KEY (`CNA_EVENT_ID`),
UNIQUE (`ENTREZ_GENE_ID`, `ALTERATION`),
FOREIGN KEY (`ENTREZ_GENE_ID`) REFERENCES `gene` (`ENTREZ_GENE_ID`),
@@ -761,4 +760,4 @@ CREATE TABLE `resource_study` (
);
-- THIS MUST BE KEPT IN SYNC WITH db.version PROPERTY IN pom.xml
-INSERT INTO info VALUES ('2.12.15', NULL);
+INSERT INTO info VALUES ('2.12.14', NULL);
diff --git a/db-scripts/src/main/resources/migration.sql b/db-scripts/src/main/resources/migration.sql
index ea77948547e..c3c4fa5a069 100644
--- a/db-scripts/src/main/resources/migration.sql
+++ b/db-scripts/src/main/resources/migration.sql
@@ -992,7 +992,3 @@ ALTER TABLE `structural_variant` DROP COLUMN `SITE2_EXON`;
ALTER TABLE `structural_variant` DROP COLUMN `CENTER`;
ALTER TABLE `structural_variant` DROP COLUMN `EXTERNAL_ANNOTATION`;
UPDATE `info` SET `DB_SCHEMA_VERSION`="2.12.14";
-
-##version: 2.12.15
-ALTER TABLE `cna_event` ADD COLUMN `ANNOTATION_JSON` JSON AFTER `ALTERATION`;
-UPDATE `info` SET `DB_SCHEMA_VERSION`="2.12.15";
diff --git a/model/src/main/java/org/cbioportal/model/DiscreteCopyNumberData.java b/model/src/main/java/org/cbioportal/model/DiscreteCopyNumberData.java
index cbeb2862d16..054c68164c0 100644
--- a/model/src/main/java/org/cbioportal/model/DiscreteCopyNumberData.java
+++ b/model/src/main/java/org/cbioportal/model/DiscreteCopyNumberData.java
@@ -1,19 +1,13 @@
package org.cbioportal.model;
-import com.fasterxml.jackson.annotation.JsonRawValue;
-import io.swagger.annotations.ApiModelProperty;
-
import java.io.Serializable;
import javax.validation.constraints.NotNull;
public class DiscreteCopyNumberData extends Alteration implements Serializable {
+
@NotNull
private Integer alteration;
-
- @JsonRawValue
- @ApiModelProperty(dataType = "java.util.Map")
- private String annotationJson;
-
+
public Integer getAlteration() {
return alteration;
}
@@ -21,13 +15,4 @@ public Integer getAlteration() {
public void setAlteration(Integer alteration) {
this.alteration = alteration;
}
-
- public String getAnnotationJson() {
- return annotationJson;
- }
-
- public void setAnnotationJson(String annotationJson) {
- this.annotationJson = annotationJson;
- }
-
}
diff --git a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/DiscreteCopyNumberMapper.xml b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/DiscreteCopyNumberMapper.xml
index 6e63ab375ed..0caffb5697d 100644
--- a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/DiscreteCopyNumberMapper.xml
+++ b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/DiscreteCopyNumberMapper.xml
@@ -6,7 +6,6 @@
cna_event.ENTREZ_GENE_ID as entrezGeneId,
cna_event.ALTERATION AS alteration,
- cna_event.ANNOTATION_JSON as annotationJson,
genetic_profile.STABLE_ID AS molecularProfileId,
sample.STABLE_ID AS sampleId,
patient.STABLE_ID AS patientId,
diff --git a/persistence/persistence-mybatis/src/test/resources/testSql.sql b/persistence/persistence-mybatis/src/test/resources/testSql.sql
index c5021caa7ce..b8e4a3adc1b 100644
--- a/persistence/persistence-mybatis/src/test/resources/testSql.sql
+++ b/persistence/persistence-mybatis/src/test/resources/testSql.sql
@@ -417,9 +417,9 @@ INSERT INTO genetic_alteration (GENETIC_PROFILE_ID,GENETIC_ENTITY_ID,`VALUES`) V
INSERT INTO genetic_alteration (GENETIC_PROFILE_ID,GENETIC_ENTITY_ID,`VALUES`) VALUES (2,2,'1.4146,-0.0662,-0.8585,-1.6576,-0.3552,-0.8306,0.8102,0.1146,0.3498,0.0349,0.4927,-0.8665,-0.4754,-0.7221,');
INSERT INTO genetic_alteration (GENETIC_PROFILE_ID,GENETIC_ENTITY_ID,`VALUES`) VALUES (3,2,'-0.8097,0.7360,-1.0225,-0.8922,0.7247,0.3537,1.2702,-0.1419,');
-INSERT INTO cna_event (CNA_EVENT_ID,ENTREZ_GENE_ID,ALTERATION,ANNOTATION_JSON) VALUES (1,207,-2, '{"columnName":{"fieldName":"fieldValue"}}');
-INSERT INTO cna_event (CNA_EVENT_ID,ENTREZ_GENE_ID,ALTERATION,ANNOTATION_JSON) VALUES (2,208,2, '{"columnName":{"fieldName":"fieldValue"}}');
-INSERT INTO cna_event (CNA_EVENT_ID,ENTREZ_GENE_ID,ALTERATION,ANNOTATION_JSON) VALUES (3,207,2, '{"columnName":{"fieldName":"fieldValue"}}');
+INSERT INTO cna_event (CNA_EVENT_ID,ENTREZ_GENE_ID,ALTERATION) VALUES (1,207,-2);
+INSERT INTO cna_event (CNA_EVENT_ID,ENTREZ_GENE_ID,ALTERATION) VALUES (2,208,2);
+INSERT INTO cna_event (CNA_EVENT_ID,ENTREZ_GENE_ID,ALTERATION) VALUES (3,207,2);
INSERT INTO sample_cna_event (CNA_EVENT_ID,SAMPLE_ID,GENETIC_PROFILE_ID) VALUES (1,1,2);
INSERT INTO sample_cna_event (CNA_EVENT_ID,SAMPLE_ID,GENETIC_PROFILE_ID) VALUES (2,1,2);
diff --git a/pom.xml b/pom.xml
index 93129e9c95a..97e3c3980b2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -315,7 +315,7 @@
720
- 2.12.15
+ 2.12.14
diff --git a/service/src/test/java/org/cbioportal/service/impl/DiscreteCopyNumberServiceImplTest.java b/service/src/test/java/org/cbioportal/service/impl/DiscreteCopyNumberServiceImplTest.java
index 957a4b930ab..beaa290db64 100644
--- a/service/src/test/java/org/cbioportal/service/impl/DiscreteCopyNumberServiceImplTest.java
+++ b/service/src/test/java/org/cbioportal/service/impl/DiscreteCopyNumberServiceImplTest.java
@@ -64,65 +64,6 @@ public void getDiscreteCopyNumbersInMultipleMolecularProfilesHomdelOrAmp() {
Assert.assertEquals(toStrings(returned), toStrings(actual));
}
- @Test
- public void getDiscreteCopyNumbersWithAnnotationJson() {
- List returned = Arrays.asList(
- discreteCopyNumberData("sample1", "study1", -2),
- discreteCopyNumberData("sample2", "study2", 2)
- );
- returned.get(0).setAnnotationJson("{\"columnName\":{\"fieldName\":\"fieldValue\"}}");
- List profiles = Arrays.asList("profile1", "profile2");
- List samples = Arrays.asList("sample1", "sample2");
- List geneIds = Arrays.asList(0, 1);
- List alterationTypes = Arrays.asList(-2, 2);
-
- Mockito.when(discreteCopyNumberRepository.getDiscreteCopyNumbersInMultipleMolecularProfiles(
- profiles,
- samples,
- geneIds,
- alterationTypes,
- PROJECTION
- ))
- .thenReturn(
- returned
- );
-
- List actual = discreteCopyNumberService.getDiscreteCopyNumbersInMultipleMolecularProfiles(
- profiles, samples, geneIds, alterationTypes, PROJECTION
- );
-
- Assert.assertEquals(toStrings(returned), toStrings(actual));
- }
- @Test
- public void getDiscreteCopyNumbersWithoutAnnotationJson() {
- List returned = Arrays.asList(
- discreteCopyNumberData("sample1", "study1", -2),
- discreteCopyNumberData("sample2", "study2", 2)
- );
- // returned.get(0).setAnnotationJson("{\"columnName\":{\"fieldName\":\"fieldValue\"}}");
- List profiles = Arrays.asList("profile1", "profile2");
- List samples = Arrays.asList("sample1", "sample2");
- List geneIds = Arrays.asList(0, 1);
- List alterationTypes = Arrays.asList(-2, 2);
-
- Mockito.when(discreteCopyNumberRepository.getDiscreteCopyNumbersInMultipleMolecularProfiles(
- profiles,
- samples,
- geneIds,
- alterationTypes,
- PROJECTION
- ))
- .thenReturn(
- returned
- );
-
- List actual = discreteCopyNumberService.getDiscreteCopyNumbersInMultipleMolecularProfiles(
- profiles, samples, geneIds, alterationTypes, PROJECTION
- );
- Assert.assertNull(returned.get(0).getAnnotationJson());
- Assert.assertEquals(toStrings(returned), toStrings(actual));
- }
-
@Test
public void getDiscreteCopyNumbersInMultipleMolecularProfilesAllAlterationTypes() {
List returned = Arrays.asList(
diff --git a/web/src/main/java/org/cbioportal/web/config/CustomObjectMapper.java b/web/src/main/java/org/cbioportal/web/config/CustomObjectMapper.java
index f189329be98..88a8d99ea78 100644
--- a/web/src/main/java/org/cbioportal/web/config/CustomObjectMapper.java
+++ b/web/src/main/java/org/cbioportal/web/config/CustomObjectMapper.java
@@ -47,7 +47,6 @@
import org.cbioportal.model.ClinicalEvent;
import org.cbioportal.model.ClinicalEventData;
import org.cbioportal.model.DataAccessToken;
-import org.cbioportal.model.DiscreteCopyNumberData;
import org.cbioportal.model.CopyNumberSeg;
import org.cbioportal.model.Gene;
import org.cbioportal.model.GenePanel;
@@ -83,7 +82,6 @@
import org.cbioportal.web.mixin.ClinicalEventMixin;
import org.cbioportal.web.mixin.CopyNumberSegMixin;
import org.cbioportal.web.mixin.DataAccessTokenMixin;
-import org.cbioportal.web.mixin.DiscreteCopyNumberDataMixin;
import org.cbioportal.web.mixin.GeneMixin;
import org.cbioportal.web.mixin.GenePanelMixin;
import org.cbioportal.web.mixin.GenePanelToGeneMixin;
@@ -120,7 +118,6 @@ public CustomObjectMapper() {
mixinMap.put(ClinicalEventData.class, ClinicalEventDataMixin.class);
mixinMap.put(CopyNumberSeg.class, CopyNumberSegMixin.class);
mixinMap.put(DataAccessToken.class, DataAccessTokenMixin.class);
- mixinMap.put(DiscreteCopyNumberData.class, DiscreteCopyNumberDataMixin.class);
mixinMap.put(Gene.class, GeneMixin.class);
mixinMap.put(GenePanel.class, GenePanelMixin.class);
mixinMap.put(GenePanelToGene.class, GenePanelToGeneMixin.class);
diff --git a/web/src/main/java/org/cbioportal/web/mixin/DiscreteCopyNumberDataMixin.java b/web/src/main/java/org/cbioportal/web/mixin/DiscreteCopyNumberDataMixin.java
deleted file mode 100644
index f8c904f252b..00000000000
--- a/web/src/main/java/org/cbioportal/web/mixin/DiscreteCopyNumberDataMixin.java
+++ /dev/null
@@ -1,9 +0,0 @@
-package org.cbioportal.web.mixin;
-
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-public class DiscreteCopyNumberDataMixin {
-
- @JsonProperty("namespaceColumns")
- private String annotationJson;
-}
\ No newline at end of file
diff --git a/web/src/test/java/org/cbioportal/web/DiscreteCopyNumberControllerTest.java b/web/src/test/java/org/cbioportal/web/DiscreteCopyNumberControllerTest.java
index c0d87247c78..0bf7adbfa65 100644
--- a/web/src/test/java/org/cbioportal/web/DiscreteCopyNumberControllerTest.java
+++ b/web/src/test/java/org/cbioportal/web/DiscreteCopyNumberControllerTest.java
@@ -41,7 +41,6 @@ public class DiscreteCopyNumberControllerTest {
private static final String TEST_SAMPLE_STABLE_ID_1 = "test_sample_stable_id_1";
private static final int TEST_ENTREZ_GENE_ID_1 = 1;
private static final int TEST_ALTERATION_1 = 1;
- private static final String TEST_ANNOTATION_JSON_1 = "{\"columnName\":{\"fieldName\":\"fieldValue\"}}";
private static final String TEST_HUGO_GENE_SYMBOL_1 = "test_hugo_gene_symbol_1";
private static final String TEST_TYPE_1 = "test_type_1";
private static final String TEST_CYTOBAND_1 = "test_cytoband_1";
@@ -103,7 +102,6 @@ public void getDiscreteCopyNumbersInMolecularProfileBySampleListIdDefaultProject
.andExpect(MockMvcResultMatchers.jsonPath("$[0].sampleId").value(TEST_SAMPLE_STABLE_ID_1))
.andExpect(MockMvcResultMatchers.jsonPath("$[0].entrezGeneId").value(TEST_ENTREZ_GENE_ID_1))
.andExpect(MockMvcResultMatchers.jsonPath("$[0].alteration").value(TEST_ALTERATION_1))
- .andExpect(MockMvcResultMatchers.jsonPath("$[0].namespaceColumns.columnName.fieldName").value("fieldValue"))
.andExpect(MockMvcResultMatchers.jsonPath("$[0].gene").doesNotExist())
.andExpect(MockMvcResultMatchers.jsonPath("$[1].molecularProfileId")
.value(TEST_MOLECULAR_PROFILE_STABLE_ID_2))
@@ -111,24 +109,6 @@ public void getDiscreteCopyNumbersInMolecularProfileBySampleListIdDefaultProject
.andExpect(MockMvcResultMatchers.jsonPath("$[1].entrezGeneId").value(TEST_ENTREZ_GENE_ID_2))
.andExpect(MockMvcResultMatchers.jsonPath("$[1].alteration").value(TEST_ALTERATION_2))
.andExpect(MockMvcResultMatchers.jsonPath("$[1].gene").doesNotExist());
- }
- @Test
- public void getDiscreteCopyNumbersWithoutAnnotationJson() throws Exception {
-
- List discreteCopyNumberDataList = createExampleDiscreteCopyNumberData();
- discreteCopyNumberDataList.get(0).setAnnotationJson(null);
- Mockito.when(discreteCopyNumberService.getDiscreteCopyNumbersInMolecularProfileBySampleListId(
- Mockito.any(), Mockito.any(), Mockito.any(),
- Mockito.any(), Mockito.any())).thenReturn(discreteCopyNumberDataList);
-
- mockMvc.perform(MockMvcRequestBuilders.get("/molecular-profiles/test_molecular_profile_id/discrete-copy-number")
- .param("sampleListId", TEST_SAMPLE_LIST_ID)
- .param("discreteCopyNumberEventType", DiscreteCopyNumberEventType.HOMDEL_AND_AMP.name())
- .accept(MediaType.APPLICATION_JSON))
- .andExpect(MockMvcResultMatchers.status().isOk())
- .andExpect(MockMvcResultMatchers.content().contentTypeCompatibleWith(MediaType.APPLICATION_JSON))
- .andExpect(MockMvcResultMatchers.jsonPath("$", Matchers.hasSize(2)))
- .andExpect(MockMvcResultMatchers.jsonPath("$[0].namespaceColumns").doesNotExist());
}
@Test
@@ -359,7 +339,6 @@ private List createExampleDiscreteCopyNumberData() {
discreteCopyNumberData1.setEntrezGeneId(TEST_ENTREZ_GENE_ID_1);
discreteCopyNumberData1.setAlteration(TEST_ALTERATION_1);
discreteCopyNumberDataList.add(discreteCopyNumberData1);
- discreteCopyNumberData1.setAnnotationJson(TEST_ANNOTATION_JSON_1);
DiscreteCopyNumberData discreteCopyNumberData2 = new DiscreteCopyNumberData();
discreteCopyNumberData2.setMolecularProfileId(TEST_MOLECULAR_PROFILE_STABLE_ID_2);
discreteCopyNumberData2.setSampleId(TEST_SAMPLE_STABLE_ID_2);