diff --git a/core/src/main/java/org/mskcc/cbio/maf/TabDelimitedFileUtil.java b/core/src/main/java/org/mskcc/cbio/maf/TabDelimitedFileUtil.java index 0b7cd13df29..420758c248f 100644 --- a/core/src/main/java/org/mskcc/cbio/maf/TabDelimitedFileUtil.java +++ b/core/src/main/java/org/mskcc/cbio/maf/TabDelimitedFileUtil.java @@ -28,7 +28,7 @@ * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . - */ +*/ package org.mskcc.cbio.maf; @@ -40,206 +40,206 @@ */ public class TabDelimitedFileUtil { - public final static String NA_STRING = "NA"; - public final static long NA_LONG = Long.MIN_VALUE; - // TODO use MIN instead of -1, we may have fields with negative values - public final static int NA_INT = -1; - public final static float NA_FLOAT = -1; - - /** - * If field is not found in header or data line, or is empty, it just returns empty - * field value "NA". - * - * @param index: index of the column to parse. Can be set to -1 if the column was not found in - * header. This method will return "NA" in this case. - * @param parts: the data line parts, i.e. the line split by separator. - * @return : the value as is, or "NA" if column was empty, not present in file (indicated by index=-1), - * or not present in data line (parts parameter above). - */ - public static String getPartString(int index, String[] parts) - { - try - { - if (parts[index].length() == 0) - { - return NA_STRING; - } - else - { - return parts[index]; - } - } - catch (ArrayIndexOutOfBoundsException e) - { - return NA_STRING; - } - } - - /** - * Return the trimmed string from the column, or an empty string if -1. - * - * Require the column to exist before the end of the data line. This can - * be used instead of getPartString() if NA may be a meaningful value and - * the file is expected to have been validated. - * - * @param index : index of the column to parse. May be set to -1 if the - * column was not found in header, to return "". - * @param parts: the data line parts, i.e. the line split by separator. - * - * @return : the value as is, or "" if the index is -1. - */ - public static String getPartStringAllowEmpty(int index, String[] parts) - { - try - { - if (index < 0) { - //return empty string: - return ""; - } - //else just return as is, trimmed version: - return parts[index].trim(); - } - catch (ArrayIndexOutOfBoundsException e) - { - // all lines must have the same number of columns, and the - // validation script should never allow this to reach the loader - throw new RuntimeException( - "Unexpected error while parsing column nr: " + (index+1), - e); - } - } - - /** - * Return the trimmed string from the column, or an empty string if -1 - * or "NA". - * - * Require the column to exist before the end of the data line. - * - * @param index : index of the column to parse. May be set to -1 if the - * column was not found in header, to return "". - * @param parts: the data line parts, i.e. the line split by separator. - * - * @return : the value as is, or "" if the index is -1. - */ - public static String getPartStringAllowEmptyAndNA(int index, String[] parts) - { - String value = getPartStringAllowEmpty(index, parts); - if (value.equals(NA_STRING)) { - value = ""; - } - return value; - } - - public static Long getPartLong(int index, String[] parts) { - try { - String part = parts[index]; - return Long.parseLong(part); - } catch (ArrayIndexOutOfBoundsException e) { - return NA_LONG; - } catch (NumberFormatException e) { - return NA_LONG; - } - } - - // This method does not call Integer.parseInt() as one might expect. - // Presumably this is to allow the convertion of strings like "6.2" to "6". - // The method previously called (int)Float.parseFloat() but floats - // reserve 23 bits for the mantissa and ints are 32 bits so precision - // was lost parsing "138536968" which was converted to 138536960. - // Now we call (int)Double.parseDouble() because double allocates - // 52 bits for the mantissa. Note getPartLong calls Long.parseLong() - // when this does not call Integer.parseInt() which seems inconsistent. - public static Integer getPartInt(int index, String[] parts) - { - try { - String part = parts[index]; - return (int)(Double.parseDouble(part)); - } catch (ArrayIndexOutOfBoundsException e) { - return NA_INT; - } catch (NumberFormatException e) { - return NA_INT; - } - } - - public static Float getPartPercentage(int index, String[] parts) - { - try { - float result = NA_FLOAT; - String part = parts[index]; - if (part.contains("%")) { - result = Float.parseFloat(part.replace("%", "")) / Float.parseFloat("100"); - } else { - result = Float.parseFloat(part); - } - return result; - } catch (ArrayIndexOutOfBoundsException e) { - return NA_FLOAT; - } catch (NumberFormatException e) { - return NA_FLOAT; - } - } - - public static Float getPartFloat(int index, String[] parts) - { - try { - String part = parts[index]; - return Float.parseFloat(part); - } catch (ArrayIndexOutOfBoundsException e) { - return NA_FLOAT; - } catch (NumberFormatException e) { - return NA_FLOAT; - } - } - - // returning MIN_VALUE instead of NA_FLOAT - // use this one if -1 is not a safe "NA" value. - public static Float getPartFloat2(int index, String[] parts) - { - try { - String part = parts[index]; - return Float.parseFloat(part); - } catch (ArrayIndexOutOfBoundsException e) { - return Float.MIN_VALUE; - } catch (NumberFormatException e) { - return Float.MIN_VALUE; - } - } - - public static String adjustDataLine(String dataLine, - int headerCount) - { - String line = dataLine; - String[] parts = line.split("\t", -1); - - // diff should be zero if (# of headers == # of data cols) - int diff = headerCount - parts.length; - - // number of header columns are more than number of data columns - if (diff > 0) - { - // append appropriate number of tabs - for (int i = 0; i < diff; i++) - { - line += "\t"; - } - } - // number of data columns are more than number of header columns - else if (diff < 0) - { - line = ""; - - // just truncate the data (discard the trailing columns) - for (int i = 0; i < headerCount; i++) - { - line += parts[i]; - - if (i < headerCount - 1) - { - line += "\t"; - } - } - } - - return line; - } + public final static String NA_STRING = "NA"; + public final static long NA_LONG = Long.MIN_VALUE; + // TODO use MIN instead of -1, we may have fields with negative values + public final static int NA_INT = -1; + public final static float NA_FLOAT = -1; + + /** + * If field is not found in header or data line, or is empty, it just returns empty + * field value "NA". + * + * @param index: index of the column to parse. Can be set to -1 if the column was not found in + * header. This method will return "NA" in this case. + * @param parts: the data line parts, i.e. the line split by separator. + * @return : the value as is, or "NA" if column was empty, not present in file (indicated by index=-1), + * or not present in data line (parts parameter above). + */ + public static String getPartString(int index, String[] parts) + { + try + { + if (parts[index].length() == 0) + { + return NA_STRING; + } + else + { + return parts[index]; + } + } + catch (ArrayIndexOutOfBoundsException e) + { + return NA_STRING; + } + } + + /** + * Return the trimmed string from the column, or an empty string if -1. + * + * Require the column to exist before the end of the data line. This can + * be used instead of getPartString() if NA may be a meaningful value and + * the file is expected to have been validated. + * + * @param index : index of the column to parse. May be set to -1 if the + * column was not found in header, to return "". + * @param parts: the data line parts, i.e. the line split by separator. + * + * @return : the value as is, or "" if the index is -1. + */ + public static String getPartStringAllowEmpty(int index, String[] parts) + { + try + { + if (index < 0) { + //return empty string: + return ""; + } + //else just return as is, trimmed version: + return parts[index].trim(); + } + catch (ArrayIndexOutOfBoundsException e) + { + // all lines must have the same number of columns, and the + // validation script should never allow this to reach the loader + throw new RuntimeException( + "Unexpected error while parsing column nr: " + (index+1), + e); + } + } + + /** + * Return the trimmed string from the column, or an empty string if -1 + * or "NA". + * + * Require the column to exist before the end of the data line. + * + * @param index : index of the column to parse. May be set to -1 if the + * column was not found in header, to return "". + * @param parts: the data line parts, i.e. the line split by separator. + * + * @return : the value as is, or "" if the index is -1. + */ + public static String getPartStringAllowEmptyAndNA(int index, String[] parts) + { + String value = getPartStringAllowEmpty(index, parts); + if (value.equals(NA_STRING)) { + value = ""; + } + return value; +} + + public static Long getPartLong(int index, String[] parts) { + try { + String part = parts[index]; + return Long.parseLong(part); + } catch (ArrayIndexOutOfBoundsException e) { + return NA_LONG; + } catch (NumberFormatException e) { + return NA_LONG; + } + } + + // This method does not call Integer.parseInt() as one might expect. + // Presumably this is to allow the convertion of strings like "6.2" to "6". + // The method previously called (int)Float.parseFloat() but floats + // reserve 23 bits for the mantissa and ints are 32 bits so precision + // was lost parsing "138536968" which was converted to 138536960. + // Now we call (int)Double.parseDouble() because double allocates + // 52 bits for the mantissa. Note getPartLong calls Long.parseLong() + // when this does not call Integer.parseInt() which seems inconsistent. + public static Integer getPartInt(int index, String[] parts) + { + try { + String part = parts[index]; + return (int)(Double.parseDouble(part)); + } catch (ArrayIndexOutOfBoundsException e) { + return NA_INT; + } catch (NumberFormatException e) { + return NA_INT; + } + } + + public static Float getPartPercentage(int index, String[] parts) + { + try { + float result = NA_FLOAT; + String part = parts[index]; + if (part.contains("%")) { + result = Float.parseFloat(part.replace("%", "")) / Float.parseFloat("100"); + } else { + result = Float.parseFloat(part); + } + return result; + } catch (ArrayIndexOutOfBoundsException e) { + return NA_FLOAT; + } catch (NumberFormatException e) { + return NA_FLOAT; + } + } + + public static Float getPartFloat(int index, String[] parts) + { + try { + String part = parts[index]; + return Float.parseFloat(part); + } catch (ArrayIndexOutOfBoundsException e) { + return NA_FLOAT; + } catch (NumberFormatException e) { + return NA_FLOAT; + } + } + + // returning MIN_VALUE instead of NA_FLOAT + // use this one if -1 is not a safe "NA" value. + public static Float getPartFloat2(int index, String[] parts) + { + try { + String part = parts[index]; + return Float.parseFloat(part); + } catch (ArrayIndexOutOfBoundsException e) { + return Float.MIN_VALUE; + } catch (NumberFormatException e) { + return Float.MIN_VALUE; + } + } + + public static String adjustDataLine(String dataLine, + int headerCount) + { + String line = dataLine; + String[] parts = line.split("\t", -1); + + // diff should be zero if (# of headers == # of data cols) + int diff = headerCount - parts.length; + + // number of header columns are more than number of data columns + if (diff > 0) + { + // append appropriate number of tabs + for (int i = 0; i < diff; i++) + { + line += "\t"; + } + } + // number of data columns are more than number of header columns + else if (diff < 0) + { + line = ""; + + // just truncate the data (discard the trailing columns) + for (int i = 0; i < headerCount; i++) + { + line += parts[i]; + + if (i < headerCount - 1) + { + line += "\t"; + } + } + } + + return line; + } } \ No newline at end of file diff --git a/core/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticAlteration.java b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticAlteration.java index ec42d3dbec0..25bef12594c 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticAlteration.java +++ b/core/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticAlteration.java @@ -28,7 +28,7 @@ * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . - */ +*/ package org.mskcc.cbio.portal.dao; @@ -51,7 +51,7 @@ * @author Ethan Cerami. */ public class DaoGeneticAlteration { - public static final String DELIM = ","; + private static final String DELIM = ","; public static final String NAN = "NaN"; private static DaoGeneticAlteration daoGeneticAlteration = null; @@ -70,7 +70,7 @@ private DaoGeneticAlteration() { public static DaoGeneticAlteration getInstance() throws DaoException { if (daoGeneticAlteration == null) { daoGeneticAlteration = new DaoGeneticAlteration(); - + } return daoGeneticAlteration; @@ -86,52 +86,48 @@ ObjectNode process( /** * Adds a Row of Genetic Alterations associated with a Genetic Profile ID and Entrez Gene ID. - * * @param geneticProfileId Genetic Profile ID. - * @param entrezGeneId Entrez Gene ID. - * @param values DELIM separated values. + * @param entrezGeneId Entrez Gene ID. + * @param values DELIM separated values. * @return number of rows successfully added. * @throws DaoException Database Error. */ - public int addGeneticAlterations( - int geneticProfileId, - long entrezGeneId, - String[] values - ) throws DaoException { - return addGeneticAlterationsForGeneticEntity(geneticProfileId, DaoGeneOptimized.getGeneticEntityId(entrezGeneId), values); + public int addGeneticAlterations(int geneticProfileId, long entrezGeneId, String[] values) + throws DaoException { + return addGeneticAlterationsForGeneticEntity(geneticProfileId, DaoGeneOptimized.getGeneticEntityId(entrezGeneId), values); } - + public int addGeneticAlterationsForGeneticEntity(int geneticProfileId, int geneticEntityId, String[] values) - throws DaoException { - + throws DaoException { + StringBuffer valueBuffer = new StringBuffer(); - for (String value : values) { + for (String value: values) { if (value.contains(DELIM)) { - throw new IllegalArgumentException("Value cannot contain delim: " + DELIM + throw new IllegalArgumentException ("Value cannot contain delim: " + DELIM + " --> " + value); } valueBuffer.append(value).append(DELIM); } - - if (MySQLbulkLoader.isBulkLoad()) { - // write to the temp file maintained by the MySQLbulkLoader - MySQLbulkLoader.getMySQLbulkLoader("genetic_alteration").insertRecord(Integer.toString(geneticProfileId), - Integer.toString(geneticEntityId), valueBuffer.toString()); - // return 1 because normal insert will return 1 if no error occurs - return 1; - } - + + if (MySQLbulkLoader.isBulkLoad() ) { + // write to the temp file maintained by the MySQLbulkLoader + MySQLbulkLoader.getMySQLbulkLoader("genetic_alteration").insertRecord(Integer.toString( geneticProfileId ), + Integer.toString( geneticEntityId ), valueBuffer.toString()); + // return 1 because normal insert will return 1 if no error occurs + return 1; + } + Connection con = null; PreparedStatement pstmt = null; ResultSet rs = null; - + try { con = JdbcUtil.getDbConnection(DaoGeneticAlteration.class); pstmt = con.prepareStatement - ("INSERT INTO genetic_alteration (GENETIC_PROFILE_ID, " + - " GENETIC_ENTITY_ID," + - " `VALUES`) " - + "VALUES (?,?,?)"); + ("INSERT INTO genetic_alteration (GENETIC_PROFILE_ID, " + + " GENETIC_ENTITY_ID," + + " `VALUES`) " + + "VALUES (?,?,?)"); pstmt.setInt(1, geneticProfileId); pstmt.setLong(2, geneticEntityId); pstmt.setString(3, valueBuffer.toString()); @@ -146,15 +142,15 @@ public int addGeneticAlterationsForGeneticEntity(int geneticProfileId, int genet /** * Gets the Specified Genetic Alteration. * - * @param geneticProfileId Genetic Profile ID. - * @param sampleId Sample ID. - * @param entrezGeneId Entrez Gene ID. + * @param geneticProfileId Genetic Profile ID. + * @param sampleId Sample ID. + * @param entrezGeneId Entrez Gene ID. * @return value or NAN. * @throws DaoException Database Error. */ public String getGeneticAlteration(int geneticProfileId, int sampleId, - long entrezGeneId) throws DaoException { - HashMap sampleMap = getGeneticAlterationMap(geneticProfileId, entrezGeneId); + long entrezGeneId) throws DaoException { + HashMap sampleMap = getGeneticAlterationMap(geneticProfileId, entrezGeneId); if (sampleMap.containsKey(sampleId)) { return sampleMap.get(sampleId); } else { @@ -164,79 +160,78 @@ public String getGeneticAlteration(int geneticProfileId, int sampleId, /** * Gets a HashMap of Values, keyed by Sample ID. - * - * @param geneticProfileId Genetic Profile ID. - * @param entrezGeneId Entrez Gene ID. + * @param geneticProfileId Genetic Profile ID. + * @param entrezGeneId Entrez Gene ID. * @return HashMap of values, keyed by Sample ID. * @throws DaoException Database Error. */ public HashMap getGeneticAlterationMap(int geneticProfileId, - long entrezGeneId) throws DaoException { - HashMap> map = getGeneticAlterationMap(geneticProfileId, Collections.singleton(entrezGeneId)); + long entrezGeneId) throws DaoException { + HashMap> map = getGeneticAlterationMap(geneticProfileId, Collections.singleton(entrezGeneId)); if (map.isEmpty()) { return new HashMap(); } - + return map.get(entrezGeneId); } /** * Returns the map of entrezGeneId as key and map with all - * respective CaseId and Values as value. - * - * @param geneticProfileId Genetic Profile ID. - * @param entrezGeneIds Entrez Gene IDs. - * @return Map>. + * respective CaseId and Values as value. + * + * @param geneticProfileId Genetic Profile ID. + * @param entrezGeneIds Entrez Gene IDs. + * @return Map>. * @throws DaoException Database Error. */ - public HashMap> getGeneticAlterationMap(int geneticProfileId, Collection entrezGeneIds) throws DaoException { - Collection geneticEntityIds = null; - if (entrezGeneIds != null) { - //translate entrezGeneIds to corresponding geneticEntityIds: - geneticEntityIds = new ArrayList(); - for (Long entrezGeneId : entrezGeneIds) { - geneticEntityIds.add(DaoGeneOptimized.getGeneticEntityId(entrezGeneId)); - } - } - HashMap> intermediateMap = getGeneticAlterationMapForEntityIds(geneticProfileId, geneticEntityIds); - //translate back to entrez, since intermediateMap is keyed by geneticEntityIds: - HashMap> resultMap = new HashMap>(); - Iterator>> mapIterator = intermediateMap.entrySet().iterator(); - while (mapIterator.hasNext()) { - Entry> mapEntry = mapIterator.next(); - resultMap.put(DaoGeneOptimized.getEntrezGeneId(mapEntry.getKey()), mapEntry.getValue()); - } - return resultMap; + public HashMap> getGeneticAlterationMap(int geneticProfileId, Collection entrezGeneIds) throws DaoException { + Collection geneticEntityIds = null; + if (entrezGeneIds != null) { + //translate entrezGeneIds to corresponding geneticEntityIds: + geneticEntityIds = new ArrayList(); + for (Long entrezGeneId : entrezGeneIds) { + geneticEntityIds.add(DaoGeneOptimized.getGeneticEntityId(entrezGeneId)); + } + } + HashMap> intermediateMap = getGeneticAlterationMapForEntityIds(geneticProfileId, geneticEntityIds); + //translate back to entrez, since intermediateMap is keyed by geneticEntityIds: + HashMap> resultMap = new HashMap>(); + Iterator>> mapIterator = intermediateMap.entrySet().iterator(); + while (mapIterator.hasNext()) { + Entry> mapEntry = mapIterator.next(); + resultMap.put(DaoGeneOptimized.getEntrezGeneId(mapEntry.getKey()), mapEntry.getValue()); + } + return resultMap; } - + /** * Returns the map of geneticEntityIds as key and map with all - * respective CaseId and Values as value. - * + * respective CaseId and Values as value. + * * @param geneticProfileId * @param geneticEntityIds - * @return Map>. + * @return Map>. * @throws DaoException */ - public HashMap> getGeneticAlterationMapForEntityIds(int geneticProfileId, Collection geneticEntityIds) throws DaoException { + public HashMap> getGeneticAlterationMapForEntityIds(int geneticProfileId, Collection geneticEntityIds) throws DaoException { Connection con = null; PreparedStatement pstmt = null; ResultSet rs = null; - HashMap> map = new HashMap>(); + HashMap> map = new HashMap>(); ArrayList orderedSampleList = DaoGeneticProfileSamples.getOrderedSampleList(geneticProfileId); - if (orderedSampleList == null || orderedSampleList.size() == 0) { - throw new IllegalArgumentException("Could not find any samples for genetic" + - " profile ID: " + geneticProfileId); + if (orderedSampleList == null || orderedSampleList.size() ==0) { + throw new IllegalArgumentException ("Could not find any samples for genetic" + + " profile ID: " + geneticProfileId); } try { con = JdbcUtil.getDbConnection(DaoGeneticAlteration.class); if (geneticEntityIds == null) { pstmt = con.prepareStatement("SELECT * FROM genetic_alteration WHERE" - + " GENETIC_PROFILE_ID = " + geneticProfileId); + + " GENETIC_PROFILE_ID = " + geneticProfileId); } else { pstmt = con.prepareStatement("SELECT * FROM genetic_alteration WHERE" - + " GENETIC_PROFILE_ID = " + geneticProfileId - + " AND GENETIC_ENTITY_ID IN (" + StringUtils.join(geneticEntityIds, ",") + ")"); + + " GENETIC_PROFILE_ID = " + geneticProfileId + + " AND GENETIC_ENTITY_ID IN ("+StringUtils.join(geneticEntityIds, ",")+")"); } rs = pstmt.executeQuery(); while (rs.next()) { @@ -245,7 +240,7 @@ public HashMap> getGeneticAlterationMapForEnti String values = rs.getString("VALUES"); //hm.debug.. String valueParts[] = values.split(DELIM); - for (int i = 0; i < valueParts.length; i++) { + for (int i=0; i> getGeneticAlterationMapForEnti /** * Process SQL result alteration data - * - * @param geneticProfileId Genetic Profile ID. - * @param entrezGeneIds Entrez Gene IDs. - * @param processor Implementation of AlterationProcesser Interface + * @param geneticProfileId Genetic Profile ID. + * @param entrezGeneIds Entrez Gene IDs. + * @param processor Implementation of AlterationProcesser Interface * @return ArrayList * @throws DaoException Database Error, MathException */ public static ArrayList getProcessedAlterationData( - int geneticProfileId, //queried profile internal id (num) - //Set entrezGeneIds, //list of genes in calculation gene pool (all genes or only cancer genes) - int offSet, //OFFSET for LIMIT (to get only one segment of the genes) - AlterationProcesser processor //implemented interface + int geneticProfileId, //queried profile internal id (num) + //Set entrezGeneIds, //list of genes in calculation gene pool (all genes or only cancer genes) + int offSet, //OFFSET for LIMIT (to get only one segment of the genes) + AlterationProcesser processor //implemented interface ) throws DaoException { ArrayList result = new ArrayList<>(); @@ -283,26 +277,26 @@ public static ArrayList getProcessedAlterationData( ResultSet rs = null; ArrayList orderedSampleList = DaoGeneticProfileSamples.getOrderedSampleList(geneticProfileId); - if (orderedSampleList == null || orderedSampleList.size() == 0) { - throw new IllegalArgumentException("Could not find any samples for genetic" + - " profile ID: " + geneticProfileId); + if (orderedSampleList == null || orderedSampleList.size() ==0) { + throw new IllegalArgumentException ("Could not find any samples for genetic" + + " profile ID: " + geneticProfileId); } try { con = JdbcUtil.getDbConnection(DaoGeneticAlteration.class); pstmt = con.prepareStatement("SELECT * FROM genetic_alteration WHERE" - + " GENETIC_PROFILE_ID = " + geneticProfileId - + " LIMIT 3000 OFFSET " + offSet); + + " GENETIC_PROFILE_ID = " + geneticProfileId + + " LIMIT 3000 OFFSET " + offSet); rs = pstmt.executeQuery(); while (rs.next()) { long entrezGeneId = DaoGeneOptimized.getEntrezGeneId(rs.getInt("GENETIC_ENTITY_ID")); String[] values = rs.getString("VALUES").split(DELIM); ObjectNode datum = processor.process( - entrezGeneId, - values, - orderedSampleList); + entrezGeneId, + values, + orderedSampleList); if (datum != null) result.add(datum); } return result; @@ -316,8 +310,7 @@ public static ArrayList getProcessedAlterationData( /** * Gets all Genes in a Specific Genetic Profile. - * - * @param geneticProfileId Genetic Profile ID. + * @param geneticProfileId Genetic Profile ID. * @return Set of Canonical Genes. * @throws DaoException Database Error. */ @@ -325,17 +318,17 @@ public Set getGenesInProfile(int geneticProfileId) throws DaoExce Connection con = null; PreparedStatement pstmt = null; ResultSet rs = null; - Set geneList = new HashSet(); + Set geneList = new HashSet (); DaoGeneOptimized daoGene = DaoGeneOptimized.getInstance(); try { con = JdbcUtil.getDbConnection(DaoGeneticAlteration.class); pstmt = con.prepareStatement - ("SELECT * FROM genetic_alteration WHERE GENETIC_PROFILE_ID = ?"); + ("SELECT * FROM genetic_alteration WHERE GENETIC_PROFILE_ID = ?"); pstmt.setInt(1, geneticProfileId); rs = pstmt.executeQuery(); - while (rs.next()) { + while (rs.next()) { Long entrezGeneId = DaoGeneOptimized.getEntrezGeneId(rs.getInt("GENETIC_ENTITY_ID")); geneList.add(daoGene.getGene(entrezGeneId)); } @@ -349,8 +342,7 @@ public Set getGenesInProfile(int geneticProfileId) throws DaoExce /** * Gets all Genes in a Specific Genetic Profile. - * - * @param geneticProfileId Genetic Profile ID. + * @param geneticProfileId Genetic Profile ID. * @return Set of Canonical Genes. * @throws DaoException Database Error. */ @@ -363,12 +355,12 @@ public static Set getEntityIdsInProfile(int geneticProfileId) throws Da try { con = JdbcUtil.getDbConnection(DaoGeneticAlteration.class); pstmt = con.prepareStatement - ("SELECT * FROM genetic_alteration WHERE GENETIC_PROFILE_ID = ?"); + ("SELECT * FROM genetic_alteration WHERE GENETIC_PROFILE_ID = ?"); pstmt.setInt(1, geneticProfileId); rs = pstmt.executeQuery(); - while (rs.next()) { - int geneticEntityId = rs.getInt("GENETIC_ENTITY_ID"); + while (rs.next()) { + int geneticEntityId = rs.getInt("GENETIC_ENTITY_ID"); geneticEntityList.add(geneticEntityId); } return geneticEntityList; @@ -381,8 +373,7 @@ public static Set getEntityIdsInProfile(int geneticProfileId) throws Da /** * Gets the total number of all genes in a Specific Genetic Profile. - * - * @param geneticProfileId Genetic Profile ID. + * @param geneticProfileId Genetic Profile ID. * @return number of Canonical Genes. * @throws DaoException Database Error. */ @@ -393,7 +384,7 @@ public static int getGenesCountInProfile(int geneticProfileId) throws DaoExcepti try { con = JdbcUtil.getDbConnection(DaoGeneticAlteration.class); pstmt = con.prepareStatement - ("SELECT COUNT(*) FROM genetic_alteration WHERE GENETIC_PROFILE_ID = ?"); + ("SELECT COUNT(*) FROM genetic_alteration WHERE GENETIC_PROFILE_ID = ?"); pstmt.setInt(1, geneticProfileId); rs = pstmt.executeQuery(); if (rs.next()) { @@ -409,7 +400,6 @@ public static int getGenesCountInProfile(int geneticProfileId) throws DaoExcepti /** * Gets total number of records in table. - * * @return number of records. * @throws DaoException Database Error. */ @@ -420,7 +410,7 @@ public int getCount() throws DaoException { try { con = JdbcUtil.getDbConnection(DaoGeneticAlteration.class); pstmt = con.prepareStatement - ("SELECT COUNT(*) FROM genetic_alteration"); + ("SELECT COUNT(*) FROM genetic_alteration"); rs = pstmt.executeQuery(); if (rs.next()) { return rs.getInt(1); @@ -446,7 +436,7 @@ public void deleteAllRecordsInGeneticProfile(long geneticProfileId) throws DaoEx try { con = JdbcUtil.getDbConnection(DaoGeneticAlteration.class); pstmt = con.prepareStatement("DELETE from " + - "genetic_alteration WHERE GENETIC_PROFILE_ID=?"); + "genetic_alteration WHERE GENETIC_PROFILE_ID=?"); pstmt.setLong(1, geneticProfileId); pstmt.executeUpdate(); } catch (SQLException e) { diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportExtendedMutationData.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportExtendedMutationData.java index a928ca4f477..a444d72edff 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportExtendedMutationData.java +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportExtendedMutationData.java @@ -59,8 +59,8 @@ *
* @author Selcuk Onur Sumer */ - public class ImportExtendedMutationData{ + private File mutationFile; private int geneticProfileId; private boolean swissprotIsAccession; diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportProfileData.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportProfileData.java index 4ab98b582eb..d53f78a4185 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportProfileData.java +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportProfileData.java @@ -33,14 +33,10 @@ package org.mskcc.cbio.portal.scripts; import java.io.*; -import java.util.ArrayList; -import java.util.Date; import java.util.Set; -import com.fasterxml.jackson.databind.ObjectMapper; import joptsimple.*; -import org.cbioportal.model.EntityType; import org.mskcc.cbio.portal.model.*; import org.mskcc.cbio.portal.util.*; @@ -126,7 +122,7 @@ public void run() { if (pdAnnotationsFilename != null && !"".equals(pdAnnotationsFilename)) { importer.setPdAnnotationsFile(new File(dataFile.getParent(), pdAnnotationsFilename)); } - importer.importData(numLines); + importer.importData(numLines); } } catch (Exception e) { diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportStructuralVariantData.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportStructuralVariantData.java index 228b395757d..3db9870f011 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportStructuralVariantData.java +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportStructuralVariantData.java @@ -44,7 +44,7 @@ */ public class ImportStructuralVariantData { - + // Initialize variables private File structuralVariantFile; private int geneticProfileId; diff --git a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportTabDelimData.java b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportTabDelimData.java index 2090d5c04af..dad2c451b6b 100644 --- a/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportTabDelimData.java +++ b/core/src/main/java/org/mskcc/cbio/portal/scripts/ImportTabDelimData.java @@ -28,23 +28,23 @@ * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . - */ +*/ package org.mskcc.cbio.portal.scripts; -import org.apache.commons.lang3.*; -import org.cbioportal.model.*; +import java.io.*; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +import org.apache.commons.lang3.ArrayUtils; +import org.cbioportal.model.EntityType; import org.mskcc.cbio.portal.dao.*; -import org.mskcc.cbio.portal.model.Geneset; -import org.mskcc.cbio.portal.model.Sample; import org.mskcc.cbio.portal.model.*; import org.mskcc.cbio.portal.util.*; -import java.io.*; -import java.util.*; -import java.util.regex.*; -import java.util.stream.*; - /** * Code to Import Copy Number Alteration, MRNA Expression Data, Methylation, or protein RPPA data @@ -58,8 +58,7 @@ public class ImportTabDelimData { public static final String CNA_VALUE_HOMOZYGOUS_DELETION = "-2"; public static final String CNA_VALUE_PARTIAL_DELETION = "-1.5"; public static final String CNA_VALUE_ZERO = "0"; - private HashSet importSetOfGenes = new HashSet(); - private HashSet importedGeneticEntitySet = new HashSet<>(); + private HashSet importedGeneticEntitySet = new HashSet<>(); private File dataFile; private String targetLine; private int geneticProfileId; @@ -72,7 +71,7 @@ public class ImportTabDelimData { private File pdAnnotationsFile; private Map, Map> pdAnnotations; private final GeneticAlterationImporter geneticAlterationImporter; - + /** * Constructor. * @@ -82,16 +81,10 @@ public class ImportTabDelimData { * @param geneticProfileId GeneticProfile ID. * @param genePanel GenePanel * @param genericEntityProperties Generic Assay Entities. - * + * * @deprecated : TODO shall we deprecate this feature (i.e. the targetLine)? */ - public ImportTabDelimData( - File dataFile, - String targetLine, - int geneticProfileId, - String genePanel, - String genericEntityProperties - ) throws DaoException { + public ImportTabDelimData(File dataFile, String targetLine, int geneticProfileId, String genePanel, String genericEntityProperties) { this.dataFile = dataFile; this.targetLine = targetLine; this.geneticProfileId = geneticProfileId; @@ -107,7 +100,7 @@ public ImportTabDelimData( * @param targetLine The line we want to import. * If null, all lines are imported. * @param geneticProfileId GeneticProfile ID. - * + * * @deprecated : TODO shall we deprecate this feature (i.e. the targetLine)? */ public ImportTabDelimData(File dataFile, String targetLine, int geneticProfileId, String genePanel) { @@ -124,11 +117,7 @@ public ImportTabDelimData(File dataFile, String targetLine, int geneticProfileId * @param dataFile Data File containing Copy Number Alteration, MRNA Expression Data, or protein RPPA data * @param geneticProfileId GeneticProfile ID. */ - public ImportTabDelimData( - File dataFile, - int geneticProfileId, - String genePanel - ) { + public ImportTabDelimData(File dataFile, int geneticProfileId, String genePanel) { this.dataFile = dataFile; this.geneticProfileId = geneticProfileId; this.genePanel = genePanel; @@ -149,21 +138,21 @@ public void importData(int numLines) throws IOException, DaoException { BufferedReader buf = new BufferedReader(reader); String headerLine = buf.readLine(); String parts[] = headerLine.split("\t"); - + //Whether data regards CNA or RPPA: boolean isDiscretizedCnaProfile = geneticProfile!=null - && geneticProfile.getGeneticAlterationType() == GeneticAlterationType.COPY_NUMBER_ALTERATION - && geneticProfile.showProfileInAnalysisTab(); + && geneticProfile.getGeneticAlterationType() == GeneticAlterationType.COPY_NUMBER_ALTERATION + && geneticProfile.showProfileInAnalysisTab(); boolean isRppaProfile = geneticProfile!=null - && geneticProfile.getGeneticAlterationType() == GeneticAlterationType.PROTEIN_LEVEL - && "Composite.Element.Ref".equalsIgnoreCase(parts[0]); + && geneticProfile.getGeneticAlterationType() == GeneticAlterationType.PROTEIN_LEVEL + && "Composite.Element.Ref".equalsIgnoreCase(parts[0]); boolean isGsvaProfile = geneticProfile!=null - && geneticProfile.getGeneticAlterationType() == GeneticAlterationType.GENESET_SCORE - && parts[0].equalsIgnoreCase("geneset_id"); + && geneticProfile.getGeneticAlterationType() == GeneticAlterationType.GENESET_SCORE + && parts[0].equalsIgnoreCase("geneset_id"); boolean isGenericAssayProfile = geneticProfile!=null - && geneticProfile.getGeneticAlterationType() == GeneticAlterationType.GENERIC_ASSAY - && parts[0].equalsIgnoreCase("ENTITY_STABLE_ID"); - + && geneticProfile.getGeneticAlterationType() == GeneticAlterationType.GENERIC_ASSAY + && parts[0].equalsIgnoreCase("ENTITY_STABLE_ID"); + int numRecordsToAdd = 0; int samplesSkipped = 0; try { @@ -188,7 +177,7 @@ public void importData(int numLines) throws IOException, DaoException { } else if (hugoSymbolIndex == -1 && entrezGeneIdIndex == -1) { throw new RuntimeException("Error: at least one of the following columns should be present: Hugo_Symbol or Entrez_Gene_Id"); } - + String sampleIds[]; sampleIds = new String[parts.length - sampleStartIndex]; System.arraycopy(parts, sampleStartIndex, sampleIds, 0, parts.length - sampleStartIndex); @@ -206,7 +195,7 @@ public void importData(int numLines) throws IOException, DaoException { this.pdAnnotations = new HashMap<>(); for (int i = 0; i < sampleIds.length; i++) { Sample sample = DaoSample.getSampleByCancerStudyAndSampleId(geneticProfile.getCancerStudyId(), - StableIdUtil.getSampleId(sampleIds[i])); + StableIdUtil.getSampleId(sampleIds[i])); // can be null in case of 'normal' sample, throw exception if not 'normal' and sample not found in db if (sample == null) { if (StableIdUtil.isNormal(sampleIds[i])) { @@ -245,17 +234,17 @@ public void importData(int numLines) throws IOException, DaoException { ProgressMonitor.setCurrentMessage(" --> total number of samples skipped (normal samples): " + samplesSkipped); } ProgressMonitor.setCurrentMessage(" --> total number of data lines: " + (numLines-1)); - + DaoGeneticProfileSamples.addGeneticProfileSamples(geneticProfileId, orderedSampleList); - + //Gene cache: DaoGeneOptimized daoGene = DaoGeneOptimized.getInstance(); - + //Object to insert records in the generic 'genetic_alteration' table: DaoGeneticAlteration daoGeneticAlteration = DaoGeneticAlteration.getInstance(); - + //cache for data found in cna_event' table: - Map existingCnaEvents = null; + Map existingCnaEvents = null; if (isDiscretizedCnaProfile) { existingCnaEvents = new HashMap(); for (CnaEvent.Event event : DaoCnaEvent.getAllCnaEvents()) { @@ -266,34 +255,34 @@ public void importData(int numLines) throws IOException, DaoException { // load entities map from database Map genericAssayStableIdToEntityIdMap = Collections.emptyMap(); - if (isGenericAssayProfile) { - genericAssayStableIdToEntityIdMap = GenericAssayMetaUtils.buildGenericAssayStableIdToEntityIdMap(); + if (isGenericAssayProfile) { + genericAssayStableIdToEntityIdMap = GenericAssayMetaUtils.buildGenericAssayStableIdToEntityIdMap(); } - + int lenParts = parts.length; - + String line = buf.readLine(); while (line != null) { ProgressMonitor.incrementCurValue(); ConsoleUtil.showProgress(); boolean recordAdded = false; - + // either parse line as geneset or gene for importing into 'genetic_alteration' table if (isGsvaProfile) { - recordAdded = parseGenesetLine(line, lenParts, sampleStartIndex, genesetIdIndex, - filteredSampleIndices, daoGeneticAlteration); + recordAdded = parseGenesetLine(line, lenParts, sampleStartIndex, genesetIdIndex, + filteredSampleIndices, daoGeneticAlteration); } else if (isGenericAssayProfile) { - recordAdded = parseGenericAssayLine(line, lenParts, sampleStartIndex, genericAssayIdIndex, - filteredSampleIndices, daoGeneticAlteration, genericAssayStableIdToEntityIdMap); + recordAdded = parseGenericAssayLine(line, lenParts, sampleStartIndex, genericAssayIdIndex, + filteredSampleIndices, daoGeneticAlteration, genericAssayStableIdToEntityIdMap); } else { - recordAdded = parseLine(line, lenParts, sampleStartIndex, - hugoSymbolIndex, entrezGeneIdIndex, rppaGeneRefIndex, - isRppaProfile, isDiscretizedCnaProfile, - daoGene, - filteredSampleIndices, orderedSampleList, - existingCnaEvents, daoGeneticAlteration); + recordAdded = parseLine(line, lenParts, sampleStartIndex, + hugoSymbolIndex, entrezGeneIdIndex, rppaGeneRefIndex, + isRppaProfile, isDiscretizedCnaProfile, + daoGene, + filteredSampleIndices, orderedSampleList, + existingCnaEvents, daoGeneticAlteration); } - + // increment number of records added or entries skipped if (recordAdded) { numRecordsToAdd++; @@ -301,13 +290,13 @@ public void importData(int numLines) throws IOException, DaoException { else { entriesSkipped++; } - + line = buf.readLine(); } if (MySQLbulkLoader.isBulkLoad()) { - MySQLbulkLoader.flushAll(); + MySQLbulkLoader.flushAll(); } - + if (isRppaProfile) { ProgressMonitor.setCurrentMessage(" --> total number of extra records added because of multiple genes in one line: " + nrExtraRecords); } @@ -317,12 +306,12 @@ public void importData(int numLines) throws IOException, DaoException { if (numRecordsToAdd == 0) { throw new DaoException ("Something has gone wrong! I did not save any records" + - " to the database!"); + " to the database!"); } } finally { buf.close(); - } + } } private Map, Map> readPdAnnotations(File pdAnnotationsFile) { @@ -379,138 +368,138 @@ private Map, Map> readPdAnnotations(File } reader.close(); } catch (IOException e) { - throw new RuntimeException("Can't read PD annotation file", e); + throw new RuntimeException("Can't read PD annotation file", e); } return pdAnnotations; } /** - * Attempt to create a genetic_alteration record based on the current line read from a profile data file. - *
    - *
  1. Commented out lines and blank lines are always skipped (returns false) - *
  2. The line is split into columns by the tab delimiter - *
  3. The involved genes (list of entrez_gene_ids) are determined: - *
      - *
    1. Hugo_Symbol and Entrez_Gene_Id column indices are read and validated - *
    2. if neither are available, the line is skipped - *
    3. if Hugo_Symbol contains '///' or '---', the line is skipped - *
    4. rppaProfile parsing has special rules for determining the involved genes - *
    5. if Entrez_Gene_Id is available, use that to determine the involved genes - *
    6. if Hugo_Symbol is available, use that to determine the involved genes (truncate symbols with '|' in them) - *
    7. if the involved genes list is still empty, the line is skipped (returns false) - *
    - *
  4. Both gene_alias and gene records are examined to see how many genes of type 'miRNA' are matched - *
  5. If any matched record is of type 'miRNA': - *
      - *
    • Loop through each gene or gene_alias of type 'miRNA' and attempt to store the record under that gene in genetic_alteration - *
    • If no records were successfully stored in genetic_alteration, log the failure - *
    - *
  6. If no matched record is of type 'miRNA': - *
  7. if there is exactly 1 involved gene (using only the gene table if sufficient, or gene_alias if neccessary): - *
      - *
    1. if this is a 'discretizedCnaProfile', normalize the CNA values and create a list of cnaEvents to be added - *
    2. attempt to store the record in genetic_alteration - *
    3. if the record is successfully stored (not duplicated), create (or update) records in sample_cna_event for the created list of cnaEvents (if any) - *
    - *
  8. if there are several involved genes and the profile is an rppaProfile, loop through the genes; for each one: - *
      - *
    1. attempt to store the record under that gene in genetic_alteration - *
    2. count the number of successfully imported records (for logging) - *
    - *
      - *
    • after looping through all involved genes, check whether any records were successfully stored in genetic_alteration - if not log the failure - *
    - *
  9. if there are several involved genes and the profile is not an rppaProfile, log a failure to import the current line due to ambiguous gene symbol - *
- *
  • If a record was (or more than one were) successfully stored in genetic_alteration, return true ; else false - * - *

    - * During the import of any single profile data file, at most one record per Entrez_Gene_Id will be successfuly imported to genetic_alteration. - * Each attempt to import is done through a call to the function storeGeneticAlterations(). - * That function will check an instance variable importSetOfGenes, and if the gene has been previously imported, no new attempt is made (failure). - * Each time a gene is successfully imported, it is added to importSetOfGenes. - *

    - * MicroRNA are treated specially because of the possible presence of constructed combination forms (such as 'MIR-100/100*' and 'MIR-100/100'). - * In these cases a Hugo_Symbol such as 'hsa-mir-100' may be expected to match the (fake) Entrez_Gene_Id for both of these combination forms. - * In that case, we want to import several copies of the genetic alteration profile line .. one for each matched gene of type 'miRNA'. - * This allows the visualization of both CNA event profiles for the microRNA precursor with expression profiles for the microRNA mature form. - *

    - * The current implementation of this code does not attempt to "merge" / "unify" lines in the profile data file which have duplicated Entrez_Gene_Id. - * Instead, the first encountered line which maps to the Entrez_Gene_Id will be stored as a record in genetic_alteration (returns true). - * Later lines which attempt to store a record with that Entrez_Gene_Id will not be stored as a record in genetic_alteration (returns false). - * For microRNA gene aliases it is possible that complex interactions will occur, where an earlier line in the data file stores a record under several Entrez_Gene_Ids, and a later line in the file fails to store records under some of those previously 'used' Entrez_Gene_Ids, but succeeds in storing a record under one or more not previously used Entrez_Gene_Ids. So a microRNA line from the file may be imported "partially successfully" (returns true). - *

    - * Examples Cases:
    - * Gene records are P1, P2, P3, P4 (protein coding), M1, M2, M3 (microRNA). - * Gene_Symbol AMA is gene_alias for M1 and M2, Gene_Symbol AMB is gene_alias for M2 and M3, Gene_Symbol AAMBIG is gene_alias for P3 and P4. Gene_Symbol AMIXED is gene_alias for P1 and M3. - *

    - * Case_1 (the last two lines will be skipped and logged like "Gene P1 (#) found to be duplicated in your file. Duplicated row will be ignored!")
    - * - *
    Hugo_SymbolSample1... - *
    P10... - *
    P20... - *
    P10... - *
    P10... - *
    - *

    - * Case_2 (the last line will be skipped and logged like "Gene M1 (#) (given as alias in your file as: AMA) found to be duplicated in your file. Duplicated row will be ignored!" , "Gene M2 (#) (given as alias in your file as: AMA) found to be duplicated in your file. Duplicated row will be ignored!" , "Could not store microRNA or RPPA data" )
    - * - *
    Hugo_SymbolSample1... - *
    AMA0... - *
    AMA0... - *
    - *

    - * Case_3 (the last line in the file will fail with log messages like "Gene symbol AAMBIG found to be ambiguous. Record will be skipped for this gene.")
    - * - *
    Hugo_SymbolSample1... - *
    P10... - *
    P20... - *
    AAMBIG0... - *
    - *

    - * Case_4 (the second to last line will partially succeed, storing a record in genetic_alteration for gene M3 but failing for M2 with a log message like "Gene M2 (#) (given as alias in your file as: AMB) found to be duplicated in your file. Duplicated row will be ignored!" ; the last line in the file will fail with log messages like "Gene M3 (#) (given as alias in your file as: AMIXED) found to be duplicated in your file. Duplicated row will be ignored!" , "Gene symbol AMIXED found to be ambiguous (a mixture of microRNA and other types). Record will be skipped for this gene.")
    - * - *
    Hugo_SymbolSample1... - *
    AMA0... - *
    AMB0... - *
    AMIXED0... - *
    - * - * @param line the line from the profile data file to be parsed - * @param nrColumns the number of columns, defined by the header line - * @param sampleStartIndex the index of the first column with a sample name in the header field - * @param hugoSymbolIndex the index of the column Hugo_Symbol - * @param entrezGeneIdIndex the index of the column Entrez_Gene_Id - * @param rppaGeneRefIndex the index of the column Composite.Element.Ref - * @param isRppaProfile true if this is an rppa profile (i.e. alteration type is PROTEIN_LEVEL and the first column is Composite.Element.Ref) - * @param isDiscretizedCnaProfile true if this is a discretized CNA profile (i.e. alteration type COPY_NUMBER_ALTERATION and showProfileInAnalysisTab is true) - * @param daoGene an instance of DaoGeneOptimized ... for use in resolving gene symbols - * @param orderedSampleList a list of the internal sample ids corresponding to the sample names in the header line - * @param existingCnaEvents a collection of CnaEvents, to be added to or updated during parsing of individual lines - * @param daoGeneticAlteration in instance of DaoGeneticAlteration ... for use in storing records in the genetic_alteration table - * @return true if any record was stored in genetic_alteration, else false - * @throws DaoException if any DaoException is thrown while using daoGene or daoGeneticAlteration - */ - private boolean parseLine(String line, int nrColumns, int sampleStartIndex, - int hugoSymbolIndex, int entrezGeneIdIndex, int rppaGeneRefIndex, - boolean isRppaProfile, boolean isDiscretizedCnaProfile, - DaoGeneOptimized daoGene, - List filteredSampleIndices, List orderedSampleList, - Map existingCnaEvents, DaoGeneticAlteration daoGeneticAlteration - ) throws DaoException { - + * Attempt to create a genetic_alteration record based on the current line read from a profile data file. + *

      + *
    1. Commented out lines and blank lines are always skipped (returns false) + *
    2. The line is split into columns by the tab delimiter + *
    3. The involved genes (list of entrez_gene_ids) are determined: + *
        + *
      1. Hugo_Symbol and Entrez_Gene_Id column indices are read and validated + *
      2. if neither are available, the line is skipped + *
      3. if Hugo_Symbol contains '///' or '---', the line is skipped + *
      4. rppaProfile parsing has special rules for determining the involved genes + *
      5. if Entrez_Gene_Id is available, use that to determine the involved genes + *
      6. if Hugo_Symbol is available, use that to determine the involved genes (truncate symbols with '|' in them) + *
      7. if the involved genes list is still empty, the line is skipped (returns false) + *
      + *
    4. Both gene_alias and gene records are examined to see how many genes of type 'miRNA' are matched + *
    5. If any matched record is of type 'miRNA': + *
        + *
      • Loop through each gene or gene_alias of type 'miRNA' and attempt to store the record under that gene in genetic_alteration + *
      • If no records were successfully stored in genetic_alteration, log the failure + *
      + *
    6. If no matched record is of type 'miRNA': + *
    7. if there is exactly 1 involved gene (using only the gene table if sufficient, or gene_alias if neccessary): + *
        + *
      1. if this is a 'discretizedCnaProfile', normalize the CNA values and create a list of cnaEvents to be added + *
      2. attempt to store the record in genetic_alteration + *
      3. if the record is successfully stored (not duplicated), create (or update) records in sample_cna_event for the created list of cnaEvents (if any) + *
      + *
    8. if there are several involved genes and the profile is an rppaProfile, loop through the genes; for each one: + *
        + *
      1. attempt to store the record under that gene in genetic_alteration + *
      2. count the number of successfully imported records (for logging) + *
      + *
        + *
      • after looping through all involved genes, check whether any records were successfully stored in genetic_alteration - if not log the failure + *
      + *
    9. if there are several involved genes and the profile is not an rppaProfile, log a failure to import the current line due to ambiguous gene symbol + *
    + *
  • If a record was (or more than one were) successfully stored in genetic_alteration, return true ; else false + * + *

    + * During the import of any single profile data file, at most one record per Entrez_Gene_Id will be successfuly imported to genetic_alteration. + * Each attempt to import is done through a call to the function storeGeneticAlterations(). + * That function will check an instance variable importSetOfGenes, and if the gene has been previously imported, no new attempt is made (failure). + * Each time a gene is successfully imported, it is added to importSetOfGenes. + *

    + * MicroRNA are treated specially because of the possible presence of constructed combination forms (such as 'MIR-100/100*' and 'MIR-100/100'). + * In these cases a Hugo_Symbol such as 'hsa-mir-100' may be expected to match the (fake) Entrez_Gene_Id for both of these combination forms. + * In that case, we want to import several copies of the genetic alteration profile line .. one for each matched gene of type 'miRNA'. + * This allows the visualization of both CNA event profiles for the microRNA precursor with expression profiles for the microRNA mature form. + *

    + * The current implementation of this code does not attempt to "merge" / "unify" lines in the profile data file which have duplicated Entrez_Gene_Id. + * Instead, the first encountered line which maps to the Entrez_Gene_Id will be stored as a record in genetic_alteration (returns true). + * Later lines which attempt to store a record with that Entrez_Gene_Id will not be stored as a record in genetic_alteration (returns false). + * For microRNA gene aliases it is possible that complex interactions will occur, where an earlier line in the data file stores a record under several Entrez_Gene_Ids, and a later line in the file fails to store records under some of those previously 'used' Entrez_Gene_Ids, but succeeds in storing a record under one or more not previously used Entrez_Gene_Ids. So a microRNA line from the file may be imported "partially successfully" (returns true). + *

    + * Examples Cases:
    + * Gene records are P1, P2, P3, P4 (protein coding), M1, M2, M3 (microRNA). + * Gene_Symbol AMA is gene_alias for M1 and M2, Gene_Symbol AMB is gene_alias for M2 and M3, Gene_Symbol AAMBIG is gene_alias for P3 and P4. Gene_Symbol AMIXED is gene_alias for P1 and M3. + *

    + * Case_1 (the last two lines will be skipped and logged like "Gene P1 (#) found to be duplicated in your file. Duplicated row will be ignored!")
    + * + *
    Hugo_SymbolSample1... + *
    P10... + *
    P20... + *
    P10... + *
    P10... + *
    + *

    + * Case_2 (the last line will be skipped and logged like "Gene M1 (#) (given as alias in your file as: AMA) found to be duplicated in your file. Duplicated row will be ignored!" , "Gene M2 (#) (given as alias in your file as: AMA) found to be duplicated in your file. Duplicated row will be ignored!" , "Could not store microRNA or RPPA data" )
    + * + *
    Hugo_SymbolSample1... + *
    AMA0... + *
    AMA0... + *
    + *

    + * Case_3 (the last line in the file will fail with log messages like "Gene symbol AAMBIG found to be ambiguous. Record will be skipped for this gene.")
    + * + *
    Hugo_SymbolSample1... + *
    P10... + *
    P20... + *
    AAMBIG0... + *
    + *

    + * Case_4 (the second to last line will partially succeed, storing a record in genetic_alteration for gene M3 but failing for M2 with a log message like "Gene M2 (#) (given as alias in your file as: AMB) found to be duplicated in your file. Duplicated row will be ignored!" ; the last line in the file will fail with log messages like "Gene M3 (#) (given as alias in your file as: AMIXED) found to be duplicated in your file. Duplicated row will be ignored!" , "Gene symbol AMIXED found to be ambiguous (a mixture of microRNA and other types). Record will be skipped for this gene.")
    + * + *
    Hugo_SymbolSample1... + *
    AMA0... + *
    AMB0... + *
    AMIXED0... + *
    + * + * @param line the line from the profile data file to be parsed + * @param nrColumns the number of columns, defined by the header line + * @param sampleStartIndex the index of the first column with a sample name in the header field + * @param hugoSymbolIndex the index of the column Hugo_Symbol + * @param entrezGeneIdIndex the index of the column Entrez_Gene_Id + * @param rppaGeneRefIndex the index of the column Composite.Element.Ref + * @param isRppaProfile true if this is an rppa profile (i.e. alteration type is PROTEIN_LEVEL and the first column is Composite.Element.Ref) + * @param isDiscretizedCnaProfile true if this is a discretized CNA profile (i.e. alteration type COPY_NUMBER_ALTERATION and showProfileInAnalysisTab is true) + * @param daoGene an instance of DaoGeneOptimized ... for use in resolving gene symbols + * @param orderedSampleList a list of the internal sample ids corresponding to the sample names in the header line + * @param existingCnaEvents a collection of CnaEvents, to be added to or updated during parsing of individual lines + * @param daoGeneticAlteration in instance of DaoGeneticAlteration ... for use in storing records in the genetic_alteration table + * @return true if any record was stored in genetic_alteration, else false + * @throws DaoException if any DaoException is thrown while using daoGene or daoGeneticAlteration + */ + private boolean parseLine(String line, int nrColumns, int sampleStartIndex, + int hugoSymbolIndex, int entrezGeneIdIndex, int rppaGeneRefIndex, + boolean isRppaProfile, boolean isDiscretizedCnaProfile, + DaoGeneOptimized daoGene, + List filteredSampleIndices, List orderedSampleList, + Map existingCnaEvents, DaoGeneticAlteration daoGeneticAlteration + ) throws DaoException { + //TODO: refactor this entire function - split functionality into smaller units / subroutines - boolean recordStored = false; - + boolean recordStored = false; + // Ignore lines starting with # if (!line.startsWith("#") && line.trim().length() > 0) { String[] parts = line.split("\t",-1); - + if (parts.length>nrColumns) { if (line.split("\t").length>nrColumns) { ProgressMonitor.logWarning("Ignoring line with more fields (" + parts.length - + ") than specified in the headers(" + nrColumns + "): \n"+parts[0]); + + ") than specified in the headers(" + nrColumns + "): \n"+parts[0]); return false; } } @@ -545,9 +534,9 @@ else if (!entrez.matches("[0-9]+")) { //TODO - would be better to give an exception in some cases, like negative Entrez values ProgressMonitor.logWarning("Ignoring line with invalid Entrez_Id " + entrez); return false; - } + } } - + //If all are empty, skip line: if (geneSymbol == null && entrez == null) { ProgressMonitor.logWarning("Ignoring line with no Hugo_Symbol or Entrez_Id value"); @@ -571,7 +560,7 @@ else if (!entrez.matches("[0-9]+")) { //will be null when there is a parse error in this case, so we //can return here and avoid duplicated messages: return false; - } + } if (genes.isEmpty()) { String gene = (geneSymbol != null) ? geneSymbol : entrez; ProgressMonitor.logWarning("Gene not found for: [" + gene @@ -648,16 +637,16 @@ else if (!entrez.matches("[0-9]+")) { // none of the matched genes are type "miRNA" if (genes.size() == 1) { List cnaEventsToAdd = new ArrayList(); - + if (isDiscretizedCnaProfile) { long entrezGeneId = genes.get(0).getEntrezGeneId(); for (int i = 0; i < values.length; i++) { - + // temporary solution -- change partial deletion back to full deletion. if (values[i].equals(CNA_VALUE_PARTIAL_DELETION)) { values[i] = CNA_VALUE_HOMOZYGOUS_DELETION; } - if (values[i].equals(CNA_VALUE_AMPLIFICATION) + if (values[i].equals(CNA_VALUE_AMPLIFICATION) // || values[i].equals(CNA_VALUE_GAIN) >> skipping GAIN, ZERO, HEMIZYGOUS_DELETION to minimize size of dataset in DB // || values[i].equals(CNA_VALUE_ZERO) // || values[i].equals(CNA_VALUE_HEMIZYGOUS_DELETION) @@ -683,7 +672,7 @@ else if (!entrez.matches("[0-9]+")) { //only add extra CNA related records if the step above worked, otherwise skip: if (recordStored) { CnaUtil.storeCnaEvents(existingCnaEvents, cnaEventsToAdd); - } + } } else { if (isRppaProfile) { // for protein data, duplicate the data for (CanonicalGene gene : genes) { @@ -714,7 +703,7 @@ else if (!entrez.matches("[0-9]+")) { } return recordStored; } - + /** * Parses line for gene set record and stores record in 'genetic_alteration' table. * @param line @@ -724,33 +713,33 @@ else if (!entrez.matches("[0-9]+")) { * @param filteredSampleIndices * @param daoGeneticAlteration * @return - * @throws DaoException + * @throws DaoException */ private boolean parseGenesetLine(String line, int nrColumns, int sampleStartIndex, int genesetIdIndex, - List filteredSampleIndices, DaoGeneticAlteration daoGeneticAlteration) throws DaoException { + List filteredSampleIndices, DaoGeneticAlteration daoGeneticAlteration) throws DaoException { boolean storedRecord = false; - + if (!line.startsWith("#") && line.trim().length() > 0) { String[] parts = line.split("\t",-1); if (parts.length>nrColumns) { if (line.split("\t").length>nrColumns) { ProgressMonitor.logWarning("Ignoring line with more fields (" + parts.length - + ") than specified in the headers(" + nrColumns + "): \n"+parts[0]); + + ") than specified in the headers(" + nrColumns + "): \n"+parts[0]); return false; } } - + String values[] = (String[]) ArrayUtils.subarray(parts, sampleStartIndex, parts.length>nrColumns?nrColumns:parts.length); // trim whitespace from values values = Stream.of(values).map(String::trim).toArray(String[]::new); values = filterOutNormalValues(filteredSampleIndices, values); - + Geneset geneset = DaoGeneset.getGenesetByExternalId(parts[genesetIdIndex]); if (geneset != null) { - storedRecord = storeGeneticEntityGeneticAlterations(values, daoGeneticAlteration, geneset.getGeneticEntityId(), - EntityType.GENESET, geneset.getExternalId()); + storedRecord = storeGeneticEntityGeneticAlterations(values, daoGeneticAlteration, geneset.getGeneticEntityId(), + EntityType.GENESET, geneset.getExternalId()); } else { ProgressMonitor.logWarning("Geneset " + parts[genesetIdIndex] + " not found in DB. Record will be skipped."); @@ -768,39 +757,39 @@ private boolean parseGenesetLine(String line, int nrColumns, int sampleStartInde * @param filteredSampleIndices * @param daoGeneticAlteration * @return - * @throws DaoException + * @throws DaoException */ private boolean parseGenericAssayLine(String line, int nrColumns, int sampleStartIndex, int genericAssayIdIndex, - List filteredSampleIndices, DaoGeneticAlteration daoGeneticAlteration, Map genericAssayStableIdToEntityIdMap) throws DaoException { + List filteredSampleIndices, DaoGeneticAlteration daoGeneticAlteration, Map genericAssayStableIdToEntityIdMap) throws DaoException { boolean recordIsStored = false; - + if (!line.startsWith("#") && line.trim().length() > 0) { String[] parts = line.split("\t", -1); if (parts.length > nrColumns) { if (line.split("\t").length > nrColumns) { ProgressMonitor.logWarning("Ignoring line with more fields (" + parts.length - + ") than specified in the headers(" + nrColumns + "): \n"+parts[0]); + + ") than specified in the headers(" + nrColumns + "): \n"+parts[0]); return false; } } - + String values[] = (String[]) ArrayUtils.subarray(parts, sampleStartIndex, parts.length>nrColumns?nrColumns:parts.length); // trim whitespace from values values = Stream.of(values).map(String::trim).toArray(String[]::new); values = filterOutNormalValues(filteredSampleIndices, values); - + String stableId = parts[genericAssayIdIndex]; Integer entityId = genericAssayStableIdToEntityIdMap.getOrDefault(stableId, null); - + if (entityId == null) { ProgressMonitor.logWarning("Generic Assay entity " + parts[genericAssayIdIndex] + " not found in DB. Record will be skipped."); } else { - recordIsStored = storeGeneticEntityGeneticAlterations(values, daoGeneticAlteration, entityId, - EntityType.GENERIC_ASSAY, stableId); + recordIsStored = storeGeneticEntityGeneticAlterations(values, daoGeneticAlteration, entityId, + EntityType.GENERIC_ASSAY, stableId); } return recordIsStored; @@ -819,14 +808,14 @@ private boolean parseGenericAssayLine(String line, int nrColumns, int sampleStar * @return boolean indicating if record was stored successfully or not */ private boolean storeGeneticEntityGeneticAlterations(String[] values, DaoGeneticAlteration daoGeneticAlteration, - Integer geneticEntityId, EntityType geneticEntityType, String geneticEntityName) { + Integer geneticEntityId, EntityType geneticEntityType, String geneticEntityName) { try { if (importedGeneticEntitySet.add(geneticEntityId)) { daoGeneticAlteration.addGeneticAlterationsForGeneticEntity(geneticProfile.getGeneticProfileId(), geneticEntityId, values); return true; } else { - ProgressMonitor.logWarning("Data for genetic entity " + geneticEntityName + ProgressMonitor.logWarning("Data for genetic entity " + geneticEntityName + " [" + geneticEntityType +"] already imported from file. Record will be skipped."); return false; } @@ -838,7 +827,7 @@ private boolean storeGeneticEntityGeneticAlterations(String[] values, DaoGenetic /** * Tries to parse the genes and look them up in DaoGeneOptimized - * + * * @param antibodyWithGene * @return returns null if something was wrong, e.g. could not parse the antibodyWithGene string; returns * a list with 0 or more elements otherwise. @@ -886,7 +875,7 @@ private List parseRPPAGenes(String antibodyWithGene) throws DaoEx for (String symbol : symbolsNotFound) { ProgressMonitor.logWarning("Gene " + symbol + " not found in DB. Record will be skipped for this gene."); } - + Pattern p = Pattern.compile("(p[STY][0-9]+(?:_[STY][0-9]+)*)"); Matcher m = p.matcher(arrayId); String residue; @@ -899,7 +888,7 @@ private List parseRPPAGenes(String antibodyWithGene) throws DaoEx return importPhosphoGene(genes, residue); } } - + private List importPhosphoGene(List genes, String residue) throws DaoException { DaoGeneOptimized daoGene = DaoGeneOptimized.getInstance(); List phosphoGenes = new ArrayList(); @@ -921,7 +910,7 @@ private List importPhosphoGene(List genes, String return phosphoGenes; } - + // returns index for geneset id column private int getGenesetIdIndex(String[] headers) { return getColIndexByName(headers, "geneset_id"); @@ -930,19 +919,19 @@ private int getGenesetIdIndex(String[] headers) { private int getGenericAssayIdIndex(String[] headers) { return getColIndexByName(headers, "ENTITY_STABLE_ID"); } - + private int getHugoSymbolIndex(String[] headers) { return getColIndexByName(headers, "Hugo_Symbol"); } - + private int getEntrezGeneIdIndex(String[] headers) { return getColIndexByName(headers, "Entrez_Gene_Id"); } - + private int getRppaGeneRefIndex(String[] headers) { return getColIndexByName(headers, "Composite.Element.Ref"); } - + // helper function for finding the index of a column by name private int getColIndexByName(String[] headers, String colName) { for (int i=0; i featureColNames = new ArrayList(); featureColNames.add("Gene Symbol"); @@ -977,7 +966,7 @@ private int getStartIndex(String[] headers, int ...featureColIds) { } int startIndex = -1; - + for (int i=0; i e.getAlteration().getDescription()) .collect(toList()); assertEquals(2, cnaEvents.size()); - assertEquals("Amplified,Homozygously deleted", String.join(",", cnaEvents)); + assertTrue(newArrayList("Amplified", "Homozygously deleted").containsAll(cnaEvents)); // Test gene with partial deletion and amplification has two cna events: List convertedCnaEvents = resultCnaEvents @@ -141,7 +142,7 @@ public void testImportCnaDiscreteLongDataAddsCnaEvents() throws Exception { .map(e -> e.getAlteration().getDescription()) .collect(toList()); assertEquals(2, cnaEvents.size()); - assertEquals("Amplified,Homozygously deleted", String.join(",", cnaEvents)); + assertTrue( newArrayList("Amplified", "Homozygously deleted").containsAll(cnaEvents)); // Test gene with homozygous deletion and amplification has no cna events: List skippedCnaEvents = resultCnaEvents diff --git a/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportCopyNumberSegmentData.java b/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportCopyNumberSegmentData.java index 53b5db61b50..dc8e4239385 100644 --- a/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportCopyNumberSegmentData.java +++ b/core/src/test/java/org/mskcc/cbio/portal/scripts/TestImportCopyNumberSegmentData.java @@ -19,7 +19,7 @@ * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . - */ +*/ package org.mskcc.cbio.portal.scripts; @@ -62,45 +62,45 @@ @Transactional public class TestImportCopyNumberSegmentData { - @Autowired - ApplicationContext applicationContext; - - //To use in test cases where we expect an exception: - @Rule - public ExpectedException exception = ExpectedException.none(); - - - @Before - public void setUp() throws DaoException - { - //set it, to avoid this being set to the runtime (not for testing) application context: - SpringUtil.setApplicationContext(applicationContext); - } - - /** + @Autowired + ApplicationContext applicationContext; + + //To use in test cases where we expect an exception: + @Rule + public ExpectedException exception = ExpectedException.none(); + + + @Before + public void setUp() throws DaoException + { + //set it, to avoid this being set to the runtime (not for testing) application context: + SpringUtil.setApplicationContext(applicationContext); + } + + /** * Test importing of Clinical Data File. * * @throws DaoException Database Access Error. * @throws IOException IO Error. */ - @Test + @Test public void testImportSegmentDataNewStudy() throws Exception { - //new dummy study to simulate importing clinical data in empty study: - CancerStudy cancerStudy = new CancerStudy("testnewseg","testnewseg","testnewseg","brca",true); + //new dummy study to simulate importing clinical data in empty study: + CancerStudy cancerStudy = new CancerStudy("testnewseg","testnewseg","testnewseg","brca",true); cancerStudy.setReferenceGenome("hg19"); - DaoCancerStudy.addCancerStudy(cancerStudy); + DaoCancerStudy.addCancerStudy(cancerStudy); addTestPatientAndSampleRecords(new File("src/test/resources/segment/data_cna_hg19.seg"), cancerStudy); String[] args = { - "--data","src/test/resources/segment/data_cna_hg19.seg", - "--meta","src/test/resources/segment/meta_cna_hg19_seg.txt", - "--loadMode", "bulkLoad" - }; + "--data","src/test/resources/segment/data_cna_hg19.seg", + "--meta","src/test/resources/segment/meta_cna_hg19_seg.txt", + "--loadMode", "bulkLoad" + }; ImportCopyNumberSegmentData runner = new ImportCopyNumberSegmentData(args); - runner.run(); + runner.run(); //TODO : fix test to actually store data and add some checks - - } + + } private void addTestPatientAndSampleRecords(File file, CancerStudy cancerStudy) throws FileNotFoundException, IOException, DaoException { // extract sample ids from first column @@ -124,4 +124,4 @@ private void addTestPatientAndSampleRecords(File file, CancerStudy cancerStudy) } MySQLbulkLoader.flushAll(); } -} \ No newline at end of file +} diff --git a/db-scripts/src/main/resources/cgds.sql b/db-scripts/src/main/resources/cgds.sql index 797bfc0b45b..f28432e323d 100644 --- a/db-scripts/src/main/resources/cgds.sql +++ b/db-scripts/src/main/resources/cgds.sql @@ -590,7 +590,6 @@ CREATE TABLE `cna_event` ( `CNA_EVENT_ID` int(255) NOT NULL auto_increment, `ENTREZ_GENE_ID` int(11) NOT NULL, `ALTERATION` tinyint NOT NULL, - `ANNOTATION_JSON` JSON, PRIMARY KEY (`CNA_EVENT_ID`), UNIQUE (`ENTREZ_GENE_ID`, `ALTERATION`), FOREIGN KEY (`ENTREZ_GENE_ID`) REFERENCES `gene` (`ENTREZ_GENE_ID`), @@ -761,4 +760,4 @@ CREATE TABLE `resource_study` ( ); -- THIS MUST BE KEPT IN SYNC WITH db.version PROPERTY IN pom.xml -INSERT INTO info VALUES ('2.12.15', NULL); +INSERT INTO info VALUES ('2.12.14', NULL); diff --git a/db-scripts/src/main/resources/migration.sql b/db-scripts/src/main/resources/migration.sql index ea77948547e..c3c4fa5a069 100644 --- a/db-scripts/src/main/resources/migration.sql +++ b/db-scripts/src/main/resources/migration.sql @@ -992,7 +992,3 @@ ALTER TABLE `structural_variant` DROP COLUMN `SITE2_EXON`; ALTER TABLE `structural_variant` DROP COLUMN `CENTER`; ALTER TABLE `structural_variant` DROP COLUMN `EXTERNAL_ANNOTATION`; UPDATE `info` SET `DB_SCHEMA_VERSION`="2.12.14"; - -##version: 2.12.15 -ALTER TABLE `cna_event` ADD COLUMN `ANNOTATION_JSON` JSON AFTER `ALTERATION`; -UPDATE `info` SET `DB_SCHEMA_VERSION`="2.12.15"; diff --git a/model/src/main/java/org/cbioportal/model/DiscreteCopyNumberData.java b/model/src/main/java/org/cbioportal/model/DiscreteCopyNumberData.java index cbeb2862d16..054c68164c0 100644 --- a/model/src/main/java/org/cbioportal/model/DiscreteCopyNumberData.java +++ b/model/src/main/java/org/cbioportal/model/DiscreteCopyNumberData.java @@ -1,19 +1,13 @@ package org.cbioportal.model; -import com.fasterxml.jackson.annotation.JsonRawValue; -import io.swagger.annotations.ApiModelProperty; - import java.io.Serializable; import javax.validation.constraints.NotNull; public class DiscreteCopyNumberData extends Alteration implements Serializable { + @NotNull private Integer alteration; - - @JsonRawValue - @ApiModelProperty(dataType = "java.util.Map") - private String annotationJson; - + public Integer getAlteration() { return alteration; } @@ -21,13 +15,4 @@ public Integer getAlteration() { public void setAlteration(Integer alteration) { this.alteration = alteration; } - - public String getAnnotationJson() { - return annotationJson; - } - - public void setAnnotationJson(String annotationJson) { - this.annotationJson = annotationJson; - } - } diff --git a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/DiscreteCopyNumberMapper.xml b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/DiscreteCopyNumberMapper.xml index 6e63ab375ed..0caffb5697d 100644 --- a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/DiscreteCopyNumberMapper.xml +++ b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatis/DiscreteCopyNumberMapper.xml @@ -6,7 +6,6 @@ cna_event.ENTREZ_GENE_ID as entrezGeneId, cna_event.ALTERATION AS alteration, - cna_event.ANNOTATION_JSON as annotationJson, genetic_profile.STABLE_ID AS molecularProfileId, sample.STABLE_ID AS sampleId, patient.STABLE_ID AS patientId, diff --git a/persistence/persistence-mybatis/src/test/resources/testSql.sql b/persistence/persistence-mybatis/src/test/resources/testSql.sql index c5021caa7ce..b8e4a3adc1b 100644 --- a/persistence/persistence-mybatis/src/test/resources/testSql.sql +++ b/persistence/persistence-mybatis/src/test/resources/testSql.sql @@ -417,9 +417,9 @@ INSERT INTO genetic_alteration (GENETIC_PROFILE_ID,GENETIC_ENTITY_ID,`VALUES`) V INSERT INTO genetic_alteration (GENETIC_PROFILE_ID,GENETIC_ENTITY_ID,`VALUES`) VALUES (2,2,'1.4146,-0.0662,-0.8585,-1.6576,-0.3552,-0.8306,0.8102,0.1146,0.3498,0.0349,0.4927,-0.8665,-0.4754,-0.7221,'); INSERT INTO genetic_alteration (GENETIC_PROFILE_ID,GENETIC_ENTITY_ID,`VALUES`) VALUES (3,2,'-0.8097,0.7360,-1.0225,-0.8922,0.7247,0.3537,1.2702,-0.1419,'); -INSERT INTO cna_event (CNA_EVENT_ID,ENTREZ_GENE_ID,ALTERATION,ANNOTATION_JSON) VALUES (1,207,-2, '{"columnName":{"fieldName":"fieldValue"}}'); -INSERT INTO cna_event (CNA_EVENT_ID,ENTREZ_GENE_ID,ALTERATION,ANNOTATION_JSON) VALUES (2,208,2, '{"columnName":{"fieldName":"fieldValue"}}'); -INSERT INTO cna_event (CNA_EVENT_ID,ENTREZ_GENE_ID,ALTERATION,ANNOTATION_JSON) VALUES (3,207,2, '{"columnName":{"fieldName":"fieldValue"}}'); +INSERT INTO cna_event (CNA_EVENT_ID,ENTREZ_GENE_ID,ALTERATION) VALUES (1,207,-2); +INSERT INTO cna_event (CNA_EVENT_ID,ENTREZ_GENE_ID,ALTERATION) VALUES (2,208,2); +INSERT INTO cna_event (CNA_EVENT_ID,ENTREZ_GENE_ID,ALTERATION) VALUES (3,207,2); INSERT INTO sample_cna_event (CNA_EVENT_ID,SAMPLE_ID,GENETIC_PROFILE_ID) VALUES (1,1,2); INSERT INTO sample_cna_event (CNA_EVENT_ID,SAMPLE_ID,GENETIC_PROFILE_ID) VALUES (2,1,2); diff --git a/pom.xml b/pom.xml index 93129e9c95a..97e3c3980b2 100644 --- a/pom.xml +++ b/pom.xml @@ -315,7 +315,7 @@ 720 - 2.12.15 + 2.12.14 diff --git a/service/src/test/java/org/cbioportal/service/impl/DiscreteCopyNumberServiceImplTest.java b/service/src/test/java/org/cbioportal/service/impl/DiscreteCopyNumberServiceImplTest.java index 957a4b930ab..beaa290db64 100644 --- a/service/src/test/java/org/cbioportal/service/impl/DiscreteCopyNumberServiceImplTest.java +++ b/service/src/test/java/org/cbioportal/service/impl/DiscreteCopyNumberServiceImplTest.java @@ -64,65 +64,6 @@ public void getDiscreteCopyNumbersInMultipleMolecularProfilesHomdelOrAmp() { Assert.assertEquals(toStrings(returned), toStrings(actual)); } - @Test - public void getDiscreteCopyNumbersWithAnnotationJson() { - List returned = Arrays.asList( - discreteCopyNumberData("sample1", "study1", -2), - discreteCopyNumberData("sample2", "study2", 2) - ); - returned.get(0).setAnnotationJson("{\"columnName\":{\"fieldName\":\"fieldValue\"}}"); - List profiles = Arrays.asList("profile1", "profile2"); - List samples = Arrays.asList("sample1", "sample2"); - List geneIds = Arrays.asList(0, 1); - List alterationTypes = Arrays.asList(-2, 2); - - Mockito.when(discreteCopyNumberRepository.getDiscreteCopyNumbersInMultipleMolecularProfiles( - profiles, - samples, - geneIds, - alterationTypes, - PROJECTION - )) - .thenReturn( - returned - ); - - List actual = discreteCopyNumberService.getDiscreteCopyNumbersInMultipleMolecularProfiles( - profiles, samples, geneIds, alterationTypes, PROJECTION - ); - - Assert.assertEquals(toStrings(returned), toStrings(actual)); - } - @Test - public void getDiscreteCopyNumbersWithoutAnnotationJson() { - List returned = Arrays.asList( - discreteCopyNumberData("sample1", "study1", -2), - discreteCopyNumberData("sample2", "study2", 2) - ); - // returned.get(0).setAnnotationJson("{\"columnName\":{\"fieldName\":\"fieldValue\"}}"); - List profiles = Arrays.asList("profile1", "profile2"); - List samples = Arrays.asList("sample1", "sample2"); - List geneIds = Arrays.asList(0, 1); - List alterationTypes = Arrays.asList(-2, 2); - - Mockito.when(discreteCopyNumberRepository.getDiscreteCopyNumbersInMultipleMolecularProfiles( - profiles, - samples, - geneIds, - alterationTypes, - PROJECTION - )) - .thenReturn( - returned - ); - - List actual = discreteCopyNumberService.getDiscreteCopyNumbersInMultipleMolecularProfiles( - profiles, samples, geneIds, alterationTypes, PROJECTION - ); - Assert.assertNull(returned.get(0).getAnnotationJson()); - Assert.assertEquals(toStrings(returned), toStrings(actual)); - } - @Test public void getDiscreteCopyNumbersInMultipleMolecularProfilesAllAlterationTypes() { List returned = Arrays.asList( diff --git a/web/src/main/java/org/cbioportal/web/config/CustomObjectMapper.java b/web/src/main/java/org/cbioportal/web/config/CustomObjectMapper.java index f189329be98..88a8d99ea78 100644 --- a/web/src/main/java/org/cbioportal/web/config/CustomObjectMapper.java +++ b/web/src/main/java/org/cbioportal/web/config/CustomObjectMapper.java @@ -47,7 +47,6 @@ import org.cbioportal.model.ClinicalEvent; import org.cbioportal.model.ClinicalEventData; import org.cbioportal.model.DataAccessToken; -import org.cbioportal.model.DiscreteCopyNumberData; import org.cbioportal.model.CopyNumberSeg; import org.cbioportal.model.Gene; import org.cbioportal.model.GenePanel; @@ -83,7 +82,6 @@ import org.cbioportal.web.mixin.ClinicalEventMixin; import org.cbioportal.web.mixin.CopyNumberSegMixin; import org.cbioportal.web.mixin.DataAccessTokenMixin; -import org.cbioportal.web.mixin.DiscreteCopyNumberDataMixin; import org.cbioportal.web.mixin.GeneMixin; import org.cbioportal.web.mixin.GenePanelMixin; import org.cbioportal.web.mixin.GenePanelToGeneMixin; @@ -120,7 +118,6 @@ public CustomObjectMapper() { mixinMap.put(ClinicalEventData.class, ClinicalEventDataMixin.class); mixinMap.put(CopyNumberSeg.class, CopyNumberSegMixin.class); mixinMap.put(DataAccessToken.class, DataAccessTokenMixin.class); - mixinMap.put(DiscreteCopyNumberData.class, DiscreteCopyNumberDataMixin.class); mixinMap.put(Gene.class, GeneMixin.class); mixinMap.put(GenePanel.class, GenePanelMixin.class); mixinMap.put(GenePanelToGene.class, GenePanelToGeneMixin.class); diff --git a/web/src/main/java/org/cbioportal/web/mixin/DiscreteCopyNumberDataMixin.java b/web/src/main/java/org/cbioportal/web/mixin/DiscreteCopyNumberDataMixin.java deleted file mode 100644 index f8c904f252b..00000000000 --- a/web/src/main/java/org/cbioportal/web/mixin/DiscreteCopyNumberDataMixin.java +++ /dev/null @@ -1,9 +0,0 @@ -package org.cbioportal.web.mixin; - -import com.fasterxml.jackson.annotation.JsonProperty; - -public class DiscreteCopyNumberDataMixin { - - @JsonProperty("namespaceColumns") - private String annotationJson; -} \ No newline at end of file diff --git a/web/src/test/java/org/cbioportal/web/DiscreteCopyNumberControllerTest.java b/web/src/test/java/org/cbioportal/web/DiscreteCopyNumberControllerTest.java index c0d87247c78..0bf7adbfa65 100644 --- a/web/src/test/java/org/cbioportal/web/DiscreteCopyNumberControllerTest.java +++ b/web/src/test/java/org/cbioportal/web/DiscreteCopyNumberControllerTest.java @@ -41,7 +41,6 @@ public class DiscreteCopyNumberControllerTest { private static final String TEST_SAMPLE_STABLE_ID_1 = "test_sample_stable_id_1"; private static final int TEST_ENTREZ_GENE_ID_1 = 1; private static final int TEST_ALTERATION_1 = 1; - private static final String TEST_ANNOTATION_JSON_1 = "{\"columnName\":{\"fieldName\":\"fieldValue\"}}"; private static final String TEST_HUGO_GENE_SYMBOL_1 = "test_hugo_gene_symbol_1"; private static final String TEST_TYPE_1 = "test_type_1"; private static final String TEST_CYTOBAND_1 = "test_cytoband_1"; @@ -103,7 +102,6 @@ public void getDiscreteCopyNumbersInMolecularProfileBySampleListIdDefaultProject .andExpect(MockMvcResultMatchers.jsonPath("$[0].sampleId").value(TEST_SAMPLE_STABLE_ID_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[0].entrezGeneId").value(TEST_ENTREZ_GENE_ID_1)) .andExpect(MockMvcResultMatchers.jsonPath("$[0].alteration").value(TEST_ALTERATION_1)) - .andExpect(MockMvcResultMatchers.jsonPath("$[0].namespaceColumns.columnName.fieldName").value("fieldValue")) .andExpect(MockMvcResultMatchers.jsonPath("$[0].gene").doesNotExist()) .andExpect(MockMvcResultMatchers.jsonPath("$[1].molecularProfileId") .value(TEST_MOLECULAR_PROFILE_STABLE_ID_2)) @@ -111,24 +109,6 @@ public void getDiscreteCopyNumbersInMolecularProfileBySampleListIdDefaultProject .andExpect(MockMvcResultMatchers.jsonPath("$[1].entrezGeneId").value(TEST_ENTREZ_GENE_ID_2)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].alteration").value(TEST_ALTERATION_2)) .andExpect(MockMvcResultMatchers.jsonPath("$[1].gene").doesNotExist()); - } - @Test - public void getDiscreteCopyNumbersWithoutAnnotationJson() throws Exception { - - List discreteCopyNumberDataList = createExampleDiscreteCopyNumberData(); - discreteCopyNumberDataList.get(0).setAnnotationJson(null); - Mockito.when(discreteCopyNumberService.getDiscreteCopyNumbersInMolecularProfileBySampleListId( - Mockito.any(), Mockito.any(), Mockito.any(), - Mockito.any(), Mockito.any())).thenReturn(discreteCopyNumberDataList); - - mockMvc.perform(MockMvcRequestBuilders.get("/molecular-profiles/test_molecular_profile_id/discrete-copy-number") - .param("sampleListId", TEST_SAMPLE_LIST_ID) - .param("discreteCopyNumberEventType", DiscreteCopyNumberEventType.HOMDEL_AND_AMP.name()) - .accept(MediaType.APPLICATION_JSON)) - .andExpect(MockMvcResultMatchers.status().isOk()) - .andExpect(MockMvcResultMatchers.content().contentTypeCompatibleWith(MediaType.APPLICATION_JSON)) - .andExpect(MockMvcResultMatchers.jsonPath("$", Matchers.hasSize(2))) - .andExpect(MockMvcResultMatchers.jsonPath("$[0].namespaceColumns").doesNotExist()); } @Test @@ -359,7 +339,6 @@ private List createExampleDiscreteCopyNumberData() { discreteCopyNumberData1.setEntrezGeneId(TEST_ENTREZ_GENE_ID_1); discreteCopyNumberData1.setAlteration(TEST_ALTERATION_1); discreteCopyNumberDataList.add(discreteCopyNumberData1); - discreteCopyNumberData1.setAnnotationJson(TEST_ANNOTATION_JSON_1); DiscreteCopyNumberData discreteCopyNumberData2 = new DiscreteCopyNumberData(); discreteCopyNumberData2.setMolecularProfileId(TEST_MOLECULAR_PROFILE_STABLE_ID_2); discreteCopyNumberData2.setSampleId(TEST_SAMPLE_STABLE_ID_2);