Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up CnaEvent lookup during import #25

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public class ImportCnaDiscreteLongData {
private String genePanel;
private final DaoGeneOptimized daoGene;
private CnaUtil cnaUtil;
private Set<CnaEvent.Event> existingCnaEvents = new HashSet<>();
private Map<CnaEvent.Event, CnaEvent.Event> existingCnaEvents = new HashMap<>();
private int samplesSkipped = 0;
private Set<String> namespaces;

Expand Down Expand Up @@ -84,7 +84,9 @@ public void importData() throws Exception {
&& geneticProfile.showProfileInAnalysisTab();

if (isDiscretizedCnaProfile) {
existingCnaEvents.addAll(DaoCnaEvent.getAllCnaEvents());
for (CnaEvent.Event event : DaoCnaEvent.getAllCnaEvents()) {
existingCnaEvents.put(event, event);
}
MySQLbulkLoader.bulkLoadOn();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,9 +265,11 @@ public void importData(int numLines) throws IOException, DaoException {
DaoGeneticAlteration daoGeneticAlteration = DaoGeneticAlteration.getInstance();

// cache for data found in the 'cna_event' table:
Set<CnaEvent.Event> existingCnaEvents = new HashSet<>();
Map<CnaEvent.Event, CnaEvent.Event> existingCnaEvents = new HashMap<>();
if (isDiscretizedCnaProfile) {
existingCnaEvents.addAll(DaoCnaEvent.getAllCnaEvents());
for (CnaEvent.Event event : DaoCnaEvent.getAllCnaEvents()) {
existingCnaEvents.put(event, event);
}
MySQLbulkLoader.bulkLoadOn();
}

Expand Down Expand Up @@ -502,7 +504,7 @@ private boolean parseLine(String line, int nrColumns, int sampleStartIndex,
boolean isRppaProfile, boolean isDiscretizedCnaProfile,
DaoGeneOptimized daoGene,
List<Integer> filteredSampleIndices, List<Integer> orderedSampleList,
Set<CnaEvent.Event> existingCnaEvents
Map<CnaEvent.Event, CnaEvent.Event> existingCnaEvents
) throws DaoException {

//TODO: refactor this entire function - split functionality into smaller units / subroutines
Expand Down
31 changes: 17 additions & 14 deletions src/main/java/org/mskcc/cbio/portal/util/CnaUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,33 +46,36 @@ public CnaUtil(String[] headerParts, Set<String> namespaces) {
}

/**
 * Stores the given CNA events, reusing the event id of any matching event already present
 * in the supplied cache.
 *
 * Only events whose alteration equals {@code CNA.AMP} or {@code CNA.HOMDEL} are processed;
 * all others are skipped. An event that is not yet in the cache is stored via
 * {@code DaoCnaEvent.addCaseCnaEvent(cnaEvent, true)} and then added to the cache, so later
 * occurrences in the same list are treated as already existing.
 *
 * @param existingCnaEvents cache of already known events; updated in place by this method
 * @param cnaEventsToAdd    events to store
 * @throws DaoException declared by this method; presumably propagated from the
 *                      {@code DaoCnaEvent} calls (not verifiable from this view)
 */
public static void storeCnaEvents(
Set<CnaEvent.Event> existingCnaEvents,
Map<CnaEvent.Event,CnaEvent.Event> existingCnaEvents,
List<CnaEvent> cnaEventsToAdd
) throws DaoException {
for (CnaEvent cnaEvent : cnaEventsToAdd) {
// Only amplifications and homozygous deletions are handled here; skip everything else.
if (!CNA.AMP.equals(cnaEvent.getAlteration()) && !CNA.HOMDEL.equals(cnaEvent.getAlteration())) {
continue;
}

// Revert PR https://github.com/cBioPortal/cbioportal-core/pull/1 breaks importer
Optional<CnaEvent.Event> existingCnaEvent = existingCnaEvents
.stream()
.filter(e -> e.equals(cnaEvent.getEvent()))
.findFirst();
if (existingCnaEvent.isPresent()) {
cnaEvent.setEventId(existingCnaEvent.get().getEventId());
CnaEvent.Event event = cnaEvent.getEvent();
CnaEvent.Event existingEvent = existingCnaEvents.get(event);
// Caution:
// existingEvent (if found) was retrieved from the database and has a populated event_id field
// (or was cached by an earlier iteration of this loop, see the else branch below).
// event is constructed while parsing the CNA file and does not have a populated event_id field.
// The type CnaEvent.Event, and the types it contains, override hashCode() and equals() so that
// an Event with a non-populated event_id field matches an Event with a populated / different
// event_id field. This is what allows the hash map lookup above to find the previously existing
// event and obtain its event_id (used below).
if (existingEvent != null) {
// Known event: reuse its id.
cnaEvent.setEventId(existingEvent.getEventId());
DaoCnaEvent.addCaseCnaEvent(cnaEvent, false);
} else {
// New event: store it, then remember it so later duplicates take the branch above.
DaoCnaEvent.addCaseCnaEvent(cnaEvent, true);
existingCnaEvents.add(cnaEvent.getEvent());
existingCnaEvents.put(event, event);
}
}
}

public CnaEvent createEvent(
GeneticProfile geneticProfile,
int sampleId,
long entrezId,
long entrezId,
String[] parts
) throws IOException {
int cnaProfileId = geneticProfile.getGeneticProfileId();
Expand All @@ -88,11 +91,11 @@ public CnaEvent createEvent(
);
return cna;
}

/**
 * Serializes the given nested map to a JSON string using this instance's object mapper.
 *
 * @param map the nested map to serialize
 * @return the string produced by the object mapper for {@code map}
 * @throws JsonProcessingException if the object mapper fails to serialize the map
 */
private String convertMapToJsonString(Map<String, Map<String, Object>> map) throws JsonProcessingException {
final String json = objectMapper.writeValueAsString(map);
return json;
}

public long getEntrezSymbol(String[] parts) {
String entrezAsString = TabDelimitedFileUtil.getPartString(getColumnIndex(CnaUtil.ENTREZ_GENE_ID), parts);
if (entrezAsString.isEmpty()) {
Expand Down Expand Up @@ -123,7 +126,7 @@ private short createAlteration(String[] parts) {
*/
public int getColumnIndex(String colName) {
// Look up the lower-cased column name; -1 is returned when the index map has no entry for it.
return this.columnIndexMap.getOrDefault(
colName.toLowerCase(),
colName.toLowerCase(),
-1
);
}
Expand Down
Loading