Skip to content

Commit

Permalink
Merge pull request #203 from julianu/main
Browse files Browse the repository at this point in the history
Updating jmzIdentML and adding Comet TSV parser
  • Loading branch information
julianu authored Dec 15, 2023
2 parents e890b41 + e041584 commit 8817dfb
Show file tree
Hide file tree
Showing 7 changed files with 5,738 additions and 13 deletions.
17 changes: 13 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

<groupId>de.mpc.pia</groupId>
<artifactId>pia</artifactId>
<version>1.4.10</version>
<version>1.5.0</version>
<name>PIA - Protein Inference Algorithms</name>
<url>https://github.com/mpc-bioinformatics/pia</url>

Expand Down Expand Up @@ -43,9 +43,9 @@
<junit.version>4.13.2</junit.version>
<commons-collections.version>4.4</commons-collections.version>
<commons-text.version>1.11.0</commons-text.version>
<jmzidentml.version>1.2.11</jmzidentml.version>
<jmzidentml.version>1.2.13</jmzidentml.version>
<jmztab.version>3.0.8</jmztab.version>
<pride-mod.version>2.1.8</pride-mod.version>
<pride-mod.version>2.1.12</pride-mod.version>
<pride-jaxb.version>1.0.22</pride-jaxb.version>
<xxindex.version>0.23</xxindex.version>
<mascotdatfile.version>3.6.1</mascotdatfile.version>
Expand Down Expand Up @@ -147,6 +147,12 @@
<artifactId>pride-mod</artifactId>
<version>${pride-mod.version}</version>
</dependency>

<dependency>
<groupId>it.unimi.dsi</groupId>
<artifactId>fastutil</artifactId>
<version>8.5.12</version>
</dependency>
<!-- End pride mod dependency -->

<!-- mzTab dependencies -->
Expand Down Expand Up @@ -327,6 +333,9 @@
<exclude>src/test/*.class</exclude>
</excludes>
<archive>
<manifestEntries>
<Add-Opens>java.base/sun.reflect.annotation</Add-Opens>
</manifestEntries>
<index>true</index>
<manifest>
<!-- Adds the classpath to the created manifest -->
Expand Down Expand Up @@ -384,7 +393,7 @@
<artifactId>maven-surefire-plugin</artifactId>
<version>3.2.2</version>
<configuration>
<argLine>${argLine} -Xmx2560m</argLine>
<argLine>${argLine} -Xmx2560m --add-opens java.base/sun.reflect.annotation=ALL-UNNAMED</argLine>
</configuration>
</plugin>
</plugins>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.apache.logging.log4j.Logger;

import de.mpc.pia.intermediate.compiler.PIACompiler;
import de.mpc.pia.intermediate.compiler.parser.searchengines.CometTSVFileParser;
import de.mpc.pia.intermediate.compiler.parser.searchengines.MascotDatFileParser;
import de.mpc.pia.intermediate.compiler.parser.searchengines.TandemFileParser;
import de.mpc.pia.intermediate.compiler.parser.searchengines.ThermoMSFFileParser;
Expand All @@ -20,6 +21,38 @@ public class InputFileParserFactory {
private static final Logger LOGGER = LogManager.getLogger();

public enum InputFileTypes {

/**
 * Input type for Comet TSV files. Type detection and parsing are both
 * delegated to {@link CometTSVFileParser}.
 */
COMET_TSV_INPUT {
    /**
     * Checks the file type by delegating to
     * {@code CometTSVFileParser.checkFileType(String)}.
     *
     * @param fileName name of the file to check
     * @return the result reported by the Comet TSV parser's check
     */
    @Override
    public boolean checkFileType(final String fileName) {
        return CometTSVFileParser.checkFileType(fileName);
    }

    /**
     * Parses the file by delegating to
     * {@code CometTSVFileParser.getDataFromCometTSVFile(String, String, PIACompiler)}.
     *
     * @param name passed through unchanged to the parser
     * @param fileName passed through unchanged to the parser
     * @param compiler passed through unchanged to the parser
     * @param additionalInfoFileName not used by this input type
     * @return the result reported by the Comet TSV parser
     */
    @Override
    public boolean parseFile(final String name, final String fileName,
            final PIACompiler compiler, final String additionalInfoFileName) {
        // the additional info file is not forwarded, the Comet parser takes no such argument
        return CometTSVFileParser.getDataFromCometTSVFile(name, fileName, compiler);
    }

    /**
     * @return the file suffix of this input type, {@code "txt"}
     */
    @Override
    public String getFileSuffix() {
        return "txt";
    }

    /**
     * @return the short identifier of this input type, {@code "comet"}
     */
    @Override
    public String getFileTypeShort() {
        return "comet";
    }

    /**
     * @return the display name of this input type, {@code "Comet TSV"}
     */
    @Override
    public String getFileTypeName() {
        return "Comet TSV";
    }
},

/**
* the input file is a FASTA database file
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,12 @@ private boolean parseFile(String name, String fileName) {

// get the AnalysisCollection:SpectrumIdentification for the SpectrumIdentificationLists
AnalysisCollection analysisCollection = unmarshaller.unmarshal(AnalysisCollection.class);

LOGGER.debug("scanning analysisCollection: " + analysisCollection
+ "\n\tgetSpectrumIdentification " + analysisCollection.getSpectrumIdentification()
+ "\n\tgetProteinDetection " + analysisCollection.getProteinDetection()
);

for (SpectrumIdentification si : analysisCollection.getSpectrumIdentification()) {
if (specIdLists.keySet().contains(si.getSpectrumIdentificationListRef())) {
// if the SpectrumIdentification's SpectrumIdentificationList is in the file, we need the SpectrumIdentification
Expand Down Expand Up @@ -165,8 +171,6 @@ private boolean parseFile(String name, String fileName) {
spectraDataRefs.put(ref, sd);
});

LOGGER.debug("Number of spectraData in inputs: " + inputs.getSpectraData().size());

// get the necessary inputs:SearchDBs
inputs.getSearchDatabase().stream()
.filter(searchDB -> neededSearchDatabases.contains(searchDB.getId()))
Expand All @@ -189,23 +193,37 @@ private boolean parseFile(String name, String fileName) {
// update the PIAFile's references for SpectraData, SearchDBs and AnalysisSoftwares
file.updateReferences(spectraDataRefs, searchDBRefs, analysisSoftwareRefs);

// get/hash the SequenceCollection:PeptideEvidences
SequenceCollection sc = unmarshaller.unmarshal(SequenceCollection.class);
peptideEvidences = new HashMap<>();
for (PeptideEvidence pepEvidence : sc.getPeptideEvidence()) {
peptideEvidences.put(pepEvidence.getId(), pepEvidence);
}

// get/hash the SequenceCollection:DBSequences
dbSequences = new HashMap<>();
for (DBSequence dbSeq : sc.getDBSequence()) {
dbSequences.put(dbSeq.getId(), dbSeq);

LOGGER.debug("added dbSequence: " + dbSeq.getId() + " -> " + dbSequences.get(dbSeq.getId()));
}

// get/hash the SequenceCollection:Peptides
peptides = new HashMap<>();
for (uk.ac.ebi.jmzidml.model.mzidml.Peptide peptide: sc.getPeptide()) {
peptides.put(peptide.getId(), peptide);

LOGGER.debug("added peptide: " + peptide.getId()
+ " -> " + peptides.get(peptide.getId())
+ "\n\tpeptideSequence " + peptide.getPeptideSequence()
);
}

// get/hash the SequenceCollection:PeptideEvidences
peptideEvidences = new HashMap<>();
for (PeptideEvidence pepEvidence : sc.getPeptideEvidence()) {
peptideEvidences.put(pepEvidence.getId(), pepEvidence);

LOGGER.debug("added pepEvidence: " + pepEvidence.getId()
+ " -> " + peptideEvidences.get(pepEvidence.getId())
+ "\n\tdbSequenceRef " + pepEvidence.getDBSequenceRef()
+ "\n\tdbSequence " + pepEvidence.getDBSequence()
);
}


Expand Down Expand Up @@ -667,7 +685,8 @@ private Peptide parseSIIPeptideEvidences(List<PeptideEvidenceRef> peptideEvidenc

DBSequence dbSeq = dbSequences.get(pepEvidence.getDBSequenceRef());
if (dbSeq == null) {
LOGGER.error("DBSequence " + pepEvidence.getDBSequenceRef() + " not found!");
LOGGER.error("DBSequence " + pepEvidence.getDBSequenceRef()
+ " for pepEvidence " + pepEvidence.getId() + " not found!");
return null;
}

Expand Down
Loading

0 comments on commit 8817dfb

Please sign in to comment.