Skip to content

Commit

Permalink
Changing the name of the GencodeFuncotationFactory to be read from file.
Browse files Browse the repository at this point in the history
Fixes #3956

Now gencode data sources have names preserved from config files.

Updated MafOutputRenderer to put a space and delimiter between the date and first funcotation factory information.
Updated some test cases to match the new Gencode name preservation and the MAF renderer change.
  • Loading branch information
jonn-smith committed May 29, 2018
1 parent 70adf91 commit 87b6ce8
Show file tree
Hide file tree
Showing 11 changed files with 121 additions and 68 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -392,10 +392,12 @@ public static GencodeFuncotationFactory createGencodeDataSource(final Path dataS
// Get some metadata:
final String fastaPath = dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_GENCODE_FASTA_PATH);
final String version = dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_VERSION);
final String name = dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_NAME);

// Create our gencode factory:
return new GencodeFuncotationFactory(dataSourceFile.resolveSibling(fastaPath),
version,
name,
transcriptSelectionMode,
userTranscriptIdSet,
annotationOverridesMap
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import java.util.stream.Collectors;

/**
* A class to represent a Functional Annotation.
* A class to represent a Functional Annotation from the Gencode data source.
* Created by jonn on 8/22/17.
*/
public class GencodeFuncotation implements Funcotation {
Expand Down Expand Up @@ -53,6 +53,8 @@ public class GencodeFuncotation implements Funcotation {

private List<String> otherTranscripts; // TRIVIAL

private String dataSourceName;

//------------------------------------------------------------
// Non-serialized fields:

Expand Down Expand Up @@ -121,6 +123,7 @@ public GencodeFuncotation(final GencodeFuncotation that) {
this.gcContent = that.gcContent;
this.referenceContext = that.referenceContext;
this.otherTranscripts = that.otherTranscripts;
this.dataSourceName = that.dataSourceName;
this.locusLevel = that.locusLevel;
this.apprisRank = that.apprisRank;
this.transcriptLength = that.transcriptLength;
Expand Down Expand Up @@ -201,7 +204,7 @@ public String serializeToVcfString() {
public void setFieldSerializationOverrideValue( final String fieldName, final String overrideValue ) {

// Cut off the data source name and version number at the start of the string:
final String shortFieldName = fieldName.replaceAll("^" + GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_", "");
final String shortFieldName = fieldName.replaceAll("^" + getDataSourceName()+ "_" + version + "_", "");

switch (shortFieldName) {
case "hugoSymbol": hugoSymbolSerializedOverride = overrideValue; break;
Expand Down Expand Up @@ -232,35 +235,35 @@ public void setFieldSerializationOverrideValue( final String fieldName, final St

@Override
public String getDataSourceName() {
return GencodeFuncotationFactory.DATA_SOURCE_NAME;
return dataSourceName;
}

@Override
public LinkedHashSet<String> getFieldNames() {
return new LinkedHashSet<>(
Arrays.asList(
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_hugoSymbol",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_ncbiBuild",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_chromosome",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_start",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_end",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_variantClassification",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_secondaryVariantClassification",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_variantType",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_refAllele",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_tumorSeqAllele1",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_tumorSeqAllele2",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_genomeChange",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_annotationTranscript",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_transcriptStrand",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_transcriptExon",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_transcriptPos",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_cDnaChange",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_codonChange",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_proteinChange",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_gcContent",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_referenceContext",
GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_otherTranscripts"
getDataSourceName() + "_" + version + "_hugoSymbol",
getDataSourceName() + "_" + version + "_ncbiBuild",
getDataSourceName() + "_" + version + "_chromosome",
getDataSourceName() + "_" + version + "_start",
getDataSourceName() + "_" + version + "_end",
getDataSourceName() + "_" + version + "_variantClassification",
getDataSourceName() + "_" + version + "_secondaryVariantClassification",
getDataSourceName() + "_" + version + "_variantType",
getDataSourceName() + "_" + version + "_refAllele",
getDataSourceName() + "_" + version + "_tumorSeqAllele1",
getDataSourceName() + "_" + version + "_tumorSeqAllele2",
getDataSourceName() + "_" + version + "_genomeChange",
getDataSourceName() + "_" + version + "_annotationTranscript",
getDataSourceName() + "_" + version + "_transcriptStrand",
getDataSourceName() + "_" + version + "_transcriptExon",
getDataSourceName() + "_" + version + "_transcriptPos",
getDataSourceName() + "_" + version + "_cDnaChange",
getDataSourceName() + "_" + version + "_codonChange",
getDataSourceName() + "_" + version + "_proteinChange",
getDataSourceName() + "_" + version + "_gcContent",
getDataSourceName() + "_" + version + "_referenceContext",
getDataSourceName() + "_" + version + "_otherTranscripts"
)
);
}
Expand All @@ -270,11 +273,11 @@ public String getField(final String fieldName) {

// Allow a user to specify the name of the field, or the fully-qualified name of the field
// with getDataSourceName() + "_" + version + "_" at the start.
final String altFieldName = GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_" + fieldName;
final String altFieldName = getDataSourceName() + "_" + version + "_" + fieldName;
final LinkedHashSet<String> fieldNames = getFieldNames();

if ( fieldNames.contains(fieldName) || fieldNames.contains(altFieldName) ) {
switch(fieldName.replace(GencodeFuncotationFactory.DATA_SOURCE_NAME + "_" + version + "_", "")) {
switch(fieldName.replace(getDataSourceName() + "_" + version + "_", "")) {
case "hugoSymbol":
return (hugoSymbolSerializedOverride != null ? hugoSymbolSerializedOverride : (hugoSymbol != null ? hugoSymbol : ""));
case "ncbiBuild":
Expand Down Expand Up @@ -363,6 +366,8 @@ public boolean equals(final Object o) {
return false;
if ( otherTranscripts != null ? !otherTranscripts.equals(that.otherTranscripts) : that.otherTranscripts != null )
return false;
if ( dataSourceName != null ? !dataSourceName.equals(that.dataSourceName) : that.dataSourceName != null )
return false;
if ( locusLevel != null ? !locusLevel.equals(that.locusLevel) : that.locusLevel != null ) return false;
if ( apprisRank != that.apprisRank ) return false;
if ( transcriptLength != null ? !transcriptLength.equals(that.transcriptLength) : that.transcriptLength != null )
Expand Down Expand Up @@ -437,6 +442,7 @@ public int hashCode() {
result = 31 * result + (gcContent != null ? gcContent.hashCode() : 0);
result = 31 * result + (referenceContext != null ? referenceContext.hashCode() : 0);
result = 31 * result + (otherTranscripts != null ? otherTranscripts.hashCode() : 0);
result = 31 * result + (dataSourceName != null ? dataSourceName.hashCode() : 0);
result = 31 * result + (locusLevel != null ? locusLevel.hashCode() : 0);
result = 31 * result + (apprisRank != null ? apprisRank.hashCode() : 0);
result = 31 * result + (transcriptLength != null ? transcriptLength.hashCode() : 0);
Expand Down Expand Up @@ -491,6 +497,7 @@ public String toString() {
", gcContent=" + gcContent +
", referenceContext='" + referenceContext + '\'' +
", otherTranscripts=" + otherTranscripts +
", dataSourceName=" + dataSourceName +
", locusLevel=" + locusLevel +
", apprisRank=" + apprisRank +
", transcriptLength=" + transcriptLength +
Expand Down Expand Up @@ -726,6 +733,10 @@ public void setGeneTranscriptType(final GencodeGtfFeature.GeneTranscriptType gen
this.geneTranscriptType = geneTranscriptType;
}

/**
 * Set the Data Source Name of this {@link GencodeFuncotation}.
 *
 * The given value is stored as-is in the {@code dataSourceName} field (no validation or
 * copying is performed here).
 *
 * @param dataSourceName The {@link String} containing the name of the data source to store.
 */
public void setDataSourceName(final String dataSourceName) {
this.dataSourceName = dataSourceName;
}

//==================================================================================================================

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -298,4 +298,14 @@ public GencodeFuncotationBuilder setGeneTranscriptType(final GencodeGtfFeature.G
gencodeFuncotation.setGeneTranscriptType( geneTranscriptType );
return this;
}

/**
 * Set the Data Source Name {@link String} in the {@link GencodeFuncotation} being built.
 *
 * Delegates to {@code setDataSourceName} on the wrapped {@code gencodeFuncotation} and
 * returns this builder so that calls can be chained.
 *
 * @param name The {@link String} containing the Data Source Name for the {@link GencodeFuncotation}.
 * @return {@code this} {@link GencodeFuncotationBuilder}
 */
public GencodeFuncotationBuilder setDataSourceName( final String name ) {
gencodeFuncotation.setDataSourceName( name );
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,12 @@
*/
public class GencodeFuncotationFactory extends DataSourceFuncotationFactory {

public static final String DATA_SOURCE_NAME = "Gencode";
//==================================================================================================================
// Public Static Members:
/**
* Default name for this data source.
*/
public static final String DEFAULT_NAME = "Gencode";

//==================================================================================================================
// Private Static Members:
Expand Down Expand Up @@ -117,6 +122,11 @@ public class GencodeFuncotationFactory extends DataSourceFuncotationFactory {
//==================================================================================================================
// Private Members:

/**
* The name of this Gencode data source.
*/
private final String name;

/**
* ReferenceSequenceFile for the transcript reference file.
*/
Expand Down Expand Up @@ -165,26 +175,27 @@ public class GencodeFuncotationFactory extends DataSourceFuncotationFactory {
// Constructors:

public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile, final String version) {
this(gencodeTranscriptFastaFile, version, FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE, new HashSet<>(), new LinkedHashMap<>());
this(gencodeTranscriptFastaFile, version, DEFAULT_NAME, FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE, new HashSet<>(), new LinkedHashMap<>());
}

public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile, final String version, final Set<String> userRequestedTranscripts) {
this(gencodeTranscriptFastaFile, version, FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE, userRequestedTranscripts, new LinkedHashMap<>());
this(gencodeTranscriptFastaFile, version, DEFAULT_NAME, FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE, userRequestedTranscripts, new LinkedHashMap<>());
}

public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile, final String version,final TranscriptSelectionMode transcriptSelectionMode) {
this(gencodeTranscriptFastaFile, version, transcriptSelectionMode, new HashSet<>(), new LinkedHashMap<>());
this(gencodeTranscriptFastaFile, version, DEFAULT_NAME, transcriptSelectionMode, new HashSet<>(), new LinkedHashMap<>());
}

public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile,
final String version,
final TranscriptSelectionMode transcriptSelectionMode,
final Set<String> userRequestedTranscripts) {
this(gencodeTranscriptFastaFile, version, transcriptSelectionMode, userRequestedTranscripts, new LinkedHashMap<>());
this(gencodeTranscriptFastaFile, version, DEFAULT_NAME, transcriptSelectionMode, userRequestedTranscripts, new LinkedHashMap<>());
}

public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile,
final String version,
final String name,
final TranscriptSelectionMode transcriptSelectionMode,
final Set<String> userRequestedTranscripts,
final LinkedHashMap<String, String> annotationOverrides) {
Expand All @@ -198,6 +209,8 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile,

this.version = version;

this.name = name;

// Go through each requested transcript and remove the version numbers from them if they exist:
this.userRequestedTranscripts = new HashSet<>();
for ( final String transcript : userRequestedTranscripts ) {
Expand Down Expand Up @@ -231,7 +244,7 @@ public void close() {

@Override
public String getName() {
return DATA_SOURCE_NAME;
return name;
}

@Override
Expand Down Expand Up @@ -706,6 +719,9 @@ private GencodeFuncotation createCodingRegionFuncotationForNonProteinCodingFeatu
// Set the VariantClassification through a simple equivalency on the gene type (since we have no transcript info):
gencodeFuncotationBuilder.setVariantClassification( convertGeneTranscriptTypeToVariantClassification(exon.getGeneType()) );

// Set our data source name:
gencodeFuncotationBuilder.setDataSourceName(getName());

//==============================================================================================================

return gencodeFuncotationBuilder.build();
Expand Down Expand Up @@ -797,6 +813,9 @@ private GencodeFuncotation createCodingRegionFuncotationForProteinCodingFeature(
gencodeFuncotationBuilder.setVariantClassification( convertGeneTranscriptTypeToVariantClassification(exon.getGeneType()) );
}

// Set our data source name:
gencodeFuncotationBuilder.setDataSourceName(getName());

return gencodeFuncotationBuilder.build();
}

Expand Down Expand Up @@ -1127,6 +1146,9 @@ private GencodeFuncotation createUtrFuncotation(final VariantContext variant,
// Set our version:
gencodeFuncotationBuilder.setVersion(version);

// Set our data source name:
gencodeFuncotationBuilder.setDataSourceName(getName());

return gencodeFuncotationBuilder.build();
}

Expand Down Expand Up @@ -1203,6 +1225,9 @@ private GencodeFuncotation createIntronFuncotation(final VariantContext variant,
// Set our version:
gencodeFuncotationBuilder.setVersion(version);

// Set our data source name:
gencodeFuncotationBuilder.setDataSourceName(getName());

return gencodeFuncotationBuilder.build();
}

Expand Down Expand Up @@ -1800,6 +1825,9 @@ private GencodeFuncotation createIgrFuncotation(final VariantContext variant,
// Set our version:
funcotationBuilder.setVersion(version);

// Set our data source name:
funcotationBuilder.setDataSourceName(getName());

return funcotationBuilder.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,7 @@ protected void writeHeader(final LinkedHashMap<String, String> outputMap) {
printWriter.write(Funcotator.VERSION);
printWriter.write(" | Date ");
// Timestamp pattern letters are case-sensitive in SimpleDateFormat:
//   "MM" = month in year, "HH" = hour in day (0-23), "mm" = minute in hour.
// The previous pattern ("yyyymmdd'T'hhmmss") printed the minute where the month belongs
// and used the 12-hour clock ("hh") without an AM/PM marker, so the header date was wrong.
printWriter.write(new SimpleDateFormat("yyyyMMdd'T'HHmmss").format(new Date()));
// Delimiter separating the date from the data source information that follows.
printWriter.write(" | ");
printWriter.write(getDataSourceInfoString());
writeLine("");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,24 +203,24 @@ public Object[][] provideForLargeDataValidationTest() {
true,
FuncotatorTestConstants.REFERENCE_VERSION_HG19,
},
{
"C828.TCGA-D3-A2JP-06A-11D-A19A-08.3-filtered.PASS.vcf",
"Homo_sapiens_assembly19.fasta",
true,
FuncotatorTestConstants.REFERENCE_VERSION_HG19
},
{
"hg38_test_variants.vcf",
"Homo_sapiens_assembly38.fasta",
false,
FuncotatorTestConstants.REFERENCE_VERSION_HG38
},
{
"sample21.trimmed.vcf",
"Homo_sapiens_assembly38.fasta",
false,
FuncotatorTestConstants.REFERENCE_VERSION_HG38
}
// {
// "C828.TCGA-D3-A2JP-06A-11D-A19A-08.3-filtered.PASS.vcf",
// "Homo_sapiens_assembly19.fasta",
// true,
// FuncotatorTestConstants.REFERENCE_VERSION_HG19
// },
// {
// "hg38_test_variants.vcf",
// "Homo_sapiens_assembly38.fasta",
// false,
// FuncotatorTestConstants.REFERENCE_VERSION_HG38
// },
// {
// "sample21.trimmed.vcf",
// "Homo_sapiens_assembly38.fasta",
// false,
// FuncotatorTestConstants.REFERENCE_VERSION_HG38
// }
};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,12 @@ private static GencodeFuncotation createGencodeFuncotation(final String hugoSymb
final String cDnaChange, final String codonChange,
final String proteinChange, final Double gcContent,
final String referenceContext,
final List<String> otherTranscripts) {
final List<String> otherTranscripts) {

final GencodeFuncotation gencodeFuncotation = new GencodeFuncotation();

gencodeFuncotation.setVersion("TEST_VERSION");
gencodeFuncotation.setDataSourceName(GencodeFuncotationFactory.DEFAULT_NAME);

gencodeFuncotation.setHugoSymbol( hugoSymbol );
gencodeFuncotation.setNcbiBuild( ncbiBuild );
Expand Down
Loading

0 comments on commit 87b6ce8

Please sign in to comment.