Skip to content

Commit

Permalink
Lookahead Covariates moved to UG_feature_branch
Browse files Browse the repository at this point in the history
  • Loading branch information
dror27 authored and jamesemery committed Jul 22, 2022
1 parent 6a19613 commit 3ce718d
Show file tree
Hide file tree
Showing 6 changed files with 268 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,13 @@ public void onTraversalStart() {

Utils.warnOnNonIlluminaReadGroups(getHeaderForReads(), logger);

// grab reference if not specified in case of using an extended context covariate
if ( recalArgs.EXTENDED_CONTEXT_LOOKAHEAD != 0 ) {
if ( recalArgs.EXTENDED_CONTEXT_REFERENCE == null ) {
recalArgs.EXTENDED_CONTEXT_REFERENCE = new GATKPath(referenceArguments.getReferencePath().toAbsolutePath().toString());
}
}

recalibrationEngine = new BaseRecalibrationEngine(recalArgs, getHeaderForReads());
recalibrationEngine.logCovariatesUsed();
referenceDataSource = ReferenceDataSource.of(referenceArguments.getReferencePath());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.Hidden;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.utils.QualityUtils;
import org.broadinstitute.hellbender.utils.baq.BAQ;
import org.broadinstitute.hellbender.utils.report.GATKReportTable;
Expand Down Expand Up @@ -42,6 +43,18 @@ public final class RecalibrationArgumentCollection implements Serializable {
@Argument(fullName = "indels-context-size", shortName = "ics", doc = "Size of the k-mer context to be used for base insertions and deletions", optional = true)
public int INDELS_CONTEXT_SIZE = 3;

/**
* experimental parameter indicating that we are going to use the extended version of the context covariate
*/
@Argument(fullName = "extended-context-lookahead", shortName = "ecl", doc = "amount of lookahead (out of the context) when using the extended context covariate", optional = true)
public int EXTENDED_CONTEXT_LOOKAHEAD;

/**
* Reference used for assembling the extended context. Optional: it has no initializer, so it is
* null unless supplied on the command line or filled in by the calling tool.
* NOTE(review): presumably only consulted when the extended context lookahead is non-zero -- confirm against the covariate that reads it.
*/
@Argument(fullName = "extended-context-reference", shortName = "ecr", doc = "reference for assembling the extended context", optional = true)
public GATKPath EXTENDED_CONTEXT_REFERENCE;

/**
* Optional flag for the extended context covariate; per the argument doc it indicates that the alt base
* is embedded in the context when the context is extended. Has no initializer, so it is false unless set.
* NOTE(review): the code that consumes this flag is not visible here -- confirm exact semantics.
*/
@Argument(fullName = "extended-context-alt-embedded", shortName = "ecae", doc = "alt base, when extending context, embedded in context", optional = true)
public boolean EXTENDED_CONTEXT_ALT_EMBEDDED;

/**
* The cycle covariate will generate an error if it encounters a cycle greater than this value.
* This argument is ignored if the Cycle covariate is not used.
Expand Down Expand Up @@ -164,6 +177,12 @@ public GATKReportTable generateReportTable(final String covariateNames) {
argumentsTable.set("mismatches_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
argumentsTable.addRowID("indels_context_size", true);
argumentsTable.set("indels_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
argumentsTable.addRowID("extended_context_lookahead", true);
argumentsTable.set("extended_context_lookahead", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, EXTENDED_CONTEXT_LOOKAHEAD);
argumentsTable.addRowID("extended_context_reference", true);
argumentsTable.set("extended_context_reference", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, EXTENDED_CONTEXT_REFERENCE);
argumentsTable.addRowID("extended_context_alt_embedded", true);
argumentsTable.set("extended_context_alt_embedded", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, EXTENDED_CONTEXT_ALT_EMBEDDED);
argumentsTable.addRowID("mismatches_default_quality", true);
argumentsTable.set("mismatches_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
argumentsTable.addRowID("deletions_default_quality", true);
Expand Down Expand Up @@ -212,6 +231,8 @@ public Map<String,? extends CharSequence> compareReportArguments(final Recalibra
compareSimpleReportArgument(result,"solid_recal_mode", SOLID_RECAL_MODE, SOLID_RECAL_MODE,thisRole,otherRole);
compareSimpleReportArgument(result,"solid_nocall_strategy", SOLID_NOCALL_STRATEGY, SOLID_NOCALL_STRATEGY,thisRole,otherRole);
compareSimpleReportArgument(result,"mismatches_context_size", MISMATCHES_CONTEXT_SIZE,other.MISMATCHES_CONTEXT_SIZE,thisRole,otherRole);
compareSimpleReportArgument(result,"extended_context_lookahead", EXTENDED_CONTEXT_LOOKAHEAD,other.EXTENDED_CONTEXT_LOOKAHEAD,thisRole,otherRole);
compareSimpleReportArgument(result,"extended_context_reference", EXTENDED_CONTEXT_REFERENCE,other.EXTENDED_CONTEXT_REFERENCE,thisRole,otherRole);
compareSimpleReportArgument(result,"mismatches_default_quality", MISMATCHES_DEFAULT_QUALITY, other.MISMATCHES_DEFAULT_QUALITY,thisRole,otherRole);
compareSimpleReportArgument(result,"deletions_default_quality", DELETIONS_DEFAULT_QUALITY, other.DELETIONS_DEFAULT_QUALITY,thisRole,otherRole);
compareSimpleReportArgument(result,"insertions_default_quality", INSERTIONS_DEFAULT_QUALITY, other.INSERTIONS_DEFAULT_QUALITY,thisRole,otherRole);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import org.apache.commons.collections.CollectionUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.QualityUtils;
Expand Down Expand Up @@ -325,6 +326,30 @@ private static QuantizationInfo initializeQuantizationTable(GATKReportTable tabl
private static RecalibrationArgumentCollection initializeArgumentCollectionTable(GATKReportTable table) {
final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();

// peek for lookahead argument - since it may change the list of covariates
for ( int i = 0; i < table.getNumRows(); i++ ) {
final String argument = table.get(i, "Argument").toString();
Object value = table.get(i, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
if (value.equals("null")) {
value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport
}
if (argument.equals("extended_context_lookahead"))
RAC.EXTENDED_CONTEXT_LOOKAHEAD = decodeInteger(value);

else if (argument.equals("extended_context_reference"))
RAC.EXTENDED_CONTEXT_REFERENCE = (value != null) ? new GATKPath((String)value) : null;

else if (argument.equals("extended_context_alt_embedded"))
RAC.EXTENDED_CONTEXT_ALT_EMBEDDED = decodeBoolean(value);

else if (argument.equals("mismatches_context_size"))
RAC.MISMATCHES_CONTEXT_SIZE = decodeInteger(value);

else if (argument.equals("indels_context_size"))
RAC.INDELS_CONTEXT_SIZE = decodeInteger(value);
}


final List<String> standardCovariateClassNames = new StandardCovariateList(RAC, Collections.emptyList()).getStandardCovariateClassNames();

for ( int i = 0; i < table.getNumRows(); i++ ) {
Expand Down Expand Up @@ -356,12 +381,6 @@ else if (argument.equals("solid_nocall_strategy")) {
throw new UserException("Solid is not supported. Only " + RecalibrationArgumentCollection.SOLID_NOCALL_STRATEGY + " is allowed as value for solid_nocall_strategy");
}
}
else if (argument.equals("mismatches_context_size"))
RAC.MISMATCHES_CONTEXT_SIZE = decodeInteger(value);

else if (argument.equals("indels_context_size"))
RAC.INDELS_CONTEXT_SIZE = decodeInteger(value);

else if (argument.equals("mismatches_default_quality"))
RAC.MISMATCHES_DEFAULT_QUALITY = decodeByte(value);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package org.broadinstitute.hellbender.utils.recalibration.covariates;

import htsjdk.samtools.SAMFileHeader;
import org.broadinstitute.hellbender.utils.BaseUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.recalibration.RecalibrationArgumentCollection;

/**
 * Covariate whose key at each read position is the base index of the read base at that position,
 * as computed by {@link BaseUtils#simpleBaseToBaseIndex(byte)}.
 */
public final class AltCovariate implements Covariate {
    private static final long serialVersionUID = 1L;

    /**
     * @param RAC recalibration arguments; accepted but not consulted by this covariate
     */
    public AltCovariate(final RecalibrationArgumentCollection RAC){
    }

    /**
     * Records one covariate entry per read position, keyed on the base index of the read base.
     *
     * @param read              read whose bases are inspected (bases are accessed without copying)
     * @param header            not used by this covariate
     * @param values            destination for the per-position keys
     * @param recordIndelValues not used by this covariate
     */
    @Override
    public void recordValues(final GATKRead read, final SAMFileHeader header, final ReadCovariates values, final boolean recordIndelValues) {
        final int length = read.getLength();
        final byte[] bases = read.getBasesNoCopy();
        int offset = 0;
        while (offset < length) {
            // first key is the base index; the two zeros are presumably the insertion/deletion keys -- confirm against ReadCovariates
            values.addCovariate(BaseUtils.simpleBaseToBaseIndex(bases[offset]), 0, 0, offset);
            offset++;
        }
    }

    /**
     * Converts a key back to its printable form.
     *
     * @param key key as recorded by this covariate
     * @return "N" for any negative key, otherwise the one-character string for the base with that index
     */
    @Override
    public String formatKey(final int key){
        return key < 0
                ? "N"
                : String.valueOf((char) BaseUtils.baseIndexToSimpleBase(key));
    }

    /**
     * Converts a value to a key.
     *
     * @param value either a String, whose first byte is taken as the base, or an Integer key
     * @return -1 when the String starts with 'N', the base index for any other String, or the Integer itself
     */
    @Override
    public int keyFromValue(final Object value) {
        if ( !(value instanceof String) ) {
            // anything that is not a String is taken to already be an Integer key
            return (Integer) value;
        }

        final byte firstByte = ((String) value).getBytes()[0];
        return firstByte == 'N' ? -1 : BaseUtils.simpleBaseToBaseIndex(firstByte);
    }

    /**
     * @return the number of constants declared in {@link BaseUtils.Base}
     */
    @Override
    public int maximumKeyValue() {
        return BaseUtils.Base.values().length;
    }
}
Loading

0 comments on commit 3ce718d

Please sign in to comment.