Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding base level comparison modes for CompareReferences tool #7987

Merged
merged 13 commits into from
Aug 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ public ReferencePair(ReferenceSequenceTable table, GATKPath reference1, GATKPath
analysis = EnumSet.of(Status.EXACT_MATCH);
}

public GATKPath getRef1() {
return ref1;
}

public GATKPath getRef2() {
return ref2;
}

/**
* Given a Status, add it to the ReferencePair's analysis
*
Expand Down Expand Up @@ -76,12 +84,6 @@ public String getRef1AsString(){
public String getRef2AsString(){
return ReferenceSequenceTable.getReferenceColumnName(ref2);
}
public GATKPath getRef1(){
return ref1;
}
public GATKPath getRef2() {
return ref2;
}

/**
* Displays a ReferencePair's status set
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ public final List<ReferencePair> generateReferencePairs(){
}

/**
* Generate ReferencePairs for comaprison of all references present in the table against a provided dictionary.
* Generate ReferencePairs for comparison of all references present in the table against a provided dictionary.
*
* @return the list of ReferencePairs
*/
Expand All @@ -259,7 +259,7 @@ public final List<ReferencePair> generateReferencePairsAgainstDictionary(GATKPat
return referencePairs;
}

/** Create list ReferencePair for pairwise comparison of references in the table and analyze each pair.
/** Create list of ReferencePair for pairwise comparison of references in the table and analyze each pair.
*
* @return list of ReferencePairs in the table with updated status sets
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@ public static boolean runningOnPPCArchitecture() {
return runningOnArchitecture("ppc64le");
}

/**
* @return True if we're running on a 64-bit x86 platform, otherwise false
*/
public static boolean runningOn64BitX86Architecture() {
return runningOnArchitecture("x86_64") || runningOnArchitecture("amd64");
}

/**
* @param architecture Architecture to look for
* @return true if we're running on the specified architecture, otherwise false
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
package org.broadinstitute.hellbender.utils.alignment;

import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.lang3.SystemUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.NativeUtils;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.utils.io.Resource;
import org.broadinstitute.hellbender.utils.runtime.ProcessController;
import org.broadinstitute.hellbender.utils.runtime.ProcessOutput;
import org.broadinstitute.hellbender.utils.runtime.ProcessSettings;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.PosixFilePermissions;
import java.util.*;

/**
* Class for executing MUMmer alignment pipeline to detect SNPs and INDELs in mismatching sequences.
* The MUMmer pipeline runs nucmer, delta-filter, and show-snps programs, sequentially.
* Used in CompareReferences tool when running in FULL_ALIGNMENT mode.
*/
public final class MummerExecutor {

public static final String MUMMER_X86_64_MAC_BINARIES_ZIPFILE = "MUMmer-4.0.0rc1_mac_x64_64.zip";
public static final String MUMMER_X86_64_LINUX_BINARIES_ZIPFILE = "MUMmer-4.0.0rc1_linux_x64_64.zip";
// TODO: Need to recompile MUMmer 4.0.0rc1 for native M1 architecture as well
// TODO: (however the Mac Intel build should be runnable on M1 platforms with x86 emulation)

private static final Logger logger = LogManager.getLogger(MummerExecutor.class);
private File mummerExecutableDirectory;

/**
* Returns a MummerExecutor pointing to the provided directory
* @param mummerExecutableDirectory File representing the location of the MUMmer executables
*/
public MummerExecutor(File mummerExecutableDirectory){
this.mummerExecutableDirectory = mummerExecutableDirectory;
}

/**
* Returns a MummerExecutor pointing to the unzipped MUMmer executables packaged in GATK
*/
public MummerExecutor(){
final Resource mummerDistribution = new Resource(selectCorrectMummerDistributionForPlatform(), getClass());
mummerExecutableDirectory = prepareMUMmerExecutionDirectory(mummerDistribution);
}

private String selectCorrectMummerDistributionForPlatform() {
if ( NativeUtils.runningOnMac() && NativeUtils.runningOn64BitX86Architecture() ) {
logger.info("Using the x86_64 Mac OS build of MUMmer");
return MUMMER_X86_64_MAC_BINARIES_ZIPFILE;
}
else if ( NativeUtils.runningOnLinux() && NativeUtils.runningOn64BitX86Architecture() ) {
logger.info("Using the x86_64 Linux build of MUMmer");
return MUMMER_X86_64_LINUX_BINARIES_ZIPFILE;
}
else {
throw new UserException("Unable to run MUMmer aligner: GATK does not contain a MUMmer distribution for system architecture " +
SystemUtils.OS_ARCH + " on operating system " + SystemUtils.OS_NAME);
}
}

/**
* Getter method to access the location of the MUMmer executables.
* @return File representing the directory location of the MUMmer executables
*/
public File getMummerExecutableDirectory() {
return mummerExecutableDirectory;
}

/**
* Execute a full alignment of a sequence across a given pair of references to get a File containing the SNPs and INDELs.
* Begins by running nucmer to get a delta file, which is passed to delta-filter. The result of delta filter is a
* delta file filtered to only output alignments of length >= 1; this file is passed to show-snps, which displays SNPs
* and INDELs in a tabular format.
*
* @param fasta1 first reference
* @param fasta2 second reference
* @param outputDirectory directory to output final snps file
* @return the final snps File
*/
public File executeMummer(File fasta1, File fasta2, File outputDirectory){

// NUCMER
logger.debug("Running nucmer.");
File nucmerTempDirectory = IOUtils.createTempDir("nucmerTempDir");
File deltaFile = new File(nucmerTempDirectory, "deltaFile"); // delta file for nucmer output --> input to delta-filter
String[] nucmerArgs = {mummerExecutableDirectory.getAbsolutePath() + "/nucmer", "--mum", "-p", deltaFile.getAbsolutePath(), fasta1.getAbsolutePath(), fasta2.getAbsolutePath()};
ProcessOutput nucmer = runShellCommand(nucmerArgs, null, null,false);

// DELTA-FILTER
logger.debug("Running delta-filter.");
File deltaFilterOutput = IOUtils.createTempFile("deltaFilterOutput", ".delta"); // file for delta filter output --> input to show-snps
String[] deltaFilterArgs = {mummerExecutableDirectory.getAbsolutePath() + "/delta-filter", "-1", deltaFile.getAbsolutePath() + ".delta"};
ProcessOutput deltaFilter = runShellCommand(deltaFilterArgs, null, deltaFilterOutput, false);

// SHOW-SNPS
logger.debug("Running show-snps.");
File showSNPSOutput = new File(outputDirectory, "snps_output.snps");
String[] showSNPsArgs = {mummerExecutableDirectory.getAbsolutePath() + "/show-snps", "-rlTH", deltaFilterOutput.getAbsolutePath()};
ProcessOutput showSNPs = runShellCommand(showSNPsArgs, null, showSNPSOutput, false);

return showSNPSOutput;
}

/**
* Method to run shell commands from GATK.
*
* @param command String[] containing the commandline for execution
* @param environment a Map of environment variables to their values; may be left null if no environment variables needed
* @param stdoutCaptureFile File for output
* @param printStdout boolean to trigger displaying to stdout
* @return the ProcessOutput of the run
*/
public static ProcessOutput runShellCommand(String[] command, Map<String, String> environment, File stdoutCaptureFile, boolean printStdout){
ProcessController processController = ProcessController.getThreadLocal();
final ProcessSettings prs = new ProcessSettings(command);
if(printStdout){
prs.getStderrSettings().printStandard(true);
prs.getStdoutSettings().printStandard(true);
}
if(stdoutCaptureFile != null){
prs.getStdoutSettings().setOutputFile(stdoutCaptureFile);
}
prs.setEnvironment(environment);
final ProcessOutput output = processController.exec(prs);

if(output.getExitValue() != 0){
throw new UserException("Error running " + command[0] + ". Exit with code " + output.getExitValue());
}

return output;
}

// method to unzip and locate the MUMmer binaries packaged within GATK
@VisibleForTesting
static File prepareMUMmerExecutionDirectory(final Resource mummerZipFile){
try{
File tempMUMmerZipFile = IOUtils.writeTempResource(mummerZipFile);
File mummerExecutionDirectory = IOUtils.createTempDir("MUMmerExecutionDirectory");
IOUtils.unzipToFolder(Paths.get(tempMUMmerZipFile.getAbsolutePath()), Paths.get(mummerExecutionDirectory.getAbsolutePath()));
for(File file : mummerExecutionDirectory.listFiles()){
Path path = Paths.get(file.getAbsolutePath());
Files.setPosixFilePermissions(path, PosixFilePermissions.fromString("rwxr-xr-x"));
}
return mummerExecutionDirectory;
}
catch(IOException e){
throw new UserException("Unable to unzip MUMmer binaries.", e);
}
}
}
Loading