Skip to content

Commit

Permalink
cleaning up, base level modes done, integration tests, fixed unit test
Browse files Browse the repository at this point in the history
  • Loading branch information
orlicohen committed Aug 11, 2022
1 parent d6d47dc commit 2038fd1
Show file tree
Hide file tree
Showing 21 changed files with 2,515 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -280,13 +280,14 @@ private void runFullAlignment(ReferencePair refPair, ReferenceSequenceTable tabl
MummerExecutor executor = new MummerExecutor();
logger.info("Running mummer alignment on sequence " + sequenceName);
File tempSnpsDirectory = IOUtils.createTempDir("tempsnps");
File mummerOutput = executor.executeMummer(ref1Fasta.toPath().toFile(), ref2Fasta.toPath().toFile(), tempSnpsDirectory, sequenceName);
File mummerOutput = executor.executeMummer(ref1Fasta.toPath().toFile(), ref2Fasta.toPath().toFile(), tempSnpsDirectory);
logger.info("Finished running mummer alignment on sequence " + sequenceName);
snpsFiles.add(mummerOutput);
}
}
// merge individual snps files
File snps = new File(baseComparisonOutputDirectory.toPath().toString(), String.format("%s_%s.snps", refPair.getRef1AsString(), refPair.getRef2AsString()));
File snps = IOUtils.createTempFile(String.format("%s_%s", refPair.getRef1AsString(), refPair.getRef2AsString()), ".snps");
/*new File(baseComparisonOutputDirectory.toPath().toString(), String.format("%s_%s.snps", refPair.getRef1AsString(), refPair.getRef2AsString()));*/
try (PrintWriter writer = new PrintWriter(snps)) {
for (File file : snpsFiles) {
try (XReadLines reader = new XReadLines(file)) {
Expand All @@ -312,7 +313,7 @@ private void runFullAlignment(ReferencePair refPair, ReferenceSequenceTable tabl
int previousPos = -1;
for (String line : reader) {
String[] fields = line.split("\\t", -1);
String contig = fields[10];
String contig = fields[12];
int pos = Integer.valueOf(fields[0]);
String ref = fields[1];
String alt = fields[2];
Expand Down Expand Up @@ -532,7 +533,6 @@ private static class MummerIndel{
}

public VariantContext getAsVCFRecord(){
// builder, add fields
VariantContextBuilder vcfBuilder = new VariantContextBuilder();
int stopPos = isInsertion ? pos : pos + ref.length()-1;
VariantContext record = vcfBuilder.chr(chr).start(pos).stop(stopPos).alleles(ref, alt).make();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public File getMummerExecutableDirectory() {
* @param outputDirectory directory to output final snps file
* @return the final snps File
*/
public File executeMummer(File fasta1, File fasta2, File outputDirectory, String sequenceName){
public File executeMummer(File fasta1, File fasta2, File outputDirectory){

// NUCMER
logger.debug("Running nucmer.");
Expand All @@ -80,7 +80,7 @@ public File executeMummer(File fasta1, File fasta2, File outputDirectory, String

// SHOW-SNPS
logger.debug("Running show-snps.");
File showSNPSOutput = new File(outputDirectory, String.format("chr%s_snps_output.snps", sequenceName));
File showSNPSOutput = new File(outputDirectory, "snps_output.snps");
String[] showSNPsArgs = {mummerExecutableDirectory.getAbsolutePath() + "/show-snps", "-rlTH", deltaFilterOutput.getAbsolutePath()};
ProcessOutput showSNPs = runShellCommand(showSNPsArgs, null, showSNPSOutput, false);

Expand Down Expand Up @@ -143,7 +143,7 @@ public static ProcessOutput runPythonCommand(String script, List<String> scriptA
return output;
}

// method to locate the MUMmer binaries packaged within GATK
// method to unzip and locate the MUMmer binaries packaged within GATK
private File prepareMUMmerExecutionDirectory(){
try{
Resource mummerZipFile = new Resource(MUMMER_BINARIES_ZIPFILE, getClass());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ public void testCompareReferencesToStdOutput() throws IOException{
runCommandLine(args);
}

// The following tests run the tool on different combinations of reference files
// The following test runs the tool on different combinations of reference files
// and produces output to stdout for the sake of manually inspecting outputs.
// Disabled, as no actual assertions are made.
@Test(enabled = false)
Expand All @@ -132,18 +132,19 @@ public void testCompareReferencesMultipleReferencesStdOut() throws IOException{
final File ref3 = new File(getToolTestDataDir() + "hg19mini_chr2snp.fasta");
final File ref4 = new File(getToolTestDataDir() + "hg19mini_missingchr1.fasta");

final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-refcomp", ref3.getAbsolutePath(),
"-refcomp", ref4.getAbsolutePath()};
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref3.getAbsolutePath(), "-refcomp", ref3.getAbsolutePath(),
"-refcomp", ref4.getAbsolutePath(), "-display-sequences-by-name", "-display-only-differing-sequences"};
runCommandLine(args);
}

// FIND_SNPS_ONLY tests:
@Test
public void testFindMultipleSNPs() throws IOException{
public void testFindSNPsMultipleSNPs() throws IOException{
final File ref1 = new File(getToolTestDataDir() + "hg19mini.fasta");
final File ref2 = new File(getToolTestDataDir() + "hg19mini_chr2multiplesnps.fasta");
final File output = IOUtils.createTempDir("tempFindSNPs");

final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FIND_SNPS_ONLY", "-base-comparison-output", output.toPath().toString()};
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FIND_SNPS_ONLY", "-base-comparison-output", output.getAbsolutePath()};
runCommandLine(args);

final File actualOutput = new File(output, "hg19mini.fasta_hg19mini_chr2multiplesnps.fasta_snps.tsv");
Expand All @@ -152,7 +153,7 @@ public void testFindMultipleSNPs() throws IOException{
}

@Test
public void testFindIUPACSNPs() throws IOException{
public void testFindSNPsIUPACBases() throws IOException{
final File ref1 = new File(getToolTestDataDir() + "hg19mini.fasta");
final File ref2 = new File(getToolTestDataDir() + "hg19mini_chr2iupacsnps.fasta");
final File output = IOUtils.createTempDir("tempFindSNPs");
Expand All @@ -166,6 +167,7 @@ public void testFindIUPACSNPs() throws IOException{
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);
}

// FULL_ALIGNMENT tests:
@Test
public void testFullAlignmentModeMultipleSNPs() throws IOException{
final File ref1 = new File(getToolTestDataDir() + "hg19mini.fasta");
Expand All @@ -184,14 +186,14 @@ public void testFullAlignmentModeMultipleSNPs() throws IOException{
public void testFullAlignmentModeDeletion() throws IOException{
final File ref1 = new File(getToolTestDataDir() + "hg19mini.fasta");
final File ref2 = new File(getToolTestDataDir() + "hg19mini_chr1indel.fasta");
//final File output = IOUtils.createTempDir("tempFullAlignmentIndel");
final File output = IOUtils.createTempDir("tempFullAlignmentIndel");

final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FULL_ALIGNMENT", "-base-comparison-output", getToolTestDataDir()};
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FULL_ALIGNMENT", "-base-comparison-output", output.getAbsolutePath()};
runCommandLine(args);

/*final File expectedOutput = new File(getToolTestDataDir(), "expected.testDeletion.hg19mini.fasta_hg19mini_chr1indel.fasta.snps");
final File actualOutput = new File(output, "hg19mini.fasta_hg19mini_chr1indel.fasta.snps");
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);*/
final File expectedOutput = new File(getToolTestDataDir(), "expected.testDeletion.hg19mini.fasta_hg19mini_chr1indel.fasta.vcf");
final File actualOutput = new File(output, "hg19mini.fasta_hg19mini_chr1indel.fasta.vcf");
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);
}

@Test
Expand All @@ -203,8 +205,8 @@ public void testFullAlignmentModeInsertion() throws IOException{
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FULL_ALIGNMENT", "-base-comparison-output", output.getAbsolutePath()};
runCommandLine(args);

final File expectedOutput = new File(getToolTestDataDir(), "expected.testInsertion.hg19mini_chr1indel.fasta_hg19mini.fasta.snps");
final File actualOutput = new File(output, "hg19mini_chr1indel.fasta_hg19mini.fasta.snps");
final File expectedOutput = new File(getToolTestDataDir(), "expected.testInsertion.hg19mini_chr1indel.fasta_hg19mini.fasta.vcf");
final File actualOutput = new File(output, "hg19mini_chr1indel.fasta_hg19mini.fasta.vcf");
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);
}

Expand All @@ -214,11 +216,25 @@ public void testFullAlignmentSNPsOnMultipleContigs() throws IOException{
final File ref2 = new File(getToolTestDataDir() + "hg19mini_snpsmultiplecontigs.fasta");
final File output = IOUtils.createTempDir("tempFullAlignmentSNPsMultipleContigs");

final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FULL_ALIGNMENT", "-base-comparison-output", output.toPath().toString()};
final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FULL_ALIGNMENT", "-base-comparison-output", output.getAbsolutePath()};
runCommandLine(args);

final File expectedOutput = new File(getToolTestDataDir(), "expected.hg19mini.fasta_hg19mini_snpsmultiplecontigs.fasta.vcf");
final File actualOutput = new File(output, "hg19mini.fasta_hg19mini_snpsmultiplecontigs.fasta.vcf");
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);
}

@Test
public void testFullAlignmentModeSNPAndINDEL() throws IOException{
final File ref1 = new File(getToolTestDataDir() + "hg19mini.fasta");
final File ref2 = new File(getToolTestDataDir() + "hg19mini_snpandindel.fasta");
final File output = IOUtils.createTempDir("tempFullAlignmentIndel");

final String[] args = new String[] {"-R", ref1.getAbsolutePath() , "-refcomp", ref2.getAbsolutePath(), "-base-comparison", "FULL_ALIGNMENT", "-base-comparison-output", output.getAbsolutePath()};
runCommandLine(args);

final File expectedOutput = new File(getToolTestDataDir(), "expected.hg19mini.fasta_hg19mini_snpsmultiplecontigs.fasta.snps");
final File actualOutput = new File(output, "hg19mini.fasta_hg19mini_snpsmultiplecontigs.fasta.snps");
final File expectedOutput = new File(getToolTestDataDir(), "expected.SNPandINDEL.hg19mini.fasta_hg19mini_snpandindel.fasta.vcf");
final File actualOutput = new File(output, "hg19mini.fasta_hg19mini_snpandindel.fasta.vcf");
IntegrationTestSpec.assertEqualTextFiles(actualOutput, expectedOutput);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public class CompareReferencesUnitTest extends CommandLineProgramTest {
@Test
public void testGenerateFastaForSequence() throws IOException {
File ref = new File(getToolTestDataDir() + "hg19mini.fasta");
File expectedOutput = new File("/Users/ocohen/workingcode/gatk/tempreferences/1.fasta");
File expectedOutput = new File(getToolTestDataDir() + "1.fasta");
String sequenceName = "1";
File output = createTempFile("example_chr1", ".fasta");

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
>1 dna:chromosome chromosome:GRCh37:1:1:16000:1
>1
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
##fileformat=VCFv4.2
##contig=<ID=1,length=16000>
##contig=<ID=2,length=16000>
##contig=<ID=3,length=16000>
##contig=<ID=4,length=16000>
#CHROM POS ID REF ALT QUAL FILTER INFO
1 12881 . G C . . .
2 10400 . AACCCCGAACCCCGAACCCCAACCCCAACCCCAACCCCAACCCTAACCCCTCACCCTCACCCTCGACCCCCGACCCCCGAC A . . .
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
##contig=<ID=3,length=16000>
##contig=<ID=4,length=16000>
#CHROM POS ID REF ALT QUAL FILTER INFO
1 13368 . A T . . .
1 13770 . A C . . .
1 15953 . G C . . .
2 13368 . A T . . .
2 13770 . A C . . .
2 15953 . G C . . .
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

Sequence Name Position hg19mini.fasta hg19mini_chr2multiplesnps.fasta
2 13368 A T
2 13770 A C
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
##fileformat=VCFv4.2
##contig=<ID=1,length=16000>
##contig=<ID=2,length=16000>
##contig=<ID=3,length=16000>
##contig=<ID=4,length=16000>
#CHROM POS ID REF ALT QUAL FILTER INFO
1 12084 . A C . . .
2 12084 . A T . . .
4 14728 . T A . . .
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
##fileformat=VCFv4.2
##contig=<ID=1,length=16000>
##contig=<ID=2,length=16000>
##contig=<ID=3,length=16000>
##contig=<ID=4,length=16000>
#CHROM POS ID REF ALT QUAL FILTER INFO
1 14000 . AACCTGCCCCACAGCCTTGCCTGGATTTCTATCTCCCTGGCTTGGTGCCAGTTCCTCCAAGTCGATGGCACCTCCCTCCCT A . . .
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
##fileformat=VCFv4.2
##contig=<ID=1,length=15920>
##contig=<ID=2,length=16000>
##contig=<ID=3,length=16000>
##contig=<ID=4,length=16000>
#CHROM POS ID REF ALT QUAL FILTER INFO
1 13999 . T TACCTGCCCCACAGCCTTGCCTGGATTTCTATCTCCCTGGCTTGGTGCCAGTTCCTCCAAGTCGATGGCACCTCCCTCCCT . . .
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
>1 dna:chromosome chromosome:GRCh37:1:1:16000:1
>1 dna:chromosome chromosome:GRCh37:1:1:15920:1
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
@HD VN:1.6
@SQ SN:1 LN:16000 M5:eee82532c1dea7dc57397d4205648f13 UR:file:/Users/ocohen/workingcode/gatk/./src/test/resources/org/broadinstitute/hellbender/tools/reference/CompareReferences/hg19mini_chr2iupacsnps.fasta
@SQ SN:2 LN:16000 M5:a1cdf5289fd7c56ec9be9e2d47695a76 UR:file:/Users/ocohen/workingcode/gatk/./src/test/resources/org/broadinstitute/hellbender/tools/reference/CompareReferences/hg19mini_chr2iupacsnps.fasta
@SQ SN:3 LN:16000 M5:94de808a3a2203dbb02434a47bd8184f UR:file:/Users/ocohen/workingcode/gatk/./src/test/resources/org/broadinstitute/hellbender/tools/reference/CompareReferences/hg19mini_chr2iupacsnps.fasta
@SQ SN:4 LN:16000 M5:7d397ee919e379328d8f52c57a54c778 UR:file:/Users/ocohen/workingcode/gatk/./src/test/resources/org/broadinstitute/hellbender/tools/reference/CompareReferences/hg19mini_chr2iupacsnps.fasta
Loading

0 comments on commit 2038fd1

Please sign in to comment.