Skip to content

Commit

Permalink
Genomes (#1603)
Browse files Browse the repository at this point in the history
Add support for downloading genomes including sequence and annotations.
  • Loading branch information
jrobinso authored Oct 19, 2024
1 parent f36d4cc commit d0afb72
Show file tree
Hide file tree
Showing 41 changed files with 1,770 additions and 1,419 deletions.
3 changes: 1 addition & 2 deletions src/main/java/org/broad/igv/DirectoryManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -387,8 +387,7 @@ public static void moveDirectoryContents(File oldDirectory, File newDirectory) {
}
}

public static boolean isChildOf(File base, File child)
throws IOException {
public static boolean isChildOf(File base, File child) {

File parent = child.getParentFile();
while (parent != null) {
Expand Down
25 changes: 10 additions & 15 deletions src/main/java/org/broad/igv/batch/CommandExecutor.java
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ public String execute(String commandLine) {
} else if (cmd.equalsIgnoreCase("scrolltotrack") || cmd.equalsIgnoreCase("gototrack")) {
boolean res = this.igv.scrollToTrack(StringUtils.stripQuotes(param1));
result = res ? "OK" : String.format("Error: Track %s not found", param1);
} else if (cmd.equalsIgnoreCase("scrolltotop") ) {
} else if (cmd.equalsIgnoreCase("scrolltotop")) {
this.igv.scrollToTop();
result = "OK";
} else if (cmd.equalsIgnoreCase("snapshotdirectory")) {
Expand All @@ -143,8 +143,7 @@ public String execute(String commandLine) {
return result;
}
String id = GenomeManager.getInstance().getCurrentGenome().getId();
if (id != null)
{
if (id != null) {
GenomeListItem item = GenomeListManager.getInstance().getGenomeListItem(id);
if (item != null) {
result = item.getPath();
Expand Down Expand Up @@ -633,25 +632,21 @@ private String genome(String param1) {
if (param1 == null) {
return "ERROR missing genome parameter";
}
String result = "OK";
String genomeID = param1;

igv.selectGenomeFromList(genomeID);
if (GenomeManager.getInstance().getCurrentGenome().getId().equals(genomeID)) {
return result;
}
String result;
String genomeIDorPath = param1;

String genomePath = resolveFileReference(genomeID);
try {
GenomeManager.getInstance().loadGenome(genomePath);
GenomeManager.getInstance().loadGenomeById(genomeIDorPath);
result = "OK";
} catch (IOException e) {
result = "ERROR: Could not load genome: " + genomeID;
result = "ERROR: Could not load genome: " + genomeIDorPath;
MessageUtils.showMessage(result);
}

return result;
}


/**
* Load function for port and batch script
*
Expand Down Expand Up @@ -1263,9 +1258,9 @@ private static AlignmentTrack.GroupOption getAlignmentGroupOption(String str) {
return AlignmentTrack.GroupOption.READ_GROUP;
} else if (str.equalsIgnoreCase("base")) {
return AlignmentTrack.GroupOption.BASE_AT_POS;
}else if (str.equalsIgnoreCase("insertion")) {
} else if (str.equalsIgnoreCase("insertion")) {
return AlignmentTrack.GroupOption.INSERTION_AT_POS;
}else {
} else {
try {
return AlignmentTrack.GroupOption.valueOf(str.toUpperCase());
} catch (IllegalArgumentException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,11 @@

package org.broad.igv.feature.genome;

import org.broad.igv.Globals;
import org.broad.igv.feature.Chromosome;
import org.broad.igv.util.ParsingUtils;

import java.io.*;
import java.util.*;


/**
* Static utility functions for genome data-wrangling.
Expand All @@ -39,24 +38,14 @@
* Date: 4/22/13
* Time: 1:27 PM
*/
public class GenomeUtils {
public class ChromSizesUtils {


/**
 * Command-line entry point. Invokes {@code updateChromSizes} with the genome list file
 * "genomes/genomes.tab" and the output directory "genomes/sizes".
 *
 * @param args command-line arguments; not read by this method
 * @throws IOException declared for the helper call(s) made below
 */
public static void main(String[] args) throws IOException {

// Hard-coded relative paths (no command-line arguments are consulted).
String genomeListFile = "genomes/genomes.tab";
String outputDirectory = "genomes/sizes";
// Only referenced by the commented-out findNonFastas call below.
String outputFile = "nonFastas.txt";

updateChromSizes(genomeListFile, new File(outputDirectory));

// Alternative one-off tasks, left disabled.
//findNonFastas(genomeListFile, new File(outputFile));

// mergeINCDCNames(
// new File("genomes/alias/hg38_alias.tab"),
// new File("/Users/jrobinso/projects/INSDC/GCF_000001405.26.assembly.txt"),
// new File("/Users/jrobinso/projects/INSDC"));

}


Expand Down Expand Up @@ -136,92 +125,5 @@ public static void exportChromSizes(File directory, Genome genome) throws FileNo

}

/**
 * Merge chromosome names from an NCBI assembly.txt file with an existing IGV alias file.
 * <p>
 * Each line of the alias file is split on whitespace into a row of equivalent names. Lines of the
 * assembly file that follow the "# Sequence-Name" header line are split on tabs, and the names in
 * columns 0, 4, 6 and 9 are considered. If any of those names already belongs to an alias row, all
 * four names (except those equal to "na") are added to that row. Otherwise a new row is created from
 * column 0 followed by the remaining names that are not "na". The result is written, tab delimited, to
 * a file named like {@code aliasFile} inside {@code outputDirectory}: merged alias rows first, then
 * the new rows.
 *
 * @param aliasFile       existing alias file, one row of whitespace-separated names per line
 * @param assemblyFile    NCBI assembly report file
 * @param outputDirectory directory that receives the merged alias file
 * @throws IOException if an input file cannot be read or the output file cannot be written
 */
public static void mergeINCDCNames(File aliasFile, File assemblyFile, File outputDirectory) throws IOException {

    // Build alias dictionary. Every name of a row maps to the same shared row instance, so names
    // added to a row later are visible through all of its keys.
    Map<String, Set<String>> aliasRows = new LinkedHashMap<String, Set<String>>();
    try (BufferedReader br = new BufferedReader(new FileReader(aliasFile))) {
        String nextLine;
        while ((nextLine = br.readLine()) != null) {
            String[] tokens = Globals.whitespacePattern.split(nextLine);
            Set<String> row = new LinkedHashSet<String>(Arrays.asList(tokens));
            for (String nm : tokens) {
                aliasRows.put(nm, row);
            }
        }
    }

    // Loop through assembly file
    int[] chrIndeces = {0, 4, 6, 9};
    // chrIndeces is in ascending order, so its last entry determines the required column count.
    int requiredColumns = chrIndeces[chrIndeces.length - 1] + 1;
    List<String> newRows = new ArrayList<String>();
    try (BufferedReader br = new BufferedReader(new FileReader(assemblyFile))) {
        boolean start = false;
        String nextLine;
        while ((nextLine = br.readLine()) != null) {
            if (!start) {
                // Everything up to and including the column header line is skipped.
                start = nextLine.startsWith("# Sequence-Name");
                continue;
            }

            String[] tokens = Globals.tabPattern.split(nextLine);
            if (tokens.length < requiredColumns) {
                // Blank or truncated line: skip it rather than fail with an index error.
                continue;
            }

            Set<String> matchedRow = null;
            for (int i : chrIndeces) {
                matchedRow = aliasRows.get(tokens[i]);
                if (matchedRow != null) {
                    break;
                }
            }

            if (matchedRow != null) {
                for (int j : chrIndeces) {
                    if (!"na".equals(tokens[j])) {
                        matchedRow.add(tokens[j]);
                    }
                }
            } else {
                // The first column is always kept; the remaining ones only when they are not "na".
                List<String> names = new ArrayList<String>();
                names.add(tokens[chrIndeces[0]]);
                for (int i = 1; i < chrIndeces.length; i++) {
                    String chrNm = tokens[chrIndeces[i]];
                    if (!"na".equals(chrNm)) {
                        names.add(chrNm);
                    }
                }
                String newRow = String.join("\t", names);
                newRows.add(newRow);
                System.out.println("New alias row: " + newRow);
            }
        }
    }

    File outputFile = new File(outputDirectory, aliasFile.getName());
    try (PrintWriter pw = new PrintWriter(new BufferedWriter(new FileWriter(outputFile)))) {
        // Several keys share one row, so remember which rows were already written.
        Set<Set<String>> output = new HashSet<Set<String>>();
        for (Set<String> row : aliasRows.values()) {
            if (row.isEmpty()) {
                continue;
            }
            if (output.add(row)) {
                pw.println(String.join("\t", row));
            }
        }
        for (String row : newRows) {
            pw.println(row);
        }
        // PrintWriter never throws on write failures; surface them explicitly.
        if (pw.checkError()) {
            throw new IOException("Error writing alias file: " + outputFile);
        }
    }

}


}
89 changes: 89 additions & 0 deletions src/main/java/org/broad/igv/feature/genome/DotGenomeUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package org.broad.igv.feature.genome;

import org.broad.igv.DirectoryManager;
import org.broad.igv.feature.genome.load.GenomeDescriptor;
import org.broad.igv.feature.genome.load.GenomeLoader;
import org.broad.igv.logging.LogManager;
import org.broad.igv.logging.Logger;
import org.broad.igv.ui.IGV;
import org.broad.igv.ui.util.download.Downloader;
import org.broad.igv.util.HttpUtils;
import org.broad.igv.util.Utilities;

import java.awt.*;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.Map;

/**
 * Utilities for the ".genome" format. These files are not created anymore, having been replaced by the genome json
 * format, but we need to maintain support for reading and managing them.
 */
public class DotGenomeUtils {

    private static final Logger log = LogManager.getLogger(DotGenomeUtils.class);


    /**
     * Returns a File of the provided genomePath. If the genomePath is a URL, it will be downloaded
     * and saved in the genome cache directory, which is created first if it does not exist.
     *
     * @param genomePath local file path or remote URL of a ".genome" archive
     * @return the local file: the downloaded copy for a remote URL, otherwise a File for the given path
     * @throws MalformedURLException        declared for URL creation from {@code genomePath}
     * @throws UnsupportedEncodingException declared for decoding the URL's file part as UTF-8
     */
    public static File getDotGenomeFile(String genomePath) throws MalformedURLException, UnsupportedEncodingException {

        if (!HttpUtils.isRemoteURL(genomePath.toLowerCase())) {
            return new File(genomePath);
        }

        // We need a local copy, as there is no http zip file reader
        URL genomeArchiveURL = HttpUtils.createURL(genomePath);
        final String decodedPath = URLDecoder.decode(genomeArchiveURL.getFile(), "UTF-8");
        String cachedFilename = Utilities.getFileNameFromURL(decodedPath);

        File cacheDirectory = DirectoryManager.getGenomeCacheDirectory();
        if (!cacheDirectory.exists()) {
            // mkdirs() also creates missing parent directories, which mkdir() would not.
            cacheDirectory.mkdirs();
        }

        File archiveFile = new File(cacheDirectory, cachedFilename);
        // The main frame is passed to the downloader only when an IGV instance exists.
        Frame parent = IGV.hasInstance() ? IGV.getInstance().getMainFrame() : null;
        Downloader.download(genomeArchiveURL, archiveFile, parent);
        return archiveFile;
    }


    /**
     * Looks up the local fasta file registered for a genome id in {@code GenomeLoader.localSequenceMap}.
     *
     * @param id genome id used as the map key
     * @return the value stored in the map for {@code id}
     */
    public static File getLocalFasta(String id) {
        return GenomeLoader.localSequenceMap.get(id);
    }

    /**
     * Removes the entry for a genome id from {@code GenomeLoader.localSequenceMap} and rewrites the
     * sequence map file to reflect the change.
     *
     * @param id genome id whose entry is removed
     */
    public static void removeLocalFasta(String id) {
        GenomeLoader.localSequenceMap.remove(id);
        updateSequenceMapFile();
    }


    /**
     * Writes the current contents of {@code GenomeLoader.localSequenceMap} to the sequence map file in
     * the genome cache directory, one "key&lt;TAB&gt;value" line per entry. Failures are logged, not thrown.
     */
    private static void updateSequenceMapFile() {

        File sequenceFile = new File(DirectoryManager.getGenomeCacheDirectory(), GenomeDescriptor.SEQUENCE_MAP_FILE);

        try (PrintWriter pw = new PrintWriter(new BufferedWriter(new FileWriter(sequenceFile)))) {
            for (Map.Entry<String, File> entry : GenomeLoader.localSequenceMap.entrySet()) {
                pw.println(entry.getKey() + "\t" + entry.getValue());
            }
            // PrintWriter swallows write failures; turn them into an exception so they get logged below.
            if (pw.checkError()) {
                throw new IOException("Write failed for " + sequenceFile);
            }
        } catch (IOException e) {
            log.error("Error writing sequence map", e);
        }
    }

}
Loading

0 comments on commit d0afb72

Please sign in to comment.