diff --git a/src/juicebox/HiC.java b/src/juicebox/HiC.java index a4b90585..3ed57265 100644 --- a/src/juicebox/HiC.java +++ b/src/juicebox/HiC.java @@ -1235,10 +1235,7 @@ public void clearAllMatrixZoomDataCache() { } private void clearAllCacheForDataset(Dataset ds) { - Matrix matrix = ds.getMatrix(xContext.getChromosome(), yContext.getChromosome()); - for (HiCZoom zoom : ds.getBpZooms()) { - matrix.getZoomData(zoom).clearCache(); - } + ds.clearCache(false); } public List> getRTreeHandlerIntersectingFeatures(String name, int g1, int g2) { diff --git a/src/juicebox/HiCGlobals.java b/src/juicebox/HiCGlobals.java index c932c12a..53a5d65f 100644 --- a/src/juicebox/HiCGlobals.java +++ b/src/juicebox/HiCGlobals.java @@ -36,7 +36,7 @@ */ public class HiCGlobals { - public static final String versionNum = "2.11.00"; + public static final String versionNum = "2.12.00"; public static final String juiceboxTitle = "[Juicebox " + versionNum + "] Hi-C Map "; // MainWindow variables diff --git a/src/juicebox/data/Dataset.java b/src/juicebox/data/Dataset.java index 9a82fccb..37fffcd5 100644 --- a/src/juicebox/data/Dataset.java +++ b/src/juicebox/data/Dataset.java @@ -1003,4 +1003,26 @@ public NormalizationHandler getNormalizationHandler() { public int getDepthBase() { return v9DepthBase; } + + public void clearCache(boolean onlyClearInter) { + for (Matrix matrix : matrices.values()) { + for (HiCZoom zoom : getBpZooms()) { + try { + matrix.getZoomData(zoom).clearCache(onlyClearInter); + } catch (Exception e) { + System.err.println("Clearing err: " + e.getLocalizedMessage()); + } + } + } + } + + public void clearCache(boolean onlyClearInter, HiCZoom zoom) { + for (Matrix matrix : matrices.values()) { + try { + matrix.getZoomData(zoom).clearCache(onlyClearInter); + } catch (Exception e) { + System.err.println("Clearing z_err: " + e.getLocalizedMessage()); + } + } + } } diff --git a/src/juicebox/data/DatasetReaderV2.java b/src/juicebox/data/DatasetReaderV2.java index d96d0047..639d3f1a 100644 --- a/src/juicebox/data/DatasetReaderV2.java +++ b/src/juicebox/data/DatasetReaderV2.java @@ -63,12 +63,12 @@ public class DatasetReaderV2 extends AbstractDatasetReader { * Cache of chromosome name -> array of restriction sites */ private final Map fragmentSitesCache = new HashMap<>(); - private final Map masterIndex = Collections.synchronizedMap(new HashMap<>()); + private final Map masterIndex = new HashMap<>(); private Map normVectorIndex; private final Dataset dataset; private int version = -1; private Map fragmentSitesIndex; - private final Map blockIndexMap = Collections.synchronizedMap(new HashMap<>()); + private final Map blockIndexMap = new HashMap<>(); private long masterIndexPos; private long normVectorFilePosition; private long nviHeaderPosition; @@ -332,11 +332,15 @@ private Pair readMatrixZoomData(Chromosome chr1, Chromosom if (binSize < 50 && HiCGlobals.allowDynamicBlockIndex) { int maxPossibleBlockNumber = blockColumnCount * blockColumnCount - 1; DynamicBlockIndex blockIndex = new DynamicBlockIndex(getValidStream(), nBlocks, maxPossibleBlockNumber, currentFilePointer); - blockIndexMap.put(zd.getKey(), blockIndex); + synchronized (blockIndexMap) { + blockIndexMap.put(zd.getKey(), blockIndex); + } } else { BlockIndex blockIndex = new BlockIndex(nBlocks); blockIndex.populateBlocks(dis); - blockIndexMap.put(zd.getKey(), blockIndex); + synchronized (blockIndexMap) { + blockIndexMap.put(zd.getKey(), blockIndex); + } } currentFilePointer += (nBlocks * 16L); @@ -483,7 +487,9 @@ private void readFooter(long position) throws IOException { long filePosition = dis.readLong(); int sizeInBytes = dis.readInt(); currentPosition += 12; - masterIndex.put(key, new IndexEntry(filePosition, sizeInBytes)); + synchronized (masterIndex) { + masterIndex.put(key, new IndexEntry(filePosition, sizeInBytes)); + } } Map expectedValuesMap = new LinkedHashMap<>(); diff --git a/src/juicebox/data/MatrixZoomData.java b/src/juicebox/data/MatrixZoomData.java index 5eabaec3..79b202f0 100644 --- a/src/juicebox/data/MatrixZoomData.java +++ b/src/juicebox/data/MatrixZoomData.java @@ -34,6 +34,7 @@ import juicebox.data.basics.Chromosome; import juicebox.data.iterator.IteratorContainer; import juicebox.data.iterator.ListOfListGenerator; +import juicebox.data.iterator.ZDIteratorContainer; import juicebox.data.v9depth.LogDepth; import juicebox.data.v9depth.V9Depth; import juicebox.gui.SuperAdapter; @@ -61,11 +62,6 @@ import java.util.concurrent.atomic.AtomicInteger; - -/** - * @author jrobinso - * @since Aug 10, 2010 - */ public class MatrixZoomData { final Chromosome chr1; // Chromosome on the X axis @@ -1197,8 +1193,15 @@ public void setAverageCount(double averageCount) { this.averageCount = averageCount; } - public void clearCache() { - blockCache.clear(); + public void clearCache(boolean onlyClearInter) { + if (onlyClearInter && isIntra) return; + if (HiCGlobals.useCache) { + blockCache.clear(); + } + if (iteratorContainer != null) { + iteratorContainer.clear(); + iteratorContainer = null; + } } private Iterator getNewContactRecordIterator() { @@ -1212,4 +1215,8 @@ public IteratorContainer getIteratorContainer() { } return iteratorContainer; } + + public IteratorContainer getFromFileIteratorContainer() { + return new ZDIteratorContainer(reader, this, blockCache); + } } diff --git a/src/juicebox/data/basics/ListOfFloatArrays.java b/src/juicebox/data/basics/ListOfFloatArrays.java index 815797eb..e4565f5a 100644 --- a/src/juicebox/data/basics/ListOfFloatArrays.java +++ b/src/juicebox/data/basics/ListOfFloatArrays.java @@ -151,8 +151,10 @@ public void addTo(long index, float value) { public void addValuesFrom(ListOfFloatArrays other) { if (overallLength == other.overallLength) { for (int i = 0; i < internalList.size(); i++) { - for (int j = 0; j < internalList.get(i).length; j++) { - internalList.get(i)[j] += other.internalList.get(i)[j]; + float[] array = internalList.get(i); + float[] otherArray = other.internalList.get(i); + for (int j = 0; j < array.length; j++) { + array[j] += otherArray[j]; } } } else { @@ -184,8 +186,12 @@ public void multiplyEverythingBy(double val) { public ListOfDoubleArrays convertToDoubles() { ListOfDoubleArrays newList = new ListOfDoubleArrays(overallLength); for (int j = 0; j < internalList.size(); j++) { - for (int k = 0; k < internalList.get(j).length; k++) { - newList.getValues().get(j)[k] = internalList.get(j)[k]; + + float[] array = internalList.get(j); + double[] newArray = newList.getValues().get(j); + + for (int k = 0; k < array.length; k++) { + newArray[k] = array[k]; } } return newList; diff --git a/src/juicebox/data/iterator/CoupledIteratorAndOffset.java b/src/juicebox/data/iterator/CoupledIteratorAndOffset.java new file mode 100644 index 00000000..d85968e3 --- /dev/null +++ b/src/juicebox/data/iterator/CoupledIteratorAndOffset.java @@ -0,0 +1,54 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2011-2021 Broad Institute, Aiden Lab, Rice University, Baylor College of Medicine + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +package juicebox.data.iterator; + +import juicebox.data.ContactRecord; + +import java.util.Iterator; + +public class CoupledIteratorAndOffset implements Iterator { + + private final Iterator internalIterator; + private final int xOffset, yOffset; + + public CoupledIteratorAndOffset(Iterator iterator, int xOffset, int yOffset) { + internalIterator = iterator; + this.xOffset = xOffset; + this.yOffset = yOffset; + } + + @Override + public boolean hasNext() { + return internalIterator.hasNext(); + } + + @Override + public ContactRecord next() { + ContactRecord cr = internalIterator.next(); + int binX = cr.getBinX() + xOffset; + int binY = cr.getBinY() + yOffset; + return new ContactRecord(binX, binY, cr.getCounts()); + } +} diff --git a/src/juicebox/data/iterator/GWIteratorContainer.java b/src/juicebox/data/iterator/GWIteratorContainer.java index e50d6d76..68433ddb 100644 --- a/src/juicebox/data/iterator/GWIteratorContainer.java +++ b/src/juicebox/data/iterator/GWIteratorContainer.java @@ -28,9 +28,13 @@ import juicebox.data.ContactRecord; import juicebox.data.Dataset; import juicebox.data.basics.Chromosome; +import juicebox.data.basics.ListOfFloatArrays; +import juicebox.tools.dev.ParallelizedJuicerTools; import juicebox.windowui.HiCZoom; import java.util.Iterator; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; public class GWIteratorContainer extends IteratorContainer { @@ -62,4 +66,38 @@ private static long calculateMatrixSize(ChromosomeHandler handler, HiCZoom zoom) public Iterator getNewContactRecordIterator() { return new GenomeWideIterator(dataset, handler, zoom, includeIntra); } + + public List> getAllFromFileContactRecordIterators() { + return GenomeWideIterator.getAllFromFileIterators(dataset, handler, zoom, includeIntra); + } + + @Override + public ListOfFloatArrays sparseMultiply(ListOfFloatArrays vector, long vectorLength) { + final ListOfFloatArrays totalSumVector = new ListOfFloatArrays(vectorLength); + + List> allIterators = getAllFromFileContactRecordIterators(); + + AtomicInteger index = new AtomicInteger(0); + ParallelizedJuicerTools.launchParallelizedCode(numCPUMatrixThreads, () -> { + int i = index.getAndIncrement(); + ListOfFloatArrays accumSumVector = new ListOfFloatArrays(vectorLength); + while (i < allIterators.size()) { + accumSumVector.addValuesFrom(ZDIteratorContainer.matrixVectorMultiplyOnIterator( + allIterators.get(i), vector, vectorLength)); + i = index.getAndIncrement(); + } + synchronized (totalSumVector) { + totalSumVector.addValuesFrom(accumSumVector); + } + }); + + allIterators.clear(); + + return totalSumVector; + } + + @Override + public void clear() { + // null, doesn't need to clean anything + } } diff --git a/src/juicebox/data/iterator/GenomeWideIterator.java b/src/juicebox/data/iterator/GenomeWideIterator.java index 57896a46..ac0b9034 100644 --- a/src/juicebox/data/iterator/GenomeWideIterator.java +++ b/src/juicebox/data/iterator/GenomeWideIterator.java @@ -28,7 +28,9 @@ import juicebox.data.basics.Chromosome; import juicebox.windowui.HiCZoom; +import java.util.ArrayList; import java.util.Iterator; +import java.util.List; public class GenomeWideIterator implements Iterator { @@ -62,6 +64,35 @@ public boolean hasNext() { return getNextIterator(); } + public static List> getAllFromFileIterators(Dataset dataset, ChromosomeHandler handler, + HiCZoom zoom, boolean includeIntra) { + Chromosome[] chromosomes = handler.getChromosomeArrayWithoutAllByAll(); + List> allIterators = new ArrayList<>(); + + int xOffset = 0; + for (int i = 0; i < chromosomes.length; i++) { + Chromosome c1 = chromosomes[i]; + int yOffset = 0 + xOffset; + for (int j = i; j < chromosomes.length; j++) { + Chromosome c2 = chromosomes[j]; + + if (c1.getIndex() < c2.getIndex() || (c1.equals(c2) && includeIntra)) { + MatrixZoomData zd = HiCFileTools.getMatrixZoomData(dataset, c1, c2, zoom); + if (zd != null) { + IteratorContainer ic = zd.getFromFileIteratorContainer(); + Iterator iterator = ic.getNewContactRecordIterator(); + if (iterator != null && iterator.hasNext()) { + allIterators.add(new CoupledIteratorAndOffset(iterator, xOffset, yOffset)); + } + } + } + yOffset += c2.getLength() / zoom.getBinSize() + 1; + } + xOffset += c1.getLength() / zoom.getBinSize() + 1; + } + return allIterators; + } + private boolean getNextIterator() { while (c1i < chromosomes.length) { Chromosome c1 = chromosomes[c1i]; @@ -71,8 +102,9 @@ private boolean getNextIterator() { if (c1.getIndex() < c2.getIndex() || (c1.equals(c2) && includeIntra)) { MatrixZoomData zd = HiCFileTools.getMatrixZoomData(dataset, c1, c2, zoom); if (zd != null) { - currentIterator = zd.getIteratorContainer().getNewContactRecordIterator(); - if (currentIterator != null && currentIterator.hasNext()) { + Iterator newIterator = zd.getFromFileIteratorContainer().getNewContactRecordIterator(); + if (newIterator != null && newIterator.hasNext()) { + currentIterator = new CoupledIteratorAndOffset(newIterator, recentAddX, recentAddY); return true; } } @@ -81,18 +113,15 @@ private boolean getNextIterator() { c2i++; } recentAddX += c1.getLength() / zoom.getBinSize() + 1; - recentAddY = 0; + recentAddY = 0 + recentAddX; c1i++; - c2i = 0; + c2i = c1i; } return false; } @Override public ContactRecord next() { - ContactRecord cr = currentIterator.next(); - int binX = cr.getBinX() + recentAddX; - int binY = cr.getBinY() + recentAddY; - return new ContactRecord(binX, binY, cr.getCounts()); + return currentIterator.next(); } } diff --git a/src/juicebox/data/iterator/IteratorContainer.java b/src/juicebox/data/iterator/IteratorContainer.java index 068d9998..965faa28 100644 --- a/src/juicebox/data/iterator/IteratorContainer.java +++ b/src/juicebox/data/iterator/IteratorContainer.java @@ -25,6 +25,7 @@ package juicebox.data.iterator; import juicebox.data.ContactRecord; +import juicebox.data.basics.ListOfFloatArrays; import java.util.Iterator; @@ -32,6 +33,7 @@ public abstract class IteratorContainer { private final long matrixSize; private long numberOfContactRecords = -1; + public static int numCPUMatrixThreads = 10; public IteratorContainer(long matrixSize) { this.matrixSize = matrixSize; @@ -66,4 +68,9 @@ public boolean getIsThereEnoughMemoryForNormCalculation() { // float is 4 bytes; one for each row return matrixSize * 4 < Runtime.getRuntime().maxMemory(); } + + public abstract ListOfFloatArrays sparseMultiply(ListOfFloatArrays vector, long vectorLength); + + public abstract void clear(); + } diff --git a/src/juicebox/data/iterator/ListIteratorContainer.java b/src/juicebox/data/iterator/ListIteratorContainer.java index 24afcbe2..04e74f47 100644 --- a/src/juicebox/data/iterator/ListIteratorContainer.java +++ b/src/juicebox/data/iterator/ListIteratorContainer.java @@ -25,9 +25,13 @@ package juicebox.data.iterator; import juicebox.data.ContactRecord; +import juicebox.data.basics.ListOfDoubleArrays; +import juicebox.data.basics.ListOfFloatArrays; +import juicebox.tools.dev.ParallelizedJuicerTools; import java.util.Iterator; import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; public class ListIteratorContainer extends IteratorContainer { @@ -50,4 +54,49 @@ public boolean getIsThereEnoughMemoryForNormCalculation() { // 12 bytes (2 ints, 1 float) for contact record return 4 * getMatrixSize() + 12 * getNumberOfContactRecords() < Runtime.getRuntime().maxMemory(); } + + public static ListOfFloatArrays sparseMultiplyByListContacts(List readList, ListOfFloatArrays vector, + long vectorLength, int numThreads) { + final ListOfDoubleArrays totalSumVector = new ListOfDoubleArrays(vectorLength); + + int[] cutoffs = ParallelizedListOperations.createCutoffs(numThreads, readList.size()); + + AtomicInteger index = new AtomicInteger(0); + ParallelizedJuicerTools.launchParallelizedCode(numThreads, () -> { + int sIndx = index.getAndIncrement(); + ListOfDoubleArrays sumVector = new ListOfDoubleArrays(vectorLength); + for (int i = cutoffs[sIndx]; i < cutoffs[sIndx + 1]; i++) { + ContactRecord cr = readList.get(i); + matrixVectorMult(vector, sumVector, cr); + } + + synchronized (totalSumVector) { + totalSumVector.addValuesFrom(sumVector); + } + }); + + return totalSumVector.convertToFloats(); + } + + public static void matrixVectorMult(ListOfFloatArrays vector, ListOfDoubleArrays sumVector, ContactRecord cr) { + int x = cr.getBinX(); + int y = cr.getBinY(); + double counts = cr.getCounts(); + if (x == y) { + counts *= .5; + } + + sumVector.addTo(x, counts * vector.get(y)); + sumVector.addTo(y, counts * vector.get(x)); + } + + @Override + public ListOfFloatArrays sparseMultiply(ListOfFloatArrays vector, long vectorLength) { + return sparseMultiplyByListContacts(readList, vector, vectorLength, numCPUMatrixThreads); + } + + @Override + public void clear() { + readList.clear(); + } } diff --git a/src/juicebox/data/iterator/ListOfListGenerator.java b/src/juicebox/data/iterator/ListOfListGenerator.java index e025fdf1..aa82f0d7 100644 --- a/src/juicebox/data/iterator/ListOfListGenerator.java +++ b/src/juicebox/data/iterator/ListOfListGenerator.java @@ -26,14 +26,18 @@ import juicebox.HiCGlobals; import juicebox.data.*; +import juicebox.tools.dev.ParallelizedJuicerTools; import juicebox.windowui.HiCZoom; import org.broad.igv.util.collections.LRUCache; import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; public class ListOfListGenerator { + private static final int MAX_LIMIT = Integer.MAX_VALUE - 10; + public static IteratorContainer createFromZD(DatasetReader reader, MatrixZoomData matrixZoomData, LRUCache blockCache) { IteratorContainer ic = new ZDIteratorContainer(reader, matrixZoomData, blockCache); @@ -76,25 +80,47 @@ private static IteratorContainer tryToCreateIteratorInRAM(IteratorContainer ic0) } private static List> populateListOfLists(IteratorContainer ic) { + + if (ic instanceof GWIteratorContainer) { + List> iterators = ((GWIteratorContainer) ic).getAllFromFileContactRecordIterators(); + List> allRecords = new ArrayList<>(); + + AtomicInteger index = new AtomicInteger(0); + ParallelizedJuicerTools.launchParallelizedCode(IteratorContainer.numCPUMatrixThreads, () -> { + int i = index.getAndIncrement(); + List> recordsForThread = new ArrayList<>(); + while (i < iterators.size()) { + List> recordsForIter = populateListOfListsFromSingleIterator(iterators.get(i)); + recordsForThread.addAll(recordsForIter); + i = index.getAndIncrement(); + } + synchronized (allRecords) { + allRecords.addAll(recordsForThread); + } + }); + return allRecords; + } else { + return populateListOfListsFromSingleIterator(ic.getNewContactRecordIterator()); + } + } + + private static List> populateListOfListsFromSingleIterator(Iterator iterator) { + List> allRecords = new ArrayList<>(); List tempList = new ArrayList<>(); int counter = 0; - - Iterator iterator = ic.getNewContactRecordIterator(); while (iterator.hasNext()) { tempList.add(iterator.next()); counter++; - if (counter > Integer.MAX_VALUE - 10) { + if (counter > MAX_LIMIT) { allRecords.add(tempList); tempList = new ArrayList<>(); counter = 0; } } - if (tempList.size() > 0) { allRecords.add(tempList); } - return allRecords; } diff --git a/src/juicebox/data/iterator/ListOfListIteratorContainer.java b/src/juicebox/data/iterator/ListOfListIteratorContainer.java index 3b8af7e5..473ea3ab 100644 --- a/src/juicebox/data/iterator/ListOfListIteratorContainer.java +++ b/src/juicebox/data/iterator/ListOfListIteratorContainer.java @@ -25,9 +25,13 @@ package juicebox.data.iterator; import juicebox.data.ContactRecord; +import juicebox.data.basics.ListOfDoubleArrays; +import juicebox.data.basics.ListOfFloatArrays; +import juicebox.tools.dev.ParallelizedJuicerTools; import java.util.Iterator; import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; public class ListOfListIteratorContainer extends IteratorContainer { @@ -51,4 +55,49 @@ public boolean getIsThereEnoughMemoryForNormCalculation() { // 12 bytes (2 ints, 1 float) for contact record return 4 * getMatrixSize() + 12 * getNumberOfContactRecords() < Runtime.getRuntime().maxMemory(); } + + @Override + public ListOfFloatArrays sparseMultiply(ListOfFloatArrays vector, long vectorLength) { + + if (allContactRecords.size() < numCPUMatrixThreads) { + final ListOfFloatArrays totalSumVector = new ListOfFloatArrays(vectorLength); + for (List contactRecords : allContactRecords) { + totalSumVector.addValuesFrom(ListIteratorContainer.sparseMultiplyByListContacts( + contactRecords, vector, vectorLength, numCPUMatrixThreads)); + } + return totalSumVector; + } + + return sparseMultiplyAcrossLists(vector, vectorLength); + } + + @Override + public void clear() { + for (List cList : allContactRecords) { + cList.clear(); + } + allContactRecords.clear(); + } + + private ListOfFloatArrays sparseMultiplyAcrossLists(ListOfFloatArrays vector, long vectorLength) { + final ListOfDoubleArrays totalSumVector = new ListOfDoubleArrays(vectorLength); + + AtomicInteger index = new AtomicInteger(0); + ParallelizedJuicerTools.launchParallelizedCode(numCPUMatrixThreads, () -> { + int sIndx = index.getAndIncrement(); + ListOfDoubleArrays sumVector = new ListOfDoubleArrays(vectorLength); + while (sIndx < allContactRecords.size()) { + for (ContactRecord cr : allContactRecords.get(sIndx)) { + ListIteratorContainer.matrixVectorMult(vector, sumVector, cr); + } + sIndx = index.getAndIncrement(); + } + + synchronized (totalSumVector) { + totalSumVector.addValuesFrom(sumVector); + } + }); + + return totalSumVector.convertToFloats(); + } } diff --git a/src/juicebox/data/iterator/ParallelizedListOperations.java b/src/juicebox/data/iterator/ParallelizedListOperations.java new file mode 100644 index 00000000..78d24baf --- /dev/null +++ b/src/juicebox/data/iterator/ParallelizedListOperations.java @@ -0,0 +1,39 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2011-2021 Broad Institute, Aiden Lab, Rice University, Baylor College of Medicine + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +package juicebox.data.iterator; + +public class ParallelizedListOperations { + + public static int[] createCutoffs(int n, int listSize) { + int[] cutoffs = new int[n + 1]; + int increment = listSize / n; + for (int k = 1; k < n; k++) { + cutoffs[k] = k * increment; + } + cutoffs[0] = 0; + cutoffs[n] = listSize; + return cutoffs; + } +} diff --git a/src/juicebox/data/iterator/ZDIteratorContainer.java b/src/juicebox/data/iterator/ZDIteratorContainer.java index de3b91c1..4d627fca 100644 --- a/src/juicebox/data/iterator/ZDIteratorContainer.java +++ b/src/juicebox/data/iterator/ZDIteratorContainer.java @@ -28,6 +28,8 @@ import juicebox.data.ContactRecord; import juicebox.data.DatasetReader; import juicebox.data.MatrixZoomData; +import juicebox.data.basics.ListOfDoubleArrays; +import juicebox.data.basics.ListOfFloatArrays; import org.broad.igv.util.collections.LRUCache; import java.util.Iterator; @@ -49,4 +51,24 @@ public ZDIteratorContainer(DatasetReader reader, MatrixZoomData zd, LRUCache getNewContactRecordIterator() { return new ContactRecordIterator(reader, zd, blockCache); } + + public static ListOfFloatArrays matrixVectorMultiplyOnIterator(Iterator iterator, + ListOfFloatArrays vector, long vectorLength) { + ListOfDoubleArrays sumVector = new ListOfDoubleArrays(vectorLength); + while (iterator.hasNext()) { + ContactRecord cr = iterator.next(); + ListIteratorContainer.matrixVectorMult(vector, sumVector, cr); + } + return sumVector.convertToFloats(); + } + + @Override + public ListOfFloatArrays sparseMultiply(ListOfFloatArrays vector, long vectorLength) { + return matrixVectorMultiplyOnIterator(getNewContactRecordIterator(), vector, vectorLength); + } + + @Override + public void clear() { + //blockCache.clear(); + } } diff --git a/src/juicebox/tools/clt/CommandLineParser.java b/src/juicebox/tools/clt/CommandLineParser.java index eb1ca1f7..dd035092 100644 --- a/src/juicebox/tools/clt/CommandLineParser.java +++ b/src/juicebox/tools/clt/CommandLineParser.java @@ -75,6 +75,7 @@ public class CommandLineParser extends CmdLineParser { private final Option genomeWideOption = addIntegerOption('w', "genomewide"); private final Option alignmentFilterOption = addIntegerOption('a', "alignment"); private final Option threadNumOption = addIntegerOption('j', "threads"); + private final Option matrixThreadNumOption = addIntegerOption("mthreads"); private final Option v9DepthBaseOption = addIntegerOption("v9-depth-base"); // sets of strings @@ -248,6 +249,10 @@ public int getNumThreads() { return optionToInt(threadNumOption); } + public int getNumMatrixOperationThreads() { + return optionToInt(matrixThreadNumOption); + } + public int getV9DepthBase() { return optionToInt(v9DepthBaseOption); } diff --git a/src/juicebox/tools/clt/CommandLineParserForJuicer.java b/src/juicebox/tools/clt/CommandLineParserForJuicer.java index 245cb9e3..8d7618f6 100644 --- a/src/juicebox/tools/clt/CommandLineParserForJuicer.java +++ b/src/juicebox/tools/clt/CommandLineParserForJuicer.java @@ -46,7 +46,6 @@ public class CommandLineParserForJuicer extends CommandLineParser { private final Option multipleChromosomesOption = addStringOption('c', "chromosomes"); private final Option multipleResolutionsOption = addStringOption('r', "resolutions"); private final Option legacyOutputOption = addBooleanOption('g', "legacy"); - private final Option threadNumOption = addIntegerOption('z', "threads"); // APA private final Option apaWindowOption = addIntegerOption('w', "window"); @@ -135,10 +134,6 @@ public int getMatrixSizeOption() { return optionToInt(matrixSizeOption); } - public int getNumThreads() { - return optionToInt(threadNumOption); - } - /** * double flags */ diff --git a/src/juicebox/tools/clt/JuiceboxCLT.java b/src/juicebox/tools/clt/JuiceboxCLT.java index 6dea3d6d..160c0589 100644 --- a/src/juicebox/tools/clt/JuiceboxCLT.java +++ b/src/juicebox/tools/clt/JuiceboxCLT.java @@ -26,6 +26,7 @@ import juicebox.data.Dataset; import juicebox.data.HiCFileTools; +import juicebox.data.iterator.IteratorContainer; import juicebox.windowui.NormalizationType; import java.util.Arrays; @@ -39,6 +40,7 @@ public abstract class JuiceboxCLT { protected Dataset dataset = null; protected NormalizationType norm = null; protected static int numCPUThreads = 1; + protected static int numCPUThreadsForSecondTask = 1; protected boolean usingMultiThreadedVersion = false; protected JuiceboxCLT(String usage) { @@ -79,16 +81,26 @@ protected void setDatasetAndNorm(String files, String normType, boolean allowPri } } - protected void updateNumberOfCPUThreads(CommandLineParser parser) { - int numThreads = parser.getNumThreads(); + public static int getAppropriateNumberOfThreads(int numThreads, int defaultNum) { if (numThreads > 0) { - numCPUThreads = numThreads; + return numThreads; } else if (numThreads < 0) { - numCPUThreads = Runtime.getRuntime().availableProcessors(); + return Math.abs(numThreads) * Runtime.getRuntime().availableProcessors(); } else { - numCPUThreads = 1; + return defaultNum; } - System.out.println("Using " + numCPUThreads + " CPU thread(s)"); + } + + protected void updateNumberOfCPUThreads(CommandLineParser parser, int numDefaultThreads) { + int numThreads = parser.getNumThreads(); + numCPUThreads = getAppropriateNumberOfThreads(numThreads, numDefaultThreads); + System.out.println("Using " + numCPUThreads + " CPU thread(s) for primary task"); + } + + protected void updateSecondaryNumberOfCPUThreads(CommandLineParser parser, int numDefaultThreads) { + int numMThreads = parser.getNumMatrixOperationThreads(); + numCPUThreadsForSecondTask = getAppropriateNumberOfThreads(numMThreads, numDefaultThreads); + System.out.println("Using " + IteratorContainer.numCPUMatrixThreads + " CPU thread(s) for secondary task"); } } diff --git a/src/juicebox/tools/clt/JuicerCLT.java b/src/juicebox/tools/clt/JuicerCLT.java index fa1e7af7..164d70d1 100644 --- a/src/juicebox/tools/clt/JuicerCLT.java +++ b/src/juicebox/tools/clt/JuicerCLT.java @@ -42,7 +42,6 @@ public abstract class JuicerCLT extends JuiceboxCLT { protected NormalizationType norm = NormalizationHandler.SCALE; protected List givenChromosomes = null; //TODO set to protected - protected static int numCPUThreads = 1; protected JuicerCLT(String usage) { super(usage); @@ -66,18 +65,6 @@ public void readArguments(String[] args, CommandLineParser parser) { readJuicerArguments(args, juicerParser); } - protected void updateNumberOfCPUThreads(CommandLineParserForJuicer juicerParser) { - int numThreads = juicerParser.getNumThreads(); - if (numThreads > 0) { - numCPUThreads = numThreads; - } else if (numThreads < 0) { - numCPUThreads = Runtime.getRuntime().availableProcessors(); - } else { - numCPUThreads = 1; - } - System.out.println("Using " + numCPUThreads + " CPU thread(s)"); - } - protected abstract void readJuicerArguments(String[] args, CommandLineParserForJuicer juicerParser); private void assessIfChromosomesHaveBeenSpecified(CommandLineParserForJuicer juicerParser) { diff --git a/src/juicebox/tools/clt/juicer/APA.java b/src/juicebox/tools/clt/juicer/APA.java index 50986a5a..1682b755 100644 --- a/src/juicebox/tools/clt/juicer/APA.java +++ b/src/juicebox/tools/clt/juicer/APA.java @@ -136,7 +136,7 @@ public class APA extends JuicerCLT { */ public APA() { super("apa [-n minval] [-x maxval] [-w window] [-r resolution(s)] [-c chromosomes]" + - " [-k NONE/VC/VC_SQRT/KR] [-q corner_width] [-e include_inter_chr] [-u save_all_data]" + + " [-k NONE/VC/VC_SQRT/KR] [-q corner_width] [--include-inter-chr] [--save-all]" + " "); HiCGlobals.useCache = false; } @@ -210,7 +210,7 @@ protected void readJuicerArguments(String[] args, CommandLineParserForJuicer jui resolutions = Ints.toArray(intResolutions); } - updateNumberOfCPUThreads(juicerParser); + updateNumberOfCPUThreads(juicerParser, 1); } @Override diff --git a/src/juicebox/tools/clt/juicer/Arrowhead.java b/src/juicebox/tools/clt/juicer/Arrowhead.java index dfac863e..c73afb34 100644 --- a/src/juicebox/tools/clt/juicer/Arrowhead.java +++ b/src/juicebox/tools/clt/juicer/Arrowhead.java @@ -156,7 +156,7 @@ protected void readJuicerArguments(String[] args, CommandLineParserForJuicer jui matrixSize = specifiedMatrixSize; } - updateNumberOfCPUThreads(juicerParser); + updateNumberOfCPUThreads(juicerParser, 1); List t = juicerParser.getThresholdOptions(); if (t != null && t.size() == 6) { diff --git a/src/juicebox/tools/clt/juicer/HiCCUPS.java b/src/juicebox/tools/clt/juicer/HiCCUPS.java index 8b66cd34..7cd96bb9 100644 --- a/src/juicebox/tools/clt/juicer/HiCCUPS.java +++ b/src/juicebox/tools/clt/juicer/HiCCUPS.java @@ -246,7 +246,7 @@ protected void readJuicerArguments(String[] args, CommandLineParserForJuicer jui System.out.println(CPU_VERSION_WARNING); } - updateNumberOfCPUThreads(juicerParser); + updateNumberOfCPUThreads(juicerParser, 1); } /** diff --git a/src/juicebox/tools/clt/juicer/HiCCUPSDiff.java b/src/juicebox/tools/clt/juicer/HiCCUPSDiff.java index 161db833..0de96f73 100644 --- a/src/juicebox/tools/clt/juicer/HiCCUPSDiff.java +++ b/src/juicebox/tools/clt/juicer/HiCCUPSDiff.java @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2011-2020 Broad Institute, Aiden Lab, Rice University, Baylor College of Medicine + * Copyright (c) 2011-2021 Broad Institute, Aiden Lab, Rice University, Baylor College of Medicine * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -15,7 +15,7 @@ * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN @@ -179,7 +179,7 @@ protected void readJuicerArguments(String[] args, CommandLineParserForJuicer jui thresholds = HiCCUPSUtils.extractDoubleValues(t, 4, Double.NaN); } - updateNumberOfCPUThreads(juicerParser); + updateNumberOfCPUThreads(juicerParser, 1); System.out.println("Running HiCCUPS with alternate loop lists"); hiccups1 = new HiCCUPS(); diff --git a/src/juicebox/tools/clt/old/AddNorm.java b/src/juicebox/tools/clt/old/AddNorm.java index 4a4cc278..24cb2e18 100644 --- a/src/juicebox/tools/clt/old/AddNorm.java +++ b/src/juicebox/tools/clt/old/AddNorm.java @@ -25,10 +25,10 @@ package juicebox.tools.clt.old; import juicebox.HiCGlobals; +import juicebox.data.iterator.IteratorContainer; import juicebox.tools.clt.CommandLineParser; import juicebox.tools.clt.JuiceboxCLT; import juicebox.tools.utils.norm.CustomNormVectorFileHandler; -import juicebox.tools.utils.norm.MultithreadedNormalizationVectorUpdater; import juicebox.tools.utils.norm.NormalizationVectorUpdater; import juicebox.windowui.NormalizationHandler; import juicebox.windowui.NormalizationType; @@ -78,12 +78,8 @@ public static Map defaultHashMapForResToBuildTo(List public static void launch(String outputFile, List normalizationTypes, int genomeWide, boolean noFragNorm, int numCPUThreads, Map resolutionsToBuildTo) throws IOException { - NormalizationVectorUpdater updater; - if (numCPUThreads > 1) { - updater = new MultithreadedNormalizationVectorUpdater(numCPUThreads); - } else { - updater = new NormalizationVectorUpdater(); - } + HiCGlobals.useCache = false; + NormalizationVectorUpdater updater = new NormalizationVectorUpdater(); updater.updateHicFile(outputFile, normalizationTypes, resolutionsToBuildTo, genomeWide, noFragNorm); } @@ -101,7 +97,9 @@ public void readArguments(String[] args, CommandLineParser parser) { noFragNorm = parser.getNoFragNormOption(); HiCGlobals.USE_ITERATOR_NOT_ALL_IN_RAM = parser.getDontPutAllContactsIntoRAM(); HiCGlobals.CHECK_RAM_USAGE = parser.shouldCheckRAMUsage(); - updateNumberOfCPUThreads(parser); + updateNumberOfCPUThreads(parser, 10); + IteratorContainer.numCPUMatrixThreads = numCPUThreads; + usingMultiThreadedVersion = numCPUThreads > 1; genomeWideResolution = parser.getGenomeWideOption(); diff --git a/src/juicebox/tools/clt/old/PairsToBin.java b/src/juicebox/tools/clt/old/PairsToBin.java index ce25671a..007155de 100644 --- a/src/juicebox/tools/clt/old/PairsToBin.java +++ b/src/juicebox/tools/clt/old/PairsToBin.java @@ -35,7 +35,7 @@ public class PairsToBin extends JuiceboxCLT { private String ifile, ofile, genomeId; public PairsToBin() { - super("pairsToBin "); + super("pairsToBin "); } @Override diff --git a/src/juicebox/tools/clt/old/PreProcessing.java b/src/juicebox/tools/clt/old/PreProcessing.java index 053e89cd..db44a8fe 100644 --- a/src/juicebox/tools/clt/old/PreProcessing.java +++ b/src/juicebox/tools/clt/old/PreProcessing.java @@ -27,6 +27,7 @@ import juicebox.HiCGlobals; import juicebox.data.ChromosomeHandler; import juicebox.data.HiCFileTools; +import juicebox.data.iterator.IteratorContainer; import juicebox.tools.clt.CommandLineParser; import juicebox.tools.clt.JuiceboxCLT; import juicebox.tools.utils.common.ShellCommandRunner; @@ -100,7 +101,10 @@ public void readArguments(String[] args, CommandLineParser parser) { String tmpDir = parser.getTmpdirOption(); double hicFileScalingFactor = parser.getScalingOption(); - updateNumberOfCPUThreads(parser); + updateNumberOfCPUThreads(parser, 1); + updateSecondaryNumberOfCPUThreads(parser, 10); + IteratorContainer.numCPUMatrixThreads = numCPUThreadsForSecondTask; + if (numCPUThreads < 2) { preprocessor = new Preprocessor(new File(outputFile), genomeId, chromHandler, hicFileScalingFactor); usingMultiThreadedVersion = false; diff --git a/src/juicebox/tools/clt/old/Statistics.java b/src/juicebox/tools/clt/old/Statistics.java index 9191c23d..eddc6703 100644 --- a/src/juicebox/tools/clt/old/Statistics.java +++ b/src/juicebox/tools/clt/old/Statistics.java @@ -43,7 +43,6 @@ public class Statistics extends JuiceboxCLT { - private int numThreads; private String siteFile; private String ligationJunction = "none"; private String inFile; @@ -134,7 +133,7 @@ public void readArguments(String[] args, CommandLineParser parser) { ligationJunction = ligJunc; } //multithreading flags - numThreads = Math.max(parser.getNumThreads(), 1); + updateNumberOfCPUThreads(parser, 1); mndIndexFile = parser.getMndIndexOption(); } @@ -153,14 +152,14 @@ public void run() { setMndIndex(); FragmentCalculation fragmentCalculation = readSiteFile(siteFile); StatisticsContainer container; - if (localHandler == null || mndChunks.size() < 2 || numThreads == 1) { + if (localHandler == null || mndChunks.size() < 2 || numCPUThreads == 1) { LoneStatisticsWorker runner = new LoneStatisticsWorker(siteFile, statsFiles, mapqThresholds, ligationJunction, inFile, fragmentCalculation); runner.infileStatistics(); container = runner.getResultsContainer(); } else { container = new StatisticsContainer(); - ParallelStatistics pStats = new ParallelStatistics(numThreads, container, + ParallelStatistics pStats = new ParallelStatistics(numCPUThreads, container, mndChunks, siteFile, statsFiles, mapqThresholds, ligationJunction, inFile, localHandler, fragmentCalculation); pStats.launchThreads(); diff --git a/src/juicebox/tools/utils/norm/GenomeWideNormalizationVectorUpdater.java b/src/juicebox/tools/utils/norm/GenomeWideNormalizationVectorUpdater.java index 1188e3e5..604e5905 100644 --- a/src/juicebox/tools/utils/norm/GenomeWideNormalizationVectorUpdater.java +++ b/src/juicebox/tools/utils/norm/GenomeWideNormalizationVectorUpdater.java @@ -27,7 +27,6 @@ import juicebox.HiCGlobals; import juicebox.data.*; import juicebox.data.basics.Chromosome; -import juicebox.data.basics.ListOfDoubleArrays; import juicebox.data.basics.ListOfFloatArrays; import juicebox.data.iterator.IteratorContainer; import juicebox.data.iterator.ListOfListGenerator; @@ -40,7 +39,6 @@ import java.io.IOException; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -129,6 +127,7 @@ public static void updateHicFileForGWfromPreAddNormOnly(Dataset ds, HiCZoom zoom if (NormalizationHandler.isGenomeWideNorm(normType)) { if (zoom.getBinSize() >= resolutionsToBuildTo.get(normType)) { + System.out.println("Now Doing " + normType.getLabel()); long currentTime = System.currentTimeMillis(); Pair, ExpectedValueCalculation> wgVectors = getWGVectors(ds, zoom, normType); if (HiCGlobals.printVerboseComments) { @@ -171,12 +170,11 @@ private static Pair, ExpectedValueCalculati int addY = 0; // Loop through chromosomes for (Chromosome chr : chromosomeHandler.getChromosomeArrayWithoutAllByAll()) { - final int chrIdx = chr.getIndex(); - MatrixZoomData zd = HiCFileTools.getMatrixZoomData(dataset, chr, chr, zoom); if (zd == null) continue; + final int chrIdx = chr.getIndex(); - Iterator iterator = zd.getIteratorContainer().getNewContactRecordIterator(); + Iterator iterator = zd.getFromFileIteratorContainer().getNewContactRecordIterator(); while (iterator.hasNext()) { ContactRecord cr = iterator.next(); int x = cr.getBinX(); @@ -192,17 +190,10 @@ private static Pair, ExpectedValueCalculati } // Split normalization vector by chromosome - Map normVectorMap = new LinkedHashMap<>(); - long location1 = 0; - for (Chromosome c1 : chromosomeHandler.getChromosomeArrayWithoutAllByAll()) { - long chrBinned = c1.getLength() / resolution + 1; - ListOfDoubleArrays chrNV = new ListOfDoubleArrays(chrBinned); - for (long k = 0; k < chrNV.getLength(); k++) { // todo optimize a version with system.arraycopy and long - chrNV.set(k, vector.get(location1 + k)); - } - location1 += chrNV.getLength(); - normVectorMap.put(c1, new NormalizationVector(norm, c1.getIndex(), zoom.getUnit(), resolution, chrNV)); - } + Map normVectorMap = + NormalizationTools.parCreateNormVectorMap(chromosomeHandler, resolution, vector, norm, zoom); + + ic.clear(); return new Pair<>(normVectorMap, expectedValueCalculation); } diff --git a/src/juicebox/tools/utils/norm/NormalizationTools.java b/src/juicebox/tools/utils/norm/NormalizationTools.java new file mode 100644 index 00000000..e8c4243b --- /dev/null +++ b/src/juicebox/tools/utils/norm/NormalizationTools.java @@ -0,0 +1,78 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2011-2021 Broad Institute, Aiden Lab, Rice University, Baylor College of Medicine + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +package juicebox.tools.utils.norm; + +import juicebox.data.ChromosomeHandler; +import juicebox.data.NormalizationVector; +import juicebox.data.basics.Chromosome; +import juicebox.data.basics.ListOfDoubleArrays; +import juicebox.data.basics.ListOfFloatArrays; +import juicebox.data.iterator.IteratorContainer; +import juicebox.tools.dev.ParallelizedJuicerTools; +import juicebox.windowui.HiCZoom; +import juicebox.windowui.NormalizationType; + +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; + +public class NormalizationTools { + public static Map parCreateNormVectorMap(ChromosomeHandler chromosomeHandler, + int resolution, ListOfFloatArrays vector, + NormalizationType norm, HiCZoom zoom) { + final Map normVectorMap = new LinkedHashMap<>(); + + final AtomicInteger index = new AtomicInteger(0); + Chromosome[] chromosomes = chromosomeHandler.getChromosomeArrayWithoutAllByAll(); + long[] offsets = createOffsets(chromosomes, resolution); + ParallelizedJuicerTools.launchParallelizedCode(IteratorContainer.numCPUMatrixThreads, () -> { + int i = index.getAndIncrement(); + while (i < (chromosomes).length) { + Chromosome c1 = chromosomes[i]; + long offset = offsets[i]; + long chrBinned = c1.getLength() / resolution + 1; + ListOfDoubleArrays chrNV = new ListOfDoubleArrays(chrBinned); + for (long k = 0; k < chrNV.getLength(); k++) { // todo optimize a version with system.arraycopy and long + chrNV.set(k, vector.get(offset + k)); + } + synchronized (normVectorMap) { + normVectorMap.put(c1, new NormalizationVector(norm, c1.getIndex(), zoom.getUnit(), resolution, chrNV)); + } + i = index.getAndIncrement(); + } + }); + + return normVectorMap; + } + + private static long[] createOffsets(Chromosome[] chromosomes, int resolution) { + long[] offsets = new long[chromosomes.length]; + offsets[0] = 0L; + for (int i = 0; i < chromosomes.length - 1; i++) { + offsets[i + 1] = offsets[i] + (chromosomes[i].getLength() / resolution) + 1; + } + return offsets; + } +} diff --git a/src/juicebox/tools/utils/norm/NormalizationVectorUpdater.java b/src/juicebox/tools/utils/norm/NormalizationVectorUpdater.java index 29f419f2..b1a2d99c 100644 --- a/src/juicebox/tools/utils/norm/NormalizationVectorUpdater.java +++ b/src/juicebox/tools/utils/norm/NormalizationVectorUpdater.java @@ -89,7 +89,6 @@ protected void buildVCOrVCSQRT(boolean weShouldBuildVC, boolean weShouldBuildVCS NormalizationCalculations nc, HiCZoom zoom, MatrixZoomData zd, ExpectedValueCalculation evVC, ExpectedValueCalculation evVCSqrt) throws IOException { final int chrIdx = chr.getIndex(); - long currentTime = System.currentTimeMillis(); ListOfFloatArrays vc = nc.computeVC(); ListOfFloatArrays vcSqrt = new ListOfFloatArrays(vc.getLength()); @@ -152,15 +151,18 @@ public void updateHicFile(String path, List normalizationsToB } if (noFrag && zoom.getUnit() == HiC.Unit.FRAG) continue; + System.out.println(); + System.out.print("Calculating norms for zoom " + zoom); + // compute genome-wide normalizations if (zoom.getUnit() == HiC.Unit.BP && zoom.getBinSize() >= genomeWideLowestResolutionAllowed) { GenomeWideNormalizationVectorUpdater.updateHicFileForGWfromPreAddNormOnly(ds, zoom, normalizationsToBuild, resolutionsToBuildTo, normVectorIndices, normVectorBuffers, expectedValueCalculations); } + ds.clearCache(true, zoom); + //System.out.println("genomewide normalization: " + Duration.between(A,B).toMillis()); - System.out.println(); - System.out.print("Calculating norms for zoom " + zoom); Map fcm = zoom.getUnit() == HiC.Unit.FRAG ? fragCountMap : null; @@ -175,6 +177,8 @@ public void updateHicFile(String path, List normalizationsToB MatrixZoomData zd = HiCFileTools.getMatrixZoomData(ds, chr, chr, zoom); if (zd == null) continue; + System.out.println("Now Doing " + chr.getName()); + NormalizationCalculations nc = new NormalizationCalculations(zd.getIteratorContainer()); if (!nc.isEnoughMemory()) { System.err.println("Not enough memory, skipping " + chr); @@ -196,6 +200,8 @@ public void updateHicFile(String path, List normalizationsToB if (weShouldBuildScale && zoom.getBinSize() >= resolutionsToBuildTo.get(NormalizationHandler.SCALE)) { buildScale(chr, nc, zoom, zd, evSCALE); } + + zd.clearCache(false); } if (weShouldBuildVC && evVC.hasData() && zoom.getBinSize() >= resolutionsToBuildTo.get(NormalizationHandler.VC)) { @@ -210,6 +216,8 @@ public void updateHicFile(String path, List normalizationsToB if (weShouldBuildScale && evSCALE.hasData() && zoom.getBinSize() >= resolutionsToBuildTo.get(NormalizationHandler.SCALE)) { expectedValueCalculations.add(evSCALE); } + + ds.clearCache(false, zoom); } writeNormsToUpdateFile(reader, path, true, expectedValueCalculations, null, normVectorIndices, normVectorBuffers, "Finished writing norms"); diff --git a/src/juicebox/tools/utils/norm/final2/FinalScale.java b/src/juicebox/tools/utils/norm/final2/FinalScale.java index 2c7875fc..784ddae8 100644 --- a/src/juicebox/tools/utils/norm/final2/FinalScale.java +++ b/src/juicebox/tools/utils/norm/final2/FinalScale.java @@ -371,22 +371,6 @@ private static double[] dealWithSorting(double[] vector, int length) { private static ListOfFloatArrays sparseMultiplyGetRowSums(IteratorContainer ic, ListOfFloatArrays vector, long vectorLength) { - ListOfFloatArrays sumVector = new ListOfFloatArrays(vectorLength); - - Iterator iterator = ic.getNewContactRecordIterator(); - while (iterator.hasNext()) { - ContactRecord cr = iterator.next(); - int x = cr.getBinX(); - int y = cr.getBinY(); - float counts = cr.getCounts(); - if (x == y) { - counts *= .5; - } - - sumVector.addTo(x, counts * vector.get(y)); - sumVector.addTo(y, counts * vector.get(x)); - } - - return sumVector; + return ic.sparseMultiply(vector, vectorLength); } } diff --git a/src/juicebox/tools/utils/original/ExpectedValueCalculation.java b/src/juicebox/tools/utils/original/ExpectedValueCalculation.java index fe1cbe6a..6b1f9b76 100644 --- a/src/juicebox/tools/utils/original/ExpectedValueCalculation.java +++ b/src/juicebox/tools/utils/original/ExpectedValueCalculation.java @@ -155,19 +155,24 @@ public int getGridSize() { * @param bin2 Position2 observed in units of "bins" */ public synchronized void addDistance(Integer chrIdx, int bin1, int bin2, double weight) { - - // Ignore NaN values TODO -- is this the right thing to do? - if (Double.isNaN(weight)) return; - - int dist; - Chromosome chr = chromosomesMap.get(chrIdx); - if (chr == null) return; - - chromosomeCounts.merge(chrIdx, weight, Double::sum); - dist = Math.abs(bin1 - bin2); - - actualDistances[dist] += weight; - } + + // Ignore NaN values TODO -- is this the right thing to do? + if (Double.isNaN(weight)) return; + + int dist; + Chromosome chr = chromosomesMap.get(chrIdx); + if (chr == null) return; + + Double count = chromosomeCounts.get(chrIdx); + if (count == null) { + chromosomeCounts.put(chrIdx, weight); + } else { + chromosomeCounts.put(chrIdx, count + weight); + } + dist = Math.abs(bin1 - bin2); + + actualDistances[dist] += weight; + } public void merge(ExpectedValueCalculation otherEVCalc) { for (Map.Entry entry : otherEVCalc.chromosomesMap.entrySet()) { diff --git a/src/juicebox/tools/utils/original/mnditerator/AsciiToBinConverter.java b/src/juicebox/tools/utils/original/mnditerator/AsciiToBinConverter.java index 060951e4..2354b210 100644 --- a/src/juicebox/tools/utils/original/mnditerator/AsciiToBinConverter.java +++ b/src/juicebox/tools/utils/original/mnditerator/AsciiToBinConverter.java @@ -62,16 +62,10 @@ public static void convert(String inputPath, String outputFile, ChromosomeHandle LittleEndianOutputStream les = new LittleEndianOutputStream(bos); iter = new AsciiPairIterator(inputPath, chromosomeOrdinals, chromosomeHandler, true); - while (iter.hasNext()) { - AlignmentPair pair = iter.next(); - les.writeBoolean(pair.getStrand1()); - les.writeInt(pair.getChr1()); - les.writeInt(pair.getPos1()); - les.writeInt(pair.getFrag1()); - les.writeBoolean(pair.getStrand2()); - les.writeInt(pair.getChr2()); - les.writeInt(pair.getPos2()); - les.writeInt(pair.getFrag2()); + if (outputFile.endsWith(".bn")) { + writeOutShortBinaryFormat(iter, les); + } else { + writeOutStandardBinaryFormat(iter, les); } les.flush(); bos.flush(); @@ -82,6 +76,31 @@ public static void convert(String inputPath, String outputFile, ChromosomeHandle } } + private static void writeOutShortBinaryFormat(AsciiPairIterator iter, LittleEndianOutputStream les) throws IOException { + while (iter.hasNext()) { + AlignmentPair pair = iter.next(); + les.writeInt(pair.getChr1()); + les.writeInt(pair.getPos1()); + les.writeInt(pair.getChr2()); + les.writeInt(pair.getPos2()); + les.writeFloat(pair.getScore()); + } + } + + private static void writeOutStandardBinaryFormat(AsciiPairIterator iter, LittleEndianOutputStream les) throws IOException { + while (iter.hasNext()) { + AlignmentPair pair = iter.next(); + les.writeBoolean(pair.getStrand1()); + les.writeInt(pair.getChr1()); + les.writeInt(pair.getPos1()); + les.writeInt(pair.getFrag1()); + les.writeBoolean(pair.getStrand2()); + les.writeInt(pair.getChr2()); + les.writeInt(pair.getPos2()); + les.writeInt(pair.getFrag2()); + } + } + public static void convertBack(String inputPath, String outputFile) throws IOException { PrintWriter pw = null; try {