Skip to content

Commit

Permalink
Merge pull request #965 from aidenlab/deterministic_norm
Browse files Browse the repository at this point in the history
Deterministic norm
  • Loading branch information
sa501428 authored Aug 31, 2021
2 parents 3f07a86 + 4e9cfaf commit e983324
Show file tree
Hide file tree
Showing 14 changed files with 265 additions and 232 deletions.
2 changes: 1 addition & 1 deletion src/juicebox/HiCGlobals.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
*/
public class HiCGlobals {

public static final String versionNum = "2.12.00";
public static final String versionNum = "2.13.05";
public static final String juiceboxTitle = "[Juicebox " + versionNum + "] Hi-C Map ";

// MainWindow variables
Expand Down
8 changes: 6 additions & 2 deletions src/juicebox/data/Dataset.java
Original file line number Diff line number Diff line change
Expand Up @@ -1010,7 +1010,9 @@ public void clearCache(boolean onlyClearInter) {
try {
matrix.getZoomData(zoom).clearCache(onlyClearInter);
} catch (Exception e) {
System.err.println("Clearing err: " + e.getLocalizedMessage());
if (HiCGlobals.printVerboseComments) {
System.err.println("Clearing err: " + e.getLocalizedMessage());
}
}
}
}
Expand All @@ -1021,7 +1023,9 @@ public void clearCache(boolean onlyClearInter, HiCZoom zoom) {
try {
matrix.getZoomData(zoom).clearCache(onlyClearInter);
} catch (Exception e) {
System.err.println("Clearing z_err: " + e.getLocalizedMessage());
if (HiCGlobals.printVerboseComments) {
System.err.println("Clearing z_err: " + e.getLocalizedMessage());
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/juicebox/data/MatrixZoomData.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public class MatrixZoomData {
protected final LRUCache<String, Block> blockCache = new LRUCache<>(500);
private final HashMap<NormalizationType, BasicMatrix> pearsonsMap;
private final HashMap<NormalizationType, BasicMatrix> normSquaredMaps;
//private List<List<ContactRecord>> localCacheOfRecords = null;
//private BigContactRecordList localCacheOfRecords = null;
private final V9Depth v9Depth;
private double averageCount = -1;
protected DatasetReader reader;
Expand Down
125 changes: 125 additions & 0 deletions src/juicebox/data/iterator/BigContactRecordList.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2011-2021 Broad Institute, Aiden Lab, Rice University, Baylor College of Medicine
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package juicebox.data.iterator;

import juicebox.data.ContactRecord;

import java.util.*;

/**
 * A container for more contact records than a single {@link List} can hold.
 * <p>
 * Records are stored as a list of sub-lists; the total number of records is
 * tracked separately as a {@code long}. Instances are not synchronized;
 * callers that share an instance across threads must lock externally.
 */
public class BigContactRecordList {

    /** Largest {@code counter} value tolerated before a sub-list is closed and a new one started. */
    private static final int MAX_LIMIT = Integer.MAX_VALUE - 10;
    /** Target number of records per sub-list used by {@link #collapse()} to size its output. */
    private static final long RECORDS_PER_COLLAPSED_LIST = 200000000L;
    /** {@link #collapse()} never produces fewer sub-lists than this. */
    private static final int MIN_COLLAPSED_LISTS = 20;

    private List<List<ContactRecord>> internalList = new ArrayList<>();
    private long numOfContactRecords = 0;

    /**
     * Drains an iterator into a new container, starting a fresh sub-list
     * whenever the current one grows past {@code MAX_LIMIT} records.
     *
     * @param iterator source of records; consumed until exhausted
     * @return a container holding every record produced by {@code iterator},
     *         in iteration order; it has no sub-lists if the iterator was empty
     */
    public static BigContactRecordList populateListOfListsFromSingleIterator(Iterator<ContactRecord> iterator) {
        BigContactRecordList allRecords = new BigContactRecordList();
        List<ContactRecord> tempList = new ArrayList<>();
        int counter = 0;
        while (iterator.hasNext()) {
            tempList.add(iterator.next());
            counter++;
            if (counter > MAX_LIMIT) {
                allRecords.addSubList(tempList);
                tempList = new ArrayList<>();
                counter = 0;
            }
        }
        if (!tempList.isEmpty()) {
            allRecords.addSubList(tempList);
        }
        return allRecords;
    }

    /**
     * Appends every sub-list of {@code other} to this container. The sub-lists
     * are shared by reference, not copied.
     *
     * @param other container whose sub-lists are appended
     */
    public void addAllSubLists(BigContactRecordList other) {
        // Sum first so the count stays correct even if other == this
        // (otherwise the loop would walk the already-extended list).
        long added = 0;
        for (List<ContactRecord> records : other.internalList) {
            added += records.size();
        }
        internalList.addAll(other.internalList);
        numOfContactRecords += added;
    }

    /** Appends one sub-list (by reference) and updates the record count. */
    private void addSubList(List<ContactRecord> cList) {
        internalList.add(cList);
        numOfContactRecords += cList.size();
    }

    /**
     * @return total number of records across all sub-lists
     */
    public long getTotalSize() {
        return numOfContactRecords;
    }

    /**
     * @return number of sub-lists currently held (empty sub-lists included)
     */
    public int getNumLists() {
        return internalList.size();
    }

    /**
     * @param index position of the requested sub-list
     * @return the sub-list itself (not a copy)
     * @throws IndexOutOfBoundsException if {@code index} is out of range
     */
    public List<ContactRecord> getSubList(int index) {
        return internalList.get(index);
    }

    /**
     * Empties every sub-list, drops them all, and resets the record count to zero.
     */
    public void clear() {
        for (List<ContactRecord> cList : internalList) {
            cList.clear();
        }
        internalList = new ArrayList<>();
        numOfContactRecords = 0;
    }

    /**
     * Orders the sub-lists by their first record, using the records' natural
     * ordering. Empty sub-lists (which {@link #collapse()} can leave behind)
     * are placed after all non-empty ones instead of causing an
     * {@link IndexOutOfBoundsException}.
     */
    public void sort() {
        Comparator<List<ContactRecord>> byFirstRecord = Comparator.comparing(o -> o.get(0));
        internalList.sort((a, b) -> {
            if (a.isEmpty() || b.isEmpty()) {
                // false < true, so non-empty lists sort ahead of empty ones
                return Boolean.compare(a.isEmpty(), b.isEmpty());
            }
            return byFirstRecord.compare(a, b);
        });
    }

    /**
     * Redistributes the existing sub-lists into a fixed number of bins,
     * always appending the next sub-list to the bin that currently holds the
     * fewest records. The number of bins is the larger of
     * {@code totalRecords / 200000000} and 20; bins that receive nothing
     * remain as empty sub-lists.
     */
    public void collapse() {
        System.out.println("Was n " + internalList.size());

        int numFinList = (int) Math.max(numOfContactRecords / RECORDS_PER_COLLAPSED_LIST, MIN_COLLAPSED_LISTS);
        List<List<ContactRecord>> newInternalList = new ArrayList<>(numFinList);
        for (int z = 0; z < numFinList; z++) {
            newInternalList.add(new ArrayList<>());
        }

        // long, not int: a bin's running total can exceed Integer.MAX_VALUE
        // when large sub-lists are merged, which would corrupt the min search.
        long[] countForList = new long[numFinList];
        for (List<ContactRecord> subList : internalList) {
            int whichIndexToAddTo = getIndexOfMin(countForList);
            countForList[whichIndexToAddTo] += subList.size();
            newInternalList.get(whichIndexToAddTo).addAll(subList);
        }

        internalList = newInternalList;

        System.out.println("Now is n " + internalList.size());
    }

    /** Returns the index of the smallest entry; the lowest index wins ties. */
    private int getIndexOfMin(long[] counts) {
        int minIndex = 0;
        for (int k = 1; k < counts.length; k++) {
            if (counts[k] < counts[minIndex]) {
                minIndex = k;
            }
        }
        return minIndex;
    }
}
16 changes: 16 additions & 0 deletions src/juicebox/data/iterator/IteratorContainer.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
package juicebox.data.iterator;

import juicebox.data.ContactRecord;
import juicebox.data.basics.ListOfDoubleArrays;
import juicebox.data.basics.ListOfFloatArrays;

import java.util.Iterator;
Expand Down Expand Up @@ -73,4 +74,19 @@ public boolean getIsThereEnoughMemoryForNormCalculation() {

public abstract void clear();

protected static ListOfFloatArrays[] getArrayOfFloatVectors(int size, long vectorLength) {
ListOfFloatArrays[] array = new ListOfFloatArrays[size];
for (int i = 0; i < size; i++) {
array[i] = new ListOfFloatArrays(vectorLength);
}
return array;
}

protected static ListOfDoubleArrays[] getArrayOfDoubleVectors(int size, long vectorLength) {
ListOfDoubleArrays[] array = new ListOfDoubleArrays[size];
for (int i = 0; i < size; i++) {
array[i] = new ListOfDoubleArrays(vectorLength);
}
return array;
}
}
48 changes: 9 additions & 39 deletions src/juicebox/data/iterator/ListOfListGenerator.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,11 @@
import juicebox.windowui.HiCZoom;
import org.broad.igv.util.collections.LRUCache;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

public class ListOfListGenerator {
private static final int MAX_LIMIT = Integer.MAX_VALUE - 10;

public static IteratorContainer createFromZD(DatasetReader reader, MatrixZoomData matrixZoomData,
LRUCache<String, Block> blockCache) {
Expand All @@ -63,8 +61,8 @@ private static IteratorContainer tryToCreateIteratorInRAM(IteratorContainer ic0)
}

if (shouldFitInMemory) {
List<List<ContactRecord>> allContactRecords = populateListOfLists(ic0);
long numOfContactRecords = getTotalSize(allContactRecords);
BigContactRecordList allContactRecords = populateListOfLists(ic0);
long numOfContactRecords = allContactRecords.getTotalSize();

IteratorContainer newIC = new ListOfListIteratorContainer(allContactRecords,
ic0.getMatrixSize(),
Expand All @@ -79,62 +77,34 @@ private static IteratorContainer tryToCreateIteratorInRAM(IteratorContainer ic0)
return ic0;
}

private static List<List<ContactRecord>> populateListOfLists(IteratorContainer ic) {
private static BigContactRecordList populateListOfLists(IteratorContainer ic) {

if (ic instanceof GWIteratorContainer) {
List<Iterator<ContactRecord>> iterators = ((GWIteratorContainer) ic).getAllFromFileContactRecordIterators();
List<List<ContactRecord>> allRecords = new ArrayList<>();
BigContactRecordList allRecords = new BigContactRecordList();

AtomicInteger index = new AtomicInteger(0);
ParallelizedJuicerTools.launchParallelizedCode(IteratorContainer.numCPUMatrixThreads, () -> {
int i = index.getAndIncrement();
List<List<ContactRecord>> recordsForThread = new ArrayList<>();
BigContactRecordList recordsForThread = new BigContactRecordList();
while (i < iterators.size()) {
List<List<ContactRecord>> recordsForIter = populateListOfListsFromSingleIterator(iterators.get(i));
recordsForThread.addAll(recordsForIter);
BigContactRecordList recordsForIter = BigContactRecordList.populateListOfListsFromSingleIterator(iterators.get(i));
recordsForThread.addAllSubLists(recordsForIter);
i = index.getAndIncrement();
}
synchronized (allRecords) {
allRecords.addAll(recordsForThread);
allRecords.addAllSubLists(recordsForThread);
}
});
return allRecords;
} else {
return populateListOfListsFromSingleIterator(ic.getNewContactRecordIterator());
return BigContactRecordList.populateListOfListsFromSingleIterator(ic.getNewContactRecordIterator());
}
}

private static List<List<ContactRecord>> populateListOfListsFromSingleIterator(Iterator<ContactRecord> iterator) {

List<List<ContactRecord>> allRecords = new ArrayList<>();
List<ContactRecord> tempList = new ArrayList<>();
int counter = 0;
while (iterator.hasNext()) {
tempList.add(iterator.next());
counter++;
if (counter > MAX_LIMIT) {
allRecords.add(tempList);
tempList = new ArrayList<>();
counter = 0;
}
}
if (tempList.size() > 0) {
allRecords.add(tempList);
}
return allRecords;
}

private static boolean checkMemory(IteratorContainer ic) {
long ramForRowSums = ic.getMatrixSize() * 4;
long ramForAllContactRecords = ic.getNumberOfContactRecords() * 12;
return ramForRowSums + ramForAllContactRecords < Runtime.getRuntime().maxMemory();
}

private static long getTotalSize(List<List<ContactRecord>> allContactRecords) {
long numOfContactRecords = 0;
for (List<ContactRecord> records : allContactRecords) {
numOfContactRecords += records.size();
}
return numOfContactRecords;
}
}
9 changes: 4 additions & 5 deletions src/juicebox/data/iterator/ListOfListIterator.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,14 @@
import juicebox.data.ContactRecord;

import java.util.Iterator;
import java.util.List;

public class ListOfListIterator implements Iterator<ContactRecord> {

private final List<List<ContactRecord>> allContactRecords;
private final BigContactRecordList allContactRecords;
private Iterator<ContactRecord> currentIterator = null;
private int currentListIndex = 0;

public ListOfListIterator(List<List<ContactRecord>> allContactRecords) {
public ListOfListIterator(BigContactRecordList allContactRecords) {
this.allContactRecords = allContactRecords;
getNextIterator();
}
Expand All @@ -51,8 +50,8 @@ public boolean hasNext() {
}

private boolean getNextIterator() {
while (currentListIndex < allContactRecords.size()) {
currentIterator = allContactRecords.get(currentListIndex).iterator();
while (currentListIndex < allContactRecords.getNumLists()) {
currentIterator = allContactRecords.getSubList(currentListIndex).iterator();
if (currentIterator.hasNext()) {
return true;
}
Expand Down
16 changes: 7 additions & 9 deletions src/juicebox/data/iterator/ListOfListIteratorContainer.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@

public class ListOfListIteratorContainer extends IteratorContainer {

private final List<List<ContactRecord>> allContactRecords;
private final BigContactRecordList allContactRecords;

public ListOfListIteratorContainer(List<List<ContactRecord>> allContactRecords, long matrixSize,
public ListOfListIteratorContainer(BigContactRecordList allContactRecords, long matrixSize,
long totalNumberOfContacts) {
super(matrixSize);
setNumberOfContactRecords(totalNumberOfContacts);
Expand All @@ -59,9 +59,10 @@ public boolean getIsThereEnoughMemoryForNormCalculation() {
@Override
public ListOfFloatArrays sparseMultiply(ListOfFloatArrays vector, long vectorLength) {

if (allContactRecords.size() < numCPUMatrixThreads) {
if (allContactRecords.getNumLists() < numCPUMatrixThreads) {
final ListOfFloatArrays totalSumVector = new ListOfFloatArrays(vectorLength);
for (List<ContactRecord> contactRecords : allContactRecords) {
for (int k = 0; k < allContactRecords.getNumLists(); k++) {
List<ContactRecord> contactRecords = allContactRecords.getSubList(k);
totalSumVector.addValuesFrom(ListIteratorContainer.sparseMultiplyByListContacts(
contactRecords, vector, vectorLength, numCPUMatrixThreads));
}
Expand All @@ -73,9 +74,6 @@ public ListOfFloatArrays sparseMultiply(ListOfFloatArrays vector, long vectorLen

@Override
public void clear() {
for (List<ContactRecord> cList : allContactRecords) {
cList.clear();
}
allContactRecords.clear();
}

Expand All @@ -86,8 +84,8 @@ private ListOfFloatArrays sparseMultiplyAcrossLists(ListOfFloatArrays vector, lo
ParallelizedJuicerTools.launchParallelizedCode(numCPUMatrixThreads, () -> {
int sIndx = index.getAndIncrement();
ListOfDoubleArrays sumVector = new ListOfDoubleArrays(vectorLength);
while (sIndx < allContactRecords.size()) {
for (ContactRecord cr : allContactRecords.get(sIndx)) {
while (sIndx < allContactRecords.getNumLists()) {
for (ContactRecord cr : allContactRecords.getSubList(sIndx)) {
ListIteratorContainer.matrixVectorMult(vector, sumVector, cr);
}
sIndx = index.getAndIncrement();
Expand Down
2 changes: 1 addition & 1 deletion src/juicebox/tools/clt/UnitTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ public static void testCustomFastScaling() {
HiCGlobals.printVerboseComments = true;
List<List<ContactRecord>> listOfLists = new ArrayList<>();
BigContactRecordList listOfLists = new ArrayList<>();
listOfLists.addAll(zd.getContactRecordList());
double[] result = ZeroScale.scale(listOfLists, targetVectorInitial, zd.getKey());
Expand Down
Loading

0 comments on commit e983324

Please sign in to comment.