diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java
index 8397c130b..a1ac53c6d 100644
--- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java
@@ -169,7 +169,7 @@ private DirectQuickSelectSketch(
//clear hash table area
dstMem.clear(preambleLongs << 3, 8 << lgArrLongs);
- hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
+ hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
memReqSvr_ = memReqSvr;
}
@@ -210,7 +210,7 @@ static DirectQuickSelectSketch writableWrap(final WritableMemory srcMem, final l
final DirectQuickSelectSketch dqss =
new DirectQuickSelectSketch(seed, srcMem);
- dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
+ dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
return dqss;
}
@@ -228,7 +228,7 @@ static DirectQuickSelectSketch fastWritableWrap(final WritableMemory srcMem, fin
final DirectQuickSelectSketch dqss =
new DirectQuickSelectSketch(seed, srcMem);
- dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
+ dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
return dqss;
}
@@ -310,7 +310,7 @@ UpdateReturnState hashUpdate(final long hash) {
if (actLgRF > 0) { //Expand in current Memory
//lgArrLongs will change; thetaLong, curCount will not
resize(wmem_, preambleLongs, lgArrLongs, tgtLgArrLongs);
- hashTableThreshold_ = setHashTableThreshold(lgNomLongs, tgtLgArrLongs);
+ hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs);
return InsertedCountIncrementedResized;
} //end of Expand in current memory, exit.
@@ -330,7 +330,7 @@ UpdateReturnState hashUpdate(final long hash) {
memReqSvr_.requestClose(wmem_, newDstMem);
wmem_ = newDstMem;
- hashTableThreshold_ = setHashTableThreshold(lgNomLongs, tgtLgArrLongs);
+ hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs);
return InsertedCountIncrementedResized;
} //end of Request more memory to resize
} //end of resize
diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java
index c593f52e3..a3ffebc14 100644
--- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java
+++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java
@@ -86,7 +86,7 @@ static DirectQuickSelectSketchR readOnlyWrap(final Memory srcMem, final long see
final DirectQuickSelectSketchR dqssr =
new DirectQuickSelectSketchR(seed, (WritableMemory) srcMem);
- dqssr.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
+ dqssr.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
return dqssr;
}
@@ -104,7 +104,7 @@ static DirectQuickSelectSketchR fastReadOnlyWrap(final Memory srcMem, final long
final DirectQuickSelectSketchR dqss =
new DirectQuickSelectSketchR(seed, (WritableMemory) srcMem);
- dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
+ dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
return dqss;
}
@@ -276,11 +276,11 @@ UpdateReturnState hashUpdate(final long hash) {
* @return the hash table threshold
*/
@SuppressFBWarnings(value = "DB_DUPLICATE_BRANCHES", justification = "False Positive, see the code comments")
- static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
+ protected static final int getOffHeapHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
//SpotBugs may complain (DB_DUPLICATE_BRANCHES) if DQS_RESIZE_THRESHOLD == REBUILD_THRESHOLD,
//but this allows us to tune these constants for different sketches.
final double fraction = (lgArrLongs <= lgNomLongs) ? DQS_RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD;
- return (int) Math.floor(fraction * (1 << lgArrLongs));
+ return (int) (fraction * (1 << lgArrLongs));
}
}
diff --git a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java
index 37b615456..b9d4dc9e1 100644
--- a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java
@@ -92,7 +92,7 @@ private HeapQuickSelectSketch(final int lgNomLongs, final long seed, final float
}
lgArrLongs_ = ThetaUtil.startingSubMultiple(lgNomLongs + 1, rf.lg(), ThetaUtil.MIN_LG_ARR_LONGS);
- hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs_);
+ hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs_);
curCount_ = 0;
thetaLong_ = (long)(p * LONG_MAX_VALUE_AS_DOUBLE);
empty_ = true; //other flags: bigEndian = readOnly = compact = ordered = false;
@@ -128,7 +128,7 @@ static HeapQuickSelectSketch heapifyInstance(final Memory srcMem, final long see
final HeapQuickSelectSketch hqss = new HeapQuickSelectSketch(lgNomLongs, seed, p, memRF,
preambleLongs, family);
hqss.lgArrLongs_ = lgArrLongs;
- hqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
+ hqss.hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs);
hqss.curCount_ = extractCurCount(srcMem);
hqss.thetaLong_ = extractThetaLong(srcMem);
hqss.empty_ = PreambleUtil.isEmptyFlag(srcMem);
@@ -197,7 +197,7 @@ public void reset() {
cache_ = new long[1 << lgArrLongsSM];
lgArrLongs_ = lgArrLongsSM;
}
- hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_);
+ hashTableThreshold_ = getHashTableThreshold(lgNomLongs_, lgArrLongs_);
empty_ = true;
curCount_ = 0;
thetaLong_ = (long)(getP() * LONG_MAX_VALUE_AS_DOUBLE);
@@ -293,7 +293,7 @@ private final void resizeCache() {
curCount_ = newCount;
cache_ = tgtArr;
- hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_);
+ hashTableThreshold_ = getHashTableThreshold(lgNomLongs_, lgArrLongs_);
}
//array stays the same size. Changes theta and thus count
@@ -318,9 +318,9 @@ private final void quickSelectAndRebuild() {
* @param lgArrLongs See lgArrLongs.
* @return the hash table threshold
*/
- static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
+ private static final int getHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
final double fraction = (lgArrLongs <= lgNomLongs) ? ThetaUtil.RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD;
- return (int) Math.floor(fraction * (1 << lgArrLongs));
+ return (int) (fraction * (1 << lgArrLongs));
}
}
diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java
index 888116512..cc1fd4d23 100644
--- a/src/main/java/org/apache/datasketches/theta/Sketch.java
+++ b/src/main/java/org/apache/datasketches/theta/Sketch.java
@@ -297,13 +297,27 @@ public double getLowerBound(final int numStdDev) {
* @param numberOfEntries the actual number of entries stored with the CompactSketch.
* @return the maximum number of storage bytes required for a CompactSketch with the given number
* of entries.
+ * @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int)} instead.
*/
+ @Deprecated
public static int getMaxCompactSketchBytes(final int numberOfEntries) {
if (numberOfEntries == 0) { return 8; }
if (numberOfEntries == 1) { return 16; }
return (numberOfEntries << 3) + 24;
}
+ /**
+ * Returns the maximum number of storage bytes required for a CompactSketch given the configured
+ * log_base2 of the number of nominal entries. Entries and preamble longs are 8 bytes each.
+ * @param lgNomEntries the log_base2 of the configured number of nominal entries
+ * @return the maximum number of storage bytes required for a CompactSketch with the given
+ * lgNomEntries.
+ */
+ public static int getCompactSketchMaxBytes(final int lgNomEntries) {
+ return ((int)((2 << lgNomEntries) * ThetaUtil.REBUILD_THRESHOLD)
+ + Family.QUICKSELECT.getMaxPreLongs()) * Long.BYTES;
+ }
+
/**
* Returns the maximum number of storage bytes required for an UpdateSketch with the given
* number of nominal entries (power of 2).
diff --git a/src/main/java/org/apache/datasketches/theta/Sketches.java b/src/main/java/org/apache/datasketches/theta/Sketches.java
index a5862e4a4..4b1461876 100644
--- a/src/main/java/org/apache/datasketches/theta/Sketches.java
+++ b/src/main/java/org/apache/datasketches/theta/Sketches.java
@@ -79,15 +79,32 @@ public static int getMaxAnotBResultBytes(final int maxNomEntries) {
}
/**
- * Ref: {@link Sketch#getMaxCompactSketchBytes(int)}
- * @param numberOfEntries Ref: {@link Sketch#getMaxCompactSketchBytes(int)},
- * {@code numberOfEntries}
- * @return Ref: {@link Sketch#getMaxCompactSketchBytes(int)}
- */
+ * Returns the maximum number of storage bytes required for a CompactSketch with the given
+ * number of actual entries. Note that this assumes the worst case of the sketch in
+ * estimation mode, which requires storing theta and count.
+ * @param numberOfEntries the actual number of entries stored with the CompactSketch.
+ * @return the maximum number of storage bytes required for a CompactSketch with the given number
+ * of entries.
+ * @see Sketch#getMaxCompactSketchBytes(int)
+ * @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int)} instead.
+ */
+ @Deprecated
public static int getMaxCompactSketchBytes(final int numberOfEntries) {
return Sketch.getMaxCompactSketchBytes(numberOfEntries);
}
+ /**
+ * Returns the maximum number of storage bytes required for a CompactSketch given the configured
+ * log_base2 of the number of nominal entries.
+ * @param lgNomEntries the log_base2 of the configured number of nominal entries
+ * @return the maximum number of storage bytes required for a CompactSketch with the given
+ * lgNomEntries.
+ * @see Sketch#getCompactSketchMaxBytes(int)
+ */
+ public static int getCompactSketchMaxBytes(final int lgNomEntries) {
+ return Sketch.getCompactSketchMaxBytes(lgNomEntries);
+ }
+
/**
* Ref: {@link SetOperation#getMaxIntersectionBytes(int)}
* @param nomEntries Ref: {@link SetOperation#getMaxIntersectionBytes(int)}, {@code nomEntries}
diff --git a/src/test/java/org/apache/datasketches/theta/SketchesTest.java b/src/test/java/org/apache/datasketches/theta/SketchesTest.java
index 6b887448e..84942fe21 100644
--- a/src/test/java/org/apache/datasketches/theta/SketchesTest.java
+++ b/src/test/java/org/apache/datasketches/theta/SketchesTest.java
@@ -20,6 +20,7 @@
package org.apache.datasketches.theta;
import static org.apache.datasketches.theta.BackwardConversions.convertSerVer3toSerVer1;
+import static org.apache.datasketches.theta.Sketches.getCompactSketchMaxBytes;
import static org.apache.datasketches.theta.Sketches.getMaxCompactSketchBytes;
import static org.apache.datasketches.theta.Sketches.getMaxIntersectionBytes;
import static org.apache.datasketches.theta.Sketches.getMaxUnionBytes;
@@ -35,6 +36,7 @@
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
+import org.apache.datasketches.common.Family;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
@@ -130,7 +132,8 @@ public void checkSetOpMethods() {
@Test
public void checkUtilMethods() {
- final int k = 1024;
+ final int lgK = 10;
+ final int k = 1 << lgK;
final int maxUnionBytes = getMaxUnionBytes(k);
assertEquals(2*k*8+32, maxUnionBytes);
@@ -141,6 +144,10 @@ public void checkUtilMethods() {
final int maxCompSkBytes = getMaxCompactSketchBytes(k+1);
assertEquals(24+(k+1)*8, maxCompSkBytes);
+ final int compSkMaxBytes = getCompactSketchMaxBytes(lgK);
+ //expected bytes: (max retained entries at the 15/16 threshold + max preamble longs) * 8
+ assertEquals(compSkMaxBytes, (((2 << lgK) * 15) / 16 + Family.QUICKSELECT.getMaxPreLongs()) << 3);
+
final int maxSkBytes = getMaxUpdateSketchBytes(k);
assertEquals(24+2*k*8, maxSkBytes);
}
diff --git a/tools/SketchesCheckstyle.xml b/tools/SketchesCheckstyle.xml
index 8a45ea574..873a878a0 100644
--- a/tools/SketchesCheckstyle.xml
+++ b/tools/SketchesCheckstyle.xml
@@ -36,18 +36,17 @@ under the License.
+
+
-
+
-
-
-
-
-
-
-
-
+
+
+
+
@@ -77,7 +76,18 @@ under the License.
-
+
+
+
+
+
+
+