diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java index 8397c130b..a1ac53c6d 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java @@ -169,7 +169,7 @@ private DirectQuickSelectSketch( //clear hash table area dstMem.clear(preambleLongs << 3, 8 << lgArrLongs); - hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); + hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); memReqSvr_ = memReqSvr; } @@ -210,7 +210,7 @@ static DirectQuickSelectSketch writableWrap(final WritableMemory srcMem, final l final DirectQuickSelectSketch dqss = new DirectQuickSelectSketch(seed, srcMem); - dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); + dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); return dqss; } @@ -228,7 +228,7 @@ static DirectQuickSelectSketch fastWritableWrap(final WritableMemory srcMem, fin final DirectQuickSelectSketch dqss = new DirectQuickSelectSketch(seed, srcMem); - dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); + dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); return dqss; } @@ -310,7 +310,7 @@ UpdateReturnState hashUpdate(final long hash) { if (actLgRF > 0) { //Expand in current Memory //lgArrLongs will change; thetaLong, curCount will not resize(wmem_, preambleLongs, lgArrLongs, tgtLgArrLongs); - hashTableThreshold_ = setHashTableThreshold(lgNomLongs, tgtLgArrLongs); + hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs); return InsertedCountIncrementedResized; } //end of Expand in current memory, exit. 
@@ -330,7 +330,7 @@ UpdateReturnState hashUpdate(final long hash) { memReqSvr_.requestClose(wmem_, newDstMem); wmem_ = newDstMem; - hashTableThreshold_ = setHashTableThreshold(lgNomLongs, tgtLgArrLongs); + hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs); return InsertedCountIncrementedResized; } //end of Request more memory to resize } //end of resize diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java index c593f52e3..a3ffebc14 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java @@ -86,7 +86,7 @@ static DirectQuickSelectSketchR readOnlyWrap(final Memory srcMem, final long see final DirectQuickSelectSketchR dqssr = new DirectQuickSelectSketchR(seed, (WritableMemory) srcMem); - dqssr.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); + dqssr.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); return dqssr; } @@ -104,7 +104,7 @@ static DirectQuickSelectSketchR fastReadOnlyWrap(final Memory srcMem, final long final DirectQuickSelectSketchR dqss = new DirectQuickSelectSketchR(seed, (WritableMemory) srcMem); - dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); + dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); return dqss; } @@ -276,11 +276,11 @@ UpdateReturnState hashUpdate(final long hash) { * @return the hash table threshold */ @SuppressFBWarnings(value = "DB_DUPLICATE_BRANCHES", justification = "False Positive, see the code comments") - static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) { + protected static final int getOffHeapHashTableThreshold(final int lgNomLongs, final int lgArrLongs) { //SpotBugs may complain (DB_DUPLICATE_BRANCHES) if DQS_RESIZE_THRESHOLD == REBUILD_THRESHOLD, //but 
this allows us to tune these constants for different sketches. final double fraction = (lgArrLongs <= lgNomLongs) ? DQS_RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD; - return (int) Math.floor(fraction * (1 << lgArrLongs)); + return (int) (fraction * (1 << lgArrLongs)); } } diff --git a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java index 37b615456..b9d4dc9e1 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java @@ -92,7 +92,7 @@ private HeapQuickSelectSketch(final int lgNomLongs, final long seed, final float } lgArrLongs_ = ThetaUtil.startingSubMultiple(lgNomLongs + 1, rf.lg(), ThetaUtil.MIN_LG_ARR_LONGS); - hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs_); + hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs_); curCount_ = 0; thetaLong_ = (long)(p * LONG_MAX_VALUE_AS_DOUBLE); empty_ = true; //other flags: bigEndian = readOnly = compact = ordered = false; @@ -128,7 +128,7 @@ static HeapQuickSelectSketch heapifyInstance(final Memory srcMem, final long see final HeapQuickSelectSketch hqss = new HeapQuickSelectSketch(lgNomLongs, seed, p, memRF, preambleLongs, family); hqss.lgArrLongs_ = lgArrLongs; - hqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); + hqss.hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs); hqss.curCount_ = extractCurCount(srcMem); hqss.thetaLong_ = extractThetaLong(srcMem); hqss.empty_ = PreambleUtil.isEmptyFlag(srcMem); @@ -197,7 +197,7 @@ public void reset() { cache_ = new long[1 << lgArrLongsSM]; lgArrLongs_ = lgArrLongsSM; } - hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_); + hashTableThreshold_ = getHashTableThreshold(lgNomLongs_, lgArrLongs_); empty_ = true; curCount_ = 0; thetaLong_ = (long)(getP() * LONG_MAX_VALUE_AS_DOUBLE); @@ -293,7 +293,7 
@@ private final void resizeCache() { curCount_ = newCount; cache_ = tgtArr; - hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_); + hashTableThreshold_ = getHashTableThreshold(lgNomLongs_, lgArrLongs_); } //array stays the same size. Changes theta and thus count @@ -318,9 +318,9 @@ private final void quickSelectAndRebuild() { * @param lgArrLongs See lgArrLongs. * @return the hash table threshold */ - static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) { + private static final int getHashTableThreshold(final int lgNomLongs, final int lgArrLongs) { final double fraction = (lgArrLongs <= lgNomLongs) ? ThetaUtil.RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD; - return (int) Math.floor(fraction * (1 << lgArrLongs)); + return (int) (fraction * (1 << lgArrLongs)); } } diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java index 888116512..cc1fd4d23 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketch.java +++ b/src/main/java/org/apache/datasketches/theta/Sketch.java @@ -297,13 +297,27 @@ public double getLowerBound(final int numStdDev) { * @param numberOfEntries the actual number of entries stored with the CompactSketch. * @return the maximum number of storage bytes required for a CompactSketch with the given number * of entries. + * @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int)} instead. */ + @Deprecated public static int getMaxCompactSketchBytes(final int numberOfEntries) { if (numberOfEntries == 0) { return 8; } if (numberOfEntries == 1) { return 16; } return (numberOfEntries << 3) + 24; } + /** + * Returns the maximum number of storage bytes required for a CompactSketch given the configured + * log_base2 of the number of nominal entries, which is a power of 2. + * @param lgNomEntries the log_base2 of the configured number of nominal entries + * @return the maximum number of storage bytes required for a CompactSketch with the given + * lgNomEntries.
+ */ + public static int getCompactSketchMaxBytes(final int lgNomEntries) { + return ((int)((2 << lgNomEntries) * ThetaUtil.REBUILD_THRESHOLD) + Family.QUICKSELECT.getMaxPreLongs()) * Long.BYTES; /* (max retained entries + max preamble longs), each 8 bytes */ + } + /** * Returns the maximum number of storage bytes required for an UpdateSketch with the given * number of nominal entries (power of 2). diff --git a/src/main/java/org/apache/datasketches/theta/Sketches.java b/src/main/java/org/apache/datasketches/theta/Sketches.java index a5862e4a4..4b1461876 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketches.java +++ b/src/main/java/org/apache/datasketches/theta/Sketches.java @@ -79,15 +79,32 @@ public static int getMaxAnotBResultBytes(final int maxNomEntries) { } /** - * Ref: {@link Sketch#getMaxCompactSketchBytes(int)} - * @param numberOfEntries Ref: {@link Sketch#getMaxCompactSketchBytes(int)}, - * {@code numberOfEntries} - * @return Ref: {@link Sketch#getMaxCompactSketchBytes(int)} - */ + * Returns the maximum number of storage bytes required for a CompactSketch with the given + * number of actual entries. Note that this assumes the worst case of the sketch in + * estimation mode, which requires storing theta and count. + * @param numberOfEntries the actual number of entries stored with the CompactSketch. + * @return the maximum number of storage bytes required for a CompactSketch with the given number + * of entries. + * @see Sketch#getMaxCompactSketchBytes(int) + * @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int)} instead. + */ + @Deprecated public static int getMaxCompactSketchBytes(final int numberOfEntries) { return Sketch.getMaxCompactSketchBytes(numberOfEntries); } + /** + * Returns the maximum number of storage bytes required for a CompactSketch given the configured + * log_base2 of the number of nominal entries, which is a power of 2. + * @param nomEntries the log_base2 of the configured number of nominal entries (not the entry count itself) + * @return the maximum number of storage bytes required for a CompactSketch with the given + * log_base2 of nominal entries.
+ * @see Sketch#getCompactSketchMaxBytes(int) + */ + public static int getCompactSketchMaxBytes(final int nomEntries) { + return Sketch.getCompactSketchMaxBytes(nomEntries); + } + /** * Ref: {@link SetOperation#getMaxIntersectionBytes(int)} * @param nomEntries Ref: {@link SetOperation#getMaxIntersectionBytes(int)}, {@code nomEntries} diff --git a/src/test/java/org/apache/datasketches/theta/SketchesTest.java b/src/test/java/org/apache/datasketches/theta/SketchesTest.java index 6b887448e..84942fe21 100644 --- a/src/test/java/org/apache/datasketches/theta/SketchesTest.java +++ b/src/test/java/org/apache/datasketches/theta/SketchesTest.java @@ -20,6 +20,7 @@ package org.apache.datasketches.theta; import static org.apache.datasketches.theta.BackwardConversions.convertSerVer3toSerVer1; +import static org.apache.datasketches.theta.Sketches.getCompactSketchMaxBytes; import static org.apache.datasketches.theta.Sketches.getMaxCompactSketchBytes; import static org.apache.datasketches.theta.Sketches.getMaxIntersectionBytes; import static org.apache.datasketches.theta.Sketches.getMaxUnionBytes; @@ -35,6 +36,7 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; +import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; @@ -130,7 +132,8 @@ public void checkSetOpMethods() { @Test public void checkUtilMethods() { - final int k = 1024; + final int lgK = 10; + final int k = 1 << lgK; final int maxUnionBytes = getMaxUnionBytes(k); assertEquals(2*k*8+32, maxUnionBytes); @@ -141,6 +144,10 @@ public void checkUtilMethods() { final int maxCompSkBytes = getMaxCompactSketchBytes(k+1); assertEquals(24+(k+1)*8, maxCompSkBytes); + final int compSkMaxBytes = getCompactSketchMaxBytes(lgK); { + assertEquals(compSkMaxBytes, (((2 << lgK) * 15) / 16 + Family.QUICKSELECT.getMaxPreLongs()) << 3); + } + final int
maxSkBytes = getMaxUpdateSketchBytes(k); assertEquals(24+2*k*8, maxSkBytes); } diff --git a/tools/SketchesCheckstyle.xml b/tools/SketchesCheckstyle.xml index 8a45ea574..873a878a0 100644 --- a/tools/SketchesCheckstyle.xml +++ b/tools/SketchesCheckstyle.xml @@ -36,18 +36,17 @@ under the License. + + - + - - - - - - - - + + + + @@ -77,7 +76,18 @@ under the License. - + + + + + + +