Skip to content

Commit

Permalink
Merge pull request #559 from apache/fix_getMaxCompactSketchBytes
Browse files Browse the repository at this point in the history
Added new method
  • Loading branch information
leerho authored May 15, 2024
2 parents 204b4f9 + f979b37 commit f8772e4
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ private DirectQuickSelectSketch(
//clear hash table area
dstMem.clear(preambleLongs << 3, 8 << lgArrLongs);

hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
memReqSvr_ = memReqSvr;
}

Expand Down Expand Up @@ -210,7 +210,7 @@ static DirectQuickSelectSketch writableWrap(final WritableMemory srcMem, final l

final DirectQuickSelectSketch dqss =
new DirectQuickSelectSketch(seed, srcMem);
dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
return dqss;
}

Expand All @@ -228,7 +228,7 @@ static DirectQuickSelectSketch fastWritableWrap(final WritableMemory srcMem, fin

final DirectQuickSelectSketch dqss =
new DirectQuickSelectSketch(seed, srcMem);
dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
return dqss;
}

Expand Down Expand Up @@ -310,7 +310,7 @@ UpdateReturnState hashUpdate(final long hash) {
if (actLgRF > 0) { //Expand in current Memory
//lgArrLongs will change; thetaLong, curCount will not
resize(wmem_, preambleLongs, lgArrLongs, tgtLgArrLongs);
hashTableThreshold_ = setHashTableThreshold(lgNomLongs, tgtLgArrLongs);
hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs);
return InsertedCountIncrementedResized;
} //end of Expand in current memory, exit.

Expand All @@ -330,7 +330,7 @@ UpdateReturnState hashUpdate(final long hash) {
memReqSvr_.requestClose(wmem_, newDstMem);

wmem_ = newDstMem;
hashTableThreshold_ = setHashTableThreshold(lgNomLongs, tgtLgArrLongs);
hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs);
return InsertedCountIncrementedResized;
} //end of Request more memory to resize
} //end of resize
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ static DirectQuickSelectSketchR readOnlyWrap(final Memory srcMem, final long see

final DirectQuickSelectSketchR dqssr =
new DirectQuickSelectSketchR(seed, (WritableMemory) srcMem);
dqssr.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
dqssr.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
return dqssr;
}

Expand All @@ -104,7 +104,7 @@ static DirectQuickSelectSketchR fastReadOnlyWrap(final Memory srcMem, final long

final DirectQuickSelectSketchR dqss =
new DirectQuickSelectSketchR(seed, (WritableMemory) srcMem);
dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
return dqss;
}

Expand Down Expand Up @@ -276,11 +276,11 @@ UpdateReturnState hashUpdate(final long hash) {
* @return the hash table threshold
*/
@SuppressFBWarnings(value = "DB_DUPLICATE_BRANCHES", justification = "False Positive, see the code comments")
static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
protected static final int getOffHeapHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
//SpotBugs may complain (DB_DUPLICATE_BRANCHES) if DQS_RESIZE_THRESHOLD == REBUILD_THRESHOLD,
//but this allows us to tune these constants for different sketches.
final double fraction = (lgArrLongs <= lgNomLongs) ? DQS_RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD;
return (int) Math.floor(fraction * (1 << lgArrLongs));
return (int) (fraction * (1 << lgArrLongs));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ private HeapQuickSelectSketch(final int lgNomLongs, final long seed, final float
}

lgArrLongs_ = ThetaUtil.startingSubMultiple(lgNomLongs + 1, rf.lg(), ThetaUtil.MIN_LG_ARR_LONGS);
hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs_);
hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs_);
curCount_ = 0;
thetaLong_ = (long)(p * LONG_MAX_VALUE_AS_DOUBLE);
empty_ = true; //other flags: bigEndian = readOnly = compact = ordered = false;
Expand Down Expand Up @@ -128,7 +128,7 @@ static HeapQuickSelectSketch heapifyInstance(final Memory srcMem, final long see
final HeapQuickSelectSketch hqss = new HeapQuickSelectSketch(lgNomLongs, seed, p, memRF,
preambleLongs, family);
hqss.lgArrLongs_ = lgArrLongs;
hqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
hqss.hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs);
hqss.curCount_ = extractCurCount(srcMem);
hqss.thetaLong_ = extractThetaLong(srcMem);
hqss.empty_ = PreambleUtil.isEmptyFlag(srcMem);
Expand Down Expand Up @@ -197,7 +197,7 @@ public void reset() {
cache_ = new long[1 << lgArrLongsSM];
lgArrLongs_ = lgArrLongsSM;
}
hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_);
hashTableThreshold_ = getHashTableThreshold(lgNomLongs_, lgArrLongs_);
empty_ = true;
curCount_ = 0;
thetaLong_ = (long)(getP() * LONG_MAX_VALUE_AS_DOUBLE);
Expand Down Expand Up @@ -293,7 +293,7 @@ private final void resizeCache() {
curCount_ = newCount;

cache_ = tgtArr;
hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_);
hashTableThreshold_ = getHashTableThreshold(lgNomLongs_, lgArrLongs_);
}

//array stays the same size. Changes theta and thus count
Expand All @@ -318,9 +318,9 @@ private final void quickSelectAndRebuild() {
* @param lgArrLongs <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>.
* @return the hash table threshold
*/
static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
private static final int getHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
final double fraction = (lgArrLongs <= lgNomLongs) ? ThetaUtil.RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD;
return (int) Math.floor(fraction * (1 << lgArrLongs));
return (int) (fraction * (1 << lgArrLongs));
}

}
14 changes: 14 additions & 0 deletions src/main/java/org/apache/datasketches/theta/Sketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -297,13 +297,27 @@ public double getLowerBound(final int numStdDev) {
* @param numberOfEntries the actual number of entries stored with the CompactSketch.
* @return the maximum number of storage bytes required for a CompactSketch with the given number
* of entries.
* @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int)} instead.
*/
@Deprecated
public static int getMaxCompactSketchBytes(final int numberOfEntries) {
  switch (numberOfEntries) {
    case 0:
      return 8;  // zero entries: fixed 8 bytes
    case 1:
      return 16; // exactly one entry: fixed 16 bytes
    default:
      // general case: 8 bytes per entry plus a fixed 24 bytes
      return (numberOfEntries << 3) + 24;
  }
}

/**
 * Returns the maximum number of storage bytes required for a CompactSketch given the configured
 * log_base2 of the number of nominal entries, which is a power of 2.
 *
 * <p>The result is the largest number of retained entries,
 * {@code (2 << lgNomEntries) * ThetaUtil.REBUILD_THRESHOLD}, plus the maximum number of
 * preamble longs of the QUICKSELECT family, all converted from 8-byte longs to bytes.</p>
 *
 * @param lgNomEntries the log_base2 of
 * <a href="{@docRoot}/resources/dictionary.html#nomEntries">Nominal Entries</a>
 * @return the maximum number of storage bytes required for a CompactSketch with the given
 * lgNomEntries.
 */
public static int getCompactSketchMaxBytes(final int lgNomEntries) {
  // Entry count, not bytes: the fraction REBUILD_THRESHOLD of a table of 2 * 2^lgNomEntries slots.
  final int maxEntries = (int) ((2 << lgNomEntries) * ThetaUtil.REBUILD_THRESHOLD);
  // Entries and preamble are both counted in longs; convert the total to bytes exactly once.
  return (maxEntries + Family.QUICKSELECT.getMaxPreLongs()) * Long.BYTES;
}

/**
* Returns the maximum number of storage bytes required for an UpdateSketch with the given
* number of nominal entries (power of 2).
Expand Down
27 changes: 22 additions & 5 deletions src/main/java/org/apache/datasketches/theta/Sketches.java
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,32 @@ public static int getMaxAnotBResultBytes(final int maxNomEntries) {
}

/**
* Ref: {@link Sketch#getMaxCompactSketchBytes(int)}
* @param numberOfEntries Ref: {@link Sketch#getMaxCompactSketchBytes(int)},
* {@code numberOfEntries}
* @return Ref: {@link Sketch#getMaxCompactSketchBytes(int)}
*/
* Returns the maximum number of storage bytes required for a CompactSketch with the given
* number of actual entries. Note that this assumes the worst case of the sketch in
* estimation mode, which requires storing theta and count.
* @param numberOfEntries the actual number of entries stored with the CompactSketch.
* @return the maximum number of storage bytes required for a CompactSketch with the given number
* of entries.
* @see Sketch#getMaxCompactSketchBytes(int)
* @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int)} instead.
*/
@Deprecated
public static int getMaxCompactSketchBytes(final int numberOfEntries) {
  // Pure pass-through: the sizing rule is computed by Sketch.getMaxCompactSketchBytes(int).
  final int maxBytes = Sketch.getMaxCompactSketchBytes(numberOfEntries);
  return maxBytes;
}

/**
 * Returns the maximum number of storage bytes required for a CompactSketch given the configured
 * log_base2 of the number of nominal entries, which is a power of 2.
 *
 * <p>This is a convenience method that delegates to
 * {@link Sketch#getCompactSketchMaxBytes(int)}.</p>
 *
 * @param nomEntries despite its name, this is the <i>log_base2</i> of the
 * <a href="{@docRoot}/resources/dictionary.html#nomEntries">Nominal Entries</a>; the value is
 * passed unchanged as the {@code lgNomEntries} argument of
 * {@link Sketch#getCompactSketchMaxBytes(int)}.
 * @return the maximum number of storage bytes required for a CompactSketch with the given
 * log_base2 of nominal entries.
 * @see Sketch#getCompactSketchMaxBytes(int)
 */
public static int getCompactSketchMaxBytes(final int nomEntries) {
  return Sketch.getCompactSketchMaxBytes(nomEntries);
}

/**
* Ref: {@link SetOperation#getMaxIntersectionBytes(int)}
* @param nomEntries Ref: {@link SetOperation#getMaxIntersectionBytes(int)}, {@code nomEntries}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package org.apache.datasketches.theta;

import static org.apache.datasketches.theta.BackwardConversions.convertSerVer3toSerVer1;
import static org.apache.datasketches.theta.Sketches.getCompactSketchMaxBytes;
import static org.apache.datasketches.theta.Sketches.getMaxCompactSketchBytes;
import static org.apache.datasketches.theta.Sketches.getMaxIntersectionBytes;
import static org.apache.datasketches.theta.Sketches.getMaxUnionBytes;
Expand All @@ -35,6 +36,7 @@
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;

import org.apache.datasketches.common.Family;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
Expand Down Expand Up @@ -130,7 +132,8 @@ public void checkSetOpMethods() {

@Test
public void checkUtilMethods() {
final int k = 1024;
final int lgK = 10;
final int k = 1 << lgK;

final int maxUnionBytes = getMaxUnionBytes(k);
assertEquals(2*k*8+32, maxUnionBytes);
Expand All @@ -141,6 +144,10 @@ public void checkUtilMethods() {
final int maxCompSkBytes = getMaxCompactSketchBytes(k+1);
assertEquals(24+(k+1)*8, maxCompSkBytes);

final int compSkMaxBytes = getCompactSketchMaxBytes(lgK); {
assertEquals(compSkMaxBytes, ((2 << lgK) * 15) / 16 + (Family.QUICKSELECT.getMaxPreLongs() << 3));
}

final int maxSkBytes = getMaxUpdateSketchBytes(k);
assertEquals(24+2*k*8, maxSkBytes);
}
Expand Down
30 changes: 20 additions & 10 deletions tools/SketchesCheckstyle.xml
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,17 @@ under the License.
<property name="charset" value="UTF-8"/>
<property name="severity" value="warning"/>
<property name="fileExtensions" value="java"/>
<property name="basedir" value="${basedir}"/>

<!-- Exclude all module-info.java files
https://checkstyle.org/filefilters/beforeexecutionexclusionfilefilter.html#BeforeExecutionExclusionFileFilter -->
<module name="BeforeExecutionExclusionFileFilter">
<property name="fileNamePattern" value="src[\\/]test[\\/]java[\\/].+$|module\-info\.java.+$"/>
<property name="fileNamePattern" value="module\-info\.java$"/>
</module>

<!-- Be able to ignore violations with @SuppressWarnings -->
<!-- See https://checkstyle.org/config_filters.html#SuppressWarningsFilter -->
<module name="SuppressWarningsFilter"/>

<module name="SuppressionFilter">
<property name="file" value="${config_loc}/suppressions.xml"/>
<property name="optional" value="false"/>

<!-- Exclude all src/test/... files -->
<module name="BeforeExecutionExclusionFileFilter">
<property name="fileNamePattern" value=".*[\\/]src[\\/]test[\\/].*$"/>
</module>

<module name="FileTabCharacter">
Expand Down Expand Up @@ -77,7 +76,18 @@ under the License.
<!-- ******************************************************** -->

<module name="TreeWalker">


<!-- Be able to ignore violations with @SuppressWarnings -->
<!-- See https://checkstyle.org/filters/suppresswarningsfilter.html -->
<module name="SuppressWarningsHolder"/>

<!--
<module name="SuppressionFilter">
<property name="file" value="basedir/tools/suppressions.xml"/>
<property name="optional" value="false"/>
</module>
-->

<!-- Annotations -->
<module name="AnnotationLocation">
<property name="tokens" value="CLASS_DEF, INTERFACE_DEF, ENUM_DEF, METHOD_DEF, CTOR_DEF"/>
Expand Down

0 comments on commit f8772e4

Please sign in to comment.