Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added new method #559

Merged
merged 5 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ private DirectQuickSelectSketch(
//clear hash table area
dstMem.clear(preambleLongs << 3, 8 << lgArrLongs);

hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
memReqSvr_ = memReqSvr;
}

Expand Down Expand Up @@ -210,7 +210,7 @@ static DirectQuickSelectSketch writableWrap(final WritableMemory srcMem, final l

final DirectQuickSelectSketch dqss =
new DirectQuickSelectSketch(seed, srcMem);
dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
return dqss;
}

Expand All @@ -228,7 +228,7 @@ static DirectQuickSelectSketch fastWritableWrap(final WritableMemory srcMem, fin

final DirectQuickSelectSketch dqss =
new DirectQuickSelectSketch(seed, srcMem);
dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
return dqss;
}

Expand Down Expand Up @@ -310,7 +310,7 @@ UpdateReturnState hashUpdate(final long hash) {
if (actLgRF > 0) { //Expand in current Memory
//lgArrLongs will change; thetaLong, curCount will not
resize(wmem_, preambleLongs, lgArrLongs, tgtLgArrLongs);
hashTableThreshold_ = setHashTableThreshold(lgNomLongs, tgtLgArrLongs);
hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs);
return InsertedCountIncrementedResized;
} //end of Expand in current memory, exit.

Expand All @@ -330,7 +330,7 @@ UpdateReturnState hashUpdate(final long hash) {
memReqSvr_.requestClose(wmem_, newDstMem);

wmem_ = newDstMem;
hashTableThreshold_ = setHashTableThreshold(lgNomLongs, tgtLgArrLongs);
hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs);
return InsertedCountIncrementedResized;
} //end of Request more memory to resize
} //end of resize
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ static DirectQuickSelectSketchR readOnlyWrap(final Memory srcMem, final long see

final DirectQuickSelectSketchR dqssr =
new DirectQuickSelectSketchR(seed, (WritableMemory) srcMem);
dqssr.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
dqssr.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
return dqssr;
}

Expand All @@ -104,7 +104,7 @@ static DirectQuickSelectSketchR fastReadOnlyWrap(final Memory srcMem, final long

final DirectQuickSelectSketchR dqss =
new DirectQuickSelectSketchR(seed, (WritableMemory) srcMem);
dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
return dqss;
}

Expand Down Expand Up @@ -276,7 +276,7 @@ UpdateReturnState hashUpdate(final long hash) {
* @return the hash table threshold
*/
@SuppressFBWarnings(value = "DB_DUPLICATE_BRANCHES", justification = "False Positive, see the code comments")
static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
protected static final int getOffHeapHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
//SpotBugs may complain (DB_DUPLICATE_BRANCHES) if DQS_RESIZE_THRESHOLD == REBUILD_THRESHOLD,
//but this allows us to tune these constants for different sketches.
final double fraction = (lgArrLongs <= lgNomLongs) ? DQS_RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ private HeapQuickSelectSketch(final int lgNomLongs, final long seed, final float
}

lgArrLongs_ = ThetaUtil.startingSubMultiple(lgNomLongs + 1, rf.lg(), ThetaUtil.MIN_LG_ARR_LONGS);
hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs_);
hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs_);
curCount_ = 0;
thetaLong_ = (long)(p * LONG_MAX_VALUE_AS_DOUBLE);
empty_ = true; //other flags: bigEndian = readOnly = compact = ordered = false;
Expand Down Expand Up @@ -128,7 +128,7 @@ static HeapQuickSelectSketch heapifyInstance(final Memory srcMem, final long see
final HeapQuickSelectSketch hqss = new HeapQuickSelectSketch(lgNomLongs, seed, p, memRF,
preambleLongs, family);
hqss.lgArrLongs_ = lgArrLongs;
hqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
hqss.hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs);
hqss.curCount_ = extractCurCount(srcMem);
hqss.thetaLong_ = extractThetaLong(srcMem);
hqss.empty_ = PreambleUtil.isEmptyFlag(srcMem);
Expand Down Expand Up @@ -197,7 +197,7 @@ public void reset() {
cache_ = new long[1 << lgArrLongsSM];
lgArrLongs_ = lgArrLongsSM;
}
hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_);
hashTableThreshold_ = getHashTableThreshold(lgNomLongs_, lgArrLongs_);
empty_ = true;
curCount_ = 0;
thetaLong_ = (long)(getP() * LONG_MAX_VALUE_AS_DOUBLE);
Expand Down Expand Up @@ -293,7 +293,7 @@ private final void resizeCache() {
curCount_ = newCount;

cache_ = tgtArr;
hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_);
hashTableThreshold_ = getHashTableThreshold(lgNomLongs_, lgArrLongs_);
}

//array stays the same size. Changes theta and thus count
Expand All @@ -318,7 +318,7 @@ private final void quickSelectAndRebuild() {
* @param lgArrLongs <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>.
* @return the hash table threshold
*/
static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
private static final int getHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
  // Pick the fill fraction: the resize fraction applies while the array is no larger than the
  // nominal size, the rebuild fraction applies once the array has grown past it.
  final double fillFraction;
  if (lgArrLongs > lgNomLongs) {
    fillFraction = ThetaUtil.REBUILD_THRESHOLD;
  } else {
    fillFraction = ThetaUtil.RESIZE_THRESHOLD;
  }
  // The array length in longs is 2^lgArrLongs; the threshold is that length scaled by the
  // chosen fraction and rounded down.
  final int arrLongs = 1 << lgArrLongs;
  return (int) Math.floor(fillFraction * arrLongs);
}
Expand Down
14 changes: 14 additions & 0 deletions src/main/java/org/apache/datasketches/theta/Sketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -297,13 +297,27 @@ public double getLowerBound(final int numStdDev) {
* @param numberOfEntries the actual number of entries stored with the CompactSketch.
* @return the maximum number of storage bytes required for a CompactSketch with the given number
* of entries.
* @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int)} instead.
*/
@Deprecated
public static int getMaxCompactSketchBytes(final int numberOfEntries) {
  switch (numberOfEntries) {
    case 0:
      // No entries: a fixed 8 bytes.
      return 8;
    case 1:
      // Exactly one entry: a fixed 16 bytes.
      return 16;
    default:
      // General case: 8 bytes per entry plus a fixed 24 bytes of overhead
      // (presumably the 3-long preamble used in estimation mode -- confirm against PreambleUtil).
      return (numberOfEntries << 3) + 24;
  }
}

/**
 * Returns the maximum number of storage bytes required for a CompactSketch given the configured
 * log_base2 of the number of nominal entries, which is a power of 2.
 *
 * <p>The result is the maximum number of retained hash entries plus the maximum number of
 * preamble longs, both converted to bytes at {@link Long#BYTES} bytes per long.</p>
 *
 * @param lgNomEntries the log_base2 of the configured
 * <a href="{@docRoot}/resources/dictionary.html#nomEntries">Nominal Entries</a>
 * @return the maximum number of storage bytes required for a CompactSketch with the given
 * lgNomEntries.
 */
public static int getCompactSketchMaxBytes(final int lgNomEntries) {
  // Maximum retained entries: twice the nominal entries (2 << lgNomEntries) scaled by the
  // rebuild threshold fraction.
  final int maxEntries = (int) ((2 << lgNomEntries) * ThetaUtil.REBUILD_THRESHOLD);
  // Entries and preamble are both counted in 8-byte longs, so convert the sum to bytes.
  // Previously only the preamble was multiplied by Long.BYTES, which under-reported the size.
  final int maxLongs = maxEntries + Family.QUICKSELECT.getMaxPreLongs();
  return maxLongs * Long.BYTES;
}

/**
* Returns the maximum number of storage bytes required for an UpdateSketch with the given
* number of nominal entries (power of 2).
Expand Down
27 changes: 22 additions & 5 deletions src/main/java/org/apache/datasketches/theta/Sketches.java
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,32 @@ public static int getMaxAnotBResultBytes(final int maxNomEntries) {
}

/**
* Ref: {@link Sketch#getMaxCompactSketchBytes(int)}
* @param numberOfEntries Ref: {@link Sketch#getMaxCompactSketchBytes(int)},
* {@code numberOfEntries}
* @return Ref: {@link Sketch#getMaxCompactSketchBytes(int)}
*/
* Returns the maximum number of storage bytes required for a CompactSketch with the given
* number of actual entries. Note that this assumes the worst case of the sketch in
* estimation mode, which requires storing theta and count.
* @param numberOfEntries the actual number of entries stored with the CompactSketch.
* @return the maximum number of storage bytes required for a CompactSketch with the given number
* of entries.
* @see Sketch#getMaxCompactSketchBytes(int)
* @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int)} instead.
*/
@Deprecated
public static int getMaxCompactSketchBytes(final int numberOfEntries) {
  // Pure pass-through: the sizing logic lives in Sketch.
  final int maxBytes = Sketch.getMaxCompactSketchBytes(numberOfEntries);
  return maxBytes;
}

/**
 * Returns the maximum number of storage bytes required for a CompactSketch given the configured
 * log_base2 of the number of nominal entries, which is a power of 2.
 *
 * <p>Note that the argument is the base-2 logarithm of the nominal entries, not the nominal
 * entries themselves; it is passed unchanged to {@link Sketch#getCompactSketchMaxBytes(int)}.</p>
 *
 * @param lgNomEntries the log_base2 of the configured
 * <a href="{@docRoot}/resources/dictionary.html#nomEntries">Nominal Entries</a>
 * @return the maximum number of storage bytes required for a CompactSketch with the given
 * lgNomEntries.
 * @see Sketch#getCompactSketchMaxBytes(int)
 */
public static int getCompactSketchMaxBytes(final int lgNomEntries) {
  return Sketch.getCompactSketchMaxBytes(lgNomEntries);
}

/**
* Ref: {@link SetOperation#getMaxIntersectionBytes(int)}
* @param nomEntries Ref: {@link SetOperation#getMaxIntersectionBytes(int)}, {@code nomEntries}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package org.apache.datasketches.theta;

import static org.apache.datasketches.theta.BackwardConversions.convertSerVer3toSerVer1;
import static org.apache.datasketches.theta.Sketches.getCompactSketchMaxBytes;
import static org.apache.datasketches.theta.Sketches.getMaxCompactSketchBytes;
import static org.apache.datasketches.theta.Sketches.getMaxIntersectionBytes;
import static org.apache.datasketches.theta.Sketches.getMaxUnionBytes;
Expand All @@ -35,6 +36,7 @@
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;

import org.apache.datasketches.common.Family;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
Expand Down Expand Up @@ -130,7 +132,8 @@ public void checkSetOpMethods() {

@Test
public void checkUtilMethods() {
final int k = 1024;
final int lgK = 10;
final int k = 1 << lgK;

final int maxUnionBytes = getMaxUnionBytes(k);
assertEquals(2*k*8+32, maxUnionBytes);
Expand All @@ -141,6 +144,10 @@ public void checkUtilMethods() {
final int maxCompSkBytes = getMaxCompactSketchBytes(k+1);
assertEquals(24+(k+1)*8, maxCompSkBytes);

final int compSkMaxBytes = getCompactSketchMaxBytes(lgK); {
assertEquals(compSkMaxBytes, ((2 << lgK) * 15) / 16 + (Family.QUICKSELECT.getMaxPreLongs() << 3));
}

final int maxSkBytes = getMaxUpdateSketchBytes(k);
assertEquals(24+2*k*8, maxSkBytes);
}
Expand Down
30 changes: 20 additions & 10 deletions tools/SketchesCheckstyle.xml
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,17 @@ under the License.
<property name="charset" value="UTF-8"/>
<property name="severity" value="warning"/>
<property name="fileExtensions" value="java"/>
<property name="basedir" value="${basedir}"/>

<!-- Exclude all module-info.java files
https://checkstyle.org/filefilters/beforeexecutionexclusionfilefilter.html#BeforeExecutionExclusionFileFilter -->
<module name="BeforeExecutionExclusionFileFilter">
<property name="fileNamePattern" value="src[\\/]test[\\/]java[\\/].+$|module\-info\.java.+$"/>
<property name="fileNamePattern" value="module\-info\.java$"/>
</module>

<!-- Be able to ignore violations with @SuppressWarnings -->
<!-- See https://checkstyle.org/config_filters.html#SuppressWarningsFilter -->
<module name="SuppressWarningsFilter"/>

<module name="SuppressionFilter">
<property name="file" value="${config_loc}/suppressions.xml"/>
<property name="optional" value="false"/>

<!-- Exclude all src/test/... files -->
<module name="BeforeExecutionExclusionFileFilter">
<property name="fileNamePattern" value=".*[\\/]src[\\/]test[\\/].*$"/>
</module>

<module name="FileTabCharacter">
Expand Down Expand Up @@ -77,7 +76,18 @@ under the License.
<!-- ******************************************************** -->

<module name="TreeWalker">


<!-- Be able to ignore violations with @SuppressWarnings -->
<!-- See https://checkstyle.org/filters/suppresswarningsfilter.html -->
<module name="SuppressWarningsHolder"/>

<!--
<module name="SuppressionFilter">
<property name="file" value="basedir/tools/suppressions.xml"/>
<property name="optional" value="false"/>
</module>
-->

<!-- Annotations -->
<module name="AnnotationLocation">
<property name="tokens" value="CLASS_DEF, INTERFACE_DEF, ENUM_DEF, METHOD_DEF, CTOR_DEF"/>
Expand Down
Loading