Skip to content

Commit

Permalink
Merge pull request #450 from apache/prep_for_items_sketch
Browse files Browse the repository at this point in the history
Preparatory work for generic items.
  • Loading branch information
leerho authored Jun 7, 2023
2 parents ee84291 + e4271bf commit 3b446ee
Show file tree
Hide file tree
Showing 14 changed files with 189 additions and 215 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,13 @@
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR;
import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL;
import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE;
import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryK;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryLevelZeroSortedFlag;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryM;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryMinK;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryN;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryNumLevels;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFlags;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryLevelZeroSortedFlag;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryM;
Expand Down Expand Up @@ -59,13 +57,12 @@
class KllDirectDoublesSketch extends KllDoublesSketch {

/**
* The constructor with Memory that can be off-heap.
* The constructor with WritableMemory that can be off-heap.
* @param wmem the current WritableMemory
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
* @param memVal the KllMemoryValidate object
*/
KllDirectDoublesSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr,
final KllMemoryValidate memVal) {
KllDirectDoublesSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr, final KllMemoryValidate memVal) {
super(wmem, memReqSvr);
levelsArr = memVal.levelsArr;
}
Expand All @@ -83,7 +80,6 @@ static KllDirectDoublesSketch newDirectInstance(final int k, final int m, final
setMemoryPreInts(dstMem, PREAMBLE_INTS_FULL);
setMemorySerVer(dstMem, SERIAL_VERSION_UPDATABLE);
setMemoryFamilyID(dstMem, Family.KLL.getID());
setMemoryFlags(dstMem, UPDATABLE_BIT_MASK);
setMemoryK(dstMem, k);
setMemoryM(dstMem, m);
setMemoryN(dstMem, 0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,13 @@
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR;
import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL;
import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE;
import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryK;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryLevelZeroSortedFlag;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryM;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryMinK;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryN;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryNumLevels;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFlags;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryLevelZeroSortedFlag;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryM;
Expand Down Expand Up @@ -59,13 +57,12 @@
class KllDirectFloatsSketch extends KllFloatsSketch {

/**
* The constructor with Memory that can be off-heap.
* The constructor with WritableMemory that can be off-heap.
* @param wmem the current WritableMemory
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
* @param memVal the KllMemoryValidate object
*/
KllDirectFloatsSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr,
final KllMemoryValidate memVal) {
KllDirectFloatsSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr, final KllMemoryValidate memVal) {
super(wmem, memReqSvr);
levelsArr = memVal.levelsArr;
}
Expand All @@ -83,7 +80,6 @@ static KllDirectFloatsSketch newDirectInstance(final int k, final int m, final W
setMemoryPreInts(dstMem, PREAMBLE_INTS_FULL);
setMemorySerVer(dstMem, SERIAL_VERSION_UPDATABLE);
setMemoryFamilyID(dstMem, Family.KLL.getID());
setMemoryFlags(dstMem, UPDATABLE_BIT_MASK);
setMemoryK(dstMem, k);
setMemoryM(dstMem, m);
setMemoryN(dstMem, 0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,13 @@ static void mergeDoubleImpl(final KllDoublesSketch mySketch, final KllSketch oth
final int myMinK = mySketch.getMinK();

//update this sketch with level0 items from the other sketch

if (otherDblSk.isCompactSingleItem()) {
updateDouble(mySketch, otherDblSk.getDoubleSingleItem());
otherDoubleItemsArr = new double[0];
} else {
otherDoubleItemsArr = otherDblSk.getDoubleItemsArray();
for (int i = otherLevelsArr[0]; i < otherLevelsArr[1]; i++) {
KllDoublesHelper.updateDouble(mySketch, otherDoubleItemsArr[i]);
updateDouble(mySketch, otherDoubleItemsArr[i]);
}
}
// after the level 0 update, we capture the state of levels and items arrays
Expand All @@ -68,7 +67,7 @@ static void mergeDoubleImpl(final KllDoublesSketch mySketch, final KllSketch oth
int[] myNewLevelsArr = myCurLevelsArr;
double[] myNewDoubleItemsArr = myCurDoubleItemsArr;

if (otherNumLevels > 1 && !otherDblSk.isCompactSingleItem()) { //now merge other levels if they exist
if (otherNumLevels > 1 && !otherDblSk.isCompactSingleItem()) { //now merge higher levels if they exist
final int tmpSpaceNeeded = mySketch.getNumRetained()
+ KllHelper.getNumRetainedAboveLevelZero(otherNumLevels, otherLevelsArr);
final double[] workbuf = new double[tmpSpaceNeeded];
Expand Down Expand Up @@ -115,7 +114,7 @@ static void mergeDoubleImpl(final KllDoublesSketch mySketch, final KllSketch oth
}

//MEMORY SPACE MANAGEMENT
if (mySketch.updatableMemFormat) {
if (mySketch.serialVersionUpdatable) {
mySketch.wmem = KllHelper.memorySpaceMgmt(mySketch, myNewLevelsArr.length, myNewDoubleItemsArr.length);
}
}
Expand Down Expand Up @@ -358,7 +357,7 @@ private static void populateDoubleWorkArrays(
worklevels[0] = 0;

// Note: the level zero data from "other" was already inserted into "self"
final int selfPopZero = KllHelper.currentLevelSize(0, myCurNumLevels,myCurLevelsArr);
final int selfPopZero = KllHelper.currentLevelSize(0, myCurNumLevels, myCurLevelsArr);
System.arraycopy(myCurDoubleItemsArr, myCurLevelsArr[0], workbuf, worklevels[0], selfPopZero);
worklevels[1] = worklevels[0] + selfPopZero;

Expand Down
44 changes: 19 additions & 25 deletions src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

import static java.lang.Math.max;
import static java.lang.Math.min;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryUpdatableFormatFlag;
import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_BE_UPDATABLE_FORMAT;
import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemorySerVer;
import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_READ_ONLY;
import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow;
import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH;
Expand Down Expand Up @@ -61,60 +61,56 @@ public abstract class KllDoublesSketch extends KllSketch implements QuantilesDou
*/
public static KllDoublesSketch heapify(final Memory srcMem) {
Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
if (getMemoryUpdatableFormatFlag(srcMem)) { Error.kllSketchThrow(MUST_NOT_BE_UPDATABLE_FORMAT); }
return KllHeapDoublesSketch.heapifyImpl(srcMem);
}

/**
* Create a new direct instance of this sketch with a given <em>k</em>.
* @param k parameter that controls size of the sketch and accuracy of estimates.
* Create a new direct instance of this sketch with the default <em>k</em>.
* The default <em>k</em> = 200 results in a normalized rank error of about
* 1.65%. Larger <em>k</em> will have smaller error but the sketch will be larger (and slower).
* @param dstMem the given destination WritableMemory object for use by the sketch
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
* @return a new direct instance of this sketch
*/
public static KllDoublesSketch newDirectInstance(
final int k,
final WritableMemory dstMem,
final MemoryRequestServer memReqSvr) {
Objects.requireNonNull(dstMem, "Parameter 'dstMem' must not be null");
Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
return KllDirectDoublesSketch.newDirectInstance(k, DEFAULT_M, dstMem, memReqSvr);
return newDirectInstance(DEFAULT_K, dstMem, memReqSvr);
}

/**
* Create a new direct instance of this sketch with the default <em>k</em>.
* The default <em>k</em> = 200 results in a normalized rank error of about
* 1.65%. Larger <em>k</em> will have smaller error but the sketch will be larger (and slower).
* Create a new direct instance of this sketch with a given <em>k</em>.
* @param k parameter that controls size of the sketch and accuracy of estimates.
* @param dstMem the given destination WritableMemory object for use by the sketch
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
* @return a new direct instance of this sketch
*/
public static KllDoublesSketch newDirectInstance(
final int k,
final WritableMemory dstMem,
final MemoryRequestServer memReqSvr) {
Objects.requireNonNull(dstMem, "Parameter 'dstMem' must not be null");
Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
return KllDirectDoublesSketch.newDirectInstance(DEFAULT_K, DEFAULT_M, dstMem, memReqSvr);
return KllDirectDoublesSketch.newDirectInstance(k, DEFAULT_M, dstMem, memReqSvr);
}

/**
* Create a new heap instance of this sketch with the default <em>k = 200</em>.
* The default <em>k</em> = 200 results in a normalized rank error of about
* 1.65%. Larger K will have smaller error but the sketch will be larger (and slower).
* This will have a rank error of about 1.65%.
* @return new KllDoublesSketch on the heap.
* @return new KllDoublesSketch on the Java heap.
*/
public static KllDoublesSketch newHeapInstance() {
return new KllHeapDoublesSketch(DEFAULT_K, DEFAULT_M);
public static KllDoublesSketch newHeapInstance() {
return newHeapInstance(DEFAULT_K);
}

/**
* Create a new heap instance of this sketch with a given parameter <em>k</em>.
* <em>k</em> can be between DEFAULT_M and 65535, inclusive.
* <em>k</em> can be between 8, inclusive, and 65535, inclusive.
* The default <em>k</em> = 200 results in a normalized rank error of about
* 1.65%. Larger <em>k</em> will have smaller error but the sketch will be larger (and slower).
* @param k parameter that controls size of the sketch and accuracy of estimates.
* @return new KllDoublesSketch on the heap.
* @return new KllDoublesSketch on the Java heap.
*/
public static KllDoublesSketch newHeapInstance(final int k) {
return new KllHeapDoublesSketch(k, DEFAULT_M);
Expand All @@ -129,7 +125,7 @@ public static KllDoublesSketch newHeapInstance(final int k) {
public static KllDoublesSketch wrap(final Memory srcMem) {
Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, DOUBLES_SKETCH);
if (memVal.updatableMemFormat) {
if (getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE) {
return new KllDirectDoublesSketch((WritableMemory) srcMem, null, memVal);
} else {
return new KllDirectCompactDoublesSketch(srcMem, memVal);
Expand All @@ -148,10 +144,8 @@ public static KllDoublesSketch writableWrap(
final MemoryRequestServer memReqSvr) {
Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, DOUBLES_SKETCH);
if (memVal.updatableMemFormat) {
if (!memVal.readOnly) {
Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
}
if (getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE && !srcMem.isReadOnly()) {
Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
return new KllDirectDoublesSketch(srcMem, memReqSvr, memVal);
} else {
return new KllDirectCompactDoublesSketch(srcMem, memVal);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ static void mergeFloatImpl(final KllFloatsSketch mySketch, final KllSketch other
}

//MEMORY SPACE MANAGEMENT
if (mySketch.updatableMemFormat) {
if (mySketch.serialVersionUpdatable) {
mySketch.wmem = KllHelper.memorySpaceMgmt(mySketch, myNewLevelsArr.length, myNewFloatItemsArr.length);
}
}
Expand Down
40 changes: 17 additions & 23 deletions src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

import static java.lang.Math.max;
import static java.lang.Math.min;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryUpdatableFormatFlag;
import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_BE_UPDATABLE_FORMAT;
import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemorySerVer;
import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_READ_ONLY;
import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow;
import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH;
Expand Down Expand Up @@ -61,60 +61,56 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa
*/
public static KllFloatsSketch heapify(final Memory srcMem) {
Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
if (getMemoryUpdatableFormatFlag(srcMem)) { Error.kllSketchThrow(MUST_NOT_BE_UPDATABLE_FORMAT); }
return KllHeapFloatsSketch.heapifyImpl(srcMem);
}

/**
* Create a new direct instance of this sketch with a given <em>k</em>.
* @param k parameter that controls size of the sketch and accuracy of estimates.
* Create a new direct instance of this sketch with the default <em>k</em>.
* The default <em>k</em> = 200 results in a normalized rank error of about
* 1.65%. Larger <em>k</em> will have smaller error but the sketch will be larger (and slower).
* @param dstMem the given destination WritableMemory object for use by the sketch
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
* @return a new direct instance of this sketch
*/
public static KllFloatsSketch newDirectInstance(
final int k,
final WritableMemory dstMem,
final MemoryRequestServer memReqSvr) {
Objects.requireNonNull(dstMem, "Parameter 'dstMem' must not be null");
Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
return KllDirectFloatsSketch.newDirectInstance(k, DEFAULT_M, dstMem, memReqSvr);
return newDirectInstance(DEFAULT_K, dstMem, memReqSvr);
}

/**
* Create a new direct instance of this sketch with the default <em>k</em>.
* The default <em>k</em> = 200 results in a normalized rank error of about
* 1.65%. Larger <em>k</em> will have smaller error but the sketch will be larger (and slower).
* Create a new direct instance of this sketch with a given <em>k</em>.
* @param k parameter that controls size of the sketch and accuracy of estimates.
* @param dstMem the given destination WritableMemory object for use by the sketch
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
* @return a new direct instance of this sketch
*/
public static KllFloatsSketch newDirectInstance(
final int k,
final WritableMemory dstMem,
final MemoryRequestServer memReqSvr) {
Objects.requireNonNull(dstMem, "Parameter 'dstMem' must not be null");
Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
return KllDirectFloatsSketch.newDirectInstance(DEFAULT_K, DEFAULT_M, dstMem, memReqSvr);
return KllDirectFloatsSketch.newDirectInstance(k, DEFAULT_M, dstMem, memReqSvr);
}

/**
* Create a new heap instance of this sketch with the default <em>k = 200</em>.
* The default <em>k</em> = 200 results in a normalized rank error of about
* 1.65%. Larger K will have smaller error but the sketch will be larger (and slower).
* This will have a rank error of about 1.65%.
* @return new KllFloatsSketch on the heap.
* @return new KllFloatsSketch on the Java heap.
*/
public static KllFloatsSketch newHeapInstance() {
return new KllHeapFloatsSketch(DEFAULT_K, DEFAULT_M);
return newHeapInstance(DEFAULT_K);
}

/**
* Create a new heap instance of this sketch with a given parameter <em>k</em>.
* <em>k</em> can be between DEFAULT_M and 65535, inclusive.
* <em>k</em> can be between 8, inclusive, and 65535, inclusive.
* The default <em>k</em> = 200 results in a normalized rank error of about
* 1.65%. Larger <em>k</em> will have smaller error but the sketch will be larger (and slower).
* @param k parameter that controls size of the sketch and accuracy of estimates.
* @return new KllFloatsSketch on the heap.
* @return new KllFloatsSketch on the Java heap.
*/
public static KllFloatsSketch newHeapInstance(final int k) {
return new KllHeapFloatsSketch(k, DEFAULT_M);
Expand All @@ -129,7 +125,7 @@ public static KllFloatsSketch newHeapInstance(final int k) {
public static KllFloatsSketch wrap(final Memory srcMem) {
Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, FLOATS_SKETCH);
if (memVal.updatableMemFormat) {
if (getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE) {
return new KllDirectFloatsSketch((WritableMemory) srcMem, null, memVal);
} else {
return new KllDirectCompactFloatsSketch(srcMem, memVal);
Expand All @@ -148,10 +144,8 @@ public static KllFloatsSketch writableWrap(
final MemoryRequestServer memReqSvr) {
Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, FLOATS_SKETCH);
if (memVal.updatableMemFormat) {
if (!memVal.readOnly) {
if (getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE && !srcMem.isReadOnly()) {
Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
}
return new KllDirectFloatsSketch(srcMem, memReqSvr, memVal);
} else {
return new KllDirectCompactFloatsSketch(srcMem, memVal);
Expand Down
Loading

0 comments on commit 3b446ee

Please sign in to comment.