Skip to content

Commit

Permalink
Improved javadocs for main code.
Browse files Browse the repository at this point in the history
  • Loading branch information
leerho committed Apr 23, 2024
1 parent de0d96e commit 7c39955
Show file tree
Hide file tree
Showing 26 changed files with 270 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,18 @@ public class CompressionCharacterization {
private CompressedState[] compressedStates2;
private CpcSketch[] unCompressedSketches;

/**
* Only used in test.
* @param lgMinK min lgK
* @param lgMaxK max lgK
* @param lgMinT min lgTrials
* @param lgMaxT max lgTrials
* @param lgMulK lg multiple
* @param uPPO unique axis Points Per Octave
* @param incLgK increment lgK
* @param pS PrintStream
* @param pW PrintWriter
*/
@SuppressFBWarnings(value = "EI_EXPOSE_REP2", justification = "This is OK here")
public CompressionCharacterization(
final int lgMinK,
Expand All @@ -91,6 +103,9 @@ public CompressionCharacterization(
assembleFormats();
}

/**
* Only used in test.
*/
public void start() {
printf(hfmt, (Object[]) hStrArr); //print header
doRangeOfLgK();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,12 @@
* false positive probability.</p>
*
* <p>This implementation uses xxHash64 and follows the approach in Kirsch and Mitzenmacher,
* "Less Hashing, Same Performance: Building a Better Bloom Filter," Wiley Interscience, 2008,
* pp. 187-218.</p>
* "Less Hashing, Same Performance: Building a Better Bloom Filter," Wiley Interscience, 2008, pp. 187-218.</p>
*/
public final class BloomFilter {
/**
* The maximum size of a bloom filter in bits.
*/
public static final long MAX_SIZE_BITS = (Integer.MAX_VALUE - Family.BLOOMFILTER.getMaxPreLongs()) * (long) Long.SIZE;
private static final int SER_VER = 1;
private static final int EMPTY_FLAG_MASK = 4;
Expand Down Expand Up @@ -133,11 +135,23 @@ public static BloomFilter heapify(final Memory mem) {
return internalHeapifyOrWrap((WritableMemory) mem, false, false);
}

/**
* Wraps the given Memory into this filter class. The class itself only contains a few metadata items and holds
* a reference to the Memory object, which contains all the data.
* @param mem the given Memory object
* @return the wrapping BloomFilter class.
*/
public static BloomFilter wrap(final Memory mem) {
  // The shared helper accepts only WritableMemory, so the read-only Memory is cast here.
  // The trailing 'false' keeps track of the fact that the wrapped object is read-only.
  final WritableMemory castMem = (WritableMemory) mem;
  return internalHeapifyOrWrap(castMem, true, false);
}

/**
* Wraps the given WritableMemory into this filter class. The class itself only contains a few metadata items and holds
* a reference to the Memory object, which contains all the data.
* @param wmem the given WritableMemory object
* @return the wrapping BloomFilter class.
*/
public static BloomFilter writableWrap(final WritableMemory wmem) {
  // Same helper used by wrap(Memory), but with the last flag set to true
  // (presumably marking the result as writable -- confirm against internalHeapifyOrWrap).
  final BloomFilter wrapped = internalHeapifyOrWrap(wmem, true, true);
  return wrapped;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;

/**
* This class can maintain the BitArray object off-heap.
*/
public class DirectBitArrayR extends BitArray {
final static protected long NUM_BITS_OFFSET = Long.BYTES;
final static protected long DATA_OFFSET = 2L * Long.BYTES;
Expand Down
19 changes: 18 additions & 1 deletion src/main/java/org/apache/datasketches/hll/TgtHllType.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,27 @@
* </ul>
* @author Lee Rhodes
*/
public enum TgtHllType { HLL_4, HLL_6, HLL_8;
public enum TgtHllType {
/**
* An HLL sketch with a bin size of 4 bits
*/
HLL_4,
/**
* An HLL sketch with a bin size of 6 bits
*/
HLL_6,
/**
* An HLL sketch with a bin size of 8 bits
*/
HLL_8;

private static final TgtHllType values[] = values();

/**
* Convert the typeId to the enum type
* @param typeId the given typeId
* @return the enum type
*/
public static final TgtHllType fromOrdinal(final int typeId) {
  // Look up the constant in the cached values() array; typeId is used directly as the index,
  // so an out-of-range typeId results in an ArrayIndexOutOfBoundsException.
  final TgtHllType type = values[typeId];
  return type;
}
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/org/apache/datasketches/kll/KllItemsSketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,10 @@ public void reset() {
itemsSV = null;
}

/**
* Export the current sketch as a compact byte array.
* @return the current sketch as a compact byte array.
*/
public byte[] toByteArray() {
  // Delegates serialization to KllHelper; the 'false' argument presumably selects the
  // compact (non-updatable) layout -- confirm against KllHelper.toByteArray.
  final byte[] bytes = KllHelper.toByteArray(this, false);
  return bytes;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

/**
* Iterator over KllItemsSketch. The order is not defined.
* @param <T> the item class type
*/
public final class KllItemsSketchIterator<T> extends KllSketchIterator implements QuantilesGenericSketchIterator<T> {
private final Object[] quantiles;
Expand Down
45 changes: 42 additions & 3 deletions src/main/java/org/apache/datasketches/kll/KllSketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,10 @@ public boolean hasMemory() {
return (wmem != null);
}

/**
* Returns true if this sketch is in a Compact Memory Format.
* @return true if this sketch is in a Compact Memory Format.
*/
public boolean isCompactMemoryFormat() {
  // A sketch that has no Memory is never reported as being in a compact memory format.
  if (!hasMemory()) {
    return false;
  }
  // With Memory present, every structure other than UPDATABLE counts as compact.
  return sketchStructure != UPDATABLE;
}
Expand Down Expand Up @@ -488,9 +492,18 @@ final void setLevelsArrayAt(final int index, final int idxVal) {
* Used to define the variable type of the current instance of this class.
*/
public enum SketchType {
DOUBLES_SKETCH(Double.BYTES, "DoublesSketch"),
FLOATS_SKETCH(Float.BYTES, "FloatsSketch"),
ITEMS_SKETCH(0, "ItemsSketch");
/**
* KllDoublesSketch
*/
DOUBLES_SKETCH(Double.BYTES, "KllDoublesSketch"),
/**
* KllFloatsSketch
*/
FLOATS_SKETCH(Float.BYTES, "KllFloatsSketch"),
/**
* KllItemsSketch
*/
ITEMS_SKETCH(0, "KllItemsSketch");

private int typeBytes;
private String name;
Expand All @@ -500,18 +513,30 @@ private SketchType(final int typeBytes, final String name) {
this.name = name;
}

/**
* Gets the item size in bytes. If the item is generic, this returns zero.
* @return the item size in bytes
*/
public int getBytes() {
  return this.typeBytes;
}

/**
* Get the name of the associated sketch
* @return the name of the associated sketch
*/
public String getName() {
  return this.name;
}
}

/**
* Used primarily to define the structure of the serialized sketch. Also used by the Heap Sketch.
*/
public enum SketchStructure {
/** Compact Empty Structure */
COMPACT_EMPTY(PREAMBLE_INTS_EMPTY_SINGLE, SERIAL_VERSION_EMPTY_FULL),
/** Compact Single Item Structure */
COMPACT_SINGLE(PREAMBLE_INTS_EMPTY_SINGLE, SERIAL_VERSION_SINGLE),
/** Compact Full Preamble Structure */
COMPACT_FULL(PREAMBLE_INTS_FULL, SERIAL_VERSION_EMPTY_FULL),
/** Updatable Preamble Structure */
UPDATABLE(PREAMBLE_INTS_FULL, SERIAL_VERSION_UPDATABLE); //also used by the heap sketch.

private int preInts;
Expand All @@ -522,10 +547,24 @@ private SketchStructure(final int preInts, final int serVer) {
this.serVer = serVer;
}

/**
* Gets the Preamble Integers for this Structure.
* @return the Preamble Integers for this Structure
*/
public int getPreInts() {
  return this.preInts;
}

/**
* Gets the Serialization Version for this Structure.
* @return the Serialization Version for this Structure.
*/
public int getSerVer() {
  return this.serVer;
}

/**
* Gets the SketchStructure given preInts and serVer.
* @param preInts the given preamble size in integers
* @param serVer the given Serialization Version
* @return the SketchStructure given preInts and serVer.
*/
public static SketchStructure getSketchStructure(final int preInts, final int serVer) {
final SketchStructure[] ssArr = SketchStructure.values();
for (int i = 0; i < ssArr.length; i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@

package org.apache.datasketches.partitions;

/**
* This instructs the user about which of the upper and lower bounds of a partition definition row
* should be included with the returned data.
*/
public enum BoundsRule {

/**
Expand All @@ -30,10 +34,12 @@ public enum BoundsRule {
* Include only the upper bound but not the lower bound
*/
INCLUDE_UPPER,

/**
* Include only the lower bound but not the upper bound
*/
INCLUDE_LOWER,

/**
* Include none
*/
Expand Down
21 changes: 21 additions & 0 deletions src/main/java/org/apache/datasketches/partitions/Partitioner.java
Original file line number Diff line number Diff line change
Expand Up @@ -162,12 +162,22 @@ private void partitionSearch(final ArrayDeque<StackElement<T>> stack) {

/**
* Holds data for a Stack element
* @param <T> the item class type
*/
public static class StackElement<T> {
/** A reference to the relevant GenericPartitionBoundaries class */
public final GenericPartitionBoundaries<T> gpb;
/** The partition index */
public int part;
/** A brief string description of the partition and its hierarchy */
public String levelPartId;

/**
* Constructs this StackElement
* @param gpb the given GenericPartitionBoundaries reference
* @param part The partition index
* @param levelPartId A brief string description of the partition and its hierarchy
*/
public StackElement(final GenericPartitionBoundaries<T> gpb, final int part, final String levelPartId) {
this.gpb = gpb;
this.part = part;
Expand All @@ -177,15 +187,26 @@ public StackElement(final GenericPartitionBoundaries<T> gpb, final int part, fin

/**
* Defines a row for List of PartitionBounds.
* @param <T> the item class type
*/
public static class PartitionBoundsRow<T> {
/** The partition index */
public int part;
/** A brief string description of the partition and its hierarchy */
public String levelPartId;
/** The approximate number of items represented by this partition description row. */
public long approxNumDeltaItems;
/** The BoundsRule for this partition description row. */
public BoundsRule rule;
/** The lower bound value */
public T lowerBound;
/** The upper bound value */
public T upperBound;

/**
* The constructor for the PartitionBoundsRow class.
* @param se the given stack element.
*/
public PartitionBoundsRow(final StackElement<T> se) {
final GenericPartitionBoundaries<T> gpb = se.gpb;
final QuantileSearchCriteria searchCrit = gpb.getSearchCriteria();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
/**
* This is a callback request to the data source to fill a quantiles sketch,
* which is returned to the caller.
*
* @param <T> the item class type
* @param <S> the sketch type
* @author Lee Rhodes
*/
public interface SketchFillRequest<T, S extends QuantilesGenericAPI<T> & PartitioningFeature<T>> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@
public final class DoublesSortedViewIterator extends SortedViewIterator {
private final double[] quantiles;

/**
* Constructor.
* @param quantiles the given array of quantiles, which must be ordered.
* @param cumWeights the given array of cumulative weights, which must be ordered, start with the value one, and
* end with a value equal to N, the total number of items updated to the sketch.
*/
public DoublesSortedViewIterator(final double[] quantiles, final long[] cumWeights) {
super(cumWeights);
this.quantiles = quantiles; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@
public final class FloatsSortedViewIterator extends SortedViewIterator {
private final float[] quantiles;

/**
* Constructor.
* @param quantiles the given array of quantiles, which must be ordered.
* @param cumWeights the given array of cumulative weights, which must be ordered, start with the value one, and
* end with a value equal to N, the total number of items updated to the sketch.
*/
public FloatsSortedViewIterator(final float[] quantiles, final long[] cumWeights) {
super(cumWeights);
this.quantiles = quantiles; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
/**
* This defines the returned results of the getPartitionBoundaries() function and
* includes the basic methods needed to construct actual partitions.
* @param <T> the item class type
*/
public final class GenericPartitionBoundaries<T> {
private long totalN; //totalN of source sketch
Expand All @@ -40,6 +41,16 @@ public final class GenericPartitionBoundaries<T> {
private long[] numDeltaItems; //num of items in each partition
private int numPartitions; //num of partitions

/**
* Constructor.
* @param totalN the total number of items input to the sketch.
* @param boundaries The quantile boundaries between partitions
* @param natRanks The array of natural Ranks corresponding to the array of boundaries.
* @param normRanks The normalized Ranks corresponding to the array of boundaries.
* @param maxItem the maximum item of the stream.
* @param minItem the minimum item of the stream.
* @param searchCrit the user defined search criteria
*/
public GenericPartitionBoundaries(
final long totalN,
final T[] boundaries,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,17 @@

/**
* Iterator over quantile sketches of generic type.
* @param <T> The generic quantile type
* @param <T> The generic item class type
*/
public class GenericSortedViewIterator<T> extends SortedViewIterator {
private final T[] quantiles;

/**
* Constructor
* @param quantiles the given array of quantiles
* @param cumWeights the array of cumulative weights, corresponding to the array of quantiles,
* starting with the value one and ending with a value equal to N, the total number of items input to the sketch.
*/
public GenericSortedViewIterator(final T[] quantiles, final long[] cumWeights) {
super(cumWeights);
this.quantiles = quantiles; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter
Expand Down
Loading

0 comments on commit 7c39955

Please sign in to comment.