Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor TSDB doc_values utils for use in the new codec #115042

Merged
merged 1 commit into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public abstract class AbstractDocValuesForUtilBenchmark {
protected final int blockSize;

public AbstractDocValuesForUtilBenchmark() {
this.forUtil = new DocValuesForUtil();
this.forUtil = new DocValuesForUtil(ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
this.blockSize = ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.DataOutput;
import org.elasticsearch.index.codec.tsdb.DocValuesForUtil;
import org.openjdk.jmh.infra.Blackhole;

import java.io.IOException;
Expand Down Expand Up @@ -44,7 +43,7 @@ public void setupInvocation(int bitsPerValue) {

@Override
public void benchmark(int bitsPerValue, Blackhole bh) throws IOException {
DocValuesForUtil.decode(bitsPerValue, this.dataInput, this.output);
forUtil.decode(bitsPerValue, this.dataInput, this.output);
bh.consume(this.output);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@ public class DocValuesForUtil {
private static final int BITS_IN_FIVE_BYTES = 5 * Byte.SIZE;
private static final int BITS_IN_SIX_BYTES = 6 * Byte.SIZE;
private static final int BITS_IN_SEVEN_BYTES = 7 * Byte.SIZE;
private static final int blockSize = ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE;
private final int blockSize;
private final byte[] encoded = new byte[1024];

public DocValuesForUtil() {}
public DocValuesForUtil(int numericBlockSize) {
this.blockSize = numericBlockSize;
}

public static int roundBits(int bitsPerValue) {
if (bitsPerValue > 24 && bitsPerValue <= 32) {
Expand Down Expand Up @@ -67,7 +69,7 @@ private void encodeFiveSixOrSevenBytesPerValue(long[] in, int bitsPerValue, fina
out.writeBytes(this.encoded, bytesPerValue * in.length);
}

public static void decode(int bitsPerValue, final DataInput in, long[] out) throws IOException {
public void decode(int bitsPerValue, final DataInput in, long[] out) throws IOException {
if (bitsPerValue <= 24) {
ForUtil.decode(bitsPerValue, in, out);
} else if (bitsPerValue <= 32) {
Expand All @@ -81,7 +83,7 @@ public static void decode(int bitsPerValue, final DataInput in, long[] out) thro
}
}

private static void decodeFiveSixOrSevenBytesPerValue(int bitsPerValue, final DataInput in, long[] out) throws IOException {
private void decodeFiveSixOrSevenBytesPerValue(int bitsPerValue, final DataInput in, long[] out) throws IOException {
// NOTE: we expect multibyte values to be written "least significant byte" first
int bytesPerValue = bitsPerValue / Byte.SIZE;
long mask = (1L << bitsPerValue) - 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ private long[] writeField(FieldInfo field, DocValuesProducer valuesProducer, lon
if (maxOrd != 1) {
final long[] buffer = new long[ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
int bufferSize = 0;
final ES87TSDBDocValuesEncoder encoder = new ES87TSDBDocValuesEncoder();
final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
values = valuesProducer.getSortedNumeric(field);
final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -965,7 +965,7 @@ public long longValue() {

private final int maxDoc = ES87TSDBDocValuesProducer.this.maxDoc;
private int doc = -1;
private final ES87TSDBDocValuesEncoder decoder = new ES87TSDBDocValuesEncoder();
private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
private long currentBlockIndex = -1;
private final long[] currentBlock = new long[ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];

Expand Down Expand Up @@ -1030,7 +1030,7 @@ public long longValue() throws IOException {
);
return new NumericDocValues() {

private final ES87TSDBDocValuesEncoder decoder = new ES87TSDBDocValuesEncoder();
private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
private long currentBlockIndex = -1;
private final long[] currentBlock = new long[ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];

Expand Down Expand Up @@ -1092,7 +1092,7 @@ private NumericValues getValues(NumericEntry entry, final long maxOrd) throws IO
final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1;
return new NumericValues() {

private final ES87TSDBDocValuesEncoder decoder = new ES87TSDBDocValuesEncoder();
private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
private long currentBlockIndex = -1;
private final long[] currentBlock = new long[ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
* </li>
* </ul>
*
* Notice that encoding and decoding are written in a nested way, for instance {@link ES87TSDBDocValuesEncoder#deltaEncode} calling
* {@link ES87TSDBDocValuesEncoder#removeOffset} and so on. This allows us to easily introduce new encoding schemes or remove existing
* Notice that encoding and decoding are written in a nested way, for instance {@link TSDBDocValuesEncoder#deltaEncode} calling
* {@link TSDBDocValuesEncoder#removeOffset} and so on. This allows us to easily introduce new encoding schemes or remove existing
* (non-effective) encoding schemes in a backward-compatible way.
*
* A token is used as a bitmask to represent which encoding is applied and allows us to detect the applied encoding scheme at decoding time.
Expand All @@ -54,11 +54,13 @@
*
* Of course, decoding follows the opposite order with respect to encoding.
*/
public class ES87TSDBDocValuesEncoder {
public class TSDBDocValuesEncoder {
private final DocValuesForUtil forUtil;
private final int numericBlockSize;

public ES87TSDBDocValuesEncoder() {
this.forUtil = new DocValuesForUtil();
public TSDBDocValuesEncoder(int numericBlockSize) {
this.forUtil = new DocValuesForUtil(numericBlockSize);
this.numericBlockSize = numericBlockSize;
}

/**
Expand All @@ -68,7 +70,7 @@ public ES87TSDBDocValuesEncoder() {
private void deltaEncode(int token, int tokenBits, long[] in, DataOutput out) throws IOException {
int gts = 0;
int lts = 0;
for (int i = 1; i < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; ++i) {
for (int i = 1; i < numericBlockSize; ++i) {
if (in[i] > in[i - 1]) {
gts++;
} else if (in[i] < in[i - 1]) {
Expand All @@ -79,7 +81,7 @@ private void deltaEncode(int token, int tokenBits, long[] in, DataOutput out) th
final boolean doDeltaCompression = (gts == 0 && lts >= 2) || (lts == 0 && gts >= 2);
long first = 0;
if (doDeltaCompression) {
for (int i = ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE - 1; i > 0; --i) {
for (int i = numericBlockSize - 1; i > 0; --i) {
in[i] -= in[i - 1];
}
// Avoid setting in[0] to 0 in case there is a minimum interval between
Expand Down Expand Up @@ -115,7 +117,7 @@ private void removeOffset(int token, int tokenBits, long[] in, DataOutput out) t
}

if (min != 0) {
for (int i = 0; i < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; ++i) {
for (int i = 0; i < numericBlockSize; ++i) {
in[i] -= min;
}
token = (token << 1) | 0x01;
Expand Down Expand Up @@ -143,7 +145,7 @@ private void gcdEncode(int token, int tokenBits, long[] in, DataOutput out) thro
}
final boolean doGcdCompression = Long.compareUnsigned(gcd, 1) > 0;
if (doGcdCompression) {
for (int i = 0; i < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; ++i) {
for (int i = 0; i < numericBlockSize; ++i) {
in[i] /= gcd;
}
token = (token << 1) | 0x01;
Expand Down Expand Up @@ -174,7 +176,7 @@ private void forEncode(int token, int tokenBits, long[] in, DataOutput out) thro
* Encode the given longs using a combination of delta-coding, GCD factorization and bit packing.
*/
void encode(long[] in, DataOutput out) throws IOException {
assert in.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE;
assert in.length == numericBlockSize;

deltaEncode(0, 0, in, out);
}
Expand All @@ -192,7 +194,7 @@ void encode(long[] in, DataOutput out) throws IOException {
* </ul>
*/
void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOException {
assert in.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE;
assert in.length == numericBlockSize;
int numRuns = 1;
long firstValue = in[0];
long previousValue = firstValue;
Expand Down Expand Up @@ -259,7 +261,7 @@ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOExceptio
}

void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException {
assert out.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE : out.length;
assert out.length == numericBlockSize : out.length;

long v1 = in.readVLong();
int encoding = Long.numberOfTrailingZeros(~v1);
Expand All @@ -275,7 +277,7 @@ void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException
Arrays.fill(out, runLen, out.length, v2);
} else if (encoding == 2) {
// bit-packed
DocValuesForUtil.decode(bitsPerOrd, in, out);
forUtil.decode(bitsPerOrd, in, out);
} else if (encoding == 3) {
// cycle encoding
int cycleLength = (int) v1;
Expand All @@ -293,13 +295,13 @@ void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException

/** Decode longs that have been encoded with {@link #encode}. */
void decode(DataInput in, long[] out) throws IOException {
assert out.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE : out.length;
assert out.length == numericBlockSize : out.length;

final int token = in.readVInt();
final int bitsPerValue = token >>> 3;

if (bitsPerValue != 0) {
DocValuesForUtil.decode(bitsPerValue, in, out);
forUtil.decode(bitsPerValue, in, out);
} else {
Arrays.fill(out, 0L);
}
Expand Down Expand Up @@ -330,21 +332,21 @@ void decode(DataInput in, long[] out) throws IOException {
}

// this loop should auto-vectorize
private static void mul(long[] arr, long m) {
for (int i = 0; i < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; ++i) {
private void mul(long[] arr, long m) {
for (int i = 0; i < numericBlockSize; ++i) {
arr[i] *= m;
}
}

// this loop should auto-vectorize
private static void add(long[] arr, long min) {
for (int i = 0; i < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; ++i) {
private void add(long[] arr, long min) {
for (int i = 0; i < numericBlockSize; ++i) {
arr[i] += min;
}
}

private static void deltaDecode(long[] arr) {
for (int i = 1; i < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; ++i) {
private void deltaDecode(long[] arr) {
for (int i = 1; i < numericBlockSize; ++i) {
arr[i] += arr[i - 1];
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,18 @@
import java.util.Random;

public class DocValuesForUtilTests extends LuceneTestCase {
int NUMERIC_BLOCK_SIZE = 1 << 7;

public void testEncodeDecode() throws IOException {
final int iterations = RandomNumbers.randomIntBetween(random(), 50, 1000);
final long[] values = new long[iterations * ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
final long[] values = new long[iterations * NUMERIC_BLOCK_SIZE];
final int[] bpvs = new int[iterations];

for (int i = 0; i < iterations; ++i) {
final int bpv = TestUtil.nextInt(random(), 1, 64);
bpvs[i] = DocValuesForUtil.roundBits(bpv);
for (int j = 0; j < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; ++j) {
values[i * ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE + j] = bpv == 64
for (int j = 0; j < NUMERIC_BLOCK_SIZE; ++j) {
values[i * NUMERIC_BLOCK_SIZE + j] = bpv == 64
? random().nextLong()
: TestUtil.nextLong(random(), 0, PackedInts.maxValue(bpv));
}
Expand All @@ -53,12 +54,12 @@ public void testEncodeDecode() throws IOException {
{
// encode
IndexOutput out = d.createOutput("test.bin", IOContext.DEFAULT);
final DocValuesForUtil forUtil = new DocValuesForUtil();
final DocValuesForUtil forUtil = new DocValuesForUtil(NUMERIC_BLOCK_SIZE);

for (int i = 0; i < iterations; ++i) {
long[] source = new long[ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
for (int j = 0; j < ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; ++j) {
source[j] = values[i * ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE + j];
long[] source = new long[NUMERIC_BLOCK_SIZE];
for (int j = 0; j < NUMERIC_BLOCK_SIZE; ++j) {
source[j] = values[i * NUMERIC_BLOCK_SIZE + j];
}
out.writeByte((byte) bpvs[i]);
forUtil.encode(source, bpvs[i], out);
Expand All @@ -70,17 +71,14 @@ public void testEncodeDecode() throws IOException {
{
// decode
IndexInput in = d.openInput("test.bin", IOContext.READONCE);
final long[] restored = new long[ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE];
final DocValuesForUtil forUtil = new DocValuesForUtil(NUMERIC_BLOCK_SIZE);
final long[] restored = new long[NUMERIC_BLOCK_SIZE];
for (int i = 0; i < iterations; ++i) {
final int bitsPerValue = in.readByte();
DocValuesForUtil.decode(bitsPerValue, in, restored);
forUtil.decode(bitsPerValue, in, restored);
assertArrayEquals(
Arrays.toString(restored),
ArrayUtil.copyOfSubArray(
values,
i * ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE,
(i + 1) * ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE
),
ArrayUtil.copyOfSubArray(values, i * NUMERIC_BLOCK_SIZE, (i + 1) * NUMERIC_BLOCK_SIZE),
restored
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@

public class ES87TSDBDocValuesEncoderTests extends LuceneTestCase {

private final ES87TSDBDocValuesEncoder encoder;
private final TSDBDocValuesEncoder encoder;
private final int blockSize = ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE;

public ES87TSDBDocValuesEncoderTests() {
this.encoder = new ES87TSDBDocValuesEncoder();
this.encoder = new TSDBDocValuesEncoder(blockSize);
}

public void testRandomValues() throws IOException {
Expand Down