Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-40937: [Java] Implement Holder-based functions for ViewVarCharVector & ViewVarBinaryVector #44187

Merged
merged 8 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,31 @@ public byte[] getObject(int index) {
* @param holder data holder to be populated by this function
*/
public void get(int index, NullableViewVarBinaryHolder holder) {
  // Populates the holder with a reference (buffer + [start, end) range) to the value stored at
  // index; no bytes are copied here. For a null slot only holder.isSet is written, so the
  // holder's buffer/start/end keep whatever they held before the call.
  final int dataLength = getValueLength(index);
  if (isSet(index) == 0) {
    holder.isSet = 0;
    return;
  }
  holder.isSet = 1;

  // Byte position just past the length field of this element's view entry.
  final long afterLength = ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH;
  if (dataLength > INLINE_SIZE) {
    // Long value: the bytes live in one of the data buffers. The view entry holds, after the
    // length and prefix fields, the data buffer index followed by the offset into that buffer.
    final int bufferIndex = viewBuffer.getInt(afterLength + PREFIX_WIDTH);
    final int dataOffset = viewBuffer.getInt(afterLength + PREFIX_WIDTH + BUF_INDEX_WIDTH);
    holder.buffer = dataBuffers.get(bufferIndex);
    holder.start = dataOffset;
    holder.end = dataOffset + dataLength;
  } else {
    // Short value: the bytes are stored inline in the view buffer, right after the length field.
    holder.buffer = viewBuffer;
    holder.start = (int) afterLength;
    holder.end = (int) afterLength + dataLength;
  }
}

/*----------------------------------------------------------------*
Expand All @@ -150,8 +173,10 @@ public void get(int index, NullableViewVarBinaryHolder holder) {
* @param holder holder that carries data buffer.
*/
public void set(int index, ViewVarBinaryHolder holder) {
  // Stores bytes [holder.start, holder.end) of holder.buffer as the value at index.
  // A non-nullable holder always carries a value, so the slot must be flagged valid. The
  // nullable-holder overload sets the validity bit explicitly before calling setBytes, which
  // indicates setBytes does not do it; without this a previously null slot would stay null.
  BitVectorHelper.setBit(validityBuffer, index);
  final int start = holder.start;
  final int length = holder.end - start;
  setBytes(index, holder.buffer, start, length);
  lastSet = index;
}

/**
Expand All @@ -162,8 +187,9 @@ public void set(int index, ViewVarBinaryHolder holder) {
* @param holder holder that carries data buffer.
*/
public void setSafe(int index, ViewVarBinaryHolder holder) {
  // Same as set(index, holder), but first calls handleSafe with the value's byte length so the
  // vector can make room for the element before it is written.
  final int length = holder.end - holder.start;
  handleSafe(index, length);
  set(index, holder);
}

/**
Expand All @@ -174,8 +200,15 @@ public void setSafe(int index, ViewVarBinaryHolder holder) {
* @param holder holder that carries data buffer.
*/
public void set(int index, NullableViewVarBinaryHolder holder) {
  // Writes the holder's state at index: a null when holder.isSet == 0, otherwise the bytes
  // [holder.start, holder.end) of holder.buffer. lastSet is advanced in both cases.
  if (holder.isSet == 0) {
    setNull(index);
  } else {
    // Flag the slot as valid before writing its view entry.
    BitVectorHelper.setBit(validityBuffer, index);
    final int start = holder.start;
    final int length = holder.end - start;
    setBytes(index, holder.buffer, start, length);
  }
  lastSet = index;
}

/**
Expand All @@ -186,8 +219,9 @@ public void set(int index, NullableViewVarBinaryHolder holder) {
* @param holder holder that carries data buffer.
*/
public void setSafe(int index, NullableViewVarBinaryHolder holder) {
  // Same as set(index, holder), but first calls handleSafe so the vector can make room.
  // start/end are only meaningful when the holder is set: get() leaves them untouched for null
  // slots, so an unset holder may carry stale offsets. Size a null write as zero bytes rather
  // than from those leftovers.
  final int length = holder.isSet == 0 ? 0 : holder.end - holder.start;
  handleSafe(index, length);
  set(index, holder);
}

/*----------------------------------------------------------------*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,31 @@ public void read(int index, ReusableBuffer<?> buffer) {
* @param holder data holder to be populated by this function
*/
public void get(int index, NullableViewVarCharHolder holder) {
  // Populates the holder with a reference (buffer + [start, end) range) to the value stored at
  // index; no bytes are copied here. For a null slot only holder.isSet is written, so the
  // holder's buffer/start/end keep whatever they held before the call.
  final int dataLength = getValueLength(index);
  if (isSet(index) == 0) {
    holder.isSet = 0;
    return;
  }
  holder.isSet = 1;

  // Byte position just past the length field of this element's view entry.
  final long afterLength = ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH;
  if (dataLength > INLINE_SIZE) {
    // Long value: the bytes live in one of the data buffers. The view entry holds, after the
    // length and prefix fields, the data buffer index followed by the offset into that buffer.
    final int bufferIndex = viewBuffer.getInt(afterLength + PREFIX_WIDTH);
    final int dataOffset = viewBuffer.getInt(afterLength + PREFIX_WIDTH + BUF_INDEX_WIDTH);
    holder.buffer = dataBuffers.get(bufferIndex);
    holder.start = dataOffset;
    holder.end = dataOffset + dataLength;
  } else {
    // Short value: the bytes are stored inline in the view buffer, right after the length field.
    holder.buffer = viewBuffer;
    holder.start = (int) afterLength;
    holder.end = (int) afterLength + dataLength;
  }
}

/*----------------------------------------------------------------*
Expand All @@ -162,8 +184,10 @@ public void get(int index, NullableViewVarCharHolder holder) {
* @param holder holder that carries data buffer.
*/
public void set(int index, ViewVarCharHolder holder) {
  // Stores bytes [holder.start, holder.end) of holder.buffer as the value at index.
  // A non-nullable holder always carries a value, so the slot must be flagged valid. The
  // nullable-holder overload sets the validity bit explicitly before calling setBytes, which
  // indicates setBytes does not do it; without this a previously null slot would stay null.
  BitVectorHelper.setBit(validityBuffer, index);
  final int start = holder.start;
  final int length = holder.end - start;
  setBytes(index, holder.buffer, start, length);
  lastSet = index;
}

/**
Expand All @@ -174,8 +198,9 @@ public void set(int index, ViewVarCharHolder holder) {
* @param holder holder that carries data buffer.
*/
public void setSafe(int index, ViewVarCharHolder holder) {
  // Same as set(index, holder), but first calls handleSafe with the value's byte length so the
  // vector can make room for the element before it is written.
  final int length = holder.end - holder.start;
  handleSafe(index, length);
  set(index, holder);
}

/**
Expand All @@ -186,9 +211,15 @@ public void setSafe(int index, ViewVarCharHolder holder) {
* @param holder holder that carries data buffer.
*/
public void set(int index, NullableViewVarCharHolder holder) {
  // Writes the holder's state at index: a null when holder.isSet == 0, otherwise the bytes
  // [holder.start, holder.end) of holder.buffer. lastSet is advanced in both cases.
  if (holder.isSet == 0) {
    setNull(index);
  } else {
    // Flag the slot as valid before writing its view entry.
    BitVectorHelper.setBit(validityBuffer, index);
    final int start = holder.start;
    final int length = holder.end - start;
    setBytes(index, holder.buffer, start, length);
  }
  lastSet = index;
}

/**
Expand All @@ -199,9 +230,9 @@ public void set(int index, NullableViewVarCharHolder holder) {
* @param holder holder that carries data buffer.
*/
public void setSafe(int index, NullableViewVarCharHolder holder) {
  // Same as set(index, holder), but first calls handleSafe so the vector can make room.
  // start/end are only meaningful when the holder is set: get() leaves them untouched for null
  // slots, so an unset holder may carry stale offsets. Size a null write as zero bytes rather
  // than from those leftovers.
  final int length = holder.isSet == 0 ? 0 : holder.end - holder.start;
  handleSafe(index, length);
  set(index, holder);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@
import org.apache.arrow.memory.rounding.DefaultRoundingPolicy;
import org.apache.arrow.memory.util.ArrowBufPointer;
import org.apache.arrow.memory.util.CommonUtil;
import org.apache.arrow.util.AutoCloseables;
import org.apache.arrow.vector.holders.NullableViewVarBinaryHolder;
import org.apache.arrow.vector.holders.NullableViewVarCharHolder;
import org.apache.arrow.vector.holders.ValueHolder;
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
import org.apache.arrow.vector.types.Types;
Expand All @@ -63,7 +67,7 @@
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

public class TestVarCharViewVector {
public class TestVariableWidthViewVector {

// short string (length <= 12)
private static final byte[] STR0 = "0123456".getBytes(StandardCharsets.UTF_8);
Expand Down Expand Up @@ -371,6 +375,136 @@ public void testMixedAllocation() {
}
}

@Test
public void testSetNullableViewVarCharHolder() {
  // Exercises setSafe/get through NullableViewVarCharHolder: fresh writes, a null write,
  // holder-based copies between slots, and overwriting already-populated slots.
  try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) {
    // Start with no capacity so every setSafe call has to make room itself.
    viewVarCharVector.allocateNew(0, 0);
    final List<byte[]> strings = List.of(STR0, STR1, STR2, STR3, STR4, STR5);

    final NullableViewVarCharHolder stringHolder = new NullableViewVarCharHolder();

    // set not null
    final int size = strings.size();
    for (int i = 0; i < size; i++) {
      setAndCheck(viewVarCharVector, i, strings.get(i), stringHolder);
    }

    // set null
    setAndCheck(viewVarCharVector, 6, null, stringHolder);

    // copy by holder
    // short value (STR0 is 7 bytes)
    copyAndCheck(viewVarCharVector, stringHolder, 0, 7);
    // long value (STR2 is expected to be longer than 12 bytes)
    copyAndCheck(viewVarCharVector, stringHolder, 2, 8);
    // null
    copyAndCheck(viewVarCharVector, stringHolder, 6, 9);

    // test overwrite: rewrite the first slots with the same strings in reverse order
    for (int i = 0; i < size; i++) {
      setAndCheck(viewVarCharVector, i, strings.get(size - i - 1), stringHolder);
    }

    // Overwrite the null slot with a long value. Encode with an explicit charset, like the STR
    // fixtures, instead of relying on the platform default.
    final byte[] longBytes = generateRandomString(128).getBytes(StandardCharsets.UTF_8);
    setAndCheck(viewVarCharVector, 6, longBytes, stringHolder);
  }
}

@Test
public void testSetNullableViewVarBinaryHolder() {
  // Exercises setSafe/get through NullableViewVarBinaryHolder: fresh writes, a null write,
  // holder-based copies between slots, and overwriting already-populated slots.
  try (final ViewVarBinaryVector viewVarBinaryVector =
      new ViewVarBinaryVector("myvector", allocator)) {
    // Start with no capacity so every setSafe call has to make room itself.
    viewVarBinaryVector.allocateNew(0, 0);
    final List<byte[]> strings = List.of(STR0, STR1, STR2, STR3, STR4, STR5);

    final NullableViewVarBinaryHolder holder = new NullableViewVarBinaryHolder();

    // set not null
    final int size = strings.size();
    for (int i = 0; i < size; i++) {
      setAndCheck(viewVarBinaryVector, i, strings.get(i), holder);
    }

    // set null
    setAndCheck(viewVarBinaryVector, 6, null, holder);

    // copy by holder
    // short value (STR0 is 7 bytes)
    copyAndCheck(viewVarBinaryVector, holder, 0, 7);
    // long value (STR2 is expected to be longer than 12 bytes)
    copyAndCheck(viewVarBinaryVector, holder, 2, 8);
    // null
    copyAndCheck(viewVarBinaryVector, holder, 6, 9);

    // test overwrite: rewrite the first slots with the same values in reverse order
    for (int i = 0; i < size; i++) {
      setAndCheck(viewVarBinaryVector, i, strings.get(size - i - 1), holder);
    }

    // Overwrite the null slot with a long value. Encode with an explicit charset, like the STR
    // fixtures, instead of relying on the platform default.
    final byte[] longBytes = generateRandomString(128).getBytes(StandardCharsets.UTF_8);
    setAndCheck(viewVarBinaryVector, 6, longBytes, holder);
  }
}

private static void copyAndCheck(
    BaseVariableWidthViewVector vector, ValueHolder holder, int fromIndex, int toIndex) {
  // Reads slot fromIndex into the holder, writes the holder to slot toIndex via setSafe, then
  // asserts both slots return equal contents. The holder must be the nullable holder type that
  // matches the concrete vector type; any other vector type is left untouched.
  if (vector instanceof ViewVarCharVector) {
    final ViewVarCharVector charVector = (ViewVarCharVector) vector;
    final NullableViewVarCharHolder charHolder = (NullableViewVarCharHolder) holder;
    charVector.get(fromIndex, charHolder);
    charVector.setSafe(toIndex, charHolder);
  }

  if (vector instanceof ViewVarBinaryVector) {
    final ViewVarBinaryVector binaryVector = (ViewVarBinaryVector) vector;
    final NullableViewVarBinaryHolder binHolder = (NullableViewVarBinaryHolder) holder;
    binaryVector.get(fromIndex, binHolder);
    binaryVector.setSafe(toIndex, binHolder);
  }

  // The copy must be indistinguishable from its source slot.
  assertArrayEquals(vector.get(fromIndex), vector.get(toIndex));
}

private void setAndCheck(
    ViewVarCharVector vector, int index, byte[] str, NullableViewVarCharHolder stringHolder) {
  // Writes str (or a null when str is null) at index through the holder using setSafe, then
  // asserts that vector.get(index) returns the same bytes.
  ArrowBuf buf = null;
  try {
    if (null == str) {
      // Only the flag is changed; the holder keeps its previous buffer/start/end.
      stringHolder.isSet = 0;
    } else {
      // Stage the bytes in a scratch buffer for the holder to point at.
      buf = allocator.buffer(str.length);
      buf.setBytes(0, str);
      stringHolder.isSet = 1;
      stringHolder.start = 0;
      stringHolder.end = str.length;
      stringHolder.buffer = buf;
    }
    vector.setSafe(index, stringHolder);

    // verify results
    assertArrayEquals(str, vector.get(index));
  } finally {
    // Release the scratch buffer even when setSafe or the assertion throws, so a failing check
    // does not also leave an unreleased allocation behind.
    AutoCloseables.closeNoChecked(buf);
  }
}

private void setAndCheck(
    ViewVarBinaryVector vector, int index, byte[] str, NullableViewVarBinaryHolder binaryHolder) {
  // Writes str (or a null when str is null) at index through the holder using setSafe, then
  // asserts that vector.get(index) returns the same bytes.
  ArrowBuf buf = null;
  try {
    if (null == str) {
      // Only the flag is changed; the holder keeps its previous buffer/start/end.
      binaryHolder.isSet = 0;
    } else {
      // Stage the bytes in a scratch buffer for the holder to point at.
      buf = allocator.buffer(str.length);
      buf.setBytes(0, str);
      binaryHolder.isSet = 1;
      binaryHolder.start = 0;
      binaryHolder.end = str.length;
      binaryHolder.buffer = buf;
    }
    vector.setSafe(index, binaryHolder);

    // verify results
    assertArrayEquals(str, vector.get(index));
  } finally {
    // Release the scratch buffer even when setSafe or the assertion throws, so a failing check
    // does not also leave an unreleased allocation behind.
    AutoCloseables.closeNoChecked(buf);
  }
}

@Test
public void testAllocationIndexOutOfBounds() {
assertThrows(
Expand Down
Loading