Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ORC-709: FIX Boolean to StringGroup schema evolution #594

Merged
merged 3 commits into from
Dec 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1774,12 +1774,64 @@ public void nextVector(ColumnVector previousVector,
}
}

/**
 * Selects the converting reader for a column whose file type is BOOLEAN,
 * based on the category requested by the reader schema.
 *
 * <p>String-group targets (STRING, CHAR, VARCHAR) get the dedicated
 * {@code StringGroupFromBooleanTreeReader}; integer-family, floating point,
 * decimal and timestamp targets reuse the corresponding "from any integer"
 * readers.
 *
 * @param columnId   id of the column being read
 * @param fileType   type of the column as stored in the file
 * @param readerType type requested by the reader schema
 * @param context    reader context handed through to the created reader
 * @return the reader performing the conversion to {@code readerType}
 * @throws IllegalArgumentException if the integer-family target category
 *         equals the file category (no conversion needed), or if the target
 *         category has no supported conversion
 * @throws IOException declared for the reader constructors invoked here
 */
private static TypeReader createBooleanConvertTreeReader(int columnId,
    TypeDescription fileType,
    TypeDescription readerType,
    Context context) throws IOException {

  // CONVERT from BOOLEAN to schema type.
  final Category target = readerType.getCategory();

  switch (target) {

    case STRING:
    case CHAR:
    case VARCHAR:
      return new StringGroupFromBooleanTreeReader(
          columnId, fileType, readerType, context);

    case BOOLEAN:
    case BYTE:
    case SHORT:
    case INT:
    case LONG:
      // Only a genuine change of category goes through a converting reader.
      if (fileType.getCategory() != target) {
        return new AnyIntegerFromAnyIntegerTreeReader(
            columnId, fileType, readerType, context);
      }
      throw new IllegalArgumentException("No conversion of type " +
          target + " to self needed");

    case FLOAT:
    case DOUBLE:
      return new DoubleFromAnyIntegerTreeReader(columnId, fileType, context);

    case DECIMAL:
      return new DecimalFromAnyIntegerTreeReader(columnId, fileType, context);

    case TIMESTAMP:
    case TIMESTAMP_INSTANT: {
      final boolean instant = target == Category.TIMESTAMP_INSTANT;
      return new TimestampFromAnyIntegerTreeReader(
          columnId, fileType, context, instant);
    }

    // Not currently supported conversion(s):
    case BINARY:
    case DATE:
    case STRUCT:
    case LIST:
    case MAP:
    case UNION:
    default:
      throw new IllegalArgumentException("Unsupported type " +
          target);
  }
}

private static TypeReader createAnyIntegerConvertTreeReader(int columnId,
TypeDescription fileType,
TypeDescription readerType,
Context context) throws IOException {

// CONVERT from (BOOLEAN, BYTE, SHORT, INT, LONG) to schema type.
// CONVERT from (BYTE, SHORT, INT, LONG) to schema type.
//
switch (readerType.getCategory()) {

Expand Down Expand Up @@ -2065,7 +2117,7 @@ private static TypeReader createBinaryConvertTreeReader(int columnId,
TypeDescription readerType,
Context context) throws IOException {

// CONVERT from DATE to schema type.
// CONVERT from BINARY to schema type.
switch (readerType.getCategory()) {

case STRING:
Expand Down Expand Up @@ -2145,7 +2197,8 @@ private static TypeReader createBinaryConvertTreeReader(int columnId,
* DecimalFromStringGroupTreeReader (written)
*
* To STRING, CHAR, VARCHAR:
* Convert from (BOOLEAN, BYTE, SHORT, INT, LONG) using to string conversion
* Convert from (BYTE, SHORT, INT, LONG) using to string conversion
* Convert from BOOLEAN using boolean (True/False) conversion
* Convert from (FLOAT, DOUBLE) using to string conversion
* Convert from DECIMAL using HiveDecimal.toString
* Convert from CHAR by stripping pads
Expand All @@ -2155,6 +2208,7 @@ private static TypeReader createBinaryConvertTreeReader(int columnId,
* Convert from BINARY using Text.decode
*
* StringGroupFromAnyIntegerTreeReader (written)
* StringGroupFromBooleanTreeReader (written)
* StringGroupFromFloatTreeReader (written)
* StringGroupFromDoubleTreeReader (written)
* StringGroupFromDecimalTreeReader (written)
Expand Down Expand Up @@ -2233,13 +2287,15 @@ public static TypeReader createConvertTreeReader(TypeDescription readerType,

switch (fileType.getCategory()) {

case BOOLEAN:
case BYTE:
case SHORT:
case INT:
case LONG:
return createAnyIntegerConvertTreeReader(columnId, fileType, readerType, context);

case BOOLEAN:
return createBooleanConvertTreeReader(columnId, fileType, readerType, context);

case FLOAT:
case DOUBLE:
return createDoubleConvertTreeReader(columnId, fileType, readerType, context);
Expand Down
31 changes: 31 additions & 0 deletions java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,37 @@ public void testDecimalToDecimal64Evolution() throws Exception {
rows.close();
}

/**
 * Writes a three-row BOOLEAN file, reads it back with a STRING reader
 * schema, and checks each value comes back as "TRUE" or "FALSE".
 */
@Test
public void testBooleanToStringEvolution() throws Exception {
  // Values written (as longs) and the text expected after evolution.
  final long[] written = {1L, 0L, 1L};
  final String[] expected = {"TRUE", "FALSE", "TRUE"};

  testFilePath = new Path(workDir, "TestSchemaEvolution." +
      testCaseName.getMethodName() + ".orc");

  // --- write side: single BOOLEAN column ---
  TypeDescription fileSchema = TypeDescription.createBoolean();
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf).setSchema(fileSchema).stripeSize(100000)
          .bufferSize(10000));
  VectorizedRowBatch batch = new VectorizedRowBatch(1, 1024);
  LongColumnVector booleans = new LongColumnVector(1024);
  batch.cols[0] = booleans;
  batch.reset();
  batch.size = written.length;
  for (int row = 0; row < written.length; ++row) {
    booleans.vector[row] = written[row];
  }
  writer.addRowBatch(batch);
  writer.close();

  // --- read side: same column requested as STRING ---
  Reader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));
  TypeDescription schemaOnRead = TypeDescription.createString();
  RecordReader rows = reader.rows(reader.options().schema(schemaOnRead));
  batch = schemaOnRead.createRowBatch();
  rows.nextBatch(batch);

  BytesColumnVector strings = (BytesColumnVector) batch.cols[0];
  for (int row = 0; row < expected.length; ++row) {
    assertEquals(expected[row], strings.toString(row));
  }
  rows.close();
}

@Test
public void testCharToStringEvolution() throws IOException {
TypeDescription fileType = TypeDescription.fromString("struct<x:char(10)>");
Expand Down