Skip to content

Commit

Permalink
support parquet write arrays
Browse files Browse the repository at this point in the history
  • Loading branch information
wbo4958 committed Jun 4, 2021
1 parent 7a74d25 commit 84f64db
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 8 deletions.
8 changes: 4 additions & 4 deletions docs/supported_ops.md
Original file line number Diff line number Diff line change
Expand Up @@ -535,9 +535,9 @@ Accelerator supports are described below.
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><em>PS* (Only supported for Parquet; missing nested NULL, BINARY, CALENDAR, MAP, UDT)</em></td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><em>PS* (Only supported for Parquet; missing nested NULL, BINARY, CALENDAR, ARRAY, MAP, UDT)</em></td>
<td><em>PS* (Only supported for Parquet; missing nested NULL, BINARY, CALENDAR, MAP, UDT)</em></td>
<td><b>NS</b></td>
</tr>
<tr>
Expand Down Expand Up @@ -20627,9 +20627,9 @@ dates or timestamps, or for a lack of type coercion support.
<td> </td>
<td><b>NS</b></td>
<td> </td>
<td><em>PS* (missing nested BINARY, MAP, UDT)</em></td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><em>PS* (missing nested BINARY, ARRAY, MAP, UDT)</em></td>
<td><em>PS* (missing nested BINARY, MAP, UDT)</em></td>
<td><b>NS</b></td>
</tr>
</table>
5 changes: 3 additions & 2 deletions integration_tests/src/main/python/parquet_write_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@
TimestampGen(start=datetime(1677, 9, 22, tzinfo=timezone.utc), end=datetime(2262, 4, 11, tzinfo=timezone.utc))]
parquet_struct_gen = [StructGen([['child'+str(ind), sub_gen] for ind, sub_gen in enumerate(parquet_basic_gen)]),
StructGen([['child0', StructGen([[ 'child1', byte_gen]])]])]

parquet_write_gens_list = [parquet_basic_gen + parquet_struct_gen +
parquet_array_gen = [ArrayGen(sub_gen, max_length=10) for sub_gen in parquet_basic_gen + parquet_struct_gen] + \
[ArrayGen(ArrayGen(sub_gen, max_length=10), max_length=10) for sub_gen in parquet_basic_gen + parquet_struct_gen]
parquet_write_gens_list = [parquet_basic_gen + parquet_struct_gen + parquet_array_gen +
[decimal_gen_default,
decimal_gen_scale_precision, decimal_gen_same_scale_precision, decimal_gen_64bit]]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,8 @@ object GpuOverrides {
(ParquetFormatType, FileFormatChecks(
cudfRead = (TypeSig.commonCudfTypes + TypeSig.DECIMAL + TypeSig.STRUCT + TypeSig.ARRAY +
TypeSig.MAP).nested(),
cudfWrite = (TypeSig.commonCudfTypes + TypeSig.DECIMAL + TypeSig.STRUCT).nested(),
cudfWrite = (TypeSig.commonCudfTypes + TypeSig.DECIMAL + TypeSig.STRUCT +
TypeSig.ARRAY).nested(),
sparkSig = (TypeSig.atomics + TypeSig.STRUCT + TypeSig.ARRAY + TypeSig.MAP +
TypeSig.UDT).nested())),
(OrcFormatType, FileFormatChecks(
Expand Down Expand Up @@ -2828,7 +2829,8 @@ object GpuOverrides {
"Writing data",
ExecChecks((TypeSig.commonCudfTypes +
TypeSig.DECIMAL.withPsNote(TypeEnum.DECIMAL, "Only supported for Parquet") +
TypeSig.STRUCT.withPsNote(TypeEnum.STRUCT, "Only supported for Parquet")).nested(),
TypeSig.STRUCT.withPsNote(TypeEnum.STRUCT, "Only supported for Parquet") +
TypeSig.ARRAY.withPsNote(TypeEnum.ARRAY, "Only supported for Parquet")).nested(),
TypeSig.all),
(p, conf, parent, r) => new SparkPlanMeta[DataWritingCommandExec](p, conf, parent, r) {
override val childDataWriteCmds: scala.Seq[DataWritingCommandMeta[_]] =
Expand Down

0 comments on commit 84f64db

Please sign in to comment.