diff --git a/parquet/date.go b/parquet/date.go index 2fae6f7e..d3454e74 100644 --- a/parquet/date.go +++ b/parquet/date.go @@ -6,8 +6,8 @@ import ( "github.com/apache/arrow/go/v15/arrow/memory" ) -func reverseTransformDate32(arr *array.Timestamp, toTime toTimeFunc) arrow.Array { - builder := array.NewDate32Builder(memory.DefaultAllocator) +func reverseTransformDate64(arr *array.Date32) *array.Date64 { + builder := array.NewDate64Builder(memory.DefaultAllocator) for i := 0; i < arr.Len(); i++ { if arr.IsNull(i) { @@ -15,23 +15,19 @@ func reverseTransformDate32(arr *array.Timestamp, toTime toTimeFunc) arrow.Array continue } - builder.Append(arrow.Date32FromTime(toTime(arr.Value(i)))) + builder.Append(arrow.Date64FromTime(arr.Value(i).ToTime())) } - return builder.NewArray() + return builder.NewDate64Array() } -func reverseTransformDate64(arr *array.Timestamp, toTime toTimeFunc) arrow.Array { - builder := array.NewDate64Builder(memory.DefaultAllocator) - - for i := 0; i < arr.Len(); i++ { - if arr.IsNull(i) { - builder.AppendNull() - continue - } - - builder.Append(arrow.Date64FromTime(toTime(arr.Value(i)))) +func reverseTransformFromDate32(dt arrow.DataType, arr *array.Date32) arrow.Array { + switch dt.(type) { + case *arrow.Date32Type: + return arr + case *arrow.Date64Type: + return reverseTransformDate64(arr) + default: + panic("unsupported " + dt.String() + " type in reverseTransformFromDate32") } - - return builder.NewArray() } diff --git a/parquet/read.go b/parquet/read.go index f8994b62..0b8d0e73 100644 --- a/parquet/read.go +++ b/parquet/read.go @@ -72,6 +72,8 @@ func reverseTransformArray(dt arrow.DataType, arr arrow.Array) arrow.Array { return reverseTransformTime32(dt.(*arrow.Time32Type), arr) case *array.Time64: return reverseTransformTime64(dt.(*arrow.Time64Type), arr) + case *array.Date32: + return reverseTransformFromDate32(dt, arr) case *array.Struct: dt := dt.(*arrow.StructType) children := make([]arrow.ArrayData, arr.NumField()) diff --git a/parquet/time.go b/parquet/time.go index 646914ff..1bac3c07 100644 --- a/parquet/time.go +++ b/parquet/time.go @@ -1,15 +1,11 @@ package parquet import ( - "time" - "github.com/apache/arrow/go/v15/arrow" "github.com/apache/arrow/go/v15/arrow/array" "github.com/apache/arrow/go/v15/arrow/memory" ) -type toTimeFunc func(arrow.Timestamp) time.Time - func reverseTransformTime32(dt *arrow.Time32Type, arr *array.Time32) arrow.Array { builder := array.NewTime32Builder(memory.DefaultAllocator, dt) diff --git a/parquet/timestamp.go b/parquet/timestamp.go index 2549f38f..b168c140 100644 --- a/parquet/timestamp.go +++ b/parquet/timestamp.go @@ -1,8 +1,6 @@ package parquet import ( - "fmt" - "github.com/apache/arrow/go/v15/arrow" "github.com/apache/arrow/go/v15/arrow/array" "github.com/apache/arrow/go/v15/arrow/memory" @@ -25,16 +23,7 @@ func reverseTransformTimestamp(dt *arrow.TimestampType, arr *array.Timestamp) ar } func reverseTransformFromTimestamp(dt arrow.DataType, arr *array.Timestamp) arrow.Array { - toTime, err := arr.DataType().(*arrow.TimestampType).GetToTimeFunc() - if err != nil { - panic(fmt.Errorf("failed GetToTimeFunc: %w", err)) - } - switch dt := dt.(type) { - case *arrow.Date32Type: - return reverseTransformDate32(arr, toTime) - case *arrow.Date64Type: - return reverseTransformDate64(arr, toTime) case *arrow.TimestampType: return reverseTransformTimestamp(dt, arr) default: diff --git a/parquet/write_read_test.go b/parquet/write_read_test.go index be176759..ecb4dcf1 100644 --- a/parquet/write_read_test.go +++ b/parquet/write_read_test.go @@ -17,8 +17,7 @@ import ( func TestWriteRead(t *testing.T) { const rows = 10 var b bytes.Buffer - // We can't test DATE64 since arrow changes it to DATE32 see https://github.com/apache/arrow/pull/39460 - table := schema.TestTable("test", schema.TestSourceOptions{SkipDates: true}) + table := schema.TestTable("test", schema.TestSourceOptions{}) sourceName := "test-source" syncTime := time.Now().UTC().Round(time.Second) opts := schema.GenTestDataOptions{ @@ -66,8 +65,7 @@ func TestWriteRead(t *testing.T) { func TestWriteReadSliced(t *testing.T) { const rows = 10 var b bytes.Buffer - // We can't test DATE64 since arrow changes it to DATE32 see https://github.com/apache/arrow/pull/39460 - table := schema.TestTable("test", schema.TestSourceOptions{SkipDates: true}) + table := schema.TestTable("test", schema.TestSourceOptions{}) sourceName := "test-source" syncTime := time.Now().UTC().Round(time.Second) opts := schema.GenTestDataOptions{ @@ -114,8 +112,7 @@ func TestWriteReadSliced(t *testing.T) { } func BenchmarkWrite(b *testing.B) { - // We can't test DATE64 since arrow changes it to DATE32 see https://github.com/apache/arrow/pull/39460 - table := schema.TestTable("test", schema.TestSourceOptions{SkipDates: true}) + table := schema.TestTable("test", schema.TestSourceOptions{}) sourceName := "test-source" syncTime := time.Now().UTC().Round(time.Second) opts := schema.GenTestDataOptions{