-
Notifications
You must be signed in to change notification settings - Fork 294
/
column_read.go
95 lines (83 loc) · 3.04 KB
/
column_read.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
package main
import (
"log"
"time"
"github.com/xitongsys/parquet-go-source/local"
"github.com/xitongsys/parquet-go/common"
"github.com/xitongsys/parquet-go/reader"
"github.com/xitongsys/parquet-go/writer"
)
// Student is the row type used by this example: main writes values of this
// type with the parquet writer and reads individual columns back. Each field
// carries a `parquet` struct tag giving the column name and its type.
type Student struct {
Name string `parquet:"name=name, type=BYTE_ARRAY, convertedtype=UTF8"`
Age int32 `parquet:"name=age, type=INT32"`
// NOTE(review): main reads column index 2 into a variable called ids, so
// field order presumably determines column index — confirm before reordering.
Id int64 `parquet:"name=id, type=INT64"`
Weight float32 `parquet:"name=weight, type=FLOAT"`
Sex bool `parquet:"name=sex, type=BOOLEAN"`
// Day is filled by main with the Unix time divided by 3600 and 24, i.e. a
// whole-day count since the Unix epoch.
Day int32 `parquet:"name=day, type=INT32, convertedtype=DATE"`
// Class is read back by main via the path "parquet_go_root.class.list.element".
Class []string `parquet:"name=class, type=SLICE, convertedtype=SLICE, valuetype=BYTE_ARRAY, valueconvertedtype=UTF8"`
// Score is read back by main as two separate columns,
// "parquet_go_root.score.key_value.key" and "...key_value.value".
Score map[string]int32 `parquet:"name=score, type=MAP, convertedtype=MAP, keytype=BYTE_ARRAY, keyconvertedtype=UTF8, valuetype=INT32"`
}
// main writes ten Student rows to column.parquet and then reads selected
// columns back with the column reader, logging the values, repetition levels
// and definition levels it receives.
//
// Every error is logged. Failures that make the following steps meaningless
// (cannot create/open the file, cannot create the writer/reader, cannot
// finalize the file) end the program after releasing the open file handle.
func main() {
	const path = "column.parquet"

	// write
	fw, err := local.NewLocalFileWriter(path)
	if err != nil {
		log.Println("Can't create file", err)
		return
	}
	pw, err := writer.NewParquetWriter(fw, new(Student), 4)
	if err != nil {
		log.Println("Can't create parquet writer", err)
		fw.Close() // best effort: nothing has been written yet
		return
	}

	num := int64(10)
	// Compute the day count once so every row carries the same value.
	day := int32(time.Now().Unix() / 3600 / 24)
	for i := int64(0); i < num; i++ {
		stu := Student{
			Name:   "StudentName",
			Age:    int32(20 + i%5),
			Id:     i,
			Weight: float32(50.0 + float32(i)*0.1),
			Sex:    i%2 == 0,
			Day:    day,
			Class:  []string{"Math", "Physics", "Algorithm"},
			Score:  map[string]int32{"Math": int32(100 - i), "Physics": int32(100 - i), "Algorithm": int32(100 - i)},
		}
		// A failed row is reported but does not stop the remaining rows.
		if err = pw.Write(stu); err != nil {
			log.Println("Write error", err)
		}
	}
	if err = pw.WriteStop(); err != nil {
		// Do not go on to read a file that was not finalized.
		log.Println("WriteStop error", err)
		fw.Close() // best effort: the write already failed
		return
	}
	if err = fw.Close(); err != nil {
		log.Println("Close error", err)
		return
	}
	log.Println("Write Finished")

	// read
	fr, err := local.NewLocalFileReader(path)
	if err != nil {
		log.Println("Can't open file", err)
		return
	}
	defer fr.Close()

	pr, err := reader.NewParquetColumnReader(fr, 4)
	if err != nil {
		log.Println("Can't create column reader", err)
		return
	}
	// Deferred calls run last-in-first-out, so ReadStop still happens before
	// the file is closed.
	defer pr.ReadStop()

	num = int64(pr.GetNumRows())

	// Skip 5 rows of the name column only, then request up to num values.
	namePath := common.ReformPathStr("parquet_go_root.name")
	if err = pr.SkipRowsByPath(namePath, 5); err != nil {
		log.Println("Skip error", err)
	}
	names, rls, dls, err := pr.ReadColumnByPath(namePath, num)
	log.Println("name", names, rls, dls, err)

	classes, rls, dls, err := pr.ReadColumnByPath(common.ReformPathStr("parquet_go_root.class.list.element"), num)
	log.Println("class", classes, rls, dls, err)

	// Log each score column right after its own read so the key column's
	// error is not overwritten by the value column's result.
	scoreKeys, _, _, err := pr.ReadColumnByPath(common.ReformPathStr("parquet_go_root.score.key_value.key"), num)
	log.Println("parquet_go_root.scores_key", scoreKeys, err)
	scoreValues, _, _, err := pr.ReadColumnByPath(common.ReformPathStr("parquet_go_root.score.key_value.value"), num)
	log.Println("parquet_go_root.scores_value", scoreValues, err)

	// Skip 5 rows of column index 2 only (presumably the id column, given the
	// field order of Student — confirm), then read that column.
	if err = pr.SkipRowsByIndex(2, 5); err != nil {
		log.Println("Skip error", err)
	}
	ids, _, _, err := pr.ReadColumnByIndex(2, num)
	if err != nil {
		log.Println("Read error", err)
	}
	log.Println(ids)
}