-
Notifications
You must be signed in to change notification settings - Fork 2
/
bulk_processor.go
188 lines (154 loc) · 3.82 KB
/
bulk_processor.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
package dorisloader
import (
"context"
"errors"
"sync"
"time"
)
// BulkProcessor coordinates a pool of bulkWorker goroutines that receive rows
// from a shared channel, plus an optional goroutine that periodically asks
// every worker to flush. Build one with NewBulkProcessor and drive it with
// Start, Add, Flush and Close.
type BulkProcessor struct {
	// c is the client handed to NewBulkProcessor (presumably used by the
	// workers to reach the server — TODO confirm).
	c *Client
	// name is the label handed to NewBulkProcessor.
	name string
	// db is the value reported by DB().
	db string
	// table is the value reported by Table().
	table string
	// bulkActions is one of the three settings checked by checkInterval
	// (presumably the per-batch row-count threshold — TODO confirm).
	bulkActions int
	// bulkSize is one of the three settings checked by checkInterval
	// (presumably the per-batch byte-size threshold — TODO confirm).
	bulkSize int
	// flushInterval is the period of the flusher's ticker; Start launches the
	// flusher only when it is greater than zero.
	flushInterval time.Duration
	// flusherStopC carries the stop request and its acknowledgement between
	// Close and the flusher goroutine. It is nil while no flusher is running.
	flusherStopC chan struct{}
	// retryItemStatusCodes is stored as passed to NewBulkProcessor
	// (presumably the status codes that make a worker retry — TODO confirm).
	retryItemStatusCodes map[int]struct{}
	// numWorkers is the number of workers Start creates; Start raises any
	// value below 1 to 1.
	numWorkers int
	// executionId is reset to 0 by Start (presumably advanced by the
	// workers — TODO confirm).
	executionId int64
	// rows is the channel Add sends on. Start creates it unbuffered and Close
	// closes it.
	rows chan []byte
	// workerWg is incremented once per worker in Start; Close waits on it.
	workerWg sync.WaitGroup
	// workers holds the workers created by the most recent Start; Flush
	// iterates over it.
	workers []*bulkWorker
	// backoff is stored as passed to NewBulkProcessor (presumably the retry
	// policy used by the workers — TODO confirm).
	backoff Backoff
	// startedMu guards started and is held for the whole of Start and Close.
	startedMu sync.Mutex
	// started is true between a successful Start and the next Close.
	started bool
	// stopReconnC is created by Start and closed by Close to tell connection
	// checkers to stop.
	stopReconnC chan struct{}
}
// NewBulkProcessor returns a BulkProcessor that records the given settings.
//
// The constructor only stores its arguments: no channels are created and no
// goroutines are launched until Start is called. The retryItemStatusCodes map
// is stored as passed, not copied.
func NewBulkProcessor(
	client *Client,
	name string,
	db string,
	table string,
	numWorkers int,
	bulkActions int,
	bulkSize int,
	flushInterval time.Duration,
	backoff Backoff,
	retryItemStatusCodes map[int]struct{},
) *BulkProcessor {
	p := new(BulkProcessor)

	// Destination and identity.
	p.c = client
	p.name = name
	p.db = db
	p.table = table

	// Batching and concurrency settings.
	p.numWorkers = numWorkers
	p.bulkActions = bulkActions
	p.bulkSize = bulkSize
	p.flushInterval = flushInterval

	// Retry settings.
	p.retryItemStatusCodes = retryItemStatusCodes
	p.backoff = backoff

	return p
}
// Start validates the configuration and then launches the worker goroutines
// and, when flushInterval is positive, the periodic flusher.
//
// It returns the error from checkInterval if the configuration is rejected;
// that check runs even when the processor is already started. Calling Start
// on an already-started processor is otherwise a no-op that returns nil.
// ctx is passed to every worker's work method; the flusher does not receive it.
func (p *BulkProcessor) Start(ctx context.Context) error {
	p.startedMu.Lock()
	defer p.startedMu.Unlock()

	err := p.checkInterval()
	switch {
	case err != nil:
		return err
	case p.started:
		return nil
	}

	// At least one worker is required.
	if p.numWorkers <= 0 {
		p.numWorkers = 1
	}

	// Fresh per-run state; all of it is in place before any worker is built.
	p.rows = make(chan []byte)
	p.executionId = 0
	p.stopReconnC = make(chan struct{})

	// Build each worker and launch its goroutine.
	p.workers = make([]*bulkWorker, p.numWorkers)
	for idx := range p.workers {
		p.workerWg.Add(1)
		w := newBulkWorker(p, idx)
		p.workers[idx] = w
		go w.work(ctx)
	}

	// The periodic flusher is optional and runs only for a positive interval.
	if p.flushInterval > 0 {
		p.flusherStopC = make(chan struct{})
		go p.flusher(p.flushInterval)
	}

	p.started = true
	return nil
}
// checkInterval rejects a configuration in which bulkActions, bulkSize and
// flushInterval are all zero. It returns nil as soon as any one of them is
// non-zero.
func (p *BulkProcessor) checkInterval() error {
	if p.bulkActions != 0 || p.bulkSize != 0 || p.flushInterval != 0 {
		return nil
	}
	return errors.New("bulk actions and bulk size and flush interval all is nil(0)")
}
// Stop shuts the processor down. It delegates to Close and returns whatever
// Close returns.
func (p *BulkProcessor) Stop() error {
	err := p.Close()
	return err
}
// Close shuts down a started processor and blocks until its workers are done.
//
// In order, it closes stopReconnC, performs the stop/acknowledge handshake
// with the flusher (if one is running), closes the rows channel, and waits on
// workerWg. startedMu is held for the whole call. Closing a processor that is
// not started is a no-op. Close always returns nil.
func (p *BulkProcessor) Close() error {
	p.startedMu.Lock()
	defer p.startedMu.Unlock()

	// Nothing to do unless Start has run.
	if !p.started {
		return nil
	}

	// Signal connection checkers to stop.
	if reconn := p.stopReconnC; reconn != nil {
		close(reconn)
		p.stopReconnC = nil
	}

	// Ask the flusher to exit and wait for its acknowledgement before the
	// channel is closed and dropped.
	if stop := p.flusherStopC; stop != nil {
		stop <- struct{}{}
		<-stop
		close(stop)
		p.flusherStopC = nil
	}

	// Closing rows stops the workers; wait until every one has finished.
	close(p.rows)
	p.workerWg.Wait()

	p.started = false
	return nil
}
// Add hands one row to the workers by sending it on the processor's rows
// channel.
//
// Start creates that channel unbuffered, so the call blocks until the row is
// received. Before the first Start the channel is nil and the send blocks
// forever; after Close (and before another Start) the channel is closed and
// the send panics.
func (p *BulkProcessor) Add(row []byte) {
	rows := p.rows
	rows <- row
}
// Flush asks each worker in turn to commit its outstanding rows: it sends on
// the worker's flushC and then waits on its flushAckC before moving on to the
// next worker. It returns once every worker has acknowledged, and always
// returns nil.
func (p *BulkProcessor) Flush() error {
	workers := p.workers
	for i := 0; i < len(workers); i++ {
		w := workers[i]
		w.flushC <- struct{}{}
		<-w.flushAckC // block until this worker reports completion
	}
	return nil
}
// flusher runs in its own goroutine and calls Flush on every tick of a ticker
// with the given interval. Start launches it only when flushInterval is
// greater than zero.
//
// It exits when a value arrives on flusherStopC, after sending one value back
// on the same channel as an acknowledgement. The ticker is stopped on exit.
func (p *BulkProcessor) flusher(interval time.Duration) {
	tick := time.NewTicker(interval)
	defer tick.Stop()

	for stopped := false; !stopped; {
		select {
		case <-p.flusherStopC:
			// Acknowledge the stop request so the requester can proceed.
			p.flusherStopC <- struct{}{}
			stopped = true
		case <-tick.C:
			// Periodic flush; the result is deliberately discarded.
			_ = p.Flush() // TODO swallow errors here?
		}
	}
}
// DB returns the database name this processor was constructed with.
func (p *BulkProcessor) DB() string {
	db := p.db
	return db
}
// Table returns the table name this processor was constructed with.
func (p *BulkProcessor) Table() string {
	table := p.table
	return table
}