-
Notifications
You must be signed in to change notification settings - Fork 3
/
sparse.go
139 lines (124 loc) · 2.22 KB
/
sparse.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
package hll
import (
"encoding/binary"
"sort"
)
// sparse keeps a set of untique hashes.
// if sparse if dirty `s[0]&(1<<7) != 0`, hashes are unordered, maybe even with duplicates.
// if sparse is not dirty, hashes are sorted and duplicates are removed.
type sparse []byte
func (s sparse) dirty() bool {
return s[0]&(1<<7) != 0
}
func (s sparse) EstimateCardinality() int {
if s.dirty() {
s.sort() // clears dirty.
}
return int(s.size())
}
func (s sparse) Add(hash uint64) addResult {
sz := s.size()
sz++
if sz < uint32(len(s))>>3 {
binary.LittleEndian.PutUint64(s[sz<<3:], hash)
s.setSize(sz | 1<<31)
return ok
}
if !s.dirty() {
return full
}
s.sort()
sz = s.size()
// Add a 100 byte padding so we do not re-sort on every add once the sparse buffer is almost full.
if sz+100 < uint32(len(s))>>3 {
sz++
binary.LittleEndian.PutUint64(s[sz<<3:], hash)
s.setSize(sz | 1<<31)
return ok
}
return full
}
type sortable sparse
func (s sortable) Len() int {
return len(s) >> 3
}
func (s sortable) Swap(i, j int) {
i <<= 3
j <<= 3
for k := 0; k < 8; k++ {
s[i], s[j] = s[j], s[i]
i++
j++
}
}
func (s sortable) Less(i, j int) bool {
i <<= 3
j <<= 3
for k := 0; k < 8; k++ {
a := s[i]
b := s[j]
if a < b {
return true
}
if a > b {
return false
}
i++
j++
}
return false
}
type addResult int
const (
ok addResult = 0
full addResult = 1
)
func (s sparse) size() uint32 {
return binary.BigEndian.Uint32(s) & ^(uint32(1) << 31)
}
func (s sparse) setSize(sz uint32) {
binary.BigEndian.PutUint32(s, sz)
}
func (s sortable) l() int {
return len(s)
}
func (s sparse) sort() {
sz := s.size()
end := sz<<3 + 8
t := sortable(s[8:end])
sort.Sort(t)
// Remove dups.
to := 0
from := 0
outer:
for ; from < len(t); from += 8 {
if from == 0 {
to += 8
continue
}
prev := from - 8
i := from
for k := 0; k < 8; k++ {
if t[prev] != t[i] {
if from == to {
to += 8
continue outer
}
j := from
for k := 0; k < 8; k++ {
t[to] = t[j]
j++
to++
}
continue outer
}
i++
prev++
}
}
// Clear the slack (having zeroes at the end could make hll compress better).
s.setSize(uint32(to >> 3))
for i := to; i < len(t); i++ {
t[i] = 0
}
}