-
Notifications
You must be signed in to change notification settings - Fork 2.5k
/
v004.cql.tmpl
222 lines (205 loc) · 8.2 KB
/
v004.cql.tmpl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
--
-- Creates Cassandra keyspace with tables for traces and dependencies.
--
-- Required parameters:
--
-- keyspace
-- name of the keyspace
-- replication
-- replication strategy for the keyspace, such as
-- for prod environments
-- {'class': 'NetworkTopologyStrategy', '$datacenter': '${replication_factor}' }
-- for test environments
-- {'class': 'SimpleStrategy', 'replication_factor': '1'}
-- trace_ttl
-- default time to live for trace data, in seconds
-- dependencies_ttl
-- default time to live for dependencies data, in seconds (0 for no TTL)
--
-- Non-configurable settings:
-- gc_grace_seconds is non-zero, see: http://www.uberobert.com/cassandra_gc_grace_disables_hinted_handoff/
-- For TTL of 2 days, compaction window is 1 hour, rule of thumb here: http://thelastpickle.com/blog/2016/12/08/TWCS-part1.html
-- Create the target keyspace using the replication map supplied through the template parameters.
CREATE KEYSPACE IF NOT EXISTS ${keyspace}
    WITH replication = ${replication};
-- Typed key/value pair with one value_* field per supported value kind.
-- NOTE(review): value_type presumably names which value_* field is populated (confirm against the writer code).
CREATE TYPE IF NOT EXISTS ${keyspace}.keyvalue (
    key          text,
    value_type   text,
    value_string text,
    value_bool   boolean,
    value_long   bigint,
    value_double double,
    value_binary blob
);
-- Log entry: a timestamp plus a frozen list of keyvalue fields.
CREATE TYPE IF NOT EXISTS ${keyspace}.log (
    ts     bigint,  -- microseconds since epoch
    fields frozen<list<frozen<${keyspace}.keyvalue>>>
);
-- Reference to another span, identified by trace_id and span_id and labelled with ref_type.
CREATE TYPE IF NOT EXISTS ${keyspace}.span_ref (
    ref_type text,
    trace_id blob,
    span_id  bigint
);
-- Process descriptor: a service name plus a frozen list of keyvalue tags.
CREATE TYPE IF NOT EXISTS ${keyspace}.process (
    service_name text,
    tags         frozen<list<frozen<${keyspace}.keyvalue>>>
);
-- Span storage: one partition per trace_id, rows clustered by span_id and span_hash.
-- span_hash exists only for Zipkin backwards compatibility, because Zipkin allows spans with the same ID.
-- start_time is a bigint rather than a timestamp because microsecond precision is required.
-- Note: Cassandra re-orders non-PK columns alphabetically, so the table looks different in CQLSH "describe table".
CREATE TABLE IF NOT EXISTS ${keyspace}.traces (
    trace_id       blob,
    span_id        bigint,
    span_hash      bigint,
    parent_id      bigint,
    operation_name text,
    flags          int,
    start_time     bigint,  -- microseconds since epoch
    duration       bigint,  -- microseconds
    tags           list<frozen<keyvalue>>,
    logs           list<frozen<log>>,
    refs           list<frozen<span_ref>>,
    process        frozen<process>,
    PRIMARY KEY ((trace_id), span_id, span_hash)
)
WITH compaction = {
        'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy',
        'compaction_window_unit': 'HOURS',
        'compaction_window_size': '1'
    }
    AND default_time_to_live = ${trace_ttl}
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes
-- Distinct service names, one row per name. Rows expire with the trace TTL.
CREATE TABLE IF NOT EXISTS ${keyspace}.service_names (
    service_name text PRIMARY KEY
)
WITH compaction = {
        'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy',
        'min_threshold': '4',
        'max_threshold': '32'
    }
    AND default_time_to_live = ${trace_ttl}
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes
-- Operation names grouped by service: one partition per service_name,
-- rows clustered by span_kind and then operation_name. Rows expire with the trace TTL.
CREATE TABLE IF NOT EXISTS ${keyspace}.operation_names_v2 (
    service_name   text,
    span_kind      text,
    operation_name text,
    PRIMARY KEY ((service_name), span_kind, operation_name)
)
WITH compaction = {
        'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy',
        'min_threshold': '4',
        'max_threshold': '32'
    }
    AND default_time_to_live = ${trace_ttl}
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes
-- Index of trace IDs by service + operation name.
-- Within each (service_name, operation_name) partition rows are sorted by span start_time, newest first.
CREATE TABLE IF NOT EXISTS ${keyspace}.service_operation_index (
    service_name   text,
    operation_name text,
    start_time     bigint,  -- microseconds since epoch
    trace_id       blob,
    PRIMARY KEY ((service_name, operation_name), start_time)
)
WITH CLUSTERING ORDER BY (start_time DESC)
    AND compaction = {
        'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy',
        'compaction_window_unit': 'HOURS',
        'compaction_window_size': '1'
    }
    AND default_time_to_live = ${trace_ttl}
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes
-- Index of trace IDs by service name, partitioned by (service_name, bucket)
-- and sorted by start_time, newest first.
-- NOTE(review): how the integer bucket is derived is not visible in this file (confirm against the writer code).
CREATE TABLE IF NOT EXISTS ${keyspace}.service_name_index (
    service_name text,
    bucket       int,
    start_time   bigint,  -- microseconds since epoch
    trace_id     blob,
    PRIMARY KEY ((service_name, bucket), start_time)
)
WITH CLUSTERING ORDER BY (start_time DESC)
    AND compaction = {
        'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy',
        'compaction_window_unit': 'HOURS',
        'compaction_window_size': '1'
    }
    AND default_time_to_live = ${trace_ttl}
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes
-- Index of trace IDs by span duration.
-- Partitioned by (service_name, operation_name, bucket), rows sorted by longest duration first
-- and then by newest start_time.
CREATE TABLE IF NOT EXISTS ${keyspace}.duration_index (
    service_name   text,       -- service name
    operation_name text,       -- operation name, or blank for queries without span name
    bucket         timestamp,  -- time bucket: the start_time of the given span rounded to an hour
    duration       bigint,     -- span duration, in microseconds
    start_time     bigint,     -- microseconds since epoch
    trace_id       blob,
    PRIMARY KEY ((service_name, operation_name, bucket), duration, start_time, trace_id)
)
WITH CLUSTERING ORDER BY (duration DESC, start_time DESC)
    AND compaction = {
        'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy',
        'compaction_window_unit': 'HOURS',
        'compaction_window_size': '1'
    }
    AND default_time_to_live = ${trace_ttl}
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes
-- Index of spans by (service_name, tag_key, tag_value), sorted by start_time, newest first.
-- Possible future work: a bucketing strategy may have to be added for tag queries,
-- and the table could be improved further by adding a timestamp to it.
CREATE TABLE IF NOT EXISTS ${keyspace}.tag_index (
    service_name text,
    tag_key      text,
    tag_value    text,
    start_time   bigint,  -- microseconds since epoch
    trace_id     blob,
    span_id      bigint,
    PRIMARY KEY ((service_name, tag_key, tag_value), start_time, trace_id, span_id)
)
WITH CLUSTERING ORDER BY (start_time DESC)
    AND compaction = {
        'class': 'org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy',
        'compaction_window_unit': 'HOURS',
        'compaction_window_size': '1'
    }
    AND default_time_to_live = ${trace_ttl}
    AND speculative_retry = 'NONE'
    AND gc_grace_seconds = 10800; -- 3 hours of downtime acceptable on nodes
-- One dependency link: parent and child names, a call count, and a source label.
CREATE TYPE IF NOT EXISTS ${keyspace}.dependency (
    parent     text,
    child      text,
    call_count bigint,
    source     text
);
-- Dependency snapshots, partitioned by ts_bucket and sorted by ts, newest first.
-- The compaction strategy intentionally differs from the time-windowed tables
-- because of the size of dependencies data.
-- Rows expire after dependencies_ttl seconds (0 means no TTL).
CREATE TABLE IF NOT EXISTS ${keyspace}.dependencies_v2 (
    ts_bucket    timestamp,
    ts           timestamp,
    dependencies list<frozen<dependency>>,
    PRIMARY KEY ((ts_bucket), ts)
)
WITH CLUSTERING ORDER BY (ts DESC)
    AND compaction = {
        'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy',
        'min_threshold': '4',
        'max_threshold': '32'
    }
    AND default_time_to_live = ${dependencies_ttl};
-- Adaptive sampling tables
-- See ./plugin/storage/cassandra/samplingstore/storage.go
--
-- Throughput records stored as text, partitioned by bucket and sorted by ts, newest first.
CREATE TABLE IF NOT EXISTS ${keyspace}.operation_throughput (
    bucket     int,
    ts         timeuuid,
    throughput text,
    PRIMARY KEY ((bucket), ts)
)
WITH CLUSTERING ORDER BY (ts DESC);
-- Adaptive sampling: probability records stored as text together with a hostname,
-- partitioned by bucket and sorted by ts, newest first.
CREATE TABLE IF NOT EXISTS ${keyspace}.sampling_probabilities (
    bucket        int,
    ts            timeuuid,
    hostname      text,
    probabilities text,
    PRIMARY KEY ((bucket), ts)
)
WITH CLUSTERING ORDER BY (ts DESC);
-- Distributed lock
-- See ./plugin/pkg/distributedlock/cassandra/lock.go
--
-- One row per lease name, recording its current owner.
CREATE TABLE IF NOT EXISTS ${keyspace}.leases (
    name  text PRIMARY KEY,
    owner text
);