-
Notifications
You must be signed in to change notification settings - Fork 0
/
bio.yaml
170 lines (143 loc) · 5.16 KB
/
bio.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# Programs defined by this config. Only one is declared here: `bio`.
programs:
# See:
# * https://github.com/iovisor/bcc/blob/master/tools/biolatency.py
# * https://github.com/iovisor/bcc/blob/master/tools/biolatency_example.txt
#
# See also: bio-tracepoints.yaml
# `bio`: block IO latency and request-size histograms, keyed by device,
# operation and bucket (see disk_key_t in the code section).
- name: bio
metrics:
# Each histogram below reads one BPF table declared in the code section.
histograms:
# Latency histogram backed by the `io_latency` BPF table.
# Slots are power-of-two (exp2) buckets 0..26, the same cap as
# max_latency_slot in the code section. The code records microseconds;
# bucket_multiplier rescales them to seconds.
- name: bio_latency_seconds
help: Block IO latency histogram
table: io_latency
bucket_type: exp2
bucket_min: 0
bucket_max: 26
bucket_multiplier: 0.000001 # microseconds to seconds
# Labels follow the field order of disk_key_t: disk, op, slot.
labels:
- name: device
# 32 bytes, matching `char disk[32]`.
size: 32
decoders:
- name: string
- name: operation
# NOTE(review): `op` is declared as u8 but is read here as 8 bytes —
# presumably the u8 plus the alignment padding before `u64 slot`; confirm.
size: 8
decoders:
- name: uint
- name: static_map
# trace_req_completion writes 2 for REQ_OP_WRITE and 1 for every other
# operation, so "read" covers all non-write requests.
static_map:
1: read
2: write
- name: bucket
# 8 bytes, matching `u64 slot`.
size: 8
decoders:
- name: uint
# Request-size histogram backed by the `io_size` BPF table.
# Slots are power-of-two (exp2) buckets 0..15, the same cap as
# max_size_slot in the code section. The code records kibibytes
# (bytes / 1024); bucket_multiplier rescales them to bytes.
- name: bio_size_bytes
help: Block IO size histogram with kibibyte buckets
table: io_size
bucket_type: exp2
bucket_min: 0
bucket_max: 15
bucket_multiplier: 1024 # kibibytes to bytes
# Labels follow the field order of disk_key_t: disk, op, slot.
labels:
- name: device
# 32 bytes, matching `char disk[32]`.
size: 32
decoders:
- name: string
- name: operation
# NOTE(review): `op` is declared as u8 but is read here as 8 bytes —
# presumably the u8 plus the alignment padding before `u64 slot`; confirm.
size: 8
decoders:
- name: uint
- name: static_map
# trace_req_completion writes 2 for REQ_OP_WRITE and 1 for every other
# operation, so "read" covers all non-write requests.
static_map:
1: read
2: write
- name: bucket
# 8 bytes, matching `u64 slot`.
size: 8
decoders:
- name: uint
# Kernel function -> BPF handler. Both request-start functions share
# trace_req_start; completion is handled by trace_req_completion.
kprobes:
# Remove blk_start_request if you're running Linux 5.3+, or better yet
# use tracepoint based code that depends on stable kernel ABI.
blk_start_request: trace_req_start
blk_mq_start_request: trace_req_start
blk_account_io_completion: trace_req_completion
# BPF C source defining the handlers and tables referenced above.
code: |
#include <linux/blkdev.h>
#include <linux/blk_types.h>
// Histogram key: one counter per (disk, operation, bucket slot).
struct disk_key {
    // Disk name, filled from gendisk->disk_name
    char disk[32];
    // Operation: 2 for writes, 1 for everything else
    u8 op;
    // Bucket index; the slot one past the maximum accumulates the sum
    u64 slot;
};
typedef struct disk_key disk_key_t;
// Max number of disks we expect to see on the host
const u8 max_disks = 255;
// 27 buckets for latency, max range is 33.6s .. 67.1s
const u8 max_latency_slot = 26;
// 16 buckets per disk in kib, max range is 16mib .. 32mib
const u8 max_size_slot = 15;
// Hash to temporarily hold the start time of each bio request, max 10k in-flight by default.
// Keyed by the request pointer; written by trace_req_start, consumed by trace_req_completion.
BPF_HASH(start, struct request *);
// Histograms to record latencies.
// Sized per disk as slots 0..max_latency_slot plus one sum slot at max_latency_slot + 1.
// NOTE(review): keys also carry `op` (1 or 2), so a disk that sees both operations
// can use up to twice that many entries — confirm the sizing is sufficient.
BPF_HISTOGRAM(io_latency, disk_key_t, (max_latency_slot + 2) * max_disks);
// Histograms to record sizes.
// Sized per disk as slots 0..max_size_slot plus one sum slot at max_size_slot + 1.
// NOTE(review): same `op` caveat as io_latency above.
BPF_HISTOGRAM(io_size, disk_key_t, (max_size_slot + 2) * max_disks);
// Request-start handler: remember when `req` was issued.
// The timestamp goes into the `start` hash under the request pointer and is
// picked up again by trace_req_completion.
int trace_req_start(struct pt_regs *ctx, struct request *req) {
    u64 issued_ns = bpf_ktime_get_ns();

    start.update(&req, &issued_ns);
    return 0;
}
// Calculate request duration and store in appropriate histogram bucket.
//
// Request-completion handler. Looks up the start timestamp saved by
// trace_req_start, then for the request's disk and operation:
//   * bumps the latency bucket (microseconds, log2 slots) in io_latency,
//   * bumps the size bucket (kibibytes, log2 slots) in io_size,
//   * adds the raw latency / size to the per-key sum slot (max slot + 1).
// The start entry is removed on every path that found one, so the `start`
// hash does not accumulate stale requests.
int trace_req_completion(struct pt_regs *ctx, struct request *req, unsigned int bytes) {
    u64 *tsp, delta;

    // Fetch timestamp and calculate delta
    tsp = start.lookup(&req);
    if (tsp == 0) {
        return 0; // missed issue
    }

    // There are write requests with zero length on sector zero,
    // which do not seem to be real writes to device. Skip them, but still
    // drop the start entry so it does not linger in the hash.
    if (req->__sector == 0 && req->__data_len == 0) {
        start.delete(&req);
        return 0;
    }

    // Disk that received the request
    struct gendisk *disk = req->rq_disk;

    // Delta in nanoseconds
    delta = bpf_ktime_get_ns() - *tsp;

    // Convert to microseconds
    delta /= 1000;

    // Latency histogram key
    u64 latency_slot = bpf_log2l(delta);

    // Cap latency bucket at max value
    if (latency_slot > max_latency_slot) {
        latency_slot = max_latency_slot;
    }

    disk_key_t latency_key = { .slot = latency_slot };
    bpf_probe_read(&latency_key.disk, sizeof(latency_key.disk), &disk->disk_name);

    // Size in kibibytes
    u64 size_kib = bytes / 1024;

    // Request size histogram key
    u64 size_slot = bpf_log2(size_kib);

    // Cap size bucket at max value
    if (size_slot > max_size_slot) {
        size_slot = max_size_slot;
    }

    disk_key_t size_key = { .slot = size_slot };
    bpf_probe_read(&size_key.disk, sizeof(size_key.disk), &disk->disk_name);

    // Operation label: 2 for writes, 1 for every other request type
    if ((req->cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE) {
        latency_key.op = 2;
        size_key.op = 2;
    } else {
        latency_key.op = 1;
        size_key.op = 1;
    }

    io_latency.increment(latency_key);
    io_size.increment(size_key);

    // Increment sum keys (slot one past the last bucket holds the running total)
    latency_key.slot = max_latency_slot + 1;
    io_latency.increment(latency_key, delta);

    size_key.slot = max_size_slot + 1;
    io_size.increment(size_key, size_kib);

    // Request is done: forget its start timestamp
    start.delete(&req);

    return 0;
}