From a021fd89dd6f2cd772059496b865f22e1571fbe3 Mon Sep 17 00:00:00 2001 From: Emmanuel Bretelle Date: Thu, 14 Jul 2016 13:04:57 -0700 Subject: [PATCH 1/3] [cachetop] top-like cachestat Alike cachestat.py but providing cache stats at the process level. --- tools/cachetop.py | 252 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 252 insertions(+) create mode 100755 tools/cachetop.py diff --git a/tools/cachetop.py b/tools/cachetop.py new file mode 100755 index 000000000000..428c3c27a09b --- /dev/null +++ b/tools/cachetop.py @@ -0,0 +1,252 @@ +#!/usr/bin/env python +# @lint-avoid-python-3-compatibility-imports +# +# cachetop Count cache kernel function calls per processes +# For Linux, uses BCC, eBPF. +# +# USAGE: cachetop +# Taken from cachestat by Brendan Gregg +# +# Copyright (c) 2016-present, Facebook, Inc. +# Licensed under the Apache License, Version 2.0 (the "License") +# +# 13-Jul-2016 Emmanuel Bretelle first version + +from __future__ import absolute_import +from __future__ import division +from __future__ import unicode_literals +from __future__ import print_function +from collections import defaultdict +from bcc import BPF + +import argparse +import curses +import pwd +import re +import signal +from time import sleep + +FIELDS = ( + "PID", + "UID", + "CMD", + "HITS", + "MISSES", + "DIRTIES", + "READ_HIT%", + "WRITE_HIT%" +) +DEFAULT_FIELD = "HITS" + + +# signal handler +def signal_ignore(signal, frame): + print() + + +# Function to gather data from /proc/meminfo +# return dictionary for quicker lookup of both values +def get_meminfo(): + result = {} + + for line in open('/proc/meminfo'): + k = line.split(':', 3) + v = k[1].split() + result[k[0]] = int(v[0]) + return result + + +def get_processes_stats( + bpf, + sort_field=FIELDS.index(DEFAULT_FIELD), + sort_reverse=False): + ''' + Return a tuple containing: + buffer + cached + list of tuple with per process cache stats + ''' + rtaccess = 0 + wtaccess = 0 + mpa = 0 + mbd = 0 + apcl = 0 + apd = 0 
+ access = 0 + misses = 0 + rhits = 0 + whits = 0 + + counts = bpf.get_table("counts") + stats = defaultdict(lambda: defaultdict(int)) + for k, v in counts.items(): + stats["%d-%d-%s" % (k.pid, k.uid, k.comm)][k.ip] = v.value + stats_list = [] + + for pid, count in sorted(stats.items(), key=lambda stat: stat[0]): + for k, v in count.items(): + if re.match('mark_page_accessed', bpf.ksym(k)) is not None: + mpa = v + if mpa < 0: + mpa = 0 + + if re.match('mark_buffer_dirty', bpf.ksym(k)) is not None: + mbd = v + if mbd < 0: + mbd = 0 + + if re.match('add_to_page_cache_lru', bpf.ksym(k)) is not None: + apcl = v + if apcl < 0: + apcl = 0 + + if re.match('account_page_dirtied', bpf.ksym(k)) is not None: + apd = v + if apd < 0: + apd = 0 + + # access = total cache access incl. reads(mpa) and writes(mbd) + # misses = total of add to lru which we do when we write(mbd) + # and also the mark the page dirty(same as mbd) + access = (mpa + mbd) + misses = (apcl + apd) + + # rtaccess is the read hit % during the sample period. + # wtaccess is the write hit % during the smaple period. 
+ if mpa > 0: + rtaccess = float(mpa) / (access + misses) + if apcl > 0: + wtaccess = float(apcl) / (access + misses) + + if wtaccess != 0: + whits = 100 * wtaccess + if rtaccess != 0: + rhits = 100 * rtaccess + + _pid, uid, comm = pid.split('-', 2) + stats_list.append( + (int(_pid), uid, comm, + access, misses, mbd, + rhits, whits)) + + stats_list = sorted( + stats_list, key=lambda stat: stat[sort_field], reverse=sort_reverse + ) + counts.clear() + return stats_list + + +def handle_loop(stdscr, args): + # don't wait on key press + stdscr.nodelay(1) + # set default sorting field + sort_field = FIELDS.index(DEFAULT_FIELD) + sort_reverse = False + + # load BPF program + bpf_text = """ + + #include + struct key_t { + u64 ip; + u32 pid; + u32 uid; + char comm[16]; + }; + + BPF_HASH(counts, struct key_t); + + int do_count(struct pt_regs *ctx) { + struct key_t key = {}; + u64 zero = 0 , *val; + u64 pid = bpf_get_current_pid_tgid(); + u32 uid = bpf_get_current_uid_gid(); + + key.ip = PT_REGS_IP(ctx); + key.pid = pid & 0xFFFFFFFF; + key.uid = uid & 0xFFFFFFFF; + bpf_get_current_comm(&(key.comm), 16); + + val = counts.lookup_or_init(&key, &zero); // update counter + (*val)++; + return 0; + } + + """ + b = BPF(text=bpf_text) + b.attach_kprobe(event="add_to_page_cache_lru", fn_name="do_count") + b.attach_kprobe(event="mark_page_accessed", fn_name="do_count") + b.attach_kprobe(event="account_page_dirtied", fn_name="do_count") + b.attach_kprobe(event="mark_buffer_dirty", fn_name="do_count") + + exiting = 0 + + while 1: + s = stdscr.getch() + if s == ord('q'): + exiting = 1 + elif s == ord('r'): + sort_reverse = not sort_reverse + elif s == ord('<'): + sort_field = max(0, sort_field - 1) + elif s == ord('>'): + sort_field = min(len(FIELDS) - 1, sort_field + 1) + try: + sleep(args.interval) + except KeyboardInterrupt: + exiting = 1 + # as cleanup can take many seconds, trap Ctrl-C: + signal.signal(signal.SIGINT, signal_ignore) + + # Get memory info + mem = get_meminfo() + cached 
= int(mem["Cached"]) / 1024 + buff = int(mem["Buffers"]) / 1024 + + process_stats = get_processes_stats( + b, + sort_field=sort_field, + sort_reverse=sort_reverse) + stdscr.clear() + stdscr.addstr( + 0, 0, + "Buffers MB: %.0f / Cached MB: %.0f" % (buff, cached) + ) + + # header + stdscr.addstr( + 1, 0, + "{0:8} {1:8} {2:16} {3:8} {4:8} {5:8} {6:10} {7:10}".format( + *FIELDS + ), + curses.A_REVERSE + ) + (height, width) = stdscr.getmaxyx() + for i, stat in enumerate(process_stats): + stdscr.addstr( + i + 2, 0, + "{0:8} {username:8} {2:16} {3:8} {4:8} " + "{5:8} {6:9.1f}% {7:9.1f}%".format( + *stat, username=pwd.getpwuid(int(stat[1]))[0] + ) + ) + if i > height - 4: + break + stdscr.refresh() + if exiting: + print("Detaching...") + return + + +def parse_arguments(): + parser = argparse.ArgumentParser() + parser.add_argument( + '--interval', '-i', type=int, default=5, nargs='?', + help='Interval between probes.' + ) + + args = parser.parse_args() + return args + +args = parse_arguments() +curses.wrapper(handle_loop, args) From 75dfd5a4ef69be75305b996e0237a9ac5523bfde Mon Sep 17 00:00:00 2001 From: chantra Date: Tue, 19 Jul 2016 00:17:45 +0200 Subject: [PATCH 2/3] [cachetop] add example and man page. make interval a positional parameter. --- man/man8/cachetop.8 | 76 ++++++++++++++++++++++++++++++++++++++ tools/cachetop.py | 9 +++-- tools/cachetop_example.txt | 70 +++++++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 3 deletions(-) create mode 100644 man/man8/cachetop.8 create mode 100644 tools/cachetop_example.txt diff --git a/man/man8/cachetop.8 b/man/man8/cachetop.8 new file mode 100644 index 000000000000..6e6ee89bf762 --- /dev/null +++ b/man/man8/cachetop.8 @@ -0,0 +1,76 @@ +.TH cachetop 8 "2016-01-30" "USER COMMANDS" +.SH NAME +cachetop \- Statistics for linux page cache hit/miss ratios per processes. Uses Linux eBPF/bcc. 
+.SH SYNOPSIS +.B cachetop +[interval] +.SH DESCRIPTION +This traces four kernel functions and prints per-processes summaries every +\fBinterval\fR seconds. This can be useful for processes workload characterization, +and looking for patterns in operation usage over time. + +This works by tracing kernel page cache functions using dynamic tracing, and will +need updating to match any changes to these functions. Edit the script to +customize which functions are traced. + +Since this uses BPF, only the root user can use this tool. +.SH REQUIREMENTS +CONFIG_BPF and bcc. +.SH EXAMPLES +.TP +Update summaries every five seconds: +# +.B cachetop +.TP +Print summaries each second: +# +.B cachetop 1 +.SH FIELDS +.TP +PID +Process ID of the process causing the cache activity. +.TP +UID +User ID of the process causing the cache activity. +.TP +HITS +Number of page cache hits. +.TP +MISSES +Number of page cache misses. +.TP +DIRTIES +Number of dirty pages added to the page cache. +.TP +READ_HIT% +Read hit percent of page cache usage. +.TP +WRITE_HIT% +Write hit percent of page cache usage. +.TP +BUFFERS_MB +Buffers size taken from /proc/meminfo. +.TP +CACHED_MB +Cached amount of data in current page cache taken from /proc/meminfo. +.SH OVERHEAD +This traces various kernel page cache functions and maintains in-kernel counts, which +are asynchronously copied to user-space. While the rate of operations can +be very high (>1G/sec) and we can have up to 34% overhead, this is still a relatively efficient way to trace +these events, and so the overhead is expected to be small for normal workloads. +Measure in a test environment. +.SH SOURCE +This is from bcc. +.IP +https://github.com/iovisor/bcc +.PP +Also look in the bcc distribution for a companion _examples.txt file containing +example usage, output, and commentary for this tool. +.SH OS +Linux +.SH STABILITY +Unstable - in development. 
+.SH AUTHOR +Emmanuel Bretelle +.SH SEE ALSO +cachestat (8) diff --git a/tools/cachetop.py b/tools/cachetop.py index 428c3c27a09b..b1ea9a6db11e 100755 --- a/tools/cachetop.py +++ b/tools/cachetop.py @@ -225,7 +225,7 @@ def handle_loop(stdscr, args): for i, stat in enumerate(process_stats): stdscr.addstr( i + 2, 0, - "{0:8} {username:8} {2:16} {3:8} {4:8} " + "{0:8} {username:8.8} {2:16} {3:8} {4:8} " "{5:8} {6:9.1f}% {7:9.1f}%".format( *stat, username=pwd.getpwuid(int(stat[1]))[0] ) @@ -239,9 +239,12 @@ def handle_loop(stdscr, args): def parse_arguments(): - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser( + description='show Linux page cache hit/miss statistics including read ' + 'and write hit % per processes in a UI like top.' + ) parser.add_argument( - '--interval', '-i', type=int, default=5, nargs='?', + 'interval', type=int, default=5, nargs='?', help='Interval between probes.' ) diff --git a/tools/cachetop_example.txt b/tools/cachetop_example.txt new file mode 100644 index 000000000000..95fd42595be2 --- /dev/null +++ b/tools/cachetop_example.txt @@ -0,0 +1,70 @@ +# ./cachetop -h +usage: cachetop.py [-h] [interval] + +show Linux page cache hit/miss statistics including read and write hit % per +processes in a UI like top. + +positional arguments: + interval Interval between probes. 
+ +optional arguments: + -h, --help show this help message and exit + +examples: + ./cachetop # run with default option of 5 seconds delay + ./cachetop 1 # print every second hit/miss stats + +# ./cachetop 5 +Buffers MB: 76 / Cached MB: 114 +PID UID CMD HITS MISSES DIRTIES READ_HIT% WRITE_HIT% + 1 root systemd 2 0 0 100.0% 0.0% + 680 root vminfo 3 4 2 14.3% 42.9% + 567 syslog rs:main Q:Reg 10 4 2 57.1% 21.4% + 986 root kworker/u2:2 10 2457 4 0.2% 99.5% + 988 root kworker/u2:2 10 9 4 31.6% 36.8% + 877 vagrant systemd 18 4 2 72.7% 13.6% + 983 root python 148 3 143 3.3% 1.3% + 981 root strace 419 3 143 65.4% 0.5% + 544 messageb dbus-daemon 455 371 454 0.1% 0.4% + 243 root jbd2/dm-0-8 457 371 454 0.4% 0.4% + 985 root (mount) 560 2457 4 18.4% 81.4% + 987 root systemd-udevd 566 9 4 97.7% 1.2% + 988 root systemd-cgroups 569 9 4 97.8% 1.2% + 986 root modprobe 578 9 4 97.8% 1.2% + 287 root systemd-journal 598 371 454 14.9% 0.3% + 985 root mount 692 2457 4 21.8% 78.0% + 984 vagrant find 9529 2457 4 79.5% 20.5% + +Above shows the run of `find /` on a newly booted system. + +Command used to generate the activity +# find / + +Below shows the hit rate increases as we run find a second time and it gets it +its pages from the cache. 
+# ./cachetop.py +Buffers MB: 76 / Cached MB: 115 +PID UID CMD HITS MISSES DIRTIES READ_HIT% WRITE_HIT% + 544 messageb dbus-daemon 2 2 1 25.0% 50.0% + 680 root vminfo 2 2 1 25.0% 50.0% + 243 root jbd2/dm-0-8 3 2 1 40.0% 40.0% + 1068 root python 5 0 0 100.0% 0.0% + 1071 vagrant bash 350 0 0 100.0% 0.0% + 1071 vagrant find 12959 0 0 100.0% 0.0% + + +Below shows that the dirty pages increases as a file of 80M is created running +# dd if=/dev/urandom of=/tmp/c bs=8192 count=10000 + +# ./cachetop.py 10 +Buffers MB: 77 / Cached MB: 193 +PID UID CMD HITS MISSES DIRTIES READ_HIT% WRITE_HIT% + 544 messageb dbus-daemon 9 10 7 10.5% 15.8% + 680 root vminfo 9 10 7 10.5% 15.8% + 1109 root python 22 0 0 100.0% 0.0% + 243 root jbd2/dm-0-8 25 10 7 51.4% 8.6% + 1070 root kworker/u2:2 85 0 0 100.0% 0.0% + 1110 vagrant bash 366 0 0 100.0% 0.0% + 1110 vagrant dd 42183 40000 20000 27.0% 24.3% + +The file copied into page cache was named /tmp/c with a size of 81920000 (81920000/4096) = 20000 From e159f7e217965ba4e215c6d87a41c08d662eadd0 Mon Sep 17 00:00:00 2001 From: chantra Date: Sat, 23 Jul 2016 15:33:11 +0200 Subject: [PATCH 3/3] [cachetop] fix and doc * pass -fno-color-diagnostics to clang * remove unicode import (#623) * add time to cachetop output * add keybindings to cachetop.8 * add cachetop links to README.md --- README.md | 1 + man/man8/cachetop.8 | 17 ++++++++++++++++- src/cc/frontends/clang/loader.cc | 3 +++ tools/cachetop.py | 11 ++++++++--- tools/cachetop_example.txt | 6 +++--- 5 files changed, 31 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index e98c7bbd3330..44d4bc7b6b16 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,7 @@ Examples: - tools/[btrfsdist](tools/btrfsdist.py): Summarize btrfs operation latency distribution as a histogram. [Examples](tools/btrfsdist_example.txt). - tools/[btrfsslower](tools/btrfsslower.py): Trace slow btrfs operations. [Examples](tools/btrfsslower_example.txt). 
- tools/[cachestat](tools/cachestat.py): Trace page cache hit/miss ratio. [Examples](tools/cachestat_example.txt). +- tools/[cachetop](tools/cachetop.py): Trace page cache hit/miss ratio by processes. [Examples](tools/cachetop_example.txt). - tools/[cpudist](tools/cpudist.py): Summarize on- and off-CPU time per task as a histogram. [Examples](tools/cpudist_example.txt) - tools/[dcsnoop](tools/dcsnoop.py): Trace directory entry cache (dcache) lookups. [Examples](tools/dcsnoop_example.txt). - tools/[dcstat](tools/dcstat.py): Directory entry cache (dcache) stats. [Examples](tools/dcstat_example.txt). diff --git a/man/man8/cachetop.8 b/man/man8/cachetop.8 index 6e6ee89bf762..5642fa1dc9aa 100644 --- a/man/man8/cachetop.8 +++ b/man/man8/cachetop.8 @@ -7,13 +7,28 @@ cachetop \- Statistics for linux page cache hit/miss ratios per processes. Uses .SH DESCRIPTION This traces four kernel functions and prints per-processes summaries every \fBinterval\fR seconds. This can be useful for processes workload characterization, -and looking for patterns in operation usage over time. +and looking for patterns in operation usage over time. It provides a \fBtop\fR-like interface +which by default sorts by \fBHITS\fR in ascending order. This works by tracing kernel page cache functions using dynamic tracing, and will need updating to match any changes to these functions. Edit the script to customize which functions are traced. Since this uses BPF, only the root user can use this tool. +.SH KEYBINDINGS +The following keybindings can be used to control the output of \fBcachetop\fR. +.TP +.B < +Use the previous column for sorting. +.TP +.B > +Use the next column for sorting. +.TP +.B r +Toggle sorting order (default ascending). +.TP +.B q +Quit cachetop. .SH REQUIREMENTS CONFIG_BPF and bcc. 
.SH EXAMPLES diff --git a/src/cc/frontends/clang/loader.cc b/src/cc/frontends/clang/loader.cc index 4b0729df6b66..c7911ac326dd 100644 --- a/src/cc/frontends/clang/loader.cc +++ b/src/cc/frontends/clang/loader.cc @@ -99,9 +99,12 @@ int ClangLoader::parse(unique_ptr *mod, unique_ptr flags_cstr({"-O0", "-emit-llvm", "-I", dstack.cwd(), "-Wno-deprecated-declarations", "-Wno-gnu-variable-sized-type-not-at-end", + "-fno-color-diagnostics", "-x", "c", "-c", abs_file.c_str()}); KBuildHelper kbuild_helper(kdir); diff --git a/tools/cachetop.py b/tools/cachetop.py index b1ea9a6db11e..fc57da03be7a 100755 --- a/tools/cachetop.py +++ b/tools/cachetop.py @@ -14,10 +14,13 @@ from __future__ import absolute_import from __future__ import division -from __future__ import unicode_literals +# Do not import unicode_literals until #623 is fixed +# from __future__ import unicode_literals from __future__ import print_function -from collections import defaultdict + from bcc import BPF +from collections import defaultdict +from time import strftime import argparse import curses @@ -210,7 +213,9 @@ def handle_loop(stdscr, args): stdscr.clear() stdscr.addstr( 0, 0, - "Buffers MB: %.0f / Cached MB: %.0f" % (buff, cached) + "%-8s Buffers MB: %.0f / Cached MB: %.0f" % ( + strftime("%H:%M:%S"), buff, cached + ) ) # header diff --git a/tools/cachetop_example.txt b/tools/cachetop_example.txt index 95fd42595be2..13e56b4491b4 100644 --- a/tools/cachetop_example.txt +++ b/tools/cachetop_example.txt @@ -15,7 +15,7 @@ examples: ./cachetop 1 # print every second hit/miss stats # ./cachetop 5 -Buffers MB: 76 / Cached MB: 114 +13:01:01 Buffers MB: 76 / Cached MB: 114 PID UID CMD HITS MISSES DIRTIES READ_HIT% WRITE_HIT% 1 root systemd 2 0 0 100.0% 0.0% 680 root vminfo 3 4 2 14.3% 42.9% @@ -43,7 +43,7 @@ Command used to generate the activity Below shows the hit rate increases as we run find a second time and it gets it its pages from the cache. 
# ./cachetop.py -Buffers MB: 76 / Cached MB: 115 +13:01:01 Buffers MB: 76 / Cached MB: 115 PID UID CMD HITS MISSES DIRTIES READ_HIT% WRITE_HIT% 544 messageb dbus-daemon 2 2 1 25.0% 50.0% 680 root vminfo 2 2 1 25.0% 50.0% @@ -57,7 +57,7 @@ Below shows that the dirty pages increases as a file of 80M is created running # dd if=/dev/urandom of=/tmp/c bs=8192 count=10000 # ./cachetop.py 10 -Buffers MB: 77 / Cached MB: 193 +13:01:01 Buffers MB: 77 / Cached MB: 193 PID UID CMD HITS MISSES DIRTIES READ_HIT% WRITE_HIT% 544 messageb dbus-daemon 9 10 7 10.5% 15.8% 680 root vminfo 9 10 7 10.5% 15.8%