diff --git a/.gitignore b/.gitignore index f907e04309..2687bd47f2 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,7 @@ spec/spec.out spec/spec.synctex.gz notes/*.docx test_run_dir +__pycache__ .project diff --git a/.travis.yml b/.travis.yml index e5c072a4da..06c32ae437 100644 --- a/.travis.yml +++ b/.travis.yml @@ -118,3 +118,4 @@ jobs: use: firrtl_build script: - benchmark/scripts/benchmark_cold_compile.py -N 2 --designs regress/ICache.fir --versions HEAD + - benchmark/scripts/find_heap_bound.py -- -cp firrtl*jar firrtl.stage.FirrtlMain -i regress/ICache.fir -o out -X verilog diff --git a/benchmark/scripts/benchmark_cold_compile.py b/benchmark/scripts/benchmark_cold_compile.py index aa452d83d3..c480987cf0 100755 --- a/benchmark/scripts/benchmark_cold_compile.py +++ b/benchmark/scripts/benchmark_cold_compile.py @@ -2,14 +2,14 @@ # See LICENSE for license details. import subprocess -import re from statistics import median, stdev -import sys import argparse from collections import OrderedDict import os import numbers +from monitor_job import monitor_job + # Currently hardcoded def get_firrtl_repo(): cmd = ['git', 'rev-parse', '--show-toplevel'] @@ -19,65 +19,13 @@ def get_firrtl_repo(): firrtl_repo = get_firrtl_repo() -platform = "" -if sys.platform == 'darwin': - print("Running on MacOS") - platform = 'macos' -elif sys.platform.startswith("linux"): - print("Running on Linux") - platform = 'linux' -else : - raise Exception('Unrecognized platform ' + sys.platform) - -def time(): - if platform == 'macos': - return ['/usr/bin/time', '-l'] - if platform == 'linux': - return ['/usr/bin/time', '-v'] - -def extract_max_size(output): - regex = '' - if platform == 'macos': - regex = '(\d+)\s+maximum resident set size' - if platform == 'linux': - regex = 'Maximum resident set size[^:]*:\s+(\d+)' - - m = re.search(regex, output, re.MULTILINE) - if m : - return int(m.group(1)) - else : - raise Exception('Max set size not found!') - -def extract_run_time(output): - regex = '' - res = None - if platform == 'macos': - regex = '(\d+\.\d+)\s+real' - if platform == 'linux': - regex = 'Elapsed \(wall clock\) time \(h:mm:ss or m:ss\): ([0-9:.]+)' - m = re.search(regex, output, re.MULTILINE) - if m : - text = m.group(1) - if platform == 'macos': - return float(text) - if platform == 'linux': - parts = text.split(':') - if len(parts) == 3: - return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[0]) - if len(parts) == 2: - return float(parts[0]) * 60 + float(parts[1]) - raise Exception('Runtime not found!') - def run_firrtl(java, jar, design): java_cmd = java.split() - cmd = time() + java_cmd + ['-cp', jar, 'firrtl.stage.FirrtlMain', '-i', design,'-o','out.v','-X','verilog'] - result = subprocess.run(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE) - if result.returncode != 0 : - print(result.stdout) - print(result.stderr) - sys.exit(1) - size = extract_max_size(result.stderr.decode('utf-8')) - runtime = extract_run_time(result.stderr.decode('utf-8')) + cmd = java_cmd + ['-cp', jar, 'firrtl.stage.FirrtlMain', '-i', design,'-o','out.v','-X','verilog'] + print(' '.join(cmd)) + resource_use = monitor_job(cmd) + size = resource_use.maxrss // 1024 # KiB -> MiB + runtime = resource_use.wall_clock_time return (size, runtime) def parseargs(): @@ -138,15 +86,6 @@ def check_designs(designs): for design in designs: assert os.path.exists(design), '{} must be an existing file!'.format(design) -# /usr/bin/time -v on Linux returns size in kbytes -# /usr/bin/time -l on MacOS returns size in Bytes -def norm_max_set_sizes(sizes): - div = None - if platform == 'linux': - d = 1000.0 - if platform == 'macos': - d = 1000000.0 - return [s / d for s in sizes] def main(): args = parseargs() @@ -156,7 +95,7 @@ def main(): jars = build_firrtl_jars(hashes) jvms = args.jvms N = args.iterations - info = [['java', 'revision', 'design', 'max heap', 'SD', 'runtime', 'SD']] + info = [['java', 'revision', 'design', 'max heap (MiB)', 'SD', 'runtime (s)', 'SD']] for java in jvms: print("Running with '{}'".format(java)) for hashcode, jar in jars.items(): @@ -166,8 +105,7 @@ def main(): for design in designs: print('Running {}...'.format(design)) (sizes, runtimes) = zip(*[run_firrtl(java, jar, design) for i in range(N)]) - norm_sizes = norm_max_set_sizes(sizes) - info.append([java_title, revision, design, median(norm_sizes), stdev(norm_sizes), median(runtimes), stdev(runtimes)]) + info.append([java_title, revision, design, median(sizes), stdev(sizes), median(runtimes), stdev(runtimes)]) java_title = '' revision = '' diff --git a/benchmark/scripts/find_heap_bound.py b/benchmark/scripts/find_heap_bound.py new file mode 100755 index 0000000000..c230c35b1b --- /dev/null +++ b/benchmark/scripts/find_heap_bound.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +# See LICENSE for license details. + +import re +import argparse +from typing import NamedTuple +from subprocess import TimeoutExpired +import logging + +from monitor_job import monitor_job, JobFailedError + +BaseHeapSize = NamedTuple('JavaHeapSize', [('value', int), ('suffix', str)]) +class HeapSize(BaseHeapSize): + K_FACTOR = 1024 + M_FACTOR = 1024*1024 + G_FACTOR = 1024*1024*1024 + + def toBytes(self) -> int: + return { + "": 1, + "K": self.K_FACTOR, + "M": self.M_FACTOR, + "G": self.G_FACTOR + }[self.suffix] * self.value + + def round_to(self, target): + """Round to positive multiple of target, only if we have the same suffix""" + if self.suffix == target.suffix: + me = self.toBytes() + tar = target.toBytes() + res = tar * round(me / tar) + if res == 0: + res = tar + return HeapSize.from_bytes(res) + else: + return self + + def __truediv__(self, div): + b = self.toBytes() + res = int(b / div) + return HeapSize.from_bytes(res) + + def __mul__(self, m): + b = self.toBytes() + res = int(b * m) + return HeapSize.from_bytes(res) + + def __add__(self, rhs): + return HeapSize.from_bytes(self.toBytes() + rhs.toBytes()) + + def __sub__(self, rhs): + return HeapSize.from_bytes(self.toBytes() - rhs.toBytes()) + + @classmethod + def from_str(cls, s: str): + regex = '(\d+)([kKmMgG])?' + m = re.match(regex, s) + if m: + suffix = m.group(2) + if suffix is None: + return HeapSize(int(m.group(1)), "") + else: + return HeapSize(int(m.group(1)), suffix.upper()) + else: + msg = "Invalid Heap Size '{}'! Format should be: '{}'".format(s, regex) + raise Exception(msg) + + @classmethod + def from_bytes(cls, b: int): + if b % cls.G_FACTOR == 0: + return HeapSize(round(b / cls.G_FACTOR), "G") + if b % cls.M_FACTOR == 0: + return HeapSize(round(b / cls.M_FACTOR), "M") + if b % cls.K_FACTOR == 0: + return HeapSize(round(b / cls.K_FACTOR), "K") + return HeapSize(round(b), "") + + + def __str__(self): + return str(self.value) + self.suffix + + +def parseargs(): + parser = argparse.ArgumentParser( + prog="find_heap_bound.py", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("--verbose", "-v", action="count", default=0, + help="Increase verbosity level (cumulative)") + parser.add_argument("args", type=str, nargs="+", + help="Arguments to JVM, include classpath and main") + parser.add_argument("--start-size", type=str, default="4G", + help="Starting heap size") + parser.add_argument("--min-step", type=str, default="100M", + help="Minimum heap size step") + parser.add_argument("--java", type=str, default="java", + help="Java executable to use") + parser.add_argument("--timeout-factor", type=float, default=4.0, + help="Multiple of wallclock time of first successful run " + "that counts as a timeout, runs over this time count as a fail") + return parser.parse_args() + + +def get_logger(args): + logger = logging.getLogger("find_heap_bound") + if args.verbose == 1: + #logger.setLevel(logging.INFO) + logging.basicConfig(level=logging.INFO, format='%(message)s') + elif args.verbose >= 2: + logging.basicConfig(level=logging.DEBUG, format='%(message)s') + return logger + + +def mk_cmd(java, heap, args): + return [java, "-Xmx{}".format(heap)] + args + + +def job_failed_msg(e): + if isinstance(e, JobFailedError): + if "java.lang.OutOfMemoryError" in str(e): + return "Job failed, out of memory" + else: + return "Unexpected job failure\n{}".format(e) + elif isinstance(e, TimeoutExpired): + return "Job timed out at {} seconds".format(e.timeout) + else: + raise e + + +def main(): + args = parseargs() + logger = get_logger(args) + + results = [] + + min_step = HeapSize.from_str(args.min_step) + step = None + seen = set() + timeout = None # Set by first successful run + cur = HeapSize.from_str(args.start_size) + while cur not in seen: + seen.add(cur) + try: + cmd = mk_cmd(args.java, cur, args.args) + logger.info("Running {}".format(" ".join(cmd))) + stats = monitor_job(cmd, timeout=timeout) + logger.debug(stats) + if timeout is None: + timeout = stats.wall_clock_time * args.timeout_factor + logger.debug("Timeout set to {} s".format(timeout)) + results.append((cur, stats)) + if step is None: + step = (cur / 2).round_to(min_step) + else: + step = (step / 2).round_to(min_step) + cur = (cur - step).round_to(min_step) + except (JobFailedError, TimeoutExpired) as e: + logger.debug(job_failed_msg(e)) + results.append((cur, None)) + if step is None: + # Don't set step because we don't want to keep decreasing it + # when we haven't had a passing run yet + amt = (cur * 2).round_to(min_step) + else: + step = (step / 2).round_to(min_step) + amt = step + cur = (cur + step).round_to(min_step) + logger.debug("Next = {}, step = {}".format(cur, step)) + + sorted_results = sorted(results, key=lambda tup: tup[0].toBytes(), reverse=True) + + table = [["Xmx", "Max RSS (MiB)", "Wall Clock (s)", "User Time (s)", "System Time (s)"]] + for heap, resources in sorted_results: + line = [str(heap)] + if resources is None: + line.extend(["-"]*4) + else: + line.append(str(resources.maxrss // 1024)) + line.append(str(resources.wall_clock_time)) + line.append(str(resources.user_time)) + line.append(str(resources.system_time)) + table.append(line) + + csv = "\n".join([",".join(row) for row in table]) + print(csv) + + +if __name__ == "__main__": + main() diff --git a/benchmark/scripts/monitor_job.py b/benchmark/scripts/monitor_job.py new file mode 100644 index 0000000000..f0465e7c7f --- /dev/null +++ b/benchmark/scripts/monitor_job.py @@ -0,0 +1,123 @@ +# See LICENSE for license details. + +""" +Utilities for running a subprocess and collecting runtime and memory use information. +""" + +from typing import NamedTuple, List +from enum import Enum +import sys +import subprocess +import re + +# All times in seconds, sizes in kibibytes (KiB) +JobResourceUse = NamedTuple('JobResourceUse', [('user_time', float), + ('system_time', float), + ('wall_clock_time', float), + ('maxrss', int)]) + + +class JobFailedError(Exception): + pass + + +def monitor_job(args: List[str], timeout=None) -> JobResourceUse: + """Run a job with resource monitoring, returns resource usage""" + platform = get_platform() + cmd = time(platform) + args + result = subprocess.run(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, + timeout=timeout) + if result.returncode != 0 : + msg = "[stdout]\n{}\n[stderr]\n{}".format(result.stdout.decode('utf-8'), + result.stderr.decode('utf-8')) + raise JobFailedError(msg) + stderr = result.stderr.decode('utf-8') + + user_time = extract_user_time(platform, stderr) + system_time = extract_system_time(platform, stderr) + wall_clock_time = extract_wall_clock_time(platform, stderr) + maxrss = extract_maxrss(platform, stderr) + return JobResourceUse(user_time, system_time, wall_clock_time, maxrss) + + +Platform = Enum('Platform', 'linux macos') + + +def get_platform() -> Platform: + p = sys.platform + if p == 'darwin': + return Platform.macos + elif p.startswith('linux'): + return Platform.linux + else: + raise Exception('Unsupported platform: ' + p) + + +def time(platform): + return { Platform.macos: ['/usr/bin/time', '-l'], + Platform.linux: ['/usr/bin/time', '-v'] }[platform] + + +def extract_maxrss(platform, output): + """Returns maxrss in kbytes""" + regex = { + Platform.macos: '(\d+)\s+maximum resident set size', + Platform.linux: 'Maximum resident set size[^:]*:\s+(\d+)' + }[platform] + + m = re.search(regex, output, re.MULTILINE) + if m : + return { + # /usr/bin/time -l on MacOS returns size in Bytes + Platform.macos: int(m.group(1)) // 1024, + # /usr/bin/time -v on Linux returns size in kibibytes + # https://stackoverflow.com/questions/61392725/kilobytes-or-kibibytes-in-gnu-time + Platform.linux: int(m.group(1)) + }[platform] + else : + raise Exception('Max set size not found!') + + +def extract_user_time(platform, output): + res = None + regex = { + Platform.macos: '(\d+\.\d+)\s+user', + Platform.linux: 'User time \(seconds\): (\d+\.\d+)' + }[platform] + m = re.search(regex, output, re.MULTILINE) + if m : + return float(m.group(1)) + raise Exception('User time not found!') + + +def extract_system_time(platform, output): + res = None + regex = { + Platform.macos: '(\d+\.\d+)\s+sys', + Platform.linux: 'System time \(seconds\): (\d+\.\d+)' + }[platform] + m = re.search(regex, output, re.MULTILINE) + if m : + return float(m.group(1)) + raise Exception('System time not found!') + + +def extract_wall_clock_time(platform, output): + res = None + regex = { + Platform.macos: '(\d+\.\d+)\s+real', + Platform.linux: 'Elapsed \(wall clock\) time \(h:mm:ss or m:ss\): ([0-9:.]+)' + }[platform] + m = re.search(regex, output, re.MULTILINE) + if m : + text = m.group(1) + if platform == Platform.macos: + return float(text) + if platform == Platform.linux: + parts = text.split(':') + if len(parts) == 3: + return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[0]) + if len(parts) == 2: + return float(parts[0]) * 60 + float(parts[1]) + raise Exception('Wall clock time not found!') +