[benchmark] Upgrade to Time Performance Regression Test (TPRT) (#1002)
* [skip ci] Upgrade benchmark for TPRT

* [skip ci] add my comments

* [skip ci] pybenchmark :)

* [skip ci] tmp save (mature)

* [skip ci] fix typo

* [skip ci] add 'repeat='

* [skip ci] nit

* [skip ci] run 4 times to warm up

* [skip ci] fix typo
archibate authored May 19, 2020
1 parent 8454986 commit 660e973
Showing 11 changed files with 104 additions and 91 deletions.
Empty file removed benchmarks/__init__.py
9 changes: 0 additions & 9 deletions benchmarks/benchmark_baseline.txt

This file was deleted.

34 changes: 16 additions & 18 deletions benchmarks/fill_dense.py
@@ -3,6 +3,7 @@
# originally by @KLozes


+@ti.all_archs
def benchmark_flat_struct():
    N = 4096
    a = ti.var(dt=ti.f32, shape=(N, N))
@@ -12,9 +13,10 @@ def fill():
        for i, j in a:
            a[i, j] = 2.0

-    return ti.benchmark(fill)
+    return ti.benchmark(fill, repeat=500)


+@ti.all_archs
def benchmark_flat_range():
    N = 4096
    a = ti.var(dt=ti.f32, shape=(N, N))
@@ -24,9 +26,10 @@ def fill():
        for i, j in ti.ndrange(N, N):
            a[i, j] = 2.0

-    return ti.benchmark(fill)
+    return ti.benchmark(fill, repeat=700)


+@ti.all_archs
def benchmark_nested_struct():
    a = ti.var(dt=ti.f32)
    N = 512
@@ -40,11 +43,10 @@ def fill():
        for i, j in a:
            a[i, j] = 2.0

-    fill()
-
-    return ti.benchmark(fill)
+    return ti.benchmark(fill, repeat=700)


+@ti.all_archs
def benchmark_nested_struct_listgen_8x8():
    a = ti.var(dt=ti.f32)
    ti.cfg.demote_dense_struct_fors = False
@@ -59,11 +61,10 @@ def fill():
        for i, j in a:
            a[i, j] = 2.0

-    fill()
-
-    return ti.benchmark(fill)
+    return ti.benchmark(fill, repeat=1000)


+@ti.all_archs
def benchmark_nested_struct_listgen_16x16():
    a = ti.var(dt=ti.f32)
    ti.cfg.demote_dense_struct_fors = False
@@ -78,11 +79,10 @@ def fill():
        for i, j in a:
            a[i, j] = 2.0

-    fill()
-
-    return ti.benchmark(fill)
+    return ti.benchmark(fill, repeat=700)


+@ti.all_archs
def benchmark_nested_range_blocked():
    a = ti.var(dt=ti.f32)
    N = 512
@@ -97,11 +97,10 @@ def fill():
            for Y in range(64):
                a[X // N * 8 + Y // 8, X % N * 8 + Y % 8] = 2.0

-    fill()
-
-    return ti.benchmark(fill)
+    return ti.benchmark(fill, repeat=800)


+@ti.all_archs
def benchmark_nested_range():
    a = ti.var(dt=ti.f32)
    N = 512
@@ -116,9 +115,10 @@ def fill():
            for i in range(N * 8):
                a[i, j] = 2.0

-    return ti.benchmark(fill)
+    return ti.benchmark(fill, repeat=1000)


+@ti.all_archs
def benchmark_root_listgen():
    a = ti.var(dt=ti.f32)
    ti.cfg.demote_dense_struct_fors = False
@@ -133,9 +133,7 @@ def fill():
        for i, j in a.parent():
            a[i, j] = 2.0

-    fill()
-
-    return ti.benchmark(fill)
+    return ti.benchmark(fill, repeat=800)


'''
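The per-case repeat= counts above feed the reworked ti.benchmark helper (see the python/taichi/lang/__init__.py diff at the bottom of this page). As a rough, self-contained model of its timing logic (a few warm-up calls, then an averaged timed loop reported in milliseconds), here is a plain-Python sketch; the dummy workload and the omitted device synchronization are deliberate simplifications:

import time

def benchmark_model(func, repeat=300, warmup=4):
    # Model of the scheme in this PR: warm-up calls first (JIT compilation
    # plus instruction/data caches), then an averaged timed loop.
    for _ in range(warmup):
        func()
    t = time.time()
    for _ in range(repeat):
        func()
    elapsed = time.time() - t
    return elapsed / repeat * 1000  # average milliseconds per call

# Example: time a trivial CPU workload 500 times.
print(f'{benchmark_model(lambda: sum(range(10000)), repeat=500):.4f} ms')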
2 changes: 2 additions & 0 deletions benchmarks/fill_sparse.py
@@ -1,6 +1,7 @@
import taichi as ti


+@ti.archs_support_sparse
def benchmark_nested_struct():
    a = ti.var(dt=ti.f32)
    N = 512
@@ -19,6 +20,7 @@ def fill():
    return ti.benchmark(fill)


+@ti.archs_support_sparse
def benchmark_nested_struct_fill_and_clear():
    a = ti.var(dt=ti.f32)
    N = 512
4 changes: 4 additions & 0 deletions benchmarks/memory_bound.py
@@ -4,6 +4,7 @@


# 4 B/it
+@ti.archs_excluding(ti.opengl)
def benchmark_memset():
    a = ti.var(dt=ti.f32, shape=N)

@@ -16,6 +17,7 @@ def memset():


# 8 B/it
+@ti.archs_excluding(ti.opengl)
def benchmark_sscal():
    a = ti.var(dt=ti.f32, shape=N)

@@ -28,6 +30,7 @@ def task():


# 8 B/it
+@ti.archs_excluding(ti.opengl)
def benchmark_memcpy():
    a = ti.var(dt=ti.f32, shape=N)
    b = ti.var(dt=ti.f32, shape=N)
@@ -41,6 +44,7 @@ def memcpy():


# 12 B/it
+@ti.archs_excluding(ti.opengl)
def benchmark_saxpy():
    x = ti.var(dt=ti.f32, shape=N)
    y = ti.var(dt=ti.f32, shape=N)
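The # 4 B/it, # 8 B/it and # 12 B/it comments record bytes moved per loop iteration, so a reported average time converts directly into achieved memory bandwidth. A minimal sketch of that conversion (the helper and its argument names are ours, not part of this PR):

def bandwidth_gb_per_s(avg_ms, bytes_per_iter, n_iters):
    # e.g. saxpy moves 12 bytes per f32 iteration: read x, read y, write y.
    seconds = avg_ms / 1000
    return bytes_per_iter * n_iters / seconds / 2**30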
3 changes: 2 additions & 1 deletion benchmarks/minimal.py
@@ -1,11 +1,12 @@
import taichi as ti


+@ti.all_archs
def benchmark_fill_scalar():
    a = ti.var(dt=ti.f32, shape=())

    @ti.kernel
    def fill():
        a[None] = 1.0

-    return ti.benchmark(fill)
+    return ti.benchmark(fill, repeat=1000)
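minimal.py is the smallest instance of the new pattern: decorate the case, build the field, hand the kernel to ti.benchmark. Assuming @ti.all_archs behaves like Taichi's test helpers of this era (calling the body once per supported backend), a single case could presumably be driven by hand as below; this invocation is hypothetical and not part of the PR:

import os

# Name the case so ti.stat_write knows which .dat file to produce.
os.environ['TI_CURRENT_BENCHMARK'] = 'benchmark_fill_scalar'

from minimal import benchmark_fill_scalar  # the module shown above

# Runs once per supported arch, writing e.g.
# benchmark_fill_scalar__arch_x64.dat into the current directory.
benchmark_fill_scalar()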
13 changes: 8 additions & 5 deletions benchmarks/mpm2d.py
@@ -1,9 +1,10 @@
+import time
import taichi as ti
import numpy as np
-import time


+@ti.all_archs
def benchmark_range():
-    import taichi as ti
    quality = 1  # Use a larger value for higher-res simulations
    n_particles, n_grid = 9000 * quality**2, 128 * quality
    dx, inv_dx = 1 / n_grid, float(n_grid)
@@ -124,11 +125,12 @@ def substep():
        # gui.circles(x.to_numpy(), radius=1.5, color=colors[material.to_numpy()])
        # gui.show()  # Change to gui.show(f'{frame:06d}.png') to write images to disk
    ti.get_runtime().sync()
-    return (time.time() - t) / 4000
+    avg = (time.time() - t) / 4000 * 1000  # milliseconds
+    ti.stat_write(avg)


+@ti.all_archs
def benchmark_struct():
-    import taichi as ti
    quality = 1  # Use a larger value for higher-res simulations
    n_particles, n_grid = 9000 * quality**2, 128 * quality
    dx, inv_dx = 1 / n_grid, float(n_grid)
@@ -251,4 +253,5 @@ def substep():
        # gui.circles(x.to_numpy(), radius=1.5, color=colors[material.to_numpy()])
        # gui.show()  # Change to gui.show(f'{frame:06d}.png') to write images to disk
    ti.get_runtime().sync()
-    return (time.time() - t) / 4000
+    avg = (time.time() - t) / 4000 * 1000  # milliseconds
+    ti.stat_write(avg)
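Unlike the fill benchmarks, the two mpm2d cases time their substep loops by hand and then report through the same ti.stat_write channel. A hedged sketch of that reporting pattern, factored into a helper (the helper is ours; 4000 matches the substep count divided out in the diff, and the ti.* calls assume the 0.6-era API used throughout this PR):

import time
import taichi as ti

def report_substep_timing(substep, n_substeps=4000):
    # Time the whole loop, wait for outstanding device work,
    # then report the average in milliseconds per substep.
    t = time.time()
    for _ in range(n_substeps):
        substep()
    ti.get_runtime().sync()
    avg = (time.time() - t) / n_substeps * 1000  # milliseconds
    ti.stat_write(avg)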
47 changes: 14 additions & 33 deletions benchmarks/run.py
@@ -2,6 +2,10 @@
import taichi as ti


+def get_benchmark_dir():
+    return os.path.join(ti.core.get_repo_dir(), 'benchmarks')
+
+
class Case:
    def __init__(self, name, func):
        self.name = name
@@ -14,20 +18,10 @@ def __lt__(self, other):
    def __eq__(self, other):
        return self.name == other.name

-    def pprint(self):
-        print(f' * {self.name[10:]:33}', end='')
-        for i, arch in enumerate(sorted(self.records.keys())):
-            ms = self.records[arch] * 1000
-            arch_name = str(arch)[5:]
-            print(f' {arch_name:8} {ms:7.3f} ms', end='')
-            if i < len(self.records) - 1:
-                print(' ', end='')
-        print()
-
-    def run(self, arch):
-        ti.init(arch=arch)
-        t = self.func()
-        self.records[arch] = t
+    def run(self):
+        print(f'==> {self.name}:')
+        os.environ['TI_CURRENT_BENCHMARK'] = self.name
+        self.func()


class Suite:
@@ -42,38 +36,25 @@ def __init__(self, filename):
            sorted(filter(lambda x: x.startswith('benchmark_'), dir(suite))))
        self.cases = [Case(k, getattr(suite, k)) for k in case_keys]

-    def print(self):
-        print(f'{self.name}:')
-        for b in self.cases:
-            b.pprint()
-
-    def run(self, arch):
+    def run(self):
        print(f'{self.name}:')
        for case in sorted(self.cases):
-            case.run(arch)
+            case.run()


class TaichiBenchmark:
    def __init__(self):
        self.suites = []
-        benchmark_dir = os.path.dirname(__file__)
+        benchmark_dir = get_benchmark_dir()
        for f in map(os.path.basename, sorted(os.listdir(benchmark_dir))):
            if f != 'run.py' and f.endswith('.py') and f[0] != '_':
                self.suites.append(Suite(f))

-    def pprint(self):
-        for s in self.suites:
-            s.print()
-
-    def run(self, arch):
+    def run(self):
        print("Running...")
        for s in self.suites:
-            s.run(arch)
+            s.run()


b = TaichiBenchmark()
-b.pprint()
-b.run(ti.x64)
-b.run(ti.cuda)
-print()
-b.pprint()
+b.run()
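With per-case timing delegated to ti.benchmark and ti.stat_write, the runner only names each case (via TI_CURRENT_BENCHMARK) and calls it. One plausible way to invoke it while collecting the .dat files into a chosen directory; the directory path here is an assumption, and TI_BENCHMARK_OUTPUT_DIR is the variable stat_write reads in the lang/__init__.py diff below:

import os
import subprocess

out = '/tmp/taichi_benchmark'
os.makedirs(out, exist_ok=True)
env = dict(os.environ, TI_BENCHMARK_OUTPUT_DIR=out)

# Runs every suite; each case leaves one '<case>__arch_<arch>.dat' file in `out`.
subprocess.run(['python', 'benchmarks/run.py'], env=env, check=True)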
12 changes: 0 additions & 12 deletions docs/utilities.rst
@@ -55,19 +55,7 @@ For example, this is part of the output by ``ti regression`` after enabling cons
Discussion at: https://github.com/taichi-dev/taichi/issue/948


-<<<<<<< HEAD
-The suggested workflow for the performance related PR author to run the regression tests is:
-=======
The suggested workflow for **the PR author** to run the regression tests is:

-* When a performance related PR is ready, checkout that PR locally.
-
-* Run ``ti benchmark && ti regression`` to obtain the result.
-
-* Decide wheater to approve or request change, depends on the result.
-
-* Right before merge, run ``ti baseline`` to save the benchmark result as new baseline.
->>>>>>> master

* Run ``ti benchmark && ti baseline`` in ``master`` to save the current performance as baseline.

21 changes: 18 additions & 3 deletions python/taichi/lang/__init__.py
@@ -247,18 +247,33 @@ def static_print(*args, __p=print, **kwargs):
    __p(*args, **kwargs)


-def benchmark(func, repeat=100, args=()):
+def benchmark(func, repeat=300, args=()):
    import taichi as ti
    import time
-    for i in range(repeat // 3):
+    # The reason why we run 4 times is to warm up instruction/data caches.
+    # Discussion: https://github.com/taichi-dev/taichi/pull/1002#discussion_r426312136
+    for i in range(4):
        func(*args)  # compile the kernel first
        ti.sync()
    t = time.time()
    for n in range(repeat):
        func(*args)
    ti.get_runtime().sync()
    elapsed = time.time() - t
-    return elapsed / repeat
+    avg = elapsed / repeat * 1000  # milliseconds
+    ti.stat_write(avg)
+
+
+def stat_write(avg):
+    name = os.environ.get('TI_CURRENT_BENCHMARK')
+    if name is None:
+        return
+    import taichi as ti
+    arch_name = ti.core.arch_name(ti.cfg.arch)
+    output_dir = os.environ.get('TI_BENCHMARK_OUTPUT_DIR', '.')
+    filename = f'{output_dir}/{name}__arch_{arch_name}.dat'
+    with open(filename, 'w') as f:
+        f.write(f'time_avg: {avg:.4f}')


def supported_archs():
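Each run therefore leaves one file per case and backend, named <case>__arch_<arch>.dat and holding a single time_avg: <ms> line. A regression checker (such as the ti regression flow mentioned in docs/utilities.rst above) could read them back along these lines; a minimal sketch assuming only the file format shown in stat_write:

import glob
import os

def load_results(output_dir='.'):
    # Collect {(case, arch): average_ms} from the files stat_write produces.
    results = {}
    for path in glob.glob(os.path.join(output_dir, '*__arch_*.dat')):
        stem = os.path.basename(path)[:-len('.dat')]
        case, arch = stem.split('__arch_')
        with open(path) as f:
            key, value = f.read().split(':')
            assert key == 'time_avg'
            results[(case, arch)] = float(value)  # milliseconds
    return results

# Example: flag cases that got more than 5% slower than a saved baseline.
def regressions(current, baseline, tolerance=1.05):
    return {k: (baseline[k], v) for k, v in current.items()
            if k in baseline and v > baseline[k] * tolerance}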