diff --git a/benchmarks/mpm2d.py b/benchmarks/mpm2d.py index c88a747513c5a..8f52f59fb7d70 100644 --- a/benchmarks/mpm2d.py +++ b/benchmarks/mpm2d.py @@ -117,21 +117,7 @@ def substep(): F[i] = [[1, 0], [0, 1]] Jp[i] = 1 - compile_time = time.time() - substep() - compile_time = time.time() - compile_time - ti.stat_write_yaml('compilation_time(s)', compile_time) - ti.get_runtime().sync() - t = time.time() - for frame in range(200): - for s in range(20): - substep() - # colors = np.array([0x068587, 0xED553B, 0xEEEEF0], dtype=np.uint32) - # gui.circles(x.to_numpy(), radius=1.5, color=colors[material.to_numpy()]) - # gui.show() # Change to gui.show(f'{frame:06d}.png') to write images to disk - ti.get_runtime().sync() - avg = (time.time() - t) / 4000 * 1000 # miliseconds - ti.stat_write_yaml('running_time(ms)', avg) + ti.benchmark(substep, repeat=4000) @ti.archs_excluding(ti.opengl) @@ -250,18 +236,4 @@ def substep(): F[i] = [[1, 0], [0, 1]] Jp[i] = 1 - compile_time = time.time() - substep() - compile_time = time.time() - compile_time - ti.stat_write_yaml('compilation_time(s)', compile_time) - ti.get_runtime().sync() - t = time.time() - for frame in range(200): - for s in range(20): - substep() - # colors = np.array([0x068587, 0xED553B, 0xEEEEF0], dtype=np.uint32) - # gui.circles(x.to_numpy(), radius=1.5, color=colors[material.to_numpy()]) - # gui.show() # Change to gui.show(f'{frame:06d}.png') to write images to disk - ti.get_runtime().sync() - avg = (time.time() - t) / 4000 * 1000 # miliseconds - ti.stat_write_yaml('running_time(ms)', avg) + ti.benchmark(substep, repeat=4000) diff --git a/python/taichi/lang/__init__.py b/python/taichi/lang/__init__.py index 5d9f0b4b4ca6f..71191f9f98002 100644 --- a/python/taichi/lang/__init__.py +++ b/python/taichi/lang/__init__.py @@ -321,25 +321,47 @@ def visit(node): def benchmark(func, repeat=300, args=()): import taichi as ti import time - compile_time = time.time() - func(*args) - compile_time = time.time() - compile_time - ti.stat_write_yaml('compilation_time(s)', compile_time) - # The reason why we run 4 times is to warm up instruction/data caches. - # Discussion: https://github.com/taichi-dev/taichi/pull/1002#discussion_r426312136 - for i in range(4): - func(*args) # compile the kernel first - ti.sync() - t = time.time() - for n in range(repeat): - func(*args) - ti.get_runtime().sync() - elapsed = time.time() - t - avg = elapsed / repeat * 1000 # miliseconds - ti.stat_write_yaml('running_time(ms)', avg) - -def stat_write_yaml(key, value): + def run_benchmark(): + compile_time = time.time() + func(*args) + compile_time = time.time() - compile_time + ti.stat_write('compilation_time', compile_time) + codegen_stat = ti.core.stat() + for line in codegen_stat.split('\n'): + try: + a, b = line.strip().split(':') + except: + continue + a = a.strip() + b = int(float(b)) + if a == 'codegen_kernel_statements': + ti.stat_write('instructions', b) + if a == 'codegen_offloaded_tasks': + ti.stat_write('offloaded_tasks', b) + elif a == 'launched_kernels': + ti.stat_write('launched_kernels', b) + # The reason why we run 4 times is to warm up instruction/data caches. + # Discussion: https://github.com/taichi-dev/taichi/pull/1002#discussion_r426312136 + for i in range(4): + func(*args) # compile the kernel first + ti.sync() + t = time.time() + for n in range(repeat): + func(*args) + ti.get_runtime().sync() + elapsed = time.time() - t + avg = elapsed / repeat + ti.stat_write('running_time', avg) + + ti.cfg.async_mode = False + run_benchmark() + if ti.is_extension_supported(ti.cfg.arch, ti.extension.async_mode): + ti.cfg.async_mode = True + run_benchmark() + + +def stat_write(key, value): import taichi as ti import yaml case_name = os.environ.get('TI_CURRENT_BENCHMARK') @@ -348,6 +370,7 @@ def stat_write_yaml(key, value): if case_name.startswith('benchmark_'): case_name = case_name[10:] arch_name = core.arch_name(ti.cfg.arch) + async_mode = 'async' if ti.cfg.async_mode else 'sync' output_dir = os.environ.get('TI_BENCHMARK_OUTPUT_DIR', '.') filename = f'{output_dir}/benchmark.yml' try: @@ -357,7 +380,8 @@ def stat_write_yaml(key, value): data = {} data.setdefault(key, {}) data[key].setdefault(case_name, {}) - data[key][case_name][arch_name] = value + data[key][case_name].setdefault(async_mode, {}) + data[key][case_name][async_mode][arch_name] = value with open(filename, 'w') as f: yaml.dump(data, f, Dumper=yaml.SafeDumper) diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp index 90f44e487708e..8b44712a95de6 100644 --- a/taichi/python/export_lang.cpp +++ b/taichi/python/export_lang.cpp @@ -595,6 +595,11 @@ void export_lang(py::module &m) { m.def("is_extension_supported", is_extension_supported); m.def("print_stat", [] { stat.print(); }); + m.def("stat", [] { + std::string result; + stat.print(&result); + return result; + }); m.def("record_action_hint", [](std::string content) { ActionRecorder::get_instance().record("hint",