[async] Set the default value of async_flush_every to 50 #2169

Merged · 5 commits · Jan 23, 2021
benchmarks/async_advection.py (2 changes: 1 addition & 1 deletion)

@@ -8,7 +8,7 @@
 
 @benchmark_async
 def simple_advection(scale):
-    n = 128 * 2**int((math.log(scale, 2)) // 2)
+    n = 256 * 2**int((math.log(scale, 2)) // 2)
     x = ti.Vector.field(3, dtype=ti.f32, shape=(n, n))
     new_x = ti.Vector.field(3, dtype=ti.f32, shape=(n, n))
     v = ti.Vector.field(2, dtype=ti.f32, shape=(n, n))
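The only change here doubles the base grid resolution: the exponent increments each time scale quadruples, so the new constant doubles n at every scale. A standalone sanity check of the formula (not part of the benchmark itself):

import math

# How the advection grid size n responds to `scale`,
# before and after this change.
for scale in (1, 2, 4, 8):
    exponent = int(math.log(scale, 2) // 2)
    print(scale, 128 * 2**exponent, 256 * 2**exponent)
# scale=1: 128 -> 256; scale=4: 256 -> 512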
benchmarks/async_cases.py (7 changes: 4 additions & 3 deletions)

@@ -165,8 +165,9 @@ def task():
 
 @benchmark_async
 def mpm_splitted(scale):
-    quality = int(scale**(1 /
-                          3))  # Use a larger value for higher-res simulations
+    quality = int(3 * scale**(1 / 3))
+    # Use a larger value for higher-res simulations
+
     n_particles, n_grid = 9000 * quality**2, 128 * quality
     dx, inv_dx = 1 / n_grid, float(n_grid)
     dt = 1e-4 / quality

@@ -294,7 +295,7 @@ def task():
        for s in range(int(2e-3 // dt)):
            substep()
 
-    ti.benchmark(task, repeat=10)
+    ti.benchmark(task, repeat=5)
 
 
 @benchmark_async
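For reference, the old formula int(scale**(1 / 3)) stays at 1 until scale reaches 8, so the simulation barely grew with scale; multiplying by 3 makes the resolution respond to it. A standalone check of the new formula (not part of the benchmark):

for scale in (1, 2, 8, 64):
    quality = int(3 * scale**(1 / 3))
    print(scale, quality, 9000 * quality**2, 128 * quality)
# At the CPU scale of 2 (see benchmarks/utils.py below):
# quality = 3, i.e. 81000 particles on a 384-cell grid.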
benchmarks/benchmark_async.py (14 changes: 12 additions & 2 deletions)

@@ -21,6 +21,16 @@
 
 ti.benchmark_plot(fn='benchmark.yml',
                   cases=case_names,
-                  archs=['x64', 'cuda'],
+                  columns=[
+                      'wall_clk_t', 'exec_t', 'launched_tasks',
+                      'compiled_inst', 'compiled_tasks'
+                  ],
+                  column_titles=[
+                      'Wall-clock time', 'Backend time', 'Tasks launched',
+                      'Instructions emitted', 'Tasks compiled'
+                  ],
+                  archs=['cuda', 'x64'],
                   title='Whole-Program Optimization Microbenchmarks',
                   bars='sync_vs_async',
-                  left_margin=0.2)
+                  left_margin=0.2,
+                  size=(11.5, 9))
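The columns here are the stat keys written via ti.stat_write during the runs (see python/taichi/lang/__init__.py below), while column_titles supplies the human-readable plot headers. Per the fallback added to benchmark_plot below, column_titles may be omitted. A minimal hypothetical invocation, plotting only the two timing columns, assuming cases defaults to everything in the YAML file:

import taichi as ti

# Hypothetical minimal call: with column_titles omitted, the raw
# stat keys are used as plot titles (see the benchmark_plot diff below).
ti.benchmark_plot(fn='benchmark.yml',
                  columns=['wall_clk_t', 'exec_t'],
                  archs=['cuda', 'x64'],
                  bars='sync_vs_async',
                  size=(8, 6))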
benchmarks/utils.py (5 changes: 4 additions & 1 deletion)

@@ -9,7 +9,10 @@ def body():
     for arch in [ti.cpu, ti.cuda]:
         for async_mode in [True, False]:
             os.environ['TI_CURRENT_BENCHMARK'] = func.__name__
-            ti.init(arch=arch, async_mode=async_mode, kernel_profiler=True)
+            ti.init(arch=arch,
+                    async_mode=async_mode,
+                    kernel_profiler=True,
+                    verbose=False)
             if arch == ti.cpu:
                 scale = 2
             else:
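For context, benchmark_async in this file is a decorator: each benchmark body is re-run for every (arch, async_mode) combination, with the case name exported via TI_CURRENT_BENCHMARK and a smaller scale on CPU; the verbose=False added here just quiets Taichi's startup logging. A simplified sketch of that pattern; everything outside the visible hunk (the registration logic, the GPU scale value) is an assumption:

import functools
import os

import taichi as ti

GPU_SCALE = 64  # hypothetical; the real GPU value lies below the visible hunk

def benchmark_async(func):
    # Simplified sketch of the harness pattern shown in the hunk above.
    @functools.wraps(func)
    def body():
        for arch in [ti.cpu, ti.cuda]:
            for async_mode in [True, False]:
                os.environ['TI_CURRENT_BENCHMARK'] = func.__name__
                ti.init(arch=arch,
                        async_mode=async_mode,
                        kernel_profiler=True,
                        verbose=False)
                scale = 2 if arch == ti.cpu else GPU_SCALE
                func(scale)
    return body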
python/taichi/lang/__init__.py (21 changes: 14 additions & 7 deletions)

@@ -371,20 +371,23 @@ def run_benchmark():
        avg = elapsed / repeat
        ti.stat_write('wall_clk_t', avg)
        device_time = ti.kernel_profiler_total_time()
-        ti.stat_write('exec_t', device_time)
+        avg_device_time = device_time / repeat
+        ti.stat_write('exec_t', avg_device_time)
 
     run_benchmark()
 
 
 def benchmark_plot(fn=None,
                    cases=None,
                    columns=None,
+                   column_titles=None,
                    archs=None,
                    title=None,
                    bars='sync_vs_async',
                    bar_width=0.4,
                    bar_distance=0,
-                   left_margin=0):
+                   left_margin=0,
+                   size=(12, 8)):
     import taichi as ti
     import yaml
     import matplotlib.pyplot as plt

@@ -412,13 +415,15 @@ def benchmark_plot(fn=None,
 
     if columns is None:
         columns = list(data[cases[0]].keys())
+    if column_titles is None:
+        column_titles = columns
     normalize_to_lowest = lambda x: True
     figure, subfigures = plt.subplots(len(cases), len(columns))
     if title is None:
         title = 'Taichi Performance Benchmarks (Higher means more)'
     figure.suptitle(title, fontweight="bold")
     for col_id in range(len(columns)):
-        subfigures[0][col_id].set_title(columns[col_id])
+        subfigures[0][col_id].set_title(column_titles[col_id])
     for case_id in range(len(cases)):
         case = cases[case_id]
         subfigures[case_id][0].annotate(

@@ -435,7 +440,9 @@
             if archs is None:
                 current_archs = data[case][col].keys()
             else:
-                current_archs = archs & data[case][col].keys()
+                current_archs = [
+                    x for x in archs if x in data[case][col].keys()
+                ]
             if bars == 'sync_vs_async':
                 y_left = [
                     data[case][col][arch]['sync'] for arch in current_archs

@@ -480,23 +487,23 @@
                               height=y_left,
                               width=bar_width,
                               label=label_left,
-                              color=(0.3, 0.7, 0.9, 1.0))
+                              color=(0.47, 0.69, 0.89, 1.0))
             bar_right = ax.bar(x=[
                 i + bar_width / 2 + bar_distance / 2
                 for i in range(len(current_archs))
             ],
                                height=y_right,
                                width=bar_width,
                                label=label_right,
-                               color=(0.8, 0.2, 0.3, 1.0))
+                               color=(0.68, 0.26, 0.31, 1.0))
             ax.set_xticks(range(len(current_archs)))
             ax.set_xticklabels(current_archs)
     figure.legend((bar_left, bar_right), (label_left, label_right),
                   loc='lower center')
     figure.subplots_adjust(left=left_margin)
 
     fig = plt.gcf()
-    fig.set_size_inches(13, 8)
+    fig.set_size_inches(size)
 
     plt.show()
 
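Two behavioural fixes hide among the cosmetic changes in this file. First, exec_t is now divided by repeat, so the reported backend time is a per-iteration average, consistent with wall_clk_t. Second, archs & data[case][col].keys() is a set intersection, which yields the architectures in arbitrary hash order; the list comprehension preserves the order the caller passed, so archs=['cuda', 'x64'] now reliably lists CUDA first on the x-axis. A standalone illustration of the ordering difference:

archs = ['cuda', 'x64']
available = {'x64': 0.5, 'cuda': 1.0}.keys()

print(archs & available)                     # a set: order is arbitrary, e.g. {'x64', 'cuda'}
print([a for a in archs if a in available])  # always ['cuda', 'x64']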
taichi/program/compile_config.h (3 changes: 2 additions & 1 deletion)

@@ -70,7 +70,8 @@ struct CompileConfig {
   bool async_opt_dse{true};
   bool async_listgen_fast_filtering{true};
   std::string async_opt_intermediate_file;
-  int async_flush_every{0};
+  // Setting 0 effectively means do not automatically flush
+  int async_flush_every{50};
   // Setting 0 effectively means unlimited
   int async_max_fuse_per_task{1};
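This last hunk is the change the PR title names: the async executor now flushes its pending task queue every 50 tasks by default, instead of never flushing automatically (the old default of 0). Taichi generally forwards CompileConfig fields as ti.init() keyword arguments; assuming that holds for async_flush_every, a sketch of overriding the new default:

import taichi as ti

# Sketch, assuming async_flush_every is accepted by ti.init() like
# other CompileConfig fields; 0 restores the old never-auto-flush behaviour.
ti.init(arch=ti.cpu, async_mode=True, async_flush_every=0)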