From 56689ad61a7fad8dfd6818ef6be9a6d7e8ac1c85 Mon Sep 17 00:00:00 2001 From: Zihua Wu Date: Mon, 25 Jul 2022 09:41:14 +0800 Subject: [PATCH] [misc] ti.Tape -> ti.ad.Tape (#5501) --- cpp_examples/autograd.cpp | 2 +- docs/lang/articles/advanced/odop.md | 4 +-- .../differentiable_programming.md | 34 +++++++++---------- misc/demo_record_kernel_group.py | 2 +- python/taichi/ad/_ad.py | 6 ++-- python/taichi/aot/record.py | 2 +- .../taichi/examples/autodiff/minimization.py | 2 +- python/taichi/examples/autodiff/regression.py | 2 +- .../examples/ggui_examples/fem128_ggui.py | 2 +- .../taichi/examples/simulation/ad_gravity.py | 2 +- python/taichi/examples/simulation/fem128.py | 2 +- python/taichi/examples/simulation/fem99.py | 2 +- .../simulation/mpm_lagrangian_forces.py | 2 +- .../examples/autodiff/test_minimization.py | 2 +- tests/python/test_ad_basics.py | 6 ++-- tests/python/test_ad_for.py | 26 +++++++------- tests/python/test_ad_offload.py | 2 +- tests/python/test_customized_grad.py | 14 ++++---- tests/python/test_kernel_templates.py | 2 +- tests/python/test_no_grad.py | 2 +- tests/python/test_oop.py | 10 +++--- tests/python/test_stop_grad.py | 6 ++-- 22 files changed, 67 insertions(+), 67 deletions(-) diff --git a/cpp_examples/autograd.cpp b/cpp_examples/autograd.cpp index 1f46a1e338ab3..dcf5324b14ecb 100644 --- a/cpp_examples/autograd.cpp +++ b/cpp_examples/autograd.cpp @@ -30,7 +30,7 @@ void autograd() { energy += c[i] init() - with ti.Tape(energy): + with ti.ad.Tape(energy): cal() support() diff --git a/docs/lang/articles/advanced/odop.md b/docs/lang/articles/advanced/odop.md index 679b3d07cb07b..96c9e537bcec7 100644 --- a/docs/lang/articles/advanced/odop.md +++ b/docs/lang/articles/advanced/odop.md @@ -225,7 +225,7 @@ print(arr.val[0, 0]) # 3 arr.inc2(4) print(arr.val[0, 0]) # 7 -with ti.Tape(loss=arr.total): +with ti.ad.Tape(loss=arr.total): arr.reduce() for i in range(arr.n): @@ -236,7 +236,7 @@ for i in range(arr.n): def double(): double_total[None] = 2 * arr.total[None] -with ti.Tape(loss=double_total): +with ti.ad.Tape(loss=double_total): arr.reduce() double() diff --git a/docs/lang/articles/differentiable/differentiable_programming.md b/docs/lang/articles/differentiable/differentiable_programming.md index 41657520256c2..b95fa0b0bc3b0 100644 --- a/docs/lang/articles/differentiable/differentiable_programming.md +++ b/docs/lang/articles/differentiable/differentiable_programming.md @@ -47,16 +47,16 @@ really error-prone and hard to maintain. If you run into this situation, Taichi's handy automatic differentiation (autodiff) system comes to your rescue! Taichi supports gradient evaluation through -either `ti.Tape()` or the more flexible `kernel.grad()` syntax. +either `ti.ad.Tape()` or the more flexible `kernel.grad()` syntax. -## Using `ti.Tape()` +## Using `ti.ad.Tape()` Let's still take the `compute_y` kernel above for an explanation. -Using `ti.Tape()` is the easiest way to obtain a kernel that computes `dy/dx`: +Using `ti.ad.Tape()` is the easiest way to obtain a kernel that computes `dy/dx`: 1. Enable `needs_grad=True` option when declaring fields involved in the derivative chain. -2. Use context manager `with ti.Tape(y):` to capture the kernel invocations which you want to automatically differentiate. +2. Use context manager `with ti.ad.Tape(y):` to capture the kernel invocations which you want to automatically differentiate. 3. Now `dy/dx` value at current `x` is available at `x.grad[None]`. 
The following code snippet explains the steps above: @@ -69,7 +69,7 @@ y = ti.field(dtype=ti.f32, shape=(), needs_grad=True) def compute_y(): y[None] = ti.sin(x[None]) -with ti.Tape(y): +with ti.ad.Tape(y): compute_y() print('dy/dx =', x.grad[None], ' at x =', x[None]) @@ -119,7 +119,7 @@ def advance(): def substep(): - with ti.Tape(loss=U): + with ti.ad.Tape(loss=U): # Kernel invocations in this scope will later contribute to partial derivatives of # U with respect to input variables such as x. compute_U( @@ -144,7 +144,7 @@ while gui.running: :::note -The argument `U` to `ti.Tape(U)` must be a 0D field. +The argument `U` to `ti.ad.Tape(U)` must be a 0D field. To use autodiff with multiple output variables, see the `kernel.grad()` usage below. @@ -152,7 +152,7 @@ To use autodiff with multiple output variables, see the :::note -`ti.Tape(U)` automatically sets _`U[None]`_ to `0` on +`ti.ad.Tape(U)` automatically sets _`U[None]`_ to `0` on start up. ::: @@ -166,13 +166,13 @@ for examples on using autodiff-based force evaluation MPM and FEM. ## Using `kernel.grad()` -As mentioned above, `ti.Tape()` can only track a 0D field as the output variable. +As mentioned above, `ti.ad.Tape()` can only track a 0D field as the output variable. If there are multiple output variables that you want to back-propagate -gradients to inputs, call `kernel.grad()` instead of `ti.Tape()`. -Different from using `ti.Tape()`, you need to set the `grad` of the output variables themselves to `1` manually +gradients to inputs, call `kernel.grad()` instead of `ti.ad.Tape()`. +Different from using `ti.ad.Tape()`, you need to set the `grad` of the output variables themselves to `1` manually before calling `kernel.grad()`. The reason is that the `grad` of the output variables themselves will always be multiplied to the `grad` with respect to the inputs at the end of the back-propagation. -By calling `ti.Tape()`, you have the program do this under the hood. +By calling `ti.ad.Tape()`, you have the program do this under the hood. ```python {13-14} import taichi as ti @@ -257,10 +257,10 @@ for i in range(N): b[None] = 10 loss.grad[None] = 1 -with ti.Tape(loss): +with ti.ad.Tape(loss): func_broke_rule_1() # Call func_equivalent to see the correct result -# with ti.Tape(loss): +# with ti.ad.Tape(loss): # func_equivalent() assert x.grad[1] == 10.0 @@ -357,7 +357,7 @@ def manipulation_in_kernel(): x[None] = 0.0 -with ti.Tape(loss=loss): +with ti.ad.Tape(loss=loss): # The line below in python scope only contribute to the forward pass # but not the backward pass i.e., not auto-differentiated. loss[None] += ti.sin(x[None]) + 1.0 @@ -412,13 +412,13 @@ def forward(mul): def backward(mul): func.grad(mul) -with ti.Tape(loss=total): +with ti.ad.Tape(loss=total): forward(4) assert x.grad[0] == 4 ``` -Customized gradient function works with both `ti.Tape()` and `kernel.grad()`. More examples can be found at `test_customized_grad.py`. +Customized gradient function works with both `ti.ad.Tape()` and `kernel.grad()`. More examples can be found at `test_customized_grad.py`. 
### Checkpointing diff --git a/misc/demo_record_kernel_group.py b/misc/demo_record_kernel_group.py index 3a65ea33a1fd0..f12aa544c9a06 100644 --- a/misc/demo_record_kernel_group.py +++ b/misc/demo_record_kernel_group.py @@ -21,6 +21,6 @@ def do_some_works(): with ti.aot.RecordKernelGroup('my_substep'): x.fill(0) - with ti.Tape(loss): + with ti.ad.Tape(loss): compute_loss() do_some_works() diff --git a/python/taichi/ad/_ad.py b/python/taichi/ad/_ad.py index 31286dd6c4053..46649ec12036e 100644 --- a/python/taichi/ad/_ad.py +++ b/python/taichi/ad/_ad.py @@ -36,7 +36,7 @@ def __init__(self, loss=None, clear_gradients=True): >>> for I in ti.grouped(x): >>> y[None] += x[I] ** a >>> - >>> with ti.Tape(loss = y): + >>> with ti.ad.Tape(loss = y): >>> sum(2) """ self.calls = [] @@ -111,7 +111,7 @@ def visit(node): def grad_replaced(func): """A decorator for python function to customize gradient with Taichi's autodiff - system, e.g. `ti.Tape()` and `kernel.grad()`. + system, e.g. `ti.ad.Tape()` and `kernel.grad()`. This decorator forces Taichi's autodiff system to use a user-defined gradient function for the decorated function. Its customized gradient must be decorated @@ -186,7 +186,7 @@ def decorated(*args, **kwargs): def no_grad(func): """A decorator for python function to skip gradient calculation within Taichi's - autodiff system, e.g. `ti.Tape()` and `kernel.grad()`. + autodiff system, e.g. `ti.ad.Tape()` and `kernel.grad()`. This decorator forces Taichi's autodiff system to use an empty gradient function for the decorated function. diff --git a/python/taichi/aot/record.py b/python/taichi/aot/record.py index 2f1e873f492c3..76cd88bd9b113 100644 --- a/python/taichi/aot/record.py +++ b/python/taichi/aot/record.py @@ -40,7 +40,7 @@ def start_recording(filename): >>> for i in x: >>> x[i] -= x.grad[i] >>> - >>> with ti.Tape(loss): + >>> with ti.ad.Tape(loss): >>> compute_loss() >>> do_some_works() """ diff --git a/python/taichi/examples/autodiff/minimization.py b/python/taichi/examples/autodiff/minimization.py index 1a5f2708e05e5..5d142fae21ea8 100644 --- a/python/taichi/examples/autodiff/minimization.py +++ b/python/taichi/examples/autodiff/minimization.py @@ -30,7 +30,7 @@ def main(): # Optimize with 100 gradient descent iterations for k in range(100): - with ti.Tape(loss=L): + with ti.ad.Tape(loss=L): reduce() print('Loss =', L[None]) gradient_descent() diff --git a/python/taichi/examples/autodiff/regression.py b/python/taichi/examples/autodiff/regression.py index 8bdafbbd88c51..7b505c2811b3e 100644 --- a/python/taichi/examples/autodiff/regression.py +++ b/python/taichi/examples/autodiff/regression.py @@ -59,7 +59,7 @@ def regress_raw(): for i in range(1000): if use_tape: - with ti.Tape(loss=loss): + with ti.ad.Tape(loss=loss): regress() else: ti.clear_all_gradients() diff --git a/python/taichi/examples/ggui_examples/fem128_ggui.py b/python/taichi/examples/ggui_examples/fem128_ggui.py index cb62da87d4a3c..14f35484e6209 100644 --- a/python/taichi/examples/ggui_examples/fem128_ggui.py +++ b/python/taichi/examples/ggui_examples/fem128_ggui.py @@ -172,7 +172,7 @@ def main(): attractor_strength[None] = window.is_pressed( ti.ui.LMB) - window.is_pressed(ti.ui.RMB) for i in range(50): - with ti.Tape(loss=U): + with ti.ad.Tape(loss=U): update_U() advance() render() diff --git a/python/taichi/examples/simulation/ad_gravity.py b/python/taichi/examples/simulation/ad_gravity.py index 811346dd36756..38af146f0aeb6 100644 --- a/python/taichi/examples/simulation/ad_gravity.py +++ 
b/python/taichi/examples/simulation/ad_gravity.py @@ -29,7 +29,7 @@ def advance(): def substep(): - with ti.Tape(loss=U): + with ti.ad.Tape(loss=U): # Kernel invocations in this scope will later contribute to partial derivatives of # U with respect to input variables such as x. compute_U( diff --git a/python/taichi/examples/simulation/fem128.py b/python/taichi/examples/simulation/fem128.py index 75f16405d21da..b11cf8f801b19 100644 --- a/python/taichi/examples/simulation/fem128.py +++ b/python/taichi/examples/simulation/fem128.py @@ -132,7 +132,7 @@ def main(): attractor_strength[None] = gui.is_pressed(gui.LMB) - gui.is_pressed( gui.RMB) for i in range(50): - with ti.Tape(loss=U): + with ti.ad.Tape(loss=U): update_U() advance() paint_phi(gui) diff --git a/python/taichi/examples/simulation/fem99.py b/python/taichi/examples/simulation/fem99.py index 64d25cf12c25d..264aaf10bcde1 100644 --- a/python/taichi/examples/simulation/fem99.py +++ b/python/taichi/examples/simulation/fem99.py @@ -100,7 +100,7 @@ def main(): elif e.key == 'r': init_pos() for i in range(30): - with ti.Tape(loss=U): + with ti.ad.Tape(loss=U): update_U() advance() gui.circles(pos.to_numpy(), radius=2, color=0xffaa33) diff --git a/python/taichi/examples/simulation/mpm_lagrangian_forces.py b/python/taichi/examples/simulation/mpm_lagrangian_forces.py index 3da6708cafade..7c6510e0aedf4 100644 --- a/python/taichi/examples/simulation/mpm_lagrangian_forces.py +++ b/python/taichi/examples/simulation/mpm_lagrangian_forces.py @@ -166,7 +166,7 @@ def main(): grid_v.fill(0) # Note that we are now differentiating the total energy w.r.t. the particle position. # Recall that F = - \partial (total_energy) / \partial x - with ti.Tape(total_energy): + with ti.ad.Tape(total_energy): # Do the forward computation of total energy and backward propagation for x.grad, which is later used in p2g compute_total_energy() # It's OK not to use the computed total_energy at all, since we only need x.grad diff --git a/tests/python/examples/autodiff/test_minimization.py b/tests/python/examples/autodiff/test_minimization.py index 272949eedaa79..51c0db5ab8c77 100644 --- a/tests/python/examples/autodiff/test_minimization.py +++ b/tests/python/examples/autodiff/test_minimization.py @@ -14,7 +14,7 @@ def test_minimization(): y[i] = random.random() for k in range(100): - with ti.Tape(loss=L): + with ti.ad.Tape(loss=L): reduce() gradient_descent() diff --git a/tests/python/test_ad_basics.py b/tests/python/test_ad_basics.py index 5cb83ac56767e..b672480328dd5 100644 --- a/tests/python/test_ad_basics.py +++ b/tests/python/test_ad_basics.py @@ -426,7 +426,7 @@ def test_ad_precision_2(): def func(): loss[None] = x[None] - with ti.Tape(loss): + with ti.ad.Tape(loss): func() assert x.grad[None] == 1 @@ -443,7 +443,7 @@ def work(): x[None] = 10 with pytest.raises(RuntimeError) as e: - with ti.Tape(loss): + with ti.ad.Tape(loss): work() assert 'RandStmt not supported' in e.value.args[0] @@ -472,7 +472,7 @@ def calc_loss(input_field: ti.template(), loss: ti.template()): field1 = ti.field(dtype=ti.f32, shape=(n, ), needs_grad=True) loss = ti.field(dtype=ti.f32, shape=(), needs_grad=True) - with ti.Tape(loss): + with ti.ad.Tape(loss): ti_frac(field0, field1) calc_loss(field1, loss) diff --git a/tests/python/test_ad_for.py b/tests/python/test_ad_for.py index cceea48764849..6b2913743b69f 100644 --- a/tests/python/test_ad_for.py +++ b/tests/python/test_ad_for.py @@ -454,7 +454,7 @@ def mixed_inner_loops_tape(): loss[None] += ti.sin(x[None]) + 1.0 x[None] = 0.0 - with 
ti.Tape(loss=loss): + with ti.ad.Tape(loss=loss): mixed_inner_loops_tape() assert loss[None] == 10.0 @@ -479,7 +479,7 @@ def test_inner_loops_local_variable(): loss[None] += s + t x[None] = 0.0 - with ti.Tape(loss=loss): + with ti.ad.Tape(loss=loss): test_inner_loops_local_variable() assert loss[None] == 18.0 @@ -530,7 +530,7 @@ def test_inner_loops_local_variable(): loss[None] += s + t x[None] = 0.0 - with ti.Tape(loss=loss): + with ti.ad.Tape(loss=loss): test_inner_loops_local_variable() assert loss[None] == 18.0 @@ -585,7 +585,7 @@ def test_more_inner_loops_local_variable(): loss[None] += s x[None] = 0.0 - with ti.Tape(loss=loss): + with ti.ad.Tape(loss=loss): test_more_inner_loops_local_variable() assert loss[None] == 12.0 @@ -614,7 +614,7 @@ def test_more_inner_loops_local_variable(): loss[None] += s x[None] = 0.0 - with ti.Tape(loss=loss): + with ti.ad.Tape(loss=loss): test_more_inner_loops_local_variable() assert loss[None] == 12.0 @@ -768,7 +768,7 @@ def test_large_loop(): for k in range(1000): loss[None] += ti.sin(x[None]) + 1.0 - with ti.Tape(loss=loss): + with ti.ad.Tape(loss=loss): test_large_loop() assert loss[None] == 1e7 @@ -790,7 +790,7 @@ def test_large_loop(): for k in range(1000): loss[None] += ti.sin(x[None]) + 1.0 - with ti.Tape(loss=loss): + with ti.ad.Tape(loss=loss): test_large_loop() assert loss[None] == 1e7 @@ -811,7 +811,7 @@ def compute_y(): y[None] += x[None] x[None] = 1.0 - with ti.Tape(y): + with ti.ad.Tape(y): compute_y() assert y[None] == 12.0 @@ -837,7 +837,7 @@ def compute_y(): y[None] += x[None] x[None] = 1.0 - with ti.Tape(y): + with ti.ad.Tape(y): compute_y() assert y[None] == 24.0 @@ -865,7 +865,7 @@ def compute_y(): y[None] += x[None] x[None] = 1.0 - with ti.Tape(y): + with ti.ad.Tape(y): compute_y() assert y[None] == 42.0 @@ -890,7 +890,7 @@ def compute_y(): y[None] += x[None] x[None] = 1.0 - with ti.Tape(y): + with ti.ad.Tape(y): compute_y() assert y[None] == 30.0 @@ -916,7 +916,7 @@ def compute_y(): y[None] += x[None] x[None] = 1.0 - with ti.Tape(y): + with ti.ad.Tape(y): compute_y() assert y[None] == 42.0 @@ -975,7 +975,7 @@ def compute_y(): y[None] += x[None] x[None] = 1.0 - with ti.Tape(y): + with ti.ad.Tape(y): compute_y() assert y[None] == 78.0 diff --git a/tests/python/test_ad_offload.py b/tests/python/test_ad_offload.py index 945dba9c83d20..d562172e30e30 100644 --- a/tests/python/test_ad_offload.py +++ b/tests/python/test_ad_offload.py @@ -17,7 +17,7 @@ def forward(): # for i in x: # z[None] += y[i] - with ti.Tape(z): + with ti.ad.Tape(z): forward() # for i in range(n): diff --git a/tests/python/test_customized_grad.py b/tests/python/test_customized_grad.py index fbd7cc7eb1ac6..8a17fff2883d9 100644 --- a/tests/python/test_customized_grad.py +++ b/tests/python/test_customized_grad.py @@ -29,7 +29,7 @@ def forward(mul): def backward(mul): func.grad(mul) - with ti.Tape(loss=total): + with ti.ad.Tape(loss=total): forward(4) assert x.grad[0] == 4 @@ -93,7 +93,7 @@ def forward(mul): def backward(mul): func.grad(mul) - with ti.Tape(loss=total): + with ti.ad.Tape(loss=total): forward(4) assert x.grad[0] == 4 @@ -128,7 +128,7 @@ def backward(self, mul): ti.root.lazy_grad() - with ti.Tape(loss=a.total): + with ti.ad.Tape(loss=a.total): a.forward(4) assert a.x.grad[0] == 4 @@ -166,7 +166,7 @@ def backward(self, mul): ti.root.lazy_grad() - with ti.Tape(loss=a.total): + with ti.ad.Tape(loss=a.total): a.forward(4) assert a.x.grad[0] == 4 @@ -193,7 +193,7 @@ def func(mul: ti.f32): def backward(mul): func.grad(mul) - with ti.Tape(loss=total): + with 
ti.ad.Tape(loss=total): func(4) @@ -223,7 +223,7 @@ def forward(mul): def backward(mul): func.grad(mul) - with ti.Tape(loss=total): + with ti.ad.Tape(loss=total): func(4) @@ -248,7 +248,7 @@ def forward(mul): func(mul) func(mul) - with ti.Tape(loss=total): + with ti.ad.Tape(loss=total): forward(4) func(5) assert x.grad[0] == 5 diff --git a/tests/python/test_kernel_templates.py b/tests/python/test_kernel_templates.py index 09637436c3ff1..27e87c0084ff2 100644 --- a/tests/python/test_kernel_templates.py +++ b/tests/python/test_kernel_templates.py @@ -57,7 +57,7 @@ def compute_loss(): for i in range(16): x[i] = i - with ti.Tape(loss): + with ti.ad.Tape(loss): double(x, y) double(y, z) compute_loss() diff --git a/tests/python/test_no_grad.py b/tests/python/test_no_grad.py index e89275e0b176b..7c017ba6382ec 100644 --- a/tests/python/test_no_grad.py +++ b/tests/python/test_no_grad.py @@ -21,7 +21,7 @@ def func(): for i in range(N): ti.atomic_add(loss[None], x[i]**2) - with ti.Tape(loss): + with ti.ad.Tape(loss): func() diff --git a/tests/python/test_oop.py b/tests/python/test_oop.py index 059d774f93578..ffb4c195e7c31 100644 --- a/tests/python/test_oop.py +++ b/tests/python/test_oop.py @@ -79,7 +79,7 @@ def reduce(self): arr.inc2(4) assert arr.val[3, 4] == 7 - with ti.Tape(loss=arr.total): + with ti.ad.Tape(loss=arr.total): arr.reduce() for i in range(arr.n): @@ -90,7 +90,7 @@ def reduce(self): def double(): double_total[None] = 2 * arr.total[None] - with ti.Tape(loss=double_total): + with ti.ad.Tape(loss=double_total): arr.reduce() double() @@ -137,9 +137,9 @@ def reduce(self): assert arr1.val[3, 4] == arr1_inc assert arr2.val[8, 6] == arr2_inc - with ti.Tape(loss=arr1.total): + with ti.ad.Tape(loss=arr1.total): arr1.reduce() - with ti.Tape(loss=arr2.total, clear_gradients=False): + with ti.ad.Tape(loss=arr2.total, clear_gradients=False): arr2.reduce() for i in range(arr1.n): for j in range(arr1.m): @@ -172,7 +172,7 @@ def reduce(self): ti.root.lazy_grad() - with ti.Tape(loss=arr.total): + with ti.ad.Tape(loss=arr.total): arr.reduce() for i in range(arr.n): for j in range(arr.n): diff --git a/tests/python/test_stop_grad.py b/tests/python/test_stop_grad.py index 450782b3733a9..169c420845d3b 100644 --- a/tests/python/test_stop_grad.py +++ b/tests/python/test_stop_grad.py @@ -21,7 +21,7 @@ def func(): for i in range(n): x[i] = i - with ti.Tape(loss): + with ti.ad.Tape(loss): func() for i in range(n): @@ -48,7 +48,7 @@ def func(): for i in range(n): x[i] = i - with ti.Tape(loss): + with ti.ad.Tape(loss): func() for i in range(n): @@ -78,7 +78,7 @@ def func(): for i in range(n): x[i] = i - with ti.Tape(loss): + with ti.ad.Tape(loss): func() # If without stop, grad x.grad[i] = i * 4
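
For quick reference, here is a minimal, self-contained sketch of the renamed API, assembled from the `compute_y` example in the docs diff above. It assumes a Taichi version that already exposes `ti.ad.Tape` (i.e., with this patch applied); the `ti.init()` call and `@ti.kernel` decorator are added only to make the snippet runnable and are not part of the diff context shown. The old spelling `ti.Tape(y)` is simply replaced by `ti.ad.Tape(y)`; nothing else changes.

```python
import taichi as ti

ti.init()

# Fields on the derivative chain are declared with needs_grad=True.
x = ti.field(dtype=ti.f32, shape=(), needs_grad=True)
y = ti.field(dtype=ti.f32, shape=(), needs_grad=True)


@ti.kernel
def compute_y():
    y[None] = ti.sin(x[None])


# The tape records the kernel invocations inside the `with` block and
# runs the corresponding gradient kernels when the block exits.
with ti.ad.Tape(y):
    compute_y()

print('dy/dx =', x.grad[None], ' at x =', x[None])
```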