From 56689ad61a7fad8dfd6818ef6be9a6d7e8ac1c85 Mon Sep 17 00:00:00 2001 From: Zihua Wu Date: Mon, 25 Jul 2022 09:41:14 +0800 Subject: [PATCH] [misc] ti.Tape -> ti.ad.Tape (#5501) --- cpp_examples/autograd.cpp | 2 +- docs/lang/articles/advanced/odop.md | 4 +-- .../differentiable_programming.md | 34 +++++++++---------- misc/demo_record_kernel_group.py | 2 +- python/taichi/ad/_ad.py | 6 ++-- python/taichi/aot/record.py | 2 +- .../taichi/examples/autodiff/minimization.py | 2 +- python/taichi/examples/autodiff/regression.py | 2 +- .../examples/ggui_examples/fem128_ggui.py | 2 +- .../taichi/examples/simulation/ad_gravity.py | 2 +- python/taichi/examples/simulation/fem128.py | 2 +- python/taichi/examples/simulation/fem99.py | 2 +- .../simulation/mpm_lagrangian_forces.py | 2 +- .../examples/autodiff/test_minimization.py | 2 +- tests/python/test_ad_basics.py | 6 ++-- tests/python/test_ad_for.py | 26 +++++++------- tests/python/test_ad_offload.py | 2 +- tests/python/test_customized_grad.py | 14 ++++---- tests/python/test_kernel_templates.py | 2 +- tests/python/test_no_grad.py | 2 +- tests/python/test_oop.py | 10 +++--- tests/python/test_stop_grad.py | 6 ++-- 22 files changed, 67 insertions(+), 67 deletions(-) diff --git a/cpp_examples/autograd.cpp b/cpp_examples/autograd.cpp index 1f46a1e338ab3..dcf5324b14ecb 100644 --- a/cpp_examples/autograd.cpp +++ b/cpp_examples/autograd.cpp @@ -30,7 +30,7 @@ void autograd() { energy += c[i] init() - with ti.Tape(energy): + with ti.ad.Tape(energy): cal() support() diff --git a/docs/lang/articles/advanced/odop.md b/docs/lang/articles/advanced/odop.md index 679b3d07cb07b..96c9e537bcec7 100644 --- a/docs/lang/articles/advanced/odop.md +++ b/docs/lang/articles/advanced/odop.md @@ -225,7 +225,7 @@ print(arr.val[0, 0]) # 3 arr.inc2(4) print(arr.val[0, 0]) # 7 -with ti.Tape(loss=arr.total): +with ti.ad.Tape(loss=arr.total): arr.reduce() for i in range(arr.n): @@ -236,7 +236,7 @@ for i in range(arr.n): def double(): double_total[None] = 2 * arr.total[None] -with ti.Tape(loss=double_total): +with ti.ad.Tape(loss=double_total): arr.reduce() double() diff --git a/docs/lang/articles/differentiable/differentiable_programming.md b/docs/lang/articles/differentiable/differentiable_programming.md index 41657520256c2..b95fa0b0bc3b0 100644 --- a/docs/lang/articles/differentiable/differentiable_programming.md +++ b/docs/lang/articles/differentiable/differentiable_programming.md @@ -47,16 +47,16 @@ really error-prone and hard to maintain. If you run into this situation, Taichi's handy automatic differentiation (autodiff) system comes to your rescue! Taichi supports gradient evaluation through -either `ti.Tape()` or the more flexible `kernel.grad()` syntax. +either `ti.ad.Tape()` or the more flexible `kernel.grad()` syntax. -## Using `ti.Tape()` +## Using `ti.ad.Tape()` Let's still take the `compute_y` kernel above for an explanation. -Using `ti.Tape()` is the easiest way to obtain a kernel that computes `dy/dx`: +Using `ti.ad.Tape()` is the easiest way to obtain a kernel that computes `dy/dx`: 1. Enable `needs_grad=True` option when declaring fields involved in the derivative chain. -2. Use context manager `with ti.Tape(y):` to capture the kernel invocations which you want to automatically differentiate. +2. Use context manager `with ti.ad.Tape(y):` to capture the kernel invocations which you want to automatically differentiate. 3. Now `dy/dx` value at current `x` is available at `x.grad[None]`. 
The following code snippet explains the steps above: @@ -69,7 +69,7 @@ y = ti.field(dtype=ti.f32, shape=(), needs_grad=True) def compute_y(): y[None] = ti.sin(x[None]) -with ti.Tape(y): +with ti.ad.Tape(y): compute_y() print('dy/dx =', x.grad[None], ' at x =', x[None]) @@ -119,7 +119,7 @@ def advance(): def substep(): - with ti.Tape(loss=U): + with ti.ad.Tape(loss=U): # Kernel invocations in this scope will later contribute to partial derivatives of # U with respect to input variables such as x. compute_U( @@ -144,7 +144,7 @@ while gui.running: :::note -The argument `U` to `ti.Tape(U)` must be a 0D field. +The argument `U` to `ti.ad.Tape(U)` must be a 0D field. To use autodiff with multiple output variables, see the `kernel.grad()` usage below. @@ -152,7 +152,7 @@ To use autodiff with multiple output variables, see the :::note -`ti.Tape(U)` automatically sets _`U[None]`_ to `0` on +`ti.ad.Tape(U)` automatically sets _`U[None]`_ to `0` on start up. ::: @@ -166,13 +166,13 @@ for examples on using autodiff-based force evaluation MPM and FEM. ## Using `kernel.grad()` -As mentioned above, `ti.Tape()` can only track a 0D field as the output variable. +As mentioned above, `ti.ad.Tape()` can only track a 0D field as the output variable. If there are multiple output variables that you want to back-propagate -gradients to inputs, call `kernel.grad()` instead of `ti.Tape()`. -Different from using `ti.Tape()`, you need to set the `grad` of the output variables themselves to `1` manually +gradients to inputs, call `kernel.grad()` instead of `ti.ad.Tape()`. +Different from using `ti.ad.Tape()`, you need to set the `grad` of the output variables themselves to `1` manually before calling `kernel.grad()`. The reason is that the `grad` of the output variables themselves will always be multiplied to the `grad` with respect to the inputs at the end of the back-propagation. -By calling `ti.Tape()`, you have the program do this under the hood. +By calling `ti.ad.Tape()`, you have the program do this under the hood. ```python {13-14} import taichi as ti @@ -257,10 +257,10 @@ for i in range(N): b[None] = 10 loss.grad[None] = 1 -with ti.Tape(loss): +with ti.ad.Tape(loss): func_broke_rule_1() # Call func_equivalent to see the correct result -# with ti.Tape(loss): +# with ti.ad.Tape(loss): # func_equivalent() assert x.grad[1] == 10.0 @@ -357,7 +357,7 @@ def manipulation_in_kernel(): x[None] = 0.0 -with ti.Tape(loss=loss): +with ti.ad.Tape(loss=loss): # The line below in python scope only contribute to the forward pass # but not the backward pass i.e., not auto-differentiated. loss[None] += ti.sin(x[None]) + 1.0 @@ -412,13 +412,13 @@ def forward(mul): def backward(mul): func.grad(mul) -with ti.Tape(loss=total): +with ti.ad.Tape(loss=total): forward(4) assert x.grad[0] == 4 ``` -Customized gradient function works with both `ti.Tape()` and `kernel.grad()`. More examples can be found at `test_customized_grad.py`. +Customized gradient function works with both `ti.ad.Tape()` and `kernel.grad()`. More examples can be found at `test_customized_grad.py`. 
### Checkpointing diff --git a/misc/demo_record_kernel_group.py b/misc/demo_record_kernel_group.py index 3a65ea33a1fd0..f12aa544c9a06 100644 --- a/misc/demo_record_kernel_group.py +++ b/misc/demo_record_kernel_group.py @@ -21,6 +21,6 @@ def do_some_works(): with ti.aot.RecordKernelGroup('my_substep'): x.fill(0) - with ti.Tape(loss): + with ti.ad.Tape(loss): compute_loss() do_some_works() diff --git a/python/taichi/ad/_ad.py b/python/taichi/ad/_ad.py index 31286dd6c4053..46649ec12036e 100644 --- a/python/taichi/ad/_ad.py +++ b/python/taichi/ad/_ad.py @@ -36,7 +36,7 @@ def __init__(self, loss=None, clear_gradients=True): >>> for I in ti.grouped(x): >>> y[None] += x[I] ** a >>> - >>> with ti.Tape(loss = y): + >>> with ti.ad.Tape(loss = y): >>> sum(2) """ self.calls = [] @@ -111,7 +111,7 @@ def visit(node): def grad_replaced(func): """A decorator for python function to customize gradient with Taichi's autodiff - system, e.g. `ti.Tape()` and `kernel.grad()`. + system, e.g. `ti.ad.Tape()` and `kernel.grad()`. This decorator forces Taichi's autodiff system to use a user-defined gradient function for the decorated function. Its customized gradient must be decorated @@ -186,7 +186,7 @@ def decorated(*args, **kwargs): def no_grad(func): """A decorator for python function to skip gradient calculation within Taichi's - autodiff system, e.g. `ti.Tape()` and `kernel.grad()`. + autodiff system, e.g. `ti.ad.Tape()` and `kernel.grad()`. This decorator forces Taichi's autodiff system to use an empty gradient function for the decorated function. diff --git a/python/taichi/aot/record.py b/python/taichi/aot/record.py index 2f1e873f492c3..76cd88bd9b113 100644 --- a/python/taichi/aot/record.py +++ b/python/taichi/aot/record.py @@ -40,7 +40,7 @@ def start_recording(filename): >>> for i in x: >>> x[i] -= x.grad[i] >>> - >>> with ti.Tape(loss): + >>> with ti.ad.Tape(loss): >>> compute_loss() >>> do_some_works() """ diff --git a/python/taichi/examples/autodiff/minimization.py b/python/taichi/examples/autodiff/minimization.py index 1a5f2708e05e5..5d142fae21ea8 100644 --- a/python/taichi/examples/autodiff/minimization.py +++ b/python/taichi/examples/autodiff/minimization.py @@ -30,7 +30,7 @@ def main(): # Optimize with 100 gradient descent iterations for k in range(100): - with ti.Tape(loss=L): + with ti.ad.Tape(loss=L): reduce() print('Loss =', L[None]) gradient_descent() diff --git a/python/taichi/examples/autodiff/regression.py b/python/taichi/examples/autodiff/regression.py index 8bdafbbd88c51..7b505c2811b3e 100644 --- a/python/taichi/examples/autodiff/regression.py +++ b/python/taichi/examples/autodiff/regression.py @@ -59,7 +59,7 @@ def regress_raw(): for i in range(1000): if use_tape: - with ti.Tape(loss=loss): + with ti.ad.Tape(loss=loss): regress() else: ti.clear_all_gradients() diff --git a/python/taichi/examples/ggui_examples/fem128_ggui.py b/python/taichi/examples/ggui_examples/fem128_ggui.py index cb62da87d4a3c..14f35484e6209 100644 --- a/python/taichi/examples/ggui_examples/fem128_ggui.py +++ b/python/taichi/examples/ggui_examples/fem128_ggui.py @@ -172,7 +172,7 @@ def main(): attractor_strength[None] = window.is_pressed( ti.ui.LMB) - window.is_pressed(ti.ui.RMB) for i in range(50): - with ti.Tape(loss=U): + with ti.ad.Tape(loss=U): update_U() advance() render() diff --git a/python/taichi/examples/simulation/ad_gravity.py b/python/taichi/examples/simulation/ad_gravity.py index 811346dd36756..38af146f0aeb6 100644 --- a/python/taichi/examples/simulation/ad_gravity.py +++ 
b/python/taichi/examples/simulation/ad_gravity.py @@ -29,7 +29,7 @@ def advance(): def substep(): - with ti.Tape(loss=U): + with ti.ad.Tape(loss=U): # Kernel invocations in this scope will later contribute to partial derivatives of # U with respect to input variables such as x. compute_U( diff --git a/python/taichi/examples/simulation/fem128.py b/python/taichi/examples/simulation/fem128.py index 75f16405d21da..b11cf8f801b19 100644 --- a/python/taichi/examples/simulation/fem128.py +++ b/python/taichi/examples/simulation/fem128.py @@ -132,7 +132,7 @@ def main(): attractor_strength[None] = gui.is_pressed(gui.LMB) - gui.is_pressed( gui.RMB) for i in range(50): - with ti.Tape(loss=U): + with ti.ad.Tape(loss=U): update_U() advance() paint_phi(gui) diff --git a/python/taichi/examples/simulation/fem99.py b/python/taichi/examples/simulation/fem99.py index 64d25cf12c25d..264aaf10bcde1 100644 --- a/python/taichi/examples/simulation/fem99.py +++ b/python/taichi/examples/simulation/fem99.py @@ -100,7 +100,7 @@ def main(): elif e.key == 'r': init_pos() for i in range(30): - with ti.Tape(loss=U): + with ti.ad.Tape(loss=U): update_U() advance() gui.circles(pos.to_numpy(), radius=2, color=0xffaa33) diff --git a/python/taichi/examples/simulation/mpm_lagrangian_forces.py b/python/taichi/examples/simulation/mpm_lagrangian_forces.py index 3da6708cafade..7c6510e0aedf4 100644 --- a/python/taichi/examples/simulation/mpm_lagrangian_forces.py +++ b/python/taichi/examples/simulation/mpm_lagrangian_forces.py @@ -166,7 +166,7 @@ def main(): grid_v.fill(0) # Note that we are now differentiating the total energy w.r.t. the particle position. # Recall that F = - \partial (total_energy) / \partial x - with ti.Tape(total_energy): + with ti.ad.Tape(total_energy): # Do the forward computation of total energy and backward propagation for x.grad, which is later used in p2g compute_total_energy() # It's OK not to use the computed total_energy at all, since we only need x.grad diff --git a/tests/python/examples/autodiff/test_minimization.py b/tests/python/examples/autodiff/test_minimization.py index 272949eedaa79..51c0db5ab8c77 100644 --- a/tests/python/examples/autodiff/test_minimization.py +++ b/tests/python/examples/autodiff/test_minimization.py @@ -14,7 +14,7 @@ def test_minimization(): y[i] = random.random() for k in range(100): - with ti.Tape(loss=L): + with ti.ad.Tape(loss=L): reduce() gradient_descent() diff --git a/tests/python/test_ad_basics.py b/tests/python/test_ad_basics.py index 5cb83ac56767e..b672480328dd5 100644 --- a/tests/python/test_ad_basics.py +++ b/tests/python/test_ad_basics.py @@ -426,7 +426,7 @@ def test_ad_precision_2(): def func(): loss[None] = x[None] - with ti.Tape(loss): + with ti.ad.Tape(loss): func() assert x.grad[None] == 1 @@ -443,7 +443,7 @@ def work(): x[None] = 10 with pytest.raises(RuntimeError) as e: - with ti.Tape(loss): + with ti.ad.Tape(loss): work() assert 'RandStmt not supported' in e.value.args[0] @@ -472,7 +472,7 @@ def calc_loss(input_field: ti.template(), loss: ti.template()): field1 = ti.field(dtype=ti.f32, shape=(n, ), needs_grad=True) loss = ti.field(dtype=ti.f32, shape=(), needs_grad=True) - with ti.Tape(loss): + with ti.ad.Tape(loss): ti_frac(field0, field1) calc_loss(field1, loss) diff --git a/tests/python/test_ad_for.py b/tests/python/test_ad_for.py index cceea48764849..6b2913743b69f 100644 --- a/tests/python/test_ad_for.py +++ b/tests/python/test_ad_for.py @@ -454,7 +454,7 @@ def mixed_inner_loops_tape(): loss[None] += ti.sin(x[None]) + 1.0 x[None] = 0.0 - with 
ti.Tape(loss=loss): + with ti.ad.Tape(loss=loss): mixed_inner_loops_tape() assert loss[None] == 10.0 @@ -479,7 +479,7 @@ def test_inner_loops_local_variable(): loss[None] += s + t x[None] = 0.0 - with ti.Tape(loss=loss): + with ti.ad.Tape(loss=loss): test_inner_loops_local_variable() assert loss[None] == 18.0 @@ -530,7 +530,7 @@ def test_inner_loops_local_variable(): loss[None] += s + t x[None] = 0.0 - with ti.Tape(loss=loss): + with ti.ad.Tape(loss=loss): test_inner_loops_local_variable() assert loss[None] == 18.0 @@ -585,7 +585,7 @@ def test_more_inner_loops_local_variable(): loss[None] += s x[None] = 0.0 - with ti.Tape(loss=loss): + with ti.ad.Tape(loss=loss): test_more_inner_loops_local_variable() assert loss[None] == 12.0 @@ -614,7 +614,7 @@ def test_more_inner_loops_local_variable(): loss[None] += s x[None] = 0.0 - with ti.Tape(loss=loss): + with ti.ad.Tape(loss=loss): test_more_inner_loops_local_variable() assert loss[None] == 12.0 @@ -768,7 +768,7 @@ def test_large_loop(): for k in range(1000): loss[None] += ti.sin(x[None]) + 1.0 - with ti.Tape(loss=loss): + with ti.ad.Tape(loss=loss): test_large_loop() assert loss[None] == 1e7 @@ -790,7 +790,7 @@ def test_large_loop(): for k in range(1000): loss[None] += ti.sin(x[None]) + 1.0 - with ti.Tape(loss=loss): + with ti.ad.Tape(loss=loss): test_large_loop() assert loss[None] == 1e7 @@ -811,7 +811,7 @@ def compute_y(): y[None] += x[None] x[None] = 1.0 - with ti.Tape(y): + with ti.ad.Tape(y): compute_y() assert y[None] == 12.0 @@ -837,7 +837,7 @@ def compute_y(): y[None] += x[None] x[None] = 1.0 - with ti.Tape(y): + with ti.ad.Tape(y): compute_y() assert y[None] == 24.0 @@ -865,7 +865,7 @@ def compute_y(): y[None] += x[None] x[None] = 1.0 - with ti.Tape(y): + with ti.ad.Tape(y): compute_y() assert y[None] == 42.0 @@ -890,7 +890,7 @@ def compute_y(): y[None] += x[None] x[None] = 1.0 - with ti.Tape(y): + with ti.ad.Tape(y): compute_y() assert y[None] == 30.0 @@ -916,7 +916,7 @@ def compute_y(): y[None] += x[None] x[None] = 1.0 - with ti.Tape(y): + with ti.ad.Tape(y): compute_y() assert y[None] == 42.0 @@ -975,7 +975,7 @@ def compute_y(): y[None] += x[None] x[None] = 1.0 - with ti.Tape(y): + with ti.ad.Tape(y): compute_y() assert y[None] == 78.0 diff --git a/tests/python/test_ad_offload.py b/tests/python/test_ad_offload.py index 945dba9c83d20..d562172e30e30 100644 --- a/tests/python/test_ad_offload.py +++ b/tests/python/test_ad_offload.py @@ -17,7 +17,7 @@ def forward(): # for i in x: # z[None] += y[i] - with ti.Tape(z): + with ti.ad.Tape(z): forward() # for i in range(n): diff --git a/tests/python/test_customized_grad.py b/tests/python/test_customized_grad.py index fbd7cc7eb1ac6..8a17fff2883d9 100644 --- a/tests/python/test_customized_grad.py +++ b/tests/python/test_customized_grad.py @@ -29,7 +29,7 @@ def forward(mul): def backward(mul): func.grad(mul) - with ti.Tape(loss=total): + with ti.ad.Tape(loss=total): forward(4) assert x.grad[0] == 4 @@ -93,7 +93,7 @@ def forward(mul): def backward(mul): func.grad(mul) - with ti.Tape(loss=total): + with ti.ad.Tape(loss=total): forward(4) assert x.grad[0] == 4 @@ -128,7 +128,7 @@ def backward(self, mul): ti.root.lazy_grad() - with ti.Tape(loss=a.total): + with ti.ad.Tape(loss=a.total): a.forward(4) assert a.x.grad[0] == 4 @@ -166,7 +166,7 @@ def backward(self, mul): ti.root.lazy_grad() - with ti.Tape(loss=a.total): + with ti.ad.Tape(loss=a.total): a.forward(4) assert a.x.grad[0] == 4 @@ -193,7 +193,7 @@ def func(mul: ti.f32): def backward(mul): func.grad(mul) - with ti.Tape(loss=total): + with 
ti.ad.Tape(loss=total): func(4) @@ -223,7 +223,7 @@ def forward(mul): def backward(mul): func.grad(mul) - with ti.Tape(loss=total): + with ti.ad.Tape(loss=total): func(4) @@ -248,7 +248,7 @@ def forward(mul): func(mul) func(mul) - with ti.Tape(loss=total): + with ti.ad.Tape(loss=total): forward(4) func(5) assert x.grad[0] == 5 diff --git a/tests/python/test_kernel_templates.py b/tests/python/test_kernel_templates.py index 09637436c3ff1..27e87c0084ff2 100644 --- a/tests/python/test_kernel_templates.py +++ b/tests/python/test_kernel_templates.py @@ -57,7 +57,7 @@ def compute_loss(): for i in range(16): x[i] = i - with ti.Tape(loss): + with ti.ad.Tape(loss): double(x, y) double(y, z) compute_loss() diff --git a/tests/python/test_no_grad.py b/tests/python/test_no_grad.py index e89275e0b176b..7c017ba6382ec 100644 --- a/tests/python/test_no_grad.py +++ b/tests/python/test_no_grad.py @@ -21,7 +21,7 @@ def func(): for i in range(N): ti.atomic_add(loss[None], x[i]**2) - with ti.Tape(loss): + with ti.ad.Tape(loss): func() diff --git a/tests/python/test_oop.py b/tests/python/test_oop.py index 059d774f93578..ffb4c195e7c31 100644 --- a/tests/python/test_oop.py +++ b/tests/python/test_oop.py @@ -79,7 +79,7 @@ def reduce(self): arr.inc2(4) assert arr.val[3, 4] == 7 - with ti.Tape(loss=arr.total): + with ti.ad.Tape(loss=arr.total): arr.reduce() for i in range(arr.n): @@ -90,7 +90,7 @@ def reduce(self): def double(): double_total[None] = 2 * arr.total[None] - with ti.Tape(loss=double_total): + with ti.ad.Tape(loss=double_total): arr.reduce() double() @@ -137,9 +137,9 @@ def reduce(self): assert arr1.val[3, 4] == arr1_inc assert arr2.val[8, 6] == arr2_inc - with ti.Tape(loss=arr1.total): + with ti.ad.Tape(loss=arr1.total): arr1.reduce() - with ti.Tape(loss=arr2.total, clear_gradients=False): + with ti.ad.Tape(loss=arr2.total, clear_gradients=False): arr2.reduce() for i in range(arr1.n): for j in range(arr1.m): @@ -172,7 +172,7 @@ def reduce(self): ti.root.lazy_grad() - with ti.Tape(loss=arr.total): + with ti.ad.Tape(loss=arr.total): arr.reduce() for i in range(arr.n): for j in range(arr.n): diff --git a/tests/python/test_stop_grad.py b/tests/python/test_stop_grad.py index 450782b3733a9..169c420845d3b 100644 --- a/tests/python/test_stop_grad.py +++ b/tests/python/test_stop_grad.py @@ -21,7 +21,7 @@ def func(): for i in range(n): x[i] = i - with ti.Tape(loss): + with ti.ad.Tape(loss): func() for i in range(n): @@ -48,7 +48,7 @@ def func(): for i in range(n): x[i] = i - with ti.Tape(loss): + with ti.ad.Tape(loss): func() for i in range(n): @@ -78,7 +78,7 @@ def func(): for i in range(n): x[i] = i - with ti.Tape(loss): + with ti.ad.Tape(loss): func() # If without stop, grad x.grad[i] = i * 4
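
For quick reference, here is a minimal, self-contained sketch of the renamed API, assembled from the `compute_y` example in the docs diff above. It assumes a Taichi version that already exposes `ti.ad.Tape` (i.e., with this patch applied); the `ti.init()` call and `@ti.kernel` decorator are added only to make the snippet runnable and are not part of the diff context shown. The old spelling `ti.Tape(y)` is simply replaced by `ti.ad.Tape(y)`; nothing else changes.

```python
import taichi as ti

ti.init()

# Fields on the derivative chain are declared with needs_grad=True.
x = ti.field(dtype=ti.f32, shape=(), needs_grad=True)
y = ti.field(dtype=ti.f32, shape=(), needs_grad=True)


@ti.kernel
def compute_y():
    y[None] = ti.sin(x[None])


# The tape records the kernel invocations inside the `with` block and
# runs the corresponding gradient kernels when the block exits.
with ti.ad.Tape(y):
    compute_y()

print('dy/dx =', x.grad[None], ' at x =', x[None])
```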