diff --git a/python/taichi/_kernels.py b/python/taichi/_kernels.py
index 4b1d86302f84a..71c407bc61073 100644
--- a/python/taichi/_kernels.py
+++ b/python/taichi/_kernels.py
@@ -39,8 +39,13 @@ def fill_ndarray_matrix(ndarray: ndarray_type.ndarray(), val: template()):
 
 @kernel
 def tensor_to_ext_arr(tensor: template(), arr: ndarray_type.ndarray()):
+    offset = static(tensor.snode.ptr.offset)
+    shape = static(tensor.shape)
+    # default value of offset is [], replace it with [0] * len
+    offset_new = static([0] * len(shape) if len(offset) == 0 else offset)
+
     for I in grouped(tensor):
-        arr[I] = tensor[I]
+        arr[I - offset_new] = tensor[I]
 
 
 @kernel
@@ -73,10 +78,12 @@ def ndarray_matrix_to_ext_arr(
 
 @kernel
 def vector_to_fast_image(img: template(), out: ndarray_type.ndarray()):
+    i_offset = static(img.snode.ptr.offset[0] if len(img.snode.ptr.offset) != 0 else 0)
+    j_offset = static(img.snode.ptr.offset[1] if len(img.snode.ptr.offset) != 0 else 0)
     # FIXME: Why is ``for i, j in img:`` slower than:
     for i, j in ndrange(*img.shape):
         r, g, b = 0, 0, 0
-        color = img[i, img.shape[1] - 1 - j]
+        color = img[i + i_offset, (img.shape[1] + j_offset) - 1 - j]
         if static(img.dtype in [f16, f32, f64]):
             r, g, b = ops.min(255, ops.max(0, int(color * 255)))[:3]
         else:
@@ -100,32 +107,51 @@ def vector_to_fast_image(img: template(), out: ndarray_type.ndarray()):
 
 @kernel
 def tensor_to_image(tensor: template(), arr: ndarray_type.ndarray()):
+    offset = static(tensor.snode.ptr.offset)
+    shape = static(tensor.shape)
+    # default value of offset is [], replace it with [0] * len
+    offset_new = static([0] * len(shape) if len(offset) == 0 else offset)
     for I in grouped(tensor):
         t = ops.cast(tensor[I], f32)
-        arr[I, 0] = t
-        arr[I, 1] = t
-        arr[I, 2] = t
+        arr[I - offset_new, 0] = t
+        arr[I - offset_new, 1] = t
+        arr[I - offset_new, 2] = t
 
 
 @kernel
 def vector_to_image(mat: template(), arr: ndarray_type.ndarray()):
+    offset = static(mat.snode.ptr.offset)
+    shape = static(mat.shape)
+    # default value of offset is [], replace it with [0] * len
+    offset_new = static([0] * len(shape) if len(offset) == 0 else offset)
     for I in grouped(mat):
         for p in static(range(mat.n)):
-            arr[I, p] = ops.cast(mat[I][p], f32)
+            arr[I - offset_new, p] = ops.cast(mat[I][p], f32)
             if static(mat.n <= 2):
-                arr[I, 2] = 0
+                arr[I - offset_new, 2] = 0
 
 
 @kernel
 def tensor_to_tensor(tensor: template(), other: template()):
-    for I in grouped(tensor):
-        tensor[I] = other[I]
+    # assumes that tensor and other have the same shape
+    shape = static(tensor.shape)
+    tensor_offset = static(tensor.snode.ptr.offset)
+    tensor_offset_new = static([0] * len(shape) if len(tensor_offset) == 0 else tensor_offset)
+    other_offset = static(other.snode.ptr.offset)
+    other_offset_new = static([0] * len(shape) if len(other_offset) == 0 else other_offset)
+
+    for I in grouped(ndrange(*shape)):
+        tensor[I + tensor_offset_new] = other[I + other_offset_new]
 
 
 @kernel
 def ext_arr_to_tensor(arr: ndarray_type.ndarray(), tensor: template()):
+    offset = static(tensor.snode.ptr.offset)
+    shape = static(tensor.shape)
+    # default value of offset is [], replace it with [0] * len
+    offset_new = static([0] * len(shape) if len(offset) == 0 else offset)
     for I in grouped(tensor):
-        tensor[I] = arr[I]
+        tensor[I] = arr[I - offset_new]
 
 
 @kernel
@@ -164,36 +190,46 @@ def ext_arr_to_ndarray_matrix(
 
 @kernel
 def matrix_to_ext_arr(mat: template(), arr: ndarray_type.ndarray(), as_vector: template()):
+    offset = static(mat.snode.ptr.offset)
+    shape = static(mat.shape)
+    # default value of offset is [], replace it with [0] * len
+    offset_new = static([0] * len(shape) if len(offset) == 0 else offset)
+
     for I in grouped(mat):
         for p in static(range(mat.n)):
             for q in static(range(mat.m)):
                 if static(as_vector):
                     if static(getattr(mat, "ndim", 2) == 1):
-                        arr[I, p] = mat[I][p]
+                        arr[I - offset_new, p] = mat[I][p]
                     else:
-                        arr[I, p] = mat[I][p, q]
+                        arr[I - offset_new, p] = mat[I][p, q]
                 else:
                     if static(getattr(mat, "ndim", 2) == 1):
-                        arr[I, p, q] = mat[I][p]
+                        arr[I - offset_new, p, q] = mat[I][p]
                     else:
-                        arr[I, p, q] = mat[I][p, q]
+                        arr[I - offset_new, p, q] = mat[I][p, q]
 
 
 @kernel
 def ext_arr_to_matrix(arr: ndarray_type.ndarray(), mat: template(), as_vector: template()):
+    offset = static(mat.snode.ptr.offset)
+    shape = static(mat.shape)
+    # default value of offset is [], replace it with [0] * len
+    offset_new = static([0] * len(shape) if len(offset) == 0 else offset)
+
     for I in grouped(mat):
         for p in static(range(mat.n)):
             for q in static(range(mat.m)):
                 if static(getattr(mat, "ndim", 2) == 1):
                     if static(as_vector):
-                        mat[I][p] = arr[I, p]
+                        mat[I][p] = arr[I - offset_new, p]
                     else:
-                        mat[I][p] = arr[I, p, q]
+                        mat[I][p] = arr[I - offset_new, p, q]
                 else:
                     if static(as_vector):
-                        mat[I][p, q] = arr[I, p]
+                        mat[I][p, q] = arr[I - offset_new, p]
                     else:
-                        mat[I][p, q] = arr[I, p, q]
+                        mat[I][p, q] = arr[I - offset_new, p, q]
 
 
 # extract ndarray of raw vulkan memory layout to normal memory layout.
@@ -215,10 +251,13 @@ def arr_vulkan_layout_to_arr_normal_layout(vk_arr: ndarray_type.ndarray(), norma
 @kernel
 def arr_vulkan_layout_to_field_normal_layout(vk_arr: ndarray_type.ndarray(), normal_field: template()):
     static_assert(len(normal_field.shape) == 2)
-    w = normal_field.shape[0]
-    h = normal_field.shape[1]
+    w = static(normal_field.shape[0])
+    h = static(normal_field.shape[1])
+    i_offset = static(normal_field.snode.ptr.offset[0] if len(normal_field.snode.ptr.offset) != 0 else 0)
+    j_offset = static(normal_field.snode.ptr.offset[1] if len(normal_field.snode.ptr.offset) != 0 else 0)
+
     for i, j in ndrange(w, h):
-        normal_field[i, j] = vk_arr[(h - 1 - j) * w + i]
+        normal_field[i + i_offset, j + j_offset] = vk_arr[(h - 1 - j) * w + i]
 
 
 @kernel
@@ -277,21 +316,23 @@ def sort_stage(
     k: int,
     invocations: int,
 ):
+    keys_offset = static(keys.snode.ptr.offset if len(keys.snode.ptr.offset) != 0 else 0)
+    values_offset = static(values.snode.ptr.offset if len(values.snode.ptr.offset) != 0 else 0)
     for inv in range(invocations):
         j = k % p + inv * 2 * k
         for i in range(0, ops.min(k, N - j - k)):
             a = i + j
             b = i + j + k
             if int(a / (p * 2)) == int(b / (p * 2)):
-                key_a = keys[a]
-                key_b = keys[b]
+                key_a = keys[a + keys_offset]
+                key_b = keys[b + keys_offset]
                 if key_a > key_b:
-                    keys[a] = key_b
-                    keys[b] = key_a
+                    keys[a + keys_offset] = key_b
+                    keys[b + keys_offset] = key_a
                     if use_values != 0:
-                        temp = values[a]
-                        values[a] = values[b]
-                        values[b] = temp
+                        temp = values[a + values_offset]
+                        values[a + values_offset] = values[b + values_offset]
+                        values[b + values_offset] = temp
 
 
 # Parallel Prefix Sum (Scan)
@@ -383,5 +424,7 @@ def uniform_add(arr_in: template(), in_beg: i32, in_end: i32):
 
 @kernel
 def blit_from_field_to_field(dst: template(), src: template(), offset: i32, size: i32):
+    dst_offset = static(dst.snode.ptr.offset if len(dst.snode.ptr.offset) != 0 else 0)
+    src_offset = static(src.snode.ptr.offset if len(src.snode.ptr.offset) != 0 else 0)
     for i in range(size):
-        dst[i + offset] = src[i]
+        dst[i + dst_offset + offset] = src[i + src_offset]
diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp
index fe3e26dd213b4..79a3833736609 100644
--- a/taichi/python/export_lang.cpp
+++ b/taichi/python/export_lang.cpp
@@ -466,6 +466,7 @@ void export_lang(py::module &m) {
       .def_readwrite("parent", &SNode::parent)
       .def_readonly("type", &SNode::type)
       .def_readonly("id", &SNode::id)
+      .def_readonly("offset", &SNode::index_offsets)
       .def("dense",
           (SNode & (SNode::*)(const std::vector<Axis> &,
                               const std::vector<int> &,
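
Reviewer note on the pattern used throughout _kernels.py above: each kernel reads the field's SNode::index_offsets (exposed to Python as snode.ptr.offset by the export_lang.cpp change), falls back to all zeros when the field was declared without an offset, and subtracts that offset so the external array is always indexed from zero. A minimal pure-Python sketch of the same index mapping, for illustration only; the helper name below is hypothetical and not part of this patch:

def field_index_to_array_index(field_index, offset, shape):
    # snode.ptr.offset is [] for fields declared without an offset.
    offset = offset if len(offset) != 0 else [0] * len(shape)
    # Subtracting the offset maps the field's index range onto 0..shape-1.
    return tuple(i - o for i, o in zip(field_index, offset))


# A field with shape=(6, 12), offset=(-4, 4) owns indices (-4..1, 4..15);
# its lowest valid index maps to array index (0, 0), its highest to (5, 11).
assert field_index_to_array_index((-4, 4), (-4, 4), (6, 12)) == (0, 0)
assert field_index_to_array_index((1, 15), (-4, 4), (6, 12)) == (5, 11)
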
diff --git a/tests/python/test_field.py b/tests/python/test_field.py
index 33bf5000a8dc5..9077027f2aa29 100644
--- a/tests/python/test_field.py
+++ b/tests/python/test_field.py
@@ -84,6 +84,33 @@ def test_scalr_field_from_numpy(dtype, shape):
     x.from_numpy(arr)
 
 
+@pytest.mark.parametrize("dtype", data_types)
+@pytest.mark.parametrize(
+    "shape, offset",
+    [((), ()), (8, 0), (8, 8), (8, -4), ((6, 12), (-4, -4)), ((6, 12), (-4, 4)), ((6, 12), (4, -4)), ((6, 12), (8, 8))],
+)
+@test_utils.test(arch=get_host_arch_list())
+def test_scalr_field_from_numpy_with_offset(dtype, shape, offset):
+    import numpy as np
+
+    x = ti.field(dtype=dtype, shape=shape, offset=offset)
+    # use the corresponding dtype for the numpy array.
+    numpy_dtypes = {
+        ti.i32: np.int32,
+        ti.f32: np.float32,
+        ti.f64: np.float64,
+        ti.i64: np.int64,
+    }
+    arr = np.ones(shape, dtype=numpy_dtypes[dtype])
+    x.from_numpy(arr)
+
+    def mat_equal(A, B, tol=1e-6):
+        return np.max(np.abs(A - B)) < tol
+
+    tol = 1e-5 if dtype == ti.f32 else 1e-12
+    assert mat_equal(x.to_numpy(), arr, tol=tol)
+
+
 @pytest.mark.parametrize("dtype", data_types)
 @pytest.mark.parametrize("shape", field_shapes)
 @test_utils.test(arch=get_host_arch_list())
@@ -248,6 +275,32 @@ def test_field_copy_from_with_mismatch_shape():
         x.copy_from(other)
 
 
+@test_utils.test()
+@pytest.mark.parametrize(
+    "shape, x_offset, other_offset",
+    [
+        ((), (), ()),
+        (8, 4, 0),
+        (8, 0, -4),
+        (8, -4, -4),
+        (8, 8, -4),
+        ((6, 12), (0, 0), (-6, -6)),
+        ((6, 12), (-6, -6), (0, 0)),
+        ((6, 12), (-6, -6), (-6, -6)),
+    ],
+)
+@pytest.mark.parametrize("dtype", [ti.i32, ti.f32])
+def test_field_copy_from_with_offset(shape, dtype, x_offset, other_offset):
+    x = ti.field(dtype=ti.f32, shape=shape, offset=x_offset)
+    other = ti.field(dtype=dtype, shape=shape, offset=other_offset)
+    other.fill(1)
+    x.copy_from(other)
+    convert = lambda arr: arr[0] if len(arr) == 1 else arr
+    assert convert(x.shape) == shape
+    assert x.dtype == ti.f32
+    assert (x.to_numpy() == 1).all()
+
+
 @test_utils.test()
 def test_field_copy_from_with_non_filed_object():
     import numpy as np
diff --git a/tests/python/test_ggui.py b/tests/python/test_ggui.py
index 6f3def7d51c3c..aa9ccb3efcec9 100644
--- a/tests/python/test_ggui.py
+++ b/tests/python/test_ggui.py
@@ -492,6 +492,39 @@ def render():
     window.destroy()
 
 
+@pytest.mark.parametrize("offset", [(0, 0), (-256, -256), (256, -256), (-256, 256), (256, 256), (23333, 233333)])
+@pytest.mark.skipif(not _ti_core.GGUI_AVAILABLE, reason="GGUI Not Available")
+@test_utils.test(arch=supported_archs)
+def test_get_depth_buffer_with_offset(offset):
+    window = ti.ui.Window("test", (512, 512), vsync=True, show_window=False)
+    canvas = window.get_canvas()
+    scene = ti.ui.Scene()
+    camera = ti.ui.Camera()
+
+    ball_center = ti.Vector.field(3, dtype=float, shape=(1,))
+    ball_center[0] = ti.math.vec3(0, 0, 0.5)
+
+    def render():
+        camera.position(0.0, 0.0, 1)
+        camera.lookat(0.0, 0.0, 0)
+        scene.set_camera(camera)
+        scene.point_light(pos=(0, 1, 2), color=(1, 1, 1))
+        scene.ambient_light((0.5, 0.5, 0.5))
+        scene.particles(ball_center, radius=0.05, color=(0.5, 0.42, 0.8))
+        canvas.scene(scene)
+
+    for _ in range(RENDER_REPEAT):
+        render()
+        window.get_image_buffer_as_numpy()
+
+    render()
+
+    depth_buffer_field = ti.field(dtype=ti.f32, shape=(512, 512), offset=offset)
+    window.get_depth_buffer(depth_buffer_field)
+    verify_image(depth_buffer_field, "test_depth")
+    window.destroy()
+
+
 @pytest.mark.skipif(not _ti_core.GGUI_AVAILABLE, reason="GGUI Not Available")
 @test_utils.test(arch=supported_archs)
 def test_draw_lines():
diff --git a/tests/python/test_gui.py b/tests/python/test_gui.py
index 308c0209dc883..db05451322e49 100644
--- a/tests/python/test_gui.py
+++ b/tests/python/test_gui.py
@@ -29,3 +29,58 @@ def paint(c: dtype):
     image = ti.tools.imread(image_path)
     delta = (image - i).sum()
     assert delta == 0, "Expected image difference to be 0 but got {} instead.".format(delta)
+
+
+@pytest.mark.parametrize("vector_field", [True, False])
+@pytest.mark.parametrize("dtype", [ti.u8, ti.f32, ti.f64])
+@pytest.mark.parametrize("color", [0, 32, 64, 128, 255])
+@pytest.mark.parametrize("offset", [(-150, -150), (0, 0), (150, 150)])
+@test_utils.test(arch=get_host_arch_list())
+def test_set_image_with_offset(vector_field, dtype, color, offset):
+    n = 300
+    shape = (n, n)
+
+    img = (
+        ti.Vector.field(dtype=dtype, n=3, shape=shape, offset=offset)
+        if vector_field
+        else ti.field(dtype=dtype, shape=shape, offset=offset)
+    )
+    img.fill(color if dtype is ti.u8 else color * 1.0 / 255)
+
+    gui = ti.GUI(name="test", res=shape, show_gui=False, fast_gui=False)
+    gui.set_image(img)
+
+    image_path = test_utils.make_temp_file(suffix=".png")
+    gui.show(image_path)
+    image = ti.tools.imread(image_path)
+    delta = (image - color).sum()
+    assert delta == 0, "Expected image difference to be 0 but got {} instead.".format(delta)
+
+
+@pytest.mark.parametrize("channel", [3, 4])
+@pytest.mark.parametrize("dtype", [ti.u8, ti.f32, ti.f64])
+@pytest.mark.parametrize("color", [0, 32, 64, 128, 255])
+@pytest.mark.parametrize("offset", [(-150, -150), (0, 0), (150, 150)])
+@test_utils.test(arch=get_host_arch_list())
+def test_set_image_fast_gui_with_offset(channel, dtype, color, offset):
+    n = 300
+    shape = (n, n)
+
+    img = ti.Vector.field(dtype=dtype, n=channel, shape=shape, offset=offset)
+    img.fill(color if dtype is ti.u8 else color * 1.0 / 255)
+
+    gui = ti.GUI(name="test", res=shape, show_gui=False, fast_gui=True)
+    gui.set_image(img)
+    fast_image = gui.img
+
+    alpha = 0xFF << 24
+    from taichi._lib.utils import get_os_name  # pylint: disable=C0415
+
+    rgb_color = (
+        (color << 16) + (color << 8) + color
+        if ti.static(get_os_name() != "osx")
+        else (color << 16) + (color << 8) + color + alpha
+    )
+    ground_truth = np.full(n * n, rgb_color, dtype=np.uint32)
+
+    assert np.allclose(fast_image, ground_truth)
diff --git a/tests/python/test_matrix.py b/tests/python/test_matrix.py
index 278859b4ed6d1..ea666a12a8f26 100644
--- a/tests/python/test_matrix.py
+++ b/tests/python/test_matrix.py
@@ -1305,6 +1305,57 @@ def access_mat(i: ti.i32, j: ti.i32):
     # access_mat(3, -1)
 
 
+@pytest.mark.parametrize("dtype", [ti.i32, ti.f32, ti.i64, ti.f64])
+@pytest.mark.parametrize("shape", [(8,), (6, 12)])
+@pytest.mark.parametrize("offset", [0, -4, 4])
+@pytest.mark.parametrize("m, n", [(3, 4)])
+@test_utils.test(arch=get_host_arch_list())
+def test_matrix_from_numpy_with_offset(dtype, shape, offset, m, n):
+    import numpy as np
+
+    x = ti.Matrix.field(dtype=dtype, m=m, n=n, shape=shape, offset=[offset] * len(shape))
+    # use the corresponding dtype for the numpy array.
+    numpy_dtypes = {
+        ti.i32: np.int32,
+        ti.f32: np.float32,
+        ti.f64: np.float64,
+        ti.i64: np.int64,
+    }
+    numpy_shape = ((shape,) if isinstance(shape, int) else shape) + (n, m)
+    arr = np.ones(numpy_shape, dtype=numpy_dtypes[dtype])
+    x.from_numpy(arr)
+
+    @ti.kernel
+    def func():
+        for I in ti.grouped(x):
+            assert all(abs(x[I] - 1.0) < 1e-6)
+
+    func()
+
+
+@pytest.mark.parametrize("dtype", [ti.i32, ti.f32, ti.i64, ti.f64])
+@pytest.mark.parametrize("shape", [(8,), (6, 12)])
+@pytest.mark.parametrize("offset", [0, -4, 4])
+@pytest.mark.parametrize("m, n", [(3, 4)])
+@test_utils.test(arch=get_host_arch_list())
+def test_matrix_to_numpy_with_offset(dtype, shape, offset, m, n):
+    import numpy as np
+
+    x = ti.Matrix.field(dtype=dtype, m=m, n=n, shape=shape, offset=[offset] * len(shape))
+    x.fill(1.0)
+    # use the corresponding dtype for the numpy array.
+    numpy_dtypes = {
+        ti.i32: np.int32,
+        ti.f32: np.float32,
+        ti.f64: np.float64,
+        ti.i64: np.int64,
+    }
+    numpy_shape = ((shape,) if isinstance(shape, int) else shape) + (n, m)
+    arr = x.to_numpy()
+
+    assert np.allclose(arr, np.ones(numpy_shape, dtype=numpy_dtypes[dtype]))
+
+
 @test_utils.test()
 def test_matrix_dtype():
     a = ti.types.vector(3, dtype=ti.f32)([0, 1, 2])
diff --git a/tests/python/test_offset.py b/tests/python/test_offset.py
index 58334b2fc85c7..a6e99a2e6848a 100644
--- a/tests/python/test_offset.py
+++ b/tests/python/test_offset.py
@@ -3,6 +3,7 @@
 
 import taichi as ti
 from tests import test_utils
+import numpy as np
 
 
 @test_utils.test()
@@ -144,3 +145,20 @@ def test_offset_must_throw_matrix():
         a = ti.Matrix.field(3, 3, dtype=ti.i32, shape=(32, 16, 8), offset=(32, 16))
     with pytest.raises(ti.TaichiCompilationError, match="shape cannot be None when offset is set"):
         b = ti.Matrix.field(3, 3, dtype=ti.i32, shape=None, offset=(32, 16))
+
+
+@pytest.mark.parametrize("offset", [(0, 0), (-1, -1), (2, 2), (-23333, -23333), (23333, 23333)])
+@test_utils.test(arch=get_host_arch_list())
+def test_field_with_offset_print(offset):
+    val = ti.field(dtype=ti.f32, shape=(3, 3), offset=offset)
+    val.fill(1.0)
+    print(val)
+
+
+@pytest.mark.parametrize("offset", [(0, 0), (-1, -1), (2, 2), (-23333, -23333), (23333, 23333)])
+@test_utils.test(arch=get_host_arch_list())
+def test_field_with_offset_to_numpy(offset):
+    shape = (3, 3)
+    val = ti.field(dtype=ti.f32, shape=shape, offset=offset)
+    val.fill(1.0)
+    assert np.allclose(val.to_numpy(), np.ones(shape, dtype=np.float32))
diff --git a/tests/python/test_scan.py b/tests/python/test_scan.py
index 55ff669aee5ee..48b4281ee3017 100644
--- a/tests/python/test_scan.py
+++ b/tests/python/test_scan.py
@@ -1,3 +1,4 @@
+import pytest
 import taichi as ti
 from tests import test_utils
 
@@ -30,3 +31,31 @@ def fill():
     test_scan_for_dtype(ti.i32, 512)
     test_scan_for_dtype(ti.i32, 1024)
     test_scan_for_dtype(ti.i32, 4096)
+
+
+@pytest.mark.parametrize("dtype", [ti.i32])
+@pytest.mark.parametrize("N", [512, 1024, 4096])
+@pytest.mark.parametrize("offset", [0, -1, 1, 256, -256, -23333, 23333])
+@test_utils.test(arch=[ti.cuda, ti.vulkan], exclude=[(ti.vulkan, "Darwin")])
+def test_scan_with_offset(dtype, N, offset):
+    arr = ti.field(dtype, N, offset=offset)
+    arr_aux = ti.field(dtype, N, offset=offset)
+
+    @ti.kernel
+    def fill():
+        for i in arr:
+            arr[i] = ti.random() * N
+            arr_aux[i] = arr[i]
+
+    fill()
+
+    # Performing an inclusive in-place parallel prefix sum;
+    # only one executor is needed for a given length.
+    executor = ti.algorithms.PrefixSumExecutor(N)
+
+    executor.run(arr)
+
+    cur_sum = 0
+    for i in range(N):
+        cur_sum += arr_aux[i + offset]
+        assert arr[i + offset] == cur_sum
diff --git a/tests/python/test_sort.py b/tests/python/test_sort.py
index ab8477a4c8018..cd93588cacd49 100644
--- a/tests/python/test_sort.py
+++ b/tests/python/test_sort.py
@@ -1,3 +1,4 @@
+import pytest
 import taichi as ti
 from tests import test_utils
 
@@ -31,3 +32,29 @@ def fill():
     test_sort_for_dtype(ti.f32, 1)
     test_sort_for_dtype(ti.f32, 256)
     test_sort_for_dtype(ti.f32, 100001)
+
+
+@pytest.mark.parametrize("dtype", [ti.i32, ti.f32])
+@pytest.mark.parametrize("N", [1, 256, 100001])
+@pytest.mark.parametrize("offset", [0, -1, 1, 128, -128, -23333, 23333])
+@test_utils.test()
+def test_sort_with_offset(dtype, N, offset):
+    keys = ti.field(dtype, N, offset=offset)
+    values = ti.field(dtype, N, offset=offset)
+
+    @ti.kernel
+    def fill():
+        for i in keys:
+            keys[i] = ti.random() * N
+            values[i] = keys[i]
+
+    fill()
+    ti.algorithms.parallel_sort(keys, values)
+
+    keys_host = keys.to_numpy()
+    values_host = values.to_numpy()
+
+    for i in range(N):
+        if i < N - 1:
+            assert keys_host[i] <= keys_host[i + 1]
+        assert keys_host[i] == values_host[i]
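
For reference, a small usage sketch of the behavior the new tests pin down (this assumes the patch above is applied; the field and variable names are just examples): fields declared with an offset now go through to_numpy()/from_numpy(), copy_from(), the GUI image routines, and the sort/scan utilities like zero-based fields, with the exported NumPy array always indexed from zero.

import numpy as np
import taichi as ti

ti.init()

# Valid indices are (-1..1, -1..1) instead of (0..2, 0..2).
val = ti.field(dtype=ti.f32, shape=(3, 3), offset=(-1, -1))
val.fill(1.0)

# The offset affects only how the field is indexed, not the exported layout:
# to_numpy() returns a plain zero-based (3, 3) array.
arr = val.to_numpy()
assert arr.shape == (3, 3)
assert np.allclose(arr, np.ones((3, 3), dtype=np.float32))

# from_numpy() likewise expects a zero-based array of the same shape;
# array element (0, 0) lands at the field's lowest index (-1, -1).
val.from_numpy(2.0 * np.ones((3, 3), dtype=np.float32))
assert val[-1, -1] == 2.0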