From 7e679bfa174203fdfed4a10af578d61bf1b7df3c Mon Sep 17 00:00:00 2001 From: yucrazing <723284893@qq.com> Date: Thu, 22 Sep 2022 13:22:53 +0800 Subject: [PATCH 01/12] Add prefix sum executor --- python/taichi/__init__.py | 2 +- python/taichi/_kernels.py | 74 +----------------------------- python/taichi/algorithms.py | 89 +++++++++++++++++++++++++++++++++++++ tests/python/test_scan.py | 6 ++- tests/python/test_sort.py | 2 +- 5 files changed, 97 insertions(+), 76 deletions(-) create mode 100644 python/taichi/algorithms.py diff --git a/python/taichi/__init__.py b/python/taichi/__init__.py index 8870837b8d8c3..b55ca644183d5 100644 --- a/python/taichi/__init__.py +++ b/python/taichi/__init__.py @@ -9,7 +9,7 @@ # Provide a shortcut to types since they're commonly used. from taichi.types.primitive_types import * -from taichi import ad, experimental, graph, linalg, math, tools +from taichi import ad, algorithms, experimental, graph, linalg, math, tools from taichi.ui import GUI, hex_to_rgb, rgb_to_hex, ui # Issue#2223: Do not reorder, or we're busted with partially initialized module diff --git a/python/taichi/_kernels.py b/python/taichi/_kernels.py index f1d81f3856de9..201ac4078d929 100644 --- a/python/taichi/_kernels.py +++ b/python/taichi/_kernels.py @@ -292,9 +292,6 @@ def save_texture_to_numpy(tex: texture_type.rw_texture(num_dimensions=2, # Odd-even merge sort -# References: -# https://developer.nvidia.com/gpugems/gpugems2/part-vi-simulation-and-numerical-algorithms/chapter-46-improved-gpu-sorting -# https://en.wikipedia.org/wiki/Batcher_odd%E2%80%93even_mergesort @kernel def sort_stage(keys: template(), use_values: int, values: template(), N: int, p: int, k: int, invocations: int): @@ -315,26 +312,7 @@ def sort_stage(keys: template(), use_values: int, values: template(), N: int, values[b] = temp -def parallel_sort(keys, values=None): - N = keys.shape[0] - - num_stages = 0 - p = 1 - while p < N: - k = p - while k >= 1: - invocations = int((N - k - k % p) / (2 * k)) + 1 - if values is None: - sort_stage(keys, 0, keys, N, p, k, invocations) - else: - sort_stage(keys, 1, values, N, p, k, invocations) - num_stages += 1 - sync() - k = int(k / 2) - p = int(p * 2) - print(num_stages) - - +# Parallel Prefix Sum (Scan) @func def warp_shfl_up_i32(val: template()): global_tid = block.global_thread_idx() @@ -421,53 +399,3 @@ def blit_from_field_to_field( dst: template(), src: template(), offset: i32, size: i32): for i in range(size): dst[i + offset] = src[i] - - -# Parallel Prefix Sum (Scan) -# Ref[0]: https://developer.download.nvidia.com/compute/cuda/1.1-Beta/x86_website/projects/scan/doc/scan.pdf -# Ref[1]: https://github.com/NVIDIA/cuda-samples/blob/master/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan.cu -def prefix_sum_inclusive_inplace(input_arr, length): - BLOCK_SZ = 64 - GRID_SZ = int((length + BLOCK_SZ - 1) / BLOCK_SZ) - - # Buffer position and length - # This is a single buffer implementation for ease of aot usage - ele_num = length - ele_nums = [ele_num] - start_pos = 0 - ele_nums_pos = [start_pos] - - while ele_num > 1: - ele_num = int((ele_num + BLOCK_SZ - 1) / BLOCK_SZ) - ele_nums.append(ele_num) - start_pos += BLOCK_SZ * ele_num - ele_nums_pos.append(start_pos) - - if input_arr.dtype != i32: - raise RuntimeError("Only ti.i32 type is supported for prefix sum.") - - large_arr = field(i32, shape=start_pos) - - if current_cfg().arch == cuda: - inclusive_add = warp_shfl_up_i32 - elif current_cfg().arch == vulkan: - inclusive_add = subgroup.inclusive_add - else: - raise RuntimeError( - f"{str(current_cfg().arch)} is not supported for prefix sum.") - - blit_from_field_to_field(large_arr, input_arr, 0, length) - - # Kogge-Stone construction - for i in range(len(ele_nums) - 1): - if i == len(ele_nums) - 2: - scan_add_inclusive(large_arr, ele_nums_pos[i], ele_nums_pos[i + 1], - True, inclusive_add) - else: - scan_add_inclusive(large_arr, ele_nums_pos[i], ele_nums_pos[i + 1], - False, inclusive_add) - - for i in range(len(ele_nums) - 3, -1, -1): - uniform_add(large_arr, ele_nums_pos[i], ele_nums_pos[i + 1]) - - blit_from_field_to_field(input_arr, large_arr, 0, length) diff --git a/python/taichi/algorithms.py b/python/taichi/algorithms.py new file mode 100644 index 0000000000000..2ba68a930c008 --- /dev/null +++ b/python/taichi/algorithms.py @@ -0,0 +1,89 @@ +from taichi.types.primitive_types import i32 +from taichi.lang.impl import current_cfg, field +from taichi.lang.kernel_impl import data_oriented +from taichi.lang.runtime_ops import sync +from taichi.lang.simt import subgroup +from taichi.lang.misc import cuda, vulkan +from taichi._kernels import sort_stage +from taichi._kernels import warp_shfl_up_i32, blit_from_field_to_field, scan_add_inclusive, uniform_add + +# Odd-even merge sort +# References: +# https://developer.nvidia.com/gpugems/gpugems2/part-vi-simulation-and-numerical-algorithms/chapter-46-improved-gpu-sorting +# https://en.wikipedia.org/wiki/Batcher_odd%E2%80%93even_mergesort +def parallel_sort(keys, values=None): + N = keys.shape[0] + + num_stages = 0 + p = 1 + while p < N: + k = p + while k >= 1: + invocations = int((N - k - k % p) / (2 * k)) + 1 + if values is None: + sort_stage(keys, 0, keys, N, p, k, invocations) + else: + sort_stage(keys, 1, values, N, p, k, invocations) + num_stages += 1 + sync() + k = int(k / 2) + p = int(p * 2) + print(num_stages) + + +# Parallel Prefix Sum (Scan) +# Ref[0]: https://developer.download.nvidia.com/compute/cuda/1.1-Beta/x86_website/projects/scan/doc/scan.pdf +# Ref[1]: https://github.com/NVIDIA/cuda-samples/blob/master/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan.cu +@data_oriented +class PrefixSumExecutor: + def __init__(self): + self.large_arr = None + self.sorting_length = -1 + + def prefix_sum_inclusive_inplace(self, input_arr, length): + BLOCK_SZ = 64 + GRID_SZ = int((length + BLOCK_SZ - 1) / BLOCK_SZ) + + # Buffer position and length + # This is a single buffer implementation for ease of aot usage + ele_num = length + ele_nums = [ele_num] + start_pos = 0 + ele_nums_pos = [start_pos] + + while ele_num > 1: + ele_num = int((ele_num + BLOCK_SZ - 1) / BLOCK_SZ) + ele_nums.append(ele_num) + start_pos += BLOCK_SZ * ele_num + ele_nums_pos.append(start_pos) + + if input_arr.dtype != i32: + raise RuntimeError("Only ti.i32 type is supported for prefix sum.") + + if self.large_arr is None or self.sorting_length != length: + self.large_arr = field(i32, shape=start_pos) + self.sorting_length = length + + if current_cfg().arch == cuda: + inclusive_add = warp_shfl_up_i32 + elif current_cfg().arch == vulkan: + inclusive_add = subgroup.inclusive_add + else: + raise RuntimeError( + f"{str(current_cfg().arch)} is not supported for prefix sum.") + + blit_from_field_to_field(self.large_arr, input_arr, 0, length) + + # Kogge-Stone construction + for i in range(len(ele_nums) - 1): + if i == len(ele_nums) - 2: + scan_add_inclusive(self.large_arr, ele_nums_pos[i], ele_nums_pos[i + 1], + True, inclusive_add) + else: + scan_add_inclusive(self.large_arr, ele_nums_pos[i], ele_nums_pos[i + 1], + False, inclusive_add) + + for i in range(len(ele_nums) - 3, -1, -1): + uniform_add(self.large_arr, ele_nums_pos[i], ele_nums_pos[i + 1]) + + blit_from_field_to_field(input_arr, self.large_arr, 0, length) diff --git a/tests/python/test_scan.py b/tests/python/test_scan.py index 72dd5f415cb30..8fe4037f6ff04 100644 --- a/tests/python/test_scan.py +++ b/tests/python/test_scan.py @@ -4,6 +4,10 @@ @test_utils.test(arch=[ti.cuda, ti.vulkan], exclude=[(ti.vulkan, "Darwin")]) def test_scan(): + + # A global prefix sum wrapper, only need to be initialized once. + executor = ti.algorithms.PrefixSumExecutor() + def test_scan_for_dtype(dtype, N): arr = ti.field(dtype, N) arr_aux = ti.field(dtype, N) @@ -15,7 +19,7 @@ def fill(): arr_aux[i] = arr[i] fill() - ti._kernels.prefix_sum_inclusive_inplace(arr, N) + executor.prefix_sum_inclusive_inplace(arr, N) cur_sum = 0 for i in range(N): diff --git a/tests/python/test_sort.py b/tests/python/test_sort.py index 1eb3647e038fc..104f81d636fca 100644 --- a/tests/python/test_sort.py +++ b/tests/python/test_sort.py @@ -15,7 +15,7 @@ def fill(): values[i] = keys[i] fill() - ti._kernels.parallel_sort(keys, values) + ti.algorithms.parallel_sort(keys, values) keys_host = keys.to_numpy() values_host = values.to_numpy() From 9400ddb8c705233d0a61f82900a341f0e36d3d1f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 22 Sep 2022 05:29:32 +0000 Subject: [PATCH 02/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- python/taichi/algorithms.py | 17 +++++++++-------- tests/python/test_scan.py | 4 ++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/python/taichi/algorithms.py b/python/taichi/algorithms.py index 2ba68a930c008..2bbe8caf216ef 100644 --- a/python/taichi/algorithms.py +++ b/python/taichi/algorithms.py @@ -1,11 +1,12 @@ -from taichi.types.primitive_types import i32 +from taichi._kernels import (blit_from_field_to_field, scan_add_inclusive, + sort_stage, uniform_add, warp_shfl_up_i32) from taichi.lang.impl import current_cfg, field from taichi.lang.kernel_impl import data_oriented +from taichi.lang.misc import cuda, vulkan from taichi.lang.runtime_ops import sync from taichi.lang.simt import subgroup -from taichi.lang.misc import cuda, vulkan -from taichi._kernels import sort_stage -from taichi._kernels import warp_shfl_up_i32, blit_from_field_to_field, scan_add_inclusive, uniform_add +from taichi.types.primitive_types import i32 + # Odd-even merge sort # References: @@ -77,11 +78,11 @@ def prefix_sum_inclusive_inplace(self, input_arr, length): # Kogge-Stone construction for i in range(len(ele_nums) - 1): if i == len(ele_nums) - 2: - scan_add_inclusive(self.large_arr, ele_nums_pos[i], ele_nums_pos[i + 1], - True, inclusive_add) + scan_add_inclusive(self.large_arr, ele_nums_pos[i], + ele_nums_pos[i + 1], True, inclusive_add) else: - scan_add_inclusive(self.large_arr, ele_nums_pos[i], ele_nums_pos[i + 1], - False, inclusive_add) + scan_add_inclusive(self.large_arr, ele_nums_pos[i], + ele_nums_pos[i + 1], False, inclusive_add) for i in range(len(ele_nums) - 3, -1, -1): uniform_add(self.large_arr, ele_nums_pos[i], ele_nums_pos[i + 1]) diff --git a/tests/python/test_scan.py b/tests/python/test_scan.py index 8fe4037f6ff04..fe73b6738c68c 100644 --- a/tests/python/test_scan.py +++ b/tests/python/test_scan.py @@ -4,10 +4,10 @@ @test_utils.test(arch=[ti.cuda, ti.vulkan], exclude=[(ti.vulkan, "Darwin")]) def test_scan(): - + # A global prefix sum wrapper, only need to be initialized once. executor = ti.algorithms.PrefixSumExecutor() - + def test_scan_for_dtype(dtype, N): arr = ti.field(dtype, N) arr_aux = ti.field(dtype, N) From a9e10c3cd38de2569bd2cc609e6a49ec937dd7d4 Mon Sep 17 00:00:00 2001 From: yucrazing <723284893@qq.com> Date: Thu, 22 Sep 2022 15:07:56 +0800 Subject: [PATCH 03/12] Refactor PrefixSumExecutor --- python/taichi/algorithms.py | 27 +++++++++++++++------------ tests/python/test_scan.py | 11 ++++++----- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/python/taichi/algorithms.py b/python/taichi/algorithms.py index 2ba68a930c008..d6ec398b46db5 100644 --- a/python/taichi/algorithms.py +++ b/python/taichi/algorithms.py @@ -31,39 +31,42 @@ def parallel_sort(keys, values=None): print(num_stages) -# Parallel Prefix Sum (Scan) +# Inclusive In-Place's Parallel Prefix Sum (Scan) # Ref[0]: https://developer.download.nvidia.com/compute/cuda/1.1-Beta/x86_website/projects/scan/doc/scan.pdf # Ref[1]: https://github.com/NVIDIA/cuda-samples/blob/master/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan.cu @data_oriented class PrefixSumExecutor: - def __init__(self): + def __init__(self, length): self.large_arr = None - self.sorting_length = -1 + self.sorting_length = length - def prefix_sum_inclusive_inplace(self, input_arr, length): BLOCK_SZ = 64 GRID_SZ = int((length + BLOCK_SZ - 1) / BLOCK_SZ) # Buffer position and length # This is a single buffer implementation for ease of aot usage ele_num = length - ele_nums = [ele_num] + self.ele_nums = [ele_num] start_pos = 0 - ele_nums_pos = [start_pos] + self.ele_nums_pos = [start_pos] while ele_num > 1: ele_num = int((ele_num + BLOCK_SZ - 1) / BLOCK_SZ) - ele_nums.append(ele_num) + self.ele_nums.append(ele_num) start_pos += BLOCK_SZ * ele_num - ele_nums_pos.append(start_pos) + self.ele_nums_pos.append(start_pos) + + self.large_arr = field(i32, shape=start_pos) + + def run(self, input_arr): + + length = self.sorting_length + ele_nums = self.ele_nums + ele_nums_pos = self.ele_nums_pos if input_arr.dtype != i32: raise RuntimeError("Only ti.i32 type is supported for prefix sum.") - if self.large_arr is None or self.sorting_length != length: - self.large_arr = field(i32, shape=start_pos) - self.sorting_length = length - if current_cfg().arch == cuda: inclusive_add = warp_shfl_up_i32 elif current_cfg().arch == vulkan: diff --git a/tests/python/test_scan.py b/tests/python/test_scan.py index 8fe4037f6ff04..9df9a99c90423 100644 --- a/tests/python/test_scan.py +++ b/tests/python/test_scan.py @@ -4,10 +4,6 @@ @test_utils.test(arch=[ti.cuda, ti.vulkan], exclude=[(ti.vulkan, "Darwin")]) def test_scan(): - - # A global prefix sum wrapper, only need to be initialized once. - executor = ti.algorithms.PrefixSumExecutor() - def test_scan_for_dtype(dtype, N): arr = ti.field(dtype, N) arr_aux = ti.field(dtype, N) @@ -19,7 +15,12 @@ def fill(): arr_aux[i] = arr[i] fill() - executor.prefix_sum_inclusive_inplace(arr, N) + + # Performing an inclusive in-place's parallel prefix sum, + # only one exectutor is needed for a specified sorting length. + executor = ti.algorithms.PrefixSumExecutor(N) + + executor.run(arr) cur_sum = 0 for i in range(N): From 3d62afe4be118948eaff0472430683e70134398a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 22 Sep 2022 07:11:11 +0000 Subject: [PATCH 04/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/python/test_scan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/test_scan.py b/tests/python/test_scan.py index 9df9a99c90423..55ff669aee5ee 100644 --- a/tests/python/test_scan.py +++ b/tests/python/test_scan.py @@ -19,7 +19,7 @@ def fill(): # Performing an inclusive in-place's parallel prefix sum, # only one exectutor is needed for a specified sorting length. executor = ti.algorithms.PrefixSumExecutor(N) - + executor.run(arr) cur_sum = 0 From cfabe4f103f388ddb5a900994d55e146bd14664e Mon Sep 17 00:00:00 2001 From: YuZhang Date: Thu, 22 Sep 2022 15:32:45 +0800 Subject: [PATCH 05/12] Update python/taichi/algorithms.py Co-authored-by: Ailing --- python/taichi/algorithms.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/taichi/algorithms.py b/python/taichi/algorithms.py index 8ef91195c7197..2b6d3b14ba19d 100644 --- a/python/taichi/algorithms.py +++ b/python/taichi/algorithms.py @@ -38,7 +38,6 @@ def parallel_sort(keys, values=None): @data_oriented class PrefixSumExecutor: def __init__(self, length): - self.large_arr = None self.sorting_length = length BLOCK_SZ = 64 From b745de4f8fb4e67f014757675ef53725d4c885fd Mon Sep 17 00:00:00 2001 From: YuZhang Date: Thu, 22 Sep 2022 15:32:52 +0800 Subject: [PATCH 06/12] Update python/taichi/algorithms.py Co-authored-by: Ailing --- python/taichi/algorithms.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/taichi/algorithms.py b/python/taichi/algorithms.py index 2b6d3b14ba19d..629757b0e41c2 100644 --- a/python/taichi/algorithms.py +++ b/python/taichi/algorithms.py @@ -59,7 +59,6 @@ def __init__(self, length): self.large_arr = field(i32, shape=start_pos) def run(self, input_arr): - length = self.sorting_length ele_nums = self.ele_nums ele_nums_pos = self.ele_nums_pos From 61c1ef7ddb5d97306f95f037da24b950b6764472 Mon Sep 17 00:00:00 2001 From: yucrazing <723284893@qq.com> Date: Thu, 22 Sep 2022 15:34:11 +0800 Subject: [PATCH 07/12] Add algorithms to test_api.py && Add docstrings --- python/taichi/algorithms.py | 21 ++++++++++++++------- tests/python/test_api.py | 6 +++++- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/python/taichi/algorithms.py b/python/taichi/algorithms.py index 8ef91195c7197..d2782ebbe0d7f 100644 --- a/python/taichi/algorithms.py +++ b/python/taichi/algorithms.py @@ -8,11 +8,13 @@ from taichi.types.primitive_types import i32 -# Odd-even merge sort -# References: -# https://developer.nvidia.com/gpugems/gpugems2/part-vi-simulation-and-numerical-algorithms/chapter-46-improved-gpu-sorting -# https://en.wikipedia.org/wiki/Batcher_odd%E2%80%93even_mergesort def parallel_sort(keys, values=None): + """Odd-even merge sort + + References: + https://developer.nvidia.com/gpugems/gpugems2/part-vi-simulation-and-numerical-algorithms/chapter-46-improved-gpu-sorting + https://en.wikipedia.org/wiki/Batcher_odd%E2%80%93even_mergesort + """ N = keys.shape[0] num_stages = 0 @@ -32,11 +34,16 @@ def parallel_sort(keys, values=None): print(num_stages) -# Inclusive In-Place's Parallel Prefix Sum (Scan) -# Ref[0]: https://developer.download.nvidia.com/compute/cuda/1.1-Beta/x86_website/projects/scan/doc/scan.pdf -# Ref[1]: https://github.com/NVIDIA/cuda-samples/blob/master/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan.cu @data_oriented class PrefixSumExecutor: + """Parallel Prefix Sum (Scan) Helper + + Use this helper to perform an inclusive in-place's parallel prefix sum. + + References: + https://developer.download.nvidia.com/compute/cuda/1.1-Beta/x86_website/projects/scan/doc/scan.pdf + https://github.com/NVIDIA/cuda-samples/blob/master/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan.cu + """ def __init__(self, length): self.large_arr = None self.sorting_length = length diff --git a/tests/python/test_api.py b/tests/python/test_api.py index 6db245771dd80..9ddc09404915c 100644 --- a/tests/python/test_api.py +++ b/tests/python/test_api.py @@ -66,7 +66,7 @@ def _get_expected_matrix_apis(): 'StructField', 'TRACE', 'TaichiAssertionError', 'TaichiCompilationError', 'TaichiNameError', 'TaichiRuntimeError', 'TaichiRuntimeTypeError', 'TaichiSyntaxError', 'TaichiTypeError', 'TetMesh', 'Texture', 'TriMesh', - 'Vector', 'VectorNdarray', 'WARN', 'abs', 'acos', 'activate', 'ad', 'aot', + 'Vector', 'VectorNdarray', 'WARN', 'abs', 'acos', 'activate', 'ad', 'algorithms', 'aot', 'append', 'arm64', 'asin', 'assume_in_range', 'atan2', 'atomic_add', 'atomic_and', 'atomic_max', 'atomic_min', 'atomic_or', 'atomic_sub', 'atomic_xor', 'axes', 'bit_cast', 'bit_shr', 'block_local', @@ -93,6 +93,10 @@ def _get_expected_matrix_apis(): 'FwdMode', 'Tape', 'clear_all_gradients', 'grad_for', 'grad_replaced', 'no_grad' ] +user_api[ti.algorithms] = [ + 'PrefixSumExecutor', + 'parallel_sort' +] user_api[ti.Field] = [ 'copy_from', 'dtype', 'fill', 'from_numpy', 'from_paddle', 'from_torch', 'parent', 'shape', 'snode', 'to_numpy', 'to_paddle', 'to_torch' From 8247a9a0c42a2aa26841821475d6e11064a721e5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 22 Sep 2022 07:36:34 +0000 Subject: [PATCH 08/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- python/taichi/algorithms.py | 2 +- tests/python/test_api.py | 13 +++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/python/taichi/algorithms.py b/python/taichi/algorithms.py index 5d5bad75a2324..957696b670d35 100644 --- a/python/taichi/algorithms.py +++ b/python/taichi/algorithms.py @@ -39,7 +39,7 @@ class PrefixSumExecutor: """Parallel Prefix Sum (Scan) Helper Use this helper to perform an inclusive in-place's parallel prefix sum. - + References: https://developer.download.nvidia.com/compute/cuda/1.1-Beta/x86_website/projects/scan/doc/scan.pdf https://github.com/NVIDIA/cuda-samples/blob/master/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan.cu diff --git a/tests/python/test_api.py b/tests/python/test_api.py index 9ddc09404915c..7f90cb5831770 100644 --- a/tests/python/test_api.py +++ b/tests/python/test_api.py @@ -66,10 +66,10 @@ def _get_expected_matrix_apis(): 'StructField', 'TRACE', 'TaichiAssertionError', 'TaichiCompilationError', 'TaichiNameError', 'TaichiRuntimeError', 'TaichiRuntimeTypeError', 'TaichiSyntaxError', 'TaichiTypeError', 'TetMesh', 'Texture', 'TriMesh', - 'Vector', 'VectorNdarray', 'WARN', 'abs', 'acos', 'activate', 'ad', 'algorithms', 'aot', - 'append', 'arm64', 'asin', 'assume_in_range', 'atan2', 'atomic_add', - 'atomic_and', 'atomic_max', 'atomic_min', 'atomic_or', 'atomic_sub', - 'atomic_xor', 'axes', 'bit_cast', 'bit_shr', 'block_local', + 'Vector', 'VectorNdarray', 'WARN', 'abs', 'acos', 'activate', 'ad', + 'algorithms', 'aot', 'append', 'arm64', 'asin', 'assume_in_range', 'atan2', + 'atomic_add', 'atomic_and', 'atomic_max', 'atomic_min', 'atomic_or', + 'atomic_sub', 'atomic_xor', 'axes', 'bit_cast', 'bit_shr', 'block_local', 'cache_read_only', 'cast', 'cc', 'ceil', 'cos', 'cpu', 'cuda', 'data_oriented', 'dataclass', 'deactivate', 'deactivate_all_snodes', 'dx11', 'eig', 'exp', 'experimental', 'extension', 'f16', 'f32', 'f64', @@ -93,10 +93,7 @@ def _get_expected_matrix_apis(): 'FwdMode', 'Tape', 'clear_all_gradients', 'grad_for', 'grad_replaced', 'no_grad' ] -user_api[ti.algorithms] = [ - 'PrefixSumExecutor', - 'parallel_sort' -] +user_api[ti.algorithms] = ['PrefixSumExecutor', 'parallel_sort'] user_api[ti.Field] = [ 'copy_from', 'dtype', 'fill', 'from_numpy', 'from_paddle', 'from_torch', 'parent', 'shape', 'snode', 'to_numpy', 'to_paddle', 'to_torch' From dc8df44a607a736239500ca5557aaccbd8693cfb Mon Sep 17 00:00:00 2001 From: yucrazing <723284893@qq.com> Date: Thu, 22 Sep 2022 15:46:23 +0800 Subject: [PATCH 09/12] Remove unused imports --- python/taichi/_kernels.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/taichi/_kernels.py b/python/taichi/_kernels.py index 201ac4078d929..09abf7db9bf64 100644 --- a/python/taichi/_kernels.py +++ b/python/taichi/_kernels.py @@ -5,11 +5,10 @@ from taichi.lang._ndrange import ndrange from taichi.lang.expr import Expr from taichi.lang.field import ScalarField -from taichi.lang.impl import current_cfg, field, grouped, static, static_assert +from taichi.lang.impl import grouped, static, static_assert from taichi.lang.kernel_impl import func, kernel -from taichi.lang.misc import cuda, loop_config, vulkan -from taichi.lang.runtime_ops import sync -from taichi.lang.simt import block, subgroup, warp +from taichi.lang.misc import loop_config +from taichi.lang.simt import block, warp from taichi.lang.snode import deactivate from taichi.types import ndarray_type, texture_type, vector from taichi.types.annotations import template From d913fc7edf311ea803f0f1d3783b63582fe40d29 Mon Sep 17 00:00:00 2001 From: yucrazing <723284893@qq.com> Date: Thu, 22 Sep 2022 16:46:55 +0800 Subject: [PATCH 10/12] add __all__ --- python/taichi/algorithms.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/taichi/algorithms.py b/python/taichi/algorithms.py index 957696b670d35..8af2bfce5e084 100644 --- a/python/taichi/algorithms.py +++ b/python/taichi/algorithms.py @@ -96,3 +96,6 @@ def run(self, input_arr): uniform_add(self.large_arr, ele_nums_pos[i], ele_nums_pos[i + 1]) blit_from_field_to_field(input_arr, self.large_arr, 0, length) + + +__all__ = ['parallel_sort', 'PrefixSumExecutor'] From 88f40d33172cc3673af0e99d21c85e83e7ec603b Mon Sep 17 00:00:00 2001 From: yucrazing <723284893@qq.com> Date: Thu, 22 Sep 2022 18:13:53 +0800 Subject: [PATCH 11/12] Remove files into a folder to make ci happy --- python/taichi/algorithms/__init__.py | 1 + python/taichi/{algorithms.py => algorithms/_algorithms.py} | 0 2 files changed, 1 insertion(+) create mode 100644 python/taichi/algorithms/__init__.py rename python/taichi/{algorithms.py => algorithms/_algorithms.py} (100%) diff --git a/python/taichi/algorithms/__init__.py b/python/taichi/algorithms/__init__.py new file mode 100644 index 0000000000000..78108cf8c10c2 --- /dev/null +++ b/python/taichi/algorithms/__init__.py @@ -0,0 +1 @@ +from ._algorithms import * \ No newline at end of file diff --git a/python/taichi/algorithms.py b/python/taichi/algorithms/_algorithms.py similarity index 100% rename from python/taichi/algorithms.py rename to python/taichi/algorithms/_algorithms.py From 2c12fbb98f672d3e43d07850fa0f1d4debc269f4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 22 Sep 2022 10:16:06 +0000 Subject: [PATCH 12/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- python/taichi/algorithms/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/taichi/algorithms/__init__.py b/python/taichi/algorithms/__init__.py index 78108cf8c10c2..f13c84f0a7833 100644 --- a/python/taichi/algorithms/__init__.py +++ b/python/taichi/algorithms/__init__.py @@ -1 +1 @@ -from ._algorithms import * \ No newline at end of file +from ._algorithms import *