diff --git a/.github/workflows/scripts/ti_build/alter.py b/.github/workflows/scripts/ti_build/alter.py index cb599bf7ff39c..a1a165ff00d4e 100644 --- a/.github/workflows/scripts/ti_build/alter.py +++ b/.github/workflows/scripts/ti_build/alter.py @@ -33,11 +33,15 @@ def add_aot_env(): def _write_ti_bashrc(): path = get_cache_home() / "ti.bashrc" + envs = get_cache_home() / "ti-env.sh" + _write_env(envs) with open(path, "w") as f: f.write( "[ -f /etc/bashrc ] && source /etc/bashrc\n" "[ -f ~/.bashrc ] && source ~/.bashrc\n" r'export PS1="\[\e]0;[Taichi Build Environment]\a\]\[\033[01;31m\][Taichi Build] \[\033[00m\]$PS1"' + "\n" + f"source {envs}\n" ) return path @@ -47,11 +51,15 @@ def _write_ti_zshrc(): dotdir = get_cache_home() / "zdotdir" dotdir.mkdir(parents=True, exist_ok=True) path = dotdir / ".zshrc" + envs = get_cache_home() / "ti-env.sh" + _write_env(envs) with open(path, "w") as f: f.write( "[ -f /etc/zsh/zshrc ] && source /etc/zsh/zshrc\n" "[ -f $HOME/.zshrc ] && source $HOME/.zshrc\n" r"export PROMPT='%{$fg_bold[red]%}[Taichi Build] %{$reset_color%}'$PROMPT" + "\n" + f"source {envs}\n" ) return dotdir @@ -138,10 +146,13 @@ def enter_shell(): os.execl(shell.exe, shell.exe) -def write_env(path): - cmake_args.writeback() +def _write_env(path): envs = os.environ.get_changed_envs() envstr = "" + + if isinstance(path, Path): + path = str(path) + if path.endswith(".ps1"): envstr = "\n".join([f'$env:{k}="{v}"' for k, v in envs.items()]) elif path.endswith(".sh"): @@ -156,6 +167,10 @@ def write_env(path): with open(path, "w") as f: f.write(envstr) + +def write_env(path): + cmake_args.writeback() + _write_env(path) misc.info(f"Environment variables written to {path}") diff --git a/.github/workflows/scripts/ti_build/bootstrap.py b/.github/workflows/scripts/ti_build/bootstrap.py index 6423f2f086730..0018ac54ee5b8 100644 --- a/.github/workflows/scripts/ti_build/bootstrap.py +++ b/.github/workflows/scripts/ti_build/bootstrap.py @@ -2,6 +2,7 @@ # -- stdlib -- from 
pathlib import Path +from types import ModuleType from typing import Optional import importlib import os @@ -55,21 +56,32 @@ def restart(): os.execl(sys.executable, sys.executable, "-S", *sys.argv) +def _try_import(name: str) -> Optional[ModuleType]: + try: + return importlib.import_module(name) + except ModuleNotFoundError: + return None + + def ensure_dependencies(*deps: str): """ Automatically install dependencies if they are not installed. """ + pip = _try_import("pip") + ensurepip = _try_import("ensurepip") + if not sys.flags.no_site: - # First run, do pip checks - try: - import pip - except ModuleNotFoundError: - print("!! pip not found, build.py needs at least a functional pip to work.", flush=True) - exit(1) + # First run, restart with no_site + if not pip and not ensurepip: + print( + "!! pip or ensurepip not found, build.py needs at least a functional pip/ensurepip to work.", flush=True + ) + sys.exit(1) restart() + # Second run v = sys.version_info bootstrap_root = get_cache_home() / "bootstrap" / f"{v.major}.{v.minor}" bootstrap_root.mkdir(parents=True, exist_ok=True) @@ -85,12 +97,18 @@ def ensure_dependencies(*deps: str): print("Installing dependencies...", flush=True) py = sys.executable - - if run(py, "-m", "pip", "install", "pip", "--no-user", f"--target={bootstrap_root}"): - raise Exception("Unable to install pip!") - - pipcmd = [py, "-S", "-m", "pip", "install", "--no-user", f"--target={bootstrap_root}", "-U"] - if run(*pipcmd, *deps, env={"PYTHONPATH": str(bootstrap_root)}): + pip_install = ["-m", "pip", "install", "--no-user", f"--target={bootstrap_root}", "-U"] + + if ensurepip: + wheels = Path(ensurepip.__path__[0]).glob("**/*.whl") + wheels = os.pathsep.join(map(str, wheels)) + if run(py, "-S", *pip_install, "pip", env={"PYTHONPATH": wheels}): + raise Exception("Unable to install pip! (ensurepip method)") + else: # pip must exist + if run(py, *pip_install, "pip"): + raise Exception("Unable to install pip! 
(pip method)") + + if run(py, "-S", *pip_install, *deps, env={"PYTHONPATH": str(bootstrap_root)}): raise Exception("Unable to install dependencies!") restart() diff --git a/c_api/docs/taichi/taichi_core.h.md b/c_api/docs/taichi/taichi_core.h.md index 8c8263562db87..54890de2738a8 100644 --- a/c_api/docs/taichi/taichi_core.h.md +++ b/c_api/docs/taichi/taichi_core.h.md @@ -305,6 +305,7 @@ Types of kernel and compute graph argument. - `enumeration.argument_type.ndarray`: ND-array wrapped around a `handle.memory`. - `enumeration.argument_type.texture`: Texture wrapped around a `handle.image`. - `enumeration.argument_type.scalar`: Typed scalar. +- `enumeration.argument_type.tensor`: Typed tensor. `bit_field.memory_usage` @@ -450,6 +451,23 @@ Scalar value represented by a power-of-two number of bits. A typed scalar value. +`union.tensor_value` + +Tensor value represented by a power-of-two number of bits. + +- `union.tensor_value.x8`: Tensor value that fits into 8 bits. +- `union.tensor_value.x16`: Tensor value that fits into 16 bits. +- `union.tensor_value.x32`: Tensor value that fits into 32 bits. +- `union.tensor_value.x64`: Tensor value that fits into 64 bits. + +`structure.tensor_value_with_length` + +A tensor value with a length. + +`structure.tensor` + +A typed tensor value. + `union.argument_value` A scalar or structured argument value. @@ -459,6 +477,7 @@ A scalar or structured argument value. - `union.argument_value.ndarray`: An ND-array to be bound. - `union.argument_value.texture`: A texture to be bound. - `union.argument_value.scalar`: An scalar to be bound. +- `union.argument_value.tensor`: A tensor to be bound. 
`structure.argument` diff --git a/c_api/include/taichi/cpp/taichi.hpp b/c_api/include/taichi/cpp/taichi.hpp index 61f0b15e65290..8319f96493854 100644 --- a/c_api/include/taichi/cpp/taichi.hpp +++ b/c_api/include/taichi/cpp/taichi.hpp @@ -837,6 +837,25 @@ class ComputeGraph { return compute_graph_; } }; +template +struct DataTypeToEnum { + static constexpr TiDataType value = TI_DATA_TYPE_UNKNOWN; +}; +#define DEFINE_DATA_TYPE_ENUM(type, enumv) \ + template <> \ + struct DataTypeToEnum { \ + static constexpr TiDataType value = TI_DATA_TYPE_##enumv; \ + }; + +DEFINE_DATA_TYPE_ENUM(int32_t, I32); +DEFINE_DATA_TYPE_ENUM(float, F32); +DEFINE_DATA_TYPE_ENUM(uint16_t, U16); +DEFINE_DATA_TYPE_ENUM(int16_t, I16); +DEFINE_DATA_TYPE_ENUM(uint8_t, U8); +DEFINE_DATA_TYPE_ENUM(int8_t, I8); +DEFINE_DATA_TYPE_ENUM(uint64_t, U64); +DEFINE_DATA_TYPE_ENUM(int64_t, I64); +#undef DEFINE_DATA_TYPE_ENUM class Kernel { protected: @@ -884,11 +903,12 @@ class Kernel { template void push_arg(const std::vector &v) { int idx = args_.size(); - // Temporary workaround for setting vec/matrix arguments in a flattened way. - args_.resize(args_.size() + v.size()); - for (int j = 0; j < v.size(); ++j) { - at(idx + j) = v[j]; - } + args_.resize(idx + 1); + args_[idx].type = TI_ARGUMENT_TYPE_TENSOR; + std::memcpy(args_[idx].value.tensor.contents.data.x32, v.data(), + v.size() * sizeof(T)); + args_[idx].value.tensor.contents.length = v.size(); + args_[idx].value.tensor.type = DataTypeToEnum::value; } template diff --git a/c_api/include/taichi/taichi_core.h b/c_api/include/taichi/taichi_core.h index bae403a6ff64b..f3764822c35b0 100644 --- a/c_api/include/taichi/taichi_core.h +++ b/c_api/include/taichi/taichi_core.h @@ -227,7 +227,7 @@ #pragma once #ifndef TI_C_API_VERSION -#define TI_C_API_VERSION 1005000 +#define TI_C_API_VERSION 1007000 #endif // TI_C_API_VERSION #ifndef TAICHI_H @@ -463,6 +463,8 @@ typedef enum TiArgumentType { TI_ARGUMENT_TYPE_TEXTURE = 3, // Typed scalar. 
TI_ARGUMENT_TYPE_SCALAR = 4, + // Typed tensor. + TI_ARGUMENT_TYPE_TENSOR = 5, TI_ARGUMENT_TYPE_MAX_ENUM = 0xffffffff, } TiArgumentType; @@ -802,6 +804,36 @@ typedef struct TiScalar { TiScalarValue value; } TiScalar; +// Union `TiTensorValue` +// +// Tensor value represented by a power-of-two number of bits. +typedef union TiTensorValue { + // Tensor value that fits into 8 bits. + uint8_t x8[128]; + // Tensor value that fits into 16 bits. + uint16_t x16[64]; + // Tensor value that fits into 32 bits. + uint32_t x32[32]; + // Tensor value that fits into 64 bits. + uint64_t x64[16]; +} TiTensorValue; + +// Structure `TiTensorValueWithLength` +// +// A tensor value with a length. +typedef struct TiTensorValueWithLength { + uint32_t length; + TiTensorValue data; +} TiTensorValueWithLength; + +// Structure `TiTensor` +// +// A typed tensor value. +typedef struct TiTensor { + TiDataType type; + TiTensorValueWithLength contents; +} TiTensor; + // Union `TiArgumentValue` (1.4.0) // // A scalar or structured argument value. @@ -818,6 +850,8 @@ typedef union TiArgumentValue { TiTexture texture; // An scalar to be bound. TiScalar scalar; + // A tensor to be bound. 
+ TiTensor tensor; } TiArgumentValue; // Structure `TiArgument` (1.4.0) diff --git a/c_api/src/taichi_core_impl.cpp b/c_api/src/taichi_core_impl.cpp index fa91a5da9ec71..d8366bf30b067 100644 --- a/c_api/src/taichi_core_impl.cpp +++ b/c_api/src/taichi_core_impl.cpp @@ -792,6 +792,28 @@ void ti_launch_kernel(TiRuntime runtime, devallocs.emplace_back(std::move(devalloc)); break; } + case TI_ARGUMENT_TYPE_TENSOR: { + auto &tensor = arg.value.tensor; + if (tensor.type == TI_DATA_TYPE_I16 || + tensor.type == TI_DATA_TYPE_U16 || + tensor.type == TI_DATA_TYPE_F16) { + for (int j = 0; j < tensor.contents.length; j++) { + builder.set_struct_arg_impl({(int)i, j}, + tensor.contents.data.x16[j]); + } + } else if (tensor.type == TI_DATA_TYPE_I32 || + tensor.type == TI_DATA_TYPE_U32 || + tensor.type == TI_DATA_TYPE_F32) { + for (int j = 0; j < tensor.contents.length; j++) { + builder.set_struct_arg_impl({(int)i, j}, + tensor.contents.data.x32[j]); + } + } else { + ti_set_last_error(TI_ERROR_NOT_SUPPORTED, + ("args[" + std::to_string(i) + "].type").c_str()); + } + break; + } default: { ti_set_last_error(TI_ERROR_ARGUMENT_OUT_OF_RANGE, ("args[" + std::to_string(i) + "].type").c_str()); diff --git a/c_api/src/taichi_llvm_impl.cpp b/c_api/src/taichi_llvm_impl.cpp index c39bf619a7ed6..a8b2fb8d8c6d2 100644 --- a/c_api/src/taichi_llvm_impl.cpp +++ b/c_api/src/taichi_llvm_impl.cpp @@ -52,14 +52,13 @@ taichi::lang::Device &LlvmRuntime::get() { TiMemory LlvmRuntime::allocate_memory( const taichi::lang::Device::AllocParams ¶ms) { - const taichi::lang::CompileConfig &config = executor_->get_config(); taichi::lang::LLVMRuntime *llvm_runtime = executor_->get_llvm_runtime(); taichi::lang::LlvmDevice *llvm_device = executor_->llvm_device(); taichi::lang::DeviceAllocation devalloc = - llvm_device->allocate_memory_runtime( - {params, config.ndarray_use_cached_allocator, - executor_->get_runtime_jit_module(), llvm_runtime, result_buffer}); + llvm_device->allocate_memory_runtime({params, + 
executor_->get_runtime_jit_module(), + llvm_runtime, result_buffer}); return devalloc2devmem(*this, devalloc); } diff --git a/c_api/taichi.json b/c_api/taichi.json index 7971c3327b84c..f726a5cea9b73 100644 --- a/c_api/taichi.json +++ b/c_api/taichi.json @@ -156,7 +156,8 @@ "f32": 1, "ndarray": 2, "texture": 3, - "scalar": 4 + "scalar": 4, + "tensor": 5 } }, { @@ -484,6 +485,60 @@ } ] }, + { + "name": "tensor_value", + "type": "union", + "variants": [ + { + "name": "x8", + "type": "uint8_t", + "count": 128 + }, + { + "name": "x16", + "type": "uint16_t", + "count": 64 + }, + { + "name": "x32", + "type": "uint32_t", + "count": 32 + }, + { + "name": "x64", + "type": "uint64_t", + "count": 16 + } + ] + }, + { + "name": "tensor_value_with_length", + "type": "structure", + "fields": [ + { + "name": "length", + "type": "uint32_t" + }, + { + "name": "data", + "type": "union.tensor_value" + } + ] + }, + { + "name": "tensor", + "type": "structure", + "fields": [ + { + "name": "type", + "type": "enumeration.data_type" + }, + { + "name": "contents", + "type": "structure.tensor_value_with_length" + } + ] + }, { "name": "argument_value", "type": "union", @@ -508,6 +563,10 @@ { "name": "scalar", "type": "structure.scalar" + }, + { + "name": "tensor", + "type": "structure.tensor" } ] }, diff --git a/docs/cover-in-ci.lst b/docs/cover-in-ci.lst index 420ae0ee214c6..723b95147504a 100644 --- a/docs/cover-in-ci.lst +++ b/docs/cover-in-ci.lst @@ -31,3 +31,5 @@ docs/lang/articles/visualization/ggui.md docs/lang/articles/visualization/gui_system.md docs/rfcs/20220410-rfc-process.md docs/rfcs/yyyymmdd-rfc-template.md +docs/lang/articles/math/linear_solver.md +docs/lang/articles/math/sparse_matrix.md diff --git a/docs/lang/articles/c-api/taichi_core.md b/docs/lang/articles/c-api/taichi_core.md index 4f3b4c1facc91..9892305358a6b 100644 --- a/docs/lang/articles/c-api/taichi_core.md +++ b/docs/lang/articles/c-api/taichi_core.md @@ -498,6 +498,7 @@ typedef enum TiArgumentType { 
TI_ARGUMENT_TYPE_NDARRAY = 2, TI_ARGUMENT_TYPE_TEXTURE = 3, TI_ARGUMENT_TYPE_SCALAR = 4, + TI_ARGUMENT_TYPE_TENSOR = 5, TI_ARGUMENT_TYPE_MAX_ENUM = 0xffffffff, } TiArgumentType; ``` @@ -509,6 +510,7 @@ Types of kernel and compute graph argument. - `TI_ARGUMENT_TYPE_NDARRAY`: ND-array wrapped around a [`TiMemory`](#handle-timemory). - `TI_ARGUMENT_TYPE_TEXTURE`: Texture wrapped around a [`TiImage`](#handle-tiimage). - `TI_ARGUMENT_TYPE_SCALAR`: Typed scalar. +- `TI_ARGUMENT_TYPE_TENSOR`: Typed tensor. --- @@ -927,6 +929,7 @@ typedef union TiArgumentValue { TiNdArray ndarray; TiTexture texture; TiScalar scalar; + TiTensor tensor; } TiArgumentValue; ``` @@ -937,6 +940,7 @@ A scalar or structured argument value. - `ndarray`: An ND-array to be bound. - `texture`: A texture to be bound. - `scalar`: An scalar to be bound. +- `tensor`: A tensor to be bound. --- ### Structure `TiArgument` diff --git a/docs/lang/articles/math/linear_solver.md b/docs/lang/articles/math/linear_solver.md new file mode 100644 index 0000000000000..cd852e4d48a93 --- /dev/null +++ b/docs/lang/articles/math/linear_solver.md @@ -0,0 +1,73 @@ +--- +sidebar_position: 3 +--- + +# Linear Solver + +Solving linear equations is a common task in scientific computing. Taichi provides basic direct and iterative linear solvers for +various simulation scenarios. Currently, there are two categories of linear solvers available: +1. Solvers built for `SparseMatrix` +2. Solvers built for `ti.field` + +## Sparse linear solver +You may want to solve some linear equations using sparse matrices. +Then, the following steps could help: +1. Create a `solver` using `ti.linalg.SparseSolver(solver_type, ordering)`. Currently, the factorization types supported on CPU backends are `LLT`, `LDLT`, and `LU`, and supported orderings include `AMD` and `COLAMD`. The sparse solver on CUDA supports the `LLT` factorization type only. +2. 
Analyze and factorize the sparse matrix you want to solve using `solver.analyze_pattern(sparse_matrix)` and `solver.factorize(sparse_matrix)`. +3. Call `x = solver.solve(b)`, where `x` is the solution and `b` is the right-hand side of the linear system. On CPU backends, `x` and `b` can be NumPy arrays, Taichi Ndarrays, or Taichi fields. On the CUDA backend, `x` and `b` *must* be Taichi Ndarrays. +4. Call `solver.info()` to check if the solving process succeeds. + +Here's a full example. + +```python +import taichi as ti + +arch = ti.cpu # or ti.cuda +ti.init(arch=arch) + +n = 4 + +K = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) +b = ti.ndarray(ti.f32, shape=n) + +@ti.kernel +def fill(A: ti.types.sparse_matrix_builder(), b: ti.types.ndarray(), interval: ti.i32): + for i in range(n): + A[i, i] += 2.0 + + if i % interval == 0: + b[i] += 1.0 + +fill(K, b, 3) + +A = K.build() +print(">>>> Matrix A:") +print(A) +print(">>>> Vector b:") +print(b) +# outputs: +# >>>> Matrix A: +# [2, 0, 0, 0] +# [0, 2, 0, 0] +# [0, 0, 2, 0] +# [0, 0, 0, 2] +# >>>> Vector b: +# [1. 0. 0. 1.] +solver = ti.linalg.SparseSolver(solver_type="LLT") +solver.analyze_pattern(A) +solver.factorize(A) +x = solver.solve(b) +success = solver.info() +print(">>>> Solve sparse linear systems Ax = b with the solution x:") +print(x) +print(f">>>> Computation succeeded: {success}") +# outputs: +# >>>> Solve sparse linear systems Ax = b with the solution x: +# [0.5 0. 0. 0.5] +# >>>> Computation succeeded: True +``` +## Examples + +Please have a look at our two demos for more information: ++ [Stable fluid](https://github.com/taichi-dev/taichi/blob/master/python/taichi/examples/simulation/stable_fluid.py): A 2D fluid simulation using a sparse Laplacian matrix to solve Poisson's pressure equation. 
++ [Implicit mass spring](https://github.com/taichi-dev/taichi/blob/master/python/taichi/examples/simulation/implicit_mass_spring.py): A 2D cloth simulation demo using sparse matrices to solve the linear systems. diff --git a/docs/lang/articles/math/sparse_matrix.md b/docs/lang/articles/math/sparse_matrix.md index 2d4df0a61c73d..780ae4c08d390 100644 --- a/docs/lang/articles/math/sparse_matrix.md +++ b/docs/lang/articles/math/sparse_matrix.md @@ -56,7 +56,7 @@ print(A) The basic operations like `+`, `-`, `*`, `@` and transpose of sparse matrices are supported now. -```python +```python cont print(">>>> Summation: C = A + A") C = A + A print(C) @@ -131,66 +131,3 @@ print(f">>>> Element Access: A[0,0] = {A[0,0]}") # outputs: # >>>> Element Access: A[0,0] = 1.0 ``` - -## Sparse linear solver -You may want to solve some linear equations using sparse matrices. -Then, the following steps could help: -1. Create a `solver` using `ti.linalg.SparseSolver(solver_type, ordering)`. Currently, the factorization types supported on CPU backends are `LLT`, `LDLT`, and `LU`, and supported orderings include `AMD` and `COLAMD`. The sparse solver on CUDA supports the `LLT` factorization type only. -2. Analyze and factorize the sparse matrix you want to solve using `solver.analyze_pattern(sparse_matrix)` and `solver.factorize(sparse_matrix)` -3. Call `x = solver.solve(b)`, where `x` is the solution and `b` is the right-hand side of the linear system. On CPU backends, `x` and `b` can be NumPy arrays, Taichi Ndarrays, or Taichi fields. On the CUDA backend, `x` and `b` *must* be Taichi Ndarrays. -4. Call `solver.info()` to check if the solving process succeeds. - -Here's a full example. 
- -```python -import taichi as ti - -arch = ti.cpu # or ti.cuda -ti.init(arch=arch) - -n = 4 - -K = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) -b = ti.ndarray(ti.f32, shape=n) - -@ti.kernel -def fill(A: ti.types.sparse_matrix_builder(), b: ti.template(), interval: ti.i32): - for i in range(n): - A[i, i] += 2.0 - - if i % interval == 0: - b[i] += 1.0 - -fill(K, b, 3) - -A = K.build() -print(">>>> Matrix A:") -print(A) -print(">>>> Vector b:") -print(b) -# outputs: -# >>>> Matrix A: -# [2, 0, 0, 0] -# [0, 2, 0, 0] -# [0, 0, 2, 0] -# [0, 0, 0, 2] -# >>>> Vector b: -# [1. 0. 0. 1.] -solver = ti.linalg.SparseSolver(solver_type="LLT") -solver.analyze_pattern(A) -solver.factorize(A) -x = solver.solve(b) -isSuccess = solver.info() -print(">>>> Solve sparse linear systems Ax = b with the solution x:") -print(x) -print(f">>>> Computation was successful?: {isSuccess}") -# outputs: -# >>>> Solve sparse linear systems Ax = b with the solution x: -# [0.5 0. 0. 0.5] -# >>>> Computation was successful?: True -``` -## Examples - -Please have a look at our two demos for more information: -+ [Stable fluid](https://github.com/taichi-dev/taichi/blob/master/python/taichi/examples/simulation/stable_fluid.py): A 2D fluid simulation using a sparse Laplacian matrix to solve Poisson's pressure equation. -+ [Implicit mass spring](https://github.com/taichi-dev/taichi/blob/master/python/taichi/examples/simulation/implicit_mass_spring.py): A 2D cloth simulation demo using sparse matrices to solve the linear systems. diff --git a/misc/make_changelog.py b/misc/make_changelog.py index f41ff81269449..a7e23e593ece4 100644 --- a/misc/make_changelog.py +++ b/misc/make_changelog.py @@ -34,16 +34,20 @@ def main(ver=None, repo_dir="."): # We need to find out the latest common commit among base and ver, # everything after this commit should be listed in the changelog. 
- base_commit = find_latest_tag_commit(g.tags) - commits_in_base_tag = list(g.iter_commits(base_commit, max_count=500)) - commits = list(g.iter_commits(ver, max_count=500)) - begin, end = -1, 0 + latest_release = find_latest_tag_commit(g.tags) + head = g.head.commit + mb = g.merge_base(latest_release, head) + assert len(mb) == 1 + mb = mb[0] + commits_in_base_tag = list(g.iter_commits(latest_release, max_count=500)) + commits = list(g.iter_commits((mb, head))) def format(c): return f"{c.summary} (by **{c.author}**)" notable_changes = {} all_changes = [] + by_author = {} details = load_pr_tags() @@ -75,6 +79,7 @@ def format(c): f'** Warning: tag {tag.lower()} undefined in the "details" dict. Please include the tag into "details", unless the tag is a typo.' ) all_changes.append(format(c)) + by_author.setdefault(str(c.author), []).append(s) res = "Highlights:\n" for tag in sorted(notable_changes.keys()): @@ -86,6 +91,13 @@ def format(c): for c in all_changes: res += f" - {c}\n" + if args.show_per_author: + res += "\nContributors (in alphabetical order):\n" + for author in sorted(by_author.keys()): + res += f" - {author}\n" + for item in by_author[author]: + res += f" - {item}\n" + return res @@ -93,6 +105,7 @@ def format(c): parser = argparse.ArgumentParser() parser.add_argument("--ver") parser.add_argument("--repo_dir", type=str, default=".") + parser.add_argument("--show-per-author", action="store_true", default=False) parser.add_argument("--save", action="store_true", default=False) args = parser.parse_args() res = main(args.ver, args.repo_dir) diff --git a/python/taichi/aot/utils.py b/python/taichi/aot/utils.py index f85f132a8d12b..a29bda91fd43a 100644 --- a/python/taichi/aot/utils.py +++ b/python/taichi/aot/utils.py @@ -97,11 +97,8 @@ def produce_injected_args(kernel, symbolic_args=None): texture_shape = (2,) * anno.num_dimensions injected_args.append(Texture(Format.rgba8, texture_shape)) elif isinstance(anno, MatrixType): - if not isinstance(symbolic_args[i], 
list): - raise RuntimeError("Expected a symbolic arg with Matrix type.") - - symbolic_mat_n = len(symbolic_args[i]) - symbolic_mat_m = len(symbolic_args[i][0]) + symbolic_mat_n = symbolic_args[i].element_shape[0] + symbolic_mat_m = symbolic_args[i].element_shape[1] if symbolic_mat_m != anno.m or symbolic_mat_n != anno.n: raise RuntimeError( diff --git a/python/taichi/examples/machine_learning/differential_evolution.py b/python/taichi/examples/machine_learning/differential_evolution.py new file mode 100644 index 0000000000000..02f877cff076b --- /dev/null +++ b/python/taichi/examples/machine_learning/differential_evolution.py @@ -0,0 +1,282 @@ +# Authored by Erqi Chen. +# This script shows the optimization process of differential evolution. +# The black points are the search agents, and they finally find the minimum solution. + + +import numpy as np +import taichi as ti +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D + +ti.init(arch=ti.cpu) + + +@ti.func +def clip(_pop: ti.template(), _lb: ti.template(), _ub: ti.template()): + _search_num, _dim = _pop.shape + for ii, j in ti.ndrange(_search_num, _dim): + if _pop[ii, j] > _ub[j]: + _pop[ii, j] = _ub[j] + elif _pop[ii, j] < _lb[j]: + _pop[ii, j] = _lb[j] + + +@ti.func +def clip_only(_trial: ti.template(), _lb: ti.template(), _ub: ti.template()): + _dim = _trial.shape[0] + for j in range(_dim): + if _trial[j] > _ub[j]: + _trial[j] = _ub[j] + elif _trial[j] < _lb[j]: + _trial[j] = _lb[j] + + +@ti.func +def f1(_fit: ti.template(), _pop: ti.template()): + _search_num, _dim = _pop.shape + for ii in range(_search_num): + cur = 0.0 + for j in range(_dim): + cur += ti.pow(_pop[ii, j], 2) + + _fit[ii] = cur + + +@ti.func +def f1_only(_trial: ti.template()) -> ti.float32: + _dim = _trial.shape[0] + _res = 0.0 + for j in range(_dim): + _res += ti.pow(_trial[j], 2) + + return _res + + +@ti.func +def find_min(_fit: ti.template()) -> ti.i32: + _search_num = _fit.shape[0] + min_fit = _fit[0] + min_pos = 0 + 
for _ in ti.ndrange(1): + for ii in ti.ndrange(_search_num): + if _fit[ii] < min_fit: + min_fit = _fit[ii] + min_pos = ii + return min_pos + + +@ti.func +def rand_int(low: ti.i32, high: ti.i32) -> ti.i32: + r = ti.random(float) + _res = r * (high - low) + low + + return ti.floor(_res, dtype=ti.i32) + + +@ti.func +def copy_pop_to_field(_pop: ti.template(), _trial: ti.template(), ind: ti.i32): + _, _dim = _pop.shape + for j in range(_dim): + _trial[j] = _pop[ind, j] + + +@ti.func +def copy_field_to_pop(_pop: ti.template(), _trial: ti.template(), ind: ti.i32): + _, _dim = _pop.shape + for j in range(_dim): + _pop[ind, j] = _trial[j] + + +@ti.func +def copy_2d_to_3d(a: ti.template(), b: ti.template(), _iter: ti.i32): + r, c = b.shape + for ii, j in ti.ndrange(r, c): + a[_iter, ii, j] = b[ii, j] + + +@ti.func +def copy_field_a_to_b(a: ti.template(), b: ti.template()): + _dim = a.shape[0] + for j in range(_dim): + b[j] = a[j] + + +@ti.func +def de_crossover(_pop: ti.template(), _trial: ti.template(), a: ti.i32, b: ti.i32, c: ti.i32): + _, _dim = _pop.shape + CR = 0.5 + para_F = 0.7 + for k in range(_dim): + r = ti.random(float) + if r < CR or k == _dim - 1: + _trial[k] = _pop[c, k] + para_F * (_pop[a, k] - _pop[b, k]) + + +@ti.func +def de_loop( + _pop: ti.template(), + all_best: ti.float32, + _fit: ti.template(), + _trial: ti.template(), + _lb: ti.template(), + _ub: ti.template(), +) -> ti.float32: + _search_num, _ = _pop.shape + for ii in range(_search_num): + copy_pop_to_field(_pop=_pop, _trial=_trial, ind=ii) + + a = rand_int(low=0, high=_search_num) + while a == ii: + a = rand_int(low=0, high=_search_num) + + b = rand_int(low=0, high=_search_num) + while b == ii or a == b: + b = rand_int(low=0, high=_search_num) + + c = rand_int(low=0, high=_search_num) + while c == ii or c == a or c == b: + c = rand_int(low=0, high=_search_num) + + de_crossover(_pop=_pop, _trial=_trial, a=a, b=b, c=c) + clip_only(_trial=_trial, _lb=_lb, _ub=_ub) + next_fit = f1_only(_trial=_trial) + 
if next_fit < _fit[ii]: + copy_field_to_pop(_pop=_pop, _trial=_trial, ind=ii) + _fit[ii] = next_fit + if next_fit < all_best: + all_best = next_fit + copy_field_a_to_b(a=_trial, b=best_pop) + + return all_best + + +@ti.kernel +def DE( + _pop: ti.template(), + _max_iter: ti.i32, + _lb: ti.template(), + _ub: ti.template(), + _fit: ti.template(), + _best_fit: ti.template(), + _trial: ti.template(), +): + f1(_fit=_fit, _pop=_pop) + min_pos = find_min(_fit=_fit) + all_best = _fit[min_pos] + _best_fit[0] = all_best + copy_2d_to_3d(a=all_pop, b=_pop, _iter=0) + + for _ in range(1): + for cur_iter in range(1, _max_iter): + all_best = de_loop(_pop=_pop, _fit=_fit, all_best=all_best, _trial=_trial, _lb=_lb, _ub=_ub) + _best_fit[cur_iter] = all_best + copy_2d_to_3d(a=all_pop, b=_pop, _iter=cur_iter) + + +search_num = 20 +dim = 2 +max_iter = 50 + +_lb = np.ones(dim).astype(np.int32) * (-100) +lb = ti.field(ti.i32, shape=dim) +lb.from_numpy(_lb) + +_ub = np.ones(dim).astype(np.int32) * 100 +ub = ti.field(ti.i32, shape=dim) +ub.from_numpy(_ub) + +pop = ti.field(ti.float32, shape=(search_num, dim)) +pop.from_numpy((np.random.random((search_num, dim)) * (_ub - _lb) + _lb).astype(np.float32)) + +fit = ti.field(ti.float32, shape=(search_num,)) +best_fit = ti.field(ti.float32, shape=(max_iter,)) +best_pop = ti.field(ti.float32, shape=(dim,)) +all_pop = ti.field(ti.float32, shape=(max_iter, search_num, dim)) + +trial = ti.field(ti.float32, shape=(dim,)) + +DE(_pop=pop, _max_iter=max_iter, _lb=lb, _ub=ub, _fit=fit, _best_fit=best_fit, _trial=trial) + +res = best_fit.to_numpy() + + +@ti.kernel +def draw_contour(): + for ii, j in ti.ndrange(201, 201): + z[ii, j] = x[ii] ** 2 + y[j] ** 2 + + +_x = np.arange(-100, 101, 1) +x = ti.field(ti.float32, shape=201) +x.from_numpy(_x) +_y = np.arange(-100, 101, 1) +y = ti.field(ti.float32, shape=201) +y.from_numpy(_y) +z = ti.field(ti.float32, shape=(201, 201)) + +draw_contour() + +_z = z.to_numpy() +_pop = all_pop.to_numpy() + 
+plt.ion() + +"""2d visualization""" +plt.contourf(_x, _y, _z) +plt.colorbar() + +for i in range(max_iter): + plt.cla() + plt.contourf(_x, _y, _z) + plt.scatter(_pop[i, :, 0], _pop[i, :, 1], color="black") + plt.title(f"cur_iter: {i}, best_fit: {best_fit[i]:.2f}") + # plt.savefig(f"./2dimg/iter-{i}.png") + plt.pause(0.5) + + +# import imageio.v2 as imageio +# import os +# +# png_ls = os.listdir("./img") +# f = [] +# for i in png_ls: +# f.append(imageio.imread("./img/" + i)) +# +# imageio.mimsave("res.gif", f, "GIF", duration=0.5) + + +"""3d visualization""" +mesh_x, mesh_y = np.meshgrid(_x, _y) + +fig = plt.figure() +ax = Axes3D(fig, auto_add_to_figure=False) +ax.view_init(elev=51, azim=-70) +fig.add_axes(ax) +ax.plot_surface(mesh_x, mesh_y, _z, cmap="viridis", alpha=0.7) + +for i in range(max_iter): + ax.cla() + ax.plot_surface(mesh_x, mesh_y, _z, cmap="viridis", alpha=0.7) + + row = [] + col = [] + val = [] + nr, _ = _pop[i, :, :].shape + for _i in range(nr): + row.append(np.round(_pop[i, _i, 0]).astype(int)) + col.append(np.round(_pop[i, _i, 1]).astype(int)) + val.append(_z[np.round(_pop[i, _i, 0]).astype(int) + 100, np.round(_pop[i, _i, 1]).astype(int) + 100]) + + ax.scatter3D(row, col, val, color="black") + # plt.savefig(f"./3dimg/iter-{i}.png") + plt.pause(0.5) +# +# +# import imageio.v2 as imageio +# import os +# +# png_ls = os.listdir("./3dimg") +# f = [] +# for i in png_ls: +# f.append(imageio.imread("./3dimg/" + i)) +# imageio.mimsave("3dres.gif", f, "GIF", duration=0.5) diff --git a/python/taichi/graph/_graph.py b/python/taichi/graph/_graph.py index bcdef533e392a..f33ceb5e526a2 100644 --- a/python/taichi/graph/_graph.py +++ b/python/taichi/graph/_graph.py @@ -235,15 +235,7 @@ def _make_arg_matrix(kwargs: Dict[str, Any]): dtype = kwargs["dtype"] if not isinstance(dtype, MatrixType): raise TaichiRuntimeError(f"Tag ArgKind.MATRIX must specify matrix type, but got {dtype}.") - arg_list = [] - i = 0 - for _ in range(dtype.n): - arg_sublist = [] - for _ in 
range(dtype.m): - arg_sublist.append(_ti_core.Arg(ArgKind.MATRIX, f"{name}_mat_arg_{i}", dtype.dtype, 0, [])) - i += 1 - arg_list.append(arg_sublist) - return arg_list + return _ti_core.Arg(ArgKind.MATRIX, f"{name}_mat_arg", dtype.dtype, 0, [dtype.n, dtype.m]) def _make_arg_texture(kwargs: Dict[str, Any]): diff --git a/python/taichi/lang/ast/ast_transformer.py b/python/taichi/lang/ast/ast_transformer.py index d211f9e410ecf..8dc92ad81614e 100644 --- a/python/taichi/lang/ast/ast_transformer.py +++ b/python/taichi/lang/ast/ast_transformer.py @@ -434,17 +434,6 @@ def build_call_if_is_builtin(ctx, node, args, keywords): if id(func) in replace_func: node.ptr = replace_func[id(func)](*args, **keywords) - if func is min or func is max: - name = "min" if func is min else "max" - warnings.warn_explicit( - f'Calling builtin function "{name}" in Taichi scope is deprecated, ' - f"and it will be removed in Taichi v1.6.0." - f'Please use "ti.{name}" instead.', - DeprecationWarning, - ctx.file, - node.lineno + ctx.lineno_offset, - module="taichi", - ) return True return False @@ -1023,28 +1012,19 @@ def build_Compare(ctx, node): ops_static = { ast.In: lambda l, r: l in r, ast.NotIn: lambda l, r: l not in r, - ast.Is: lambda l, r: l is r, - ast.IsNot: lambda l, r: l is not r, } if ctx.is_in_static_scope(): ops = {**ops, **ops_static} operands = [node.left.ptr] + [comparator.ptr for comparator in node.comparators] val = True for i, node_op in enumerate(node.ops): + if isinstance(node_op, (ast.Is, ast.IsNot)): + name = "is" if isinstance(node_op, ast.Is) else "is not" + raise TaichiSyntaxError(f'Operator "{name}" in Taichi scope is not supported.') l = operands[i] r = operands[i + 1] op = ops.get(type(node_op)) - if isinstance(node_op, (ast.Is, ast.IsNot)): - name = "is" if isinstance(node_op, ast.Is) else "is not" - warnings.warn_explicit( - f'Operator "{name}" in Taichi scope is deprecated, ' - f"and it will be removed in Taichi v1.6.0. 
" - f"Please avoid using it.", - DeprecationWarning, - ctx.file, - node.lineno + ctx.lineno_offset, - module="taichi", - ) + if op is None: if type(node_op) in ops_static: raise TaichiSyntaxError(f'"{type(node_op).__name__}" is only supported inside `ti.static`.') @@ -1165,16 +1145,11 @@ def build_ndrange_for(ctx, node): I = impl.expr_init(ndrange_loop_var) targets = ASTTransformer.get_for_loop_targets(node) if len(targets) != len(ndrange_var.dimensions): - warnings.warn_explicit( + raise TaichiSyntaxError( "Ndrange for loop with number of the loop variables not equal to " - "the dimension of the ndrange is deprecated, " - "and it will be removed in Taichi 1.6.0. " + "the dimension of the ndrange is not supported. " "Please check if the number of arguments of ti.ndrange() is equal to " - "the number of the loop variables.", - DeprecationWarning, - ctx.file, - node.lineno + ctx.lineno_offset, - module="taichi", + "the number of the loop variables." ) for i, target in enumerate(targets): if i + 1 < len(targets): diff --git a/python/taichi/lang/common_ops.py b/python/taichi/lang/common_ops.py index 630d853d858f4..e5ef364cb8795 100644 --- a/python/taichi/lang/common_ops.py +++ b/python/taichi/lang/common_ops.py @@ -1,30 +1,16 @@ -import warnings - from taichi.lang import ops from taichi.lang.util import in_python_scope from taichi.types import primitive_types +from typing import TYPE_CHECKING class TaichiOperations: """The base class of taichi operations of expressions. Subclasses: :class:`~taichi.lang.expr.Expr`, :class:`~taichi.lang.matrix.Matrix`""" - __deprecated_atomic_ops__ = { - "atomic_add": "_atomic_add", - "atomic_mul": "_atomic_mul", - "atomic_and": "_atomic_and", - "atomic_or": "_atomic_or", - "atomic_sub": "_atomic_sub", - "atomic_xor": "_atomic_xor", - } - - def __getattr__(self, item): - if item in TaichiOperations.__deprecated_atomic_ops__: - warnings.warn( - f"a.{item}(b) is deprecated, and it will be removed in Taichi v1.6.0. 
Please use ti.{item}(a, b) instead.", - DeprecationWarning, - ) - return getattr(self, TaichiOperations.__deprecated_atomic_ops__[item]) - raise AttributeError(f"'{type(self).__name__}' object has no attribute '{item}'") + if TYPE_CHECKING: + # Make pylint happy + def __getattr__(self, item): + pass def __neg__(self): return ops.neg(self) diff --git a/python/taichi/lang/kernel_arguments.py b/python/taichi/lang/kernel_arguments.py index 1f1089090308a..c15b8555a8f34 100644 --- a/python/taichi/lang/kernel_arguments.py +++ b/python/taichi/lang/kernel_arguments.py @@ -7,7 +7,7 @@ from taichi.lang.any_array import AnyArray from taichi.lang.enums import Layout from taichi.lang.expr import Expr -from taichi.lang.matrix import MatrixType, VectorType, make_matrix +from taichi.lang.matrix import MatrixType from taichi.lang.struct import StructType from taichi.lang.util import cook_dtype from taichi.types.primitive_types import RefType, u64 @@ -82,14 +82,10 @@ def get_type_for_kernel_args(dtype, name): def decl_matrix_arg(matrixtype, name): - if isinstance(matrixtype, VectorType): - return make_matrix([decl_scalar_arg(matrixtype.dtype, f"{name}_{i}") for i in range(matrixtype.n)]) - return make_matrix( - [ - [decl_scalar_arg(matrixtype.dtype, f"{name}_{i}_{j}") for i in range(matrixtype.m)] - for j in range(matrixtype.n) - ] - ) + arg_type = get_type_for_kernel_args(matrixtype, name) + arg_id = impl.get_runtime().compiling_callable.insert_scalar_param(arg_type, name) + arg_load = Expr(_ti_core.make_arg_load_expr(arg_id, arg_type, create_load=False)) + return matrixtype.from_taichi_object(arg_load) def decl_struct_arg(structtype, name): diff --git a/python/taichi/lang/kernel_impl.py b/python/taichi/lang/kernel_impl.py index 226f8ef9ae5c8..6467174deb64f 100644 --- a/python/taichi/lang/kernel_impl.py +++ b/python/taichi/lang/kernel_impl.py @@ -757,39 +757,33 @@ def call_back(): ) else: raise TaichiRuntimeTypeError.get(i, needed.to_string(), v) - else: raise 
TaichiRuntimeTypeError.get(i, needed.to_string(), v) elif isinstance(needed, MatrixType): if needed.dtype in primitive_types.real_types: - for a in range(needed.n): - for b in range(needed.m): - if actual_argument_slot >= max_arg_num: - exceed_max_arg_num = True - break - val = v[a, b] if needed.ndim == 2 else v[a] - if not isinstance(val, (int, float, np.integer, np.floating)): - raise TaichiRuntimeTypeError.get(i, needed.dtype.to_string(), type(val)) - launch_ctx.set_arg_float(actual_argument_slot, float(val)) - actual_argument_slot += 1 + + def cast_func(x): + if not isinstance(x, (int, float, np.integer, np.floating)): + raise TaichiRuntimeTypeError.get(i, needed.dtype.to_string(), type(x)) + return float(x) + elif needed.dtype in primitive_types.integer_types: - for a in range(needed.n): - for b in range(needed.m): - if actual_argument_slot >= max_arg_num: - exceed_max_arg_num = True - break - val = v[a, b] if needed.ndim == 2 else v[a] - if not isinstance(val, (int, np.integer)): - raise TaichiRuntimeTypeError.get(i, needed.dtype.to_string(), type(val)) - if is_signed(needed.dtype): - launch_ctx.set_arg_int(actual_argument_slot, int(val)) - else: - launch_ctx.set_arg_uint(actual_argument_slot, int(val)) - actual_argument_slot += 1 + + def cast_func(x): + if not isinstance(x, (int, np.integer)): + raise TaichiRuntimeTypeError.get(i, needed.dtype.to_string(), type(x)) + return int(x) + else: raise ValueError(f"Matrix dtype {needed.dtype} is not integer type or real type.") - continue + + if needed.ndim == 2: + v = [cast_func(v[i, j]) for i in range(needed.n) for j in range(needed.m)] + else: + v = [cast_func(v[i]) for i in range(needed.n)] + v = needed(*v) + needed.set_kernel_struct_args(v, launch_ctx, (actual_argument_slot,)) elif isinstance(needed, StructType): needed.set_kernel_struct_args(v, launch_ctx, (actual_argument_slot,)) else: diff --git a/python/taichi/lang/matrix.py b/python/taichi/lang/matrix.py index 7d7fb13e531d9..0aa5a9954169c 100644 --- 
a/python/taichi/lang/matrix.py +++ b/python/taichi/lang/matrix.py @@ -1482,7 +1482,8 @@ def _instantiate_in_python_scope(self, entries): for j in range(self.m) ] for i in range(self.n) - ] + ], + dt=self.dtype, ) def _instantiate(self, entries): @@ -1569,7 +1570,8 @@ def _instantiate_in_python_scope(self, entries): [ int(entries[i]) if self.dtype in primitive_types.integer_types else float(entries[i]) for i in range(self.n) - ] + ], + dt=self.dtype, ) def _instantiate(self, entries): diff --git a/python/taichi/linalg/__init__.py b/python/taichi/linalg/__init__.py index 2bb594ffe273f..c4e28878b1b59 100644 --- a/python/taichi/linalg/__init__.py +++ b/python/taichi/linalg/__init__.py @@ -1,6 +1,6 @@ """Taichi support module for sparse matrix operations. """ -from taichi.linalg.cg import CG +from taichi.linalg.sparse_cg import SparseCG from taichi.linalg.sparse_matrix import * from taichi.linalg.sparse_solver import SparseSolver -from taichi.linalg.taichi_cg import * +from taichi.linalg.matrixfree_cg import * diff --git a/python/taichi/linalg/taichi_cg.py b/python/taichi/linalg/matrixfree_cg.py similarity index 82% rename from python/taichi/linalg/taichi_cg.py rename to python/taichi/linalg/matrixfree_cg.py index bb39c0af734f0..0d333f4bbcea0 100644 --- a/python/taichi/linalg/taichi_cg.py +++ b/python/taichi/linalg/matrixfree_cg.py @@ -16,7 +16,21 @@ def matvec(self, x, Ax): self._matvec(x, Ax) -def taichi_cg_solver(A, b, x, tol=1e-6, maxiter=5000, quiet=True): +def MatrixFreeCG(A, b, x, tol=1e-6, maxiter=5000, quiet=True): + """Matrix-free conjugate-gradient solver. + + Use conjugate-gradient method to solve the linear system Ax = b, where A is implicitly + represented as a LinearOperator. + + Args: + A (LinearOperator): The coefficient matrix A of the linear system. + b (Field): The right-hand side of the linear system. + x (Field): The initial guess for the solution. + maxiter (int): Maximum number of iterations. + tol (float): Tolerance (absolute) for convergence. 
+ quiet (bool): Switch to turn on/off iteration log. + """ + if b.dtype != x.dtype: raise TaichiTypeError(f"Dtype mismatch b.dtype({b.dtype}) != x.dtype({x.dtype}).") if str(b.dtype) == "f32": diff --git a/python/taichi/linalg/cg.py b/python/taichi/linalg/sparse_cg.py similarity index 78% rename from python/taichi/linalg/cg.py rename to python/taichi/linalg/sparse_cg.py index 21973641e4db3..e0aba8112c690 100644 --- a/python/taichi/linalg/cg.py +++ b/python/taichi/linalg/sparse_cg.py @@ -6,7 +6,19 @@ from taichi.types import f32, f64 -class CG: +class SparseCG: + """Conjugate-gradient solver built for SparseMatrix. + + Use conjugate-gradient method to solve the linear system Ax = b, where A is SparseMatrix. + + Args: + A (SparseMatrix): The coefficient matrix A of the linear system. + b (numpy ndarray, taichi Ndarray): The right-hand side of the linear system. + x0 (numpy ndarray, taichi Ndarray): The initial guess for the solution. + max_iter (int): Maximum number of iterations. + atol (float): Absolute tolerance for convergence. 
+ """ + def __init__(self, A, b, x0=None, max_iter=50, atol=1e-6): self.dtype = A.dtype self.ti_arch = get_runtime().prog.config().arch diff --git a/python/taichi/linalg/sparse_matrix.py b/python/taichi/linalg/sparse_matrix.py index 9cd064d62aac9..443afd933aae3 100644 --- a/python/taichi/linalg/sparse_matrix.py +++ b/python/taichi/linalg/sparse_matrix.py @@ -1,4 +1,3 @@ -import warnings from functools import reduce import numpy as np @@ -7,7 +6,7 @@ from taichi.lang.exception import TaichiRuntimeError from taichi.lang.field import Field from taichi.lang.impl import get_runtime -from taichi.types import annotations, f32 +from taichi.types import f32 class SparseMatrix: @@ -294,14 +293,4 @@ def build(self, dtype=f32, _format="CSR"): raise TaichiRuntimeError("Sparse matrix only supports CPU and CUDA backends.") -# TODO: remove this in 1.0 release -class sparse_matrix_builder(annotations.sparse_matrix_builder): - def __init__(self): - warnings.warn( - "ti.linalg.sparse_matrix_builder is deprecated, and it will be removed in Taichi v1.6.0. " - "Please use ti.types.sparse_matrix_builder instead.", - DeprecationWarning, - ) - - -__all__ = ["SparseMatrix", "SparseMatrixBuilder", "sparse_matrix_builder"] +__all__ = ["SparseMatrix", "SparseMatrixBuilder"] diff --git a/python/taichi/math/mathimpl.py b/python/taichi/math/mathimpl.py index f445e4fd556a0..947cea1fb71e9 100644 --- a/python/taichi/math/mathimpl.py +++ b/python/taichi/math/mathimpl.py @@ -2,7 +2,7 @@ """ Math functions for glsl-like functions and other stuff. """ -from math import e, inf, nan, pi +import math from taichi.lang import impl, ops from taichi.lang.impl import static, zero @@ -31,6 +31,26 @@ cfg = impl.default_cfg +e = math.e +"""The mathematical constant e = 2.718281…. +Directly imported from the Python standard library `math`. +""" + +pi = math.pi +"""The mathematical constant π = 3.141592…. +Directly imported from the Python standard library `math`. 
+""" + +inf = math.inf +"""A floating-point positive infinity. (For negative infinity, use `-inf`). +Directly imported from the Python standard library `math`. +""" + +nan = math.nan +"""A floating-point "not a number" (NaN) value. +Directly imported from the Python standard library `math` +""" + vec2 = vector(2, cfg().default_fp) """2D floating vector type. """ @@ -84,7 +104,7 @@ def mix(x, y, a): """Performs a linear interpolation between `x` and `y` using `a` to weight between them. The return value is computed as - :math:`x\times a + (1-a)\times y`. + `x * (1 - a) + a * y`. The arguments can be scalars or :class:`~taichi.Matrix`, as long as the operation can be performed. diff --git a/python/taichi/ui/ui.py b/python/taichi/ui/ui.py index 8ed96100e9b4b..2252057ec9bfc 100644 --- a/python/taichi/ui/ui.py +++ b/python/taichi/ui/ui.py @@ -1,8 +1,6 @@ -import warnings - from taichi._lib import core as _ti_core -from .camera import Camera +from .camera import Camera # pylint: disable=unused-import from .canvas import Canvas # pylint: disable=unused-import from .constants import * # pylint: disable=unused-import,wildcard-import from .imgui import Gui # pylint: disable=unused-import @@ -11,22 +9,6 @@ from .window import Window # pylint: disable=unused-import -def make_camera(): - """Return an instance of :class:`~taichi.ui.Camera`. This is an deprecated - interface, please construct `~taichi.ui.Camera` directly. - - Example:: - - >>> camera = ti.ui.make_camera() - """ - warnings.warn( - "`ti.ui.make_camera()` is deprecated, and will be removed in Taichi v1.6.0. " - "Please use `ti.ui.Camera()` instead", - DeprecationWarning, - ) - return Camera() - - # ---------------------- ProjectionMode = _ti_core.ProjectionMode if _ti_core.GGUI_AVAILABLE else None """Camera projection mode, 0 for perspective and 1 for orthogonal. 
diff --git a/python/taichi/ui/window.py b/python/taichi/ui/window.py index 18670d4dd8506..397695ddd5b36 100644 --- a/python/taichi/ui/window.py +++ b/python/taichi/ui/window.py @@ -1,5 +1,4 @@ import pathlib -import warnings import numpy from taichi._kernels import ( @@ -145,20 +144,6 @@ def get_window_shape(self): """ return self.window.get_window_shape() - def write_image(self, filename): - """Save the window content to an image file. This is an deprecated - interface; please use `save_image` instead. - - Args: - filename (str): output filename. - """ - warnings.warn( - "`Window.write_image()` is deprecated, and it will be removed in Taichi v1.6.0. " - "Please use `Window.save_image()` instead.", - DeprecationWarning, - ) - return self.save_image(filename) - def save_image(self, filename): """Save the window content to an image file. diff --git a/taichi/aot/graph_data.cpp b/taichi/aot/graph_data.cpp index 2423f864e3cc9..caa03f5e2fb97 100644 --- a/taichi/aot/graph_data.cpp +++ b/taichi/aot/graph_data.cpp @@ -44,6 +44,41 @@ void CompiledGraph::init_runtime_context( LaunchContextBuilder &ctx) { for (int i = 0; i < paramter_list.size(); ++i) { auto &symbolic_arg = paramter_list[i]; + if (symbolic_arg.tag == aot::ArgKind::kMatrix) { + int size = symbolic_arg.element_shape[0] * symbolic_arg.element_shape[1]; + for (int j = 0; j < size; j++) { + auto found = args.find(symbolic_arg.name + "_" + std::to_string(j)); + TI_ERROR_IF(found == args.end(), "Missing runtime value for {}", + symbolic_arg.name); + const aot::IValue &ival = found->second; + TI_ASSERT(ival.tag == aot::ArgKind::kScalar); + int type_size = data_type_size(symbolic_arg.dtype()); + switch (type_size) { + case 1: + ctx.set_struct_arg_impl( + {i, j}, taichi_union_cast_with_different_sizes(ival.val)); + break; + case 2: + ctx.set_struct_arg_impl( + {i, j}, + taichi_union_cast_with_different_sizes(ival.val)); + break; + case 4: + ctx.set_struct_arg_impl( + {i, j}, + 
taichi_union_cast_with_different_sizes(ival.val)); + break; + case 8: + ctx.set_struct_arg_impl( + {i, j}, + taichi_union_cast_with_different_sizes(ival.val)); + break; + default: + TI_ERROR("Unsupported type size {}", type_size); + } + } + continue; + } auto found = args.find(symbolic_arg.name); TI_ERROR_IF(found == args.end(), "Missing runtime value for {}", symbolic_arg.name); @@ -89,8 +124,7 @@ void CompiledGraph::init_runtime_context( symbolic_arg.name, symbolic_arg_primitive_dtype.to_string(), arr_primitive_dtype.to_string()); ctx.set_arg_ndarray(i, *arr); - } else if (symbolic_arg.tag == aot::ArgKind::kScalar || - symbolic_arg.tag == aot::ArgKind::kMatrix) { + } else if (symbolic_arg.tag == aot::ArgKind::kScalar) { TI_ASSERT(ival.tag == aot::ArgKind::kScalar); // Matrix args are flattened so they're same as scalars. int type_size = data_type_size(symbolic_arg.dtype()); diff --git a/taichi/codegen/llvm/codegen_llvm.cpp b/taichi/codegen/llvm/codegen_llvm.cpp index 28e95d8f729e5..2c2bea8285827 100644 --- a/taichi/codegen/llvm/codegen_llvm.cpp +++ b/taichi/codegen/llvm/codegen_llvm.cpp @@ -1898,10 +1898,11 @@ void TaskCodeGenLLVM::visit(ExternalPtrStmt *stmt) { (layout == ExternalArrayLayout::kAOS) ? 
num_array_args : 0; for (int i = 0; i < num_array_args; i++) { - auto raw_arg = - builder->CreateGEP(struct_type, llvm_val[stmt->base_ptr], - {tlctx->get_constant(0), tlctx->get_constant(0), - tlctx->get_constant(i)}); + auto raw_arg = builder->CreateGEP( + struct_type, llvm_val[stmt->base_ptr], + {tlctx->get_constant(0), + tlctx->get_constant(TypeFactory::SHAPE_POS_IN_NDARRAY), + tlctx->get_constant(i)}); raw_arg = builder->CreateLoad(tlctx->get_data_type(PrimitiveType::i32), raw_arg); sizes[i] = raw_arg; @@ -1971,16 +1972,8 @@ void TaskCodeGenLLVM::visit(ExternalPtrStmt *stmt) { void TaskCodeGenLLVM::visit(ExternalTensorShapeAlongAxisStmt *stmt) { const auto arg_id = stmt->arg_id; const auto axis = stmt->axis; - if (auto struct_type = current_callable->args_type->get_element_type({arg_id}) - ->cast()) { - // Is ndarray - llvm_val[stmt] = get_struct_arg({arg_id, 0, axis}, /*create_load=*/true); - } else { - // Is texture - llvm_val[stmt] = - call("RuntimeContext_get_extra_args", get_context(), - tlctx->get_constant(arg_id), tlctx->get_constant(axis)); - } + llvm_val[stmt] = get_struct_arg( + {arg_id, TypeFactory::SHAPE_POS_IN_NDARRAY, axis}, /*create_load=*/true); } std::string TaskCodeGenLLVM::init_offloaded_task_function(OffloadedStmt *stmt, diff --git a/taichi/codegen/spirv/kernel_utils.cpp b/taichi/codegen/spirv/kernel_utils.cpp index e7cd29c3ab95e..d485e3269c2cb 100644 --- a/taichi/codegen/spirv/kernel_utils.cpp +++ b/taichi/codegen/spirv/kernel_utils.cpp @@ -50,9 +50,7 @@ std::string TaskAttributes::BufferBind::debug_string() const { KernelContextAttributes::KernelContextAttributes( const Kernel &kernel, const DeviceCapabilityConfig *caps) - : args_bytes_(0), - rets_bytes_(0), - extra_args_bytes_(RuntimeContext::extra_args_size) { + : args_bytes_(0), rets_bytes_(0) { arr_access.resize(kernel.parameter_list.size(), irpass::ExternalPtrAccess(0)); arg_attribs_vec_.reserve(kernel.parameter_list.size()); // TODO: We should be able to limit Kernel args and rets to 
be primitive types diff --git a/taichi/codegen/spirv/kernel_utils.h b/taichi/codegen/spirv/kernel_utils.h index 53990125b8900..16fce6aa35fb0 100644 --- a/taichi/codegen/spirv/kernel_utils.h +++ b/taichi/codegen/spirv/kernel_utils.h @@ -229,23 +229,6 @@ class KernelContextAttributes { return rets_bytes_; } - /** - * Number of bytes needed by the extra arguments. - * - * Extra argument region is used to store some metadata, like the shape of the - * external array. - */ - inline size_t extra_args_bytes() const { - return extra_args_bytes_; - } - - /** - * Offset (in bytes) of the extra arguments in the memory. - */ - inline size_t extra_args_mem_offset() const { - return args_bytes(); - } - /** * The type of the struct that contains all the arguments. */ @@ -266,7 +249,6 @@ class KernelContextAttributes { ret_attribs_vec_, args_bytes_, rets_bytes_, - extra_args_bytes_, arr_access, args_type_, rets_type_); @@ -277,7 +259,6 @@ class KernelContextAttributes { size_t args_bytes_{0}; size_t rets_bytes_{0}; - size_t extra_args_bytes_{0}; const lang::StructType *args_type_{nullptr}; const lang::StructType *rets_type_{nullptr}; diff --git a/taichi/codegen/spirv/spirv_codegen.cpp b/taichi/codegen/spirv/spirv_codegen.cpp index 8d266326ddeef..26c3cffd41447 100644 --- a/taichi/codegen/spirv/spirv_codegen.cpp +++ b/taichi/codegen/spirv/spirv_codegen.cpp @@ -633,30 +633,18 @@ class TaskCodegen : public IRVisitor { const auto arg_id = stmt->arg_id; const auto axis = stmt->axis; - const auto extra_args_member_index = ctx_attribs_->args().size(); - - const auto extra_arg_index = (arg_id * taichi_max_num_indices) + axis; spirv::Value var_ptr; - if (ctx_attribs_->args_type() - ->get_element_type({arg_id}) - ->is()) { - // Is ndarray - var_ptr = ir_->make_value( - spv::OpAccessChain, - ir_->get_pointer_type(ir_->i32_type(), spv::StorageClassUniform), - get_buffer_value(BufferType::Args, PrimitiveType::i32), - ir_->int_immediate_number(ir_->i32_type(), arg_id), - 
ir_->int_immediate_number(ir_->i32_type(), 0), - ir_->int_immediate_number(ir_->i32_type(), axis)); - } else { - // Is texture - var_ptr = ir_->make_value( - spv::OpAccessChain, - ir_->get_pointer_type(ir_->i32_type(), spv::StorageClassUniform), - get_buffer_value(BufferType::Args, PrimitiveType::i32), - ir_->int_immediate_number(ir_->i32_type(), - extra_args_member_index + extra_arg_index)); - } + TI_ASSERT(ctx_attribs_->args_type() + ->get_element_type({arg_id}) + ->is()); + var_ptr = ir_->make_value( + spv::OpAccessChain, + ir_->get_pointer_type(ir_->i32_type(), spv::StorageClassUniform), + get_buffer_value(BufferType::Args, PrimitiveType::i32), + ir_->int_immediate_number(ir_->i32_type(), arg_id), + ir_->int_immediate_number(ir_->i32_type(), + TypeFactory::SHAPE_POS_IN_NDARRAY), + ir_->int_immediate_number(ir_->i32_type(), axis)); spirv::Value var = ir_->load_variable(var_ptr, ir_->i32_type()); ir_->register_value(name, var); @@ -685,7 +673,8 @@ class TaskCodegen : public IRVisitor { ir_->get_pointer_type(ir_->i32_type(), spv::StorageClassUniform), get_buffer_value(BufferType::Args, PrimitiveType::i32), ir_->int_immediate_number(ir_->i32_type(), arg_id), - ir_->int_immediate_number(ir_->i32_type(), 0), + ir_->int_immediate_number(ir_->i32_type(), + TypeFactory::SHAPE_POS_IN_NDARRAY), ir_->int_immediate_number(ir_->i32_type(), i)); spirv::Value var = ir_->load_variable(var_ptr, ir_->i32_type()); ir_->register_value(var_name, var); @@ -2236,11 +2225,6 @@ class TaskCodegen : public IRVisitor { element_types.push_back( translate_ti_type(blk, element.type, has_buffer_ptr)); } - const tinyir::Type *i32_type = - blk.emplace_back(/*num_bits=*/32, /*is_signed=*/true); - for (int i = 0; i < ctx_attribs_->extra_args_bytes() / 4; i++) { - element_types.push_back(i32_type); - } const tinyir::Type *struct_type = blk.emplace_back(element_types); diff --git a/taichi/ir/type_factory.cpp b/taichi/ir/type_factory.cpp index 3e3e51e877c78..b63aee0662767 100644 --- 
a/taichi/ir/type_factory.cpp +++ b/taichi/ir/type_factory.cpp @@ -188,6 +188,10 @@ const Type *TypeFactory::get_ndarray_struct_type(DataType dt, return get_struct_type(members); } +const Type *TypeFactory::get_rwtexture_struct_type() { + return get_ndarray_struct_type(PrimitiveType::f32, 3); +} + namespace { static bool compare_types(DataType x, DataType y) { // Is the first type "bigger" than the second type? diff --git a/taichi/ir/type_factory.h b/taichi/ir/type_factory.h index 897644fa3fded..f73343c78519f 100644 --- a/taichi/ir/type_factory.h +++ b/taichi/ir/type_factory.h @@ -29,6 +29,8 @@ class TypeFactory { int total_dim, bool needs_grad = false); + const Type *get_rwtexture_struct_type(); + Type *get_pointer_type(Type *element, bool is_bit_pointer = false); Type *get_quant_int_type(int num_bits, bool is_signed, Type *compute_type); @@ -54,6 +56,7 @@ class TypeFactory { static DataType create_tensor_type(std::vector shape, DataType element); + constexpr static int SHAPE_POS_IN_NDARRAY = 0; constexpr static int DATA_PTR_POS_IN_NDARRAY = 1; constexpr static int GRAD_PTR_POS_IN_NDARRAY = 2; diff --git a/taichi/program/callable.cpp b/taichi/program/callable.cpp index b3af705acad64..384a3c31059a6 100644 --- a/taichi/program/callable.cpp +++ b/taichi/program/callable.cpp @@ -46,8 +46,11 @@ int Callable::insert_ndarray_param(const DataType &dt, int Callable::insert_texture_param(int total_dim, const std::string &name) { // FIXME: we shouldn't abuse is_array for texture parameters - parameter_list.emplace_back(PrimitiveType::f32, /*is_array=*/true, 0, - total_dim, std::vector{}); + // FIXME: using rwtexture struct type for texture parameters because C-API + // does not distinguish between texture and rwtexture. 
+ auto *type = TypeFactory::get_instance().get_rwtexture_struct_type(); + parameter_list.emplace_back(type, /*is_array=*/true, 0, total_dim, + std::vector{}); parameter_list.back().name = name; return (int)parameter_list.size() - 1; } @@ -63,8 +66,9 @@ int Callable::insert_rw_texture_param(int total_dim, BufferFormat format, const std::string &name) { // FIXME: we shouldn't abuse is_array for texture parameters - parameter_list.emplace_back(PrimitiveType::f32, /*is_array=*/true, 0, - total_dim, std::vector{}, format); + auto *type = TypeFactory::get_instance().get_rwtexture_struct_type(); + parameter_list.emplace_back(type, /*is_array=*/true, 0, total_dim, + std::vector{}, format); parameter_list.back().name = name; return (int)parameter_list.size() - 1; } diff --git a/taichi/program/compile_config.cpp b/taichi/program/compile_config.cpp index 64c04371a44a2..bb6c102041d7d 100644 --- a/taichi/program/compile_config.cpp +++ b/taichi/program/compile_config.cpp @@ -42,7 +42,6 @@ CompileConfig::CompileConfig() { make_thread_local = true; make_block_local = true; detect_read_only = true; - ndarray_use_cached_allocator = true; real_matrix_scalarize = true; half2_vectorization = false; make_cpu_multithreading_loop = true; diff --git a/taichi/program/compile_config.h b/taichi/program/compile_config.h index df1650ec163a3..619109f4e86d8 100644 --- a/taichi/program/compile_config.h +++ b/taichi/program/compile_config.h @@ -37,7 +37,6 @@ struct CompileConfig { bool make_thread_local; bool make_block_local; bool detect_read_only; - bool ndarray_use_cached_allocator; bool real_matrix_scalarize; bool half2_vectorization; bool make_cpu_multithreading_loop; diff --git a/taichi/program/context.h b/taichi/program/context.h index 3b5a8412292f1..71ab23bc9644f 100644 --- a/taichi/program/context.h +++ b/taichi/program/context.h @@ -17,7 +17,6 @@ struct RuntimeContext { LLVMRuntime *runtime{nullptr}; - int32_t extra_args[taichi_max_num_args_extra][taichi_max_num_indices]; int32_t 
cpu_thread_id; // We move the pointer of result buffer from LLVMRuntime to RuntimeContext @@ -25,8 +24,6 @@ struct RuntimeContext { // LLVMRuntime is shared among functions. So we moved the pointer to // RuntimeContext which each function have one. uint64_t *result_buffer; - - static constexpr size_t extra_args_size = sizeof(extra_args); }; #if defined(TI_RUNTIME_HOST) diff --git a/taichi/program/launch_context_builder.cpp b/taichi/program/launch_context_builder.cpp index e57c8f21b4be0..1458ae81d6203 100644 --- a/taichi/program/launch_context_builder.cpp +++ b/taichi/program/launch_context_builder.cpp @@ -135,10 +135,6 @@ void LaunchContextBuilder::set_arg(int i, TypedConstant d) { } } -void LaunchContextBuilder::set_extra_arg_int(int i, int j, int32 d) { - ctx_->extra_args[i][j] = d; -} - template void LaunchContextBuilder::set_struct_arg_impl(std::vector arg_indices, T v) { @@ -257,8 +253,8 @@ void LaunchContextBuilder::set_arg_rw_texture_impl( array_ptrs[{arg_id}] = (void *)alloc_ptr; set_array_device_allocation_type(arg_id, DevAllocType::kRWTexture); TI_ASSERT(shape.size() <= taichi_max_num_indices); - for (int i = 0; i < shape.size(); i++) { - ctx_->extra_args[arg_id][i] = shape[i]; + for (int i = 0; i < shape.size(); ++i) { + set_struct_arg({arg_id, 0, i}, shape[i]); } } diff --git a/taichi/program/launch_context_builder.h b/taichi/program/launch_context_builder.h index 55d9f212fe746..ee0687d490b2c 100644 --- a/taichi/program/launch_context_builder.h +++ b/taichi/program/launch_context_builder.h @@ -57,7 +57,6 @@ class LaunchContextBuilder { template T get_ret(int i); - void set_extra_arg_int(int i, int j, int32 d); void set_arg_external_array_with_shape(int arg_id, uintptr_t ptr, diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp index 4f34d0e3976a0..79a3833736609 100644 --- a/taichi/python/export_lang.cpp +++ b/taichi/python/export_lang.cpp @@ -193,8 +193,6 @@ void export_lang(py::module &m) { .def_readwrite("make_thread_local", 
&CompileConfig::make_thread_local) .def_readwrite("make_block_local", &CompileConfig::make_block_local) .def_readwrite("detect_read_only", &CompileConfig::detect_read_only) - .def_readwrite("ndarray_use_cached_allocator", - &CompileConfig::ndarray_use_cached_allocator) .def_readwrite("real_matrix_scalarize", &CompileConfig::real_matrix_scalarize) .def_readwrite("half2_vectorization", &CompileConfig::half2_vectorization) @@ -618,66 +616,55 @@ void export_lang(py::module &m) { .def("seq", &GraphBuilder::seq, py::return_value_policy::reference); py::class_(m, "CompiledGraph") - .def("jit_run", [](aot::CompiledGraph *self, - const CompileConfig &compile_config, - const py::dict &pyargs) { - std::unordered_map args; - for (auto it : pyargs) { - std::string arg_name = py::cast(it.first); - auto tag = self->args[arg_name].tag; - if (tag == aot::ArgKind::kNdarray) { - auto &val = it.second.cast(); - args.insert( - {py::cast(it.first), aot::IValue::create(val)}); - } else if (tag == aot::ArgKind::kTexture || - tag == aot::ArgKind::kRWTexture) { - auto &val = it.second.cast(); - args.insert( - {py::cast(it.first), aot::IValue::create(val)}); - - } else if (tag == aot::ArgKind::kScalar || - tag == aot::ArgKind::kMatrix) { - std::string arg_name = py::cast(it.first); - auto expected_dtype = self->args[arg_name].dtype(); - if (expected_dtype == PrimitiveType::i32) { - args.insert( - {arg_name, aot::IValue::create(py::cast(it.second))}); - } else if (expected_dtype == PrimitiveType::i64) { - args.insert( - {arg_name, aot::IValue::create(py::cast(it.second))}); - } else if (expected_dtype == PrimitiveType::f32) { - args.insert( - {arg_name, aot::IValue::create(py::cast(it.second))}); - } else if (expected_dtype == PrimitiveType::f64) { - args.insert( - {arg_name, aot::IValue::create(py::cast(it.second))}); - } else if (expected_dtype == PrimitiveType::i16) { - args.insert( - {arg_name, aot::IValue::create(py::cast(it.second))}); - } else if (expected_dtype == PrimitiveType::u32) 
{ - args.insert( - {arg_name, aot::IValue::create(py::cast(it.second))}); - } else if (expected_dtype == PrimitiveType::u64) { - args.insert( - {arg_name, aot::IValue::create(py::cast(it.second))}); - } else if (expected_dtype == PrimitiveType::u16) { - args.insert( - {arg_name, aot::IValue::create(py::cast(it.second))}); - } else if (expected_dtype == PrimitiveType::u8) { - args.insert({arg_name, - aot::IValue::create(py::cast(it.second))}); - } else if (expected_dtype == PrimitiveType::i8) { - args.insert( - {arg_name, aot::IValue::create(py::cast(it.second))}); - } else { - TI_NOT_IMPLEMENTED; - } - } else { - TI_NOT_IMPLEMENTED; - } - } - self->jit_run(compile_config, args); - }); + .def("jit_run", + [](aot::CompiledGraph *self, const CompileConfig &compile_config, + const py::dict &pyargs) { + std::unordered_map args; + auto insert_scalar_arg = [&args](std::string arg_name, + DataType expected_dtype, + py::object pyarg) { + auto type_id = expected_dtype->as()->type; + switch (type_id) { +#define PER_C_TYPE(type, ctype) \ + case PrimitiveTypeID::type: \ + args.insert({arg_name, aot::IValue::create(py::cast(pyarg))}); \ + break; +#include "taichi/inc/data_type_with_c_type.inc.h" +#undef PER_C_TYPE + default: + TI_ERROR("Unsupported scalar type {}", type_id); + } + }; + for (const auto &[arg_name, arg] : self->args) { + auto tag = arg.tag; + if (tag == aot::ArgKind::kMatrix) { + int size = arg.element_shape[0] * arg.element_shape[1]; + for (int i = 0; i < size; i++) { + auto name = fmt::format("{}_{}", arg_name, i); + TI_ASSERT(pyargs.contains(name.c_str())); + auto pyarg = pyargs[name.c_str()]; + insert_scalar_arg(name, arg.dtype(), pyarg); + } + continue; + } + TI_ASSERT(pyargs.contains(arg_name.c_str())); + auto pyarg = pyargs[arg_name.c_str()]; + if (tag == aot::ArgKind::kNdarray) { + auto &val = pyarg.cast(); + args.insert({arg_name, aot::IValue::create(val)}); + } else if (tag == aot::ArgKind::kTexture || + tag == aot::ArgKind::kRWTexture) { + auto &val = 
pyarg.cast(); + args.insert({arg_name, aot::IValue::create(val)}); + } else if (tag == aot::ArgKind::kScalar) { + auto expected_dtype = arg.dtype(); + insert_scalar_arg(arg_name, expected_dtype, pyarg); + } else { + TI_NOT_IMPLEMENTED; + } + } + self->jit_run(compile_config, args); + }); py::class_(m, "Kernel") .def("no_activate", @@ -723,7 +710,6 @@ void export_lang(py::module &m) { &LaunchContextBuilder::set_arg_ndarray_with_grad) .def("set_arg_texture", &LaunchContextBuilder::set_arg_texture) .def("set_arg_rw_texture", &LaunchContextBuilder::set_arg_rw_texture) - .def("set_extra_arg_int", &LaunchContextBuilder::set_extra_arg_int) .def("get_struct_ret_int", &LaunchContextBuilder::get_struct_ret_int) .def("get_struct_ret_uint", &LaunchContextBuilder::get_struct_ret_uint) .def("get_struct_ret_float", &LaunchContextBuilder::get_struct_ret_float); diff --git a/taichi/rhi/amdgpu/amdgpu_device.cpp b/taichi/rhi/amdgpu/amdgpu_device.cpp index f2d94a9c06883..6107904c9f644 100644 --- a/taichi/rhi/amdgpu/amdgpu_device.cpp +++ b/taichi/rhi/amdgpu/amdgpu_device.cpp @@ -56,17 +56,15 @@ DeviceAllocation AmdgpuDevice::allocate_memory_runtime( info.size = taichi::iroundup(params.size, taichi_page_size); if (params.host_read || params.host_write) { TI_NOT_IMPLEMENTED - } else if (params.use_cached) { + } else { info.ptr = DeviceMemoryPool::get_instance().allocate_with_cache(this, params); TI_ASSERT(info.ptr != nullptr); AMDGPUDriver::get_instance().memset((void *)info.ptr, 0, info.size); - } else { - info.ptr = allocate_llvm_runtime_memory_jit(params); } info.is_imported = false; - info.use_cached = params.use_cached; + info.use_cached = true; info.use_preallocated = true; DeviceAllocation alloc; diff --git a/taichi/rhi/cuda/cuda_device.cpp b/taichi/rhi/cuda/cuda_device.cpp index bb4bded239eb2..f75f657972cbf 100644 --- a/taichi/rhi/cuda/cuda_device.cpp +++ b/taichi/rhi/cuda/cuda_device.cpp @@ -51,18 +51,16 @@ DeviceAllocation CudaDevice::allocate_memory_runtime( info.size = 
taichi::iroundup(params.size, taichi_page_size); if (info.size == 0) { info.ptr = nullptr; - } else if (params.use_cached) { + } else { info.ptr = DeviceMemoryPool::get_instance().allocate_with_cache(this, params); TI_ASSERT(info.ptr != nullptr); CUDADriver::get_instance().memset((void *)info.ptr, 0, info.size); - } else { - info.ptr = allocate_llvm_runtime_memory_jit(params); } info.is_imported = false; - info.use_cached = params.use_cached; + info.use_cached = true; info.use_preallocated = true; DeviceAllocation alloc; diff --git a/taichi/rhi/llvm/llvm_device.h b/taichi/rhi/llvm/llvm_device.h index e7104f94a20fb..7997a5c3fe964 100644 --- a/taichi/rhi/llvm/llvm_device.h +++ b/taichi/rhi/llvm/llvm_device.h @@ -10,7 +10,6 @@ struct LLVMRuntime; class LlvmDevice : public Device { public: struct LlvmRuntimeAllocParams : AllocParams { - bool use_cached{true}; JITModule *runtime_jit{nullptr}; LLVMRuntime *runtime{nullptr}; uint64 *result_buffer{nullptr}; diff --git a/taichi/runtime/gfx/runtime.cpp b/taichi/runtime/gfx/runtime.cpp index 1c028a1924979..c49a912226a36 100644 --- a/taichi/runtime/gfx/runtime.cpp +++ b/taichi/runtime/gfx/runtime.cpp @@ -93,11 +93,6 @@ class HostDeviceContextBlitter { std::memcpy(device_base, host_ctx_.get_context().arg_buffer, ctx_attribs_->args_bytes()); - void *device_ptr = - (uint8_t *)device_base + ctx_attribs_->extra_args_mem_offset(); - std::memcpy(device_ptr, host_ctx_.get_context().extra_args, - ctx_attribs_->extra_args_bytes()); - device_->unmap(*device_args_buffer_); } @@ -258,10 +253,6 @@ CompiledTaichiKernel::CompiledTaichiKernel(const Params &ti_params) args_buffer_size_ = arg_sz; ret_buffer_size_ = ret_sz; - if (arg_sz) { - args_buffer_size_ += ti_kernel_attribs_.ctx_attribs.extra_args_bytes(); - } - const auto &task_attribs = ti_kernel_attribs_.tasks_attribs; const auto &spirv_bins = ti_params.spirv_bins; TI_ASSERT(task_attribs.size() == spirv_bins.size()); @@ -802,15 +793,14 @@ std::pair 
GfxRuntime::get_struct_type_with_data_layout(const lang::StructType *old_ty, const std::string &layout) { auto [new_ty, size, align] = - get_struct_type_with_data_layout_impl(old_ty, layout, true); + get_struct_type_with_data_layout_impl(old_ty, layout); return {new_ty, size}; } std::tuple GfxRuntime::get_struct_type_with_data_layout_impl( const lang::StructType *old_ty, - const std::string &layout, - bool is_outmost) { + const std::string &layout) { TI_TRACE("get_struct_type_with_data_layout: {}", layout); TI_ASSERT(layout.size() == 2); auto is_430 = layout[0] == '4'; @@ -824,7 +814,7 @@ GfxRuntime::get_struct_type_with_data_layout_impl( size_t member_size; if (auto struct_type = member.type->cast()) { auto [new_ty, size, member_align_] = - get_struct_type_with_data_layout_impl(struct_type, layout, false); + get_struct_type_with_data_layout_impl(struct_type, layout); members[i].type = new_ty; member_align = member_align_; member_size = size; @@ -863,7 +853,7 @@ GfxRuntime::get_struct_type_with_data_layout_impl( if (!is_430) { align = align_up(align, sizeof(float) * 4); - bytes = align_up(bytes, is_outmost ? 
4 : 4 * sizeof(float)); + bytes = align_up(bytes, 4 * sizeof(float)); } TI_TRACE(" total_bytes={}", bytes); return {TypeFactory::get_instance() diff --git a/taichi/runtime/gfx/runtime.h b/taichi/runtime/gfx/runtime.h index d52de3c2ae320..725ef0aa81394 100644 --- a/taichi/runtime/gfx/runtime.h +++ b/taichi/runtime/gfx/runtime.h @@ -135,8 +135,7 @@ class TI_DLL_EXPORT GfxRuntime { static std::tuple get_struct_type_with_data_layout_impl(const lang::StructType *old_ty, - const std::string &layout, - bool is_outmost); + const std::string &layout); private: friend class taichi::lang::gfx::SNodeTreeManager; diff --git a/taichi/runtime/llvm/llvm_runtime_executor.cpp b/taichi/runtime/llvm/llvm_runtime_executor.cpp index c7ad98b5201e6..ffb35055618a0 100644 --- a/taichi/runtime/llvm/llvm_runtime_executor.cpp +++ b/taichi/runtime/llvm/llvm_runtime_executor.cpp @@ -475,7 +475,6 @@ DeviceAllocation LlvmRuntimeExecutor::allocate_memory_ndarray( return llvm_device()->allocate_memory_runtime( {{alloc_size, /*host_write=*/false, /*host_read=*/false, /*export_sharing=*/false, AllocUsage::Storage}, - config_.ndarray_use_cached_allocator, get_runtime_jit_module(), get_llvm_runtime(), result_buffer}); diff --git a/taichi/runtime/llvm/runtime_module/runtime.cpp b/taichi/runtime/llvm/runtime_module/runtime.cpp index f7cb28443d824..8be6e4f69e8c4 100644 --- a/taichi/runtime/llvm/runtime_module/runtime.cpp +++ b/taichi/runtime/llvm/runtime_module/runtime.cpp @@ -288,10 +288,6 @@ STRUCT_FIELD_ARRAY(PhysicalCoordinates, val); STRUCT_FIELD(RuntimeContext, runtime); STRUCT_FIELD(RuntimeContext, result_buffer) -int32 RuntimeContext_get_extra_args(RuntimeContext *ctx, int32 i, int32 j) { - return ctx->extra_args[i][j]; -} - #include "taichi/runtime/llvm/runtime_module/atomic.h" // These structures are accessible by both the LLVM backend and this C++ runtime diff --git a/taichi/transforms/alg_simp.cpp b/taichi/transforms/alg_simp.cpp index 792f865410a0d..44ff6e9408185 100644 --- 
a/taichi/transforms/alg_simp.cpp +++ b/taichi/transforms/alg_simp.cpp @@ -60,8 +60,10 @@ class AlgSimp : public BasicStmtVisitor { data_type_bits(second_cast) <= data_type_bits(first_cast); } if (is_integral(first_cast)) { - // int(int(a)) - return data_type_bits(second_cast) <= data_type_bits(first_cast); + // int(int(a)), note it's not always equivalent when signedness differ, + // see #7915 + return data_type_bits(second_cast) <= data_type_bits(first_cast) && + is_signed(second_cast) == is_signed(first_cast); } // int(float(a)) if (data_type_bits(second_cast) <= data_type_bits(first_cast) * 2) { diff --git a/taichi/transforms/compile_to_offloads.cpp b/taichi/transforms/compile_to_offloads.cpp index c93fd9298b1d8..92700327cfb0f 100644 --- a/taichi/transforms/compile_to_offloads.cpp +++ b/taichi/transforms/compile_to_offloads.cpp @@ -127,14 +127,6 @@ void compile_to_offloads(IRNode *ir, print("Access flagged I"); irpass::analysis::verify(ir); - if (config.real_matrix_scalarize) { - irpass::scalarize(ir); - - // Remove redundant MatrixInitStmt inserted during scalarization - irpass::die(ir); - print("Scalarized"); - } - irpass::full_simplify(ir, config, {false, /*autodiff_enabled*/ false}); print("Simplified II"); irpass::analysis::verify(ir); @@ -187,6 +179,14 @@ void offload_to_executable(IRNode *ir, print("Detect read-only accesses"); } + if (config.real_matrix_scalarize) { + irpass::scalarize(ir); + + // Remove redundant MatrixInitStmt inserted during scalarization + irpass::full_simplify(ir, config, {false, /*autodiff_enabled*/ false}); + print("Scalarized"); + } + irpass::demote_atomics(ir, config); print("Atomics demoted I"); irpass::analysis::verify(ir); diff --git a/taichi/transforms/offload.cpp b/taichi/transforms/offload.cpp index fe675b0c362bf..d9f904b443caf 100644 --- a/taichi/transforms/offload.cpp +++ b/taichi/transforms/offload.cpp @@ -17,6 +17,8 @@ bool demotable_axis_load(Stmt *stmt) { // Stmt involving simple arithmetic of 
ExternalTensorShapeAlongAxisStmt // shouldn't be saved in global tmp, just clone them to each shader // separately. + if (stmt->is()) + return false; int n_op = stmt->num_operands(); if (n_op == 0) { return stmt->is() || @@ -442,8 +444,8 @@ class PromoteIntermediateToGlobalTmp : public BasicStmtVisitor { private: explicit PromoteIntermediateToGlobalTmp( - const StmtToOffsetMap &local_to_global_offset) - : local_to_global_offset_(local_to_global_offset) { + const StmtToOffsetMap *local_to_global_offset) + : local_to_global_offset_(*local_to_global_offset) { allow_undefined_visitor = true; invoke_default_visitor = true; } @@ -454,20 +456,20 @@ class PromoteIntermediateToGlobalTmp : public BasicStmtVisitor { local_to_global_offset_.find(stmt) != local_to_global_offset_.end() && stored_to_global_.find(stmt) == stored_to_global_.end()) { stored_to_global_.insert(stmt); - auto offset = local_to_global_offset_[stmt]; + auto offset = local_to_global_offset_.at(stmt); auto ptr = stmt->insert_after_me( Stmt::make(offset, stmt->ret_type)); ptr->insert_after_me(Stmt::make(ptr, stmt)); } } - static void run(IRNode *root, const StmtToOffsetMap &local_to_global_offset) { + static void run(IRNode *root, const StmtToOffsetMap *local_to_global_offset) { PromoteIntermediateToGlobalTmp pass(local_to_global_offset); root->accept(&pass); } private: - StmtToOffsetMap local_to_global_offset_; + const StmtToOffsetMap &local_to_global_offset_; std::set stored_to_global_; }; @@ -477,11 +479,11 @@ class FixCrossOffloadReferences : public BasicStmtVisitor { private: FixCrossOffloadReferences( const CompileConfig &config, - const StmtToOffsetMap &local_to_global_offset, - const std::unordered_map &stmt_to_offloaded, + const StmtToOffsetMap *local_to_global_offset, + std::unordered_map &stmt_to_offloaded, OffloadedRanges *offloaded_ranges) : config_(config), - local_to_global_offset_(local_to_global_offset), + local_to_global_offset_(*local_to_global_offset), 
stmt_to_offloaded_(stmt_to_offloaded), offloaded_ranges_(offloaded_ranges) { allow_undefined_visitor = true; @@ -499,9 +501,8 @@ class FixCrossOffloadReferences : public BasicStmtVisitor { offloaded_ranges_->begin_stmts.find(stmt)->second) != local_to_global_offset_.end(), "Begin fails.") - stmt->begin_offset = - local_to_global_offset_[offloaded_ranges_->begin_stmts.find(stmt) - ->second]; + stmt->begin_offset = local_to_global_offset_.at( + offloaded_ranges_->begin_stmts.find(stmt)->second); } if (!stmt->const_end) { if (stmt->end_stmt) { @@ -514,9 +515,8 @@ class FixCrossOffloadReferences : public BasicStmtVisitor { offloaded_ranges_->end_stmts.find(stmt)->second) != local_to_global_offset_.end(), "End fails.") - stmt->end_offset = - local_to_global_offset_[offloaded_ranges_->end_stmts.find(stmt) - ->second]; + stmt->end_offset = local_to_global_offset_.at( + offloaded_ranges_->end_stmts.find(stmt)->second); } } } @@ -530,27 +530,22 @@ class FixCrossOffloadReferences : public BasicStmtVisitor { auto ret_type = stmt->ret_type; local_to_global_vector_type_[stmt] = ret_type; auto ptr = replacement.push_back( - local_to_global_offset_[stmt], ret_type); + local_to_global_offset_.at(stmt), ret_type); auto offloaded = stmt_to_offloaded_[stmt]; stmt_to_offloaded_[ptr] = offloaded; + + TypedConstant zero(stmt->ret_type.get_element_type()); + auto const_zero_stmt = replacement.push_back(zero); if (auto tensor_type = stmt->ret_type->cast()) { - TypedConstant zero(tensor_type->get_element_type()); - auto const_zero_stmt = replacement.push_back(zero); - stmt_to_offloaded_[const_zero_stmt] = offloaded; - for (int i = 0; i < tensor_type->get_num_elements(); ++i) { - auto const_offset_stmt = - replacement.push_back(TypedConstant(i)); - auto ptr_offset_stmt = - replacement.push_back(ptr, const_offset_stmt); - auto global_store_stmt = replacement.push_back( - ptr_offset_stmt, const_zero_stmt); - stmt_to_offloaded_[const_offset_stmt] = offloaded; - 
stmt_to_offloaded_[ptr_offset_stmt] = offloaded; - stmt_to_offloaded_[global_store_stmt] = offloaded; - } + std::vector zero_values(tensor_type->get_num_elements(), + const_zero_stmt); + auto zero_matrix_init_stmt = + replacement.push_back(zero_values); + zero_matrix_init_stmt->ret_type = stmt->ret_type.ptr_removed(); + auto global_store_stmt = + replacement.push_back(ptr, zero_matrix_init_stmt); + stmt_to_offloaded_[global_store_stmt] = offloaded; } else { - TypedConstant zero(stmt->ret_type); - auto const_zero_stmt = replacement.push_back(zero); auto global_store_stmt = replacement.push_back(ptr, const_zero_stmt); stmt_to_offloaded_[global_store_stmt] = offloaded; @@ -623,7 +618,7 @@ class FixCrossOffloadReferences : public BasicStmtVisitor { generic_visit(pcopy); } else { auto global_temporary = Stmt::make( - local_to_global_offset_[op], op->ret_type); + local_to_global_offset_.at(op), op->ret_type); stmt_to_offloaded_[global_temporary.get()] = offloaded; stmt->set_operand(index, global_temporary.get()); if (op->is() || op->ret_type.is_pointer()) { @@ -660,8 +655,8 @@ class FixCrossOffloadReferences : public BasicStmtVisitor { public: static void run(IRNode *root, const CompileConfig &config, - const StmtToOffsetMap &local_to_global_offset, - const std::unordered_map &stmt_to_offloaded, + const StmtToOffsetMap *local_to_global_offset, + std::unordered_map &stmt_to_offloaded, OffloadedRanges *offloaded_ranges) { FixCrossOffloadReferences pass(config, local_to_global_offset, stmt_to_offloaded, offloaded_ranges); @@ -670,8 +665,8 @@ class FixCrossOffloadReferences : public BasicStmtVisitor { private: [[maybe_unused]] const CompileConfig &config_; - StmtToOffsetMap local_to_global_offset_; - std::unordered_map stmt_to_offloaded_; + const StmtToOffsetMap &local_to_global_offset_; + std::unordered_map &stmt_to_offloaded_; OffloadedRanges *const offloaded_ranges_; std::unordered_map local_to_global_vector_type_; }; @@ -783,9 +778,9 @@ void offload(IRNode *root, const 
CompileConfig &config) { auto stmt_to_offloaded = StmtToOffloaded::run(root); const auto local_to_global_offset = IdentifyValuesUsedInOtherOffloads::run( root, config, stmt_to_offloaded, &offloaded_ranges); - PromoteIntermediateToGlobalTmp::run(root, local_to_global_offset); + PromoteIntermediateToGlobalTmp::run(root, &local_to_global_offset); stmt_to_offloaded = StmtToOffloaded::run(root); - FixCrossOffloadReferences::run(root, config, local_to_global_offset, + FixCrossOffloadReferences::run(root, config, &local_to_global_offset, stmt_to_offloaded, &offloaded_ranges); } insert_gc(root, config); diff --git a/taichi/transforms/scalarize.cpp b/taichi/transforms/scalarize.cpp index 40fa40f8ba9f6..69073000b5329 100644 --- a/taichi/transforms/scalarize.cpp +++ b/taichi/transforms/scalarize.cpp @@ -881,7 +881,7 @@ class GatherScalarizableLocalPointers : public BasicStmtVisitor { } }; -class ScalarizeLocalPointers : public BasicStmtVisitor { +class ScalarizePointers : public BasicStmtVisitor { public: ImmediateIRModifier immediate_modifier_; DelayedIRModifier delayed_modifier_; @@ -890,7 +890,7 @@ class ScalarizeLocalPointers : public BasicStmtVisitor { // { original_alloca_stmt : [scalarized_alloca_stmt0, ...] 
} std::unordered_map> scalarized_local_tensor_map_; - explicit ScalarizeLocalPointers( + explicit ScalarizePointers( IRNode *node, const std::unordered_set &scalarizable_allocas) : immediate_modifier_(node), scalarizable_allocas_(scalarizable_allocas) { @@ -948,16 +948,16 @@ class ScalarizeLocalPointers : public BasicStmtVisitor { } } - /* - Before: - MatrixPtrStmt(TensorType<4 x i32>* alloca_stmt, int offset) - - After: - scalarized_alloca_stmt = - scalarized_local_tensor_map_[alloca_stmt][offset] - stmt->replace_all_usages_with(scalarized_alloca_stmt) - */ void visit(MatrixPtrStmt *stmt) override { + /* + Before: + MatrixPtrStmt(TensorType<4 x i32>* alloca_stmt, int offset) + + After: + scalarized_alloca_stmt = + scalarized_local_tensor_map_[alloca_stmt][offset] + stmt->replace_all_usages_with(scalarized_alloca_stmt) + */ if (stmt->origin->is() && scalarizable_allocas_.count(stmt->origin) == 1) { auto alloca_stmt = stmt->origin->cast(); @@ -979,6 +979,34 @@ class ScalarizeLocalPointers : public BasicStmtVisitor { immediate_modifier_.replace_usages_with(stmt, new_stmt); delayed_modifier_.erase(stmt); + return; + } + + /* + Before: + TensorType<4 x i32>* ptr = GlobalTempStmt(offset_0) + i32* ptr_1 = MatrixPtrStmt(ptr, offset_1) + + After: + i32* $1 = GlobalTempStmt(offset_0 + offset_1 * sizeof(i32)) + replace_all_usages_with(ptr_1, $1) + */ + if (stmt->origin->is() && + stmt->offset->is()) { + auto global_temp_stmt = stmt->origin->as(); + auto offset_0 = global_temp_stmt->offset; + auto offset_1 = stmt->offset->as()->val.val_int32(); + auto new_offset = + offset_0 + offset_1 * data_type_size(stmt->ret_type.ptr_removed()); + + auto new_global_temp_stmt = std::make_unique( + new_offset, stmt->ret_type.ptr_removed().get_element_type()); + new_global_temp_stmt->ret_type.set_is_pointer(true); + + stmt->replace_usages_with(new_global_temp_stmt.get()); + delayed_modifier_.insert_before(stmt, std::move(new_global_temp_stmt)); + delayed_modifier_.erase(stmt); + return; } } 
@@ -1021,12 +1049,24 @@ class ExtractLocalPointers : public BasicStmtVisitor { Block *top_level_; explicit ExtractLocalPointers(IRNode *root) : immediate_modifier_(root) { - TI_ASSERT(root->is()); - top_level_ = root->as(); + if (root->is()) { + top_level_ = root->as()->body.get(); + } else { + TI_ASSERT(root->is()); + top_level_ = root->as(); + } root->accept(this); delayed_modifier_.modify_ir(); } + void visit(OffloadedStmt *stmt) override { + // Extract to OffloadStmt + Block *orig_top_level = top_level_; + top_level_ = stmt->body.get(); + stmt->all_blocks_accept(this); + top_level_ = orig_top_level; + } + void visit(MatrixPtrStmt *stmt) override { if (stmt->origin->is()) { auto alloca_stmt = stmt->origin->cast(); @@ -1118,7 +1158,7 @@ void scalarize(IRNode *root) { TI_AUTO_PROF; Scalarize scalarize_pass(root); auto scalarizable_allocas = GatherScalarizableLocalPointers::run(root); - ScalarizeLocalPointers scalarize_pointers_pass(root, scalarizable_allocas); + ScalarizePointers scalarize_pointers_pass(root, scalarizable_allocas); ExtractLocalPointers extract_pointers_pass(root); MergeExternalAndMatrixPtr::run(root); } diff --git a/tests/cpp/aot/gfx_utils.cpp b/tests/cpp/aot/gfx_utils.cpp index e7e15b5fde434..9a0d965321316 100644 --- a/tests/cpp/aot/gfx_utils.cpp +++ b/tests/cpp/aot/gfx_utils.cpp @@ -172,7 +172,7 @@ void run_kernel_test1(Arch arch, taichi::lang::Device *device) { // Hack to set vector/matrix args std::vector vec = {1, 2, 3}; for (int i = 0; i < vec.size(); ++i) { - builder.set_arg(/*arg_id=*/i + 2, vec[i]); + builder.set_struct_arg(/*arg_indices=*/{2, i}, vec[i]); } k_run->launch(builder); gfx_runtime->synchronize(); diff --git a/tests/cpp/aot/llvm/kernel_aot_test.cpp b/tests/cpp/aot/llvm/kernel_aot_test.cpp index 4dcdce34fc23d..5446cb8219cff 100644 --- a/tests/cpp/aot/llvm/kernel_aot_test.cpp +++ b/tests/cpp/aot/llvm/kernel_aot_test.cpp @@ -54,7 +54,7 @@ TEST(LlvmAotTest, CpuKernel) { builder.set_arg_ndarray(/*arg_id=*/1, arr); std::vector vec = 
{1, 2, 3}; for (int i = 0; i < vec.size(); ++i) { - builder.set_arg(/*arg_id=*/i + 2, vec[i]); + builder.set_struct_arg(/*arg_indices=*/{2, i}, vec[i]); } k_run->launch(builder); @@ -100,7 +100,7 @@ TEST(LlvmAotTest, CudaKernel) { builder.set_arg_ndarray(/*arg_id=*/1, arr); std::vector vec = {1, 2, 3}; for (int i = 0; i < vec.size(); ++i) { - builder.set_arg(/*arg_id=*/i + 2, vec[i]); + builder.set_struct_arg(/*arg_indices=*/{2, i}, vec[i]); } k_run->launch(builder); diff --git a/tests/python/test_argument.py b/tests/python/test_argument.py index 4a7139b1f328a..9d1c0d078e9ef 100644 --- a/tests/python/test_argument.py +++ b/tests/python/test_argument.py @@ -4,32 +4,6 @@ from tests import test_utils -@test_utils.test(exclude=[ti.opengl, ti.gles]) -def test_exceed_max_64(): - N = 64 - - @ti.kernel - def foo1(a: ti.types.vector(N, ti.i32)) -> ti.i32: - return a.sum() - - A = ti.Vector([1] * N) - assert foo1(A) == 64 - - N = 65 - - @ti.kernel - def foo2(a: ti.types.vector(N, ti.i32)) -> ti.i32: - return a.sum() - - A = ti.Vector([1] * N) - - with pytest.raises( - ti.TaichiRuntimeError, - match=f"The number of elements in kernel arguments is too big! 
Do not exceed 64 on {ti._lib.core.arch_name(ti.lang.impl.current_cfg().arch)} backend.", - ): - foo2(A) - - @test_utils.test(debug=True) def test_kernel_keyword_args(): @ti.kernel diff --git a/tests/python/test_ast_refactor.py b/tests/python/test_ast_refactor.py index 669c419dd724a..deab2b0310c44 100644 --- a/tests/python/test_ast_refactor.py +++ b/tests/python/test_ast_refactor.py @@ -164,17 +164,6 @@ def foo(a: ti.template()): assert a[i] == b[i] -@test_utils.test() -def test_compare_fail(): - with pytest.raises(ti.TaichiCompilationError, match='"Is" is only supported inside `ti.static`.'): - - @ti.kernel - def foo(): - None is None - - foo() - - @test_utils.test() def test_single_compare(): @ti.kernel diff --git a/tests/python/test_compare.py b/tests/python/test_compare.py index 487e88c70f7ee..84a335bc4c054 100644 --- a/tests/python/test_compare.py +++ b/tests/python/test_compare.py @@ -171,33 +171,6 @@ def foo(a: ti.template()) -> ti.i32: foo(ti.i32) -@test_utils.test() -def test_static_is(): - @ti.kernel - def is_f32(tp: ti.template()) -> ti.i32: - return ti.static(tp is ti.f32) - - @ti.kernel - def is_not_f32(tp: ti.template()) -> ti.i32: - return ti.static(tp is not ti.f32) - - assert is_f32(ti.f32) == 1 - assert is_f32(ti.i32) == 0 - assert is_not_f32(ti.f32) == 0 - assert is_not_f32(ti.i32) == 1 - - -@test_utils.test() -def test_non_static_is(): - with pytest.raises(ti.TaichiCompilationError, match='"Is" is only supported inside `ti.static`.'): - - @ti.kernel - def is_f32(tp: ti.template()) -> ti.i32: - return tp is ti.f32 - - is_f32(ti.f32) - - @test_utils.test(default_ip=ti.i64, require=ti.extension.data64) def test_compare_ret_type(): # The purpose of this test is to make sure a comparison returns i32 diff --git a/tests/python/test_deprecation.py b/tests/python/test_deprecation.py index 83b8d8b9a507f..358b3c9d3b55d 100644 --- a/tests/python/test_deprecation.py +++ b/tests/python/test_deprecation.py @@ -8,21 +8,6 @@ from tests import test_utils 
-@test_utils.test() -def test_deprecate_a_atomic_b(): - with pytest.warns( - DeprecationWarning, - match=r"a\.atomic_add\(b\) is deprecated, and it will be removed in Taichi v1.6.0.", - ): - - @ti.kernel - def func(): - a = 1 - a.atomic_add(2) - - func() - - @test_utils.test() def test_deprecate_element_shape_scalar(): with pytest.warns( @@ -84,26 +69,8 @@ def test_deprecate_rwtexture_ndim(): @test_utils.test() -def test_deprecate_builtin_min_max(): - with pytest.warns( - DeprecationWarning, - match='Calling builtin function "max" in Taichi scope is deprecated, ' - "and it will be removed in Taichi v1.6.0.", - ): - - @ti.kernel - def func(): - max(1, 2) - - func() - - -@test_utils.test() -def test_deprecate_is_is_not(): - with pytest.warns( - DeprecationWarning, - match='Operator "is" in Taichi scope is deprecated, ' "and it will be removed in Taichi v1.6.0.", - ): +def test_remove_is_is_not(): + with pytest.raises(ti.TaichiSyntaxError, match='Operator "is" in Taichi scope is not supported'): @ti.kernel def func(): @@ -112,44 +79,6 @@ def func(): func() -@test_utils.test() -def test_deprecate_ndrange(): - with pytest.warns( - DeprecationWarning, - match="Ndrange for loop with number of the loop variables not equal to " - "the dimension of the ndrange is deprecated, " - "and it will be removed in Taichi 1.6.0. ", - ): - - @ti.kernel - def func(): - for i in ti.ndrange(4, 4): - pass - - func() - - -@pytest.mark.skipif(not _ti_core.GGUI_AVAILABLE, reason="GGUI Not Available") -@test_utils.test(arch=ti.cpu) -def test_deprecate_ti_ui_window(): - window = ti.ui.Window("Diff SPH", (256, 256), show_window=False) - with pytest.warns( - DeprecationWarning, - match=r"`Window\.write_image\(\)` is deprecated, and it will be removed in Taichi v1\.6\.0\. 
", - ): - window.write_image("deprecate.png") - - -@pytest.mark.skipif(not _ti_core.GGUI_AVAILABLE, reason="GGUI Not Available") -@test_utils.test(arch=ti.cpu) -def test_deprecate_ti_ui_make_camera(): - with pytest.warns( - DeprecationWarning, - match=r"`ti\.ui\.make_camera\(\)` is deprecated, and will be removed in Taichi v1\.6\.0\. ", - ): - ti.ui.make_camera() - - @test_utils.test() def test_deprecation_in_taichi_init_py(): with pytest.warns( @@ -157,12 +86,3 @@ def test_deprecation_in_taichi_init_py(): match="ti.SOA is deprecated, and it will be removed in Taichi v1.6.0.", ): ti.SOA - - -@test_utils.test() -def test_deprecate_sparse_matrix_builder(): - with pytest.warns( - DeprecationWarning, - match=r"ti\.linalg\.sparse_matrix_builder is deprecated, and it will be removed in Taichi v1\.6\.0\.", - ): - ti.linalg.sparse_matrix_builder() diff --git a/tests/python/test_matrix.py b/tests/python/test_matrix.py index 18931a1146daa..0da6ed756c9ae 100644 --- a/tests/python/test_matrix.py +++ b/tests/python/test_matrix.py @@ -1333,3 +1333,12 @@ def mat_equal(A, B, tol=1e-6): tol = 1e-5 if dtype == ti.f32 else 1e-12 assert mat_equal(x.to_numpy(), arr, tol=tol) + + +@test_utils.test() +def test_matrix_dtype(): + a = ti.types.vector(3, dtype=ti.f32)([0, 1, 2]) + assert a.entries.dtype == np.float32 + + b = ti.types.matrix(2, 2, dtype=ti.i32)([[0, 1], [2, 3]]) + assert b.entries.dtype == np.int32 diff --git a/tests/python/test_taichi_cg.py b/tests/python/test_matrixfree_cg.py similarity index 91% rename from tests/python/test_taichi_cg.py rename to tests/python/test_matrixfree_cg.py index 122669931aa62..733a0ea24c0d2 100644 --- a/tests/python/test_taichi_cg.py +++ b/tests/python/test_matrixfree_cg.py @@ -1,7 +1,7 @@ import math import pytest -from taichi.linalg import LinearOperator, taichi_cg_solver +from taichi.linalg import LinearOperator, MatrixFreeCG import taichi as ti from tests import test_utils @@ -11,7 +11,7 @@ @pytest.mark.parametrize("ti_dtype", [ti.f32, 
ti.f64]) @test_utils.test(arch=[ti.cpu, ti.cuda, ti.vulkan], exclude=[vk_on_mac]) -def test_taichi_cg(ti_dtype): +def test_matrixfree_cg(ti_dtype): GRID = 32 Ax = ti.field(dtype=ti_dtype, shape=(GRID, GRID)) x = ti.field(dtype=ti_dtype, shape=(GRID, GRID)) @@ -47,7 +47,7 @@ def check_solution(sol: ti.template(), ans: ti.template(), tol: ti_dtype) -> boo A = LinearOperator(compute_Ax) init() - taichi_cg_solver(A, b, x, maxiter=10 * GRID * GRID, tol=1e-18, quiet=True) + MatrixFreeCG(A, b, x, maxiter=10 * GRID * GRID, tol=1e-18, quiet=True) compute_Ax(x, Ax) # `tol` can't be < 1e-6 for ti.f32 because of accumulating round-off error; # see https://en.wikipedia.org/wiki/Conjugate_gradient_method#cite_note-6 diff --git a/tests/python/test_native_functions.py b/tests/python/test_native_functions.py index 2176f19aecb9b..fa60291bfdb09 100644 --- a/tests/python/test_native_functions.py +++ b/tests/python/test_native_functions.py @@ -72,9 +72,7 @@ def func(): y[i] = N - i z[i] = i - 2 if i % 2 else i + 2 - with pytest.warns(DeprecationWarning, match="Calling builtin function") as records: - func() - assert len(records) > 0 + func() assert np.allclose( minimum.to_numpy(), diff --git a/tests/python/test_ndarray.py b/tests/python/test_ndarray.py index e3c89fa3e0f00..f139d44917c75 100644 --- a/tests/python/test_ndarray.py +++ b/tests/python/test_ndarray.py @@ -278,7 +278,7 @@ def test_ndarray_deepcopy(): assert y[4][1, 0] == 9 -@test_utils.test(arch=[ti.cuda], ndarray_use_cached_allocator=True) +@test_utils.test(arch=[ti.cuda]) def test_ndarray_caching_allocator(): n = 8 a = ti.ndarray(ti.i32, shape=(n)) diff --git a/tests/python/test_ndrange.py b/tests/python/test_ndrange.py index 311f9cc770e80..925473566b9c5 100644 --- a/tests/python/test_ndrange.py +++ b/tests/python/test_ndrange.py @@ -312,20 +312,6 @@ def example(): example() -@test_utils.test(exclude=[ti.amdgpu]) -def test_n_loop_var_neq_dimension(): - @ti.kernel - def iter(): - for i in ti.ndrange(1, 4): - print(i) - - 
with pytest.warns( - DeprecationWarning, - match="Ndrange for loop with number of the loop variables not equal to", - ): - iter() - - @test_utils.test() def test_2d_loop_over_ndarray(): @ti.kernel @@ -336,3 +322,19 @@ def foo(arr: ti.types.ndarray(dtype=ti.i32, ndim=1)): array = ti.ndarray(ti.i32, shape=(16,)) foo(array) + + +@test_utils.test() +def test_dimension_error(): + with pytest.raises( + ti.TaichiSyntaxError, + match="Ndrange for loop with number of the loop variables not equal to " + "the dimension of the ndrange is not supported", + ): + + @ti.kernel + def func(): + for i in ti.ndrange(4, 4): + pass + + func() diff --git a/tests/python/test_offload_cross.py b/tests/python/test_offload_cross.py index 916fcf01acf16..a28c2e96d3d19 100644 --- a/tests/python/test_offload_cross.py +++ b/tests/python/test_offload_cross.py @@ -133,3 +133,24 @@ def run(a: ti.i32): print("OK") run(2) + + +@test_utils.test(exclude=ti.amdgpu) +def test_offload_with_save(): + a = ti.Vector.field(2, dtype=ti.f32, shape=1) + b = ti.Vector.field(2, dtype=ti.f32, shape=1) + c = ti.Vector.field(2, dtype=ti.f32, shape=1) + + @ti.kernel + def test(): + a[0] = ti.Vector([1, 1]) + b[0] = ti.Vector([0, 0]) + c[0] = ti.Vector([0, 0]) + b[0] += a[0] # b[0] = [1, 1] + b[0] /= 2 # b[0] = [0.5, 0.5] + for i in c: + c[i] += b[0] # c[0] = [0.5, 0.5] + + test() + assert c[0][0] == 0.5 + assert c[0][1] == 0.5 diff --git a/tests/python/test_optimization.py b/tests/python/test_optimization.py index fa7147d7282c1..c966464647da4 100644 --- a/tests/python/test_optimization.py +++ b/tests/python/test_optimization.py @@ -143,3 +143,13 @@ def func(): for i in range(3): for j in range(4): assert mat[i, j] == i + 1 + + +@test_utils.test() +def test_casts_int_uint(): + @ti.kernel + def my_cast(x: ti.f32) -> ti.u32: + y = ti.floor(x, ti.i32) + return ti.cast(y, ti.u32) + + assert my_cast(-1) == 4294967295 diff --git a/tests/python/test_cg.py b/tests/python/test_sparse_cg.py similarity index 93% rename from 
tests/python/test_cg.py rename to tests/python/test_sparse_cg.py index bd0eb38445f20..ad6afa4c071f4 100644 --- a/tests/python/test_cg.py +++ b/tests/python/test_sparse_cg.py @@ -28,7 +28,7 @@ def fill( fill(Abuilder, A_psd, b) A = Abuilder.build(dtype=ti_dtype) - cg = ti.linalg.CG(A, b, x0, max_iter=50, atol=1e-6) + cg = ti.linalg.SparseCG(A, b, x0, max_iter=50, atol=1e-6) x, exit_code = cg.solve() res = np.linalg.solve(A_psd, b.to_numpy()) assert exit_code == True @@ -59,7 +59,7 @@ def fill( fill(Abuilder, A_psd, b) A = Abuilder.build(dtype=ti_dtype) - cg = ti.linalg.CG(A, b, x0, max_iter=50, atol=1e-6) + cg = ti.linalg.SparseCG(A, b, x0, max_iter=50, atol=1e-6) x, exit_code = cg.solve() res = np.linalg.solve(A_psd, b.to_numpy()) assert exit_code == True