diff --git a/taichi/analysis/offline_cache_util.cpp b/taichi/analysis/offline_cache_util.cpp index 8331c6d1cc4c7..19f8a32c01dbf 100644 --- a/taichi/analysis/offline_cache_util.cpp +++ b/taichi/analysis/offline_cache_util.cpp @@ -161,14 +161,16 @@ std::string get_hashed_offline_cache_key(CompileConfig *config, compile_config_key = get_offline_cache_key_of_compile_config(config); } + std::string autodiff_mode = + std::to_string(static_cast<int>(kernel->autodiff_mode)); picosha2::hash256_one_by_one hasher; hasher.process(compile_config_key.begin(), compile_config_key.end()); hasher.process(kernel_ast_string.begin(), kernel_ast_string.end()); + hasher.process(autodiff_mode.begin(), autodiff_mode.end()); hasher.finish(); auto res = picosha2::get_hash_hex_string(hasher); - res.insert(res.begin(), - kernel->autodiff_mode != AutodiffMode::kNone ? 'g' : 'n'); + res.insert(res.begin(), 'T'); // The key must start with a letter return res; } diff --git a/taichi/codegen/llvm/codegen_llvm_quant.cpp b/taichi/codegen/llvm/codegen_llvm_quant.cpp index 95e2a423011a1..15ae796137ef9 100644 --- a/taichi/codegen/llvm/codegen_llvm_quant.cpp +++ b/taichi/codegen/llvm/codegen_llvm_quant.cpp @@ -221,7 +221,7 @@ void TaskCodeGenLLVM::visit(BitStructStoreStmt *stmt) { create_call("max_i32", {exponent_bits, tlctx->get_constant(0)}); // Compute the bit pointer of the exponent bits. 
- val = builder->CreateBitCast(exponent_bits, physical_type); + val = builder->CreateIntCast(exponent_bits, physical_type, false); val = builder->CreateShl(val, bit_struct->get_member_bit_offset(exp)); if (bit_struct_val == nullptr) { @@ -238,7 +238,7 @@ void TaskCodeGenLLVM::visit(BitStructStoreStmt *stmt) { tlctx->get_constant(0)); val = builder->CreateSelect(exp_non_zero, digit_bits, tlctx->get_constant(0)); - val = builder->CreateBitCast(val, physical_type); + val = builder->CreateIntCast(val, physical_type, false); val = builder->CreateShl(val, bit_struct->get_member_bit_offset(ch_id)); } else { val = quant_int_or_quant_fixed_to_bits(val, dtype, physical_type); diff --git a/taichi/runtime/llvm/llvm_offline_cache.cpp b/taichi/runtime/llvm/llvm_offline_cache.cpp index c0fa66ca07f89..523d38382afa5 100644 --- a/taichi/runtime/llvm/llvm_offline_cache.cpp +++ b/taichi/runtime/llvm/llvm_offline_cache.cpp @@ -376,6 +376,9 @@ void LlvmOfflineCacheFileWriter::clean_cache(const std::string &path, { std::string lock_path = taichi::join_path(path, kMetadataFileLockName); if (!lock_with_file(lock_path)) { + if (!taichi::path_exists(path)) { + return; + } TI_WARN("Lock {} failed", lock_path); return; } diff --git a/taichi/transforms/auto_diff.cpp b/taichi/transforms/auto_diff.cpp index f853bf2a3a7e9..35246679073fe 100644 --- a/taichi/transforms/auto_diff.cpp +++ b/taichi/transforms/auto_diff.cpp @@ -14,12 +14,14 @@ class IndependentBlocksJudger : public BasicStmtVisitor { void visit(LocalLoadStmt *stmt) override { for (auto &lane : stmt->src.data) { - touched_allocas_.insert(lane.var->as<AllocaStmt>()); + TI_ASSERT(lane.var->is<AllocaStmt>() || lane.var->is<PtrOffsetStmt>()); + touched_allocas_.insert(lane.var); } } void visit(LocalStoreStmt *stmt) override { - touched_allocas_.insert(stmt->dest->as<AllocaStmt>()); + TI_ASSERT(stmt->dest->is<AllocaStmt>() || stmt->dest->is<PtrOffsetStmt>()); + touched_allocas_.insert(stmt->dest); } void visit(AtomicOpStmt *stmt) override { @@ -75,7 +77,7 @@ class IndependentBlocksJudger : public BasicStmtVisitor { } 
private: - std::set<AllocaStmt *> touched_allocas_; + std::set<Stmt *> touched_allocas_; bool qualified_atomics_ = true; bool inner_most_loop_ = true; bool is_inside_loop_ = false; @@ -578,6 +580,10 @@ class ADTransform : public IRVisitor { // do nothing. } + void visit(PtrOffsetStmt *stmt) override { + // do nothing. + } + void visit(PrintStmt *print_stmt) override { // do nothing } @@ -989,7 +995,21 @@ class MakeAdjoint : public ADTransform { void visit(GlobalLoadStmt *stmt) override { // issue global store to adjoint - GlobalPtrStmt *src = stmt->src->as<GlobalPtrStmt>(); + if (stmt->src->is<ExternalPtrStmt>()) { + TI_ERROR( + "Importing data from external array (such as numpy array) not " + "supported in AutoDiff for now") + } + + GlobalPtrStmt *src = nullptr; + bool is_ptr_offset = false; + if (stmt->src->is<PtrOffsetStmt>()) { + is_ptr_offset = true; + src = stmt->src->as<PtrOffsetStmt>()->origin->as<GlobalPtrStmt>(); + } else { + src = stmt->src->as<GlobalPtrStmt>(); + } + TI_ASSERT(src->width() == 1); auto snodes = src->snodes; if (!snodes[0]->has_adjoint()) { @@ -1003,12 +1023,30 @@ TI_ASSERT(snodes[0]->get_adjoint() != nullptr); snodes[0] = snodes[0]->get_adjoint(); auto adj_ptr = insert<GlobalPtrStmt>(snodes, src->indices); + if (is_ptr_offset) { + adj_ptr = insert<PtrOffsetStmt>(adj_ptr, + stmt->src->as<PtrOffsetStmt>()->offset); + } insert<AtomicOpStmt>(AtomicOpType::add, adj_ptr, load(adjoint(stmt))); } void visit(GlobalStoreStmt *stmt) override { // erase and replace with global load adjoint - GlobalPtrStmt *dest = stmt->dest->as<GlobalPtrStmt>(); + if (stmt->dest->is<ExternalPtrStmt>()) { + TI_ERROR( + "Exporting data to external array (such as numpy array) not " + "supported in AutoDiff for now") + } + + GlobalPtrStmt *dest = nullptr; + bool is_ptr_offset = false; + if (stmt->dest->is<PtrOffsetStmt>()) { + is_ptr_offset = true; + dest = stmt->dest->as<PtrOffsetStmt>()->origin->as<GlobalPtrStmt>(); + } else { + dest = stmt->dest->as<GlobalPtrStmt>(); + } + TI_ASSERT(dest->width() == 1); auto snodes = dest->snodes; if (!snodes[0]->has_adjoint()) { @@ -1018,24 +1056,40 @@ TI_ASSERT(snodes[0]->get_adjoint() != nullptr); snodes[0] = 
snodes[0]->get_adjoint(); auto adjoint_ptr = insert<GlobalPtrStmt>(snodes, dest->indices); - auto load = insert<GlobalLoadStmt>(adjoint_ptr); - accumulate(stmt->val, load); + if (is_ptr_offset) { + adjoint_ptr = insert<PtrOffsetStmt>( + adjoint_ptr, stmt->dest->as<PtrOffsetStmt>()->offset); + } + accumulate(stmt->val, insert<GlobalLoadStmt>(adjoint_ptr)); stmt->parent->erase(stmt); } void visit(AtomicOpStmt *stmt) override { // erase and replace with global load adjoint - GlobalPtrStmt *dest = stmt->dest->as<GlobalPtrStmt>(); + GlobalPtrStmt *dest = nullptr; + bool is_ptr_offset = false; + if (stmt->dest->is<PtrOffsetStmt>()) { + is_ptr_offset = true; + dest = stmt->dest->as<PtrOffsetStmt>()->origin->as<GlobalPtrStmt>(); + } else { + dest = stmt->dest->as<GlobalPtrStmt>(); + } + TI_ASSERT(dest->width() == 1); auto snodes = dest->snodes; - if (snodes[0]->has_adjoint()) { - TI_ASSERT(snodes[0]->get_adjoint() != nullptr); - snodes[0] = snodes[0]->get_adjoint(); - auto adjoint_ptr = insert<GlobalPtrStmt>(snodes, dest->indices); - accumulate(stmt->val, insert<GlobalLoadStmt>(adjoint_ptr)); - } else { + if (!snodes[0]->has_adjoint()) { // no gradient (likely integer types) + return; } + + TI_ASSERT(snodes[0]->get_adjoint() != nullptr); + snodes[0] = snodes[0]->get_adjoint(); + auto adjoint_ptr = insert<GlobalPtrStmt>(snodes, dest->indices); + if (is_ptr_offset) { + adjoint_ptr = insert<PtrOffsetStmt>( + adjoint_ptr, stmt->dest->as<PtrOffsetStmt>()->offset); + } + accumulate(stmt->val, insert<GlobalLoadStmt>(adjoint_ptr)); stmt->parent->erase(stmt); } }; @@ -1278,7 +1332,14 @@ class MakeDual : public ADTransform { void visit(GlobalLoadStmt *stmt) override { // issue global store to dual - GlobalPtrStmt *src = stmt->src->as<GlobalPtrStmt>(); + GlobalPtrStmt *src = nullptr; + bool is_ptr_offset = false; + if (stmt->src->is<PtrOffsetStmt>()) { + is_ptr_offset = true; + src = stmt->src->as<PtrOffsetStmt>()->origin->as<GlobalPtrStmt>(); + } else { + src = stmt->src->as<GlobalPtrStmt>(); + } TI_ASSERT(src->width() == 1); auto snodes = src->snodes; if (!snodes[0]->has_dual()) { @@ -1292,11 +1353,22 @@ TI_ASSERT(snodes[0]->get_dual() != nullptr); snodes[0] = snodes[0]->get_dual(); auto dual_ptr = insert<GlobalPtrStmt>(snodes, src->indices); + if (is_ptr_offset) { + 
dual_ptr = insert<PtrOffsetStmt>(dual_ptr, + stmt->src->as<PtrOffsetStmt>()->offset); + } accumulate(stmt, insert<GlobalLoadStmt>(dual_ptr)); } void visit(GlobalStoreStmt *stmt) override { - GlobalPtrStmt *dest = stmt->dest->as<GlobalPtrStmt>(); + GlobalPtrStmt *dest = nullptr; + bool is_ptr_offset = false; + if (stmt->dest->is<PtrOffsetStmt>()) { + is_ptr_offset = true; + dest = stmt->dest->as<PtrOffsetStmt>()->origin->as<GlobalPtrStmt>(); + } else { + dest = stmt->dest->as<GlobalPtrStmt>(); + } TI_ASSERT(dest->width() == 1); auto snodes = dest->snodes; if (!snodes[0]->has_dual()) { @@ -1306,11 +1378,22 @@ TI_ASSERT(snodes[0]->get_dual() != nullptr); snodes[0] = snodes[0]->get_dual(); auto dual_ptr = insert<GlobalPtrStmt>(snodes, dest->indices); + if (is_ptr_offset) { + dual_ptr = insert<PtrOffsetStmt>(dual_ptr, + stmt->dest->as<PtrOffsetStmt>()->offset); + } insert<AtomicOpStmt>(AtomicOpType::add, dual_ptr, load(dual(stmt->val))); } void visit(AtomicOpStmt *stmt) override { - GlobalPtrStmt *dest = stmt->dest->as<GlobalPtrStmt>(); + GlobalPtrStmt *dest = nullptr; + bool is_ptr_offset = false; + if (stmt->dest->is<PtrOffsetStmt>()) { + is_ptr_offset = true; + dest = stmt->dest->as<PtrOffsetStmt>()->origin->as<GlobalPtrStmt>(); + } else { + dest = stmt->dest->as<GlobalPtrStmt>(); + } TI_ASSERT(dest->width() == 1); auto snodes = dest->snodes; if (!snodes[0]->has_dual()) { @@ -1320,6 +1403,10 @@ TI_ASSERT(snodes[0]->get_dual() != nullptr); snodes[0] = snodes[0]->get_dual(); auto dual_ptr = insert<GlobalPtrStmt>(snodes, dest->indices); + if (is_ptr_offset) { + dual_ptr = insert<PtrOffsetStmt>(dual_ptr, + stmt->dest->as<PtrOffsetStmt>()->offset); + } insert<AtomicOpStmt>(AtomicOpType::add, dual_ptr, load(dual(stmt->val))); } }; diff --git a/taichi/util/io.h b/taichi/util/io.h index 4a47f7310012e..30c41f9151692 100644 --- a/taichi/util/io.h +++ b/taichi/util/io.h @@ -10,6 +10,7 @@ #include #include #include +#include <sys/stat.h> #if defined(TI_PLATFORM_WINDOWS) #include @@ -17,6 +18,11 @@ TI_NAMESPACE_BEGIN +inline bool path_exists(const std::string &dir) { + struct stat buffer; + return stat(dir.c_str(), &buffer) == 0; +} + // TODO: move to std::filesystem after it's nonexperimental on all platforms inline void 
create_directories(const std::string &dir) { #if defined(TI_PLATFORM_WINDOWS) diff --git a/tests/python/test_ad_dynamic_index.py b/tests/python/test_ad_dynamic_index.py new file mode 100644 index 0000000000000..fc3e504a9978f --- /dev/null +++ b/tests/python/test_ad_dynamic_index.py @@ -0,0 +1,28 @@ +import taichi as ti +from tests import test_utils + + +@test_utils.test(require=ti.extension.dynamic_index, + dynamic_index=True, + debug=True) +def test_matrix_non_constant_index(): + m = ti.Matrix.field(2, 2, ti.f32, 5, needs_grad=True) + n = ti.Matrix.field(2, 2, ti.f32, 5, needs_grad=True) + loss = ti.field(ti.f32, (), needs_grad=True) + + n.fill(0) + + @ti.kernel + def func1(): + for i in range(5): + for j, k in ti.ndrange(2, 2): + m[i][j, k] = (j + 1) * (k + 1) * n[i][j, k] + loss[None] += m[i][j, k] + + loss.grad[None] = 1.0 + func1.grad() + + for i in range(5): + for j in range(2): + for k in range(2): + assert n.grad[i][j, k] == (j + 1) * (k + 1) diff --git a/tests/python/test_ad_math_func.py b/tests/python/test_ad_math_func.py index 34192d5d9beb9..63be38b560f19 100644 --- a/tests/python/test_ad_math_func.py +++ b/tests/python/test_ad_math_func.py @@ -2,7 +2,7 @@ from tests import test_utils -@test_utils.test(require=ti.extension.adstack, dynamic_index=False) +@test_utils.test(require=ti.extension.adstack, dynamic_index=True) def test_polar_decompose_2D(): # `polar_decompose3d` in current Taichi version (v1.1) does not support autodiff, # becasue it mixed usage of for-loops and statements without looping. 
diff --git a/tests/python/test_quant_float.py b/tests/python/test_quant_float.py index 286e9e92c8be3..492ef9be26183 100644 --- a/tests/python/test_quant_float.py +++ b/tests/python/test_quant_float.py @@ -6,12 +6,13 @@ from tests import test_utils +@pytest.mark.parametrize('max_num_bits', [32, 64]) @test_utils.test(require=ti.extension.quant) -def test_quant_float_unsigned(): +def test_quant_float_unsigned(max_num_bits): qflt = ti.types.quant.float(exp=6, frac=13, signed=False) x = ti.field(dtype=qflt) - bitpack = ti.BitpackedFields(max_num_bits=32) + bitpack = ti.BitpackedFields(max_num_bits=max_num_bits) bitpack.place(x) ti.root.place(bitpack)