From 39e4581515022c4fbfc858b7e686ab6ce84251ba Mon Sep 17 00:00:00 2001 From: Haidong Lan Date: Sat, 31 Dec 2022 06:18:20 +0800 Subject: [PATCH] [cuda] Fix LLVM15 rsqrt perf regression (#7012) Fixes #6957 --- taichi/runtime/cuda/jit_cuda.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/taichi/runtime/cuda/jit_cuda.cpp b/taichi/runtime/cuda/jit_cuda.cpp index 98c5538875828..898df3d182d7c 100644 --- a/taichi/runtime/cuda/jit_cuda.cpp +++ b/taichi/runtime/cuda/jit_cuda.cpp @@ -165,7 +165,14 @@ std::string JITSessionCUDA::compile_module_to_ptx( if (kFTZDenorms) { for (llvm::Function &fn : *module) { - fn.addFnAttr("nvptx-f32ftz", "true"); + /* nvptx-f32ftz was deprecated. + * + * https://github.com/llvm/llvm-project/commit/a4451d88ee456304c26d552749aea6a7f5154bde#diff-6fda74ef428299644e9f49a2b0994c0d850a760b89828f655030a114060d075a + */ + fn.addFnAttr("denormal-fp-math-f32", "preserve-sign"); + + // Use unsafe fp math for sqrt.approx instead of sqrt.rn + fn.addFnAttr("unsafe-fp-math", "true"); } }