From babb1285ba0794e8923de581f5dac488692805b8 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Mon, 26 Aug 2024 08:38:05 -0700 Subject: [PATCH] PR#4179 (#2435) Summary: X-link: https://github.com/facebookresearch/FBGEMM/pull/124 Pull Request resolved: https://github.com/pytorch/benchmark/pull/2435 X-link: https://github.com/pytorch/FBGEMM/pull/3027 This PR is a dependency of the grid_constant PR. The API for TMA descriptor fill methods was changed, so I fixed up all usages in fbcode. https://github.com/triton-lang/triton/pull/4179 Reviewed By: minjang Differential Revision: D61729239 fbshipit-source-id: 8ce25b7c230c3f4ad960f76aa0dd29626c8ee4d2 --- torchbenchmark/util/kernels/triton_fused_attention.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torchbenchmark/util/kernels/triton_fused_attention.py b/torchbenchmark/util/kernels/triton_fused_attention.py index 872ba8fac..0c84fa25d 100644 --- a/torchbenchmark/util/kernels/triton_fused_attention.py +++ b/torchbenchmark/util/kernels/triton_fused_attention.py @@ -62,7 +62,7 @@ def fill_1d_tma_descriptor(self, name, ptr, dim, block_dim, element_size): else: desc_x = self.cuda_descriptors[name] buf_x = torch.empty_like(desc_x, device="cpu", pin_memory=True) - self.fill_1d_tma_descriptor_inner(ptr, dim, block_dim, element_size, buf_x.numpy()) + self.fill_1d_tma_descriptor_inner(ptr, dim, block_dim, element_size, buf_x.data_ptr()) desc_x.copy_(buf_x, non_blocking=True) @@ -75,7 +75,7 @@ def fill_2d_tma_descriptor(self, name, ptr, dim1, dim0, block_dim1, block_dim0, else: desc_x = self.cuda_descriptors[name] buf_x = torch.empty_like(desc_x, device="cpu", pin_memory=True) - self.fill_2d_tma_descriptor_inner(ptr, dim1, dim0, block_dim1, block_dim0, element_size, buf_x.numpy()) + self.fill_2d_tma_descriptor_inner(ptr, dim1, dim0, block_dim1, block_dim0, element_size, buf_x.data_ptr()) desc_x.copy_(buf_x, non_blocking=True)