Skip to content

Commit

Permalink
Adding e2e tests for i1 mask attentions (#19312)
Browse files Browse the repository at this point in the history
* The new tests exercise compilation with the
`--iree-experimental-packed-i1-storage` option turned on, which enables a
real bit-packed i1 data type in memory.
* Only certain shapes are correct at this moment as upstream patches for
emulating unaligned vector stores are not yet merged.

Signed-off-by: Alan Li <[email protected]>
  • Loading branch information
lialan authored Dec 5, 2024
1 parent df34911 commit 5dee2c8
Show file tree
Hide file tree
Showing 5 changed files with 277 additions and 0 deletions.
26 changes: 26 additions & 0 deletions tests/e2e/linalg_ext_ops/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ ALL_SRCS = enforce_glob(
"winograd_output.mlir",
],
include = ["*.mlir"],
exclude = [
"attention_i1_mask.mlir",
],
)

iree_check_single_backend_test_suite(
Expand All @@ -39,6 +42,24 @@ iree_check_single_backend_test_suite(
target_backend = "llvm-cpu",
)

# Packed-i1 storage suite: runs attention_i1_mask.mlir with
# --iree-experimental-packed-i1-storage so i1 tensors are stored bit-packed
# in memory. Kept as a separate suite (and excluded from ALL_SRCS above)
# because the flag changes the memory layout for every i1 tensor in the test.
iree_check_single_backend_test_suite(
name = "check_llvm-cpu_local-task_i1",
srcs = [
"attention_i1_mask.mlir",
],
compiler_flags = [
"--iree-llvmcpu-target-cpu=generic",
"--iree-experimental-packed-i1-storage",
],
driver = "local-task",
tags = [
# attention fails with a wasm target, just disable the tests there for now
# error: Yield operand #2 is not equivalent to the corresponding iter bbArg
"nowasm",
],
target_backend = "llvm-cpu",
)

VMVX_SRCS = enforce_glob(
# keep sorted
[
Expand All @@ -52,6 +73,7 @@ VMVX_SRCS = enforce_glob(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
],
)

Expand All @@ -75,6 +97,7 @@ LLVM_GPU_SRCS = enforce_glob(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
],
)

Expand Down Expand Up @@ -107,6 +130,7 @@ ROCM_HIP_SRCS = enforce_glob(
exclude = [
"top-k.mlir",
"attention.mlir",
"attention_i1_mask.mlir",
],
)

Expand All @@ -131,6 +155,7 @@ iree_check_single_backend_test_suite(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
"top-k.mlir",
],
),
Expand All @@ -152,6 +177,7 @@ iree_check_single_backend_test_suite(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
"top-k.mlir",
],
),
Expand Down
16 changes: 16 additions & 0 deletions tests/e2e/linalg_ext_ops/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,22 @@ iree_check_single_backend_test_suite(
"nowasm"
)

# Packed-i1 storage suite: compiles attention_i1_mask.mlir with
# --iree-experimental-packed-i1-storage so i1 tensors use bit-packed storage.
# NOTE(review): this mirrors the suite of the same name in BUILD.bazel —
# keep the two definitions in sync.
iree_check_single_backend_test_suite(
NAME
check_llvm-cpu_local-task_i1
SRCS
"attention_i1_mask.mlir"
TARGET_BACKEND
"llvm-cpu"
DRIVER
"local-task"
COMPILER_FLAGS
"--iree-llvmcpu-target-cpu=generic"
"--iree-experimental-packed-i1-storage"
LABELS
"nowasm"
)

iree_check_single_backend_test_suite(
NAME
check_vmvx_local-task
Expand Down
42 changes: 42 additions & 0 deletions tests/e2e/linalg_ext_ops/attention.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,48 @@ func.func @causal_attention1x3x4() {
return
}

// Attention with an all-true i1 mask supplied as a dense (unpacked) i1
// tensor. With every position unmasked, the result must equal plain
// unmasked attention over the same Q/K/V.
func.func @attention1x4x4_i1_mask_all_ones() {
%init = tensor.empty() : tensor<1x4x4xf32>
%query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
%value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

// All 16 mask positions are true, so no score is masked out.
%mask = util.unfoldable_constant dense<[[[true, true, true, true],
[true, true, true, true],
[true, true, true, true],
[true, true, true, true]]]> : tensor<1x4x4xi1>

%scale = arith.constant 0.5 : f32
// Maps: d0=batch, d1=query seq, d2=head dim, d3=key seq, d4=value dim;
// the scalar scale uses the empty map, the mask indexes (batch, q, k).
%1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
affine_map<(d0, d1, d2, d3, d4) -> ()>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
^bb0(%arg0: f32):
iree_linalg_ext.yield %arg0 : f32
} -> tensor<1x4x4xf32>
check.expect_almost_eq_const(
%1,
dense<[[[0.798884, 0.898884, 0.998884, 1.09888],
[0.941939, 1.04194, 1.14194, 1.24194],
[1.05371, 1.15371, 1.25371, 1.35371],
[1.13295, 1.23295, 1.33295, 1.43295]]]> : tensor<1x4x4xf32>
) : tensor<1x4x4xf32>
return
}

func.func @softcap_attention1x3x4() {
%init = tensor.empty() : tensor<1x3x4xf32>
Expand Down
122 changes: 122 additions & 0 deletions tests/e2e/linalg_ext_ops/attention_i1_mask.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
// Attention with a bit-packed i1 mask: two i8 bytes are bitcast into a
// tensor<1x4x4xi1> (16 bits). Both bytes are 165 = 0b10100101, which is a
// palindromic bit pattern, so the alternating mask rows it produces are
// the same regardless of LSB- vs MSB-first unpacking.
func.func @attention1x4x4_i1_mask() {
%init = tensor.empty() : tensor<1x4x4xf32>
%query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
%value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

// 165 = 0b10100101 per byte; the bitcast reinterprets the 2 bytes as 16
// packed i1 mask elements (this is what packed-i1 storage exercises).
%i8mask = util.unfoldable_constant dense<[165, 165]> : tensor<2xi8>
%mask = flow.tensor.bitcast %i8mask : tensor<2xi8> -> tensor<1x4x4xi1>

%scale = arith.constant 0.5 : f32
%1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
affine_map<(d0, d1, d2, d3, d4) -> ()>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
^bb0(%arg0: f32):
iree_linalg_ext.yield %arg0 : f32
} -> tensor<1x4x4xf32>
check.expect_almost_eq_const(
%1,
dense<[[[0.57895, 0.67895, 0.77895, 0.87895],
[1.09108, 1.19108, 1.29108, 1.39108],
[0.774324, 0.874324, 0.974324, 1.07432],
[1.22842, 1.32842, 1.42842, 1.52842]]]> : tensor<1x4x4xf32>
) : tensor<1x4x4xf32>
return
}

// Attention with a bit-packed all-ones mask: bytes [255, 255] bitcast to 16
// true i1 elements. The expected output matches the dense all-true-mask
// test in attention.mlir (same Q/K/V, same scale), i.e. unmasked attention.
func.func @attention1x4x4_i1_mask_all_ones() {
%init = tensor.empty() : tensor<1x4x4xf32>
%query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
%value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

// 255 = 0b11111111: every packed bit is set, so nothing is masked.
%i8mask = util.unfoldable_constant dense<[255, 255]> : tensor<2xi8>
%mask = flow.tensor.bitcast %i8mask : tensor<2xi8> -> tensor<1x4x4xi1>

%scale = arith.constant 0.5 : f32
%1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
affine_map<(d0, d1, d2, d3, d4) -> ()>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
^bb0(%arg0: f32):
iree_linalg_ext.yield %arg0 : f32
} -> tensor<1x4x4xf32>
check.expect_almost_eq_const(
%1,
dense<[[[0.798884, 0.898884, 0.998884, 1.09888],
[0.941939, 1.04194, 1.14194, 1.24194],
[1.05371, 1.15371, 1.25371, 1.35371],
[1.13295, 1.23295, 1.33295, 1.43295]]]> : tensor<1x4x4xf32>
) : tensor<1x4x4xf32>
return
}

// Attention with a bit-packed mask intended to be lower-triangular (causal).
// NOTE(review): bytes 140 = 0b10001100 and 239 = 0b11101111 yield the tril
// rows 1000/1100/1110/1111 only under MSB-first unpacking — confirm that
// this matches the packed-i1 storage bit order.
func.func @attention1x4x4_i1_mask_tril() {
%init = tensor.empty() : tensor<1x4x4xf32>
%query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
%value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

// Two bytes reinterpreted as the 16 packed mask bits of a 4x4 mask.
%i8mask = util.unfoldable_constant dense<[140, 239]> : tensor<2xi8>
%mask = flow.tensor.bitcast %i8mask : tensor<2xi8> -> tensor<1x4x4xi1>

%scale = arith.constant 0.5 : f32
%1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
affine_map<(d0, d1, d2, d3, d4) -> ()>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
^bb0(%arg0: f32):
iree_linalg_ext.yield %arg0 : f32
} -> tensor<1x4x4xf32>
check.expect_almost_eq_const(
%1,
dense<[[[1.11993, 1.21993, 1.31993, 1.41993],
[1.3, 1.4, 1.5, 1.6],
[1.05371, 1.15371, 1.25371, 1.35371],
[1.15549, 1.25549, 1.35549, 1.45549]]]> : tensor<1x4x4xf32>
) : tensor<1x4x4xf32>
return
}
71 changes: 71 additions & 0 deletions tests/e2e/subbyte_types/subbyte_types.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,74 @@ func.func @i1_type_slice() {
check.expect_eq_const(%tensor_res, dense<[255]> : tensor<1xi8>) : tensor<1xi8>
return
}

// Round-trips one byte through a packed tensor<2x4xi1> and back: the
// optimization barrier forces the packed i1 value to be materialized in
// memory, and the final bitcast must recover the original byte exactly.
func.func @i1_representation() {
%mask = util.unfoldable_constant dense<[140]> : tensor<1xi8>
%casted = flow.tensor.bitcast %mask : tensor<1xi8> -> tensor<2x4xi1>
// Barrier prevents the two bitcasts from being folded away at compile time.
%bar = util.optimization_barrier %casted : tensor<2x4xi1>
%tensor_res = flow.tensor.bitcast %bar : tensor<2x4xi1> -> tensor<1xi8>
check.expect_eq_const(%tensor_res, dense<[140]> : tensor<1xi8>) : tensor<1xi8>
return
}

// Same round-trip as @i1_representation but with two bytes viewed as a
// 2x8 i1 tensor, so each row occupies exactly one byte of packed storage.
func.func @i1_representation_2() {
%mask = util.unfoldable_constant dense<[140, 77]> : tensor<2xi8>
%casted = flow.tensor.bitcast %mask : tensor<2xi8> -> tensor<2x8xi1>
// Barrier forces the packed representation to actually be stored/loaded.
%bar = util.optimization_barrier %casted : tensor<2x8xi1>
%tensor_res = flow.tensor.bitcast %bar : tensor<2x8xi1> -> tensor<2xi8>
check.expect_eq_const(%tensor_res, dense<[140, 77]> : tensor<2xi8>) : tensor<2xi8>
return
}

// Round-trip with a 4x4 shape: each packed row is only 4 bits, so rows are
// not byte-aligned — this exercises sub-byte row boundaries in storage.
func.func @i1_representation_3() {
%mask = util.unfoldable_constant dense<[140, 77]> : tensor<2xi8>
%casted = flow.tensor.bitcast %mask : tensor<2xi8> -> tensor<4x4xi1>
// Barrier forces materialization of the packed layout.
%bar = util.optimization_barrier %casted : tensor<4x4xi1>
%tensor_res = flow.tensor.bitcast %bar : tensor<4x4xi1> -> tensor<2xi8>
check.expect_eq_const(%tensor_res, dense<[140, 77]> : tensor<2xi8>) : tensor<2xi8>
return
}

// Computes i1 values (x > 0) from i8 input via linalg.generic, then bitcasts
// the resulting tensor<8xi1> to one byte to verify the packed bit layout.
// Input bits [1,1,0,0,0,0,1,1] pack to 195 = 0b11000011; the pattern is
// palindromic, so this expectation holds for either bit order.
func.func @truncate_i1() {
%mask = util.unfoldable_constant dense<[1, 1, 0, 0,
0, 0, 1, 1]> : tensor<8xi8>
%nm = tensor.empty() : tensor<8xi1>
%truncm = linalg.generic
{indexing_maps = [
affine_map<(d0) -> (d0)>,
affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
ins(%mask: tensor<8xi8>)
outs(%nm: tensor<8xi1>) {
^bb0(%in: i8, %out: i1):
%zero = arith.constant 0 : i8
// Element-wise signed compare: each i8 becomes true iff it is > 0.
%truncated = arith.cmpi "sgt", %in, %zero : i8
linalg.yield %truncated : i1
} -> tensor<8xi1>
%tensor_res = flow.tensor.bitcast %truncm : tensor<8xi1> -> tensor<1xi8>
check.expect_eq_const(%tensor_res, dense<[195]> : tensor<1xi8>) : tensor<1xi8>
return
}

// 2-D variant of @truncate_i1: a 4x4 i8 tensor is compared against zero to
// produce a 4x4 i1 tensor, which is then bitcast to 2 bytes. Rows pack as
// 0011,1100 -> 60 and 1100,0011 -> 195; both byte patterns are palindromic,
// so the expected values are bit-order independent.
func.func @truncate_i1_2() {
%mask = util.unfoldable_constant dense<[[0, 0, 1, 1],
[1, 1, 0, 0],
[1, 1, 0, 0],
[0, 0, 1, 1]]> : tensor<4x4xi8>
%nm = tensor.empty() : tensor<4x4xi1>
%truncm = linalg.generic
{indexing_maps = [
affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
ins(%mask: tensor<4x4xi8>)
outs(%nm: tensor<4x4xi1>) {
^bb0(%in: i8, %out: i1):
%zero = arith.constant 0 : i8
// Element-wise signed compare: each i8 becomes true iff it is > 0.
%truncated = arith.cmpi "sgt", %in, %zero : i8
linalg.yield %truncated : i1
} -> tensor<4x4xi1>
%tensor_res = flow.tensor.bitcast %truncm : tensor<4x4xi1> -> tensor<2xi8>
check.expect_eq_const(%tensor_res, dense<[60, 195]> : tensor<2xi8>) : tensor<2xi8>
return
}

0 comments on commit 5dee2c8

Please sign in to comment.