From 8411214c56eb3f9fb77fe3a9f156d4e6ef6cec06 Mon Sep 17 00:00:00 2001
From: David Green
Date: Tue, 17 Sep 2024 11:27:05 +0100
Subject: [PATCH] [AArch64] Tests for vecreduce.or(sext(x)), with or/and/xor
 and sext/zext. NFC

---
 llvm/test/CodeGen/AArch64/vecreduce-bitext.ll | 1437 +++++++++++++++++
 1 file changed, 1437 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/vecreduce-bitext.ll

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-bitext.ll b/llvm/test/CodeGen/AArch64/vecreduce-bitext.ll
new file mode 100644
index 00000000000000..1b1b7e676bb3cc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/vecreduce-bitext.ll
@@ -0,0 +1,1437 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s
+
+define zeroext i16 @and_sext_v8i8_i16(<8 x i8> %x) {
+; CHECK-LABEL: and_sext_v8i8_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    and x8, x8, x8, lsr #32
+; CHECK-NEXT:    lsr x9, x8, #16
+; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <8 x i8> %x to <8 x i16>
+  %z = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %y)
+  ret i16 %z
+}
+
+define zeroext i16 @and_zext_v8i8_i16(<8 x i8> %x) {
+; CHECK-LABEL: and_zext_v8i8_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    and x8, x8, x8, lsr #32
+; CHECK-NEXT:    lsr x9, x8, #16
+; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <8 x i8> %x to <8 x i16>
+  %z = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %y)
+  ret i16 %z
+}
+
+define zeroext i16 @and_sext_v16i8_i16(<16 x i8> %x) {
+; CHECK-LABEL: and_sext_v16i8_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    and x8, x8, x8, lsr #32
+; CHECK-NEXT:    lsr x9, x8, #16
+; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <16 x i8> %x to <16 x i16>
+  %z = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %y)
+  ret i16 %z
+}
+
+define zeroext i16 @and_zext_v16i8_i16(<16 x i8> %x) {
+; CHECK-LABEL: and_zext_v16i8_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    and x8, x8, x8, lsr #32
+; CHECK-NEXT:    lsr x9, x8, #16
+; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <16 x i8> %x to <16 x i16>
+  %z = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %y)
+  ret i16 %z
+}
+
+define i32 @and_sext_v8i8_i32(<8 x i8> %x) {
+; CHECK-LABEL: and_sext_v8i8_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <8 x i8> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %y)
+  ret i32 %z
+}
+
+define i32 @and_zext_v8i8_i32(<8 x i8> %x) {
+; CHECK-LABEL: and_zext_v8i8_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <8 x i8> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %y)
+  ret i32 %z
+}
+
+define i32 @and_sext_v16i8_i32(<16 x i8> %x) {
+; CHECK-LABEL: and_sext_v16i8_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    and v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <16 x i8> %x to <16 x i32>
+  %z = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %y)
+  ret i32 %z
+}
+
+define i32 @and_zext_v16i8_i32(<16 x i8> %x) {
+; CHECK-LABEL: and_zext_v16i8_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll2 v2.4s, v1.8h, #0
+; CHECK-NEXT:    ushll2 v3.4s, v0.8h, #0
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    and v1.16b, v3.16b, v2.16b
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <16 x i8> %x to <16 x i32>
+  %z = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %y)
+  ret i32 %z
+}
+
+define i64 @and_sext_v8i8_i64(<8 x i8> %x) {
+; CHECK-LABEL: and_sext_v8i8_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    and v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <8 x i8> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @and_zext_v8i8_i64(<8 x i8> %x) {
+; CHECK-LABEL: and_zext_v8i8_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll2 v2.2d, v1.4s, #0
+; CHECK-NEXT:    ushll2 v3.2d, v0.4s, #0
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    and v1.16b, v3.16b, v2.16b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <8 x i8> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @and_sext_v16i8_i64(<16 x i8> %x) {
+; CHECK-LABEL: and_sext_v16i8_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll v2.4s, v1.4h, #0
+; CHECK-NEXT:    sshll v3.4s, v0.4h, #0
+; CHECK-NEXT:    sshll2 v1.4s, v1.8h, #0
+; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    ext v4.16b, v2.16b, v2.16b, #8
+; CHECK-NEXT:    ext v5.16b, v3.16b, v3.16b, #8
+; CHECK-NEXT:    ext v6.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v7.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    and v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    and v3.8b, v5.8b, v4.8b
+; CHECK-NEXT:    and v0.8b, v1.8b, v0.8b
+; CHECK-NEXT:    and v2.8b, v7.8b, v6.8b
+; CHECK-NEXT:    and v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <16 x i8> %x to <16 x i64>
+  %z = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @and_zext_v16i8_i64(<16 x i8> %x) {
+; CHECK-LABEL: and_zext_v16i8_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll v2.4s, v1.4h, #0
+; CHECK-NEXT:    ushll v3.4s, v0.4h, #0
+; CHECK-NEXT:    ushll2 v1.4s, v1.8h, #0
+; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    and v5.8b, v3.8b, v2.8b
+; CHECK-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
+; CHECK-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
+; CHECK-NEXT:    and v4.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll2 v1.2d, v1.4s, #0
+; CHECK-NEXT:    ushll2 v0.2d, v0.4s, #0
+; CHECK-NEXT:    and v4.8b, v5.8b, v4.8b
+; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    and v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    ushll v2.2d, v4.2s, #0
+; CHECK-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-NEXT:    and v0.16b, v2.16b, v0.16b
+; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <16 x i8> %x to <16 x i64>
+  %z = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> %y)
+  ret i64 %z
+}
+
+define i32 @and_sext_v4i16_i32(<4 x i16> %x) {
+; CHECK-LABEL: and_sext_v4i16_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <4 x i16> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %y)
+  ret i32 %z
+}
+
+define i32 @and_zext_v4i16_i32(<4 x i16> %x) {
+; CHECK-LABEL: and_zext_v4i16_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <4 x i16> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %y)
+  ret i32 %z
+}
+
+define i32 @and_sext_v8i16_i32(<8 x i16> %x) {
+; CHECK-LABEL: and_sext_v8i16_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <8 x i16> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %y)
+  ret i32 %z
+}
+
+define i32 @and_zext_v8i16_i32(<8 x i16> %x) {
+; CHECK-LABEL: and_zext_v8i16_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    and w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <8 x i16> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %y)
+  ret i32 %z
+}
+
+define i64 @and_sext_v4i16_i64(<4 x i16> %x) {
+; CHECK-LABEL: and_sext_v4i16_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <4 x i16> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @and_zext_v4i16_i64(<4 x i16> %x) {
+; CHECK-LABEL: and_zext_v4i16_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <4 x i16> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @and_sext_v8i16_i64(<8 x i16> %x) {
+; CHECK-LABEL: and_sext_v8i16_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    and v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <8 x i16> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @and_zext_v8i16_i64(<8 x i16> %x) {
+; CHECK-LABEL: and_zext_v8i16_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll2 v2.2d, v1.4s, #0
+; CHECK-NEXT:    ushll2 v3.2d, v0.4s, #0
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    and v1.16b, v3.16b, v2.16b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <8 x i16> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @and_sext_v2i32_i64(<2 x i32> %x) {
+; CHECK-LABEL: and_sext_v2i32_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <2 x i32> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @and_zext_v2i32_i64(<2 x i32> %x) {
+; CHECK-LABEL: and_zext_v2i32_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <2 x i32> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @and_sext_v4i32_i64(<4 x i32> %x) {
+; CHECK-LABEL: and_sext_v4i32_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <4 x i32> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @and_zext_v4i32_i64(<4 x i32> %x) {
+; CHECK-LABEL: and_zext_v4i32_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <4 x i32> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %y)
+  ret i64 %z
+}
+
+define zeroext i16 @or_sext_v8i8_i16(<8 x i8> %x) {
+; CHECK-LABEL: or_sext_v8i8_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    orr w8, w8, w9
+; CHECK-NEXT:    orr w8, w8, w8, lsr #16
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <8 x i8> %x to <8 x i16>
+  %z = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %y)
+  ret i16 %z
+}
+
+define zeroext i16 @or_zext_v8i8_i16(<8 x i8> %x) {
+; CHECK-LABEL: or_zext_v8i8_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    orr w8, w8, w9
+; CHECK-NEXT:    orr w8, w8, w8, lsr #16
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <8 x i8> %x to <8 x i16>
+  %z = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %y)
+  ret i16 %z
+}
+
+define zeroext i16 @or_sext_v16i8_i16(<16 x i8> %x) {
+; CHECK-LABEL: or_sext_v16i8_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    orr w8, w8, w9
+; CHECK-NEXT:    orr w8, w8, w8, lsr #16
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <16 x i8> %x to <16 x i16>
+  %z = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %y)
+  ret i16 %z
+}
+
+define zeroext i16 @or_zext_v16i8_i16(<16 x i8> %x) {
+; CHECK-LABEL: or_zext_v16i8_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    orr w8, w8, w9
+; CHECK-NEXT:    orr w8, w8, w8, lsr #16
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <16 x i8> %x to <16 x i16>
+  %z = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %y)
+  ret i16 %z
+}
+
+define i32 @or_sext_v8i8_i32(<8 x i8> %x) {
+; CHECK-LABEL: or_sext_v8i8_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    orr w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <8 x i8> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %y)
+  ret i32 %z
+}
+
+define i32 @or_zext_v8i8_i32(<8 x i8> %x) {
+; CHECK-LABEL: or_zext_v8i8_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    orr w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <8 x i8> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %y)
+  ret i32 %z
+}
+
+define i32 @or_sext_v16i8_i32(<16 x i8> %x) {
+; CHECK-LABEL: or_sext_v16i8_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    orr v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    orr w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <16 x i8> %x to <16 x i32>
+  %z = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %y)
+  ret i32 %z
+}
+
+define i32 @or_zext_v16i8_i32(<16 x i8> %x) {
+; CHECK-LABEL: or_zext_v16i8_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    orr v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    orr w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <16 x i8> %x to <16 x i32>
+  %z = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %y)
+  ret i32 %z
+}
+
+define i64 @or_sext_v8i8_i64(<8 x i8> %x) {
+; CHECK-LABEL: or_sext_v8i8_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    orr v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <8 x i8> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @or_zext_v8i8_i64(<8 x i8> %x) {
+; CHECK-LABEL: or_zext_v8i8_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    orr v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <8 x i8> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @or_sext_v16i8_i64(<16 x i8> %x) {
+; CHECK-LABEL: or_sext_v16i8_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll v2.4s, v1.4h, #0
+; CHECK-NEXT:    sshll v3.4s, v0.4h, #0
+; CHECK-NEXT:    sshll2 v1.4s, v1.8h, #0
+; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    ext v4.16b, v2.16b, v2.16b, #8
+; CHECK-NEXT:    ext v5.16b, v3.16b, v3.16b, #8
+; CHECK-NEXT:    ext v6.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v7.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    orr v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    orr v3.8b, v5.8b, v4.8b
+; CHECK-NEXT:    orr v0.8b, v1.8b, v0.8b
+; CHECK-NEXT:    orr v2.8b, v7.8b, v6.8b
+; CHECK-NEXT:    orr v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <16 x i8> %x to <16 x i64>
+  %z = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @or_zext_v16i8_i64(<16 x i8> %x) {
+; CHECK-LABEL: or_zext_v16i8_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll v2.4s, v1.4h, #0
+; CHECK-NEXT:    ushll v3.4s, v0.4h, #0
+; CHECK-NEXT:    ushll2 v1.4s, v1.8h, #0
+; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    ext v4.16b, v2.16b, v2.16b, #8
+; CHECK-NEXT:    ext v5.16b, v3.16b, v3.16b, #8
+; CHECK-NEXT:    ext v6.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v7.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    orr v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    orr v3.8b, v5.8b, v4.8b
+; CHECK-NEXT:    orr v0.8b, v1.8b, v0.8b
+; CHECK-NEXT:    orr v2.8b, v7.8b, v6.8b
+; CHECK-NEXT:    orr v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <16 x i8> %x to <16 x i64>
+  %z = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> %y)
+  ret i64 %z
+}
+
+define i32 @or_sext_v4i16_i32(<4 x i16> %x) {
+; CHECK-LABEL: or_sext_v4i16_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    orr w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <4 x i16> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %y)
+  ret i32 %z
+}
+
+define i32 @or_zext_v4i16_i32(<4 x i16> %x) {
+; CHECK-LABEL: or_zext_v4i16_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    orr w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <4 x i16> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %y)
+  ret i32 %z
+}
+
+define i32 @or_sext_v8i16_i32(<8 x i16> %x) {
+; CHECK-LABEL: or_sext_v8i16_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    orr w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <8 x i16> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %y)
+  ret i32 %z
+}
+
+define i32 @or_zext_v8i16_i32(<8 x i16> %x) {
+; CHECK-LABEL: or_zext_v8i16_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    orr w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <8 x i16> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %y)
+  ret i32 %z
+}
+
+define i64 @or_sext_v4i16_i64(<4 x i16> %x) {
+; CHECK-LABEL: or_sext_v4i16_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <4 x i16> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @or_zext_v4i16_i64(<4 x i16> %x) {
+; CHECK-LABEL: or_zext_v4i16_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <4 x i16> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @or_sext_v8i16_i64(<8 x i16> %x) {
+; CHECK-LABEL: or_sext_v8i16_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    orr v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <8 x i16> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @or_zext_v8i16_i64(<8 x i16> %x) {
+; CHECK-LABEL: or_zext_v8i16_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    orr v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <8 x i16> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @or_sext_v2i32_i64(<2 x i32> %x) {
+; CHECK-LABEL: or_sext_v2i32_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <2 x i32> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @or_zext_v2i32_i64(<2 x i32> %x) {
+; CHECK-LABEL: or_zext_v2i32_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <2 x i32> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @or_sext_v4i32_i64(<4 x i32> %x) {
+; CHECK-LABEL: or_sext_v4i32_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <4 x i32> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @or_zext_v4i32_i64(<4 x i32> %x) {
+; CHECK-LABEL: or_zext_v4i32_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <4 x i32> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %y)
+  ret i64 %z
+}
+
+define zeroext i16 @xor_sext_v8i8_i16(<8 x i8> %x) {
+; CHECK-LABEL: xor_sext_v8i8_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    eor w8, w8, w9
+; CHECK-NEXT:    eor w8, w8, w8, lsr #16
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <8 x i8> %x to <8 x i16>
+  %z = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %y)
+  ret i16 %z
+}
+
+define zeroext i16 @xor_zext_v8i8_i16(<8 x i8> %x) {
+; CHECK-LABEL: xor_zext_v8i8_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    eor w8, w8, w9
+; CHECK-NEXT:    eor w8, w8, w8, lsr #16
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <8 x i8> %x to <8 x i16>
+  %z = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %y)
+  ret i16 %z
+}
+
+define zeroext i16 @xor_sext_v16i8_i16(<16 x i8> %x) {
+; CHECK-LABEL: xor_sext_v16i8_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    eor w8, w8, w9
+; CHECK-NEXT:    eor w8, w8, w8, lsr #16
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <16 x i8> %x to <16 x i16>
+  %z = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %y)
+  ret i16 %z
+}
+
+define zeroext i16 @xor_zext_v16i8_i16(<16 x i8> %x) {
+; CHECK-LABEL: xor_zext_v16i8_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    eor w8, w8, w9
+; CHECK-NEXT:    eor w8, w8, w8, lsr #16
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <16 x i8> %x to <16 x i16>
+  %z = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %y)
+  ret i16 %z
+}
+
+define i32 @xor_sext_v8i8_i32(<8 x i8> %x) {
+; CHECK-LABEL: xor_sext_v8i8_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    eor w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <8 x i8> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %y)
+  ret i32 %z
+}
+
+define i32 @xor_zext_v8i8_i32(<8 x i8> %x) {
+; CHECK-LABEL: xor_zext_v8i8_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    eor w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <8 x i8> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %y)
+  ret i32 %z
+}
+
+define i32 @xor_sext_v16i8_i32(<16 x i8> %x) {
+; CHECK-LABEL: xor_sext_v16i8_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    eor v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    eor w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <16 x i8> %x to <16 x i32>
+  %z = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %y)
+  ret i32 %z
+}
+
+define i32 @xor_zext_v16i8_i32(<16 x i8> %x) {
+; CHECK-LABEL: xor_zext_v16i8_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    eor v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    eor w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <16 x i8> %x to <16 x i32>
+  %z = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %y)
+  ret i32 %z
+}
+
+define i64 @xor_sext_v8i8_i64(<8 x i8> %x) {
+; CHECK-LABEL: xor_sext_v8i8_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    eor v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <8 x i8> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @xor_zext_v8i8_i64(<8 x i8> %x) {
+; CHECK-LABEL: xor_zext_v8i8_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    eor v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <8 x i8> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @xor_sext_v16i8_i64(<16 x i8> %x) {
+; CHECK-LABEL: xor_sext_v16i8_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll v2.4s, v1.4h, #0
+; CHECK-NEXT:    sshll v3.4s, v0.4h, #0
+; CHECK-NEXT:    sshll2 v1.4s, v1.8h, #0
+; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    ext v4.16b, v2.16b, v2.16b, #8
+; CHECK-NEXT:    ext v5.16b, v3.16b, v3.16b, #8
+; CHECK-NEXT:    ext v6.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v7.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    eor v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    eor v3.8b, v5.8b, v4.8b
+; CHECK-NEXT:    eor v0.8b, v1.8b, v0.8b
+; CHECK-NEXT:    eor v2.8b, v7.8b, v6.8b
+; CHECK-NEXT:    eor v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <16 x i8> %x to <16 x i64>
+  %z = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @xor_zext_v16i8_i64(<16 x i8> %x) {
+; CHECK-LABEL: xor_zext_v16i8_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll v2.4s, v1.4h, #0
+; CHECK-NEXT:    ushll v3.4s, v0.4h, #0
+; CHECK-NEXT:    ushll2 v1.4s, v1.8h, #0
+; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    ext v4.16b, v2.16b, v2.16b, #8
+; CHECK-NEXT:    ext v5.16b, v3.16b, v3.16b, #8
+; CHECK-NEXT:    ext v6.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v7.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    eor v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    eor v3.8b, v5.8b, v4.8b
+; CHECK-NEXT:    eor v0.8b, v1.8b, v0.8b
+; CHECK-NEXT:    eor v2.8b, v7.8b, v6.8b
+; CHECK-NEXT:    eor v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <16 x i8> %x to <16 x i64>
+  %z = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> %y)
+  ret i64 %z
+}
+
+define i32 @xor_sext_v4i16_i32(<4 x i16> %x) {
+; CHECK-LABEL: xor_sext_v4i16_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    eor w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <4 x i16> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %y)
+  ret i32 %z
+}
+
+define i32 @xor_zext_v4i16_i32(<4 x i16> %x) {
+; CHECK-LABEL: xor_zext_v4i16_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    eor w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <4 x i16> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %y)
+  ret i32 %z
+}
+
+define i32 @xor_sext_v8i16_i32(<8 x i16> %x) {
+; CHECK-LABEL: xor_sext_v8i16_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    eor w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <8 x i16> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %y)
+  ret i32 %z
+}
+
+define i32 @xor_zext_v8i16_i32(<8 x i16> %x) {
+; CHECK-LABEL: xor_zext_v8i16_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    lsr x9, x8, #32
+; CHECK-NEXT:    eor w0, w8, w9
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <8 x i16> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %y)
+  ret i32 %z
+}
+
+define i64 @xor_sext_v4i16_i64(<4 x i16> %x) {
+; CHECK-LABEL: xor_sext_v4i16_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <4 x i16> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @xor_zext_v4i16_i64(<4 x i16> %x) {
+; CHECK-LABEL: xor_zext_v4i16_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <4 x i16> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @xor_sext_v8i16_i64(<8 x i16> %x) {
+; CHECK-LABEL: xor_sext_v8i16_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    eor v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <8 x i16> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @xor_zext_v8i16_i64(<8 x i16> %x) {
+; CHECK-LABEL: xor_zext_v8i16_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    eor v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <8 x i16> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @xor_sext_v2i32_i64(<2 x i32> %x) {
+; CHECK-LABEL: xor_sext_v2i32_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <2 x i32> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @xor_zext_v2i32_i64(<2 x i32> %x) {
+; CHECK-LABEL: xor_zext_v2i32_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <2 x i32> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @xor_sext_v4i32_i64(<4 x i32> %x) {
+; CHECK-LABEL: xor_sext_v4i32_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = sext <4 x i32> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %y)
+  ret i64 %z
+}
+
+define i64 @xor_zext_v4i32_i64(<4 x i32> %x) {
+; CHECK-LABEL: xor_zext_v4i32_i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %y = zext <4 x i32> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %y)
+  ret i64 %z
+}
+
+declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
+declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.and.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.or.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
+declare i32 @llvm.vector.reduce.xor.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>)
+declare i64 @llvm.vector.reduce.and.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.and.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.or.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64>)
+declare i64 @llvm.vector.reduce.xor.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.xor.v8i64(<8 x i64>)