From 7f17639803fdc73f9ab9bd60a315596ea8881af9 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 30 Jul 2024 14:02:51 -0700 Subject: [PATCH] [SLP]Fix PR101213: Reuse extractelement, only if its vector operand comes before new vector value. When trying to reuse an extractelement instruction, we need to check that it is inserted at the proper position. Its original vector operand must come before the new vector value; otherwise, a new extractelement instruction must be generated. Fixes https://github.com/llvm/llvm-project/issues/101213 (cherry picked from commit f70f1228035c9610de38e0e376afdacb647c4ad9) --- .../Transforms/Vectorize/SLPVectorizer.cpp | 9 +++- .../X86/extract-vectorized-operand.ll | 49 +++++++++++++++++++ 2 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/extract-vectorized-operand.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ab2b96cdc42db..007ffedab7df4 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -13889,11 +13889,16 @@ Value *BoUpSLP::vectorizeTree( } if (!Ex) { // "Reuse" the existing extract to improve final codegen. - if (auto *ES = dyn_cast<ExtractElementInst>(Scalar)) { + if (auto *ES = dyn_cast<ExtractElementInst>(Scalar); + ES && isa<Instruction>(Vec)) { Value *V = ES->getVectorOperand(); if (const TreeEntry *ETE = getTreeEntry(V)) V = ETE->VectorizedValue; - Ex = Builder.CreateExtractElement(V, ES->getIndexOperand()); + if (auto *IV = dyn_cast<Instruction>(V); + !IV || IV == Vec || IV->comesBefore(cast<Instruction>(Vec))) + Ex = Builder.CreateExtractElement(V, ES->getIndexOperand()); + else + Ex = Builder.CreateExtractElement(Vec, Lane); } else if (ReplaceGEP) { // Leave the GEPs as is, they are free in most cases and better to // keep them as GEPs. 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-vectorized-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-vectorized-operand.ll new file mode 100644 index 0000000000000..f1a5709d07f02 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-vectorized-operand.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define void @test() { +; CHECK-LABEL: define void @test() { +; CHECK-NEXT: [[BB:.*]]: +; CHECK-NEXT: br label %[[BB43:.*]] +; CHECK: [[BB20:.*]]: +; CHECK-NEXT: br label %[[BB105:.*]] +; CHECK: [[BB43]]: +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x ptr addrspace(1)> [ [[TMP1:%.*]], %[[BB51:.*]] ], [ zeroinitializer, %[[BB]] ] +; CHECK-NEXT: br i1 false, label %[[BB105]], label %[[BB51]] +; CHECK: [[BB51]]: +; CHECK-NEXT: [[TMP1]] = phi <2 x ptr addrspace(1)> [ poison, %[[BB54:.*]] ], [ zeroinitializer, %[[BB43]] ] +; CHECK-NEXT: br label %[[BB43]] +; CHECK: [[BB54]]: +; CHECK-NEXT: br label %[[BB51]] +; CHECK: [[BB105]]: +; CHECK-NEXT: [[PHI106:%.*]] = phi ptr addrspace(1) [ null, %[[BB20]] ], [ null, %[[BB43]] ] +; CHECK-NEXT: ret void +; +bb: + %0 = shufflevector <2 x ptr addrspace(1)> zeroinitializer, <2 x ptr addrspace(1)> zeroinitializer, <2 x i32> + %1 = extractelement <2 x ptr addrspace(1)> %0, i32 0 + %2 = extractelement <2 x ptr addrspace(1)> %0, i32 1 + br label %bb43 + +bb20: + br label %bb105 + +bb43: + %phi441 = phi ptr addrspace(1) [ %4, %bb51 ], [ %2, %bb ] + %phi452 = phi ptr addrspace(1) [ %5, %bb51 ], [ %1, %bb ] + br i1 false, label %bb105, label %bb51 + +bb51: + %3 = phi <2 x ptr addrspace(1)> [ poison, %bb54 ], [ zeroinitializer, %bb43 ] + %4 = extractelement <2 x ptr addrspace(1)> %3, i32 0 + %5 = extractelement <2 x ptr addrspace(1)> %3, i32 1 + br label %bb43 + +bb54: + br label %bb51 + +bb105: + %phi106 = phi ptr addrspace(1) 
[ %1, %bb20 ], [ null, %bb43 ] + ret void +} +