-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VectorCombine] Combine scalar fneg with insert/extract to vector fneg when length is different #115209
Conversation
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: hanbeom (ParkHanbum) Changes: insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index -> shuffle DestVec, (shuffle (fneg SrcVec), poison, SrcMask), Mask. The original combine left the case of source and destination vectors of different lengths as a TODO; this commit implements it. Full diff: https://github.com/llvm/llvm-project/pull/115209.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 58145c7e3c5913..3850484ae40384 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -649,9 +649,9 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
m_ExtractElt(m_Value(SrcVec), m_SpecificInt(Index))))))
return false;
- // TODO: We could handle this with a length-changing shuffle.
auto *VecTy = cast<FixedVectorType>(I.getType());
- if (SrcVec->getType() != VecTy)
+ auto *SrcVecTy = cast<FixedVectorType>(SrcVec->getType());
+ if (SrcVecTy->getScalarType() != VecTy->getScalarType())
return false;
// Ignore bogus insert/extract index.
@@ -666,7 +666,7 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
std::iota(Mask.begin(), Mask.end(), 0);
Mask[Index] = Index + NumElts;
- Type *ScalarTy = VecTy->getScalarType();
+ Type *ScalarTy = SrcVecTy->getScalarType();
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost OldCost =
TTI.getArithmeticInstrCost(Instruction::FNeg, ScalarTy) +
@@ -682,14 +682,31 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
TTI.getArithmeticInstrCost(Instruction::FNeg, VecTy) +
TTI.getShuffleCost(TargetTransformInfo::SK_Select, VecTy, Mask);
+ bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
+ // If the lengths of the two vectors are not equal,
+ // we need to add a length-change vector. Add this cost.
+ if (NeedLenChg)
+ NewCost +=
+ TTI.getShuffleCost(TargetTransformInfo::SK_Select, SrcVecTy, Mask);
+
if (NewCost > OldCost)
return false;
- // insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index -->
- // shuffle DestVec, (fneg SrcVec), Mask
+ Value *NewShuf;
+ // insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index
Value *VecFNeg = Builder.CreateFNegFMF(SrcVec, FNeg);
- Value *Shuf = Builder.CreateShuffleVector(DestVec, VecFNeg, Mask);
- replaceValue(I, *Shuf);
+ if (NeedLenChg) {
+ // shuffle DestVec, (shuffle (fneg SrcVec), poison, SrcMask), Mask
+ SmallVector<int> SrcMask(NumElts, PoisonMaskElem);
+ SrcMask[Index] = Index;
+ Value *LenChgShuf = Builder.CreateShuffleVector(
+ SrcVec, PoisonValue::get(SrcVecTy), SrcMask);
+ NewShuf = Builder.CreateShuffleVector(DestVec, LenChgShuf, Mask);
+ } else
+ // shuffle DestVec, (fneg SrcVec), Mask
+ NewShuf = Builder.CreateShuffleVector(DestVec, VecFNeg, Mask);
+
+ replaceValue(I, *NewShuf);
return true;
}
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
index df5fcdb7beb656..05aad1b4ba79d1 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
@@ -18,6 +18,19 @@ define <4 x float> @ext0_v4f32(<4 x float> %x, <4 x float> %y) {
ret <4 x float> %r
}
+define <4 x float> @ext0_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: @ext0_v2f32v4f32(
+; CHECK-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
+; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 0
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %e = extractelement <2 x float> %x, i32 0
+ %n = fneg float %e
+ %r = insertelement <4 x float> %y, float %n, i32 0
+ ret <4 x float> %r
+}
+
; Eliminating extract/insert is profitable.
define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
@@ -32,6 +45,25 @@ define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
ret <4 x float> %r
}
+define <4 x float> @ext2_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
+; SSE-LABEL: @ext2_v2f32v4f32(
+; SSE-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 2
+; SSE-NEXT: [[N:%.*]] = fneg float [[E]]
+; SSE-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 2
+; SSE-NEXT: ret <4 x float> [[R]]
+;
+; AVX-LABEL: @ext2_v2f32v4f32(
+; AVX-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
+; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 poison>
+; AVX-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; AVX-NEXT: ret <4 x float> [[R]]
+;
+ %e = extractelement <2 x float> %x, i32 2
+ %n = fneg float %e
+ %r = insertelement <4 x float> %y, float %n, i32 2
+ ret <4 x float> %r
+}
+
; Eliminating extract/insert is still profitable. Flags propagate.
define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) {
@@ -46,6 +78,25 @@ define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) {
ret <2 x double> %r
}
+define <4 x double> @ext1_v2f64v4f64(<2 x double> %x, <4 x double> %y) {
+; SSE-LABEL: @ext1_v2f64v4f64(
+; SSE-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
+; SSE-NEXT: [[N:%.*]] = fneg nsz double [[E]]
+; SSE-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 1
+; SSE-NEXT: ret <4 x double> [[R]]
+;
+; AVX-LABEL: @ext1_v2f64v4f64(
+; AVX-NEXT: [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]]
+; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
+; AVX-NEXT: [[R:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; AVX-NEXT: ret <4 x double> [[R]]
+;
+ %e = extractelement <2 x double> %x, i32 1
+ %n = fneg nsz double %e
+ %r = insertelement <4 x double> %y, double %n, i32 1
+ ret <4 x double> %r
+}
+
; The vector fneg would cost twice as much as the scalar op with SSE,
; so we don't transform there (the shuffle would also be more expensive).
@@ -67,6 +118,19 @@ define <8 x float> @ext7_v8f32(<8 x float> %x, <8 x float> %y) {
ret <8 x float> %r
}
+define <8 x float> @ext7_v4f32v8f32(<4 x float> %x, <8 x float> %y) {
+; CHECK-LABEL: @ext7_v4f32v8f32(
+; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
+; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 7
+; CHECK-NEXT: ret <8 x float> [[R]]
+;
+ %e = extractelement <4 x float> %x, i32 3
+ %n = fneg float %e
+ %r = insertelement <8 x float> %y, float %n, i32 7
+ ret <8 x float> %r
+}
+
; Same as above with an extra use of the extracted element.
define <8 x float> @ext7_v8f32_use1(<8 x float> %x, <8 x float> %y) {
@@ -91,6 +155,21 @@ define <8 x float> @ext7_v8f32_use1(<8 x float> %x, <8 x float> %y) {
ret <8 x float> %r
}
+define <8 x float> @ext7_v4f32v8f32_use1(<4 x float> %x, <8 x float> %y) {
+; CHECK-LABEL: @ext7_v4f32v8f32_use1(
+; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
+; CHECK-NEXT: call void @use(float [[E]])
+; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3
+; CHECK-NEXT: ret <8 x float> [[R]]
+;
+ %e = extractelement <4 x float> %x, i32 3
+ call void @use(float %e)
+ %n = fneg float %e
+ %r = insertelement <8 x float> %y, float %n, i32 3
+ ret <8 x float> %r
+}
+
; Negative test - the transform is likely not profitable if the fneg has another use.
define <8 x float> @ext7_v8f32_use2(<8 x float> %x, <8 x float> %y) {
@@ -108,6 +187,21 @@ define <8 x float> @ext7_v8f32_use2(<8 x float> %x, <8 x float> %y) {
ret <8 x float> %r
}
+define <8 x float> @ext7_v4f32v8f32_use2(<4 x float> %x, <8 x float> %y) {
+; CHECK-LABEL: @ext7_v4f32v8f32_use2(
+; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
+; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
+; CHECK-NEXT: call void @use(float [[N]])
+; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3
+; CHECK-NEXT: ret <8 x float> [[R]]
+;
+ %e = extractelement <4 x float> %x, i32 3
+ %n = fneg float %e
+ call void @use(float %n)
+ %r = insertelement <8 x float> %y, float %n, i32 3
+ ret <8 x float> %r
+}
+
; Negative test - can't convert variable index to a shuffle.
define <2 x double> @ext_index_var_v2f64(<2 x double> %x, <2 x double> %y, i32 %index) {
@@ -123,6 +217,19 @@ define <2 x double> @ext_index_var_v2f64(<2 x double> %x, <2 x double> %y, i32 %
ret <2 x double> %r
}
+define <4 x double> @ext_index_var_v2f64v4f64(<2 x double> %x, <4 x double> %y, i32 %index) {
+; CHECK-LABEL: @ext_index_var_v2f64v4f64(
+; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 [[INDEX:%.*]]
+; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 [[INDEX]]
+; CHECK-NEXT: ret <4 x double> [[R]]
+;
+ %e = extractelement <2 x double> %x, i32 %index
+ %n = fneg nsz double %e
+ %r = insertelement <4 x double> %y, double %n, i32 %index
+ ret <4 x double> %r
+}
+
; Negative test - require same extract/insert index for simple shuffle.
; TODO: We could handle this by adjusting the cost calculation.
@@ -139,6 +246,19 @@ define <2 x double> @ext1_v2f64_ins0(<2 x double> %x, <2 x double> %y) {
ret <2 x double> %r
}
+define <4 x double> @ext1_v2f64v4f64_ins0(<2 x double> %x, <4 x double> %y) {
+; CHECK-LABEL: @ext1_v2f64v4f64_ins0(
+; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
+; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 0
+; CHECK-NEXT: ret <4 x double> [[R]]
+;
+ %e = extractelement <2 x double> %x, i32 1
+ %n = fneg nsz double %e
+ %r = insertelement <4 x double> %y, double %n, i32 0
+ ret <4 x double> %r
+}
+
; Negative test - avoid changing poison ops
define <4 x float> @ext12_v4f32(<4 x float> %x, <4 x float> %y) {
@@ -154,6 +274,19 @@ define <4 x float> @ext12_v4f32(<4 x float> %x, <4 x float> %y) {
ret <4 x float> %r
}
+define <4 x float> @ext12_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: @ext12_v2f32v4f32(
+; CHECK-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 6
+; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 12
+; CHECK-NEXT: ret <4 x float> [[R]]
+;
+ %e = extractelement <2 x float> %x, i32 6
+ %n = fneg float %e
+ %r = insertelement <4 x float> %y, float %n, i32 12
+ ret <4 x float> %r
+}
+
; This used to crash because we assumed matching a true, unary fneg instruction.
define <2 x float> @ext1_v2f32_fsub(<2 x float> %x) {
@@ -181,3 +314,22 @@ define <2 x float> @ext1_v2f32_fsub_fmf(<2 x float> %x, <2 x float> %y) {
%r = insertelement <2 x float> %y, float %s, i32 1
ret <2 x float> %r
}
+
+define <4 x float> @ext1_v2f32v4f32_fsub_fmf(<2 x float> %x, <4 x float> %y) {
+; SSE-LABEL: @ext1_v2f32v4f32_fsub_fmf(
+; SSE-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
+; SSE-NEXT: [[S:%.*]] = fsub nnan nsz float 0.000000e+00, [[E]]
+; SSE-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[S]], i32 1
+; SSE-NEXT: ret <4 x float> [[R]]
+;
+; AVX-LABEL: @ext1_v2f32v4f32_fsub_fmf(
+; AVX-NEXT: [[TMP1:%.*]] = fneg nnan nsz <2 x float> [[X:%.*]]
+; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
+; AVX-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; AVX-NEXT: ret <4 x float> [[R]]
+;
+ %e = extractelement <2 x float> %x, i32 1
+ %s = fsub nsz nnan float 0.0, %e
+ %r = insertelement <4 x float> %y, float %s, i32 1
+ ret <4 x float> %r
+}
|
20286a3
to
1c5baed
Compare
can I request review? |
Value *LenChgShuf = Builder.CreateShuffleVector( | ||
SrcVec, PoisonValue::get(SrcVecTy), SrcMask); | ||
NewShuf = Builder.CreateShuffleVector(DestVec, LenChgShuf, Mask); | ||
} else |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please match the curly braces. https://llvm.org/docs/CodingStandards.html#don-t-use-braces-on-simple-single-statement-bodies-of-if-else-loop-statements
…g when length is different insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index -> shuffle DestVec, (shuffle (fneg SrcVec), poison, SrcMask), Mask. The original combine left the case of vectors of different lengths as a TODO; this commit implements it. (see #[baab4aa])
b0d0530
to
3327572
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry I missed this.
@@ -682,7 +682,8 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) { | |||
std::iota(Mask.begin(), Mask.end(), 0); | |||
Mask[Index] = Index + NumElts; | |||
|
|||
Type *ScalarTy = VecTy->getScalarType(); | |||
Type *ScalarTy = SrcVecTy->getScalarType(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Move this up and avoid the repeated SrcVecTy->getScalarType()
@@ -682,7 +682,8 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) { | |||
std::iota(Mask.begin(), Mask.end(), 0); | |||
Mask[Index] = Index + NumElts; | |||
|
|||
Type *ScalarTy = VecTy->getScalarType(); | |||
Type *ScalarTy = SrcVecTy->getScalarType(); | |||
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove this (CostKind is now a VectorCombine class member)
// we need to add a length-change vector. Add this cost. | ||
if (NeedLenChg) | ||
NewCost += | ||
TTI.getShuffleCost(TargetTransformInfo::SK_Select, SrcVecTy, Mask); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This shouldn't be a Select shuffle kind. Create the SrcMask (below) earlier and use the SK_PermuteSingleSrc shuffle kind with it.
if (SrcVec->getType() != VecTy) | ||
auto *SrcVecTy = cast<FixedVectorType>(SrcVec->getType()); | ||
auto *ScalarTy = SrcVecTy->getScalarType(); | ||
if (ScalarTy != VecTy->getScalarType()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can't guarantee that SrcVec is a FixedVectorType, you're probably safer doing this:
auto *ScalarTy = VecTy->getScalarType();
auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType());
if (!SrcVecTy || ScalarTy != SrcVecTy->getScalarType())
%n = fneg nsz double %e | ||
%r = insertelement <4 x double> %y, double %n, i32 1 | ||
ret <4 x double> %r | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please can you add coverage for the case where you extract from an index greater than the vector width of the destination:
%e = extractelement <4 x double> %x, i32 3
%n = fneg nsz double %e
%r = insertelement <2 x double> %y, double %n, i32 1
ret <2 x double> %r
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, but this test case is not transformed because we currently require the extract and insert indices to be the same.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's fine - I just want to make sure we have coverage for cases where the extraction index is out of bounds for the destination vector
auto *VecTy = cast<FixedVectorType>(I.getType()); | ||
if (SrcVec->getType() != VecTy) | ||
auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType()); | ||
auto *ScalarTy = SrcVecTy->getScalarType(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto *ScalarTy = VecTy->getScalarType();
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It was a terrible mistake, so sorry.
if (NeedLenChg) { | ||
// shuffle DestVec, (shuffle (fneg SrcVec), poison, SrcMask), Mask | ||
Value *LenChgShuf = Builder.CreateShuffleVector( | ||
SrcVec, PoisonValue::get(SrcVecTy), SrcMask); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can use the shorter 1 op version of CreateShuffleVector here:
Value *LenChgShuf = Builder.CreateShuffleVector(SrcVec, SrcMask)
%n = fneg nsz double %e | ||
%r = insertelement <4 x double> %y, double %n, i32 1 | ||
ret <4 x double> %r | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's fine - I just want to make sure we have coverage for cases where the extraction index is out of bounds for the destination vector
If we call CreateShuffleVector with only one value as an argument, it creates a poison vector internally and uses it as the second shuffle operand.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
@ParkHanbum Do you have commit access? |
@RKSimon no I haven't |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/51/builds/7882 Here is the relevant piece of the build log for the reference
|
Alive2 flags an issue with this commit. define <4 x float> @ext1_v2f32v4f32_fsub_fmf(<2 x float> %x, <4 x float> %y) {
%e = extractelement <2 x float> %x, i32 1
%s = fsub nnan nsz float 0.000000, %e
%r = insertelement <4 x float> %y, float %s, i32 1
ret <4 x float> %r
}
=>
define <4 x float> @ext1_v2f32v4f32_fsub_fmf(<2 x float> %x, <4 x float> %y) {
%#2 = shufflevector <2 x float> %x, <2 x float> poison, 4294967295, 1, 4294967295, 4294967295
%r = shufflevector <4 x float> %y, <4 x float> %#2, 0, 5, 2, 3
ret <4 x float> %r
}
Transformation doesn't verify! (unsound)
ERROR: Value mismatch
Example:
<2 x float> %x = < poison, #x00000002 (0.000000000000?) >
<4 x float> %y = < #x00000000 (+0.0), poison, #x00000000 (+0.0), #x00000000 (+0.0) >
Source:
float %e = #x00000002 (0.000000000000?)
float %s = #x80000002 (-0.000000000000?)
<4 x float> %r = < #x00000000 (+0.0), #x80000002 (-0.000000000000?), #x00000000 (+0.0), #x00000000 (+0.0) >
Target:
<4 x float> %#2 = < poison, #x00000002 (0.000000000000?), poison, poison >
<4 x float> %r = < #x00000000 (+0.0), #x00000002 (0.000000000000?), #x00000000 (+0.0), #x00000000 (+0.0) >
Source value: < #x00000000 (+0.0), #x80000002 (-0.000000000000?), #x00000000 (+0.0), #x00000000 (+0.0) >
Target value: < #x00000000 (+0.0), #x00000002 (0.000000000000?), #x00000000 (+0.0), #x00000000 (+0.0) > |
replaceValue(I, *Shuf); | ||
if (NeedLenChg) { | ||
// shuffle DestVec, (shuffle (fneg SrcVec), poison, SrcMask), Mask | ||
Value *LenChgShuf = Builder.CreateShuffleVector(SrcVec, SrcMask); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ParkHanbum - this is a regression caused by a typo - sorry I missed this :(
Value *LenChgShuf = Builder.CreateShuffleVector(VecFNeg, SrcMask);
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm sorry for my mistake.. Can I send a PR again?
…ctor fneg when length is different" (llvm#120422) Reverts llvm#115209 - investigating a reported regression
…tract to vector fneg when length is different" (#120422) Reverts llvm/llvm-project#115209 - investigating a reported regression
insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index
-> shuffle DestVec, (shuffle (fneg SrcVec), poison, SrcMask), Mask
The original combine left the case of vectors of different
lengths as a TODO; this commit implements it. (see #[baab4aa])