diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index a22ee1de0ac21d1..673596573582c1c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -977,14 +977,7 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal, Value *Cmp1 = Cmp->getOperand(1); ICmpInst::Predicate Pred = Cmp->getPredicate(); Value *X; - const APInt *C, *CmpC; - if (Pred == ICmpInst::ICMP_ULT && - match(TVal, m_Add(m_Value(X), m_APInt(C))) && X == Cmp0 && - match(FVal, m_AllOnes()) && match(Cmp1, m_APInt(CmpC)) && *CmpC == ~*C) { - // (X u< ~C) ? (X + C) : -1 --> uadd.sat(X, C) - return Builder.CreateBinaryIntrinsic( - Intrinsic::uadd_sat, X, ConstantInt::get(X->getType(), *C)); - } + const APInt *C; // Match unsigned saturated add of 2 variables with an unnecessary 'not'. // There are 8 commuted variants. @@ -996,6 +989,46 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal, if (!match(TVal, m_AllOnes())) return nullptr; + // uge -1 is canonicalized to eq -1 and requires special handling + // (a == -1) ? -1 : a + 1 -> uadd.sat(a, 1) + if (Pred == ICmpInst::ICMP_EQ) { + if (match(FVal, m_Add(m_Specific(Cmp0), m_One())) && + match(Cmp1, m_AllOnes())) { + return Builder.CreateBinaryIntrinsic( + Intrinsic::uadd_sat, Cmp0, ConstantInt::get(Cmp0->getType(), 1)); + } + return nullptr; + } + + if ((Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_UGT) && + match(FVal, m_Add(m_Specific(Cmp0), m_APIntAllowPoison(C))) && + match(Cmp1, m_SpecificIntAllowPoison(~*C))) { + // (X u> ~C) ? -1 : (X + C) --> uadd.sat(X, C) + // (X u>= ~C) ? -1 : (X + C) --> uadd.sat(X, C) + return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, Cmp0, + ConstantInt::get(Cmp0->getType(), *C)); + } + + // Negative one does not work here because X u> -1 ? -1 : X + -1 is not a + // saturated add. 
+ if (Pred == ICmpInst::ICMP_UGT && + match(FVal, m_Add(m_Specific(Cmp0), m_APIntAllowPoison(C))) && + match(Cmp1, m_SpecificIntAllowPoison(~*C - 1)) && !C->isAllOnes()) { + // (X u> ~C - 1) ? -1 : (X + C) --> uadd.sat(X, C) + return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, Cmp0, + ConstantInt::get(Cmp0->getType(), *C)); + } + + // Zero does not work here because X u>= 0 ? -1 : X is always -1, which is + // not a saturated add. + if (Pred == ICmpInst::ICMP_UGE && + match(FVal, m_Add(m_Specific(Cmp0), m_APIntAllowPoison(C))) && + match(Cmp1, m_SpecificIntAllowPoison(-*C)) && !C->isZero()) { + // (X u>= -C) ? -1 : (X + C) --> uadd.sat(X, C) + return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, Cmp0, + ConstantInt::get(Cmp0->getType(), *C)); + } + // Canonicalize predicate to less-than or less-or-equal-than. if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) { std::swap(Cmp0, Cmp1); diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll index 53366e2cc6a4e52..15776e19775720d 100644 --- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll @@ -1398,9 +1398,7 @@ define i32 @uadd_sat(i32 %x, i32 %y) { define i32 @uadd_sat_flipped(i32 %x) { ; CHECK-LABEL: @uadd_sat_flipped( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -11 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], 9 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]] +; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9) ; CHECK-NEXT: ret i32 [[COND]] ; %cmp = icmp ugt i32 %x, -11 @@ -1411,9 +1409,7 @@ define i32 @uadd_sat_flipped(i32 %x) { define i32 @uadd_sat_flipped2(i32 %x) { ; CHECK-LABEL: @uadd_sat_flipped2( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -10 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], 9 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]] +; CHECK-NEXT: [[COND:%.*]] 
= call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 9) ; CHECK-NEXT: ret i32 [[COND]] ; %cmp = icmp ugt i32 %x, -10 @@ -1452,9 +1448,7 @@ define i32 @uadd_sat_flipped3_neg_no_nuw(i32 %x) { define i32 @uadd_sat_negative_one(i32 %x) { ; CHECK-LABEL: @uadd_sat_negative_one( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], -1 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], 1 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]] +; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 1) ; CHECK-NEXT: ret i32 [[COND]] ; %cmp = icmp eq i32 %x, -1 @@ -1483,21 +1477,6 @@ define i32 @uadd_sat_negative_one_poison_all(i32 %x) { ret i32 %cond } -; Negative test - -define i32 @uadd_sat_flipped_neg_no_nuw(i32 %x) { -; CHECK-LABEL: @uadd_sat_flipped_neg_no_nuw( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], -9 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], 9 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[ADD]] -; CHECK-NEXT: ret i32 [[COND]] -; - %cmp = icmp ugt i32 %x, -9 - %add = add i32 %x, 9 - %cond = select i1 %cmp, i32 -1, i32 %add - ret i32 %cond -} - define i32 @uadd_sat_poison(i32 %x, i32 %y) { ; CHECK-LABEL: @uadd_sat_poison( ; CHECK-NEXT: ret i32 poison @@ -1582,9 +1561,7 @@ define <2 x i8> @uadd_sat_flipped4_vector(<2 x i8> %x) { define <2 x i8> @uadd_sat_flipped4_poison_vector(<2 x i8> %x) { ; CHECK-LABEL: @uadd_sat_flipped4_poison_vector( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], -; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[ADD]], <2 x i8> +; CHECK-NEXT: [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) ; CHECK-NEXT: ret <2 x i8> [[COND]] ; %cmp = icmp ult <2 x i8> %x, @@ -1595,9 +1572,7 @@ define <2 x i8> @uadd_sat_flipped4_poison_vector(<2 x i8> %x) { define <2 x i8> @uadd_sat_flipped4_poison_vector_compare(<2 x i8> %x) { ; CHECK-LABEL: @uadd_sat_flipped4_poison_vector_compare( -; CHECK-NEXT: 
[[CMP:%.*]] = icmp ult <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[ADD:%.*]] = add <2 x i8> [[X]], -; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[ADD]], <2 x i8> +; CHECK-NEXT: [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) ; CHECK-NEXT: ret <2 x i8> [[COND]] ; %cmp = icmp ult <2 x i8> %x, @@ -2093,9 +2068,7 @@ define i32 @uadd_sat_not_commute_select_uge_commute_add(i32 %x, i32 %y) { define i32 @uadd_sat_constant(i32 %x) { ; CHECK-LABEL: @uadd_sat_constant( -; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], 42 -; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[X]], -43 -; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 -1, i32 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 42) ; CHECK-NEXT: ret i32 [[R]] ; %a = add i32 %x, 42 @@ -2161,9 +2134,7 @@ define i32 @uadd_sat_canon_y_nuw(i32 %x, i32 %y) { define <4 x i32> @uadd_sat_constant_vec(<4 x i32> %x) { ; CHECK-LABEL: @uadd_sat_constant_vec( -; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[C:%.*]] = icmp ugt <4 x i32> [[X]], -; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[C]], <4 x i32> , <4 x i32> [[A]] +; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> ) ; CHECK-NEXT: ret <4 x i32> [[R]] ; %a = add <4 x i32> %x, @@ -2185,9 +2156,7 @@ define <4 x i32> @uadd_sat_constant_vec_commute(<4 x i32> %x) { define <4 x i32> @uadd_sat_constant_vec_commute_undefs(<4 x i32> %x) { ; CHECK-LABEL: @uadd_sat_constant_vec_commute_undefs( -; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[C:%.*]] = icmp ult <4 x i32> [[X]], -; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[C]], <4 x i32> [[A]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> ) ; CHECK-NEXT: ret <4 x i32> [[R]] ; %a = add <4 x i32> %x,