return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
N0.getOperand(1).getOperand(0));
+ // fold (X - (-Y * Z)) -> (X + (Y * Z)); the negation -Y is matched as (sub 0, Y) on either MUL operand
+ if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) { // N1 is the subtrahend; presumably one-use so the old MUL does not stay live
+ if (N1.getOperand(0).getOpcode() == ISD::SUB &&
+ isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) { // case 1: MUL operand 0 is (0 - Y)
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
+ N1.getOperand(0).getOperand(1), // Y, taken from inside the negation
+ N1.getOperand(1)); // Z, unchanged
+ return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); // X + (Y * Z)
+ }
+ if (N1.getOperand(1).getOpcode() == ISD::SUB &&
+ isNullConstantOrNullSplatConstant(N1.getOperand(1).getOperand(0))) { // case 2: MUL operand 1 is (0 - Y)
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
+ N1.getOperand(0), // the non-negated factor, unchanged
+ N1.getOperand(1).getOperand(1)); // Y, taken from inside the negation
+ return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); // X + (Z * Y)
+ }
+ }
+
// If either operand of a sub is undef, the result is undef
if (N0.isUndef())
return N0;
define i32 @combine_srem_by_minsigned(i32 %x) {
; CHECK-LABEL: combine_srem_by_minsigned:
; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: sarl $31, %eax
; CHECK-NEXT: shrl %eax
; CHECK-NEXT: addl %edi, %eax
; CHECK-NEXT: andl $-2147483648, %eax # imm = 0x80000000
-; CHECK-NEXT: subl %eax, %edi
-; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: leal (%rax,%rdi), %eax
; CHECK-NEXT: retq
%1 = srem i32 %x, -2147483648
ret i32 %1
; SSE-NEXT: psrad $1, %xmm1
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; SSE-NEXT: pxor %xmm2, %xmm2
-; SSE-NEXT: psubd %xmm1, %xmm2
-; SSE-NEXT: pmulld {{.*}}(%rip), %xmm2
-; SSE-NEXT: psubd %xmm2, %xmm0
+; SSE-NEXT: pmulld {{.*}}(%rip), %xmm1
+; SSE-NEXT: paddd %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_vec_srem_by_pow2b_neg:
; AVX1-NEXT: vpsrad $1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_vec_srem_by_pow2b_neg:
; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
%1 = srem <4 x i32> %x, <i32 -2, i32 -4, i32 -8, i32 -16>
ret <4 x i32> %1