if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
N0.getOperand(1) == N1.getOperand(1)) {
- SDValue ORNode = DAG.getNode(LogicOpcode, SDLoc(N0),
- N0.getOperand(0).getValueType(),
- N0.getOperand(0), N1.getOperand(0));
- AddToWorklist(ORNode.getNode());
- return DAG.getNode(HandOpcode, SDLoc(N), VT, ORNode, N0.getOperand(1));
+ // If either operand has other uses, this transform is not an improvement.
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+ SDValue Logic = DAG.getNode(LogicOpcode, SDLoc(N0), Op0VT,
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorklist(Logic.getNode());
+ return DAG.getNode(HandOpcode, SDLoc(N), VT, Logic, N0.getOperand(1));
}
// Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
}
; This is questionable - hoisting doesn't eliminate anything.
+; It might result in an extra register move.
define i32 @lshr_or_multiuse1(i32 %x, i32 %y, i32 %z, i32* %p1, i32* %p2) {
; CHECK-LABEL: lshr_or_multiuse1:
; CHECK: # %bb.0:
-; CHECK-NEXT: or 4, 3, 4
-; CHECK-NEXT: srw 4, 4, 5
-; CHECK-NEXT: srw 5, 3, 5
-; CHECK-NEXT: mr 3, 4
-; CHECK-NEXT: stw 5, 0(6)
+; CHECK-NEXT: srw 7, 3, 5
+; CHECK-NEXT: srw 3, 4, 5
+; CHECK-NEXT: or 3, 7, 3
+; CHECK-NEXT: stw 7, 0(6)
; CHECK-NEXT: blr
%xt = lshr i32 %x, %z
%yt = lshr i32 %y, %z
define i32 @lshr_multiuse2(i32 %x, i32 %y, i32 %z, i32* %p1, i32* %p2) {
; CHECK-LABEL: lshr_multiuse2:
; CHECK: # %bb.0:
-; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: srw 3, 3, 5
; CHECK-NEXT: srw 4, 4, 5
+; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: stw 4, 0(7)
; CHECK-NEXT: blr
%xt = lshr i32 %x, %z
ret i32 %r
}
-; FIXME: This is not profitable to hoist. We need an extra shift instruction.
+; This is not profitable to hoist. We need an extra shift instruction.
define i32 @lshr_multiuse3(i32 %x, i32 %y, i32 %z, i32* %p1, i32* %p2) {
; CHECK-LABEL: lshr_multiuse3:
; CHECK: # %bb.0:
-; CHECK-NEXT: or 8, 3, 4
; CHECK-NEXT: srw 3, 3, 5
-; CHECK-NEXT: stw 3, 0(6)
-; CHECK-NEXT: srw 3, 8, 5
; CHECK-NEXT: srw 4, 4, 5
+; CHECK-NEXT: stw 3, 0(6)
+; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: stw 4, 0(7)
; CHECK-NEXT: blr
%xt = lshr i32 %x, %z
; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [63,0,0,0,63,0,0,0]
; X86-AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3
-; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsubq %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-LABEL: var_shift_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm3
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsubq %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; XOP-LABEL: var_shift_v2i64:
; AVX2-LABEL: var_shift_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3
-; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsubq %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; XOPAVX1-LABEL: var_shift_v4i64:
; XOPAVX2-LABEL: var_shift_v4i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3
-; XOPAVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
-; XOPAVX2-NEXT: vpsubq %ymm3, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
;
; AVX512-LABEL: var_shift_v4i64:
; X32-AVX2-LABEL: var_shift_v4i64:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X32-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3
-; X32-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; X32-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
; X32-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT: vpsubq %ymm3, %ymm0, %ymm0
+; X32-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; X32-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
%shift = ashr <4 x i64> %a, %b
ret <4 x i64> %shift
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,7,0,31,0,62,0]
; X32-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X32-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3
-; X32-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; X32-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
; X32-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT: vpsubq %ymm3, %ymm0, %ymm0
+; X32-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; X32-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; X32-AVX2-NEXT: retl
%shift = ashr <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
ret <4 x i64> %shift
; AVX2-NEXT: vpsllq $32, %xmm0, %xmm2
; AVX2-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX2-NEXT: vpsrad $16, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm3
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsubq %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; XOP-LABEL: var_shift_v2i8:
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[0],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm3
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsubq %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; XOP-LABEL: splatvar_shift_v2i8: