N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
return IsFSHL ? N0 : N1;
- // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
+ auto IsUndefOrZero = [](SDValue V) {
+ if (V.isUndef())
+ return true;
+ if (ConstantSDNode *Cst = isConstOrConstSplat(V, /*AllowUndefs*/true))
+ return Cst->getAPIntValue() == 0;
+ return false;
+ };
+
if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
+ EVT ShAmtTy = N2.getValueType();
+
+ // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
if (Cst->getAPIntValue().uge(BitWidth)) {
uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
- DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType()));
+ DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
}
+
+ unsigned ShAmt = Cst->getZExtValue();
+ if (ShAmt == 0)
+ return IsFSHL ? N0 : N1;
+
+ // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
+ // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
+ // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
+ // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
+ if (IsUndefOrZero(N0))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
+ DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
+ SDLoc(N), ShAmtTy));
+ if (IsUndefOrZero(N1))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
+ DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
+ SDLoc(N), ShAmtTy));
+ }
+
+ // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
+ // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
+ // iff we know the shift amount is in range.
+ // TODO: when is it worth doing SUB(BW, N2) as well?
+ if (isPowerOf2_32(BitWidth)) {
+ APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
+ if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
+ if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
}
// fold (fshl N0, N0, N2) -> (rotl N0, N2)
; X32-SSE2-LABEL: fshl_i32_undef0_cst:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: shldl $9, %eax, %eax
+; X32-SSE2-NEXT: shrl $23, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshl_i32_undef0_cst:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: shldl $9, %edi, %eax
+; X64-AVX2-NEXT: movl %edi, %eax
+; X64-AVX2-NEXT: shrl $23, %eax
; X64-AVX2-NEXT: retq
%res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 9)
ret i32 %res
; X32-SSE2-LABEL: fshl_i32_undef1_msk:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT: andl $7, %ecx
-; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
-; X32-SSE2-NEXT: shldl %cl, %eax, %eax
+; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X32-SSE2-NEXT: andb $7, %cl
+; X32-SSE2-NEXT: shll %cl, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshl_i32_undef1_msk:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %esi, %ecx
; X64-AVX2-NEXT: movl %edi, %eax
-; X64-AVX2-NEXT: andl $7, %ecx
+; X64-AVX2-NEXT: andb $7, %cl
; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT: shldl %cl, %eax, %eax
+; X64-AVX2-NEXT: shll %cl, %eax
; X64-AVX2-NEXT: retq
%m = and i32 %a1, 7
%res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %m)
; X32-SSE2-LABEL: fshl_i32_undef1_cst:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: shldl $9, %eax, %eax
+; X32-SSE2-NEXT: shll $9, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshl_i32_undef1_cst:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edi, %eax
-; X64-AVX2-NEXT: shldl $9, %eax, %eax
+; X64-AVX2-NEXT: shll $9, %eax
; X64-AVX2-NEXT: retq
%res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 9)
ret i32 %res
; X32-SSE2-LABEL: fshr_i32_undef0_msk:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT: andl $7, %ecx
-; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
-; X32-SSE2-NEXT: shrdl %cl, %eax, %eax
+; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X32-SSE2-NEXT: andb $7, %cl
+; X32-SSE2-NEXT: shrl %cl, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshr_i32_undef0_msk:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %esi, %ecx
; X64-AVX2-NEXT: movl %edi, %eax
-; X64-AVX2-NEXT: andl $7, %ecx
+; X64-AVX2-NEXT: andb $7, %cl
; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT: shrdl %cl, %eax, %eax
+; X64-AVX2-NEXT: shrl %cl, %eax
; X64-AVX2-NEXT: retq
%m = and i32 %a1, 7
%res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %m)
; X32-SSE2-LABEL: fshr_i32_undef0_cst:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: shrdl $9, %eax, %eax
+; X32-SSE2-NEXT: shrl $9, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshr_i32_undef0_cst:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edi, %eax
-; X64-AVX2-NEXT: shrdl $9, %eax, %eax
+; X64-AVX2-NEXT: shrl $9, %eax
; X64-AVX2-NEXT: retq
%res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 9)
ret i32 %res
; X32-SSE2-LABEL: fshr_i32_undef1_cst:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: shrdl $9, %eax, %eax
+; X32-SSE2-NEXT: shll $23, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshr_i32_undef1_cst:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: shrdl $9, %edi, %eax
+; X64-AVX2-NEXT: movl %edi, %eax
+; X64-AVX2-NEXT: shll $23, %eax
; X64-AVX2-NEXT: retq
%res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 9)
ret i32 %res
define i32 @fshl_i32_zero0_cst(i32 %a0) nounwind {
; X32-SSE2-LABEL: fshl_i32_zero0_cst:
; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT: xorl %eax, %eax
-; X32-SSE2-NEXT: shldl $9, %ecx, %eax
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: shrl $23, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshl_i32_zero0_cst:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: xorl %eax, %eax
-; X64-AVX2-NEXT: shldl $9, %edi, %eax
+; X64-AVX2-NEXT: movl %edi, %eax
+; X64-AVX2-NEXT: shrl $23, %eax
; X64-AVX2-NEXT: retq
%res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 9)
ret i32 %res
define i32 @fshl_i32_zero1_cst(i32 %a0) nounwind {
; X32-SSE2-LABEL: fshl_i32_zero1_cst:
; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT: xorl %eax, %eax
-; X32-SSE2-NEXT: shrdl $23, %ecx, %eax
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: shll $9, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshl_i32_zero1_cst:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: xorl %eax, %eax
-; X64-AVX2-NEXT: shrdl $23, %edi, %eax
+; X64-AVX2-NEXT: movl %edi, %eax
+; X64-AVX2-NEXT: shll $9, %eax
; X64-AVX2-NEXT: retq
%res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 9)
ret i32 %res
define i32 @fshr_i32_zero0_cst(i32 %a0) nounwind {
; X32-SSE2-LABEL: fshr_i32_zero0_cst:
; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT: xorl %eax, %eax
-; X32-SSE2-NEXT: shldl $23, %ecx, %eax
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: shrl $9, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshr_i32_zero0_cst:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: xorl %eax, %eax
-; X64-AVX2-NEXT: shldl $23, %edi, %eax
+; X64-AVX2-NEXT: movl %edi, %eax
+; X64-AVX2-NEXT: shrl $9, %eax
; X64-AVX2-NEXT: retq
%res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 9)
ret i32 %res
define i32 @fshr_i32_zero1_cst(i32 %a0) nounwind {
; X32-SSE2-LABEL: fshr_i32_zero1_cst:
; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT: xorl %eax, %eax
-; X32-SSE2-NEXT: shrdl $9, %ecx, %eax
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: shll $23, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshr_i32_zero1_cst:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: xorl %eax, %eax
-; X64-AVX2-NEXT: shrdl $9, %edi, %eax
+; X64-AVX2-NEXT: movl %edi, %eax
+; X64-AVX2-NEXT: shll $23, %eax
; X64-AVX2-NEXT: retq
%res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 9)
ret i32 %res