From: Simon Pilgrim Date: Fri, 17 May 2019 17:25:55 +0000 (+0000) Subject: [X86][SSE] Match all-of bool scalar reductions into a bitcast/movmsk + cmp. X-Git-Tag: android-x86-9.0-r1~3264 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=1460d5989d13fc170ae28077e34f18186149b05d;p=android-x86%2Fexternal-llvm.git [X86][SSE] Match all-of bool scalar reductions into a bitcast/movmsk + cmp. Same as what we do for vector reductions in combineHorizontalPredicateResult, use movmsk+cmp for scalar (and(extract(x,0),extract(x,1)) reduction patterns. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361052 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a52d0faa35b..743e23977e5 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -37841,6 +37841,24 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineParity(N, DAG, Subtarget)) return V; + // Match all-of bool scalar reductions into a bitcast/movmsk + cmp. + // TODO: Support multiple SrcOps. 
+ if (VT == MVT::i1) { + SmallVector<SDValue, 2> SrcOps; + if (matchBitOpReduction(SDValue(N, 0), ISD::AND, SrcOps) && + SrcOps.size() == 1) { + SDLoc dl(N); + unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements(); + EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts); + SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget); + if (Mask) { + APInt AllBits = APInt::getAllOnesValue(NumElts); + return DAG.getSetCC(dl, MVT::i1, Mask, + DAG.getConstant(AllBits, dl, MaskVT), ISD::SETEQ); + } + } + } + if (DCI.isBeforeLegalizeOps()) return SDValue(); diff --git a/test/CodeGen/X86/movmsk-cmp.ll b/test/CodeGen/X86/movmsk-cmp.ll index c8b3488af16..eeb523c3800 100644 --- a/test/CodeGen/X86/movmsk-cmp.ll +++ b/test/CodeGen/X86/movmsk-cmp.ll @@ -4487,10 +4487,9 @@ define i1 @movmsk_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE2-NEXT: pand %xmm0, %xmm1 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm0 -; SSE2-NEXT: movmskpd %xmm0, %ecx -; SSE2-NEXT: movl %ecx, %eax -; SSE2-NEXT: shrb %al -; SSE2-NEXT: andb %cl, %al +; SSE2-NEXT: movmskpd %xmm0, %eax +; SSE2-NEXT: cmpb $3, %al +; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; ; AVX-LABEL: movmsk_v2i64: @@ -4498,10 +4497,9 @@ define i1 @movmsk_v2i64(<2 x i64> %x, <2 x i64> %y) { ; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %ecx -; AVX-NEXT: movl %ecx, %eax -; AVX-NEXT: shrb %al -; AVX-NEXT: andb %cl, %al +; AVX-NEXT: vmovmskpd %xmm0, %eax +; AVX-NEXT: cmpb $3, %al +; AVX-NEXT: sete %al ; AVX-NEXT: retq ; ; KNL-LABEL: movmsk_v2i64: @@ -4596,19 +4594,17 @@ define i1 @movmsk_v2f64(<2 x double> %x, <2 x double> %y) { ; SSE2-LABEL: movmsk_v2f64: ; SSE2: # %bb.0: ; SSE2-NEXT: cmplepd %xmm0, %xmm1 -; SSE2-NEXT: movmskpd %xmm1, %ecx -; SSE2-NEXT: movl %ecx, %eax -; SSE2-NEXT: shrb %al -; SSE2-NEXT: andb %cl, %al +; SSE2-NEXT: movmskpd %xmm1, %eax +; SSE2-NEXT: cmpb $3, %al +; SSE2-NEXT: sete %al ;
SSE2-NEXT: retq ; ; AVX-LABEL: movmsk_v2f64: ; AVX: # %bb.0: ; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %ecx -; AVX-NEXT: movl %ecx, %eax -; AVX-NEXT: shrb %al -; AVX-NEXT: andb %cl, %al +; AVX-NEXT: vmovmskpd %xmm0, %eax +; AVX-NEXT: cmpb $3, %al +; AVX-NEXT: sete %al ; AVX-NEXT: retq ; ; KNL-LABEL: movmsk_v2f64: