From 156f3b71997e37d45e22b5aca537ea90295faabd Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sat, 4 Feb 2017 23:26:39 +0000
Subject: [PATCH] [DAGCombiner] Canonicalize the order of a chain of
 INSERT_SUBVECTORs.

Based on similar code for INSERT_VECTOR_ELT.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294110 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 28 ++++++++++++++++++++++++----
 test/CodeGen/X86/avx512-mask-op.ll       | 34 ++++++++++++++------------------
 2 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d38a9fca019..aad07d8ae3c 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14547,15 +14547,35 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
                        N0.getOperand(0), N1, N2);
 
+  if (!isa<ConstantSDNode>(N2))
+    return SDValue();
+
+  unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+
+  // Canonicalize insert_subvector dag nodes.
+  // Example:
+  // (insert_subvector (insert_subvector A, Idx0), Idx1)
+  // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
+  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
+      N1.getValueType() == N0.getOperand(1).getValueType() &&
+      isa<ConstantSDNode>(N0.getOperand(2))) {
+    unsigned OtherIdx = cast<ConstantSDNode>(N0.getOperand(2))->getZExtValue();
+    if (InsIdx < OtherIdx) {
+      // Swap nodes.
+      SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
+                                  N0.getOperand(0), N1, N2);
+      AddToWorklist(NewOp.getNode());
+      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
+                         VT, NewOp, N0.getOperand(1), N0.getOperand(2));
+    }
+  }
+
   if (N0.getValueType() != N1.getValueType())
     return SDValue();
 
   // If the input vector is a concatenation, and the insert replaces
   // one of the halves, we can optimize into a single concat_vectors.
-  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 &&
-      isa<ConstantSDNode>(N2)) {
-    unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
-
+  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2) {
     // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
     // (concat_vectors Z, Y)
     if (InsIdx == 0)
diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll
index 41fb19f38e0..6bd28b18b23 100644
--- a/test/CodeGen/X86/avx512-mask-op.ll
+++ b/test/CodeGen/X86/avx512-mask-op.ll
@@ -509,18 +509,13 @@ define <64 x i8> @test16(i64 %x) {
 ; KNL-NEXT:    movl %edi, {{[0-9]+}}(%rsp)
 ; KNL-NEXT:    kmovw (%rsp), %k1
 ; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
-; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
-; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; KNL-NEXT:    vpmovdb %zmm1, %xmm1
-; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
 ; KNL-NEXT:    movl $1, %eax
-; KNL-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
-; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; KNL-NEXT:    vpsllw $7, %ymm0, %ymm0
-; KNL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
-; KNL-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; KNL-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
+; KNL-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
 ; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
 ; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -528,6 +523,10 @@ define <64 x i8> @test16(i64 %x) {
 ; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
 ; KNL-NEXT:    vpmovdb %zmm2, %xmm2
 ; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
+; KNL-NEXT:    vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT:    vpxor %ymm2, %ymm2, %ymm2
+; KNL-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
 ; KNL-NEXT:    movq %rbp, %rsp
 ; KNL-NEXT:    popq %rbp
 ; KNL-NEXT:    retq
@@ -573,20 +572,15 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
 ; KNL-NEXT:    movl %edi, {{[0-9]+}}(%rsp)
 ; KNL-NEXT:    kmovw (%rsp), %k1
 ; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
-; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
-; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; KNL-NEXT:    vpmovdb %zmm1, %xmm1
-; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
 ; KNL-NEXT:    xorl %eax, %eax
 ; KNL-NEXT:    cmpl %edx, %esi
 ; KNL-NEXT:    setg %al
-; KNL-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
-; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; KNL-NEXT:    vpsllw $7, %ymm0, %ymm0
-; KNL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
-; KNL-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
+; KNL-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
+; KNL-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
 ; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k2
 ; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -594,6 +588,10 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
 ; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
 ; KNL-NEXT:    vpmovdb %zmm2, %xmm2
 ; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
+; KNL-NEXT:    vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT:    vpxor %ymm2, %ymm2, %ymm2
+; KNL-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
 ; KNL-NEXT:    movq %rbp, %rsp
 ; KNL-NEXT:    popq %rbp
 ; KNL-NEXT:    retq
-- 
2.11.0
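
Editor's note (not part of the patch): the new combine reorders a two-deep chain of INSERT_SUBVECTOR nodes so that, when both indices are constants and the subvector types match, the insert with the larger index ends up as the outer node (the InsIdx < OtherIdx swap above). The standalone C++ sketch below is a rough illustration of that ordering rule on a toy vector model; every name in it (InsertChain, canonicalize, materialize) is made up for illustration and none of it uses LLVM APIs. For non-overlapping inserts the reordering leaves the materialized vector unchanged, which the program checks via its exit code.

// canonicalize_insert_chain.cpp - illustrative only, not LLVM code.
#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

// Toy model of (insert_subvector (insert_subvector Base, InnerSub, InnerIdx),
//                                OuterSub, OuterIdx).
struct InsertChain {
  std::vector<int> Base;      // vector being inserted into
  std::vector<int> InnerSub;  // subvector written by the inner insert
  unsigned InnerIdx;          // starting element index of the inner insert
  std::vector<int> OuterSub;  // subvector written by the outer insert
  unsigned OuterIdx;          // starting element index of the outer insert
};

// Mirrors the combine: if the outer insert uses the smaller index, swap the
// two inserts so indices grow from the inner node outwards.
static void canonicalize(InsertChain &C) {
  if (C.OuterIdx < C.InnerIdx) {
    std::swap(C.InnerSub, C.OuterSub);
    std::swap(C.InnerIdx, C.OuterIdx);
  }
}

// Applies the inner insert, then the outer one. For non-overlapping inserts
// (the case the combine targets), the result is unchanged by canonicalize().
static std::vector<int> materialize(const InsertChain &C) {
  std::vector<int> V = C.Base;
  for (std::size_t i = 0; i < C.InnerSub.size(); ++i)
    V[C.InnerIdx + i] = C.InnerSub[i];
  for (std::size_t i = 0; i < C.OuterSub.size(); ++i)
    V[C.OuterIdx + i] = C.OuterSub[i];
  return V;
}

int main() {
  // (insert (insert A, Y, 4), X, 0) canonicalizes to
  // (insert (insert A, X, 0), Y, 4).
  InsertChain C{std::vector<int>(8, 0), {9, 9}, 4, {7, 7}, 0};
  std::vector<int> Before = materialize(C);
  canonicalize(C);
  std::vector<int> After = materialize(C);
  for (int v : After)
    std::printf("%d ", v);  // prints: 7 7 0 0 9 9 0 0
  std::printf("\n");
  return Before == After ? 0 : 1;  // 0 when the swap preserved the result
}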