Remove MVT::i1 xor instruction before SELECT (performance improvement).

author Ayman Musa <ayman.musa@intel.com>

Tue, 13 Sep 2016 09:12:45 +0000 (09:12 +0000)

committer Ayman Musa <ayman.musa@intel.com>

Tue, 13 Sep 2016 09:12:45 +0000 (09:12 +0000)
author Ayman Musa <ayman.musa@intel.com>
Tue, 13 Sep 2016 09:12:45 +0000 (09:12 +0000)
committer Ayman Musa <ayman.musa@intel.com>
Tue, 13 Sep 2016 09:12:45 +0000 (09:12 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 73faff2..d1727a1 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5258,6 +5258,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
      }
    }
  
+  // select (xor Cond, 1), X, Y -> select Cond, Y, X
+  // select (xor Cond, 0), X, Y -> select Cond, X, Y
+  if (VT0 == MVT::i1) {
+    if (N0->getOpcode() == ISD::XOR) {
+      if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
+        SDValue Cond0 = N0->getOperand(0);
+        if (C->isOne()) 
+          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), 
+                             Cond0, N2, N1);
+        else
+          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), 
+                             Cond0, N1, N2);
+      }
+    }
+  }
+
    // fold selects based on a setcc into other things, such as min/max/abs
    if (N0.getOpcode() == ISD::SETCC) {
      // select x, y (fcmp lt x, y) -> fminnum x, y
diff --git a/test/CodeGen/PowerPC/select-i1-vs-i1.ll b/test/CodeGen/PowerPC/select-i1-vs-i1.ll

index ef5b7dc..8bceb9f 100644 (file)
--- a/test/CodeGen/PowerPC/select-i1-vs-i1.ll
+++ b/test/CodeGen/PowerPC/select-i1-vs-i1.ll
@@ -800,10 +800,10 @@ entry:
  ; CHECK-LABEL: @testv4floateq
  ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
  ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
-; CHECK-DAG: xxlor [[REG2:[0-9]+]], 34, 34
-; CHECK-DAG: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK-DAG: xxlor [[REG2:[0-9]+]], 35, 35
+; CHECK-DAG: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
  ; CHECK: bc 12, [[REG1]], .LBB[[BB:[0-9_]+]]
-; CHECK: xxlor [[REG2]], 35, 35
+; CHECK: xxlor [[REG2]], 34, 34
  ; CHECK: .LBB[[BB]]:
  ; CHECK: xxlor 34, [[REG2]], [[REG2]]
  ; CHECK: blr
@@ -928,15 +928,15 @@ entry:
  ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 3
  ; CHECK: crand [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
  ; CHECK: crand [[REG2:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK: creqv [[REG3:[0-9]+]], [[REG2]], [[REG1]]
+; CHECK: crxor [[REG3:[0-9]+]], [[REG2]], [[REG1]]
  ; CHECK: bc 12, [[REG3]], .LBB[[BB1:[0-9_]+]]
-; CHECK: fmr 9, 11
+; CHECK: fmr 11, 9
  ; CHECK: .LBB[[BB1]]:
  ; CHECK: bc 12, [[REG3]], .LBB[[BB2:[0-9_]+]]
-; CHECK: fmr 10, 12
+; CHECK: fmr 12, 10
  ; CHECK: .LBB[[BB2]]:
-; CHECK-DAG: fmr 1, 9
-; CHECK-DAG: fmr 2, 10
+; CHECK-DAG: fmr 1, 11
+; CHECK-DAG: fmr 2, 12
  ; CHECK: blr
  }
  
@@ -1019,9 +1019,11 @@ entry:
  ; CHECK-LABEL: @testv2doubleeq
  ; CHECK-DAG: fcmpu {{[0-9]+}}, 3, 4
  ; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
-; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK: bclr 12, [[REG1]], 0
-; CHECK: vor 2, 3, 3
+; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK: bc 12, [[REG1]], .LBB[[BB55:[0-9_]+]]
+; CHECK: vor 3, 2, 2
+; CHECK: .LBB[[BB55]]
+; CHECK: xxlor 34, 35, 35
  ; CHECK: blr
  }
  
diff --git a/test/CodeGen/X86/xor-select-i1-combine.ll b/test/CodeGen/X86/xor-select-i1-combine.ll

new file mode 100644 (file)

index 0000000..d270afc
--- /dev/null
+++ b/test/CodeGen/X86/xor-select-i1-combine.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+;RUN: llc < %s -O2 -mattr=+avx512f -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK
+
+@n = common global i32 0, align 4
+@m = common global i32 0, align 4
+
+define i32 @main(i8 %small) {
+; CHECK-LABEL: main:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movl $n, %eax
+; CHECK-NEXT:    movl $m, %ecx
+; CHECK-NEXT:    testb $1, %dil
+; CHECK-NEXT:    cmovneq %rax, %rcx
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:    retq
+entry:
+  %0 = and i8 %small, 1
+  %cmp = icmp eq i8 %0, 0
+  %m.n = select i1 %cmp, i32* @m, i32* @n
+  %retval = load volatile i32, i32* %m.n, align 4
+  ret i32 %retval
+}
+
+
+define i32 @main2(i8 %small) {
+; CHECK-LABEL: main2:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movl $m, %eax
+; CHECK-NEXT:    movl $n, %ecx
+; CHECK-NEXT:    testb $1, %dil
+; CHECK-NEXT:    cmovneq %rax, %rcx
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:    retq
+entry:
+  %0 = and i8 %small, 1
+  %cmp = icmp eq i8 %0, 1
+  %m.n = select i1 %cmp, i32* @m, i32* @n
+  %retval = load volatile i32, i32* %m.n, align 4
+  ret i32 %retval
+}
author	Ayman Musa <ayman.musa@intel.com>
	Tue, 13 Sep 2016 09:12:45 +0000 (09:12 +0000)
committer	Ayman Musa <ayman.musa@intel.com>
	Tue, 13 Sep 2016 09:12:45 +0000 (09:12 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
test/CodeGen/PowerPC/select-i1-vs-i1.ll		patch \| blob \| history
test/CodeGen/X86/xor-select-i1-combine.ll	[new file with mode: 0644]	patch \| blob