[X86] Widen the 'AND' mask if doing so shrinks the encoding size

author David Majnemer <david.majnemer@gmail.com>

Sun, 16 Aug 2015 04:52:11 +0000 (04:52 +0000)

committer David Majnemer <david.majnemer@gmail.com>

Sun, 16 Aug 2015 04:52:11 +0000 (04:52 +0000)
author David Majnemer <david.majnemer@gmail.com>
Sun, 16 Aug 2015 04:52:11 +0000 (04:52 +0000)
committer David Majnemer <david.majnemer@gmail.com>
Sun, 16 Aug 2015 04:52:11 +0000 (04:52 +0000)
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp

index d37db7f..3453bf6 100644 (file)
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -198,6 +198,7 @@ namespace {
      SDNode *Select(SDNode *N) override;
      SDNode *SelectGather(SDNode *N, unsigned Opc);
      SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT);
+    SDNode *SelectAndWithSExtImmediate(SDNode *Node, MVT NVT);
  
      bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
      bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
@@ -2208,6 +2209,57 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
    return ResNode;
  }
  
+// Try to shrink the encoding of an AND whose operand 1 is a constant mask by setting additional bits in the mask, so that the AND32ri8 / AND64ri8 opcode with a signed 8-bit immediate can be used.
+// It is only correct to do so if we know a priori that the other operand of the AND (operand 0) already has those bits set to zero; that knowledge comes from computeKnownBits.
+// Returns the new machine node, or nullptr when the rewrite does not apply: NVT is not i32/i64, operand 1 is not a constant, the mask is <= 0, bit 7 of the mask is clear, or the widened mask is not a signed 8-bit value.
+SDNode *X86DAGToDAGISel::SelectAndWithSExtImmediate(SDNode *Node, MVT NVT) {
+  SDValue N0 = Node->getOperand(0);  // Value being masked.
+  SDValue N1 = Node->getOperand(1);  // Candidate constant mask.
+
+  if (NVT != MVT::i32 && NVT != MVT::i64)  // Only these two types have an opcode chosen below.
+    return nullptr;
+
+  auto *Cst = dyn_cast<ConstantSDNode>(N1);
+  if (!Cst)
+    return nullptr;
+
+  // As a heuristic, skip over zero and negative constants (Val <= 0).  It turns
+  // out not to be productive to widen the mask in those cases.
+  int64_t Val = Cst->getSExtValue();
+  if (Val <= 0)
+    return nullptr;
+
+  // Limit ourselves to constants whose low byte already has its sign bit (bit 7)
+  // set; bailing out here avoids the computeKnownBits call below and saves compile time.
+  if ((int8_t)Val >= 0)
+    return nullptr;
+
+  unsigned Opc;  // Opcode taking a signed 8-bit immediate, picked by NVT.
+  switch (NVT.SimpleTy) {
+  default:
+    llvm_unreachable("Unsupported VT!");  // NVT was restricted to i32/i64 above.
+  case MVT::i32:
+    Opc = X86::AND32ri8;
+    break;
+  case MVT::i64:
+    Opc = X86::AND64ri8;
+    break;
+  }
+
+  APInt Op0Zero, Op0One;
+  CurDAG->computeKnownBits(N0, Op0Zero, Op0One);  // Op0One is computed but not used afterwards.
+  // Grow the mask: OR the original mask into the bits of N0 known to be zero; from here on Op0Zero holds the widened mask.
+  Op0Zero |= Val;
+  // See if the widened mask can be encoded as a signed 8-bit immediate.
+  if (!Op0Zero.isSignedIntN(8))
+    return nullptr;
+
+  SDLoc DL(Node);
+  SDValue NewCst =
+      CurDAG->getTargetConstant(Op0Zero.getSExtValue(), DL, MVT::i8);  // NOTE(review): immediate is typed MVT::i8 rather than NVT -- confirm this is what the AND*ri8 patterns expect.
+  return CurDAG->getMachineNode(Opc, DL, NVT, N0, NewCst);  // New AND of N0 with the widened immediate.
+}
+
  SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
    MVT NVT = Node->getSimpleValueType(0);
    unsigned Opc, MOpc;
@@ -2223,7 +2275,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
    }
  
    switch (Opcode) {
-  default: break;
+  default:
+    break;
    case ISD::INTRINSIC_W_CHAIN: {
      unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
      switch (IntNo) {
@@ -2298,7 +2351,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
        return RetVal;
      break;
    }
-  case ISD::AND:
+  case ISD::AND: {
+    if (SDNode *NewNode = SelectAndWithSExtImmediate(Node, NVT)) {
+      ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
+      return nullptr;
+    }
+    // FALLTHROUGH
+  }
    case ISD::OR:
    case ISD::XOR: {
      // For operations of the form (x << C1) op C2, check if we can use a smaller
diff --git a/test/CodeGen/X86/shift-pair.ll b/test/CodeGen/X86/shift-pair.ll

index 62e51f0..a0cdb27 100644 (file)
--- a/test/CodeGen/X86/shift-pair.ll
+++ b/test/CodeGen/X86/shift-pair.ll
@@ -3,7 +3,7 @@
  define i64 @test(i64 %A) {
  ; CHECK: @test
  ; CHECK: shrq $54
-; CHECK: andl $1020
+; CHECK: andq $-4
  ; CHECK: ret
      %B = lshr i64 %A, 56
      %C = shl i64 %B, 2
diff --git a/test/CodeGen/X86/win64_frame.ll b/test/CodeGen/X86/win64_frame.ll

index 477b314..2c62f49 100644 (file)
--- a/test/CodeGen/X86/win64_frame.ll
+++ b/test/CodeGen/X86/win64_frame.ll
@@ -100,9 +100,8 @@ define i32 @f8(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) "no-frame-pointer-elim"="
  
    alloca i32, i32 %a
    ; CHECK:        movl    %ecx, %eax
-  ; CHECK:        leaq    15(,%rax,4), %rcx
-  ; CHECK:        movabsq $34359738352, %rax
-  ; CHECK:        andq    %rcx, %rax
+  ; CHECK:        leaq    15(,%rax,4), %rax
+  ; CHECK:        andq    $-16, %rax
    ; CHECK:        callq   __chkstk
    ; CHECK:        subq    %rax, %rsp
  
diff --git a/test/CodeGen/X86/zext-fold.ll b/test/CodeGen/X86/zext-fold.ll

index a10923f..9757c7a 100644 (file)
--- a/test/CodeGen/X86/zext-fold.ll
+++ b/test/CodeGen/X86/zext-fold.ll
@@ -8,7 +8,7 @@ define i32 @test1(i8 %x) nounwind readnone {
  }
  ; CHECK: test1
  ; CHECK: movzbl
-; CHECK-NEXT: andl {{.*}}224
+; CHECK-NEXT: andl {{.*}}-32
  
  ;; Multiple uses of %x but easily extensible.
  define i32 @test2(i8 %x) nounwind readnone {
@@ -21,7 +21,7 @@ define i32 @test2(i8 %x) nounwind readnone {
  }
  ; CHECK: test2
  ; CHECK: movzbl
-; CHECK: andl $224
+; CHECK: andl $-32
  ; CHECK: orl $63
  
  declare void @use(i32, i8)
@@ -36,6 +36,6 @@ define void @test3(i8 %x) nounwind readnone {
  ; CHECK: test3
  ; CHECK: movzbl {{[0-9]+}}(%esp), [[REGISTER:%e[a-z]{2}]]
  ; CHECK-NEXT: movl [[REGISTER]], 4(%esp)
-; CHECK-NEXT: andl $224, [[REGISTER]]
+; CHECK-NEXT: andl $-32, [[REGISTER]]
  ; CHECK-NEXT: movl [[REGISTER]], (%esp)
  ; CHECK-NEXT: call{{.*}}use
author	David Majnemer <david.majnemer@gmail.com>
	Sun, 16 Aug 2015 04:52:11 +0000 (04:52 +0000)
committer	David Majnemer <david.majnemer@gmail.com>
	Sun, 16 Aug 2015 04:52:11 +0000 (04:52 +0000)
lib/Target/X86/X86ISelDAGToDAG.cpp		patch | blob | history
test/CodeGen/X86/shift-pair.ll		patch | blob | history
test/CodeGen/X86/win64_frame.ll		patch | blob | history
test/CodeGen/X86/zext-fold.ll		patch | blob | history