[X86] Avoid using high register trick for test instruction

author Amaury Sechet <deadalnix@gmail.com>

Mon, 29 Jan 2018 20:54:33 +0000 (20:54 +0000)

committer Amaury Sechet <deadalnix@gmail.com>

Mon, 29 Jan 2018 20:54:33 +0000 (20:54 +0000)
author Amaury Sechet <deadalnix@gmail.com>
Mon, 29 Jan 2018 20:54:33 +0000 (20:54 +0000)
committer Amaury Sechet <deadalnix@gmail.com>
Mon, 29 Jan 2018 20:54:33 +0000 (20:54 +0000)
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp

index e325d97..cbc8eee 100644 (file)
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -3073,67 +3073,33 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
          return;
        }
  
-      // For example, "testl %eax, $2048" to "testb %ah, $8".
-      if (isShiftedUInt<8, 8>(Mask) &&
-          (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
-        // Shift the immediate right by 8 bits.
-        SDValue ShiftedImm = CurDAG->getTargetConstant(Mask >> 8, dl, MVT::i8);
-        SDValue Reg = N0.getOperand(0);
-
-        // Extract the h-register.
-        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
-                                                        MVT::i8, Reg);
-
-        // Emit a testb.  The EXTRACT_SUBREG becomes a COPY that can only
-        // target GR8_NOREX registers, so make sure the register class is
-        // forced.
-        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
-                                                 MVT::i32, Subreg, ShiftedImm);
-        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
-        // one, do not call ReplaceAllUsesWith.
-        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
-                    SDValue(NewNode, 0));
-        CurDAG->RemoveDeadNode(Node);
-        return;
-      }
-
-      // For example, "testl %eax, $32776" to "testw %ax, $32776".
-      // NOTE: We only want to form TESTW instructions if optimizing for
-      // min size. Otherwise we only save one byte and possibly get a length
-      // changing prefix penalty in the decoders.
-      if (OptForMinSize && isUInt<16>(Mask) && N0.getValueType() != MVT::i16 &&
-          (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
-        SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i16);
-        SDValue Reg = N0.getOperand(0);
-
-        // Extract the 16-bit subregister.
-        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
-                                                        MVT::i16, Reg);
-
-        // Emit a testw.
-        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
-                                                 Subreg, Imm);
-        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
-        // one, do not call ReplaceAllUsesWith.
-        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
-                    SDValue(NewNode, 0));
-        CurDAG->RemoveDeadNode(Node);
-        return;
-      }
-
        // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
-      if (isUInt<32>(Mask) && N0.getValueType() == MVT::i64 &&
+      if (isUInt<32>(Mask) &&
            (!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) {
-        SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i32);
+        MVT VT = MVT::i32;
+        int SubRegOp = X86::sub_32bit;
+        unsigned Op = X86::TEST32ri;
+
+        // For example, "testl %eax, $32776" to "testw %ax, $32776".
+        // NOTE: We only want to form TESTW instructions if optimizing for
+        // min size. Otherwise we only save one byte and possibly get a length
+        // changing prefix penalty in the decoders.
+        if (OptForMinSize && isUInt<16>(Mask) &&
+            (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
+          VT = MVT::i16;
+          SubRegOp = X86::sub_16bit;
+          Op = X86::TEST16ri;
+        }
+
+        SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
          SDValue Reg = N0.getOperand(0);
  
-        // Extract the 32-bit subregister.
-        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
-                                                        MVT::i32, Reg);
+        // Extract the subregister if necessary.
+        if (N0.getValueType() != VT)
+          Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
  
-        // Emit a testl.
-        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
-                                                 Subreg, Imm);
+        // Emit a testl or testw.
+        SDNode *NewNode = CurDAG->getMachineNode(Op, dl, MVT::i32, Reg, Imm);
          // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
          // one, do not call ReplaceAllUsesWith.
          ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td

index d09deb5..d35b433 100644 (file)
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1257,14 +1257,6 @@ let isCompare = 1 in {
      def TEST32mi   : BinOpMI_F<0xF6, "test", Xi32, X86testpat, MRM0m>;
      let Predicates = [In64BitMode] in
      def TEST64mi32 : BinOpMI_F<0xF6, "test", Xi64, X86testpat, MRM0m>;
-
-    // When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the
-    // register class is constrained to GR8_NOREX. This pseudo is explicitly
-    // marked side-effect free, since it doesn't have an isel pattern like
-    // other test instructions.
-    let isPseudo = 1, hasSideEffects = 0 in
-    def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
-                          "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>;
    } // Defs = [EFLAGS]
  
    def TEST8i8    : BinOpAI_F<0xA8, "test", Xi8 , AL,
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp

index ba5f0f2..9c6c860 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -8018,9 +8018,6 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
    case X86::VMOVUPSZ256mr_NOVLX:
      return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
                              get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
-  case X86::TEST8ri_NOREX:
-    MI.setDesc(get(X86::TEST8ri));
-    return true;
    case X86::MOV32ri64:
      MI.setDesc(get(X86::MOV32ri));
      return true;
diff --git a/lib/Target/X86/X86MacroFusion.cpp b/lib/Target/X86/X86MacroFusion.cpp

index 67d95c2..4e11397 100644 (file)
--- a/lib/Target/X86/X86MacroFusion.cpp
+++ b/lib/Target/X86/X86MacroFusion.cpp
@@ -86,7 +86,6 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
    case X86::TEST16mr:
    case X86::TEST32mr:
    case X86::TEST64mr:
-  case X86::TEST8ri_NOREX:
    case X86::AND16i16:
    case X86::AND16ri:
    case X86::AND16ri8:
diff --git a/test/CodeGen/X86/test-shrink.ll b/test/CodeGen/X86/test-shrink.ll

index a054c1f..0cc7849 100644 (file)
--- a/test/CodeGen/X86/test-shrink.ll
+++ b/test/CodeGen/X86/test-shrink.ll
@@ -484,8 +484,7 @@ no:
  define void @truncand32(i16 inreg %x) nounwind {
  ; CHECK-LINUX64-LABEL: truncand32:
  ; CHECK-LINUX64:       # %bb.0:
-; CHECK-LINUX64-NEXT:    andl $2049, %edi # imm = 0x801
-; CHECK-LINUX64-NEXT:    testw %di, %di
+; CHECK-LINUX64-NEXT:    testl $2049, %edi # imm = 0x801
  ; CHECK-LINUX64-NEXT:    je .LBB11_1
  ; CHECK-LINUX64-NEXT:  # %bb.2: # %no
  ; CHECK-LINUX64-NEXT:    retq
@@ -498,8 +497,7 @@ define void @truncand32(i16 inreg %x) nounwind {
  ; CHECK-WIN32-64-LABEL: truncand32:
  ; CHECK-WIN32-64:       # %bb.0:
  ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
-; CHECK-WIN32-64-NEXT:    andl $2049, %ecx # imm = 0x801
-; CHECK-WIN32-64-NEXT:    testw %cx, %cx
+; CHECK-WIN32-64-NEXT:    testl $2049, %ecx # imm = 0x801
  ; CHECK-WIN32-64-NEXT:    je .LBB11_1
  ; CHECK-WIN32-64-NEXT:  # %bb.2: # %no
  ; CHECK-WIN32-64-NEXT:    addq $40, %rsp
@@ -511,8 +509,7 @@ define void @truncand32(i16 inreg %x) nounwind {
  ;
  ; CHECK-X86-LABEL: truncand32:
  ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    andl $2049, %eax # imm = 0x801
-; CHECK-X86-NEXT:    testw %ax, %ax
+; CHECK-X86-NEXT:    testl $2049, %eax # imm = 0x801
  ; CHECK-X86-NEXT:    je .LBB11_1
  ; CHECK-X86-NEXT:  # %bb.2: # %no
  ; CHECK-X86-NEXT:    retl
diff --git a/test/CodeGen/X86/testb-je-fusion.ll b/test/CodeGen/X86/testb-je-fusion.ll

index 9822ad3..47453ca 100644 (file)
--- a/test/CodeGen/X86/testb-je-fusion.ll
+++ b/test/CodeGen/X86/testb-je-fusion.ll
@@ -6,9 +6,8 @@
  define i32 @check_flag(i32 %flags, ...) nounwind {
  ; CHECK-LABEL: check_flag:
  ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edi, %ecx
  ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testb $2, %ch
+; CHECK-NEXT:    testl $512, %edi # imm = 0x200
  ; CHECK-NEXT:    je .LBB0_2
  ; CHECK-NEXT:  # %bb.1: # %if.then
  ; CHECK-NEXT:    movl $1, %eax
diff --git a/test/CodeGen/X86/vastart-defs-eflags.ll b/test/CodeGen/X86/vastart-defs-eflags.ll

index 4c527a7..6ef6915 100644 (file)
--- a/test/CodeGen/X86/vastart-defs-eflags.ll
+++ b/test/CodeGen/X86/vastart-defs-eflags.ll
@@ -8,9 +8,7 @@ target triple = "x86_64-apple-macosx10.10.0"
  define i32 @check_flag(i32 %flags, ...) nounwind {
  ; CHECK-LABEL: check_flag:
  ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    subq $48, %rsp
-; CHECK-NEXT:    movl %edi, %ebx
+; CHECK-NEXT:    subq $56, %rsp
  ; CHECK-NEXT:    testb %al, %al
  ; CHECK-NEXT:    je LBB0_2
  ; CHECK-NEXT:  ## %bb.1: ## %entry
@@ -29,7 +27,7 @@ define i32 @check_flag(i32 %flags, ...) nounwind {
  ; CHECK-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
  ; CHECK-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp)
  ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testb $2, %bh
+; CHECK-NEXT:    testl $512, %edi ## imm = 0x200
  ; CHECK-NEXT:    je LBB0_4
  ; CHECK-NEXT:  ## %bb.3: ## %if.then
  ; CHECK-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax
@@ -40,8 +38,7 @@ define i32 @check_flag(i32 %flags, ...) nounwind {
  ; CHECK-NEXT:    movl $8, 0
  ; CHECK-NEXT:    movl $1, %eax
  ; CHECK-NEXT:  LBB0_4: ## %if.end
-; CHECK-NEXT:    addq $48, %rsp
-; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    addq $56, %rsp
  ; CHECK-NEXT:    retq
  entry:
    %and = and i32 %flags, 512
author	Amaury Sechet <deadalnix@gmail.com>
	Mon, 29 Jan 2018 20:54:33 +0000 (20:54 +0000)
committer	Amaury Sechet <deadalnix@gmail.com>
	Mon, 29 Jan 2018 20:54:33 +0000 (20:54 +0000)
lib/Target/X86/X86ISelDAGToDAG.cpp		patch \| blob \| history
lib/Target/X86/X86InstrArithmetic.td		patch \| blob \| history
lib/Target/X86/X86InstrInfo.cpp		patch \| blob \| history
lib/Target/X86/X86MacroFusion.cpp		patch \| blob \| history
test/CodeGen/X86/test-shrink.ll		patch \| blob \| history
test/CodeGen/X86/testb-je-fusion.ll		patch \| blob \| history
test/CodeGen/X86/vastart-defs-eflags.ll		patch \| blob \| history