From bbfe0d287878a3792f2780431604590304555d8b Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Mon, 15 Jul 2019 23:07:56 +0000
Subject: [PATCH] [X86] Teach convertToThreeAddress to handle SUB with
 immediate

We mostly avoid sub with immediate, but there are a couple of cases that can create them. One is the add 128, %rax -> sub -128, %rax trick in isel. The other is when a SUB immediate gets created for a compare where both the flags and the subtract value are used. If we are unable to linearize the SelectionDAG to satisfy the flag user and the sub result user from the same instruction, we will clone the sub immediate for the two uses. The one that produces flags will eventually become a compare. The other will have its flag output dead, and could then be considered for LEA creation.

I added additional test cases to add.ll to show that the sub -128 trick gets converted to LEA and a case where we don't need to convert it.

This showed up in the current codegen for PR42571.

Differential Revision: https://reviews.llvm.org/D64574

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366151 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrArithmetic.td               | 15 ++++----
 lib/Target/X86/X86InstrInfo.cpp                    | 45 ++++++++++++++++++++++
 test/CodeGen/X86/add.ll                            | 14 +++----
 .../CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll | 12 ++----
 test/CodeGen/X86/bmi-intrinsics-fast-isel.ll       | 28 ++++++--------
 test/CodeGen/X86/cgp-usubo.ll                      |  4 +-
 .../CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll |  3 +-
 test/CodeGen/X86/tbm-intrinsics-fast-isel.ll       |  7 ++--
 8 files changed, 81 insertions(+), 47 deletions(-)

diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index fbdc55cb025..e52635f8d48 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -924,11 +924,12 @@ class BinOpAI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
 multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
                          string mnemonic, Format RegMRM, Format MemMRM,
                          SDNode opnodeflag, SDNode opnode,
-                         bit CommutableRR, bit ConvertibleToThreeAddress> {
+                         bit CommutableRR, bit ConvertibleToThreeAddress,
+                         bit ConvertibleToThreeAddressRR> {
   let Defs = [EFLAGS] in {
     let Constraints = "$src1 = $dst" in {
       let isCommutable = CommutableRR in {
-        let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+        let isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in {
           def NAME#8rr  : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
           def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
           def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
@@ -1169,16 +1170,16 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
 
 
 defm AND : ArithBinOp_RF<0x20, 0x22, 0x24, "and", MRM4r, MRM4m,
-                         X86and_flag, and, 1, 0>;
+                         X86and_flag, and, 1, 0, 0>;
 defm OR  : ArithBinOp_RF<0x08, 0x0A, 0x0C, "or", MRM1r, MRM1m,
-                         X86or_flag, or, 1, 0>;
+                         X86or_flag, or, 1, 0, 0>;
 defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
-                         X86xor_flag, xor, 1, 0>;
+                         X86xor_flag, xor, 1, 0, 0>;
 defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
-                         X86add_flag, add, 1, 1>;
+                         X86add_flag, add, 1, 1, 1>;
 let isCompare = 1 in {
 defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
-                         X86sub_flag, sub, 0, 0>;
+                         X86sub_flag, sub, 0, 1, 0>;
 }
 
 // Arithmetic.
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index e5d3a09c291..dbe45356c42 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1078,6 +1078,51 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   case X86::ADD16ri_DB:
   case X86::ADD16ri8_DB:
     return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
+  case X86::SUB8ri:
+  case X86::SUB16ri8:
+  case X86::SUB16ri:
+    // FIXME: Support these similar to ADD8ri/ADD16ri*.
+    return nullptr;
+  case X86::SUB32ri8:
+  case X86::SUB32ri: {
+    assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!");
+    // Only a plain immediate can be negated into an LEA displacement.
+    const MachineOperand &ImmOp = MI.getOperand(2);
+    if (!ImmOp.isImm() || !isInt<32>(-ImmOp.getImm()))
+      return nullptr;
+    int64_t Imm = ImmOp.getImm();
+    unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
+    bool isKill;
+    unsigned SrcReg;
+    MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
+    if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
+                        SrcReg, isKill, ImplicitOp, LV))
+      return nullptr;
+
+    MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
+                                  .add(Dest)
+                                  .addReg(SrcReg, getKillRegState(isKill));
+    if (ImplicitOp.getReg() != 0)
+      MIB.add(ImplicitOp);
+    // Subtracting Imm is the same as adding -Imm as the LEA displacement.
+    NewMI = addOffset(MIB, -Imm);
+    break;
+  }
+
+  case X86::SUB64ri8:
+  case X86::SUB64ri32: {
+    assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!");
+    // Only a plain immediate can be negated into an LEA displacement.
+    const MachineOperand &ImmOp = MI.getOperand(2);
+    if (!ImmOp.isImm() || !isInt<32>(-ImmOp.getImm()))
+      return nullptr;
+    int64_t Imm = ImmOp.getImm();
+    MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(),
+                                      get(X86::LEA64r)).add(Dest).add(Src);
+    NewMI = addOffset(MIB, -Imm);
+    break;
+  }
+
   case X86::VMOVDQU8Z128rmk:
   case X86::VMOVDQU8Z256rmk:
   case X86::VMOVDQU8Zrmk:
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index e9516b87104..1662562bd01 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -16,14 +16,14 @@ define i32 @test1(i32 inreg %a) nounwind {
 ;
 ; X64-LINUX-LABEL: test1:
 ; X64-LINUX:       # %bb.0: # %entry
-; X64-LINUX-NEXT:    movl %edi, %eax
-; X64-LINUX-NEXT:    subl $-128, %eax
+; X64-LINUX-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-LINUX-NEXT:    leal 128(%rdi), %eax
 ; X64-LINUX-NEXT:    retq
 ;
 ; X64-WIN32-LABEL: test1:
 ; X64-WIN32:       # %bb.0: # %entry
-; X64-WIN32-NEXT:    movl %ecx, %eax
-; X64-WIN32-NEXT:    subl $-128, %eax
+; X64-WIN32-NEXT:    # kill: def $ecx killed $ecx def $rcx
+; X64-WIN32-NEXT:    leal 128(%rcx), %eax
 ; X64-WIN32-NEXT:    retq
 entry:
   %b = add i32 %a, 128
@@ -86,14 +86,12 @@ define i64 @test3(i64 inreg %a) nounwind {
 ;
 ; X64-LINUX-LABEL: test3:
 ; X64-LINUX:       # %bb.0: # %entry
-; X64-LINUX-NEXT:    movq %rdi, %rax
-; X64-LINUX-NEXT:    subq $-128, %rax
+; X64-LINUX-NEXT:    leaq 128(%rdi), %rax
 ; X64-LINUX-NEXT:    retq
 ;
 ; X64-WIN32-LABEL: test3:
 ; X64-WIN32:       # %bb.0: # %entry
-; X64-WIN32-NEXT:    movq %rcx, %rax
-; X64-WIN32-NEXT:    subq $-128, %rax
+; X64-WIN32-NEXT:    leaq 128(%rcx), %rax
 ; X64-WIN32-NEXT:    retq
 entry:
   %b = add i64 %a, 128
diff --git a/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll b/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
index 872233f51ad..d704f38307f 100644
--- a/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
+++ b/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
@@ -43,8 +43,7 @@ define i64 @test__blsi_u64(i64 %a0) {
 define i64 @test__blsmsk_u64(i64 %a0) {
 ; X64-LABEL: test__blsmsk_u64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    subq $1, %rax
+; X64-NEXT:    leaq -1(%rdi), %rax
 ; X64-NEXT:    xorq %rdi, %rax
 ; X64-NEXT:    retq
   %dec = sub i64 %a0, 1
@@ -55,8 +54,7 @@ define i64 @test__blsmsk_u64(i64 %a0) {
 define i64 @test__blsr_u64(i64 %a0) {
 ; X64-LABEL: test__blsr_u64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    subq $1, %rax
+; X64-NEXT:    leaq -1(%rdi), %rax
 ; X64-NEXT:    andq %rdi, %rax
 ; X64-NEXT:    retq
   %dec = sub i64 %a0, 1
@@ -124,8 +122,7 @@ define i64 @test_blsi_u64(i64 %a0) {
 define i64 @test_blsmsk_u64(i64 %a0) {
 ; X64-LABEL: test_blsmsk_u64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    subq $1, %rax
+; X64-NEXT:    leaq -1(%rdi), %rax
 ; X64-NEXT:    xorq %rdi, %rax
 ; X64-NEXT:    retq
   %dec = sub i64 %a0, 1
@@ -136,8 +133,7 @@ define i64 @test_blsmsk_u64(i64 %a0) {
 define i64 @test_blsr_u64(i64 %a0) {
 ; X64-LABEL: test_blsr_u64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    subq $1, %rax
+; X64-NEXT:    leaq -1(%rdi), %rax
 ; X64-NEXT:    andq %rdi, %rax
 ; X64-NEXT:    retq
   %dec = sub i64 %a0, 1
diff --git a/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll b/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
index ced1585bd71..c6950da4064 100644
--- a/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
@@ -82,15 +82,14 @@ define i32 @test__blsmsk_u32(i32 %a0) {
 ; X32-LABEL: test__blsmsk_u32:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    subl $1, %eax
+; X32-NEXT:    leal -1(%ecx), %eax
 ; X32-NEXT:    xorl %ecx, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test__blsmsk_u32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    subl $1, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal -1(%rdi), %eax
 ; X64-NEXT:    xorl %edi, %eax
 ; X64-NEXT:    retq
   %dec = sub i32 %a0, 1
@@ -102,15 +101,14 @@ define i32 @test__blsr_u32(i32 %a0) {
 ; X32-LABEL: test__blsr_u32:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    subl $1, %eax
+; X32-NEXT:    leal -1(%ecx), %eax
 ; X32-NEXT:    andl %ecx, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test__blsr_u32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    subl $1, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal -1(%rdi), %eax
 ; X64-NEXT:    andl %edi, %eax
 ; X64-NEXT:    retq
   %dec = sub i32 %a0, 1
@@ -224,15 +222,14 @@ define i32 @test_blsmsk_u32(i32 %a0) {
 ; X32-LABEL: test_blsmsk_u32:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    subl $1, %eax
+; X32-NEXT:    leal -1(%ecx), %eax
 ; X32-NEXT:    xorl %ecx, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_blsmsk_u32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    subl $1, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal -1(%rdi), %eax
 ; X64-NEXT:    xorl %edi, %eax
 ; X64-NEXT:    retq
   %dec = sub i32 %a0, 1
@@ -244,15 +241,14 @@ define i32 @test_blsr_u32(i32 %a0) {
 ; X32-LABEL: test_blsr_u32:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    subl $1, %eax
+; X32-NEXT:    leal -1(%ecx), %eax
 ; X32-NEXT:    andl %ecx, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_blsr_u32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    subl $1, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal -1(%rdi), %eax
 ; X64-NEXT:    andl %edi, %eax
 ; X64-NEXT:    retq
   %dec = sub i32 %a0, 1
diff --git a/test/CodeGen/X86/cgp-usubo.ll b/test/CodeGen/X86/cgp-usubo.ll
index 6733a8258f8..ab82d980972 100644
--- a/test/CodeGen/X86/cgp-usubo.ll
+++ b/test/CodeGen/X86/cgp-usubo.ll
@@ -246,8 +246,8 @@ exit:
 define i32 @PR42571(i32 %x, i32 %y) {
 ; CHECK-LABEL: PR42571:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    subl $1, %eax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal -1(%rdi), %eax
 ; CHECK-NEXT:    andl %edi, %eax
 ; CHECK-NEXT:    cmpl $1, %edi
 ; CHECK-NEXT:    cmovbl %esi, %eax
diff --git a/test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll b/test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll
index 82b3b4c3624..35c14697cf9 100644
--- a/test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll
+++ b/test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll
@@ -76,8 +76,7 @@ define i64 @test__blcs_u64(i64 %a0) {
 define i64 @test__blsfill_u64(i64 %a0) {
 ; X64-LABEL: test__blsfill_u64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    subq $1, %rax
+; X64-NEXT:    leaq -1(%rdi), %rax
 ; X64-NEXT:    orq %rdi, %rax
 ; X64-NEXT:    retq
   %1 = sub i64 %a0, 1
diff --git a/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll b/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll
index 0664d043e11..55fe9b8b3c0 100644
--- a/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll
@@ -125,15 +125,14 @@ define i32 @test__blsfill_u32(i32 %a0) {
 ; X32-LABEL: test__blsfill_u32:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    subl $1, %eax
+; X32-NEXT:    leal -1(%ecx), %eax
 ; X32-NEXT:    orl %ecx, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test__blsfill_u32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    subl $1, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal -1(%rdi), %eax
 ; X64-NEXT:    orl %edi, %eax
 ; X64-NEXT:    retq
   %1 = sub i32 %a0, 1
-- 
2.11.0