From 1b8408cc62f2cb2f6d35cefe77bb5037cc10328f Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Thu, 11 Jul 2019 01:01:39 +0000
Subject: [PATCH] [X86] Don't convert 8 or 16 bit ADDs to LEAs on Atom in
 FixupLEAPass.

We use the functions that convert to three-address form to do the
conversion, but converting an 8 or 16 bit ADD will cause them to
create a virtual register. This can't be done after register
allocation, which is where this pass runs.

I've switched the pass completely to a whitelist of instructions
that can be converted to LEA instead of a blacklist that was
incorrect. This will avoid surprises if we enhance the
three-address conversion function to include additional
instructions in the future.

Fixes PR42565.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365720 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FixupLEAs.cpp | 27 +++++++++++++++------------
 test/CodeGen/X86/pr42565.ll     | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 12 deletions(-)
 create mode 100644 test/CodeGen/X86/pr42565.ll

diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index 60f70ad1059..041529a0be6 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -149,6 +149,9 @@ FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
     return nullptr;
 
   switch (MI.getOpcode()) {
+  default:
+    // Only convert instructions that we've verified are safe.
+    return nullptr;
   case X86::ADD64ri32:
   case X86::ADD64ri8:
   case X86::ADD64ri32_DB:
@@ -157,24 +160,24 @@ FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
   case X86::ADD32ri8:
   case X86::ADD32ri_DB:
   case X86::ADD32ri8_DB:
-  case X86::ADD16ri:
-  case X86::ADD16ri8:
-  case X86::ADD16ri_DB:
-  case X86::ADD16ri8_DB:
     if (!MI.getOperand(2).isImm()) {
       // convertToThreeAddress will call getImm()
       // which requires isImm() to be true
       return nullptr;
     }
     break;
-  case X86::ADD16rr:
-  case X86::ADD16rr_DB:
-    if (MI.getOperand(1).getReg() != MI.getOperand(2).getReg()) {
-      // if src1 != src2, then convertToThreeAddress will
-      // need to create a Virtual register, which we cannot do
-      // after register allocation.
-      return nullptr;
-    }
+  case X86::SHL64ri:
+  case X86::SHL32ri:
+  case X86::INC64r:
+  case X86::INC32r:
+  case X86::DEC64r:
+  case X86::DEC32r:
+  case X86::ADD64rr:
+  case X86::ADD64rr_DB:
+  case X86::ADD32rr:
+  case X86::ADD32rr_DB:
+    // These instructions are all fine to convert.
+    break;
   }
   MachineFunction::iterator MFI = MBB.getIterator();
   return TII->convertToThreeAddress(MFI, MI, nullptr);
diff --git a/test/CodeGen/X86/pr42565.ll b/test/CodeGen/X86/pr42565.ll
new file mode 100644
index 00000000000..2c80c2121a0
--- /dev/null
+++ b/test/CodeGen/X86/pr42565.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=atom | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=atom -filetype=obj -o /dev/null
+
+define void @HUF_writeCTable_wksp()  {
+; CHECK-LABEL: HUF_writeCTable_wksp:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl $2, %eax
+; CHECK-NEXT:    movb $-2, %cl
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_1: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    leal 1(%rcx), %edx
+; CHECK-NEXT:    movb %dl, (%rax)
+; CHECK-NEXT:    movb %cl, (%rax)
+; CHECK-NEXT:    leaq 2(%rax), %rax
+; CHECK-NEXT:    addb $-2, %cl
+; CHECK-NEXT:    jmp .LBB0_1
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv8 = phi i64 [ 1, %entry ], [ %indvars.iv.next9.1, %for.body ]
+  %0 = trunc i64 %indvars.iv8 to i8
+  %conv = sub i8 0, %0
+  store i8 %conv, i8* undef, align 1
+  %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1
+  %1 = trunc i64 %indvars.iv.next9 to i8
+  %conv.1 = sub i8 0, %1
+  %arrayidx.1 = getelementptr inbounds i8, i8* null, i64 %indvars.iv.next9
+  store i8 %conv.1, i8* %arrayidx.1, align 1
+  %indvars.iv.next9.1 = add nuw nsw i64 %indvars.iv8, 2
+  br i1 false, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
-- 
2.11.0