From df332423cfc003a2feb21872dafba2894ca18cb0 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dimitry@andric.com>
Date: Sat, 15 Apr 2017 22:15:01 +0000
Subject: [PATCH] Use correct registers for "A" inline asm constraint

Summary:
In PR32594, inline assembly using the 'A' constraint on x86_64 causes
llvm to crash with a "Cannot select" stack trace.  This is because
`X86TargetLowering::getRegForInlineAsmConstraint` hardcodes that 'A'
means the EAX and EDX registers.

However, on x86_64 it means the RAX and RDX registers, and on 16-bit x86
(ia16?) it means the old AX and DX registers.

Add new register classes in `X86RegisterInfo.td` to support these cases,
and amend the logic in `getRegForInlineAsmConstraint` to cope with
different subtargets.  Also add a test case, derived from PR32594.

Reviewers: craig.topper, qcolombet, RKSimon, ab

Reviewed By: ab

Subscribers: ab, emaste, royger, llvm-commits

Differential Revision: https://reviews.llvm.org/D31902

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300404 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp          | 16 ++++++++++---
 lib/Target/X86/X86RegisterInfo.td           |  4 +++-
 test/CodeGen/X86/inline-asm-A-constraint.ll | 35 +++++++++++++++++++++++++++++
 3 files changed, 51 insertions(+), 4 deletions(-)
 create mode 100644 test/CodeGen/X86/inline-asm-A-constraint.ll

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5c8a95963c3..7ff483063ec 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -35917,10 +35917,20 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       return Res;
     }
 
-    // 'A' means EAX + EDX.
+    // 'A' means [ER]AX + [ER]DX.
     if (Constraint == "A") {
-      Res.first = X86::EAX;
-      Res.second = &X86::GR32_ADRegClass;
+      if (Subtarget.is64Bit()) {
+        Res.first = X86::RAX;
+        Res.second = &X86::GR64_ADRegClass;
+      } else if (Subtarget.is32Bit()) {
+        Res.first = X86::EAX;
+        Res.second = &X86::GR32_ADRegClass;
+      } else if (Subtarget.is16Bit()) {
+        Res.first = X86::AX;
+        Res.second = &X86::GR16_ADRegClass;
+      } else {
+        llvm_unreachable("Expecting 64, 32 or 16 bit subtarget");
+      }
       return Res;
     }
     return Res;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index b8cae2f0bd2..c177ba1d52f 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -437,8 +437,10 @@ def LOW32_ADDR_ACCESS : RegisterClass<"X86", [i32], 32, (add GR32, RIP)>;
 def LOW32_ADDR_ACCESS_RBP : RegisterClass<"X86", [i32], 32,
                                           (add LOW32_ADDR_ACCESS, RBP)>;
 
-// A class to support the 'A' assembler constraint: EAX then EDX.
+// A class to support the 'A' assembler constraint: [ER]AX then [ER]DX.
+def GR16_AD : RegisterClass<"X86", [i16], 16, (add AX, DX)>;
 def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>;
+def GR64_AD : RegisterClass<"X86", [i64], 64, (add RAX, RDX)>;
 
 // Scalar SSE2 floating point registers.
 def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
diff --git a/test/CodeGen/X86/inline-asm-A-constraint.ll b/test/CodeGen/X86/inline-asm-A-constraint.ll
new file mode 100644
index 00000000000..2ad011e88e0
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-A-constraint.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64--"
+
+; Function Attrs: nounwind uwtable
+define { i64, i64 } @foo(i8* %ptr, i128* nocapture readonly %src, i128* nocapture readonly %dst) local_unnamed_addr #0 {
+entry:
+  %0 = load i128, i128* %dst, align 16, !tbaa !1
+  %shr = lshr i128 %0, 64
+  %conv = trunc i128 %shr to i64
+  %conv1 = trunc i128 %0 to i64
+  %1 = load i128, i128* %src, align 16, !tbaa !1
+  %2 = tail call i128 asm sideeffect "lock; cmpxchg16b $1", "=A,=*m,{cx},{bx},0,*m,~{dirflag},~{fpsr},~{flags}"(i8* %ptr, i64 %conv, i64 %conv1, i128 %1, i8* %ptr) #1, !srcloc !5
+  %retval.sroa.0.0.extract.trunc = trunc i128 %2 to i64
+  %retval.sroa.2.0.extract.shift = lshr i128 %2, 64
+  %retval.sroa.2.0.extract.trunc = trunc i128 %retval.sroa.2.0.extract.shift to i64
+  %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.extract.trunc, 0
+  %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1
+  ret { i64, i64 } %.fca.1.insert
+}
+; CHECK: lock
+; CHECK-NEXT: cmpxchg16b
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 5.0.0 (trunk 300088)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"__int128", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{i32 269}
-- 
2.11.0