From 8677133ebc57b0aa622da0ba82fae0ec8182dcbd Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Thu, 18 Jan 2018 23:52:31 +0000
Subject: [PATCH] [X86] Add intrinsic support for the RDPID instruction

This adds a new intrinsic to support the rdpid instruction. The implementation is a bit weird because the intrinsic is defined as always returning 32 bits, but the assembler support thinks the instruction produces a 64-bit register in 64-bit mode. But really it zeros the upper 32 bits. So I had to add separate patterns where 64-bit mode uses an extract_subreg.

Differential Revision: https://reviews.llvm.org/D42205

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@322910 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/IR/IntrinsicsX86.td |  6 ++++++
 lib/Support/Host.cpp             |  4 +++-
 lib/Target/X86/X86.td            |  5 ++++-
 lib/Target/X86/X86InstrInfo.td   |  1 +
 lib/Target/X86/X86InstrSystem.td | 20 ++++++++++++++------
 lib/Target/X86/X86Subtarget.cpp  |  1 +
 lib/Target/X86/X86Subtarget.h    |  4 ++++
 test/CodeGen/X86/rdpid.ll        | 21 +++++++++++++++++++++
 8 files changed, 54 insertions(+), 8 deletions(-)
 create mode 100644 test/CodeGen/X86/rdpid.ll
diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index 35eb8310d96..c1879f65e43 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -63,6 +63,12 @@ let TargetPrefix = "x86" in {
               Intrinsic<[llvm_i64_ty], [llvm_i32_ty], []>;
 }
 
+// Read processor ID.
+let TargetPrefix = "x86" in {
+  def int_x86_rdpid : GCCBuiltin<"__builtin_ia32_rdpid">,
+              Intrinsic<[llvm_i32_ty], [], []>;
+}
+
 //===----------------------------------------------------------------------===//
 // CET SS
 let TargetPrefix = "x86" in {
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 3dc67ad782a..695683efa62 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -1255,7 +1255,9 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   Features["avx512vnni"]      = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
   Features["avx512bitalg"]    = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
   Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
-  Features["ibt"]             = HasLeaf7 && ((EDX >> 20) & 1);
+  Features["rdpid"]           = HasLeaf7 && ((ECX >> 22) & 1);
+
+  Features["ibt"] = HasLeaf7 && ((EDX >> 20) & 1);
 
   bool HasLeafD = MaxLevel >= 0xd &&
                   !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index f4a47dcc403..6141df7a0f0 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -249,6 +249,8 @@ def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
                                       "Flush A Cache Line Optimized">;
 def FeatureCLWB    : SubtargetFeature<"clwb", "HasCLWB", "true",
                                       "Cache Line Write Back">;
+def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
+                                    "Support RDPID instructions">;
 // On some processors, instructions that implicitly take two memory operands are
 // slow. In practice, this means that CALL, PUSH, and POP with memory operands
 // should be avoided in favor of a MOV + register CALL/PUSH/POP.
@@ -752,7 +754,8 @@ def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [
   FeatureVPCLMULQDQ,
   FeatureVPOPCNTDQ,
   FeatureGFNI,
-  FeatureCLWB
+  FeatureCLWB,
+  FeatureRDPID
 ]>;
 
 class IcelakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 14c1372cfef..52351f786ec 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -878,6 +878,7 @@ def HasSHSTK     : Predicate<"Subtarget->hasSHSTK()">;
 def HasIBT       : Predicate<"Subtarget->hasIBT()">;
 def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">;
 def HasCLWB      : Predicate<"Subtarget->hasCLWB()">;
+def HasRDPID     : Predicate<"Subtarget->hasRDPID()">;
 def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
 def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
                              AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">;
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index c1837d55e6e..1d1b9698dae 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -700,14 +700,22 @@ let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX] in {
 //===----------------------------------------------------------------------===//
 // RDPID Instruction
 let SchedRW = [WriteSystem] in {
-def RDPID32 : I<0xC7, MRM7r, (outs GR32:$src), (ins),
-              "rdpid\t$src", [], IIC_RDPID>, XS,
-              Requires<[Not64BitMode]>;
-def RDPID64 : I<0xC7, MRM7r, (outs GR64:$src), (ins),
-              "rdpid\t$src", [], IIC_RDPID>, XS,
-              Requires<[In64BitMode]>;
+def RDPID32 : I<0xC7, MRM7r, (outs GR32:$dst), (ins),
+              "rdpid\t$dst", [(set GR32:$dst, (int_x86_rdpid))], IIC_RDPID>, XS,
+              Requires<[Not64BitMode, HasRDPID]>;
+def RDPID64 : I<0xC7, MRM7r, (outs GR64:$dst), (ins),
+              "rdpid\t$dst", [], IIC_RDPID>, XS,
+              Requires<[In64BitMode, HasRDPID]>;
 } // SchedRW
 
+let Predicates = [In64BitMode, HasRDPID] in {
+  // Due to silly instruction definition, we have to compensate for the
+  // instruction outputting a 64-bit register.
+  def : Pat<(int_x86_rdpid),
+            (EXTRACT_SUBREG (RDPID64), sub_32bit)>;
+}
+
+
 //===----------------------------------------------------------------------===//
 // PTWRITE Instruction
 let SchedRW = [WriteSystem] in {
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index d08b44611dc..f4478d182a9 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -315,6 +315,7 @@ void X86Subtarget::initializeEnvironment() {
   HasSGX = false;
   HasCLFLUSHOPT = false;
   HasCLWB = false;
+  HasRDPID = false;
   IsPMULLDSlow = false;
   IsSHLDSlow = false;
   IsUAMem16Slow = false;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index c13247da24b..77f4a16d1e4 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -345,6 +345,9 @@ protected:
   /// Processor supports Cache Line Write Back instruction
   bool HasCLWB;
 
+  /// Processor supports RDPID instruction
+  bool HasRDPID;
+
   /// Use software floating point for code generation.
   bool UseSoftFloat;
 
@@ -579,6 +582,7 @@ public:
   bool hasIBT() const { return HasIBT; }
   bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
   bool hasCLWB() const { return HasCLWB; }
+  bool hasRDPID() const { return HasRDPID; }
 
   bool isXRaySupported() const override { return is64Bit(); }
 
diff --git a/test/CodeGen/X86/rdpid.ll b/test/CodeGen/X86/rdpid.ll
new file mode 100644
index 00000000000..7eafb6cb1ce
--- /dev/null
+++ b/test/CodeGen/X86/rdpid.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=rdpid | FileCheck %s --check-prefix=CHECK --check-prefix=X86-64
+; RUN: llc < %s -mtriple=i686-- -mattr=rdpid | FileCheck %s --check-prefix=CHECK --check-prefix=X86
+
+define i32 @test_builtin_rdpid() {
+; X86-64-LABEL: test_builtin_rdpid:
+; X86-64:       # %bb.0:
+; X86-64-NEXT:    rdpid %rax
+; X86-64-NEXT:    # kill: def %eax killed %eax killed %rax
+; X86-64-NEXT:    retq
+;
+; X86-LABEL: test_builtin_rdpid:
+; X86:       # %bb.0:
+; X86-NEXT:    rdpid %eax
+; X86-NEXT:    retl
+  %1 = tail call i32 @llvm.x86.rdpid()
+  ret i32 %1
+}
+
+declare i32 @llvm.x86.rdpid()
+
-- 
2.11.0