[NVPTX] Use ldg for explicitly invariant loads.

author Justin Lebar <jlebar@google.com>

Sun, 11 Sep 2016 01:39:04 +0000 (01:39 +0000)

committer Justin Lebar <jlebar@google.com>

Sun, 11 Sep 2016 01:39:04 +0000 (01:39 +0000)
author Justin Lebar <jlebar@google.com>
Sun, 11 Sep 2016 01:39:04 +0000 (01:39 +0000)
committer Justin Lebar <jlebar@google.com>
Sun, 11 Sep 2016 01:39:04 +0000 (01:39 +0000)
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

index 14aa3f1..7ab15ee 100644 (file)
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -558,21 +558,30 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
  
  static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
                            unsigned CodeAddrSpace, MachineFunction *F) {
-  // To use non-coherent caching, the load has to be from global
-  // memory and we have to prove that the memory area is not written
-  // to anywhere for the duration of the kernel call, not even after
-  // the load.
+  // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
+  // space.
    //
-  // To ensure that there are no writes to the memory, we require the
-  // underlying pointer to be a noalias (__restrict) kernel parameter
-  // that is never used for a write. We can only do this for kernel
-  // functions since from within a device function, we cannot know if
-  // there were or will be writes to the memory from the caller - or we
-  // could, but then we would have to do inter-procedural analysis.
-  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
-      !isKernelFunction(*F->getFunction())) {
+  // We have two ways of identifying invariant loads: Loads may be explicitly
+  // marked as invariant, or we may infer them to be invariant.
+  //
+  // We currently infer invariance only for kernel function pointer params that
+  // are noalias (i.e. __restrict) and never written to.
+  //
+  // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
+  // not during the SelectionDAG phase).
+  //
+  // TODO: Infer invariance only at -O2.  We still want to use ldg at -O0 for
+  // explicitly invariant loads because that is how clang tells us to use ldg
+  // when the user uses a builtin.
+  if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
+    return false;
+
+  if (N->isInvariant())
+    return true;
+
+  // Load wasn't explicitly invariant.  Attempt to infer invariance.
+  if (!isKernelFunction(*F->getFunction()))
      return false;
-  }
  
    // We use GetUnderlyingObjects() here instead of
    // GetUnderlyingObject() mainly because the former looks through phi
diff --git a/test/CodeGen/NVPTX/ldg-invariant.ll b/test/CodeGen/NVPTX/ldg-invariant.ll

new file mode 100644 (file)

index 0000000..40dad1f
--- /dev/null
+++ b/test/CodeGen/NVPTX/ldg-invariant.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
+
+; Check that invariant loads from the global addrspace are lowered to
+; ld.global.nc.
+
+; CHECK-LABEL: @ld_global
+define i32 @ld_global(i32 addrspace(1)* %ptr) {
+; CHECK: ld.global.nc.{{[a-z]}}32
+  %a = load i32, i32 addrspace(1)* %ptr, !invariant.load !0
+  ret i32 %a
+}
+
+; CHECK-LABEL: @ld_not_invariant
+define i32 @ld_not_invariant(i32 addrspace(1)* %ptr) {
+; CHECK: ld.global.{{[a-z]}}32
+  %a = load i32, i32 addrspace(1)* %ptr
+  ret i32 %a
+}
+
+; CHECK-LABEL: @ld_not_global_addrspace
+define i32 @ld_not_global_addrspace(i32 addrspace(0)* %ptr) {
+; CHECK: ld.{{[a-z]}}32
+  %a = load i32, i32 addrspace(0)* %ptr
+  ret i32 %a
+}
+
+!0 = !{}
author	Justin Lebar <jlebar@google.com>
	Sun, 11 Sep 2016 01:39:04 +0000 (01:39 +0000)
committer	Justin Lebar <jlebar@google.com>
	Sun, 11 Sep 2016 01:39:04 +0000 (01:39 +0000)
lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp		patch | blob | history
test/CodeGen/NVPTX/ldg-invariant.ll	[new file with mode: 0644]	patch | blob