Don't leave unused divs/rems sitting around in BypassSlowDivision.

author Justin Lebar <jlebar@google.com>

Fri, 28 Oct 2016 21:43:54 +0000 (21:43 +0000)

committer Justin Lebar <jlebar@google.com>

Fri, 28 Oct 2016 21:43:54 +0000 (21:43 +0000)
author Justin Lebar <jlebar@google.com>
Fri, 28 Oct 2016 21:43:54 +0000 (21:43 +0000)
committer Justin Lebar <jlebar@google.com>
Fri, 28 Oct 2016 21:43:54 +0000 (21:43 +0000)
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp

index 41a8543..0e2a465 100644 (file)
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -20,6 +20,7 @@
  #include "llvm/IR/Function.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Instructions.h"
+#include "llvm/Transforms/Utils/Local.h"
  
  using namespace llvm;
  
@@ -246,5 +247,12 @@ bool llvm::bypassSlowDivision(
      MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
    }
  
+  // Above we eagerly create divs and rems, as pairs, so that we can efficiently
+  // create divrem machine instructions.  Now erase any unused divs / rems so we
+  // don't leave extra instructions sitting around.
+  for (auto &KV : DivCache)
+    for (Instruction *Phi : {KV.second.Quotient, KV.second.Remainder})
+      RecursivelyDeleteTriviallyDeadInstructions(Phi);
+
    return MadeChange;
  }
diff --git a/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll b/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll

new file mode 100644 (file)

index 0000000..4846d52
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; We only use the div instruction -- the rem should be DCE'ed.
+; CHECK-LABEL: @div_only
+define void @div_only(i64 %a, i64 %b, i64* %retptr) {
+  ; CHECK: udiv i32
+  ; CHECK-NOT: urem
+  ; CHECK: sdiv i64
+  ; CHECK-NOT: rem
+  %d = sdiv i64 %a, %b
+  store i64 %d, i64* %retptr
+  ret void
+}
+
+; We only use the rem instruction -- the div should be DCE'ed.
+; CHECK-LABEL: @rem_only
+define void @rem_only(i64 %a, i64 %b, i64* %retptr) {
+  ; CHECK-NOT: div
+  ; CHECK: urem i32
+  ; CHECK-NOT: div
+  ; CHECK: rem i64
+  ; CHECK-NOT: div
+  %d = srem i64 %a, %b
+  store i64 %d, i64* %retptr
+  ret void
+}
author	Justin Lebar <jlebar@google.com>
	Fri, 28 Oct 2016 21:43:54 +0000 (21:43 +0000)
committer	Justin Lebar <jlebar@google.com>
	Fri, 28 Oct 2016 21:43:54 +0000 (21:43 +0000)
lib/Transforms/Utils/BypassSlowDivision.cpp		patch | blob | history
test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll	[new file with mode: 0644]	patch | blob