[X86] Limit store merge size when noimplicitfloat is set (PR34421)

author Simon Pilgrim <llvm-dev@redking.me.uk>

Tue, 5 Sep 2017 13:40:29 +0000 (13:40 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Tue, 5 Sep 2017 13:40:29 +0000 (13:40 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 5 Sep 2017 13:40:29 +0000 (13:40 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 5 Sep 2017 13:40:29 +0000 (13:40 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 023e539..81eeead 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4611,6 +4611,20 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
    return Subtarget.hasLZCNT();
  }
  
+bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
+                                         const SelectionDAG &DAG) const {
+  // Do not merge stores to float value size (128 bits) if the noimplicitfloat
+  // attribute is set; cap the merged width at 64 bits (32 if not 64-bit).
+  bool NoFloat = DAG.getMachineFunction().getFunction()->hasFnAttribute(
+      Attribute::NoImplicitFloat);
+
+  if (NoFloat) {
+    unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
+    return (MemVT.getSizeInBits() <= MaxIntSize);
+  }
+  return true;
+}
+
  bool X86TargetLowering::isCtlzFast() const {
    return Subtarget.hasFastLZCNT();
  }
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index 53cd8ca..663b953 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -814,6 +814,9 @@ namespace llvm {
  
      bool mergeStoresAfterLegalization() const override { return true; }
  
+    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
+                          const SelectionDAG &DAG) const override;
+
      bool isCheapToSpeculateCttz() const override;
  
      bool isCheapToSpeculateCtlz() const override;
diff --git a/test/CodeGen/X86/pr34421.ll b/test/CodeGen/X86/pr34421.ll

new file mode 100644 (file)

index 0000000..5db8b4c
--- /dev/null
+++ b/test/CodeGen/X86/pr34421.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-apple-macosx10.13.0 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.13.0 | FileCheck %s --check-prefix=X64
+
+define void @thread_selfcounts() noimplicitfloat noredzone nounwind {
+; X86-LABEL: thread_selfcounts:
+; X86:       ## BB#0: ## %entry
+; X86-NEXT:    subl $44, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    ## -- End function
+;
+; X64-LABEL: thread_selfcounts:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    subq $40, %rsp
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    movq %rax, (%rsp)
+; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    ## -- End function
+entry:
+  %counts = alloca [2 x i64], align 16
+  %thread_counts = alloca [3 x i64], align 16
+  %arraydecay = getelementptr inbounds [3 x i64], [3 x i64]* %thread_counts, i64 0, i64 0
+  %0 = load i64, i64* %arraydecay, align 16
+  %arrayidx3 = getelementptr inbounds [2 x i64], [2 x i64]* %counts, i64 0, i64 0
+  store i64 %0, i64* %arrayidx3, align 16
+  %arrayidx6 = getelementptr inbounds [3 x i64], [3 x i64]* %thread_counts, i64 0, i64 1
+  %1 = load i64, i64* %arrayidx6, align 8
+  %arrayidx10 = getelementptr inbounds [2 x i64], [2 x i64]* %counts, i64 0, i64 1
+  store i64 %1, i64* %arrayidx10, align 8
+  unreachable
+}
+
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Tue, 5 Sep 2017 13:40:29 +0000 (13:40 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Tue, 5 Sep 2017 13:40:29 +0000 (13:40 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.h		patch \| blob \| history
test/CodeGen/X86/pr34421.ll	[new file with mode: 0644]	patch \| blob