[X86] Change the tuning settings for pentium4 to be more modern since it's the default...

author Craig Topper <craig.topper@intel.com>

Thu, 16 Jul 2020 18:20:57 +0000 (11:20 -0700)

committer Craig Topper <craig.topper@intel.com>

Thu, 16 Jul 2020 19:51:25 +0000 (12:51 -0700)
author Craig Topper <craig.topper@intel.com>
Thu, 16 Jul 2020 18:20:57 +0000 (11:20 -0700)
committer Craig Topper <craig.topper@intel.com>
Thu, 16 Jul 2020 19:51:25 +0000 (12:51 -0700)
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td

index dc1ff72..10d3007 100644 (file)
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1072,10 +1072,20 @@ def : ProcessorModel<"pentium-m", GenericPostRAModel,
                        FeatureCMOV, FeatureInsertVZEROUPPER]>;
  
  foreach P = ["pentium4", "pentium4m"] in {
+//  def : ProcessorModel<P, GenericPostRAModel,
+//                       [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+//                        FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
+//                        FeatureCMOV, FeatureInsertVZEROUPPER]>;
+
+  // Since 'pentium4' is the default 32-bit CPU on Linux and Windows,
+  // give it more modern tunings.
+  // FIXME: This wouldn't be needed if we supported mtune.
    def : ProcessorModel<P, GenericPostRAModel,
-                       [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                       [FeatureX87, FeatureCMPXCHG8B,
                          FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
-                        FeatureCMOV, FeatureInsertVZEROUPPER]>;
+                        FeatureCMOV, FeatureInsertVZEROUPPER,
+                        FeatureSlow3OpsLEA, FeatureSlowDivide64,
+                        FeatureSlowIncDec, FeatureMacroFusion]>;
  }
  
  // Intel Quark.
diff --git a/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll b/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll

index 25e3691..380c18f 100644 (file)
--- a/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll
+++ b/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll
@@ -16,19 +16,19 @@ entry:
    ; X32-LABEL: func_cf_vector_x86
    ; X32:            movl 12(%ebp), %eax
    ; X32:            movl 8(%ebp), %ecx
-  ; X32:            movsd 24(%eax), %xmm4         # xmm4 = mem[0],zero
-  ; X32:            movsd %xmm4, 24(%esp)
-  ; X32:            movsd 16(%eax), %xmm5         # xmm5 = mem[0],zero
-  ; X32:            movsd %xmm5, 16(%esp)
-  ; X32:            movsd (%eax), %xmm6           # xmm6 = mem[0],zero
-  ; X32:            movsd 8(%eax), %xmm7          # xmm7 = mem[0],zero
-  ; X32:            movsd %xmm7, 8(%esp)
-  ; X32:            movsd %xmm6, (%esp)
+  ; X32:            movups     (%eax), %xmm0
+  ; X32:            movups     16(%eax), %xmm1
+  ; X32:            movaps     %xmm0, (%esp)
+  ; X32:            movaps     %xmm1, 16(%esp)
+  ; X32:            movsd      (%esp), %xmm4
+  ; X32:            movsd      8(%esp), %xmm5
+  ; X32:            movsd      16(%esp), %xmm6
+  ; X32:            movsd      24(%esp), %xmm7
    ; X32:            calll *___guard_check_icall_fptr
-  ; X32:            movaps %xmm6, %xmm0
-  ; X32:            movaps %xmm7, %xmm1
-  ; X32:            movaps %xmm5, %xmm2
-  ; X32:            movaps %xmm4, %xmm3
+  ; X32:            movaps %xmm4, %xmm0
+  ; X32:            movaps %xmm5, %xmm1
+  ; X32:            movaps %xmm6, %xmm2
+  ; X32:            movaps %xmm7, %xmm3
    ; X32:            calll  *%ecx
  }
  attributes #0 = { "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
diff --git a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll

index f2c7c2f..295fdfb 100644 (file)
--- a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
+++ b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
@@ -3,8 +3,6 @@
  ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3      2>&1 | FileCheck %s --check-prefix=SLOW
  ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3m     2>&1 | FileCheck %s --check-prefix=SLOW
  ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium-m     2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4      2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m     2>&1 | FileCheck %s --check-prefix=SLOW
  ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=yonah         2>&1 | FileCheck %s --check-prefix=SLOW
  ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=prescott      2>&1 | FileCheck %s --check-prefix=SLOW
  ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nocona        2>&1 | FileCheck %s --check-prefix=SLOW
@@ -14,6 +12,10 @@
  
  ; Intel chips with fast unaligned memory accesses
  
+; Marked fast because this is the default 32-bit mode CPU in clang.
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4      2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m     2>&1 | FileCheck %s --check-prefix=FAST
+
  ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont     2>&1 | FileCheck %s --check-prefix=FAST
  ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem        2>&1 | FileCheck %s --check-prefix=FAST
  ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere       2>&1 | FileCheck %s --check-prefix=FAST
diff --git a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll

index d42dcf0..23a1f56 100644 (file)
--- a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
@@ -40,7 +40,7 @@
  ; OBJ: SubSectionType: FrameData (0xF5)
  ; OBJ:    FrameData {
  ; OBJ:      RvaStart: 0x0
-; OBJ:      CodeSize: 0x34
+; OBJ:      CodeSize: 0x36
  ; OBJ:      PrologSize: 0x9
  ; OBJ:      FrameFunc [
  ; OBJ-NEXT:   $T0 .raSearch =
@@ -50,7 +50,7 @@
  ; OBJ:    }
  ; OBJ:    FrameData {
  ; OBJ:      RvaStart: 0x7
-; OBJ:      CodeSize: 0x2D
+; OBJ:      CodeSize: 0x2F
  ; OBJ:      PrologSize: 0x2
  ; OBJ:      FrameFunc [
  ; OBJ-NEXT:   $T0 .raSearch =
@@ -61,7 +61,7 @@
  ; OBJ:    }
  ; OBJ:    FrameData {
  ; OBJ:      RvaStart: 0x8
-; OBJ:      CodeSize: 0x2C
+; OBJ:      CodeSize: 0x2E
  ; OBJ:      PrologSize: 0x1
  ; OBJ:      FrameFunc [
  ; OBJ-NEXT:   $T0 .raSearch =
@@ -73,7 +73,7 @@
  ; OBJ:    }
  ; OBJ:    FrameData {
  ; OBJ:      RvaStart: 0x9
-; OBJ:      CodeSize: 0x2B
+; OBJ:      CodeSize: 0x2D
  ; OBJ:      PrologSize: 0x0
  ; OBJ:      FrameFunc [
  ; OBJ-NEXT:   $T0 .raSearch =
diff --git a/llvm/test/DebugInfo/COFF/types-array.ll b/llvm/test/DebugInfo/COFF/types-array.ll

index 2962f97..19ddcf9 100644 (file)
--- a/llvm/test/DebugInfo/COFF/types-array.ll
+++ b/llvm/test/DebugInfo/COFF/types-array.ll
@@ -51,7 +51,7 @@
  ; CHECK:       PtrParent: 0x0
  ; CHECK:       PtrEnd: 0x0
  ; CHECK:       PtrNext: 0x0
-; CHECK:       CodeSize: 0x39
+; CHECK:       CodeSize: 0x2A
  ; CHECK:       DbgStart: 0x0
  ; CHECK:       DbgEnd: 0x0
  ; CHECK:       FunctionType: f (0x1002)
@@ -73,7 +73,7 @@
  ; CHECK:       LocalVariableAddrRange {
  ; CHECK:         OffsetStart: .text+0x6
  ; CHECK:         ISectStart: 0x0
-; CHECK:         Range: 0x33
+; CHECK:         Range: 0x24
  ; CHECK:       }
  ; CHECK:     }
  ; CHECK:     ProcEnd {
author	Craig Topper <craig.topper@intel.com>
	Thu, 16 Jul 2020 18:20:57 +0000 (11:20 -0700)
committer	Craig Topper <craig.topper@intel.com>
	Thu, 16 Jul 2020 19:51:25 +0000 (12:51 -0700)
llvm/lib/Target/X86/X86.td		patch | blob | history
llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll		patch | blob | history
llvm/test/CodeGen/X86/slow-unaligned-mem.ll		patch | blob | history
llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll		patch | blob | history
llvm/test/DebugInfo/COFF/types-array.ll		patch | blob | history