From: Simon Pilgrim
Date: Fri, 4 Jan 2019 16:55:57 +0000 (+0000)
Subject: [CostModel][X86] Fix SSE1 FADD/FSUB costs
X-Git-Tag: android-x86-9.0-r1~8864
X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=4f03641b8a1eb7e08d87efb5888314c922664842;p=android-x86%2Fexternal-llvm.git

[CostModel][X86] Fix SSE1 FADD/FSUB costs

Noticed in D56011 - handle the case that scalar fp ops are quicker on P3 than P4

Add the other costs so that we're not relying on the default "is legal/custom" cost logic.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350403 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 89f45dd5461..bce3a061b8e 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -832,6 +832,12 @@ int X86TTIImpl::getArithmeticInstrCost(
 { ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/
 { ISD::FDIV, MVT::f64, 38 }, // Pentium IV from http://www.agner.org/
 { ISD::FDIV, MVT::v2f64, 69 }, // Pentium IV from http://www.agner.org/
+
+ { ISD::FADD, MVT::f32, 2 }, // Pentium IV from http://www.agner.org/
+ { ISD::FADD, MVT::f64, 2 }, // Pentium IV from http://www.agner.org/
+
+ { ISD::FSUB, MVT::f32, 2 }, // Pentium IV from http://www.agner.org/
+ { ISD::FSUB, MVT::f64, 2 }, // Pentium IV from http://www.agner.org/
 };

 if (ST->hasSSE2())
@@ -841,6 +847,12 @@ int X86TTIImpl::getArithmeticInstrCost(
 static const CostTblEntry SSE1CostTable[] = {
 { ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/
 { ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/
+
+ { ISD::FADD, MVT::f32, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::FADD, MVT::v4f32, 2 }, // Pentium III from http://www.agner.org/
+
+ { ISD::FSUB, MVT::f32, 1 }, // Pentium III from http://www.agner.org/
+ { ISD::FSUB, MVT::v4f32, 2 }, // Pentium III from http://www.agner.org/
 };

 if (ST->hasSSE1())
diff --git a/test/Analysis/CostModel/X86/arith-fp.ll b/test/Analysis/CostModel/X86/arith-fp.ll
index bd62bd9dc18..bce7ca8d59f 100644
--- a/test/Analysis/CostModel/X86/arith-fp.ll
+++ b/test/Analysis/CostModel/X86/arith-fp.ll
@@ -16,7 +16,7 @@ target triple = "x86_64-apple-macosx10.8.0"

 define i32 @fadd(i32 %arg) {
 ; SSE1-LABEL: 'fadd'
-; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fadd float undef, undef
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fadd float undef, undef
 ; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd <4 x float> undef, undef
 ; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fadd <8 x float> undef, undef
 ; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fadd <16 x float> undef, undef
@@ -129,7 +129,7 @@ define i32 @fadd(i32 %arg) {

 define i32 @fsub(i32 %arg) {
 ; SSE1-LABEL: 'fsub'
-; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fsub float undef, undef
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float undef, undef
 ; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> undef, undef
 ; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> undef, undef
 ; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> undef, undef