From 67f453aae7ac55f30af9e961a21029beabaaf47b Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 9 Mar 2008 05:42:06 +0000 Subject: [PATCH] Finish implementing a readme entry: when inserting an i64 variable into a vector of zeros or undef, and when the top part is obviously zero, we can just use movd + shuffle. This allows us to compile vec_set-B.ll into: _test3: movl $1234567, %eax andl 4(%esp), %eax movd %eax, %xmm0 ret instead of: _test3: subl $28, %esp movl $1234567, %eax andl 32(%esp), %eax movl %eax, (%esp) movl $0, 4(%esp) movq (%esp), %xmm0 addl $28, %esp ret git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48090 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/README-SSE.txt | 38 -------------------------------------- lib/Target/X86/X86ISelLowering.cpp | 10 +++++----- test/CodeGen/X86/vec_set-B.ll | 24 ++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 43 deletions(-) create mode 100644 test/CodeGen/X86/vec_set-B.ll diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 7087c681737..4d7224514cb 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -781,41 +781,3 @@ LLVM should be able to generate the same thing as gcc. This looks like it is just a matter of matching (scalar_to_vector (load x)) to movd. 
//===---------------------------------------------------------------------===// - -These two functions should compile to identical code on x86-32: - -define <2 x i64> @test2(i64 %arg) { -entry: - %A = and i64 %arg, 1234567 - %B = insertelement <2 x i64> undef, i64 %A, i32 0 - ret <2 x i64> %B -} - -define <2 x i64> @test2(i64 %arg) { -entry: - %A = and i64 %arg, 1234567 - %B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0 - ret <2 x i64> %B -} - -The later compiles to: - -_test2: - movl $1234567, %eax - andl 4(%esp), %eax - movd %eax, %xmm0 - ret - -the former compiles to: - -_test2: - subl $28, %esp - movl $1234567, %eax - andl 32(%esp), %eax - movl %eax, (%esp) - movl $0, 4(%esp) - movaps (%esp), %xmm0 - addl $28, %esp - ret - -//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 133a2da1f6d..be7f91c6ae6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3063,11 +3063,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { return DAG.getNode(ISD::UNDEF, VT); } - // Splat is obviously ok. Let legalizer expand it to a shuffle. - if (Values.size() == 1) - return SDOperand(); - - // Special case for single non-zero element. + // Special case for a single non-zero, non-undef element. if (NumNonZero == 1 && NumElems <= 4) { unsigned Idx = CountTrailingZeros_32(NonZeros); SDOperand Item = Op.getOperand(Idx); @@ -3141,6 +3137,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { } } + // Splat is obviously ok. Let legalizer expand it to a shuffle. + if (Values.size() == 1) + return SDOperand(); + // A vector full of immediates; various special cases are already // handled, so this is best done with a single constant-pool load. 
if (IsAllConstants) diff --git a/test/CodeGen/X86/vec_set-B.ll b/test/CodeGen/X86/vec_set-B.ll new file mode 100644 index 00000000000..e4e5667d2f6 --- /dev/null +++ b/test/CodeGen/X86/vec_set-B.ll @@ -0,0 +1,24 @@ +; RUN: llvm-as < %s | llc -march=x86 | not grep movaps +; RUN: llvm-as < %s | llc -march=x86 | grep esp | count 2 + +; These should both generate something like this: +;_test3: +; movl $1234567, %eax +; andl 4(%esp), %eax +; movd %eax, %xmm0 +; ret + +define <2 x i64> @test3(i64 %arg) { +entry: + %A = and i64 %arg, 1234567 + %B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0 + ret <2 x i64> %B +} + +define <2 x i64> @test2(i64 %arg) { +entry: + %A = and i64 %arg, 1234567 + %B = insertelement <2 x i64> undef, i64 %A, i32 0 + ret <2 x i64> %B +} + -- 2.11.0