From c3ddf2dea3b054664e59d3c763e5f9fc4ddd1ae7 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 2 Apr 2016 14:09:17 +0000
Subject: [PATCH] [X86][AVX] Added vector float truncation (double2float)
 tests

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265222 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/vec_fptrunc.ll | 168 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 168 insertions(+)
 create mode 100644 test/CodeGen/X86/vec_fptrunc.ll

diff --git a/test/CodeGen/X86/vec_fptrunc.ll b/test/CodeGen/X86/vec_fptrunc.ll
new file mode 100644
index 00000000000..fa22a4af175
--- /dev/null
+++ b/test/CodeGen/X86/vec_fptrunc.ll
@@ -0,0 +1,168 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64-AVX
+
+define void @fptrunc_frommem2(<2 x double>* %in, <2 x float>* %out) {
+; X32-SSE-LABEL: fptrunc_frommem2:
+; X32-SSE: # BB#0: # %entry
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE-NEXT: cvtpd2ps (%ecx), %xmm0
+; X32-SSE-NEXT: extractps $1, %xmm0, 4(%eax)
+; X32-SSE-NEXT: movss %xmm0, (%eax)
+; X32-SSE-NEXT: retl
+;
+; X32-AVX-LABEL: fptrunc_frommem2:
+; X32-AVX: # BB#0: # %entry
+; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-AVX-NEXT: vcvtpd2psx (%ecx), %xmm0
+; X32-AVX-NEXT: vextractps $1, %xmm0, 4(%eax)
+; X32-AVX-NEXT: vmovss %xmm0, (%eax)
+; X32-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: fptrunc_frommem2:
+; X64-SSE: # BB#0: # %entry
+; X64-SSE-NEXT: cvtpd2ps (%rdi), %xmm0
+; X64-SSE-NEXT: movlpd %xmm0, (%rsi)
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fptrunc_frommem2:
+; X64-AVX: # BB#0: # %entry
+; X64-AVX-NEXT: vcvtpd2psx (%rdi), %xmm0
+; X64-AVX-NEXT: vmovlpd %xmm0, (%rsi)
+; X64-AVX-NEXT: retq
+entry:
+  %0 = load <2 x double>, <2 x double>* %in
+  %1 = fptrunc <2 x double> %0 to <2 x float>
+  store <2 x float> %1, <2 x float>* %out, align 1
+  ret void
+}
+
+define void @fptrunc_frommem4(<4 x double>* %in, <4 x float>* %out) {
+; X32-SSE-LABEL: fptrunc_frommem4:
+; X32-SSE: # BB#0: # %entry
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE-NEXT: cvtpd2ps 16(%ecx), %xmm0
+; X32-SSE-NEXT: cvtpd2ps (%ecx), %xmm1
+; X32-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X32-SSE-NEXT: movupd %xmm1, (%eax)
+; X32-SSE-NEXT: retl
+;
+; X32-AVX-LABEL: fptrunc_frommem4:
+; X32-AVX: # BB#0: # %entry
+; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-AVX-NEXT: vcvtpd2psy (%ecx), %xmm0
+; X32-AVX-NEXT: vmovupd %xmm0, (%eax)
+; X32-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: fptrunc_frommem4:
+; X64-SSE: # BB#0: # %entry
+; X64-SSE-NEXT: cvtpd2ps 16(%rdi), %xmm0
+; X64-SSE-NEXT: cvtpd2ps (%rdi), %xmm1
+; X64-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-SSE-NEXT: movupd %xmm1, (%rsi)
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fptrunc_frommem4:
+; X64-AVX: # BB#0: # %entry
+; X64-AVX-NEXT: vcvtpd2psy (%rdi), %xmm0
+; X64-AVX-NEXT: vmovupd %xmm0, (%rsi)
+; X64-AVX-NEXT: retq
entry:
+  %0 = load <4 x double>, <4 x double>* %in
+  %1 = fptrunc <4 x double> %0 to <4 x float>
+  store <4 x float> %1, <4 x float>* %out, align 1
+  ret void
+}
+
+define void @fptrunc_frommem8(<8 x double>* %in, <8 x float>* %out) {
+; X32-SSE-LABEL: fptrunc_frommem8:
+; X32-SSE: # BB#0: # %entry
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE-NEXT: cvtpd2ps 16(%ecx), %xmm0
+; X32-SSE-NEXT: cvtpd2ps (%ecx), %xmm1
+; X32-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X32-SSE-NEXT: cvtpd2ps 48(%ecx), %xmm0
+; X32-SSE-NEXT: cvtpd2ps 32(%ecx), %xmm2
+; X32-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; X32-SSE-NEXT: movupd %xmm2, 16(%eax)
+; X32-SSE-NEXT: movupd %xmm1, (%eax)
+; X32-SSE-NEXT: retl
+;
+; X32-AVX-LABEL: fptrunc_frommem8:
+; X32-AVX: # BB#0: # %entry
+; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-AVX-NEXT: vcvtpd2psy (%ecx), %xmm0
+; X32-AVX-NEXT: vcvtpd2psy 32(%ecx), %xmm1
+; X32-AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X32-AVX-NEXT: vmovupd %ymm0, (%eax)
+; X32-AVX-NEXT: vzeroupper
+; X32-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: fptrunc_frommem8:
+; X64-SSE: # BB#0: # %entry
+; X64-SSE-NEXT: cvtpd2ps 16(%rdi), %xmm0
+; X64-SSE-NEXT: cvtpd2ps (%rdi), %xmm1
+; X64-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-SSE-NEXT: cvtpd2ps 48(%rdi), %xmm0
+; X64-SSE-NEXT: cvtpd2ps 32(%rdi), %xmm2
+; X64-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; X64-SSE-NEXT: movupd %xmm2, 16(%rsi)
+; X64-SSE-NEXT: movupd %xmm1, (%rsi)
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fptrunc_frommem8:
+; X64-AVX: # BB#0: # %entry
+; X64-AVX-NEXT: vcvtpd2psy (%rdi), %xmm0
+; X64-AVX-NEXT: vcvtpd2psy 32(%rdi), %xmm1
+; X64-AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX-NEXT: vmovupd %ymm0, (%rsi)
+; X64-AVX-NEXT: vzeroupper
+; X64-AVX-NEXT: retq
entry:
+  %0 = load <8 x double>, <8 x double>* %in
+  %1 = fptrunc <8 x double> %0 to <8 x float>
+  store <8 x float> %1, <8 x float>* %out, align 1
+  ret void
+}
+
+; FIXME: For exact truncations we should be able to fold this.
+define <4 x float> @fptrunc_fromconst() {
+; X32-SSE-LABEL: fptrunc_fromconst:
+; X32-SSE: # BB#0: # %entry
+; X32-SSE-NEXT: cvtpd2ps .LCPI3_0, %xmm1
+; X32-SSE-NEXT: cvtpd2ps .LCPI3_1, %xmm0
+; X32-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE-NEXT: retl
+;
+; X32-AVX-LABEL: fptrunc_fromconst:
+; X32-AVX: # BB#0: # %entry
+; X32-AVX-NEXT: vcvtpd2psy .LCPI3_0, %xmm0
+; X32-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: fptrunc_fromconst:
+; X64-SSE: # BB#0: # %entry
+; X64-SSE-NEXT: cvtpd2ps {{.*}}(%rip), %xmm1
+; X64-SSE-NEXT: cvtpd2ps {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: fptrunc_fromconst:
+; X64-AVX: # BB#0: # %entry
+; X64-AVX-NEXT: vcvtpd2psy {{.*}}(%rip), %xmm0
+; X64-AVX-NEXT: retq
entry:
+  %0 = insertelement <4 x double> undef, double 1.0, i32 0
+  %1 = insertelement <4 x double> %0, double -2.0, i32 1
+  %2 = insertelement <4 x double> %1, double +4.0, i32 2
+  %3 = insertelement <4 x double> %2, double -0.0, i32 3
+  %4 = fptrunc <4 x double> %3 to <4 x float>
+  ret <4 x float> %4
+}
-- 
2.11.0
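
For reference, the CHECK lines in this patch were autogenerated by the script named in the test's NOTE line. A typical invocation, run from the top of the LLVM source tree with a built llc on PATH, would look something like the sketch below (the script's flag handling has varied across LLVM revisions, so this is an assumed form rather than the exact command used for this commit):

  python utils/update_llc_test_checks.py test/CodeGen/X86/vec_fptrunc.ll

Rerunning it after a codegen change rewrites the per-prefix assertion blocks in place, which is why follow-up commits that alter cvtpd2ps lowering only need to regenerate rather than hand-edit these checks.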