From fb00e2715b109e06de04b276e69015a69d1dfe16 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sun, 9 Jan 2011 22:36:18 +0000 Subject: [PATCH] Add a note about the inability to model FP -> int conversions which perform rounding other than truncation in the IR. Common C code for this turns into really an LLVM intrinsic call that blocks a lot of further optimizations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123135 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/README.txt | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/lib/Target/README.txt b/lib/Target/README.txt index a9afffd95ae..bea240c0921 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -2273,3 +2273,58 @@ this whenever the floating point type has enough exponent bits to represent the largest integer value as < inf. //===---------------------------------------------------------------------===// + +clang -O3 currently compiles this code: + +#include <emmintrin.h> +int f(double x) { return _mm_cvtsd_si32(_mm_set_sd(x)); } +int g(double x) { return _mm_cvttsd_si32(_mm_set_sd(x)); } + +into + +define i32 @_Z1fd(double %x) nounwind readnone { +entry: + %vecinit.i = insertelement <2 x double> undef, double %x, i32 0 + %vecinit1.i = insertelement <2 x double> %vecinit.i, double 0.000000e+00, i32 1 + %0 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %vecinit1.i) nounwind + ret i32 %0 +} + +define i32 @_Z1gd(double %x) nounwind readnone { +entry: + %conv.i = fptosi double %x to i32 + ret i32 %conv.i +} + +This difference carries over to the assembly produced, resulting in: + +_Z1fd: # @_Z1fd +# BB#0: # %entry + pushq %rbp + movq %rsp, %rbp + xorps %xmm1, %xmm1 + movsd %xmm0, %xmm1 + cvtsd2sil %xmm1, %eax + popq %rbp + ret + +_Z1gd: # @_Z1gd +# BB#0: # %entry + pushq %rbp + movq %rsp, %rbp + cvttsd2si %xmm0, %eax + popq %rbp + ret + +The problem is that we can't see through the intrinsic call used for cvtsd2si, +and fold away the 
unnecessary manipulation of the function parameter. When +these functions are inlined, the intrinsic call forms a barrier preventing many further +optimizations. LLVM IR doesn't have a good way to model the logic of +'cvtsd2si'; its only FP -> int conversion path forces truncation. We should add +a rounding flag onto fptosi so that it can represent this type of rounding +naturally in the IR rather than using intrinsics. We might need to use a +'system_rounding_mode' flag to encode that the semantics of the rounding mode +can be changed by the program, but ideally we could just say that isn't +supported, and hard-code the rounding. + +//===---------------------------------------------------------------------===// -- 2.11.0