utiltools/X86MAC64/cuda/samples/6_Advanced/interval/cuda_interval_rounded_arith.h

   1 /*
   2  * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
   3  *
   4  * Please refer to the NVIDIA end user license agreement (EULA) associated
   5  * with this source code for terms and conditions that govern your use of
   6  * this software. Any use, reproduction, disclosure, or distribution of
   7  * this software and related documentation outside the terms of the EULA
   8  * is strictly prohibited.
   9  *
  10  */
  11
  12 // Type-specific implementation of rounded arithmetic operators.
  13 // Thin layer over the CUDA intrinsics.
  14
  15 #ifndef CUDA_INTERVAL_ROUNDED_ARITH_H
  16 #define CUDA_INTERVAL_ROUNDED_ARITH_H
  17
  18 // Temporary workaround for CUDA 3.0/3.1 on SM 1.3 devices:
  19 // missing double-precision div/sqrt with directed rounding
  20 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ <= 130
  21 #define NO_DOUBLE_DIV
  22 #endif
  23
  24 // Generic class, no actual implementation yet
  25 template<class T>
  26 struct rounded_arith
  27 {
  28     __device__ T add_down(const T &x, const T &y);
  29     __device__ T add_up(const T &x, const T &y);
  30     __device__ T sub_down(const T &x, const T &y);
  31     __device__ T sub_up(const T &x, const T &y);
  32     __device__ T mul_down(const T &x, const T &y);
  33     __device__ T mul_up(const T &x, const T &y);
  34     __device__ T div_down(const T &x, const T &y);
  35     __device__ T div_up(const T &x, const T &y);
  36     __device__ T median(const T &x, const T &y);
  37     __device__ T sqrt_down(const T &x);
  38     __device__ T sqrt_up(const T &x);
  39     __device__ T int_down(const T &x);
  40     __device__ T int_up(const T &x);
  41
  42     __device__ T pos_inf();
  43     __device__ T neg_inf();
  44     __device__ __host__ T nan();
  45     __device__ T min(T const &x, T const &y);
  46     __device__ T max(T const &x, T const &y);
  47 };
  48
  49
  50 // Specialization for float
  51 template<>
  52 struct rounded_arith<float>
  53 {
  54     __device__ float add_down(const float &x, const float &y)
  55     {
  56         return __fadd_rd(x, y);
  57     }
  58
  59     __device__ float add_up(const float &x, const float &y)
  60     {
  61         return __fadd_ru(x, y);
  62     }
  63
  64     __device__ float sub_down(const float &x, const float &y)
  65     {
  66         return __fadd_rd(x, -y);
  67     }
  68
  69     __device__ float sub_up(const float &x, const float &y)
  70     {
  71         return __fadd_ru(x, -y);
  72     }
  73
  74     __device__ float mul_down(const float &x, const float &y)
  75     {
  76         return __fmul_rd(x, y);
  77     }
  78
  79     __device__ float mul_up(const float &x, const float &y)
  80     {
  81         return __fmul_ru(x, y);
  82     }
  83
  84     __device__ float div_down(const float &x, const float &y)
  85     {
  86         return __fdiv_rd(x, y);
  87     }
  88
  89     __device__ float div_up(const float &x, const float &y)
  90     {
  91         return __fdiv_ru(x, y);
  92     }
  93
  94     __device__ float median(const float &x, const float &y)
  95     {
  96         return (x + y) * .5f;
  97     }
  98
  99     __device__ float sqrt_down(const float &x)
 100     {
 101         return __fsqrt_rd(x);
 102     }
 103
 104     __device__ float sqrt_up(const float &x)
 105     {
 106         return __fsqrt_ru(x);
 107     }
 108
 109     __device__ float int_down(const float &x)
 110     {
 111         return floorf(x);
 112     }
 113
 114     __device__ float int_up(const float &x)
 115     {
 116         return ceilf(x);
 117     }
 118
 119     __device__ float neg_inf()
 120     {
 121         return __int_as_float(0xff800000);
 122     }
 123
 124     __device__ float pos_inf()
 125     {
 126         return __int_as_float(0x7f800000);
 127     }
 128
 129     __device__ __host__ float nan()
 130     {
 131         return nanf("");
 132     }
 133
 134     __device__ float min(float const &x, float const &y)
 135     {
 136         return fminf(x, y);
 137     }
 138
 139     __device__ float max(float const &x, float const &y)
 140     {
 141         return fmaxf(x, y);
 142     }
 143 };
 144
 145 __device__ double my_div_ru(double a, double b)
 146 {
 147     int expo;
 148     b = frexp(b, &expo);
 149     double y0 = (double)(1.0f / __double2float_rn(b));    // y0 22 bits
 150
 151     // Cubic iteration from Alex
 152     double e1 = __fma_rn(-b, y0, 1.0);
 153     double e2 = __fma_rn(e1, e1, e1);
 154     double y2 = __fma_rn(y0, e2, y0);        // y2 ~ 1/b 66 bits + rounding RN => faithful
 155
 156     double e3 = __fma_rn(-b, y2, 1.0);       // M
 157     double y3 = __fma_rn(y2, e3, y2);        // y3 ~ 1/b correct except for 1/(2-eps)
 158
 159     double q0 = __dmul_rn(a, y0);
 160     double r0 = __fma_rn(-q0, b, a);         // M
 161     double q1 = __fma_rn(r0, y2, q0);        // q1 ~ a/b faithful
 162
 163     double r1 = __fma_ru(-q1, b, a);         // M
 164     double q2 = __fma_ru(r1, y3, q1);        // q2 = a/b correct
 165     return ldexp(q2, -expo);
 166 }
 167
 168 __device__ double my_div_rd(double a, double b)
 169 {
 170     int expo;
 171     b = frexp(b, &expo);
 172     double y0 = (double)(1.0f / __double2float_rn(b));    // y0 22 bits
 173
 174     double e1 = __fma_rn(-b, y0, 1.0);
 175     double e2 = __fma_rn(e1, e1, e1);
 176     double y2 = __fma_rn(y0, e2, y0);        // y2 ~ 1/b 66 bits + rounding RN => faithful
 177
 178     double e3 = __fma_rn(-b, y2, 1.0);       // M
 179     double y3 = __fma_rn(y2, e3, y2);        // y3 ~ 1/b correct except for 1/(2-eps)
 180
 181     double q0 = __dmul_rn(a, y0);
 182     double r0 = __fma_rn(-q0, b, a);         // M
 183     double q1 = __fma_rn(r0, y2, q0);        // q1 ~ a/b faithful
 184
 185     double r1 = __fma_rd(-q1, b, a);         // M
 186     double q2 = __fma_rd(r1, y3, q1);
 187     return ldexp(q2, -expo);
 188 }
 189
 190 // Specialization for double
 191 template<>
 192 struct rounded_arith<double>
 193 {
 194     __device__ double add_down(const double &x, const double &y)
 195     {
 196         return __dadd_rd(x, y);
 197     }
 198
 199     __device__ double add_up(const double &x, const double &y)
 200     {
 201         return __dadd_ru(x, y);
 202     }
 203
 204     __device__ double sub_down(const double &x, const double &y)
 205     {
 206         return __dadd_rd(x, -y);
 207     }
 208
 209     __device__ double sub_up(const double &x, const double &y)
 210     {
 211         return __dadd_ru(x, -y);
 212     }
 213
 214     __device__ double mul_down(const double &x, const double &y)
 215     {
 216         return __dmul_rd(x, y);
 217     }
 218
 219     __device__ double mul_up(const double &x, const double &y)
 220     {
 221         return __dmul_ru(x, y);
 222     }
 223
 224     __device__ double div_down(const double &x, const double &y)
 225     {
 226 #ifndef NO_DOUBLE_DIV
 227         return __ddiv_rd(x, y);
 228 #else
 229         return my_div_rd(x, y);
 230 #endif
 231     }
 232
 233     __device__ double div_up(const double &x, const double &y)
 234     {
 235 #ifndef NO_DOUBLE_DIV
 236         return __ddiv_ru(x, y);
 237 #else
 238         return my_div_ru(x, y);
 239 #endif
 240     }
 241     __device__ double median(const double &x, const double &y)
 242     {
 243         return (x + y) * .5;
 244     }
 245
 246 #ifndef NO_DOUBLE_DIV
 247     __device__ double sqrt_down(const double &x)
 248     {
 249         return __dsqrt_rd(x);
 250     }
 251
 252     __device__ double sqrt_up(const double &x)
 253     {
 254         return __dsqrt_ru(x);
 255     }
 256 #endif
 257     __device__ double int_down(const double &x)
 258     {
 259         return floor(x);
 260     }
 261
 262     __device__ double int_up(const double &x)
 263     {
 264         return ceil(x);
 265     }
 266
 267     __device__ double neg_inf()
 268     {
 269         return __longlong_as_double(0xfff0000000000000ull);
 270     }
 271
 272     __device__ double pos_inf()
 273     {
 274         return __longlong_as_double(0x7ff0000000000000ull);
 275     }
 276     __device__ __host__ double nan()
 277     {
 278         return ::nan("");
 279     }
 280
 281     __device__ double min(double const &x, double const &y)
 282     {
 283         return fmin(x, y);
 284     }
 285
 286     __device__ double max(double const &x, double const &y)
 287     {
 288         return fmax(x, y);
 289     }
 290 };
 291
 292 #endif