/*
 * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 */

#ifndef CUDA_INTERVAL_LIB_H
#define CUDA_INTERVAL_LIB_H

#include "cuda_interval_rounded_arith.h"

// Interval template class and basic operations
// Interface inspired from the Boost Interval library (www.boost.org)

template<class T>
class interval_gpu
{
    public:
        __device__ __host__ interval_gpu();
        __device__ __host__ interval_gpu(T const &v);
        __device__ __host__ interval_gpu(T const &l, T const &u);
        __device__ __host__ T const &lower() const;
        __device__ __host__ T const &upper() const;
        static __device__ __host__ interval_gpu empty();

    private:
        T low;   // lower bound
        T up;    // upper bound
};

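// A minimal usage sketch (illustrative only, not part of the library): intervals
// propagate outward-rounded bounds through ordinary-looking expressions. The
// function name below is hypothetical.
//
//   __device__ void example()
//   {
//       interval_gpu<float> x(1.0f, 2.0f);
//       interval_gpu<float> y(3.0f, 4.0f);
//       interval_gpu<float> z = x * y + y;   // encloses [6, 12]
//       float w = width(z);                  // upper bound on the diameter of z
//   }
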
template<class T> inline __device__ __host__
interval_gpu<T>::interval_gpu()
{
}

template<class T> inline __device__ __host__
interval_gpu<T>::interval_gpu(T const &l, T const &u) :
    low(l), up(u)
{
}

template<class T> inline __device__ __host__
interval_gpu<T>::interval_gpu(T const &v) :
    low(v), up(v)
{
}

template<class T> inline __device__ __host__
T const &interval_gpu<T>::lower() const
{
    return low;
}

template<class T> inline __device__ __host__
T const &interval_gpu<T>::upper() const
{
    return up;
}

template<class T> inline __device__ __host__
interval_gpu<T> interval_gpu<T>::empty()
{
    rounded_arith<T> rnd;
    return interval_gpu<T>(rnd.nan(), rnd.nan());
}

template<class T> inline __device__ __host__
bool empty(interval_gpu<T> x)
{
    // Empty intervals are stored as [NaN, NaN], so the sum of the bounds is NaN
    T hash = x.lower() + x.upper();
    return (hash != hash);   // true only when hash is NaN
}

template<class T> inline __device__
T width(interval_gpu<T> x)
{
    if (empty(x))
        return 0;

    rounded_arith<T> rnd;
    return rnd.sub_up(x.upper(), x.lower());
}

// Arithmetic operations

template<class T> inline __device__
interval_gpu<T> const &operator+(interval_gpu<T> const &x)
{
    return x;
}

template<class T> inline __device__
interval_gpu<T> operator-(interval_gpu<T> const &x)
{
    return interval_gpu<T>(-x.upper(), -x.lower());
}

template<class T> inline __device__
interval_gpu<T> operator+(interval_gpu<T> const &x, interval_gpu<T> const &y)
{
    rounded_arith<T> rnd;
    return interval_gpu<T>(rnd.add_down(x.lower(), y.lower()),
                           rnd.add_up(x.upper(), y.upper()));
}

template<class T> inline __device__
interval_gpu<T> operator-(interval_gpu<T> const &x, interval_gpu<T> const &y)
{
    rounded_arith<T> rnd;
    return interval_gpu<T>(rnd.sub_down(x.lower(), y.upper()),
                           rnd.sub_up(x.upper(), y.lower()));
}

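// Note that the bounds are deliberately crossed: the smallest difference pairs
// x.lower() with y.upper(), e.g. [1, 2] - [0, 1] = [1 - 1, 2 - 0] = [0, 2].
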
inline __device__ float min4(float a, float b, float c, float d)
{
    return fminf(fminf(a, b), fminf(c, d));
}

inline __device__ float max4(float a, float b, float c, float d)
{
    return fmaxf(fmaxf(a, b), fmaxf(c, d));
}

inline __device__ double min4(double a, double b, double c, double d)
{
    return fmin(fmin(a, b), fmin(c, d));
}

inline __device__ double max4(double a, double b, double c, double d)
{
    return fmax(fmax(a, b), fmax(c, d));
}

template<class T> inline __device__
interval_gpu<T> operator*(interval_gpu<T> const &x, interval_gpu<T> const &y)
{
    // Textbook implementation: 14 flops, but no branch.
    rounded_arith<T> rnd;
    return interval_gpu<T>(min4(rnd.mul_down(x.lower(), y.lower()),
                                rnd.mul_down(x.lower(), y.upper()),
                                rnd.mul_down(x.upper(), y.lower()),
                                rnd.mul_down(x.upper(), y.upper())),
                           max4(rnd.mul_up(x.lower(), y.lower()),
                                rnd.mul_up(x.lower(), y.upper()),
                                rnd.mul_up(x.upper(), y.lower()),
                                rnd.mul_up(x.upper(), y.upper())));
}

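// For example, [-1, 2] * [3, 4] takes the min and max of the four endpoint
// products {-3, -4, 6, 8} (each rounded outward), giving [-4, 8].
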
// Center of an interval
// Typically used for bisection
template<class T> inline __device__
T median(interval_gpu<T> const &x)
{
    rounded_arith<T> rnd;
    return rnd.median(x.lower(), x.upper());
}

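// Bisection sketch (illustrative only): split an interval at its midpoint and
// recurse or iterate on the two halves, e.g. inside a solver kernel.
//
//   T m = median(x);
//   interval_gpu<T> left(x.lower(), m);
//   interval_gpu<T> right(m, x.upper());
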
// Intersection between two intervals (can be empty)
template<class T> inline __device__
interval_gpu<T> intersect(interval_gpu<T> const &x, interval_gpu<T> const &y)
{
    rounded_arith<T> rnd;
    T const &l = rnd.max(x.lower(), y.lower());
    T const &u = rnd.min(x.upper(), y.upper());

    if (l <= u)
        return interval_gpu<T>(l, u);
    else
        return interval_gpu<T>::empty();
}

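// For example, intersect([0, 2], [1, 3]) = [1, 2], while intersect([0, 1], [2, 3])
// has l > u and therefore returns the empty interval.
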
// Division by an interval which does not contain 0.
// GPU-optimized implementation assuming division is expensive
template<class T> inline __device__
interval_gpu<T> div_non_zero(interval_gpu<T> const &x, interval_gpu<T> const &y)
{
    rounded_arith<T> rnd;
    typedef interval_gpu<T> I;

    // Select, from the signs of x and y, the two endpoint pairs whose
    // quotients give the bounds, so that only two divisions are performed.
    T xl, xu, yl, yu;

    if (x.upper() < 0)
    {
        if (y.upper() < 0) { xl = x.upper(); yl = y.lower(); xu = x.lower(); yu = y.upper(); }
        else               { xl = x.lower(); yl = y.lower(); xu = x.upper(); yu = y.upper(); }
    }
    else if (x.lower() < 0)
    {
        if (y.upper() < 0) { xl = x.upper(); yl = y.upper(); xu = x.lower(); yu = y.upper(); }
        else               { xl = x.lower(); yl = y.lower(); xu = x.upper(); yu = y.lower(); }
    }
    else
    {
        if (y.upper() < 0) { xl = x.upper(); yl = y.upper(); xu = x.lower(); yu = y.lower(); }
        else               { xl = x.lower(); yl = y.upper(); xu = x.upper(); yu = y.lower(); }
    }

    return I(rnd.div_down(xl, yl), rnd.div_up(xu, yu));
}

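// Illustrative check (not from the original source): for x = [2, 3] and
// y = [-2, -1], the selection above yields xl = 3, yl = -1, xu = 2, yu = -2,
// so the result is [3 / -1, 2 / -2] = [-3, -1], using only two divisions.
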
template<class T> inline __device__
interval_gpu<T> div_positive(interval_gpu<T> const &x, T const &yu)
{
    // y is in [0, yu]
    if (x.lower() == 0 && x.upper() == 0)
        return x;

    rounded_arith<T> rnd;
    typedef interval_gpu<T> I;
    const T &xl = x.lower();
    const T &xu = x.upper();

    if (xu < 0)
        return I(rnd.neg_inf(), rnd.div_up(xu, yu));
    else if (xl < 0)
        return I(rnd.neg_inf(), rnd.pos_inf());
    else
        return I(rnd.div_down(xl, yu), rnd.pos_inf());
}

template<class T> inline __device__
interval_gpu<T> div_negative(interval_gpu<T> const &x, T const &yl)
{
    // y is in [yl, 0]
    if (x.lower() == 0 && x.upper() == 0)
        return x;

    rounded_arith<T> rnd;
    typedef interval_gpu<T> I;
    const T &xl = x.lower();
    const T &xu = x.upper();

    if (xu < 0)
        return I(rnd.div_down(xu, yl), rnd.pos_inf());
    else if (xl < 0)
        return I(rnd.neg_inf(), rnd.pos_inf());
    else
        return I(rnd.neg_inf(), rnd.div_up(xl, yl));
}

template<class T> inline __device__
interval_gpu<T> div_zero_part1(interval_gpu<T> const &x, interval_gpu<T> const &y, bool &b)
{
    // y contains 0: the quotient is either the whole line or the union of two
    // half-lines; b reports whether div_zero_part2 must supply the second one.
    if (x.lower() == 0 && x.upper() == 0)
    {
        b = false;
        return x;
    }

    rounded_arith<T> rnd;
    typedef interval_gpu<T> I;
    const T &xl = x.lower();
    const T &xu = x.upper();
    const T &yl = y.lower();
    const T &yu = y.upper();

    if (xu < 0)      { b = true;  return I(rnd.neg_inf(), rnd.div_up(xu, yu)); }
    else if (xl < 0) { b = false; return I(rnd.neg_inf(), rnd.pos_inf()); }
    else             { b = true;  return I(rnd.neg_inf(), rnd.div_up(xl, yl)); }
}

template<class T> inline __device__
interval_gpu<T> div_zero_part2(interval_gpu<T> const &x, interval_gpu<T> const &y)
{
    rounded_arith<T> rnd;
    typedef interval_gpu<T> I;
    const T &xl = x.lower();
    const T &xu = x.upper();
    const T &yl = y.lower();
    const T &yu = y.upper();

    if (xu < 0)
        return I(rnd.div_down(xu, yl), rnd.pos_inf());
    else
        return I(rnd.div_down(xl, yu), rnd.pos_inf());
}

template<class T> inline __device__
interval_gpu<T> division_part1(interval_gpu<T> const &x, interval_gpu<T> const &y, bool &b)
{
    b = false;

    if (y.lower() <= 0 && y.upper() >= 0)
        if (y.lower() != 0)
            if (y.upper() != 0)
                return div_zero_part1(x, y, b);
            else
                return div_negative(x, y.lower());
        else if (y.upper() != 0)
            return div_positive(x, y.upper());
        else
            return interval_gpu<T>::empty();
    else
        return div_non_zero(x, y);
}

template<class T> inline __device__
interval_gpu<T> division_part2(interval_gpu<T> const &x, interval_gpu<T> const &y, bool b = true)
{
    if (!b)
        return interval_gpu<T>::empty();

    return div_zero_part2(x, y);
}

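// Illustrative usage (not part of the library): dividing by an interval that
// contains 0 can produce two disjoint half-lines, so both parts are queried.
//
//   bool b;
//   interval_gpu<float> q1 = division_part1(x, y, b);
//   if (b)
//   {
//       interval_gpu<float> q2 = division_part2(x, y, b);
//       // The true quotient set is the union of q1 and q2.
//   }
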
template<class T> inline __device__
interval_gpu<T> square(interval_gpu<T> const &x)
{
    typedef interval_gpu<T> I;
    rounded_arith<T> rnd;
    const T &xl = x.lower();
    const T &xu = x.upper();

    if (xl >= 0)
        return I(rnd.mul_down(xl, xl), rnd.mul_up(xu, xu));
    else if (xu <= 0)
        return I(rnd.mul_down(xu, xu), rnd.mul_up(xl, xl));
    else
        return I(static_cast<T>(0),
                 rnd.max(rnd.mul_up(xl, xl), rnd.mul_up(xu, xu)));
}

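// For example, square([-2, 3]) = [0, 9]: the lower bound is clamped to 0
// because the interval straddles zero, and the upper bound is max(4, 9).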