/*
 * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 */

#ifndef CUDA_INTERVAL_LIB_H
#define CUDA_INTERVAL_LIB_H

#include "cuda_interval_rounded_arith.h"

// Interval template class and basic operations
// Interface inspired from the Boost Interval library (www.boost.org)

template<class T>
class interval_gpu
{
    public:
        __device__ __host__ interval_gpu();
        __device__ __host__ interval_gpu(T const &v);
        __device__ __host__ interval_gpu(T const &l, T const &u);
        __device__ __host__ T const &lower() const;
        __device__ __host__ T const &upper() const;
        static __device__ __host__ interval_gpu empty();

    private:
        T low;   // lower bound
        T up;    // upper bound
};

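// A minimal usage sketch (illustrative only, not part of the library): intervals
// propagate outward-rounded bounds through ordinary-looking expressions. The
// function name below is hypothetical.
//
//   __device__ void example()
//   {
//       interval_gpu<float> x(1.0f, 2.0f);
//       interval_gpu<float> y(3.0f, 4.0f);
//       interval_gpu<float> z = x * y + y;   // encloses [6, 12]
//       float w = width(z);                  // upper bound on the diameter of z
//   }
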
template<class T> inline __device__ __host__
interval_gpu<T>::interval_gpu()
{
}

template<class T> inline __device__ __host__
interval_gpu<T>::interval_gpu(T const &l, T const &u) :
    low(l), up(u)
{
}

template<class T> inline __device__ __host__
interval_gpu<T>::interval_gpu(T const &v) :
    low(v), up(v)
{
}

template<class T> inline __device__ __host__
T const &interval_gpu<T>::lower() const
{
    return low;
}

template<class T> inline __device__ __host__
T const &interval_gpu<T>::upper() const
{
    return up;
}

template<class T> inline __device__ __host__
interval_gpu<T> interval_gpu<T>::empty()
{
    rounded_arith<T> rnd;
    return interval_gpu<T>(rnd.nan(), rnd.nan());
}

template<class T> inline __device__ __host__
bool empty(interval_gpu<T> x)
{
    // Empty intervals are stored as [NaN, NaN], so the sum of the bounds is NaN
    T hash = x.lower() + x.upper();
    return (hash != hash);   // true only when hash is NaN
}

template<class T> inline __device__
T width(interval_gpu<T> x)
{
    if (empty(x))
        return 0;

    rounded_arith<T> rnd;
    return rnd.sub_up(x.upper(), x.lower());
}

// Arithmetic operations

template<class T> inline __device__
interval_gpu<T> const &operator+(interval_gpu<T> const &x)
{
    return x;
}

template<class T> inline __device__
interval_gpu<T> operator-(interval_gpu<T> const &x)
{
    return interval_gpu<T>(-x.upper(), -x.lower());
}

template<class T> inline __device__
interval_gpu<T> operator+(interval_gpu<T> const &x, interval_gpu<T> const &y)
{
    rounded_arith<T> rnd;
    return interval_gpu<T>(rnd.add_down(x.lower(), y.lower()),
                           rnd.add_up(x.upper(), y.upper()));
}

template<class T> inline __device__
interval_gpu<T> operator-(interval_gpu<T> const &x, interval_gpu<T> const &y)
{
    rounded_arith<T> rnd;
    return interval_gpu<T>(rnd.sub_down(x.lower(), y.upper()),
                           rnd.sub_up(x.upper(), y.lower()));
}

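// Note that the bounds are deliberately crossed: the smallest difference pairs
// x.lower() with y.upper(), e.g. [1, 2] - [0, 1] = [1 - 1, 2 - 0] = [0, 2].
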
inline __device__ float min4(float a, float b, float c, float d)
{
    return fminf(fminf(a, b), fminf(c, d));
}

inline __device__ float max4(float a, float b, float c, float d)
{
    return fmaxf(fmaxf(a, b), fmaxf(c, d));
}

inline __device__ double min4(double a, double b, double c, double d)
{
    return fmin(fmin(a, b), fmin(c, d));
}

inline __device__ double max4(double a, double b, double c, double d)
{
    return fmax(fmax(a, b), fmax(c, d));
}

template<class T> inline __device__
interval_gpu<T> operator*(interval_gpu<T> const &x, interval_gpu<T> const &y)
{
    // Textbook implementation: 14 flops, but no branch.
    rounded_arith<T> rnd;
    return interval_gpu<T>(min4(rnd.mul_down(x.lower(), y.lower()),
                                rnd.mul_down(x.lower(), y.upper()),
                                rnd.mul_down(x.upper(), y.lower()),
                                rnd.mul_down(x.upper(), y.upper())),
                           max4(rnd.mul_up(x.lower(), y.lower()),
                                rnd.mul_up(x.lower(), y.upper()),
                                rnd.mul_up(x.upper(), y.lower()),
                                rnd.mul_up(x.upper(), y.upper())));
}

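// For example, [-1, 2] * [3, 4] takes the min and max of the four endpoint
// products {-3, -4, 6, 8} (each rounded outward), giving [-4, 8].
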
// Center of an interval
// Typically used for bisection
template<class T> inline __device__
T median(interval_gpu<T> const &x)
{
    rounded_arith<T> rnd;
    return rnd.median(x.lower(), x.upper());
}

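// Bisection sketch (illustrative only): split an interval at its midpoint and
// recurse or iterate on the two halves, e.g. inside a solver kernel.
//
//   T m = median(x);
//   interval_gpu<T> left(x.lower(), m);
//   interval_gpu<T> right(m, x.upper());
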
// Intersection between two intervals (can be empty)
template<class T> inline __device__
interval_gpu<T> intersect(interval_gpu<T> const &x, interval_gpu<T> const &y)
{
    rounded_arith<T> rnd;
    T const &l = rnd.max(x.lower(), y.lower());
    T const &u = rnd.min(x.upper(), y.upper());

    if (l <= u)
        return interval_gpu<T>(l, u);
    else
        return interval_gpu<T>::empty();
}

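// For example, intersect([0, 2], [1, 3]) = [1, 2], while intersect([0, 1], [2, 3])
// has l > u and therefore returns the empty interval.
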
// Division by an interval which does not contain 0.
// GPU-optimized implementation assuming division is expensive
template<class T> inline __device__
interval_gpu<T> div_non_zero(interval_gpu<T> const &x, interval_gpu<T> const &y)
{
    rounded_arith<T> rnd;
    typedef interval_gpu<T> I;

    // Select, from the signs of x and y, the two endpoint pairs whose
    // quotients give the bounds, so that only two divisions are performed.
    T xl, xu, yl, yu;

    if (x.upper() < 0)
    {
        if (y.upper() < 0) { xl = x.upper(); yl = y.lower(); xu = x.lower(); yu = y.upper(); }
        else               { xl = x.lower(); yl = y.lower(); xu = x.upper(); yu = y.upper(); }
    }
    else if (x.lower() < 0)
    {
        if (y.upper() < 0) { xl = x.upper(); yl = y.upper(); xu = x.lower(); yu = y.upper(); }
        else               { xl = x.lower(); yl = y.lower(); xu = x.upper(); yu = y.lower(); }
    }
    else
    {
        if (y.upper() < 0) { xl = x.upper(); yl = y.upper(); xu = x.lower(); yu = y.lower(); }
        else               { xl = x.lower(); yl = y.upper(); xu = x.upper(); yu = y.lower(); }
    }

    return I(rnd.div_down(xl, yl), rnd.div_up(xu, yu));
}

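// Illustrative check (not from the original source): for x = [2, 3] and
// y = [-2, -1], the selection above yields xl = 3, yl = -1, xu = 2, yu = -2,
// so the result is [3 / -1, 2 / -2] = [-3, -1], using only two divisions.
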
template<class T> inline __device__
interval_gpu<T> div_positive(interval_gpu<T> const &x, T const &yu)
{
    // y is in [0, yu]
    if (x.lower() == 0 && x.upper() == 0)
        return x;

    rounded_arith<T> rnd;
    typedef interval_gpu<T> I;
    const T &xl = x.lower();
    const T &xu = x.upper();

    if (xu < 0)
        return I(rnd.neg_inf(), rnd.div_up(xu, yu));
    else if (xl < 0)
        return I(rnd.neg_inf(), rnd.pos_inf());
    else
        return I(rnd.div_down(xl, yu), rnd.pos_inf());
}

template<class T> inline __device__
interval_gpu<T> div_negative(interval_gpu<T> const &x, T const &yl)
{
    // y is in [yl, 0]
    if (x.lower() == 0 && x.upper() == 0)
        return x;

    rounded_arith<T> rnd;
    typedef interval_gpu<T> I;
    const T &xl = x.lower();
    const T &xu = x.upper();

    if (xu < 0)
        return I(rnd.div_down(xu, yl), rnd.pos_inf());
    else if (xl < 0)
        return I(rnd.neg_inf(), rnd.pos_inf());
    else
        return I(rnd.neg_inf(), rnd.div_up(xl, yl));
}

template<class T> inline __device__
interval_gpu<T> div_zero_part1(interval_gpu<T> const &x, interval_gpu<T> const &y, bool &b)
{
    // y contains 0: the quotient is either the whole line or the union of two
    // half-lines; b reports whether div_zero_part2 must supply the second one.
    if (x.lower() == 0 && x.upper() == 0)
    {
        b = false;
        return x;
    }

    rounded_arith<T> rnd;
    typedef interval_gpu<T> I;
    const T &xl = x.lower();
    const T &xu = x.upper();
    const T &yl = y.lower();
    const T &yu = y.upper();

    if (xu < 0)      { b = true;  return I(rnd.neg_inf(), rnd.div_up(xu, yu)); }
    else if (xl < 0) { b = false; return I(rnd.neg_inf(), rnd.pos_inf()); }
    else             { b = true;  return I(rnd.neg_inf(), rnd.div_up(xl, yl)); }
}

template<class T> inline __device__
interval_gpu<T> div_zero_part2(interval_gpu<T> const &x, interval_gpu<T> const &y)
{
    rounded_arith<T> rnd;
    typedef interval_gpu<T> I;
    const T &xl = x.lower();
    const T &xu = x.upper();
    const T &yl = y.lower();
    const T &yu = y.upper();

    if (xu < 0)
        return I(rnd.div_down(xu, yl), rnd.pos_inf());
    else
        return I(rnd.div_down(xl, yu), rnd.pos_inf());
}

template<class T> inline __device__
interval_gpu<T> division_part1(interval_gpu<T> const &x, interval_gpu<T> const &y, bool &b)
{
    b = false;

    if (y.lower() <= 0 && y.upper() >= 0)
        if (y.lower() != 0)
            if (y.upper() != 0)
                return div_zero_part1(x, y, b);
            else
                return div_negative(x, y.lower());
        else if (y.upper() != 0)
            return div_positive(x, y.upper());
        else
            return interval_gpu<T>::empty();
    else
        return div_non_zero(x, y);
}

template<class T> inline __device__
interval_gpu<T> division_part2(interval_gpu<T> const &x, interval_gpu<T> const &y, bool b = true)
{
    if (!b)
        return interval_gpu<T>::empty();

    return div_zero_part2(x, y);
}

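// Illustrative usage (not part of the library): dividing by an interval that
// contains 0 can produce two disjoint half-lines, so both parts are queried.
//
//   bool b;
//   interval_gpu<float> q1 = division_part1(x, y, b);
//   if (b)
//   {
//       interval_gpu<float> q2 = division_part2(x, y, b);
//       // The true quotient set is the union of q1 and q2.
//   }
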
template<class T> inline __device__
interval_gpu<T> square(interval_gpu<T> const &x)
{
    typedef interval_gpu<T> I;
    rounded_arith<T> rnd;
    const T &xl = x.lower();
    const T &xu = x.upper();

    if (xl >= 0)
        return I(rnd.mul_down(xl, xl), rnd.mul_up(xu, xu));
    else if (xu <= 0)
        return I(rnd.mul_down(xu, xu), rnd.mul_up(xl, xl));
    else
        return I(static_cast<T>(0),
                 rnd.max(rnd.mul_up(xl, xl), rnd.mul_up(xu, xu)));
}

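// For example, square([-2, 3]) = [0, 9]: the lower bound is clamped to 0
// because the interval straddles zero, and the upper bound is max(4, 9).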