4 * Generic implementation for the pow(), powl(), and powf() functions.
8 * Written by Keith Marshall <keithmarshall@users.sourceforge.net>
9 * Copyright (C) 2016, MinGW.org Project
12 * Permission is hereby granted, free of charge, to any person obtaining a
13 * copy of this software and associated documentation files (the "Software"),
14 * to deal in the Software without restriction, including without limitation
15 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
16 * and/or sell copies of the Software, and to permit persons to whom the
17 * Software is furnished to do so, subject to the following conditions:
19 * The above copyright notice and this permission notice (including the next
20 * paragraph) shall be included in all copies or substantial portions of the
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29 * DEALINGS IN THE SOFTWARE.
34 #if defined _powl_source
35 # define __function _powl
36 # define __yoffset __xoffset+12
37 # define __fldx fldt __xoffset(%esp)
38 # define __fldy fldt __yoffset(%esp)
40 #elif defined _powf_source
41 # define __function _powf
42 # define __yoffset __xoffset+4
43 # define __fldx flds __xoffset(%esp)
44 # define __fldy flds __yoffset(%esp)
45 # define ___x87cvt ___x87cvtf
47 #elif defined _pow_source
48 # define __function _pow
49 # define __yoffset __xoffset+8
50 # define __fldx fldl __xoffset(%esp)
51 # define __fldy fldl __yoffset(%esp)
66 /* A specific front-end entry point name has been identified; thus,
67 * we are assembling the front-end stub implementation for just one
68 * of the three supported functions, with C language prototypes:
70 * double pow (double x, double y);
71 * long double powl (long double x, long double y);
72 * float powf (float x, float y);
75 .def __function; .scl 2; .type 32; .endef
78 /* First, load x and y into the FPU, using the appropriate operand
79 * size specification for the specified front-end entry point, then
80 * hand off control to the generic back-end function.
86 /* For the long double powl (long double, long double) form of the
87 * primary function, we can simply delegate computation of the REAL10
88 * result to the ___x87pow() handler, with that returning directly
89 * to powl()'s own caller...
94 /* ...whereas for each of double pow (double, double) form, and its
95 * float powf (float, float) sibling, we must call the backend handler
96 * to compute an intermediate REAL10 result...
100 /* ...then return that via the appropriate type conversion/validation
101 * handler, to obtain the ultimately required REAL8 or REAL4 result.
107 /* No specific function entry point identified; implement the generic
108 * back-end, which is common to all supported front-end entry points;
109 * it also provides the error reporting API.
115 .def ___x87pow; .scl 2; .type 32; .endef
118 fxam /* classify y input value */
119 fnstsw %ax /* copy FPU flags to CPU flags */
120 fld1 /* +1.0 ; y ; x */
121 sahf /* examine ZF = C3 and PF = C2 */
122 jnz 30f /* y is non-zero */
123 jp 30f /* y is non-zero, denormalized */
125 /* In the case where y is zero, then POSIX says that the value of x is
126 * irrelevant, (even if it is indefinite); the return value is +1.0
128 fstp %st(2) /* y ; x^y = 1.0 */
129 fstp %st(0) /* x^y */
132 /* When y is non-zero, proceed to consideration of the x argument value;
133 * (this is necessary, even if the value of y is indeterminate).
135 30: movb %ah, %dl /* save y classification flags */
136 fucomp %st(2) /* y ; x */
137 fnstsw %ax /* copy FPU flags to CPU flags */
139 sahf /* examine ZF = C3 and PF = C2 */
140 jp 32f /* return x^y = indeterminate x */
141 jnz 40f /* x != +1.0 */
143 /* For this specific case, where x == +1.0, POSIX says that the return
144 * value shall be +1.0, (even if the value of y is indeterminate).
146 fld1 /* x^y = 1.0 ; y ; x */
147 31: fstp %st(1) /* x^y ; x */
148 32: fstp %st(1) /* x^y */
151 /* In any other case, if either x or y is NaN, then POSIX requires that
152 * NaN shall be returned; first check for x being NaN, or infinite.
154 40: fxam /* classify x input value */
155 fnstsw %ax /* copy FPU flags to CPU flags */
156 sahf /* test for infinity or NaN */
157 jnc 50f /* x is finite, so pass it on */
158 jnp 32b /* return x^y = x as NaN */
160 /* We've identified that x is infinite; how we handle this boundary
161 * condition depends on whether it's a +ve infinity, or a -ve.
163 testb $0x02, %ah /* x ; y */
164 jnz 42f /* x is -ve infinity */
166 /* In the case where x is +ve infinity, POSIX stipulates that the return
167 * value should be +ve infinity when y > 0.0, or +0.0 when y < 0.0; first
168 * deal with the y > 0.0 case.
170 testb $0x02, %dl /* check if y is +ve, or -ve? */
171 jz 32b /* +ve: return x^y = x = +ve infinity */
173 /* Alternatively, in the case when x is +ve infinity, and y < 0.0, we
174 * substitute 0.0 for x, then return it as the value for x^y.
176 41: fldz /* 0.0 ; x ; y */
177 jmp 31b /* return x^y = 0.0 */
179 /* Similarly, in the case where x is -ve infinity, we must again return
180 * infinity for y > 0.0, or 0.0 for y < 0.0; however, in this case, the
181 * sign of the returned value must be -ve if y is an odd valued integer,
182 * or +ve for any other value of y.
184 42: testb $0x02, %dl /* check if y is +ve, or -ve? */
185 jz 43f /* when +ve, return signed infinity */
187 /* Fall through when x is -ve infinity and y < 0.0; substitute -0.0 for
188 * the infinite value of x, then adjust the sign depending on whether y
189 * is an odd valued integer, or any other value.
191 fldz /* 0.0 ; x ; y */
192 fstp %st(1) /* 0.0 ; y */
195 /* Determine if y is non-integral, or an even valued integer, in either
196 * of which cases we force a +ve return value, or an odd valued integer,
197 * in whiich case we leave the sign of the return value as it is; begin
198 * by checking if y/2 is an integer, which asserts that y itself is an
199 * even valued integer.
201 43: fld1 /* 1.0 ; x ; y */
202 fchs /* -1.0 ; x ; y */
203 fld %st(2) /* y ; -1.0 ; x ; y */
204 fscale /* y/2 ; -1.0 ; x ; y */
205 fst %st(1) /* y/2 ; y/2 ; x ; y */
206 frndint /* int(y/2) ; y/2 ; x ; y */
208 fnstsw %ax /* check if int(y/2) == y/2 ? */
209 sahf /* hence y is an even valued integer */
210 je 44f /* so go force +ve x^y return value */
212 /* When we've established that y is not an even valued integer, we must
213 * still confirm the possibility that it is an odd valued integer; i.e.
214 * if it is an integer, it must be odd valued.
216 fld %st(1) /* y ; x ; y */
217 fld %st(0) /* y ; y ; x ; y */
218 frndint /* int(y) ; y ; x ; y */
220 fnstsw %ax /* check if int(y) == y ? */
221 sahf /* hence y is an odd valued integer */
222 je 32b /* so return x^y as is */
224 /* When y is either an even valued integer, or not an integer at all:
226 44: fabs /* make x^y value +ve */
227 jmp 32b /* then return it */
229 /* When x is finite, we still need to check the possibility that y may
230 * be NaN, or may be infinite.
232 50: xchgb %dl, %ah /* reload y classification flags */
233 movb %ah, %dh /* save a copy */
234 sahf /* check for y finite, infinite, or NaN */
235 jnc 60f /* y is also finite */
236 jp 52f /* y is infinite, but not NaN */
238 /* y is NaN; pop x off FPU stack, and return x^y as NaN value of y.
240 51: fstp %st(0) /* y */
241 ret /* return x^y = y */
243 /* We've identified x as finite, but y as infinite; POSIX defines
244 * boundary conditions about the range -1.0 < x < +1.0, which may be
245 * differentiated by comparison between +1.0 and |x|. We've already
246 * that x != +1.0, so if we now identify that |x| == +1.0, then this
247 * must represent x == -1.0, a boundary condition for which POSIX
248 * prescribes a return value of +1.0
250 52: fabs /* |x| ; y */
251 fld1 /* 1.0 ; |x| ; y */
252 fucom %st(1) /* check for |x| == 1.0 */
253 fnstsw %ax /* copy FPU flags to CPU flags */
254 sahf /* if ZF == 1 -> |x| == 1.0 */
255 je 31b /* return x^y = |x| = +1.0 */
257 /* When |x| != 1.0, we have no further use for the comparative values
258 * of 1.0 and |x|, on the FPU stack; discard them, then check the flag
259 * state to establish whether x lies within the boundary range.
261 fstp %st(0) /* |x| ; y */
263 jnb 53f /* -1.0 < x < +1.0 */
265 /* This represents the POSIX boundary condition where y is infinite,
266 * and |x| > 1.0; for this condition, POSIX specifies a return value
267 * of x^y = 0.0 if y is -ve infinity, otherwise x^y = y.
269 test $0x02, %dh /* if y is -ve infinity */
270 jnz 41b /* then go return +0.0 */
271 ret /* else return -ve infinity */
273 /* Here, we have -1.0 < x < +1.0, and y is infinite; for this case,
274 * POSIX prescribes a return value of +0.0 when y is +ve infinity, or
275 * +ve infinity when y itself -ve infinity.
277 53: test $0x02, %dh /* if y is +ve infinity */
278 jz 41b /* then go return +0.0 */
279 fabs /* else force to +ve infinity */
280 ret /* and return it */
282 /* We've now established that both x and y are finite, but we must
283 * still consider the special restrictions which apply when x == 0.0
286 60: movb %dl, %ah /* review x value classification */
287 sahf /* examining ZF = C3 and PF = C2 */
288 jnz 70f /* x is non-zero */
289 jp 70f /* x is non-zero, denormalized */
291 /* When x is zero, the return value is (possibly signed) zero for
292 * all y > 0.0, but infinite, and reported as a pole error, for any
295 testb $0x02, %dh /* if y is -ve? */
296 jnz 61f /* then go process the pole error */
298 /* For the case where y > 0.0, the sign of the original x value is
299 * preserved when y is an odd valued integer, or forced to +0.0 for
300 * any other +ve value of y; (obviously, if x is already +0.0, the
301 * sign preservation condition becomes irrelevant).
303 testb $0x02, %ah /* if x == +0.0 */
304 jz 32b /* then just go return it */
305 jmp 43b /* else go ajust sign */
307 /* For the pole error case, we must substitute an infinity for the
308 * original value of x; a convenient way to achieve this is to take
309 * the logarithm of x, (which is -ve infinity by definition).
311 61: fld1 /* 1.0 ; 0.0 ; y */
312 fxch %st(1) /* 0.0 ; 1.0 ; y */
313 fyl2x /* 1.0 * log2(0.0) = -inf ; y */
315 /* To diagnose the pole error, we will set errno = ERANGE, (which
316 * is compliant with POSIX); on Win32, we call __errno() to get a
317 * pointer to errno itself, but note that we haven't done with EDX
318 * yet, so we must guard against possible modification during the
319 * execution of __errno().
321 pushl %edx /* we must save this */
322 errno ERANGE /* because this may change it */
323 popl %edx /* restore saved value */
325 /* The returned infinity must preserve the sign of the original x,
326 * when y is an odd valued integer, otherwise it is forced to +inf;
327 * (obviously, if x is +0.0, we may just force +inf anyway).
329 testb $0x02, %dl /* if x is -0.0 */
330 jnz 43b /* then go do signed return */
331 fabs /* else force to +inf */
332 jmp 32b /* and return it */
334 /* When both x and y are finite and non-zero, then we must check
335 * for a possible domain error condition, which occurs when x < 0
336 * and y has any value which is not an integer.
338 70: testb $0x02, %ah /* if x > 0.0 */
339 jz 80f /* then result is computable */
341 /* Here, x < 0.0; the result may still be computable, if (and only
342 * if) the value of y is an integer.
344 fld %st(1) /* y ; x ; y */
345 fld %st(0) /* y ; y ; x ; y */
346 frndint /* int(y) ; y ; x ; y */
348 fnstsw %ax /* copy FPU flags to CPU flags */
349 sahf /* to test if y == int(y) ? */
350 je 71f /* then result is computable */
352 /* Fall through when x < 0.0 and y is not an integer; in this case
353 * we must set errno to report a domain error, and return NaN.
356 errno EDOM /* set errno = EDOM */
357 jmp 32b /* return NaN */
359 /* When x < 0.0 and y is an integer, we may still compute x^y
360 * according to the relationship x^y = -1^y * 2^(y * log2(|x|))
362 71: fabs /* |x| ; y */
363 fld %st(1) /* y ; |x| ; y */
364 fxch %st(1) /* |x| ; y ; y */
365 call 80f /* |x|^y ; y */
366 fchs /* assume y is odd valued */
367 jmp 43b /* adjust if even valued */
369 /* When x > 0.0, and y is finite, we may proceed to compute x^y,
370 * according to the relationship: x^y = 2^(y * log2(x)); first we
371 * compute log2(x), preferring the FYL2XP1 method for values of x
372 * close to zero, but falling back on FYL2X for x > 1.29
374 80: call ___x87log /* y*log2(x) */
376 /* Having computed the value of y * log2(x), we may now compute
377 * the final result as 2^(y * log2(x)). We must compute this in
378 * stages, combining 2^frac(y * log2(x)) * 2^int(y * log2(x)) to
379 * yielding the final result for x^y; first separate y * log2(x)
380 * into fractional and integer parts:
382 fld %st /* y*log2(x) ; y*log2(x) */
383 frndint /* int(y*log2(x)) ; y*log2(x) */
384 fxch %st(1) /* y*log2(x) ; int(y*log2(x)) */
385 fsub %st(1), %st /* frac(y*log2(x)) ; int(y*log2(x)) */
387 /* Now compute the intermediate 2^frac(y * log(x)) - 1.0 result:
389 f2xm1 /* 2^frac(y*log2(x))-1 ; int(y*log2(x)) */
391 /* Add the 1.0 deficit, to yield the 2^frac(y * log2(x)) result:
393 fld1 /* 1 ; 2^frac(y*log2(x))-1 ; int(y*log2(x)) */
394 faddp /* 2^frac(y*log2(x)) ; int(y*log2(x)) */
396 /* Finally, multiply by 2^int(y * log2(x)), to yield the x^y result:
398 fscale /* x^y ; int(y*log2(x)) */
399 fstp %st(1) /* x^y */
401 /* At this point, the value of x^y should not be zero; if it is, then
402 * the computation has underflowed, in which case POSIX recommends that
403 * errno should be set to ERANGE. Alternatively, if the result becomes
404 * infinite then the computation has overflowed, in which case POSIX
405 * requires that errno be so set. Check if either is appropriate.
407 fxam /* classify x^y result */
408 fnstsw %ax /* copy FPU status flags */
409 sahf /* to test via CPU flags, hence report */
410 jbe 81f /* ZF -> underflow; CF -> overflow */
411 ret /* else return x^y, errno unchanged */
413 /* Here, we provide an alternative function return, for use when either
414 * overflow or underflow is detected during the computation of x^y; it
415 * returns whatever x^y value has been computed, after having set errno
416 * to indicate the ERANGE condition.
418 81: errno ERANGE /* set errno = ERANGE */
419 ret /* return x^y, errno = ERANGE */
422 /* vim: set autoindent filetype=asm formatoptions=croql: */
423 /* $RCSfile$: end of file */