mingwrt/mingwex/math/pow_generic.sx

   1 /*
   2  * pow_generic.sx
   3  *
   4  * Generic implementation for the pow(), powl(), and powf() functions.
   5  *
   6  * $Id$
   7  *
   8  * Written by Keith Marshall <keithmarshall@users.sourceforge.net>
   9  * Copyright (C) 2016, MinGW.org Project
  10  *
  11  *
  12  * Permission is hereby granted, free of charge, to any person obtaining a
  13  * copy of this software and associated documentation files (the "Software"),
  14  * to deal in the Software without restriction, including without limitation
  15  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  16  * and/or sell copies of the Software, and to permit persons to whom the
  17  * Software is furnished to do so, subject to the following conditions:
  18  *
  19  * The above copyright notice and this permission notice (including the next
  20  * paragraph) shall be included in all copies or substantial portions of the
  21  * Software.
  22  *
  23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  28  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  29  * DEALINGS IN THE SOFTWARE.
  30  *
  31  */
  32 #undef __function
  33
  34 #if defined _powl_source
  35 # define __function     _powl
  36 # define __yoffset      __xoffset+12
  37 # define __fldx  fldt   __xoffset(%esp)
  38 # define __fldy  fldt   __yoffset(%esp)
  39
  40 #elif defined _powf_source
  41 # define __function     _powf
  42 # define __yoffset      __xoffset+4
  43 # define __fldx  flds   __xoffset(%esp)
  44 # define __fldy  flds   __yoffset(%esp)
  45 # define ___x87cvt   ___x87cvtf
  46
  47 #elif defined _pow_source
  48 # define __function     _pow
  49 # define __yoffset      __xoffset+8
  50 # define __fldx  fldl   __xoffset(%esp)
  51 # define __fldy  fldl   __yoffset(%esp)
  52 #endif
  53
  54 #ifdef __MINGW64__
  55 # define __xoffset      8
  56 # define esp            rsp
  57 # define eax            rax
  58 # define edx            rdx
  59 #else
  60 # define __xoffset      4
  61 #endif
  62
  63 .text
  64 .align  4
  65 #ifdef  __function
  66 /* A specific front-end entry point name has been identified; thus,
  67  * we are assembling the front-end stub implementation for just one
  68  * of the three supported functions, with C language prototypes:
  69  *
  70  *  double pow (double x, double y);
  71  *  long double powl (long double x, long double y);
  72  *  float powf (float x, float y);
  73  */
  74 .globl  __function
  75 .def    __function; .scl 2; .type 32; .endef
  76
  77 __function:
  78 /* First, load x and y into the FPU, using the appropriate operand
  79  * size specification for the specified front-end entry point, then
  80  * hand off control to the generic back-end function.
  81  */
  82         __fldx                  /* x */
  83         __fldy                  /* y ; x */
  84
  85 #ifdef _powl_source
  86 /* For the long double powl (long double, long double) form of the
  87  * primary function, we can simply delegate computation of the REAL10
  88  * result to the ___x87pow() handler, with that returning directly
  89  * to powl()'s own caller...
  90  */
  91         jmp     ___x87pow
  92
  93 #else
  94 /* ...whereas for each of double pow (double, double) form, and its
  95  * float powf (float, float) sibling, we must call the backend handler
  96  * to compute an intermediate REAL10 result...
  97  */
  98         call    ___x87pow
  99
 100 /* ...then return that via the appropriate type conversion/validation
 101  * handler, to obtain the ultimately required REAL8 or REAL4 result.
 102  */
 103         jmp     ___x87cvt
 104
 105 #endif
 106 #else
 107 /* No specific function entry point identified; implement the generic
 108  * back-end, which is common to all supported front-end entry points;
 109  * it also provides the error reporting API.
 110  */
 111 #include "errno.sx"
 112
 113 .align  4
 114 .globl  ___x87pow
 115 .def    ___x87pow; .scl 2; .type 32; .endef
 116
 117 ___x87pow:
 118         fxam                    /* classify y input value */
 119         fnstsw  %ax             /* copy FPU flags to CPU flags */
 120         fld1                    /* +1.0 ; y ; x */
 121         sahf                    /* examine ZF = C3 and PF = C2 */
 122         jnz     30f             /* y is non-zero */
 123         jp      30f             /* y is non-zero, denormalized */
 124
 125 /* In the case where y is zero, then POSIX says that the value of x is
 126  * irrelevant, (even if it is indefinite); the return value is +1.0
 127  */
 128         fstp    %st(2)          /* y ; x^y = 1.0 */
 129         fstp    %st(0)          /* x^y */
 130         ret
 131
 132 /* When y is non-zero, proceed to consideration of the x argument value;
 133  * (this is necessary, even if the value of y is indeterminate).
 134  */
 135 30:     movb    %ah, %dl        /* save y classification flags */
 136         fucomp  %st(2)          /* y ; x */
 137         fnstsw  %ax             /* copy FPU flags to CPU flags */
 138         fxch                    /* x ; y */
 139         sahf                    /* examine ZF = C3 and PF = C2 */
 140         jp      32f             /* return x^y = indeterminate x */
 141         jnz     40f             /* x != +1.0 */
 142
 143 /* For this specific case, where x == +1.0, POSIX says that the return
 144  * value shall be +1.0, (even if the value of y is indeterminate).
 145  */
 146         fld1                    /* x^y = 1.0 ; y ; x */
 147 31:     fstp    %st(1)          /* x^y ; x */
 148 32:     fstp    %st(1)          /* x^y */
 149         ret
 150
 151 /* In any other case, if either x or y is NaN, then POSIX requires that
 152  * NaN shall be returned; first check for x being NaN, or infinite.
 153  */
 154 40:     fxam                    /* classify x input value */
 155         fnstsw  %ax             /* copy FPU flags to CPU flags */
 156         sahf                    /* test for infinity or NaN */
 157         jnc     50f             /* x is finite, so pass it on */
 158         jnp     32b             /* return x^y = x as NaN */
 159
 160 /* We've identified that x is infinite; how we handle this boundary
 161  * condition depends on whether it's a +ve infinity, or a -ve.
 162  */
 163         testb   $0x02, %ah      /* x ; y */
 164         jnz     42f             /* x is -ve infinity */
 165
 166 /* In the case where x is +ve infinity, POSIX stipulates that the return
 167  * value should be +ve infinity when y > 0.0, or +0.0 when y < 0.0; first
 168  * deal with the y > 0.0 case.
 169  */
 170         testb   $0x02, %dl      /* check if y is +ve, or -ve? */
 171         jz      32b             /* +ve: return x^y = x = +ve infinity */
 172
 173 /* Alternatively, in the case when x is +ve infinity, and y < 0.0, we
 174  * substitute 0.0 for x, then return it as the value for x^y.
 175  */
 176 41:     fldz                    /* 0.0 ; x ; y */
 177         jmp     31b             /* return x^y = 0.0 */
 178
 179 /* Similarly, in the case where x is -ve infinity, we must again return
 180  * infinity for y > 0.0, or 0.0 for y < 0.0; however, in this case, the
 181  * sign of the returned value must be -ve if y is an odd valued integer,
 182  * or +ve for any other value of y.
 183  */
 184 42:     testb   $0x02, %dl      /* check if y is +ve, or -ve? */
 185         jz      43f             /* when +ve, return signed infinity */
 186
 187 /* Fall through when x is -ve infinity and y < 0.0; substitute -0.0 for
 188  * the infinite value of x, then adjust the sign depending on whether y
 189  * is an odd valued integer, or any other value.
 190  */
 191         fldz                    /* 0.0 ; x ; y */
 192         fstp    %st(1)          /* 0.0 ; y */
 193         fchs                    /* -0.0 ; y */
 194
 195 /* Determine if y is non-integral, or an even valued integer, in either
 196  * of which cases we force a +ve return value, or an odd valued integer,
 197  * in whiich case we leave the sign of the return value as it is; begin
 198  * by checking if y/2 is an integer, which asserts that y itself is an
 199  * even valued integer.
 200  */
 201 43:     fld1                    /* 1.0 ; x ; y */
 202         fchs                    /* -1.0 ; x ; y */
 203         fld     %st(2)          /* y ; -1.0 ; x ; y */
 204         fscale                  /* y/2 ; -1.0 ; x ; y */
 205         fst     %st(1)          /* y/2 ; y/2 ; x ; y */
 206         frndint                 /* int(y/2) ; y/2 ; x ; y */
 207         fucompp                 /* x ; y */
 208         fnstsw  %ax             /* check if int(y/2) == y/2 ? */
 209         sahf                    /* hence y is an even valued integer */
 210         je      44f             /* so go force +ve x^y return value */
 211
 212 /* When we've established that y is not an even valued integer, we must
 213  * still confirm the possibility that it is an odd valued integer; i.e.
 214  * if it is an integer, it must be odd valued.
 215  */
 216         fld     %st(1)          /* y ; x ; y */
 217         fld     %st(0)          /* y ; y ; x ; y */
 218         frndint                 /* int(y) ; y ; x ; y */
 219         fucompp                 /* x ; y */
 220         fnstsw  %ax             /* check if int(y) == y ? */
 221         sahf                    /* hence y is an odd valued integer */
 222         je      32b             /* so return x^y as is */
 223
 224 /* When y is either an even valued integer, or not an integer at all:
 225  */
 226 44:     fabs                    /* make x^y value +ve */
 227         jmp     32b             /* then return it */
 228
 229 /* When x is finite, we still need to check the possibility that y may
 230  * be NaN, or may be infinite.
 231  */
 232 50:     xchgb   %dl, %ah        /* reload y classification flags */
 233         movb    %ah, %dh        /* save a copy */
 234         sahf                    /* check for y finite, infinite, or NaN */
 235         jnc     60f             /* y is also finite */
 236         jp      52f             /* y is infinite, but not NaN */
 237
 238 /* y is NaN; pop x off FPU stack, and return x^y as NaN value of y.
 239  */
 240 51:     fstp    %st(0)          /* y */
 241         ret                     /* return x^y = y */
 242
 243 /* We've identified x as finite, but y as infinite; POSIX defines
 244  * boundary conditions about the range -1.0 < x < +1.0, which may be
 245  * differentiated by comparison between +1.0 and |x|.  We've already
 246  * that x != +1.0, so if we now identify that |x| == +1.0, then this
 247  * must represent x == -1.0, a boundary condition for which POSIX
 248  * prescribes a return value of +1.0
 249  */
 250 52:     fabs                    /* |x| ; y */
 251         fld1                    /* 1.0 ; |x| ; y */
 252         fucom   %st(1)          /* check for |x| == 1.0 */
 253         fnstsw  %ax             /* copy FPU flags to CPU flags */
 254         sahf                    /* if ZF == 1 -> |x| == 1.0 */
 255         je      31b             /* return x^y = |x| = +1.0 */
 256
 257 /* When |x| != 1.0, we have no further use for the comparative values
 258  * of 1.0 and |x|, on the FPU stack; discard them, then check the flag
 259  * state to establish whether x lies within the boundary range.
 260  */
 261         fstp    %st(0)          /* |x| ; y */
 262         fstp    %st(0)          /* y */
 263         jnb     53f             /* -1.0 < x < +1.0 */
 264
 265 /* This represents the POSIX boundary condition where y is infinite,
 266  * and |x| > 1.0; for this condition, POSIX specifies a return value
 267  * of x^y = 0.0 if y is -ve infinity, otherwise x^y = y.
 268  */
 269         test    $0x02, %dh      /* if y is -ve infinity */
 270         jnz     41b             /* then go return +0.0 */
 271         ret                     /* else return -ve infinity */
 272
 273 /* Here, we have -1.0 < x < +1.0, and y is infinite; for this case,
 274  * POSIX prescribes a return value of +0.0 when y is +ve infinity, or
 275  * +ve infinity when y itself -ve infinity.
 276  */
 277 53:     test    $0x02, %dh      /* if y is +ve infinity */
 278         jz      41b             /* then go return +0.0 */
 279         fabs                    /* else force to +ve infinity */
 280         ret                     /* and return it */
 281
 282 /* We've now established that both x and y are finite, but we must
 283  * still consider the special restrictions which apply when x == 0.0
 284  * or x < 0.0
 285  */
 286 60:     movb    %dl, %ah        /* review x value classification */
 287         sahf                    /* examining ZF = C3 and PF = C2 */
 288         jnz     70f             /* x is non-zero */
 289         jp      70f             /* x is non-zero, denormalized */
 290
 291 /* When x is zero, the return value is (possibly signed) zero for
 292  * all y > 0.0, but infinite, and reported as a pole error, for any
 293  * y < 0.0
 294  */
 295         testb   $0x02, %dh      /* if y is -ve? */
 296         jnz     61f             /* then go process the pole error */
 297
 298 /* For the case where y > 0.0, the sign of the original x value is
 299  * preserved when y is an odd valued integer, or forced to +0.0 for
 300  * any other +ve value of y; (obviously, if x is already +0.0, the
 301  * sign preservation condition becomes irrelevant).
 302  */
 303         testb   $0x02, %ah      /* if x == +0.0 */
 304         jz      32b             /* then just go return it */
 305         jmp     43b             /* else go ajust sign */
 306
 307 /* For the pole error case, we must substitute an infinity for the
 308  * original value of x; a convenient way to achieve this is to take
 309  * the logarithm of x, (which is -ve infinity by definition).
 310  */
 311 61:     fld1                    /* 1.0 ; 0.0 ; y */
 312         fxch    %st(1)          /* 0.0 ; 1.0 ; y */
 313         fyl2x                   /* 1.0 * log2(0.0) = -inf ; y */
 314
 315 /* To diagnose the pole error, we will set errno = ERANGE, (which
 316  * is compliant with POSIX); on Win32, we call __errno() to get a
 317  * pointer to errno itself, but note that we haven't done with EDX
 318  * yet, so we must guard against possible modification during the
 319  * execution of __errno().
 320  */
 321         pushl   %edx            /* we must save this */
 322         errno   ERANGE          /* because this may change it */
 323         popl    %edx            /* restore saved value */
 324
 325 /* The returned infinity must preserve the sign of the original x,
 326  * when y is an odd valued integer, otherwise it is forced to +inf;
 327  * (obviously, if x is +0.0, we may just force +inf anyway).
 328  */
 329         testb   $0x02, %dl      /* if x is -0.0 */
 330         jnz     43b             /* then go do signed return */
 331         fabs                    /* else force to +inf */
 332         jmp     32b             /* and return it */
 333
 334 /* When both x and y are finite and non-zero, then we must check
 335  * for a possible domain error condition, which occurs when x < 0
 336  * and y has any value which is not an integer.
 337  */
 338 70:     testb   $0x02, %ah      /* if x > 0.0 */
 339         jz      80f             /* then result is computable */
 340
 341 /* Here, x < 0.0; the result may still be computable, if (and only
 342  * if) the value of y is an integer.
 343  */
 344         fld     %st(1)          /* y ; x ; y */
 345         fld     %st(0)          /* y ; y ; x ; y */
 346         frndint                 /* int(y) ; y ; x ; y */
 347         fucompp                 /* x ; y */
 348         fnstsw  %ax             /* copy FPU flags to CPU flags */
 349         sahf                    /* to test if y == int(y) ? */
 350         je      71f             /* then result is computable */
 351
 352 /* Fall through when x < 0.0 and y is not an integer; in this case
 353  * we must set errno to report a domain error, and return NaN.
 354  */
 355         fsqrt                   /* NaN ; y */
 356         errno   EDOM            /* set errno = EDOM */
 357         jmp     32b             /* return NaN */
 358
 359 /* When x < 0.0 and y is an integer, we may still compute x^y
 360  * according to the relationship x^y = -1^y * 2^(y * log2(|x|))
 361  */
 362 71:     fabs                    /* |x| ; y */
 363         fld     %st(1)          /* y ; |x| ; y */
 364         fxch    %st(1)          /* |x| ; y ; y */
 365         call    80f             /* |x|^y ; y */
 366         fchs                    /* assume y is odd valued */
 367         jmp     43b             /* adjust if even valued */
 368
 369 /* When x > 0.0, and y is finite, we may proceed to compute x^y,
 370  * according to the relationship: x^y = 2^(y * log2(x)); first we
 371  * compute log2(x), preferring the FYL2XP1 method for values of x
 372  * close to zero, but falling back on FYL2X for x > 1.29
 373  */
 374 80:     call    ___x87log       /* y*log2(x) */
 375
 376 /* Having computed the value of y * log2(x), we may now compute
 377  * the final result as 2^(y * log2(x)).  We must compute this in
 378  * stages, combining 2^frac(y * log2(x)) * 2^int(y * log2(x)) to
 379  * yielding the final result for x^y; first separate y * log2(x)
 380  * into fractional and integer parts:
 381  */
 382         fld     %st             /* y*log2(x) ; y*log2(x) */
 383         frndint                 /* int(y*log2(x)) ; y*log2(x) */
 384         fxch    %st(1)          /* y*log2(x) ; int(y*log2(x)) */
 385         fsub    %st(1), %st     /* frac(y*log2(x)) ; int(y*log2(x)) */
 386
 387 /* Now compute the intermediate 2^frac(y * log(x)) - 1.0 result:
 388  */
 389         f2xm1                   /* 2^frac(y*log2(x))-1 ; int(y*log2(x)) */
 390
 391 /* Add the 1.0 deficit, to yield the 2^frac(y * log2(x)) result:
 392  */
 393         fld1                    /* 1 ; 2^frac(y*log2(x))-1 ; int(y*log2(x)) */
 394         faddp                   /* 2^frac(y*log2(x)) ; int(y*log2(x)) */
 395
 396 /* Finally, multiply by 2^int(y * log2(x)), to yield the x^y result:
 397  */
 398         fscale                  /* x^y ; int(y*log2(x)) */
 399         fstp    %st(1)          /* x^y */
 400
 401 /* At this point, the value of x^y should not be zero; if it is, then
 402  * the computation has underflowed, in which case POSIX recommends that
 403  * errno should be set to ERANGE.  Alternatively, if the result becomes
 404  * infinite then the computation has overflowed, in which case POSIX
 405  * requires that errno be so set.  Check if either is appropriate.
 406  */
 407         fxam                    /* classify x^y result */
 408         fnstsw  %ax             /* copy FPU status flags */
 409         sahf                    /* to test via CPU flags, hence report */
 410         jbe     81f             /* ZF -> underflow; CF -> overflow */
 411         ret                     /* else return x^y, errno unchanged */
 412
 413 /* Here, we provide an alternative function return, for use when either
 414  * overflow or underflow is detected during the computation of x^y; it
 415  * returns whatever x^y value has been computed, after having set errno
 416  * to indicate the ERANGE condition.
 417  */
 418 81:     errno   ERANGE          /* set errno = ERANGE */
 419         ret                     /* return x^y, errno = ERANGE */
 420 #endif
 421
 422 /* vim: set autoindent filetype=asm formatoptions=croql: */
 423 /* $RCSfile$: end of file */