mingwrt/mingwex/math/remquo_generic.sx

   1 /*
   2  * remquo_generic.sx
   3  *
   4  * Generic implementation for each of the ISO-C99 remquo(), remquol(),
   5  * and remquof() functions.
   6  *
   7  * $Id$
   8  *
   9  * Written by Keith Marshall <keithmarshall@users.sourceforge.net>
  10  * Copyright (C) 2021, MinGW.org Project
  11  *
  12  * Adapted from original code written by J. T. Conklin <jtc@netbsd.org>.
  13  *
  14  *
  15  * Permission is hereby granted, free of charge, to any person obtaining a
  16  * copy of this software and associated documentation files (the "Software"),
  17  * to deal in the Software without restriction, including without limitation
  18  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  19  * and/or sell copies of the Software, and to permit persons to whom the
  20  * Software is furnished to do so, subject to the following conditions:
  21  *
  22  * The above copyright notice and this permission notice (including the next
  23  * paragraph) shall be included in all copies or substantial portions of the
  24  * Software.
  25  *
  26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  27  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  28  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  29  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  30  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  31  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  32  * DEALINGS IN THE SOFTWARE.
  33  *
  34  */
  35 .intel_syntax noprefix
  36
  37 .text
  38 .align  4
  39 .def    ___x87remquo; .scl 2; .type 32; .endef
  40
  41 #if defined _remquo_source
  42 /* Preamble to load the FPU registers, and EDX register, from the
  43  * arguments passed in any call to the function:
  44  *
  45  *   double remquo (double, double, int *);
  46  */
  47 .globl  _remquo
  48 .def    _remquo; .scl 2; .type 32; .endef
  49 .def    ___x87cvt; .scl 2; .type 32; .endef
  50
  51 _remquo:
  52         fld     QWORD ptr 4[esp]        /* FPU TOS = x */
  53         fld     QWORD ptr 12[esp]       /* FPU TOS = y, x */
  54         mov     edx, DWORD ptr 20[esp]  /* EDX = *q */
  55
  56 /* Hand off the preloaded register set, to the shared computational
  57  * back-end routine, before ultimately converting its REAL10 result
  58  * to the required REAL8.
  59  */
  60         call    ___x87remquo            /* compute REAL10 result */
  61         jmp     ___x87cvt               /* convert to REAL8 */
  62
  63 #elif defined _remquof_source
  64 /* Preamble to load the FPU registers, and EDX register, from the
  65  * arguments passed in any call to the function:
  66  *
  67  *   float remquof (float, float, int *);
  68  */
  69 .globl  _remquof
  70 .def    _remquof; .scl 2; .type 32; .endef
  71 .def    ___x87cvtf; .scl 2; .type 32; .endef
  72
  73 _remquof:
  74         fld     DWORD ptr 4[esp]        /* FPU TOS = x */
  75         fld     DWORD ptr 8[esp]        /* FPU TOS = y, x */
  76         mov     edx, DWORD ptr 12[esp]  /* EDX = *q */
  77
  78 /* Hand off the preloaded register set, to the shared computational
  79  * back-end routine, before ultimately converting its REAL10 result
  80  * to the required REAL4.
  81  */
  82         call    ___x87remquo            /* compute REAL10 result */
  83         jmp     ___x87cvtf              /* convert to REAL4 */
  84
  85 #elif defined _remquol_source
  86 /* Preamble to load the FPU registers, and EDX register, from the
  87  * arguments passed in any call to the function:
  88  *
  89  *   long double remquo (long double, long double, int *);
  90  */
  91 .globl  _remquol
  92 .def    _remquol; .scl 2; .type 32; .endef
  93
  94 _remquol:
  95         fld     TBYTE ptr 4[esp]        /* FPU TOS = x */
  96         fld     TBYTE ptr 16[esp]       /* FPU TOS = y, x */
  97         mov     edx, DWORD ptr 28[esp]  /* EDX = *q */
  98
  99 /* Hand off the preloaded register set, to the shared computational
 100  * back-end routine, to...
 101  */
 102         jmp     ___x87remquo            /* ...compute REAL10 result */
 103
 104 #else
 105 /* No specific function entry point identified; implement the generic
 106  * back-end code, which is shared by all three entry points.
 107  */
 108 .globl  ___x87remquo
 109
 110 ___x87remquo:
 111 /* Assuming that the entry point preamble has stored the pointer to the
 112  * storage location for the returned integer quotient, in the EDX register,
 113  * and has loaded the floating point divisor (y), and dividend (x), values
 114  * into the FPU st(0) and st(1) registers, respectively, this computes the
 115  * remainder, and floating point quotient, to the full IEEE-754 extended
 116  * (80-bit) precision of the FPU, before ultimately reducing the quotient
 117  * to an integer, storing as many of itsleast-significant bits as can be
 118  * accommodated in an "int", at the address pointed to by EDX, and
 119  * returning the remainder in FPU register st(0).
 120  */
 121         fst     st(2)                   /* save a copy of 'y'... */
 122         fld     st(1)                   /* ...and of 'x' */
 123
 124 /* Computation of the remainder requires an iterative procedure...
 125  */
 126 10:     fprem1                          /* compute interim result */
 127         fstsw   ax                      /* copy resultant FPU status... */
 128         sahf                            /* ...into CPU flags, for testing... */
 129         jp      10b                     /* ...until completion */
 130
 131 /* We now have the computed remainder (r), and the original saved x and y,
 132  * in FPU registers st(0), st(1), and st(2) respectively; the next step is
 133  * to compute the floating point quotient, (leaving the remainder in place
 134  * as a fractional part, to ensure that eventual truncation rounds in the
 135  * correct direction)...
 136  */
 137         fstp    st(3)                   /* ...after saving 'r' for return... */
 138         fdivp   st(1), st               /* ...divide 'x' by 'y' */
 139
 140 /* This now leaves the integer-valued floating point quotient (q) in st(0),
 141  * and the saved remainder in st(1); the computed value of the quotient may
 142  * exceed the maximum which can be represented as an "int", so we reduce it
 143  * modulo "INT_MAX + 1", to retain the least significant bits with absolute
 144  * value not exceeding INT_MAX.
 145  */
 146         fld     DWORD ptr ___int_max_1  /* load equivalent of (INT_MAX + 1) */
 147         fxch    st(1)                   /* bring 'q' to top of FPU stack */
 148 20:     fprem                           /* compute interim modulus value */
 149         fstsw   ax                      /* copy resultant FPU status... */
 150         sahf                            /* ...into CPU flags, for testing... */
 151         jp      20b                     /* ...until completion */
 152
 153 /* Finally, we are left with the residual quotient in st(0), the remainder
 154  * in st(2), and st(1) still retaining the "INT_MAX + 1" value, (which is of
 155  * no further use to us).
 156  */
 157         fstp    st(1)                   /* pop FPU stack, discarding st(1) */
 158         fistp   DWORD ptr [edx]         /* store reduced quotient, leaving... */
 159         ret                             /* ...just the remainder, to return */
 160
 161 .section .rdata, "dr"
 162 .align  4
 163 ___int_max_1:   .long   0x4F000000      /* (1 + INT_MAX) as float */
 164 #endif
 165
 166 /* vim: set autoindent filetype=asm formatoptions=croqlj: */
 167 /* $RCSfile$: end of file */