4 * Generic implementation for each of the ISO-C99 remquo(), remquol(),
5 * and remquof() functions.
9 * Written by Keith Marshall <keithmarshall@users.sourceforge.net>
10 * Copyright (C) 2021, MinGW.org Project
12 * Adapted from original code written by J. T. Conklin <jtc@netbsd.org>.
15 * Permission is hereby granted, free of charge, to any person obtaining a
16 * copy of this software and associated documentation files (the "Software"),
17 * to deal in the Software without restriction, including without limitation
18 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
19 * and/or sell copies of the Software, and to permit persons to whom the
20 * Software is furnished to do so, subject to the following conditions:
22 * The above copyright notice and this permission notice (including the next
23 * paragraph) shall be included in all copies or substantial portions of the
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
32 * DEALINGS IN THE SOFTWARE.
35 .intel_syntax noprefix /* Intel operand order; registers written without '%' */
39 .def ___x87remquo; .scl 2; .type 32; .endef /* COFF symbol record: external (.scl 2) function (.type 32) */
41 #if defined _remquo_source
42 /* Entry preamble for remquo(): load the FPU registers, and the EDX
43 * register, from the stacked arguments of any call to the function:
45 * double remquo (double x, double y, int *q);
48 .def _remquo; .scl 2; .type 32; .endef
49 .def ___x87cvt; .scl 2; .type 32; .endef
52 fld QWORD ptr 4[esp] /* FPU TOS = x (REAL8 read from ESP+4) */
53 fld QWORD ptr 12[esp] /* FPU TOS = y, x (REAL8 read from ESP+12) */
54 mov edx, DWORD ptr 20[esp] /* EDX = q, (the pointer itself, not *q) */
56 /* Hand off the preloaded register set, to the shared computational
57 * back-end routine, which stores the integer quotient via EDX; then
58 * jump on to ___x87cvt, to convert its REAL10 result to the required REAL8.
60 call ___x87remquo /* compute REAL10 result, in st(0) */
61 jmp ___x87cvt /* convert to REAL8; control leaves via this jump */
63 #elif defined _remquof_source
64 /* Entry preamble for remquof(): load the FPU registers, and the EDX
65 * register, from the stacked arguments of any call to the function:
67 * float remquof (float x, float y, int *q);
70 .def _remquof; .scl 2; .type 32; .endef
71 .def ___x87cvtf; .scl 2; .type 32; .endef
74 fld DWORD ptr 4[esp] /* FPU TOS = x (REAL4 read from ESP+4) */
75 fld DWORD ptr 8[esp] /* FPU TOS = y, x (REAL4 read from ESP+8) */
76 mov edx, DWORD ptr 12[esp] /* EDX = q, (the pointer itself, not *q) */
78 /* Hand off the preloaded register set, to the shared computational
79 * back-end routine, which stores the integer quotient via EDX; then
80 * jump on to ___x87cvtf, to convert its REAL10 result to the required REAL4.
82 call ___x87remquo /* compute REAL10 result, in st(0) */
83 jmp ___x87cvtf /* convert to REAL4; control leaves via this jump */
85 #elif defined _remquol_source
86 /* Entry preamble for remquol(): load the FPU registers, and the EDX
87 * register, from the stacked arguments of any call to the function:
89 * long double remquol (long double x, long double y, int *q);
92 .def _remquol; .scl 2; .type 32; .endef
95 fld TBYTE ptr 4[esp] /* FPU TOS = x (REAL10 read from ESP+4) */
96 fld TBYTE ptr 16[esp] /* FPU TOS = y, x (REAL10 read from ESP+16) */
97 mov edx, DWORD ptr 28[esp] /* EDX = q, (the pointer itself, not *q) */
99 /* No result conversion is needed for REAL10, so hand off the preloaded
100 * register set, directly to the shared computational back-end routine, to...
102 jmp ___x87remquo /* ...compute REAL10 result; its "ret" returns to our caller */
105 /* No specific function entry point identified; implement the generic
106 * back-end code, which is shared by all three entry points.
111 /* Assuming that the entry point preamble has stored the pointer to the
112 * storage location for the returned integer quotient, in the EDX register,
113 * and has loaded the floating point divisor (y), and dividend (x), values
114 * into the FPU st(0) and st(1) registers, respectively, this computes the
115 * remainder, and floating point quotient, to the full IEEE-754 extended
116 * (80-bit) precision of the FPU, before ultimately reducing the quotient
117 * to an integer, storing as many of its least-significant bits as can be
118 * accommodated in an "int", at the address pointed to by EDX, and
119 * returning the remainder in FPU register st(0).
121 fst st(2) /* on entry: st(0) = y, st(1) = x, EDX = q; copy 'y' to st(2)... */
122 fld st(1) /* ...then push 'x', making FPU stack = x, y, x, y */
124 /* Computation of the remainder requires an iterative procedure...
126 10: fprem1 /* st(0) = partial IEEE-754 remainder of st(0) by st(1) */
127 fstsw ax /* copy resultant FPU status, (clobbering AX)... */
128 sahf /* ...into CPU flags, where FPU C2 appears as PF... */
129 jp 10b /* ...and repeat, until C2 indicates completion */
131 /* We now have the computed remainder (r) in st(0), 'y' in st(1), 'x' in
132 * st(2), and a spare copy of 'y' in st(3); the next step is to compute the
133 * floating point quotient, which initially retains r/y as a fractional
134 * part, and then to round it to an integral value, so that the following
135 * modulus reduction can never leave a value which rounds to "INT_MAX + 1"...
137 fstp st(3) /* ...after saving 'r' for return, (stack = y, x, r)... */
138 fdivp st(1), st /* ...divide 'x' by 'y', (stack = x/y, r)... */
139 frndint /* ...and round to integer, (avoids "fistp" overflow) */
140 /* This now leaves the integer-valued floating point quotient (q) in st(0),
141 * and the saved remainder in st(1); the computed value of the quotient may
142 * exceed the maximum which can be represented as an "int", so we reduce it
143 * modulo "INT_MAX + 1", to retain the least significant bits with absolute
144 * value not exceeding INT_MAX; (since 'q' is already integral, the reduced
144 * value is exact, and always lies strictly between -2^31 and +2^31).
146 fld DWORD ptr ___int_max_1 /* load equivalent of (INT_MAX + 1) */
147 fxch st(1) /* bring 'q' to top of FPU stack */
148 20: fprem /* compute interim modulus value, keeping sign of 'q' */
149 fstsw ax /* copy resultant FPU status... */
150 sahf /* ...into CPU flags, for testing... */
151 jp 20b /* ...until completion, (C2, seen as PF, is clear) */
153 /* Finally, we are left with the residual quotient in st(0), the remainder
154 * in st(2), and st(1) still retaining the "INT_MAX + 1" value, (which is of
155 * no further use to us).
157 fstp st(1) /* overwrite st(1) with quotient, and pop FPU stack */
158 fistp DWORD ptr [edx] /* store reduced quotient at *q, leaving... */
159 ret /* ...just the remainder in st(0), to return */
161 .section .rdata, "dr" /* read-only data section */
163 ___int_max_1: .long 0x4F000000 /* 2^31, i.e. (1 + INT_MAX), encoded as an IEEE-754 REAL4 */
166 /* vim: set autoindent filetype=asm formatoptions=croqlj: */
167 /* $RCSfile$: end of file */