OSDN Git Service

Replace FSF snail mail address with URLs
[uclinux-h8/uClibc.git] / libc / string / sparc / sparc64 / stpcpy.S
1 /* Copy SRC to DEST returning the address of the terminating '\0' in DEST.
2    For SPARC v9.
3    Copyright (C) 1998, 1999, 2002, 2003, 2004 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
6                   Jakub Jelinek <jj@ultra.linux.cz>.
7
8    The GNU C Library is free software; you can redistribute it and/or
9    modify it under the terms of the GNU Lesser General Public
10    License as published by the Free Software Foundation; either
11    version 2.1 of the License, or (at your option) any later version.
12
13    The GNU C Library is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16    Lesser General Public License for more details.
17
18    You should have received a copy of the GNU Lesser General Public
19    License along with the GNU C Library; if not, see
20    <http://www.gnu.org/licenses/>.  */
21
22 #include <asm/asi.h>
23 #ifndef XCC
24         .register       %g2, #scratch
25         .register       %g3, #scratch
26         .register       %g6, #scratch
27 #endif
28
29         /* Normally, this uses
30            ((xword - 0x0101010101010101) & 0x8080808080808080) test
31            to find out if any byte in xword could be zero. This is fast, but
32            also gives false alarm for any byte in range 0x81-0xff. It does
33            not matter for correctness, as if this test tells us there could
34            be some zero byte, we check it byte by byte, but if bytes with
35            high bits set are common in the strings, then this will give poor
36            performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
37            will use one tick slower, but more precise test
38            ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
39            which does not give any false alarms (but if some bits are set,
40            one cannot assume from it which bytes are zero and which are not).
41            It has yet to be measured which default is best for an average
42            glibc user these days.
43          */
44
45         .text
46         .align          32
/* stpcpy: copy the NUL-terminated string at SRC (%o1) to DEST (%o0) and
   return, in %o0, the address of the terminating NUL written to DEST.

   Register roles set up below:
     %o0 = DEST cursor, %o1 = SRC cursor
     %g1 = 0x0101010101010101, %g2 = 0x8080808080808080 (zero-byte test masks)

   Three copy paths:
     - DEST not 8-byte aligned: copy single bytes (labels 12/13) until it is;
     - DEST and SRC both 8-byte aligned: copy one 8-byte word per iteration
       (labels 1/2/3);
     - DEST aligned but SRC not: build every DEST word out of two aligned SRC
       words using shifts (labels 14/15).

   Layout convention used throughout: an instruction indented by one extra
   space is the delay-slot instruction of the branch or retl preceding it.
   Words are examined from the most significant byte downwards; that byte is
   the one at the lowest address (it is the byte stored at [%o0 - 8] at
   labels 11 and 30).  */
47 ENTRY(stpcpy)
        /* Start building the 0x01.. mask and test DEST alignment.  %g2 holds
           %g1 << 32 only temporarily, until it is merged into %g1.  */
48         sethi           %hi(0x01010101), %g1            /* IEU0         Group           */
49         or              %g1, %lo(0x01010101), %g1       /* IEU0         Group           */
50         andcc           %o0, 7, %g0                     /* IEU1                         */
51         sllx            %g1, 32, %g2                    /* IEU0         Group           */
52
        /* DEST misaligned -> byte loop at 12.  The delay slot tests SRC
           alignment: it leaves SRC & 7 in %g3 and sets the flags consumed by
           the second bne below (the intervening `or' does not touch them).  */
53         bne,pn          %icc, 12f                       /* CTI                          */
54          andcc          %o1, 7, %g3                     /* IEU1                         */
        /* %g1 = 0x0101010101010101.  SRC misaligned -> shifted copy at 14.
           The delay slot forms %g2 = %g1 << 7 = 0x8080808080808080 whether or
           not the branch is taken.  */
55         or              %g1, %g2, %g1                   /* IEU0         Group           */
56         bne,pn          %icc, 14f                       /* CTI                          */
57
58          sllx           %g1, 7, %g2                     /* IEU0         Group           */
        /* Aligned loop.  %g3 = word being tested and stored, %o3 = following
           word (loaded one iteration ahead), %o2 = %g3 - %g1.
           The look-ahead load uses ASI_PNF from <asm/asi.h>; presumably this
           is the no-fault ASI, so that reading past the terminator cannot
           trap -- TODO confirm against the header.  */
59 1:      ldx             [%o1], %o3                      /* Load                         */
60         add             %o1, 8, %o1                     /* IEU1                         */
61 2:      mov             %o3, %g3                        /* IEU0         Group           */
62
63         sub             %o3, %g1, %o2                   /* IEU1                         */
64 3:      ldxa            [%o1] ASI_PNF, %o3              /* Load                         */
65 #ifdef EIGHTBIT_NOT_RARE
66         andn            %o2, %g3, %o2                   /* IEU0         Group           */
67 #endif
68         add             %o0, 8, %o0                     /* IEU0         Group           */
69         andcc           %o2, %g2, %g0                   /* IEU1                         */
70
        /* No byte flagged as possibly zero: loop to 2.  The branch is annulled
           (,a), so the stx of the word runs only when the branch is taken.  */
71         add             %o1, 8, %o1                     /* IEU0         Group           */
72         be,a,pt         %xcc, 2b                        /* CTI                          */
73          stx            %g3, [%o0 - 8]                  /* Store                        */
        /* Some byte of %g3 may be zero.  Test the bytes one at a time from the
           most significant down; labels 11, 10, 9, 8, 7, 6, 5 handle a NUL at
           byte offset 0, 1, 2, 3, 4, 5, 6 of the word respectively.  Each delay
           slot pre-shifts the next byte to test into %g4 or %g5, and the tail
           code reuses those shifted values for its stores.  */
74         srlx            %g3, 56, %g5                    /* IEU0         Group           */
75
76         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
77         be,pn           %icc, 11f                       /* CTI                          */
78          srlx           %g3, 48, %g4                    /* IEU0                         */
79         andcc           %g4, 0xff, %g0                  /* IEU1         Group           */
80
81         be,pn           %icc, 10f                       /* CTI                          */
82          srlx           %g3, 40, %g5                    /* IEU0                         */
83         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
84         be,pn           %icc, 9f                        /* CTI                          */
85
86          srlx           %g3, 32, %g4                    /* IEU0                         */
87         andcc           %g4, 0xff, %g0                  /* IEU1         Group           */
88         be,pn           %icc, 8f                        /* CTI                          */
89          srlx           %g3, 24, %g5                    /* IEU0                         */
90
91         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
92         be,pn           %icc, 7f                        /* CTI                          */
93          srlx           %g3, 16, %g4                    /* IEU0                         */
94         andcc           %g4, 0xff, %g0                  /* IEU1         Group           */
95
96         be,pn           %icc, 6f                        /* CTI                          */
97          srlx           %g3, 8, %g5                     /* IEU0                         */
98         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
99         be,pn           %icc, 5f                        /* CTI                          */
100
101          sub            %o3, %g1, %o2                   /* IEU0                         */
        /* Bytes 0..6 are non-zero, so store the whole word.  If byte 7 is
           non-zero as well this was a false alarm: resume at 3, with %o2 (from
           the sub in the delay slot above) and %g3 (from the mov in the delay
           slot below) already describing the next word.  Otherwise the NUL was
           the last byte of the word just stored; fall through to 4.  */
102         stx             %g3, [%o0 - 8]                  /* Store        Group           */
103         andcc           %g3, 0xff, %g0                  /* IEU1                         */
104         bne,pt          %icc, 3b                        /* CTI                          */
105
106          mov            %o3, %g3                        /* IEU0         Group           */
        /* Return %o0 - 1, the address of the last byte written (the NUL).
           Also the exit used by the byte loop at 13.  */
107 4:      retl                                            /* CTI+IEU1     Group           */
108          sub            %o0, 1, %o0                     /* IEU0                         */
109
        /* Tails of the aligned path.  Each stores the bytes of %g3 up to and
           including the NUL, using byte/halfword/word stores, and returns the
           NUL's address (computed into %g6 relative to the already-advanced
           %o0).  6: NUL at offset 5.  5: NUL at offset 6.  */
110         .align          16
111 6:      ba,pt           %xcc, 23f                       /* CTI          Group           */
112          sub            %o0, 3, %g6                     /* IEU0                         */
113 5:      sub             %o0, 2, %g6                     /* IEU0         Group           */
114         stb             %g5, [%o0 - 2]                  /* Store                        */
115
116         srlx            %g3, 16, %g4                    /* IEU0         Group           */
117 23:     sth             %g4, [%o0 - 4]                  /* Store                        */
118         srlx            %g3, 32, %g4                    /* IEU0         Group           */
119         stw             %g4, [%o0 - 8]                  /* Store                        */
120
121         retl                                            /* CTI+IEU1     Group           */
122          mov            %g6, %o0                        /* IEU0                         */
        /* 8: NUL at offset 3.  7: NUL at offset 4.  */
123 8:      ba,pt           %xcc, 24f                       /* CTI          Group           */
124          sub            %o0, 5, %g6                     /* IEU0                         */
125
126 7:      sub             %o0, 4, %g6                     /* IEU0         Group           */
127         stb             %g5, [%o0 - 4]                  /* Store                        */
128         srlx            %g3, 32, %g4                    /* IEU0         Group           */
129 24:     stw             %g4, [%o0 - 8]                  /* Store                        */
130
131         retl                                            /* CTI+IEU1     Group           */
132          mov            %g6, %o0                        /* IEU0                         */
        /* 10: NUL at offset 1.  9: NUL at offset 2.  */
133 10:     ba,pt           %xcc, 25f                       /* CTI          Group           */
134          sub            %o0, 7, %g6                     /* IEU0                         */
135
136 9:      sub             %o0, 6, %g6                     /* IEU0         Group           */
137         stb             %g5, [%o0 - 6]                  /* Store                        */
138         srlx            %g3, 48, %g4                    /* IEU0                         */
139 25:     sth             %g4, [%o0 - 8]                  /* Store        Group           */
140
141         retl                                            /* CTI+IEU1     Group           */
142          mov            %g6, %o0                        /* IEU0                         */
        /* 11: NUL is the first byte of the word; store just that byte.  */
143 11:     stb             %g5, [%o0 - 8]                  /* Store        Group           */
144         retl                                            /* CTI+IEU1     Group           */
145
146          sub            %o0, 8, %o0                     /* IEU0                         */
147
        /* DEST is not 8-byte aligned.  Finish building %g1/%g2 (the entry code
           branched here before doing so), then copy one byte at a time until
           either the NUL has been copied or DEST becomes aligned.  */
148         .align          16
149 12:     or              %g1, %g2, %g1                   /* IEU0         Group           */
150         ldub            [%o1], %o3                      /* Load                         */
151         sllx            %g1, 7, %g2                     /* IEU0         Group           */
152         stb             %o3, [%o0]                      /* Store        Group           */
153
        /* %o3 = byte just stored.  If it was the NUL, leave through 4, which
           returns %o0 - 1.  The delay slot preloads the next source byte.  */
154 13:     add             %o0, 1, %o0                     /* IEU0                         */
155         add             %o1, 1, %o1                     /* IEU1                         */
156         andcc           %o3, 0xff, %g0                  /* IEU1         Group           */
157         be,pn           %icc, 4b                        /* CTI                          */
158
159          lduba          [%o1] ASI_PNF, %o3              /* Load                         */
        /* DEST still misaligned: store the preloaded byte and repeat.  The
           branch is annulled, so the stb runs only when looping.  */
160         andcc           %o0, 7, %g0                     /* IEU1         Group           */
161         bne,a,pt        %icc, 13b                       /* CTI                          */
162          stb            %o3, [%o0]                      /* Store                        */
163
        /* DEST is aligned now.  If SRC is aligned too, join the aligned loop at
           1 (the annulled delay slot issues the same ldx that label 1 then
           repeats).  Otherwise fall into 14 with %g3 = SRC & 7.  */
164         andcc           %o1, 7, %g3                     /* IEU1         Group           */
165         be,a,pt         %icc, 1b                        /* CTI                          */
166          ldx            [%o1], %o3                      /* Load                         */
        /* SRC is misaligned by %g3 bytes, DEST is aligned.  Round SRC down to
           an 8-byte boundary and derive the two shift counts.
           NOTE(review): the condition codes written by orcc/addcc here, and by
           the subcc instructions in the tails below, are not read by any
           branch in this function; presumably the cc forms were chosen for
           IEU1 scheduling -- confirm.  */
167 14:     orcc            %g0, 64, %g4                    /* IEU1         Group           */
168
169         sllx            %g3, 3, %g5                     /* IEU0                         */
170         sub             %o1, %g3, %o1                   /* IEU0         Group           */
171         sub             %g4, %g5, %g4                   /* IEU1                         */
172                                                         /* %g1 = 0101010101010101       *
173                                                          * %g2 = 8080808080808080       *
174                                                          * %g3 = source alignment       *
175                                                          * %g5 = number of bits to shift left  *
176                                                          * %g4 = number of bits to shift right */
177         ldxa            [%o1] ASI_PNF, %o5              /* Load         Group           */
178
        /* Shifted loop.  %o5 = most recently loaded aligned SRC word.  Each
           iteration forms %o3 = (previous %o5 << %g5) | (new %o5 >> %g4),
           i.e. the next 8 string bytes, and tests it via %o4 = %o3 - %g1.  */
179         addcc           %o1, 8, %o1                     /* IEU1                         */
180 15:     sllx            %o5, %g5, %o3                   /* IEU0         Group           */
181         ldxa            [%o1] ASI_PNF, %o5              /* Load                         */
182         srlx            %o5, %g4, %o4                   /* IEU0         Group           */
183
184         add             %o0, 8, %o0                     /* IEU1                         */
185         or              %o3, %o4, %o3                   /* IEU0         Group           */
186         add             %o1, 8, %o1                     /* IEU1                         */
187         sub             %o3, %g1, %o4                   /* IEU0         Group           */
188
189 #ifdef EIGHTBIT_NOT_RARE
190         andn            %o4, %o3, %o4                   /* IEU0         Group           */
191 #endif
        /* No byte flagged: store the word (annulled delay slot, taken path
           only) and continue the loop.  */
192         andcc           %o4, %g2, %g0                   /* IEU1         Group           */
193         be,a,pt         %xcc, 15b                       /* CTI                          */
194          stx            %o3, [%o0 - 8]                  /* Store                        */
        /* Some byte of %o3 may be zero.  Test from the most significant byte
           down; labels 22, 21, 20, 19, 18, 17, 16 handle a NUL at byte offset
           0, 1, 2, 3, 4, 5, 6 respectively.  */
195         srlx            %o3, 56, %o4                    /* IEU0         Group           */
196
197         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
198         be,pn           %icc, 22f                       /* CTI                          */
199          srlx           %o3, 48, %o4                    /* IEU0                         */
200         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
201
202         be,pn           %icc, 21f                       /* CTI                          */
203          srlx           %o3, 40, %o4                    /* IEU0                         */
204         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
205         be,pn           %icc, 20f                       /* CTI                          */
206
207          srlx           %o3, 32, %o4                    /* IEU0                         */
208         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
209         be,pn           %icc, 19f                       /* CTI                          */
210          srlx           %o3, 24, %o4                    /* IEU0                         */
211
212         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
213         be,pn           %icc, 18f                       /* CTI                          */
214          srlx           %o3, 16, %o4                    /* IEU0                         */
215         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
216
217         be,pn           %icc, 17f                       /* CTI                          */
218          srlx           %o3, 8, %o4                     /* IEU0                         */
219         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
220         be,pn           %icc, 16f                       /* CTI                          */
221
        /* Bytes 0..6 are non-zero.  The delay slot above tests byte 7.  The stx
           in the delay slot of the bne is not annulled, so the whole word is
           stored whether the loop continues (false alarm) or byte 7 is the
           NUL, in which case %o0 - 1 is returned.  */
222          andcc          %o3, 0xff, %g0                  /* IEU1         Group           */
223         bne,pn          %icc, 15b                       /* CTI                          */
224          stx            %o3, [%o0 - 8]                  /* Store                        */
225         retl                                            /* CTI+IEU1     Group           */
226
227          sub            %o0, 1, %o0                     /* IEU0                         */
228
        /* Tails of the shifted path for a NUL at offsets 3..6: %g6 = address of
           the NUL, then the entry points chain through 26/27/28 so that the
           bytes after offset 3 are stored individually and bytes 0..3 with a
           single word store.  */
229         .align          16
230 17:     ba,pt           %xcc, 26f                       /* CTI          Group           */
231          subcc          %o0, 3, %g6                     /* IEU1                         */
232 18:     ba,pt           %xcc, 27f                       /* CTI          Group           */
233          subcc          %o0, 4, %g6                     /* IEU1                         */
234
235 19:     ba,pt           %xcc, 28f                       /* CTI          Group           */
236          subcc          %o0, 5, %g6                     /* IEU1                         */
237 16:     subcc           %o0, 2, %g6                     /* IEU1         Group           */
238         srlx            %o3, 8, %o4                     /* IEU0                         */
239
240         stb             %o4, [%o0 - 2]                  /* Store                        */
241 26:     srlx            %o3, 16, %o4                    /* IEU0         Group           */
242         stb             %o4, [%o0 - 3]                  /* Store                        */
243 27:     srlx            %o3, 24, %o4                    /* IEU0         Group           */
244
245         stb             %o4, [%o0 - 4]                  /* Store                        */
246 28:     srlx            %o3, 32, %o4                    /* IEU0         Group           */
247         stw             %o4, [%o0 - 8]                  /* Store                        */
248         retl                                            /* CTI+IEU1     Group           */
249
250          mov            %g6, %o0                        /* IEU0                         */
251
        /* Tails of the shifted path for a NUL at offsets 0..2: %g6 = address of
           the NUL, then byte stores chained through 29/30 down to offset 0.  */
252         .align          16
253 21:     ba,pt           %xcc, 29f                       /* CTI          Group           */
254          subcc          %o0, 7, %g6                     /* IEU1                         */
255 22:     ba,pt           %xcc, 30f                       /* CTI          Group           */
256          subcc          %o0, 8, %g6                     /* IEU1                         */
257
258 20:     subcc           %o0, 6, %g6                     /* IEU1         Group           */
259         srlx            %o3, 40, %o4                    /* IEU0                         */
260         stb             %o4, [%o0 - 6]                  /* Store                        */
261 29:     srlx            %o3, 48, %o4                    /* IEU0         Group           */
262
263         stb             %o4, [%o0 - 7]                  /* Store                        */
264 30:     srlx            %o3, 56, %o4                    /* IEU0         Group           */
265         stb             %o4, [%o0 - 8]                  /* Store                        */
266         retl                                            /* CTI+IEU1     Group           */
267
268          mov            %g6, %o0                        /* IEU0                         */
269 END(stpcpy)
270 libc_hidden_def(stpcpy)