1 /* Copy SRC to DEST returning the address of the terminating '\0' in DEST.
3 Copyright (C) 1998, 1999, 2002, 2003, 2004 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
6 Jakub Jelinek <jj@ultra.linux.cz>.
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the GNU C Library; if not, see
20 <http://www.gnu.org/licenses/>. */
/* Declare %g2, %g3 and %g6 to the assembler as scratch registers.  The
   code in this file uses all three as temporaries (%g2 holds the 0x80
   byte mask, %g3 the current word or the source alignment, %g6 the
   return value in the tail-store paths).  */
24 .register %g2, #scratch
25 .register %g3, #scratch
26 .register %g6, #scratch
29 /* Normally, this uses
30 ((xword - 0x0101010101010101) & 0x8080808080808080) test
31 to find out if any byte in xword could be zero. This is fast, but
32 also gives a false alarm for any byte in range 0x81-0xff. It does
33 not matter for correctness, as if this test tells us there could
34 be some zero byte, we check it byte by byte, but if bytes with
35 high bits set are common in the strings, then this will give poor
36 performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
37 will use a one tick slower, but more precise, test
38 ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
39 which does not give any false alarms (but if some bits are set,
40 one cannot assume from it which bytes are zero and which are not).
41 It is yet to be measured what the correct default is for glibc
42 these days for an average user. */
/* stpcpy entry and main loop for the case where both pointers are
   8-byte aligned.
   In:   %o0 = DEST (stored through), %o1 = SRC (loaded from).
   Out:  %o0 = address of the '\0' byte written to DEST.
   Uses: %g1 = 0x0101010101010101, %g2 = 0x8080808080808080,
         %g3 = word being tested/stored, %o3 = next word (loaded ahead),
         %o2 = result of the zero-byte test, %g4/%g5 = byte extracts.
   The instruction following each branch is its delay slot; with the
   ",a" (annul) form the slot runs only when the branch is taken.
   NOTE(review): the function entry label/macro is not visible in this
   listing.  */
/* Build the low half of the 0x01 constant while testing DEST alignment.  */
48 sethi %hi(0x01010101), %g1 /* IEU0 Group */
49 or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
50 andcc %o0, 7, %g0 /* IEU1 */
51 sllx %g1, 32, %g2 /* IEU0 Group */
/* DEST not 8-byte aligned: go copy single bytes at 12.  The delay slot
   computes %g3 = SRC & 7 and sets the flags for the second branch.  */
53 bne,pn %icc, 12f /* CTI */
54 andcc %o1, 7, %g3 /* IEU1 */
/* %g1 = 0x0101010101010101.  */
55 or %g1, %g2, %g1 /* IEU0 Group */
/* SRC not 8-byte aligned: use the shift-and-merge loop at 14.  The
   delay slot forms %g2 = 0x8080808080808080 on both paths.  */
56 bne,pn %icc, 14f /* CTI */
58 sllx %g1, 7, %g2 /* IEU0 Group */
/* 1: load the first source word.  2: loop head, %g3 = current word,
   %o2 = word - 0x01.. for the zero test.  3: load the following word
   ahead of time through ASI_PNF (defined outside this listing;
   presumably a no-fault address space so that reading past the end of
   the string cannot trap -- confirm).  */
59 1: ldx [%o1], %o3 /* Load */
60 add %o1, 8, %o1 /* IEU1 */
61 2: mov %o3, %g3 /* IEU0 Group */
63 sub %o3, %g1, %o2 /* IEU1 */
64 3: ldxa [%o1] ASI_PNF, %o3 /* Load */
/* Optional precise test: additionally mask with ~word.
   NOTE(review): no matching endif for this conditional is visible in
   this listing.  */
65 #ifdef EIGHTBIT_NOT_RARE
66 andn %o2, %g3, %o2 /* IEU0 Group */
/* Advance DEST first; the stores below therefore address the current
   word as [%o0 - 8].  */
68 add %o0, 8, %o0 /* IEU0 Group */
69 andcc %o2, %g2, %g0 /* IEU1 */
71 add %o1, 8, %o1 /* IEU0 Group */
/* No byte can be zero: store the whole word (annulled slot, executed
   only when looping) and continue at 2.  */
72 be,a,pt %xcc, 2b /* CTI */
73 stx %g3, [%o0 - 8] /* Store */
/* A zero byte is possible.  Examine the bytes from the most significant
   (lowest address, stored at [%o0 - 8]) downwards.  Each delay slot
   extracts the next byte, alternating between %g5 and %g4 so the tail
   code can still use the previous extract.  */
74 srlx %g3, 56, %g5 /* IEU0 Group */
76 andcc %g5, 0xff, %g0 /* IEU1 Group */
77 be,pn %icc, 11f /* CTI */
78 srlx %g3, 48, %g4 /* IEU0 */
79 andcc %g4, 0xff, %g0 /* IEU1 Group */
81 be,pn %icc, 10f /* CTI */
82 srlx %g3, 40, %g5 /* IEU0 */
83 andcc %g5, 0xff, %g0 /* IEU1 Group */
84 be,pn %icc, 9f /* CTI */
86 srlx %g3, 32, %g4 /* IEU0 */
87 andcc %g4, 0xff, %g0 /* IEU1 Group */
88 be,pn %icc, 8f /* CTI */
89 srlx %g3, 24, %g5 /* IEU0 */
91 andcc %g5, 0xff, %g0 /* IEU1 Group */
92 be,pn %icc, 7f /* CTI */
93 srlx %g3, 16, %g4 /* IEU0 */
94 andcc %g4, 0xff, %g0 /* IEU1 Group */
96 be,pn %icc, 6f /* CTI */
97 srlx %g3, 8, %g5 /* IEU0 */
98 andcc %g5, 0xff, %g0 /* IEU1 Group */
/* The delay slot already starts the zero test for the next word, in
   case this one turns out to be a false alarm.  */
99 be,pn %icc, 5f /* CTI */
101 sub %o3, %g1, %o2 /* IEU0 */
/* Bytes 0..6 are non-zero: the whole word can be stored.  If the last
   byte is non-zero too it was a false alarm, so resume at 3 with
   %g3 = next word (the delay slot runs on both paths).  */
102 stx %g3, [%o0 - 8] /* Store Group */
103 andcc %g3, 0xff, %g0 /* IEU1 */
104 bne,pt %icc, 3b /* CTI */
106 mov %o3, %g3 /* IEU0 Group */
/* 4: the byte just stored at [%o0 - 1] is the terminator; return its
   address.  Also reached from the byte loop at 13.  */
107 4: retl /* CTI+IEU1 Group */
108 sub %o0, 1, %o0 /* IEU0 */
/* Tail stores for the aligned loop.  Entered from the byte checks with
   %g3 = the source word containing the terminator, %o0 = DEST already
   advanced past this word (so the word lives at [%o0 - 8]), and
   %g4/%g5 = the two most recent byte extracts.  Each entry stores only
   the bytes up to and including the '\0' using the widest aligned
   stores available, and returns the address of the '\0' (kept in %g6
   and moved to %o0 in the retl delay slot).
   Entry 6: byte 5 is zero (%g4 = word >> 16); 5: byte 6 is zero
   (%g5 = word >> 8).  Both finish with a halfword for bytes 4-5 and a
   word for bytes 0-3.  */
111 6: ba,pt %xcc, 23f /* CTI Group */
112 sub %o0, 3, %g6 /* IEU0 */
113 5: sub %o0, 2, %g6 /* IEU0 Group */
114 stb %g5, [%o0 - 2] /* Store */
116 srlx %g3, 16, %g4 /* IEU0 Group */
117 23: sth %g4, [%o0 - 4] /* Store */
118 srlx %g3, 32, %g4 /* IEU0 Group */
119 stw %g4, [%o0 - 8] /* Store */
121 retl /* CTI+IEU1 Group */
122 mov %g6, %o0 /* IEU0 */
/* Entry 8: byte 3 is zero (%g4 = word >> 32); 7: byte 4 is zero
   (%g5 = word >> 24).  Both finish with a word store of bytes 0-3.  */
123 8: ba,pt %xcc, 24f /* CTI Group */
124 sub %o0, 5, %g6 /* IEU0 */
126 7: sub %o0, 4, %g6 /* IEU0 Group */
127 stb %g5, [%o0 - 4] /* Store */
128 srlx %g3, 32, %g4 /* IEU0 Group */
129 24: stw %g4, [%o0 - 8] /* Store */
131 retl /* CTI+IEU1 Group */
132 mov %g6, %o0 /* IEU0 */
/* Entry 10: byte 1 is zero (%g4 = word >> 48); 9: byte 2 is zero
   (%g5 = word >> 40).  Both finish with a halfword store of bytes 0-1.  */
133 10: ba,pt %xcc, 25f /* CTI Group */
134 sub %o0, 7, %g6 /* IEU0 */
136 9: sub %o0, 6, %g6 /* IEU0 Group */
137 stb %g5, [%o0 - 6] /* Store */
138 srlx %g3, 48, %g4 /* IEU0 */
139 25: sth %g4, [%o0 - 8] /* Store Group */
141 retl /* CTI+IEU1 Group */
142 mov %g6, %o0 /* IEU0 */
/* Entry 11: byte 0 is zero (%g5 = word >> 56); store just that byte and
   return its address.  */
143 11: stb %g5, [%o0 - 8] /* Store Group */
144 retl /* CTI+IEU1 Group */
146 sub %o0, 8, %o0 /* IEU0 */
/* DEST is not 8-byte aligned: copy one byte at a time until it is.
   Entered from the first alignment branch, before the constants were
   completed, so %g1 = 0x0101010101010101 and %g2 = 0x8080808080808080
   are finished here.  %o3 holds the byte being copied.  */
149 12: or %g1, %g2, %g1 /* IEU0 Group */
150 ldub [%o1], %o3 /* Load */
151 sllx %g1, 7, %g2 /* IEU0 Group */
152 stb %o3, [%o0] /* Store Group */
/* 13: step both pointers past the byte just stored.  If that byte was
   the terminator, return through 4, which yields %o0 - 1.  The delay
   slot loads the next source byte on both paths; it goes through
   ASI_PNF (defined outside this listing; presumably a no-fault address
   space so the read-ahead cannot trap -- confirm).  */
154 13: add %o0, 1, %o0 /* IEU0 */
155 add %o1, 1, %o1 /* IEU1 */
156 andcc %o3, 0xff, %g0 /* IEU1 Group */
157 be,pn %icc, 4b /* CTI */
159 lduba [%o1] ASI_PNF, %o3 /* Load */
/* DEST still unaligned: store the next byte (annulled slot, executed
   only when looping) and go round again.  */
160 andcc %o0, 7, %g0 /* IEU1 Group */
161 bne,a,pt %icc, 13b /* CTI */
162 stb %o3, [%o0] /* Store */
/* DEST is now aligned.  %g3 = SRC & 7.  If SRC is aligned as well,
   join the aligned loop at 1; the annulled slot loads the first word
   and the ldx at label 1 then loads the same word again.  Otherwise
   fall through to the unaligned-source code at 14.  */
164 andcc %o1, 7, %g3 /* IEU1 Group */
165 be,a,pt %icc, 1b /* CTI */
166 ldx [%o1], %o3 /* Load */
/* DEST is 8-byte aligned but SRC is not (%g3 = SRC & 7, non-zero).
   Read SRC with aligned 8-byte loads from the rounded-down address and
   assemble each destination word from two neighbouring source words:
   (previous << %g5) | (next >> %g4), where %g5 = 8 * %g3 and
   %g4 = 64 - %g5.  The left shift of the very first word discards the
   bytes that lie before the start of the string.  */
167 14: orcc %g0, 64, %g4 /* IEU1 Group */
169 sllx %g3, 3, %g5 /* IEU0 */
170 sub %o1, %g3, %o1 /* IEU0 Group */
171 sub %g4, %g5, %g4 /* IEU1 */
172 /* %g1 = 0101010101010101 *
173 * %g2 = 8080808080808080 *
174 * %g3 = source alignment *
175 * %g5 = number of bits to shift left *
176 * %g4 = number of bits to shift right */
/* Loads go through ASI_PNF (defined outside this listing; presumably a
   no-fault address space so reading outside the string cannot trap --
   confirm).  %o5 = most recently loaded aligned source word.  */
177 ldxa [%o1] ASI_PNF, %o5 /* Load Group */
179 addcc %o1, 8, %o1 /* IEU1 */
/* 15: loop head.  %o3 = high part from the previous word, then merge in
   the low part from the newly loaded word.  DEST is advanced first, so
   the assembled word belongs at [%o0 - 8].  */
180 15: sllx %o5, %g5, %o3 /* IEU0 Group */
181 ldxa [%o1] ASI_PNF, %o5 /* Load */
182 srlx %o5, %g4, %o4 /* IEU0 Group */
184 add %o0, 8, %o0 /* IEU1 */
185 or %o3, %o4, %o3 /* IEU0 Group */
186 add %o1, 8, %o1 /* IEU1 */
/* Zero-byte test on the assembled word: %o4 = word - 0x01...  */
187 sub %o3, %g1, %o4 /* IEU0 Group */
/* Optional precise test: additionally mask with ~word.
   NOTE(review): no matching endif for this conditional is visible in
   this listing.  */
189 #ifdef EIGHTBIT_NOT_RARE
190 andn %o4, %o3, %o4 /* IEU0 Group */
/* No byte can be zero: store the whole word (annulled slot, executed
   only when looping) and continue.  */
192 andcc %o4, %g2, %g0 /* IEU1 Group */
193 be,a,pt %xcc, 15b /* CTI */
194 stx %o3, [%o0 - 8] /* Store */
/* A zero byte is possible.  Examine the bytes from the most significant
   (lowest address) downwards; each delay slot extracts the next byte
   into %o4.  The tail code re-derives what it stores from %o3.  */
195 srlx %o3, 56, %o4 /* IEU0 Group */
197 andcc %o4, 0xff, %g0 /* IEU1 Group */
198 be,pn %icc, 22f /* CTI */
199 srlx %o3, 48, %o4 /* IEU0 */
200 andcc %o4, 0xff, %g0 /* IEU1 Group */
202 be,pn %icc, 21f /* CTI */
203 srlx %o3, 40, %o4 /* IEU0 */
204 andcc %o4, 0xff, %g0 /* IEU1 Group */
205 be,pn %icc, 20f /* CTI */
207 srlx %o3, 32, %o4 /* IEU0 */
208 andcc %o4, 0xff, %g0 /* IEU1 Group */
209 be,pn %icc, 19f /* CTI */
210 srlx %o3, 24, %o4 /* IEU0 */
212 andcc %o4, 0xff, %g0 /* IEU1 Group */
213 be,pn %icc, 18f /* CTI */
214 srlx %o3, 16, %o4 /* IEU0 */
215 andcc %o4, 0xff, %g0 /* IEU1 Group */
217 be,pn %icc, 17f /* CTI */
218 srlx %o3, 8, %o4 /* IEU0 */
219 andcc %o4, 0xff, %g0 /* IEU1 Group */
/* The delay slot of this branch tests the last byte, setting the flags
   consumed by the bne that follows when the branch is not taken.  */
220 be,pn %icc, 16f /* CTI */
222 andcc %o3, 0xff, %g0 /* IEU1 Group */
/* Bytes 0..6 are non-zero.  The full word is stored in the delay slot
   on both paths; if the last byte is non-zero it was a false alarm and
   the loop continues, otherwise the terminator is at [%o0 - 1].  */
223 bne,pn %icc, 15b /* CTI */
224 stx %o3, [%o0 - 8] /* Store */
225 retl /* CTI+IEU1 Group */
227 sub %o0, 1, %o0 /* IEU0 */
/* Tail stores for the unaligned-source loop.  Entered from its byte
   checks with %o3 = the assembled word containing the terminator and
   %o0 = DEST already advanced past this word (so the word lives at
   [%o0 - 8]).  Each entry sets %g6 to the address of the '\0', then
   falls through a chain of stores that writes only the bytes up to and
   including the '\0'; %g6 is moved to %o0 in the retl delay slot.
   First chain: terminator in bytes 3..6.  Entry 17: byte 5 is zero;
   18: byte 4; 19: byte 3; 16: byte 6.  Bytes 6, 5 and 4 are stored
   individually as needed, then bytes 0-3 as one word.  */
230 17: ba,pt %xcc, 26f /* CTI Group */
231 subcc %o0, 3, %g6 /* IEU1 */
232 18: ba,pt %xcc, 27f /* CTI Group */
233 subcc %o0, 4, %g6 /* IEU1 */
235 19: ba,pt %xcc, 28f /* CTI Group */
236 subcc %o0, 5, %g6 /* IEU1 */
237 16: subcc %o0, 2, %g6 /* IEU1 Group */
238 srlx %o3, 8, %o4 /* IEU0 */
240 stb %o4, [%o0 - 2] /* Store */
241 26: srlx %o3, 16, %o4 /* IEU0 Group */
242 stb %o4, [%o0 - 3] /* Store */
243 27: srlx %o3, 24, %o4 /* IEU0 Group */
245 stb %o4, [%o0 - 4] /* Store */
246 28: srlx %o3, 32, %o4 /* IEU0 Group */
247 stw %o4, [%o0 - 8] /* Store */
248 retl /* CTI+IEU1 Group */
250 mov %g6, %o0 /* IEU0 */
/* Second chain: terminator in bytes 0..2.  Entry 21: byte 1 is zero;
   22: byte 0; 20: byte 2.  Bytes 2, 1 and 0 are stored individually as
   needed.  */
253 21: ba,pt %xcc, 29f /* CTI Group */
254 subcc %o0, 7, %g6 /* IEU1 */
255 22: ba,pt %xcc, 30f /* CTI Group */
256 subcc %o0, 8, %g6 /* IEU1 */
258 20: subcc %o0, 6, %g6 /* IEU1 Group */
259 srlx %o3, 40, %o4 /* IEU0 */
260 stb %o4, [%o0 - 6] /* Store */
261 29: srlx %o3, 48, %o4 /* IEU0 Group */
263 stb %o4, [%o0 - 7] /* Store */
264 30: srlx %o3, 56, %o4 /* IEU0 Group */
265 stb %o4, [%o0 - 8] /* Store */
266 retl /* CTI+IEU1 Group */
268 mov %g6, %o0 /* IEU0 */
/* NOTE(review): libc_hidden_def is a macro defined outside this
   listing; presumably it emits the hidden internal alias for stpcpy --
   confirm against the glibc headers.  */
270 libc_hidden_def(stpcpy)