1 /* Compare two strings for differences.
3 Copyright (C) 1997, 1999, 2003 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
6 Jakub Jelinek <jj@ultra.linux.cz>.
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
/* strcmp for SPARC v9 (64-bit).  The trailing IEU0/IEU1/CTI/Load/Group
   remarks on each instruction are the original pipeline-slot notes.  */

#include <sysdep.h>		/* ENTRY, END */
#include <asm/asi.h>		/* ASI_PNF */

	/* %g2, %g3 and %g6 are used as scratch below and must be declared
	   to the assembler in 64-bit mode.  */
	.register	%g2, #scratch
	.register	%g3, #scratch
	.register	%g6, #scratch

	/* Normally, this uses the
	   ((xword - 0x0101010101010101) & 0x8080808080808080) test
	   to find out whether any byte in xword could be zero.  This is
	   fast, but it also gives a false alarm for any byte in the range
	   0x81-0xff.  That does not matter for correctness: whenever the
	   test says there could be a zero byte, the word is re-checked
	   byte by byte.  However, if bytes with the high bit set are
	   common in the strings, performance will be poor.  You can
	   #define EIGHTBIT_NOT_RARE and the algorithm will use a test that
	   is one tick slower but more precise,
	   ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
	   which does not give any false alarms (but if some bits are set,
	   one cannot tell from them which bytes are zero and which are
	   not).
	   It has yet to be measured which default is the right one for
	   glibc these days for an average user.  */

/* int strcmp (const char *s1, const char *s2)

   In:   %o0 = s1, %o1 = s2.
   Out:  %o0 = 0 when the strings are equal, a negative value when the
	 first differing byte of s1 is smaller (unsigned) than that of
	 s2, a positive value when it is larger.  The byte-wise path
	 returns the byte difference, the word-wise paths return -1/1.
   Uses: %g1-%g6, %o2-%o5 and the integer condition codes.
	 Leaf routine: returns with retl, touches no stack.

   Structure:
     entry  - s1 and s2 both 8-byte aligned: word loop at 2.
     7/8    - s1 not aligned: compare byte by byte until %o0 is aligned,
	      then continue at 1 (s2 aligned) or 9 (s2 not aligned).
     9-12   - s1 aligned, s2 not: each s2 word is assembled from two
	      aligned loads with shifts, then compared as in the word loop.
     13-25  - the two words differ: find out whether s1's word contains
	      a terminator before the difference and compute the result.
     4      - a terminator was reached with everything equal: return 0.

   Loads that may read ahead of what has been validated use ASI_PNF.

   Every control transfer has a delay slot; the instruction in it is
   indented by one extra space.  */

	.text
	.align		32
ENTRY(strcmp)
	sethi		%hi(0x01010101), %g1			/* IEU0		Group		*/
	andcc		%o0, 7, %g0				/* IEU1				*/
	bne,pn		%icc, 7f				/* CTI				*/
	 or		%g1, %lo(0x01010101), %g1		/* IEU0		Group		*/

	andcc		%o1, 7, %g3				/* IEU1				*/
	bne,pn		%icc, 9f				/* CTI				*/
	 sllx		%g1, 32, %g2				/* IEU0		Group		*/
	ldx		[%o0], %o2				/* Load				*/

	or		%g1, %g2, %g1				/* IEU0		Group		*/
	/* From here on %o1 holds (s2 - s1), so [%o1 + %o0] addresses the s2
	   word that corresponds to %o0.  %g1 = 0x0101010101010101,
	   %g2 = 0x8080808080808080.  */
1:	ldx		[%o1], %o3				/* Load				*/
	sub		%o1, %o0, %o1				/* IEU1				*/
	sllx		%g1, 7, %g2				/* IEU0		Group		*/

	/* Aligned word loop: %o2 = current s1 word, %o3 = current s2 word.
	   %o2 is reloaded with the next s1 word in the delay slot of the
	   mismatch branch, so later code rebuilds the old value as
	   %g3 + %g1.  */
2:	add		%o0, 8, %o0				/* IEU1				*/
	sub		%o2, %g1, %g3				/* IEU0		Group		*/
	subcc		%o2, %o3, %g0				/* IEU1				*/
	bne,pn		%xcc, 13f				/* CTI				*/

#ifdef EIGHTBIT_NOT_RARE
	 andn		%g3, %o2, %g4				/* IEU0		Group		*/
	ldxa		[%o0] ASI_PNF, %o2			/* Load				*/
	andcc		%g4, %g2, %g0				/* IEU1		Group		*/
#else
	 ldxa		[%o0] ASI_PNF, %o2			/* Load		Group		*/
	andcc		%g3, %g2, %g0				/* IEU1				*/
#endif
	be,a,pt		%xcc, 2b				/* CTI				*/
	 ldxa		[%o1 + %o0] ASI_PNF, %o3		/* Load		Group		*/

	/* The words are equal and the quick test fired.  Rebuild the s1
	   word in %o4 and look for a real zero byte, most significant byte
	   first; skip the upper four bytes when the test did not fire
	   there.  */
	addcc		%g3, %g1, %o4				/* IEU1				*/
	srlx		%g3, 32, %g3				/* IEU0				*/
	andcc		%g3, %g2, %g0				/* IEU1		Group		*/
	be,pt		%xcc, 3f				/* CTI				*/

	 srlx		%o4, 56, %o5				/* IEU0				*/
	andcc		%o5, 0xff, %g0				/* IEU1		Group		*/
	be,pn		%icc, 4f				/* CTI				*/
	 srlx		%o4, 48, %o5				/* IEU0				*/

	andcc		%o5, 0xff, %g0				/* IEU1		Group		*/
	be,pn		%icc, 4f				/* CTI				*/
	 srlx		%o4, 40, %o5				/* IEU0				*/
	andcc		%o5, 0xff, %g0				/* IEU1		Group		*/

	be,pn		%icc, 4f				/* CTI				*/
	 srlx		%o4, 32, %o5				/* IEU0				*/
	andcc		%o5, 0xff, %g0				/* IEU1		Group		*/
	be,pn		%icc, 4f				/* CTI				*/

3:	 srlx		%o4, 24, %o5				/* IEU0				*/
	andcc		%o5, 0xff, %g0				/* IEU1		Group		*/
	be,pn		%icc, 4f				/* CTI				*/
	 srlx		%o4, 16, %o5				/* IEU0				*/

	andcc		%o5, 0xff, %g0				/* IEU1		Group		*/
	be,pn		%icc, 4f				/* CTI				*/
	 srlx		%o4, 8, %o5				/* IEU0				*/
	andcc		%o5, 0xff, %g0				/* IEU1		Group		*/

	be,pn		%icc, 4f				/* CTI				*/
	 andcc		%o4, 0xff, %g0				/* IEU1		Group		*/
	/* No zero byte after all (false alarm): back to the word loop.  */
	bne,a,pn	%icc, 2b				/* CTI				*/
	 ldxa		[%o1 + %o0] ASI_PNF, %o3		/* Load				*/

	/* Terminator reached with all bytes equal: return 0.  The clr must
	   sit in the delay slot of retl, otherwise %o0 still holds the
	   advanced s1 pointer.  */
4:	retl							/* CTI+IEU1	Group		*/
	 clr		%o0					/* IEU0				*/

	/* The two words differ.  %g3 = (s1 word - %g1), %o3 = s2 word.
	   %g6 = 0xff is shifted into position to probe individual bytes.  */
13:	mov		0xff, %g6				/* IEU0		Group		*/
#ifdef EIGHTBIT_NOT_RARE
	andcc		%g4, %g2, %g0				/* IEU1				*/
#else
	andcc		%g3, %g2, %g0				/* IEU1				*/
#endif
	/* No possible zero byte in the s1 word: a plain unsigned 64-bit
	   compare at 25 decides.  The delay slot rebuilds the s1 word.  */
	be,pt		%xcc, 25f				/* CTI				*/
	 addcc		%g3, %g1, %o4				/* IEU1		Group		*/

	srlx		%g3, 32, %g3				/* IEU0				*/
	andcc		%g3, %g2, %g0				/* IEU1		Group		*/
	be,pt		%xcc, 23f				/* CTI				*/
	 sllx		%g6, 56, %o5				/* IEU0				*/

	/* Probe the bytes of the s1 word from the most significant one
	   down.  Each delay slot already loads the mask of the NEXT lower
	   byte, so on arrival at 24 %o5 selects the byte that follows the
	   zero byte.  */
	andcc		%o4, %o5, %g0				/* IEU1		Group		*/
	be,pn		%xcc, 24f				/* CTI				*/
	 sllx		%g6, 48, %o5				/* IEU0				*/
	andcc		%o4, %o5, %g0				/* IEU1		Group		*/

	be,pn		%xcc, 24f				/* CTI				*/
	 sllx		%g6, 40, %o5				/* IEU0				*/
	andcc		%o4, %o5, %g0				/* IEU1		Group		*/
	be,pn		%xcc, 24f				/* CTI				*/

	 sllx		%g6, 32, %o5				/* IEU0				*/
	andcc		%o4, %o5, %g0				/* IEU1		Group		*/
	be,pn		%xcc, 24f				/* CTI				*/
23:	 sllx		%g6, 24, %o5				/* IEU0				*/

	andcc		%o4, %o5, %g0				/* IEU1		Group		*/
	be,pn		%icc, 24f				/* CTI				*/
	 sllx		%g6, 16, %o5				/* IEU0				*/
	andcc		%o4, %o5, %g0				/* IEU1		Group		*/

	be,pn		%icc, 24f				/* CTI				*/
	 sllx		%g6, 8, %o5				/* IEU0				*/
	andcc		%o4, %o5, %g0				/* IEU1		Group		*/
	be,pn		%icc, 24f				/* CTI				*/

	 mov		%g6, %o5				/* IEU0				*/
	/* No terminator above the lowest byte: compare the whole words as
	   unsigned 64-bit values and return -1 or 1.  */
25:	cmp		%o4, %o3				/* IEU1		Group		*/
5:	mov		-1, %o0					/* IEU0				*/
	retl							/* CTI+IEU1	Group		*/

	 movgu		%xcc, 1, %o0				/* Single	Group		*/

	/* The s1 word has a zero byte; %o5 selects the byte after it.
	   Clear that byte and everything below it in both words, then
	   compare what is left.  %o0 must start at 0 because the masked
	   words may turn out equal (the difference was past the
	   terminator), in which case neither conditional move fires.  */
24:	sub		%o5, 1, %g6				/* IEU0		Group		*/
	clr		%o0					/* IEU1				*/
	or		%o5, %g6, %o5				/* IEU0		Group		*/
	andn		%o4, %o5, %o4				/* IEU0		Group		*/

	andn		%o3, %o5, %o3				/* IEU1				*/
	cmp		%o4, %o3				/* IEU1		Group		*/
	movgu		%xcc, 1, %o0				/* Single	Group		*/
	retl							/* CTI+IEU1	Group		*/

	 movlu		%xcc, -1, %o0				/* Single	Group		*/

	/* Byte loop found a difference: return (s1 byte - s2 byte).  */
6:	retl							/* CTI+IEU1	Group		*/
	 mov		%o4, %o0				/* IEU0				*/

	/* s1 is not 8-byte aligned: compare single bytes until %o0 is.
	   %o2/%o3 always hold the pair being compared while the loads in
	   the delay slots fetch the next pair.  */
7:	ldub		[%o0], %o2				/* Load				*/
	add		%o0, 1, %o0				/* IEU1				*/
	ldub		[%o1], %o3				/* Load		Group		*/
	sllx		%g1, 32, %g2				/* IEU0				*/

8:	add		%o1, 1, %o1				/* IEU1				*/
	subcc		%o2, %o3, %o4				/* IEU1		Group		*/
	bne,pn		%xcc, 6b				/* CTI				*/
	 lduba		[%o0] ASI_PNF, %o2			/* Load				*/

	brz,pn		%o3, 4b					/* CTI+IEU1	Group		*/
	 lduba		[%o1] ASI_PNF, %o3			/* Load				*/
	andcc		%o0, 7, %g0				/* IEU1		Group		*/
	bne,a,pn	%icc, 8b				/* CTI				*/

	 add		%o0, 1, %o0				/* IEU0				*/
	/* %o0 is aligned now.  Finish %g1 and join the word loop at 1 when
	   s2 is aligned as well, otherwise fall into the shifting loop.  */
	or		%g1, %g2, %g1				/* IEU0		Group		*/
	andcc		%o1, 7, %g3				/* IEU1				*/
	be,a,pn		%icc, 1b				/* CTI				*/

	 ldxa		[%o0] ASI_PNF, %o2			/* Load		Group		*/
	/* s1 aligned, s2 not; %g3 = s2 & 7.  Round %o1 down to a word
	   boundary, preload the first s2 word into %g6 and turn %o1 into
	   the offset of the next s2 word relative to %o0.  */
9:	sllx		%g3, 3, %g5				/* IEU0				*/
	mov		64, %o5					/* IEU1				*/
	sub		%o1, %g3, %o1				/* IEU0		Group		*/

	sub		%o5, %g5, %o5				/* IEU1				*/
	ldxa		[%o1] ASI_PNF, %g6			/* Load		Group		*/
	or		%g1, %g2, %g1				/* IEU0				*/
	sub		%o1, %o0, %o1				/* IEU1				*/

	sllx		%g1, 7, %g2				/* IEU0		Group		*/
	add		%o1, 8, %o1				/* IEU1				*/
			/* %g1 = 0101010101010101
			 * %g2 = 8080808080808080
			 * %g5 = number of bits to shift left
			 * %o5 = number of bits to shift right */
	/* %o3 = (previous s2 word << %g5) | (next s2 word >> %o5).  */
10:	sllx		%g6, %g5, %o3				/* IEU0		Group		*/
	ldxa		[%o1 + %o0] ASI_PNF, %g6		/* Load				*/

11:	srlx		%g6, %o5, %o4				/* IEU0		Group		*/
	ldxa		[%o0] ASI_PNF, %o2			/* Load				*/
	or		%o3, %o4, %o3				/* IEU1				*/
	add		%o0, 8, %o0				/* IEU0		Group		*/

	subcc		%o2, %o3, %g0				/* IEU1				*/
#ifdef EIGHTBIT_NOT_RARE
	sub		%o2, %g1, %g3				/* IEU0		Group		*/
	bne,pn		%xcc, 13b				/* CTI				*/
	 andn		%g3, %o2, %g4				/* IEU0		Group		*/

	andcc		%g4, %g2, %g0				/* IEU1		Group		*/
	be,pt		%xcc, 10b				/* CTI				*/
	 srlx		%g4, 32, %g4				/* IEU0				*/
	andcc		%g4, %g2, %g0				/* IEU1		Group		*/
#else
	bne,pn		%xcc, 13b				/* CTI				*/
	 sub		%o2, %g1, %g3				/* IEU0		Group		*/
	andcc		%g3, %g2, %g0				/* IEU1		Group		*/

	be,pt		%xcc, 10b				/* CTI				*/
	 srlx		%g3, 32, %g3				/* IEU0				*/
	andcc		%g3, %g2, %g0				/* IEU1		Group		*/
#endif
	be,pt		%xcc, 12f				/* CTI				*/

	/* Words equal and the quick test fired: look for a real zero byte
	   in %o2 (still intact on this path), most significant first.  */
	 srlx		%o2, 56, %g3				/* IEU0				*/
	andcc		%g3, 0xff, %g0				/* IEU1		Group		*/
	be,pn		%icc, 4b				/* CTI				*/
	 srlx		%o2, 48, %g3				/* IEU0				*/

	andcc		%g3, 0xff, %g0				/* IEU1		Group		*/
	be,pn		%icc, 4b				/* CTI				*/
	 srlx		%o2, 40, %g3				/* IEU0				*/
	andcc		%g3, 0xff, %g0				/* IEU1		Group		*/

	be,pn		%icc, 4b				/* CTI				*/
	 srlx		%o2, 32, %g3				/* IEU0				*/
	andcc		%g3, 0xff, %g0				/* IEU1		Group		*/
	be,pn		%icc, 4b				/* CTI				*/

12:	 srlx		%o2, 24, %g3				/* IEU0				*/
	andcc		%g3, 0xff, %g0				/* IEU1		Group		*/
	be,pn		%icc, 4b				/* CTI				*/
	 srlx		%o2, 16, %g3				/* IEU0				*/

	andcc		%g3, 0xff, %g0				/* IEU1		Group		*/
	be,pn		%icc, 4b				/* CTI				*/
	 srlx		%o2, 8, %g3				/* IEU0				*/
	andcc		%g3, 0xff, %g0				/* IEU1		Group		*/

	be,pn		%icc, 4b				/* CTI				*/
	 andcc		%o2, 0xff, %g0				/* IEU1		Group		*/
	be,pn		%icc, 4b				/* CTI				*/
	 sllx		%g6, %g5, %o3				/* IEU0				*/

	/* False alarm: resume at 11 with the first half of the next s2
	   word already in %o3.  */
	ba,pt		%xcc, 11b				/* CTI		Group		*/
	 ldxa		[%o1 + %o0] ASI_PNF, %g6		/* Load				*/
END(strcmp)
libc_hidden_def(strcmp)