OSDN Git Service

import sparc-optimized string functions from glibc
[uclinux-h8/uClibc.git] / libc / string / sparc / sparc64 / memchr.S
1 /* memchr (str, ch, n) -- Return pointer to first occurrence of CH in STR less
2    than N.
3    For SPARC v9.
4    Copyright (C) 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
5    This file is part of the GNU C Library.
6    Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
7                   Jakub Jelinek <jj@ultra.linux.cz>.
8    This version is developed using the same algorithm as the fast C
9    version which carries the following introduction:
10    Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
11    with help from Dan Sahlin (dan@sics.se) and
12    commentary by Jim Blandy (jimb@ai.mit.edu);
13    adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
14    and implemented by Roland McGrath (roland@ai.mit.edu).
15
16    The GNU C Library is free software; you can redistribute it and/or
17    modify it under the terms of the GNU Lesser General Public
18    License as published by the Free Software Foundation; either
19    version 2.1 of the License, or (at your option) any later version.
20
21    The GNU C Library is distributed in the hope that it will be useful,
22    but WITHOUT ANY WARRANTY; without even the implied warranty of
23    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24    Lesser General Public License for more details.
25
26    You should have received a copy of the GNU Lesser General Public
27    License along with the GNU C Library; if not, write to the Free
28    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29    02111-1307 USA.  */
30
31 #include <asm/asi.h>
/* Defaults used when XCC has not been defined before this point:
   branches written with %XCC then test the xcc condition codes,
   USE_BPR selects the brz form of the "n == 0" check in __memchr,
   and %g2/%g3 are declared to the assembler as scratch registers
   (both are used as temporaries by the routine below).  */
32 #ifndef XCC
33 #define XCC xcc
34 #define USE_BPR
35         .register       %g2, #scratch
36         .register       %g3, #scratch
37 #endif
38
39         /* Normally, this uses the test
40            ((xword - 0x0101010101010101) & 0x8080808080808080)
41            to find out if any byte in xword could be zero. This is fast, but
42            it also gives a false alarm for any byte in the range 0x81-0xff.
43            That does not matter for correctness, because whenever the test
44            says there could be a zero byte we check the word byte by byte,
45            but if bytes with the high bit set are common in the strings,
46            performance will be poor. You can #define EIGHTBIT_NOT_RARE and
47            the algorithm will use a one tick slower, but more precise, test
48            ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
49            which does not give any false alarms (but if some bits are set,
50            one cannot tell from it which bytes are zero and which are not).
51            It has yet to be measured which is the better default for glibc
52            for an average user these days.
53          */
54
55         .text
56         .align          32
/* __memchr (str, ch, n): scan the n bytes starting at str for the byte ch.
   Register roles below are read off the code of this routine.

   In:   %o0 = str   start of the buffer
         %o1 = ch    only the low 8 bits are used
         %o2 = n     number of bytes to examine
   Out:  %o0 = address of the first byte equal to ch, or 0 when none of
               the n bytes matches (including n == 0)
   Uses: %g1, %g2, %g3, %g5, %o3, %o4, %o5 and the condition codes.

   Outline:
     - the first byte is tested on its own;
     - if str is not 8-byte aligned, bytes are tested one at a time
       (labels 21/22) until the pointer is aligned;
     - the word loop (labels 1/2) XORs each aligned 8-byte word with ch
       replicated into every byte, so that a matching byte becomes zero,
       and applies the zero-byte test described in the comment that
       precedes .text in this file;
     - labels 3-9 return the address of a hit found by the word loop;
     - label 11 handles the last word, which may extend past str + n.

   An extra leading space before a mnemonic marks a branch delay slot.

   NOTE(review): the end pointer str + n is formed without an overflow
   check and is compared with a signed condition (bg).  Values of n that
   make str + n wrap or cross the sign boundary are not handled
   specially -- TODO confirm whether callers may pass such n.  */
57 ENTRY(__memchr)
58         and             %o1, 0xff, %o1                  /* IEU0         Group           */
        /* n == 0: nothing to search, return 0 through label 12.  The sll
           in the delay slot already starts building the replicated-ch
           pattern in %g3.  */
59 #ifdef USE_BPR
60         brz,pn          %o2, 12f                        /* CTI+IEU1                     */
61 #else
62         tst             %o2                             /* IEU1                         */
63         be,pn           %XCC, 12f                       /* CTI                          */
64 #endif
65          sll            %o1, 8, %g3                     /* IEU0         Group           */
        /* From here on %o2 = str + n (end pointer, one past the last byte
           to examine).  The next instructions, interleaved for scheduling,
           load the first byte into %o3 and build
             %g1 = 0x0101010101010101
             %g3 = ch replicated (low 4 bytes now; the upper half is kept
                   in %g5 and merged in later by "or %g3, %g5, %g3").  */
66         add             %o0, %o2, %o2                   /* IEU1                         */
67
68         sethi           %hi(0x01010101), %g1            /* IEU0         Group           */
69         or              %g3, %o1, %g3                   /* IEU1                         */
70         ldub            [%o0], %o3                      /* Load                         */
71         sllx            %g3, 16, %g5                    /* IEU0         Group           */
72
73         or              %g1, %lo(0x01010101), %g1       /* IEU1                         */
74         sllx            %g1, 32, %g2                    /* IEU0         Group           */
75         or              %g3, %g5, %g3                   /* IEU1                         */
76         sllx            %g3, 32, %g5                    /* IEU0         Group           */
77
        /* First byte equals ch: return str unchanged (label 13).  */
78         cmp             %o3, %o1                        /* IEU1                         */
79         be,pn           %xcc, 13f                       /* CTI                          */
80          or             %g1, %g2, %g1                   /* IEU0         Group           */
        /* Unaligned start: step past the byte just tested and continue
           bytewise at 21.  The add is annulled when str is aligned.  */
81         andcc           %o0, 7, %g0                     /* IEU1                         */
82
83         bne,a,pn        %icc, 21f                       /* CTI                          */
84          add            %o0, 1, %o0                     /* IEU0         Group           */
        /* Aligned start: load the first word and finish the constants,
           %g2 = %g1 << 7 = 0x8080808080808080 and %g3 = ch in all eight
           bytes.  */
85         ldx             [%o0], %o3                      /* Load         Group           */
86         sllx            %g1, 7, %g2                     /* IEU0                         */
87
88         or              %g3, %g5, %g3                   /* IEU1                         */
        /* 1: entry to the word loop with %o3 = word at %o0.  %o0 is moved
           past that word and %o4 gets a zero byte wherever the word
           equals ch.  */
89 1:      add             %o0, 8, %o0                     /* IEU0         Group           */
90         xor             %o3, %g3, %o4                   /* IEU1                         */
91                                                         /* %g1 = 0101010101010101       *
92                                                          * %g2 = 8080808080808080       *
93                                                          * %g3 =  c c c c c c c c       *
94                                                          * %o3 =      value             *
95                                                          * %o4 =   value XOR c          */
        /* 2: loop head; %o0 is 8 past the word held in %o4.  If that is
           beyond the end pointer the word is not wholly inside the buffer
           and the tail code at 11 finishes the search.  The load in the
           delay slot always runs and fetches the following word, which
           may lie past the buffer; it uses ASI_PNF (defined outside this
           file, presumably a non-faulting ASI -- TODO confirm).  */
96 2:      cmp             %o0, %o2                        /* IEU1         Group           */
97
98         bg,pn           %XCC, 11f                       /* CTI                          */
99          ldxa           [%o0] ASI_PNF, %o3              /* Load                         */
        /* Zero-byte test on %o4.  After this add, %o0 is 16 past the start
           of the word being tested.  */
100         sub             %o4, %g1, %o5                   /* IEU0         Group           */
101         add             %o0, 8, %o0                     /* IEU1                         */
102 #ifdef EIGHTBIT_NOT_RARE
103         andn            %o5, %o4, %o5                   /* IEU0         Group           */
104 #endif
105
        /* No byte flagged: iterate with %o4 = next word ^ pattern (the
           annulled delay slot runs only when the branch is taken).  */
106         andcc           %o5, %g2, %g0                   /* IEU1         Group           */
107         be,a,pt         %xcc, 2b                        /* CTI                          */
108          xor            %o3, %g3, %o4                   /* IEU0                         */
        /* Some byte may match: examine the bytes one by one, starting
           with bits 63-56 (the byte at the lowest address).  Labels 3-9
           return the hit for bytes 0-6.  */
109         srlx            %o4, 56, %g5                    /* IEU0                         */
110
111         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
112         be,pn           %icc, 3f                        /* CTI                          */
113          srlx           %o4, 48, %g5                    /* IEU0                         */
114         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
115
116         be,pn           %icc, 4f                        /* CTI                          */
117          srlx           %o4, 40, %g5                    /* IEU0                         */
118         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
119         be,pn           %icc, 5f                        /* CTI                          */
120
121          srlx           %o4, 32, %g5                    /* IEU0                         */
122         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
123         be,pn           %icc, 6f                        /* CTI                          */
124          srlx           %o4, 24, %g5                    /* IEU0                         */
125
126         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
127         be,pn           %icc, 7f                        /* CTI                          */
128          srlx           %o4, 16, %g5                    /* IEU0                         */
129         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
130
131         be,pn           %icc, 8f                        /* CTI                          */
132          srlx           %o4, 8, %g5                     /* IEU0                         */
        /* Byte 6 is tested here; byte 7 is tested by the andcc in the
           delay slot of the branch to 9.  If byte 7 does not match
           either, the word test was a false alarm and the loop resumes
           at 2 with the next word; otherwise return %o0 - 9.  */
133         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
134         be,pn           %icc, 9f                        /* CTI                          */
135
136          andcc          %o4, 0xff, %g0                  /* IEU1         Group           */
137         bne,pt          %icc, 2b                        /* CTI                          */
138          xor            %o3, %g3, %o4                   /* IEU0                         */
139         retl                                            /* CTI+IEU1     Group           */
140
141          add            %o0, -9, %o0                    /* IEU0                         */
142
        /* 3-9: match in byte 0..6 of the tested word.  %o0 is 16 past the
           start of that word, hence the offsets -16 .. -10.  */
143         .align          16
144 3:      retl                                            /* CTI+IEU1     Group           */
145          add            %o0, -16, %o0                   /* IEU0                         */
146 4:      retl                                            /* CTI+IEU1     Group           */
147          add            %o0, -15, %o0                   /* IEU0                         */
148
149 5:      retl                                            /* CTI+IEU1     Group           */
150          add            %o0, -14, %o0                   /* IEU0                         */
151 6:      retl                                            /* CTI+IEU1     Group           */
152          add            %o0, -13, %o0                   /* IEU0                         */
153
154 7:      retl                                            /* CTI+IEU1     Group           */
155          add            %o0, -12, %o0                   /* IEU0                         */
156 8:      retl                                            /* CTI+IEU1     Group           */
157          add            %o0, -11, %o0                   /* IEU0                         */
158
159 9:      retl                                            /* CTI+IEU1     Group           */
160          add            %o0, -10, %o0                   /* IEU0                         */
        /* 11: tail.  %o4 holds the last word, which is not wholly inside
           the buffer.  %o0 is stepped back to the start of that word and
           %o2 becomes the number of its bytes that belong to the buffer.
           If the word test flags nothing, or no byte is valid, return 0.
           Otherwise each step checks byte k for a match (branch to label
           13 + k) and then, through the cmp in that branch's delay slot,
           stops with 0 when only k + 1 bytes were valid.  */
161 11:     sub             %o4, %g1, %o5                   /* IEU0         Group           */
162         sub             %o0, 8, %o0                     /* IEU1                         */
163
164         andcc           %o5, %g2, %g0                   /* IEU1         Group           */
165         be,pt           %xcc, 12f                       /* CTI                          */
166          sub            %o2, %o0, %o2                   /* IEU0                         */
167         tst             %o2                             /* IEU1         Group           */
168
169         be,pn           %XCC, 12f                       /* CTI                          */
170          srlx           %o4, 56, %g5                    /* IEU0                         */
171         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
172         be,pn           %icc, 13f                       /* CTI                          */
173
174          cmp            %o2, 1                          /* IEU0                         */
175         be,pn           %XCC, 12f                       /* CTI          Group           */
176          srlx           %o4, 48, %g5                    /* IEU0                         */
177         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
178
179         be,pn           %icc, 14f                       /* CTI                          */
180          cmp            %o2, 2                          /* IEU1         Group           */
181         be,pn           %XCC, 12f                       /* CTI                          */
182          srlx           %o4, 40, %g5                    /* IEU0                         */
183
184         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
185         be,pn           %icc, 15f                       /* CTI                          */
186          cmp            %o2, 3                          /* IEU1         Group           */
187         be,pn           %XCC, 12f                       /* CTI                          */
188
189          srlx           %o4, 32, %g5                    /* IEU0                         */
190         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
191         be,pn           %icc, 16f                       /* CTI                          */
192          cmp            %o2, 4                          /* IEU1         Group           */
193
194         be,pn           %XCC, 12f                       /* CTI                          */
195          srlx           %o4, 24, %g5                    /* IEU0                         */
196         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
197         be,pn           %icc, 17f                       /* CTI                          */
198
199          cmp            %o2, 5                          /* IEU1         Group           */
200         be,pn           %XCC, 12f                       /* CTI                          */
201          srlx           %o4, 16, %g5                    /* IEU0                         */
202         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
203
204         be,pn           %icc, 18f                       /* CTI                          */
205          cmp            %o2, 6                          /* IEU1         Group           */
206         be,pn           %XCC, 12f                       /* CTI                          */
207          srlx           %o4, 8, %g5                     /* IEU0                         */
208
209         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
210         be,pn           %icc, 19f                       /* CTI                          */
211          nop                                            /* IEU0                         */
        /* 12: not found -- return 0.  */
212 12:     retl                                            /* CTI+IEU1     Group           */
213
214          clr            %o0                             /* IEU0                         */
215         nop                                             /* Stub                         */
        /* 13-19: match at %o0 + 0 .. %o0 + 6.  Label 13 is also the return
           for a match on the very first byte of the buffer.  */
216 13:     retl                                            /* CTI+IEU1     Group           */
217          nop                                            /* IEU0                         */
218
219 14:     retl                                            /* CTI+IEU1     Group           */
220          add            %o0, 1, %o0                     /* IEU0                         */
221 15:     retl                                            /* CTI+IEU1     Group           */
222          add            %o0, 2, %o0                     /* IEU0                         */
223
224 16:     retl                                            /* CTI+IEU1     Group           */
225          add            %o0, 3, %o0                     /* IEU0                         */
226 17:     retl                                            /* CTI+IEU1     Group           */
227          add            %o0, 4, %o0                     /* IEU0                         */
228
229 18:     retl                                            /* CTI+IEU1     Group           */
230          add            %o0, 5, %o0                     /* IEU0                         */
231 19:     retl                                            /* CTI+IEU1     Group           */
232          add            %o0, 6, %o0                     /* IEU0                         */
233
        /* 21: unaligned start; %o0 already points at the second byte.
           Return 0 if that is the end pointer.  Otherwise finish %g2 and
           %g3 and test one byte at a time (label 22) until %o0 is 8-byte
           aligned, then enter the word loop at 1 with the word loaded by
           the annulled delay slot.  */
234 21:     cmp             %o0, %o2                        /* IEU1                         */
235         be,pn           %XCC, 12b                       /* CTI                          */
236          sllx           %g1, 7, %g2                     /* IEU0         Group           */
237         ldub            [%o0], %o3                      /* Load                         */
238
239         or              %g3, %g5, %g3                   /* IEU1                         */
240 22:     andcc           %o0, 7, %g0                     /* IEU1         Group           */
241         be,a,pn         %icc, 1b                        /* CTI                          */
242          ldx            [%o0], %o3                      /* Load                         */
243
        /* Compare the current byte.  %o0 is advanced in the delay slot
           whether or not the branch is taken, so label 23 backs up by
           one.  If the end pointer is reached without a match, return 0.  */
244         cmp             %o3, %o1                        /* IEU1         Group           */
245         be,pn           %xcc, 23f                       /* CTI                          */
246          add            %o0, 1, %o0                     /* IEU0                         */
247         cmp             %o0, %o2                        /* IEU1         Group           */
248
249         bne,a,pt        %XCC, 22b                       /* CTI                          */
250          ldub           [%o0], %o3                      /* Load                         */
251         retl                                            /* CTI+IEU1     Group           */
252          clr            %o0                             /* IEU0                         */
253
        /* 23: match found by the bytewise scan.  */
254 23:     retl                                            /* CTI+IEU1     Group           */
255          add            %o0, -1, %o0                    /* IEU0                         */
256 END(__memchr)
257
/* Additional names for __memchr.  weak_alias is defined outside this
   file; presumably it makes the second name a weak alias of the first
   -- TODO confirm.  The __ubp_memchr name is only emitted when
   __BOUNDED_POINTERS__ is zero or undefined.  */
258 weak_alias (__memchr, memchr)
259 #if !__BOUNDED_POINTERS__
260 weak_alias (__memchr, __ubp_memchr)
261 #endif