2 * Copyright (C) 2008 The Android Open Source Project
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in
12 * the documentation and/or other materials provided with the
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <machine/cpu-features.h>
34 .type __memcmp16, %function
38 * Optimized memcmp16() for ARM9.
39 * This would not be optimal on XScale or ARM11, where more prefetching
40 * and use of PLD will be needed.
41 * The 2 major optimizations here are
42 * (1) The main loop compares 16 bytes at a time
43 * (2) The loads are scheduled in a way they won't stall
51 /* take care of the case where length is zero or the buffers are the same */
57 /* since r0 holds the result, move the first source
58 * pointer somewhere else
63 /* make sure we have at least 12 words, this simplifies things below
64 * and avoids some overhead for small blocks
70 /* small blocks (less than 12 words) */
85 0: stmfd sp!, {r4, lr}
87 /* align first pointer to word boundary */
95 /* restore registers and return */
102 0: /* here the first pointer is aligned, and we have at least 3 words
106 /* see if the pointers are congruent */
111 /* congruent case, 16 half-words per iteration
112 * We need to make sure there are at least 16+2 words left
113 * because we effectively read ahead one long word, and we could
114 * read past the buffer (and segfault) if we're not careful.
118 subs r2, r2, #(16 + 2)
152 /* do we have at least 2 words left? */
153 1: adds r2, r2, #(16 - 2 + 2)
156 /* finish off 2 words at a time */
167 /* restore registers and return */
172 2: /* the last 2 words are different, restart them */
179 /* restore registers and return */
183 /* process the last few words */
191 9: /* restore registers and return */
196 5: /*************** non-congruent case ***************/
198 /* align the unaligned pointer */
209 orr ip, ip, lr, lsl #16
211 moveq ip, lr, lsr #16
214 orreq ip, ip, lr, lsl #16
216 moveq ip, lr, lsr #16
219 orreq ip, ip, lr, lsl #16
221 moveq ip, lr, lsr #16
224 orreq ip, ip, lr, lsl #16
234 /* finish off the remaining bytes */
237 7: /* fix up the 2 pointers and fallthrough... */