/* Optimized memcpy for Xtensa.
   Copyright (C) 2001, 2007 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
   Boston, MA 02110-1301, USA.  */
#include "../../sysdeps/linux/xtensa/sysdep.h"
#include <bits/xtensa-config.h>
/* src_b combines two source words with a funnel shift (SRC) so that the
   result holds four contiguous bytes from an unaligned source; the
   operand order depends on endianness.  */
	.macro	src_b	r, w0, w1
#ifdef __XTENSA_EB__
	src	\r, \w0, \w1
#else
	src	\r, \w1, \w0
#endif
	.endm
/* If the Xtensa Unaligned Load Exception option is not used, this
   code can run a few cycles faster by relying on the low address bits
   being ignored.  However, if the code is then run with an Xtensa ISS
   client that checks for unaligned accesses, it will produce a lot of
   warning messages.  Set this flag to disable the use of unaligned
   accesses and keep the ISS happy.  */

#define UNALIGNED_ADDRESSES_CHECKED 1
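
/* Roughly what the flag controls, as a hedged C sketch (illustrative only,
   not part of the build; the helper name word_aligned_base is invented for
   this sketch): with checking enabled, the source pointer is rounded down
   to a word boundary before the 32-bit loads, so a simulator that traps
   unaligned accesses only ever sees aligned ones; the dropped offset is
   added back later when the pointer is needed for the byte-sized tail.

     #include <stdint.h>

     static inline const uint32_t *
     word_aligned_base (const void *src, uintptr_t *off)
     {
       *off = (uintptr_t) src & 3;                          // unalignment offset
       return (const uint32_t *) ((uintptr_t) src - *off);  // aligned base
     }
 */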
/* Do not use .literal_position in the ENTRY macro.  */
#undef LITERAL_POSITION
#define LITERAL_POSITION
/* void *memcpy (void *dst, const void *src, size_t len)

   The algorithm is as follows:

   If the destination is unaligned, align it by conditionally
   copying 1- and/or 2-byte pieces.

   If the source is aligned, copy 16 bytes with a loop, and then finish up
   with 8, 4, 2, and 1-byte copies conditional on the length.

   Else (if source is unaligned), do the same, but use SRC to align the
   source data.

   This code tries to use fall-through branches for the common
   case of aligned source and destination and multiple of 4 (or 8) length.  */
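
/* For reference, a hedged C sketch of the flow described above (illustrative
   only, not part of the build; the function name memcpy_sketch and the exact
   short-copy thresholds are assumptions for the example).  The real code
   below implements this with Xtensa hardware loops where available.

     #include <stddef.h>
     #include <stdint.h>

     void *memcpy_sketch (void *dst, const void *src, size_t len)
     {
       unsigned char *d = dst;
       const unsigned char *s = src;

       // Very short copies to an unaligned dst just go byte-by-byte.
       if (((uintptr_t) d & 3) != 0 && len < 7)
         {
           while (len--)
             *d++ = *s++;
           return dst;
         }

       // Align dst by conditionally copying 1- and/or 2-byte pieces.
       if ((uintptr_t) d & 1) { *d++ = *s++; len -= 1; }
       if ((uintptr_t) d & 2) { *d++ = *s++; *d++ = *s++; len -= 2; }

       if (((uintptr_t) s & 3) == 0)
         {
           // Aligned source: 16 bytes per iteration, then 8/4/2/1 tails.
           uint32_t *dw = (uint32_t *) d;
           const uint32_t *sw = (const uint32_t *) s;
           for (; len >= 16; len -= 16, dw += 4, sw += 4)
             { dw[0] = sw[0]; dw[1] = sw[1]; dw[2] = sw[2]; dw[3] = sw[3]; }
           if (len & 8) { dw[0] = sw[0]; dw[1] = sw[1]; dw += 2; sw += 2; }
           if (len & 4) { *dw++ = *sw++; }
           d = (unsigned char *) dw;
           s = (const unsigned char *) sw;
           if (len & 2) { *d++ = *s++; *d++ = *s++; }
           if (len & 1) { *d = *s; }
         }
       else
         {
           // Unaligned source: same structure, but each destination word is
           // assembled from two source words (see the SRC sketch near the
           // end of this file); shown here as a plain byte loop.
           while (len--)
             *d++ = *s++;
         }
       return dst;
     }
 */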
/* Byte by byte copy.  */
	/* Skip a byte to get 1 mod 4 alignment for LOOPNEZ
	   (0 mod 4 alignment for LBEG).  */

	add	a7, a3, a4	// a7 = end address for source
/* Destination is unaligned.  */

.Ldst1mod2:	// dst is only byte aligned

	/* Do short copies byte-by-byte.  */
	_bltui	a4, 7, .Lbytecopy

	/* Return to main algorithm if dst is now aligned.  */
	_bbci.l	a5, 1, .Ldstaligned
.Ldst2mod4:	// dst has 16-bit alignment

	/* Do short copies byte-by-byte.  */
	_bltui	a4, 6, .Lbytecopy

	/* dst is now aligned; return to main algorithm.  */
ENTRY (memcpy)
	/* a2 = dst, a3 = src, a4 = len */

	mov	a5, a2		// copy dst so that a2 is return value
	_bbsi.l	a2, 0, .Ldst1mod2
	_bbsi.l	a2, 1, .Ldst2mod4
.Ldstaligned:
	/* Get number of loop iterations with 16B per iteration.  */

	/* Check if source is aligned.  */
	_bany	a3, a8, .Lsrcunaligned
	/* Destination and source are word-aligned, use word copy.  */

	add	a8, a8, a3	// a8 = end of last 16B source chunk
#if !XCHAL_HAVE_LOOPS

	/* Copy any leftover pieces smaller than 16B.  */
/* Destination is aligned; source is unaligned.  */

.Lsrcunaligned:
	/* Avoid loading anything for zero-length copies.  */
	/* Copy 16 bytes per iteration for word-aligned dst and
	   unaligned src.  */
	ssa8	a3		// set shift amount from byte offset
#if UNALIGNED_ADDRESSES_CHECKED
	and	a11, a3, a8	// save unalignment offset for below
	sub	a3, a3, a11	// align a3
#endif
	l32i	a6, a3, 0	// load first word

	add	a10, a10, a3	// a10 = end of last 16B source chunk
#if !XCHAL_HAVE_LOOPS
#if UNALIGNED_ADDRESSES_CHECKED
	add	a3, a3, a11	// readjust a3 with correct misalignment
#endif
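
/* A hedged C sketch of what ssa8 + src_b accomplish in the loop above
   (illustrative only; the helper name fetch_unaligned_word is invented for
   the example, and little-endian byte order is assumed).  wsrc is the
   word-aligned base below the true source pointer and off = src & 3; off is
   1..3 here because a word-aligned source takes the aligned path instead.
   The real loop keeps the previously loaded word in a register so each
   source word is loaded only once.

     #include <stdint.h>

     static inline uint32_t
     fetch_unaligned_word (const uint32_t *wsrc, unsigned off)
     {
       uint32_t lo = wsrc[0];   // word containing the first source bytes
       uint32_t hi = wsrc[1];   // next source word
       // Funnel shift: take the top bytes of lo and the bottom bytes of hi.
       return (lo >> (8 * off)) | (hi << (32 - 8 * off));
     }
 */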
libc_hidden_def (memcpy)