1 /* Optimized memset for Xtensa.
2 Copyright (C) 2001, 2007 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
20 #include <bits/xtensa-config.h>
22 /* Do not use .literal_position in the ENTRY macro. */
23 #undef LITERAL_POSITION
24 #define LITERAL_POSITION
26 /* void *memset (void *dst, int c, size_t length)
28 The algorithm is as follows:
30 Create a word with c in all byte positions.
32 If the destination is aligned, set 16B chunks with a loop, and then
33 finish up with 8B, 4B, 2B, and 1B stores conditional on the length.
35 If the destination is unaligned, align it by conditionally
36 setting 1B and/or 2B and then go to the aligned case. On this
unaligned path, lengths below 8 bytes are instead set byte-by-byte.
38 This code tries to use fall-through branches for the common
39 case of an aligned destination (except for the branches to
40 the alignment labels). */
29 /* Byte-by-byte set. */
/* This is the path the unaligned-destination code branches to (label
   .Lbyteset) when the length is below 8 bytes.
   NOTE(review): the label itself and the store loop are not visible in
   this excerpt; only the end-address computation survives. */
30 /* Skip a byte to get 1 mod 4 alignment for LOOPNEZ
31 (0 mod 4 alignment for LBEG). */
/* Assumes a5 is the working destination pointer (copied from a2 at the
   memset entry) and a4 is the byte count still to be set -- TODO confirm
   that neither has been adjusted in a way that breaks this on every path
   that reaches here. */
32 add a6, a5, a4 /* a6 = a5 + a4: address at which the byte-by-byte set ends */
69 /* Destination is unaligned. */
/* Alignment fix-up path, entered from the bit tests at the memset entry:
     .Ldst1mod2 - bit 0 of dst is set (odd address);
     .Ldst2mod4 - bit 0 is clear and bit 1 is set (address is 2 mod 4),
                  also reached by falling through from the retest below.
   Both entry points first divert short requests to .Lbyteset.
   NOTE(review): the stores and the pointer/length adjustments that
   perform the actual alignment are not visible in this excerpt. */
73 .Ldst1mod2: /* dst is only byte aligned */
75 /* Do short sizes byte-by-byte. */
76 bltui a4, 8, .Lbyteset /* unsigned compare: length a4 < 8 */
83 /* Now retest if dst is aligned. */
/* Branches when bit 1 of a5 is clear.  Presumably bit 0 has already been
   cleared by a one-byte store and pointer increment at this point, so a
   clear bit 1 means word alignment -- confirm against the full file. */
84 _bbci.l a5, 1, .Ldstaligned
86 .Ldst2mod4: /* dst has 16-bit alignment */
88 /* Do short sizes byte-by-byte. */
89 bltui a4, 8, .Lbyteset /* unsigned compare: length a4 < 8 */
96 /* dst is now aligned; return to main algorithm */
/* Main memset body.  The incoming dst in a2 is never modified so that it
   can serve as the return value; all address arithmetic is done on the
   copy in a5. */
101 /* a2 = dst, a3 = c, a4 = length */
103 /* Duplicate character into all bytes of word. */
110 mov a5, a2 /* copy dst so that a2 is return value; a5 is the working pointer */
112 /* Check if dst is unaligned. */
/* The two low address bits are tested in turn; the second test is only
   reached when bit 0 is clear.  If neither bit is set, execution falls
   through to the word-aligned code. */
113 _bbsi.l a2, 0, .Ldst1mod2 /* bit 0 set: odd address */
114 _bbsi.l a2, 1, .Ldst2mod4 /* bit 1 set: address is 2 mod 4 */
117 /* Get number of loop iterations with 16B per iteration. */
120 /* Destination is word-aligned. */
/* Adding the working pointer a5 turns a6 into an absolute address.
   NOTE(review): the instruction that sets a6 before this add is not
   visible here; presumably it holds the byte size of all full 16B
   chunks -- confirm. */
126 add a6, a6, a5 /* a6 = end of last 16B chunk */
128 /* Set 16 bytes per iteration. */
/* What follows this directive is assembled only when XCHAL_HAVE_LOOPS is
   zero or undefined.  Presumably the macro comes from
   <bits/xtensa-config.h> and the guarded code is the explicit
   compare-and-branch form of the loop; the guarded lines and the
   matching endif are not visible in this excerpt. */
134 #if !XCHAL_HAVE_LOOPS
138 /* Set any leftover pieces smaller than 16B. */
/* NOTE(review): libc_hidden_def is defined outside this file; presumably
   it provides the hidden-visibility definition used for calls to memset
   from inside the library -- confirm. */
164 libc_hidden_def (memset)