1 /* Optimized version of the standard strcpy() function.
2 This file is part of the GNU C Library.
3 Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
4 Contributed by Dan Pop <Dan.Pop@cern.ch>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 In this form, it assumes little endian mode. For big endian mode,
28 the two shifts in .l2 must be inverted:
30 shl value = r[1], sh1 // value = w0 << sh1
31 shr.u tmp = r[0], sh2 // tmp = w1 >> sh2
/* strcpy body (IA-64 assembly): in0 = dest, in1 = src, result in ret0. */
/* NOTE(review): this excerpt carries leading line-number tokens and is */
/* missing lines (entry directive, register alias definitions, several */
/* label definitions and loop-closing branches).  The comments below */
/* describe only the instructions that are actually present. */
/* Names such as tmp, dest, src, loopcnt, c, sh1, sh2, asrc, value, pos, */
/* thresh, saved_pr, saved_lc, r[], p[] and MEMLAT are defined elsewhere. */
/* Visible outline: */
/*   1. save pr and ar.lc, set the return value to dest; */
/*   2. byte-copy until dest is 8-byte aligned (.l1); */
/*   3. if src is then 8-byte aligned, branch to .src_aligned, otherwise */
/*      assemble each 8-byte store from two aligned source words using */
/*      the shift counts sh1 and sh2; */
/*   4. when a zero byte is detected, branch to .found0; */
/*   5. restore ar.lc and pr. */
/* Register frame: 2 inputs, 0 locals, 30 outputs, 32 rotating; the */
/* previous ar.pfs value is placed in r2. */
56 alloc r2 = ar.pfs, 2, 0, 30, 32
62 mov ret0 = in0 /* return value = dest */
64 mov saved_pr = pr /* save the predicate registers */
66 mov saved_lc = ar.lc /* save the loop counter */
/* Number of bytes needed to bring dest up to an 8-byte boundary is */
/* (-dest) & 7, computed in tmp/loopcnt below. */
68 sub tmp = r0, in0 ;; /* tmp = -dest */
69 mov dest = in0 /* dest = working destination pointer */
70 mov src = in1 /* src = working source pointer */
71 and loopcnt = 7, tmp ;; /* loopcnt = -dest % 8 */
/* p6 is set when loopcnt == 0, i.e. dest is already 8-byte aligned. */
/* The decrement is in the same instruction group as the compare, so the */
/* compare reads the value of loopcnt from before the decrement. */
72 cmp.eq p6, p0 = loopcnt, r0
73 adds loopcnt = -1, loopcnt /* --loopcnt */
74 (p6) br.cond.sptk .dest_aligned ;;
/* NOTE(review): the instruction that loads ar.lc for the byte loop is */
/* not visible in this excerpt. */
76 .l1: /* copy -dest % 8 bytes */
77 ld1 c = [src], 1 /* c = *src++ */
79 st1 [dest] = c, 1 /* *dest++ = c */
/* NOTE(review): the compare that sets p6 for this branch (presumably a */
/* test of c against zero) and the loop-back branch to .l1 are not */
/* visible in this excerpt -- confirm against the full file. */
81 (p6) br.cond.dpnt .restore_and_exit
/* Set-up for the word loops.  The .dest_aligned label targeted above is */
/* not visible in this excerpt; presumably it sits here -- confirm. */
/* src is split into an 8-byte-aligned base (asrc) and a misalignment */
/* that is converted from bytes to bits (sh1); sh2 is the complementary */
/* shift count, 64 - sh1. */
84 and sh1 = 7, src /* sh1 = src % 8 */
85 mov ar.lc = -1 /* "infinite" loop */
86 and asrc = -8, src ;; /* asrc = src & -OPSIZ -- align src */
/* NOTE(review): thresh is compared against further down but its */
/* initialisation is not visible in this excerpt. */
/* The immediate 1 << 16 has only bit 16 set, i.e. only p16 of the */
/* rotating predicates is set. */
88 mov pr.rot = 1 << 16 /* set rotating predicates */
89 cmp.ne p7, p0 = r0, r0 /* clear p7 */
90 shl sh1 = sh1, 3 ;; /* sh1 = 8 * (src % 8) */
91 sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */
92 cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */
93 (p6) br.cond.sptk .src_aligned ;;
/* src is misaligned: prime r[1] with the first aligned source word (w0) */
/* and advance asrc by 8. */
94 ld8 r[1] = [asrc],8 ;;
/* Unaligned-source loop body (the header comment refers to it as .l2; */
/* the label and the loop-closing branch are not visible here). */
/* The next aligned word (w1) is loaded speculatively into r[0]; the */
/* load post-increments asrc by 8. */
98 ld8.s r[0] = [asrc], 8
99 shr.u value = r[1], sh1 ;; /* value = w0 >> sh1 */
/* pos = index of the first zero byte in value; the compares against 8 */
/* further down treat pos == 8 as "no zero byte found". */
100 czx1.r pos = value ;; /* do we have an "early" zero */
/* p7 is set when pos < thresh; the string then ends inside the bytes */
/* taken from w0 and w1 is never consumed. */
101 cmp.lt p7, p0 = pos, thresh /* in w0 >> sh1? */
102 (p7) br.cond.dpnt .found0
/* Only after ruling out an early terminator is the speculative load */
/* checked; a failed check branches to .recovery2. */
103 chk.s r[0], .recovery2 /* it is safe to do that only */
104 .back2: /* after the previous test */
105 shl tmp = r[0], sh2 /* tmp = w1 << sh2 */
107 or value = value, tmp ;; /* value |= tmp */
/* Re-scan the fully assembled 8-byte word for a zero byte. */
108 czx1.r pos = value ;;
109 cmp.ne p7, p0 = 8, pos
110 (p7) br.cond.dpnt .found0
/* No zero byte in value: store all 8 bytes and advance dest by 8. */
111 st8 [dest] = value, 8 /* store val to dest */
/* Aligned-source loop body (the .src_aligned label and the loop-closing */
/* branch are not visible in this excerpt). */
/* Instructions are predicated on p[0] (speculative load, src += 8) and */
/* on p[MEMLAT] (check, zero scan and store of r[MEMLAT]). */
115 (p[0]) ld8.s r[0] = [src], 8
/* A failed speculation check branches to .recovery3. */
116 (p[MEMLAT]) chk.s r[MEMLAT], .recovery3
/* NOTE(review): a re-entry label for the recovery path is presumably */
/* located here in the full file; it is not visible in this excerpt. */
/* value is given a copy of the word being scanned and stored. */
118 (p[MEMLAT]) mov value = r[MEMLAT]
119 (p[MEMLAT]) czx1.r pos = r[MEMLAT] ;;
120 (p[MEMLAT]) cmp.ne p7, p0 = 8, pos
121 (p7) br.cond.dpnt .found0
/* No zero byte in the word: store it and advance dest by 8. */
122 (p[MEMLAT]) st8 [dest] = r[MEMLAT], 8
/* Tail handling: take the low byte of value into c and shift the next */
/* byte down into the low position. */
/* NOTE(review): the .found0 label, the store of c and the loop branch */
/* are not visible in this excerpt. */
127 extr.u c = value, 0, 8 /* c = value & 0xff */
128 shr.u value = value, 8
/* Exit path: put back the state saved at entry.  The mask -1 selects */
/* all bits of the predicate mask operand. */
/* NOTE(review): the .restore_and_exit label and the return branch are */
/* not visible in this excerpt. */
133 mov ar.lc = saved_lc /* restore the loop counter */
134 mov pr = saved_pr, -1 /* restore the predicate registers */
/* Presumably the .recovery2 code (label not visible -- confirm). */
/* tmp = asrc - 8 is the address the speculative ld8.s into r[0] read */
/* from, because that load post-incremented asrc by 8. */
/* NOTE(review): the reload and the branch back are not visible here. */
137 add tmp = -8, asrc ;;
/* Presumably the .recovery3 code (label not visible -- confirm). */
/* r[MEMLAT] is reloaded non-speculatively from src - (MEMLAT + 1) * 8. */
141 add tmp = -(MEMLAT + 1) * 8, src ;;
142 ld8 r[MEMLAT] = [tmp]
/* libc_hidden_def is a macro defined outside this file's visible text. */
145 libc_hidden_def (strcpy)