From 20cd138d06a70c98ffcf41105f3af8af3a8a8204 Mon Sep 17 00:00:00 2001 From: meissner Date: Mon, 11 Mar 2002 15:44:34 +0000 Subject: [PATCH] Add MIPS specific string/memory functions --- newlib/ChangeLog | 17 ++- newlib/libc/machine/mips/Makefile.am | 2 +- newlib/libc/machine/mips/Makefile.in | 4 +- newlib/libc/machine/mips/memcpy.c | 164 +++++++++++++++++++++++++ newlib/libc/machine/mips/memset.c | 142 ++++++++++++++++++++++ newlib/libc/machine/mips/strcmp.c | 71 +++++++++++ newlib/libc/machine/mips/strlen.c | 71 +++++++++++ newlib/libc/machine/mips/strncpy.c | 229 +++++++++++++++++++++++++++++++++++ 8 files changed, 694 insertions(+), 6 deletions(-) create mode 100644 newlib/libc/machine/mips/memcpy.c create mode 100644 newlib/libc/machine/mips/memset.c create mode 100644 newlib/libc/machine/mips/strcmp.c create mode 100644 newlib/libc/machine/mips/strlen.c create mode 100644 newlib/libc/machine/mips/strncpy.c diff --git a/newlib/ChangeLog b/newlib/ChangeLog index e0ddb7d34d..f2cb305ad2 100644 --- a/newlib/ChangeLog +++ b/newlib/ChangeLog @@ -1,8 +1,19 @@ +2002-03-11 Michael Meissner + + * libc/machine/mips/Makefile.am (lib_a_SOURCES): Add Mips specific + variants strlen.c, strcmp.c, strncpy.c, memset.c and memcpy.c. + * libc/machine/mips/Makefile.in: Regenerate. + * libc/machine/mips/memcpy.c: New file, optimized for MIPS. + * libc/machine/mips/memset.c: Ditto. + * libc/machine/mips/strcmp.c: Ditto. + * libc/machine/mips/strlen.c: Ditto. + * libc/machine/mips/strncpy.c: Ditto. + 2002-03-06 Jeff Johnston - * libc/machine/i386/Makefile.am: Add $(oext) for setjmp - object so it works for shared library or statici library. - * libc/machine/i386/Makefile.in: Regenerated. + * libc/machine/i386/Makefile.am: Add $(oext) for setjmp + object so it works for shared library or statici library. + * libc/machine/i386/Makefile.in: Regenerated. 
Wed Mar 6 10:24:26 2002 J"orn Rennecke diff --git a/newlib/libc/machine/mips/Makefile.am b/newlib/libc/machine/mips/Makefile.am index 1c65b9ffe9..74c08bca5f 100644 --- a/newlib/libc/machine/mips/Makefile.am +++ b/newlib/libc/machine/mips/Makefile.am @@ -6,7 +6,7 @@ INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS) noinst_LIBRARIES = lib.a -lib_a_SOURCES = setjmp.S +lib_a_SOURCES = setjmp.S strlen.c strcmp.c strncpy.c memset.c memcpy.c ACLOCAL_AMFLAGS = -I ../../.. CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host diff --git a/newlib/libc/machine/mips/Makefile.in b/newlib/libc/machine/mips/Makefile.in index ace6f06129..a4394df006 100644 --- a/newlib/libc/machine/mips/Makefile.in +++ b/newlib/libc/machine/mips/Makefile.in @@ -84,7 +84,7 @@ INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS) noinst_LIBRARIES = lib.a -lib_a_SOURCES = setjmp.S +lib_a_SOURCES = setjmp.S strlen.c strcmp.c strncpy.c memset.c memcpy.c ACLOCAL_AMFLAGS = -I ../../.. CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host @@ -98,7 +98,7 @@ DEFS = @DEFS@ -I. -I$(srcdir) CPPFLAGS = @CPPFLAGS@ LIBS = @LIBS@ lib_a_LIBADD = -lib_a_OBJECTS = setjmp.o +lib_a_OBJECTS = setjmp.o strlen.o strcmp.o strncpy.o memset.o memcpy.o CFLAGS = @CFLAGS@ COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) CCLD = $(CC) diff --git a/newlib/libc/machine/mips/memcpy.c b/newlib/libc/machine/mips/memcpy.c new file mode 100644 index 0000000000..761f7e9ab7 --- /dev/null +++ b/newlib/libc/machine/mips/memcpy.c @@ -0,0 +1,164 @@ +/* +FUNCTION + <<memcpy>>---copy memory regions, optimized for the MIPS processors + +ANSI_SYNOPSIS + #include <string.h> + void* memcpy(void *<[out]>, const void *<[in]>, size_t <[n]>); + +TRAD_SYNOPSIS + void *memcpy(<[out]>, <[in]>, <[n]>) + void *<[out]>; + void *<[in]>; + size_t <[n]>; + +DESCRIPTION + This function copies <[n]> bytes from the memory region + pointed to by <[in]> to the memory region pointed to by + <[out]>. 
+
+        If the regions overlap, the behavior is undefined.
+
+RETURNS
+        <<memcpy>> returns a pointer to the first byte of the <[out]>
+        region.
+
+PORTABILITY
+<<memcpy>> is ANSI C.
+
+<<memcpy>> requires no supporting OS subroutines.
+
+QUICKREF
+        memcpy ansi pure
+        */
+
+#include <_ansi.h>
+#include  /* NOTE(review): header names after the bare #include lines were lost in extraction; restore from upstream.  */
+#include
+
+#ifdef __mips64
+#define wordtype long long
+#else
+#define wordtype long
+#endif
+
+/* Nonzero if either X or Y is not aligned on a wordtype boundary.  */
+#define UNALIGNED(X, Y) \
+  (((long)(X) & (sizeof (wordtype) - 1)) | ((long)(Y) & (sizeof (wordtype) - 1)))
+
+/* How many bytes are copied each iteration of the 4X unrolled loop.  */
+#define BIGBLOCKSIZE    (sizeof (wordtype) << 2)
+
+/* How many bytes are copied each iteration of the word copy loop.  */
+#define LITTLEBLOCKSIZE (sizeof (wordtype))
+
+/* Threshold for punting to the byte copier (not referenced by memcpy below).  */
+#define TOO_SMALL(LEN)  ((LEN) < BIGBLOCKSIZE)
+
+_PTR
+_DEFUN (memcpy, (dst0, src0, len0),
+        _PTR dst0 _AND
+        _CONST _PTR src0 _AND
+        size_t len0)
+{
+#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) || defined(__mips16)
+  char *dst = (char *) dst0;
+  char *src = (char *) src0;
+
+  _PTR save = dst0;
+
+  while (len0--)
+    {
+      *dst++ = *src++;
+    }
+
+  return save;
+#else
+  char *dst = dst0;
+  _CONST char *src = src0;
+  wordtype *aligned_dst;
+  _CONST wordtype *aligned_src;
+  size_t len = len0;            /* size_t, not int: len0 must not be truncated or go negative.  */
+  size_t iter;
+
+  /* Handle aligned moves here (both pointers wordtype-aligned).  */
+  if (!UNALIGNED (src, dst))
+    {
+      iter = len / BIGBLOCKSIZE;
+      len = len % BIGBLOCKSIZE;
+      aligned_dst = (wordtype *)dst;
+      aligned_src = (wordtype *)src;
+
+      /* Copy 4X long or long long words at a time if possible.  */
+      while (iter > 0)
+        {
+          wordtype tmp0 = aligned_src[0];
+          wordtype tmp1 = aligned_src[1];
+          wordtype tmp2 = aligned_src[2];
+          wordtype tmp3 = aligned_src[3];
+
+          aligned_dst[0] = tmp0;
+          aligned_dst[1] = tmp1;
+          aligned_dst[2] = tmp2;
+          aligned_dst[3] = tmp3;
+          aligned_src += 4;
+          aligned_dst += 4;
+          iter--;
+        }
+
+      /* Copy one long or long long word at a time if possible.  */
+      iter = len / LITTLEBLOCKSIZE;
+      len = len % LITTLEBLOCKSIZE;
+
+      while (iter > 0)
+        {
+          *aligned_dst++ = *aligned_src++;
+          iter--;
+        }
+
+      /* Pick up any residual with a byte copier.  */
+      dst = (char*)aligned_dst;
+      src = (char*)aligned_src;
+
+      while (len > 0)
+        {
+          *dst++ = *src++;
+          len--;
+        }
+
+      return dst0;
+    }
+
+  /* Handle unaligned moves here, 4 bytes at a time via the ulw/usw assembler macros unless NO_UNALIGNED_LOADSTORE is defined.  */
+  else
+    {
+#ifndef NO_UNALIGNED_LOADSTORE
+      int tmp;
+      int *int_src = (int *)src;
+      int *int_dst = (int *)dst;
+      iter = len / 4;
+      len = len % 4;
+      while (iter > 0)
+        {
+          __asm__ ("ulw %0,%1" : "=r" (tmp) : "m" (*int_src));
+          iter--;
+          int_src++;
+          __asm__ ("usw %1,%0" : "=m" (*int_dst) : "r" (tmp));
+          int_dst++;
+        }
+
+      /* Pick up any residual with a byte copier.  */
+      dst = (char*)int_dst;
+      src = (char*)int_src;
+#endif
+
+      while (len > 0)
+        {
+          *dst++ = *src++;
+          len--;
+        }
+
+      return dst0;
+    }
+#endif /* not PREFER_SIZE_OVER_SPEED */
+}
diff --git a/newlib/libc/machine/mips/memset.c b/newlib/libc/machine/mips/memset.c
new file mode 100644
index 0000000000..786ba79716
--- /dev/null
+++ b/newlib/libc/machine/mips/memset.c
@@ -0,0 +1,142 @@
+/*
+FUNCTION
+        <<memset>>---set an area of memory, optimized for the MIPS processors
+
+INDEX
+        memset
+
+ANSI_SYNOPSIS
+        #include <string.h>
+        void *memset(void *<[dst]>, int <[c]>, size_t <[length]>);
+
+TRAD_SYNOPSIS
+        #include <string.h>
+        void *memset(<[dst]>, <[c]>, <[length]>)
+        void *<[dst]>;
+        int <[c]>;
+        size_t <[length]>;
+
+DESCRIPTION
+        This function converts the argument <[c]> into an unsigned
+        char and fills the first <[length]> characters of the array
+        pointed to by <[dst]> to the value.
+
+RETURNS
+        <<memset>> returns the value of <[dst]>.
+
+PORTABILITY
+<<memset>> is ANSI C.
+
+    <<memset>> requires no supporting OS subroutines.
+
+QUICKREF
+        memset ansi pure
+*/
+
+#include  /* NOTE(review): header name after #include was lost in extraction; restore from upstream.  */
+
+#ifdef __mips64
+#define wordtype long long
+#else
+#define wordtype long
+#endif
+
+#define LBLOCKSIZE (sizeof(wordtype))
+#define UNALIGNED(X)   ((long)(X) & (LBLOCKSIZE - 1))
+#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE * 4)
+
+_PTR
+_DEFUN (memset, (m, c, n),
+        _PTR m _AND
+        int c _AND
+        size_t n)
+{
+#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) || defined(__mips16)
+  char *s = (char *) m;
+
+  while (n-- != 0)
+    {
+      *s++ = (char) c;
+    }
+
+  return m;
+#else
+  char *s = (char *) m;
+  int i;
+  unsigned wordtype buffer;
+  unsigned wordtype *aligned_addr;
+  unsigned short *short_addr;   /* not referenced anywhere in this function */
+  size_t iter;
+
+  if (!TOO_SMALL (n))
+    {
+      int unaligned = UNALIGNED (s);
+
+      /* TOO_SMALL guarantees N >= 4 * LBLOCKSIZE, so we can just word
+         align S bytewise without having to check the length.  */
+
+      if (unaligned)
+        {
+          while (unaligned++ < LBLOCKSIZE)
+            *s++ = (char)c, n--;
+        }
+
+      /* S is now word-aligned so we can process the remainder
+         in word sized chunks except for a few (< LBLOCKSIZE)
+         bytes which might be left over at the end.  */
+
+      aligned_addr = (unsigned wordtype *)s;
+
+      /* Store C into each char sized location in BUFFER so that
+         we can set large blocks quickly.  */
+      c &= 0xff;
+      buffer = c;
+      if (buffer != 0)
+        {
+          if (LBLOCKSIZE == 4)
+            {
+              buffer |= (buffer << 8);
+              buffer |= (buffer << 16);
+            }
+          else if (LBLOCKSIZE == 8)
+            {
+              buffer |= (buffer << 8);
+              buffer |= (buffer << 16);
+              buffer |= ((buffer << 31) << 1);  /* net shift of 32; presumably split so a 32-bit wordtype never sees a shift by its full width */
+            }
+          else
+            {
+              for (i = 1; i < LBLOCKSIZE; i++)
+                buffer = (buffer << 8) | c;
+            }
+        }
+
+      iter = n / (2*LBLOCKSIZE);
+      n = n % (2*LBLOCKSIZE);
+      while (iter > 0)
+        {
+          aligned_addr[0] = buffer;
+          aligned_addr[1] = buffer;
+          aligned_addr += 2;
+          iter--;
+        }
+
+      if (n >= LBLOCKSIZE)
+        {
+          *aligned_addr++ = buffer;
+          n -= LBLOCKSIZE;
+        }
+
+      /* Pick up the remainder (now < LBLOCKSIZE bytes) with a bytewise loop.  */
+      s = (char*)aligned_addr;
+    }
+
+  while (n > 0)
+    {
+      *s++ = (char)c;
+      n--;
+    }
+
+  return m;
+#endif /* not PREFER_SIZE_OVER_SPEED */
+}
diff --git a/newlib/libc/machine/mips/strcmp.c b/newlib/libc/machine/mips/strcmp.c
new file mode 100644
index 0000000000..c9c1c65956
--- /dev/null
+++ b/newlib/libc/machine/mips/strcmp.c
@@ -0,0 +1,71 @@
+/*
+ * strcmp.c -- strcmp function.  On at least some MIPS chips, a strcmp that is
+ * unrolled twice is faster than the 'optimized' C version in newlib.
+ *
+ * Copyright (c) 2001 Red Hat, Inc.
+ *
+ * The authors hereby grant permission to use, copy, modify, distribute,
+ * and license this software and its documentation for any purpose, provided
+ * that existing copyright notices are retained in all copies and that this
+ * notice is included verbatim in any distributions. No written agreement,
+ * license, or royalty fee is required for any of the authorized uses.
+ * Modifications to this software may be copyrighted by their authors
+ * and need not follow the licensing terms described here, provided that
+ * the new terms are clearly indicated on the first page of each file where
+ * they apply.  */
+
+#include  /* NOTE(review): header names after the bare #include lines were lost in extraction; restore from upstream.  */
+#include
+#include
+
+int
+strcmp (const char *s1, const char *s2)
+{
+  unsigned const char *us1 = (unsigned const char *)s1;
+  unsigned const char *us2 = (unsigned const char *)s2;
+  int c1a, c1b;
+  int c2a, c2b;
+
+  /* If the pointers aren't both aligned to a 16-bit (2-byte) boundary, do the
+     comparison byte by byte, so that we don't get an invalid page fault if we
+     are comparing a string whose null byte is at the last byte on the last
+     valid page.  */
+  if (((((long)us1) | ((long)us2)) & 1) == 0)
+    {
+      c1a = *us1;
+      for (;;)
+        {
+          c1b = *us2;
+          us1 += 2;
+          if (c1a == '\0')
+            goto ret1;
+
+          c2a = us1[-1];
+          if (c1a != c1b)
+            goto ret1;
+
+          c2b = us2[1];
+          us2 += 2;
+          if (c2a == '\0')
+            break;
+
+          c1a = *us1;
+          if (c2a != c2b)
+            break;
+        }
+
+      return c2a - c2b;
+    }
+  else
+    {
+      do
+        {
+          c1a = *us1++;
+          c1b = *us2++;
+        }
+      while (c1a != '\0' && c1a == c1b);
+    }
+
+ ret1:
+  return c1a - c1b;
+}
diff --git a/newlib/libc/machine/mips/strlen.c b/newlib/libc/machine/mips/strlen.c
new file mode 100644
index 0000000000..f936039fd3
--- /dev/null
+++ b/newlib/libc/machine/mips/strlen.c
@@ -0,0 +1,71 @@
+/*
+ * strlen.c -- strlen function.  On at least some MIPS chips, a simple
+ * strlen is faster than the 'optimized' C version.
+ *
+ * Copyright (c) 2001 Red Hat, Inc.
+ *
+ * The authors hereby grant permission to use, copy, modify, distribute,
+ * and license this software and its documentation for any purpose, provided
+ * that existing copyright notices are retained in all copies and that this
+ * notice is included verbatim in any distributions. No written agreement,
+ * license, or royalty fee is required for any of the authorized uses.
+ * Modifications to this software may be copyrighted by their authors
+ * and need not follow the licensing terms described here, provided that
+ * the new terms are clearly indicated on the first page of each file where
+ * they apply.
+ */
+
+#include  /* NOTE(review): header names after the bare #include lines were lost in extraction; restore from upstream.  */
+#include
+
+/* MIPS16 needs to come first.  
*/
+
+#if defined(__mips16)
+size_t
+strlen (const char *str)
+{
+  const char *start = str;
+
+  while (*str++ != '\0')
+    ;
+
+  return str - start - 1;       /* str is one past the NUL here, so back off by one.  */
+}
+#elif defined(__mips64)
+__asm__(""                      /* 64-bit MIPS targets */
+        " .set noreorder\n"     /* delay slots below are filled by hand */
+        " .set nomacro\n"
+        " .globl strlen\n"
+        " .ent strlen\n"
+        "strlen:\n"
+        " daddiu $2,$4,1\n"     /* $2 = str + 1 */
+        "\n"
+        "1: lbu $3,0($4)\n"     /* $3 = byte at $4 */
+        " bnez $3,1b\n"         /* loop while that byte is nonzero */
+        " daddiu $4,$4,1\n"     /* delay slot: $4++ on every pass, including the last */
+        "\n"
+        " jr $31\n"
+        " dsubu $2,$4,$2\n"     /* delay slot: result = $4 - $2 */
+        " .end strlen\n"
+        " .set macro\n"
+        " .set reorder\n");
+
+#else
+__asm__(""                      /* 32-bit MIPS targets */
+        " .set noreorder\n"     /* delay slots below are filled by hand */
+        " .set nomacro\n"
+        " .globl strlen\n"
+        " .ent strlen\n"
+        "strlen:\n"
+        " addiu $2,$4,1\n"      /* $2 = str + 1 */
+        "\n"
+        "1: lbu $3,0($4)\n"     /* $3 = byte at $4 */
+        " bnez $3,1b\n"         /* loop while that byte is nonzero */
+        " addiu $4,$4,1\n"      /* delay slot: $4++ on every pass, including the last */
+        "\n"
+        " jr $31\n"
+        " subu $2,$4,$2\n"      /* delay slot: result = $4 - $2 */
+        " .end strlen\n"
+        " .set macro\n"
+        " .set reorder\n");
+#endif
diff --git a/newlib/libc/machine/mips/strncpy.c b/newlib/libc/machine/mips/strncpy.c
new file mode 100644
index 0000000000..a2ceb2c77e
--- /dev/null
+++ b/newlib/libc/machine/mips/strncpy.c
@@ -0,0 +1,229 @@
+/*
+ * strncpy.c -- strncpy function.  On at least some MIPS chips, you get better
+ * code by hand unrolling the loops, and by using store words to zero the
+ * remainder of the buffer than the default newlib C version.
+ *
+ * Copyright (c) 2001 Red Hat, Inc.
+ *
+ * The authors hereby grant permission to use, copy, modify, distribute,
+ * and license this software and its documentation for any purpose, provided
+ * that existing copyright notices are retained in all copies and that this
+ * notice is included verbatim in any distributions. No written agreement,
+ * license, or royalty fee is required for any of the authorized uses.
+ * Modifications to this software may be copyrighted by their authors
+ * and need not follow the licensing terms described here, provided that
+ * the new terms are clearly indicated on the first page of each file where
+ * they apply.  
*/
+
+#include  /* NOTE(review): header names after the bare #include lines were lost in extraction; restore from upstream.  */
+#include
+#include
+
+#if !defined(__GNUC__) || (__GNUC__ < 3)
+#define __builtin_expect(a,b) a
+
+#else
+#ifdef __mips64
+/* Don't use limits test for the size of long, in order to allow the use of
+   64-bit stores on MIPS3 machines, even if -mlong32 was used.  */
+typedef unsigned word_type __attribute__ ((mode (DI)));
+#else
+typedef unsigned word_type __attribute__ ((mode (SI)));
+#endif
+
+typedef unsigned si_type __attribute__ ((mode (SI)));
+typedef unsigned hi_type __attribute__ ((mode (HI)));
+
+#ifndef UNROLL_FACTOR
+#define UNROLL_FACTOR 4
+
+#elif (UNROLL_FACTOR != 2) && (UNROLL_FACTOR != 4)
+#error "UNROLL_FACTOR must be 2 or 4"
+#endif
+#endif
+
+char *
+strncpy (char *dst0, const char *src0, size_t count)
+{
+#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) || defined(__mips16) || !defined(__GNUC__) || (__GNUC__ < 3)
+  char *dst, *end;
+  const char *src;
+  int ch;
+
+  dst = dst0;
+  src = src0;
+  end = dst + count;
+  while (dst != end)
+    {
+      *dst++ = ch = *src++;
+      if (__builtin_expect (ch == '\0', 0))
+        {
+          while (dst != end)
+            *dst++ = '\0';
+
+          break;
+        }
+    }
+
+  return dst0;
+
+#else
+  unsigned char *dst;
+  unsigned char *dst_end;       /* not referenced below */
+  unsigned char *end;
+  const unsigned char *src;
+  int ch0, ch1;
+#if UNROLL_FACTOR > 2
+  int ch2, ch3;
+#endif
+  int ch;
+  int odd_bytes;
+  size_t long_count;            /* not referenced below */
+
+  dst = (unsigned char *)dst0;
+  src = (unsigned const char *)src0;
+  if (__builtin_expect (count >= 4, 1))
+    {
+      odd_bytes = (count & (UNROLL_FACTOR - 1));
+      count -= odd_bytes;
+
+      do
+        {
+          ch0 = src[0];         /* all UNROLL_FACTOR source bytes are loaded before any is tested for null */
+          ch1 = src[1];
+#if UNROLL_FACTOR > 2
+          ch2 = src[2];
+          ch3 = src[3];
+#endif
+          src += UNROLL_FACTOR;
+          count -= UNROLL_FACTOR;
+
+          dst[0] = ch0;
+          if (ch0 == '\0')
+            goto found_null0;
+
+          dst[1] = ch1;
+          if (ch1 == '\0')
+            goto found_null1;
+
+#if UNROLL_FACTOR > 2
+          dst[2] = ch2;
+          if (ch2 == '\0')
+            goto found_null2;
+
+          dst[3] = ch3;
+          if (ch3 == '\0')
+            goto found_null3;
+#endif
+
+          dst += UNROLL_FACTOR;
+        }
+      while (count);
+
+      /* fall through, count == 0, no null found, deal with last bytes */
+      count = odd_bytes;
+    }
+
+  end = dst + count;
+  while (dst != end)
+    {
+      *dst++ = ch = *src++;
+      if (ch == '\0')
+        {
+          while (dst != end)
+            *dst++ = '\0';
+
+          break;
+        }
+    }
+
+  return dst0;
+
+  /* Found null byte in first byte, count has been decremented by UNROLL_FACTOR,
+     null has been stored in dst[0].  */
+ found_null0:
+  count++;                      /* one more byte of this group still needs zeroing */
+  dst -= 1;                     /* pre-adjust so dst += UNROLL_FACTOR below lands just past the null */
+  /* fall through */
+
+  /* Found null byte in second byte, count has been decremented by UNROLL_FACTOR,
+     null has been stored in dst[1].  */
+ found_null1:
+#if UNROLL_FACTOR > 2
+  count++;                      /* one more byte of this group still needs zeroing */
+  dst -= 1;                     /* pre-adjust so dst += UNROLL_FACTOR below lands just past the null */
+  /* fall through */
+
+  /* Found null byte in third byte, count has been decremented by UNROLL_FACTOR,
+     null has been stored in dst[2].  */
+ found_null2:
+  count++;                      /* one more byte of this group still needs zeroing */
+  dst -= 1;                     /* pre-adjust so dst += UNROLL_FACTOR below lands just past the null */
+  /* fall through */
+
+  /* Found null byte in fourth byte, count is accurate, dst has not been
+     updated yet.  */
+ found_null3:
+#endif
+  count += odd_bytes;           /* restore odd byte count */
+  dst += UNROLL_FACTOR;
+
+  /* Zero fill remainder of the array.  First byte-fill up to a word_type
+     boundary, then unroll the loop and use word/dword stores where we can.  */
+  while (count && (((long)dst) & (sizeof (word_type) - 1)) != 0)
+    {
+      count--;
+      *dst++ = 0;
+    }
+
+  while (count >= UNROLL_FACTOR*sizeof (word_type))
+    {
+      count -= UNROLL_FACTOR*sizeof (word_type);
+      dst += UNROLL_FACTOR*sizeof (word_type);
+#if UNROLL_FACTOR > 2
+      ((word_type *)(void *)dst)[-4] = 0;
+      ((word_type *)(void *)dst)[-3] = 0;
+#endif
+      ((word_type *)(void *)dst)[-2] = 0;
+      ((word_type *)(void *)dst)[-1] = 0;
+    }
+
+#if UNROLL_FACTOR > 2
+  if (count >= 2*sizeof (word_type))
+    {
+      count -= 2*sizeof (word_type);
+      ((word_type *)(void *)dst)[0] = 0;
+      ((word_type *)(void *)dst)[1] = 0;
+      dst += 2*sizeof (word_type);
+    }
+#endif
+
+  if (count >= sizeof (word_type))
+    {
+      count -= sizeof (word_type);
+      ((word_type *)(void *)dst)[0] = 0;
+      dst += sizeof (word_type);
+    }
+
+#ifdef __mips64
+  if (count >= sizeof (si_type))
+    {
+      count -= sizeof (si_type);
+      ((si_type *)(void *)dst)[0] = 0;
+      dst += sizeof (si_type);
+    }
+#endif
+
+  if (count >= sizeof (hi_type))
+    {
+      count -= sizeof (hi_type);
+      ((hi_type *)(void *)dst)[0] = 0;
+      dst += sizeof (hi_type);
+    }
+
+  if (count)
+    *dst = '\0';
+
+  return dst0;
+#endif
+}
-- 
2.11.0