From 869cd7c233f1cc123c156ce04e1e475d0fa05259 Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Fri, 15 Feb 2013 14:25:15 -0800 Subject: [PATCH] Move the arraycopy implementation out of bionic and into dalvik. Change-Id: I5b7964e53eca6f2da949d6ef24d42a9c6ed3996c --- vm/native/java_lang_System.cpp | 166 +++++++++++++++++++++++++++++------------ 1 file changed, 120 insertions(+), 46 deletions(-) diff --git a/vm/native/java_lang_System.cpp b/vm/native/java_lang_System.cpp index 6ebf4f3b9..29063cdc7 100644 --- a/vm/native/java_lang_System.cpp +++ b/vm/native/java_lang_System.cpp @@ -14,12 +14,13 @@ * limitations under the License. */ -/* - * java.lang.Class native methods - */ #include "Dalvik.h" #include "native/InternalNativePriv.h" +#include +#include +#include + /* * The VM makes guarantees about the atomicity of accesses to primitive * variables. These guarantees also apply to elements of arrays. @@ -45,64 +46,137 @@ * appropriately for the element type, and that n is a multiple of the * element size. */ -#ifdef __BIONIC__ -/* always present in bionic libc */ -#define HAVE_MEMMOVE_WORDS -#endif - -#ifdef HAVE_MEMMOVE_WORDS -extern "C" void _memmove_words(void* dest, const void* src, size_t n); -#define move16 _memmove_words -#define move32 _memmove_words -#else -static void move16(void* dest, const void* src, size_t n) -{ + +/* + * Works like memmove(), except: + * - if all arguments are at least 32-bit aligned, we guarantee that we + * will use operations that preserve atomicity of 32-bit values + * - if not, we guarantee atomicity of 16-bit values + * + * If all three arguments are not at least 16-bit aligned, the behavior + * of this function is undefined. (We could remove this restriction by + * testing for unaligned values and punting to memmove(), but that's + * not currently useful.) + * + * TODO: add loop for 64-bit alignment + * TODO: use __builtin_prefetch + * TODO: write an ARM-optimized version + */ +static void memmove_words(void* dest, const void* src, size_t n) { assert((((uintptr_t) dest | (uintptr_t) src | n) & 0x01) == 0); - uint16_t* d = (uint16_t*) dest; - const uint16_t* s = (uint16_t*) src; + char* d = (char*) dest; + const char* s = (const char*) src; + size_t copyCount; + + /* + * If the source and destination pointers are the same, this is + * an expensive no-op. Testing for an empty move now allows us + * to skip a check later. + */ + if (n == 0 || d == s) + return; + + /* + * Determine if the source and destination buffers will overlap if + * we copy data forward (i.e. *dest++ = *src++). + * + * It's okay if the destination buffer starts before the source and + * there is some overlap, because the reader is always ahead of the + * writer. + */ + if (__builtin_expect((d < s) || ((size_t)(d - s) >= n), 1)) { + /* + * Copy forward. We prefer 32-bit loads and stores even for 16-bit + * data, so sort that out. + */ + if ((((uintptr_t) d | (uintptr_t) s) & 0x03) != 0) { + /* + * Not 32-bit aligned. Two possibilities: + * (1) Congruent, we can align to 32-bit by copying one 16-bit val + * (2) Non-congruent, we can do one of: + * a. copy whole buffer as a series of 16-bit values + * b. load/store 32 bits, using shifts to ensure alignment + * c. just copy the as 32-bit values and assume the CPU + * will do a reasonable job + * + * We're currently using (a), which is suboptimal. + */ + if ((((uintptr_t) d ^ (uintptr_t) s) & 0x03) != 0) { + copyCount = n; + } else { + copyCount = 2; + } + n -= copyCount; + copyCount /= sizeof(uint16_t); + + while (copyCount--) { + *(uint16_t*)d = *(uint16_t*)s; + d += sizeof(uint16_t); + s += sizeof(uint16_t); + } + } - n /= sizeof(uint16_t); + /* + * Copy 32-bit aligned words. + */ + copyCount = n / sizeof(uint32_t); + while (copyCount--) { + *(uint32_t*)d = *(uint32_t*)s; + d += sizeof(uint32_t); + s += sizeof(uint32_t); + } - if (d < s) { - /* copy forward */ - while (n--) { - *d++ = *s++; + /* + * Check for leftovers. Either we finished exactly, or we have + * one remaining 16-bit chunk. + */ + if ((n & 0x02) != 0) { + *(uint16_t*)d = *(uint16_t*)s; } } else { - /* copy backward */ + /* + * Copy backward, starting at the end. + */ d += n; s += n; - while (n--) { - *--d = *--s; - } - } -} - -static void move32(void* dest, const void* src, size_t n) -{ - assert((((uintptr_t) dest | (uintptr_t) src | n) & 0x03) == 0); - uint32_t* d = (uint32_t*) dest; - const uint32_t* s = (uint32_t*) src; + if ((((uintptr_t) d | (uintptr_t) s) & 0x03) != 0) { + /* try for 32-bit alignment */ + if ((((uintptr_t) d ^ (uintptr_t) s) & 0x03) != 0) { + copyCount = n; + } else { + copyCount = 2; + } + n -= copyCount; + copyCount /= sizeof(uint16_t); - n /= sizeof(uint32_t); + while (copyCount--) { + d -= sizeof(uint16_t); + s -= sizeof(uint16_t); + *(uint16_t*)d = *(uint16_t*)s; + } + } - if (d < s) { - /* copy forward */ - while (n--) { - *d++ = *s++; + /* copy 32-bit aligned words */ + copyCount = n / sizeof(uint32_t); + while (copyCount--) { + d -= sizeof(uint32_t); + s -= sizeof(uint32_t); + *(uint32_t*)d = *(uint32_t*)s; } - } else { - /* copy backward */ - d += n; - s += n; - while (n--) { - *--d = *--s; + + /* copy leftovers */ + if ((n & 0x02) != 0) { + d -= sizeof(uint16_t); + s -= sizeof(uint16_t); + *(uint16_t*)d = *(uint16_t*)s; } } } -#endif /*HAVE_MEMMOVE_WORDS*/ + +#define move16 memmove_words +#define move32 memmove_words /* * public static void arraycopy(Object src, int srcPos, Object dest, -- 2.11.0