From 59a434629ba06d4decf7bc88a62ae370a1935f0e Mon Sep 17 00:00:00 2001 From: Andy McFadden Date: Wed, 2 Sep 2009 18:07:23 -0700 Subject: [PATCH] Add inline version of String.indexOf(). This provides an inline-native version of String.indexOf(int) and String.indexOf(int, int), i.e. the functions that work like strchr(). Has a fairly solid impact on specific benchmarks. Might give a boost to an app somewhere. Added some indexOf tests to 020-string. Added hard-coded field offsets for String. These are verified during startup. Improves some of our String micro-benchmarks by ~10%. --- tests/020-string/expected.txt | 2 + tests/020-string/src/Main.java | 32 ++++++++++ vm/DalvikVersion.h | 2 +- vm/InlineNative.c | 133 ++++++++++++++++++++++++++++++++++------- vm/UtfString.c | 75 +++++++++++++++-------- vm/UtfString.h | 25 ++++++++ 6 files changed, 220 insertions(+), 49 deletions(-) diff --git a/tests/020-string/expected.txt b/tests/020-string/expected.txt index 67b71593e..081fea3a4 100644 --- a/tests/020-string/expected.txt +++ b/tests/020-string/expected.txt @@ -3,3 +3,5 @@ This is a very nice string Compare result is 32 Compare unicode: -65302 Got expected exception +subStr is 'uick brown fox jumps over the lazy ' +Indexes are: 0:-1:0:43:33:-1:18:13:13:-1:18:18:-1:13:-1:-1:-1 diff --git a/tests/020-string/src/Main.java b/tests/020-string/src/Main.java index 4a57fc672..bb8ce1fa5 100644 --- a/tests/020-string/src/Main.java +++ b/tests/020-string/src/Main.java @@ -19,6 +19,11 @@ */ public class Main { public static void main(String args[]) { + basicTest(); + indexTest(); + } + + public static void basicTest() { String baseStr = "*** This is a very nice string!!!"; String testStr; int i; @@ -49,4 +54,31 @@ public class Main { System.out.println("Got expected exception"); } } + + public static void indexTest() { + String baseStr = "The quick brown fox jumps over the lazy dog!"; + String subStr; + + subStr = baseStr.substring(5, baseStr.length() - 4); + 
System.out.println("subStr is '" + subStr + "'"); + + System.out.println("Indexes are: " + + baseStr.indexOf('T') + ":" + + subStr.indexOf('T') + ":" + + subStr.indexOf('u') + ":" + + baseStr.indexOf('!') + ":" + + subStr.indexOf('y') + ":" + + subStr.indexOf('d') + ":" + + baseStr.indexOf('x') + ":" + + subStr.indexOf('x', 0) + ":" + + subStr.indexOf('x', -1) + ":" + + subStr.indexOf('x', 200) + ":" + + baseStr.indexOf('x', 17) + ":" + + baseStr.indexOf('x', 18) + ":" + + baseStr.indexOf('x', 19) + ":" + + subStr.indexOf('x', 13) + ":" + + subStr.indexOf('x', 14) + ":" + + subStr.indexOf('&') + ":" + + baseStr.indexOf(0x12341234)); + } } diff --git a/vm/DalvikVersion.h b/vm/DalvikVersion.h index efbb39327..dfc95ce19 100644 --- a/vm/DalvikVersion.h +++ b/vm/DalvikVersion.h @@ -32,6 +32,6 @@ * way classes load changes, e.g. field ordering or vtable layout. Changing * this guarantees that the optimized form of the DEX file is regenerated. */ -#define DALVIK_VM_BUILD 17 +#define DALVIK_VM_BUILD 18 #endif /*_DALVIK_VERSION*/ diff --git a/vm/InlineNative.c b/vm/InlineNative.c index f8293608f..fd287081c 100644 --- a/vm/InlineNative.c +++ b/vm/InlineNative.c @@ -129,14 +129,14 @@ static bool javaLangString_charAt(u4 arg0, u4 arg1, u4 arg2, u4 arg3, return false; //LOGI("String.charAt this=0x%08x index=%d\n", arg0, arg1); - count = dvmGetFieldInt((Object*) arg0, gDvm.offJavaLangString_count); + count = dvmGetFieldInt((Object*) arg0, STRING_FIELDOFF_COUNT); if ((s4) arg1 < 0 || (s4) arg1 >= count) { dvmThrowException("Ljava/lang/StringIndexOutOfBoundsException;", NULL); return false; } else { - offset = dvmGetFieldInt((Object*) arg0, gDvm.offJavaLangString_offset); + offset = dvmGetFieldInt((Object*) arg0, STRING_FIELDOFF_OFFSET); chars = (ArrayObject*) - dvmGetFieldObject((Object*) arg0, gDvm.offJavaLangString_value); + dvmGetFieldObject((Object*) arg0, STRING_FIELDOFF_VALUE); pResult->i = ((const u2*) chars->contents)[arg1 + offset]; return true; @@ -157,17 +157,17 @@ 
static void badMatch(StringObject* thisStrObj, StringObject* compStrObj, int thisOffset, compOffset, thisCount, compCount; thisCount = - dvmGetFieldInt((Object*) thisStrObj, gDvm.offJavaLangString_count); + dvmGetFieldInt((Object*) thisStrObj, STRING_FIELDOFF_COUNT); compCount = - dvmGetFieldInt((Object*) compStrObj, gDvm.offJavaLangString_count); + dvmGetFieldInt((Object*) compStrObj, STRING_FIELDOFF_COUNT); thisOffset = - dvmGetFieldInt((Object*) thisStrObj, gDvm.offJavaLangString_offset); + dvmGetFieldInt((Object*) thisStrObj, STRING_FIELDOFF_OFFSET); compOffset = - dvmGetFieldInt((Object*) compStrObj, gDvm.offJavaLangString_offset); + dvmGetFieldInt((Object*) compStrObj, STRING_FIELDOFF_OFFSET); thisArray = (ArrayObject*) - dvmGetFieldObject((Object*) thisStrObj, gDvm.offJavaLangString_value); + dvmGetFieldObject((Object*) thisStrObj, STRING_FIELDOFF_VALUE); compArray = (ArrayObject*) - dvmGetFieldObject((Object*) compStrObj, gDvm.offJavaLangString_value); + dvmGetFieldObject((Object*) compStrObj, STRING_FIELDOFF_VALUE); thisStr = dvmCreateCstrFromString(thisStrObj); compStr = dvmCreateCstrFromString(compStrObj); @@ -221,16 +221,16 @@ static bool javaLangString_compareTo(u4 arg0, u4 arg1, u4 arg2, u4 arg3, const u2* compChars; int i, minCount, countDiff; - thisCount = dvmGetFieldInt((Object*) arg0, gDvm.offJavaLangString_count); - compCount = dvmGetFieldInt((Object*) arg1, gDvm.offJavaLangString_count); + thisCount = dvmGetFieldInt((Object*) arg0, STRING_FIELDOFF_COUNT); + compCount = dvmGetFieldInt((Object*) arg1, STRING_FIELDOFF_COUNT); countDiff = thisCount - compCount; minCount = (countDiff < 0) ? 
thisCount : compCount; - thisOffset = dvmGetFieldInt((Object*) arg0, gDvm.offJavaLangString_offset); - compOffset = dvmGetFieldInt((Object*) arg1, gDvm.offJavaLangString_offset); + thisOffset = dvmGetFieldInt((Object*) arg0, STRING_FIELDOFF_OFFSET); + compOffset = dvmGetFieldInt((Object*) arg1, STRING_FIELDOFF_OFFSET); thisArray = (ArrayObject*) - dvmGetFieldObject((Object*) arg0, gDvm.offJavaLangString_value); + dvmGetFieldObject((Object*) arg0, STRING_FIELDOFF_VALUE); compArray = (ArrayObject*) - dvmGetFieldObject((Object*) arg1, gDvm.offJavaLangString_value); + dvmGetFieldObject((Object*) arg1, STRING_FIELDOFF_VALUE); thisChars = ((const u2*) thisArray->contents) + thisOffset; compChars = ((const u2*) compArray->contents) + compOffset; @@ -321,19 +321,19 @@ static bool javaLangString_equals(u4 arg0, u4 arg1, u4 arg2, u4 arg3, int i; /* quick length check */ - thisCount = dvmGetFieldInt((Object*) arg0, gDvm.offJavaLangString_count); - compCount = dvmGetFieldInt((Object*) arg1, gDvm.offJavaLangString_count); + thisCount = dvmGetFieldInt((Object*) arg0, STRING_FIELDOFF_COUNT); + compCount = dvmGetFieldInt((Object*) arg1, STRING_FIELDOFF_COUNT); if (thisCount != compCount) { pResult->i = false; return true; } - thisOffset = dvmGetFieldInt((Object*) arg0, gDvm.offJavaLangString_offset); - compOffset = dvmGetFieldInt((Object*) arg1, gDvm.offJavaLangString_offset); + thisOffset = dvmGetFieldInt((Object*) arg0, STRING_FIELDOFF_OFFSET); + compOffset = dvmGetFieldInt((Object*) arg1, STRING_FIELDOFF_OFFSET); thisArray = (ArrayObject*) - dvmGetFieldObject((Object*) arg0, gDvm.offJavaLangString_value); + dvmGetFieldObject((Object*) arg0, STRING_FIELDOFF_VALUE); compArray = (ArrayObject*) - dvmGetFieldObject((Object*) arg1, gDvm.offJavaLangString_value); + dvmGetFieldObject((Object*) arg1, STRING_FIELDOFF_VALUE); thisChars = ((const u2*) thisArray->contents) + thisOffset; compChars = ((const u2*) compArray->contents) + compOffset; @@ -383,7 +383,90 @@ static bool 
javaLangString_length(u4 arg0, u4 arg1, u4 arg2, u4 arg3, if (!dvmValidateObject((Object*) arg0)) return false; - pResult->i = dvmGetFieldInt((Object*) arg0, gDvm.offJavaLangString_count); + pResult->i = dvmGetFieldInt((Object*) arg0, STRING_FIELDOFF_COUNT); + return true; +} + +/* + * Determine the index of the first character matching "ch". The string + * to search is "strObj"; its char array, offset, and count are read here. + * + * The "ch" parameter is allowed to be > 0xffff. Our Java-language + * implementation does not currently handle this, so neither do we. + * + * The "start" parameter is clamped to [0..count] before scanning. + * + * Returns -1 if no match is found. + */ +static inline int indexOfCommon(Object* strObj, int ch, int start) +{ + //if ((ch & 0xffff) != ch) /* 32-bit code point */ + // return -1; + + /* pull out the basic elements */ + ArrayObject* charArray = + (ArrayObject*) dvmGetFieldObject(strObj, STRING_FIELDOFF_VALUE); + const u2* chars = (const u2*) charArray->contents; + int offset = dvmGetFieldInt(strObj, STRING_FIELDOFF_OFFSET); + int count = dvmGetFieldInt(strObj, STRING_FIELDOFF_COUNT); + //LOGI("String.indexOf(0x%08x, 0x%04x, %d) off=%d count=%d\n", + // (u4) strObj, ch, start, offset, count); + + /* factor out the offset */ + chars += offset; + + /* clamp so "chars + start" can never point outside [chars, chars+count] */ + start = (start < 0) ? 0 : ((start > count) ? count : start); + +#if 0 + /* 16-bit loop, simple */ + while (start < count) { + if (chars[start] == ch) + return start; + start++; + } +#else + /* 16-bit loop, slightly better on ARM */ + const u2* ptr = chars + start; + const u2* endPtr = chars + count; + while (ptr < endPtr) { + if (*ptr++ == ch) + return (ptr-1) - chars; + } +#endif + + return -1; +} + +/* + * public int indexOf(int c) + * + * Scan forward from the beginning of the string for a matching character. 
+ */ +static bool javaLangString_indexOf_I(u4 arg0, u4 arg1, u4 arg2, u4 arg3, + JValue* pResult) +{ + /* null reference check on "this" */ + if (!dvmValidateObject((Object*) arg0)) + return false; + + pResult->i = indexOfCommon((Object*) arg0, arg1, 0); + return true; +} + +/* + * public int indexOf(int c, int start) + * + * Scan forward from index "start" (passed in arg2) for a matching character. + */ +static bool javaLangString_indexOf_II(u4 arg0, u4 arg1, u4 arg2, u4 arg3, + JValue* pResult) +{ + /* null reference check on "this" */ + if (!dvmValidateObject((Object*) arg0)) + return false; + + pResult->i = indexOfCommon((Object*) arg0, arg1, arg2); return true; } @@ -564,6 +647,10 @@ const InlineOperation gDvmInlineOpsTable[] = { "Ljava/lang/String;", "compareTo", "(Ljava/lang/String;)I" }, { javaLangString_equals, "Ljava/lang/String;", "equals", "(Ljava/lang/Object;)Z" }, + { javaLangString_indexOf_I, + "Ljava/lang/String;", "indexOf", "(I)I" }, + { javaLangString_indexOf_II, + "Ljava/lang/String;", "indexOf", "(II)I" }, { javaLangString_length, "Ljava/lang/String;", "length", "()I" }, @@ -587,7 +674,6 @@ const InlineOperation gDvmInlineOpsTable[] = { "Ljava/lang/Math;", "sin", "(D)D" }, }; - /* * Allocate some tables. */ @@ -696,3 +782,4 @@ skip_prof: #endif return (*gDvmInlineOpsTable[opIndex].func)(arg0, arg1, arg2, arg3, pResult); } + diff --git a/vm/UtfString.c b/vm/UtfString.c index dfb76bc85..8e20a0f38 100644 --- a/vm/UtfString.c +++ b/vm/UtfString.c @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + /* * UTF-8 and Unicode string manipulation, plus java/lang/String convenience * functions. 
@@ -69,6 +70,30 @@ static bool stringStartup() return false; } + bool badValue = false; + if (gDvm.offJavaLangString_value != STRING_FIELDOFF_VALUE) { + LOGE("InlineNative: String.value offset = %d, expected %d\n", + gDvm.offJavaLangString_value, STRING_FIELDOFF_VALUE); + badValue = true; + } + if (gDvm.offJavaLangString_count != STRING_FIELDOFF_COUNT) { + LOGE("InlineNative: String.count offset = %d, expected %d\n", + gDvm.offJavaLangString_count, STRING_FIELDOFF_COUNT); + badValue = true; + } + if (gDvm.offJavaLangString_offset != STRING_FIELDOFF_OFFSET) { + LOGE("InlineNative: String.offset offset = %d, expected %d\n", + gDvm.offJavaLangString_offset, STRING_FIELDOFF_OFFSET); + badValue = true; + } + if (gDvm.offJavaLangString_hashCode != STRING_FIELDOFF_HASHCODE) { + LOGE("InlineNative: String.hashCode offset = %d, expected %d\n", + gDvm.offJavaLangString_hashCode, STRING_FIELDOFF_HASHCODE); + badValue = true; + } + if (badValue) + return false; + gDvm.javaLangStringReady = 1; return true; @@ -213,11 +238,11 @@ static inline u4 dvmComputeUtf16Hash(const u2* utf16Str, int len) } u4 dvmComputeStringHash(StringObject* strObj) { ArrayObject* chars = (ArrayObject*) dvmGetFieldObject((Object*) strObj, - gDvm.offJavaLangString_value); + STRING_FIELDOFF_VALUE); int offset, len; - len = dvmGetFieldInt((Object*) strObj, gDvm.offJavaLangString_count); - offset = dvmGetFieldInt((Object*) strObj, gDvm.offJavaLangString_offset); + len = dvmGetFieldInt((Object*) strObj, STRING_FIELDOFF_COUNT); + offset = dvmGetFieldInt((Object*) strObj, STRING_FIELDOFF_OFFSET); return dvmComputeUtf16Hash((u2*) chars->contents + offset, len); } @@ -285,11 +310,11 @@ StringObject* dvmCreateStringFromCstrAndLength(const char* utf8Str, dvmConvertUtf8ToUtf16((u2*)chars->contents, utf8Str); hashCode = dvmComputeUtf16Hash((u2*) chars->contents, utf16Length); - dvmSetFieldObject((Object*)newObj, gDvm.offJavaLangString_value, + dvmSetFieldObject((Object*)newObj, STRING_FIELDOFF_VALUE, (Object*)chars); 
dvmReleaseTrackedAllocIFN((Object*) chars, NULL, allocFlags); - dvmSetFieldInt((Object*)newObj, gDvm.offJavaLangString_count, utf16Length); - dvmSetFieldInt((Object*)newObj, gDvm.offJavaLangString_hashCode, hashCode); + dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_COUNT, utf16Length); + dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_HASHCODE, hashCode); /* leave offset set to zero */ /* debugging stuff */ @@ -339,11 +364,11 @@ StringObject* dvmCreateStringFromUnicode(const u2* unichars, int len) memcpy(chars->contents, unichars, len * sizeof(u2)); hashCode = dvmComputeUtf16Hash((u2*) chars->contents, len); - dvmSetFieldObject((Object*)newObj, gDvm.offJavaLangString_value, + dvmSetFieldObject((Object*)newObj, STRING_FIELDOFF_VALUE, (Object*)chars); dvmReleaseTrackedAlloc((Object*) chars, NULL); - dvmSetFieldInt((Object*)newObj, gDvm.offJavaLangString_count, len); - dvmSetFieldInt((Object*)newObj, gDvm.offJavaLangString_hashCode, hashCode); + dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_COUNT, len); + dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_HASHCODE, hashCode); /* leave offset set to zero */ /* debugging stuff */ @@ -371,10 +396,10 @@ char* dvmCreateCstrFromString(StringObject* jstr) if (jstr == NULL) return NULL; - len = dvmGetFieldInt((Object*) jstr, gDvm.offJavaLangString_count); - offset = dvmGetFieldInt((Object*) jstr, gDvm.offJavaLangString_offset); + len = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_COUNT); + offset = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_OFFSET); chars = (ArrayObject*) dvmGetFieldObject((Object*) jstr, - gDvm.offJavaLangString_value); + STRING_FIELDOFF_VALUE); data = (const u2*) chars->contents + offset; assert(offset + len <= (int) chars->length); @@ -416,10 +441,10 @@ int dvmStringUtf8ByteLen(StringObject* jstr) if (jstr == NULL) return 0; // should we throw something? assert? 
- len = dvmGetFieldInt((Object*) jstr, gDvm.offJavaLangString_count); - offset = dvmGetFieldInt((Object*) jstr, gDvm.offJavaLangString_offset); + len = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_COUNT); + offset = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_OFFSET); chars = (ArrayObject*) dvmGetFieldObject((Object*) jstr, - gDvm.offJavaLangString_value); + STRING_FIELDOFF_VALUE); data = (const u2*) chars->contents + offset; assert(offset + len <= (int) chars->length); @@ -431,7 +456,7 @@ int dvmStringUtf8ByteLen(StringObject* jstr) */ int dvmStringLen(StringObject* jstr) { - return dvmGetFieldInt((Object*) jstr, gDvm.offJavaLangString_count); + return dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_COUNT); } /* @@ -440,7 +465,7 @@ int dvmStringLen(StringObject* jstr) ArrayObject* dvmStringCharArray(StringObject* jstr) { return (ArrayObject*) dvmGetFieldObject((Object*) jstr, - gDvm.offJavaLangString_value); + STRING_FIELDOFF_VALUE); } /* @@ -451,9 +476,9 @@ const u2* dvmStringChars(StringObject* jstr) ArrayObject* chars; int offset; - offset = dvmGetFieldInt((Object*) jstr, gDvm.offJavaLangString_offset); + offset = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_OFFSET); chars = (ArrayObject*) dvmGetFieldObject((Object*) jstr, - gDvm.offJavaLangString_value); + STRING_FIELDOFF_VALUE); return (const u2*) chars->contents + offset; } @@ -476,17 +501,17 @@ int dvmHashcmpStrings(const void* vstrObj1, const void* vstrObj2) assert(gDvm.javaLangStringReady > 0); /* get offset and length into char array; all values are in 16-bit units */ - len1 = dvmGetFieldInt((Object*) strObj1, gDvm.offJavaLangString_count); - offset1 = dvmGetFieldInt((Object*) strObj1, gDvm.offJavaLangString_offset); - len2 = dvmGetFieldInt((Object*) strObj2, gDvm.offJavaLangString_count); - offset2 = dvmGetFieldInt((Object*) strObj2, gDvm.offJavaLangString_offset); + len1 = dvmGetFieldInt((Object*) strObj1, STRING_FIELDOFF_COUNT); + offset1 = dvmGetFieldInt((Object*) strObj1, 
STRING_FIELDOFF_OFFSET); + len2 = dvmGetFieldInt((Object*) strObj2, STRING_FIELDOFF_COUNT); + offset2 = dvmGetFieldInt((Object*) strObj2, STRING_FIELDOFF_OFFSET); if (len1 != len2) return len1 - len2; chars1 = (ArrayObject*) dvmGetFieldObject((Object*) strObj1, - gDvm.offJavaLangString_value); + STRING_FIELDOFF_VALUE); chars2 = (ArrayObject*) dvmGetFieldObject((Object*) strObj2, - gDvm.offJavaLangString_value); + STRING_FIELDOFF_VALUE); /* damage here actually indicates a broken java/lang/String */ assert(offset1 + len1 <= (int) chars1->length); diff --git a/vm/UtfString.h b/vm/UtfString.h index ca500a7b7..8f0f97286 100644 --- a/vm/UtfString.h +++ b/vm/UtfString.h @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + /* * UTF-8 and Unicode string manipulation functions, plus convenience * functions for working with java/lang/String. @@ -21,6 +22,30 @@ #define _DALVIK_STRING /* + * (This is private to UtfString.c, but we cheat a bit and also use it + * for InlineNative.c. Not really worth creating a separate header.) + * + * We can avoid poking around in gDvm by hard-coding the expected values of + * the String field offsets. This will be annoying if String is in flux + * or the VM field layout is changing, so we use defines here to make it + * easy to switch back to the gDvm version. + * + * The values are checked for correctness during startup. + */ +//#define USE_GLOBAL_STRING_DEFS +#ifdef USE_GLOBAL_STRING_DEFS +# define STRING_FIELDOFF_VALUE gDvm.offJavaLangString_value +# define STRING_FIELDOFF_OFFSET gDvm.offJavaLangString_offset +# define STRING_FIELDOFF_COUNT gDvm.offJavaLangString_count +# define STRING_FIELDOFF_HASHCODE gDvm.offJavaLangString_hashCode +#else +# define STRING_FIELDOFF_VALUE 8 +# define STRING_FIELDOFF_HASHCODE 12 +# define STRING_FIELDOFF_OFFSET 16 +# define STRING_FIELDOFF_COUNT 20 +#endif + +/* * Hash function for modified UTF-8 strings. 
*/ u4 dvmComputeUtf8Hash(const char* str); -- 2.11.0