2 * Copyright (C) 2008 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
/*
 * UTF-8 and Unicode string manipulation, plus java/lang/String convenience
 * functions.
 *
 * In most cases we populate the fields in the String object directly,
 * rather than going through an instance field lookup.
 */
28 * Allocate a new instance of the class String, performing first-use
29 * initialization of the class if necessary. Upon success, the
30 * returned value will have all its fields except hashCode already
31 * filled in, including a reference to a newly-allocated char[] for
32 * the contents, sized as given. Additionally, a reference to the
33 * chars array is stored to the pChars pointer. Callers must
34 * subsequently call dvmReleaseTrackedAlloc() on the result pointer.
35 * This function returns NULL on failure.
37 static StringObject* makeStringObject(u4 charsLength, ArrayObject** pChars)
40 * The String class should have already gotten found (but not
41 * necessarily initialized) before making it here. We assert it
42 * explicitly, since historically speaking, we have had bugs with
43 * regard to when the class String gets set up. The assert helps
44 * make any regressions easier to diagnose.
46 assert(gDvm.classJavaLangString != NULL);
48 if (!dvmIsClassInitialized(gDvm.classJavaLangString)) {
49 /* Perform first-time use initialization of the class. */
50 if (!dvmInitClass(gDvm.classJavaLangString)) {
51 ALOGE("FATAL: Could not initialize class String");
56 Object* result = dvmAllocObject(gDvm.classJavaLangString, ALLOC_DEFAULT);
61 ArrayObject* chars = dvmAllocPrimitiveArray('C', charsLength, ALLOC_DEFAULT);
63 dvmReleaseTrackedAlloc(result, NULL);
67 dvmSetFieldInt(result, STRING_FIELDOFF_COUNT, charsLength);
68 dvmSetFieldObject(result, STRING_FIELDOFF_VALUE, (Object*) chars);
69 dvmReleaseTrackedAlloc((Object*) chars, NULL);
70 /* Leave offset and hashCode set to zero. */
73 return (StringObject*) result;
77 * Compute a hash code on a UTF-8 string, for use with internal hash tables.
79 * This may or may not yield the same results as the java/lang/String
80 * computeHashCode() function. (To make sure this doesn't get abused,
81 * I'm initializing the hash code to 1 so they *don't* match up.)
83 * It would be more correct to invoke dexGetUtf16FromUtf8() here and compute
84 * the hash with the result. That way, if something encoded the same
85 * character in two different ways, the hash value would be the same. For
86 * our purposes that isn't necessary.
88 u4 dvmComputeUtf8Hash(const char* utf8Str)
92 while (*utf8Str != '\0')
93 hash = hash * 31 + *utf8Str++;
/*
 * Like "strlen", but for strings encoded with "modified" UTF-8.
 *
 * The value returned is the number of characters, which may or may not
 * be the same as the number of bytes.
 *
 * Only the lead byte of each character is inspected: a lead byte with the
 * high bit set is followed by one continuation byte, or by two if bit 0x20
 * is also set.  Continuation bytes are skipped without being examined, so
 * the input must be well-formed.  dvmCreateStringFromCstr() sizes the
 * char[] from this count and then fills it with dvmConvertUtf8ToUtf16(),
 * so the two functions must keep agreeing on how the string is walked.
 *
 * (If this needs optimizing, try: mask against 0xa0, shift right 5,
 * get increment {1-3} from table of 8 values.)
 */
size_t dvmUtf8Len(const char* utf8Str)
{
    size_t len = 0;
    int ic;

    while ((ic = *utf8Str++) != '\0') {
        len++;
        if ((ic & 0x80) != 0) {
            /* two- or three-byte encoding */
            utf8Str++;
            if ((ic & 0x20) != 0) {
                /* three-byte encoding */
                utf8Str++;
            }
        }
    }

    return len;
}
128 * Convert a "modified" UTF-8 string to UTF-16.
130 void dvmConvertUtf8ToUtf16(u2* utf16Str, const char* utf8Str)
132 while (*utf8Str != '\0')
133 *utf16Str++ = dexGetUtf16FromUtf8(&utf8Str);
137 * Given a UTF-16 string, compute the length of the corresponding UTF-8
140 static int utf16_utf8ByteLen(const u2* utf16Str, int len)
145 unsigned int uic = *utf16Str++;
148 * The most common case is (uic > 0 && uic <= 0x7f).
150 if (uic == 0 || uic > 0x7f) {
153 else /*(uic > 0x7f || uic == 0) */
162 * Convert a UTF-16 string to UTF-8.
164 * Make sure you allocate "utf8Str" with the result of utf16_utf8ByteLen(),
167 static void convertUtf16ToUtf8(char* utf8Str, const u2* utf16Str, int len)
172 unsigned int uic = *utf16Str++;
175 * The most common case is (uic > 0 && uic <= 0x7f).
177 if (uic == 0 || uic > 0x7f) {
179 *utf8Str++ = (uic >> 12) | 0xe0;
180 *utf8Str++ = ((uic >> 6) & 0x3f) | 0x80;
181 *utf8Str++ = (uic & 0x3f) | 0x80;
182 } else /*(uic > 0x7f || uic == 0)*/ {
183 *utf8Str++ = (uic >> 6) | 0xc0;
184 *utf8Str++ = (uic & 0x3f) | 0x80;
195 * Use the java/lang/String.computeHashCode() algorithm.
197 static inline u4 computeUtf16Hash(const u2* utf16Str, size_t len)
202 hash = hash * 31 + *utf16Str++;
207 u4 dvmComputeStringHash(StringObject* strObj) {
208 int hashCode = dvmGetFieldInt(strObj, STRING_FIELDOFF_HASHCODE);
212 int len = dvmGetFieldInt(strObj, STRING_FIELDOFF_COUNT);
213 int offset = dvmGetFieldInt(strObj, STRING_FIELDOFF_OFFSET);
215 (ArrayObject*) dvmGetFieldObject(strObj, STRING_FIELDOFF_VALUE);
216 hashCode = computeUtf16Hash((u2*)(void*)chars->contents + offset, len);
217 dvmSetFieldInt(strObj, STRING_FIELDOFF_HASHCODE, hashCode);
221 StringObject* dvmCreateStringFromCstr(const char* utf8Str) {
222 assert(utf8Str != NULL);
223 return dvmCreateStringFromCstrAndLength(utf8Str, dvmUtf8Len(utf8Str));
226 StringObject* dvmCreateStringFromCstr(const std::string& utf8Str) {
227 return dvmCreateStringFromCstr(utf8Str.c_str());
231 * Create a java/lang/String from a C string, given its UTF-16 length
232 * (number of UTF-16 code points).
234 * The caller must call dvmReleaseTrackedAlloc() on the return value.
236 * Returns NULL and throws an exception on failure.
238 StringObject* dvmCreateStringFromCstrAndLength(const char* utf8Str,
241 assert(utf8Str != NULL);
244 StringObject* newObj = makeStringObject(utf16Length, &chars);
245 if (newObj == NULL) {
249 dvmConvertUtf8ToUtf16((u2*)(void*)chars->contents, utf8Str);
251 u4 hashCode = computeUtf16Hash((u2*)(void*)chars->contents, utf16Length);
252 dvmSetFieldInt((Object*) newObj, STRING_FIELDOFF_HASHCODE, hashCode);
258 * Create a new java/lang/String object, using the given Unicode data.
260 StringObject* dvmCreateStringFromUnicode(const u2* unichars, int len)
262 /* We allow a NULL pointer if the length is zero. */
263 assert(len == 0 || unichars != NULL);
266 StringObject* newObj = makeStringObject(len, &chars);
267 if (newObj == NULL) {
271 if (len > 0) memcpy(chars->contents, unichars, len * sizeof(u2));
273 u4 hashCode = computeUtf16Hash((u2*)(void*)chars->contents, len);
274 dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_HASHCODE, hashCode);
280 * Create a new C string from a java/lang/String object.
282 * Returns NULL if the object is NULL.
284 char* dvmCreateCstrFromString(const StringObject* jstr)
286 assert(gDvm.classJavaLangString != NULL);
291 int len = dvmGetFieldInt(jstr, STRING_FIELDOFF_COUNT);
292 int offset = dvmGetFieldInt(jstr, STRING_FIELDOFF_OFFSET);
294 (ArrayObject*) dvmGetFieldObject(jstr, STRING_FIELDOFF_VALUE);
295 const u2* data = (const u2*)(void*)chars->contents + offset;
296 assert(offset + len <= (int) chars->length);
298 int byteLen = utf16_utf8ByteLen(data, len);
299 char* newStr = (char*) malloc(byteLen+1);
300 if (newStr == NULL) {
303 convertUtf16ToUtf8(newStr, data, len);
308 void dvmGetStringUtfRegion(const StringObject* jstr,
309 int start, int len, char* buf)
311 const u2* data = jstr->chars() + start;
312 convertUtf16ToUtf8(buf, data, len);
315 int StringObject::utfLength() const
317 assert(gDvm.classJavaLangString != NULL);
319 int len = dvmGetFieldInt(this, STRING_FIELDOFF_COUNT);
320 int offset = dvmGetFieldInt(this, STRING_FIELDOFF_OFFSET);
322 (ArrayObject*) dvmGetFieldObject(this, STRING_FIELDOFF_VALUE);
323 const u2* data = (const u2*)(void*)chars->contents + offset;
324 assert(offset + len <= (int) chars->length);
326 return utf16_utf8ByteLen(data, len);
329 int StringObject::length() const
331 return dvmGetFieldInt(this, STRING_FIELDOFF_COUNT);
334 ArrayObject* StringObject::array() const
336 return (ArrayObject*) dvmGetFieldObject(this, STRING_FIELDOFF_VALUE);
339 const u2* StringObject::chars() const
341 int offset = dvmGetFieldInt(this, STRING_FIELDOFF_OFFSET);
343 (ArrayObject*) dvmGetFieldObject(this, STRING_FIELDOFF_VALUE);
344 return (const u2*)(void*)chars->contents + offset;
349 * Compare two String objects.
351 * This is a dvmHashTableLookup() callback. The function has already
352 * compared their hash values; we need to do a full compare to ensure
353 * that the strings really match.
355 int dvmHashcmpStrings(const void* vstrObj1, const void* vstrObj2)
357 const StringObject* strObj1 = (const StringObject*) vstrObj1;
358 const StringObject* strObj2 = (const StringObject*) vstrObj2;
360 assert(gDvm.classJavaLangString != NULL);
362 /* get offset and length into char array; all values are in 16-bit units */
363 int len1 = dvmGetFieldInt(strObj1, STRING_FIELDOFF_COUNT);
364 int offset1 = dvmGetFieldInt(strObj1, STRING_FIELDOFF_OFFSET);
365 int len2 = dvmGetFieldInt(strObj2, STRING_FIELDOFF_COUNT);
366 int offset2 = dvmGetFieldInt(strObj2, STRING_FIELDOFF_OFFSET);
371 ArrayObject* chars1 =
372 (ArrayObject*) dvmGetFieldObject(strObj1, STRING_FIELDOFF_VALUE);
373 ArrayObject* chars2 =
374 (ArrayObject*) dvmGetFieldObject(strObj2, STRING_FIELDOFF_VALUE);
376 /* damage here actually indicates a broken java/lang/String */
377 assert(offset1 + len1 <= (int) chars1->length);
378 assert(offset2 + len2 <= (int) chars2->length);
380 return memcmp((const u2*)(void*)chars1->contents + offset1,
381 (const u2*)(void*)chars2->contents + offset2,
385 ArrayObject* dvmCreateStringArray(const std::vector<std::string>& strings) {
386 Thread* self = dvmThreadSelf();
388 // Allocate an array to hold the String objects.
389 ClassObject* elementClass = dvmFindArrayClassForElement(gDvm.classJavaLangString);
390 ArrayObject* stringArray = dvmAllocArrayByClass(elementClass, strings.size(), ALLOC_DEFAULT);
391 if (stringArray == NULL) {
393 assert(dvmCheckException(self));
397 // Create the individual String objects and add them to the array.
398 for (size_t i = 0; i < strings.size(); i++) {
399 Object* str = (Object*) dvmCreateStringFromCstr(strings[i]);
401 // Probably OOM; drop out now.
402 assert(dvmCheckException(self));
403 dvmReleaseTrackedAlloc((Object*) stringArray, self);
406 dvmSetObjectArrayElement(stringArray, i, str);
407 /* stored in tracked array, okay to release */
408 dvmReleaseTrackedAlloc(str, self);