libcore/luni/src/main/native/java_nio_charset_Charsets.cpp

   1 /*
   2  * Copyright (C) 2010 The Android Open Source Project
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #define LOG_TAG "String"
  18
  19 #include "JNIHelp.h"
  20 #include "JniConstants.h"
  21 #include "ScopedPrimitiveArray.h"
  22 #include "jni.h"
  23 #include "unicode/utf16.h"
  24
  25 #include <string.h>
  26
  27 /**
  28  * Approximates java.lang.UnsafeByteSequence so we don't have to pay the cost of calling back into
  29  * Java when converting a char[] to a UTF-8 byte[]. This lets us have UTF-8 conversions slightly
  30  * faster than ICU for large char[]s without paying for the NIO overhead with small char[]s.
  31  *
  32  * We could avoid this by keeping the UTF-8 bytes on the native heap until we're done and only
  33  * creating a byte[] on the Java heap when we know how big it needs to be, but one shouldn't lie
  34  * to the garbage collector (nor hide potentially large allocations from it).
  35  *
  36  * Because a call to append might require an allocation, it might fail. Callers should always
  37  * check the return value of append.
  38  */
  39 class NativeUnsafeByteSequence {
  40 public:
  41     NativeUnsafeByteSequence(JNIEnv* env)
  42         : mEnv(env), mJavaArray(NULL), mRawArray(NULL), mSize(-1), mOffset(0)
  43     {
  44     }
  45
  46     ~NativeUnsafeByteSequence() {
  47         // Release our pointer to the raw array, copying changes back to the Java heap.
  48         if (mRawArray != NULL) {
  49             mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, 0);
  50         }
  51     }
  52
  53     bool append(jbyte b) {
  54         if (mOffset == mSize && !resize(mSize * 2)) {
  55             return false;
  56         }
  57         mRawArray[mOffset++] = b;
  58         return true;
  59     }
  60
  61     bool resize(int newSize) {
  62         if (newSize == mSize) {
  63             return true;
  64         }
  65
  66         // Allocate a new array.
  67         jbyteArray newJavaArray = mEnv->NewByteArray(newSize);
  68         if (newJavaArray == NULL) {
  69             return false;
  70         }
  71         jbyte* newRawArray = mEnv->GetByteArrayElements(newJavaArray, NULL);
  72         if (newRawArray == NULL) {
  73             return false;
  74         }
  75
  76         // Copy data out of the old array and then let go of it.
  77         // Note that we may be trimming the array.
  78         if (mRawArray != NULL) {
  79             memcpy(newRawArray, mRawArray, mOffset);
  80             mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, JNI_ABORT);
  81         }
  82
  83         // Point ourselves at the new array.
  84         mJavaArray = newJavaArray;
  85         mRawArray = newRawArray;
  86         mSize = newSize;
  87         return true;
  88     }
  89
  90     jbyteArray toByteArray() {
  91         // Trim any unused space, if necessary.
  92         bool okay = resize(mOffset);
  93         return okay ? mJavaArray : NULL;
  94     }
  95
  96 private:
  97     JNIEnv* mEnv;
  98     jbyteArray mJavaArray;
  99     jbyte* mRawArray;
 100     jint mSize;
 101     jint mOffset;
 102
 103     // Disallow copy and assignment.
 104     NativeUnsafeByteSequence(const NativeUnsafeByteSequence&);
 105     void operator=(const NativeUnsafeByteSequence&);
 106 };
 107
 108 static void Charsets_asciiBytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset, jint length, jcharArray javaChars) {
 109     ScopedByteArrayRO bytes(env, javaBytes);
 110     if (bytes.get() == NULL) {
 111         return;
 112     }
 113     ScopedCharArrayRW chars(env, javaChars);
 114     if (chars.get() == NULL) {
 115         return;
 116     }
 117
 118     const jbyte* src = &bytes[offset];
 119     jchar* dst = &chars[0];
 120     static const jchar REPLACEMENT_CHAR = 0xfffd;
 121     for (int i = length - 1; i >= 0; --i) {
 122         jchar ch = static_cast<jchar>(*src++ & 0xff);
 123         *dst++ = (ch <= 0x7f) ? ch : REPLACEMENT_CHAR;
 124     }
 125 }
 126
 127 static void Charsets_isoLatin1BytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset, jint length, jcharArray javaChars) {
 128     ScopedByteArrayRO bytes(env, javaBytes);
 129     if (bytes.get() == NULL) {
 130         return;
 131     }
 132     ScopedCharArrayRW chars(env, javaChars);
 133     if (chars.get() == NULL) {
 134         return;
 135     }
 136
 137     const jbyte* src = &bytes[offset];
 138     jchar* dst = &chars[0];
 139     for (int i = length - 1; i >= 0; --i) {
 140         *dst++ = static_cast<jchar>(*src++ & 0xff);
 141     }
 142 }
 143
 144 /**
 145  * Translates the given characters to US-ASCII or ISO-8859-1 bytes, using the fact that
 146  * Unicode code points between U+0000 and U+007f inclusive are identical to US-ASCII, while
 147  * U+0000 to U+00ff inclusive are identical to ISO-8859-1.
 148  */
 149 static jbyteArray charsToBytes(JNIEnv* env, jcharArray javaChars, jint offset, jint length, jchar maxValidChar) {
 150     ScopedCharArrayRO chars(env, javaChars);
 151     if (chars.get() == NULL) {
 152         return NULL;
 153     }
 154
 155     jbyteArray javaBytes = env->NewByteArray(length);
 156     ScopedByteArrayRW bytes(env, javaBytes);
 157     if (bytes.get() == NULL) {
 158         return NULL;
 159     }
 160
 161     const jchar* src = &chars[offset];
 162     jbyte* dst = &bytes[0];
 163     for (int i = length - 1; i >= 0; --i) {
 164         jchar ch = *src++;
 165         if (ch > maxValidChar) {
 166             ch = '?';
 167         }
 168         *dst++ = static_cast<jbyte>(ch);
 169     }
 170
 171     return javaBytes;
 172 }
 173
 174 static jbyteArray Charsets_toAsciiBytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) {
 175     return charsToBytes(env, javaChars, offset, length, 0x7f);
 176 }
 177
 178 static jbyteArray Charsets_toIsoLatin1Bytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) {
 179     return charsToBytes(env, javaChars, offset, length, 0xff);
 180 }
 181
 182 static jbyteArray Charsets_toUtf8Bytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) {
 183     ScopedCharArrayRO chars(env, javaChars);
 184     if (chars.get() == NULL) {
 185         return NULL;
 186     }
 187
 188     NativeUnsafeByteSequence out(env);
 189     if (!out.resize(length)) {
 190         return NULL;
 191     }
 192
 193     const int end = offset + length;
 194     for (int i = offset; i < end; ++i) {
 195         jint ch = chars[i];
 196         if (ch < 0x80) {
 197             // One byte.
 198             if (!out.append(ch)) {
 199                 return NULL;
 200             }
 201         } else if (ch < 0x800) {
 202             // Two bytes.
 203             if (!out.append((ch >> 6) | 0xc0) || !out.append((ch & 0x3f) | 0x80)) {
 204                 return NULL;
 205             }
 206         } else if (U16_IS_SURROGATE(ch)) {
 207             // A supplementary character.
 208             jchar high = (jchar) ch;
 209             jchar low = (i + 1 != end) ? chars[i + 1] : 0;
 210             if (!U16_IS_SURROGATE_LEAD(high) || !U16_IS_SURROGATE_TRAIL(low)) {
 211                 if (!out.append('?')) {
 212                     return NULL;
 213                 }
 214                 continue;
 215             }
 216             // Now we know we have a *valid* surrogate pair, we can consume the low surrogate.
 217             ++i;
 218             ch = U16_GET_SUPPLEMENTARY(high, low);
 219             // Four bytes.
 220             jbyte b1 = (ch >> 18) | 0xf0;
 221             jbyte b2 = ((ch >> 12) & 0x3f) | 0x80;
 222             jbyte b3 = ((ch >> 6) & 0x3f) | 0x80;
 223             jbyte b4 = (ch & 0x3f) | 0x80;
 224             if (!out.append(b1) || !out.append(b2) || !out.append(b3) || !out.append(b4)) {
 225                 return NULL;
 226             }
 227         } else {
 228             // Three bytes.
 229             jbyte b1 = (ch >> 12) | 0xe0;
 230             jbyte b2 = ((ch >> 6) & 0x3f) | 0x80;
 231             jbyte b3 = (ch & 0x3f) | 0x80;
 232             if (!out.append(b1) || !out.append(b2) || !out.append(b3)) {
 233                 return NULL;
 234             }
 235         }
 236     }
 237     return out.toByteArray();
 238 }
 239
 240 static JNINativeMethod gMethods[] = {
 241     NATIVE_METHOD(Charsets, asciiBytesToChars, "([BII[C)V"),
 242     NATIVE_METHOD(Charsets, isoLatin1BytesToChars, "([BII[C)V"),
 243     NATIVE_METHOD(Charsets, toAsciiBytes, "([CII)[B"),
 244     NATIVE_METHOD(Charsets, toIsoLatin1Bytes, "([CII)[B"),
 245     NATIVE_METHOD(Charsets, toUtf8Bytes, "([CII)[B"),
 246 };
 247 int register_java_nio_charset_Charsets(JNIEnv* env) {
 248     return jniRegisterNativeMethods(env, "java/nio/charset/Charsets", gMethods, NELEM(gMethods));
 249 }