From 7bd361665890f5cb3a1d3325ad7e6be333609716 Mon Sep 17 00:00:00 2001
From: Ivailo Monev
Date: Sat, 23 Nov 2019 21:52:42 +0000
Subject: [PATCH] rework ICU codec to avoid relocations during conversion

---
Review notes. This area, between the "---" line and the diffstat, is the
conventional place for remarks that are not part of the commit message.

Changes made to the patch during review:
  * QMAXSTRLEN() now parenthesizes its argument and its whole expansion, so
    it stays correct when used with a compound argument or inside a larger
    expression.
  * Both failure branches now truncate the result to zero length. The result
    buffer is created with Qt::Uninitialized at its maximum size and was only
    resized in the success branch, so a failed conversion handed back
    indeterminate data.
  * The diffstat and the new-side hunk length account for the two added
    lines. The post-image blob id on the "index" line was not recomputed.

Open points, to be confirmed before merging:
  * NOTE(review): the ICU reference describes ucnv_toUChars() and
    ucnv_fromUChars() as whole-string helpers that are not suitable for
    streaming conversion. Please verify that callers which pass a
    ConverterState never feed their input in chunks.
  * The "From:" header carries no e-mail address.
  * Indentation in this copy follows the usual 4-space style. If the context
    does not match the tree byte-for-byte, apply with --ignore-whitespace.

 src/core/codecs/qicucodec.cpp | 110 ++++++++++++++++--------------------------
 1 file changed, 41 insertions(+), 69 deletions(-)

diff --git a/src/core/codecs/qicucodec.cpp b/src/core/codecs/qicucodec.cpp
index ed3e57e10..c56ea633e 100644
--- a/src/core/codecs/qicucodec.cpp
+++ b/src/core/codecs/qicucodec.cpp
@@ -419,93 +419,65 @@ UConverter *QIcuCodec::getConverter(QTextCodec::ConverterState *state) const
     return conv;
 }
 
-QString QIcuCodec::convertToUnicode(const char *chars, int length, QTextCodec::ConverterState *state) const
+// enough space to hold BOM, each char as surrogate pair and terminator
+#define QMAXSTRLEN(X) (1 + ((X) * 2) + 2)
+
+QString QIcuCodec::convertToUnicode(const char *src, int length, QTextCodec::ConverterState *state) const
 {
     UConverter *conv = getConverter(state);
 
-    // for small strings pre-allocate 2x the length, else increment by 2 to be
-    // able to hold atleast one more surrogate pair
-    const bool small = (length < 100);
-    QString string(small ? (length * 2) : (length + 2), Qt::Uninitialized);
-
-    bool failed = false;
-    const char *end = chars + length;
-    int convertedChars = 0;
+    QString string(QMAXSTRLEN(length), Qt::Uninitialized);
+    UChar *dest = reinterpret_cast<UChar *>(string.data());
     UErrorCode error = U_ZERO_ERROR;
-    while (chars != end) {
-        UChar *uc = reinterpret_cast<UChar *>(string.data());
-        const UChar *ucEnd = uc + string.length();
-        uc += convertedChars;
-        ucnv_toUnicode(conv,
-                       &uc, ucEnd,
-                       &chars, end,
-                       0, false, &error);
-        if (Q_UNLIKELY(U_FAILURE(error))) {
-            qWarning("QIcuCodec::convertToUnicode: failed %s", u_errorName(error));
-            failed = true;
-            break;
+    const int convresult = ucnv_toUChars(conv, dest, string.length(), src, length, &error);
+    if (Q_UNLIKELY(U_FAILURE(error))) {
+        qWarning("QIcuCodec::convertToUnicode: failed %s", u_errorName(error));
+        string.resize(0); // never hand back the uninitialized buffer
+        if (state) {
+            error = U_ZERO_ERROR;
+            char errorbytes[10];
+            int8_t invalidlen = 0;
+            ucnv_getInvalidChars(conv,
+                                 errorbytes,
+                                 &invalidlen,
+                                 &error);
+            state->invalidChars = invalidlen;
         }
-
-        convertedChars = uc - reinterpret_cast<const UChar *>(string.constData());
-        if (!small)
-            string.resize(string.length() + 2);
-    }
-    string.resize(convertedChars);
-
-    if (failed && state) {
-        error = U_ZERO_ERROR;
-        char errorbytes[10];
-        int8_t invalidlen = 0;
-        ucnv_getInvalidChars(conv,
-                             errorbytes,
-                             &invalidlen,
-                             &error);
-        state->invalidChars = invalidlen;
+    } else {
+        string.resize(convresult);
     }
 
     if (!state)
         ucnv_close(conv);
     return string;
 }
+#undef QMAXSTRLEN
 
 QByteArray QIcuCodec::convertFromUnicode(const QChar *unicode, int length, QTextCodec::ConverterState *state) const
 {
     UConverter *conv = getConverter(state);
 
-    int requiredLength = UCNV_GET_MAX_BYTES_FOR_STRING(length, ucnv_getMaxCharSize(conv));
-    QByteArray string(requiredLength, Qt::Uninitialized);
-
-    bool failed = false;
+    int maxbytes = UCNV_GET_MAX_BYTES_FOR_STRING(length, ucnv_getMaxCharSize(conv));
+    QByteArray string(maxbytes, Qt::Uninitialized);
+    const UChar *src = reinterpret_cast<const UChar *>(unicode);
+    char *dest = reinterpret_cast<char *>(string.data());
     UErrorCode error = U_ZERO_ERROR;
-    const UChar *uc = reinterpret_cast<const UChar *>(unicode);
-    const UChar *end = uc + length;
-    int convertedChars = 0;
-    while (uc != end) {
-        char *ch = (char *)string.data();
-        char *chEnd = ch + string.length();
-        ch += convertedChars;
-        ucnv_fromUnicode(conv,
-                         &ch, chEnd,
-                         &uc, end,
-                         0, false, &error);
-        if (Q_UNLIKELY(U_FAILURE(error))) {
-            qWarning("QIcuCodec::convertFromUnicode: failed %s", u_errorName(error));
-            failed = true;
-            break;
+    const int convresult = ucnv_fromUChars(conv, dest, string.length(), src, length, &error);
+    if (Q_UNLIKELY(U_FAILURE(error))) {
+        qWarning("QIcuCodec::convertFromUnicode: failed %s", u_errorName(error));
+        string.resize(0); // never hand back the uninitialized buffer
+        if (state) {
+            error = U_ZERO_ERROR;
+            char errorbytes[10];
+            int8_t invalidlen = 0;
+            ucnv_getInvalidChars(conv,
+                                 errorbytes,
+                                 &invalidlen,
+                                 &error);
+            state->invalidChars = invalidlen;
         }
-        convertedChars = ch - string.data();
-    }
-    string.resize(convertedChars);
-
-    if (failed && state) {
-        error = U_ZERO_ERROR;
-        char errorbytes[10];
-        int8_t invalidlen = 0;
-        ucnv_getInvalidChars(conv,
-                             errorbytes,
-                             &invalidlen,
-                             &error);
-        state->invalidChars = invalidlen;
+    } else {
+        string.resize(convresult);
     }
 
     if (!state)
-- 
2.11.0