#include "qcorecommon_p.h"
#include "qdebug.h"
+#include <unicode/ucsdet.h>
+
QT_BEGIN_NAMESPACE
// generated via genmib.py
return defaultCodec;
}
+// Guesses the encoding of `text` using ICU's charset detector (ucsdet_*)
+// and returns the QTextCodec registered under the detected charset name.
+//
+// `defaultCodec` is returned when any ICU call reports a failure, when ICU
+// finds no matching charset, or when QTextCodec::codecForName() does not
+// know the detected name.
+QTextCodec *QIcuCodec::codecForData(const QByteArray &text, QTextCodec *defaultCodec)
+{
+    UErrorCode error = U_ZERO_ERROR;
+    UCharsetDetector *detector = ucsdet_open(&error);
+    if (Q_UNLIKELY(U_FAILURE(error))) {
+        qWarning("QIcuCodec::codecForData: ucsdet_open() failed %s", u_errorName(error));
+        return defaultCodec;
+    }
+
+    error = U_ZERO_ERROR;
+    ucsdet_setText(detector, text.constData(), text.size(), &error);
+    if (Q_UNLIKELY(U_FAILURE(error))) {
+        qWarning("QIcuCodec::codecForData: ucsdet_setText() failed %s", u_errorName(error));
+        ucsdet_close(detector);
+        return defaultCodec;
+    }
+
+    error = U_ZERO_ERROR;
+    const UCharsetMatch *match = ucsdet_detect(detector, &error);
+    if (Q_UNLIKELY(U_FAILURE(error))) {
+        qWarning("QIcuCodec::codecForData: ucsdet_detect() failed %s", u_errorName(error));
+        ucsdet_close(detector);
+        return defaultCodec;
+    }
+    if (!match) {
+        // A null match without an error means no charset fits the data;
+        // that is "not detected", not a failure worth a warning.
+        ucsdet_close(detector);
+        return defaultCodec;
+    }
+
+    error = U_ZERO_ERROR;
+    const char *name = ucsdet_getName(match, &error);
+    if (Q_UNLIKELY(U_FAILURE(error))) {
+        qWarning("QIcuCodec::codecForData: ucsdet_getName() failed %s", u_errorName(error));
+        ucsdet_close(detector);
+        return defaultCodec;
+    }
+
+    // The name string belongs to the match, which belongs to the detector,
+    // so the codec must be looked up before the detector is closed.
+    QTextCodec *codec = QTextCodec::codecForName(name);
+    ucsdet_close(detector);
+
+    // codecForName() yields null for names it does not know.
+    return codec ? codec : defaultCodec;
+}
#endif
UConverter *QIcuCodec::getConverter(QTextCodec::ConverterState *state) const
cannot be detected from the content provided, \a defaultCodec is
returned.
- \sa codecForUtfText()
+ \sa codecForText(), codecForUtfText()
*/
QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec)
{
int pos2 = header.indexOf('\"', pos+1);
QByteArray cs = header.mid(pos, pos2-pos);
// qDebug("found charset: %s", cs.data());
- c = QTextCodec::codecForName(cs);
+ return QTextCodec::codecForName(cs);
}
}
}
}
- if (!c)
- c = defaultCodec;
-
- return c;
+ return QTextCodec::codecForText(ba, defaultCodec);;
}
/*!
and the content-type meta header and returns a QTextCodec instance
that is capable of decoding the html to unicode. If the codec cannot
be detected, this overload returns a Latin-1 QTextCodec.
+
+ \sa codecForText(), codecForUtfText()
*/
QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba)
{
cannot be detected from the content provided, \a defaultCodec is
returned.
- \sa codecForHtml()
+ \sa codecForText(), codecForHtml()
*/
QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaultCodec)
{
that is capable of decoding the text to unicode. If the codec
cannot be detected, this overload returns a Latin-1 QTextCodec.
- \sa codecForHtml()
+ \sa codecForText(), codecForHtml()
*/
QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba)
{
}
+/*!
+    \since 4.9
+
+    Attempts to determine the encoding of the byte sequence \a ba and
+    returns a QTextCodec instance able to decode it to Unicode. When
+    the encoding cannot be determined from the data, \a defaultCodec
+    is returned instead.
+
+    The result cannot be guaranteed to always be correct.
+
+    \sa codecForUtfText(), codecForHtml()
+*/
+QTextCodec *QTextCodec::codecForText(const QByteArray &ba, QTextCodec *defaultCodec)
+{
+    // Detection is delegated entirely to QIcuCodec.
+    // NOTE(review): QIcuCodec is presumably only available in ICU-enabled
+    // builds - confirm this call sits behind the appropriate guard.
+    QTextCodec *detected = QIcuCodec::codecForData(ba, defaultCodec);
+    return detected;
+}
+
+/*!
+    \overload
+
+    Attempts to determine the encoding of the byte sequence \a ba and
+    returns a QTextCodec instance able to decode it to Unicode. This
+    overload passes a Latin-1 QTextCodec as the fallback codec.
+
+    The result cannot be guaranteed to always be correct.
+
+    \sa codecForUtfText(), codecForHtml()
+*/
+QTextCodec *QTextCodec::codecForText(const QByteArray &ba)
+{
+    // MIB 4 is the Latin 1 codec, used here as the fallback.
+    QTextCodec *latin1 = QTextCodec::codecForMib(4);
+    return codecForText(ba, latin1);
+}
+
/*! \internal
\since 4.3
Determines whether the decoder encountered a failure while decoding the input. If