summary | refs | log | tree | commit | diff | stats
path: root/src/codecs
diff options
context:
space:
mode:
author: Michele Calgaro <michele.calgaro@yahoo.it> 2025-01-29 18:05:37 +0900
committer: Michele Calgaro <michele.calgaro@yahoo.it> 2025-01-30 19:06:32 +0900
commit: b64537250370dd61e3d8ba037679bddbc0f79d61 (patch)
tree: 9531e2d2fb79945ae821a2805079b93697e54734 /src/codecs
parent: c919740e87c71232b3d2d1335efb2c4c293ff80c (diff)
download: tqt-b6453725.tar.gz
tqt-b6453725.zip
Add support for surrogate pairs to TQChar API.
This relates to issue #162. The new code is partially taken from Qt4, with some local rework.

Signed-off-by: Michele Calgaro <michele.calgaro@yahoo.it>
(cherry picked from commit c5cda03125a6d34c179d968011083bceb87976bd)
Diffstat (limited to 'src/codecs')
-rw-r--r-- src/codecs/qgb18030codec.cpp 30
-rw-r--r-- src/codecs/qutfcodec.cpp 23
2 files changed, 21 insertions, 32 deletions
diff --git a/src/codecs/qgb18030codec.cpp b/src/codecs/qgb18030codec.cpp
index f2c84dab7..8630cf32e 100644
--- a/src/codecs/qgb18030codec.cpp
+++ b/src/codecs/qgb18030codec.cpp
@@ -188,18 +188,16 @@ TQCString TQGb18030Codec::fromUnicode(const TQString& uc, int& lenInOut) const
if ( ch.row() == 0x00 && ch.cell() < 0x80 ) {
// ASCII
*cursor++ = ch.cell();
- } else if ((ch.unicode() & 0xf800) == 0xd800) {
- unsigned short high = ch.unicode();
+ } else if (ch.isHighSurrogate()) {
// surrogates area. check for correct encoding
// we need at least one more character, first the high surrogate, then the low one
- if (i == l-1 || high >= 0xdc00)
+ if (i == l-1)
*cursor++ = '?';
else {
- unsigned short low = uc[i+1].unicode();
- if (low >= 0xdc00 && low <= 0xdfff) {
+ if (uc[i+1].isLowSurrogate()) {
// valid surrogate pair
+ uint u = TQChar::surrogateToUcs4(uc[i], uc[i + 1]);
++i;
- uint u = (high-0xd800)*0x400+(low-0xdc00)+0x10000;
len = qt_UnicodeToGb18030(u, buf);
if (len >= 2) {
for (int j=0; j<len; j++)
@@ -245,15 +243,13 @@ TQString TQGb18030Codec::toUnicode(const char* chars, int len) const
uint u = qt_Gb18030ToUnicode( (const uchar*)(chars + i), clen );
if (clen == 2 || clen == 4) {
- if (u < 0x10000)
+ if (!TQChar::requiresSurrogates(u)) {
result += TQValidChar(u);
+ }
else {
// encode into surrogate pair
- u -= 0x10000;
- unsigned short high = u/0x400 + 0xd800;
- unsigned short low = u%0x400 + 0xdc00;
- result += TQChar(high);
- result += TQChar(low);
+ result += TQChar(TQChar::highSurrogate(u));
+ result += TQChar(TQChar::lowSurrogate(u));
}
i += clen;
} else if (i < len) {
@@ -406,15 +402,13 @@ public:
int clen = 4;
uint u = qt_Gb18030ToUnicode(buf, clen);
if (clen == 4) {
- if (u < 0x10000)
+ if (!TQChar::requiresSurrogates(u)) {
result += TQValidChar(u);
+ }
else {
// encode into surrogate pair
- u -= 0x10000;
- unsigned short high = u/0x400 + 0xd800;
- unsigned short low = u%0x400 + 0xdc00;
- result += TQChar(high);
- result += TQChar(low);
+ result += TQChar(TQChar::highSurrogate(u));
+ result += TQChar(TQChar::lowSurrogate(u));
}
} else {
result += TQChar::replacement;
diff --git a/src/codecs/qutfcodec.cpp b/src/codecs/qutfcodec.cpp
index 0a47f1523..0f88b8260 100644
--- a/src/codecs/qutfcodec.cpp
+++ b/src/codecs/qutfcodec.cpp
@@ -64,13 +64,10 @@ TQCString TQUtf8Codec::fromUnicode(const TQString& uc, int& lenInOut) const
if ( u < 0x0800 ) {
*cursor++ = 0xc0 | ((uchar) (u >> 6));
} else {
- if (u >= 0xd800 && u < 0xdc00 && i < l-1) {
- unsigned short low = ch[1].unicode();
- if (low >= 0xdc00 && low < 0xe000) {
- ++ch;
- ++i;
- u = (u - 0xd800)*0x400 + (low - 0xdc00) + 0x10000;
- }
+ if (ch[0].isHighSurrogate() && i < (l - 1) && ch[1].isLowSurrogate()) {
+ u = TQChar::surrogateToUcs4(ch[0], ch[1]);
+ ++ch;
+ ++i;
}
if (u > 0xffff) {
// see TQString::fromUtf8() and TQString::utf8() for explanations
@@ -179,16 +176,14 @@ public:
uc = (uc << 6) | (ch & 0x3f);
need--;
if ( !need ) {
- if (uc > 0xffff) {
+ if (TQChar::requiresSurrogates(uc)) {
// surrogate pair
- uc -= 0x10000;
- unsigned short high = uc/0x400 + 0xd800;
- unsigned short low = uc%0x400 + 0xdc00;
- *qch++ = TQChar(high);
- *qch++ = TQChar(low);
+ *qch++ = TQChar(TQChar::highSurrogate(uc));
+ *qch++ = TQChar(TQChar::lowSurrogate(uc));
headerDone = TRUE;
} else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) {
- *qch++ = TQChar::replacement;
+ // overlong sequence, UTF16 surrogate or BOM
+ *qch++ = TQChar::replacement;
} else {
if (headerDone || TQChar(uc) != TQChar::byteOrderMark)
*qch++ = uc;