diff options
| author | Michele Calgaro <michele.calgaro@yahoo.it> | 2025-01-29 18:05:37 +0900 |
|---|---|---|
| committer | Michele Calgaro <michele.calgaro@yahoo.it> | 2025-01-30 19:06:32 +0900 |
| commit | b64537250370dd61e3d8ba037679bddbc0f79d61 (patch) | |
| tree | 9531e2d2fb79945ae821a2805079b93697e54734 /src/tools | |
| parent | c919740e87c71232b3d2d1335efb2c4c293ff80c (diff) | |
| download | tqt-b6453725.tar.gz tqt-b6453725.zip | |
Add support for surrogate pairs to TQChar API.
This relates to issue #162.
The new code is partially taken from Qt4 but with some local rework.
Signed-off-by: Michele Calgaro <michele.calgaro@yahoo.it>
(cherry picked from commit c5cda03125a6d34c179d968011083bceb87976bd)
Diffstat (limited to 'src/tools')
| -rw-r--r-- | src/tools/ntqstring.h | 43 | ||||
| -rw-r--r-- | src/tools/qstring.cpp | 22 |
2 files changed, 51 insertions, 14 deletions
diff --git a/src/tools/ntqstring.h b/src/tools/ntqstring.h
index ce9ccf108..9ce34e975 100644
--- a/src/tools/ntqstring.h
+++ b/src/tools/ntqstring.h
@@ -222,6 +222,14 @@ public:
     bool isDigit() const;
     bool isSymbol() const;
 
+    // Surrogate pairs support
+    bool isHighSurrogate() const;
+    bool isLowSurrogate() const;
+    static bool requiresSurrogates(uint ucs4);
+    static ushort highSurrogate(uint ucs4);
+    static ushort lowSurrogate(uint ucs4);
+    static uint surrogateToUcs4(const TQChar &high, const TQChar &low);
+
     uchar cell() const { return ((uchar) ucs & 0xff); }
     uchar row() const { return ((uchar) (ucs>>8)&0xff); }
     void setCell( uchar cell ) { ucs = (ucs & 0xff00) + cell; }
@@ -313,6 +321,36 @@ inline TQChar::TQChar( int rc ) : ucs( (ushort) (rc & 0xffff) )
 {
 }
 
+inline bool TQChar::isHighSurrogate() const
+{
+    return ((ucs & 0xfc00) == 0xd800); // true for 0xd800..0xdbff
+}
+
+inline bool TQChar::isLowSurrogate() const
+{
+    return ((ucs & 0xfc00) == 0xdc00); // true for 0xdc00..0xdfff
+}
+
+inline bool TQChar::requiresSurrogates(uint ucs4)
+{
+    return (ucs4 >= 0x10000);
+}
+
+inline ushort TQChar::highSurrogate(uint ucs4)
+{
+    return ushort(((ucs4 - 0x10000) >> 10)) | 0xd800; // expects ucs4 >= 0x10000, otherwise the unsigned subtraction wraps
+}
+
+inline ushort TQChar::lowSurrogate(uint ucs4)
+{
+    return ushort(ucs4 & 0x03FF) | 0xdc00;
+}
+
+inline uint TQChar::surrogateToUcs4(const TQChar &high, const TQChar &low)
+{
+    return ((uint(high.ucs & 0x03FF) << 10) | (low.ucs & 0x03FF)) + 0x10000; // add, not OR: the 20-bit payload can already have bit 16 set (e.g. D840 DC00 -> U+20000)
+}
+
 inline bool operator==( char ch, TQChar c )
 {
     return ((uchar) ch) == c.ucs;
@@ -806,6 +844,11 @@ public:
     bool isNumber() const { return s.constref(p).isNumber(); }
     bool isLetterOrNumber() { return s.constref(p).isLetterOrNumber(); }
     bool isDigit() const { return s.constref(p).isDigit(); }
+    bool isSymbol() const { return s.constref(p).isSymbol(); }
+
+    // Surrogate pairs support
+    bool isHighSurrogate() const { return s.constref(p).isHighSurrogate(); }
+    bool isLowSurrogate() const { return s.constref(p).isLowSurrogate(); }
 
     int digitValue() const { return s.constref(p).digitValue(); }
     TQChar lower() const { return s.constref(p).lower(); }
diff --git a/src/tools/qstring.cpp b/src/tools/qstring.cpp
index a7256e354..82ae39a0e 100644
--- a/src/tools/qstring.cpp
+++ b/src/tools/qstring.cpp
@@ -6019,13 +6019,10 @@ TQCString TQString::utf8() const
             if ( u < 0x0800 ) {
                 *cursor++ = 0xc0 | ((uchar) (u >> 6));
             } else {
-                if (u >= 0xd800 && u < 0xdc00 && i < l-1) {
-                    unsigned short low = ch[1].unicode();
-                    if (low >= 0xdc00 && low < 0xe000) {
-                        ++ch;
-                        ++i;
-                        u = (u - 0xd800)*0x400 + (low - 0xdc00) + 0x10000;
-                    }
+                if (ch[0].isHighSurrogate() && i < (l - 1) && ch[1].isLowSurrogate()) {
+                    u = TQChar::surrogateToUcs4(ch[0], ch[1]);
+                    ++ch;
+                    ++i;
                 }
                 if (u > 0xffff) {
                     // if people are working in utf8, but strings are encoded in eg. latin1, the resulting
@@ -6104,15 +6101,12 @@ TQString TQString::fromUtf8( const char* utf8, int len )
                 uc = (uc << 6) | (ch & 0x3f);
                 need--;
                 if ( !need ) {
-                    if (uc > 0xffff) {
+                    if (TQChar::requiresSurrogates(uc)) {
                         // surrogate pair
-                        uc -= 0x10000;
-                        unsigned short high = uc/0x400 + 0xd800;
-                        unsigned short low = uc%0x400 + 0xdc00;
-                        *qch++ = TQChar(high);
-                        *qch++ = TQChar(low);
+                        *qch++ = TQChar(TQChar::highSurrogate(uc));
+                        *qch++ = TQChar(TQChar::lowSurrogate(uc));
                     } else if (uc < min_uc || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) {
-                        // overlong seqence, UTF16 surrogate or BOM
+                        // overlong sequence, UTF16 surrogate or BOM
                         i = error;
                         qch = addOne(qch, result);
                         *qch++ = TQChar(0xdbff);
