summary | refs | log | tree | commit | diff | stats
path: root/src/tools
diff options
context:
space:
mode:
author Michele Calgaro <michele.calgaro@yahoo.it> 2025-01-29 18:05:37 +0900
committer Michele Calgaro <michele.calgaro@yahoo.it> 2025-01-30 19:06:32 +0900
commit b64537250370dd61e3d8ba037679bddbc0f79d61 (patch)
tree 9531e2d2fb79945ae821a2805079b93697e54734 /src/tools
parent c919740e87c71232b3d2d1335efb2c4c293ff80c (diff)
download tqt-b6453725.tar.gz
tqt-b6453725.zip
Add support for surrogate pairs to TQChar API.
This relates to issue #162. The new code is partially taken from Qt4 but with some local rework. Signed-off-by: Michele Calgaro <michele.calgaro@yahoo.it> (cherry picked from commit c5cda03125a6d34c179d968011083bceb87976bd)
Diffstat (limited to 'src/tools')
-rw-r--r-- src/tools/ntqstring.h 43
-rw-r--r-- src/tools/qstring.cpp 22
2 files changed, 51 insertions, 14 deletions
diff --git a/src/tools/ntqstring.h b/src/tools/ntqstring.h
index ce9ccf108..9ce34e975 100644
--- a/src/tools/ntqstring.h
+++ b/src/tools/ntqstring.h
@@ -222,6 +222,14 @@ public:
bool isDigit() const;
bool isSymbol() const;
+ // Surrogate pairs support (UTF-16 code units in the range 0xD800..0xDFFF)
+ bool isHighSurrogate() const; // true if this code unit is in 0xD800..0xDBFF
+ bool isLowSurrogate() const; // true if this code unit is in 0xDC00..0xDFFF
+ static bool requiresSurrogates(uint ucs4); // true if ucs4 >= 0x10000
+ static ushort highSurrogate(uint ucs4); // 0xD800-based half of the pair for ucs4
+ static ushort lowSurrogate(uint ucs4); // 0xDC00-based half of the pair for ucs4
+ static uint surrogateToUcs4(const TQChar &high, const TQChar &low); // builds a UCS-4 value from a high/low pair
+
uchar cell() const { return ((uchar) ucs & 0xff); }
uchar row() const { return ((uchar) (ucs>>8)&0xff); }
void setCell( uchar cell ) { ucs = (ucs & 0xff00) + cell; }
@@ -313,6 +321,36 @@ inline TQChar::TQChar( int rc ) : ucs( (ushort) (rc & 0xffff) )
{
}
+// Returns true when this UTF-16 code unit is a high (leading) surrogate,
+// i.e. its value lies in the range 0xD800..0xDBFF.
+inline bool TQChar::isHighSurrogate() const
+{
+    return ( ucs >= 0xd800 && ucs <= 0xdbff );
+}
+
+// Returns true when this UTF-16 code unit is a low (trailing) surrogate,
+// i.e. its value lies in the range 0xDC00..0xDFFF.
+inline bool TQChar::isLowSurrogate() const
+{
+    return ( ucs >= 0xdc00 && ucs <= 0xdfff );
+}
+
+// Returns true when ucs4 does not fit in a single 16-bit code unit
+// (value above 0xFFFF) and therefore has to be stored as a surrogate pair.
+inline bool TQChar::requiresSurrogates(uint ucs4)
+{
+    return ( ucs4 > 0xffff );
+}
+
+// Returns the high (leading) surrogate code unit for ucs4: the upper bits
+// of (ucs4 - 0x10000), shifted down by 10 and tagged with 0xD800.
+// The argument is not validated by this function.
+inline ushort TQChar::highSurrogate(uint ucs4)
+{
+    const uint offset = ucs4 - 0x10000;
+    return ushort( offset >> 10 ) | 0xd800;
+}
+
+// Returns the low (trailing) surrogate code unit for ucs4: the lowest
+// 10 bits of ucs4 tagged with 0xDC00.
+inline ushort TQChar::lowSurrogate(uint ucs4)
+{
+    const ushort lowTenBits = ushort( ucs4 & 0x03FF );
+    return lowTenBits | 0xdc00;
+}
+
+// Combines a high/low surrogate pair into the UCS-4 code point it encodes.
+// The low 10 bits of each code unit form a 20-bit payload (high part in the
+// upper 10 bits, low part in the lower 10 bits), to which 0x10000 is added.
+// The 0x10000 offset must be ADDED, not OR-ed: the payload can already have
+// bit 16 set (e.g. D840 DC00 -> payload 0x10000 -> U+20000), in which case
+// an OR would leave the value 0x10000 too small.
+// The arguments are not checked for actually being surrogates; only their
+// low 10 bits are used.
+inline uint TQChar::surrogateToUcs4(const TQChar &high, const TQChar &low)
+{
+    const uint payload = (uint(high.ucs & 0x03FF) << 10) | uint(low.ucs & 0x03FF);
+    return payload + 0x10000;
+}
+
inline bool operator==( char ch, TQChar c )
{
return ((uchar) ch) == c.ucs;
@@ -806,6 +844,11 @@ public:
bool isNumber() const { return s.constref(p).isNumber(); }
bool isLetterOrNumber() { return s.constref(p).isLetterOrNumber(); }
bool isDigit() const { return s.constref(p).isDigit(); }
+ bool isSymbol() const { return s.constref(p).isSymbol(); } // forwards to the character returned by s.constref(p)
+
+ // Surrogate pairs support: forwarded to the character returned by s.constref(p)
+ bool isHighSurrogate() const { return s.constref(p).isHighSurrogate(); }
+ bool isLowSurrogate() const { return s.constref(p).isLowSurrogate(); } // forwards to the character returned by s.constref(p)
int digitValue() const { return s.constref(p).digitValue(); }
TQChar lower() const { return s.constref(p).lower(); }
diff --git a/src/tools/qstring.cpp b/src/tools/qstring.cpp
index a7256e354..82ae39a0e 100644
--- a/src/tools/qstring.cpp
+++ b/src/tools/qstring.cpp
@@ -6019,13 +6019,10 @@ TQCString TQString::utf8() const
if ( u < 0x0800 ) {
*cursor++ = 0xc0 | ((uchar) (u >> 6));
} else {
- if (u >= 0xd800 && u < 0xdc00 && i < l-1) {
- unsigned short low = ch[1].unicode();
- if (low >= 0xdc00 && low < 0xe000) {
- ++ch;
- ++i;
- u = (u - 0xd800)*0x400 + (low - 0xdc00) + 0x10000;
- }
+ if (ch[0].isHighSurrogate() && i < (l - 1) && ch[1].isLowSurrogate()) {
+ u = TQChar::surrogateToUcs4(ch[0], ch[1]);
+ ++ch;
+ ++i;
}
if (u > 0xffff) {
// if people are working in utf8, but strings are encoded in eg. latin1, the resulting
@@ -6104,15 +6101,12 @@ TQString TQString::fromUtf8( const char* utf8, int len )
uc = (uc << 6) | (ch & 0x3f);
need--;
if ( !need ) {
- if (uc > 0xffff) {
+ if (TQChar::requiresSurrogates(uc)) {
// surrogate pair
- uc -= 0x10000;
- unsigned short high = uc/0x400 + 0xd800;
- unsigned short low = uc%0x400 + 0xdc00;
- *qch++ = TQChar(high);
- *qch++ = TQChar(low);
+ *qch++ = TQChar(TQChar::highSurrogate(uc));
+ *qch++ = TQChar(TQChar::lowSurrogate(uc));
} else if (uc < min_uc || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) {
- // overlong seqence, UTF16 surrogate or BOM
+ // overlong sequence, UTF16 surrogate or BOM
i = error;
qch = addOne(qch, result);
*qch++ = TQChar(0xdbff);