From c57343e948aa9f3346ad866ad88d4b1330d098b8 Mon Sep 17 00:00:00 2001 From: Michele Calgaro Date: Sun, 13 Dec 2020 19:22:19 +0900 Subject: Renaming of files in preparation for code style tools. Signed-off-by: Michele Calgaro (cherry picked from commit 14d0fbe96c6abdb9da80e99953aec672f999948c) --- kpdf/xpdf/xpdf/CharCodeToUnicode.cpp | 540 +++++++++++++++++++++++++++++++++++ 1 file changed, 540 insertions(+) create mode 100644 kpdf/xpdf/xpdf/CharCodeToUnicode.cpp (limited to 'kpdf/xpdf/xpdf/CharCodeToUnicode.cpp') diff --git a/kpdf/xpdf/xpdf/CharCodeToUnicode.cpp b/kpdf/xpdf/xpdf/CharCodeToUnicode.cpp new file mode 100644 index 00000000..1144caec --- /dev/null +++ b/kpdf/xpdf/xpdf/CharCodeToUnicode.cpp @@ -0,0 +1,540 @@ +//======================================================================== +// +// CharCodeToUnicode.cpp +// +// Copyright 2001-2003 Glyph & Cog, LLC +// +//======================================================================== + +#include + +#ifdef USE_GCC_PRAGMAS +#pragma implementation +#endif + +#include +#include +#include "gmem.h" +#include "gfile.h" +#include "GString.h" +#include "Error.h" +#include "GlobalParams.h" +#include "PSTokenizer.h" +#include "CharCodeToUnicode.h" + +//------------------------------------------------------------------------ + +#define maxUnicodeString 8 + +struct CharCodeToUnicodeString { + CharCode c; + Unicode u[maxUnicodeString]; + int len; +}; + +//------------------------------------------------------------------------ + +static int getCharFromString(void *data) { + char *p; + int c; + + p = *(char **)data; + if (*p) { + c = *p++; + *(char **)data = p; + } else { + c = EOF; + } + return c; +} + +static int getCharFromFile(void *data) { + return fgetc((FILE *)data); +} + +//------------------------------------------------------------------------ + +CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *fileName, + GString *collection) { + FILE *f; + Unicode *mapA; + CharCode size, mapLenA; + char buf[64]; + Unicode u; + CharCodeToUnicode *ctu; + + if (!(f = fopen(fileName->getCString(), "r"))) { + error(-1, "Couldn't open cidToUnicode file '%s'", + fileName->getCString()); + return NULL; + } + + size = 32768; + mapA = (Unicode *)gmallocn(size, sizeof(Unicode)); + mapLenA = 0; + + while (getLine(buf, sizeof(buf), f)) { + if (mapLenA == size) { + size *= 2; + mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode)); + } + if (sscanf(buf, "%x", &u) == 1) { + mapA[mapLenA] = u; + } else { + error(-1, "Bad line (%d) in cidToUnicode file '%s'", + (int)(mapLenA + 1), fileName->getCString()); + mapA[mapLenA] = 0; + } + ++mapLenA; + } + fclose(f); + + ctu = new CharCodeToUnicode(collection->copy(), mapA, mapLenA, gTrue, + NULL, 0, 0); + gfree(mapA); + return ctu; +} + +CharCodeToUnicode *CharCodeToUnicode::parseUnicodeToUnicode( + GString *fileName) { + FILE *f; + Unicode *mapA; + CharCodeToUnicodeString *sMapA; + CharCode size, oldSize, len, sMapSizeA, sMapLenA; + char buf[256]; + char *tok; + Unicode u0; + Unicode uBuf[maxUnicodeString]; + CharCodeToUnicode *ctu; + int line, n, i; + + if (!(f = fopen(fileName->getCString(), "r"))) { + error(-1, "Couldn't open unicodeToUnicode file '%s'", + fileName->getCString()); + return NULL; + } + + size = 4096; + mapA = (Unicode *)gmallocn(size, sizeof(Unicode)); + memset(mapA, 0, size * sizeof(Unicode)); + len = 0; + sMapA = NULL; + sMapSizeA = sMapLenA = 0; + + line = 0; + while (getLine(buf, sizeof(buf), f)) { + ++line; + if (!(tok = strtok(buf, " \t\r\n")) || + sscanf(tok, "%x", &u0) != 1) { + error(-1, "Bad line (%d) in unicodeToUnicode file '%s'", + line, fileName->getCString()); + continue; + } + n = 0; + while (n < maxUnicodeString) { + if (!(tok = strtok(NULL, " \t\r\n"))) { + break; + } + if (sscanf(tok, "%x", &uBuf[n]) != 1) { + error(-1, "Bad line (%d) in unicodeToUnicode file '%s'", + line, fileName->getCString()); + break; + } + ++n; + } + if (n < 1) { + error(-1, "Bad line (%d) in unicodeToUnicode file '%s'", + line, fileName->getCString()); + continue; + } + if (u0 >= size) { + oldSize = size; + while (u0 >= size) { + size *= 2; + } + mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode)); + memset(mapA + oldSize, 0, (size - oldSize) * sizeof(Unicode)); + } + if (n == 1) { + mapA[u0] = uBuf[0]; + } else { + mapA[u0] = 0; + if (sMapLenA == sMapSizeA) { + sMapSizeA += 16; + sMapA = (CharCodeToUnicodeString *) + greallocn(sMapA, sMapSizeA, sizeof(CharCodeToUnicodeString)); + } + sMapA[sMapLenA].c = u0; + for (i = 0; i < n; ++i) { + sMapA[sMapLenA].u[i] = uBuf[i]; + } + sMapA[sMapLenA].len = n; + ++sMapLenA; + } + if (u0 >= len) { + len = u0 + 1; + } + } + fclose(f); + + ctu = new CharCodeToUnicode(fileName->copy(), mapA, len, gTrue, + sMapA, sMapLenA, sMapSizeA); + gfree(mapA); + return ctu; +} + +CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) { + return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0, 0); +} + +CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) { + CharCodeToUnicode *ctu; + char *p; + + ctu = new CharCodeToUnicode(NULL); + p = buf->getCString(); + ctu->parseCMap1(&getCharFromString, &p, nBits); + return ctu; +} + +void CharCodeToUnicode::mergeCMap(GString *buf, int nBits) { + char *p; + + p = buf->getCString(); + parseCMap1(&getCharFromString, &p, nBits); +} + +void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data, + int nBits) { + PSTokenizer *pst; + char tok1[256], tok2[256], tok3[256]; + int nDigits, n1, n2, n3; + CharCode i; + CharCode code1, code2; + GString *name; + FILE *f; + + nDigits = nBits / 4; + pst = new PSTokenizer(getCharFunc, data); + pst->getToken(tok1, sizeof(tok1), &n1); + while (pst->getToken(tok2, sizeof(tok2), &n2)) { + if (!strcmp(tok2, "usecmap")) { + if (tok1[0] == '/') { + name = new GString(tok1 + 1); + if ((f = globalParams->findToUnicodeFile(name))) { + parseCMap1(&getCharFromFile, f, nBits); + fclose(f); + } else { + error(-1, "Couldn't find ToUnicode CMap file for '%s'", + name->getCString()); + } + delete name; + } + pst->getToken(tok1, sizeof(tok1), &n1); + } else if (!strcmp(tok2, "beginbfchar")) { + while (pst->getToken(tok1, sizeof(tok1), &n1)) { + if (!strcmp(tok1, "endbfchar")) { + break; + } + if (!pst->getToken(tok2, sizeof(tok2), &n2) || + !strcmp(tok2, "endbfchar")) { + error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); + break; + } + if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && + tok2[0] == '<' && tok2[n2 - 1] == '>')) { + error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); + continue; + } + tok1[n1 - 1] = tok2[n2 - 1] = '\0'; + if (sscanf(tok1 + 1, "%x", &code1) != 1) { + error(-1, "Illegal entry in bfchar block in ToUnicode CMap"); + continue; + } + addMapping(code1, tok2 + 1, n2 - 2, 0); + } + pst->getToken(tok1, sizeof(tok1), &n1); + } else if (!strcmp(tok2, "beginbfrange")) { + while (pst->getToken(tok1, sizeof(tok1), &n1)) { + if (!strcmp(tok1, "endbfrange")) { + break; + } + if (!pst->getToken(tok2, sizeof(tok2), &n2) || + !strcmp(tok2, "endbfrange") || + !pst->getToken(tok3, sizeof(tok3), &n3) || + !strcmp(tok3, "endbfrange")) { + error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); + break; + } + if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && + n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>')) { + error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); + continue; + } + tok1[n1 - 1] = tok2[n2 - 1] = '\0'; + if (sscanf(tok1 + 1, "%x", &code1) != 1 || + sscanf(tok2 + 1, "%x", &code2) != 1) { + error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); + continue; + } + if (!strcmp(tok3, "[")) { + i = 0; + while (pst->getToken(tok1, sizeof(tok1), &n1) && + code1 + i <= code2) { + if (!strcmp(tok1, "]")) { + break; + } + if (tok1[0] == '<' && tok1[n1 - 1] == '>') { + tok1[n1 - 1] = '\0'; + addMapping(code1 + i, tok1 + 1, n1 - 2, 0); + } else { + error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); + } + ++i; + } + } else if (tok3[0] == '<' && tok3[n3 - 1] == '>') { + tok3[n3 - 1] = '\0'; + for (i = 0; code1 <= code2; ++code1, ++i) { + addMapping(code1, tok3 + 1, n3 - 2, i); + } + + } else { + error(-1, "Illegal entry in bfrange block in ToUnicode CMap"); + } + } + pst->getToken(tok1, sizeof(tok1), &n1); + } else { + strcpy(tok1, tok2); + } + } + delete pst; +} + +void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n, + int offset) { + CharCode oldLen, i; + Unicode u; + char uHex[5]; + int j; + + if (code >= mapLen) { + oldLen = mapLen; + mapLen = (code + 256) & ~255; + map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode)); + for (i = oldLen; i < mapLen; ++i) { + map[i] = 0; + } + } + if (n <= 4) { + if (sscanf(uStr, "%x", &u) != 1) { + error(-1, "Illegal entry in ToUnicode CMap"); + return; + } + map[code] = u + offset; + } else { + if (sMapLen >= sMapSize) { + sMapSize = sMapSize + 16; + sMap = (CharCodeToUnicodeString *) + greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString)); + } + map[code] = 0; + sMap[sMapLen].c = code; + sMap[sMapLen].len = n / 4; + for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) { + strncpy(uHex, uStr + j*4, 4); + uHex[4] = '\0'; + if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) { + error(-1, "Illegal entry in ToUnicode CMap"); + } + } + sMap[sMapLen].u[sMap[sMapLen].len - 1] += offset; + ++sMapLen; + } +} + +CharCodeToUnicode::CharCodeToUnicode(GString *tagA) { + CharCode i; + + tag = tagA; + mapLen = 256; + map = (Unicode *)gmallocn(mapLen, sizeof(Unicode)); + for (i = 0; i < mapLen; ++i) { + map[i] = 0; + } + sMap = NULL; + sMapLen = sMapSize = 0; + refCnt = 1; +#if MULTITHREADED + gInitMutex(&mutex); +#endif +} + +CharCodeToUnicode::CharCodeToUnicode(GString *tagA, Unicode *mapA, + CharCode mapLenA, GBool copyMap, + CharCodeToUnicodeString *sMapA, + int sMapLenA, int sMapSizeA) { + tag = tagA; + mapLen = mapLenA; + if (copyMap) { + map = (Unicode *)gmallocn(mapLen, sizeof(Unicode)); + memcpy(map, mapA, mapLen * sizeof(Unicode)); + } else { + map = mapA; + } + sMap = sMapA; + sMapLen = sMapLenA; + sMapSize = sMapSizeA; + refCnt = 1; +#if MULTITHREADED + gInitMutex(&mutex); +#endif +} + +CharCodeToUnicode::~CharCodeToUnicode() { + if (tag) { + delete tag; + } + gfree(map); + if (sMap) { + gfree(sMap); + } +#if MULTITHREADED + gDestroyMutex(&mutex); +#endif +} + +void CharCodeToUnicode::incRefCnt() { +#if MULTITHREADED + gLockMutex(&mutex); +#endif + ++refCnt; +#if MULTITHREADED + gUnlockMutex(&mutex); +#endif +} + +void CharCodeToUnicode::decRefCnt() { + GBool done; + +#if MULTITHREADED + gLockMutex(&mutex); +#endif + done = --refCnt == 0; +#if MULTITHREADED + gUnlockMutex(&mutex); +#endif + if (done) { + delete this; + } +} + +GBool CharCodeToUnicode::match(GString *tagA) { + return tag && !tag->cmp(tagA); +} + +void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) { + int i, j; + + if (len == 1) { + map[c] = u[0]; + } else { + for (i = 0; i < sMapLen; ++i) { + if (sMap[i].c == c) { + break; + } + } + if (i == sMapLen) { + if (sMapLen == sMapSize) { + sMapSize += 8; + sMap = (CharCodeToUnicodeString *) + greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString)); + } + ++sMapLen; + } + map[c] = 0; + sMap[i].c = c; + sMap[i].len = len; + for (j = 0; j < len && j < maxUnicodeString; ++j) { + sMap[i].u[j] = u[j]; + } + } +} + +int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) { + int i, j; + + if (c >= mapLen) { + return 0; + } + if (map[c]) { + u[0] = map[c]; + return 1; + } + for (i = 0; i < sMapLen; ++i) { + if (sMap[i].c == c) { + for (j = 0; j < sMap[i].len && j < size; ++j) { + u[j] = sMap[i].u[j]; + } + return j; + } + } + return 0; +} + +//------------------------------------------------------------------------ + +CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA) { + int i; + + size = sizeA; + cache = (CharCodeToUnicode **)gmallocn(size, sizeof(CharCodeToUnicode *)); + for (i = 0; i < size; ++i) { + cache[i] = NULL; + } +} + +CharCodeToUnicodeCache::~CharCodeToUnicodeCache() { + int i; + + for (i = 0; i < size; ++i) { + if (cache[i]) { + cache[i]->decRefCnt(); + } + } + gfree(cache); +} + +CharCodeToUnicode *CharCodeToUnicodeCache::getCharCodeToUnicode(GString *tag) { + CharCodeToUnicode *ctu; + int i, j; + + if (cache[0] && cache[0]->match(tag)) { + cache[0]->incRefCnt(); + return cache[0]; + } + for (i = 1; i < size; ++i) { + if (cache[i] && cache[i]->match(tag)) { + ctu = cache[i]; + for (j = i; j >= 1; --j) { + cache[j] = cache[j - 1]; + } + cache[0] = ctu; + ctu->incRefCnt(); + return ctu; + } + } + return NULL; +} + +void CharCodeToUnicodeCache::add(CharCodeToUnicode *ctu) { + int i; + + if (cache[size - 1]) { + cache[size - 1]->decRefCnt(); + } + for (i = size - 1; i >= 1; --i) { + cache[i] = cache[i - 1]; + } + cache[0] = ctu; + ctu->incRefCnt(); +} -- cgit v1.2.3