summary refs log tree commit diff stats
path: root/reader/src/formats/chm/CHMFile.cpp
diff options
context:
space:
mode:
author Michele Calgaro <michele.calgaro@yahoo.it> 2024-06-07 23:30:05 +0900
committer Michele Calgaro <michele.calgaro@yahoo.it> 2024-06-07 23:30:05 +0900
commit 17b259df9cb6b28779d4881b2b6c805ee2e48eea (patch)
tree 5ed61937459cb7081089111b0242c01ec178f1f3 /reader/src/formats/chm/CHMFile.cpp
parent 1cba8bce178eb2d6719c6f7f21e2c9352c5513a6 (diff)
download tde-ebook-reader-17b259df9cb6b28779d4881b2b6c805ee2e48eea.tar.gz
tde-ebook-reader-17b259df9cb6b28779d4881b2b6c805ee2e48eea.zip
Rename to tde-ebook-reader
Signed-off-by: Michele Calgaro <michele.calgaro@yahoo.it>
Diffstat (limited to 'reader/src/formats/chm/CHMFile.cpp')
-rw-r--r-- reader/src/formats/chm/CHMFile.cpp 490
1 files changed, 490 insertions, 0 deletions
diff --git a/reader/src/formats/chm/CHMFile.cpp b/reader/src/formats/chm/CHMFile.cpp
new file mode 100644
index 0000000..8c62bca
--- /dev/null
+++ b/reader/src/formats/chm/CHMFile.cpp
@@ -0,0 +1,490 @@
+/*
+ * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstring>
+
+#include <ZLFile.h>
+#include <ZLStringUtil.h>
+#include <ZLUnicodeUtil.h>
+#include <ZLInputStream.h>
+
+#include "CHMFile.h"
+#include "CHMReferenceCollection.h"
+
+#include "LZXDecompressor.h"
+
+static std::string readString(ZLInputStream &stream, std::size_t length) { // raw bytes -> std::string
+	std::string text(length, ' '); // pre-sized, so bytes the stream does not deliver stay as ' '
+	stream.read(const_cast<char*>(text.data()), text.size());
+	return text;
+}
+
+// Reads a 16-bit unsigned value stored low byte first.
+static unsigned short readUnsignedWord(ZLInputStream &stream) {
+	// Zero-filled so that a short read (e.g. at end of file) contributes 0 for
+	// the missing bytes instead of indeterminate stack contents.
+	unsigned char buffer[2] = {0, 0};
+	stream.read((char*)buffer, 2);
+	return (unsigned short)((buffer[1] << 8) | buffer[0]);
+}
+
+static unsigned long readUnsignedDWord(ZLInputStream &stream) { // 32-bit value, low word first
+	const unsigned long low = readUnsignedWord(stream); // the least significant word comes first in the stream
+	const unsigned long high = readUnsignedWord(stream);
+	return low | (high << 16);
+}
+
+static unsigned long long readUnsignedQWord(ZLInputStream &stream) { // 64-bit value, low dword first
+	const unsigned long long low = readUnsignedDWord(stream); // the least significant dword comes first in the stream
+	const unsigned long long high = readUnsignedDWord(stream);
+	return low | (high << 32);
+}
+
+// Reads a variable-length integer: 7-bit groups, most significant group first; bit 7 set means "more bytes follow".
+static unsigned long long readEncodedInteger(ZLInputStream &stream) {
+	unsigned long long result = 0;
+	char part = 0;
+	do { // a failed read ends decoding; otherwise a stale continuation bit in `part` would loop forever at EOF
+		if (stream.read(&part, 1) != 1) { break; }
+		result = (result << 7) + (part & 0x7F);
+	} while (part & 0x80);
+	return result;
+}
+
+CHMInputStream::CHMInputStream(shared_ptr<ZLInputStream> base, const CHMFileInfo::SectionInfo &sectionInfo, std::size_t offset, std::size_t size) : myBase(base), mySectionInfo(sectionInfo), mySize(size) {
+	myOutData = new unsigned char[0x8000]; // room for one decompressed 0x8000-byte chunk
+	myBaseStartIndex = offset / 0x8000; // chunk that contains the entry's first byte...
+	myBaseStartIndex -= myBaseStartIndex % sectionInfo.ResetInterval; // ...rounded down to a multiple of ResetInterval
+	myBytesToSkip = offset - myBaseStartIndex * 0x8000; // decompressed bytes to discard before the entry begins
+}
+
+CHMInputStream::~CHMInputStream() { // tears the stream down
+ close(); // clears the decompressor pointer
+ delete[] myOutData; // buffer allocated with new[] in the constructor
+}
+
+// (Re)starts the stream at the beginning of its entry; always returns true.
+bool CHMInputStream::open() {
+	if (!myDecompressor.isNull()) {
+		myDecompressor->reset();
+	} else {
+		myDecompressor = new LZXDecompressor(mySectionInfo.WindowSizeIndex);
+	}
+	myBaseIndex = myBaseStartIndex;
+	myOutDataOffset = myOutDataLength = 0; // no decompressed data buffered yet
+	myOffset = 0;
+	myDoSkip = true; // the next read() first discards myBytesToSkip bytes
+	return true;
+}
+
+std::size_t CHMInputStream::read(char *buffer, std::size_t maxSize) { // buffer may be 0: bytes are skipped, not copied
+	if (myDoSkip) { // first read after open(): discard the bytes that precede the entry
+		myDoSkip = false;
+		do_read(0, myBytesToSkip);
+	}
+	const std::size_t delivered = do_read(buffer, std::min(maxSize, mySize - myOffset)); // capped at the entry's end
+	myOffset += delivered;
+	return delivered;
+}
+
+std::size_t CHMInputStream::do_read(char *buffer, std::size_t maxSize) { // buffer == 0: bytes are skipped, not copied
+	std::size_t realSize = 0;
+	do {
+		if (myOutDataLength == 0) { // output buffer drained: decompress the next chunk
+			if (myBaseIndex >= mySectionInfo.ResetTable.size()) {
+				break;
+			}
+			const bool isTail = myBaseIndex + 1 == mySectionInfo.ResetTable.size();
+			const std::size_t start = mySectionInfo.ResetTable[myBaseIndex];
+			const std::size_t end = isTail ? mySectionInfo.CompressedSize : mySectionInfo.ResetTable[myBaseIndex + 1];
+			if (end < start) { break; } // inconsistent sizes: end - start would wrap around to a huge length
+			myInData.erase();
+			myInData.append(end - start, '\0');
+			myBase->seek(mySectionInfo.Offset + start, true);
+			myBase->read((char*)myInData.data(), myInData.length());
+			if (myBaseIndex % mySectionInfo.ResetInterval == 0) {
+				myDecompressor->reset();
+			}
+			++myBaseIndex;
+			myOutDataLength = isTail ? (mySectionInfo.UncompressedSize - 1) % 0x8000 + 1 : 0x8000; // tail is 1..0x8000 bytes; plain `% 0x8000` made a full tail empty
+			myOutDataOffset = 0;
+			if (!myDecompressor->decompress(myInData, myOutData, myOutDataLength)) {
+				myOutDataLength = 0; // never hand out a buffer whose decompression failed
+				break;
+			}
+		}
+		const std::size_t partSize = std::min(myOutDataLength, maxSize);
+		if (buffer != 0) {
+			std::memcpy(buffer + realSize, myOutData + myOutDataOffset, partSize);
+		}
+		maxSize -= partSize;
+		realSize += partSize;
+		myOutDataLength -= partSize;
+		myOutDataOffset += partSize;
+	} while (maxSize != 0);
+	return realSize; // may be less than requested when the table ends or a chunk fails to decompress
+}
+
+void CHMInputStream::close() {
+ myDecompressor = 0; // clears the decompressor pointer; open() creates a new decompressor when it is null
+}
+
+// Moves the read position by `offset`, or to `offset` when absoluteOffset is set; never before the entry's start.
+void CHMInputStream::seek(int offset, bool absoluteOffset) {
+	// The absolute target must be computed up front: open() resets myOffset to 0.
+	const int target = std::max(absoluteOffset ? offset : offset + (int)myOffset, 0);
+	if (target > (int)myOffset) {
+		read(0, target - (int)myOffset); // forward: decompress and discard
+	} else if (target < (int)myOffset) {
+		open(); // backward: restart from the entry's beginning, then skip forward
+		read(0, target);
+	}
+}
+
+std::size_t CHMInputStream::offset() const {
+ return myOffset; // bytes delivered by read() since the last open(), which zeroes it
+}
+
+std::size_t CHMInputStream::sizeOfOpened() {
+ return mySize; // the size passed to the constructor
+}
+
+// Opens a stream over the named entry (the name is lower-cased for the lookup).
+// Yields a null pointer for unknown or empty entries, for entries stored in section 0,
+// and for entries that reference a missing section or exceed its uncompressed size.
+shared_ptr<ZLInputStream> CHMFileInfo::entryStream(shared_ptr<ZLInputStream> base, const std::string &name) const {
+	const RecordMap::const_iterator found = myRecords.find(ZLUnicodeUtil::toLower(name));
+	if (found == myRecords.end() || found->second.Length == 0) {
+		return 0;
+	}
+	const RecordInfo &record = found->second;
+	if (record.Section == 0) {
+		// TODO: implement
+		return 0;
+	}
+	if (record.Section > mySectionInfos.size()) {
+		return 0;
+	}
+	const SectionInfo &section = mySectionInfos[record.Section - 1];
+	const bool fits = record.Offset + record.Length <= section.UncompressedSize;
+	if (!fits) {
+		return 0;
+	}
+	return new CHMInputStream(base, section, record.Offset, record.Length);
+}
+
+CHMFileInfo::CHMFileInfo(const ZLFile &file) : myFilePath(file.path()) { // only stores the file's path
+}
+
+// Positions `stream` at the start of the named directory entry.
+// Only entries stored in section 0 are supported; the name is looked up
+// exactly as given. Returns false when the entry is missing or lives elsewhere.
+bool CHMFileInfo::moveToEntry(ZLInputStream &stream, const std::string &entryName) {
+	const RecordMap::const_iterator found = myRecords.find(entryName);
+	if (found == myRecords.end()) {
+		return false;
+	}
+	const RecordInfo &record = found->second;
+	const bool inSectionZero = record.Section == 0;
+	if (!inSectionZero) {
+		// TODO: ???
+		return false;
+	}
+	stream.seek(mySection0Offset + record.Offset, true);
+	return true;
+}
+
+// Parses the container: ITSF/ITSP headers, directory chunks (-> myRecords), section names, and per-section
+// LZXC control data plus reset table (-> mySectionInfos). Returns false when a required part is missing or invalid.
+bool CHMFileInfo::init(ZLInputStream &stream) {
+	{
+		// header start
+		if (readString(stream, 4) != "ITSF") {
+			return false;
+		}
+		unsigned long version = readUnsignedDWord(stream);
+		// DWORD total length
+		// DWORD unknown
+		// DWORD timestamp
+		// DWORD language id
+		// 0x10 bytes 1st GUID
+		// 0x10 bytes 2nd GUID
+		// QWORD section 0 offset
+		// QWORD section 0 length
+		stream.seek(4 * 4 + 2 * 0x10 + 2 * 8, false);
+		unsigned long long sectionOffset1 = readUnsignedQWord(stream);
+		unsigned long long sectionLength1 = readUnsignedQWord(stream);
+		mySection0Offset = sectionOffset1 + sectionLength1;
+		// header end
+		// additional header data start
+		if (version > 2) {
+			mySection0Offset = readUnsignedQWord(stream);
+		}
+		// additional header data end
+
+		stream.seek(sectionOffset1, true);
+		// header section 1 start
+		// directory header start
+		if (readString(stream, 4) != "ITSP") {
+			return false;
+		}
+		// DWORD version
+		// DWORD length
+		// DWORD 0x000A
+		// DWORD chunk size
+		// DWORD density
+		// DWORD depth
+		// DWORD root chunk number
+		// DWORD first chunk number
+		// DWORD last chunk number
+		// DWORD -1
+		stream.seek(10 * 4, false);
+		unsigned long dirChunkNumber = readUnsignedDWord(stream);
+		// ...
+		stream.seek(36, false);
+		// header section 1 end
+
+		std::size_t nextOffset = stream.offset();
+		for (unsigned long i = 0; i < dirChunkNumber; ++i) {
+			nextOffset += 4096; // every directory chunk is treated as 4096 bytes long
+			std::string header = readString(stream, 4);
+			if (header == "PMGL") {
+				unsigned long quickRefAreaSize = readUnsignedDWord(stream) % 4096;
+				stream.seek(12, false);
+				std::size_t startOffset = stream.offset();
+				std::size_t oldOffset = startOffset;
+				while (startOffset < nextOffset - quickRefAreaSize) {
+					const unsigned long long nameLength = readEncodedInteger(stream);
+					if (nameLength > 4096) { // corrupt entry: a name cannot outgrow its 4096-byte chunk
+						break;
+					}
+					std::string name = readString(stream, nameLength);
+					int contentSection = readEncodedInteger(stream);
+					int offset = readEncodedInteger(stream);
+					int length = readEncodedInteger(stream);
+					if (name.substr(0, 2) != "::") { // "::" names keep their case, all others are stored lower-cased
+						name = ZLUnicodeUtil::toLower(name);
+					}
+					myRecords.insert(std::make_pair(name, CHMFileInfo::RecordInfo(contentSection, offset, length)));
+					startOffset = stream.offset();
+					if (oldOffset == startOffset) { // the stream no longer advances (end of data)
+						break;
+					}
+					oldOffset = startOffset;
+				}
+			} else if (header == "PMGI") {
+				unsigned long quickRefAreaSize = readUnsignedDWord(stream) % 4096; // same bound as in the PMGL branch
+				std::size_t startOffset = stream.offset();
+				std::size_t oldOffset = startOffset;
+				while (startOffset < nextOffset - quickRefAreaSize) {
+					const unsigned long long nameLength = readEncodedInteger(stream);
+					if (nameLength > 4096) { // corrupt entry, see the PMGL branch
+						break;
+					}
+					std::string name = readString(stream, nameLength);
+					// chunk number
+					readEncodedInteger(stream);
+					startOffset = stream.offset();
+					if (oldOffset == startOffset) {
+						break;
+					}
+					oldOffset = startOffset;
+				}
+			}
+			stream.seek(nextOffset, true);
+			if (stream.offset() != nextOffset) {
+				break;
+			}
+		}
+	}
+
+	{
+		if (!moveToEntry(stream, "::DataSpace/NameList")) {
+			return false;
+		}
+		stream.seek(2, false);
+		const int sectionNumber = readUnsignedWord(stream);
+		for (int i = 0; i < sectionNumber; ++i) {
+			const int length = readUnsignedWord(stream);
+			std::string sectionName;
+			sectionName.reserve(length);
+			for (int j = 0; j < length; ++j) {
+				sectionName += (char)readUnsignedWord(stream); // 16-bit characters, only the low byte is kept
+			}
+			stream.seek(2, false); // skips 2 bytes after each name (presumably its terminator)
+			mySectionNames.push_back(sectionName);
+		}
+	}
+
+	{
+		for (unsigned int i = 1; i < mySectionNames.size(); ++i) {
+			RecordMap::const_iterator it = myRecords.find("::DataSpace/Storage/" + mySectionNames[i] + "/Content");
+			if (it == myRecords.end()) {
+				return false;
+			}
+			RecordInfo recordInfo = it->second;
+			if (recordInfo.Section != 0) {
+				return false;
+			}
+			mySectionInfos.push_back(SectionInfo());
+			SectionInfo &info = mySectionInfos.back();
+			info.Offset = mySection0Offset + recordInfo.Offset;
+			info.Length = recordInfo.Length;
+
+			if (!moveToEntry(stream, "::DataSpace/Storage/" + mySectionNames[i] + "/ControlData")) {
+				return false;
+			}
+			stream.seek(4, false);
+			std::string lzxc = readString(stream, 4);
+			if (lzxc != "LZXC") {
+				return false;
+			}
+			const int version = readUnsignedDWord(stream);
+			if ((version <= 0) || (version > 2)) {
+				return false;
+			}
+			info.ResetInterval = readUnsignedDWord(stream);
+			if (version == 1) {
+				info.ResetInterval /= 0x8000;
+			}
+			if (info.ResetInterval == 0) { // CHMInputStream computes `index % ResetInterval`: zero would divide by zero
+				return false;
+			}
+			info.WindowSizeIndex = (version == 1) ? 0 : 15;
+			{
+				int ws = readUnsignedDWord(stream);
+				if (ws > 0) {
+					while ((ws & 1) == 0) { // add the number of trailing zero bits of the stored window size
+						ws >>= 1;
+						info.WindowSizeIndex++;
+					}
+				}
+			}
+
+			if (!moveToEntry(stream, "::DataSpace/Storage/" + mySectionNames[i] + "/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable")) {
+				return false;
+			}
+			stream.seek(4, false);
+			const std::size_t entriesNumber = readUnsignedDWord(stream);
+			if (entriesNumber == 0) {
+				return false;
+			}
+			if (entriesNumber > 2048) {
+				// file size is greater than 60 Mb
+				return false;
+			}
+			info.ResetTable.reserve(entriesNumber);
+			stream.seek(8, false);
+			info.UncompressedSize = readUnsignedQWord(stream);
+			if ((info.UncompressedSize - 1) / 0x8000 != entriesNumber - 1) { // one table entry per 0x8000-byte chunk
+				return false;
+			}
+			info.CompressedSize = readUnsignedQWord(stream);
+			stream.seek(8, false);
+			std::size_t previous = 0;
+			for (std::size_t j = 0; j < entriesNumber; ++j) {
+				std::size_t value = readUnsignedQWord(stream);
+				if ((j > 0) == (value <= previous)) { // first entry must be 0, later ones strictly increasing
+					return false;
+				}
+				info.ResetTable.push_back(value);
+				previous = value;
+			}
+		}
+	}
+
+	return true;
+}
+
+// Reads bytes up to a NUL (consumed) or the end of the stream and returns them passed through fullReference("/", ...).
+static std::string readNTString(ZLInputStream &stream) {
+	std::string text;
+	for (;;) {
+		char symbol;
+		if (stream.read(&symbol, 1) != 1 || symbol == '\0') {
+			break;
+		}
+		text.push_back(symbol);
+	}
+	return CHMReferenceCollection::fullReference("/", text);
+}
+
+bool CHMFileInfo::FileNames::empty() const {
+ return Start.empty() && TOC.empty() && Home.empty() && Index.empty(); // true only when none of the four names is set
+}
+
+// Picks the book's key files: TOC (.hhc), index (.hhk), start page and home page.
+// Candidates are those of the first 12 strings of "/#STRINGS" that name existing records;
+// scans over the record names provide fallbacks when TOC, or everything, stays empty.
+CHMFileInfo::FileNames CHMFileInfo::sectionNames(shared_ptr<ZLInputStream> base) const {
+	FileNames result;
+	shared_ptr<ZLInputStream> strings = entryStream(base, "/#STRINGS");
+	if (!strings.isNull() && strings->open()) {
+		std::vector<std::string> candidates;
+		int tocPosition = -1;
+		int indexPosition = -1;
+		for (int count = 0; count < 12; ++count) {
+			const std::string entry = readNTString(*strings);
+			const bool usable = !entry.empty() && (entry[entry.length() - 1] != '/') &&
+				(myRecords.find(entry) != myRecords.end());
+			if (!usable) {
+				continue;
+			}
+			if ((tocPosition == -1) && ZLStringUtil::stringEndsWith(entry, ".hhc")) {
+				tocPosition = candidates.size();
+				result.TOC = entry;
+			} else if ((indexPosition == -1) && ZLStringUtil::stringEndsWith(entry, ".hhk")) {
+				indexPosition = candidates.size();
+				result.Index = entry;
+			}
+			candidates.push_back(entry);
+		}
+		// Start/Home come after the later of TOC and index, and never from the first three candidates.
+		const std::size_t first = std::max(3, std::max(tocPosition, indexPosition) + 1);
+		if (first < 11) {
+			if (first < candidates.size()) {
+				result.Start = candidates[first];
+			}
+			if (first + 1 < candidates.size()) {
+				result.Home = candidates[first + 1];
+			}
+		}
+		strings->close();
+	}
+	if (result.TOC.empty()) {
+		for (RecordMap::const_iterator it = myRecords.begin(); result.TOC.empty() && it != myRecords.end(); ++it) {
+			if (ZLStringUtil::stringEndsWith(it->first, ".hhc")) {
+				result.TOC = it->first; // non-empty, so the loop condition stops at the first match
+			}
+		}
+	}
+	if (result.empty()) {
+		for (RecordMap::const_iterator it = myRecords.begin(); result.Start.empty() && it != myRecords.end(); ++it) {
+			const std::string &recordName = it->first;
+			if (ZLStringUtil::stringEndsWith(recordName, ".htm") || ZLStringUtil::stringEndsWith(recordName, ".html")) {
+				result.Start = recordName; // non-empty, so the loop condition stops at the first match
+			}
+		}
+	}
+	return result;
+}
+
+const std::string CHMFileInfo::filePath() const {
+ return myFilePath; // the path taken from the ZLFile in the constructor, returned by value
+}