/* * Copyright (C) 2004-2012 Geometer Plus * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. */ #include #include #include #include #include #include "CHMFile.h" #include "CHMReferenceCollection.h" #include "LZXDecompressor.h" static std::string readString(ZLInputStream &stream, std::size_t length) { std::string string(length, ' '); stream.read(const_cast(string.data()), length); return string; } static unsigned short readUnsignedWord(ZLInputStream &stream) { unsigned char buffer[2]; stream.read((char*)buffer, 2); unsigned short result = buffer[1]; result = result << 8; result += buffer[0]; return result; } static unsigned long readUnsignedDWord(ZLInputStream &stream) { unsigned long lowPart = readUnsignedWord(stream); unsigned long highPart = readUnsignedWord(stream); return (highPart << 16) + lowPart; } static unsigned long long readUnsignedQWord(ZLInputStream &stream) { unsigned long long lowPart = readUnsignedDWord(stream); unsigned long long highPart = readUnsignedDWord(stream); return (highPart << 32) + lowPart; } static unsigned long long readEncodedInteger(ZLInputStream &stream) { unsigned long long result = 0; char part; do { result = result << 7; stream.read(&part, 1); result += part & 0x7F; } while (part & -0x80); return result; } CHMInputStream::CHMInputStream(shared_ptr base, const CHMFileInfo::SectionInfo §ionInfo, std::size_t offset, std::size_t size) : myBase(base), mySectionInfo(sectionInfo), mySize(size) { myBaseStartIndex = offset / 0x8000; myBaseStartIndex -= myBaseStartIndex % sectionInfo.ResetInterval; myBytesToSkip = offset - myBaseStartIndex * 0x8000; myOutData = new unsigned char[0x8000]; } CHMInputStream::~CHMInputStream() { close(); delete[] myOutData; } bool CHMInputStream::open() { myOffset = 0; myDoSkip = true; myBaseIndex = myBaseStartIndex; if (myDecompressor.isNull()) { myDecompressor = new LZXDecompressor(mySectionInfo.WindowSizeIndex); } else { myDecompressor->reset(); } myOutDataOffset = 0; myOutDataLength = 0; return true; } std::size_t CHMInputStream::read(char *buffer, std::size_t maxSize) { if (myDoSkip) { do_read(0, myBytesToSkip); myDoSkip = false; } std::size_t realSize = do_read(buffer, std::min(maxSize, mySize - myOffset)); myOffset += realSize; return realSize; } std::size_t CHMInputStream::do_read(char *buffer, std::size_t maxSize) { std::size_t realSize = 0; do { if (myOutDataLength == 0) { if (myBaseIndex >= mySectionInfo.ResetTable.size()) { break; } const bool isTail = myBaseIndex + 1 == mySectionInfo.ResetTable.size(); const std::size_t start = mySectionInfo.ResetTable[myBaseIndex]; const std::size_t end = isTail ? mySectionInfo.CompressedSize : mySectionInfo.ResetTable[myBaseIndex + 1]; myOutDataLength = isTail ? mySectionInfo.UncompressedSize % 0x8000 : 0x8000; myOutDataOffset = 0; myInData.erase(); myInData.append(end - start, '\0'); myBase->seek(mySectionInfo.Offset + start, true); myBase->read((char*)myInData.data(), myInData.length()); if (myBaseIndex % mySectionInfo.ResetInterval == 0) { myDecompressor->reset(); } ++myBaseIndex; if (!myDecompressor->decompress(myInData, myOutData, myOutDataLength)) { break; } } const std::size_t partSize = std::min(myOutDataLength, maxSize); if (buffer != 0) { std::memcpy(buffer + realSize, myOutData + myOutDataOffset, partSize); } maxSize -= partSize; realSize += partSize; myOutDataLength -= partSize; myOutDataOffset += partSize; } while (maxSize != 0); return realSize; } void CHMInputStream::close() { myDecompressor = 0; } void CHMInputStream::seek(int offset, bool absoluteOffset) { if (absoluteOffset) { offset -= myOffset; } if (offset > 0) { read(0, offset); } else if (offset < 0) { open(); read(0, std::max(offset + (int)myOffset, 0)); } } std::size_t CHMInputStream::offset() const { return myOffset; } std::size_t CHMInputStream::sizeOfOpened() { return mySize; } shared_ptr CHMFileInfo::entryStream(shared_ptr base, const std::string &name) const { RecordMap::const_iterator it = myRecords.find(ZLUnicodeUtil::toLower(name)); if (it == myRecords.end()) { return 0; } const RecordInfo &recordInfo = it->second; if (recordInfo.Length == 0) { return 0; } if (recordInfo.Section == 0) { // TODO: implement return 0; } if (recordInfo.Section > mySectionInfos.size()) { return 0; } const SectionInfo §ionInfo = mySectionInfos[recordInfo.Section - 1]; if (recordInfo.Offset + recordInfo.Length > sectionInfo.UncompressedSize) { return 0; } return new CHMInputStream(base, sectionInfo, recordInfo.Offset, recordInfo.Length); } CHMFileInfo::CHMFileInfo(const ZLFile &file) : myFilePath(file.path()) { } bool CHMFileInfo::moveToEntry(ZLInputStream &stream, const std::string &entryName) { RecordMap::const_iterator it = myRecords.find(entryName); if (it == myRecords.end()) { return false; } RecordInfo recordInfo = it->second; if (recordInfo.Section > mySectionInfos.size()) { return false; } if (recordInfo.Section != 0) { // TODO: ??? return false; } stream.seek(mySection0Offset + recordInfo.Offset, true); return true; } bool CHMFileInfo::init(ZLInputStream &stream) { { // header start if (readString(stream, 4) != "ITSF") { return false; } unsigned long version = readUnsignedDWord(stream); // DWORD total length // DWORD unknown // DWORD timestamp // DWORD language id // 0x10 bytes 1st GUID // 0x10 bytes 2nd GUID // QWORD section 0 offset // QWORD section 0 length stream.seek(4 * 4 + 2 * 0x10 + 2 * 8, false); unsigned long long sectionOffset1 = readUnsignedQWord(stream); unsigned long long sectionLength1 = readUnsignedQWord(stream); mySection0Offset = sectionOffset1 + sectionLength1; // header end // additional header data start if (version > 2) { mySection0Offset = readUnsignedQWord(stream); } // additional header data end stream.seek(sectionOffset1, true); // header section 1 start // directory header start if (readString(stream, 4) != "ITSP") { return false; } // DWORD version // DWORD length // DWORD 0x000A // DWORD chunk size // DWORD density // DWORD depth // DWORD root chunk number // DWORD first chunk number // DWORD last chunk number // DWORD -1 stream.seek(10 * 4, false); unsigned long dirChunkNumber = readUnsignedDWord(stream); // ... stream.seek(36, false); // header section 1 end std::size_t nextOffset = stream.offset(); for (unsigned long i = 0; i < dirChunkNumber; ++i) { nextOffset += 4096; std::string header = readString(stream, 4); if (header == "PMGL") { unsigned long quickRefAreaSize = readUnsignedDWord(stream) % 4096; stream.seek(12, false); std::size_t startOffset = stream.offset(); std::size_t oldOffset = startOffset; while (startOffset < nextOffset - quickRefAreaSize) { int nameLength = readEncodedInteger(stream); std::string name = readString(stream, nameLength); int contentSection = readEncodedInteger(stream); int offset = readEncodedInteger(stream); int length = readEncodedInteger(stream); if (name.substr(0, 2) != "::") { name = ZLUnicodeUtil::toLower(name); } myRecords.insert( std::make_pair( name, CHMFileInfo::RecordInfo(contentSection, offset, length) ) ); startOffset = stream.offset(); if (oldOffset == startOffset) { break; } oldOffset = startOffset; } } else if (header == "PMGI") { unsigned long quickRefAreaSize = readUnsignedDWord(stream); std::size_t startOffset = stream.offset(); std::size_t oldOffset = startOffset; while (startOffset < nextOffset - quickRefAreaSize) { int nameLength = readEncodedInteger(stream); std::string name = readString(stream, nameLength); // chunk number readEncodedInteger(stream); startOffset = stream.offset(); if (oldOffset == startOffset) { break; } oldOffset = startOffset; } } stream.seek(nextOffset, true); if (stream.offset() != nextOffset) { break; } } } { if (!moveToEntry(stream, "::DataSpace/NameList")) { return false; } stream.seek(2, false); const int sectionNumber = readUnsignedWord(stream); for (int i = 0; i < sectionNumber; ++i) { const int length = readUnsignedWord(stream); std::string sectionName; sectionName.reserve(length); for (int j = 0; j < length; ++j) { sectionName += (char)readUnsignedWord(stream); } stream.seek(2, false); mySectionNames.push_back(sectionName); } } { for (unsigned int i = 1; i < mySectionNames.size(); ++i) { RecordMap::const_iterator it = myRecords.find("::DataSpace/Storage/" + mySectionNames[i] + "/Content"); if (it == myRecords.end()) { return false; } RecordInfo recordInfo = it->second; if (recordInfo.Section != 0) { return false; } mySectionInfos.push_back(SectionInfo()); SectionInfo &info = mySectionInfos.back(); info.Offset = mySection0Offset + recordInfo.Offset; info.Length = recordInfo.Length; if (!moveToEntry(stream, "::DataSpace/Storage/" + mySectionNames[i] + "/ControlData")) { return false; } stream.seek(4, false); std::string lzxc = readString(stream, 4); if (lzxc != "LZXC") { return false; } const int version = readUnsignedDWord(stream); if ((version <= 0) || (version > 2)) { return false; } info.ResetInterval = readUnsignedDWord(stream); if (version == 1) { info.ResetInterval /= 0x8000; } info.WindowSizeIndex = (version == 1) ? 0 : 15; { int ws = readUnsignedDWord(stream); if (ws > 0) { while ((ws & 1) == 0) { ws >>= 1; info.WindowSizeIndex++; } } } if (!moveToEntry(stream, "::DataSpace/Storage/" + mySectionNames[i] + "/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable")) { return false; } stream.seek(4, false); const std::size_t entriesNumber = readUnsignedDWord(stream); if (entriesNumber == 0) { return false; } if (entriesNumber > 2048) { // file size is greater than 60 Mb return false; } info.ResetTable.reserve(entriesNumber); stream.seek(8, false); info.UncompressedSize = readUnsignedQWord(stream); if ((info.UncompressedSize - 1) / 0x8000 != entriesNumber - 1) { return false; } info.CompressedSize = readUnsignedQWord(stream); stream.seek(8, false); std::size_t previous = 0; for (std::size_t j = 0; j < entriesNumber; ++j) { std::size_t value = readUnsignedQWord(stream); if ((j > 0) == (value <= previous)) { return false; } info.ResetTable.push_back(value); previous = value; } } } return true; } static std::string readNTString(ZLInputStream &stream) { std::string s; char c; while (stream.read(&c, 1) == 1) { if (c == '\0') { break; } else { s += c; } } return CHMReferenceCollection::fullReference("/", s); } bool CHMFileInfo::FileNames::empty() const { return Start.empty() && TOC.empty() && Home.empty() && Index.empty(); } CHMFileInfo::FileNames CHMFileInfo::sectionNames(shared_ptr base) const { FileNames names; shared_ptr stringsStream = entryStream(base, "/#STRINGS"); if (!stringsStream.isNull() && stringsStream->open()) { std::vector fileNames; int tocIndex = -1; int indexIndex = -1; for (int i = 0; i < 12; ++i) { std::string argument = readNTString(*stringsStream); if (argument.empty() || (argument[argument.length() - 1] == '/')) { continue; } if (myRecords.find(argument) == myRecords.end()) { continue; } if ((tocIndex == -1) && ZLStringUtil::stringEndsWith(argument, ".hhc")) { tocIndex = fileNames.size(); names.TOC = argument; } else if ((indexIndex == -1) && ZLStringUtil::stringEndsWith(argument, ".hhk")) { indexIndex = fileNames.size(); names.Index = argument; } fileNames.push_back(argument); } std::size_t startIndex = std::max(3, std::max(tocIndex, indexIndex) + 1); if (startIndex < 11) { if (startIndex < fileNames.size()) { names.Start = fileNames[startIndex]; } if (startIndex + 1 < fileNames.size()) { names.Home = fileNames[startIndex + 1]; } } stringsStream->close(); } if (names.TOC.empty()) { for (RecordMap::const_iterator it = myRecords.begin(); it != myRecords.end(); ++it) { if (ZLStringUtil::stringEndsWith(it->first, ".hhc")) { names.TOC = it->first; break; } } } if (names.empty()) { for (RecordMap::const_iterator it = myRecords.begin(); it != myRecords.end(); ++it) { if ((ZLStringUtil::stringEndsWith(it->first, ".htm")) || (ZLStringUtil::stringEndsWith(it->first, ".html"))) { names.Start = it->first; break; } } } return names; } const std::string CHMFileInfo::filePath() const { return myFilePath; }