diff options
Diffstat (limited to 'fbreader/src/formats/doc/OleMainStream.cpp')
| -rw-r--r-- | fbreader/src/formats/doc/OleMainStream.cpp | 1085 |
1 files changed, 0 insertions, 1085 deletions
diff --git a/fbreader/src/formats/doc/OleMainStream.cpp b/fbreader/src/formats/doc/OleMainStream.cpp deleted file mode 100644 index fe829e6..0000000 --- a/fbreader/src/formats/doc/OleMainStream.cpp +++ /dev/null @@ -1,1085 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include <string> - -#include <ZLLogger.h> -#include <ZLUnicodeUtil.h> - -#include "OleUtil.h" -#include "OleStorage.h" - -#include "DocInlineImageReader.h" - -#include "OleMainStream.h" - -OleMainStream::Style::Style() : - StyleIdCurrent(STYLE_INVALID), - StyleIdNext(STYLE_INVALID), - HasPageBreakBefore(false), - BeforeParagraphIndent(0), - AfterParagraphIndent(0), - LeftIndent(0), - FirstLineIndent(0), - RightIndent(0), - Alignment(ALIGNMENT_DEFAULT) { -} - -OleMainStream::CharInfo::CharInfo() : FontStyle(FONT_REGULAR), FontSize(20) { -} - -OleMainStream::SectionInfo::SectionInfo() : CharPosition(0), IsNewPage(true) { -} - -OleMainStream::InlineImageInfo::InlineImageInfo() : DataPosition(0) { -} - -OleMainStream::FloatImageInfo::FloatImageInfo() : ShapeId(0) { -} - -OleMainStream::OleMainStream(shared_ptr<OleStorage> storage, OleEntry oleEntry, shared_ptr<ZLInputStream> stream) : OleStream(storage, oleEntry, stream) { -} - -bool OleMainStream::open(bool doReadFormattingData) { - if (OleStream::open() == false) { - return false; - } - - static const std::size_t HEADER_SIZE = 768; //size of data in header of main stream - char headerBuffer[HEADER_SIZE]; - seek(0, true); - - if (read(headerBuffer, HEADER_SIZE) != HEADER_SIZE) { - return false; - } - - bool result = readFIB(headerBuffer); - if (!result) { - return false; - } - - // determining table stream number - unsigned int tableNumber = (OleUtil::getU2Bytes(headerBuffer, 0xA) & 0x0200) ? 1 : 0; - std::string tableName = tableNumber == 0 ? "0" : "1"; - tableName += "Table"; - OleEntry tableEntry; - result = myStorage->getEntryByName(tableName, tableEntry); - - if (!result) { - // cant't find table stream (that can be only in case if file format is below Word 7/8), so building simple table stream - // TODO: CHECK may be not all old documents have ANSI - ZLLogger::Instance().println("DocPlugin", "cant't find table stream, building own simple piece table, that includes all charachters"); - Piece piece = {myStartOfText, myEndOfText - myStartOfText, true, Piece::PIECE_TEXT, 0}; - myPieces.push_back(piece); - return true; - } - - result = readPieceTable(headerBuffer, tableEntry); - - if (!result) { - ZLLogger::Instance().println("DocPlugin", "error during reading piece table"); - return false; - } - - if (!doReadFormattingData) { - return true; - } - - OleEntry dataEntry; - if (myStorage->getEntryByName("Data", dataEntry)) { - myDataStream = new OleStream(myStorage, dataEntry, myBaseStream); - } - - //result of reading following structures doesn't check, because all these - //problems can be ignored, and document can be showed anyway, maybe with wrong formatting - readBookmarks(headerBuffer, tableEntry); - readStylesheet(headerBuffer, tableEntry); - //readSectionsInfoTable(headerBuffer, tableEntry); //it isn't used now - readParagraphStyleTable(headerBuffer, tableEntry); - readCharInfoTable(headerBuffer, tableEntry); - readFloatingImages(headerBuffer, tableEntry); - return true; -} - -const OleMainStream::Pieces &OleMainStream::getPieces() const { - return myPieces; -} - -const OleMainStream::CharInfoList &OleMainStream::getCharInfoList() const { - return myCharInfoList; -} - -const OleMainStream::StyleInfoList &OleMainStream::getStyleInfoList() const { - return myStyleInfoList; -} - -const OleMainStream::BookmarksList &OleMainStream::getBookmarks() const { - return myBookmarks; -} - -const OleMainStream::InlineImageInfoList &OleMainStream::getInlineImageInfoList() const { - return myInlineImageInfoList; -} - -const OleMainStream::FloatImageInfoList &OleMainStream::getFloatImageInfoList() const { - return myFloatImageInfoList; -} - -ZLFileImage::Blocks OleMainStream::getFloatImage(unsigned int shapeId) const { - if (myFLoatImageReader.isNull()) { - return ZLFileImage::Blocks(); - } - return myFLoatImageReader->getBlocksForShapeId(shapeId); -} - -ZLFileImage::Blocks OleMainStream::getInlineImage(unsigned int dataPosition) const { - if (myDataStream.isNull()) { - return ZLFileImage::Blocks(); - } - DocInlineImageReader imageReader(myDataStream); - return imageReader.getImagePieceInfo(dataPosition); -} - -bool OleMainStream::readFIB(const char *headerBuffer) { - int flags = OleUtil::getU2Bytes(headerBuffer, 0xA); //offset for flags - - if (flags & 0x0004) { //flag for complex format - ZLLogger::Instance().println("DocPlugin", "This was fast-saved. Some information is lost"); - //lostInfo = (flags & 0xF0) >> 4); - } - - if (flags & 0x1000) { //flag for using extending charset - ZLLogger::Instance().println("DocPlugin", "File uses extended character set (get_word8_char)"); - } else { - ZLLogger::Instance().println("DocPlugin", "File uses get_8bit_char character set"); - } - - if (flags & 0x100) { //flag for encrypted files - ZLLogger::Instance().println("DocPlugin", "File is encrypted"); - // Encryption key = %08lx ; NumUtil::get4Bytes(header, 14) - return false; - } - - unsigned int charset = OleUtil::getU2Bytes(headerBuffer, 0x14); //offset for charset number - if (charset && charset != 0x100) { //0x100 = default charset - ZLLogger::Instance().println("DocPlugin", "Using not default character set %d"); - } else { - ZLLogger::Instance().println("DocPlugin", "Using default character set"); - } - - myStartOfText = OleUtil::get4Bytes(headerBuffer, 0x18); //offset for start of text value - myEndOfText = OleUtil::get4Bytes(headerBuffer, 0x1c); //offset for end of text value - return true; -} - -void OleMainStream::splitPieces(const Pieces &s, Pieces &dest1, Pieces &dest2, Piece::PieceType type1, Piece::PieceType type2, int boundary) { - Pieces source = s; - dest1.clear(); - dest2.clear(); - - int sumLength = 0; - std::size_t i = 0; - for (i = 0; i < source.size(); ++i) { - Piece piece = source.at(i); - if (piece.Length + sumLength >= boundary) { - Piece piece2 = piece; - - piece.Length = boundary - sumLength; - piece.Type = type1; - - piece2.Type = type2; - piece2.Offset += piece.Length * 2; - piece2.Length -= piece.Length; - - if (piece.Length > 0) { - dest1.push_back(piece); - } - if (piece2.Length > 0) { - dest2.push_back(piece2); - } - ++i; - break; - } - sumLength += piece.Length; - piece.Type = type1; - dest1.push_back(piece); - } - for (; i < source.size(); ++i) { - Piece piece = source.at(i); - piece.Type = type2; - dest2.push_back(piece); - } - -} - -std::string OleMainStream::getPiecesTableBuffer(const char *headerBuffer, OleStream &tableStream) { - unsigned int clxOffset = OleUtil::getU4Bytes(headerBuffer, 0x01A2); //offset for CLX structure - unsigned int clxLength = OleUtil::getU4Bytes(headerBuffer, 0x01A6); //offset for value of CLX structure length - - //1 step : loading CLX table from table stream - char *clxBuffer = new char[clxLength]; - if (!tableStream.seek(clxOffset, true)) { - ZLLogger::Instance().println("DocPlugin", "getPiecesTableBuffer -- error for seeking to CLX structure"); - return std::string(); - } - if (tableStream.read(clxBuffer, clxLength) != clxLength) { - ZLLogger::Instance().println("DocPlugin", "getPiecesTableBuffer -- CLX structure length is invalid"); - return std::string(); - } - std::string clx(clxBuffer, clxLength); - delete[] clxBuffer; - - //2 step: searching for pieces table buffer at CLX - //(determines it by 0x02 as start symbol) - std::size_t from = 0; - std::size_t i; - std::string pieceTableBuffer; - while ((i = clx.find_first_of(0x02, from)) != std::string::npos) { - if (clx.size() < i + 1 + 4) { - ZLLogger::Instance().println("DocPlugin", "getPiecesTableBuffer -- CLX structure has invalid format"); - return std::string(); - } - unsigned int pieceTableLength = OleUtil::getU4Bytes(clx.c_str(), i + 1); - pieceTableBuffer = std::string(clx, i + 1 + 4); - if (pieceTableBuffer.length() != pieceTableLength) { - from = i + 1; - continue; - } - break; - } - return pieceTableBuffer; -} - - -bool OleMainStream::readPieceTable(const char *headerBuffer, const OleEntry &tableEntry) { - OleStream tableStream(myStorage, tableEntry, myBaseStream); - std::string piecesTableBuffer = getPiecesTableBuffer(headerBuffer, tableStream); - - if (piecesTableBuffer.empty()) { - return false; - } - - //getting count of Character Positions for different types of subdocuments in Main Stream - int ccpText = OleUtil::get4Bytes(headerBuffer, 0x004C); //text - int ccpFtn = OleUtil::get4Bytes(headerBuffer, 0x0050); //footnote subdocument - int ccpHdd = OleUtil::get4Bytes(headerBuffer, 0x0054); //header subdocument - int ccpMcr = OleUtil::get4Bytes(headerBuffer, 0x0058); //macro subdocument - int ccpAtn = OleUtil::get4Bytes(headerBuffer, 0x005C); //comment subdocument - int ccpEdn = OleUtil::get4Bytes(headerBuffer, 0x0060); //endnote subdocument - int ccpTxbx = OleUtil::get4Bytes(headerBuffer, 0x0064); //textbox subdocument - int ccpHdrTxbx = OleUtil::get4Bytes(headerBuffer, 0x0068); //textbox subdocument of the header - int lastCP = ccpFtn + ccpHdd + ccpMcr + ccpAtn + ccpEdn + ccpTxbx + ccpHdrTxbx; - if (lastCP != 0) { - ++lastCP; - } - lastCP += ccpText; - - //getting the CP (character positions) and CP descriptors - std::vector<int> cp; //array of character positions for pieces - unsigned int j = 0; - for (j = 0; ; j += 4) { - if (piecesTableBuffer.size() < j + 4) { - ZLLogger::Instance().println("DocPlugin", "invalid piece table, cp ends not with a lastcp"); - break; - } - int curCP = OleUtil::get4Bytes(piecesTableBuffer.c_str(), j); - cp.push_back(curCP); - if (curCP == lastCP) { - break; - } - } - - if (cp.size() < 2) { - ZLLogger::Instance().println("DocPlugin", "invalid piece table, < 2 pieces"); - return false; - } - - std::vector<std::string> descriptors; - for (std::size_t k = 0; k < cp.size() - 1; ++k) { - //j + 4, because it should be taken after CP in PiecesTable Buffer - //k * 8, because it should be taken 8 byte for each descriptor - std::size_t substrFrom = j + 4 + k * 8; - if (piecesTableBuffer.size() < substrFrom + 8) { - ZLLogger::Instance().println("DocPlugin", "invalid piece table, problems with descriptors reading"); - break; - } - descriptors.push_back(piecesTableBuffer.substr(substrFrom, 8)); - } - - //filling the Pieces vector - std::size_t minValidSize = std::min(cp.size() - 1, descriptors.size()); - if (minValidSize == 0) { - ZLLogger::Instance().println("DocPlugin", "invalid piece table, there are no pieces"); - return false; - } - - for (std::size_t i = 0; i < minValidSize; ++i) { - //4byte integer with offset and ANSI flag - int fcValue = OleUtil::get4Bytes(descriptors.at(i).c_str(), 0x2); //offset for piece structure - Piece piece; - piece.IsANSI = (fcValue & 0x40000000) == 0x40000000; //ansi flag - piece.Offset = fcValue & 0x3FFFFFFF; //gettting offset for current piece - piece.Length = cp.at(i + 1) - cp.at(i); - myPieces.push_back(piece); - } - - //split pieces into different types - Pieces piecesText, piecesFootnote, piecesOther; - splitPieces(myPieces, piecesText, piecesFootnote, Piece::PIECE_TEXT, Piece::PIECE_FOOTNOTE, ccpText); - splitPieces(piecesFootnote, piecesFootnote, piecesOther, Piece::PIECE_FOOTNOTE, Piece::PIECE_OTHER, ccpFtn); - - myPieces.clear(); - for (std::size_t i = 0; i < piecesText.size(); ++i) { - myPieces.push_back(piecesText.at(i)); - } - for (std::size_t i = 0; i < piecesFootnote.size(); ++i) { - myPieces.push_back(piecesFootnote.at(i)); - } - for (std::size_t i = 0; i < piecesOther.size(); ++i) { - myPieces.push_back(piecesOther.at(i)); - } - - //converting length and offset depending on isANSI - for (std::size_t i = 0; i < myPieces.size(); ++i) { - Piece &piece = myPieces.at(i); - if (!piece.IsANSI) { - piece.Length *= 2; - } else { - piece.Offset /= 2; - } - } - - //filling startCP field - unsigned int curStartCP = 0; - for (std::size_t i = 0; i < myPieces.size(); ++i) { - Piece &piece = myPieces.at(i); - piece.startCP = curStartCP; - if (piece.IsANSI) { - curStartCP += piece.Length; - } else { - curStartCP += piece.Length / 2; - } - } - return true; -} - -bool OleMainStream::readBookmarks(const char *headerBuffer, const OleEntry &tableEntry) { - //SttbfBkmk structure is a table of bookmark name strings - unsigned int beginNamesInfo = OleUtil::getU4Bytes(headerBuffer, 0x142); // address of SttbfBkmk structure - std::size_t namesInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0x146); // length of SttbfBkmk structure - - if (namesInfoLength == 0) { - return true; //there's no bookmarks - } - - OleStream tableStream(myStorage, tableEntry, myBaseStream); - std::string buffer; - if (!readToBuffer(buffer, beginNamesInfo, namesInfoLength, tableStream)) { - return false; - } - - unsigned int recordsNumber = OleUtil::getU2Bytes(buffer.c_str(), 0x2); //count of records - - std::vector<std::string> names; - unsigned int offset = 0x6; //initial offset - for (unsigned int i = 0; i < recordsNumber; ++i) { - if (buffer.size() < offset + 2) { - ZLLogger::Instance().println("DocPlugin", "problmes with reading bookmarks names"); - break; - } - unsigned int length = OleUtil::getU2Bytes(buffer.c_str(), offset) * 2; //length of string in bytes - ZLUnicodeUtil::Ucs2String name; - for (unsigned int j = 0; j < length; j+=2) { - char ch1 = buffer.at(offset + 2 + j); - char ch2 = buffer.at(offset + 2 + j + 1); - ZLUnicodeUtil::Ucs2Char ucs2Char = (unsigned int)ch1 | ((unsigned int)ch2 << 8); - name.push_back(ucs2Char); - } - std::string utf8Name; - ZLUnicodeUtil::ucs2ToUtf8(utf8Name, name); - names.push_back(utf8Name); - offset += length + 2; - } - - //plcfBkmkf structure is table recording beginning CPs of bookmarks - unsigned int beginCharPosInfo = OleUtil::getU4Bytes(headerBuffer, 0x14A); // address of plcfBkmkf structure - std::size_t charPosInfoLen = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0x14E); // length of plcfBkmkf structure - - if (charPosInfoLen == 0) { - return true; //there's no bookmarks - } - - if (!readToBuffer(buffer, beginCharPosInfo, charPosInfoLen, tableStream)) { - return false; - } - - static const unsigned int BKF_SIZE = 4; - std::size_t size = calcCountOfPLC(charPosInfoLen, BKF_SIZE); - std::vector<unsigned int> charPage; - for (std::size_t index = 0, offset = 0; index < size; ++index, offset += 4) { - charPage.push_back(OleUtil::getU4Bytes(buffer.c_str(), offset)); - } - - for (std::size_t i = 0; i < names.size(); ++i) { - if (i >= charPage.size()) { - break; //for the case if something in these structures goes wrong, to not to lose all bookmarks - } - Bookmark bookmark; - bookmark.CharPosition = charPage.at(i); - bookmark.Name = names.at(i); - myBookmarks.push_back(bookmark); - } - - return true; -} - -bool OleMainStream::readStylesheet(const char *headerBuffer, const OleEntry &tableEntry) { - //STSH structure is a stylesheet - unsigned int beginStshInfo = OleUtil::getU4Bytes(headerBuffer, 0xa2); // address of STSH structure - std::size_t stshInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0xa6); // length of STSH structure - - OleStream tableStream(myStorage, tableEntry, myBaseStream); - char *buffer = new char[stshInfoLength]; - if (!tableStream.seek(beginStshInfo, true)) { - ZLLogger::Instance().println("DocPlugin", "problems with reading STSH structure"); - return false; - } - if (tableStream.read(buffer, stshInfoLength) != stshInfoLength) { - ZLLogger::Instance().println("DocPlugin", "problems with reading STSH structure, invalid length"); - return false; - } - - std::size_t stdCount = (std::size_t)OleUtil::getU2Bytes(buffer, 2); - std::size_t stdBaseInFile = (std::size_t)OleUtil::getU2Bytes(buffer, 4); - myStyleSheet.resize(stdCount); - - std::vector<bool> isFilled; - isFilled.resize(stdCount, false); - - std::size_t stdLen = 0; - bool styleSheetWasChanged = false; - do { //make it in while loop, because some base style can be after their successors - styleSheetWasChanged = false; - for (std::size_t index = 0, offset = 2 + (std::size_t)OleUtil::getU2Bytes(buffer, 0); index < stdCount; index++, offset += 2 + stdLen) { - stdLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset); - if (isFilled.at(index)) { - continue; - } - - if (stdLen == 0) { - //if record is empty, left it default - isFilled[index] = true; - continue; - } - - Style styleInfo = myStyleSheet.at(index); - - const unsigned int styleAndBaseType = OleUtil::getU2Bytes(buffer, offset + 4); - const unsigned int styleType = styleAndBaseType % 16; - const unsigned int baseStyleId = styleAndBaseType / 16; - if (baseStyleId == Style::STYLE_NIL || baseStyleId == Style::STYLE_USER) { - //if based on nil or user style, left default - } else { - int baseStyleIndex = getStyleIndex(baseStyleId, isFilled, myStyleSheet); - if (baseStyleIndex < 0) { - //this base style is not filled yet, so pass it at some time - continue; - } - styleInfo = myStyleSheet.at(baseStyleIndex); - styleInfo.StyleIdCurrent = Style::STYLE_INVALID; - } - - // parse STD structure - unsigned int tmp = OleUtil::getU2Bytes(buffer, offset + 6); - unsigned int upxCount = tmp % 16; - styleInfo.StyleIdNext = tmp / 16; - - //adding current style - myStyleSheet[index] = styleInfo; - isFilled[index] = true; - styleSheetWasChanged = true; - - std::size_t pos = 2 + stdBaseInFile; - std::size_t nameLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset + pos); - nameLen = nameLen * 2 + 2; //from Unicode characters to bytes + Unicode null charachter length - pos += 2 + nameLen; - if (pos % 2 != 0) { - ++pos; - } - if (pos >= stdLen) { - continue; - } - std::size_t upxLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset + pos); - if (pos + upxLen > stdLen) { - //UPX length too large - continue; - } - //for style info styleType must be equal 1 - if (styleType == 1 && upxCount >= 1) { - if (upxLen >= 2) { - styleInfo.StyleIdCurrent = OleUtil::getU2Bytes(buffer, offset + pos + 2); - getStyleInfo(0, buffer + offset + pos + 4, upxLen - 2, styleInfo); - myStyleSheet[index] = styleInfo; - } - pos += 2 + upxLen; - if (pos % 2 != 0) { - ++pos; - } - upxLen = (std::size_t)OleUtil::getU2Bytes(buffer, offset + pos); - } - if (upxLen == 0 || pos + upxLen > stdLen) { - //too small/too large - continue; - } - //for char info styleType can be equal 1 or 2 - if ((styleType == 1 && upxCount >= 2) || (styleType == 2 && upxCount >= 1)) { - CharInfo charInfo; - getCharInfo(0, Style::STYLE_INVALID, buffer + offset + pos + 2, upxLen, charInfo); - styleInfo.CurrentCharInfo = charInfo; - myStyleSheet[index] = styleInfo; - } - } - } while (styleSheetWasChanged); - delete[] buffer; - return true; -} - -bool OleMainStream::readCharInfoTable(const char *headerBuffer, const OleEntry &tableEntry) { - //PlcfbteChpx structure is table with formatting for particular run of text - unsigned int beginCharInfo = OleUtil::getU4Bytes(headerBuffer, 0xfa); // address of PlcfbteChpx structure - std::size_t charInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0xfe); // length of PlcfbteChpx structure - if (charInfoLength < 4) { - return false; - } - - OleStream tableStream(myStorage, tableEntry, myBaseStream); - std::string buffer; - if (!readToBuffer(buffer, beginCharInfo, charInfoLength, tableStream)) { - return false; - } - - static const unsigned int CHPX_SIZE = 4; - std::size_t size = calcCountOfPLC(charInfoLength, CHPX_SIZE); - std::vector<unsigned int> charBlocks; - for (std::size_t index = 0, offset = (size + 1) * 4; index < size; ++index, offset += CHPX_SIZE) { - charBlocks.push_back(OleUtil::getU4Bytes(buffer.c_str(), offset)); - } - - char *formatPageBuffer = new char[OleStorage::BBD_BLOCK_SIZE]; - for (std::size_t index = 0; index < charBlocks.size(); ++index) { - seek(charBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true); - if (read(formatPageBuffer, OleStorage::BBD_BLOCK_SIZE) != OleStorage::BBD_BLOCK_SIZE) { - return false; - } - unsigned int crun = OleUtil::getU1Byte(formatPageBuffer, 0x1ff); //offset with crun (count of 'run of text') - for (unsigned int index2 = 0; index2 < crun; ++index2) { - unsigned int offset = OleUtil::getU4Bytes(formatPageBuffer, index2 * 4); - unsigned int chpxOffset = 2 * OleUtil::getU1Byte(formatPageBuffer, (crun + 1) * 4 + index2); - unsigned int len = OleUtil::getU1Byte(formatPageBuffer, chpxOffset); - unsigned int charPos = 0; - if (!offsetToCharPos(offset, charPos, myPieces)) { - continue; - } - unsigned int styleId = getStyleIdByCharPos(charPos, myStyleInfoList); - - CharInfo charInfo = getStyleFromStylesheet(styleId, myStyleSheet).CurrentCharInfo; - if (chpxOffset != 0) { - getCharInfo(chpxOffset, styleId, formatPageBuffer + 1, len - 1, charInfo); - } - myCharInfoList.push_back(CharPosToCharInfo(charPos, charInfo)); - - if (chpxOffset != 0) { - InlineImageInfo pictureInfo; - if (getInlineImageInfo(chpxOffset, formatPageBuffer + 1, len - 1, pictureInfo)) { - myInlineImageInfoList.push_back(CharPosToInlineImageInfo(charPos, pictureInfo)); - } - } - - } - } - delete[] formatPageBuffer; - return true; -} - -bool OleMainStream::readFloatingImages(const char *headerBuffer, const OleEntry &tableEntry) { - //Plcspa structure is a table with information for FSPA (File Shape Address) - unsigned int beginPicturesInfo = OleUtil::getU4Bytes(headerBuffer, 0x01DA); // address of Plcspa structure - if (beginPicturesInfo == 0) { - return true; //there's no office art objects - } - unsigned int picturesInfoLength = OleUtil::getU4Bytes(headerBuffer, 0x01DE); // length of Plcspa structure - if (picturesInfoLength < 4) { - return false; - } - - OleStream tableStream(myStorage, tableEntry, myBaseStream); - std::string buffer; - if (!readToBuffer(buffer, beginPicturesInfo, picturesInfoLength, tableStream)) { - return false; - } - - static const unsigned int SPA_SIZE = 26; - std::size_t size = calcCountOfPLC(picturesInfoLength, SPA_SIZE); - - std::vector<unsigned int> picturesBlocks; - for (std::size_t index = 0, tOffset = 0; index < size; ++index, tOffset += 4) { - picturesBlocks.push_back(OleUtil::getU4Bytes(buffer.c_str(), tOffset)); - } - - for (std::size_t index = 0, tOffset = (size + 1) * 4; index < size; ++index, tOffset += SPA_SIZE) { - unsigned int spid = OleUtil::getU4Bytes(buffer.c_str(), tOffset); - FloatImageInfo info; - unsigned int charPos = picturesBlocks.at(index); - info.ShapeId = spid; - myFloatImageInfoList.push_back(CharPosToFloatImageInfo(charPos, info)); - } - - //DggInfo structure is office art object table data - unsigned int beginOfficeArtContent = OleUtil::getU4Bytes(headerBuffer, 0x22A); // address of DggInfo structure - if (beginOfficeArtContent == 0) { - return true; //there's no office art objects - } - unsigned int officeArtContentLength = OleUtil::getU4Bytes(headerBuffer, 0x022E); // length of DggInfo structure - if (officeArtContentLength < 4) { - return false; - } - - shared_ptr<OleStream> newTableStream = new OleStream(myStorage, tableEntry, myBaseStream); - shared_ptr<OleStream> newMainStream = new OleStream(myStorage, myOleEntry, myBaseStream); - if (newTableStream->open() && newMainStream->open()) { - myFLoatImageReader = new DocFloatImageReader(beginOfficeArtContent, officeArtContentLength, newTableStream, newMainStream); - myFLoatImageReader->readAll(); - } - return true; -} - -bool OleMainStream::readParagraphStyleTable(const char *headerBuffer, const OleEntry &tableEntry) { - //PlcBtePapx structure is table with formatting for all paragraphs - unsigned int beginParagraphInfo = OleUtil::getU4Bytes(headerBuffer, 0x102); // address of PlcBtePapx structure - std::size_t paragraphInfoLength = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0x106); // length of PlcBtePapx structure - if (paragraphInfoLength < 4) { - return false; - } - - OleStream tableStream(myStorage, tableEntry, myBaseStream); - std::string buffer; - if (!readToBuffer(buffer, beginParagraphInfo, paragraphInfoLength, tableStream)) { - return false; - } - - static const unsigned int PAPX_SIZE = 4; - std::size_t size = calcCountOfPLC(paragraphInfoLength, PAPX_SIZE); - - std::vector<unsigned int> paragraphBlocks; - for (std::size_t index = 0, tOffset = (size + 1) * 4; index < size; ++index, tOffset += PAPX_SIZE) { - paragraphBlocks.push_back(OleUtil::getU4Bytes(buffer.c_str(), tOffset)); - } - - char *formatPageBuffer = new char[OleStorage::BBD_BLOCK_SIZE]; - for (std::size_t index = 0; index < paragraphBlocks.size(); ++index) { - seek(paragraphBlocks.at(index) * OleStorage::BBD_BLOCK_SIZE, true); - if (read(formatPageBuffer, OleStorage::BBD_BLOCK_SIZE) != OleStorage::BBD_BLOCK_SIZE) { - return false; - } - const unsigned int paragraphsCount = OleUtil::getU1Byte(formatPageBuffer, 0x1ff); //offset with 'cpara' value (count of paragraphs) - for (unsigned int index2 = 0; index2 < paragraphsCount; ++index2) { - const unsigned int offset = OleUtil::getU4Bytes(formatPageBuffer, index2 * 4); - unsigned int papxOffset = OleUtil::getU1Byte(formatPageBuffer, (paragraphsCount + 1) * 4 + index2 * 13) * 2; - if (papxOffset <= 0) { - continue; - } - unsigned int len = OleUtil::getU1Byte(formatPageBuffer, papxOffset) * 2; - if (len == 0) { - ++papxOffset; - len = OleUtil::getU1Byte(formatPageBuffer, papxOffset) * 2; - } - - const unsigned int styleId = OleUtil::getU2Bytes(formatPageBuffer, papxOffset + 1); - Style styleInfo = getStyleFromStylesheet(styleId, myStyleSheet); - - if (len >= 3) { - getStyleInfo(papxOffset, formatPageBuffer + 3, len - 3, styleInfo); - } - - unsigned int charPos = 0; - if (!offsetToCharPos(offset, charPos, myPieces)) { - continue; - } - myStyleInfoList.push_back(CharPosToStyle(charPos, styleInfo)); - } - } - delete[] formatPageBuffer; - return true; -} - -bool OleMainStream::readSectionsInfoTable(const char *headerBuffer, const OleEntry &tableEntry) { - //PlcfSed structure is a section table - unsigned int beginOfText = OleUtil::getU4Bytes(headerBuffer, 0x18); //address of text's begin in main stream - unsigned int beginSectInfo = OleUtil::getU4Bytes(headerBuffer, 0xca); //address if PlcfSed structure - - std::size_t sectInfoLen = (std::size_t)OleUtil::getU4Bytes(headerBuffer, 0xce); //length of PlcfSed structure - if (sectInfoLen < 4) { - return false; - } - - OleStream tableStream(myStorage, tableEntry, myBaseStream); - std::string buffer; - if (!readToBuffer(buffer, beginSectInfo, sectInfoLen, tableStream)) { - return false; - } - - static const unsigned int SED_SIZE = 12; - std::size_t decriptorsCount = calcCountOfPLC(sectInfoLen, SED_SIZE); - - //saving the section offsets (in character positions) - std::vector<unsigned int> charPos; - for (std::size_t index = 0, tOffset = 0; index < decriptorsCount; ++index, tOffset += 4) { - unsigned int ulTextOffset = OleUtil::getU4Bytes(buffer.c_str(), tOffset); - charPos.push_back(beginOfText + ulTextOffset); - } - - //saving sepx offsets - std::vector<unsigned int> sectPage; - for (std::size_t index = 0, tOffset = (decriptorsCount + 1) * 4; index < decriptorsCount; ++index, tOffset += SED_SIZE) { - sectPage.push_back(OleUtil::getU4Bytes(buffer.c_str(), tOffset + 2)); - } - - //reading the section properties - char tmpBuffer[2]; - for (std::size_t index = 0; index < sectPage.size(); ++index) { - if (sectPage.at(index) == 0xffffffffUL) { //check for invalid record, to make default section info - SectionInfo sectionInfo; - sectionInfo.CharPosition = charPos.at(index); - mySectionInfoList.push_back(sectionInfo); - continue; - } - //getting number of bytes to read - if (!seek(sectPage.at(index), true)) { - continue; - } - if (read(tmpBuffer, 2) != 2) { - continue; - } - std::size_t bytes = 2 + (std::size_t)OleUtil::getU2Bytes(tmpBuffer, 0); - - if (!seek(sectPage.at(index), true)) { - continue; - } - char *formatPageBuffer = new char[bytes]; - if (read(formatPageBuffer, bytes) != bytes) { - delete[] formatPageBuffer; - continue; - } - SectionInfo sectionInfo; - sectionInfo.CharPosition = charPos.at(index); - getSectionInfo(formatPageBuffer + 2, bytes - 2, sectionInfo); - mySectionInfoList.push_back(sectionInfo); - delete[] formatPageBuffer; - } - return true; -} - -void OleMainStream::getStyleInfo(unsigned int papxOffset, const char *grpprlBuffer, unsigned int bytes, Style &styleInfo) { - int tmp, toDelete, toAdd; - unsigned int offset = 0; - while (bytes >= offset + 2) { - unsigned int curPrlLength = 0; - switch (OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset)) { - case 0x2403: - styleInfo.Alignment = (Style::AlignmentType)OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 2); - break; - case 0x4610: - styleInfo.LeftIndent += OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2); - if (styleInfo.LeftIndent < 0) { - styleInfo.LeftIndent = 0; - } - break; - case 0xc60d: // ChgTabsPapx - case 0xc615: // ChgTabs - tmp = OleUtil::get1Byte(grpprlBuffer, papxOffset + offset + 2); - if (tmp < 2) { - curPrlLength = 1; - break; - } - toDelete = OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 3); - if (tmp < 2 + 2 * toDelete) { - curPrlLength = 1; - break; - } - toAdd = OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 4 + 2 * toDelete); - if (tmp < 2 + 2 * toDelete + 2 * toAdd) { - curPrlLength = 1; - break; - } - break; - case 0x840e: - styleInfo.RightIndent = (int)OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2); - break; - case 0x840f: - styleInfo.LeftIndent = (int)OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2); - break; - case 0x8411: - styleInfo.FirstLineIndent = (int)OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2); - break; - case 0xa413: - styleInfo.BeforeParagraphIndent = OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2); - break; - case 0xa414: - styleInfo.AfterParagraphIndent = OleUtil::getU2Bytes(grpprlBuffer, papxOffset + offset + 2); - break; - case 0x2407: - styleInfo.HasPageBreakBefore = OleUtil::getU1Byte(grpprlBuffer, papxOffset + offset + 2) == 0x01; - break; - default: - break; - } - if (curPrlLength == 0) { - curPrlLength = getPrlLength(grpprlBuffer, papxOffset + offset); - } - offset += curPrlLength; - } - -} - -void OleMainStream::getCharInfo(unsigned int chpxOffset, unsigned int /*styleId*/, const char *grpprlBuffer, unsigned int bytes, CharInfo &charInfo) { - unsigned int sprm = 0; //single propery modifier - unsigned int offset = 0; - while (bytes >= offset + 2) { - switch (OleUtil::getU2Bytes(grpprlBuffer, chpxOffset + offset)) { - case 0x0835: //bold - sprm = OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2); - switch (sprm) { - case UNSET: - charInfo.FontStyle &= ~CharInfo::FONT_BOLD; - break; - case SET: - charInfo.FontStyle |= CharInfo::FONT_BOLD; - break; - case UNCHANGED: - break; - case NEGATION: - charInfo.FontStyle ^= CharInfo::FONT_BOLD; - break; - default: - break; - } - break; - case 0x0836: //italic - sprm = OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2); - switch (sprm) { - case UNSET: - charInfo.FontStyle &= ~CharInfo::FONT_ITALIC; - break; - case SET: - charInfo.FontStyle |= CharInfo::FONT_ITALIC; - break; - case UNCHANGED: - break; - case NEGATION: - charInfo.FontStyle ^= CharInfo::FONT_ITALIC; - break; - default: - break; - } - break; - case 0x4a43: //size of font - charInfo.FontSize = OleUtil::getU2Bytes(grpprlBuffer, chpxOffset + offset + 2); - break; - default: - break; - } - offset += getPrlLength(grpprlBuffer, chpxOffset + offset); - } - -} - -void OleMainStream::getSectionInfo(const char *grpprlBuffer, std::size_t bytes, SectionInfo §ionInfo) { - unsigned int tmp; - std::size_t offset = 0; - while (bytes >= offset + 2) { - switch (OleUtil::getU2Bytes(grpprlBuffer, offset)) { - case 0x3009: //new page - tmp = OleUtil::getU1Byte(grpprlBuffer, offset + 2); - sectionInfo.IsNewPage = (tmp != 0 && tmp != 1); - break; - default: - break; - } - offset += getPrlLength(grpprlBuffer, offset); - } -} - -bool OleMainStream::getInlineImageInfo(unsigned int chpxOffset, const char *grpprlBuffer, unsigned int bytes, InlineImageInfo &pictureInfo) { - //p. 105 of [MS-DOC] documentation - unsigned int offset = 0; - bool isFound = false; - while (bytes >= offset + 2) { - switch (OleUtil::getU2Bytes(grpprlBuffer, chpxOffset + offset)) { - case 0x080a: // ole object, p.107 [MS-DOC] - if (OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2) == 0x01) { - return false; - } - break; - case 0x0806: // is not a picture, but a binary data? (sprmCFData, p.106 [MS-DOC]) - if (OleUtil::getU4Bytes(grpprlBuffer, chpxOffset + offset + 2) == 0x01) { - return false; - } - break; -// case 0x0855: // sprmCFSpec, p.117 [MS-DOC], MUST BE applied with a value of 1 (see p.105 [MS-DOC]) -// if (OleUtil::getU1Byte(grpprlBuffer, chpxOffset + offset + 2) != 0x01) { -// return false; -// } -// break; - case 0x6a03: // location p.105 [MS-DOC] - pictureInfo.DataPosition = OleUtil::getU4Bytes(grpprlBuffer, chpxOffset + offset + 2); - isFound = true; - break; - default: - break; - } - offset += getPrlLength(grpprlBuffer, chpxOffset + offset); - } - return isFound; -} - -OleMainStream::Style OleMainStream::getStyleFromStylesheet(unsigned int styleId, const StyleSheet &stylesheet) { - //TODO optimize it: StyleSheet can be map structure with styleId key - Style style; - if (styleId != Style::STYLE_INVALID && styleId != Style::STYLE_NIL && styleId != Style::STYLE_USER) { - for (std::size_t index = 0; index < stylesheet.size(); ++index) { - if (stylesheet.at(index).StyleIdCurrent == styleId) { - return stylesheet.at(index); - } - } - } - style.StyleIdCurrent = styleId; - return style; -} - -int OleMainStream::getStyleIndex(unsigned int styleId, const std::vector<bool> &isFilled, const StyleSheet &stylesheet) { - //TODO optimize it: StyleSheet can be map structure with styleId key - //in that case, this method will be excess - if (styleId == Style::STYLE_INVALID) { - return -1; - } - for (int index = 0; index < (int)stylesheet.size(); ++index) { - if (isFilled.at(index) && stylesheet.at(index).StyleIdCurrent == styleId) { - return index; - } - } - return -1; -} - -unsigned int OleMainStream::getStyleIdByCharPos(unsigned int charPos, const StyleInfoList &styleInfoList) { - unsigned int styleId = Style::STYLE_INVALID; - for (std::size_t i = 0; i < styleInfoList.size(); ++i) { - const Style &info = styleInfoList.at(i).second; - if (i == styleInfoList.size() - 1) { //if last - styleId = info.StyleIdCurrent; - break; - } - unsigned int curOffset = styleInfoList.at(i).first; - unsigned int nextOffset = styleInfoList.at(i + 1).first; - if (charPos >= curOffset && charPos < nextOffset) { - styleId = info.StyleIdCurrent; - break; - } - } - return styleId; -} - -bool OleMainStream::offsetToCharPos(unsigned int offset, unsigned int &charPos, const Pieces &pieces) { - if (pieces.empty()) { - return false; - } - if ((unsigned int)pieces.front().Offset > offset) { - charPos = 0; - return true; - } - if ((unsigned int)(pieces.back().Offset + pieces.back().Length) <= offset) { - return false; - } - - std::size_t pieceNumber = 0; - for (std::size_t i = 0; i < pieces.size(); ++i) { - if (i == pieces.size() - 1) { //if last - pieceNumber = i; - break; - } - unsigned int curOffset = pieces.at(i).Offset; - unsigned int nextOffset = pieces.at(i + 1).Offset; - if (offset >= curOffset && offset < nextOffset) { - pieceNumber = i; - break; - } - } - - const Piece &piece = pieces.at(pieceNumber); - unsigned int diffOffset = offset - piece.Offset; - if (!piece.IsANSI) { - diffOffset /= 2; - } - charPos = piece.startCP + diffOffset; - return true; -} - -bool OleMainStream::readToBuffer(std::string &result, unsigned int offset, std::size_t length, OleStream &stream) { - char *buffer = new char[length]; - stream.seek(offset, true); - if (stream.read(buffer, length) != length) { - return false; - } - result = std::string(buffer, length); - delete[] buffer; - return true; -} - -unsigned int OleMainStream::calcCountOfPLC(unsigned int totalSize, unsigned int elementSize) { - //calculates count of elements in PLC structure, formula from p.30 [MS-DOC] - return (totalSize - 4) / (4 + elementSize); -} - -unsigned int OleMainStream::getPrlLength(const char *grpprlBuffer, unsigned int byteNumber) { - unsigned int tmp; - unsigned int opCode = OleUtil::getU2Bytes(grpprlBuffer, byteNumber); - switch (opCode & 0xe000) { - case 0x0000: - case 0x2000: - return 3; - case 0x4000: - case 0x8000: - case 0xA000: - return 4; - case 0xE000: - return 5; - case 0x6000: - return 6; - case 0xC000: - //counting of info length - tmp = OleUtil::getU1Byte(grpprlBuffer, byteNumber + 2); - if (opCode == 0xc615 && tmp == 255) { - unsigned int del = OleUtil::getU1Byte(grpprlBuffer, byteNumber + 3); - unsigned int add = OleUtil::getU1Byte(grpprlBuffer, byteNumber + 4 + del * 4); - tmp = 2 + del * 4 + add * 3; - } - return 3 + tmp; - default: - return 1; - } -} |
