diff options
Diffstat (limited to 'reader/src/formats/rtf')
| -rw-r--r-- | reader/src/formats/rtf/RtfBookReader.cpp | 232 | ||||
| -rw-r--r-- | reader/src/formats/rtf/RtfBookReader.h | 71 | ||||
| -rw-r--r-- | reader/src/formats/rtf/RtfDescriptionReader.cpp | 100 | ||||
| -rw-r--r-- | reader/src/formats/rtf/RtfDescriptionReader.h | 55 | ||||
| -rw-r--r-- | reader/src/formats/rtf/RtfPlugin.cpp | 63 | ||||
| -rw-r--r-- | reader/src/formats/rtf/RtfPlugin.h | 35 | ||||
| -rw-r--r-- | reader/src/formats/rtf/RtfReader.cpp | 470 | ||||
| -rw-r--r-- | reader/src/formats/rtf/RtfReader.h | 209 | ||||
| -rw-r--r-- | reader/src/formats/rtf/RtfReaderStream.cpp | 175 | ||||
| -rw-r--r-- | reader/src/formats/rtf/RtfReaderStream.h | 50 |
10 files changed, 1460 insertions, 0 deletions
diff --git a/reader/src/formats/rtf/RtfBookReader.cpp b/reader/src/formats/rtf/RtfBookReader.cpp new file mode 100644 index 0000000..cf16bc7 --- /dev/null +++ b/reader/src/formats/rtf/RtfBookReader.cpp @@ -0,0 +1,232 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <cctype> + +#include <ZLStringUtil.h> +#include <ZLFileImage.h> +#include <ZLTextStyleEntry.h> + +#include "RtfBookReader.h" +#include "../../bookmodel/BookModel.h" + +RtfBookReader::RtfBookReader(BookModel &model, const std::string &encoding) : RtfReader(encoding), myBookReader(model) { +} + +static const std::size_t maxBufferSize = 1024; + +void RtfBookReader::addCharData(const char *data, std::size_t len, bool convert) { + if (myCurrentState.ReadText) { + if (convert || myConverter.isNull()) { + myOutputBuffer.append(data, len); + if (myOutputBuffer.size() >= maxBufferSize) { + flushBuffer(); + } + } else { + flushBuffer(); + std::string newString(data, len); + characterDataHandler(newString); + } + } +} + +void RtfBookReader::flushBuffer() { + if (!myOutputBuffer.empty()) { + if (myCurrentState.ReadText) { + if (!myConverter.isNull()) { + static std::string newString; + myConverter->convert(newString, myOutputBuffer.data(), myOutputBuffer.data() + myOutputBuffer.length()); + characterDataHandler(newString); + newString.erase(); + } else { + characterDataHandler(myOutputBuffer); + } + } + myOutputBuffer.erase(); + } +} + +void RtfBookReader::switchDestination(DestinationType destination, bool on) { + switch (destination) { + case DESTINATION_NONE: + break; + case DESTINATION_SKIP: + case DESTINATION_INFO: + case DESTINATION_TITLE: + case DESTINATION_AUTHOR: + case DESTINATION_STYLESHEET: + myCurrentState.ReadText = !on; + break; + case DESTINATION_PICTURE: + if (on) { + flushBuffer(); + if (myBookReader.paragraphIsOpen()) { + myBookReader.endParagraph(); + } + } + myCurrentState.ReadText = !on; + break; + case DESTINATION_FOOTNOTE: + flushBuffer(); + if (on) { + std::string id; + ZLStringUtil::appendNumber(id, myFootnoteIndex++); + + myStateStack.push(myCurrentState); + myCurrentState.Id = id; + myCurrentState.ReadText = true; + + myBookReader.addHyperlinkControl(FOOTNOTE, id); + myBookReader.addData(id); + myBookReader.addControl(FOOTNOTE, false); + + myBookReader.setFootnoteTextModel(id); + myBookReader.addHyperlinkLabel(id); + myBookReader.pushKind(REGULAR); + myBookReader.beginParagraph(); + } else { + myBookReader.endParagraph(); + myBookReader.popKind(); + + if (!myStateStack.empty()) { + myCurrentState = myStateStack.top(); + myStateStack.pop(); + } + + if (myStateStack.empty()) { + myBookReader.setMainTextModel(); + } else { + myBookReader.setFootnoteTextModel(myCurrentState.Id); + } + } + break; + } +} + +void RtfBookReader::insertImage(shared_ptr<ZLMimeType> mimeType, const std::string &fileName, std::size_t startOffset, std::size_t size) { + std::string id; + ZLStringUtil::appendNumber(id, myImageIndex++); + myBookReader.addImageReference(id); + const ZLFile file(fileName, mimeType); + myBookReader.addImage(id, new ZLFileImage(file, startOffset, size, ZLFileImage::ENCODING_HEX)); +} + +bool RtfBookReader::characterDataHandler(std::string &str) { + if (myCurrentState.ReadText) { + if (!myBookReader.paragraphIsOpen()) { + myBookReader.beginParagraph(); + } + myBookReader.addData(str); + } + return true; +} + +bool RtfBookReader::readDocument(const ZLFile &file) { + myImageIndex = 0; + myFootnoteIndex = 1; + + myCurrentState.ReadText = true; + + myBookReader.setMainTextModel(); + myBookReader.pushKind(REGULAR); + myBookReader.beginParagraph(); + + bool code = RtfReader::readDocument(file); + + flushBuffer(); + myBookReader.endParagraph(); + while (!myStateStack.empty()) { + myStateStack.pop(); + } + + return code; +} + +void RtfBookReader::setFontProperty(FontProperty property) { + if (!myCurrentState.ReadText) { + //DPRINT("change style not in text.\n"); + return; + } + flushBuffer(); + + switch (property) { + case FONT_BOLD: + if (myState.Bold) { + myBookReader.pushKind(STRONG); + } else { + myBookReader.popKind(); + } + myBookReader.addControl(STRONG, myState.Bold); + break; + case FONT_ITALIC: + if (myState.Italic) { + if (!myState.Bold) { + //DPRINT("add style emphasis.\n"); + myBookReader.pushKind(EMPHASIS); + myBookReader.addControl(EMPHASIS, true); + } else { + //DPRINT("add style emphasis and strong.\n"); + myBookReader.popKind(); + myBookReader.addControl(STRONG, false); + + myBookReader.pushKind(EMPHASIS); + myBookReader.addControl(EMPHASIS, true); + myBookReader.pushKind(STRONG); + myBookReader.addControl(STRONG, true); + } + } else { + if (!myState.Bold) { + //DPRINT("remove style emphasis.\n"); + myBookReader.addControl(EMPHASIS, false); + myBookReader.popKind(); + } else { + //DPRINT("remove style strong n emphasis, add strong.\n"); + myBookReader.addControl(STRONG, false); + myBookReader.popKind(); + myBookReader.addControl(EMPHASIS, false); + myBookReader.popKind(); + + myBookReader.pushKind(STRONG); + myBookReader.addControl(STRONG, true); + } + } + break; + case FONT_UNDERLINED: + break; + } +} + +void RtfBookReader::newParagraph() { + flushBuffer(); + myBookReader.endParagraph(); + myBookReader.beginParagraph(); + if (myState.Alignment != ALIGN_UNDEFINED) { + setAlignment(); + } +} + +void RtfBookReader::setEncoding(int) { +} + +void RtfBookReader::setAlignment() { + ZLTextStyleEntry entry(ZLTextStyleEntry::STYLE_OTHER_ENTRY); + entry.setAlignmentType(myState.Alignment); + myBookReader.addStyleEntry(entry); + // TODO: call addStyleCloseEntry somewhere (?) +} diff --git a/reader/src/formats/rtf/RtfBookReader.h b/reader/src/formats/rtf/RtfBookReader.h new file mode 100644 index 0000000..a977cbd --- /dev/null +++ b/reader/src/formats/rtf/RtfBookReader.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#ifndef __RTFBOOKREADER_H__ +#define __RTFBOOKREADER_H__ + +#include <vector> + +#include "RtfReader.h" +#include "../../bookmodel/BookReader.h" + +class ZLFile; + +class BookModel; + +class RtfBookReader : public RtfReader { + +public: + RtfBookReader(BookModel &model, const std::string &encoding); + ~RtfBookReader(); + + bool readDocument(const ZLFile &file); + + bool characterDataHandler(std::string &str); + void flushBuffer(); + + void setEncoding(int code); + void setAlignment(); + void switchDestination(DestinationType destination, bool on); + void addCharData(const char *data, std::size_t len, bool convert); + void insertImage(shared_ptr<ZLMimeType> mimeType, const std::string &fileName, std::size_t startOffset, std::size_t size); + + void setFontProperty(FontProperty property); + void newParagraph(); + +private: + BookReader myBookReader; + + std::string myOutputBuffer; + + int myImageIndex; + int myFootnoteIndex; + + struct RtfBookReaderState { + std::string Id; + bool ReadText; + }; + + RtfBookReaderState myCurrentState; + std::stack<RtfBookReaderState> myStateStack; +}; + +inline RtfBookReader::~RtfBookReader() {} + +#endif /* __RTFBOOKREADER_H__ */ diff --git a/reader/src/formats/rtf/RtfDescriptionReader.cpp b/reader/src/formats/rtf/RtfDescriptionReader.cpp new file mode 100644 index 0000000..571e66b --- /dev/null +++ b/reader/src/formats/rtf/RtfDescriptionReader.cpp @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <ZLInputStream.h> + +#include "RtfDescriptionReader.h" + +#include "../FormatPlugin.h" +#include "../../library/Book.h" +#include "../../library/Author.h" + +RtfDescriptionReader::RtfDescriptionReader(Book &book) : RtfReader(book.encoding()), myBook(book) { +} + +void RtfDescriptionReader::setEncoding(int code) { + ZLEncodingCollection &collection = ZLEncodingCollection::Instance(); + ZLEncodingConverterInfoPtr info = collection.info(code); + if (!info.isNull()) { + myConverter = info->createConverter(); + myBook.setEncoding(info->name()); + } else { + myConverter = collection.defaultConverter(); + } +} + +bool RtfDescriptionReader::readDocument(const ZLFile &file) { + myDoRead = false; + bool code = RtfReader::readDocument(file); + if (myBook.encoding().empty()) { + myBook.setEncoding(PluginCollection::Instance().DefaultEncodingOption.value()); + } + return code; +} + +void RtfDescriptionReader::addCharData(const char *data, std::size_t len, bool convert) { + if (myDoRead && len > 0) { + if (convert) { + myConverter->convert(myBuffer, data, data + len); + } else { + myBuffer.append(data, len); + } + } +} + +void RtfDescriptionReader::switchDestination(DestinationType destination, bool on) { + switch (destination) { + case DESTINATION_INFO: + if (!on) { + interrupt(); + } + break; + case DESTINATION_TITLE: + myDoRead = on; + if (!on) { + myBook.setTitle(myBuffer); + myBuffer.erase(); + } + break; + case DESTINATION_AUTHOR: + myDoRead = on; + if (!on) { + myBook.addAuthor(myBuffer); + myBuffer.erase(); + } + break; + default: + break; + } + if (!myBook.title().empty() && !myBook.authors().empty() && !myBook.encoding().empty()) { + interrupt(); + } +} + +void RtfDescriptionReader::insertImage(shared_ptr<ZLMimeType>, const std::string&, std::size_t, std::size_t) { +} + +void RtfDescriptionReader::setFontProperty(FontProperty) { +} + +void RtfDescriptionReader::newParagraph() { +} + +void RtfDescriptionReader::setAlignment() { +} diff --git a/reader/src/formats/rtf/RtfDescriptionReader.h b/reader/src/formats/rtf/RtfDescriptionReader.h new file mode 100644 index 0000000..ff4ffa1 --- /dev/null +++ b/reader/src/formats/rtf/RtfDescriptionReader.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#ifndef __RTFDESCRIPTIONREADER_H__ +#define __RTFDESCRIPTIONREADER_H__ + +#include <string> + +#include "RtfReader.h" + +class Book; + +class RtfDescriptionReader : public RtfReader { + +public: + RtfDescriptionReader(Book &book); + ~RtfDescriptionReader(); + + bool readDocument(const ZLFile &file); + + void setEncoding(int code); + void setAlignment(); + void switchDestination(DestinationType destination, bool on); + void addCharData(const char *data, std::size_t len, bool convert); + void insertImage(shared_ptr<ZLMimeType> mimeType, const std::string &fileName, std::size_t startOffset, std::size_t size); + + void setFontProperty(FontProperty property); + void newParagraph(); + +private: + Book &myBook; + + bool myDoRead; + std::string myBuffer; +}; + +inline RtfDescriptionReader::~RtfDescriptionReader() {} + +#endif /* __RTFDESCRIPTIONREADER_H__ */ diff --git a/reader/src/formats/rtf/RtfPlugin.cpp b/reader/src/formats/rtf/RtfPlugin.cpp new file mode 100644 index 0000000..42ce39b --- /dev/null +++ b/reader/src/formats/rtf/RtfPlugin.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <ZLStringUtil.h> +#include <ZLFile.h> +#include <ZLInputStream.h> + +#include "RtfPlugin.h" +#include "RtfDescriptionReader.h" +#include "RtfBookReader.h" +#include "RtfReaderStream.h" + +#include "../../bookmodel/BookModel.h" +#include "../../library/Book.h" + +bool RtfPlugin::providesMetaInfo() const { + return false; +} + +bool RtfPlugin::acceptsFile(const ZLFile &file) const { + return file.extension() == "rtf"; +} + +bool RtfPlugin::readMetaInfo(Book &book) const { + shared_ptr<ZLInputStream> stream = new RtfReaderStream(book.file(), 50000); + + if (stream.isNull()) { + return false; + } + + detectEncodingAndLanguage(book, *stream); + + if (!RtfDescriptionReader(book).readDocument(book.file())) { + return false; + } + + return true; +} + +bool RtfPlugin::readModel(BookModel &model) const { + const Book &book = *model.book(); + return RtfBookReader(model, book.encoding()).readDocument(book.file()); +} +bool RtfPlugin::readLanguageAndEncoding(Book &book) const { + (void)book; + return true; +} diff --git a/reader/src/formats/rtf/RtfPlugin.h b/reader/src/formats/rtf/RtfPlugin.h new file mode 100644 index 0000000..cb3ef9d --- /dev/null +++ b/reader/src/formats/rtf/RtfPlugin.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#ifndef __RTFPLUGIN_H__ +#define __RTFPLUGIN_H__ + +#include "../FormatPlugin.h" + +class RtfPlugin : public FormatPlugin { + +public: + bool providesMetaInfo() const; + bool acceptsFile(const ZLFile &file) const; + bool readMetaInfo(Book &book) const; + bool readLanguageAndEncoding(Book &book) const; + bool readModel(BookModel &model) const; +}; + +#endif /* __RTFPLUGIN_H__ */ diff --git a/reader/src/formats/rtf/RtfReader.cpp b/reader/src/formats/rtf/RtfReader.cpp new file mode 100644 index 0000000..91fea0c --- /dev/null +++ b/reader/src/formats/rtf/RtfReader.cpp @@ -0,0 +1,470 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <cstdlib> +#include <cctype> + +#include <ZLFile.h> +#include <ZLInputStream.h> + +#include "RtfReader.h" + +std::map<std::string, RtfCommand*> RtfReader::ourKeywordMap; + +static const int rtfStreamBufferSize = 4096; + +RtfReader::RtfReader(const std::string &encoding) : EncodedTextReader(encoding) { + myNextImageMimeType = ZLMimeType::EMPTY; +} + +RtfReader::~RtfReader() { +} + +RtfCommand::~RtfCommand() { +} + +void RtfDummyCommand::run(RtfReader&, int*) const { +} + +void RtfNewParagraphCommand::run(RtfReader &reader, int*) const { + reader.newParagraph(); +} + +RtfFontPropertyCommand::RtfFontPropertyCommand(RtfReader::FontProperty property) : myProperty(property) { +} + +void RtfFontPropertyCommand::run(RtfReader &reader, int *parameter) const { + const bool start = (parameter == 0) || (*parameter != 0); + switch (myProperty) { + case RtfReader::FONT_BOLD: + if (reader.myState.Bold != start) { + reader.myState.Bold = start; + reader.setFontProperty(RtfReader::FONT_BOLD); + } + break; + case RtfReader::FONT_ITALIC: + if (reader.myState.Italic != start) { + reader.myState.Italic = start; + reader.setFontProperty(RtfReader::FONT_ITALIC); + } + break; + case RtfReader::FONT_UNDERLINED: + if (reader.myState.Underlined != start) { + reader.myState.Underlined = start; + reader.setFontProperty(RtfReader::FONT_UNDERLINED); + } + break; + } +} + +RtfAlignmentCommand::RtfAlignmentCommand(ZLTextAlignmentType alignment) : myAlignment(alignment) { +} + +void RtfAlignmentCommand::run(RtfReader &reader, int*) const { + if (reader.myState.Alignment != myAlignment) { + reader.myState.Alignment = myAlignment; + reader.setAlignment(); + } +} + +RtfCharCommand::RtfCharCommand(const std::string &chr) : myChar(chr) { +} + +void RtfCharCommand::run(RtfReader &reader, int*) const { + reader.processCharData(myChar.data(), myChar.length(), false); +} + +RtfDestinationCommand::RtfDestinationCommand(RtfReader::DestinationType destination) : myDestination(destination) { +} + +void RtfDestinationCommand::run(RtfReader &reader, int*) const { + if (reader.myState.Destination == myDestination) { + return; + } + reader.myState.Destination = myDestination; + if (myDestination == RtfReader::DESTINATION_PICTURE) { + reader.myState.ReadDataAsHex = true; + reader.myNextImageMimeType = ZLMimeType::EMPTY; + } + reader.switchDestination(myDestination, true); +} + +void RtfStyleCommand::run(RtfReader &reader, int*) const { + if (reader.myState.Destination == RtfReader::DESTINATION_STYLESHEET) { + //std::cerr << "Add style index: " << val << "\n"; + + //sprintf(style_attributes[0], "%i", val); + } else /*if (myState.Destination == rdsContent)*/ { + //std::cerr << "Set style index: " << val << "\n"; + + //sprintf(style_attributes[0], "%i", val); + } +} + +void RtfCodepageCommand::run(RtfReader &reader, int *parameter) const { + if (parameter != 0) { + reader.setEncoding(*parameter); + } +} + +void RtfSpecialCommand::run(RtfReader &reader, int*) const { + reader.mySpecialMode = true; +} + +RtfPictureCommand::RtfPictureCommand(shared_ptr<ZLMimeType> mimeType) : myMimeType(mimeType) { +} + +void RtfPictureCommand::run(RtfReader &reader, int*) const { + reader.myNextImageMimeType = myMimeType; +} + +void RtfFontResetCommand::run(RtfReader &reader, int*) const { + if (reader.myState.Bold) { + reader.myState.Bold = false; + reader.setFontProperty(RtfReader::FONT_BOLD); + } + if (reader.myState.Italic) { + reader.myState.Italic = false; + reader.setFontProperty(RtfReader::FONT_ITALIC); + } + if (reader.myState.Underlined) { + reader.myState.Underlined = false; + reader.setFontProperty(RtfReader::FONT_UNDERLINED); + } +} + +void RtfReader::addAction(const std::string &tag, RtfCommand *command) { + ourKeywordMap.insert(std::make_pair(tag, command)); +} + +void RtfReader::fillKeywordMap() { + if (ourKeywordMap.empty()) { + addAction("*", new RtfSpecialCommand()); + addAction("ansicpg", new RtfCodepageCommand()); + + static const char *keywordsToSkip[] = {"buptim", "colortbl", "comment", "creatim", "doccomm", "fonttbl", "footer", "footerf", "footerl", "footerr", "ftncn", "ftnsep", "ftnsepc", "header", "headerf", "headerl", "headerr", "keywords", "operator", "printim", "private1", "revtim", "rxe", "subject", "tc", "txe", "xe", 0}; + RtfCommand *skipCommand = new RtfDestinationCommand(RtfReader::DESTINATION_SKIP); + for (const char **i = keywordsToSkip; *i != 0; ++i) { + addAction(*i, skipCommand); + } + addAction("shppict", new RtfDummyCommand()); + addAction("info", new RtfDestinationCommand(RtfReader::DESTINATION_INFO)); + addAction("title", new RtfDestinationCommand(RtfReader::DESTINATION_TITLE)); + addAction("author", new RtfDestinationCommand(RtfReader::DESTINATION_AUTHOR)); + addAction("pict", new RtfDestinationCommand(RtfReader::DESTINATION_PICTURE)); + addAction("stylesheet", new RtfDestinationCommand(RtfReader::DESTINATION_STYLESHEET)); + addAction("footnote", new RtfDestinationCommand(RtfReader::DESTINATION_FOOTNOTE)); + + RtfCommand *newParagraphCommand = new RtfNewParagraphCommand(); + addAction("\n", newParagraphCommand); + addAction("\r", newParagraphCommand); + addAction("par", newParagraphCommand); + + addAction("\x09", new RtfCharCommand("\x09")); + addAction("_", new RtfCharCommand("-")); + addAction("\\", new RtfCharCommand("\\")); + addAction("{", new RtfCharCommand("{")); + addAction("}", new RtfCharCommand("}")); + addAction("bullet", new RtfCharCommand("\xE2\x80\xA2")); // • + addAction("endash", new RtfCharCommand("\xE2\x80\x93")); // – + addAction("emdash", new RtfCharCommand("\xE2\x80\x94")); // — + addAction("~", new RtfCharCommand("\xC0\xA0")); // + addAction("enspace", new RtfCharCommand("\xE2\x80\x82")); //   + addAction("emspace", new RtfCharCommand("\xE2\x80\x83")); //   + addAction("lquote", new RtfCharCommand("\xE2\x80\x98")); // ‘ + addAction("rquote", new RtfCharCommand("\xE2\x80\x99")); // ’ + addAction("ldblquote", new RtfCharCommand("\xE2\x80\x9C")); // “ + addAction("rdblquote", new RtfCharCommand("\xE2\x80\x9D")); // ” + + addAction("jpegblip", new RtfPictureCommand(ZLMimeType::IMAGE_JPEG)); + addAction("pngblip", new RtfPictureCommand(ZLMimeType::IMAGE_PNG)); + + addAction("s", new RtfStyleCommand()); + + addAction("qc", new RtfAlignmentCommand(ALIGN_CENTER)); + addAction("ql", new RtfAlignmentCommand(ALIGN_LEFT)); + addAction("qr", new RtfAlignmentCommand(ALIGN_RIGHT)); + addAction("qj", new RtfAlignmentCommand(ALIGN_JUSTIFY)); + addAction("pard", new RtfAlignmentCommand(ALIGN_UNDEFINED)); + + addAction("b", new RtfFontPropertyCommand(RtfReader::FONT_BOLD)); + addAction("i", new RtfFontPropertyCommand(RtfReader::FONT_ITALIC)); + addAction("u", new RtfFontPropertyCommand(RtfReader::FONT_UNDERLINED)); + addAction("plain", new RtfFontResetCommand()); + } +} + +bool RtfReader::parseDocument() { + enum { + READ_NORMAL_DATA, + READ_BINARY_DATA, + READ_HEX_SYMBOL, + READ_KEYWORD, + READ_KEYWORD_PARAMETER, + READ_END_OF_FILE + } parserState = READ_NORMAL_DATA; + + std::string keyword; + std::string parameterString; + std::string hexString; + int imageStartOffset = -1; + + while (!myIsInterrupted) { + const char *ptr = myStreamBuffer; + const char *end = myStreamBuffer + myStream->read(myStreamBuffer, rtfStreamBufferSize); + if (ptr == end) { + break; + } + const char *dataStart = ptr; + bool readNextChar = true; + while (ptr != end) { + switch (parserState) { + case READ_END_OF_FILE: + if (*ptr != '}' && !std::isspace(*ptr)) { + return false; + } + break; + case READ_BINARY_DATA: + // TODO: optimize + processCharData(ptr, 1); + --myBinaryDataSize; + if (myBinaryDataSize == 0) { + parserState = READ_NORMAL_DATA; + } + break; + case READ_NORMAL_DATA: + switch (*ptr) { + case '{': + if (ptr > dataStart) { + processCharData(dataStart, ptr - dataStart); + } + dataStart = ptr + 1; + myStateStack.push(myState); + myState.ReadDataAsHex = false; + break; + case '}': + { + if (ptr > dataStart) { + processCharData(dataStart, ptr - dataStart); + } + dataStart = ptr + 1; + + if (imageStartOffset >= 0) { + if (ZLMimeType::EMPTY != myNextImageMimeType) { + const int imageSize = myStream->offset() + (ptr - end) - imageStartOffset; + insertImage(myNextImageMimeType, myFileName, imageStartOffset, imageSize); + } + imageStartOffset = -1; + } + + if (myStateStack.empty()) { + parserState = READ_END_OF_FILE; + break; + } + + if (myState.Destination != myStateStack.top().Destination) { + switchDestination(myState.Destination, false); + switchDestination(myStateStack.top().Destination, true); + } + + bool oldItalic = myState.Italic; + bool oldBold = myState.Bold; + bool oldUnderlined = myState.Underlined; + ZLTextAlignmentType oldAlignment = myState.Alignment; + myState = myStateStack.top(); + myStateStack.pop(); + + if (myState.Italic != oldItalic) { + setFontProperty(RtfReader::FONT_ITALIC); + } + if (myState.Bold != oldBold) { + setFontProperty(RtfReader::FONT_BOLD); + } + if (myState.Underlined != oldUnderlined) { + setFontProperty(RtfReader::FONT_UNDERLINED); + } + if (myState.Alignment != oldAlignment) { + setAlignment(); + } + + break; + } + case '\\': + if (ptr > dataStart) { + processCharData(dataStart, ptr - dataStart); + } + dataStart = ptr + 1; + keyword.erase(); + parserState = READ_KEYWORD; + break; + case 0x0d: + case 0x0a: // cr and lf are noise characters... + if (ptr > dataStart) { + processCharData(dataStart, ptr - dataStart); + } + dataStart = ptr + 1; + break; + default: + if (myState.ReadDataAsHex) { + if (imageStartOffset == -1) { + imageStartOffset = myStream->offset() + (ptr - end); + } + } + break; + } + break; + case READ_HEX_SYMBOL: + hexString += *ptr; + if (hexString.size() == 2) { + char ch = std::strtol(hexString.c_str(), 0, 16); + hexString.erase(); + processCharData(&ch, 1); + parserState = READ_NORMAL_DATA; + dataStart = ptr + 1; + } + break; + case READ_KEYWORD: + if (!std::isalpha(*ptr)) { + if ((ptr == dataStart) && (keyword.empty())) { + if (*ptr == '\'') { + parserState = READ_HEX_SYMBOL; + } else { + keyword = *ptr; + processKeyword(keyword); + parserState = READ_NORMAL_DATA; + } + dataStart = ptr + 1; + } else { + keyword.append(dataStart, ptr - dataStart); + if (*ptr == '-' || std::isdigit(*ptr)) { + dataStart = ptr; + parserState = READ_KEYWORD_PARAMETER; + } else { + readNextChar = *ptr == ' '; + processKeyword(keyword); + parserState = READ_NORMAL_DATA; + dataStart = readNextChar ? ptr + 1 : ptr; + } + } + } + break; + case READ_KEYWORD_PARAMETER: + if (!std::isdigit(*ptr)) { + parameterString.append(dataStart, ptr - dataStart); + int parameter = std::atoi(parameterString.c_str()); + parameterString.erase(); + readNextChar = *ptr == ' '; + if ((keyword == "bin") && (parameter > 0)) { + myBinaryDataSize = parameter; + parserState = READ_BINARY_DATA; + } else { + processKeyword(keyword, ¶meter); + parserState = READ_NORMAL_DATA; + } + dataStart = readNextChar ? ptr + 1 : ptr; + } + break; + } + if (readNextChar) { + ++ptr; + } else { + readNextChar = true; + } + } + if (dataStart < end) { + switch (parserState) { + case READ_NORMAL_DATA: + processCharData(dataStart, end - dataStart); + case READ_KEYWORD: + keyword.append(dataStart, end - dataStart); + break; + case READ_KEYWORD_PARAMETER: + parameterString.append(dataStart, end - dataStart); + break; + default: + break; + } + } + } + + return myIsInterrupted || myStateStack.empty(); +} + +void RtfReader::processKeyword(const std::string &keyword, int *parameter) { + const bool wasSpecialMode = mySpecialMode; + mySpecialMode = false; + if (myState.Destination == RtfReader::DESTINATION_SKIP) { + return; + } + + std::map<std::string, RtfCommand*>::const_iterator it = ourKeywordMap.find(keyword); + + if (it == ourKeywordMap.end()) { + if (wasSpecialMode) { + myState.Destination = RtfReader::DESTINATION_SKIP; + } + return; + } + + it->second->run(*this, parameter); +} + +void RtfReader::processCharData(const char *data, std::size_t len, bool convert) { + if (myState.Destination != RtfReader::DESTINATION_SKIP) { + addCharData(data, len, convert); + } +} + +void RtfReader::interrupt() { + myIsInterrupted = true; +} + +bool RtfReader::readDocument(const ZLFile &file) { + myFileName = file.path(); + myStream = file.inputStream(); + if (myStream.isNull() || !myStream->open()) { + return false; + } + + fillKeywordMap(); + + myStreamBuffer = new char[rtfStreamBufferSize]; + + myIsInterrupted = false; + + mySpecialMode = false; + + myState.Alignment = ALIGN_UNDEFINED; + myState.Italic = false; + myState.Bold = false; + myState.Underlined = false; + myState.Destination = RtfReader::DESTINATION_NONE; + myState.ReadDataAsHex = false; + + bool code = parseDocument(); + + while (!myStateStack.empty()) { + myStateStack.pop(); + } + + delete[] myStreamBuffer; + myStream->close(); + + return code; +} diff --git a/reader/src/formats/rtf/RtfReader.h b/reader/src/formats/rtf/RtfReader.h new file mode 100644 index 0000000..10b037a --- /dev/null +++ b/reader/src/formats/rtf/RtfReader.h @@ -0,0 +1,209 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#ifndef __RTFREADER_H__ +#define __RTFREADER_H__ + +#include <string> +#include <map> +#include <stack> +#include <ZLMimeType.h> + +#include <ZLEncodingConverter.h> + +#include <ZLTextAlignmentType.h> + +#include "../EncodedTextReader.h" + +class ZLFile; +class ZLInputStream; +class RtfCommand; + +class RtfReader : public EncodedTextReader { + +private: + static void fillKeywordMap(); + static void addAction(const std::string &tag, RtfCommand *command); + +private: + static std::map<std::string, RtfCommand*> ourKeywordMap; + +protected: + RtfReader(const std::string &encoding); + virtual ~RtfReader(); + +public: + virtual bool readDocument(const ZLFile &file); + +protected: + enum DestinationType { + DESTINATION_NONE, + DESTINATION_SKIP, + DESTINATION_INFO, + DESTINATION_TITLE, + DESTINATION_AUTHOR, + DESTINATION_PICTURE, + DESTINATION_STYLESHEET, + DESTINATION_FOOTNOTE, + }; + + enum FontProperty { + FONT_BOLD, + FONT_ITALIC, + FONT_UNDERLINED + }; + + virtual void addCharData(const char *data, std::size_t len, bool convert) = 0; + virtual void insertImage(shared_ptr<ZLMimeType> mimeType, const std::string &fileName, std::size_t startOffset, std::size_t size) = 0; + virtual void setEncoding(int code) = 0; + virtual void switchDestination(DestinationType destination, bool on) = 0; + virtual void setAlignment() = 0; + virtual void setFontProperty(FontProperty property) = 0; + virtual void newParagraph() = 0; + + void interrupt(); + +private: + bool parseDocument(); + void processKeyword(const std::string &keyword, int *parameter = 0); + void processCharData(const char *data, std::size_t len, bool convert = true); + +protected: + struct RtfReaderState { + bool Bold; + bool Italic; + bool Underlined; + ZLTextAlignmentType Alignment; + DestinationType Destination; + + bool ReadDataAsHex; + }; + + RtfReaderState myState; + +private: + bool mySpecialMode; + + std::string myFileName; + shared_ptr<ZLInputStream> myStream; + char *myStreamBuffer; + + std::stack<RtfReaderState> myStateStack; + + int myBinaryDataSize; + shared_ptr<ZLMimeType> myNextImageMimeType; + + int myIsInterrupted; + +friend class RtfNewParagraphCommand; +friend class RtfFontPropertyCommand; +friend class RtfAlignmentCommand; +friend class RtfCharCommand; +friend class RtfDestinationCommand; +friend class RtfStyleCommand; +friend class RtfSpecialCommand; +friend class RtfPictureCommand; +friend class RtfFontResetCommand; +friend class RtfCodepageCommand; +}; + +class RtfCommand { +protected: + virtual ~RtfCommand(); + +public: + virtual void run(RtfReader &reader, int *parameter) const = 0; +}; + +class RtfDummyCommand : public RtfCommand { +public: + void run(RtfReader &reader, int *parameter) const; +}; + +class RtfNewParagraphCommand : public RtfCommand { +public: + void run(RtfReader &reader, int *parameter) const; +}; + +class RtfFontPropertyCommand : public RtfCommand { + +public: + RtfFontPropertyCommand(RtfReader::FontProperty property); + void run(RtfReader &reader, int *parameter) const; + +private: + RtfReader::FontProperty myProperty; +}; + +class RtfAlignmentCommand : public RtfCommand { +public: + RtfAlignmentCommand(ZLTextAlignmentType alignment); + void run(RtfReader &reader, int *parameter) const; + +private: + ZLTextAlignmentType myAlignment; +}; + +class RtfCharCommand : public RtfCommand { +public: + RtfCharCommand(const std::string &chr); + void run(RtfReader &reader, int *parameter) const; + +private: + std::string myChar; +}; + +class RtfDestinationCommand : public RtfCommand { +public: + RtfDestinationCommand(RtfReader::DestinationType dest); + void run(RtfReader &reader, int *parameter) const; + +private: + RtfReader::DestinationType myDestination; +}; + +class RtfStyleCommand : public RtfCommand { +public: + void run(RtfReader &reader, int *parameter) const; +}; + +class RtfSpecialCommand : public RtfCommand { + void run(RtfReader &reader, int *parameter) const; +}; + +class RtfPictureCommand : public RtfCommand { +public: + RtfPictureCommand(shared_ptr<ZLMimeType> mimeType); + void run(RtfReader &reader, int *parameter) const; + +private: + const shared_ptr<ZLMimeType> myMimeType; +}; + +class RtfFontResetCommand : public RtfCommand { +public: + void run(RtfReader &reader, int *parameter) const; +}; + +class RtfCodepageCommand : public RtfCommand { +public: + void run(RtfReader &reader, int *parameter) const; +}; + +#endif /* __RTFREADER_H__ */ diff --git a/reader/src/formats/rtf/RtfReaderStream.cpp b/reader/src/formats/rtf/RtfReaderStream.cpp new file mode 100644 index 0000000..f4537f7 --- /dev/null +++ b/reader/src/formats/rtf/RtfReaderStream.cpp @@ -0,0 +1,175 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <cstring> +#include <cstdlib> +#include <string> + +#include "RtfReader.h" +#include "RtfReaderStream.h" + +class RtfTextOnlyReader : public RtfReader { + +public: + RtfTextOnlyReader(char *buffer, std::size_t maxSize); + ~RtfTextOnlyReader(); + std::size_t readSize() const; + +protected: + void addCharData(const char *data, std::size_t len, bool convert); + void insertImage(shared_ptr<ZLMimeType> mimeType, const std::string &fileName, std::size_t startOffset, std::size_t size); + void setEncoding(int code); + void switchDestination(DestinationType destination, bool on); + void setAlignment(); + void setFontProperty(FontProperty property); + void newParagraph(); + + void interrupt(); + +private: + struct RtfTextOnlyReaderState { + bool ReadText; + }; + + RtfTextOnlyReaderState myCurrentState; + +private: + char* myBuffer; + const std::size_t myMaxSize; + std::size_t myFilledSize; +}; + +RtfTextOnlyReader::RtfTextOnlyReader(char *buffer, std::size_t maxSize) : RtfReader(std::string()), myBuffer(buffer), myMaxSize(maxSize), myFilledSize(0) { + myCurrentState.ReadText = true; +} + +RtfTextOnlyReader::~RtfTextOnlyReader() { +} + +void RtfTextOnlyReader::addCharData(const char *data, std::size_t len, bool) { + if (myBuffer == 0) { + return; + } + if (myCurrentState.ReadText) { + if (myFilledSize < myMaxSize) { + len = std::min((std::size_t)len, myMaxSize - myFilledSize); + std::memcpy(myBuffer + myFilledSize, data, len); + myFilledSize += len; + } + if (myFilledSize < myMaxSize) { + myBuffer[myFilledSize++]=' '; + } else { + interrupt(); + } + } +} + +std::size_t RtfTextOnlyReader::readSize() const { + return myFilledSize; +} + +void RtfTextOnlyReader::insertImage(shared_ptr<ZLMimeType>, const std::string&, std::size_t, std::size_t) { +} + +void RtfTextOnlyReader::setEncoding(int) { +} + +void RtfTextOnlyReader::switchDestination(DestinationType destination, bool on) { + switch (destination) { + case DESTINATION_NONE: + break; + case DESTINATION_SKIP: + case DESTINATION_INFO: + case DESTINATION_TITLE: + case DESTINATION_AUTHOR: + case DESTINATION_STYLESHEET: + myCurrentState.ReadText = !on; + break; + case DESTINATION_PICTURE: + myCurrentState.ReadText = !on; + break; + case DESTINATION_FOOTNOTE: + if (on) { + myCurrentState.ReadText = true; + } + break; + } +} + +void RtfTextOnlyReader::setAlignment() { +} + +void RtfTextOnlyReader::setFontProperty(FontProperty) { +} + +void RtfTextOnlyReader::newParagraph() { +} + +void RtfTextOnlyReader::interrupt() { +} + +RtfReaderStream::RtfReaderStream(const ZLFile& file, std::size_t maxSize) : myFile(file), myBuffer(0), mySize(maxSize) { +} + +RtfReaderStream::~RtfReaderStream() { + close(); +} + +bool RtfReaderStream::open() { + if (mySize != 0) { + myBuffer = new char[mySize]; + } + RtfTextOnlyReader reader(myBuffer, mySize); + reader.readDocument(myFile); + mySize = reader.readSize(); + myOffset = 0; + return true; +} + +std::size_t RtfReaderStream::read(char *buffer, std::size_t maxSize) { + maxSize = std::min(maxSize, mySize - myOffset); + if ((buffer != 0) && (myBuffer !=0)) { + std::memcpy(buffer, myBuffer + myOffset, maxSize); + } + myOffset += maxSize; + return maxSize; +} + +void RtfReaderStream::close() { + if (myBuffer != 0) { + delete[] myBuffer; + myBuffer = 0; + } +} + +void RtfReaderStream::seek(int offset, bool absoluteOffset) { + if (!absoluteOffset) { + offset += myOffset; + } + myOffset = std::min(mySize, (std::size_t)std::max(0, offset)); +} + +std::size_t RtfReaderStream::offset() const { + return myOffset; +} + +std::size_t RtfReaderStream::sizeOfOpened() { + return mySize; +} + diff --git a/reader/src/formats/rtf/RtfReaderStream.h b/reader/src/formats/rtf/RtfReaderStream.h new file mode 100644 index 0000000..71555b4 --- /dev/null +++ b/reader/src/formats/rtf/RtfReaderStream.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2008-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#ifndef __RTFREADERSTREAM_H__ +#define __RTFREADERSTREAM_H__ + +#include <string> + +#include <ZLFile.h> +#include <ZLInputStream.h> + +class RtfReaderStream : public ZLInputStream { + +public: + RtfReaderStream(const ZLFile& file, std::size_t maxSize); + ~RtfReaderStream(); + +private: + bool open(); + std::size_t read(char *buffer, std::size_t maxSize); + void close(); + + void seek(int offset, bool absoluteOffset); + std::size_t offset() const; + std::size_t sizeOfOpened(); + +private: + const ZLFile myFile; + char *myBuffer; + std::size_t mySize; + std::size_t myOffset; +}; + +#endif /* __RTFREADERSTREAM_H__ */ |
