diff options
| author | Michele Calgaro <michele.calgaro@yahoo.it> | 2024-06-07 23:30:05 +0900 |
|---|---|---|
| committer | Michele Calgaro <michele.calgaro@yahoo.it> | 2024-06-07 23:30:05 +0900 |
| commit | 17b259df9cb6b28779d4881b2b6c805ee2e48eea (patch) | |
| tree | 5ed61937459cb7081089111b0242c01ec178f1f3 /reader/src/formats/pdb/PmlReader.cpp | |
| parent | 1cba8bce178eb2d6719c6f7f21e2c9352c5513a6 (diff) | |
| download | tde-ebook-reader-17b259df9cb6b28779d4881b2b6c805ee2e48eea.tar.gz tde-ebook-reader-17b259df9cb6b28779d4881b2b6c805ee2e48eea.zip | |
Rename to tde-ebook-reader
Signed-off-by: Michele Calgaro <michele.calgaro@yahoo.it>
Diffstat (limited to 'reader/src/formats/pdb/PmlReader.cpp')
| -rw-r--r-- | reader/src/formats/pdb/PmlReader.cpp | 407 |
1 files changed, 407 insertions, 0 deletions
diff --git a/reader/src/formats/pdb/PmlReader.cpp b/reader/src/formats/pdb/PmlReader.cpp new file mode 100644 index 0000000..712a6e0 --- /dev/null +++ b/reader/src/formats/pdb/PmlReader.cpp @@ -0,0 +1,407 @@ +/* + * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +/* + * Information about Palm Markup Language was taken from: + * http://www.m.ereader.com/ereader/help/dropbook/pml.htm + * http://ccit205.wikispaces.com/Palm+Markup+Language+(PML) + */ + +#include <cstdlib> +#include <cctype> + +#include <ZLFile.h> +#include <ZLInputStream.h> + +#include "PmlReader.h" + +static const int pmlStreamBufferSize = 4096; + +const std::string PmlReader::ourDefaultParameter = ""; + +PmlReader::PmlReader(const std::string &encoding) : EncodedTextReader(encoding) { +} + +PmlReader::~PmlReader() { +} + +bool PmlReader::readDocument(ZLInputStream& stream) { + myStreamBuffer = new char[pmlStreamBufferSize]; + + myIsInterrupted = false; + + myState.Italic = false; + myState.Bold = false; + myState.Underlined = false; + myState.SmallCaps = false; + myState.Subscript = false; + myState.Superscript = false; + myState.Alignment = ALIGN_UNDEFINED; + myState.FontSize = NORMAL; + myState.Indent = 0; + myState.IndentBlockOn = false; + myState.BoldBlockOn = false; + myState.FootnoteLinkOn = false; + myState.InternalLinkOn = false; + myState.InvisibleText = false; + + bool code = parseDocument(stream); + + delete[] myStreamBuffer; + + return code; +} + +bool PmlReader::parseDocument(ZLInputStream &stream) { + enum { + READ_NORMAL_DATA, + READ_TAG, + READ_TAG_PARAMETER, + } parserState = READ_NORMAL_DATA; + + std::size_t tagNameLength = 0; + std::string tagName; + std::string parameterString; + + bool startParameterReading = false; + std::size_t tagCounter = 0; + static bool FLAG = true; + + while (!myIsInterrupted) { + const char *ptr = myStreamBuffer; + const char *end = myStreamBuffer + stream.read(myStreamBuffer, pmlStreamBufferSize); + if (ptr == end) { + break; + } + const char *dataStart = ptr; + bool readNextChar = true; + while (ptr != end) { + switch (parserState) { + case READ_NORMAL_DATA: + if (*ptr == '\n') { + if (ptr > dataStart) { + processCharData(dataStart, ptr - dataStart); + } + newLine(); + FLAG = true; + dataStart = ptr + 1; + } else if (FLAG && std::isspace(*ptr)) { + } else { + FLAG = false; + if (*ptr == '\\') { + if (ptr > dataStart) { + processCharData(dataStart, ptr - dataStart); + } + dataStart = ptr + 1; + tagName.erase(); + parserState = READ_TAG; + } + } + break; + case READ_TAG: + if ((ptr == dataStart) && (tagName.empty())) { + if (*ptr == '\\') { + processCharData(ptr, 1); + dataStart = ptr + 1; + parserState = READ_NORMAL_DATA; + } else { + tagNameLength = findTagLength(ptr); + if (tagNameLength == 0) { + dataStart = ptr + 1; + parserState = READ_NORMAL_DATA; + ++tagCounter; + } else { + --tagNameLength; + } + } + } else { + if (tagNameLength == 0) { + tagName.append(dataStart, ptr - dataStart); + if (*ptr == '=') { + dataStart = ptr + 1; + parameterString.erase(); + parserState = READ_TAG_PARAMETER; + ++tagCounter; + } else { + readNextChar = false; + processTag(tagName); + dataStart = ptr; + parserState = READ_NORMAL_DATA; + ++tagCounter; + } + } else { + --tagNameLength; + } + } + break; + case READ_TAG_PARAMETER: + if (*ptr == '"') { + if (!startParameterReading) { + startParameterReading = true; + dataStart = ptr + 1; + } else { + parameterString.append(dataStart, ptr - dataStart); + processTag(tagName, parameterString); + parserState = READ_NORMAL_DATA; + dataStart = ptr + 1; + startParameterReading = false; + } + } + break; + } + if (readNextChar) { + ++ptr; + } else { + readNextChar = true; + } + } + if (dataStart < end) { + switch (parserState) { + case READ_NORMAL_DATA: + processCharData(dataStart, end - dataStart); + case READ_TAG: + tagName.append(dataStart, end - dataStart); + break; + case READ_TAG_PARAMETER: + parameterString.append(dataStart, end - dataStart); + break; + default: + break; + } + } + } + return myIsInterrupted; +} + +std::size_t PmlReader::findTagLength(const char* ptr) { + switch(*ptr) { // tag action description | close | support | + case 'p': // new page | - | + | + case 'x': // new chapter and new page | + | + | + case 'c': // center alignment block | + | + | + case 'r': // right alignment block | + | + | + case 'i': // italize block | + | + | + case 'u': // underlined block | + | + | + case 'o': // overstrike block | + | - | + case 'v': // invisible text block | + | + | + case 't': // indent block | + | + | + case 'T': // indent with value | - | + | + case 'w': // embed text width rule | - | - | + case 'n': // switch to normal font | - | + | + case 's': // switch to std font |+ or \n| + | + case 'b': // switch to bold font (deprecated) |+ or \n| - | + case 'l': // switch to large font |+ or \n| + | + case 'B': // mark text as bold | + | + | + case 'k': // smaller font size and uppercase | + | + | + case 'm': // insert named image | - | + | + case 'q': // reference to another spot | + | + | + case 'Q': // link anchor for \q reference | - | + | + case '-': // soft hyphen | - | - | + case 'I': // reference index item | - | - | + return 1; + case 'X': // XN - new chapter, n indent level | + | - | + case 'S': // Sp - mark text as superscript | + | + | + // Sb - mark text as subscript | + | + | + // Sd - link to a sidebar | + | - | + case 'C': // CN - chapter title + indent level| - | - | + case 'F': // Fn - link to a footnote | + | + | + return 2; + default: + return 0; + } +} + + +void PmlReader::interrupt() { + myIsInterrupted = true; +} + + +void PmlReader::processTag(std::string &tagName, const std::string ¶meter) { + const char tagDeterminant = *tagName.data(); + switch (tagDeterminant) { + case 'p': + newPage(); + break; + case 'x': + //TODO add close tag processing + newPage(); + break; + case 'B': + if (!myState.BoldBlockOn) { + processFontProperty(FONT_BOLD); + } + break; + case 'i': + processFontProperty(FONT_ITALIC); + break; + case 'u': + processFontProperty(FONT_UNDERLINED); + break; + case 'v': + myState.InvisibleText = !myState.InvisibleText;; + break; + case 'c': + processAlignment(ALIGN_CENTER); + break; + case 'r': + processAlignment(ALIGN_RIGHT); + break; + case 'n': + processFontSize(NORMAL); + break; + case 'b': + myState.BoldBlockOn = !myState.BoldBlockOn; + processFontProperty(FONT_BOLD); + break; + case 's': + processFontSize(SMALLER); + break; + case 'l': + processFontSize(LARGER); + break; + case 'k': + myState.SmallCaps = !myState.SmallCaps; + processFontSize(SMALLER); + break; + case 'S': + if (tagName == "Sb") { + processFontProperty(FONT_SUBSCRIPT); + } else if (tagName == "Sp") { + processFontProperty(FONT_SUPERSCRIPT); + } else if (tagName == "Sd") { + //processSidebarLink(); + } + break; + case 't': + processIndent(); + break; + case 'T': + processIndent(parameter); + myState.IndentBlockOn = false; + break; + case 'w': + //addHorizontalRule(parameter); + break; + case 'F': + processLink(FOOTNOTE, parameter); + break; + case 'q': + processLink(INTERNAL_HYPERLINK, parameter); + break; + case 'Q': + addLinkLabel(parameter); + break; + case 'm': + addImageReference(parameter); + break; + default: + //std::cerr << "PmlReader: unsupported tag: name: " << tagName << " parameter: " << parameter << "\n"; + break; + } +} + +void PmlReader::processCharData(const char* data, std::size_t len, bool convert) { + if(!myState.InvisibleText) { + addCharData(data, len, convert); + } +} + +void PmlReader::processFontProperty(PmlReader::FontProperty property) { + switch (property) { + case FONT_BOLD: + myState.Bold = !myState.Bold; + switchFontProperty(FONT_BOLD); + break; + case FONT_ITALIC: + myState.Italic = !myState.Italic; + switchFontProperty(FONT_ITALIC); + break; + case FONT_UNDERLINED: + myState.Underlined = !myState.Underlined; + switchFontProperty(FONT_UNDERLINED); + break; + case FONT_SUBSCRIPT: + myState.Subscript = !myState.Subscript; + switchFontProperty(FONT_SUBSCRIPT); + break; + case FONT_SUPERSCRIPT: + myState.Superscript = !myState.Superscript; + switchFontProperty(FONT_SUPERSCRIPT); + break; + } +} + +void PmlReader::processAlignment(ZLTextAlignmentType alignment) { + if (myState.Alignment != alignment) { + myState.Alignment = alignment; + } else { + myState.Alignment = ALIGN_UNDEFINED; + } + newParagraph(); +} + +void PmlReader::processFontSize(FontSizeType sizeType) { + if (myState.FontSize != sizeType) { + myState.FontSize = sizeType; + } else { + myState.FontSize = NORMAL; + } + setFontSize(); +} + +void PmlReader::processIndent(const std::string& parameter) { + int indentPercentSize = 5; + if (!parameter.empty()) { + const int index = parameter.find('%'); + if (index != -1) { + const std::string indentValueStr = parameter.substr(0, index); + indentPercentSize = std::atoi(indentValueStr.data()); + } else { + indentPercentSize = 5; + } + } + if (!myState.IndentBlockOn) { + myState.Indent = indentPercentSize; + } else { + myState.Indent = 0; + } + myState.IndentBlockOn = !myState.IndentBlockOn; + newParagraph(); +} + +void PmlReader::processLink(FBTextKind kind, const std::string ¶meter) { + switch(kind) { + case FOOTNOTE: + myState.FootnoteLinkOn = !myState.FootnoteLinkOn; + addLink(FOOTNOTE, parameter, myState.FootnoteLinkOn); + break; + case INTERNAL_HYPERLINK: + myState.InternalLinkOn = !myState.InternalLinkOn; + if (parameter.size() > 1) { + // '#' character has to stand before link label , so we should omit '#' for getting label + addLink(INTERNAL_HYPERLINK, parameter.substr(1), myState.InternalLinkOn); + } else { + // In case trailing or corrupted tag we use parameter entirely + addLink(INTERNAL_HYPERLINK, parameter, myState.InternalLinkOn); + } + break; + default: + break; + } +} |
