diff options
| author | Michele Calgaro <michele.calgaro@yahoo.it> | 2024-06-07 23:30:05 +0900 |
|---|---|---|
| committer | Michele Calgaro <michele.calgaro@yahoo.it> | 2024-06-07 23:30:05 +0900 |
| commit | 17b259df9cb6b28779d4881b2b6c805ee2e48eea (patch) | |
| tree | 5ed61937459cb7081089111b0242c01ec178f1f3 /fbreader/src/formats/html/HtmlBookReader.cpp | |
| parent | 1cba8bce178eb2d6719c6f7f21e2c9352c5513a6 (diff) | |
| download | tde-ebook-reader-17b259df9cb6b28779d4881b2b6c805ee2e48eea.tar.gz tde-ebook-reader-17b259df9cb6b28779d4881b2b6c805ee2e48eea.zip | |
Rename to tde-ebook-reader
Signed-off-by: Michele Calgaro <michele.calgaro@yahoo.it>
Diffstat (limited to 'fbreader/src/formats/html/HtmlBookReader.cpp')
| -rw-r--r-- | fbreader/src/formats/html/HtmlBookReader.cpp | 583 |
1 files changed, 0 insertions, 583 deletions
diff --git a/fbreader/src/formats/html/HtmlBookReader.cpp b/fbreader/src/formats/html/HtmlBookReader.cpp deleted file mode 100644 index 321913d..0000000 --- a/fbreader/src/formats/html/HtmlBookReader.cpp +++ /dev/null @@ -1,583 +0,0 @@ -/* - * Copyright (C) 2004-2012 Geometer Plus <contact@geometerplus.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - */ - -#include <cctype> - -#include <ZLFile.h> -#include <ZLFileImage.h> -#include <ZLStringUtil.h> - -#include "HtmlBookReader.h" -#include "HtmlTagActions.h" -#include "../txt/PlainTextFormat.h" -#include "../util/MiscUtil.h" -#include "../../bookmodel/BookModel.h" -#include "../css/StyleSheetParser.h" - -HtmlTagAction::HtmlTagAction(HtmlBookReader &reader) : myReader(reader) { -} - -HtmlTagAction::~HtmlTagAction() { -} - -void HtmlTagAction::reset() { -} - -DummyHtmlTagAction::DummyHtmlTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) { -} - -void DummyHtmlTagAction::run(const HtmlReader::HtmlTag&) { -} - -HtmlControlTagAction::HtmlControlTagAction(HtmlBookReader &reader, FBTextKind kind) : HtmlTagAction(reader), myKind(kind) { -} - -void HtmlControlTagAction::run(const HtmlReader::HtmlTag &tag) { - std::vector<FBTextKind> &list = myReader.myKindList; - int index; - for (index = list.size() - 1; index >= 0; --index) { - if (list[index] == myKind) { - break; - } - } - if (tag.Start) { - if (index == -1) { - bookReader().pushKind(myKind); - myReader.myKindList.push_back(myKind); - bookReader().addControl(myKind, true); - } - } else { - if (index >= 0) { - for (int i = list.size() - 1; i >= index; --i) { - bookReader().addControl(list[i], false); - bookReader().popKind(); - } - for (unsigned int j = index + 1; j < list.size(); ++j) { - bookReader().addControl(list[j], true); - bookReader().pushKind(list[j]); - } - list.erase(list.begin() + index); - } - } -} - -HtmlHeaderTagAction::HtmlHeaderTagAction(HtmlBookReader &reader, FBTextKind kind) : HtmlTagAction(reader), myKind(kind) { -} - -void HtmlHeaderTagAction::run(const HtmlReader::HtmlTag &tag) { - myReader.myIsStarted = false; - if (tag.Start) { - if (myReader.myBuildTableOfContent && !myReader.myIgnoreTitles) { - if (!bookReader().contentsParagraphIsOpen()) { - bookReader().insertEndOfSectionParagraph(); - bookReader().enterTitle(); - bookReader().beginContentsParagraph(); - } - } - bookReader().pushKind(myKind); - } else { - bookReader().popKind(); - if (myReader.myBuildTableOfContent && !myReader.myIgnoreTitles) { - bookReader().endContentsParagraph(); - bookReader().exitTitle(); - } - } - bookReader().beginParagraph(); -} - -HtmlIgnoreTagAction::HtmlIgnoreTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) { -} - -void HtmlIgnoreTagAction::run(const HtmlReader::HtmlTag &tag) { - if (tag.Start) { - if (myTagNames.find(tag.Name) == myTagNames.end()) { - ++myReader.myIgnoreDataCounter; - myTagNames.insert(tag.Name); - } - } else { - if (myTagNames.find(tag.Name) != myTagNames.end()) { - --myReader.myIgnoreDataCounter; - myTagNames.erase(tag.Name); - } - } -} - -HtmlHrefTagAction::HtmlHrefTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) { -} - -void HtmlHrefTagAction::run(const HtmlReader::HtmlTag &tag) { - if (tag.Start) { - for (unsigned int i = 0; i < tag.Attributes.size(); ++i) { - if (tag.Attributes[i].Name == "NAME") { - bookReader().addHyperlinkLabel(tag.Attributes[i].Value); - } else if ((hyperlinkType() == REGULAR) && (tag.Attributes[i].Name == "HREF")) { - std::string value = tag.Attributes[i].Value; - if (!myReader.myFileName.empty() && - (value.length() > myReader.myFileName.length()) && - (value.substr(0, myReader.myFileName.length()) == myReader.myFileName)) { - value = value.substr(myReader.myFileName.length()); - } - if (!value.empty()) { - if (value[0] == '#') { - setHyperlinkType(INTERNAL_HYPERLINK); - bookReader().addHyperlinkControl(INTERNAL_HYPERLINK, value.substr(1)); - } else { - FBTextKind hyperlinkType = MiscUtil::referenceType(value); - if (hyperlinkType != INTERNAL_HYPERLINK) { - setHyperlinkType(hyperlinkType); - bookReader().addHyperlinkControl(hyperlinkType, value); - } - } - } - } - } - } else if (hyperlinkType() != REGULAR) { - bookReader().addControl(hyperlinkType(), false); - setHyperlinkType(REGULAR); - } -} - -void HtmlHrefTagAction::reset() { - setHyperlinkType(REGULAR); -} - -FBTextKind HtmlHrefTagAction::hyperlinkType() const { - return myHyperlinkType; -} - -void HtmlHrefTagAction::setHyperlinkType(FBTextKind hyperlinkType) { - myHyperlinkType = hyperlinkType; -} - -HtmlImageTagAction::HtmlImageTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) { -} - -void HtmlImageTagAction::run(const HtmlReader::HtmlTag &tag) { - if (tag.Start) { - bookReader().endParagraph(); - for (unsigned int i = 0; i < tag.Attributes.size(); ++i) { - if (tag.Attributes[i].Name == "SRC") { - const std::string fileName = MiscUtil::decodeHtmlURL(tag.Attributes[i].Value); - const ZLFile file(myReader.myBaseDirPath + fileName); - if (file.exists()) { - bookReader().addImageReference(fileName); - bookReader().addImage(fileName, new ZLFileImage(file, 0)); - } - break; - } - } - bookReader().beginParagraph(); - } -} - -HtmlBreakTagAction::HtmlBreakTagAction(HtmlBookReader &reader, BreakType breakType) : HtmlTagAction(reader), myBreakType(breakType) { -} - -void HtmlBreakTagAction::run(const HtmlReader::HtmlTag &tag) { - if (myReader.myDontBreakParagraph) { - myReader.myDontBreakParagraph = false; - return; - } - - if ((tag.Start && (myBreakType & BREAK_AT_START)) || - (!tag.Start && (myBreakType & BREAK_AT_END))) { - bookReader().endParagraph(); - if (bookReader().isKindStackEmpty()) { - bookReader().pushKind(REGULAR); - } - bookReader().beginParagraph(); - } -} - -HtmlPreTagAction::HtmlPreTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) { -} - -void HtmlPreTagAction::run(const HtmlReader::HtmlTag &tag) { - bookReader().endParagraph(); - myReader.myIsPreformatted = tag.Start; - myReader.mySpaceCounter = -1; - myReader.myBreakCounter = 0; - if (myReader.myFormat.breakType() == PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE) { - if (tag.Start) { - bookReader().pushKind(PREFORMATTED); - } else { - bookReader().popKind(); - } - } - bookReader().beginParagraph(); -} - -HtmlListTagAction::HtmlListTagAction(HtmlBookReader &reader, int startIndex) : HtmlTagAction(reader), myStartIndex(startIndex) { -} - -void HtmlListTagAction::run(const HtmlReader::HtmlTag &tag) { - if (tag.Start) { - myReader.myListNumStack.push(myStartIndex); - } else if (!myReader.myListNumStack.empty()) { - myReader.myListNumStack.pop(); - } -} - -HtmlListItemTagAction::HtmlListItemTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) { -} - -void HtmlListItemTagAction::run(const HtmlReader::HtmlTag &tag) { - if (tag.Start) { - bookReader().endParagraph(); - bookReader().beginParagraph(); - if (!myReader.myListNumStack.empty()) { - bookReader().addFixedHSpace(3 * myReader.myListNumStack.size()); - int &index = myReader.myListNumStack.top(); - if (index == 0) { - myReader.addConvertedDataToBuffer("\342\200\242 ", 4, false); - } else { - std::string number; - ZLStringUtil::appendNumber(number, index++); - number += ". "; - myReader.addConvertedDataToBuffer(number.data(), number.length(), false); - } - myReader.myDontBreakParagraph = true; - } - } else { - myReader.myDontBreakParagraph = false; - } -} - -HtmlTableTagAction::HtmlTableTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) { -} - -void HtmlTableTagAction::run(const HtmlReader::HtmlTag &tag) { - if (tag.Start) { - myReader.myIgnoreTitles = true; - } else { - myReader.myIgnoreTitles = false; - } -} - -HtmlStyleTagAction::HtmlStyleTagAction(HtmlBookReader &reader) : HtmlTagAction(reader) { -} - -void HtmlStyleTagAction::run(const HtmlReader::HtmlTag &tag) { - myReader.myStyleSheetParser = tag.Start ? new StyleSheetTableParser(myReader.myStyleSheetTable) : 0; - /* - if (!tag.Start) { - myReader.myStyleSheetTable.dump(); - } - */ -} - -shared_ptr<HtmlTagAction> HtmlBookReader::createAction(const std::string &tag) { - if (tag == "EM") { - return new HtmlControlTagAction(*this, EMPHASIS); - } else if (tag == "STRONG") { - return new HtmlControlTagAction(*this, STRONG); - } else if (tag == "B") { - return new HtmlControlTagAction(*this, BOLD); - } else if (tag == "I") { - return new HtmlControlTagAction(*this, ITALIC); - } else if (tag == "TT") { - return new HtmlControlTagAction(*this, CODE); - } else if (tag == "CODE") { - return new HtmlControlTagAction(*this, CODE); - } else if (tag == "CITE") { - return new HtmlControlTagAction(*this, CITE); - } else if (tag == "SUB") { - return new HtmlControlTagAction(*this, SUB); - } else if (tag == "SUP") { - return new HtmlControlTagAction(*this, SUP); - } else if (tag == "H1") { - return new HtmlHeaderTagAction(*this, H1); - } else if (tag == "H2") { - return new HtmlHeaderTagAction(*this, H2); - } else if (tag == "H3") { - return new HtmlHeaderTagAction(*this, H3); - } else if (tag == "H4") { - return new HtmlHeaderTagAction(*this, H4); - } else if (tag == "H5") { - return new HtmlHeaderTagAction(*this, H5); - } else if (tag == "H6") { - return new HtmlHeaderTagAction(*this, H6); - } else if (tag == "HEAD") { - return new HtmlIgnoreTagAction(*this); - } else if (tag == "TITLE") { - return new HtmlIgnoreTagAction(*this); - } else if (tag == "STYLE") { - return new HtmlStyleTagAction(*this); - } else if (tag == "SELECT") { - return new HtmlIgnoreTagAction(*this); - } else if (tag == "SCRIPT") { - return new HtmlIgnoreTagAction(*this); - } else if (tag == "A") { - return new HtmlHrefTagAction(*this); - } else if (tag == "TD") { - //return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_END); - } else if (tag == "TR") { - return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_END); - } else if (tag == "DIV") { - return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_END); - } else if (tag == "DT") { - return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_START); - } else if (tag == "P") { - return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_START_AND_AT_END); - } else if (tag == "BR") { - return new HtmlBreakTagAction(*this, HtmlBreakTagAction::BREAK_AT_START_AND_AT_END); - } else if (tag == "IMG") { - return new HtmlImageTagAction(*this); - } else if (tag == "UL") { - return new HtmlListTagAction(*this, 0); - } else if (tag == "MENU") { - return new HtmlListTagAction(*this, 0); - } else if (tag == "DIR") { - return new HtmlListTagAction(*this, 0); - } else if (tag == "OL") { - return new HtmlListTagAction(*this, 1); - } else if (tag == "LI") { - return new HtmlListItemTagAction(*this); - } else if (tag == "PRE") { - if (myProcessPreTag) { - return new HtmlPreTagAction(*this); - } - } else if (tag == "TABLE") { - return new HtmlTableTagAction(*this); - } - /* - } else if (tag == "DD") { - return 0; - } else if (tag == "DL") { - return 0; - } else if (tag == "DFN") { - return 0; - } else if (tag == "SAMP") { - return 0; - } else if (tag == "KBD") { - return 0; - } else if (tag == "VAR") { - return 0; - } else if (tag == "ABBR") { - return 0; - } else if (tag == "ACRONYM") { - return 0; - } else if (tag == "BLOCKQUOTE") { - return 0; - } else if (tag == "Q") { - return 0; - } else if (tag == "INS") { - return 0; - } else if (tag == "DEL") { - return 0; - } else if (tag == "BODY") { - return 0; - */ - return new DummyHtmlTagAction(*this); -} - -void HtmlBookReader::setBuildTableOfContent(bool build) { - myBuildTableOfContent = build; -} - -void HtmlBookReader::setProcessPreTag(bool process) { - myProcessPreTag = process; -} - -HtmlBookReader::HtmlBookReader(const std::string &baseDirectoryPath, BookModel &model, const PlainTextFormat &format, const std::string &encoding) : HtmlReader(encoding), myBookReader(model), myBaseDirPath(baseDirectoryPath), myFormat(format), myBuildTableOfContent(true), myProcessPreTag(true) { -} - -HtmlBookReader::~HtmlBookReader() { -} - -void HtmlBookReader::addConvertedDataToBuffer(const char *text, std::size_t len, bool convert) { - if (len > 0) { - if (myDontBreakParagraph) { - while (len > 0 && std::isspace(*text)) { - --len; - ++text; - } - if (len == 0) { - return; - } - } - if (convert) { - myConverter->convert(myConverterBuffer, text, text + len); - myBookReader.addData(myConverterBuffer); - myBookReader.addContentsData(myConverterBuffer); - myConverterBuffer.erase(); - } else { - std::string strText(text, len); - myBookReader.addData(strText); - myBookReader.addContentsData(strText); - } - myDontBreakParagraph = false; - } -} - -bool HtmlBookReader::tagHandler(const HtmlTag &tag) { - myConverter->reset(); - - for (unsigned int i = 0; i < tag.Attributes.size(); ++i) { - if (tag.Attributes[i].Name == "ID") { - myBookReader.addHyperlinkLabel(tag.Attributes[i].Value); - break; - } - } - shared_ptr<HtmlTagAction> action = myActionMap[tag.Name]; - if (action.isNull()) { - action = createAction(tag.Name); - myActionMap[tag.Name] = action; - } - action->run(tag); - - return true; -} - -void HtmlBookReader::preformattedCharacterDataHandler(const char *text, std::size_t len, bool convert) { - const char *start = text; - const char *end = text + len; - - int breakType = myFormat.breakType(); - if (breakType & PlainTextFormat::BREAK_PARAGRAPH_AT_NEW_LINE) { - for (const char *ptr = text; ptr != end; ++ptr) { - if (*ptr == '\n') { - mySpaceCounter = 0; - if (start < ptr) { - addConvertedDataToBuffer(start, ptr - start, convert); - } else { - static const std::string SPACE = " "; - myBookReader.addData(SPACE); - } - myBookReader.endParagraph(); - myBookReader.beginParagraph(); - start = ptr + 1; - } else if (mySpaceCounter >= 0) { - if (std::isspace((unsigned char)*ptr)) { - ++mySpaceCounter; - } else { - myBookReader.addFixedHSpace(mySpaceCounter); - mySpaceCounter = -1; - } - } - } - addConvertedDataToBuffer(start, end - start, convert); - } else if (breakType & PlainTextFormat::BREAK_PARAGRAPH_AT_LINE_WITH_INDENT) { - for (const char *ptr = text; ptr != end; ++ptr) { - if (std::isspace((unsigned char)*ptr)) { - if (*ptr == '\n') { - mySpaceCounter = 0; - } else if (mySpaceCounter >= 0) { - ++mySpaceCounter; - } - } else { - if (mySpaceCounter > myFormat.ignoredIndent()) { - if (ptr - start > mySpaceCounter) { - addConvertedDataToBuffer(start, ptr - start - mySpaceCounter, convert); - myBookReader.endParagraph(); - myBookReader.beginParagraph(); - } - start = ptr; - } - mySpaceCounter = -1; - } - } - mySpaceCounter = std::max(mySpaceCounter, 0); - if (end - start > mySpaceCounter) { - addConvertedDataToBuffer(start, end - start - mySpaceCounter, convert); - } - } else if (breakType & PlainTextFormat::BREAK_PARAGRAPH_AT_EMPTY_LINE) { - for (const char *ptr = start; ptr != end; ++ptr) { - if (std::isspace((unsigned char)*ptr)) { - if (*ptr == '\n') { - ++myBreakCounter; - } - } else { - if (myBreakCounter > 1) { - addConvertedDataToBuffer(start, ptr - start, convert); - myBookReader.endParagraph(); - myBookReader.beginParagraph(); - start = ptr; - } - myBreakCounter = 0; - } - } - addConvertedDataToBuffer(start, end - start, convert); - } -} - -bool HtmlBookReader::characterDataHandler(const char *text, std::size_t len, bool convert) { - if (!myStyleSheetParser.isNull()) { - myStyleSheetParser->parse(text, len); - return true; - } - - if (myIgnoreDataCounter != 0) { - return true; - } - - if (myIsPreformatted) { - preformattedCharacterDataHandler(text, len, convert); - return true; - } - - const char *ptr = text; - const char *end = text + len; - if (!myIsStarted) { - for (; ptr != end; ++ptr) { - if (!std::isspace((unsigned char)*ptr)) { - myIsStarted = true; - break; - } - } - } - if (myIsStarted) { - addConvertedDataToBuffer(ptr, end - ptr, convert); - } - return true; -} - -void HtmlBookReader::startDocumentHandler() { - while (!myListNumStack.empty()) { - myListNumStack.pop(); - } - myConverterBuffer.erase(); - myKindList.clear(); - - myBookReader.reset(); - myBookReader.setMainTextModel(); - myBookReader.pushKind(REGULAR); - myBookReader.beginParagraph(); - myIgnoreDataCounter = 0; - myIsPreformatted = false; - myDontBreakParagraph = false; - for (std::map<std::string,shared_ptr<HtmlTagAction> >::const_iterator it = myActionMap.begin(); it != myActionMap.end(); ++it) { - it->second->reset(); - } - myIsStarted = false; - myIgnoreTitles = false; - - myStyleSheetParser = 0; - - mySpaceCounter = -1; - myBreakCounter = 0; -} - -void HtmlBookReader::endDocumentHandler() { - myBookReader.endParagraph(); -} - -void HtmlBookReader::setFileName(const std::string fileName) { - myFileName = fileName; -} |
