summaryrefslogtreecommitdiffstats
path: root/reader/src/formats/html/HtmlReaderStream.cpp
diff options
context:
space:
mode:
authorMichele Calgaro <michele.calgaro@yahoo.it>2024-06-07 23:30:05 +0900
committerMichele Calgaro <michele.calgaro@yahoo.it>2024-06-07 23:30:05 +0900
commit17b259df9cb6b28779d4881b2b6c805ee2e48eea (patch)
tree5ed61937459cb7081089111b0242c01ec178f1f3 /reader/src/formats/html/HtmlReaderStream.cpp
parent1cba8bce178eb2d6719c6f7f21e2c9352c5513a6 (diff)
downloadtde-ebook-reader-17b259df9cb6b28779d4881b2b6c805ee2e48eea.tar.gz
tde-ebook-reader-17b259df9cb6b28779d4881b2b6c805ee2e48eea.zip
Rename to tde-ebook-reader
Signed-off-by: Michele Calgaro <michele.calgaro@yahoo.it>
Diffstat (limited to 'reader/src/formats/html/HtmlReaderStream.cpp')
-rw-r--r--reader/src/formats/html/HtmlReaderStream.cpp128
1 files changed, 128 insertions, 0 deletions
diff --git a/reader/src/formats/html/HtmlReaderStream.cpp b/reader/src/formats/html/HtmlReaderStream.cpp
new file mode 100644
index 0000000..08c43ae
--- /dev/null
+++ b/reader/src/formats/html/HtmlReaderStream.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2008-2012 Geometer Plus <contact@geometerplus.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <cstdlib>
+#include <cstring>
+#include <algorithm>
+
+#include "HtmlReaderStream.h"
+#include "HtmlReader.h"
+
+class HtmlTextOnlyReader : public HtmlReader {
+
+public:
+ HtmlTextOnlyReader(char *buffer, std::size_t maxSize);
+ std::size_t size() const;
+
+private:
+ void startDocumentHandler();
+ void endDocumentHandler();
+
+ bool tagHandler(const HtmlTag &tag);
+ bool characterDataHandler(const char *text, std::size_t len, bool convert);
+
+private:
+ char *myBuffer;
+ std::size_t myMaxSize;
+ std::size_t myFilledSize;
+ bool myIgnoreText;
+};
+
+HtmlTextOnlyReader::HtmlTextOnlyReader(char *buffer, std::size_t maxSize) : HtmlReader(std::string()), myBuffer(buffer), myMaxSize(maxSize), myFilledSize(0), myIgnoreText(false) {
+}
+
+std::size_t HtmlTextOnlyReader::size() const {
+ return myFilledSize;
+}
+
+void HtmlTextOnlyReader::startDocumentHandler() {
+}
+
+void HtmlTextOnlyReader::endDocumentHandler() {
+}
+
+bool HtmlTextOnlyReader::tagHandler(const HtmlTag &tag) {
+ if (tag.Name == "SCRIPT") {
+ myIgnoreText = tag.Start;
+ }
+ if ((myFilledSize < myMaxSize) && (myFilledSize > 0) && (myBuffer[myFilledSize - 1] != '\n')) {
+ myBuffer[myFilledSize++] = '\n';
+ }
+ return myFilledSize < myMaxSize;
+}
+
+bool HtmlTextOnlyReader::characterDataHandler(const char *text, std::size_t len, bool) {
+ if (!myIgnoreText) {
+ len = std::min((std::size_t)len, myMaxSize - myFilledSize);
+ std::memcpy(myBuffer + myFilledSize, text, len);
+ myFilledSize += len;
+ }
+ return myFilledSize < myMaxSize;
+}
+
+HtmlReaderStream::HtmlReaderStream(shared_ptr<ZLInputStream> base, std::size_t maxSize) : myBase(base), myBuffer(0), mySize(maxSize) {
+}
+
+HtmlReaderStream::~HtmlReaderStream() {
+ close();
+}
+
+bool HtmlReaderStream::open() {
+ if (myBase.isNull() || !myBase->open()) {
+ return false;
+ }
+ myBuffer = new char[mySize];
+ HtmlTextOnlyReader reader(myBuffer, mySize);
+ reader.readDocument(*myBase);
+ mySize = reader.size();
+ myOffset = 0;
+ myBase->close();
+ return true;
+}
+
+std::size_t HtmlReaderStream::read(char *buffer, std::size_t maxSize) {
+ maxSize = std::min(maxSize, mySize - myOffset);
+ if (buffer != 0) {
+ std::memcpy(buffer, myBuffer, maxSize);
+ }
+ myOffset += maxSize;
+ return maxSize;
+}
+
+void HtmlReaderStream::close() {
+ if (myBuffer != 0) {
+ delete[] myBuffer;
+ myBuffer = 0;
+ }
+}
+
+void HtmlReaderStream::seek(int offset, bool absoluteOffset) {
+ if (!absoluteOffset) {
+ offset += myOffset;
+ }
+ myOffset = std::min(mySize, (std::size_t)std::max(0, offset));
+}
+
+std::size_t HtmlReaderStream::offset() const {
+ return myOffset;
+}
+
+std::size_t HtmlReaderStream::sizeOfOpened() {
+ return mySize;
+}