summaryrefslogtreecommitdiffstats
path: root/klinkstatus/src/parser
diff options
context:
space:
mode:
authortoma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>2009-11-25 17:56:58 +0000
committertoma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>2009-11-25 17:56:58 +0000
commite9ae80694875f869892f13f4fcaf1170a00dea41 (patch)
treeaa2f8d8a217e2d376224c8d46b7397b68d35de2d /klinkstatus/src/parser
downloadtdewebdev-e9ae80694875f869892f13f4fcaf1170a00dea41.tar.gz
tdewebdev-e9ae80694875f869892f13f4fcaf1170a00dea41.zip
Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features.
BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdewebdev@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
Diffstat (limited to 'klinkstatus/src/parser')
-rw-r--r--klinkstatus/src/parser/Makefile.am6
-rw-r--r--klinkstatus/src/parser/htmlparser.cpp455
-rw-r--r--klinkstatus/src/parser/htmlparser.h124
-rw-r--r--klinkstatus/src/parser/http.cpp87
-rw-r--r--klinkstatus/src/parser/http.h79
-rw-r--r--klinkstatus/src/parser/mstring.cpp278
-rw-r--r--klinkstatus/src/parser/mstring.h174
-rw-r--r--klinkstatus/src/parser/node.cpp255
-rw-r--r--klinkstatus/src/parser/node.h279
-rw-r--r--klinkstatus/src/parser/node_impl.h412
-rw-r--r--klinkstatus/src/parser/url.cpp350
-rw-r--r--klinkstatus/src/parser/url.h57
12 files changed, 2556 insertions, 0 deletions
diff --git a/klinkstatus/src/parser/Makefile.am b/klinkstatus/src/parser/Makefile.am
new file mode 100644
index 00000000..b99146c1
--- /dev/null
+++ b/klinkstatus/src/parser/Makefile.am
@@ -0,0 +1,6 @@
+# Builds the parser sources below into libparser.la, listed under noinst_
+# (a libtool library that is built but not installed).
+INCLUDES = $(all_includes)
+METASOURCES = AUTO
+noinst_HEADERS = htmlparser.h http.h mstring.h node.h node_impl.h url.h
+libparser_la_LDFLAGS = $(all_libraries)
+noinst_LTLIBRARIES = libparser.la
+libparser_la_SOURCES = htmlparser.cpp http.cpp mstring.cpp node.cpp url.cpp
diff --git a/klinkstatus/src/parser/htmlparser.cpp b/klinkstatus/src/parser/htmlparser.cpp
new file mode 100644
index 00000000..6bc93761
--- /dev/null
+++ b/klinkstatus/src/parser/htmlparser.cpp
@@ -0,0 +1,455 @@
+/***************************************************************************
+ * Copyright (C) 2004 by Paulo Moura Guedes *
+ * moura@kdewebdev.org *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
+ ***************************************************************************/
+
+#include "htmlparser.h"
+
+#include <kapplication.h>
+#include <kdebug.h>
+
+
+/**
+ * Parses an HTML document and collects the nodes of every supported element type.
+ *
+ * Script blocks and comments are stripped from the internal copy of the document
+ * first; the parseNodesOfType*() passes then run over what is left.
+ *
+ * @param documento HTML text to parse; asserted to be non-empty.
+ */
+HtmlParser::HtmlParser(QString const& documento)
+ : is_content_type_set_(false), document_(documento)
+{
+ Q_ASSERT(!documento.isEmpty());
+
+ stripScriptContent();
+ stripComments(); // after removing the scripts, because comments inside scripts have a different syntax
+
+ nodes_.reserve(estimativaLinks(documento.length() * 2)); // rough guess, taken on trust ;)
+
+ parseNodesOfTypeA();
+ parseNodesOfTypeAREA();
+ parseNodesOfTypeLINK();
+ parseNodesOfTypeMETA();
+ parseNodesOfTypeIMG();
+ parseNodesOfTypeFRAME();
+ parseNodesOfTypeIFRAME();
+ parseNodesOfTypeBASE();
+ parseNodesOfTypeTITLE();
+}
+
+/** Tells whether a BASE element carrying a non-empty URL was found. */
+bool HtmlParser::hasBaseUrl() const
+{
+    if(node_BASE_.element() != Node::BASE)
+        return false;
+
+    return !node_BASE_.url().isEmpty();
+}
+
+/**
+ * Returns the parsed BASE node.
+ * Asserts hasBaseUrl(); check it before calling.
+ */
+NodeBASE const& HtmlParser::baseUrl() const
+{
+ Q_ASSERT(hasBaseUrl());
+ return node_BASE_;
+}
+
+/**
+ * Returns the first META node whose http-equiv is "Content-Type".
+ * Asserts hasContentType(); check it before calling.
+ */
+NodeMETA const& HtmlParser::contentTypeMetaNode() const
+{
+ Q_ASSERT(hasContentType());
+ return node_META_content_type_;
+}
+
+/** Tells whether a TITLE element with a non-empty title text was found. */
+bool HtmlParser::hasTitle() const
+{
+    if(node_TITLE_.element() != Node::TITLE)
+        return false;
+
+    return !node_TITLE_.attributeTITLE().isEmpty();
+}
+
+/**
+ * Returns the parsed TITLE node.
+ * Asserts hasTitle(); check it before calling.
+ */
+NodeTITLE const& HtmlParser::title() const
+{
+ Q_ASSERT(hasTitle());
+ return node_TITLE_;
+}
+
+/**
+ * Runs the static parseNodesOfType() over this parser's document.
+ *
+ * @param element element name, e.g. "IMG".
+ * @return reference to the member scratch vector aux_; its contents are
+ *         replaced by the next call, so use the result before calling again.
+ */
+vector<QString> const& HtmlParser::parseNodesOfType(QString const& element)
+{
+ HtmlParser::parseNodesOfType(element, document_, aux_);
+ return aux_;
+}
+
+/**
+ * Collects the raw text of every <tipo ...> element found in document.
+ *
+ * For "A" the captured text runs up to and including the closing "</A>"; for any
+ * other element it runs up to the end of the opening tag as reported by endOfTag().
+ *
+ * @param tipo     element name without the angle bracket, e.g. "IMG".
+ * @param document HTML text to scan; a local copy is consumed while scanning.
+ * @param nodes    output vector; cleared first, then filled with one string per match.
+ */
+void HtmlParser::parseNodesOfType(QString const& tipo, QString const& document, vector<QString>& nodes)
+{
+ QString node;
+ QString doc(document);
+ int inicio = 0, fim = 0;
+
+ nodes.clear();
+ if(upperCase(tipo) == "A")
+ nodes.reserve(estimativaLinks(doc.length() * 2));
+
+ while(true)
+ {
+ // inicio is the index just past the matched "<tipo".
+ inicio = findSeparableWord(doc, "<" + tipo);
+ if(inicio == -1)
+ return;
+
+ // The tag name must be followed by whitespace; otherwise the match is the
+ // prefix of a longer name, so drop a few leading characters and search again.
+ //if( (doc[inicio] != ' ' && doc[inicio] != '\n' && doc[inicio] != '\r') )
+ if(!::isSpace(doc[inicio]))
+ {
+ doc.remove(0, QString("<" + tipo).length());
+ continue;
+ }
+
+ if(upperCase(tipo) == "A")
+ fim = findWord(doc, "</A>", inicio);
+ else
+ {
+ //fim = findChar(doc, '>', inicio + 1);
+ fim = endOfTag(doc, inicio, '>');
+ }
+
+ // No end found for this match: discard one leading character and retry.
+ if(fim == -1)
+ {
+ doc.remove(0, 1);
+ continue;
+ }
+
+ // Step back over "<tipo" so the stored text starts at the '<'.
+ // NOTE(review): this assumes no whitespace was skipped inside the match — confirm.
+ int tag_begining_go_back = (tipo.length() + QString("<").length());
+ node = doc.mid(inicio - tag_begining_go_back,
+ fim - inicio + tag_begining_go_back);
+ nodes.push_back(node);
+ // Consume everything up to the end of this element before the next search.
+ doc.remove(0, fim);
+ }
+}
+
+/**
+ * Locates the end of a tag, skipping over end_of_tag characters that sit
+ * inside a double-quoted section.
+ *
+ * @param s          text to scan.
+ * @param index      position to start scanning from.
+ * @param end_of_tag character that closes the tag.
+ * @return index just past the closing character, or -1 when none is found.
+ */
+int HtmlParser::endOfTag(QString const& s, int index, QChar end_of_tag)
+{
+    if( (uint)index >= s.length() )
+        return -1;
+
+    int const tag_end = s.find(end_of_tag, index);
+    if(tag_end == -1)
+        return -1;
+
+    int const quote_open = s.find('"', index);
+
+    // No quote ahead, or the tag closes before the first quote opens.
+    if(quote_open == -1 || tag_end < quote_open)
+        return tag_end + 1;
+
+    // The opening quote sits at the very end of the text: nothing can close it.
+    if( ((uint)quote_open + 1) >= s.length() - 1 )
+        return -1;
+
+    int const quote_close = s.find('"', quote_open + 1);
+    if(quote_close == -1)
+    {
+        kdDebug(23100) << "Mismatched quotes (\"): " << s.mid(index, tag_end - index) << endl;
+        return tag_end + 1;
+    }
+
+    // Resume the search right after the quoted section.
+    return endOfTag(s, quote_close + 1, end_of_tag);
+}
+
+/** Returns the nodes collected by the constructor, in the order the element passes ran. */
+vector<Node*> const& HtmlParser::nodes() const
+{
+ return nodes_;
+}
+
+
+/** Appends one heap-allocated NodeA to nodes_ for every A element in the document. */
+void HtmlParser::parseNodesOfTypeA()
+{
+    vector<QString> const& anchors = parseNodesOfType("A");
+
+    vector<QString>::const_iterator it = anchors.begin();
+    for(; it != anchors.end(); ++it)
+        nodes_.push_back( new NodeA(*it) );
+}
+
+/** Appends one heap-allocated NodeAREA to nodes_ for every AREA element in the document. */
+void HtmlParser::parseNodesOfTypeAREA()
+{
+    vector<QString> const& areas = parseNodesOfType("AREA");
+
+    vector<QString>::const_iterator it = areas.begin();
+    for(; it != areas.end(); ++it)
+        nodes_.push_back( new NodeAREA(*it) );
+}
+
+/** Appends one heap-allocated NodeLINK to nodes_ for every LINK element in the document. */
+void HtmlParser::parseNodesOfTypeLINK()
+{
+    vector<QString> const& links = parseNodesOfType("LINK");
+
+    vector<QString>::const_iterator it = links.begin();
+    for(; it != links.end(); ++it)
+    {
+        nodes_.push_back( new NodeLINK(*it) );
+    }
+}
+
+/**
+ * Appends one heap-allocated NodeMETA to nodes_ for every META element, and
+ * remembers the first one whose http-equiv equals "Content-Type" (compared
+ * case-insensitively) in node_META_content_type_.
+ */
+void HtmlParser::parseNodesOfTypeMETA()
+{
+    vector<QString> const& metas = parseNodesOfType("META");
+
+    vector<QString>::const_iterator it = metas.begin();
+    for(; it != metas.end(); ++it)
+    {
+        NodeMETA* meta = new NodeMETA(*it);
+        nodes_.push_back(meta);
+
+        // Only the first Content-Type declaration is kept.
+        if(is_content_type_set_)
+            continue;
+
+        if(meta->atributoHTTP_EQUIV().lower() == QString("Content-Type").lower())
+        {
+            is_content_type_set_ = true;
+            node_META_content_type_.setNode(*it);
+        }
+    }
+}
+
+/**
+ * Scans only the META elements of html and returns the charset of the first
+ * one whose http-equiv equals "Content-Type" (compared case-insensitively).
+ *
+ * @param html HTML text to scan.
+ * @return the charset reported by that META node, or a null QString when no
+ *         such element exists.
+ */
+QString HtmlParser::findCharsetInMetaElement(QString const& html)
+{
+    vector<QString> meta_tags;
+    parseNodesOfType("META", html, meta_tags);
+
+    vector<QString>::const_iterator it = meta_tags.begin();
+    for(; it != meta_tags.end(); ++it)
+    {
+        NodeMETA meta(*it);
+        if(meta.atributoHTTP_EQUIV().lower() == QString("Content-Type").lower())
+            return meta.charset();
+    }
+
+    return QString();
+}
+
+/** Appends one heap-allocated NodeIMG to nodes_ for every IMG element in the document. */
+void HtmlParser::parseNodesOfTypeIMG()
+{
+    vector<QString> const& images = parseNodesOfType("IMG");
+
+    vector<QString>::const_iterator it = images.begin();
+    for(; it != images.end(); ++it)
+    {
+        nodes_.push_back( new NodeIMG(*it) );
+    }
+}
+
+/** Appends one heap-allocated NodeFRAME to nodes_ for every FRAME element in the document. */
+void HtmlParser::parseNodesOfTypeFRAME()
+{
+    vector<QString> const& frames = parseNodesOfType("FRAME");
+
+    vector<QString>::const_iterator it = frames.begin();
+    for(; it != frames.end(); ++it)
+    {
+        nodes_.push_back( new NodeFRAME(*it) );
+    }
+}
+
+/**
+ * Appends one heap-allocated node to nodes_ for every IFRAME element.
+ * IFRAME elements are represented with the NodeFRAME class.
+ */
+void HtmlParser::parseNodesOfTypeIFRAME()
+{
+    vector<QString> const& iframes = parseNodesOfType("IFRAME");
+
+    vector<QString>::const_iterator it = iframes.begin();
+    for(; it != iframes.end(); ++it)
+    {
+        nodes_.push_back( new NodeFRAME(*it) );
+    }
+}
+
+/**
+ * Looks for the first "<BASE" in the document and, when it is followed by
+ * whitespace and a closing '>', stores the text between them in node_BASE_.
+ * Leaves node_BASE_ untouched otherwise.
+ */
+void HtmlParser::parseNodesOfTypeBASE()
+{
+    QString html = document_;
+
+    // Index just past "<BASE".
+    int const start = findSeparableWord(html, "<BASE");
+    if(start == -1)
+        return;
+    if(!html[start].isSpace())
+        return;
+
+    int const end = html.find(">", start);
+    if(end == -1)
+        return;
+
+    QString base_text = html.mid(start, end - start);
+    node_BASE_.setNode(base_text);
+}
+
+/**
+ * Looks for "<TITLE>" followed by "</TITLE>" and stores the text that starts
+ * right after the opening tag and runs through the closing tag in node_TITLE_.
+ * Leaves node_TITLE_ untouched when either tag is missing.
+ */
+void HtmlParser::parseNodesOfTypeTITLE()
+{
+    QString html = document_;
+
+    // Index just past "<TITLE>".
+    int const start = findSeparableWord(html, "<TITLE>");
+    if(start == -1)
+        return;
+
+    // Index just past "</TITLE>".
+    int const end = findSeparableWord(html, "</TITLE>", start);
+    if(end == -1)
+        return;
+
+    QString title_text = html.mid(start, end - start);
+    node_TITLE_.setNode(title_text);
+}
+
+
+/**
+ * Removes every "<!-- ... -->" section from document_ and appends the removed
+ * text to comments_. An opener without a matching closer is removed on its own.
+ */
+void HtmlParser::stripComments()
+{
+    QString opener = "<!--";
+    QString closer = "-->";
+    uint const opener_length = opener.length();
+
+    for(;;)
+    {
+        // Index just past the opener.
+        int const after_opener = findWord(document_, opener);
+        if(after_opener == -1)
+            break;
+
+        uint const opener_pos = after_opener - opener_length;
+
+        // Index just past the closer.
+        int const after_closer = findWord(document_, closer, after_opener);
+        if(after_closer == -1)
+        {
+            kdDebug(23100) << "End of comment is missing!" << endl;
+            document_.remove(opener_pos, opener_length);
+            continue;
+        }
+
+        uint const span = after_closer - after_opener + opener_length;
+        comments_ += "\n" + document_.mid(opener_pos, span);
+        document_.remove(opener_pos, span);
+    }
+}
+
+/**
+ * Removes every "<script ... </script>" section from document_ and appends the
+ * removed text to script_. A "<script" without a closing tag is removed on its own.
+ */
+void HtmlParser::stripScriptContent()
+{
+    QString const opener = "<script";
+    QString const closer = "</script>";
+    uint const opener_length = opener.length();
+
+    for(;;)
+    {
+        // Index just past "<script".
+        int const after_opener = findWord(document_, opener);
+        if(after_opener == -1)
+            break;
+
+        uint const opener_pos = after_opener - opener_length;
+
+        // Index just past "</script>".
+        int const after_closer = findWord(document_, closer, after_opener);
+        if(after_closer == -1)
+        {
+            kdDebug(23100) << "Malformed script tag!" << endl;
+            document_.remove(opener_pos, opener_length);
+            continue;
+        }
+
+        uint const span = after_closer - after_opener + opener_length;
+        script_ += "\n" + document_.mid(opener_pos, span);
+        document_.remove(opener_pos, span);
+    }
+}
+
+
+
+
+#include <iostream>
+/**
+ * Debug helper: writes the collected nodes to kdDebug area 23100, grouped by
+ * element type (A, LINK, META, IMG, FRAME), followed by the BASE url.
+ */
+void HtmlParser::mostra() const
+{
+ kdDebug(23100) << "\nA:\n\n";
+ for(unsigned int i = 0; i != nodes_.size(); ++i)
+ {
+ if(nodes_[i]->element() == Node::A)
+ kdDebug(23100) << nodes_[i]->url() << "\t" << nodes_[i]->linkLabel() << endl;
+ }
+ kdDebug(23100) << "____________________________________________________________________" << endl;
+
+ kdDebug(23100) << "\nLINK:\n\n";
+ for(unsigned int i = 0; i != nodes_.size(); ++i)
+ {
+ if(nodes_[i]->element() == Node::LINK)
+ kdDebug(23100) << nodes_[i]->url() << "\t" << nodes_[i]->linkLabel() << endl;
+ }
+ kdDebug(23100) << "____________________________________________________________________" << endl;
+
+ kdDebug(23100) << "\nMETA:\n";
+ for(unsigned int i = 0; i != nodes_.size(); ++i)
+ {
+ if(nodes_[i]->element() == Node::META)
+ {
+ // A plain cast is used on Q_WS_WIN, dynamic_cast elsewhere; the result is
+ // not null-checked because element() already identified the node as META.
+#if defined Q_WS_WIN
+ NodeMETA* nm = (NodeMETA*)nodes_[i];
+#else
+
+ NodeMETA* nm = dynamic_cast<NodeMETA*>(nodes_[i]);
+#endif
+
+ kdDebug(23100) << nm->url() << endl
+ << nm->atributoHTTP_EQUIV() << endl
+ << nm->atributoNAME() << endl
+ << nm->atributoCONTENT() << endl;
+ }
+ }
+ kdDebug(23100) << "____________________________________________________________________" << endl;
+
+ kdDebug(23100) << "\nIMG:\n\n";
+ for(unsigned int i = 0; i != nodes_.size(); ++i)
+ {
+ if(nodes_[i]->element() == Node::IMG)
+ kdDebug(23100) << nodes_[i]->url() << "\t"
+ << nodes_[i]->linkLabel() << endl;
+ }
+ kdDebug(23100) << "____________________________________________________________________" << endl;
+
+ kdDebug(23100) << "\nFRAME:\n\n";
+ for(unsigned int i = 0; i != nodes_.size(); ++i)
+ {
+ if(nodes_[i]->element() == Node::FRAME)
+ kdDebug(23100) << nodes_[i]->url() << endl;
+ }
+ kdDebug(23100) << "____________________________________________________________________" << endl;
+
+ kdDebug(23100) << "\nBASE:\n\n";
+ kdDebug(23100) << node_BASE_.url() << endl;
+
+ kdDebug(23100) << "____________________________________________________________________" << endl;
+
+}
+
+#ifdef HTMLPARSER
+
+#include <fstream>
+
+/**
+ * Stand-alone test driver (built only when HTMLPARSER is defined): parses a
+ * local HTML file, dumps the parser's nodes and prints the content attribute
+ * of every META node.
+ */
+int main()
+{
+    //ifstream stream("aterraprometida.html");
+    //ifstream stream("/var/www/html/STL/standard_library.html");
+    //ifstream stream("/var/www/html/qt-doc/functions.html");
+    ifstream stream("/var/www/html/index.html");
+
+    // Append only characters that were actually read; testing the stream
+    // before get() would add one stale character after the last failed read.
+    QString content;
+    char c;
+    while(stream.get(c))
+        content += c;
+
+    // kdDebug(23100) << content << endl;
+    kdDebug(23100) << "__________________________________________________________" << endl;
+    HtmlParser parser(content);
+    parser.mostra();
+    kdDebug(23100) << "__________________________________________________________\n\n\n" << endl;
+
+    // Bind by reference: the parser outlives this loop, no copy is needed.
+    vector<Node*> const& nods = parser.nodes();
+    for(vector<Node*>::size_type i = 0; i != nods.size(); ++i)
+    {
+        if(nods[i]->element() == Node::META)
+        {
+            NodeMETA* nod_meta = (NodeMETA*)(nods[i]);
+            //Node* nod_meta = nods[i];
+
+            kdDebug(23100) << nod_meta->atributoCONTENT() << endl;
+        }
+    }
+}
+
+
+#endif
diff --git a/klinkstatus/src/parser/htmlparser.h b/klinkstatus/src/parser/htmlparser.h
new file mode 100644
index 00000000..cf487ebf
--- /dev/null
+++ b/klinkstatus/src/parser/htmlparser.h
@@ -0,0 +1,124 @@
+ /***************************************************************************
+ * Copyright (C) 2004 by Paulo Moura Guedes *
+ * moura@kdewebdev.org *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
+ ***************************************************************************/
+
+#ifndef HTML_PARSER_H
+#define HTML_PARSER_H
+
+#include <qstring.h>
+
+#include <vector>
+
+
+#include "mstring.h"
+#include "node.h"
+
+#include <iostream>
+
+using namespace std;
+
+typedef unsigned int uint;
+
+
+
+/**
+ * Extracts the link-carrying elements (A, AREA, LINK, META, IMG, FRAME, IFRAME),
+ * plus BASE and TITLE, from an HTML document held as a QString.
+ * All parsing is done by the constructor; the accessors only read the results.
+ */
+class HtmlParser
+{
+public:
+
+ HtmlParser();
+ HtmlParser(QString const& documento);
+ ~HtmlParser();
+
+ /** Nodes collected by the constructor. */
+ vector<Node*> const& nodes() const;
+ bool hasBaseUrl() const;
+ bool hasTitle() const;
+ bool hasContentType() const;
+ /** Only valid when hasBaseUrl() is true. */
+ NodeBASE const& baseUrl() const;
+ /** Only valid when hasTitle() is true. */
+ NodeTITLE const& title() const;
+ /** Only valid when hasContentType() is true. */
+ NodeMETA const& contentTypeMetaNode() const;
+
+ /** Estimated number of links for a document of doc_size characters. */
+ static uint estimativaLinks(uint doc_size);
+ /**
+ * Convenience function for performance, as it only parses the META
+ * elements in order to get the charset.
+ */
+ static QString findCharsetInMetaElement(QString const& html);
+
+ // Debug helper: dumps the collected nodes to the debug output.
+ void mostra() const;
+
+private:
+
+ /** Parses this parser's document; the result lives in aux_ until the next call. */
+ vector<QString> const& parseNodesOfType(QString const& element);
+ /**
+ * The output vector is passed in by reference for performance.
+ */
+ static void parseNodesOfType(QString const& element, QString const& doc, vector<QString>& nodes);
+
+ void parseNodesOfTypeA();
+ void parseNodesOfTypeAREA();
+ void parseNodesOfTypeLINK();
+ void parseNodesOfTypeMETA();
+ void parseNodesOfTypeIMG();
+ void parseNodesOfTypeFRAME();
+ void parseNodesOfTypeIFRAME();
+ void parseNodesOfTypeBASE();
+ void parseNodesOfTypeTITLE();
+
+ void stripComments();
+ void stripScriptContent();
+
+ /**
+ Return the index of the character that follows the end of the tag,
+ ignoring end-of-tag characters inside double quotes.
+ e.g.
+ endOfTag("<img src=\"bad > luck\">") => 22 (not 15)
+ */
+ static int endOfTag(QString const& s, int index = 0, QChar end_of_tag = '>');
+
+private:
+
+ vector<QString> aux_; // scratch buffer returned by the one-argument parseNodesOfType()
+ // Raw pointers to nodes allocated with new by the parseNodesOfType*() passes.
+ // NOTE(review): ~HtmlParser() does not delete them — confirm who owns them.
+ vector<Node*> nodes_;
+ NodeBASE node_BASE_;
+ NodeTITLE node_TITLE_;
+ NodeMETA node_META_content_type_;
+ bool is_content_type_set_;
+
+ QString document_;
+ QString script_; // stripped script sections are kept here (JavaScript, etc.)
+ QString comments_; // stripped comment sections are kept here
+};
+
+
+/** Empty destructor; it releases nothing explicitly. */
+inline HtmlParser::~HtmlParser()
+{
+ //kdDebug(23100) << "*";
+}
+
+/**
+ * Estimates how many links a document contains: one per 100 units of doc_size.
+ * Used to pre-size the node vectors.
+ */
+inline uint HtmlParser::estimativaLinks(uint doc_size)
+{
+ return doc_size / 100; // estimated value...
+}
+
+/** True once a META element with http-equiv "Content-Type" has been recorded. */
+inline bool HtmlParser::hasContentType() const
+{
+ return is_content_type_set_;
+}
+
+#endif
diff --git a/klinkstatus/src/parser/http.cpp b/klinkstatus/src/parser/http.cpp
new file mode 100644
index 00000000..1133c937
--- /dev/null
+++ b/klinkstatus/src/parser/http.cpp
@@ -0,0 +1,87 @@
+/***************************************************************************
+ * Copyright (C) 2004 by Paulo Moura Guedes *
+ * moura@kdewebdev.org *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
+ ***************************************************************************/
+
+#include "http.h"
+#include "mstring.h"
+
+#include <kdebug.h>
+
+#include <iostream>
+
+
+
+/**
+ * Extracts the value that follows "Location: " in the header text and stores
+ * it in location_ (read it back with location()).
+ *
+ * The value ends at the first '\n' or '\r' after it, whichever comes first;
+ * when the header text has no line terminator after the value, the rest of
+ * the text is taken. When there is no "Location: " at all, location_ is
+ * cleared.
+ */
+void HttpResponseHeader::parseLocation()
+{
+    QString cabecalho(toString());
+
+    // Index just past "Location: " (case-insensitive search).
+    int location = findWord(cabecalho, "Location: ");
+    Q_ASSERT(location != -1);
+    if(location == -1)
+    {
+        // Q_ASSERT may not stop execution, so do not go on with index -1.
+        location_ = QString();
+        return;
+    }
+
+    int fim_de_linha_1 = cabecalho.find('\n', location);
+    int fim_de_linha_2 = cabecalho.find('\r', location);
+
+    Q_ASSERT(fim_de_linha_1 != -1 || fim_de_linha_2 != -1);
+
+    // Pick the nearest terminator that exists; -1 when neither exists.
+    int fim_de_linha;
+    if(fim_de_linha_1 == -1)
+        fim_de_linha = fim_de_linha_2;
+    else if(fim_de_linha_2 == -1)
+        fim_de_linha = fim_de_linha_1;
+    else if(fim_de_linha_1 < fim_de_linha_2)
+        fim_de_linha = fim_de_linha_1;
+    else
+        fim_de_linha = fim_de_linha_2;
+
+    if(fim_de_linha == -1)
+        location_ = cabecalho.mid(location);
+    else
+        location_ = cabecalho.mid(location, fim_de_linha - location);
+}
+
+/** Returns the charset parsed from this header's "content-type" value. */
+QString HttpResponseHeader::charset() const
+{
+ return HttpResponseHeader::charset(value("content-type"));
+}
+
+/**
+ * Extracts the charset from a Content-Type value such as
+ * "text/html; charset=EUC-JP".
+ *
+ * "charset=" is looked up first, then "charset:"; both are matched
+ * case-insensitively, so "Charset=UTF-8" is recognised as well.
+ *
+ * @param contentTypeHttpHeaderLine the Content-Type value to inspect.
+ * @return everything after the marker with surrounding whitespace removed,
+ *         or an empty string when the input is empty or has no marker.
+ */
+QString HttpResponseHeader::charset(QString const& contentTypeHttpHeaderLine)
+{
+    QString _charset;
+
+    if(contentTypeHttpHeaderLine.isEmpty())
+        return _charset;
+
+    // findWord() is case-insensitive and returns the index just past the match.
+    int index = findWord(contentTypeHttpHeaderLine, "charset=");
+    if(index == -1)
+        index = findWord(contentTypeHttpHeaderLine, "charset:");
+
+    if(index != -1)
+        _charset = contentTypeHttpHeaderLine.mid(index).stripWhiteSpace();
+
+// kdDebug(23100) << "Charset: |" << _charset << "|" << endl;
+    return _charset;
+}
diff --git a/klinkstatus/src/parser/http.h b/klinkstatus/src/parser/http.h
new file mode 100644
index 00000000..5878cfd1
--- /dev/null
+++ b/klinkstatus/src/parser/http.h
@@ -0,0 +1,79 @@
+/***************************************************************************
+ * Copyright (C) 2004 by Paulo Moura Guedes *
+ * moura@kdewebdev.org *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
+ ***************************************************************************/
+
+#ifndef HTTP_H
+#define HTTP_H
+
+#include <qhttp.h>
+#include <qstring.h>
+
+
+/**
+ * QHttpResponseHeader extended with helpers for the Location header and for
+ * the charset declared in the Content-Type header.
+ */
+class HttpResponseHeader: public QHttpResponseHeader
+{
+public:
+
+ HttpResponseHeader();
+ HttpResponseHeader(const QHttpResponseHeader & header);
+ /** Builds the header by parsing str. */
+ HttpResponseHeader(QString const& str);
+ virtual ~HttpResponseHeader();
+
+ /** Extracts the Location value from the header text; read it with location(). */
+ void parseLocation();
+ /** Value stored by the last parseLocation() call; empty before that. */
+ QString const& location() const;
+ /** Charset taken from this header's "content-type" value. */
+ QString charset() const;
+
+ /**
+ * Parses the charset from this kind of server response:
+ * Content-Type: text/html; charset=EUC-JP
+ * Returns an empty string when no charset is found.
+ */
+ static QString charset(QString const& contentTypeHttpHeaderLine);
+
+private:
+
+ QString location_;
+};
+
+
+/** Builds an empty header. */
+inline HttpResponseHeader::HttpResponseHeader()
+ : QHttpResponseHeader()
+{
+}
+
+/**
+ * Builds a header holding a copy of header.
+ * The argument is forwarded to the base-class copy constructor instead of
+ * being dropped, so the new object carries the source header's contents.
+ */
+inline HttpResponseHeader::HttpResponseHeader(const QHttpResponseHeader & header)
+    : QHttpResponseHeader(header)
+{
+}
+
+/** Builds a header by parsing the raw header text str. */
+inline HttpResponseHeader::HttpResponseHeader(QString const& str)
+ : QHttpResponseHeader()
+{
+ parse(str);
+}
+
+/** Empty virtual destructor. */
+inline HttpResponseHeader::~HttpResponseHeader()
+{
+}
+
+/** Returns the value stored by parseLocation(); empty until that has been called. */
+inline QString const& HttpResponseHeader::location() const
+{
+ return location_;
+}
+
+#endif
diff --git a/klinkstatus/src/parser/mstring.cpp b/klinkstatus/src/parser/mstring.cpp
new file mode 100644
index 00000000..114d6dc6
--- /dev/null
+++ b/klinkstatus/src/parser/mstring.cpp
@@ -0,0 +1,278 @@
+/***************************************************************************
+ * Copyright (C) 2004 by Paulo Moura Guedes *
+ * moura@kdewebdev.org *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
+ ***************************************************************************/
+
+#include "mstring.h"
+
+#include <iostream>
+
+
+using namespace std;
+
+
+/**
+ * Case-insensitive search for palavra in s, starting at a_partir_do_indice.
+ * @return the index just past the end of the first match, or -1 when absent.
+ */
+int findWord(QString const& s, QString const& palavra, uint a_partir_do_indice)
+{
+    int const match = s.find(palavra, a_partir_do_indice, false);
+    if(match != -1)
+        return (match + palavra.length());
+
+    return -1;
+}
+
+/**
+ * Case-insensitive search for letra in s, starting at a_partir_do_indice.
+ * @return the index just past the first match, or -1 when absent.
+ */
+int findChar(QString const& s, QChar letra, uint a_partir_do_indice)
+{
+    int const match = s.find(letra, a_partir_do_indice, false);
+    if(match != -1)
+        return match + 1;
+
+    return -1;
+}
+
+/**
+ Finds palavra in s, case-insensitively, allowing whitespace between the
+ characters of palavra (e.g. "<\na" matches "<a").
+ The string palavra must not contain any spaces.
+
+ Returns the index just past the last matched character, or -1 when there
+ is no match.
+*/
+int findSeparableWord(QString const& s_, QString const& palavra, uint a_partir_do_indice)
+{
+ bool encontrou = true;
+ QString s(s_);
+ uint indice_palavra = 0;
+ int indice = a_partir_do_indice;
+
+ do
+ {
+ encontrou = true;
+ indice_palavra = 0;
+
+ // Locate the next candidate for the first character of palavra.
+ indice = findChar(s, (palavra[indice_palavra++]), indice );
+
+ if(indice == -1)
+ {
+ return indice;
+ }
+ // findChar() returns the index after the match; step back onto it.
+ --indice;
+
+ // Match the remaining characters, skipping whitespace between them.
+ while(encontrou && indice_palavra != palavra.length() && indice < (int)s.length())
+ {
+ indice = nextNonSpaceChar(s, indice);
+
+ if(indice == -1)
+ return indice;
+
+ // The index is not incremented here because nextNonSpaceChar() already does that.
+ encontrou = encontrou && !(notEqual(s[indice], palavra[indice_palavra++]) );
+
+ }
+ }
+ // On a mismatch, retry from the position where the mismatch happened.
+ while(!encontrou && indice < (int)s.length());
+
+ if(encontrou && indice < (int)s.length())
+ return ++indice;
+ else
+ return -1;
+}
+
+/**
+ * Finds the first non-whitespace character strictly after position i.
+ *
+ * @param s text to scan.
+ * @param i position before the first one examined.
+ * @return index of that character, or -1 when only whitespace (or nothing)
+ *         follows position i.
+ */
+int nextNonSpaceChar(QString const& s, uint i)
+{
+    // The bound is tested before s[i] is read, so the scan never looks past the end.
+    for(++i; i < s.length() && isSpace(s[i]); ++i)
+        ;
+
+    if(i < s.length())
+        return i;
+    else
+        return -1;
+}
+
+
+/**
+ * Finds the first whitespace character at or after position i.
+ * e.g.
+ *   nextSpaceChar("o biltre", 0) => 1
+ *
+ * @param s text to scan.
+ * @param i first position examined.
+ * @return index of that character, or -1 when there is none.
+ */
+int nextSpaceChar(QString const& s, uint i)
+{
+    // The bound is tested before s[i] is read, so the scan never looks past the end.
+    while(i < s.length() && !isSpace(s[i]))
+        ++i;
+
+    if(i < s.length())
+        return i;
+    else
+        return -1;
+}
+
+/**
+ * Finds the first character at or after position i that differs from c.
+ *
+ * @param c character to skip over.
+ * @param s text to scan.
+ * @param i first position examined.
+ * @return index of that character, or -1 when every remaining character
+ *         equals c or i is already at or past the end of s.
+ */
+int nextCharDifferentThan(QChar c, QString const& s, uint i)
+{
+    while(i < s.length() && s[i] == c)
+        ++i;
+
+    // Use '<' so that a start position beyond the end also yields -1.
+    if(i < s.length())
+        return i;
+    else
+        return -1;
+}
+
+/**
+ * Splits s into the words separated by whitespace.
+ *
+ * @param s text to split; asserted to be non-empty. Taken by value because it
+ *          is consumed while splitting.
+ * @return the words, in order of appearance.
+ */
+vector<QString> tokenize(QString s)
+{
+    Q_ASSERT(!s.isEmpty());
+    vector<QString> words;
+
+    for(;;)
+    {
+        // Skip leading whitespace, if any.
+        int word_begin = 0;
+        if(isSpace(s[0]))
+            word_begin = nextNonSpaceChar(s, 0);
+        if(word_begin == -1)
+            break;
+
+        int const word_end = nextSpaceChar(s, word_begin);
+        if(word_end == -1)
+        {
+            // Last word: it runs to the end of the text.
+            words.push_back(s.mid(word_begin));
+            break;
+        }
+
+        words.push_back(s.mid(word_begin, word_end - word_begin));
+        s.remove(0, word_end);
+    }
+
+    return words;
+}
+
+/**
+ * Splits s into the words separated by one or more '.' characters.
+ * Delegates to tokenizeWordsSeparatedBy(), which implements the same loop for
+ * an arbitrary separator.
+ *
+ * @param s text to split.
+ * @return the words, in order of appearance.
+ */
+vector<QString> tokenizeWordsSeparatedByDots(QString s)
+{
+    return tokenizeWordsSeparatedBy(s, QChar('.'));
+}
+
+/**
+ * Splits s into the words separated by one or more occurrences of criteria.
+ *
+ * @param s        text to split; taken by value because it is consumed while
+ *                 splitting.
+ * @param criteria separator character.
+ * @return the words, in order of appearance.
+ */
+vector<QString> tokenizeWordsSeparatedBy(QString s, QChar criteria)
+{
+    vector<QString> words;
+
+    for(;;)
+    {
+        // Skip leading separators, if any.
+        int word_begin = 0;
+        if(s[0] == criteria)
+            word_begin = nextCharDifferentThan(criteria, s, 0);
+        if(word_begin == -1)
+            break;
+
+        int const word_end = s.find(criteria, word_begin);
+        if(word_end == -1)
+        {
+            // Last word: it runs to the end of the text.
+            words.push_back(s.mid(word_begin));
+            break;
+        }
+
+        words.push_back(s.mid(word_begin, word_end - word_begin));
+        s.remove(0, word_end);
+    }
+
+    return words;
+}
+
+
+
+#ifdef STRING
+//c++ -g -o teste_string mstring.cpp -DSTRING
+#include <fstream>
+
+/**
+ * Stand-alone test driver, built only when STRING is defined.
+ * NOTE(review): it passes a std::string to helpers declared with QString
+ * parameters and uses kdDebug without a visible include — it looks stale;
+ * confirm that it still builds before relying on it.
+ */
+int main(int argc, char* argv[])
+{
+ // Only the last assignment takes effect; the earlier ones are alternative inputs.
+ string s;
+ s = "S";
+ s = "Afazer";
+ s = "O MeU S sdadsadd ";
+ s = "www.trolltech.com/search/qt-interest/bla bla%20Bla";
+ s = "...http://w.ww..go.o.gle.p.t.......";
+
+ /*
+ ifstream stream("testeparser.html");
+ string content;
+ while(stream) {
+ char c;
+ stream.get(c);
+ content += c;
+ }
+ */
+ // kdDebug(23100) << simplifyWhiteSpace(content) << endl;
+ kdDebug(23100) << simplifyWhiteSpace(s) << endl;
+
+ /*
+ vector<string> v(tokenize(s));
+ for(int i = 0; i != v.size(); ++i)
+ kdDebug(23100) << v[i] << endl;
+ */
+
+ /*
+ int i = nextSpaceChar(s, 0);
+ i = nextNonSpaceChar(s, i);
+ kdDebug(23100) << s.substr(i) << endl;
+ */
+
+
+ vector<string> v(tokenizeWordsSeparatedByDots(s));
+ for(int i = 0; i != v.size(); ++i)
+ kdDebug(23100) << v[i] << endl;
+
+ removeLastCharIfExists(s, '/');
+ kdDebug(23100) << s << endl;
+
+ /*
+ kdDebug(23100) << findChar(s, 'T') << endl;
+ kdDebug(23100) << findWord(s, "trolltech") << endl;
+ kdDebug(23100) << findWord(s, "TROLLTECH") << endl;
+ kdDebug(23100) << findWord(s, "TROLLTECH", 2) << endl;
+ */
+ /*
+ stripWhiteSpace(s);
+ kdDebug(23100) << s << endl;
+ */
+}
+
+
+#endif
diff --git a/klinkstatus/src/parser/mstring.h b/klinkstatus/src/parser/mstring.h
new file mode 100644
index 00000000..cd359c7d
--- /dev/null
+++ b/klinkstatus/src/parser/mstring.h
@@ -0,0 +1,174 @@
+/***************************************************************************
+ * Copyright (C) 2004 by Paulo Moura Guedes *
+ * moura@kdewebdev.org *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
+ ***************************************************************************/
+
+#ifndef STRING_H
+#define STRING_H
+
+#include <qstring.h>
+
+#include <vector>
+#include <cctype>
+
+class QString;
+
+typedef unsigned int uint;
+
+
+/* Similar to std::string::find but return the next index of the last char
+ of the first word it finds.
+ Case insensitive.
+ e.g.
+ findWord("Biltre larvado", "biltre") => 6
+*/
+int findWord(QString const& s, QString const& palavra, uint a_partir_do_indice = 0);
+
+/**
+ Similar to std::string::find but return the next index of the first char
+ it finds.
+ Case insensitive.
+*/
+int findChar(QString const& s, QChar letra, uint a_partir_do_indice = 0);
+
+/**
+ Same as findWord, but whitespace between the characters of the word is skipped.
+ e.g.
+ findWord("<a href=""></a>", "<a") => 2
+ findSeparableWord("<a href=""></a>", "<a") => 2
+
+ findWord("<\na href=""></a>", "<a") => -1
+ findSeparableWord("<\na href=""></a>", "<a") => 3
+*/
+int findSeparableWord(QString const& s, QString const& palavra, uint a_partir_do_indice = 0);
+
+/**
+ Space means Unicode characters with decimal values
+ 9 (TAB), 10 (LF), 11 (VT), 12 (FF), 13 (CR), and 32 (Space).
+*/
+bool isSpace(QChar c);
+
+/**
+ Return -1 if unsuccessful.
+*/
+int nextNonSpaceChar(QString const& s, uint i);
+int nextNonSpaceCharReverse(QString const& s, uint i);
+int nextSpaceChar(QString const& s, uint i);
+
+int nextCharDifferentThan(QChar c, QString const& s, uint i);
+
+/** Return a vector with the words */
+std::vector<QString> tokenize(QString s);
+std::vector<QString> tokenizeWordsSeparatedByDots(QString s);
+std::vector<QString> tokenizeWordsSeparatedBy(QString s, QChar criteria);
+
+/**
+ Returns a string that has whitespace removed from the start and the end,
+ and which has each sequence of internal whitespace replaced with a single space.
+*/
+QString simplifyWhiteSpace(QString const& s);
+
+/**
+ If char 'caractere' is the last in the string 's' it is removed
+*/
+void removeLastCharIfExists(QString& s, QChar caractere);
+
+QString upperCase(QString const& s);
+QString lowerCase(QString const& s);
+
+/**
+ Remove whitespaces from the end of the string
+*/
+void stripWhiteSpaceFromTheEnd(QString& s);
+
+/**
+ Removes whitespace from the start and the end of the string, in place.
+*/
+void stripWhiteSpace(QString& s);
+
+/**
+ Case insensitive comparisons
+*/
+bool equal(QString const& s1, QString const& s2);
+bool notEqual(QString const& s1, QString const& s2);
+
+bool equal(QChar c1, QChar c2);
+bool notEqual(QChar c1, QChar c2);
+
+
+//_________________________________________________________________________
+
+/** Thin wrapper over QChar::isSpace(). */
+inline bool isSpace(QChar c)
+{
+ return c.isSpace();
+}
+
+/**
+ * Case-insensitive string comparison.
+ * The exact comparison is tried first so that identical strings skip the
+ * lower-casing.
+ */
+inline bool equal(QString const& s1, QString const& s2)
+{
+    return (s1 == s2) || (s1.lower() == s2.lower());
+}
+
+/** Negation of the case-insensitive equal() for strings. */
+inline bool notEqual(QString const& s1, QString const& s2)
+{
+ return !(equal(s1, s2));
+}
+
+/** Case-insensitive character comparison: both sides are lower-cased first. */
+inline bool equal(QChar c1, QChar c2)
+{
+ return c1.lower() == c2.lower();
+}
+
+/** Negation of the case-insensitive equal() for characters. */
+inline bool notEqual(QChar c1, QChar c2)
+{
+ return !(equal(c1, c2));
+}
+
+/** Returns an upper-case copy of s (QString::upper()). */
+inline QString upperCase(QString const& s)
+{
+ return s.upper();
+}
+
+/** Returns a lower-case copy of s (QString::lower()). */
+inline QString lowerCase(QString const& s)
+{
+ return s.lower();
+}
+
+/** Returns the result of QString::simplifyWhiteSpace() on s; s itself is not modified. */
+inline QString simplifyWhiteSpace(QString const& s)
+{
+ return s.simplifyWhiteSpace();
+}
+
+/**
+ * Removes the last character of s when it equals caractere.
+ * An empty string, or one that ends with any other character, is left untouched.
+ *
+ * @param s         string to modify in place.
+ * @param caractere character to strip from the end.
+ */
+inline void removeLastCharIfExists(QString& s, QChar caractere)
+{
+    // Nothing to inspect; also avoids computing index -1.
+    if(s.isEmpty())
+        return;
+
+    int const last = s.length() - 1;
+
+    // truncate() cuts the string at 'last', dropping exactly the final character.
+    // NOTE(review): Qt 3 QString::remove() has no single-index form, so the old
+    // remove(last) call did not drop the last character — confirm against the docs.
+    if(s[last] == caractere)
+        s.truncate(last);
+}
+
+/** Replaces s with the result of QString::stripWhiteSpace(), in place. */
+inline void stripWhiteSpace(QString& s)
+{
+ s = s.stripWhiteSpace();
+}
+
+
+
+
+#endif
diff --git a/klinkstatus/src/parser/node.cpp b/klinkstatus/src/parser/node.cpp
new file mode 100644
index 00000000..068184ae
--- /dev/null
+++ b/klinkstatus/src/parser/node.cpp
@@ -0,0 +1,255 @@
+/***************************************************************************
+ * Copyright (C) 2004 by Paulo Moura Guedes *
+ * moura@kdewebdev.org *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
+ ***************************************************************************/
+
+#include "node.h"
+#include "mstring.h"
+#include "url.h"
+#include "../utils/utils.h"
+
+
+/*
+ Node________________________________________________________________________
+*/
+
+/**
+ Extracts the value of the attribute 'atributo' (e.g. "HREF=") from the raw
+ node text.
+
+ The value may be enclosed in double quotes, in single quotes, or be
+ unquoted; an unquoted value ends at the position selected among the next
+ space character, the next '>' and the next '"'.
+
+ When no end of the value can be located the node is flagged as malformed
+ and the whole node text is returned, without being passed to decode().
+ When the attribute is not found an empty string is returned.
+*/
+QString Node::getAttribute(QString const& atributo)
+{
+    QString value;
+
+    int const start = findWord(content_, atributo);
+    if(start == -1)
+    {
+        // Attribute absent: an empty (non-null) string still goes through decode().
+        value = "";
+        ::decode(value);
+        return value;
+    }
+
+    int end = -1;
+    bool quoted = false;
+
+    if(content_[start] == '"')
+    {
+        end = content_.find("\"", start + 1);
+        quoted = true;
+    }
+    else if(content_[start] == '\'')
+    {
+        end = content_.find("'", start + 1);
+        quoted = true;
+    }
+    else
+    {
+        int const next_space = nextSpaceChar(content_, start + 1);
+        int const tag_end = content_.find(">", start + 1);
+        int const next_quote = content_.find("\"", start + 1);
+
+        if(next_space == -1 && tag_end == -1 && next_quote == -1)
+        {
+            value = content_;
+            malformed_ = true;
+            return value;
+        }
+
+        // Choose among the three candidate terminators.
+        if(smallerUnsigned(next_space, tag_end) == -1 &&
+                smallerUnsigned(next_space, next_quote) == -1)
+            end = next_space;
+        else if(smallerUnsigned(tag_end, next_quote) == -1)
+            end = tag_end;
+        else
+            end = next_quote;
+    }
+
+    if(end == -1)
+    {
+        value = content_;
+        malformed_ = true;
+        return value;
+    }
+
+    value = content_.mid(start, end - start);
+
+    if(quoted)
+        value = value.mid(1, value.length() - 1); // drop the opening quote
+    else
+        ::stripWhiteSpace(value);
+
+    ::decode(value);
+
+    return value;
+}
+
+
+/*
+ NodeLink________________________________________________________________________
+*/
+
+/**
+ Reads the HREF attribute of the node.
+
+ A node that has none of HREF, NAME and TARGET is flagged as malformed.
+ When HREF is present and yields a non-empty value on a non-malformed node,
+ the link type is resolved and the link label is parsed.
+*/
+void NodeLink::parseAttributeHREF()
+{
+    bool const has_href = findWord(content(), "HREF") != -1;
+
+    if(!has_href)
+    {
+        bool const has_name_or_target =
+            findWord(content(), "NAME") != -1 || findWord(content(), "TARGET") != -1;
+
+        if(!has_name_or_target)
+        {
+            kdDebug(23100) << "MALFORMED: " << endl
+            << "NodeLink::parseAttributeHREF: " << content() << endl;
+            setMalformed(true);
+        }
+        return;
+    }
+
+    attribute_href_ = getAttribute("HREF=");
+
+    if(malformed() || attribute_href_.isEmpty())
+        return;
+
+    // Determine the type of link
+    linktype_ = Url::resolveLinkType(attribute_href_);
+
+    parseLinkLabel();
+}
+
+/**
+ Extracts the link label: the text that follows the first '>' not directly
+ followed by '<', up to the next '<'. The text is stored in link_label_
+ after its whitespace has been simplified. Nothing is stored when either
+ delimiter cannot be found.
+*/
+void NodeLink::parseLinkLabel()
+{
+    int label_start = 0;
+
+    for(;;)
+    {
+        label_start = content_.find(">", label_start);
+        if(label_start == -1)
+            return; // no closing '>' left: no label
+
+        char const next_char = QChar(content_[++label_start]);
+        if(next_char != '<') // keep skipping tags while the label starts with '<'
+            break;
+    }
+
+    int const label_end = content_.find("<", label_start);
+    if(label_end == -1)
+        return;
+
+    link_label_ =
+        ::simplifyWhiteSpace(content_.mid(label_start,
+                                          label_end - label_start));
+}
+
+
+/*
+ NodeMETA________________________________________________________________________
+*/
+
+/**
+ Handles META refresh elements.
+
+ When HTTP-EQUIV is "REFRESH" (compared upper-cased) the node is marked as a
+ redirection. If the node text also contains "URL", the value of "URL=" is
+ stored with every double quote removed and its link type is resolved.
+*/
+void NodeMETA::parseAttributeURL()
+{
+    if(attribute_http_equiv_.isEmpty())
+        parseAttributeHTTP_EQUIV();
+
+    if(!(upperCase(attribute_http_equiv_) == "REFRESH"))
+        return;
+
+    is_redirection_ = true;
+
+    if(findWord(content(), "URL") == -1)
+        return; // a refresh without URL is deliberately not flagged as malformed
+
+    attribute_url_ = getAttribute("URL=");
+
+    // Drop every double quote that is still part of the value.
+    for(int quote = attribute_url_.find("\""); quote != -1; quote = attribute_url_.find("\""))
+        attribute_url_.remove(quote, 1);
+
+    bool const url_missing = attribute_url_.isEmpty();
+    if(url_missing)
+    {
+        kdDebug(23100) << "void NodeMeta::parseAttributeURL(): Assertion `!attribute_url_.isEmpty()' failed.\n"
+        << content_ << endl << attribute_http_equiv_ << endl << attribute_url_ << endl;
+    }
+    Q_ASSERT(!attribute_url_.isEmpty());
+
+    linktype_ = Url::resolveLinkType(attribute_url_);
+}
+
+/**
+ Returns the character set announced in the CONTENT attribute, i.e. the text
+ that follows "charset=" with surrounding whitespace removed.
+
+ The parameter name is matched case-insensitively, so "CHARSET=" and
+ "Charset=" are recognised as well. A null string is returned when CONTENT
+ is empty or carries no charset parameter.
+*/
+QString NodeMETA::charset() const
+{
+    QString charset;
+    QString const content(atributoCONTENT());
+
+    if(content.isEmpty())
+        return charset;
+
+    QString const key("charset=");
+
+    // The QString overload of find() is used because it accepts the
+    // case-sensitivity flag.
+    int index = content.find(key, 0, false);
+    if(index != -1)
+    {
+        index += key.length();
+        charset = content.mid(index).stripWhiteSpace();
+    }
+
+    return charset;
+}
+
+/*
+ NodeIMG________________________________________________________________________
+*/
+
+/**
+ Reads the SRC attribute of the image and resolves its link type.
+ An image whose text does not contain "SRC" is flagged as malformed.
+*/
+void NodeIMG::parseAttributeSRC()
+{
+    bool const has_src = findWord(content(), "SRC") != -1;
+
+    if(has_src)
+    {
+        attribute_src_ = getAttribute("SRC=");
+        linktype_ = Url::resolveLinkType(attribute_src_);
+        return;
+    }
+
+    kdDebug(23100) << "MALFORMED_____________________________________________________________" << endl;
+    kdDebug(23100) << "Conteudo: " << content() << endl;
+    setMalformed(true);
+}
+
+
+/*
+ NodeFRAME________________________________________________________________________
+*/
+
+/**
+ Reads the SRC attribute of the frame and resolves its link type.
+ A frame without "SRC" is left untouched and is not flagged as malformed.
+*/
+void NodeFRAME::parseAttributeSRC()
+{
+    bool const has_src = findWord(content(), "SRC") != -1;
+
+    if(has_src)
+    {
+        attribute_src_ = getAttribute("SRC=");
+        linktype_ = Url::resolveLinkType(attribute_src_);
+    }
+}
+
diff --git a/klinkstatus/src/parser/node.h b/klinkstatus/src/parser/node.h
new file mode 100644
index 00000000..1d0b1fc3
--- /dev/null
+++ b/klinkstatus/src/parser/node.h
@@ -0,0 +1,279 @@
+/***************************************************************************
+ * Copyright (C) 2004 by Paulo Moura Guedes *
+ * moura@kdewebdev.org *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
+ ***************************************************************************/
+
+#ifndef NODULO_H
+#define NODULO_H
+
+#include "mstring.h"
+
+#include <qstring.h>
+
+#include <kdebug.h>
+#include <kcharsets.h>
+
+using namespace std;
+
+typedef unsigned int uint;
+
+
+/**
+ Abstract base class of the HTML elements handled by the parser.
+
+ A node keeps the raw text it was built from, the kind of element it stands
+ for, the type of the link it carries, a redirection flag and a "malformed"
+ flag. Subclasses provide parse(), url(), linkLabel() and isLink().
+*/
+class Node
+{
+public:
+
+    /** Kind of HTML element. */
+    enum Element { A, AREA, LINK, META, IMG, FRAME, BASE, TITLE };
+
+    /** Kind of link, as returned by Url::resolveLinkType(). */
+    enum LinkType { href, file_href, mailto, relative };
+
+    Node();
+    Node(QString const& content);
+    virtual ~Node();
+
+    /** Value of the attribute 'atributo' (e.g. "HREF=") found in the node text. */
+    QString getAttribute(QString const& atributo);
+    virtual QString const& url() const = 0;
+    virtual QString const& linkLabel() const = 0; // URL label
+    /** Replaces the node text and parses it again. */
+    virtual void setNode(QString const& content);
+    virtual void parse() = 0;
+    void setMalformed(bool flag = true);
+    virtual void setLinkType(LinkType const& lt);
+
+    QString const& content() const;
+    bool malformed() const;
+    LinkType linkType() const;
+    Element element() const;
+    virtual bool isLink() const = 0;
+
+    bool isRedirection() const;
+
+protected:
+
+    Element element_;
+    LinkType linktype_;
+    QString link_label_;
+    QString content_;       // raw text of the element
+    bool is_redirection_;
+    bool malformed_;
+};
+
+
+/**
+ Node whose URL comes from an HREF attribute. Base class of NodeA, NodeAREA,
+ NodeLINK and NodeBASE.
+*/
+class NodeLink: public Node
+{
+public:
+    NodeLink();
+    NodeLink(QString const& content);
+    ~NodeLink() {}
+
+    virtual void parse();
+
+    virtual QString const& url() const;
+    virtual QString const& linkLabel() const; // URL label
+    /** Part of the HREF value starting where findWord() locates "MAILTO:". */
+    virtual QString mailto() const;
+    virtual bool isLink() const;
+
+private:
+    virtual void parseAttributeHREF();
+    void parseLinkLabel();
+
+private:
+    QString attribute_href_;
+};
+
+/**
+ Anchor element (A). Adds the NAME attribute to what NodeLink provides.
+*/
+class NodeA: public NodeLink
+{
+public:
+    NodeA(QString const& content);
+    ~NodeA() {}
+
+    QString const& attributeNAME() const;
+
+    virtual void parse();
+
+private:
+    void parseAttributeNAME();
+
+private:
+    QString attribute_name_;
+};
+
+/**
+ AREA element. Adds the TITLE attribute to what NodeLink provides.
+*/
+class NodeAREA: public NodeLink
+{
+public:
+    NodeAREA(QString const& content);
+    ~NodeAREA() {}
+
+    QString const& attributeTITLE() const;
+
+    virtual void parse();
+
+private:
+    void parseAttributeTITLE();
+
+private:
+    QString attribute_title_;
+};
+
+
+/**
+ LINK element. It adds no member of its own to NodeLink.
+*/
+class NodeLINK: public NodeLink
+{
+public:
+    NodeLINK(QString const& content);
+    ~NodeLINK() {}
+};
+
+/**
+ META element. Exposes the HTTP-EQUIV, NAME and CONTENT attributes, the
+ charset announced in CONTENT and, for refresh elements, the target URL.
+*/
+class NodeMETA: public Node
+{
+public:
+    NodeMETA();
+    NodeMETA(QString const& content);
+    ~NodeMETA() {}
+
+    virtual QString const& url() const;
+    virtual const QString& linkLabel() const;
+    virtual bool isLink() const;
+    QString const& atributoHTTP_EQUIV() const;
+    QString const& atributoNAME() const;
+    QString const& atributoCONTENT() const;
+    QString charset() const;
+    bool isRedirection() const;
+
+    virtual void parse();
+
+private:
+    /**
+      Checks whether the attributes HTTP-EQUIV=Refresh and URL=... exist.
+      If they do, the content of the URL attribute is treated as a link.
+      e.g.: <META HTTP-EQUIV=Refresh CONTENT="10; URL=http://www.htmlhelp.com/">
+    */
+    void parseAttributeURL();
+
+    void parseAttributeHTTP_EQUIV();
+    void parseAttributeNAME();
+    void parseAttributeCONTENT();
+
+private:
+    QString attribute_http_equiv_;
+    QString attribute_url_;
+    QString attribute_name_;
+    QString attribute_content_;
+};
+
+/**
+ IMG element. The URL is the SRC attribute; the label is the TITLE attribute
+ or, when that is empty, the ALT attribute.
+*/
+class NodeIMG: public Node
+{
+public:
+    NodeIMG(QString const& content);
+    ~NodeIMG() {}
+
+    virtual void parse();
+
+    virtual QString const& url() const;
+    virtual QString const& linkLabel() const; // Image label
+    virtual bool isLink() const;
+
+private:
+    void parseAttributeSRC();
+    void parseAttributeTITLE();
+    void parseAttributeALT();
+
+private:
+    QString attribute_src_;
+    QString attribute_title_;
+    QString attribute_alt_;
+};
+
+/**
+ FRAME element. The URL is the SRC attribute.
+*/
+class NodeFRAME: public Node
+{
+public:
+    NodeFRAME(QString const& content);
+    ~NodeFRAME() {}
+
+    virtual void parse();
+    virtual QString const& url() const;
+    virtual QString const& linkLabel() const;
+    virtual bool isLink() const;
+
+private:
+    void parseAttributeSRC();
+
+private:
+    QString attribute_src_;
+};
+
+/**
+ BASE element. Parsed like any NodeLink, but isLink() is overridden.
+*/
+class NodeBASE: public NodeLink
+{
+public:
+    NodeBASE();
+    NodeBASE(QString const& content);
+    ~NodeBASE() {}
+
+    virtual bool isLink() const;
+};
+
+/**
+ TITLE element. It carries no URL; the title text is available through
+ attributeTITLE().
+*/
+class NodeTITLE: public Node
+{
+public:
+    NodeTITLE();
+    NodeTITLE(QString const& content);
+    ~NodeTITLE() {}
+
+    virtual QString const& url() const;
+    virtual QString const& linkLabel() const;
+    virtual void parse();
+    virtual bool isLink() const;
+
+    QString const& attributeTITLE() const;
+
+private:
+    void parseAttributeTITLE();
+
+private:
+    QString attribute_title_;
+};
+
+
+#include "node_impl.h"
+
+#endif
diff --git a/klinkstatus/src/parser/node_impl.h b/klinkstatus/src/parser/node_impl.h
new file mode 100644
index 00000000..51249075
--- /dev/null
+++ b/klinkstatus/src/parser/node_impl.h
@@ -0,0 +1,412 @@
+/***************************************************************************
+ * Copyright (C) 2004 by Paulo Moura Guedes *
+ * moura@kdewebdev.org *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
+ ***************************************************************************/
+
+/**
+ Builds a node without text.
+
+ Every member is initialised: element_ and linktype_ receive the neutral
+ defaults A and relative so that element() and linkType() never read an
+ indeterminate value before a subclass or a parse step assigns them.
+*/
+inline Node::Node()
+        : element_(A), linktype_(relative),
+        is_redirection_(false), malformed_(false)
+{}
+
+inline Node::~Node()
+{}
+
+/**
+ Builds a node from the raw text 'content'. The text is only stored here;
+ parsing is triggered by the subclasses. element_ and linktype_ start with
+ the same defaults as in the default constructor.
+*/
+inline Node::Node(QString const& content)
+        : element_(A), linktype_(relative),
+        content_(content), is_redirection_(false), malformed_(false)
+{}
+
+/** Replaces the node text and runs the (virtual) parse() on it. */
+inline void Node::setNode(QString const& content)
+{
+    content_ = content;
+    parse();
+}
+
+/** Raw text of the node. */
+inline QString const& Node::content() const
+{
+    return content_;
+}
+
+/** Current value of the "malformed" flag. */
+inline bool Node::malformed() const
+{
+    return malformed_;
+}
+
+/** Sets the "malformed" flag to 'flag'. */
+inline void Node::setMalformed(bool flag)
+{
+    malformed_ = flag;
+}
+
+/** Type of the link carried by the node. */
+inline Node::LinkType Node::linkType() const
+{
+    return linktype_;
+}
+
+/** Kind of HTML element. */
+inline Node::Element Node::element() const
+{
+    return element_;
+}
+
+/** Overrides the type of the link carried by the node. */
+inline void Node::setLinkType(Node::LinkType const& lt)
+{
+    linktype_ = lt;
+}
+
+/** Current value of the redirection flag. */
+inline bool Node::isRedirection() const
+{
+    return is_redirection_;
+}
+
+// class NodeLink_______________________________________________________
+
+/** Builds a link node without text; nothing is parsed. */
+inline NodeLink::NodeLink()
+        : Node()
+{}
+
+/** Builds a link node from 'content' and parses it straight away. */
+inline NodeLink::NodeLink(QString const& content)
+        : Node(content)
+{
+    parse();
+}
+
+/** Parses the HREF attribute (and, through it, the link label). */
+inline void NodeLink::parse()
+{
+    parseAttributeHREF();
+}
+
+/** Value of the HREF attribute. */
+inline QString const& NodeLink::url() const
+{
+    return attribute_href_;
+}
+
+/** Label extracted by parseLinkLabel(). */
+inline QString const& NodeLink::linkLabel() const
+{
+    return link_label_;
+}
+
+/**
+ Returns the HREF value, with its entities resolved, from the position
+ reported by findWord() for "MAILTO:". The link type is expected to be
+ Node::mailto.
+*/
+inline QString NodeLink::mailto() const
+{
+    Q_ASSERT(linktype_ == Node::mailto);
+
+    QString const resolved = KCharsets::resolveEntities(attribute_href_);
+
+    int const start = findWord(resolved, "MAILTO:");
+    Q_ASSERT(start != -1);
+
+    return resolved.mid(start);
+}
+
+/** A link node is a link when it has a non-empty URL that is not a mailto. */
+inline bool NodeLink::isLink() const
+{
+    return Node::linkType() != Node::mailto && !url().isEmpty();
+}
+
+// class NodeA_______________________________________________________
+
+/**
+ Builds an anchor node. The NodeLink constructor has already parsed HREF;
+ the NAME attribute is parsed here.
+*/
+inline NodeA::NodeA(QString const& content)
+        : NodeLink(content)
+{
+    element_ = A;
+    parse();
+}
+
+/** Value of the NAME attribute. */
+inline QString const& NodeA::attributeNAME() const
+{
+    return attribute_name_;
+}
+
+/** Parses the NAME attribute only. */
+inline void NodeA::parse()
+{
+    parseAttributeNAME();
+}
+
+inline void NodeA::parseAttributeNAME()
+{
+    QString const name = getAttribute("NAME=");
+    attribute_name_ = name;
+}
+
+// class NodeAREA_______________________________________________________
+
+/**
+ Builds an AREA node. The NodeLink constructor has already parsed HREF; the
+ TITLE attribute is parsed here.
+*/
+inline NodeAREA::NodeAREA(QString const& content)
+        : NodeLink(content)
+{
+    element_ = AREA;
+    parse();
+}
+
+/** Value of the TITLE attribute. */
+inline QString const& NodeAREA::attributeTITLE() const
+{
+    return attribute_title_;
+}
+
+/** Parses the TITLE attribute only. */
+inline void NodeAREA::parse()
+{
+    parseAttributeTITLE();
+}
+
+inline void NodeAREA::parseAttributeTITLE()
+{
+    QString const title = getAttribute("TITLE=");
+    attribute_title_ = title;
+}
+
+// class NodeLINK________________________________________
+
+/** Builds a LINK node; all parsing is done by the NodeLink constructor. */
+inline NodeLINK::NodeLINK(QString const& content)
+        : NodeLink(content)
+{
+    element_ = LINK;
+}
+
+// class NodeMeta________________________________________
+
+/** Builds a META node without text; nothing is parsed. */
+inline NodeMETA::NodeMETA()
+        : Node()
+{
+    element_ = META;
+}
+
+/** Builds a META node from 'content' and parses it. */
+inline NodeMETA::NodeMETA(QString const& content)
+        : Node(content)
+{
+    element_ = META;
+    parse();
+}
+
+/** Target URL of a refresh element. */
+inline QString const& NodeMETA::url() const
+{
+    return attribute_url_;
+}
+
+inline const QString& NodeMETA::linkLabel() const
+{
+    return link_label_;
+}
+
+/**
+ A META node is a link when it is a refresh element whose text contains
+ "URL" (a refresh does not necessarily carry one).
+*/
+inline bool NodeMETA::isLink() const
+{
+    return upperCase(attribute_http_equiv_) == "REFRESH" &&
+           findWord(content(), "URL") != -1;
+}
+
+/** Value of the HTTP-EQUIV attribute. */
+inline QString const& NodeMETA::atributoHTTP_EQUIV() const
+{
+    return attribute_http_equiv_;
+}
+
+/** Value of the NAME attribute. */
+inline QString const& NodeMETA::atributoNAME() const
+{
+    return attribute_name_;
+}
+
+/** Value of the CONTENT attribute. */
+inline QString const& NodeMETA::atributoCONTENT() const
+{
+    return attribute_content_;
+}
+
+/** True when HTTP-EQUIV, upper-cased, is "REFRESH". */
+inline bool NodeMETA::isRedirection() const
+{
+    bool const is_refresh = upperCase(attribute_http_equiv_) == "REFRESH";
+    return is_refresh;
+}
+
+/** Parses HTTP-EQUIV, NAME and CONTENT, then the refresh URL. */
+inline void NodeMETA::parse()
+{
+    parseAttributeHTTP_EQUIV();
+    parseAttributeNAME();
+    parseAttributeCONTENT();
+
+    parseAttributeURL();
+}
+
+inline void NodeMETA::parseAttributeHTTP_EQUIV()
+{
+    QString const value = getAttribute("HTTP-EQUIV=");
+    attribute_http_equiv_ = value;
+}
+
+inline void NodeMETA::parseAttributeNAME()
+{
+    QString const value = getAttribute("NAME=");
+    attribute_name_ = value;
+}
+
+inline void NodeMETA::parseAttributeCONTENT()
+{
+    QString const value = getAttribute("CONTENT=");
+    attribute_content_ = value;
+}
+
+
+// class NodeIMG________________________________________
+
+/** Builds an IMG node from 'content' and parses it. */
+inline NodeIMG::NodeIMG(QString const& content)
+        : Node(content)
+{
+    element_ = IMG;
+    parse();
+}
+
+/** Parses the SRC, TITLE and ALT attributes, in that order. */
+inline void NodeIMG::parse()
+{
+    parseAttributeSRC();
+    parseAttributeTITLE();
+    parseAttributeALT();
+}
+
+/** Value of the SRC attribute. */
+inline QString const& NodeIMG::url() const
+{
+    return attribute_src_;
+}
+
+/** TITLE attribute when it is not empty, ALT attribute otherwise. */
+inline QString const& NodeIMG::linkLabel() const
+{
+    return attribute_title_.isEmpty() ? attribute_alt_ : attribute_title_;
+}
+
+/** An image is a link when its URL is not empty. */
+inline bool NodeIMG::isLink() const
+{
+    return !url().isEmpty();
+}
+
+inline void NodeIMG::parseAttributeTITLE()
+{
+    QString const value = getAttribute("TITLE=");
+    attribute_title_ = value;
+}
+
+inline void NodeIMG::parseAttributeALT()
+{
+    QString const value = getAttribute("ALT=");
+    attribute_alt_ = value;
+}
+
+
+// class NodeFRAME________________________________________
+
+/** Builds a FRAME node from 'content' and parses it. */
+inline NodeFRAME::NodeFRAME(QString const& content)
+        : Node(content)
+{
+    element_ = FRAME;
+    parse();
+}
+
+/** Parses the SRC attribute. */
+inline void NodeFRAME::parse()
+{
+    parseAttributeSRC();
+}
+
+/** Value of the SRC attribute. */
+inline QString const& NodeFRAME::url() const
+{
+    return attribute_src_;
+}
+
+inline QString const& NodeFRAME::linkLabel() const
+{
+    return link_label_;
+}
+
+/** A frame is a link when its URL is not empty. */
+inline bool NodeFRAME::isLink() const
+{
+    return !url().isEmpty();
+}
+
+// class NodeBASE________________________________________
+
+/** Builds a BASE node without text. */
+inline NodeBASE::NodeBASE()
+        : NodeLink()
+{
+    element_ = BASE;
+}
+
+/** Builds a BASE node; parsing is done by the NodeLink constructor. */
+inline NodeBASE::NodeBASE(QString const& content)
+        : NodeLink(content)
+{
+    element_ = BASE;
+}
+
+/** A BASE element is never reported as a link. */
+inline bool NodeBASE::isLink() const
+{
+    bool const is_link = false;
+    return is_link;
+}
+
+// class NodeTITLE________________________________________
+
+/** Builds a TITLE node without text; parse() still runs on the empty text. */
+inline NodeTITLE::NodeTITLE()
+        : Node()
+{
+    element_ = TITLE;
+    parse();
+}
+
+/** Builds a TITLE node from 'content' and parses it. */
+inline NodeTITLE::NodeTITLE(QString const& content)
+        : Node(content)
+{
+    element_ = TITLE;
+    parse();
+}
+
+/** A title has no URL: QString::null is returned. */
+inline QString const& NodeTITLE::url() const
+{
+    return QString::null;
+}
+
+/** A title has no link label: QString::null is returned. */
+inline QString const& NodeTITLE::linkLabel() const
+{
+    return QString::null;
+}
+
+/** Extracts the title text. */
+inline void NodeTITLE::parse()
+{
+    parseAttributeTITLE();
+}
+
+/** A TITLE element is never reported as a link. */
+inline bool NodeTITLE::isLink() const
+{
+    bool const is_link = false;
+    return is_link;
+}
+
+/** Title text extracted by parse(). */
+inline QString const& NodeTITLE::attributeTITLE() const
+{
+    return attribute_title_;
+}
+
+/**
+ Extracts the title text from the node: the "<TITLE>" and "</TITLE>" tags
+ are removed (case-insensitively) and the surrounding whitespace is
+ stripped.
+*/
+inline void NodeTITLE::parseAttributeTITLE()
+{
+    attribute_title_ = content_;
+    attribute_title_.replace("<TITLE>", "", false);
+    attribute_title_.replace("</TITLE>", "", false);
+
+    // stripWhiteSpace() returns the trimmed copy and leaves the receiver
+    // untouched, so the result has to be assigned back.
+    attribute_title_ = attribute_title_.stripWhiteSpace();
+
+    //kdDebug(23100) << "TITLE: " << attribute_title_ << endl;
+}
diff --git a/klinkstatus/src/parser/url.cpp b/klinkstatus/src/parser/url.cpp
new file mode 100644
index 00000000..f7f1f6f8
--- /dev/null
+++ b/klinkstatus/src/parser/url.cpp
@@ -0,0 +1,350 @@
+/***************************************************************************
+ * Copyright (C) 2004 by Paulo Moura Guedes *
+ * moura@kdewebdev.org *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
+ ***************************************************************************/
+
+#include <kresolver.h>
+
+#include "url.h"
+#include "mstring.h"
+#include "../utils/utils.h"
+
+#include <kcharsets.h>
+
+
+/**
+ Classifies 'url':
+  - Node::relative  when the decoded url is a null string;
+  - Node::file_href when findWord() locates "FILE:" in it;
+  - Node::mailto    when findWord() locates "MAILTO:" once entities are resolved;
+  - Node::href      when it contains ":/";
+  - Node::relative  otherwise.
+*/
+Node::LinkType Url::resolveLinkType(QString const& url)
+{
+    QString const decoded(KURL::decode_string(url));
+    if(decoded.isNull())
+        return Node::relative;
+
+    if(findWord(url, "FILE:") != -1)
+        return Node::file_href;
+
+    if(findWord(KCharsets::resolveEntities(url), "MAILTO:") != -1)
+        return Node::mailto;
+
+    if( (int)url.find(":/") != -1)
+        return Node::href;
+
+    return Node::relative;
+}
+
+/**
+ Turns the link text 'string_url', found in the document 'link_parent', into
+ an absolute URL.
+
+ The base is the base URI of 'link_parent' when it has one, its absolute URL
+ otherwise. An empty link yields the base itself and a link that already has
+ a protocol is returned as is. For any other link the result is assembled
+ from the protocol and host of the base, followed by
+  - 'document_root' for a link starting with '/' when the base protocol does
+    not start with "http", or
+  - the directory of the base for a link not starting with '/',
+ then the file name of the base for links starting with ';', '?' or '#', and
+ finally the link itself. User, password and port are taken from the base.
+*/
+KURL Url::normalizeUrl(QString const& string_url, LinkStatus const& link_parent, QString const& document_root)
+{
+    QString link = string_url.stripWhiteSpace();
+
+    // resolve base url
+    KURL base_url;
+    if(link_parent.hasBaseURI())
+        base_url = link_parent.baseURI();
+    else
+        base_url = link_parent.absoluteUrl();
+
+    if(link.isEmpty())
+        return base_url;
+
+    if(Url::hasProtocol(link))
+        return KURL(link);
+
+    // resolve relative url
+    QString absolute(base_url.protocol() + "://" + base_url.host());
+
+    QChar const first = link[0];
+
+    if(first == '/')
+    {
+        if(!base_url.protocol().startsWith("http"))
+            absolute.append(document_root);
+    }
+    else
+    {
+        absolute.append(base_url.directory(true, false) + "/");
+    }
+
+    bool const refers_to_base_file = first == ';' || // parameters
+                                     first == '?' || // query
+                                     first == '#';   // fragment or reference
+    if(refers_to_base_file)
+        absolute.append(base_url.fileName(false));
+
+    absolute.append(link);
+
+    KURL result(absolute);
+    if(base_url.hasUser())
+        result.setUser(base_url.user());
+    if(base_url.hasPass())
+        result.setPass(base_url.pass());
+
+    result.setPort(base_url.port());
+
+    result.cleanPath();
+
+    return KURL(KCharsets::resolveEntities(KURL::decode_string(result.url())));
+}
+
+/**
+ Builds a URL from user-supplied text. Entities are resolved and surrounding
+ whitespace is removed first. Text starting with '/' is used as a path;
+ any other text gets "http://" prepended when it has no protocol. The path
+ of the result is cleaned in both cases.
+*/
+KURL Url::normalizeUrl(QString const& string_url)
+{
+    QString text(KCharsets::resolveEntities(string_url.stripWhiteSpace()));
+
+    if(text[0] == '/')
+    {
+        KURL local;
+        local.setPath(text);
+        local.cleanPath();
+        return local;
+    }
+
+    if(!Url::hasProtocol(text))
+        text.prepend("http://");
+
+    KURL remote(text);
+    remote.cleanPath();
+    return remote;
+}
+
+/**
+ Tells whether 'url' is the absolute URL of one of the links in 'v'.
+ A URL whose prettyURL() is empty is reported as existing.
+*/
+bool Url::existUrl(KURL const& url, vector<LinkStatus*> const& v)
+{
+    if(url.prettyURL().isEmpty())
+        return true;
+
+    vector<LinkStatus*>::const_iterator it = v.begin();
+    for(; it != v.end(); ++it)
+    {
+        if((*it)->absoluteUrl() == url)
+            return true;
+    }
+
+    return false;
+}
+
+/**
+ Tells whether 'host2' belongs to the same domain as 'host1'.
+
+ Examples given by the original author:
+   www.iscte.pt, iscte.pt => true;
+   iscte.pt, www.iscte.pt => true;
+   www.iscte.pt, alunos.iscte.pt => true; (if restrict = false)
+   www.iscte.pt, alunos.iscte.pt => false; (if restrict = true)
+   alunos.iscte.pt, www.iscte.pt => false;
+   alunos.iscte.pt, iscte.pt => false.
+
+ Identical strings are always equal. Otherwise both hosts are normalised,
+ split on dots and compared label by label starting from the last one; a
+ leading "www" label is not counted when the number of labels is compared.
+ With 'restrict' set, hosts with a different number of labels are never
+ equal. A host that yields no label at all is reported as different.
+*/
+// FIXME - Rename this function to sameDomain
+bool Url::equalHost(QString const& host1, QString const& host2, bool restrict)
+{
+    //Q_ASSERT(!host1.isEmpty());
+    //Q_ASSERT(!host2.isEmpty()); // this fails if href="javascript:......."
+
+    if(host1 == host2)
+        return true;
+
+    QString host1_(KNetwork::KResolver::normalizeDomain(host1));
+    QString host2_(KNetwork::KResolver::normalizeDomain(host2));
+    removeLastCharIfExists(host1_, '/');
+    removeLastCharIfExists(host2_, '/');
+
+    vector<QString> v1 = tokenizeWordsSeparatedByDots(host1_);
+    vector<QString> v2 = tokenizeWordsSeparatedByDots(host2_);
+    uint const size1 = v1.size();
+    uint const size2 = v2.size();
+
+    // v1[0] and v2[0] are read below, so both vectors must hold at least one
+    // label (localhost would have size = 1). The former check let an empty
+    // vector through whenever one of the hosts started with a digit.
+    if(size1 == 0 || size2 == 0)
+    {
+        kdDebug(23100) << "Invalid host: " << host2 << endl;
+        return false;
+    }
+
+    vector<QString>::size_type aux = 0;
+    vector<QString>::size_type aux2 = 0;
+    if(v1[0] == "www")
+        aux = 1;
+    if(v2[0] == "www")
+        aux2 = 1;
+
+    if((size2 - aux2 < size1 - aux) && restrict) // e.g. paradigma.co.pt < linkstatus.paradigma.co.pt
+        return false;
+
+    if(restrict && (size2 - aux2 > size1 - aux)) // e.g. linkstatus.paradigma.co.pt > paradigma.co.pt
+        return false;
+
+    // Compare the labels from the right-hand side.
+    // NOTE(review): both bounds use 'aux' (the offset of host1); 'aux2' only
+    // takes part in the size checks above. Left as is - confirm the intent.
+    int i = 1;
+    while( ((int)(size1 - i) >= (int)aux) && ((int)(size2 - i) >= (int)aux) )
+    {
+        if( !(v1[size1 - i] == v2[size2 - i]) )
+            return false;
+
+        ++i;
+    }
+
+    return true;
+}
+
+/**
+ Tells whether 'url' starts with a protocol (scheme).
+
+ Surrounding whitespace is ignored. Text starting with '/' is a path and has
+ no protocol. For anything else the decision is left to KURL: the text has a
+ protocol when the KURL built by KURL::fromPathOrURL() reports a non-empty
+ one. No fixed list of schemes is kept, so new schemes need no change here.
+*/
+bool Url::hasProtocol(QString const& url)
+{
+    // stripWhiteSpace() returns the trimmed copy, so it has to be kept;
+    // calling it on the string and dropping the result changes nothing.
+    QString s_url(url.stripWhiteSpace());
+
+    if(s_url[0] == '/')
+        return false;
+
+    KURL const parsed = KURL::fromPathOrURL(s_url);
+    return !parsed.protocol().isEmpty();
+}
+
+/**
+ Expresses the absolute URL of 'ls' relatively to its root URL, e.g.
+  http://linkstatus.paradigma.co.pt/en/index.html&bix=bix -> /en/index.html&bix=bix
+ When both URLs are equal the result is "./" followed by the file name.
+*/
+QString Url::convertToLocal(LinkStatus const* ls)
+{
+    KURL const target = ls->absoluteUrl();
+    KURL const root = ls->rootUrl();
+
+    if(root == target)
+        return "./" + target.fileName();
+
+    return KURL::relativeURL(root, target);
+}
+
+/**
+ Returns true if url2 has the same domain as url1.
+ URLs with different protocols are never in the same domain; when url1 has
+ no host they always are. Otherwise the hosts are compared with equalHost():
+ if restrict, sourceforge.net != quanta.sourceforge.net, else they are equal.
+*/
+bool Url::localDomain(KURL const& url1, KURL const& url2, bool restrict)
+{
+    if(url1.protocol() != url2.protocol())
+        return false;
+
+    if(!url1.hasHost())
+        return true;
+
+    bool const same_host = Url::equalHost(url1.host(), url2.host(), restrict);
+    return same_host;
+}
+
+/**
+ Original description: returns true if url2 is a parent of url1.
+
+ What the code does:
+  - different protocols: false;
+  - url1 without host: the result of url2.isParentOf(url1);
+  - different hosts (equalHost() without restriction): false;
+  - otherwise the directories of both URLs are split on '/' and compared
+    component by component over their common length: true as soon as one
+    component differs, false when none does or when url1 has no directory
+    component.
+
+ NOTE(review): for URLs with a host the result does not look like a plain
+ "url2 is a parent of url1" test - confirm the intended meaning with callers.
+*/
+bool Url::parentDir(KURL const& url1, KURL const& url2)
+{
+    if(url1.protocol() != url2.protocol())
+        return false;
+
+    if(!url1.hasHost())
+        return url2.isParentOf(url1);
+
+    if(!equalHost(url1.host(), url2.host()))
+        return false;
+
+    vector<QString> dirs_1 = tokenizeWordsSeparatedBy(url1.directory(true, false), QChar('/'));
+    vector<QString> dirs_2 = tokenizeWordsSeparatedBy(url2.directory(true, false), QChar('/'));
+
+    if(dirs_1.size() == 0)
+        return false;
+
+    // Only the components present in both directories are compared.
+    vector<QString>::size_type const common =
+        dirs_1.size() < dirs_2.size() ? dirs_1.size() : dirs_2.size();
+
+    vector<QString>::size_type i = 0;
+    while(i != common)
+    {
+        if(dirs_2[i] != dirs_1[i])
+            return true;
+        ++i;
+    }
+
+    return false;
+}
+
+/**
+ Tells whether url2 is external to url1: URLs with different protocols are
+ external, two URLs without host are not, and in any other case the answer
+ is the negation of equalHost() on their hosts.
+*/
+bool Url::externalLink(KURL const& url1, KURL const& url2, bool restrict)
+{
+    if(url1.protocol() != url2.protocol())
+    {
+        kdDebug(23100) << "externalLink" << endl;
+        return true;
+    }
+
+    bool const both_without_host = !url1.hasHost() && !url2.hasHost();
+    if(both_without_host)
+    {
+        kdDebug(23100) << "NOT externalLink" << endl;
+        return false;
+    }
+
+    bool const same_host = Url::equalHost(url1.host(), url2.host(), restrict);
+    return !same_host;
+}
diff --git a/klinkstatus/src/parser/url.h b/klinkstatus/src/parser/url.h
new file mode 100644
index 00000000..6f22743d
--- /dev/null
+++ b/klinkstatus/src/parser/url.h
@@ -0,0 +1,57 @@
+/***************************************************************************
+ * Copyright (C) 2004 by Paulo Moura Guedes *
+ * moura@kdewebdev.org *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
+ ***************************************************************************/
+
+#ifndef URL_H
+#define URL_H
+
+#include "../engine/linkstatus.h"
+#include "node.h"
+
+#include <kurl.h>
+#include <qstring.h>
+
+#include <vector>
+
+using namespace std;
+
+
+class LinkStatus;
+
+namespace Url // helper functions for classifying, normalising and comparing URLs
+{
+Node::LinkType resolveLinkType(QString const& url); // classifies 'url' as file_href, mailto, href or relative
+KURL normalizeUrl(QString const& string_url, LinkStatus const& link_parent, QString const& document_root); // resolves 'string_url' against the base URI (or absolute URL) of 'link_parent'
+KURL normalizeUrl(QString const& string_url); // text starting with '/' is used as a path; otherwise "http://" is prepended when no protocol is present
+bool validUrl(KURL const& url); // not defined in url.cpp; see the inline definition(s) after this namespace
+bool existUrl(KURL const& url, vector<LinkStatus*> const& v); // true when 'url' equals the absoluteUrl() of an element of 'v', or when its prettyURL() is empty
+bool equalHost(QString const& host1, QString const& host2, bool restrict = false); // same-domain test on host names
+bool hasProtocol(QString const& url); // false for text starting with '/'; otherwise true when KURL reports a non-empty protocol
+QString convertToLocal(LinkStatus const* ls); // absolute URL of 'ls' expressed relatively to its root URL
+bool localDomain(KURL const& url1, KURL const& url2, bool restrict = true); // same protocol and (when url1 has a host) equalHost() on the hosts
+bool parentDir(KURL const& url1, KURL const& url2); // documented in url.cpp as "true if url2 is a parent of url1"
+bool externalLink(KURL const& url1, KURL const& url2, bool restrict = true); // different protocol, or hosts for which equalHost() fails
+}
+
+/**
+ Definition of the function declared in namespace Url: tells whether 'url'
+ is valid according to KURL::isValid(). The presence of a host is not
+ required.
+*/
+inline bool Url::validUrl(KURL const& url)
+{
+    return (url.isValid() /*&& url.hasHost()*/);
+}
+
+/**
+ Global-scope spelling of Url::validUrl(), kept so that existing callers of
+ ::validUrl() continue to compile.
+*/
+inline bool validUrl(KURL const& url)
+{
+    return Url::validUrl(url);
+}
+
+#endif