/*************************************************************************** * Copyright (C) 2004 by Paulo Moura Guedes * * moura@tdewebdev.org * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * ***************************************************************************/ #ifndef GESTOR_PESTQUISA_H #define GESTOR_PESTQUISA_H #include #include #include #include #include #include class TQDomElement; #include #include "linkstatus.h" #include "linkchecker.h" #include "../parser/node.h" #include "../parser/url.h" using namespace std; typedef TQMap KHTMLPartMap; class SearchManager: public TQObject { Q_OBJECT TQ_OBJECT public: enum SearchMode { depth, domain, depth_and_domain }; SearchManager(int max_simultaneous_connections = 3, int time_out = 50, TQObject *parent = 0, const char *name = 0); ~SearchManager(); TQString toXML() const; void save(TQDomElement& element) const; KHTMLPartMap const& htmlParts() const { return html_parts_; } KHTMLPart* htmlPart(TQString const& key_url) const; void addHtmlPart(TQString const& key_url, KHTMLPart* html_part); void removeHtmlParts(); void startSearch(KURL const& root); void startSearch(KURL const& root, SearchMode const& modo); void resume(); void cancelSearch(); bool hasDocumentRoot() const; KURL const& documentRoot() const; void setDocumentRoot(KURL const& url); void setSearchMode(SearchMode modo); void setDepth(int depth); void setExternalDomainDepth(int depth); void setDomain(TQString const& domain); void setCheckParentDirs(bool flag); void setCheckExternalLinks(bool flag); void setCheckRegularExpressions(bool flag); void setRegularExpression(TQString const& reg_exp, bool case_sensitive); void setTimeOut(int time_out); void cleanItems(); void reset(); bool searching() const; bool localDomain(KURL const& url, bool restrict = true) const; //bool isLocalRestrict(KURL const& url) const; SearchMode const& searchMode() const; bool checkRegularExpressions() const { return check_regular_expressions_; } bool existUrl(KURL const& url, KURL const& url_parent) const; LinktqStatus const* linktqStatus(TQString const& s_url) const; int checkedLinks() const; TQTime timeElapsed() const; bool checkParentDirs() const; bool checkExternalLinks() const; LinktqStatus const* linkStatusRoot() const; int maxSimultaneousConnections() const; int timeOut() const; bool sendIdentification() const { return send_identification_; } TQString const& userAgent() const { return user_agent_; } private: void checkRoot(); void checkVectorLinks(vector const& links); // corresponde a um no de um nivel de depth vector tqchildren(LinktqStatus* link); void startSearch(); void continueSearch(); void finnish(); void pause(); vector const& nodeToAnalize() const; vector chooseLinks(vector const& links); void checkLinksSimultaneously(vector const& links); void addLevel(); bool checkableByDomain(KURL const& url, LinktqStatus const& link_parent) const; bool checkable(KURL const& url, LinktqStatus const& link_parent) const; int maximumCurrentConnections() const; bool onlyCheckHeader(LinktqStatus* ls) const; /* Entende-se por domain vago um domain do tipo www.google.pt ou google.pt, pelo que, por exemplo, imagens.google.pt, e considerado estar no mesmo domain. pwp.netcabo.pt ou www.google.pt/imagens nao sao considerados domains vagos. */ bool generalDomain() const; bool generalDomainChecked() const; // Para garantir que o procedimento generalDomain() so e chamado uma vez private slots: void slotRootChecked(const LinktqStatus * link, LinkChecker * checker); void slotLinkChecked(const LinktqStatus * link, LinkChecker * checker); void slotSearchFinished(); void slotLinkCheckerFinnished(LinkChecker * checker); signals: void signalRootChecked(const LinktqStatus * link, LinkChecker * checker); void signalLinkChecked(const LinktqStatus * link, LinkChecker * checker); void signalSearchFinished(); void signalSearchPaused(); void signalAddingLevelTotalSteps(uint number_of_links); void signalAddingLevelProgress(); void signalLinksToCheckTotalSteps(uint links_to_check); //void signalLinksToCheckProgress(); private: int max_simultaneous_connections_; SearchMode search_mode_; LinktqStatus root_; bool has_document_root_; KURL document_root_url_; // in case of non http protocols the document root must be explicitly given int depth_; int current_depth_; int external_domain_depth_; int current_node_; int current_index_; int links_being_checked_; int finished_connections_; int maximum_current_connections_; TQRegExp reg_exp_; TQString domain_; bool general_domain_; bool checked_general_domain_; int time_out_; int current_connections_; bool send_identification_; // user-agent TQString user_agent_; bool canceled_; bool searching_; int checked_links_; TQTime time_; int ignored_links_; bool check_parent_dirs_; bool check_external_links_; bool check_regular_expressions_; uint number_of_level_links_; uint number_of_links_to_check_; vector< vector< vector > > search_results_; KHTMLPartMap html_parts_; }; #include "searchmanager_impl.h" #endif