/***************************************************************************
 *   Copyright (C) 2008 by Jacob Kanev,                                    *
 *                         Thomas Fischer                                  *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.              *
 ***************************************************************************/
#ifndef KBIBTEXWEBQUERYCITESEERX_H
#define KBIBTEXWEBQUERYCITESEERX_H

// NOTE(review): the targets of the six #include directives below are missing
// in this copy of the file and must be restored before it can compile.
// This header refers to QString, QWidget, KURL, KIO::Job, std::deque,
// BibTeX::Entry, BibTeX::EntryField, WebQuery and WebQueryWidget without
// declaring any of them itself, so the restored headers have to provide
// those names. TODO: confirm the exact header list against the build.
#include
#include
#include
#include
#include
#include

// Forward declaration only; QBuffer is not referenced anywhere else in this
// header.
class QBuffer;

namespace KBibTeX
{

    /// GUI element of the CiteSeerX web query.
    /**
    WebQueryCiteSeerX keeps a pointer to an instance of this class and hands
    out a GUI element through its widget() accessor.

    @author Thomas Fischer
    */
    class WebQueryCiteSeerXWidget : public WebQueryWidget
    {
        Q_OBJECT
    public:
        /// Construct.
        /** NOTE(review): parent and name are presumably the usual Qt parent
            widget and object name; the constructor body is not part of this
            header, so confirm in the implementation file. */
        WebQueryCiteSeerXWidget( QWidget *parent, const char *name = 0 );
    };

    // WebQueryCiteSeerX is forward-declared here because the typedef below
    // names it before the class itself is defined.
    class WebQueryCiteSeerX;

    /// Convenience typedef for member pointer to parsing function.
    /** Points to a WebQueryCiteSeerX member function that takes the page
        text as a const QString reference and returns nothing. */
    typedef void ( WebQueryCiteSeerX::* DataParser )( const QString & );

    /// Query the citeseer database.
    /**
    This class is used for querying the citeseer database. CiteSeerX is
    still beta, so this class has to be adapted as soon as the CiteSeer
    people change their web interface.

    After entering the search term, citeseer returns a page with 10 links
    (one for each paper), and one link for the next 10 hits. This class uses
    a queue to schedule each reading job, and two parsing functions, one for
    the summary page and one for each paper result. The BibTeX fields
    abstract, title, author, year, journal, and pages are found.
@author Jacob Kanev */ class WebQueryCiteSeerX : public WebQuery { Q_OBJECT public: struct DataRequest { KURL url; DataParser parser; }; /// Construct. WebQueryCiteSeerX( QWidget* parent ); /// Destroy. virtual ~WebQueryCiteSeerX(); /// Main function: start query. void query(); /// Return title. QString title(); /// Return disclaimer. QString disclaimer(); /// Return disclaimer URL. QString disclaimerURL(); /// Return GUI element. WebQueryWidget *widget(); protected: /// Callback for cancelling. void cancelQuery(); private slots: /// Callback when the job is finished. /**Reads the data from the job, and hands it over to the currently set parser. */ void getData( KIO::Job *job ); private: /// Parses the main page and schedules single-paper reading jobs. /** Function parses the summary page, and schedules one job for each paper link, and one job for the "Next 10" summary page. */ void parseSummaryPage( const QString &data ); /// Parses single-paper pages. /** Function reads the "Abstract:" and the "@entrytype{" strings found in the html page. */ void parsePaperPage( const QString &data ); /// Execute next waiting job. /** Takes the next query out of the queue, sets the appropriate parser, and schedules getData with the URL. */ void nextJob(); /// Find single bibtex field in html page and add to entry. /** Function uses the first collected text from the description (a regular expression), and adds it as type "type" to the "entry". */ void parseForSingleExpression( QString description, const QString &data, BibTeX::Entry *entry, BibTeX::EntryField::FieldType type ); /// The currently active parser. DataParser m_currentParser; /// The internet address of CiteSeerX. QString m_citeSeerXServer; /// Number of hits desired by user. int m_desiredHits; /// Number of hits read from summary pages. int m_receivedHits; /// List with waiting queries std::deque m_queryQueue; /// Pointer to QT-dialog. WebQueryCiteSeerXWidget *m_widget; }; } #endif