summaryrefslogtreecommitdiffstats
path: root/src/fetch
diff options
context:
space:
mode:
authortpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>2010-03-01 19:17:32 +0000
committertpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>2010-03-01 19:17:32 +0000
commite38d2351b83fa65c66ccde443777647ef5cb6cff (patch)
tree1897fc20e9f73a81c520a5b9f76f8ed042124883 /src/fetch
downloadtellico-e38d2351.tar.gz
tellico-e38d2351.zip
Added KDE3 version of Tellico
git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/applications/tellico@1097620 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
Diffstat (limited to 'src/fetch')
-rw-r--r--src/fetch/Makefile.am46
-rw-r--r--src/fetch/amazonfetcher.cpp937
-rw-r--r--src/fetch/amazonfetcher.h158
-rw-r--r--src/fetch/animenfofetcher.cpp378
-rw-r--r--src/fetch/animenfofetcher.h86
-rw-r--r--src/fetch/arxivfetcher.cpp366
-rw-r--r--src/fetch/arxivfetcher.h93
-rw-r--r--src/fetch/bibsonomyfetcher.cpp209
-rw-r--r--src/fetch/bibsonomyfetcher.h82
-rw-r--r--src/fetch/citebasefetcher.cpp248
-rw-r--r--src/fetch/citebasefetcher.h83
-rw-r--r--src/fetch/configwidget.cpp66
-rw-r--r--src/fetch/configwidget.h78
-rw-r--r--src/fetch/crossreffetcher.cpp392
-rw-r--r--src/fetch/crossreffetcher.h97
-rw-r--r--src/fetch/discogsfetcher.cpp413
-rw-r--r--src/fetch/discogsfetcher.h117
-rw-r--r--src/fetch/entrezfetcher.cpp498
-rw-r--r--src/fetch/entrezfetcher.h113
-rw-r--r--src/fetch/execexternalfetcher.cpp561
-rw-r--r--src/fetch/execexternalfetcher.h118
-rw-r--r--src/fetch/fetch.h64
-rw-r--r--src/fetch/fetcher.cpp61
-rw-r--r--src/fetch/fetcher.h151
-rw-r--r--src/fetch/fetchmanager.cpp707
-rw-r--r--src/fetch/fetchmanager.h108
-rw-r--r--src/fetch/gcstarpluginfetcher.cpp486
-rw-r--r--src/fetch/gcstarpluginfetcher.h121
-rw-r--r--src/fetch/googlescholarfetcher.cpp233
-rw-r--r--src/fetch/googlescholarfetcher.h103
-rw-r--r--src/fetch/ibsfetcher.cpp415
-rw-r--r--src/fetch/ibsfetcher.h87
-rw-r--r--src/fetch/imdbfetcher.cpp1208
-rw-r--r--src/fetch/imdbfetcher.h141
-rw-r--r--src/fetch/isbndbfetcher.cpp350
-rw-r--r--src/fetch/isbndbfetcher.h94
-rw-r--r--src/fetch/messagehandler.cpp35
-rw-r--r--src/fetch/messagehandler.h49
-rw-r--r--src/fetch/scripts/Makefile.am30
-rw-r--r--src/fetch/scripts/boardgamegeek.rb235
-rw-r--r--src/fetch/scripts/boardgamegeek.rb.spec7
-rw-r--r--src/fetch/scripts/dark_horse_comics.py399
-rw-r--r--src/fetch/scripts/dark_horse_comics.py.spec7
-rwxr-xr-xsrc/fetch/scripts/fr.allocine.py335
-rw-r--r--src/fetch/scripts/fr.allocine.py.spec7
-rw-r--r--src/fetch/scripts/ministerio_de_cultura.py595
-rw-r--r--src/fetch/scripts/ministerio_de_cultura.py.spec7
-rw-r--r--src/fetch/srufetcher.cpp541
-rw-r--r--src/fetch/srufetcher.h131
-rw-r--r--src/fetch/yahoofetcher.cpp400
-rw-r--r--src/fetch/yahoofetcher.h105
-rw-r--r--src/fetch/z3950-servers.cfg106
-rw-r--r--src/fetch/z3950connection.cpp503
-rw-r--r--src/fetch/z3950connection.h126
-rw-r--r--src/fetch/z3950fetcher.cpp782
-rw-r--r--src/fetch/z3950fetcher.h153
56 files changed, 14021 insertions, 0 deletions
diff --git a/src/fetch/Makefile.am b/src/fetch/Makefile.am
new file mode 100644
index 0000000..fbf2ea1
--- /dev/null
+++ b/src/fetch/Makefile.am
@@ -0,0 +1,46 @@
+####### kdevelop will overwrite this part!!! (begin)##########
+noinst_LIBRARIES = libfetch.a
+
+AM_CPPFLAGS = $(all_includes) $(LIBXML_CFLAGS) $(LIBXSLT_CFLAGS) $(YAZ_CFLAGS)
+
+libfetch_a_METASOURCES = AUTO
+
+libfetch_a_SOURCES = amazonfetcher.cpp animenfofetcher.cpp arxivfetcher.cpp \
+ bibsonomyfetcher.cpp citebasefetcher.cpp configwidget.cpp crossreffetcher.cpp \
+ discogsfetcher.cpp entrezfetcher.cpp execexternalfetcher.cpp fetcher.cpp fetchmanager.cpp \
+ gcstarpluginfetcher.cpp googlescholarfetcher.cpp ibsfetcher.cpp imdbfetcher.cpp \
+ isbndbfetcher.cpp messagehandler.cpp srufetcher.cpp yahoofetcher.cpp z3950connection.cpp \
+ z3950fetcher.cpp
+
+####### kdevelop will overwrite this part!!! (end)############
+# Everything below is maintained by hand; the external fetch scripts live in the scripts subdirectory.
+SUBDIRS = scripts
+# Editor backup files are removed by "make clean".
+CLEANFILES = *~
+# NOTE(review): presumably "noautodist" stops the KDE build tooling from generating the dist list itself, which is why EXTRA_DIST is spelled out below - confirm.
+KDE_OPTIONS = noautodist
+# Sources, headers and the server list that go into the distribution tarball.
+EXTRA_DIST = \
+fetcher.h fetcher.cpp fetchmanager.h fetchmanager.cpp \
+amazonfetcher.h amazonfetcher.cpp z3950fetcher.h z3950fetcher.cpp \
+imdbfetcher.h imdbfetcher.cpp fetch.h configwidget.h configwidget.cpp \
+entrezfetcher.h entrezfetcher.cpp \
+execexternalfetcher.h execexternalfetcher.cpp \
+messagehandler.h messagehandler.cpp \
+z3950connection.h z3950connection.cpp \
+yahoofetcher.h yahoofetcher.cpp \
+animenfofetcher.h animenfofetcher.cpp \
+ibsfetcher.h ibsfetcher.cpp \
+srufetcher.h srufetcher.cpp \
+isbndbfetcher.h isbndbfetcher.cpp \
+gcstarpluginfetcher.h gcstarpluginfetcher.cpp \
+crossreffetcher.h crossreffetcher.cpp \
+arxivfetcher.h arxivfetcher.cpp \
+citebasefetcher.h citebasefetcher.cpp \
+bibsonomyfetcher.h bibsonomyfetcher.cpp \
+googlescholarfetcher.h googlescholarfetcher.cpp \
+discogsfetcher.h discogsfetcher.cpp \
+z3950-servers.cfg
+# Install the z39.50 server list into $(kde_datadir)/tellico.
+appdir = $(kde_datadir)/tellico
+app_DATA = z3950-servers.cfg
diff --git a/src/fetch/amazonfetcher.cpp b/src/fetch/amazonfetcher.cpp
new file mode 100644
index 0000000..36c009f
--- /dev/null
+++ b/src/fetch/amazonfetcher.cpp
@@ -0,0 +1,937 @@
+/***************************************************************************
+ copyright : (C) 2004-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "amazonfetcher.h"
+#include "messagehandler.h"
+#include "../translators/xslthandler.h"
+#include "../translators/tellicoimporter.h"
+#include "../imagefactory.h"
+#include "../tellico_kernel.h"
+#include "../latin1literal.h"
+#include "../collection.h"
+#include "../document.h"
+#include "../entry.h"
+#include "../field.h"
+#include "../tellico_utils.h"
+#include "../tellico_debug.h"
+#include "../isbnvalidator.h"
+#include "../gui/combobox.h"
+
+#include <klocale.h>
+#include <kio/job.h>
+#include <kstandarddirs.h>
+#include <kconfig.h>
+#include <klineedit.h>
+#include <kseparator.h>
+#include <kcombobox.h>
+#include <kaccelmanager.h>
+
+#include <qdom.h>
+#include <qlayout.h>
+#include <qlabel.h>
+#include <qwhatsthis.h>
+#include <qcheckbox.h>
+#include <qfile.h>
+#include <qtextcodec.h>
+
+// File-local settings for the Amazon web service. The unnamed namespace already
+// keeps these names private to this translation unit.
+namespace {
+  // paging: number of results asked for per request, and the default overall cap
+  const int AMAZON_RETURNS_PER_REQUEST = 10;
+  const int AMAZON_MAX_RETURNS_TOTAL = 20;
+
+  // built-in credentials, used unless the fetcher's config supplies its own
+  // NOTE(review): the access key is committed in source - confirm this is intended
+  const char* AMAZON_ACCESS_KEY = "0834VQ4S71KYPVSYQD02";
+  const char* AMAZON_ASSOC_TOKEN = "tellico-20";
+
+  // need to have these in the translation file
+  const char* linkText = I18N_NOOP("Amazon Link");
+}
+
+using Tellico::Fetch::AmazonFetcher;
+
+// static
+// Returns the display title and web-service URL for the given Amazon site.
+// site_ indexes the table below; a value outside the table is reported and
+// replaced by the first entry, so the array is never read out of bounds.
+const AmazonFetcher::SiteData& AmazonFetcher::siteData(int site_) {
+  static SiteData dataVector[6] = {
+    {
+      i18n("Amazon (US)"),
+      "http://webservices.amazon.com/onca/xml"
+    }, {
+      i18n("Amazon (UK)"),
+      "http://webservices.amazon.co.uk/onca/xml"
+    }, {
+      i18n("Amazon (Germany)"),
+      "http://webservices.amazon.de/onca/xml"
+    }, {
+      i18n("Amazon (Japan)"),
+      "http://webservices.amazon.co.jp/onca/xml"
+    }, {
+      i18n("Amazon (France)"),
+      "http://webservices.amazon.fr/onca/xml"
+    }, {
+      i18n("Amazon (Canada)"),
+      "http://webservices.amazon.ca/onca/xml"
+    }
+  };
+
+  const int numSites = static_cast<int>(sizeof(dataVector) / sizeof(dataVector[0]));
+  if(site_ < 0 || site_ >= numSites) {
+    kdWarning() << "AmazonFetcher::siteData() - unknown site: " << site_ << endl;
+    site_ = 0;
+  }
+  return dataVector[site_];
+}
+
+// Creates a fetcher for one Amazon site. It starts with the built-in access key,
+// associate token and result limit, and takes its name from the site's title.
+AmazonFetcher::AmazonFetcher(Site site_, QObject* parent_, const char* name_)
+    : Fetcher(parent_, name_),
+      m_xsltHandler(0),
+      m_site(site_),
+      m_imageSize(MediumImage),
+      m_access(QString::fromLatin1(AMAZON_ACCESS_KEY)),
+      m_assoc(QString::fromLatin1(AMAZON_ASSOC_TOKEN)),
+      m_addLinkField(true),
+      m_limit(AMAZON_MAX_RETURNS_TOTAL),
+      m_countOffset(0),
+      m_page(1),
+      m_total(-1),
+      m_numResults(0),
+      m_job(0),
+      m_started(false) {
+  const SiteData& site = siteData(site_);
+  m_name = site.title;
+}
+
+// Frees the stylesheet handler (if any) and clears the member pointer.
+AmazonFetcher::~AmazonFetcher() {
+  XSLTHandler* handler = m_xsltHandler;
+  m_xsltHandler = 0;
+  delete handler;
+}
+
+// Translated name used when the fetcher has no name of its own.
+QString AmazonFetcher::defaultName() {
+  const QString name = i18n("Amazon.com Web Services");
+  return name;
+}
+
+// Name of this data source: m_name when set, otherwise defaultName().
+QString AmazonFetcher::source() const {
+  if(m_name.isEmpty()) {
+    return defaultName();
+  }
+  return m_name;
+}
+
+// True for the collection types this fetcher accepts: books, comic books,
+// bibtex, albums, videos and games.
+bool AmazonFetcher::canFetch(int type) const {
+  switch(type) {
+    case Data::Collection::Book:
+    case Data::Collection::ComicBook:
+    case Data::Collection::Bibtex:
+    case Data::Collection::Album:
+    case Data::Collection::Video:
+    case Data::Collection::Game:
+      return true;
+
+    default:
+      return false;
+  }
+}
+
+// Reads the fetcher settings from config_. Empty or missing "AccessKey" and
+// "AssocToken" entries, and a missing "Image Size", leave the current values
+// untouched; "Custom Fields" always replaces m_fields.
+void AmazonFetcher::readConfigHook(const KConfigGroup& config_) {
+  const QString accessKey = config_.readEntry("AccessKey");
+  if(!accessKey.isEmpty()) {
+    m_access = accessKey;
+  }
+
+  const QString assocToken = config_.readEntry("AssocToken");
+  if(!assocToken.isEmpty()) {
+    m_assoc = assocToken;
+  }
+
+  // -1 is the "not configured" marker
+  const int storedSize = config_.readNumEntry("Image Size", -1);
+  if(storedSize > -1) {
+    m_imageSize = static_cast<ImageSize>(storedSize);
+  }
+
+  m_fields = config_.readListEntry("Custom Fields", QString::fromLatin1("keyword"));
+}
+
+// Starts a fresh search: stores the key and the trimmed value, resets all
+// paging and result counters, then issues the first request.
+void AmazonFetcher::search(FetchKey key_, const QString& value_) {
+  // what to look for
+  m_key = key_;
+  m_value = value_.stripWhiteSpace();
+
+  // start over from the first page with nothing counted yet
+  m_page = 1;
+  m_total = -1;
+  m_numResults = 0;
+  m_countOffset = 0;
+
+  m_started = true;
+  doSearch();
+}
+
+// Resumes the previous search, allowing another AMAZON_MAX_RETURNS_TOTAL results.
+void AmazonFetcher::continueSearch() {
+  m_limit += AMAZON_MAX_RETURNS_TOTAL;
+  m_started = true;
+  doSearch();
+}
+
+// Builds the web-service request for the current key, value and page, then starts
+// the download; the reply arrives through slotData() and slotComplete().
+// A collection type that cannot be searched, an unknown key, or an ISBN list that
+// ends up empty finishes the search through stop() without sending a request.
+void AmazonFetcher::doSearch() {
+  m_data.truncate(0);
+
+//  myDebug() << "AmazonFetcher::doSearch() - value = " << m_value << endl;
+//  myDebug() << "AmazonFetcher::doSearch() - getting page " << m_page << endl;
+
+  const SiteData& data = siteData(m_site);
+  KURL u = data.url;
+  u.addQueryItem(QString::fromLatin1("Service"), QString::fromLatin1("AWSECommerceService"));
+  u.addQueryItem(QString::fromLatin1("AssociateTag"), m_assoc);
+  u.addQueryItem(QString::fromLatin1("AWSAccessKeyId"), m_access);
+  u.addQueryItem(QString::fromLatin1("Operation"), QString::fromLatin1("ItemSearch"));
+  u.addQueryItem(QString::fromLatin1("ResponseGroup"), QString::fromLatin1("Large"));
+  u.addQueryItem(QString::fromLatin1("ItemPage"), QString::number(m_page));
+  u.addQueryItem(QString::fromLatin1("Version"), QString::fromLatin1("2007-10-29"));
+
+  // the search index depends on the type of the current collection
+  const int type = Kernel::self()->collectionType();
+  switch(type) {
+    case Data::Collection::Book:
+    case Data::Collection::ComicBook:
+    case Data::Collection::Bibtex:
+      u.addQueryItem(QString::fromLatin1("SearchIndex"), QString::fromLatin1("Books"));
+      u.addQueryItem(QString::fromLatin1("SortIndex"), QString::fromLatin1("relevancerank"));
+      break;
+
+    case Data::Collection::Album:
+      u.addQueryItem(QString::fromLatin1("SearchIndex"), QString::fromLatin1("Music"));
+      break;
+
+    case Data::Collection::Video:
+      u.addQueryItem(QString::fromLatin1("SearchIndex"), QString::fromLatin1("Video"));
+      u.addQueryItem(QString::fromLatin1("SortIndex"), QString::fromLatin1("relevancerank"));
+      break;
+
+    case Data::Collection::Game:
+      u.addQueryItem(QString::fromLatin1("SearchIndex"), QString::fromLatin1("VideoGames"));
+      break;
+
+    case Data::Collection::Coin:
+    case Data::Collection::Stamp:
+    case Data::Collection::Wine:
+    case Data::Collection::Base:
+    case Data::Collection::Card:
+    default:
+      message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning);
+      stop();
+      return;
+  }
+
+  // I have not been able to find any documentation about what character set to use
+  // when URL encoding the search term in the Amazon REST interface. But I do know
+  // that utf8 DOES NOT WORK. So I'm arbitrarily using iso-8859-1, except for JP.
+  // Why different for JP? Well, I've not received any bug reports from that direction yet
+
+//  QString value = KURL::decode_string(value_, 106);
+//  QString value = QString::fromLocal8Bit(value_.utf8());
+  QString value = m_value;
+  // a mibenum of 106 is utf-8, 4 is iso-8859-1, 0 means use user's locale,
+  int mib = m_site == AmazonFetcher::JP ? 106 : 4;
+
+  switch(m_key) {
+    case Title:
+      u.addQueryItem(QString::fromLatin1("Title"), value, mib);
+      break;
+
+    case Person:
+      if(type == Data::Collection::Video) {
+        u.addQueryItem(QString::fromLatin1("Actor"), value, mib);
+        u.addQueryItem(QString::fromLatin1("Director"), value, mib);
+      } else if(type == Data::Collection::Album) {
+        u.addQueryItem(QString::fromLatin1("Artist"), value, mib);
+      } else if(type == Data::Collection::Game) {
+        u.addQueryItem(QString::fromLatin1("Manufacturer"), value, mib);
+      } else { // books and bibtex
+        QString s = QString::fromLatin1("author:%1 or publisher:%2").arg(value, value);
+//        u.addQueryItem(QString::fromLatin1("Author"), value, mib);
+//        u.addQueryItem(QString::fromLatin1("Publisher"), value, mib);
+        u.addQueryItem(QString::fromLatin1("Power"), s, mib);
+      }
+      break;
+
+    case ISBN:
+      {
+        u.removeQueryItem(QString::fromLatin1("Operation"));
+        u.addQueryItem(QString::fromLatin1("Operation"), QString::fromLatin1("ItemLookup"));
+
+        QString s = m_value; // not encValue!!!
+        s.remove('-');
+        // ISBN only get digits or 'X', and multiple values are connected with "; "
+        QStringList isbns = QStringList::split(QString::fromLatin1("; "), s);
+        // Amazon isbn13 search is still very flaky, so if possible, we're going to convert
+        // all of them to isbn10. If we run into a 979 isbn13, then we're forced to do an
+        // isbn13 search
+        bool isbn13 = false;
+        for(QStringList::Iterator it = isbns.begin(); it != isbns.end(); ) {
+          // test the individual ISBN, not the whole search string
+          if((*it).startsWith(QString::fromLatin1("979"))) {
+            if(m_site == JP) { // never works for JP
+              kdWarning() << "AmazonFetcher:doSearch() - ISBN-13 searching not implemented for Japan" << endl;
+              // remove() invalidates 'it' and returns the iterator of the following item
+              it = isbns.remove(it);
+              continue;
+            }
+            isbn13 = true;
+            break;
+          }
+          ++it;
+        }
+        // every ISBN was dropped above, so there is nothing left to look up
+        if(isbns.isEmpty()) {
+          stop();
+          return;
+        }
+        // if we want isbn10, then convert all
+        if(!isbn13) {
+          for(QStringList::Iterator it = isbns.begin(); it != isbns.end(); ++it) {
+            if((*it).length() > 12) {
+              (*it) = ISBNValidator::isbn10(*it);
+              (*it).remove('-');
+            }
+          }
+          // the default search is by ASIN, which prohibits SearchIndex
+          u.removeQueryItem(QString::fromLatin1("SearchIndex"));
+        }
+        // limit to first 10
+        while(isbns.size() > 10) {
+          isbns.pop_back();
+        }
+        u.addQueryItem(QString::fromLatin1("ItemId"), isbns.join(QString::fromLatin1(",")));
+        if(isbn13) {
+          u.addQueryItem(QString::fromLatin1("IdType"), QString::fromLatin1("EAN"));
+        }
+      }
+      break;
+
+    case UPC:
+      {
+        u.removeQueryItem(QString::fromLatin1("Operation"));
+        u.addQueryItem(QString::fromLatin1("Operation"), QString::fromLatin1("ItemLookup"));
+        // US allows UPC, all others are EAN
+        if(m_site == US) {
+          u.addQueryItem(QString::fromLatin1("IdType"), QString::fromLatin1("UPC"));
+        } else {
+          u.addQueryItem(QString::fromLatin1("IdType"), QString::fromLatin1("EAN"));
+        }
+        QString s = m_value; // not encValue!!!
+        s.remove('-');
+        // limit to first 10
+        s.replace(QString::fromLatin1("; "), QString::fromLatin1(","));
+        s = s.section(',', 0, 9);
+        u.addQueryItem(QString::fromLatin1("ItemId"), s);
+      }
+      break;
+
+    case Keyword:
+      u.addQueryItem(QString::fromLatin1("Keywords"), m_value, mib);
+      break;
+
+    case Raw:
+      {
+        // raw searches are given as "name=value" and passed on as a single query item
+        QString key = value.section('=', 0, 0).stripWhiteSpace();
+        QString str = value.section('=', 1).stripWhiteSpace();
+        u.addQueryItem(key, str, mib);
+      }
+      break;
+
+    default:
+      kdWarning() << "AmazonFetcher::search() - key not recognized: " << m_key << endl;
+      stop();
+      return;
+  }
+//  myDebug() << "AmazonFetcher::search() - url: " << u.url() << endl;
+
+  m_job = KIO::get(u, false, false);
+  connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+          SLOT(slotData(KIO::Job*, const QByteArray&)));
+  connect(m_job, SIGNAL(result(KIO::Job*)),
+          SLOT(slotComplete(KIO::Job*)));
+}
+
+// Ends a running search: kills any pending job, drops the buffered data and
+// emits signalDone(). Does nothing when no search has been started.
+void AmazonFetcher::stop() {
+  if(m_started) {
+//    myDebug() << "AmazonFetcher::stop()" << endl;
+    if(m_job) {
+      m_job->kill();
+      m_job = 0;
+    }
+    m_data.truncate(0);
+    m_started = false;
+    emit signalDone(this);
+  }
+}
+
+// Appends a chunk of downloaded bytes to the m_data buffer.
+void AmazonFetcher::slotData(KIO::Job*, const QByteArray& data_) {
+  QDataStream appender(m_data, IO_WriteOnly | IO_Append);
+  appender.writeRawBytes(data_.data(), data_.size());
+}
+
+// Handles a finished download. On the first page of a search it reads the total
+// result count and any error messages from the raw XML. It then runs the reply
+// through the amazon2tellico stylesheet, imports the result as a collection and
+// emits signalResultFound() for each entry until m_limit is reached. Finally it
+// requests the next page, starts on the next batch of ';'-separated values, or
+// calls stop().
+void AmazonFetcher::slotComplete(KIO::Job* job_) {
+//  myDebug() << "AmazonFetcher::slotComplete()" << endl;
+
+  // since the fetch is done, don't worry about holding the job pointer
+  m_job = 0;
+
+  if(job_->error()) {
+    job_->showErrorDialog(Kernel::self()->widget());
+    stop();
+    return;
+  }
+
+  if(m_data.isEmpty()) {
+    myDebug() << "AmazonFetcher::slotComplete() - no data" << endl;
+    stop();
+    return;
+  }
+
+#if 0
+  kdWarning() << "Remove debug from amazonfetcher.cpp" << endl;
+  QFile f(QString::fromLatin1("/tmp/test%1.xml").arg(m_page));
+  if(f.open(IO_WriteOnly)) {
+    QTextStream t(&f);
+    t.setEncoding(QTextStream::UnicodeUTF8);
+    t << QCString(m_data, m_data.size()+1);
+  }
+  f.close();
+#endif
+
+  QStringList errors;
+  // m_total is still -1 only for the first reply of a search
+  if(m_total == -1) {
+    QDomDocument dom;
+    if(!dom.setContent(m_data, false)) {
+      kdWarning() << "AmazonFetcher::slotComplete() - server did not return valid XML." << endl;
+      stop();
+      return;
+    }
+    // find TotalResults element
+    // it's in the first level under the root element
+    //ItemSearchResponse/Items/TotalResults
+    QDomNode n = dom.documentElement().namedItem(QString::fromLatin1("Items"))
+                                      .namedItem(QString::fromLatin1("TotalResults"));
+    QDomElement e = n.toElement();
+    if(!e.isNull()) {
+      m_total = e.text().toInt();
+    }
+    n = dom.documentElement().namedItem(QString::fromLatin1("Items"))
+                             .namedItem(QString::fromLatin1("Request"))
+                             .namedItem(QString::fromLatin1("Errors"));
+    e = n.toElement();
+    if(!e.isNull()) {
+      QDomNodeList nodes = e.elementsByTagName(QString::fromLatin1("Error"));
+      for(uint i = 0; i < nodes.count(); ++i) {
+        e = nodes.item(i).toElement().namedItem(QString::fromLatin1("Code")).toElement();
+        if(!e.isNull() && e.text() == Latin1Literal("AWS.ECommerceService.NoExactMatches")) {
+          // no exact match, not a real error, so skip
+          continue;
+        }
+        // for some reason, Amazon will return an error simply when a valid ISBN is not found
+        // I really want to ignore that, so check the IsValid element in the Request element
+        QDomNode isValidNode = n.parentNode().namedItem(QString::fromLatin1("IsValid"));
+        if(m_key == ISBN && isValidNode.toElement().text().lower() == Latin1Literal("true")) {
+          continue;
+        }
+        e = nodes.item(i).toElement().namedItem(QString::fromLatin1("Message")).toElement();
+        if(!e.isNull()) {
+          errors << e.text();
+        }
+      }
+    }
+  }
+
+  if(!m_xsltHandler) {
+    initXSLTHandler();
+    if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading
+      stop();
+      return;
+    }
+  }
+
+//  QRegExp stripHTML(QString::fromLatin1("<.*>"), true);
+//  stripHTML.setMinimal(true);
+
+  // assume amazon is always utf-8
+  QString str = m_xsltHandler->applyStylesheet(QString::fromUtf8(m_data, m_data.size()));
+  Import::TellicoImporter imp(str);
+  Data::CollPtr coll = imp.collection();
+  if(!coll) {
+    myDebug() << "AmazonFetcher::slotComplete() - no collection pointer" << endl;
+    stop();
+    return;
+  }
+
+  if(!m_addLinkField) {
+    // remove amazon field if it's not to be added
+    coll->removeField(QString::fromLatin1("amazon"));
+  }
+
+  Data::EntryVec entries = coll->entries();
+  // errors are only shown to the user when the reply produced no entries at all
+  if(entries.isEmpty() && !errors.isEmpty()) {
+    for(QStringList::ConstIterator it = errors.constBegin(); it != errors.constEnd(); ++it) {
+      myDebug() << "AmazonFetcher::" << *it << endl;
+    }
+    message(errors[0], MessageHandler::Error);
+    stop();
+    return;
+  }
+
+  int count = 0;
+  for(Data::EntryVec::Iterator entry = entries.begin();
+      m_numResults < m_limit && entry != entries.end();
+      ++entry, ++count) {
+    // skip the entries of this page that were already handed out earlier
+    if(count < m_countOffset) {
+      continue;
+    }
+    if(!m_started) {
+      // might get aborted
+      break;
+    }
+
+    // special case book author
+    // amazon is really bad about not putting spaces after periods
+    if(coll->type() == Data::Collection::Book) {
+      QRegExp rx(QString::fromLatin1("\\.([^\\s])"));
+      QStringList values = entry->fields(QString::fromLatin1("author"), false);
+      for(QStringList::Iterator it = values.begin(); it != values.end(); ++it) {
+        (*it).replace(rx, QString::fromLatin1(". \\1"));
+      }
+      entry->setField(QString::fromLatin1("author"), values.join(QString::fromLatin1("; ")));
+    }
+
+    // UK puts the year in the title for some reason
+    if(m_site == UK && coll->type() == Data::Collection::Video) {
+      QRegExp rx(QString::fromLatin1("\\[(\\d{4})\\]"));
+      QString t = entry->title();
+      if(t.find(rx) > -1) {
+        QString y = rx.cap(1);
+        // simplifyWhiteSpace() returns a new string, so its result must be kept
+        t = t.remove(rx).simplifyWhiteSpace();
+        entry->setField(QString::fromLatin1("title"), t);
+        if(entry->field(QString::fromLatin1("year")).isEmpty()) {
+          entry->setField(QString::fromLatin1("year"), y);
+        }
+      }
+    }
+
+    // short description passed along with the search result
+    QString desc;
+    switch(coll->type()) {
+      case Data::Collection::Book:
+      case Data::Collection::ComicBook:
+      case Data::Collection::Bibtex:
+        desc = entry->field(QString::fromLatin1("author"))
+               + QChar('/') + entry->field(QString::fromLatin1("publisher"));
+        if(!entry->field(QString::fromLatin1("cr_year")).isEmpty()) {
+          desc += QChar('/') + entry->field(QString::fromLatin1("cr_year"));
+        } else if(!entry->field(QString::fromLatin1("pub_year")).isEmpty()){
+          desc += QChar('/') + entry->field(QString::fromLatin1("pub_year"));
+        }
+        break;
+
+      case Data::Collection::Video:
+        desc = entry->field(QString::fromLatin1("studio"))
+               + QChar('/')
+               + entry->field(QString::fromLatin1("director"))
+               + QChar('/')
+               + entry->field(QString::fromLatin1("year"))
+               + QChar('/')
+               + entry->field(QString::fromLatin1("medium"));
+        break;
+
+      case Data::Collection::Album:
+        desc = entry->field(QString::fromLatin1("artist"))
+               + QChar('/')
+               + entry->field(QString::fromLatin1("label"))
+               + QChar('/')
+               + entry->field(QString::fromLatin1("year"));
+        break;
+
+      case Data::Collection::Game:
+        desc = entry->field(QString::fromLatin1("platform"))
+               + QChar('/')
+               + entry->field(QString::fromLatin1("year"));
+        break;
+
+      default:
+        break;
+    }
+
+    // strip HTML from comments, or plot in movies
+    // tentatively don't do this, looks like ECS 4 cleaned everything up
+/*
+    if(coll->type() == Data::Collection::Video) {
+      QString plot = entry->field(QString::fromLatin1("plot"));
+      plot.remove(stripHTML);
+      entry->setField(QString::fromLatin1("plot"), plot);
+    } else if(coll->type() == Data::Collection::Game) {
+      QString desc = entry->field(QString::fromLatin1("description"));
+      desc.remove(stripHTML);
+      entry->setField(QString::fromLatin1("description"), desc);
+    } else {
+      QString comments = entry->field(QString::fromLatin1("comments"));
+      comments.remove(stripHTML);
+      entry->setField(QString::fromLatin1("comments"), comments);
+    }
+*/
+//    myDebug() << "AmazonFetcher::slotComplete() - " << entry->title() << endl;
+    SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn")));
+    m_entries.insert(r->uid, Data::EntryPtr(entry));
+    emit signalResultFound(r);
+    ++m_numResults;
+  }
+
+  // we might have gotten aborted
+  if(!m_started) {
+    return;
+  }
+
+  // are there any additional results to get?
+  m_hasMoreResults = m_page * AMAZON_RETURNS_PER_REQUEST < m_total;
+
+  const int currentTotal = QMIN(m_total, m_limit);
+  if(m_page * AMAZON_RETURNS_PER_REQUEST < currentTotal) {
+    int foundCount = (m_page-1) * AMAZON_RETURNS_PER_REQUEST + coll->entryCount();
+    message(i18n("Results from %1: %2/%3").arg(source()).arg(foundCount).arg(m_total), MessageHandler::Status);
+    ++m_page;
+    m_countOffset = 0;
+    doSearch();
+  } else if(m_value.contains(';') > 9) {
+    // more than ten values were given: search again with those after the tenth
+    search(m_key, m_value.section(';', 10));
+  } else {
+    // remember how far into the current page we got, for continueSearch()
+    m_countOffset = m_entries.count() % AMAZON_RETURNS_PER_REQUEST;
+    if(m_countOffset == 0) {
+      ++m_page; // need to go to next page
+    }
+    stop();
+  }
+}
+
+Tellico::Data::EntryPtr AmazonFetcher::fetchEntry(uint uid_) { // returns the stored search result for uid_ after cleaning it up, or 0 when the uid is not known
+  Data::EntryPtr entry = m_entries[uid_];
+  if(!entry) {
+    kdWarning() << "AmazonFetcher::fetchEntry() - no entry in dict" << endl;
+    return 0;
+  }
+  // blank out every custom field that is not listed in m_fields
+  QStringList defaultFields = customFields().keys();
+  for(QStringList::Iterator it = defaultFields.begin(); it != defaultFields.end(); ++it) {
+    if(!m_fields.contains(*it)) {
+      entry->setField(*it, QString::null);
+    }
+  }
+
+  // do what we can to remove useless keywords; the cleanup depends on the type of the current collection
+  const int type = Kernel::self()->collectionType();
+  switch(type) {
+    case Data::Collection::Book:
+    case Data::Collection::ComicBook:
+    case Data::Collection::Bibtex:
+      {
+        const QString keywords = QString::fromLatin1("keyword");
+        QStringList oldWords = entry->fields(keywords, false);
+        StringSet words; // collects the keywords that are kept
+        for(QStringList::Iterator it = oldWords.begin(); it != oldWords.end(); ++it) {
+          // the amazon2tellico stylesheet separates keywords with '/'
+          QStringList nodes = QStringList::split('/', *it);
+          for(QStringList::Iterator it2 = nodes.begin(); it2 != nodes.end(); ++it2) {
+            if(*it2 == Latin1Literal("General") ||
+               *it2 == Latin1Literal("Subjects") ||
+               *it2 == Latin1Literal("Par prix") || // french stuff
+               *it2 == Latin1Literal("Divers") || // french stuff
+               (*it2).startsWith(QChar('(')) ||
+               (*it2).startsWith(QString::fromLatin1("Authors"))) {
+              continue;
+            }
+            words.add(*it2);
+          }
+        }
+        entry->setField(keywords, words.toList().join(QString::fromLatin1("; ")));
+      }
+      entry->setField(QString::fromLatin1("comments"), Tellico::decodeHTML(entry->field(QString::fromLatin1("comments"))));
+      break;
+
+    case Data::Collection::Video:
+      {
+        const QString genres = QString::fromLatin1("genre");
+        QStringList oldWords = entry->fields(genres, false);
+        StringSet words;
+        // only care about genres that have "Genres" in the amazon response
+        // and take the first word after that
+        for(QStringList::Iterator it = oldWords.begin(); it != oldWords.end(); ++it) {
+          if((*it).find(QString::fromLatin1("Genres")) == -1) {
+            continue;
+          }
+
+          // the amazon2tellico stylesheet separates words with '/'
+          QStringList nodes = QStringList::split('/', *it);
+          for(QStringList::Iterator it2 = nodes.begin(); it2 != nodes.end(); ++it2) {
+            if(*it2 != Latin1Literal("Genres")) {
+              continue;
+            }
+            ++it2; // step to the word that follows "Genres"
+            if(it2 != nodes.end() && *it2 != Latin1Literal("General")) {
+              words.add(*it2);
+            }
+            break; // we're done
+          }
+        }
+        entry->setField(genres, words.toList().join(QString::fromLatin1("; ")));
+        // language tracks get duplicated, too
+        QStringList langs = entry->fields(QString::fromLatin1("language"), false);
+        words.clear();
+        for(QStringList::ConstIterator it = langs.begin(); it != langs.end(); ++it) {
+          words.add(*it);
+        }
+        entry->setField(QString::fromLatin1("language"), words.toList().join(QString::fromLatin1("; ")));
+      }
+      entry->setField(QString::fromLatin1("plot"), Tellico::decodeHTML(entry->field(QString::fromLatin1("plot"))));
+      break;
+
+    case Data::Collection::Album:
+      {
+        const QString genres = QString::fromLatin1("genre");
+        QStringList oldWords = entry->fields(genres, false);
+        StringSet words;
+        // only care about genres that have "Styles" in the amazon response
+        // and take every word after that, except "General"
+        for(QStringList::Iterator it = oldWords.begin(); it != oldWords.end(); ++it) {
+          if((*it).find(QString::fromLatin1("Styles")) == -1) {
+            continue;
+          }
+
+          // the amazon2tellico stylesheet separates words with '/'
+          QStringList nodes = QStringList::split('/', *it);
+          bool isStyle = false; // becomes true once the "Styles" word has been passed
+          for(QStringList::Iterator it2 = nodes.begin(); it2 != nodes.end(); ++it2) {
+            if(!isStyle) {
+              if(*it2 == Latin1Literal("Styles")) {
+                isStyle = true;
+              }
+              continue;
+            }
+            if(*it2 != Latin1Literal("General")) {
+              words.add(*it2);
+            }
+          }
+        }
+        entry->setField(genres, words.toList().join(QString::fromLatin1("; ")));
+      }
+      entry->setField(QString::fromLatin1("comments"), Tellico::decodeHTML(entry->field(QString::fromLatin1("comments"))));
+      break;
+
+    case Data::Collection::Game:
+      entry->setField(QString::fromLatin1("description"), Tellico::decodeHTML(entry->field(QString::fromLatin1("description"))));
+      break;
+  }
+
+  // clean up the title
+  parseTitle(entry, type);
+
+  // also sometimes table fields have rows but no values
+  Data::FieldVec fields = entry->collection()->fields();
+  QRegExp blank(QString::fromLatin1("[\\s:;]+")); // only white space, column separators and row separators
+  for(Data::FieldVec::Iterator fIt = fields.begin(); fIt != fields.end(); ++fIt) {
+    if(fIt->type() != Data::Field::Table) {
+      continue;
+    }
+    if(blank.exactMatch(entry->field(fIt))) {
+      entry->setField(fIt, QString::null);
+    }
+  }
+  // pick the image url field that matches the configured image size; NoImage leaves the url empty
+  KURL imageURL;
+  switch(m_imageSize) {
+    case SmallImage:
+      imageURL = entry->field(QString::fromLatin1("small-image"));
+      break;
+    case MediumImage:
+      imageURL = entry->field(QString::fromLatin1("medium-image"));
+      break;
+    case LargeImage:
+      imageURL = entry->field(QString::fromLatin1("large-image"));
+      break;
+    case NoImage:
+    default:
+      break;
+  }
+//  myDebug() << "AmazonFetcher::fetchEntry() - grabbing " << imageURL.prettyURL() << endl;
+  if(!imageURL.isEmpty()) {
+    QString id = ImageFactory::addImage(imageURL, true);
+    // FIXME: need to add cover image field to bibtex collection
+    if(id.isEmpty()) {
+      message(i18n("The cover image could not be loaded."), MessageHandler::Warning);
+    } else { // amazon serves up 1x1 gifs occasionally, but that's caught in the image constructor
+      // all relevant collection types have cover fields
+      entry->setField(QString::fromLatin1("cover"), id);
+    }
+  }
+
+  // don't want to show image urls in the fetch dialog
+  entry->setField(QString::fromLatin1("small-image"), QString::null);
+  entry->setField(QString::fromLatin1("medium-image"), QString::null);
+  entry->setField(QString::fromLatin1("large-image"), QString::null);
+  return entry;
+}
+
+// Locates amazon2tellico.xsl and builds a new XSLT handler from it.
+// m_xsltHandler ends up 0 when the stylesheet is found but not valid, and is
+// left as it was when the stylesheet cannot be located.
+void AmazonFetcher::initXSLTHandler() {
+  const QString stylesheet = locate("appdata", QString::fromLatin1("amazon2tellico.xsl"));
+  if(stylesheet.isEmpty()) {
+    kdWarning() << "AmazonFetcher::initXSLTHandler() - can not locate amazon2tellico.xsl." << endl;
+    return;
+  }
+
+  KURL stylesheetUrl;
+  stylesheetUrl.setPath(stylesheet);
+
+  // replace any previous handler
+  delete m_xsltHandler;
+  m_xsltHandler = new XSLTHandler(stylesheetUrl);
+  if(m_xsltHandler->isValid()) {
+    return;
+  }
+
+  kdWarning() << "AmazonFetcher::initXSLTHandler() - error in amazon2tellico.xsl." << endl;
+  delete m_xsltHandler;
+  m_xsltHandler = 0;
+}
+
+// Starts a search meant to update entry_. Book-like entries are searched by
+// ISBN and albums by artist when those fields are set; otherwise the title is
+// used. With nothing to search for, signalDone() is emitted right away.
+void AmazonFetcher::updateEntry(Data::EntryPtr entry_) {
+//  myDebug() << "AmazonFetcher::updateEntry()" << endl;
+
+  const int type = entry_->collection()->type();
+  switch(type) {
+    case Data::Collection::Book:
+    case Data::Collection::ComicBook:
+    case Data::Collection::Bibtex:
+      {
+        const QString isbn = entry_->field(QString::fromLatin1("isbn"));
+        if(!isbn.isEmpty()) {
+          m_limit = 5; // no need for more
+          search(Fetch::ISBN, isbn);
+          return;
+        }
+      }
+      break;
+
+    case Data::Collection::Album:
+      {
+        const QString artist = entry_->field(QString::fromLatin1("artist"));
+        if(!artist.isEmpty()) {
+          search(Fetch::Person, artist);
+          return;
+        }
+      }
+      break;
+
+    default:
+      break;
+  }
+
+  // optimistically try searching for title and rely on Collection::sameEntry() to figure things out
+  const QString title = entry_->field(QString::fromLatin1("title"));
+  if(title.isEmpty()) {
+    myDebug() << "AmazonFetcher::updateEntry() - insufficient info to search" << endl;
+    emit signalDone(this); // always need to emit this if not continuing with the search
+    return;
+  }
+  search(Fetch::Title, title);
+}
+
+// Walks over every bracketed or parenthesized part of the entry's title, hands
+// its content to parseTitleToken(), and cuts the part out of the title when
+// that function returns true. The trimmed title is written back to the entry.
+void AmazonFetcher::parseTitle(Data::EntryPtr entry, int collType) {
+  Q_UNUSED(collType);
+  const QString titleField = QString::fromLatin1("title");
+
+  // assume that everything in brackets or parentheses is extra
+  QRegExp bracketed(QString::fromLatin1("[\\(\\[](.*)[\\)\\]]"));
+  bracketed.setMinimal(true);
+
+  QString title = entry->field(titleField);
+  int pos = bracketed.search(title);
+  while(pos >= 0) {
+    int next = pos + 1;
+    if(parseTitleToken(entry, bracketed.cap(1))) {
+      title.remove(pos, bracketed.matchedLength());
+      next = pos; // the text moved up, so search again at the same position
+    }
+    pos = bracketed.search(title, next);
+  }
+  entry->setField(titleField, title.stripWhiteSpace());
+}
+
+// Looks at one bracketed token from a title and sets the matching entry fields
+// (widescreen, medium, directors-cut). All comparisons ignore case.
+// Returns true when the token should be removed from the title.
+bool AmazonFetcher::parseTitleToken(Data::EntryPtr entry, const QString& token) {
+  const QString yes = QString::fromLatin1("true");
+  bool removeToken = false;
+
+  // screen format
+  if(token.find(QString::fromLatin1("widescreen"), 0, false /* case-insensitive*/) >= 0 ||
+     token.find(i18n("Widescreen"), 0, false) >= 0) {
+    // the token stays in the title
+    entry->setField(QString::fromLatin1("widescreen"), yes);
+  } else if(token.find(QString::fromLatin1("full screen"), 0, false) >= 0) {
+    // no field to set, but drop it from the title
+    removeToken = true;
+  }
+
+  // medium
+  if(token.find(QString::fromLatin1("blu-ray"), 0, false) >= 0) {
+    entry->setField(QString::fromLatin1("medium"), i18n("Blu-ray"));
+    removeToken = true;
+  } else if(token.find(QString::fromLatin1("hd dvd"), 0, false) >= 0) {
+    entry->setField(QString::fromLatin1("medium"), i18n("HD DVD"));
+    removeToken = true;
+  }
+
+  // director's cut
+  if(token.find(QString::fromLatin1("director's cut"), 0, false) >= 0 ||
+     token.find(i18n("Director's Cut"), 0, false) >= 0) {
+    // the token stays in the title
+    entry->setField(QString::fromLatin1("directors-cut"), yes);
+  }
+
+  return removeToken;
+}
+
+// Creates a new configuration widget pre-filled from this fetcher's settings.
+Tellico::Fetch::ConfigWidget* AmazonFetcher::configWidget(QWidget* parent_) const {
+ return new AmazonFetcher::ConfigWidget(parent_, this);
+}
+
+// Builds the Amazon options page: a country combo, an image-size combo and an
+// associate's-id line edit, laid out as label/editor pairs in a grid.
+// When fetcher_ is non-null the controls are initialized from it; otherwise
+// the default associate token and the medium image size are used.
+AmazonFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const AmazonFetcher* fetcher_/*=0*/)
+ : Fetch::ConfigWidget(parent_) {
+ QGridLayout* l = new QGridLayout(optionsWidget(), 4, 2);
+ l->setSpacing(4);
+ l->setColStretch(1, 10);
+
+ // row is pre-incremented before each use, so the first row is 0
+ int row = -1;
+ QLabel* label = new QLabel(i18n("Co&untry: "), optionsWidget());
+ l->addWidget(label, ++row, 0);
+ m_siteCombo = new GUI::ComboBox(optionsWidget());
+ m_siteCombo->insertItem(i18n("United States"), US);
+ m_siteCombo->insertItem(i18n("United Kingdom"), UK);
+ m_siteCombo->insertItem(i18n("Germany"), DE);
+ m_siteCombo->insertItem(i18n("Japan"), JP);
+ m_siteCombo->insertItem(i18n("France"), FR);
+ m_siteCombo->insertItem(i18n("Canada"), CA);
+ connect(m_siteCombo, SIGNAL(activated(int)), SLOT(slotSetModified()));
+ // changing the site also changes the preferred source name
+ connect(m_siteCombo, SIGNAL(activated(int)), SLOT(slotSiteChanged()));
+ l->addWidget(m_siteCombo, row, 1);
+ QString w = i18n("Amazon.com provides data from several different localized sites. Choose the one "
+ "you wish to use for this data source.");
+ QWhatsThis::add(label, w);
+ QWhatsThis::add(m_siteCombo, w);
+ label->setBuddy(m_siteCombo);
+
+ label = new QLabel(i18n("&Image size: "), optionsWidget());
+ l->addWidget(label, ++row, 0);
+ m_imageCombo = new GUI::ComboBox(optionsWidget());
+ m_imageCombo->insertItem(i18n("Small Image"), SmallImage);
+ m_imageCombo->insertItem(i18n("Medium Image"), MediumImage);
+ m_imageCombo->insertItem(i18n("Large Image"), LargeImage);
+ m_imageCombo->insertItem(i18n("No Image"), NoImage);
+ connect(m_imageCombo, SIGNAL(activated(int)), SLOT(slotSetModified()));
+ l->addWidget(m_imageCombo, row, 1);
+ w = i18n("The cover image may be downloaded as well. However, too many large images in the "
+ "collection may degrade performance.");
+ QWhatsThis::add(label, w);
+ QWhatsThis::add(m_imageCombo, w);
+ label->setBuddy(m_imageCombo);
+
+ label = new QLabel(i18n("&Associate's ID: "), optionsWidget());
+ l->addWidget(label, ++row, 0);
+ m_assocEdit = new KLineEdit(optionsWidget());
+ connect(m_assocEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+ l->addWidget(m_assocEdit, row, 1);
+ w = i18n("The associate's id identifies the person accessing the Amazon.com Web Services, and is included "
+ "in any links to the Amazon.com site.");
+ QWhatsThis::add(label, w);
+ QWhatsThis::add(m_assocEdit, w);
+ label->setBuddy(m_assocEdit);
+
+ // the empty last row absorbs any extra vertical space
+ l->setRowStretch(++row, 10);
+
+ if(fetcher_) {
+ m_siteCombo->setCurrentData(fetcher_->m_site);
+ m_assocEdit->setText(fetcher_->m_assoc);
+ m_imageCombo->setCurrentData(fetcher_->m_imageSize);
+ } else { // defaults
+ // NOTE(review): the site combo is not set explicitly here - confirm the
+ // combo's initial selection is the intended default site
+ m_assocEdit->setText(QString::fromLatin1(AMAZON_ASSOC_TOKEN));
+ m_imageCombo->setCurrentData(MediumImage);
+ }
+
+ addFieldsWidget(AmazonFetcher::customFields(), fetcher_ ? fetcher_->m_fields : QStringList());
+
+ KAcceleratorManager::manage(optionsWidget());
+}
+
+// Writes the widget's current settings into config_ and clears the modified
+// flag. The associate token is only written when the edit is non-empty.
+void AmazonFetcher::ConfigWidget::saveConfig(KConfigGroup& config_) {
+  const int siteId = m_siteCombo->currentData().toInt();
+  config_.writeEntry("Site", siteId);
+
+  const QString assocToken = m_assocEdit->text().stripWhiteSpace();
+  if(!assocToken.isEmpty()) {
+    config_.writeEntry("AssocToken", assocToken);
+  }
+
+  const int imageSize = m_imageCombo->currentData().toInt();
+  config_.writeEntry("Image Size", imageSize);
+
+  saveFieldsConfig(config_);
+  slotSetModified(false);
+}
+
+// Returns the title of the site currently selected in the country combo.
+QString AmazonFetcher::ConfigWidget::preferredName() const {
+ return AmazonFetcher::siteData(m_siteCombo->currentData().toInt()).title;
+}
+
+// Announces the new preferred name after the selected site changed.
+void AmazonFetcher::ConfigWidget::slotSiteChanged() {
+ emit signalName(preferredName());
+}
+
+//static
+// Returns the optional fields this fetcher offers, as a map from field name
+// to translated label. Only "keyword" is offered.
+Tellico::StringMap AmazonFetcher::customFields() {
+ StringMap map;
+ map[QString::fromLatin1("keyword")] = i18n("Keywords");
+ return map;
+}
+
+#include "amazonfetcher.moc"
diff --git a/src/fetch/amazonfetcher.h b/src/fetch/amazonfetcher.h
new file mode 100644
index 0000000..05df8d7
--- /dev/null
+++ b/src/fetch/amazonfetcher.h
@@ -0,0 +1,158 @@
+/***************************************************************************
+ copyright : (C) 2004-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef AMAZONFETCHER_H
+#define AMAZONFETCHER_H
+
+#include "fetcher.h"
+#include "configwidget.h"
+#include "../datavectors.h"
+
+#include <kurl.h>
+
+#include <qcstring.h> // for QByteArray
+#include <qguardedptr.h>
+
+class KLineEdit;
+
+class QCheckBox;
+class QLabel;
+
+namespace KIO {
+ class Job;
+}
+
+namespace Tellico {
+
+ class XSLTHandler;
+ namespace GUI {
+ class ComboBox;
+ }
+
+ namespace Fetch {
+
+/**
+ * A fetcher for Amazon.com.
+ *
+ * Each fetcher is constructed for one of the localized Amazon sites listed
+ * in Site. Its settings are edited through the nested ConfigWidget class.
+ *
+ * @author Robby Stephenson
+ */
+class AmazonFetcher : public Fetcher {
+Q_OBJECT
+
+public:
+ // Localized Amazon sites. The numeric values are also used as the item
+ // data of the site combo box in the config widget.
+ enum Site {
+ Unknown = -1,
+ US = 0,
+ UK = 1,
+ DE = 2,
+ JP = 3,
+ FR = 4,
+ CA = 5
+ };
+
+ // Cover image size choices offered in the config widget.
+ enum ImageSize {
+ SmallImage=0,
+ MediumImage=1,
+ LargeImage=2,
+ NoImage=3
+ };
+
+ AmazonFetcher(Site site, QObject* parent, const char* name = 0);
+ virtual ~AmazonFetcher();
+
+ virtual QString source() const;
+ virtual bool isSearching() const { return m_started; }
+ virtual void search(FetchKey key, const QString& value);
+ virtual void continueSearch();
+ // amazon can search title, person, isbn, upc, or keyword. No Raw for now.
+ virtual bool canSearch(FetchKey k) const { return k == Title || k == Person || k == ISBN || k == UPC || k == Keyword; }
+ virtual void stop();
+ virtual Data::EntryPtr fetchEntry(uint uid);
+ virtual Type type() const { return Amazon; }
+ virtual bool canFetch(int type) const;
+ virtual void readConfigHook(const KConfigGroup& config);
+
+ virtual void updateEntry(Data::EntryPtr entry);
+
+ // Display title and URL associated with one site.
+ struct SiteData {
+ QString title;
+ KURL url;
+ };
+ // site is expected to be one of the Site values
+ static const SiteData& siteData(int site);
+
+ /**
+ * Returns a widget for modifying the fetcher's config.
+ */
+ virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const ;
+
+ // Optional fields offered by this fetcher, as field name -> label.
+ static StringMap customFields();
+
+ // The config widget reads the private settings directly, hence the friend.
+ class ConfigWidget;
+ friend class ConfigWidget;
+
+ static QString defaultName();
+
+private slots:
+ void slotData(KIO::Job* job, const QByteArray& data);
+ void slotComplete(KIO::Job* job);
+
+private:
+ void initXSLTHandler();
+ void doSearch();
+ // remove recognized bracketed tokens from the entry's title
+ void parseTitle(Data::EntryPtr entry, int collType);
+ // returns true if the token should be removed from the title
+ bool parseTitleToken(Data::EntryPtr entry, const QString& token);
+
+ XSLTHandler* m_xsltHandler;
+ Site m_site;
+ ImageSize m_imageSize;
+
+ QString m_access;
+ QString m_assoc; // associate's id, editable in the config widget
+ bool m_addLinkField;
+ int m_limit;
+ int m_countOffset;
+
+ QByteArray m_data;
+ int m_page;
+ int m_total;
+ int m_numResults;
+ QMap<int, Data::EntryPtr> m_entries; // they get modified after collection is created, so can't be const
+ QGuardedPtr<KIO::Job> m_job;
+
+ FetchKey m_key;
+ QString m_value;
+ bool m_started; // reported by isSearching()
+ QStringList m_fields; // selected optional fields, see customFields()
+};
+
+// Configuration page for an AmazonFetcher: lets the user pick the site,
+// the cover image size and the associate's id.
+class AmazonFetcher::ConfigWidget : public Fetch::ConfigWidget {
+Q_OBJECT
+
+public:
+ // fetcher may be null, in which case default values are shown
+ ConfigWidget(QWidget* parent_, const AmazonFetcher* fetcher = 0);
+
+ virtual void saveConfig(KConfigGroup& config);
+ // title of the currently selected site
+ virtual QString preferredName() const;
+
+private slots:
+ void slotSiteChanged();
+
+private:
+ KLineEdit* m_assocEdit;
+ GUI::ComboBox* m_siteCombo;
+ GUI::ComboBox* m_imageCombo;
+};
+
+ } // end namespace
+} // end namespace
+#endif
diff --git a/src/fetch/animenfofetcher.cpp b/src/fetch/animenfofetcher.cpp
new file mode 100644
index 0000000..728c583
--- /dev/null
+++ b/src/fetch/animenfofetcher.cpp
@@ -0,0 +1,378 @@
+/***************************************************************************
+ copyright : (C) 2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "animenfofetcher.h"
+#include "messagehandler.h"
+#include "../tellico_kernel.h"
+#include "../tellico_utils.h"
+#include "../collections/videocollection.h"
+#include "../entry.h"
+#include "../filehandler.h"
+#include "../latin1literal.h"
+#include "../imagefactory.h"
+#include "../tellico_debug.h"
+
+#include <klocale.h>
+#include <kconfig.h>
+#include <kio/job.h>
+
+#include <qregexp.h>
+#include <qlayout.h>
+#include <qlabel.h>
+#include <qfile.h>
+
+//#define ANIMENFO_TEST
+
+namespace {
+ // search endpoint; also used as the base for resolving relative result and image URLs
+ static const char* ANIMENFO_BASE_URL = "http://www.animenfo.com/search.php";
+}
+
+using Tellico::Fetch::AnimeNfoFetcher;
+
+// Creates an idle fetcher. m_total is initialized explicitly so that the
+// member never holds an indeterminate value.
+AnimeNfoFetcher::AnimeNfoFetcher(QObject* parent_, const char* name_ /*=0*/)
+    : Fetcher(parent_, name_), m_total(0), m_started(false) {
+}
+
+// Name used for this source when the user has not chosen one (not translated).
+QString AnimeNfoFetcher::defaultName() {
+ return QString::fromLatin1("AnimeNfo.com");
+}
+
+// Returns the configured source name, or the default name when none is set.
+QString AnimeNfoFetcher::source() const {
+  if(m_name.isEmpty()) {
+    return defaultName();
+  }
+  return m_name;
+}
+
+// Only video collections are supported.
+bool AnimeNfoFetcher::canFetch(int type) const {
+ return type == Data::Collection::Video;
+}
+
+// This source has no options, so nothing is read from the config.
+void AnimeNfoFetcher::readConfigHook(const KConfigGroup& config_) {
+ Q_UNUSED(config_);
+}
+
+// Starts an asynchronous keyword search. Previous matches are discarded, the
+// search URL is built and a KIO get job is started; data arrives through
+// slotData() and the results are parsed in slotComplete().
+// If the current collection type or the key is not supported, the search is
+// stopped immediately (which emits signalDone()).
+void AnimeNfoFetcher::search(FetchKey key_, const QString& value_) {
+ m_started = true;
+ m_matches.clear();
+
+#ifdef ANIMENFO_TEST
+ // test mode reads a local file instead of querying the site
+ KURL u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/animenfo.html"));
+#else
+ KURL u(QString::fromLatin1(ANIMENFO_BASE_URL));
+ u.addQueryItem(QString::fromLatin1("action"), QString::fromLatin1("Go"));
+ u.addQueryItem(QString::fromLatin1("option"), QString::fromLatin1("keywords"));
+ u.addQueryItem(QString::fromLatin1("queryin"), QString::fromLatin1("anime_titles"));
+
+ if(!canFetch(Kernel::self()->collectionType())) {
+ message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning);
+ stop();
+ return;
+ }
+
+ switch(key_) {
+ case Keyword:
+ u.addQueryItem(QString::fromLatin1("query"), value_);
+ break;
+
+ default:
+ kdWarning() << "AnimeNfoFetcher::search() - key not recognized: " << key_ << endl;
+ stop();
+ return;
+ }
+#endif
+// myDebug() << "AnimeNfoFetcher::search() - url: " << u.url() << endl;
+
+ m_job = KIO::get(u, false, false);
+ connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+ SLOT(slotData(KIO::Job*, const QByteArray&)));
+ connect(m_job, SIGNAL(result(KIO::Job*)),
+ SLOT(slotComplete(KIO::Job*)));
+}
+
+// Ends the current search, if any: kills a pending job, discards buffered
+// data, clears the started flag and emits signalDone(). Does nothing when no
+// search is running.
+void AnimeNfoFetcher::stop() {
+  if(m_started) {
+    if(m_job) {
+      m_job->kill();
+      m_job = 0;
+    }
+
+    m_data.truncate(0);
+    m_started = false;
+    emit signalDone(this);
+  }
+}
+
+// Appends a chunk of downloaded bytes to the m_data buffer.
+void AnimeNfoFetcher::slotData(KIO::Job*, const QByteArray& data_) {
+  QDataStream appender(m_data, IO_WriteOnly | IO_Append);
+  appender.writeRawBytes(data_.data(), data_.size());
+}
+
+// Called when the search job finishes. On error or empty data the search is
+// stopped. Otherwise the result page is scanned for "anime_info" table cells,
+// which come in groups of three per result (title/link, an ignored cell, and
+// the year). For every result a SearchResult is emitted and its detail URL is
+// remembered in m_matches under the result's uid. Always ends with stop().
+void AnimeNfoFetcher::slotComplete(KIO::Job* job_) {
+// myDebug() << "AnimeNfoFetcher::slotComplete()" << endl;
+ // since the fetch is done, don't worry about holding the job pointer
+ m_job = 0;
+
+ if(job_->error()) {
+ job_->showErrorDialog(Kernel::self()->widget());
+ stop();
+ return;
+ }
+
+ if(m_data.isEmpty()) {
+ myDebug() << "AnimeNfoFetcher::slotComplete() - no data" << endl;
+ stop();
+ return;
+ }
+
+ QString s = Tellico::decodeHTML(QString(m_data));
+
+ // all three expressions are case-insensitive; the first two are non-greedy
+ QRegExp infoRx(QString::fromLatin1("<td\\s+[^>]*class\\s*=\\s*[\"']anime_info[\"'][^>]*>(.*)</td>"), false);
+ infoRx.setMinimal(true);
+ QRegExp anchorRx(QString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*[\"'](.*)[\"'][^>]*>(.*)</a>"), false);
+ anchorRx.setMinimal(true);
+ QRegExp yearRx(QString::fromLatin1("\\d{4}"), false);
+
+ // search page comes in groups of threes
+ // n is the index of the current cell within its group; u, t and y hold the
+ // url, title and year collected for the result being assembled
+ int n = 0;
+ QString u, t, y;
+
+ for(int pos = infoRx.search(s); m_started && pos > -1; pos = infoRx.search(s, pos+1)) {
+ // at the start of a new group, emit the result gathered from the previous one
+ if(n == 0 && !u.isEmpty()) {
+ SearchResult* r = new SearchResult(this, t, y, QString());
+ emit signalResultFound(r);
+
+#ifdef ANIMENFO_TEST
+ KURL url = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/animetitle.html"));
+#else
+ // resolve the link relative to the search URL and drop the query part
+ KURL url(QString::fromLatin1(ANIMENFO_BASE_URL), u);
+ url.setQuery(QString::null);
+#endif
+ m_matches.insert(r->uid, url);
+
+ u.truncate(0);
+ t.truncate(0);
+ y.truncate(0);
+ }
+ switch(n) {
+ case 0: // title and url
+ {
+ int pos2 = anchorRx.search(infoRx.cap(1));
+ if(pos2 > -1) {
+ u = anchorRx.cap(1);
+ t = anchorRx.cap(2);
+ }
+ }
+ break;
+ case 1: // don't care
+ break;
+ case 2:
+ // only accepted when the whole cell is a four-digit year
+ if(yearRx.exactMatch(infoRx.cap(1))) {
+ y = infoRx.cap(1);
+ }
+ break;
+ }
+
+ n = (n+1)%3;
+ }
+
+ // grab last response
+#ifndef ANIMENFO_TEST
+ if(!u.isEmpty()) {
+ SearchResult* r = new SearchResult(this, t, y, QString());
+ emit signalResultFound(r);
+ KURL url(QString::fromLatin1(ANIMENFO_BASE_URL), u);
+ url.setQuery(QString::null);
+ m_matches.insert(r->uid, url);
+ }
+#endif
+ stop();
+}
+
+// Returns the full entry for a search result. Entries already fetched are
+// served from m_entries; otherwise the detail page stored in m_matches for
+// this uid is downloaded, parsed with parseEntry() and cached.
+// Returns 0 when the uid is unknown, the page is empty or parsing fails.
+Tellico::Data::EntryPtr AnimeNfoFetcher::fetchEntry(uint uid_) {
+ // if we already grabbed this one, then just pull it out of the dict
+ Data::EntryPtr entry = m_entries[uid_];
+ if(entry) {
+ return entry;
+ }
+
+ KURL url = m_matches[uid_];
+ if(url.isEmpty()) {
+ kdWarning() << "AnimeNfoFetcher::fetchEntry() - no url in map" << endl;
+ return 0;
+ }
+
+ QString results = Tellico::decodeHTML(FileHandler::readTextFile(url, true));
+ if(results.isEmpty()) {
+ myDebug() << "AnimeNfoFetcher::fetchEntry() - no text results" << endl;
+ return 0;
+ }
+
+ // disabled debugging aid: dumps the downloaded page to /tmp/test.html
+#if 0
+ kdWarning() << "Remove debug from animenfofetcher.cpp" << endl;
+ QFile f(QString::fromLatin1("/tmp/test.html"));
+ if(f.open(IO_WriteOnly)) {
+ QTextStream t(&f);
+ t.setEncoding(QTextStream::UnicodeUTF8);
+ t << results;
+ }
+ f.close();
+#endif
+
+ entry = parseEntry(results);
+ if(!entry) {
+ myDebug() << "AnimeNfoFetcher::fetchEntry() - error in processing entry" << endl;
+ return 0;
+ }
+ m_entries.insert(uid_, entry); // keep for later
+ return entry;
+}
+
+// Builds a video entry from the HTML of an AnimeNfo detail page.
+//
+// The page is scanned for "anime_info" table cells, which alternate between a
+// caption cell and a value cell. Captions listed in fieldMap are copied into
+// the matching entry field. Afterwards the cover image, the alternative titles
+// and the plot description are extracted.
+//
+// Compared to the straightforward "flush the previous pair when the next
+// caption starts" approach, each caption/value pair is stored as soon as its
+// value cell is read, so the last pair on the page is not lost.
+Tellico::Data::EntryPtr AnimeNfoFetcher::parseEntry(const QString& str_) {
+  // myDebug() << "AnimeNfoFetcher::parseEntry()" << endl;
+  // class might be anime_info_top
+  QRegExp infoRx(QString::fromLatin1("<td\\s+[^>]*class\\s*=\\s*[\"']anime_info[^>]*>(.*)</td>"), false);
+  infoRx.setMinimal(true);
+  QRegExp tagRx(QString::fromLatin1("<.*>"));
+  tagRx.setMinimal(true);
+  QRegExp anchorRx(QString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*[\"'](.*)[\"'][^>]*>(.*)</a>"), false);
+  anchorRx.setMinimal(true);
+  QRegExp jsRx(QString::fromLatin1("<script.*</script>"), false);
+  jsRx.setMinimal(true);
+
+  // scripts are removed first so their contents can not confuse the matching
+  QString s = str_;
+  s.remove(jsRx);
+
+  Data::CollPtr coll = new Data::VideoCollection(true);
+
+  // add new fields
+  Data::FieldPtr f = new Data::Field(QString::fromLatin1("origtitle"), i18n("Original Title"));
+  coll->addField(f);
+
+  f = new Data::Field(QString::fromLatin1("alttitle"), i18n("Alternative Titles"), Data::Field::Table);
+  f->setFormatFlag(Data::Field::FormatTitle);
+  coll->addField(f);
+
+  f = new Data::Field(QString::fromLatin1("distributor"), i18n("Distributor"));
+  f->setCategory(i18n("Other People"));
+  f->setFlags(Data::Field::AllowCompletion | Data::Field::AllowMultiple | Data::Field::AllowGrouped);
+  f->setFormatFlag(Data::Field::FormatPlain);
+  coll->addField(f);
+
+  f = new Data::Field(QString::fromLatin1("episodes"), i18n("Episodes"), Data::Field::Number);
+  f->setCategory(i18n("Features"));
+  coll->addField(f);
+
+  // map captions in HTML to field names
+  QMap<QString, QString> fieldMap;
+  fieldMap.insert(QString::fromLatin1("Title"), QString::fromLatin1("title"));
+  fieldMap.insert(QString::fromLatin1("Japanese Title"), QString::fromLatin1("origtitle"));
+  fieldMap.insert(QString::fromLatin1("Total Episodes"), QString::fromLatin1("episodes"));
+  fieldMap.insert(QString::fromLatin1("Genres"), QString::fromLatin1("genre"));
+  fieldMap.insert(QString::fromLatin1("Year Published"), QString::fromLatin1("year"));
+  fieldMap.insert(QString::fromLatin1("Studio"), QString::fromLatin1("studio"));
+  fieldMap.insert(QString::fromLatin1("US Distribution"), QString::fromLatin1("distributor"));
+
+  Data::EntryPtr entry = new Data::Entry(coll);
+
+  // n is 0 while a caption cell is expected and 1 while a value cell is expected
+  int n = 0;
+  QString key, value;
+  int oldpos = -1; // position of the last info cell, the later searches start after it
+  for(int pos = infoRx.search(s); pos > -1; pos = infoRx.search(s, pos+1)) {
+    if(n == 0) {
+      key = infoRx.cap(1).remove(tagRx);
+    } else {
+      value = infoRx.cap(1).remove(tagRx);
+      // the pair is complete now, store it right away so that the final pair
+      // on the page is handled like every other one
+      if(!key.isEmpty() && fieldMap.contains(key)) {
+        value = value.simplifyWhiteSpace();
+        // NOTE(review): this also skips one- and two-character values such as
+        // an episode count of "26" - confirm whether only "-" should be skipped
+        if(value.length() > 2) { // might be "-"
+          if(key == Latin1Literal("Genres")) {
+            entry->setField(fieldMap[key], QStringList::split(QRegExp(QString::fromLatin1("\\s*,\\s*")),
+                                                              value).join(QString::fromLatin1("; ")));
+          } else {
+            entry->setField(fieldMap[key], value);
+          }
+        }
+      }
+      key.truncate(0);
+      value.truncate(0);
+    }
+    n = (n+1)%2;
+    oldpos = pos;
+  }
+
+  // image
+  // the title is escaped since it is arbitrary text and may contain
+  // characters that are special in a regular expression, e.g. "(" or "?"
+  QRegExp imgRx(QString::fromLatin1("<img\\s+[^>]*src\\s*=\\s*[\"']([^>]*)[\"']\\s+[^>]*alt\\s*=\\s*[\"']%1[\"']")
+                .arg(QRegExp::escape(entry->field(QString::fromLatin1("title")))), false);
+  imgRx.setMinimal(true);
+  int pos = imgRx.search(s);
+  if(pos > -1) {
+    KURL imgURL(QString::fromLatin1(ANIMENFO_BASE_URL), imgRx.cap(1));
+    QString id = ImageFactory::addImage(imgURL, true);
+    if(!id.isEmpty()) {
+      entry->setField(QString::fromLatin1("cover"), id);
+    }
+  }
+
+  // now look for alternative titles and plot
+  const QString a = QString::fromLatin1("Alternative titles");
+  pos = s.find(a, oldpos+1, false);
+  if(pos > -1) {
+    pos += a.length();
+  }
+  int pos2 = -1;
+  if(pos > -1) {
+    pos2 = s.find(QString::fromLatin1("Description"), pos+1, true);
+    if(pos2 > -1) {
+      value = s.mid(pos, pos2-pos).remove(tagRx).simplifyWhiteSpace();
+      entry->setField(QString::fromLatin1("alttitle"), value);
+    }
+  }
+  QRegExp descRx(QString::fromLatin1("class\\s*=\\s*[\"']description[\"'][^>]*>(.*)<"), false);
+  descRx.setMinimal(true);
+  // a negative offset makes QRegExp::search() count from the end of the
+  // string, so clamp to 0 when neither marker above was found; otherwise the
+  // description could never be matched on pages without alternative titles
+  pos = descRx.search(s, QMAX(0, QMAX(pos, pos2)));
+  if(pos > -1) {
+    entry->setField(QString::fromLatin1("plot"), descRx.cap(1).simplifyWhiteSpace());
+  }
+
+  return entry;
+}
+
+// Starts a keyword search for the entry's title; when there is no title,
+// only signalDone() is emitted.
+void AnimeNfoFetcher::updateEntry(Data::EntryPtr entry_) {
+  const QString titleValue = entry_->field(QString::fromLatin1("title"));
+  if(titleValue.isEmpty()) {
+    emit signalDone(this); // always need to emit this if not continuing with the search
+    return;
+  }
+  search(Fetch::Keyword, titleValue);
+}
+
+// Creates the (option-less) configuration widget for this source.
+Tellico::Fetch::ConfigWidget* AnimeNfoFetcher::configWidget(QWidget* parent_) const {
+ return new AnimeNfoFetcher::ConfigWidget(parent_);
+}
+
+// The options page only shows a label stating that there are no options.
+AnimeNfoFetcher::ConfigWidget::ConfigWidget(QWidget* parent_)
+ : Fetch::ConfigWidget(parent_) {
+ QVBoxLayout* l = new QVBoxLayout(optionsWidget());
+ l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget()));
+ l->addStretch();
+}
+
+// The preferred name is always the fetcher's default name.
+QString AnimeNfoFetcher::ConfigWidget::preferredName() const {
+ return AnimeNfoFetcher::defaultName();
+}
+
+#include "animenfofetcher.moc"
diff --git a/src/fetch/animenfofetcher.h b/src/fetch/animenfofetcher.h
new file mode 100644
index 0000000..7e4028e
--- /dev/null
+++ b/src/fetch/animenfofetcher.h
@@ -0,0 +1,86 @@
+/***************************************************************************
+ copyright : (C) 2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef TELLICO_FETCH_ANIMENFOFETCHER_H
+#define TELLICO_FETCH_ANIMENFOFETCHER_H
+
+#include "fetcher.h"
+#include "configwidget.h"
+
+#include <qcstring.h> // for QByteArray
+#include <qguardedptr.h>
+
+namespace KIO {
+ class Job;
+}
+
+namespace Tellico {
+ namespace Fetch {
+
+/**
+ * A fetcher for animenfo.com
+ *
+ * Searches by keyword only and supports video collections only. Search
+ * results are collected from the HTML result page; the full entry is parsed
+ * from the detail page when fetchEntry() is called.
+ *
+ * @author Robby Stephenson
+ */
+class AnimeNfoFetcher : public Fetcher {
+Q_OBJECT
+
+public:
+ AnimeNfoFetcher(QObject* parent, const char* name = 0);
+ virtual ~AnimeNfoFetcher() {}
+
+ virtual QString source() const;
+ virtual bool isSearching() const { return m_started; }
+ virtual void search(FetchKey key, const QString& value);
+ // only keyword search
+ virtual bool canSearch(FetchKey k) const { return k == Keyword; }
+ virtual void stop();
+ virtual Data::EntryPtr fetchEntry(uint uid);
+ virtual Type type() const { return AnimeNfo; }
+ virtual bool canFetch(int type) const;
+ virtual void readConfigHook(const KConfigGroup& config);
+
+ virtual void updateEntry(Data::EntryPtr entry);
+
+ virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;
+
+ // Config page without any options; saveConfig() has nothing to write.
+ class ConfigWidget : public Fetch::ConfigWidget {
+ public:
+ ConfigWidget(QWidget* parent_);
+ virtual void saveConfig(KConfigGroup&) {}
+ virtual QString preferredName() const;
+ };
+ friend class ConfigWidget;
+
+ static QString defaultName();
+
+private slots:
+ void slotData(KIO::Job* job, const QByteArray& data);
+ void slotComplete(KIO::Job* job);
+
+private:
+ // builds an entry from the HTML of a detail page
+ Data::EntryPtr parseEntry(const QString& str);
+
+ QByteArray m_data; // bytes received so far for the running job
+ int m_total;
+ QMap<int, Data::EntryPtr> m_entries; // entries already fetched, by result uid
+ QMap<int, KURL> m_matches; // detail page URLs, by result uid
+ QGuardedPtr<KIO::Job> m_job;
+
+ bool m_started; // reported by isSearching()
+// QStringList m_fields;
+};
+
+ } // end namespace
+} // end namespace
+#endif
diff --git a/src/fetch/arxivfetcher.cpp b/src/fetch/arxivfetcher.cpp
new file mode 100644
index 0000000..442ef30
--- /dev/null
+++ b/src/fetch/arxivfetcher.cpp
@@ -0,0 +1,366 @@
+/***************************************************************************
+ copyright : (C) 2007 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "arxivfetcher.h"
+#include "messagehandler.h"
+#include "../translators/xslthandler.h"
+#include "../translators/tellicoimporter.h"
+#include "../tellico_kernel.h"
+#include "../tellico_utils.h"
+#include "../collection.h"
+#include "../entry.h"
+#include "../core/netaccess.h"
+#include "../imagefactory.h"
+#include "../tellico_debug.h"
+
+#include <klocale.h>
+#include <kstandarddirs.h>
+#include <kconfig.h>
+
+#include <qdom.h>
+#include <qlabel.h>
+#include <qlayout.h>
+
+//#define ARXIV_TEST
+
+namespace {
+ // sent as the max_results query item, i.e. the page size of one request
+ static const int ARXIV_RETURNS_PER_REQUEST = 20;
+ static const char* ARXIV_BASE_URL = "http://export.arxiv.org/api/query";
+}
+
+using Tellico::Fetch::ArxivFetcher;
+
+// Creates an idle fetcher; the XSLT handler is created lazily on first use.
+ArxivFetcher::ArxivFetcher(QObject* parent_)
+ : Fetcher(parent_), m_xsltHandler(0), m_start(0), m_job(0), m_started(false) {
+}
+
+// Releases the XSLT handler owned by this fetcher.
+ArxivFetcher::~ArxivFetcher() {
+ delete m_xsltHandler;
+ m_xsltHandler = 0;
+}
+
+// Translated name used for this source when the user has not chosen one.
+QString ArxivFetcher::defaultName() {
+ return i18n("arXiv.org");
+}
+
+// Returns the configured source name, or the default name when none is set.
+QString ArxivFetcher::source() const {
+  if(m_name.isEmpty()) {
+    return defaultName();
+  }
+  return m_name;
+}
+
+// Only bibtex collections are supported.
+bool ArxivFetcher::canFetch(int type) const {
+ return type == Data::Collection::Bibtex;
+}
+
+// This source has no options, so nothing is read from the config.
+void ArxivFetcher::readConfigHook(const KConfigGroup&) {
+}
+
+// Starts a new search: remembers the key and the trimmed value, resets the
+// paging state and issues the first request.
+void ArxivFetcher::search(FetchKey key_, const QString& value_) {
+ m_key = key_;
+ m_value = value_.stripWhiteSpace();
+ m_started = true;
+ m_start = 0;
+ m_total = -1; // -1 means the total is read from the next response
+ doSearch();
+}
+
+// Requests the next page of the current search, using the stored key, value
+// and start offset.
+void ArxivFetcher::continueSearch() {
+ m_started = true;
+ doSearch();
+}
+
+// Issues one request for the stored key/value at the current start offset.
+// Stops the search right away when the collection type is not supported or
+// no URL can be built for the key; otherwise a KIO get job is started whose
+// data is collected by slotData() and processed by slotComplete().
+void ArxivFetcher::doSearch() {
+ if(!canFetch(Kernel::self()->collectionType())) {
+ message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning);
+ stop();
+ return;
+ }
+
+ // start with an empty buffer for this request
+ m_data.truncate(0);
+
+// myDebug() << "ArxivFetcher::search() - value = " << value_ << endl;
+
+ KURL u = searchURL(m_key, m_value);
+ if(u.isEmpty()) {
+ stop();
+ return;
+ }
+
+ m_job = KIO::get(u, false, false);
+ connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+ SLOT(slotData(KIO::Job*, const QByteArray&)));
+ connect(m_job, SIGNAL(result(KIO::Job*)),
+ SLOT(slotComplete(KIO::Job*)));
+}
+
+// Ends the current search, if any: kills a pending job, discards buffered
+// data, clears the started flag and emits signalDone(). Does nothing when no
+// search is running.
+void ArxivFetcher::stop() {
+  if(m_started) {
+//  myDebug() << "ArxivFetcher::stop()" << endl;
+    if(m_job) {
+      m_job->kill();
+      m_job = 0;
+    }
+
+    m_data.truncate(0);
+    m_started = false;
+    emit signalDone(this);
+  }
+}
+
+// Appends a chunk of downloaded bytes to the m_data buffer.
+void ArxivFetcher::slotData(KIO::Job*, const QByteArray& data_) {
+  QDataStream appender(m_data, IO_WriteOnly | IO_Append);
+  appender.writeRawBytes(data_.data(), data_.size());
+}
+
+// Called when the request job finishes. The response is transformed with the
+// arxiv2tellico stylesheet and imported; every resulting entry is stored in
+// m_entries under the uid of a newly emitted SearchResult.
+//
+// Every exit path goes through stop(), which resets m_started and emits
+// signalDone(); this includes the case of a response that is not valid XML.
+//
+// The start offset for the next page is advanced by the number of results
+// added by this response, so entries left in m_entries by earlier searches
+// do not shift the offset.
+void ArxivFetcher::slotComplete(KIO::Job* job_) {
+//  myDebug() << "ArxivFetcher::slotComplete()" << endl;
+  // since the fetch is done, don't worry about holding the job pointer
+  m_job = 0;
+
+  if(job_->error()) {
+    job_->showErrorDialog(Kernel::self()->widget());
+    stop();
+    return;
+  }
+
+  if(m_data.isEmpty()) {
+    myDebug() << "ArxivFetcher::slotComplete() - no data" << endl;
+    stop();
+    return;
+  }
+
+  // disabled debugging aid: dumps the response to /tmp/test.xml
+#if 0
+  kdWarning() << "Remove debug from arxivfetcher.cpp" << endl;
+  QFile f(QString::fromLatin1("/tmp/test.xml"));
+  if(f.open(IO_WriteOnly)) {
+    QTextStream t(&f);
+    t.setEncoding(QTextStream::UnicodeUTF8);
+    t << QCString(m_data, m_data.size()+1);
+  }
+  f.close();
+#endif
+
+  if(!m_xsltHandler) {
+    initXSLTHandler();
+    if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading
+      stop();
+      return;
+    }
+  }
+
+  // the total is only read from the first response of a search
+  if(m_total == -1) {
+    QDomDocument dom;
+    if(!dom.setContent(m_data, true /*namespace*/)) {
+      kdWarning() << "ArxivFetcher::slotComplete() - server did not return valid XML." << endl;
+      stop(); // the search is over, signalDone() must still be emitted
+      return;
+    }
+    // the total is the text of the opensearch totalResults element
+    QDomNodeList list = dom.elementsByTagNameNS(QString::fromLatin1("http://a9.com/-/spec/opensearch/1.1/"),
+                                                QString::fromLatin1("totalResults"));
+    if(list.count() > 0) {
+      m_total = list.item(0).toElement().text().toInt();
+    }
+  }
+
+  // assume result is always utf-8
+  QString str = m_xsltHandler->applyStylesheet(QString::fromUtf8(m_data, m_data.size()));
+  Import::TellicoImporter imp(str);
+  Data::CollPtr coll = imp.collection();
+
+  if(!coll) {
+    myDebug() << "ArxivFetcher::slotComplete() - no valid result" << endl;
+    stop();
+    return;
+  }
+
+  int added = 0; // number of results reported from this response
+  Data::EntryVec entries = coll->entries();
+  for(Data::EntryVec::Iterator entry = entries.begin(); entry != entries.end(); ++entry) {
+    if(!m_started) {
+      // might get aborted
+      break;
+    }
+    QString desc = entry->field(QString::fromLatin1("author"))
+                 + QChar('/') + entry->field(QString::fromLatin1("publisher"));
+    if(!entry->field(QString::fromLatin1("year")).isEmpty()) {
+      desc += QChar('/') + entry->field(QString::fromLatin1("year"));
+    }
+
+    SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn")));
+    m_entries.insert(r->uid, Data::EntryPtr(entry));
+    emit signalResultFound(r);
+    ++added;
+  }
+
+  // m_entries is not cleared between searches, so its size can not be used
+  // as the offset; count only what this search has produced
+  m_start += added;
+  m_hasMoreResults = m_start < m_total;
+  stop(); // required
+}
+
+// Returns the entry stored for a search result, or 0 when the uid is not
+// known. If the entry has a URL but no image yet, a preview of the URL is
+// fetched and stored in the "cover" field (or in the collection's first image
+// field when there is no "cover" field); the "cover" field is created when
+// the collection has no image field at all.
+Tellico::Data::EntryPtr ArxivFetcher::fetchEntry(uint uid_) {
+  Data::EntryPtr entry = m_entries[uid_];
+  if(!entry) {
+    // unknown uid: there is nothing to dereference below
+    kdWarning() << "ArxivFetcher::fetchEntry() - no entry in map" << endl;
+    return 0;
+  }
+  // if URL but no cover image, fetch it
+  if(!entry->field(QString::fromLatin1("url")).isEmpty()) {
+    Data::CollPtr coll = entry->collection();
+    Data::FieldPtr field = coll->fieldByName(QString::fromLatin1("cover"));
+    if(!field && !coll->imageFields().isEmpty()) {
+      field = coll->imageFields().front();
+    } else if(!field) {
+      field = new Data::Field(QString::fromLatin1("cover"), i18n("Front Cover"), Data::Field::Image);
+      coll->addField(field);
+    }
+    if(entry->field(field).isEmpty()) {
+      QPixmap pix = NetAccess::filePreview(entry->field(QString::fromLatin1("url")));
+      if(!pix.isNull()) {
+        QString id = ImageFactory::addImage(pix, QString::fromLatin1("PNG"));
+        if(!id.isEmpty()) {
+          entry->setField(field, id);
+        }
+      }
+    }
+  }
+  return entry;
+}
+
+// Locates arxiv2tellico.xsl and (re)creates m_xsltHandler from it. On any
+// failure a warning is logged; when the stylesheet is invalid m_xsltHandler
+// is reset to 0.
+void ArxivFetcher::initXSLTHandler() {
+  const QString stylesheetPath = locate("appdata", QString::fromLatin1("arxiv2tellico.xsl"));
+  if(stylesheetPath.isEmpty()) {
+    kdWarning() << "ArxivFetcher::initXSLTHandler() - can not locate arxiv2tellico.xsl." << endl;
+    return;
+  }
+
+  KURL stylesheetURL;
+  stylesheetURL.setPath(stylesheetPath);
+
+  delete m_xsltHandler;
+  m_xsltHandler = new XSLTHandler(stylesheetURL);
+  if(m_xsltHandler->isValid()) {
+    return;
+  }
+
+  kdWarning() << "ArxivFetcher::initXSLTHandler() - error in arxiv2tellico.xsl." << endl;
+  delete m_xsltHandler;
+  m_xsltHandler = 0;
+}
+
+// Builds the query URL for the given key and value, starting at the current
+// m_start offset and asking for ARXIV_RETURNS_PER_REQUEST results.
+// Returns an empty KURL when the key is not supported. The warning reports
+// the key_ argument, since that is the value that was not recognized; it can
+// differ from m_key when the caller passes a key of its own.
+KURL ArxivFetcher::searchURL(FetchKey key_, const QString& value_) const {
+  KURL u(QString::fromLatin1(ARXIV_BASE_URL));
+  u.addQueryItem(QString::fromLatin1("start"), QString::number(m_start));
+  u.addQueryItem(QString::fromLatin1("max_results"), QString::number(ARXIV_RETURNS_PER_REQUEST));
+
+  // quotes should be used if spaces are present, just use all the time
+  QString quotedValue = '"' + value_ + '"';
+  switch(key_) {
+    case Title:
+      u.addQueryItem(QString::fromLatin1("search_query"), QString::fromLatin1("ti:%1").arg(quotedValue));
+      break;
+
+    case Person:
+      u.addQueryItem(QString::fromLatin1("search_query"), QString::fromLatin1("au:%1").arg(quotedValue));
+      break;
+
+    case Keyword:
+      // keyword gets to use all the words without being quoted
+      u.addQueryItem(QString::fromLatin1("search_query"), QString::fromLatin1("all:%1").arg(value_));
+      break;
+
+    case ArxivID:
+      {
+        // remove prefix and/or version number
+        QString value = value_;
+        value.remove(QRegExp(QString::fromLatin1("^arxiv:"), false));
+        value.remove(QRegExp(QString::fromLatin1("v\\d+$")));
+        u.addQueryItem(QString::fromLatin1("search_query"), QString::fromLatin1("id:%1").arg(value));
+      }
+      break;
+
+    default:
+      kdWarning() << "ArxivFetcher::search() - key not recognized: " << key_ << endl;
+      return KURL();
+  }
+
+#ifdef ARXIV_TEST
+  // test mode reads a local file instead of querying the server
+  u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/arxiv.xml"));
+#endif
+  myDebug() << "ArxivFetcher::search() - url: " << u.url() << endl;
+  return u;
+}
+
+// Starts an update search for an existing entry: by arXiv id when the entry
+// has one, otherwise by title. Emits signalDone() when neither is available.
+void ArxivFetcher::updateEntry(Data::EntryPtr entry_) {
+  const QString arxivId = entry_->field(QString::fromLatin1("arxiv"));
+  if(!arxivId.isEmpty()) {
+    search(Fetch::ArxivID, arxivId);
+    return;
+  }
+
+  // optimistically try searching for title and rely on Collection::sameEntry() to figure things out
+  const QString titleValue = entry_->field(QString::fromLatin1("title"));
+  if(titleValue.isEmpty()) {
+    myDebug() << "ArxivFetcher::updateEntry() - insufficient info to search" << endl;
+    emit signalDone(this); // always need to emit this if not continuing with the search
+    return;
+  }
+
+  search(Fetch::Title, titleValue);
+}
+
+// Updates an entry in place by looking up its arXiv id. Unlike updateEntry(),
+// the response is read directly here instead of through a KIO job and the
+// slots. Does nothing when the entry is null, has no arXiv id, the response
+// is empty or the stylesheet can not be loaded.
+void ArxivFetcher::updateEntrySynchronous(Data::EntryPtr entry) {
+ if(!entry) {
+ return;
+ }
+ QString arxiv = entry->field(QString::fromLatin1("arxiv"));
+ if(arxiv.isEmpty()) {
+ return;
+ }
+
+ // NOTE(review): searchURL() uses the current m_start as the start offset,
+ // which may be non-zero after an earlier paged search - confirm that is
+ // acceptable for an id lookup
+ KURL u = searchURL(ArxivID, arxiv);
+ QString xml = FileHandler::readTextFile(u, true, true);
+ if(xml.isEmpty()) {
+ return;
+ }
+
+ if(!m_xsltHandler) {
+ initXSLTHandler();
+ if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading
+ return;
+ }
+ }
+
+ // assume result is always utf-8
+ QString str = m_xsltHandler->applyStylesheet(xml);
+ Import::TellicoImporter imp(str);
+ Data::CollPtr coll = imp.collection();
+ if(coll && coll->entryCount() > 0) {
+ myLog() << "ArxivFetcher::updateEntrySynchronous() - found Arxiv result, merging" << endl;
+ // only the first returned entry is used
+ Data::Collection::mergeEntry(entry, coll->entries().front(), false /*overwrite*/);
+ // the arxiv id might have a version#
+ entry->setField(QString::fromLatin1("arxiv"),
+ coll->entries().front()->field(QString::fromLatin1("arxiv")));
+ }
+}
+
+// Creates the (option-less) configuration widget for this source.
+Tellico::Fetch::ConfigWidget* ArxivFetcher::configWidget(QWidget* parent_) const {
+ return new ArxivFetcher::ConfigWidget(parent_, this);
+}
+
+// The options page only shows a label stating that there are no options;
+// the fetcher argument is not used.
+ArxivFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const ArxivFetcher*)
+ : Fetch::ConfigWidget(parent_) {
+ QVBoxLayout* l = new QVBoxLayout(optionsWidget());
+ l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget()));
+ l->addStretch();
+}
+
+// Nothing to save, this source has no options.
+void ArxivFetcher::ConfigWidget::saveConfig(KConfigGroup&) {
+}
+
+// The preferred name is always the fetcher's default name.
+QString ArxivFetcher::ConfigWidget::preferredName() const {
+ return ArxivFetcher::defaultName();
+}
+
+#include "arxivfetcher.moc"
diff --git a/src/fetch/arxivfetcher.h b/src/fetch/arxivfetcher.h
new file mode 100644
index 0000000..bce5f9d
--- /dev/null
+++ b/src/fetch/arxivfetcher.h
@@ -0,0 +1,93 @@
+/***************************************************************************
+ copyright : (C) 2007 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef TELLICO_FETCH_ARXIVFETCHER_H
+#define TELLICO_FETCH_ARXIVFETCHER_H
+
+#include "fetcher.h"
+#include "configwidget.h"
+#include "../datavectors.h"
+
+#include <kio/job.h>
+
+#include <qcstring.h> // for QByteArray
+#include <qguardedptr.h>
+
+namespace Tellico {
+
+ class XSLTHandler;
+
+ namespace Fetch {
+
+/**
+ * Fetcher whose type() is Arxiv. It reports itself as searchable by title,
+ * person, keyword and arXiv id, and offers both an asynchronous
+ * (search/updateEntry) and a synchronous (updateEntrySynchronous) interface.
+ *
+ * @author Robby Stephenson
+ */
+class ArxivFetcher : public Fetcher {
+Q_OBJECT
+
+public:
+ ArxivFetcher(QObject* parent);
+ ~ArxivFetcher();
+
+ virtual QString source() const;
+ // true while m_started is set
+ virtual bool isSearching() const { return m_started; }
+ virtual void search(FetchKey key, const QString& value);
+ virtual void continueSearch();
+
+ // keys accepted by this source: Title, Person, Keyword and ArxivID
+ virtual bool canSearch(FetchKey k) const { return k == Title || k == Person || k == Keyword || k == ArxivID; }
+ virtual void stop();
+ virtual Data::EntryPtr fetchEntry(uint uid);
+ virtual Type type() const { return Arxiv; }
+ virtual bool canFetch(int type) const;
+ virtual void readConfigHook(const KConfigGroup& config);
+
+ virtual void updateEntry(Data::EntryPtr entry);
+ // looks the entry's "arxiv" field up directly and merges the first result
+ virtual void updateEntrySynchronous(Data::EntryPtr entry);
+
+ virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;
+
+ // configuration page for this source; it only displays a "no options" label
+ class ConfigWidget : public Fetch::ConfigWidget {
+ public:
+ ConfigWidget(QWidget* parent_, const ArxivFetcher* fetcher = 0);
+ virtual void saveConfig(KConfigGroup& config);
+ virtual QString preferredName() const;
+ };
+ friend class ConfigWidget;
+
+ static QString defaultName();
+
+private slots:
+ void slotData(KIO::Job* job, const QByteArray& data);
+ void slotComplete(KIO::Job* job);
+
+private:
+ void initXSLTHandler();
+ KURL searchURL(FetchKey key, const QString& value) const;
+ void doSearch();
+
+ // created on demand by initXSLTHandler(); null until then
+ XSLTHandler* m_xsltHandler;
+ // NOTE(review): presumably the paging offset and total hit count used by
+ // continueSearch() -- their use is not visible here, confirm in the .cpp
+ int m_start;
+ int m_total;
+
+ QByteArray m_data;
+ QMap<int, Data::EntryPtr> m_entries;
+ QGuardedPtr<KIO::Job> m_job;
+
+ FetchKey m_key;
+ QString m_value;
+ bool m_started;
+};
+
+ }
+}
+#endif
diff --git a/src/fetch/bibsonomyfetcher.cpp b/src/fetch/bibsonomyfetcher.cpp
new file mode 100644
index 0000000..faa48a4
--- /dev/null
+++ b/src/fetch/bibsonomyfetcher.cpp
@@ -0,0 +1,209 @@
+/***************************************************************************
+ copyright : (C) 2007 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "bibsonomyfetcher.h"
+#include "messagehandler.h"
+#include "../translators/bibteximporter.h"
+#include "../tellico_kernel.h"
+#include "../tellico_utils.h"
+#include "../collection.h"
+#include "../entry.h"
+#include "../core/netaccess.h"
+#include "../filehandler.h"
+#include "../tellico_debug.h"
+
+#include <klocale.h>
+
+#include <qlabel.h>
+#include <qlayout.h>
+
+// File-local settings for the Bibsonomy source.
+namespace {
+  // always bibtex
+  static const char* BIBSONOMY_BASE_URL = "http://bibsonomy.org";
+  // value sent as the "items" query parameter
+  static const int BIBSONOMY_MAX_RESULTS = 20;
+}
+
+using Tellico::Fetch::BibsonomyFetcher;
+
+// Starts out idle: no transfer job and no search in progress.
+BibsonomyFetcher::BibsonomyFetcher(QObject* parent_)
+  : Fetcher(parent_),
+    m_job(0),
+    m_started(false)
+{
+}
+
+// Nothing is released explicitly here.
+BibsonomyFetcher::~BibsonomyFetcher()
+{
+}
+
+// Built-in name of this source; source() falls back to it when m_name is empty.
+QString BibsonomyFetcher::defaultName() {
+  const QString name = QString::fromLatin1("Bibsonomy");
+  return name;
+}
+
+// Returns m_name, or the default name when m_name is empty.
+QString BibsonomyFetcher::source() const {
+  if(m_name.isEmpty()) {
+    return defaultName();
+  }
+  return m_name;
+}
+
+// Only Bibtex collections are supported.
+bool BibsonomyFetcher::canFetch(int type) const {
+  const bool isBibtex = (type == Data::Collection::Bibtex);
+  return isBibtex;
+}
+
+// Intentionally empty: nothing is read from the config group.
+void BibsonomyFetcher::readConfigHook(const KConfigGroup&)
+{
+}
+
+// Starts an asynchronous search. The key and the whitespace-stripped value
+// are remembered and the fetcher is marked as started before any check, so
+// that stop() can report completion if the search cannot be carried out.
+void BibsonomyFetcher::search(FetchKey key_, const QString& value_) {
+  m_started = true;
+  m_key = key_;
+  m_value = value_.stripWhiteSpace();
+
+  const bool typeAllowed = canFetch(Kernel::self()->collectionType());
+  if(!typeAllowed) {
+    message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning);
+    stop();
+    return;
+  }
+
+  // drop anything left over from a previous transfer
+  m_data.truncate(0);
+
+  const KURL url = searchURL(m_key, m_value);
+  if(url.isEmpty()) {
+    // searchURL() could not build a request for this key
+    stop();
+    return;
+  }
+
+  m_job = KIO::get(url, false, false);
+  connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+          SLOT(slotData(KIO::Job*, const QByteArray&)));
+  connect(m_job, SIGNAL(result(KIO::Job*)),
+          SLOT(slotComplete(KIO::Job*)));
+}
+
+// Ends the current search: kills a job that is still set, discards the
+// buffered data, clears the started flag and emits signalDone().
+// Does nothing when no search is running.
+void BibsonomyFetcher::stop() {
+  if(m_started) {
+    if(m_job) {
+      m_job->kill();
+      m_job = 0;
+    }
+    m_data.truncate(0);
+    m_started = false;
+    emit signalDone(this);
+  }
+}
+
+// Appends each chunk delivered by the job to m_data.
+void BibsonomyFetcher::slotData(KIO::Job*, const QByteArray& data_) {
+  QDataStream appender(m_data, IO_WriteOnly | IO_Append);
+  appender.writeRawBytes(data_.data(), data_.size());
+}
+
+// Called when the transfer finishes. Parses the downloaded text as bibtex
+// and emits one SearchResult per entry; every exit path ends in stop().
+void BibsonomyFetcher::slotComplete(KIO::Job* job_) {
+  // since the fetch is done, don't worry about holding the job pointer
+  m_job = 0;
+
+  if(job_->error()) {
+    job_->showErrorDialog(Kernel::self()->widget());
+    stop();
+    return;
+  }
+
+  if(m_data.isEmpty()) {
+    myDebug() << "BibsonomyFetcher::slotComplete() - no data" << endl;
+    stop();
+    return;
+  }
+
+  const QString bibtex = QString::fromUtf8(m_data, m_data.size());
+  Import::BibtexImporter imp(bibtex);
+  Data::CollPtr coll = imp.collection();
+  if(!coll) {
+    myDebug() << "BibsonomyFetcher::slotComplete() - no valid result" << endl;
+    stop();
+    return;
+  }
+
+  const QString authorField = QString::fromLatin1("author");
+  const QString publisherField = QString::fromLatin1("publisher");
+  const QString yearField = QString::fromLatin1("year");
+  const QString isbnField = QString::fromLatin1("isbn");
+
+  Data::EntryVec entries = coll->entries();
+  // the search might get aborted while results are being reported
+  for(Data::EntryVec::Iterator it = entries.begin(); m_started && it != entries.end(); ++it) {
+    QString desc = it->field(authorField) + QChar('/') + it->field(publisherField);
+    const QString year = it->field(yearField);
+    if(!year.isEmpty()) {
+      desc += QChar('/') + year;
+    }
+
+    SearchResult* r = new SearchResult(this, it->title(), desc, it->field(isbnField));
+    m_entries.insert(r->uid, Data::EntryPtr(it));
+    emit signalResultFound(r);
+  }
+
+  stop(); // required
+}
+
+// Returns the entry stored in m_entries under the given result uid.
+Tellico::Data::EntryPtr BibsonomyFetcher::fetchEntry(uint uid_) {
+  Data::EntryPtr entry = m_entries[uid_];
+  return entry;
+}
+
+// Builds the request URL for a search.
+//  - Person  -> <base>/bib/author/<value>
+//  - Keyword -> <base>/bib/search/<value>
+// and adds the "items" query parameter. Any other key yields an empty KURL.
+KURL BibsonomyFetcher::searchURL(FetchKey key_, const QString& value_) const {
+  KURL u(QString::fromLatin1(BIBSONOMY_BASE_URL));
+  u.setPath(QString::fromLatin1("/bib/"));
+
+  switch(key_) {
+    case Person:
+      u.addPath(QString::fromLatin1("author/%1").arg(value_));
+      break;
+
+    case Keyword:
+      u.addPath(QString::fromLatin1("search/%1").arg(value_));
+      break;
+
+    default:
+      // report the key that was actually passed in, not the m_key member,
+      // which may differ from it or not have been set at all
+      kdWarning() << "BibsonomyFetcher::search() - key not recognized: " << key_ << endl;
+      return KURL();
+  }
+
+  u.addQueryItem(QString::fromLatin1("items"), QString::number(BIBSONOMY_MAX_RESULTS));
+  myDebug() << "BibsonomyFetcher::search() - url: " << u.url() << endl;
+  return u;
+}
+
+// Starts a keyword search for the entry's title. When the entry is null or
+// has no title, no search is started and signalDone() is emitted right away.
+void BibsonomyFetcher::updateEntry(Data::EntryPtr entry_) {
+  // guard against a null entry so it is reported as done instead of crashing
+  if(entry_) {
+    QString title = entry_->field(QString::fromLatin1("title"));
+    if(!title.isEmpty()) {
+      search(Fetch::Keyword, title);
+      return;
+    }
+  }
+
+  myDebug() << "BibsonomyFetcher::updateEntry() - insufficient info to search" << endl;
+  emit signalDone(this); // always need to emit this if not continuing with the search
+}
+
+// Creates the configuration widget for this source as a child of parent_.
+Tellico::Fetch::ConfigWidget* BibsonomyFetcher::configWidget(QWidget* parent_) const {
+  Tellico::Fetch::ConfigWidget* widget = new BibsonomyFetcher::ConfigWidget(parent_, this);
+  return widget;
+}
+
+// The options area only shows a note that there is nothing to configure.
+BibsonomyFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const BibsonomyFetcher*)
+  : Fetch::ConfigWidget(parent_)
+{
+  QVBoxLayout* column = new QVBoxLayout(optionsWidget());
+  QLabel* note = new QLabel(i18n("This source has no options."), optionsWidget());
+  column->addWidget(note);
+  column->addStretch();
+}
+
+// Intentionally empty: this widget writes nothing to the config group.
+void BibsonomyFetcher::ConfigWidget::saveConfig(KConfigGroup&)
+{
+}
+
+// The preferred name is the fetcher's default name.
+QString BibsonomyFetcher::ConfigWidget::preferredName() const {
+  const QString name = BibsonomyFetcher::defaultName();
+  return name;
+}
+
+#include "bibsonomyfetcher.moc"
diff --git a/src/fetch/bibsonomyfetcher.h b/src/fetch/bibsonomyfetcher.h
new file mode 100644
index 0000000..fc59928
--- /dev/null
+++ b/src/fetch/bibsonomyfetcher.h
@@ -0,0 +1,82 @@
+/***************************************************************************
+ copyright : (C) 2007 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef TELLICO_FETCH_BIBSONOMYFETCHER_H
+#define TELLICO_FETCH_BIBSONOMYFETCHER_H
+
+#include "fetcher.h"
+#include "configwidget.h"
+#include "../datavectors.h"
+
+#include <kio/job.h>
+
+#include <qcstring.h> // for QByteArray
+#include <qguardedptr.h>
+
+namespace Tellico {
+ namespace Fetch {
+
+/**
+ * Fetcher whose type() is Bibsonomy. It reports itself as searchable by
+ * person and keyword only.
+ *
+ * @author Robby Stephenson
+ */
+class BibsonomyFetcher : public Fetcher {
+Q_OBJECT
+
+public:
+ BibsonomyFetcher(QObject* parent);
+ ~BibsonomyFetcher();
+
+ virtual QString source() const;
+ // true while m_started is set
+ virtual bool isSearching() const { return m_started; }
+ virtual void search(FetchKey key, const QString& value);
+
+ // keys accepted by this source: Person and Keyword
+ virtual bool canSearch(FetchKey k) const { return k == Person || k == Keyword; }
+ virtual void stop();
+ virtual Data::EntryPtr fetchEntry(uint uid);
+ virtual Type type() const { return Bibsonomy; }
+ virtual bool canFetch(int type) const;
+ virtual void readConfigHook(const KConfigGroup& config);
+
+ virtual void updateEntry(Data::EntryPtr entry);
+
+ virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;
+
+ // configuration page for this source; it only displays a "no options" label
+ class ConfigWidget : public Fetch::ConfigWidget {
+ public:
+ ConfigWidget(QWidget* parent_, const BibsonomyFetcher* fetcher = 0);
+ virtual void saveConfig(KConfigGroup& config);
+ virtual QString preferredName() const;
+ };
+ friend class ConfigWidget;
+
+ static QString defaultName();
+
+private slots:
+ // appends a received chunk to m_data
+ void slotData(KIO::Job* job, const QByteArray& data);
+ // parses m_data and reports the results
+ void slotComplete(KIO::Job* job);
+
+private:
+ KURL searchURL(FetchKey key, const QString& value) const;
+
+ // raw bytes received so far for the running transfer
+ QByteArray m_data;
+ // entries found so far, keyed by the uid of their SearchResult
+ QMap<int, Data::EntryPtr> m_entries;
+ QGuardedPtr<KIO::Job> m_job;
+
+ // key and stripped value of the most recent search()
+ FetchKey m_key;
+ QString m_value;
+ bool m_started;
+};
+
+ }
+}
+#endif
diff --git a/src/fetch/citebasefetcher.cpp b/src/fetch/citebasefetcher.cpp
new file mode 100644
index 0000000..798d690
--- /dev/null
+++ b/src/fetch/citebasefetcher.cpp
@@ -0,0 +1,248 @@
+/***************************************************************************
+ copyright : (C) 2007 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "citebasefetcher.h"
+#include "messagehandler.h"
+#include "../translators/bibteximporter.h"
+#include "../tellico_kernel.h"
+#include "../tellico_utils.h"
+#include "../collection.h"
+#include "../entry.h"
+#include "../core/netaccess.h"
+#include "../filehandler.h"
+#include "../tellico_debug.h"
+
+#include <klocale.h>
+
+#include <qlabel.h>
+#include <qlayout.h>
+
+// #define CITEBASE_TEST
+
+// File-local settings for the Citebase source.
+namespace {
+  // always bibtex: the svc_id parameter in the base URL requests it
+  static const char* CITEBASE_BASE_URL = "http://www.citebase.org/openurl/?url_ver=Z39.88-2004&svc_id=bibtex";
+}
+
+using Tellico::Fetch::CitebaseFetcher;
+
+// Starts out idle: no transfer job and no search in progress.
+CitebaseFetcher::CitebaseFetcher(QObject* parent_)
+  : Fetcher(parent_),
+    m_job(0),
+    m_started(false)
+{
+}
+
+// Nothing is released explicitly here.
+CitebaseFetcher::~CitebaseFetcher()
+{
+}
+
+// Built-in name of this source; source() falls back to it when m_name is empty.
+QString CitebaseFetcher::defaultName() {
+  const QString name = QString::fromLatin1("Citebase");
+  return name;
+}
+
+// Returns m_name, or the default name when m_name is empty.
+QString CitebaseFetcher::source() const {
+  if(m_name.isEmpty()) {
+    return defaultName();
+  }
+  return m_name;
+}
+
+// Only Bibtex collections are supported.
+bool CitebaseFetcher::canFetch(int type) const {
+  const bool isBibtex = (type == Data::Collection::Bibtex);
+  return isBibtex;
+}
+
+// Intentionally empty: nothing is read from the config group.
+void CitebaseFetcher::readConfigHook(const KConfigGroup&)
+{
+}
+
+// Starts an asynchronous search. The key and the whitespace-stripped value
+// are remembered and the fetcher is marked as started before any check, so
+// that stop() can report completion if the search cannot be carried out.
+void CitebaseFetcher::search(FetchKey key_, const QString& value_) {
+  m_started = true;
+  m_key = key_;
+  m_value = value_.stripWhiteSpace();
+
+  const bool typeAllowed = canFetch(Kernel::self()->collectionType());
+  if(!typeAllowed) {
+    message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning);
+    stop();
+    return;
+  }
+
+  // drop anything left over from a previous transfer
+  m_data.truncate(0);
+
+  const KURL url = searchURL(m_key, m_value);
+  if(url.isEmpty()) {
+    // searchURL() could not build a request for this key
+    stop();
+    return;
+  }
+
+  m_job = KIO::get(url, false, false);
+  connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+          SLOT(slotData(KIO::Job*, const QByteArray&)));
+  connect(m_job, SIGNAL(result(KIO::Job*)),
+          SLOT(slotComplete(KIO::Job*)));
+}
+
+// Ends the current search: kills a job that is still set, discards the
+// buffered data, clears the started flag and emits signalDone().
+// Does nothing when no search is running.
+void CitebaseFetcher::stop() {
+  if(m_started) {
+    if(m_job) {
+      m_job->kill();
+      m_job = 0;
+    }
+    m_data.truncate(0);
+    m_started = false;
+    emit signalDone(this);
+  }
+}
+
+// Appends each chunk delivered by the job to m_data.
+void CitebaseFetcher::slotData(KIO::Job*, const QByteArray& data_) {
+  QDataStream appender(m_data, IO_WriteOnly | IO_Append);
+  appender.writeRawBytes(data_.data(), data_.size());
+}
+
+// Called when the transfer finishes. Parses the downloaded text as bibtex
+// and emits one SearchResult per entry; every exit path ends in stop().
+void CitebaseFetcher::slotComplete(KIO::Job* job_) {
+  // since the fetch is done, don't worry about holding the job pointer
+  m_job = 0;
+
+  if(job_->error()) {
+    job_->showErrorDialog(Kernel::self()->widget());
+    stop();
+    return;
+  }
+
+  if(m_data.isEmpty()) {
+    myDebug() << "CitebaseFetcher::slotComplete() - no data" << endl;
+    stop();
+    return;
+  }
+
+  // disabled helper that dumps the raw response to a file for inspection
+#if 0
+  kdWarning() << "Remove debug from citebasefetcher.cpp" << endl;
+  QFile f(QString::fromLatin1("/tmp/test.bib"));
+  if(f.open(IO_WriteOnly)) {
+    QTextStream t(&f);
+    t.setEncoding(QTextStream::UnicodeUTF8);
+    t << QCString(m_data, m_data.size()+1);
+  }
+  f.close();
+#endif
+
+  const QString bibtex = QString::fromUtf8(m_data, m_data.size());
+  Import::BibtexImporter imp(bibtex);
+  Data::CollPtr coll = imp.collection();
+  if(!coll) {
+    myDebug() << "CitebaseFetcher::slotComplete() - no valid result" << endl;
+    stop();
+    return;
+  }
+
+  const QString authorField = QString::fromLatin1("author");
+  const QString publisherField = QString::fromLatin1("publisher");
+  const QString yearField = QString::fromLatin1("year");
+  const QString isbnField = QString::fromLatin1("isbn");
+
+  Data::EntryVec entries = coll->entries();
+  // the search might get aborted while results are being reported
+  for(Data::EntryVec::Iterator it = entries.begin(); m_started && it != entries.end(); ++it) {
+    QString desc = it->field(authorField) + QChar('/') + it->field(publisherField);
+    const QString year = it->field(yearField);
+    if(!year.isEmpty()) {
+      desc += QChar('/') + year;
+    }
+
+    SearchResult* r = new SearchResult(this, it->title(), desc, it->field(isbnField));
+    m_entries.insert(r->uid, Data::EntryPtr(it));
+    emit signalResultFound(r);
+  }
+
+  stop(); // required
+}
+
+// Returns the entry stored in m_entries under the given result uid.
+Tellico::Data::EntryPtr CitebaseFetcher::fetchEntry(uint uid_) {
+  Data::EntryPtr entry = m_entries[uid_];
+  return entry;
+}
+
+// Builds the request URL for a search. Only ArxivID is supported: the id is
+// stripped of a leading "arxiv:" prefix (case-insensitive) and a trailing
+// version suffix, then sent as rft_id=oai:arXiv.org:<id>.
+// Any other key yields an empty KURL.
+KURL CitebaseFetcher::searchURL(FetchKey key_, const QString& value_) const {
+  KURL u(QString::fromLatin1(CITEBASE_BASE_URL));
+
+  switch(key_) {
+    case ArxivID:
+      {
+      // remove prefix and/or version number
+      QString value = value_;
+      value.remove(QRegExp(QString::fromLatin1("^arxiv:"), false));
+      value.remove(QRegExp(QString::fromLatin1("v\\d+$")));
+      u.addQueryItem(QString::fromLatin1("rft_id"), QString::fromLatin1("oai:arXiv.org:%1").arg(value));
+      }
+      break;
+
+    default:
+      // report the key that was actually passed in; m_key is only set by
+      // search() and this function is also called without going through it
+      kdWarning() << "CitebaseFetcher::search() - key not recognized: " << key_ << endl;
+      return KURL();
+  }
+
+#ifdef CITEBASE_TEST
+  u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/citebase.bib"));
+#endif
+  myDebug() << "CitebaseFetcher::search() - url: " << u.url() << endl;
+  return u;
+}
+
+// Starts an arXiv-id search for the entry's "arxiv" field. When the entry is
+// null or has no arxiv id, no search is started and signalDone() is emitted
+// right away.
+void CitebaseFetcher::updateEntry(Data::EntryPtr entry_) {
+  // same null guard as updateEntrySynchronous(), so a null entry is
+  // reported as done instead of being dereferenced
+  if(entry_) {
+    QString arxiv = entry_->field(QString::fromLatin1("arxiv"));
+    if(!arxiv.isEmpty()) {
+      search(Fetch::ArxivID, arxiv);
+      return;
+    }
+  }
+
+  myDebug() << "CitebaseFetcher::updateEntry() - insufficient info to search" << endl;
+  emit signalDone(this); // always need to emit this if not continuing with the search
+}
+
+// Synchronously looks up the entry's "arxiv" id and merges the first bibtex
+// result into @p entry (existing values are not overwritten by the merge).
+// Returns without touching the entry when it is null, has no arxiv id, or
+// the download yields no text.
+void CitebaseFetcher::updateEntrySynchronous(Data::EntryPtr entry) {
+  if(!entry) {
+    return;
+  }
+  QString arxiv = entry->field(QString::fromLatin1("arxiv"));
+  if(arxiv.isEmpty()) {
+    return;
+  }
+
+  KURL u = searchURL(ArxivID, arxiv);
+  // assume result is always utf-8, as slotComplete() does for the same data
+  // NOTE(review): the third argument is presumably the "read as UTF-8" flag
+  // of FileHandler::readTextFile -- confirm against its declaration
+  QString bibtex = FileHandler::readTextFile(u, true, true);
+  if(bibtex.isEmpty()) {
+    return;
+  }
+
+  Import::BibtexImporter imp(bibtex);
+  Data::CollPtr coll = imp.collection();
+  if(coll && coll->entryCount() > 0) {
+    myLog() << "CitebaseFetcher::updateEntrySynchronous() - found arxiv result, merging" << endl;
+    Data::Collection::mergeEntry(entry, coll->entries().front(), false /*overwrite*/);
+  }
+}
+
+// Creates the configuration widget for this source as a child of parent_.
+Tellico::Fetch::ConfigWidget* CitebaseFetcher::configWidget(QWidget* parent_) const {
+  Tellico::Fetch::ConfigWidget* widget = new CitebaseFetcher::ConfigWidget(parent_, this);
+  return widget;
+}
+
+// The options area only shows a note that there is nothing to configure.
+CitebaseFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const CitebaseFetcher*)
+  : Fetch::ConfigWidget(parent_)
+{
+  QVBoxLayout* column = new QVBoxLayout(optionsWidget());
+  QLabel* note = new QLabel(i18n("This source has no options."), optionsWidget());
+  column->addWidget(note);
+  column->addStretch();
+}
+
+// Intentionally empty: this widget writes nothing to the config group.
+void CitebaseFetcher::ConfigWidget::saveConfig(KConfigGroup&)
+{
+}
+
+// The preferred name is the fetcher's default name.
+QString CitebaseFetcher::ConfigWidget::preferredName() const {
+  const QString name = CitebaseFetcher::defaultName();
+  return name;
+}
+
+#include "citebasefetcher.moc"
diff --git a/src/fetch/citebasefetcher.h b/src/fetch/citebasefetcher.h
new file mode 100644
index 0000000..a292107
--- /dev/null
+++ b/src/fetch/citebasefetcher.h
@@ -0,0 +1,83 @@
+/***************************************************************************
+ copyright : (C) 2007 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef TELLICO_FETCH_CITEBASEFETCHER_H
+#define TELLICO_FETCH_CITEBASEFETCHER_H
+
+#include "fetcher.h"
+#include "configwidget.h"
+#include "../datavectors.h"
+
+#include <kio/job.h>
+
+#include <qcstring.h> // for QByteArray
+#include <qguardedptr.h>
+
+namespace Tellico {
+ namespace Fetch {
+
+/**
+ * Fetcher whose type() is Citebase. It reports itself as searchable by
+ * arXiv id only, and offers both an asynchronous (search/updateEntry) and a
+ * synchronous (updateEntrySynchronous) interface.
+ *
+ * @author Robby Stephenson
+ */
+class CitebaseFetcher : public Fetcher {
+Q_OBJECT
+
+public:
+ CitebaseFetcher(QObject* parent);
+ ~CitebaseFetcher();
+
+ virtual QString source() const;
+ // true while m_started is set
+ virtual bool isSearching() const { return m_started; }
+ virtual void search(FetchKey key, const QString& value);
+
+ // the only key accepted by this source is ArxivID
+ virtual bool canSearch(FetchKey k) const { return k == ArxivID; }
+ virtual void stop();
+ virtual Data::EntryPtr fetchEntry(uint uid);
+ virtual Type type() const { return Citebase; }
+ virtual bool canFetch(int type) const;
+ virtual void readConfigHook(const KConfigGroup& config);
+
+ virtual void updateEntry(Data::EntryPtr entry);
+ // looks the entry's "arxiv" field up directly and merges the first result
+ virtual void updateEntrySynchronous(Data::EntryPtr entry);
+
+ virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;
+
+ // configuration page for this source; it only displays a "no options" label
+ class ConfigWidget : public Fetch::ConfigWidget {
+ public:
+ ConfigWidget(QWidget* parent_, const CitebaseFetcher* fetcher = 0);
+ virtual void saveConfig(KConfigGroup& config);
+ virtual QString preferredName() const;
+ };
+ friend class ConfigWidget;
+
+ static QString defaultName();
+
+private slots:
+ // appends a received chunk to m_data
+ void slotData(KIO::Job* job, const QByteArray& data);
+ // parses m_data and reports the results
+ void slotComplete(KIO::Job* job);
+
+private:
+ KURL searchURL(FetchKey key, const QString& value) const;
+
+ // raw bytes received so far for the running transfer
+ QByteArray m_data;
+ // entries found so far, keyed by the uid of their SearchResult
+ QMap<int, Data::EntryPtr> m_entries;
+ QGuardedPtr<KIO::Job> m_job;
+
+ // key and stripped value of the most recent search()
+ FetchKey m_key;
+ QString m_value;
+ bool m_started;
+};
+
+ }
+}
+#endif
diff --git a/src/fetch/configwidget.cpp b/src/fetch/configwidget.cpp
new file mode 100644
index 0000000..c7b3b59
--- /dev/null
+++ b/src/fetch/configwidget.cpp
@@ -0,0 +1,66 @@
+/***************************************************************************
+ copyright : (C) 2003-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "configwidget.h"
+
+#include <kconfig.h>
+#include <klocale.h>
+#include <kaccelmanager.h>
+
+#include <qvgroupbox.h>
+#include <qlayout.h>
+
+using Tellico::Fetch::ConfigWidget;
+
+// Lays out a "Source Options" group box holding the empty options widget
+// that subclasses fill in; both state flags start out false.
+ConfigWidget::ConfigWidget(QWidget* parent_)
+  : QWidget(parent_),
+    m_modified(false),
+    m_accepted(false)
+{
+  QHBoxLayout* row = new QHBoxLayout(this);
+  row->setSpacing(10);
+
+  QGroupBox* optionsBox = new QVGroupBox(i18n("Source Options"), this);
+  row->addWidget(optionsBox, 10 /*stretch*/);
+
+  m_optionsWidget = new QWidget(optionsBox);
+}
+
+// Adds an "Available Fields" group box with one check box per custom field.
+// Each box is labelled with the map value, registered in m_fields under the
+// map key, and pre-checked when the key is listed in fieldsToAdd_.
+// Nothing is added when customFields_ is empty.
+void ConfigWidget::addFieldsWidget(const StringMap& customFields_, const QStringList& fieldsToAdd_) {
+  if(!customFields_.isEmpty()) {
+    QVGroupBox* fieldsBox = new QVGroupBox(i18n("Available Fields"), this);
+    static_cast<QBoxLayout*>(layout())->addWidget(fieldsBox);
+
+    StringMap::ConstIterator field = customFields_.begin();
+    while(field != customFields_.end()) {
+      QCheckBox* check = new QCheckBox(field.data(), fieldsBox);
+      m_fields.insert(field.key(), check);
+      if(fieldsToAdd_.contains(field.key())) {
+        check->setChecked(true);
+      }
+      connect(check, SIGNAL(clicked()), SLOT(slotSetModified()));
+      ++field;
+    }
+
+    KAcceleratorManager::manage(this);
+  }
+}
+
+// Writes the keys of all checked field boxes to the "Custom Fields" entry.
+void ConfigWidget::saveFieldsConfig(KConfigGroup& config_) const {
+  QStringList checkedKeys;
+  QDictIterator<QCheckBox> box(m_fields);
+  while(box.current()) {
+    if(box.current()->isChecked()) {
+      checkedKeys.append(box.currentKey());
+    }
+    ++box;
+  }
+  config_.writeEntry(QString::fromLatin1("Custom Fields"), checkedKeys);
+}
+
+#include "configwidget.moc"
diff --git a/src/fetch/configwidget.h b/src/fetch/configwidget.h
new file mode 100644
index 0000000..9f18f83
--- /dev/null
+++ b/src/fetch/configwidget.h
@@ -0,0 +1,78 @@
+/***************************************************************************
+ copyright : (C) 2003-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef FETCHCONFIGWIDGET_H
+#define FETCHCONFIGWIDGET_H
+
+#include "../datavectors.h"
+
+#include <qwidget.h>
+#include <qdict.h>
+#include <qcheckbox.h>
+
+class KConfigGroup;
+class QStringList;
+
+namespace Tellico {
+ namespace Fetch {
+
+/**
+ * Base class for the per-source configuration pages. It provides the
+ * "Source Options" area that subclasses populate through optionsWidget(),
+ * an optional list of custom-field check boxes, and the modified/accepted
+ * flags that decide whether the page should be saved.
+ *
+ * @author Robby Stephenson
+ */
+class ConfigWidget : public QWidget {
+Q_OBJECT
+
+public:
+ ConfigWidget(QWidget* parent);
+ virtual ~ConfigWidget() {}
+
+ void setAccepted(bool accepted_) { m_accepted = accepted_; }
+ // true only when the page was both modified and accepted
+ bool shouldSave() const { return m_modified && m_accepted; }
+ /**
+ * Saves any configuration options. The config group must be
+ * set before calling this function.
+ *
+ * @param config The config group to write the options to
+ */
+ virtual void saveConfig(KConfigGroup& config) = 0;
+ /**
+ * Called when a fetcher data source is removed. Useful for any cleanup work necessary.
+ * The ExecExternalFetcher might need to remove the script, for example.
+ * Because of the way the ConfigDialog is setup, easier to have that in the ConfigWidget
+ * class than in the Fetcher class itself
+ */
+ virtual void removed() {}
+ virtual QString preferredName() const = 0;
+
+signals:
+ void signalName(const QString& name);
+
+public slots:
+ // records whether the page has unsaved changes; defaults to "modified"
+ void slotSetModified(bool modified_ = true) { m_modified = modified_; }
+
+protected:
+ // parent widget for the controls a subclass adds
+ QWidget* optionsWidget() { return m_optionsWidget; }
+ // adds one check box per custom field, pre-checking those in fieldsToAdd
+ void addFieldsWidget(const StringMap& customFields, const QStringList& fieldsToAdd);
+ // writes the keys of the checked boxes to the "Custom Fields" entry
+ void saveFieldsConfig(KConfigGroup& config) const;
+
+private:
+ bool m_modified;
+ bool m_accepted;
+ QWidget* m_optionsWidget;
+ // check boxes created by addFieldsWidget(), keyed by field key
+ QDict<QCheckBox> m_fields;
+};
+
+ }
+}
+
+#endif
diff --git a/src/fetch/crossreffetcher.cpp b/src/fetch/crossreffetcher.cpp
new file mode 100644
index 0000000..8c5d303
--- /dev/null
+++ b/src/fetch/crossreffetcher.cpp
@@ -0,0 +1,392 @@
+/***************************************************************************
+ copyright : (C) 2007 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "crossreffetcher.h"
+#include "messagehandler.h"
+#include "../translators/xslthandler.h"
+#include "../translators/tellicoimporter.h"
+#include "../tellico_kernel.h"
+#include "../tellico_utils.h"
+#include "../collection.h"
+#include "../entry.h"
+#include "../core/netaccess.h"
+#include "../imagefactory.h"
+#include "../tellico_debug.h"
+
+#include <klocale.h>
+#include <kstandarddirs.h>
+#include <kconfig.h>
+#include <klineedit.h>
+#include <kactivelabel.h>
+
+#include <qlabel.h>
+#include <qwhatsthis.h>
+#include <qlayout.h>
+#include <qfile.h>
+
+// #define CROSSREF_TEST
+
+#define CROSSREF_USE_UNIXREF
+
+// File-local settings for the CrossRef source.
+namespace {
+  // OpenURL endpoint; searchURL() appends the remaining query items
+  static const char* CROSSREF_BASE_URL = "http://www.crossref.org/openurl/?url_ver=Z39.88-2004&noredirect=true";
+}
+
+using Tellico::Fetch::CrossRefFetcher;
+
+// Starts out idle: no stylesheet handler, no transfer job, no search running.
+CrossRefFetcher::CrossRefFetcher(QObject* parent_)
+  : Fetcher(parent_),
+    m_xsltHandler(0),
+    m_job(0),
+    m_started(false)
+{
+}
+
+// Frees the stylesheet handler, if one was ever created.
+CrossRefFetcher::~CrossRefFetcher()
+{
+  delete m_xsltHandler;
+  m_xsltHandler = 0;
+}
+
+// Built-in name of this source; source() falls back to it when m_name is empty.
+QString CrossRefFetcher::defaultName() {
+  const QString name = QString::fromLatin1("CrossRef");
+  return name;
+}
+
+// Returns m_name, or the default name when m_name is empty.
+QString CrossRefFetcher::source() const {
+  if(m_name.isEmpty()) {
+    return defaultName();
+  }
+  return m_name;
+}
+
+// Only Bibtex collections are supported.
+bool CrossRefFetcher::canFetch(int type) const {
+  const bool isBibtex = (type == Data::Collection::Bibtex);
+  return isBibtex;
+}
+
+// Loads the account settings. An empty "User" or "Password" entry leaves the
+// corresponding member unchanged.
+void CrossRefFetcher::readConfigHook(const KConfigGroup& config_) {
+  const QString user = config_.readEntry("User");
+  if(!user.isEmpty()) {
+    m_user = user;
+  }
+
+  const QString password = config_.readEntry("Password");
+  if(!password.isEmpty()) {
+    m_password = password;
+  }
+}
+
+// Starts an asynchronous search. The key and the whitespace-stripped value
+// are remembered and the fetcher is marked as started before any check, so
+// that stop() can report completion if the search cannot be carried out.
+// A user name and a password must both be configured.
+void CrossRefFetcher::search(FetchKey key_, const QString& value_) {
+  m_started = true;
+  m_key = key_;
+  m_value = value_.stripWhiteSpace();
+
+  const bool missingCredentials = m_user.isEmpty() || m_password.isEmpty();
+  if(missingCredentials) {
+    message(i18n("%1 requires a username and password.").arg(source()), MessageHandler::Warning);
+    stop();
+    return;
+  }
+
+  const bool typeAllowed = canFetch(Kernel::self()->collectionType());
+  if(!typeAllowed) {
+    message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning);
+    stop();
+    return;
+  }
+
+  // drop anything left over from a previous transfer
+  m_data.truncate(0);
+
+  const KURL url = searchURL(m_key, m_value);
+  if(url.isEmpty()) {
+    // searchURL() could not build a request for this key
+    stop();
+    return;
+  }
+
+  m_job = KIO::get(url, false, false);
+  connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+          SLOT(slotData(KIO::Job*, const QByteArray&)));
+  connect(m_job, SIGNAL(result(KIO::Job*)),
+          SLOT(slotComplete(KIO::Job*)));
+}
+
+// Ends the current search: kills a job that is still set, discards the
+// buffered data, clears the started flag and emits signalDone().
+// Does nothing when no search is running.
+void CrossRefFetcher::stop() {
+  if(m_started) {
+    if(m_job) {
+      m_job->kill();
+      m_job = 0;
+    }
+    m_data.truncate(0);
+    m_started = false;
+    emit signalDone(this);
+  }
+}
+
+// Appends each chunk delivered by the job to m_data.
+void CrossRefFetcher::slotData(KIO::Job*, const QByteArray& data_) {
+  QDataStream appender(m_data, IO_WriteOnly | IO_Append);
+  appender.writeRawBytes(data_.data(), data_.size());
+}
+
+// Called when the transfer finishes. Runs the downloaded XML through the
+// stylesheet, imports the result and emits one SearchResult per entry;
+// every exit path ends in stop().
+void CrossRefFetcher::slotComplete(KIO::Job* job_) {
+  // since the fetch is done, don't worry about holding the job pointer
+  m_job = 0;
+
+  if(job_->error()) {
+    job_->showErrorDialog(Kernel::self()->widget());
+    stop();
+    return;
+  }
+
+  if(m_data.isEmpty()) {
+    myDebug() << "CrossRefFetcher::slotComplete() - no data" << endl;
+    stop();
+    return;
+  }
+
+  // disabled helper that dumps the raw response to a file for inspection
+#if 0
+  kdWarning() << "Remove debug from crossreffetcher.cpp" << endl;
+  QFile f(QString::fromLatin1("/tmp/test.xml"));
+  if(f.open(IO_WriteOnly)) {
+    QTextStream t(&f);
+    t.setEncoding(QTextStream::UnicodeUTF8);
+    t << QCString(m_data, m_data.size()+1);
+  }
+  f.close();
+#endif
+
+  // the handler is created on first use
+  if(!m_xsltHandler) {
+    initXSLTHandler();
+  }
+  if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading
+    stop();
+    return;
+  }
+
+  // assume result is always utf-8
+  const QString xml = QString::fromUtf8(m_data, m_data.size());
+  QString str = m_xsltHandler->applyStylesheet(xml);
+  Import::TellicoImporter imp(str);
+  Data::CollPtr coll = imp.collection();
+  if(!coll) {
+    myDebug() << "CrossRefFetcher::slotComplete() - no valid result" << endl;
+    stop();
+    return;
+  }
+
+  const QString authorField = QString::fromLatin1("author");
+  const QString publisherField = QString::fromLatin1("publisher");
+  const QString yearField = QString::fromLatin1("year");
+  const QString isbnField = QString::fromLatin1("isbn");
+
+  Data::EntryVec entries = coll->entries();
+  // the search might get aborted while results are being reported
+  for(Data::EntryVec::Iterator it = entries.begin(); m_started && it != entries.end(); ++it) {
+    QString desc = it->field(authorField) + QChar('/') + it->field(publisherField);
+    const QString year = it->field(yearField);
+    if(!year.isEmpty()) {
+      desc += QChar('/') + year;
+    }
+
+    SearchResult* r = new SearchResult(this, it->title(), desc, it->field(isbnField));
+    m_entries.insert(r->uid, Data::EntryPtr(it));
+    emit signalResultFound(r);
+  }
+
+  stop(); // required
+}
+
+// Returns the entry stored for the given result uid, or a null pointer when
+// the uid is unknown. If the entry has a "url" but its image field is still
+// empty, a preview of the URL is fetched and stored as the cover image. The
+// image field used is "cover" when the collection has one, otherwise the
+// collection's first image field, otherwise a newly added "cover" field.
+Tellico::Data::EntryPtr CrossRefFetcher::fetchEntry(uint uid_) {
+  Data::EntryPtr entry = m_entries[uid_];
+  if(!entry) {
+    // an unknown uid gives a default-constructed (null) pointer; return it
+    // instead of dereferencing it below
+    kdWarning() << "CrossRefFetcher::fetchEntry() - no entry in dict" << endl;
+    return entry;
+  }
+
+  // if URL but no cover image, fetch it
+  const QString url = entry->field(QString::fromLatin1("url"));
+  if(!url.isEmpty()) {
+    Data::CollPtr coll = entry->collection();
+    Data::FieldPtr field = coll->fieldByName(QString::fromLatin1("cover"));
+    if(!field && !coll->imageFields().isEmpty()) {
+      field = coll->imageFields().front();
+    } else if(!field) {
+      field = new Data::Field(QString::fromLatin1("cover"), i18n("Front Cover"), Data::Field::Image);
+      coll->addField(field);
+    }
+    if(entry->field(field).isEmpty()) {
+      QPixmap pix = NetAccess::filePreview(url);
+      if(!pix.isNull()) {
+        QString id = ImageFactory::addImage(pix, QString::fromLatin1("PNG"));
+        if(!id.isEmpty()) {
+          entry->setField(field, id);
+        }
+      }
+    }
+  }
+  return entry;
+}
+
+// (Re)creates m_xsltHandler from the stylesheet selected at compile time:
+// unixref2tellico.xsl when CROSSREF_USE_UNIXREF is defined, otherwise
+// crossref2tellico.xsl. If the file cannot be located the existing handler
+// is left untouched; if the stylesheet is invalid m_xsltHandler ends up null.
+void CrossRefFetcher::initXSLTHandler() {
+#ifdef CROSSREF_USE_UNIXREF
+  QString xsltfile = locate("appdata", QString::fromLatin1("unixref2tellico.xsl"));
+#else
+  QString xsltfile = locate("appdata", QString::fromLatin1("crossref2tellico.xsl"));
+#endif
+  if(xsltfile.isEmpty()) {
+    kdWarning() << "CrossRefFetcher::initXSLTHandler() - can not locate xslt file." << endl;
+    return;
+  }
+
+  KURL u;
+  u.setPath(xsltfile);
+
+  delete m_xsltHandler;
+  m_xsltHandler = new XSLTHandler(u);
+  if(!m_xsltHandler->isValid()) {
+    // name the stylesheet that was actually loaded, not a fixed file name
+    kdWarning() << "CrossRefFetcher::initXSLTHandler() - error in " << xsltfile << endl;
+    delete m_xsltHandler;
+    m_xsltHandler = 0;
+  }
+}
+
+/**
+ * Builds the CrossRef query URL for a search.
+ *
+ * @param key_ the kind of search; only DOI is handled
+ * @param value_ the value to search for
+ * @return the query URL, or an empty KURL when key_ is not supported
+ */
+KURL CrossRefFetcher::searchURL(FetchKey key_, const QString& value_) const {
+  KURL u(QString::fromLatin1(CROSSREF_BASE_URL));
+#ifdef CROSSREF_USE_UNIXREF
+  u.addQueryItem(QString::fromLatin1("format"), QString::fromLatin1("unixref"));
+#endif
+  // the account is passed as "ourl_<user>:<password>"
+  u.addQueryItem(QString::fromLatin1("req_dat"), QString::fromLatin1("ourl_%1:%2").arg(m_user, m_password));
+
+  switch(key_) {
+    case DOI:
+      u.addQueryItem(QString::fromLatin1("rft_id"), QString::fromLatin1("info:doi/%1").arg(value_));
+      break;
+
+    default:
+      // report the key that was passed in rather than the m_key member, which
+      // updateEntrySynchronous() does not set before calling this function
+      kdWarning() << "CrossRefFetcher::search() - key not recognized: " << key_ << endl;
+      return KURL();
+  }
+
+#ifdef CROSSREF_TEST
+  u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/crossref.xml"));
+#endif
+  // NOTE(review): this prints the full URL, including the req_dat credentials
+  myDebug() << "CrossRefFetcher::search() - url: " << u.url() << endl;
+  return u;
+}
+
+/**
+ * Starts an asynchronous DOI search for the given entry, or reports
+ * completion immediately when the entry has no DOI.
+ */
+void CrossRefFetcher::updateEntry(Data::EntryPtr entry_) {
+  const QString entryDoi = entry_->field(QString::fromLatin1("doi"));
+  if(entryDoi.isEmpty()) {
+#if 0
+    // optimistically try searching for title and rely on Collection::sameEntry() to figure things out
+    QString t = entry_->field(QString::fromLatin1("title"));
+    if(!t.isEmpty()) {
+      m_limit = 10; // raise limit so more possibility of match
+      search(Fetch::Title, t);
+      return;
+    }
+#endif
+
+    myDebug() << "CrossRefFetcher::updateEntry() - insufficient info to search" << endl;
+    // the done signal must always be emitted when no search is started
+    emit signalDone(this);
+    return;
+  }
+
+  search(Fetch::DOI, entryDoi);
+}
+
+/**
+ * Looks up the entry's DOI with a blocking read and merges the first result
+ * into the entry without overwriting existing values. Does nothing when the
+ * entry is null, the account is incomplete, the DOI is empty, the download
+ * is empty, or the stylesheet cannot be loaded.
+ */
+void CrossRefFetcher::updateEntrySynchronous(Data::EntryPtr entry) {
+  if(!entry) {
+    return;
+  }
+  const bool missingAccount = m_user.isEmpty() || m_password.isEmpty();
+  if(missingAccount) {
+    myDebug() << "CrossRefFetcher::updateEntrySynchronous() - username and password is required" << endl;
+    return;
+  }
+  const QString entryDoi = entry->field(QString::fromLatin1("doi"));
+  if(entryDoi.isEmpty()) {
+    return;
+  }
+
+  KURL queryUrl = searchURL(DOI, entryDoi);
+  const QString response = FileHandler::readTextFile(queryUrl, true, true);
+  if(response.isEmpty()) {
+    return;
+  }
+
+  // load the stylesheet lazily
+  if(!m_xsltHandler) {
+    initXSLTHandler();
+  }
+  if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading
+    return;
+  }
+
+  // assume result is always utf-8
+  const QString converted = m_xsltHandler->applyStylesheet(response);
+  Import::TellicoImporter imp(converted);
+  Data::CollPtr coll = imp.collection();
+  if(!coll || coll->entryCount() < 1) {
+    return;
+  }
+  myLog() << "CrossRefFetcher::updateEntrySynchronous() - found DOI result, merging" << endl;
+  Data::Collection::mergeEntry(entry, coll->entries().front(), false /*overwrite*/);
+}
+
+/**
+ * Creates a new configuration widget for this fetcher, pre-filled from it.
+ */
+Tellico::Fetch::ConfigWidget* CrossRefFetcher::configWidget(QWidget* parent_) const {
+  Fetch::ConfigWidget* widget = new CrossRefFetcher::ConfigWidget(parent_, this);
+  return widget;
+}
+
+CrossRefFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const CrossRefFetcher* fetcher_) // builds the options page: an account notice plus username and password edits
+ : Fetch::ConfigWidget(parent_) {
+ QGridLayout* l = new QGridLayout(optionsWidget(), 4, 2);
+ l->setSpacing(4);
+ l->setColStretch(1, 10);
+
+ int row = 0; // incremented before every use below, so the first widget is placed in row 1
+
+ KActiveLabel* al = new KActiveLabel(i18n("CrossRef requires an account for access. "
+ "Please read the terms and conditions and "
+ "<a href='http://www.crossref.org/requestaccount/'>"
+ "request an account</a>. Enter your OpenURL "
+ "account information below."),
+ optionsWidget());
+ ++row;
+ l->addMultiCellWidget(al, row, row, 0, 1); // the notice spans both columns
+ // richtext gets weird with size, so pin the minimum width to the size hint
+ al->setMinimumWidth(al->sizeHint().width());
+
+ QLabel* label = new QLabel(i18n("&Username: "), optionsWidget());
+ l->addWidget(label, ++row, 0);
+ m_userEdit = new KLineEdit(optionsWidget());
+ connect(m_userEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified())); // text edits trigger slotSetModified()
+ l->addWidget(m_userEdit, row, 1);
+ QString w = i18n("A username and password is required to access the CrossRef service. The password is "
+ "stored as plain text in the Tellico configuration file.");
+ QWhatsThis::add(label, w);
+ QWhatsThis::add(m_userEdit, w);
+ label->setBuddy(m_userEdit);
+
+ label = new QLabel(i18n("&Password: "), optionsWidget());
+ l->addWidget(label, ++row, 0);
+ m_passEdit = new KLineEdit(optionsWidget()); // NOTE(review): no echo mode is set here, so the password is presumably shown in clear text - confirm
+ connect(m_passEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+ l->addWidget(m_passEdit, row, 1);
+ QWhatsThis::add(label, w); // the same help text is shared by the username and password widgets
+ QWhatsThis::add(m_passEdit, w);
+ label->setBuddy(m_passEdit);
+
+ if(fetcher_) { // pre-fill both edits from the fetcher when one was given
+ m_userEdit->setText(fetcher_->m_user);
+ m_passEdit->setText(fetcher_->m_password);
+ }
+}
+
+/**
+ * Writes the username and password edits to the "User" and "Password"
+ * config entries and clears the modified state.
+ */
+void CrossRefFetcher::ConfigWidget::saveConfig(KConfigGroup& config_) {
+  const QString user = m_userEdit->text();
+  const QString password = m_passEdit->text();
+
+  config_.writeEntry("User", user);
+  config_.writeEntry("Password", password);
+
+  slotSetModified(false);
+}
+
+/**
+ * Returns the fetcher's default name as the preferred source name.
+ */
+QString CrossRefFetcher::ConfigWidget::preferredName() const {
+  const QString name = CrossRefFetcher::defaultName();
+  return name;
+}
+
+#include "crossreffetcher.moc"
diff --git a/src/fetch/crossreffetcher.h b/src/fetch/crossreffetcher.h
new file mode 100644
index 0000000..392d46a
--- /dev/null
+++ b/src/fetch/crossreffetcher.h
@@ -0,0 +1,97 @@
+/***************************************************************************
+ copyright : (C) 2007 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef TELLICO_FETCH_CROSSREFFETCHER_H
+#define TELLICO_FETCH_CROSSREFFETCHER_H
+
+#include "fetcher.h"
+#include "configwidget.h"
+#include "../datavectors.h"
+
+#include <kio/job.h>
+
+#include <qcstring.h> // for QByteArray
+#include <qguardedptr.h>
+
+class KLineEdit;
+
+namespace Tellico {
+
+ class XSLTHandler;
+
+ namespace Fetch {
+
+/**
+ * A fetcher that looks up bibliographic data from CrossRef by DOI.
+ * The service requires an account; the username and password are read
+ * from and written to the fetcher's configuration.
+ *
+ * @author Robby Stephenson
+ */
+class CrossRefFetcher : public Fetcher {
+Q_OBJECT
+
+public:
+ CrossRefFetcher(QObject* parent);
+ ~CrossRefFetcher();
+
+ virtual QString source() const;
+ virtual bool isSearching() const { return m_started; }
+ virtual void search(FetchKey key, const QString& value);
+
+ virtual bool canSearch(FetchKey k) const { return k == DOI; } // only DOI searches are supported
+ virtual void stop();
+ virtual Data::EntryPtr fetchEntry(uint uid); // uid is the SearchResult uid emitted with signalResultFound()
+ virtual Type type() const { return CrossRef; }
+ virtual bool canFetch(int type) const;
+ virtual void readConfigHook(const KConfigGroup& config);
+
+ virtual void updateEntry(Data::EntryPtr entry); // asynchronous; emits signalDone() itself when the entry has no DOI
+ virtual void updateEntrySynchronous(Data::EntryPtr entry); // blocking read; merges the first result into entry
+
+ virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;
+
+ class ConfigWidget : public Fetch::ConfigWidget { // options page with username and password edits
+ public:
+ ConfigWidget(QWidget* parent_, const CrossRefFetcher* fetcher = 0);
+ virtual void saveConfig(KConfigGroup& config); // writes the "User" and "Password" entries
+ virtual QString preferredName() const;
+ private:
+ KLineEdit* m_userEdit;
+ KLineEdit* m_passEdit;
+ };
+ friend class ConfigWidget; // the widget reads m_user and m_password directly
+
+ static QString defaultName();
+
+private slots:
+ void slotData(KIO::Job* job, const QByteArray& data);
+ void slotComplete(KIO::Job* job);
+
+private:
+ void initXSLTHandler(); // (re)creates m_xsltHandler; leaves it 0 when the stylesheet is invalid
+ KURL searchURL(FetchKey key, const QString& value) const; // returns an empty KURL for unsupported keys
+
+ XSLTHandler* m_xsltHandler; // created lazily by initXSLTHandler()
+
+ QString m_user; // account name, sent in the req_dat query item
+ QString m_password; // account password, sent in the req_dat query item
+
+ QByteArray m_data; // raw response bytes, decoded as UTF-8 in slotComplete()
+ QMap<int, Data::EntryPtr> m_entries; // search result uid -> entry, filled in slotComplete()
+ QGuardedPtr<KIO::Job> m_job;
+
+ FetchKey m_key;
+ QString m_value;
+ bool m_started; // reported by isSearching()
+};
+
+ }
+}
+#endif
diff --git a/src/fetch/discogsfetcher.cpp b/src/fetch/discogsfetcher.cpp
new file mode 100644
index 0000000..31a8bab
--- /dev/null
+++ b/src/fetch/discogsfetcher.cpp
@@ -0,0 +1,413 @@
+/***************************************************************************
+ copyright : (C) 2008 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "discogsfetcher.h"
+#include "messagehandler.h"
+#include "../translators/xslthandler.h"
+#include "../translators/tellicoimporter.h"
+#include "../imagefactory.h"
+#include "../tellico_kernel.h"
+#include "../tellico_utils.h"
+#include "../collection.h"
+#include "../entry.h"
+#include "../tellico_debug.h"
+
+#include <klocale.h>
+#include <kstandarddirs.h>
+#include <kconfig.h>
+#include <kio/job.h>
+
+#include <qlabel.h>
+#include <qlayout.h>
+#include <qfile.h>
+#include <qwhatsthis.h>
+
+//#define DISCOGS_TEST
+
+namespace {
+ static const int DISCOGS_MAX_RETURNS_TOTAL = 20; // initial value of m_limit, the cap on results emitted per search
+ static const char* DISCOGS_API_URL = "http://www.discogs.com"; // base URL; request paths and query items are added per request
+ static const char* DISCOGS_API_KEY = "de6cb96534"; // built-in default key, replaced by the "API Key" config entry when set
+}
+
+using Tellico::Fetch::DiscogsFetcher;
+
+/**
+ * Creates an idle fetcher using the built-in API key.
+ *
+ * Every scalar member gets a defined value: m_start/m_total use the same
+ * starting values as search(), and m_fetchImages uses the same default as
+ * readConfigHook(), so a config widget created before the configuration is
+ * read does not see an indeterminate flag.
+ */
+DiscogsFetcher::DiscogsFetcher(QObject* parent_, const char* name_)
+    : Fetcher(parent_, name_), m_xsltHandler(0),
+      m_limit(DISCOGS_MAX_RETURNS_TOTAL), m_start(1), m_total(-1), m_job(0),
+      m_key(Title), m_started(false), m_fetchImages(true),
+      m_apiKey(QString::fromLatin1(DISCOGS_API_KEY)) {
+}
+
+/**
+ * Releases the XSLT handler, if one was created.
+ */
+DiscogsFetcher::~DiscogsFetcher() {
+  XSLTHandler* handler = m_xsltHandler;
+  m_xsltHandler = 0;
+  delete handler;
+}
+
+/**
+ * Returns the translated default name of this data source.
+ */
+QString DiscogsFetcher::defaultName() {
+  const QString name = i18n("Discogs Audio Search");
+  return name;
+}
+
+/**
+ * Returns the configured source name, falling back to the default name
+ * when none is set.
+ */
+QString DiscogsFetcher::source() const {
+  if(m_name.isEmpty()) {
+    return defaultName();
+  }
+  return m_name;
+}
+
+/**
+ * Returns true only for album collections.
+ */
+bool DiscogsFetcher::canFetch(int type) const {
+  const bool isAlbum = (Data::Collection::Album == type);
+  return isAlbum;
+}
+
+/**
+ * Reads the API key, the image flag and the custom field list from the
+ * config group. An empty "API Key" entry leaves the current key in place.
+ */
+void DiscogsFetcher::readConfigHook(const KConfigGroup& config_) {
+  const QString configuredKey = config_.readEntry("API Key");
+  if(!configuredKey.isEmpty()) {
+    m_apiKey = configuredKey;
+  }
+
+  m_fetchImages = config_.readBoolEntry("Fetch Images", true);
+  m_fields = config_.readListEntry("Custom Fields");
+}
+
+/**
+ * Starts a new search: records the key and value, resets the paging state
+ * and issues the request.
+ */
+void DiscogsFetcher::search(FetchKey key_, const QString& value_) {
+  // reset paging state for a fresh search
+  m_start = 1;
+  m_total = -1;
+
+  m_key = key_;
+  m_value = value_;
+  m_started = true;
+
+  doSearch();
+}
+
+/**
+ * Re-issues the request for the stored key and value.
+ */
+void DiscogsFetcher::continueSearch() {
+  m_started = true; // mark as running again before the request goes out
+  doSearch();
+}
+
+/**
+ * Builds the request URL for the stored key and value and starts the
+ * download job. Stops the fetcher instead when the current collection type
+ * or the search key is not supported.
+ */
+void DiscogsFetcher::doSearch() {
+  // only album collections can be searched
+  const bool supported = canFetch(Kernel::self()->collectionType());
+  if(!supported) {
+    message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning);
+    stop();
+    return;
+  }
+
+  KURL u(QString::fromLatin1(DISCOGS_API_URL));
+  u.addQueryItem(QString::fromLatin1("f"), QString::fromLatin1("xml"));
+  u.addQueryItem(QString::fromLatin1("api_key"), m_apiKey);
+
+  switch(m_key) {
+    case Title:
+    case Keyword:
+      // both use the search endpoint and differ only in the result type
+      u.setPath(QString::fromLatin1("/search"));
+      u.addQueryItem(QString::fromLatin1("q"), m_value);
+      if(m_key == Title) {
+        u.addQueryItem(QString::fromLatin1("type"), QString::fromLatin1("release"));
+      } else {
+        u.addQueryItem(QString::fromLatin1("type"), QString::fromLatin1("all"));
+      }
+      break;
+
+    case Person:
+      u.setPath(QString::fromLatin1("/artist/%1").arg(m_value));
+      break;
+
+    default:
+      kdWarning() << "DiscogsFetcher::search() - key not recognized: " << m_key << endl;
+      stop();
+      return;
+  }
+
+#ifdef DISCOGS_TEST
+  u = KURL(QString::fromLatin1("/home/robby/discogs-results.xml"));
+#endif
+//  myDebug() << "DiscogsFetcher::search() - url: " << u.url() << endl;
+
+  m_job = KIO::get(u, false, false);
+  connect(m_job, SIGNAL(result(KIO::Job*)),
+          SLOT(slotComplete(KIO::Job*)));
+  connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+          SLOT(slotData(KIO::Job*, const QByteArray&)));
+}
+
+/**
+ * Stops a running search: kills any pending job, discards buffered data
+ * and emits signalDone(). Does nothing when no search is running.
+ */
+void DiscogsFetcher::stop() {
+  if(m_started) {
+    if(m_job) {
+      m_job->kill();
+      m_job = 0;
+    }
+    m_data.truncate(0);
+    m_started = false;
+    emit signalDone(this);
+  }
+}
+
+/**
+ * Appends a chunk of downloaded bytes to m_data.
+ */
+void DiscogsFetcher::slotData(KIO::Job*, const QByteArray& data_) {
+  const uint chunkSize = data_.size();
+  QDataStream appender(m_data, IO_WriteOnly | IO_Append);
+  appender.writeRawBytes(data_.data(), chunkSize);
+}
+
+/**
+ * Handles the end of the search job: converts the downloaded XML with the
+ * stylesheet, imports it as a collection and emits one SearchResult for each
+ * entry, up to m_limit. Every exit path ends in stop() so that signalDone()
+ * is always emitted.
+ */
+void DiscogsFetcher::slotComplete(KIO::Job* job_) {
+//  myDebug() << "DiscogsFetcher::slotComplete()" << endl;
+  // since the fetch is done, don't worry about holding the job pointer
+  // (same as EntrezFetcher::slotComplete(); stop() then has no finished job to kill)
+  m_job = 0;
+
+  if(job_->error()) {
+    job_->showErrorDialog(Kernel::self()->widget());
+    stop();
+    return;
+  }
+
+  if(m_data.isEmpty()) {
+    myDebug() << "DiscogsFetcher::slotComplete() - no data" << endl;
+    stop();
+    return;
+  }
+
+#if 0
+  kdWarning() << "Remove debug from discogsfetcher.cpp" << endl;
+  QFile f(QString::fromLatin1("/tmp/test.xml"));
+  if(f.open(IO_WriteOnly)) {
+    QTextStream t(&f);
+    t.setEncoding(QTextStream::UnicodeUTF8);
+    t << QCString(m_data, m_data.size()+1);
+  }
+  f.close();
+#endif
+
+  if(!m_xsltHandler) {
+    initXSLTHandler();
+    if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading
+      stop();
+      return;
+    }
+  }
+
+  if(m_total == -1) {
+    QDomDocument dom;
+    if(!dom.setContent(m_data, false)) {
+      kdWarning() << "DiscogsFetcher::slotComplete() - server did not return valid XML." << endl;
+      // must stop here as well, otherwise signalDone() is never emitted
+      stop();
+      return;
+    }
+    // total is /resp/searchresults/@numResults
+    // the document element may itself be <resp>; only descend into a <resp>
+    // child when it is not
+    QDomElement root = dom.documentElement();
+    QDomNode resp = root;
+    if(root.tagName() != QString::fromLatin1("resp")) {
+      resp = root.namedItem(QString::fromLatin1("resp"));
+    }
+    QDomElement e = resp.namedItem(QString::fromLatin1("searchresults")).toElement();
+    if(!e.isNull()) {
+      m_total = e.attribute(QString::fromLatin1("numResults")).toInt();
+      myDebug() << "total = " << m_total << endl;
+    }
+  }
+
+  // assume discogs is always utf-8
+  QString str = m_xsltHandler->applyStylesheet(QString::fromUtf8(m_data, m_data.size()));
+  Import::TellicoImporter imp(str);
+  Data::CollPtr coll = imp.collection();
+  if(!coll) {
+    myDebug() << "DiscogsFetcher::slotComplete() - no collection pointer" << endl;
+    stop();
+    return;
+  }
+
+  int count = 0;
+  Data::EntryVec entries = coll->entries();
+  for(Data::EntryVec::Iterator entry = entries.begin(); count < m_limit && entry != entries.end(); ++entry, ++count) {
+    if(!m_started) {
+      // might get aborted
+      break;
+    }
+    QString desc = entry->field(QString::fromLatin1("artist"))
+                   + QChar('/')
+                   + entry->field(QString::fromLatin1("label"));
+
+    SearchResult* r = new SearchResult(this, entry->title(), desc, QString());
+    m_entries.insert(r->uid, Data::EntryPtr(entry));
+    emit signalResultFound(r);
+  }
+  m_start = m_entries.count() + 1;
+  // not sure how to specify start in the REST url
+//  m_hasMoreResults = m_start <= m_total;
+
+  stop(); // required
+}
+
+/**
+ * Returns the full entry for a search result. The summary entry created by
+ * the search is replaced by the release data downloaded here; an entry that
+ * already has a cover is treated as complete and returned unchanged.
+ *
+ * @param uid_ the uid of the SearchResult emitted from slotComplete()
+ * @return the full entry, the summary entry when the release cannot be
+ *         loaded, or a null pointer when uid_ is unknown
+ */
+Tellico::Data::EntryPtr DiscogsFetcher::fetchEntry(uint uid_) {
+  Data::EntryPtr entry = m_entries[uid_];
+  if(!entry) {
+    kdWarning() << "DiscogsFetcher::fetchEntry() - no entry in dict" << endl;
+    return 0;
+  }
+  // one way we tell if this entry has been fully initialized is to
+  // check for a cover image
+  if(!entry->field(QString::fromLatin1("cover")).isEmpty()) {
+    myLog() << "DiscogsFetcher::fetchEntry() - already downloaded " << entry->title() << endl;
+    return entry;
+  }
+
+  QString release = entry->field(QString::fromLatin1("discogs-id"));
+  if(release.isEmpty()) {
+    myDebug() << "DiscogsFetcher::fetchEntry() - no discogs release found" << endl;
+    return entry;
+  }
+
+  // the handler is normally created in slotComplete(); make sure it exists
+  // before it is dereferenced below
+  if(!m_xsltHandler) {
+    initXSLTHandler();
+    if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading
+      return entry;
+    }
+  }
+
+#ifdef DISCOGS_TEST
+  KURL u(QString::fromLatin1("/home/robby/discogs-release.xml"));
+#else
+  KURL u(QString::fromLatin1(DISCOGS_API_URL));
+  u.setPath(QString::fromLatin1("/release/%1").arg(release));
+  u.addQueryItem(QString::fromLatin1("f"), QString::fromLatin1("xml"));
+  u.addQueryItem(QString::fromLatin1("api_key"), m_apiKey);
+#endif
+//  myDebug() << "DiscogsFetcher::fetchEntry() - url: " << u << endl;
+
+  // quiet, utf8, allowCompressed
+  QString output = FileHandler::readTextFile(u, true, true, true);
+  if(output.isEmpty()) {
+    // nothing was downloaded, keep the summary entry
+    myDebug() << "DiscogsFetcher::fetchEntry() - no data" << endl;
+    return entry;
+  }
+#if 0
+  kdWarning() << "Remove output debug from discogsfetcher.cpp" << endl;
+  QFile f(QString::fromLatin1("/tmp/test.xml"));
+  if(f.open(IO_WriteOnly)) {
+    QTextStream t(&f);
+    t.setEncoding(QTextStream::UnicodeUTF8);
+    t << output;
+  }
+  f.close();
+#endif
+
+  Import::TellicoImporter imp(m_xsltHandler->applyStylesheet(output));
+  Data::CollPtr coll = imp.collection();
+//  getTracks(entry);
+  if(!coll) {
+    kdWarning() << "DiscogsFetcher::fetchEntry() - no collection pointer" << endl;
+    return entry;
+  }
+
+  if(coll->entryCount() < 1) {
+    // front() below must not be called on an empty entry list
+    kdWarning() << "DiscogsFetcher::fetchEntry() - no entry in collection" << endl;
+    return entry;
+  }
+  if(coll->entryCount() > 1) {
+    myDebug() << "DiscogsFetcher::fetchEntry() - weird, more than one entry found" << endl;
+  }
+
+  // drop the optional fields the user did not ask for
+  const StringMap customFields = this->customFields();
+  for(StringMap::ConstIterator it = customFields.begin(); it != customFields.end(); ++it) {
+    if(!m_fields.contains(it.key())) {
+      coll->removeField(it.key());
+    }
+  }
+
+  // don't want to include id
+  coll->removeField(QString::fromLatin1("discogs-id"));
+
+  entry = coll->entries().front();
+  m_entries.replace(uid_, entry);
+  return entry;
+}
+
+/**
+ * Locates discogs2tellico.xsl and (re)creates m_xsltHandler from it.
+ * m_xsltHandler is left untouched when the file cannot be found and reset
+ * to 0 when the stylesheet is invalid.
+ */
+void DiscogsFetcher::initXSLTHandler() {
+  const QString stylesheetPath = locate("appdata", QString::fromLatin1("discogs2tellico.xsl"));
+  if(stylesheetPath.isEmpty()) {
+    kdWarning() << "DiscogsFetcher::initXSLTHandler() - can not locate discogs2tellico.xsl." << endl;
+    return;
+  }
+
+  KURL stylesheetUrl;
+  stylesheetUrl.setPath(stylesheetPath);
+
+  // replace any previous handler
+  delete m_xsltHandler;
+  m_xsltHandler = new XSLTHandler(stylesheetUrl);
+  if(m_xsltHandler->isValid()) {
+    return;
+  }
+
+  kdWarning() << "DiscogsFetcher::initXSLTHandler() - error in discogs2tellico.xsl." << endl;
+  delete m_xsltHandler;
+  m_xsltHandler = 0;
+}
+
+/**
+ * Starts a search that can update the given entry: by title when the entry
+ * has one, otherwise by artist. Emits signalDone() right away when the entry
+ * has neither.
+ */
+void DiscogsFetcher::updateEntry(Data::EntryPtr entry_) {
+//  myDebug() << "DiscogsFetcher::updateEntry()" << endl;
+
+  const QString title = entry_->field(QString::fromLatin1("title"));
+  if(!title.isEmpty()) {
+    // search for the title itself; an empty string was passed here before
+    search(Title, title);
+    return;
+  }
+
+  const QString artist = entry_->field(QString::fromLatin1("artist"));
+  if(!artist.isEmpty()) {
+    search(Person, artist);
+    return;
+  }
+
+  myDebug() << "DiscogsFetcher::updateEntry() - insufficient info to search" << endl;
+  emit signalDone(this); // always need to emit this if not continuing with the search
+}
+
+/**
+ * Creates a new configuration widget for this fetcher, pre-filled from it.
+ */
+Tellico::Fetch::ConfigWidget* DiscogsFetcher::configWidget(QWidget* parent_) const {
+  Fetch::ConfigWidget* widget = new DiscogsFetcher::ConfigWidget(parent_, this);
+  return widget;
+}
+
+DiscogsFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const DiscogsFetcher* fetcher_) // builds the options page: API key edit, cover image check box and the custom fields list
+ : Fetch::ConfigWidget(parent_) {
+ QGridLayout* l = new QGridLayout(optionsWidget(), 2, 2);
+ l->setSpacing(4);
+ l->setColStretch(1, 10);
+
+ int row = -1; // incremented before every use below, so the first widget is placed in row 0
+ QLabel* label = new QLabel(i18n("API &key: "), optionsWidget());
+ l->addWidget(label, ++row, 0);
+
+ m_apiKeyEdit = new KLineEdit(optionsWidget());
+ connect(m_apiKeyEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified())); // text edits trigger slotSetModified()
+ l->addWidget(m_apiKeyEdit, row, 1);
+ QString w = i18n("With your discogs.com account you receive an API key for the usage of their XML-based interface "
+ "(See http://www.discogs.com/help/api).");
+ QWhatsThis::add(label, w);
+ QWhatsThis::add(m_apiKeyEdit, w);
+ label->setBuddy(m_apiKeyEdit);
+
+ m_fetchImageCheck = new QCheckBox(i18n("Download cover &image"), optionsWidget());
+ connect(m_fetchImageCheck, SIGNAL(clicked()), SLOT(slotSetModified()));
+ ++row;
+ l->addMultiCellWidget(m_fetchImageCheck, row, row, 0, 1); // the check box spans both columns
+ w = i18n("The cover image may be downloaded as well. However, too many large images in the "
+ "collection may degrade performance.");
+ QWhatsThis::add(m_fetchImageCheck, w);
+
+ l->setRowStretch(++row, 10); // stretch is set on the row after the last widget
+
+ // now add additional fields widget, with the fetcher's current selection when a fetcher was given
+ addFieldsWidget(DiscogsFetcher::customFields(), fetcher_ ? fetcher_->m_fields : QStringList());
+
+ if(fetcher_) { // pre-fill from the fetcher, otherwise fall back to the built-in defaults
+ m_apiKeyEdit->setText(fetcher_->m_apiKey);
+ m_fetchImageCheck->setChecked(fetcher_->m_fetchImages);
+ } else {
+ m_apiKeyEdit->setText(QString::fromLatin1(DISCOGS_API_KEY));
+ m_fetchImageCheck->setChecked(true);
+ }
+}
+
+/**
+ * Writes the widget state to the config group. The API key is written only
+ * when it is non-empty after trimming whitespace.
+ */
+void DiscogsFetcher::ConfigWidget::saveConfig(KConfigGroup& config_) {
+  const QString trimmedKey = m_apiKeyEdit->text().stripWhiteSpace();
+  if(!trimmedKey.isEmpty()) {
+    config_.writeEntry("API Key", trimmedKey);
+  }
+
+  const bool fetchImages = m_fetchImageCheck->isChecked();
+  config_.writeEntry("Fetch Images", fetchImages);
+
+  saveFieldsConfig(config_);
+  slotSetModified(false);
+}
+
+/**
+ * Returns the fetcher's default name as the preferred source name.
+ */
+QString DiscogsFetcher::ConfigWidget::preferredName() const {
+  const QString name = DiscogsFetcher::defaultName();
+  return name;
+}
+
+/**
+ * Returns the optional fields this fetcher can fill, as a map from field
+ * name to translated title.
+ */
+Tellico::StringMap DiscogsFetcher::customFields() {
+  StringMap fields;
+  fields[QString::fromLatin1("producer")]    = i18n("Producer");
+  fields[QString::fromLatin1("nationality")] = i18n("Nationality");
+  fields[QString::fromLatin1("discogs")]     = i18n("Discogs Link");
+  return fields;
+}
+
+#include "discogsfetcher.moc"
diff --git a/src/fetch/discogsfetcher.h b/src/fetch/discogsfetcher.h
new file mode 100644
index 0000000..ac8c1b8
--- /dev/null
+++ b/src/fetch/discogsfetcher.h
@@ -0,0 +1,117 @@
+/***************************************************************************
+ copyright : (C) 2008 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef DISCOGSFETCHER_H
+#define DISCOGSFETCHER_H
+
+namespace Tellico {
+ class XSLTHandler;
+}
+
+#include "fetcher.h"
+#include "configwidget.h"
+#include "../datavectors.h"
+#include <klineedit.h>
+
+#include <qdom.h>
+#include <qcstring.h> // for QByteArray
+#include <qguardedptr.h>
+
+namespace KIO {
+ class Job;
+}
+
+namespace Tellico {
+ namespace Fetch {
+
+/**
+ * A fetcher for discogs.com
+ *
+ * Searches for album data by title, person or keyword and downloads the
+ * full release data when an entry is fetched.
+ *
+ * @author Robby Stephenson
+ */
+class DiscogsFetcher : public Fetcher {
+Q_OBJECT
+
+public:
+ /**
+ * Creates an idle fetcher that uses the built-in API key until the
+ * configuration is read.
+ */
+ DiscogsFetcher(QObject* parent, const char* name = 0);
+ /**
+ * Deletes the XSLT handler.
+ */
+ virtual ~DiscogsFetcher();
+
+ /**
+ * Returns the configured source name, or the default name when none is set.
+ */
+ virtual QString source() const;
+ virtual bool isSearching() const { return m_started; }
+ virtual void search(FetchKey key, const QString& value);
+ virtual void continueSearch();
+ // discogs can search by title, person or keyword
+ virtual bool canSearch(FetchKey k) const { return k == Title || k == Person || k == Keyword; }
+ virtual void stop();
+ virtual Data::EntryPtr fetchEntry(uint uid); // uid is the SearchResult uid emitted with signalResultFound()
+ virtual Type type() const { return Discogs; }
+ virtual bool canFetch(int type) const; // true only for album collections
+ virtual void readConfigHook(const KConfigGroup& config);
+
+ virtual void updateEntry(Data::EntryPtr entry);
+
+ /**
+ * Returns a widget for modifying the fetcher's config.
+ */
+ virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;
+
+ static StringMap customFields(); // optional field name -> translated title
+
+ class ConfigWidget : public Fetch::ConfigWidget { // options page with the API key edit and the cover image check box
+ public:
+ ConfigWidget(QWidget* parent_, const DiscogsFetcher* fetcher = 0);
+ virtual void saveConfig(KConfigGroup&);
+ virtual QString preferredName() const;
+ private:
+ KLineEdit *m_apiKeyEdit;
+ QCheckBox* m_fetchImageCheck;
+ };
+ friend class ConfigWidget; // the widget reads m_apiKey, m_fetchImages and m_fields directly
+
+ static QString defaultName();
+
+private slots:
+ void slotData(KIO::Job* job, const QByteArray& data); // appends the received bytes to m_data
+ void slotComplete(KIO::Job* job);
+
+private:
+ void initXSLTHandler(); // (re)creates m_xsltHandler; leaves it 0 when the stylesheet is invalid
+ void doSearch(); // builds the request for m_key/m_value and starts the job
+
+ XSLTHandler* m_xsltHandler; // created lazily by initXSLTHandler()
+ int m_limit; // maximum number of results emitted by slotComplete()
+ int m_start; // 1 for a new search; set to the stored entry count + 1 after results arrive
+ int m_total; // -1 until numResults has been read from a response
+
+ QByteArray m_data; // raw response bytes, decoded as UTF-8 in slotComplete()
+ QMap<int, Data::EntryPtr> m_entries; // search result uid -> entry
+ QGuardedPtr<KIO::Job> m_job;
+
+ FetchKey m_key;
+ QString m_value;
+ bool m_started; // reported by isSearching()
+
+ bool m_fetchImages; // "Fetch Images" config entry
+ QString m_apiKey; // "API Key" config entry, or the built-in default
+ QStringList m_fields; // "Custom Fields" config entry
+};
+
+ } // end namespace
+} // end namespace
+#endif
diff --git a/src/fetch/entrezfetcher.cpp b/src/fetch/entrezfetcher.cpp
new file mode 100644
index 0000000..14b9e20
--- /dev/null
+++ b/src/fetch/entrezfetcher.cpp
@@ -0,0 +1,498 @@
+/***************************************************************************
+ copyright : (C) 2005-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "entrezfetcher.h"
+#include "../tellico_kernel.h"
+#include "../latin1literal.h"
+#include "../collection.h"
+#include "../entry.h"
+#include "../filehandler.h"
+#include "../translators/xslthandler.h"
+#include "../translators/tellicoimporter.h"
+#include "../tellico_debug.h"
+
+#include <klocale.h>
+#include <kconfig.h>
+#include <kstandarddirs.h>
+#include <kio/job.h>
+
+#include <qdom.h>
+#include <qlabel.h>
+#include <qlayout.h>
+#include <qfile.h>
+
+//#define ENTREZ_TEST
+
+namespace {
+ static const int ENTREZ_MAX_RETURNS_TOTAL = 25; // upper bound for the retmax value of a summary request
+ static const char* ENTREZ_BASE_URL = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"; // the CGI names below are appended to this
+ static const char* ENTREZ_SEARCH_CGI = "esearch.fcgi"; // used by search()
+ static const char* ENTREZ_SUMMARY_CGI = "esummary.fcgi"; // used by doSummary()
+ static const char* ENTREZ_FETCH_CGI = "efetch.fcgi";
+ static const char* ENTREZ_LINK_CGI = "elink.fcgi";
+ static const char* ENTREZ_DEFAULT_DATABASE = "pubmed"; // used when no "Database" is configured
+}
+
+using Tellico::Fetch::EntrezFetcher;
+
+/**
+ * Creates an idle fetcher with no XSLT handler loaded yet.
+ */
+EntrezFetcher::EntrezFetcher(QObject* parent_, const char* name_)
+    : Fetcher(parent_, name_),
+      m_xsltHandler(0),
+      m_step(Begin),
+      m_started(false) {
+}
+
+/**
+ * Releases the XSLT handler so it is not leaked.
+ * NOTE(review): assumes m_xsltHandler is an owning raw pointer, as in
+ * DiscogsFetcher, whose destructor does the same - confirm against
+ * entrezfetcher.h.
+ */
+EntrezFetcher::~EntrezFetcher() {
+  delete m_xsltHandler;
+  m_xsltHandler = 0;
+}
+
+/**
+ * Returns the translated default name of this data source.
+ */
+QString EntrezFetcher::defaultName() {
+  const QString name = i18n("Entrez Database");
+  return name;
+}
+
+/**
+ * Returns the configured source name, falling back to the default name
+ * when none is set.
+ */
+QString EntrezFetcher::source() const {
+  if(m_name.isEmpty()) {
+    return defaultName();
+  }
+  return m_name;
+}
+
+/**
+ * Returns true only for bibtex collections.
+ */
+bool EntrezFetcher::canFetch(int type) const {
+  const bool isBibtex = (Data::Collection::Bibtex == type);
+  return isBibtex;
+}
+
+/**
+ * Reads the database name and the custom field list from the config group.
+ * An empty "Database" value leaves the current database name in place.
+ */
+void EntrezFetcher::readConfigHook(const KConfigGroup& config_) {
+  // default to pubmed
+  const QString configuredDb = config_.readEntry("Database", QString::fromLatin1(ENTREZ_DEFAULT_DATABASE));
+  if(!configuredDb.isEmpty()) {
+    m_dbname = configuredDb;
+  }
+
+  m_fields = config_.readListEntry("Custom Fields");
+}
+
+/**
+ * Starts a new search. The first request only asks the search CGI for the
+ * result count and history keys; slotComplete() continues from there.
+ * Stops immediately when the collection type or the key is not supported.
+ */
+void EntrezFetcher::search(FetchKey key_, const QString& value_) {
+  m_started = true;
+  m_start = 1;
+  m_total = -1;
+
+  // only search if current collection is a bibliography
+  const bool supported = canFetch(Kernel::self()->collectionType());
+  if(!supported) {
+    myDebug() << "EntrezFetcher::search() - collection type mismatch, stopping" << endl;
+    stop();
+    return;
+  }
+  if(m_dbname.isEmpty()) {
+    m_dbname = QString::fromLatin1(ENTREZ_DEFAULT_DATABASE);
+  }
+
+#ifdef ENTREZ_TEST
+  KURL u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/esearch.xml"));
+#else
+  KURL u(QString::fromLatin1(ENTREZ_BASE_URL));
+  u.addPath(QString::fromLatin1(ENTREZ_SEARCH_CGI));
+  u.addQueryItem(QString::fromLatin1("tool"),       QString::fromLatin1("Tellico"));
+  u.addQueryItem(QString::fromLatin1("retmode"),    QString::fromLatin1("xml"));
+  u.addQueryItem(QString::fromLatin1("usehistory"), QString::fromLatin1("y"));
+  u.addQueryItem(QString::fromLatin1("retmax"),     QString::fromLatin1("1")); // we're just getting the count
+  u.addQueryItem(QString::fromLatin1("db"),         m_dbname);
+  u.addQueryItem(QString::fromLatin1("term"),       value_);
+
+  // name of the search field to restrict to; stays empty when no "field" item is wanted
+  QString searchField;
+  switch(key_) {
+    case Title:
+      searchField = QString::fromLatin1("titl");
+      break;
+
+    case Person:
+      searchField = QString::fromLatin1("auth");
+      break;
+
+    case Keyword:
+      // for Tellico Keyword searches basically mean search for any field matching
+//      searchField = QString::fromLatin1("word");
+      break;
+
+    case PubmedID:
+      searchField = QString::fromLatin1("pmid");
+      break;
+
+    case DOI:
+    case Raw:
+      // the value is appended to the query string as-is
+      u.setQuery(u.query() + '&' + value_);
+      break;
+
+    default:
+      kdWarning() << "EntrezFetcher::search() - FetchKey not supported" << endl;
+      stop();
+      return;
+  }
+  if(!searchField.isEmpty()) {
+    u.addQueryItem(QString::fromLatin1("field"), searchField);
+  }
+#endif
+
+  m_step = Search;
+//  myLog() << "EntrezFetcher::doSearch() - url: " << u.url() << endl;
+  m_job = KIO::get(u, false, false);
+  connect(m_job, SIGNAL(result(KIO::Job*)),
+          SLOT(slotComplete(KIO::Job*)));
+  connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+          SLOT(slotData(KIO::Job*, const QByteArray&)));
+}
+
+/**
+ * Requests the next batch of summaries for the current search.
+ */
+void EntrezFetcher::continueSearch() {
+  m_started = true; // mark as running again before the request goes out
+  doSummary();
+}
+
+/**
+ * Stops a running search: kills any pending job, discards buffered data,
+ * resets the step to Begin and emits signalDone(). Does nothing when no
+ * search is running.
+ */
+void EntrezFetcher::stop() {
+  if(m_started) {
+    if(m_job) {
+      m_job->kill();
+      m_job = 0;
+    }
+    m_data.truncate(0);
+    m_started = false;
+    m_step = Begin;
+    emit signalDone(this);
+  }
+}
+
+/**
+ * Appends a chunk of downloaded bytes to m_data.
+ */
+void EntrezFetcher::slotData(KIO::Job*, const QByteArray& data_) {
+  const uint chunkSize = data_.size();
+  QDataStream appender(m_data, IO_WriteOnly | IO_Append);
+  appender.writeRawBytes(data_.data(), chunkSize);
+}
+
+/**
+ * Handles the end of a download job and hands the buffered data to the
+ * handler for the current step. Stops the fetcher on a job error, on an
+ * empty response or when the step is not Search or Summary.
+ */
+void EntrezFetcher::slotComplete(KIO::Job* job_) {
+  // since the fetch is done, don't worry about holding the job pointer
+  m_job = 0;
+
+  if(job_->error()) {
+    job_->showErrorDialog(Kernel::self()->widget());
+    stop();
+    return;
+  }
+
+  if(m_data.isEmpty()) {
+    myDebug() << "EntrezFetcher::slotComplete() - no data" << endl;
+    stop();
+    return;
+  }
+
+#if 0
+  kdWarning() << "Remove debug from entrezfetcher.cpp: " << __LINE__ << endl;
+  QFile f(QString::fromLatin1("/tmp/test.xml"));
+  if(f.open(IO_WriteOnly)) {
+    QTextStream t(&f);
+    t.setEncoding(QTextStream::UnicodeUTF8);
+    t << QCString(m_data, m_data.size()+1);
+  }
+  f.close();
+#endif
+
+  if(m_step == Search) {
+    searchResults();
+  } else if(m_step == Summary) {
+    summaryResults();
+  } else {
+    // Begin, Fetch or anything else is not expected here
+    myLog() << "EntrezFetcher::slotComplete() - wrong step = " << m_step << endl;
+    stop();
+  }
+}
+
+/**
+ * Parses the search response, storing the result count and the history
+ * keys (QueryKey, WebEnv), then requests the first batch of summaries.
+ * Stops the fetcher when the response is not valid XML.
+ */
+void EntrezFetcher::searchResults() {
+  QDomDocument dom;
+  if(!dom.setContent(m_data, false)) {
+    kdWarning() << "EntrezFetcher::searchResults() - server did not return valid XML." << endl;
+    stop();
+    return;
+  }
+
+  // find Count, QueryKey, and WebEnv elements
+  int found = 0;
+  QDomNode n = dom.documentElement().firstChild();
+  while(!n.isNull() && found < 3) { // stop scanning once all three were seen
+    QDomElement e = n.toElement();
+    if(!e.isNull()) {
+      const QString tag = e.tagName();
+      if(tag == Latin1Literal("Count")) {
+        m_total = e.text().toInt();
+        ++found;
+      } else if(tag == Latin1Literal("QueryKey")) {
+        m_queryKey = e.text();
+        ++found;
+      } else if(tag == Latin1Literal("WebEnv")) {
+        m_webEnv = e.text();
+        ++found;
+      }
+    }
+    n = n.nextSibling();
+  }
+
+  m_data.truncate(0);
+  doSummary();
+}
+
+/**
+ * Requests the next batch of result summaries, starting at m_start, using
+ * the history keys stored by searchResults(). Stops the fetcher instead
+ * when no results are left to request.
+ */
+void EntrezFetcher::doSummary() {
+  // m_start is a 1-based index (search() starts it at 1 and summaryResults()
+  // sets it to the number of matches + 1), so the records still to be
+  // fetched are m_start..m_total inclusive
+  const int remaining = m_total - m_start + 1;
+  if(remaining < 1) {
+    // also covers m_total == -1, i.e. no Count element was found
+    myDebug() << "EntrezFetcher::doSummary() - no results to fetch" << endl;
+    stop();
+    return;
+  }
+
+#ifdef ENTREZ_TEST
+  KURL u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/esummary.xml"));
+#else
+  KURL u(QString::fromLatin1(ENTREZ_BASE_URL));
+  u.addPath(QString::fromLatin1(ENTREZ_SUMMARY_CGI));
+  u.addQueryItem(QString::fromLatin1("tool"),       QString::fromLatin1("Tellico"));
+  u.addQueryItem(QString::fromLatin1("retmode"),    QString::fromLatin1("xml"));
+  u.addQueryItem(QString::fromLatin1("retstart"),   QString::number(m_start));
+  u.addQueryItem(QString::fromLatin1("retmax"),     QString::number(QMIN(remaining, ENTREZ_MAX_RETURNS_TOTAL)));
+  u.addQueryItem(QString::fromLatin1("usehistory"), QString::fromLatin1("y"));
+  u.addQueryItem(QString::fromLatin1("db"),         m_dbname);
+  u.addQueryItem(QString::fromLatin1("query_key"),  m_queryKey);
+  u.addQueryItem(QString::fromLatin1("WebEnv"),     m_webEnv);
+#endif
+
+  m_step = Summary;
+//  myLog() << "EntrezFetcher::searchResults() - url: " << u.url() << endl;
+  m_job = KIO::get(u, false, false);
+  connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+          SLOT(slotData(KIO::Job*, const QByteArray&)));
+  connect(m_job, SIGNAL(result(KIO::Job*)),
+          SLOT(slotComplete(KIO::Job*)));
+}
+
+/**
+ * Parses the summary response and emits one SearchResult for each DocSum
+ * element, remembering its Id in m_matches. Then updates the paging state
+ * and stops the fetcher.
+ */
+void EntrezFetcher::summaryResults() {
+  QDomDocument dom;
+  if(!dom.setContent(m_data, false)) {
+    kdWarning() << "EntrezFetcher::summaryResults() - server did not return valid XML." << endl;
+    stop();
+    return;
+  }
+
+  const QString nameAttr = QString::fromLatin1("Name");
+  // top child is eSummaryResult
+  // all children are DocSum
+  for(QDomNode n = dom.documentElement().firstChild(); !n.isNull(); n = n.nextSibling()) {
+    QDomElement docSum = n.toElement();
+    if(docSum.isNull() || docSum.tagName() != Latin1Literal("DocSum")) {
+      continue;
+    }
+    QDomNodeList nodes = docSum.elementsByTagName(QString::fromLatin1("Id"));
+    if(nodes.count() == 0) {
+      myDebug() << "EntrezFetcher::summaryResults() - no Id elements" << endl;
+      continue;
+    }
+    int id = nodes.item(0).toElement().text().toInt();
+
+    QString title, pubdate, authors;
+    nodes = docSum.elementsByTagName(QString::fromLatin1("Item"));
+    for(uint j = 0; j < nodes.count(); ++j) {
+      QDomElement item = nodes.item(j).toElement();
+      const QString itemName = item.attribute(nameAttr);
+      if(itemName == Latin1Literal("Title")) {
+        title = item.text();
+      } else if(itemName == Latin1Literal("PubDate")) {
+        pubdate = item.text();
+      } else if(itemName == Latin1Literal("AuthorList")) {
+        QStringList names;
+        for(QDomNode aNode = item.firstChild(); !aNode.isNull(); aNode = aNode.nextSibling()) {
+          // lazy, assume all children Items are authors
+          if(aNode.nodeName() == Latin1Literal("Item")) {
+            names << aNode.toElement().text();
+          }
+        }
+        authors = names.join(QString::fromLatin1("; "));
+      }
+      // done as soon as all three values are known
+      if(!(title.isEmpty() || pubdate.isEmpty() || authors.isEmpty())) {
+        break;
+      }
+    }
+
+    SearchResult* r = new SearchResult(this, title, pubdate + '/' + authors, QString());
+    m_matches.insert(r->uid, id);
+    emit signalResultFound(r);
+  }
+
+  m_start = m_matches.count() + 1;
+  m_hasMoreResults = m_start <= m_total;
+  stop(); // done searching
+}
+
+Tellico::Data::EntryPtr EntrezFetcher::fetchEntry(uint uid_) { // returns the entry for search result uid_, downloading and converting it on first use; returns 0 on any failure
+ // if we already grabbed this one, then just pull it out of the dict
+ Data::EntryPtr entry = m_entries[uid_];
+ if(entry) {
+ return entry;
+ }
+
+ if(!m_matches.contains(uid_)) { // no Entrez id was recorded for this search result
+ return 0;
+ }
+
+ if(!m_xsltHandler) { // the stylesheet handler is created on first use
+ initXSLTHandler();
+ if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading
+ stop();
+ return 0;
+ }
+ }
+
+ int id = m_matches[uid_]; // Entrez id stored for this search result
+#ifdef ENTREZ_TEST
+ KURL u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/pubmed.xml"));
+#else
+ KURL u(QString::fromLatin1(ENTREZ_BASE_URL));
+ u.addPath(QString::fromLatin1(ENTREZ_FETCH_CGI));
+ u.addQueryItem(QString::fromLatin1("tool"), QString::fromLatin1("Tellico"));
+ u.addQueryItem(QString::fromLatin1("retmode"), QString::fromLatin1("xml"));
+ u.addQueryItem(QString::fromLatin1("rettype"), QString::fromLatin1("abstract"));
+ u.addQueryItem(QString::fromLatin1("db"), m_dbname);
+ u.addQueryItem(QString::fromLatin1("id"), QString::number(id));
+#endif
+ // now it's synchronous, and we know that it's utf8
+ QString xmlOutput = FileHandler::readTextFile(u, false /*quiet*/, true /*utf8*/);
+ if(xmlOutput.isEmpty()) {
+ kdWarning() << "EntrezFetcher::fetchEntry() - unable to download " << u << endl;
+ return 0;
+ }
+#if 0
+ kdWarning() << "EntrezFetcher::fetchEntry() - turn me off!" << endl;
+ QFile f1(QString::fromLatin1("/tmp/test-entry.xml"));
+ if(f1.open(IO_WriteOnly)) {
+ QTextStream t(&f1);
+ t.setEncoding(QTextStream::UnicodeUTF8);
+ t << xmlOutput;
+ }
+ f1.close();
+#endif
+ QString str = m_xsltHandler->applyStylesheet(xmlOutput); // transform the download with the stylesheet loaded by initXSLTHandler()
+ Import::TellicoImporter imp(str); // the transformed text is then read by the Tellico importer
+ Data::CollPtr coll = imp.collection();
+ if(!coll) {
+ kdWarning() << "EntrezFetcher::fetchEntry() - invalid collection" << endl;
+ return 0;
+ }
+ if(coll->entryCount() == 0) {
+ myDebug() << "EntrezFetcher::fetchEntry() - no entries in collection" << endl;
+ return 0;
+ } else if(coll->entryCount() > 1) {
+ myDebug() << "EntrezFetcher::fetchEntry() - collection has multiple entries, taking first one" << endl;
+ }
+
+ Data::EntryPtr e = coll->entries().front(); // only the first imported entry is used
+
+ // try to get a link, but only if necessary
+ if(m_fields.contains(QString::fromLatin1("url"))) { // a second, synchronous request is made only when the url field is selected
+ KURL link(QString::fromLatin1(ENTREZ_BASE_URL));
+ link.addPath(QString::fromLatin1(ENTREZ_LINK_CGI));
+ link.addQueryItem(QString::fromLatin1("tool"), QString::fromLatin1("Tellico"));
+ link.addQueryItem(QString::fromLatin1("cmd"), QString::fromLatin1("llinks"));
+ link.addQueryItem(QString::fromLatin1("db"), m_dbname);
+ link.addQueryItem(QString::fromLatin1("dbfrom"), m_dbname);
+ link.addQueryItem(QString::fromLatin1("id"), QString::number(id));
+
+ QDomDocument linkDom = FileHandler::readXMLFile(link, false /* namespace */, true /* quiet */);
+ // need eLinkResult/LinkSet/IdUrlList/IdUrlSet/ObjUrl/Url
+ QDomNode linkNode = linkDom.namedItem(QString::fromLatin1("eLinkResult"))
+ .namedItem(QString::fromLatin1("LinkSet"))
+ .namedItem(QString::fromLatin1("IdUrlList"))
+ .namedItem(QString::fromLatin1("IdUrlSet"))
+ .namedItem(QString::fromLatin1("ObjUrl"))
+ .namedItem(QString::fromLatin1("Url"));
+ if(!linkNode.isNull()) {
+ QString u = linkNode.toElement().text(); // note: shadows the KURL named u declared above
+// myDebug() << u << endl;
+ if(!u.isEmpty()) {
+ if(!coll->hasField(QString::fromLatin1("url"))) { // add the url field to the collection if it lacks one
+ Data::FieldPtr field = new Data::Field(QString::fromLatin1("url"), i18n("URL"), Data::Field::URL);
+ field->setCategory(i18n("Miscellaneous"));
+ coll->addField(field);
+ }
+ e->setField(QString::fromLatin1("url"), u);
+ }
+ }
+ }
+
+ const StringMap customFields = EntrezFetcher::customFields();
+ for(StringMap::ConstIterator it = customFields.begin(); it != customFields.end(); ++it) {
+ if(!m_fields.contains(it.key())) { // custom fields not listed in m_fields are removed from the collection
+ coll->removeField(it.key());
+ }
+ }
+
+ m_entries.insert(uid_, e); // remember the entry so later calls return it directly
+ return e;
+}
+
+/**
+ * Makes sure m_xsltHandler refers to a valid handler for pubmed2tellico.xsl.
+ *
+ * If the stylesheet can not be located, nothing is changed. If the handler
+ * turns out to be invalid it is deleted and m_xsltHandler is reset to 0, so
+ * callers detect failure by checking the member afterwards.
+ */
+void EntrezFetcher::initXSLTHandler() {
+  const QString stylesheetPath = locate("appdata", QString::fromLatin1("pubmed2tellico.xsl"));
+  if(stylesheetPath.isEmpty()) {
+    kdWarning() << "EntrezFetcher::initXSLTHandler() - can not locate pubmed2tellico.xsl." << endl;
+    return;
+  }
+
+  if(!m_xsltHandler) {
+    KURL stylesheetUrl;
+    stylesheetUrl.setPath(stylesheetPath);
+    m_xsltHandler = new XSLTHandler(stylesheetUrl);
+  }
+
+  if(m_xsltHandler->isValid()) {
+    return;
+  }
+
+  kdWarning() << "EntrezFetcher::initXSLTHandler() - error in pubmed2tellico.xsl." << endl;
+  delete m_xsltHandler;
+  m_xsltHandler = 0;
+}
+
+/**
+ * Starts a search for updated data for @p entry_.
+ *
+ * The first non-empty field among pmid, doi and title, in that order, is used
+ * as the search value with the matching key. If all three are empty, only
+ * signalDone() is emitted.
+ */
+void EntrezFetcher::updateEntry(Data::EntryPtr entry_) {
+//  myDebug() << "EntrezFetcher::updateEntry()" << endl;
+  // candidate fields in order of preference, with the search key for each
+  static const char* const fieldNames[] = { "pmid", "doi", "title" };
+  static const FetchKey fieldKeys[] = { PubmedID, DOI, Title };
+  const int candidateCount = 3;
+
+  for(int idx = 0; idx < candidateCount; ++idx) {
+    const QString value = entry_->field(QString::fromLatin1(fieldNames[idx]));
+    if(value.isEmpty()) {
+      continue;
+    }
+    search(fieldKeys[idx], value);
+    return;
+  }
+
+  myDebug() << "EntrezFetcher::updateEntry() - insufficient info to search" << endl;
+  emit signalDone(this); // always need to emit this if not continuing with the search
+}
+
+Tellico::Fetch::ConfigWidget* EntrezFetcher::configWidget(QWidget* parent_) const { // creates a new configuration widget initialised from this fetcher
+ return new EntrezFetcher::ConfigWidget(parent_, this); // allocated with new and handed to the caller; parent_ is passed on as the widget parent
+}
+
+/**
+ * Builds the configuration widget: a note that there are no source-specific
+ * options, followed by the selection widget for the optional custom fields.
+ *
+ * @param parent_ parent widget
+ * @param fetcher_ fetcher whose selected fields are shown; may be 0, in which
+ *                 case no field is pre-selected
+ */
+EntrezFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const EntrezFetcher* fetcher_/*=0*/)
+    : Fetch::ConfigWidget(parent_) {
+  QVBoxLayout* layout = new QVBoxLayout(optionsWidget());
+  QLabel* note = new QLabel(i18n("This source has no options."), optionsWidget());
+  layout->addWidget(note);
+  layout->addStretch();
+
+  // now add additional fields widget
+  QStringList selectedFields;
+  if(fetcher_) {
+    selectedFields = fetcher_->m_fields;
+  }
+  addFieldsWidget(EntrezFetcher::customFields(), selectedFields);
+}
+
+void EntrezFetcher::ConfigWidget::saveConfig(KConfigGroup& config_) { // writes the widget state into config_
+ saveFieldsConfig(config_); // the fields selection is the only setting this source has
+ slotSetModified(false); // the widget is no longer marked as modified once saved
+}
+
+QString EntrezFetcher::ConfigWidget::preferredName() const { // name suggested for a source configured with this widget
+ return EntrezFetcher::defaultName(); // always the fetcher's default name
+}
+
+//static
+/**
+ * Returns the optional fields this source can provide, mapping each field
+ * name to its translated title.
+ */
+Tellico::StringMap EntrezFetcher::customFields() {
+  StringMap fields;
+  fields.insert(QString::fromLatin1("institution"), i18n("Institution"));
+  fields.insert(QString::fromLatin1("abstract"), i18n("Abstract"));
+  fields.insert(QString::fromLatin1("url"), i18n("URL"));
+  return fields;
+}
+
+#include "entrezfetcher.moc"
diff --git a/src/fetch/entrezfetcher.h b/src/fetch/entrezfetcher.h
new file mode 100644
index 0000000..c8aac49
--- /dev/null
+++ b/src/fetch/entrezfetcher.h
@@ -0,0 +1,113 @@
+/***************************************************************************
+ copyright : (C) 2005-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef TELLICO_ENTREZFETCHER_H
+#define TELLICO_ENTREZFETCHER_H
+
+namespace Tellico {
+ class XSLTHandler;
+}
+
+#include "fetcher.h"
+#include "configwidget.h"
+#include "../datavectors.h"
+
+#include <qcstring.h> // for QByteArray
+#include <qguardedptr.h>
+
+namespace KIO {
+ class Job;
+}
+
+namespace Tellico {
+ namespace Fetch {
+
+/**
+ * @author Robby Stephenson
+ */
+class EntrezFetcher : public Fetcher { // fetcher for the Entrez service; results are converted with pubmed2tellico.xsl
+Q_OBJECT
+
+public:
+ EntrezFetcher(QObject* parent, const char* name=0);
+ /**
+ * Virtual destructor.
+ */
+ virtual ~EntrezFetcher();
+
+ virtual QString source() const;
+ virtual bool isSearching() const { return m_started; }
+ // searchable keys: title, person, keyword, raw query, PubMed ID and DOI
+ virtual bool canSearch(FetchKey k) const { return k == Title || k == Person || k == Keyword || k == Raw || k == PubmedID || k == DOI; }
+ virtual void search(FetchKey key, const QString& value);
+ virtual void continueSearch();
+ virtual void stop();
+ virtual Data::EntryPtr fetchEntry(uint uid); // returns 0 when the entry can not be produced
+ virtual Type type() const { return Entrez; }
+ virtual bool canFetch(int type) const;
+ virtual void readConfigHook(const KConfigGroup& config);
+ virtual void updateEntry(Data::EntryPtr entry); // searches by pmid, doi or title, whichever is non-empty first
+ virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;
+
+ static StringMap customFields(); // optional fields: field name -> translated title
+
+ class ConfigWidget : public Fetch::ConfigWidget { // configuration widget; offers only the custom fields selection
+ public:
+ ConfigWidget(QWidget* parent_, const EntrezFetcher* fetcher=0);
+ virtual void saveConfig(KConfigGroup& config);
+ virtual QString preferredName() const;
+ };
+ friend class ConfigWidget; // the widget reads m_fields directly
+
+ static QString defaultName();
+
+private slots:
+ void slotData(KIO::Job* job, const QByteArray& data);
+ void slotComplete(KIO::Job* job);
+
+private:
+ void initXSLTHandler(); // creates m_xsltHandler; leaves it 0 on failure
+ void doSummary(); // starts the summary request
+
+ void searchResults(); // parses the search response, then calls doSummary()
+ void summaryResults(); // parses the summary response and emits the results
+
+ enum Step { // request phase, stored in m_step
+ Begin,
+ Search,
+ Summary,
+ Fetch
+ };
+
+ XSLTHandler* m_xsltHandler; // created lazily by initXSLTHandler()
+ QString m_dbname; // sent as the db query item
+
+ int m_start; // sent as retstart for the next summary request
+ int m_total; // value of the Count element of the search response
+
+ QByteArray m_data; // raw response data being parsed
+ QMap<int, Data::EntryPtr> m_entries; // map from search result id to entry
+ QMap<int, int> m_matches; // search result id to pubmed id
+ QGuardedPtr<KIO::Job> m_job; // job of the request in progress
+
+ QString m_queryKey; // QueryKey element of the search response
+ QString m_webEnv; // WebEnv element of the search response
+ Step m_step;
+
+ bool m_started; // reported by isSearching()
+ QStringList m_fields; // custom fields selected in the configuration
+};
+
+ } // end namespace
+} // end namespace
+
+#endif
diff --git a/src/fetch/execexternalfetcher.cpp b/src/fetch/execexternalfetcher.cpp
new file mode 100644
index 0000000..07b99d8
--- /dev/null
+++ b/src/fetch/execexternalfetcher.cpp
@@ -0,0 +1,561 @@
+/***************************************************************************
+ copyright : (C) 2005-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "execexternalfetcher.h"
+#include "messagehandler.h"
+#include "fetchmanager.h"
+#include "../collection.h"
+#include "../entry.h"
+#include "../importdialog.h"
+#include "../translators/tellicoimporter.h"
+#include "../tellico_debug.h"
+#include "../gui/combobox.h"
+#include "../gui/lineedit.h"
+#include "../gui/collectiontypecombo.h"
+#include "../tellico_utils.h"
+#include "../newstuff/manager.h"
+
+#include <klocale.h>
+#include <kconfig.h>
+#include <kprocess.h>
+#include <kurlrequester.h>
+#include <kaccelmanager.h>
+
+#include <qlayout.h>
+#include <qlabel.h>
+#include <qwhatsthis.h>
+#include <qregexp.h>
+#include <qvgroupbox.h>
+#include <qfile.h> // needed for QFile::remove
+
+using Tellico::Fetch::ExecExternalFetcher;
+
+QStringList ExecExternalFetcher::parseArguments(const QString& str_) { // splits str_ into arguments: quoted sections become one argument each, everything else is split on whitespace
+ // matching escaped quotes is too hard... :(
+// QRegExp quotes(QString::fromLatin1("[^\\\\](['\"])(.*[^\\\\])\\1"));
+ QRegExp quotes(QString::fromLatin1("(['\"])(.*)\\1")); // group 1 is the quote character, group 2 the quoted text
+ quotes.setMinimal(true); // non-greedy, so a match ends at the nearest matching closing quote
+ QRegExp spaces(QString::fromLatin1("\\s+"));
+ spaces.setMinimal(true);
+
+ QStringList args;
+ int pos = 0; // start of the text that has not been consumed yet
+ for(int nextPos = quotes.search(str_); nextPos > -1; pos = nextPos+1, nextPos = quotes.search(str_, pos)) { // note: pos = nextPos+1 skips the single character that follows the closing quote
+ // a non-quotes arguments runs from pos to nextPos
+ args += QStringList::split(spaces, str_.mid(pos, nextPos-pos));
+ // move nextpos marker to end of match
+ pos = quotes.pos(2); // skip quotation mark
+ nextPos += quotes.matchedLength(); // nextPos is now the index just past the closing quote
+ args += str_.mid(pos, nextPos-pos-1); // the quoted text without its quotes, kept as a single argument
+ }
+ // catch the end stuff
+ args += QStringList::split(spaces, str_.mid(pos));
+
+#if 0
+ for(QStringList::ConstIterator it = args.begin(); it != args.end(); ++it) {
+ myDebug() << *it << endl;
+ }
+#endif
+
+ return args;
+}
+
+ExecExternalFetcher::ExecExternalFetcher(QObject* parent_, const char* name_/*=0*/) : Fetcher(parent_, name_),
+ m_started(false), m_collType(-1), m_formatType(-1), m_canUpdate(false), m_process(0), m_deleteOnRemove(false) { // -1 marks the collection and format types as not configured yet
+}
+
+ExecExternalFetcher::~ExecExternalFetcher() {
+ stop(); // kills and deletes a process that is still running; does nothing when no search is in progress
+}
+
+QString ExecExternalFetcher::defaultName() { // translated default name for this kind of source
+ return i18n("External Application");
+}
+
+QString ExecExternalFetcher::source() const { // name of this source; also used as the prefix of error messages in slotError()
+ return m_name;
+}
+
+/**
+ * Tells whether this source provides entries of collection type @p type_.
+ * A source without a configured collection type (-1) can fetch nothing.
+ */
+bool ExecExternalFetcher::canFetch(int type_) const {
+  return m_collType != -1 && m_collType == type_;
+}
+
+/**
+ * Reads the settings of this source from @p config_.
+ *
+ * "ArgumentKeys" and "Arguments" are parallel lists; pairs are taken up to
+ * the length of the shorter list. When "ArgumentKeys" is missing, a single
+ * Keyword key is assumed. Updating is enabled exactly when "UpdateArgs" is
+ * present.
+ */
+void ExecExternalFetcher::readConfigHook(const KConfigGroup& config_) {
+  // an empty path entry leaves a previously set path untouched
+  const QString execPath = config_.readPathEntry("ExecPath");
+  if(!execPath.isEmpty()) {
+    m_path = execPath;
+  }
+
+  QValueList<int> argKeys;
+  if(config_.hasKey("ArgumentKeys")) {
+    argKeys = config_.readIntListEntry("ArgumentKeys");
+  } else {
+    argKeys.append(Keyword);
+  }
+  QStringList argValues = config_.readListEntry("Arguments");
+  if(argKeys.count() != argValues.count()) {
+    kdWarning() << "ExecExternalFetcher::readConfig() - unequal number of arguments and keys" << endl;
+  }
+  int pairCount = QMIN(argKeys.count(), argValues.count());
+  for(int idx = 0; idx < pairCount; ++idx) {
+    const FetchKey key = static_cast<FetchKey>(argKeys[idx]);
+    m_args[key] = argValues[idx];
+  }
+
+  m_canUpdate = config_.hasKey("UpdateArgs");
+  if(m_canUpdate) {
+    m_updateArgs = config_.readEntry("UpdateArgs");
+  }
+
+  m_collType = config_.readNumEntry("CollectionType", -1);
+  m_formatType = config_.readNumEntry("FormatType", -1);
+  m_deleteOnRemove = config_.readBoolEntry("DeleteOnRemove", false);
+  m_newStuffName = config_.readEntry("NewStuffName");
+}
+
+/**
+ * Starts a search by running the external application with the argument
+ * template configured for @p key_.
+ *
+ * The search value replaces %1 in the template. Because the resulting string
+ * is split into arguments afterwards, the value is wrapped in double quotes
+ * unless it is already enclosed in a matching pair of quotes, and quotes the
+ * user put around %1 in the template are dropped so the value is not quoted
+ * twice. If no template exists for @p key_, the search is stopped at once.
+ *
+ * @param key_ the kind of value being searched for
+ * @param value_ the search value; hyphens are removed for ISBN searches
+ */
+void ExecExternalFetcher::search(FetchKey key_, const QString& value_) {
+  m_started = true;
+
+  if(!m_args.contains(key_)) {
+    stop();
+    return;
+  }
+
+  // should KProcess::quote() be used?
+  // %1 gets replaced by the search value, but since the arguments are going to be split
+  // the search value needs to be enclosed in quotation marks
+  // but first check to make sure the user didn't do that already
+  // AND the "%1" wasn't used in the settings
+  QString value = value_;
+  if(key_ == ISBN) {
+    value.remove('-'); // remove hyphens from isbn values
+    // shouldn't hurt and might keep from confusing stupid search sources
+  }
+  // the opening quote has to be captured: \1 previously referred to a group
+  // that the pattern never defined, so the closing quote was not checked
+  QRegExp rx1(QString::fromLatin1("(['\"]).*\\1"));
+  if(!rx1.exactMatch(value)) {
+    value.prepend('"').append('"');
+  }
+  QString args = m_args[key_];
+  // same fix here, so that both quotes around %1 are removed, not just the first
+  QRegExp rx2(QString::fromLatin1("(['\"])%1\\1"));
+  args.replace(rx2, QString::fromLatin1("%1"));
+  startSearch(parseArguments(args.arg(value))); // replace %1 with search value
+}
+
+void ExecExternalFetcher::startSearch(const QStringList& args_) { // launches m_path with args_ as a child process and wires its output to the slots below
+ if(m_path.isEmpty()) { // nothing to run without an application path
+ stop();
+ return;
+ }
+
+#if 0
+ myDebug() << m_path << endl;
+ for(QStringList::ConstIterator it = args_.begin(); it != args_.end(); ++it) {
+ myDebug() << " " << *it << endl;
+ }
+#endif
+
+ m_process = new KProcess(); // NOTE(review): a process still held in m_process would be overwritten here - confirm callers always stop() first
+ connect(m_process, SIGNAL(receivedStdout(KProcess*, char*, int)), SLOT(slotData(KProcess*, char*, int)));
+ connect(m_process, SIGNAL(receivedStderr(KProcess*, char*, int)), SLOT(slotError(KProcess*, char*, int)));
+ connect(m_process, SIGNAL(processExited(KProcess*)), SLOT(slotProcessExited(KProcess*)));
+ *m_process << m_path << args_; // the application path comes first, followed by its arguments
+ if(!m_process->start(KProcess::NotifyOnExit, KProcess::AllOutput)) {
+ myDebug() << "ExecExternalFetcher::startSearch() - process failed to start" << endl;
+ stop(); // also deletes the process that failed to start
+ }
+}
+
+void ExecExternalFetcher::stop() { // ends the current search, releases the process and emits signalDone()
+ if(!m_started) { // nothing to do, and signalDone() is not emitted, when no search is running
+ return;
+ }
+ if(m_process) {
+ m_process->kill();
+ delete m_process;
+ m_process = 0;
+ }
+ m_data.truncate(0); // discard any output collected so far
+ m_started = false;
+ m_errors.clear();
+ emit signalDone(this);
+}
+
+void ExecExternalFetcher::slotData(KProcess*, char* buffer_, int len_) { // connected to receivedStdout: collects the process output in m_data
+ QDataStream stream(m_data, IO_WriteOnly | IO_Append); // opened in append mode so earlier chunks are kept
+ stream.writeRawBytes(buffer_, len_);
+}
+
+void ExecExternalFetcher::slotError(KProcess*, char* buffer_, int len_) { // connected to receivedStderr: stores each chunk as one message in m_errors
+ GUI::CursorSaver cs(Qt::arrowCursor); // presumably switches to the arrow cursor for the duration of this slot - confirm in CursorSaver
+ QString msg = QString::fromLocal8Bit(buffer_, len_);
+ msg.prepend(source() + QString::fromLatin1(": ")); // prefix the message with the source name
+ if(msg.endsWith(QChar('\n'))) { // drop one trailing newline
+ msg.truncate(msg.length()-1);
+ }
+ myDebug() << "ExecExternalFetcher::slotError() - " << msg << endl;
+ m_errors << msg; // reported to the user later, in slotProcessExited()
+}
+
+void ExecExternalFetcher::slotProcessExited(KProcess*) { // connected to processExited: imports the collected output and emits one search result per entry
+// myDebug() << "ExecExternalFetcher::slotProcessExited()" << endl;
+ if(!m_process->normalExit() || m_process->exitStatus()) { // abnormal exit or non-zero exit status; NOTE(review): stop() deletes m_process while this slot is presumably still running from its signal - confirm this is safe
+ myDebug() << "ExecExternalFetcher::slotProcessExited() - "<< source() << ": process did not exit successfully" << endl;
+ if(!m_errors.isEmpty()) {
+ message(m_errors.join(QChar('\n')), MessageHandler::Error);
+ }
+ stop();
+ return;
+ }
+ if(!m_errors.isEmpty()) { // stderr output of a successful run is reported as a warning only
+ message(m_errors.join(QChar('\n')), MessageHandler::Warning);
+ }
+
+ if(m_data.isEmpty()) {
+ myDebug() << "ExecExternalFetcher::slotProcessExited() - "<< source() << ": no data" << endl;
+ stop();
+ return;
+ }
+
+ Import::Format format = static_cast<Import::Format>(m_formatType > -1 ? m_formatType : Import::TellicoXML); // Tellico XML is assumed when no format is configured
+ Import::Importer* imp = ImportDialog::importer(format, KURL::List()); // deleted below on every path that follows
+ if(!imp) {
+ stop();
+ return;
+ }
+
+ imp->setText(QString::fromUtf8(m_data, m_data.size())); // the output is decoded as UTF-8
+ Data::CollPtr coll = imp->collection();
+ if(!coll) {
+ if(!imp->statusMessage().isEmpty()) {
+ message(imp->statusMessage(), MessageHandler::Status);
+ }
+ myDebug() << "ExecExternalFetcher::slotProcessExited() - "<< source() << ": no collection pointer" << endl;
+ delete imp;
+ stop();
+ return;
+ }
+
+ delete imp;
+ if(coll->entryCount() == 0) {
+// myDebug() << "ExecExternalFetcher::slotProcessExited() - no results" << endl;
+ stop();
+ return;
+ }
+
+ Data::EntryVec entries = coll->entries();
+ for(Data::EntryVec::Iterator entry = entries.begin(); entry != entries.end(); ++entry) {
+ QString desc; // description shown with the result, built from fields that depend on the collection type
+ switch(coll->type()) {
+ case Data::Collection::Book:
+ case Data::Collection::Bibtex:
+ desc = entry->field(QString::fromLatin1("author"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("publisher"));
+ if(!entry->field(QString::fromLatin1("cr_year")).isEmpty()) { // cr_year is preferred over pub_year
+ desc += QChar('/') + entry->field(QString::fromLatin1("cr_year"));
+ } else if(!entry->field(QString::fromLatin1("pub_year")).isEmpty()){
+ desc += QChar('/') + entry->field(QString::fromLatin1("pub_year"));
+ }
+ break;
+
+ case Data::Collection::Video:
+ desc = entry->field(QString::fromLatin1("studio"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("director"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("year"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("medium"));
+ break;
+
+ case Data::Collection::Album:
+ desc = entry->field(QString::fromLatin1("artist"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("label"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("year"));
+ break;
+
+ case Data::Collection::Game:
+ desc = entry->field(QString::fromLatin1("platform"));
+ break;
+
+ case Data::Collection::ComicBook:
+ desc = entry->field(QString::fromLatin1("publisher"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("pub_year"));
+ break;
+
+ case Data::Collection::BoardGame:
+ desc = entry->field(QString::fromLatin1("designer"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("publisher"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("year"));
+ break;
+
+ default: // other collection types get an empty description
+ break;
+ }
+ SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn")));
+ m_entries.insert(r->uid, entry); // kept so fetchEntry() can return it without running the program again
+ emit signalResultFound(r);
+ }
+ stop(); // be sure to call this
+}
+
+Tellico::Data::EntryPtr ExecExternalFetcher::fetchEntry(uint uid_) { // returns the entry stored for search result uid_ by slotProcessExited()
+ return m_entries[uid_]; // no further request is made; the entries were already imported
+}
+
+/**
+ * Searches for updated data for @p entry_ using the configured update
+ * arguments.
+ *
+ * Each update argument is expanded with Data::Entry::dependentValue(), so
+ * field references such as %{title} are replaced by the values of the entry.
+ * If the source is not configured for updating, signalDone() is emitted and
+ * no process is started.
+ */
+void ExecExternalFetcher::updateEntry(Data::EntryPtr entry_) {
+  if(!m_canUpdate) {
+    emit signalDone(this); // must do this
+    // without this return the external program was started anyway, after
+    // the search had already been reported as done
+    return;
+  }
+
+  m_started = true;
+
+  Data::ConstEntryPtr e(entry_.data());
+  QStringList args = parseArguments(m_updateArgs);
+  for(QStringList::Iterator it = args.begin(); it != args.end(); ++it) {
+    *it = Data::Entry::dependentValue(e, *it, false);
+  }
+  startSearch(args);
+}
+
+Tellico::Fetch::ConfigWidget* ExecExternalFetcher::configWidget(QWidget* parent_) const { // creates a new configuration widget initialised from this fetcher
+ return new ExecExternalFetcher::ConfigWidget(parent_, this); // allocated with new and handed to the caller; parent_ is passed on as the widget parent
+}
+
+ExecExternalFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const ExecExternalFetcher* fetcher_/*=0*/)
+ : Fetch::ConfigWidget(parent_), m_deleteOnRemove(false) { // builds the options form and, when fetcher_ is given, fills it from that fetcher
+ QGridLayout* l = new QGridLayout(optionsWidget(), 5, 2);
+ l->setSpacing(4);
+ l->setColStretch(1, 10);
+
+ int row = -1; // pre-incremented before each use, so the first row is 0
+
+ QLabel* label = new QLabel(i18n("Collection &type:"), optionsWidget());
+ l->addWidget(label, ++row, 0);
+ m_collCombo = new GUI::CollectionTypeCombo(optionsWidget());
+ connect(m_collCombo, SIGNAL(activated(int)), SLOT(slotSetModified()));
+ l->addWidget(m_collCombo, row, 1);
+ QString w = i18n("Set the collection type of the data returned from the external application.");
+ QWhatsThis::add(label, w);
+ QWhatsThis::add(m_collCombo, w);
+ label->setBuddy(m_collCombo);
+
+ label = new QLabel(i18n("&Result type: "), optionsWidget());
+ l->addWidget(label, ++row, 0);
+ m_formatCombo = new GUI::ComboBox(optionsWidget());
+ Import::FormatMap formatMap = ImportDialog::formatMap();
+ for(Import::FormatMap::Iterator it = formatMap.begin(); it != formatMap.end(); ++it) {
+ if(ImportDialog::formatImportsText(it.key())) { // only formats accepted by formatImportsText() are offered
+ m_formatCombo->insertItem(it.data(), it.key());
+ }
+ }
+ connect(m_formatCombo, SIGNAL(activated(int)), SLOT(slotSetModified()));
+ l->addWidget(m_formatCombo, row, 1);
+ w = i18n("Set the result type of the data returned from the external application.");
+ QWhatsThis::add(label, w);
+ QWhatsThis::add(m_formatCombo, w);
+ label->setBuddy(m_formatCombo);
+
+ label = new QLabel(i18n("Application &path: "), optionsWidget());
+ l->addWidget(label, ++row, 0);
+ m_pathEdit = new KURLRequester(optionsWidget());
+ connect(m_pathEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+ l->addWidget(m_pathEdit, row, 1);
+ w = i18n("Set the path of the application to run that should output a valid Tellico data file.");
+ QWhatsThis::add(label, w);
+ QWhatsThis::add(m_pathEdit, w);
+ label->setBuddy(m_pathEdit);
+
+ w = i18n("Select the search keys supported by the data source.");
+ QString w2 = i18n("Add any arguments that may be needed. <b>%1</b> will be replaced by the search term.");
+ QVGroupBox* box = new QVGroupBox(i18n("Arguments"), optionsWidget());
+ ++row;
+ l->addMultiCellWidget(box, row, row, 0, 1);
+ QWidget* grid = new QWidget(box);
+ QGridLayout* gridLayout = new QGridLayout(grid);
+ gridLayout->setSpacing(2);
+ row = -1; // from here on row counts rows of the inner grid, not of the outer layout
+ const Fetch::KeyMap keyMap = Fetch::Manager::self()->keyMap();
+ for(Fetch::KeyMap::ConstIterator it = keyMap.begin(); it != keyMap.end(); ++it) { // one check box and line edit per search key
+ FetchKey key = it.key();
+ if(key == Raw) { // no row is offered for raw queries
+ continue;
+ }
+ QCheckBox* cb = new QCheckBox(it.data(), grid);
+ gridLayout->addWidget(cb, ++row, 0);
+ m_cbDict.insert(key, cb);
+ GUI::LineEdit* le = new GUI::LineEdit(grid);
+ le->setHint(QString::fromLatin1("%1")); // for example
+ le->completionObject()->addItem(QString::fromLatin1("%1"));
+ gridLayout->addWidget(le, row, 1);
+ m_leDict.insert(key, le);
+ if(fetcher_ && fetcher_->m_args.contains(key)) {
+ cb->setChecked(true);
+ le->setEnabled(true);
+ le->setText(fetcher_->m_args[key]);
+ } else {
+ cb->setChecked(false);
+ le->setEnabled(false);
+ }
+ connect(cb, SIGNAL(toggled(bool)), le, SLOT(setEnabled(bool))); // the line edit is usable only while its box is checked
+ QWhatsThis::add(cb, w);
+ QWhatsThis::add(le, w2);
+ }
+ m_cbUpdate = new QCheckBox(i18n("Update"), grid);
+ gridLayout->addWidget(m_cbUpdate, ++row, 0);
+ m_leUpdate = new GUI::LineEdit(grid);
+ m_leUpdate->setHint(QString::fromLatin1("%{title}")); // for example
+ m_leUpdate->completionObject()->addItem(QString::fromLatin1("%{title}"));
+ m_leUpdate->completionObject()->addItem(QString::fromLatin1("%{isbn}"));
+ gridLayout->addWidget(m_leUpdate, row, 1);
+ /* TRANSLATORS: Do not translate %{author}. */
+ w2 = i18n("<p>Enter the arguments which should be used to search for available updates to an entry.</p><p>"
+ "The format is the same as for <i>Dependent</i> fields, where field values "
+ "are contained inside braces, such as <i>%{author}</i>. See the documentation for details.</p>");
+ QWhatsThis::add(m_cbUpdate, w); // w still holds the search keys help text here
+ QWhatsThis::add(m_leUpdate, w2);
+ if(fetcher_ && fetcher_->m_canUpdate) {
+ m_cbUpdate->setChecked(true);
+ m_leUpdate->setEnabled(true);
+ m_leUpdate->setText(fetcher_->m_updateArgs);
+ } else {
+ m_cbUpdate->setChecked(false);
+ m_leUpdate->setEnabled(false);
+ }
+ connect(m_cbUpdate, SIGNAL(toggled(bool)), m_leUpdate, SLOT(setEnabled(bool)));
+
+ l->setRowStretch(++row, 1); // NOTE(review): row holds the inner grid's row count here, but is applied to the outer layout - confirm intended
+
+ if(fetcher_) {
+ m_pathEdit->setURL(fetcher_->m_path);
+ m_newStuffName = fetcher_->m_newStuffName;
+ }
+ if(fetcher_ && fetcher_->m_collType > -1) {
+ m_collCombo->setCurrentType(fetcher_->m_collType);
+ } else {
+ m_collCombo->setCurrentType(Data::Collection::Book); // book collections are the default
+ }
+ if(fetcher_ && fetcher_->m_formatType > -1) {
+ m_formatCombo->setCurrentItem(formatMap[static_cast<Import::Format>(fetcher_->m_formatType)]);
+ } else {
+ m_formatCombo->setCurrentItem(formatMap[Import::TellicoXML]); // Tellico XML is the default result format
+ }
+ m_deleteOnRemove = fetcher_ && fetcher_->m_deleteOnRemove;
+ KAcceleratorManager::manage(optionsWidget());
+}
+
+ExecExternalFetcher::ConfigWidget::~ConfigWidget() { // intentionally empty
+}
+
+/**
+ * Fills the widget from @p config_.
+ *
+ * "ArgumentKeys" and "Arguments" are parallel lists. Each key listed in
+ * "ArgumentKeys" (except Raw) has its check box and line edit set according
+ * to whether an argument was read for it. Rows for keys that are not listed
+ * are left as they are.
+ */
+void ExecExternalFetcher::ConfigWidget::readConfig(KConfig* config_) {
+  m_pathEdit->setURL(config_->readPathEntry("ExecPath"));
+
+  QValueList<int> argKeys = config_->readIntListEntry("ArgumentKeys");
+  QStringList argValues = config_->readListEntry("Arguments");
+  if(argKeys.count() != argValues.count()) {
+    kdWarning() << "ExecExternalFetcher::ConfigWidget::readConfig() - unequal number of arguments and keys" << endl;
+  }
+
+  // pair keys and arguments, up to the length of the shorter list
+  int pairCount = QMIN(argKeys.count(), argValues.count());
+  QMap<FetchKey, QString> args;
+  for(int idx = 0; idx < pairCount; ++idx) {
+    const FetchKey pairKey = static_cast<FetchKey>(argKeys[idx]);
+    args[pairKey] = argValues[idx];
+  }
+
+  for(QValueList<int>::Iterator keyIt = argKeys.begin(); keyIt != argKeys.end(); ++keyIt) {
+    if(*keyIt == Raw) {
+      continue;
+    }
+    FetchKey key = static_cast<FetchKey>(*keyIt);
+    QCheckBox* cb = m_cbDict[key];
+    KLineEdit* le = m_leDict[key];
+    if(!cb || !le) {
+      continue; // no row exists for this key
+    }
+    const bool hasArg = args.contains(key);
+    cb->setChecked(hasArg);
+    le->setEnabled(hasArg);
+    if(hasArg) {
+      le->setText(args[key]);
+    } else {
+      le->clear();
+    }
+  }
+
+  const bool hasUpdate = config_->hasKey("UpdateArgs");
+  m_cbUpdate->setChecked(hasUpdate);
+  m_leUpdate->setEnabled(hasUpdate);
+  if(hasUpdate) {
+    m_leUpdate->setText(config_->readEntry("UpdateArgs"));
+  } else {
+    m_leUpdate->clear();
+  }
+
+  int collType = config_->readNumEntry("CollectionType");
+  m_collCombo->setCurrentType(collType);
+
+  Import::FormatMap formatMap = ImportDialog::formatMap();
+  int formatType = config_->readNumEntry("FormatType");
+  m_formatCombo->setCurrentItem(formatMap[static_cast<Import::Format>(formatType)]);
+
+  m_deleteOnRemove = config_->readBoolEntry("DeleteOnRemove", false);
+  m_name = config_->readEntry("Name");
+  m_newStuffName = config_->readEntry("NewStuffName");
+}
+
+/**
+ * Writes the widget state to @p config_ and clears the modified flag.
+ *
+ * Only checked search keys are stored, as the parallel lists "ArgumentKeys"
+ * and "Arguments". "UpdateArgs" is written when the update box is checked
+ * and deleted otherwise. An empty application path and an empty new-stuff
+ * name are not written.
+ */
+void ExecExternalFetcher::ConfigWidget::saveConfig(KConfigGroup& config_) {
+  QString execPath = m_pathEdit->url();
+  if(!execPath.isEmpty()) {
+    config_.writePathEntry("ExecPath", execPath);
+  }
+
+  QValueList<int> checkedKeys;
+  QStringList checkedArgs;
+  QIntDictIterator<QCheckBox> boxIt(m_cbDict);
+  while(boxIt.current()) {
+    if(boxIt.current()->isChecked()) {
+      checkedKeys << boxIt.currentKey();
+      checkedArgs << m_leDict[boxIt.currentKey()]->text();
+    }
+    ++boxIt;
+  }
+  config_.writeEntry("ArgumentKeys", checkedKeys);
+  config_.writeEntry("Arguments", checkedArgs);
+
+  if(m_cbUpdate->isChecked()) {
+    config_.writeEntry("UpdateArgs", m_leUpdate->text());
+  } else {
+    config_.deleteEntry("UpdateArgs");
+  }
+
+  config_.writeEntry("CollectionType", m_collCombo->currentType());
+  config_.writeEntry("FormatType", m_formatCombo->currentData().toInt());
+  config_.writeEntry("DeleteOnRemove", m_deleteOnRemove);
+  if(!m_newStuffName.isEmpty()) {
+    config_.writeEntry("NewStuffName", m_newStuffName);
+  }
+
+  slotSetModified(false);
+}
+
+/**
+ * Called when the source is removed. If the source is flagged for deletion
+ * on removal and has a new-stuff name, the script registered under that name
+ * is removed through the new-stuff manager.
+ */
+void ExecExternalFetcher::ConfigWidget::removed() {
+  if(m_deleteOnRemove && !m_newStuffName.isEmpty()) {
+    NewStuff::Manager man(this);
+    man.removeScript(m_newStuffName);
+  }
+}
+
+/**
+ * Returns the name read from the configuration, or the default name of the
+ * fetcher when no name is set.
+ */
+QString ExecExternalFetcher::ConfigWidget::preferredName() const {
+  if(m_name.isEmpty()) {
+    return ExecExternalFetcher::defaultName();
+  }
+  return m_name;
+}
+
+#include "execexternalfetcher.moc"
diff --git a/src/fetch/execexternalfetcher.h b/src/fetch/execexternalfetcher.h
new file mode 100644
index 0000000..bdc2a40
--- /dev/null
+++ b/src/fetch/execexternalfetcher.h
@@ -0,0 +1,118 @@
+/***************************************************************************
+ copyright : (C) 2005-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef TELLICO_EXECEXTERNALFETCHER_H
+#define TELLICO_EXECEXTERNALFETCHER_H
+
+#include "fetcher.h"
+#include "configwidget.h"
+#include "../datavectors.h"
+
+#include <qintdict.h>
+
+class KProcess;
+class KURLRequester;
+class KLineEdit;
+class KComboBox;
+
+class QCheckBox;
+
+namespace Tellico {
+ namespace GUI {
+ class ComboBox;
+ class LineEdit;
+ class CollectionTypeCombo;
+ }
+ namespace Fetch {
+
+/**
+ * @author Robby Stephenson
+ */
+class ExecExternalFetcher : public Fetcher { // fetcher that runs an external program and imports what it prints on stdout
+Q_OBJECT
+
+public:
+ ExecExternalFetcher(QObject* parent, const char* name=0);
+ /**
+ * Calls stop(), which kills and deletes a process that is still running.
+ */
+ virtual ~ExecExternalFetcher();
+
+ virtual QString source() const;
+ virtual bool isSearching() const { return m_started; }
+ virtual bool canSearch(FetchKey k) const { return m_args.contains(k); } // a key is searchable when arguments are configured for it
+ virtual bool canUpdate() const { return m_canUpdate; }
+ virtual void search(FetchKey key, const QString& value);
+ virtual void updateEntry(Data::EntryPtr entry);
+ virtual void stop();
+ virtual Data::EntryPtr fetchEntry(uint uid);
+ virtual Type type() const { return ExecExternal; }
+ virtual bool canFetch(int type) const;
+ virtual void readConfigHook(const KConfigGroup& config);
+ virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;
+
+ const QString& execPath() const { return m_path; }
+
+ class ConfigWidget : public Fetch::ConfigWidget { // configuration form for path, result type and per-key arguments
+ public:
+ ConfigWidget(QWidget* parent = 0, const ExecExternalFetcher* fetcher = 0);
+ ~ConfigWidget();
+
+ void readConfig(KConfig* config); // fills the widget from a config object
+ virtual void saveConfig(KConfigGroup& config);
+ virtual void removed(); // removes the installed script when m_deleteOnRemove is set
+ virtual QString preferredName() const;
+
+ private:
+ bool m_deleteOnRemove : 1;
+ QString m_name, m_newStuffName;
+ KURLRequester* m_pathEdit;
+ GUI::CollectionTypeCombo* m_collCombo;
+ GUI::ComboBox* m_formatCombo;
+ QIntDict<QCheckBox> m_cbDict; // search key -> check box enabling that key
+ QIntDict<GUI::LineEdit> m_leDict; // search key -> line edit holding its arguments
+ QCheckBox* m_cbUpdate;
+ GUI::LineEdit* m_leUpdate;
+ };
+ friend class ConfigWidget; // the widget reads the private settings directly
+
+ static QString defaultName();
+
+private slots:
+ void slotData(KProcess* proc, char* buffer, int len); // stdout of the process
+ void slotError(KProcess* proc, char* buffer, int len); // stderr of the process
+ void slotProcessExited(KProcess* proc);
+
+private:
+ static QStringList parseArguments(const QString& str); // splits an argument string, keeping quoted parts together
+
+ void startSearch(const QStringList& args); // runs m_path with the given arguments
+
+ bool m_started; // reported by isSearching()
+ int m_collType; // -1 when not configured
+ int m_formatType; // -1 when not configured
+ QString m_path; // path of the application to run
+ QMap<FetchKey, QString> m_args; // argument template for each supported search key
+ bool m_canUpdate : 1;
+ QString m_updateArgs; // argument template used by updateEntry()
+ KProcess* m_process; // 0 when no process exists
+ QByteArray m_data; // stdout collected from the process
+ QMap<int, Data::EntryPtr> m_entries; // map from search result id to entry
+ QStringList m_errors; // stderr messages collected from the process
+ bool m_deleteOnRemove : 1;
+ QString m_newStuffName;
+};
+
+ } // end namespace
+} // end namespace
+
+#endif
diff --git a/src/fetch/fetch.h b/src/fetch/fetch.h
new file mode 100644
index 0000000..0cdb726
--- /dev/null
+++ b/src/fetch/fetch.h
@@ -0,0 +1,64 @@
+/***************************************************************************
+ copyright : (C) 2003-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef TELLICO_FETCH_H
+#define TELLICO_FETCH_H
+
+namespace Tellico {
+ namespace Fetch {
+
+/**
+ * Keys by which a data source can be searched.
+ *
+ * FetchFirst and FetchLast are sentinels bracketing the real keys: FetchFirst
+ * must stay first, FetchLast must stay last, and the values in between must
+ * remain consecutive.
+ */
+enum FetchKey {
+  FetchFirst = 0,
+  Title      = 1,
+  Person     = 2,
+  ISBN       = 3,
+  UPC        = 4,
+  Keyword    = 5,
+  DOI        = 6,
+  ArxivID    = 7,
+  PubmedID   = 8,
+  LCCN       = 9,
+  Raw        = 10,
+  FetchLast  = 11
+};
+
+// Identifies the kind of data source. Real types must start at 0; Unknown is
+// the only negative value. Manager::createFetcher() reads this value as a
+// number from the "Type" config entry, so existing values must keep their
+// numbers.
+enum Type {
+  Unknown       = -1,
+  Amazon        = 0,
+  IMDB          = 1,
+  Z3950         = 2,
+  SRU           = 3,
+  Entrez        = 4,
+  ExecExternal  = 5,
+  Yahoo         = 6,
+  AnimeNfo      = 7,
+  IBS           = 8,
+  ISBNdb        = 9,
+  GCstarPlugin  = 10,
+  CrossRef      = 11,
+  Citebase      = 12,
+  Arxiv         = 13,
+  Bibsonomy     = 14,
+  GoogleScholar = 15,
+  Discogs       = 16
+};
+
+ }
+}
+
+#endif
diff --git a/src/fetch/fetcher.cpp b/src/fetch/fetcher.cpp
new file mode 100644
index 0000000..3bc7749
--- /dev/null
+++ b/src/fetch/fetcher.cpp
@@ -0,0 +1,61 @@
+/***************************************************************************
+ copyright : (C) 2005-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "fetcher.h"
+#include "messagehandler.h"
+#include "../entry.h"
+
+#include <kglobal.h>
+#include <kconfig.h>
+
+using Tellico::Fetch::Fetcher;
+using Tellico::Fetch::SearchResult;
+
+// Gives the fetcher a last chance to save settings into its config group (the
+// group name recorded by readConfig()).
+// NOTE(review): saveConfigHook() is virtual, but a virtual call made from a
+// destructor dispatches to the class whose destructor is running, so this
+// always invokes Fetcher::saveConfigHook() and never a subclass override.
+// Confirm whether subclasses are expected to save from their own destructors.
+Fetcher::~Fetcher() {
+ KConfigGroup config(KGlobal::config(), m_configGroup);
+ saveConfigHook(config);
+}
+
+// Loads the settings common to every fetcher from config_, remembers
+// groupName_ for the destructor's save step, then hands config_ to the
+// subclass through readConfigHook().
+void Fetcher::readConfig(const KConfigGroup& config_, const QString& groupName_) {
+  m_configGroup = groupName_;
+
+  // keep the current name when the config has none
+  const QString configuredName = config_.readEntry("Name");
+  if(!configuredName.isEmpty()) {
+    m_name = configuredName;
+  }
+
+  m_updateOverwrite = config_.readBoolEntry("UpdateOverwrite", false);
+
+  // the subclass reads its own settings last
+  readConfigHook(config_);
+}
+
+// Forwards a message to the installed message handler; does nothing when no
+// handler has been set. type_ is interpreted as a MessageHandler::Type.
+void Fetcher::message(const QString& message_, int type_) const {
+  if(!m_messager) {
+    return;
+  }
+  const MessageHandler::Type handlerType = static_cast<MessageHandler::Type>(type_);
+  m_messager->send(message_, handlerType);
+}
+
+// Forwards a message and an accompanying list of strings to the installed
+// message handler; does nothing when no handler has been set.
+void Fetcher::infoList(const QString& message_, const QStringList& list_) const {
+  if(!m_messager) {
+    return;
+  }
+  m_messager->infoList(message_, list_);
+}
+
+// Default implementation for fetchers that do not override updateEntry():
+// the entry is ignored and completion is signalled right away.
+void Fetcher::updateEntry(Data::EntryPtr) {
+ emit signalDone(this);
+}
+
+// Asks the fetcher that produced this result for the full entry, identified
+// by this result's uid. The fetcher pointer is dereferenced without a check.
+Tellico::Data::EntryPtr SearchResult::fetchEntry() {
+ return fetcher->fetchEntry(uid);
+}
+
+#include "fetcher.moc"
diff --git a/src/fetch/fetcher.h b/src/fetch/fetcher.h
new file mode 100644
index 0000000..0d2496e
--- /dev/null
+++ b/src/fetch/fetcher.h
@@ -0,0 +1,151 @@
+/***************************************************************************
+ copyright : (C) 2003-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef FETCHER_H
+#define FETCHER_H
+
+#include "fetch.h"
+#include "../datavectors.h"
+
+#include <kapplication.h> // for KApplication::random()
+
+#include <qobject.h>
+#include <qstring.h>
+
+class KConfigGroup;
+
+namespace Tellico {
+ namespace Fetch {
+ class ConfigWidget;
+ class MessageHandler;
+ class SearchResult;
+
+/**
+ * The top-level abstract class for fetching data.
+ *
+ * Fetchers are reference counted (KShared) and are normally held through
+ * Fetcher::Ptr.
+ *
+ * @author Robby Stephenson
+ */
+class Fetcher : public QObject, public KShared {
+Q_OBJECT
+
+public:
+ typedef KSharedPtr<Fetcher> Ptr;
+ typedef KSharedPtr<const Fetcher> CPtr;
+
+ /**
+ * Creates a fetcher with no message handler; updateOverwrite() and
+ * hasMoreResults() both start out false.
+ */
+ Fetcher(QObject* parent, const char* name = 0) : QObject(parent, name), KShared(),
+ m_updateOverwrite(false), m_hasMoreResults(false),
+ m_messager(0) {}
+ /**
+ * Calls saveConfigHook() with the config group recorded by readConfig().
+ */
+ virtual ~Fetcher();
+
+ /**
+ * Returns true if the fetcher might return entries from a certain collection type.
+ */
+ virtual bool canFetch(int type) const = 0;
+ /**
+ * Returns true if the fetcher can search using a certain key.
+ */
+ virtual bool canSearch(FetchKey key) const = 0;
+ /**
+ * Returns true if the fetcher can be used to update an existing entry.
+ * The default is true.
+ */
+ virtual bool canUpdate() const { return true; }
+
+ /**
+ * Returns the type of the data source.
+ */
+ virtual Type type() const = 0;
+ /**
+ * Returns the name of the data source, as defined by the user.
+ */
+ virtual QString source() const = 0;
+ /**
+ * Returns whether the fetcher will overwrite existing info when updating
+ */
+ bool updateOverwrite() const { return m_updateOverwrite; }
+ /**
+ * Starts a search, using a key and value.
+ */
+ virtual void search(FetchKey key, const QString& value) = 0;
+ /**
+ * Continues a previous search. The default implementation does nothing.
+ */
+ virtual void continueSearch() {}
+ /**
+ * Updates an entry. The default implementation only emits signalDone().
+ */
+ virtual void updateEntry(Data::EntryPtr);
+ // most fetchers won't support this. it's particularly useful for text fetchers
+ virtual void updateEntrySynchronous(Data::EntryPtr) {}
+ /**
+ * Returns true if the fetcher is currently searching.
+ */
+ virtual bool isSearching() const = 0;
+ /**
+ * Returns true if the fetcher can continue and fetch more results
+ * The fetcher is responsible for remembering state.
+ */
+ virtual bool hasMoreResults() const { return m_hasMoreResults; }
+ /**
+ * Stops the fetcher.
+ */
+ virtual void stop() = 0;
+ /**
+ * Fetches an entry, given the uid of the search result.
+ */
+ virtual Data::EntryPtr fetchEntry(uint uid) = 0;
+
+ void setMessageHandler(MessageHandler* handler) { m_messager = handler; }
+ MessageHandler* messageHandler() const { return m_messager; }
+ /**
+ * Sends a message, or a message plus a list of strings, to the message
+ * handler. Both do nothing when no handler is set. @p type is cast to
+ * MessageHandler::Type.
+ */
+ void message(const QString& message, int type) const;
+ void infoList(const QString& message, const QStringList& list) const;
+
+ /**
+ * Reads the config for the fetcher, given a config group. The group name
+ * is remembered, and the subclass is given the group through readConfigHook().
+ */
+ void readConfig(const KConfigGroup& config, const QString& groupName);
+ /**
+ * Returns a widget for modifying the fetcher's config.
+ */
+ virtual ConfigWidget* configWidget(QWidget* parent) const = 0;
+
+signals:
+// void signalStatus(const QString& status);
+ void signalResultFound(Tellico::Fetch::SearchResult* result);
+ void signalDone(Tellico::Fetch::Fetcher::Ptr);
+
+protected:
+ // set from the "Name" config entry by readConfig(), when that entry is not empty
+ QString m_name;
+ bool m_updateOverwrite : 1;
+ bool m_hasMoreResults : 1;
+
+private:
+ // called by readConfig() so the subclass can read its own settings
+ virtual void readConfigHook(const KConfigGroup&) = 0;
+ // called from the destructor; the default does nothing
+ virtual void saveConfigHook(KConfigGroup&) {}
+
+ MessageHandler* m_messager;
+ QString m_configGroup;
+};
+
+/**
+ * One result of a search: a short summary (title, description, ISBN) together
+ * with the fetcher that produced it and a uid by which the full entry can be
+ * requested from that fetcher.
+ */
+class SearchResult {
+public:
+ // the uid is taken from KApplication::random()
+ SearchResult(Fetcher::Ptr f, const QString& t, const QString& d, const QString& i)
+ : uid(KApplication::random()), fetcher(f), title(t), desc(d), isbn(i) {}
+ // returns fetcher->fetchEntry(uid)
+ Data::EntryPtr fetchEntry();
+ uint uid;
+ Fetcher::Ptr fetcher;
+ QString title;
+ QString desc;
+ QString isbn;
+};
+
+ } // end namespace
+} // end namespace
+
+#endif
diff --git a/src/fetch/fetchmanager.cpp b/src/fetch/fetchmanager.cpp
new file mode 100644
index 0000000..84f4f39
--- /dev/null
+++ b/src/fetch/fetchmanager.cpp
@@ -0,0 +1,707 @@
+/***************************************************************************
+ copyright : (C) 2003-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include <config.h>
+
+#include "fetchmanager.h"
+#include "configwidget.h"
+#include "messagehandler.h"
+#include "../tellico_kernel.h"
+#include "../entry.h"
+#include "../collection.h"
+#include "../tellico_utils.h"
+#include "../tellico_debug.h"
+
+#ifdef AMAZON_SUPPORT
+#include "amazonfetcher.h"
+#endif
+#ifdef IMDB_SUPPORT
+#include "imdbfetcher.h"
+#endif
+#ifdef HAVE_YAZ
+#include "z3950fetcher.h"
+#endif
+#include "srufetcher.h"
+#include "entrezfetcher.h"
+#include "execexternalfetcher.h"
+#include "yahoofetcher.h"
+#include "animenfofetcher.h"
+#include "ibsfetcher.h"
+#include "isbndbfetcher.h"
+#include "gcstarpluginfetcher.h"
+#include "crossreffetcher.h"
+#include "arxivfetcher.h"
+#include "citebasefetcher.h"
+#include "bibsonomyfetcher.h"
+#include "googlescholarfetcher.h"
+#include "discogsfetcher.h"
+
+#include <kglobal.h>
+#include <kconfig.h>
+#include <klocale.h>
+#include <kiconloader.h>
+#include <kmimetype.h>
+#include <kstandarddirs.h>
+#include <dcopref.h>
+#include <ktempfile.h>
+#include <kio/netaccess.h>
+
+#include <qfileinfo.h>
+#include <qdir.h>
+
+// Loads the named icon from the global icon loader. The icon size comes from
+// the macro's own size argument, not from a variable that happens to be called
+// size_ in the calling scope.
+#define LOAD_ICON(name, group, size) \
+  KGlobal::iconLoader()->loadIcon(name, static_cast<KIcon::Group>(group), size)
+
+using Tellico::Fetch::Manager;
+Manager* Manager::s_self = 0;
+
+// Creates the message handler, loads the fetchers, and fills the map from
+// search key to its translated display name. The FetchFirst and FetchLast
+// sentinels are deliberately left out of the map.
+Manager::Manager() : QObject(), m_currentFetcherIndex(-1), m_messager(new ManagerMessage()),
+ m_count(0), m_loadDefaults(false) {
+ loadFetchers();
+
+// m_keyMap.insert(FetchFirst, QString::null);
+ m_keyMap.insert(Title, i18n("Title"));
+ m_keyMap.insert(Person, i18n("Person"));
+ m_keyMap.insert(ISBN, i18n("ISBN"));
+ m_keyMap.insert(UPC, i18n("UPC/EAN"));
+ m_keyMap.insert(Keyword, i18n("Keyword"));
+ m_keyMap.insert(DOI, i18n("DOI"));
+ m_keyMap.insert(ArxivID, i18n("arXiv ID"));
+ m_keyMap.insert(PubmedID, i18n("Pubmed ID"));
+ // to keep from having a new i18n string, just remove octothorpe
+ m_keyMap.insert(LCCN, i18n("LCCN#").remove('#'));
+ m_keyMap.insert(Raw, i18n("Raw Query"));
+// m_keyMap.insert(FetchLast, QString::null);
+}
+
+// Frees the message handler allocated in the constructor.
+Manager::~Manager() {
+ delete m_messager;
+}
+
+// Rebuilds the list of fetchers. If the application config has a
+// "Data Sources" group, one fetcher is created per "Data Source N" group;
+// otherwise the built-in default fetchers are used. Every loaded fetcher,
+// configured or default, is given the manager's message handler so that its
+// messages are not silently dropped.
+void Manager::loadFetchers() {
+//  myDebug() << "Manager::loadFetchers()" << endl;
+  m_fetchers.clear();
+  m_configMap.clear();
+
+  KConfig* config = KGlobal::config();
+  if(config->hasGroup(QString::fromLatin1("Data Sources"))) {
+    KConfigGroup configGroup(config, QString::fromLatin1("Data Sources"));
+    int nSources = configGroup.readNumEntry("Sources Count", 0);
+    for(int i = 0; i < nSources; ++i) {
+      QString group = QString::fromLatin1("Data Source %1").arg(i);
+      Fetcher::Ptr f = createFetcher(config, group);
+      if(f) {
+        m_configMap.insert(f, group);
+        m_fetchers.append(f);
+        f->setMessageHandler(m_messager);
+      }
+    }
+    m_loadDefaults = false;
+  } else { // add default sources
+    m_fetchers = defaultFetchers();
+    // default fetchers have no config group, but they still need the handler
+    for(FetcherVec::Iterator it = m_fetchers.begin(); it != m_fetchers.end(); ++it) {
+      it->setMessageHandler(m_messager);
+    }
+    m_loadDefaults = true;
+  }
+}
+
+// Returns the loaded fetchers that can fetch entries of the given collection type.
+Tellico::Fetch::FetcherVec Manager::fetchers(int type_) {
+  FetcherVec matching;
+  FetcherVec::Iterator it = m_fetchers.begin();
+  while(it != m_fetchers.end()) {
+    if(it->canFetch(type_)) {
+      matching.append(it.data());
+    }
+    ++it;
+  }
+  return matching;
+}
+
+// Returns the search keys, with their display names, supported by the fetcher
+// whose source() equals source_. An empty source_ yields the complete map; an
+// unknown source yields an empty map and a warning.
+Tellico::Fetch::KeyMap Manager::keyMap(const QString& source_) const {
+  if(source_.isEmpty()) {
+    // an empty string means return all
+    return m_keyMap;
+  }
+
+  // only the first fetcher with a matching source is considered
+  KSharedPtr<const Fetcher> match = 0;
+  FetcherVec::ConstIterator fetcherIt = m_fetchers.constBegin();
+  for( ; fetcherIt != m_fetchers.constEnd(); ++fetcherIt) {
+    if(source_ == fetcherIt->source()) {
+      match = fetcherIt.data();
+      break;
+    }
+  }
+  if(!match) {
+    kdWarning() << "Manager::keyMap() - no fetcher found!" << endl;
+    return KeyMap();
+  }
+
+  KeyMap supported;
+  for(KeyMap::ConstIterator keyIt = m_keyMap.begin(); keyIt != m_keyMap.end(); ++keyIt) {
+    if(!match->canSearch(keyIt.key())) {
+      continue;
+    }
+    supported.insert(keyIt.key(), keyIt.data());
+  }
+  return supported;
+}
+
+// Starts a search on the first loaded fetcher whose source() equals source_.
+// An empty value_ is not searched for; signalDone() is emitted immediately.
+// NOTE(review): if no fetcher matches source_, nothing is started and
+// signalDone() is not emitted from here -- confirm callers never wait on it
+// in that case.
+void Manager::startSearch(const QString& source_, FetchKey key_, const QString& value_) {
+ if(value_.isEmpty()) {
+ emit signalDone();
+ return;
+ }
+
+ // assume there's only one fetcher match
+ int i = 0;
+ m_currentFetcherIndex = -1;
+ for(FetcherVec::Iterator it = m_fetchers.begin(); it != m_fetchers.end(); ++it, ++i) {
+ if(source_ == it->source()) {
+ ++m_count; // Fetcher::search() might emit done(), so increment before calling search()
+ connect(it.data(), SIGNAL(signalResultFound(Tellico::Fetch::SearchResult*)),
+ SIGNAL(signalResultFound(Tellico::Fetch::SearchResult*)));
+ connect(it.data(), SIGNAL(signalDone(Tellico::Fetch::Fetcher::Ptr)),
+ SLOT(slotFetcherDone(Tellico::Fetch::Fetcher::Ptr)));
+ it->search(key_, value_);
+ // remembered so continueSearch() and hasMoreResults() know which fetcher to ask
+ m_currentFetcherIndex = i;
+ break;
+ }
+ }
+}
+
+// Asks the fetcher used by the last startSearch() for more results. Emits
+// signalDone() right away when there is no such fetcher or it has nothing
+// more to offer.
+void Manager::continueSearch() {
+  if(m_currentFetcherIndex < 0 || m_currentFetcherIndex >= static_cast<int>(m_fetchers.count())) {
+    myDebug() << "Manager::continueSearch() - can't continue!" << endl;
+    emit signalDone();
+    return;
+  }
+
+  Fetcher::Ptr current = m_fetchers[m_currentFetcherIndex];
+  if(!current || !current->hasMoreResults()) {
+    emit signalDone();
+    return;
+  }
+
+  // count the pending search before starting it
+  ++m_count;
+  connect(current, SIGNAL(signalResultFound(Tellico::Fetch::SearchResult*)),
+          SIGNAL(signalResultFound(Tellico::Fetch::SearchResult*)));
+  connect(current, SIGNAL(signalDone(Tellico::Fetch::Fetcher::Ptr)),
+          SLOT(slotFetcherDone(Tellico::Fetch::Fetcher::Ptr)));
+  current->continueSearch();
+}
+
+// Returns true if the fetcher used by the last startSearch() reports that it
+// can deliver more results.
+bool Manager::hasMoreResults() const {
+  const bool indexValid = m_currentFetcherIndex >= 0
+                       && m_currentFetcherIndex < static_cast<int>(m_fetchers.count());
+  if(!indexValid) {
+    return false;
+  }
+  Fetcher::Ptr current = m_fetchers[m_currentFetcherIndex];
+  if(!current) {
+    return false;
+  }
+  return current->hasMoreResults();
+}
+
+// Stops every fetcher that is currently searching and resets the count of
+// pending searches to zero.
+void Manager::stop() {
+//  myDebug() << "Manager::stop()" << endl;
+  FetcherVec::Iterator it = m_fetchers.begin();
+  for( ; it != m_fetchers.end(); ++it) {
+    if(!it->isSearching()) {
+      continue;
+    }
+    it->stop();
+  }
+#ifndef NDEBUG
+  if(m_count != 0) {
+    myDebug() << "Manager::stop() - count should be 0!" << endl;
+  }
+#endif
+  m_count = 0;
+}
+
+// Called when a fetcher signals that it has finished. Disconnects the fetcher
+// and emits signalDone() once the last pending search has finished.
+//
+// m_count is unsigned, so it is never decremented below zero: a stray
+// notification arriving when nothing is pending (for instance after stop()
+// has reset the count) is ignored instead of wrapping the counter around,
+// which would keep signalDone() from ever being emitted again.
+void Manager::slotFetcherDone(Fetcher::Ptr fetcher_) {
+//  myDebug() << "Manager::slotFetcherDone() - " << (fetcher_ ? fetcher_->source() : QString::null)
+//            << " :" << m_count << endl;
+  if(fetcher_) {
+    fetcher_->disconnect(); // disconnect all signals
+  }
+  if(m_count == 0) {
+    myDebug() << "Manager::slotFetcherDone() - no pending search, ignoring" << endl;
+    return;
+  }
+  --m_count;
+  if(m_count == 0) {
+    emit signalDone();
+  }
+}
+
+// Returns true if at least one loaded fetcher can fetch entries for the
+// collection type currently reported by the kernel.
+bool Manager::canFetch() const {
+  bool found = false;
+  FetcherVec::ConstIterator it = m_fetchers.constBegin();
+  while(!found && it != m_fetchers.constEnd()) {
+    found = it->canFetch(Kernel::self()->collectionType());
+    ++it;
+  }
+  return found;
+}
+
+// Creates the fetcher described by one config group. The numeric "Type" entry
+// selects the fetcher class; the new fetcher then reads the rest of the group
+// through Fetcher::readConfig(). Returns a null pointer when the group is
+// missing, the type is unknown, or support for that type was not compiled in.
+Tellico::Fetch::Fetcher::Ptr Manager::createFetcher(KConfig* config_, const QString& group_) {
+ if(!config_->hasGroup(group_)) {
+ myDebug() << "Manager::createFetcher() - no config group for " << group_ << endl;
+ return 0;
+ }
+
+ KConfigGroup config(config_, group_);
+
+ int fetchType = config.readNumEntry("Type", Fetch::Unknown);
+ if(fetchType == Fetch::Unknown) {
+ myDebug() << "Manager::createFetcher() - unknown type " << fetchType << ", skipping" << endl;
+ return 0;
+ }
+
+ Fetcher::Ptr f = 0;
+ switch(fetchType) {
+ // the optional types keep their case label even when compiled out, so f
+ // simply stays null for them
+ case Amazon:
+#ifdef AMAZON_SUPPORT
+ {
+ int site = config.readNumEntry("Site", AmazonFetcher::Unknown);
+ if(site == AmazonFetcher::Unknown) {
+ myDebug() << "Manager::createFetcher() - unknown amazon site " << site << ", skipping" << endl;
+ } else {
+ f = new AmazonFetcher(static_cast<AmazonFetcher::Site>(site), this);
+ }
+ }
+#endif
+ break;
+
+ case IMDB:
+#ifdef IMDB_SUPPORT
+ f = new IMDBFetcher(this);
+#endif
+ break;
+
+ case Z3950:
+#ifdef HAVE_YAZ
+ f = new Z3950Fetcher(this);
+#endif
+ break;
+
+ case SRU:
+ f = new SRUFetcher(this);
+ break;
+
+ case Entrez:
+ f = new EntrezFetcher(this);
+ break;
+
+ case ExecExternal:
+ f = new ExecExternalFetcher(this);
+ break;
+
+ case Yahoo:
+ f = new YahooFetcher(this);
+ break;
+
+ case AnimeNfo:
+ f = new AnimeNfoFetcher(this);
+ break;
+
+ case IBS:
+ f = new IBSFetcher(this);
+ break;
+
+ case ISBNdb:
+ f = new ISBNdbFetcher(this);
+ break;
+
+ case GCstarPlugin:
+ f = new GCstarPluginFetcher(this);
+ break;
+
+ case CrossRef:
+ f = new CrossRefFetcher(this);
+ break;
+
+ case Arxiv:
+ f = new ArxivFetcher(this);
+ break;
+
+ case Citebase:
+ f = new CitebaseFetcher(this);
+ break;
+
+ case Bibsonomy:
+ f = new BibsonomyFetcher(this);
+ break;
+
+ case GoogleScholar:
+ f = new GoogleScholarFetcher(this);
+ break;
+
+ case Discogs:
+ f = new DiscogsFetcher(this);
+ break;
+
+ case Unknown:
+ default:
+ break;
+ }
+ if(f) {
+ f->readConfig(config, group_);
+ }
+ return f;
+}
+
+// Builds the set of fetchers used when no data sources have been configured.
+// Not static: each fetcher is created with this manager as its parent.
+// The fetchers are returned without a message handler and without having
+// read any config.
+Tellico::Fetch::FetcherVec Manager::defaultFetchers() {
+ FetcherVec vec;
+#ifdef AMAZON_SUPPORT
+ vec.append(new AmazonFetcher(AmazonFetcher::US, this));
+#endif
+#ifdef IMDB_SUPPORT
+ vec.append(new IMDBFetcher(this));
+#endif
+ vec.append(SRUFetcher::libraryOfCongress(this));
+ vec.append(new ISBNdbFetcher(this));
+ vec.append(new YahooFetcher(this));
+ vec.append(new AnimeNfoFetcher(this));
+ vec.append(new ArxivFetcher(this));
+ vec.append(new GoogleScholarFetcher(this));
+ vec.append(new DiscogsFetcher(this));
+// only add IBS if the user's language list includes Italian
+ if(KGlobal::locale()->languagesTwoAlpha().contains(QString::fromLatin1("it"))) {
+ vec.append(new IBSFetcher(this));
+ }
+ return vec;
+}
+
+// Creates a fresh set of fetchers suitable for updating entries of the given
+// collection type. Only fetchers that can fetch that collection type and that
+// support updating are returned. The same filter is applied whether the
+// fetchers come from the user's configured data sources or from the built-in
+// defaults.
+Tellico::Fetch::FetcherVec Manager::createUpdateFetchers(int collType_) {
+  FetcherVec vec;
+
+  if(m_loadDefaults) {
+    // no configured sources: start from newly created default fetchers
+    FetcherVec defaults = defaultFetchers();
+    for(FetcherVec::Iterator it = defaults.begin(); it != defaults.end(); ++it) {
+      if(it->canFetch(collType_) && it->canUpdate()) {
+        vec.append(it.data());
+      }
+    }
+    return vec;
+  }
+
+  KConfigGroup config(KGlobal::config(), "Data Sources");
+  int nSources = config.readNumEntry("Sources Count", 0);
+  for(int i = 0; i < nSources; ++i) {
+    QString group = QString::fromLatin1("Data Source %1").arg(i);
+    // needs the KConfig*
+    Fetcher::Ptr f = createFetcher(KGlobal::config(), group);
+    if(f && f->canFetch(collType_) && f->canUpdate()) {
+      vec.append(f);
+    }
+  }
+  return vec;
+}
+
+// Same as createUpdateFetchers(collType_), further restricted to the fetchers
+// that can search by key_.
+Tellico::Fetch::FetcherVec Manager::createUpdateFetchers(int collType_, FetchKey key_) {
+  // creates new fetchers
+  FetcherVec candidates = createUpdateFetchers(collType_);
+
+  FetcherVec searchable;
+  Fetch::FetcherVec::Iterator it = candidates.begin();
+  while(it != candidates.end()) {
+    if(it->canSearch(key_)) {
+      searchable.append(it);
+    }
+    ++it;
+  }
+  return searchable;
+}
+
+// Creates a fresh update fetcher for the given collection type whose source()
+// equals source_. Returns a null pointer when there is none.
+Tellico::Fetch::Fetcher::Ptr Manager::createUpdateFetcher(int collType_, const QString& source_) {
+  // creates new fetchers
+  FetcherVec candidates = createUpdateFetchers(collType_);
+
+  Fetcher::Ptr found = 0;
+  Fetch::FetcherVec::Iterator it = candidates.begin();
+  while(!found && it != candidates.end()) {
+    if(it->source() == source_) {
+      found = it;
+    }
+    ++it;
+  }
+  return found;
+}
+
+// Re-emits a status message as signalStatus().
+void Manager::updateStatus(const QString& message_) {
+ emit signalStatus(message_);
+}
+
+// Returns the sorted list of data source types that can be created: one
+// entry per compiled-in fetcher type, plus one ExecExternal entry for every
+// bundled script (data-sources/*.spec) that has a name and a usable
+// executable. The spec file location of each such script is remembered in
+// m_scriptMap under the script's name.
+Tellico::Fetch::TypePairList Manager::typeList() {
+ Fetch::TypePairList list;
+#ifdef AMAZON_SUPPORT
+ list.append(TypePair(AmazonFetcher::defaultName(), Amazon));
+#endif
+#ifdef IMDB_SUPPORT
+ list.append(TypePair(IMDBFetcher::defaultName(), IMDB));
+#endif
+#ifdef HAVE_YAZ
+ list.append(TypePair(Z3950Fetcher::defaultName(), Z3950));
+#endif
+ list.append(TypePair(SRUFetcher::defaultName(), SRU));
+ list.append(TypePair(EntrezFetcher::defaultName(), Entrez));
+ list.append(TypePair(ExecExternalFetcher::defaultName(), ExecExternal));
+ list.append(TypePair(YahooFetcher::defaultName(), Yahoo));
+ list.append(TypePair(AnimeNfoFetcher::defaultName(), AnimeNfo));
+ list.append(TypePair(IBSFetcher::defaultName(), IBS));
+ list.append(TypePair(ISBNdbFetcher::defaultName(), ISBNdb));
+ list.append(TypePair(GCstarPluginFetcher::defaultName(), GCstarPlugin));
+ list.append(TypePair(CrossRefFetcher::defaultName(), CrossRef));
+ list.append(TypePair(ArxivFetcher::defaultName(), Arxiv));
+ list.append(TypePair(CitebaseFetcher::defaultName(), Citebase));
+ list.append(TypePair(BibsonomyFetcher::defaultName(), Bibsonomy));
+ list.append(TypePair(GoogleScholarFetcher::defaultName(),GoogleScholar));
+ list.append(TypePair(DiscogsFetcher::defaultName(), Discogs));
+
+ // now find all the scripts distributed with tellico
+ QStringList files = KGlobal::dirs()->findAllResources("appdata", QString::fromLatin1("data-sources/*.spec"),
+ false, true);
+ for(QStringList::Iterator it = files.begin(); it != files.end(); ++it) {
+ KConfig spec(*it, false, false);
+ QString name = spec.readEntry("Name");
+ if(name.isEmpty()) {
+ myDebug() << "Fetch::Manager::typeList() - no Name for " << *it << endl;
+ continue;
+ }
+
+ if(!bundledScriptHasExecPath(*it, &spec)) { // no available exec
+ continue;
+ }
+
+ list.append(TypePair(name, ExecExternal));
+ // configWidget() looks the spec file up again by this name
+ m_scriptMap.insert(name, *it);
+ }
+ list.sort();
+ return list;
+}
+
+
+// called when creating a new fetcher
+// Returns a new, empty config widget for the given fetcher type, or a null
+// pointer when the type is Unknown or its support was not compiled in. For
+// ExecExternal, when name_ is a bundled script registered by typeList(), the
+// widget is pre-filled from that script's spec file.
+Tellico::Fetch::ConfigWidget* Manager::configWidget(QWidget* parent_, Type type_, const QString& name_) {
+ ConfigWidget* w = 0;
+ switch(type_) {
+#ifdef AMAZON_SUPPORT
+ case Amazon:
+ w = new AmazonFetcher::ConfigWidget(parent_);
+ break;
+#endif
+#ifdef IMDB_SUPPORT
+ case IMDB:
+ w = new IMDBFetcher::ConfigWidget(parent_);
+ break;
+#endif
+#ifdef HAVE_YAZ
+ case Z3950:
+ w = new Z3950Fetcher::ConfigWidget(parent_);
+ break;
+#endif
+ case SRU:
+ w = new SRUConfigWidget(parent_);
+ break;
+ case Entrez:
+ w = new EntrezFetcher::ConfigWidget(parent_);
+ break;
+ case ExecExternal:
+ w = new ExecExternalFetcher::ConfigWidget(parent_);
+ if(!name_.isEmpty() && m_scriptMap.contains(name_)) {
+ // bundledScriptHasExecPath() actually needs to write the exec path
+ // back to the config so the configWidget can read it. But if the spec file
+ // is not writable, that doesn't work. So work around it with a copy to a temp file
+ KTempFile tmpFile;
+ tmpFile.setAutoDelete(true);
+ KURL from, to;
+ from.setPath(m_scriptMap[name_]);
+ to.setPath(tmpFile.name());
+ // have to overwrite since KTempFile already created it
+ if(!KIO::NetAccess::file_copy(from, to, -1, true /*overwrite*/)) {
+ // a failed copy is only logged; the checks below then decide the outcome
+ myDebug() << KIO::NetAccess::lastErrorString() << endl;
+ }
+ KConfig spec(to.path(), false, false);
+ // pass actual location of spec file
+ if(name_ == spec.readEntry("Name") && bundledScriptHasExecPath(m_scriptMap[name_], &spec)) {
+ static_cast<ExecExternalFetcher::ConfigWidget*>(w)->readConfig(&spec);
+ } else {
+ kdWarning() << "Fetch::Manager::configWidget() - Can't read config file for " << to.path() << endl;
+ }
+ }
+ break;
+ case Yahoo:
+ w = new YahooFetcher::ConfigWidget(parent_);
+ break;
+ case AnimeNfo:
+ w = new AnimeNfoFetcher::ConfigWidget(parent_);
+ break;
+ case IBS:
+ w = new IBSFetcher::ConfigWidget(parent_);
+ break;
+ case ISBNdb:
+ w = new ISBNdbFetcher::ConfigWidget(parent_);
+ break;
+ case GCstarPlugin:
+ w = new GCstarPluginFetcher::ConfigWidget(parent_);
+ break;
+ case CrossRef:
+ w = new CrossRefFetcher::ConfigWidget(parent_);
+ break;
+ case Arxiv:
+ w = new ArxivFetcher::ConfigWidget(parent_);
+ break;
+ case Citebase:
+ w = new CitebaseFetcher::ConfigWidget(parent_);
+ break;
+ case Bibsonomy:
+ w = new BibsonomyFetcher::ConfigWidget(parent_);
+ break;
+ case GoogleScholar:
+ w = new GoogleScholarFetcher::ConfigWidget(parent_);
+ break;
+ case Discogs:
+ w = new DiscogsFetcher::ConfigWidget(parent_);
+ break;
+ case Unknown:
+ kdWarning() << "Fetch::Manager::configWidget() - no widget defined for type = " << type_ << endl;
+ }
+ return w;
+}
+
+// static
+// Returns the default display name for a fetcher type. Unknown, and any type
+// whose support was not compiled in, falls through to a warning and a null
+// string.
+QString Manager::typeName(Fetch::Type type_) {
+ switch(type_) {
+#ifdef AMAZON_SUPPORT
+ case Amazon: return AmazonFetcher::defaultName();
+#endif
+#ifdef IMDB_SUPPORT
+ case IMDB: return IMDBFetcher::defaultName();
+#endif
+#ifdef HAVE_YAZ
+ case Z3950: return Z3950Fetcher::defaultName();
+#endif
+ case SRU: return SRUFetcher::defaultName();
+ case Entrez: return EntrezFetcher::defaultName();
+ case ExecExternal: return ExecExternalFetcher::defaultName();
+ case Yahoo: return YahooFetcher::defaultName();
+ case AnimeNfo: return AnimeNfoFetcher::defaultName();
+ case IBS: return IBSFetcher::defaultName();
+ case ISBNdb: return ISBNdbFetcher::defaultName();
+ case GCstarPlugin: return GCstarPluginFetcher::defaultName();
+ case CrossRef: return CrossRefFetcher::defaultName();
+ case Arxiv: return ArxivFetcher::defaultName();
+ case Citebase: return CitebaseFetcher::defaultName();
+ case Bibsonomy: return BibsonomyFetcher::defaultName();
+ case GoogleScholar: return GoogleScholarFetcher::defaultName();
+ case Discogs: return DiscogsFetcher::defaultName();
+ case Unknown: break;
+ }
+ myWarning() << "Manager::typeName() - none found for " << type_ << endl;
+ return QString::null;
+}
+
+// Returns an icon for a specific fetcher. Z39.50 fetchers use the favicon of
+// their host, and a few known external scripts use the favicon of the site
+// they query (or the amarok icon). Everything else falls back to the generic
+// icon for the fetcher's type, loaded with the same group and size that the
+// caller asked for.
+QPixmap Manager::fetcherIcon(Fetch::Fetcher::CPtr fetcher_, int group_, int size_) {
+#ifdef HAVE_YAZ
+  if(fetcher_->type() == Fetch::Z3950) {
+    const Fetch::Z3950Fetcher* f = static_cast<const Fetch::Z3950Fetcher*>(fetcher_.data());
+    KURL u;
+    u.setProtocol(QString::fromLatin1("http"));
+    u.setHost(f->host());
+    QString icon = favIcon(u);
+    if(u.isValid() && !icon.isEmpty()) {
+      return LOAD_ICON(icon, group_, size_);
+    }
+  } else
+#endif
+  if(fetcher_->type() == Fetch::ExecExternal) {
+    const Fetch::ExecExternalFetcher* f = static_cast<const Fetch::ExecExternalFetcher*>(fetcher_.data());
+    const QString p = f->execPath();
+    KURL u;
+    // recognize the script by a fragment of its executable path
+    if(p.find(QString::fromLatin1("allocine")) > -1) {
+      u = QString::fromLatin1("http://www.allocine.fr");
+    } else if(p.find(QString::fromLatin1("ministerio_de_cultura")) > -1) {
+      u = QString::fromLatin1("http://www.mcu.es");
+    } else if(p.find(QString::fromLatin1("dark_horse_comics")) > -1) {
+      u = QString::fromLatin1("http://www.darkhorse.com");
+    } else if(p.find(QString::fromLatin1("boardgamegeek")) > -1) {
+      u = QString::fromLatin1("http://www.boardgamegeek.com");
+    } else if(f->source().find(QString::fromLatin1("amarok"), 0, false /*not case-sensitive*/) > -1) {
+      return LOAD_ICON(QString::fromLatin1("amarok"), group_, size_);
+    }
+    if(!u.isEmpty() && u.isValid()) {
+      QString icon = favIcon(u);
+      if(!icon.isEmpty()) {
+        return LOAD_ICON(icon, group_, size_);
+      }
+    }
+  }
+  // pass the size along too, so the fallback honours the requested size
+  return fetcherIcon(fetcher_->type(), group_, size_);
+}
+
+// Returns the generic icon for a fetcher type: the favicon of the service's
+// web site where there is one, a fixed named icon otherwise. Returns a null
+// pixmap when no icon name could be determined.
+QPixmap Manager::fetcherIcon(Fetch::Type type_, int group_, int size_) {
+ QString name;
+ switch(type_) {
+ case Amazon:
+ name = favIcon("http://amazon.com"); break;
+ case IMDB:
+ name = favIcon("http://imdb.com"); break;
+ case Z3950:
+ name = QString::fromLatin1("network"); break; // rather arbitrary
+ case SRU:
+ name = QString::fromLatin1("network_local"); break; // just to be different than z3950
+ case Entrez:
+ name = favIcon("http://www.ncbi.nlm.nih.gov"); break;
+ case ExecExternal:
+ name = QString::fromLatin1("exec"); break;
+ case Yahoo:
+ name = favIcon("http://yahoo.com"); break;
+ case AnimeNfo:
+ name = favIcon("http://animenfo.com"); break;
+ case IBS:
+ name = favIcon("http://internetbookshop.it"); break;
+ case ISBNdb:
+ name = favIcon("http://isbndb.com"); break;
+ case GCstarPlugin:
+ name = QString::fromLatin1("gcstar"); break;
+ case CrossRef:
+ name = favIcon("http://crossref.org"); break;
+ case Arxiv:
+ name = favIcon("http://arxiv.org"); break;
+ case Citebase:
+ name = favIcon("http://citebase.org"); break;
+ case Bibsonomy:
+ name = favIcon("http://bibsonomy.org"); break;
+ case GoogleScholar:
+ name = favIcon("http://scholar.google.com"); break;
+ case Discogs:
+ name = favIcon("http://www.discogs.com"); break;
+ case Unknown:
+ kdWarning() << "Fetch::Manager::fetcherIcon() - no pixmap defined for type = " << type_ << endl;
+ }
+
+ // LOAD_ICON is only evaluated when there is an icon name
+ return name.isEmpty() ? QPixmap() : LOAD_ICON(name, group_, size_);
+}
+
+// Returns the name of the favicon that the kded "favicons" module has for a
+// URL. When it has none, a download of the host's icon is requested for later
+// use and the mime-type icon for the URL is returned instead.
+QString Manager::favIcon(const KURL& url_) {
+  DCOPRef kded("kded", "favicons");
+  DCOPReply reply = kded.call("iconForURL(KURL)", url_);
+  QString iconName = reply.isValid() ? reply : QString();
+  if(iconName.isEmpty()) {
+    // go ahead and try to download it for later
+    kded.call("downloadHostIcon(KURL)", url_);
+    return KMimeType::iconForURL(url_);
+  }
+  return iconName;
+}
+
+// Makes sure a bundled script has an existing, executable program and writes
+// its location back to the "ExecPath" entry of config_.
+//
+// For the bundled scripts, either the exec name is not set, in which case it
+// is the name of the spec file minus the ".spec", or the exec is set and is
+// local to the directory of the spec file.
+//
+// specFile_ is the location of the spec file; config_ is the config object to
+// read "ExecPath" from and write it back to. Returns true when a usable
+// executable was found.
+bool Manager::bundledScriptHasExecPath(const QString& specFile_, KConfig* config_) {
+  QString exec = config_->readPathEntry("ExecPath");
+  QFileInfo specInfo(specFile_), execInfo(exec);
+  // directory of the spec file, with a trailing separator so names can be appended
+  const QString specDir = specInfo.dirPath(true) + QDir::separator();
+  // NOTE(review): for a relative ExecPath, exists() is presumably resolved
+  // against the current working directory rather than the spec directory --
+  // confirm that this is the intended lookup.
+  if(exec.isEmpty() || !execInfo.exists()) {
+    exec = specDir + specInfo.baseName(true); // remove ".spec"
+  } else if(execInfo.isRelative()) {
+    exec = specDir + exec;
+  } else if(!execInfo.isExecutable()) {
+    kdWarning() << "Fetch::Manager::bundledScriptHasExecPath() - not executable: " << specFile_ << endl;
+    return false;
+  }
+  execInfo.setFile(exec);
+  if(!execInfo.exists() || !execInfo.isExecutable()) {
+    kdWarning() << "Fetch::Manager::bundledScriptHasExecPath() - no exec file for " << specFile_ << endl;
+    kdWarning() << "exec = " << exec << endl;
+    return false; // we're not ok
+  }
+
+  config_->writePathEntry("ExecPath", exec);
+  config_->sync(); // might be readonly, but that's ok
+  return true;
+}
+
+#include "fetchmanager.moc"
diff --git a/src/fetch/fetchmanager.h b/src/fetch/fetchmanager.h
new file mode 100644
index 0000000..7036d71
--- /dev/null
+++ b/src/fetch/fetchmanager.h
@@ -0,0 +1,108 @@
+/***************************************************************************
+ copyright : (C) 2003-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef FETCHMANAGER_H
+#define FETCHMANAGER_H
+
+namespace Tellico {
+ namespace Fetch {
+ class SearchResult;
+ class ConfigWidget;
+ class ManagerMessage;
+ }
+}
+
+#include "fetcher.h"
+#include "../ptrvector.h"
+
+#include <ksortablevaluelist.h>
+
+#include <qobject.h>
+#include <qmap.h>
+
+namespace Tellico {
+ namespace Fetch {
+
+typedef KSortableItem<Type, QString> TypePair; // fetcher info, type and name of type
+typedef KSortableValueList<Type, QString> TypePairList;
+typedef QMap<FetchKey, QString> KeyMap; // map key type to name of key
+typedef Vector<Fetcher> FetcherVec;
+
+/**
+ * A manager for handling all the different classes of Fetcher.
+ *
+ * There is a single instance, created on first use of self().
+ *
+ * @author Robby Stephenson
+ */
+class Manager : public QObject {
+Q_OBJECT
+
+public:
+ static Manager* self() { if(!s_self) s_self = new Manager(); return s_self; }
+
+ ~Manager();
+
+ // search keys and their display names; an empty source returns all of them
+ KeyMap keyMap(const QString& source = QString::null) const;
+ // starts a search on the fetcher whose source() matches
+ void startSearch(const QString& source, FetchKey key, const QString& value);
+ // asks the fetcher of the last search for more results
+ void continueSearch();
+ // stops all fetchers that are searching
+ void stop();
+ // true if any loaded fetcher can fetch the current collection type
+ bool canFetch() const;
+ bool hasMoreResults() const;
+ // (re)creates the fetchers from the application config, or the defaults
+ void loadFetchers();
+ const FetcherVec& fetchers() const { return m_fetchers; }
+ // loaded fetchers that can fetch the given collection type
+ FetcherVec fetchers(int type);
+ // sorted list of the fetcher types that can be created
+ TypePairList typeList();
+ // new config widget for a fetcher type; name selects a bundled script
+ ConfigWidget* configWidget(QWidget* parent, Type type, const QString& name);
+
+ // create fetcher for updating an entry
+ FetcherVec createUpdateFetchers(int collType);
+ FetcherVec createUpdateFetchers(int collType, FetchKey key);
+ Fetcher::Ptr createUpdateFetcher(int collType, const QString& source);
+
+ static QString typeName(Type type);
+ static QPixmap fetcherIcon(Fetch::Type type, int iconGroup=3 /*Small*/, int size=0 /* default */);
+ static QPixmap fetcherIcon(Fetch::Fetcher::CPtr ptr, int iconGroup=3 /*Small*/, int size=0 /* default*/);
+
+signals:
+ void signalStatus(const QString& status);
+ void signalResultFound(Tellico::Fetch::SearchResult* result);
+ void signalDone();
+
+private slots:
+ void slotFetcherDone(Tellico::Fetch::Fetcher::Ptr);
+
+private:
+ friend class ManagerMessage;
+ static Manager* s_self;
+
+ Manager();
+ Fetcher::Ptr createFetcher(KConfig* config, const QString& configGroup);
+ FetcherVec defaultFetchers();
+ void updateStatus(const QString& message);
+
+ static QString favIcon(const KURL& url);
+ static bool bundledScriptHasExecPath(const QString& specFile, KConfig* config);
+
+ FetcherVec m_fetchers;
+ // index into m_fetchers of the fetcher used by the last search, or -1
+ int m_currentFetcherIndex;
+ KeyMap m_keyMap;
+ typedef QMap<Fetcher::Ptr, QString> ConfigMap;
+ // config group name of each fetcher created from the config
+ ConfigMap m_configMap;
+ // bundled script name -> location of its spec file, filled by typeList()
+ StringMap m_scriptMap;
+ ManagerMessage* m_messager;
+ // number of searches started and not yet reported done
+ uint m_count;
+ // true when loadFetchers() found no configured data sources
+ bool m_loadDefaults : 1;
+};
+
+ } // end namespace
+} // end namespace
+#endif
diff --git a/src/fetch/gcstarpluginfetcher.cpp b/src/fetch/gcstarpluginfetcher.cpp
new file mode 100644
index 0000000..4bffed7
--- /dev/null
+++ b/src/fetch/gcstarpluginfetcher.cpp
@@ -0,0 +1,486 @@
+/***************************************************************************
+ copyright : (C) 2005-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "gcstarpluginfetcher.h"
+#include "messagehandler.h"
+#include "fetchmanager.h"
+#include "../collection.h"
+#include "../entry.h"
+#include "../translators/tellicoimporter.h"
+#include "../gui/combobox.h"
+#include "../gui/collectiontypecombo.h"
+#include "../filehandler.h"
+#include "../tellico_kernel.h"
+#include "../tellico_debug.h"
+#include "../latin1literal.h"
+#include "../tellico_utils.h"
+
+#include <kconfig.h>
+#include <kprocess.h>
+#include <kprocio.h>
+#include <kstandarddirs.h>
+#include <kaccelmanager.h>
+
+#include <qdir.h>
+#include <qlayout.h>
+#include <qlabel.h>
+#include <qwhatsthis.h>
+
+using Tellico::Fetch::GCstarPluginFetcher;
+
+// cache of the plugin lists read so far, keyed by collection type; filled by plugins()
+GCstarPluginFetcher::PluginMap GCstarPluginFetcher::pluginMap;
+// which listing method to use; stays NotYet until plugins() has checked the gcstar version
+GCstarPluginFetcher::PluginParse GCstarPluginFetcher::pluginParse = NotYet;
+
+//static
+/**
+ * Returns the list of GCstar plugins available for the given collection type.
+ *
+ * Results are cached in pluginMap, so gcstar is only consulted the first time a
+ * collection type is requested. On the very first call, "gcstar --version" is run
+ * to decide whether the --list-plugins option can be used (New) or whether the
+ * plugin files have to be scanned directly (Old).
+ *
+ * @param collType_ The collection type
+ * @return The cached plugin list, or an empty list if none was recorded
+ */
+GCstarPluginFetcher::PluginList GCstarPluginFetcher::plugins(int collType_) {
+  if(!pluginMap.contains(collType_)) {
+    GUI::CursorSaver cs;
+    QString gcstar = KStandardDirs::findExe(QString::fromLatin1("gcstar"));
+
+    if(pluginParse == NotYet) {
+      KProcIO proc;
+      proc << gcstar << QString::fromLatin1("--version");
+      // wait 5 seconds at most, just a sanity thing, never want to block completely
+      if(proc.start(KProcess::Block) && proc.wait(5)) {
+        QString output;
+        proc.readln(output);
+        if(!output.isEmpty()) {
+          // always going to be x.y[.z] ?
+          QRegExp versionRx(QString::fromLatin1("(\\d+)\\.(\\d+)(?:\\.(\\d+))?"));
+          if(versionRx.search(output) > -1) {
+            int x = versionRx.cap(1).toInt();
+            int y = versionRx.cap(2).toInt();
+            int z = versionRx.cap(3).toInt(); // ok to be empty
+            myDebug() << QString::fromLatin1("GCstarPluginFetcher() - found %1.%2.%3").arg(x).arg(y).arg(z) << endl;
+            // --list-plugins argument was added for 1.3 release; compare major
+            // and minor as a pair so that 2.0, 2.1, ... also count as new
+            pluginParse = (x > 1 || (x == 1 && y >= 3)) ? New : Old;
+          }
+        }
+      }
+      // if still undetermined, then we should use old in future
+      if(pluginParse == NotYet) {
+        pluginParse = Old;
+      }
+    }
+
+    if(pluginParse == New) {
+      readPluginsNew(collType_, gcstar);
+    } else {
+      readPluginsOld(collType_, gcstar);
+    }
+  }
+
+  // the readers may return without caching anything, so check again
+  return pluginMap.contains(collType_) ? pluginMap[collType_] : GCstarPluginFetcher::PluginList();
+}
+
+/**
+ * Reads the plugin list by running "gcstar -x --list-plugins --collection <type>".
+ *
+ * The output is parsed as records separated by empty lines: the first non-empty
+ * line of a record is stored as the plugin "name", any following non-empty line
+ * as the "author". The resulting list is cached in pluginMap. If the process
+ * can not be started, nothing is cached.
+ *
+ * @param collType_ The collection type
+ * @param gcstar_ The gcstar executable
+ */
+void GCstarPluginFetcher::readPluginsNew(int collType_, const QString& gcstar_) {
+  PluginList plugins;
+
+  QString gcstarCollection = gcstarType(collType_);
+  if(gcstarCollection.isEmpty()) {
+    // unsupported collection type, remember the empty list
+    pluginMap.insert(collType_, plugins);
+    return;
+  }
+
+  KProcIO proc;
+  proc << gcstar_
+       << QString::fromLatin1("-x")
+       << QString::fromLatin1("--list-plugins")
+       << QString::fromLatin1("--collection") << gcstarCollection;
+
+  if(!proc.start(KProcess::Block)) {
+    myWarning() << "GCstarPluginFetcher::readPluginsNew() - can't start" << endl;
+    return;
+  }
+
+  bool hasName = false;
+  PluginInfo info;
+  QString line;
+  // the first pass sees an empty line, which is harmless since hasName is false
+  for(int length = 0; length > -1; length = proc.readln(line)) {
+    if(line.isEmpty()) {
+      if(hasName) {
+        plugins << info;
+      }
+      hasName = false;
+      info.clear();
+    } else {
+      // authors have \t at beginning
+      line = line.stripWhiteSpace();
+      if(!hasName) {
+        info.insert(QString::fromLatin1("name"), line);
+        hasName = true;
+      } else {
+        info.insert(QString::fromLatin1("author"), line);
+      }
+//      myDebug() << line << endl;
+    }
+  }
+  // the output is not guaranteed to end with an empty line,
+  // so don't lose a record which is still pending
+  if(hasName) {
+    plugins << info;
+  }
+
+  pluginMap.insert(collType_, plugins);
+}
+
+// Builds the plugin list by scanning plugin source files instead of asking gcstar.
+// Files matching GC*.pm are listed from a directory reached relative to the gcstar
+// executable path, each one is read as text, and the string literals returned by
+// getName/getAuthor/getLang are stored under the lower-cased keys name/author/lang.
+// The result, possibly empty, is cached in pluginMap.
+void GCstarPluginFetcher::readPluginsOld(int collType_, const QString& gcstar_) {
+ QDir dir(gcstar_, QString::fromLatin1("GC*.pm"));
+ dir.cd(QString::fromLatin1("../../lib/gcstar/GCPlugins/"));
+
+ // cap(1) is which getter it is, cap(2) is the quoted string it returns
+ QRegExp rx(QString::fromLatin1("get(Name|Author|Lang)\\s*\\{\\s*return\\s+['\"](.+)['\"]"));
+ // non-greedy so that (.+) stops at the first closing quote
+ rx.setMinimal(true);
+
+ PluginList plugins;
+
+ // NOTE(review): dirName is only used for the emptiness check below, the scan itself
+ // does not descend into a per-collection directory -- confirm that is intended
+ QString dirName = gcstarType(collType_);
+ if(dirName.isEmpty()) {
+ pluginMap.insert(collType_, plugins);
+ return;
+ }
+
+ QStringList files = dir.entryList();
+ for(QStringList::ConstIterator file = files.begin(); file != files.end(); ++file) {
+ KURL u;
+ u.setPath(dir.filePath(*file));
+ PluginInfo info;
+ QString text = FileHandler::readTextFile(u);
+ // collect every getter match in the file, continuing after each match
+ for(int pos = rx.search(text);
+ pos > -1;
+ pos = rx.search(text, pos+rx.matchedLength())) {
+ info.insert(rx.cap(1).lower(), rx.cap(2));
+ }
+ // only add if it has a name
+ if(info.contains(QString::fromLatin1("name"))) {
+ plugins << info;
+ }
+ }
+ // inserting empty map is ok
+ pluginMap.insert(collType_, plugins);
+}
+
+// Maps a collection type onto the collection name used on the gcstar command
+// line. Returns a null string for types without a GCstar counterpart.
+QString GCstarPluginFetcher::gcstarType(int collType_) {
+  const char* gcName = 0;
+  switch(collType_) {
+    case Data::Collection::Book:
+      gcName = "GCbooks";
+      break;
+    case Data::Collection::Video:
+      gcName = "GCfilms";
+      break;
+    case Data::Collection::Game:
+      gcName = "GCgames";
+      break;
+    case Data::Collection::Album:
+      gcName = "GCmusics";
+      break;
+    case Data::Collection::Coin:
+      gcName = "GCcoins";
+      break;
+    case Data::Collection::Wine:
+      gcName = "GCwines";
+      break;
+    case Data::Collection::BoardGame:
+      gcName = "GCboardgames";
+      break;
+    default:
+      break;
+  }
+  if(!gcName) {
+    return QString();
+  }
+  return QString::fromLatin1(gcName);
+}
+
+// A new fetcher is idle, has no collection type (-1) and owns no process yet.
+GCstarPluginFetcher::GCstarPluginFetcher(QObject* parent_, const char* name_/*=0*/)
+    : Fetcher(parent_, name_),
+      m_started(false),
+      m_collType(-1),
+      m_process(0) {
+}
+
+// stop() kills and deletes the gcstar process if a search is still in progress
+GCstarPluginFetcher::~GCstarPluginFetcher() {
+ stop();
+}
+
+// translated generic name for this kind of fetcher
+QString GCstarPluginFetcher::defaultName() {
+ return i18n("GCstar Plugin");
+}
+
+// returns m_name as is, there is no fallback to defaultName() when it is empty
+QString GCstarPluginFetcher::source() const {
+ return m_name;
+}
+
+// Only the configured collection type can be fetched; while no type has been
+// configured (-1), nothing can.
+bool GCstarPluginFetcher::canFetch(int type_) const {
+  if(m_collType == -1) {
+    return false;
+  }
+  return m_collType == type_;
+}
+
+// reads the collection type (-1 if missing) and the plugin name, the same
+// keys which ConfigWidget::saveConfig() writes
+void GCstarPluginFetcher::readConfigHook(const KConfigGroup& config_) {
+ m_collType = config_.readNumEntry("CollectionType", -1);
+ m_plugin = config_.readEntry("Plugin");
+}
+
+/**
+ * Starts a title search by running gcstar with the configured plugin.
+ *
+ * gcstar is started asynchronously; its output is collected in slotData() and
+ * slotError() and evaluated in slotProcessExited(). Whenever the search can not
+ * be started, stop() is called, which emits signalDone().
+ *
+ * @param key_ The search key, only Fetch::Title is supported
+ * @param value_ The title to search for
+ */
+void GCstarPluginFetcher::search(FetchKey key_, const QString& value_) {
+  m_started = true;
+  m_data.truncate(0);
+
+  if(key_ != Fetch::Title) {
+    myDebug() << "GCstarPluginFetcher::search() - only Title searches are supported" << endl;
+    stop();
+    return;
+  }
+
+  QString gcstar = KStandardDirs::findExe(QString::fromLatin1("gcstar"));
+  if(gcstar.isEmpty()) {
+    myWarning() << "GCstarPluginFetcher::search() - gcstar not found!" << endl;
+    stop();
+    return;
+  }
+
+  QString gcstarCollection = gcstarType(m_collType);
+  if(gcstarCollection.isEmpty()) {
+    // don't run gcstar with an empty --collection argument
+    myWarning() << "GCstarPluginFetcher::search() - unsupported collection type: " << m_collType << endl;
+    stop();
+    return;
+  }
+
+  if(m_plugin.isEmpty()) {
+    myWarning() << "GCstarPluginFetcher::search() - no plugin name! " << endl;
+    stop();
+    return;
+  }
+
+  m_process = new KProcess();
+  connect(m_process, SIGNAL(receivedStdout(KProcess*, char*, int)), SLOT(slotData(KProcess*, char*, int)));
+  connect(m_process, SIGNAL(receivedStderr(KProcess*, char*, int)), SLOT(slotError(KProcess*, char*, int)));
+  connect(m_process, SIGNAL(processExited(KProcess*)), SLOT(slotProcessExited(KProcess*)));
+  QStringList args;
+  // NOTE(review): the arguments are handed to KProcess one by one, presumably without
+  // a shell, so KProcess::quote() may add literal quotes to the search term -- confirm
+  args << gcstar << QString::fromLatin1("-x")
+       << QString::fromLatin1("--collection") << gcstarCollection
+       << QString::fromLatin1("--export")     << QString::fromLatin1("Tellico")
+       << QString::fromLatin1("--website")    << m_plugin
+       << QString::fromLatin1("--download")   << KProcess::quote(value_);
+  myLog() << "GCstarPluginFetcher::search() - " << args.join(QChar(' ')) << endl;
+  *m_process << args;
+  if(!m_process->start(KProcess::NotifyOnExit, KProcess::AllOutput)) {
+    myDebug() << "GCstarPluginFetcher::search() - process failed to start" << endl;
+    stop();
+  }
+}
+
+// Ends the current search, if any: kills and deletes the gcstar process, drops the
+// collected output and error messages, and emits signalDone(). Does nothing, and
+// in particular emits nothing, when no search is in progress.
+void GCstarPluginFetcher::stop() {
+ if(!m_started) {
+ return;
+ }
+ if(m_process) {
+ m_process->kill();
+ delete m_process;
+ m_process = 0;
+ }
+ m_data.truncate(0);
+ m_started = false;
+ m_errors.clear();
+ // emitted last, after all state has been reset
+ emit signalDone(this);
+}
+
+// connected to receivedStdout(): appends the raw bytes to m_data
+void GCstarPluginFetcher::slotData(KProcess*, char* buffer_, int len_) {
+ QDataStream stream(m_data, IO_WriteOnly | IO_Append);
+ stream.writeRawBytes(buffer_, len_);
+}
+
+// Connected to receivedStderr(): the text is prefixed with the source name, logged
+// and kept in m_errors until the process has exited.
+void GCstarPluginFetcher::slotError(KProcess*, char* buffer_, int len_) {
+  const QString text = QString::fromLocal8Bit(buffer_, len_);
+  const QString prefixed = source() + QString::fromLatin1(": ") + text;
+  myDebug() << "GCstarPluginFetcher::slotError() - " << prefixed << endl;
+  m_errors.append(prefixed);
+}
+
+// Connected to processExited(). Reports anything gcstar wrote to stderr, imports the
+// collected stdout as Tellico data and emits one search result per imported entry.
+// Every path ends in stop(), which emits signalDone().
+void GCstarPluginFetcher::slotProcessExited(KProcess*) {
+// myDebug() << "GCstarPluginFetcher::slotProcessExited()" << endl;
+ // abnormal termination or non-zero exit status: stderr output is reported as an error
+ if(!m_process->normalExit() || m_process->exitStatus()) {
+ myDebug() << "GCstarPluginFetcher::slotProcessExited() - "<< source() << ": process did not exit successfully" << endl;
+ if(!m_errors.isEmpty()) {
+ message(m_errors.join(QChar('\n')), MessageHandler::Error);
+ }
+ stop();
+ return;
+ }
+ // the process succeeded, so stderr output is only a warning
+ if(!m_errors.isEmpty()) {
+ message(m_errors.join(QChar('\n')), MessageHandler::Warning);
+ }
+
+ if(m_data.isEmpty()) {
+ myDebug() << "GCstarPluginFetcher::slotProcessExited() - "<< source() << ": no data" << endl;
+ stop();
+ return;
+ }
+
+ // the collected output is decoded as UTF-8 before being handed to the importer
+ Import::TellicoImporter imp(QString::fromUtf8(m_data, m_data.size()));
+
+ Data::CollPtr coll = imp.collection();
+ if(!coll) {
+ if(!imp.statusMessage().isEmpty()) {
+ message(imp.statusMessage(), MessageHandler::Status);
+ }
+ myDebug() << "GCstarPluginFetcher::slotProcessExited() - "<< source() << ": no collection pointer" << endl;
+ stop();
+ return;
+ }
+
+ Data::EntryVec entries = coll->entries();
+ for(Data::EntryVec::Iterator entry = entries.begin(); entry != entries.end(); ++entry) {
+ // short '/'-separated description, the fields used depend on the type of the
+ // imported collection; it stays empty for types not listed here
+ QString desc;
+ switch(coll->type()) {
+ case Data::Collection::Book:
+ case Data::Collection::Bibtex:
+ desc = entry->field(QString::fromLatin1("author"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("publisher"));
+ // the copyright year is preferred over the publication year
+ if(!entry->field(QString::fromLatin1("cr_year")).isEmpty()) {
+ desc += QChar('/') + entry->field(QString::fromLatin1("cr_year"));
+ } else if(!entry->field(QString::fromLatin1("pub_year")).isEmpty()){
+ desc += QChar('/') + entry->field(QString::fromLatin1("pub_year"));
+ }
+ break;
+
+ case Data::Collection::Video:
+ desc = entry->field(QString::fromLatin1("studio"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("director"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("year"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("medium"));
+ break;
+
+ case Data::Collection::Album:
+ desc = entry->field(QString::fromLatin1("artist"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("label"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("year"));
+ break;
+
+ case Data::Collection::Game:
+ desc = entry->field(QString::fromLatin1("platform"));
+ break;
+
+ case Data::Collection::ComicBook:
+ desc = entry->field(QString::fromLatin1("publisher"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("pub_year"));
+ break;
+
+ case Data::Collection::BoardGame:
+ desc = entry->field(QString::fromLatin1("designer"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("publisher"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("year"));
+ break;
+
+ default:
+ break;
+ }
+ SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn")));
+ // remember the entry under the result's uid so fetchEntry() can return it later
+ m_entries.insert(r->uid, entry);
+ emit signalResultFound(r);
+ }
+ stop(); // be sure to call this
+}
+
+// returns the entry stored for the given search result uid in slotProcessExited()
+// NOTE(review): the non-const operator[] presumably adds a default value for an
+// unknown uid -- confirm against the QMap documentation
+Tellico::Data::EntryPtr GCstarPluginFetcher::fetchEntry(uint uid_) {
+ return m_entries[uid_];
+}
+
+// Updates an entry by searching for its title; without a title there is nothing
+// to search for and the fetcher reports that it is done right away.
+void GCstarPluginFetcher::updateEntry(Data::EntryPtr entry_) {
+  // try searching for title and rely on Collection::sameEntry() to figure things out
+  const QString title = entry_->field(QString::fromLatin1("title"));
+  if(title.isEmpty()) {
+    myDebug() << "GCstarPluginFetcher::updateEntry() - insufficient info to search" << endl;
+    emit signalDone(this); // always need to emit this if not continuing with the search
+    return;
+  }
+
+  search(Fetch::Title, title);
+}
+
+// creates a new config widget initialized from this fetcher
+Tellico::Fetch::ConfigWidget* GCstarPluginFetcher::configWidget(QWidget* parent_) const {
+ return new GCstarPluginFetcher::ConfigWidget(parent_, this);
+}
+
+/**
+ * Builds the options page: a collection type combo, a plugin combo and a label
+ * showing the author of the selected plugin.
+ *
+ * The plugin combo is not filled here, that is done on the first showEvent().
+ *
+ * @param parent_ The parent widget
+ * @param fetcher_ The fetcher to take the initial collection type and plugin name
+ *                 from, may be 0
+ */
+GCstarPluginFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const GCstarPluginFetcher* fetcher_/*=0*/)
+    : Fetch::ConfigWidget(parent_), m_needPluginList(true), m_langLabel(0) {
+  // m_langLabel is set to 0 since the language label below is disabled for now
+  QGridLayout* l = new QGridLayout(optionsWidget(), 3, 4);
+  l->setSpacing(4);
+  l->setColStretch(1, 10);
+
+  int row = -1;
+
+  QLabel* label = new QLabel(i18n("Collection &type:"), optionsWidget());
+  l->addWidget(label, ++row, 0);
+  m_collCombo = new GUI::CollectionTypeCombo(optionsWidget());
+  connect(m_collCombo, SIGNAL(activated(int)), SLOT(slotSetModified()));
+  connect(m_collCombo, SIGNAL(activated(int)), SLOT(slotTypeChanged()));
+  l->addMultiCellWidget(m_collCombo, row, row, 1, 3);
+  QString w = i18n("Set the collection type of the data returned from the plugin.");
+  QWhatsThis::add(label, w);
+  QWhatsThis::add(m_collCombo, w);
+  label->setBuddy(m_collCombo);
+
+  label = new QLabel(i18n("&Plugin: "), optionsWidget());
+  l->addWidget(label, ++row, 0);
+  m_pluginCombo = new GUI::ComboBox(optionsWidget());
+  connect(m_pluginCombo, SIGNAL(activated(int)), SLOT(slotSetModified()));
+  connect(m_pluginCombo, SIGNAL(activated(int)), SLOT(slotPluginChanged()));
+  l->addMultiCellWidget(m_pluginCombo, row, row, 1, 3);
+  w = i18n("Select the GCstar plugin used for the data source.");
+  QWhatsThis::add(label, w);
+  QWhatsThis::add(m_pluginCombo, w);
+  label->setBuddy(m_pluginCombo);
+
+  label = new QLabel(i18n("Author: "), optionsWidget());
+  l->addWidget(label, ++row, 0);
+  m_authorLabel = new QLabel(optionsWidget());
+  l->addWidget(m_authorLabel, row, 1);
+
+//  label = new QLabel(i18n("Language: "), optionsWidget());
+//  l->addWidget(label, row, 2);
+//  m_langLabel = new QLabel(optionsWidget());
+//  l->addWidget(m_langLabel, row, 3);
+
+  // without a configured type, start with the type of the current collection
+  if(fetcher_ && fetcher_->m_collType > -1) {
+    m_collCombo->setCurrentType(fetcher_->m_collType);
+  } else {
+    m_collCombo->setCurrentType(Kernel::self()->collectionType());
+  }
+
+  // selected in showEvent(), once the plugin combo has been filled
+  if(fetcher_) {
+    m_originalPluginName = fetcher_->m_plugin;
+  }
+
+  KAcceleratorManager::manage(optionsWidget());
+}
+
+// nothing to clean up explicitly
+GCstarPluginFetcher::ConfigWidget::~ConfigWidget() {
+}
+
+// writes the selected collection type and plugin name, under the same keys
+// which GCstarPluginFetcher::readConfigHook() reads
+void GCstarPluginFetcher::ConfigWidget::saveConfig(KConfigGroup& config_) {
+ config_.writeEntry("CollectionType", m_collCombo->currentType());
+ config_.writeEntry("Plugin", m_pluginCombo->currentText());
+}
+
+// "GCstar - " followed by the currently selected plugin name, not translated
+QString GCstarPluginFetcher::ConfigWidget::preferredName() const {
+ return QString::fromLatin1("GCstar - ") + m_pluginCombo->currentText();
+}
+
+// Refills the plugin combo with the plugins available for the selected collection
+// type. Each item carries the whole plugin info as its data, which is what
+// slotPluginChanged() reads back.
+void GCstarPluginFetcher::ConfigWidget::slotTypeChanged() {
+  const int selectedType = m_collCombo->currentType();
+  m_pluginCombo->clear();
+
+  const QString nameKey = QString::fromLatin1("name");
+  const GCstarPluginFetcher::PluginList available = GCstarPluginFetcher::plugins(selectedType);
+  GCstarPluginFetcher::PluginList::ConstIterator plugin = available.begin();
+  while(plugin != available.end()) {
+    m_pluginCombo->insertItem((*plugin)[nameKey].toString(), *plugin);
+    ++plugin;
+  }
+
+  slotPluginChanged();
+  emit signalName(preferredName());
+}
+
+// shows the author of the selected plugin, taken from the plugin info stored as
+// item data in slotTypeChanged(), and announces the new preferred name
+void GCstarPluginFetcher::ConfigWidget::slotPluginChanged() {
+ PluginInfo info = m_pluginCombo->currentData().toMap();
+ m_authorLabel->setText(info[QString::fromLatin1("author")].toString());
+// m_langLabel->setText(info[QString::fromLatin1("lang")].toString());
+ emit signalName(preferredName());
+}
+
+// The plugin list is only read the first time the widget is shown. Afterwards
+// the plugin name taken from the fetcher, if any, is selected again.
+void GCstarPluginFetcher::ConfigWidget::showEvent(QShowEvent*) {
+  if(!m_needPluginList) {
+    return;
+  }
+  m_needPluginList = false;
+
+  slotTypeChanged(); // update plugin combo box
+  if(m_originalPluginName.isEmpty()) {
+    return;
+  }
+  m_pluginCombo->setCurrentText(m_originalPluginName);
+  slotPluginChanged();
+}
+
+#include "gcstarpluginfetcher.moc"
diff --git a/src/fetch/gcstarpluginfetcher.h b/src/fetch/gcstarpluginfetcher.h
new file mode 100644
index 0000000..1994b58
--- /dev/null
+++ b/src/fetch/gcstarpluginfetcher.h
@@ -0,0 +1,121 @@
+/***************************************************************************
+ copyright : (C) 2007 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef TELLICO_GCSTARPLUGINFETCHER_H
+#define TELLICO_GCSTARPLUGINFETCHER_H
+
+#include "fetcher.h"
+#include "configwidget.h"
+#include "../datavectors.h"
+
+#include <qintdict.h>
+
+class QLabel;
+class KProcess;
+
+namespace Tellico {
+ namespace GUI {
+ class ComboBox;
+ class CollectionTypeCombo;
+ }
+ namespace Fetch {
+
+/**
+ * A fetcher which runs the external gcstar program with one of its plugins and
+ * imports the Tellico data which gcstar writes to its standard output.
+ * Only title searches are supported.
+ *
+ * @author Robby Stephenson
+ */
+class GCstarPluginFetcher : public Fetcher {
+Q_OBJECT
+
+public:
+
+ GCstarPluginFetcher(QObject* parent, const char* name=0);
+ /**
+ * Stops any search in progress.
+ */
+ virtual ~GCstarPluginFetcher();
+
+ virtual QString source() const;
+ virtual bool isSearching() const { return m_started; }
+ virtual bool canSearch(FetchKey k) const { return k == Title; }
+
+ virtual void search(FetchKey key, const QString& value);
+ virtual void updateEntry(Data::EntryPtr entry);
+ virtual void stop();
+ virtual Data::EntryPtr fetchEntry(uint uid);
+ virtual Type type() const { return GCstarPlugin; }
+ virtual bool canFetch(int type) const;
+ virtual void readConfigHook(const KConfigGroup& config);
+ virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;
+
+ class ConfigWidget;
+ friend class ConfigWidget;
+
+ static QString defaultName();
+
+private slots:
+ // connected to the stdout, stderr and exit signals of the gcstar process
+ void slotData(KProcess* proc, char* buffer, int len);
+ void slotError(KProcess* proc, char* buffer, int len);
+ void slotProcessExited(KProcess* proc);
+
+private:
+ // map Author, Name, Lang, etc...
+ typedef QMap<QString, QVariant> PluginInfo;
+ typedef QValueList<PluginInfo> PluginList;
+ // map collection type to all available plugins
+ typedef QMap<int, PluginList> PluginMap;
+ static PluginMap pluginMap;
+ // returns the plugins for a collection type, reading them on first use
+ static PluginList plugins(int collType);
+ // we need to keep track if we've searched for plugins yet and by what method
+ enum PluginParse {NotYet, Old, New};
+ static PluginParse pluginParse;
+ // New asks gcstar for its plugin list, Old scans the plugin files
+ static void readPluginsNew(int collType, const QString& exe);
+ static void readPluginsOld(int collType, const QString& exe);
+ // collection name as used on the gcstar command line, empty if unsupported
+ static QString gcstarType(int collType);
+
+ bool m_started;
+ int m_collType; // -1 while not configured
+ QString m_plugin; // name of the gcstar plugin to use
+ KProcess* m_process; // 0 unless a search is running
+ QByteArray m_data; // stdout of the gcstar process
+ QMap<int, Data::EntryPtr> m_entries; // map from search result id to entry
+ QStringList m_errors; // stderr of the gcstar process
+};
+
+// Config widget for choosing the collection type and the gcstar plugin.
+class GCstarPluginFetcher::ConfigWidget : public Fetch::ConfigWidget {
+Q_OBJECT
+
+public:
+ ConfigWidget(QWidget* parent, const GCstarPluginFetcher* fetcher = 0);
+ ~ConfigWidget();
+
+ virtual void saveConfig(KConfigGroup& config);
+ virtual QString preferredName() const;
+
+private slots:
+ // refills the plugin combo for the selected collection type
+ void slotTypeChanged();
+ // updates the author label for the selected plugin
+ void slotPluginChanged();
+
+private:
+ // fills the plugin combo the first time the widget is shown
+ void showEvent(QShowEvent* event);
+
+ bool m_needPluginList; // true until the plugin combo has been filled once
+ QString m_originalPluginName; // plugin name taken from the fetcher, if any
+ GUI::CollectionTypeCombo* m_collCombo;
+ GUI::ComboBox* m_pluginCombo;
+ QLabel* m_authorLabel;
+ // the code creating and updating this label is commented out in the implementation
+ QLabel* m_langLabel;
+};
+
+ } // end namespace
+} // end namespace
+
+#endif
diff --git a/src/fetch/googlescholarfetcher.cpp b/src/fetch/googlescholarfetcher.cpp
new file mode 100644
index 0000000..21979c4
--- /dev/null
+++ b/src/fetch/googlescholarfetcher.cpp
@@ -0,0 +1,233 @@
+/***************************************************************************
+ copyright : (C) 2008 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "googlescholarfetcher.h"
+#include "messagehandler.h"
+#include "../filehandler.h"
+#include "../translators/bibteximporter.h"
+#include "../collection.h"
+#include "../entry.h"
+#include "../tellico_kernel.h"
+#include "../tellico_debug.h"
+
+#include <klocale.h>
+#include <kconfig.h>
+#include <kio/job.h>
+
+#include <qlabel.h>
+#include <qlayout.h>
+
+namespace {
+ // initial value of m_limit, the most results handled per search
+ static const int GOOGLE_MAX_RETURNS_TOTAL = 20;
+ // search url, also the base for resolving the bibtex links found in the results
+ static const char* SCHOLAR_BASE_URL = "http://scholar.google.com/scholar";
+}
+
+using Tellico::Fetch::GoogleScholarFetcher;
+
+/**
+ * Creates an idle fetcher and prepares the regular expression used to find the
+ * bibtex links in a result page.
+ *
+ * m_total and m_key get defined values here as well, since continueSearch()
+ * ends up reading m_key without search() necessarily having been called first.
+ */
+GoogleScholarFetcher::GoogleScholarFetcher(QObject* parent_, const char* name_)
+    : Fetcher(parent_, name_),
+      m_limit(GOOGLE_MAX_RETURNS_TOTAL), m_start(0), m_total(-1), m_job(0),
+      m_key(Title), m_started(false), m_cookieIsSet(false) {
+  // cap(1) is the href of a link pointing to scholar.bib
+  m_bibtexRx = QRegExp(QString::fromLatin1("<a\\s.*href\\s*=\\s*\"([^>]*scholar\\.bib[^>]*)\""));
+  m_bibtexRx.setMinimal(true);
+}
+
+// nothing to clean up explicitly
+GoogleScholarFetcher::~GoogleScholarFetcher() {
+}
+
+// fixed name of this source, deliberately not translated
+QString GoogleScholarFetcher::defaultName() {
+ // no i18n
+ return QString::fromLatin1("Google Scholar");
+}
+
+// The name of the source, falling back to the default name when none is set.
+QString GoogleScholarFetcher::source() const {
+  if(m_name.isEmpty()) {
+    return defaultName();
+  }
+  return m_name;
+}
+
+// only bibliographic (bibtex) collections are supported
+bool GoogleScholarFetcher::canFetch(int type) const {
+ return type == Data::Collection::Bibtex;
+}
+
+// this fetcher reads no settings
+void GoogleScholarFetcher::readConfigHook(const KConfigGroup& config_) {
+ Q_UNUSED(config_);
+}
+
+// Starts a new search from the first result. The first search made with this
+// fetcher requests the scholar preferences page beforehand.
+void GoogleScholarFetcher::search(FetchKey key_, const QString& value_) {
+ if(!m_cookieIsSet) {
+ // have to set preferences to have bibtex output
+ FileHandler::readTextFile(QString::fromLatin1("http://scholar.google.com/scholar_setprefs?num=100&scis=yes&scisf=4&submit=Save+Preferences"), true);
+ m_cookieIsSet = true;
+ }
+ // remembered for doSearch(), which is also used by continueSearch()
+ m_key = key_;
+ m_value = value_;
+ m_started = true;
+ m_start = 0;
+ m_total = -1;
+ doSearch();
+}
+
+// repeats the search with the stored key and value, starting at m_start
+void GoogleScholarFetcher::continueSearch() {
+ m_started = true;
+ doSearch();
+}
+
+// Builds the query for the stored key and value and starts the download of the
+// result page. The data arrives in slotData(), the end of the download is
+// handled in slotComplete().
+void GoogleScholarFetcher::doSearch() {
+//  myDebug() << "GoogleScholarFetcher::search() - value = " << value_ << endl;
+
+  if(!canFetch(Kernel::self()->collectionType())) {
+    message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning);
+    stop();
+    return;
+  }
+
+  // the search term depends on the kind of search
+  QString query;
+  switch(m_key) {
+    case Title:
+      query = QString::fromLatin1("allintitle:%1").arg(m_value);
+      break;
+
+    case Keyword:
+      query = m_value;
+      break;
+
+    case Person:
+      query = QString::fromLatin1("author:%1").arg(m_value);
+      break;
+
+    default:
+      kdWarning() << "GoogleScholarFetcher::search() - key not recognized: " << m_key << endl;
+      stop();
+      return;
+  }
+
+  KURL u(QString::fromLatin1(SCHOLAR_BASE_URL));
+  u.addQueryItem(QString::fromLatin1("start"), QString::number(m_start));
+  u.addQueryItem(QString::fromLatin1("q"), query);
+//  myDebug() << "GoogleScholarFetcher::search() - url: " << u.url() << endl;
+
+  m_job = KIO::get(u, false, false);
+  connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+          SLOT(slotData(KIO::Job*, const QByteArray&)));
+  connect(m_job, SIGNAL(result(KIO::Job*)),
+          SLOT(slotComplete(KIO::Job*)));
+}
+
+// Ends the current search, if any: kills a pending download, drops the data
+// collected so far and emits signalDone(). Does nothing, and in particular
+// emits nothing, when no search is in progress.
+void GoogleScholarFetcher::stop() {
+ if(!m_started) {
+ return;
+ }
+ if(m_job) {
+ m_job->kill();
+ m_job = 0;
+ }
+ m_data.truncate(0);
+ m_started = false;
+ emit signalDone(this);
+}
+
+// connected to the data() signal of the job: appends the received bytes to m_data
+void GoogleScholarFetcher::slotData(KIO::Job*, const QByteArray& data_) {
+ QDataStream stream(m_data, IO_WriteOnly | IO_Append);
+ stream.writeRawBytes(data_.data(), data_.size());
+}
+
+// Connected to the result() signal of the job. Looks for bibtex links in the
+// downloaded page, reads each linked file, imports the combined bibtex text and
+// emits one search result per imported entry. Every path ends in stop().
+void GoogleScholarFetcher::slotComplete(KIO::Job* job_) {
+// myDebug() << "GoogleScholarFetcher::slotComplete()" << endl;
+ // since the fetch is done, don't worry about holding the job pointer
+ m_job = 0;
+
+ if(job_->error()) {
+ job_->showErrorDialog(Kernel::self()->widget());
+ stop();
+ return;
+ }
+
+ if(m_data.isEmpty()) {
+ myDebug() << "GoogleScholarFetcher::slotComplete() - no data" << endl;
+ stop();
+ return;
+ }
+
+ QString text = QString::fromUtf8(m_data, m_data.size());
+ QString bibtex;
+ int count = 0;
+ // at most m_limit links are followed, each one is read with readTextFile()
+ for(int pos = text.find(m_bibtexRx); count < m_limit && pos > -1; pos = text.find(m_bibtexRx, pos+m_bibtexRx.matchedLength()), ++count) {
+ // the link is resolved relative to the base url
+ KURL bibtexUrl(QString::fromLatin1(SCHOLAR_BASE_URL), m_bibtexRx.cap(1));
+// myDebug() << bibtexUrl << endl;
+ bibtex += FileHandler::readTextFile(bibtexUrl, true);
+ }
+
+ Import::BibtexImporter imp(bibtex);
+ Data::CollPtr coll = imp.collection();
+ if(!coll) {
+ myDebug() << "GoogleScholarFetcher::slotComplete() - no collection pointer" << endl;
+ stop();
+ return;
+ }
+
+ count = 0;
+ Data::EntryVec entries = coll->entries();
+ for(Data::EntryVec::Iterator entry = entries.begin(); count < m_limit && entry != entries.end(); ++entry, ++count) {
+ if(!m_started) {
+ // might get aborted
+ break;
+ }
+ // description is author/publisher, followed by the year if there is one
+ QString desc = entry->field(QString::fromLatin1("author"))
+ + QChar('/') + entry->field(QString::fromLatin1("publisher"));
+ if(!entry->field(QString::fromLatin1("year")).isEmpty()) {
+ desc += QChar('/') + entry->field(QString::fromLatin1("year"));
+ }
+
+ SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn")));
+ // remember the entry under the result's uid so fetchEntry() can return it later
+ m_entries.insert(r->uid, Data::EntryPtr(entry));
+ emit signalResultFound(r);
+ }
+ m_start = m_entries.count();
+// m_hasMoreResults = m_start <= m_total;
+ m_hasMoreResults = false; // for now, no continued searches
+
+ stop(); // required
+}
+
+// returns the entry stored for the given search result uid in slotComplete()
+// NOTE(review): the non-const operator[] presumably adds a default value for an
+// unknown uid -- confirm against the QMap documentation
+Tellico::Data::EntryPtr GoogleScholarFetcher::fetchEntry(uint uid_) {
+ return m_entries[uid_];
+}
+
+// Updates an entry by searching for its title, looking at fewer results than a
+// regular search does. Without a title the fetcher reports that it is done.
+void GoogleScholarFetcher::updateEntry(Data::EntryPtr entry_) {
+//  myDebug() << "GoogleScholarFetcher::updateEntry()" << endl;
+  // limit to top 5 results
+  m_limit = 5;
+
+  const QString entryTitle = entry_->field(QString::fromLatin1("title"));
+  if(entryTitle.isEmpty()) {
+    myDebug() << "GoogleScholarFetcher::updateEntry() - insufficient info to search" << endl;
+    emit signalDone(this); // always need to emit this if not continuing with the search
+    return;
+  }
+
+  search(Title, entryTitle);
+}
+
+// creates a new config widget for this fetcher
+Tellico::Fetch::ConfigWidget* GoogleScholarFetcher::configWidget(QWidget* parent_) const {
+ return new GoogleScholarFetcher::ConfigWidget(parent_, this);
+}
+
+// there is nothing to configure, so the widget only shows a label saying so;
+// the fetcher argument is not used
+GoogleScholarFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const GoogleScholarFetcher*/*=0*/)
+ : Fetch::ConfigWidget(parent_) {
+ QVBoxLayout* l = new QVBoxLayout(optionsWidget());
+ l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget()));
+ l->addStretch();
+}
+
+// the preferred name is always the default name of the fetcher
+QString GoogleScholarFetcher::ConfigWidget::preferredName() const {
+ return GoogleScholarFetcher::defaultName();
+}
+
+#include "googlescholarfetcher.moc"
diff --git a/src/fetch/googlescholarfetcher.h b/src/fetch/googlescholarfetcher.h
new file mode 100644
index 0000000..4e15475
--- /dev/null
+++ b/src/fetch/googlescholarfetcher.h
@@ -0,0 +1,103 @@
+/***************************************************************************
+ copyright : (C) 2008 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef GOOGLESCHOLARFETCHER_H
+#define GOOGLESCHOLARFETCHER_H
+
+#include "fetcher.h"
+#include "configwidget.h"
+#include "../datavectors.h"
+
+#include <qguardedptr.h>
+#include <qregexp.h>
+
+namespace KIO {
+ class Job;
+}
+
+namespace Tellico {
+ namespace Fetch {
+
+/**
+ * A fetcher for Google Scholar
+ *
+ * The result page is downloaded asynchronously and the bibtex files linked
+ * from it are imported. Only bibtex collections are supported.
+ *
+ * @author Robby Stephenson
+ */
+class GoogleScholarFetcher : public Fetcher {
+Q_OBJECT
+
+public:
+ /**
+ * Creates an idle fetcher.
+ */
+ GoogleScholarFetcher(QObject* parent, const char* name = 0);
+ /**
+ */
+ virtual ~GoogleScholarFetcher();
+
+ /**
+ * Returns the name of the source, the default name if none is set.
+ */
+ virtual QString source() const;
+ virtual bool isSearching() const { return m_started; }
+ virtual void search(FetchKey key, const QString& value);
+ virtual void continueSearch();
+ // google scholar can search title, person or keyword
+ virtual bool canSearch(FetchKey k) const { return k == Title || k == Person || k == Keyword; }
+ virtual void stop();
+ virtual Data::EntryPtr fetchEntry(uint uid);
+ virtual Type type() const { return GoogleScholar; }
+ virtual bool canFetch(int type) const;
+ virtual void readConfigHook(const KConfigGroup& config);
+
+ virtual void updateEntry(Data::EntryPtr entry);
+
+ /**
+ * Returns a widget for modifying the fetcher's config.
+ */
+ virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;
+
+ // this source has no options, so nothing is saved
+ class ConfigWidget : public Fetch::ConfigWidget {
+ public:
+ ConfigWidget(QWidget* parent_, const GoogleScholarFetcher* fetcher = 0);
+ virtual void saveConfig(KConfigGroup&) {}
+ virtual QString preferredName() const;
+ };
+ friend class ConfigWidget;
+
+ static QString defaultName();
+
+private slots:
+ // connected to the data and result signals of the download job
+ void slotData(KIO::Job* job, const QByteArray& data);
+ void slotComplete(KIO::Job* job);
+
+private:
+ // starts the download for the stored key and value
+ void doSearch();
+
+ int m_limit; // most results handled per search
+ int m_start; // passed as the "start" query item
+ int m_total;
+
+ QByteArray m_data; // the downloaded result page
+ QMap<int, Data::EntryPtr> m_entries; // map from search result id to entry
+ QGuardedPtr<KIO::Job> m_job;
+
+ FetchKey m_key;
+ QString m_value;
+ bool m_started;
+
+ QRegExp m_bibtexRx; // matches the bibtex links in the result page
+ bool m_cookieIsSet; // whether the preferences page has been requested yet
+};
+
+ } // end namespace
+} // end namespace
+#endif
diff --git a/src/fetch/ibsfetcher.cpp b/src/fetch/ibsfetcher.cpp
new file mode 100644
index 0000000..b11258b
--- /dev/null
+++ b/src/fetch/ibsfetcher.cpp
@@ -0,0 +1,415 @@
+/***************************************************************************
+ copyright : (C) 2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "ibsfetcher.h"
+#include "messagehandler.h"
+#include "../tellico_kernel.h"
+#include "../tellico_utils.h"
+#include "../collections/bookcollection.h"
+#include "../entry.h"
+#include "../filehandler.h"
+#include "../latin1literal.h"
+#include "../imagefactory.h"
+#include "../tellico_debug.h"
+
+#include <klocale.h>
+#include <kconfig.h>
+#include <kio/job.h>
+
+#include <qregexp.h>
+#include <qlayout.h>
+#include <qlabel.h>
+#include <qfile.h>
+
+//#define IBS_TEST
+
+namespace {
+ // URL used for title/person/keyword searches; search() swaps the file name
+ // to serdsp.asp when looking up an ISBN
+ static const char* IBS_BASE_URL = "http://www.internetbookshop.it/ser/serpge.asp";
+}
+
+using Tellico::Fetch::IBSFetcher;
+
+// Creates an idle fetcher: no search is running yet.
+// m_total is zeroed as well so that it never holds an indeterminate value
+// (it is declared in the class but was previously left uninitialized).
+IBSFetcher::IBSFetcher(QObject* parent_, const char* name_ /*=0*/)
+    : Fetcher(parent_, name_), m_total(0), m_started(false) {
+}
+
+// Translated name used for this source when no custom name is configured.
+QString IBSFetcher::defaultName() {
+  const QString label = i18n("Internet Bookshop (ibs.it)");
+  return label;
+}
+
+// Returns m_name, falling back to defaultName() when m_name is empty.
+QString IBSFetcher::source() const {
+  if(m_name.isEmpty()) {
+    return defaultName();
+  }
+  return m_name;
+}
+
+// Only book and bibtex collections are supported by this source.
+bool IBSFetcher::canFetch(int type) const {
+  if(type == Data::Collection::Book) {
+    return true;
+  }
+  return type == Data::Collection::Bibtex;
+}
+
+// This source has no settings to read, so the config group is ignored.
+void IBSFetcher::readConfigHook(const KConfigGroup& /*config_*/) {
+}
+
+// Starts an asynchronous search.
+//
+// Title, Person and Keyword searches all query the base URL with Type=keyword
+// plus one key-specific parameter (T, A or S). An ISBN search goes to
+// serdsp.asp instead and is completed by slotCompleteISBN(); all other
+// searches are completed by slotComplete(). Unsupported keys and unsupported
+// collection types end the search immediately through stop().
+void IBSFetcher::search(FetchKey key_, const QString& value_) {
+  m_started = true;
+  m_matches.clear();
+
+#ifdef IBS_TEST
+  KURL u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/ibs.html"));
+#else
+  KURL u(QString::fromLatin1(IBS_BASE_URL));
+
+  if(!canFetch(Kernel::self()->collectionType())) {
+    message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning);
+    stop();
+    return;
+  }
+
+  if(key_ == ISBN) {
+    // strip hyphens and keep only the first ISBN of a ';'-separated list
+    QString isbn = value_;
+    isbn.remove('-');
+    isbn = isbn.section(';', 0, 0);
+    u.setFileName(QString::fromLatin1("serdsp.asp"));
+    u.addQueryItem(QString::fromLatin1("isbn"), isbn);
+  } else {
+    // name of the query parameter that carries the search text
+    const char* param = 0;
+    switch(key_) {
+      case Title:
+        param = "T";
+        break;
+      case Person:
+        param = "A";
+        break;
+      case Keyword:
+        param = "S";
+        break;
+      default:
+        break;
+    }
+    if(!param) {
+      kdWarning() << "IBSFetcher::search() - key not recognized: " << key_ << endl;
+      stop();
+      return;
+    }
+    u.addQueryItem(QString::fromLatin1("Type"), QString::fromLatin1("keyword"));
+    u.addQueryItem(QString::fromLatin1(param), value_);
+  }
+#endif
+// myDebug() << "IBSFetcher::search() - url: " << u.url() << endl;
+
+  m_job = KIO::get(u, false, false);
+  connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+          SLOT(slotData(KIO::Job*, const QByteArray&)));
+  // an ISBN lookup lands directly on a detail page, so it has its own handler
+  connect(m_job, SIGNAL(result(KIO::Job*)),
+          key_ == ISBN ? SLOT(slotCompleteISBN(KIO::Job*)) : SLOT(slotComplete(KIO::Job*)));
+}
+
+// Ends the current search, if any: kills a pending job, drops the buffered
+// data and emits signalDone(). Does nothing when no search is running.
+void IBSFetcher::stop() {
+  if(m_started) {
+    if(m_job) {
+      m_job->kill();
+      m_job = 0;
+    }
+    m_data.truncate(0);
+    m_started = false;
+    emit signalDone(this);
+  }
+}
+
+// Appends each chunk delivered by the job to m_data; the buffer is parsed
+// once the job reports completion.
+void IBSFetcher::slotData(KIO::Job*, const QByteArray& data_) {
+ // opening the stream in append mode writes the new bytes after the existing ones
+ QDataStream stream(m_data, IO_WriteOnly | IO_Append);
+ stream.writeRawBytes(data_.data(), data_.size());
+}
+
+// Handles the end of a title/person/keyword search: scans the downloaded
+// page for anchors pointing into /code/ and emits one SearchResult per
+// anchor, remembering the detail URL of each in m_matches.
+// Always finishes by calling stop().
+void IBSFetcher::slotComplete(KIO::Job* job_) {
+ // since the fetch is done, don't worry about holding the job pointer
+ m_job = 0;
+
+ if(job_->error()) {
+ job_->showErrorDialog(Kernel::self()->widget());
+ stop();
+ return;
+ }
+
+ if(m_data.isEmpty()) {
+ myDebug() << "IBSFetcher::slotComplete() - no data" << endl;
+ stop();
+ return;
+ }
+
+ QString s = Tellico::decodeHTML(QString(m_data));
+ // really specific regexp
+ // cap(1) is the detail url, cap(2) the bold anchor text used as the title
+ QString pat = QString::fromLatin1("http://www.internetbookshop.it/code/");
+ QRegExp anchorRx(QString::fromLatin1("<a\\s+[^>]*href\\s*=\\s*[\"'](") +
+ QRegExp::escape(pat) +
+ QString::fromLatin1("[^\"]*)\"[^>]*><b>([^<]+)<"), false);
+ anchorRx.setMinimal(true);
+ // used to strip any markup from the description text
+ QRegExp tagRx(QString::fromLatin1("<.*>"));
+ tagRx.setMinimal(true);
+
+ // url, title and description of the match that has been found but not yet emitted
+ QString u, t, d;
+ int pos2;
+ for(int pos = anchorRx.search(s); m_started && pos > -1; pos = anchorRx.search(s, pos+anchorRx.matchedLength())) {
+ // emit the previous match before collecting the data of the current one
+ if(!u.isEmpty()) {
+ SearchResult* r = new SearchResult(this, t, d, QString());
+ emit signalResultFound(r);
+
+#ifdef IBS_TEST
+ KURL url = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/ibs2.html"));
+#else
+ // the url probably contains &amp; so be careful
+ KURL url = u.replace(QString::fromLatin1("&amp;"), QChar('&'));
+#endif
+ m_matches.insert(r->uid, url);
+
+ u.truncate(0);
+ t.truncate(0);
+ d.truncate(0);
+ }
+ u = anchorRx.cap(1);
+ t = anchorRx.cap(2);
+ // the description is the text between the first two <br> tags after the anchor
+ pos2 = s.find(QString::fromLatin1("<br>"), pos, false);
+ if(pos2 > -1) {
+ int pos3 = s.find(QString::fromLatin1("<br>"), pos2+1, false);
+ if(pos3 > -1) {
+ d = s.mid(pos2, pos3-pos2).remove(tagRx).simplifyWhiteSpace();
+ }
+ }
+ }
+#ifndef IBS_TEST
+ // the last match found by the loop has not been emitted yet
+ if(!u.isEmpty()) {
+ SearchResult* r = new SearchResult(this, t, d, QString());
+ emit signalResultFound(r);
+ m_matches.insert(r->uid, u.replace(QString::fromLatin1("&amp;"), QChar('&')));
+ }
+#endif
+
+ stop();
+}
+
+// Handles the end of an ISBN lookup. The downloaded page is parsed as an
+// entry right away; if that works, a single SearchResult is emitted and the
+// job's URL is stored in m_matches for it. Always finishes by calling stop().
+void IBSFetcher::slotCompleteISBN(KIO::Job* job_) {
+  // the transfer is over, so the job pointer is no longer needed
+  m_job = 0;
+
+  if(job_->error()) {
+    job_->showErrorDialog(Kernel::self()->widget());
+    stop();
+    return;
+  }
+
+  if(m_data.isEmpty()) {
+    myDebug() << "IBSFetcher::slotCompleteISBN() - no data" << endl;
+    stop();
+    return;
+  }
+
+  const QString page = Tellico::decodeHTML(QString(m_data));
+  // the "book not found" marker is matched case-insensitively
+  const int notFoundPos = page.find(QString::fromLatin1("Libro non presente"), 0, false);
+  if(notFoundPos > -1) {
+    stop();
+    return;
+  }
+
+  Data::EntryPtr entry = parseEntry(page);
+  if(!entry) {
+    stop();
+    return;
+  }
+
+  const QString author = entry->field(QString::fromLatin1("author"));
+  const QString publisher = entry->field(QString::fromLatin1("publisher"));
+  const QString summary = author + '/' + publisher;
+  SearchResult* result = new SearchResult(this, entry->title(), summary,
+                                          entry->field(QString::fromLatin1("isbn")));
+  emit signalResultFound(result);
+  m_matches.insert(result->uid, static_cast<KIO::TransferJob*>(job_)->url().url());
+
+  stop();
+}
+
+// Returns the full entry for a previously emitted search result.
+// Parsed entries are cached in m_entries; otherwise the detail page stored
+// in m_matches is downloaded synchronously and parsed. Returns 0 when the
+// uid has no URL, the page is empty, or parsing fails.
+Tellico::Data::EntryPtr IBSFetcher::fetchEntry(uint uid_) {
+  // reuse the entry if this result was already fetched
+  Data::EntryPtr cached = m_entries[uid_];
+  if(cached) {
+    return cached;
+  }
+
+  KURL pageUrl = m_matches[uid_];
+  if(pageUrl.isEmpty()) {
+    kdWarning() << "IBSFetcher::fetchEntry() - no url in map" << endl;
+    return 0;
+  }
+
+  QString html = Tellico::decodeHTML(FileHandler::readTextFile(pageUrl, true));
+  if(html.isEmpty()) {
+    myDebug() << "IBSFetcher::fetchEntry() - no text results" << endl;
+    return 0;
+  }
+
+// myDebug() << pageUrl.url() << endl;
+#if 0
+  kdWarning() << "Remove debug from ibsfetcher.cpp" << endl;
+  QFile f(QString::fromLatin1("/tmp/test.html"));
+  if(f.open(IO_WriteOnly)) {
+    QTextStream t(&f);
+    t.setEncoding(QTextStream::UnicodeUTF8);
+    t << html;
+  }
+  f.close();
+#endif
+
+  Data::EntryPtr parsed = parseEntry(html);
+  if(!parsed) {
+    myDebug() << "IBSFetcher::fetchEntry() - error in processing entry" << endl;
+    return 0;
+  }
+  // remember the entry so a repeated request needs no second download
+  m_entries.insert(uid_, parsed);
+  return parsed;
+}
+
+// Builds a book entry from the HTML of an ibs.it detail page.
+// Each Italian caption in fieldMap is located in the page and the first text
+// following it is stored in the mapped field. The ISBN is taken from an
+// "isbn=" parameter in the page, the cover is fetched by ISBN, and an
+// optional description is stored in an added "plot" field.
+Tellico::Data::EntryPtr IBSFetcher::parseEntry(const QString& str_) {
+ // myDebug() << "IBSFetcher::parseEntry()" << endl;
+ // %1 is a caption; the capture is the first non-blank text after the tags that follow it
+ QString pat = QString::fromLatin1("%1(?:<[^>]+>)+([^<>\\s][^<>]+)");
+
+ QRegExp isbnRx(QString::fromLatin1("isbn=([\\dxX]{13})"), false);
+ QString isbn;
+ int pos = isbnRx.search(str_);
+ if(pos > -1) {
+ isbn = isbnRx.cap(1);
+ }
+
+ Data::CollPtr coll = new Data::BookCollection(true);
+
+ // map captions in HTML to field names
+ QMap<QString, QString> fieldMap;
+ fieldMap.insert(QString::fromLatin1("Titolo"), QString::fromLatin1("title"));
+ fieldMap.insert(QString::fromLatin1("Autore"), QString::fromLatin1("author"));
+ fieldMap.insert(QString::fromLatin1("Anno"), QString::fromLatin1("pub_year"));
+ fieldMap.insert(QString::fromLatin1("Categoria"), QString::fromLatin1("genre"));
+ fieldMap.insert(QString::fromLatin1("Rilegatura"), QString::fromLatin1("binding"));
+ fieldMap.insert(QString::fromLatin1("Editore"), QString::fromLatin1("publisher"));
+ fieldMap.insert(QString::fromLatin1("Dati"), QString::fromLatin1("edition"));
+
+ // matches a page count such as "123 p." plus an optional trailing comma
+ QRegExp pagesRx(QString::fromLatin1("(\\d+) p\\.(\\s*,\\s*)?"));
+ Data::EntryPtr entry = new Data::Entry(coll);
+
+ for(QMap<QString, QString>::Iterator it = fieldMap.begin(); it != fieldMap.end(); ++it) {
+ QRegExp infoRx(pat.arg(it.key()));
+ pos = infoRx.search(str_);
+ if(pos > -1) {
+ if(it.data() == Latin1Literal("edition")) {
+ // the "Dati" text may include the page count; move it to the pages field
+ int pos2 = pagesRx.search(infoRx.cap(1));
+ if(pos2 > -1) {
+ entry->setField(QString::fromLatin1("pages"), pagesRx.cap(1));
+ entry->setField(it.data(), infoRx.cap(1).remove(pagesRx));
+ } else {
+ entry->setField(it.data(), infoRx.cap(1));
+ }
+ } else {
+ entry->setField(it.data(), infoRx.cap(1));
+ }
+ }
+ }
+
+ // image
+ if(!isbn.isEmpty()) {
+ entry->setField(QString::fromLatin1("isbn"), isbn);
+#if 1
+ // the cover url is built from the isbn instead of being scraped from the page
+ QString imgURL = QString::fromLatin1("http://giotto.ibs.it/cop/copt13.asp?f=%1").arg(isbn);
+ myLog() << "IBSFetcher() - cover = " << imgURL << endl;
+ QString id = ImageFactory::addImage(imgURL, true, QString::fromLatin1("http://internetbookshop.it"));
+ if(!id.isEmpty()) {
+ entry->setField(QString::fromLatin1("cover"), id);
+ }
+#else
+ // disabled alternative: take the cover url from an <img> tag in the page
+ QRegExp imgRx(QString::fromLatin1("<img\\s+[^>]*\\s*src\\s*=\\s*\"(http://[^/]*\\.ibs\\.it/[^\"]+e=%1)").arg(isbn));
+ imgRx.setMinimal(true);
+ pos = imgRx.search(str_);
+ if(pos > -1) {
+ myLog() << "IBSFetcher() - cover = " << imgRx.cap(1) << endl;
+ QString id = ImageFactory::addImage(imgRx.cap(1), true, QString::fromLatin1("http://internetbookshop.it"));
+ if(!id.isEmpty()) {
+ entry->setField(QString::fromLatin1("cover"), id);
+ }
+ }
+#endif
+ }
+
+ // now look for description
+ // "Descrizione" is tried first, then "In sintesi"
+ QRegExp descRx(QString::fromLatin1("Descrizione(?:<[^>]+>)+([^<>\\s].+)</span>"), false);
+ descRx.setMinimal(true);
+ pos = descRx.search(str_);
+ if(pos == -1) {
+ descRx.setPattern(QString::fromLatin1("In sintesi(?:<[^>]+>)+([^<>\\s].+)</span>"));
+ pos = descRx.search(str_);
+ }
+ if(pos > -1) {
+ // the plot field is added to the collection here, only when a description was found
+ Data::FieldPtr f = new Data::Field(QString::fromLatin1("plot"), i18n("Plot Summary"), Data::Field::Para);
+ coll->addField(f);
+ entry->setField(f, descRx.cap(1).simplifyWhiteSpace());
+ }
+
+ // IBS lists the author's family name before the given name, so rotate them
+ QStringList names = entry->fields(QString::fromLatin1("author"), false);
+ if(!names.isEmpty() && !names[0].isEmpty()) {
+ for(QStringList::Iterator it = names.begin(); it != names.end(); ++it) {
+ if((*it).find(',') > -1) {
+ continue; // skip if it has a comma
+ }
+ QStringList words = QStringList::split(' ', *it);
+ if(words.isEmpty()) {
+ continue;
+ }
+ // put first word in back
+ words.append(words[0]);
+ words.pop_front();
+ *it = words.join(QChar(' '));
+ }
+ entry->setField(QString::fromLatin1("author"), names.join(QString::fromLatin1("; ")));
+ }
+ return entry;
+}
+
+// Starts a search for an existing entry: by ISBN when the entry has one,
+// otherwise by title. With neither available, signalDone() is emitted at
+// once because no search will be started.
+void IBSFetcher::updateEntry(Data::EntryPtr entry_) {
+  const QString isbn = entry_->field(QString::fromLatin1("isbn"));
+  if(isbn.isEmpty()) {
+    const QString title = entry_->field(QString::fromLatin1("title"));
+    if(title.isEmpty()) {
+      myDebug() << "IBSFetcher::updateEntry() - insufficient info to search" << endl;
+      emit signalDone(this); // always need to emit this if not continuing with the search
+    } else {
+      search(Fetch::Title, title);
+    }
+  } else {
+    search(Fetch::ISBN, isbn);
+  }
+}
+
+// Creates the configuration widget for this source as a child of parent_.
+Tellico::Fetch::ConfigWidget* IBSFetcher::configWidget(QWidget* parent_) const {
+  Fetch::ConfigWidget* widget = new IBSFetcher::ConfigWidget(parent_);
+  return widget;
+}
+
+// The widget only shows a label saying that there is nothing to configure.
+IBSFetcher::ConfigWidget::ConfigWidget(QWidget* parent_)
+    : Fetch::ConfigWidget(parent_) {
+  QVBoxLayout* layout = new QVBoxLayout(optionsWidget());
+  QLabel* note = new QLabel(i18n("This source has no options."), optionsWidget());
+  layout->addWidget(note);
+  layout->addStretch();
+}
+
+// The preferred name is the fetcher's default name.
+QString IBSFetcher::ConfigWidget::preferredName() const {
+  const QString name = IBSFetcher::defaultName();
+  return name;
+}
+
+#include "ibsfetcher.moc"
diff --git a/src/fetch/ibsfetcher.h b/src/fetch/ibsfetcher.h
new file mode 100644
index 0000000..39326b2
--- /dev/null
+++ b/src/fetch/ibsfetcher.h
@@ -0,0 +1,87 @@
+/***************************************************************************
+ copyright : (C) 2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef TELLICO_FETCH_IBSFETCHER_H
+#define TELLICO_FETCH_IBSFETCHER_H
+
+#include "fetcher.h"
+#include "configwidget.h"
+
+#include <qcstring.h> // for QByteArray
+#include <qguardedptr.h>
+
+namespace KIO {
+ class Job;
+}
+
+namespace Tellico {
+ namespace Fetch {
+
+/**
+ * A fetcher for Internet Bookshop (ibs.it / internetbookshop.it)
+ *
+ * @author Robby Stephenson
+ */
+class IBSFetcher : public Fetcher {
+Q_OBJECT
+
+public:
+ IBSFetcher(QObject* parent, const char* name = 0);
+ virtual ~IBSFetcher() {}
+
+ virtual QString source() const;
+ // a search counts as running for as long as m_started is set
+ virtual bool isSearching() const { return m_started; }
+ virtual void search(FetchKey key, const QString& value);
+ // can search title, person, isbn, or keyword. No UPC or Raw for now.
+ virtual bool canSearch(FetchKey k) const { return k == Title || k == Person || k == ISBN || k == Keyword; }
+ virtual void stop();
+ virtual Data::EntryPtr fetchEntry(uint uid);
+ virtual Type type() const { return IBS; }
+ virtual bool canFetch(int type) const;
+ virtual void readConfigHook(const KConfigGroup& config);
+
+ virtual void updateEntry(Data::EntryPtr entry);
+
+ virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;
+
+ // configuration widget for this source; saveConfig() is a no-op, so nothing is persisted by it
+ class ConfigWidget : public Fetch::ConfigWidget {
+ public:
+ ConfigWidget(QWidget* parent_);
+ virtual void saveConfig(KConfigGroup&) {}
+ virtual QString preferredName() const;
+ };
+ friend class ConfigWidget;
+
+ static QString defaultName();
+
+private slots:
+ // receives the chunks of data delivered by the job
+ void slotData(KIO::Job* job, const QByteArray& data);
+ // result handler for title, person and keyword searches
+ void slotComplete(KIO::Job* job);
+ // result handler for isbn searches
+ void slotCompleteISBN(KIO::Job* job);
+
+private:
+ // builds an entry from the html of a detail page
+ Data::EntryPtr parseEntry(const QString& str);
+
+ // raw bytes received so far for the running job
+ QByteArray m_data;
+ // NOTE(review): not referenced anywhere in ibsfetcher.cpp -- possibly a leftover
+ int m_total;
+ // cache of entries already fetched, keyed by search result uid
+ QMap<int, Data::EntryPtr> m_entries;
+ // detail page url of each search result, keyed by search result uid
+ QMap<int, KURL> m_matches;
+ QGuardedPtr<KIO::Job> m_job;
+
+ bool m_started;
+// QStringList m_fields;
+};
+
+ } // end namespace
+} // end namespace
+#endif
diff --git a/src/fetch/imdbfetcher.cpp b/src/fetch/imdbfetcher.cpp
new file mode 100644
index 0000000..1066177
--- /dev/null
+++ b/src/fetch/imdbfetcher.cpp
@@ -0,0 +1,1208 @@
+/***************************************************************************
+ copyright : (C) 2004-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "imdbfetcher.h"
+#include "../tellico_kernel.h"
+#include "../collections/videocollection.h"
+#include "../entry.h"
+#include "../field.h"
+#include "../filehandler.h"
+#include "../latin1literal.h"
+#include "../imagefactory.h"
+#include "../tellico_utils.h"
+#include "../gui/listboxtext.h"
+#include "../tellico_debug.h"
+
+#include <klocale.h>
+#include <kdialogbase.h>
+#include <kconfig.h>
+#include <klineedit.h>
+#include <knuminput.h>
+
+#include <qregexp.h>
+#include <qfile.h>
+#include <qmap.h>
+#include <qvbox.h>
+#include <qlabel.h>
+#include <qlistbox.h>
+#include <qwhatsthis.h>
+#include <qlayout.h>
+#include <qcheckbox.h>
+#include <qvgroupbox.h>
+
+//#define IMDB_TEST
+
+namespace {
+ // host used when the config does not provide one
+ static const char* IMDB_SERVER = "akas.imdb.com";
+ // initial result limit, and the amount the limit grows by on each continueSearch()
+ static const uint IMDB_MAX_RESULTS = 20;
+ // NOTE(review): presumably the separator for multi-value fields; it is used further down the file -- confirm
+ static const QString sep = QString::fromLatin1("; ");
+}
+
+using Tellico::Fetch::IMDBFetcher;
+
+// regular expressions shared by all fetchers; they start out null and are
+// allocated by initRegExps() when the first fetcher is constructed
+QRegExp* IMDBFetcher::s_tagRx = 0;
+QRegExp* IMDBFetcher::s_anchorRx = 0;
+QRegExp* IMDBFetcher::s_anchorTitleRx = 0;
+QRegExp* IMDBFetcher::s_anchorNameRx = 0;
+QRegExp* IMDBFetcher::s_titleRx = 0;
+
+// Allocates a non-greedy regular expression for the given Latin-1 pattern.
+static QRegExp* newMinimalRx(const char* pattern_, bool caseSensitive_) {
+  QRegExp* rx = new QRegExp(QString::fromLatin1(pattern_), caseSensitive_);
+  rx->setMinimal(true);
+  return rx;
+}
+
+// static
+// Creates the shared regular expressions. All of them are non-greedy; only
+// the generic tag expression is case-sensitive.
+void IMDBFetcher::initRegExps() {
+  // any html tag
+  s_tagRx = newMinimalRx("<.*>", true);
+
+  // any anchor: cap(1) is the href, cap(2) the anchor text
+  s_anchorRx = newMinimalRx("<a\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"[^<]*>([^<]*)</a>", false);
+
+  // anchors whose href contains /title/
+  s_anchorTitleRx = newMinimalRx("<a\\s+[^>]*href\\s*=\\s*\"([^\"]*/title/[^\"]*)\"[^<]*>([^<]*)</a>", false);
+
+  // anchors whose href contains /name/
+  s_anchorNameRx = newMinimalRx("<a\\s+[^>]*href\\s*=\\s*\"([^\"]*/name/[^\"]*)\"[^<]*>([^<]*)</a>", false);
+
+  // contents of the page's <title> element
+  s_titleRx = newMinimalRx("<title>(.*)</title>", false);
+}
+
+// Creates an idle fetcher that uses the default host and result limit, and
+// makes sure the shared regular expressions exist.
+IMDBFetcher::IMDBFetcher(QObject* parent_, const char* name_) : Fetcher(parent_, name_),
+    m_job(0), m_started(false), m_fetchImages(true), m_host(QString::fromLatin1(IMDB_SERVER)),
+    m_limit(IMDB_MAX_RESULTS), m_countOffset(0) {
+  // These members are otherwise only assigned by readConfigHook() and
+  // search(); give them defined values so that nothing reads an
+  // indeterminate value when one of those has not run yet.
+  m_numCast = 10; // same default as readConfigHook()
+  m_redirected = false;
+  m_currentTitleBlock = Unknown;
+
+  if(!s_tagRx) {
+    initRegExps();
+  }
+}
+
+// Nothing to release here; the static regular expressions are shared
+// between fetchers and are not deleted by the destructor.
+IMDBFetcher::~IMDBFetcher() {
+}
+
+// Translated name used for this source when no custom name is configured.
+QString IMDBFetcher::defaultName() {
+  const QString label = i18n("Internet Movie Database");
+  return label;
+}
+
+// Returns m_name, falling back to defaultName() when m_name is empty.
+QString IMDBFetcher::source() const {
+  if(m_name.isEmpty()) {
+    return defaultName();
+  }
+  return m_name;
+}
+
+// Only video collections are supported by this source.
+bool IMDBFetcher::canFetch(int type) const {
+  const bool isVideo = (type == Data::Collection::Video);
+  return isVideo;
+}
+
+// Reads the source's settings: host (kept unchanged when the entry is
+// empty), maximum cast size, whether to fetch images, and custom fields.
+void IMDBFetcher::readConfigHook(const KConfigGroup& config_) {
+  const QString configuredHost = config_.readEntry("Host");
+  if(!configuredHost.isEmpty()) {
+    m_host = configuredHost;
+  }
+
+  m_numCast = config_.readNumEntry("Max Cast", 10);
+  m_fetchImages = config_.readBoolEntry("Fetch Images", true);
+  m_fields = config_.readListEntry("Custom Fields");
+}
+
+// multiple values not supported
+//
+// Starts an asynchronous search on the /find page of the configured host.
+// Only Title and Person keys are supported, and only while the current
+// collection is a video collection; otherwise the search ends at once
+// through stop(). All per-search state is reset first, including the result
+// limit, which continueSearch() may have raised during an earlier search.
+void IMDBFetcher::search(FetchKey key_, const QString& value_) {
+  m_key = key_;
+  m_value = value_;
+  m_started = true;
+  m_redirected = false;
+  m_data.truncate(0);
+  m_matches.clear();
+  m_popularTitles.truncate(0);
+  m_exactTitles.truncate(0);
+  m_partialTitles.truncate(0);
+  m_currentTitleBlock = Unknown;
+  m_countOffset = 0;
+  // a new search starts again with a single page of results
+  m_limit = IMDB_MAX_RESULTS;
+
+// only search if current collection is a video collection
+  if(Kernel::self()->collectionType() != Data::Collection::Video) {
+    myDebug() << "IMDBFetcher::search() - collection type mismatch, stopping" << endl;
+    stop();
+    return;
+  }
+
+#ifdef IMDB_TEST
+  if(m_key == Title) {
+    m_url = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/imdb-title.html"));
+    m_redirected = false;
+  } else {
+    m_url = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/imdb-name.html"));
+    m_redirected = true;
+  }
+#else
+  m_url = KURL();
+  m_url.setProtocol(QString::fromLatin1("http"));
+  m_url.setHost(m_host.isEmpty() ? QString::fromLatin1(IMDB_SERVER) : m_host);
+  m_url.setPath(QString::fromLatin1("/find"));
+
+  // the "s" parameter selects the kind of search: tt for titles, nm for names
+  switch(key_) {
+    case Title:
+      m_url.addQueryItem(QString::fromLatin1("s"), QString::fromLatin1("tt"));
+      break;
+
+    case Person:
+      m_url.addQueryItem(QString::fromLatin1("s"), QString::fromLatin1("nm"));
+      break;
+
+    default:
+      kdWarning() << "IMDBFetcher::search() - FetchKey not supported" << endl;
+      stop();
+      return;
+  }
+
+  // as far as I can tell, the url encoding should always be iso-8859-1
+  // not utf-8
+  m_url.addQueryItem(QString::fromLatin1("q"), value_, 4 /* iso-8859-1 */);
+
+// myDebug() << "IMDBFetcher::search() url = " << m_url << endl;
+#endif
+
+  m_job = KIO::get(m_url, false, false);
+  connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+          SLOT(slotData(KIO::Job*, const QByteArray&)));
+  connect(m_job, SIGNAL(result(KIO::Job*)),
+          SLOT(slotComplete(KIO::Job*)));
+  // a redirection tells slotComplete() that the server went straight to a single result
+  connect(m_job, SIGNAL(redirection(KIO::Job *, const KURL&)),
+          SLOT(slotRedirection(KIO::Job*, const KURL&)));
+}
+
+// Emits the next batch of results from the data that was already downloaded.
+// The limit is raised by IMDB_MAX_RESULTS and parsing resumes in the block
+// recorded in m_currentTitleBlock, falling through to the following blocks
+// whenever the current one is exhausted (m_countOffset is back at 0).
+void IMDBFetcher::continueSearch() {
+ m_started = true;
+ m_limit += IMDB_MAX_RESULTS;
+
+ if(m_currentTitleBlock == Popular) {
+ parseTitleBlock(m_popularTitles);
+ // if the offset is 0, then we need to be looking at the next block
+ m_currentTitleBlock = m_countOffset == 0 ? Exact : Popular;
+ }
+
+ // current title block might have changed
+ if(m_currentTitleBlock == Exact) {
+ parseTitleBlock(m_exactTitles);
+ m_currentTitleBlock = m_countOffset == 0 ? Partial : Exact;
+ }
+
+ if(m_currentTitleBlock == Partial) {
+ parseTitleBlock(m_partialTitles);
+ m_currentTitleBlock = m_countOffset == 0 ? Unknown : Partial;
+ }
+
+ // a person page is re-parsed from the stored data; parseSingleNameResult() calls stop() itself
+ if(m_currentTitleBlock == SinglePerson) {
+ parseSingleNameResult();
+ }
+
+ stop();
+}
+
+// Ends the current search, if any: kills a pending job, clears the running
+// and redirected flags and emits signalDone(). The downloaded data is kept.
+// Does nothing when no search is running.
+void IMDBFetcher::stop() {
+  if(m_started) {
+//  myLog() << "IMDBFetcher::stop()" << endl;
+    if(m_job) {
+      m_job->kill();
+      m_job = 0;
+    }
+
+    m_started = false;
+    m_redirected = false;
+
+    emit signalDone(this);
+  }
+}
+
+// Appends each chunk delivered by the job to m_data; the buffer is parsed
+// once the job reports completion.
+void IMDBFetcher::slotData(KIO::Job*, const QByteArray& data_) {
+ // opening the stream in append mode writes the new bytes after the existing ones
+ QDataStream stream(m_data, IO_WriteOnly | IO_Append);
+ stream.writeRawBytes(data_.data(), data_.size());
+}
+
+// Records that the job was redirected and remembers the new URL;
+// slotComplete() treats a redirected job as a single-result page.
+void IMDBFetcher::slotRedirection(KIO::Job*, const KURL& toURL_) {
+  m_redirected = true;
+  m_url = toURL_;
+}
+
+// Handles the end of a download and hands the data to the matching parser:
+// single or multiple results, for a title search or any other (name) search.
+// A job error or an empty download ends the search through stop().
+void IMDBFetcher::slotComplete(KIO::Job* job_) {
+  // the transfer is over, so the job pointer is no longer needed
+  m_job = 0;
+
+  if(job_->error()) {
+    job_->showErrorDialog(Kernel::self()->widget());
+    stop();
+    return;
+  }
+
+  if(m_data.isEmpty()) {
+    stop();
+    return;
+  }
+
+  const bool titleSearch = (m_key == Title);
+  if(m_redirected) {
+    // a single result was found if we got redirected
+    if(titleSearch) {
+      parseSingleTitleResult();
+    } else {
+      parseSingleNameResult();
+    }
+    return;
+  }
+
+  if(titleSearch) {
+    parseMultipleTitleResults();
+  } else {
+    parseMultipleNameResults();
+  }
+}
+
+// Emits one result for a page that is itself a title page. The page's
+// <title> text is split at the first '(': the part before it becomes the
+// result title and the rest (parenthesis included) the description.
+void IMDBFetcher::parseSingleTitleResult() {
+// myDebug() << "IMDBFetcher::parseSingleTitleResult()" << endl;
+  s_titleRx->search(Tellico::decodeHTML(QString(m_data)));
+  const QString pageTitle = s_titleRx->cap(1);
+
+  // split title at parenthesis
+  // FIXME: maybe remove parentheses here?
+  QString title;
+  QString desc; // stays null when there is no parenthesis
+  const int parenPos = pageTitle.find('(');
+  if(parenPos == -1) {
+    title = pageTitle;
+  } else {
+    title = pageTitle.left(parenPos);
+    desc = pageTitle.mid(parenPos);
+  }
+
+  SearchResult* result = new SearchResult(this, title, desc, QString());
+  m_matches.insert(result->uid, m_url);
+  emit signalResultFound(result);
+
+  m_hasMoreResults = false;
+  stop();
+}
+
+// Splits a title search page into its popular, exact and partial sections,
+// stores each section's HTML for continueSearch(), and parses them in that
+// order until the result limit is reached. Always finishes by calling stop().
+void IMDBFetcher::parseMultipleTitleResults() {
+// myDebug() << "IMDBFetcher::parseMultipleTitleResults()" << endl;
+ QString output = Tellico::decodeHTML(QString(m_data));
+
+ // IMDb can return three title lists, popular, exact, and partial
+ // the popular titles are in the first table, after the "Popular Titles" text
+ // each heading is searched for after the previous one (or from the start if that one is missing)
+ int pos_popular = output.find(QString::fromLatin1("Popular Titles"), 0, false);
+ int pos_exact = output.find(QString::fromLatin1("Exact Matches"), QMAX(pos_popular, 0), false);
+ int pos_partial = output.find(QString::fromLatin1("Partial Matches"), QMAX(pos_exact, 0), false);
+ int end_popular = pos_exact; // keep track of where to end
+ if(end_popular == -1) {
+ end_popular = pos_partial == -1 ? output.length() : pos_partial;
+ }
+ int end_exact = pos_partial; // keep track of where to end
+ if(end_exact == -1) {
+ end_exact = output.length();
+ }
+
+ // if found popular matches
+ if(pos_popular > -1) {
+ m_popularTitles = output.mid(pos_popular, end_popular-pos_popular);
+ }
+ // if found exact matches
+ if(pos_exact > -1) {
+ m_exactTitles = output.mid(pos_exact, end_exact-pos_exact);
+ }
+ // the partial matches run to the end of the page
+ if(pos_partial > -1) {
+ m_partialTitles = output.mid(pos_partial);
+ }
+
+ parseTitleBlock(m_popularTitles);
+ // if the offset is 0, then we need to be looking at the next block
+ m_currentTitleBlock = m_countOffset == 0 ? Exact : Popular;
+
+ if(m_matches.size() < m_limit) {
+ parseTitleBlock(m_exactTitles);
+ m_currentTitleBlock = m_countOffset == 0 ? Partial : Exact;
+ }
+
+ if(m_matches.size() < m_limit) {
+ parseTitleBlock(m_partialTitles);
+ m_currentTitleBlock = m_countOffset == 0 ? Unknown : Partial;
+ }
+
+#ifndef NDEBUG
+ if(m_matches.size() == 0) {
+ myDebug() << "IMDBFetcher::parseMultipleTitleResults() - no matches found." << endl;
+ }
+#endif
+
+ stop();
+}
+
+// Emits a search result for every title anchor in one section of a result
+// page, skipping the first m_countOffset anchors (already emitted by an
+// earlier pass) and stopping once m_limit results exist.
+//
+// The description of a result is the parenthesised text inside the anchor
+// or, failing that, right after it, optionally followed by "aka" text. On
+// return m_countOffset is 0 when the section was exhausted below the limit,
+// otherwise the number of anchors consumed so far.
+void IMDBFetcher::parseTitleBlock(const QString& str_) {
+  if(str_.isEmpty()) {
+    m_countOffset = 0;
+    return;
+  }
+// myDebug() << "IMDBFetcher::parseTitleBlock() - " << m_currentTitleBlock << endl;
+
+  QRegExp akaRx(QString::fromLatin1("aka (.*)(</li>|<br)"), false);
+  akaRx.setMinimal(true);
+
+  m_hasMoreResults = false;
+
+  int count = 0;
+  int start = s_anchorTitleRx->search(str_);
+  while(m_started && start > -1) {
+    // split title at parenthesis
+    const QString cap1 = s_anchorTitleRx->cap(1); // the anchor url
+    const QString cap2 = s_anchorTitleRx->cap(2).stripWhiteSpace(); // the anchor text
+    start += s_anchorTitleRx->matchedLength();
+    int pPos = cap2.find('('); // if it has parentheses, use that for description
+    QString desc;
+    if(pPos > -1) {
+      int pPos2 = cap2.find(')', pPos+1);
+      if(pPos2 > -1) {
+        desc = cap2.mid(pPos+1, pPos2-pPos-1);
+      }
+    } else {
+      // parenthesis might be outside anchor tag
+      int end = s_anchorTitleRx->search(str_, start);
+      if(end == -1) {
+        end = str_.length();
+      }
+      QString text = str_.mid(start, end-start);
+      pPos = text.find('(');
+      if(pPos > -1) {
+        int pNewLine = text.find(QString::fromLatin1("<br"));
+        if(pNewLine == -1 || pPos < pNewLine) {
+          int pPos2 = text.find(')', pPos);
+          // without a closing parenthesis the length would be negative,
+          // which mid() treats as "everything up to the end of the text"
+          if(pPos2 > -1) {
+            desc = text.mid(pPos+1, pPos2-pPos-1);
+          }
+        }
+        // the title itself has no parenthesis, so it must not be cut below
+        pPos = -1;
+      }
+    }
+    // multiple matches might have 'aka' info
+    int end = s_anchorTitleRx->search(str_, start+1);
+    if(end == -1) {
+      end = str_.length();
+    }
+    int akaPos = akaRx.search(str_, start+1);
+    if(akaPos > -1 && akaPos < end) {
+      // limit to 50 chars
+      desc += QChar(' ') + akaRx.cap(1).stripWhiteSpace().remove(*s_tagRx);
+      if(desc.length() > 50) {
+        desc = desc.left(50) + QString::fromLatin1("...");
+      }
+    }
+
+    // advance to the next anchor now; cap1 and cap2 already hold this one's data
+    start = s_anchorTitleRx->search(str_, start);
+
+    // anchors below the offset were emitted by an earlier pass
+    if(count < m_countOffset) {
+      ++count;
+      continue;
+    }
+
+    // if we got this far, then there is a valid result
+    if(m_matches.size() >= m_limit) {
+      m_hasMoreResults = true;
+      break;
+    }
+
+    SearchResult* r = new SearchResult(this, pPos == -1 ? cap2 : cap2.left(pPos), desc, QString());
+    KURL u(m_url, cap1);
+    u.setQuery(QString::null);
+    m_matches.insert(r->uid, u);
+    emit signalResultFound(r);
+    ++count;
+  }
+  // unless this is the partial block, a later block may still hold results
+  if(!m_hasMoreResults && m_currentTitleBlock != Partial) {
+    m_hasMoreResults = true;
+  }
+  m_countOffset = m_matches.size() < m_limit ? 0 : count;
+}
+
+// Emits a search result for every title anchor on a person's page, skipping
+// anchors already handled by an earlier pass (m_countOffset) and anchors
+// followed by "TV Episode" text, and stopping once m_limit results exist.
+// Always finishes by calling stop().
+void IMDBFetcher::parseSingleNameResult() {
+// myDebug() << "IMDBFetcher::parseSingleNameResult()" << endl;
+
+  // lets continueSearch() come back here for the next batch
+  m_currentTitleBlock = SinglePerson;
+
+  QString output = Tellico::decodeHTML(QString(m_data));
+
+  int pos = s_anchorTitleRx->search(output);
+  if(pos == -1) {
+    stop();
+    return;
+  }
+
+  QRegExp tvRegExp(QString::fromLatin1("TV\\sEpisode"), false);
+
+  int len = 0;
+  int count = 0;
+  QString desc;
+  for( ; m_started && pos > -1; pos = s_anchorTitleRx->search(output, pos+len)) {
+    desc.truncate(0);
+    bool isEpisode = false;
+    len = s_anchorTitleRx->cap(0).length();
+    // split title at parenthesis
+    const QString cap2 = s_anchorTitleRx->cap(2).stripWhiteSpace();
+    int pPos = cap2.find('(');
+    if(pPos > -1) {
+      desc = cap2.mid(pPos);
+    } else {
+      // look until the next <a
+      int aPos = output.find(QString::fromLatin1("<a"), pos+len, false);
+      if(aPos == -1) {
+        aPos = output.length();
+      }
+      QString tmp = output.mid(pos+len, aPos-pos-len);
+      if(tmp.find(tvRegExp) > -1) {
+        isEpisode = true;
+      }
+      pPos = tmp.find('(');
+      if(pPos > -1) {
+        int pNewLine = tmp.find(QString::fromLatin1("<br"));
+        if(pNewLine == -1 || pPos < pNewLine) {
+          int pEnd = tmp.find(')', pPos+1);
+          // without a closing parenthesis the length would be negative,
+          // which mid() treats as "everything up to the end of the text"
+          if(pEnd > -1) {
+            desc = tmp.mid(pPos+1, pEnd-pPos-1).remove(*s_tagRx);
+          }
+        }
+        // but need to indicate it wasn't found initially
+        pPos = -1;
+      }
+    }
+
+    // anchors below the offset were handled by an earlier pass
+    if(count < m_countOffset) {
+      ++count;
+      continue;
+    }
+
+    ++count;
+    if(isEpisode) {
+      continue;
+    }
+
+    // if we got this far, then there is a valid result
+    if(m_matches.size() >= m_limit) {
+      m_hasMoreResults = true;
+      break;
+    }
+
+    // FIXME: maybe remove parentheses here?
+    SearchResult* r = new SearchResult(this, pPos == -1 ? cap2 : cap2.left(pPos), desc, QString());
+    KURL u(m_url, s_anchorTitleRx->cap(1)); // relative URL constructor
+    u.setQuery(QString::null);
+    m_matches.insert(r->uid, u);
+// myDebug() << u.prettyURL() << endl;
+// myDebug() << cap2 << endl;
+    emit signalResultFound(r);
+  }
+  // the loop ran out of anchors, so there is nothing left to continue with
+  if(pos == -1) {
+    m_hasMoreResults = false;
+  }
+  // count was already incremented for the anchor that hit the limit
+  m_countOffset = count - 1;
+
+  stop();
+}
+
+// Handles a person search that matched several people. The name anchors of
+// the page are collected, duplicate names are numbered, and the user picks
+// one in a modal dialog. The chosen person's page is then downloaded and
+// handled as a single-name result. If there are no names or the dialog is
+// cancelled, the search ends through stop().
+void IMDBFetcher::parseMultipleNameResults() {
+// myDebug() << "IMDBFetcher::parseMultipleNameResults()" << endl;
+
+ // the exact results are in the first table after the "exact results" text
+ QString output = Tellico::decodeHTML(QString(m_data));
+ int pos = output.find(QString::fromLatin1("Popular Results"), 0, false);
+ if(pos == -1) {
+ pos = output.find(QString::fromLatin1("Exact Matches"), 0, false);
+ }
+
+ // find beginning of partial matches
+ // three different headings are tried; without any of them the first section runs to the end
+ int end = output.find(QString::fromLatin1("Other Results"), QMAX(pos, 0), false);
+ if(end == -1) {
+ end = output.find(QString::fromLatin1("Partial Matches"), QMAX(pos, 0), false);
+ if(end == -1) {
+ end = output.find(QString::fromLatin1("Approx Matches"), QMAX(pos, 0), false);
+ if(end == -1) {
+ end = output.length();
+ }
+ }
+ }
+
+ // map: displayed name -> url of that person's page
+ // nameMap: plain name -> how many times it has been seen so far
+ QMap<QString, KURL> map;
+ QMap<QString, int> nameMap;
+
+ QString s;
+ // if found exact matches
+ if(pos > -1) {
+ // NOTE(review): 13 is the length of "Exact Matches"; "Popular Results" is longer -- looks harmless, confirm
+ pos = s_anchorNameRx->search(output, pos+13);
+ while(pos > -1 && pos < end && m_matches.size() < m_limit) {
+ KURL u(m_url, s_anchorNameRx->cap(1));
+ // names from the first section get a trailing space, which marks them for coloring in the list below
+ s = s_anchorNameRx->cap(2).stripWhiteSpace() + ' ';
+ // if more than one exact, add parentheses
+ if(nameMap.contains(s) && nameMap[s] > 0) {
+ // fix the first one that didn't have a number
+ if(nameMap[s] == 1) {
+ KURL u2 = map[s];
+ map.remove(s);
+ map.insert(s + "(1) ", u2);
+ }
+ nameMap.insert(s, nameMap[s] + 1);
+ // check for duplicate names
+ s += QString::fromLatin1("(%1) ").arg(nameMap[s]);
+ } else {
+ nameMap.insert(s, 1);
+ }
+ map.insert(s, u);
+ pos = s_anchorNameRx->search(output, pos+s_anchorNameRx->cap(0).length());
+ }
+ }
+
+ // go ahead and search for partial matches
+ pos = s_anchorNameRx->search(output, end);
+ while(pos > -1 && m_matches.size() < m_limit) {
+ KURL u(m_url, s_anchorNameRx->cap(1)); // relative URL
+ s = s_anchorNameRx->cap(2).stripWhiteSpace();
+ if(nameMap.contains(s) && nameMap[s] > 0) {
+ // fix the first one that didn't have a number
+ if(nameMap[s] == 1) {
+ KURL u2 = map[s];
+ map.remove(s);
+ map.insert(s + " (1)", u2);
+ }
+ nameMap.insert(s, nameMap[s] + 1);
+ // check for duplicate names
+ s += QString::fromLatin1(" (%1)").arg(nameMap[s]);
+ } else {
+ nameMap.insert(s, 1);
+ }
+ map.insert(s, u);
+ pos = s_anchorNameRx->search(output, pos+s_anchorNameRx->cap(0).length());
+ }
+
+ if(map.count() == 0) {
+ stop();
+ return;
+ }
+
+ // modal dialog that lets the user choose among the collected names
+ KDialogBase* dlg = new KDialogBase(Kernel::self()->widget(), "imdb dialog",
+ true, i18n("Select IMDB Result"), KDialogBase::Ok|KDialogBase::Cancel);
+ QVBox* box = new QVBox(dlg);
+ box->setSpacing(10);
+ (void) new QLabel(i18n("<qt>Your search returned multiple matches. Please select one below.</qt>"), box);
+
+ QListBox* listBox = new QListBox(box);
+ listBox->setMinimumWidth(400);
+ listBox->setColumnMode(QListBox::FitToWidth);
+ const QStringList values = map.keys();
+ for(QStringList::ConstIterator it = values.begin(); it != values.end(); ++it) {
+ if((*it).endsWith(QChar(' '))) {
+ // NOTE(review): this local shadows the QVBox named box above
+ GUI::ListBoxText* box = new GUI::ListBoxText(listBox, *it, 0);
+ box->setColored(true);
+ } else {
+ (void) new GUI::ListBoxText(listBox, *it);
+ }
+ }
+ listBox->setSelected(0, true);
+ QWhatsThis::add(listBox, i18n("<qt>Select a search result.</qt>"));
+
+ dlg->setMainWidget(box);
+ if(dlg->exec() != QDialog::Accepted || listBox->currentText().isEmpty()) {
+ dlg->delayedDestruct();
+ stop();
+ return;
+ }
+
+ // the list box text is the key under which the url was stored
+ m_url = map[listBox->currentText()];
+ dlg->delayedDestruct();
+
+ // redirected is true since that's how I tell if an exact match has been found
+ m_redirected = true;
+ m_data.truncate(0);
+ m_job = KIO::get(m_url, false, false);
+ connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+ SLOT(slotData(KIO::Job*, const QByteArray&)));
+ connect(m_job, SIGNAL(result(KIO::Job*)),
+ SLOT(slotComplete(KIO::Job*)));
+ connect(m_job, SIGNAL(redirection(KIO::Job *, const KURL&)),
+ SLOT(slotRedirection(KIO::Job*, const KURL&)));
+
+ // do not stop() here
+}
+
+// Returns the entry for the search result uid_, downloading and parsing its
+// page the first time it is requested. Returns a null pointer when no URL is
+// known for uid_, when the page text is empty, or when parsing fails.
+Tellico::Data::EntryPtr IMDBFetcher::fetchEntry(uint uid_) {
+  // Probe both maps with contains() before reading them: QMap::operator[] on
+  // a non-const map inserts a default value for a missing key, which would
+  // leave empty URLs in m_matches (whose size is compared against m_limit)
+  // and null entries in m_entries.
+  if(m_entries.contains(uid_)) {
+    Data::EntryPtr cached = m_entries[uid_];
+    if(cached) {
+      // already grabbed this one
+      return cached;
+    }
+  }
+
+  KURL url;
+  if(m_matches.contains(uid_)) {
+    url = m_matches[uid_];
+  }
+  if(url.isEmpty()) {
+    myDebug() << "IMDBFetcher::fetchEntry() - no url found" << endl;
+    return 0;
+  }
+
+  const KURL origURL = m_url; // keep to switch back
+  QString results;
+  if(url == m_url) {
+    // the url matches the current one, so m_data already holds the page
+    results = Tellico::decodeHTML(QString(m_data));
+  } else {
+    // this download is synchronous
+#ifdef IMDB_TEST
+    KURL u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/imdb-title-result.html"));
+    results = Tellico::decodeHTML(FileHandler::readTextFile(u));
+#else
+    // be quiet about failure
+    results = Tellico::decodeHTML(FileHandler::readTextFile(url, true));
+    m_url = url; // needed for processing
+#endif
+  }
+  if(results.isEmpty()) {
+    myDebug() << "IMDBFetcher::fetchEntry() - no text results" << endl;
+    m_url = origURL;
+    return 0;
+  }
+
+  Data::EntryPtr entry = parseEntry(results);
+  m_url = origURL; // parseEntry() works on m_url, restore it afterwards
+  if(!entry) {
+    myDebug() << "IMDBFetcher::fetchEntry() - error in processing entry" << endl;
+    return 0;
+  }
+  m_entries.insert(uid_, entry); // keep for later
+  return entry;
+}
+
+// Builds a new entry in a fresh video collection from the HTML text str_ of
+// a title page. m_url must be the URL of that page: it is the base URL for
+// the plot, cast and cover lookups and ends up in the optional "imdb" field.
+Tellico::Data::EntryPtr IMDBFetcher::parseEntry(const QString& str_) {
+  Data::CollPtr videoColl = new Data::VideoCollection(true);
+  Data::EntryPtr result = new Data::Entry(videoColl);
+
+  // each helper scrapes one group of fields out of the page text
+  doTitle(str_, result);
+  doRunningTime(str_, result);
+  doAspectRatio(str_, result);
+  doAlsoKnownAs(str_, result);
+  doPlot(str_, result, m_url);
+  doLists(str_, result);
+  doPerson(str_, result, QString::fromLatin1("Director"), QString::fromLatin1("director"));
+  doPerson(str_, result, QString::fromLatin1("Writer"), QString::fromLatin1("writer"));
+  doRating(str_, result);
+  doCast(str_, result, m_url);
+  if(m_fetchImages) {
+    // needs base URL
+    doCover(str_, result, m_url);
+  }
+
+  const QString linkName = QString::fromLatin1("imdb");
+  // add the link field only when it is missing and was asked for
+  if(!videoColl->hasField(linkName)) {
+    if(m_fields.findIndex(linkName) > -1) {
+      Data::FieldPtr linkField = new Data::Field(linkName, i18n("IMDB Link"), Data::Field::URL);
+      linkField->setCategory(i18n("General"));
+      videoColl->addField(linkField);
+    }
+  }
+  if(videoColl->hasField(linkName)) {
+    if(videoColl->fieldByName(linkName)->type() == Data::Field::URL) {
+      // note that this strips the query from the member m_url itself
+      m_url.setQuery(QString::null);
+      result->setField(linkName, m_url.url());
+    }
+  }
+  return result;
+}
+
+// Sets the "title" and "year" fields from the text captured by s_titleRx,
+// which is expected to look like: Some Title (2004) or "Some Show" (2004/I).
+// Nothing is set when s_titleRx does not match str_.
+void IMDBFetcher::doTitle(const QString& str_, Data::EntryPtr entry_) {
+  if(s_titleRx->search(str_) == -1) {
+    return;
+  }
+  const QString cap1 = s_titleRx->cap(1);
+
+  // The year is the first parenthesis that opens with four digits. Looking
+  // for that instead of the very first parenthesis keeps titles that start
+  // with one intact, e.g. "(500) Days of Summer (2009)", and never mistakes
+  // leading digits of a title without any parenthesis for the year.
+  QRegExp yearRx(QString::fromLatin1("\\((\\d{4})"));
+  QString year;
+  int pPos = yearRx.search(cap1);
+  if(pPos > -1) {
+    year = yearRx.cap(1);
+  } else {
+    // no recognizable year, cut the title at the first parenthesis, if any
+    pPos = cap1.find('(');
+  }
+
+  QString title = (pPos > -1) ? cap1.left(pPos) : cap1;
+  title = title.stripWhiteSpace();
+  // remove first and last quotes if they are there
+  if(title.startsWith(QChar('"')) && title.endsWith(QChar('"'))) {
+    title = title.mid(1, title.length()-2);
+  }
+  entry_->setField(QString::fromLatin1("title"), title);
+
+  if(!year.isEmpty()) {
+    entry_->setField(QString::fromLatin1("year"), year);
+  }
+}
+
+// Sets the "running-time" field to the number found between a "runtime:"
+// label and the word "min" in str_; leaves the entry alone otherwise.
+void IMDBFetcher::doRunningTime(const QString& str_, Data::EntryPtr entry_) {
+  // case-insensitive, non-greedy
+  QRegExp minutesRx(QString::fromLatin1("runtime:.*(\\d+)\\s+min"), false);
+  minutesRx.setMinimal(true);
+
+  if(minutesRx.search(str_) == -1) {
+    return;
+  }
+  const QString minutes = minutesRx.cap(1);
+  entry_->setField(QString::fromLatin1("running-time"), minutes);
+}
+
+// Sets the "aspect-ratio" field to the first "x : y" value that follows an
+// "aspect ratio:" label in str_; leaves the entry alone otherwise.
+void IMDBFetcher::doAspectRatio(const QString& str_, Data::EntryPtr entry_) {
+  // case-insensitive, non-greedy
+  QRegExp ratioRx(QString::fromLatin1("aspect ratio:.*([\\d\\.]+\\s*:\\s*[\\d\\.]+)"), false);
+  ratioRx.setMinimal(true);
+
+  if(ratioRx.search(str_) == -1) {
+    return;
+  }
+  const QString ratio = ratioRx.cap(1).stripWhiteSpace();
+  entry_->setField(QString::fromLatin1("aspect-ratio"), ratio);
+}
+
+// Fills the optional "alttitle" table field with the alternative titles
+// listed after "also known as" in str_. Does nothing unless "alttitle" is
+// one of the requested fields in m_fields.
+void IMDBFetcher::doAlsoKnownAs(const QString& str_, Data::EntryPtr entry_) {
+  const QString altTitle = QString::fromLatin1("alttitle");
+  if(m_fields.findIndex(altTitle) == -1) {
+    return;
+  }
+
+  // match until the next b or div tag
+  QRegExp akaRx(QString::fromLatin1("also known as(.*)<(b[>\\s/]|div)"), false);
+  akaRx.setMinimal(true);
+  if(akaRx.search(str_) == -1 || akaRx.cap(1).isEmpty()) {
+    return;
+  }
+
+  // the field is created as soon as the section is found, even if no
+  // usable title survives the clean-up below
+  Data::FieldPtr altField = entry_->collection()->fieldByName(altTitle);
+  if(!altField) {
+    altField = new Data::Field(altTitle, i18n("Alternative Titles"), Data::Field::Table);
+    altField->setFormatFlag(Data::Field::FormatTitle);
+    entry_->collection()->addField(altField);
+  }
+
+  // split by <br>, remembering it could become valid xhtml!
+  QRegExp brRx(QString::fromLatin1("<br[\\s/]*>"), false);
+  brRx.setMinimal(true);
+  const QStringList pieces = QStringList::split(brRx, akaRx.cap(1));
+  // lang could be included with [fr]
+  const QRegExp brackRx(QString::fromLatin1("\\[\\w+\\]"));
+
+  QStringList titles;
+  for(QStringList::ConstIterator piece = pieces.begin(); piece != pieces.end(); ++piece) {
+    QString text = *piece;
+    // sometimes, the word "more" gets linked to the releaseinfo page, skip that
+    if(text.find(QString::fromLatin1("releaseinfo")) > -1) {
+      continue;
+    }
+    text.remove(*s_tagRx);
+    text.remove(brackRx);
+    text = text.stripWhiteSpace();
+    // the first value can start with the colon that follows "Also known as"
+    if(text.startsWith(QChar(':'))) {
+      text = text.mid(1);
+    }
+    if(text.isEmpty()) {
+      continue;
+    }
+    titles.append(text);
+  }
+
+  if(titles.isEmpty()) {
+    return;
+  }
+  entry_->setField(altTitle, titles.join(sep));
+}
+
+// Sets the "plot" field from the "plot outline" or "plot summary" section
+// of str_. When that section ends with "(more)" or links to a plotsummary
+// page, the separate plot summary page is downloaded (relative to baseURL_)
+// and its first "plotpar" paragraph replaces the short text.
+void IMDBFetcher::doPlot(const QString& str_, Data::EntryPtr entry_, const KURL& baseURL_) {
+  const QString plotName = QString::fromLatin1("plot");
+
+  // match until next opening tag
+  QRegExp outlineRx(QString::fromLatin1("plot (?:outline|summary):(.*)<[^/].*</"), false);
+  outlineRx.setMinimal(true);
+  QRegExp summaryLinkRx(QString::fromLatin1("<a\\s+.*href\\s*=\\s*\".*/title/.*/plotsummary\""), false);
+  summaryLinkRx.setMinimal(true);
+
+  if(outlineRx.search(str_) == -1) {
+    return;
+  }
+  QString shortPlot = outlineRx.cap(1);
+  shortPlot.remove(*s_tagRx); // remove HTML tags
+  entry_->setField(plotName, shortPlot);
+
+  // plot summaries provided by users are on a separate page, only grab it
+  // when the short text points there
+  const QString matched = outlineRx.cap(0);
+  const bool wantUserSummary = matched.endsWith(QString::fromLatin1("(more)</"))
+                               || summaryLinkRx.search(matched) > -1;
+  if(!wantUserSummary) {
+    return;
+  }
+
+  QRegExp idRx(QString::fromLatin1("title/(tt\\d+)"));
+  idRx.search(baseURL_.path());
+  KURL summaryURL = baseURL_;
+  summaryURL.setPath(QString::fromLatin1("/title/") + idRx.cap(1) + QString::fromLatin1("/plotsummary"));
+  // be quiet about failure
+  const QString summaryPage = FileHandler::readTextFile(summaryURL, true);
+  if(summaryPage.isEmpty()) {
+    return;
+  }
+
+  QRegExp paragraphRx(QString::fromLatin1("<p\\s+class\\s*=\\s*\"plotpar\">(.*)</p"));
+  paragraphRx.setMinimal(true);
+  if(paragraphRx.search(summaryPage) > -1) {
+    QString longPlot = paragraphRx.cap(1);
+    longPlot.remove(*s_tagRx); // remove HTML tags
+    entry_->setField(plotName, Tellico::decodeHTML(longPlot));
+  }
+}
+
+// Collects the text of every "/name/" link found in the section that follows
+// each occurrence of imdbHeader_ in str_ and stores the names, joined by sep,
+// in the field fieldName_. The field is left alone when nothing is found.
+void IMDBFetcher::doPerson(const QString& str_, Data::EntryPtr entry_,
+                           const QString& imdbHeader_, const QString& fieldName_) {
+  // a section ends at repeated <br> tags or at a div tag
+  QRegExp doubleBreakRx(QString::fromLatin1("<br[\\s/]*>\\s*<br[\\s/]*>"), false);
+  doubleBreakRx.setMinimal(true);
+  QRegExp divTagRx(QString::fromLatin1("<[/]*div"), false);
+  divTagRx.setMinimal(true);
+  const QString personPath = QString::fromLatin1("/name/");
+
+  StringSet names;
+  int pos = str_.find(imdbHeader_);
+  while(pos > 0) {
+    const int breakPos = str_.find(doubleBreakRx, pos);
+    const int divPos = str_.find(divTagRx, pos);
+    // the smaller one wins, so a missing terminator (-1) means no anchor is read
+    const int sectionEnd = QMIN(breakPos, divPos);
+
+    for(pos = s_anchorRx->search(str_, pos+1);
+        pos > -1 && pos < sectionEnd;
+        pos = s_anchorRx->search(str_, pos+1)) {
+      if(s_anchorRx->cap(1).find(personPath) > -1) {
+        names.add(s_anchorRx->cap(2).stripWhiteSpace());
+      }
+    }
+
+    // continue with the next header from wherever the anchor scan stopped
+    pos = str_.find(imdbHeader_, pos);
+  }
+
+  if(names.isEmpty()) {
+    return;
+  }
+  entry_->setField(fieldName_, names.toList().join(sep));
+}
+
+void IMDBFetcher::doCast(const QString& str_, Data::EntryPtr entry_, const KURL& baseURL_) { // fills the "cast" field with at most m_numCast rows, each "actor" or "actor::character"
+ // the extended cast list is on a separate page
+ // that's usually a lot of people
+ // but since it can be in billing order, the main actors might not
+ // be in the short list
+ QRegExp idRx(QString::fromLatin1("title/(tt\\d+)")); // captures the title id from the path of baseURL_
+ idRx.search(baseURL_.path()); // the result is not checked before cap(1) is used below
+#ifdef IMDB_TEST
+ KURL castURL = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/imdb-title-fullcredits.html"));
+#else
+ KURL castURL = baseURL_;
+ castURL.setPath(QString::fromLatin1("/title/") + idRx.cap(1) + QString::fromLatin1("/fullcredits"));
+#endif
+ // be quiet about failure and be sure to translate entities
+ QString castPage = Tellico::decodeHTML(FileHandler::readTextFile(castURL, true));
+
+ int pos = -1; // start of the cast list within castText, -1 while unknown
+ // the text to search, depends on which page is being read
+ QString castText = castPage;
+ if(castText.isEmpty()) {
+ // fall back to short list
+ castText = str_;
+ pos = castText.find(QString::fromLatin1("cast overview"), 0, false);
+ if(pos == -1) {
+ pos = castText.find(QString::fromLatin1("credited cast"), 0, false);
+ }
+ } else {
+ // first look for anchor
+ QRegExp castAnchorRx(QString::fromLatin1("<a\\s+name\\s*=\\s*\"cast\""), false);
+ pos = castText.find(castAnchorRx);
+ if(pos < 0) {
+ QRegExp tableClassRx(QString::fromLatin1("<table\\s+class\\s*=\\s*\"cast\""), false); // second choice: a table with class "cast"
+ pos = castText.find(tableClassRx);
+ if(pos < 0) {
+ // fragile, the word "cast" appears in the title, but need to find
+ // the one right above the actual cast table
+ // for TV shows, there's a link on the sidebar for "episodes cast"
+ // so need to not match that one
+ pos = castText.find(QString::fromLatin1("cast</"), 0, false);
+ if(pos > 9) {
+ // back up 9 places
+ if(castText.mid(pos-9, 9).startsWith(QString::fromLatin1("episodes"))) {
+ // find next cast list
+ pos = castText.find(QString::fromLatin1("cast</"), pos+6, false);
+ }
+ }
+ }
+ }
+ }
+ if(pos == -1) { // no cast list found
+ myDebug() << "IMDBFetcher::doCast() - no cast list found" << endl;
+ return;
+ }
+
+ const QString name = QString::fromLatin1("/name/"); // only anchors whose first capture contains this are treated as people
+ QRegExp tdRx(QString::fromLatin1("<td[^>]*>(.*)</td>"), false);
+ tdRx.setMinimal(true);
+
+ QStringList cast;
+ // loop until closing table tag
+ const int endPos = castText.find(QString::fromLatin1("</table"), pos, false); // -1 when there is no closing tag, in which case the loop below never runs
+ pos = s_anchorRx->search(castText, pos+1);
+ while(pos > -1 && pos < endPos && static_cast<int>(cast.count()) < m_numCast) {
+ if(s_anchorRx->cap(1).find(name) > -1) {
+ // now search for <td> item with character name
+ // there's a column with ellipses then the character
+ const int pos2 = tdRx.search(castText, pos);
+ if(pos2 > -1 && tdRx.search(castText, pos2+1) > -1) { // the second search leaves the character cell in tdRx.cap(1)
+ cast += s_anchorRx->cap(2).stripWhiteSpace()
+ + QString::fromLatin1("::") + tdRx.cap(1).simplifyWhiteSpace().remove(*s_tagRx);
+ } else {
+ cast += s_anchorRx->cap(2).stripWhiteSpace(); // no character cell found, keep the actor name only
+ }
+ }
+ pos = s_anchorRx->search(castText, pos+1);
+ }
+
+ if(!cast.isEmpty()) {
+ entry_->setField(QString::fromLatin1("cast"), cast.join(sep));
+ }
+}
+
+// Sets the optional "imdb-rating" field from the first "x/10" score found
+// in str_, creating the field in the entry's collection when it is missing.
+// Does nothing unless "imdb-rating" is one of the requested fields.
+void IMDBFetcher::doRating(const QString& str_, Data::EntryPtr entry_) {
+  const QString ratingName = QString::fromLatin1("imdb-rating");
+  if(m_fields.findIndex(ratingName) == -1) {
+    return;
+  }
+
+  // don't add a colon, since there's a <br> at the end
+  // some of the imdb images use /10.gif in their path, so the score has to
+  // be followed by whitespace or an opening bracket
+  // (the trailing class used to read "[<//s]", which accepts '<', '/' or a
+  // literal 's' but never whitespace)
+  QRegExp rx(QString::fromLatin1("[>\\s](\\d+.?\\d*)/10[<\\s]"), false);
+  rx.setMinimal(true);
+
+  if(rx.search(str_) == -1 || rx.cap(1).isEmpty()) {
+    return;
+  }
+
+  Data::FieldPtr f = entry_->collection()->fieldByName(ratingName);
+  if(!f) {
+    f = new Data::Field(ratingName, i18n("IMDB Rating"), Data::Field::Rating);
+    f->setCategory(i18n("General"));
+    f->setProperty(QString::fromLatin1("maximum"), QString::fromLatin1("10"));
+    entry_->collection()->addField(f);
+  }
+
+  // only store the value when the captured text really is a number
+  bool ok;
+  const float value = rx.cap(1).toFloat(&ok);
+  if(ok) {
+    entry_->setField(ratingName, QString::number(value));
+  }
+}
+
+// Looks for the cover image in str_ and stores the id returned by
+// ImageFactory::addImage() in the "cover" field. The image inside the anchor
+// named "poster" is preferred; otherwise the first img tag that mentions
+// "cover" is used. Image URLs are resolved against baseURL_.
+void IMDBFetcher::doCover(const QString& str_, Data::EntryPtr entry_, const KURL& baseURL_) {
+  QRegExp imageRx(QString::fromLatin1("<img\\s+[^>]*src\\s*=\\s*\"([^\"]*)\"[^>]*>"), false);
+  imageRx.setMinimal(true);
+
+  QRegExp posterAnchorRx(QString::fromLatin1("<a\\s+[^>]*name\\s*=\\s*\"poster\"[^>]*>(.*)</a>"), false);
+  posterAnchorRx.setMinimal(true);
+
+  const QString coverName = QString::fromLatin1("cover");
+
+  // first choice, an image wrapped in the poster anchor
+  for(int pos = posterAnchorRx.search(str_); pos > -1; pos = posterAnchorRx.search(str_, pos+1)) {
+    if(imageRx.search(posterAnchorRx.cap(1)) == -1) {
+      continue;
+    }
+    const KURL imageURL(baseURL_, imageRx.cap(1));
+    const QString imageId = ImageFactory::addImage(imageURL, true);
+    if(!imageId.isEmpty()) {
+      entry_->setField(coverName, imageId);
+    }
+    // stop at the first candidate, whether or not it could be added
+    return;
+  }
+
+  // didn't find the cover, IMDb also used to put "cover" inside the url
+  for(int pos = imageRx.search(str_); pos > -1; pos = imageRx.search(str_, pos+1)) {
+    if(imageRx.cap(0).find(coverName, 0, false) == -1) {
+      continue;
+    }
+    const KURL imageURL(baseURL_, imageRx.cap(1));
+    const QString imageId = ImageFactory::addImage(imageURL, true);
+    if(!imageId.isEmpty()) {
+      entry_->setField(coverName, imageId);
+    }
+    return;
+  }
+}
+
+// end up reparsing whole string, but it's not really that slow
+// look at every anchor tag in the string
+void IMDBFetcher::doLists(const QString& str_, Data::EntryPtr entry_) { // sorts each anchor into a list field by a marker found in its first capture (presumably the href)
+ const QString genre = QString::fromLatin1("/Genres/");
+ const QString country = QString::fromLatin1("/Countries/");
+ const QString lang = QString::fromLatin1("/Languages/");
+ const QString colorInfo = QString::fromLatin1("color-info");
+ const QString cert = QString::fromLatin1("certificates=");
+ const QString soundMix = QString::fromLatin1("sound-mix=");
+ const QString year = QString::fromLatin1("/Years/");
+ const QString company = QString::fromLatin1("/company/");
+
+ // IMDb also has links with the word "sections" in them, remove that
+ // for genres and nationalities
+
+ QStringList genres, countries, langs, certs, tracks, studios;
+ for(int pos = s_anchorRx->search(str_); pos > -1; pos = s_anchorRx->search(str_, pos+1)) {
+ const QString cap1 = s_anchorRx->cap(1); // the second capture is used as the displayed value below
+ if(cap1.find(genre) > -1) {
+ if(s_anchorRx->cap(2).find(QString::fromLatin1(" section"), 0, false) == -1) {
+ genres += s_anchorRx->cap(2).stripWhiteSpace();
+ }
+ } else if(cap1.find(country) > -1) {
+ if(s_anchorRx->cap(2).find(QString::fromLatin1(" section"), 0, false) == -1) {
+ countries += s_anchorRx->cap(2).stripWhiteSpace();
+ }
+ } else if(cap1.find(lang) > -1) {
+ langs += s_anchorRx->cap(2).stripWhiteSpace();
+ } else if(cap1.find(colorInfo) > -1) {
+ // change "black and white" to "black & white"
+ entry_->setField(QString::fromLatin1("color"),
+ s_anchorRx->cap(2).replace(QString::fromLatin1("and"), QChar('&')).stripWhiteSpace());
+ } else if(cap1.find(cert) > -1) {
+ certs += s_anchorRx->cap(2).stripWhiteSpace();
+ } else if(cap1.find(soundMix) > -1) {
+ tracks += s_anchorRx->cap(2).stripWhiteSpace();
+ } else if(cap1.find(company) > -1) {
+ studios += s_anchorRx->cap(2).stripWhiteSpace();
+ // if year field wasn't set before, do it now
+ } else if(entry_->field(QString::fromLatin1("year")).isEmpty() && cap1.find(year) > -1) {
+ entry_->setField(QString::fromLatin1("year"), s_anchorRx->cap(2).stripWhiteSpace());
+ }
+ }
+
+ entry_->setField(QString::fromLatin1("genre"), genres.join(sep)); // these five fields are written even when their list is empty
+ entry_->setField(QString::fromLatin1("nationality"), countries.join(sep));
+ entry_->setField(QString::fromLatin1("language"), langs.join(sep));
+ entry_->setField(QString::fromLatin1("audio-track"), tracks.join(sep));
+ entry_->setField(QString::fromLatin1("studio"), studios.join(sep));
+ if(!certs.isEmpty()) {
+ // first try to set default certification
+ const QStringList& certsAllowed = entry_->collection()->fieldByName(QString::fromLatin1("certification"))->allowed(); // NOTE(review): assumes the collection always has a "certification" field, there is no null check
+ for(QStringList::ConstIterator it = certs.begin(); it != certs.end(); ++it) {
+ QString country = (*it).section(':', 0, 0); // shadows the outer "country" marker; the text before the first colon
+ QString cert = (*it).section(':', 1, 1); // shadows the outer "cert" marker; the text after the first colon
+ if(cert == Latin1Literal("Unrated")) {
+ cert = QChar('U');
+ }
+ cert += QString::fromLatin1(" (") + country + ')'; // rebuilt as "rating (country)" to compare with the allowed values
+ if(certsAllowed.findIndex(cert) > -1) {
+ entry_->setField(QString::fromLatin1("certification"), cert);
+ break; // the first allowed certification wins
+ }
+ }
+
+ // now add new field for all certifications
+ const QString allc = QString::fromLatin1("allcertification");
+ if(m_fields.findIndex(allc) > -1) { // only when this optional field was requested
+ Data::FieldPtr f = entry_->collection()->fieldByName(allc);
+ if(!f) {
+ f = new Data::Field(allc, i18n("Certifications"), Data::Field::Table);
+ f->setFlags(Data::Field::AllowGrouped);
+ entry_->collection()->addField(f);
+ }
+ entry_->setField(QString::fromLatin1("allcertification"), certs.join(sep));
+ }
+ }
+}
+
+// Starts an update for an existing entry. When the entry has a valid "imdb"
+// link, that page is downloaded directly (switching to the configured host
+// if it differs); otherwise a title search is started. signalDone() is
+// emitted right away when there is neither a link nor a title.
+void IMDBFetcher::updateEntry(Data::EntryPtr entry_) {
+  // only take first 5
+  m_limit = 5;
+  const QString title = entry_->field(QString::fromLatin1("title"));
+  KURL imdbLink = entry_->field(QString::fromLatin1("imdb"));
+
+  const bool haveLink = !imdbLink.isEmpty() && imdbLink.isValid();
+  if(!haveLink) {
+    if(title.isEmpty()) {
+      // always need to emit this if not continuing with the search
+      emit signalDone(this);
+    } else {
+      // optimistically try searching for title and rely on Collection::sameEntry() to figure things out
+      search(Fetch::Title, title);
+    }
+    return;
+  }
+
+  // check if we want a different host
+  if(imdbLink.host() != m_host) {
+    imdbLink.setHost(m_host);
+  }
+
+  m_key = Fetch::Title;
+  m_value = title;
+  m_started = true;
+  m_data.truncate(0);
+  m_matches.clear();
+  m_url = imdbLink;
+  // m_redirected is used as a flag later to tell if we get a single result
+  m_redirected = true;
+
+  m_job = KIO::get(m_url, false, false);
+  connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+          SLOT(slotData(KIO::Job*, const QByteArray&)));
+  connect(m_job, SIGNAL(result(KIO::Job*)),
+          SLOT(slotComplete(KIO::Job*)));
+  connect(m_job, SIGNAL(redirection(KIO::Job *, const KURL&)),
+          SLOT(slotRedirection(KIO::Job*, const KURL&)));
+}
+
+// Creates the configuration widget for this fetcher as a child of parent_,
+// initialised from this fetcher's current settings.
+Tellico::Fetch::ConfigWidget* IMDBFetcher::configWidget(QWidget* parent_) const {
+  Tellico::Fetch::ConfigWidget* widget = new IMDBFetcher::ConfigWidget(parent_, this);
+  return widget;
+}
+
+// Builds the options page: a host line edit, a maximum-cast spin box and a
+// "download cover image" check box, followed by the additional-fields
+// widget. Values come from fetcher_ when given, otherwise from the defaults.
+IMDBFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const IMDBFetcher* fetcher_/*=0*/)
+    : Fetch::ConfigWidget(parent_) {
+  QGridLayout* grid = new QGridLayout(optionsWidget(), 4, 2);
+  grid->setSpacing(4);
+  grid->setColStretch(1, 10);
+
+  int row = 0;
+
+  // row 0, the server to talk to
+  QLabel* hostLabel = new QLabel(i18n("Hos&t: "), optionsWidget());
+  grid->addWidget(hostLabel, row, 0);
+  m_hostEdit = new KLineEdit(optionsWidget());
+  connect(m_hostEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+  grid->addWidget(m_hostEdit, row, 1);
+  const QString hostHelp = i18n("The Internet Movie Database uses several different servers. Choose the one "
+                                "you wish to use.");
+  QWhatsThis::add(hostLabel, hostHelp);
+  QWhatsThis::add(m_hostEdit, hostHelp);
+  hostLabel->setBuddy(m_hostEdit);
+
+  // row 1, how many cast members to keep
+  ++row;
+  QLabel* castLabel = new QLabel(i18n("&Maximum cast: "), optionsWidget());
+  grid->addWidget(castLabel, row, 0);
+  m_numCast = new KIntSpinBox(0, 99, 1, 10, 10, optionsWidget());
+  connect(m_numCast, SIGNAL(valueChanged(const QString&)), SLOT(slotSetModified()));
+  grid->addWidget(m_numCast, row, 1);
+  const QString castHelp = i18n("The list of cast members may include many people. Set the maximum number returned from the search.");
+  QWhatsThis::add(castLabel, castHelp);
+  QWhatsThis::add(m_numCast, castHelp);
+  castLabel->setBuddy(m_numCast);
+
+  // row 2, whether to download the cover, spanning both columns
+  ++row;
+  m_fetchImageCheck = new QCheckBox(i18n("Download cover &image"), optionsWidget());
+  connect(m_fetchImageCheck, SIGNAL(clicked()), SLOT(slotSetModified()));
+  grid->addMultiCellWidget(m_fetchImageCheck, row, row, 0, 1);
+  const QString imageHelp = i18n("The cover image may be downloaded as well. However, too many large images in the "
+                                 "collection may degrade performance.");
+  QWhatsThis::add(m_fetchImageCheck, imageHelp);
+
+  // row 3 only takes up the remaining space
+  ++row;
+  grid->setRowStretch(row, 10);
+
+  // now add additional fields widget
+  addFieldsWidget(IMDBFetcher::customFields(), fetcher_ ? fetcher_->m_fields : QStringList());
+
+  if(!fetcher_) {
+    // defaults
+    m_hostEdit->setText(QString::fromLatin1(IMDB_SERVER));
+    m_numCast->setValue(10);
+    m_fetchImageCheck->setChecked(true);
+    return;
+  }
+  m_hostEdit->setText(fetcher_->m_host);
+  m_numCast->setValue(fetcher_->m_numCast);
+  m_fetchImageCheck->setChecked(fetcher_->m_fetchImages);
+}
+
+// Writes the widget state into config_ and clears the modified flag.
+// An empty host is not written, so a stored "Host" value is kept as it is.
+void IMDBFetcher::ConfigWidget::saveConfig(KConfigGroup& config_) {
+  const QString trimmedHost = m_hostEdit->text().stripWhiteSpace();
+  const bool hostGiven = !trimmedHost.isEmpty();
+  if(hostGiven) {
+    config_.writeEntry("Host", trimmedHost);
+  }
+
+  config_.writeEntry("Max Cast", m_numCast->value());
+  config_.writeEntry("Fetch Images", m_fetchImageCheck->isChecked());
+
+  saveFieldsConfig(config_);
+  slotSetModified(false);
+}
+
+// The preferred source name is the fetcher's default name.
+QString IMDBFetcher::ConfigWidget::preferredName() const {
+  const QString name = IMDBFetcher::defaultName();
+  return name;
+}
+
+//static
+// Maps the name of every optional field this fetcher can add to its
+// translated title.
+Tellico::StringMap IMDBFetcher::customFields() {
+  StringMap optional;
+  optional[QString::fromLatin1("imdb")]             = i18n("IMDB Link");
+  optional[QString::fromLatin1("imdb-rating")]      = i18n("IMDB Rating");
+  optional[QString::fromLatin1("alttitle")]         = i18n("Alternative Titles");
+  optional[QString::fromLatin1("allcertification")] = i18n("Certifications");
+  return optional;
+}
+
+#include "imdbfetcher.moc"
diff --git a/src/fetch/imdbfetcher.h b/src/fetch/imdbfetcher.h
new file mode 100644
index 0000000..3dc19f2
--- /dev/null
+++ b/src/fetch/imdbfetcher.h
@@ -0,0 +1,141 @@
+/***************************************************************************
+ copyright : (C) 2004-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef IMDBFETCHER_H
+#define IMDBFETCHER_H
+
+#include "fetcher.h"
+#include "configwidget.h"
+#include "../datavectors.h"
+
+#include <kurl.h>
+#include <kio/job.h>
+
+#include <qcstring.h> // for QByteArray
+#include <qmap.h>
+#include <qguardedptr.h>
+
+// forward declarations for types only used through pointers in this header
+class KLineEdit;
+class KIntSpinBox;
+class QCheckBox;
+class QRegExp; // was misspelled "QRegExpr"; IMDBFetcher declares static QRegExp* members
+
+namespace Tellico {
+ namespace Fetch {
+
+/**
+ * Fetcher that retrieves video entries by downloading and parsing pages
+ * from an Internet Movie Database host. A search may be made by title or
+ * by person, see canSearch().
+ *
+ * @author Robby Stephenson
+ */
+class IMDBFetcher : public Fetcher {
+Q_OBJECT
+
+public:
+ IMDBFetcher(QObject* parent, const char* name=0);
+ /**
+ * Destructor.
+ */
+ virtual ~IMDBFetcher();
+
+ virtual QString source() const;
+ virtual bool isSearching() const { return m_started; }
+ virtual void search(FetchKey key, const QString& value);
+ virtual void continueSearch();
+ // imdb can search title, person
+ virtual bool canSearch(FetchKey k) const { return k == Title || k == Person; }
+ virtual void stop();
+ virtual Data::EntryPtr fetchEntry(uint uid); // returns the parsed entry for a search result, or null on failure
+ virtual Type type() const { return IMDB; }
+ virtual bool canFetch(int type) const;
+ virtual void readConfigHook(const KConfigGroup& config);
+
+ virtual void updateEntry(Data::EntryPtr entry); // uses the entry's "imdb" link when valid, otherwise searches by title
+
+ virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;
+
+ static StringMap customFields(); // optional field names mapped to their translated titles
+
+ class ConfigWidget : public Fetch::ConfigWidget { // options page for host, maximum cast and cover download
+ public:
+ ConfigWidget(QWidget* parent_, const IMDBFetcher* fetcher = 0);
+ virtual void saveConfig(KConfigGroup& config);
+ virtual QString preferredName() const;
+
+ private:
+ KLineEdit* m_hostEdit;
+ QCheckBox* m_fetchImageCheck;
+ KIntSpinBox* m_numCast;
+ };
+ friend class ConfigWidget;
+
+ static QString defaultName();
+
+private slots:
+ void slotData(KIO::Job* job, const QByteArray& data);
+ void slotComplete(KIO::Job* job);
+ void slotRedirection(KIO::Job* job, const KURL& toURL);
+
+private:
+ static void initRegExps();
+ static QRegExp* s_tagRx; // used to strip HTML tags from scraped text
+ static QRegExp* s_anchorRx; // anchor tags; capture 1 is tested for URL markers, capture 2 is used as the link text
+ static QRegExp* s_anchorTitleRx;
+ static QRegExp* s_anchorNameRx;
+ static QRegExp* s_titleRx;
+
+ void doTitle(const QString& s, Data::EntryPtr e); // the do*() helpers each scrape one group of fields from the page text s into e
+ void doRunningTime(const QString& s, Data::EntryPtr e);
+ void doAspectRatio(const QString& s, Data::EntryPtr e);
+ void doAlsoKnownAs(const QString& s, Data::EntryPtr e);
+ void doPlot(const QString& s, Data::EntryPtr e, const KURL& baseURL_);
+ void doPerson(const QString& s, Data::EntryPtr e,
+ const QString& imdbHeader, const QString& fieldName);
+ void doCast(const QString& s, Data::EntryPtr e, const KURL& baseURL_);
+ void doLists(const QString& s, Data::EntryPtr e);
+ void doRating(const QString& s, Data::EntryPtr e);
+ void doCover(const QString& s, Data::EntryPtr e, const KURL& baseURL);
+
+ void parseSingleTitleResult();
+ void parseSingleNameResult();
+ void parseMultipleTitleResults();
+ void parseTitleBlock(const QString& str);
+ void parseMultipleNameResults();
+ Data::EntryPtr parseEntry(const QString& str); // builds a new entry from the text of a title page
+
+ QByteArray m_data; // text of the page belonging to m_url, as read by fetchEntry()
+ QMap<int, Data::EntryPtr> m_entries; // entries already parsed, keyed by result uid
+ QMap<int, KURL> m_matches; // page URL of every search result, keyed by result uid
+ QGuardedPtr<KIO::Job> m_job;
+
+ FetchKey m_key;
+ QString m_value;
+ bool m_started;
+ bool m_fetchImages; // whether parseEntry() also looks for a cover image
+
+ QString m_host; // host that entry links are switched to in updateEntry()
+ int m_numCast; // maximum number of cast rows stored by doCast()
+ KURL m_url;
+ bool m_redirected; // used as a flag to tell whether a single result was returned
+ uint m_limit; // result collection stops once m_matches reaches this size
+ QStringList m_fields; // names of the optional fields that were requested
+
+ QString m_popularTitles;
+ QString m_exactTitles;
+ QString m_partialTitles;
+ enum TitleBlock { Unknown = 0, Popular = 1, Exact = 2, Partial = 3, SinglePerson = 4};
+ TitleBlock m_currentTitleBlock;
+ int m_countOffset;
+};
+
+ } // end namespace
+} // end namespace
+
+#endif
diff --git a/src/fetch/isbndbfetcher.cpp b/src/fetch/isbndbfetcher.cpp
new file mode 100644
index 0000000..5ffc379
--- /dev/null
+++ b/src/fetch/isbndbfetcher.cpp
@@ -0,0 +1,350 @@
+/***************************************************************************
+ copyright : (C) 2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "isbndbfetcher.h"
+#include "messagehandler.h"
+#include "../translators/xslthandler.h"
+#include "../translators/tellicoimporter.h"
+#include "../tellico_kernel.h"
+#include "../tellico_utils.h"
+#include "../collection.h"
+#include "../entry.h"
+#include "../tellico_debug.h"
+
+#include <klocale.h>
+#include <kstandarddirs.h>
+#include <kconfig.h>
+
+#include <qdom.h>
+#include <qlabel.h>
+#include <qlayout.h>
+#include <qfile.h>
+
+namespace {
+ static const int ISBNDB_RETURNS_PER_REQUEST = 10; // results per page, as assumed by the paging arithmetic in slotComplete()
+ static const int ISBNDB_MAX_RETURNS_TOTAL = 25; // initial result limit; continueSearch() raises the limit by this amount
+ static const char* ISBNDB_BASE_URL = "http://isbndb.com/api/books.xml"; // book query URL; fetchEntry() swaps the file name for publishers.xml
+ static const char* ISBNDB_APP_ID = "3B9S3BQS"; // sent with every request as the "access_key" query item
+}
+
+using Tellico::Fetch::ISBNdbFetcher;
+
+// Builds an idle fetcher: no stylesheet loaded, no job running, paging state
+// rewound. m_numResults is zeroed here too, so it never holds an indeterminate
+// value before the first search() call resets it.
+ISBNdbFetcher::ISBNdbFetcher(QObject* parent_, const char* name_)
+    : Fetcher(parent_, name_), m_xsltHandler(0),
+      m_limit(ISBNDB_MAX_RETURNS_TOTAL), m_page(1), m_total(-1), m_numResults(0),
+      m_countOffset(0), m_job(0), m_started(false) {
+}
+
+// Releases the XSLT handler allocated by initXSLTHandler(), if any.
+ISBNdbFetcher::~ISBNdbFetcher() {
+  XSLTHandler* handler = m_xsltHandler;
+  m_xsltHandler = 0;
+  delete handler;
+}
+
+// Translated name used for this source when no other name has been set.
+QString ISBNdbFetcher::defaultName() {
+  const QString name = i18n("ISBNdb.com");
+  return name;
+}
+
+// Returns m_name, falling back to defaultName() when m_name is empty.
+QString ISBNdbFetcher::source() const {
+  if(m_name.isEmpty()) {
+    return defaultName();
+  }
+  return m_name;
+}
+
+// True for the collection types this source can serve: books, comic books
+// and bibliographies.
+bool ISBNdbFetcher::canFetch(int type) const {
+  if(type == Data::Collection::Book) {
+    return true;
+  }
+  if(type == Data::Collection::ComicBook) {
+    return true;
+  }
+  return type == Data::Collection::Bibtex;
+}
+
+// This source has no configurable options, so the config group is ignored.
+void ISBNdbFetcher::readConfigHook(const KConfigGroup& config_) {
+  Q_UNUSED(config_);
+  return;
+}
+
+// Starts a new search: stores the key and the whitespace-stripped value,
+// rewinds the paging state and issues the first request. If the current
+// collection type is not supported, a warning is shown and the search is
+// stopped instead.
+void ISBNdbFetcher::search(FetchKey key_, const QString& value_) {
+  m_key = key_;
+  m_value = value_.stripWhiteSpace();
+
+  // paging state starts over for every new search
+  m_page = 1;
+  m_total = -1;
+  m_numResults = 0;
+  m_countOffset = 0;
+
+  // must be set before stop() can be called: stop() does nothing unless started
+  m_started = true;
+
+  if(canFetch(Kernel::self()->collectionType())) {
+    doSearch();
+    return;
+  }
+
+  message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning);
+  stop();
+}
+
+// Resumes the previous search with the result limit raised by another
+// ISBNDB_MAX_RETURNS_TOTAL entries.
+void ISBNdbFetcher::continueSearch() {
+  m_limit = m_limit + ISBNDB_MAX_RETURNS_TOTAL;
+  m_started = true;
+  doSearch();
+}
+
+// Builds the request URL for the current key, value and page, and starts the
+// KIO job that downloads it. The reply is collected by slotData() and handled
+// in slotComplete(). An unrecognized key stops the search.
+void ISBNdbFetcher::doSearch() {
+  m_data.truncate(0);
+
+//  myDebug() << "ISBNdbFetcher::search() - value = " << value_ << endl;
+
+  // map the fetch key onto the index name understood by the service
+  QString index;
+  QString value = m_value;
+  switch(m_key) {
+    case Title:
+      index = QString::fromLatin1("title");
+      break;
+
+    case Person:
+      // this queries titles as well; it's a limitation of the isbndb api service
+      index = QString::fromLatin1("combined");
+      break;
+
+    case Keyword:
+      index = QString::fromLatin1("full");
+      break;
+
+    case ISBN:
+      index = QString::fromLatin1("isbn");
+      // only grab the first value: section() needs an explicit end index,
+      // otherwise it returns everything from the start section onwards
+      value = m_value.section(QChar(';'), 0, 0).stripWhiteSpace();
+      value.remove('-');
+      break;
+
+    default:
+      kdWarning() << "ISBNdbFetcher::search() - key not recognized: " << m_key << endl;
+      stop();
+      return;
+  }
+
+  KURL u(QString::fromLatin1(ISBNDB_BASE_URL));
+  u.addQueryItem(QString::fromLatin1("access_key"), QString::fromLatin1(ISBNDB_APP_ID));
+  u.addQueryItem(QString::fromLatin1("results"), QString::fromLatin1("details,authors,subjects,texts"));
+  u.addQueryItem(QString::fromLatin1("page_number"), QString::number(m_page));
+  u.addQueryItem(QString::fromLatin1("index1"), index);
+  u.addQueryItem(QString::fromLatin1("value1"), value);
+//  myDebug() << "ISBNdbFetcher::search() - url: " << u.url() << endl;
+
+  m_job = KIO::get(u, false, false);
+  connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+          SLOT(slotData(KIO::Job*, const QByteArray&)));
+  connect(m_job, SIGNAL(result(KIO::Job*)),
+          SLOT(slotComplete(KIO::Job*)));
+}
+
+// Ends the current search, if one is running: kills any pending job, drops the
+// buffered data and emits signalDone(). Does nothing when not started.
+void ISBNdbFetcher::stop() {
+  if(m_started) {
+//    myDebug() << "ISBNdbFetcher::stop()" << endl;
+    if(m_job) {
+      m_job->kill();
+      m_job = 0;
+    }
+    m_data.truncate(0);
+    m_started = false;
+    emit signalDone(this);
+  }
+}
+
+// Data slot of the download job: appends the delivered chunk to m_data.
+void ISBNdbFetcher::slotData(KIO::Job*, const QByteArray& data_) {
+  QDataStream appender(m_data, IO_WriteOnly | IO_Append);
+  appender.writeRawBytes(data_.data(), data_.size());
+}
+
+// Result slot of the download job. Parses the XML reply, converts it to a
+// Tellico collection through the XSLT stylesheet, emits one search result per
+// entry (up to m_limit) and then either requests the next page or stops.
+// Every exit path that does not continue the search calls stop(), so that
+// signalDone() is always emitted.
+void ISBNdbFetcher::slotComplete(KIO::Job* job_) {
+//  myDebug() << "ISBNdbFetcher::slotComplete()" << endl;
+  // since the fetch is done, don't worry about holding the job pointer
+  m_job = 0;
+
+  if(job_->error()) {
+    job_->showErrorDialog(Kernel::self()->widget());
+    stop();
+    return;
+  }
+
+  if(m_data.isEmpty()) {
+    myDebug() << "ISBNdbFetcher::slotComplete() - no data" << endl;
+    stop();
+    return;
+  }
+
+#if 0
+  kdWarning() << "Remove debug from isbndbfetcher.cpp" << endl;
+  QFile f(QString::fromLatin1("/tmp/test.xml"));
+  if(f.open(IO_WriteOnly)) {
+    QTextStream t(&f);
+    t.setEncoding(QTextStream::UnicodeUTF8);
+    t << QCString(m_data, m_data.size()+1);
+  }
+  f.close();
+#endif
+
+  QDomDocument dom;
+  if(!dom.setContent(m_data, false)) {
+    kdWarning() << "ISBNdbFetcher::slotComplete() - server did not return valid XML." << endl;
+    // without stop() the fetcher would remain in the started state forever
+    stop();
+    return;
+  }
+
+  // the total is only read once per search (search() resets it to -1)
+  if(m_total == -1) {
+    QDomNode n = dom.documentElement().namedItem(QString::fromLatin1("BookList"));
+    QDomElement e = n.toElement();
+    if(!e.isNull()) {
+      m_total = e.attribute(QString::fromLatin1("total_results"), QString::number(-1)).toInt();
+    }
+  }
+
+  if(!m_xsltHandler) {
+    initXSLTHandler();
+    if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading
+      stop();
+      return;
+    }
+  }
+
+  // assume result is always utf-8
+  QString str = m_xsltHandler->applyStylesheet(QString::fromUtf8(m_data, m_data.size()));
+  Import::TellicoImporter imp(str);
+  Data::CollPtr coll = imp.collection();
+  if(!coll) {
+    // the transformed reply could not be imported; nothing to report
+    myDebug() << "ISBNdbFetcher::slotComplete() - no valid result collection" << endl;
+    stop();
+    return;
+  }
+
+  int count = 0;
+  Data::EntryVec entries = coll->entries();
+  for(Data::EntryVec::Iterator entry = entries.begin(); m_numResults < m_limit && entry != entries.end(); ++entry, ++count) {
+    // skip the entries of this page already reported before continueSearch()
+    if(count < m_countOffset) {
+      continue;
+    }
+    if(!m_started) {
+      // might get aborted
+      break;
+    }
+    QString desc = entry->field(QString::fromLatin1("author"))
+                 + QChar('/') + entry->field(QString::fromLatin1("publisher"));
+    if(!entry->field(QString::fromLatin1("cr_year")).isEmpty()) {
+      desc += QChar('/') + entry->field(QString::fromLatin1("cr_year"));
+    } else if(!entry->field(QString::fromLatin1("pub_year")).isEmpty()){
+      desc += QChar('/') + entry->field(QString::fromLatin1("pub_year"));
+    }
+
+    SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn")));
+    m_entries.insert(r->uid, Data::EntryPtr(entry));
+    emit signalResultFound(r);
+    ++m_numResults;
+  }
+
+  // are there any additional results to get?
+  m_hasMoreResults = m_page * ISBNDB_RETURNS_PER_REQUEST < m_total;
+
+  const int currentTotal = QMIN(m_total, m_limit);
+  if(m_page * ISBNDB_RETURNS_PER_REQUEST < currentTotal) {
+    int foundCount = (m_page-1) * ISBNDB_RETURNS_PER_REQUEST + coll->entryCount();
+    message(i18n("Results from %1: %2/%3").arg(source()).arg(foundCount).arg(m_total), MessageHandler::Status);
+    ++m_page;
+    m_countOffset = 0;
+    doSearch();
+  } else {
+    // remember where to resume inside the current page for continueSearch()
+    m_countOffset = m_entries.count() % ISBNDB_RETURNS_PER_REQUEST;
+    if(m_countOffset == 0) {
+      ++m_page; // need to go to next page
+    }
+    stop(); // required
+  }
+}
+
+// Returns the entry belonging to the search result with the given uid, or a
+// null pointer when the uid is unknown. If the entry carries a publisher id,
+// the publisher name is looked up with a further request and the id field is
+// cleared afterwards.
+Tellico::Data::EntryPtr ISBNdbFetcher::fetchEntry(uint uid_) {
+  // use find() rather than operator[]: the latter would insert a null entry
+  // for an unknown uid and skew m_entries.count(), which the paging code uses
+  QMap<int, Data::EntryPtr>::Iterator it = m_entries.find(uid_);
+  if(it == m_entries.end() || !it.data()) {
+    kdWarning() << "ISBNdbFetcher::fetchEntry() - no entry in dict" << endl;
+    return 0;
+  }
+  Data::EntryPtr entry = it.data();
+
+  // if the publisher id is set, then we need to grab the real publisher name
+  const QString id = entry->field(QString::fromLatin1("pub_id"));
+  if(!id.isEmpty()) {
+    KURL u(QString::fromLatin1(ISBNDB_BASE_URL));
+    u.setFileName(QString::fromLatin1("publishers.xml"));
+    u.addQueryItem(QString::fromLatin1("access_key"), QString::fromLatin1(ISBNDB_APP_ID));
+    u.addQueryItem(QString::fromLatin1("index1"), QString::fromLatin1("publisher_id"));
+    u.addQueryItem(QString::fromLatin1("value1"), id);
+
+    QDomDocument dom = FileHandler::readXMLFile(u, true);
+    if(!dom.isNull()) {
+      QString pub = dom.documentElement().namedItem(QString::fromLatin1("PublisherList"))
+                                         .namedItem(QString::fromLatin1("PublisherData"))
+                                         .namedItem(QString::fromLatin1("Name"))
+                                         .toElement().text();
+      if(!pub.isEmpty()) {
+        entry->setField(QString::fromLatin1("publisher"), pub);
+      }
+    }
+    // the id is cleared whether or not the lookup succeeded
+    entry->setField(QString::fromLatin1("pub_id"), QString());
+  }
+
+  return entry;
+}
+
+// Locates isbndb2tellico.xsl and creates the XSLT handler from it. On any
+// failure a warning is logged and m_xsltHandler is left (or reset to) null.
+void ISBNdbFetcher::initXSLTHandler() {
+  const QString stylesheet = locate("appdata", QString::fromLatin1("isbndb2tellico.xsl"));
+  if(stylesheet.isEmpty()) {
+    kdWarning() << "ISBNdbFetcher::initXSLTHandler() - can not locate isbndb2tellico.xsl." << endl;
+    return;
+  }
+
+  KURL stylesheetUrl;
+  stylesheetUrl.setPath(stylesheet);
+
+  // replace any previous handler
+  delete m_xsltHandler;
+  m_xsltHandler = new XSLTHandler(stylesheetUrl);
+  if(m_xsltHandler->isValid()) {
+    return;
+  }
+
+  kdWarning() << "ISBNdbFetcher::initXSLTHandler() - error in isbndb2tellico.xsl." << endl;
+  delete m_xsltHandler;
+  m_xsltHandler = 0;
+}
+
+// Searches for data to update an existing entry: by ISBN when the entry has
+// one (limit 5), otherwise by title (limit 10). When neither field is set,
+// signalDone() is emitted immediately.
+void ISBNdbFetcher::updateEntry(Data::EntryPtr entry_) {
+//  myDebug() << "ISBNdbFetcher::updateEntry()" << endl;
+  const QString isbn = entry_->field(QString::fromLatin1("isbn"));
+  if(!isbn.isEmpty()) {
+    // limit to top 5 results
+    m_limit = 5;
+    search(Fetch::ISBN, isbn);
+    return;
+  }
+
+  // optimistically try searching for title and rely on Collection::sameEntry() to figure things out
+  const QString title = entry_->field(QString::fromLatin1("title"));
+  if(!title.isEmpty()) {
+    m_limit = 10; // raise limit so more possibility of match
+    search(Fetch::Title, title);
+    return;
+  }
+
+  m_limit = 5;
+  myDebug() << "ISBNdbFetcher::updateEntry() - insufficient info to search" << endl;
+  emit signalDone(this); // always need to emit this if not continuing with the search
+}
+
+// Creates the configuration widget for this source as a child of parent_.
+Tellico::Fetch::ConfigWidget* ISBNdbFetcher::configWidget(QWidget* parent_) const {
+  ISBNdbFetcher::ConfigWidget* widget = new ISBNdbFetcher::ConfigWidget(parent_, this);
+  return widget;
+}
+
+// The source has nothing to configure, so the options area only shows a
+// label saying so, followed by stretch space.
+ISBNdbFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const ISBNdbFetcher*/*=0*/)
+    : Fetch::ConfigWidget(parent_) {
+  QVBoxLayout* layout = new QVBoxLayout(optionsWidget());
+  QLabel* note = new QLabel(i18n("This source has no options."), optionsWidget());
+  layout->addWidget(note);
+  layout->addStretch();
+}
+
+// The preferred name is the fetcher's default name.
+QString ISBNdbFetcher::ConfigWidget::preferredName() const {
+  const QString name = ISBNdbFetcher::defaultName();
+  return name;
+}
+
+#include "isbndbfetcher.moc"
diff --git a/src/fetch/isbndbfetcher.h b/src/fetch/isbndbfetcher.h
new file mode 100644
index 0000000..e49246a
--- /dev/null
+++ b/src/fetch/isbndbfetcher.h
@@ -0,0 +1,94 @@
+/***************************************************************************
+ copyright : (C) 2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef TELLICO_FETCH_ISBNDBFETCHER_H
+#define TELLICO_FETCH_ISBNDBFETCHER_H
+
+namespace Tellico {
+ class XSLTHandler;
+}
+
+#include "fetcher.h"
+#include "configwidget.h"
+#include "../datavectors.h"
+
+#include <kio/job.h>
+
+#include <qcstring.h> // for QByteArray
+#include <qguardedptr.h>
+
+namespace Tellico {
+ namespace Fetch {
+
+/**
+ * Fetcher for the isbndb.com XML API. A search is started by key (title,
+ * person, keyword or ISBN, see canSearch()); the XML reply is downloaded
+ * through a KIO job and converted with the isbndb2tellico.xsl stylesheet.
+ *
+ * @author Robby Stephenson
+ */
+class ISBNdbFetcher : public Fetcher {
+Q_OBJECT
+
+public:
+ ISBNdbFetcher(QObject* parent = 0, const char* name = 0);
+ ~ISBNdbFetcher();
+
+ virtual QString source() const; // m_name, or defaultName() when m_name is empty
+ virtual bool isSearching() const { return m_started; }
+ virtual void search(FetchKey key, const QString& value); // resets paging state and starts a new search
+ virtual void continueSearch(); // raises the result limit and resumes the previous search
+ virtual bool canSearch(FetchKey k) const { return k == Title || k == Person || k == Keyword || k == ISBN; }
+ virtual void stop(); // kills any running job and emits signalDone(); no-op when not started
+ virtual Data::EntryPtr fetchEntry(uint uid); // entry for a previously emitted search result, null if unknown
+ virtual Type type() const { return ISBNdb; }
+ virtual bool canFetch(int type) const; // true for Book, ComicBook and Bibtex collections
+ virtual void readConfigHook(const KConfigGroup& config); // ignores its argument: nothing to configure
+
+ virtual void updateEntry(Data::EntryPtr entry); // searches by the entry's ISBN, else by its title
+
+ virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;
+
+ class ConfigWidget : public Fetch::ConfigWidget { // options page that only states there are no options
+ public:
+ ConfigWidget(QWidget* parent_, const ISBNdbFetcher* fetcher = 0);
+ virtual void saveConfig(KConfigGroup&) {}
+ virtual QString preferredName() const;
+ };
+ friend class ConfigWidget;
+
+ static QString defaultName();
+
+private slots:
+ void slotData(KIO::Job* job, const QByteArray& data); // appends a downloaded chunk to m_data
+ void slotComplete(KIO::Job* job); // parses the reply and emits the search results
+
+private:
+ void initXSLTHandler(); // loads isbndb2tellico.xsl; leaves m_xsltHandler null on failure
+ void doSearch(); // builds the request URL for the current page and starts the job
+
+ XSLTHandler* m_xsltHandler; // owned; created on first use, deleted in the destructor
+ int m_limit; // upper bound on m_numResults
+ int m_page; // value sent as the page_number query item
+ int m_total; // total_results reported by the server, -1 until known
+ int m_numResults; // results emitted so far for the current search
+ int m_countOffset; // leading entries of a page to skip when resuming
+
+ QByteArray m_data; // raw bytes of the reply being downloaded
+ QMap<int, Data::EntryPtr> m_entries; // search result uid -> entry
+ QGuardedPtr<KIO::Job> m_job; // running download job, null when idle
+
+ FetchKey m_key;
+ QString m_value;
+ bool m_started; // true between search()/continueSearch() and stop()
+};
+
+ }
+}
+#endif
diff --git a/src/fetch/messagehandler.cpp b/src/fetch/messagehandler.cpp
new file mode 100644
index 0000000..f3c36a1
--- /dev/null
+++ b/src/fetch/messagehandler.cpp
@@ -0,0 +1,35 @@
+/***************************************************************************
+ copyright : (C) 2005-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "messagehandler.h"
+#include "fetchmanager.h"
+#include "../tellico_kernel.h"
+
+#include <kmessagebox.h>
+
+using Tellico::Fetch::ManagerMessage;
+
+// Every message updates the fetch manager's status; errors and warnings are
+// additionally shown to the user in a message box.
+void ManagerMessage::send(const QString& message_, Type type_) {
+  Fetch::Manager::self()->updateStatus(message_);
+
+  switch(type_) {
+    case Error:
+      KMessageBox::sorry(Kernel::self()->widget(), message_);
+      break;
+
+    case Warning:
+      KMessageBox::information(Kernel::self()->widget(), message_);
+      break;
+
+    default:
+      // other message types get no message box
+      break;
+  }
+}
+
+// Shows the message together with the list of strings in an information box
+// parented to the application's main widget.
+void ManagerMessage::infoList(const QString& message_, const QStringList& list_) {
+  QWidget* parent = Kernel::self()->widget();
+  KMessageBox::informationList(parent, message_, list_);
+}
diff --git a/src/fetch/messagehandler.h b/src/fetch/messagehandler.h
new file mode 100644
index 0000000..0ec9269
--- /dev/null
+++ b/src/fetch/messagehandler.h
@@ -0,0 +1,49 @@
+/***************************************************************************
+ copyright : (C) 2005-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef TELLICO_FETCH_MESSAGEHANDLER_H
+#define TELLICO_FETCH_MESSAGEHANDLER_H
+
+class QString;
+class QStringList;
+
+namespace Tellico {
+ namespace Fetch {
+
+/**
+ * Abstract interface through which messages are reported. Implementations
+ * decide how a message of each Type is presented.
+ *
+ * @author Robby Stephenson
+ */
+class MessageHandler {
+public:
+ enum Type { Status, Warning, Error, ListError }; // kind of message handed to send()
+
+ MessageHandler() {}
+ virtual ~MessageHandler() {}
+
+ virtual void send(const QString& message, Type type) = 0; // report one message of the given type
+ virtual void infoList(const QString& message, const QStringList& list) = 0; // report a message accompanied by a list of strings
+};
+
+class ManagerMessage : public MessageHandler { // handler that reports through Fetch::Manager and KMessageBox
+public:
+ ManagerMessage() : MessageHandler() {}
+ virtual ~ManagerMessage() {}
+
+ virtual void send(const QString& message, Type type); // updates the manager status; Error and Warning also open a message box
+ virtual void infoList(const QString& message, const QStringList& list); // shows the message and list in an information box
+};
+
+ } // end namespace
+} // end namespace
+
+#endif
diff --git a/src/fetch/scripts/Makefile.am b/src/fetch/scripts/Makefile.am
new file mode 100644
index 0000000..050c460
--- /dev/null
+++ b/src/fetch/scripts/Makefile.am
@@ -0,0 +1,30 @@
+####### kdevelop will overwrite this part!!! (begin)##########
+
+EXTRA_DIST = \
+fr.allocine.py fr.allocine.py.spec \
+ministerio_de_cultura.py ministerio_de_cultura.py.spec \
+dark_horse_comics.py dark_horse_comics.py.spec \
+boardgamegeek.rb boardgamegeek.rb.spec
+
+####### kdevelop will overwrite this part!!! (end)############
+
+scriptdir = $(kde_datadir)/tellico/data-sources
+script_SCRIPTS = \
+fr.allocine.py \
+ministerio_de_cultura.py \
+dark_horse_comics.py \
+boardgamegeek.rb
+
+script_DATA = \
+fr.allocine.py.spec \
+ministerio_de_cultura.py.spec \
+dark_horse_comics.py.spec \
+boardgamegeek.rb.spec
+
+KDE_OPTIONS = noautodist
+
+CLEANFILES = *~
+
+# Remove the data-sources directory after uninstalling, if it is empty.
+# $(DESTDIR) is honoured so that a staged uninstall acts on the staging tree;
+# the leading '-' makes a failing rmdir (directory not empty) non-fatal.
+uninstall-hook:
+	-if [ -d "$(DESTDIR)$(scriptdir)" ]; then rmdir "$(DESTDIR)$(scriptdir)"; fi
diff --git a/src/fetch/scripts/boardgamegeek.rb b/src/fetch/scripts/boardgamegeek.rb
new file mode 100644
index 0000000..b3cf4f3
--- /dev/null
+++ b/src/fetch/scripts/boardgamegeek.rb
@@ -0,0 +1,235 @@
+#!/usr/bin/env ruby
+#
+# ***************************************************************************
+# copyright : (C) 2006 by Steve Beattie
+# : (C) 2008 by Sven Werlen
+# email : sbeattie@suse.de
+# : sven@boisdechet.org
+# ***************************************************************************
+#
+# ***************************************************************************
+# * *
+# * This program is free software; you can redistribute it and/or modify *
+# * it under the terms of version 2 of the GNU General Public License as *
+# * published by the Free Software Foundation; *
+# * *
+# ***************************************************************************
+
+# $Id: boardgamegeek.rb 313 2006-10-02 22:17:11Z steve $
+
+# This program is expected to be invoked from tellico
+# (http://periapsis.org/tellico) as an external data source. It provides
+# searches for boardgames from the boardgamegeek.com website, via
+# boardgamegeek's xmlapi interface
+# (http://www.boardgamegeek.com/xmlapi/)
+#
+# It only allows searches via name; the boardgamegeek xmlapi is not yet
+# rich enough to support queries by designer, publisher, category, or
+# mechanism. I'd like to add support for querying by boardgamegeek id,
+# but that needs additional support in tellico.
+#
+# Sven Werlen: 03 Feb 2008: script has been extended to retrieve cover
+# images (/thumbnail from xmlapi). Images are retrieved from the website
+# and base64 is generated on-the-fly.
+#
+require 'rexml/document'
+require 'net/http'
+require 'cgi'
+require "base64"
+include REXML
+
+$my_version = '$Rev: 313 $'
+
+class Game
+ attr_writer :year
+ attr_writer :description
+ attr_writer :cover
+ attr_writer :image
+
+ def initialize(name, id)
+ @name = name
+ @id = id
+ @publishers = []
+ @designers = []
+ @players = []
+ end
+
+ def add_publisher(publisher)
+ @publishers << publisher
+ end
+
+ def add_designer(designer)
+ @designers << designer
+ end
+
+ def add_players(players)
+ @players << players
+ end
+
+ def to_s()
+ "@name (#@id #@publishers #@year)"
+ end
+
+ def toXML()
+ element = Element.new 'entry'
+ element.add_element Element.new('title').add_text(@name)
+ element.add_element Element.new('description').add_text(@description) if @description
+ element.add_element Element.new('year').add_text(@year) if @year
+ element.add_element Element.new('boardgamegeek-link').add_text("http://www.boardgamegeek/game/#{@id}") if @id
+ element.add_element Element.new('bggid').add_text(@id) if @id
+ element.add_element Element.new('cover').add_text(@cover) if @cover
+
+ if @publishers.length > 0
+ pub_elements = Element.new('publishers')
+ @publishers.each {|p| pub_elements.add_element Element.new('publisher').add_text(p)}
+ element.add_element pub_elements
+ end
+ if @designers.length > 0
+ des_elements = Element.new('designers')
+ @designers.each {|d| des_elements.add_element Element.new('designer').add_text(d)}
+ element.add_element des_elements
+ end
+ if @players.length > 0
+ players_elements = Element.new('num-players')
+ @players.each {|n| players_elements.add_element Element.new('num-player').add_text(n.to_s)}
+ element.add_element players_elements
+ end
+ return element
+ end
+
+ def image()
+ image = Element.new 'image'
+ image.add_attribute('format', 'JPEG')
+ image.add_attribute('id', @id + ".jpg")
+ image.add_text(@image)
+ return image
+ end
+end
+
+def getGameList(query)
+ #puts("Query is #{query}")
+
+ search_result = nil
+ Net::HTTP.start('www.boardgamegeek.com', 80) do
+ |http| search_result = (http.get("/xmlapi/search?search=#{CGI.escape(query)}",
+ {"User-Agent" => "BoardGameGeek plugin for Tellico #{$my_version}"}).body)
+ http.finish
+ end
+ doc = REXML::Document.new(search_result)
+
+ games = XPath.match(doc, "//game")
+ #games.each {|g| puts g.elements['name'].text+g.attributes['gameid']}
+ ids = []
+ games.each {|g| ids << g.attributes['gameid']}
+ return ids
+end
+
+def getGameDetails(ids)
+ #ids.each {|id| puts id}
+
+ query = "/xmlapi/game/#{ids.join(',')}"
+ #puts query
+ search_result = nil
+ Net::HTTP.start('www.boardgamegeek.com', 80) do |http|
+ search_result = http.get(query, {"User-Agent" => "BoardGameGeek plugin for Tellico #{$my_version}"})
+ http.finish
+ end
+ games = []
+ case search_result
+ when Net::HTTPOK then
+ doc = REXML::Document.new(search_result.body)
+
+ games_xml = XPath.match(doc, "//game")
+ games_xml.each do |g|
+ if( g.elements['name'] != nil )
+ game = Game.new(g.elements['name'].text, g.attributes['gameid'])
+ game.year = g.elements['yearpublished'].text
+ game.description = g.elements['description'].text
+ g.elements.each('publisher'){|p| game.add_publisher p.elements['name'].text}
+ g.elements.each('designer'){|d| game.add_designer d.elements['name'].text}
+ minp = Integer(g.elements['minplayers'].text)
+ maxp = Integer(g.elements['maxplayers'].text)
+ minp.upto(maxp) {|n| game.add_players(n)}
+
+ # retrieve cover
+ coverurl = g.elements['thumbnail'] != nil ? g.elements['thumbnail'].text : nil
+ if( coverurl =~ /files.boardgamegeek.com(.*)$/ )
+ # puts "downloading... " + $1
+ cover = nil
+ Net::HTTP.start('files.boardgamegeek.com', 80) do |http|
+ cover = (http.get($1, {"User-Agent" => "BoardGameGeek plugin for Tellico #{$my_version}"}))
+ end
+ case cover
+ when Net::HTTPOK then
+ game.cover = g.attributes['gameid'] + ".jpg";
+ game.image = Base64.encode64(cover.body);
+ end
+ else
+ # puts "invalid cover: " + coverurl
+ end
+ games << game
+ end
+ end
+ end
+ return games
+end
+
+def listToXML(gameList)
+ doc = REXML::Document.new
+ doc << REXML::DocType.new('tellico PUBLIC', '"-//Robby Stephenson/DTD Tellico V10.0//EN" "http://periapsis.org/tellico/dtd/v10/tellico.dtd"')
+ doc << XMLDecl.new
+ tellico = Element.new 'tellico'
+ tellico.add_attribute('xmlns', 'http://periapsis.org/tellico/')
+ tellico.add_attribute('syntaxVersion', '10')
+ collection = Element.new 'collection'
+ collection.add_attribute('title', 'My Collection')
+ collection.add_attribute('type', '13')
+
+ fields = Element.new 'fields'
+ field = Element.new 'field'
+ field.add_attribute('name', '_default')
+ fields.add_element(field)
+ field = Element.new 'field'
+ field.add_attribute('name', 'bggid')
+ field.add_attribute('title', 'BoardGameGeek ID')
+ field.add_attribute('category', 'General')
+ field.add_attribute('flags', '0')
+ field.add_attribute('format', '4')
+ field.add_attribute('type', '6')
+ field.add_attribute('i18n', 'true')
+ fields.add_element(field)
+ collection.add_element(fields)
+
+ images = Element.new 'images'
+
+ id = 0
+ gameList.each do
+ |g| element = g.toXML()
+ element.add_attribute('id', id)
+ id = id + 1
+ collection.add_element(element)
+ images.add_element(g.image());
+ end
+ collection.add_element(images);
+ tellico.add_element(collection)
+ doc.add_element(tellico)
+ doc.write($stdout, 0)
+ puts ""
+end
+
+if __FILE__ == $0
+
+ def showUsage
+ warn "usage: #{__FILE__} game_query"
+ exit 1
+ end
+
+ showUsage unless ARGV.length == 1
+
+ idList = getGameList(ARGV.shift)
+ if idList
+ gameList = getGameDetails(idList)
+ end
+
+ listToXML(gameList)
+end
diff --git a/src/fetch/scripts/boardgamegeek.rb.spec b/src/fetch/scripts/boardgamegeek.rb.spec
new file mode 100644
index 0000000..6e0aab0
--- /dev/null
+++ b/src/fetch/scripts/boardgamegeek.rb.spec
@@ -0,0 +1,7 @@
+Name=BoardGameGeek
+Type=data-source
+ArgumentKeys=1
+Arguments=%1
+CollectionType=13
+FormatType=0
+UpdateArgs=%{title}
diff --git a/src/fetch/scripts/dark_horse_comics.py b/src/fetch/scripts/dark_horse_comics.py
new file mode 100644
index 0000000..4f3b651
--- /dev/null
+++ b/src/fetch/scripts/dark_horse_comics.py
@@ -0,0 +1,399 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+
+# ***************************************************************************
+# copyright : (C) 2006 by Mathias Monnerville
+# email : tellico_dev@yahoo.fr
+# ***************************************************************************
+#
+# ***************************************************************************
+# * *
+# * This program is free software; you can redistribute it and/or modify *
+# * it under the terms of version 2 of the GNU General Public License as *
+# * published by the Free Software Foundation; *
+# * *
+# ***************************************************************************
+
+# $Id: comics_darkhorsecomics.py 123 2006-03-24 08:47:48Z mathias $
+
+"""
+This script has to be used with tellico (http://periapsis.org/tellico) as an external data source program.
+It allows searching through the Dark Horse Comics web database.
+
+Related info and cover are fetched automatically. It takes only one argument (comic title).
+
+Tellico data source setup:
+- source name: Dark Horse Comics (US) (or whatever you want :)
+- Collection type: comics collection
+- Result type: tellico
+- Path: /path/to/script/comics_darkhorsecomics.py
+- Arguments:
+Title (checked) = %1
+Update (checked) = %{title}
+"""
+
+import sys, os, re, md5, random, string
+import urllib, urllib2, time, base64
+import xml.dom.minidom
+
+XML_HEADER = """<?xml version="1.0" encoding="UTF-8"?>"""
+DOCTYPE = """<!DOCTYPE tellico PUBLIC "-//Robby Stephenson/DTD Tellico V9.0//EN" "http://periapsis.org/tellico/dtd/v9/tellico.dtd">"""
+NULLSTRING = ''
+
+VERSION = "0.2"
+
+
+def genMD5():
+ """
+ Generates and returns a random md5 string. Its main purpose is to allow random
+ image file name generation.
+ """
+ obj = md5.new()
+ float = random.random()
+ obj.update(str(float))
+ return obj.hexdigest()
+
+class BasicTellicoDOM:
+ """
+ This class manages tellico's XML data model (DOM)
+ """
+ def __init__(self):
+ self.__doc = xml.dom.minidom.Document()
+ self.__root = self.__doc.createElement('tellico')
+ self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/')
+ self.__root.setAttribute('syntaxVersion', '9')
+
+ self.__collection = self.__doc.createElement('collection')
+ self.__collection.setAttribute('title', 'My Comics')
+ self.__collection.setAttribute('type', '6')
+
+ self.__images = self.__doc.createElement('images')
+
+ self.__root.appendChild(self.__collection)
+ self.__doc.appendChild(self.__root)
+
+ # Current movie id. See entry's id attribute in self.addEntry()
+ self.__currentId = 0
+
+
+ def addEntry(self, movieData):
+ """
+ Add a comic entry.
+ Returns an entry node instance
+ """
+ d = movieData
+ entryNode = self.__doc.createElement('entry')
+ entryNode.setAttribute('id', str(self.__currentId))
+
+ titleNode = self.__doc.createElement('title')
+ titleNode.appendChild(self.__doc.createTextNode(unicode(d['title'], 'latin-1').encode('utf-8')))
+
+ yearNode = self.__doc.createElement('pub_year')
+ yearNode.appendChild(self.__doc.createTextNode(d['pub_year']))
+
+ countryNode = self.__doc.createElement('country')
+ countryNode.appendChild(self.__doc.createTextNode(d['country']))
+ pubNode = self.__doc.createElement('publisher')
+ pubNode.appendChild(self.__doc.createTextNode(d['publisher']))
+ langNode = self.__doc.createElement('language')
+ langNode.appendChild(self.__doc.createTextNode(d['language']))
+
+ writersNode = self.__doc.createElement('writers')
+ for g in d['writer']:
+ writerNode = self.__doc.createElement('writer')
+ writerNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8')))
+ writersNode.appendChild(writerNode)
+
+ genresNode = self.__doc.createElement('genres')
+ for g in d['genre']:
+ genreNode = self.__doc.createElement('genre')
+ genreNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8')))
+ genresNode.appendChild(genreNode)
+
+ commentsNode = self.__doc.createElement('comments')
+ #for g in d['comments']:
+ # commentsNode.appendChild(self.__doc.createTextNode(unicode("%s\n\n" % g, 'latin-1').encode('utf-8')))
+ commentsData = string.join(d['comments'], '\n\n')
+ commentsNode.appendChild(self.__doc.createTextNode(unicode(commentsData, 'latin-1').encode('utf-8')))
+
+ artistsNode = self.__doc.createElement('artists')
+ for k, v in d['artist'].iteritems():
+ artistNode = self.__doc.createElement('artist')
+ artistNode.appendChild(self.__doc.createTextNode(unicode(v, 'latin-1').encode('utf-8')))
+ artistsNode.appendChild(artistNode)
+
+ pagesNode = self.__doc.createElement('pages')
+ pagesNode.appendChild(self.__doc.createTextNode(d['pages']))
+
+ issueNode = self.__doc.createElement('issue')
+ issueNode.appendChild(self.__doc.createTextNode(d['issue']))
+
+ if d['image']:
+ imageNode = self.__doc.createElement('image')
+ imageNode.setAttribute('format', 'JPEG')
+ imageNode.setAttribute('id', d['image'][0])
+ imageNode.appendChild(self.__doc.createTextNode(unicode(d['image'][1], 'latin-1').encode('utf-8')))
+
+ coverNode = self.__doc.createElement('cover')
+ coverNode.appendChild(self.__doc.createTextNode(d['image'][0]))
+
+ for name in ( 'writersNode', 'genresNode', 'artistsNode', 'pagesNode', 'yearNode',
+ 'titleNode', 'issueNode', 'commentsNode', 'pubNode', 'langNode',
+ 'countryNode' ):
+ entryNode.appendChild(eval(name))
+
+ if d['image']:
+ entryNode.appendChild(coverNode)
+ self.__images.appendChild(imageNode)
+
+ self.__collection.appendChild(entryNode)
+
+ self.__currentId += 1
+ return entryNode
+
+ def printEntry(self, nEntry):
+ """
+ Prints entry's XML content to stdout
+ """
+ try:
+ print nEntry.toxml()
+ except:
+ print sys.stderr, "Error while outputing XML content from entry to Tellico"
+
+ def printXMLTree(self):
+ """
+ Outputs XML content to stdout
+ """
+ self.__collection.appendChild(self.__images)
+ print XML_HEADER; print DOCTYPE
+ print self.__root.toxml()
+
+
+class DarkHorseParser:
+ def __init__(self):
+ self.__baseURL = 'http://www.darkhorse.com'
+ self.__basePath = '/profile/profile.php?sku='
+ self.__searchURL = '/search/search.php?frompage=userinput&sstring=%s&x=0&y=0'
+ self.__coverPath = 'http://images.darkhorse.com/covers/'
+ self.__movieURL = self.__baseURL + self.__basePath
+
+ # Define some regexps
+ self.__regExps = { 'title' : '<font size="\+2"><b>(?P<title>.*?)</b></font>',
+ 'pub_date' : '<b>Pub.* Date:</b> *<a.*>(?P<pub_date>.*)</a>',
+ 'desc' : '<p>(?P<desc>.*?)<br>',
+ 'writer' : '<b>Writer: *</b> *<a.*?>(?P<writer>.*)</a>',
+ 'cover_artist' : '<b>Cover Artist: *</b> *<a.*>(?P<cover_artist>.*)</a>',
+ 'penciller' : '<b>Penciller: *</b> *<a.*>(?P<penciller>.*)</a>',
+ 'inker' : '<b>Inker: *</b> *<a.*>(?P<inker>.*)</a>',
+ 'letterer' : '<b>Letterer: *</b> *<a.*>(?P<letterer>.*)</a>',
+ 'colorist' : '<b>Colorist: *</b> *<a.*>(?P<colorist>.*)</a>',
+ 'genre' : '<b>Genre: *</b> *<a.*?>(?P<genre>.*?)</a><br>',
+ 'format' : '<b>Format: *</b> *(?P<format>.*?)<br>',
+ }
+
+ # Compile patterns objects
+ self.__regExpsPO = {}
+ for k, pattern in self.__regExps.iteritems():
+ self.__regExpsPO[k] = re.compile(pattern)
+
+ self.__domTree = BasicTellicoDOM()
+
+ def run(self, title):
+ """
+ Runs the Dark Horse Comics parser: searches for title, fills the DOM tree with the
+ entries found and prints it to stdout (in tellico format) so that tellico can use it.
+ """
+ self.__getMovie(title)
+ # Print results to stdout
+ self.__domTree.printXMLTree()
+
+ def __getHTMLContent(self, url):
+ """
+ Fetch HTML data from url
+ """
+ u = urllib2.urlopen(url)
+ self.__data = u.read()
+ u.close()
+
+ def __fetchMovieLinks(self):
+ """
+ Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent()
+ that need to be parsed.
+ """
+ matchList = re.findall("""<a *href="%s(?P<page>.*?)">(?P<title>.*?)</a>""" % self.__basePath.replace('?', '\?'), self.__data)
+ if not matchList: return None
+
+ return matchList
+
+ def __fetchCover(self, path, delete = True):
+ """
+ Fetch cover to /tmp. Returns base64 encoding of data.
+ The image is deleted if delete is True
+ """
+ md5 = genMD5()
+ imObj = urllib2.urlopen(path.strip())
+ img = imObj.read()
+ imObj.close()
+ imgPath = "/tmp/%s.jpeg" % md5
+ try:
+ f = open(imgPath, 'w')
+ f.write(img)
+ f.close()
+ except:
+ print sys.stderr, "Error: could not write image into /tmp"
+
+ b64data = (md5 + '.jpeg', base64.encodestring(img))
+
+ # Delete temporary image
+ if delete:
+ try:
+ os.remove(imgPath)
+ except:
+ print sys.stderr, "Error: could not delete temporary image /tmp/%s.jpeg" % md5
+
+ return b64data
+
+ def __fetchMovieInfo(self, url):
+ """
+ Looks for movie information
+ """
+ self.__getHTMLContent(url)
+
+ # First grab picture data
+ imgMatch = re.search("""<img src="%s(?P<imgpath>.*?)".*>""" % self.__coverPath, self.__data)
+ if imgMatch:
+ imgPath = self.__coverPath + imgMatch.group('imgpath')
+ # Fetch cover and gets its base64 encoded data
+ b64img = self.__fetchCover(imgPath)
+ else:
+ b64img = None
+
+ # Now isolate data between <div class="bodytext">...</div> elements
+ # re.S sets DOTALL; it makes the "." special character match any character at all, including a newline
+ m = re.search("""<div class="bodytext">(?P<part>.*)</div>""", self.__data, re.S)
+ self.__data = m.group('part')
+
+ matches = {}
+ data = {}
+ data['comments'] = []
+ data['artist'] = {}
+
+ # Default values
+ data['publisher'] = 'Dark Horse Comics'
+ data['language'] = 'English'
+ data['country'] = 'USA'
+
+ data['image'] = b64img
+ data['pub_year'] = NULLSTRING
+
+ for name, po in self.__regExpsPO.iteritems():
+ data[name] = NULLSTRING
+ if name == 'desc':
+ matches[name] = re.findall(self.__regExps[name], self.__data, re.S | re.I)
+ else:
+ matches[name] = po.search(self.__data)
+
+ if matches[name]:
+ if name == 'title':
+ title = matches[name].group('title').strip()
+ data[name] = title
+ # Look for issue information
+ m = re.search("#(?P<issue>[0-9]+)", title)
+ if m:
+ data['issue'] = m.group('issue')
+ else:
+ data['issue'] = ''
+
+ elif name == 'pub_date':
+ pub_date = matches[name].group('pub_date').strip()
+ data['pub_year'] = pub_date[-4:]
+ # Add this to comments field
+ data['comments'].insert(0, "Pub. Date: %s" % pub_date)
+
+ elif name == 'desc':
+ # Find biggest size
+ max = 0
+ for i in range(len(matches[name])):
+ if len(matches[name][i]) > len(matches[name][max]):
+ max = i
+ data['comments'].append(matches[name][max].strip())
+
+ elif name == 'writer':
+ # We may find several writers
+ data[name] = []
+ writersList = re.sub('</?a.*?>', '', matches[name].group('writer')).split(',')
+ for d in writersList:
+ data[name].append(d.strip())
+
+ elif name == 'cover_artist':
+ data['artist']['Cover Artist'] = matches[name].group('cover_artist').strip()
+
+ elif name == 'penciller':
+ data['artist']['Penciller'] = matches[name].group('penciller').strip()
+
+ elif name == 'inker':
+ data['artist']['Inker'] = matches[name].group('inker').strip()
+
+ elif name == 'colorist':
+ data['artist']['Colorist'] = matches[name].group('colorist').strip()
+
+ elif name == 'letterer':
+ data['artist']['Letterer'] = matches[name].group('letterer').strip()
+
+ elif name == 'genre':
+ # We may find several genres
+ data[name] = []
+ genresList = re.sub('</?a.*?>', '', matches[name].group('genre')).split(',')
+ for d in genresList:
+ data[name].append(d.strip())
+
+ elif name == 'format':
+ format = matches[name].group('format').strip()
+ data['comments'].insert(1, format)
+ m = re.search("(?P<pages>[0-9]+)", format)
+ if m:
+ data['pages'] = m.group('pages')
+ else:
+ data['pages'] = ''
+
+ return data
+
+
+ def __getMovie(self, title):
+ """
+ Searches the Dark Horse site for title and adds one entry to the DOM tree
+ for every profile link found in the search results.
+ Does nothing when title is empty; returns None in every case.
+ """
+ if not len(title): return
+
+ self.__title = title
+ self.__getHTMLContent("%s%s" % (self.__baseURL, self.__searchURL % urllib.quote(self.__title)))
+
+ # Get all links
+ links = self.__fetchMovieLinks()
+
+ # Now retrieve infos
+ if links:
+ for entry in links:
+ # entry is a (page, title) tuple; entry[0] is appended to the profile URL
+ data = self.__fetchMovieInfo( url = self.__movieURL + entry[0] )
+ # Add Dark Horse link (custom field)
+ # NOTE(review): the addEntry() in this file does not read 'darkhorse' - confirm whether a field is missing
+ data['darkhorse'] = "%s%s" % (self.__movieURL, entry[0])
+ node = self.__domTree.addEntry(data)
+ # Print entries on-the-fly
+ #self.__domTree.printEntry(node)
+ else:
+ return None
+
+def halt():
+ print "HALT."
+ sys.exit(0)
+
+def showUsage():
+ print "Usage: %s comic" % sys.argv[0]
+ sys.exit(1)
+
+def main():
+ if len(sys.argv) < 2:
+ showUsage()
+
+ parser = DarkHorseParser()
+ parser.run(sys.argv[1])
+
+if __name__ == '__main__':
+ main()
diff --git a/src/fetch/scripts/dark_horse_comics.py.spec b/src/fetch/scripts/dark_horse_comics.py.spec
new file mode 100644
index 0000000..9481dc8
--- /dev/null
+++ b/src/fetch/scripts/dark_horse_comics.py.spec
@@ -0,0 +1,7 @@
+Name=Dark Horse Comics
+Type=data-source
+ArgumentKeys=1
+Arguments=%1
+CollectionType=6
+FormatType=0
+UpdateArgs=%{title}
diff --git a/src/fetch/scripts/fr.allocine.py b/src/fetch/scripts/fr.allocine.py
new file mode 100755
index 0000000..97a2247
--- /dev/null
+++ b/src/fetch/scripts/fr.allocine.py
@@ -0,0 +1,335 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+
+# ***************************************************************************
+# copyright : (C) 2006 by Mathias Monnerville
+# email : tellico@monnerville.com
+# ***************************************************************************
+#
+# ***************************************************************************
+# * *
+# * This program is free software; you can redistribute it and/or modify *
+# * it under the terms of version 2 of the GNU General Public License as *
+# * published by the Free Software Foundation; *
+# * *
+# ***************************************************************************
+
+# Version 0.4: 2007-08-27
+# * Fixed parsing errors: some fields in allocine's HTML pages have changed recently. Multiple actors and genres
+# could not be retrieved. Fixed bad http request error due to some changes in HTML code.
+#
+# Version 0.3:
+# * Fixed parsing: some fields in allocine's HTML pages have changed. Movie's image could not be fetched anymore. Fixed.
+#
+# Version 0.2:
+# * Fixed parsing: allocine's HTML pages have changed. Movie's image could not be fetched anymore.
+#
+# Version 0.1:
+# * Initial release.
+
+import sys, os, re, md5, random
+import urllib, urllib2, time, base64
+import xml.dom.minidom
+
+XML_HEADER = """<?xml version="1.0" encoding="UTF-8"?>"""
+DOCTYPE = """<!DOCTYPE tellico PUBLIC "-//Robby Stephenson/DTD Tellico V9.0//EN" "http://periapsis.org/tellico/dtd/v9/tellico.dtd">"""
+
+VERSION = "0.4"
+
+def genMD5():
+ obj = md5.new()
+ float = random.random()
+ obj.update(str(float))
+ return obj.hexdigest()
+
+class BasicTellicoDOM:
+ def __init__(self):
+ self.__doc = xml.dom.minidom.Document()
+ self.__root = self.__doc.createElement('tellico')
+ self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/')
+ self.__root.setAttribute('syntaxVersion', '9')
+
+ self.__collection = self.__doc.createElement('collection')
+ self.__collection.setAttribute('title', 'My Movies')
+ self.__collection.setAttribute('type', '3')
+
+ self.__fields = self.__doc.createElement('fields')
+ # Add all default (standard) fields
+ self.__dfltField = self.__doc.createElement('field')
+ self.__dfltField.setAttribute('name', '_default')
+
+ # Add a custom 'Collection' field
+ self.__customField = self.__doc.createElement('field')
+ self.__customField.setAttribute('name', 'titre-original')
+ self.__customField.setAttribute('title', 'Original Title')
+ self.__customField.setAttribute('flags', '8')
+ self.__customField.setAttribute('category', 'General')
+ self.__customField.setAttribute('format', '1')
+ self.__customField.setAttribute('type', '1')
+ self.__customField.setAttribute('i18n', 'yes')
+
+ self.__fields.appendChild(self.__dfltField)
+ self.__fields.appendChild(self.__customField)
+ self.__collection.appendChild(self.__fields)
+
+ self.__images = self.__doc.createElement('images')
+
+ self.__root.appendChild(self.__collection)
+ self.__doc.appendChild(self.__root)
+
+ # Current movie id
+ self.__currentId = 0
+
+
+ def addEntry(self, movieData):
+ """
+ Add a movie entry
+ """
+ d = movieData
+ entryNode = self.__doc.createElement('entry')
+ entryNode.setAttribute('id', str(self.__currentId))
+
+ titleNode = self.__doc.createElement('title')
+ titleNode.appendChild(self.__doc.createTextNode(unicode(d['title'], 'latin-1').encode('utf-8')))
+
+ otitleNode = self.__doc.createElement('titre-original')
+ otitleNode.appendChild(self.__doc.createTextNode(unicode(d['otitle'], 'latin-1').encode('utf-8')))
+
+ yearNode = self.__doc.createElement('year')
+ yearNode.appendChild(self.__doc.createTextNode(unicode(d['year'], 'latin-1').encode('utf-8')))
+
+ genresNode = self.__doc.createElement('genres')
+ for g in d['genres']:
+ genreNode = self.__doc.createElement('genre')
+ genreNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8')))
+ genresNode.appendChild(genreNode)
+
+ natsNode = self.__doc.createElement('nationalitys')
+ natNode = self.__doc.createElement('nat')
+ natNode.appendChild(self.__doc.createTextNode(unicode(d['nat'], 'latin-1').encode('utf-8')))
+ natsNode.appendChild(natNode)
+
+ castsNode = self.__doc.createElement('casts')
+ for g in d['actors']:
+ castNode = self.__doc.createElement('cast')
+ col1Node = self.__doc.createElement('column')
+ col2Node = self.__doc.createElement('column')
+ col1Node.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8')))
+ castNode.appendChild(col1Node)
+ castNode.appendChild(col2Node)
+ castsNode.appendChild(castNode)
+
+ dirsNode = self.__doc.createElement('directors')
+ for g in d['dirs']:
+ dirNode = self.__doc.createElement('director')
+ dirNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8')))
+ dirsNode.appendChild(dirNode)
+
+ timeNode = self.__doc.createElement('running-time')
+ timeNode.appendChild(self.__doc.createTextNode(unicode(d['time'], 'latin-1').encode('utf-8')))
+
+ allocineNode = self.__doc.createElement(unicode('allociné-link', 'latin-1').encode('utf-8'))
+ allocineNode.appendChild(self.__doc.createTextNode(unicode(d['allocine'], 'latin-1').encode('utf-8')))
+
+ plotNode = self.__doc.createElement('plot')
+ plotNode.appendChild(self.__doc.createTextNode(unicode(d['plot'], 'latin-1').encode('utf-8')))
+
+ if d['image']:
+ imageNode = self.__doc.createElement('image')
+ imageNode.setAttribute('format', 'JPEG')
+ imageNode.setAttribute('id', d['image'][0])
+ imageNode.setAttribute('width', '120')
+ imageNode.setAttribute('height', '160')
+ imageNode.appendChild(self.__doc.createTextNode(unicode(d['image'][1], 'latin-1').encode('utf-8')))
+
+ coverNode = self.__doc.createElement('cover')
+ coverNode.appendChild(self.__doc.createTextNode(d['image'][0]))
+
+ for name in ( 'titleNode', 'otitleNode', 'yearNode', 'genresNode', 'natsNode',
+ 'castsNode', 'dirsNode', 'timeNode', 'allocineNode', 'plotNode' ):
+ entryNode.appendChild(eval(name))
+
+ if d['image']:
+ entryNode.appendChild(coverNode)
+ self.__images.appendChild(imageNode)
+
+ self.__collection.appendChild(entryNode)
+
+ self.__currentId += 1
+
+ def printXML(self):
+ """
+ Outputs XML content to stdout
+ """
+ self.__collection.appendChild(self.__images)
+ print XML_HEADER; print DOCTYPE
+ print self.__root.toxml()
+
+
+class AlloCineParser:
+ def __init__(self):
+ self.__baseURL = 'http://www.allocine.fr'
+ self.__basePath = '/film/fichefilm_gen_cfilm'
+ self.__searchURL= 'http://www.allocine.fr/recherche/?motcle=%s&f=3&rub=1'
+ self.__movieURL = self.__baseURL + self.__basePath
+
+ # Define some regexps
+ self.__regExps = { 'title' : '<title>(?P<title>.+?)</title>',
+ 'dirs' : 'Réalisé par <a.*?>(?P<step1>.+?)</a>.*?</h4>',
+ 'actors' : '<h4>Avec *<a.*?>(?P<step1>.+)</a> &nbsp;',
+ 'nat' : '<h4>Film *(?P<nat>.+?)[,\.]',
+ 'genres' : '<h4>Genre *: *<a.*?>(?P<step1>.+?)</a></h4>',
+ 'time' : '<h4>Durée *: *(?P<hours>[0-9])?h *(?P<mins>[0-9]{1,2})min',
+ 'year' : 'Année de production *: *(?P<year>[0-9]{4})',
+ # Original movie title
+ 'otitle' : 'Titre original *: *<i>(?P<otitle>.+?)</i>',
+ 'plot' : """(?s)<td valign="top" style="padding:10 0 0 0"><div align="justify"><h4> *(?P<plot>.+?) *</h4>""",
+ 'image' : """<td valign="top" width="120".*?<img src="(?P<image>.+?)" border"""}
+
+
+ self.__domTree = BasicTellicoDOM()
+
+ def run(self, title):
+ """
+ Runs the allocine.fr parser: fetch movie related links, then fills and prints the DOM tree
+ to stdout (in tellico format) so that tellico can use it.
+ """
+ self.__getMovie(title)
+ # Print results to stdout
+ self.__domTree.printXML()
+
+ def __getHTMLContent(self, url):
+ """
+ Fetch HTML data from url
+ """
+
+ u = urllib2.urlopen(url)
+ self.__data = u.read()
+ u.close()
+
+ def __fetchMovieLinks(self):
+ """
+ Retrieve all links related to movie
+ """
+ matchList = re.findall("""<h4><a *href="%s=(?P<page>.*?\.html?)" *class="link1">(?P<title>.*?)</a>""" % self.__basePath, self.__data)
+ if not matchList: return None
+
+ return matchList
+
+ def __fetchMovieInfo(self, url):
+ """
+ Looks for movie information
+ """
+ self.__getHTMLContent(url)
+
+ matches = data = {}
+
+ for name, regexp in self.__regExps.iteritems():
+ if name == 'image':
+ matches[name] = re.findall(self.__regExps[name], self.__data, re.S | re.I)
+ else:
+ matches[name] = re.search(regexp, self.__data)
+
+ if matches[name]:
+ if name == 'title':
+ data[name] = matches[name].group('title').strip()
+ elif name == 'dirs':
+ dirsList = re.sub('</?a.*?>', '', matches[name].group('step1')).split(',')
+ data[name] = []
+ for d in dirsList:
+ data[name].append(d.strip())
+
+ elif name == 'actors':
+ actorsList = re.sub('</?a.*?>', '', matches[name].group('step1')).split(',')
+ data[name] = []
+ for d in actorsList:
+ data[name].append(d.strip())
+
+ elif name == 'nat':
+ data[name] = matches[name].group('nat').strip()
+
+ elif name == 'genres':
+ genresList = re.sub('</?a.*?>', '', matches[name].group('step1')).split(',')
+ data[name] = []
+ for d in genresList:
+ data[name].append(d.strip())
+
+ elif name == 'time':
+ h, m = matches[name].group('hours'), matches[name].group('mins')
+ totmin = int(h)*60+int(m)
+ data[name] = str(totmin)
+
+ elif name == 'year':
+ data[name] = matches[name].group('year').strip()
+
+ elif name == 'otitle':
+ data[name] = matches[name].group('otitle').strip()
+
+ elif name == 'plot':
+ data[name] = matches[name].group('plot').strip()
+
+ # Image path
+ elif name == 'image':
+ # Save image to a temporary folder
+ md5 = genMD5()
+ imObj = urllib2.urlopen(matches[name][0].strip())
+ img = imObj.read()
+ imObj.close()
+ imgPath = "/tmp/%s.jpeg" % md5
+ try:
+ f = open(imgPath, 'w')
+ f.write(img)
+ f.close()
+ except:
+ # Could be great if we can pass exit code and some message
+ # to tellico in case of failure...
+ pass
+
+ data[name] = (md5 + '.jpeg', base64.encodestring(img))
+ # Delete temporary image
+ try:
+ os.remove(imgPath)
+ except:
+ # Could be great if we can pass exit code and some msg
+ # to tellico in case of failure...
+ pass
+ else:
+ matches[name] = ''
+
+ return data
+
+
+ def __getMovie(self, title):
+ if not len(title): return
+
+ self.__title = title
+ self.__getHTMLContent(self.__searchURL % urllib.quote(self.__title))
+
+ # Get all links
+ links = self.__fetchMovieLinks()
+
+ # Now retrieve infos
+ if links:
+ for entry in links:
+ data = self.__fetchMovieInfo( url = "%s=%s" % (self.__movieURL, entry[0]) )
+ # Add allocine link (custom field)
+ data['allocine'] = "%s=%s" % (self.__movieURL, entry[0])
+ self.__domTree.addEntry(data)
+ else:
+ return None
+
+
+
+def showUsage():
+ print "Usage: %s movietitle" % sys.argv[0]
+ sys.exit(1)
+
+def main():
+ if len(sys.argv) < 2:
+ showUsage()
+
+ parser = AlloCineParser()
+ parser.run(sys.argv[1])
+
+if __name__ == '__main__':
+ main()
diff --git a/src/fetch/scripts/fr.allocine.py.spec b/src/fetch/scripts/fr.allocine.py.spec
new file mode 100644
index 0000000..773b951
--- /dev/null
+++ b/src/fetch/scripts/fr.allocine.py.spec
@@ -0,0 +1,7 @@
+Name=Allocine.fr
+Type=data-source
+ArgumentKeys=1
+Arguments=%1
+CollectionType=3
+FormatType=0
+UpdateArgs=%{title}
diff --git a/src/fetch/scripts/ministerio_de_cultura.py b/src/fetch/scripts/ministerio_de_cultura.py
new file mode 100644
index 0000000..8a768f9
--- /dev/null
+++ b/src/fetch/scripts/ministerio_de_cultura.py
@@ -0,0 +1,595 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+
+# ***************************************************************************
+# copyright : (C) 2006-2008 by Mathias Monnerville
+# email : tellico@monnerville.com
+# ***************************************************************************
+#
+# ***************************************************************************
+# * *
+# * This program is free software; you can redistribute it and/or modify *
+# * it under the terms of version 2 of the GNU General Public License as *
+# * published by the Free Software Foundation; *
+# * *
+# ***************************************************************************
+
+# $Id: books_ministerio_de_cultura.py 428 2007-03-07 13:17:17Z mathias $
+
+"""
+This script has to be used with tellico (http://periapsis.org/tellico) as an external data source program.
+It allows searching for books in Spanish Ministry of Culture's database (at http://www.mcu.es/bases/spa/isbn/ISBN.html).
+
+Multiple ISBN/UPC searching is supported through the -m option:
+ ./books_ministerio_de_cultura.py -m filename
+where filename holds one ISBN or UPC per line.
+
+Tellico data source setup:
+- Source type: External Application
+- Source name: Ministerio de Cultura (ES) (or whatever you want :)
+- Collection type: Book Collection
+- Result type: Tellico
+- Path: /path/to/script/books_ministerio_de_cultura.py
+- Arguments:
+Title (checked) = -t %1
+Person (checked) = -a %1
+ISBN (checked) = -i %1
+UPC (checked) = -i %1
+Update (checked) = %{title}
+
+** Please note that this script is also part of the Tellico's distribution.
+** You will always find the latest version in the SVN trunk of Tellico
+
+SVN Version:
+ * Removes translators for Authors List
+ * Adds translators to translator field
+ * Change from "Collection" to "Series"
+ * Process "Series Number"
+ * Adds in comments "ed.lit." authors
+ * If there is no connection to the Spanish Ministry of Culture,
+ a nice error message is shown (timeout: 5 seconds)
+ * Removed "translated from/to" from Comments field as already
+ exists in "Publishing" field
+ * Removed "Collection" field as I moved to Series/Series Number
+
+Version 0.3.2:
+ * Now find 'notas' field related information
+ * search URL modified to fetch information of exhausted books too
+
+Version 0.3.1:
+Bug Fixes:
+ * The 'tr.' string does not appear among authors anymore
+ * Fixed an AttributeError exception related to a regexp matching the number of pages
+
+Version 0.3:
+Bug Fixes:
+ * URL of the search engine has changed:
+ http://www.mcu.es/bases/spa/isbn/ISBN.html is now http://www.mcu.es/comun/bases/isbn/ISBN.html
+ * All the regexps have been rewritten to match the new site's content
+
+Version 0.2:
+New features:
+ * Support for multiple ISBN/UPC searching (support from command line with -m option)
+ * Default books collection enhanced with a new custom field 'Collection'
+ * Search extended for both available and exhausted books
+ * Hyphens are stripped out in the ISBN (or UPC) search
+
+Bug Fixes:
+ * Publication year now holds only the year
+ * ISBN regexp fix
+ * Fix for publisher field (values were inverted)
+ * -i parameter works for both ISBN and UPC based search
+
+Version 0.1:
+ * Initial Release
+"""
+
+import sys, os, re, md5, random, string
+import urllib, urllib2, time, base64
+import xml.dom.minidom, types
+import socket
+
+# Prolog lines written by BasicTellicoDOM.printXMLTree() before the XML tree
+XML_HEADER = """<?xml version="1.0" encoding="UTF-8"?>"""
+DOCTYPE = """<!DOCTYPE tellico PUBLIC "-//Robby Stephenson/DTD Tellico V9.0//EN" "http://periapsis.org/tellico/dtd/v9/tellico.dtd">"""
+# Empty string, used as default field value and as regexp replacement
+NULLSTRING = ''
+
+VERSION = "0.3.2"
+
+# Search kinds accepted by MinisterioCulturaParser.run()
+ISBN, AUTHOR, TITLE = range(3)
+
+# Role markers that follow a name in the scraped authors list
+TRANSLATOR_STR = "tr."
+EDLIT_STR = "ed. lit."
+
+class EngineError(Exception): pass
+
+class BasicTellicoDOM:
+ """
+ This class manages tellico's XML data model (DOM)
+ """
+ def __init__(self):
+ self.__doc = xml.dom.minidom.Document()
+ self.__root = self.__doc.createElement('tellico')
+ self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/')
+ self.__root.setAttribute('syntaxVersion', '9')
+
+ self.__collection = self.__doc.createElement('collection')
+ self.__collection.setAttribute('title', 'My Books')
+ self.__collection.setAttribute('type', '2')
+
+ self.__fields = self.__doc.createElement('fields')
+ # Add all default (standard) fields
+ self.__dfltField = self.__doc.createElement('field')
+ self.__dfltField.setAttribute('name', '_default')
+
+ # Add a custom 'Collection' field (Left by reference for
+ # the future)
+ #self.__customCollectionField = self.__doc.createElement('field')
+ #self.__customCollectionField.setAttribute('name', 'book_collection')
+ #self.__customCollectionField.setAttribute('title', 'Collection')
+ #self.__customCollectionField.setAttribute('flags', '7')
+ #self.__customCollectionField.setAttribute('category', 'Classification')
+ #self.__customCollectionField.setAttribute('format', '0')
+ #self.__customCollectionField.setAttribute('type', '1')
+ #self.__customCollectionField.setAttribute('i18n', 'yes')
+
+
+ self.__fields.appendChild(self.__dfltField)
+ #self.__fields.appendChild(self.__customCollectionField)
+ self.__collection.appendChild(self.__fields)
+
+ self.__root.appendChild(self.__collection)
+ self.__doc.appendChild(self.__root)
+
+ # Current movie id. See entry's id attribute in self.addEntry()
+ self.__currentId = 0
+
+
+ def addEntry(self, movieData):
+ """
+ Add a comic entry.
+ Returns an entry node instance
+ """
+
+ d = movieData
+
+ # Convert all strings to UTF-8
+ for i in d.keys():
+ if type(d[i]) == types.ListType:
+ d[i] = [unicode(d[i][j], 'latin-1').encode('utf-8') for j in range(len(d[i]))]
+ elif type(d[i]) == types.StringType:
+ d[i] = unicode(d[i], 'latin-1').encode('utf-8')
+
+ entryNode = self.__doc.createElement('entry')
+ entryNode.setAttribute('id', str(self.__currentId))
+
+ titleNode = self.__doc.createElement('title')
+ titleNode.appendChild(self.__doc.createTextNode(d['title']))
+
+ yearNode = self.__doc.createElement('pub_year')
+ yearNode.appendChild(self.__doc.createTextNode(d['pub_year']))
+
+ pubNode = self.__doc.createElement('publisher')
+ pubNode.appendChild(self.__doc.createTextNode(d['publisher']))
+
+ langsNode = self.__doc.createElement('languages')
+ for l in d['language']:
+ langNode = self.__doc.createElement('language')
+ langNode.appendChild(self.__doc.createTextNode(l))
+ langsNode.appendChild(langNode)
+
+ keywordsNode = self.__doc.createElement('keywords')
+ keywordNode = self.__doc.createElement('keyword')
+ keywordNode.appendChild(self.__doc.createTextNode(d['keyword']))
+ keywordsNode.appendChild(keywordNode)
+
+ edNode = self.__doc.createElement('edition')
+ edNode.appendChild(self.__doc.createTextNode(d['edition']))
+
+ writersNode = self.__doc.createElement('authors')
+ for g in d['author']:
+ writerNode = self.__doc.createElement('author')
+ writerNode.appendChild(self.__doc.createTextNode(g))
+ writersNode.appendChild(writerNode)
+
+ commentsNode = self.__doc.createElement('comments')
+ commentsData = string.join(d['comments'], '<br/>')
+ commentsNode.appendChild(self.__doc.createTextNode(commentsData))
+
+ pagesNode = self.__doc.createElement('pages')
+ pagesNode.appendChild(self.__doc.createTextNode(d['pages']))
+
+ isbnNode = self.__doc.createElement('isbn')
+ isbnNode.appendChild(self.__doc.createTextNode(d['isbn']))
+
+ priceNode = self.__doc.createElement('pur_price')
+ priceNode.appendChild(self.__doc.createTextNode(d['pur_price']))
+
+ seriesNode = self.__doc.createElement('series')
+ seriesNode.appendChild(self.__doc.createTextNode(d['series']))
+
+ seriesNumNode = self.__doc.createElement('series_num')
+ seriesNumNode.appendChild(self.__doc.createTextNode(d['series_num']))
+
+ translatorNode = self.__doc.createElement('translator')
+ translatorNode.appendChild(self.__doc.createTextNode(d['translator']))
+
+ for name in ( 'title', 'year', 'pub', 'langs', 'keyword', 'ed', 'writers',
+ 'comments', 'pages', 'isbn', 'price', 'series', 'seriesNum', 'translator' ):
+ entryNode.appendChild(eval(name + 'Node'))
+
+ self.__collection.appendChild(entryNode)
+ self.__currentId += 1
+
+ return entryNode
+
+ def printEntry(self, nEntry):
+ """
+ Prints entry's XML content to stdout
+ """
+
+ try:
+ print nEntry.toxml()
+ except:
+ print sys.stderr, "Error while outputing XML content from entry to Tellico"
+
+ def printXMLTree(self):
+ """
+ Outputs XML content to stdout
+ """
+
+ print XML_HEADER; print DOCTYPE
+ print self.__root.toxml()
+
+
+class MinisterioCulturaParser:
+ def __init__(self):
+ # Search form is at http://www.mcu.es/comun/bases/isbn/ISBN.html
+ self.__baseURL = 'http://www.mcu.es'
+ self.__searchURL = '/cgi-brs/BasesHTML/isbn/BRSCGI?CMD=VERLST&BASE=ISBN&DOCS=1-15&CONF=AEISPA.cnf&OPDEF=AND&SEPARADOR=' + \
+ '&WDIS-C=DISPONIBLE+or+AGOTADO&WGEN-C=&WISB-C=%s&WAUT-C=%s&WTIT-C=%s&WMAT-C=&WEDI-C=&'
+
+ self.__suffixURL = 'WFEP-C=&%40T353-GE=&%40T353-LE=&WSER-C=&WLUG-C=&WLEN-C=&WCLA-C=&WSOP-C='
+
+ # Define some regexps
+ self.__regExps = { 'author' : '<th scope="row">Autor:.*?<td>(?P<author>.*?)</td>',
+ 'isbn' : '<span class="cabTitulo">ISBN.*?<strong>(?P<isbn>.*?)</strong>', # Matches ISBN 13
+ 'title' : '<th scope="row">T&iacute;tulo:.*?<td>(?P<title>.*?)</td>',
+ 'language' : '<th scope="row">Lengua:.*?<td>(?P<language>.*?)</td>',
+ 'edition' : '<th scope="row">Edici&oacute;n:.*?<td>.*?<span>(?P<edition>.*?)</span>',
+ 'pur_price' : '<th scope="row">Precio:.*?<td>.*?<span>(?P<pur_price>.*?)&euro;</span>',
+ 'desc' : '<th scope="row">Descripci&oacute;n:.*?<td>.*?<span>(?P<desc>.*?)</span>',
+ 'publication' : '<th scope="row">Publicaci&oacute;n:.*?<td>.*?<span>(?P<publication>.*?)</span>',
+ 'keyword' : '<th scope="row">Materias:.*?<td>.*?<span>(?P<keywords>.*?)</span>',
+ 'notas' : '<th scope="row">Notas:.*?<td>.*?<span>(?P<notas>.*?)</span>',
+ 'cdu' : '<th scope="row">CDU:.*?<td><span>(?P<cdu>.*?)</span></td>',
+ 'encuadernacion': '<th scope="row">Encuadernaci&oacute;n:.*?<td>.*?<span>(?P<encuadernacion>.*?)</span>',
+ 'series' : '<th scope="row">Colecci&oacute;n:.*?<td>.*?<span>(?P<series>.*?)</span>'
+ }
+
+ # Compile patterns objects
+ self.__regExpsPO = {}
+ for k, pattern in self.__regExps.iteritems():
+ self.__regExpsPO[k] = re.compile(pattern)
+
+ self.__domTree = BasicTellicoDOM()
+
+ def run(self, criteria, kind):
+ """
+ Runs the parser: fetch book related links, then fills and prints the DOM tree
+ to stdout (in tellico format) so that tellico can use it.
+ """
+
+ # Strip out hyphens if kind is ISBN
+ if kind == ISBN:
+ criteria = criteria.replace('-', NULLSTRING)
+ # Support for multiple search
+ isbnList = criteria.split(';')
+ for n in isbnList:
+ self.__getBook(n, kind)
+ else:
+ self.__getBook(criteria, kind)
+
+ # Print results to stdout
+ self.__domTree.printXMLTree()
+
+ def __getHTMLContent(self, url):
+ """
+ Fetch HTML data from url
+ """
+
+ try:
+ u = urllib2.urlopen(url)
+ except Exception, e:
+ u.close()
+ sys.exit("""
+Network error while getting HTML content.
+Tellico cannot connect to: http://www.mcu.es/comun/bases/isbn/ISBN.htm webpage:
+'%s'""" % e)
+
+
+ self.__data = u.read()
+ u.close()
+
+ def __fetchBookLinks(self):
+ """
+ Retrieve all links related to the search. self.__data contains HTML content fetched by self.__getHTMLContent()
+ that need to be parsed.
+ """
+
+ matchList = re.findall("""<div class="isbnResDescripcion">.*?<p>.*?<A target="_top" HREF="(?P<url>.*?)">""", self.__data, re.S)
+
+ if not matchList: return None
+ return matchList
+
+ def __fetchBookInfo(self, url):
+ """
+ Looks for book information
+
+ Downloads the page at url and applies every regexp of self.__regExps to it.
+ Returns a dictionary of field values: fields that were not found hold an
+ empty string, 'comments' is always a list, 'author' and 'language' are
+ lists when found. 'publisher', 'pub_year' and 'pages' are only present
+ when the 'publication' / 'desc' parts of the page matched.
+ """
+
+ self.__getHTMLContent(url)
+
+ matches = {}
+ data = {}
+
+ data['comments'] = []
+ # Empty string if series not available
+ data['series_num'] = NULLSTRING
+ data['translator'] = NULLSTRING
+
+ # NOTE(review): the pre-compiled pattern 'po' is not used; the raw pattern
+ # is searched again below with the re.S | re.I flags.
+ for name, po in self.__regExpsPO.iteritems():
+ data[name] = NULLSTRING
+ matches[name] = re.search(self.__regExps[name], self.__data, re.S | re.I)
+
+
+ if matches[name]:
+ if name == 'title':
+ d = matches[name].group('title').strip()
+ d = re.sub('<.?strong>', NULLSTRING, d)
+ d = re.sub('\n', NULLSTRING, d)
+ data['title'] = d
+
+ elif name == 'isbn':
+ data['isbn'] = matches[name].group('isbn').strip()
+
+ elif name == 'edition':
+ data['edition'] = matches[name].group('edition').strip()
+
+ elif name == 'pur_price':
+ d = matches[name].group('pur_price')
+ data['pur_price'] = d.strip() + ' EUR'
+
+ elif name == 'publication':
+ d = matches[name].group('publication')
+ for p in ('</?[Aa].*?>', '&nbsp;', ':', ','):
+ d = re.sub(p, NULLSTRING, d)
+
+ # NOTE(review): assumes the cleaned text has at least four lines
+ # (indexes 0, 2 and 3 are read) - TODO confirm against the site
+ d = d.split('\n')
+ # d[1] is an empty string
+ data['publisher'] = "%s (%s)" % (d[2], d[0])
+ data['pub_year'] = re.sub('\d{2}\/', NULLSTRING, d[3])
+ del data['publication']
+
+ elif name == 'desc':
+ d = matches[name].group('desc')
+ # First number followed by a space is taken as the page count
+ m = re.search('\d+ ', d)
+ # When not available
+ data['pages'] = NULLSTRING
+ if m:
+ data['pages'] = m.group(0).strip()
+ m = re.search('; (?P<format>.*cm)', d)
+ if m:
+ data['comments'].append('Format: ' + m.group('format').strip())
+ del data['desc']
+
+ elif name == 'encuadernacion':
+ data['comments'].append(matches[name].group('encuadernacion').strip())
+
+ elif name == 'keyword':
+ d = matches[name].group('keywords')
+ d = re.sub('</?[Aa].*?>', NULLSTRING, d)
+ data['keyword'] = d.strip()
+
+ elif name == 'cdu':
+ data['comments'].append('CDU: ' + matches[name].group('cdu').strip())
+
+ elif name == 'notas':
+ data['comments'].append(matches[name].group('notas').strip())
+
+ elif name == 'series':
+ d = matches[name].group('series').strip()
+ d = re.sub('&nbsp;', ' ', d)
+ data[name] = d
+ # data[name] can contain something like 'Byblos, 162/24'
+
+ # Maybe better to add the reg exp to get seriesNum in self.__regExps
+ p = re.compile('[0-9]+$')
+ s = re.search(p, data[name])
+
+ if s:
+ # if series ends with a number, it seems that is a
+ # number of the book inside the series. We save in seriesNum
+ data['series_num'] = s.group()
+
+ # it removes lasts digits (plus one because is space or /) from
+ # data['series']
+ l = len(data['series_num']) + 1
+ data[name] = data[name][0:-l]
+ data[name] = data[name].rstrip(",") # remove the , between series and series_num
+
+ elif name == 'author':
+ # We may find several authors
+ data[name] = []
+ authorsList = re.findall('<a.*?>(?P<author>.*?)</a>', matches[name].group('author'), re.S | re.I)
+ if not authorsList:
+ # No href links
+ authors = re.search('<li>(?P<author>.*?)</li>', matches[name].group('author'), re.S | re.I)
+ try:
+ results = authors.group('author').strip().split(',')
+ except AttributeError:
+ results = []
+ results = [r.strip() for r in results]
+ data[name] = results
+ else:
+ for d in authorsList:
+ # Sometimes, the search engine outputs some image between a elements
+ if d.strip()[:4] != '<img':
+ data[name].append(d.strip())
+
+ # Move tr authors (translators) to translators list
+ translator = self.__getSpecialRol(data[name], TRANSLATOR_STR)
+ edlit = self.__getSpecialRol(data[name], EDLIT_STR)
+ data[name] = self.__removeSpecialsFromAuthors(data[name], translator, TRANSLATOR_STR)
+ data[name] = self.__removeSpecialsFromAuthors(data[name], edlit, EDLIT_STR)
+
+ if len(translator) > 0:
+ data['translator'] = self.__formatSpecials(translator, NULLSTRING)
+
+ if len(edlit) > 0:
+ data['comments'].append(self.__formatSpecials(edlit, "Editor Literario: "))
+
+ elif name == 'language':
+ # We may find several languages
+ d = matches[name].group('language')
+ d = re.sub('\n', NULLSTRING, d)
+ d = d.split('<span>')
+ a = []
+ for lg in d:
+ if len(lg):
+ lg = re.sub('</span>', NULLSTRING, lg)
+ # Because HTML is not interpreted in the 'language' field of Tellico
+ lg = re.sub('&oacute;', 'o', lg)
+ a.append(lg.strip())
+ # Removes that word so that only the language name remains.
+ # NOTE(review): assumes at least one non-empty item was found (a[0])
+ a[0] = re.sub('publicacion: ', NULLSTRING, a[0])
+ data['language'] = a
+ # Add other language related info to the 'comments' field too
+ #for lg in a[1:]:
+ #data['comments'].append(lg)
+
+ return data
+
+
+ def __getBook(self, data, kind = ISBN):
+ if not len(data):
+ raise EngineError, "No data given. Unable to proceed."
+
+ if kind == ISBN:
+ self.__getHTMLContent("%s%s%s" % (self.__baseURL, self.__searchURL % \
+ (urllib.quote(data), # ISBN
+ NULLSTRING, # AUTHOR
+ NULLSTRING), # TITLE
+ self.__suffixURL)
+ )
+ elif kind == AUTHOR:
+ self.__getHTMLContent("%s%s%s" % (self.__baseURL, self.__searchURL % \
+ (NULLSTRING, # ISBN
+ urllib.quote(data), # AUTHOR
+ NULLSTRING), # TITLE
+ self.__suffixURL)
+ )
+
+ elif kind == TITLE:
+ self.__getHTMLContent("%s%s%s" % (self.__baseURL, self.__searchURL % \
+ (NULLSTRING, # ISBN
+ NULLSTRING, # AUTHOR
+ urllib.quote(data)), # TITLE
+ self.__suffixURL)
+ )
+
+ # Get all links
+ links = self.__fetchBookLinks()
+
+ # Now retrieve infos
+ if links:
+ for entry in links:
+ data = self.__fetchBookInfo( url = self.__baseURL + entry.replace(' ', '%20') )
+ node = self.__domTree.addEntry(data)
+ else:
+ return None
+
+ def __getSpecialRol(self, authors, special):
+ """
+ Receives a list like ['Stephen King','Lorenzo Cortina','tr.',
+ 'Rosalía Vázquez','tr.'] and returns a list with special names
+ """
+
+ j = 0; max = len(authors)
+ special_rol = []
+ while j < max:
+ if authors[j] == special:
+ special_rol.append(authors[j-1])
+ j += 1
+
+ return special_rol
+
+ def __removeSpecialsFromAuthors(self, authors, specials, string):
+ """
+ Receives a list with authors+translators and removes 'tr.' and
+ authors from there. Example:
+ authors: ['Stephen King','Lorenzo Cortina','tr.','Rosalía Vázquez','tr.']
+ translators: ['Lorenzo Cortina','Rosalía Vázquez']
+ returns: ['Stephen King']
+
+ (We could also guess string value because is the next position
+ in authors list)
+ """
+
+ newauthors = authors[:]
+
+ for t in specials:
+ newauthors.remove(t)
+ newauthors.remove(string)
+
+ return newauthors
+
+ def __formatSpecials(self, translators, prefix):
+ """
+ Receives a list with translators and returns a string
+ (authors are handled different: each author in a different node)
+ """
+
+ return prefix + string.join(translators, '; ')
+
+def halt():
+ print "HALT."
+ sys.exit(0)
+
+def showUsage():
+ print """Usage: %s options
+Where options are:
+ -t title
+ -i (ISBN|UPC)
+ -a author
+ -m filename (support for multiple ISBN/UPC search)""" % sys.argv[0]
+ sys.exit(1)
+
+def main():
+ if len(sys.argv) < 3:
+ showUsage()
+
+ socket.setdefaulttimeout(5)
+
+ # ;-separated ISBNs string
+ isbnStringList = NULLSTRING
+
+ opts = {'-t' : TITLE, '-i' : ISBN, '-a' : AUTHOR, '-m' : isbnStringList}
+ if sys.argv[1] not in opts.keys():
+ showUsage()
+
+ if sys.argv[1] == '-m':
+ try:
+ f = open(sys.argv[2], 'r')
+ data = f.readlines()
+ # remove trailing \n
+ sys.argv[2] = string.join([d[:-1] for d in data], ';')
+ sys.argv[1] = '-i'
+ f.close()
+ except IOError, e:
+ print "Error: %s" % e
+ sys.exit(1)
+
+ parser = MinisterioCulturaParser()
+ parser.run(sys.argv[2], opts[sys.argv[1]])
+
+if __name__ == '__main__':
+ main()
diff --git a/src/fetch/scripts/ministerio_de_cultura.py.spec b/src/fetch/scripts/ministerio_de_cultura.py.spec
new file mode 100644
index 0000000..ef24ac5
--- /dev/null
+++ b/src/fetch/scripts/ministerio_de_cultura.py.spec
@@ -0,0 +1,7 @@
+Name=Spanish Ministry of Culture
+Type=data-source
+ArgumentKeys=1,2,3,4
+Arguments=-t %1,-a %1,-i %1,-i %1
+CollectionType=2
+FormatType=0
+UpdateArgs=-t %{title}
diff --git a/src/fetch/srufetcher.cpp b/src/fetch/srufetcher.cpp
new file mode 100644
index 0000000..1d7289b
--- /dev/null
+++ b/src/fetch/srufetcher.cpp
@@ -0,0 +1,541 @@
+/***************************************************************************
+ copyright : (C) 2003-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "srufetcher.h"
+#include "messagehandler.h"
+#include "../field.h"
+#include "../collection.h"
+#include "../translators/tellico_xml.h"
+#include "../translators/xslthandler.h"
+#include "../translators/tellicoimporter.h"
+#include "../translators/dcimporter.h"
+#include "../tellico_kernel.h"
+#include "../tellico_debug.h"
+#include "../gui/lineedit.h"
+#include "../gui/combobox.h"
+#include "../latin1literal.h"
+#include "../tellico_utils.h"
+#include "../lccnvalidator.h"
+
+#include <klocale.h>
+#include <kio/job.h>
+#include <kstandarddirs.h>
+#include <kconfig.h>
+#include <kcombobox.h>
+#include <kaccelmanager.h>
+#include <knuminput.h>
+
+#include <qlabel.h>
+#include <qlayout.h>
+#include <qwhatsthis.h>
+
+//#define SRU_DEBUG
+
+namespace {
+ // 7090 was the old default port, but that was just because LoC used it
+ // let's use default HTTP port of 80 now
+ static const int SRU_DEFAULT_PORT = 80;
+ // sent as the maximumRecords item of every search request
+ static const int SRU_MAX_RECORDS = 25;
+}
+
+using Tellico::Fetch::SRUFetcher;
+using Tellico::Fetch::SRUConfigWidget;
+
+// Creates an unconfigured fetcher; host, path and format are read later by
+// readConfigHook(). The port starts at SRU_DEFAULT_PORT because
+// readConfigHook() only assigns m_port when the configured value is positive,
+// which would otherwise leave it uninitialized.
+SRUFetcher::SRUFetcher(QObject* parent_, const char* name_)
+  : Fetcher(parent_, name_), m_port(SRU_DEFAULT_PORT),
+    m_job(0), m_MARCXMLHandler(0), m_MODSHandler(0), m_started(false) {
+}
+
+// Creates a fetcher for the server at host_:port_ with the given path,
+// shown to the user under name_.
+SRUFetcher::SRUFetcher(const QString& name_,
+                       const QString& host_,
+                       uint port_,
+                       const QString& path_,
+                       QObject* parent_)
+  : Fetcher(parent_),
+    m_host(host_),
+    m_port(port_),
+    m_path(path_),
+    m_job(0),
+    m_MARCXMLHandler(0),
+    m_MODSHandler(0),
+    m_started(false) {
+  // m_name is protected in super class, so it is assigned here
+  m_name = name_;
+}
+
+// Releases the two XSLT handlers (either may still be null).
+SRUFetcher::~SRUFetcher() {
+  delete m_MARCXMLHandler; m_MARCXMLHandler = 0;
+  delete m_MODSHandler;    m_MODSHandler = 0;
+}
+
+// Translated name used when the fetcher has no name of its own.
+QString SRUFetcher::defaultName() {
+  const QString name = i18n("SRU Server");
+  return name;
+}
+
+// Returns the configured name, or defaultName() when none is set.
+QString SRUFetcher::source() const {
+  if(m_name.isEmpty()) {
+    return defaultName();
+  }
+  return m_name;
+}
+
+// Only book and bibtex collections are supported.
+bool SRUFetcher::canFetch(int type) const {
+  switch(type) {
+    case Data::Collection::Book:
+    case Data::Collection::Bibtex:
+      return true;
+
+    default:
+      return false;
+  }
+}
+
+// Loads host, port, path, record format and custom field list from config_.
+void SRUFetcher::readConfigHook(const KConfigGroup& config_) {
+  m_host = config_.readEntry("Host");
+
+  // the current port is kept unless the configured one is positive
+  const int port = config_.readNumEntry("Port", SRU_DEFAULT_PORT);
+  if(port > 0) {
+    m_port = port;
+  }
+
+  // the "Path" entry used to be called "Database"
+  QString path = config_.readEntry("Path");
+  if(path.isEmpty()) {
+    path = config_.readEntry("Database");
+  }
+  // the path always starts with a slash
+  if(!path.startsWith(QChar('/'))) {
+    path.prepend('/');
+  }
+  m_path = path;
+
+  m_format = config_.readEntry("Format", QString::fromLatin1("mods"));
+  m_fields = config_.readListEntry("Custom Fields");
+}
+
+// Starts an asynchronous SRU searchRetrieve request for value_, interpreted
+// according to key_. Incoming data is handled by slotData() and the final
+// result by slotComplete(). When no request can be started, stop() is called
+// so that signalDone() is emitted.
+void SRUFetcher::search(FetchKey key_, const QString& value_) {
+  // must be set before any call to stop(): stop() returns immediately when
+  // the fetcher is not started and signalDone() would never be emitted
+  m_started = true;
+
+  if(m_host.isEmpty() || m_path.isEmpty()) {
+    myDebug() << "SRUFetcher::search() - settings are not set!" << endl;
+    stop();
+    return;
+  }
+
+#ifdef SRU_DEBUG
+  KURL u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/sru.xml"));
+#else
+  KURL u;
+  u.setProtocol(QString::fromLatin1("http"));
+  u.setHost(m_host);
+  u.setPort(m_port);
+  u.setPath(m_path);
+
+  u.addQueryItem(QString::fromLatin1("operation"), QString::fromLatin1("searchRetrieve"));
+  u.addQueryItem(QString::fromLatin1("version"), QString::fromLatin1("1.1"));
+  u.addQueryItem(QString::fromLatin1("maximumRecords"), QString::number(SRU_MAX_RECORDS));
+  u.addQueryItem(QString::fromLatin1("recordSchema"), m_format);
+
+  const int type = Kernel::self()->collectionType();
+  // the search value is sent as a quoted string
+  QString str = QChar('"') + value_ + QChar('"');
+  switch(key_) {
+    case Title:
+      u.addQueryItem(QString::fromLatin1("query"), QString::fromLatin1("dc.title=") + str);
+      break;
+
+    case Person:
+      {
+        QString s;
+        if(type == Data::Collection::Book || type == Data::Collection::Bibtex) {
+          s = QString::fromLatin1("author=") + str + QString::fromLatin1(" or dc.author=") + str;
+        } else {
+          s = QString::fromLatin1("dc.creator=") + str + QString::fromLatin1(" or dc.editor=") + str;
+        }
+        u.addQueryItem(QString::fromLatin1("query"), s);
+      }
+      break;
+
+    case ISBN:
+      {
+        // no validation here
+        // limit to first isbn; this is done on the raw value, cutting the
+        // quoted string at ';' would drop the closing quote
+        QString isbn = value_;
+        isbn.remove('-');
+        isbn = isbn.section(';', 0, 0);
+        str = QChar('"') + isbn + QChar('"');
+        u.addQueryItem(QString::fromLatin1("query"), QString::fromLatin1("bath.isbn=") + str);
+      }
+      break;
+
+    case LCCN:
+      {
+        // limit to first lccn, on the raw value for the same reason as above
+        QString first = value_;
+        first.remove('-');
+        first = first.section(';', 0, 0);
+        str = QChar('"') + first + QChar('"');
+        // also try formalized lccn
+        // NOTE(review): formalize() receives the quoted string, as before - confirm this is intended
+        QString lccn = LCCNValidator::formalize(str);
+        u.addQueryItem(QString::fromLatin1("query"),
+                       QString::fromLatin1("bath.lccn=") + str +
+                       QString::fromLatin1(" or bath.lccn=") + lccn
+                       );
+      }
+      break;
+
+    case Keyword:
+      u.addQueryItem(QString::fromLatin1("query"), str);
+      break;
+
+    case Raw:
+      {
+        // value_ has the form "key=value" and is passed on as one query item
+        QString key = value_.section('=', 0, 0).stripWhiteSpace();
+        QString val = value_.section('=', 1).stripWhiteSpace();
+        u.addQueryItem(key, val);
+      }
+      break;
+
+    default:
+      kdWarning() << "SRUFetcher::search() - key not recognized: " << key_ << endl;
+      stop();
+      // the search is over: do not start a job after signalDone() was emitted
+      return;
+  }
+#endif
+//  myDebug() << u.prettyURL() << endl;
+
+  m_job = KIO::get(u, false, false);
+  connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+          SLOT(slotData(KIO::Job*, const QByteArray&)));
+  connect(m_job, SIGNAL(result(KIO::Job*)),
+          SLOT(slotComplete(KIO::Job*)));
+}
+
+// Ends a running search: kills the job if there still is one, drops the
+// received data and emits signalDone(). Does nothing when no search is running.
+void SRUFetcher::stop() {
+  if(m_started) {
+    if(m_job) {
+      m_job->kill();
+      m_job = 0;
+    }
+    m_data.truncate(0);
+    m_started = false;
+    emit signalDone(this);
+  }
+}
+
+// Appends the bytes just received from the job to m_data.
+void SRUFetcher::slotData(KIO::Job*, const QByteArray& data_) {
+  QDataStream appender(m_data, IO_WriteOnly | IO_Append);
+  appender.writeRawBytes(data_.data(), data_.size());
+}
+
+// Handles the end of the KIO job started in search(): reports SRU diagnostics,
+// converts the received XML into a collection according to m_format, emits
+// signalResultFound() for every entry and finally calls stop().
+void SRUFetcher::slotComplete(KIO::Job* job_) {
+ // since the fetch is done, don't worry about holding the job pointer
+ m_job = 0;
+
+ if(job_->error()) {
+ job_->showErrorDialog(Kernel::self()->widget());
+ stop();
+ return;
+ }
+
+ if(m_data.isEmpty()) {
+ stop();
+ return;
+ }
+
+ Data::CollPtr coll;
+ QString msg;
+
+ const QString result = QString::fromUtf8(m_data, m_data.size());
+
+ // first check for SRU errors
+ const QString& diag = XML::nsZingDiag;
+ Import::XMLImporter xmlImporter(result);
+ QDomDocument dom = xmlImporter.domDocument();
+
+ // collect the message (plus details, if any) of every diagnostic element into msg
+ QDomNodeList diagList = dom.elementsByTagNameNS(diag, QString::fromLatin1("diagnostic"));
+ for(uint i = 0; i < diagList.count(); ++i) {
+ QDomElement elem = diagList.item(i).toElement();
+ QDomNodeList nodeList1 = elem.elementsByTagNameNS(diag, QString::fromLatin1("message"));
+ QDomNodeList nodeList2 = elem.elementsByTagNameNS(diag, QString::fromLatin1("details"));
+ for(uint j = 0; j < nodeList1.count(); ++j) {
+ QString d = nodeList1.item(j).toElement().text();
+ if(!d.isEmpty()) {
+ QString d2 = nodeList2.item(j).toElement().text();
+ if(!d2.isEmpty()) {
+ d += " (" + d2 + ')';
+ }
+ myDebug() << "SRUFetcher::slotComplete() - " << d << endl;
+ if(!msg.isEmpty()) msg += '\n';
+ msg += d;
+ }
+ }
+ }
+
+ // "mods" results are used directly, "marcxml" results are first run through the MARCXML stylesheet
+ QString modsResult;
+ if(m_format == Latin1Literal("mods")) {
+ modsResult = result;
+ } else if(m_format == Latin1Literal("marcxml") && initMARCXMLHandler()) {
+ modsResult = m_MARCXMLHandler->applyStylesheet(result);
+ }
+ if(!modsResult.isEmpty() && initMODSHandler()) {
+ Import::TellicoImporter imp(m_MODSHandler->applyStylesheet(modsResult));
+ coll = imp.collection();
+ if(!msg.isEmpty()) msg += '\n';
+ msg += imp.statusMessage();
+ } else if(m_format == Latin1Literal("dc")) {
+ Import::DCImporter imp(dom);
+ coll = imp.collection();
+ if(!msg.isEmpty()) msg += '\n';
+ msg += imp.statusMessage();
+ } else {
+ // also reached for "mods"/"marcxml" when a handler could not be
+ // initialized or the MODS text is empty
+ myDebug() << "SRUFetcher::slotComplete() - unrecognized format: " << m_format << endl;
+ stop();
+ return;
+ }
+
+ // a message is a warning when no entry was found, a status message otherwise
+ if(coll && !msg.isEmpty()) {
+ message(msg, coll->entryCount() == 0 ? MessageHandler::Warning : MessageHandler::Status);
+ }
+
+ if(!coll) {
+ myDebug() << "SRUFetcher::slotComplete() - no collection pointer" << endl;
+ if(!msg.isEmpty()) {
+ message(msg, MessageHandler::Error);
+ }
+ stop();
+ return;
+ }
+
+ // remove the custom fields that are not listed in m_fields
+ const StringMap customFields = SRUFetcher::customFields();
+ for(StringMap::ConstIterator it = customFields.begin(); it != customFields.end(); ++it) {
+ if(!m_fields.contains(it.key())) {
+ coll->removeField(it.key());
+ }
+ }
+
+ // build a short description for every entry, depending on the collection type
+ Data::EntryVec entries = coll->entries();
+ for(Data::EntryVec::Iterator entry = entries.begin(); entry != entries.end(); ++entry) {
+ QString desc;
+ switch(coll->type()) {
+ case Data::Collection::Book:
+ desc = entry->field(QString::fromLatin1("author"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("publisher"));
+ if(!entry->field(QString::fromLatin1("cr_year")).isEmpty()) {
+ desc += QChar('/') + entry->field(QString::fromLatin1("cr_year"));
+ } else if(!entry->field(QString::fromLatin1("pub_year")).isEmpty()){
+ desc += QChar('/') + entry->field(QString::fromLatin1("pub_year"));
+ }
+ break;
+
+ case Data::Collection::Video:
+ desc = entry->field(QString::fromLatin1("studio"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("director"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("year"));
+ break;
+
+ case Data::Collection::Album:
+ desc = entry->field(QString::fromLatin1("artist"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("label"))
+ + QChar('/')
+ + entry->field(QString::fromLatin1("year"));
+ break;
+
+ default:
+ break;
+ }
+ // the entry is kept under the result's uid so that fetchEntry() can return it
+ SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn")));
+ m_entries.insert(r->uid, entry);
+ emit signalResultFound(r);
+ }
+ stop();
+}
+
+// Returns the entry stored under uid_ by slotComplete().
+Tellico::Data::EntryPtr SRUFetcher::fetchEntry(uint uid_) {
+  Data::EntryPtr entry = m_entries[uid_];
+  return entry;
+}
+
+// Starts a search for entry_ using the first non-empty field among
+// isbn, lccn and title; emits signalDone() when all three are empty.
+void SRUFetcher::updateEntry(Data::EntryPtr entry_) {
+//  myDebug() << "SRUFetcher::updateEntry() - " << source() << ": " << entry_->title() << endl;
+  QString value;
+  if(!(value = entry_->field(QString::fromLatin1("isbn"))).isEmpty()) {
+    search(Fetch::ISBN, value);
+  } else if(!(value = entry_->field(QString::fromLatin1("lccn"))).isEmpty()) {
+    search(Fetch::LCCN, value);
+  } else if(!(value = entry_->field(QString::fromLatin1("title"))).isEmpty()) {
+    // optimistically try searching for title and rely on Collection::sameEntry() to figure things out
+    search(Fetch::Title, value);
+  } else {
+    myDebug() << "SRUFetcher::updateEntry() - insufficient info to search" << endl;
+    emit signalDone(this); // always need to emit this if not continuing with the search
+  }
+}
+
+// Creates, on first use, the XSLT handler for MARC21slim2MODS3.xsl.
+// Returns true when a valid handler is available, false when the stylesheet
+// cannot be located or is not valid.
+bool SRUFetcher::initMARCXMLHandler() {
+  if(m_MARCXMLHandler) {
+    return true;
+  }
+
+  const QString xsltfile = locate("appdata", QString::fromLatin1("MARC21slim2MODS3.xsl"));
+  if(xsltfile.isEmpty()) {
+    // the warnings used to name a non-existent initHandlers() function
+    kdWarning() << "SRUFetcher::initMARCXMLHandler() - can not locate MARC21slim2MODS3.xsl." << endl;
+    return false;
+  }
+
+  KURL u;
+  u.setPath(xsltfile);
+
+  m_MARCXMLHandler = new XSLTHandler(u);
+  if(!m_MARCXMLHandler->isValid()) {
+    kdWarning() << "SRUFetcher::initMARCXMLHandler() - error in MARC21slim2MODS3.xsl." << endl;
+    // do not keep an unusable handler around
+    delete m_MARCXMLHandler;
+    m_MARCXMLHandler = 0;
+    return false;
+  }
+  return true;
+}
+
+// Creates, on first use, the XSLT handler for mods2tellico.xsl.
+// Returns true when a valid handler is available, false when the stylesheet
+// cannot be located or is not valid.
+bool SRUFetcher::initMODSHandler() {
+  if(m_MODSHandler) {
+    return true;
+  }
+
+  const QString xsltfile = locate("appdata", QString::fromLatin1("mods2tellico.xsl"));
+  if(xsltfile.isEmpty()) {
+    // the warnings used to name a non-existent initHandlers() function
+    kdWarning() << "SRUFetcher::initMODSHandler() - can not locate mods2tellico.xsl." << endl;
+    return false;
+  }
+
+  KURL u;
+  u.setPath(xsltfile);
+
+  m_MODSHandler = new XSLTHandler(u);
+  if(!m_MODSHandler->isValid()) {
+    kdWarning() << "SRUFetcher::initMODSHandler() - error in mods2tellico.xsl." << endl;
+    // do not keep an unusable handler around
+    delete m_MODSHandler;
+    m_MODSHandler = 0;
+    return false;
+  }
+  return true;
+}
+
+// Returns a fetcher preset for the Library of Congress server.
+Tellico::Fetch::Fetcher::Ptr SRUFetcher::libraryOfCongress(QObject* parent_) {
+  const QString name = i18n("Library of Congress (US)");
+  const QString host = QString::fromLatin1("z3950.loc.gov");
+  const QString path = QString::fromLatin1("voyager");
+  const uint port = 7090;
+  return new SRUFetcher(name, host, port, path, parent_);
+}
+
+// static
+// Maps the name of each optional field to its translated title.
+Tellico::StringMap SRUFetcher::customFields() {
+  const QString addressKey = QString::fromLatin1("address");
+  const QString abstractKey = QString::fromLatin1("abstract");
+
+  StringMap fields;
+  fields[addressKey] = i18n("Address");
+  fields[abstractKey] = i18n("Abstract");
+  return fields;
+}
+
+// Creates the configuration widget for this fetcher, as a child of parent_.
+Tellico::Fetch::ConfigWidget* SRUFetcher::configWidget(QWidget* parent_) const {
+  SRUConfigWidget* widget = new SRUConfigWidget(parent_, this);
+  return widget;
+}
+
+/**
+ * Builds the configuration page for an SRU source: host, port, path and
+ * result format, followed by the additional fields widget.
+ *
+ * @param parent_  parent widget
+ * @param fetcher_ fetcher whose current settings pre-populate the widgets; may be 0
+ */
+SRUConfigWidget::SRUConfigWidget(QWidget* parent_, const SRUFetcher* fetcher_ /*=0*/)
+  : ConfigWidget(parent_) {
+  QGridLayout* l = new QGridLayout(optionsWidget(), 4, 2);
+  l->setSpacing(4);
+  l->setColStretch(1, 10);
+
+  int row = -1;
+  QLabel* label = new QLabel(i18n("Hos&t: "), optionsWidget());
+  l->addWidget(label, ++row, 0);
+  m_hostEdit = new GUI::LineEdit(optionsWidget());
+  connect(m_hostEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+  connect(m_hostEdit, SIGNAL(textChanged(const QString&)), SIGNAL(signalName(const QString&)));
+  // a pasted URL gets split into host, port and path
+  connect(m_hostEdit, SIGNAL(textChanged(const QString&)), SLOT(slotCheckHost()));
+  l->addWidget(m_hostEdit, row, 1);
+  QString w = i18n("Enter the host name of the server.");
+  QWhatsThis::add(label, w);
+  QWhatsThis::add(m_hostEdit, w);
+  label->setBuddy(m_hostEdit);
+
+  label = new QLabel(i18n("&Port: "), optionsWidget());
+  l->addWidget(label, ++row, 0);
+  // port numbers are 16 bit, so nothing above 65535 can be valid
+  m_portSpinBox = new KIntSpinBox(0, 65535, 1, SRU_DEFAULT_PORT, 10, optionsWidget());
+  connect(m_portSpinBox, SIGNAL(valueChanged(int)), SLOT(slotSetModified()));
+  l->addWidget(m_portSpinBox, row, 1);
+  w = i18n("Enter the port number of the server. The default is %1.").arg(SRU_DEFAULT_PORT);
+  QWhatsThis::add(label, w);
+  QWhatsThis::add(m_portSpinBox, w);
+  label->setBuddy(m_portSpinBox);
+
+  label = new QLabel(i18n("Path: "), optionsWidget());
+  l->addWidget(label, ++row, 0);
+  m_pathEdit = new GUI::LineEdit(optionsWidget());
+  connect(m_pathEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+  l->addWidget(m_pathEdit, row, 1);
+  w = i18n("Enter the path to the database used by the server.");
+  QWhatsThis::add(label, w);
+  QWhatsThis::add(m_pathEdit, w);
+  label->setBuddy(m_pathEdit);
+
+  label = new QLabel(i18n("Format: "), optionsWidget());
+  l->addWidget(label, ++row, 0);
+  m_formatCombo = new GUI::ComboBox(optionsWidget());
+  // visible text first, then the value written to the "Format" config entry
+  m_formatCombo->insertItem(QString::fromLatin1("MODS"), QString::fromLatin1("mods"));
+  m_formatCombo->insertItem(QString::fromLatin1("MARCXML"), QString::fromLatin1("marcxml"));
+  m_formatCombo->insertItem(QString::fromLatin1("Dublin Core"), QString::fromLatin1("dc"));
+  connect(m_formatCombo, SIGNAL(activated(int)), SLOT(slotSetModified()));
+  l->addWidget(m_formatCombo, row, 1);
+  w = i18n("Enter the result format used by the server.");
+  QWhatsThis::add(label, w);
+  QWhatsThis::add(m_formatCombo, w);
+  label->setBuddy(m_formatCombo);
+
+  l->setRowStretch(++row, 1);
+
+  // now add additional fields widget
+  addFieldsWidget(SRUFetcher::customFields(), fetcher_ ? fetcher_->m_fields : QStringList());
+
+  if(fetcher_) {
+    m_hostEdit->setText(fetcher_->m_host);
+    m_portSpinBox->setValue(fetcher_->m_port);
+    m_pathEdit->setText(fetcher_->m_path);
+    m_formatCombo->setCurrentData(fetcher_->m_format);
+  }
+  KAcceleratorManager::manage(optionsWidget());
+}
+
+// Writes the non-empty widget values to the config group, saves the
+// additional fields and clears the modified flag.
+void SRUConfigWidget::saveConfig(KConfigGroup& config_) {
+  const QString host = m_hostEdit->text().stripWhiteSpace();
+  if(!host.isEmpty()) {
+    config_.writeEntry("Host", host);
+  }
+
+  const int portNumber = m_portSpinBox->value();
+  if(portNumber > 0) {
+    config_.writeEntry("Port", portNumber);
+  }
+
+  const QString path = m_pathEdit->text().stripWhiteSpace();
+  if(!path.isEmpty()) {
+    config_.writeEntry("Path", path);
+  }
+
+  const QString format = m_formatCombo->currentData().toString();
+  if(!format.isEmpty()) {
+    config_.writeEntry("Format", format);
+  }
+
+  saveFieldsConfig(config_);
+  slotSetModified(false);
+}
+
+// The host text doubles as the source name; fall back to the default name
+// while the host field is empty.
+QString SRUConfigWidget::preferredName() const {
+  const QString host = m_hostEdit->text();
+  if(host.isEmpty()) {
+    return SRUFetcher::defaultName();
+  }
+  return host;
+}
+
+/**
+ * Called whenever the host text changes. If the text looks like a full URL,
+ * it is split up: the host stays in the host field while port and path, when
+ * present, are moved to their own widgets.
+ */
+void SRUConfigWidget::slotCheckHost() {
+  const QString s = m_hostEdit->text();
+  // someone might be pasting a full URL, check that
+  if(s.find(':') == -1 && s.find('/') == -1) {
+    return;
+  }
+
+  KURL u(s);
+  // only rewrite the field when a host could actually be extracted,
+  // otherwise the text would be wiped out with an empty string
+  if(!u.isValid() || u.host().isEmpty()) {
+    return;
+  }
+
+  m_hostEdit->setText(u.host());
+  if(u.port() > 0) {
+    m_portSpinBox->setValue(u.port());
+  }
+  if(!u.path().isEmpty()) {
+    m_pathEdit->setText(u.path());
+  }
+}
+
+#include "srufetcher.moc"
diff --git a/src/fetch/srufetcher.h b/src/fetch/srufetcher.h
new file mode 100644
index 0000000..fd07323
--- /dev/null
+++ b/src/fetch/srufetcher.h
@@ -0,0 +1,131 @@
+/***************************************************************************
+ copyright : (C) 2003-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef TELLICO_SRUFETCHER_H
+#define TELLICO_SRUFETCHER_H
+
+namespace Tellico {
+ class XSLTHandler;
+ namespace GUI {
+ class LineEdit;
+ class ComboBox;
+ }
+}
+
+class KIntSpinBox;
+class KComboBox;
+namespace KIO {
+ class Job;
+}
+
+#include "fetcher.h"
+#include "configwidget.h"
+#include "../datavectors.h"
+
+#include <qcstring.h> // for QByteArray
+#include <qguardedptr.h>
+
+namespace Tellico {
+ namespace Fetch {
+
+class SRUConfigWidget;
+
+/**
+ * A fetcher for SRU servers.
+ * Right now, only MODS is supported.
+ * NOTE(review): the configuration widget also offers MARCXML and Dublin Core
+ * as formats and a MARCXML handler is declared below, so the statement above
+ * may be outdated -- confirm against search() and slotComplete().
+ *
+ * @author Robby Stephenson
+ */
+class SRUFetcher : public Fetcher {
+Q_OBJECT
+
+// the config widget reads m_host, m_port, m_path, m_format and m_fields directly
+friend class SRUConfigWidget;
+
+public:
+ /**
+ * Constructs a fetcher with the given QObject parent and name.
+ */
+ SRUFetcher(QObject* parent, const char* name = 0);
+ // Constructs a fetcher for a specific server; used by libraryOfCongress().
+ SRUFetcher(const QString& name, const QString& host, uint port, const QString& dbname,
+ QObject* parent);
+ /**
+ * Destructor.
+ */
+ virtual ~SRUFetcher();
+
+ /**
+ * Fetcher interface.
+ */
+ virtual QString source() const;
+ virtual bool isSearching() const { return m_started; }
+ virtual void search(FetchKey key, const QString& value);
+ // searchable keys: title, person, ISBN, keyword and LCCN. No Raw for now.
+ virtual bool canSearch(FetchKey k) const { return k == Title || k == Person || k == ISBN || k == Keyword || k == LCCN; }
+ virtual void stop();
+ virtual Data::EntryPtr fetchEntry(uint uid);
+ virtual Type type() const { return SRU; }
+ virtual bool canFetch(int type) const;
+ virtual void readConfigHook(const KConfigGroup& config);
+
+ virtual void updateEntry(Data::EntryPtr entry);
+
+ // Optional fields offered in the config widget, keyed by field name
+ // with the translated title as value.
+ static StringMap customFields();
+
+ virtual ConfigWidget* configWidget(QWidget* parent) const;
+
+ static QString defaultName();
+
+ // Returns a fetcher preconfigured for the Library of Congress server.
+ static Fetcher::Ptr libraryOfCongress(QObject* parent);
+
+private slots:
+ void slotData(KIO::Job* job, const QByteArray& data);
+ void slotComplete(KIO::Job* job);
+
+private:
+ // Both create their XSLT handler on first use and return false when the
+ // stylesheet can not be located or is invalid.
+ bool initMARCXMLHandler();
+ bool initMODSHandler();
+
+ // server settings, shown and edited in SRUConfigWidget
+ QString m_host;
+ uint m_port;
+ QString m_path;
+ QString m_format;
+
+ QByteArray m_data;
+ QMap<int, Data::EntryPtr> m_entries;
+ QGuardedPtr<KIO::Job> m_job;
+ // owned; 0 until the matching init function succeeds
+ XSLTHandler* m_MARCXMLHandler;
+ XSLTHandler* m_MODSHandler;
+ // reported by isSearching()
+ bool m_started;
+ // passed to the config widget as the currently selected additional fields
+ QStringList m_fields;
+};
+
+// Configuration page for an SRU source: host, port, path and result format,
+// plus the additional fields widget.
+class SRUConfigWidget : public ConfigWidget {
+Q_OBJECT
+
+friend class SRUFetcher;
+
+public:
+ // fetcher may be 0; otherwise its settings pre-populate the widgets
+ SRUConfigWidget(QWidget* parent_, const SRUFetcher* fetcher = 0);
+ // Writes the non-empty widget values to the config group.
+ virtual void saveConfig(KConfigGroup& config);
+ // The host text, or SRUFetcher::defaultName() while the host is empty.
+ virtual QString preferredName() const;
+
+private slots:
+ // Splits a URL typed or pasted into the host field into host, port and path.
+ void slotCheckHost();
+
+private:
+ GUI::LineEdit* m_hostEdit;
+ KIntSpinBox* m_portSpinBox;
+ GUI::LineEdit* m_pathEdit;
+ GUI::ComboBox* m_formatCombo;
+};
+
+ } // end namespace
+} // end namespace
+#endif
diff --git a/src/fetch/yahoofetcher.cpp b/src/fetch/yahoofetcher.cpp
new file mode 100644
index 0000000..002b63b
--- /dev/null
+++ b/src/fetch/yahoofetcher.cpp
@@ -0,0 +1,400 @@
+/***************************************************************************
+ copyright : (C) 2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "yahoofetcher.h"
+#include "messagehandler.h"
+#include "../translators/xslthandler.h"
+#include "../translators/tellicoimporter.h"
+#include "../imagefactory.h"
+#include "../tellico_kernel.h"
+#include "../tellico_utils.h"
+#include "../collection.h"
+#include "../entry.h"
+#include "../tellico_debug.h"
+
+#include <klocale.h>
+#include <kstandarddirs.h>
+#include <kconfig.h>
+#include <kio/job.h>
+
+#include <qdom.h>
+#include <qlabel.h>
+#include <qlayout.h>
+#include <qfile.h>
+
+// File-local settings for the Yahoo! audio search web service.
+namespace {
+  // number of results requested per query, and the default result limit
+  const int YAHOO_MAX_RETURNS_TOTAL = 20;
+  // album search end point; getTracks() swaps the file name for songSearch
+  const char* YAHOO_BASE_URL = "http://search.yahooapis.com/AudioSearchService/V1/albumSearch";
+  // application id sent with every request
+  const char* YAHOO_APP_ID = "tellico-robby";
+}
+
+using Tellico::Fetch::YahooFetcher;
+
+/**
+ * Constructs an idle fetcher. The paging state (m_start, m_total) and the
+ * search key get defined values so that continueSearch() never reads
+ * uninitialized members, even if it is called before search().
+ */
+YahooFetcher::YahooFetcher(QObject* parent_, const char* name_)
+  : Fetcher(parent_, name_), m_xsltHandler(0),
+    m_limit(YAHOO_MAX_RETURNS_TOTAL), m_start(1), m_total(-1),
+    m_job(0), m_key(Title), m_started(false) {
+}
+
+// Releases the XSLT handler owned by this fetcher.
+YahooFetcher::~YahooFetcher() {
+  XSLTHandler* handler = m_xsltHandler;
+  m_xsltHandler = 0;
+  delete handler;
+}
+
+// Translated name used when the source has no user-defined name.
+QString YahooFetcher::defaultName() {
+  const QString name = i18n("Yahoo! Audio Search");
+  return name;
+}
+
+// The configured name of this source, or the default name if none is set.
+QString YahooFetcher::source() const {
+  if(m_name.isEmpty()) {
+    return defaultName();
+  }
+  return m_name;
+}
+
+// Only album collections can be searched with this source.
+bool YahooFetcher::canFetch(int type) const {
+  const bool isAlbum = (Data::Collection::Album == type);
+  return isAlbum;
+}
+
+// This source has no settings of its own, so the config group is ignored.
+void YahooFetcher::readConfigHook(const KConfigGroup& config_) {
+  (void)config_;
+}
+
+// Starts a fresh search: resets the paging state, remembers key and value
+// for continueSearch() and fires off the request.
+void YahooFetcher::search(FetchKey key_, const QString& value_) {
+  // paging restarts at the first result with an unknown total
+  m_start = 1;
+  m_total = -1;
+
+  m_started = true;
+  m_key = key_;
+  m_value = value_;
+
+  doSearch();
+}
+
+// Repeats the previous query; m_start was already advanced past the
+// results received so far by slotComplete().
+void YahooFetcher::continueSearch() {
+  m_started = true;
+
+  doSearch();
+}
+
+// Builds the album search URL from the stored key and value and starts the
+// download job. Stops right away for unsupported collection types or keys.
+void YahooFetcher::doSearch() {
+//  myDebug() << "YahooFetcher::search() - value = " << value_ << endl;
+
+  // nothing to do if the current collection is not an album collection
+  if(!canFetch(Kernel::self()->collectionType())) {
+    message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning);
+    stop();
+    return;
+  }
+
+  KURL url(QString::fromLatin1(YAHOO_BASE_URL));
+  url.addQueryItem(QString::fromLatin1("appid"), QString::fromLatin1(YAHOO_APP_ID));
+  url.addQueryItem(QString::fromLatin1("type"), QString::fromLatin1("all"));
+  url.addQueryItem(QString::fromLatin1("output"), QString::fromLatin1("xml"));
+  url.addQueryItem(QString::fromLatin1("start"), QString::number(m_start));
+  url.addQueryItem(QString::fromLatin1("results"), QString::number(YAHOO_MAX_RETURNS_TOTAL));
+
+  if(m_key == Title) {
+    url.addQueryItem(QString::fromLatin1("album"), m_value);
+  } else if(m_key == Person) {
+    url.addQueryItem(QString::fromLatin1("artist"), m_value);
+  } else if(m_key == Raw) {
+    // raw is used for the entry updates
+//    url.removeQueryItem(QString::fromLatin1("type"));
+//    url.addQueryItem(QString::fromLatin1("type"), QString::fromLatin1("phrase"));
+    url.setQuery(url.query() + '&' + m_value);
+  } else {
+    kdWarning() << "YahooFetcher::search() - key not recognized: " << m_key << endl;
+    stop();
+    return;
+  }
+//  myDebug() << "YahooFetcher::search() - url: " << url.url() << endl;
+
+  m_job = KIO::get(url, false, false);
+  connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)),
+          SLOT(slotData(KIO::Job*, const QByteArray&)));
+  connect(m_job, SIGNAL(result(KIO::Job*)),
+          SLOT(slotComplete(KIO::Job*)));
+}
+
+// Ends a running search: kills a pending job, drops the received data and
+// emits signalDone(). Does nothing when no search is running.
+void YahooFetcher::stop() {
+  if(m_started) {
+    if(m_job) {
+      m_job->kill();
+      m_job = 0;
+    }
+    m_data.truncate(0);
+    m_started = false;
+    emit signalDone(this);
+  }
+}
+
+// Appends each chunk delivered by the job to the download buffer.
+void YahooFetcher::slotData(KIO::Job*, const QByteArray& data_) {
+  QDataStream appender(m_data, IO_WriteOnly | IO_Append);
+  appender.writeRawBytes(data_.data(), data_.size());
+}
+
+/**
+ * Called when the download job has finished. Converts the received XML into
+ * a Tellico collection via the XSLT handler, emits one search result per
+ * entry (up to m_limit) and updates the paging state.
+ *
+ * Every exit path goes through stop(), since signalDone() must always be
+ * emitted once the search is over.
+ */
+void YahooFetcher::slotComplete(KIO::Job* job_) {
+//  myDebug() << "YahooFetcher::slotComplete()" << endl;
+  // since the fetch is done, don't worry about holding the job pointer
+  m_job = 0;
+
+  if(job_->error()) {
+    job_->showErrorDialog(Kernel::self()->widget());
+    stop();
+    return;
+  }
+
+  if(m_data.isEmpty()) {
+    myDebug() << "YahooFetcher::slotComplete() - no data" << endl;
+    stop();
+    return;
+  }
+
+#if 0
+  kdWarning() << "Remove debug from yahoofetcher.cpp" << endl;
+  QFile f(QString::fromLatin1("/tmp/test.xml"));
+  if(f.open(IO_WriteOnly)) {
+    QTextStream t(&f);
+    t.setEncoding(QTextStream::UnicodeUTF8);
+    t << QCString(m_data, m_data.size()+1);
+  }
+  f.close();
+#endif
+
+  if(!m_xsltHandler) {
+    initXSLTHandler();
+    if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading
+      stop();
+      return;
+    }
+  }
+
+  // the total is only unknown for the first page of a search
+  if(m_total == -1) {
+    QDomDocument dom;
+    if(!dom.setContent(m_data, false)) {
+      kdWarning() << "YahooFetcher::slotComplete() - server did not return valid XML." << endl;
+      // without stop() the fetcher would stay in the searching state forever
+      // and signalDone() would never be emitted
+      stop();
+      return;
+    }
+    // total is top level element, with attribute totalResultsAvailable
+    QDomElement e = dom.documentElement();
+    if(!e.isNull()) {
+      m_total = e.attribute(QString::fromLatin1("totalResultsAvailable")).toInt();
+    }
+  }
+
+  // assume yahoo is always utf-8
+  QString str = m_xsltHandler->applyStylesheet(QString::fromUtf8(m_data, m_data.size()));
+  Import::TellicoImporter imp(str);
+  Data::CollPtr coll = imp.collection();
+  if(!coll) {
+    myDebug() << "YahooFetcher::slotComplete() - no collection pointer" << endl;
+    stop();
+    return;
+  }
+
+  int count = 0;
+  Data::EntryVec entries = coll->entries();
+  for(Data::EntryVec::Iterator entry = entries.begin(); count < m_limit && entry != entries.end(); ++entry, ++count) {
+    if(!m_started) {
+      // might get aborted
+      break;
+    }
+    QString desc = entry->field(QString::fromLatin1("artist"))
+                 + QChar('/')
+                 + entry->field(QString::fromLatin1("label"))
+                 + QChar('/')
+                 + entry->field(QString::fromLatin1("year"));
+
+    SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn")));
+    m_entries.insert(r->uid, Data::EntryPtr(entry));
+    emit signalResultFound(r);
+  }
+  // the next page starts right after the entries collected so far
+  m_start = m_entries.count() + 1;
+  m_hasMoreResults = m_start <= m_total;
+  stop(); // required
+}
+
+/**
+ * Returns the full entry for a search result: loads the cover image, adds
+ * the track list and clears the helper fields that should not be shown.
+ *
+ * @param uid_ uid of the search result emitted by slotComplete()
+ * @return the entry, or 0 if the uid is unknown
+ */
+Tellico::Data::EntryPtr YahooFetcher::fetchEntry(uint uid_) {
+  // look the uid up with find(): operator[] would insert a null entry for an
+  // unknown uid and so inflate m_entries.count(), which is used for paging
+  QMap<int, Data::EntryPtr>::Iterator it = m_entries.find(uid_);
+  if(it == m_entries.end() || !it.data()) {
+    kdWarning() << "YahooFetcher::fetchEntry() - no entry in dict" << endl;
+    return 0;
+  }
+  Data::EntryPtr entry = it.data();
+
+  KURL imageURL = entry->field(QString::fromLatin1("image"));
+  if(!imageURL.isEmpty()) {
+    QString id = ImageFactory::addImage(imageURL, true);
+    if(id.isEmpty()) {
+    // rich text causes layout issues
+//      emit signalStatus(i18n("<qt>The cover image for <i>%1</i> could not be loaded.</qt>").arg(
+//                              entry->field(QString::fromLatin1("title"))));
+      message(i18n("The cover image could not be loaded."), MessageHandler::Warning);
+    } else {
+      entry->setField(QString::fromLatin1("cover"), id);
+    }
+  }
+
+  getTracks(entry);
+
+  // don't want to show image urls in the fetch dialog
+  entry->setField(QString::fromLatin1("image"), QString::null);
+  // no need for album id now ?
+  entry->setField(QString::fromLatin1("yahoo"), QString::null);
+  return entry;
+}
+
+// Replaces m_xsltHandler with a handler for yahoo2tellico.xsl. The member
+// ends up 0 if the stylesheet is invalid, and is left untouched if the
+// stylesheet can not be located.
+void YahooFetcher::initXSLTHandler() {
+  const QString stylesheet = locate("appdata", QString::fromLatin1("yahoo2tellico.xsl"));
+  if(stylesheet.isEmpty()) {
+    kdWarning() << "YahooFetcher::initXSLTHandler() - can not locate yahoo2tellico.xsl." << endl;
+    return;
+  }
+
+  KURL stylesheetURL;
+  stylesheetURL.setPath(stylesheet);
+
+  delete m_xsltHandler;
+  m_xsltHandler = new XSLTHandler(stylesheetURL);
+  if(m_xsltHandler->isValid()) {
+    return;
+  }
+
+  kdWarning() << "YahooFetcher::initXSLTHandler() - error in yahoo2tellico.xsl." << endl;
+  delete m_xsltHandler;
+  m_xsltHandler = 0;
+}
+
+/**
+ * Downloads the song list for the album of the given entry and stores it in
+ * the "track" field. Each track value has the form "title::artist" with
+ * "::length" appended when a positive length is known.
+ *
+ * Does nothing if the entry is null or carries no album id in its "yahoo"
+ * field.
+ */
+void YahooFetcher::getTracks(Data::EntryPtr entry_) {
+  if(!entry_) {
+    return;
+  }
+
+  // get album id
+  const QString albumid = entry_->field(QString::fromLatin1("yahoo"));
+  if(albumid.isEmpty()) {
+    return;
+  }
+
+  KURL u(QString::fromLatin1(YAHOO_BASE_URL));
+  u.setFileName(QString::fromLatin1("songSearch"));
+  u.addQueryItem(QString::fromLatin1("appid"), QString::fromLatin1(YAHOO_APP_ID));
+  u.addQueryItem(QString::fromLatin1("type"), QString::fromLatin1("all"));
+  u.addQueryItem(QString::fromLatin1("output"), QString::fromLatin1("xml"));
+  // go ahead and ask for all results, since there might well be more than 10 songs on the CD
+  u.addQueryItem(QString::fromLatin1("results"), QString::number(50));
+  u.addQueryItem(QString::fromLatin1("albumid"), albumid);
+
+//  myDebug() << "YahooFetcher::getTracks() - url: " << u.url() << endl;
+  QDomDocument dom = FileHandler::readXMLFile(u, false /*no namespace*/, true /*quiet*/);
+  if(dom.isNull()) {
+    myDebug() << "YahooFetcher::getTracks() - null dom returned" << endl;
+    return;
+  }
+
+#if 0
+  kdWarning() << "Remove debug from yahoofetcher.cpp" << endl;
+  QFile f(QString::fromLatin1("/tmp/test.xml"));
+  if(f.open(IO_WriteOnly)) {
+    QTextStream t(&f);
+    t.setEncoding(QTextStream::UnicodeUTF8);
+    t << dom.toString();
+  }
+  f.close();
+#endif
+
+  const QString track = QString::fromLatin1("track");
+
+  QDomNodeList nodes = dom.documentElement().childNodes();
+  for(uint i = 0; i < nodes.count(); ++i) {
+    QDomElement e = nodes.item(i).toElement();
+    if(e.isNull()) {
+      continue;
+    }
+    QString t = e.namedItem(QString::fromLatin1("Title")).toElement().text();
+    QString n = e.namedItem(QString::fromLatin1("Track")).toElement().text();
+    bool ok;
+    int trackNum = Tellico::toUInt(n, &ok);
+    // trackNum might be 0
+    if(t.isEmpty() || !ok || trackNum < 1) {
+      continue;
+    }
+    QString a = e.namedItem(QString::fromLatin1("Artist")).toElement().text();
+    QString l = e.namedItem(QString::fromLatin1("Length")).toElement().text();
+
+    int len = Tellico::toUInt(l, &ok);
+    QString value = t + "::" + a;
+    if(ok && len > 0) {
+      value += "::" + Tellico::minutes(len);
+    }
+    entry_->setField(track, insertValue(entry_->field(track), value, trackNum));
+  }
+}
+
+/**
+ * Puts value_ at position pos_ of the "; "-separated list in str_ and
+ * returns the re-joined list. The list is padded with null strings if it is
+ * shorter than pos_.
+ *
+ * An existing value is only overwritten when value_ contains at least two
+ * "::" separators, i.e. when it includes a length.
+ *
+ * @param pos_ position in the list, not zero-based. For the invalid
+ *             position 0 the string is returned unchanged.
+ */
+QString YahooFetcher::insertValue(const QString& str_, const QString& value_, uint pos_) {
+  if(pos_ == 0) {
+    // pos_-1 would wrap around and index far outside of the list
+    return str_;
+  }
+
+  QStringList list = Data::Field::split(str_, true);
+  for(uint i = list.count(); i < pos_; ++i) {
+    list += QString::null;
+  }
+  bool write = true;
+  if(!list[pos_-1].isNull()) {
+    // for some reason, some songs are repeated from yahoo, with 0 length, don't overwrite that
+    if(value_.contains(QString::fromLatin1("::")) < 2) { // means no length value
+      write = false;
+    }
+  }
+  if(!value_.isEmpty() && write) {
+    list[pos_-1] = value_;
+  }
+  return list.join(QString::fromLatin1("; "));
+}
+
+/**
+ * Searches for updated data for an existing entry, using its title and
+ * artist. Emits signalDone() right away if the entry has neither.
+ */
+void YahooFetcher::updateEntry(Data::EntryPtr entry_) {
+//  myDebug() << "YahooFetcher::updateEntry()" << endl;
+  // limit to top 5 results
+  m_limit = 5;
+
+  // The value is appended verbatim to the query string by doSearch() for the
+  // Raw key, so the field values have to be encoded here. Otherwise a '&',
+  // '=' or '%' in a title or artist would corrupt the query.
+  QString value;
+  const QString title = entry_->field(QString::fromLatin1("title"));
+  if(!title.isEmpty()) {
+    value += QString::fromLatin1("album=") + KURL::encode_string(title);
+  }
+  const QString artist = entry_->field(QString::fromLatin1("artist"));
+  if(!artist.isEmpty()) {
+    if(!value.isEmpty()) {
+      value += '&';
+    }
+    value += QString::fromLatin1("artist=") + KURL::encode_string(artist);
+  }
+  if(!value.isEmpty()) {
+    search(Fetch::Raw, value);
+    return;
+  }
+
+  myDebug() << "YahooFetcher::updateEntry() - insufficient info to search" << endl;
+  emit signalDone(this); // always need to emit this if not continuing with the search
+}
+
+// Creates the (option-less) configuration widget for this source.
+Tellico::Fetch::ConfigWidget* YahooFetcher::configWidget(QWidget* parent_) const {
+  YahooFetcher::ConfigWidget* widget = new YahooFetcher::ConfigWidget(parent_, this);
+  return widget;
+}
+
+// The page only shows a note, since there is nothing to configure.
+YahooFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const YahooFetcher*/*=0*/)
+  : Fetch::ConfigWidget(parent_) {
+  QVBoxLayout* layout = new QVBoxLayout(optionsWidget());
+  QLabel* note = new QLabel(i18n("This source has no options."), optionsWidget());
+  layout->addWidget(note);
+  layout->addStretch();
+}
+
+// Without any options, the preferred name is always the default name.
+QString YahooFetcher::ConfigWidget::preferredName() const {
+  const QString name = YahooFetcher::defaultName();
+  return name;
+}
+
+#include "yahoofetcher.moc"
diff --git a/src/fetch/yahoofetcher.h b/src/fetch/yahoofetcher.h
new file mode 100644
index 0000000..7ff5733
--- /dev/null
+++ b/src/fetch/yahoofetcher.h
@@ -0,0 +1,105 @@
+/***************************************************************************
+ copyright : (C) 2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef YAHOOFETCHER_H
+#define YAHOOFETCHER_H
+
+namespace Tellico {
+ class XSLTHandler;
+}
+
+#include "fetcher.h"
+#include "configwidget.h"
+#include "../datavectors.h"
+
+#include <qcstring.h> // for QByteArray
+#include <qguardedptr.h>
+
+namespace KIO {
+ class Job;
+}
+
+namespace Tellico {
+ namespace Fetch {
+
+/**
+ * A fetcher for the Yahoo! audio search web service. It only works for
+ * album collections and can search by title or person.
+ *
+ * @author Robby Stephenson
+ */
+class YahooFetcher : public Fetcher {
+Q_OBJECT
+
+public:
+ /**
+ * Constructs a fetcher with the given QObject parent and name.
+ */
+ YahooFetcher(QObject* parent, const char* name = 0);
+ /**
+ * Destructor, deletes the XSLT handler.
+ */
+ virtual ~YahooFetcher();
+
+ /**
+ * Fetcher interface.
+ */
+ virtual QString source() const;
+ virtual bool isSearching() const { return m_started; }
+ virtual void search(FetchKey key, const QString& value);
+ // repeats the last query, starting after the results received so far
+ virtual void continueSearch();
+ virtual bool canSearch(FetchKey k) const { return k == Title || k == Person; }
+ virtual void stop();
+ virtual Data::EntryPtr fetchEntry(uint uid);
+ virtual Type type() const { return Yahoo; }
+ virtual bool canFetch(int type) const;
+ virtual void readConfigHook(const KConfigGroup& config);
+
+ // searches by the title and artist of the entry, using the Raw key
+ virtual void updateEntry(Data::EntryPtr entry);
+
+ /**
+ * Returns a widget for modifying the fetcher's config.
+ */
+ virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;
+
+ // Config page without any options, so there is nothing to save.
+ class ConfigWidget : public Fetch::ConfigWidget {
+ public:
+ ConfigWidget(QWidget* parent_, const YahooFetcher* fetcher = 0);
+ virtual void saveConfig(KConfigGroup&) {}
+ virtual QString preferredName() const;
+ };
+ friend class ConfigWidget;
+
+ static QString defaultName();
+
+private slots:
+ // appends the received bytes to m_data
+ void slotData(KIO::Job* job, const QByteArray& data);
+ // parses m_data and emits the search results
+ void slotComplete(KIO::Job* job);
+
+private:
+ void initXSLTHandler();
+ void doSearch();
+ // fills the "track" field of the entry from a song search
+ void getTracks(Data::EntryPtr entry);
+ // pos is not zero-based
+ QString insertValue(const QString& str, const QString& value, uint pos);
+
+ // owned; 0 until initXSLTHandler() succeeds
+ XSLTHandler* m_xsltHandler;
+ // maximum number of results emitted per response
+ int m_limit;
+ // value of the "start" query item for the next request
+ int m_start;
+ // total number of results reported by the server, -1 while unknown
+ int m_total;
+
+ QByteArray m_data;
+ QMap<int, Data::EntryPtr> m_entries; // they get modified after collection is created, so can't be const
+ QGuardedPtr<KIO::Job> m_job;
+
+ // key and value of the current search, reused by continueSearch()
+ FetchKey m_key;
+ QString m_value;
+ // reported by isSearching()
+ bool m_started;
+};
+
+ } // end namespace
+} // end namespace
+#endif
diff --git a/src/fetch/z3950-servers.cfg b/src/fetch/z3950-servers.cfg
new file mode 100644
index 0000000..f4f6157
--- /dev/null
+++ b/src/fetch/z3950-servers.cfg
@@ -0,0 +1,106 @@
+[loc]
+Charset=marc8
+Database=Voyager
+Host=z3950.loc.gov
+Locale=en
+Name=Library of Congress (US)
+Port=7090
+Syntax=mods
+
+[blzcat]
+Host=3950cat.bl.uk
+Port=9909
+Database=BLAC
+Name=The British Library
+Charset=marc-8
+Locale=en_GB
+
+[sudoc]
+Host=carmin.sudoc.abes.fr
+Port=210
+Database=ABES-Z39-PUBLIC
+Name=Sudoc (France)
+Charset=iso-5426
+Locale=fr
+Syntax=usmarc
+
+[bibsys]
+Host=z3950.bibsys.no
+Port=2100
+Database=BIBSYS
+Name=BIBSYS (Norway)
+Charset=iso-8859-1
+Locale=no
+Syntax=usmarc
+
+[sbn]
+Host=opac.sbn.it
+Port=3950
+Database=nopac
+Name=Italian National Library
+Charset=iso-8859-1
+Locale=it
+Syntax=unimarc
+
+[porbase]
+Host=z3950.bn.pt
+Port=210
+Database=bnd
+Name=Portuguese National Library
+Charset=iso-8859-1
+Locale=pt
+Syntax=unimarc
+
+[nlp]
+Host=alpha.bn.org.pl
+Port=210
+Database=INNOPAC
+Name=National Library of Poland
+Charset=iso6937
+Locale=pl
+Syntax=usmarc
+
+[amicus]
+Host=amicus.collectionscanada.ca
+Port=210
+Database=NL
+Name=National Library of Canada
+Charset=iso-8859-1
+Locale=ca
+Syntax=marc21
+
+[iul]
+Host=libnet.ac.il
+Port=9991
+Database=ULI02
+Name=Israel Union List
+Charset=utf-8
+Locale=il
+Syntax=marc21
+
+[naul]
+Host=catalogue.nla.gov.au
+Port=7090
+Database=Voyager
+Name=National Library of Australia
+Charset=utf-8
+Locale=au
+Syntax=marc21
+
+[libis]
+Host=z3950.libis.lt
+Port=210
+Database=knygos
+Name=National Library of Lithuania
+Charset=utf-8
+Syntax=unimarc
+Locale=lt
+
+[copac]
+Host=z3950.copac.ac.uk
+Port=210
+Database=COPAC
+Name=Copac (UK and Ireland)
+Charset=utf-8
+Locale=uk,ie,en
+Syntax=mods
diff --git a/src/fetch/z3950connection.cpp b/src/fetch/z3950connection.cpp
new file mode 100644
index 0000000..27efe51
--- /dev/null
+++ b/src/fetch/z3950connection.cpp
@@ -0,0 +1,503 @@
+/***************************************************************************
+ copyright : (C) 2005-2006 by Robby Stephenson
+ email : $EMAIL
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "z3950connection.h"
+#include "z3950fetcher.h"
+#include "messagehandler.h"
+#include "../latin1literal.h"
+#include "../tellico_debug.h"
+#include "../iso5426converter.h"
+#include "../iso6937converter.h"
+
+#include <config.h>
+
+#ifdef HAVE_YAZ
+extern "C" {
+#include <yaz/zoom.h>
+#include <yaz/marcdisp.h>
+#include <yaz/yaz-version.h>
+}
+#endif
+
+#include <klocale.h>
+
+#include <qfile.h>
+
+// File-local default for the record limit of a connection.
+namespace {
+  const size_t Z3950_DEFAULT_MAX_RECORDS = 20;
+}
+
+using Tellico::Fetch::Z3950ResultFound;
+using Tellico::Fetch::Z3950Connection;
+
+// Stores a deep copy of the result text and counts the event as pending.
+Z3950ResultFound::Z3950ResultFound(const QString& s)
+  : QCustomEvent(uid()),
+    m_result(QDeepCopy<QString>(s)) {
+  Z3950Connection::resultsLeft += 1;
+}
+
+// A destroyed event is no longer pending.
+Z3950ResultFound::~Z3950ResultFound() {
+  Z3950Connection::resultsLeft -= 1;
+}
+
+// Holds the YAZ handles of a connection, so that the yaz headers are not
+// needed in z3950connection.h.
+class Z3950Connection::Private {
+public:
+#ifdef HAVE_YAZ
+  // start with null handles, so the destructor never passes garbage to yaz
+  // when the object is deleted before any connection was made
+  Private() : conn_opt(0), conn(0) {}
+  ~Private() {
+    if(conn_opt) {
+      ZOOM_options_destroy(conn_opt);
+    }
+    if(conn) {
+      ZOOM_connection_destroy(conn);
+    }
+  }
+
+  ZOOM_options conn_opt;
+  ZOOM_connection conn;
+#else
+  Private() {}
+#endif
+};
+
+// Number of Z3950ResultFound events currently alive: incremented by the
+// event constructor, decremented by its destructor and reset to 0 by run().
+int Z3950Connection::resultsLeft = 0;
+
+// All strings are deep copied before being stored in the members.
+// The source character set ends up in a yaz api call, so to be safe against
+// user insertions only the first 64 characters are kept.
+Z3950Connection::Z3950Connection(Z3950Fetcher* fetcher,
+                                 const QString& host,
+                                 uint port,
+                                 const QString& dbname,
+                                 const QString& sourceCharSet,
+                                 const QString& syntax,
+                                 const QString& esn)
+  : QThread(),
+    d(new Private()),
+    m_connected(false),
+    m_aborted(false),
+    m_fetcher(fetcher),
+    m_host(QDeepCopy<QString>(host)),
+    m_port(port),
+    m_dbname(QDeepCopy<QString>(dbname)),
+    m_sourceCharSet(QDeepCopy<QString>(sourceCharSet.left(64))),
+    m_syntax(QDeepCopy<QString>(syntax)),
+    m_esn(QDeepCopy<QString>(esn)),
+    m_start(0),
+    m_limit(Z3950_DEFAULT_MAX_RECORDS),
+    m_hasMore(false) {
+}
+
+// Marks the connection as closed and frees the private data.
+Z3950Connection::~Z3950Connection() {
+  m_connected = false;
+
+  Private* priv = d;
+  delete priv;
+  d = 0;
+}
+
+// Goes back to the first record and the default record limit.
+void Z3950Connection::reset() {
+  m_limit = Z3950_DEFAULT_MAX_RECORDS;
+  m_start = 0;
+}
+
+// Stores a deep copy of the query string used by run().
+void Z3950Connection::setQuery(const QString& query_) {
+  const QDeepCopy<QString> queryCopy(query_);
+  m_pqn = queryCopy;
+}
+
+// Stores deep copies of the user name and password.
+void Z3950Connection::setUserPassword(const QString& user_, const QString& pword_) {
+  const QDeepCopy<QString> userCopy(user_);
+  const QDeepCopy<QString> passwordCopy(pword_);
+  m_user = userCopy;
+  m_password = passwordCopy;
+}
+
+void Z3950Connection::run() {
+// myDebug() << "Z3950Connection::run() - " << m_fetcher->source() << endl;
+ m_aborted = false;
+ m_hasMore = false;
+ resultsLeft = 0;
+#ifdef HAVE_YAZ
+
+ if(!makeConnection()) {
+ done();
+ return;
+ }
+
+ ZOOM_query query = ZOOM_query_create();
+ myLog() << "Z3950Connection::run() - pqn = " << toCString(m_pqn) << endl;
+ int errcode = ZOOM_query_prefix(query, toCString(m_pqn));
+ if(errcode != 0) {
+ myDebug() << "Z3950Connection::run() - query error: " << m_pqn << endl;
+ ZOOM_query_destroy(query);
+ QString s = i18n("Query error!");
+ s += ' ' + m_pqn;
+ done(s, MessageHandler::Error);
+ return;
+ }
+
+ ZOOM_resultset resultSet = ZOOM_connection_search(d->conn, query);
+
+ // check abort status
+ if(m_aborted) {
+ done();
+ return;
+ }
+
+ // I know the LOC wants the syntax = "xml" and esn = "mods"
+ // to get MODS data, that seems a bit odd...
+ // esn only makes sense for marc and grs-1
+ // if syntax is mods, set esn to mods too
+ QCString type = "raw";
+ if(m_syntax == Latin1Literal("mods")) {
+ m_syntax = QString::fromLatin1("xml");
+ ZOOM_resultset_option_set(resultSet, "elementSetName", "mods");
+ type = "xml";
+ } else {
+ ZOOM_resultset_option_set(resultSet, "elementSetName", m_esn.latin1());
+ }
+ ZOOM_resultset_option_set(resultSet, "start", QCString().setNum(m_start));
+ ZOOM_resultset_option_set(resultSet, "count", QCString().setNum(m_limit-m_start));
+ // search in default syntax, unless syntax is already set
+ if(!m_syntax.isEmpty()) {
+ ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", m_syntax.latin1());
+ }
+
+ const char* errmsg;
+ const char* addinfo;
+ errcode = ZOOM_connection_error(d->conn, &errmsg, &addinfo);
+ if(errcode != 0) {
+ ZOOM_resultset_destroy(resultSet);
+ ZOOM_query_destroy(query);
+ m_connected = false;
+
+ QString s = i18n("Connection search error %1: %2").arg(errcode).arg(toString(errmsg));
+ if(!QCString(addinfo).isEmpty()) {
+ s += " (" + toString(addinfo) + ")";
+ }
+ myDebug() << "Z3950Connection::run() - " << s << endl;
+ done(s, MessageHandler::Error);
+ return;
+ }
+
+ const size_t numResults = ZOOM_resultset_size(resultSet);
+
+ QString newSyntax = m_syntax;
+ if(numResults > 0) {
+ myLog() << "Z3950Connection::run() - current syntax is " << m_syntax << " (" << numResults << " results)" << endl;
+ // so now we know that results exist, might have to check syntax
+ int len;
+ ZOOM_record rec = ZOOM_resultset_record(resultSet, 0);
+ // want raw unless it's mods
+ ZOOM_record_get(rec, type, &len);
+ if(len > 0 && m_syntax.isEmpty()) {
+ newSyntax = QString::fromLatin1(ZOOM_record_get(rec, "syntax", &len)).lower();
+ myLog() << "Z3950Connection::run() - syntax guess is " << newSyntax << endl;
+ if(newSyntax == Latin1Literal("mods") || newSyntax == Latin1Literal("xml")) {
+ m_syntax = QString::fromLatin1("xml");
+ ZOOM_resultset_option_set(resultSet, "elementSetName", "mods");
+ } else if(newSyntax == Latin1Literal("grs-1")) {
+ // if it's defaulting to grs-1, go ahead and change it to try to get a marc
+ // record since grs-1 is a last resort for us
+ newSyntax.truncate(0);
+ }
+ }
+ // right now, we just understand mods, unimarc, marc21/usmarc, and grs-1
+ if(newSyntax != Latin1Literal("xml") &&
+ newSyntax != Latin1Literal("usmarc") &&
+ newSyntax != Latin1Literal("marc21") &&
+ newSyntax != Latin1Literal("unimarc") &&
+ newSyntax != Latin1Literal("grs-1")) {
+ myLog() << "Z3950Connection::run() - changing z39.50 syntax to MODS" << endl;
+ newSyntax = QString::fromLatin1("xml");
+ ZOOM_resultset_option_set(resultSet, "elementSetName", "mods");
+ ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1());
+ rec = ZOOM_resultset_record(resultSet, 0);
+ ZOOM_record_get(rec, "xml", &len);
+ if(len == 0) {
+ // change set name back
+ ZOOM_resultset_option_set(resultSet, "elementSetName", m_esn.latin1());
+ newSyntax = QString::fromLatin1("usmarc"); // try usmarc
+ myLog() << "Z3950Connection::run() - changing z39.50 syntax to USMARC" << endl;
+ ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1());
+ rec = ZOOM_resultset_record(resultSet, 0);
+ ZOOM_record_get(rec, "raw", &len);
+ }
+ if(len == 0) {
+ newSyntax = QString::fromLatin1("marc21"); // try marc21
+ myLog() << "Z3950Connection::run() - changing z39.50 syntax to MARC21" << endl;
+ ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1());
+ rec = ZOOM_resultset_record(resultSet, 0);
+ ZOOM_record_get(rec, "raw", &len);
+ }
+ if(len == 0) {
+ newSyntax = QString::fromLatin1("unimarc"); // try unimarc
+ myLog() << "Z3950Connection::run() - changing z39.50 syntax to UNIMARC" << endl;
+ ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1());
+ rec = ZOOM_resultset_record(resultSet, 0);
+ ZOOM_record_get(rec, "raw", &len);
+ }
+ if(len == 0) {
+ newSyntax = QString::fromLatin1("grs-1"); // try grs-1
+ myLog() << "Z3950Connection::run() - changing z39.50 syntax to GRS-1" << endl;
+ ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1());
+ rec = ZOOM_resultset_record(resultSet, 0);
+ ZOOM_record_get(rec, "raw", &len);
+ }
+ if(len == 0) {
+ myLog() << "Z3950Connection::run() - giving up" << endl;
+ ZOOM_resultset_destroy(resultSet);
+ ZOOM_query_destroy(query);
+ done(i18n("Record syntax error"), MessageHandler::Error);
+ return;
+ }
+ myLog() << "Z3950Connection::run() - final syntax is " << newSyntax << endl;
+ }
+ }
+
+ // go back to fooling ourselves and calling it mods
+ if(m_syntax == Latin1Literal("xml")) {
+ m_syntax = QString::fromLatin1("mods");
+ }
+ if(newSyntax == Latin1Literal("xml")) {
+ newSyntax = QString::fromLatin1("mods");
+ }
+ // save syntax change for next time
+ if(m_syntax != newSyntax) {
+ kapp->postEvent(m_fetcher, new Z3950SyntaxChange(newSyntax));
+ m_syntax = newSyntax;
+ }
+
+ if(m_sourceCharSet.isEmpty()) {
+ m_sourceCharSet = QString::fromLatin1("marc-8");
+ }
+
+ const size_t realLimit = QMIN(numResults, m_limit);
+
+ for(size_t i = m_start; i < realLimit && !m_aborted; ++i) {
+ myLog() << "Z3950Connection::run() - grabbing index " << i << endl;
+ ZOOM_record rec = ZOOM_resultset_record(resultSet, i);
+ if(!rec) {
+ myDebug() << "Z3950Connection::run() - no record returned for index " << i << endl;
+ continue;
+ }
+ int len;
+ QString data;
+ if(m_syntax == Latin1Literal("mods")) {
+ data = toString(ZOOM_record_get(rec, "xml", &len));
+ } else if(m_syntax == Latin1Literal("grs-1")) { // grs-1
+ // we're going to parse the rendered data, very ugly...
+ data = toString(ZOOM_record_get(rec, "render", &len));
+ } else {
+#if 0
+ kdWarning() << "Remove debug from z3950connection.cpp" << endl;
+ {
+ QFile f1(QString::fromLatin1("/tmp/z3950.raw"));
+ if(f1.open(IO_WriteOnly)) {
+ QDataStream t(&f1);
+ t << ZOOM_record_get(rec, "raw", &len);
+ }
+ f1.close();
+ }
+#endif
+ data = toXML(ZOOM_record_get(rec, "raw", &len), m_sourceCharSet);
+ }
+ Z3950ResultFound* ev = new Z3950ResultFound(data);
+ QApplication::postEvent(m_fetcher, ev);
+ }
+
+ ZOOM_resultset_destroy(resultSet);
+ ZOOM_query_destroy(query);
+
+ m_hasMore = m_limit < numResults;
+ if(m_hasMore) {
+ m_start = m_limit;
+ m_limit += Z3950_DEFAULT_MAX_RECORDS;
+ }
+#endif
+ done();
+}
+
+/**
+ * Opens the ZOOM connection to the configured host unless one is already open.
+ *
+ * On a connection error the ZOOM objects are destroyed, the error text is
+ * reported through done() and false is returned. When built without YAZ the
+ * function only flags the connection as made.
+ *
+ * @return true if the connection is (or already was) established
+ */
+bool Z3950Connection::makeConnection() {
+  if(m_connected) {
+    return true;
+  }
+//  myDebug() << "Z3950Connection::makeConnection() - " << m_fetcher->source() << endl;
+// I don't know what to do except assume database, user, and password are in locale encoding
+#ifdef HAVE_YAZ
+  d->conn_opt = ZOOM_options_create();
+  ZOOM_options_set(d->conn_opt, "implementationName", "Tellico");
+  ZOOM_options_set(d->conn_opt, "databaseName", toCString(m_dbname));
+  ZOOM_options_set(d->conn_opt, "user", toCString(m_user));
+  ZOOM_options_set(d->conn_opt, "password", toCString(m_password));
+
+  d->conn = ZOOM_connection_create(d->conn_opt);
+  ZOOM_connection_connect(d->conn, m_host.latin1(), m_port);
+
+  const char* errorText;  // textual form of the error code, shown to the user below
+  const char* extraInfo;  // optional additional detail from the server
+  const int errorCode = ZOOM_connection_error(d->conn, &errorText, &extraInfo);
+  if(errorCode != 0) {
+    ZOOM_options_destroy(d->conn_opt);
+    ZOOM_connection_destroy(d->conn);
+    m_connected = false;
+
+    QString report = i18n("Connection error %1: %2").arg(errorCode).arg(toString(errorText));
+    const QCString extra(extraInfo);
+    if(!extra.isEmpty()) {
+      report += " (" + toString(extra) + ")";
+    }
+    myDebug() << "Z3950Connection::makeConnection() - " << report << endl;
+    done(report, MessageHandler::Error);
+    return false;
+  }
+#endif
+  m_connected = true;
+  return true;
+}
+
+/**
+ * Signals the fetcher that this connection run has finished without a message.
+ */
+void Z3950Connection::done() {
+  checkPendingEvents();
+  Z3950ConnectionDone* finished = new Z3950ConnectionDone(m_hasMore);
+  kapp->postEvent(m_fetcher, finished);
+}
+
+/**
+ * Signals the fetcher that this connection run has finished, attaching a
+ * message of the given type. The message is dropped if the run was aborted.
+ *
+ * @param msg_ text to hand to the fetcher
+ * @param type_ message type passed through to the fetcher
+ */
+void Z3950Connection::done(const QString& msg_, int type_) {
+  checkPendingEvents();
+  Z3950ConnectionDone* finished = m_aborted
+                                ? new Z3950ConnectionDone(m_hasMore)
+                                : new Z3950ConnectionDone(m_hasMore, msg_, type_);
+  kapp->postEvent(m_fetcher, finished);
+}
+
+/**
+ * Pauses for one second when result events are still outstanding, giving
+ * them a chance to be delivered before the done event is posted.
+ */
+void Z3950Connection::checkPendingEvents() {
+  if(resultsLeft <= 0) {
+    return;  // nothing pending, no need to wait
+  }
+  sleep(1);
+}
+
+// Encodes a QString into the source character set of the server.
+inline
+QCString Z3950Connection::toCString(const QString& text_) {
+  const QCString utf8Bytes = text_.utf8();
+  const QString utf8Name = QString::fromLatin1("utf-8");
+  return iconvRun(utf8Bytes, utf8Name, m_sourceCharSet);
+}
+
+// Decodes bytes in the source character set of the server into a QString.
+inline
+QString Z3950Connection::toString(const QCString& text_) {
+  const QString utf8Name = QString::fromLatin1("utf-8");
+  const QCString utf8Bytes = iconvRun(text_, m_sourceCharSet, utf8Name);
+  return QString::fromUtf8(utf8Bytes);
+}
+
+// static
+/**
+ * Converts text_ from one character set to another with yaz_iconv.
+ *
+ * If yaz cannot open a converter, ISO 5426 and ISO 6937 input is first turned
+ * into UTF-8 by the built-in converters and then run through this function
+ * again. The input is returned unchanged when it is empty, when both
+ * character sets are equal, when the conversion is unsupported or fails, and
+ * when built without YAZ.
+ *
+ * @param text_ bytes to convert
+ * @param fromCharSet_ name of the character set text_ is encoded in
+ * @param toCharSet_ name of the character set to produce
+ * @return the converted bytes, or text_ itself as described above
+ */
+QCString Z3950Connection::iconvRun(const QCString& text_, const QString& fromCharSet_, const QString& toCharSet_) {
+#ifdef HAVE_YAZ
+  if(text_.isEmpty()) {
+    return text_;
+  }
+
+  if(fromCharSet_ == toCharSet_) {
+    return text_;
+  }
+
+  yaz_iconv_t cd = yaz_iconv_open(toCharSet_.latin1(), fromCharSet_.latin1());
+  if(!cd) {
+    // maybe it's iso 5426, which we sorta support
+    QString charSetLower = fromCharSet_.lower();
+    charSetLower.remove('-').remove(' ');
+    if(charSetLower == Latin1Literal("iso5426")) {
+      return iconvRun(Iso5426Converter::toUtf8(text_).utf8(), QString::fromLatin1("utf-8"), toCharSet_);
+    } else if(charSetLower == Latin1Literal("iso6937")) {
+      return iconvRun(Iso6937Converter::toUtf8(text_).utf8(), QString::fromLatin1("utf-8"), toCharSet_);
+    }
+    kdWarning() << "Z3950Connection::iconvRun() - conversion from " << fromCharSet_
+                << " to " << toCharSet_ << " is unsupported" << endl;
+    return text_;
+  }
+
+  const char* input = text_;
+  size_t inlen = text_.length();
+
+  size_t outlen = 2 * inlen;  // this is enough, right?
+  QMemArray<char> result0(outlen);
+  char* result = result0.data();
+
+  int r = yaz_iconv(cd, const_cast<char**>(&input), &inlen, &result, &outlen);
+  if(r <= 0) {
+    myDebug() << "Z3950Connection::iconvRun() - can't decode buffer" << endl;
+    // the converter must be released on the failure path too
+    yaz_iconv_close(cd);
+    return text_;
+  }
+  // bug in yaz, need to flush buffer to catch last character
+  yaz_iconv(cd, 0, 0, &result, &outlen);
+
+  // length is pointer difference
+  size_t len = result - result0;
+
+  // len+1 leaves room for the terminating null in the copy
+  QCString output = QCString(result0, len+1);
+//  myDebug() << "-------------------------------------------" << endl;
+//  myDebug() << output << endl;
+//  myDebug() << "-------------------------------------------" << endl;
+  yaz_iconv_close(cd);
+  return output;
+#else // no yaz
+  return text_;
+#endif
+}
+
+/**
+ * Decodes a raw MARC record into MARCXML text.
+ *
+ * The record is converted from charSet_ to UTF-8 while decoding. If yaz
+ * cannot open a converter for charSet_, ISO 5426 and ISO 6937 input is first
+ * turned into UTF-8 by the built-in converters and decoded again.
+ *
+ * @param marc_ raw MARC record bytes
+ * @param charSet_ name of the character set the record is encoded in
+ * @return the XML document as a string, or a null string when the input is
+ *         empty, the character set is unsupported, the record length is
+ *         implausible, decoding fails, or YAZ support is not compiled in
+ */
+QString Z3950Connection::toXML(const QCString& marc_, const QString& charSet_) {
+#ifdef HAVE_YAZ
+  if(marc_.isEmpty()) {
+    myDebug() << "Z3950Connection::toXML() - empty string" << endl;
+    return QString::null;
+  }
+
+  yaz_iconv_t cd = yaz_iconv_open("utf-8", charSet_.latin1());
+  if(!cd) {
+    // maybe it's iso 5426, which we sorta support
+    QString charSetLower = charSet_.lower();
+    charSetLower.remove('-').remove(' ');
+    if(charSetLower == Latin1Literal("iso5426")) {
+      return toXML(Iso5426Converter::toUtf8(marc_).utf8(), QString::fromLatin1("utf-8"));
+    } else if(charSetLower == Latin1Literal("iso6937")) {
+      return toXML(Iso6937Converter::toUtf8(marc_).utf8(), QString::fromLatin1("utf-8"));
+    }
+    kdWarning() << "Z3950Connection::toXML() - conversion from " << charSet_ << " is unsupported" << endl;
+    return QString::null;
+  }
+
+  yaz_marc_t mt = yaz_marc_create();
+  yaz_marc_iconv(mt, cd);
+  yaz_marc_xml(mt, YAZ_MARC_MARCXML);
+
+  // first 5 bytes are length
+  bool ok;
+#if YAZ_VERSIONL < 0x030000
+  int len = marc_.left(5).toInt(&ok);
+#else
+  size_t len = marc_.left(5).toInt(&ok);
+#endif
+  if(ok && (len < 25 || len > 100000)) {
+    myDebug() << "Z3950Connection::toXML() - bad length: " << len << endl;
+    // release the converter and decoder on every exit path
+    yaz_iconv_close(cd);
+    yaz_marc_destroy(mt);
+    return QString::null;
+  }
+
+#if YAZ_VERSIONL < 0x030000
+  char* result;
+#else
+  const char* result;
+#endif
+  // a buffer size of -1 is passed; on success len is overwritten with the result length
+  int r = yaz_marc_decode_buf(mt, marc_, -1, &result, &len);
+  if(r <= 0) {
+    myDebug() << "Z3950Connection::toXML() - can't decode buffer" << endl;
+    yaz_iconv_close(cd);
+    yaz_marc_destroy(mt);
+    return QString::null;
+  }
+
+  QString output = QString::fromLatin1("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
+  // copy the text before the decoder that owns 'result' is destroyed
+  output += QString::fromUtf8(QCString(result, len+1), len+1);
+//  myDebug() << QCString(result) << endl;
+//  myDebug() << "-------------------------------------------" << endl;
+//  myDebug() << output << endl;
+  yaz_iconv_close(cd);
+  yaz_marc_destroy(mt);
+
+  return output;
+#else // no yaz
+  return QString::null;
+#endif
+}
diff --git a/src/fetch/z3950connection.h b/src/fetch/z3950connection.h
new file mode 100644
index 0000000..0929cb7
--- /dev/null
+++ b/src/fetch/z3950connection.h
@@ -0,0 +1,126 @@
+/***************************************************************************
+ copyright : (C) 2005-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef TELLICO_FETCH_Z3950CONNECTION_H
+#define TELLICO_FETCH_Z3950CONNECTION_H
+
+#include <qthread.h>
+#include <qevent.h>
+#include <qdeepcopy.h>
+
+#include <ksharedptr.h>
+
+namespace Tellico {
+ namespace Fetch {
+ class Z3950Fetcher;
+
+/**
+ * Custom event carrying one search result, as a string, from the
+ * connection thread to the fetcher.
+ */
+class Z3950ResultFound : public QCustomEvent {
+public:
+ // Constructor and destructor are defined out of line.
+ // NOTE(review): this class is a friend of Z3950Connection, presumably so they
+ // can maintain Z3950Connection::resultsLeft - confirm in the .cpp file.
+ Z3950ResultFound(const QString& s);
+ ~Z3950ResultFound();
+ // The record text this event was created with.
+ const QString& result() const { return m_result; }
+
+ // Event type id used to recognize this event in customEvent().
+ static int uid() { return User + 11111; }
+
+private:
+ QString m_result;
+};
+
+/**
+ * Custom event telling the fetcher that a connection run has finished,
+ * optionally with a message to show.
+ */
+class Z3950ConnectionDone : public QCustomEvent {
+public:
+ // Finished without a message; the message type is set to -1.
+ Z3950ConnectionDone(bool more) : QCustomEvent(uid()), m_type(-1), m_hasMore(more) {}
+ // Finished with message s of type t; the string is deep-copied into the event.
+ Z3950ConnectionDone(bool more, const QString& s, int t) : QCustomEvent(uid()), m_msg(QDeepCopy<QString>(s)), m_type(t), m_hasMore(more) {}
+
+ const QString& message() const { return m_msg; }
+ // Message type, or -1 when the event carries no message.
+ int messageType() const { return m_type; }
+ // Whether more results are available beyond the ones already delivered.
+ bool hasMoreResults() const { return m_hasMore; }
+
+ // Event type id used to recognize this event in customEvent().
+ static int uid() { return User + 22222; }
+
+private:
+ QString m_msg;
+ int m_type;
+ bool m_hasMore;
+};
+
+/**
+ * Custom event telling the fetcher that the connection settled on a
+ * different record syntax than the one it was configured with.
+ */
+class Z3950SyntaxChange : public QCustomEvent {
+public:
+ // The syntax name is deep-copied into the event.
+ Z3950SyntaxChange(const QString& s) : QCustomEvent(uid()), m_syntax(QDeepCopy<QString>(s)) {}
+ const QString& syntax() const { return m_syntax; }
+
+ // Event type id used to recognize this event in customEvent().
+ static int uid() { return User + 33333; }
+
+private:
+ QString m_syntax;
+};
+
+/**
+ * Thread that runs a z39.50 query and reports back to a Z3950Fetcher by
+ * posting Z3950ResultFound, Z3950SyntaxChange and Z3950ConnectionDone events.
+ *
+ * @author Robby Stephenson
+ */
+class Z3950Connection : public QThread {
+public:
+ Z3950Connection(Z3950Fetcher* fetcher,
+ const QString& host,
+ uint port,
+ const QString& dbname,
+ const QString& sourceCharSet,
+ const QString& syntax,
+ const QString& esn);
+ ~Z3950Connection();
+
+ void reset();
+ void setQuery(const QString& query);
+ void setUserPassword(const QString& user, const QString& pword);
+ // Thread body.
+ void run();
+
+ // Requests that the running query stop; the flag is checked by the thread.
+ // NOTE(review): m_aborted is a plain bool written here and read in run()
+ // without visible synchronization - confirm this is acceptable.
+ void abort() { m_aborted = true; }
+
+private:
+ // Character set conversion of raw bytes via yaz_iconv.
+ static QCString iconvRun(const QCString& text, const QString& fromCharSet, const QString& toCharSet);
+ // Decodes a raw MARC record into MARCXML text.
+ static QString toXML(const QCString& marc, const QString& fromCharSet);
+
+ bool makeConnection();
+ // Post a Z3950ConnectionDone event, without or with a message.
+ void done();
+ void done(const QString& message, int type);
+ // Conversions between QString and the server's source character set.
+ QCString toCString(const QString& text);
+ QString toString(const QCString& text);
+ void checkPendingEvents();
+
+ // Opaque holder for the connection handles, defined in the .cpp file.
+ class Private;
+ Private* d;
+
+ bool m_connected;
+ bool m_aborted;
+
+ KSharedPtr<Z3950Fetcher> m_fetcher;
+ QString m_host;
+ uint m_port;
+ QString m_dbname;
+ QString m_user;
+ QString m_password;
+ QString m_sourceCharSet;
+ QString m_syntax;
+ QString m_pqn;
+ QString m_esn;
+ // Index range [m_start, m_limit) of the result records to fetch in a run.
+ size_t m_start;
+ size_t m_limit;
+ bool m_hasMore;
+
+ friend class Z3950ResultFound;
+ // Checked by checkPendingEvents() before posting the done event.
+ static int resultsLeft;
+};
+
+ } // end namespace
+} // end namespace
+
+#endif
diff --git a/src/fetch/z3950fetcher.cpp b/src/fetch/z3950fetcher.cpp
new file mode 100644
index 0000000..5e045cf
--- /dev/null
+++ b/src/fetch/z3950fetcher.cpp
@@ -0,0 +1,782 @@
+/***************************************************************************
+ copyright : (C) 2003-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ * In addition, as a special exception, the author gives permission to *
+ * link the code of this program with the OpenSSL library released by *
+ * the OpenSSL Project (or with modified versions of OpenSSL that use *
+ * the same license as OpenSSL), and distribute linked combinations *
+ * including the two. You must obey the GNU General Public License in *
+ * all respects for all of the code used other than OpenSSL. If you *
+ * modify this file, you may extend this exception to your version of *
+ * the file, but you are not obligated to do so. If you do not wish to *
+ * do so, delete this exception statement from your version. *
+ * *
+ ***************************************************************************/
+
+#include <config.h>
+
+#include "z3950fetcher.h"
+#include "z3950connection.h"
+#include "messagehandler.h"
+#include "../collection.h"
+#include "../latin1literal.h"
+#include "../translators/xslthandler.h"
+#include "../translators/tellicoimporter.h"
+#include "../translators/grs1importer.h"
+#include "../tellico_debug.h"
+#include "../gui/lineedit.h"
+#include "../gui/combobox.h"
+#include "../isbnvalidator.h"
+#include "../lccnvalidator.h"
+
+#include <klocale.h>
+#include <kstandarddirs.h>
+#include <kapplication.h>
+#include <knuminput.h>
+#include <kconfig.h>
+#include <kcombobox.h>
+#include <kaccelmanager.h>
+#include <kseparator.h>
+
+#include <qfile.h>
+#include <qlayout.h>
+#include <qlabel.h>
+#include <qwhatsthis.h>
+#include <qdom.h>
+
+// File-local defaults for the fetcher.
+namespace {
+ // Port used when none is configured.
+ static const int Z3950_DEFAULT_PORT = 210;
+ // Element set name the fetcher starts out with.
+ static const QString Z3950_DEFAULT_ESN = QString::fromLatin1("F");
+}
+
+using Tellico::Fetch::Z3950Fetcher;
+
+// Creates an idle fetcher: no connection and no XSLT handlers yet (they are
+// created on demand), default port and element set name, not started, done.
+Z3950Fetcher::Z3950Fetcher(QObject* parent_, const char* name_)
+ : Fetcher(parent_, name_), m_conn(0), m_port(Z3950_DEFAULT_PORT), m_esn(Z3950_DEFAULT_ESN),
+ m_started(false), m_done(true), m_MARC21XMLHandler(0),
+ m_UNIMARCXMLHandler(0), m_MODSHandler(0) {
+}
+
+// Releases the three XSLT handlers and the connection object, resetting
+// each pointer after deletion.
+Z3950Fetcher::~Z3950Fetcher() {
+ delete m_MARC21XMLHandler;
+ m_MARC21XMLHandler = 0;
+ delete m_UNIMARCXMLHandler;
+ m_UNIMARCXMLHandler = 0;
+ delete m_MODSHandler;
+ m_MODSHandler = 0;
+ delete m_conn;
+ m_conn = 0;
+}
+
+// Translated generic name, used by source() when no name has been set.
+QString Z3950Fetcher::defaultName() {
+ return i18n("z39.50 Server");
+}
+
+// Name of this source: the configured name, or the default one if unset.
+QString Z3950Fetcher::source() const {
+  if(m_name.isEmpty()) {
+    return defaultName();
+  }
+  return m_name;
+}
+
+// Only book and bibtex collections can be fetched from this source.
+bool Z3950Fetcher::canFetch(int type) const {
+  switch(type) {
+    case Data::Collection::Book:
+    case Data::Collection::Bibtex:
+      return true;
+    default:
+      return false;
+  }
+}
+
+/**
+ * Loads the server settings for this fetcher.
+ *
+ * Without a "Preset" entry the settings come straight from config_.
+ * Otherwise the matching group of the installed z3950-servers.cfg file is
+ * used. In both cases a port that is not positive is ignored, so the
+ * previous value (the default, for a fresh fetcher) stays in effect. The list
+ * of custom fields is always read from config_.
+ *
+ * @param config_ configuration group of this fetcher
+ */
+void Z3950Fetcher::readConfigHook(const KConfigGroup& config_) {
+  const QString preset = config_.readEntry("Preset");
+  if(preset.isEmpty()) {
+    m_host = config_.readEntry("Host");
+    const int p = config_.readNumEntry("Port", Z3950_DEFAULT_PORT);
+    if(p > 0) {
+      m_port = p;
+    }
+    m_dbname = config_.readEntry("Database");
+    m_sourceCharSet = config_.readEntry("Charset");
+    m_syntax = config_.readEntry("Syntax");
+    m_user = config_.readEntry("User");
+    m_password = config_.readEntry("Password");
+  } else {
+    m_preset = preset;
+    const QString serverFile = locate("appdata", QString::fromLatin1("z3950-servers.cfg"));
+    if(!serverFile.isEmpty()) {
+      KConfig cfg(serverFile, true /* read-only */, false /* read KDE */);
+      const QStringList servers = cfg.groupList();
+      for(QStringList::ConstIterator server = servers.begin(); server != servers.end(); ++server) {
+        if(*server != preset) {
+          continue;
+        }
+        cfg.setGroup(*server);
+        m_host = cfg.readEntry("Host");
+        // same sanity check as for a manually configured port; m_port is
+        // unsigned, so a negative value must never be assigned to it
+        const int p = cfg.readNumEntry("Port", Z3950_DEFAULT_PORT);
+        if(p > 0) {
+          m_port = p;
+        }
+        m_dbname = cfg.readEntry("Database");
+        m_sourceCharSet = cfg.readEntry("Charset");
+        m_syntax = cfg.readEntry("Syntax");
+        m_user = cfg.readEntry("User");
+        m_password = cfg.readEntry("Password");
+        break;  // the preset was found, no need to look at the remaining groups
+      }
+    }
+  }
+
+  m_fields = config_.readListEntry("Custom Fields");
+}
+
+// Writes the current record syntax (which may have been changed by
+// auto-detection, see customEvent()) to the config group and syncs it.
+void Z3950Fetcher::saveConfigHook(KConfigGroup& config_) {
+ config_.writeEntry("Syntax", m_syntax);
+ config_.sync();
+}
+
+/**
+ * Starts a new search by translating the key and value into a PQN (prefix
+ * query notation) string and handing it to the connection thread.
+ *
+ * Title, person and keyword values are wrapped in double quotes unless they
+ * are already enclosed in a matching pair of quotes. ISBN and LCCN values may
+ * hold several entries separated by "; ", which are or-ed together; for each
+ * ISBN starting with 978 the ISBN-10 form is searched as well, and for each
+ * LCCN the formalized form too. Raw values are used as the query verbatim.
+ *
+ * The search is stopped right away if host or database are not configured,
+ * if the key is not recognized, or if YAZ support is not compiled in.
+ *
+ * @param key_ kind of value to search for
+ * @param value_ the search term
+ */
+void Z3950Fetcher::search(FetchKey key_, const QString& value_) {
+#ifdef HAVE_YAZ
+  m_started = true;
+  m_done = false;
+  if(m_host.isEmpty() || m_dbname.isEmpty()) {
+    myDebug() << "Z3950Fetcher::search() - settings are not set!" << endl;
+    stop();
+    return;
+  }
+  m_key = key_;
+  m_value = value_;
+
+  QString svalue = m_value;
+  // the opening quote has to be captured, otherwise the \1 backreference
+  // has nothing to refer to and an already quoted value is never recognized
+  QRegExp rx1(QString::fromLatin1("(['\"]).*\\1"));
+  if(!rx1.exactMatch(svalue)) {
+    svalue.prepend('"').append('"');
+  }
+
+  switch(key_) {
+    case Title:
+      m_pqn = QString::fromLatin1("@attr 1=4 ") + svalue;
+      break;
+    case Person:
+//      m_pqn = QString::fromLatin1("@or ");
+//      m_pqn += QString::fromLatin1("@attr 1=1 \"") + m_value + '"';
+      m_pqn = QString::fromLatin1(" @attr 1=1003 ") + svalue;
+      break;
+    case ISBN:
+      {
+        m_pqn.truncate(0);
+        QString s = m_value;
+        s.remove('-');
+        QStringList isbnList = QStringList::split(QString::fromLatin1("; "), s);
+        // also going to search for isbn10 values
+        for(QStringList::Iterator it = isbnList.begin(); it != isbnList.end(); ++it) {
+          if((*it).startsWith(QString::fromLatin1("978"))) {
+            QString isbn10 = ISBNValidator::isbn10(*it);
+            isbn10.remove('-');
+            // inserted in front of the current item, so it is not visited again
+            isbnList.insert(it, isbn10);
+          }
+        }
+        const int count = isbnList.count();
+        if(count > 1) {
+          m_pqn = QString::fromLatin1("@or ");
+        }
+        // n terms need n-1 prefix operators: one was added above, the rest here
+        for(int i = 0; i < count; ++i) {
+          m_pqn += QString::fromLatin1(" @attr 1=7 ") + isbnList[i];
+          if(i < count-2) {
+            m_pqn += QString::fromLatin1(" @or");
+          }
+        }
+      }
+      break;
+    case LCCN:
+      {
+        m_pqn.truncate(0);
+        QString s = m_value;
+        s.remove('-');
+        QStringList lccnList = QStringList::split(QString::fromLatin1("; "), s);
+        // every entry contributes two terms: as given and formalized
+        while(!lccnList.isEmpty()) {
+          m_pqn += QString::fromLatin1(" @or @attr 1=9 ") + lccnList.front();
+          if(lccnList.count() > 1) {
+            m_pqn += QString::fromLatin1(" @or");
+          }
+          m_pqn += QString::fromLatin1(" @attr 1=9 ") + LCCNValidator::formalize(lccnList.front());
+          lccnList.pop_front();
+        }
+      }
+      break;
+    case Keyword:
+      m_pqn = QString::fromLatin1("@attr 1=1016 ") + svalue;
+      break;
+    case Raw:
+      m_pqn = m_value;
+      break;
+    default:
+      kdWarning() << "Z3950Fetcher::search() - key not recognized: " << key_ << endl;
+      stop();
+      return;
+  }
+//  m_pqn = QString::fromLatin1("@attr 1=7 0253333490");
+  myLog() << "Z3950Fetcher::search() - PQN query = " << m_pqn << endl;
+
+  if(m_conn) {
+    m_conn->reset(); // reset counts
+  }
+
+  process();
+#else // HAVE_YAZ
+  Q_UNUSED(key_);
+  Q_UNUSED(value_);
+  stop();
+  return;
+#endif
+}
+
+// Runs the connection again with the current query to fetch further results.
+// Does nothing when built without YAZ.
+void Z3950Fetcher::continueSearch() {
+#ifdef HAVE_YAZ
+ m_started = true;
+ process();
+#endif
+}
+
+/**
+ * Stops a running search: aborts the connection thread, waits up to one
+ * second for it and emits signalDone(). Does nothing if no search is running.
+ */
+void Z3950Fetcher::stop() {
+  if(m_started) {
+//    myDebug() << "Z3950Fetcher::stop()" << endl;
+    m_started = false;
+    if(m_conn) {
+      // give it a second to cleanup
+      m_conn->abort();
+      m_conn->wait(1000);
+    }
+    emit signalDone(this);
+  }
+}
+
+/**
+ * Makes sure the MARC21-to-MODS stylesheet handler exists, creating it on
+ * first use.
+ *
+ * @return false if the stylesheet cannot be located or is not valid
+ */
+bool Z3950Fetcher::initMARC21Handler() {
+  if(!m_MARC21XMLHandler) {
+    const QString stylesheet = locate("appdata", QString::fromLatin1("MARC21slim2MODS3.xsl"));
+    if(stylesheet.isEmpty()) {
+      kdWarning() << "Z3950Fetcher::initHandlers() - can not locate MARC21slim2MODS3.xsl." << endl;
+      return false;
+    }
+
+    KURL url;
+    url.setPath(stylesheet);
+
+    XSLTHandler* handler = new XSLTHandler(url);
+    if(!handler->isValid()) {
+      kdWarning() << "Z3950Fetcher::initHandlers() - error in MARC21slim2MODS3.xsl." << endl;
+      delete handler;
+      return false;
+    }
+    // only keep a handler that is known to be usable
+    m_MARC21XMLHandler = handler;
+  }
+  return true;
+}
+
+/**
+ * Makes sure the UNIMARC-to-MODS stylesheet handler exists, creating it on
+ * first use.
+ *
+ * @return false if the stylesheet cannot be located or is not valid
+ */
+bool Z3950Fetcher::initUNIMARCHandler() {
+  if(!m_UNIMARCXMLHandler) {
+    const QString stylesheet = locate("appdata", QString::fromLatin1("UNIMARC2MODS3.xsl"));
+    if(stylesheet.isEmpty()) {
+      kdWarning() << "Z3950Fetcher::initHandlers() - can not locate UNIMARC2MODS3.xsl." << endl;
+      return false;
+    }
+
+    KURL url;
+    url.setPath(stylesheet);
+
+    XSLTHandler* handler = new XSLTHandler(url);
+    if(!handler->isValid()) {
+      kdWarning() << "Z3950Fetcher::initHandlers() - error in UNIMARC2MODS3.xsl." << endl;
+      delete handler;
+      return false;
+    }
+    // only keep a handler that is known to be usable
+    m_UNIMARCXMLHandler = handler;
+  }
+  return true;
+}
+
+/**
+ * Makes sure the MODS-to-Tellico stylesheet handler exists, creating it on
+ * first use. If the stylesheet turns out to be invalid, the MARC handlers
+ * are discarded as well.
+ *
+ * @return false if the stylesheet cannot be located or is not valid
+ */
+bool Z3950Fetcher::initMODSHandler() {
+  if(!m_MODSHandler) {
+    const QString stylesheet = locate("appdata", QString::fromLatin1("mods2tellico.xsl"));
+    if(stylesheet.isEmpty()) {
+      kdWarning() << "Z3950Fetcher::initHandlers() - can not locate mods2tellico.xsl." << endl;
+      return false;
+    }
+
+    KURL url;
+    url.setPath(stylesheet);
+
+    XSLTHandler* handler = new XSLTHandler(url);
+    if(!handler->isValid()) {
+      kdWarning() << "Z3950Fetcher::initHandlers() - error in mods2tellico.xsl." << endl;
+      delete handler;
+      // no use in keeping the MARC handlers now
+      delete m_MARC21XMLHandler;
+      m_MARC21XMLHandler = 0;
+      delete m_UNIMARCXMLHandler;
+      m_UNIMARCXMLHandler = 0;
+      return false;
+    }
+    m_MODSHandler = handler;
+  }
+  return true;
+}
+
+/**
+ * Hands the current PQN query to the connection thread and starts it. The
+ * connection is created on first use; an existing one is waited for first.
+ */
+void Z3950Fetcher::process() {
+  if(!m_conn) {
+    m_conn = new Z3950Connection(this, m_host, m_port, m_dbname, m_sourceCharSet, m_syntax, m_esn);
+    if(!m_user.isEmpty()) {
+      m_conn->setUserPassword(m_user, m_password);
+    }
+  } else {
+    // let any previous run finish before reusing the thread
+    m_conn->wait();
+  }
+
+  m_conn->setQuery(m_pqn);
+  m_conn->start();
+}
+
+/**
+ * Turns one record delivered by the connection into search results.
+ *
+ * GRS-1 records are imported directly. Everything else is brought into MODS
+ * form (MODS as is, UNIMARC and MARC21/USMARC through their stylesheets),
+ * converted with the MODS stylesheet and imported as a Tellico collection.
+ * Custom fields that were not selected are removed, and a SearchResult is
+ * emitted for every entry.
+ *
+ * @param result_ record text as produced by the connection
+ */
+void Z3950Fetcher::handleResult(const QString& result_) {
+ if(result_.isEmpty()) {
+ myDebug() << "Z3950Fetcher::handleResult() - empty record found, maybe the character encoding or record format is wrong?" << endl;
+ return;
+ }
+
+#if 0
+ kdWarning() << "Remove debug from z3950fetcher.cpp" << endl;
+ {
+ QFile f1(QString::fromLatin1("/tmp/marc.xml"));
+ if(f1.open(IO_WriteOnly)) {
+// if(f1.open(IO_WriteOnly | IO_Append)) {
+ QTextStream t(&f1);
+ t.setEncoding(QTextStream::UnicodeUTF8);
+ t << result_;
+ }
+ f1.close();
+ }
+#endif
+ // assume always utf-8
+ QString str, msg;
+ Data::CollPtr coll = 0;
+ // not marc, has to be grs-1
+ if(m_syntax == Latin1Literal("grs-1")) {
+ Import::GRS1Importer imp(result_);
+ coll = imp.collection();
+ msg = imp.statusMessage();
+ } else { // now the MODS stuff
+ // the stylesheet handlers are created lazily inside the conditions below;
+ // NOTE(review): if the UNIMARC handler cannot be initialized, a unimarc
+ // record falls through to the MARC21 stylesheet - confirm this is intended
+ if(m_syntax == Latin1Literal("mods")) {
+ str = result_;
+ } else if(m_syntax == Latin1Literal("unimarc") && initUNIMARCHandler()) {
+ str = m_UNIMARCXMLHandler->applyStylesheet(result_);
+ } else if(initMARC21Handler()) { // got to be usmarc/marc21
+ str = m_MARC21XMLHandler->applyStylesheet(result_);
+ }
+ // without MODS text or a MODS handler nothing can be imported, so the search is stopped
+ if(str.isEmpty() || !initMODSHandler()) {
+ myDebug() << "Z3950Fetcher::handleResult() - empty string or can't init" << endl;
+ stop();
+ return;
+ }
+#if 0
+ kdWarning() << "Remove debug from z3950fetcher.cpp" << endl;
+ {
+ QFile f2(QString::fromLatin1("/tmp/mods.xml"));
+// if(f2.open(IO_WriteOnly)) {
+ if(f2.open(IO_WriteOnly | IO_Append)) {
+ QTextStream t(&f2);
+ t.setEncoding(QTextStream::UnicodeUTF8);
+ t << str;
+ }
+ f2.close();
+ }
+#endif
+ Import::TellicoImporter imp(m_MODSHandler->applyStylesheet(str));
+ imp.setOptions(imp.options() & ~Import::ImportProgress); // no progress needed
+ coll = imp.collection();
+ msg = imp.statusMessage();
+ }
+
+ if(!coll) {
+ if(!msg.isEmpty()) {
+ message(msg, MessageHandler::Warning);
+ }
+ myDebug() << "Z3950Fetcher::handleResult() - no collection pointer: " << msg << endl;
+ return;
+ }
+
+ if(coll->entryCount() == 0) {
+// myDebug() << "Z3950Fetcher::handleResult() - no Tellico entry in result" << endl;
+ return;
+ }
+
+ // drop every custom field that is not in the configured list
+ const StringMap customFields = Z3950Fetcher::customFields();
+ for(StringMap::ConstIterator it = customFields.begin(); it != customFields.end(); ++it) {
+ if(!m_fields.contains(it.key())) {
+ coll->removeField(it.key());
+ }
+ }
+
+ Data::EntryVec entries = coll->entries();
+ for(Data::EntryVec::Iterator entry = entries.begin(); entry != entries.end(); ++entry) {
+ // description shown with the result: author/publisher[/year], preferring the copyright year
+ QString desc = entry->field(QString::fromLatin1("author")) + '/'
+ + entry->field(QString::fromLatin1("publisher"));
+ if(!entry->field(QString::fromLatin1("cr_year")).isEmpty()) {
+ desc += QChar('/') + entry->field(QString::fromLatin1("cr_year"));
+ } else if(!entry->field(QString::fromLatin1("pub_year")).isEmpty()){
+ desc += QChar('/') + entry->field(QString::fromLatin1("pub_year"));
+ }
+ SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn")));
+ // remember the entry under the result's uid so fetchEntry() can return it
+ m_entries.insert(r->uid, entry);
+ emit signalResultFound(r);
+ }
+}
+
+// Marks the search as done and stops it, which emits signalDone() if a
+// search was started.
+void Z3950Fetcher::done() {
+ m_done = true;
+ stop();
+}
+
+// Returns the entry stored by handleResult() under the given result uid.
+// NOTE(review): what operator[] yields for an unknown uid depends on the type
+// of m_entries, which is declared elsewhere - confirm callers only pass known uids.
+Tellico::Data::EntryPtr Z3950Fetcher::fetchEntry(uint uid_) {
+ return m_entries[uid_];
+}
+
+/**
+ * Dispatches the events posted by the connection thread: a found result, the
+ * end of the run, or a change of the record syntax. Events are ignored while
+ * there is no connection object.
+ *
+ * @param event_ the event posted to this fetcher
+ */
+void Z3950Fetcher::customEvent(QCustomEvent* event_) {
+  if(!m_conn) {
+    return;
+  }
+
+  const int kind = event_->type();
+
+  if(kind == Z3950ResultFound::uid()) {
+    if(m_done) {
+      kdWarning() << "Z3950Fetcher::customEvent() - result returned after done signal!" << endl;
+    }
+    handleResult(static_cast<Z3950ResultFound*>(event_)->result());
+    return;
+  }
+
+  if(kind == Z3950ConnectionDone::uid()) {
+    Z3950ConnectionDone* finished = static_cast<Z3950ConnectionDone*>(event_);
+    // a message type of -1 means there is nothing to report
+    if(finished->messageType() > -1) {
+      message(finished->message(), finished->messageType());
+    }
+    m_hasMoreResults = finished->hasMoreResults();
+    m_conn->wait();
+    done();
+    return;
+  }
+
+  if(kind == Z3950SyntaxChange::uid()) {
+    if(m_done) {
+      kdWarning() << "Z3950Fetcher::customEvent() - syntax changed after done signal!" << endl;
+    }
+    const QString& detected = static_cast<Z3950SyntaxChange*>(event_)->syntax();
+    if(m_syntax != detected) {
+      // it gets saved when saveConfigHook() gets called from the Fetcher() d'tor
+      m_syntax = detected;
+    }
+    return;
+  }
+
+  kdWarning() << "Z3950Fetcher::customEvent() - weird type: " << event_->type() << endl;
+}
+
+/**
+ * Searches for updated data for an existing entry, using the first
+ * non-empty field out of ISBN, LCCN and title. If all three are empty,
+ * signalDone() is emitted right away.
+ *
+ * @param entry_ the entry to look up
+ */
+void Z3950Fetcher::updateEntry(Data::EntryPtr entry_) {
+//  myDebug() << "Z3950Fetcher::updateEntry() - " << source() << ": " << entry_->title() << endl;
+  const QString isbnValue = entry_->field(QString::fromLatin1("isbn"));
+  if(!isbnValue.isEmpty()) {
+    search(Fetch::ISBN, isbnValue);
+  } else {
+    const QString lccnValue = entry_->field(QString::fromLatin1("lccn"));
+    if(!lccnValue.isEmpty()) {
+      search(Fetch::LCCN, lccnValue);
+    } else {
+      // optimistically try searching for title and rely on Collection::sameEntry() to figure things out
+      const QString titleValue = entry_->field(QString::fromLatin1("title"));
+      if(!titleValue.isEmpty()) {
+        search(Fetch::Title, titleValue);
+      } else {
+        myDebug() << "Z3950Fetcher::updateEntry() - insufficient info to search" << endl;
+        emit signalDone(this); // always need to emit this if not continuing with the search
+      }
+    }
+  }
+}
+
+// Creates a configuration widget initialized from this fetcher's settings.
+// The widget is allocated with new and given parent_ as its parent widget.
+Tellico::Fetch::ConfigWidget* Z3950Fetcher::configWidget(QWidget* parent_) const {
+ return new Z3950Fetcher::ConfigWidget(parent_, this);
+}
+
+/**
+ * Builds the configuration form: a preset server selector followed by manual
+ * settings for host, port, database, character set, record format, user and
+ * password, plus the custom fields selection. When fetcher_ is given, the
+ * form is filled from its current settings.
+ *
+ * @param parent_ parent widget
+ * @param fetcher_ fetcher to take the initial values from, may be 0
+ */
+Z3950Fetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const Z3950Fetcher* fetcher_/*=0*/)
+ : Fetch::ConfigWidget(parent_) {
+ QGridLayout* l = new QGridLayout(optionsWidget(), 7, 2);
+ l->setSpacing(4);
+ l->setColStretch(1, 10);
+
+ // row is pre-incremented as each new line of the form is added
+ int row = -1;
+
+ m_usePreset = new QCheckBox(i18n("Use preset &server:"), optionsWidget());
+ l->addWidget(m_usePreset, ++row, 0);
+ connect(m_usePreset, SIGNAL(toggled(bool)), SLOT(slotTogglePreset(bool)));
+ m_serverCombo = new GUI::ComboBox(optionsWidget());
+ connect(m_serverCombo, SIGNAL(activated(int)), SLOT(slotPresetChanged()));
+ l->addWidget(m_serverCombo, row, 1);
+ ++row;
+ l->addMultiCellWidget(new KSeparator(optionsWidget()), row, row, 0, 1);
+ l->setRowSpacing(row, 10);
+
+ QLabel* label = new QLabel(i18n("Hos&t: "), optionsWidget());
+ l->addWidget(label, ++row, 0);
+ m_hostEdit = new GUI::LineEdit(optionsWidget());
+ connect(m_hostEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+ // changes to the host text are also forwarded through signalName()
+ connect(m_hostEdit, SIGNAL(textChanged(const QString&)), SIGNAL(signalName(const QString&)));
+ l->addWidget(m_hostEdit, row, 1);
+ QString w = i18n("Enter the host name of the server.");
+ QWhatsThis::add(label, w);
+ QWhatsThis::add(m_hostEdit, w);
+ label->setBuddy(m_hostEdit);
+
+ label = new QLabel(i18n("&Port: "), optionsWidget());
+ l->addWidget(label, ++row, 0);
+ // NOTE(review): the upper bound of 999999 exceeds the largest valid TCP port (65535)
+ m_portSpinBox = new KIntSpinBox(0, 999999, 1, Z3950_DEFAULT_PORT, 10, optionsWidget());
+ connect(m_portSpinBox, SIGNAL(valueChanged(int)), SLOT(slotSetModified()));
+ l->addWidget(m_portSpinBox, row, 1);
+ w = i18n("Enter the port number of the server. The default is %1.").arg(Z3950_DEFAULT_PORT);
+ QWhatsThis::add(label, w);
+ QWhatsThis::add(m_portSpinBox, w);
+ label->setBuddy(m_portSpinBox);
+
+ label = new QLabel(i18n("&Database: "), optionsWidget());
+ l->addWidget(label, ++row, 0);
+ m_databaseEdit = new GUI::LineEdit(optionsWidget());
+ connect(m_databaseEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+ l->addWidget(m_databaseEdit, row, 1);
+ w = i18n("Enter the database name used by the server.");
+ QWhatsThis::add(label, w);
+ QWhatsThis::add(m_databaseEdit, w);
+ label->setBuddy(m_databaseEdit);
+
+ label = new QLabel(i18n("Ch&aracter set: "), optionsWidget());
+ l->addWidget(label, ++row, 0);
+ // editable combo box: other character set names can be typed in
+ // NOTE(review): the connection falls back to "marc-8" when no character set
+ // is configured, while the item offered here is spelled "marc8" - confirm
+ // that both spellings are accepted by yaz
+ m_charSetCombo = new KComboBox(true, optionsWidget());
+ m_charSetCombo->insertItem(QString::null);
+ m_charSetCombo->insertItem(QString::fromLatin1("marc8"));
+ m_charSetCombo->insertItem(QString::fromLatin1("iso-8859-1"));
+ m_charSetCombo->insertItem(QString::fromLatin1("utf-8"));
+ connect(m_charSetCombo, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+ l->addWidget(m_charSetCombo, row, 1);
+ w = i18n("Enter the character set encoding used by the z39.50 server. The most likely choice "
+ "is MARC-8, although ISO-8859-1 is common as well.");
+ QWhatsThis::add(label, w);
+ QWhatsThis::add(m_charSetCombo, w);
+ label->setBuddy(m_charSetCombo);
+
+ label = new QLabel(i18n("&Format: "), optionsWidget());
+ l->addWidget(label, ++row, 0);
+ // each item pairs the displayed label with the syntax name stored as its data
+ m_syntaxCombo = new GUI::ComboBox(optionsWidget());
+ m_syntaxCombo->insertItem(i18n("Auto-detect"), QString());
+ m_syntaxCombo->insertItem(QString::fromLatin1("MODS"), QString::fromLatin1("mods"));
+ m_syntaxCombo->insertItem(QString::fromLatin1("MARC21"), QString::fromLatin1("marc21"));
+ m_syntaxCombo->insertItem(QString::fromLatin1("UNIMARC"), QString::fromLatin1("unimarc"));
+ m_syntaxCombo->insertItem(QString::fromLatin1("USMARC"), QString::fromLatin1("usmarc"));
+ m_syntaxCombo->insertItem(QString::fromLatin1("GRS-1"), QString::fromLatin1("grs-1"));
+ connect(m_syntaxCombo, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+ l->addWidget(m_syntaxCombo, row, 1);
+ w = i18n("Enter the data format used by the z39.50 server. Tellico will attempt to "
+ "automatically detect the best setting if <i>auto-detect</i> is selected.");
+ QWhatsThis::add(label, w);
+ QWhatsThis::add(m_syntaxCombo, w);
+ label->setBuddy(m_syntaxCombo);
+
+ label = new QLabel(i18n("&User: "), optionsWidget());
+ l->addWidget(label, ++row, 0);
+ m_userEdit = new GUI::LineEdit(optionsWidget());
+ m_userEdit->setHint(i18n("Optional"));
+ connect(m_userEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+ l->addWidget(m_userEdit, row, 1);
+ w = i18n("Enter the authentication user name used by the z39.50 database. Most servers "
+ "do not need one.");
+ QWhatsThis::add(label, w);
+ QWhatsThis::add(m_userEdit, w);
+ label->setBuddy(m_userEdit);
+
+ label = new QLabel(i18n("Pass&word: "), optionsWidget());
+ l->addWidget(label, ++row, 0);
+ m_passwordEdit = new GUI::LineEdit(optionsWidget());
+ m_passwordEdit->setHint(i18n("Optional"));
+ m_passwordEdit->setEchoMode(QLineEdit::Password);
+ connect(m_passwordEdit, SIGNAL(textChanged(const QString&)), SLOT(slotSetModified()));
+ l->addWidget(m_passwordEdit, row, 1);
+ w = i18n("Enter the authentication password used by the z39.50 database. Most servers "
+ "do not need one. The password will be saved in plain text in the Tellico "
+ "configuration file.");
+ QWhatsThis::add(label, w);
+ QWhatsThis::add(m_passwordEdit, w);
+ label->setBuddy(m_passwordEdit);
+
+ // the row after the last form line takes up the remaining vertical space
+ l->setRowStretch(++row, 1);
+
+ // now add additional fields widget
+ addFieldsWidget(Z3950Fetcher::customFields(), fetcher_ ? fetcher_->m_fields : QStringList());
+
+ loadPresets(fetcher_ ? fetcher_->m_preset : QString::null);
+ if(fetcher_) {
+ m_hostEdit->setText(fetcher_->m_host);
+ m_portSpinBox->setValue(fetcher_->m_port);
+ m_databaseEdit->setText(fetcher_->m_dbname);
+ m_userEdit->setText(fetcher_->m_user);
+ m_passwordEdit->setText(fetcher_->m_password);
+ m_charSetCombo->setCurrentText(fetcher_->m_sourceCharSet);
+ // the syntax is detected automatically by the fetcher
+ // since the config group gets deleted in the config file,
+ // the value needs to be retained here
+ m_syntax = fetcher_->m_syntax;
+ m_syntaxCombo->setCurrentData(m_syntax);
+ }
+ KAcceleratorManager::manage(optionsWidget());
+
+ // check the preset box only when the fetcher was configured with a preset
+ m_usePreset->setChecked(fetcher_ && !fetcher_->m_preset.isEmpty());
+
+ // apply the initial checkbox state to the rest of the form
+ slotTogglePreset(m_usePreset->isChecked());
+}
+
+// The destructor performs no explicit cleanup; its body is intentionally empty.
+Z3950Fetcher::ConfigWidget::~ConfigWidget() {}
+
+/**
+ * Writes the settings currently shown in the widget into @p config_.
+ *
+ * When the preset box is checked, only the "Preset" id is stored. Otherwise
+ * the "Preset" entry is removed and the manually entered values are written;
+ * empty strings and a non-positive port are skipped.
+ *
+ * The custom field selection is saved and the modified flag is cleared in
+ * both modes, so choosing a preset neither drops the selected fields nor
+ * leaves the widget marked as modified.
+ *
+ * @param config_ the configuration group to write to
+ */
+void Z3950Fetcher::ConfigWidget::saveConfig(KConfigGroup& config_) {
+  if(m_usePreset->isChecked()) {
+    const QString presetID = m_serverCombo->currentData().toString();
+    config_.writeEntry("Preset", presetID);
+  } else {
+    config_.deleteEntry("Preset");
+
+    const QString host = m_hostEdit->text().stripWhiteSpace();
+    if(!host.isEmpty()) {
+      config_.writeEntry("Host", host);
+    }
+    const int port = m_portSpinBox->value();
+    if(port > 0) {
+      config_.writeEntry("Port", port);
+    }
+    const QString database = m_databaseEdit->text().stripWhiteSpace();
+    if(!database.isEmpty()) {
+      config_.writeEntry("Database", database);
+    }
+    const QString charset = m_charSetCombo->currentText();
+    if(!charset.isEmpty()) {
+      config_.writeEntry("Charset", charset);
+    }
+    const QString user = m_userEdit->text();
+    if(!user.isEmpty()) {
+      config_.writeEntry("User", user);
+    }
+    const QString password = m_passwordEdit->text();
+    if(!password.isEmpty()) {
+      config_.writeEntry("Password", password);
+    }
+    // the auto-detect item carries empty data; in that case the syntax
+    // remembered in m_syntax is written back unchanged
+    const QString syntax = m_syntaxCombo->currentData().toString();
+    if(!syntax.isEmpty()) {
+      m_syntax = syntax;
+    }
+    config_.writeEntry("Syntax", m_syntax);
+  }
+
+  // common to both modes
+  saveFieldsConfig(config_);
+  slotSetModified(false);
+}
+
+// static
+// Builds the map of optional extra fields offered by this fetcher: each key
+// is a Latin-1 identifier and each value the translated label for it.
+Tellico::StringMap Z3950Fetcher::customFields() {
+  StringMap fields;
+  fields.insert(QString::fromLatin1("address"), i18n("Address"));
+  fields.insert(QString::fromLatin1("abstract"), i18n("Abstract"));
+  fields.insert(QString::fromLatin1("illustrator"), i18n("Illustrator"));
+  return fields;
+}
+
+/**
+ * Switches the widget between a preset server and manually entered settings.
+ *
+ * @param on true enables the preset combo box and disables the manual
+ *           widgets; false does the reverse
+ *
+ * signalName() is emitted at most once per call: with the selected preset's
+ * text when @p on is true, otherwise with the host text if it is not empty.
+ */
+void Z3950Fetcher::ConfigWidget::slotTogglePreset(bool on) {
+  const bool manual = !on;
+
+  m_serverCombo->setEnabled(on);
+  m_hostEdit->setEnabled(manual);
+  m_portSpinBox->setEnabled(manual);
+  m_databaseEdit->setEnabled(manual);
+  m_userEdit->setEnabled(manual);
+  m_passwordEdit->setEnabled(manual);
+  m_charSetCombo->setEnabled(manual);
+  m_syntaxCombo->setEnabled(manual);
+
+  // one notification is enough: enabling or disabling the widgets above
+  // does not change the text that is sent
+  if(on) {
+    emit signalName(m_serverCombo->currentText());
+  } else if(!m_hostEdit->text().isEmpty()) {
+    emit signalName(m_hostEdit->text());
+  }
+}
+
+// Emits signalName() with the text of the currently selected preset.
+void Z3950Fetcher::ConfigWidget::slotPresetChanged() {
+  const QString presetText = m_serverCombo->currentText();
+  emit signalName(presetText);
+}
+
+/**
+ * Fills the preset combo box from the "z3950-servers.cfg" application data
+ * file and picks the initial selection.
+ *
+ * Each non-empty config group with a "Name" entry becomes one item: the
+ * translated name is the item text and the group id is the item data.
+ *
+ * @param current_ id of the preset to select. When it is empty, the first
+ *                 item whose "Locale" list contains the current language
+ *                 (full form or language part) is selected instead.
+ */
+void Z3950Fetcher::ConfigWidget::loadPresets(const QString& current_) {
+  const QString fullLocale = KGlobal::locale()->languageList().first();
+  QString languageCode;
+  {
+    // only the language part is wanted; country and charset are discarded
+    QString unused;
+    KGlobal::locale()->splitLocale(fullLocale, languageCode, unused, unused);
+  }
+
+  const QString presetFile = locate("appdata", QString::fromLatin1("z3950-servers.cfg"));
+  if(presetFile.isEmpty()) {
+    kdWarning() << "Z3950Fetcher::loadPresets() - no z3950 servers file found" << endl;
+    return;
+  }
+
+  KConfig cfg(presetFile, true /* read-only */, false /* read KDE */);
+  const QStringList groups = cfg.groupList();
+
+  // keyed by name so that iterating the map yields the servers sorted by name
+  QMap<QString, QString> idByName;
+  QStringList::ConstIterator group = groups.constBegin();
+  for( ; group != groups.constEnd(); ++group) {
+    const QString groupId = *group;
+    if(groupId.isEmpty()) {
+      myDebug() << "Z3950Fetcher::ConfigWidget::loadPresets() - empty id" << endl;
+      continue;
+    }
+    cfg.setGroup(groupId);
+    const QString serverName = cfg.readEntry("Name");
+    if(serverName.isEmpty()) {
+      continue;
+    }
+    idByName.insert(serverName, groupId);
+  }
+
+  int selected = -1;
+  const bool chooseByLocale = current_.isEmpty();
+  QMap<QString, QString>::ConstIterator it = idByName.constBegin();
+  for( ; it != idByName.constEnd(); ++it) {
+    const QString id = it.data();
+    m_serverCombo->insertItem(i18n(it.key().utf8()), id);
+    const int lastIndex = m_serverCombo->count() - 1;
+
+    if(chooseByLocale) {
+      if(selected != -1) {
+        continue; // the first locale match wins
+      }
+      // set the initial selection to something depending on the language
+      cfg.setGroup(id);
+      const QStringList locales = cfg.readListEntry("Locale");
+      if(locales.findIndex(fullLocale) >= 0 || locales.findIndex(languageCode) >= 0) {
+        selected = lastIndex;
+      }
+    } else if(id == current_) {
+      selected = lastIndex;
+    }
+  }
+
+  if(selected >= 0) {
+    m_serverCombo->setCurrentItem(selected);
+  }
+}
+
+/**
+ * Suggests a name for this source.
+ *
+ * @return the selected preset's text when the preset box is checked,
+ *         otherwise the host text, or a translated generic name if the
+ *         host field is empty
+ */
+QString Z3950Fetcher::ConfigWidget::preferredName() const {
+  if(!m_usePreset->isChecked()) {
+    const QString host = m_hostEdit->text();
+    if(host.isEmpty()) {
+      return i18n("z39.50 Server");
+    }
+    return host;
+  }
+  return m_serverCombo->currentText();
+}
+
+#include "z3950fetcher.moc"
diff --git a/src/fetch/z3950fetcher.h b/src/fetch/z3950fetcher.h
new file mode 100644
index 0000000..ec6dca0
--- /dev/null
+++ b/src/fetch/z3950fetcher.h
@@ -0,0 +1,153 @@
+/***************************************************************************
+ copyright : (C) 2003-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ * In addition, as a special exception, the author gives permission to *
+ * link the code of this program with the OpenSSL library released by *
+ * the OpenSSL Project (or with modified versions of OpenSSL that use *
+ * the same license as OpenSSL), and distribute linked combinations *
+ * including the two. You must obey the GNU General Public License in *
+ * all respects for all of the code used other than OpenSSL. If you *
+ * modify this file, you may extend this exception to your version of *
+ * the file, but you are not obligated to do so. If you do not wish to *
+ * do so, delete this exception statement from your version. *
+ * *
+ ***************************************************************************/
+
+#ifndef TELLICO_Z3950FETCHER_H
+#define TELLICO_Z3950FETCHER_H
+
+namespace Tellico {
+ class XSLTHandler;
+ namespace GUI {
+ class LineEdit;
+ class ComboBox;
+ }
+}
+
+class KIntSpinBox;
+class KComboBox;
+
+#include "fetcher.h"
+#include "configwidget.h"
+#include "../datavectors.h"
+
+#include <qguardedptr.h>
+
+namespace Tellico {
+ namespace Fetch {
+ class Z3950Connection;
+
+/** Fetcher whose type() is Z3950; stores the server settings and a Z3950Connection pointer.
+ * @author Robby Stephenson
+ */
+class Z3950Fetcher : public Fetcher {
+Q_OBJECT
+
+public:
+ Z3950Fetcher(QObject* parent, const char* name = 0);
+
+ virtual ~Z3950Fetcher();
+
+ virtual QString source() const;
+ virtual bool isSearching() const { return m_started; }
+ virtual void search(FetchKey key, const QString& value);
+ virtual void continueSearch();
+ // can search title, person, isbn, keyword, or lccn. No UPC or Raw for now.
+ virtual bool canSearch(FetchKey k) const { return k == Title || k == Person || k == ISBN || k == Keyword || k == LCCN; }
+ virtual void stop();
+ virtual Data::EntryPtr fetchEntry(uint uid);
+ virtual Type type() const { return Z3950; }
+ virtual bool canFetch(int type) const;
+ virtual void readConfigHook(const KConfigGroup& config);
+ virtual void saveConfigHook(KConfigGroup& config);
+
+ virtual void updateEntry(Data::EntryPtr entry);
+ const QString& host() const { return m_host; }
+
+ static StringMap customFields(); // optional extra fields: identifier -> translated label
+
+ virtual Fetch::ConfigWidget* configWidget(QWidget* parent) const;
+
+ class ConfigWidget;
+ friend class ConfigWidget; // the config widget reads the private settings below directly
+
+ static QString defaultName();
+
+protected:
+ virtual void customEvent(QCustomEvent* event);
+
+private:
+ bool initMARC21Handler();
+ bool initUNIMARCHandler();
+ bool initMODSHandler();
+ void process();
+ void handleResult(const QString& result);
+ void done();
+
+ Z3950Connection* m_conn; // NOTE(review): ownership and lifetime are not visible in this header
+
+ QString m_host; // returned by host()
+ uint m_port;
+ QString m_dbname;
+ QString m_user;
+ QString m_password;
+ QString m_sourceCharSet;
+ QString m_syntax; // the config widget keeps a copy so the value survives a re-save
+ QString m_pqn; // prefix query notation
+ QString m_esn; // element set name
+
+ FetchKey m_key;
+ QString m_value;
+ QMap<int, Data::EntryPtr> m_entries;
+ bool m_started; // reported by isSearching()
+ bool m_done;
+ QString m_preset; // preset id; when non-empty the config widget starts with the preset box checked
+
+ XSLTHandler* m_MARC21XMLHandler;
+ XSLTHandler* m_UNIMARCXMLHandler;
+ XSLTHandler* m_MODSHandler;
+ QStringList m_fields; // handed to addFieldsWidget() by the config widget
+
+ friend class Z3950Connection;
+};
+
+class Z3950Fetcher::ConfigWidget : public Fetch::ConfigWidget { // settings page for a Z3950Fetcher
+Q_OBJECT
+
+public:
+ ConfigWidget(QWidget* parent, const Z3950Fetcher* fetcher = 0);
+ virtual ~ConfigWidget();
+ virtual void saveConfig(KConfigGroup& config_); // writes the preset id, or else the manual settings
+ virtual QString preferredName() const; // preset text, host text, or a generic fallback name
+
+private slots:
+ void slotTogglePreset(bool on); // on: enable the preset combo; off: enable the manual widgets
+ void slotPresetChanged(); // emits signalName() with the selected preset's text
+
+private:
+ void loadPresets(const QString& current); // fills m_serverCombo; current is the preset id to select
+
+ QCheckBox* m_usePreset; // checked: the preset id is saved instead of the manual settings
+ GUI::ComboBox* m_serverCombo; // preset servers; the item data is the preset id
+ GUI::LineEdit* m_hostEdit;
+ KIntSpinBox* m_portSpinBox;
+ GUI::LineEdit* m_databaseEdit;
+ GUI::LineEdit* m_userEdit;
+ GUI::LineEdit* m_passwordEdit; // shown in password echo mode
+ KComboBox* m_charSetCombo;
+ GUI::ComboBox* m_syntaxCombo; // the item data is the syntax id, empty for auto-detect
+ // have to remember syntax: it is written back when auto-detect is selected
+ QString m_syntax;
+};
+
+ } // end namespace
+} // end namespace
+#endif