summaryrefslogtreecommitdiffstats
path: root/src/webquerypubmed.cpp
diff options
context:
space:
mode:
author Slávek Banko <slavek.banko@axis.cz> 2013-06-24 02:08:15 +0200
committer Slávek Banko <slavek.banko@axis.cz> 2013-07-04 02:44:37 +0200
commit 998f21e02a725cd553d7c278819f67cd81295af4 (patch)
tree 4bd158018e9302c31367b00c01cd2b41eb228414 /src/webquerypubmed.cpp
downloadkbibtex-998f21e02a725cd553d7c278819f67cd81295af4.tar.gz
kbibtex-998f21e02a725cd553d7c278819f67cd81295af4.zip
Initial import
Diffstat (limited to 'src/webquerypubmed.cpp')
-rw-r--r--src/webquerypubmed.cpp535
1 files changed, 535 insertions, 0 deletions
diff --git a/src/webquerypubmed.cpp b/src/webquerypubmed.cpp
new file mode 100644
index 0000000..f3dcb82
--- /dev/null
+++ b/src/webquerypubmed.cpp
@@ -0,0 +1,535 @@
+/***************************************************************************
+* Copyright (C) 2004-2009 by Thomas Fischer *
+* fischer@unix-ag.uni-kl.de *
+* *
+* This program is free software; you can redistribute it and/or modify *
+* it under the terms of the GNU General Public License as published by *
+* the Free Software Foundation; either version 2 of the License, or *
+* (at your option) any later version. *
+* *
+* This program is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+* GNU General Public License for more details. *
+* *
+* You should have received a copy of the GNU General Public License *
+* along with this program; if not, write to the *
+* Free Software Foundation, Inc., *
+* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
+***************************************************************************/
+#include <qwidget.h>
+#include <qdom.h>
+#include <qapplication.h>
+#include <qstringlist.h>
+#include <qbuffer.h>
+#include <qcstring.h>
+#include <qregexp.h>
+#include <qspinbox.h>
+
+#include <klineedit.h>
+#include <kdialog.h>
+#include <klocale.h>
+#include <kurl.h>
+#include <kmessagebox.h>
+#include <kio/netaccess.h>
+
+#include <entryfield.h>
+#include <value.h>
+#include <settings.h>
+#include "webquerypubmed.h"
+
+namespace KBibTeX
+{
+ /**
+  * Builds the PubMed query widget and pre-fills its search field with the
+  * query text stored in the settings under the key "PubMed".
+  */
+ WebQueryPubMedWidget::WebQueryPubMedWidget( QWidget *parent, const char *name )
+     : WebQueryWidget( parent, name )
+ {
+     init();
+
+     /** a null string is replaced by an empty one before it reaches the line edit */
+     QString storedQuery = Settings::self()->getWebQueryDefault( "PubMed" );
+     if ( storedQuery == QString::null )
+         storedQuery = "";
+
+     lineEditQuery->setText( storedQuery );
+     slotTextChanged( storedQuery, true );
+ }
+
+ /** Creates the PubMed query object and allocates its input widget, passing the same parent on to the widget. */
+ WebQueryPubMed::WebQueryPubMed( QWidget *parent ) : WebQuery( parent )
+ {
+ m_widget = new WebQueryPubMedWidget( parent );
+ }
+
+ /** Deletes the input widget that was allocated in the constructor. */
+ WebQueryPubMed::~WebQueryPubMed()
+ {
+ delete m_widget;
+ }
+
+ /** Returns the translated title string "NCBI (PubMed)"; query() inserts it into its failure messages. */
+ QString WebQueryPubMed::title()
+ {
+ return i18n( "NCBI (PubMed)" );
+ }
+
+ /** Returns the translated label text for NCBI's disclaimer and copyright notice. */
+ QString WebQueryPubMed::disclaimer()
+ {
+ return i18n( "NCBI's Disclaimer and Copyright" );
+ }
+
+ /** Returns the (untranslated) address of NCBI's disclaimer page. */
+ QString WebQueryPubMed::disclaimerURL()
+ {
+ return "http://eutils.ncbi.nlm.nih.gov/About/disclaimer.html";
+ }
+
+ /** Returns the input widget created in the constructor; the pointer stays owned by this object, which deletes it in its destructor. */
+ WebQueryWidget *WebQueryPubMed::widget()
+ {
+ return m_widget;
+ }
+
+ /**
+  * Assembles the text shown to the user when a download failed: the
+  * translated "Querying database '...' failed." sentence, followed on a
+  * new line by KIO's last error string if there is one.
+  */
+ static QString pubMedFailureMessage( const QString &databaseTitle )
+ {
+     QString message = KIO::NetAccess::lastErrorString();
+     /** the line break is only needed when there is a detail text to separate */
+     if ( !message.isEmpty() )
+         message.prepend( '\n' );
+     message.prepend( QString( i18n( "Querying database '%1' failed." ) ).arg( databaseTitle ) );
+     return message;
+ }
+
+ /**
+  * Runs the search entered in the widget against NCBI's E-utilities.
+  *
+  * Stage one (esearch) collects the ids matching the search term, stage
+  * two (efetch) downloads the records for those ids and hands them to
+  * WebQueryPubMedResultParser; every entry the parser reports is forwarded
+  * through this object's foundEntry signal. Each path through the function
+  * ends with exactly one setEndSearch() call.
+  */
+ void WebQueryPubMed::query()
+ {
+     WebQuery::query();
+     Settings *settings = Settings::self();
+     settings->setWebQueryDefault( "PubMed", m_widget->lineEditQuery->text() );
+
+     setNumStages( 2 );
+     int numberOfResults = m_widget->spinBoxMaxHits->value();
+
+     QString searchTerm = m_widget->lineEditQuery->text().stripWhiteSpace().replace( '$', "" );
+     if ( searchTerm.isEmpty() )
+     {
+         setEndSearch( WebQuery::statusInvalidQuery );
+         return;
+     }
+
+     /** '%' is escaped first so that the escapes added afterwards are not escaped again */
+     searchTerm = searchTerm.replace( "%", "%25" ).replace( "+", "%2B" ).replace( " ", "%20" ).replace( "#", "%23" ).replace( "&", "%26" ).replace( "?", "%3F" );
+     KURL url = KURL( QString( "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=%2&retmax=%1&tool=KBibTeX&email=kbibtex@unix-ag.uni-kl.de" ).arg( numberOfResults ).arg( searchTerm ) );
+
+     /** stage 1: ask esearch for the ids matching the search term */
+     QString data = downloadHTML( url );
+     if ( m_aborted )
+     {
+         setEndSearch( WebQuery::statusAborted );
+         return;
+     }
+     if ( data == QString::null )
+     {
+         KMessageBox::error( m_parent, pubMedFailureMessage( title() ) );
+         setEndSearch( WebQuery::statusError );
+         return;
+     }
+
+     QValueList<int> intList;
+     {
+         QBuffer buffer;
+         buffer.open( IO_WriteOnly );
+         QTextStream ts( &buffer );
+         ts.setEncoding( QTextStream::UnicodeUTF8 );
+         ts << data << endl;
+         buffer.close();
+
+         buffer.open( IO_ReadOnly );
+         QXmlInputSource inputSource( &buffer );
+         QXmlSimpleReader reader;
+         WebQueryPubMedStructureParserQuery handler( &intList );
+         reader.setContentHandler( &handler );
+         reader.parse( &inputSource );
+         buffer.close();
+     }
+
+     /** no ids means no hits: there is nothing to fetch, so skip the second request */
+     if ( intList.isEmpty() )
+     {
+         setEndSearch( WebQuery::statusSuccess );
+         return;
+     }
+
+     QString ids;
+     for ( QValueList<int>::iterator it = intList.begin(); it != intList.end(); ++it )
+     {
+         if ( !ids.isEmpty() )
+             ids.append( "," );
+         ids.append( QString::number( *it ) );
+     }
+
+     /** stage 2: download the records belonging to the ids */
+     url = KURL( QString( "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id=%1&tool=KBibTeX&email=kbibtex@unix-ag.uni-kl.de" ).arg( ids ) );
+     data = downloadHTML( url );
+     if ( m_aborted )
+     {
+         setEndSearch( WebQuery::statusAborted );
+         return;
+     }
+     if ( data == QString::null )
+     {
+         KMessageBox::error( m_parent, pubMedFailureMessage( title() ) );
+         setEndSearch( WebQuery::statusError );
+         return;
+     }
+
+     /** a separate buffer, so that no bytes of the stage-one response can remain behind the new data */
+     QBuffer resultBuffer;
+     resultBuffer.open( IO_WriteOnly );
+     QTextStream resultStream( &resultBuffer );
+     resultStream.setEncoding( QTextStream::UnicodeUTF8 );
+     resultStream << data << endl;
+     resultBuffer.close();
+
+     resultBuffer.open( IO_ReadOnly );
+     QDomDocument doc( "efetch'ed" );
+     doc.setContent( &resultBuffer );
+     QDomElement docElem = doc.documentElement();
+     WebQueryPubMedResultParser resultParser;
+     connect( &resultParser, SIGNAL( foundEntry( BibTeX::Entry*, bool ) ), this, SIGNAL( foundEntry( BibTeX::Entry*, bool ) ) );
+     resultParser.parse( docElem );
+     resultBuffer.close();
+     setEndSearch( WebQuery::statusSuccess );
+ }
+
+ /**
+  * Creates a SAX handler that collects the numeric content of <Id>
+  * elements in the given list. The list is emptied here; it is not owned
+  * by the handler. A NULL list is tolerated, matching the check done in
+  * endElement().
+  */
+ WebQueryPubMedStructureParserQuery::WebQueryPubMedStructureParserQuery( QValueList<int> *intList ) : QXmlDefaultHandler(), m_intList( intList )
+ {
+     if ( m_intList != NULL )
+         m_intList->clear();
+ }
+
+ /** Empty destructor; in particular, the id list passed to the constructor is not deleted here. */
+ WebQueryPubMedStructureParserQuery::~WebQueryPubMedStructureParserQuery( )
+ {
+ // nothing
+ }
+
+ /**
+  * Called for every opening tag: discards the text gathered so far, so
+  * that endElement() only sees the characters of the innermost element.
+  * All arguments are ignored. Always returns true.
+  */
+ bool WebQueryPubMedStructureParserQuery::startElement( const QString &, const QString &, const QString &, const QXmlAttributes & )
+ {
+     concatString = QString::null;
+     return true;
+ }
+
+ /**
+  * Called for every closing tag. When an <Id> element ends, the text
+  * collected by characters() is converted to an integer and, if it is a
+  * positive number, appended to the id list. Always returns true.
+  */
+ bool WebQueryPubMedStructureParserQuery::endElement( const QString &, const QString &, const QString & qName )
+ {
+     if ( qName != "Id" )
+         return true;
+
+     bool converted;
+     const int pmid = concatString.toInt( &converted );
+     const bool usable = converted && pmid > 0;
+     if ( usable && m_intList != NULL )
+         m_intList->append( pmid );
+
+     return true;
+ }
+
+ /** Adds a chunk of character data to the text collected for the current element. Always returns true. */
+ bool WebQueryPubMedStructureParserQuery::characters( const QString & ch )
+ {
+     concatString += ch;
+     return true;
+ }
+
+ /** Creates a result parser; apart from constructing the QObject base there is nothing to set up. */
+ WebQueryPubMedResultParser::WebQueryPubMedResultParser( ) : QObject()
+ {
+ // nothing
+ }
+
+ /**
+  * Walks over the direct children of a <PubmedArticleSet> root element
+  * and, for every <PubmedArticle>, allocates a new entry, fills it via
+  * parsePubmedArticle() and emits foundEntry() with it. Any other root
+  * element is ignored. The entries are not deleted here.
+  */
+ void WebQueryPubMedResultParser::parse( const QDomElement& rootElement )
+ {
+     if ( rootElement.tagName() != "PubmedArticleSet" )
+         return;
+
+     for ( QDomNode child = rootElement.firstChild(); !child.isNull(); child = child.nextSibling() )
+     {
+         QDomElement article = child.toElement();
+         if ( article.isNull() || article.tagName() != "PubmedArticle" )
+             continue;
+
+         BibTeX::Entry *result = new BibTeX::Entry( BibTeX::Entry::etMisc, "PubMed" );
+         parsePubmedArticle( article, result );
+         emit foundEntry( result, false );
+     }
+ }
+
+ /** Empty destructor; entries handed out through foundEntry() are not deleted here. */
+ WebQueryPubMedResultParser::~WebQueryPubMedResultParser()
+ {
+ // nothing
+ }
+
+ /** Passes every <MedlineCitation> child of a <PubmedArticle> element on to parseMedlineCitation(); other children are skipped. */
+ void WebQueryPubMedResultParser::parsePubmedArticle( const QDomElement& element, BibTeX::Entry *entry )
+ {
+     QDomNode child = element.firstChild();
+     while ( !child.isNull() )
+     {
+         QDomElement citation = child.toElement();
+         const bool isCitation = !citation.isNull() && citation.tagName() == "MedlineCitation";
+         if ( isCitation )
+             parseMedlineCitation( citation, entry );
+         child = child.nextSibling();
+     }
+ }
+
+ /**
+  * Evaluates the children of a <MedlineCitation> element:
+  *  - <PMID> sets the entry's id ("PubMed_" plus the number) and its URL
+  *    field,
+  *  - <Article> is delegated to parseArticle(),
+  *  - <MedlineJournalInfo>/<MedlineTA> sets the journal field, replacing
+  *    any value stored there before.
+  */
+ void WebQueryPubMedResultParser::parseMedlineCitation( const QDomElement& element, BibTeX::Entry *entry )
+ {
+     for ( QDomNode child = element.firstChild(); !child.isNull(); child = child.nextSibling() )
+     {
+         QDomElement item = child.toElement();
+         if ( item.isNull() )
+             continue;
+
+         const QString tag = item.tagName();
+         if ( tag == "PMID" )
+         {
+             const QString pmid = item.text();
+             entry->setId( QString( "PubMed_%1" ).arg( pmid ) );
+
+             /** link to the record on the PubMed website */
+             BibTeX::EntryField *urlField = entry->getField( BibTeX::EntryField::ftURL );
+             if ( urlField == NULL )
+             {
+                 urlField = new BibTeX::EntryField( BibTeX::EntryField::ftURL );
+                 entry->addField( urlField );
+             }
+             urlField->setValue( new BibTeX::Value( QString( "http://www.ncbi.nlm.nih.gov/pubmed/" ) + pmid ) );
+         }
+         else if ( tag == "Article" )
+         {
+             parseArticle( item, entry );
+         }
+         else if ( tag == "MedlineJournalInfo" )
+         {
+             for ( QDomNode infoNode = item.firstChild(); !infoNode.isNull(); infoNode = infoNode.nextSibling() )
+             {
+                 QDomElement info = infoNode.toElement();
+                 if ( info.tagName() != "MedlineTA" )
+                     continue;
+
+                 BibTeX::EntryField *journalField = entry->getField( BibTeX::EntryField::ftJournal );
+                 if ( journalField == NULL )
+                 {
+                     journalField = new BibTeX::EntryField( BibTeX::EntryField::ftJournal );
+                     entry->addField( journalField );
+                 }
+                 journalField->setValue( new BibTeX::Value( info.text() ) );
+             }
+         }
+     }
+ }
+
+ /**
+  * Copies the contents of an <Article> element into the entry.
+  *
+  *  - <Journal>      delegated to parseJournal(); the entry becomes an article
+  *  - <ArticleTitle> title field
+  *  - <Pagination>   pages field, taken from <MedlinePgn> if present
+  *  - <Abstract>     abstract field, all <AbstractText> sections joined
+  *  - <Affiliation>  free-form "affiliation" field
+  *  - <AuthorList>   delegated to parseAuthorList()
+  *
+  * Fields that already exist in the entry get their value replaced.
+  */
+ void WebQueryPubMedResultParser::parseArticle( const QDomElement& element, BibTeX::Entry *entry )
+ {
+     for ( QDomNode n = element.firstChild(); !n.isNull(); n = n.nextSibling() )
+     {
+         QDomElement e = n.toElement();
+
+         if ( e.tagName() == "Journal" )
+         {
+             parseJournal( e, entry );
+             entry->setEntryType( BibTeX::Entry::etArticle );
+         }
+         else if ( e.tagName() == "ArticleTitle" )
+         {
+             BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftTitle );
+             if ( field == NULL )
+             {
+                 field = new BibTeX::EntryField( BibTeX::EntryField::ftTitle );
+                 entry->addField( field );
+             }
+             field->setValue( new BibTeX::Value( e.text() ) );
+         }
+         else if ( e.tagName() == "Pagination" )
+         {
+             /** look the page range up by name; the first child is only a fallback as it need not be <MedlinePgn> */
+             QDomElement medlinePgn = e.namedItem( "MedlinePgn" ).toElement();
+             if ( medlinePgn.isNull() )
+                 medlinePgn = e.firstChild().toElement();
+             if ( !medlinePgn.text().isEmpty() )
+             {
+                 BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftPages );
+                 if ( field == NULL )
+                 {
+                     field = new BibTeX::EntryField( BibTeX::EntryField::ftPages );
+                     entry->addField( field );
+                 }
+                 field->setValue( new BibTeX::Value( medlinePgn.text() ) );
+             }
+         }
+         else if ( e.tagName() == "Abstract" )
+         {
+             /** structured abstracts consist of several <AbstractText> sections; keep all of them */
+             QString abstractText;
+             for ( QDomNode n2 = e.firstChild(); !n2.isNull(); n2 = n2.nextSibling() )
+             {
+                 QDomElement e2 = n2.toElement();
+                 if ( e2.isNull() || e2.tagName() != "AbstractText" )
+                     continue;
+
+                 QString section = e2.text();
+                 if ( section.stripWhiteSpace().isEmpty() )
+                     continue;
+
+                 /** a section heading, if given, is put in front of its text */
+                 const QString label = e2.attribute( "Label" );
+                 if ( !label.isEmpty() )
+                     section.prepend( ": " ).prepend( label );
+
+                 if ( !abstractText.isEmpty() )
+                     abstractText.append( " " );
+                 abstractText.append( section );
+             }
+
+             /** no <AbstractText> at all: fall back to the text of the first child */
+             if ( abstractText.isEmpty() )
+                 abstractText = e.firstChild().toElement().text();
+
+             /** like for the page range, an empty text does not create a field */
+             if ( !abstractText.isEmpty() )
+             {
+                 BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftAbstract );
+                 if ( field == NULL )
+                 {
+                     field = new BibTeX::EntryField( BibTeX::EntryField::ftAbstract );
+                     entry->addField( field );
+                 }
+                 field->setValue( new BibTeX::Value( abstractText ) );
+             }
+         }
+         else if ( e.tagName() == "Affiliation" )
+         {
+             BibTeX::EntryField * field = entry->getField( "affiliation" );
+             if ( field == NULL )
+             {
+                 field = new BibTeX::EntryField( "affiliation" );
+                 entry->addField( field );
+             }
+             field->setValue( new BibTeX::Value( e.text() ) );
+         }
+         else if ( e.tagName() == "AuthorList" )
+             parseAuthorList( e, entry );
+     }
+ }
+
+ /**
+  * Evaluates the children of a <Journal> element: <ISSN> fills the ISSN
+  * field, <Title> fills the journal field and <JournalIssue> is delegated
+  * to parseJournalIssue(). Existing field values are replaced.
+  */
+ void WebQueryPubMedResultParser::parseJournal( const QDomElement& element, BibTeX::Entry *entry )
+ {
+     for ( QDomNode child = element.firstChild(); !child.isNull(); child = child.nextSibling() )
+     {
+         QDomElement item = child.toElement();
+         const QString tag = item.tagName();
+
+         if ( tag == "JournalIssue" )
+         {
+             parseJournalIssue( item, entry );
+         }
+         else if ( tag == "ISSN" )
+         {
+             BibTeX::EntryField *issnField = entry->getField( BibTeX::EntryField::ftISSN );
+             if ( issnField == NULL )
+             {
+                 issnField = new BibTeX::EntryField( BibTeX::EntryField::ftISSN );
+                 entry->addField( issnField );
+             }
+             issnField->setValue( new BibTeX::Value( item.text() ) );
+         }
+         else if ( tag == "Title" )
+         {
+             BibTeX::EntryField *journalField = entry->getField( BibTeX::EntryField::ftJournal );
+             if ( journalField == NULL )
+             {
+                 journalField = new BibTeX::EntryField( BibTeX::EntryField::ftJournal );
+                 entry->addField( journalField );
+             }
+             journalField->setValue( new BibTeX::Value( item.text() ) );
+         }
+     }
+ }
+
+ /**
+  * Evaluates the children of a <JournalIssue> element: <Volume> fills the
+  * volume field, <Issue> fills the number field and <PubDate> is
+  * delegated to parsePubDate(). Existing field values are replaced.
+  */
+ void WebQueryPubMedResultParser::parseJournalIssue( const QDomElement& element, BibTeX::Entry *entry )
+ {
+     for ( QDomNode child = element.firstChild(); !child.isNull(); child = child.nextSibling() )
+     {
+         QDomElement item = child.toElement();
+         const QString tag = item.tagName();
+
+         if ( tag == "PubDate" )
+         {
+             parsePubDate( item, entry );
+         }
+         else if ( tag == "Volume" )
+         {
+             BibTeX::EntryField *volumeField = entry->getField( BibTeX::EntryField::ftVolume );
+             if ( volumeField == NULL )
+             {
+                 volumeField = new BibTeX::EntryField( BibTeX::EntryField::ftVolume );
+                 entry->addField( volumeField );
+             }
+             volumeField->setValue( new BibTeX::Value( item.text() ) );
+         }
+         else if ( tag == "Issue" )
+         {
+             BibTeX::EntryField *numberField = entry->getField( BibTeX::EntryField::ftNumber );
+             if ( numberField == NULL )
+             {
+                 numberField = new BibTeX::EntryField( BibTeX::EntryField::ftNumber );
+                 entry->addField( numberField );
+             }
+             numberField->setValue( new BibTeX::Value( item.text() ) );
+         }
+     }
+ }
+
+ /** Tells whether the (lower-case) token is one of the twelve three-letter month macros "jan" ... "dec". */
+ static bool isPubMedMonthMacro( const QString &token )
+ {
+     static const char *monthMacros[] = { "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec" };
+     for ( int i = 0; i < 12; ++i )
+         if ( token == monthMacros[i] )
+             return true;
+     return false;
+ }
+
+ /**
+  * Evaluates the children of a <PubDate> element.
+  *
+  *  - <Year> is stored as plain text in the year field.
+  *  - <Month> is stored, lower-cased, as a macro in the month field.
+  *  - <MedlineDate> is free text; it is split at white space and every
+  *    fragment is inspected: a number between 1000 and 3000 (exclusive)
+  *    becomes the year (as plain text, like <Year>), a three-letter month
+  *    abbreviation becomes the month macro. Later fragments replace the
+  *    values set by earlier ones.
+  */
+ void WebQueryPubMedResultParser::parsePubDate( const QDomElement& element, BibTeX::Entry *entry )
+ {
+     for ( QDomNode n = element.firstChild(); !n.isNull(); n = n.nextSibling() )
+     {
+         QDomElement e = n.toElement();
+
+         if ( e.tagName() == "Year" )
+         {
+             BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftYear );
+             if ( field == NULL )
+             {
+                 field = new BibTeX::EntryField( BibTeX::EntryField::ftYear );
+                 entry->addField( field );
+             }
+             field->setValue( new BibTeX::Value( e.text() ) );
+         }
+         else if ( e.tagName() == "Month" )
+         {
+             QString month = e.text().lower();
+             BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftMonth );
+             if ( field == NULL )
+             {
+                 field = new BibTeX::EntryField( BibTeX::EntryField::ftMonth );
+                 entry->addField( field );
+             }
+             BibTeX::Value *value = new BibTeX::Value();
+             value->items.append( new BibTeX::MacroKey( month ) );
+             field->setValue( value );
+         }
+         else if ( e.tagName() == "MedlineDate" )
+         {
+             QStringList frags = QStringList::split( QRegExp( "\\s+" ), e.text() );
+             for ( QStringList::Iterator it = frags.begin(); it != frags.end(); ++it )
+             {
+                 bool ok;
+                 int num = ( *it ).toInt( &ok );
+                 if ( ok && num > 1000 && num < 3000 )
+                 {
+                     BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftYear );
+                     if ( field == NULL )
+                     {
+                         field = new BibTeX::EntryField( BibTeX::EntryField::ftYear );
+                         entry->addField( field );
+                     }
+                     /** a year is text, not a macro: store it the same way as for <Year> */
+                     field->setValue( new BibTeX::Value( QString::number( num ) ) );
+                 }
+                 else if ( !ok && isPubMedMonthMacro( ( *it ).lower() ) )
+                 {
+                     /** only real month abbreviations qualify, not just any three-letter fragment */
+                     BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftMonth );
+                     if ( field == NULL )
+                     {
+                         field = new BibTeX::EntryField( BibTeX::EntryField::ftMonth );
+                         entry->addField( field );
+                     }
+                     BibTeX::Value *value = new BibTeX::Value();
+                     value->items.append( new BibTeX::MacroKey(( *it ).lower() ) );
+                     field->setValue( value );
+                 }
+             }
+         }
+     }
+ }
+
+ /**
+  * Turns an <AuthorList> element into the entry's author field.
+  *
+  * Lists marked CompleteYN="N" are ignored altogether, as are authors
+  * marked ValidYN="N". For every remaining <Author> the last name comes
+  * from <LastName> or <CollectiveName>, the first name from <FirstName> or
+  * <ForeName>. An author with a first name but no last name gets the last
+  * name "UNSET"; an author without any name is skipped.
+  *
+  * The author field is (re)set even if no author was found.
+  */
+ void WebQueryPubMedResultParser::parseAuthorList( const QDomElement& element, BibTeX::Entry *entry )
+ {
+     if ( element.attribute( "CompleteYN", "Y" ) != "Y" )
+         return;
+
+     Settings *settings = Settings::self();
+     BibTeX::Value *value = new BibTeX::Value();
+     BibTeX::PersonContainer *personContainer = new BibTeX::PersonContainer( settings->editing_FirstNameFirst );
+     value->items.append( personContainer );
+
+     for ( QDomNode n = element.firstChild(); !n.isNull(); n = n.nextSibling() )
+     {
+         QDomElement e = n.toElement();
+         if ( e.tagName() != "Author" || e.attribute( "ValidYN", "Y" ) != "Y" )
+             continue;
+
+         QString lastName = QString::null, firstName = QString::null;
+         for ( QDomNode n2 = e.firstChild(); !n2.isNull(); n2 = n2.nextSibling() )
+         {
+             QDomElement e2 = n2.toElement();
+             if ( e2.tagName() == "LastName" )
+                 lastName = e2.text();
+             else if ( e2.tagName() == "CollectiveName" )
+                 lastName = e2.text();
+             else if ( e2.tagName() == "FirstName" || e2.tagName() == "ForeName" )
+                 firstName = e2.text();
+         }
+
+         /** isEmpty() is also true for null strings, i.e. for elements that were not present */
+         const bool hasFirstName = !firstName.isEmpty();
+         const bool hasLastName = !lastName.isEmpty();
+         if ( !hasFirstName && !hasLastName )
+             continue;
+
+         if ( !hasLastName )
+             lastName = "UNSET";
+         if ( !hasFirstName )
+             firstName = "";
+
+         /** the person is built right away; no detour via a joined string that has to be split again */
+         personContainer->persons.append( new BibTeX::Person( firstName, lastName, settings->editing_FirstNameFirst ) );
+     }
+
+     BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftAuthor );
+     if ( field == NULL )
+     {
+         field = new BibTeX::EntryField( BibTeX::EntryField::ftAuthor );
+         entry->addField( field );
+     }
+     field->setValue( value );
+ }
+
+}
+#include "webquerypubmed.moc"