/*************************************************************************** * Copyright (C) 2004-2009 by Thomas Fischer * * fischer@unix-ag.uni-kl.de * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "webquerypubmed.h" namespace KBibTeX { WebQueryPubMedWidget::WebQueryPubMedWidget( TQWidget *parent, const char *name ) : WebQueryWidget( parent, name ) { init(); Settings *settings = Settings::self(); TQString value = settings->getWebQueryDefault( "PubMed" ); value = value == TQString::null ? "" : value; lineEditQuery->setText( value ); slotTextChanged( value, true ); } WebQueryPubMed::WebQueryPubMed( TQWidget *parent ) : WebQuery( parent ) { m_widget = new WebQueryPubMedWidget( parent ); } WebQueryPubMed::~WebQueryPubMed() { delete m_widget; } TQString WebQueryPubMed::title() { return i18n( "NCBI (PubMed)" ); } TQString WebQueryPubMed::disclaimer() { return i18n( "NCBI's Disclaimer and Copyright" ); } TQString WebQueryPubMed::disclaimerURL() { return "http://eutils.ncbi.nlm.nih.gov/About/disclaimer.html"; } WebQueryWidget *WebQueryPubMed::widget() { return m_widget; } void WebQueryPubMed::query() { WebQuery::query(); Settings *settings = Settings::self(); settings->setWebQueryDefault( "PubMed", m_widget->lineEditQuery->text() ); setNumStages( 2 ); int numberOfResults = m_widget->spinBoxMaxHits->value(); TQString searchTerm = m_widget->lineEditQuery->text().stripWhiteSpace().replace( '$', "" ); if ( searchTerm.isEmpty() ) { setEndSearch( WebQuery::statusInvalidQuery ); return; } searchTerm = searchTerm.replace( "%", "%25" ).replace( "+", "%2B" ).replace( " ", "%20" ).replace( "#", "%23" ).replace( "&", "%26" ).replace( "?", "%3F" ); KURL url = KURL( TQString( "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=%2&retmax=%1&tool=KBibTeX&email=kbibtex@unix-ag.uni-kl.de" ).arg( numberOfResults ).arg( searchTerm ) ); TQString data = downloadHTML( url ); if ( data != TQString::null && !m_aborted ) { TQBuffer buffer; buffer.open( IO_WriteOnly ); TQTextStream ts( &buffer ); ts.setEncoding( TQTextStream::UnicodeUTF8 ); ts << data << endl; buffer.close(); buffer.open( IO_ReadOnly ); TQValueList intList; TQXmlInputSource inputSource( &buffer ); TQXmlSimpleReader reader; WebQueryPubMedStructureParserQuery handler( &intList ); reader.setContentHandler( &handler ); reader.parse( &inputSource ); buffer.close(); TQString ids; TQValueList::iterator it = intList.begin(); if ( it != intList.end() ) { ids.append( TQString::number( *it ) ); ++it; for ( ; it != intList.end(); ++it ) { ids.append( "," ); ids.append( TQString::number( *it ) ); } } url = KURL( TQString( "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id=%1&tool=KBibTeX&email=kbibtex@unix-ag.uni-kl.de" ).arg( ids ) ); data = downloadHTML( url ); if ( data != TQString::null && !m_aborted ) { buffer.open( IO_WriteOnly ); TQTextStream ts( &buffer ); ts.setEncoding( TQTextStream::UnicodeUTF8 ); ts << data << endl; buffer.close(); buffer.open( IO_ReadOnly ); TQDomDocument doc( "efetch'ed" ); doc.setContent( &buffer ); TQDomElement docElem = doc.documentElement(); WebQueryPubMedResultParser resultParser; connect( &resultParser, SIGNAL( foundEntry( BibTeX::Entry*, bool ) ), this, SIGNAL( foundEntry( BibTeX::Entry*, bool ) ) ); resultParser.parse( docElem ); buffer.close(); setEndSearch( WebQuery::statusSuccess ); } else if ( !m_aborted ) { TQString message = TDEIO::NetAccess::lastErrorString(); message.prepend( TQString( i18n( "Querying database '%1' failed." ) ).arg( title() ) ); KMessageBox::error( m_parent, message ); setEndSearch( WebQuery::statusError ); } else setEndSearch( WebQuery::statusAborted ); } else if ( !m_aborted ) { TQString message = TDEIO::NetAccess::lastErrorString(); if ( message.isEmpty() ) message.prepend( '\n' ); message.prepend( TQString( i18n( "Querying database '%1' failed." ) ).arg( title() ) ); KMessageBox::error( m_parent, message ); setEndSearch( WebQuery::statusError ); } else setEndSearch( WebQuery::statusAborted ); } WebQueryPubMedStructureParserQuery::WebQueryPubMedStructureParserQuery( TQValueList *intList ) : TQXmlDefaultHandler(), m_intList( intList ) { m_intList->clear(); } WebQueryPubMedStructureParserQuery::~WebQueryPubMedStructureParserQuery( ) { // nothing } bool WebQueryPubMedStructureParserQuery::startElement( const TQString & /*namespaceURI*/, const TQString & /*localName*/, const TQString & /*qName*/, const TQXmlAttributes & /*atts*/ ) { concatString = TQString(); return TRUE; } bool WebQueryPubMedStructureParserQuery::endElement( const TQString & /*namespaceURI*/, const TQString & /*localName*/, const TQString & qName ) { if ( qName == "Id" ) { bool ok; int id = concatString.toInt( &ok ); if ( ok && id > 0 && m_intList != NULL ) m_intList->append( id ); } return TRUE; } bool WebQueryPubMedStructureParserQuery::characters( const TQString & ch ) { concatString.append( ch ); return TRUE; } WebQueryPubMedResultParser::WebQueryPubMedResultParser( ) : TQObject() { // nothing } void WebQueryPubMedResultParser::parse( const TQDomElement& rootElement ) { if ( rootElement.tagName() == "PubmedArticleSet" ) for ( TQDomNode n = rootElement.firstChild(); !n.isNull(); n = n.nextSibling() ) { TQDomElement e = n.toElement(); if ( !e.isNull() && e.tagName() == "PubmedArticle" ) { BibTeX::Entry * entry = new BibTeX::Entry( BibTeX::Entry::etMisc, "PubMed" ); parsePubmedArticle( e, entry ); emit foundEntry( entry, false ); } } } WebQueryPubMedResultParser::~WebQueryPubMedResultParser() { // nothing } void WebQueryPubMedResultParser::parsePubmedArticle( const TQDomElement& element, BibTeX::Entry *entry ) { for ( TQDomNode n = element.firstChild(); !n.isNull(); n = n.nextSibling() ) { TQDomElement e = n.toElement(); if ( !e.isNull() && e.tagName() == "MedlineCitation" ) parseMedlineCitation( e, entry ); } } void WebQueryPubMedResultParser::parseMedlineCitation( const TQDomElement& element, BibTeX::Entry *entry ) { for ( TQDomNode n = element.firstChild(); !n.isNull(); n = n.nextSibling() ) { TQDomElement e = n.toElement(); if ( !e.isNull() ) { if ( e.tagName() == "PMID" ) { entry->setId( TQString( "PubMed_%1" ).arg( e.text() ) ); /** add url to pubmed website */ BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftURL ); if ( field == NULL ) { field = new BibTeX::EntryField( BibTeX::EntryField::ftURL ); entry->addField( field ); } field->setValue( new BibTeX::Value( TQString( "http://www.ncbi.nlm.nih.gov/pubmed/" ).append( e.text() ) ) ); } else if ( e.tagName() == "Article" ) parseArticle( e, entry ); else if ( e.tagName() == "MedlineJournalInfo" ) { for ( TQDomNode n2 = e.firstChild(); !n2.isNull(); n2 = n2.nextSibling() ) { TQDomElement e2 = n2.toElement(); if ( e2.tagName() == "MedlineTA" ) { BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftJournal ); if ( field == NULL ) { field = new BibTeX::EntryField( BibTeX::EntryField::ftJournal ); entry->addField( field ); } field->setValue( new BibTeX::Value( e2.text() ) ); } } } } } } void WebQueryPubMedResultParser::parseArticle( const TQDomElement& element, BibTeX::Entry *entry ) { for ( TQDomNode n = element.firstChild(); !n.isNull(); n = n.nextSibling() ) { TQDomElement e = n.toElement(); if ( e.tagName() == "Journal" ) { parseJournal( e, entry ); entry->setEntryType( BibTeX::Entry::etArticle ); } else if ( e.tagName() == "ArticleTitle" ) { BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftTitle ); if ( field == NULL ) { field = new BibTeX::EntryField( BibTeX::EntryField::ftTitle ); entry->addField( field ); } field->setValue( new BibTeX::Value( e.text() ) ); } else if ( e.tagName() == "Pagination" ) { TQDomElement medlinePgn = e.firstChild().toElement(); // may fail? if ( !medlinePgn.text().isEmpty() ) { BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftPages ); if ( field == NULL ) { field = new BibTeX::EntryField( BibTeX::EntryField::ftPages ); entry->addField( field ); } field->setValue( new BibTeX::Value( medlinePgn.text() ) ); } } else if ( e.tagName() == "Abstract" ) { TQDomElement abstractText = e.firstChild().toElement(); BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftAbstract ); if ( field == NULL ) { field = new BibTeX::EntryField( BibTeX::EntryField::ftAbstract ); entry->addField( field ); } field->setValue( new BibTeX::Value( abstractText.text() ) ); } else if ( e.tagName() == "Affiliation" ) { BibTeX::EntryField * field = entry->getField( "affiliation" ); if ( field == NULL ) { field = new BibTeX::EntryField( "affiliation" ); entry->addField( field ); } field->setValue( new BibTeX::Value( e.text() ) ); } else if ( e.tagName() == "AuthorList" ) parseAuthorList( e, entry ); } } void WebQueryPubMedResultParser::parseJournal( const TQDomElement& element, BibTeX::Entry *entry ) { for ( TQDomNode n = element.firstChild(); !n.isNull(); n = n.nextSibling() ) { TQDomElement e = n.toElement(); if ( e.tagName() == "ISSN" ) { BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftISSN ); if ( field == NULL ) { field = new BibTeX::EntryField( BibTeX::EntryField::ftISSN ); entry->addField( field ); } field->setValue( new BibTeX::Value( e.text() ) ); } else if ( e.tagName() == "JournalIssue" ) parseJournalIssue( e, entry ); else if ( e.tagName() == "Title" ) { BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftJournal ); if ( field == NULL ) { field = new BibTeX::EntryField( BibTeX::EntryField::ftJournal ); entry->addField( field ); } field->setValue( new BibTeX::Value( e.text() ) ); } } } void WebQueryPubMedResultParser::parseJournalIssue( const TQDomElement& element, BibTeX::Entry *entry ) { for ( TQDomNode n = element.firstChild(); !n.isNull(); n = n.nextSibling() ) { TQDomElement e = n.toElement(); if ( e.tagName() == "Volume" ) { BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftVolume ); if ( field == NULL ) { field = new BibTeX::EntryField( BibTeX::EntryField::ftVolume ); entry->addField( field ); } field->setValue( new BibTeX::Value( e.text() ) ); } else if ( e.tagName() == "Issue" ) { BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftNumber ); if ( field == NULL ) { field = new BibTeX::EntryField( BibTeX::EntryField::ftNumber ); entry->addField( field ); } field->setValue( new BibTeX::Value( e.text() ) ); } else if ( e.tagName() == "PubDate" ) parsePubDate( e, entry ); } } void WebQueryPubMedResultParser::parsePubDate( const TQDomElement& element, BibTeX::Entry *entry ) { for ( TQDomNode n = element.firstChild(); !n.isNull(); n = n.nextSibling() ) { TQDomElement e = n.toElement(); if ( e.tagName() == "Year" ) { BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftYear ); if ( field == NULL ) { field = new BibTeX::EntryField( BibTeX::EntryField::ftYear ); entry->addField( field ); } field->setValue( new BibTeX::Value( e.text() ) ); } else if ( e.tagName() == "Month" ) { TQString month = e.text().lower(); BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftMonth ); if ( field == NULL ) { field = new BibTeX::EntryField( BibTeX::EntryField::ftMonth ); entry->addField( field ); } BibTeX::Value *value = new BibTeX::Value(); value->items.append( new BibTeX::MacroKey( month ) ); field->setValue( value ); } else if ( e.tagName() == "MedlineDate" ) { TQStringList frags = TQStringList::split( TQRegExp( "\\s+" ), e.text() ); for ( TQStringList::Iterator it = frags.begin(); it != frags.end(); ++it ) { bool ok; int num = ( *it ).toInt( &ok ); if ( ok && num > 1000 && num < 3000 ) { BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftYear ); if ( field == NULL ) { field = new BibTeX::EntryField( BibTeX::EntryField::ftYear ); entry->addField( field ); } BibTeX::Value *value = new BibTeX::Value(); value->items.append( new BibTeX::MacroKey( TQString::number( num ) ) ); field->setValue( value ); } else if ( !ok && ( *it ).length() == 3 ) { BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftMonth ); if ( field == NULL ) { field = new BibTeX::EntryField( BibTeX::EntryField::ftMonth ); entry->addField( field ); } BibTeX::Value *value = new BibTeX::Value(); value->items.append( new BibTeX::MacroKey(( *it ).lower() ) ); field->setValue( value ); } } } } } void WebQueryPubMedResultParser::parseAuthorList( const TQDomElement& element, BibTeX::Entry *entry ) { if ( element.attribute( "CompleteYN", "Y" ) == "Y" ) { TQStringList authorList; for ( TQDomNode n = element.firstChild(); !n.isNull(); n = n.nextSibling() ) { TQDomElement e = n.toElement(); if ( e.tagName() == "Author" && e.attribute( "ValidYN", "Y" ) == "Y" ) { TQString lastName = TQString::null, firstName = TQString::null; for ( TQDomNode n2 = e.firstChild(); !n2.isNull(); n2 = n2.nextSibling() ) { TQDomElement e2 = n2.toElement(); if ( e2.tagName() == "LastName" ) lastName = e2.text(); else if ( e2.tagName() == "CollectiveName" ) lastName = e2.text(); else if ( e2.tagName() == "FirstName" || e2.tagName() == "ForeName" ) firstName = e2.text(); } TQString name = lastName; if ( !firstName.isNull() && !firstName.isEmpty() ) { if ( name.isNull() ) name = "UNSET"; name.prepend( "|" ).prepend( firstName ); } if ( !name.isNull() ) authorList.append( name ); } } BibTeX::EntryField * field = entry->getField( BibTeX::EntryField::ftAuthor ); if ( field == NULL ) { field = new BibTeX::EntryField( BibTeX::EntryField::ftAuthor ); entry->addField( field ); } BibTeX::Value *value = new BibTeX::Value(); Settings *settings = Settings::self(); BibTeX::PersonContainer *personContainer = new BibTeX::PersonContainer( settings->editing_FirstNameFirst ); value->items.append( personContainer ); for ( TQStringList::Iterator sli = authorList.begin(); sli != authorList.end(); ++sli ) { TQStringList nameParts = TQStringList::split( '|', *sli ); TQString firstName = nameParts.count() > 1 ? nameParts[0] : ""; TQString lastName = nameParts[nameParts.count() - 1]; personContainer->persons.append( new BibTeX::Person( firstName, lastName, settings->editing_FirstNameFirst ) ); } field->setValue( value ); } } } #include "webquerypubmed.moc"