/*************************************************************************** * Copyright (C) 2004-2009 by Thomas Fischer * * fischer@unix-ag.uni-kl.de * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "webquerygooglescholar.h" namespace KBibTeX { WebQueryGoogleScholarWidget::WebQueryGoogleScholarWidget( TQWidget *parent, const char *name ) : WebQueryWidget( parent, name ) { init(); Settings *settings = Settings::self(); TQString value = settings->getWebQueryDefault( "GoogleScholar" ); value = value == TQString::null ? "" : value; lineEditQuery->setText( value ); slotTextChanged( value, true ); } WebQueryGoogleScholar::WebQueryGoogleScholar( TQWidget* parent ) : WebQuery( parent ), m_transferJob( NULL ), m_transferJobBuffer( NULL ) { m_importer = new BibTeX::FileImporterBibTeX( FALSE ); m_importer->setIgnoreComments( TRUE ); m_widget = new WebQueryGoogleScholarWidget( parent ); } WebQueryGoogleScholar::~WebQueryGoogleScholar() { delete m_widget; delete m_importer; } TQString WebQueryGoogleScholar::title() { return i18n( "Google Scholar" ); } TQString WebQueryGoogleScholar::disclaimer() { return i18n( "About Google Scholar" ); } TQString WebQueryGoogleScholar::disclaimerURL() { return "http://scholar.google.com/intl/en/scholar/about.html"; } WebQueryWidget *WebQueryGoogleScholar::widget() { return m_widget; } void WebQueryGoogleScholar::query() { WebQuery::query(); /** save search term in settings */ Settings *settings = Settings::self(); settings->setWebQueryDefault( "GoogleScholar", m_widget->lineEditQuery->text() ); /** generate web-save search term */ m_searchTerm = m_widget->lineEditQuery->text().stripWhiteSpace().replace( '$', "" ); m_searchTerm = m_searchTerm.replace( "%", "%25" ).replace( "+", "%2B" ).replace( " ", "%20" ).replace( "#", "%23" ).replace( "&", "%26" ).replace( "?", "%3F" ); if ( m_searchTerm.isEmpty() ) { setEndSearch( WebQuery::statusInvalidQuery ); return; } /** initialize variables */ m_abort = false; m_numberOfResults = m_widget->spinBoxMaxHits->value(); setNumStages( m_numberOfResults + 5 ); /** reset KDE configuration for cookie handling */ readAndChangeConfig(); /** prepare HTTP request (buffer, signals, job) */ m_transferJobBuffer = new TQBuffer(); m_transferJobBuffer->open( IO_WriteOnly ); TDEIO::TransferJob* m_transferJob = TDEIO::get( KURL( "http://scholar.google.com/scholar_ncr" ), false, false ); connect( m_transferJob, SIGNAL( data( TDEIO::Job *, const TQByteArray & ) ), this, SLOT( slotData( TDEIO::Job *, const TQByteArray & ) ) ); connect( m_transferJob, SIGNAL( result( TDEIO::Job * ) ), this, SLOT( slotFinishedStartpage( TDEIO::Job * ) ) ); } void WebQueryGoogleScholar::cancelQuery() { /** user aborted search */ m_abort = true; if ( m_transferJob != NULL ) m_transferJob->kill( false ); setEndSearch( WebQuery::statusError ); } void WebQueryGoogleScholar::slotFinishedStartpage( TDEIO::Job *job ) { /** close and delete buffer (content does not matter) */ m_transferJobBuffer->close(); delete m_transferJobBuffer; /** if aborted in the mean time, clean up everything */ if ( m_abort ) { restoreConfig(); return; } /** error occurred */ if ( job->error() != 0 ) { restoreConfig(); kdDebug() << "Error in slotFinishedStartpage: " << job->error() << endl; setEndSearch( statusError ); return; } /** update progress bar */ enterNextStage(); /** prepare next HTTP request for preferences page (buffer, signals, job) */ m_transferJobBuffer = new TQBuffer(); m_transferJobBuffer->open( IO_WriteOnly ); TDEIO::TransferJob* m_transferJob = TDEIO::get( KURL( "http://scholar.google.com/scholar_preferences?hl=en" ), false, false ); connect( m_transferJob, SIGNAL( data( TDEIO::Job *, const TQByteArray & ) ), this, SLOT( slotData( TDEIO::Job *, const TQByteArray & ) ) ); connect( m_transferJob, SIGNAL( result( TDEIO::Job * ) ), this, SLOT( slotFinishedLoadingSettings( TDEIO::Job * ) ) ); } void WebQueryGoogleScholar::slotFinishedLoadingSettings( TDEIO::Job *job ) { /** close and delete buffer (content does not matter) */ m_transferJobBuffer->close(); TQString htmlCode = textFromBuffer( m_transferJobBuffer ); delete m_transferJobBuffer; /** if aborted in the mean time, clean up everything */ if ( m_abort ) { restoreConfig(); return; } /** error occurred */ if ( job->error() != 0 ) { restoreConfig(); kdDebug() << "Error in slotFinishedLoadingSettings: " << job->error() << endl; setEndSearch( statusError ); return; } /** update progress bar */ enterNextStage(); /** parse html code to get form values */ TQMap keyValues = evalFormFields( htmlCode ); /** set form values for BibTeX search */ keyValues["scis"] = "yes"; keyValues["scisf"] = "4"; keyValues["submit"] = "Save+Preferences"; keyValues["num"] = TQString::number( m_numberOfResults ); /** prepare next HTTP request to submit preferences (buffer, signals, job) */ KURL nextUrl( formFieldsToUrl( "http://scholar.google.com/scholar_setprefs", keyValues ) ); m_transferJobBuffer = new TQBuffer(); m_transferJobBuffer->open( IO_WriteOnly ); TDEIO::TransferJob* m_transferJob = TDEIO::get( nextUrl, false, false ); connect( m_transferJob, SIGNAL( data( TDEIO::Job *, const TQByteArray & ) ), this, SLOT( slotData( TDEIO::Job *, const TQByteArray & ) ) ); connect( m_transferJob, SIGNAL( result( TDEIO::Job * ) ), this, SLOT( slotFinishedSavingSettings( TDEIO::Job * ) ) ); } void WebQueryGoogleScholar::slotFinishedSavingSettings( TDEIO::Job *job ) { /** close and delete buffer (content does not matter) */ m_transferJobBuffer->close(); TQString htmlCode = textFromBuffer( m_transferJobBuffer ); delete m_transferJobBuffer; /** if aborted in the mean time, clean up everything */ if ( m_abort ) { restoreConfig(); return; } /** error occurred */ if ( job->error() != 0 ) { restoreConfig(); kdDebug() << "Error in slotFinishedSavingSettings: " << job->error() << endl; setEndSearch( statusError ); return; } /** update progress bar */ enterNextStage(); /** parse html code to get form values */ TQMap keyValues = evalFormFields( htmlCode ); /** set form values for search */ keyValues["q"] = m_searchTerm; keyValues["num"] = TQString::number( m_numberOfResults ); /** prepare next HTTP request for actual search (buffer, signals, job) */ KURL nextUrl( formFieldsToUrl( "http://scholar.google.com/scholar", keyValues ) ); m_transferJobBuffer = new TQBuffer(); m_transferJobBuffer->open( IO_WriteOnly ); TDEIO::TransferJob* m_transferJob = TDEIO::get( nextUrl, false, false ); connect( m_transferJob, SIGNAL( data( TDEIO::Job *, const TQByteArray & ) ), this, SLOT( slotData( TDEIO::Job *, const TQByteArray & ) ) ); connect( m_transferJob, SIGNAL( result( TDEIO::Job * ) ), this, SLOT( slotFinishedReceivingResultOverview( TDEIO::Job * ) ) ); } void WebQueryGoogleScholar::slotFinishedReceivingResultOverview( TDEIO::Job *job ) { /** close and delete buffer (content does not matter) */ m_transferJobBuffer->close(); TQString htmlCode = textFromBuffer( m_transferJobBuffer ); delete m_transferJobBuffer; /** if aborted in the mean time, clean up everything */ if ( m_abort ) { restoreConfig(); return; } /** error occurred */ if ( job->error() != 0 ) { restoreConfig(); kdDebug() << "Error in slotFinishedReceivingResultOverview: " << job->error() << endl; setEndSearch( statusError ); return; } /** update progress bar */ enterNextStage(); /** find all links to BibTeX files in result page */ TQRegExp reBibUrl( "/scholar.bib[^ \">]+" ); int pos = 0; while ( !m_aborted && ( pos = htmlCode.find( reBibUrl, pos + 1 ) ) > 0 ) { /** download individual BibTeX file for each search hit */ KURL bibUrl( "http://scholar.google.com" + reBibUrl.cap( 0 ).replace( "&", "&" ) ); BibTeX::File *tmpBibFile = downloadBibTeXFile( bibUrl ); /** update progress bar */ enterNextStage(); /** parse, evaluate and store first BibTeX entry */ if ( tmpBibFile != NULL ) { BibTeX::File::ElementList::iterator it = tmpBibFile->begin(); if ( it != tmpBibFile->end() ) { BibTeX::Entry *entry = dynamic_cast( *it ); if ( entry != NULL ) emit foundEntry( new BibTeX::Entry( entry ), false ); } delete tmpBibFile; } } /** restore old cookie configuration */ restoreConfig(); /** set result status */ if ( m_aborted ) setEndSearch( statusAborted ); else setEndSearch( statusSuccess ); } void WebQueryGoogleScholar::readAndChangeConfig() { TDEConfig cfg( "kcookiejarrc" ); cfg.setGroup( "Cookie Policy" ); m_originalEnableCookies = cfg.readBoolEntry( "Cookies", true ); m_originalSessionCookies = cfg.readBoolEntry( "AcceptSessionCookies", true ); TQStringList cookieSettingsList = TQStringList::split( ',', cfg.readEntry( "CookieDomainAdvice", "" ) ); m_originalCookieGlobalAdvice = cfg.readEntry( "CookieGlobalAdvice", "Accept" ); for ( TQStringList::Iterator it = cookieSettingsList.begin(); it != cookieSettingsList.end(); ++it ) { TQStringList keyValue = TQStringList::split( ':', *it ); if ( keyValue.size() == 2 ) { m_originalCookieMap[keyValue[0]] = keyValue[1]; } } cfg.writeEntry( "Cookies", true ); cfg.writeEntry( "CookieGlobalAdvice", "Accept" ); cfg.writeEntry( "AcceptSessionCookies", true ); cookieSettingsList.clear(); for ( TQMap::Iterator it = m_originalCookieMap.begin(); it != m_originalCookieMap.end(); ++it ) { TQString value = it.key().contains( ".google." ) ? "Accept" : it.data(); cookieSettingsList << it.key() + ":" + value; } cfg.writeEntry( "CookieDomainAdvice", cookieSettingsList.join( "," ) ); cfg.sync(); ( void )DCOPRef( "kded", "kcookiejar" ).send( "reloadPolicy" ); } void WebQueryGoogleScholar::restoreConfig() { TDEConfig cfg( "kcookiejarrc" ); cfg.setGroup( "Cookie Policy" ); cfg.writeEntry( "CookieGlobalAdvice", m_originalCookieGlobalAdvice ); cfg.writeEntry( "Cookies", m_originalEnableCookies ); cfg.writeEntry( "AcceptSessionCookies", m_originalSessionCookies ); TQStringList cookieSettingsList; for ( TQMap::Iterator it = m_originalCookieMap.begin(); it != m_originalCookieMap.end(); ++it ) cookieSettingsList << it.key() + ":" + it.data(); cfg.writeEntry( "CookieDomainAdvice", cookieSettingsList.join( "," ) ); cfg.sync(); if ( !m_originalEnableCookies ) ( void )DCOPRef( "kded", "kcookiejar" ).send( "shutdown" ); else ( void )DCOPRef( "kded", "kcookiejar" ).send( "reloadPolicy" ); } TQString WebQueryGoogleScholar::textFromBuffer( TQBuffer *buffer ) { TQString htmlCode = ""; buffer->open( IO_ReadOnly ); TQTextStream ts( buffer ); while ( !ts.atEnd() ) htmlCode.append( ts.readLine() ); buffer->close(); return htmlCode; } TQMap WebQueryGoogleScholar::evalFormFields( const TQString &htmlCode ) { TQMap keyValues; TQRegExp reInput( "]+>" ); TQRegExp reSplit( "[<>=\" ]+" ); int pos = 0; while (( pos = htmlCode.find( reInput, pos + 1 ) ) > 5 ) { TQStringList elements = TQStringList::split( reSplit, reInput.cap( 0 ) ); bool checked = false; bool isCheckable = false; bool isSubmit = false; TQString key = TQString::null; TQString value = TQString::null; for ( TQStringList::Iterator it = elements.begin(); it != elements.end(); ++it ) { if ( *it == "name" ) { ++it; if ( it != elements.end() ) key = *it; else break; } if ( *it == "value" ) { ++it; if ( it != elements.end() ) value = *it; else { value = ""; break; } } if ( *it == "checked" ) checked = true; if ( *it == "type" ) { ++it; if ( it == elements.end() ) break; isCheckable = *it == "radio" || *it == "checkbox"; isSubmit = *it == "submit"; } } if (( !isCheckable || checked ) && ( !isSubmit || value == "submit" ) && value != TQString::null && key != TQString::null ) { keyValues[key] = value; } } TQRegExp reSelect( "" ); reSelect.setMinimal( true ); TQRegExp reOption( "]+>" ); int pos3 = 0; while (( pos3 = htmlCode.find( reSelect, pos3 + 1 ) ) > 5 ) { TQString key = reSelect.cap( 1 ); TQString sub = reSelect.cap( 0 ); int pos2 = 0; while (( pos2 = sub.find( reOption, pos2 + 1 ) ) > 5 ) { TQStringList elements = TQStringList::split( reSplit, reOption.cap( 0 ) ); bool selected = false; TQString value = TQString::null; for ( TQStringList::Iterator it = elements.begin(); it != elements.end(); ++it ) { if ( *it == "value" ) { ++it; if ( it != elements.end() ) value = *it; else { value = ""; break; } } if ( *it == "selected" ) selected = true; } if ( selected && value != TQString::null && key != TQString::null ) { keyValues[key] = value; } } } return keyValues; } TQString WebQueryGoogleScholar::formFieldsToUrl( const TQString &prefix, const TQMap &keyValues ) { bool first = true; TQString nextUrl = prefix; for ( TQMap::ConstIterator it = keyValues.begin(); it != keyValues.end(); ++it ) { if ( first ) nextUrl.append( "?" ); else nextUrl.append( "&" ); first = false; nextUrl.append( it.key() + "=" + it.data() ); } return nextUrl; } } #include "webquerygooglescholar.moc"