/***************************************************************************
    copyright            : (C) 2005-2006 by Robby Stephenson
    email                : $EMAIL
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This file has been modified to match the requirements of KBibTeX.     *
 *   In case of problems or bugs arising from this implementation, please  *
 *   contact the KBibTeX team first.                                       *
 *                             Thomas Fischer                              *
 *                                                                         *
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of version 2 of the GNU General Public License as  *
 *   published by the Free Software Foundation;                            *
 *                                                                         *
 ***************************************************************************/

// NOTE(review): the system header names below were reconstructed from the
// symbols this file uses (kdDebug, i18n, tqApp, ZOOM_*, yaz_marc_*,
// YAZ_VERSIONL, TQFile, sleep) -- confirm them against the build.
#include "z3950connection.h"
// #include "z3950fetcher.h"
#include "messagehandler.h"
#include "latin1literal.h"
#include <kdebug.h>
#include "iso5426converter.h"
#include "iso6937converter.h"

#include <tdelocale.h>
#include <tqapplication.h>

#ifdef HAVE_YAZ
extern "C"
{
#include <yaz/zoom.h>
#include <yaz/marcdisp.h>
#include <yaz/yaz-version.h>
}
#endif

#include <tqfile.h>
#include <unistd.h>

namespace
{
    // Number of records requested per call to run().
    static const size_t Z3950_DEFAULT_MAX_RECORDS = 20;
}

using KBibTeX::Z3950ResultFound;
using KBibTeX::Z3950Connection;

/**
 * Event carrying one result record; keeps a deep copy of the text and bumps
 * the counter of results that have been created but not yet destroyed.
 */
Z3950ResultFound::Z3950ResultFound( const TQString& s ) : TQCustomEvent( uid() )
        , m_result( TQDeepCopy<TQString>( s ) )
{
    ++Z3950Connection::resultsLeft;
}

Z3950ResultFound::~Z3950ResultFound()
{
    --Z3950Connection::resultsLeft;
}

/**
 * Holder for the YAZ handles. Both handles start out null so that the
 * destructor is safe even if makeConnection() was never called, and
 * release() nulls them again so that they are never destroyed twice.
 */
class Z3950Connection::Private
{
public:
#ifdef HAVE_YAZ
    Private() : conn_opt( 0 ), conn( 0 ) {}

    ~Private()
    {
        release();
    }

    /** Destroys whatever handles are currently held and resets them to null. */
    void release()
    {
        if ( conn_opt )
        {
            ZOOM_options_destroy( conn_opt );
            conn_opt = 0;
        }
        if ( conn )
        {
            ZOOM_connection_destroy( conn );
            conn = 0;
        }
    }

    ZOOM_options conn_opt;
    ZOOM_connection conn;
#else
    Private() {}
#endif
};

int Z3950Connection::resultsLeft = 0;

// since the character set goes into a yaz api call
// I'm paranoid about user insertions, so just grab 64
// characters at most
Z3950Connection::Z3950Connection( TQObject* fetcher, const TQString& host, uint port, const TQString& dbname, const TQString& sourceCharSet, const TQString& syntax, const TQString& esn )
        : TQThread()
        , d( new Private() )
        , m_connected( false )
        , m_aborted( false )
        , m_fetcher( fetcher )
        , m_host( TQDeepCopy<TQString>( host ) )
        , m_port( port )
        , m_dbname( TQDeepCopy<TQString>( dbname ) )
        , m_sourceCharSet( TQDeepCopy<TQString>( sourceCharSet.left( 64 ) ) )
        , m_syntax( TQDeepCopy<TQString>( syntax ) )
        , m_esn( TQDeepCopy<TQString>( esn ) )
        , m_start( 0 )
        , m_limit( Z3950_DEFAULT_MAX_RECORDS )
        , m_hasMore( false )
{
}

Z3950Connection::~Z3950Connection()
{
    m_connected = false;
    delete d;
    d = 0;
}

/** Rewinds the paging window to the first batch of records. */
void Z3950Connection::reset()
{
    m_start = 0;
    m_limit = Z3950_DEFAULT_MAX_RECORDS;
}

/**
 * Stores the prefix query (PQN) to run and caps the number of records to
 * fetch at Z3950_DEFAULT_MAX_RECORDS.
 */
void Z3950Connection::setQuery( const TQString& query_, unsigned int numHits )
{
    m_pqn = TQDeepCopy<TQString>( query_ );
    m_limit = Z3950_DEFAULT_MAX_RECORDS < numHits ? Z3950_DEFAULT_MAX_RECORDS : numHits;
}

void Z3950Connection::setUserPassword( const TQString& user_, const TQString& pword_ )
{
    m_user = TQDeepCopy<TQString>( user_ );
    m_password = TQDeepCopy<TQString>( pword_ );
}

/**
 * Thread body: connects if necessary, runs the stored query, negotiates a
 * record syntax this code understands, posts one Z3950ResultFound event per
 * record to m_fetcher and finishes with a Z3950ConnectionDone event.
 */
void Z3950Connection::run()
{
//  kdDebug() << "Z3950Connection::run() - " << m_fetcher->source() << endl;
    m_aborted = false;
    m_hasMore = false;
    resultsLeft = 0;
#ifdef HAVE_YAZ

    // NOTE(review): makeConnection() already reports its failure through
    // done( msg, type ), so this path posts a second "done" event -- confirm
    // that the receiver tolerates that.
    if ( !makeConnection() )
    {
        done();
        return;
    }

    ZOOM_query query = ZOOM_query_create();
    kdDebug() << "Z3950Connection::run() - pqn = " << toCString( m_pqn ) << endl;
    int errcode = ZOOM_query_prefix( query, toCString( m_pqn ) );
    if ( errcode != 0 )
    {
        kdDebug() << "Z3950Connection::run() - query error: " << m_pqn << endl;
        ZOOM_query_destroy( query );
        TQString s = i18n( "Query error!" );
        s += ' ' + m_pqn;
        done( s, MessageHandler::Error );
        return;
    }

    ZOOM_resultset resultSet = ZOOM_connection_search( d->conn, query );

    // check abort status
    if ( m_aborted )
    {
        // the result set and the query are owned by this function; release
        // them on this path as on every other exit
        ZOOM_resultset_destroy( resultSet );
        ZOOM_query_destroy( query );
        done();
        return;
    }

    // I know the LOC wants the syntax = "xml" and esn = "mods"
    // to get MODS data, that seems a bit odd...
    // esn only makes sense for marc and grs-1
    // if syntax is mods, set esn to mods too
    TQCString type = "raw";
    if ( m_syntax == Latin1Literal( "mods" ) )
    {
        m_syntax = TQString::fromLatin1( "xml" );
        ZOOM_resultset_option_set( resultSet, "elementSetName", "mods" );
        type = "xml";
    }
    else
    {
        ZOOM_resultset_option_set( resultSet, "elementSetName", m_esn.latin1() );
    }
    ZOOM_resultset_option_set( resultSet, "start", TQCString().setNum( m_start ) );
    ZOOM_resultset_option_set( resultSet, "count", TQCString().setNum( m_limit - m_start ) );
    // search in default syntax, unless syntax is already set
    if ( !m_syntax.isEmpty() )
    {
        ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", m_syntax.latin1() );
    }

    const char* errmsg;
    const char* addinfo;
    errcode = ZOOM_connection_error( d->conn, &errmsg, &addinfo );
    if ( errcode != 0 )
    {
        ZOOM_resultset_destroy( resultSet );
        ZOOM_query_destroy( query );
        m_connected = false;

        TQString s = i18n( "Connection search error %1: %2" ).arg( errcode ).arg( toString( errmsg ) );
        if ( !TQCString( addinfo ).isEmpty() )
        {
            s += " (" + toString( addinfo ) + ")";
        }
        kdDebug() << "Z3950Connection::run() - " << s << endl;
        done( s, MessageHandler::Error );
        return;
    }

    const size_t numResults = ZOOM_resultset_size( resultSet );

    TQString newSyntax = m_syntax;
    if ( numResults > 0 )
    {
        kdDebug() << "Z3950Connection::run() - current syntax is " << m_syntax << " (" << numResults << " results)" << endl;
        // so now we know that results exist, might have to check syntax
        int len = 0; // initialised so the checks below never read garbage
        ZOOM_record rec = ZOOM_resultset_record( resultSet, 0 );
        // want raw unless it's mods
        ZOOM_record_get( rec, type, &len );
        if ( len > 0 && m_syntax.isEmpty() )
        {
            newSyntax = TQString::fromLatin1( ZOOM_record_get( rec, "syntax", &len ) ).lower();
            kdDebug() << "Z3950Connection::run() - syntax guess is " << newSyntax << endl;
            if ( newSyntax == Latin1Literal( "mods" ) || newSyntax == Latin1Literal( "xml" ) )
            {
                m_syntax = TQString::fromLatin1( "xml" );
                ZOOM_resultset_option_set( resultSet, "elementSetName", "mods" );
            }
            else if ( newSyntax == Latin1Literal( "grs-1" ) )
            {
                // if it's defaulting to grs-1, go ahead and change it to try to get a marc
                // record since grs-1 is a last resort for us
                newSyntax.truncate( 0 );
            }
        }
        // right now, we just understand mods, unimarc, marc21/usmarc, and grs-1
        if ( newSyntax != Latin1Literal( "xml" ) &&
                newSyntax != Latin1Literal( "usmarc" ) &&
                newSyntax != Latin1Literal( "marc21" ) &&
                newSyntax != Latin1Literal( "unimarc" ) &&
                newSyntax != Latin1Literal( "grs-1" ) )
        {
            kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to MODS" << endl;
            newSyntax = TQString::fromLatin1( "xml" );
            ZOOM_resultset_option_set( resultSet, "elementSetName", "mods" );
            ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() );
            rec = ZOOM_resultset_record( resultSet, 0 );
            ZOOM_record_get( rec, "xml", &len );
            if ( len == 0 )
            {
                // change set name back
                ZOOM_resultset_option_set( resultSet, "elementSetName", m_esn.latin1() );
                newSyntax = TQString::fromLatin1( "usmarc" ); // try usmarc
                kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to USMARC" << endl;
                ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() );
                rec = ZOOM_resultset_record( resultSet, 0 );
                ZOOM_record_get( rec, "raw", &len );
            }
            if ( len == 0 )
            {
                newSyntax = TQString::fromLatin1( "marc21" ); // try marc21
                kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to MARC21" << endl;
                ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() );
                rec = ZOOM_resultset_record( resultSet, 0 );
                ZOOM_record_get( rec, "raw", &len );
            }
            if ( len == 0 )
            {
                newSyntax = TQString::fromLatin1( "unimarc" ); // try unimarc
                kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to UNIMARC" << endl;
                ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() );
                rec = ZOOM_resultset_record( resultSet, 0 );
                ZOOM_record_get( rec, "raw", &len );
            }
            if ( len == 0 )
            {
                newSyntax = TQString::fromLatin1( "grs-1" ); // try grs-1
                kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to GRS-1" << endl;
                ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() );
                rec = ZOOM_resultset_record( resultSet, 0 );
                ZOOM_record_get( rec, "raw", &len );
            }
            if ( len == 0 )
            {
                kdDebug() << "Z3950Connection::run() - giving up" << endl;
                ZOOM_resultset_destroy( resultSet );
                ZOOM_query_destroy( query );
                done( i18n( "Record syntax error" ), MessageHandler::Error );
                return;
            }
            kdDebug() << "Z3950Connection::run() - final syntax is " << newSyntax << endl;
        }
    }

    // go back to fooling ourselves and calling it mods
    if ( m_syntax == Latin1Literal( "xml" ) )
    {
        m_syntax = TQString::fromLatin1( "mods" );
    }
    if ( newSyntax == Latin1Literal( "xml" ) )
    {
        newSyntax = TQString::fromLatin1( "mods" );
    }
    // save syntax change for next time
    if ( m_syntax != newSyntax )
    {
        tqApp->postEvent( m_fetcher, new Z3950SyntaxChange( newSyntax ) );
        m_syntax = newSyntax;
    }

    if ( m_sourceCharSet.isEmpty() )
    {
        m_sourceCharSet = TQString::fromLatin1( "marc-8" );
    }

    const size_t realLimit = TQMIN( numResults, m_limit );

    for ( size_t i = m_start; i < realLimit && !m_aborted; ++i )
    {
        kdDebug() << "Z3950Connection::run() - grabbing index " << i << endl;
        ZOOM_record rec = ZOOM_resultset_record( resultSet, i );
        if ( !rec )
        {
            kdDebug() << "Z3950Connection::run() - no record returned for index " << i << endl;
            continue;
        }
        int len = 0;
        TQString data;
        if ( m_syntax == Latin1Literal( "mods" ) )
        {
            data = toString( ZOOM_record_get( rec, "xml", &len ) );
        }
        else if ( m_syntax == Latin1Literal( "grs-1" ) )   // grs-1
        {
            // we're going to parse the rendered data, very ugly...
            data = toString( ZOOM_record_get( rec, "render", &len ) );
        }
        else
        {
#if 0
            kdWarning() << "Remove debug from z3950connection.cpp" << endl;
            {
                TQFile f1( TQString::fromLatin1( "/tmp/z3950.raw" ) );
                if ( f1.open( IO_WriteOnly ) )
                {
                    TQDataStream t( &f1 );
                    t << ZOOM_record_get( rec, "raw", &len );
                }
                f1.close();
            }
#endif
            data = toXML( ZOOM_record_get( rec, "raw", &len ), m_sourceCharSet );
        }
        Z3950ResultFound* ev = new Z3950ResultFound( data );
        TQApplication::postEvent( m_fetcher, ev );
    }

    ZOOM_resultset_destroy( resultSet );
    ZOOM_query_destroy( query );

    // remember where the next batch starts if the server has more records
    m_hasMore = m_limit < numResults;
    if ( m_hasMore )
    {
        m_start = m_limit;
        m_limit += Z3950_DEFAULT_MAX_RECORDS;
    }
#endif
    done();
}

/**
 * Opens the ZOOM connection unless one is already marked as connected.
 * On failure it reports the error through done( msg, type ) and returns false.
 */
bool Z3950Connection::makeConnection()
{
    if ( m_connected )
    {
        return true;
    }
//  kdDebug() << "Z3950Connection::makeConnection() - " << m_fetcher->source() << endl;
// I don't know what to do except assume database, user, and password are in locale encoding
#ifdef HAVE_YAZ
    // run() clears m_connected after a search error without touching the
    // handles, so drop any stale ones before creating new ones
    d->release();

    d->conn_opt = ZOOM_options_create();
    ZOOM_options_set( d->conn_opt, "implementationName", "KBibTeX" );
    ZOOM_options_set( d->conn_opt, "databaseName",       toCString( m_dbname ) );
    ZOOM_options_set( d->conn_opt, "user",               toCString( m_user ) );
    ZOOM_options_set( d->conn_opt, "password",           toCString( m_password ) );

    d->conn = ZOOM_connection_create( d->conn_opt );
    ZOOM_connection_connect( d->conn, m_host.latin1(), m_port );

    int errcode;
    const char* errmsg; // text that accompanies 'errcode'
    const char* addinfo;
    errcode = ZOOM_connection_error( d->conn, &errmsg, &addinfo );
    if ( errcode != 0 )
    {
        // Build the message before releasing the handles: the strings were
        // handed out by the connection and may not outlive it.
        TQString s = i18n( "Connection error %1: %2" ).arg( errcode ).arg( toString( errmsg ) );
        if ( !TQCString( addinfo ).isEmpty() )
        {
            s += " (" + toString( addinfo ) + ")";
        }

        // release() also nulls the handles, so ~Private() will not destroy
        // them a second time
        d->release();
        m_connected = false;

        kdDebug() << "Z3950Connection::makeConnection() - " << s << endl;
        done( s, MessageHandler::Error );
        return false;
    }
#endif
    m_connected = true;
    return true;
}

/** Posts the plain "finished" event to the fetcher. */
void Z3950Connection::done()
{
    checkPendingEvents();
    tqApp->postEvent( m_fetcher, new Z3950ConnectionDone( m_hasMore ) );
}

/**
 * Posts the "finished" event together with a message; the message is
 * dropped when the run was aborted.
 */
void Z3950Connection::done( const TQString& msg_, int type_ )
{
    checkPendingEvents();
    if ( m_aborted )
    {
        tqApp->postEvent( m_fetcher, new Z3950ConnectionDone( m_hasMore ) );
    }
    else
    {
        tqApp->postEvent( m_fetcher, new Z3950ConnectionDone( m_hasMore, msg_, type_ ) );
    }
}

void Z3950Connection::checkPendingEvents()
{
    // if there's still some pending result events, go ahead and just wait 1 second
    if ( resultsLeft > 0 )
    {
        sleep( 1 );
    }
}

/** Converts text from UTF-8 into the configured source character set. */
inline
TQCString Z3950Connection::toCString( const TQString& text_ )
{
    return iconvRun( text_.utf8(), TQString::fromLatin1( "utf-8" ), m_sourceCharSet );
}

/** Converts text from the configured source character set into a TQString. */
inline
TQString Z3950Connection::toString( const TQCString& text_ )
{
    return TQString::fromUtf8( iconvRun( text_, m_sourceCharSet, TQString::fromLatin1( "utf-8" ) ) );
}

/**
 * Converts text_ from fromCharSet_ to toCharSet_ through yaz_iconv.
 * ISO 5426 and ISO 6937 input is handled by the project converters when
 * yaz_iconv cannot open the conversion. The input is returned unchanged if
 * it is empty, the two character sets are equal, the conversion is
 * unsupported or fails, or YAZ is not available.
 */
// static
TQCString Z3950Connection::iconvRun( const TQCString& text_, const TQString& fromCharSet_, const TQString& toCharSet_ )
{
#ifdef HAVE_YAZ
    if ( text_.isEmpty() )
    {
        return text_;
    }

    if ( fromCharSet_ == toCharSet_ )
    {
        return text_;
    }

    yaz_iconv_t cd = yaz_iconv_open( toCharSet_.latin1(), fromCharSet_.latin1() );
    if ( !cd )
    {
        // maybe it's iso 5426, which we sorta support
        TQString charSetLower = fromCharSet_.lower();
        charSetLower.remove( '-' ).remove( ' ' );
        if ( charSetLower == Latin1Literal( "iso5426" ) )
        {
            return iconvRun( Iso5426Converter::toUtf8( text_ ).utf8(), TQString::fromLatin1( "utf-8" ), toCharSet_ );
        }
        else if ( charSetLower == Latin1Literal( "iso6937" ) )
        {
            return iconvRun( Iso6937Converter::toUtf8( text_ ).utf8(), TQString::fromLatin1( "utf-8" ), toCharSet_ );
        }
        kdWarning() << "Z3950Connection::iconvRun() - conversion from " << fromCharSet_
        << " to " << toCharSet_ << " is unsupported" << endl;
        return text_;
    }

    const char* input = text_;
    size_t inlen = text_.length();

    size_t outlen = 2 * inlen;  // this is enough, right?
    TQMemArray<char> result0( outlen );
    char* result = result0.data();

    int r = yaz_iconv( cd, const_cast<char**>( &input ), &inlen, &result, &outlen );
    if ( r <= 0 )
    {
        kdDebug() << "Z3950Connection::iconvRun() - can't decode buffer" << endl;
        yaz_iconv_close( cd ); // do not leak the converter on failure
        return text_;
    }
    // bug in yaz, need to flush buffer to catch last character
    yaz_iconv( cd, 0, 0, &result, &outlen );

    // length is pointer difference
    size_t len = result - result0.data();

    TQCString output = TQCString( result0.data(), len + 1 );
//  kdDebug() << "-------------------------------------------" << endl;
//  kdDebug() << output << endl;
//  kdDebug() << "-------------------------------------------" << endl;
    yaz_iconv_close( cd );
    return output;
#else
    return text_;
#endif
}

/**
 * Decodes a raw MARC record into MARCXML text, converting from charSet_ to
 * UTF-8 on the way. Returns TQString::null if the record is empty, the
 * character set is unsupported, the length header is out of range, decoding
 * fails, or YAZ is not available.
 */
TQString Z3950Connection::toXML( const TQCString& marc_, const TQString& charSet_ )
{
#ifdef HAVE_YAZ
    if ( marc_.isEmpty() )
    {
        kdDebug() << "Z3950Connection::toXML() - empty string" << endl;
        return TQString::null;
    }

    yaz_iconv_t cd = yaz_iconv_open( "utf-8", charSet_.latin1() );
    if ( !cd )
    {
        // maybe it's iso 5426, which we sorta support
        TQString charSetLower = charSet_.lower();
        charSetLower.remove( '-' ).remove( ' ' );
        if ( charSetLower == Latin1Literal( "iso5426" ) )
        {
            return toXML( Iso5426Converter::toUtf8( marc_ ).utf8(), TQString::fromLatin1( "utf-8" ) );
        }
        else if ( charSetLower == Latin1Literal( "iso6937" ) )
        {
            return toXML( Iso6937Converter::toUtf8( marc_ ).utf8(), TQString::fromLatin1( "utf-8" ) );
        }
        kdWarning() << "Z3950Connection::toXML() - conversion from " << charSet_ << " is unsupported" << endl;
        return TQString::null;
    }

    yaz_marc_t mt = yaz_marc_create();
    yaz_marc_iconv( mt, cd );
    yaz_marc_xml( mt, YAZ_MARC_MARCXML );

    // first 5 bytes are length
    bool ok;
#if YAZ_VERSIONL < 0x030000
    int len = marc_.left( 5 ).toInt( &ok );
#else
    size_t len = marc_.left( 5 ).toInt( &ok );
#endif
    if ( ok && ( len < 25 || len > 100000 ) )
    {
        kdDebug() << "Z3950Connection::toXML() - bad length: " << ( ok ? len : -1 ) << endl;
        // release both handles on this early exit as well
        yaz_iconv_close( cd );
        yaz_marc_destroy( mt );
        return TQString::null;
    }

#if YAZ_VERSIONL < 0x030000
    char* result;
#else
    const char* result;
#endif
    int r = yaz_marc_decode_buf( mt, marc_, -1, &result, &len );
    if ( r <= 0 )
    {
        kdDebug() << "Z3950Connection::toXML() - can't decode buffer" << endl;
        yaz_iconv_close( cd );
        yaz_marc_destroy( mt );
        return TQString::null;
    }

    // NOTE(review): the output starts with a bare newline; if an XML
    // declaration is expected in front of the record, confirm against upstream.
    TQString output = TQString::fromLatin1( "\n" );
    // 'result' is copied here, before the handles are destroyed below
    output += TQString::fromUtf8( TQCString( result, len + 1 ), len + 1 );
//  kdDebug() << TQCString(result) << endl;
//  kdDebug() << "-------------------------------------------" << endl;
//  kdDebug() << output << endl;
    yaz_iconv_close( cd );
    yaz_marc_destroy( mt );

    return output;
#else // no yaz
    return TQString::null;
#endif
}