#ifndef _IFILE_INCLUDE_GUARD_LPC_56465465798732 #define _IFILE_INCLUDE_GUARD_LPC_56465465798732 /* This file is part of indexlib. * Copyright (C) 2005 Luís Pedro Coelho * * Indexlib is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License, version 2, as * published by the Free Software Foundation and available as file * GPL_V2 which is distributed along with indexlib. * * Indexlib is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA * * In addition, as a special exception, the copyright holders give * permission to link the code of this program with any edition of * the TQt library by Trolltech AS, Norway (or with modified versions * of TQt that use the same license as TQt), and distribute linked * combinations including the two. You must obey the GNU General * Public License in all respects for all of the code used other than * TQt. If you modify this file, you may extend this exception to * your version of the file, but you are not obligated to do so. If * you do not wish to do so, delete this exception statement from * your version. 
*/
#include "index.h"
#include "stringarray.h"
#include "stringset.h"
#include "leafdatavector.h"
#include "tokenizer.h"
// NOTE(review): the header names of the next three #include directives are
// missing in this copy of the file (most likely stripped together with their
// angle brackets). The declarations below use std::string, std::set,
// std::vector and std::auto_ptr, so presumably some of <string>, <set>,
// <vector> and <memory> were named here -- TODO confirm against the upstream
// file.
#include
#include
#include

/**
 * Index implementation that derives publicly from indexlib::index.
 *
 * Only the declarations are visible in this header; apart from ndocs() and
 * lookup_docname(), every member function is defined elsewhere, so the notes
 * below describe signatures, not behaviour.
 *
 * NOTE(review): std::auto_ptr, std::set and std::vector appear without
 * template arguments throughout this struct. That is not valid C++, so the
 * argument lists were presumably lost with the header names above --
 * TODO restore them from the upstream file. Separately, std::auto_ptr was
 * deprecated in C++11 and removed in C++17.
 */
struct ifile : public indexlib::index {
    public:
        // Constructs from a single string argument.
        // (presumably a path or base name for the index -- TODO confirm)
        ifile( std::string );

        // Takes two C strings, named str and doc in the declaration.
        // (presumably text to index and the document it belongs to -- verify
        // against the definition)
        virtual void add( const char* str, const char* doc );

        // Takes a C string and returns an owning std::auto_ptr; the pointee
        // type is missing in this copy (see the note on the struct).
        virtual std::auto_ptr search( const char* ) const;

        // Number of entries currently held in docnames_.
        virtual unsigned ndocs() const { return docnames_.size(); }

        // Returns the entry of docnames_ stored at position idx.
        virtual std::string lookup_docname( unsigned idx ) const { return docnames_.get( idx ); }

        // Takes the name of a document as a C string.
        // (presumably removes that document from the index -- TODO confirm)
        virtual void remove_doc( const char* doc );

        // Non-virtual; declared only, its effect is not visible from this header.
        void maintenance();

        // Static counterpart taking a single string.
        // (presumably removes the on-disk index named by it -- TODO confirm)
        static void remove( std::string );

    private:
        // Helper taking a word and returning a std::set (element type missing
        // in this copy).
        std::set find_word( std::string ) const;

        // Helper taking a C string and returning a std::vector (element type
        // missing in this copy).
        // (presumably the cleaned-up tokens of the input -- verify)
        std::vector break_clean( const char* ) const;

        // Private virtual returning an owning std::auto_ptr (pointee type
        // missing in this copy).
        virtual std::auto_ptr everything() const;

        // Static predicate over a word; the criteria live in the definition.
        static bool invalid_word( std::string );

        // Predicate over a word.
        // (presumably a lookup in stopwords_ -- TODO confirm)
        bool is_stop_word( std::string ) const;

        // Declared only.
        // (presumably (re)computes stopwords_ -- TODO confirm)
        void calc_stopwords();

        // Data members. Declaration order is significant (it fixes
        // construction order and object layout), so it is kept as is.
        stringarray docnames_;      // backs ndocs() and lookup_docname()
        stringset words_;
        stringset stopwords_;
        leafdatavector files_;
        std::auto_ptr tokenizer_;   // owning pointer; pointee type missing in this copy
};
#endif /* _IFILE_INCLUDE_GUARD_LPC_56465465798732 */