//
// WordList.h
//
// NAME
//
// manage and use an inverted index file.
//
// SYNOPSIS
//
// #include <mifluz.h>
//
// Configuration* config;
// WordReference wordRef;
// ...
// WordList* words = new WordList(config);
//
// delete words;
//
// DESCRIPTION
//
// WordList is the mifluz equivalent of a database handler. Each
// WordList object is bound to an inverted index file and implements the
// operations to create it, fill it with word occurrences and search
// for an entry matching a given criterion.
//
// CONFIGURATION
//
// wordlist_extend {true|false} (default false)
//   If true maintain reference count of unique
//   words. The Noccurrence method gives access to this count.
//
// wordlist_verbose (default 0)
//   Set the verbosity level of the WordList class.
//
// 1 walk logic //
// 2 walk logic details //
// 3 walk logic lots of details // // wordlist_page_size (default 8192) // Berkeley DB page size (see Berkeley DB documentation) // // wordlist_cache_size (default 500K) // Berkeley DB cache size (see Berkeley DB documentation) // Cache makes a huge difference in performance. It must be at least 2% // of the expected total data size. Note that if compression is activated // the data size is eight times larger than the actual file size. In this // case the cache must be scaled to 2% of the data size, not 2% // of the file size. See Cache tuning in the mifluz guide for // more hints. // // wordlist_compress {true|false} (default false) // Activate compression of the index. The resulting index is eight times // smaller than the uncompressed index. // // // END // // Part of the ht://Dig package // Copyright (c) 1999-2004 The ht://Dig Group // For copyright details, see the file COPYING in your distribution // or the GNU Library General Public License (LGPL) version 2 or later // // // $Id: WordListMulti.h,v 1.4 2004/05/28 13:15:28 lha Exp $ // #ifndef _WordListMulti_h_ #define _WordListMulti_h_ #include #include #ifndef SWIG #include "WordList.h" #include "WordCursorOne.h" //#include "WordCursorMulti.h" #endif /* SWIG */ class WordContext; // // Inverted index interface // class WordListMulti : public WordList { public: //- // Constructor. Build inverted index handling object using // run time configuration parameters listed in the CONFIGURATION // section. // WordListMulti(WordContext* ncontext); virtual ~WordListMulti(); #ifndef SWIG virtual int Override(const WordReference& wordRef); #endif /* SWIG */ //- // Returns OK if wordRef exists in the index, NOTOK otherwise. // virtual int Exists(const WordReference& wordRef); // // Delete permanently // //- // Delete all entries in the index whose key matches the // Key() part of wordRef, using the Walk // method. // Returns the number of entries successfully deleted. 
// virtual int WalkDelete(const WordReference& wordRef); //- // Delete the entry in the index that exactly matches the // Key() part of wordRef. // Returns OK if deletion is successfull, NOTOK otherwise. // virtual int Delete(const WordReference& wordRef); //- // Open inverted index filename. mode // may be O_RDONLY or O_RDWR. If mode is // O_RDWR it can be or'ed with O_TRUNC to reset // the content of an existing inverted index. // Return OK on success, NOTOK otherwise. // virtual int Open(const String& filename, int mode); //- // Close inverted index. // Return OK on success, NOTOK otherwise. // virtual int Close(); //- // Return the size of the index in pages. // virtual unsigned int Size() const; int AddIndex(); int Merge(); //- // Alias to the Find method. // virtual List *operator [] (const WordReference& wordRef); //- // Returns the list of word occurrences matching the Key() // part of wordRef. In the Key(), the string // (accessed with GetWord()) matches any string that begins // with it. The List returned contains pointers to // WordReference objects. It is the responsibility of the // caller to free the list. // virtual List *Prefix (const WordReference& prefix); // // Iterate over the complete database. // #ifndef SWIG //- // Returns a list of all unique words contained in the inverted // index. The List returned contains pointers to // String objects. It is the responsibility of the caller // to free the list. See List.h header for usage. // virtual List *Words(); #endif /* SWIG */ //- // Returns a list of all entries contained in the // inverted index. The List returned contains pointers to // WordReference objects. It is the responsibility of // the caller to free the list. See List.h header for usage. // virtual List *WordRefs(); #ifndef SWIG //- // Create a cursor that searches all the occurrences in the // inverted index and call ncallback with // ncallback_data for every match. 
// virtual inline WordCursor *Cursor(wordlist_walk_callback_t callback, Object *callback_data) { return new WordCursorOne(this, callback, callback_data); } #endif /* SWIG */ //- // Create a cursor that searches all the occurrences in the // inverted index and that match nsearchKey. If // naction is set to HTDIG_WORDLIST_WALKER calls // searchKey.callback with searchKey.callback_data // for every match. If naction is set to // HTDIG_WORDLIST_COLLECT push each match in searchKey.collectRes // data member as a WordReference object. It is the responsibility // of the caller to free the searchKey.collectRes list. // virtual inline WordCursor *Cursor(const WordKey &searchKey, int action = HTDIG_WORDLIST_WALKER) { return new WordCursorOne(this, searchKey, action); } #ifndef SWIG //- // Create a cursor that searches all the occurrences in the // inverted index and that match nsearchKey and calls // ncallback with ncallback_data for every match. // virtual inline WordCursor *Cursor(const WordKey &searchKey, wordlist_walk_callback_t callback, Object * callback_data) { return new WordCursorOne(this, searchKey, callback, callback_data); } #endif /* SWIG */ // // Update/get global word statistics statistics // //- // Add one to the reference count for the string contained // in the Key().GetWord() part of wordRef. // Returns OK on success, NOTOK otherwise. // virtual int Ref(const WordReference& wordRef); //- // Substract one to the reference count for the string contained // in the Key().GetWord() part of wordRef. // Returns OK on success, NOTOK otherwise. // virtual int Unref(const WordReference& wordRef); virtual int AllRef(); #ifndef SWIG //- // Return in noccurrence the number of occurrences of the // string contained in the GetWord() part of key. // Returns OK on success, NOTOK otherwise. 
// virtual int Noccurrence(const String& key, unsigned int& noccurrence) const; virtual int Write(FILE* f) { return NOTOK; } virtual int Read(FILE* f) { return NOTOK; } virtual WordKey Key(const String& bufferin) { abort(); return WordKey(0); } virtual WordReference Word(const String& bufferin, int exists = 0) { abort(); return WordReference(0); } #endif /* SWIG */ // // Retrieve WordReferences from the database. // Backend of WordRefs, operator[], Prefix... // virtual List *Collect(const WordReference& word); #ifndef SWIG List* dbs; int serial; int file_max; int file_min; unsigned int put_max; #endif /* SWIG */ }; #endif /* _WordListMulti_h_ */