//
// WordCursor.h
//
// NAME
//
// search specification and results for WordList.
//
// SYNOPSIS
//
// #include <WordList.h>
//
// int callback(WordList *, WordDBCursor& , const WordReference *, Object &)
// {
//    ...
// }
//
// Object* data = ...
//
// WordList *words = ...;
//
// WordCursor *search = words->Cursor(callback, data);
// WordCursor *search = words->Cursor(WordKey("word "));
// WordCursor *search = words->Cursor(WordKey("word "), callback, data);
//
// ...
//
// if(search->Walk() == NOTOK) bark;
// List* results = search->GetResults();
//
// if(search->WalkNext() == OK)
//   dosomething(search->GetFound());
//
// DESCRIPTION
//
// WordCursor is an iterator on an inverted index. It is created by
// asking a WordList object for one through its Cursor method. There is
// no other way to create a WordCursor object.
// When the Walk* methods return,
// the WordCursor object contains the result of the search and
// status information that indicates if it reached the end of
// the list (IsAtEnd() method).
//
// The callback function that is called each time a match is
// found takes the following arguments:
//
// WordList* words pointer to the inverted index handle.
// WordDBCursor& cursor to call Del() and delete the current match
// WordReference* wordRef is the match
// Object& data is the user data provided by the caller when
//              search began.
// 
// // The WordKey object that specifies the search criterion // may be used as follows (assuming word is followed by DOCID and // LOCATION): // // Ex1: WordKey("word ") find all occurrences // of word. // // Ex2: WordKey("meet ") find all occurrences // starting with meet, including meeting etc. // // Ex3: WordKey("meet 1") find all occurrences of // meet that occur at LOCATION 1 in any DOCID. This can // be inefficient since the search has to scan all occurrences // of meet to find the ones that occur at LOCATION 1. // // Ex4: WordKey("meet 2 ") find all occurrences of // meet that occur in DOCID 2, at any location. // // Interface functions are virtual so that a derivation of the // class is possible. Some functions are meant to be used by derived // classes such as the Initialize function. All data members // should be accessed using the corresponding accessor if possible. // // END // // Part of the ht://Dig package // Copyright (c) 1999-2004 The ht://Dig Group // For copyright details, see the file COPYING in your distribution // or the GNU Library General Public License (LGPL) version 2 or later // // // $Id: WordCursor.h,v 1.4 2004/05/28 13:15:26 lha Exp $ // #ifndef _WordCursor_h_ #define _WordCursor_h_ #ifndef SWIG #include "htString.h" #include "WordKey.h" #include "WordDB.h" class WordList; class WordDBCursor; #endif /* SWIG */ // // Possible values of the action argument of WordList::Walk // check walk function in WordList.cc for info on these: // #define HTDIG_WORDLIST_COLLECTOR 0x0001 #define HTDIG_WORDLIST_WALKER 0x0002 #ifndef SWIG // // Type of the callback argument in WordCursor // typedef int (*wordlist_walk_callback_t)(WordList *, WordDBCursor& , const WordReference *, Object &); #endif /* SWIG */ // // Possible values of the status member // // // WalkNext reached the end of the matches // #define WORD_WALK_ATEND 0x0001 // // Failed to acquire Berkeley DB cursor // #define WORD_WALK_CURSOR_FAILED 0x0002 // // Berkeley DB Get operation failed // 
#define WORD_WALK_GET_FAILED 0x0004 // // Callback function returned NOTOK // #define WORD_WALK_CALLBACK_FAILED 0x0008 // // WalkNextStep hit an entry that does not match the // searched key. // #define WORD_WALK_NOMATCH_FAILED 0x0010 // // WordCursor contains undefined data // #define WORD_WALK_FAILED 0xffffffff // // Possible return values of the IsA() method // #define WORD_CURSOR 1 #define WORD_CURSORS 2 // // Wordlist::Walk uses WordCursor for : // state information : cursor // search term description // debug/trace/benchmarking // search result format description // class WordCursor { public: #ifndef SWIG // // Private constructor. Creator of the object must then call Initialize() // prior to using any other methods. // WordCursor() { Clear(); } //- // Private constructor. See WordList::Cursor method with same prototype for // description. // WordCursor(WordList *words, wordlist_walk_callback_t callback, Object * callback_data) { Clear(); Initialize(words, WordKey(), callback, callback_data, HTDIG_WORDLIST_WALKER); } //- // Private constructor. See WordList::Cursor method with same prototype for // description. // WordCursor(WordList *words, const WordKey &searchKey, int action = HTDIG_WORDLIST_WALKER) { Clear(); Initialize(words, searchKey, 0, 0, action); } //- // Private constructor. See WordList::Cursor method with same prototype for // description. // WordCursor(WordList *words, const WordKey &searchKey, wordlist_walk_callback_t callback, Object * callback_data) { Clear(); Initialize(words, searchKey, callback, callback_data, HTDIG_WORDLIST_WALKER); } #endif /* SWIG */ virtual ~WordCursor() {} //- // Clear all data in object, set GetResult() data to NULL but // do not delete it (the application is responsible for that). // virtual void Clear(); virtual void ClearInternal(); virtual void ClearResult(); //- // Returns the type of the object. May be overloaded by // derived classes to differentiate them at runtime. // Returns WORD_CURSOR. 
// virtual int IsA() const { return WORD_CURSOR; } //- // Returns true if WalkNext() step entries in strictly increasing // order, false if it step entries in random order. // virtual int Ordered() const { return 1; } //- // Optimize the cursor before starting a Walk. // Returns OK on success, NOTOK otherwise. // virtual int Optimize() { return OK; } //- // Save in buffer all the information necessary to resume // the walk at the point it left. The ASCII representation of the // last key found (GetFound()) is written in buffer using the // WordKey::Get method. // virtual int ContextSave(String& buffer) const { found.Get(buffer); return OK; } //- // Restore from buffer all the information necessary to // resume the walk at the point it left. The buffer is expected // to contain an ASCII representation of a WordKey (see WordKey::Set // method). A Seek is done on the key and the object is prepared // to jump to the next occurrence when WalkNext is called (the // cursor_get_flags is set to DB_NEXT. // virtual int ContextRestore(const String& buffer); #ifndef SWIG //- // Walk and collect data from the index. // Returns OK on success, NOTOK otherwise. // virtual int Walk(); #endif /* SWIG */ //- // Must be called before other Walk methods are used. // Fill internal state according to input parameters // and move before the first matching entry. // Returns OK on success, NOTOK otherwise. // virtual int WalkInit(); //- // Move before the first index matching entry. // Returns OK on success, NOTOK otherwise. // virtual int WalkRewind(); //- // Move to the next matching entry. // At end of list, WORD_WALK_ATEND is returned. // Returns OK on success, NOTOK otherwise. // virtual int WalkNext(); #ifndef SWIG //- // Advance the cursor one step. The entry pointed to by the cursor may // or may not match the requirements. Returns OK if entry pointed // by cursor matches requirements. Returns NOTOK on // failure. 
Returns WORD_WALK_NOMATCH_FAILED if the current entry // does not match requirements, it's safe to call WalkNextStep again // until either OK or NOTOK is returned. // virtual int WalkNextStep(); #endif /* SWIG */ //- // Terminate Walk, free allocated resources. // Returns OK on success, NOTOK otherwise. // virtual int WalkFinish(); // // Find out if cursor should better jump to the next possible key // (DB_SET_RANGE) instead of sequential iterating (DB_NEXT). If it // is decided that jump is a better move : cursor_set_flags = // DB_SET_RANGE key = calculated next possible key Else do nothing // Return OK if skipping successfull. Returns WORD_WALK_ATEND if no // more possible match, reached the maximum. Returns // WORD_WALK_FAILED on general failure, occurs if called and no // skipping necessary. // int SkipUselessSequentialWalking(); //- // Move before the inverted index position specified in patch. // May only be called after a successfull call to the WalkNext // or WalkNextStepmethod. // Copy defined fields from patch into a copy of the // found data member and // initialize internal state so that WalkNext jumps to // this key next time it's called (cursor_get_flag set to DB_SET_RANGE). // Returns OK if successfull, NOTOK otherwise. // virtual int Seek(const WordKey& patch); //- // Returns true if cursor is positioned after the last possible // match, false otherwise. // virtual int IsAtEnd() const { return status == WORD_WALK_ATEND; } // // Accessors for input parameters // //- // Returns the search criterion. // WordKey& GetSearch() { return searchKey; } #ifndef SWIG const WordKey& GetSearch() const { return searchKey; } #endif /* SWIG */ //- // Returns the type of action when a matching entry // is found. // int GetAction() const { return action; } // // Accessors for output parameters // //- // Returns the list of WordReference found. The application // is responsible for deallocation of the list. 
// List *GetResults() { return collectRes; } //- // For debugging purposes. Returns the list of WordReference hit // during the search // process. Some of them match the searched key, some don't. // The application is responsible for deallocation of the list. // List *GetTraces() { return traceRes; } //- // For debugging purposes. Set the list of WordReference hit // during the search process. // void SetTraces(List* traceRes_arg) { traceRes = traceRes_arg; } //- // Returns the last entry hit by the search. Only contains // a valid value if the last WalkNext or WalkNextStep // call was successfull (i.e. returned OK). // const WordReference& GetFound() { return found; } //- // Returns the number of occurrences of the searched word // in the inverted index in the noccurrence parameter. // Returns OK on success, NOTOK on failure. // virtual int Noccurrence(unsigned int& noccurrence) const; #ifndef SWIG //- // Convert the whole structure to an ASCII string description // Returns OK if successfull, NOTOK otherwise. // virtual int Get(String& bufferout) const; String Get() const { String tmp; Get(tmp); return tmp; } protected: //- // Protected method. Derived classes should use this function to initialize // the object if they do not call a WordCursor constructor in their own // constructutor. Initialization may occur after the object is created // and must occur before a Walk* method is called. See the // DESCRIPTION section for the semantics of the arguments. // Return OK on success, NOTOK on error. // int Initialize(WordList *nwords, const WordKey &nsearchKey, wordlist_walk_callback_t ncallback, Object * ncallback_data, int naction); // // Input parameters // //- // Input data. The key to be searched, see DESCRIPTION for more information. // WordKey searchKey; // // Input data. What do do when a WordReference is found. 
// Can either be // HTDIG_WORDLIST_COLLECTOR WordReference found stored in collectRes // HTDIG_WORDLIST_WALKER callback is called for each WordReference found // int action; // // Input data. Callback function called for each match found. // wordlist_walk_callback_t callback; // // Input data. Argument given to callback, contains arbitrary // caller defined data. // Object *callback_data; // // Output parameters // // // Output data. List of WordReference found in the search. // List *collectRes; //- // Output data. Last match found. Use GetFound() to retrieve it. // WordReference found; //- // Output data. WORD_WALK_ATEND if cursor is past last match, // OK otherwise. Use GetStatus() to retrieve it. // int status; // // Debugging section. Do not use unless you know exactly what you do. // // // Collect everything found while searching (not necessarily matching) // List *traceRes; // // Internal state // // // The actual Berkeley DB cursor. // WordDBCursor cursor; // // The latest retrieved key and data // String key; String data; // // The shorted prefix key computed from searchKey // WordKey prefixKey; //- // WalkNext leap is either DB_NEXT or DB_SET_RANGE. // int cursor_get_flags; // // True if search key is a prefix key // int searchKeyIsSameAsPrefix; //- // The inverted index used by this cursor. // WordList *words; #endif /* SWIG */ }; #endif /* _WordCursor_h_ */