//
// WordListMulti.cc
//
// Part of the ht://Dig package
// Copyright (c) 1999-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
//
//
// $Id: WordListMulti.cc,v 1.6 2004/05/28 13:15:28 lha Exp $
//
#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */
#include "WordListMulti.h"
#include "WordListOne.h"
#include "myqsort.h"
#include
#include
#include
#include
#include
#ifndef _MSC_VER /* _WIN32 */
#include
#endif
class WordDBMulti : public Object
{
public:
WordDBMulti() { words = 0; size = 0; mode = 0; }
WordListOne *words;
String filename;
int mode;
unsigned int size;
};
// *****************************************************************************
//
WordListMulti::WordListMulti(WordContext* ncontext)
{
dbs = new List;
context = ncontext;
// The database itself hasn't been opened yet
isopen = 0;
Configuration& config = context->GetConfiguration();
extended = config.Boolean("wordlist_extend");
verbose = config.Value("wordlist_verbose");
file_max = config.Value("wordlist_multi_max", 50);
if(file_max < 4) file_max = 4;
file_min = config.Value("wordlist_multi_min", 4);
if(file_min < 2) file_min = 2;
if(file_max < file_min) file_max = file_min * 2;
put_max = config.Value("wordlist_multi_put_max", 1000);
if(put_max < 50) put_max = 50;
compressor = 0;
serial = 0;
}
// *****************************************************************************
//
WordListMulti::~WordListMulti()
{
Close();
}
// *****************************************************************************
//
int WordListMulti::Open(const String& nfilename, int mode)
{
filename = nfilename;
char tmp[32];
struct stat stat_buf;
int i;
//
// Open existing indexes
//
for(i = 0; i < file_max; i++) {
String filename_one(filename);
sprintf(tmp, "%08d", i);
filename_one << tmp;
if(stat((char*)filename_one, &stat_buf) == 0) {
WordDBMulti* db = new WordDBMulti();
db->words = new WordListOne(context);
db->filename = filename_one;
db->mode = mode;
dbs->Push(db);
} else {
break;
}
}
serial = i;
//
// If no indexes exists and read-only, abort
//
if(i == 0 && (flags & DB_RDONLY)) {
fprintf(stderr, "WordListMulti::Open(%s, O_RDONLY): no index found\n", (char*)filename);
return NOTOK;
}
isopen = 1;
//
// If no indexes exists and read/write, create the first
//
if(i == 0)
if(AddIndex() != OK) return NOTOK;
WordDBMulti* db = (WordDBMulti*)dbs->Last();
if(db->words->Open(db->filename, mode) != OK)
return NOTOK;
return OK;
}
// *****************************************************************************
//
int WordListMulti::Close()
{
if(isopen) {
WordDBMulti* db;
ListCursor cursor;
for(dbs->Start_Get(cursor); (db = (WordDBMulti*)dbs->Get_Next(cursor));) {
delete db->words;
}
dbs->Destroy();
isopen = 0;
filename.trunc();
}
return OK;
}
// ****************************************************************************
//
unsigned int WordListMulti::Size() const
{
unsigned int size = 0;
if(isopen) {
WordDBMulti* db;
ListCursor cursor;
for(dbs->Start_Get(cursor); (db = (WordDBMulti*)dbs->Get_Next(cursor));) {
if(!db->words->isopen) {
if(db->words->Open(db->filename, O_RDONLY) != OK) return 0;
size += db->words->Size();
if(db->words->Close() != OK) return 0;
} else {
size += db->words->Size();
}
}
}
return size;
}
int WordListMulti::AddIndex()
{
if(Flags() & O_RDONLY) return NOTOK;
if(serial >= file_max)
Merge();
char tmp[32];
String filename_one(filename);
sprintf(tmp, "%08d", serial);
filename_one << tmp;
serial++;
WordDBMulti* db = new WordDBMulti();
db->words = new WordListOne(context);
db->words->extended = extended;
db->filename = filename_one;
dbs->Push(db);
return OK;
}
static int merge_cmp_size(WordListMulti*, WordDBMulti* a, WordDBMulti* b)
{
return b->size - a->size;
}
static int merge_cmp_filename(WordListMulti*, WordDBMulti* a, WordDBMulti* b)
{
return a->filename.compare(b->filename);
}
int WordListMulti::Merge()
{
if(Flags() & DB_RDONLY) return NOTOK;
Configuration& config = context->GetConfiguration();
int use_compress = config.Boolean("wordlist_compress");
WordDBMulti* db = (WordDBMulti*)dbs->Last();
if(db->words->Close() != OK) return NOTOK;
//
// heap lists all the files in decreasing size order (biggest first)
//
WordDBMulti* heap = new WordDBMulti[serial];
{
int i;
WordDBMulti* db;
ListCursor cursor;
for(i = 0, dbs->Start_Get(cursor); (db = (WordDBMulti*)dbs->Get_Next(cursor)); i++) {
if(db->words->Open(db->filename, O_RDONLY) != OK) return NOTOK;
db->size = db->words->Size();
if(db->words->Close() != OK) return NOTOK;
heap[i] = *db;
}
dbs->Destroy();
myqsort((void*)heap, serial, sizeof(WordDBMulti), (myqsort_cmp)merge_cmp_size, (void*)this);
}
String tmpname = filename;
tmpname << ".tmp";
while(serial > file_min) {
WordDBMulti* a = &heap[serial - 1];
WordDBMulti* b = &heap[serial - 2];
WordListOne tmp(context);
tmp.extended = 0;
if(a->words->Open(a->filename, O_RDONLY) != OK) return NOTOK;
if(b->words->Open(b->filename, O_RDONLY) != OK) return NOTOK;
if(tmp.Open(tmpname, O_RDWR) != OK) return NOTOK;
if(tmp.db->CacheP() && tmp.db->CacheOff() != 0) return OK;
WordDBCursor* cursora = a->words->db->Cursor();
WordDBCursor* cursorb = b->words->db->Cursor();
if(cursora->Open() != 0) return NOTOK;
String keya;
String dataa;
if(cursorb->Open() != 0) return NOTOK;
String keyb;
String datab;
int reta;
int retb;
reta = cursora->Get(keya, dataa, DB_NEXT);
retb = cursorb->Get(keyb, datab, DB_NEXT);
//
// Merge while there are entries in both indexes
//
while(reta == 0 && retb == 0) {
//
// If keya lower than keyb
//
if(WordKey::Compare(context, keya, keyb) < 0) {
if(tmp.db->Put(0, keya, dataa, 0) != 0) return NOTOK;
reta = cursora->Get(keya, dataa, DB_NEXT);
} else {
if(tmp.db->Put(0, keyb, datab, 0) != 0) return NOTOK;
retb = cursorb->Get(keyb, datab, DB_NEXT);
}
}
//
// Sanity check
//
if((reta != 0 && reta != DB_NOTFOUND) ||
(retb != 0 && retb != DB_NOTFOUND))
return NOTOK;
//
// Flush the remaining entries from the index that is
// not yet empty.
//
if(reta != DB_NOTFOUND || retb != DB_NOTFOUND) {
String key = reta == 0 ? keya : keyb;
String data = reta == 0 ? data : datab;
WordDBCursor* cursor = reta == 0 ? cursora : cursorb;
int ret = 0;
while(ret == 0) {
if(tmp.db->Put(0, key, data, 0) != 0) return NOTOK;
ret = cursor->Get(key, data, DB_NEXT);
}
if(ret != DB_NOTFOUND)
return NOTOK;
}
delete cursora;
delete cursorb;
a->words->Close();
b->words->Close();
tmp.Close();
//
// Remove file a
//
if(unlink((char*)a->filename) != 0) {
const String message = String("WordListMulti::Merge: unlink ") + a->filename;
perror((const char*)message);
return NOTOK;
}
if(use_compress) {
if(unlink((char*)(a->filename + String("_weakcmpr"))) != 0) {
const String message = String("WordListMulti::Merge: unlink ") + a->filename + String("_weakcmpr");
perror((const char*)message);
return NOTOK;
}
}
//
// Remove file b
//
if(unlink((char*)b->filename) != 0) {
const String message = String("WordListMulti::Merge: unlink ") + b->filename;
perror((const char*)message);
return NOTOK;
}
if(use_compress) {
if(unlink((char*)(b->filename + String("_weakcmpr"))) != 0) {
const String message = String("WordListMulti::Merge: unlink ") + b->filename + String("_weakcmpr");
perror((const char*)message);
return NOTOK;
}
}
//
// Rename tmp file into file b
//
if(rename((char*)tmpname, (char*)b->filename) != 0) {
const String message = String("WordListMulti::Merge: rename ") + tmpname + String(" ") + b->filename;
perror((const char*)message);
return NOTOK;
}
if(use_compress) {
if(rename((char*)(tmpname + String("_weakcmpr")), (char*)(b->filename + String("_weakcmpr"))) != 0) {
const String message = String("WordListMulti::Merge: rename ") + tmpname + String("_weakcmpr ") + b->filename + String("_weakcmpr");
perror((const char*)message);
return NOTOK;
}
}
//
// Update b file size. The size need not be accurate number as long
// as it reflects the relative size of each file.
//
b->size += a->size;
//
// The 'a' index is no longer in use
//
delete a->words;
serial--;
//
// update heap
//
myqsort((void*)heap, serial, sizeof(WordDBMulti), (myqsort_cmp)merge_cmp_size, (void*)this);
}
//
// Rename the indexes so that they are in increasing order
// and push them in the list of active indexes.
//
myqsort((void*)heap, serial, sizeof(WordDBMulti), (myqsort_cmp)merge_cmp_filename, (void*)this);
int i;
for(i = 0; i < serial; i++) {
WordDBMulti* db = new WordDBMulti();
*db = heap[i];
String newname(filename);
char tmp[32];
sprintf(tmp, "%08d", i);
newname << tmp;
//
// Rename if not equal
//
if(db->filename.compare(newname)) {
//
// Rename db index into newname
//
if(rename((char*)db->filename, (char*)newname) != 0) {
const String message = String("WordListMulti::Merge: rename ") + db->filename + String(" ") + newname;
perror((const char*)message);
return NOTOK;
}
if(use_compress) {
if(rename((char*)(db->filename + String("_weakcmpr")), (char*)(newname + String("_weakcmpr"))) != 0) {
const String message = String("WordListMulti::Merge: rename ") + db->filename + String("_weakcmpr ") + newname + String("_weakcmpr");
perror((const char*)message);
return NOTOK;
}
}
db->filename = newname;
}
dbs->Push(db);
}
return OK;
}
// ****************************************************************************
//
int WordListMulti::Override(const WordReference& arg)
{
WordDBMulti* db = (WordDBMulti*)dbs->Last();
if(db->words->Size() > put_max) {
if(db->words->Close() != OK) return NOTOK;
if(AddIndex() != OK) return NOTOK;
db = (WordDBMulti*)dbs->Last();
if(db->words->Open(db->filename, db->mode) != OK) return NOTOK;
}
return db->words->Override(arg);
}
// *****************************************************************************
int WordListMulti::Exists(const WordReference& )
{
return 0;
}
// *****************************************************************************
//
List *WordListMulti::operator [] (const WordReference& )
{
return 0;
#if 0
return Collect(wordRef);
#endif
}
// *****************************************************************************
//
List *WordListMulti::Prefix (const WordReference& )
{
return 0;
#if 0
WordReference prefix2(prefix);
prefix2.Key().UndefinedWordSuffix();
return Collect(prefix2);
#endif
}
// *****************************************************************************
//
List *WordListMulti::WordRefs()
{
return 0;
#if 0
return Collect(WordReference(context));
#endif
}
// *****************************************************************************
//
List *WordListMulti::Collect(const WordReference&)
{
return 0;
#if 0
WordCursor *search = Cursor(wordRef.Key(), HTDIG_WORDLIST_COLLECTOR);
if(search->Walk() != OK) return 0;
List* result = search->GetResults();
delete search;
return result;
#endif
}
// *****************************************************************************
//
// Delete all records matching wordRef, return the number of
// deleted records.
//
int WordListMulti::WalkDelete(const WordReference& )
{
return 0;
#if 0
DeleteWordData data;
WordCursor *description = Cursor(wordRef.Key(), delete_word, &data);
description->Walk();
delete description;
return data.count;
#endif
}
int WordListMulti::Delete(const WordReference& )
{
return NOTOK;
}
// *****************************************************************************
//
//
List *WordListMulti::Words()
{
return 0;
#if 0
List *list = 0;
String key;
String record;
WordReference lastWord(context);
WordDBCursor* cursor = db.Cursor();
if(!cursor) return 0;
//
// Move past the first word count record
//
const WordReference& last = WordStat::Last(context);
last.Pack(key, record);
if(cursor->Get(key, record, DB_SET_RANGE) != 0)
return 0;
list = new List;
do {
WordReference wordRef(context, key, record);
if(lastWord.Key().GetWord().empty() ||
wordRef.Key().GetWord() != lastWord.Key().GetWord())
{
list->Add(new String(wordRef.Key().GetWord()));
lastWord = wordRef;
}
} while (cursor->Get(key, record, DB_NEXT) == 0);
return list;
#endif
}
// *****************************************************************************
//
// Returns the reference count for word in arg
//
int WordListMulti::Noccurrence(const String& , unsigned int& ) const
{
return 0;
#if 0
noccurrence = 0;
WordStat stat(context, key.GetWord());
int ret;
if((ret = db.Get(stat)) != 0) {
if(ret != DB_NOTFOUND)
return NOTOK;
} else {
noccurrence = stat.Noccurrence();
}
return OK;
#endif
}
// *****************************************************************************
//
// Increment reference count for wordRef
//
int WordListMulti::Ref(const WordReference& )
{
return NOTOK;
}
// *****************************************************************************
//
// Decrement reference count for wordRef
//
int WordListMulti::Unref(const WordReference& )
{
return NOTOK;
}
// *****************************************************************************
//
int WordListMulti::AllRef() {
if(!extended) return OK;
Merge();
WordDBMulti* db;
ListCursor cursor;
for(dbs->Start_Get(cursor); (db = (WordDBMulti*)dbs->Get_Next(cursor));) {
if(!db->words->isopen) {
if(db->words->Open(db->filename, O_RDWR) != OK) return NOTOK;
if(db->words->Close() != OK) return NOTOK;
}
}
return OK;
}