summary | refs | log | tree | commit | diff | stats
path: root/kbabel/kbabeldict/modules/pocompendium/compendiumdata.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'kbabel/kbabeldict/modules/pocompendium/compendiumdata.cpp')
-rw-r--r-- kbabel/kbabeldict/modules/pocompendium/compendiumdata.cpp | 261
1 files changed, 261 insertions, 0 deletions
diff --git a/kbabel/kbabeldict/modules/pocompendium/compendiumdata.cpp b/kbabel/kbabeldict/modules/pocompendium/compendiumdata.cpp
new file mode 100644
index 00000000..d862b236
--- /dev/null
+++ b/kbabel/kbabeldict/modules/pocompendium/compendiumdata.cpp
@@ -0,0 +1,261 @@
+/* ****************************************************************************
+ This file is part of KBabel
+
+ Copyright (C) 2001 by Matthias Kiefer
+ <matthias.kiefer@gmx.de>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ In addition, as a special exception, the copyright holders give
+ permission to link the code of this program with any edition of
+ the Qt library by Trolltech AS, Norway (or with modified versions
+ of Qt that use the same license as Qt), and distribute linked
+ combinations including the two. You must obey the GNU General
+ Public License in all respects for all of the code used other than
+ Qt. If you modify this file, you may extend this exception to
+ your version of the file, but you are not obligated to do so. If
+ you do not wish to do so, delete this exception statement from
+ your version.
+
+**************************************************************************** */
+#include "compendiumdata.h"
+
+#include <resources.h>
+#include <catalog.h>
+#include <tagextractor.h>
+
+#include <kapplication.h>
+#include <kdebug.h>
+#include <klocale.h>
+
+using namespace KBabel;
+
+/**
+ * Creates an empty compendium that has not loaded anything yet.
+ *
+ * All state flags start out false. The four lookup dictionaries are
+ * constructed with a size argument of 9887 and are switched to
+ * auto-delete mode, so they own the values inserted into them.
+ *
+ * @param parent the QObject parent handed on to the QObject base class
+ */
+CompendiumData::CompendiumData(QObject *parent)
+    : QObject(parent)
+    , _active(false)
+    , _error(false)
+    , _initialized(false)
+    , _catalog(0)
+    , _exactDict(9887)
+    , _allDict(9887)
+    , _wordDict(9887)
+    , _textonlyDict(9887)
+{
+    // every dictionary deletes its values itself when they are removed
+    _textonlyDict.setAutoDelete(true);
+    _wordDict.setAutoDelete(true);
+    _allDict.setAutoDelete(true);
+    _exactDict.setAutoDelete(true);
+
+    // the catalog is created as a child object of this instance
+    _catalog = new Catalog(this, "CompendiumData::catalog", QString::null);
+}
+
+
+/**
+ * Appends @p index to the index list stored under @p key in @p dict.
+ * If @p key has no list yet, a new one is allocated and inserted first.
+ *
+ * @param dict  dictionary mapping strings to QValueList<int> pointers
+ * @param key   the key whose list is extended
+ * @param index the catalog entry index to append
+ * @return true if a new list had to be created for @p key
+ */
+template <class IndexDict>
+static bool appendIndex(IndexDict &dict, const QString &key, int index)
+{
+    bool created = false;
+    QValueList<int> *indexList = dict[key];
+
+    if(!indexList)
+    {
+        indexList = new QValueList<int>;
+        dict.insert(key, indexList);
+        created = true;
+    }
+
+    indexList->append(index);
+
+    return created;
+}
+
+/**
+ * Opens the PO file at @p url and rebuilds all lookup dictionaries
+ * from it.
+ *
+ * While running, progressStarts(), progress() and progressEnds() are
+ * emitted, and pending application events are processed between index
+ * updates.
+ *
+ * @param url location of the PO compendium file
+ * @return false if a load is already running or the file could not be
+ *         read (in the latter case _error and _errorMsg are set),
+ *         true once the indices have been built
+ */
+bool CompendiumData::load(KURL url)
+{
+    // refuse to start a second load while one is still running
+    if(_active)
+        return false;
+
+    _error = false;
+    _active = true;
+
+    _exactDict.clear();
+    _allDict.clear();
+    _wordDict.clear();
+    _textonlyDict.clear();
+
+    emit progressStarts(i18n("Loading PO compendium"));
+
+    // forward the catalog's progress only for the duration of openURL()
+    connect(_catalog, SIGNAL(signalProgress(int)), this, SIGNAL(progress(int)));
+
+    ConversionStatus stat=_catalog->openURL(url);
+
+    disconnect(_catalog, SIGNAL(signalProgress(int))
+        , this, SIGNAL(progress(int)));
+
+    if( stat!= OK && stat != RECOVERED_PARSE_ERROR)
+    {
+        kdDebug(KBABEL_SEARCH) << "error while opening file " << url.prettyURL() << endl;
+
+        _error = true;
+        _errorMsg = i18n("Error while trying to read file for PO Compendium module:\n%1")
+            .arg(url.prettyURL());
+
+        emit progressEnds();
+
+        _active = false;
+        _initialized=true;
+
+        return false;
+    }
+
+    emit progressStarts(i18n("Building indices"));
+
+    int total = _catalog->numberOfEntries();
+    for(int i=0; i < total; i++)
+    {
+        // only report when the integer percentage has just advanced
+        if( (100*(i+1))%total < 100 )
+        {
+            emit progress((100*(i+1))/total);
+            kapp->processEvents(100);
+        }
+
+        // FIXME: should care about plural forms
+        const QString msgid = _catalog->msgid(i,true).first();
+
+        // exact matches are looked up by the unmodified msgid
+        _exactDict.insert(msgid, new int(i));
+
+        // all other dictionaries use the simplified, lower-cased text
+        const QString key = simplify(msgid).lower();
+
+        // skip empty and single-character strings
+        if(key.length() <= 1)
+            continue;
+
+        // add to allDict
+        appendIndex(_allDict, key, i);
+
+        // add to textonlyDict: same text with all spaces removed
+        QString textOnly = key;
+        textOnly.remove( ' ' );
+
+        if(appendIndex(_textonlyDict, textOnly, i))
+        {
+            kdDebug(KBABEL_SEARCH) << "Adding " << textOnly << endl;
+        }
+
+        // add to wordDict, ignoring single-character words
+        QStringList wList = wordList(key);
+        for ( QStringList::Iterator it = wList.begin()
+            ; it != wList.end(); ++it )
+        {
+            if( (*it).length() > 1)
+            {
+                appendIndex(_wordDict, *it, i);
+            }
+        }
+    }
+
+    // remove words that are too frequent: any word whose index list is
+    // longer than a tenth of the number of keys in _allDict
+    uint max=_allDict.count()/10;
+    QDictIterator< QValueList<int> > it(_wordDict);
+    while ( it.current() )
+    {
+        if(it.current()->count() > max)
+        {
+            // no ++it here: the loop relies on the iterator moving on
+            // when its current item is removed from the dictionary
+            _wordDict.remove(it.currentKey());
+        }
+        else
+        {
+            ++it;
+        }
+    }
+
+    _initialized=true;
+
+    emit progressEnds();
+
+    _active = false;
+
+    return true;
+}
+
+/**
+ * Looks up @p text in the exact-match dictionary, which is keyed by the
+ * unmodified msgid.
+ *
+ * @return pointer to the stored catalog index, or 0 if @p text is unknown
+ */
+const int* CompendiumData::exactDict(const QString text) const
+{
+    const int *entry = _exactDict.find(text);
+    return entry;
+}
+
+/**
+ * Looks up @p text in the dictionary keyed by the simplified,
+ * lower-cased msgid.
+ *
+ * @return the list of catalog indices stored for @p text, or 0 if none
+ */
+const QValueList<int>* CompendiumData::allDict(const QString text) const
+{
+    const QValueList<int> *indices = _allDict.find(text);
+    return indices;
+}
+
+/**
+ * Looks up @p text in the per-word dictionary.
+ *
+ * @return the list of catalog indices stored for the word, or 0 if none
+ */
+const QValueList<int>* CompendiumData::wordDict(const QString text) const
+{
+    const QValueList<int> *indices = _wordDict.find(text);
+    return indices;
+}
+
+/**
+ * Looks up @p text in the dictionary keyed by the simplified,
+ * lower-cased msgid with all spaces removed.
+ *
+ * @return the list of catalog indices stored for @p text, or 0 if none
+ */
+const QValueList<int>* CompendiumData::textonlyDict(const QString text) const
+{
+    const QValueList<int> *indices = _textonlyDict.find(text);
+    return indices;
+}
+
+
+/**
+ * Adds @p obj to the list of registered objects unless that exact
+ * pointer is already in the list.
+ */
+void CompendiumData::registerObject(QObject *obj)
+{
+    // already known: keep the list free of duplicates
+    if(_registered.containsRef(obj))
+        return;
+
+    _registered.append(obj);
+}
+
+/**
+ * Removes @p obj from the list of registered objects.
+ *
+ * @return true if no registered objects are left afterwards
+ */
+bool CompendiumData::unregisterObject(QObject *obj)
+{
+    _registered.removeRef(obj);
+
+    const bool nothingLeft = _registered.isEmpty();
+    return nothingLeft;
+}
+
+/**
+ * Tells whether any object is currently registered.
+ *
+ * @return true if at least one object is registered, false if the
+ *         list of registered objects is empty
+ */
+bool CompendiumData::hasObjects() const
+{
+    // the previous "== 0" comparison reported the opposite of what the
+    // function name promises
+    return _registered.count() != 0;
+}
+
+/**
+ * Reduces @p string to the plain text reported by a TagExtractor and
+ * normalizes its whitespace.
+ *
+ * @return the plain string after simplifyWhiteSpace() and
+ *         stripWhiteSpace() have been applied
+ */
+QString CompendiumData::simplify(const QString string)
+{
+    TagExtractor extractor;
+    extractor.setString(string);
+
+    return extractor.plainString().simplifyWhiteSpace().stripWhiteSpace();
+}
+
+/**
+ * Splits @p string into words.
+ *
+ * The text is first passed through simplify() and then split at each
+ * space character.
+ *
+ * @return the resulting list of words
+ */
+QStringList CompendiumData::wordList(const QString string)
+{
+    const QString plain = CompendiumData::simplify(string);
+    QStringList words = QStringList::split(' ', plain);
+
+    return words;
+}
+
+#include "compendiumdata.moc"