summaryrefslogtreecommitdiffstats
path: root/kspell2/plugins/ispell/ispell_checker.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'kspell2/plugins/ispell/ispell_checker.cpp')
-rw-r--r--kspell2/plugins/ispell/ispell_checker.cpp505
1 files changed, 0 insertions, 505 deletions
diff --git a/kspell2/plugins/ispell/ispell_checker.cpp b/kspell2/plugins/ispell/ispell_checker.cpp
deleted file mode 100644
index fba7d3479..000000000
--- a/kspell2/plugins/ispell/ispell_checker.cpp
+++ /dev/null
@@ -1,505 +0,0 @@
-/* vim: set sw=8: -*- Mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-/* kspell2 - adopted from Enchant
- * Copyright (C) 2003 Dom Lachowicz
- * Copyright (C) 2004 Zack Rusin <zack@kde.org>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- * In addition, as a special exception, Dom Lachowicz
- * gives permission to link the code of this program with
- * non-LGPL Spelling Provider libraries (eg: a MSFT Office
- * spell checker backend) and distribute linked combinations including
- * the two. You must obey the GNU Lesser General Public License in all
- * respects for all of the code used other than said providers. If you modify
- * this file, you may extend this exception to your version of the
- * file, but you are not obligated to do so. If you do not wish to
- * do so, delete this exception statement from your version.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <string>
-#include <vector>
-
-#include "sp_spell.h"
-#include "ispell_checker.h"
-
-#include <tqmap.h>
-#include <tqdir.h>
-#include <tqfileinfo.h>
-
-/***************************************************************************/
-
-typedef struct str_ispell_map
-{
- const char * lang;
- const char * dict;
- const char * enc;
-} IspellMap;
-
-static const char *ispell_dirs [] = {
- "/usr/lib/ispell",
- "/usr/local/lib/ispell",
- "/usr/local/share/ispell",
- "/usr/share/ispell",
- "/usr/pkg/lib",
- 0
-};
-static const IspellMap ispell_map [] = {
- {"ca" ,"catala.hash" ,"iso-8859-1" },
- {"ca_ES" ,"catala.hash" ,"iso-8859-1" },
- {"cs" ,"czech.hash" ,"iso-8859-2" },
- {"cs_CZ" ,"czech.hash" ,"iso-8859-2" },
- {"da" ,"dansk.hash" ,"iso-8859-1" },
- {"da_DK" ,"dansk.hash" ,"iso-8859-1" },
- {"de" ,"deutsch.hash" ,"iso-8859-1" },
- {"de_CH" ,"swiss.hash" ,"iso-8859-1" },
- {"de_AT" ,"deutsch.hash" ,"iso-8859-1" },
- {"de_DE" ,"deutsch.hash" ,"iso-8859-1" },
- {"el" ,"ellhnika.hash" ,"iso-8859-7" },
- {"el_GR" ,"ellhnika.hash" ,"iso-8859-7" },
- {"en" ,"british.hash" ,"iso-8859-1" },
- {"en_AU" ,"british.hash" ,"iso-8859-1" },
- {"en_BZ" ,"british.hash" ,"iso-8859-1" },
- {"en_CA" ,"british.hash" ,"iso-8859-1" },
- {"en_GB" ,"british.hash" ,"iso-8859-1" },
- {"en_IE" ,"british.hash" ,"iso-8859-1" },
- {"en_JM" ,"british.hash" ,"iso-8859-1" },
- {"en_NZ" ,"british.hash" ,"iso-8859-1" },
- {"en_TT" ,"british.hash" ,"iso-8859-1" },
- {"en_ZA" ,"british.hash" ,"iso-8859-1" },
- {"en_ZW" ,"british.hash" ,"iso-8859-1" },
- {"en_PH" ,"american.hash" ,"iso-8859-1" },
- {"en_US" ,"american.hash" ,"iso-8859-1" },
- {"eo" ,"esperanto.hash" ,"iso-8859-3" },
- {"es" ,"espanol.hash" ,"iso-8859-1" },
- {"es_AR" ,"espanol.hash" ,"iso-8859-1" },
- {"es_BO" ,"espanol.hash" ,"iso-8859-1" },
- {"es_CL" ,"espanol.hash" ,"iso-8859-1" },
- {"es_CO" ,"espanol.hash" ,"iso-8859-1" },
- {"es_CR" ,"espanol.hash" ,"iso-8859-1" },
- {"es_DO" ,"espanol.hash" ,"iso-8859-1" },
- {"es_EC" ,"espanol.hash" ,"iso-8859-1" },
- {"es_ES" ,"espanol.hash" ,"iso-8859-1" },
- {"es_GT" ,"espanol.hash" ,"iso-8859-1" },
- {"es_HN" ,"espanol.hash" ,"iso-8859-1" },
- {"es_MX" ,"espanol.hash" ,"iso-8859-1" },
- {"es_NI" ,"espanol.hash" ,"iso-8859-1" },
- {"es_PA" ,"espanol.hash" ,"iso-8859-1" },
- {"es_PE" ,"espanol.hash" ,"iso-8859-1" },
- {"es_PR" ,"espanol.hash" ,"iso-8859-1" },
- {"es_PY" ,"espanol.hash" ,"iso-8859-1" },
- {"es_SV" ,"espanol.hash" ,"iso-8859-1" },
- {"es_UY" ,"espanol.hash" ,"iso-8859-1" },
- {"es_VE" ,"espanol.hash" ,"iso-8859-1" },
- {"fi" ,"finnish.hash" ,"iso-8859-1" },
- {"fi_FI" ,"finnish.hash" ,"iso-8859-1" },
- {"fr" ,"francais.hash" ,"iso-8859-1" },
- {"fr_BE" ,"francais.hash" ,"iso-8859-1" },
- {"fr_CA" ,"francais.hash" ,"iso-8859-1" },
- {"fr_CH" ,"francais.hash" ,"iso-8859-1" },
- {"fr_FR" ,"francais.hash" ,"iso-8859-1" },
- {"fr_LU" ,"francais.hash" ,"iso-8859-1" },
- {"fr_MC" ,"francais.hash" ,"iso-8859-1" },
- {"hu" ,"hungarian.hash" ,"iso-8859-2" },
- {"hu_HU" ,"hungarian.hash" ,"iso-8859-2" },
- {"ga" ,"irish.hash" ,"iso-8859-1" },
- {"ga_IE" ,"irish.hash" ,"iso-8859-1" },
- {"gl" ,"galician.hash" ,"iso-8859-1" },
- {"gl_ES" ,"galician.hash" ,"iso-8859-1" },
- {"ia" ,"interlingua.hash" ,"iso-8859-1" },
- {"it" ,"italian.hash" ,"iso-8859-1" },
- {"it_IT" ,"italian.hash" ,"iso-8859-1" },
- {"it_CH" ,"italian.hash" ,"iso-8859-1" },
- {"la" ,"mlatin.hash" ,"iso-8859-1" },
- {"la_IT" ,"mlatin.hash" ,"iso-8859-1" },
- {"lt" ,"lietuviu.hash" ,"iso-8859-13" },
- {"lt_LT" ,"lietuviu.hash" ,"iso-8859-13" },
- {"nl" ,"nederlands.hash" ,"iso-8859-1" },
- {"nl_NL" ,"nederlands.hash" ,"iso-8859-1" },
- {"nl_BE" ,"nederlands.hash" ,"iso-8859-1" },
- {"nb" ,"norsk.hash" ,"iso-8859-1" },
- {"nb_NO" ,"norsk.hash" ,"iso-8859-1" },
- {"nn" ,"nynorsk.hash" ,"iso-8859-1" },
- {"nn_NO" ,"nynorsk.hash" ,"iso-8859-1" },
- {"no" ,"norsk.hash" ,"iso-8859-1" },
- {"no_NO" ,"norsk.hash" ,"iso-8859-1" },
- {"pl" ,"polish.hash" ,"iso-8859-2" },
- {"pl_PL" ,"polish.hash" ,"iso-8859-2" },
- {"pt" ,"brazilian.hash" ,"iso-8859-1" },
- {"pt_BR" ,"brazilian.hash" ,"iso-8859-1" },
- {"pt_PT" ,"portugues.hash" ,"iso-8859-1" },
- {"ru" ,"russian.hash" ,"koi8-r" },
- {"ru_MD" ,"russian.hash" ,"koi8-r" },
- {"ru_RU" ,"russian.hash" ,"koi8-r" },
- {"sc" ,"sardinian.hash" ,"iso-8859-1" },
- {"sc_IT" ,"sardinian.hash" ,"iso-8859-1" },
- {"sk" ,"slovak.hash" ,"iso-8859-2" },
- {"sk_SK" ,"slovak.hash" ,"iso-8859-2" },
- {"sl" ,"slovensko.hash" ,"iso-8859-2" },
- {"sl_SI" ,"slovensko.hash" ,"iso-8859-2" },
- {"sv" ,"svenska.hash" ,"iso-8859-1" },
- {"sv_SE" ,"svenska.hash" ,"iso-8859-1" },
- {"uk" ,"ukrainian.hash" ,"koi8-u" },
- {"uk_UA" ,"ukrainian.hash" ,"koi8-u" },
- {"yi" ,"yiddish-yivo.hash" ,"utf-8" }
-};
-
-static const size_t size_ispell_map = ( sizeof(ispell_map) / sizeof((ispell_map)[0]) );
-static TQMap<TQString, TQString> ispell_dict_map;
-
-
-void
-ISpellChecker::try_autodetect_charset(const char * const inEncoding)
-{
- if (inEncoding && strlen(inEncoding))
- {
- m_translate_in = TQTextCodec::codecForName(inEncoding);
- }
-}
-
-/***************************************************************************/
-/***************************************************************************/
-
-ISpellChecker::ISpellChecker()
- : deftflag(-1),
- prefstringchar(-1),
- m_bSuccessfulInit(false),
- m_BC(NULL),
- m_cd(NULL),
- m_cl(NULL),
- m_cm(NULL),
- m_ho(NULL),
- m_nd(NULL),
- m_so(NULL),
- m_se(NULL),
- m_ti(NULL),
- m_te(NULL),
- m_hashstrings(NULL),
- m_hashtbl(NULL),
- m_pflaglist(NULL),
- m_sflaglist(NULL),
- m_chartypes(NULL),
- m_infile(NULL),
- m_outfile(NULL),
- m_askfilename(NULL),
- m_Trynum(0),
- m_translate_in(0)
-{
- memset(m_sflagindex,0,sizeof(m_sflagindex));
- memset(m_pflagindex,0,sizeof(m_pflagindex));
-}
-
-#ifndef FREEP
-#define FREEP(p) do { if (p) free(p); } while (0)
-#endif
-
-ISpellChecker::~ISpellChecker()
-{
- if (m_bSuccessfulInit) {
- // only cleanup our mess if we were successfully initialized
-
- clearindex (m_pflagindex);
- clearindex (m_sflagindex);
- }
-
- FREEP(m_hashtbl);
- FREEP(m_hashstrings);
- FREEP(m_sflaglist);
- FREEP(m_chartypes);
-
- delete m_translate_in;
- m_translate_in = 0;
-}
-
-bool
-ISpellChecker::checkWord( const TQString& utf8Word )
-{
- ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
- if (!m_bSuccessfulInit)
- return false;
-
- if (!utf8Word || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) || utf8Word.isEmpty())
- return false;
-
- bool retVal = false;
- TQCString out;
- if (!m_translate_in)
- return false;
- else {
- /* convert to 8bit string and null terminate */
- int len_out = utf8Word.length();
-
- out = m_translate_in->fromUnicode( utf8Word, len_out );
- }
-
- if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
- {
- if (good(iWord, 0, 0, 1, 0) == 1 ||
- compoundgood(iWord, 1) == 1)
- {
- retVal = true;
- }
- }
-
- return retVal;
-}
-
-TQStringList
-ISpellChecker::suggestWord(const TQString& utf8Word)
-{
- ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
- int c;
-
- if (!m_bSuccessfulInit)
- return TQStringList();
-
- if (utf8Word.isEmpty() || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) ||
- utf8Word.length() == 0)
- return TQStringList();
-
- TQCString out;
- if (!m_translate_in)
- return TQStringList();
- else
- {
- /* convert to 8bit string and null terminate */
-
- int len_out = utf8Word.length();
- out = m_translate_in->fromUnicode( utf8Word, len_out );
- }
-
- if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
- makepossibilities(iWord);
- else
- return TQStringList();
-
- TQStringList sugg_arr;
- for (c = 0; c < m_pcount; c++)
- {
- TQString utf8Word;
-
- if (!m_translate_in)
- {
- /* copy to 8bit string and null terminate */
- utf8Word = TQString::fromUtf8( m_possibilities[c] );
- }
- else
- {
- /* convert to 32bit string and null terminate */
- utf8Word = m_translate_in->toUnicode( m_possibilities[c] );
- }
-
- sugg_arr.append( utf8Word );
- }
-
- return sugg_arr;
-}
-
-static void
-s_buildHashNames (std::vector<std::string> & names, const char * dict)
-{
- const char * tmp = 0;
- int i = 0;
-
- names.clear ();
-
- while ( (tmp = ispell_dirs[i++]) ) {
- TQCString maybeFile = TQCString( tmp ) + '/';
- maybeFile += dict;
- names.push_back( maybeFile.data() );
- }
-}
-
-static void
-s_allDics()
-{
- const char * tmp = 0;
- int i = 0;
-
- while ( (tmp = ispell_dirs[i++]) ) {
- TQDir dir( tmp );
- TQStringList lst = dir.entryList( "*.hash" );
- for ( TQStringList::Iterator it = lst.begin(); it != lst.end(); ++it ) {
- TQFileInfo info( *it );
- for (size_t i = 0; i < size_ispell_map; i++)
- {
- const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
- if (!strcmp (info.fileName().latin1(), mapping->dict))
- {
- ispell_dict_map.insert( mapping->lang, *it );
- }
- }
- }
- }
-}
-
-TQValueList<TQString>
-ISpellChecker::allDics()
-{
- if ( ispell_dict_map.empty() )
- s_allDics();
-
- return ispell_dict_map.keys();
-}
-
-TQString
-ISpellChecker::loadDictionary (const char * szdict)
-{
- std::vector<std::string> dict_names;
-
- s_buildHashNames (dict_names, szdict);
-
- for (size_t i = 0; i < dict_names.size(); i++)
- {
- if (linit(const_cast<char*>(dict_names[i].c_str())) >= 0)
- return dict_names[i].c_str();
- }
-
- return TQString::null;
-}
-
-/*!
- * Load ispell dictionary hash file for given language.
- *
- * \param szLang - The language tag ("en-US") we want to use
- * \return The name of the dictionary file
- */
-bool
-ISpellChecker::loadDictionaryForLanguage ( const char * szLang )
-{
- TQString hashname;
-
- const char * encoding = NULL;
- const char * szFile = NULL;
-
- for (size_t i = 0; i < size_ispell_map; i++)
- {
- const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
- if (!strcmp (szLang, mapping->lang))
- {
- szFile = mapping->dict;
- encoding = mapping->enc;
- break;
- }
- }
-
- if (!szFile || !strlen(szFile))
- return false;
-
- alloc_ispell_struct();
-
- hashname = loadDictionary(szFile);
- if (hashname.isEmpty())
- return false;
-
- // one of the two above calls succeeded
- setDictionaryEncoding (hashname, encoding);
-
- return true;
-}
-
-void
-ISpellChecker::setDictionaryEncoding( const TQString& hashname, const char * encoding )
-{
- /* Get Hash encoding from XML file. This should always work! */
- try_autodetect_charset(encoding);
-
- if (m_translate_in)
- {
- /* We still have to setup prefstringchar*/
- prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag
- : static_cast<int *>(NULL));
-
- if (prefstringchar < 0)
- {
- std::string teststring;
- for(int n1 = 1; n1 <= 15; n1++)
- {
- teststring = "latin" + n1;
- prefstringchar = findfiletype(teststring.c_str(), 1,
- deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
- if (prefstringchar >= 0)
- break;
- }
- }
-
- return; /* success */
- }
-
- /* Test for UTF-8 first */
- prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
- if (prefstringchar >= 0)
- {
- m_translate_in = TQTextCodec::codecForName("utf8");
- }
-
- if (m_translate_in)
- return; /* success */
-
- /* Test for "latinN" */
- if (!m_translate_in)
- {
- /* Look for "altstringtype" names from latin1 to latin15 */
- for(int n1 = 1; n1 <= 15; n1++)
- {
- TQString teststring = TQString("latin%1").arg(n1);
- prefstringchar = findfiletype(teststring.latin1(), 1,
- deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
- if (prefstringchar >= 0)
- {
- //FIXME: latin1 might be wrong
- m_translate_in = TQTextCodec::codecForName( teststring.latin1() );
- break;
- }
- }
- }
-
- /* If nothing found, use latin1 */
- if (!m_translate_in)
- {
- m_translate_in = TQTextCodec::codecForName("latin1");
- }
-}
-
-bool
-ISpellChecker::requestDictionary(const char *szLang)
-{
- if (!loadDictionaryForLanguage (szLang))
- {
- // handle a shortened version of the language tag: en_US => en
- std::string shortened_dict (szLang);
- size_t uscore_pos;
-
- if ((uscore_pos = shortened_dict.rfind ('_')) != ((size_t)-1)) {
- shortened_dict = shortened_dict.substr(0, uscore_pos);
- if (!loadDictionaryForLanguage (shortened_dict.c_str()))
- return false;
- } else
- return false;
- }
-
- m_bSuccessfulInit = true;
-
- if (prefstringchar < 0)
- m_defdupchar = 0;
- else
- m_defdupchar = prefstringchar;
-
- return true;
-}