00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include <stdio.h>
00033 #include <stdlib.h>
00034 #include <string.h>
00035
00036 #include <string>
00037 #include <vector>
00038
00039 #include "sp_spell.h"
00040 #include "ispell_checker.h"
00041
00042 #include <qmap.h>
00043 #include <qdir.h>
00044 #include <qfileinfo.h>
00045
00046
00047
/**
 * One row of the language lookup table: a language tag together with the
 * hash file that serves it and the encoding name recorded for that file.
 */
struct str_ispell_map
{
    const char * lang;  // language tag, e.g. "en_US"
    const char * dict;  // hash file name, e.g. "american.hash"
    const char * enc;   // encoding name, e.g. "iso-8859-1"
};
typedef struct str_ispell_map IspellMap;
00054
/*
 * Directories that are probed, in this order, for ispell hash files.
 * The trailing 0 is the sentinel the scanning loops stop on.
 */
static const char * ispell_dirs[] =
{
    "/usr/lib/ispell",
    "/usr/local/lib/ispell",
    "/usr/local/share/ispell",
    "/usr/share/ispell",
    0
};
00062 static const IspellMap ispell_map [] = {
00063 {"ca" ,"catala.hash" ,"iso-8859-1" },
00064 {"ca_ES" ,"catala.hash" ,"iso-8859-1" },
00065 {"cs" ,"czech.hash" ,"iso-8859-2" },
00066 {"cs_CZ" ,"czech.hash" ,"iso-8859-2" },
00067 {"da" ,"dansk.hash" ,"iso-8859-1" },
00068 {"da_DK" ,"dansk.hash" ,"iso-8859-1" },
00069 {"de" ,"deutsch.hash" ,"iso-8859-1" },
00070 {"de_CH" ,"swiss.hash" ,"iso-8859-1" },
00071 {"de_AT" ,"deutsch.hash" ,"iso-8859-1" },
00072 {"de_DE" ,"deutsch.hash" ,"iso-8859-1" },
00073 {"el" ,"ellhnika.hash" ,"iso-8859-7" },
00074 {"el_GR" ,"ellhnika.hash" ,"iso-8859-7" },
00075 {"en" ,"british.hash" ,"iso-8859-1" },
00076 {"en_AU" ,"british.hash" ,"iso-8859-1" },
00077 {"en_BZ" ,"british.hash" ,"iso-8859-1" },
00078 {"en_CA" ,"british.hash" ,"iso-8859-1" },
00079 {"en_GB" ,"british.hash" ,"iso-8859-1" },
00080 {"en_IE" ,"british.hash" ,"iso-8859-1" },
00081 {"en_JM" ,"british.hash" ,"iso-8859-1" },
00082 {"en_NZ" ,"british.hash" ,"iso-8859-1" },
00083 {"en_TT" ,"british.hash" ,"iso-8859-1" },
00084 {"en_ZA" ,"british.hash" ,"iso-8859-1" },
00085 {"en_ZW" ,"british.hash" ,"iso-8859-1" },
00086 {"en_PH" ,"american.hash" ,"iso-8859-1" },
00087 {"en_US" ,"american.hash" ,"iso-8859-1" },
00088 {"eo" ,"esperanto.hash" ,"iso-8859-3" },
00089 {"es" ,"espanol.hash" ,"iso-8859-1" },
00090 {"es_AR" ,"espanol.hash" ,"iso-8859-1" },
00091 {"es_BO" ,"espanol.hash" ,"iso-8859-1" },
00092 {"es_CL" ,"espanol.hash" ,"iso-8859-1" },
00093 {"es_CO" ,"espanol.hash" ,"iso-8859-1" },
00094 {"es_CR" ,"espanol.hash" ,"iso-8859-1" },
00095 {"es_DO" ,"espanol.hash" ,"iso-8859-1" },
00096 {"es_EC" ,"espanol.hash" ,"iso-8859-1" },
00097 {"es_ES" ,"espanol.hash" ,"iso-8859-1" },
00098 {"es_GT" ,"espanol.hash" ,"iso-8859-1" },
00099 {"es_HN" ,"espanol.hash" ,"iso-8859-1" },
00100 {"es_MX" ,"espanol.hash" ,"iso-8859-1" },
00101 {"es_NI" ,"espanol.hash" ,"iso-8859-1" },
00102 {"es_PA" ,"espanol.hash" ,"iso-8859-1" },
00103 {"es_PE" ,"espanol.hash" ,"iso-8859-1" },
00104 {"es_PR" ,"espanol.hash" ,"iso-8859-1" },
00105 {"es_PY" ,"espanol.hash" ,"iso-8859-1" },
00106 {"es_SV" ,"espanol.hash" ,"iso-8859-1" },
00107 {"es_UY" ,"espanol.hash" ,"iso-8859-1" },
00108 {"es_VE" ,"espanol.hash" ,"iso-8859-1" },
00109 {"fi" ,"finnish.hash" ,"iso-8859-1" },
00110 {"fi_FI" ,"finnish.hash" ,"iso-8859-1" },
00111 {"fr" ,"francais.hash" ,"iso-8859-1" },
00112 {"fr_BE" ,"francais.hash" ,"iso-8859-1" },
00113 {"fr_CA" ,"francais.hash" ,"iso-8859-1" },
00114 {"fr_CH" ,"francais.hash" ,"iso-8859-1" },
00115 {"fr_FR" ,"francais.hash" ,"iso-8859-1" },
00116 {"fr_LU" ,"francais.hash" ,"iso-8859-1" },
00117 {"fr_MC" ,"francais.hash" ,"iso-8859-1" },
00118 {"hu" ,"hungarian.hash" ,"iso-8859-2" },
00119 {"hu_HU" ,"hungarian.hash" ,"iso-8859-2" },
00120 {"ga" ,"irish.hash" ,"iso-8859-1" },
00121 {"ga_IE" ,"irish.hash" ,"iso-8859-1" },
00122 {"gl" ,"galician.hash" ,"iso-8859-1" },
00123 {"gl_ES" ,"galician.hash" ,"iso-8859-1" },
00124 {"ia" ,"interlingua.hash" ,"iso-8859-1" },
00125 {"it" ,"italian.hash" ,"iso-8859-1" },
00126 {"it_IT" ,"italian.hash" ,"iso-8859-1" },
00127 {"it_CH" ,"italian.hash" ,"iso-8859-1" },
00128 {"la" ,"mlatin.hash" ,"iso-8859-1" },
00129 {"la_IT" ,"mlatin.hash" ,"iso-8859-1" },
00130 {"lt" ,"lietuviu.hash" ,"iso-8859-13" },
00131 {"lt_LT" ,"lietuviu.hash" ,"iso-8859-13" },
00132 {"nl" ,"nederlands.hash" ,"iso-8859-1" },
00133 {"nl_NL" ,"nederlands.hash" ,"iso-8859-1" },
00134 {"nl_BE" ,"nederlands.hash" ,"iso-8859-1" },
00135 {"nb" ,"norsk.hash" ,"iso-8859-1" },
00136 {"nb_NO" ,"norsk.hash" ,"iso-8859-1" },
00137 {"nn" ,"nynorsk.hash" ,"iso-8859-1" },
00138 {"nn_NO" ,"nynorsk.hash" ,"iso-8859-1" },
00139 {"no" ,"norsk.hash" ,"iso-8859-1" },
00140 {"no_NO" ,"norsk.hash" ,"iso-8859-1" },
00141 {"pl" ,"polish.hash" ,"iso-8859-2" },
00142 {"pl_PL" ,"polish.hash" ,"iso-8859-2" },
00143 {"pt" ,"brazilian.hash" ,"iso-8859-1" },
00144 {"pt_BR" ,"brazilian.hash" ,"iso-8859-1" },
00145 {"pt_PT" ,"portugues.hash" ,"iso-8859-1" },
00146 {"ru" ,"russian.hash" ,"koi8-r" },
00147 {"ru_MD" ,"russian.hash" ,"koi8-r" },
00148 {"ru_RU" ,"russian.hash" ,"koi8-r" },
00149 {"sc" ,"sardinian.hash" ,"iso-8859-1" },
00150 {"sc_IT" ,"sardinian.hash" ,"iso-8859-1" },
00151 {"sk" ,"slovak.hash" ,"iso-8859-2" },
00152 {"sk_SK" ,"slovak.hash" ,"iso-8859-2" },
00153 {"sl" ,"slovensko.hash" ,"iso-8859-2" },
00154 {"sl_SI" ,"slovensko.hash" ,"iso-8859-2" },
00155 {"sv" ,"svenska.hash" ,"iso-8859-1" },
00156 {"sv_SE" ,"svenska.hash" ,"iso-8859-1" },
00157 {"uk" ,"ukrainian.hash" ,"koi8-u" },
00158 {"uk_UA" ,"ukrainian.hash" ,"koi8-u" },
00159 {"yi" ,"yiddish-yivo.hash" ,"utf-8" }
00160 };
00161
00162 static const size_t size_ispell_map = ( sizeof(ispell_map) / sizeof((ispell_map)[0]) );
00163 static QMap<QString, QString> ispell_dict_map;
00164
00165
00166 void
00167 ISpellChecker::try_autodetect_charset(const char * const inEncoding)
00168 {
00169 if (inEncoding && strlen(inEncoding))
00170 {
00171 m_translate_in = QTextCodec::codecForName(inEncoding);
00172 }
00173 }
00174
00175
00176
00177
00178 ISpellChecker::ISpellChecker()
00179 : deftflag(-1),
00180 prefstringchar(-1),
00181 m_bSuccessfulInit(false),
00182 m_BC(NULL),
00183 m_cd(NULL),
00184 m_cl(NULL),
00185 m_cm(NULL),
00186 m_ho(NULL),
00187 m_nd(NULL),
00188 m_so(NULL),
00189 m_se(NULL),
00190 m_ti(NULL),
00191 m_te(NULL),
00192 m_hashstrings(NULL),
00193 m_hashtbl(NULL),
00194 m_pflaglist(NULL),
00195 m_sflaglist(NULL),
00196 m_chartypes(NULL),
00197 m_infile(NULL),
00198 m_outfile(NULL),
00199 m_askfilename(NULL),
00200 m_Trynum(0),
00201 m_translate_in(0)
00202 {
00203 memset(m_sflagindex,0,sizeof(m_sflagindex));
00204 memset(m_pflagindex,0,sizeof(m_pflagindex));
00205 }
00206
00207 #ifndef FREEP
00208 #define FREEP(p) do { if (p) free(p); } while (0)
00209 #endif
00210
00211 ISpellChecker::~ISpellChecker()
00212 {
00213 if (m_bSuccessfulInit) {
00214
00215
00216 clearindex (m_pflagindex);
00217 clearindex (m_sflagindex);
00218 }
00219
00220 FREEP(m_hashtbl);
00221 FREEP(m_hashstrings);
00222 FREEP(m_sflaglist);
00223 FREEP(m_chartypes);
00224
00225 delete m_translate_in;
00226 m_translate_in = 0;
00227 }
00228
00229 bool
00230 ISpellChecker::checkWord( const QString& utf8Word )
00231 {
00232 ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
00233 if (!m_bSuccessfulInit)
00234 return false;
00235
00236 if (!utf8Word || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) || utf8Word.isEmpty())
00237 return false;
00238
00239 bool retVal = false;
00240 QCString out;
00241 if (!m_translate_in)
00242 return false;
00243 else {
00244
00245 int len_out = utf8Word.length();
00246
00247 out = m_translate_in->fromUnicode( utf8Word, len_out );
00248 }
00249
00250 if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
00251 {
00252 if (good(iWord, 0, 0, 1, 0) == 1 ||
00253 compoundgood(iWord, 1) == 1)
00254 {
00255 retVal = true;
00256 }
00257 }
00258
00259 return retVal;
00260 }
00261
00262 QStringList
00263 ISpellChecker::suggestWord(const QString& utf8Word)
00264 {
00265 ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
00266 int c;
00267
00268 if (!m_bSuccessfulInit)
00269 return QStringList();
00270
00271 if (utf8Word.isEmpty() || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) ||
00272 utf8Word.length() == 0)
00273 return QStringList();
00274
00275 QCString out;
00276 if (!m_translate_in)
00277 return QStringList();
00278 else
00279 {
00280
00281
00282 int len_out = utf8Word.length();
00283 out = m_translate_in->fromUnicode( utf8Word, len_out );
00284 }
00285
00286 if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
00287 makepossibilities(iWord);
00288 else
00289 return QStringList();
00290
00291 QStringList sugg_arr;
00292 for (c = 0; c < m_pcount; c++)
00293 {
00294 QString utf8Word;
00295
00296 if (!m_translate_in)
00297 {
00298
00299 utf8Word = QString::fromUtf8( m_possibilities[c] );
00300 }
00301 else
00302 {
00303
00304 utf8Word = m_translate_in->toUnicode( m_possibilities[c] );
00305 }
00306
00307 sugg_arr.append( utf8Word );
00308 }
00309
00310 return sugg_arr;
00311 }
00312
00313 static void
00314 s_buildHashNames (std::vector<std::string> & names, const char * dict)
00315 {
00316 const char * tmp = 0;
00317 int i = 0;
00318
00319 names.clear ();
00320
00321 while ( (tmp = ispell_dirs[i++]) ) {
00322 QCString maybeFile = QCString( tmp ) + '/';
00323 maybeFile += dict;
00324 names.push_back( maybeFile.data() );
00325 }
00326 }
00327
00328 static void
00329 s_allDics()
00330 {
00331 const char * tmp = 0;
00332 int i = 0;
00333
00334 while ( (tmp = ispell_dirs[i++]) ) {
00335 QDir dir( tmp );
00336 QStringList lst = dir.entryList( "*.hash" );
00337 for ( QStringList::Iterator it = lst.begin(); it != lst.end(); ++it ) {
00338 QFileInfo info( *it );
00339 for (size_t i = 0; i < size_ispell_map; i++)
00340 {
00341 const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
00342 if (!strcmp (info.fileName().latin1(), mapping->dict))
00343 {
00344 ispell_dict_map.insert( mapping->lang, *it );
00345 }
00346 }
00347 }
00348 }
00349 }
00350
00351 QValueList<QString>
00352 ISpellChecker::allDics()
00353 {
00354 if ( ispell_dict_map.empty() )
00355 s_allDics();
00356
00357 return ispell_dict_map.keys();
00358 }
00359
00360 QString
00361 ISpellChecker::loadDictionary (const char * szdict)
00362 {
00363 std::vector<std::string> dict_names;
00364
00365 s_buildHashNames (dict_names, szdict);
00366
00367 for (size_t i = 0; i < dict_names.size(); i++)
00368 {
00369 if (linit(const_cast<char*>(dict_names[i].c_str())) >= 0)
00370 return dict_names[i].c_str();
00371 }
00372
00373 return QString::null;
00374 }
00375
00382 bool
00383 ISpellChecker::loadDictionaryForLanguage ( const char * szLang )
00384 {
00385 QString hashname;
00386
00387 const char * encoding = NULL;
00388 const char * szFile = NULL;
00389
00390 for (size_t i = 0; i < size_ispell_map; i++)
00391 {
00392 const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
00393 if (!strcmp (szLang, mapping->lang))
00394 {
00395 szFile = mapping->dict;
00396 encoding = mapping->enc;
00397 break;
00398 }
00399 }
00400
00401 if (!szFile || !strlen(szFile))
00402 return false;
00403
00404 alloc_ispell_struct();
00405
00406 hashname = loadDictionary(szFile);
00407 if (hashname.isEmpty())
00408 return false;
00409
00410
00411 setDictionaryEncoding (hashname, encoding);
00412
00413 return true;
00414 }
00415
00416 void
00417 ISpellChecker::setDictionaryEncoding( const QString& hashname, const char * encoding )
00418 {
00419
00420 try_autodetect_charset(encoding);
00421
00422 if (m_translate_in)
00423 {
00424
00425 prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag
00426 : static_cast<int *>(NULL));
00427
00428 if (prefstringchar < 0)
00429 {
00430 std::string teststring;
00431 for(int n1 = 1; n1 <= 15; n1++)
00432 {
00433 teststring = "latin" + n1;
00434 prefstringchar = findfiletype(teststring.c_str(), 1,
00435 deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
00436 if (prefstringchar >= 0)
00437 break;
00438 }
00439 }
00440
00441 return;
00442 }
00443
00444
00445 prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
00446 if (prefstringchar >= 0)
00447 {
00448 m_translate_in = QTextCodec::codecForName("utf8");
00449 }
00450
00451 if (m_translate_in)
00452 return;
00453
00454
00455 if (!m_translate_in)
00456 {
00457
00458 for(int n1 = 1; n1 <= 15; n1++)
00459 {
00460 QString teststring = QString("latin%1").arg(n1);
00461 prefstringchar = findfiletype(teststring.latin1(), 1,
00462 deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
00463 if (prefstringchar >= 0)
00464 {
00465
00466 m_translate_in = QTextCodec::codecForName( teststring.latin1() );
00467 break;
00468 }
00469 }
00470 }
00471
00472
00473 if (!m_translate_in)
00474 {
00475 m_translate_in = QTextCodec::codecForName("latin1");
00476 }
00477 }
00478
00479 bool
00480 ISpellChecker::requestDictionary(const char *szLang)
00481 {
00482 if (!loadDictionaryForLanguage (szLang))
00483 {
00484
00485 std::string shortened_dict (szLang);
00486 size_t uscore_pos;
00487
00488 if ((uscore_pos = shortened_dict.rfind ('_')) != ((size_t)-1)) {
00489 shortened_dict = shortened_dict.substr(0, uscore_pos);
00490 if (!loadDictionaryForLanguage (shortened_dict.c_str()))
00491 return false;
00492 } else
00493 return false;
00494 }
00495
00496 m_bSuccessfulInit = true;
00497
00498 if (prefstringchar < 0)
00499 m_defdupchar = 0;
00500 else
00501 m_defdupchar = prefstringchar;
00502
00503 return true;
00504 }