good.cpp
00001 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ 00002 /* enchant 00003 * Copyright (C) 2003 Dom Lachowicz 00004 * 00005 * This library is free software; you can redistribute it and/or 00006 * modify it under the terms of the GNU Lesser General Public 00007 * License as published by the Free Software Foundation; either 00008 * version 2.1 of the License, or (at your option) any later version. 00009 * 00010 * This library is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00013 * Lesser General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU Lesser General Public 00016 * License along with this library; if not, write to the 00017 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00018 * Boston, MA 02110-1301, USA. 00019 * 00020 * In addition, as a special exception, Dom Lachowicz 00021 * gives permission to link the code of this program with 00022 * non-LGPL Spelling Provider libraries (eg: a MSFT Office 00023 * spell checker backend) and distribute linked combinations including 00024 * the two. You must obey the GNU Lesser General Public License in all 00025 * respects for all of the code used other than said providers. If you modify 00026 * this file, you may extend this exception to your version of the 00027 * file, but you are not obligated to do so. If you do not wish to 00028 * do so, delete this exception statement from your version. 00029 */ 00030 00031 /* 00032 * good.c - see if a word or its root word 00033 * is in the dictionary. 00034 * 00035 * Pace Willisson, 1983 00036 * 00037 * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA 00038 * All rights reserved. 00039 * 00040 * Redistribution and use in source and binary forms, with or without 00041 * modification, are permitted provided that the following conditions 00042 * are met: 00043 * 00044 * 1. Redistributions of source code must retain the above copyright 00045 * notice, this list of conditions and the following disclaimer. 00046 * 2. Redistributions in binary form must reproduce the above copyright 00047 * notice, this list of conditions and the following disclaimer in the 00048 * documentation and/or other materials provided with the distribution. 00049 * 3. All modifications to the source code must be clearly marked as 00050 * such. Binary redistributions based on modified source code 00051 * must be clearly marked as modified versions in the documentation 00052 * and/or other materials provided with the distribution. 00053 * 4. All advertising materials mentioning features or use of this software 00054 * must display the following acknowledgment: 00055 * This product includes software developed by Geoff Kuenning and 00056 * other unpaid contributors. 00057 * 5. The name of Geoff Kuenning may not be used to endorse or promote 00058 * products derived from this software without specific prior 00059 * written permission. 00060 * 00061 * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND 00062 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00063 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00064 * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE 00065 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00066 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 00067 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 00068 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00069 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 00070 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 00071 * SUCH DAMAGE. 00072 */ 00073 00074 /* 00075 * $Log$ 00076 * Revision 1.1 2004/01/31 16:44:12 zrusin 00077 * ISpell plugin. 00078 * 00079 * Revision 1.4 2003/08/14 17:51:26 dom 00080 * update license - exception clause should be Lesser GPL 00081 * 00082 * Revision 1.3 2003/07/28 20:40:25 dom 00083 * fix up the license clause, further win32-registry proof some directory getting functions 00084 * 00085 * Revision 1.2 2003/07/16 22:52:37 dom 00086 * LGPL + exception license 00087 * 00088 * Revision 1.1 2003/07/15 01:15:04 dom 00089 * ispell enchant backend 00090 * 00091 * Revision 1.2 2003/01/29 05:50:11 hippietrail 00092 * 00093 * Fixed my mess in EncodingManager. 00094 * Changed many C casts to C++ casts. 00095 * 00096 * Revision 1.1 2003/01/24 05:52:32 hippietrail 00097 * 00098 * Refactored ispell code. Old ispell global variables had been put into 00099 * an allocated structure, a pointer to which was passed to many functions. 00100 * I have now made all such functions and variables private members of the 00101 * ISpellChecker class. It was C OO, now it's C++ OO. 00102 * 00103 * I've fixed the makefiles and tested compilation but am unable to test 00104 * operation. Please back out my changes if they cause problems which 00105 * are not obvious or easy to fix. 00106 * 00107 * Revision 1.6 2003/01/06 18:48:38 dom 00108 * ispell cleanup, start of using new 'add' save features 00109 * 00110 * Revision 1.5 2002/09/19 05:31:15 hippietrail 00111 * 00112 * More Ispell cleanup. Conditional globals and DEREF macros are removed. 00113 * K&R function declarations removed, converted to Doxygen style comments 00114 * where possible. No code has been changed (I hope). Compiles for me but 00115 * unable to test. 00116 * 00117 * Revision 1.4 2002/09/17 03:03:29 hippietrail 00118 * 00119 * After seeking permission on the developer list I've reformatted all the 00120 * spelling source which seemed to have parts which used 2, 3, 4, and 8 00121 * spaces for tabs. It should all look good with our standard 4-space 00122 * tabs now. 00123 * I've concentrated just on indentation in the actual code. More prettying 00124 * could be done. 00125 * * NO code changes were made * 00126 * 00127 * Revision 1.3 2002/09/13 17:20:12 mpritchett 00128 * Fix more warnings for Linux build 00129 * 00130 * Revision 1.2 2001/05/12 16:05:42 thomasf 00131 * Big pseudo changes to ispell to make it pass around a structure rather 00132 * than rely on all sorts of gloabals willy nilly here and there. Also 00133 * fixed our spelling class to work with accepting suggestions once more. 00134 * This code is dirty, gross and ugly (not to mention still not supporting 00135 * multiple hash sized just yet) but it works on my machine and will no 00136 * doubt break other machines. 00137 * 00138 * Revision 1.1 2001/04/15 16:01:24 tomas_f 00139 * moving to spell/xp 00140 * 00141 * Revision 1.5 2000/02/09 22:35:25 sterwill 00142 * Clean up some warnings 00143 * 00144 * Revision 1.4 1998/12/29 14:55:32 eric 00145 * 00146 * I've doctored the ispell code pretty extensively here. It is now 00147 * warning-free on Win32. It also *works* on Win32 now, since I 00148 * replaced all the I/O calls with ANSI standard ones. 00149 * 00150 * Revision 1.3 1998/12/28 23:11:30 eric 00151 * 00152 * modified spell code and integration to build on Windows. 00153 * This is still a hack. 00154 * 00155 * Actually, it doesn't yet WORK on Windows. It just builds. 00156 * SpellCheckInit is failing for some reason. 00157 * 00158 * Revision 1.2 1998/12/28 22:16:22 eric 00159 * 00160 * These changes begin to incorporate the spell checker into AbiWord. Most 00161 * of this is a hack. 00162 * 00163 * 1. added other/spell to the -I list in config/abi_defs 00164 * 2. replaced other/spell/Makefile with one which is more like 00165 * our build system. 00166 * 3. added other/spell to other/Makefile so that the build will now 00167 * dive down and build the spell check library. 00168 * 4. added the AbiSpell library to the Makefiles in wp/main 00169 * 5. added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp. 00170 * This call is a HACK and should be replaced with something 00171 * proper later. 00172 * 6. added code to fv_View.cpp as follows: 00173 * whenever you double-click on a word, the spell checker 00174 * verifies that word and prints its status to stdout. 00175 * 00176 * Caveats: 00177 * 1. This will break the Windows build. I'm going to work on fixing it 00178 * now. 00179 * 2. This only works if your dictionary is in /usr/lib/ispell/american.hash. 00180 * The dictionary location is currently hard-coded. This will be 00181 * fixed as well. 00182 * 00183 * Anyway, such as it is, it works. 00184 * 00185 * Revision 1.1 1998/12/28 18:04:43 davet 00186 * Spell checker code stripped from ispell. At this point, there are 00187 * two external routines... the Init routine, and a check-a-word routine 00188 * which returns a boolean value, and takes a 16 bit char string. 00189 * The code resembles the ispell code as much as possible still. 00190 * 00191 * Revision 1.43 1994/11/02 06:56:05 geoff 00192 * Remove the anyword feature, which I've decided is a bad idea. 00193 * 00194 * Revision 1.42 1994/10/25 05:45:59 geoff 00195 * Add support for an affix that will work with any word, even if there's 00196 * no explicit flag. 00197 * 00198 * Revision 1.41 1994/05/24 06:23:06 geoff 00199 * Let tgood decide capitalization questions, rather than doing it ourselves. 00200 * 00201 * Revision 1.40 1994/05/17 06:44:10 geoff 00202 * Add support for controlled compound formation and the COMPOUNDONLY 00203 * option to affix flags. 00204 * 00205 * Revision 1.39 1994/01/25 07:11:31 geoff 00206 * Get rid of all old RCS log lines in preparation for the 3.1 release. 00207 * 00208 */ 00209 00210 #include <ctype.h> 00211 #include <stdio.h> 00212 #include <stdlib.h> 00213 #include <string.h> 00214 00215 #include "ispell_checker.h" 00216 00217 00218 int good P ((ichar_t * word, int ignoreflagbits, int allhits, 00219 int pfxopts, int sfxopts)); 00220 00221 #ifndef NO_CAPITALIZATION_SUPPORT 00222 00232 static int entryhasaffixes (struct dent *dent, struct success *hit) 00233 { 00234 if (hit->prefix && !TSTMASKBIT (dent->mask, hit->prefix->flagbit)) 00235 return 0; 00236 if (hit->suffix && !TSTMASKBIT (dent->mask, hit->suffix->flagbit)) 00237 return 0; 00238 return 1; /* Yes, these affixes are legal */ 00239 } 00240 00241 /* 00242 * \param word 00243 * \param hit 00244 * \param len 00245 * 00246 * \return 00247 */ 00248 int ISpellChecker::cap_ok (ichar_t *word, struct success *hit, int len) 00249 { 00250 register ichar_t * dword; 00251 register ichar_t * w; 00252 register struct dent * dent; 00253 ichar_t dentword[INPUTWORDLEN + MAXAFFIXLEN]; 00254 int preadd; 00255 int prestrip; 00256 int sufadd; 00257 ichar_t * limit; 00258 long thiscap; 00259 long dentcap; 00260 00261 thiscap = whatcap (word); 00262 /* 00263 ** All caps is always legal, regardless of affixes. 00264 */ 00265 preadd = prestrip = sufadd = 0; 00266 if (thiscap == ALLCAPS) 00267 return 1; 00268 else if (thiscap == FOLLOWCASE) 00269 { 00270 /* Set up some constants for the while(1) loop below */ 00271 if (hit->prefix) 00272 { 00273 preadd = hit->prefix->affl; 00274 prestrip = hit->prefix->stripl; 00275 } 00276 else 00277 preadd = prestrip = 0; 00278 sufadd = hit->suffix ? hit->suffix->affl : 0; 00279 } 00280 /* 00281 ** Search the variants for one that matches what we have. Note 00282 ** that thiscap can't be ALLCAPS, since we already returned 00283 ** for that case. 00284 */ 00285 dent = hit->dictent; 00286 for ( ; ; ) 00287 { 00288 dentcap = captype (dent->flagfield); 00289 if (dentcap != thiscap) 00290 { 00291 if (dentcap == ANYCASE && thiscap == CAPITALIZED 00292 && entryhasaffixes (dent, hit)) 00293 return 1; 00294 } 00295 else /* captypes match */ 00296 { 00297 if (thiscap != FOLLOWCASE) 00298 { 00299 if (entryhasaffixes (dent, hit)) 00300 return 1; 00301 } 00302 else 00303 { 00304 /* 00305 ** Make sure followcase matches exactly. 00306 ** Life is made more difficult by the 00307 ** possibility of affixes. Start with 00308 ** the prefix. 00309 */ 00310 strtoichar (dentword, dent->word, INPUTWORDLEN, 1); 00311 dword = dentword; 00312 limit = word + preadd; 00313 if (myupper (dword[prestrip])) 00314 { 00315 for (w = word; w < limit; w++) 00316 { 00317 if (mylower (*w)) 00318 goto doublecontinue; 00319 } 00320 } 00321 else 00322 { 00323 for (w = word; w < limit; w++) 00324 { 00325 if (myupper (*w)) 00326 goto doublecontinue; 00327 } 00328 } 00329 dword += prestrip; 00330 /* Do root part of word */ 00331 limit = dword + len - preadd - sufadd; 00332 while (dword < limit) 00333 { 00334 if (*dword++ != *w++) 00335 goto doublecontinue; 00336 } 00337 /* Do suffix */ 00338 dword = limit - 1; 00339 if (myupper (*dword)) 00340 { 00341 for ( ; *w; w++) 00342 { 00343 if (mylower (*w)) 00344 goto doublecontinue; 00345 } 00346 } 00347 else 00348 { 00349 for ( ; *w; w++) 00350 { 00351 if (myupper (*w)) 00352 goto doublecontinue; 00353 } 00354 } 00355 /* 00356 ** All failure paths go to "doublecontinue," 00357 ** so if we get here it must match. 00358 */ 00359 if (entryhasaffixes (dent, hit)) 00360 return 1; 00361 doublecontinue: ; 00362 } 00363 } 00364 if ((dent->flagfield & MOREVARIANTS) == 0) 00365 break; 00366 dent = dent->next; 00367 } 00368 00369 /* No matches found */ 00370 return 0; 00371 } 00372 #endif 00373 00374 #ifndef NO_CAPITALIZATION_SUPPORT 00375 00384 int ISpellChecker::good (ichar_t *w, int ignoreflagbits, int allhits, int pfxopts, int sfxopts) 00385 #else 00386 /* ARGSUSED */ 00387 int ISpellChecker::good (ichar_t *w, int ignoreflagbits, int dummy, int pfxopts, int sfxopts) 00388 #endif 00389 { 00390 ichar_t nword[INPUTWORDLEN + MAXAFFIXLEN]; 00391 register ichar_t * p; 00392 register ichar_t * q; 00393 register int n; 00394 register struct dent * dp; 00395 00396 /* 00397 ** Make an uppercase copy of the word we are checking. 00398 */ 00399 for (p = w, q = nword; *p; ) 00400 *q++ = mytoupper (*p++); 00401 *q = 0; 00402 n = q - nword; 00403 00404 m_numhits = 0; 00405 00406 if ((dp = ispell_lookup (nword, 1)) != NULL) 00407 { 00408 m_hits[0].dictent = dp; 00409 m_hits[0].prefix = NULL; 00410 m_hits[0].suffix = NULL; 00411 #ifndef NO_CAPITALIZATION_SUPPORT 00412 if (allhits || cap_ok (w, &m_hits[0], n)) 00413 m_numhits = 1; 00414 #else 00415 m_numhits = 1; 00416 #endif 00417 } 00418 00419 if (m_numhits && !allhits) 00420 return 1; 00421 00422 /* try stripping off affixes */ 00423 00424 chk_aff (w, nword, n, ignoreflagbits, allhits, pfxopts, sfxopts); 00425 00426 return m_numhits; 00427 } 00428 00429 00430 00431