hash.cpp
00001 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ 00002 /* enchant 00003 * Copyright (C) 2003 Dom Lachowicz 00004 * 00005 * This library is free software; you can redistribute it and/or 00006 * modify it under the terms of the GNU Lesser General Public 00007 * License as published by the Free Software Foundation; either 00008 * version 2.1 of the License, or (at your option) any later version. 00009 * 00010 * This library is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00013 * Lesser General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU Lesser General Public 00016 * License along with this library; if not, write to the 00017 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00018 * Boston, MA 02110-1301, USA. 00019 * 00020 * In addition, as a special exception, Dom Lachowicz 00021 * gives permission to link the code of this program with 00022 * non-LGPL Spelling Provider libraries (eg: a MSFT Office 00023 * spell checker backend) and distribute linked combinations including 00024 * the two. You must obey the GNU Lesser General Public License in all 00025 * respects for all of the code used other than said providers. If you modify 00026 * this file, you may extend this exception to your version of the 00027 * file, but you are not obligated to do so. If you do not wish to 00028 * do so, delete this exception statement from your version. 00029 */ 00030 00031 /* 00032 * hash.c - a simple hash function for ispell 00033 * 00034 * Pace Willisson, 1983 00035 * 00036 * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA 00037 * All rights reserved. 00038 * 00039 * Redistribution and use in source and binary forms, with or without 00040 * modification, are permitted provided that the following conditions 00041 * are met: 00042 * 00043 * 1. Redistributions of source code must retain the above copyright 00044 * notice, this list of conditions and the following disclaimer. 00045 * 2. Redistributions in binary form must reproduce the above copyright 00046 * notice, this list of conditions and the following disclaimer in the 00047 * documentation and/or other materials provided with the distribution. 00048 * 3. All modifications to the source code must be clearly marked as 00049 * such. Binary redistributions based on modified source code 00050 * must be clearly marked as modified versions in the documentation 00051 * and/or other materials provided with the distribution. 00052 * 4. All advertising materials mentioning features or use of this software 00053 * must display the following acknowledgment: 00054 * This product includes software developed by Geoff Kuenning and 00055 * other unpaid contributors. 00056 * 5. The name of Geoff Kuenning may not be used to endorse or promote 00057 * products derived from this software without specific prior 00058 * written permission. 00059 * 00060 * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND 00061 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00062 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00063 * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE 00064 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00065 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 00066 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 00067 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00068 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 00069 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 00070 * SUCH DAMAGE. 00071 */ 00072 00073 /* 00074 * $Log$ 00075 * Revision 1.1 2004/01/31 16:44:12 zrusin 00076 * ISpell plugin. 00077 * 00078 * Revision 1.4 2003/08/14 17:51:27 dom 00079 * update license - exception clause should be Lesser GPL 00080 * 00081 * Revision 1.3 2003/07/28 20:40:26 dom 00082 * fix up the license clause, further win32-registry proof some directory getting functions 00083 * 00084 * Revision 1.2 2003/07/16 22:52:39 dom 00085 * LGPL + exception license 00086 * 00087 * Revision 1.1 2003/07/15 01:15:05 dom 00088 * ispell enchant backend 00089 * 00090 * Revision 1.2 2003/01/29 05:50:11 hippietrail 00091 * 00092 * Fixed my mess in EncodingManager. 00093 * Changed many C casts to C++ casts. 00094 * 00095 * Revision 1.1 2003/01/24 05:52:33 hippietrail 00096 * 00097 * Refactored ispell code. Old ispell global variables had been put into 00098 * an allocated structure, a pointer to which was passed to many functions. 00099 * I have now made all such functions and variables private members of the 00100 * ISpellChecker class. It was C OO, now it's C++ OO. 00101 * 00102 * I've fixed the makefiles and tested compilation but am unable to test 00103 * operation. Please back out my changes if they cause problems which 00104 * are not obvious or easy to fix. 00105 * 00106 * Revision 1.5 2002/09/19 05:31:15 hippietrail 00107 * 00108 * More Ispell cleanup. Conditional globals and DEREF macros are removed. 00109 * K&R function declarations removed, converted to Doxygen style comments 00110 * where possible. No code has been changed (I hope). Compiles for me but 00111 * unable to test. 00112 * 00113 * Revision 1.4 2002/09/17 03:03:29 hippietrail 00114 * 00115 * After seeking permission on the developer list I've reformatted all the 00116 * spelling source which seemed to have parts which used 2, 3, 4, and 8 00117 * spaces for tabs. It should all look good with our standard 4-space 00118 * tabs now. 00119 * I've concentrated just on indentation in the actual code. More prettying 00120 * could be done. 00121 * * NO code changes were made * 00122 * 00123 * Revision 1.3 2002/09/13 17:20:13 mpritchett 00124 * Fix more warnings for Linux build 00125 * 00126 * Revision 1.2 2001/05/12 16:05:42 thomasf 00127 * Big pseudo changes to ispell to make it pass around a structure rather 00128 * than rely on all sorts of gloabals willy nilly here and there. Also 00129 * fixed our spelling class to work with accepting suggestions once more. 00130 * This code is dirty, gross and ugly (not to mention still not supporting 00131 * multiple hash sized just yet) but it works on my machine and will no 00132 * doubt break other machines. 00133 * 00134 * Revision 1.1 2001/04/15 16:01:24 tomas_f 00135 * moving to spell/xp 00136 * 00137 * Revision 1.3 1998/12/29 14:55:33 eric 00138 * 00139 * I've doctored the ispell code pretty extensively here. It is now 00140 * warning-free on Win32. It also *works* on Win32 now, since I 00141 * replaced all the I/O calls with ANSI standard ones. 00142 * 00143 * Revision 1.2 1998/12/28 23:11:30 eric 00144 * 00145 * modified spell code and integration to build on Windows. 00146 * This is still a hack. 00147 * 00148 * Actually, it doesn't yet WORK on Windows. It just builds. 00149 * SpellCheckInit is failing for some reason. 00150 * 00151 * Revision 1.1 1998/12/28 18:04:43 davet 00152 * Spell checker code stripped from ispell. At this point, there are 00153 * two external routines... the Init routine, and a check-a-word routine 00154 * which returns a boolean value, and takes a 16 bit char string. 00155 * The code resembles the ispell code as much as possible still. 00156 * 00157 * Revision 1.20 1994/01/25 07:11:34 geoff 00158 * Get rid of all old RCS log lines in preparation for the 3.1 release. 00159 * 00160 */ 00161 00162 #include "ispell_checker.h" 00163 00164 /* 00165 * The following hash algorithm is due to Ian Dall, with slight modifications 00166 * by Geoff Kuenning to reflect the results of testing with the English 00167 * dictionaries actually distributed with ispell. 00168 */ 00169 #define HASHSHIFT 5 00170 00171 #ifdef NO_CAPITALIZATION_SUPPORT 00172 #define HASHUPPER(c) c 00173 #else /* NO_CAPITALIZATION_SUPPORT */ 00174 #define HASHUPPER(c) mytoupper(c) 00175 #endif /* NO_CAPITALIZATION_SUPPORT */ 00176 00177 /* 00178 * \param s 00179 * \param hashtblsize 00180 */ 00181 int ISpellChecker::hash (ichar_t *s, int hashtblsize) 00182 { 00183 register long h = 0; 00184 register int i; 00185 00186 #ifdef ICHAR_IS_CHAR 00187 for (i = 4; i-- && *s != 0; ) 00188 h = (h << 8) | HASHUPPER (*s++); 00189 #else /* ICHAR_IS_CHAR */ 00190 for (i = 2; i-- && *s != 0; ) 00191 h = (h << 16) | HASHUPPER (*s++); 00192 #endif /* ICHAR_IS_CHAR */ 00193 while (*s != 0) 00194 { 00195 /* 00196 * We have to do circular shifts the hard way, since C doesn't 00197 * have them even though the hardware probably does. Oh, well. 00198 */ 00199 h = (h << HASHSHIFT) 00200 | ((h >> (32 - HASHSHIFT)) & ((1 << HASHSHIFT) - 1)); 00201 h ^= HASHUPPER (*s++); 00202 } 00203 return static_cast<unsigned long>(h) % hashtblsize; 00204 }