2 Jazzy - a Java library for Spell Checking
\r
3 Copyright (C) 2001 Mindaugas Idzelis
\r
4 Full text of license can be found in LICENSE.txt
\r
6 This library is free software; you can redistribute it and/or
\r
7 modify it under the terms of the GNU Lesser General Public
\r
8 License as published by the Free Software Foundation; either
\r
9 version 2.1 of the License, or (at your option) any later version.
\r
11 This library is distributed in the hope that it will be useful,
\r
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
\r
14 Lesser General Public License for more details.
\r
16 You should have received a copy of the GNU Lesser General Public
\r
17 License along with this library; if not, write to the Free Software
\r
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
\r
20 package com.swabunga.spell.engine;
\r
22 import java.io.BufferedReader;
\r
23 import java.io.File;
\r
24 import java.io.FileNotFoundException;
\r
25 import java.io.FileReader;
\r
26 import java.io.FileWriter;
\r
27 import java.io.IOException;
\r
28 import java.util.HashMap;
\r
29 import java.util.LinkedList;
\r
30 import java.util.List;
\r
31 import java.util.Vector;
\r
34 * The SpellDictionary class holds the instance of the dictionary.
\r
36 * This class is thread safe. Derived classes should ensure that this is preserved.
\r
39 * There are many open source dictionary files. For just a few see:
\r
40 * http://wordlist.sourceforge.net/
\r
43 * This dictionary class reads words one per line. Make sure that your word list
\r
44 * is formatted in this way (most are).
\r
47 public class GenericSpellDictionary extends SpellDictionaryASpell {
\r
49 //tech_monkey: the alphabet / replace list stuff has been moved into the Transformator classes,
\r
50 //since they are so closely tied to how the phonetic transformations are done.
\r
52 // * This replace list is used if no phonetic file is supplied or it doesn't
\r
53 // * contain the alphabet.
\r
55 // protected static final char[] englishAlphabet =
\r
58 /** A field indicating the initial hash map capacity (16 * 1024 buckets) for the main
\r
59 * dictionary hash map. Interested to see what the performance of a
\r
60 * smaller initial capacity is like.
\r
62 private final static int INITIAL_CAPACITY = 16 * 1024;
\r
65 * The hashmap that contains the word dictionary. The map is hashed on the doublemeta
\r
66 * code. The map entry contains a LinkedList of words that have the same double meta code.
\r
68 @SuppressWarnings("unchecked")
\r
69 protected HashMap mainDictionary = new HashMap(INITIAL_CAPACITY);
\r
71 /** Holds the dictionary file for appending*/
\r
72 private File dictFile = null;
\r
76 * Dictionary constructor that uses the DoubleMeta class with the
\r
78 * @param wordList The file containing dictionary as a words list.
\r
79 * @throws java.io.FileNotFoundException when the words list file could not
\r
80 * be located on the system.
\r
81 * @throws java.io.IOException when problems occur while reading the words
\r
84 public GenericSpellDictionary(File wordList) throws FileNotFoundException, IOException {
\r
85 this(wordList, (File) null);
\r
89 * Dictionary constructor that uses an aspell phonetic file to
\r
90 * build the transformation table.
\r
91 * If phonetic is null, then DoubleMeta is used with the English alphabet
\r
92 * @param wordList The file containing dictionary as a words list.
\r
93 * @param phonetic The file containing the phonetic transformation
\r
95 * @throws java.io.FileNotFoundException when the words list or phonetic
\r
96 * file could not be located on the system
\r
97 * @throws java.io.IOException when problems occur while reading the
\r
98 * words list or phonetic file
\r
100 public GenericSpellDictionary(File wordList, File phonetic) throws FileNotFoundException, IOException {
\r
103 dictFile = wordList;
\r
104 createDictionary(new BufferedReader(new FileReader(wordList)));
\r
109 * Add a word permanently to the dictionary (and the dictionary file).
\r
110 * <p>This needs to be made thread safe (synchronized)</p>
\r
111 * @param word The word to add to the dictionary
\r
113 public void addWord(String word) {
\r
115 if (dictFile == null)
\r
118 FileWriter w = new FileWriter(dictFile.toString(), true);
\r
119 // Open with append.
\r
123 } catch (IOException ex) {
\r
124 System.out.println("Error writing to dictionary file");
\r
129 * Constructs the dictionary from a word list file.
\r
131 * Each word in the reader should be on a separate line.
\r
133 * This is a very slow function. On my machine it takes quite a while to
\r
134 * load the data in. I suspect that we could speed this up quite a lot.
\r
136 protected void createDictionary(BufferedReader in) throws IOException {
\r
138 while (line != null) {
\r
139 line = in.readLine();
\r
140 if (line != null) {
\r
141 line = new String(line.toCharArray());
\r
148 * Allocates a word in the dictionary
\r
150 @SuppressWarnings("unchecked")
\r
151 protected void putWord(String word) {
\r
152 String code = getCode(word);
\r
153 LinkedList list = (LinkedList) mainDictionary.get(code);
\r
154 if (list != null) {
\r
157 list = new LinkedList();
\r
159 mainDictionary.put(code, list);
\r
164 * Returns a list of strings (words) for the code.
\r
165 * @param code The phonetic code we want to find words for
\r
166 * @return the list of words having the same phonetic code
\r
168 @SuppressWarnings("unchecked")
\r
170 public List getWords(String code) {
\r
171 //Check the main dictionary.
\r
172 List mainDictResult = (List) mainDictionary.get(code);
\r
173 if (mainDictResult == null)
\r
174 return new Vector();
\r
175 return mainDictResult;
\r
179 * Returns true if the word is correctly spelled against the current word list.
\r
180 * @param word The word to be checked in the dictionary
\r
181 * @return indication if the word is in the dictionary
\r
183 @SuppressWarnings("unchecked")
\r
185 public boolean isCorrect(String word) {
\r
186 List possible = getWords(getCode(word));
\r
187 if (possible.contains(word))
\r
189 //JMH should we always try the lowercase version? If I don't, then capitalised
\r
190 //words are always returned as incorrect.
\r
191 else if (possible.contains(word.toLowerCase()))
\r