src/com/swabunga/spell/engine/GenericSpellDictionary.java

   1 /*\r
   2 Jazzy - a Java library for Spell Checking\r
   3 Copyright (C) 2001 Mindaugas Idzelis\r
   4 Full text of license can be found in LICENSE.txt\r
   5 \r
   6 This library is free software; you can redistribute it and/or\r
   7 modify it under the terms of the GNU Lesser General Public\r
   8 License as published by the Free Software Foundation; either\r
   9 version 2.1 of the License, or (at your option) any later version.\r
  10 \r
  11 This library is distributed in the hope that it will be useful,\r
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of\r
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
  14 Lesser General Public License for more details.\r
  15 \r
  16 You should have received a copy of the GNU Lesser General Public\r
  17 License along with this library; if not, write to the Free Software\r
  18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA\r
  19 */\r
  20 package com.swabunga.spell.engine;\r
  21 \r
  22 import java.io.BufferedReader;\r
  23 import java.io.File;\r
  24 import java.io.FileNotFoundException;\r
  25 import java.io.FileReader;\r
  26 import java.io.FileWriter;\r
  27 import java.io.IOException;\r
  28 import java.util.HashMap;\r
  29 import java.util.LinkedList;\r
  30 import java.util.List;\r
  31 import java.util.Vector;\r
  32 \r
  33 /**\r
  34  * The SpellDictionary class holds the instance of the dictionary.\r
  35  * <p>\r
  36  * This class is thread safe. Derived classes should ensure that this preserved.\r
  37  * </p>\r
  38  * <p>\r
  39  * There are many open source dictionary files. For just a few see:\r
  40  * http://wordlist.sourceforge.net/\r
  41  * </p>\r
  42  * <p>\r
  43  * This dictionary class reads words one per line. Make sure that your word list\r
  44  * is formatted in this way (most are).\r
  45  * </p>\r
  46  */\r
  47 public class GenericSpellDictionary extends SpellDictionaryASpell {\r
  48 \r
  49 //tech_monkey: the alphabet / replace list stuff has been moved into the Transformator classes,\r
  50 //since they are so closely tied to how the phonetic transformations are done.\r
  51 //    /**\r
  52 //     * This replace list is used if no phonetic file is supplied or it doesn't\r
  53 //     * contain the alphabet.\r
  54 //     */\r
  55 //    protected static final char[] englishAlphabet =\r
  56 \r
  57 \r
  58   /** A field indicating the initial hash map capacity (16KB) for the main\r
  59    *  dictionary hash map. Interested to see what the performance of a\r
  60    *  smaller initial capacity is like.\r
  61    */\r
  62   private final static int INITIAL_CAPACITY = 16 * 1024;\r
  63 \r
  64   /**\r
  65    * The hashmap that contains the word dictionary. The map is hashed on the doublemeta\r
  66    * code. The map entry contains a LinkedList of words that have the same double meta code.\r
  67    */\r
  68   @SuppressWarnings("unchecked")\r
  69 protected HashMap mainDictionary = new HashMap(INITIAL_CAPACITY);\r
  70 \r
  71   /** Holds the dictionary file for appending*/\r
  72   private File dictFile = null;\r
  73 \r
  74 \r
  75   /**\r
  76    * Dictionary constructor that uses the DoubleMeta class with the\r
  77    * English alphabet.\r
  78    * @param wordList The file containing dictionary as a words list.\r
  79    * @throws java.io.FileNotFoundException when the words list file could not \r
  80    * be located on the system.\r
  81    * @throws java.io.IOException when problems occurs while reading the words \r
  82    * list file\r
  83    */\r
  84   public GenericSpellDictionary(File wordList) throws FileNotFoundException, IOException {\r
  85     this(wordList, (File) null);\r
  86   }\r
  87 \r
  88   /**\r
  89    * Dictionary constructor that uses an aspell phonetic file to\r
  90    * build the transformation table.\r
  91    * If phonetic is null, then DoubleMeta is used with the English alphabet\r
  92    * @param wordList The file containing dictionary as a words list.\r
  93    * @param phonetic The file containing the phonetic transformation \r
  94    * information.\r
  95    * @throws java.io.FileNotFoundException when the words list or phonetic \r
  96    * file could not be located on the system\r
  97    * @throws java.io.IOException when problems occurs while reading the \r
  98    * words list or phonetic file\r
  99    */\r
 100   public GenericSpellDictionary(File wordList, File phonetic) throws FileNotFoundException, IOException {\r
 101 \r
 102     super(phonetic);\r
 103     dictFile = wordList;\r
 104     createDictionary(new BufferedReader(new FileReader(wordList)));\r
 105   }\r
 106 \r
 107 \r
 108   /**\r
 109    * Add a word permanently to the dictionary (and the dictionary file).\r
 110    * <p>This needs to be made thread safe (synchronized)</p>\r
 111    * @param word The word to add to the dictionary\r
 112    */\r
 113   public void addWord(String word) {\r
 114     putWord(word);\r
 115     if (dictFile == null)\r
 116       return;\r
 117     try {\r
 118       FileWriter w = new FileWriter(dictFile.toString(), true);\r
 119       // Open with append.\r
 120       w.write(word);\r
 121       w.write("\n");\r
 122       w.close();\r
 123     } catch (IOException ex) {\r
 124       System.out.println("Error writing to dictionary file");\r
 125     }\r
 126   }\r
 127 \r
 128   /**\r
 129    * Constructs the dictionary from a word list file.\r
 130    * <p>\r
 131    * Each word in the reader should be on a separate line.\r
 132    * <p>\r
 133    * This is a very slow function. On my machine it takes quite a while to\r
 134    * load the data in. I suspect that we could speed this up quite allot.\r
 135    */\r
 136   protected void createDictionary(BufferedReader in) throws IOException {\r
 137     String line = "";\r
 138     while (line != null) {\r
 139       line = in.readLine();\r
 140       if (line != null) {\r
 141         line = new String(line.toCharArray());\r
 142         putWord(line);\r
 143       }\r
 144     }\r
 145   }\r
 146 \r
 147   /**\r
 148    * Allocates a word in the dictionary\r
 149    */\r
 150   @SuppressWarnings("unchecked")\r
 151 protected void putWord(String word) {\r
 152     String code = getCode(word);\r
 153     LinkedList list = (LinkedList) mainDictionary.get(code);\r
 154     if (list != null) {\r
 155       list.add(word);\r
 156     } else {\r
 157       list = new LinkedList();\r
 158       list.add(word);\r
 159       mainDictionary.put(code, list);\r
 160     }\r
 161   }\r
 162 \r
 163   /**\r
 164    * Returns a list of strings (words) for the code.\r
 165    * @param code The phonetic code we want to find words for\r
 166    * @return the list of words having the same phonetic code\r
 167    */\r
 168   @SuppressWarnings("unchecked")\r
 169 @Override\r
 170 public List getWords(String code) {\r
 171     //Check the main dictionary.\r
 172     List mainDictResult = (List) mainDictionary.get(code);\r
 173     if (mainDictResult == null)\r
 174       return new Vector();\r
 175     return mainDictResult;\r
 176   }\r
 177 \r
 178   /**\r
 179    * Returns true if the word is correctly spelled against the current word list.\r
 180    * @param word The word to checked in the dictionary\r
 181    * @return indication if the word is in the dictionary\r
 182    */\r
 183   @SuppressWarnings("unchecked")\r
 184 @Override\r
 185 public boolean isCorrect(String word) {\r
 186     List possible = getWords(getCode(word));\r
 187     if (possible.contains(word))\r
 188       return true;\r
 189     //JMH should we always try the lowercase version. If I dont then capitalised\r
 190     //words are always returned as incorrect.\r
 191     else if (possible.contains(word.toLowerCase()))\r
 192       return true;\r
 193     return false;\r
 194   }\r
 195 }\r