From b68c6b1cbec0aa6556a489e49949da6d8790beaf Mon Sep 17 00:00:00 2001 From: Ken Wakasa Date: Fri, 10 Dec 2010 21:01:25 +0900 Subject: [PATCH] Move tools/makedict from platform/development to platform/packages/inputmethods/LatinIME The corresponding change is I01ef7084 Change-Id: I559207ab75feffe5ef4678c4a85f178a024448c5 --- tools/makedict/Android.mk | 24 -- tools/makedict/etc/Android.mk | 20 - tools/makedict/etc/makedict | 63 --- tools/makedict/etc/manifest.txt | 1 - .../com/android/tools/dict/BigramDictionary.java | 286 ------------- .../android/tools/dict/MakeBinaryDictionary.java | 443 --------------------- 6 files changed, 837 deletions(-) delete mode 100644 tools/makedict/Android.mk delete mode 100644 tools/makedict/etc/Android.mk delete mode 100755 tools/makedict/etc/makedict delete mode 100644 tools/makedict/etc/manifest.txt delete mode 100644 tools/makedict/src/com/android/tools/dict/BigramDictionary.java delete mode 100755 tools/makedict/src/com/android/tools/dict/MakeBinaryDictionary.java diff --git a/tools/makedict/Android.mk b/tools/makedict/Android.mk deleted file mode 100644 index b9fc5533..00000000 --- a/tools/makedict/Android.mk +++ /dev/null @@ -1,24 +0,0 @@ -# -# Copyright (C) 2009 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_SRC_FILES := $(call all-java-files-under,src) -LOCAL_JAR_MANIFEST := etc/manifest.txt -LOCAL_MODULE := makedict - -include $(BUILD_HOST_JAVA_LIBRARY) -include $(LOCAL_PATH)/etc/Android.mk diff --git a/tools/makedict/etc/Android.mk b/tools/makedict/etc/Android.mk deleted file mode 100644 index da162868..00000000 --- a/tools/makedict/etc/Android.mk +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (C) 2009 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) - -LOCAL_PREBUILT_EXECUTABLES := makedict -include $(BUILD_HOST_PREBUILT) - diff --git a/tools/makedict/etc/makedict b/tools/makedict/etc/makedict deleted file mode 100755 index 8420d6e5..00000000 --- a/tools/makedict/etc/makedict +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/sh -# Copyright 2009, The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Set up prog to be the path of this script, including following symlinks, -# and set up progdir to be the fully-qualified pathname of its directory. -prog="$0" -while [ -h "${prog}" ]; do - newProg=`/bin/ls -ld "${prog}"` - newProg=`expr "${newProg}" : ".* -> \(.*\)$"` - if expr "x${newProg}" : 'x/' >/dev/null; then - prog="${newProg}" - else - progdir=`dirname "${prog}"` - prog="${progdir}/${newProg}" - fi -done -oldwd=`pwd` -progdir=`dirname "${prog}"` -cd "${progdir}" -progdir=`pwd` -prog="${progdir}"/`basename "${prog}"` -cd "${oldwd}" - -jarfile=makedict.jar -frameworkdir="$progdir" -if [ ! -r "$frameworkdir/$jarfile" ] -then - frameworkdir=`dirname "$progdir"`/tools/lib - libdir=`dirname "$progdir"`/tools/lib -fi -if [ ! -r "$frameworkdir/$jarfile" ] -then - frameworkdir=`dirname "$progdir"`/framework - libdir=`dirname "$progdir"`/lib -fi -if [ ! -r "$frameworkdir/$jarfile" ] -then - echo `basename "$prog"`": can't find $jarfile" - exit 1 -fi - -if [ "$OSTYPE" = "cygwin" ] ; then - jarpath=`cygpath -w "$frameworkdir/$jarfile"` - progdir=`cygpath -w "$progdir"` -else - jarpath="$frameworkdir/$jarfile" -fi - -# need to use "java.ext.dirs" because "-jar" causes classpath to be ignored -# might need more memory, e.g. -Xmx128M -exec java -Djava.ext.dirs="$frameworkdir" -jar "$jarpath" "$@" diff --git a/tools/makedict/etc/manifest.txt b/tools/makedict/etc/manifest.txt deleted file mode 100644 index aa3a3e84..00000000 --- a/tools/makedict/etc/manifest.txt +++ /dev/null @@ -1 +0,0 @@ -Main-Class: com.android.tools.dict.MakeBinaryDictionary diff --git a/tools/makedict/src/com/android/tools/dict/BigramDictionary.java b/tools/makedict/src/com/android/tools/dict/BigramDictionary.java deleted file mode 100644 index 35115bf2..00000000 --- a/tools/makedict/src/com/android/tools/dict/BigramDictionary.java +++ /dev/null @@ -1,286 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.tools.dict; - -import org.xml.sax.Attributes; -import org.xml.sax.helpers.DefaultHandler; - -import java.io.File; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; - -import javax.xml.parsers.SAXParser; -import javax.xml.parsers.SAXParserFactory; - -/** - * Helper for MakeBinaryDictionary - * Deals with all the bigram data - */ -public class BigramDictionary { - - /* - * Must match the values in the client side which is located in dictionary.cpp & dictionary.h - * Changing these values will generate totally different structure which must be also reflected - * on the client side. - */ - public static final int FLAG_BIGRAM_READ = 0x80; - public static final int FLAG_BIGRAM_CHILDEXIST = 0x40; - public static final int FLAG_BIGRAM_CONTINUED = 0x80; - public static final int FLAG_BIGRAM_FREQ = 0x7F; - - public static final int FOR_REVERSE_LOOKUPALL = -99; - - public ArrayList mBigramToFill = new ArrayList(); - public ArrayList mBigramToFillAddress = new ArrayList(); - - public HashMap mBi; - - public boolean mHasBigram; - - public BigramDictionary(String bigramSrcFilename, boolean hasBigram) { - mHasBigram = hasBigram; - loadBigram(bigramSrcFilename); - } - - private void loadBigram(String filename) { - mBi = new HashMap(); - if (!mHasBigram) { - System.out.println("Number of bigrams = " + Bigram.sBigramNum); - return; - } - try { - SAXParser parser = SAXParserFactory.newInstance().newSAXParser(); - parser.parse(new File(filename), new DefaultHandler() { - String w1 = null; - boolean inWord1 = false; - boolean inWord2 = false; - int freq = 0, counter = 0; - Bigram tempBigram = null; - - @Override - public void startElement(String uri, String localName, - String qName, Attributes attributes) { - if (qName.equals("bi")) { - inWord1 = true; - w1 = attributes.getValue(0); - int count = Integer.parseInt(attributes.getValue(1)); - tempBigram = new Bigram(count); - counter = 0; - } else if (qName.equals("w")) { - inWord2 = true; - String word2 = attributes.getValue(0); - int freq = Integer.parseInt(attributes.getValue(1)); - tempBigram.setWord2(counter, word2, freq); - counter++; - Bigram.sBigramNum++; - } - } - - @Override - public void endElement(String uri, String localName, - String qName) { - if (inWord2) { - inWord2 = false; - } else if (inWord1) { - inWord1 = false; - mBi.put(w1, tempBigram); - } - } - }); - } catch (Exception ioe) { - System.err.println("Exception in parsing bigram\n" + ioe); - ioe.printStackTrace(); - } - System.out.println("Number of bigrams = " + Bigram.sBigramNum); - } - - byte[] writeBigrams(byte[] dict, Map mDictionary) { - for (int i = 0; i < mBigramToFill.size(); i++) { - String w1 = mBigramToFill.get(i); - int address = mBigramToFillAddress.get(i); - - Bigram temp = mBi.get(w1); - int word2Count = temp.count; - int j4; - for (int j = 0; j < word2Count; j++) { - if (!mDictionary.containsKey(temp.word2[j])) { - System.out.println("Not in dictionary: " + temp.word2[j]); - System.exit(0); - } else { - j4 = (j * 4); - int addressOfWord2 = mDictionary.get(temp.word2[j]); - dict[address + j4 + 0] = (byte) (((addressOfWord2 & 0x3F0000) >> 16) - | FLAG_BIGRAM_READ); - dict[address + j4 + 1] = (byte) ((addressOfWord2 & 0x00FF00) >> 8); - dict[address + j4 + 2] = (byte) ((addressOfWord2 & 0x0000FF)); - - if (j == (word2Count - 1)) { - dict[address + j4 + 3] = (byte) (temp.freq[j] & FLAG_BIGRAM_FREQ); - } else { - dict[address + j4 + 3] = (byte) ((temp.freq[j] & FLAG_BIGRAM_FREQ) - | FLAG_BIGRAM_CONTINUED); - } - } - } - } - - return dict; - } - - void reverseLookupAll(Map mDictionary, byte[] dict) { - Set st = mDictionary.keySet(); - for (String s : st) { - searchForTerminalNode(mDictionary.get(s), FOR_REVERSE_LOOKUPALL, dict); - } - } - - void searchForTerminalNode(int bigramAddress, int frequency, byte[] dict) { - StringBuilder sb = new StringBuilder(48); - int pos; - boolean found = false; - int followDownBranchAddress = 2; - char followingChar = ' '; - int depth = 0; - int totalLoopCount = 0; - - while (!found) { - boolean followDownAddressSearchStop = false; - boolean firstAddress = true; - boolean haveToSearchAll = true; - - if (depth > 0) { - sb.append(followingChar); - } - pos = followDownBranchAddress; // pos start at count - int count = dict[pos] & 0xFF; - pos++; - for (int i = 0; i < count; i++) { - totalLoopCount++; - // pos at data - pos++; - // pos now at flag - if (!MakeBinaryDictionary.getFirstBitOfByte(pos, dict)) { // non-terminal - if (!followDownAddressSearchStop) { - int addr = MakeBinaryDictionary.get22BitAddress(pos, dict); - if (addr > bigramAddress) { - followDownAddressSearchStop = true; - if (firstAddress) { - firstAddress = false; - haveToSearchAll = true; - } else if (!haveToSearchAll) { - break; - } - } else { - followDownBranchAddress = addr; - followingChar = (char) (0xFF & dict[pos-1]); - if(firstAddress) { - firstAddress = false; - haveToSearchAll = false; - } - } - } - pos += 3; - } else if (MakeBinaryDictionary.getFirstBitOfByte(pos, dict)) { // terminal - // found !! - if (bigramAddress == (pos-1)) { - sb.append((char) (0xFF & dict[pos-1])); - found = true; - break; - } - - // address + freq (4 byte) - if (MakeBinaryDictionary.getSecondBitOfByte(pos, dict)) { - if (!followDownAddressSearchStop) { - int addr = MakeBinaryDictionary.get22BitAddress(pos, dict); - if (addr > bigramAddress) { - followDownAddressSearchStop = true; - if (firstAddress) { - firstAddress = false; - haveToSearchAll = true; - } else if (!haveToSearchAll) { - break; - } - } else { - followDownBranchAddress = addr; - followingChar = (char) (0xFF & dict[pos-1]); - if(firstAddress) { - firstAddress = false; - haveToSearchAll = true; - } - } - } - pos += 4; - } else { // freq only (2 byte) - pos += 2; - } - // skipping bigram - int bigramExist = (dict[pos] & FLAG_BIGRAM_READ); - if (bigramExist > 0) { - int nextBigramExist = 1; - while (nextBigramExist > 0) { - pos += 3; - nextBigramExist = (dict[pos++] & FLAG_BIGRAM_CONTINUED); - } - } else { - pos++; - } - } - } - depth++; - if (followDownBranchAddress == 2) { - System.out.println("ERROR!!! Cannot find bigram!!"); - System.exit(0); - } - } - - if (frequency == FOR_REVERSE_LOOKUPALL) { - System.out.println("Reverse: " + sb.toString() + " (" + bigramAddress + ")" - + " Loop: " + totalLoopCount); - } else { - System.out.println(" bigram: " + sb.toString() + " (" + bigramAddress + ") freq: " - + frequency + " Loop: " + totalLoopCount); - } - } - - static class Bigram { - String[] word2; - int[] freq; - int count; - static int sBigramNum = 0; - - String getSecondWord(int i) { - return word2[i]; - } - - int getFrequency(int i) { - return (freq[i] == 0) ? 1 : freq[i]; - } - - void setWord2(int index, String word2, int freq) { - this.word2[index] = word2; - this.freq[index] = freq; - } - - public Bigram(int word2Count) { - count = word2Count; - word2 = new String[word2Count]; - freq = new int[word2Count]; - } - } -} diff --git a/tools/makedict/src/com/android/tools/dict/MakeBinaryDictionary.java b/tools/makedict/src/com/android/tools/dict/MakeBinaryDictionary.java deleted file mode 100755 index ca6de56e..00000000 --- a/tools/makedict/src/com/android/tools/dict/MakeBinaryDictionary.java +++ /dev/null @@ -1,443 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.tools.dict; - -import org.xml.sax.Attributes; -import org.xml.sax.helpers.DefaultHandler; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import javax.xml.parsers.SAXParser; -import javax.xml.parsers.SAXParserFactory; - -/** - * Compresses a list of words, frequencies, and bigram data - * into a tree structured binary dictionary. - * Dictionary Version: 200 (may contain bigrams) - * Version number started from 200 rather than 1 because we wanted to prevent number of roots in - * any old dictionaries being mistaken as the version number. There is not a chance that there - * will be more than 200 roots. Version number should be increased when there is structural change - * in the data. There is no need to increase the version when only the words in the data changes. - */ -public class MakeBinaryDictionary { - - private static final int VERSION_NUM = 200; - - public static final int ALPHA_SIZE = 256; - - public static final String TAG_WORD = "w"; - public static final String ATTR_FREQ = "f"; - - private static final int FLAG_ADDRESS_MASK = 0x400000; - private static final int FLAG_TERMINAL_MASK = 0x800000; - private static final int ADDRESS_MASK = 0x3FFFFF; - - /** - * Unit for this variable is in bytes - * If destination file name is main.dict and file limit causes dictionary to be separated into - * multiple file, it will generate main0.dict, main1.dict, and so forth. - */ - private static int sOutputFileSize; - private static boolean sSplitOutput; - - public static final CharNode EMPTY_NODE = new CharNode(); - - List roots; - Map mDictionary; - int mWordCount; - - BigramDictionary bigramDict; - - static class CharNode { - char data; - int freq; - boolean terminal; - List children; - static int sNodes; - - public CharNode() { - sNodes++; - } - } - - public static void usage() { - System.err.println("Usage: makedict -s [-b ] " - + "-d [--size filesize]"); - System.exit(-1); - } - - public static void main(String[] args) { - int checkSource = -1; - int checkBigram = -1; - int checkDest = -1; - int checkFileSize = -1; - for (int i = 0; i < args.length; i+=2) { - if (args[i].equals("-s")) checkSource = (i + 1); - if (args[i].equals("-b")) checkBigram = (i + 1); - if (args[i].equals("-d")) checkDest = (i + 1); - if (args[i].equals("--size")) checkFileSize = (i + 1); - } - if (checkFileSize >= 0) { - sSplitOutput = true; - sOutputFileSize = Integer.parseInt(args[checkFileSize]); - } else { - sSplitOutput = false; - } - if (checkDest >= 0 && !args[checkDest].endsWith(".dict")) { - System.err.println("Error: Dictionary output file extension should be \".dict\""); - usage(); - } else if (checkSource >= 0 && checkBigram >= 0 && checkDest >= 0 && - ((!sSplitOutput && args.length == 6) || (sSplitOutput && args.length == 8))) { - new MakeBinaryDictionary(args[checkSource], args[checkBigram], args[checkDest]); - } else if (checkSource >= 0 && checkDest >= 0 && - ((!sSplitOutput && args.length == 4) || (sSplitOutput && args.length == 6))) { - new MakeBinaryDictionary(args[checkSource], null, args[checkDest]); - } else { - usage(); - } - } - - public MakeBinaryDictionary(String srcFilename, String bigramSrcFilename, String destFilename){ - System.out.println("Generating dictionary version " + VERSION_NUM); - bigramDict = new BigramDictionary(bigramSrcFilename, (bigramSrcFilename != null)); - populateDictionary(srcFilename); - writeToDict(destFilename); - - // Enable the code below to verify that the generated tree is traversable - // and bigram data is stored correctly. - if (false) { - bigramDict.reverseLookupAll(mDictionary, dict); - traverseDict(2, new char[32], 0); - } - } - - private void populateDictionary(String filename) { - roots = new ArrayList(); - mDictionary = new HashMap(); - try { - SAXParser parser = SAXParserFactory.newInstance().newSAXParser(); - parser.parse(new File(filename), new DefaultHandler() { - boolean inWord; - int freq; - StringBuilder wordBuilder = new StringBuilder(48); - - @Override - public void startElement(String uri, String localName, - String qName, Attributes attributes) { - if (qName.equals("w")) { - inWord = true; - freq = Integer.parseInt(attributes.getValue(0)); - wordBuilder.setLength(0); - } - } - - @Override - public void characters(char[] data, int offset, int length) { - // Ignore other whitespace - if (!inWord) return; - wordBuilder.append(data, offset, length); - } - - @Override - public void endElement(String uri, String localName, - String qName) { - if (qName.equals("w")) { - if (wordBuilder.length() > 1) { - addWordTop(wordBuilder.toString(), freq); - mWordCount++; - } - inWord = false; - } - } - }); - } catch (Exception ioe) { - System.err.println("Exception in parsing\n" + ioe); - ioe.printStackTrace(); - } - System.out.println("Nodes = " + CharNode.sNodes); - } - - private int indexOf(List children, char c) { - if (children == null) { - return -1; - } - for (int i = 0; i < children.size(); i++) { - if (children.get(i).data == c) { - return i; - } - } - return -1; - } - - private void addWordTop(String word, int occur) { - if (occur > 255) occur = 255; - char firstChar = word.charAt(0); - int index = indexOf(roots, firstChar); - if (index == -1) { - CharNode newNode = new CharNode(); - newNode.data = firstChar; - newNode.freq = occur; - index = roots.size(); - roots.add(newNode); - } else { - roots.get(index).freq += occur; - } - if (word.length() > 1) { - addWordRec(roots.get(index), word, 1, occur); - } else { - roots.get(index).terminal = true; - } - } - - private void addWordRec(CharNode parent, String word, int charAt, int occur) { - CharNode child = null; - char data = word.charAt(charAt); - if (parent.children == null) { - parent.children = new ArrayList(); - } else { - for (int i = 0; i < parent.children.size(); i++) { - CharNode node = parent.children.get(i); - if (node.data == data) { - child = node; - break; - } - } - } - if (child == null) { - child = new CharNode(); - parent.children.add(child); - } - child.data = data; - if (child.freq == 0) child.freq = occur; - if (word.length() > charAt + 1) { - addWordRec(child, word, charAt + 1, occur); - } else { - child.terminal = true; - child.freq = occur; - } - } - - byte[] dict; - int dictSize; - static final int CHAR_WIDTH = 8; - static final int FLAGS_WIDTH = 1; // Terminal flag (word end) - static final int ADDR_WIDTH = 23; // Offset to children - static final int FREQ_WIDTH_BYTES = 1; - static final int COUNT_WIDTH_BYTES = 1; - - private void addCount(int count) { - dict[dictSize++] = (byte) (0xFF & count); - } - - private void addNode(CharNode node, String word1) { - if (node.terminal) { // store address of each word1 - mDictionary.put(word1, dictSize); - } - int charData = 0xFFFF & node.data; - if (charData > 254) { - dict[dictSize++] = (byte) 255; - dict[dictSize++] = (byte) ((node.data >> 8) & 0xFF); - dict[dictSize++] = (byte) (node.data & 0xFF); - } else { - dict[dictSize++] = (byte) (0xFF & node.data); - } - if (node.children != null) { - dictSize += 3; // Space for children address - } else { - dictSize += 1; // Space for just the terminal/address flags - } - if ((0xFFFFFF & node.freq) > 255) { - node.freq = 255; - } - if (node.terminal) { - byte freq = (byte) (0xFF & node.freq); - dict[dictSize++] = freq; - // bigram - if (bigramDict.mBi.containsKey(word1)) { - int count = bigramDict.mBi.get(word1).count; - bigramDict.mBigramToFill.add(word1); - bigramDict.mBigramToFillAddress.add(dictSize); - dictSize += (4 * count); - } else { - dict[dictSize++] = (byte) (0x00); - } - } - } - - int nullChildrenCount = 0; - int notTerminalCount = 0; - - private void updateNodeAddress(int nodeAddress, CharNode node, - int childrenAddress) { - if ((dict[nodeAddress] & 0xFF) == 0xFF) { // 3 byte character - nodeAddress += 2; - } - childrenAddress = ADDRESS_MASK & childrenAddress; - if (childrenAddress == 0) { - nullChildrenCount++; - } else { - childrenAddress |= FLAG_ADDRESS_MASK; - } - if (node.terminal) { - childrenAddress |= FLAG_TERMINAL_MASK; - } else { - notTerminalCount++; - } - dict[nodeAddress + 1] = (byte) (childrenAddress >> 16); - if ((childrenAddress & FLAG_ADDRESS_MASK) != 0) { - dict[nodeAddress + 2] = (byte) ((childrenAddress & 0xFF00) >> 8); - dict[nodeAddress + 3] = (byte) ((childrenAddress & 0xFF)); - } - } - - void writeWordsRec(List children, StringBuilder word) { - if (children == null || children.size() == 0) { - return; - } - final int childCount = children.size(); - addCount(childCount); - int[] childrenAddresses = new int[childCount]; - for (int j = 0; j < childCount; j++) { - CharNode node = children.get(j); - childrenAddresses[j] = dictSize; - word.append(children.get(j).data); - addNode(node, word.toString()); - word.deleteCharAt(word.length()-1); - } - for (int j = 0; j < childCount; j++) { - CharNode node = children.get(j); - int nodeAddress = childrenAddresses[j]; - int cacheDictSize = dictSize; - word.append(children.get(j).data); - writeWordsRec(node.children, word); - word.deleteCharAt(word.length()-1); - updateNodeAddress(nodeAddress, node, node.children != null - ? cacheDictSize : 0); - } - } - - void writeToDict(String dictFilename) { - // 4MB max, 22-bit offsets - dict = new byte[4 * 1024 * 1024]; // 4MB upper limit. Actual is probably - // < 1MB in most cases, as there is a limit in the - // resource size in apks. - dictSize = 0; - - dict[dictSize++] = (byte) (0xFF & VERSION_NUM); // version info - dict[dictSize++] = (byte) (0xFF & (bigramDict.mHasBigram ? 1 : 0)); - - StringBuilder word = new StringBuilder(48); - writeWordsRec(roots, word); - dict = bigramDict.writeBigrams(dict, mDictionary); - System.out.println("Dict Size = " + dictSize); - if (!sSplitOutput) { - sOutputFileSize = dictSize; - } - try { - int currentLoc = 0; - int i = 0; - int extension = dictFilename.indexOf(".dict"); - String filename = dictFilename.substring(0, extension); - while (dictSize > 0) { - FileOutputStream fos; - if (sSplitOutput) { - fos = new FileOutputStream(filename + i + ".dict"); - } else { - fos = new FileOutputStream(filename + ".dict"); - } - if (dictSize > sOutputFileSize) { - fos.write(dict, currentLoc, sOutputFileSize); - dictSize -= sOutputFileSize; - currentLoc += sOutputFileSize; - } else { - fos.write(dict, currentLoc, dictSize); - dictSize = 0; - } - fos.close(); - i++; - } - } catch (IOException ioe) { - System.err.println("Error writing dict file:" + ioe); - } - } - - void traverseDict(int pos, char[] word, int depth) { - int count = dict[pos++] & 0xFF; - for (int i = 0; i < count; i++) { - char c = (char) (dict[pos++] & 0xFF); - if (c == 0xFF) { // two byte character - c = (char) (((dict[pos] & 0xFF) << 8) | (dict[pos+1] & 0xFF)); - pos += 2; - } - word[depth] = c; - boolean terminal = getFirstBitOfByte(pos, dict); - int address = 0; - if ((dict[pos] & (FLAG_ADDRESS_MASK >> 16)) > 0) { // address check - address = get22BitAddress(pos, dict); - pos += 3; - } else { - pos += 1; - } - if (terminal) { - showWord(word, depth + 1, dict[pos] & 0xFF); - pos++; - - int bigramExist = (dict[pos] & bigramDict.FLAG_BIGRAM_READ); - if (bigramExist > 0) { - int nextBigramExist = 1; - while (nextBigramExist > 0) { - int bigramAddress = get22BitAddress(pos, dict); - pos += 3; - int frequency = (bigramDict.FLAG_BIGRAM_FREQ & dict[pos]); - bigramDict.searchForTerminalNode(bigramAddress, frequency, dict); - nextBigramExist = (dict[pos++] & bigramDict.FLAG_BIGRAM_CONTINUED); - } - } else { - pos++; - } - } - if (address != 0) { - traverseDict(address, word, depth + 1); - } - } - } - - void showWord(char[] word, int size, int freq) { - System.out.print(new String(word, 0, size) + " " + freq + "\n"); - } - - static int get22BitAddress(int pos, byte[] dict) { - return ((dict[pos + 0] & 0x3F) << 16) - | ((dict[pos + 1] & 0xFF) << 8) - | ((dict[pos + 2] & 0xFF)); - } - - static boolean getFirstBitOfByte(int pos, byte[] dict) { - return (dict[pos] & 0x80) > 0; - } - - static boolean getSecondBitOfByte(int pos, byte[] dict) { - return (dict[pos] & 0x40) > 0; - } -} -- 2.11.0