2 * Copyright (C) 2009 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
22 #include "../include/spellingtable.h"
24 namespace ime_pinyin {
26 #ifdef ___BUILD_MODEL___
28 const char SpellingTable::
29 kNotSupportList[kNotSupportNum][kMaxSpellingSize + 1] = {"HM", "HNG", "NG"};
31 // "" is the biggest, so that all empty strings will be moved to the end
32 // _eb mean empty is biggest
33 int compare_raw_spl_eb(const void* p1, const void* p2) {
34 if ('\0' == (static_cast<const RawSpelling*>(p1))->str[0])
37 if ('\0' == (static_cast<const RawSpelling*>(p2))->str[0])
40 return strcmp((static_cast<const RawSpelling*>(p1))->str,
41 (static_cast<const RawSpelling*>(p2))->str);
44 size_t get_odd_next(size_t value) {
45 size_t v_next = value;
47 size_t v_next_sqrt = (size_t)sqrt(v_next);
50 for (size_t v_dv = 2; v_dv < v_next_sqrt + 1; v_dv++) {
51 if (v_next % v_dv == 0) {
67 SpellingTable::SpellingTable() {
69 raw_spellings_ = NULL;
76 SpellingTable::~SpellingTable() {
80 size_t SpellingTable::get_hash_pos(const char* spelling_str) {
82 for (size_t pos = 0; pos < spelling_size_; pos++) {
83 if ('\0' == spelling_str[pos])
85 hash_pos += (size_t)spelling_str[pos];
88 hash_pos = hash_pos % spelling_max_num_;
92 size_t SpellingTable::hash_pos_next(size_t hash_pos) {
94 hash_pos = hash_pos % spelling_max_num_;
98 void SpellingTable::free_resource() {
99 if (NULL != raw_spellings_)
100 delete [] raw_spellings_;
101 raw_spellings_ = NULL;
103 if (NULL != spelling_buf_)
104 delete [] spelling_buf_;
105 spelling_buf_ = NULL;
108 bool SpellingTable::init_table(size_t pure_spl_size, size_t spl_max_num,
110 if (pure_spl_size == 0 || spl_max_num ==0)
113 need_score_ = need_score;
117 spelling_size_ = pure_spl_size + 1;
120 spelling_max_num_ = get_odd_next(spl_max_num);
123 raw_spellings_ = new RawSpelling[spelling_max_num_];
124 spelling_buf_ = new char[spelling_max_num_ * (spelling_size_)];
125 if (NULL == raw_spellings_ || NULL == spelling_buf_) {
130 memset(raw_spellings_, 0, spelling_max_num_ * sizeof(RawSpelling));
131 memset(spelling_buf_, 0, spelling_max_num_ * (spelling_size_));
137 bool SpellingTable::put_spelling(const char* spelling_str, double freq) {
138 if (frozen_ || NULL == spelling_str)
141 for (size_t pos = 0; pos < kNotSupportNum; pos++) {
142 if (strcmp(spelling_str, kNotSupportList[pos]) == 0) {
149 size_t hash_pos = get_hash_pos(spelling_str);
151 raw_spellings_[hash_pos].str[spelling_size_ - 1] = '\0';
153 if (strncmp(raw_spellings_[hash_pos].str, spelling_str,
154 spelling_size_ - 1) == 0) {
155 raw_spellings_[hash_pos].freq += freq;
159 size_t hash_pos_ori = hash_pos;
162 if (strncmp(raw_spellings_[hash_pos].str,
163 spelling_str, spelling_size_ - 1) == 0) {
164 raw_spellings_[hash_pos].freq += freq;
168 if ('\0' == raw_spellings_[hash_pos].str[0]) {
169 raw_spellings_[hash_pos].freq += freq;
170 strncpy(raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1);
171 raw_spellings_[hash_pos].str[spelling_size_ - 1] = '\0';
176 hash_pos = hash_pos_next(hash_pos);
177 if (hash_pos_ori == hash_pos)
185 bool SpellingTable::contain(const char* spelling_str) {
186 if (NULL == spelling_str || NULL == spelling_buf_ || frozen_)
189 size_t hash_pos = get_hash_pos(spelling_str);
191 if ('\0' == raw_spellings_[hash_pos].str[0])
194 if (strncmp(raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1)
198 size_t hash_pos_ori = hash_pos;
201 hash_pos = hash_pos_next(hash_pos);
202 if (hash_pos_ori == hash_pos)
205 if ('\0' == raw_spellings_[hash_pos].str[0])
208 if (strncmp(raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1)
217 const char* SpellingTable::arrange(size_t *item_size, size_t *spl_num) {
218 if (NULL == raw_spellings_ || NULL == spelling_buf_ ||
219 NULL == item_size || NULL == spl_num)
222 qsort(raw_spellings_, spelling_max_num_, sizeof(RawSpelling),
225 // After sorting, only the first spelling_num_ items are valid.
226 // Copy them to the destination buffer.
227 for (size_t pos = 0; pos < spelling_num_; pos++) {
228 strncpy(spelling_buf_ + pos * spelling_size_, raw_spellings_[pos].str,
234 printf("------------Spelling Possiblities--------------\n");
236 double max_score = 0;
237 double min_score = 0;
239 // After sorting, only the first spelling_num_ items are valid.
240 for (size_t pos = 0; pos < spelling_num_; pos++) {
241 raw_spellings_[pos].freq /= total_freq_;
244 max_score = raw_spellings_[0].freq;
245 min_score = max_score;
247 if (raw_spellings_[pos].freq > max_score)
248 max_score = raw_spellings_[pos].freq;
249 if (raw_spellings_[pos].freq < min_score)
250 min_score = raw_spellings_[pos].freq;
256 printf("-----max psb: %f, min psb: %f\n", max_score, min_score);
258 max_score = log(max_score);
259 min_score = log(min_score);
262 printf("-----max log value: %f, min log value: %f\n",
263 max_score, min_score);
265 // The absolute value of min_score is bigger than that of max_score because
266 // both of them are negative after log function.
267 score_amplifier_ = 1.0 * 255 / min_score;
269 double average_score = 0;
270 for (size_t pos = 0; pos < spelling_num_; pos++) {
271 double score = log(raw_spellings_[pos].freq) * score_amplifier_;
274 average_score += score;
276 // Because of calculation precision issue, score might be a little bigger
277 // than 255 after being amplified.
280 char *this_spl_buf = spelling_buf_ + pos * spelling_size_;
281 this_spl_buf[spelling_size_ - 1] =
282 static_cast<char>((unsigned char)score);
285 printf("---pos:%d, %s, psb:%d\n", pos, this_spl_buf,
286 (unsigned char)this_spl_buf[spelling_size_ -1]);
289 average_score /= spelling_num_;
290 assert(average_score <= 255);
291 average_score_ = static_cast<uint8>(average_score);
294 printf("\n----Score Amplifier: %f, Average Score: %d\n", score_amplifier_,
298 *item_size = spelling_size_;
299 *spl_num = spelling_num_;
301 return spelling_buf_;
304 float SpellingTable::get_score_amplifier() {
305 return static_cast<float>(score_amplifier_);
308 unsigned char SpellingTable::get_average_score() {
309 return average_score_;
312 #endif // ___BUILD_MODEL___
313 } // namespace ime_pinyin