2 * Copyright (c) 2003 Nara Institute of Science and Technology
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name Nara Institute of Science and Technology may not be used to
15 * endorse or promote products derived from this software without
16 * specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY Nara Institute of Science and Technology
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
21 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Nara Institute
22 * of Science and Technology BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
24 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * $Id: dartsdic.cpp,v 1.2 2007/03/13 07:51:47 masayu-a Exp $
/*
 * Darts double-array template instantiated with char / unsigned char and
 * long / unsigned long type arguments.
 * NOTE(review): the alias name sits on a continuation line that is not
 * visible here; the code below refers to this type as DoubleArrayL,
 * so presumably that is the name being declared -- confirm.
 */
45 typedef Darts::DoubleArrayImpl<char, unsigned char, long, unsigned long>
/*
 * Hash: ordered multimap from a key string to a long value.  Being a
 * multimap, the same key may be stored several times with different values.
 * HashVal: the (key, value) pair type that is inserted into a Hash.
 */
55 typedef std::multimap<std::string, long> Hash;
56 typedef Hash::value_type HashVal;
/*
 * da_open - set up a darts_t handle from three files.
 *
 *   daname  : file handed to the double array via set_array()
 *   lexname : file mapped into da->lex_mmap
 *   datname : file mapped into da->dat_mmap
 *
 * The handle is allocated with cha_malloc() and the DoubleArrayL object
 * with new.  All three files are opened through cha_mmap_file().
 * NOTE(review): the statement that stores `darts` in the handle and the
 * return statement are not visible in this view; no check of the
 * cha_mmap_file() results is visible either -- confirm how failures are
 * handled.
 */
64 da_open(char *daname, char *lexname, char *datname)
67 DoubleArrayL *darts = new DoubleArrayL;
69 da = (darts_t*)cha_malloc(sizeof(darts_t));
70 da->da_mmap = cha_mmap_file(daname);
/* The double array uses the mapped contents directly; the second argument
 * is the mapping size in bytes divided by the array's unit size. */
71 darts->set_array(cha_mmap_map(da->da_mmap), cha_mmap_size(da->da_mmap)/darts->getUnitSize());
73 da->lex_mmap = cha_mmap_file(lexname);
74 da->dat_mmap = cha_mmap_file(datname);
/*
 * da_lookup - common-prefix search for `key` (key_len bytes).
 *
 * Forwards to commonPrefixSearch(key, indecies, num, key_len): results are
 * written to `indecies`, and `num` is passed as the result-buffer length.
 * NOTE(review): the receiver expression and the `return` are on a line not
 * visible here; presumably the search result is returned unchanged.
 */
80 da_lookup(darts_t *da, char *key, int key_len, long *indecies, int num)
83 ->commonPrefixSearch(key, indecies, num, key_len);
/*
 * da_exact_lookup - exact-match search for `key` (key_len bytes).
 *
 * Forwards to exactMatchSearch(key, key_len).
 * NOTE(review): the receiver expression and the `return` are on a line not
 * visible here; presumably the search result is returned unchanged.
 */
87 da_exact_lookup(darts_t *da, char *key, int key_len)
90 ->exactMatchSearch(key, key_len);
/* Shorthands for the mapped contents of the lex / dat files held by a
 * darts_t handle `d` (see da_open, which fills lex_mmap and dat_mmap). */
93 #define lex_map(d) cha_mmap_map((d)->lex_mmap)
94 #define dat_map(d) cha_mmap_map((d)->dat_mmap)
/*
 * da_get_lex - read one record from the mapped lex file.
 *
 *   index    : byte offset of the record inside the lex mapping
 *   lex_data : output array; receives `num` da_lex_t entries
 *   key_len  : output; receives the first short of the record
 *
 * Record layout as read here (the same layout redump_lex() writes):
 *   short key_len, short num, then num * da_lex_t.
 *
 * No bound on `num` is checked in the visible code, so the caller must
 * supply a lex_data array large enough for the record.
 * NOTE(review): the local declarations and the return statement are not
 * visible in this view; presumably `num` is returned -- confirm.
 */
97 da_get_lex(darts_t *da, long index, da_lex_t *lex_data, int *key_len)
100 char *base = (char *)lex_map(da) + index;
/* NOTE(review): reading through a short* assumes `index` leaves `base`
 * suitably aligned for short -- confirm for the target platforms. */
102 *key_len = ((short *)base)[0];
103 num = ((short *)base)[1];
/* Skip the two-short header; `base` now points at the first entry. */
104 base += sizeof(short) * 2;
106 for (i = 0; i < num; i++) {
/* Entries are copied bytewise rather than accessed in place. */
107 memcpy((void*)(lex_data + i),
108 (void*)base, sizeof(da_lex_t));
109 base += sizeof(da_lex_t);
/*
 * da_get_lex_base
 * NOTE(review): only the signature is visible in this view.  Going by the
 * name and the lex_map() macro above, this presumably returns the base
 * address of the mapped lex file -- confirm against the full body.
 */
116 da_get_lex_base(darts_t *da)
/*
 * da_get_dat_base
 * NOTE(review): only the signature is visible in this view.  Going by the
 * name and the dat_map() macro above, this presumably returns the base
 * address of the mapped dat file -- confirm against the full body.
 */
122 da_get_dat_base(darts_t *da)
/*
 * da_build_new - create an empty builder.
 *
 *   path : copied into a std::string; da_build_dump() later passes it to
 *          save() as the output file name.
 *
 * The da_build_t itself comes from cha_malloc(); its `entries` multimap and
 * `path` string are separate objects allocated with new.
 * NOTE(review): the return statement is not visible in this view;
 * presumably the new builder is returned.
 */
128 da_build_new(char *path)
132 builder = (da_build_t*)cha_malloc(sizeof(da_build_t));
133 builder->entries = new Hash;
134 builder->path = new std::string(path);
/*
 * da_build_add - register one (key, val) pair with the builder.
 *
 * `key` is converted to a std::string (so it must be NUL-terminated) and
 * inserted into the multimap; an already-present key is not replaced, the
 * new value is stored alongside the existing ones.
 */
140 da_build_add(da_build_t *builder, char *key, long val)
142 builder->entries->insert(HashVal(key, val));
/*
 * redump_lex - append one lex record for a key to `lexfile`.
 *
 *   key_len : key length, stored as the first short of the record
 *   indices : byte offsets into `tmpfile`, one per da_lex_t to emit
 *   tmpfile : base address of the buffer holding the da_lex_t entries
 *   lexfile : output stream, written at its current position
 *
 * Record written: short key_len, short count, then count * da_lex_t
 * (the layout da_get_lex() reads back).
 * NOTE(review): the declaration of `buf` and the return statement are not
 * visible in this view; the caller treats the result as the record's file
 * position, so presumably `index` is returned -- confirm.
 */
146 redump_lex(size_t key_len, std::vector<long>& indices,
147 char* tmpfile, FILE* lexfile)
/* Position of the record start, taken before anything is written. */
149 long index = ftell(lexfile);
/* NOTE(review): both header fields are narrowed to short without a range
 * check, and the fwrite() results are not examined in the visible code. */
152 buf = (short)key_len;
153 fwrite(&buf, sizeof(short), 1, lexfile);
154 buf = (short)indices.size();
155 fwrite(&buf, sizeof(short), 1, lexfile);
156 for (std::vector<long>::iterator i = indices.begin();
157 i != indices.end(); i++) {
/* Each index is a byte offset from `tmpfile` to one da_lex_t. */
158 da_lex_t* lex = (da_lex_t*)(tmpfile + *i);
159 fwrite(lex, sizeof(da_lex_t), 1, lexfile);
/*
 * da_build_dump - write the lex records and build/save the double array.
 *
 *   builder : holds the (key, offset) multimap and the output path
 *   tmpfile : base address of the buffer the stored offsets refer to
 *   lexfile : stream that receives one merged record per distinct key
 *
 * For every distinct key, all values stored under it are gathered and
 * written as a single record by redump_lex(); the value returned for that
 * record becomes the key's value in the double array.  The array is then
 * built from the collected keys/lengths/values and saved to builder->path.
 *
 * Returns the number of entries in the multimap (not the number of
 * distinct keys).  Progress counts are printed to std::cerr.
 *
 * NOTE(review): several statements are not visible in this view -- the
 * declarations of `size` and `da`, the step that advances `size`, and any
 * reset of `lex_indices` between keys.  Confirm them in the full source.
 */
166 da_build_dump(da_build_t* builder, char* tmpfile, FILE* lexfile)
168 Hash::iterator i, last;
169 Hash* entries = builder->entries;
/* Sized for the worst case of every entry having a distinct key.
 * NOTE(review): no delete[] for these three arrays is visible here. */
170 char** keys = new char*[entries->size()];
171 size_t* lens = new size_t[entries->size()];
172 long* vals = new long[entries->size()];
174 std::vector<long> lex_indices;
176 std::cerr << entries->size() << " entries" << std::endl;
178 i = entries->begin();
179 while (i != entries->end()) {
180 const std::string& key = i->first;
/* [i, last) is the run of entries sharing this key. */
181 last = entries->upper_bound(key);
183 for (; i != last; i++) {
184 lex_indices.push_back(i->second);
186 lens[size] = key.size();
/* Points into the multimap's own key storage, so `entries` must stay
 * alive and unmodified until build() has consumed the keys. */
187 keys[size] = (char*) key.data();
188 vals[size] = redump_lex(lens[size], lex_indices, tmpfile, lexfile);
/* A negative value from redump_lex() is treated as a fatal error. */
189 if (vals[size] < 0) {
190 std::cerr << "Unexpected error at " << key << std::endl;
191 cha_exit_perror("build darts file");
195 std::cerr << size << " keys" << std::endl;
198 da.build(size, keys, lens, vals);
199 da.save(builder->path->c_str(), "wb");
201 return builder->entries->size();