1 // MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
4 // Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
5 // Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
// Opens the connection matrix for the dictionary configured in `param`.
// The "dicdir" setting and MATRIX_FILE are handed to create_filename, and
// the resulting path is passed on to open(filename.c_str()); that call's
// result is returned unchanged (presumably it resolves to the
// const char* overload with a defaulted second argument -- confirm).
// NOTE(review): `¶m` in the signature looks like a mis-encoded `&param`
// (the body reads `param.get`) -- confirm against the upstream file.
16 bool Connector::open(const Param ¶m) {
17 const std::string filename = create_filename
18 (param.get<std::string>("dicdir"), MATRIX_FILE);
19 return open(filename.c_str());
// Opens `filename` through cmmap_ and reads the matrix dimensions from the
// first two elements of the opened data.
// NOTE(review): the rest of the parameter list (it must declare `mode`,
// which is used below) is on a line not visible in this excerpt.
// CHECK_FALSE presumably makes the function return false with the streamed
// message when its condition does not hold -- confirm against the macro.
22 bool Connector::open(const char* filename,
24 CHECK_FALSE(cmmap_->open(filename, mode))
25 << "cannot open: " << filename;
// Point at the start of the data first so a null result can be rejected.
27 matrix_ = cmmap_->begin();
29 CHECK_FALSE(matrix_) << "matrix is NULL" ;
// At least the two header elements (lsize, rsize) must be present.
30 CHECK_FALSE(cmmap_->size() >= 2)
31 << "file size is invalid: " << filename;
// Element 0 is the left size, element 1 the right size.
33 lsize_ = static_cast<unsigned short>((*cmmap_)[0]);
34 rsize_ = static_cast<unsigned short>((*cmmap_)[1]);
// Sanity check relating lsize_ * rsize_ + 2 to the file size; the other
// half of this comparison is on a line not visible here.
// NOTE(review): the product and sum are computed before the cast to size_t.
// If lsize_/rsize_ are unsigned short (as the casts above suggest) they
// promote to int, so very large dimensions could overflow -- confirm.
36 CHECK_FALSE(static_cast<size_t>(lsize_ * rsize_ + 2)
38 << "file size is invalid: " << filename;
// Skip the two header elements so matrix_ points at the first cost entry.
40 matrix_ = cmmap_->begin() + 2;
// NOTE(review): only the signature of close() is visible in this excerpt;
// presumably it releases what open() set up (cmmap_) -- confirm against the
// full source before relying on this.
44 void Connector::close() {
// Reads the matrix dimensions from the first line of the text file
// `filename` and stores them in lsize_ and rsize_.
48 bool Connector::openText(const char *filename) {
49 std::ifstream ifs(WPATH(filename));
// Error message recorded through WHAT; the condition guarding it (and
// whatever follows it) is on lines not visible in this excerpt.
51 WHAT << "no such file or directory: " << filename;
// The first line is expected to hold two fields separated by tab or space.
// NOTE(review): the declaration of `column` is not visible here.
55 scoped_fixed_array<char, BUF_SIZE> buf;
56 ifs.getline(buf.get(), buf.size());
57 CHECK_DIE(tokenize2(buf.get(), "\t ", column, 2) == 2)
58 << "format error: " << buf.get();
// std::atoi yields 0 for non-numeric text and performs no range check; the
// values are stored as-is without further validation in the visible code.
59 lsize_ = std::atoi(column[0]);
60 rsize_ = std::atoi(column[1]);
// Converts a text matrix definition (`ifile`) into a binary matrix file
// (`ofile`).
// Input: a header line "lsize rsize", then one "l r cost" line per entry,
// with fields separated by tab or space.
// Output: lsize and rsize as unsigned short, followed by lsize * rsize
// short values, all written as raw in-memory bytes.
64 bool Connector::compile(const char *ifile, const char *ofile) {
65 std::ifstream ifs(WPATH(ifile));
// `iss` wraps the built-in MATRIX_DEF_DEFAULT text. `is` starts out pointing
// at the file stream; the code that switches it to `iss` when the file is
// missing is presumably on lines not visible here (see the message below).
66 std::istringstream iss(MATRIX_DEF_DEFAULT);
67 std::istream *is = &ifs;
71 << " is not found. minimum setting is used." << std::endl;
// NOTE(review): the declaration of `column` is not visible in this excerpt.
77 scoped_fixed_array<char, BUF_SIZE> buf;
// Header line: the two matrix dimensions.
79 is->getline(buf.get(), buf.size());
81 CHECK_DIE(tokenize2(buf.get(), "\t ", column, 2) == 2)
82 << "format error: " << buf.get();
// NOTE(review): the int returned by std::atoi is narrowed to unsigned short
// without a range check, so negative or >65535 inputs silently wrap.
84 const unsigned short lsize = std::atoi(column[0]);
85 const unsigned short rsize = std::atoi(column[1]);
// NOTE(review): lsize * rsize is evaluated in int after integer promotion;
// dimensions near the unsigned short maximum could overflow -- confirm.
86 std::vector<short> matrix(lsize * rsize);
// The vector is already value-initialized to 0; this fill restates that.
87 std::fill(matrix.begin(), matrix.end(), 0);
89 std::cout << "reading " << ifile << " ... "
90 << lsize << "x" << rsize << std::endl;
// Each remaining line supplies one entry: left index, right index, cost.
92 while (is->getline(buf.get(), buf.size())) {
93 CHECK_DIE(tokenize2(buf.get(), "\t ", column, 3) == 3)
94 << "format error: " << buf.get();
// A negative index from atoi becomes a huge size_t and is rejected by the
// range check below.
95 const size_t l = std::atoi(column[0]);
96 const size_t r = std::atoi(column[1]);
97 const int c = std::atoi(column[2]);
98 CHECK_DIE(l < lsize && r < rsize) << "index values are out of range";
// progress_bar is called once per input line, keyed on the left index.
99 progress_bar("emitting matrix ", l + 1, lsize);
// Storage layout: entries sharing the same right index r are contiguous.
// The cost is narrowed from int to short without a range check.
100 matrix[(l + lsize * r)] = static_cast<short>(c);
// Emit the binary file: two unsigned short sizes, then the raw matrix data.
103 std::ofstream ofs(WPATH(ofile), std::ios::binary|std::ios::out);
104 CHECK_DIE(ofs) << "permission denied: " << ofile;
105 ofs.write(reinterpret_cast<const char*>(&lsize), sizeof(unsigned short));
106 ofs.write(reinterpret_cast<const char*>(&rsize), sizeof(unsigned short));
// NOTE(review): &matrix[0] is evaluated even when lsize * rsize == 0, i.e.
// on an empty vector -- confirm a zero-sized matrix cannot reach this point.
107 ofs.write(reinterpret_cast<const char*>(&matrix[0]),
108 lsize * rsize * sizeof(short));