1 // MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
4 // Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
5 // Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
14 #include "stream_wrapper.h"
15 #include "scoped_ptr.h"
// Reads lines from `is` and builds, for each line, a row of strings (one
// candidate per requested evaluation level) from its CSV feature column.
// NOTE(review): this listing skips some original lines (the embedded numbers
// at the left are not contiguous), so the body of the "EOS" branch, the
// declaration of `col`, the construction of `output` and the return paths
// are not visible here.
//
// is:    input stream, consumed line by line with getline.
// r:     output table; presumably one row (`tmp`) is appended per input
//        line -- confirm against the elided lines.
// level: evaluation levels; a negative value means "use all CSV fields".
22 static bool read(std::istream *is,
23 std::vector<std::vector<std::string> > *r,
24 const std::vector<int> &level) {
30 scoped_fixed_array<char, BUF_SIZE> buf;
31 scoped_fixed_array<char *, BUF_SIZE> csv;
// The loop ends at end of input, or when getline fails (std::istream::getline
// sets failbit if a line does not fit in the supplied buffer).
33 while (is->getline(buf.get(), buf.size())) {
// A line consisting exactly of "EOS" is handled separately (body elided).
34 if (std::strcmp(buf.get(), "EOS") == 0) {
// Lines reaching this check must split on a tab into exactly two columns;
// otherwise CHECK_DIE reports "format error".
37 CHECK_DIE(tokenize(buf.get(), "\t", col, 2) == 2) << "format error";
// The second column is split as CSV into csv[1..]; slot 0 is left free by
// passing csv.get() + 1 (presumably filled from col[0] on an elided line).
39 size_t n = tokenizeCSV(col[1], csv.get() + 1, csv.size() - 1);
40 std::vector<std::string> tmp;
41 for (size_t i = 0; i < level.size(); ++i) {
// A negative level selects all n fields; an explicit level must not exceed n.
42 size_t m = level[i] < 0 ? n : level[i];
43 CHECK_DIE(m <= n) << " out of range " << level[i];
// Inclusive bound: indices 0..m are visited.
45 for (size_t j = 0; j <= m; ++j) {
// `output` is built on elided lines; presumably appended once per level --
// confirm.
51 tmp.push_back(output);
// Parses a whitespace/tab separated list of integers (e.g. "0 -1") from
// `level_str` and appends each value to `level`.
// NOTE(review): some original lines are elided in this listing (including
// the return statement and whatever sits between the strncpy and tokenize2).
59 static bool parseLevel(const char *level_str,
60 std::vector<int> *level) {
61 scoped_fixed_array<char, BUF_SIZE> buf;
62 scoped_fixed_array<char *, 512> col;
// Work on a private copy of the input string.
// NOTE(review): strncpy leaves the destination unterminated when the source
// is at least buf.size() characters long -- confirm the buffer is terminated
// before it is tokenized.
63 std::strncpy(buf.get(), level_str, buf.size());
// Split on tab or space into at most col.size() (512) tokens.
65 size_t n = tokenize2(buf.get(), "\t ", col.get(), col.size());
66 for (size_t i = 0; i < n; ++i) {
// atoi performs no error reporting; a non-numeric token yields 0.
67 level->push_back(std::atoi(col[i]));
// Formats one result line: precision, recall and F-measure as percentages.
//
// os: destination stream (its use is on lines elided from this listing).
// c:  count used as the numerator of both ratios.
// p:  denominator of the precision ratio.
// r:  denominator of the recall ratio.
72 static void printeval(std::ostream *os, size_t c, size_t p, size_t r) {
// Each ratio is reported as 0 when its denominator is 0 (avoids dividing by
// zero); 100.0 forces floating-point arithmetic.
73 double pr = (p == 0) ? 0 : 100.0 * c/p;
74 double re = (r == 0) ? 0 : 100.0 * c/r;
// Harmonic mean of precision and recall; 0 when both are 0.
75 double F = ((pr + re) == 0.0) ? 0 : 2 * pr * re /(pr + re);
76 scoped_fixed_array<char, BUF_SIZE> buf;
// NOTE(review): the format uses %d for the counts while c, p and r are
// size_t; the argument list is elided here -- confirm the values are
// converted to int before being passed. sprintf is also unbounded; snprintf
// with buf.size() would cap the write.
77 sprintf(buf.get(), "%4.4f(%d/%d) %4.4f(%d/%d) %4.4f\n",
// Entry point of the evaluation tool: parses the command line, reads the two
// files given as positional arguments, compares them level by level and
// writes a table headed "precision recall F" to the output stream.
// NOTE(review): this listing skips some original lines, so several branch
// bodies, declarations (param, i1, i2, p1, p2, prec, recall) and the return
// statements are not visible here.
89 static bool eval(int argc, char **argv) {
90 static const MeCab::Option long_options[] = {
91 { "level", 'l', "0 -1", "STR", "set level of evaluations" },
92 { "output", 'o', 0, "FILE", "set the output file name" },
93 { "version", 'v', 0, 0, "show the version and exit" },
94 { "help", 'h', 0, 0, "show this help and exit." },
// NOTE(review): open() is called twice with identical arguments; the result
// of this first call is discarded, so it looks redundant -- confirm and drop.
99 param.open(argc, argv, long_options);
101 if (!param.open(argc, argv, long_options)) {
102 std::cout << param.what() << "\n\n" << COPYRIGHT
103 << "\ntry '--help' for more information." << std::endl;
// help_version() returning false ends the run with 0.
107 if (!param.help_version()) return 0;
// Two positional arguments are required; the usage line names them
// "output" and "answer".
109 const std::vector<std::string> &files = param.rest_args();
110 if (files.size() < 2) {
111 std::cout << "Usage: " <<
112 param.program_name() << " output answer" << std::endl;
// An empty --output value falls back to "-" (presumably standard output for
// ostream_wrapper -- confirm).
116 std::string output = param.get<std::string>("output");
117 if (output.empty()) output = "-";
118 MeCab::ostream_wrapper ofs(output.c_str());
119 CHECK_DIE(*ofs) << "no such file or directory: " << output;
// NOTE(review): `system` and `answer` copy files[0] / files[1], but the
// visible lines below keep using files[...] directly.
121 const std::string system = files[0];
122 const std::string answer = files[1];
124 const std::string level_str = param.get<std::string>("level");
126 std::ifstream ifs1(WPATH(files[0].c_str()));
127 std::ifstream ifs2(WPATH(files[1].c_str()));
// NOTE(review): the message for ifs2 prints files[0], although ifs2 was
// opened from files[1] -- likely a copy-paste slip; a failure to open the
// second file would be reported with the first file's name.
129 CHECK_DIE(ifs1) << "no such file or directory: " << files[0].c_str();
130 CHECK_DIE(ifs2) << "no such file or directory: " << files[0].c_str();
131 CHECK_DIE(!level_str.empty()) << "level_str is NULL";
133 std::vector<int> level;
134 parseLevel(level_str.c_str(), &level);
135 CHECK_DIE(level.size()) << "level_str is empty: " << level_str;
// One counter per evaluation level. NOTE(review): vector<size_t>(n) already
// value-initializes its elements to 0, so the fill is redundant but harmless.
136 std::vector<size_t> result_tbl(level.size());
137 std::fill(result_tbl.begin(), result_tbl.end(), 0);
142 std::vector<std::vector<std::string> > r1;
143 std::vector<std::vector<std::string> > r2;
// Each read() call fills one table from its stream (action on failure is
// elided).
146 if (!read(&ifs1, &r1, level) || !read(&ifs2, &r2, level))
// Walk both row sequences in parallel. p1 / p2 are advanced by the length of
// column 0 of the current row -- presumably running offsets used to align
// the two sequences; the first branch condition is elided, confirm.
154 while (i1 < r1.size() && i2 < r2.size()) {
// Per-level comparison of the two current rows.
156 for (size_t i = 0; i < result_tbl.size(); ++i) {
157 if (r1[i1][i] == r2[i2][i]) {
161 p1 += r1[i1][0].size();
162 p2 += r2[i2][0].size();
// Only the first sequence advances when p1 < p2.
167 } else if (p1 < p2) {
168 p1 += r1[i1][0].size();
172 p2 += r2[i2][0].size();
// Drain whatever remains on either side (loop bodies elided).
178 while (i1 < r1.size()) {
183 while (i2 < r2.size()) {
// Report: a header, then one line per level; level -1 is labelled "ALL".
189 *ofs << " precision recall F"
191 for (size_t i = 0; i < result_tbl.size(); ++i) {
192 if (level[i] == -1) {
193 *ofs << "LEVEL ALL: ";
195 *ofs << "LEVEL " << level[i] << ": ";
197 printeval(&*ofs, result_tbl[i], prec, recall);
// Tool that reads tokenized input files and writes accumulated text to the
// output stream each time an "EOS" line is reached.
// NOTE(review): this listing skips some original lines, so the declarations
// of `param`, `col` and `str`, the code that accumulates `str`, and the
// return statements are not visible here.
204 class TestSentenceGenerator {
// Command-line entry point; returns an int status.
206 static int run(int argc, char **argv) {
207 static const MeCab::Option long_options[] = {
208 { "output", 'o', 0, "FILE", "set the output filename" },
209 { "version", 'v', 0, 0, "show the version and exit" },
210 { "help", 'h', 0, 0, "show this help and exit." },
// NOTE(review): open() is called twice with identical arguments; the result
// of this first call is discarded, so it looks redundant -- confirm and drop.
215 param.open(argc, argv, long_options);
217 if (!param.open(argc, argv, long_options)) {
218 std::cout << param.what() << "\n\n" << COPYRIGHT
219 << "\ntry '--help' for more information." << std::endl;
223 if (!param.help_version()) {
// Work on a mutable copy of the positional arguments so that "-" can be
// appended (the guarding condition is elided; presumably "no input files").
227 const std::vector<std::string> &tmp = param.rest_args();
228 std::vector<std::string> files = tmp;
230 files.push_back("-");
// An empty --output value falls back to "-".
233 std::string output = param.get<std::string>("output");
234 if (output.empty()) output = "-";
235 MeCab::ostream_wrapper ofs(output.c_str());
236 CHECK_DIE(*ofs) << "permission denied: " << output;
238 scoped_fixed_array<char, BUF_SIZE> buf;
// Process every input file in order.
241 for (size_t i = 0; i < files.size(); ++i) {
242 MeCab::istream_wrapper ifs(files[i].c_str());
243 CHECK_DIE(*ifs) << "no such file or directory: " << files[i];
244 while (ifs->getline(buf.get(), buf.size())) {
// Split on tab or space into at most two columns.
// NOTE(review): with a maximum of 2 passed to tokenize, the n <= 2 check
// looks always true; and if tokenize can return 0 (e.g. for an empty line),
// col[0] below may not have been set -- confirm tokenize's contract.
245 const size_t n = tokenize(buf.get(), "\t ", col, 2);
246 CHECK_DIE(n <= 2) << "format error: " << buf.get();
// On an "EOS" line with non-empty accumulated text, write that text as one
// line (std::endl also flushes the stream).
247 if (std::strcmp(col[0], "EOS") == 0 && !str.empty()) {
248 *ofs << str << std::endl;
// Wrapper that forwards the command line to MeCab::Eval::eval.
// NOTE(review): the `eval` visible in this file is declared `static bool`,
// so its result is converted to int here (true -> 1, false -> 0) -- confirm
// that callers expect this mapping if the value is used as an exit status.
262 int mecab_system_eval(int argc, char **argv) {
263 return MeCab::Eval::eval(argc, argv);
// Wrapper that forwards the command line to
// MeCab::TestSentenceGenerator::run and returns its int result unchanged.
266 int mecab_test_gen(int argc, char **argv) {
267 return MeCab::TestSentenceGenerator::run(argc, argv);