1 // MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
4 // Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
5 // Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
12 #include "feature_index.h"
13 #include "tokenizer.h"
14 #include "scoped_ptr.h"
22 bool empty() const { return (len_ == 0); }
26 explicit LearnerTagger(): tokenizer_(0), path_allocator_(0),
27 feature_index_(0), begin_(0), end_(0), len_(0) {}
28 virtual ~LearnerTagger() {}
31 Tokenizer<LearnerNode, LearnerPath> *tokenizer_;
32 Allocator<LearnerNode, LearnerPath> *allocator_;
33 FreeList<LearnerPath> *path_allocator_;
34 FeatureIndex *feature_index_;
35 scoped_string begin_data_;
39 std::vector<LearnerNode *> begin_node_list_;
40 std::vector<LearnerNode *> end_node_list_;
42 LearnerNode *lookup(size_t);
43 bool connect(size_t, LearnerNode *);
49 class EncoderLearnerTagger: public LearnerTagger {
51 bool open(Tokenizer<LearnerNode, LearnerPath> *tokenzier,
52 Allocator<LearnerNode, LearnerPath> *allocator,
53 FeatureIndex *feature_index,
54 size_t eval_size, size_t unk_eval_size);
55 bool read(std::istream *, std::vector<double> *);
56 int eval(size_t *, size_t *, size_t *) const;
57 double gradient(double *expected);
58 explicit EncoderLearnerTagger(): eval_size_(1024), unk_eval_size_(1024) {}
59 virtual ~EncoderLearnerTagger() { close(); }
63 size_t unk_eval_size_;
64 std::vector<LearnerPath *> ans_path_list_;
67 class DecoderLearnerTagger: public LearnerTagger {
69 bool open(const Param &);
70 bool parse(std::istream *, std::ostream *);
71 virtual ~DecoderLearnerTagger() { close(); }
74 scoped_ptr<Tokenizer<LearnerNode, LearnerPath> > tokenizer_data_;
75 scoped_ptr<Allocator<LearnerNode, LearnerPath> > allocator_data_;
76 scoped_ptr<FeatureIndex> feature_index_data_;