OSDN Git Service

Initial commit
[wordring-tm/wordring-tm.git] / third_party / mecab-0.996 / src / learner_tagger.h
1 //  MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
2 //
3 //
4 //  Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
5 //  Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
6 #ifndef MECAB_TAGGER_H
7 #define MECAB_TAGGER_H
8
9 #include <vector>
10 #include "mecab.h"
11 #include "freelist.h"
12 #include "feature_index.h"
13 #include "tokenizer.h"
14 #include "scoped_ptr.h"
15
16 namespace MeCab {
17
18 class FeatureIndex;
19
20 class LearnerTagger {
21  public:
22   bool empty() const { return (len_ == 0); }
23   void close() {}
24   void clear() {}
25
26   explicit LearnerTagger(): tokenizer_(0), path_allocator_(0),
27                             feature_index_(0), begin_(0), end_(0), len_(0) {}
28   virtual ~LearnerTagger() {}
29
30  protected:
31   Tokenizer<LearnerNode, LearnerPath> *tokenizer_;
32   Allocator<LearnerNode, LearnerPath> *allocator_;
33   FreeList<LearnerPath>               *path_allocator_;
34   FeatureIndex                        *feature_index_;
35   scoped_string                        begin_data_;
36   const char                          *begin_;
37   const char                          *end_;
38   size_t                               len_;
39   std::vector<LearnerNode *>           begin_node_list_;
40   std::vector<LearnerNode *>           end_node_list_;
41
42   LearnerNode *lookup(size_t);
43   bool connect(size_t, LearnerNode *);
44   bool viterbi();
45   bool buildLattice();
46   bool initList();
47 };
48
49 class EncoderLearnerTagger: public LearnerTagger {
50  public:
51   bool open(Tokenizer<LearnerNode, LearnerPath> *tokenzier,
52             Allocator<LearnerNode, LearnerPath> *allocator,
53             FeatureIndex *feature_index,
54             size_t eval_size, size_t unk_eval_size);
55   bool read(std::istream *, std::vector<double> *);
56   int eval(size_t *, size_t *, size_t *) const;
57   double gradient(double *expected);
58   explicit EncoderLearnerTagger(): eval_size_(1024), unk_eval_size_(1024) {}
59   virtual ~EncoderLearnerTagger() { close(); }
60
61  private:
62   size_t eval_size_;
63   size_t unk_eval_size_;
64   std::vector<LearnerPath *> ans_path_list_;
65 };
66
67 class DecoderLearnerTagger: public LearnerTagger {
68  public:
69   bool open(const Param &);
70   bool parse(std::istream *, std::ostream *);
71   virtual ~DecoderLearnerTagger() { close(); }
72
73  private:
74   scoped_ptr<Tokenizer<LearnerNode, LearnerPath> > tokenizer_data_;
75   scoped_ptr<Allocator<LearnerNode, LearnerPath> > allocator_data_;
76   scoped_ptr<FeatureIndex> feature_index_data_;
77 };
78 }
79
80 #endif