2 MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
4 Copyright(C) 2001-2011 Taku Kudo <taku@chasen.org>
5 Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
10 /* C/C++ common data structures */
13 * DictionaryInfo structure
15 struct mecab_dictionary_info_t {
17 * filename of dictionary
18 * On Windows, filename is stored in UTF-8 encoding
23 * character set of the dictionary. e.g., "SHIFT-JIS", "UTF-8"
28 * How many words are registered in this dictionary.
34 * this value should be MECAB_USR_DIC, MECAB_SYS_DIC, or MECAB_UNK_DIC.
39 * left attributes size
44 * right attributes size
49 * version of this dictionary
51 unsigned short version;
54 * pointer to the next dictionary info.
56 struct mecab_dictionary_info_t *next;
64 * pointer to the right node
66 struct mecab_node_t* rnode;
69 * pointer to the next right path
71 struct mecab_path_t* rnext;
74 * pointer to the left node
76 struct mecab_node_t* lnode;
79 * pointer to the next left path
82 struct mecab_path_t* lnext;
90 * marginal probability
100 * pointer to the previous node.
102 struct mecab_node_t *prev;
105 * pointer to the next node.
107 struct mecab_node_t *next;
110 * pointer to the node which ends at the same position.
112 struct mecab_node_t *enext;
115 * pointer to the node which starts at the same position.
117 struct mecab_node_t *bnext;
120 * pointer to the right path.
121 * this value is NULL if MECAB_ONE_BEST mode.
123 struct mecab_path_t *rpath;
126 * pointer to the left path.
127 * this value is NULL if MECAB_ONE_BEST mode.
129 struct mecab_path_t *lpath;
133 * this value is not 0 terminated.
134 * You can get the length with length/rlength members.
149 * length of the surface form.
151 unsigned short length;
154 * length of the surface form including white space before the morph.
156 unsigned short rlength;
161 unsigned short rcAttr;
166 unsigned short lcAttr;
169 * unique part of speech id. This value is defined in "pos.def" file.
171 unsigned short posid;
176 unsigned char char_type;
179 * status of this node.
180 * This value is MECAB_NOR_NODE, MECAB_UNK_NODE, MECAB_BOS_NODE, MECAB_EOS_NODE, or MECAB_EON_NODE.
185 * set 1 if this node is best node.
187 unsigned char isbest;
190 * forward accumulative log summation.
191 * This value is only available when MECAB_MARGINAL_PROB is passed.
196 * backward accumulative log summation.
197 * This value is only available when MECAB_MARGINAL_PROB is passed.
202 * marginal probability.
203 * This value is only available when MECAB_MARGINAL_PROB is passed.
213 * best accumulative cost from bos node to this node.
219 * Parameters for MeCab::Node::stat
223 * Normal node defined in the dictionary.
227 * Unknown node not defined in the dictionary.
231 * Virtual node representing the beginning of the sentence.
235 * Virtual node representing the end of the sentence.
240 * Virtual node representing the end of the N-best enumeration.
246 * Parameters for MeCab::DictionaryInfo::type
250 * This is a system dictionary.
255 * This is a user dictionary.
260 * This is an unknown word dictionary.
266 * Parameters for MeCab::Lattice::request_type
270 * One best result is obtained (default mode)
274 * Set this flag if you want to obtain N best results.
278 * Set this flag if you want to enable a partial parsing mode.
279 * When this flag is set, the input |sentence| needs to be written
280 * in partial parsing format.
284 * Set this flag if you want to obtain marginal probabilities.
285 * Marginal probability is set in MeCab::Node::prob.
286 * The parsing speed will get 3-5 times slower than the default mode.
288 MECAB_MARGINAL_PROB = 8,
290 * Set this flag if you want to obtain alternative results.
293 MECAB_ALTERNATIVE = 16,
295 * When this flag is set, the result linked-list (Node::next/prev)
296 * traverses all nodes in the lattice.
298 MECAB_ALL_MORPHS = 32,
301 * When this flag is set, tagger internally copies the body of passed
302 * sentence into internal buffer.
304 MECAB_ALLOCATE_SENTENCE = 64
308 * Parameters for MeCab::Lattice::boundary_constraint_type
312 * The token boundary is not specified.
314 MECAB_ANY_BOUNDARY = 0,
317 * The position is a strong token boundary.
319 MECAB_TOKEN_BOUNDARY = 1,
322 * The position is not a token boundary.
324 MECAB_INSIDE_TOKEN = 2
341 # define MECAB_DLL_EXTERN __declspec(dllexport)
342 # define MECAB_DLL_CLASS_EXTERN __declspec(dllexport)
344 # define MECAB_DLL_EXTERN __declspec(dllimport)
348 #ifndef MECAB_DLL_EXTERN
349 # define MECAB_DLL_EXTERN extern
352 #ifndef MECAB_DLL_CLASS_EXTERN
353 # define MECAB_DLL_CLASS_EXTERN
356 typedef struct mecab_t mecab_t;
357 typedef struct mecab_model_t mecab_model_t;
358 typedef struct mecab_lattice_t mecab_lattice_t;
359 typedef struct mecab_dictionary_info_t mecab_dictionary_info_t;
360 typedef struct mecab_node_t mecab_node_t;
361 typedef struct mecab_path_t mecab_path_t;
366 /* old mecab interface */
368 * C wrapper of MeCab::Tagger::create(argc, argv)
370 MECAB_DLL_EXTERN mecab_t* mecab_new(int argc, char **argv);
373 * C wrapper of MeCab::Tagger::create(arg)
375 MECAB_DLL_EXTERN mecab_t* mecab_new2(const char *arg);
378 * C wrapper of MeCab::Tagger::version()
380 MECAB_DLL_EXTERN const char* mecab_version();
383 * C wrapper of MeCab::getLastError()
385 MECAB_DLL_EXTERN const char* mecab_strerror(mecab_t *mecab);
388 * C wrapper of MeCab::deleteTagger(tagger)
390 MECAB_DLL_EXTERN void mecab_destroy(mecab_t *mecab);
393 * C wrapper of MeCab::Tagger::partial()
395 MECAB_DLL_EXTERN int mecab_get_partial(mecab_t *mecab);
398 * C wrapper of MeCab::Tagger::set_partial()
400 MECAB_DLL_EXTERN void mecab_set_partial(mecab_t *mecab, int partial);
403 * C wrapper of MeCab::Tagger::theta()
405 MECAB_DLL_EXTERN float mecab_get_theta(mecab_t *mecab);
408 * C wrapper of MeCab::Tagger::set_theta()
410 MECAB_DLL_EXTERN void mecab_set_theta(mecab_t *mecab, float theta);
413 * C wrapper of MeCab::Tagger::lattice_level()
415 MECAB_DLL_EXTERN int mecab_get_lattice_level(mecab_t *mecab);
418 * C wrapper of MeCab::Tagger::set_lattice_level()
420 MECAB_DLL_EXTERN void mecab_set_lattice_level(mecab_t *mecab, int level);
423 * C wrapper of MeCab::Tagger::all_morphs()
425 MECAB_DLL_EXTERN int mecab_get_all_morphs(mecab_t *mecab);
428 * C wrapper of MeCab::Tagger::set_all_morphs()
430 MECAB_DLL_EXTERN void mecab_set_all_morphs(mecab_t *mecab, int all_morphs);
433 * C wrapper of MeCab::Tagger::parse(MeCab::Lattice *lattice)
435 MECAB_DLL_EXTERN int mecab_parse_lattice(mecab_t *mecab, mecab_lattice_t *lattice);
438 * C wrapper of MeCab::Tagger::parse(const char *str)
440 MECAB_DLL_EXTERN const char* mecab_sparse_tostr(mecab_t *mecab, const char *str);
443 * C wrapper of MeCab::Tagger::parse(const char *str, size_t len)
445 MECAB_DLL_EXTERN const char* mecab_sparse_tostr2(mecab_t *mecab, const char *str, size_t len);
448 * C wrapper of MeCab::Tagger::parse(const char *str, size_t len, char *ostr, size_t olen)
450 MECAB_DLL_EXTERN char* mecab_sparse_tostr3(mecab_t *mecab, const char *str, size_t len,
451 char *ostr, size_t olen);
454 * C wrapper of MeCab::Tagger::parseToNode(const char *str)
456 MECAB_DLL_EXTERN const mecab_node_t* mecab_sparse_tonode(mecab_t *mecab, const char*);
459 * C wrapper of MeCab::Tagger::parseToNode(const char *str, size_t len)
461 MECAB_DLL_EXTERN const mecab_node_t* mecab_sparse_tonode2(mecab_t *mecab, const char*, size_t);
464 * C wrapper of MeCab::Tagger::parseNBest(size_t N, const char *str)
466 MECAB_DLL_EXTERN const char* mecab_nbest_sparse_tostr(mecab_t *mecab, size_t N, const char *str);
469 * C wrapper of MeCab::Tagger::parseNBest(size_t N, const char *str, size_t len)
471 MECAB_DLL_EXTERN const char* mecab_nbest_sparse_tostr2(mecab_t *mecab, size_t N,
472 const char *str, size_t len);
475 * C wrapper of MeCab::Tagger::parseNBest(size_t N, const char *str, size_t len, char *ostr, size_t olen)
477 MECAB_DLL_EXTERN char* mecab_nbest_sparse_tostr3(mecab_t *mecab, size_t N,
478 const char *str, size_t len,
479 char *ostr, size_t olen);
482 * C wrapper of MeCab::Tagger::parseNBestInit(const char *str)
484 MECAB_DLL_EXTERN int mecab_nbest_init(mecab_t *mecab, const char *str);
487 * C wrapper of MeCab::Tagger::parseNBestInit(const char *str, size_t len)
489 MECAB_DLL_EXTERN int mecab_nbest_init2(mecab_t *mecab, const char *str, size_t len);
492 * C wrapper of MeCab::Tagger::next()
494 MECAB_DLL_EXTERN const char* mecab_nbest_next_tostr(mecab_t *mecab);
497 * C wrapper of MeCab::Tagger::next(char *ostr, size_t olen)
499 MECAB_DLL_EXTERN char* mecab_nbest_next_tostr2(mecab_t *mecab, char *ostr, size_t olen);
502 * C wrapper of MeCab::Tagger::nextNode()
504 MECAB_DLL_EXTERN const mecab_node_t* mecab_nbest_next_tonode(mecab_t *mecab);
507 * C wrapper of MeCab::Tagger::formatNode(const Node *node)
509 MECAB_DLL_EXTERN const char* mecab_format_node(mecab_t *mecab, const mecab_node_t *node);
512 * C wrapper of MeCab::Tagger::dictionary_info()
514 MECAB_DLL_EXTERN const mecab_dictionary_info_t* mecab_dictionary_info(mecab_t *mecab);
516 /* lattice interface */
518 * C wrapper of MeCab::createLattice()
520 MECAB_DLL_EXTERN mecab_lattice_t *mecab_lattice_new();
523 * C wrapper of MeCab::deleteLattice(lattice)
525 MECAB_DLL_EXTERN void mecab_lattice_destroy(mecab_lattice_t *lattice);
528 * C wrapper of MeCab::Lattice::clear()
530 MECAB_DLL_EXTERN void mecab_lattice_clear(mecab_lattice_t *lattice);
533 * C wrapper of MeCab::Lattice::is_available()
536 MECAB_DLL_EXTERN int mecab_lattice_is_available(mecab_lattice_t *lattice);
539 * C wrapper of MeCab::Lattice::bos_node()
541 MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_get_bos_node(mecab_lattice_t *lattice);
544 * C wrapper of MeCab::Lattice::eos_node()
546 MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_get_eos_node(mecab_lattice_t *lattice);
549 * C wrapper of MeCab::Lattice::begin_nodes()
552 MECAB_DLL_EXTERN mecab_node_t **mecab_lattice_get_all_begin_nodes(mecab_lattice_t *lattice);
554 * C wrapper of MeCab::Lattice::end_nodes()
556 MECAB_DLL_EXTERN mecab_node_t **mecab_lattice_get_all_end_nodes(mecab_lattice_t *lattice);
559 * C wrapper of MeCab::Lattice::begin_nodes(pos)
561 MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_get_begin_nodes(mecab_lattice_t *lattice, size_t pos);
564 * C wrapper of MeCab::Lattice::end_nodes(pos)
566 MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_get_end_nodes(mecab_lattice_t *lattice, size_t pos);
569 * C wrapper of MeCab::Lattice::sentence()
571 MECAB_DLL_EXTERN const char *mecab_lattice_get_sentence(mecab_lattice_t *lattice);
574 * C wrapper of MeCab::Lattice::set_sentence(sentence)
576 MECAB_DLL_EXTERN void mecab_lattice_set_sentence(mecab_lattice_t *lattice, const char *sentence);
579 * C wrapper of MeCab::Lattice::set_sentence(sentence, len)
582 MECAB_DLL_EXTERN void mecab_lattice_set_sentence2(mecab_lattice_t *lattice, const char *sentence, size_t len);
585 * C wrapper of MeCab::Lattice::size()
587 MECAB_DLL_EXTERN size_t mecab_lattice_get_size(mecab_lattice_t *lattice);
590 * C wrapper of MeCab::Lattice::Z()
592 MECAB_DLL_EXTERN double mecab_lattice_get_z(mecab_lattice_t *lattice);
595 * C wrapper of MeCab::Lattice::set_Z()
597 MECAB_DLL_EXTERN void mecab_lattice_set_z(mecab_lattice_t *lattice, double Z);
600 * C wrapper of MeCab::Lattice::theta()
602 MECAB_DLL_EXTERN double mecab_lattice_get_theta(mecab_lattice_t *lattice);
605 * C wrapper of MeCab::Lattice::set_theta()
608 MECAB_DLL_EXTERN void mecab_lattice_set_theta(mecab_lattice_t *lattice, double theta);
611 * C wrapper of MeCab::Lattice::next()
613 MECAB_DLL_EXTERN int mecab_lattice_next(mecab_lattice_t *lattice);
616 * C wrapper of MeCab::Lattice::request_type()
618 MECAB_DLL_EXTERN int mecab_lattice_get_request_type(mecab_lattice_t *lattice);
621 * C wrapper of MeCab::Lattice::has_request_type()
623 MECAB_DLL_EXTERN int mecab_lattice_has_request_type(mecab_lattice_t *lattice, int request_type);
626 * C wrapper of MeCab::Lattice::set_request_type()
628 MECAB_DLL_EXTERN void mecab_lattice_set_request_type(mecab_lattice_t *lattice, int request_type);
631 * C wrapper of MeCab::Lattice::add_request_type()
634 MECAB_DLL_EXTERN void mecab_lattice_add_request_type(mecab_lattice_t *lattice, int request_type);
637 * C wrapper of MeCab::Lattice::remove_request_type()
639 MECAB_DLL_EXTERN void mecab_lattice_remove_request_type(mecab_lattice_t *lattice, int request_type);
642 * C wrapper of MeCab::Lattice::newNode();
644 MECAB_DLL_EXTERN mecab_node_t *mecab_lattice_new_node(mecab_lattice_t *lattice);
647 * C wrapper of MeCab::Lattice::toString()
649 MECAB_DLL_EXTERN const char *mecab_lattice_tostr(mecab_lattice_t *lattice);
652 * C wrapper of MeCab::Lattice::toString(buf, size)
654 MECAB_DLL_EXTERN const char *mecab_lattice_tostr2(mecab_lattice_t *lattice, char *buf, size_t size);
657 * C wrapper of MeCab::Lattice::enumNBestAsString(N)
659 MECAB_DLL_EXTERN const char *mecab_lattice_nbest_tostr(mecab_lattice_t *lattice, size_t N);
662 * C wrapper of MeCab::Lattice::enumNBestAsString(N, buf, size)
665 MECAB_DLL_EXTERN const char *mecab_lattice_nbest_tostr2(mecab_lattice_t *lattice, size_t N, char *buf, size_t size);
668 * C wrapper of MeCab::Lattice::has_constraint()
670 MECAB_DLL_EXTERN int mecab_lattice_has_constraint(mecab_lattice_t *lattice);
673 * C wrapper of MeCab::Lattice::boundary_constraint(pos)
675 MECAB_DLL_EXTERN int mecab_lattice_get_boundary_constraint(mecab_lattice_t *lattice, size_t pos);
679 * C wrapper of MeCab::Lattice::feature_constraint(pos)
681 MECAB_DLL_EXTERN const char *mecab_lattice_get_feature_constraint(mecab_lattice_t *lattice, size_t pos);
684 * C wrapper of MeCab::Lattice::set_boundary_constraint(pos, boundary_type)
686 MECAB_DLL_EXTERN void mecab_lattice_set_boundary_constraint(mecab_lattice_t *lattice, size_t pos, int boundary_type);
689 * C wrapper of MeCab::Lattice::set_feature_constraint(begin_pos, end_pos, feature)
691 MECAB_DLL_EXTERN void mecab_lattice_set_feature_constraint(mecab_lattice_t *lattice, size_t begin_pos, size_t end_pos, const char *feature);
694 * C wrapper of MeCab::Lattice::set_result(result);
696 MECAB_DLL_EXTERN void mecab_lattice_set_result(mecab_lattice_t *lattice, const char *result);
699 * C wrapper of MeCab::Lattice::what()
701 MECAB_DLL_EXTERN const char *mecab_lattice_strerror(mecab_lattice_t *lattice);
704 /* model interface */
706 * C wrapper of MeCab::Model::create(argc, argv)
708 MECAB_DLL_EXTERN mecab_model_t *mecab_model_new(int argc, char **argv);
711 * C wrapper of MeCab::Model::create(arg)
713 MECAB_DLL_EXTERN mecab_model_t *mecab_model_new2(const char *arg);
716 * C wrapper of MeCab::deleteModel(model)
719 MECAB_DLL_EXTERN void mecab_model_destroy(mecab_model_t *model);
722 * C wrapper of MeCab::Model::createTagger()
724 MECAB_DLL_EXTERN mecab_t *mecab_model_new_tagger(mecab_model_t *model);
727 * C wrapper of MeCab::Model::createLattice()
729 MECAB_DLL_EXTERN mecab_lattice_t *mecab_model_new_lattice(mecab_model_t *model);
732 * C wrapper of MeCab::Model::swap()
734 MECAB_DLL_EXTERN int mecab_model_swap(mecab_model_t *model, mecab_model_t *new_model);
737 * C wrapper of MeCab::Model::dictionary_info()
739 MECAB_DLL_EXTERN const mecab_dictionary_info_t* mecab_model_dictionary_info(mecab_model_t *model);
742 * C wrapper of MeCab::Model::transition_cost()
744 MECAB_DLL_EXTERN int mecab_model_transition_cost(mecab_model_t *model,
745 unsigned short rcAttr,
746 unsigned short lcAttr);
749 * C wrapper of MeCab::Model::lookup()
751 MECAB_DLL_EXTERN mecab_node_t *mecab_model_lookup(mecab_model_t *model,
754 mecab_lattice_t *lattice);
756 /* static functions */
757 MECAB_DLL_EXTERN int mecab_do(int argc, char **argv);
758 MECAB_DLL_EXTERN int mecab_dict_index(int argc, char **argv);
759 MECAB_DLL_EXTERN int mecab_dict_gen(int argc, char **argv);
760 MECAB_DLL_EXTERN int mecab_cost_train(int argc, char **argv);
761 MECAB_DLL_EXTERN int mecab_system_eval(int argc, char **argv);
762 MECAB_DLL_EXTERN int mecab_test_gen(int argc, char **argv);
773 typedef struct mecab_dictionary_info_t DictionaryInfo;
774 typedef struct mecab_path_t Path;
775 typedef struct mecab_node_t Node;
777 template <typename N, typename P> class Allocator;
783 class MECAB_DLL_CLASS_EXTERN Lattice {
786 * Clear all internal lattice data.
788 virtual void clear() = 0;
791 * Return true if result object is available.
794 virtual bool is_available() const = 0;
797 * Return bos (begin of sentence) node.
798 * You can obtain all nodes via "for (const Node *node = lattice->bos_node(); node; node = node->next) {}"
799 * @return bos node object
801 virtual Node *bos_node() const = 0;
804 * Return eos (end of sentence) node.
805 * @return eos node object
807 virtual Node *eos_node() const = 0;
811 * This method is used internally.
813 virtual Node **begin_nodes() const = 0;
816 * This method is used internally.
818 virtual Node **end_nodes() const = 0;
822 * Return node linked list ending at |pos|.
823 * You can obtain all nodes via "for (const Node *node = lattice->end_nodes(pos); node; node = node->enext) {}"
824 * @param pos position of nodes. 0 <= pos < size()
825 * @return node linked list
827 virtual Node *end_nodes(size_t pos) const = 0;
830 * Return node linked list starting at |pos|.
831 * You can obtain all nodes via "for (const Node *node = lattice->begin_nodes(pos); node; node = node->bnext) {}"
832 * @param pos position of nodes. 0 <= pos < size()
833 * @return node linked list
835 virtual Node *begin_nodes(size_t pos) const = 0;
839 * If MECAB_NBEST or MECAB_PARTIAL mode is off, the returned pointer is the same as the one set by set_sentence().
842 virtual const char *sentence() const = 0;
845 * Set sentence. This method does not take the ownership of the object.
846 * @param sentence sentence
848 virtual void set_sentence(const char *sentence) = 0;
852 * Set sentence. This method does not take the ownership of the object.
853 * @param sentence sentence
854 * @param len length of the sentence
856 virtual void set_sentence(const char *sentence, size_t len) = 0;
860 * Return sentence size.
861 * @return sentence size
863 virtual size_t size() const = 0;
866 * Set normalization factor of CRF.
867 * @param Z new normalization factor.
869 virtual void set_Z(double Z) = 0;
872 * return normalization factor of CRF.
873 * @return normalization factor.
875 virtual double Z() const = 0;
878 * Set temperature parameter theta.
879 * @param theta temperature parameter.
881 virtual void set_theta(float theta) = 0;
884 * Return temperature parameter theta.
885 * @return temperature parameter.
887 virtual float theta() const = 0;
890 * Obtain next-best result. The internal linked list structure is updated.
891 * You should set MECAB_NBEST request_type in advance.
892 * Return false if no more results are available or request_type is invalid.
895 virtual bool next() = 0;
898 * Return the current request type.
899 * @return request type
901 virtual int request_type() const = 0;
904 * Return true if the object has a specified request type.
907 virtual bool has_request_type(int request_type) const = 0;
911 * @param request_type new request type assigned
913 virtual void set_request_type(int request_type) = 0;
917 * @param request_type new request type added
919 virtual void add_request_type(int request_type) = 0;
922 * Remove request type.
923 * @param request_type request type to be removed
925 virtual void remove_request_type(int request_type) = 0;
929 * This method is used internally.
931 virtual Allocator<Node, Path> *allocator() const = 0;
935 * Return new node. The Lattice object has the ownership of the node.
936 * @return new node object
938 virtual Node *newNode() = 0;
941 * Return string representation of the lattice.
942 * Returned object is managed by this instance. When clear/set_sentence() method
943 * is called, the returned buffer is initialized.
944 * @return string representation of the lattice
946 virtual const char *toString() = 0;
949 * Return string representation of the node.
950 * Returned object is managed by this instance. When clear/set_sentence() method
951 * is called, the returned buffer is initialized.
952 * @return string representation of the node
953 * @param node node object
955 virtual const char *toString(const Node *node) = 0;
958 * Return string representation of the N-best results.
959 * Returned object is managed by this instance. When clear/set_sentence() method
960 * is called, the returned buffer is initialized.
961 * @return string representation of the N-best results
962 * @param N how many results you want to obtain
964 virtual const char *enumNBestAsString(size_t N) = 0;
968 * Return string representation of the lattice.
969 * Result is saved in the specified buffer.
970 * @param buf output buffer
971 * @param size output buffer size
972 * @return string representation of the lattice
974 virtual const char *toString(char *buf, size_t size) = 0;
977 * Return string representation of the node.
978 * Result is saved in the specified buffer.
979 * @param node node object
980 * @param buf output buffer
981 * @param size output buffer size
982 * @return string representation of the node
984 virtual const char *toString(const Node *node,
985 char *buf, size_t size) = 0;
988 * Return string representation of the N-best result.
989 * Result is saved in the specified buffer.
990 * @param N how many results you want to obtain
991 * @param buf output buffer
992 * @param size output buffer size
993 * @return string representation of the N-best result
995 virtual const char *enumNBestAsString(size_t N, char *buf, size_t size) = 0;
999 * Returns true if any parsing constraint is set
1001 virtual bool has_constraint() const = 0;
1004 * Returns the boundary constraint at the position.
1005 * @param pos the position of constraint
1006 * @return boundary constraint type
1008 virtual int boundary_constraint(size_t pos) const = 0;
1011 * Returns the token constraint at the position.
1012 * @param pos the beginning position of constraint.
1013 * @return constrained node starting at the position.
1015 virtual const char *feature_constraint(size_t pos) const = 0;
1018 * Set parsing constraint for partial parsing mode.
1019 * @param pos the position of the boundary
1020 * @param boundary_constraint_type the type of boundary
1022 virtual void set_boundary_constraint(size_t pos,
1023 int boundary_constraint_type) = 0;
1026 * Set parsing constraint for partial parsing mode.
1027 * @param begin_pos the starting position of the constrained token.
1028 * @param end_pos the ending position of the constrained token.
1029 * @param feature the feature of the constrained token.
1031 virtual void set_feature_constraint(
1032 size_t begin_pos, size_t end_pos,
1033 const char *feature) = 0;
1036 * Set golden parsing results for unittesting.
1037 * @param result the parsing result written in the standard mecab output.
1039 virtual void set_result(const char *result) = 0;
1042 * Return error string.
1043 * @return error string
1045 virtual const char *what() const = 0;
1048 * Set error string. given string is copied to the internal buffer.
1049 * @param str new error string
1051 virtual void set_what(const char *str) = 0;
1055 * Create new Lattice object
1056 * @return new Lattice object
1058 static Lattice *create();
1061 virtual ~Lattice() {}
1067 class MECAB_DLL_CLASS_EXTERN Model {
1070 * Return DictionaryInfo linked list.
1071 * @return DictionaryInfo linked list
1073 virtual const DictionaryInfo *dictionary_info() const = 0;
1076 * Return transition cost from rcAttr to lcAttr.
1077 * @return transition cost
1079 virtual int transition_cost(unsigned short rcAttr,
1080 unsigned short lcAttr) const = 0;
1083 * perform common prefix search from the range [begin, end).
1084 * |lattice| takes the ownership of return value.
1085 * @return node linked list.
1087 virtual Node *lookup(const char *begin, const char *end,
1088 Lattice *lattice) const = 0;
1091 * Create a new Tagger object.
1092 * All returned tagger object shares this model object as a parsing model.
1093 * Never delete this model object before deleting tagger object.
1094 * @return new Tagger object
1096 virtual Tagger *createTagger() const = 0;
1099 * Create a new Lattice object.
1100 * @return new Lattice object
1102 virtual Lattice *createLattice() const = 0;
1105 * Swap the instance with |model|.
1106 * The ownership of |model| always moves to this instance,
1107 * meaning that passed |model| will no longer be accessible after calling this method.
1108 * return true if new model is swapped successfully.
1109 * This method is thread safe. All taggers created by
1110 * Model::createTagger() method will also be updated asynchronously.
1111 * No need to stop the parsing thread explicitly before swapping model object.
1113 * @param model new model which is going to be swapped with the current model.
1115 virtual bool swap(Model *model) = 0;
1118 * Return a version string
1119 * @return version string
1121 static const char *version();
1127 * Factory method to create a new Model with a specified main's argc/argv-style parameters.
1128 * Return NULL if new model cannot be initialized. Use MeCab::getLastError() to obtain the
1129 * cause of the errors.
1130 * @return new Model object
1131 * @param argc number of parameters
1132 * @param argv parameter list
1134 static Model* create(int argc, char **argv);
1137 * Factory method to create a new Model with a string parameter representation, i.e.,
1138 * "-d /usr/local/mecab/dic/ipadic -Ochasen".
1139 * Return NULL if new model cannot be initialized. Use MeCab::getLastError() to obtain the
1140 * cause of the errors.
1141 * @return new Model object
1142 * @param arg single string representation of the argument.
1144 static Model* create(const char *arg);
1151 class MECAB_DLL_CLASS_EXTERN Tagger {
1154 * Handy static method.
1155 * Return true if lattice is parsed successfully.
1156 * This function is equivalent to
1158 * Tagger *tagger = model.createTagger();
1159 * const bool result = tagger->parse(lattice);
1165 static bool parse(const Model &model, Lattice *lattice);
1168 * Parse lattice object.
1169 * Return true if lattice is parsed successfully.
1170 * A sentence must be set to the lattice with Lattice::set_sentence() before calling this method.
1171 * Parsed node object can be obtained with Lattice::bos_node().
1172 * This method is thread safe.
1173 * @param lattice lattice object
1176 virtual bool parse(Lattice *lattice) const = 0;
1179 * Parse given sentence and return parsed result as string.
1180 * You should not delete the returned string. The returned buffer
1181 * is overwritten when parse method is called again.
1182 * This method is NOT thread safe.
1183 * @param str sentence
1184 * @return parsed result
1186 virtual const char* parse(const char *str) = 0;
1189 * Parse given sentence and return Node object.
1190 * You should not delete the returned node object. The returned buffer
1191 * is overwritten when parse method is called again.
1192 * You can traverse all nodes via Node::next member.
1193 * This method is NOT thread safe.
1194 * @param str sentence
1195 * @return bos node object
1197 virtual const Node* parseToNode(const char *str) = 0;
1200 * Parse given sentence and obtain N-best results as a string format.
1201 * Currently, N must be 1 <= N <= 512 due to the limitation of the buffer size.
1202 * You should not delete the returned string. The returned buffer
1203 * is overwritten when parse method is called again.
1204 * This method is DEPRECATED. Use Lattice class.
1205 * @param N how many results you want to obtain
1206 * @param str sentence
1207 * @return parsed result
1209 virtual const char* parseNBest(size_t N, const char *str) = 0;
1212 * Initialize N-best enumeration with a sentence.
1213 * Return true if initialization finishes successfully.
1214 * N-best result is obtained by calling next() or nextNode() in sequence.
1215 * This method is NOT thread safe.
1216 * This method is DEPRECATED. Use Lattice class.
1217 * @param str sentence
1220 virtual bool parseNBestInit(const char *str) = 0;
1223 * Return next-best parsed result. You must call parseNBestInit() in advance.
1224 * Return NULL if no more result is available.
1225 * This method is NOT thread safe.
1226 * This method is DEPRECATED. Use Lattice class.
1227 * @return node object
1229 virtual const Node* nextNode() = 0;
1232 * Return next-best parsed result. You must call parseNBestInit() in advance.
1233 * Return NULL if no more result is available.
1234 * This method is NOT thread safe.
1235 * This method is DEPRECATED. Use Lattice class.
1236 * @return parsed result
1238 virtual const char* next() = 0;
1241 * Return formatted node object. The format is specified with
1242 * --unk-format, --bos-format, --eos-format, and --eon-format respectively.
1243 * You should not delete the returned string. The returned buffer
1244 * is overwritten when parse method is called again.
1245 * This method is NOT thread safe.
1246 * This method is DEPRECATED. Use Lattice class.
1247 * @param node node object.
1248 * @return parsed result
1250 virtual const char* formatNode(const Node *node) = 0;
1254 * The same as parse() method, but input length and output buffer are passed.
1255 * Return parsed result as string. The result pointer is the same as |ostr|.
1256 * Return NULL, if parsed result string cannot be stored within |olen| bytes.
1257 * @param str sentence
1258 * @param len sentence length
1259 * @param ostr output buffer
1260 * @param olen output buffer length
1261 * @return parsed result
1263 virtual const char* parse(const char *str, size_t len, char *ostr, size_t olen) = 0;
1266 * The same as parse() method, but input length can be passed.
1267 * @param str sentence
1268 * @param len sentence length
1269 * @return parsed result
1271 virtual const char* parse(const char *str, size_t len) = 0;
1274 * The same as parseToNode(), but input length can be passed.
1275 * @param str sentence
1276 * @param len sentence length
1277 * @return node object
1279 virtual const Node* parseToNode(const char *str, size_t len) = 0;
1282 * The same as parseNBest(), but input length can be passed.
1283 * @param N how many results you want to obtain
1284 * @param str sentence
1285 * @param len sentence length
1286 * @return parsed result
1288 virtual const char* parseNBest(size_t N, const char *str, size_t len) = 0;
1291 * The same as parseNBestInit(), but input length can be passed.
1292 * @param str sentence
1293 * @param len sentence length
1295 * @return true if initialization finishes successfully
1297 virtual bool parseNBestInit(const char *str, size_t len) = 0;
1300 * The same as next(), but output buffer can be passed.
1301 * Return NULL if more than |olen| buffer is required to store output string.
1302 * @param ostr output buffer
1303 * @param olen output buffer length
1304 * @return parsed result
1306 virtual const char* next(char *ostr , size_t olen) = 0;
1309 * The same as parseNBest(), but input length and output buffer can be passed.
1310 * Return NULL if more than |olen| buffer is required to store output string.
1311 * @param N how many results you want to obtain
1312 * @param str input sentence
1313 * @param len input sentence length
1314 * @param ostr output buffer
1315 * @param olen output buffer length
1316 * @return parsed result
1318 virtual const char* parseNBest(size_t N, const char *str,
1319 size_t len, char *ostr, size_t olen) = 0;
1322 * The same as formatNode(), but output buffer can be passed.
1323 * Return NULL if more than |olen| buffer is required to store output string.
1324 * @param node node object
1325 * @param ostr output buffer
1326 * @param olen output buffer length
1327 * @return parsed result
1329 virtual const char* formatNode(const Node *node, char *ostr, size_t olen) = 0;
1334 * This method is DEPRECATED. Use Lattice::set_request_type(MECAB_PARTIAL).
1335 * @param request_type new request type assigned
1337 virtual void set_request_type(int request_type) = 0;
1340 * Return the current request type.
1341 * This method is DEPRECATED. Use Lattice class.
1342 * @return request type
1344 virtual int request_type() const = 0;
1347 * Return true if partial parsing mode is on.
1348 * This method is DEPRECATED. Use Lattice::has_request_type(MECAB_PARTIAL).
1351 virtual bool partial() const = 0;
1354 * Set partial parsing mode.
1355 * This method is DEPRECATED. Use Lattice::add_request_type(MECAB_PARTIAL) or Lattice::remove_request_type(MECAB_PARTIAL).
1356 * @param partial partial mode
1358 virtual void set_partial(bool partial) = 0;
1361 * Return lattice level.
1362 * This method is DEPRECATED. Use Lattice::*_request_type()
1363 * @return int lattice level
1365 virtual int lattice_level() const = 0;
1368 * Set lattice level.
1369 * This method is DEPRECATED. Use Lattice::*_request_type()
1370 * @param level lattice level
1372 virtual void set_lattice_level(int level) = 0;
1375 * Return true if all morphs output mode is on.
1376 * This method is DEPRECATED. Use Lattice::has_request_type(MECAB_ALL_MORPHS).
1379 virtual bool all_morphs() const = 0;
1382 * Set all-morphs output mode.
1383 * This method is DEPRECATED. Use Lattice::add_request_type(MECAB_ALL_MORPHS) or Lattice::remove_request_type(MECAB_ALL_MORPHS).
1386 virtual void set_all_morphs(bool all_morphs) = 0;
1389 * Set temperature parameter theta.
1390 * @param theta temperature parameter.
1392 virtual void set_theta(float theta) = 0;
1395 * Return temperature parameter theta.
1396 * @return temperature parameter.
1398 virtual float theta() const = 0;
1401 * Return DictionaryInfo linked list.
1402 * @return DictionaryInfo linked list
1404 virtual const DictionaryInfo* dictionary_info() const = 0;
1407 * Return error string.
1408 * @return error string
1410 virtual const char* what() const = 0;
/** Virtual destructor with an empty body; lets a tagger be destroyed through a Tagger pointer. */
1412 virtual ~Tagger() {}
1416 * Factory method to create a new Tagger from main()-style argc/argv parameters.
1417 * Return NULL if the new tagger cannot be initialized. Use MeCab::getLastError() to obtain the
1418 * cause of the errors.
1419 * @return new Tagger object
1420 * @param argc number of parameters
1421 * @param argv parameter list
1423 static Tagger *create(int argc, char **argv);
1426 * Factory method to create a new Tagger with a string parameter representation, e.g.,
1427 * "-d /usr/local/mecab/dic/ipadic -Ochasen".
1428 * Return NULL if the new tagger cannot be initialized. Use MeCab::getLastError() to obtain the
1429 * cause of the errors.
1430 * @return new Tagger object
1431 * @param arg single string representation of the arguments.
1433 static Tagger *create(const char *arg);
1437 * Return a version string
1438 * @return version string
1440 static const char *version();
1445 * Alias of Lattice::create()
1447 MECAB_DLL_EXTERN Lattice *createLattice();
1450 * Alias of Model::create(argc, argv)
1452 MECAB_DLL_EXTERN Model *createModel(int argc, char **argv);
1455 * Alias of Model::create(arg)
1457 MECAB_DLL_EXTERN Model *createModel(const char *arg);
1460 * Alias of Tagger::create(argc, argv)
1462 MECAB_DLL_EXTERN Tagger *createTagger(int argc, char **argv);
1465 * Alias of Tagger::create(arg)
1467 MECAB_DLL_EXTERN Tagger *createTagger(const char *arg);
1470 * Delete Lattice object.
1471 * This function calls "delete lattice".
1472 * In some environments, e.g., MS-Windows, an object allocated inside a DLL must be deleted in the same DLL too.
1473 * @param lattice lattice object
1475 MECAB_DLL_EXTERN void deleteLattice(Lattice *lattice);
1479 * Delete Model object.
1480 * This function calls "delete model".
1481 * In some environments, e.g., MS-Windows, an object allocated inside a DLL must be deleted in the same DLL too.
1482 * @param model model object
1484 MECAB_DLL_EXTERN void deleteModel(Model *model);
1487 * Delete Tagger object.
1488 * This function calls "delete tagger".
1489 * In some environments, e.g., MS-Windows, an object allocated inside a DLL must be deleted in the same DLL too.
1490 * @param tagger tagger object
1492 MECAB_DLL_EXTERN void deleteTagger(Tagger *tagger);
1495 * Return last error string.
1496 * @return error string
1498 MECAB_DLL_EXTERN const char* getLastError();
1501 * An alias of getLastError.
1502 * It is kept for backward compatibility.
1503 * @return error string
1505 MECAB_DLL_EXTERN const char* getTaggerError();
1509 #endif /* MECAB_MECAB_H_ */