1 // MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
3 // Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
4 // Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
6 using System.Collections.Generic;
// Numeric format string handed to ToString() wherever this class prints a
// floating-point value ("f6" = fixed-point notation with six decimal digits).
13 private const string FloatFormat = "f6";
// Shape shared by the Write* output routines: append text for the node list
// that begins at bosNode to the builder os.
15 private delegate void WriteAction(StringBuilder os, MeCabNode bosNode);
// Output routine currently in effect. It is assigned by the OutputFormatType
// setter and invoked by Write().
16 private WriteAction write;
// Backing field for the OutputFormatType property.
18 private string outputFormatType;
// Gets or sets the name of the output format.
// The getter returns the stored name. The setter stores the new name and points
// the `write` delegate at one of WriteLattice, WriteWakati, WriteNone, WriteDump
// or WriteEM.
// NOTE(review): the branch labels that map each format name to its routine are
// not visible in this excerpt -- confirm them before documenting accepted values.
// Throws ArgumentOutOfRangeException when the value is not a supported format.
// NOTE(review): the backing field is assigned before the value is validated, so an
// unsupported name appears to stay stored after the exception -- confirm.
20 public string OutputFormatType
24 return this.outputFormatType;
28 this.outputFormatType = value;
32 this.write = this.WriteLattice;
35 this.write = this.WriteWakati;
38 this.write = this.WriteNone;
41 this.write = this.WriteDump;
44 this.write = this.WriteEM;
// The (paramName, actualValue, message) overload is used on purpose: the
// single-string overload interprets its argument as the parameter name, which
// would hide the explanatory text from Exception.Message.
47 throw new ArgumentOutOfRangeException("value", value, value + " is not supported Format");
// Configures this writer from param by copying param.OutputFormatType into the
// OutputFormatType property, whose setter selects the output routine.
// An exception thrown by that setter propagates to the caller.
52 public void Open(MeCabParam param)
54 this.OutputFormatType = param.OutputFormatType;
// Appends the textual form of the node list starting at bosNode to os by
// calling the routine currently held in the `write` delegate.
// NOTE(review): in the visible code `write` is only assigned by the
// OutputFormatType setter; presumably calling this before Open() or before
// setting the property dereferences a null delegate -- confirm.
57 public void Write(StringBuilder os, MeCabNode bosNode)
59 this.write(os, bosNode);
// Appends the Surface and the Feature of each node to os.
// Iteration starts at bosNode.Next and stops at the node whose Next is null, so
// neither bosNode itself nor the final node of the chain is written.
// NOTE(review): whatever separates Surface from Feature and terminates each
// entry is not visible in this excerpt -- confirm against the full file.
62 public void WriteLattice(StringBuilder os, MeCabNode bosNode)
64 for (MeCabNode node = bosNode.Next; node.Next != null; node = node.Next)
66 os.Append(node.Surface);
68 os.Append(node.Feature);
// Appends only the Surface of each node to os.
// As in WriteLattice, bosNode and the final node (the one whose Next is null)
// are skipped.
// NOTE(review): the text placed between consecutive surfaces is not visible in
// this excerpt -- confirm against the full file.
74 public void WriteWakati(StringBuilder os, MeCabNode bosNode)
76 MeCabNode node = bosNode.Next;
// The first surface is written outside the loop; the remaining ones are
// written inside it.
77 if (node.Next != null)
79 os.Append(node.Surface);
80 for (node = node.Next; node.Next != null; node = node.Next)
83 os.Append(node.Surface);
// Output routine with the WriteAction signature; the OutputFormatType setter can
// select it.
// NOTE(review): the body is not visible in this excerpt; the name suggests it
// writes nothing -- confirm.
89 public void WriteNone(StringBuilder os, MeCabNode bosNode)
// Placeholder for user-defined output; not implemented.
// Always throws NotImplementedException. None of the visible assignments in the
// OutputFormatType setter select this routine.
94 public void WriteUser(StringBuilder os, MeCabNode bosNode)
96 throw new NotImplementedException();
// Appends node and path records, filtered by probability, to os and finishes
// with an "EOS" line.
// Every node from bosNode to the end of the chain is visited. Nodes with
// Prob >= MinProb produce a record holding the surface, the Feature and the Prob
// (formatted with FloatFormat), separated by tabs. Paths reached through
// node.LPath / LNext with Prob >= MinProb produce a record starting with "B"
// that holds the left node's Feature, this node's Feature and the path Prob.
// NOTE(review): the text written for BOS/EOS nodes, the record prefix for nodes
// and the line terminators are not visible in this excerpt -- confirm.
// NOTE(review): ToString(FloatFormat) uses the current culture, so the decimal
// separator may vary by locale; confirm whether invariant output is expected.
99 public void WriteEM(StringBuilder os, MeCabNode bosNode)
// Nodes and paths with a probability below this threshold are left out.
101 const float MinProb = 0.0001f;
102 for (MeCabNode node = bosNode; node != null; node = node.Next)
104 if (node.Prob >= MinProb)
107 if (node.Stat == MeCabNodeStat.Bos)
109 else if (node.Stat == MeCabNodeStat.Eos)
112 os.Append(node.Surface);
113 os.Append("\t").Append(node.Feature);
114 os.Append("\t").Append(node.Prob.ToString(FloatFormat));
// Walk the chain of paths that arrive at this node from the left.
117 for (MeCabPath path = node.LPath; path != null; path = path.LNext)
119 if (path.Prob >= MinProb)
121 os.Append("B\t").Append(path.LNode.Feature);
122 os.Append("\t").Append(node.Feature);
123 os.Append("\t").Append(path.Prob.ToString(FloatFormat));
128 os.AppendLine("EOS");
// Appends a space-separated dump of every node, from bosNode to the end of the
// chain, to os.
// Field order per node: Id, surface, Feature, BPos, EPos, RCAttr, LCAttr, PosId,
// CharType, Stat (as its integer value), IsBest ("1" or "0"), Alpha, Beta, Prob
// (the last three formatted with FloatFormat) and Cost. These are followed by one
// " <left node Id>:<path Cost>:<path Prob>" entry per path reached through
// node.LPath / LNext.
// NOTE(review): the text written in place of the surface for BOS/EOS nodes and
// the per-node line terminator are not visible in this excerpt -- confirm.
// NOTE(review): ToString(FloatFormat) uses the current culture, so the decimal
// separator may vary by locale; confirm whether invariant output is expected.
131 public void WriteDump(StringBuilder os, MeCabNode bosNode)
133 for (MeCabNode node = bosNode; node != null; node = node.Next)
136 os.Append(node.Id).Append(" ");
138 if (node.Stat == MeCabNodeStat.Bos)
140 else if (node.Stat == MeCabNodeStat.Eos)
143 os.Append(node.Surface);
145 os.Append(" ").Append(node.Feature);
146 os.Append(" ").Append(node.BPos);
147 os.Append(" ").Append(node.EPos);
148 os.Append(" ").Append(node.RCAttr);
149 os.Append(" ").Append(node.LCAttr);
150 os.Append(" ").Append(node.PosId);
151 os.Append(" ").Append(node.CharType);
152 os.Append(" ").Append((int)node.Stat);
153 os.Append(" ").Append(node.IsBest ? "1" : "0");
154 os.Append(" ").Append(node.Alpha.ToString(FloatFormat));
155 os.Append(" ").Append(node.Beta.ToString(FloatFormat));
156 os.Append(" ").Append(node.Prob.ToString(FloatFormat));
157 os.Append(" ").Append(node.Cost);
// One entry per path arriving at this node from the left.
159 for (MeCabPath path = node.LPath; path != null; path = path.LNext)
162 os.Append(" ").Append(path.LNode.Id);
165 os.Append(":").Append(path.Cost);
166 os.Append(":").Append(path.Prob.ToString(FloatFormat));
// Expands the format string at p for a single node and appends the result to os.
// p is read one character at a time until a 0 terminator; characters that are not
// part of a directive are copied through unchanged.
// Directives visible in this excerpt (the error messages below show they are
// introduced with '%'):
//   S sentence, L sentence length, m surface, M surface taken from sentence,
//   h PosId, % literal percent sign, c WCost, H Feature, t CharType, s Stat, P Prob;
//   %p followed by one of i S s e C w c n b P A B l L (positions, costs, flags);
//   %ph followed by l or r (LCAttr / RCAttr);
//   %pp followed by a separator and one of i c P, applied to every left path;
//   a bracketed, comma-separated list of feature indices, e.g. [0,2].
// Throws ArgumentException for malformed directives and InvalidOperationException
// when path output is requested but node.LPath is null.
// NOTE(review): several switch headers and case labels are not visible in this
// excerpt; the grouping above is inferred from the error messages -- confirm.
// NOTE(review): StringBuilder.Append(float) formats with the current culture, so
// P/A/B output may vary by locale; confirm whether that is intended.
173 public unsafe void WriteNode(StringBuilder os, char* p, string sentence, MeCabNode node)
175 for (; *p != 0x0; p++)
179 default: os.Append(*p); break;
// An unrecognized meta character is reported in the output, not thrown.
183 default: os.Append("unknown meta char ").Append(*p); break;
184 case 'S': os.Append(sentence); break;
185 case 'L': os.Append(sentence.Length); break;
186 case 'm': os.Append(node.Surface); break;
// NOTE(review): this starts at BPos - (RLength - Length), while %pS below starts
// at BPos for the same RLength - Length span; one of the two start offsets looks
// inconsistent -- confirm what BPos points at.
187 case 'M': os.Append(sentence, (node.BPos - node.RLength + node.Length), node.RLength); break;
188 case 'h': os.Append(node.PosId); break;
189 case '%': os.Append('%'); break;
190 case 'c': os.Append(node.WCost); break;
191 case 'H': os.Append(node.Feature); break;
192 case 't': os.Append(node.CharType); break;
193 case 's': os.Append(node.Stat); break;
194 case 'P': os.Append(node.Prob); break;
198 default: throw new ArgumentException("[iseSCwcnblLh] is required after %p");
// NOTE(review): two `case 'i'` labels cannot both be active in one switch;
// presumably conditional compilation (not visible here) selects one -- confirm.
200 case 'i': os.Append(node.Id); break;
202 case 'i': throw new ArgumentException("%pi is not supported");
204 case 'S': os.Append(sentence, node.BPos, (node.RLength - node.Length)); break;
205 case 's': os.Append(node.BPos); break;
206 case 'e': os.Append(node.EPos); break;
207 case 'C': os.Append(node.Cost - node.Prev.Cost - node.WCost); break;
208 case 'w': os.Append(node.WCost); break;
209 case 'c': os.Append(node.Cost); break;
210 case 'n': os.Append(node.Cost - node.Prev.Cost); break;
211 case 'b': os.Append(node.IsBest ? '*' : ' '); break;
212 case 'P': os.Append(node.Prob); break;
213 case 'A': os.Append(node.Alpha); break;
214 case 'B': os.Append(node.Beta); break;
215 case 'l': os.Append(node.Length); break;
216 case 'L': os.Append(node.RLength); break;
220 default: throw new ArgumentException("lr is required after %ph");
221 case 'l': os.Append(node.LCAttr); break;
222 case 'r': os.Append(node.RCAttr); break;
// A backslash separator is followed by an escape character that
// GetEscapedChar translates (unknown escapes become '\0').
227 if (sep == '\\') sep = this.GetEscapedChar(*++p);
228 if (node.LPath == null) throw new InvalidOperationException("no path information, use -l option");
229 for (MeCabPath path = node.LPath; path != null; path = path.LNext)
// The separator goes between entries, never before the first one.
231 if (path != node.LPath) os.Append(sep);
234 case 'i': os.Append(path.LNode.PosId); break;
235 case 'c': os.Append(path.Cost); break;
236 case 'P': os.Append(path.Prob); break;
237 default: throw new ArgumentException("[icP] is required after %pp");
// Feature-list directive: fields selected by index from the comma-separated
// Feature string are joined with `separator` (tab unless overridden).
244 char separator = '\t';
247 separator = this.GetEscapedChar(*++p);
250 if (*++p != '[') throw new ArgumentException("cannot find '['");
251 string[] features = node.Feature.Split(',');
// Accumulate decimal digits into the index n until a non-digit is reached.
255 if (char.IsDigit(*++p)) { n = n * 10 + (*p - '0'); continue; }
256 if (n >= features.Length) throw new ArgumentException("given index is out of range");
257 os.Append(features[n]);
// ',' starts the next index; ']' ends the list; anything else is an error.
258 if (*++p == ',') { os.Append(separator); n = 0; continue; }
259 if (*p == ']') break;
260 throw new ArgumentException("cannot find ']'");
// Translates the character that follows a backslash in a format string into the
// character it stands for: 0 a b t n v f r map to the corresponding control
// characters, s maps to a space and a backslash maps to itself.
// Any other input yields '\0'.
268 private char GetEscapedChar(char p)
272 case '0': return '\0';
273 case 'a': return '\a';
274 case 'b': return '\b';
275 case 't': return '\t';
276 case 'n': return '\n';
277 case 'v': return '\v';
278 case 'f': return '\f';
279 case 'r': return '\r';
280 case 's': return ' ';
281 case '\\': return '\\';
282 default: return '\0'; // fallback for unrecognized escapes; reachable, since callers pass the format character through unchecked