OSDN Git Service

SVNから移行
[nmecab/NMeCabRepo2.git] / trunk / src / LibNMeCab / Core / Writer.cs
1 //  MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
2 //
3 //  Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
4 //  Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
5 using System;
6 using System.Collections.Generic;
7 using System.Text;
8
9 namespace NMeCab.Core
10 {
11     public class Writer
12     {
13         private const string FloatFormat = "f6";
14
15         private delegate void WriteAction(StringBuilder os, MeCabNode bosNode);
16         private WriteAction write;
17
18         private string outputFormatType;
19
20         public string OutputFormatType
21         {
22             get
23             {
24                 return this.outputFormatType;
25             }
26             set
27             {
28                 this.outputFormatType = value;
29                 switch (value)
30                 {
31                     case "lattice":
32                         this.write = this.WriteLattice;
33                         break;
34                     case "wakati":
35                         this.write = this.WriteWakati;
36                         break;
37                     case "none":
38                         this.write = this.WriteNone;
39                         break;
40                     case "dump":
41                         this.write = this.WriteDump;
42                         break;
43                     case "em":
44                         this.write = this.WriteEM;
45                         break;
46                     default:
47                         throw new ArgumentOutOfRangeException(value + " is not supported Format");
48                 }
49             }
50         }
51
52         public void Open(MeCabParam param)
53         {
54             this.OutputFormatType = param.OutputFormatType;
55         }
56
57         public void Write(StringBuilder os, MeCabNode bosNode)
58         {
59             this.write(os, bosNode);
60         }
61
62         public void WriteLattice(StringBuilder os, MeCabNode bosNode)
63         {
64             for (MeCabNode node = bosNode.Next; node.Next != null; node = node.Next)
65             {
66                 os.Append(node.Surface);
67                 os.Append("\t");
68                 os.Append(node.Feature);
69                 os.AppendLine();
70             }
71             os.AppendLine("EOS");
72         }
73
74         public void WriteWakati(StringBuilder os, MeCabNode bosNode)
75         {
76             MeCabNode node = bosNode.Next;
77             if (node.Next != null)
78             {
79                 os.Append(node.Surface);
80                 for (node = node.Next; node.Next != null; node = node.Next)
81                 {
82                     os.Append(" ");
83                     os.Append(node.Surface);
84                 }
85             }
86             os.AppendLine();
87         }
88
89         public void WriteNone(StringBuilder os, MeCabNode bosNode)
90         {
91             // do nothing
92         }
93
94         public void WriteUser(StringBuilder os, MeCabNode bosNode)
95         {
96             throw new NotImplementedException();
97         }
98
99         public void WriteEM(StringBuilder os, MeCabNode bosNode)
100         {
101             const float MinProb = 0.0001f;
102             for (MeCabNode node = bosNode; node != null; node = node.Next)
103             {
104                 if (node.Prob >= MinProb)
105                 {
106                     os.Append("U\t");
107                     if (node.Stat == MeCabNodeStat.Bos)
108                         os.Append("BOS");
109                     else if (node.Stat == MeCabNodeStat.Eos)
110                         os.Append("EOS");
111                     else
112                         os.Append(node.Surface);
113                     os.Append("\t").Append(node.Feature);
114                     os.Append("\t").Append(node.Prob.ToString(FloatFormat));
115                     os.AppendLine();
116                 }
117                 for (MeCabPath path = node.LPath; path != null; path = path.LNext)
118                 {
119                     if (path.Prob >= MinProb)
120                     {
121                         os.Append("B\t").Append(path.LNode.Feature);
122                         os.Append("\t").Append(node.Feature);
123                         os.Append("\t").Append(path.Prob.ToString(FloatFormat));
124                         os.AppendLine();
125                     }
126                 }
127             }
128             os.AppendLine("EOS");
129         }
130
131         public void WriteDump(StringBuilder os, MeCabNode bosNode)
132         {
133             for (MeCabNode node = bosNode; node != null; node = node.Next)
134             {
135 #if NeedId
136                 os.Append(node.Id).Append(" ");
137 #endif
138                 if (node.Stat == MeCabNodeStat.Bos)
139                     os.Append("BOS");
140                 else if (node.Stat == MeCabNodeStat.Eos)
141                     os.Append("EOS");
142                 else
143                     os.Append(node.Surface);
144
145                 os.Append(" ").Append(node.Feature);
146                 os.Append(" ").Append(node.BPos);
147                 os.Append(" ").Append(node.EPos);
148                 os.Append(" ").Append(node.RCAttr);
149                 os.Append(" ").Append(node.LCAttr);
150                 os.Append(" ").Append(node.PosId);
151                 os.Append(" ").Append(node.CharType);
152                 os.Append(" ").Append((int)node.Stat);
153                 os.Append(" ").Append(node.IsBest ? "1" : "0");
154                 os.Append(" ").Append(node.Alpha.ToString(FloatFormat));
155                 os.Append(" ").Append(node.Beta.ToString(FloatFormat));
156                 os.Append(" ").Append(node.Prob.ToString(FloatFormat));
157                 os.Append(" ").Append(node.Cost);
158
159                 for (MeCabPath path = node.LPath; path != null; path = path.LNext)
160                 {
161 #if NeedId
162                     os.Append(" ").Append(path.LNode.Id);
163 #endif
164                     os.Append(" ");
165                     os.Append(":").Append(path.Cost);
166                     os.Append(":").Append(path.Prob.ToString(FloatFormat));
167                 }
168
169                 os.AppendLine();
170             }
171         }
172
173         public unsafe void WriteNode(StringBuilder os, char* p, string sentence, MeCabNode node)
174         {
175             for (; *p != 0x0; p++)
176             {
177                 switch (*p)
178                 {
179                     default: os.Append(*p); break;
180                     case '%':
181                         switch (*++p)
182                         {
183                             default: os.Append("unkonwn meta char ").Append(*p); break;
184                             case 'S': os.Append(sentence); break;
185                             case 'L': os.Append(sentence.Length); break;
186                             case 'm': os.Append(node.Surface); break;
187                             case 'M': os.Append(sentence, (node.BPos - node.RLength + node.Length), node.RLength); break;
188                             case 'h': os.Append(node.PosId); break;
189                             case '%': os.Append('%'); break;
190                             case 'c': os.Append(node.WCost); break;
191                             case 'H': os.Append(node.Feature); break;
192                             case 't': os.Append(node.CharType); break;
193                             case 's': os.Append(node.Stat); break;
194                             case 'P': os.Append(node.Prob); break;
195                             case 'p':
196                                 switch (*++p)
197                                 {
198                                     default: throw new ArgumentException("[iseSCwcnblLh] is required after %p");
199 #if NeedId
200                                     case 'i': os.Append(node.Id); break;
201 #else
202                                     case 'i': throw new ArgumentException("%pi is not supported");
203 #endif
204                                     case 'S': os.Append(sentence, node.BPos, (node.RLength - node.Length)); break;
205                                     case 's': os.Append(node.BPos); break;
206                                     case 'e': os.Append(node.EPos); break;
207                                     case 'C': os.Append(node.Cost - node.Prev.Cost - node.WCost); break;
208                                     case 'w': os.Append(node.WCost); break;
209                                     case 'c': os.Append(node.Cost); break;
210                                     case 'n': os.Append(node.Cost - node.Prev.Cost); break;
211                                     case 'b': os.Append(node.IsBest ? '*' : ' '); break;
212                                     case 'P': os.Append(node.Prob); break;
213                                     case 'A': os.Append(node.Alpha); break;
214                                     case 'B': os.Append(node.Beta); break;
215                                     case 'l': os.Append(node.Length); break;
216                                     case 'L': os.Append(node.RLength); break;
217                                     case 'h':
218                                         switch (*++p)
219                                         {
220                                             default: throw new ArgumentException("lr is required after %ph");
221                                             case 'l': os.Append(node.LCAttr); break;
222                                             case 'r': os.Append(node.RCAttr); break;
223                                         }; break;
224                                     case 'p':
225                                         char mode = *++p;
226                                         char sep = *++p;
227                                         if (sep == '\\') sep = this.GetEscapedChar(*++p);
228                                         if (node.LPath == null) throw new InvalidOperationException("no path information, use -l option");
229                                         for (MeCabPath path = node.LPath; path != null; path = path.LNext)
230                                         {
231                                             if (path != node.LPath) os.Append(sep);
232                                             switch (mode)
233                                             {
234                                                 case 'i': os.Append(path.LNode.PosId); break;
235                                                 case 'c': os.Append(path.Cost); break;
236                                                 case 'P': os.Append(path.Prob); break;
237                                                 default: throw new ArgumentException("[icP] is required after %pp");
238                                             }
239                                         }
240                                         break;
241                                 } break;
242                             case 'f':
243                             case 'F':
244                                 char separator = '\t';
245                                 if (*p == 'F')
246                                     if (*++p == '\\')
247                                         separator = this.GetEscapedChar(*++p);
248                                     else
249                                         separator = *p;
250                                 if (*++p != '[') throw new ArgumentException("cannot find '['");
251                                 string[] features = node.Feature.Split(',');
252                                 int n = 0;
253                                 while (true)
254                                 {
255                                     if (char.IsDigit(*++p)) { n = n * 10 + (*p - '0'); continue; }
256                                     if (n >= features.Length) throw new ArgumentException("given index is out of range");
257                                     os.Append(features[n]);
258                                     if (*++p == ',') { os.Append(separator); n = 0; continue; }
259                                     if (*p == ']') break;
260                                     throw new ArgumentException("cannot find ']'");
261                                 } 
262                                 break;
263                         } break;
264                 }
265             }
266         }
267
268         private char GetEscapedChar(char p)
269         {
270             switch (p)
271             {
272                 case '0': return '\0';
273                 case 'a': return '\a';
274                 case 'b': return '\b';
275                 case 't': return '\t';
276                 case 'n': return '\n';
277                 case 'v': return '\v';
278                 case 'f': return '\f';
279                 case 'r': return '\r';
280                 case 's': return ' ';
281                 case '\\': return '\\';
282                 default: return '\0'; //never be here
283             }
284         }
285     }
286 }