OSDN Git Service

CsvReaderをSystem.Data.DataTableベースに変更,IDataSetを廃止(#23867, #23868)
[karinto/karinto.git] / Karinto / CsvReader.cs
1 /*\r
2  *      Karinto Library Project\r
3  *\r
4  *      This software is distributed under a zlib-style license.\r
5  *      See license.txt for more information.\r
6  */\r
7 \r
8 using System;\r
9 using System.IO;\r
10 using System.Collections.Generic;\r
11 using System.Text;\r
12 using System.Text.RegularExpressions;\r
13 using System.Data;\r
14 \r
15 namespace Karinto\r
16 {\r
/// <summary>
///     Reads a delimiter-separated text file into a table of type
///     <typeparamref name="TDataTable"/>.
/// </summary>
/// <remarks>
///     The columns of a freshly constructed <typeparamref name="TDataTable"/>
///     decide how each field of a line is parsed: <see cref="Double"/>
///     columns use a numeric parser, every other column type is read as a
///     (optionally quoted) string.
/// </remarks>
/// <typeparam name="TDataTable">
///     Table type to fill; must expose a public parameterless constructor.
/// </typeparam>
public class CsvReader<TDataTable> where TDataTable : DataTable, new()
{
    #region private fields

    private string separator;
    private Regex sepPattern;
    private TDataTable protoType;
    private DataColumnCollection cols;

    /// <summary>
    ///     Parses one field from the head of <paramref name="input"/>.
    ///     On success the consumed text is removed from
    ///     <paramref name="input"/> and the value is stored in
    ///     <paramref name="output"/>.
    /// </summary>
    private delegate bool Parser(ref string input, out object output);

    // Column data type -> field parser. Types that are not registered
    // here fall back to ParseString (see the instance constructor).
    private static readonly Dictionary<Type, Parser> parsers;

    // One parser per column, in column order.
    private Parser[] lineParser;

    #endregion

    #region constructors

    static CsvReader()
    {
        parsers = new Dictionary<Type, Parser>();
        parsers[typeof(Double)] = ParseDouble;
    }

    /// <summary>
    ///     Creates a reader that uses "," as the separator and builds the
    ///     per-column parser list from the columns of a new
    ///     <typeparamref name="TDataTable"/>.
    /// </summary>
    public CsvReader()
    {
        Separator = @",";
        protoType = new TDataTable();
        cols = protoType.Columns;
        lineParser = new Parser[cols.Count];
        for (int i = 0; i < cols.Count; ++i)
        {
            // TryGetValue instead of the indexer: the indexer throws
            // KeyNotFoundException for every unregistered column type,
            // which made the string fallback unreachable.
            Parser parser;
            if (!parsers.TryGetValue(cols[i].DataType, out parser)
                || parser == null)
            {
                parser = ParseString;
            }
            lineParser[i] = parser;
        }
    }

    #endregion

    #region properties

    /// <summary>
    ///     Field separator. The value is compiled as a regular-expression
    ///     pattern, so characters that are special in a regular expression
    ///     must be escaped by the caller.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    ///     The assigned value is <c>null</c>.
    /// </exception>
    /// <exception cref="ArgumentException">
    ///     The assigned value is not a valid regular-expression pattern.
    /// </exception>
    public string Separator
    {
        get
        {
            return separator;
        }
        set
        {
            // Build the pattern first so that an invalid value leaves the
            // previous separator/pattern pair untouched.
            Regex pattern = new Regex(value, RegexOptions.Compiled);
            separator = value;
            sepPattern = pattern;
        }
    }

    #endregion

    #region public methods

    /// <summary>
    ///     Reads the file at <paramref name="path"/> line by line into a
    ///     new table.
    /// </summary>
    /// <remarks>
    ///     A line that cannot be parsed is not added to the table. As long
    ///     as no row has been added yet, such a line is treated as a header
    ///     and used to rename the columns. Exceptions raised while opening
    ///     or reading the file propagate to the caller unchanged.
    /// </remarks>
    /// <param name="path">Path of the file to read.</param>
    /// <returns>The table holding every successfully parsed line.</returns>
    public TDataTable Read(FilePath path)
    {
        TDataTable table = new TDataTable();
        using (FileStream fs = new FileStream(
            path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
        using (StreamReader reader = new StreamReader(fs))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                DataRow row = table.NewRow();
                SetRow(row, line);
                if (!row.HasErrors)
                {
                    table.Rows.Add(row);
                    continue;
                }
                if (table.Rows.Count < 1)
                {
                    SetHeader(table.Columns, line);
                }
            }
        }
        return table;
    }

    /// <summary>
    ///     Not implemented: currently ignores <paramref name="path"/> and
    ///     always returns <c>null</c>.
    /// </summary>
    /// <param name="path">Unused.</param>
    /// <returns>Always <c>null</c>.</returns>
    public TDataTable ReadAll(FilePath path)
    {
        return null;
    }

    #endregion

    #region private methods

    /// <summary>
    ///     Fills <paramref name="row"/> from <paramref name="line"/> using
    ///     the per-column parsers. On the first field that cannot be parsed
    ///     the row is flagged through <see cref="DataRow.RowError"/> and
    ///     the remaining fields are left unset.
    /// </summary>
    private void SetRow(DataRow row, string line)
    {
        for (int i = 0; i < lineParser.Length; ++i)
        {
            object value;
            if (!lineParser[i](ref line, out value))
            {
                // The message must never be empty: assigning an empty
                // RowError clears the error, and the caller relies on
                // HasErrors to discard the row.
                row.RowError = "Cannot parse field " + i + ": " + line;
                return;
            }
            row[i] = value;
            line = SkipSeparator(line);
        }
    }

    /// <summary>
    ///     Renames <paramref name="columns"/> from the fields of
    ///     <paramref name="line"/>. A leading "#" (plus following
    ///     whitespace) is stripped from each name. Empty names are skipped
    ///     so the column keeps its current name.
    /// </summary>
    private void SetHeader(DataColumnCollection columns, string line)
    {
        for (int i = 0; i < columns.Count && line.Length > 0; ++i)
        {
            object value;
            if (!ParseString(ref line, out value))
            {
                return;
            }
            string name = (string)value;
            Match comment = Regex.Match(name, @"^#\s*");
            name = name.Substring(comment.Length);
            if (name.Length > 0)
            {
                // An empty ColumnName is rejected for columns that belong
                // to a table, hence the guard.
                columns[i].ColumnName = name;
            }
            line = SkipSeparator(line);
        }
    }

    /// <summary>
    ///     Removes the next separator from the head of
    ///     <paramref name="line"/>. Only whitespace may precede the
    ///     separator; otherwise the text is returned unchanged so the next
    ///     parser sees (and can reject) the unexpected content.
    /// </summary>
    private string SkipSeparator(string line)
    {
        Match m = sepPattern.Match(line);
        if (m.Success && IsBlank(line, m.Index))
        {
            return line.Substring(m.Index + m.Length);
        }
        return line;
    }

    /// <summary>
    ///     Returns true when the first <paramref name="length"/> characters
    ///     of <paramref name="text"/> are all whitespace (or length is 0).
    /// </summary>
    private static bool IsBlank(string text, int length)
    {
        return text.Substring(0, length).Trim().Length == 0;
    }

    /// <summary>
    ///     Parses a floating-point field from the head of
    ///     <paramref name="input"/> using RegexSet.DecimalFloat.
    /// </summary>
    /// <returns>
    ///     True and the boxed <see cref="Double"/> on success; false (input
    ///     untouched) when no number is found at the head of the input or
    ///     the matched text is not a valid <see cref="Double"/>.
    /// </returns>
    private static bool ParseDouble(ref string input, out object output)
    {
        output = 0;

        // NOTE(review): RegexSet.DecimalFloat is defined elsewhere; if it is
        // not anchored, the match may start after unrelated text, which is
        // rejected here instead of being silently mis-consumed.
        Match m = RegexSet.DecimalFloat.Match(input);
        if (!m.Success || !IsBlank(input, m.Index))
        {
            return false;
        }

        // Same culture and number styles as Double.Parse(string), but a
        // malformed or out-of-range value flags the row instead of throwing.
        double number;
        if (!Double.TryParse(m.Groups[1].Value, out number))
        {
            return false;
        }

        output = number;
        input = input.Substring(m.Index + m.Length);
        return true;
    }

    /// <summary>
    ///     Parses a string field from the head of <paramref name="input"/>.
    ///     A quoted field (RegexSet.QuotedString) has doubled quotes
    ///     collapsed; otherwise everything up to the next separator, or the
    ///     whole remaining text, is taken verbatim.
    /// </summary>
    /// <returns>Always true.</returns>
    private bool ParseString(ref string input, out object output)
    {
        // NOTE(review): RegexSet.QuotedString is defined elsewhere; a match
        // that starts after non-blank text is ignored so the field is read
        // as an unquoted one.
        Match quotedMatch = RegexSet.QuotedString.Match(input);
        if (quotedMatch.Success && IsBlank(input, quotedMatch.Index))
        {
            string content = quotedMatch.Groups[1].Value;
            output = content.Replace("\"\"", "\"");
            input = input.Substring(quotedMatch.Index + quotedMatch.Length);
            return true;
        }

        Match m = sepPattern.Match(input);
        if (m.Success)
        {
            // Leave the separator in place; the caller strips it.
            output = input.Substring(0, m.Index);
            input = input.Substring(m.Index);
        }
        else
        {
            output = input;
            input = "";
        }
        return true;
    }

    #endregion
}
205 \r
206     /*\r
207     public class CsvReader<TDataSet> where TDataSet : IDataSet, new()\r
208     {\r
209         private string separator = ",";\r
210 \r
211 \r
212 \r
213         /// <summary>\r
214         ///     CSVファイルを読み込む\r
215         /// </summary>\r
216         /// <param name="path">CSVファイルのパス</param>\r
217         /// <param name="proto">行の雛型</param>\r
218         /// <returns>指定した型のリスト</returns>\r
219         public List<TDataSet> Read(FilePath path)\r
220         {\r
221             return Read(path, new TDataSet());\r
222         }\r
223 \r
224         /// <summary>\r
225         ///     雛型を指定してCSVファイルを読み込む\r
226         /// </summary>\r
227         /// <param name="path">CSVファイルのパス</param>\r
228         /// <param name="proto">行の雛型</param>\r
229         /// <returns>指定した型のリスト</returns>\r
230         public List<TDataSet> Read(FilePath path, TDataSet proto)\r
231         {\r
232             List<TDataSet> records = new List<TDataSet>();\r
233 \r
234             string line = "";\r
235             // 数値にマッチする正規表現\r
236             //string regex = @"\s*" + RegexSet.DecimalFloat + @"\s*" + separator;\r
237             string regex = @"\s*" + @"(?<1>([-+]?(\d+([\.,]\d*)?|([\.,]\d+))([eE][-+]?\d+)?))" + @"\s*" + separator;\r
238             Regex rCsv = new Regex(regex, RegexOptions.Compiled);\r
239 \r
240             try\r
241             {\r
242                 using (FileStream fs = new FileStream(\r
243                     path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))\r
244                 {\r
245                     using (StreamReader reader = new StreamReader(fs))\r
246                     {\r
247                         while (reader.Peek() >= 0)\r
248                         {\r
249                             line = reader.ReadLine() + separator;\r
250                             //Console.WriteLine("> " + line);\r
251                             MatchCollection mc = rCsv.Matches(line);\r
252 \r
253                             // 現状ではNaNが含まれる行は受理しない\r
254                             if (!proto.Width.Includes(mc.Count)) continue;\r
255 \r
256                             if (proto.KeyRange != null)\r
257                             {\r
258                                 double key = Double.Parse(mc[0].Groups[1].Value);\r
259                                 if (!proto.KeyRange.Includes(key)) continue;\r
260                             }\r
261 \r
262                             TDataSet c = new TDataSet();\r
263                             foreach (Match m in mc)\r
264                             {\r
265                                 c.Push(Double.Parse(m.Groups[1].Value));\r
266                             }\r
267                             records.Add(c);\r
268                         }\r
269                     }\r
270                 }\r
271             }\r
272             catch (Exception e)\r
273             {\r
274                 Console.WriteLine("The file could not be read:");\r
275                 Console.WriteLine("> " + path);\r
276                 Console.WriteLine(e.Message);\r
277                 throw e;\r
278             }\r
279 \r
280             return records;\r
281         }\r
282      \r
283     }*/\r
284 }\r