OSDN Git Service

CsvReaderのパーサ追加,プロトタイプ指定・型引数省略に対応
[karinto/karinto.git] / Karinto / CsvReader.cs
index 5d3ae11..bc1b291 100755 (executable)
@@ -10,95 +10,353 @@ using System.IO;
 using System.Collections.Generic;\r
 using System.Text;\r
 using System.Text.RegularExpressions;\r
-\r
+using System.Data;\r
 \r
 namespace Karinto\r
 {\r
-    public class CsvReader<TDataSet> where TDataSet : IDataSet, new()\r
+    /// <summary>\r
+    ///     Non-generic reader that produces plain <see cref="DataTable"/> instances.\r
+    /// </summary>\r
+    public class CsvReader : CsvReader<DataTable>\r
     {\r
-        private string separator = ",";\r
+        // Private: cannot be called from outside this class.\r
+        private CsvReader()\r
+            : base()\r
+        {\r
+        }\r
 \r
-        public string Separator\r
+        /// <summary>\r
+        ///     Creates a reader and forwards <paramref name="prototype"/> to the generic base class.\r
+        /// </summary>\r
+        /// <param name="prototype">Table handed to the base constructor.</param>\r
+        public CsvReader(DataTable prototype)\r
+            : base(prototype)\r
         {\r
-            get { return separator; }\r
         }\r
+    }\r
 \r
-        /// <summary>\r
-        ///     区切り文字を指定する\r
-        ///     カンマ以外だったらCSVじゃないような...?\r
-        /// </summary>\r
-        /// <param name="separator">区切り文字(正規表現可)</param>\r
-        public void SetSeparator(string separator)\r
+    public class CsvReader<TDataTable> where TDataTable : DataTable, new()\r
+    {\r
+        #region private fields\r
+\r
+        // Per-type default parsers shared by every reader of this closed generic type.\r
+        private static Dictionary<Type, Parser> parsers;\r
+\r
+        /// <summary>\r
+        ///     Field parser: reads a value from the start of <paramref name="input"/>,\r
+        ///     returns it through <paramref name="output"/> and reports success.\r
+        /// </summary>\r
+        public delegate bool Parser(ref string input, out object output);\r
+\r
+        // Separator text and the regular expression compiled from it.\r
+        private string separator;\r
+        private Regex sepPattern;\r
+        // Schema template that Read clones for each result table.\r
+        private TDataTable prototype;\r
+        private DataColumnCollection cols;\r
+        // One parser per column, indexed like the prototype's columns.\r
+        private Parser[] lineParser;\r
+\r
+        #endregion\r
+\r
+        #region constructors\r
+        /// <summary>\r
+        ///     Registers the built-in parsers for the supported primitive column types.\r
+        /// </summary>\r
+        static CsvReader()\r
         {\r
-            this.separator = separator;\r
+            Dictionary<Type, Parser> defaults = new Dictionary<Type, Parser>();\r
+            defaults.Add(typeof(bool), ParseBoolean);\r
+            defaults.Add(typeof(double), ParseDouble);\r
+            defaults.Add(typeof(float), ParseSingle);\r
+            defaults.Add(typeof(decimal), ParseDecimal);\r
+            defaults.Add(typeof(int), ParseInt32);\r
+            defaults.Add(typeof(long), ParseInt64);\r
+            parsers = defaults;\r
         }\r
 \r
-        /// <summary>\r
-        ///     CSVファイルを読み込む\r
-        /// </summary>\r
-        /// <param name="path">CSVファイルのパス</param>\r
-        /// <param name="proto">行の雛型</param>\r
-        /// <returns>指定した型のリスト</returns>\r
-        public List<TDataSet> Read(string path)\r
+        /// <summary>\r
+        ///     Creates a reader that uses a freshly constructed table as its prototype.\r
+        /// </summary>\r
+        public CsvReader() : this(new TDataTable())\r
         {\r
-            return Read(path, new TDataSet());\r
         }\r
 \r
+        /// <summary>\r
+        ///     Creates a reader whose result tables copy the schema of\r
+        ///     <paramref name="prototype"/> and picks one parser per column.\r
+        /// </summary>\r
+        /// <param name="prototype">Table whose columns determine the parsers.</param>\r
+        /// <exception cref="ArgumentNullException"><paramref name="prototype"/> is null.</exception>\r
+        public CsvReader(TDataTable prototype)\r
+        {\r
+            if (prototype == null)\r
+            {\r
+                throw new ArgumentNullException("prototype");\r
+            }\r
+\r
+            Separator = @",";\r
+            this.prototype = (TDataTable)prototype.Clone();\r
+            // Use the clone's columns so the reader does not keep a reference\r
+            // into the caller's table.\r
+            cols = this.prototype.Columns;\r
+            lineParser = new Parser[cols.Count];\r
+            for (int i = 0; i < cols.Count; ++i)\r
+            {\r
+                Type t = cols[i].DataType;\r
+                Parser registered;\r
+                // TryGetValue rather than the indexer: the indexer throws\r
+                // KeyNotFoundException for types without a registered parser,\r
+                // which made the DateTime/String fallbacks unreachable.\r
+                if (parsers.TryGetValue(t, out registered) && registered != null)\r
+                {\r
+                    lineParser[i] = registered;\r
+                }\r
+                else if (t == typeof(DateTime))\r
+                {\r
+                    lineParser[i] = ParseDateTime;\r
+                }\r
+                else\r
+                {\r
+                    lineParser[i] = ParseString;\r
+                }\r
+            }\r
+        }\r
+        #endregion\r
+\r
+        #region properties\r
+\r
         /// <summary>\r
-        ///     雛型を指定してCSVファイルを読み込む\r
+        ///     Field separator; the value is compiled as a regular expression pattern.\r
         /// </summary>\r
-        /// <param name="path">CSVファイルのパス</param>\r
-        /// <param name="proto">行の雛型</param>\r
-        /// <returns>指定した型のリスト</returns>\r
-        public List<TDataSet> Read(string path, TDataSet proto)\r
+        public string Separator\r
         {\r
-            List<TDataSet> records = new List<TDataSet>();\r
+            get\r
+            {\r
+                return separator;\r
+            }\r
+            set\r
+            {\r
+                // Compile first: if the pattern is null or invalid the Regex\r
+                // constructor throws and both fields keep their previous,\r
+                // mutually consistent values.\r
+                Regex compiled = new Regex(value, RegexOptions.Compiled);\r
+                separator = value;\r
+                sepPattern = compiled;\r
+            }\r
+        }\r
+\r
+        #endregion\r
 \r
+        #region public methods\r
+\r
+        /// <summary>\r
+        ///     Reads the file line by line into a new table cloned from the prototype.\r
+        ///     Lines for which SetRow flags a row error are skipped; while the table\r
+        ///     is still empty such a line is handed to SetHeader instead.\r
+        /// </summary>\r
+        /// <param name="path">File to read.</param>\r
+        /// <returns>The table holding every row that parsed without error.</returns>\r
+        public TDataTable Read(FilePath path)\r
+        {\r
+            TDataTable table = (TDataTable)prototype.Clone();\r
             try\r
             {\r
-                using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))\r
+                using (FileStream fs = new FileStream(\r
+                    path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))\r
                 {\r
                     using (StreamReader reader = new StreamReader(fs))\r
                     {\r
-                        string line = "";\r
-                        // 数値にマッチする正規表現\r
-                        string rfloat = @"\s*([-+]?(?:[0-9]+(\.[0-9]*)?|(\.[0-9]+))([eE][-+]?[0-9]+)?)\s*";\r
-                        Regex rCsv = new Regex(rfloat + separator, RegexOptions.Compiled);\r
-\r
                         while (reader.Peek() >= 0)\r
                         {\r
-                            line = reader.ReadLine() + separator;\r
-                            //Console.WriteLine("> " + line);\r
-                            MatchCollection mc = rCsv.Matches(line);\r
-\r
-                            // 現状ではNaNが含まれる行は受理しない\r
-                            if (!proto.Width.Includes(mc.Count)) continue;\r
-\r
-                            if (proto.KeyRange != null)\r
+                            DataRow row = table.NewRow();\r
+                            string line = reader.ReadLine();\r
+                            SetRow(row, line);\r
+                            if (row.HasErrors)\r
                             {\r
-                                double key = Double.Parse(mc[0].Groups[1].Value);\r
-                                if (!proto.KeyRange.Includes(key)) continue;\r
+                                if (table.Rows.Count < 1)\r
+                                {\r
+                                    SetHeader(table.Columns, line);\r
+                                }\r
+                                continue;\r
                             }\r
-\r
-                            TDataSet c = new TDataSet();\r
-                            foreach (Match m in mc)\r
-                            {\r
-                                c.Push(Double.Parse(m.Groups[1].Value));\r
-                            }\r
-                            records.Add(c);\r
+                            table.Rows.Add(row);\r
                         }\r
                     }\r
                 }\r
             }\r
-            catch (Exception e)\r
+            catch (Exception)\r
+            {\r
+                // Bare rethrow keeps the original stack trace ("throw ex;" reset it).\r
+                throw;\r
+            }\r
+            return table;\r
+        }\r
+\r
+        /// <summary>\r
+        ///     Not implemented yet.\r
+        /// </summary>\r
+        /// <param name="path">Currently unused.</param>\r
+        /// <exception cref="NotImplementedException">Always thrown.</exception>\r
+        public TDataTable ReadAll(FilePath path)\r
+        {\r
+            // The unreachable "return null;" that followed the throw was removed.\r
+            throw new NotImplementedException();\r
+        }\r
+\r
+        /// <summary>\r
+        ///     Returns the parser assigned to the column at the given index.\r
+        /// </summary>\r
+        /// <param name="column">Index into the per-column parser array.</param>\r
+        /// <returns>The parser stored for that column.</returns>\r
+        public Parser GetParser(int column)\r
+        {\r
+            Parser assigned = lineParser[column];\r
+            return assigned;\r
+        }\r
+\r
+        /// <summary>\r
+        ///     Returns the parser assigned to the prototype column with the given name.\r
+        /// </summary>\r
+        /// <param name="column">Column name looked up in the prototype's columns.</param>\r
+        /// <returns>The parser stored for that column.</returns>\r
+        public Parser GetParser(string column)\r
+        {\r
+            int index = prototype.Columns.IndexOf(column);\r
+            return lineParser[index];\r
+        }\r
+\r
+        /// <summary>\r
+        ///     Returns the parser assigned to the prototype column that has the\r
+        ///     same name as <paramref name="column"/>.\r
+        /// </summary>\r
+        /// <param name="column">Column whose ColumnName is looked up in the prototype.</param>\r
+        /// <returns>The parser stored for that column.</returns>\r
+        public Parser GetParser(DataColumn column)\r
+        {\r
+            string name = column.ColumnName;\r
+            int index = prototype.Columns.IndexOf(name);\r
+            return lineParser[index];\r
+        }\r
+\r
+        /// <summary>\r
+        ///     Returns the default parser registered for a data type.\r
+        /// </summary>\r
+        /// <param name="type">Key into the shared type-to-parser dictionary.</param>\r
+        /// <returns>The registered parser.</returns>\r
+        public static Parser GetParser(Type type)\r
+        {\r
+            Parser registered = parsers[type];\r
+            return registered;\r
+        }\r
+\r
+        /// <summary>\r
+        ///     Replaces the parser used for the column at the given index.\r
+        /// </summary>\r
+        /// <param name="column">Index into the per-column parser array.</param>\r
+        /// <param name="parser">Parser to store for that column.</param>\r
+        public void SetParser(int column, Parser parser)\r
+        {\r
+            int index = column;\r
+            lineParser[index] = parser;\r
+        }\r
+\r
+        /// <summary>\r
+        ///     Replaces the parser used for the prototype column with the given name.\r
+        /// </summary>\r
+        /// <param name="column">Column name looked up in the prototype's columns.</param>\r
+        /// <param name="parser">Parser to store for that column.</param>\r
+        public void SetParser(string column, Parser parser)\r
+        {\r
+            int index = prototype.Columns.IndexOf(column);\r
+            lineParser[index] = parser;\r
+        }\r
+\r
+        /// <summary>\r
+        ///     Replaces the parser used for the prototype column that has the\r
+        ///     same name as <paramref name="column"/>.\r
+        /// </summary>\r
+        /// <param name="column">Column whose ColumnName is looked up in the prototype.</param>\r
+        /// <param name="parser">Parser to store for that column.</param>\r
+        public void SetParser(DataColumn column, Parser parser)\r
+        {\r
+            string name = column.ColumnName;\r
+            int index = prototype.Columns.IndexOf(name);\r
+            lineParser[index] = parser;\r
+        }\r
+\r
+        /// <summary>\r
+        ///     Registers, or overwrites, the default parser for a data type.\r
+        /// </summary>\r
+        /// <param name="type">Key into the shared type-to-parser dictionary.</param>\r
+        /// <param name="parser">Parser to store under that key.</param>\r
+        public static void SetParser(Type type, Parser parser)\r
+        {\r
+            Dictionary<Type, Parser> registry = parsers;\r
+            registry[type] = parser;\r
+        }\r
+\r
+\r
+\r
+        #endregion\r
+\r
+        #region private methods\r
+\r
+        /// <summary>\r
+        ///     Parses one text line field by field into <paramref name="row"/>,\r
+        ///     using the parser configured for each column.\r
+        /// </summary>\r
+        /// <param name="row">Row that receives the values; RowError is set when a field fails.</param>\r
+        /// <param name="line">Line text; consumed locally, the caller's string is not changed.</param>\r
+        private void SetRow(DataRow row, string line)\r
+        {\r
+            for (int i = 0; i < lineParser.Length; ++i)\r
+            {\r
+                object value;\r
+                if (lineParser[i](ref line, out value))\r
+                {\r
+                    row[i] = value;\r
+                }\r
+                else\r
+                {\r
+                    // HasErrors only reports a non-empty RowError. The bare\r
+                    // remainder is empty when the line ran out of fields, so\r
+                    // build a message that can never be empty.\r
+                    row.RowError = "column " + i + ": \"" + line + "\"";\r
+                }\r
+                Match m = sepPattern.Match(line);\r
+                if (m.Success)\r
+                {\r
+                    // Skip through the separator where it was actually found;\r
+                    // dropping only m.Length characters from the start removed\r
+                    // the wrong text whenever m.Index was not zero.\r
+                    line = line.Substring(m.Index + m.Length);\r
+                }\r
+            }\r
+        }\r
+\r
+        /// <summary>\r
+        ///     Treats <paramref name="line"/> as a header line and copies its fields\r
+        ///     into the column captions, dropping a leading "#" marker from each name.\r
+        /// </summary>\r
+        /// <param name="columns">Columns whose Caption is updated, in order.</param>\r
+        /// <param name="line">Header text; consumed locally.</param>\r
+        private void SetHeader(DataColumnCollection columns, string line)\r
+        {\r
+            for (int i = 0; i < columns.Count; ++i)\r
+            {\r
+                // Stop once the text is used up so the remaining columns keep\r
+                // their captions instead of being overwritten with "".\r
+                if (String.IsNullOrEmpty(line))\r
+                {\r
+                    return;\r
+                }\r
+\r
+                object value;\r
+                if (!ParseString(ref line, out value))\r
+                {\r
+                    return;\r
+                }\r
+\r
+                string name = (string)value;\r
+                Match comment = Regex.Match(name, @"^#\s*");\r
+                columns[i].Caption = name.Substring(comment.Length);\r
+\r
+                Match m = sepPattern.Match(line);\r
+                if (m.Success)\r
+                {\r
+                    // Skip through the separator at its real position.\r
+                    line = line.Substring(m.Index + m.Length);\r
+                }\r
+            }\r
+        }\r
+\r
+        #region parsers\r
+\r
+        /// <summary>\r
+        ///     Reads a case-insensitive "True" or "False" from the start of the input.\r
+        /// </summary>\r
+        /// <param name="input">Text to parse; the matched word is removed on success.</param>\r
+        /// <param name="output">The parsed boolean, or false when nothing matched.</param>\r
+        /// <returns>true when the input started with either word.</returns>\r
+        private static bool ParseBoolean(ref string input, out object output)\r
+        {\r
+            Match m = Regex.Match(input,\r
+                        "^" + Boolean.TrueString, RegexOptions.IgnoreCase);\r
+            // Remember which word matched; previously "False" also produced true.\r
+            bool parsed = m.Success;\r
+            if (!m.Success)\r
+            {\r
+                m = Regex.Match(input,\r
+                        "^" + Boolean.FalseString, RegexOptions.IgnoreCase);\r
+                if (!m.Success)\r
+                {\r
+                    output = false;\r
+                    return false;\r
+                }\r
+            }\r
+            output = parsed;\r
+            input = input.Substring(m.Length);\r
+            return true;\r
+        }\r
+\r
+        /// <summary>\r
+        ///     Reads a floating-point number matched by RegexSet.DecimalFloat.\r
+        /// </summary>\r
+        /// <param name="input">Text to parse; the matched text is removed on success.</param>\r
+        /// <param name="output">The parsed Double, or 0 on failure.</param>\r
+        /// <returns>true when a number was matched and converted.</returns>\r
+        private static bool ParseDouble(ref string input, out object output)\r
+        {\r
+            // NOTE(review): RegexSet.DecimalFloat is defined elsewhere; assumed\r
+            // to capture the number text in group 1 - confirm.\r
+            Match m = RegexSet.DecimalFloat.Match(input);\r
+            double parsed;\r
+            // Invariant culture: file data must not depend on the machine's\r
+            // decimal separator. TryParse reports bad text as a failed parse\r
+            // instead of throwing out of the reader.\r
+            if (!m.Success || !Double.TryParse(\r
+                    m.Groups[1].Value,\r
+                    System.Globalization.NumberStyles.Float,\r
+                    System.Globalization.CultureInfo.InvariantCulture,\r
+                    out parsed))\r
+            {\r
+                output = 0;\r
+                return false;\r
+            }\r
+            output = parsed;\r
+            input = input.Substring(m.Length);\r
+            return true;\r
+        }\r
+\r
+        /// <summary>\r
+        ///     Reads a single-precision number matched by RegexSet.DecimalFloat.\r
+        /// </summary>\r
+        /// <param name="input">Text to parse; the matched text is removed on success.</param>\r
+        /// <param name="output">The parsed Single, or 0 on failure.</param>\r
+        /// <returns>true when a number was matched and converted.</returns>\r
+        private static bool ParseSingle(ref string input, out object output)\r
+        {\r
+            // NOTE(review): RegexSet.DecimalFloat is defined elsewhere; assumed\r
+            // to capture the number text in group 1 - confirm.\r
+            Match m = RegexSet.DecimalFloat.Match(input);\r
+            float parsed;\r
+            // Invariant culture keeps "." as the decimal separator on every\r
+            // machine; TryParse turns bad text into a failed parse.\r
+            if (!m.Success || !Single.TryParse(\r
+                    m.Groups[1].Value,\r
+                    System.Globalization.NumberStyles.Float,\r
+                    System.Globalization.CultureInfo.InvariantCulture,\r
+                    out parsed))\r
+            {\r
+                output = 0;\r
+                return false;\r
+            }\r
+            output = parsed;\r
+            input = input.Substring(m.Length);\r
+            return true;\r
+        }\r
+\r
+        /// <summary>\r
+        ///     Reads a Decimal from text matched by RegexSet.DecimalFloat.\r
+        /// </summary>\r
+        /// <param name="input">Text to parse; the matched text is removed on success.</param>\r
+        /// <param name="output">The parsed Decimal, or 0 on failure.</param>\r
+        /// <returns>true when a number was matched and converted.</returns>\r
+        private static bool ParseDecimal(ref string input, out object output)\r
+        {\r
+            // NOTE(review): RegexSet.DecimalFloat is defined elsewhere; assumed\r
+            // to capture the number text in group 1 - confirm.\r
+            Match m = RegexSet.DecimalFloat.Match(input);\r
+            decimal parsed;\r
+            // NumberStyles.Float permits exponent notation, which the default\r
+            // Decimal.Parse style rejects with FormatException. Invariant\r
+            // culture keeps "." as the decimal separator.\r
+            if (!m.Success || !Decimal.TryParse(\r
+                    m.Groups[1].Value,\r
+                    System.Globalization.NumberStyles.Float,\r
+                    System.Globalization.CultureInfo.InvariantCulture,\r
+                    out parsed))\r
+            {\r
+                output = 0;\r
+                return false;\r
+            }\r
+            output = parsed;\r
+            input = input.Substring(m.Length);\r
+            return true;\r
+        }\r
+\r
+        /// <summary>\r
+        ///     Reads a number matched by RegexSet.DecimalFloat and truncates it to Int32.\r
+        /// </summary>\r
+        /// <param name="input">Text to parse; the matched text is removed on success.</param>\r
+        /// <param name="output">The parsed Int32, or 0 on failure.</param>\r
+        /// <returns>true when a number was matched and fits into Int32.</returns>\r
+        private static bool ParseInt32(ref string input, out object output)\r
+        {\r
+            // NOTE(review): RegexSet.DecimalFloat is defined elsewhere; assumed\r
+            // to capture the number text in group 1 - confirm.\r
+            Match m = RegexSet.DecimalFloat.Match(input);\r
+            double parsed;\r
+            // The bounds are exclusive because the cast truncates toward zero.\r
+            // Values outside them (and NaN) are rejected: an unchecked cast of\r
+            // such a double yields an unspecified Int32.\r
+            if (!m.Success\r
+                || !Double.TryParse(\r
+                    m.Groups[1].Value,\r
+                    System.Globalization.NumberStyles.Float,\r
+                    System.Globalization.CultureInfo.InvariantCulture,\r
+                    out parsed)\r
+                || Double.IsNaN(parsed)\r
+                || parsed <= -2147483649.0\r
+                || parsed >= 2147483648.0)\r
+            {\r
+                output = 0;\r
+                return false;\r
+            }\r
+            output = (Int32)parsed;\r
+            input = input.Substring(m.Length);\r
+            return true;\r
+        }\r
+\r
+        /// <summary>\r
+        ///     Reads a number matched by RegexSet.DecimalFloat and converts it to Int64.\r
+        /// </summary>\r
+        /// <param name="input">Text to parse; the matched text is removed on success.</param>\r
+        /// <param name="output">The parsed Int64, or 0 on failure.</param>\r
+        /// <returns>true when a number was matched and fits into Int64.</returns>\r
+        private static bool ParseInt64(ref string input, out object output)\r
+        {\r
+            // NOTE(review): RegexSet.DecimalFloat is defined elsewhere; assumed\r
+            // to capture the number text in group 1 - confirm.\r
+            Match m = RegexSet.DecimalFloat.Match(input);\r
+            if (!m.Success)\r
+            {\r
+                output = 0;\r
+                return false;\r
+            }\r
+\r
+            string text = m.Groups[1].Value;\r
+            long exact;\r
+            double approx;\r
+            if (Int64.TryParse(\r
+                    text,\r
+                    System.Globalization.NumberStyles.Integer,\r
+                    System.Globalization.CultureInfo.InvariantCulture,\r
+                    out exact))\r
+            {\r
+                // Plain integers are parsed exactly; going through Double\r
+                // loses precision above 2^53.\r
+                output = exact;\r
+            }\r
+            else if (Double.TryParse(\r
+                         text,\r
+                         System.Globalization.NumberStyles.Float,\r
+                         System.Globalization.CultureInfo.InvariantCulture,\r
+                         out approx)\r
+                     && approx >= -9223372036854775808.0\r
+                     && approx < 9223372036854775808.0)\r
+            {\r
+                // Fractions and exponent notation are truncated as before; the\r
+                // range test keeps the cast inside the representable Int64 values.\r
+                output = (Int64)approx;\r
+            }\r
+            else\r
+            {\r
+                output = 0;\r
+                return false;\r
+            }\r
+\r
+            input = input.Substring(m.Length);\r
+            return true;\r
+        }\r
+\r
+        /// <summary>\r
+        ///     Reads one field with ParseString and converts it with DateTime.TryParse.\r
+        /// </summary>\r
+        /// <param name="input">Text to parse; the field is removed only on success.</param>\r
+        /// <param name="output">The parsed DateTime, or DBNull.Value on failure.</param>\r
+        /// <returns>true when the field was a valid date/time.</returns>\r
+        private bool ParseDateTime(ref string input, out object output)\r
+        {\r
+            string original = input;\r
+            object value;\r
+            ParseString(ref input, out value);\r
+            DateTime dt;\r
+            if (DateTime.TryParse(value as string, out dt))\r
             {\r
-                Console.WriteLine("The file could not be read:");\r
-                Console.WriteLine("> " + path);\r
-                Console.WriteLine(e.Message);\r
-                throw e;\r
+                output = dt;\r
+                return true;\r
             }\r
+            // Put the text back so a failed parse leaves the input untouched,\r
+            // as the other parsers do; the caller can then report the bad field.\r
+            input = original;\r
+            output = DBNull.Value;\r
+            return false;\r
+        }\r
 \r
-            return records;\r
+        /// <summary>\r
+        ///     Reads one text field: either a match of RegexSet.QuotedString (with\r
+        ///     doubled quotes collapsed to one) or everything up to the next separator.\r
+        /// </summary>\r
+        /// <param name="input">Text to parse; the field is removed, a following separator is kept.</param>\r
+        /// <param name="output">The field text.</param>\r
+        /// <returns>Always true.</returns>\r
+        private bool ParseString(ref string input, out object output)\r
+        {\r
+            Match quoted = RegexSet.QuotedString.Match(input);\r
+            if (!quoted.Success)\r
+            {\r
+                // Unquoted: the field ends at the next separator, or at the\r
+                // end of the text when no separator is left.\r
+                Match sep = sepPattern.Match(input);\r
+                int end = sep.Success ? sep.Index : input.Length;\r
+                output = input.Substring(0, end);\r
+                input = input.Substring(end);\r
+                return true;\r
+            }\r
+\r
+            output = quoted.Groups[1].Value.Replace("\"\"", "\"");\r
+            input = input.Substring(quoted.Length);\r
+            return true;\r
         }\r
+        #endregion\r
+\r
+        #endregion\r
     }\r
 }\r