OSDN Git Service

UTF-8に対処
author Olyutorskii <olyutorskii@users.osdn.me>
Mon, 18 Apr 2011 18:02:57 +0000 (03:02 +0900)
committer Olyutorskii <olyutorskii@users.osdn.me>
Mon, 18 Apr 2011 18:02:57 +0000 (03:02 +0900)
src/main/java/jp/sourceforge/jindolf/archiver/Builder.java
src/main/java/jp/sourceforge/jindolf/archiver/HttpAccess.java
src/main/java/jp/sourceforge/jindolf/archiver/JinArchiver.java

index ba097f6..0fe807e 100644 (file)
@@ -10,8 +10,10 @@ import java.io.IOException;
 import java.io.InputStream;\r
 import java.net.URL;\r
 import java.net.URLConnection;\r
+import java.nio.charset.Charset;\r
 import jp.sourceforge.jindolf.parser.ContentBuilder;\r
 import jp.sourceforge.jindolf.parser.ContentBuilderSJ;\r
+import jp.sourceforge.jindolf.parser.ContentBuilderUCS2;\r
 import jp.sourceforge.jindolf.parser.DecodeException;\r
 import jp.sourceforge.jindolf.parser.DecodedContent;\r
 import jp.sourceforge.jindolf.parser.HtmlParseException;\r
@@ -24,17 +26,25 @@ import jp.sourceforge.jindolf.parser.StreamDecoder;
  */\r
 public final class Builder{\r
 \r
-    /**\r
-     * 入力ストリームをShift_JISでデコードする。\r
-     * @param istream 入力\r
-     * @return デコード結果\r
-     * @throws IOException 入力エラー\r
-     * @throws DecodeException デコードエラー\r
-     */\r
-    public static DecodedContent contentFromStream(InputStream istream)\r
+    private static int BUF_SZ = 100 * 1024;\r
+\r
+    public static DecodedContent contentFromStream(Charset charset,\r
+                                                     InputStream istream)\r
             throws IOException, DecodeException{\r
-        StreamDecoder decoder = new SjisDecoder();\r
-        ContentBuilder builder = new ContentBuilderSJ();\r
+        StreamDecoder decoder;\r
+        ContentBuilder builder;\r
+\r
+        if(charset.name().equalsIgnoreCase("Shift_JIS")){\r
+            decoder = new SjisDecoder();\r
+            builder = new ContentBuilderSJ(BUF_SZ);\r
+        }else if(charset.name().equalsIgnoreCase("UTF-8")){\r
+            decoder = new StreamDecoder(charset.newDecoder());\r
+            builder = new ContentBuilderUCS2(BUF_SZ);\r
+        }else{\r
+            assert false;\r
+            return null;\r
+        }\r
+\r
         decoder.setDecodeHandler(builder);\r
 \r
         decoder.decode(istream);\r
@@ -61,6 +71,8 @@ public final class Builder{
 \r
         handler.initVillageData(villageData);\r
 \r
+        Charset charset = villageData.getLandDef().getEncoding();\r
+\r
         for(PeriodResource resource : villageData.getPeriodResourceList()){\r
             handler.initPeriodResource(resource);\r
             URL url;\r
@@ -74,7 +86,7 @@ public final class Builder{
                 long downTimeMs = conn.getDate();\r
                 resource.setDownTimeMs(downTimeMs);\r
             }\r
-            DecodedContent content = contentFromStream(istream);\r
+            DecodedContent content = contentFromStream(charset, istream);\r
             istream.close();\r
             parser.parseAutomatic(content);\r
         }\r
index a43c8aa..1a92e02 100644 (file)
@@ -9,6 +9,7 @@ package jp.sourceforge.jindolf.archiver;
 import java.io.IOException;\r
 import java.io.InputStream;\r
 import java.net.URL;\r
+import java.nio.charset.Charset;\r
 import java.util.LinkedList;\r
 import java.util.List;\r
 import jp.sourceforge.jindolf.corelib.LandDef;\r
@@ -65,8 +66,9 @@ public final class HttpAccess{
                    IOException {\r
         URL url = getPeriodListURL(landDef, vid);\r
 \r
+        Charset charset = landDef.getEncoding();\r
         InputStream istream = url.openStream();\r
-        DecodedContent content = Builder.contentFromStream(istream);\r
+        DecodedContent content = Builder.contentFromStream(charset, istream);\r
         istream.close();\r
 \r
         HtmlParser parser = new HtmlParser();\r
index d2cb172..2f0781c 100644 (file)
@@ -38,7 +38,7 @@ public final class JinArchiver{
     /** バージョン。 */\r
     private static final String VERSION;\r
     /** Generator. */\r
-    private static final String GENERATOR;\r
+    public static final String GENERATOR;\r
 \r
     private static final List<LandDef> LANDDEF_LIST;\r
 \r