入出力強化

[mikutoga/TogaGem.git] / src / main / java / jp / sourceforge / mikutoga / parser / TextDecoder.java
diff --git a/src/main/java/jp/sourceforge/mikutoga/parser/TextDecoder.java b/src/main/java/jp/sourceforge/mikutoga/parser/TextDecoder.java

index 08f3821..1a8044a 100644 (file)
--- a/src/main/java/jp/sourceforge/mikutoga/parser/TextDecoder.java
+++ b/src/main/java/jp/sourceforge/mikutoga/parser/TextDecoder.java
@@ -17,6 +17,14 @@ import java.nio.charset.CodingErrorAction;
  
  /**
   * 文字デコーダー。
+ * <p>あらかじめ長さが既知であるバイト列をMMD入力ソースから読み取り、
+ * デコーディング結果を返す。
+ * <p>デコード対象のバイト列が全てメモリ上に展開されるので、
+ * 巨大なテキストのデコードには不適当。
+ * <p>入力バイト値0x00以降をデコーディングの対象から外す
+ * 「ゼロチョップモード」を備える。
+ * デフォルトではゼロチョップモードはオフ。
+ * ゼロチョップモードはUTF16などのデコーディング時に使っても意味が無い。
   */
  public class TextDecoder {
  
@@ -29,6 +37,8 @@ public class TextDecoder {
  
      private final CharsetDecoder decoder;
  
+    private boolean chopZero = false;
+
      private byte[] byteArray;
      private ByteBuffer byteBuffer;  // byteArrayの別ビュー
      private CharBuffer charBuffer;
@@ -48,7 +58,7 @@ public class TextDecoder {
       * コンストラクタ。
       * @param decoder デコーダ
       */
-    protected TextDecoder(CharsetDecoder decoder){
+    public TextDecoder(CharsetDecoder decoder){
          super();
          this.decoder = decoder;
          this.decoder.onMalformedInput(CodingErrorAction.REPORT);
@@ -67,6 +77,7 @@ public class TextDecoder {
          }
  
          int rounded = (int)( newSize * WIDEN_RATE );
+        if(rounded < BYTEBUF_SZ) rounded = BYTEBUF_SZ;
  
          this.byteArray = new byte[rounded];
          this.byteBuffer = ByteBuffer.wrap(this.byteArray);
@@ -82,21 +93,66 @@ public class TextDecoder {
      }
  
      /**
+     * Sets the zero-chop mode flag.
+     * When zero-chop mode is on, the first input byte with value 0x00
+     * and every byte after it are excluded from decoding.
+     * @param chop true to turn zero-chop mode on, false to turn it off
+     */
+    public void setZeroChopMode(boolean chop){
+        this.chopZero = chop;
+        return;
+    }
+
+    /**
+     * Reports whether zero-chop mode is currently on.
+     * @return true if zero-chop mode is on
+     */
+    public boolean isZeroChopMode(){
+        return this.chopZero;
+    }
+
+    /**
+     * Chops the input bytes at the first byte whose value is 0x00.
+     * Does nothing unless zero-chop mode is on.
+     * <p>The backing byte array is scanned from index 0 up to the byte
+     * buffer's current limit. At the first zero byte the buffer's limit
+     * is moved to that index, so the zero byte and everything after it
+     * fall outside the buffer's readable range. If no zero byte is found
+     * the limit is left unchanged.
+     */
+    protected void chopZeroTermed(){
+        if( ! this.chopZero ) return;
+
+        int limit = this.byteBuffer.limit();
+
+        for(int idx = 0; idx < limit; idx++){
+            byte bVal = this.byteArray[idx];
+            if(bVal == 0x00){
+                this.byteBuffer.limit(idx);
+                break;
+            }
+        }
+
+        return;
+    }
+
+    /**
       * バイト列を読み込み文字列へデコーディングする。
-     * @param source 入力ソース
+     * @param is 入力ストリーム
       * @param byteSize 読み込みバイトサイズ
-     * @return 文字へのデコード結果。
+     * @return 内部に保持されるデコード結果。
+     * 次回呼び出しまでに結果の適切なコピーがなされなければならない。
       * @throws MmdEofException 意図しないファイル末端
       * @throws MmdFormatException 矛盾したバイトシーケンス
       * もしくは未定義文字
       * @throws IOException 入力エラー
       */
-    public CharBuffer parseString(MmdSource source, int byteSize)
+    public CharBuffer parseString(MmdInputStream is, int byteSize)
              throws MmdEofException, MmdFormatException, IOException{
          prepareBuffer(byteSize);
  
-        source.parseByteArray(this.byteArray, 0, byteSize);
+        int readSize = is.read(this.byteArray, 0, byteSize);
+        if(readSize != byteSize){
+            throw new MmdEofException(is.getPosition());
+        }
+
          this.byteBuffer.rewind().limit(byteSize);
+        chopZeroTermed();
  
          this.charBuffer.clear();
  
@@ -104,8 +160,13 @@ public class TextDecoder {
          CoderResult decResult =
                  this.decoder.decode(this.byteBuffer, this.charBuffer, true);
          if(decResult.isError()){
-            throw new MmdFormatException("illegal character encoding",
-                                         source.getPosition() );
+            if(decResult.isUnmappable()){
+                throw new MmdFormatException("unmapped character",
+                                             is.getPosition() );
+            }else{
+                throw new MmdFormatException("illegal character encoding",
+                                             is.getPosition() );
+            }
          }else if(decResult.isOverflow()){
              assert false;
          }