src/main/java/jp/sourceforge/mikutoga/parser/CommonParser.java

   1 /*\r
   2  * common MMD parser\r
   3  *\r
   4  * License : The MIT License\r
   5  * Copyright(c) 2010 MikuToga Partners\r
   6  */\r
   7 \r
   8 package jp.sourceforge.mikutoga.parser;\r
   9 \r
  10 import java.io.IOException;\r
  11 import java.nio.ByteBuffer;\r
  12 import java.nio.CharBuffer;\r
  13 import java.nio.charset.Charset;\r
  14 import java.nio.charset.CharsetDecoder;\r
  15 import java.nio.charset.CoderResult;\r
  16 import java.nio.charset.CodingErrorAction;\r
  17 \r
  18 /**\r
  19  * 各種パーサの共通実装。\r
  20  */\r
  21 public class CommonParser {\r
  22 \r
  23     /** 日本語デコード作業用入力バッファ長。バイト単位。 */\r
  24     public static final int TEXTBUF_SZ = 512;\r
  25 \r
  26     /**\r
  27      * MMD各種ファイルで用いられる日本語エンコーディング。(windows-31j)\r
  28      * ほぼShift_JISのスーパーセットと思ってよい。\r
  29      * デコード結果はUCS-2集合に収まるはず。\r
  30      */\r
  31     protected static final Charset CS_WIN31J = Charset.forName("windows-31j");\r
  32 \r
  33     private static final byte TERMINATOR = (byte) '\0';  // 0x00\r
  34     private static final char UCSYEN = '\u00a5';\r
  35     private static final char SJISYEN = (char) 0x005c;  // '\u005c\u005c';\r
  36 \r
  37     private final MmdSource source;\r
  38     private final CharsetDecoder decoder;\r
  39     private final byte[] textArray;\r
  40     private final ByteBuffer textBuffer;  // textArrayの別ビュー\r
  41     private final CharBuffer charBuffer;\r
  42 \r
  43     /**\r
  44      * コンストラクタ。\r
  45      * @param source 入力ソース\r
  46      */\r
  47     public CommonParser(MmdSource source){\r
  48         super();\r
  49 \r
  50         this.source = source;\r
  51         this.decoder = CS_WIN31J.newDecoder();\r
  52         this.textArray = new byte[TEXTBUF_SZ];\r
  53         this.textBuffer = ByteBuffer.wrap(this.textArray);\r
  54         int maxChars =\r
  55                 (int)(TEXTBUF_SZ * (this.decoder.maxCharsPerByte())) + 1;\r
  56         this.charBuffer = CharBuffer.allocate(maxChars);\r
  57 \r
  58         this.decoder.onMalformedInput(CodingErrorAction.REPORT);\r
  59         this.decoder.onUnmappableCharacter(CodingErrorAction.REPORT);\r
  60         this.textBuffer.clear();\r
  61         this.charBuffer.clear();\r
  62 \r
  63         return;\r
  64     }\r
  65 \r
  66     /**\r
  67      * 入力ソースにまだデータが残っているか判定する。\r
  68      * @return まだ読み込んでいないデータが残っていればtrue\r
  69      * @throws IOException IOエラー\r
  70      * @see MmdSource#hasMore()\r
  71      */\r
  72     protected boolean hasMore() throws IOException{\r
  73         boolean result = this.source.hasMore();\r
  74         return result;\r
  75     }\r
  76 \r
  77     /**\r
  78      * 入力ソースを読み飛ばす。\r
  79      * @param skipLength 読み飛ばすバイト数。\r
  80      * @throws IOException IOエラー\r
  81      * @throws MmdEofException 読み飛ばす途中でストリーム終端に達した。\r
  82      * @see MmdSource#skip(long)\r
  83      */\r
  84     protected void skip(long skipLength)\r
  85             throws IOException, MmdEofException {\r
  86         long result = this.source.skip(skipLength);\r
  87         if(result != skipLength){\r
  88             throw new MmdEofException(this.source.getPosition());\r
  89         }\r
  90 \r
  91         return;\r
  92     }\r
  93 \r
  94     /**\r
  95      * 入力ソースを読み飛ばす。\r
  96      * @param skipLength 読み飛ばすバイト数。\r
  97      * @throws IOException IOエラー\r
  98      * @throws MmdEofException 読み飛ばす途中でストリーム終端に達した。\r
  99      * @see MmdSource#skip(long)\r
 100      */\r
 101     protected void skip(int skipLength)\r
 102             throws IOException, MmdEofException {\r
 103         skip((long) skipLength);\r
 104     }\r
 105 \r
 106     /**\r
 107      * byte値を読み込む。\r
 108      * @return 読み込んだbyte値\r
 109      * @throws IOException IOエラー\r
 110      * @throws MmdEofException 読み込む途中でストリーム終端に達した。\r
 111      * @see MmdSource#parseByte()\r
 112      */\r
 113     protected byte parseByte()\r
 114             throws IOException, MmdEofException{\r
 115         return this.source.parseByte();\r
 116     }\r
 117 \r
 118     /**\r
 119      * 符号無し値としてbyte値を読み込み、int型に変換して返す。\r
 120      * 符号は拡張されない。(0xffは0x000000ffとなる)\r
 121      * @return 読み込まれた値のint値\r
 122      * @throws IOException IOエラー\r
 123      * @throws MmdEofException 読み込む途中でストリーム終端に達した。\r
 124      * @see MmdSource#parseUByteAsInteger()\r
 125      */\r
 126     protected int parseUByteAsInteger()\r
 127             throws IOException, MmdEofException{\r
 128         return this.source.parseUByteAsInteger();\r
 129     }\r
 130 \r
 131     /**\r
 132      * byte値を読み込み、boolean型に変換して返す。\r
 133      * 0x00は偽、それ以外は真と解釈される。\r
 134      * @return 読み込まれた値のboolean値\r
 135      * @throws IOException IOエラー\r
 136      * @throws MmdEofException 読み込む途中でストリーム終端に達した。\r
 137      * @see MmdSource#parseBoolean()\r
 138      */\r
 139     protected boolean parseBoolean()\r
 140             throws IOException, MmdEofException{\r
 141         return this.source.parseBoolean();\r
 142     }\r
 143 \r
 144     /**\r
 145      * short値を読み込む。\r
 146      * short値はリトルエンディアンで格納されていると仮定される。\r
 147      * @return 読み込んだshort値\r
 148      * @throws IOException IOエラー\r
 149      * @throws MmdEofException 読み込む途中でストリーム終端に達した。\r
 150      * @see MmdSource#parseShort()\r
 151      */\r
 152     protected short parseShort()\r
 153             throws IOException, MmdEofException{\r
 154         return this.source.parseShort();\r
 155     }\r
 156 \r
 157     /**\r
 158      * 符号無し値としてshort値を読み込み、int型に変換して返す。\r
 159      * 符号は拡張されない。(0xffffは0x0000ffffとなる)\r
 160      * short値はリトルエンディアンで格納されていると仮定される。\r
 161      * @return 読み込まれた値のint値\r
 162      * @throws IOException IOエラー\r
 163      * @throws MmdEofException 読み込む途中でストリーム終端に達した。\r
 164      * @see MmdSource#parseUShortAsInteger()\r
 165      */\r
 166     protected int parseUShortAsInteger()\r
 167             throws IOException, MmdEofException{\r
 168         return this.source.parseUShortAsInteger();\r
 169     }\r
 170 \r
 171     /**\r
 172      * int値を読み込む。\r
 173      * int値はリトルエンディアンで格納されていると仮定される。\r
 174      * @return 読み込んだint値\r
 175      * @throws IOException IOエラー\r
 176      * @throws MmdEofException 読み込む途中でストリーム終端に達した。\r
 177      * @see MmdSource#parseInteger()\r
 178      */\r
 179     protected int parseInteger()\r
 180             throws IOException, MmdEofException{\r
 181         return this.source.parseInteger();\r
 182     }\r
 183 \r
 184     /**\r
 185      * float値を読み込む。\r
 186      * float値はリトルエンディアンで格納されていると仮定される。\r
 187      * @return 読み込んだfloat値\r
 188      * @throws IOException IOエラー\r
 189      * @throws MmdEofException 読み込む途中でストリーム終端に達した。\r
 190      * @see MmdSource#parseFloat()\r
 191      */\r
 192     protected float parseFloat()\r
 193             throws IOException, MmdEofException{\r
 194         return this.source.parseFloat();\r
 195     }\r
 196 \r
 197     /**\r
 198      * byte配列を読み込む。\r
 199      * @param dst 格納先配列\r
 200      * @param offset 読み込み開始オフセット\r
 201      * @param length 読み込みバイト数\r
 202      * @throws IOException IOエラー\r
 203      * @throws NullPointerException 配列がnull\r
 204      * @throws IndexOutOfBoundsException 引数が配列属性と矛盾\r
 205      * @throws MmdEofException 読み込む途中でストリーム終端に達した。\r
 206      * @see MmdSource#parseByteArray(byte[], int, int)\r
 207      */\r
 208     protected void parseByteArray(byte[] dst, int offset, int length)\r
 209             throws IOException,\r
 210                    NullPointerException,\r
 211                    IndexOutOfBoundsException,\r
 212                    MmdEofException {\r
 213         this.source.parseByteArray(dst, offset, length);\r
 214         return;\r
 215     }\r
 216 \r
 217     /**\r
 218      * byte配列を読み込む。\r
 219      * 配列要素全ての読み込みが試みられる。\r
 220      * @param dst 格納先配列\r
 221      * @throws IOException IOエラー\r
 222      * @throws NullPointerException 配列がnull\r
 223      * @throws MmdEofException 読み込む途中でストリーム終端に達した。\r
 224      * @see MmdSource#parseByteArray(byte[])\r
 225      */\r
 226     protected void parseByteArray(byte[] dst)\r
 227             throws IOException, NullPointerException, MmdEofException{\r
 228         this.source.parseByteArray(dst);\r
 229         return;\r
 230     }\r
 231 \r
 232     /**\r
 233      * float配列を読み込む。\r
 234      * @param dst 格納先配列\r
 235      * @param offset 読み込み開始オフセット\r
 236      * @param length 読み込みfloat要素数\r
 237      * @throws IOException IOエラー\r
 238      * @throws NullPointerException 配列がnull\r
 239      * @throws IndexOutOfBoundsException 引数が配列属性と矛盾\r
 240      * @throws MmdEofException 読み込む途中でストリーム終端に達した。\r
 241      * @see MmdSource#parseFloatArray(float[], int, int)\r
 242      */\r
 243     protected void parseFloatArray(float[] dst, int offset, int length)\r
 244             throws IOException,\r
 245                    NullPointerException,\r
 246                    IndexOutOfBoundsException,\r
 247                    MmdEofException {\r
 248         this.source.parseFloatArray(dst, offset, length);\r
 249         return;\r
 250     }\r
 251 \r
 252     /**\r
 253      * float配列を読み込む。\r
 254      * 配列要素全ての読み込みが試みられる。\r
 255      * @param dst 格納先配列\r
 256      * @throws IOException IOエラー\r
 257      * @throws NullPointerException 配列がnull\r
 258      * @throws MmdEofException 読み込む途中でストリーム終端に達した。\r
 259      * @see MmdSource#parseFloatArray(float[])\r
 260      */\r
 261     protected void parseFloatArray(float[] dst)\r
 262             throws IOException, NullPointerException, MmdEofException{\r
 263         this.source.parseFloatArray(dst);\r
 264         return;\r
 265     }\r
 266 \r
 267     /**\r
 268      * 指定された最大バイト長に収まるゼロ終端(0x00)文字列を読み込む。\r
 269      * 入力バイト列はwindows-31jエンコーディングとして解釈される。\r
 270      * ゼロ終端以降のデータは無視されるが、\r
 271      * IO入力は指定バイト数だけ読み進められる。\r
 272      * ゼロ終端が見つからないまま指定バイト数が読み込み終わった場合、\r
 273      * そこまでのデータから文字列を構成する。\r
 274      * <p>\r
 275      * 戻り結果にはU+00A5(UCS円通貨記号)が含まれないことが保証される。\r
 276      * ※0x5c(Win31J円通貨)はU+005C(UCSバックスラッシュ)にデコードされる。\r
 277      *\r
 278      * @param maxlen 読み込みバイト数\r
 279      * @return デコードされた文字列\r
 280      * @throws IOException IOエラー\r
 281      * @throws IllegalArgumentException 読み込みバイト数が負であるか、\r
 282      * または内部バッファに対し大きすぎる。\r
 283      * @throws MmdEofException 読み込む途中でストリーム終端に達した。\r
 284      * @throws MmdFormatException 不正な文字エンコーディングが検出された。\r
 285      */\r
 286     protected String parseZeroTermString(int maxlen)\r
 287             throws IOException,\r
 288                    IllegalArgumentException,\r
 289                    MmdEofException,\r
 290                    MmdFormatException {\r
 291         if(this.textArray.length < maxlen || maxlen < 0){\r
 292             throw new IllegalArgumentException();\r
 293         }\r
 294 \r
 295         this.source.parseByteArray(this.textArray, 0, maxlen);\r
 296 \r
 297         int length = -1;\r
 298         for(int pos = 0; pos < maxlen; pos++){\r
 299             byte ch = this.textArray[pos];\r
 300             if(ch == TERMINATOR){\r
 301                 length = pos;\r
 302                 break;\r
 303             }\r
 304         }\r
 305         if(length < 0) length = maxlen;\r
 306 \r
 307         this.textBuffer.rewind();\r
 308         this.textBuffer.limit(length);\r
 309         this.charBuffer.clear();\r
 310         CoderResult decResult =\r
 311                 this.decoder.decode(this.textBuffer, this.charBuffer, true);\r
 312         if( ! decResult.isUnderflow() || decResult.isError()){\r
 313             throw new MmdFormatException("illegal character encoding",\r
 314                                          this.source.getPosition() );\r
 315         }\r
 316 \r
 317         this.charBuffer.flip();\r
 318         String result = this.charBuffer.toString();\r
 319 \r
 320         if(result.indexOf(UCSYEN) >= 0){\r
 321             result = result.replace(UCSYEN, SJISYEN);\r
 322         }\r
 323 \r
 324         return result;\r
 325     }\r
 326 \r
 327 }\r