OSDN Git Service

サロゲートペア出現時異常系の不具合を修正
author: Olyutorskii <olyutorskii@users.osdn.me>
Wed, 15 Jun 2016 11:58:39 +0000 (20:58 +0900)
committer: Olyutorskii <olyutorskii@users.osdn.me>
Wed, 15 Jun 2016 11:58:39 +0000 (20:58 +0900)
src/main/java/jp/sourceforge/jindolf/parser/ContentBuilderUCS2.java
src/test/java/jp/sourceforge/jindolf/parser/ContentBuilderUCS2Test.java
src/test/java/jp/sourceforge/jindolf/parser/DecodedContentTest.java

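diff --git a/src/main/java/jp/sourceforge/jindolf/parser/ContentBuilderUCS2.java b/src/main/java/jp/sourceforge/jindolf/parser/ContentBuilderUCS2.java
index 5d51e3b..c1f9965 100644
--- a/src/main/java/jp/sourceforge/jindolf/parser/ContentBuilderUCS2.java
+++ b/src/main/java/jp/sourceforge/jindolf/parser/ContentBuilderUCS2.java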
@@ -48,8 +48,8 @@ public class ContentBuilderUCS2 extends ContentBuilder{
      */
     public static byte[] charToUTF16(char ch){
         byte[] result = new byte[2];
-        result[0] = (byte)(ch >> 8);
-        result[1] = (byte)(ch & 0xff);
+        result[0] = (byte) (ch >> 8);
+        result[1] = (byte) (ch & 0xff);
 
         return result;
     }
@@ -83,30 +83,31 @@ public class ContentBuilderUCS2 extends ContentBuilder{
         flushError();
 
         int length = seq.length();
-        int startPos = 0;
+        int copyDone = 0;
 
         for(int pos = 0; pos < length; pos++){
             char ch = seq.charAt(pos);
 
-            if(   ! Character.isHighSurrogate(ch)
-               && ! Character.isLowSurrogate (ch) ){
+            if(    ! Character.isHighSurrogate(ch)
+                && ! Character.isLowSurrogate (ch) ){
                 continue;
             }
 
-            if(startPos < pos){
-                CharSequence chopped = seq.subSequence(startPos, pos);
+            if(copyDone < pos){
+                CharSequence chopped = seq.subSequence(copyDone, pos);
                 getContent().append(chopped);
-                startPos = pos + 1;
             }
 
+            copyDone = pos + 1;
+
             byte[] barr = charToUTF16(ch);
             for(byte bval : barr){
                 getContent().addDecodeError(bval);
             }
         }
 
-        if(startPos < length){
-            CharSequence chopped = seq.subSequence(startPos, length);
+        if(copyDone < length){
+            CharSequence chopped = seq.subSequence(copyDone, length);
             getContent().append(chopped);
         }
 
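diff --git a/src/test/java/jp/sourceforge/jindolf/parser/ContentBuilderUCS2Test.java b/src/test/java/jp/sourceforge/jindolf/parser/ContentBuilderUCS2Test.java
index ab4af2e..67d0d48 100644
--- a/src/test/java/jp/sourceforge/jindolf/parser/ContentBuilderUCS2Test.java
+++ b/src/test/java/jp/sourceforge/jindolf/parser/ContentBuilderUCS2Test.java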
@@ -5,11 +5,18 @@
 
 package jp.sourceforge.jindolf.parser;
 
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.util.ArrayList;
+import java.util.List;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
+
 import static org.junit.Assert.*;
 
 /**
@@ -36,6 +43,383 @@ public class ContentBuilderUCS2Test {
     public void tearDown() {
     }
 
+    public static byte[] byteArray(CharSequence seq){
+        byte[] result;
+
+        List<Byte> byteList = new ArrayList<>();
+
+        int length = seq.length();
+        for(int pos = 0; pos < length; pos++){
+            int val = 0;
+
+            char ch = seq.charAt(pos);
+
+            if('0' <= ch && ch <= '9'){
+                val += ch - '0';
+            }else if('a' <= ch && ch <= 'f'){
+                val += ch - 'a' + 10;
+            }else if('A' <= ch && ch <= 'F'){
+                val += ch - 'A' + 10;
+            }else{
+                continue;
+            }
+
+            pos++;
+            if(pos >= length) break;
+
+            val *= 16;
+            ch = seq.charAt(pos);
+
+            if('0' <= ch && ch <= '9'){
+                val += ch - '0';
+            }else if('a' <= ch && ch <= 'f'){
+                val += ch - 'a' + 10;
+            }else if('A' <= ch && ch <= 'F'){
+                val += ch - 'A' + 10;
+            }else{
+                continue;
+            }
+
+            byteList.add((byte)val);
+        }
+
+        result = new byte[byteList.size()];
+
+        for(int pos = 0; pos < result.length; pos++){
+            result[pos] = byteList.get(pos);
+        }
+
+        return result;
+    }
+
+    /**
+     * Test of UTF8
+     */
+    @Test
+    public void testUTF8() throws Exception {
+        Charset cs = Charset.forName("UTF-8");
+
+        CharsetDecoder cd;
+        ContentBuilderUCS2 cb;
+        StreamDecoder decoder;
+        byte[] bdata;
+        InputStream is;
+        DecodedContent content;
+        List<DecodeErrorInfo> errList;
+        DecodeErrorInfo einfo;
+
+
+        cd = cs.newDecoder();
+        decoder = new StreamDecoder(cd);
+        cb = new ContentBuilderUCS2();
+        decoder.setDecodeHandler(cb);
+        bdata = byteArray("41:42:43");
+        is = new ByteArrayInputStream(bdata);
+        decoder.decode(is);
+        content = cb.getContent();
+
+        assertEquals(3, content.length());
+        assertEquals("ABC", content.toString());
+        assertFalse(content.hasDecodeError());
+
+
+        cd = cs.newDecoder();
+        decoder = new StreamDecoder(cd);
+        cb = new ContentBuilderUCS2();
+        decoder.setDecodeHandler(cb);
+        bdata = byteArray("41:EFBCA2:43");
+        is = new ByteArrayInputStream(bdata);
+        decoder.decode(is);
+        content = cb.getContent();
+
+        assertEquals(3, content.length());
+        assertEquals("ABC", content.toString());
+        assertFalse(content.hasDecodeError());
+
+
+        cd = cs.newDecoder();
+        decoder = new StreamDecoder(cd);
+        cb = new ContentBuilderUCS2();
+        decoder.setDecodeHandler(cb);
+        bdata = byteArray("41:FF:43");
+        is = new ByteArrayInputStream(bdata);
+        decoder.decode(is);
+        content = cb.getContent();
+
+        assertEquals(3, content.length());
+        assertEquals("A?C", content.toString());
+        assertTrue(content.hasDecodeError());
+        errList = content.getDecodeErrorList();
+        assertEquals(1, errList.size());
+        einfo = errList.get(0);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0xff, einfo.getRawByte1st());
+        assertEquals(1, einfo.getCharPosition());
+
+        return;
+    }
+
+    /**
+     * Test of UTF16
+     */
+    @Test
+    public void testUTF16() throws Exception {
+        Charset cs = Charset.forName("UTF-16");
+
+        CharsetDecoder cd;
+        ContentBuilderUCS2 cb;
+        StreamDecoder decoder;
+        byte[] bdata;
+        InputStream is;
+        DecodedContent content;
+
+
+        cd = cs.newDecoder();
+        decoder = new StreamDecoder(cd);
+        cb = new ContentBuilderUCS2();
+        decoder.setDecodeHandler(cb);
+        bdata = byteArray("0041:0042:0043");
+        is = new ByteArrayInputStream(bdata);
+        decoder.decode(is);
+        content = cb.getContent();
+
+        assertEquals(3, content.length());
+        assertEquals("ABC", content.toString());
+        assertFalse(content.hasDecodeError());
+
+
+        cd = cs.newDecoder();
+        decoder = new StreamDecoder(cd);
+        cb = new ContentBuilderUCS2();
+        decoder.setDecodeHandler(cb);
+        bdata = byteArray("0041:FF22:0043");
+        is = new ByteArrayInputStream(bdata);
+        decoder.decode(is);
+        content = cb.getContent();
+
+        assertEquals(3, content.length());
+        assertEquals("ABC", content.toString());
+        assertFalse(content.hasDecodeError());
+
+
+        return;
+    }
+
+    /**
+     * Test of UTF16 sequence error
+     */
+    @Test
+    public void testUTF16_seq() throws Exception {
+        Charset cs = Charset.forName("UTF-16");
+
+        CharsetDecoder cd;
+        ContentBuilderUCS2 cb;
+        StreamDecoder decoder;
+        byte[] bdata;
+        InputStream is;
+        DecodedContent content;
+        List<DecodeErrorInfo> errList;
+        DecodeErrorInfo einfo;
+
+        cd = cs.newDecoder();
+        decoder = new StreamDecoder(cd);
+        cb = new ContentBuilderUCS2();
+        decoder.setDecodeHandler(cb);
+        bdata = byteArray("0041:d800:0043:0044");
+        is = new ByteArrayInputStream(bdata);
+        decoder.decode(is);
+        content = cb.getContent();
+
+        assertEquals(6, content.length());
+        assertEquals("A????D", content.toString());
+        assertTrue(content.hasDecodeError());
+        errList = content.getDecodeErrorList();
+        assertEquals(4, errList.size());
+        einfo = errList.get(0);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0xd8, einfo.getRawByte1st());
+        assertEquals(1, einfo.getCharPosition());
+        einfo = errList.get(1);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0x00, einfo.getRawByte1st());
+        assertEquals(2, einfo.getCharPosition());
+        einfo = errList.get(2);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0x00, einfo.getRawByte1st());
+        assertEquals(3, einfo.getCharPosition());
+        einfo = errList.get(3);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0x43, einfo.getRawByte1st());
+        assertEquals(4, einfo.getCharPosition());
+
+
+        cd = cs.newDecoder();
+        decoder = new StreamDecoder(cd);
+        cb = new ContentBuilderUCS2();
+        decoder.setDecodeHandler(cb);
+        bdata = byteArray("0041:0042:dc00:0044");
+        is = new ByteArrayInputStream(bdata);
+        decoder.decode(is);
+        content = cb.getContent();
+
+        assertEquals(5, content.length());
+        assertEquals("AB??D", content.toString());
+        errList = content.getDecodeErrorList();
+        assertEquals(2, errList.size());
+        einfo = errList.get(0);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0xdc, einfo.getRawByte1st());
+        assertEquals(2, einfo.getCharPosition());
+        einfo = errList.get(1);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0x00, einfo.getRawByte1st());
+        assertEquals(3, einfo.getCharPosition());
+
+
+        cd = cs.newDecoder();
+        decoder = new StreamDecoder(cd);
+        cb = new ContentBuilderUCS2();
+        decoder.setDecodeHandler(cb);
+        bdata = byteArray("0041:d800");
+        is = new ByteArrayInputStream(bdata);
+        decoder.decode(is);
+        content = cb.getContent();
+
+        assertEquals(3, content.length());
+        assertEquals("A??", content.toString());
+        assertTrue(content.hasDecodeError());
+        errList = content.getDecodeErrorList();
+        assertEquals(2, errList.size());
+        einfo = errList.get(0);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0xd8, einfo.getRawByte1st());
+        assertEquals(1, einfo.getCharPosition());
+        einfo = errList.get(1);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0x00, einfo.getRawByte1st());
+        assertEquals(2, einfo.getCharPosition());
+
+        return;
+    }
+
+    /**
+     * Test of UTF16 mapping error
+     */
+    @Test
+    public void testUTF16_nomap() throws Exception {
+        Charset cs = Charset.forName("UTF-16");
+
+        CharsetDecoder cd;
+        ContentBuilderUCS2 cb;
+        StreamDecoder decoder;
+        byte[] bdata;
+        InputStream is;
+        DecodedContent content;
+        List<DecodeErrorInfo> errList;
+        DecodeErrorInfo einfo;
+
+        cd = cs.newDecoder();
+        decoder = new StreamDecoder(cd);
+        cb = new ContentBuilderUCS2();
+        decoder.setDecodeHandler(cb);
+        bdata = byteArray("0041:d83d:dc11:0042");
+        is = new ByteArrayInputStream(bdata);
+        decoder.decode(is);
+        content = cb.getContent();
+
+//        assertEquals(7, content.length());
+//        assertEquals("A????\udc11B", content.toString());
+        assertEquals(6, content.length());
+        assertEquals("A????B", content.toString());
+        assertTrue(content.hasDecodeError());
+        errList = content.getDecodeErrorList();
+        assertEquals(4, errList.size());
+        einfo = errList.get(0);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0xd8, einfo.getRawByte1st());
+        assertEquals(1, einfo.getCharPosition());
+        einfo = errList.get(1);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0x3d, einfo.getRawByte1st());
+        assertEquals(2, einfo.getCharPosition());
+        einfo = errList.get(2);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0xdc, einfo.getRawByte1st());
+        assertEquals(3, einfo.getCharPosition());
+        einfo = errList.get(3);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0x11, einfo.getRawByte1st());
+        assertEquals(4, einfo.getCharPosition());
+
+
+        cd = cs.newDecoder();
+        decoder = new StreamDecoder(cd);
+        cb = new ContentBuilderUCS2();
+        decoder.setDecodeHandler(cb);
+        bdata = byteArray("d83d:dc11:0042");
+        is = new ByteArrayInputStream(bdata);
+        decoder.decode(is);
+        content = cb.getContent();
+
+        assertEquals(5, content.length());
+        assertEquals("????B", content.toString());
+        assertTrue(content.hasDecodeError());
+        errList = content.getDecodeErrorList();
+        assertEquals(4, errList.size());
+        einfo = errList.get(0);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0xd8, einfo.getRawByte1st());
+        assertEquals(0, einfo.getCharPosition());
+        einfo = errList.get(1);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0x3d, einfo.getRawByte1st());
+        assertEquals(1, einfo.getCharPosition());
+        einfo = errList.get(2);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0xdc, einfo.getRawByte1st());
+        assertEquals(2, einfo.getCharPosition());
+        einfo = errList.get(3);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0x11, einfo.getRawByte1st());
+        assertEquals(3, einfo.getCharPosition());
+
+
+        cd = cs.newDecoder();
+        decoder = new StreamDecoder(cd);
+        cb = new ContentBuilderUCS2();
+        decoder.setDecodeHandler(cb);
+        bdata = byteArray("0041:d83d:dc11");
+        is = new ByteArrayInputStream(bdata);
+        decoder.decode(is);
+        content = cb.getContent();
+
+        assertEquals(5, content.length());
+        assertEquals("A????", content.toString());
+        assertTrue(content.hasDecodeError());
+        errList = content.getDecodeErrorList();
+        assertEquals(4, errList.size());
+        einfo = errList.get(0);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0xd8, einfo.getRawByte1st());
+        assertEquals(1, einfo.getCharPosition());
+        einfo = errList.get(1);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0x3d, einfo.getRawByte1st());
+        assertEquals(2, einfo.getCharPosition());
+        einfo = errList.get(2);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0xdc, einfo.getRawByte1st());
+        assertEquals(3, einfo.getCharPosition());
+        einfo = errList.get(3);
+        assertFalse(einfo.has2nd());
+        assertEquals((byte)0x11, einfo.getRawByte1st());
+        assertEquals(4, einfo.getCharPosition());
+
+
+        return;
+    }
+
+
     /**
      * Test of charToUTF8 method, of class ContentBuilderUCS2.
      */
@@ -45,7 +429,7 @@ public class ContentBuilderUCS2Test {
 
         char ch;
         byte[] result;
-        
+
         ch = '\ud844';
         result = ContentBuilderUCS2.charToUTF16(ch);
 
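diff --git a/src/test/java/jp/sourceforge/jindolf/parser/DecodedContentTest.java b/src/test/java/jp/sourceforge/jindolf/parser/DecodedContentTest.java
index 7a16a88..096dc29 100644
--- a/src/test/java/jp/sourceforge/jindolf/parser/DecodedContentTest.java
+++ b/src/test/java/jp/sourceforge/jindolf/parser/DecodedContentTest.java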
@@ -12,6 +12,7 @@ import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
+
 import static org.junit.Assert.*;
 
 /**
@@ -61,7 +62,7 @@ public class DecodedContentTest {
         assertEquals("abc", content.toString());
 
         try{
-            content = new DecodedContent(-1);
+            new DecodedContent(-1);
             fail();
         }catch(NegativeArraySizeException e){
         }catch(Throwable e){
@@ -125,7 +126,7 @@ public class DecodedContentTest {
         assertFalse(content.hasDecodeError());
 
         content = new DecodedContent();
-        List list = content.getDecodeErrorList();
+        List<DecodeErrorInfo> list = content.getDecodeErrorList();
         assertEquals(0, list.size());
         assertFalse(content.hasDecodeError());
 
@@ -546,7 +547,7 @@ public class DecodedContentTest {
         List<DecodeErrorInfo> errList;
         int result;
 
-        errList = new ArrayList<DecodeErrorInfo>();
+        errList = new ArrayList<>();
         result = DecodedContent.lsearchErrorIndex(errList, 10);
         assertEquals(0, result);
 
@@ -595,7 +596,7 @@ public class DecodedContentTest {
         List<DecodeErrorInfo> errList;
         int result;
 
-        errList = new ArrayList<DecodeErrorInfo>();
+        errList = new ArrayList<>();
         result = DecodedContent.bsearchErrorIndex(errList, 10);
         assertEquals(0, result);
 
@@ -644,7 +645,7 @@ public class DecodedContentTest {
         List<DecodeErrorInfo> errList;
         int result;
 
-        errList = new ArrayList<DecodeErrorInfo>();
+        errList = new ArrayList<>();
 
         errList.clear();
         for(int pos = 0; pos <= 1000; pos += 10){
@@ -695,7 +696,7 @@ public class DecodedContentTest {
         System.out.println("ensureCapacity");
 
         DecodedContent content;
-        
+
         content = new DecodedContent("abc");
         content.ensureCapacity(-1);
         content.ensureCapacity(0);