OSDN Git Service

5cf650427977d3a564dd9d223344ac6221012997
[jindolf/JinParser.git] / src / test / java / jp / sourceforge / jindolf / parser / ContentBuilderUCS2Test.java
1 /*
2  * License : The MIT License
3  * Copyright(c) 2010 olyutorskii
4  */
5
6 package jp.sourceforge.jindolf.parser;
7
8 import java.io.ByteArrayInputStream;
9 import java.io.InputStream;
10 import java.nio.charset.Charset;
11 import java.nio.charset.CharsetDecoder;
12 import java.util.ArrayList;
13 import java.util.List;
14 import org.junit.After;
15 import org.junit.AfterClass;
16 import org.junit.Before;
17 import org.junit.BeforeClass;
18 import org.junit.Test;
19
20 import static org.junit.Assert.*;
21
22 /**
23  *
24  */
25 public class ContentBuilderUCS2Test {
26
27     public ContentBuilderUCS2Test() {
28     }
29
30     @BeforeClass
31     public static void setUpClass() throws Exception{
32     }
33
34     @AfterClass
35     public static void tearDownClass() throws Exception{
36     }
37
38     @Before
39     public void setUp() {
40     }
41
42     @After
43     public void tearDown() {
44     }
45
46     public static byte[] byteArray(CharSequence seq){
47         byte[] result;
48
49         List<Byte> byteList = new ArrayList<>();
50
51         int length = seq.length();
52         for(int pos = 0; pos < length; pos++){
53             int val = 0;
54
55             char ch = seq.charAt(pos);
56
57             if('0' <= ch && ch <= '9'){
58                 val += ch - '0';
59             }else if('a' <= ch && ch <= 'f'){
60                 val += ch - 'a' + 10;
61             }else if('A' <= ch && ch <= 'F'){
62                 val += ch - 'A' + 10;
63             }else{
64                 continue;
65             }
66
67             pos++;
68             if(pos >= length) break;
69
70             val *= 16;
71             ch = seq.charAt(pos);
72
73             if('0' <= ch && ch <= '9'){
74                 val += ch - '0';
75             }else if('a' <= ch && ch <= 'f'){
76                 val += ch - 'a' + 10;
77             }else if('A' <= ch && ch <= 'F'){
78                 val += ch - 'A' + 10;
79             }else{
80                 continue;
81             }
82
83             byteList.add((byte)val);
84         }
85
86         result = new byte[byteList.size()];
87
88         for(int pos = 0; pos < result.length; pos++){
89             result[pos] = byteList.get(pos);
90         }
91
92         return result;
93     }
94
95     /**
96      * Test of UTF8
97      */
98     @Test
99     public void testUTF8() throws Exception {
100         Charset cs = Charset.forName("UTF-8");
101
102         CharsetDecoder cd;
103         ContentBuilderUCS2 cb;
104         StreamDecoder decoder;
105         byte[] bdata;
106         InputStream is;
107         DecodedContent content;
108         List<DecodeErrorInfo> errList;
109         DecodeErrorInfo einfo;
110
111
112         cd = cs.newDecoder();
113         decoder = new StreamDecoder(cd);
114         cb = new ContentBuilderUCS2();
115         decoder.setDecodeHandler(cb);
116         bdata = byteArray("41:42:43");
117         is = new ByteArrayInputStream(bdata);
118         decoder.decode(is);
119         content = cb.getContent();
120
121         assertEquals(3, content.length());
122         assertEquals("ABC", content.toString());
123         assertFalse(content.hasDecodeError());
124
125
126         cd = cs.newDecoder();
127         decoder = new StreamDecoder(cd);
128         cb = new ContentBuilderUCS2();
129         decoder.setDecodeHandler(cb);
130         bdata = byteArray("41:EFBCA2:43");
131         is = new ByteArrayInputStream(bdata);
132         decoder.decode(is);
133         content = cb.getContent();
134
135         assertEquals(3, content.length());
136         assertEquals("ABC", content.toString());
137         assertFalse(content.hasDecodeError());
138
139
140         cd = cs.newDecoder();
141         decoder = new StreamDecoder(cd);
142         cb = new ContentBuilderUCS2();
143         decoder.setDecodeHandler(cb);
144         bdata = byteArray("41:FF:43");
145         is = new ByteArrayInputStream(bdata);
146         decoder.decode(is);
147         content = cb.getContent();
148
149         assertEquals(3, content.length());
150         assertEquals("A?C", content.toString());
151         assertTrue(content.hasDecodeError());
152         errList = content.getDecodeErrorList();
153         assertEquals(1, errList.size());
154         einfo = errList.get(0);
155         assertFalse(einfo.has2nd());
156         assertEquals((byte)0xff, einfo.getRawByte1st());
157         assertEquals(1, einfo.getCharPosition());
158
159         return;
160     }
161
162     /**
163      * Test of UTF16
164      */
165     @Test
166     public void testUTF16() throws Exception {
167         Charset cs = Charset.forName("UTF-16");
168
169         CharsetDecoder cd;
170         ContentBuilderUCS2 cb;
171         StreamDecoder decoder;
172         byte[] bdata;
173         InputStream is;
174         DecodedContent content;
175
176
177         cd = cs.newDecoder();
178         decoder = new StreamDecoder(cd);
179         cb = new ContentBuilderUCS2();
180         decoder.setDecodeHandler(cb);
181         bdata = byteArray("0041:0042:0043");
182         is = new ByteArrayInputStream(bdata);
183         decoder.decode(is);
184         content = cb.getContent();
185
186         assertEquals(3, content.length());
187         assertEquals("ABC", content.toString());
188         assertFalse(content.hasDecodeError());
189
190
191         cd = cs.newDecoder();
192         decoder = new StreamDecoder(cd);
193         cb = new ContentBuilderUCS2();
194         decoder.setDecodeHandler(cb);
195         bdata = byteArray("0041:FF22:0043");
196         is = new ByteArrayInputStream(bdata);
197         decoder.decode(is);
198         content = cb.getContent();
199
200         assertEquals(3, content.length());
201         assertEquals("ABC", content.toString());
202         assertFalse(content.hasDecodeError());
203
204
205         return;
206     }
207
208     /**
209      * Test of UTF16 sequence error
210      */
211     @Test
212     public void testUTF16_seq() throws Exception {
213         Charset cs = Charset.forName("UTF-16");
214
215         CharsetDecoder cd;
216         ContentBuilderUCS2 cb;
217         StreamDecoder decoder;
218         byte[] bdata;
219         InputStream is;
220         DecodedContent content;
221         List<DecodeErrorInfo> errList;
222         DecodeErrorInfo einfo;
223
224         cd = cs.newDecoder();
225         decoder = new StreamDecoder(cd);
226         cb = new ContentBuilderUCS2();
227         decoder.setDecodeHandler(cb);
228         bdata = byteArray("0041:d800:0043:0044");
229         is = new ByteArrayInputStream(bdata);
230         decoder.decode(is);
231         content = cb.getContent();
232
233         assertEquals(6, content.length());
234         assertEquals("A????D", content.toString());
235         assertTrue(content.hasDecodeError());
236         errList = content.getDecodeErrorList();
237         assertEquals(4, errList.size());
238         einfo = errList.get(0);
239         assertFalse(einfo.has2nd());
240         assertEquals((byte)0xd8, einfo.getRawByte1st());
241         assertEquals(1, einfo.getCharPosition());
242         einfo = errList.get(1);
243         assertFalse(einfo.has2nd());
244         assertEquals((byte)0x00, einfo.getRawByte1st());
245         assertEquals(2, einfo.getCharPosition());
246         einfo = errList.get(2);
247         assertFalse(einfo.has2nd());
248         assertEquals((byte)0x00, einfo.getRawByte1st());
249         assertEquals(3, einfo.getCharPosition());
250         einfo = errList.get(3);
251         assertFalse(einfo.has2nd());
252         assertEquals((byte)0x43, einfo.getRawByte1st());
253         assertEquals(4, einfo.getCharPosition());
254
255
256         cd = cs.newDecoder();
257         decoder = new StreamDecoder(cd);
258         cb = new ContentBuilderUCS2();
259         decoder.setDecodeHandler(cb);
260         bdata = byteArray("0041:0042:dc00:0044");
261         is = new ByteArrayInputStream(bdata);
262         decoder.decode(is);
263         content = cb.getContent();
264
265         assertEquals(5, content.length());
266         assertEquals("AB??D", content.toString());
267         errList = content.getDecodeErrorList();
268         assertEquals(2, errList.size());
269         einfo = errList.get(0);
270         assertFalse(einfo.has2nd());
271         assertEquals((byte)0xdc, einfo.getRawByte1st());
272         assertEquals(2, einfo.getCharPosition());
273         einfo = errList.get(1);
274         assertFalse(einfo.has2nd());
275         assertEquals((byte)0x00, einfo.getRawByte1st());
276         assertEquals(3, einfo.getCharPosition());
277
278
279         cd = cs.newDecoder();
280         decoder = new StreamDecoder(cd);
281         cb = new ContentBuilderUCS2();
282         decoder.setDecodeHandler(cb);
283         bdata = byteArray("0041:d800");
284         is = new ByteArrayInputStream(bdata);
285         decoder.decode(is);
286         content = cb.getContent();
287
288         assertEquals(3, content.length());
289         assertEquals("A??", content.toString());
290         assertTrue(content.hasDecodeError());
291         errList = content.getDecodeErrorList();
292         assertEquals(2, errList.size());
293         einfo = errList.get(0);
294         assertFalse(einfo.has2nd());
295         assertEquals((byte)0xd8, einfo.getRawByte1st());
296         assertEquals(1, einfo.getCharPosition());
297         einfo = errList.get(1);
298         assertFalse(einfo.has2nd());
299         assertEquals((byte)0x00, einfo.getRawByte1st());
300         assertEquals(2, einfo.getCharPosition());
301
302         return;
303     }
304
305     /**
306      * Test of UTF16 mapping error
307      */
308     @Test
309     public void testUTF16_nomap() throws Exception {
310         Charset cs = Charset.forName("UTF-16");
311
312         CharsetDecoder cd;
313         ContentBuilderUCS2 cb;
314         StreamDecoder decoder;
315         byte[] bdata;
316         InputStream is;
317         DecodedContent content;
318
319         cd = cs.newDecoder();
320         decoder = new StreamDecoder(cd);
321         cb = new ContentBuilderUCS2();
322         decoder.setDecodeHandler(cb);
323         bdata = byteArray("0041:d83d:dc11:0042");
324         is = new ByteArrayInputStream(bdata);
325         decoder.decode(is);
326         content = cb.getContent();
327
328         assertEquals(4, content.length());
329         assertEquals("A\ud83d\udc11B", content.toString());
330
331         return;
332     }
333
334 }