OSDN Git Service

topic/jiocema とマージ
[jindolf/JinParser.git] / src / test / java / jp / sourceforge / jindolf / parser / ContentBuilderTest.java
1 /*
2  * License : The MIT License
3  * Copyright(c) 2010 olyutorskii
4  */
5
6 package jp.sourceforge.jindolf.parser;
7
8 import io.bitbucket.olyutorskii.jiocema.DecodeBreakException;
9 import io.bitbucket.olyutorskii.jiocema.DecodeNotifier;
10 import java.io.ByteArrayInputStream;
11 import java.io.IOException;
12 import java.io.InputStream;
13 import java.nio.charset.Charset;
14 import java.nio.charset.CharsetDecoder;
15 import java.util.List;
16 import org.junit.After;
17 import org.junit.AfterClass;
18 import org.junit.Before;
19 import org.junit.BeforeClass;
20 import org.junit.Test;
21
22 import static org.junit.Assert.*;
23
24 /**
25  *
26  */
27 public class ContentBuilderTest {
28
29     public ContentBuilderTest() {
30     }
31
32     @BeforeClass
33     public static void setUpClass() throws Exception{
34     }
35
36     @AfterClass
37     public static void tearDownClass() throws Exception{
38     }
39
40     @Before
41     public void setUp() {
42     }
43
44     @After
45     public void tearDown() {
46     }
47
48
49     /**
50      * Test of UTF8
51      * @throws Exception
52      */
53     @Test
54     public void testUTF8() throws Exception {
55         Charset cs = Charset.forName("UTF-8");
56
57         CharsetDecoder cd;
58         ContentBuilder cb;
59         DecodeNotifier decoder;
60         byte[] bdata;
61         InputStream is;
62         DecodedContent content;
63         List<DecodeErrorInfo> errList;
64         DecodeErrorInfo einfo;
65
66
67         cd = cs.newDecoder();
68         decoder = new DecodeNotifier(cd);
69         cb = new ContentBuilder();
70         decoder.setCharDecodeListener(cb);
71         bdata = Bseq.byteArray("41:42:43");
72         is = new ByteArrayInputStream(bdata);
73         decoder.decode(is);
74         content = cb.getContent();
75
76         assertEquals(3, content.length());
77         assertEquals("ABC", content.toString());
78         assertFalse(content.hasDecodeError());
79
80
81         cd = cs.newDecoder();
82         decoder = new DecodeNotifier(cd);
83         cb = new ContentBuilder();
84         decoder.setCharDecodeListener(cb);
85         bdata = Bseq.byteArray("41:EFBCA2:43");
86         is = new ByteArrayInputStream(bdata);
87         decoder.decode(is);
88         content = cb.getContent();
89
90         assertEquals(3, content.length());
91         assertEquals("ABC", content.toString());
92         assertFalse(content.hasDecodeError());
93
94
95         cd = cs.newDecoder();
96         decoder = new DecodeNotifier(cd);
97         cb = new ContentBuilder();
98         decoder.setCharDecodeListener(cb);
99         bdata = Bseq.byteArray("41:FF:43");
100         is = new ByteArrayInputStream(bdata);
101         decoder.decode(is);
102         content = cb.getContent();
103
104         assertEquals(3, content.length());
105         assertEquals("A?C", content.toString());
106         assertTrue(content.hasDecodeError());
107         errList = content.getDecodeErrorList();
108         assertEquals(1, errList.size());
109         einfo = errList.get(0);
110         assertFalse(einfo.has2nd());
111         assertEquals((byte)0xff, einfo.getRawByte1st());
112         assertEquals(1, einfo.getCharPosition());
113
114         return;
115     }
116
117     /**
118      * Test of UTF16
119      * @throws Exception
120      */
121     @Test
122     public void testUTF16() throws Exception {
123         Charset cs = Charset.forName("UTF-16");
124
125         CharsetDecoder cd;
126         ContentBuilder cb;
127         DecodeNotifier decoder;
128         byte[] bdata;
129         InputStream is;
130         DecodedContent content;
131
132
133         cd = cs.newDecoder();
134         decoder = new DecodeNotifier(cd);
135         cb = new ContentBuilder();
136         decoder.setCharDecodeListener(cb);
137         bdata = Bseq.byteArray("0041:0042:0043");
138         is = new ByteArrayInputStream(bdata);
139         decoder.decode(is);
140         content = cb.getContent();
141
142         assertEquals(3, content.length());
143         assertEquals("ABC", content.toString());
144         assertFalse(content.hasDecodeError());
145
146
147         cd = cs.newDecoder();
148         decoder = new DecodeNotifier(cd);
149         cb = new ContentBuilder();
150         decoder.setCharDecodeListener(cb);
151         bdata = Bseq.byteArray("0041:FF22:0043");
152         is = new ByteArrayInputStream(bdata);
153         decoder.decode(is);
154         content = cb.getContent();
155
156         assertEquals(3, content.length());
157         assertEquals("ABC", content.toString());
158         assertFalse(content.hasDecodeError());
159
160
161         return;
162     }
163
164     /**
165      * Test of UTF16 sequence error
166      * @throws Exception
167      */
168     @Test
169     public void testUTF16_seq() throws Exception {
170         Charset cs = Charset.forName("UTF-16");
171
172         CharsetDecoder cd;
173         ContentBuilder cb;
174         DecodeNotifier decoder;
175         byte[] bdata;
176         InputStream is;
177         DecodedContent content;
178         List<DecodeErrorInfo> errList;
179         DecodeErrorInfo einfo;
180
181         cd = cs.newDecoder();
182         decoder = new DecodeNotifier(cd);
183         cb = new ContentBuilder();
184         decoder.setCharDecodeListener(cb);
185         bdata = Bseq.byteArray("0041:d800:0043:0044");
186         is = new ByteArrayInputStream(bdata);
187         decoder.decode(is);
188         content = cb.getContent();
189
190         assertEquals(6, content.length());
191         assertEquals("A????D", content.toString());
192         assertTrue(content.hasDecodeError());
193         errList = content.getDecodeErrorList();
194         assertEquals(4, errList.size());
195         einfo = errList.get(0);
196         assertFalse(einfo.has2nd());
197         assertEquals((byte)0xd8, einfo.getRawByte1st());
198         assertEquals(1, einfo.getCharPosition());
199         einfo = errList.get(1);
200         assertFalse(einfo.has2nd());
201         assertEquals((byte)0x00, einfo.getRawByte1st());
202         assertEquals(2, einfo.getCharPosition());
203         einfo = errList.get(2);
204         assertFalse(einfo.has2nd());
205         assertEquals((byte)0x00, einfo.getRawByte1st());
206         assertEquals(3, einfo.getCharPosition());
207         einfo = errList.get(3);
208         assertFalse(einfo.has2nd());
209         assertEquals((byte)0x43, einfo.getRawByte1st());
210         assertEquals(4, einfo.getCharPosition());
211
212
213         cd = cs.newDecoder();
214         decoder = new DecodeNotifier(cd);
215         cb = new ContentBuilder();
216         decoder.setCharDecodeListener(cb);
217         bdata = Bseq.byteArray("0041:0042:dc00:0044");
218         is = new ByteArrayInputStream(bdata);
219         decoder.decode(is);
220         content = cb.getContent();
221
222         assertEquals(5, content.length());
223         assertEquals("AB??D", content.toString());
224         errList = content.getDecodeErrorList();
225         assertEquals(2, errList.size());
226         einfo = errList.get(0);
227         assertFalse(einfo.has2nd());
228         assertEquals((byte)0xdc, einfo.getRawByte1st());
229         assertEquals(2, einfo.getCharPosition());
230         einfo = errList.get(1);
231         assertFalse(einfo.has2nd());
232         assertEquals((byte)0x00, einfo.getRawByte1st());
233         assertEquals(3, einfo.getCharPosition());
234
235
236         cd = cs.newDecoder();
237         decoder = new DecodeNotifier(cd);
238         cb = new ContentBuilder();
239         decoder.setCharDecodeListener(cb);
240         bdata = Bseq.byteArray("0041:d800");
241         is = new ByteArrayInputStream(bdata);
242         decoder.decode(is);
243         content = cb.getContent();
244
245         assertEquals(3, content.length());
246         assertEquals("A??", content.toString());
247         assertTrue(content.hasDecodeError());
248         errList = content.getDecodeErrorList();
249         assertEquals(2, errList.size());
250         einfo = errList.get(0);
251         assertFalse(einfo.has2nd());
252         assertEquals((byte)0xd8, einfo.getRawByte1st());
253         assertEquals(1, einfo.getCharPosition());
254         einfo = errList.get(1);
255         assertFalse(einfo.has2nd());
256         assertEquals((byte)0x00, einfo.getRawByte1st());
257         assertEquals(2, einfo.getCharPosition());
258
259         return;
260     }
261
262     /**
263      * Test of UTF16 mapping error
264      * @throws Exception
265      */
266     @Test
267     public void testUTF16_nomap() throws Exception {
268         Charset cs = Charset.forName("UTF-16");
269
270         CharsetDecoder cd;
271         ContentBuilder cb;
272         DecodeNotifier decoder;
273         byte[] bdata;
274         InputStream is;
275         DecodedContent content;
276
277         cd = cs.newDecoder();
278         decoder = new DecodeNotifier(cd);
279         cb = new ContentBuilder();
280         decoder.setCharDecodeListener(cb);
281         bdata = Bseq.byteArray("0041:d83d:dc11:0042");
282         is = new ByteArrayInputStream(bdata);
283         decoder.decode(is);
284         content = cb.getContent();
285
286         assertEquals(4, content.length());
287         assertEquals("A\ud83d\udc11B", content.toString());
288
289         return;
290     }
291
292     @Test
293     public void testSheep() throws IOException, DecodeBreakException {
294         System.out.println("sheep");
295
296         Charset cs;
297         CharsetDecoder decoder;
298         ContentBuilder listener;
299
300         DecodeNotifier sd;
301         InputStream is;
302
303         cs = Charset.forName("UTF-8");
304         decoder = cs.newDecoder();
305
306         sd = new DecodeNotifier(decoder);
307
308         listener = new ContentBuilder();
309         sd.setCharDecodeListener(listener);
310
311         // SMP character U+1F411 [SHEEP]
312         // see https://ja.osdn.net/projects/jindolf/ticket/36356
313         is = Bseq.byteStream(0xf0, 0x9f, 0x90, 0x91);
314         sd.decode(is);
315         assertEquals("\ud83d\udc11", listener.getContent().toString());
316
317         return;
318     }
319
320 }