2 * License : The MIT License
3 * Copyright(c) 2010 olyutorskii
6 package jp.sourceforge.jindolf.parser;
8 import io.bitbucket.olyutorskii.jiocema.DecodeBreakException;
9 import io.bitbucket.olyutorskii.jiocema.DecodeNotifier;
10 import java.io.ByteArrayInputStream;
11 import java.io.IOException;
12 import java.io.InputStream;
13 import java.nio.charset.Charset;
14 import java.nio.charset.CharsetDecoder;
15 import java.util.List;
16 import org.junit.After;
17 import org.junit.AfterClass;
18 import org.junit.Before;
19 import org.junit.BeforeClass;
20 import org.junit.Test;
22 import static org.junit.Assert.*;
27 public class ContentBuilderTest {
29 public ContentBuilderTest() {
// Public no-argument constructor of the test class.
33 public static void setUpClass() throws Exception{
// Static class-level fixture hook; it may propagate any Exception.
// NOTE(review): presumably the JUnit @BeforeClass method -- confirm the annotation.
37 public static void tearDownClass() throws Exception{
// Static class-level fixture hook; it may propagate any Exception.
// NOTE(review): presumably the JUnit @AfterClass method -- confirm the annotation.
45 public void tearDown() {
// Instance-level fixture hook.
// NOTE(review): presumably the JUnit @After method -- confirm the annotation.
54 public void testUTF8() throws Exception {
// Checks what ContentBuilder collects when it is registered, through
// setCharDecodeListener, on a DecodeNotifier built from cd.
// Every scenario below uses a fresh DecodeNotifier and ContentBuilder.
// Byte comments assume Bseq.byteArray turns colon-separated hex text
// into the corresponding bytes -- TODO confirm against Bseq.
55 Charset cs = Charset.forName("UTF-8");
59 DecodeNotifier decoder;
62 DecodedContent content;
63 List<DecodeErrorInfo> errList;
64 DecodeErrorInfo einfo;
// Scenario 1: three one-byte sequences 41 42 43; plain "ABC", no error.
68 decoder = new DecodeNotifier(cd);
69 cb = new ContentBuilder();
70 decoder.setCharDecodeListener(cb);
71 bdata = Bseq.byteArray("41:42:43");
72 is = new ByteArrayInputStream(bdata);
74 content = cb.getContent();
76 assertEquals(3, content.length());
77 assertEquals("ABC", content.toString());
78 assertFalse(content.hasDecodeError());
// Scenario 2: a three-byte sequence in the middle of the input.
82 decoder = new DecodeNotifier(cd);
83 cb = new ContentBuilder();
84 decoder.setCharDecodeListener(cb);
85 bdata = Bseq.byteArray("41:EFBCA2:43");
86 is = new ByteArrayInputStream(bdata);
88 content = cb.getContent();
90 assertEquals(3, content.length());
// EF BC A2 is the UTF-8 encoding of U+FF22 (FULLWIDTH LATIN CAPITAL
// LETTER B), not ASCII 'B'. The expected text uses a Unicode escape so
// the comparison does not depend on the encoding of this source file.
91 assertEquals("A\uff22C", content.toString());
92 assertFalse(content.hasDecodeError());
// Scenario 3: byte FF never occurs in well-formed UTF-8. The test expects
// a '?' at char position 1 and exactly one error entry carrying the raw
// byte 0xff with no second byte.
96 decoder = new DecodeNotifier(cd);
97 cb = new ContentBuilder();
98 decoder.setCharDecodeListener(cb);
99 bdata = Bseq.byteArray("41:FF:43");
100 is = new ByteArrayInputStream(bdata);
102 content = cb.getContent();
104 assertEquals(3, content.length());
105 assertEquals("A?C", content.toString());
106 assertTrue(content.hasDecodeError());
107 errList = content.getDecodeErrorList();
108 assertEquals(1, errList.size());
109 einfo = errList.get(0);
110 assertFalse(einfo.has2nd());
111 assertEquals((byte)0xff, einfo.getRawByte1st());
112 assertEquals(1, einfo.getCharPosition());
122 public void testUTF16() throws Exception {
// Checks what ContentBuilder collects when it listens to a DecodeNotifier
// wrapping a decoder for the "UTF-16" charset. The inputs carry no byte
// order mark. Each scenario uses a fresh decoder, notifier and builder.
// Byte comments assume Bseq.byteArray turns colon-separated hex text
// into the corresponding bytes -- TODO confirm against Bseq.
123 Charset cs = Charset.forName("UTF-16");
127 DecodeNotifier decoder;
130 DecodedContent content;
// Scenario 1: code units 0041 0042 0043; plain "ABC", no error.
133 cd = cs.newDecoder();
134 decoder = new DecodeNotifier(cd);
135 cb = new ContentBuilder();
136 decoder.setCharDecodeListener(cb);
137 bdata = Bseq.byteArray("0041:0042:0043");
138 is = new ByteArrayInputStream(bdata);
140 content = cb.getContent();
142 assertEquals(3, content.length());
143 assertEquals("ABC", content.toString());
144 assertFalse(content.hasDecodeError());
// Scenario 2: a non-ASCII BMP code unit in the middle of the input.
147 cd = cs.newDecoder();
148 decoder = new DecodeNotifier(cd);
149 cb = new ContentBuilder();
150 decoder.setCharDecodeListener(cb);
151 bdata = Bseq.byteArray("0041:FF22:0043");
152 is = new ByteArrayInputStream(bdata);
154 content = cb.getContent();
156 assertEquals(3, content.length());
// Code unit FF22 is U+FF22 (FULLWIDTH LATIN CAPITAL LETTER B), not ASCII
// 'B'. The expected text uses a Unicode escape so the comparison does not
// depend on the encoding of this source file.
157 assertEquals("A\uff22C", content.toString());
158 assertFalse(content.hasDecodeError());
165 * Test of UTF-16 sequence errors (unpaired surrogate code units)
169 public void testUTF16_seq() throws Exception {
// Checks how malformed UTF-16 input is reported through ContentBuilder.
// Every offending byte is expected to show up as one '?' in the content
// and as one single-byte DecodeErrorInfo (has2nd() false) at that
// character position. Each scenario uses a fresh decoder and builder.
// Byte comments assume Bseq.byteArray turns colon-separated hex text
// into the corresponding bytes -- TODO confirm against Bseq.
170 Charset cs = Charset.forName("UTF-16");
174 DecodeNotifier decoder;
177 DecodedContent content;
178 List<DecodeErrorInfo> errList;
179 DecodeErrorInfo einfo;
// Scenario 1: high surrogate D800 followed by 0043, which is not a low
// surrogate. The four bytes d8 00 00 43 are expected as errors at char
// positions 1..4, and the trailing 0044 decodes to 'D'.
181 cd = cs.newDecoder();
182 decoder = new DecodeNotifier(cd);
183 cb = new ContentBuilder();
184 decoder.setCharDecodeListener(cb);
185 bdata = Bseq.byteArray("0041:d800:0043:0044");
186 is = new ByteArrayInputStream(bdata);
188 content = cb.getContent();
190 assertEquals(6, content.length());
191 assertEquals("A????D", content.toString());
192 assertTrue(content.hasDecodeError());
193 errList = content.getDecodeErrorList();
194 assertEquals(4, errList.size());
195 einfo = errList.get(0);
196 assertFalse(einfo.has2nd());
197 assertEquals((byte)0xd8, einfo.getRawByte1st());
198 assertEquals(1, einfo.getCharPosition());
199 einfo = errList.get(1);
200 assertFalse(einfo.has2nd());
201 assertEquals((byte)0x00, einfo.getRawByte1st());
202 assertEquals(2, einfo.getCharPosition());
203 einfo = errList.get(2);
204 assertFalse(einfo.has2nd());
205 assertEquals((byte)0x00, einfo.getRawByte1st());
206 assertEquals(3, einfo.getCharPosition());
207 einfo = errList.get(3);
208 assertFalse(einfo.has2nd());
209 assertEquals((byte)0x43, einfo.getRawByte1st());
210 assertEquals(4, einfo.getCharPosition());
// Scenario 2: low surrogate DC00 with no preceding high surrogate. The
// two bytes dc 00 are expected as errors at char positions 2 and 3.
213 cd = cs.newDecoder();
214 decoder = new DecodeNotifier(cd);
215 cb = new ContentBuilder();
216 decoder.setCharDecodeListener(cb);
217 bdata = Bseq.byteArray("0041:0042:dc00:0044");
218 is = new ByteArrayInputStream(bdata);
220 content = cb.getContent();
222 assertEquals(5, content.length());
223 assertEquals("AB??D", content.toString());
// Assert the error flag here as well, matching the other two scenarios.
assertTrue(content.hasDecodeError());
224 errList = content.getDecodeErrorList();
225 assertEquals(2, errList.size());
226 einfo = errList.get(0);
227 assertFalse(einfo.has2nd());
228 assertEquals((byte)0xdc, einfo.getRawByte1st());
229 assertEquals(2, einfo.getCharPosition());
230 einfo = errList.get(1);
231 assertFalse(einfo.has2nd());
232 assertEquals((byte)0x00, einfo.getRawByte1st());
233 assertEquals(3, einfo.getCharPosition());
// Scenario 3: the input ends right after high surrogate D800. The two
// bytes d8 00 are expected as errors at char positions 1 and 2.
236 cd = cs.newDecoder();
237 decoder = new DecodeNotifier(cd);
238 cb = new ContentBuilder();
239 decoder.setCharDecodeListener(cb);
240 bdata = Bseq.byteArray("0041:d800");
241 is = new ByteArrayInputStream(bdata);
243 content = cb.getContent();
245 assertEquals(3, content.length());
246 assertEquals("A??", content.toString());
247 assertTrue(content.hasDecodeError());
248 errList = content.getDecodeErrorList();
249 assertEquals(2, errList.size());
250 einfo = errList.get(0);
251 assertFalse(einfo.has2nd());
252 assertEquals((byte)0xd8, einfo.getRawByte1st());
253 assertEquals(1, einfo.getCharPosition());
254 einfo = errList.get(1);
255 assertFalse(einfo.has2nd());
256 assertEquals((byte)0x00, einfo.getRawByte1st());
257 assertEquals(2, einfo.getCharPosition());
263 * Test of UTF-16 mapping error
267 public void testUTF16_nomap() throws Exception {
// Feeds a well-formed surrogate pair (D83D DC11) between 'A' and 'B'
// through a UTF-16 decoder with ContentBuilder as listener. The test
// expects both surrogate chars to appear unchanged in the content,
// giving a length of 4 chars.
268 Charset cs = Charset.forName("UTF-16");
272 DecodeNotifier decoder;
275 DecodedContent content;
277 cd = cs.newDecoder();
278 decoder = new DecodeNotifier(cd);
279 cb = new ContentBuilder();
280 decoder.setCharDecodeListener(cb);
// Assumes Bseq.byteArray turns the colon-separated hex text into the
// bytes 00 41 D8 3D DC 11 00 42 -- TODO confirm against Bseq.
281 bdata = Bseq.byteArray("0041:d83d:dc11:0042");
282 is = new ByteArrayInputStream(bdata);
284 content = cb.getContent();
286 assertEquals(4, content.length());
287 assertEquals("A\ud83d\udc11B", content.toString());
293 public void testSheep() throws IOException, DecodeBreakException {
// Checks that a four-byte UTF-8 sequence for a supplementary-plane
// character reaches ContentBuilder as a surrogate pair.
// Prints the test name to standard output first.
294 System.out.println("sheep");
297 CharsetDecoder decoder;
298 ContentBuilder listener;
303 cs = Charset.forName("UTF-8");
304 decoder = cs.newDecoder();
// Wrap the UTF-8 decoder in a DecodeNotifier.
306 sd = new DecodeNotifier(decoder);
// Register a fresh ContentBuilder as its char-decode listener.
308 listener = new ContentBuilder();
309 sd.setCharDecodeListener(listener);
311 // SMP character U+1F411 [SHEEP]
312 // see https://ja.osdn.net/projects/jindolf/ticket/36356
// F0 9F 90 91 is the UTF-8 encoding of U+1F411.
313 is = Bseq.byteStream(0xf0, 0x9f, 0x90, 0x91);
// The collected text must be the surrogate pair D83D DC11.
315 assertEquals("\ud83d\udc11", listener.getContent().toString());