2 * License : The MIT License
3 * Copyright(c) 2010 olyutorskii
6 package jp.sourceforge.jindolf.parser;
8 import java.io.ByteArrayInputStream;
9 import java.io.InputStream;
10 import java.nio.charset.Charset;
11 import java.nio.charset.CharsetDecoder;
12 import java.util.ArrayList;
13 import java.util.List;
14 import org.junit.After;
15 import org.junit.AfterClass;
16 import org.junit.Before;
17 import org.junit.BeforeClass;
18 import org.junit.Test;
20 import static org.junit.Assert.*;
25 public class ContentBuilderUCS2Test {
// Public no-argument constructor of the test class (body not visible in this view).
27 public ContentBuilderUCS2Test() {
// Static class-level set-up hook; declared to propagate any Exception.
// NOTE(review): presumably the JUnit @BeforeClass method (org.junit.BeforeClass is
// imported) — the annotation and body are not visible in this view; confirm.
31 public static void setUpClass() throws Exception{
// Static class-level tear-down hook; declared to propagate any Exception.
// NOTE(review): presumably the JUnit @AfterClass method (org.junit.AfterClass is
// imported) — the annotation and body are not visible in this view; confirm.
35 public static void tearDownClass() throws Exception{
// Per-instance tear-down hook.
// NOTE(review): presumably the JUnit @After method (org.junit.After is imported) —
// the annotation and body are not visible in this view; confirm.
43 public void tearDown() {
// Test helper: turns hexadecimal text into a byte array.
//
// Walks seq one character at a time, recognising the hex digit ranges
// '0'-'9', 'a'-'f' and 'A'-'F' (both letter cases are accepted), appends each
// assembled value to a List<Byte> narrowed to byte, and finally copies that
// list into a primitive byte[] of the same size.
//
// NOTE(review): the tests call this with text such as "41:42:43", so presumably
// two consecutive hex digits form one byte and other characters (':') only
// separate groups. The arithmetic on val and the final else-branches are not
// visible in this view — confirm against the full file.
//
// Parameter seq: hex text to convert.
// Result: the byte[] named result that is filled below (the return statement
// itself is not visible in this view).
46 public static byte[] byteArray(CharSequence seq){
49 List<Byte> byteList = new ArrayList<>();
51 int length = seq.length();
52 for(int pos = 0; pos < length; pos++){
55 char ch = seq.charAt(pos);
// Classify the current character as a decimal, lower-case or upper-case hex digit.
57 if('0' <= ch && ch <= '9'){
59 }else if('a' <= ch && ch <= 'f'){
61 }else if('A' <= ch && ch <= 'F'){
// Leave the loop once the index has reached the end of the input.
68 if(pos >= length) break;
// Same three-way hex digit classification, applied again to ch.
73 if('0' <= ch && ch <= '9'){
75 }else if('a' <= ch && ch <= 'f'){
77 }else if('A' <= ch && ch <= 'F'){
// The (byte) cast keeps only the low 8 bits of val.
83 byteList.add((byte)val);
// Unbox the collected Byte objects into a primitive array of matching length.
86 result = new byte[byteList.size()];
88 for(int pos = 0; pos < result.length; pos++){
89 result[pos] = byteList.get(pos);
// Checks the DecodedContent produced by ContentBuilderUCS2 when it is installed
// as the decode handler of a StreamDecoder built on a UTF-8 CharsetDecoder.
// Three scenarios are covered, each with a fresh decoder and builder:
//   1. plain ASCII bytes,
//   2. a three-byte UTF-8 sequence,
//   3. an invalid byte (0xFF) in the middle of the input.
// NOTE(review): the declarations of cd / bdata / is and the statement that feeds
// the input stream to the decoder are not visible in this view.
99 public void testUTF8() throws Exception {
100 Charset cs = Charset.forName("UTF-8");
103 ContentBuilderUCS2 cb;
104 StreamDecoder decoder;
107 DecodedContent content;
108 List<DecodeErrorInfo> errList;
109 DecodeErrorInfo einfo;
// Scenario 1: bytes 41 42 43 are expected to decode to "ABC" with no decode error.
112 cd = cs.newDecoder();
113 decoder = new StreamDecoder(cd);
114 cb = new ContentBuilderUCS2();
115 decoder.setDecodeHandler(cb);
116 bdata = byteArray("41:42:43");
117 is = new ByteArrayInputStream(bdata);
119 content = cb.getContent();
121 assertEquals(3, content.length());
122 assertEquals("ABC", content.toString());
123 assertFalse(content.hasDecodeError());
// Scenario 2: EF BC A2 is the UTF-8 encoding of U+FF22 (FULLWIDTH LATIN CAPITAL
// LETTER B), so the expected text is 'A', U+FF22, 'C' — three chars, no error.
// The expectation is written with a Unicode escape so that it cannot be confused
// with ASCII 'B' (U+0042) or be damaged by source-encoding conversions.
126 cd = cs.newDecoder();
127 decoder = new StreamDecoder(cd);
128 cb = new ContentBuilderUCS2();
129 decoder.setDecodeHandler(cb);
130 bdata = byteArray("41:EFBCA2:43");
131 is = new ByteArrayInputStream(bdata);
133 content = cb.getContent();
135 assertEquals(3, content.length());
136 assertEquals("A\uff22C", content.toString());
137 assertFalse(content.hasDecodeError());
// Scenario 3: 0xFF can never appear in well-formed UTF-8. The content is expected
// to hold '?' in its place, and exactly one single-byte error entry (has2nd() is
// false) carrying raw byte 0xFF at char position 1.
140 cd = cs.newDecoder();
141 decoder = new StreamDecoder(cd);
142 cb = new ContentBuilderUCS2();
143 decoder.setDecodeHandler(cb);
144 bdata = byteArray("41:FF:43");
145 is = new ByteArrayInputStream(bdata);
147 content = cb.getContent();
149 assertEquals(3, content.length());
150 assertEquals("A?C", content.toString());
151 assertTrue(content.hasDecodeError());
152 errList = content.getDecodeErrorList();
153 assertEquals(1, errList.size());
154 einfo = errList.get(0);
155 assertFalse(einfo.has2nd());
156 assertEquals((byte)0xff, einfo.getRawByte1st());
157 assertEquals(1, einfo.getCharPosition());
// Checks the DecodedContent produced by ContentBuilderUCS2 when it is installed
// as the decode handler of a StreamDecoder built on a UTF-16 CharsetDecoder.
// Both scenarios use well-formed input without a byte-order mark and expect no
// decode error.
// NOTE(review): the declarations of cd / bdata / is and the statement that feeds
// the input stream to the decoder are not visible in this view.
166 public void testUTF16() throws Exception {
167 Charset cs = Charset.forName("UTF-16");
170 ContentBuilderUCS2 cb;
171 StreamDecoder decoder;
174 DecodedContent content;
// Scenario 1: code units 0041 0042 0043 are expected to decode to "ABC".
177 cd = cs.newDecoder();
178 decoder = new StreamDecoder(cd);
179 cb = new ContentBuilderUCS2();
180 decoder.setDecodeHandler(cb);
181 bdata = byteArray("0041:0042:0043");
182 is = new ByteArrayInputStream(bdata);
184 content = cb.getContent();
186 assertEquals(3, content.length());
187 assertEquals("ABC", content.toString());
188 assertFalse(content.hasDecodeError());
// Scenario 2: the middle code unit FF22 is U+FF22 (FULLWIDTH LATIN CAPITAL
// LETTER B), so the expected text is 'A', U+FF22, 'C'. The expectation is written
// with a Unicode escape so that it cannot be confused with ASCII 'B' (U+0042) or
// be damaged by source-encoding conversions.
191 cd = cs.newDecoder();
192 decoder = new StreamDecoder(cd);
193 cb = new ContentBuilderUCS2();
194 decoder.setDecodeHandler(cb);
195 bdata = byteArray("0041:FF22:0043");
196 is = new ByteArrayInputStream(bdata);
198 content = cb.getContent();
200 assertEquals(3, content.length());
201 assertEquals("A\uff22C", content.toString());
202 assertFalse(content.hasDecodeError());
209 * Test of UTF16 sequence error
// Checks how malformed UTF-16 surrogate sequences show up in the DecodedContent
// built by ContentBuilderUCS2 (installed as the decode handler of a StreamDecoder
// on a UTF-16 CharsetDecoder). In every scenario the expected text holds one '?'
// per reported error, and each error entry is a single-byte one (has2nd() false).
// NOTE(review): the declarations of cd / bdata / is and the statement that feeds
// the input stream to the decoder are not visible in this view.
212 public void testUTF16_seq() throws Exception {
213 Charset cs = Charset.forName("UTF-16");
216 ContentBuilderUCS2 cb;
217 StreamDecoder decoder;
220 DecodedContent content;
221 List<DecodeErrorInfo> errList;
222 DecodeErrorInfo einfo;
// Scenario 1: high surrogate D800 followed by 0043, which is not a low surrogate.
// Expected: "A????D" (6 chars) and four error entries for the raw bytes
// D8, 00, 00, 43 at char positions 1, 2, 3 and 4.
224 cd = cs.newDecoder();
225 decoder = new StreamDecoder(cd);
226 cb = new ContentBuilderUCS2();
227 decoder.setDecodeHandler(cb);
228 bdata = byteArray("0041:d800:0043:0044");
229 is = new ByteArrayInputStream(bdata);
231 content = cb.getContent();
233 assertEquals(6, content.length());
234 assertEquals("A????D", content.toString());
235 assertTrue(content.hasDecodeError());
236 errList = content.getDecodeErrorList();
237 assertEquals(4, errList.size());
238 einfo = errList.get(0);
239 assertFalse(einfo.has2nd());
240 assertEquals((byte)0xd8, einfo.getRawByte1st());
241 assertEquals(1, einfo.getCharPosition());
242 einfo = errList.get(1);
243 assertFalse(einfo.has2nd());
244 assertEquals((byte)0x00, einfo.getRawByte1st());
245 assertEquals(2, einfo.getCharPosition());
246 einfo = errList.get(2);
247 assertFalse(einfo.has2nd());
248 assertEquals((byte)0x00, einfo.getRawByte1st());
249 assertEquals(3, einfo.getCharPosition());
250 einfo = errList.get(3);
251 assertFalse(einfo.has2nd());
252 assertEquals((byte)0x43, einfo.getRawByte1st());
253 assertEquals(4, einfo.getCharPosition());
// Scenario 2: low surrogate DC00 with no preceding high surrogate.
// Expected: "AB??D" (5 chars) and two error entries for the raw bytes DC, 00 at
// char positions 2 and 3.
// NOTE(review): unlike the other two scenarios, no hasDecodeError() assertion is
// visible here — consider whether one was intended.
256 cd = cs.newDecoder();
257 decoder = new StreamDecoder(cd);
258 cb = new ContentBuilderUCS2();
259 decoder.setDecodeHandler(cb);
260 bdata = byteArray("0041:0042:dc00:0044");
261 is = new ByteArrayInputStream(bdata);
263 content = cb.getContent();
265 assertEquals(5, content.length());
266 assertEquals("AB??D", content.toString());
267 errList = content.getDecodeErrorList();
268 assertEquals(2, errList.size());
269 einfo = errList.get(0);
270 assertFalse(einfo.has2nd());
271 assertEquals((byte)0xdc, einfo.getRawByte1st());
272 assertEquals(2, einfo.getCharPosition());
273 einfo = errList.get(1);
274 assertFalse(einfo.has2nd());
275 assertEquals((byte)0x00, einfo.getRawByte1st());
276 assertEquals(3, einfo.getCharPosition());
// Scenario 3: the input ends right after high surrogate D800.
// Expected: "A??" (3 chars) and two error entries for the raw bytes D8, 00 at
// char positions 1 and 2.
279 cd = cs.newDecoder();
280 decoder = new StreamDecoder(cd);
281 cb = new ContentBuilderUCS2();
282 decoder.setDecodeHandler(cb);
283 bdata = byteArray("0041:d800");
284 is = new ByteArrayInputStream(bdata);
286 content = cb.getContent();
288 assertEquals(3, content.length());
289 assertEquals("A??", content.toString());
290 assertTrue(content.hasDecodeError());
291 errList = content.getDecodeErrorList();
292 assertEquals(2, errList.size());
293 einfo = errList.get(0);
294 assertFalse(einfo.has2nd());
295 assertEquals((byte)0xd8, einfo.getRawByte1st());
296 assertEquals(1, einfo.getCharPosition());
297 einfo = errList.get(1);
298 assertFalse(einfo.has2nd());
299 assertEquals((byte)0x00, einfo.getRawByte1st());
300 assertEquals(2, einfo.getCharPosition());
306 * Test of UTF16 mapping error
309 public void testUTF16_nomap() throws Exception {
310 Charset cs = Charset.forName("UTF-16");
313 ContentBuilderUCS2 cb;
314 StreamDecoder decoder;
317 DecodedContent content;
319 cd = cs.newDecoder();
320 decoder = new StreamDecoder(cd);
321 cb = new ContentBuilderUCS2();
322 decoder.setDecodeHandler(cb);
323 bdata = byteArray("0041:d83d:dc11:0042");
324 is = new ByteArrayInputStream(bdata);
326 content = cb.getContent();
328 assertEquals(4, content.length());
329 assertEquals("A\ud83d\udc11B", content.toString());