OSDN Git Service

new repo
[bytom/vapor.git] / vendor / golang.org / x / text / encoding / encoding_test.go
1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package encoding_test
6
7 import (
8         "io/ioutil"
9         "strings"
10         "testing"
11
12         "golang.org/x/text/encoding"
13         "golang.org/x/text/encoding/charmap"
14         "golang.org/x/text/transform"
15 )
16
17 func TestEncodeInvalidUTF8(t *testing.T) {
18         inputs := []string{
19                 "hello.",
20                 "wo\ufffdld.",
21                 "ABC\xff\x80\x80", // Invalid UTF-8.
22                 "\x80\x80\x80\x80\x80",
23                 "\x80\x80D\x80\x80",          // Valid rune at "D".
24                 "E\xed\xa0\x80\xed\xbf\xbfF", // Two invalid UTF-8 runes (surrogates).
25                 "G",
26                 "H\xe2\x82",     // U+20AC in UTF-8 is "\xe2\x82\xac", which we split over two
27                 "\xacI\xe2\x82", // input lines. It maps to 0x80 in the Windows-1252 encoding.
28         }
29         // Each invalid source byte becomes '\x1a'.
30         want := strings.Replace("hello.wo?ld.ABC??????????D??E??????FGH\x80I??", "?", "\x1a", -1)
31
32         transformer := encoding.ReplaceUnsupported(charmap.Windows1252.NewEncoder())
33         gotBuf := make([]byte, 0, 1024)
34         src := make([]byte, 0, 1024)
35         for i, input := range inputs {
36                 dst := make([]byte, 1024)
37                 src = append(src, input...)
38                 atEOF := i == len(inputs)-1
39                 nDst, nSrc, err := transformer.Transform(dst, src, atEOF)
40                 gotBuf = append(gotBuf, dst[:nDst]...)
41                 src = src[nSrc:]
42                 if err != nil && err != transform.ErrShortSrc {
43                         t.Fatalf("i=%d: %v", i, err)
44                 }
45                 if atEOF && err != nil {
46                         t.Fatalf("i=%d: atEOF: %v", i, err)
47                 }
48         }
49         if got := string(gotBuf); got != want {
50                 t.Fatalf("\ngot  %+q\nwant %+q", got, want)
51         }
52 }
53
54 func TestReplacement(t *testing.T) {
55         for _, direction := range []string{"Decode", "Encode"} {
56                 enc, want := (transform.Transformer)(nil), ""
57                 if direction == "Decode" {
58                         enc = encoding.Replacement.NewDecoder()
59                         want = "\ufffd"
60                 } else {
61                         enc = encoding.Replacement.NewEncoder()
62                         want = "AB\x00CD\ufffdYZ"
63                 }
64                 sr := strings.NewReader("AB\x00CD\x80YZ")
65                 g, err := ioutil.ReadAll(transform.NewReader(sr, enc))
66                 if err != nil {
67                         t.Errorf("%s: ReadAll: %v", direction, err)
68                         continue
69                 }
70                 if got := string(g); got != want {
71                         t.Errorf("%s:\ngot  %q\nwant %q", direction, got, want)
72                         continue
73                 }
74         }
75 }
76
77 func TestUTF8Validator(t *testing.T) {
78         testCases := []struct {
79                 desc    string
80                 dstSize int
81                 src     string
82                 atEOF   bool
83                 want    string
84                 wantErr error
85         }{
86                 {
87                         "empty input",
88                         100,
89                         "",
90                         false,
91                         "",
92                         nil,
93                 },
94                 {
95                         "valid 1-byte 1-rune input",
96                         100,
97                         "a",
98                         false,
99                         "a",
100                         nil,
101                 },
102                 {
103                         "valid 3-byte 1-rune input",
104                         100,
105                         "\u1234",
106                         false,
107                         "\u1234",
108                         nil,
109                 },
110                 {
111                         "valid 5-byte 3-rune input",
112                         100,
113                         "a\u0100\u0101",
114                         false,
115                         "a\u0100\u0101",
116                         nil,
117                 },
118                 {
119                         "perfectly sized dst (non-ASCII)",
120                         5,
121                         "a\u0100\u0101",
122                         false,
123                         "a\u0100\u0101",
124                         nil,
125                 },
126                 {
127                         "short dst (non-ASCII)",
128                         4,
129                         "a\u0100\u0101",
130                         false,
131                         "a\u0100",
132                         transform.ErrShortDst,
133                 },
134                 {
135                         "perfectly sized dst (ASCII)",
136                         5,
137                         "abcde",
138                         false,
139                         "abcde",
140                         nil,
141                 },
142                 {
143                         "short dst (ASCII)",
144                         4,
145                         "abcde",
146                         false,
147                         "abcd",
148                         transform.ErrShortDst,
149                 },
150                 {
151                         "partial input (!EOF)",
152                         100,
153                         "a\u0100\xf1",
154                         false,
155                         "a\u0100",
156                         transform.ErrShortSrc,
157                 },
158                 {
159                         "invalid input (EOF)",
160                         100,
161                         "a\u0100\xf1",
162                         true,
163                         "a\u0100",
164                         encoding.ErrInvalidUTF8,
165                 },
166                 {
167                         "invalid input (!EOF)",
168                         100,
169                         "a\u0100\x80",
170                         false,
171                         "a\u0100",
172                         encoding.ErrInvalidUTF8,
173                 },
174                 {
175                         "invalid input (above U+10FFFF)",
176                         100,
177                         "a\u0100\xf7\xbf\xbf\xbf",
178                         false,
179                         "a\u0100",
180                         encoding.ErrInvalidUTF8,
181                 },
182                 {
183                         "invalid input (surrogate half)",
184                         100,
185                         "a\u0100\xed\xa0\x80",
186                         false,
187                         "a\u0100",
188                         encoding.ErrInvalidUTF8,
189                 },
190         }
191         for _, tc := range testCases {
192                 dst := make([]byte, tc.dstSize)
193                 nDst, nSrc, err := encoding.UTF8Validator.Transform(dst, []byte(tc.src), tc.atEOF)
194                 if nDst < 0 || len(dst) < nDst {
195                         t.Errorf("%s: nDst=%d out of range", tc.desc, nDst)
196                         continue
197                 }
198                 got := string(dst[:nDst])
199                 if got != tc.want || nSrc != len(tc.want) || err != tc.wantErr {
200                         t.Errorf("%s:\ngot  %+q, %d, %v\nwant %+q, %d, %v",
201                                 tc.desc, got, nSrc, err, tc.want, len(tc.want), tc.wantErr)
202                         continue
203                 }
204         }
205 }
206
207 func TestErrorHandler(t *testing.T) {
208         testCases := []struct {
209                 desc      string
210                 handler   func(*encoding.Encoder) *encoding.Encoder
211                 sizeDst   int
212                 src, want string
213                 nSrc      int
214                 err       error
215         }{
216                 {
217                         desc:    "one rune replacement",
218                         handler: encoding.ReplaceUnsupported,
219                         sizeDst: 100,
220                         src:     "\uAC00",
221                         want:    "\x1a",
222                         nSrc:    3,
223                 },
224                 {
225                         desc:    "mid-stream rune replacement",
226                         handler: encoding.ReplaceUnsupported,
227                         sizeDst: 100,
228                         src:     "a\uAC00bcd\u00e9",
229                         want:    "a\x1abcd\xe9",
230                         nSrc:    9,
231                 },
232                 {
233                         desc:    "at end rune replacement",
234                         handler: encoding.ReplaceUnsupported,
235                         sizeDst: 10,
236                         src:     "\u00e9\uAC00",
237                         want:    "\xe9\x1a",
238                         nSrc:    5,
239                 },
240                 {
241                         desc:    "short buffer replacement",
242                         handler: encoding.ReplaceUnsupported,
243                         sizeDst: 1,
244                         src:     "\u00e9\uAC00",
245                         want:    "\xe9",
246                         nSrc:    2,
247                         err:     transform.ErrShortDst,
248                 },
249                 {
250                         desc:    "one rune html escape",
251                         handler: encoding.HTMLEscapeUnsupported,
252                         sizeDst: 100,
253                         src:     "\uAC00",
254                         want:    "&#44032;",
255                         nSrc:    3,
256                 },
257                 {
258                         desc:    "mid-stream html escape",
259                         handler: encoding.HTMLEscapeUnsupported,
260                         sizeDst: 100,
261                         src:     "\u00e9\uAC00dcba",
262                         want:    "\xe9&#44032;dcba",
263                         nSrc:    9,
264                 },
265                 {
266                         desc:    "short buffer html escape",
267                         handler: encoding.HTMLEscapeUnsupported,
268                         sizeDst: 9,
269                         src:     "ab\uAC01",
270                         want:    "ab",
271                         nSrc:    2,
272                         err:     transform.ErrShortDst,
273                 },
274         }
275         for i, tc := range testCases {
276                 tr := tc.handler(charmap.Windows1250.NewEncoder())
277                 b := make([]byte, tc.sizeDst)
278                 nDst, nSrc, err := tr.Transform(b, []byte(tc.src), true)
279                 if err != tc.err {
280                         t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
281                 }
282                 if got := string(b[:nDst]); got != tc.want {
283                         t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
284                 }
285                 if nSrc != tc.nSrc {
286                         t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
287                 }
288
289         }
290 }