1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
12 "golang.org/x/text/encoding"
13 "golang.org/x/text/encoding/charmap"
14 "golang.org/x/text/transform"
// TestEncodeInvalidUTF8 pushes a series of input chunks, several of which
// contain invalid UTF-8, through a Windows-1252 encoder wrapped with
// encoding.ReplaceUnsupported and compares the accumulated output with want.
17 func TestEncodeInvalidUTF8(t *testing.T) {
21 "ABC\xff\x80\x80", // Invalid UTF-8.
22 "\x80\x80\x80\x80\x80",
23 "\x80\x80D\x80\x80", // Valid rune at "D".
24 "E\xed\xa0\x80\xed\xbf\xbfF", // Two invalid UTF-8 runes (surrogates).
26 "H\xe2\x82", // U+20AC in UTF-8 is "\xe2\x82\xac", which we split over two
27 "\xacI\xe2\x82", // input lines. It maps to 0x80 in the Windows-1252 encoding.
29 // Each invalid source byte becomes '\x1a'.
// The expected string is spelled with '?' as a readable placeholder; every
// '?' is swapped for '\x1a' before the comparison.
30 want := strings.Replace("hello.wo?ld.ABC??????????D??E??????FGH\x80I??", "?", "\x1a", -1)
32 transformer := encoding.ReplaceUnsupported(charmap.Windows1252.NewEncoder())
// gotBuf collects the bytes written by every Transform call; src collects the
// input chunks appended so far.
33 gotBuf := make([]byte, 0, 1024)
34 src := make([]byte, 0, 1024)
35 for i, input := range inputs {
36 dst := make([]byte, 1024)
37 src = append(src, input...)
// Only the last chunk is transformed with atEOF set.
38 atEOF := i == len(inputs)-1
39 nDst, nSrc, err := transformer.Transform(dst, src, atEOF)
40 gotBuf = append(gotBuf, dst[:nDst]...)
// ErrShortSrc is tolerated by this check; any other error fails the test.
42 if err != nil && err != transform.ErrShortSrc {
43 t.Fatalf("i=%d: %v", i, err)
// On the final chunk no error at all is accepted.
45 if atEOF && err != nil {
46 t.Fatalf("i=%d: atEOF: %v", i, err)
// Compare everything the encoder produced against the expected bytes.
49 if got := string(gotBuf); got != want {
50 t.Fatalf("\ngot %+q\nwant %+q", got, want)
// TestReplacement reads the input "AB\x00CD\x80YZ" through the decoder and
// the encoder of encoding.Replacement and compares what comes out with the
// expected string for that direction.
54 func TestReplacement(t *testing.T) {
55 for _, direction := range []string{"Decode", "Encode"} {
// enc and want start out empty and are filled in per direction below.
56 enc, want := (transform.Transformer)(nil), ""
57 if direction == "Decode" {
58 enc = encoding.Replacement.NewDecoder()
61 enc = encoding.Replacement.NewEncoder()
62 want = "AB\x00CD\ufffdYZ"
// Run the whole input through the transformer by wrapping the string reader
// in a transform.Reader and draining it.
64 sr := strings.NewReader("AB\x00CD\x80YZ")
65 g, err := ioutil.ReadAll(transform.NewReader(sr, enc))
67 t.Errorf("%s: ReadAll: %v", direction, err)
70 if got := string(g); got != want {
71 t.Errorf("%s:\ngot %q\nwant %q", direction, got, want)
// TestUTF8Validator runs encoding.UTF8Validator.Transform over a table of
// cases and checks the bytes written to dst, the number of source bytes
// consumed, and the returned error against each case's expectations.
77 func TestUTF8Validator(t *testing.T) {
78 testCases := []struct {
95 "valid 1-byte 1-rune input",
103 "valid 3-byte 1-rune input",
111 "valid 5-byte 3-rune input",
119 "perfectly sized dst (non-ASCII)",
127 "short dst (non-ASCII)",
132 transform.ErrShortDst,
135 "perfectly sized dst (ASCII)",
148 transform.ErrShortDst,
151 "partial input (!EOF)",
156 transform.ErrShortSrc,
159 "invalid input (EOF)",
164 encoding.ErrInvalidUTF8,
167 "invalid input (!EOF)",
172 encoding.ErrInvalidUTF8,
175 "invalid input (above U+10FFFF)",
177 "a\u0100\xf7\xbf\xbf\xbf",
180 encoding.ErrInvalidUTF8,
183 "invalid input (surrogate half)",
185 "a\u0100\xed\xa0\x80",
188 encoding.ErrInvalidUTF8,
191 for _, tc := range testCases {
// Each case gets a fresh destination buffer of the size it asks for.
192 dst := make([]byte, tc.dstSize)
193 nDst, nSrc, err := encoding.UTF8Validator.Transform(dst, []byte(tc.src), tc.atEOF)
// Reject an out-of-range nDst before it is used to slice dst.
194 if nDst < 0 || len(dst) < nDst {
195 t.Errorf("%s: nDst=%d out of range", tc.desc, nDst)
198 got := string(dst[:nDst])
// The expected nSrc is len(tc.want): the test requires the number of source
// bytes consumed to equal the length of the expected output.
199 if got != tc.want || nSrc != len(tc.want) || err != tc.wantErr {
200 t.Errorf("%s:\ngot %+q, %d, %v\nwant %+q, %d, %v",
201 tc.desc, got, nSrc, err, tc.want, len(tc.want), tc.wantErr)
// TestErrorHandler wraps a Windows-1250 encoder with each case's handler
// (encoding.ReplaceUnsupported or encoding.HTMLEscapeUnsupported), transforms
// tc.src at EOF into a buffer of tc.sizeDst bytes, and checks the returned
// error, the bytes written, and the number of source bytes consumed.
207 func TestErrorHandler(t *testing.T) {
208 testCases := []struct {
210 handler func(*encoding.Encoder) *encoding.Encoder
217 desc: "one rune replacement",
218 handler: encoding.ReplaceUnsupported,
225 desc: "mid-stream rune replacement",
226 handler: encoding.ReplaceUnsupported,
228 src: "a\uAC00bcd\u00e9",
229 want: "a\x1abcd\xe9",
233 desc: "at end rune replacement",
234 handler: encoding.ReplaceUnsupported,
241 desc: "short buffer replacement",
242 handler: encoding.ReplaceUnsupported,
247 err: transform.ErrShortDst,
250 desc: "one rune html escape",
251 handler: encoding.HTMLEscapeUnsupported,
258 desc: "mid-stream html escape",
259 handler: encoding.HTMLEscapeUnsupported,
261 src: "\u00e9\uAC00dcba",
// U+AC00 has no Windows-1250 mapping, so the HTML-escaping handler is expected
// to emit its decimal numeric character reference, not the raw rune.
262 want: "\xe9&#44032;dcba",
266 desc: "short buffer html escape",
267 handler: encoding.HTMLEscapeUnsupported,
272 err: transform.ErrShortDst,
275 for i, tc := range testCases {
// Every case is encoded to Windows-1250 through the handler under test.
276 tr := tc.handler(charmap.Windows1250.NewEncoder())
277 b := make([]byte, tc.sizeDst)
// The whole source is handed over in one call with atEOF set to true.
278 nDst, nSrc, err := tr.Transform(b, []byte(tc.src), true)
280 t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
282 if got := string(b[:nDst]); got != tc.want {
283 t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
286 t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)