1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
18 <?xml version="1.0" encoding="UTF-8"?>
19 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
20 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
21 <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
23 <hello lang="en">World <>'" 白鵬翔</hello>
24 <query>&何; &is-it;</query>
26 <outer foo:attr="value" xmlns:tag="ns4">
30 <![CDATA[Some text here.]]>
32 </body><!-- missing final newline -->`
// testEntity maps custom entity names to their replacement text. TestRawToken
// and TestToken assign it to the decoder's Entity field before tokenizing
// testInput.
34 var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
// rawTokens lists, in order, the tokens TestRawToken expects RawToken to
// return for testInput. Element and attribute names keep their literal
// prefixes in Name.Space (for example "xmlns", "foo", "tag"); they are not
// replaced by namespace values.
//
// NOTE(review): the embedded line numbers in this listing skip values, so
// some entries and the closing brace are not shown here.
36 var rawTokens = []Token{
38 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
40 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
41 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
43 StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
45 StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
46 CharData("World <>'\" 白鵬翔"),
47 EndElement{Name{"", "hello"}},
49 StartElement{Name{"", "query"}, []Attr{}},
50 CharData("What is it?"),
51 EndElement{Name{"", "query"}},
53 StartElement{Name{"", "goodbye"}, []Attr{}},
54 EndElement{Name{"", "goodbye"}},
56 StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
58 StartElement{Name{"", "inner"}, []Attr{}},
59 EndElement{Name{"", "inner"}},
61 EndElement{Name{"", "outer"}},
63 StartElement{Name{"tag", "name"}, []Attr{}},
65 CharData("Some text here."),
67 EndElement{Name{"tag", "name"}},
69 EndElement{Name{"", "body"}},
70 Comment(" missing final newline "),
// cookedTokens lists, in order, the tokens TestToken expects Token to return
// for testInput. Unlike rawTokens, element names and prefixed attribute names
// carry the namespace value bound by the xmlns declarations ("ns1", "ns2",
// "ns3") in Name.Space, while the xmlns attributes themselves stay as written.
//
// NOTE(review): the embedded line numbers in this listing skip values, so
// some entries and the closing brace are not shown here.
73 var cookedTokens = []Token{
75 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
77 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
78 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
80 StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
82 StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
83 CharData("World <>'\" 白鵬翔"),
84 EndElement{Name{"ns2", "hello"}},
86 StartElement{Name{"ns2", "query"}, []Attr{}},
87 CharData("What is it?"),
88 EndElement{Name{"ns2", "query"}},
90 StartElement{Name{"ns2", "goodbye"}, []Attr{}},
91 EndElement{Name{"ns2", "goodbye"}},
93 StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
95 StartElement{Name{"ns2", "inner"}, []Attr{}},
96 EndElement{Name{"ns2", "inner"}},
98 EndElement{Name{"ns2", "outer"}},
100 StartElement{Name{"ns3", "name"}, []Attr{}},
102 CharData("Some text here."),
104 EndElement{Name{"ns3", "name"}},
106 EndElement{Name{"ns2", "body"}},
107 Comment(" missing final newline "),
// testInputAltEncoding is an XML document whose declaration names the
// encoding "x-testing-uppercase". TestRawTokenAltEncoding and
// TestRawTokenAltEncodingNoConverter both decode it.
// NOTE(review): the rest of the document is not shown in this listing.
110 const testInputAltEncoding = `
111 <?xml version="1.0" encoding="x-testing-uppercase"?>
// rawTokensAltEncoding lists the raw tokens TestRawTokenAltEncoding expects
// from testInputAltEncoding when its CharsetReader is installed.
// NOTE(review): the embedded line numbers skip values, so some entries are
// not shown here.
114 var rawTokensAltEncoding = []Token{
116 ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
118 StartElement{Name{"", "tag"}, []Attr{}},
120 EndElement{Name{"", "tag"}},
// xmlInput holds malformed documents. TestSyntax tokenizes each one and
// requires the error that ends tokenizing to be a *SyntaxError.
// NOTE(review): only the section comments are visible in this listing; the
// string entries themselves are not shown.
123 var xmlInput = []string{
124 // unexpected EOF cases
149 // other Syntax errors
154 // "<!0 >", // let the Token() caller handle
163 // "<![CDATA[d]]>", // let the Token() caller handle
// TestRawToken decodes testInput with testEntity installed as the decoder's
// entity map and hands the decoder to testRawToken, which compares the raw
// tokens against rawTokens.
170 func TestRawToken(t *testing.T) {
171 d := NewDecoder(strings.NewReader(testInput))
172 d.Entity = testEntity
173 testRawToken(t, d, testInput, rawTokens)
// nonStrictInput is the document tokenized by TestNonStrictRawToken. The
// visible elements contain an ampersand with no terminating semicolon
// ("non&entity") and a reference to an entity named "unknown".
// NOTE(review): the remaining elements are not shown in this listing.
176 const nonStrictInput = `
177 <tag>non&entity</tag>
178 <tag>&unknown;entity</tag>
// nonStringEntity is an entity map containing an empty name and a name that
// starts with a digit ("0a"), both mapped to "oops!".
// NOTE(review): no use of this map is visible in this listing; presumably
// TestNonStrictRawToken installs it on a line not shown — confirm.
187 var nonStringEntity = map[string]string{"": "oops!", "0a": "oops!"}
// nonStrictTokens lists the raw tokens TestNonStrictRawToken expects for
// nonStrictInput. In the visible CharData entries the malformed text is
// expected back unchanged ("non&entity", "&unknown;entity").
//
// NOTE(review): the CharData entries for the later elements and the closing
// brace are not shown in this listing.
189 var nonStrictTokens = []Token{
191 StartElement{Name{"", "tag"}, []Attr{}},
192 CharData("non&entity"),
193 EndElement{Name{"", "tag"}},
195 StartElement{Name{"", "tag"}, []Attr{}},
196 CharData("&unknown;entity"),
197 EndElement{Name{"", "tag"}},
199 StartElement{Name{"", "tag"}, []Attr{}},
201 EndElement{Name{"", "tag"}},
203 StartElement{Name{"", "tag"}, []Attr{}},
205 EndElement{Name{"", "tag"}},
207 StartElement{Name{"", "tag"}, []Attr{}},
209 EndElement{Name{"", "tag"}},
211 StartElement{Name{"", "tag"}, []Attr{}},
213 EndElement{Name{"", "tag"}},
215 StartElement{Name{"", "tag"}, []Attr{}},
217 EndElement{Name{"", "tag"}},
219 StartElement{Name{"", "tag"}, []Attr{}},
221 EndElement{Name{"", "tag"}},
// TestNonStrictRawToken decodes nonStrictInput and hands the decoder to
// testRawToken, which compares the raw tokens against nonStrictTokens.
// NOTE(review): embedded line 227 is absent from this listing; presumably it
// configures the decoder (for example relaxing strict mode) — confirm.
225 func TestNonStrictRawToken(t *testing.T) {
226 d := NewDecoder(strings.NewReader(nonStrictInput))
228 testRawToken(t, d, nonStrictInput, nonStrictTokens)
// downCaser is the reader that TestRawTokenAltEncoding returns from its
// CharsetReader hook. Its fields are not shown in this listing; its methods
// use d.t to fail the test and d.r to read bytes, and it is constructed as
// downCaser{t, input.(io.ByteReader)}.
231 type downCaser struct {
// ReadByte reads one byte from the wrapped reader d.r. The visible condition
// singles out ASCII upper-case letters ('A' through 'Z').
// NOTE(review): the body of that branch and the return are not shown;
// going by the type name it presumably lower-cases the byte — confirm.
236 func (d *downCaser) ReadByte() (c byte, err error) {
237 c, err = d.r.ReadByte()
238 if c >= 'A' && c <= 'Z' {
// Read fails the test immediately through d.t: any call to Read on a
// downCaser is treated as unexpected.
244 func (d *downCaser) Read(p []byte) (int, error) {
245 d.t.Fatalf("unexpected Read call on downCaser reader")
// TestRawTokenAltEncoding installs a CharsetReader that accepts only the
// charset "x-testing-uppercase" (any other value fails the test) and wraps
// the decoder's input in a downCaser. It then uses testRawToken to compare the
// raw tokens of testInputAltEncoding against rawTokensAltEncoding.
249 func TestRawTokenAltEncoding(t *testing.T) {
250 d := NewDecoder(strings.NewReader(testInputAltEncoding))
251 d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
252 if charset != "x-testing-uppercase" {
253 t.Fatalf("unexpected charset %q", charset)
// The single-value type assertion panics if input is not an io.ByteReader.
255 return &downCaser{t, input.(io.ByteReader)}, nil
257 testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
// TestRawTokenAltEncodingNoConverter decodes testInputAltEncoding without
// setting CharsetReader. Going by the failure messages, the first RawToken
// call must yield a token, and the second must yield a nil token together
// with an error whose text contains the encoding name.
// NOTE(review): the conditions guarding each failure message are not shown
// in this listing.
260 func TestRawTokenAltEncodingNoConverter(t *testing.T) {
261 d := NewDecoder(strings.NewReader(testInputAltEncoding))
262 token, err := d.RawToken()
264 t.Fatalf("expected a token on first RawToken call")
269 token, err = d.RawToken()
271 t.Errorf("expected a nil token; got %#v", token)
274 t.Fatalf("expected an error on second RawToken call")
// The error text must mention the encoding the decoder could not handle.
276 const encoding = "x-testing-uppercase"
277 if !strings.Contains(err.Error(), encoding) {
278 t.Errorf("expected error to contain %q; got error: %v",
// testRawToken reads one raw token from d for each entry of rawTokens and
// reports any token that is not deeply equal to the expected entry. It also
// checks each token's [start,end) span, taken from d.InputOffset before and
// after the RawToken call, against raw, the text d is reading.
//
// A read error is fatal; mismatches and bad offsets are reported with Errorf.
//
// NOTE(review): several lines of this function are missing from this listing,
// including the switch header and the code that maintains lastEnd.
283 func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
285 for i, want := range rawTokens {
286 start := d.InputOffset()
287 have, err := d.RawToken()
288 end := d.InputOffset()
290 t.Fatalf("token %d: unexpected error: %s", i, err)
292 if !reflect.DeepEqual(have, want) {
// Format CharData with %q and every other token type with %#v.
293 var shave, swant string
294 if _, ok := have.(CharData); ok {
295 shave = fmt.Sprintf("CharData(%q)", have)
297 shave = fmt.Sprintf("%#v", have)
299 if _, ok := want.(CharData); ok {
300 swant = fmt.Sprintf("CharData(%q)", want)
302 swant = fmt.Sprintf("%#v", want)
304 t.Errorf("token %d = %s, want %s", i, shave, swant)
307 // Check that InputOffset returned actual token.
// A token must not start before the previous token's end (lastEnd).
309 case start < lastEnd:
310 t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
312 // Special case: EndElement can be synthesized.
313 if start == end && end == lastEnd {
316 t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
317 case end > int64(len(raw)):
318 t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
// Text that contains '<' or '>' must begin with '<' and end with '>';
// anything else is reported as a misaligned token.
320 text := raw[start:end]
321 if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
322 t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
// nestedDirectivesInput is the document tokenized by TestNestedDirectives.
// Each line is a DOCTYPE directive containing a nested <!ENTITY ...>
// declaration; all but the first put '<' or '>' inside a quoted value.
329 // Ensure that directives (specifically !DOCTYPE) include the complete
330 // text of any nested directives, noting that < and > do not change
331 // nesting depth if they are in single or double quotes.
333 var nestedDirectivesInput = `
334 <!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
335 <!DOCTYPE [<!ENTITY xlt ">">]>
336 <!DOCTYPE [<!ENTITY xlt "<">]>
337 <!DOCTYPE [<!ENTITY xlt '>'>]>
338 <!DOCTYPE [<!ENTITY xlt '<'>]>
339 <!DOCTYPE [<!ENTITY xlt '">'>]>
340 <!DOCTYPE [<!ENTITY xlt "'<">]>
// nestedDirectivesTokens lists the tokens TestNestedDirectives expects from
// nestedDirectivesInput. Each visible Directive holds the complete DOCTYPE
// text, including the nested <!ENTITY ...> declaration and any quoted '<' or
// '>' characters.
// NOTE(review): the embedded line numbers skip values, so entries between
// the directives are not shown here.
343 var nestedDirectivesTokens = []Token{
345 Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
347 Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
349 Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
351 Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
353 Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
355 Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
357 Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
// TestNestedDirectives reads one token from nestedDirectivesInput for each
// entry of nestedDirectivesTokens and reports every token that is not deeply
// equal to the expected one. A decoding error is fatal.
361 func TestNestedDirectives(t *testing.T) {
362 d := NewDecoder(strings.NewReader(nestedDirectivesInput))
364 for i, want := range nestedDirectivesTokens {
365 have, err := d.Token()
367 t.Fatalf("token %d: unexpected error: %s", i, err)
369 if !reflect.DeepEqual(have, want) {
370 t.Errorf("token %d = %#v want %#v", i, have, want)
// TestToken decodes testInput with testEntity installed as the decoder's
// entity map, reads one token per entry of cookedTokens with Token, and
// reports every token that is not deeply equal to the expected one. A
// decoding error is fatal.
375 func TestToken(t *testing.T) {
376 d := NewDecoder(strings.NewReader(testInput))
377 d.Entity = testEntity
379 for i, want := range cookedTokens {
380 have, err := d.Token()
382 t.Fatalf("token %d: unexpected error: %s", i, err)
384 if !reflect.DeepEqual(have, want) {
385 t.Errorf("token %d = %#v want %#v", i, have, want)
// TestSyntax tokenizes every entry of xmlInput until Token returns an error
// and fails unless that error is a *SyntaxError.
390 func TestSyntax(t *testing.T) {
391 for i := range xmlInput {
392 d := NewDecoder(strings.NewReader(xmlInput[i]))
// Discard tokens; only the error that ends the loop matters.
394 for _, err = d.Token(); err == nil; _, err = d.Token() {
396 if _, ok := err.(*SyntaxError); !ok {
397 t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
// allScalars is the struct type of all, the value TestAllScalars expects
// after unmarshalling testScalarsInput.
// NOTE(review): its fields are not shown in this listing.
402 type allScalars struct {
// all is the value TestAllScalars expects after unmarshalling
// testScalarsInput.
// NOTE(review): its field values are not shown in this listing.
424 var all = allScalars{
// testScalarsInput is the <allscalars> document that TestAllScalars
// unmarshals; each child element supplies the text for one scalar value.
// NOTE(review): only some of the child elements are shown in this listing.
448 const testScalarsInput = `<allscalars>
451 <False1>false</False1>
463 <Uintptr>11</Uintptr>
465 <Float32>13.0</Float32>
466 <Float64>14.0</Float64>
468 <PtrString>16</PtrString>
// TestAllScalars unmarshals testScalarsInput into a and reports a mismatch
// unless a is deeply equal to all.
// NOTE(review): the declaration of a and the handling of err are not shown
// in this listing; a is presumably an allScalars — confirm.
471 func TestAllScalars(t *testing.T) {
473 err := Unmarshal([]byte(testScalarsInput), &a)
478 if !reflect.DeepEqual(a, all) {
479 t.Errorf("have %+v want %+v", a, all)
// TestIssue569 unmarshals an <item> whose child element is named Field_a and
// requires the decoded i.Field_a to equal "abcd".
// NOTE(review): the declaration of i is not shown in this listing.
487 func TestIssue569(t *testing.T) {
488 data := `<item><Field_a>abcd</Field_a></item>`
490 err := Unmarshal([]byte(data), &i)
// One check covers both an Unmarshal error and a wrong field value.
492 if err != nil || i.Field_a != "abcd" {
493 t.Fatal("Expecting abcd")
// TestUnquotedAttrs checks that a non-strict decoder accepts an attribute
// value written without quotes: for the input below the first token must be
// a StartElement named "tag" whose first attribute is attr="azAZ09:-_" (the
// tab before '>' is not part of the value).
func TestUnquotedAttrs(t *testing.T) {
	data := "<tag attr=azAZ09:-_\t>"
	d := NewDecoder(strings.NewReader(data))
	// Unquoted attribute values are only accepted outside strict mode.
	d.Strict = false
	token, err := d.Token()
	if err != nil {
		// Any error (not only *SyntaxError) leaves no usable token, so stop
		// here instead of panicking on the assertions below.
		t.Fatalf("Unexpected error: %v", err)
	}
	start, ok := token.(StartElement)
	if !ok {
		t.Fatalf("Unexpected token type: %T", token)
	}
	if start.Name.Local != "tag" {
		t.Errorf("Unexpected tag name: %v", start.Name.Local)
	}
	if len(start.Attr) == 0 {
		t.Fatalf("Unexpected attribute count: %d", len(start.Attr))
	}
	attr := start.Attr[0]
	if attr.Value != "azAZ09:-_" {
		t.Errorf("Unexpected attribute value: %v", attr.Value)
	}
	if attr.Name.Local != "attr" {
		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
	}
}
// TestValuelessAttrs checks that a non-strict decoder accepts attributes
// written without a value. Each case is {input, element name, attribute
// name}; the attribute's value is expected to equal its own name.
func TestValuelessAttrs(t *testing.T) {
	tests := [][3]string{
		{"<p nowrap>", "p", "nowrap"},
		{"<p nowrap >", "p", "nowrap"},
		{"<input checked/>", "input", "checked"},
		{"<input checked />", "input", "checked"},
	}
	for _, test := range tests {
		d := NewDecoder(strings.NewReader(test[0]))
		// Valueless attributes are only accepted outside strict mode.
		d.Strict = false
		token, err := d.Token()
		if err != nil {
			// Any error (not only *SyntaxError) leaves no usable token; report
			// it and move on rather than panicking on the assertions below.
			t.Errorf("Unexpected error: %v", err)
			continue
		}
		start, ok := token.(StartElement)
		if !ok {
			t.Errorf("Unexpected token type: %T", token)
			continue
		}
		if start.Name.Local != test[1] {
			t.Errorf("Unexpected tag name: %v", start.Name.Local)
		}
		if len(start.Attr) == 0 {
			t.Errorf("Unexpected attribute count: %d", len(start.Attr))
			continue
		}
		attr := start.Attr[0]
		if attr.Value != test[2] {
			t.Errorf("Unexpected attribute value: %v", attr.Value)
		}
		if attr.Name.Local != test[2] {
			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
		}
	}
}
// TestCopyTokenCharData checks CopyToken on a CharData token: the copy must
// be deeply equal to the original at first, and must no longer be equal at
// the second check.
// NOTE(review): the statement between the two checks (embedded lines
// 550-551) is missing from this listing; presumably it modifies data so that
// a copy sharing the original buffer would still compare equal — confirm.
544 func TestCopyTokenCharData(t *testing.T) {
545 data := []byte("same data")
546 var tok1 Token = CharData(data)
547 tok2 := CopyToken(tok1)
548 if !reflect.DeepEqual(tok1, tok2) {
549 t.Error("CopyToken(CharData) != CharData")
552 if reflect.DeepEqual(tok1, tok2) {
553 t.Error("CopyToken(CharData) uses same buffer.")
557 func TestCopyTokenStartElement(t *testing.T) {
558 elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
560 tok2 := CopyToken(tok1)
561 if tok1.(StartElement).Attr[0].Value != "en" {
562 t.Error("CopyToken overwrote Attr[0]")
564 if !reflect.DeepEqual(tok1, tok2) {
565 t.Error("CopyToken(StartElement) != StartElement")
567 tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
568 if reflect.DeepEqual(tok1, tok2) {
569 t.Error("CopyToken(CharData) uses same buffer.")
// TestSyntaxErrorLineNum checks that the *SyntaxError produced for the input
// below reports line 3, the line holding the malformed end tag "</>".
func TestSyntaxErrorLineNum(t *testing.T) {
	input := "<P>Foo<P>\n\n<P>Bar</>\n"
	d := NewDecoder(strings.NewReader(input))
	// Discard tokens until the decoder reports its first error.
	var err error
	for _, err = d.Token(); err == nil; _, err = d.Token() {
	}
	synerr, ok := err.(*SyntaxError)
	if !ok {
		// synerr is nil here; stop before synerr.Line is read below.
		t.Fatal("Expected SyntaxError.")
	}
	if synerr.Line != 3 {
		t.Error("SyntaxError didn't have correct line number.")
	}
}
// TestTrailingRawToken reads raw tokens from an input that ends with a space
// after the closing tag, looping until RawToken returns an error. Going by
// the failure message, that final error is expected to be io.EOF.
// NOTE(review): the condition guarding Fatalf is not shown in this listing.
588 func TestTrailingRawToken(t *testing.T) {
589 input := `<FOO></FOO> `
590 d := NewDecoder(strings.NewReader(input))
592 for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
595 t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
// TestTrailingToken reads tokens from an input that ends with a space after
// the closing tag, looping until Token returns an error. Going by the
// failure message, that final error is expected to be io.EOF.
// NOTE(review): the condition guarding Fatalf is not shown in this listing.
599 func TestTrailingToken(t *testing.T) {
600 input := `<FOO></FOO> `
601 d := NewDecoder(strings.NewReader(input))
603 for _, err = d.Token(); err == nil; _, err = d.Token() {
606 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
// TestEntityInsideCDATA reads tokens from an input whose CDATA section
// contains a bare ampersand ("&val=foo"), looping until Token returns an
// error. Going by the failure message, that final error is expected to be
// io.EOF, i.e. the ampersand must not cause a decoding error.
// NOTE(review): the condition guarding Fatalf is not shown in this listing.
610 func TestEntityInsideCDATA(t *testing.T) {
611 input := `<test><![CDATA[ &val=foo ]]></test>`
612 d := NewDecoder(strings.NewReader(input))
614 for _, err = d.Token(); err == nil; _, err = d.Token() {
617 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
// characterTests pairs an input document (in) with the SyntaxError message
// (err) that TestDisallowedCharacters expects the decoder to produce for it.
// NOTE(review): the struct's field declarations are not shown in this
// listing; the names in and err come from their use in
// TestDisallowedCharacters.
621 var characterTests = []struct {
625 {"\x12<doc/>", "illegal character code U+0012"},
626 {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
627 {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
628 {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
629 {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
630 {"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
631 {"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
632 {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
633 {"<doc>&hello;</doc>", "invalid character entity &hello;"},
// TestDisallowedCharacters decodes each characterTests input and requires the
// resulting error to be a *SyntaxError whose Msg equals the expected text.
// NOTE(review): the code that reads tokens and produces err (embedded lines
// 640-644) is not shown in this listing.
636 func TestDisallowedCharacters(t *testing.T) {
638 for i, tt := range characterTests {
639 d := NewDecoder(strings.NewReader(tt.in))
645 synerr, ok := err.(*SyntaxError)
647 t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
649 if synerr.Msg != tt.err {
650 t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
// procInstEncodingTest is a struct type whose fields are not shown in this
// listing.
// NOTE(review): nothing visible here uses this type; procInstTests is
// declared with an anonymous struct instead — check whether it is dead code.
655 type procInstEncodingTest struct {
// procInstTests pairs the text of a processing instruction (input) with the
// values TestProcInstEncoding expects procInst to return for "version"
// (expect[0]) and "encoding" (expect[1]). An unquoted encoding value is
// expected to yield the empty string.
// NOTE(review): the struct's field declarations are not shown in this
// listing; the names input and expect come from their use in
// TestProcInstEncoding.
659 var procInstTests = []struct {
663 {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
664 {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
665 {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
666 {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
667 {`encoding="FOO" `, [2]string{"", "FOO"}},
// TestProcInstEncoding calls procInst with the parameter names "version" and
// "encoding" on every procInstTests input and reports each result that
// differs from the expected value.
670 func TestProcInstEncoding(t *testing.T) {
671 for _, test := range procInstTests {
672 if got := procInst("version", test.input); got != test.expect[0] {
673 t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
675 if got := procInst("encoding", test.input); got != test.expect[1] {
676 t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
// directivesWithCommentsInput is the document tokenized by
// TestDirectivesWithComments. Each line is a DOCTYPE directive that contains
// <!-- ... --> comments next to nested <!ENTITY ...> declarations; the last
// line also contains comment-like fragments such as "<!->" and "<!>".
681 // Ensure that directives with comments include the complete
682 // text of any nested directives.
684 var directivesWithCommentsInput = `
685 <!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
686 <!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
687 <!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
// directivesWithCommentsTokens lists the tokens TestDirectivesWithComments
// expects from directivesWithCommentsInput. In the visible Directive entries
// the <!-- ... --> comments do not appear, while the nested <!ENTITY ...>
// declarations and the "<!->" and "<!>" fragments are kept.
// NOTE(review): the embedded line numbers skip values, so entries between
// the directives are not shown here.
690 var directivesWithCommentsTokens = []Token{
692 Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
694 Directive(`DOCTYPE [<!ENTITY go "Golang">]`),
696 Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang">]`),
// TestDirectivesWithComments reads one token from directivesWithCommentsInput
// for each entry of directivesWithCommentsTokens and reports every token that
// is not deeply equal to the expected one. A decoding error is fatal.
700 func TestDirectivesWithComments(t *testing.T) {
701 d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
703 for i, want := range directivesWithCommentsTokens {
704 have, err := d.Token()
706 t.Fatalf("token %d: unexpected error: %s", i, err)
708 if !reflect.DeepEqual(have, want) {
709 t.Errorf("token %d = %#v want %#v", i, have, want)
// errWriter is used by TestEscapeTextIOErrors as a destination that cannot
// be written to.
714 // Writer whose Write method always returns an error.
715 type errWriter struct{}
// Write ignores p and always returns 0 and the error "unwritable".
717 func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") }
// TestEscapeTextIOErrors calls EscapeText with an errWriter destination and
// requires the returned error to be non-nil with the text "unwritable", i.e.
// the writer's error must reach the caller.
719 func TestEscapeTextIOErrors(t *testing.T) {
720 expectErr := "unwritable"
721 err := EscapeText(errWriter{}, []byte{'A'})
723 if err == nil || err.Error() != expectErr {
724 t.Errorf("have %v, want %v", err, expectErr)
// TestEscapeTextInvalidChar calls EscapeText on input containing a NUL byte
// and requires it to succeed and to write U+FFFD in place of that byte.
728 func TestEscapeTextInvalidChar(t *testing.T) {
729 input := []byte("A \x00 terminated string.")
730 expected := "A \uFFFD terminated string."
732 buff := new(bytes.Buffer)
733 if err := EscapeText(buff, input); err != nil {
734 t.Fatalf("have %v, want nil", err)
736 text := buff.String()
738 if text != expected {
739 t.Errorf("have %v, want %v", text, expected)
// TestIssue5880 marshals the value T{192, 168, 0, 1} and requires the output
// to be valid UTF-8. A Marshal error is also reported.
// NOTE(review): the declaration of T (embedded line 744) and the condition
// guarding the first Errorf are not shown in this listing.
743 func TestIssue5880(t *testing.T) {
745 data, err := Marshal(T{192, 168, 0, 1})
747 t.Errorf("Marshal error: %v", err)
749 if !utf8.Valid(data) {
750 t.Errorf("Marshal generated invalid UTF-8: %x", data)