1 // Go support for Protocol Buffers - Google's data interchange format
3 // Copyright 2010 The Go Authors. All rights reserved.
4 // https://github.com/golang/protobuf
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 // Functions for parsing the Text protocol buffer format.
35 // TODO: message sets.
47 // anyRepeatedlyUnpacked is the error format emitted when deserializing an Any
// whose fields are already set. Its single %q argument is the name of the
// field that was already populated ("type_url" or "value" at the call sites
// in readStruct).
48 const anyRepeatedlyUnpacked = "Any message unpacked multiple times, or %q already set"
// ParseError reports a failure to parse text-format input together with the
// position in the input at which the failure was detected.
type ParseError struct {
	Message string // description of what went wrong
	Line    int    // 1-based line number
	Offset  int    // 0-based byte offset from start of input
}

// Error implements the error interface. An error on the first line is
// rendered as "line 1.<offset>: <message>"; an error on any other line shows
// only the line number.
func (p *ParseError) Error() string {
	switch p.Line {
	case 1:
		// show offset only for first line
		return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
	default:
		return fmt.Sprintf("line %d: %v", p.Line, p.Message)
	}
}
// Position and decoded-text fields of a scanned token (presumably the tail of
// the token struct, whose header is not visible here). The String method below
// additionally reads t.value and t.err.
67 line int // line number
68 offset int // byte number from start of input, not start of line
69 unquoted string // the unquoted version of value, if it was a quoted string
// String renders the token for diagnostics: either its quoted text together
// with the line and offset at which it starts, or the parse error recorded on
// it. NOTE(review): the condition choosing between the two forms is not
// visible here; presumably it is whether t.err is nil.
72 func (t *token) String() string {
// Token text (quoted with %q) and its position.
74 return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
// The error carried by the token.
76 return fmt.Sprintf("parse error: %v", t.err)
// textParser holds the state of one text-format parse: the input that has not
// been consumed yet plus flags recording completion and a pending one-token
// backup. Methods in this file also use p.offset, p.line and p.cur (the
// current token).
79 type textParser struct {
80 s string // remaining input
81 done bool // whether the parsing is finished (success or error)
82 backed bool // whether back() was called
// newTextParser returns a parser for the text-format input s.
// NOTE(review): the body is not visible here; since ParseError.Line is
// documented as 1-based, the line counters presumably start at 1 - confirm.
87 func newTextParser(s string) *textParser {
// errorf creates a *ParseError whose message is format applied to a (as with
// fmt.Sprintf) and whose position is that of the current token.
95 func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
// Unkeyed literal: the values are the message, then the line and the offset of
// the current token.
96 pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
// isIdentOrNumberChar reports whether c can be part of an identifier or a
// number, that is, whether it matches [-+._A-Za-z0-9].
func isIdentOrNumberChar(c byte) bool {
	if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
		return true
	}
	return c == '-' || c == '+' || c == '.' || c == '_'
}
// isWhitespace reports whether c is a space, tab, newline or carriage return.
func isWhitespace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}
// isQuote reports whether c opens (and closes) a quoted string: a double
// quote or a single quote.
func isQuote(c byte) bool {
	return c == '"' || c == '\''
}
// skipWhitespace removes leading whitespace and '#' comments from the
// remaining input p.s.
133 func (p *textParser) skipWhitespace() {
// Keep going while the byte at i is whitespace or starts a comment.
135 for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
137 // comment; skip to end of line or input
138 for i < len(p.s) && p.s[i] != '\n' {
// Drop the i bytes that were skipped.
151 p.s = p.s[i:len(p.s)]
// advance scans the next token from p.s into p.cur. A token is a single
// punctuation symbol, a quoted string, or a run of identifier/number bytes.
// Lexical problems are reported through p.errorf.
157 func (p *textParser) advance() {
164 // Start of non-whitespace
// Record where the token begins.
166 p.cur.offset, p.cur.line = p.offset, p.line
// Punctuation: each of these bytes is a token on its own.
169 case '<', '>', '{', '}', ':', '[', ']', ';', ',', '/':
171 p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
// Quoted string: scan for the byte equal to the opening quote p.s[0]; the scan
// also stops at a newline, so a string cannot span lines.
175 for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' {
// A backslash followed by another byte is an escape (presumably the escaped
// byte is stepped over so that an escaped quote does not end the string).
176 if p.s[i] == '\\' && i+1 < len(p.s) {
// The scan ended without reaching the matching quote.
182 if i >= len(p.s) || p.s[i] != p.s[0] {
183 p.errorf("unmatched quote")
// Decode the escape sequences found between the quotes.
186 unq, err := unquoteC(p.s[1:i], rune(p.s[0]))
188 p.errorf("invalid quoted string %s: %v", p.s[0:i+1], err)
// The token value keeps its surrounding quotes.
191 p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
// Anything else: take the longest run of identifier/number bytes.
195 for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
199 p.errorf("unexpected byte %#x", p.s[0])
202 p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
// Account for the consumed token in the running byte offset.
204 p.offset += len(p.cur.value)
var (
	// errBadUTF8 is returned when quoted text is not valid UTF-8.
	errBadUTF8 = errors.New("proto: bad UTF-8")
	// errBadHex is returned when a \u or \U escape contains a byte that is
	// not a hexadecimal digit.
	errBadHex = errors.New("proto: bad hexadecimal")
)

// unquoteC decodes the escape sequences in s, the text found between a pair
// of quote characters, and returns the resulting string.
//
// quote is the delimiter the text was quoted with. It only influences the
// allocation-free fast path: text containing neither a backslash nor quote is
// returned unchanged. Otherwise every backslash sequence is decoded by
// unescape and all other runes are copied through; invalid UTF-8 on this path
// yields errBadUTF8.
func unquoteC(s string, quote rune) (string, error) {
	// This is based on C++'s tokenizer.cc.
	// Despite its name, this is *not* parsing C syntax.
	// For instance, "\0" is an invalid quoted string.

	// Avoid allocation in trivial cases.
	simple := true
	for _, r := range s {
		if r == '\\' || r == quote {
			simple = false
			break
		}
	}
	if simple {
		return s, nil
	}

	buf := make([]byte, 0, 3*len(s)/2)
	for len(s) > 0 {
		r, n := utf8.DecodeRuneInString(s)
		if r == utf8.RuneError && n == 1 {
			return "", errBadUTF8
		}
		s = s[n:]
		if r != '\\' {
			if r < utf8.RuneSelf {
				buf = append(buf, byte(r))
			} else {
				buf = append(buf, string(r)...)
			}
			continue
		}

		// s now starts just after the backslash.
		ch, tail, err := unescape(s)
		if err != nil {
			return "", err
		}
		buf = append(buf, ch...)
		s = tail
	}
	return string(buf), nil
}

// unescape decodes one escape sequence. s is the text immediately following
// the backslash; ch is the decoded result and tail is what remains of s after
// the sequence.
//
// Supported sequences:
//
//	\a \b \f \n \r \t \v \? \' \" \\   the usual single-character escapes
//	\ooo                                exactly three octal digits, one byte
//	\xhh, \Xhh                          exactly two hex digits, one byte
//	\uhhhh, \Uhhhhhhhh                  a Unicode code point, encoded as UTF-8
//
// It returns errBadUTF8 if s does not start with valid UTF-8, errBadHex if a
// \u or \U escape contains a non-hexadecimal digit, and a descriptive error
// for truncated or unknown escapes and for \u/\U values that are not valid
// Unicode code points (surrogates, or values above U+10FFFF).
func unescape(s string) (ch string, tail string, err error) {
	r, n := utf8.DecodeRuneInString(s)
	if r == utf8.RuneError && n == 1 {
		return "", "", errBadUTF8
	}
	s = s[n:]
	switch r {
	case 'a':
		return "\a", s, nil
	case 'b':
		return "\b", s, nil
	case 'f':
		return "\f", s, nil
	case 'n':
		return "\n", s, nil
	case 'r':
		return "\r", s, nil
	case 't':
		return "\t", s, nil
	case 'v':
		return "\v", s, nil
	case '?':
		return "?", s, nil // trigraph workaround
	case '\'', '"', '\\':
		return string(r), s, nil
	case '0', '1', '2', '3', '4', '5', '6', '7', 'x', 'X':
		if len(s) < 2 {
			return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
		}
		base := 8
		ss := s[:2]
		s = s[2:]
		if r == 'x' || r == 'X' {
			base = 16
		} else {
			// For octal the introducing digit is part of the number.
			ss = string(r) + ss
		}
		// bitSize 8 makes values above 0xFF (e.g. \777) an error.
		i, err := strconv.ParseUint(ss, base, 8)
		if err != nil {
			return "", "", err
		}
		return string([]byte{byte(i)}), s, nil
	case 'u', 'U':
		n := 4
		if r == 'U' {
			n = 8
		}
		if len(s) < n {
			return "", "", fmt.Errorf(`\%c requires %d digits`, r, n)
		}

		// The digits spell a code point, which must be emitted as UTF-8;
		// copying the raw hex bytes would produce garbage for anything
		// outside ASCII. Eight hex digits fit exactly in a uint32.
		var cp uint32
		for i := 0; i < n; i++ {
			d, ok := unhex(s[i])
			if !ok {
				return "", "", errBadHex
			}
			cp = cp<<4 | uint32(d)
		}
		if cp > utf8.MaxRune || !utf8.ValidRune(rune(cp)) {
			return "", "", fmt.Errorf(`\%c%s is not a valid Unicode code point`, r, s[:n])
		}
		return string(rune(cp)), s[n:], nil
	}
	return "", "", fmt.Errorf(`unknown escape \%c`, r)
}

// unhex returns the numeric value of the hexadecimal digit b; ok is false
// when b is not a hexadecimal digit.
//
// Adapted from src/pkg/strconv/quote.go.
func unhex(b byte) (v byte, ok bool) {
	switch {
	case '0' <= b && b <= '9':
		return b - '0', true
	case 'a' <= b && b <= 'f':
		return b - 'a' + 10, true
	case 'A' <= b && b <= 'F':
		return b - 'A' + 10, true
	}
	return 0, false
}
334 // Back off the parser by one token. Can only be done between calls to next().
335 // It makes the next advance() a no-op.
336 func (p *textParser) back() { p.backed = true }
338 // Advances the parser and returns the new current token.
// Adjacent quoted strings separated only by whitespace are merged into one
// token before it is returned.
339 func (p *textParser) next() *token {
// After back(), or once parsing is finished, no new token is scanned
// (presumably the current token is handed out again).
340 if p.backed || p.done {
// The token just scanned is a quoted string.
347 } else if len(p.cur.value) > 0 && isQuote(p.cur.value[0]) {
348 // Look for multiple quoted strings separated by whitespace,
349 // and concatenate them.
// Stop at end of input or when what follows is not another quoted string.
353 if p.done || !isQuote(p.s[0]) {
// A lexical error in a later piece is checked for here.
357 if p.cur.err != nil {
// value joins the raw pieces with a single space; unquoted is the plain
// concatenation of the decoded pieces.
360 cat.value += " " + p.cur.value
361 cat.unquoted += p.cur.unquoted
363 p.done = false // parser may have seen EOF, but we want to return cat
// consumeToken expects the token s in the input. When a different token is
// found, the returned parse error names both the expected and the found token.
// NOTE(review): how tok is obtained is not visible here; presumably via
// p.next().
369 func (p *textParser) consumeToken(s string) error {
376 return p.errorf("expected %q, found %q", s, tok.value)
381 // Return a RequiredNotSetError indicating which required field was not set.
// sv is the struct value that was being filled in. The error text has the
// form "<struct type>.<original field name>".
382 func (p *textParser) missingRequiredFieldError(sv reflect.Value) *RequiredNotSetError {
// st is presumably sv's type (its declaration is not visible here).
384 sprops := GetProperties(st)
385 for i := 0; i < st.NumField(); i++ {
// A field that is not nil has been set, so it is not the missing one.
386 if !isNil(sv.Field(i)) {
390 props := sprops.Prop[i]
// Name the field by its original name from the field properties.
392 return &RequiredNotSetError{fmt.Sprintf("%v.%v", st, props.OrigName)}
// Fallback when the loop did not identify a field.
395 return &RequiredNotSetError{fmt.Sprintf("%v.<unknown field name>", st)} // should not happen
398 // Returns the index in the struct for the named field, as well as the parsed tag properties.
399 func structFieldByName(sprops *StructProperties, name string) (int, *Properties, bool) {
400 i, ok := sprops.decoderOrigNames[name]
402 return i, sprops.Prop[i], true
404 return -1, nil, false
407 // Consume a ':' from the input stream (if the next token is a colon),
408 // returning an error if a colon is needed but not present.
// typ is the Go type of the field about to be read; its kind is inspected
// below to decide whether the colon may be omitted. props describes the same
// field (how it is consulted is not visible here).
409 func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError {
// No colon was found: work out whether that is acceptable for this field.
414 if tok.value != ":" {
415 // Colon is optional when the field is a group or message.
421 // A "bytes" field is either a message, a string, or a repeated field;
422 // those three become *T, *string and []T respectively, so we can check for
423 // this field being a pointer to a non-string.
424 if typ.Kind() == reflect.Ptr {
// *string is a string field, not a message.
426 if typ.Elem().Kind() == reflect.String {
429 } else if typ.Kind() == reflect.Slice {
// A slice whose elements are not pointers is not a repeated message.
431 if typ.Elem().Kind() != reflect.Ptr {
434 } else if typ.Kind() == reflect.String {
435 // The proto3 exception is for a string field,
436 // which requires a colon.
442 return p.errorf("expected ':', found %q", tok.value)
// readStruct parses text-format fields into the struct value sv until the
// token terminator is read. It handles ordinary fields, oneof members, map
// entries, "[extension]" names and the expanded "[type/url]" form of Any.
// Besides parse errors it can return the error produced by
// missingRequiredFieldError for sv.
449 func (p *textParser) readStruct(sv reflect.Value, terminator string) error {
451 sprops := GetProperties(st)
// reqCount starts at the number of required fields recorded for the struct.
452 reqCount := sprops.reqCount
453 var reqFieldErr error
// fieldSet remembers which field names were already parsed, so that a second
// occurrence of a non-repeated field can be rejected.
454 fieldSet := make(map[string]bool)
455 // A struct is a sequence of "name: value", terminated by one of
456 // '>' or '}', or the end of the input. A name may also be
457 // "[extension]" or "[type/url]".
459 // The whole struct can also be an expanded Any message, like:
460 // [type/url] < ... struct contents ... >
466 if tok.value == terminator {
469 if tok.value == "[" {
470 // Looks like an extension or an Any.
472 // TODO: Check whether we need to handle
473 // namespace rooted names (e.g. ".something.Foo").
474 extName, err := p.consumeExtName()
479 if s := strings.LastIndex(extName, "/"); s >= 0 {
480 // If it contains a slash, it's an Any type URL.
// The message name is whatever follows the last '/'.
481 messageName := extName[s+1:]
482 mt := MessageType(messageName)
// Presumably reported when MessageType does not know the name (the check
// itself is not visible here).
484 return p.errorf("unrecognized message %q in google.protobuf.Any", messageName)
490 // consume an optional colon
491 if tok.value == ":" {
// Closing delimiter of the expanded message body; this local shadows the
// terminator parameter.
497 var terminator string
504 return p.errorf("expected '{' or '<', found %q", tok.value)
// Allocate a value of mt's element type, parse the expanded body into it, then
// serialize it so the bytes can be stored in the Any's Value field.
506 v := reflect.New(mt.Elem())
507 if pe := p.readStruct(v.Elem(), terminator); pe != nil {
510 b, err := Marshal(v.Interface().(Message))
512 return p.errorf("failed to marshal message of type %q: %v", messageName, err)
// Refuse to overwrite type_url or value when either was already provided,
// whether by an earlier expansion or by an explicit field.
514 if fieldSet["type_url"] {
515 return p.errorf(anyRepeatedlyUnpacked, "type_url")
517 if fieldSet["value"] {
518 return p.errorf(anyRepeatedlyUnpacked, "value")
520 sv.FieldByName("TypeUrl").SetString(extName)
521 sv.FieldByName("Value").SetBytes(b)
522 fieldSet["type_url"] = true
523 fieldSet["value"] = true
527 var desc *ExtensionDesc
528 // This could be faster, but it's functional.
529 // TODO: Do something smarter than a linear scan.
530 for _, d := range RegisteredExtensions(reflect.New(st).Interface().(Message)) {
531 if d.Name == extName {
537 return p.errorf("unrecognized extension %q", extName)
// Derive the field properties from the extension's struct tag.
540 props := &Properties{}
541 props.Parse(desc.Tag)
543 typ := reflect.TypeOf(desc.ExtensionType)
544 if err := p.checkForColon(props, typ); err != nil {
548 rep := desc.repeated()
550 // Read the extension structure, and set it in
551 // the value we're constructing.
// ext receives one parsed value: either of the extension type itself or of its
// element type (presumably selected by rep; the condition is not visible here).
552 var ext reflect.Value
554 ext = reflect.New(typ).Elem()
556 ext = reflect.New(typ.Elem()).Elem()
558 if err := p.readAny(ext, props); err != nil {
// *RequiredNotSetError is distinguished from every other readAny error.
559 if _, ok := err.(*RequiredNotSetError); !ok {
564 ep := sv.Addr().Interface().(Message)
566 SetExtension(ep, desc, ext.Interface())
// Slice-valued extension: append to the slice already stored, or to a new one.
568 old, err := GetExtension(ep, desc)
571 sl = reflect.ValueOf(old) // existing slice
573 sl = reflect.MakeSlice(typ, 0, 1)
575 sl = reflect.Append(sl, ext)
576 SetExtension(ep, desc, sl.Interface())
578 if err := p.consumeOptionalSeparator(); err != nil {
584 // This is a normal, non-extension field.
586 var dst reflect.Value
587 fi, props, ok := structFieldByName(sprops, name)
590 } else if oop, ok := sprops.OneofTypes[name]; ok {
// Oneof member: build a new wrapper value and parse into its first field.
593 nv := reflect.New(oop.Type.Elem())
594 dst = nv.Elem().Field(0)
// field is the struct's slot for this oneof; the error below guards against a
// second member overwriting one that was already parsed.
595 field := sv.Field(oop.Field)
597 return p.errorf("field '%s' would overwrite already parsed oneof '%s'", name, sv.Type().Field(oop.Field).Name)
602 return p.errorf("unknown field name %q in %v", name, st)
605 if dst.Kind() == reflect.Map {
606 // Consume any colon.
607 if err := p.checkForColon(props, dst.Type()); err != nil {
611 // Construct the map if it doesn't already exist.
613 dst.Set(reflect.MakeMap(dst.Type()))
// Scratch key and value, filled in from the entry's tokens below.
615 key := reflect.New(dst.Type().Key()).Elem()
616 val := reflect.New(dst.Type().Elem()).Elem()
618 // The map entry should be this sequence of tokens:
619 // < key : KEY value : VALUE >
620 // However, implementations may omit key or value, and technically
621 // we should support them in any order. See b/28924776 for a time
625 var terminator string
632 return p.errorf("expected '{' or '<', found %q", tok.value)
639 if tok.value == terminator {
644 if err := p.consumeToken(":"); err != nil {
647 if err := p.readAny(key, props.mkeyprop); err != nil {
650 if err := p.consumeOptionalSeparator(); err != nil {
654 if err := p.checkForColon(props.mvalprop, dst.Type().Elem()); err != nil {
657 if err := p.readAny(val, props.mvalprop); err != nil {
660 if err := p.consumeOptionalSeparator(); err != nil {
665 return p.errorf(`expected "key", "value", or %q, found %q`, terminator, tok.value)
// Store the parsed entry in the map.
669 dst.SetMapIndex(key, val)
673 // Check that it's not already set if it's not a repeated field.
674 if !props.Repeated && fieldSet[name] {
675 return p.errorf("non-repeated field %q was repeated", name)
678 if err := p.checkForColon(props, dst.Type()); err != nil {
682 // Parse into the field.
683 fieldSet[name] = true
684 if err := p.readAny(dst, props); err != nil {
// As for extensions, *RequiredNotSetError is told apart from other errors.
685 if _, ok := err.(*RequiredNotSetError); !ok {
694 if err := p.consumeOptionalSeparator(); err != nil {
// Name the required field that was left unset (presumably reached only when
// required fields are still missing; the guard is not visible here).
701 return p.missingRequiredFieldError(sv)
706 // consumeExtName consumes extension name or expanded Any type URL and the
707 // following ']'. It returns the name or URL consumed.
708 func (p *textParser) consumeExtName() (string, error) {
714 // If extension name or type url is quoted, it's a single token.
715 if len(tok.value) > 2 && isQuote(tok.value[0]) && tok.value[len(tok.value)-1] == tok.value[0] {
// Strip the surrounding quotes and decode any escape sequences.
716 name, err := unquoteC(tok.value[1:len(tok.value)-1], rune(tok.value[0]))
// The quoted name must be followed directly by the closing bracket.
720 return name, p.consumeToken("]")
723 // Consume everything up to "]"
// An unquoted name can arrive as several tokens ('/' is a token of its own),
// so the pieces are collected one by one.
725 for tok.value != "]" {
726 parts = append(parts, tok.value)
729 return "", p.errorf("unrecognized type_url or extension name: %s", tok.err)
// Rejoin the pieces without any separator.
732 return strings.Join(parts, ""), nil
735 // consumeOptionalSeparator consumes an optional semicolon or comma.
736 // It is used in readStruct to provide backward compatibility.
737 func (p *textParser) consumeOptionalSeparator() error {
// Any token other than ';' or ',' is not a separator (presumably it is pushed
// back for the caller; the body of this branch is not visible here).
742 if tok.value != ";" && tok.value != "," {
// readAny parses one value from the input into v, choosing the syntax from
// v's reflect kind. props describes the field being read; the visible code
// uses props.Enum to resolve symbolic enum names and passes props on when it
// recurses. A token that cannot be parsed for the destination kind produces
// an "invalid <type>: <token>" parse error.
748 func (p *textParser) readAny(v reflect.Value, props *Properties) error {
754 return p.errorf("unexpected EOF")
// Dispatch on the kind of the destination value.
757 switch fv := v; fv.Kind() {
760 if at.Elem().Kind() == reflect.Uint8 {
761 // Special case for []byte
762 if tok.value[0] != '"' && tok.value[0] != '\'' {
763 // Deliberately written out here, as the error after
764 // this switch statement would write "invalid []byte: ...",
765 // which is not as user-friendly.
766 return p.errorf("invalid string: %v", tok.value)
// The decoded text of the quoted token becomes the byte slice.
768 bytes := []byte(tok.unquoted)
769 fv.Set(reflect.ValueOf(bytes))
773 if tok.value == "[" {
774 // Repeated field with list notation, like [1,2,3].
// Append a zero element, then parse the next list item into it.
776 fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem()))
777 err := p.readAny(fv.Index(fv.Len()-1), props)
// After each item either the closing bracket or a comma is expected.
785 if tok.value == "]" {
788 if tok.value != "," {
789 return p.errorf("Expected ']' or ',' found %q", tok.value)
794 // One value of the repeated field.
796 fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem()))
797 return p.readAny(fv.Index(fv.Len()-1), props)
799 // true/1/t/True or false/f/0/False.
801 case "true", "1", "t", "True":
804 case "false", "0", "f", "False":
808 case reflect.Float32, reflect.Float64:
810 // Ignore 'f' for compatibility with output generated by C++, but don't
811 // remove 'f' when the value is "-inf" or "inf".
// NOTE(review): v is used as a string here, so it must be a local that shadows
// the reflect.Value parameter; its declaration is not visible - confirm.
812 if strings.HasSuffix(v, "f") && tok.value != "-inf" && tok.value != "inf" {
815 if f, err := strconv.ParseFloat(v, fv.Type().Bits()); err == nil {
// Try the token as a 32-bit integer first; base 0 lets the literal's prefix
// choose the base.
820 if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
// Otherwise try it as an enum value name, if the field names an enum type.
825 if len(props.Enum) == 0 {
828 m, ok := enumValueMaps[props.Enum]
832 x, ok := m[tok.value]
839 if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil {
845 // A basic field (indirected through pointer), or a repeated message/group
// Allocate the pointed-to value and parse into it.
847 fv.Set(reflect.New(fv.Type().Elem()))
848 return p.readAny(fv.Elem(), props)
// A string value must be a quoted token; its decoded text is stored.
850 if tok.value[0] == '"' || tok.value[0] == '\'' {
851 fv.SetString(tok.unquoted)
// Nested message: it must open with '{' or '<', and its body is parsed by
// readStruct up to the matching terminator.
855 var terminator string
862 return p.errorf("expected '{' or '<', found %q", tok.value)
864 // TODO: Handle nested messages which implement encoding.TextUnmarshaler.
865 return p.readStruct(fv, terminator)
867 if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
872 if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
// The token did not parse as a value of the destination's type.
877 return p.errorf("invalid %v: %v", v.Type(), tok.value)
880 // UnmarshalText reads a protocol buffer in Text format. UnmarshalText resets pb
881 // before starting to unmarshal, so any existing data in pb is always removed.
882 // If a required field is not set and no other error occurs,
883 // UnmarshalText returns *RequiredNotSetError.
884 func UnmarshalText(s string, pb Message) error {
// A message that implements encoding.TextUnmarshaler parses the text itself.
885 if um, ok := pb.(encoding.TextUnmarshaler); ok {
886 err := um.UnmarshalText([]byte(s))
// Otherwise parse into the value pb points to. The top-level message is read
// with an empty terminator.
890 v := reflect.ValueOf(pb)
891 if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil {