1 // Protocol Buffers for Go with Gadgets
3 // Copyright (c) 2013, The GoGo Authors. All rights reserved.
4 // http://github.com/gogo/protobuf
6 // Go support for Protocol Buffers - Google's data interchange format
8 // Copyright 2010 The Go Authors. All rights reserved.
9 // https://github.com/golang/protobuf
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
15 // * Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 // * Redistributions in binary form must reproduce the above
18 // copyright notice, this list of conditions and the following disclaimer
19 // in the documentation and/or other materials provided with the
21 // * Neither the name of Google Inc. nor the names of its
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
25 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 // Functions for parsing the Text protocol buffer format.
40 // TODO: message sets.
53 // anyRepeatedlyUnpacked is the error format used while deserializing an
// expanded Any when the field named by %q ("type_url" or "value") is already set.
54 const anyRepeatedlyUnpacked = "Any message unpacked multiple times, or %q already set"
// ParseError is the error reported for malformed text-format input; it
// records where in the input the problem was detected.
56 type ParseError struct {
58 Line int // 1-based line number
59 Offset int // 0-based byte offset from start of input
// Error implements the error interface. The text is either
// "line 1.<offset>: <message>" or "line <n>: <message>".
62 func (p *ParseError) Error() string {
64 // show the byte offset only for errors on the first line
65 return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
67 return fmt.Sprintf("line %d: %v", p.Line, p.Message)
73 line int // line number
74 offset int // byte number from start of input, not start of line
75 unquoted string // the unquoted version of value, if it was a quoted string
// String renders the token for debugging: either its quoted value together
// with its line and offset, or "parse error: ..." built from t.err.
78 func (t *token) String() string {
80 return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
82 return fmt.Sprintf("parse error: %v", t.err)
// textParser holds the tokenizer/parser state for one text-format input string.
85 type textParser struct {
86 s string // remaining, not yet tokenized input
87 done bool // whether the parsing is finished (success or error)
88 backed bool // whether back() was called
// newTextParser returns a *textParser for the text-format input s.
93 func newTextParser(s string) *textParser {
// errorf builds a *ParseError whose message is fmt.Sprintf(format, a...) and
// whose position is the line and offset of the current token (p.cur).
101 func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
102 pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
108 // isIdentOrNumberChar tests c against the byte set [-+._A-Za-z0-9], the
// characters that numbers and identifiers are matched by.
109 func isIdentOrNumberChar(c byte) bool {
111 case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z': // ASCII letters
113 case '0' <= c && c <= '9': // decimal digits
117 case '-', '+', '.', '_': // sign, dot and underscore
// isWhitespace tests c against space, tab, newline and carriage return.
123 func isWhitespace(c byte) bool {
125 case ' ', '\t', '\n', '\r':
// isQuote tests whether c is a string delimiter.
// NOTE(review): presumably '"' and '\'', the two quote bytes readAny checks
// for explicitly; confirm.
131 func isQuote(c byte) bool {
// skipWhitespace advances p.s past leading whitespace and '#' comments.
139 func (p *textParser) skipWhitespace() {
141 for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
143 // comment; skip to end of line or input
144 for i < len(p.s) && p.s[i] != '\n' {
157 p.s = p.s[i:len(p.s)] // drop everything that was skipped
// advance scans the next token from the front of p.s into p.cur.
// Three token shapes are recognized: a single punctuation byte, a quoted
// string, and a run of identifier/number bytes. Anything else, an unmatched
// quote, or a string that fails to unquote is reported through p.errorf.
163 func (p *textParser) advance() {
170 // Start of non-whitespace
172 p.cur.offset, p.cur.line = p.offset, p.line
175 case '<', '>', '{', '}', ':', '[', ']', ';', ',', '/':
// single-byte token
177 p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
// quoted string: scan for the matching quote byte, stopping at a newline
181 for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' {
182 if p.s[i] == '\\' && i+1 < len(p.s) {
// the scan ended without reaching the closing quote
188 if i >= len(p.s) || p.s[i] != p.s[0] {
189 p.errorf("unmatched quote")
// decode the text between the quotes
192 unq, err := unquoteC(p.s[1:i], rune(p.s[0]))
194 p.errorf("invalid quoted string %s: %v", p.s[0:i+1], err)
// the token value keeps its surrounding quotes
197 p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
// identifier or number: take the longest run of matching bytes
201 for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
205 p.errorf("unexpected byte %#x", p.s[0])
208 p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
// account for the bytes consumed by this token
210 p.offset += len(p.cur.value)
// errBadUTF8 is returned by unquoteC and unescape when the input is not valid UTF-8.
214 errBadUTF8 = errors.New("proto: bad UTF-8")
// unquoteC decodes s, the contents of a string literal that was delimited by
// quote, and returns the decoded string. It returns errBadUTF8 when s holds
// invalid UTF-8, and escape sequences are decoded by unescape.
217 func unquoteC(s string, quote rune) (string, error) {
218 // This is based on C++'s tokenizer.cc.
219 // Despite its name, this is *not* parsing C syntax.
220 // For instance, "\0" is an invalid quoted string.
222 // Avoid allocation in trivial cases.
224 for _, r := range s {
225 if r == '\\' || r == quote {
// output buffer, pre-sized to one and a half times the input length
234 buf := make([]byte, 0, 3*len(s)/2)
// decode one rune from the front of s
236 r, n := utf8.DecodeRuneInString(s)
237 if r == utf8.RuneError && n == 1 {
238 return "", errBadUTF8
// single-byte (ASCII) runes are appended directly
242 if r < utf8.RuneSelf {
243 buf = append(buf, byte(r))
245 buf = append(buf, string(r)...)
// escape sequence: unescape returns the decoded bytes and the remaining input
250 ch, tail, err := unescape(s)
254 buf = append(buf, ch...)
257 return string(buf), nil
// unescape decodes one escape sequence from the front of s and returns the
// decoded text as ch and a remainder string as tail.
// NOTE(review): s is presumably the text just after the backslash; confirm
// against the call in unquoteC.
// It returns errBadUTF8 for invalid UTF-8 and a descriptive error for
// malformed octal/hex escapes or an unknown escape character.
260 func unescape(s string) (ch string, tail string, err error) {
261 r, n := utf8.DecodeRuneInString(s)
262 if r == utf8.RuneError && n == 1 {
263 return "", "", errBadUTF8
282 return "?", s, nil // trigraph workaround
283 case '\'', '"', '\\':
// quote and backslash escapes stand for themselves
284 return string(r), s, nil
285 case '0', '1', '2', '3', '4', '5', '6', '7':
// octal escape: the leading digit r plus two more digits
287 return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
289 ss := string(r) + s[:2]
// base 8, bitSize 8
291 i, err := strconv.ParseUint(ss, 8, 8)
293 return "", "", fmt.Errorf(`\%s contains non-octal digits`, ss)
295 return string([]byte{byte(i)}), s, nil
296 case 'x', 'X', 'u', 'U':
// hexadecimal escape with n following digits
307 return "", "", fmt.Errorf(`\%c requires %d following digits`, r, n)
311 i, err := strconv.ParseUint(ss, 16, 64)
313 return "", "", fmt.Errorf(`\%c%s contains non-hexadecimal digits`, r, ss)
// \x and \X yield a single raw byte
315 if r == 'x' || r == 'X' {
316 return string([]byte{byte(i)}), s, nil
// \u and \U must name a value no larger than utf8.MaxRune
318 if i > utf8.MaxRune {
319 return "", "", fmt.Errorf(`\%c%s is not a valid Unicode code point`, r, ss)
// NOTE(review): string(i) converts the integer to the UTF-8 encoding of that
// code point; go vet flags this form, and string(rune(i)) is the explicit spelling.
321 return string(i), s, nil
323 return "", "", fmt.Errorf(`unknown escape \%c`, r)
326 // back backs the parser off by one token by setting p.backed. It can only be
327 // called between calls to next(), and it makes the next advance() a no-op.
328 func (p *textParser) back() { p.backed = true }
330 // next advances the parser and returns the new current token.
// Consecutive quoted strings separated only by whitespace are merged into one token.
331 func (p *textParser) next() *token {
332 if p.backed || p.done {
339 } else if len(p.cur.value) > 0 && isQuote(p.cur.value[0]) {
340 // Look for multiple quoted strings separated by whitespace,
341 // and concatenate them.
// stop merging at the end of input or at a non-quote byte
345 if p.done || !isQuote(p.s[0]) {
349 if p.cur.err != nil {
// raw values are joined with a space; unquoted contents are joined directly
352 cat.value += " " + p.cur.value
353 cat.unquoted += p.cur.unquoted
355 p.done = false // parser may have seen EOF, but we want to return cat
// consumeToken checks a token against s and reports
// `expected "<s>", found "<token>"` via p.errorf when they differ.
361 func (p *textParser) consumeToken(s string) error {
368 return p.errorf("expected %q, found %q", s, tok.value)
373 // missingRequiredFieldError returns a RequiredNotSetError naming a required field of sv that was not set.
// The name is formatted as "<struct type>.<original field name>".
374 func (p *textParser) missingRequiredFieldError(sv reflect.Value) *RequiredNotSetError {
376 sprops := GetProperties(st)
377 for i := 0; i < st.NumField(); i++ {
// the field holds a non-nil value
378 if !isNil(sv.Field(i)) {
382 props := sprops.Prop[i]
384 return &RequiredNotSetError{fmt.Sprintf("%v.%v", st, props.OrigName)}
387 return &RequiredNotSetError{fmt.Sprintf("%v.<unknown field name>", st)} // should not happen
390 // structFieldByName looks name up in sprops.decoderOrigNames and returns the field's index in the struct, its parsed tag properties, and true.
// It returns (-1, nil, false) when the name is not found.
391 func structFieldByName(sprops *StructProperties, name string) (int, *Properties, bool) {
392 i, ok := sprops.decoderOrigNames[name]
394 return i, sprops.Prop[i], true
396 return -1, nil, false
399 // checkForColon consumes a ':' from the input stream (if the next token is a colon),
400 // returning an error if a colon is needed but not present.
// Whether the colon is needed is decided from the Go type typ of the field.
401 func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError {
406 if tok.value != ":" {
407 // Colon is optional when the field is a group or message.
413 // A "bytes" field is either a message, a string, or a repeated field;
414 // those three become *T, *string and []T respectively, so we can check for
415 // this field being a pointer to a non-string.
416 if typ.Kind() == reflect.Ptr {
// *string
418 if typ.Elem().Kind() == reflect.String {
421 } else if typ.Kind() == reflect.Slice {
// slice whose elements are not pointers
423 if typ.Elem().Kind() != reflect.Ptr {
426 } else if typ.Kind() == reflect.String {
427 // The proto3 exception is for a string field,
428 // which requires a colon.
434 return p.errorf("expected ':', found %q", tok.value)
// readStruct parses a sequence of fields from the token stream into the
// struct value sv, stopping when it reads terminator. Besides ordinary
// "name: value" fields it handles "[extension]" names, expanded Any messages
// written as "[type/url] < ... >", oneof members and map fields; individual
// values are parsed by readAny. Malformed input is reported through
// p.errorf, and the function can return the result of
// missingRequiredFieldError for unset required fields.
441 func (p *textParser) readStruct(sv reflect.Value, terminator string) error {
443 sprops := GetProperties(st)
444 reqCount := sprops.reqCount
445 var reqFieldErr error
446 fieldSet := make(map[string]bool) // field names already parsed into sv
447 // A struct is a sequence of "name: value", terminated by one of
448 // '>' or '}', or the end of the input. A name may also be
449 // "[extension]" or "[type/url]".
451 // The whole struct can also be an expanded Any message, like:
452 // [type/url] < ... struct contents ... >
458 if tok.value == terminator {
461 if tok.value == "[" {
462 // Looks like an extension or an Any.
464 // TODO: Check whether we need to handle
465 // namespace rooted names (e.g. ".something.Foo").
466 extName, err := p.consumeExtName()
471 if s := strings.LastIndex(extName, "/"); s >= 0 {
472 // If it contains a slash, it's an Any type URL.
// the message name is the part after the last slash
473 messageName := extName[s+1:]
474 mt := MessageType(messageName)
476 return p.errorf("unrecognized message %q in google.protobuf.Any", messageName)
482 // consume an optional colon
483 if tok.value == ":" {
489 var terminator string
496 return p.errorf("expected '{' or '<', found %q", tok.value)
// parse the embedded message into a fresh value, then store its
// marshaled bytes in sv's Value field and the URL in TypeUrl
498 v := reflect.New(mt.Elem())
499 if pe := p.readStruct(v.Elem(), terminator); pe != nil {
502 b, err := Marshal(v.Interface().(Message))
504 return p.errorf("failed to marshal message of type %q: %v", messageName, err)
506 if fieldSet["type_url"] {
507 return p.errorf(anyRepeatedlyUnpacked, "type_url")
509 if fieldSet["value"] {
510 return p.errorf(anyRepeatedlyUnpacked, "value")
512 sv.FieldByName("TypeUrl").SetString(extName)
513 sv.FieldByName("Value").SetBytes(b)
514 fieldSet["type_url"] = true
515 fieldSet["value"] = true
519 var desc *ExtensionDesc
520 // This could be faster, but it's functional.
521 // TODO: Do something smarter than a linear scan.
522 for _, d := range RegisteredExtensions(reflect.New(st).Interface().(Message)) {
523 if d.Name == extName {
529 return p.errorf("unrecognized extension %q", extName)
532 props := &Properties{}
533 props.Parse(desc.Tag)
535 typ := reflect.TypeOf(desc.ExtensionType)
536 if err := p.checkForColon(props, typ); err != nil {
540 rep := desc.repeated()
542 // Read the extension structure, and set it in
543 // the value we're constructing.
544 var ext reflect.Value
546 ext = reflect.New(typ).Elem()
548 ext = reflect.New(typ.Elem()).Elem()
550 if err := p.readAny(ext, props); err != nil {
551 if _, ok := err.(*RequiredNotSetError); !ok {
556 ep := sv.Addr().Interface().(Message)
558 SetExtension(ep, desc, ext.Interface())
// append ext to the extension's slice and store the slice back
560 old, err := GetExtension(ep, desc)
563 sl = reflect.ValueOf(old) // existing slice
565 sl = reflect.MakeSlice(typ, 0, 1)
567 sl = reflect.Append(sl, ext)
568 SetExtension(ep, desc, sl.Interface())
570 if err := p.consumeOptionalSeparator(); err != nil {
576 // This is a normal, non-extension field.
578 var dst reflect.Value
579 fi, props, ok := structFieldByName(sprops, name)
582 } else if oop, ok := sprops.OneofTypes[name]; ok {
// oneof member: parse into field 0 of a new wrapper value
585 nv := reflect.New(oop.Type.Elem())
586 dst = nv.Elem().Field(0)
587 field := sv.Field(oop.Field)
589 return p.errorf("field '%s' would overwrite already parsed oneof '%s'", name, sv.Type().Field(oop.Field).Name)
594 return p.errorf("unknown field name %q in %v", name, st)
597 if dst.Kind() == reflect.Map {
598 // Consume any colon.
599 if err := p.checkForColon(props, dst.Type()); err != nil {
603 // Construct the map if it doesn't already exist.
605 dst.Set(reflect.MakeMap(dst.Type()))
607 key := reflect.New(dst.Type().Key()).Elem()
608 val := reflect.New(dst.Type().Elem()).Elem()
610 // The map entry should be this sequence of tokens:
611 // < key : KEY value : VALUE >
612 // However, implementations may omit key or value, and technically
613 // we should support them in any order. See b/28924776 for a time
617 var terminator string
624 return p.errorf("expected '{' or '<', found %q", tok.value)
631 if tok.value == terminator {
636 if err := p.consumeToken(":"); err != nil {
639 if err := p.readAny(key, props.MapKeyProp); err != nil {
642 if err := p.consumeOptionalSeparator(); err != nil {
646 if err := p.checkForColon(props.MapValProp, dst.Type().Elem()); err != nil {
649 if err := p.readAny(val, props.MapValProp); err != nil {
652 if err := p.consumeOptionalSeparator(); err != nil {
657 return p.errorf(`expected "key", "value", or %q, found %q`, terminator, tok.value)
// store the parsed entry
661 dst.SetMapIndex(key, val)
665 // Check that it's not already set if it's not a repeated field.
666 if !props.Repeated && fieldSet[name] {
667 return p.errorf("non-repeated field %q was repeated", name)
670 if err := p.checkForColon(props, dst.Type()); err != nil {
674 // Parse into the field.
675 fieldSet[name] = true
676 if err := p.readAny(dst, props); err != nil {
677 if _, ok := err.(*RequiredNotSetError); !ok {
686 if err := p.consumeOptionalSeparator(); err != nil {
693 return p.missingRequiredFieldError(sv)
698 // consumeExtName consumes extension name or expanded Any type URL and the
699 // following ']'. It returns the name or URL consumed.
// A quoted name is unquoted with unquoteC; an unquoted name is assembled by
// joining the tokens read up to "]".
700 func (p *textParser) consumeExtName() (string, error) {
706 // If extension name or type url is quoted, it's a single token.
707 if len(tok.value) > 2 && isQuote(tok.value[0]) && tok.value[len(tok.value)-1] == tok.value[0] {
708 name, err := unquoteC(tok.value[1:len(tok.value)-1], rune(tok.value[0]))
712 return name, p.consumeToken("]")
715 // Consume everything up to "]"
717 for tok.value != "]" {
718 parts = append(parts, tok.value)
721 return "", p.errorf("unrecognized type_url or extension name: %s", tok.err)
// input ended before the closing "]"
723 if p.done && tok.value != "]" {
724 return "", p.errorf("unclosed type_url or extension name")
727 return strings.Join(parts, ""), nil
730 // consumeOptionalSeparator consumes an optional semicolon or comma.
731 // It is used in readStruct to provide backward compatibility.
732 func (p *textParser) consumeOptionalSeparator() error {
// the token is neither ";" nor ","
737 if tok.value != ";" && tok.value != "," {
// readAny parses one field value from the token stream into v, guided by
// props. Fields with a custom type are decoded through the Unmarshaler
// interface, and StdDuration fields are parsed as a duration message and
// converted with durationFromProto (timestamps are converted with
// timestampFromProto in the same way). Every other value is parsed according
// to v's reflect kind: byte slices and strings from quoted tokens, repeated
// fields either as a "[a, b]" list or one element at a time, booleans,
// floats, signed and unsigned integers (including enum names), pointers, and
// nested messages via readStruct. A token that cannot be parsed is reported
// as "invalid <type>: <token>".
743 func (p *textParser) readAny(v reflect.Value, props *Properties) error {
749 return p.errorf("unexpected EOF")
751 if len(props.CustomType) > 0 {
753 t := reflect.TypeOf(v.Interface())
754 if t.Kind() == reflect.Slice {
755 tc := reflect.TypeOf(new(Marshaler))
756 ok := t.Elem().Implements(tc.Elem())
// the slice is full: copy it into a larger one
760 if flen == fv.Cap() {
761 nav := reflect.MakeSlice(v.Type(), flen, 2*flen+1)
762 reflect.Copy(nav, fv)
// parse the value into the element at index flen
769 return p.readAny(fv.Index(flen), props)
// pointer-typed custom field: unmarshal into a new props.ctype element
773 if reflect.TypeOf(v.Interface()).Kind() == reflect.Ptr {
774 custom := reflect.New(props.ctype.Elem()).Interface().(Unmarshaler)
775 err := custom.Unmarshal([]byte(tok.unquoted))
777 return p.errorf("%v %v: %v", err, v.Type(), tok.value)
779 v.Set(reflect.ValueOf(custom))
// value-typed custom field: unmarshal into a new value and store what it points to
781 custom := reflect.New(reflect.TypeOf(v.Interface())).Interface().(Unmarshaler)
782 err := custom.Unmarshal([]byte(tok.unquoted))
784 return p.errorf("%v %v: %v", err, v.Type(), tok.value)
786 v.Set(reflect.Indirect(reflect.ValueOf(custom)))
793 props.StdTime = false
// NOTE(review): the next line looks mis-encoded; by analogy with
// "dproto := &duration{}" below it is presumably "&timestamp{}". Confirm.
794 tproto := ×tamp{}
795 err := p.readAny(reflect.ValueOf(tproto).Elem(), props)
800 tim, err := timestampFromProto(tproto)
805 t := reflect.TypeOf(v.Interface())
806 if t.Kind() == reflect.Slice {
807 if t.Elem().Kind() == reflect.Ptr {
808 ts := fv.Interface().([]*time.Time)
809 ts = append(ts, &tim)
810 fv.Set(reflect.ValueOf(ts))
813 ts := fv.Interface().([]time.Time)
815 fv.Set(reflect.ValueOf(ts))
820 if reflect.TypeOf(v.Interface()).Kind() == reflect.Ptr {
821 v.Set(reflect.ValueOf(&tim))
823 v.Set(reflect.Indirect(reflect.ValueOf(&tim)))
827 if props.StdDuration {
// The flag is cleared around the recursive call and set again afterwards.
// NOTE(review): this temporarily mutates props; confirm that the same
// *Properties is never used concurrently.
830 props.StdDuration = false
831 dproto := &duration{}
832 err := p.readAny(reflect.ValueOf(dproto).Elem(), props)
833 props.StdDuration = true
837 dur, err := durationFromProto(dproto)
842 t := reflect.TypeOf(v.Interface())
843 if t.Kind() == reflect.Slice {
844 if t.Elem().Kind() == reflect.Ptr {
845 ds := fv.Interface().([]*time.Duration)
846 ds = append(ds, &dur)
847 fv.Set(reflect.ValueOf(ds))
850 ds := fv.Interface().([]time.Duration)
852 fv.Set(reflect.ValueOf(ds))
857 if reflect.TypeOf(v.Interface()).Kind() == reflect.Ptr {
858 v.Set(reflect.ValueOf(&dur))
860 v.Set(reflect.Indirect(reflect.ValueOf(&dur)))
// everything else is dispatched on the reflect kind of the destination
864 switch fv := v; fv.Kind() {
867 if at.Elem().Kind() == reflect.Uint8 {
868 // Special case for []byte
869 if tok.value[0] != '"' && tok.value[0] != '\'' {
870 // Deliberately written out here, as the error after
871 // this switch statement would write "invalid []byte: ...",
872 // which is not as user-friendly.
873 return p.errorf("invalid string: %v", tok.value)
875 bytes := []byte(tok.unquoted)
876 fv.Set(reflect.ValueOf(bytes))
880 if tok.value == "[" {
881 // Repeated field with list notation, like [1,2,3].
// append a zero element, then parse into it in place
883 fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem()))
884 err := p.readAny(fv.Index(fv.Len()-1), props)
892 if ntok.value == "]" {
895 if ntok.value != "," {
896 return p.errorf("Expected ']' or ',' found %q", ntok.value)
901 // One value of the repeated field.
903 fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem()))
904 return p.readAny(fv.Index(fv.Len()-1), props)
906 // true/1/t/True or false/f/0/False.
908 case "true", "1", "t", "True":
911 case "false", "0", "f", "False":
915 case reflect.Float32, reflect.Float64:
917 // Ignore 'f' for compatibility with output generated by C++, but don't
918 // remove 'f' when the value is "-inf" or "inf".
919 if strings.HasSuffix(v, "f") && tok.value != "-inf" && tok.value != "inf" {
922 if f, err := strconv.ParseFloat(v, fv.Type().Bits()); err == nil {
// signed integers: base 0 lets ParseInt infer the base from the token's prefix
927 if x, err := strconv.ParseInt(tok.value, 0, 8); err == nil {
932 if x, err := strconv.ParseInt(tok.value, 0, 16); err == nil {
937 if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
942 if len(props.Enum) == 0 {
// enum field: look the token up by name in the enum's value map
945 m, ok := enumValueMaps[props.Enum]
949 x, ok := m[tok.value]
956 if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil {
962 // A basic field (indirected through pointer), or a repeated message/group
964 fv.Set(reflect.New(fv.Type().Elem()))
965 return p.readAny(fv.Elem(), props)
// a string value must be a quoted token
967 if tok.value[0] == '"' || tok.value[0] == '\'' {
968 fv.SetString(tok.unquoted)
972 var terminator string
979 return p.errorf("expected '{' or '<', found %q", tok.value)
981 // TODO: Handle nested messages which implement encoding.TextUnmarshaler.
982 return p.readStruct(fv, terminator)
// unsigned integers, parsed like the signed ones
984 if x, err := strconv.ParseUint(tok.value, 0, 8); err == nil {
989 if x, err := strconv.ParseUint(tok.value, 0, 16); err == nil {
994 if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
995 fv.SetUint(uint64(x))
999 if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
// no case above accepted the token
1004 return p.errorf("invalid %v: %v", v.Type(), tok.value)
1007 // UnmarshalText reads a protocol buffer in Text format. UnmarshalText resets pb
1008 // before starting to unmarshal, so any existing data in pb is always removed.
1009 // If a required field is not set and no other error occurs,
1010 // UnmarshalText returns *RequiredNotSetError.
1011 func UnmarshalText(s string, pb Message) error {
// a message that implements encoding.TextUnmarshaler parses the text itself
1012 if um, ok := pb.(encoding.TextUnmarshaler); ok {
1013 return um.UnmarshalText([]byte(s))
// otherwise parse s into the struct pb points to; "" is passed as the terminator
1016 v := reflect.ValueOf(pb)
1017 return newTextParser(s).readStruct(v.Elem(), "")