vendor/github.com/golang/snappy/decode.go

   1 // Copyright 2011 The Snappy-Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package snappy
   6
   7 import (
   8         "encoding/binary"
   9         "errors"
  10         "io"
  11 )
  12
  13 var (
  14         // ErrCorrupt reports that the input is invalid.
  15         ErrCorrupt = errors.New("snappy: corrupt input")
  16         // ErrTooLarge reports that the uncompressed length is too large.
  17         ErrTooLarge = errors.New("snappy: decoded block is too large")
  18         // ErrUnsupported reports that the input isn't supported.
  19         ErrUnsupported = errors.New("snappy: unsupported input")
  20
  21         errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length")
  22 )
  23
  24 // DecodedLen returns the length of the decoded block.
  25 func DecodedLen(src []byte) (int, error) {
  26         v, _, err := decodedLen(src)
  27         return v, err
  28 }
  29
  30 // decodedLen returns the length of the decoded block and the number of bytes
  31 // that the length header occupied.
  32 func decodedLen(src []byte) (blockLen, headerLen int, err error) {
  33         v, n := binary.Uvarint(src)
  34         if n <= 0 || v > 0xffffffff {
  35                 return 0, 0, ErrCorrupt
  36         }
  37
  38         const wordSize = 32 << (^uint(0) >> 32 & 1)
  39         if wordSize == 32 && v > 0x7fffffff {
  40                 return 0, 0, ErrTooLarge
  41         }
  42         return int(v), n, nil
  43 }
  44
  45 const (
  46         decodeErrCodeCorrupt                  = 1
  47         decodeErrCodeUnsupportedLiteralLength = 2
  48 )
  49
  50 // Decode returns the decoded form of src. The returned slice may be a sub-
  51 // slice of dst if dst was large enough to hold the entire decoded block.
  52 // Otherwise, a newly allocated slice will be returned.
  53 //
  54 // The dst and src must not overlap. It is valid to pass a nil dst.
  55 func Decode(dst, src []byte) ([]byte, error) {
  56         dLen, s, err := decodedLen(src)
  57         if err != nil {
  58                 return nil, err
  59         }
  60         if dLen <= len(dst) {
  61                 dst = dst[:dLen]
  62         } else {
  63                 dst = make([]byte, dLen)
  64         }
  65         switch decode(dst, src[s:]) {
  66         case 0:
  67                 return dst, nil
  68         case decodeErrCodeUnsupportedLiteralLength:
  69                 return nil, errUnsupportedLiteralLength
  70         }
  71         return nil, ErrCorrupt
  72 }
  73
  74 // NewReader returns a new Reader that decompresses from r, using the framing
  75 // format described at
  76 // https://github.com/google/snappy/blob/master/framing_format.txt
  77 func NewReader(r io.Reader) *Reader {
  78         return &Reader{
  79                 r:       r,
  80                 decoded: make([]byte, maxBlockSize),
  81                 buf:     make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize),
  82         }
  83 }
  84
  85 // Reader is an io.Reader that can read Snappy-compressed bytes.
  86 type Reader struct {
  87         r       io.Reader
  88         err     error
  89         decoded []byte
  90         buf     []byte
  91         // decoded[i:j] contains decoded bytes that have not yet been passed on.
  92         i, j       int
  93         readHeader bool
  94 }
  95
  96 // Reset discards any buffered data, resets all state, and switches the Snappy
  97 // reader to read from r. This permits reusing a Reader rather than allocating
  98 // a new one.
  99 func (r *Reader) Reset(reader io.Reader) {
 100         r.r = reader
 101         r.err = nil
 102         r.i = 0
 103         r.j = 0
 104         r.readHeader = false
 105 }
 106
 107 func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) {
 108         if _, r.err = io.ReadFull(r.r, p); r.err != nil {
 109                 if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) {
 110                         r.err = ErrCorrupt
 111                 }
 112                 return false
 113         }
 114         return true
 115 }
 116
 117 // Read satisfies the io.Reader interface.
 118 func (r *Reader) Read(p []byte) (int, error) {
 119         if r.err != nil {
 120                 return 0, r.err
 121         }
 122         for {
 123                 if r.i < r.j {
 124                         n := copy(p, r.decoded[r.i:r.j])
 125                         r.i += n
 126                         return n, nil
 127                 }
 128                 if !r.readFull(r.buf[:4], true) {
 129                         return 0, r.err
 130                 }
 131                 chunkType := r.buf[0]
 132                 if !r.readHeader {
 133                         if chunkType != chunkTypeStreamIdentifier {
 134                                 r.err = ErrCorrupt
 135                                 return 0, r.err
 136                         }
 137                         r.readHeader = true
 138                 }
 139                 chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
 140                 if chunkLen > len(r.buf) {
 141                         r.err = ErrUnsupported
 142                         return 0, r.err
 143                 }
 144
 145                 // The chunk types are specified at
 146                 // https://github.com/google/snappy/blob/master/framing_format.txt
 147                 switch chunkType {
 148                 case chunkTypeCompressedData:
 149                         // Section 4.2. Compressed data (chunk type 0x00).
 150                         if chunkLen < checksumSize {
 151                                 r.err = ErrCorrupt
 152                                 return 0, r.err
 153                         }
 154                         buf := r.buf[:chunkLen]
 155                         if !r.readFull(buf, false) {
 156                                 return 0, r.err
 157                         }
 158                         checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
 159                         buf = buf[checksumSize:]
 160
 161                         n, err := DecodedLen(buf)
 162                         if err != nil {
 163                                 r.err = err
 164                                 return 0, r.err
 165                         }
 166                         if n > len(r.decoded) {
 167                                 r.err = ErrCorrupt
 168                                 return 0, r.err
 169                         }
 170                         if _, err := Decode(r.decoded, buf); err != nil {
 171                                 r.err = err
 172                                 return 0, r.err
 173                         }
 174                         if crc(r.decoded[:n]) != checksum {
 175                                 r.err = ErrCorrupt
 176                                 return 0, r.err
 177                         }
 178                         r.i, r.j = 0, n
 179                         continue
 180
 181                 case chunkTypeUncompressedData:
 182                         // Section 4.3. Uncompressed data (chunk type 0x01).
 183                         if chunkLen < checksumSize {
 184                                 r.err = ErrCorrupt
 185                                 return 0, r.err
 186                         }
 187                         buf := r.buf[:checksumSize]
 188                         if !r.readFull(buf, false) {
 189                                 return 0, r.err
 190                         }
 191                         checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
 192                         // Read directly into r.decoded instead of via r.buf.
 193                         n := chunkLen - checksumSize
 194                         if n > len(r.decoded) {
 195                                 r.err = ErrCorrupt
 196                                 return 0, r.err
 197                         }
 198                         if !r.readFull(r.decoded[:n], false) {
 199                                 return 0, r.err
 200                         }
 201                         if crc(r.decoded[:n]) != checksum {
 202                                 r.err = ErrCorrupt
 203                                 return 0, r.err
 204                         }
 205                         r.i, r.j = 0, n
 206                         continue
 207
 208                 case chunkTypeStreamIdentifier:
 209                         // Section 4.1. Stream identifier (chunk type 0xff).
 210                         if chunkLen != len(magicBody) {
 211                                 r.err = ErrCorrupt
 212                                 return 0, r.err
 213                         }
 214                         if !r.readFull(r.buf[:len(magicBody)], false) {
 215                                 return 0, r.err
 216                         }
 217                         for i := 0; i < len(magicBody); i++ {
 218                                 if r.buf[i] != magicBody[i] {
 219                                         r.err = ErrCorrupt
 220                                         return 0, r.err
 221                                 }
 222                         }
 223                         continue
 224                 }
 225
 226                 if chunkType <= 0x7f {
 227                         // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
 228                         r.err = ErrUnsupported
 229                         return 0, r.err
 230                 }
 231                 // Section 4.4 Padding (chunk type 0xfe).
 232                 // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
 233                 if !r.readFull(r.buf[:chunkLen], false) {
 234                         return 0, r.err
 235                 }
 236         }
 237 }