1 // Copyright (c) 2015-2016 The btcsuite developers
2 // Use of this source code is governed by an ISC
3 // license that can be found in the LICENSE file.
8 "github.com/btcsuite/btcd/btcec"
9 "github.com/btcsuite/btcd/txscript"
12 // -----------------------------------------------------------------------------
13 // A variable length quantity (VLQ) is an encoding that uses an arbitrary number
14 // of binary octets to represent an arbitrarily large integer. The scheme
15 // employs a most significant byte (MSB) base-128 encoding where the high bit in
16 // each byte indicates whether or not the byte is the final one. In addition,
17 // to ensure there are no redundant encodings, an offset is subtracted every
18 // time a group of 7 bits is shifted out. Therefore each integer can be
19 // represented in exactly one way, and each representation stands for exactly
22 // Another nice property of this encoding is that it provides a compact
23 // representation of values that are typically used to indicate sizes. For
24 // example, the values 0 - 127 are represented with a single byte, 128 - 16511
25 // with two bytes, and 16512 - 2113663 with three bytes.
27 // While the encoding allows arbitrarily large integers, it is artificially
28 // limited in this code to an unsigned 64-bit integer for efficiency purposes.
32 // 127 -> [0x7f] * Max 1-byte value
37 // 16511 -> [0xff 0x7f] * Max 2-byte value
38 // 16512 -> [0x80 0x80 0x00]
39 // 32895 -> [0x80 0xff 0x7f]
40 // 2113663 -> [0xff 0xff 0x7f] * Max 3-byte value
41 // 270549119 -> [0xff 0xff 0xff 0x7f] * Max 4-byte value
42 // 2^64-1 -> [0x80 0xfe 0xfe 0xfe 0xfe 0xfe 0xfe 0xfe 0xfe 0x7f]
45 // https://en.wikipedia.org/wiki/Variable-length_quantity
46 // http://www.codecodex.com/wiki/Variable-Length_Integers
47 // -----------------------------------------------------------------------------
49 // serializeSizeVLQ returns the number of bytes it would take to serialize the
50 // passed number as a variable-length quantity according to the format described
52 func serializeSizeVLQ(n uint64) int {
54 for ; n > 0x7f; n = (n >> 7) - 1 {
61 // putVLQ serializes the provided number to a variable-length quantity according
62 // to the format described above and returns the number of bytes of the encoded
63 // value. The result is placed directly into the passed byte slice which must
64 // be at least large enough to handle the number of bytes returned by the
65 // serializeSizeVLQ function or it will panic.
66 func putVLQ(target []byte, n uint64) int {
69 // The high bit is set when another byte follows.
70 highBitMask := byte(0x80)
75 target[offset] = byte(n&0x7f) | highBitMask
82 // Reverse the bytes so it is MSB-encoded.
83 for i, j := 0, offset; i < j; i, j = i+1, j-1 {
84 target[i], target[j] = target[j], target[i]
90 // deserializeVLQ deserializes the provided variable-length quantity according
91 // to the format described above. It also returns the number of bytes
93 func deserializeVLQ(serialized []byte) (uint64, int) {
96 for _, val := range serialized {
98 n = (n << 7) | uint64(val&0x7f)
108 // -----------------------------------------------------------------------------
109 // In order to reduce the size of stored scripts, a domain specific compression
110 // algorithm is used which recognizes standard scripts and stores them using
111 // less bytes than the original script. The compression algorithm used here was
112 // obtained from Bitcoin Core, so all credits for the algorithm go to it.
114 // The general serialized format is:
116 // <script size or type><script data>
119 // script size or type VLQ variable
120 // script data []byte variable
122 // The specific serialized format for each recognized standard script is:
124 // - Pay-to-pubkey-hash: (21 bytes) - <0><20-byte pubkey hash>
125 // - Pay-to-script-hash: (21 bytes) - <1><20-byte script hash>
126 // - Pay-to-pubkey**: (33 bytes) - <2, 3, 4, or 5><32-byte pubkey X value>
127 // 2, 3 = compressed pubkey with bit 0 specifying the y coordinate to use
128 // 4, 5 = uncompressed pubkey with bit 0 specifying the y coordinate to use
129 // ** Only valid public keys starting with 0x02, 0x03, and 0x04 are supported.
131 // Any scripts which are not recognized as one of the aforementioned standard
132 // scripts are encoded using the general serialized format and encode the script
133 // size as the sum of the actual size of the script and the number of special
135 // -----------------------------------------------------------------------------
137 // The following constants specify the special constants used to identify a
138 // special script type in the domain-specific compressed script encoding.
140 // NOTE: This section specifically does not use iota since these values are
141 // serialized and must be stable for long-term storage.
143 // cstPayToPubKeyHash identifies a compressed pay-to-pubkey-hash script.
144 cstPayToPubKeyHash = 0
146 // cstPayToScriptHash identifies a compressed pay-to-script-hash script.
147 cstPayToScriptHash = 1
149 // cstPayToPubKeyComp2 identifies a compressed pay-to-pubkey script to
150 // a compressed pubkey. Bit 0 specifies which y-coordinate to use
151 // to reconstruct the full uncompressed pubkey.
152 cstPayToPubKeyComp2 = 2
154 // cstPayToPubKeyComp3 identifies a compressed pay-to-pubkey script to
155 // a compressed pubkey. Bit 0 specifies which y-coordinate to use
156 // to reconstruct the full uncompressed pubkey.
157 cstPayToPubKeyComp3 = 3
159 // cstPayToPubKeyUncomp4 identifies a compressed pay-to-pubkey script to
160 // an uncompressed pubkey. Bit 0 specifies which y-coordinate to use
161 // to reconstruct the full uncompressed pubkey.
162 cstPayToPubKeyUncomp4 = 4
164 // cstPayToPubKeyUncomp5 identifies a compressed pay-to-pubkey script to
165 // an uncompressed pubkey. Bit 0 specifies which y-coordinate to use
166 // to reconstruct the full uncompressed pubkey.
167 cstPayToPubKeyUncomp5 = 5
169 // numSpecialScripts is the number of special scripts recognized by the
170 // domain-specific script compression algorithm.
171 numSpecialScripts = 6
174 // isPubKeyHash returns whether or not the passed public key script is a
175 // standard pay-to-pubkey-hash script along with the pubkey hash it is paying to
177 func isPubKeyHash(script []byte) (bool, []byte) {
178 if len(script) == 25 && script[0] == txscript.OP_DUP &&
179 script[1] == txscript.OP_HASH160 &&
180 script[2] == txscript.OP_DATA_20 &&
181 script[23] == txscript.OP_EQUALVERIFY &&
182 script[24] == txscript.OP_CHECKSIG {
184 return true, script[3:23]
190 // isScriptHash returns whether or not the passed public key script is a
191 // standard pay-to-script-hash script along with the script hash it is paying to
193 func isScriptHash(script []byte) (bool, []byte) {
194 if len(script) == 23 && script[0] == txscript.OP_HASH160 &&
195 script[1] == txscript.OP_DATA_20 &&
196 script[22] == txscript.OP_EQUAL {
198 return true, script[2:22]
204 // isPubKey returns whether or not the passed public key script is a standard
205 // pay-to-pubkey script that pays to a valid compressed or uncompressed public
206 // key along with the serialized pubkey it is paying to if it is.
208 // NOTE: This function ensures the public key is actually valid since the
209 // compression algorithm requires valid pubkeys. It does not support hybrid
210 // pubkeys. This means that even if the script has the correct form for a
211 // pay-to-pubkey script, this function will only return true when it is paying
212 // to a valid compressed or uncompressed pubkey.
213 func isPubKey(script []byte) (bool, []byte) {
214 // Pay-to-compressed-pubkey script.
215 if len(script) == 35 && script[0] == txscript.OP_DATA_33 &&
216 script[34] == txscript.OP_CHECKSIG && (script[1] == 0x02 ||
219 // Ensure the public key is valid.
220 serializedPubKey := script[1:34]
221 _, err := btcec.ParsePubKey(serializedPubKey, btcec.S256())
223 return true, serializedPubKey
227 // Pay-to-uncompressed-pubkey script.
228 if len(script) == 67 && script[0] == txscript.OP_DATA_65 &&
229 script[66] == txscript.OP_CHECKSIG && script[1] == 0x04 {
231 // Ensure the public key is valid.
232 serializedPubKey := script[1:66]
233 _, err := btcec.ParsePubKey(serializedPubKey, btcec.S256())
235 return true, serializedPubKey
242 // compressedScriptSize returns the number of bytes the passed script would take
243 // when encoded with the domain specific compression algorithm described above.
244 func compressedScriptSize(pkScript []byte, version int32) int {
245 // Pay-to-pubkey-hash script.
246 if valid, _ := isPubKeyHash(pkScript); valid {
250 // Pay-to-script-hash script.
251 if valid, _ := isScriptHash(pkScript); valid {
255 // Pay-to-pubkey (compressed or uncompressed) script.
256 if valid, _ := isPubKey(pkScript); valid {
260 // When none of the above special cases apply, encode the script as is
261 // preceded by the sum of its size and the number of special cases
262 // encoded as a variable length quantity.
263 return serializeSizeVLQ(uint64(len(pkScript)+numSpecialScripts)) +
267 // decodeCompressedScriptSize treats the passed serialized bytes as a compressed
268 // script, possibly followed by other data, and returns the number of bytes it
269 // occupies taking into account the special encoding of the script size by the
270 // domain specific compression algorithm described above.
271 func decodeCompressedScriptSize(serialized []byte, version int32) int {
272 scriptSize, bytesRead := deserializeVLQ(serialized)
278 case cstPayToPubKeyHash:
281 case cstPayToScriptHash:
284 case cstPayToPubKeyComp2, cstPayToPubKeyComp3, cstPayToPubKeyUncomp4,
285 cstPayToPubKeyUncomp5:
289 scriptSize -= numSpecialScripts
290 scriptSize += uint64(bytesRead)
291 return int(scriptSize)
294 // putCompressedScript compresses the passed script according to the domain
295 // specific compression algorithm described above directly into the passed
296 // target byte slice. The target byte slice must be at least large enough to
297 // handle the number of bytes returned by the compressedScriptSize function or
299 func putCompressedScript(target, pkScript []byte, version int32) int {
300 // Pay-to-pubkey-hash script.
301 if valid, hash := isPubKeyHash(pkScript); valid {
302 target[0] = cstPayToPubKeyHash
303 copy(target[1:21], hash)
307 // Pay-to-script-hash script.
308 if valid, hash := isScriptHash(pkScript); valid {
309 target[0] = cstPayToScriptHash
310 copy(target[1:21], hash)
314 // Pay-to-pubkey (compressed or uncompressed) script.
315 if valid, serializedPubKey := isPubKey(pkScript); valid {
316 pubKeyFormat := serializedPubKey[0]
317 switch pubKeyFormat {
319 target[0] = pubKeyFormat
320 copy(target[1:33], serializedPubKey[1:33])
323 // Encode the oddness of the serialized pubkey into the
324 // compressed script type.
325 target[0] = pubKeyFormat | (serializedPubKey[64] & 0x01)
326 copy(target[1:33], serializedPubKey[1:33])
331 // When none of the above special cases apply, encode the unmodified
332 // script preceded by the sum of its size and the number of special
333 // cases encoded as a variable length quantity.
334 encodedSize := uint64(len(pkScript) + numSpecialScripts)
335 vlqSizeLen := putVLQ(target, encodedSize)
336 copy(target[vlqSizeLen:], pkScript)
337 return vlqSizeLen + len(pkScript)
340 // decompressScript returns the original script obtained by decompressing the
341 // passed compressed script according to the domain specific compression
342 // algorithm described above.
344 // NOTE: The script parameter must already have been proven to be long enough
345 // to contain the number of bytes returned by decodeCompressedScriptSize or it
346 // will panic. This is acceptable since it is only an internal function.
347 func decompressScript(compressedPkScript []byte, version int32) []byte {
348 // In practice this function will not be called with a zero-length or
349 // nil script since the nil script encoding includes the length, however
350 // the code below assumes the length exists, so just return nil now if
351 // the function ever ends up being called with a nil script in the
353 if len(compressedPkScript) == 0 {
357 // Decode the script size and examine it for the special cases.
358 encodedScriptSize, bytesRead := deserializeVLQ(compressedPkScript)
359 switch encodedScriptSize {
360 // Pay-to-pubkey-hash script. The resulting script is:
361 // <OP_DUP><OP_HASH160><20 byte hash><OP_EQUALVERIFY><OP_CHECKSIG>
362 case cstPayToPubKeyHash:
363 pkScript := make([]byte, 25)
364 pkScript[0] = txscript.OP_DUP
365 pkScript[1] = txscript.OP_HASH160
366 pkScript[2] = txscript.OP_DATA_20
367 copy(pkScript[3:], compressedPkScript[bytesRead:bytesRead+20])
368 pkScript[23] = txscript.OP_EQUALVERIFY
369 pkScript[24] = txscript.OP_CHECKSIG
372 // Pay-to-script-hash script. The resulting script is:
373 // <OP_HASH160><20 byte script hash><OP_EQUAL>
374 case cstPayToScriptHash:
375 pkScript := make([]byte, 23)
376 pkScript[0] = txscript.OP_HASH160
377 pkScript[1] = txscript.OP_DATA_20
378 copy(pkScript[2:], compressedPkScript[bytesRead:bytesRead+20])
379 pkScript[22] = txscript.OP_EQUAL
382 // Pay-to-compressed-pubkey script. The resulting script is:
383 // <OP_DATA_33><33 byte compressed pubkey><OP_CHECKSIG>
384 case cstPayToPubKeyComp2, cstPayToPubKeyComp3:
385 pkScript := make([]byte, 35)
386 pkScript[0] = txscript.OP_DATA_33
387 pkScript[1] = byte(encodedScriptSize)
388 copy(pkScript[2:], compressedPkScript[bytesRead:bytesRead+32])
389 pkScript[34] = txscript.OP_CHECKSIG
392 // Pay-to-uncompressed-pubkey script. The resulting script is:
393 // <OP_DATA_65><65 byte uncompressed pubkey><OP_CHECKSIG>
394 case cstPayToPubKeyUncomp4, cstPayToPubKeyUncomp5:
395 // Change the leading byte to the appropriate compressed pubkey
396 // identifier (0x02 or 0x03) so it can be decoded as a
397 // compressed pubkey. This really should never fail since the
398 // encoding ensures it is valid before compressing to this type.
399 compressedKey := make([]byte, 33)
400 compressedKey[0] = byte(encodedScriptSize - 2)
401 copy(compressedKey[1:], compressedPkScript[1:])
402 key, err := btcec.ParsePubKey(compressedKey, btcec.S256())
407 pkScript := make([]byte, 67)
408 pkScript[0] = txscript.OP_DATA_65
409 copy(pkScript[1:], key.SerializeUncompressed())
410 pkScript[66] = txscript.OP_CHECKSIG
414 // When none of the special cases apply, the script was encoded using
415 // the general format, so reduce the script size by the number of
416 // special cases and return the unmodified script.
417 scriptSize := int(encodedScriptSize - numSpecialScripts)
418 pkScript := make([]byte, scriptSize)
419 copy(pkScript, compressedPkScript[bytesRead:bytesRead+scriptSize])
423 // -----------------------------------------------------------------------------
424 // In order to reduce the size of stored amounts, a domain specific compression
425 // algorithm is used which relies on there typically being a lot of zeroes at
426 // end of the amounts. The compression algorithm used here was obtained from
427 // Bitcoin Core, so all credits for the algorithm go to it.
429 // While this is simply exchanging one uint64 for another, the resulting value
430 // for typical amounts has a much smaller magnitude which results in fewer bytes
431 // when encoded as variable length quantity. For example, consider the amount
432 // of 0.1 BTC which is 10000000 satoshi. Encoding 10000000 as a VLQ would take
433 // 4 bytes while encoding the compressed value of 8 as a VLQ only takes 1 byte.
435 // Essentially the compression is achieved by splitting the value into an
436 // exponent in the range [0-9] and a digit in the range [1-9], when possible,
437 // and encoding them in a way that can be decoded. More specifically, the
438 // encoding is as follows:
440 // - Find the exponent, e, as the largest power of 10 that evenly divides the
441 // value up to a maximum of 9
442 // - When e < 9, the final digit can't be 0 so store it as d and remove it by
443 // dividing the value by 10 (call the result n). The encoded value is thus:
444 // 1 + 10*(9*n + d-1) + e
445 // - When e==9, the only thing known is the amount is not 0. The encoded value
447 // 1 + 10*(n-1) + e == 10 + 10*(n-1)
449 // Example encodings:
450 // (The numbers in parenthesis are the number of bytes when serialized as a VLQ)
451 // 0 (1) -> 0 (1) * 0.00000000 BTC
452 // 1000 (2) -> 4 (1) * 0.00001000 BTC
453 // 10000 (2) -> 5 (1) * 0.00010000 BTC
454 // 12345678 (4) -> 111111101(4) * 0.12345678 BTC
455 // 50000000 (4) -> 47 (1) * 0.50000000 BTC
456 // 100000000 (4) -> 9 (1) * 1.00000000 BTC
457 // 500000000 (5) -> 49 (1) * 5.00000000 BTC
458 // 1000000000 (5) -> 10 (1) * 10.00000000 BTC
459 // -----------------------------------------------------------------------------
461 // compressTxOutAmount compresses the passed amount according to the domain
462 // specific compression algorithm described above.
463 func compressTxOutAmount(amount uint64) uint64 {
464 // No need to do any work if it's zero.
469 // Find the largest power of 10 (max of 9) that evenly divides the
471 exponent := uint64(0)
472 for amount%10 == 0 && exponent < 9 {
477 // The compressed result for exponents less than 9 is:
478 // 1 + 10*(9*n + d-1) + e
480 lastDigit := amount % 10
482 return 1 + 10*(9*amount+lastDigit-1) + exponent
485 // The compressed result for an exponent of 9 is:
486 // 1 + 10*(n-1) + e == 10 + 10*(n-1)
487 return 10 + 10*(amount-1)
490 // decompressTxOutAmount returns the original amount the passed compressed
491 // amount represents according to the domain specific compression algorithm
493 func decompressTxOutAmount(amount uint64) uint64 {
494 // No need to do any work if it's zero.
499 // The decompressed amount is either of the following two equations:
500 // x = 1 + 10*(9*n + d - 1) + e
501 // x = 1 + 10*(n - 1) + 9
504 // The decompressed amount is now one of the following two equations:
505 // x = 10*(9*n + d - 1) + e
506 // x = 10*(n - 1) + 9
507 exponent := amount % 10
510 // The decompressed amount is now one of the following two equations:
511 // x = 9*n + d - 1 | where e < 9
512 // x = n - 1 | where e = 9
515 lastDigit := amount%9 + 1
517 n = amount*10 + lastDigit
522 // Apply the exponent.
523 for ; exponent > 0; exponent-- {
530 // -----------------------------------------------------------------------------
531 // Compressed transaction outputs consist of an amount and a public key script
532 // both compressed using the domain specific compression algorithms previously
535 // The serialized format is:
537 // <compressed amount><compressed script>
540 // compressed amount VLQ variable
541 // compressed script []byte variable
542 // -----------------------------------------------------------------------------
544 // compressedTxOutSize returns the number of bytes the passed transaction output
545 // fields would take when encoded with the format described above. The
546 // preCompressed flag indicates the provided amount and script are already
547 // compressed. This is useful since loaded utxo entries are not decompressed
548 // until the output is accessed.
549 func compressedTxOutSize(amount uint64, pkScript []byte, version int32, preCompressed bool) int {
551 return serializeSizeVLQ(amount) + len(pkScript)
554 return serializeSizeVLQ(compressTxOutAmount(amount)) +
555 compressedScriptSize(pkScript, version)
558 // putCompressedTxOut potentially compresses the passed amount and script
559 // according to their domain specific compression algorithms and encodes them
560 // directly into the passed target byte slice with the format described above.
561 // The preCompressed flag indicates the provided amount and script are already
562 // compressed in which case the values are not modified. This is useful since
563 // loaded utxo entries are not decompressed until the output is accessed. The
564 // target byte slice must be at least large enough to handle the number of bytes
565 // returned by the compressedTxOutSize function or it will panic.
566 func putCompressedTxOut(target []byte, amount uint64, pkScript []byte, version int32, preCompressed bool) int {
568 offset := putVLQ(target, amount)
569 copy(target[offset:], pkScript)
570 return offset + len(pkScript)
573 offset := putVLQ(target, compressTxOutAmount(amount))
574 offset += putCompressedScript(target[offset:], pkScript, version)
578 // decodeCompressedTxOut decodes the passed compressed txout, possibly followed
579 // by other data, into its compressed amount and compressed script and returns
580 // them along with the number of bytes they occupied.
581 func decodeCompressedTxOut(serialized []byte, version int32) (uint64, []byte, int, error) {
582 // Deserialize the compressed amount and ensure there are bytes
583 // remaining for the compressed script.
584 compressedAmount, bytesRead := deserializeVLQ(serialized)
585 if bytesRead >= len(serialized) {
586 return 0, nil, bytesRead, errDeserialize("unexpected end of " +
587 "data after compressed amount")
590 // Decode the compressed script size and ensure there are enough bytes
591 // left in the slice for it.
592 scriptSize := decodeCompressedScriptSize(serialized[bytesRead:], version)
593 if len(serialized[bytesRead:]) < scriptSize {
594 return 0, nil, bytesRead, errDeserialize("unexpected end of " +
595 "data after script size")
598 // Make a copy of the compressed script so the original serialized data
599 // can be released as soon as possible.
600 compressedScript := make([]byte, scriptSize)
601 copy(compressedScript, serialized[bytesRead:bytesRead+scriptSize])
602 return compressedAmount, compressedScript, bytesRead + scriptSize, nil