@OggVorbisソリューション/libvorbis-1.3.2/doc/08-residue.tex

   1 % -*- mode: latex; TeX-master: "Vorbis_I_spec"; -*-
   2 %!TEX root = Vorbis_I_spec.tex
   3 % $Id$
   4 \section{Residue setup and decode} \label{vorbis:spec:residue}
   5
   6 \subsection{Overview}
   7
   8 A residue vector represents the fine detail of the audio spectrum of
   9 one channel in an audio frame after the encoder subtracts the floor
  10 curve and performs any channel coupling.  A residue vector may
  11 represent spectral lines, spectral magnitude, spectral phase or
  12 hybrids as mixed by channel coupling.  The exact semantic content of
  13 the vector does not matter to the residue abstraction.
  14
  15 Whatever the exact qualities, the Vorbis residue abstraction codes the
  16 residue vectors into the bitstream packet, and then reconstructs the
  17 vectors during decode.  Vorbis makes use of three different encoding
  18 variants (numbered 0, 1 and 2) of the same basic vector encoding
  19 abstraction.
  20
  21
  22
  23 \subsection{Residue format}
  24
  25 Residue format partitions each vector in the vector bundle into chunks,
  26 classifies each chunk, encodes the chunk classifications and finally
  27 encodes the chunks themselves using the the specific VQ arrangement
  28 defined for each selected classification.
  29 The exact interleaving and partitioning vary by residue encoding number,
  30 however the high-level process used to classify and encode the residue
  31 vector is the same in all three variants.
  32
  33 A set of coded residue vectors are all of the same length.  High level
  34 coding structure, ignoring for the moment exactly how a partition is
  35 encoded and simply trusting that it is, is as follows:
  36
  37 \begin{itemize}
  38 \item Each vector is partitioned into multiple equal sized chunks
  39 according to configuration specified.  If we have a vector size of
  40 \emph{n}, a partition size \emph{residue_partition_size}, and a total
  41 of \emph{ch} residue vectors, the total number of partitioned chunks
  42 coded is \emph{n}/\emph{residue_partition_size}*\emph{ch}.  It is
  43 important to note that the integer division truncates.  In the below
  44 example, we assume an example \emph{residue_partition_size} of 8.
  45
  46 \item Each partition in each vector has a classification number that
  47 specifies which of multiple configured VQ codebook setups are used to
  48 decode that partition.  The classification numbers of each partition
  49 can be thought of as forming a vector in their own right, as in the
  50 illustration below.  Just as the residue vectors are coded in grouped
  51 partitions to increase encoding efficiency, the classification vector
  52 is also partitioned into chunks.  The integer elements of each scalar
  53 in a classification chunk are built into a single scalar that
  54 represents the classification numbers in that chunk.  In the below
  55 example, the classification codeword encodes two classification
  56 numbers.
  57
  58 \item The values in a residue vector may be encoded monolithically in a
  59 single pass through the residue vector, but more often efficient
  60 codebook design dictates that each vector is encoded as the additive
  61 sum of several passes through the residue vector using more than one
  62 VQ codebook.  Thus, each residue value potentially accumulates values
  63 from multiple decode passes.  The classification value associated with
  64 a partition is the same in each pass, thus the classification codeword
  65 is coded only in the first pass.
  66
  67 \end{itemize}
  68
  69
  70 \begin{center}
  71 \includegraphics[width=\textwidth]{residue-pack}
  72 \captionof{figure}{illustration of residue vector format}
  73 \end{center}
  74
  75
  76
  77 \subsection{residue 0}
  78
  79 Residue 0 and 1 differ only in the way the values within a residue
  80 partition are interleaved during partition encoding (visually treated
  81 as a black box--or cyan box or brown box--in the above figure).
  82
  83 Residue encoding 0 interleaves VQ encoding according to the
  84 dimension of the codebook used to encode a partition in a specific
  85 pass.  The dimension of the codebook need not be the same in multiple
  86 passes, however the partition size must be an even multiple of the
  87 codebook dimension.
  88
  89 As an example, assume a partition vector of size eight, to be encoded
  90 by residue 0 using codebook sizes of 8, 4, 2 and 1:
  91
  92 \begin{programlisting}
  93
  94             original residue vector: [ 0 1 2 3 4 5 6 7 ]
  95
  96 codebook dimensions = 8  encoded as: [ 0 1 2 3 4 5 6 7 ]
  97
  98 codebook dimensions = 4  encoded as: [ 0 2 4 6 ], [ 1 3 5 7 ]
  99
 100 codebook dimensions = 2  encoded as: [ 0 4 ], [ 1 5 ], [ 2 6 ], [ 3 7 ]
 101
 102 codebook dimensions = 1  encoded as: [ 0 ], [ 1 ], [ 2 ], [ 3 ], [ 4 ], [ 5 ], [ 6 ], [ 7 ]
 103
 104 \end{programlisting}
 105
 106 It is worth mentioning at this point that no configurable value in the
 107 residue coding setup is restricted to a power of two.
 108
 109
 110
 111 \subsection{residue 1}
 112
 113 Residue 1 does not interleave VQ encoding.  It represents partition
 114 vector scalars in order.  As with residue 0, however, partition length
 115 must be an integer multiple of the codebook dimension, although
 116 dimension may vary from pass to pass.
 117
 118 As an example, assume a partition vector of size eight, to be encoded
 119 by residue 0 using codebook sizes of 8, 4, 2 and 1:
 120
 121 \begin{programlisting}
 122
 123             original residue vector: [ 0 1 2 3 4 5 6 7 ]
 124
 125 codebook dimensions = 8  encoded as: [ 0 1 2 3 4 5 6 7 ]
 126
 127 codebook dimensions = 4  encoded as: [ 0 1 2 3 ], [ 4 5 6 7 ]
 128
 129 codebook dimensions = 2  encoded as: [ 0 1 ], [ 2 3 ], [ 4 5 ], [ 6 7 ]
 130
 131 codebook dimensions = 1  encoded as: [ 0 ], [ 1 ], [ 2 ], [ 3 ], [ 4 ], [ 5 ], [ 6 ], [ 7 ]
 132
 133 \end{programlisting}
 134
 135
 136
 137 \subsection{residue 2}
 138
 139 Residue type two can be thought of as a variant of residue type 1.
 140 Rather than encoding multiple passed-in vectors as in residue type 1,
 141 the \emph{ch} passed in vectors of length \emph{n} are first
 142 interleaved and flattened into a single vector of length
 143 \emph{ch}*\emph{n}.  Encoding then proceeds as in type 1. Decoding is
 144 as in type 1 with decode interleave reversed. If operating on a single
 145 vector to begin with, residue type 1 and type 2 are equivalent.
 146
 147 \begin{center}
 148 \includegraphics[width=\textwidth]{residue2}
 149 \captionof{figure}{illustration of residue type 2}
 150 \end{center}
 151
 152
 153 \subsection{Residue decode}
 154
 155 \subsubsection{header decode}
 156
 157 Header decode for all three residue types is identical.
 158 \begin{programlisting}
 159   1) [residue_begin] = read 24 bits as unsigned integer
 160   2) [residue_end] = read 24 bits as unsigned integer
 161   3) [residue_partition_size] = read 24 bits as unsigned integer and add one
 162   4) [residue_classifications] = read 6 bits as unsigned integer and add one
 163   5) [residue_classbook] = read 8 bits as unsigned integer
 164 \end{programlisting}
 165
 166 \varname{[residue_begin]} and
 167 \varname{[residue_end]} select the specific sub-portion of
 168 each vector that is actually coded; it implements akin to a bandpass
 169 where, for coding purposes, the vector effectively begins at element
 170 \varname{[residue_begin]} and ends at
 171 \varname{[residue_end]}.  Preceding and following values in
 172 the unpacked vectors are zeroed.  Note that for residue type 2, these
 173 values as well as \varname{[residue_partition_size]}apply to
 174 the interleaved vector, not the individual vectors before interleave.
 175 \varname{[residue_partition_size]} is as explained above,
 176 \varname{[residue_classifications]} is the number of possible
 177 classification to which a partition can belong and
 178 \varname{[residue_classbook]} is the codebook number used to
 179 code classification codewords.  The number of dimensions in book
 180 \varname{[residue_classbook]} determines how many
 181 classification values are grouped into a single classification
 182 codeword.  Note that the number of entries and dimensions in book
 183 \varname{[residue_classbook]}, along with
 184 \varname{[residue_classifications]}, overdetermines to
 185 possible number of classification codewords.
 186 If \varname{[residue_classifications]}\^{}\varname{[residue_classbook]}.dimensions
 187 exceeds \varname{[residue_classbook]}.entries, the
 188 bitstream should be regarded to be undecodable.
 189
 190 Next we read a bitmap pattern that specifies which partition classes
 191 code values in which passes.
 192
 193 \begin{programlisting}
 194   1) iterate [i] over the range 0 ... [residue_classifications]-1 {
 195
 196        2) [high_bits] = 0
 197        3) [low_bits] = read 3 bits as unsigned integer
 198        4) [bitflag] = read one bit as boolean
 199        5) if ( [bitflag] is set ) then [high_bits] = read five bits as unsigned integer
 200        6) vector [residue_cascade] element [i] = [high_bits] * 8 + [low_bits]
 201      }
 202   7) done
 203 \end{programlisting}
 204
 205 Finally, we read in a list of book numbers, each corresponding to
 206 specific bit set in the cascade bitmap.  We loop over the possible
 207 codebook classifications and the maximum possible number of encoding
 208 stages (8 in Vorbis I, as constrained by the elements of the cascade
 209 bitmap being eight bits):
 210
 211 \begin{programlisting}
 212   1) iterate [i] over the range 0 ... [residue_classifications]-1 {
 213
 214        2) iterate [j] over the range 0 ... 7 {
 215
 216             3) if ( vector [residue_cascade] element [i] bit [j] is set ) {
 217
 218                  4) array [residue_books] element [i][j] = read 8 bits as unsigned integer
 219
 220                } else {
 221
 222                  5) array [residue_books] element [i][j] = unused
 223
 224                }
 225           }
 226       }
 227
 228   6) done
 229 \end{programlisting}
 230
 231 An end-of-packet condition at any point in header decode renders the
 232 stream undecodable.  In addition, any codebook number greater than the
 233 maximum numbered codebook set up in this stream also renders the
 234 stream undecodable. All codebooks in array [residue_books] are
 235 required to have a value mapping.  The presence of codebook in array
 236 [residue_books] without a value mapping (maptype equals zero) renders
 237 the stream undecodable.
 238
 239
 240
 241 \subsubsection{packet decode}
 242
 243 Format 0 and 1 packet decode is identical except for specific
 244 partition interleave.  Format 2 packet decode can be built out of the
 245 format 1 decode process.  Thus we describe first the decode
 246 infrastructure identical to all three formats.
 247
 248 In addition to configuration information, the residue decode process
 249 is passed the number of vectors in the submap bundle and a vector of
 250 flags indicating if any of the vectors are not to be decoded.  If the
 251 passed in number of vectors is 3 and vector number 1 is marked 'do not
 252 decode', decode skips vector 1 during the decode loop.  However, even
 253 'do not decode' vectors are allocated and zeroed.
 254
 255 Depending on the values of \varname{[residue_begin]} and
 256 \varname{[residue_end]}, it is obvious that the encoded
 257 portion of a residue vector may be the entire possible residue vector
 258 or some other strict subset of the actual residue vector size with
 259 zero padding at either uncoded end.  However, it is also possible to
 260 set \varname{[residue_begin]} and
 261 \varname{[residue_end]} to specify a range partially or
 262 wholly beyond the maximum vector size.  Before beginning residue
 263 decode, limit \varname{[residue_begin]} and
 264 \varname{[residue_end]} to the maximum possible vector size
 265 as follows.  We assume that the number of vectors being encoded,
 266 \varname{[ch]} is provided by the higher level decoding
 267 process.
 268
 269 \begin{programlisting}
 270   1) [actual_size] = current blocksize/2;
 271   2) if residue encoding is format 2
 272        3) [actual_size] = [actual_size] * [ch];
 273   4) [limit_residue_begin] = maximum of ([residue_begin],[actual_size]);
 274   5) [limit_residue_end] = maximum of ([residue_end],[actual_size]);
 275 \end{programlisting}
 276
 277 The following convenience values are conceptually useful to clarifying
 278 the decode process:
 279
 280 \begin{programlisting}
 281   1) [classwords_per_codeword] = [codebook_dimensions] value of codebook [residue_classbook]
 282   2) [n_to_read] = [limit_residue_end] - [limit_residue_begin]
 283   3) [partitions_to_read] = [n_to_read] / [residue_partition_size]
 284 \end{programlisting}
 285
 286 Packet decode proceeds as follows, matching the description offered earlier in the document.
 287 \begin{programlisting}
 288   1) allocate and zero all vectors that will be returned.
 289   2) if ([n_to_read] is zero), stop; there is no residue to decode.
 290   3) iterate [pass] over the range 0 ... 7 {
 291
 292        4) [partition_count] = 0
 293
 294        5) while [partition_count] is less than [partitions_to_read]
 295
 296             6) if ([pass] is zero) {
 297
 298                  7) iterate [j] over the range 0 .. [ch]-1 {
 299
 300                       8) if vector [j] is not marked 'do not decode' {
 301
 302                            9) [temp] = read from packet using codebook [residue_classbook] in scalar context
 303                           10) iterate [i] descending over the range [classwords_per_codeword]-1 ... 0 {
 304
 305                                11) array [classifications] element [j],([i]+[partition_count]) =
 306                                    [temp] integer modulo [residue_classifications]
 307                                12) [temp] = [temp] / [residue_classifications] using integer division
 308
 309                               }
 310
 311                          }
 312
 313                     }
 314
 315                }
 316
 317            13) iterate [i] over the range 0 .. ([classwords_per_codeword] - 1) while [partition_count]
 318                is also less than [partitions_to_read] {
 319
 320                  14) iterate [j] over the range 0 .. [ch]-1 {
 321
 322                       15) if vector [j] is not marked 'do not decode' {
 323
 324                            16) [vqclass] = array [classifications] element [j],[partition_count]
 325                            17) [vqbook] = array [residue_books] element [vqclass],[pass]
 326                            18) if ([vqbook] is not 'unused') {
 327
 328                                 19) decode partition into output vector number [j], starting at scalar
 329                                     offset [limit_residue_begin]+[partition_count]*[residue_partition_size] using
 330                                     codebook number [vqbook] in VQ context
 331                           }
 332                      }
 333
 334                  20) increment [partition_count] by one
 335
 336                }
 337           }
 338      }
 339
 340  21) done
 341
 342 \end{programlisting}
 343
 344 An end-of-packet condition during packet decode is to be considered a
 345 nominal occurrence.  Decode returns the result of vector decode up to
 346 that point.
 347
 348
 349
 350 \subsubsection{format 0 specifics}
 351
 352 Format zero decodes partitions exactly as described earlier in the
 353 'Residue Format: residue 0' section.  The following pseudocode
 354 presents the same algorithm. Assume:
 355
 356 \begin{itemize}
 357 \item  \varname{[n]} is the value in \varname{[residue_partition_size]}
 358 \item \varname{[v]} is the residue vector
 359 \item \varname{[offset]} is the beginning read offset in [v]
 360 \end{itemize}
 361
 362
 363 \begin{programlisting}
 364  1) [step] = [n] / [codebook_dimensions]
 365  2) iterate [i] over the range 0 ... [step]-1 {
 366
 367       3) vector [entry_temp] = read vector from packet using current codebook in VQ context
 368       4) iterate [j] over the range 0 ... [codebook_dimensions]-1 {
 369
 370            5) vector [v] element ([offset]+[i]+[j]*[step]) =
 371                 vector [v] element ([offset]+[i]+[j]*[step]) +
 372                 vector [entry_temp] element [j]
 373
 374          }
 375
 376     }
 377
 378   6) done
 379
 380 \end{programlisting}
 381
 382
 383
 384 \subsubsection{format 1 specifics}
 385
 386 Format 1 decodes partitions exactly as described earlier in the
 387 'Residue Format: residue 1' section.  The following pseudocode
 388 presents the same algorithm. Assume:
 389
 390 \begin{itemize}
 391 \item  \varname{[n]} is the value in
 392 \varname{[residue_partition_size]}
 393 \item \varname{[v]} is the residue vector
 394 \item \varname{[offset]} is the beginning read offset in [v]
 395 \end{itemize}
 396
 397
 398 \begin{programlisting}
 399  1) [i] = 0
 400  2) vector [entry_temp] = read vector from packet using current codebook in VQ context
 401  3) iterate [j] over the range 0 ... [codebook_dimensions]-1 {
 402
 403       4) vector [v] element ([offset]+[i]) =
 404           vector [v] element ([offset]+[i]) +
 405           vector [entry_temp] element [j]
 406       5) increment [i]
 407
 408     }
 409
 410   6) if ( [i] is less than [n] ) continue at step 2
 411   7) done
 412 \end{programlisting}
 413
 414
 415
 416 \subsubsection{format 2 specifics}
 417
 418 Format 2 is reducible to format 1.  It may be implemented as an additional step prior to and an additional post-decode step after a normal format 1 decode.
 419
 420
 421 Format 2 handles 'do not decode' vectors differently than residue 0 or
 422 1; if all vectors are marked 'do not decode', no decode occurrs.
 423 However, if at least one vector is to be decoded, all the vectors are
 424 decoded.  We then request normal format 1 to decode a single vector
 425 representing all output channels, rather than a vector for each
 426 channel.  After decode, deinterleave the vector into independent vectors, one for each output channel.  That is:
 427
 428 \begin{enumerate}
 429  \item If all vectors 0 through \emph{ch}-1 are marked 'do not decode', allocate and clear a single vector \varname{[v]}of length \emph{ch*n} and skip step 2 below; proceed directly to the post-decode step.
 430  \item Rather than performing format 1 decode to produce \emph{ch} vectors of length \emph{n} each, call format 1 decode to produce a single vector \varname{[v]} of length \emph{ch*n}.
 431  \item Post decode: Deinterleave the single vector \varname{[v]} returned by format 1 decode as described above into \emph{ch} independent vectors, one for each outputchannel, according to:
 432   \begin{programlisting}
 433   1) iterate [i] over the range 0 ... [n]-1 {
 434
 435        2) iterate [j] over the range 0 ... [ch]-1 {
 436
 437             3) output vector number [j] element [i] = vector [v] element ([i] * [ch] + [j])
 438
 439           }
 440      }
 441
 442   4) done
 443   \end{programlisting}
 444
 445 \end{enumerate}
 446
 447
 448
 449
 450
 451
 452