6 #include "FrameDecoder.h"
8 #include "FrameReconstructor.h"
12 #pragma warning(disable : 4799)
16 static BOOL Dequantize_MakeMatrix(
17 DequantizeMatrix_t* m,
18 const DequantizeTable_t* t,
23 for (i = 0; i < 2; i++) {
24 for (p = 0; p < 3; p++) {
34 for (q = 0; q < t->Count[i][p]; q++) {
35 s += t->Size[i][p][q];
41 ss = t->Size[i][p][q];
46 mi = t->Matrix[t->Base[i][p][q ]];
47 mj = t->Matrix[t->Base[i][p][q + 1]];
49 mat = m->Matrix[i][p];
51 for (j = 0; j < 64; j++) {
52 INT32 coeff = (2 * si * mi[j] - 2 * sj * mj[j] + ss) / (2 * ss);
54 INT32 qmin = 8 << ((j == 0) ? i + 1 : i);
55 INT32 qscl = (j == 0) ? t->DCScale[index] : t->ACScale[index];
57 INT32 v = ((qscl * coeff) / 100) * 4;
59 mat[j] = (INT16)((v < qmin) ? qmin : ((v > 4096) ? 4096 : v));
67 static void Filter_Setup(
69 const FilterTable_t* l,
73 INT32 lim = l->Limit[q];
77 memset(t->Delta, 0, sizeof(t->Delta));
81 for (x = 0; x < lim; x++) {
99 static void RunLength_Start(
103 t->Bit = !x; /* invert later */
107 static const UINT8 HLONG[64] = {
108 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
109 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
110 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
111 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
112 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
113 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
114 0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,
115 0x34,0x34,0x34,0x34,0x45,0x45,0x56,0x66
/* Base run length for each long-run token class. RL_LONG_DECODE indexes this
   table with the high nibble of an HLONG entry (token >> 4) and stores the
   result in r.Run before adding any extra bits. */
118 static const INT8 HLONG_BASE[7] = { 1, 2, 4, 6, 10, 18, 34 };
/* Number of extra bits RL_LONG_DECODE fetches (via FETCH_BITS) and adds to the
   HLONG_BASE value of the same class; a 0 entry means nothing extra is read. */
119 static const INT8 HLONG_BITS[7] = { 0, 1, 1, 2, 3, 4, 12 };
121 static const UINT8 HSHORT[32] = {
/* Base run length for each short-run token class. RL_SHORT_DECODE indexes this
   table with the high nibble of an HSHORT entry (token >> 4). */
132 static const INT8 HSHORT_BASE[6] = { 1, 3, 5, 7, 11, 15 };
/* Number of extra bits RL_SHORT_DECODE fetches and adds to the HSHORT_BASE
   value of the same class. Every entry is non-zero, which is why the short
   decoder fetches unconditionally (the long decoder tests for > 0 first). */
133 static const INT8 HSHORT_BITS[6] = { 1, 1, 1, 2, 2, 4 };
/*
 * Refill step for the "long" run-length decoder. Does nothing unless the
 * current run is exhausted (r.Run == 0). Otherwise it:
 *   1. runs LOAD_BITS and looks up HLONG[GET_BITS_I(6)] as `token`;
 *   2. passes the low nibble of the token to RETIRE_BITS
 *      (presumably the code length to consume - confirm against the macro);
 *   3. uses the high nibble as the class index: r.Run = HLONG_BASE[class],
 *      plus HLONG_BITS[class] extra bits read with FETCH_BITS when that
 *      count is non-zero;
 *   4. toggles r.Bit.
 * The expansion site must provide a variable `r` with Run and Bit members and
 * the LOAD_BITS / GET_BITS_I / RETIRE_BITS / FETCH_BITS macros.
 * The last line of the macro ends with a line continuation, so the line that
 * follows it in the file is still part of the macro.
 */
135 #define RL_LONG_DECODE \
136 if (r.Run == 0) { INT32 token; LOAD_BITS token = HLONG[GET_BITS_I(6)]; RETIRE_BITS(token & 0xf); r.Run = HLONG_BASE[token >> 4]; \
137 if (HLONG_BITS[token >> 4] > 0) { INT32 x; FETCH_BITS(x, HLONG_BITS[token >> 4]) r.Run += x; } r.Bit = !(r.Bit); } \
/*
 * Refill step for the "short" run-length decoder. Does nothing unless the
 * current run is exhausted (r.Run == 0). Otherwise it:
 *   1. runs LOAD_BITS and looks up HSHORT[GET_BITS_I(5)] as `token`;
 *   2. passes the low nibble of the token to RETIRE_BITS
 *      (presumably the code length to consume - confirm against the macro);
 *   3. uses the high nibble as the class index: r.Run = HSHORT_BASE[class]
 *      plus HSHORT_BITS[class] extra bits read with FETCH_BITS (always
 *      fetched; no zero-length check is made here);
 *   4. toggles r.Bit.
 * The expansion site must provide a variable `r` with Run and Bit members and
 * the LOAD_BITS / GET_BITS_I / RETIRE_BITS / FETCH_BITS macros.
 * The last line of the macro ends with a line continuation, so the line that
 * follows it in the file is still part of the macro.
 */
140 #define RL_SHORT_DECODE \
141 if (r.Run == 0) { INT32 token, x; LOAD_BITS token = HSHORT[GET_BITS_I(5)]; RETIRE_BITS(token & 0xf); r.Run = HSHORT_BASE[token >> 4]; \
142 FETCH_BITS(x, HSHORT_BITS[token >> 4]) r.Run += x; r.Bit = !(r.Bit); } \
147 static const UINT8 M_MODE[8][8] = {
148 { 0, 0, 0, 0, 0, 0, 0, 0 },
149 { 3, 4, 2, 0, 1, 5, 6, 7 },
150 { 3, 4, 0, 2, 1, 5, 6, 7 },
151 { 3, 2, 4, 0, 1, 5, 6, 7 },
152 { 3, 2, 0, 4, 1, 5, 6, 7 },
153 { 0, 3, 4, 2, 1, 5, 6, 7 },
154 { 0, 5, 3, 4, 2, 1, 6, 7 },
155 { 0, 1, 2, 3, 4, 5, 6, 7 }
158 static const UINT8 H_MODE[128] = {
159 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
160 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
161 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
162 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
163 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
164 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
165 0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,
166 0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x34,0x45,0x45,0x45,0x45,0x56,0x56,0x67,0x77
171 static const INT8 MV_VAL[0x100] = {
172 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
173 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
174 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
175 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
176 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
177 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
178 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
179 -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
180 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
181 -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
182 4, 4, 4, 4, -4, -4, -4, -4,
183 5, 5, 5, 5, -5, -5, -5, -5,
184 6, 6, 6, 6, -6, -6, -6, -6,
185 7, 7, 7, 7, -7, -7, -7, -7,
212 static const INT8 MV_LEN[0x100] = {
213 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
214 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
215 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
216 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
217 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
218 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
219 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
220 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
221 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
222 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
223 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
224 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
225 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
226 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
227 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
228 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
233 static INT32 DecodeMV0(MotionVector_t* mv, INT32 bits)
238 b0 = (bits >> 8) & 0xff;
242 b1 = (bits >> (8 - s)) & 0xff;
249 static INT32 DecodeMV1(MotionVector_t* mv, INT32 bits)
251 INT32 b0 = (bits >> 10) & 0x3f;
252 INT32 b1 = (bits >> 4) & 0x3f;
/* NOTE(review): two-entry lookup table; the name suggests it relates to chroma
   motion vectors, but its use site is outside this excerpt - confirm before
   changing the values. */
269 static const INT32 CMV[2] = { 2, 1 };
273 struct DCTCoefficientsContext {
282 }; /* DCTCoefficientsContext */
/* Shorthand alias for struct DCTCoefficientsContext. */
284 typedef struct DCTCoefficientsContext DCTCoefficientsContext_t;
/* NOTE(review): presumably the number of extra bits that follow each of the
   seven end-of-block run tokens (paired by index with EOB_RUN_BASE); the use
   site is outside this excerpt - confirm. */
288 static const INT8 EOB_BITS_LEN[7] = { 0, 0, 0, 2, 3, 4, 12 };
/* NOTE(review): presumably the base end-of-block run length for each of the
   seven EOB tokens, to which the EOB_BITS_LEN extra bits are added; the use
   site is outside this excerpt - confirm. */
289 static const INT8 EOB_RUN_BASE[7] = { 1, 2, 3, 4, 8, 16, 0 };
/* NOTE(review): presumably a sign multiplier selected by a decoded sign bit
   (0 -> +1, 1 -> -1); the use site is outside this excerpt - confirm. */
291 static const INT8 COEFF_SIGN[2] = { 1, -1 };
293 static const INT8 COEFF_BITS_LEN[32 - 7] = {
301 static const INT8 COEFF_BASE[32 - 7] = {
304 7, 9, 13, 21, 37, 69,
309 static const INT8 RUN_BITS_LEN[32 - 7] = {
311 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
316 static const INT8 RUN_BASE[32 - 7] = {
318 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* NOTE(review): five-entry constant table; its meaning cannot be determined
   from this excerpt because the use site is not visible - confirm before
   changing the values. */
325 static const INT8 COEFFS[5] = { 1, 5, 9, 13, 36 };
329 static void DecodeDCCoefficients(FrameDecoder_t* t)
335 const UINT8* c = t->Count;
339 for (i = 0; i < 3; i++) {
340 const INT8* br = t->BRun [i][0];
341 const INT16* bc = t->BCoeff[i][0];
343 const UINT8* end = c + t->Index->BC[i];
345 const UINT16* bi = t->Index->BIndex[i];
350 for (; c < end && *c == 0xff; c++) {
351 dc[*(bi++)] = NOT_CODED;
379 dc += t->Index->BC[i];
385 static const INT32 DCP_W[16][4] = {
389 { 128, 0, 0, 0 }, /* 1 */
390 { 0, 128, 0, 0 }, /* 2 */
391 { 128, 0, 0, 0 }, /* 3 */
392 { 0, 0, 128, 0 }, /* 4 */
393 { 64, 0, 64, 0 }, /* 5 */
394 { 0, 0, 128, 0 }, /* 6 */
395 { 116, -104, 116, 0 }, /* 7 */
396 { 0, 0, 0, 128 }, /* 8 */
397 { 75, 0, 0, 53 }, /* 9 */
398 { 0, 64, 0, 64 }, /* 10 */
399 { 75, 0, 0, 53 }, /* 11 */
400 { 0, 0, 128, 0 }, /* 12 */
401 { 75, 0, 0, 53 }, /* 13 */
402 { 0, 24, 80, 24 }, /* 14 */
403 { 116, -104, 116, 0 } /* 15 */
407 static const INT32 DCP_T[8] = {
414 static void UndoDCPrediction(
422 const UINT8* mode = t->BMode;
424 for (i = 0; i < 3; i++) {
425 INT32 bx = t->Index->BX[i];
426 INT32 by = t->Index->BY[i];
428 INT16 last[3] = { 0 };
434 for (y = 0; y < by; y++) {
435 for (x = 0; x < bx; x++, idx++) {
437 if (dc != NOT_CODED) {
441 INT32 type = DCP_T[mode[idx]];
446 if (v[0] != NOT_CODED && DCP_T[mode[i0]] == type) {
453 INT32 i1 = idx - bx - 1;
455 if (v[1] != NOT_CODED && DCP_T[mode[i1]] == type) {
463 if (v[2] != NOT_CODED && DCP_T[mode[i2]] == type) {
469 INT32 i3 = idx - bx + 1;
471 if (v[3] != NOT_CODED && DCP_T[mode[i3]] == type) {
479 ( v[0] * DCP_W[t0][0]
480 + v[1] * DCP_W[t0][1]
481 + v[2] * DCP_W[t0][2]
482 + v[3] * DCP_W[t0][3] ) / 128;
484 if ((t0 & 0x7) == 7) {
485 INT32 d = pred - v[2]; /* D */
486 if (d < -128 || d > 128) {
488 } else if (d = pred - v[0], d < -128 || d > 128) { /* L */
490 } else if (d = pred - v[1], d < -128 || d > 128) { /* DL */
508 DC += t->Index->BC[i];
509 mode += t->Index->BC[i];
/*
 * Plain-C variant. Each generic FrameDecoder name below is mapped to its
 * _C-suffixed name before FrameDecoder_Impl.h is included, so anything that
 * header defines under the generic names is emitted with the _C suffix
 * (QT_FrameDecoder_Setup refers to FrameDecoder_Decode_C).
 * NOTE(review): presumably FrameDecoder_Impl.h holds the shared decoder body
 * and QTheoraArch.h selects the per-variant primitives - confirm.
 */
517 #include "QTheoraArch.h"
519 #define FrameHeader_Decode FrameHeader_Decode_C
521 #define FrameDecoder_DecodeCodedBlockFlag FrameDecoder_DecodeCodedBlockFlag_C
522 #define FrameDecoder_DecodeMacroBlockCodingModes FrameDecoder_DecodeMacroBlockCodingModes_C
523 #define FrameDecoder_DecodeMotionVectors FrameDecoder_DecodeMotionVectors_C
525 #define FrameDecoder_DecodeBlocks FrameDecoder_DecodeBlocks_C
526 #define FrameDecoder_DecodeDCTCoefficients FrameDecoder_DecodeDCTCoefficients_C
528 #define FrameDecoder_Decode FrameDecoder_Decode_C
530 #include "FrameDecoder_Impl.h"
/*
 * X86 variant. Each generic FrameDecoder name below is mapped to its
 * _X86-suffixed name before FrameDecoder_Impl.h is included, so anything that
 * header defines under the generic names is emitted with the _X86 suffix
 * (QT_FrameDecoder_Setup refers to FrameDecoder_Decode_X86).
 * NOTE(review): presumably FrameDecoder_Impl.h holds the shared decoder body
 * and QTheoraArch.h selects the per-variant primitives - confirm.
 */
538 #include "QTheoraArch.h"
540 #define FrameHeader_Decode FrameHeader_Decode_X86
542 #define FrameDecoder_DecodeCodedBlockFlag FrameDecoder_DecodeCodedBlockFlag_X86
543 #define FrameDecoder_DecodeMacroBlockCodingModes FrameDecoder_DecodeMacroBlockCodingModes_X86
544 #define FrameDecoder_DecodeMotionVectors FrameDecoder_DecodeMotionVectors_X86
546 #define FrameDecoder_DecodeBlocks FrameDecoder_DecodeBlocks_X86
547 #define FrameDecoder_DecodeDCTCoefficients FrameDecoder_DecodeDCTCoefficients_X86
549 #define FrameDecoder_Decode FrameDecoder_Decode_X86
551 #include "FrameDecoder_Impl.h"
/*
 * MMX variant. Each generic FrameDecoder name below is mapped to its
 * _MMX-suffixed name before FrameDecoder_Impl.h is included, so anything that
 * header defines under the generic names is emitted with the _MMX suffix
 * (QT_FrameDecoder_Setup refers to FrameDecoder_Decode_MMX).
 * NOTE(review): presumably FrameDecoder_Impl.h holds the shared decoder body
 * and QTheoraArch.h selects the per-variant primitives - confirm.
 */
559 #include "QTheoraArch.h"
561 #define FrameHeader_Decode FrameHeader_Decode_MMX
563 #define FrameDecoder_DecodeCodedBlockFlag FrameDecoder_DecodeCodedBlockFlag_MMX
564 #define FrameDecoder_DecodeMacroBlockCodingModes FrameDecoder_DecodeMacroBlockCodingModes_MMX
565 #define FrameDecoder_DecodeMotionVectors FrameDecoder_DecodeMotionVectors_MMX
567 #define FrameDecoder_DecodeBlocks FrameDecoder_DecodeBlocks_MMX
568 #define FrameDecoder_DecodeDCTCoefficients FrameDecoder_DecodeDCTCoefficients_MMX
570 #define FrameDecoder_Decode FrameDecoder_Decode_MMX
572 #include "FrameDecoder_Impl.h"
578 BOOL QT_FrameDecoder_Setup(
580 const BlockIndex_t* index,
581 const SetupHeader_t* setup,
584 extern BOOL g_QT_Enable_X86;
585 extern BOOL g_QT_Enable_MMX;
586 extern BOOL g_QT_Enable_SSE2;
592 memset(t, 0, sizeof(FrameDecoder_t));
599 for (i = 0; i < 3; i++) {
600 Plane_t* r = t->Plane + i * 3;
602 INT32 cb = index->MX * 16 * index->MY * 16;
604 UINT8* p = (UINT8*)QT_MemoryPool_Allocate(pool, sizeof(UINT8) * cb);
610 r->Pitch = index->MX * 16;
611 r->CX = index->MX * 16;
612 r->CY = index->MY * 16;
614 for (j = 1; j < 3; j++) {
615 p = (UINT8*)QT_MemoryPool_Allocate(pool, sizeof(UINT8) * cb / 4);
621 r[j].Pitch = index->MX * 8;
622 r[j].CX = index->MX * 8;
623 r[j].CY = index->MY * 8;
627 for (i = 0; i < 3; i++) {
628 t->Frame[i] = t->Plane + i * 3;
633 if (g_QT_Enable_SSE2) {
634 t->Reconstructor = (FrameReconstructor_SSE2_t*)QT_MemoryPool_Allocate(pool, sizeof(FrameReconstructor_SSE2_t));
635 if (t->Reconstructor == NULL) {
646 t->SBCoded = (INT8*)QT_MemoryPool_Allocate(pool, sizeof(INT8) * index->SBlocks);
647 if (t->SBCoded == NULL) {
651 t->Count = (UINT8*)QT_MemoryPool_Allocate(pool, sizeof(UINT8) * index->Blocks);
652 if (t->Count == NULL) {
656 t->MBMode = (UINT8*)QT_MemoryPool_Allocate(pool, sizeof(UINT8) * index->MC);
657 if (t->MBMode == NULL) {
661 t->BMode = (UINT8*)QT_MemoryPool_Allocate(pool, sizeof(UINT8) * index->Blocks);
662 if (t->BMode == NULL) {
666 t->MV = (MotionVector_t*)QT_MemoryPool_Allocate(pool, sizeof(MotionVector_t) * index->BC[0]);
671 t->MVC = (MotionVector_t*)QT_MemoryPool_Allocate(pool, sizeof(MotionVector_t) * index->BC[1]);
672 if (t->MVC == NULL) {
676 t->DCTRun = (INT8*)QT_MemoryPool_Allocate(pool, sizeof(INT8) * index->Blocks * 64);
677 if (t->DCTRun == NULL) {
681 t->DCTCoeff = (INT16*)QT_MemoryPool_Allocate(pool, sizeof(INT16) * index->Blocks * 64);
682 if (t->DCTCoeff == NULL) {
686 t->DC = (INT16*)QT_MemoryPool_Allocate(pool, sizeof(INT16) * index->Blocks);
693 if (g_QT_Enable_SSE2 || g_QT_Enable_MMX) {
694 t->Decode = FrameDecoder_Decode_MMX;
695 } else if (g_QT_Enable_X86) {
696 t->Decode = FrameDecoder_Decode_X86;
698 t->Decode = FrameDecoder_Decode_C;
703 if (g_QT_Enable_SSE2) {
704 t->UpdateDequantizeMatrix = QT_UpdateDequantizeMatrix_SSE2;
706 t->UpdateDequantizeMatrix = NULL;
709 if (g_QT_Enable_SSE2) {
710 t->Reconstruct = QT_ReconstructFrame_SSE2;
712 t->Reconstruct = QT_ReconstructFrame;
722 BOOL QT_FrameDecoder_DecodeFrame(
727 extern BOOL g_QT_Available_MMX;
729 BOOL b = t->Decode(t, p, size);
731 if (g_QT_Available_MMX) {