OSDN Git Service

Optimize IDCT processing for blocks that have only a DC coefficient.
authorNoumi Akira <noumiakira@users.sourceforge.jp>
Tue, 7 Jul 2009 08:13:03 +0000 (17:13 +0900)
committerNoumi Akira <noumiakira@users.sourceforge.jp>
Tue, 7 Jul 2009 08:13:03 +0000 (17:13 +0900)
Lib/QTheoraEx/FrameReconstructor_SSE2.c

index b2c0ac1..bfc08a6 100644 (file)
@@ -482,6 +482,26 @@ static __inline void DequantizeIDCT8x8_SSE2(
 
 /* */
 
+static __inline void DequantizeIDCT8x8_0_SSE2( /* DC-only variant: fills the whole 8x8 output with one value derived from dc */
+       INT16        dc, /* DC coefficient; multiplied by matrix[0] below */
+       const INT16* matrix, /* only matrix[0] is read by this function */
+       INT16*       coeff) /* output: 8 rows of 8 INT16 (64 values); written with aligned stores, so it must be 16-byte aligned */
+{
+       __m64   d0 = _mm_set1_pi16(((dc * matrix[0]) + 15) >> 5); /* (dc * matrix[0] + 15) >> 5, replicated into four 16-bit lanes */
+       __m128i d1 = _mm_unpacklo_epi64(_mm_movpi64_epi64(d0), _mm_movpi64_epi64(d0)); /* duplicate the 64-bit half so all eight 16-bit lanes hold the same value */
+
+       _mm_store_si128((__m128i*)(coeff + 0 * 8), d1); /* row 0; each store below writes one full row of eight values */
+       _mm_store_si128((__m128i*)(coeff + 1 * 8), d1); /* row 1 */
+       _mm_store_si128((__m128i*)(coeff + 2 * 8), d1); /* row 2 */
+       _mm_store_si128((__m128i*)(coeff + 3 * 8), d1); /* row 3 */
+       _mm_store_si128((__m128i*)(coeff + 4 * 8), d1); /* row 4 */
+       _mm_store_si128((__m128i*)(coeff + 5 * 8), d1); /* row 5 */
+       _mm_store_si128((__m128i*)(coeff + 6 * 8), d1); /* row 6 */
+       _mm_store_si128((__m128i*)(coeff + 7 * 8), d1); /* row 7 */
+} /* NOTE(review): __m64 values are used above and no _mm_empty() appears in this function - confirm MMX state is cleared elsewhere before any x87 code runs */
+
+/* */
+
 struct DecodeCoefficientsContext {
 
        INT32 EOB_Run[64];
@@ -564,11 +584,13 @@ static void Reconstruct_IntraBlock(
                return;
        }
 
-       DecodeCoefficients_SSE2(t, ctx, block);
-
-       block[0] = dc;
+       if (DecodeCoefficients_SSE2(t, ctx, block) >= 2) {
+               block[0] = dc;
+               DequantizeIDCT8x8_SSE2(block, mat[plane], coeff);
 
-       DequantizeIDCT8x8_SSE2(block, mat[plane], coeff);
+       } else {
+               DequantizeIDCT8x8_0_SSE2(dc, mat[plane], coeff);
+       }
 
        Block_CopyIntra8x8_SSE2(p, x, y, coeff);
 }
@@ -595,11 +617,13 @@ static void Reconstruct_InterBlock(
                return;
        }
 
-       DecodeCoefficients_SSE2(t, ctx, block);
+       if (DecodeCoefficients_SSE2(t, ctx, block) >= 2) {
+               block[0] = dc;
+               DequantizeIDCT8x8_SSE2(block, mat[plane], coeff);
 
-       block[0] = dc;
-
-       DequantizeIDCT8x8_SSE2(block, mat[plane], coeff);
+       } else {
+               DequantizeIDCT8x8_0_SSE2(dc, mat[plane], coeff);
+       }
 
        Block_ReviseInter8x8_SSE2(p, x, y, coeff);
 }