/* */
+/*
+ * Fast path used in place of DequantizeIDCT8x8_SSE2: fills all 64 entries
+ * of coeff with the single value ((dc * matrix[0]) + 15) >> 5.
+ *
+ * dc     - DC coefficient of the block.
+ * matrix - dequantization matrix; only matrix[0] is read.
+ * coeff  - output, 8 rows of 8 INT16 values. Written with aligned 128-bit
+ *          stores, so it must be 16-byte aligned.
+ */
+static __inline void DequantizeIDCT8x8_0_SSE2(
+ INT16 dc,
+ const INT16* matrix,
+ INT16* coeff)
+{
+ /* Broadcast with SSE2 directly. Building the value through __m64 (MMX)
+  * intrinsics would require a following _mm_empty() to keep the x87
+  * state usable, and those intrinsics do not exist in MSVC x64 builds. */
+ const __m128i d1 = _mm_set1_epi16((short)(((dc * matrix[0]) + 15) >> 5));
+
+ _mm_store_si128((__m128i*)(coeff + 0 * 8), d1);
+ _mm_store_si128((__m128i*)(coeff + 1 * 8), d1);
+ _mm_store_si128((__m128i*)(coeff + 2 * 8), d1);
+ _mm_store_si128((__m128i*)(coeff + 3 * 8), d1);
+ _mm_store_si128((__m128i*)(coeff + 4 * 8), d1);
+ _mm_store_si128((__m128i*)(coeff + 5 * 8), d1);
+ _mm_store_si128((__m128i*)(coeff + 6 * 8), d1);
+ _mm_store_si128((__m128i*)(coeff + 7 * 8), d1);
+}
+
+/* */
+
struct DecodeCoefficientsContext {
INT32 EOB_Run[64];
return;
}
- DecodeCoefficients_SSE2(t, ctx, block);
-
- block[0] = dc;
+ if (DecodeCoefficients_SSE2(t, ctx, block) >= 2) {
+ block[0] = dc;
+ DequantizeIDCT8x8_SSE2(block, mat[plane], coeff);
- DequantizeIDCT8x8_SSE2(block, mat[plane], coeff);
+ } else {
+ DequantizeIDCT8x8_0_SSE2(dc, mat[plane], coeff);
+ }
Block_CopyIntra8x8_SSE2(p, x, y, coeff);
}
return;
}
- DecodeCoefficients_SSE2(t, ctx, block);
+ if (DecodeCoefficients_SSE2(t, ctx, block) >= 2) {
+ block[0] = dc;
+ DequantizeIDCT8x8_SSE2(block, mat[plane], coeff);
- block[0] = dc;
-
- DequantizeIDCT8x8_SSE2(block, mat[plane], coeff);
+ } else {
+ DequantizeIDCT8x8_0_SSE2(dc, mat[plane], coeff);
+ }
Block_ReviseInter8x8_SSE2(p, x, y, coeff);
}