OSDN Git Service

dnxhdenc: Optimize get_pixels_8x4_sym for 10-bit
author: Timothy Gu <timothygu99@gmail.com>
Tue, 29 Sep 2015 23:50:02 +0000 (16:50 -0700)
committer: Timothy Gu <timothygu99@gmail.com>
Tue, 20 Oct 2015 19:36:29 +0000 (12:36 -0700)
This reverts commit 628e6d0164febc8e69b0f10dfa487e8a2dd1a28a and uses
a better fix.

Before:
4483 decicycles in get_pixels_8x4_sym,  131032 runs,     40 skips

After:
2569 decicycles in get_pixels_8x4_sym,  131054 runs,     18 skips

libavcodec/dnxhdenc.c

index 36154ac..cad4fcf 100644 (file)
@@ -87,22 +87,14 @@ void dnxhd_10bit_get_pixels_8x4_sym(int16_t *av_restrict block,
                                     const uint8_t *pixels,
                                     ptrdiff_t line_size)
 {
-    int i;
-    const uint16_t* pixels16 = (const uint16_t*)pixels;
-    line_size >>= 1;
-
-    for (i = 0; i < 4; i++) {
-        block[0] = pixels16[0]; block[1] = pixels16[1];
-        block[2] = pixels16[2]; block[3] = pixels16[3];
-        block[4] = pixels16[4]; block[5] = pixels16[5];
-        block[6] = pixels16[6]; block[7] = pixels16[7];
-        pixels16 += line_size;
-        block += 8;
-    }
-    memcpy(block,      block -  8, sizeof(*block) * 8);
-    memcpy(block +  8, block - 16, sizeof(*block) * 8);
-    memcpy(block + 16, block - 24, sizeof(*block) * 8);
-    memcpy(block + 24, block - 32, sizeof(*block) * 8);
+    memcpy(block + 0 * 8, pixels + 0 * line_size, 8 * sizeof(*block));
+    memcpy(block + 7 * 8, pixels + 0 * line_size, 8 * sizeof(*block));
+    memcpy(block + 1 * 8, pixels + 1 * line_size, 8 * sizeof(*block));
+    memcpy(block + 6 * 8, pixels + 1 * line_size, 8 * sizeof(*block));
+    memcpy(block + 2 * 8, pixels + 2 * line_size, 8 * sizeof(*block));
+    memcpy(block + 5 * 8, pixels + 2 * line_size, 8 * sizeof(*block));
+    memcpy(block + 3 * 8, pixels + 3 * line_size, 8 * sizeof(*block));
+    memcpy(block + 4 * 8, pixels + 3 * line_size, 8 * sizeof(*block));
 }
 
 static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, int16_t *block,