Make LOAD4/STORE4 macros more generic.

author Victor Pollex <victor.pollex@web.de>

Tue, 8 Jul 2008 09:24:11 +0000 (09:24 +0000)

committer Benoit Fouet <benoit.fouet@free.fr>

Tue, 8 Jul 2008 09:24:11 +0000 (09:24 +0000)
author Victor Pollex <victor.pollex@web.de>
Tue, 8 Jul 2008 09:24:11 +0000 (09:24 +0000)
committer Benoit Fouet <benoit.fouet@free.fr>
Tue, 8 Jul 2008 09:24:11 +0000 (09:24 +0000)
diff --git a/libavcodec/i386/dsputil_mmx.h b/libavcodec/i386/dsputil_mmx.h

index 1428546..d944dbd 100644 (file)
--- a/libavcodec/i386/dsputil_mmx.h
+++ b/libavcodec/i386/dsputil_mmx.h
@@ -57,6 +57,18 @@ extern const uint64_t ff_pb_FC;
  extern const double ff_pd_1[2];
  extern const double ff_pd_2[2];
  
+#define LOAD4(stride,in,a,b,c,d)\
+    "movq 0*"#stride"+"#in", "#a"\n\t"\
+    "movq 1*"#stride"+"#in", "#b"\n\t"\
+    "movq 2*"#stride"+"#in", "#c"\n\t"\
+    "movq 3*"#stride"+"#in", "#d"\n\t"
+
+#define STORE4(stride,out,a,b,c,d)\
+    "movq "#a", 0*"#stride"+"#out"\n\t"\
+    "movq "#b", 1*"#stride"+"#out"\n\t"\
+    "movq "#c", 2*"#stride"+"#out"\n\t"\
+    "movq "#d", 3*"#stride"+"#out"\n\t"
+
  /* in/out: mma=mma+mmb, mmb=mmb-mma */
  #define SUMSUB_BA( a, b ) \
      "paddw "#b", "#a" \n\t"\
diff --git a/libavcodec/i386/dsputilenc_mmx.c b/libavcodec/i386/dsputilenc_mmx.c

index e7893de..be423f8 100644 (file)
--- a/libavcodec/i386/dsputilenc_mmx.c
+++ b/libavcodec/i386/dsputilenc_mmx.c
@@ -998,18 +998,6 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t
      "paddusw %%xmm1, %%xmm0           \n\t"
  #endif
  
-#define LOAD4(o, a, b, c, d)\
-    "movq "#o"(%1),    "#a"           \n\t"\
-    "movq "#o"+8(%1),  "#b"           \n\t"\
-    "movq "#o"+16(%1), "#c"           \n\t"\
-    "movq "#o"+24(%1), "#d"           \n\t"\
-
-#define STORE4(o, a, b, c, d)\
-    "movq "#a", "#o"(%1)              \n\t"\
-    "movq "#b", "#o"+8(%1)            \n\t"\
-    "movq "#c", "#o"+16(%1)           \n\t"\
-    "movq "#d", "#o"+24(%1)           \n\t"\
-
  /* FIXME: HSUM_* saturates at 64k, while an 8x8 hadamard or dct block can get up to
   * about 100k on extreme inputs. But that's very unlikely to occur in natural video,
   * and it's even more unlikely to not have any alternative mvs/modes with lower cost. */
@@ -1053,11 +1041,11 @@ static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int strid
          "movq %%mm7, 96(%1)             \n\t"\
  \
          TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)\
-        STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2)\
+        STORE4(8,  0(%1), %%mm0, %%mm3, %%mm7, %%mm2)\
  \
          "movq 96(%1), %%mm7             \n\t"\
          TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)\
-        STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6)\
+        STORE4(8, 64(%1), %%mm4, %%mm7, %%mm0, %%mm6)\
  \
          : "=r" (sum)\
          : "r"(temp)\
@@ -1071,7 +1059,7 @@ static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int strid
          "movq %%mm7, 96(%1)             \n\t"\
  \
          TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)\
-        STORE4(32, %%mm0, %%mm3, %%mm7, %%mm2)\
+        STORE4(8, 32(%1), %%mm0, %%mm3, %%mm7, %%mm2)\
  \
          "movq 96(%1), %%mm7             \n\t"\
          TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)\
@@ -1079,7 +1067,7 @@ static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int strid
          "movq %%mm6, %%mm7              \n\t"\
          "movq %%mm0, %%mm6              \n\t"\
  \
-        LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3)\
+        LOAD4(8, 64(%1), %%mm0, %%mm1, %%mm2, %%mm3)\
  \
          HADAMARD48\
          "movq %%mm7, 64(%1)             \n\t"\
@@ -1095,8 +1083,8 @@ static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int strid
          "paddusw %%mm1, %%mm0           \n\t"\
          "movq %%mm0, 64(%1)             \n\t"\
  \
-        LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)\
-        LOAD4(32, %%mm4, %%mm5, %%mm6, %%mm7)\
+        LOAD4(8,  0(%1), %%mm0, %%mm1, %%mm2, %%mm3)\
+        LOAD4(8, 32(%1), %%mm4, %%mm5, %%mm6, %%mm7)\
  \
          HADAMARD48\
          "movq %%mm7, (%1)               \n\t"\
author	Victor Pollex <victor.pollex@web.de>
	Tue, 8 Jul 2008 09:24:11 +0000 (09:24 +0000)
committer	Benoit Fouet <benoit.fouet@free.fr>
	Tue, 8 Jul 2008 09:24:11 +0000 (09:24 +0000)
libavcodec/i386/dsputil_mmx.h		patch | blob | history
libavcodec/i386/dsputilenc_mmx.c		patch | blob | history