}
/* general case, bilinear */
- rnd_reg = rnd ? ff_pw_32 : &ff_pw_28;
+ rnd_reg = rnd ? &ff_pw_32.a : &ff_pw_28;
asm volatile("movd %2, %%mm4\n\t"
"movd %3, %%mm6\n\t"
"punpcklwd %%mm4, %%mm4\n\t"
"sub $2, %2 \n\t"
"jnz 1b \n\t"
: "+r"(dst), "+r"(src), "+r"(h)
- : "r"((long)stride), "m"(*ff_pw_32), "m"(x), "m"(y)
+ : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y)
);
}
"sub $1, %2\n\t"
"jnz 1b\n\t"
: "+r" (dst), "+r"(src), "+r"(h)
- : "m" (*ff_pw_32), "r"((long)stride)
+ : "m" (ff_pw_32), "r"((long)stride)
: "%esi");
}
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_16 ) = 0x0010001000100010ULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
-DECLARE_ALIGNED_16(const uint64_t, ff_pw_32[2]) = {0x0020002000200020ULL, 0x0020002000200020ULL};
+DECLARE_ALIGNED_16(const xmm_t, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_64 ) = 0x0040004000400040ULL;
DECLARE_ALIGNED_8 (const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
#include <stdint.h>
+typedef struct { uint64_t a, b; } xmm_t;
+
extern const uint64_t ff_bone;
extern const uint64_t ff_wtwo;
extern const uint64_t ff_pw_15;
extern const uint64_t ff_pw_16;
extern const uint64_t ff_pw_20;
-extern const uint64_t ff_pw_32[2];
+extern const xmm_t ff_pw_32;
extern const uint64_t ff_pw_42;
extern const uint64_t ff_pw_64;
extern const uint64_t ff_pw_96;
IDCT4_1D( %%mm4, %%mm2, %%mm3, %%mm0, %%mm1 )
"pxor %%mm7, %%mm7 \n\t"
- :: "m"(*ff_pw_32));
+ :: "m"(ff_pw_32));
asm volatile(
STORE_DIFF_4P( %%mm0, %%mm1, %%mm7)
STORE_DIFF_8P(%%xmm0, (%0,%2,2), %%xmm6, %%xmm7)
STORE_DIFF_8P(%%xmm1, (%0,%3), %%xmm6, %%xmm7)
:"+r"(dst)
- :"r"(block), "r"((long)stride), "r"(3L*stride), "m"(*ff_pw_32)
+ :"r"(block), "r"((long)stride), "r"(3L*stride), "m"(ff_pw_32)
);
}
"decl %2 \n\t"\
" jnz 1b \n\t"\
: "+a"(tmp), "+c"(dst), "+m"(h)\
- : "S"((long)dstStride), "m"(*ff_pw_32)\
+ : "S"((long)dstStride), "m"(ff_pw_32)\
: "memory"\
);\
}\
"decl %2 \n\t"\
" jnz 1b \n\t"\
: "+a"(tmp), "+c"(dst), "+m"(h)\
- : "S"((long)dstStride), "m"(*ff_pw_32)\
+ : "S"((long)dstStride), "m"(ff_pw_32)\
: "memory"\
);\
tmp += 8 - size*24;\