add ARM code for implementing av_clip_intp2 using the ssat instruction
on Cortex-A8, av_clip_intp2_arm() is faster than av_clip_intp2_c() and
the generic av_clip(), about -19%
Signed-off-by: Peter Meerwald <pmeerw@pmeerw.net>
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
return x;
}
+#define av_clip_intp2 av_clip_intp2_arm
+static av_always_inline av_const int av_clip_intp2_arm(int a, int p)
+{
+ unsigned x;
+ __asm__ ("ssat %0, %2, %1" : "=r"(x) : "r"(a), "i"(p+1));
+ return x;
+}
+
#define av_clip_uintp2 av_clip_uintp2_arm
static av_always_inline av_const unsigned av_clip_uintp2_arm(int a, int p)
{