x86/sbrdsp: Use different mem moves (64-bit loads go through a MOVH alias: movsd when SSE3 is available, movlps otherwise)

author Christophe Gisquet <christophe.gisquet@gmail.com>

Sun, 25 Jan 2015 21:13:10 +0000 (18:13 -0300)

committer James Almer <jamrial@gmail.com>

Sun, 25 Jan 2015 21:20:43 +0000 (18:20 -0300)
author Christophe Gisquet <christophe.gisquet@gmail.com>
Sun, 25 Jan 2015 21:13:10 +0000 (18:13 -0300)
committer James Almer <jamrial@gmail.com>
Sun, 25 Jan 2015 21:20:43 +0000 (18:20 -0300)
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm

index a8ec7ed..083461a 100644 (file)
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -454,13 +454,15 @@ cglobal sbr_autocorrelate, 2,3,8,32, x, phi, cnt
      neg   cntq
  
  %if cpuflag(sse3)
+%define   MOVH  movsd
      movddup m5, [xq+cntq]
  %else
+%define   MOVH  movlps
      movlps  m5, [xq+cntq]
      movlhps m5, m5
  %endif
-    movlps  m7, [xq+cntq+8 ]
-    movlps  m1, [xq+cntq+16]
+    MOVH    m7, [xq+cntq+8 ]
+    MOVH    m1, [xq+cntq+16]
      shufps  m7, m7, q0110
      shufps  m1, m1, q0110
      mulps   m3, m5, m7   ;              x[0][0] * x[1][0], x[0][1] * x[1][1], x[0][0] * x[1][1], x[0][1] * x[1][0]
@@ -470,7 +472,7 @@ cglobal sbr_autocorrelate, 2,3,8,32, x, phi, cnt
      movaps  [rsp+16], m4
      add   cntq, 8
  
-    movlps  m2, [xq+cntq+16]
+    MOVH    m2, [xq+cntq+16]
      movlhps m7, m7
      shufps  m2, m2, q0110
      mulps   m6, m7, m1   ; real_sum1  = x[1][0] * x[2][0], x[1][1] * x[2][1]; imag_sum1 += x[1][0] * x[2][1], x[1][1] * x[2][0]
@@ -481,7 +483,7 @@ cglobal sbr_autocorrelate, 2,3,8,32, x, phi, cnt
  align 16
  .loop:
      add   cntq, 8
-    movlps  m0, [xq+cntq+16]
+    MOVH    m0, [xq+cntq+16]
      movlhps m1, m1
      shufps  m0, m0, q0110
      mulps   m3, m1, m2
@@ -491,7 +493,7 @@ align 16
      addps   m5, m4       ; real_sum2 += x[i][0] * x[i + 2][0], x[i][1] * x[i + 2][1]; imag_sum2 += x[i][0] * x[i + 2][1], x[i][1] * x[i + 2][0];
      addps   m7, m1       ; real_sum0 += x[i][0] * x[i][0],     x[i][1] * x[i][1];
      add   cntq, 8
-    movlps  m1, [xq+cntq+16]
+    MOVH    m1, [xq+cntq+16]
      movlhps m2, m2
      shufps  m1, m1, q0110
      mulps   m3, m2, m0
@@ -501,7 +503,7 @@ align 16
      addps   m5, m4       ; real_sum2 += x[i][0] * x[i + 2][0], x[i][1] * x[i + 2][1]; imag_sum2 += x[i][0] * x[i + 2][1], x[i][1] * x[i + 2][0];
      addps   m7, m2       ; real_sum0 += x[i][0] * x[i][0],     x[i][1] * x[i][1];
      add   cntq, 8
-    movlps  m2, [xq+cntq+16]
+    MOVH    m2, [xq+cntq+16]
      movlhps m0, m0
      shufps  m2, m2, q0110
      mulps   m3, m0, m1
author	Christophe Gisquet <christophe.gisquet@gmail.com>
	Sun, 25 Jan 2015 21:13:10 +0000 (18:13 -0300)
committer	James Almer <jamrial@gmail.com>
	Sun, 25 Jan 2015 21:20:43 +0000 (18:20 -0300)