contrib/patch-x264-linux.patch

   1 Index: /common/ppc/quant.c
   2 ===================================================================
   3 --- /common/ppc/quant.c (revision 601)
   4 +++ /common/ppc/quant.c (revision 621)
   5 @@ -18,8 +18,4 @@
   6  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
   7  *****************************************************************************/
   8 -
   9 -#ifdef HAVE_ALTIVEC_H
  10 -#include <altivec.h>
  11 -#endif
  12
  13  #include "common/common.h"
  14 @@ -54,29 +50,29 @@
  15  temp2v = vec_xor(temp2v, mskB);                                              \
  16  temp1v = vec_adds(temp1v, vec_and(mskA, one));                                \
  17 -vec_st(temp1v, (dct0), dct);                                                 \
  18 +vec_st(temp1v, (dct0), (int16_t*)dct);                                        \
  19  temp2v = vec_adds(temp2v, vec_and(mskB, one));                                \
  20 -vec_st(temp2v, (dct1), dct);
  21 +vec_st(temp2v, (dct1), (int16_t*)dct);
  22
  23  void x264_quant_4x4_altivec( int16_t dct[4][4], int quant_mf[4][4], int const i_qbits, int const f ) {
  24      vector bool short mskA;
  25 -    vec_s32_t i_qbitsv;
  26 +    vec_u32_t i_qbitsv;
  27      vec_u16_t coefvA;
  28      vec_u32_t multEvenvA, multOddvA;
  29 -    vec_u32_t mfvA;
  30 +    vec_u16_t mfvA;
  31      vec_s16_t zerov, one;
  32 -    vec_s32_t fV;
  33 +    vec_u32_t fV;
  34
  35      vector bool short mskB;
  36      vec_u16_t coefvB;
  37      vec_u32_t multEvenvB, multOddvB;
  38 -    vec_u32_t mfvB;
  39 +    vec_u16_t mfvB;
  40
  41      vec_s16_t temp1v, temp2v;
  42
  43 -    vect_sint_u qbits_u;
  44 +    vect_int_u qbits_u;
  45      qbits_u.s[0]=i_qbits;
  46      i_qbitsv = vec_splat(qbits_u.v, 0);
  47
  48 -    vect_sint_u f_u;
  49 +    vect_int_u f_u;
  50      f_u.s[0]=f;
  51
  52 @@ -114,16 +110,16 @@
  53  temp2v = vec_xor(temp2v, mskB);                                 \
  54  temp1v = vec_add(temp1v, vec_and(mskA, one));                   \
  55 -vec_st(temp1v, (dct0), dct);                                    \
  56 +vec_st(temp1v, (dct0), (int16_t*)dct);                          \
  57  temp2v = vec_add(temp2v, vec_and(mskB, one));                   \
  58 -vec_st(temp2v, (dct1), dct);
  59 +vec_st(temp2v, (dct1), (int16_t*)dct);
  60
  61
  62  void x264_quant_4x4_dc_altivec( int16_t dct[4][4], int i_quant_mf, int const i_qbits, int const f ) {
  63      vector bool short mskA;
  64 -    vec_s32_t i_qbitsv;
  65 +    vec_u32_t i_qbitsv;
  66      vec_u16_t coefvA;
  67      vec_u32_t multEvenvA, multOddvA;
  68      vec_s16_t zerov, one;
  69 -    vec_s32_t fV;
  70 +    vec_u32_t fV;
  71
  72      vector bool short mskB;
  73 @@ -133,15 +129,14 @@
  74      vec_s16_t temp1v, temp2v;
  75
  76 -    vec_u32_t mfv;
  77 -    vect_int_u mf_u;
  78 +    vec_u16_t mfv;
  79 +    vect_ushort_u mf_u;
  80      mf_u.s[0]=i_quant_mf;
  81      mfv = vec_splat( mf_u.v, 0 );
  82 -    mfv = vec_packs( mfv, mfv);
  83
  84 -    vect_sint_u qbits_u;
  85 +    vect_int_u qbits_u;
  86      qbits_u.s[0]=i_qbits;
  87      i_qbitsv = vec_splat(qbits_u.v, 0);
  88
  89 -    vect_sint_u f_u;
  90 +    vect_int_u f_u;
  91      f_u.s[0]=f;
  92      fV = vec_splat(f_u.v, 0);
  93 @@ -156,13 +151,15 @@
  94  void x264_quant_8x8_altivec( int16_t dct[8][8], int quant_mf[8][8], int const i_qbits, int const f ) {
  95      vector bool short mskA;
  96 -    vec_s32_t i_qbitsv;
  97 +    vec_u32_t i_qbitsv;
  98      vec_u16_t coefvA;
  99 -    vec_s32_t multEvenvA, multOddvA, mfvA;
 100 +    vec_u32_t multEvenvA, multOddvA;
 101 +    vec_u16_t mfvA;
 102      vec_s16_t zerov, one;
 103 -    vec_s32_t fV;
 104 +    vec_u32_t fV;
 105
 106      vector bool short mskB;
 107      vec_u16_t coefvB;
 108 -    vec_u32_t multEvenvB, multOddvB, mfvB;
 109 +    vec_u32_t multEvenvB, multOddvB;
 110 +    vec_u16_t mfvB;
 111
 112      vec_s16_t temp1v, temp2v;
 113 @@ -172,5 +169,5 @@
 114      i_qbitsv = vec_splat(qbits_u.v, 0);
 115
 116 -    vect_sint_u f_u;
 117 +    vect_int_u f_u;
 118      f_u.s[0]=f;
 119      fV = vec_splat(f_u.v, 0);
 120 Index: /common/ppc/dct.c
 121 ===================================================================
 122 --- /common/ppc/dct.c (revision 604)
 123 +++ /common/ppc/dct.c (revision 621)
 124 @@ -61,6 +61,6 @@
 125      VEC_DCT( dct0v, dct1v, dct2v, dct3v, tmp0v, tmp1v, tmp2v, tmp3v );
 126
 127 -    vec_st(vec_perm(tmp0v, tmp1v, permHighv), 0, dct);
 128 -    vec_st(vec_perm(tmp2v, tmp3v, permHighv), 16, dct);
 129 +    vec_st(vec_perm(tmp0v, tmp1v, permHighv), 0,  (int16_t*)dct);
 130 +    vec_st(vec_perm(tmp2v, tmp3v, permHighv), 16, (int16_t*)dct);
 131  }
 132
 133 @@ -95,12 +95,12 @@
 134      VEC_DCT( dct4v, dct5v, dct6v, dct7v, tmp4v, tmp5v, tmp6v, tmp7v );
 135
 136 -    vec_st(vec_perm(tmp0v, tmp1v, permHighv), 0, dct);
 137 -    vec_st(vec_perm(tmp2v, tmp3v, permHighv), 16, dct);
 138 -    vec_st(vec_perm(tmp4v, tmp5v, permHighv), 32, dct);
 139 -    vec_st(vec_perm(tmp6v, tmp7v, permHighv), 48, dct);
 140 -    vec_st(vec_perm(tmp0v, tmp1v, permLowv),  64, dct);
 141 -    vec_st(vec_perm(tmp2v, tmp3v, permLowv), 80, dct);
 142 -    vec_st(vec_perm(tmp4v, tmp5v, permLowv), 96, dct);
 143 -    vec_st(vec_perm(tmp6v, tmp7v, permLowv), 112, dct);
 144 +    vec_st(vec_perm(tmp0v, tmp1v, permHighv), 0,   (int16_t*)dct);
 145 +    vec_st(vec_perm(tmp2v, tmp3v, permHighv), 16,  (int16_t*)dct);
 146 +    vec_st(vec_perm(tmp4v, tmp5v, permHighv), 32,  (int16_t*)dct);
 147 +    vec_st(vec_perm(tmp6v, tmp7v, permHighv), 48,  (int16_t*)dct);
 148 +    vec_st(vec_perm(tmp0v, tmp1v, permLowv),  64,  (int16_t*)dct);
 149 +    vec_st(vec_perm(tmp2v, tmp3v, permLowv),  80,  (int16_t*)dct);
 150 +    vec_st(vec_perm(tmp4v, tmp5v, permLowv),  96,  (int16_t*)dct);
 151 +    vec_st(vec_perm(tmp6v, tmp7v, permLowv),  112, (int16_t*)dct);
 152  }
 153
 154 @@ -312,6 +312,6 @@
 155  void x264_add8x8_idct8_altivec( uint8_t *dst, int16_t dct[8][8] )
 156  {
 157 -    vec_s16_t onev = vec_splat_s16(1);
 158 -    vec_s16_t twov = vec_splat_s16(2);
 159 +    vec_u16_t onev = vec_splat_u16(1);
 160 +    vec_u16_t twov = vec_splat_u16(2);
 161
 162      dct[0][0] += 32; // rounding for the >>6 at the end
 163 @@ -342,5 +342,5 @@
 164      vec_u8_t perm_ldv = vec_lvsl(0, dst);
 165      vec_u8_t perm_stv = vec_lvsr(8, dst);
 166 -    vec_s16_t sixv = vec_splat_s16(6);
 167 +    vec_u16_t sixv = vec_splat_u16(6);
 168      const vec_u8_t sel = (vec_u8_t) CV(0,0,0,0,0,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1);
 169      LOAD_ZERO;
 170 Index: /common/ppc/quant.h
 171 ===================================================================
 172 --- /common/ppc/quant.h (revision 601)
 173 +++ /common/ppc/quant.h (revision 621)
 174 @@ -19,4 +19,8 @@
 175  *****************************************************************************/
 176
 177 +#ifdef SYS_LINUX
 178 +#include <altivec.h>
 179 +#endif
 180 +
 181  #ifndef _PPC_QUANT_H
 182  #define _PPC_QUANT_H 1
 183 @@ -28,8 +32,7 @@
 184
 185  typedef union {
 186 -  signed int s[4];
 187 -  vector signed int v;
 188 -} vect_sint_u;
 189 -
 190 +  unsigned short s[8];
 191 +  vector unsigned short v;
 192 +} vect_ushort_u;
 193
 194  void x264_quant_4x4_altivec( int16_t dct[4][4], int quant_mf[4][4], int const i_qbits, int const f );