libhb/eedi2.c

   1 /* $Id: eedi2.c,v 1.0 2009/03/06 5:00:00 jbrjake Exp $
   2
   3    This file is part of the HandBrake source code.
   4    Homepage: <http://handbrake.fr/>.
   5    It may be used under the terms of the GNU General Public License.
   6
   7    The EEDI2 interpolator was created by tritical:
   8    http://web.missouri.edu/~kes25c/
   9 */
  10
  11 #include "hb.h"
  12 #include "eedi2.h"
  13
  14 /**
  15  * EEDI2 directional limit lookup table
  16  *
  17  * These values are used to limit the range of edge direction searches and filtering.
  18  */
  19 const int eedi2_limlut[33] __attribute__ ((aligned (16))) = {
  20                          6, 6, 7, 7, 8, 8, 9, 9, 9, 10,
  21                          10, 11, 11, 12, 12, 12, 12, 12, 12, 12,
  22                          12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
  23                          12, -1, -1 };
  24
  25 /**
  26  * Analog of _aligned_malloc
  27  * @param size Size of memory being pointed to
  28  * @param align_size Size of memory chunks to align to (must be power of 2)
  29  */
  30 void *eedi2_aligned_malloc( size_t size, size_t align_size )
  31 {
  32   char * ptr, * ptr2, * aligned_ptr;
  33   int align_mask = align_size - 1;
  34
  35   ptr = (char *)malloc( size + align_size + sizeof( int ) );
  36   if( ptr==NULL ) return( NULL );
  37
  38   ptr2 = ptr + sizeof( int );
  39   aligned_ptr = ptr2 + ( align_size - ( (size_t)ptr2 & align_mask ) );
  40
  41
  42   ptr2 = aligned_ptr - sizeof( int );
  43   *( (int *)ptr2 ) = (int)( aligned_ptr - ptr );
  44
  45   return( aligned_ptr );
  46 }
  47
  48 /**
  49  * Analog of _aligned_free
  50  * @param ptr The aligned pointer, created with eedi2_aligned_malloc, to be freed
  51  */
  52 void eedi2_aligned_free( void *ptr )
  53 {
  54   int * ptr2 = (int *)ptr - 1;
  55   ptr -= * ptr2;
  56   free(ptr);
  57 }
  58
  59 /**
  60  * Sorts metrics for median filtering
  61  * @param order Pointer to the table of values to sort
  62  * @param length Length of the order array
  63  */
  64 void eedi2_sort_metrics( int *order, const int length )
  65 {
  66     int i;
  67     for( i = 1; i < length; ++i )
  68     {
  69         int j = i;
  70         const int temp = order[j];
  71         while( j > 0 && order[j-1] > temp )
  72         {
  73             order[j] = order[j-1];
  74             --j;
  75         }
  76         order[j] = temp;
  77     }
  78 }
  79
  80 /**
  81  * Bitblits an image plane (overwrites one bitmap with another)
  82  * @param dtsp Pointer to destination bitmap
  83  * @param dst_pitch Stride of destination bitmap
  84  * @param srcp Pointer to source bitmap
  85  * @param src_pitch Stride of destination bitmap
  86  * @param row_size Width of the bitmap being copied
  87  * @param height Height of the source bitmap
  88  *
  89  * When row_size, dst_pitch, and src_pitch are equal, eedi2_bit_blit can work more quickly by copying the whole plane at once instead of individual lines.
  90  */
  91 void eedi2_bit_blit( uint8_t * dstp, int dst_pitch,
  92                      const uint8_t * srcp, int src_pitch,
  93                      int row_size, int height )
  94 {
  95     if( ( !height ) || ( !row_size ) )
  96         return;
  97
  98     if( height == 1 || ( dst_pitch == src_pitch && src_pitch == row_size ) )
  99     {
 100         memcpy( dstp, srcp, row_size * height );
 101     }
 102     else
 103     {
 104         int y;
 105         for( y = height; y > 0; --y )
 106         {
 107             memcpy( dstp, srcp, row_size );
 108             dstp += dst_pitch;
 109             srcp += src_pitch;
 110         }
 111     }
 112 }
 113
 114 /**
 115  * A specialized variant of bit_blit, just for setting up the initial, field-sized bitmap planes that EEDI2 interpolates from.
 116  * @param src Pointer to source bitmap plane being copied from
 117  * @param dst Pointer to the destination bitmap plane being copied to
 118  * @param pitch Stride of both bitmaps
 119  * @param height Height of the original, full-size src plane being copied from
 120  */
 121 void eedi2_fill_half_height_buffer_plane( uint8_t * src, uint8_t * dst, int pitch, int height )
 122 {
 123     /* When TFF, we want to copy alternating
 124        lines starting at 0, the top field.
 125        When BFF, we want to start at line 1. */
 126     int y;
 127     for( y = height; y > 0; y = y - 2 )
 128     {
 129       memcpy( dst, src, pitch );
 130       dst += pitch;
 131       src += pitch * 2;
 132     }
 133 }
 134
 135 /**
 136  * A specialized variant of bit_blit, just for resizing the field-height maps EEDI2 generates to frame-height...a simple line doubler
 137  * @param srcp Pointer to source bitmap plane being copied from
 138  * @param dstp Pointer to the destination bitmap plane being copied to
 139  * @param height Height of the input, half-size src plane being copied from
 140  * @param pitch Stride of both bitmaps
 141  */
 142 void eedi2_upscale_by_2( uint8_t * srcp, uint8_t * dstp, int height, int pitch )
 143 {
 144     int y;
 145     for( y = height; y > 0; y-- )
 146     {
 147       memcpy( dstp, srcp, pitch );
 148       dstp += pitch;
 149       memcpy( dstp, srcp, pitch );
 150       srcp += pitch;
 151       dstp += pitch;
 152     }
 153 }
 154
 155 /**
 156  * Finds places where verticaly adjacent pixels abruptly change in intensity, i.e., sharp edges.
 157  * @param dstp Pointer to the destination bitmap
 158  * @param dst_pitch Stride of dstp
 159  * @param srcp Pointer to the source bitmap
 160  * @param src_pitch Stride of srcp
 161  * @param mtresh Magnitude threshold, ensures it doesn't mark edges on pixels that are too similar (10 is a good default value)
 162  * @param vthresh Variance threshold, ensures it doesn't look for edges in highly random pixel blocks (20 is a good default value)
 163  * @param lthresh Laplacian threshold, ensures edges are still prominent in the 2nd spatial derivative of the srcp plane (20 is a good default value)
 164  * @param height Height of half-height single-field frame
 165  * @param width Width of srcp bitmap rows, as opposed to the padded stride in src_pitch
 166  */
 167 void eedi2_build_edge_mask( uint8_t * dstp, int dst_pitch, uint8_t *srcp, int src_pitch,
 168                             int mthresh, int lthresh, int vthresh, int height, int width )
 169 {
 170     int x, y;
 171
 172     mthresh = mthresh * 10;
 173     vthresh = vthresh * 81;
 174
 175     memset( dstp, 0, ( height / 2 ) * dst_pitch );
 176
 177     srcp += src_pitch;
 178     dstp += dst_pitch;
 179     unsigned char *srcpp = srcp-src_pitch;
 180     unsigned char *srcpn = srcp+src_pitch;
 181     for( y = 1; y < height - 1; ++y )
 182     {
 183         for( x = 1; x < width-1; ++x )
 184         {
 185             if( ( abs( srcpp[x]  -   srcp[x] ) < 10 &&
 186                   abs(  srcp[x]  -  srcpn[x] ) < 10 &&
 187                   abs( srcpp[x]  -  srcpn[x] ) < 10 )
 188               ||
 189                 ( abs( srcpp[x-1] -  srcp[x-1] ) < 10 &&
 190                   abs(  srcp[x-1] - srcpn[x-1] ) < 10 &&
 191                   abs( srcpp[x-1] - srcpn[x-1] ) < 10 &&
 192                   abs( srcpp[x+1] -  srcp[x+1] ) < 10 &&
 193                   abs(  srcp[x+1] - srcpn[x+1] ) < 10 &&
 194                   abs( srcpp[x+1] - srcpn[x+1] ) < 10) )
 195                 continue;
 196
 197             const int sum = srcpp[x-1] + srcpp[x] + srcpp[x+1] +
 198                              srcp[x-1] +  srcp[x]+   srcp[x+1] +
 199                             srcpn[x-1] + srcpn[x] + srcpn[x+1];
 200
 201             const int sumsq = srcpp[x-1] * srcpp[x-1] +
 202                               srcpp[x]   * srcpp[x]   +
 203                               srcpp[x+1] * srcpp[x+1] +
 204                                srcp[x-1] *  srcp[x-1] +
 205                                srcp[x]   *  srcp[x]   +
 206                                srcp[x+1] *  srcp[x+1] +
 207                               srcpn[x-1] * srcpn[x-1] +
 208                               srcpn[x]   * srcpn[x]   +
 209                               srcpn[x+1] * srcpn[x+1];
 210
 211             if( 9 * sumsq-sum * sum < vthresh )
 212                 continue;
 213
 214             const int Ix = srcp[x+1] - srcp[x-1];
 215             const int Iy = MAX( MAX( abs( srcpp[x] - srcpn[x] ),
 216                                      abs( srcpp[x] -  srcp[x] ) ),
 217                                 abs( srcp[x] - srcpn[x] ) );
 218             if( Ix * Ix + Iy * Iy >= mthresh )
 219             {
 220                 dstp[x] = 255;
 221                 continue;
 222             }
 223
 224             const int Ixx =  srcp[x-1] - 2 * srcp[x] +  srcp[x+1];
 225             const int Iyy = srcpp[x]   - 2 * srcp[x] + srcpn[x];
 226             if( abs( Ixx ) + abs( Iyy ) >= lthresh )
 227                 dstp[x] = 255;
 228         }
 229         dstp += dst_pitch;
 230         srcpp += src_pitch;
 231         srcp += src_pitch;
 232         srcpn += src_pitch;
 233     }
 234 }
 235
 236 /**
 237  * Expands and smooths out the edge mask
 238  * @param mskp Pointer to the source edge mask being read from
 239  * @param msk_pitch Stride of mskp
 240  * @param dstp Pointer to the destination to store the dilated edge mask
 241  * @param dst_pitch Stride of dstp
 242  * @param dstr Dilation threshold, ensures a pixel is only retained as an edge in dstp if this number of adjacent pixels or greater are also edges in mskp (4 is a good default value)
 243  * @param height Height of half-height field-sized frame
 244  * @param width Width of mskp bitmap rows, as opposed to the pdded stride in msk_pitch
 245  */
 246 void eedi2_dilate_edge_mask( uint8_t *mskp, int msk_pitch, uint8_t *dstp, int dst_pitch,
 247                              int dstr, int height, int width )
 248 {
 249     int x, y;
 250
 251     eedi2_bit_blit( dstp, dst_pitch, mskp, msk_pitch, width, height );
 252
 253     mskp += msk_pitch;
 254     unsigned char *mskpp = mskp - msk_pitch;
 255     unsigned char *mskpn = mskp + msk_pitch;
 256     dstp += dst_pitch;
 257     for( y = 1; y < height - 1; ++y )
 258     {
 259         for( x = 1; x < width - 1; ++x )
 260         {
 261             if( mskp[x] != 0 )
 262                 continue;
 263
 264             int count = 0;
 265             if( mskpp[x-1] == 0xFF ) ++count;
 266             if( mskpp[x]   == 0xFF ) ++count;
 267             if( mskpp[x+1] == 0xFF ) ++count;
 268             if(  mskp[x-1] == 0xFF ) ++count;
 269             if(  mskp[x+1] == 0xFF ) ++count;
 270             if( mskpn[x-1] == 0xFF ) ++count;
 271             if( mskpn[x]   == 0xFF ) ++count;
 272             if( mskpn[x+1] == 0xFF ) ++count;
 273
 274             if( count >= dstr )
 275                 dstp[x] = 0xFF;
 276         }
 277         mskpp += msk_pitch;
 278         mskp += msk_pitch;
 279         mskpn += msk_pitch;
 280         dstp += dst_pitch;
 281     }
 282 }
 283
 284 /**
 285  * Contracts the edge mask
 286  * @param mskp Pointer to the source edge mask being read from
 287  * @param msk_pitch Stride of mskp
 288  * @param dstp Pointer to the destination to store the eroded edge mask
 289  * @param dst_pitch Stride of dstp
 290  * @param estr Erosion threshold, ensures a pixel isn't retained as an edge in dstp if fewer than this number of adjacent pixels are also edges in mskp (2 is a good default value)
 291  * @param height Height of half-height field-sized frame
 292  * @param width Width of mskp bitmap rows, as opposed to the pdded stride in msk_pitch
 293  */
 294 void eedi2_erode_edge_mask( uint8_t *mskp, int msk_pitch, uint8_t *dstp, int dst_pitch,
 295                             int estr, int height, int width )
 296 {
 297     int x, y;
 298
 299     eedi2_bit_blit( dstp, dst_pitch, mskp, msk_pitch, width, height );
 300
 301     mskp += msk_pitch;
 302     unsigned char *mskpp = mskp - msk_pitch;
 303     unsigned char *mskpn = mskp + msk_pitch;
 304     dstp += dst_pitch;
 305     for ( y = 1; y < height - 1; ++y )
 306     {
 307         for ( x = 1; x < width - 1; ++x )
 308         {
 309             if( mskp[x] != 0xFF ) continue;
 310
 311             int count = 0;
 312             if  ( mskpp[x-1] == 0xFF ) ++count;
 313             if  ( mskpp[x]   == 0xFF ) ++count;
 314             if  ( mskpp[x+1] == 0xFF ) ++count;
 315             if  (  mskp[x-1] == 0xFF ) ++count;
 316             if  (  mskp[x+1] == 0xFF ) ++count;
 317             if  ( mskpn[x-1] == 0xFF ) ++count;
 318             if  ( mskpn[x]   == 0xFF ) ++count;
 319             if  ( mskpn[x+1] == 0xFF ) ++count;
 320
 321             if  ( count < estr) dstp[x] = 0;
 322         }
 323         mskpp += msk_pitch;
 324         mskp += msk_pitch;
 325         mskpn += msk_pitch;
 326         dstp += dst_pitch;
 327     }
 328 }
 329
 330 /**
 331  * Smooths out horizontally aligned holes in the mask
 332  *
 333  * If none of the 6 horizontally adjacent pixels are edges, mark the current pixel as not edged.
 334  * If at least 1 of the 3 on either side are edges, mark the current pixel as an edge.
 335  *
 336  * @param mskp Pointer to the source edge mask being read from
 337  * @param msk_pitch Stride of mskp
 338  * @param dstp Pointer to the destination to store the smoothed edge mask
 339  * @param dst_pitch Stride of dstp
 340  * @param height Height of half-height field-sized frame
 341  * @param width Width of mskp bitmap rows, as opposed to the pdded stride in msk_pitch
 342  */
 343 void eedi2_remove_small_gaps( uint8_t * mskp, int msk_pitch, uint8_t * dstp, int dst_pitch,
 344                               int height, int width )
 345 {
 346     int x, y;
 347
 348     eedi2_bit_blit( dstp, dst_pitch, mskp, msk_pitch, width, height );
 349
 350     mskp += msk_pitch;
 351     dstp += dst_pitch;
 352     for( y = 1; y < height - 1; ++y )
 353     {
 354         for( x = 3; x < width - 3; ++x )
 355         {
 356             if( mskp[x] )
 357             {
 358                 if( mskp[x-3] ) continue;
 359                 if( mskp[x-2] ) continue;
 360                 if( mskp[x-1] ) continue;
 361                 if( mskp[x+1] ) continue;
 362                 if( mskp[x+2] ) continue;
 363                 if( mskp[x+3] ) continue;
 364                 dstp[x] = 0;
 365             }
 366             else
 367             {
 368                 if ( ( mskp[x+1] && ( mskp[x-1] || mskp[x-2] || mskp[x-3] ) ) ||
 369                      ( mskp[x+2] && ( mskp[x-1] || mskp[x-2] ) ) ||
 370                      ( mskp[x+3] && mskp[x-1] ) )
 371                     dstp[x] = 0xFF;
 372             }
 373         }
 374         mskp += msk_pitch;
 375         dstp += dst_pitch;
 376     }
 377 }
 378
 379 /**
 380  * Calculates spatial direction vectors for the edges. This is EEDI2's timesink, and can be thought of as YADIF_CHECK on steroids, as both try to discern which angle a given edge follows
 381  * @param plane The plane of the image being processed, to know to reduce maxd for chroma planes (HandBrake only works with YUV420 video so it is assumed they are half-height)
 382  * @param mskp Pointer to the source edge mask being read from
 383  * @param msk_pitch Stride of mskp
 384  * @param srcp Pointer to the source image being filtered
 385  * @param src_pitch Stride of srcp
 386  * @param dstp Pointer to the destination to store the dilated edge mask
 387  * @param dst_pitch Stride of dstp
 388  * @param maxd Maximum pixel distance to search (24 is a good default value)
 389  * @param nt Noise threshold (50 is a good default value)
 390  * @param height Height of half-height field-sized frame
 391  * @param width Width of srcp bitmap rows, as opposed to the pdded stride in src_pitch
 392  */
 393 void eedi2_calc_directions( const int plane, uint8_t * mskp, int msk_pitch, uint8_t * srcp, int src_pitch,
 394                             uint8_t * dstp, int dst_pitch, int maxd, int nt, int height, int width  )
 395 {
 396     int x, y, u, i;
 397
 398     memset( dstp, 255, dst_pitch * height );
 399     mskp += msk_pitch;
 400     dstp += dst_pitch;
 401     srcp += src_pitch;
 402     unsigned char *src2p = srcp - src_pitch * 2;
 403     unsigned char *srcpp = srcp - src_pitch;
 404     unsigned char *srcpn = srcp + src_pitch;
 405     unsigned char *src2n = srcp + src_pitch * 2;
 406     unsigned char *mskpp = mskp - msk_pitch;
 407     unsigned char *mskpn = mskp + msk_pitch;
 408     const int maxdt = plane == 0 ? maxd : ( maxd >> 1 );
 409
 410     for( y = 1; y < height - 1; ++y )
 411     {
 412         for( x = 1; x < width - 1; ++x )
 413         {
 414             if( mskp[x] != 0xFF || ( mskp[x-1] != 0xFF && mskp[x+1] != 0xFF ) )
 415                 continue;
 416             const int startu = MAX( -x + 1, -maxdt );
 417             const int stopu = MIN( width - 2 - x, maxdt );
 418             int minb = MIN( 13 * nt,
 419                             ( abs( srcp[x] - srcpn[x] ) +
 420                               abs( srcp[x] - srcpp[x] ) ) * 6 );
 421             int mina = MIN( 19 * nt,
 422                             ( abs( srcp[x] - srcpn[x] ) +
 423                               abs( srcp[x] - srcpp[x] ) ) * 9 );
 424             int minc = mina;
 425             int mind = minb;
 426             int mine = minb;
 427             int dira = -5000, dirb = -5000, dirc = -5000, dird = -5000, dire = -5000;
 428             for( u = startu; u <= stopu; ++u )
 429             {
 430                 if( y == 1 ||
 431                       mskpp[x-1+u] == 0xFF || mskpp[x+u] == 0xFF || mskpp[x+1+u] == 0xFF )
 432                 {
 433                     if( y == height - 2 ||
 434                         mskpn[x-1-u] == 0xFF || mskpn[x-u] == 0xFF || mskpn[x+1-u] == 0xFF )
 435                     {
 436                         const int diffsn = abs(  srcp[x-1] - srcpn[x-1-u] ) +
 437                                            abs(  srcp[x]   - srcpn[x-u] )   +
 438                                            abs(  srcp[x+1] - srcpn[x+1-u] );
 439
 440                         const int diffsp = abs(  srcp[x-1] - srcpp[x-1+u] ) +
 441                                            abs(  srcp[x]   - srcpp[x+u] )   +
 442                                            abs(  srcp[x+1] - srcpp[x+1+u] );
 443
 444                         const int diffps = abs( srcpp[x-1] -  srcp[x-1-u] ) +
 445                                            abs( srcpp[x]   -  srcp[x-u] )   +
 446                                            abs( srcpp[x+1] -  srcp[x+1-u] );
 447
 448                         const int diffns = abs( srcpn[x-1] -  srcp[x-1+u] ) +
 449                                            abs( srcpn[x]   -  srcp[x+u] )   +
 450                                            abs( srcpn[x+1] -  srcp[x+1+u] );
 451
 452                         const int diff = diffsn + diffsp + diffps + diffns;
 453                         int diffd = diffsp + diffns;
 454                         int diffe = diffsn + diffps;
 455                         if( diff < minb )
 456                         {
 457                             dirb = u;
 458                             minb = diff;
 459                         }
 460                         if( __builtin_expect( y > 1, 1) )
 461                         {
 462                             const int diff2pp = abs( src2p[x-1] - srcpp[x-1-u] ) +
 463                                             abs( src2p[x]   - srcpp[x-u] )   +
 464                                             abs( src2p[x+1] - srcpp[x+1-u] );
 465                             const int diffp2p = abs( srcpp[x-1] - src2p[x-1+u] ) +
 466                                             abs( srcpp[x]   - src2p[x+u] )   +
 467                                             abs( srcpp[x+1] - src2p[x+1+u] );
 468                             const int diffa = diff + diff2pp + diffp2p;
 469                             diffd += diffp2p;
 470                             diffe += diff2pp;
 471                             if( diffa < mina )
 472                             {
 473                                 dira = u;
 474                                 mina = diffa;
 475                             }
 476                         }
 477                         if( __builtin_expect( y < height-2, 1) )
 478                         {
 479                             const int diff2nn = abs( src2n[x-1] - srcpn[x-1+u] ) +
 480                                                 abs( src2n[x]   - srcpn[x+u] )   +
 481                                                 abs( src2n[x+1] - srcpn[x+1+u] );
 482                             const int diffn2n = abs( srcpn[x-1] - src2n[x-1-u] ) +
 483                                                 abs( srcpn[x]   - src2n[x-u] )   +
 484                                                 abs( srcpn[x+1] - src2n[x+1-u] );
 485                             const int diffc = diff + diff2nn + diffn2n;
 486                             diffd += diff2nn;
 487                             diffe += diffn2n;
 488                             if( diffc < minc )
 489                             {
 490                                 dirc = u;
 491                                 minc = diffc;
 492                             }
 493                         }
 494                         if( diffd < mind )
 495                         {
 496                             dird = u;
 497                             mind = diffd;
 498                         }
 499                         if( diffe < mine )
 500                         {
 501                             dire = u;
 502                             mine = diffe;
 503                         }
 504                     }
 505                 }
 506             }
 507             int order[5], k=0;
 508             if( dira != -5000 ) order[k++] = dira;
 509             if( dirb != -5000 ) order[k++] = dirb;
 510             if( dirc != -5000 ) order[k++] = dirc;
 511             if( dird != -5000 ) order[k++] = dird;
 512             if( dire != -5000 ) order[k++] = dire;
 513             if( k > 1 )
 514             {
 515                 eedi2_sort_metrics( order, k );
 516                 const int mid = ( k & 1 ) ?
 517                                     order[k>>1] :
 518                                     ( order[(k-1)>>1] + order[k>>1] + 1 ) >> 1;
 519                 const int tlim = MAX( eedi2_limlut[abs(mid)] >> 2, 2 );
 520                 int sum = 0, count = 0;
 521                 for( i = 0; i < k; ++i )
 522                 {
 523                     if( abs( order[i] - mid ) <= tlim )
 524                     {
 525                         ++count;
 526                         sum += order[i];
 527                     }
 528                 }
 529                 if( count > 1 )
 530                     dstp[x] = 128 + ( (int)( (float)sum / (float)count ) * 4 );
 531                 else
 532                     dstp[x] = 128;
 533             }
 534             else dstp[x] = 128;
 535         }
 536         mskpp += msk_pitch;
 537         mskp += msk_pitch;
 538         mskpn += msk_pitch;
 539         src2p += src_pitch;
 540         srcpp += src_pitch;
 541         srcp += src_pitch;
 542         srcpn += src_pitch;
 543         src2n += src_pitch;
 544         dstp += dst_pitch;
 545     }
 546 }
 547
 548 /**
 549  * Filters the edge mask
 550  * @param mskp Pointer to the source edge mask being read from
 551  * @param msk_pitch Stride of mskp
 552  * @param dmskp Pointer to the edge direction mask
 553  * @param dmsk_pitch Stride of dmskp
 554  * @param dstp Pointer to the destination to store the filtered edge mask
 555  * @param dst_pitch Stride of dstp
 556  * @param height Height of half-height field-sized frame
 557  * @param width Width of mskp bitmap rows, as opposed to the pdded stride in msk_pitch
 558  */
 559 void eedi2_filter_map( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
 560                        uint8_t * dstp, int dst_pitch, int height, int width )
 561 {
 562     int x, y, j;
 563
 564     eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
 565
 566     mskp += msk_pitch;
 567     dmskp += dmsk_pitch;
 568     dstp += dst_pitch;
 569     unsigned char *dmskpp = dmskp - dmsk_pitch;
 570     unsigned char *dmskpn = dmskp + dmsk_pitch;
 571
 572     for( y = 1; y < height - 1; ++y )
 573     {
 574         for( x = 1; x < width - 1; ++x )
 575         {
 576             if( dmskp[x] == 0xFF || mskp[x] != 0xFF )
 577                 continue;
 578             const int dir = ( dmskp[x] - 128 ) >> 2;
 579             const int lim = MAX( abs( dir ) * 2, 12 );
 580             int ict = 0, icb = 0;
 581             if( dir < 0 )
 582             {
 583                 const int dirt = MAX( -x, dir );
 584                 for( j = dirt; j <= 0; ++j )
 585                 {
 586                     if( ( abs( dmskpp[x+j] - dmskp[x] ) > lim && dmskpp[x+j] != 0xFF ) ||
 587                         ( dmskp[x+j] == 0xFF && dmskpp[x+j] == 0xFF ) ||
 588                         ( abs(  dmskp[x+j] - dmskp[x] ) > lim &&  dmskp[x+j] != 0xFF ) )
 589                     {
 590                         ict = 1;
 591                         break;
 592                     }
 593                 }
 594             }
 595             else
 596             {
 597                 const int dirt = MIN( width - x - 1, dir );
 598                 for( j = 0; j <= dirt; ++j )
 599                 {
 600                     if( ( abs( dmskpp[x+j] - dmskp[x] ) > lim && dmskpp[x+j] != 0xFF ) ||
 601                         ( dmskp[x+j] == 0xFF && dmskpp[x+j] == 0xFF ) ||
 602                         ( abs(  dmskp[x+j] - dmskp[x] ) > lim &&  dmskp[x+j] != 0xFF ) )
 603                     {
 604                         ict = 1;
 605                         break;
 606                     }
 607                 }
 608             }
 609             if( ict )
 610             {
 611                 if( dir < 0 )
 612                 {
 613                     const int dirt = MIN( width - x - 1, abs( dir ) );
 614                     for( j = 0; j <= dirt; ++j )
 615                     {
 616                         if( ( abs( dmskpn[x+j] - dmskp[x] ) > lim && dmskpn[x+j] != 0xFF ) ||
 617                             ( dmskpn[x+j] == 0xFF && dmskp[x+j] == 0xFF ) ||
 618                             ( abs(  dmskp[x+j] - dmskp[x] ) > lim &&  dmskp[x+j] != 0xFF ) )
 619                         {
 620                             icb = 1;
 621                             break;
 622                         }
 623                     }
 624                 }
 625                 else
 626                 {
 627                     const int dirt = MAX( -x, -dir );
 628                     for( j = dirt; j <= 0; ++j )
 629                     {
 630                         if( ( abs( dmskpn[x+j] - dmskp[x] ) > lim && dmskpn[x+j] != 0xFF ) ||
 631                             ( dmskpn[x+j] == 0xFF && dmskp[x+j] == 0xFF ) ||
 632                             ( abs(  dmskp[x+j] - dmskp[x] ) > lim &&  dmskp[x+j] != 0xFF ) )
 633                         {
 634                             icb = 1;
 635                             break;
 636                         }
 637                     }
 638                 }
 639                 if( icb )
 640                     dstp[x] = 255;
 641             }
 642         }
 643         mskp += msk_pitch;
 644         dmskpp += dmsk_pitch;
 645         dmskp += dmsk_pitch;
 646         dmskpn += dmsk_pitch;
 647         dstp += dst_pitch;
 648     }
 649 }
 650
 651
 652 /**
 653  * Filters the edge direction mask
 654  * @param mskp Pointer to the edge mask
 655  * @param msk_pitch Stride of mskp
 656  * @param dmskp Pointer to the edge direction mask being read from
 657  * @param dmsk_pitch Stride of dmskp
 658  * @param dstp Pointer to the destination to store the filtered edge direction mask
 659  * @param dst_pitch Stride of dstp
 660  * @param height Height of half_height field-sized frame
 661  * @param width Width of dmskp bitmap rows, as opposed to the pdded stride in dmsk_pitch
 662  */
 663 void eedi2_filter_dir_map( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
 664                            uint8_t * dstp, int dst_pitch, int height, int width )
 665 {
 666     int x, y, i;
 667
 668     eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
 669
 670     dmskp += dmsk_pitch;
 671     unsigned char *dmskpp = dmskp - dmsk_pitch;
 672     unsigned char *dmskpn = dmskp + dmsk_pitch;
 673     dstp += dst_pitch;
 674     mskp += msk_pitch;
 675     for( y = 1; y < height - 1; ++y )
 676     {
 677         for( x = 1; x < width - 1; ++x )
 678         {
 679             if( mskp[x] != 0xFF ) continue;
 680             int u = 0, order[9];
 681             if( dmskpp[x-1] != 0xFF ) order[u++] = dmskpp[x-1];
 682             if( dmskpp[x]   != 0xFF ) order[u++] = dmskpp[x];
 683             if( dmskpp[x+1] != 0xFF ) order[u++] = dmskpp[x+1];
 684             if(  dmskp[x-1] != 0xFF ) order[u++] =  dmskp[x-1];
 685             if(  dmskp[x]   != 0xFF ) order[u++] =  dmskp[x];
 686             if(  dmskp[x+1] != 0xFF ) order[u++] =  dmskp[x+1];
 687             if( dmskpn[x-1] != 0xFF ) order[u++] = dmskpn[x-1];
 688             if( dmskpn[x]   != 0xFF ) order[u++] = dmskpn[x];
 689             if( dmskpn[x+1] != 0xFF ) order[u++] = dmskpn[x+1];
 690             if( u < 4 )
 691             {
 692                 dstp[x] = 255;
 693                 continue;
 694             }
 695             eedi2_sort_metrics( order, u );
 696             const int mid = ( u & 1 ) ?
 697                 order[u>>1] : ( order[(u-1)>>1] + order[u>>1] + 1 ) >> 1;
 698             int sum = 0, count = 0;
 699             const int lim = eedi2_limlut[abs(mid-128)>>2];
 700             for( i = 0; i < u; ++i )
 701             {
 702                 if( abs( order[i] - mid ) <= lim )
 703                 {
 704                     ++count;
 705                     sum += order[i];
 706                 }
 707             }
 708             if( count < 4 || ( count < 5 && dmskp[x] == 0xFF ) )
 709             {
 710                 dstp[x] = 255;
 711                 continue;
 712             }
 713             dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f );
 714         }
 715         dmskpp += dmsk_pitch;
 716         dmskp += dmsk_pitch;
 717         dmskpn += dmsk_pitch;
 718         dstp += dst_pitch;
 719         mskp += msk_pitch;
 720     }
 721 }
 722
 723 /**
 724  * Smoothes out the edge direction map
 725  * @param mskp Pointer to the edge mask
 726  * @param msk_pitch Stride of mskp
 727  * @param dmskp Pointer to the edge direction mask being read from
 728  * @param dmsk_pitch Stride of dmskp
 729  * @param dstp Pointer to the destination to store the expanded edge direction mask
 730  * @param dst_pitch Stride of dstp
 731  * @param height Height of half-height field-sized frame
 732  * @param width Width of dmskp bitmap rows, as opposed to the pdded stride in dmsk_pitch
 733  */
 734 void eedi2_expand_dir_map( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
 735                            uint8_t * dstp, int dst_pitch, int height, int width )
 736 {
 737     int x, y, i;
 738
 739     eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
 740
 741     dmskp += dmsk_pitch;
 742     unsigned char *dmskpp = dmskp - dmsk_pitch;
 743     unsigned char *dmskpn = dmskp + dmsk_pitch;
 744     dstp += dst_pitch;
 745     mskp += msk_pitch;
 746     for( y = 1; y < height - 1; ++y )
 747     {
 748         for( x = 1; x < width - 1; ++x )
 749         {
 750             if( dmskp[x] != 0xFF || mskp[x] != 0xFF ) continue;
 751             int u = 0, order[9];
 752             if( dmskpp[x-1] != 0xFF ) order[u++] = dmskpp[x-1];
 753             if( dmskpp[x]   != 0xFF ) order[u++] = dmskpp[x];
 754             if( dmskpp[x+1] != 0xFF ) order[u++] = dmskpp[x+1];
 755             if(  dmskp[x-1] != 0xFF ) order[u++] =  dmskp[x-1];
 756             if(  dmskp[x+1] != 0xFF ) order[u++] =  dmskp[x+1];
 757             if( dmskpn[x-1] != 0xFF ) order[u++] = dmskpn[x-1];
 758             if( dmskpn[x]   != 0xFF ) order[u++] = dmskpn[x];
 759             if( dmskpn[x+1] != 0xFF ) order[u++] = dmskpn[x+1];
 760             if( u < 5 ) continue;
 761             eedi2_sort_metrics( order, u );
 762             const int mid = ( u & 1 ) ?
 763                 order[u>>1] : ( order[(u-1)>>1] + order[u>>1] + 1 ) >> 1;
 764             int sum = 0, count = 0;
 765             const int lim = eedi2_limlut[abs(mid-128)>>2];
 766             for( i = 0; i < u; ++i )
 767             {
 768                 if( abs( order[i] - mid ) <= lim )
 769                 {
 770                     ++count;
 771                     sum += order[i];
 772                 }
 773             }
 774             if( count < 5 ) continue;
 775             dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f );
 776         }
 777         dmskpp += dmsk_pitch;
 778         dmskp += dmsk_pitch;
 779         dmskpn += dmsk_pitch;
 780         dstp += dst_pitch;
 781         mskp += msk_pitch;
 782     }
 783 }
 784
 785 /**
 786  * Re-draws a clearer, less blocky frame-height edge direction mask
 787  * @param mskp Pointer to the edge mask
 788  * @param msk_pitch Stride of mskp
 789  * @param dmskp Pointer to the edge direction mask being read from
 790  * @param dmsk_pitch Stride of dmskp
 791  * @param dstp Pointer to the destination to store the redrawn direction mask
 792  * @param dst_pitch Stride of dstp
 793  * @param tff Whether or not the frame parity is Top Field First
 794  * @param height Height of the full-frame output
 795  * @param width Width of dmskp bitmap rows, as opposed to the pdded stride in dmsk_pitch
 796  */
 797 void eedi2_mark_directions_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
 798                                uint8_t * dstp, int dst_pitch, int tff, int height, int width )
 799 {
 800     int x, y, i;
 801     memset( dstp, 255, dst_pitch * height );
 802     dstp  += dst_pitch  * ( 2 - tff );
 803     dmskp += dmsk_pitch * ( 1 - tff );
 804     mskp  += msk_pitch  * ( 1 - tff );
 805     unsigned char *dmskpn = dmskp + dmsk_pitch * 2;
 806     unsigned char *mskpn = mskp + msk_pitch * 2;
 807     for( y = 2 - tff; y < height - 1; y += 2 )
 808     {
 809         for( x = 1; x < width - 1; ++x )
 810         {
 811             if( mskp[x] != 0xFF && mskpn[x] != 0xFF ) continue;
 812             int v = 0, order[6];
 813             if(  dmskp[x-1] != 0xFF ) order[v++] = dmskp[x-1];
 814             if(  dmskp[x]   != 0xFF ) order[v++] = dmskp[x];
 815             if(  dmskp[x+1] != 0xFF ) order[v++] = dmskp[x+1];
 816             if( dmskpn[x-1] != 0xFF ) order[v++] = dmskpn[x-1];
 817             if( dmskpn[x]   != 0xFF ) order[v++] = dmskpn[x];
 818             if( dmskpn[x+1] != 0xFF ) order[v++] = dmskpn[x+1];
 819             if( v < 3 ) continue;
 820             else
 821             {
 822                 eedi2_sort_metrics( order, v );
 823                 const int mid = ( v & 1 ) ? order[v>>1] : ( order[(v-1)>>1] + order[v>>1]+1) >> 1;
 824                 const int lim = eedi2_limlut[abs(mid-128)>>2];
 825                 int u = 0;
 826                 if( abs( dmskp[x-1] - dmskpn[x-1] ) <= lim ||
 827                     dmskp[x-1] == 0xFF || dmskpn[x-1] == 0xFF )
 828                         ++u;
 829                 if( abs( dmskp[x]   - dmskpn[x] )   <= lim ||
 830                     dmskp[x]   == 0xFF || dmskpn[x]   == 0xFF )
 831                         ++u;
 832                 if( abs( dmskp[x+1] - dmskpn[x-1] ) <= lim ||
 833                     dmskp[x+1] == 0xFF || dmskpn[x+1] == 0xFF)
 834                         ++u;
 835                 if( u < 2 ) continue;
 836                 int count = 0, sum = 0;
 837                 for( i = 0; i < v; ++i )
 838                 {
 839                     if( abs( order[i] - mid ) <= lim )
 840                     {
 841                         ++count;
 842                         sum += order[i];
 843                     }
 844                 }
 845                 if( count < v - 2 || count < 2 ) continue;
 846                 dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f );
 847             }
 848         }
 849         mskp += msk_pitch * 2;
 850         mskpn += msk_pitch * 2;
 851         dstp += dst_pitch * 2;
 852         dmskp += dmsk_pitch * 2;
 853         dmskpn += dmsk_pitch * 2;
 854     }
 855 }
 856
 857 /**
 858  * Filters the frane-height edge direction mask
 859  * @param mskp Pointer to the edge mask
 860  * @param msk_pitch Stride of mskp
 861  * @param dmskp Pointer to the edge direction mask being read from
 862  * @param dmsk_pitch Stride of dmskp
 863  * @param dstp Pointer to the destination to store the filtered direction mask
 864  * @param dst_pitch Stride of dstp
 865  * @param field Field to filter
 866  * @param height Height of the full-frame output
 867  * @param width Width of dmskp bitmap rows, as opposed to the pdded stride in dmsk_pitch
 868  */
 869 void eedi2_filter_dir_map_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
 870                               uint8_t * dstp, int dst_pitch, int field, int height, int width )
 871 {
 872     int x, y, i;
 873     eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
 874     dmskp += dmsk_pitch * ( 2 - field );
 875     unsigned char *dmskpp = dmskp - dmsk_pitch * 2;
 876     unsigned char *dmskpn = dmskp + dmsk_pitch * 2;
 877     mskp += msk_pitch * ( 1 - field );
 878     unsigned char *mskpn = mskp + msk_pitch * 2;
 879     dstp += dst_pitch * ( 2 - field );
 880     for( y = 2 - field; y < height - 1; y += 2 )
 881     {
 882         for( x = 1; x < width - 1; ++x )
 883         {
 884             if( mskp[x] != 0xFF && mskpn[x] != 0xFF ) continue;
 885             int u = 0, order[9];
 886             if( y > 1 )
 887             {
 888                 if( dmskpp[x-1] != 0xFF ) order[u++] = dmskpp[x-1];
 889                 if( dmskpp[x]   != 0xFF ) order[u++] = dmskpp[x];
 890                 if( dmskpp[x+1] != 0xFF ) order[u++] = dmskpp[x+1];
 891             }
 892             if( dmskp[x-1] != 0xFF ) order[u++] = dmskp[x-1];
 893             if( dmskp[x]   != 0xFF ) order[u++] = dmskp[x];
 894             if( dmskp[x+1] != 0xFF ) order[u++] = dmskp[x+1];
 895             if( y < height - 2 )
 896             {
 897                 if( dmskpn[x-1] != 0xFF ) order[u++] = dmskpn[x-1];
 898                 if( dmskpn[x]   != 0xFF ) order[u++] = dmskpn[x];
 899                 if( dmskpn[x+1] != 0xFF ) order[u++] = dmskpn[x+1];
 900             }
 901             if( u < 4 )
 902             {
 903                 dstp[x] = 255;
 904                 continue;
 905             }
 906             eedi2_sort_metrics( order, u );
 907             const int mid = ( u & 1 ) ? order[u>>1] : (order[(u-1)>>1] + order[u>>1] + 1 ) >> 1;
 908             int sum = 0, count = 0;
 909             const int lim = eedi2_limlut[abs(mid-128)>>2];
 910             for( i = 0; i < u; ++i )
 911             {
 912                 if( abs( order[i] - mid ) <= lim )
 913                 {
 914                     ++count;
 915                     sum += order[i];
 916                 }
 917             }
 918             if( count < 4 || ( count < 5 && dmskp[x] == 0xFF ) )
 919             {
 920                 dstp[x] = 255;
 921                 continue;
 922             }
 923             dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f );
 924         }
 925         mskp += msk_pitch * 2;
 926         mskpn += msk_pitch * 2;
 927         dmskpp += dmsk_pitch * 2;
 928         dmskp += dmsk_pitch * 2;
 929         dmskpn += dmsk_pitch * 2;
 930         dstp += dst_pitch * 2;
 931     }
 932 }
 933
 934 /**
 935  * Smoothes out the frame-height edge direction mask
 936  * @param mskp Pointer to the edge mask
 937  * @param msk_pitch Stride of mskp
 938  * @param dmskp Pointer to the edge direction mask being read from
 939  * @param dmsk_pitch Stride of dmskp
 940  * @param dstp Pointer to the destination to store the expanded direction mask
 941  * @param dst_pitch Stride of dstp
 942  * @param field Field to filter
 943  * @param height Height of the full-frame output
 944  * @param width Width of dmskp bitmap rows, as opposed to the pdded stride in dmsk_pitch
 945  */
 946 void eedi2_expand_dir_map_2x( uint8_t * mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
 947                               uint8_t * dstp, int dst_pitch, int field, int height, int width )
 948 {
 949     int x, y, i;
 950
 951     eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
 952
 953     dmskp += dmsk_pitch * ( 2 - field );
 954     unsigned char *dmskpp = dmskp - dmsk_pitch * 2;
 955     unsigned char *dmskpn = dmskp + dmsk_pitch * 2;
 956     mskp += msk_pitch * ( 1 - field );
 957     unsigned char *mskpn = mskp + msk_pitch * 2;
 958     dstp += dst_pitch * ( 2 - field );
 959     for( y = 2 - field; y < height - 1; y += 2)
 960     {
 961         for( x = 1; x < width - 1; ++x )
 962         {
 963             if( dmskp[x] != 0xFF || ( mskp[x] != 0xFF && mskpn[x] != 0xFF ) ) continue;
 964             int u = 0, order[9];
 965             if( y > 1 )
 966             {
 967                 if( dmskpp[x-1] != 0xFF ) order[u++] = dmskpp[x-1];
 968                 if( dmskpp[x]   != 0xFF ) order[u++] = dmskpp[x];
 969                 if( dmskpp[x+1] != 0xFF ) order[u++] = dmskpp[x+1];
 970             }
 971             if( dmskp[x-1] != 0xFF ) order[u++] = dmskp[x-1];
 972             if( dmskp[x+1] != 0xFF ) order[u++] = dmskp[x+1];
 973             if( y < height - 2 )
 974             {
 975                 if( dmskpn[x-1] != 0xFF) order[u++] = dmskpn[x-1];
 976                 if( dmskpn[x]   != 0xFF) order[u++] = dmskpn[x];
 977                 if( dmskpn[x+1] != 0xFF) order[u++] = dmskpn[x+1];
 978             }
 979             if( u < 5 ) continue;
 980             eedi2_sort_metrics( order, u );
 981             const int mid = ( u & 1 ) ? order[u>>1] : ( order[(u-1)>>1] + order[u>>1] + 1 ) >> 1;
 982             int sum = 0, count = 0;
 983             const int lim = eedi2_limlut[abs(mid-128)>>2];
 984             for( i = 0; i < u; ++i )
 985             {
 986                 if( abs( order[i] - mid ) <= lim )
 987                 {
 988                     ++count;
 989                     sum += order[i];
 990                 }
 991             }
 992             if( count < 5 ) continue;
 993             dstp[x] = (int)( ( (float)( sum + mid ) / (float)( count + 1 ) ) + 0.5f );
 994         }
 995         mskp += msk_pitch * 2;
 996         mskpn += msk_pitch * 2;
 997         dmskpp += dmsk_pitch * 2;
 998         dmskp += dmsk_pitch * 2;
 999         dmskpn += dmsk_pitch * 2;
1000         dstp += dst_pitch * 2;
1001     }
1002 }
1003
1004 /**
1005  * Like the name suggests, this function fills in gaps in the frame-height edge direction mask
1006  * @param mskp Pointer to the edge mask
1007  * @param msk_pitch Stride of mskp
1008  * @param dmskp Pointer to the edge direction mask being read from
1009  * @param dmsk_pitch Stride of dmskp
1010  * @param dstp Pointer to the destination to store the filled-in direction mask
1011  * @param dst_pitch Stride of dstp
1012  * @param field Field to filter
1013  * @param height Height of the full-frame output
1014  * @param width Width of dmskp bitmap rows, as opposed to the pdded stride in dmsk_pitch
1015  */
1016 void eedi2_fill_gaps_2x( uint8_t *mskp, int msk_pitch, uint8_t * dmskp, int dmsk_pitch,
1017                          uint8_t * dstp, int dst_pitch, int field, int height, int width )
1018 {
1019     int x, y, j;
1020
1021     eedi2_bit_blit( dstp, dst_pitch, dmskp, dmsk_pitch, width, height );
1022
1023     dmskp += dmsk_pitch * ( 2 - field );
1024     unsigned char *dmskpp = dmskp - dmsk_pitch * 2;
1025     unsigned char *dmskpn = dmskp + dmsk_pitch * 2;
1026     mskp += msk_pitch * ( 1 - field );
1027     unsigned char *mskpp = mskp - msk_pitch * 2;
1028     unsigned char *mskpn = mskp + msk_pitch * 2;
1029     unsigned char *mskpnn = mskpn + msk_pitch * 2;
1030     dstp += dst_pitch * ( 2 - field );
1031     for( y = 2 - field; y < height - 1; y += 2 )
1032     {
1033         for( x = 1; x < width - 1; ++x )
1034         {
1035             if( dmskp[x] != 0xFF ||
1036                 ( mskp[x] != 0xFF && mskpn[x] != 0xFF ) ) continue;
1037             int u = x - 1, back = 500, forward = -500;
1038             while( u )
1039             {
1040                 if( dmskp[u] != 0xFF )
1041                 {
1042                     back = dmskp[u];
1043                     break;
1044                 }
1045                 if( mskp[u] != 0xFF && mskpn[u] != 0xFF ) break;
1046                 --u;
1047             }
1048             int v = x + 1;
1049             while( v < width )
1050             {
1051                 if( dmskp[v] != 0xFF )
1052                 {
1053                     forward = dmskp[v];
1054                     break;
1055                 }
1056                 if( mskp[v] != 0xFF && mskpn[v] != 0xFF ) break;
1057                 ++v;
1058             }
1059             int tc = 1, bc = 1;
1060             int mint = 500, maxt = -20;
1061             int minb = 500, maxb = -20;
1062             for( j = u; j <= v; ++j )
1063             {
1064                 if( tc )
1065                 {
1066                     if( y <= 2 || dmskpp[j] == 0xFF || ( mskpp[j] != 0xFF && mskp[j] != 0xFF ) )
1067                     {
1068                         tc = 0;
1069                         mint = maxt = 20;
1070                     }
1071                     else
1072                     {
1073                         if( dmskpp[j] < mint ) mint = dmskpp[j];
1074                         if( dmskpp[j] > maxt ) maxt = dmskpp[j];
1075                     }
1076                 }
1077                 if( bc )
1078                 {
1079                     if( y >= height - 3 || dmskpn[j] == 0xFF || ( mskpn[j] != 0xFF && mskpnn[j] != 0xFF ) )
1080                     {
1081                         bc = 0;
1082                         minb = maxb = 20;
1083                     }
1084                     else
1085                     {
1086                         if( dmskpn[j] < minb ) minb = dmskpn[j];
1087                         if( dmskpn[j] > maxb ) maxb = dmskpn[j];
1088                     }
1089                 }
1090             }
1091             if( maxt == -20 ) maxt = mint = 20;
1092             if( maxb == -20 ) maxb = minb = 20;
1093             int thresh = MAX(
1094                             MAX( MAX( abs( forward - 128 ), abs( back - 128 ) ) >> 2, 8 ),
1095                             MAX( abs( mint - maxt ), abs( minb - maxb ) ) );
1096             const int flim = MIN(
1097                                 MAX( abs( forward - 128 ), abs( back - 128 ) ) >> 2,
1098                                 6 );
1099             if( abs( forward - back ) <= thresh && ( v - u - 1 <= flim || tc || bc ) )
1100             {
1101                 double step = (double)( forward - back ) / (double)( v - u );
1102                 for( j = 0; j < v - u - 1; ++j )
1103                     dstp[u+j+1] = back + (int)( j * step + 0.5 );
1104             }
1105         }
1106         mskpp += msk_pitch * 2;
1107         mskp += msk_pitch * 2;
1108         mskpn += msk_pitch * 2;
1109         mskpnn += msk_pitch * 2;
1110         dmskpp += dmsk_pitch * 2;
1111         dmskp += dmsk_pitch * 2;
1112         dmskpn += dmsk_pitch * 2;
1113         dstp += dst_pitch * 2;
1114     }
1115 }
1116
1117 /**
1118  * Actually renders the output frame, based on the edge and edge direction masks
1119  * @param plane The plane of the image being processed, to know to reduce a search distance for chroma planes (HandBrake only works with YUV420 video so it is assumed they are half-height)
1120  * @param dmskp Pointer to the edge direction mask being read from
1121  * @param dmsk_pitch Stride of dmskp
1122  * @param dstp Pointer to the line-doubled source field used being filtered in place
1123  * @param dst_pitch Stride of dstp
1124  * @param omskp Pointer to the destination to store the output edge mask used for post-processing
1125  * @param osmk_pitch Stride of omskp
1126  * @param field Field to filter
1127  * @nt Noise threshold, (50 is a good default value)
1128  * @param height Height of the full-frame output
1129  * @param width Width of dstp bitmap rows, as opposed to the pdded stride in dst_pitch
1130  */
1131 void eedi2_interpolate_lattice( const int plane, uint8_t * dmskp, int dmsk_pitch, uint8_t * dstp,
1132                                 int dst_pitch, uint8_t * omskp, int omsk_pitch, int field, int nt,
1133                                 int height, int width )
1134 {
1135     int x, y, u;
1136
1137     if( field == 1 )
1138     {
1139         eedi2_bit_blit( dstp + ( height - 1 ) * dst_pitch,
1140                   dst_pitch,
1141                   dstp + ( height - 2 ) * dst_pitch,
1142                   dst_pitch,
1143                   width,
1144                   1 );
1145     }
1146     else
1147     {
1148         eedi2_bit_blit( dstp,
1149                   dst_pitch,
1150                   dstp + dst_pitch,
1151                   dst_pitch,
1152                   width,
1153                   1 );
1154     }
1155
1156     dstp += dst_pitch * ( 1 - field );
1157     omskp += omsk_pitch * ( 1 - field );
1158     unsigned char *dstpn = dstp + dst_pitch;
1159     unsigned char *dstpnn = dstp + dst_pitch * 2;
1160     unsigned char *omskn = omskp + omsk_pitch * 2;
1161     dmskp += dmsk_pitch * ( 2 - field );
1162     for( y = 2 - field; y < height - 1; y += 2 )
1163     {
1164         for( x = 0; x < width; ++x )
1165         {
1166             int dir = dmskp[x];
1167             const int lim = eedi2_limlut[abs(dir-128)>>2];
1168             if( dir == 255 ||
1169                 ( abs( dmskp[x] - dmskp[x-1] ) > lim &&
1170                   abs( dmskp[x] - dmskp[x+1] ) > lim ) )
1171             {
1172                 dstpn[x] = ( dstp[x] + dstpnn[x] + 1 ) >> 1;
1173                 if( dir != 255 ) dmskp[x] = 128;
1174                 continue;
1175             }
1176             if( lim < 9 )
1177             {
1178                 const int sum =   dstp[x-1] +   dstp[x] +   dstp[x+1] +
1179                                 dstpnn[x-1] + dstpnn[x] + dstpnn[x+1];
1180                 const int sumsq = dstp[x-1] *   dstp[x-1] +
1181                                   dstp[x]   *   dstp[x]   +
1182                                   dstp[x+1] *   dstp[x+1] +
1183                                 dstpnn[x-1] * dstpnn[x-1] +
1184                                 dstpnn[x]   * dstpnn[x]   +
1185                                 dstpnn[x+1] * dstpnn[x+1];
1186                 if( 6 * sumsq - sum * sum < 576 )
1187                 {
1188                     dstpn[x] = ( dstp[x] + dstpnn[x] + 1 ) >> 1;
1189                     dmskp[x] = 255;
1190                     continue;
1191                 }
1192             }
1193             if( x > 1 && x < width - 2 &&
1194                 (     dstp[x] < MAX(   dstp[x-2],   dstp[x-1] ) - 3 &&
1195                       dstp[x] < MAX(   dstp[x+2],   dstp[x+1] ) - 3 &&
1196                     dstpnn[x] < MAX( dstpnn[x-2], dstpnn[x-1] ) - 3 &&
1197                     dstpnn[x] < MAX( dstpnn[x+2], dstpnn[x+1] ) - 3 )
1198                 ||
1199                 (     dstp[x] > MIN(   dstp[x-2],   dstp[x-1] ) + 3 &&
1200                       dstp[x] > MIN(   dstp[x+2],   dstp[x+1] ) + 3 &&
1201                     dstpnn[x] > MIN( dstpnn[x-2], dstpnn[x-1] ) + 3 &&
1202                     dstpnn[x] > MIN( dstpnn[x+2], dstpnn[x+1] ) + 3 ) )
1203             {
1204                 dstpn[x] = ( dstp[x] + dstpnn[x] + 1 ) >> 1;
1205                 dmskp[x] = 128;
1206                 continue;
1207             }
1208             dir = ( dir - 128 + 2 ) >> 2;
1209             int val = ( dstp[x] + dstpnn[x] + 1 ) >> 1;
1210             const int startu = ( dir - 2 < 0 ) ?
1211                         MAX( -x + 1, MAX( dir - 2, -width + 2 + x ) )
1212                         :
1213                         MIN(  x - 1, MIN( dir - 2,  width - 2 - x ) );
1214             const int stopu =  ( dir + 2 < 0 ) ?
1215                         MAX( -x + 1, MAX( dir + 2, -width + 2 + x ) )
1216                         :
1217                         MIN(  x - 1, MIN( dir + 2,  width - 2 - x ) );
1218             int min = 8 * nt;
1219             for( u = startu; u <= stopu; ++u )
1220             {
1221                 const int diff =
1222                     abs(   dstp[x-1] - dstpnn[x-u-1] ) +
1223                     abs(   dstp[x]   - dstpnn[x-u] )   +
1224                     abs(   dstp[x+1] - dstpnn[x-u+1] ) +
1225                     abs( dstpnn[x-1] -   dstp[x+u-1] ) +
1226                     abs( dstpnn[x]   -   dstp[x+u] )   +
1227                     abs( dstpnn[x+1] -   dstp[x+u+1] );
1228                 if( diff < min &&
1229                     ( ( omskp[x-1+u] != 0xFF && abs( omskp[x-1+u] - dmskp[x] ) <= lim ) ||
1230                      (  omskp[x+u]   != 0xFF && abs( omskp[x+u]   - dmskp[x]) <= lim )  ||
1231                      (  omskp[x+1+u] != 0xFF && abs( omskp[x+1+u] - dmskp[x]) <= lim ) ) &&
1232                     ( ( omskn[x-1-u] != 0xFF && abs( omskn[x-1-u] - dmskp[x]) <= lim ) ||
1233                      (  omskn[x-u]   != 0xFF && abs( omskn[x-u]   - dmskp[x]) <= lim ) ||
1234                      (  omskn[x+1-u] != 0xFF && abs( omskn[x+1-u] - dmskp[x]) <= lim ) ) )
1235                 {
1236                     const int diff2 =
1237                         abs( dstp[x+(u>>1)-1] - dstpnn[x-(u>>1)-1] ) +
1238                         abs( dstp[x+(u>>1)]   - dstpnn[x-(u>>1)]   ) +
1239                         abs( dstp[x+(u>>1)+1] - dstpnn[x-(u>>1)+1] );
1240                     if( diff2 < 4 * nt &&
1241                         ( ( ( abs( omskp[x+(u>>1)] - omskn[x-(u>>1)]     ) <= lim ||
1242                               abs( omskp[x+(u>>1)] - omskn[x-((u+1)>>1)] ) <= lim ) &&
1243                             omskp[x+(u>>1)] != 0xFF )
1244                           ||
1245                           ( ( abs( omskp[x+((u+1)>>1)] - omskn[x-(u>>1)] )     <= lim ||
1246                               abs( omskp[x+((u+1)>>1)] - omskn[x-((u+1)>>1)] ) <= lim ) &&
1247                             omskp[x+((u+1)>>1)] != 0xFF ) ) )
1248                     {
1249                         if( ( abs( dmskp[x] - omskp[x+(u>>1)] )     <= lim ||
1250                               abs( dmskp[x] - omskp[x+((u+1)>>1)] ) <= lim ) &&
1251                             ( abs( dmskp[x] - omskn[x-(u>>1)] )     <= lim ||
1252                               abs( dmskp[x] - omskn[x-((u+1)>>1)] ) <= lim ) )
1253                         {
1254                             val = (   dstp[x+(u>>1)] +   dstp[x+((u+1)>>1)] +
1255                                     dstpnn[x-(u>>1)] + dstpnn[x-((u+1)>>1)] + 2 ) >> 2;
1256                             min = diff;
1257                             dir = u;
1258                         }
1259                     }
1260                 }
1261             }
1262             if( min != 8 * nt )
1263             {
1264                 dstpn[x] = val;
1265                 dmskp[x] = 128 + dir * 4;
1266             }
1267             else
1268             {
1269                 const int minm = MIN( dstp[x], dstpnn[x] );
1270                 const int maxm = MAX( dstp[x], dstpnn[x] );
1271                 const int d = plane == 0 ? 4 : 2;
1272                 const int startu = MAX( -x + 1, -d );
1273                 const int stopu = MIN( width - 2 - x, d );
1274                 min = 7 * nt;
1275                 for( u = startu; u <= stopu; ++u )
1276                 {
1277                     const int p1 =   dstp[x+(u>>1)] +   dstp[x+((u+1)>>1)];
1278                     const int p2 = dstpnn[x-(u>>1)] + dstpnn[x-((u+1)>>1)];
1279                     const int diff =
1280                         abs(   dstp[x-1] - dstpnn[x-u-1] ) +
1281                         abs(   dstp[x]   - dstpnn[x-u] )   +
1282                         abs(   dstp[x+1] - dstpnn[x-u+1] ) +
1283                         abs( dstpnn[x-1] - dstp[x+u-1] )   +
1284                         abs( dstpnn[x]   - dstp[x+u] )     +
1285                         abs( dstpnn[x+1] - dstp[x+u+1] )   +
1286                         abs( p1 - p2 );
1287                     if( diff < min )
1288                     {
1289                         const int valt = ( p1 + p2 + 2 ) >> 2;
1290                         if( valt >= minm && valt <= maxm )
1291                         {
1292                             val = valt;
1293                             min = diff;
1294                             dir = u;
1295                         }
1296                     }
1297                 }
1298                 dstpn[x] = val;
1299                 if( min == 7*nt ) dmskp[x] = 128;
1300                 else dmskp[x] = 128 + dir * 4;
1301             }
1302         }
1303         dstp += dst_pitch * 2;
1304         dstpn += dst_pitch * 2;
1305         dstpnn += dst_pitch * 2;
1306         dmskp += dmsk_pitch * 2;
1307         omskp += omsk_pitch * 2;
1308         omskn += omsk_pitch * 2;
1309     }
1310 }
1311
1312 /**
1313  * Applies some extra filtering to smooth the edge direction mask
1314  * @param nmskp Pointer to the newly-filtered edge direction mask being read from
1315  * @param nmsk_pitch Stride of nmskp
1316  * @param omskp Pointer to the old unfiltered edge direction mask being read from
1317  * @param omsk_pitch Stride of osmkp
1318  * @param dstp Pointer to the output image being filtered in place
1319  * @param src_pitch Stride of dstp ....not sure why it's named this
1320  * @param field Field to filter
1321  * @param height Height of the full-frame output
1322  * @param width Width of dstp bitmap rows, as opposed to the pdded stride in src_pitch
1323  */
1324 void eedi2_post_process( uint8_t * nmskp, int nmsk_pitch, uint8_t * omskp, int omsk_pitch,
1325                          uint8_t * dstp, int src_pitch, int field, int height, int width )
1326 {
1327     int x, y;
1328
1329     nmskp += ( 2 - field ) * nmsk_pitch;
1330     omskp += ( 2 - field ) * omsk_pitch;
1331     dstp += ( 2 - field ) * src_pitch;
1332     unsigned char *srcpp = dstp - src_pitch;
1333     unsigned char *srcpn = dstp + src_pitch;
1334     for( y = 2 - field; y < height - 1; y += 2 )
1335     {
1336         for( x = 0; x < width; ++x )
1337         {
1338             const int lim = eedi2_limlut[abs(nmskp[x]-128)>>2];
1339             if( abs( nmskp[x] - omskp[x] ) > lim && omskp[x] != 255 && omskp[x] != 128 )
1340                 dstp[x] = ( srcpp[x] + srcpn[x] + 1 ) >> 1;
1341         }
1342         nmskp += nmsk_pitch * 2;
1343         omskp += omsk_pitch * 2;
1344         srcpp += src_pitch * 2;
1345         dstp += src_pitch * 2;
1346         srcpn += src_pitch * 2;
1347     }
1348 }
1349
1350 /**
1351  * Blurs the source field plane
1352  * @param src Pointer to the half-height source field plane
1353  * @param src_pitch Stride of src
1354  * @param tmp Pointer to a temporary buffer for juggling bitmaps
1355  * @param tmp_pitch Stride of tmp
1356  * @param dst Pointer to the destination to store the blurred field plane
1357  * @param dst_pitch Stride of dst
1358  * @param height Height of the hakf-height field-sized frame
1359  * @param width Width of dstp bitmap rows, as opposed to the padded stride in dst_pitch
1360  */
1361 void eedi2_gaussian_blur1( uint8_t * src, int src_pitch, uint8_t * tmp, int tmp_pitch, uint8_t * dst, int dst_pitch, int height, int width )
1362 {
1363     uint8_t * srcp = src;
1364     uint8_t * dstp = tmp;
1365     int x, y;
1366
1367     for( y = 0; y < height; ++y )
1368     {
1369         dstp[0] = ( srcp[3] * 582 + srcp[2] * 7078 + srcp[1] * 31724 +
1370                     srcp[0] * 26152 + 32768 ) >> 16;
1371         dstp[1] = ( srcp[4] * 582 + srcp[3] * 7078 +
1372                     ( srcp[0] + srcp[2] ) * 15862 +
1373                     srcp[1] * 26152 + 32768 ) >> 16;
1374         dstp[2] = ( srcp[5] * 582 + ( srcp[0] + srcp[4] ) * 3539 +
1375                     ( srcp[1] + srcp[3] ) * 15862 +
1376                     srcp[2]*26152 + 32768 ) >> 16;
1377         for( x = 3; x < width - 3; ++x )
1378         {
1379             dstp[x] = ( ( srcp[x-3] + srcp[x+3] ) * 291 +
1380                         ( srcp[x-2] + srcp[x+2] ) * 3539 +
1381                         ( srcp[x-1] + srcp[x+1] ) * 15862 +
1382                         srcp[x] * 26152 + 32768 ) >> 16;
1383         }
1384         dstp[x] = ( srcp[x-3] * 582 + ( srcp[x-2] + srcp[x+2] ) * 3539 +
1385                     ( srcp[x-1] + srcp[x+1] ) * 15862 +
1386                     srcp[x]   * 26152 + 32768 ) >> 16;
1387         ++x;
1388         dstp[x] = ( srcp[x-3] * 582 + srcp[x-2] * 7078 +
1389                     ( srcp[x-1] + srcp[x+1] ) * 15862 +
1390                     srcp[x] * 26152 + 32768 ) >> 16;
1391         ++x;
1392         dstp[x] = ( srcp[x-3] * 582 + srcp[x-2] * 7078 +
1393                     srcp[x-1] * 31724 + srcp[x] * 26152 + 32768 ) >> 16;
1394         srcp += src_pitch;
1395         dstp += tmp_pitch;
1396     }
1397     srcp = tmp;
1398     dstp = dst;
1399     unsigned char *src3p = srcp - tmp_pitch * 3;
1400     unsigned char *src2p = srcp - tmp_pitch * 2;
1401     unsigned char *srcpp = srcp - tmp_pitch;
1402     unsigned char *srcpn = srcp + tmp_pitch;
1403     unsigned char *src2n = srcp + tmp_pitch * 2;
1404     unsigned char *src3n = srcp + tmp_pitch * 3;
1405     for( x = 0; x < width; ++x )
1406     {
1407         dstp[x] = ( src3n[x] * 582 + src2n[x] * 7078 + srcpn[x] * 31724 +
1408                      srcp[x] * 26152 + 32768 ) >> 16;
1409     }
1410     src3p += tmp_pitch;
1411     src2p += tmp_pitch;
1412     srcpp += tmp_pitch;
1413     srcp += tmp_pitch;
1414     srcpn += tmp_pitch;
1415     src2n += tmp_pitch;
1416     src3n += tmp_pitch;
1417     dstp += dst_pitch;
1418     for( x = 0; x < width; ++x )
1419     {
1420         dstp[x] = ( src3n[x] * 582 + src2n[x] * 7078 +
1421                     ( srcpp[x] + srcpn[x] ) * 15862 +
1422                     srcp[x] * 26152 + 32768 ) >> 16;
1423     }
1424     src3p += tmp_pitch;
1425     src2p += tmp_pitch;
1426     srcpp += tmp_pitch;
1427     srcp += tmp_pitch;
1428     srcpn += tmp_pitch;
1429     src2n += tmp_pitch;
1430     src3n += tmp_pitch;
1431     dstp += dst_pitch;
1432     for( x = 0; x < width; ++x )
1433     {
1434         dstp[x] = ( src3n[x] * 582 + ( src2p[x] + src2n[x] ) * 3539 +
1435                     ( srcpp[x] + srcpn[x] ) * 15862 +
1436                     srcp[x] * 26152 + 32768 ) >> 16;
1437     }
1438     src3p += src_pitch;
1439     src2p += src_pitch;
1440     srcpp += src_pitch;
1441     srcp += src_pitch;
1442     srcpn += src_pitch;
1443     src2n += src_pitch;
1444     src3n += src_pitch;
1445     dstp += dst_pitch;
1446     for( y = 3; y < height - 3; ++y )
1447     {
1448         for( x = 0; x < width; ++x )
1449         {
1450             dstp[x] = ( ( src3p[x] + src3n[x] ) * 291 +
1451                         ( src2p[x] + src2n[x] ) * 3539 +
1452                         ( srcpp[x] + srcpn[x] ) * 15862 +
1453                         srcp[x] * 26152 + 32768 ) >> 16;
1454         }
1455         src3p += tmp_pitch;
1456         src2p += tmp_pitch;
1457         srcpp += tmp_pitch;
1458         srcp += tmp_pitch;
1459         srcpn += tmp_pitch;
1460         src2n += tmp_pitch;
1461         src3n += tmp_pitch;
1462         dstp += dst_pitch;
1463     }
1464     for( x = 0; x < width; ++x )
1465     {
1466         dstp[x] = ( src3p[x] * 582 + ( src2p[x] + src2n[x] ) *3539 +
1467                     ( srcpp[x] + srcpn[x] ) * 15862 +
1468                     srcp[x] * 26152 + 32768 ) >> 16;
1469     }
1470     src3p += tmp_pitch;
1471     src2p += tmp_pitch;
1472     srcpp += tmp_pitch;
1473     srcp += tmp_pitch;
1474     srcpn += tmp_pitch;
1475     src2n += tmp_pitch;
1476     src3n += tmp_pitch;
1477     dstp += dst_pitch;
1478     for( x = 0; x < width; ++x )
1479     {
1480         dstp[x] = ( src3p[x] * 582 + src2p[x] * 7078 +
1481                     ( srcpp[x] + srcpn[x] ) * 15862 +
1482                      srcp[x] * 26152 + 32768 ) >> 16;
1483     }
1484     src3p += tmp_pitch;
1485     src2p += tmp_pitch;
1486     srcpp += tmp_pitch;
1487     srcp += tmp_pitch;
1488     srcpn += tmp_pitch;
1489     src2n += tmp_pitch;
1490     src3n += tmp_pitch;
1491     dstp += dst_pitch;
1492     for( x = 0; x < width; ++x )
1493     {
1494         dstp[x] = ( src3p[x] * 582   + src2p[x] * 7078 +
1495                     srcpp[x] * 31724 +  srcp[x] * 26152 + 32768 ) >> 16;
1496     }
1497 }
1498
1499
1500 /**
1501  * Blurs the spatial derivatives of the source field plane
1502  * @param src Pointer to the derivative array to filter
1503  * @param tmp Pointer to a temporary storage for the derivative array while it's being filtered
1504  * @param dst Pointer to the destination to store the filtered output derivative array
1505  * @param pitch Stride of the bitmap from which the src array is derived
1506  * @param height Height of the half-height field-sized frame from which the src array derivs were taken
1507  * @param width Width of the bitmap from which the src array is derived, as opposed to the padded stride in pitch
1508  */
1509 void eedi2_gaussian_blur_sqrt2( int *src, int *tmp, int *dst, const int pitch, int height, const int width )
1510 {
1511     int * srcp = src;
1512     int * dstp = tmp;
1513     int x, y;
1514
1515     for( y = 0; y < height; ++y )
1516     {
1517         x = 0;
1518         dstp[x] = ( srcp[x+4] * 678   + srcp[x+3] * 3902  + srcp[x+2] * 13618 +
1519                     srcp[x+1] * 28830 + srcp[x]   * 18508 + 32768 ) >> 16;
1520         ++x;
1521         dstp[x] = ( srcp[x+4] * 678   + srcp[x+3] * 3902 + srcp[x+2] * 13618 +
1522                     ( srcp[x-1] + srcp[x+1] ) *14415 +
1523                     srcp[x]   * 18508 + 32768 ) >> 16;
1524         ++x;
1525         dstp[x] = ( srcp[x+4] * 678   + srcp[x+3] * 3902 +
1526                     ( srcp[x-2] + srcp[x+2] ) * 6809 +
1527                     ( srcp[x-1] + srcp[x+1] ) * 14415 +
1528                     srcp[x]   * 18508 + 32768 ) >> 16;
1529         ++x;
1530         dstp[x] = ( srcp[x+4] * 678   + ( srcp[x-3] + srcp[x+3] ) * 1951 +
1531                     ( srcp[x-2] + srcp[x+2] ) * 6809 +
1532                     ( srcp[x-1] + srcp[x+1] ) * 14415 +
1533                     srcp[x]   * 18508 + 32768 ) >> 16;
1534
1535         for( x = 4; x < width - 4; ++x )
1536         {
1537             dstp[x] = ( ( srcp[x-4] + srcp[x+4] ) * 339 +
1538                         ( srcp[x-3] + srcp[x+3] ) * 1951 +
1539                         ( srcp[x-2] + srcp[x+2] ) * 6809 +
1540                         ( srcp[x-1] + srcp[x+1] ) * 14415 +
1541                         srcp[x] * 18508 + 32768 ) >> 16;
1542         }
1543
1544         dstp[x] = ( srcp[x-4] * 678 + ( srcp[x-3] + srcp[x+3] ) * 1951 +
1545                     ( srcp[x-2] + srcp[x+2] ) * 6809  +
1546                     ( srcp[x-1] + srcp[x+1] ) * 14415 +
1547                     srcp[x] * 18508 + 32768 ) >> 16;
1548         ++x;
1549         dstp[x] = ( srcp[x-4] * 678 + srcp[x-3] * 3902 +
1550                     ( srcp[x-2] + srcp[x+2] ) * 6809 +
1551                     ( srcp[x-1] + srcp[x+1] ) * 14415 +
1552                     srcp[x] * 18508 + 32768 ) >> 16;
1553         ++x;
1554         dstp[x] = ( srcp[x-4] * 678 + srcp[x+3] * 3902 + srcp[x-2] * 13618 +
1555                     ( srcp[x-1] + srcp[x+1] ) * 14415 +
1556                     srcp[x] * 18508 + 32768 ) >> 16;
1557         ++x;
1558         dstp[x] = ( srcp[x-4] * 678 + srcp[x-3] * 3902 + srcp[x-2] * 13618 +
1559                     srcp[x-1] * 28830 +
1560                     srcp[x] * 18508 + 32768 ) >> 16;
1561         srcp += pitch;
1562         dstp += pitch;
1563     }
1564     dstp = dst;
1565     srcp = tmp;
1566     int * src4p = srcp - pitch * 4;
1567     int * src3p = srcp - pitch * 3;
1568     int * src2p = srcp - pitch * 2;
1569     int * srcpp = srcp - pitch;
1570     int * srcpn = srcp + pitch;
1571     int * src2n = srcp + pitch * 2;
1572     int * src3n = srcp + pitch * 3;
1573     int * src4n = srcp + pitch * 4;
1574     for( x = 0; x < width; ++x )
1575     {
1576         dstp[x] = ( src4n[x] * 678   + src3n[x] * 3902  +
1577                     src2n[x] * 13618 + srcpn[x] * 28830 +
1578                      srcp[x] * 18508 + 32768 ) >> 18;
1579     }
1580     src4p += pitch;
1581     src3p += pitch;
1582     src2p += pitch;
1583     srcpp += pitch;
1584     srcp += pitch;
1585     srcpn += pitch;
1586     src2n += pitch;
1587     src3n += pitch;
1588     src4n += pitch;
1589     dstp += pitch;
1590     for( x = 0; x < width; ++x )
1591     {
1592         dstp[x] = ( src4n[x] * 678 + src3n[x] * 3902 + src2n[x] * 13618 +
1593                     ( srcpp[x] + srcpn[x] ) * 14415 +
1594                     srcp[x] * 18508 + 32768 ) >> 18;
1595     }
1596     src4p += pitch;
1597     src3p += pitch;
1598     src2p += pitch;
1599     srcpp += pitch;
1600     srcp += pitch;
1601     srcpn += pitch;
1602     src2n += pitch;
1603     src3n += pitch;
1604     src4n += pitch;
1605     dstp += pitch;
1606     for( x = 0; x < width; ++x )
1607     {
1608         dstp[x] = ( src4n[x] * 678 + src3n[x] * 3902 +
1609                     ( src2p[x] + src2n[x] ) * 6809 +
1610                     ( srcpp[x] + srcpn[x] ) * 14415 +
1611                     srcp[x] * 18508 + 32768 ) >> 18;
1612     }
1613     src4p += pitch;
1614     src3p += pitch;
1615     src2p += pitch;
1616     srcpp += pitch;
1617     srcp += pitch;
1618     srcpn += pitch;
1619     src2n += pitch;
1620     src3n += pitch;
1621     src4n += pitch;
1622     dstp += pitch;
1623     for( x = 0; x < width; ++x )
1624     {
1625         dstp[x] = ( src4n[x] * 678 + ( src3p[x] + src3n[x] ) * 1951 +
1626                     ( src2p[x] + src2n[x] ) * 6809 +
1627                     ( srcpp[x] + srcpn[x] ) * 14415 +
1628                     srcp[x] * 18508 + 32768 ) >> 18;
1629     }
1630     src4p += pitch;
1631     src3p += pitch;
1632     src2p += pitch;
1633     srcpp += pitch;
1634     srcp += pitch;
1635     srcpn += pitch;
1636     src2n += pitch;
1637     src3n += pitch;
1638     src4n += pitch;
1639     dstp += pitch;
1640     for( y = 4; y < height - 4; ++y )
1641     {
1642         for( x = 0; x < width; ++x )
1643         {
1644             dstp[x] = ( ( src4p[x] + src4n[x] ) * 339 +
1645                         ( src3p[x] + src3n[x] ) * 1951 +
1646                         ( src2p[x] + src2n[x] ) * 6809 +
1647                         ( srcpp[x] + srcpn[x] ) * 14415 +
1648                         srcp[x] * 18508 + 32768 ) >> 18;
1649         }
1650         src4p += pitch;
1651         src3p += pitch;
1652         src2p += pitch;
1653         srcpp += pitch;
1654         srcp += pitch;
1655         srcpn += pitch;
1656         src2n += pitch;
1657         src3n += pitch;
1658         src4n += pitch;
1659         dstp += pitch;
1660     }
1661     for( x = 0; x < width; ++x )
1662     {
1663         dstp[x] = ( src4p[x] * 678 +
1664                     ( src3p[x] + src3n[x] ) * 1951 +
1665                     ( src2p[x] + src2n[x] ) * 6809 +
1666                     ( srcpp[x] + srcpn[x] ) * 14415 +
1667                     srcp[x] * 18508 + 32768 ) >> 18;
1668     }
1669     src4p += pitch;
1670     src3p += pitch;
1671     src2p += pitch;
1672     srcpp += pitch;
1673     srcp += pitch;
1674     srcpn += pitch;
1675     src2n += pitch;
1676     src3n += pitch;
1677     src4n += pitch;
1678     dstp += pitch;
1679     for( x = 0; x < width; ++x )
1680     {
1681         dstp[x] = ( src4p[x] * 678 + src3p[x] * 3902 +
1682                     ( src2p[x] + src2n[x] ) * 6809 +
1683                     ( srcpp[x] + srcpn[x] ) * 14415 +
1684                     srcp[x] * 18508 + 32768 ) >> 18;
1685     }
1686     src4p += pitch;
1687     src3p += pitch;
1688     src2p += pitch;
1689     srcpp += pitch;
1690     srcp += pitch;
1691     srcpn += pitch;
1692     src2n += pitch;
1693     src3n += pitch;
1694     src4n += pitch;
1695     dstp += pitch;
1696     for( x = 0; x < width; ++x )
1697     {
1698         dstp[x] = ( src4p[x] * 678 + src3p[x] * 3902 + src2p[x] * 13618 +
1699                     ( srcpp[x] + srcpn[x] ) * 14415 +
1700                     srcp[x] * 18508 + 32768 ) >> 18;
1701     }
1702     src4p += pitch;
1703     src3p += pitch;
1704     src2p += pitch;
1705     srcpp += pitch;
1706     srcp += pitch;
1707     srcpn += pitch;
1708     src2n += pitch;
1709     src3n += pitch;
1710     src4n += pitch;
1711     dstp += pitch;
1712     for( x = 0; x < width; ++x )
1713     {
1714         dstp[x] = ( src4p[x] * 678   + src3p[x] * 3902 +
1715                     src2p[x] * 13618 + srcpp[x] * 28830 +
1716                     srcp[x]  * 18508 + 32768 ) >> 18;
1717     }
1718 }
1719
1720 /**
1721  * Finds spatial derivatives for a a source field plane
1722  * @param srcp Pointer to the plane to derive
1723  * @param src_pitch Stride of srcp
1724  * @param height Height of the half-height field-sized frame
1725  * @param width Width of srcp bitmap rows, as opposed to the padded stride in src_pitch
1726  * @param x2 Pointed to the array to store the x/x derivatives
1727  * @param y2 Pointer to the array to store the y/y derivatives
1728  * @param xy Pointer to the array to store the x/y derivatives
1729  */
1730 void eedi2_calc_derivatives( uint8_t *srcp, int src_pitch, int height, int width, int *x2, int *y2, int *xy)
1731 {
1732
1733     unsigned char * srcpp = srcp - src_pitch;
1734     unsigned char * srcpn = srcp + src_pitch;
1735     int x, y;
1736     {
1737         const int Ix = srcp[1] -  srcp[0];
1738         const int Iy = srcp[0] - srcpn[0];
1739         x2[0] = ( Ix * Ix ) >> 1;
1740         y2[0] = ( Iy * Iy ) >> 1;
1741         xy[0] = ( Ix * Iy ) >> 1;
1742     }
1743     for( x = 1; x < width - 1; ++x )
1744     {
1745         const int Ix = srcp[x+1] -  srcp[x-1];
1746         const int Iy = srcp[x]   - srcpn[x];
1747         x2[x] = ( Ix * Ix ) >> 1;
1748         y2[x] = ( Iy * Iy ) >> 1;
1749         xy[x] = ( Ix * Iy ) >> 1;
1750     }
1751     {
1752         const int Ix = srcp[x] -  srcp[x-1];
1753         const int Iy = srcp[x] - srcpn[x];
1754         x2[x] = ( Ix * Ix ) >> 1;
1755         y2[x] = ( Iy * Iy ) >> 1;
1756         xy[x] = ( Ix * Iy ) >> 1;
1757     }
1758     srcpp += src_pitch;
1759     srcp += src_pitch;
1760     srcpn += src_pitch;
1761     x2 += src_pitch;
1762     y2 += src_pitch;
1763     xy += src_pitch;
1764     for( y = 1; y < height - 1; ++y )
1765     {
1766         {
1767             const int Ix =  srcp[1] -  srcp[0];
1768             const int Iy = srcpp[0] - srcpn[0];
1769             x2[0] = ( Ix * Ix ) >> 1;
1770             y2[0] = ( Iy * Iy ) >> 1;
1771             xy[0] = ( Ix * Iy ) >> 1;
1772         }
1773         for ( x = 1; x < width - 1; ++x )
1774         {
1775             const int Ix =  srcp[x+1] -  srcp[x-1];
1776             const int Iy = srcpp[x]   - srcpn[x];
1777             x2[x] = ( Ix * Ix ) >> 1;
1778             y2[x] = ( Iy * Iy ) >> 1;
1779             xy[x] = ( Ix * Iy ) >> 1;
1780         }
1781         {
1782             const int Ix =  srcp[x] -  srcp[x-1];
1783             const int Iy = srcpp[x] - srcpn[x];
1784             x2[x] = ( Ix *Ix ) >> 1;
1785             y2[x] = ( Iy *Iy ) >> 1;
1786             xy[x] = ( Ix *Iy ) >> 1;
1787         }
1788         srcpp += src_pitch;
1789         srcp += src_pitch;
1790         srcpn += src_pitch;
1791         x2 += src_pitch;
1792         y2 += src_pitch;
1793         xy += src_pitch;
1794     }
1795     {
1796         const int Ix =  srcp[1] - srcp[0];
1797         const int Iy = srcpp[0] - srcp[0];
1798         x2[0] = ( Ix * Ix ) >> 1;
1799         y2[0] = ( Iy * Iy ) >> 1;
1800         xy[0] = ( Ix * Iy ) >> 1;
1801     }
1802     for( x = 1; x < width - 1; ++x )
1803     {
1804         const int Ix =  srcp[x+1] - srcp[x-1];
1805         const int Iy = srcpp[x]   - srcp[x];
1806         x2[x] = ( Ix * Ix ) >> 1;
1807         y2[x] = ( Iy * Iy ) >> 1;
1808         xy[x] = ( Ix * Iy ) >> 1;
1809     }
1810     {
1811         const int Ix =  srcp[x] - srcp[x-1];
1812         const int Iy = srcpp[x] - srcp[x];
1813         x2[x] = ( Ix * Ix ) >> 1;
1814         y2[x] = ( Iy * Iy ) >> 1;
1815         xy[x] = ( Ix * Iy ) >> 1;
1816     }
1817 }
1818
1819 /**
1820  * Filters junctions and corners for the output image
1821  * @param x2 Pointer to the x/x derivatives
1822  * @param y2 Pointer to the y/y derivatives
1823  * @param xy Pointer to the x/y derivatives
1824  * @param pitch Stride of the source field plane from which the derivatives were calculated
1825  * @param mskp Pointer to the edge direction mask
1826  * @param msk_pitch Stride of mskp
1827  * @param dstp Pointer to the output image being filtered in place
1828  * @param dst_pitch Stride of dstp
1829  * @param height Height of the full-frame output plane
1830  * @param width Width of dstp bitmap rows, as opposed to the padded stride in dst_pitch
1831  * @param field Field to filter
1832  */
1833 void eedi2_post_process_corner( int *x2, int *y2, int *xy, const int pitch, uint8_t * mskp, int msk_pitch, uint8_t * dstp, int dst_pitch, int height, int width, int field )
1834 {
1835     mskp += ( 8 - field ) * msk_pitch;
1836     dstp += ( 8 - field ) * dst_pitch;
1837     unsigned char * dstpp = dstp - dst_pitch;
1838     unsigned char * dstpn = dstp + dst_pitch;
1839     x2 += pitch * 3;
1840     y2 += pitch * 3;
1841     xy += pitch * 3;
1842     int *x2n = x2 + pitch;
1843     int *y2n = y2 + pitch;
1844     int *xyn = xy + pitch;
1845     int x, y;
1846
1847     for( y = 8 - field; y < height - 7; y += 2 )
1848     {
1849         for( x = 4; x < width - 4; ++x )
1850         {
1851             if( mskp[x] == 255 || mskp[x] == 128 ) continue;
1852             const int c1 = (int)( x2[x]  *  y2[x] -  xy[x] * xy[x] - 0.09 *
1853                                   ( x2[x]  + y2[x] )  * ( x2[x]  + y2[x] ) );
1854             const int c2 = (int)( x2n[x] * y2n[x] - xyn[x]* xyn[x] - 0.09 *
1855                                   ( x2n[x] + y2n[x] ) * ( x2n[x] + y2n[x] ) );
1856             if (c1 > 775 || c2 > 775)
1857                 dstp[x] = ( dstpp[x] + dstpn[x] + 1 ) >> 1;
1858         }
1859         mskp += msk_pitch * 2;
1860         dstpp += dst_pitch * 2;
1861         dstp += dst_pitch * 2;
1862         dstpn += dst_pitch * 2;
1863         x2 += pitch;
1864         x2n += pitch;
1865         y2 += pitch;
1866         y2n += pitch;
1867         xy += pitch;
1868         xyn += pitch;
1869     }
1870 }