/*
 * OSDN Git Service
 *
 * import jp-0.9.3
 * [handbrake-jp/handbrake-jp.git] / libhb / deblock.c
 */
/*
 Copyright (C) 2005 Michael Niedermayer <michaelni@gmx.at>

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include <stdlib.h>

#include "hb.h"
#include "libavcodec/avcodec.h"
#include "mpeg2dec/mpeg2.h"

/* Values hb_deblock_init() uses for fields the settings string leaves out. */
#define PP7_QP_DEFAULT    5   /* fixed quantiser                  */
#define PP7_MODE_DEFAULT  2   /* selects pp7_medium_threshold     */

/*
 * Smaller / larger of two values.  These are function-like macros: each
 * argument can be evaluated twice, so never pass an expression with side
 * effects (e.g. i++).
 */
#define XMIN(a,b) ((a) < (b) ? (a) : (b))
#define XMAX(a,b) ((a) > (b) ? (a) : (b))

/* Element type of the transform coefficient blocks. */
typedef short DCTELEM;

//===========================================================================//
/*
 * 8x8 dither matrix; the 64 entries are the values 0..63, each used once.
 * pp7_filter() adds pp7_dither[y&7][x&7] to a requantised pixel before the
 * final right shift by 6.
 */
static const uint8_t  __attribute__((aligned(8))) pp7_dither[8][8] =
{
    {  0,  48,  12,  60,   3,  51,  15,  63, },
    { 32,  16,  44,  28,  35,  19,  47,  31, },
    {  8,  56,   4,  52,  11,  59,   7,  55, },
    { 40,  24,  36,  20,  43,  27,  39,  23, },
    {  2,  50,  14,  62,   1,  49,  13,  61, },
    { 34,  18,  46,  30,  33,  17,  45,  29, },
    { 10,  58,   6,  54,   9,  57,   5,  53, },
    { 42,  26,  38,  22,  41,  25,  37,  21, },
};

44 struct hb_filter_private_s
45 {
46     int           pix_fmt;
47     int           width[3];
48     int           height[3];
49
50     int           pp7_qp;
51     int           pp7_mode;
52     int           pp7_mpeg2;
53     int           pp7_temp_stride;
54     uint8_t     * pp7_src;
55
56     AVPicture     pic_in;
57     AVPicture     pic_out;
58     hb_buffer_t * buf_out;
59 };
60
61 hb_filter_private_t * hb_deblock_init( int pix_fmt,
62                                        int width,
63                                        int height,
64                                        char * settings );
65
66 int hb_deblock_work( const hb_buffer_t * buf_in,
67                      hb_buffer_t ** buf_out,
68                      int pix_fmt,
69                      int width,
70                      int height,
71                      hb_filter_private_t * pv );
72
73 void hb_deblock_close( hb_filter_private_t * pv );
74
75 hb_filter_object_t hb_filter_deblock =
76 {
77     FILTER_DEBLOCK,
78     "Deblock (pp7)",
79     NULL,
80     hb_deblock_init,
81     hb_deblock_work,
82     hb_deblock_close,
83 };
84
85 static inline void pp7_dct_a( DCTELEM * dst, uint8_t * src, int stride )
86 {
87     int i;
88
89     for( i = 0; i < 4; i++ )
90     {
91         int s0 =  src[0*stride] + src[6*stride];
92         int s1 =  src[1*stride] + src[5*stride];
93         int s2 =  src[2*stride] + src[4*stride];
94         int s3 =  src[3*stride];
95         int s  =  s3+s3;
96
97         s3 = s  - s0;
98         s0 = s  + s0;
99         s  = s2 + s1;
100         s2 = s2 - s1;
101
102         dst[0] =   s0 + s;
103         dst[2] =   s0 - s;
104         dst[1] = 2*s3 + s2;
105         dst[3] =   s3 - s2*2;
106
107         src++;
108         dst += 4;
109     }
110 }
111
112 static void pp7_dct_b( DCTELEM * dst, DCTELEM * src )
113 {
114     int i;
115
116     for( i = 0; i < 4; i++ )
117     {
118         int s0 = src[0*4] + src[6*4];
119         int s1 = src[1*4] + src[5*4];
120         int s2 = src[2*4] + src[4*4];
121         int s3 = src[3*4];
122         int s  = s3+s3;
123
124         s3 = s  - s0;
125         s0 = s  + s0;
126         s  = s2 + s1;
127         s2 = s2 - s1;
128
129         dst[0*4] =   s0 + s;
130         dst[2*4] =   s0 - s;
131         dst[1*4] = 2*s3 + s2;
132         dst[3*4] =   s3 - s2*2;
133
134         src++;
135         dst++;
136     }
137 }
138
/*
 * Fixed-point normalisation constants.
 * N is the scale (1 << 16).  N0, N1 and N2 are combined pairwise into the
 * divisors of pp7_factor[].  SN0, SN1 and SN2 are numerically the square
 * roots of N0, N1 and N2; pp7_init_threshold() uses SN0 and SN2.
 */
#define N   (1<<16)
#define N0  4
#define N1  5
#define N2  10
#define SN0 2
#define SN1 2.2360679775
#define SN2 3.16227766017

/*
 * Per-coefficient weights of the 4x4 block:
 * pp7_factor[4*r + c] == N / (A[r] * A[c]) with A = { N0, N1, N0, N2 }.
 */
static const int pp7_factor[16] =
{
    N/(N0*N0), N/(N0*N1), N/(N0*N0),N/(N0*N2),
    N/(N1*N0), N/(N1*N1), N/(N1*N0),N/(N1*N2),
    N/(N0*N0), N/(N0*N1), N/(N0*N0),N/(N0*N2),
    N/(N2*N0), N/(N2*N1), N/(N2*N0),N/(N2*N2),
};

/* Dead-zone half-widths, indexed [quantiser][coefficient]. */
static int pp7_threshold[99][16];

/*
 * Fill pp7_threshold[][] for every quantiser row of the table.
 * Each entry is scale(i) * max(1, qp) * 4 - 1, truncated to int, where
 * scale(i) multiplies SN2 for a set bit 0 or bit 2 of i and SN0 otherwise.
 */
static void pp7_init_threshold( void )
{
    /* Take the row count from the table so the two can never disagree. */
    const int levels = (int)(sizeof(pp7_threshold) / sizeof(pp7_threshold[0]));
    const int bias   = 0;
    int qp, i;

    for( qp = 0; qp < levels; qp++ )
    {
        /* Quantiser 0 shares the thresholds of quantiser 1. */
        const int qp_eff = (qp > 1) ? qp : 1;

        for( i = 0; i < 16; i++ )
        {
            /* The product is a double; the cast makes the truncation explicit. */
            pp7_threshold[qp][i] = (int)(
                ((i&1)?SN2:SN0) * ((i&4)?SN2:SN0) *
                 qp_eff * (1<<2) - 1 - bias );
        }
    }
}

173 static int pp7_hard_threshold( DCTELEM * src, int qp )
174 {
175     int i;
176     int a;
177
178     a = src[0] * pp7_factor[0];
179     for( i = 1; i < 16; i++ )
180     {
181         unsigned int threshold1 = pp7_threshold[qp][i];
182         unsigned int threshold2 = (threshold1<<1);
183         int level= src[i];
184         if( ((unsigned)(level+threshold1)) > threshold2 )
185         {
186             a += level * pp7_factor[i];
187         }
188     }
189     return (a + (1<<11)) >> 12;
190 }
191
192 static int pp7_medium_threshold( DCTELEM * src, int qp )
193 {
194     int i;
195     int a;
196
197     a = src[0] * pp7_factor[0];
198     for( i = 1; i < 16; i++ )
199     {
200         unsigned int threshold1 = pp7_threshold[qp][i];
201         unsigned int threshold2 = (threshold1<<1);
202         int level= src[i];
203         if( ((unsigned)(level+threshold1)) > threshold2 )
204         {
205             if( ((unsigned)(level+2*threshold1)) > 2*threshold2 )
206             {
207                 a += level * pp7_factor[i];
208             }
209             else
210             {
211                 if( level>0 )
212                 {
213                     a += 2*(level - (int)threshold1) * pp7_factor[i];
214                 }
215                 else
216                 {
217                     a += 2*(level + (int)threshold1) * pp7_factor[i];
218                 }
219             }
220         }
221     }
222     return (a + (1<<11)) >> 12;
223 }
224
225 static int pp7_soft_threshold( DCTELEM * src, int qp )
226 {
227     int i;
228     int a;
229
230     a = src[0] * pp7_factor[0];
231     for( i = 1; i < 16; i++ )
232     {
233         unsigned int threshold1 = pp7_threshold[qp][i];
234         unsigned int threshold2 = (threshold1<<1);
235         int level= src[i];
236         if( ((unsigned)(level+threshold1))>threshold2 )
237         {
238             if( level>0 )
239             {
240                 a += (level - (int)threshold1) * pp7_factor[i];
241             }
242             else
243             {
244                 a += (level + (int)threshold1) * pp7_factor[i];
245             }
246         }
247     }
248     return (a + (1<<11)) >> 12;
249 }
250
251 static int ( * pp7_requantize )( DCTELEM * src, int qp ) = pp7_hard_threshold;
252
253 static void pp7_filter( hb_filter_private_t * pv,
254                         uint8_t * dst,
255                         uint8_t * src,
256                         int width,
257                         int height,
258                         uint8_t * qp_store,
259                         int qp_stride,
260                         int is_luma)
261 {
262     int x, y;
263
264     const int  stride = is_luma ? pv->pp7_temp_stride : ((width+16+15)&(~15));
265     uint8_t  * p_src  = pv->pp7_src + 8*stride;
266     DCTELEM  * block  = (DCTELEM *)(pv->pp7_src);
267     DCTELEM  * temp   = (DCTELEM *)(pv->pp7_src + 32);
268
269     if( !src || !dst )
270     {
271         return;
272     }
273
274     for( y = 0; y < height; y++ )
275     {
276         int index = 8 + 8*stride + y*stride;
277         memcpy( p_src + index, src + y*width, width );
278
279         for( x = 0; x < 8; x++ )
280         {
281             p_src[index         - x - 1] = p_src[index +         x    ];
282             p_src[index + width + x    ] = p_src[index + width - x - 1];
283         }
284     }
285
286     for( y = 0; y < 8; y++ )
287     {
288         memcpy( p_src + (     7-y)*stride,
289                 p_src + (     y+8)*stride, stride );
290         memcpy( p_src + (height+8+y)*stride,
291                 p_src + (height-y+7)*stride, stride );
292     }
293
294     for( y = 0; y < height; y++ )
295     {
296         for( x = -8; x < 0; x += 4 )
297         {
298             const int index = x + y*stride + (8-3)*(1+stride) + 8;
299             uint8_t * src   = p_src + index;
300             DCTELEM * tp    = temp+4*x;
301
302             pp7_dct_a( tp+4*8, src, stride );
303         }
304
305         for( x = 0; x < width; )
306         {
307             const int qps = 3 + is_luma;
308             int end = XMIN(x+8, width);
309
310             int qp;
311             if( pv->pp7_qp )
312             {
313                 qp = pv->pp7_qp;
314             }
315             else
316             {
317                 qp = qp_store[ (XMIN(x, width-1)>>qps) +
318                                (XMIN(y, height-1)>>qps) * qp_stride ];
319
320                 if( pv->pp7_mpeg2 )
321                 {
322                     qp >>= 1;
323                 }
324             }
325
326             for( ; x < end; x++ )
327             {
328                 const int index = x + y*stride + (8-3)*(1+stride) + 8;
329                 uint8_t * src   = p_src + index;
330                 DCTELEM * tp    = temp+4*x;
331                 int v;
332
333                 if( (x&3) == 0 )
334                 {
335                     pp7_dct_a( tp+4*8, src, stride );
336                 }
337
338                 pp7_dct_b( block, tp );
339
340                 v = pp7_requantize( block, qp );
341                 v = (v + pp7_dither[y&7][x&7]) >> 6;
342                 if( (unsigned)v > 255 )
343                 {
344                     v = (-v) >> 31;
345                 }
346                 dst[x + y*width] = v;
347             }
348         }
349     }
350 }
351
352 hb_filter_private_t * hb_deblock_init( int pix_fmt,
353                                        int width,
354                                        int height,
355                                        char * settings )
356 {
357     if( pix_fmt != PIX_FMT_YUV420P )
358     {
359         return 0;
360     }
361
362     hb_filter_private_t * pv = malloc( sizeof(struct hb_filter_private_s) );
363
364     pv->pix_fmt = pix_fmt;
365
366     pv->width[0] = width;
367     pv->height[0] = height;
368
369     pv->width[1] = pv->width[2] = width >> 1;
370     pv->height[1] = pv->height[2] = height >> 1;
371
372
373     pv->pp7_qp    = PP7_QP_DEFAULT;
374     pv->pp7_mode  = PP7_MODE_DEFAULT;
375     pv->pp7_mpeg2 = 1; /*mpi->qscale_type;*/
376
377     if( settings )
378     {
379         sscanf( settings, "%d:%d", &pv->pp7_qp, &pv->pp7_mode );
380     }
381
382     if( pv->pp7_qp < 0 )
383     {
384         pv->pp7_qp = 0;
385     }
386
387     pp7_init_threshold();
388
389     switch( pv->pp7_mode )
390     {
391         case 0:
392             pp7_requantize = pp7_hard_threshold;
393             break;
394         case 1:
395             pp7_requantize = pp7_soft_threshold;
396             break;
397         case 2:
398             pp7_requantize = pp7_medium_threshold;
399             break;
400     }
401
402     int h = (height+16+15)&(~15);
403
404     pv->pp7_temp_stride = (width+16+15)&(~15);
405
406     pv->pp7_src = (uint8_t*)malloc( pv->pp7_temp_stride*(h+8)*sizeof(uint8_t) );
407
408     pv->buf_out = hb_video_buffer_init( width, height );
409
410     return pv;
411 }
412
413 void hb_deblock_close( hb_filter_private_t * pv )
414 {
415     if( !pv )
416     {
417         return;
418     }
419
420     if( pv->buf_out )
421     {
422         hb_buffer_close( &pv->buf_out );
423     }
424
425     free( pv );
426 }
427
428 int hb_deblock_work( const hb_buffer_t * buf_in,
429                      hb_buffer_t ** buf_out,
430                      int pix_fmt,
431                      int width,
432                      int height,
433                      hb_filter_private_t * pv )
434 {
435     if( !pv ||
436         pix_fmt != pv->pix_fmt ||
437         width != pv->width[0] ||
438         height != pv->height[0] )
439     {
440         return FILTER_FAILED;
441     }
442
443     avpicture_fill( &pv->pic_in, buf_in->data,
444                     pix_fmt, width, height );
445
446     avpicture_fill( &pv->pic_out, pv->buf_out->data,
447                     pix_fmt, width, height );
448
449     if( /*TODO: mpi->qscale ||*/ pv->pp7_qp )
450     {
451         pp7_filter( pv,
452                 pv->pic_out.data[0],
453                 pv->pic_in.data[0],
454                 pv->width[0],
455                 pv->height[0],
456                 NULL, /* TODO: mpi->qscale*/
457                 0,    /* TODO: mpi->qstride*/
458                 1 );
459
460         pp7_filter( pv,
461                 pv->pic_out.data[1],
462                 pv->pic_in.data[1],
463                 pv->width[1],
464                 pv->height[1],
465                 NULL, /* TODO: mpi->qscale*/
466                 0,    /* TODO: mpi->qstride*/
467                 0 );
468
469         pp7_filter( pv,
470                 pv->pic_out.data[2],
471                 pv->pic_in.data[2],
472                 pv->width[2],
473                 pv->height[2],
474                 NULL, /* TODO: mpi->qscale*/
475                 0,    /* TODO: mpi->qstride*/
476                 0 );
477     }
478     else
479     {
480         memcpy( pv->buf_out->data, buf_in->data, buf_in->size );
481     }
482
483     hb_buffer_copy_settings( pv->buf_out, buf_in );
484
485     *buf_out = pv->buf_out;
486
487     return FILTER_OK;
488 }
489
490