OSDN Git Service

Fix AUD insert issue in AVC encoder with GEN6/7.5/8.
[android-x86/hardware-intel-common-vaapi.git] / src / gen6_mfc_common.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "gen9_mfc.h"
45 #include "intel_media.h"
46
47 #ifndef HAVE_LOG2F
48 #define log2f(x) (logf(x)/(float)M_LN2)
49 #endif
50
51 int intel_avc_enc_slice_type_fixup(int slice_type)
52 {
53     if (slice_type == SLICE_TYPE_SP ||
54         slice_type == SLICE_TYPE_P)
55         slice_type = SLICE_TYPE_P;
56     else if (slice_type == SLICE_TYPE_SI ||
57              slice_type == SLICE_TYPE_I)
58         slice_type = SLICE_TYPE_I;
59     else {
60         if (slice_type != SLICE_TYPE_B)
61             WARN_ONCE("Invalid slice type for H.264 encoding!\n");
62
63         slice_type = SLICE_TYPE_B;
64     }
65
66     return slice_type;
67 }
68
69 static void
70 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
71                                         struct intel_encoder_context *encoder_context)
72 {
73     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
74     int i;
75
76     for (i = 0 ; i < 3; i++) {
77         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
78         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
79         mfc_context->bit_rate_control_context[i].GrowInit = 6;
80         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
81         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
82         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
83
84         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
85         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
86         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
87         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
88         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
89         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
90     }
91 }
92
93 static void intel_mfc_brc_init(struct encode_state *encode_state,
94                                struct intel_encoder_context* encoder_context)
95 {
96     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
97     double bitrate, framerate;
98     double frame_per_bits = 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
99     double qp1_size = 0.1 * frame_per_bits;
100     double qp51_size = 0.001 * frame_per_bits;
101     int min_qp = MAX(1, encoder_context->brc.min_qp);
102     double bpf, factor, hrd_factor;
103     int inum = encoder_context->brc.num_iframes_in_gop,
104         pnum = encoder_context->brc.num_pframes_in_gop,
105         bnum = encoder_context->brc.num_bframes_in_gop; /* Gop structure: number of I, P, B frames in the Gop. */
106     int intra_period = encoder_context->brc.gop_size;
107     int i;
108     int tmp_min_qp = 0;
109
110     if (encoder_context->layer.num_layers > 1)
111         qp1_size = 0.15 * frame_per_bits;
112
113     mfc_context->brc.mode = encoder_context->rate_control_mode;
114
115     mfc_context->hrd.violation_noted = 0;
116
117     for (i = 0; i < encoder_context->layer.num_layers; i++) {
118         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = 26;
119         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 26;
120         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = 26;
121
122         if (i == 0) {
123             bitrate = encoder_context->brc.bits_per_second[0];
124             framerate = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
125         } else {
126             bitrate = (encoder_context->brc.bits_per_second[i] - encoder_context->brc.bits_per_second[i - 1]);
127             framerate = ((double)encoder_context->brc.framerate[i].num / (double)encoder_context->brc.framerate[i].den) -
128                         ((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
129         }
130
131         if (mfc_context->brc.mode == VA_RC_VBR && encoder_context->brc.target_percentage[i])
132             bitrate = bitrate * encoder_context->brc.target_percentage[i] / 100;
133
134         if (i == encoder_context->layer.num_layers - 1)
135             factor = 1.0;
136         else {
137             factor = ((double)encoder_context->brc.framerate[i].num / (double)encoder_context->brc.framerate[i].den) /
138                      ((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
139         }
140
141         hrd_factor = (double)bitrate / encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
142
143         mfc_context->hrd.buffer_size[i] = (unsigned int)(encoder_context->brc.hrd_buffer_size * hrd_factor);
144         mfc_context->hrd.current_buffer_fullness[i] =
145             (double)(encoder_context->brc.hrd_initial_buffer_fullness < encoder_context->brc.hrd_buffer_size) ?
146             encoder_context->brc.hrd_initial_buffer_fullness : encoder_context->brc.hrd_buffer_size / 2.;
147         mfc_context->hrd.current_buffer_fullness[i] *= hrd_factor;
148         mfc_context->hrd.target_buffer_fullness[i] = (double)encoder_context->brc.hrd_buffer_size * hrd_factor / 2.;
149         mfc_context->hrd.buffer_capacity[i] = (double)encoder_context->brc.hrd_buffer_size * hrd_factor / qp1_size;
150
151         if (encoder_context->layer.num_layers > 1) {
152             if (i == 0) {
153                 intra_period = (int)(encoder_context->brc.gop_size * factor);
154                 inum = 1;
155                 pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor);
156                 bnum = intra_period - inum - pnum;
157             } else {
158                 intra_period = (int)(encoder_context->brc.gop_size * factor) - intra_period;
159                 inum = 0;
160                 pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor) - pnum;
161                 bnum = intra_period - inum - pnum;
162             }
163         }
164
165         mfc_context->brc.gop_nums[i][SLICE_TYPE_I] = inum;
166         mfc_context->brc.gop_nums[i][SLICE_TYPE_P] = pnum;
167         mfc_context->brc.gop_nums[i][SLICE_TYPE_B] = bnum;
168
169         mfc_context->brc.target_frame_size[i][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period) / framerate) /
170                                                                     (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
171         mfc_context->brc.target_frame_size[i][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
172         mfc_context->brc.target_frame_size[i][SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
173
174         bpf = mfc_context->brc.bits_per_frame[i] = bitrate / framerate;
175
176         if (encoder_context->brc.initial_qp) {
177             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = encoder_context->brc.initial_qp;
178             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = encoder_context->brc.initial_qp;
179             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = encoder_context->brc.initial_qp;
180
181             BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I], min_qp, 51);
182             BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P], min_qp, 51);
183             BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B], min_qp, 51);
184         } else {
185             if ((bpf > qp51_size) && (bpf < qp1_size)) {
186                 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51 - 50 * (bpf - qp51_size) / (qp1_size - qp51_size);
187             } else if (bpf >= qp1_size)
188                 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 1;
189             else if (bpf <= qp51_size)
190                 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51;
191
192             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P];
193             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I];
194
195             tmp_min_qp = (min_qp < 36) ? min_qp : 36;
196             BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I], tmp_min_qp, 36);
197             tmp_min_qp = (min_qp < 40) ? min_qp : 40;
198             BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P], tmp_min_qp, 40);
199             tmp_min_qp = (min_qp < 45) ? min_qp : 45;
200             BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B], tmp_min_qp, 45);
201         }
202     }
203 }
204
205 int intel_mfc_update_hrd(struct encode_state *encode_state,
206                          struct intel_encoder_context *encoder_context,
207                          int frame_bits)
208 {
209     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
210     int layer_id = encoder_context->layer.curr_frame_layer_id;
211     double prev_bf = mfc_context->hrd.current_buffer_fullness[layer_id];
212
213     mfc_context->hrd.current_buffer_fullness[layer_id] -= frame_bits;
214
215     if (mfc_context->hrd.buffer_size[layer_id] > 0 && mfc_context->hrd.current_buffer_fullness[layer_id] <= 0.) {
216         mfc_context->hrd.current_buffer_fullness[layer_id] = prev_bf;
217         return BRC_UNDERFLOW;
218     }
219
220     mfc_context->hrd.current_buffer_fullness[layer_id] += mfc_context->brc.bits_per_frame[layer_id];
221     if (mfc_context->hrd.buffer_size[layer_id] > 0 && mfc_context->hrd.current_buffer_fullness[layer_id] > mfc_context->hrd.buffer_size[layer_id]) {
222         if (mfc_context->brc.mode == VA_RC_VBR)
223             mfc_context->hrd.current_buffer_fullness[layer_id] = mfc_context->hrd.buffer_size[layer_id];
224         else {
225             mfc_context->hrd.current_buffer_fullness[layer_id] = prev_bf;
226             return BRC_OVERFLOW;
227         }
228     }
229     return BRC_NO_HRD_VIOLATION;
230 }
231
/*
 * Post-encode CBR rate control update.
 *
 * Called after a frame has been packed with its final size (frame_bits).
 * Updates the HRD model, predicts the QP for the next frame of the same
 * slice type from the ratio of target to actual frame size, applies a
 * correction based on HRD buffer fullness, and nudges the QPs of the
 * other slice types to keep the usual I/P/B offsets.
 *
 * Returns a gen6_brc_status: BRC_NO_HRD_VIOLATION on success, or an
 * underflow/overflow status (possibly *_WITH_MAX_QP / *_WITH_MIN_QP)
 * that the caller uses to decide on re-encoding or bit stuffing.
 */
static int intel_mfc_brc_postpack_cbr(struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int frame_bits)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
    int curr_frame_layer_id, next_frame_layer_id;
    int qpi, qpp, qpb;
    int qp; // quantizer of previously encoded slice of current type
    int qpn; // predicted quantizer for next frame of current type in integer format
    double qpf; // predicted quantizer for next frame of current type in float format
    double delta_qp; // QP correction
    int min_qp = MAX(1, encoder_context->brc.min_qp);
    int target_frame_size, frame_size_next;
    /* Notes:
     *  x - how far we are from HRD buffer borders
     *  y - how far we are from target HRD buffer fullness
     */
    double x, y;
    double frame_size_alpha;

    /* With temporal layers, the frame just encoded and the next frame to
     * encode may belong to different layers; without layers both are 0. */
    if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0) {
        curr_frame_layer_id = 0;
        next_frame_layer_id = 0;
    } else {
        curr_frame_layer_id = encoder_context->layer.curr_frame_layer_id;
        next_frame_layer_id = encoder_context->layer.frame_layer_ids[encoder_context->num_frames_in_sequence % encoder_context->layer.size_frame_layer_ids];
    }

    /* checking whether HRD compliance first */
    sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);

    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* nothing */
    } else {
        /* On a violation the current frame will be re-encoded, so the
         * prediction below targets the current layer again. */
        next_frame_layer_id = curr_frame_layer_id;
    }

    /* Record this frame's stats, then predict from the stats of the layer
     * the next frame belongs to. */
    mfc_context->brc.bits_prev_frame[curr_frame_layer_id] = frame_bits;
    frame_bits = mfc_context->brc.bits_prev_frame[next_frame_layer_id];

    mfc_context->brc.prev_slice_type[curr_frame_layer_id] = slicetype;
    slicetype = mfc_context->brc.prev_slice_type[next_frame_layer_id];

    /* 0 means the next frame is the first frame of next layer */
    if (frame_bits == 0)
        return sts;

    qpi = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I];
    qpp = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P];
    qpb = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B];

    qp = mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype];

    /* Smooth the target toward the actual size; alpha grows with the
     * number of frames of this type per GOP (capped at 30). */
    target_frame_size = mfc_context->brc.target_frame_size[next_frame_layer_id][slicetype];
    if (mfc_context->hrd.buffer_capacity[next_frame_layer_id] < 5)
        frame_size_alpha = 0;
    else
        frame_size_alpha = (double)mfc_context->brc.gop_nums[next_frame_layer_id][slicetype];
    if (frame_size_alpha > 30) frame_size_alpha = 30;
    frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
                      (double)(frame_size_alpha + 1.);

    /* frame_size_next: avoiding negative number and too small value */
    if ((double)frame_size_next < (double)(target_frame_size * 0.25))
        frame_size_next = (int)((double)target_frame_size * 0.25);

    /* Scale the QP by the ratio of target to predicted next frame size. */
    qpf = (double)qp * target_frame_size / frame_size_next;
    qpn = (int)(qpf + 0.5);

    if (qpn == qp) {
        /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
        mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] += qpf - qpn;
        if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] > 1.0) {
            qpn++;
            mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
        } else if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] < -1.0) {
            qpn--;
            mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
        }
    }
    /* making sure that QP is not changing too fast */
    if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
    else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, 1, 51);

    /* calculating QP delta as some function*/
    x = mfc_context->hrd.target_buffer_fullness[next_frame_layer_id] - mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
    if (x > 0) {
        /* Below target fullness: normalize by the distance to empty. */
        x /= mfc_context->hrd.target_buffer_fullness[next_frame_layer_id];
        y = mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
    } else {
        /* Above target fullness: normalize by the distance to full. */
        x /= (mfc_context->hrd.buffer_size[next_frame_layer_id] - mfc_context->hrd.target_buffer_fullness[next_frame_layer_id]);
        y = mfc_context->hrd.buffer_size[next_frame_layer_id] - mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
    }
    if (y < 0.01) y = 0.01;
    if (x > 1) x = 1;
    else if (x < -1) x = -1;

    /* The correction grows as we approach a buffer border (small y) and
     * with the distance from the target fullness (x). */
    delta_qp = BRC_QP_MAX_CHANGE * exp(-1 / y) * sin(BRC_PI_0_5 * x);
    qpn = (int)(qpn + delta_qp + 0.5);

    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, min_qp, 51);

    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* correcting QPs of slices of other types */
        if (slicetype == SLICE_TYPE_P) {
            if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
            if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
        } else if (slicetype == SLICE_TYPE_I) {
            if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
            if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
        } else { // SLICE_TYPE_B
            if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
            if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
        }
        BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I], min_qp, 51);
        BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P], min_qp, 51);
        BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B], min_qp, 51);
    } else if (sts == BRC_UNDERFLOW) { // underflow
        if (qpn <= qp) qpn = qp + 1;
        if (qpn > 51) {
            qpn = 51;
            sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
        }
    } else if (sts == BRC_OVERFLOW) {
        if (qpn >= qp) qpn = qp - 1;
        if (qpn < min_qp) { // overflow with minQP
            qpn = min_qp;
            sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
        }
    }

    mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype] = qpn;

    return sts;
}
379
380 static int intel_mfc_brc_postpack_vbr(struct encode_state *encode_state,
381                                       struct intel_encoder_context *encoder_context,
382                                       int frame_bits)
383 {
384     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
385     gen6_brc_status sts;
386     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
387     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
388     int *qp = mfc_context->brc.qp_prime_y[0];
389     int min_qp = MAX(1, encoder_context->brc.min_qp);
390     int qp_delta, large_frame_adjustment;
391
392     // This implements a simple reactive VBR rate control mode for single-layer H.264.  The primary
393     // aim here is to avoid the problematic behaviour that the CBR rate controller displays on
394     // scene changes, where the QP can get pushed up by a large amount in a short period and
395     // compromise the quality of following frames to a very visible degree.
396     // The main idea, then, is to try to keep the HRD buffering above the target level most of the
397     // time, so that when a large frame is generated (on a scene change or when the stream
398     // complexity increases) we have plenty of slack to be able to encode the more difficult region
399     // without compromising quality immediately on the following frames.   It is optimistic about
400     // the complexity of future frames, so even after generating one or more large frames on a
401     // significant change it will try to keep the QP at its current level until the HRD buffer
402     // bounds force a change to maintain the intended rate.
403
404     sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
405
406     // This adjustment is applied to increase the QP by more than we normally would if a very
407     // large frame is encountered and we are in danger of running out of slack.
408     large_frame_adjustment = rint(2.0 * log(frame_bits / mfc_context->brc.target_frame_size[0][slice_type]));
409
410     if (sts == BRC_UNDERFLOW) {
411         // The frame is far too big and we don't have the bits available to send it, so it will
412         // have to be re-encoded at a higher QP.
413         qp_delta = +2;
414         if (frame_bits > mfc_context->brc.target_frame_size[0][slice_type])
415             qp_delta += large_frame_adjustment;
416     } else if (sts == BRC_OVERFLOW) {
417         // The frame is very small and we are now overflowing the HRD buffer.  Currently this case
418         // does not occur because we ignore overflow in VBR mode.
419         assert(0 && "Overflow in VBR mode");
420     } else if (frame_bits <= mfc_context->brc.target_frame_size[0][slice_type]) {
421         // The frame is smaller than the average size expected for this frame type.
422         if (mfc_context->hrd.current_buffer_fullness[0] >
423             (mfc_context->hrd.target_buffer_fullness[0] + mfc_context->hrd.buffer_size[0]) / 2.0) {
424             // We currently have lots of bits available, so decrease the QP slightly for the next
425             // frame.
426             qp_delta = -1;
427         } else {
428             // The HRD buffer fullness is increasing, so do nothing.  (We may be under the target
429             // level here, but are moving in the right direction.)
430             qp_delta = 0;
431         }
432     } else {
433         // The frame is larger than the average size expected for this frame type.
434         if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0]) {
435             // We are currently over the target level, so do nothing.
436             qp_delta = 0;
437         } else if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0] / 2.0) {
438             // We are under the target level, but not critically.  Increase the QP by one step if
439             // continuing like this would underflow soon (currently within one second).
440             if (mfc_context->hrd.current_buffer_fullness[0] /
441                 (double)(frame_bits - mfc_context->brc.target_frame_size[0][slice_type] + 1) <
442                 ((double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den))
443                 qp_delta = +1;
444             else
445                 qp_delta = 0;
446         } else {
447             // We are a long way under the target level.  Always increase the QP, possibly by a
448             // larger amount dependent on how big the frame we just made actually was.
449             qp_delta = +1 + large_frame_adjustment;
450         }
451     }
452
453     switch (slice_type) {
454     case SLICE_TYPE_I:
455         qp[SLICE_TYPE_I] += qp_delta;
456         qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_I] + BRC_I_P_QP_DIFF;
457         qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_I] + BRC_I_B_QP_DIFF;
458         break;
459     case SLICE_TYPE_P:
460         qp[SLICE_TYPE_P] += qp_delta;
461         qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_P] - BRC_I_P_QP_DIFF;
462         qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_P] + BRC_P_B_QP_DIFF;
463         break;
464     case SLICE_TYPE_B:
465         qp[SLICE_TYPE_B] += qp_delta;
466         qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_B] - BRC_I_B_QP_DIFF;
467         qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_B] - BRC_P_B_QP_DIFF;
468         break;
469     }
470     BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], min_qp, 51);
471     BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], min_qp, 51);
472     BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B], min_qp, 51);
473
474     if (sts == BRC_UNDERFLOW && qp[slice_type] == 51)
475         sts = BRC_UNDERFLOW_WITH_MAX_QP;
476     if (sts == BRC_OVERFLOW && qp[slice_type] == min_qp)
477         sts = BRC_OVERFLOW_WITH_MIN_QP;
478
479     return sts;
480 }
481
482 int intel_mfc_brc_postpack(struct encode_state *encode_state,
483                            struct intel_encoder_context *encoder_context,
484                            int frame_bits)
485 {
486     switch (encoder_context->rate_control_mode) {
487     case VA_RC_CBR:
488         return intel_mfc_brc_postpack_cbr(encode_state, encoder_context, frame_bits);
489     case VA_RC_VBR:
490         return intel_mfc_brc_postpack_vbr(encode_state, encoder_context, frame_bits);
491     }
492     assert(0 && "Invalid RC mode");
493     return 1;
494 }
495
496 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
497                                        struct intel_encoder_context *encoder_context)
498 {
499     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
500     unsigned int rate_control_mode = encoder_context->rate_control_mode;
501     int target_bit_rate = encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
502
503     // current we only support CBR mode.
504     if (rate_control_mode == VA_RC_CBR) {
505         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
506         mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
507         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
508         mfc_context->vui_hrd.i_frame_number = 0;
509
510         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
511         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
512         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
513     }
514
515 }
516
517 void
518 intel_mfc_hrd_context_update(struct encode_state *encode_state,
519                              struct gen6_mfc_context *mfc_context)
520 {
521     mfc_context->vui_hrd.i_frame_number++;
522 }
523
524 int intel_mfc_interlace_check(VADriverContextP ctx,
525                               struct encode_state *encode_state,
526                               struct intel_encoder_context *encoder_context)
527 {
528     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
529     VAEncSliceParameterBufferH264 *pSliceParameter;
530     int i;
531     int mbCount = 0;
532     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
533     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
534
535     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
536         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer;
537         mbCount += pSliceParameter->num_macroblocks;
538     }
539
540     if (mbCount == (width_in_mbs * height_in_mbs))
541         return 0;
542
543     return 1;
544 }
545
546 void intel_mfc_brc_prepare(struct encode_state *encode_state,
547                            struct intel_encoder_context *encoder_context)
548 {
549     unsigned int rate_control_mode = encoder_context->rate_control_mode;
550
551     if (encoder_context->codec != CODEC_H264 &&
552         encoder_context->codec != CODEC_H264_MVC)
553         return;
554
555     if (rate_control_mode != VA_RC_CQP) {
556         /*Programing bit rate control */
557         if (encoder_context->brc.need_reset) {
558             intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
559             intel_mfc_brc_init(encode_state, encoder_context);
560         }
561
562         /*Programing HRD control */
563         if (encoder_context->brc.need_reset)
564             intel_mfc_hrd_context_init(encode_state, encoder_context);
565     }
566 }
567
568 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
569                                               struct encode_state *encode_state,
570                                               struct intel_encoder_context *encoder_context,
571                                               struct intel_batchbuffer *slice_batch)
572 {
573     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
574     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
575     unsigned int rate_control_mode = encoder_context->rate_control_mode;
576     unsigned int skip_emul_byte_cnt;
577
578     if (encode_state->packed_header_data[idx]) {
579         VAEncPackedHeaderParameterBuffer *param = NULL;
580         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
581         unsigned int length_in_bits;
582
583         assert(encode_state->packed_header_param[idx]);
584         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
585         length_in_bits = param->bit_length;
586
587         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
588         mfc_context->insert_object(ctx,
589                                    encoder_context,
590                                    header_data,
591                                    ALIGN(length_in_bits, 32) >> 5,
592                                    length_in_bits & 0x1f,
593                                    skip_emul_byte_cnt,
594                                    0,
595                                    0,
596                                    !param->has_emulation_bytes,
597                                    slice_batch);
598     }
599
600     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
601
602     if (encode_state->packed_header_data[idx]) {
603         VAEncPackedHeaderParameterBuffer *param = NULL;
604         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
605         unsigned int length_in_bits;
606
607         assert(encode_state->packed_header_param[idx]);
608         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
609         length_in_bits = param->bit_length;
610
611         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
612
613         mfc_context->insert_object(ctx,
614                                    encoder_context,
615                                    header_data,
616                                    ALIGN(length_in_bits, 32) >> 5,
617                                    length_in_bits & 0x1f,
618                                    skip_emul_byte_cnt,
619                                    0,
620                                    0,
621                                    !param->has_emulation_bytes,
622                                    slice_batch);
623     }
624
625     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
626
627     if (encode_state->packed_header_data[idx]) {
628         VAEncPackedHeaderParameterBuffer *param = NULL;
629         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
630         unsigned int length_in_bits;
631
632         assert(encode_state->packed_header_param[idx]);
633         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
634         length_in_bits = param->bit_length;
635
636         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
637         mfc_context->insert_object(ctx,
638                                    encoder_context,
639                                    header_data,
640                                    ALIGN(length_in_bits, 32) >> 5,
641                                    length_in_bits & 0x1f,
642                                    skip_emul_byte_cnt,
643                                    0,
644                                    0,
645                                    !param->has_emulation_bytes,
646                                    slice_batch);
647     } else if (rate_control_mode == VA_RC_CBR) {
648         // this is the first AU
649         struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
650
651         unsigned char *sei_data = NULL;
652
653         int length_in_bits = build_avc_sei_buffer_timing(
654                                  mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
655                                  mfc_context->vui_hrd.i_initial_cpb_removal_delay,
656                                  0,
657                                  mfc_context->vui_hrd.i_cpb_removal_delay_length,                                                       mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
658                                  mfc_context->vui_hrd.i_dpb_output_delay_length,
659                                  0,
660                                  &sei_data);
661         mfc_context->insert_object(ctx,
662                                    encoder_context,
663                                    (unsigned int *)sei_data,
664                                    ALIGN(length_in_bits, 32) >> 5,
665                                    length_in_bits & 0x1f,
666                                    5,
667                                    0,
668                                    0,
669                                    1,
670                                    slice_batch);
671         free(sei_data);
672     }
673 }
674
675 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
676                                struct encode_state *encode_state,
677                                struct intel_encoder_context *encoder_context)
678 {
679     struct i965_driver_data *i965 = i965_driver_data(ctx);
680     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
681     struct object_surface *obj_surface;
682     struct object_buffer *obj_buffer;
683     GenAvcSurface *gen6_avc_surface;
684     dri_bo *bo;
685     VAStatus vaStatus = VA_STATUS_SUCCESS;
686     int i, j, enable_avc_ildb = 0;
687     VAEncSliceParameterBufferH264 *slice_param;
688     struct i965_coded_buffer_segment *coded_buffer_segment;
689     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
690     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
691     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
692
693     if (IS_GEN6(i965->intel.device_info)) {
694         /* On the SNB it should be fixed to 128 for the DMV buffer */
695         width_in_mbs = 128;
696     }
697
698     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
699         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
700         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
701
702         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
703             assert((slice_param->slice_type == SLICE_TYPE_I) ||
704                    (slice_param->slice_type == SLICE_TYPE_SI) ||
705                    (slice_param->slice_type == SLICE_TYPE_P) ||
706                    (slice_param->slice_type == SLICE_TYPE_SP) ||
707                    (slice_param->slice_type == SLICE_TYPE_B));
708
709             if (slice_param->disable_deblocking_filter_idc != 1) {
710                 enable_avc_ildb = 1;
711                 break;
712             }
713
714             slice_param++;
715         }
716     }
717
718     /*Setup all the input&output object*/
719
720     /* Setup current frame and current direct mv buffer*/
721     obj_surface = encode_state->reconstructed_object;
722     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
723
724     if (obj_surface->private_data == NULL) {
725         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
726         assert(gen6_avc_surface);
727         gen6_avc_surface->dmv_top =
728             dri_bo_alloc(i965->intel.bufmgr,
729                          "Buffer",
730                          68 * width_in_mbs * height_in_mbs,
731                          64);
732         gen6_avc_surface->dmv_bottom =
733             dri_bo_alloc(i965->intel.bufmgr,
734                          "Buffer",
735                          68 * width_in_mbs * height_in_mbs,
736                          64);
737         assert(gen6_avc_surface->dmv_top);
738         assert(gen6_avc_surface->dmv_bottom);
739         obj_surface->private_data = (void *)gen6_avc_surface;
740         obj_surface->free_private_data = (void *)gen_free_avc_surface;
741     }
742     gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
743     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
744     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
745     dri_bo_reference(gen6_avc_surface->dmv_top);
746     dri_bo_reference(gen6_avc_surface->dmv_bottom);
747
748     if (enable_avc_ildb) {
749         mfc_context->post_deblocking_output.bo = obj_surface->bo;
750         dri_bo_reference(mfc_context->post_deblocking_output.bo);
751     } else {
752         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
753         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
754     }
755
756     mfc_context->surface_state.width = obj_surface->orig_width;
757     mfc_context->surface_state.height = obj_surface->orig_height;
758     mfc_context->surface_state.w_pitch = obj_surface->width;
759     mfc_context->surface_state.h_pitch = obj_surface->height;
760
761     /* Setup reference frames and direct mv buffers*/
762     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
763         obj_surface = encode_state->reference_objects[i];
764
765         if (obj_surface && obj_surface->bo) {
766             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
767             dri_bo_reference(obj_surface->bo);
768
769             /* Check DMV buffer */
770             if (obj_surface->private_data == NULL) {
771
772                 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
773                 assert(gen6_avc_surface);
774                 gen6_avc_surface->dmv_top =
775                     dri_bo_alloc(i965->intel.bufmgr,
776                                  "Buffer",
777                                  68 * width_in_mbs * height_in_mbs,
778                                  64);
779                 gen6_avc_surface->dmv_bottom =
780                     dri_bo_alloc(i965->intel.bufmgr,
781                                  "Buffer",
782                                  68 * width_in_mbs * height_in_mbs,
783                                  64);
784                 assert(gen6_avc_surface->dmv_top);
785                 assert(gen6_avc_surface->dmv_bottom);
786                 obj_surface->private_data = gen6_avc_surface;
787                 obj_surface->free_private_data = gen_free_avc_surface;
788             }
789
790             gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
791             /* Setup DMV buffer */
792             mfc_context->direct_mv_buffers[i * 2].bo = gen6_avc_surface->dmv_top;
793             mfc_context->direct_mv_buffers[i * 2 + 1].bo = gen6_avc_surface->dmv_bottom;
794             dri_bo_reference(gen6_avc_surface->dmv_top);
795             dri_bo_reference(gen6_avc_surface->dmv_bottom);
796         } else {
797             break;
798         }
799     }
800
801     mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
802     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
803
804     obj_buffer = encode_state->coded_buf_object;
805     bo = obj_buffer->buffer_store->bo;
806     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
807     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
808     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
809     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
810
811     dri_bo_map(bo, 1);
812     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
813     coded_buffer_segment->mapped = 0;
814     coded_buffer_segment->codec = encoder_context->codec;
815     dri_bo_unmap(bo);
816
817     return vaStatus;
818 }
819 /*
820  * A LUT entry packs a pair of 4-bit units in a (shift, base) structure,
821  * encoding value = base * 2^shift.
822  * So each cost must be converted into the nearest representable LUT value.
823  * The derivation is:
824  * 2^k * x = 2^n * (1 + deltaX)
825  *    k + log2(x) = n + log2(1 + deltaX)
826  *    log2(x) = n - k + log2(1 + deltaX)
827  *    As x is in the range of [1, 15]
828  *      4 > n - k + log2(1 + deltaX) >= 0
829  *      =>    n + log2(1 + deltaX)  >= k > n - 4  + log2(1 + deltaX)
830  *    Then we can derive the corresponding k and get the nearest LUT format.
831  */
/*
 * Encode `value` into the hardware LUT byte: low nibble = base, high
 * nibble = shift, representing base << shift.  The nearest representable
 * value is chosen; the result is clamped so its decoded magnitude never
 * exceeds that of `max`.  Non-positive inputs encode to 0.
 */
int intel_format_lutvalue(int value, int max)
{
    int encoded;
    int msb;

    if (value <= 0)
        return 0;

    /* Highest set bit position == floor(log2(value)) for value > 0. */
    msb = 0;
    while ((value >> (msb + 1)) != 0)
        msb++;

    if (msb < 4) {
        /* Values below 16 fit directly in the 4-bit base with shift 0. */
        encoded = value;
    } else {
        int best_err = value;
        int shift;

        encoded = -1;
        /* Try every shift that can keep the rounded base within 4 bits. */
        for (shift = msb - 3; shift <= msb; shift++) {
            int base, approx, err;

            if (shift == 0)
                base = value >> shift;
            else
                base = (value + (1 << (shift - 1)) - 1) >> shift;   /* round up */

            if (base >= 16)
                continue;

            approx = base << shift;
            err = abs(value - approx);
            if (err < best_err) {
                best_err = err;
                encoded = (shift << 4) | base;
                if (err == 0)
                    break;
            }
        }
    }

    {
        /* Clamp by comparing decoded magnitudes, not raw encodings. */
        int decoded = (encoded & 0xf) << ((encoded & 0xf0) >> 4);
        int limit   = (max & 0xf) << ((max & 0xf0) >> 4);

        if (decoded > limit)
            encoded = max;
    }

    return encoded;
}
874
875
876 #define     QP_MAX          52
877 #define     VP8_QP_MAX          128
878
879
880 static float intel_lambda_qp(int qp)
881 {
882     float value, lambdaf;
883     value = qp;
884     value = value / 6 - 2;
885     if (value < 0)
886         value = 0;
887     lambdaf = roundf(powf(2, value));
888     return lambdaf;
889 }
890
891 static
892 void intel_h264_calc_mbmvcost_qp(int qp,
893                                  int slice_type,
894                                  uint8_t *vme_state_message)
895 {
896     int m_cost, j, mv_count;
897     float   lambda, m_costf;
898
899     assert(qp <= QP_MAX);
900     lambda = intel_lambda_qp(qp);
901
902     m_cost = lambda;
903     vme_state_message[MODE_CHROMA_INTRA] = 0;
904     vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
905
906     if (slice_type == SLICE_TYPE_I) {
907         vme_state_message[MODE_INTRA_16X16] = 0;
908         m_cost = lambda * 4;
909         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
910         m_cost = lambda * 16;
911         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
912         m_cost = lambda * 3;
913         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
914     } else {
915         m_cost = 0;
916         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
917         for (j = 1; j < 3; j++) {
918             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
919             m_cost = (int)m_costf;
920             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
921         }
922         mv_count = 3;
923         for (j = 4; j <= 64; j *= 2) {
924             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
925             m_cost = (int)m_costf;
926             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
927             mv_count++;
928         }
929
930         if (qp <= 25) {
931             vme_state_message[MODE_INTRA_16X16] = 0x4a;
932             vme_state_message[MODE_INTRA_8X8] = 0x4a;
933             vme_state_message[MODE_INTRA_4X4] = 0x4a;
934             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
935             vme_state_message[MODE_INTER_16X16] = 0x4a;
936             vme_state_message[MODE_INTER_16X8] = 0x4a;
937             vme_state_message[MODE_INTER_8X8] = 0x4a;
938             vme_state_message[MODE_INTER_8X4] = 0x4a;
939             vme_state_message[MODE_INTER_4X4] = 0x4a;
940             vme_state_message[MODE_INTER_BWD] = 0x2a;
941             return;
942         }
943         m_costf = lambda * 10;
944         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
945         m_cost = lambda * 14;
946         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
947         m_cost = lambda * 24;
948         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
949         m_costf = lambda * 3.5;
950         m_cost = m_costf;
951         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
952         if (slice_type == SLICE_TYPE_P) {
953             m_costf = lambda * 2.5;
954             m_cost = m_costf;
955             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
956             m_costf = lambda * 4;
957             m_cost = m_costf;
958             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
959             m_costf = lambda * 1.5;
960             m_cost = m_costf;
961             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
962             m_costf = lambda * 3;
963             m_cost = m_costf;
964             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
965             m_costf = lambda * 5;
966             m_cost = m_costf;
967             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
968             /* BWD is not used in P-frame */
969             vme_state_message[MODE_INTER_BWD] = 0;
970         } else {
971             m_costf = lambda * 2.5;
972             m_cost = m_costf;
973             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
974             m_costf = lambda * 5.5;
975             m_cost = m_costf;
976             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
977             m_costf = lambda * 3.5;
978             m_cost = m_costf;
979             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
980             m_costf = lambda * 5.0;
981             m_cost = m_costf;
982             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
983             m_costf = lambda * 6.5;
984             m_cost = m_costf;
985             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
986             m_costf = lambda * 1.5;
987             m_cost = m_costf;
988             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
989         }
990     }
991     return;
992 }
993
994 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
995                                 struct encode_state *encode_state,
996                                 struct intel_encoder_context *encoder_context)
997 {
998     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
999     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1000     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1001     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1002     int qp;
1003     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
1004
1005     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1006
1007     if (encoder_context->rate_control_mode == VA_RC_CQP)
1008         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1009     else
1010         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1011
1012     if (vme_state_message == NULL)
1013         return;
1014
1015     intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
1016 }
1017
1018 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
1019                                     struct encode_state *encode_state,
1020                                     struct intel_encoder_context *encoder_context)
1021 {
1022     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1023     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1024     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
1025     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
1026     int qp, m_cost, j, mv_count;
1027     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
1028     float   lambda, m_costf;
1029
1030     int is_key_frame = !pic_param->pic_flags.bits.frame_type;
1031     int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
1032
1033     if (vme_state_message == NULL)
1034         return;
1035
1036     if (encoder_context->rate_control_mode == VA_RC_CQP)
1037         qp = q_matrix->quantization_index[0];
1038     else
1039         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1040
1041     lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
1042
1043     m_cost = lambda;
1044     vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
1045
1046     if (is_key_frame) {
1047         vme_state_message[MODE_INTRA_16X16] = 0;
1048         m_cost = lambda * 16;
1049         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
1050         m_cost = lambda * 3;
1051         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
1052     } else {
1053         m_cost = 0;
1054         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
1055         for (j = 1; j < 3; j++) {
1056             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1057             m_cost = (int)m_costf;
1058             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1059         }
1060         mv_count = 3;
1061         for (j = 4; j <= 64; j *= 2) {
1062             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1063             m_cost = (int)m_costf;
1064             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
1065             mv_count++;
1066         }
1067
1068         if (qp < 92) {
1069             vme_state_message[MODE_INTRA_16X16] = 0x4a;
1070             vme_state_message[MODE_INTRA_4X4] = 0x4a;
1071             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
1072             vme_state_message[MODE_INTER_16X16] = 0x4a;
1073             vme_state_message[MODE_INTER_16X8] = 0x4a;
1074             vme_state_message[MODE_INTER_8X8] = 0x4a;
1075             vme_state_message[MODE_INTER_4X4] = 0x4a;
1076             vme_state_message[MODE_INTER_BWD] = 0;
1077             return;
1078         }
1079         m_costf = lambda * 10;
1080         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1081         m_cost = lambda * 24;
1082         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
1083
1084         m_costf = lambda * 3.5;
1085         m_cost = m_costf;
1086         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
1087
1088         m_costf = lambda * 2.5;
1089         m_cost = m_costf;
1090         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1091         m_costf = lambda * 4;
1092         m_cost = m_costf;
1093         vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
1094         m_costf = lambda * 1.5;
1095         m_cost = m_costf;
1096         vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
1097         m_costf = lambda * 5;
1098         m_cost = m_costf;
1099         vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
1100         /* BWD is not used in P-frame */
1101         vme_state_message[MODE_INTER_BWD] = 0;
1102     }
1103 }
1104
1105 #define     MB_SCOREBOARD_A     (1 << 0)
1106 #define     MB_SCOREBOARD_B     (1 << 1)
1107 #define     MB_SCOREBOARD_C     (1 << 2)
/* Program the VFE hardware scoreboard for VME: stalling mode with three
 * dependencies per macroblock, matching the A/B/C neighbour availability
 * used when emitting MEDIA_OBJECT commands. */
void
gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
{
    /* Stalling scoreboard: a MB thread waits until all of its masked
     * dependencies (A/B/C) have completed. */
    vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
    vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
    vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
                                                           MB_SCOREBOARD_B |
                                                           MB_SCOREBOARD_C);

    /* In VME prediction the current mb depends on the neighbour
     * A/B/C macroblock. So the left/up/up-right dependency should
     * be considered.
     */
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;   /* A: left */
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;    /* B: above */
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;    /* C: above-right */
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;

    /* Remaining (unused) dependency slots cleared. */
    vme_context->gpe_context.vfe_desc7.dword = 0;
    return;
}
1131
/* Return 0 when macroblock (x_index, y_index) lies inside the picture and
 * inside the slice window [first_mb, first_mb + num_mb]; -1 otherwise. */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    int inside_picture = (x_index >= 0 && x_index < mb_width &&
                          y_index >= 0 && y_index < mb_height);
    int mb_index;

    if (!inside_picture)
        return -1;

    mb_index = y_index * mb_width + x_index;
    /* NOTE(review): the upper bound uses '>', so mb_index == first_mb + num_mb
     * is still treated as in-bounds; preserved from the original — confirm
     * whether '>=' was intended. */
    if (mb_index < first_mb || mb_index > (first_mb + num_mb))
        return -1;

    return 0;
}
1146
/*
 * Fill the VME batchbuffer with one MEDIA_OBJECT command (9 dwords) per
 * macroblock for every slice, emitting MBs in a diagonal wavefront order:
 * the inner loops step (x - 2, y + 1), so a MB is only emitted after the
 * A (left), B (top) and C (top-right) neighbours it depends on — matching
 * the scoreboard deltas programmed in gen7_vme_scoreboard_init().
 * The walk runs in two phases: first the diagonals starting on the slice's
 * top row (x_outer < mb_width - 2), then the remaining diagonals started
 * from the right edge, wrapping down one row at a time.
 */
void
gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     int mb_width, int mb_height,
                                     int kernel,
                                     int transform_8x8_mode_flag,
                                     struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_row;
    int s;
    unsigned int *command_ptr;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    int qp, qp_mb, qp_index;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

    /* Frame-level QP: slice QP for CQP mode, BRC-computed QP otherwise. */
    if (encoder_context->rate_control_mode == VA_RC_CQP)
        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    else
        qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];

#define     USE_SCOREBOARD      (1 << 21)

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
        int first_mb = pSliceParameter->macroblock_address;
        int num_mb = pSliceParameter->num_macroblocks;
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;

        x_outer = first_mb % mb_width;
        y_outer = first_mb / mb_width;
        mb_row = y_outer;   /* first MB row of this slice */

        /* Phase 1: diagonals whose start point walks along the slice's
         * first row, stopping short of the right edge. */
        for (; x_outer < (mb_width - 2) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
            x_inner = x_outer;
            y_inner = y_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* Intra-prediction neighbour availability and scoreboard
                 * dependencies for this MB position. */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                    if (x_inner != (mb_width - 1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                /* MEDIA_OBJECT: 9 dwords total ((9 - 2) in the header). */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                /* QP occupies one byte: per-MB QP when ROI is enabled,
                 * frame QP otherwise. */
                if (vme_context->roi_enabled) {
                    qp_index = y_inner * mb_width + x_inner;
                    qp_mb = *(vme_context->qp_per_mb + qp_index);
                } else
                    qp_mb = qp;
                *command_ptr++ = qp_mb;
                /* Next MB on the same diagonal. */
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* Phase 2: remaining diagonals started near the right edge,
         * wrapping down one row whenever the start column runs off. */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = first_mb / mb_width;
        for (; !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width - 1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                /* qp occupies one byte */
                if (vme_context->roi_enabled) {
                    qp_index = y_inner * mb_width + x_inner;
                    qp_mb = *(vme_context->qp_per_mb + qp_index);
                } else
                    qp_mb = qp;
                *command_ptr++ = qp_mb;

                x_inner -= 2;
                y_inner += 1;
            }
            x_outer++;
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    /* Terminate the batch. */
    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
1294
1295 static uint8_t
1296 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1297 {
1298     unsigned int is_long_term =
1299         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1300     unsigned int is_top_field =
1301         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1302     unsigned int is_bottom_field =
1303         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1304
1305     return ((is_long_term                         << 6) |
1306             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1307             (frame_store_id                       << 1) |
1308             ((is_top_field ^ 1) & is_bottom_field));
1309 }
1310
/*
 * Emit the two MFX_AVC_REF_IDX_STATE commands (L0 and L1) for the
 * current slice.
 *
 * This encoder path uses a single reference per list: the surface picked
 * earlier (stored in vme_context->used_references/used_reference_objects)
 * is mapped back to its DPB slot and packed into the one active RefIdx
 * entry; every other entry keeps the 0x80 "invalid" marker.
 */
void
intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int slice_type;
    struct object_surface *obj_surface;
    unsigned int fref_entry, bref_entry;
    int frame_index, i;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;

    /* 0x80 in every byte marks all four packed entries as unused. */
    fref_entry = 0x80808080;
    bref_entry = 0x80808080;
    slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

    if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
        /* Forward (L0) reference index: low byte of the packed per-MB value. */
        int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);

        /* Only 4 one-byte entries fit into fref_entry. */
        if (ref_idx_l0 > 3) {
            WARN_ONCE("ref_idx_l0 is out of range\n");
            ref_idx_l0 = 0;
        }

        /* Map the surface used by VME back to its DPB slot index. */
        obj_surface = vme_context->used_reference_objects[0];
        frame_index = -1;
        for (i = 0; i < 16; i++) {
            if (obj_surface &&
                obj_surface == encode_state->reference_objects[i]) {
                frame_index = i;
                break;
            }
        }
        if (frame_index == -1) {
            WARN_ONCE("RefPicList0 is not found in DPB!\n");
        } else {
            int ref_idx_l0_shift = ref_idx_l0 * 8;
            /* Clear the 0x80 marker byte, then insert the packed state. */
            fref_entry &= ~(0xFF << ref_idx_l0_shift);
            fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
        }
    }

    if (slice_type == SLICE_TYPE_B) {
        /* Backward (L1) reference, B slices only — same scheme as L0. */
        int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);

        if (ref_idx_l1 > 3) {
            WARN_ONCE("ref_idx_l1 is out of range\n");
            ref_idx_l1 = 0;
        }

        obj_surface = vme_context->used_reference_objects[1];
        frame_index = -1;
        for (i = 0; i < 16; i++) {
            if (obj_surface &&
                obj_surface == encode_state->reference_objects[i]) {
                frame_index = i;
                break;
            }
        }
        if (frame_index == -1) {
            WARN_ONCE("RefPicList1 is not found in DPB!\n");
        } else {
            int ref_idx_l1_shift = ref_idx_l1 * 8;
            bref_entry &= ~(0xFF << ref_idx_l1_shift);
            bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
        }
    }

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
    OUT_BCS_BATCH(batch, 0);                  //Select L0
    OUT_BCS_BATCH(batch, fref_entry);         //Only 1 reference
    for (i = 0; i < 7; i++) {
        OUT_BCS_BATCH(batch, 0x80808080);
    }
    ADVANCE_BCS_BATCH(batch);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
    OUT_BCS_BATCH(batch, 1);                  //Select L1
    OUT_BCS_BATCH(batch, bref_entry);         //Only 1 reference
    for (i = 0; i < 7; i++) {
        OUT_BCS_BATCH(batch, 0x80808080);
    }
    ADVANCE_BCS_BATCH(batch);
}
1398
1399
1400 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1401                                  struct encode_state *encode_state,
1402                                  struct intel_encoder_context *encoder_context)
1403 {
1404     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1405     uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1406     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1407     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1408     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1409     uint32_t mv_x, mv_y;
1410     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1411     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1412     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1413
1414     if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1415         mv_x = 512;
1416         mv_y = 64;
1417     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1418         mv_x = 1024;
1419         mv_y = 128;
1420     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1421         mv_x = 2048;
1422         mv_y = 128;
1423     } else {
1424         WARN_ONCE("Incorrect Mpeg2 level setting!\n");
1425         mv_x = 512;
1426         mv_y = 64;
1427     }
1428
1429     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1430     if (pic_param->picture_type != VAEncPictureTypeIntra) {
1431         int qp, m_cost, j, mv_count;
1432         float   lambda, m_costf;
1433         slice_param = (VAEncSliceParameterBufferMPEG2 *)
1434                       encode_state->slice_params_ext[0]->buffer;
1435         qp = slice_param->quantiser_scale_code;
1436         lambda = intel_lambda_qp(qp);
1437         /* No Intra prediction. So it is zero */
1438         vme_state_message[MODE_INTRA_8X8] = 0;
1439         vme_state_message[MODE_INTRA_4X4] = 0;
1440         vme_state_message[MODE_INTER_MV0] = 0;
1441         for (j = 1; j < 3; j++) {
1442             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1443             m_cost = (int)m_costf;
1444             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1445         }
1446         mv_count = 3;
1447         for (j = 4; j <= 64; j *= 2) {
1448             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1449             m_cost = (int)m_costf;
1450             vme_state_message[MODE_INTER_MV0 + mv_count] =
1451                 intel_format_lutvalue(m_cost, 0x6f);
1452             mv_count++;
1453         }
1454         m_cost = lambda;
1455         /* It can only perform the 16x16 search. So mode cost can be ignored for
1456          * the other mode. for example: 16x8/8x8
1457          */
1458         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1459         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1460
1461         vme_state_message[MODE_INTER_16X8] = 0;
1462         vme_state_message[MODE_INTER_8X8] = 0;
1463         vme_state_message[MODE_INTER_8X4] = 0;
1464         vme_state_message[MODE_INTER_4X4] = 0;
1465         vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
1466
1467     }
1468     vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1469
1470     vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
1471                                                 width_in_mbs;
1472 }
1473
1474 void
1475 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
1476                                            struct encode_state *encode_state,
1477                                            int mb_width, int mb_height,
1478                                            int kernel,
1479                                            struct intel_encoder_context *encoder_context)
1480 {
1481     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1482     unsigned int *command_ptr;
1483
1484 #define     MPEG2_SCOREBOARD        (1 << 21)
1485
1486     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1487     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1488
1489     {
1490         unsigned int mb_intra_ub, score_dep;
1491         int x_outer, y_outer, x_inner, y_inner;
1492         int xtemp_outer = 0;
1493         int first_mb = 0;
1494         int num_mb = mb_width * mb_height;
1495
1496         x_outer = 0;
1497         y_outer = 0;
1498
1499
1500         for (; x_outer < (mb_width - 2) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
1501             x_inner = x_outer;
1502             y_inner = y_outer;
1503             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1504                 mb_intra_ub = 0;
1505                 score_dep = 0;
1506                 if (x_inner != 0) {
1507                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1508                     score_dep |= MB_SCOREBOARD_A;
1509                 }
1510                 if (y_inner != 0) {
1511                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1512                     score_dep |= MB_SCOREBOARD_B;
1513
1514                     if (x_inner != 0)
1515                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1516
1517                     if (x_inner != (mb_width - 1)) {
1518                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1519                         score_dep |= MB_SCOREBOARD_C;
1520                     }
1521                 }
1522
1523                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1524                 *command_ptr++ = kernel;
1525                 *command_ptr++ = MPEG2_SCOREBOARD;
1526                 /* Indirect data */
1527                 *command_ptr++ = 0;
1528                 /* the (X, Y) term of scoreboard */
1529                 *command_ptr++ = ((y_inner << 16) | x_inner);
1530                 *command_ptr++ = score_dep;
1531                 /*inline data */
1532                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1533                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1534                 x_inner -= 2;
1535                 y_inner += 1;
1536             }
1537             x_outer += 1;
1538         }
1539
1540         xtemp_outer = mb_width - 2;
1541         if (xtemp_outer < 0)
1542             xtemp_outer = 0;
1543         x_outer = xtemp_outer;
1544         y_outer = 0;
1545         for (; !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
1546             y_inner = y_outer;
1547             x_inner = x_outer;
1548             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1549                 mb_intra_ub = 0;
1550                 score_dep = 0;
1551                 if (x_inner != 0) {
1552                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1553                     score_dep |= MB_SCOREBOARD_A;
1554                 }
1555                 if (y_inner != 0) {
1556                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1557                     score_dep |= MB_SCOREBOARD_B;
1558
1559                     if (x_inner != 0)
1560                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1561
1562                     if (x_inner != (mb_width - 1)) {
1563                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1564                         score_dep |= MB_SCOREBOARD_C;
1565                     }
1566                 }
1567
1568                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1569                 *command_ptr++ = kernel;
1570                 *command_ptr++ = MPEG2_SCOREBOARD;
1571                 /* Indirect data */
1572                 *command_ptr++ = 0;
1573                 /* the (X, Y) term of scoreboard */
1574                 *command_ptr++ = ((y_inner << 16) | x_inner);
1575                 *command_ptr++ = score_dep;
1576                 /*inline data */
1577                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1578                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1579
1580                 x_inner -= 2;
1581                 y_inner += 1;
1582             }
1583             x_outer++;
1584             if (x_outer >= mb_width) {
1585                 y_outer += 1;
1586                 x_outer = xtemp_outer;
1587             }
1588         }
1589     }
1590
1591     *command_ptr++ = 0;
1592     *command_ptr++ = MI_BATCH_BUFFER_END;
1593
1594     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1595     return;
1596 }
1597
1598 static int
1599 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1600                           VAPictureH264 *ref_list,
1601                           int num_pictures,
1602                           int dir)
1603 {
1604     int i, found = -1, min = 0x7FFFFFFF;
1605
1606     for (i = 0; i < num_pictures; i++) {
1607         int tmp;
1608
1609         if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1610             (ref_list[i].picture_id == VA_INVALID_SURFACE))
1611             break;
1612
1613         tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1614
1615         if (dir)
1616             tmp = -tmp;
1617
1618         if (tmp > 0 && tmp < min) {
1619             min = tmp;
1620             found = i;
1621         }
1622     }
1623
1624     return found;
1625 }
1626
1627 void
1628 intel_avc_vme_reference_state(VADriverContextP ctx,
1629                               struct encode_state *encode_state,
1630                               struct intel_encoder_context *encoder_context,
1631                               int list_index,
1632                               int surface_index,
1633                               void (* vme_source_surface_state)(
1634                                   VADriverContextP ctx,
1635                                   int index,
1636                                   struct object_surface *obj_surface,
1637                                   struct intel_encoder_context *encoder_context))
1638 {
1639     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1640     struct object_surface *obj_surface = NULL;
1641     struct i965_driver_data *i965 = i965_driver_data(ctx);
1642     VASurfaceID ref_surface_id;
1643     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1644     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1645     int max_num_references;
1646     VAPictureH264 *curr_pic;
1647     VAPictureH264 *ref_list;
1648     int ref_idx;
1649
1650     if (list_index == 0) {
1651         max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1652         ref_list = slice_param->RefPicList0;
1653     } else {
1654         max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1655         ref_list = slice_param->RefPicList1;
1656     }
1657
1658     if (max_num_references == 1) {
1659         if (list_index == 0) {
1660             ref_surface_id = slice_param->RefPicList0[0].picture_id;
1661             vme_context->used_references[0] = &slice_param->RefPicList0[0];
1662         } else {
1663             ref_surface_id = slice_param->RefPicList1[0].picture_id;
1664             vme_context->used_references[1] = &slice_param->RefPicList1[0];
1665         }
1666
1667         if (ref_surface_id != VA_INVALID_SURFACE)
1668             obj_surface = SURFACE(ref_surface_id);
1669
1670         if (!obj_surface ||
1671             !obj_surface->bo) {
1672             obj_surface = encode_state->reference_objects[list_index];
1673             vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1674         }
1675
1676         ref_idx = 0;
1677     } else {
1678         curr_pic = &pic_param->CurrPic;
1679
1680         /* select the reference frame in temporal space */
1681         ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1682         ref_surface_id = ref_list[ref_idx].picture_id;
1683
1684         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1685             obj_surface = SURFACE(ref_surface_id);
1686
1687         vme_context->used_reference_objects[list_index] = obj_surface;
1688         vme_context->used_references[list_index] = &ref_list[ref_idx];
1689     }
1690
1691     if (obj_surface &&
1692         obj_surface->bo) {
1693         assert(ref_idx >= 0);
1694         vme_context->used_reference_objects[list_index] = obj_surface;
1695         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1696         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
1697                                                     ref_idx << 16 |
1698                                                     ref_idx <<  8 |
1699                                                     ref_idx);
1700     } else {
1701         vme_context->used_reference_objects[list_index] = NULL;
1702         vme_context->used_references[list_index] = NULL;
1703         vme_context->ref_index_in_mb[list_index] = 0;
1704     }
1705 }
1706
1707 #define AVC_NAL_DELIMITER           9
1708 void
1709 intel_avc_insert_aud_packed_data(VADriverContextP ctx,
1710                                  struct encode_state *encode_state,
1711                                  struct intel_encoder_context *encoder_context,
1712                                  struct intel_batchbuffer *batch)
1713 {
1714     VAEncPackedHeaderParameterBuffer *param = NULL;
1715     unsigned int length_in_bits;
1716     unsigned int *header_data = NULL;
1717     unsigned char *nal_type = NULL;
1718     int count, i, start_index;
1719     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1720
1721     count = encode_state->slice_rawdata_count[0];
1722     start_index = (encode_state->slice_rawdata_index[0] & SLICE_PACKED_DATA_INDEX_MASK);
1723
1724     for (i = 0; i < count; i++) {
1725         unsigned int skip_emul_byte_cnt;
1726
1727         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1728         nal_type = (unsigned char *)header_data;
1729
1730         param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
1731
1732         length_in_bits = param->bit_length;
1733
1734         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1735
1736         if ((*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER) {
1737             mfc_context->insert_object(ctx,
1738                                        encoder_context,
1739                                        header_data,
1740                                        ALIGN(length_in_bits, 32) >> 5,
1741                                        length_in_bits & 0x1f,
1742                                        skip_emul_byte_cnt,
1743                                        0,
1744                                        0,
1745                                        !param->has_emulation_bytes,
1746                                        batch);
1747             break;
1748         }
1749     }
1750 }
1751
1752
/*
 * Insert the packed header data associated with one slice into the slice
 * batch, ending with the slice header itself.
 *
 * The per-slice raw packed buffers (SEI etc.) are emitted first; packed
 * slice headers and AUD NALs are skipped in that loop because the AUD is
 * inserted separately at frame start and the slice header must come last
 * (with the "last header" flag set).  If the application supplied no
 * packed slice header, one is generated via build_avc_slice_header().
 */
void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
                                        struct encode_state *encode_state,
                                        struct intel_encoder_context *encoder_context,
                                        int slice_index,
                                        struct intel_batchbuffer *slice_batch)
{
    int count, i, start_index;
    unsigned int length_in_bits;
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int *header_data = NULL;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int slice_header_index;
    unsigned char *nal_type = NULL;

    /* Index 0 means "no packed slice header supplied by the app". */
    if (encode_state->slice_header_index[slice_index] == 0)
        slice_header_index = -1;
    else
        slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    count = encode_state->slice_rawdata_count[slice_index];
    start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
        nal_type = (unsigned char *)header_data;

        param = (VAEncPackedHeaderParameterBuffer *)
                (encode_state->packed_header_params_ext[start_index + i]->buffer);

        length_in_bits = param->bit_length;

        /* NOTE(review): assumes intel_avc_find_skipemulcnt() returns at
         * least 1 (start code + NAL header byte) — verify for malformed
         * packed data before relying on the -1 below. */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* skip the slice header/AUD packed data type as it is lastly inserted */
        if (param->type == VAEncPackedHeaderSlice || (*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER)
            continue;

        /* as the slice header is still required, the last header flag is set to
         * zero.
         */
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   0,
                                   0,
                                   !param->has_emulation_bytes,
                                   slice_batch);
    }

    if (slice_header_index == -1) {
        unsigned char *slice_header = NULL;
        int slice_header_length_in_bits = 0;
        VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;

        /* No slice header data is passed. And the driver needs to generate it */
        /* For the Normal H264 */
        slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
                                                             pPicParameter,
                                                             pSliceParameter,
                                                             &slice_header);
        mfc_context->insert_object(ctx, encoder_context,
                                   (unsigned int *)slice_header,
                                   ALIGN(slice_header_length_in_bits, 32) >> 5,
                                   slice_header_length_in_bits & 0x1f,
                                   5,  /* first 5 bytes are start code + nal unit type */
                                   1, 0, 1, slice_batch);

        /* build_avc_slice_header() allocates the buffer; we own it. */
        free(slice_header);
    } else {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)
                (encode_state->packed_header_params_ext[slice_header_index]->buffer);
        length_in_bits = param->bit_length;

        /* as the slice header is the last header data for one slice,
         * the last header flag is set to one.
         */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   1,
                                   0,
                                   !param->has_emulation_bytes,
                                   slice_batch);
    }

    return;
}
1856
1857 void
1858 intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
1859                                 struct encode_state *encode_state,
1860                                 struct intel_encoder_context *encoder_context)
1861 {
1862     struct i965_driver_data *i965 = i965_driver_data(ctx);
1863     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1864     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1865     int qp;
1866     dri_bo *bo;
1867     uint8_t *cost_table;
1868
1869     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1870
1871
1872     if (slice_type == SLICE_TYPE_I) {
1873         if (vme_context->i_qp_cost_table)
1874             return;
1875     } else if (slice_type == SLICE_TYPE_P) {
1876         if (vme_context->p_qp_cost_table)
1877             return;
1878     } else {
1879         if (vme_context->b_qp_cost_table)
1880             return;
1881     }
1882
1883     /* It is enough to allocate 32 bytes for each qp. */
1884     bo = dri_bo_alloc(i965->intel.bufmgr,
1885                       "cost_table ",
1886                       QP_MAX * 32,
1887                       64);
1888
1889     dri_bo_map(bo, 1);
1890     assert(bo->virtual);
1891     cost_table = (uint8_t *)(bo->virtual);
1892     for (qp = 0; qp < QP_MAX; qp++) {
1893         intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
1894         cost_table += 32;
1895     }
1896
1897     dri_bo_unmap(bo);
1898
1899     if (slice_type == SLICE_TYPE_I) {
1900         vme_context->i_qp_cost_table = bo;
1901     } else if (slice_type == SLICE_TYPE_P) {
1902         vme_context->p_qp_cost_table = bo;
1903     } else {
1904         vme_context->b_qp_cost_table = bo;
1905     }
1906
1907     vme_context->cost_table_size = QP_MAX * 32;
1908     return;
1909 }
1910
1911 extern void
1912 intel_h264_setup_cost_surface(VADriverContextP ctx,
1913                               struct encode_state *encode_state,
1914                               struct intel_encoder_context *encoder_context,
1915                               unsigned long binding_table_offset,
1916                               unsigned long surface_state_offset)
1917 {
1918     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1919     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1920     dri_bo *bo;
1921
1922
1923     struct i965_buffer_surface cost_table;
1924
1925     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1926
1927
1928     if (slice_type == SLICE_TYPE_I) {
1929         bo = vme_context->i_qp_cost_table;
1930     } else if (slice_type == SLICE_TYPE_P) {
1931         bo = vme_context->p_qp_cost_table;
1932     } else {
1933         bo = vme_context->b_qp_cost_table;
1934     }
1935
1936     cost_table.bo = bo;
1937     cost_table.num_blocks = QP_MAX;
1938     cost_table.pitch = 16;
1939     cost_table.size_block = 32;
1940
1941     vme_context->vme_buffer_suface_setup(ctx,
1942                                          &vme_context->gpe_context,
1943                                          &cost_table,
1944                                          binding_table_offset,
1945                                          surface_state_offset);
1946 }
1947
1948 /*
1949  * the idea of conversion between qp and qstep comes from scaling process
1950  * of transform coeff for Luma component in H264 spec.
1951  *   2^(Qpy / 6 - 6)
1952  * In order to avoid too small qstep, it is multiplied by 16.
1953  */
1954 static float intel_h264_qp_qstep(int qp)
1955 {
1956     float value, qstep;
1957     value = qp;
1958     value = value / 6 - 2;
1959     qstep = powf(2, value);
1960     return qstep;
1961 }
1962
1963 static int intel_h264_qstep_qp(float qstep)
1964 {
1965     float qp;
1966
1967     qp = 12.0f + 6.0f * log2f(qstep);
1968
1969     return floorf(qp);
1970 }
1971
1972 /*
1973  * Currently it is based on the following assumption:
 * SUM(roi_area * 1 / roi_qstep) + nonroi_area * 1 / nonroi_qstep =
 *                 total_area * 1 / baseqp_qstep
1976  *
1977  * qstep is the linearized quantizer of H264 quantizer
1978  */
/* One user ROI rectangle converted to macroblock units, plus its QP. */
typedef struct {
    int row_start_in_mb;   /* first MB row covered by the region */
    int row_end_in_mb;     /* one past the last covered MB row */
    int col_start_in_mb;   /* first MB column covered by the region */
    int col_end_in_mb;     /* one past the last covered MB column */

    int width_mbs;         /* region width in MBs (col_end - col_start) */
    int height_mbs;        /* region height in MBs (row_end - row_start) */

    int roi_qp;            /* clipped QP applied inside the region */
} ROIRegionParam;
1990
1991 static VAStatus
1992 intel_h264_enc_roi_cbr(VADriverContextP ctx,
1993                        int base_qp,
1994                        struct encode_state *encode_state,
1995                        struct intel_encoder_context *encoder_context)
1996 {
1997     int nonroi_qp;
1998     int min_qp = MAX(1, encoder_context->brc.min_qp);
1999     bool quickfill = 0;
2000
2001     ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS];
2002     int num_roi = 0;
2003     int i, j;
2004
2005     float temp;
2006     float qstep_nonroi, qstep_base;
2007     float roi_area, total_area, nonroi_area;
2008     float sum_roi;
2009
2010     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
2011     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
2012     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
2013     int mbs_in_picture = width_in_mbs * height_in_mbs;
2014
2015     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2016     VAStatus vaStatus = VA_STATUS_SUCCESS;
2017
2018     /* currently roi_value_is_qp_delta is the only supported mode of priority.
2019      *
2020      * qp_delta set by user is added to base_qp, which is then clapped by
2021      * [base_qp-min_delta, base_qp+max_delta].
2022      */
2023     ASSERT_RET(encoder_context->brc.roi_value_is_qp_delta, VA_STATUS_ERROR_INVALID_PARAMETER);
2024
2025     num_roi = encoder_context->brc.num_roi;
2026
2027     /* when the base_qp is lower than 12, the quality is quite good based
2028      * on the H264 test experience.
2029      * In such case it is unnecessary to adjust the quality for ROI region.
2030      */
2031     if (base_qp <= 12) {
2032         nonroi_qp = base_qp;
2033         quickfill = 1;
2034         goto qp_fill;
2035     }
2036
2037     sum_roi = 0.0f;
2038     roi_area = 0;
2039     for (i = 0; i < num_roi; i++) {
2040         int row_start, row_end, col_start, col_end;
2041         int roi_width_mbs, roi_height_mbs;
2042         int mbs_in_roi;
2043         int roi_qp;
2044         float qstep_roi;
2045
2046         col_start = encoder_context->brc.roi[i].left;
2047         col_end = encoder_context->brc.roi[i].right;
2048         row_start = encoder_context->brc.roi[i].top;
2049         row_end = encoder_context->brc.roi[i].bottom;
2050
2051         col_start = col_start / 16;
2052         col_end = (col_end + 15) / 16;
2053         row_start = row_start / 16;
2054         row_end = (row_end + 15) / 16;
2055
2056         roi_width_mbs = col_end - col_start;
2057         roi_height_mbs = row_end - row_start;
2058         mbs_in_roi = roi_width_mbs * roi_height_mbs;
2059
2060         param_regions[i].row_start_in_mb = row_start;
2061         param_regions[i].row_end_in_mb = row_end;
2062         param_regions[i].col_start_in_mb = col_start;
2063         param_regions[i].col_end_in_mb = col_end;
2064         param_regions[i].width_mbs = roi_width_mbs;
2065         param_regions[i].height_mbs = roi_height_mbs;
2066
2067         roi_qp = base_qp + encoder_context->brc.roi[i].value;
2068         BRC_CLIP(roi_qp, min_qp, 51);
2069
2070         param_regions[i].roi_qp = roi_qp;
2071         qstep_roi = intel_h264_qp_qstep(roi_qp);
2072
2073         roi_area += mbs_in_roi;
2074         sum_roi += mbs_in_roi / qstep_roi;
2075     }
2076
2077     total_area = mbs_in_picture;
2078     nonroi_area = total_area - roi_area;
2079
2080     qstep_base = intel_h264_qp_qstep(base_qp);
2081     temp = (total_area / qstep_base - sum_roi);
2082
2083     if (temp < 0) {
2084         nonroi_qp = 51;
2085     } else {
2086         qstep_nonroi = nonroi_area / temp;
2087         nonroi_qp = intel_h264_qstep_qp(qstep_nonroi);
2088     }
2089
2090     BRC_CLIP(nonroi_qp, min_qp, 51);
2091
2092 qp_fill:
2093     memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture);
2094     if (!quickfill) {
2095         char *qp_ptr;
2096
2097         for (i = 0; i < num_roi; i++) {
2098             for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) {
2099                 qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb;
2100                 memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs);
2101             }
2102         }
2103     }
2104     return vaStatus;
2105 }
2106
2107 extern void
2108 intel_h264_enc_roi_config(VADriverContextP ctx,
2109                           struct encode_state *encode_state,
2110                           struct intel_encoder_context *encoder_context)
2111 {
2112     char *qp_ptr;
2113     int i, j;
2114     struct i965_driver_data *i965 = i965_driver_data(ctx);
2115     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2116     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2117     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
2118     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
2119     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
2120
2121     int row_start, row_end, col_start, col_end;
2122     int num_roi = 0;
2123
2124     vme_context->roi_enabled = 0;
2125     /* Restriction: Disable ROI when multi-slice is enabled */
2126     if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
2127         return;
2128
2129     vme_context->roi_enabled = !!encoder_context->brc.num_roi;
2130
2131     if (!vme_context->roi_enabled)
2132         return;
2133
2134     if ((vme_context->saved_width_mbs !=  width_in_mbs) ||
2135         (vme_context->saved_height_mbs != height_in_mbs)) {
2136         free(vme_context->qp_per_mb);
2137         vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);
2138
2139         vme_context->saved_width_mbs = width_in_mbs;
2140         vme_context->saved_height_mbs = height_in_mbs;
2141         assert(vme_context->qp_per_mb);
2142     }
2143     if (encoder_context->rate_control_mode == VA_RC_CBR) {
2144         /*
2145          * TODO: More complex Qp adjust needs to be added.
2146          * Currently it is initialized to slice_qp.
2147          */
2148         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
2149         int qp;
2150         int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
2151
2152         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
2153         intel_h264_enc_roi_cbr(ctx, qp, encode_state, encoder_context);
2154
2155     } else if (encoder_context->rate_control_mode == VA_RC_CQP) {
2156         VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2157         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
2158         int qp;
2159         int min_qp = MAX(1, encoder_context->brc.min_qp);
2160
2161         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2162         memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
2163
2164
2165         for (j = num_roi; j ; j--) {
2166             int qp_delta, qp_clip;
2167
2168             col_start = encoder_context->brc.roi[i].left;
2169             col_end = encoder_context->brc.roi[i].right;
2170             row_start = encoder_context->brc.roi[i].top;
2171             row_end = encoder_context->brc.roi[i].bottom;
2172
2173             col_start = col_start / 16;
2174             col_end = (col_end + 15) / 16;
2175             row_start = row_start / 16;
2176             row_end = (row_end + 15) / 16;
2177
2178             qp_delta = encoder_context->brc.roi[i].value;
2179             qp_clip = qp + qp_delta;
2180
2181             BRC_CLIP(qp_clip, min_qp, 51);
2182
2183             for (i = row_start; i < row_end; i++) {
2184                 qp_ptr = vme_context->qp_per_mb + (i * width_in_mbs) + col_start;
2185                 memset(qp_ptr, qp_clip, (col_end - col_start));
2186             }
2187         }
2188     } else {
2189         /*
2190          * TODO: Disable it for non CBR-CQP.
2191          */
2192         vme_context->roi_enabled = 0;
2193     }
2194
2195     if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info))
2196         encoder_context->soft_batch_force = 1;
2197
2198     return;
2199 }
2200
2201 /* HEVC */
2202 static int
2203 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
2204                            VAPictureHEVC *ref_list,
2205                            int num_pictures,
2206                            int dir)
2207 {
2208     int i, found = -1, min = 0x7FFFFFFF;
2209
2210     for (i = 0; i < num_pictures; i++) {
2211         int tmp;
2212
2213         if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
2214             (ref_list[i].picture_id == VA_INVALID_SURFACE))
2215             break;
2216
2217         tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
2218
2219         if (dir)
2220             tmp = -tmp;
2221
2222         if (tmp > 0 && tmp < min) {
2223             min = tmp;
2224             found = i;
2225         }
2226     }
2227
2228     return found;
2229 }
/*
 * Select one reference surface for the given list (list_index 0 = L0,
 * 1 = L1) and bind it to the VME surface state slot surface_index via
 * the supplied vme_source_surface_state() callback.
 *
 * Side effects on vme_context: used_references[], used_reference_objects[]
 * and ref_index_in_mb[] are updated for list_index (cleared when no valid
 * reference is found).  For HEVC 10-bit the NV12 shadow surface attached
 * to the reference is bound instead of the P010 surface itself.
 */
void
intel_hevc_vme_reference_state(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context,
                               int list_index,
                               int surface_index,
                               void (* vme_source_surface_state)(
                                   VADriverContextP ctx,
                                   int index,
                                   struct object_surface *obj_surface,
                                   struct intel_encoder_context *encoder_context))
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct object_surface *obj_surface = NULL;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VASurfaceID ref_surface_id;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
    int max_num_references;
    VAPictureHEVC *curr_pic;
    VAPictureHEVC *ref_list;
    int ref_idx;
    unsigned int is_hevc10 = 0;
    GenHevcSurface *hevc_encoder_surface = NULL;

    /* 10-bit content is detected from the SPS bit depths. */
    if ((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
        || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
        is_hevc10 = 1;

    if (list_index == 0) {
        max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
        ref_list = slice_param->ref_pic_list0;
    } else {
        max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
        ref_list = slice_param->ref_pic_list1;
    }

    if (max_num_references == 1) {
        /* Single-reference case: take entry 0 of the slice's list directly. */
        if (list_index == 0) {
            ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
            vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
        } else {
            ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
            vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
        }

        if (ref_surface_id != VA_INVALID_SURFACE)
            obj_surface = SURFACE(ref_surface_id);

        /* Fall back to the picture parameter reference when the slice's
         * entry does not resolve to a surface with a backing bo. */
        if (!obj_surface ||
            !obj_surface->bo) {
            obj_surface = encode_state->reference_objects[list_index];
            vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
        }

        ref_idx = 0;
    } else {
        curr_pic = &pic_param->decoded_curr_pic;

        /* select the reference frame in temporal space */
        ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
        /* NOTE(review): hevc_temporal_find_surface() can return -1 when no
         * candidate lies in the requested direction; ref_list[-1] would then
         * be read here before the assert below — confirm callers guarantee a
         * valid reference in this path. */
        ref_surface_id = ref_list[ref_idx].picture_id;

        if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
            obj_surface = SURFACE(ref_surface_id);

        vme_context->used_reference_objects[list_index] = obj_surface;
        vme_context->used_references[list_index] = &ref_list[ref_idx];
    }

    if (obj_surface &&
        obj_surface->bo) {
        assert(ref_idx >= 0);
        vme_context->used_reference_objects[list_index] = obj_surface;

        if (is_hevc10) {
            /* For 10-bit, bind the NV12 shadow surface kept in the
             * per-surface private data instead of the P010 original. */
            hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
            assert(hevc_encoder_surface);
            obj_surface = hevc_encoder_surface->nv12_surface_obj;
        }
        vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
        /* Replicate the reference index into all four bytes so each MB
         * lane of the VME payload sees the same value. */
        vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
                                                    ref_idx << 16 |
                                                    ref_idx <<  8 |
                                                    ref_idx);
    } else {
        /* No usable reference: clear the per-list bookkeeping. */
        vme_context->used_reference_objects[list_index] = NULL;
        vme_context->used_references[list_index] = NULL;
        vme_context->ref_index_in_mb[list_index] = 0;
    }
}
2322
2323 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
2324                                      struct encode_state *encode_state,
2325                                      struct intel_encoder_context *encoder_context)
2326 {
2327     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2328     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2329     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2330     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2331     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2332     int qp, m_cost, j, mv_count;
2333     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
2334     float   lambda, m_costf;
2335
2336     /* here no SI SP slice for HEVC, do not need slice fixup */
2337     int slice_type = slice_param->slice_type;
2338
2339
2340     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2341
2342     if (encoder_context->rate_control_mode == VA_RC_CBR) {
2343         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
2344         if (slice_type == HEVC_SLICE_B) {
2345             if (pSequenceParameter->ip_period == 1) {
2346                 slice_type = HEVC_SLICE_P;
2347                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2348
2349             } else if (mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1) {
2350                 slice_type = HEVC_SLICE_P;
2351                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2352             }
2353         }
2354
2355     }
2356
2357     if (vme_state_message == NULL)
2358         return;
2359
2360     assert(qp <= QP_MAX);
2361     lambda = intel_lambda_qp(qp);
2362     if (slice_type == HEVC_SLICE_I) {
2363         vme_state_message[MODE_INTRA_16X16] = 0;
2364         m_cost = lambda * 4;
2365         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2366         m_cost = lambda * 16;
2367         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2368         m_cost = lambda * 3;
2369         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2370     } else {
2371         m_cost = 0;
2372         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
2373         for (j = 1; j < 3; j++) {
2374             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2375             m_cost = (int)m_costf;
2376             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2377         }
2378         mv_count = 3;
2379         for (j = 4; j <= 64; j *= 2) {
2380             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2381             m_cost = (int)m_costf;
2382             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
2383             mv_count++;
2384         }
2385
2386         if (qp <= 25) {
2387             vme_state_message[MODE_INTRA_16X16] = 0x4a;
2388             vme_state_message[MODE_INTRA_8X8] = 0x4a;
2389             vme_state_message[MODE_INTRA_4X4] = 0x4a;
2390             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2391             vme_state_message[MODE_INTER_16X16] = 0x4a;
2392             vme_state_message[MODE_INTER_16X8] = 0x4a;
2393             vme_state_message[MODE_INTER_8X8] = 0x4a;
2394             vme_state_message[MODE_INTER_8X4] = 0x4a;
2395             vme_state_message[MODE_INTER_4X4] = 0x4a;
2396             vme_state_message[MODE_INTER_BWD] = 0x2a;
2397             return;
2398         }
2399         m_costf = lambda * 10;
2400         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2401         m_cost = lambda * 14;
2402         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2403         m_cost = lambda * 24;
2404         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2405         m_costf = lambda * 3.5;
2406         m_cost = m_costf;
2407         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2408         if (slice_type == HEVC_SLICE_P) {
2409             m_costf = lambda * 2.5;
2410             m_cost = m_costf;
2411             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2412             m_costf = lambda * 4;
2413             m_cost = m_costf;
2414             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2415             m_costf = lambda * 1.5;
2416             m_cost = m_costf;
2417             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2418             m_costf = lambda * 3;
2419             m_cost = m_costf;
2420             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2421             m_costf = lambda * 5;
2422             m_cost = m_costf;
2423             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2424             /* BWD is not used in P-frame */
2425             vme_state_message[MODE_INTER_BWD] = 0;
2426         } else {
2427             m_costf = lambda * 2.5;
2428             m_cost = m_costf;
2429             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2430             m_costf = lambda * 5.5;
2431             m_cost = m_costf;
2432             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2433             m_costf = lambda * 3.5;
2434             m_cost = m_costf;
2435             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2436             m_costf = lambda * 5.0;
2437             m_cost = m_costf;
2438             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2439             m_costf = lambda * 6.5;
2440             m_cost = m_costf;
2441             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2442             m_costf = lambda * 1.5;
2443             m_cost = m_costf;
2444             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
2445         }
2446     }
2447 }