ENC: add kernel related structure and define for AVC
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_vdenc.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "i965_defines.h"
#include "i965_structs.h"
#include "i965_drv_video.h"
#include "i965_encoder.h"
#include "i965_encoder_utils.h"
#include "intel_media.h"
#include "gen9_vdenc.h"

extern int
intel_avc_enc_slice_type_fixup(int slice_type);

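/*
 * Rate-control adjustment tables: low-delay variants of the buf_rate_adj
 * tables plus VBR dist_qp_adj/buf_rate_adj tables, presumably copied over
 * the non-lowdelay defaults in gen9_brc_update_constant_data below when
 * the corresponding BRC mode is selected.  Each buf_rate_adj table is
 * 9 rows x 8 columns and each dist_qp_adj table is 9x9.  The negative
 * initializers in the uint8_t tables simply wrap to the intended
 * two's-complement byte values.
 */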
static const uint8_t buf_rate_adj_tab_i_lowdelay[72] = {
    0,   0, -8, -12, -16, -20, -28, -36,
    0,   0, -4,  -8, -12, -16, -24, -32,
    4,   2,  0,  -1,  -3,  -8, -16, -24,
    8,   4,  2,   0,  -1,  -4,  -8, -16,
    20, 16,  4,   0,  -1,  -4,  -8, -16,
    24, 20, 16,   8,   4,   0,  -4,  -8,
    28, 24, 20,  16,   8,   4,   0,  -8,
    32, 24, 20,  16,   8,   4,   0,  -4,
    64, 48, 28,  20,  16,  12,   8,   4,
};

static const uint8_t buf_rate_adj_tab_p_lowdelay[72] = {
    -8, -24, -32, -40, -44, -48, -52, -80,
    -8, -16, -32, -40, -40, -44, -44, -56,
    0,    0, -12, -20, -24, -28, -32, -36,
    8,    4,   0,   0,  -8, -16, -24, -32,
    32,  16,   8,   4,  -4,  -8, -16, -20,
    36,  24,  16,   8,   4,  -2,  -4,  -8,
    40,  36,  24,  20,  16,   8,   0,  -8,
    48,  40,  28,  24,  20,  12,   0,  -4,
    64,  48,  28,  20,  16,  12,   8,   4,
};

static const uint8_t buf_rate_adj_tab_b_lowdelay[72] = {
    0,  -4, -8, -16, -24, -32, -40, -48,
    1,   0, -4,  -8, -16, -24, -32, -40,
    4,   2,  0,  -1,  -3,  -8, -16, -24,
    8,   4,  2,   0,  -1,  -4,  -8, -16,
    20, 16,  4,   0,  -1,  -4,  -8, -16,
    24, 20, 16,   8,   4,   0,  -4,  -8,
    28, 24, 20,  16,   8,   4,   0,  -8,
    32, 24, 20,  16,   8,   4,   0,  -4,
    64, 48, 28,  20,  16,  12,   8,   4,
};

static const int8_t dist_qp_adj_tab_i_vbr[81] = {
    +0,  0,  0,  0, 0, 3, 4, 6, 8,
    +0,  0,  0,  0, 0, 2, 3, 5, 7,
    -1,  0,  0,  0, 0, 2, 2, 4, 5,
    -1, -1,  0,  0, 0, 1, 2, 2, 4,
    -2, -2, -1,  0, 0, 0, 1, 2, 4,
    -2, -2, -1,  0, 0, 0, 1, 2, 4,
    -3, -2, -1, -1, 0, 0, 1, 2, 5,
    -3, -2, -1, -1, 0, 0, 2, 4, 7,
    -4, -3, -2, -1, 0, 1, 3, 5, 8,
};

static const int8_t dist_qp_adj_tab_p_vbr[81] = {
    -1,  0,  0,  0, 0, 1, 1, 2, 3,
    -1, -1,  0,  0, 0, 1, 1, 2, 3,
    -2, -1, -1,  0, 0, 1, 1, 2, 3,
    -3, -2, -2, -1, 0, 0, 1, 2, 3,
    -3, -2, -1, -1, 0, 0, 1, 2, 3,
    -3, -2, -1, -1, 0, 0, 1, 2, 3,
    -3, -2, -1, -1, 0, 0, 1, 2, 3,
    -3, -2, -1, -1, 0, 0, 1, 2, 3,
    -3, -2, -1, -1, 0, 0, 1, 2, 3,
};

static const int8_t dist_qp_adj_tab_b_vbr[81] = {
    +0,  0,  0,  0, 0, 2, 3, 3, 4,
    +0,  0,  0,  0, 0, 2, 3, 3, 4,
    -1,  0,  0,  0, 0, 2, 2, 3, 3,
    -1, -1,  0,  0, 0, 1, 2, 2, 2,
    -1, -1, -1,  0, 0, 0, 1, 2, 2,
    -2, -1, -1,  0, 0, 0, 0, 1, 2,
    -2, -1, -1, -1, 0, 0, 0, 1, 3,
    -2, -2, -1, -1, 0, 0, 1, 1, 3,
    -2, -2, -1, -1, 0, 1, 1, 2, 4,
};

static const int8_t buf_rate_adj_tab_i_vbr[72] = {
    -4, -20, -28, -36, -40, -44, -48, -80,
    +0,  -8, -12, -20, -24, -28, -32, -36,
    +0,   0,  -8, -16, -20, -24, -28, -32,
    +8,   4,   0,   0,  -8, -16, -24, -28,
    32,  24,  16,   2,  -4,  -8, -16, -20,
    36,  32,  28,  16,   8,   0,  -4,  -8,
    40,  36,  24,  20,  16,   8,   0,  -8,
    48,  40,  28,  24,  20,  12,   0,  -4,
    64,  48,  28,  20,  16,  12,   8,   4,
};

static const int8_t buf_rate_adj_tab_p_vbr[72] = {
    -8, -24, -32, -44, -48, -56, -64, -80,
    -8, -16, -32, -40, -44, -52, -56, -64,
    +0,   0, -16, -28, -36, -40, -44, -48,
    +8,   4,   0,   0,  -8, -16, -24, -36,
    20,  12,   4,   0,  -8,  -8,  -8, -16,
    24,  16,   8,   8,   8,   0,  -4,  -8,
    40,  36,  24,  20,  16,   8,   0,  -8,
    48,  40,  28,  24,  20,  12,   0,  -4,
    64,  48,  28,  20,  16,  12,   8,   4,
};

static const int8_t buf_rate_adj_tab_b_vbr[72] = {
    0,  -4, -8, -16, -24, -32, -40, -48,
    1,   0, -4,  -8, -16, -24, -32, -40,
    4,   2,  0,  -1,  -3,  -8, -16, -24,
    8,   4,  2,   0,  -1,  -4,  -8, -16,
    20, 16,  4,   0,  -1,  -4,  -8, -16,
    24, 20, 16,   8,   4,   0,  -4,  -8,
    28, 24, 20,  16,   8,   4,   0,  -8,
    32, 24, 20,  16,   8,   4,   0,  -4,
    64, 48, 28,  20,  16,  12,   8,   4,
};

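/*
 * Default constant data blob for the HuC BRC update kernel; the layout
 * follows struct huc_brc_update_constant_data.  It bundles the global and
 * distortion-based QP adjustment tables, the buffer-rate adjustment tables
 * (non-lowdelay defaults), frame size limits, and the per-frame-type
 * intra/inter mode cost and HME MV cost tables.
 */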
static const struct huc_brc_update_constant_data
gen9_brc_update_constant_data = {
    .global_rate_qp_adj_tab_i = {
        48, 40, 32,  24,  16,   8,   0,  -8,
        40, 32, 24,  16,   8,   0,  -8, -16,
        32, 24, 16,   8,   0,  -8, -16, -24,
        24, 16,  8,   0,  -8, -16, -24, -32,
        16, 8,   0,  -8, -16, -24, -32, -40,
        8,  0,  -8, -16, -24, -32, -40, -48,
        0, -8, -16, -24, -32, -40, -48, -56,
        48, 40, 32,  24,  16,   8,   0,  -8,
    },

    .global_rate_qp_adj_tab_p = {
        48,  40,  32,  24,  16,  8,    0,  -8,
        40,  32,  24,  16,   8,  0,   -8, -16,
        16,   8,   8,   4,  -8, -16, -16, -24,
        8,    0,   0,  -8, -16, -16, -16, -24,
        8,    0,   0, -24, -32, -32, -32, -48,
        0,  -16, -16, -24, -32, -48, -56, -64,
        -8, -16, -32, -32, -48, -48, -56, -64,
        -16, -32, -48, -48, -48, -56, -64, -80,
    },

    .global_rate_qp_adj_tab_b = {
        48, 40, 32, 24,  16,   8,   0,  -8,
        40, 32, 24, 16,  8,    0,  -8, -16,
        32, 24, 16,  8,  0,   -8, -16, -24,
        24, 16, 8,   0, -8,   -8, -16, -24,
        16, 8,  0,   0, -8,  -16, -24, -32,
        16, 8,  0,   0, -8,  -16, -24, -32,
        0, -8, -8, -16, -32, -48, -56, -64,
        0, -8, -8, -16, -32, -48, -56, -64
    },

    .dist_threshld_i = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
    .dist_threshld_p = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
    .dist_threshld_b = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },

    .dist_qp_adj_tab_i = {
        0,   0,  0,  0,  0,  3,  4,  6,  8,
        0,   0,  0,  0,  0,  2,  3,  5,  7,
        -1,  0,  0,  0,  0,  2,  2,  4,  5,
        -1, -1,  0,  0,  0,  1,  2,  2,  4,
        -2, -2, -1,  0,  0,  0,  1,  2,  4,
        -2, -2, -1,  0,  0,  0,  1,  2,  4,
        -3, -2, -1, -1,  0,  0,  1,  2,  5,
        -3, -2, -1, -1,  0,  0,  2,  4,  7,
        -4, -3, -2, -1,  0,  1,  3,  5,  8,
    },

    .dist_qp_adj_tab_p = {
        -1,   0,  0,  0,  0,  1,  1,  2,  3,
        -1,  -1,  0,  0,  0,  1,  1,  2,  3,
        -2,  -1, -1,  0,  0,  1,  1,  2,  3,
        -3,  -2, -2, -1,  0,  0,  1,  2,  3,
        -3,  -2, -1, -1,  0,  0,  1,  2,  3,
        -3,  -2, -1, -1,  0,  0,  1,  2,  3,
        -3,  -2, -1, -1,  0,  0,  1,  2,  3,
        -3,  -2, -1, -1,  0,  0,  1,  2,  3,
        -3,  -2, -1, -1,  0,  0,  1,  2,  3,
    },

    .dist_qp_adj_tab_b = {
        0,   0,  0,  0, 0, 2, 3, 3, 4,
        0,   0,  0,  0, 0, 2, 3, 3, 4,
        -1,  0,  0,  0, 0, 2, 2, 3, 3,
        -1, -1,  0,  0, 0, 1, 2, 2, 2,
        -1, -1, -1,  0, 0, 0, 1, 2, 2,
        -2, -1, -1,  0, 0, 0, 0, 1, 2,
        -2, -1, -1, -1, 0, 0, 0, 1, 3,
        -2, -2, -1, -1, 0, 0, 1, 1, 3,
        -2, -2, -1, -1, 0, 1, 1, 2, 4,
    },

    /* default table for non lowdelay */
    .buf_rate_adj_tab_i = {
        -4, -20, -28, -36, -40, -44, -48, -80,
        0,   -8, -12, -20, -24, -28, -32, -36,
        0,    0,  -8, -16, -20, -24, -28, -32,
        8,    4,   0,   0,  -8, -16, -24, -28,
        32,  24,  16,   2,  -4,  -8, -16, -20,
        36,  32,  28,  16,   8,   0,  -4,  -8,
        40,  36,  24,  20,  16,   8,   0,  -8,
        48,  40,  28,  24,  20,  12,   0,  -4,
        64,  48,  28,  20,  16,  12,   8,   4,
    },

    /* default table for non lowdelay */
    .buf_rate_adj_tab_p = {
        -8, -24, -32, -44, -48, -56, -64, -80,
        -8, -16, -32, -40, -44, -52, -56, -64,
        0,    0, -16, -28, -36, -40, -44, -48,
        8,    4,   0,   0,  -8, -16, -24, -36,
        20,  12,   4,   0,  -8,  -8,  -8, -16,
        24,  16,   8,   8,   8,   0,  -4,  -8,
        40,  36,  24,  20,  16,   8,   0,  -8,
        48,  40,  28,  24,  20,  12,   0,  -4,
        64,  48,  28,  20,  16,  12,   8,   4,
    },

    /* default table for non lowdelay */
    .buf_rate_adj_tab_b = {
        0,  -4, -8, -16, -24, -32, -40, -48,
        1,   0, -4,  -8, -16, -24, -32, -40,
        4,   2,  0,  -1,  -3,  -8, -16, -24,
        8,   4,  2,   0,  -1,  -4,  -8, -16,
        20, 16,  4,   0,  -1,  -4,  -8, -16,
        24, 20, 16,   8,   4,   0,  -4,  -8,
        28, 24, 20,  16,   8,   4,   0,  -8,
        32, 24, 20,  16,   8,   4,   0,  -4,
        64, 48, 28,  20,  16,  12,   8,   4,
    },

    .frame_size_min_tab_p = { 1, 2, 4, 6, 8, 10, 16, 16, 16 },
    .frame_size_min_tab_i = { 1, 2, 4, 8, 16, 20, 24, 32, 36 },

    .frame_size_max_tab_p = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
    .frame_size_max_tab_i = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },

    .frame_size_scg_tab_p = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
    .frame_size_scg_tab_i = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },

    .i_intra_non_pred = {
        0x0e, 0x0e, 0x0e, 0x18, 0x19, 0x1b, 0x1c, 0x0d, 0x0f, 0x18, 0x19, 0x0d, 0x0f, 0x0f,
        0x0c, 0x0e, 0x0c, 0x0c, 0x0a, 0x0a, 0x0b, 0x0a, 0x0a, 0x0a, 0x09, 0x09, 0x08, 0x08,
        0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x07, 0x07, 0x07, 0x07, 0x07,
    },

    .i_intra_16x16 = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    },

    .i_intra_8x8 = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
        0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x04, 0x04, 0x04, 0x04, 0x06, 0x06, 0x06,
        0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07,
    },

    .i_intra_4x4 = {
        0x2e, 0x2e, 0x2e, 0x38, 0x39, 0x3a, 0x3b, 0x2c, 0x2e, 0x38, 0x39, 0x2d, 0x2f, 0x38,
        0x2e, 0x38, 0x2e, 0x38, 0x2f, 0x2e, 0x38, 0x38, 0x38, 0x38, 0x2f, 0x2f, 0x2f, 0x2e,
        0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x1e, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x0e, 0x0d,
    },

    .i_intra_chroma = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    },

    .p_intra_non_pred = {
        0x06, 0x06, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x07,
        0x07, 0x07, 0x06, 0x07, 0x07, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
        0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    },

    .p_intra_16x16 = {
        0x1b, 0x1b, 0x1b, 0x1c, 0x1e, 0x28, 0x29, 0x1a, 0x1b, 0x1c, 0x1e, 0x1a, 0x1c, 0x1d,
        0x1b, 0x1c, 0x1c, 0x1c, 0x1c, 0x1b, 0x1c, 0x1c, 0x1d, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c,
        0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
    },

    .p_intra_8x8 = {
        0x1d, 0x1d, 0x1d, 0x1e, 0x28, 0x29, 0x2a, 0x1b, 0x1d, 0x1e, 0x28, 0x1c, 0x1d, 0x1f,
        0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1d, 0x1f, 0x1e, 0x1e, 0x1e, 0x1d, 0x1e, 0x1e, 0x1d,
        0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e,
    },

    .p_intra_4x4 = {
        0x38, 0x38, 0x38, 0x39, 0x3a, 0x3b, 0x3d, 0x2e, 0x38, 0x39, 0x3a, 0x2f, 0x39, 0x3a,
        0x38, 0x39, 0x38, 0x39, 0x39, 0x38, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
        0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
    },

    .p_intra_chroma = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    },

    .p_inter_16x8 = {
        0x07, 0x07, 0x07, 0x08, 0x09, 0x0b, 0x0c, 0x06, 0x07, 0x09, 0x0a, 0x07, 0x08, 0x09,
        0x08, 0x09, 0x08, 0x09, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x08, 0x08, 0x08, 0x08,
        0x08, 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
    },

    .p_inter_8x8 = {
        0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x02, 0x02, 0x02, 0x03, 0x02, 0x02, 0x02,
        0x02, 0x03, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
        0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    },

    .p_inter_16x16 = {
        0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
        0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
        0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    },

    .p_ref_id = {
        0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
        0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
        0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04
    },

    .hme_mv_cost = {
        /* mv = 0 */
        {
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        },

        /* mv <= 16 */
        {
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        },

        /* mv <= 32 */
        {
            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
        },

        /* mv <= 64 */
        {
            0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
            0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
            0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
        },

        /* mv <= 128 */
        {
            0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
            0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
            0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
        },

        /* mv <= 256 */
        {
            0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
            0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
            0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
        },

        /* mv <= 512 */
        {
            0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
            0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
            0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
        },

        /* mv <= 1024 */
        {
            0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
            0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
            0x1a, 0x1a, 0x1a, 0x1a, 0x1f, 0x2a, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d
        },
    },
};

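/*
 * VDENC per-QP cost constants.  The "n DWs" comments give each array's
 * size in 32-bit dwords as it is packed into the VDENC constant state
 * (44 bytes = 11 DWs, 28 uint16_t = 14 DWs, 28 bytes = 7 DWs).  The
 * I-frame skip/SIC threshold tables are left zero-filled via empty
 * initializer lists (a GNU extension); only the P-frame variants below
 * carry real data.
 */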
/* 11 DWs */
static const uint8_t vdenc_const_qp_lambda[44] = {
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
    0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
    0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
    0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
    0x4a, 0x53, 0x00, 0x00
};

/* 14 DWs */
static const uint16_t vdenc_const_skip_threshold[28] = {

};

/* 14 DWs */
static const uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0[28] = {

};

/* 7 DWs */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1[28] = {

};

/* 7 DWs */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2[28] = {

};

/* 7 DWs */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3[28] = {

};

/* P frame */
/* 11 DWs */
static const uint8_t vdenc_const_qp_lambda_p[44] = {
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
    0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
    0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
    0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
    0x4a, 0x53, 0x00, 0x00
};

/* 14 DWs */
static const uint16_t vdenc_const_skip_threshold_p[28] = {
    0x0000, 0x0000, 0x0000, 0x0000, 0x0002, 0x0004, 0x0007, 0x000b,
    0x0011, 0x0019, 0x0023, 0x0032, 0x0044, 0x005b, 0x0077, 0x0099,
    0x00c2, 0x00f1, 0x0128, 0x0168, 0x01b0, 0x0201, 0x025c, 0x02c2,
    0x0333, 0x03b0, 0x0000, 0x0000
};

/* 14 DWs */
static const uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0_p[28] = {
    0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
    0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x3f,
    0x4e, 0x51, 0x5b, 0x63, 0x6f, 0x7f, 0x00, 0x00
};

/* 7 DWs */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1_p[28] = {
    0x03, 0x04, 0x05, 0x05, 0x07, 0x09, 0x0b, 0x0e, 0x12, 0x17,
    0x1c, 0x21, 0x27, 0x2c, 0x33, 0x3b, 0x41, 0x51, 0x5c, 0x1a,
    0x1e, 0x21, 0x22, 0x26, 0x2c, 0x30, 0x00, 0x00
};

/* 7 DWs */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2_p[28] = {
    0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
    0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x0f,
    0x13, 0x14, 0x16, 0x18, 0x1b, 0x1f, 0x00, 0x00
};

/* 7 DWs */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3_p[28] = {
    0x04, 0x05, 0x06, 0x09, 0x0b, 0x0d, 0x12, 0x16, 0x1b, 0x23,
    0x2c, 0x33, 0x3d, 0x45, 0x4f, 0x5b, 0x66, 0x7f, 0x8e, 0x2a,
    0x2f, 0x32, 0x37, 0x3c, 0x45, 0x4c, 0x00, 0x00
};

static const double
vdenc_brc_dev_threshi0_fp_neg[4] = { 0.80, 0.60, 0.34, 0.2 };

static const double
vdenc_brc_dev_threshi0_fp_pos[4] = { 0.2, 0.4, 0.66, 0.9 };

static const double
vdenc_brc_dev_threshpb0_fp_neg[4] = { 0.90, 0.66, 0.46, 0.3 };

static const double
vdenc_brc_dev_threshpb0_fp_pos[4] = { 0.3, 0.46, 0.70, 0.90 };

static const double
vdenc_brc_dev_threshvbr0_neg[4] = { 0.90, 0.70, 0.50, 0.3 };

static const double
vdenc_brc_dev_threshvbr0_pos[4] = { 0.4, 0.5, 0.75, 0.90 };

static const unsigned char
vdenc_brc_estrate_thresh_p0[7] = { 4, 8, 12, 16, 20, 24, 28 };

static const unsigned char
vdenc_brc_estrate_thresh_i0[7] = { 4, 8, 12, 16, 20, 24, 28 };

static const uint16_t
vdenc_brc_start_global_adjust_frame[4] = { 10, 50, 100, 150 };

static const uint8_t
vdenc_brc_global_rate_ratio_threshold[7] = { 80, 90, 95, 101, 105, 115, 130 };

static const uint8_t
vdenc_brc_start_global_adjust_mult[5] = { 1, 1, 3, 2, 1 };

static const uint8_t
vdenc_brc_start_global_adjust_div[5] = { 40, 5, 5, 3, 1 };

static const int8_t
vdenc_brc_global_rate_ratio_threshold_qp[8] = { -3, -2, -1, 0, 1, 1, 2, 3 };

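/*
 * Mode cost constants indexed as [slice type][LUT mode][QP]: the first
 * index is VDENC_FRAME_I (intra slice) or VDENC_FRAME_P (predicted slice),
 * the second follows the VDENC_LUTMODE_* enumeration and the third is the
 * QP value (0..51).
 */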
static const int vdenc_mode_const[2][12][52] = {
    //INTRASLICE
    {
        //LUTMODE_INTRA_NONPRED
        {
            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,         //QP=[0 ~12]
            16, 18, 22, 24, 13, 15, 16, 18, 13, 15, 15, 12, 14,         //QP=[13~25]
            12, 12, 10, 10, 11, 10, 10, 10, 9, 9, 8, 8, 8,              //QP=[26~38]
            8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7,                      //QP=[39~51]
        },

        //LUTMODE_INTRA_16x16, LUTMODE_INTRA
        {
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[0 ~12]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[13~25]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[26~38]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[39~51]
        },

        //LUTMODE_INTRA_8x8
        {
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  //QP=[0 ~12]
            0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,  //QP=[13~25]
            1, 1, 1, 1, 1, 4, 4, 4, 4, 6, 6, 6, 6,  //QP=[26~38]
            6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7,  //QP=[39~51]
        },

        //LUTMODE_INTRA_4x4
        {
            56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,   //QP=[0 ~12]
            64, 72, 80, 88, 48, 56, 64, 72, 53, 59, 64, 56, 64,   //QP=[13~25]
            57, 64, 58, 55, 64, 64, 64, 64, 59, 59, 60, 57, 50,   //QP=[26~38]
            46, 42, 38, 34, 31, 27, 23, 22, 19, 18, 16, 14, 13,   //QP=[39~51]
        },

        //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
        { 0, },

        //LUTMODE_INTER_8X8Q
        { 0, },

        //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16x8_FIELD
        { 0, },

        //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8X8_FIELD
        { 0, },

        //LUTMODE_INTER_16x16, LUTMODE_INTER
        { 0, },

        //LUTMODE_INTER_BWD
        { 0, },

        //LUTMODE_REF_ID
        { 0, },

        //LUTMODE_INTRA_CHROMA
        { 0, },
    },

    //PREDSLICE
    {
        //LUTMODE_INTRA_NONPRED
        {
            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,     //QP=[0 ~12]
            7, 8, 9, 10, 5, 6, 7, 8, 6, 7, 7, 7, 7,    //QP=[13~25]
            6, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7,     //QP=[26~38]
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     //QP=[39~51]
        },

        //LUTMODE_INTRA_16x16, LUTMODE_INTRA
        {
            21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
            24, 28, 31, 35, 19, 21, 24, 28, 20, 24, 25, 21, 24,
            24, 24, 24, 21, 24, 24, 26, 24, 24, 24, 24, 24, 24,
            24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,

        },

        //LUTMODE_INTRA_8x8
        {
            26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,   //QP=[0 ~12]
            28, 32, 36, 40, 22, 26, 28, 32, 24, 26, 30, 26, 28,   //QP=[13~25]
            26, 28, 26, 26, 30, 28, 28, 28, 26, 28, 28, 26, 28,   //QP=[26~38]
            28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,   //QP=[39~51]
        },

        //LUTMODE_INTRA_4x4
        {
            64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,   //QP=[0 ~12]
            72, 80, 88, 104, 56, 64, 72, 80, 58, 68, 76, 64, 68,  //QP=[13~25]
            64, 68, 68, 64, 70, 70, 70, 70, 68, 68, 68, 68, 68,   //QP=[26~38]
            68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,   //QP=[39~51]
        },

        //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
        {
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,      //QP=[0 ~12]
            8, 9, 11, 12, 6, 7, 9, 10, 7, 8, 9, 8, 9,   //QP=[13~25]
            8, 9, 8, 8, 9, 9, 9, 9, 8, 8, 8, 8, 8,      //QP=[26~38]
            8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,      //QP=[39~51]
        },

        //LUTMODE_INTER_8X8Q
        {
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,   //QP=[0 ~12]
            2, 3, 3, 3, 2, 2, 2, 3, 2, 2, 2, 2, 3,   //QP=[13~25]
            2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   //QP=[26~38]
            3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   //QP=[39~51]
        },

        //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16X8_FIELD
        {
            5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[0 ~12]
            5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[13~25]
            5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[26~38]
            5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[39~51]
        },

        //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8x8_FIELD
        {
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[0 ~12]
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[13~25]
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[26~38]
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[39~51]
        },

        //LUTMODE_INTER_16x16, LUTMODE_INTER
        {
            5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[0 ~12]
            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   //QP=[13~25]
            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   //QP=[26~38]
            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   //QP=[39~51]
        },

        //LUTMODE_INTER_BWD
        {
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[0 ~12]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[13~25]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[26~38]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[39~51]
        },

        //LUTMODE_REF_ID
        {
            4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[0 ~12]
            4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[13~25]
            4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[26~38]
            4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[39~51]
        },

        //LUTMODE_INTRA_CHROMA
        {
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[0 ~12]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[13~25]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[26~38]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[39~51]
        },
    },
};

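/*
 * MV skip-bias costs (per quarter-pel MV bucket) and HME MV costs indexed
 * as [MV range bucket][QP] for P slices; both are run through
 * map_44_lut_value() in gen9_vdenc_avc_calculate_mode_cost() before being
 * programmed.
 */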
static const int vdenc_mv_cost_skipbias_qpel[8] = {
    //PREDSLICE
    0, 6, 6, 9, 10, 13, 14, 16
};

static const int vdenc_hme_cost[8][52] = {
    //mv=0
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[0 ~12]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[13 ~25]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[26 ~38]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[39 ~51]
    },
    //mv<=16
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[0 ~12]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[13 ~25]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[26 ~38]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[39 ~51]
    },
    //mv<=32
    {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[0 ~12]
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[13 ~25]
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[26 ~38]
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[39 ~51]
    },
    //mv<=64
    {
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[0 ~12]
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[13 ~25]
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[26 ~38]
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[39 ~51]
    },
    //mv<=128
    {
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[0 ~12]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[13 ~25]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[26 ~38]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[39 ~51]
    },
    //mv<=256
    {
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[0 ~12]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[13 ~25]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[26 ~38]
        10, 10, 10, 10, 20, 30, 40, 50, 50, 50, 50, 50, 50,     //QP=[39 ~51]
    },
    //mv<=512
    {
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[0 ~12]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[13 ~25]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[26 ~38]
        20, 20, 20, 40, 60, 80, 100, 100, 100, 100, 100, 100, 100,     //QP=[39 ~51]
    },

    //mv<=1024
    {
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[0 ~12]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[13 ~25]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[26 ~38]
        20, 20, 30, 50, 100, 200, 200, 200, 200, 200, 200, 200, 200,     //QP=[39 ~51]
    },
};

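/*
 * Batch helpers: OUT_BUFFER_2DW emits a 64-bit graphics address (as a
 * relocation when a bo is given, otherwise two zero dwords), and
 * OUT_BUFFER_3DW adds the MOCS dword on top of it.  The 'attr' argument
 * of OUT_BUFFER_3DW is currently unused, and the macro expects 'i965' to
 * be in scope at the call site.
 */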
#define OUT_BUFFER_2DW(batch, bo, is_target, delta)  do {               \
        if (bo) {                                                       \
            OUT_BCS_RELOC64(batch,                                      \
                            bo,                                         \
                            I915_GEM_DOMAIN_RENDER,                     \
                            is_target ? I915_GEM_DOMAIN_RENDER : 0,     \
                            delta);                                     \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
    } while (0)

#define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr)  do { \
        OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
        OUT_BCS_BATCH(batch, i965->intel.mocs_state);           \
    } while (0)

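/* Allocate a linear (1D, untiled) GPE buffer of 'bfsize' bytes tagged with the description 'des'. */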
#define ALLOC_VDENC_BUFFER_RESOURCE(buffer, bfsize, des) do {   \
        buffer.type = I965_GPE_RESOURCE_BUFFER;                 \
        buffer.width = bfsize;                                  \
        buffer.height = 1;                                      \
        buffer.pitch = buffer.width;                            \
        buffer.size = buffer.pitch;                             \
        buffer.tiling = I915_TILING_NONE;                       \
        i965_allocate_gpe_resource(i965->intel.bufmgr,          \
                                   &buffer,                     \
                                   bfsize,                      \
                                   (des));                      \
    } while (0)

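/*
 * Map the H.264 level (level_idc, e.g. 30 for level 3.0) to the maximum
 * vertical MV range in quarter-pel units, following the MaxVmvR limits of
 * H.264 Annex A Table A-1 (64/128/256/512 luma samples).
 */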
static int
gen9_vdenc_get_max_vmv_range(int level)
{
    int max_vmv_range = 512;

    if (level == 10)
        max_vmv_range = 256;
    else if (level <= 20)
        max_vmv_range = 512;
    else if (level <= 30)
        max_vmv_range = 1024;
    else
        max_vmv_range = 2048;

    return max_vmv_range;
}

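/*
 * Pack a cost value into the 4.4 "shift.mantissa" format used for the
 * cost LUTs: the high nibble is a shift and the low nibble a mantissa, so
 * the encoded byte decodes back to roughly (ret & 0xf) << (ret >> 4),
 * clamped to 'max'.  Example: v = 100 gives d = floor(log2(100)) - 3 = 3
 * and ret = (3 << 4) + ((100 + 4) >> 3) = 0x3d, which decodes to
 * 13 << 3 = 104.  A zero mantissa nibble is forced to 8 so the encoded
 * cost never decodes to zero.
 */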
static unsigned char
map_44_lut_value(unsigned int v, unsigned char max)
{
    unsigned int maxcost;
    int d;
    unsigned char ret;

    if (v == 0) {
        return 0;
    }

    maxcost = ((max & 15) << (max >> 4));

    if (v >= maxcost) {
        return max;
    }

    d = (int)(log((double)v) / log(2.0)) - 3;

    if (d < 0) {
        d = 0;
    }

    ret = (unsigned char)((d << 4) + (int)((v + (d == 0 ? 0 : (1 << (d - 1)))) >> d));
    ret = (ret & 0xf) == 0 ? (ret | 8) : ret;

    return ret;
}

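/*
 * Refresh the per-frame BRC and ROI parameters from the common encoder
 * context: bit rates are converted to kbps (ALIGN(bps, 1000) / 1000),
 * CBR uses min = target = max while VBR derives min/target from the
 * target percentage, and the ROI rectangles are converted from pixels to
 * macroblock units (>> 4) for the StreamIn surface.
 */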
static void
gen9_vdenc_update_misc_parameters(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    int i;

    vdenc_context->gop_size = encoder_context->brc.gop_size;
    vdenc_context->ref_dist = encoder_context->brc.num_bframes_in_gop + 1;

    if (vdenc_context->internal_rate_mode != I965_BRC_CQP &&
        encoder_context->brc.need_reset) {
        /* So far, vdenc doesn't support temporal layer */
        vdenc_context->framerate = encoder_context->brc.framerate[0];

        vdenc_context->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
        vdenc_context->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;

        vdenc_context->max_bit_rate = ALIGN(encoder_context->brc.bits_per_second[0], 1000) / 1000;
        vdenc_context->mb_brc_enabled = encoder_context->brc.mb_rate_control[0];
        vdenc_context->brc_need_reset = (vdenc_context->brc_initted && encoder_context->brc.need_reset);

        if (vdenc_context->internal_rate_mode == I965_BRC_CBR) {
            vdenc_context->min_bit_rate = vdenc_context->max_bit_rate;
            vdenc_context->target_bit_rate = vdenc_context->max_bit_rate;
        } else {
            assert(vdenc_context->internal_rate_mode == I965_BRC_VBR);
            vdenc_context->min_bit_rate = vdenc_context->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
            vdenc_context->target_bit_rate = vdenc_context->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
        }
    }

    vdenc_context->mb_brc_enabled = 1;
    vdenc_context->num_roi = MIN(encoder_context->brc.num_roi, 3);
    vdenc_context->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
    vdenc_context->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
    vdenc_context->vdenc_streamin_enable = !!vdenc_context->num_roi;

    for (i = 0; i < vdenc_context->num_roi; i++) {
        vdenc_context->roi[i].left = encoder_context->brc.roi[i].left >> 4;
        vdenc_context->roi[i].right = encoder_context->brc.roi[i].right >> 4;
        vdenc_context->roi[i].top = encoder_context->brc.roi[i].top >> 4;
        vdenc_context->roi[i].bottom = encoder_context->brc.roi[i].bottom >> 4;
        vdenc_context->roi[i].value = encoder_context->brc.roi[i].value;
    }
}

static void
gen9_vdenc_update_parameters(VADriverContextP ctx,
                             VAProfile profile,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

    if (profile == VAProfileH264High)
        vdenc_context->transform_8x8_mode_enable = !!pic_param->pic_fields.bits.transform_8x8_mode_flag;
    else
        vdenc_context->transform_8x8_mode_enable = 0;

    vdenc_context->frame_width_in_mbs = seq_param->picture_width_in_mbs;
    vdenc_context->frame_height_in_mbs = seq_param->picture_height_in_mbs;

    vdenc_context->frame_width = vdenc_context->frame_width_in_mbs * 16;
    vdenc_context->frame_height = vdenc_context->frame_height_in_mbs * 16;

    vdenc_context->down_scaled_width_in_mb4x = WIDTH_IN_MACROBLOCKS(vdenc_context->frame_width / SCALE_FACTOR_4X);
    vdenc_context->down_scaled_height_in_mb4x = HEIGHT_IN_MACROBLOCKS(vdenc_context->frame_height / SCALE_FACTOR_4X);
    vdenc_context->down_scaled_width_4x = vdenc_context->down_scaled_width_in_mb4x * 16;
    vdenc_context->down_scaled_height_4x = ((vdenc_context->down_scaled_height_in_mb4x + 1) >> 1) * 16;
    vdenc_context->down_scaled_height_4x = ALIGN(vdenc_context->down_scaled_height_4x, 32) << 1;

    gen9_vdenc_update_misc_parameters(ctx, encode_state, encoder_context);

    vdenc_context->current_pass = 0;
    vdenc_context->num_passes = 1;

    if (vdenc_context->internal_rate_mode == I965_BRC_CBR ||
        vdenc_context->internal_rate_mode == I965_BRC_VBR)
        vdenc_context->brc_enabled = 1;
    else
        vdenc_context->brc_enabled = 0;

    if (vdenc_context->brc_enabled &&
        (!vdenc_context->init_vbv_buffer_fullness_in_bit ||
         !vdenc_context->vbv_buffer_size_in_bit ||
         !vdenc_context->max_bit_rate ||
         !vdenc_context->target_bit_rate ||
         !vdenc_context->framerate.num ||
         !vdenc_context->framerate.den))
        vdenc_context->brc_enabled = 0;

    if (!vdenc_context->brc_enabled) {
        vdenc_context->target_bit_rate = 0;
        vdenc_context->max_bit_rate = 0;
        vdenc_context->min_bit_rate = 0;
        vdenc_context->init_vbv_buffer_fullness_in_bit = 0;
        vdenc_context->vbv_buffer_size_in_bit = 0;
    } else {
        vdenc_context->num_passes = NUM_OF_BRC_PAK_PASSES;
    }
}

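/*
 * Fill the per-frame mode, MV and HME cost arrays for the given QP by
 * running the raw table values through map_44_lut_value(); the inter and
 * HME entries are only programmed for P frames.
 */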
static void
gen9_vdenc_avc_calculate_mode_cost(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   struct intel_encoder_context *encoder_context,
                                   int qp)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    unsigned int frame_type = vdenc_context->frame_type;

    memset(vdenc_context->mode_cost, 0, sizeof(vdenc_context->mode_cost));
    memset(vdenc_context->mv_cost, 0, sizeof(vdenc_context->mv_cost));
    memset(vdenc_context->hme_mv_cost, 0, sizeof(vdenc_context->hme_mv_cost));

    vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_NONPRED] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_NONPRED][qp]), 0x6f);
    vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_16x16][qp]), 0x8f);
    vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_8x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_8x8][qp]), 0x8f);
    vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_4x4] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_4x4][qp]), 0x8f);

    if (frame_type == VDENC_FRAME_P) {
        vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x16][qp]), 0x8f);
        vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x8][qp]), 0x8f);
        vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X8Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X8Q][qp]), 0x6f);
        vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X4Q][qp]), 0x6f);
        vdenc_context->mode_cost[VDENC_LUTMODE_INTER_4X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_4X4Q][qp]), 0x6f);
        vdenc_context->mode_cost[VDENC_LUTMODE_REF_ID] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_REF_ID][qp]), 0x6f);

        vdenc_context->mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[0]), 0x6f);
        vdenc_context->mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[1]), 0x6f);
        vdenc_context->mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[2]), 0x6f);
        vdenc_context->mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[3]), 0x6f);
        vdenc_context->mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[4]), 0x6f);
        vdenc_context->mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[5]), 0x6f);
        vdenc_context->mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[6]), 0x6f);
        vdenc_context->mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[7]), 0x6f);

        vdenc_context->hme_mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_hme_cost[0][qp]), 0x6f);
        vdenc_context->hme_mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_hme_cost[1][qp]), 0x6f);
        vdenc_context->hme_mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_hme_cost[2][qp]), 0x6f);
        vdenc_context->hme_mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_hme_cost[3][qp]), 0x6f);
        vdenc_context->hme_mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_hme_cost[4][qp]), 0x6f);
        vdenc_context->hme_mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_hme_cost[5][qp]), 0x6f);
        vdenc_context->hme_mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_hme_cost[6][qp]), 0x6f);
        vdenc_context->hme_mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_hme_cost[7][qp]), 0x6f);
    }
}

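/*
 * Write the ROI selection into the VDENC StreamIn surface: one 64-byte
 * gen9_vdenc_streamin_state entry per macroblock, stored row by row
 * (index = row * frame_width_in_mbs + col).  Selection 0 means "no ROI",
 * and when regions overlap the last ROI in the list wins.
 */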
static void
gen9_vdenc_update_roi_in_streamin_state(VADriverContextP ctx,
                                        struct intel_encoder_context *encoder_context)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct gen9_vdenc_streamin_state *streamin_state;
    int row, col, i;

    if (!vdenc_context->num_roi)
        return;

    streamin_state = (struct gen9_vdenc_streamin_state *)i965_map_gpe_resource(&vdenc_context->vdenc_streamin_res);

    if (!streamin_state)
        return;

    for (col = 0; col < vdenc_context->frame_width_in_mbs; col++) {
        for (row = 0; row < vdenc_context->frame_height_in_mbs; row++) {
            streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = 0; /* non-ROI region */

            /* The last one has higher priority */
            for (i = vdenc_context->num_roi - 1; i >= 0; i--) {
                if ((col >= vdenc_context->roi[i].left && col <= vdenc_context->roi[i].right) &&
                    (row >= vdenc_context->roi[i].top && row <= vdenc_context->roi[i].bottom)) {
                    streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = i + 1;

                    break;
                }
            }
        }
    }

    i965_unmap_gpe_resource(&vdenc_context->vdenc_streamin_res);
}

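/*
 * Per-frame preparation: update the encoding parameters, (re)create the
 * reconstructed/reference/4x-scaled surfaces and the row-store, status,
 * bitstream and StreamIn buffers, resolve the list0 reference indexes for
 * the first slice, and derive the frame type and QP used for the cost
 * tables.
 */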
static VAStatus
gen9_vdenc_avc_prepare(VADriverContextP ctx,
                       VAProfile profile,
                       struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct i965_coded_buffer_segment *coded_buffer_segment;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    VAEncPictureParameterBufferH264 *pic_param;
    VAEncSliceParameterBufferH264 *slice_param;
    VDEncAvcSurface *vdenc_avc_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    int qp;
    char *pbuffer;

    gen9_vdenc_update_parameters(ctx, profile, encode_state, encoder_context);

    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    /* Setup current frame */
    obj_surface = encode_state->reconstructed_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (obj_surface->private_data == NULL) {
        vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
        assert(vdenc_avc_surface);

        vdenc_avc_surface->ctx = ctx;
        i965_CreateSurfaces(ctx,
                            vdenc_context->down_scaled_width_4x,
                            vdenc_context->down_scaled_height_4x,
                            VA_RT_FORMAT_YUV420,
                            1,
                            &vdenc_avc_surface->scaled_4x_surface_id);
        vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
        assert(vdenc_avc_surface->scaled_4x_surface_obj);
        i965_check_alloc_surface_bo(ctx,
                                    vdenc_avc_surface->scaled_4x_surface_obj,
                                    1,
                                    VA_FOURCC_NV12,
                                    SUBSAMPLE_YUV420);

        obj_surface->private_data = (void *)vdenc_avc_surface;
        obj_surface->free_private_data = (void *)vdenc_free_avc_surface;
    }

    vdenc_avc_surface = (VDEncAvcSurface *)obj_surface->private_data;
    assert(vdenc_avc_surface->scaled_4x_surface_obj);

    /* Reconstructed surfaces */
    i965_free_gpe_resource(&vdenc_context->recon_surface_res);
    i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
    i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
    i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);

    i965_object_surface_to_2d_gpe_resource(&vdenc_context->recon_surface_res, obj_surface);
    i965_object_surface_to_2d_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res, vdenc_avc_surface->scaled_4x_surface_obj);

    if (enable_avc_ildb) {
        i965_object_surface_to_2d_gpe_resource(&vdenc_context->post_deblocking_output_res, obj_surface);
    } else {
        i965_object_surface_to_2d_gpe_resource(&vdenc_context->pre_deblocking_output_res, obj_surface);
    }


    /* Reference surfaces */
    for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
        assert(ARRAY_ELEMS(vdenc_context->list_reference_res) ==
               ARRAY_ELEMS(vdenc_context->list_scaled_4x_reference_res));
        i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
        i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
        obj_surface = encode_state->reference_objects[i];

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_reference_res[i], obj_surface);

            if (obj_surface->private_data == NULL) {
                vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
                assert(vdenc_avc_surface);

                vdenc_avc_surface->ctx = ctx;
                i965_CreateSurfaces(ctx,
                                    vdenc_context->down_scaled_width_4x,
                                    vdenc_context->down_scaled_height_4x,
                                    VA_RT_FORMAT_YUV420,
                                    1,
                                    &vdenc_avc_surface->scaled_4x_surface_id);
                vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
                assert(vdenc_avc_surface->scaled_4x_surface_obj);
                i965_check_alloc_surface_bo(ctx,
                                            vdenc_avc_surface->scaled_4x_surface_obj,
                                            1,
                                            VA_FOURCC_NV12,
                                            SUBSAMPLE_YUV420);

                obj_surface->private_data = vdenc_avc_surface;
                obj_surface->free_private_data = vdenc_free_avc_surface;
1149             }
1150
1151             vdenc_avc_surface = obj_surface->private_data;
1152             i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i], vdenc_avc_surface->scaled_4x_surface_obj);
1153         }
1154     }
1155
1156     /* Input YUV surface */
1157     i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
1158     i965_object_surface_to_2d_gpe_resource(&vdenc_context->uncompressed_input_surface_res, encode_state->input_yuv_object);
1159
1160     /* Encoded bitstream */
1161     obj_buffer = encode_state->coded_buf_object;
1162     bo = obj_buffer->buffer_store->bo;
1163     i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
1164     i965_dri_object_to_buffer_gpe_resource(&vdenc_context->compressed_bitstream.res, bo);
1165     vdenc_context->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
1166     vdenc_context->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
1167
1168     /* Status buffer */
1169     i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
1170     i965_dri_object_to_buffer_gpe_resource(&vdenc_context->status_bffuer.res, bo);
1171     vdenc_context->status_bffuer.base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
1172     vdenc_context->status_bffuer.size = ALIGN(sizeof(struct gen9_vdenc_status), 64);
1173     vdenc_context->status_bffuer.bytes_per_frame_offset = offsetof(struct gen9_vdenc_status, bytes_per_frame);
1174     assert(vdenc_context->status_bffuer.base_offset + vdenc_context->status_bffuer.size <
1175            vdenc_context->compressed_bitstream.start_offset);
1176
1177     dri_bo_map(bo, 1);
1178
1179     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
1180     coded_buffer_segment->mapped = 0;
1181     coded_buffer_segment->codec = encoder_context->codec;
1182     coded_buffer_segment->status_support = 1;
1183
1184     pbuffer = bo->virtual;
1185     pbuffer += vdenc_context->status_bffuer.base_offset;
1186     memset(pbuffer, 0, vdenc_context->status_bffuer.size);
1187
1188     dri_bo_unmap(bo);
1189
1190     i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
1191     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_intra_row_store_scratch_res,
1192                                 vdenc_context->frame_width_in_mbs * 64,
1193                                 "Intra row store scratch buffer");
1194
1195     i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
1196     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_deblocking_filter_row_store_scratch_res,
1197                                 vdenc_context->frame_width_in_mbs * 256,
1198                                 "Deblocking filter row store scratch buffer");
1199
1200     i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
1201     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_bsd_mpc_row_store_scratch_res,
1202                                 vdenc_context->frame_width_in_mbs * 128,
1203                                 "BSD/MPC row store scratch buffer");
1204
1205     i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
1206     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_row_store_scratch_res,
1207                                 vdenc_context->frame_width_in_mbs * 64,
1208                                 "VDENC row store scratch buffer");
1209
1210     assert(sizeof(struct gen9_vdenc_streamin_state) == 64);
1211     i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
1212     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_streamin_res,
1213                                 vdenc_context->frame_width_in_mbs *
1214                                 vdenc_context->frame_height_in_mbs *
1215                                 sizeof(struct gen9_vdenc_streamin_state),
1216                                 "VDENC StreamIn buffer");
1217
1218     /*
1219      * Calculate the index for each reference surface in list0 for the first slice
1220      * TODO: other slices
1221      */
1222     pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1223     slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1224
1225     vdenc_context->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
1226
1227     if (slice_param->num_ref_idx_active_override_flag)
1228         vdenc_context->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
1229
1230     if (vdenc_context->num_refs[0] > ARRAY_ELEMS(vdenc_context->list_ref_idx[0]))
1231         return VA_STATUS_ERROR_INVALID_VALUE;
1232
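    /*
     * Translate each RefPicList0 entry into an index into
     * encode_state->reference_objects by matching the VA surface ID;
     * entries beyond num_refs[0] (or without a match) stay 0.
     */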
1233     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_ref_idx[0]); i++) {
1234         VAPictureH264 *va_pic;
1235
1236         assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(vdenc_context->list_ref_idx[0]));
1237         vdenc_context->list_ref_idx[0][i] = 0;
1238
1239         if (i >= vdenc_context->num_refs[0])
1240             continue;
1241
1242         va_pic = &slice_param->RefPicList0[i];
1243
1244         for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
1245             obj_surface = encode_state->reference_objects[j];
1246
1247             if (obj_surface &&
1248                 obj_surface->bo &&
1249                 obj_surface->base.id == va_pic->picture_id) {
1250
1251                 assert(obj_surface->base.id != VA_INVALID_SURFACE);
1252                 vdenc_context->list_ref_idx[0][i] = j;
1253
1254                 break;
1255             }
1256         }
1257     }
1258
1259     if (slice_param->slice_type == SLICE_TYPE_I ||
1260         slice_param->slice_type == SLICE_TYPE_SI)
1261         vdenc_context->frame_type = VDENC_FRAME_I;
1262     else
1263         vdenc_context->frame_type = VDENC_FRAME_P;
1264
1265     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1266
1267     gen9_vdenc_avc_calculate_mode_cost(ctx, encode_state, encoder_context, qp);
1268     gen9_vdenc_update_roi_in_streamin_state(ctx, encoder_context);
1269
1270     return VA_STATUS_SUCCESS;
1271 }
1272
1273 static void
1274 gen9_vdenc_huc_pipe_mode_select(VADriverContextP ctx,
1275                                 struct intel_encoder_context *encoder_context,
1276                                 struct huc_pipe_mode_select_parameter *params)
1277 {
1278     struct intel_batchbuffer *batch = encoder_context->base.batch;
1279
1280     BEGIN_BCS_BATCH(batch, 3);
1281
1282     OUT_BCS_BATCH(batch, HUC_PIPE_MODE_SELECT | (3 - 2));
1283     OUT_BCS_BATCH(batch,
1284                   (params->huc_stream_object_enable << 10) |
1285                   (params->indirect_stream_out_enable << 4));
1286     OUT_BCS_BATCH(batch,
1287                   params->media_soft_reset_counter);
1288
1289     ADVANCE_BCS_BATCH(batch);
1290 }
1291
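/*
 * HUC_IMEM_STATE selects the HuC firmware (kernel) to run via the firmware
 * descriptor, e.g. HUC_BRC_INIT_RESET or HUC_BRC_UPDATE below.
 */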
1292 static void
1293 gen9_vdenc_huc_imem_state(VADriverContextP ctx,
1294                           struct intel_encoder_context *encoder_context,
1295                           struct huc_imem_state_parameter *params)
1296 {
1297     struct intel_batchbuffer *batch = encoder_context->base.batch;
1298
1299     BEGIN_BCS_BATCH(batch, 5);
1300
1301     OUT_BCS_BATCH(batch, HUC_IMEM_STATE | (5 - 2));
1302     OUT_BCS_BATCH(batch, 0);
1303     OUT_BCS_BATCH(batch, 0);
1304     OUT_BCS_BATCH(batch, 0);
1305     OUT_BCS_BATCH(batch, params->huc_firmware_descriptor);
1306
1307     ADVANCE_BCS_BATCH(batch);
1308 }
1309
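/*
 * HUC_DMEM_STATE copies the driver-built parameter block (the BRC init/update
 * DMEM structures prepared in this file) from the source BO into HuC data
 * memory at the given destination offset and length.
 */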
1310 static void
1311 gen9_vdenc_huc_dmem_state(VADriverContextP ctx,
1312                           struct intel_encoder_context *encoder_context,
1313                           struct huc_dmem_state_parameter *params)
1314 {
1315     struct i965_driver_data *i965 = i965_driver_data(ctx);
1316     struct intel_batchbuffer *batch = encoder_context->base.batch;
1317
1318     BEGIN_BCS_BATCH(batch, 6);
1319
1320     OUT_BCS_BATCH(batch, HUC_DMEM_STATE | (6 - 2));
1321     OUT_BUFFER_3DW(batch, params->huc_data_source_res->bo, 0, 0, 0);
1322     OUT_BCS_BATCH(batch, params->huc_data_destination_base_address);
1323     OUT_BCS_BATCH(batch, params->huc_data_length);
1324
1325     ADVANCE_BCS_BATCH(batch);
1326 }
1327
1328 /*
1329 static void
1330 gen9_vdenc_huc_cfg_state(VADriverContextP ctx,
1331                          struct intel_encoder_context *encoder_context,
1332                          struct huc_cfg_state_parameter *params)
1333 {
1334     struct intel_batchbuffer *batch = encoder_context->base.batch;
1335
1336     BEGIN_BCS_BATCH(batch, 2);
1337
1338     OUT_BCS_BATCH(batch, HUC_CFG_STATE | (2 - 2));
1339     OUT_BCS_BATCH(batch, !!params->force_reset);
1340
1341     ADVANCE_BCS_BATCH(batch);
1342 }
1343 */
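/*
 * HUC_VIRTUAL_ADDR_STATE binds up to 16 surfaces ("regions") for the HuC
 * kernel; regions flagged is_target are writable outputs, the rest are
 * read-only inputs.
 */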
1344 static void
1345 gen9_vdenc_huc_virtual_addr_state(VADriverContextP ctx,
1346                                   struct intel_encoder_context *encoder_context,
1347                                   struct huc_virtual_addr_parameter *params)
1348 {
1349     struct i965_driver_data *i965 = i965_driver_data(ctx);
1350     struct intel_batchbuffer *batch = encoder_context->base.batch;
1351     int i;
1352
1353     BEGIN_BCS_BATCH(batch, 49);
1354
1355     OUT_BCS_BATCH(batch, HUC_VIRTUAL_ADDR_STATE | (49 - 2));
1356
1357     for (i = 0; i < 16; i++) {
1358         if (params->regions[i].huc_surface_res && params->regions[i].huc_surface_res->bo)
1359             OUT_BUFFER_3DW(batch,
1360                            params->regions[i].huc_surface_res->bo,
1361                            !!params->regions[i].is_target, 0, 0);
1362         else
1363             OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1364     }
1365
1366     ADVANCE_BCS_BATCH(batch);
1367 }
1368
1369 static void
1370 gen9_vdenc_huc_ind_obj_base_addr_state(VADriverContextP ctx,
1371                                        struct intel_encoder_context *encoder_context,
1372                                        struct huc_ind_obj_base_addr_parameter *params)
1373 {
1374     struct i965_driver_data *i965 = i965_driver_data(ctx);
1375     struct intel_batchbuffer *batch = encoder_context->base.batch;
1376
1377     BEGIN_BCS_BATCH(batch, 11);
1378
1379     OUT_BCS_BATCH(batch, HUC_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
1380
1381     if (params->huc_indirect_stream_in_object_res)
1382         OUT_BUFFER_3DW(batch,
1383                        params->huc_indirect_stream_in_object_res->bo,
1384                        0, 0, 0);
1385     else
1386         OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1387
1388     OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */
1389
1390     if (params->huc_indirect_stream_out_object_res)
1391         OUT_BUFFER_3DW(batch,
1392                        params->huc_indirect_stream_out_object_res->bo,
1393                        1, 0, 0);
1394     else
1395         OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1396
1397     OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */
1398
1399     ADVANCE_BCS_BATCH(batch);
1400 }
1401
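/*
 * Record HUC_STATUS2 for this pass: DW0 of huc_status2_res holds the mask
 * (1 << 6) written immediately, and the DWORD at offset 4 receives the live
 * register value via MI_STORE_REGISTER_MEM. The pair is later consumed by
 * the conditional batch-buffer-end in gen9_vdenc_huc_brc_update().
 */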
1402 static void
1403 gen9_vdenc_huc_store_huc_status2(VADriverContextP ctx,
1404                                  struct intel_encoder_context *encoder_context)
1405 {
1406     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1407     struct intel_batchbuffer *batch = encoder_context->base.batch;
1408     struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
1409     struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
1410
1411     /* Write HUC_STATUS2 mask (1 << 6) */
1412     memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
1413     mi_store_data_imm_params.bo = vdenc_context->huc_status2_res.bo;
1414     mi_store_data_imm_params.offset = 0;
1415     mi_store_data_imm_params.dw0 = (1 << 6);
1416     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
1417
1418     /* Store HUC_STATUS2 */
1419     memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
1420     mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS2;
1421     mi_store_register_mem_params.bo = vdenc_context->huc_status2_res.bo;
1422     mi_store_register_mem_params.offset = 4;
1423     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
1424 }
1425
1426 static void
1427 gen9_vdenc_huc_stream_object(VADriverContextP ctx,
1428                              struct intel_encoder_context *encoder_context,
1429                              struct huc_stream_object_parameter *params)
1430 {
1431     struct intel_batchbuffer *batch = encoder_context->base.batch;
1432
1433     BEGIN_BCS_BATCH(batch, 5);
1434
1435     OUT_BCS_BATCH(batch, HUC_STREAM_OBJECT | (5 - 2));
1436     OUT_BCS_BATCH(batch, params->indirect_stream_in_data_length);
1437     OUT_BCS_BATCH(batch,
1438                   (1 << 31) |   /* Must be 1 */
1439                   params->indirect_stream_in_start_address);
1440     OUT_BCS_BATCH(batch, params->indirect_stream_out_start_address);
1441     OUT_BCS_BATCH(batch,
1442                   (!!params->huc_bitstream_enable << 29) |
1443                   (params->length_mode << 27) |
1444                   (!!params->stream_out << 26) |
1445                   (!!params->emulation_prevention_byte_removal << 25) |
1446                   (!!params->start_code_search_engine << 24) |
1447                   (params->start_code_byte2 << 16) |
1448                   (params->start_code_byte1 << 8) |
1449                   params->start_code_byte0);
1450
1451     ADVANCE_BCS_BATCH(batch);
1452 }
1453
1454 static void
1455 gen9_vdenc_huc_start(VADriverContextP ctx,
1456                      struct intel_encoder_context *encoder_context,
1457                      struct huc_start_parameter *params)
1458 {
1459     struct intel_batchbuffer *batch = encoder_context->base.batch;
1460
1461     BEGIN_BCS_BATCH(batch, 2);
1462
1463     OUT_BCS_BATCH(batch, HUC_START | (2 - 2));
1464     OUT_BCS_BATCH(batch, !!params->last_stream_object);
1465
1466     ADVANCE_BCS_BATCH(batch);
1467 }
1468
1469 static void
1470 gen9_vdenc_vd_pipeline_flush(VADriverContextP ctx,
1471                              struct intel_encoder_context *encoder_context,
1472                              struct vd_pipeline_flush_parameter *params)
1473 {
1474     struct intel_batchbuffer *batch = encoder_context->base.batch;
1475
1476     BEGIN_BCS_BATCH(batch, 2);
1477
1478     OUT_BCS_BATCH(batch, VD_PIPELINE_FLUSH | (2 - 2));
1479     OUT_BCS_BATCH(batch,
1480                   params->mfx_pipeline_command_flush << 19 |
1481                   params->mfl_pipeline_command_flush << 18 |
1482                   params->vdenc_pipeline_command_flush << 17 |
1483                   params->hevc_pipeline_command_flush << 16 |
1484                   params->vd_command_message_parser_done << 4 |
1485                   params->mfx_pipeline_done << 3 |
1486                   params->mfl_pipeline_done << 2 |
1487                   params->vdenc_pipeline_done << 1 |
1488                   params->hevc_pipeline_done);
1489
1490     ADVANCE_BCS_BATCH(batch);
1491 }
1492
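/*
 * MaxMBPS (macroblocks per second) for each H.264 level, per Table A-1 of
 * the spec; e.g. level 4.1 allows 245760 MB/s, i.e. roughly 30 fps at 1080p
 * (8160 MBs per frame).
 */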
1493 static int
1494 gen9_vdenc_get_max_mbps(int level_idc)
1495 {
1496     int max_mbps = 11880;
1497
1498     switch (level_idc) {
1499     case 20:
1500         max_mbps = 11880;
1501         break;
1502
1503     case 21:
1504         max_mbps = 19800;
1505         break;
1506
1507     case 22:
1508         max_mbps = 20250;
1509         break;
1510
1511     case 30:
1512         max_mbps = 40500;
1513         break;
1514
1515     case 31:
1516         max_mbps = 108000;
1517         break;
1518
1519     case 32:
1520         max_mbps = 216000;
1521         break;
1522
1523     case 40:
1524     case 41:
1525         max_mbps = 245760;
1526         break;
1527
1528     case 42:
1529         max_mbps = 522240;
1530         break;
1531
1532     case 50:
1533         max_mbps = 589824;
1534         break;
1535
1536     case 51:
1537         max_mbps = 983040;
1538         break;
1539
1540     case 52:
1541         max_mbps = 2073600;
1542         break;
1543
1544     default:
1545         break;
1546     }
1547
1548     return max_mbps;
1549 }
1550
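/*
 * Derive a per-frame size cap from the level limits: bits_per_mb scaled by
 * max(MBs per frame, MaxMBPS / 172) on one hand, and by the number of MBs
 * the level allows within one frame period on the other; the smaller of the
 * two, further capped by the frame dimensions, is returned.
 */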
1551 static unsigned int
1552 gen9_vdenc_get_profile_level_max_frame(VADriverContextP ctx,
1553                                        struct intel_encoder_context *encoder_context,
1554                                        int level_idc)
1555 {
1556     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1557     double bits_per_mb, tmpf;
1558     int max_mbps, num_mb_per_frame;
1559     uint64_t max_byte_per_frame0, max_byte_per_frame1;
1560     unsigned int ret;
1561
1562     if (level_idc >= 31 && level_idc <= 40)
1563         bits_per_mb = 96.0;
1564     else
1565         bits_per_mb = 192.0;
1566
1567     max_mbps = gen9_vdenc_get_max_mbps(level_idc);
1568     num_mb_per_frame = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs;
1569
1570     tmpf = (double)num_mb_per_frame;
1571
1572     if (tmpf < max_mbps / 172.0)
1573         tmpf = max_mbps / 172.0;
1574
1575     max_byte_per_frame0 = (uint64_t)(tmpf * bits_per_mb);
1576     max_byte_per_frame1 = (uint64_t)(((double)max_mbps * vdenc_context->framerate.den) /
1577                                      (double)vdenc_context->framerate.num * bits_per_mb);
1578
1579     /* TODO: check VAEncMiscParameterTypeMaxFrameSize */
1580     ret = (unsigned int)MIN(max_byte_per_frame0, max_byte_per_frame1);
1581     ret = (unsigned int)MIN(ret, vdenc_context->frame_width * vdenc_context->frame_height);
1582
1583     return ret;
1584 }
1585
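/*
 * Heuristic initial QP: compute the ratio of pixels per frame to bits per
 * frame (the inverse of bits-per-pixel), map its log10 through the linear
 * segment (x0,y0)-(x1,y1), add a small safety margin plus a VBV-derived
 * delta, and clamp the result to the valid AVC QP range.
 * Illustrative only (assuming target_bit_rate is in kbps): 1080p at 30 fps
 * and 4000 kbps works out to roughly QP 32 after the final decrement, given
 * a VBV large enough that no delta is added.
 */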
1586 static int
1587 gen9_vdenc_calculate_initial_qp(VADriverContextP ctx,
1588                                 struct encode_state *encode_state,
1589                                 struct intel_encoder_context *encoder_context)
1590 {
1591     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1592     float x0 = 0, y0 = 1.19f, x1 = 1.75f, y1 = 1.75f;
1593     unsigned frame_size;
1594     int qp, delta_qp;
1595
1596     frame_size = (vdenc_context->frame_width * vdenc_context->frame_height * 3 / 2);
1597     qp = (int)(1.0 / 1.2 * pow(10.0,
1598                                (log10(frame_size * 2.0 / 3.0 * vdenc_context->framerate.num /
1599                                       ((double)vdenc_context->target_bit_rate * 1000.0 * vdenc_context->framerate.den)) - x0) *
1600                                (y1 - y0) / (x1 - x0) + y0) + 0.5);
1601     qp += 2;
1602     delta_qp = (int)(9 - (vdenc_context->vbv_buffer_size_in_bit * ((double)vdenc_context->framerate.num) /
1603                           ((double)vdenc_context->target_bit_rate * 1000.0 * vdenc_context->framerate.den)));
1604     if (delta_qp > 0)
1605         qp += delta_qp;
1606
1607     qp = CLAMP(1, 51, qp);
1608     qp--;
1609
1610     if (qp < 0)
1611         qp = 1;
1612
1613     return qp;
1614 }
1615
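/*
 * Fill the DMEM block consumed by the BRC init/reset HuC kernel: frame
 * geometry, target/min/max bitrates, VBV size and fullness, frame rate, GOP
 * layout, QP bounds, the deviation thresholds (scaled by the bits-per-frame
 * to VBV ratio) and the initial QP estimate.
 */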
1616 static void
1617 gen9_vdenc_update_huc_brc_init_dmem(VADriverContextP ctx,
1618                                     struct encode_state *encode_state,
1619                                     struct intel_encoder_context *encoder_context)
1620 {
1621     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1622     struct huc_brc_init_dmem *dmem;
1623     VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1624     double input_bits_per_frame, bps_ratio;
1625     int i;
1626
1627     vdenc_context->brc_init_reset_input_bits_per_frame =
1628         ((double)vdenc_context->max_bit_rate * 1000.0 * vdenc_context->framerate.den) / vdenc_context->framerate.num;
1629     vdenc_context->brc_init_current_target_buf_full_in_bits = vdenc_context->brc_init_reset_input_bits_per_frame;
1630     vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
1631
1632     dmem = (struct huc_brc_init_dmem *)i965_map_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1633
1634     if (!dmem)
1635         return;
1636
1637     memset(dmem, 0, sizeof(*dmem));
1638
1639     dmem->brc_func = vdenc_context->brc_initted ? 2 : 0;
1640
1641     dmem->frame_width = vdenc_context->frame_width;
1642     dmem->frame_height = vdenc_context->frame_height;
1643
1644     dmem->target_bitrate = vdenc_context->target_bit_rate * 1000;
1645     dmem->min_rate = vdenc_context->min_bit_rate * 1000;
1646     dmem->max_rate = vdenc_context->max_bit_rate * 1000;
1647     dmem->buffer_size = vdenc_context->vbv_buffer_size_in_bit;
1648     dmem->init_buffer_fullness = vdenc_context->init_vbv_buffer_fullness_in_bit;
1649
1650     if (dmem->init_buffer_fullness > vdenc_context->vbv_buffer_size_in_bit)
1651         dmem->init_buffer_fullness = vdenc_context->vbv_buffer_size_in_bit;
1652
1653     if (vdenc_context->internal_rate_mode == I965_BRC_CBR)
1654         dmem->brc_flag |= 0x10;
1655     else if (vdenc_context->internal_rate_mode == I965_BRC_VBR)
1656         dmem->brc_flag |= 0x20;
1657
1658     dmem->frame_rate_m = vdenc_context->framerate.num;
1659     dmem->frame_rate_d = vdenc_context->framerate.den;
1660
1661     dmem->profile_level_max_frame = gen9_vdenc_get_profile_level_max_frame(ctx, encoder_context, seq_param->level_idc);
1662
1663     if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1664         dmem->num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
1665
1666     dmem->min_qp = 10;
1667     dmem->max_qp = 51;
1668
1669     input_bits_per_frame = ((double)vdenc_context->max_bit_rate * 1000.0 * vdenc_context->framerate.den) / vdenc_context->framerate.num;
1670     bps_ratio = input_bits_per_frame /
1671         ((double)vdenc_context->vbv_buffer_size_in_bit * vdenc_context->framerate.den / vdenc_context->framerate.num);
1672
1673     if (bps_ratio < 0.1)
1674         bps_ratio = 0.1;
1675
1676     if (bps_ratio > 3.5)
1677         bps_ratio = 3.5;
1678
1679     for (i = 0; i < 4; i++) {
1680         dmem->dev_thresh_pb0[i] = (char)(-50 * pow(vdenc_brc_dev_threshpb0_fp_neg[i], bps_ratio));
1681         dmem->dev_thresh_pb0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshpb0_fp_pos[i], bps_ratio));
1682
1683         dmem->dev_thresh_i0[i] = (char)(-50 * pow(vdenc_brc_dev_threshi0_fp_neg[i], bps_ratio));
1684         dmem->dev_thresh_i0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshi0_fp_pos[i], bps_ratio));
1685
1686         dmem->dev_thresh_vbr0[i] = (char)(-50 * pow(vdenc_brc_dev_threshvbr0_neg[i], bps_ratio));
1687         dmem->dev_thresh_vbr0[i + 4] = (char)(100 * pow(vdenc_brc_dev_threshvbr0_pos[i], bps_ratio));
1688     }
1689
1690     dmem->init_qp_ip = gen9_vdenc_calculate_initial_qp(ctx, encode_state, encoder_context);
1691
1692     if (vdenc_context->mb_brc_enabled) {
1693         dmem->mb_qp_ctrl = 1;
1694         dmem->dist_qp_delta[0] = -5;
1695         dmem->dist_qp_delta[1] = -2;
1696         dmem->dist_qp_delta[2] = 2;
1697         dmem->dist_qp_delta[3] = 5;
1698     }
1699
1700     dmem->slice_size_ctrl_en = 0;       /* TODO: add support for slice size control */
1701
1702     dmem->oscillation_qp_delta = 0;     /* TODO: add support */
1703     dmem->first_iframe_no_hrd_check = 0;/* TODO: add support */
1704
1705     // 2nd re-encode pass if possible
1706     if (vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs >= (3840 * 2160 / 256)) {
1707         dmem->top_qp_delta_thr_for_2nd_pass = 5;
1708         dmem->bottom_qp_delta_thr_for_2nd_pass = 5;
1709         dmem->top_frame_size_threshold_for_2nd_pass = 80;
1710         dmem->bottom_frame_size_threshold_for_2nd_pass = 80;
1711     } else {
1712         dmem->top_qp_delta_thr_for_2nd_pass = 2;
1713         dmem->bottom_qp_delta_thr_for_2nd_pass = 1;
1714         dmem->top_frame_size_threshold_for_2nd_pass = 32;
1715         dmem->bottom_frame_size_threshold_for_2nd_pass = 24;
1716     }
1717
1718     dmem->qp_select_for_first_pass = 1;
1719     dmem->mb_header_compensation = 1;
1720     dmem->delta_qp_adaptation = 1;
1721     dmem->max_crf_quality_factor = 52;
1722
1723     dmem->crf_quality_factor = 0;               /* TODO: add support for CRF */
1724     dmem->scenario_info = 0;
1725
1726     memcpy(&dmem->estrate_thresh_i0, vdenc_brc_estrate_thresh_i0, sizeof(dmem->estrate_thresh_i0));
1727     memcpy(&dmem->estrate_thresh_p0, vdenc_brc_estrate_thresh_p0, sizeof(dmem->estrate_thresh_p0));
1728
1729     i965_unmap_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1730 }
1731
1732 static void
1733 gen9_vdenc_huc_brc_init_reset(VADriverContextP ctx,
1734                               struct encode_state *encode_state,
1735                               struct intel_encoder_context *encoder_context)
1736 {
1737     struct intel_batchbuffer *batch = encoder_context->base.batch;
1738     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1739     struct huc_pipe_mode_select_parameter pipe_mode_select_params;
1740     struct huc_imem_state_parameter imem_state_params;
1741     struct huc_dmem_state_parameter dmem_state_params;
1742     struct huc_virtual_addr_parameter virtual_addr_params;
1743     struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
1744     struct huc_stream_object_parameter stream_object_params;
1745     struct huc_start_parameter start_params;
1746     struct vd_pipeline_flush_parameter pipeline_flush_params;
1747     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
1748
1749     vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
1750
1751     memset(&imem_state_params, 0, sizeof(imem_state_params));
1752     imem_state_params.huc_firmware_descriptor = HUC_BRC_INIT_RESET;
1753     gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);
1754
1755     memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
1756     gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);
1757
1758     gen9_vdenc_update_huc_brc_init_dmem(ctx, encode_state, encoder_context);
1759     memset(&dmem_state_params, 0, sizeof(dmem_state_params));
1760     dmem_state_params.huc_data_source_res = &vdenc_context->brc_init_reset_dmem_res;
1761     dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
1762     dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_init_dmem), 64);
1763     gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);
1764
1765     memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
1766     virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
1767     virtual_addr_params.regions[0].is_target = 1;
1768     gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);
1769
1770     memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
1771     ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
1772     ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
1773     gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);
1774
1775     memset(&stream_object_params, 0, sizeof(stream_object_params));
1776     stream_object_params.indirect_stream_in_data_length = 1;
1777     stream_object_params.indirect_stream_in_start_address = 0;
1778     gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);
1779
1780     gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);
1781
1782     memset(&start_params, 0, sizeof(start_params));
1783     start_params.last_stream_object = 1;
1784     gen9_vdenc_huc_start(ctx, encoder_context, &start_params);
1785
1786     memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
1787     pipeline_flush_params.hevc_pipeline_done = 1;
1788     pipeline_flush_params.hevc_pipeline_command_flush = 1;
1789     gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
1790
1791     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
1792     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
1793     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
1794 }
1795
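/*
 * Fill the per-frame DMEM block for the BRC update HuC kernel: advance the
 * target buffer fullness by the nominal bits per frame (wrapping at the VBV
 * size), and pass the frame type, current PAK pass, scene-change settings
 * and the global adjustment tables.
 */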
1796 static void
1797 gen9_vdenc_update_huc_update_dmem(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1798 {
1799     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1800     struct huc_brc_update_dmem *dmem;
1801     int i, num_p_in_gop = 0;
1802
1803     dmem = (struct huc_brc_update_dmem *)i965_map_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
1804
1805     if (!dmem)
1806         return;
1807
1808     dmem->brc_func = 1;
1809
1810     if (vdenc_context->brc_initted && (vdenc_context->current_pass == 0)) {
1811         vdenc_context->brc_init_previous_target_buf_full_in_bits =
1812             (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits);
1813         vdenc_context->brc_init_current_target_buf_full_in_bits += vdenc_context->brc_init_reset_input_bits_per_frame;
1814         vdenc_context->brc_target_size += vdenc_context->brc_init_reset_input_bits_per_frame;
1815     }
1816
1817     if (vdenc_context->brc_target_size > vdenc_context->vbv_buffer_size_in_bit)
1818         vdenc_context->brc_target_size -= vdenc_context->vbv_buffer_size_in_bit;
1819
1820     dmem->target_size = vdenc_context->brc_target_size;
1821
1822     dmem->peak_tx_bits_per_frame = (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits - vdenc_context->brc_init_previous_target_buf_full_in_bits);
1823
1824     dmem->target_slice_size = 0;        // TODO: add support for slice size control
1825
1826     memcpy(dmem->start_global_adjust_frame, vdenc_brc_start_global_adjust_frame, sizeof(dmem->start_global_adjust_frame));
1827     memcpy(dmem->global_rate_ratio_threshold, vdenc_brc_global_rate_ratio_threshold, sizeof(dmem->global_rate_ratio_threshold));
1828
1829     dmem->current_frame_type = (vdenc_context->frame_type + 2) % 3;      // I frame:2, P frame:0, B frame:1
1830
1831     memcpy(dmem->start_global_adjust_mult, vdenc_brc_start_global_adjust_mult, sizeof(dmem->start_global_adjust_mult));
1832     memcpy(dmem->start_global_adjust_div, vdenc_brc_start_global_adjust_div, sizeof(dmem->start_global_adjust_div));
1833     memcpy(dmem->global_rate_ratio_threshold_qp, vdenc_brc_global_rate_ratio_threshold_qp, sizeof(dmem->global_rate_ratio_threshold_qp));
1834
1835     dmem->current_pak_pass = vdenc_context->current_pass;
1836     dmem->max_num_passes = 2;
1837
1838     dmem->scene_change_detect_enable = 1;
1839     dmem->scene_change_prev_intra_percent_threshold = 96;
1840     dmem->scene_change_cur_intra_perent_threshold = 192;
1841
1842     if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1843         num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
1844
1845     for (i = 0; i < 2; i++)
1846         dmem->scene_change_width[i] = MIN((num_p_in_gop + 1) / 5, 6);
1847
1848     if (vdenc_context->is_low_delay)
1849         dmem->ip_average_coeff = 0;
1850     else
1851         dmem->ip_average_coeff = 128;
1852
1853     dmem->skip_frame_size = 0;
1854     dmem->num_of_frames_skipped = 0;
1855
1856     dmem->roi_source = 0;               // TODO: add support for dirty ROI
1857     dmem->hme_detection_enable = 0;     // TODO: support HME kernel
1858     dmem->hme_cost_enable = 1;
1859
1860     dmem->second_level_batchbuffer_size = 228;
1861
1862     i965_unmap_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
1863 }
1864
1865 static void
1866 gen9_vdenc_init_mfx_avc_img_state(VADriverContextP ctx,
1867                                   struct encode_state *encode_state,
1868                                   struct intel_encoder_context *encoder_context,
1869                                   struct gen9_mfx_avc_img_state *pstate,
1870                                   int use_huc)
1871 {
1872     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1873     VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1874     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1875
1876     memset(pstate, 0, sizeof(*pstate));
1877
1878     pstate->dw0.value = (MFX_AVC_IMG_STATE | (sizeof(*pstate) / 4 - 2));
1879
1880     pstate->dw1.frame_size_in_mbs_minus1 = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs - 1;
1881
1882     pstate->dw2.frame_width_in_mbs_minus1 = vdenc_context->frame_width_in_mbs - 1;
1883     pstate->dw2.frame_height_in_mbs_minus1 = vdenc_context->frame_height_in_mbs - 1;
1884
1885     pstate->dw3.image_structure = 0;
1886     pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
1887     pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
1888     pstate->dw3.brc_domain_rate_control_enable = !!use_huc;
1889     pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
1890     pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
1891
1892     pstate->dw4.field_picture_flag = 0;
1893     pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
1894     pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
1895     pstate->dw4.transform_8x8_idct_mode_flag = vdenc_context->transform_8x8_mode_enable;
1896     pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
1897     pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
1898     pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
1899     pstate->dw4.mb_mv_format_flag = 1;
1900     pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
1901     pstate->dw4.mv_unpacked_flag = 1;
1902     pstate->dw4.insert_test_flag = 0;
1903     pstate->dw4.load_slice_pointer_flag = 0;
1904     pstate->dw4.macroblock_stat_enable = 0;        /* Always 0 in VDEnc mode */
1905     pstate->dw4.minimum_frame_size = 0;
1906
1907     pstate->dw5.intra_mb_max_bit_flag = 1;
1908     pstate->dw5.inter_mb_max_bit_flag = 1;
1909     pstate->dw5.frame_size_over_flag = 1;
1910     pstate->dw5.frame_size_under_flag = 1;
1911     pstate->dw5.intra_mb_ipcm_flag = 1;
1912     pstate->dw5.mb_rate_ctrl_flag = 0;             /* Always 0 in VDEnc mode */
1913     pstate->dw5.non_first_pass_flag = 0;
1914     pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
1915     pstate->dw5.aq_chroma_disable = 1;
1916
1917     pstate->dw6.intra_mb_max_size = 2700;
1918     pstate->dw6.inter_mb_max_size = 4095;
1919
1920     pstate->dw8.slice_delta_qp_max0 = 0;
1921     pstate->dw8.slice_delta_qp_max1 = 0;
1922     pstate->dw8.slice_delta_qp_max2 = 0;
1923     pstate->dw8.slice_delta_qp_max3 = 0;
1924
1925     pstate->dw9.slice_delta_qp_min0 = 0;
1926     pstate->dw9.slice_delta_qp_min1 = 0;
1927     pstate->dw9.slice_delta_qp_min2 = 0;
1928     pstate->dw9.slice_delta_qp_min3 = 0;
1929
1930     pstate->dw10.frame_bitrate_min = 0;
1931     pstate->dw10.frame_bitrate_min_unit = 1;
1932     pstate->dw10.frame_bitrate_min_unit_mode = 1;
1933     pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
1934     pstate->dw10.frame_bitrate_max_unit = 1;
1935     pstate->dw10.frame_bitrate_max_unit_mode = 1;
1936
1937     pstate->dw11.frame_bitrate_min_delta = 0;
1938     pstate->dw11.frame_bitrate_max_delta = 0;
1939
1940     pstate->dw12.vad_error_logic = 1;
1941     /* TODO: set parameters DW19/DW20 for slices */
1942 }
1943
1944 static void
1945 gen9_vdenc_init_vdenc_img_state(VADriverContextP ctx,
1946                                 struct encode_state *encode_state,
1947                                 struct intel_encoder_context *encoder_context,
1948                                 struct gen9_vdenc_img_state *pstate,
1949                                 int update_cost)
1950 {
1951     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1952     VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1953     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1954     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1955
1956     memset(pstate, 0, sizeof(*pstate));
1957
1958     pstate->dw0.value = (VDENC_IMG_STATE | (sizeof(*pstate) / 4 - 2));
1959
1960     if (vdenc_context->frame_type == VDENC_FRAME_I) {
1961         pstate->dw4.intra_sad_measure_adjustment = 2;
1962         pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;
1963
1964         pstate->dw5.cre_prefetch_enable = 1;
1965
1966         pstate->dw9.mode0_cost = 10;
1967         pstate->dw9.mode1_cost = 0;
1968         pstate->dw9.mode2_cost = 3;
1969         pstate->dw9.mode3_cost = 30;
1970
1971         pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
1972         pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
1973         pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;
1974
1975         pstate->dw22.small_mb_size_in_word = 0xff;
1976         pstate->dw22.large_mb_size_in_word = 0xff;
1977
1978         pstate->dw27.max_hmv_r = 0x2000;
1979         pstate->dw27.max_vmv_r = 0x200;
1980
1981         pstate->dw33.qp_range_check_upper_bound = 0x33;
1982         pstate->dw33.qp_range_check_lower_bound = 0x0a;
1983         pstate->dw33.qp_range_check_value = 0x0f;
1984     } else {
1985         pstate->dw2.bidirectional_weight = 0x20;
1986
1987         pstate->dw4.subpel_mode = 3;
1988         pstate->dw4.bme_disable_for_fbr_message = 1;
1989         pstate->dw4.inter_sad_measure_adjustment = 2;
1990         pstate->dw4.intra_sad_measure_adjustment = 2;
1991         pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;
1992
1993         pstate->dw5.cre_prefetch_enable = 1;
1994
1995         pstate->dw8.non_skip_zero_mv_const_added = 1;
1996         pstate->dw8.non_skip_mb_mode_const_added = 1;
1997         pstate->dw8.ref_id_cost_mode_select = 1;
1998
1999         pstate->dw9.mode0_cost = 7;
2000         pstate->dw9.mode1_cost = 26;
2001         pstate->dw9.mode2_cost = 30;
2002         pstate->dw9.mode3_cost = 57;
2003
2004         pstate->dw10.mode4_cost = 8;
2005         pstate->dw10.mode5_cost = 2;
2006         pstate->dw10.mode6_cost = 4;
2007         pstate->dw10.mode7_cost = 6;
2008
2009         pstate->dw11.mode8_cost = 5;
2010         pstate->dw11.mode9_cost = 0;
2011         pstate->dw11.ref_id_cost = 4;
2012         pstate->dw11.chroma_intra_mode_cost = 0;
2013
2014         pstate->dw12_13.mv_cost.dw0.mv0_cost = 0;
2015         pstate->dw12_13.mv_cost.dw0.mv1_cost = 6;
2016         pstate->dw12_13.mv_cost.dw0.mv2_cost = 6;
2017         pstate->dw12_13.mv_cost.dw0.mv3_cost = 9;
2018         pstate->dw12_13.mv_cost.dw1.mv4_cost = 10;
2019         pstate->dw12_13.mv_cost.dw1.mv5_cost = 13;
2020         pstate->dw12_13.mv_cost.dw1.mv6_cost = 14;
2021         pstate->dw12_13.mv_cost.dw1.mv7_cost = 24;
2022
2023         pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
2024         pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
2025         pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;
2026
2027         pstate->dw22.small_mb_size_in_word = 0xff;
2028         pstate->dw22.large_mb_size_in_word = 0xff;
2029
2030         pstate->dw27.max_hmv_r = 0x2000;
2031         pstate->dw27.max_vmv_r = 0x200;
2032
2033         pstate->dw31.offset0_for_zone0_neg_zone1_boundary = 800;
2034
2035         pstate->dw32.offset1_for_zone1_neg_zone2_boundary = 1600;
2036         pstate->dw32.offset2_for_zone2_neg_zone3_boundary = 2400;
2037
2038         pstate->dw33.qp_range_check_upper_bound = 0x33;
2039         pstate->dw33.qp_range_check_lower_bound = 0x0a;
2040         pstate->dw33.qp_range_check_value = 0x0f;
2041
2042         pstate->dw34.midpoint_distortion = 0x640;
2043     }
2044
2045     /* ROI will be updated in HuC kernel for CBR/VBR */
2046     if (!vdenc_context->brc_enabled && vdenc_context->num_roi) {
2047         pstate->dw34.roi_enable = 1;
2048
2049         pstate->dw30.roi_qp_adjustment_for_zone1 = CLAMP(-8, 7, vdenc_context->roi[0].value);
2050
2051         if (vdenc_context->num_roi > 1)
2052             pstate->dw30.roi_qp_adjustment_for_zone2 = CLAMP(-8, 7, vdenc_context->roi[1].value);
2053
2054         if (vdenc_context->num_roi > 2)
2055             pstate->dw30.roi_qp_adjustment_for_zone3 = CLAMP(-8, 7, vdenc_context->roi[2].value);
2056     }
2057
2058     pstate->dw1.transform_8x8_flag = vdenc_context->transform_8x8_mode_enable;
2059     pstate->dw1.extended_pak_obj_cmd_enable = !!vdenc_context->use_extended_pak_obj_cmd;
2060
2061     pstate->dw3.picture_width = vdenc_context->frame_width_in_mbs;
2062
2063     pstate->dw4.forward_transform_skip_check_enable = 1; /* TODO: double-check it */
2064
2065     pstate->dw5.picture_height_minus1 = vdenc_context->frame_height_in_mbs - 1;
2066     pstate->dw5.picture_type = vdenc_context->frame_type;
2067     pstate->dw5.constrained_intra_prediction_flag  = pic_param->pic_fields.bits.constrained_intra_pred_flag;
2068
2069     if (vdenc_context->frame_type == VDENC_FRAME_P) {
2070         pstate->dw5.hme_ref1_disable = vdenc_context->num_refs[0] == 1 ? 1 : 0;
2071     }
2072
2073     pstate->dw5.mb_slice_threshold_value = 0;
2074
2075     pstate->dw6.slice_macroblock_height_minus1 = vdenc_context->frame_height_in_mbs - 1; /* single slice only */
2076
2077     if (pstate->dw1.transform_8x8_flag)
2078         pstate->dw8.luma_intra_partition_mask = 0;
2079     else
2080         pstate->dw8.luma_intra_partition_mask = (1 << 1); /* disable transform_8x8 */
2081
2082     pstate->dw14.qp_prime_y = pic_param->pic_init_qp + slice_param->slice_qp_delta;      /* TODO: check whether it is OK to use the first slice only */
2083
2084     if (update_cost) {
2085         pstate->dw9.mode0_cost = vdenc_context->mode_cost[0];
2086         pstate->dw9.mode1_cost = vdenc_context->mode_cost[1];
2087         pstate->dw9.mode2_cost = vdenc_context->mode_cost[2];
2088         pstate->dw9.mode3_cost = vdenc_context->mode_cost[3];
2089
2090         pstate->dw10.mode4_cost = vdenc_context->mode_cost[4];
2091         pstate->dw10.mode5_cost = vdenc_context->mode_cost[5];
2092         pstate->dw10.mode6_cost = vdenc_context->mode_cost[6];
2093         pstate->dw10.mode7_cost = vdenc_context->mode_cost[7];
2094
2095         pstate->dw11.mode8_cost = vdenc_context->mode_cost[8];
2096         pstate->dw11.mode9_cost = vdenc_context->mode_cost[9];
2097         pstate->dw11.ref_id_cost = vdenc_context->mode_cost[10];
2098         pstate->dw11.chroma_intra_mode_cost = vdenc_context->mode_cost[11];
2099
2100         pstate->dw12_13.mv_cost.dw0.mv0_cost = vdenc_context->mv_cost[0];
2101         pstate->dw12_13.mv_cost.dw0.mv1_cost = vdenc_context->mv_cost[1];
2102         pstate->dw12_13.mv_cost.dw0.mv2_cost = vdenc_context->mv_cost[2];
2103         pstate->dw12_13.mv_cost.dw0.mv3_cost = vdenc_context->mv_cost[3];
2104         pstate->dw12_13.mv_cost.dw1.mv4_cost = vdenc_context->mv_cost[4];
2105         pstate->dw12_13.mv_cost.dw1.mv5_cost = vdenc_context->mv_cost[5];
2106         pstate->dw12_13.mv_cost.dw1.mv6_cost = vdenc_context->mv_cost[6];
2107         pstate->dw12_13.mv_cost.dw1.mv7_cost = vdenc_context->mv_cost[7];
2108
2109         pstate->dw28_29.hme_mv_cost.dw0.mv0_cost = vdenc_context->hme_mv_cost[0];
2110         pstate->dw28_29.hme_mv_cost.dw0.mv1_cost = vdenc_context->hme_mv_cost[1];
2111         pstate->dw28_29.hme_mv_cost.dw0.mv2_cost = vdenc_context->hme_mv_cost[2];
2112         pstate->dw28_29.hme_mv_cost.dw0.mv3_cost = vdenc_context->hme_mv_cost[3];
2113         pstate->dw28_29.hme_mv_cost.dw1.mv4_cost = vdenc_context->hme_mv_cost[4];
2114         pstate->dw28_29.hme_mv_cost.dw1.mv5_cost = vdenc_context->hme_mv_cost[5];
2115         pstate->dw28_29.hme_mv_cost.dw1.mv6_cost = vdenc_context->hme_mv_cost[6];
2116         pstate->dw28_29.hme_mv_cost.dw1.mv7_cost = vdenc_context->hme_mv_cost[7];
2117     }
2118
2119     pstate->dw27.max_vmv_r = gen9_vdenc_get_max_vmv_range(seq_param->level_idc);
2120
2121     pstate->dw34.image_state_qp_override = (vdenc_context->internal_rate_mode == I965_BRC_CQP) ? 1 : 0;
2122
2123     /* TODO: check rolling I */
2124
2125     /* TODO: handle ROI */
2126
2127     /* TODO: check stream in support */
2128 }
2129
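/*
 * Write the MFX_AVC_IMG_STATE + VDENC_IMG_STATE pair, terminated by
 * MI_BATCH_BUFFER_END, into vdenc_avc_image_state_res; the BRC update kernel
 * reads it as region 3 and uses the second-level batch buffer (region 6) as
 * its output.
 */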
2130 static void
2131 gen9_vdenc_init_img_states(VADriverContextP ctx,
2132                            struct encode_state *encode_state,
2133                            struct intel_encoder_context *encoder_context)
2134 {
2135     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2136     struct gen9_mfx_avc_img_state *mfx_img_cmd;
2137     struct gen9_vdenc_img_state *vdenc_img_cmd;
2138     char *pbuffer;
2139
2140     pbuffer = i965_map_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
2141
2142     if (!pbuffer)
2143         return;
2144
2145     mfx_img_cmd = (struct gen9_mfx_avc_img_state *)pbuffer;
2146     gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, mfx_img_cmd, 1);
2147     pbuffer += sizeof(*mfx_img_cmd);
2148
2149     vdenc_img_cmd = (struct gen9_vdenc_img_state *)pbuffer;
2150     gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, vdenc_img_cmd, 0);
2151     pbuffer += sizeof(*vdenc_img_cmd);
2152
2153     /* Add batch buffer end command */
2154     *((unsigned int *)pbuffer) = MI_BATCH_BUFFER_END;
2155
2156     i965_unmap_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
2157 }
2158
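/*
 * Region 5 input for the BRC update kernel: start from the default constant
 * tables and, for VBR, substitute the VBR-specific distortion-QP and
 * buffer-rate adjustment tables.
 */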
2159 static void
2160 gen9_vdenc_huc_brc_update_constant_data(VADriverContextP ctx,
2161                                         struct encode_state *encode_state,
2162                                         struct intel_encoder_context *encoder_context)
2163 {
2164     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2165     struct huc_brc_update_constant_data *brc_buffer;
2166
2167     brc_buffer = (struct huc_brc_update_constant_data *)
2168         i965_map_gpe_resource(&vdenc_context->brc_constant_data_res);
2169
2170     if (!brc_buffer)
2171         return;
2172
2173     memcpy(brc_buffer, &gen9_brc_update_constant_data, sizeof(gen9_brc_update_constant_data));
2174
2175     if (vdenc_context->internal_rate_mode == I965_BRC_VBR) {
2176         memcpy(brc_buffer->dist_qp_adj_tab_i, dist_qp_adj_tab_i_vbr, sizeof(dist_qp_adj_tab_i_vbr));
2177         memcpy(brc_buffer->dist_qp_adj_tab_p, dist_qp_adj_tab_p_vbr, sizeof(dist_qp_adj_tab_p_vbr));
2178         memcpy(brc_buffer->dist_qp_adj_tab_b, dist_qp_adj_tab_b_vbr, sizeof(dist_qp_adj_tab_b_vbr));
2179         memcpy(brc_buffer->buf_rate_adj_tab_i, buf_rate_adj_tab_i_vbr, sizeof(buf_rate_adj_tab_i_vbr));
2180         memcpy(brc_buffer->buf_rate_adj_tab_p, buf_rate_adj_tab_p_vbr, sizeof(buf_rate_adj_tab_p_vbr));
2181         memcpy(brc_buffer->buf_rate_adj_tab_b, buf_rate_adj_tab_b_vbr, sizeof(buf_rate_adj_tab_b_vbr));
2182     }
2183
2184
2185     i965_unmap_gpe_resource(&vdenc_context->brc_constant_data_res);
2186 }
2187
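/*
 * Per-frame BRC update pass: if BRC init/reset is still pending (or a reset
 * is requested), an MI_CONDITIONAL_BATCH_BUFFER_END on the HUC_STATUS2
 * semaphore written by the init pass is inserted first; then the HuC is
 * programmed (IMEM, DMEM, surface regions, stream object) and its HUC_STATUS
 * result is stored alongside a mask for later conditional checks.
 */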
2188 static void
2189 gen9_vdenc_huc_brc_update(VADriverContextP ctx,
2190                           struct encode_state *encode_state,
2191                           struct intel_encoder_context *encoder_context)
2192 {
2193     struct intel_batchbuffer *batch = encoder_context->base.batch;
2194     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2195     struct huc_pipe_mode_select_parameter pipe_mode_select_params;
2196     struct huc_imem_state_parameter imem_state_params;
2197     struct huc_dmem_state_parameter dmem_state_params;
2198     struct huc_virtual_addr_parameter virtual_addr_params;
2199     struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
2200     struct huc_stream_object_parameter stream_object_params;
2201     struct huc_start_parameter start_params;
2202     struct vd_pipeline_flush_parameter pipeline_flush_params;
2203     struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
2204     struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
2205     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
2206
2207     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
2208     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
2209     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
2210
2211     if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset) {
2212         struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
2213
2214         memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
2215         mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
2216         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
2217     }
2218
2219     gen9_vdenc_init_img_states(ctx, encode_state, encoder_context);
2220
2221     memset(&imem_state_params, 0, sizeof(imem_state_params));
2222     imem_state_params.huc_firmware_descriptor = HUC_BRC_UPDATE;
2223     gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);
2224
2225     memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
2226     gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);
2227
2228     gen9_vdenc_update_huc_update_dmem(ctx, encoder_context);
2229     memset(&dmem_state_params, 0, sizeof(dmem_state_params));
2230     dmem_state_params.huc_data_source_res = &vdenc_context->brc_update_dmem_res[vdenc_context->current_pass];
2231     dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
2232     dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_update_dmem), 64);
2233     gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);
2234
2235     gen9_vdenc_huc_brc_update_constant_data(ctx, encode_state, encoder_context);
2236     memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
2237     virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
2238     virtual_addr_params.regions[0].is_target = 1;
2239     virtual_addr_params.regions[1].huc_surface_res = &vdenc_context->vdenc_statistics_res;
2240     virtual_addr_params.regions[2].huc_surface_res = &vdenc_context->pak_statistics_res;
2241     virtual_addr_params.regions[3].huc_surface_res = &vdenc_context->vdenc_avc_image_state_res;
2242     virtual_addr_params.regions[4].huc_surface_res = &vdenc_context->hme_detection_summary_buffer_res;
2243     virtual_addr_params.regions[4].is_target = 1;
2244     virtual_addr_params.regions[5].huc_surface_res = &vdenc_context->brc_constant_data_res;
2245     virtual_addr_params.regions[6].huc_surface_res = &vdenc_context->second_level_batch_res;
2246     virtual_addr_params.regions[6].is_target = 1;
2247     gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);
2248
2249     memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
2250     ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
2251     ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
2252     gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);
2253
2254     memset(&stream_object_params, 0, sizeof(stream_object_params));
2255     stream_object_params.indirect_stream_in_data_length = 1;
2256     stream_object_params.indirect_stream_in_start_address = 0;
2257     gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);
2258
2259     gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);
2260
2261     memset(&start_params, 0, sizeof(start_params));
2262     start_params.last_stream_object = 1;
2263     gen9_vdenc_huc_start(ctx, encoder_context, &start_params);
2264
2265     memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
2266     pipeline_flush_params.hevc_pipeline_done = 1;
2267     pipeline_flush_params.hevc_pipeline_command_flush = 1;
2268     gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
2269
2270     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
2271     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
2272     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
2273
2274     /* Store HUC_STATUS */
2275     memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
2276     mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS;
2277     mi_store_register_mem_params.bo = vdenc_context->huc_status_res.bo;
2278     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
2279
2280     /* Write HUC_STATUS mask (1 << 31) */
2281     memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
2282     mi_store_data_imm_params.bo = vdenc_context->huc_status_res.bo;
2283     mi_store_data_imm_params.offset = 4;
2284     mi_store_data_imm_params.dw0 = (1 << 31);
2285     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
2286 }
2287
2288 static void
2289 gen9_vdenc_mfx_pipe_mode_select(VADriverContextP ctx,
2290                                 struct encode_state *encode_state,
2291                                 struct intel_encoder_context *encoder_context)
2292 {
2293     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2294     struct intel_batchbuffer *batch = encoder_context->base.batch;
2295
2296     BEGIN_BCS_BATCH(batch, 5);
2297
2298     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2299     OUT_BCS_BATCH(batch,
2300                   (1 << 29) |
2301                   (MFX_LONG_MODE << 17) |       /* Must be long format for encoder */
2302                   (MFD_MODE_VLD << 15) |
2303                   (1 << 13) |                   /* VDEnc mode */
2304                   ((!!vdenc_context->post_deblocking_output_res.bo) << 9)  |    /* Post Deblocking Output */
2305                   ((!!vdenc_context->pre_deblocking_output_res.bo) << 8)  |     /* Pre Deblocking Output */
2306                   (1 << 7)  |                   /* Scaled surface enable */
2307                   (1 << 6)  |                   /* Frame statistics stream out enable, always '1' in VDEnc mode */
2308                   (1 << 4)  |                   /* encoding mode */
2309                   (MFX_FORMAT_AVC << 0));
2310     OUT_BCS_BATCH(batch, 0);
2311     OUT_BCS_BATCH(batch, 0);
2312     OUT_BCS_BATCH(batch, 0);
2313
2314     ADVANCE_BCS_BATCH(batch);
2315 }
2316
2317 static void
2318 gen9_vdenc_mfx_surface_state(VADriverContextP ctx,
2319                              struct intel_encoder_context *encoder_context,
2320                              struct i965_gpe_resource *gpe_resource,
2321                              int id)
2322 {
2323     struct intel_batchbuffer *batch = encoder_context->base.batch;
2324
2325     BEGIN_BCS_BATCH(batch, 6);
2326
2327     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2328     OUT_BCS_BATCH(batch, id);
2329     OUT_BCS_BATCH(batch,
2330                   ((gpe_resource->height - 1) << 18) |
2331                   ((gpe_resource->width - 1) << 4));
2332     OUT_BCS_BATCH(batch,
2333                   (MFX_SURFACE_PLANAR_420_8 << 28) |    /* 420 planar YUV surface */
2334                   (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
2335                   ((gpe_resource->pitch - 1) << 3) |    /* pitch */
2336                   (0 << 2)  |                           /* must be 0 for interleave U/V */
2337                   (1 << 1)  |                           /* must be tiled */
2338                   (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
2339     OUT_BCS_BATCH(batch,
2340                   (0 << 16) |                           /* must be 0 for interleave U/V */
2341                   (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
2342     OUT_BCS_BATCH(batch,
2343                   (0 << 16) |                           /* must be 0 for interleave U/V */
2344                   (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
2345
2346     ADVANCE_BCS_BATCH(batch);
2347 }
2348
2349 static void
2350 gen9_vdenc_mfx_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2351 {
2352     struct i965_driver_data *i965 = i965_driver_data(ctx);
2353     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2354     struct intel_batchbuffer *batch = encoder_context->base.batch;
2355     int i;
2356
2357     BEGIN_BCS_BATCH(batch, 65);
2358
2359     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));
2360
2361     /* the DW1-3 is for pre_deblocking */
2362     OUT_BUFFER_3DW(batch, vdenc_context->pre_deblocking_output_res.bo, 1, 0, 0);
2363
2364     /* the DW4-6 is for the post_deblocking */
2365     OUT_BUFFER_3DW(batch, vdenc_context->post_deblocking_output_res.bo, 1, 0, 0);
2366
2367     /* the DW7-9 is for the uncompressed_picture */
2368     OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);
2369
2370     /* the DW10-12 is for PAK information (write) */
2371     OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 1, 0, 0);
2372
2373     /* the DW13-15 is for the intra_row_store_scratch */
2374     OUT_BUFFER_3DW(batch, vdenc_context->mfx_intra_row_store_scratch_res.bo, 1, 0, 0);
2375
2376     /* the DW16-18 is for the deblocking filter */
2377     OUT_BUFFER_3DW(batch, vdenc_context->mfx_deblocking_filter_row_store_scratch_res.bo, 1, 0, 0);
2378
2379     /* the DW 19-50 is for Reference pictures */
2380     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
2381         OUT_BUFFER_2DW(batch, vdenc_context->list_reference_res[i].bo, 0, 0);
2382     }
2383
2384     /* DW 51, reference picture attributes */
2385     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2386
2387     /* The DW 52-54 is for PAK information (read) */
2388     OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 0, 0, 0);
2389
2390     /* the DW 55-57 is the ILDB buffer */
2391     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2392
2393     /* the DW 58-60 is the second ILDB buffer */
2394     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2395
2396     /* DW 61, memory compress enable & mode */
2397     OUT_BCS_BATCH(batch, 0);
2398
2399     /* the DW 62-64 is the 4x Down Scaling surface */
2400     OUT_BUFFER_3DW(batch, vdenc_context->scaled_4x_recon_surface_res.bo, 1, 0, 0);
2401
2402     ADVANCE_BCS_BATCH(batch);
2403 }
2404
2405 static void
2406 gen9_vdenc_mfx_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2407 {
2408     struct i965_driver_data *i965 = i965_driver_data(ctx);
2409     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2410     struct intel_batchbuffer *batch = encoder_context->base.batch;
2411
2412     BEGIN_BCS_BATCH(batch, 26);
2413
2414     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
2415     /* The DW1-5 is for the MFX indirect bitstream offset, ignore for VDEnc mode */
2416     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2417     OUT_BUFFER_2DW(batch, NULL, 0, 0);
2418
2419     /* the DW6-10 is for MFX Indirect MV Object Base Address, ignore for VDEnc mode */
2420     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2421     OUT_BUFFER_2DW(batch, NULL, 0, 0);
2422
2423     /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
2424     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2425     OUT_BUFFER_2DW(batch, NULL, 0, 0);
2426
2427     /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
2428     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2429     OUT_BUFFER_2DW(batch, NULL, 0, 0);
2430
2431     /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
2432      * Note: an offset is specified in MFX_AVC_SLICE_STATE
2433      */
2434     OUT_BUFFER_3DW(batch,
2435                    vdenc_context->compressed_bitstream.res.bo,
2436                    1,
2437                    0,
2438                    0);
2439     OUT_BUFFER_2DW(batch,
2440                    vdenc_context->compressed_bitstream.res.bo,
2441                    1,
2442                    vdenc_context->compressed_bitstream.end_offset);
2443
2444     ADVANCE_BCS_BATCH(batch);
2445 }
2446
2447 static void
2448 gen9_vdenc_mfx_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2449 {
2450     struct i965_driver_data *i965 = i965_driver_data(ctx);
2451     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2452     struct intel_batchbuffer *batch = encoder_context->base.batch;
2453
2454     BEGIN_BCS_BATCH(batch, 10);
2455
2456     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2457
2458     /* The DW1-3 is for bsd/mpc row store scratch buffer */
2459     OUT_BUFFER_3DW(batch, vdenc_context->mfx_bsd_mpc_row_store_scratch_res.bo, 1, 0, 0);
2460
2461     /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
2462     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2463
2464     /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
2465     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2466
2467     ADVANCE_BCS_BATCH(batch);
2468 }
2469
2470 static void
2471 gen9_vdenc_mfx_qm_state(VADriverContextP ctx,
2472                         int qm_type,
2473                         unsigned int *qm,
2474                         int qm_length,
2475                         struct intel_encoder_context *encoder_context)
2476 {
2477     struct intel_batchbuffer *batch = encoder_context->base.batch;
2478     unsigned int qm_buffer[16];
2479
2480     assert(qm_length <= 16);
2481     assert(sizeof(*qm) == 4);
2482     memcpy(qm_buffer, qm, qm_length * 4);
2483
2484     BEGIN_BCS_BATCH(batch, 18);
2485     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
2486     OUT_BCS_BATCH(batch, qm_type << 0);
2487     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
2488     ADVANCE_BCS_BATCH(batch);
2489 }
2490
2491 static void
2492 gen9_vdenc_mfx_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2493 {
2494     /* TODO: add support for non flat matrix */
2495     unsigned int qm[16] = {
2496         0x10101010, 0x10101010, 0x10101010, 0x10101010,
2497         0x10101010, 0x10101010, 0x10101010, 0x10101010,
2498         0x10101010, 0x10101010, 0x10101010, 0x10101010,
2499         0x10101010, 0x10101010, 0x10101010, 0x10101010
2500     };
2501
2502     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
2503     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
2504     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
2505     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
2506 }
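
/*
 * For the non-flat case noted in the TODO above, the 4x4 scaling lists of a
 * VAIQMatrixBufferH264 (ScalingList4x4[6][16]: lists 0-2 intra Y/Cb/Cr,
 * lists 3-5 inter) would have to be packed four bytes per DW before being
 * handed to gen9_vdenc_mfx_qm_state().  A possible packing for the intra
 * lists (sketch only; iq_matrix is the caller's buffer, and the element
 * order must match what MFX_QM_STATE expects):
 *
 *     unsigned int qm[12];
 *     int m, k;
 *
 *     for (m = 0; m < 3; m++)                    // three 4x4 intra matrices
 *         for (k = 0; k < 16; k += 4)
 *             qm[m * 4 + k / 4] = iq_matrix->ScalingList4x4[m][k] |
 *                                 (iq_matrix->ScalingList4x4[m][k + 1] << 8) |
 *                                 (iq_matrix->ScalingList4x4[m][k + 2] << 16) |
 *                                 (iq_matrix->ScalingList4x4[m][k + 3] << 24);
 *
 * With the flat value 16 every byte is 0x10, which is exactly the
 * 0x10101010 pattern used above.
 */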
2507
2508 static void
2509 gen9_vdenc_mfx_fqm_state(VADriverContextP ctx,
2510                          int fqm_type,
2511                          unsigned int *fqm,
2512                          int fqm_length,
2513                          struct intel_encoder_context *encoder_context)
2514 {
2515     struct intel_batchbuffer *batch = encoder_context->base.batch;
2516     unsigned int fqm_buffer[32];
2517
2518     assert(fqm_length <= 32);
2519     assert(sizeof(*fqm) == 4);
2520     memcpy(fqm_buffer, fqm, fqm_length * 4);
2521
2522     BEGIN_BCS_BATCH(batch, 34);
2523     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
2524     OUT_BCS_BATCH(batch, fqm_type << 0);
2525     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
2526     ADVANCE_BCS_BATCH(batch);
2527 }
2528
2529 static void
2530 gen9_vdenc_mfx_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2531 {
2532     /* TODO: add support for non flat matrix */
2533     unsigned int qm[32] = {
2534         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2535         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2536         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2537         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2538         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2539         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2540         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2541         0x10001000, 0x10001000, 0x10001000, 0x10001000
2542     };
2543
2544     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
2545     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
2546     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
2547     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
2548 }
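
/*
 * Each FQM entry holds the scaled reciprocal of the corresponding QM entry
 * as a 16-bit value, two entries per DW: for the flat value 16 that is
 * 65536 / 16 = 0x1000, hence the 0x10001000 pattern above.  A non-flat
 * matrix could be converted along these lines before packing (sketch only;
 * scaling_list stands for the caller's 8-bit matrix):
 *
 *     uint16_t fqm16[3 * 16];
 *     int i;
 *
 *     for (i = 0; i < 3 * 16; i++)
 *         fqm16[i] = MIN(0xffff, (1 << 16) / scaling_list[i]);
 */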
2549
2550 static void
2551 gen9_vdenc_mfx_avc_img_state(VADriverContextP ctx,
2552                              struct encode_state *encode_state,
2553                              struct intel_encoder_context *encoder_context)
2554 {
2555     struct intel_batchbuffer *batch = encoder_context->base.batch;
2556     struct gen9_mfx_avc_img_state mfx_img_cmd;
2557
2558     gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &mfx_img_cmd, 0);
2559
2560     BEGIN_BCS_BATCH(batch, (sizeof(mfx_img_cmd) >> 2));
2561     intel_batchbuffer_data(batch, &mfx_img_cmd, sizeof(mfx_img_cmd));
2562     ADVANCE_BCS_BATCH(batch);
2563 }
2564
2565 static void
2566 gen9_vdenc_vdenc_pipe_mode_select(VADriverContextP ctx,
2567                                   struct encode_state *encode_state,
2568                                   struct intel_encoder_context *encoder_context)
2569 {
2570     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2571     struct intel_batchbuffer *batch = encoder_context->base.batch;
2572
2573     BEGIN_BCS_BATCH(batch, 2);
2574
2575     OUT_BCS_BATCH(batch, VDENC_PIPE_MODE_SELECT | (2 - 2));
2576     OUT_BCS_BATCH(batch,
2577                   (vdenc_context->vdenc_streamin_enable << 9) |
2578                   (vdenc_context->vdenc_pak_threshold_check_enable << 8) |
2579                   (1 << 7)  |                   /* Tlb prefetch enable */
2580                   (1 << 5)  |                   /* Frame Statistics Stream-Out Enable */
2581                   (VDENC_CODEC_AVC << 0));
2582
2583     ADVANCE_BCS_BATCH(batch);
2584 }
2585
2586 static void
2587 gen9_vdenc_vdenc_surface_state(VADriverContextP ctx,
2588                                struct intel_encoder_context *encoder_context,
2589                                struct i965_gpe_resource *gpe_resource,
2590                                int vdenc_surface_cmd)
2591 {
2592     struct intel_batchbuffer *batch = encoder_context->base.batch;
2593
2594     BEGIN_BCS_BATCH(batch, 6);
2595
2596     OUT_BCS_BATCH(batch, vdenc_surface_cmd | (6 - 2));
2597     OUT_BCS_BATCH(batch, 0);
2598     OUT_BCS_BATCH(batch,
2599                   ((gpe_resource->height - 1) << 18) |
2600                   ((gpe_resource->width - 1) << 4));
2601     OUT_BCS_BATCH(batch,
2602                   (VDENC_SURFACE_PLANAR_420_8 << 28) |  /* 420 planar YUV surface only on SKL */
2603                   (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
2604                   ((gpe_resource->pitch - 1) << 3) |    /* pitch */
2605                   (0 << 2)  |                           /* must be 0 for interleave U/V */
2606                   (1 << 1)  |                           /* must be tiled */
2607                   (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
2608     OUT_BCS_BATCH(batch,
2609                   (0 << 16) |                           /* must be 0 for interleave U/V */
2610                   (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
2611     OUT_BCS_BATCH(batch,
2612                   (0 << 16) |                           /* must be 0 for interleave U/V */
2613                   (gpe_resource->y_cb_offset));         /* y offset for v(cr) */
2614
2615     ADVANCE_BCS_BATCH(batch);
2616 }
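
/*
 * The surface is programmed as interleaved-UV planar 4:2:0 (NV12), so only
 * one chroma offset matters: the Cb/Cr rows start y_cb_offset lines below
 * the luma plane, and the same value is written for both the U and the V
 * entry.  For example, a 1920x1080 NV12 input allocated with a 1088-line
 * aligned luma plane would typically have y_cb_offset = 1088.
 */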
2617
2618 static void
2619 gen9_vdenc_vdenc_src_surface_state(VADriverContextP ctx,
2620                                    struct intel_encoder_context *encoder_context,
2621                                    struct i965_gpe_resource *gpe_resource)
2622 {
2623     gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_SRC_SURFACE_STATE);
2624 }
2625
2626 static void
2627 gen9_vdenc_vdenc_ref_surface_state(VADriverContextP ctx,
2628                                    struct intel_encoder_context *encoder_context,
2629                                    struct i965_gpe_resource *gpe_resource)
2630 {
2631     gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_REF_SURFACE_STATE);
2632 }
2633
2634 static void
2635 gen9_vdenc_vdenc_ds_ref_surface_state(VADriverContextP ctx,
2636                                       struct intel_encoder_context *encoder_context,
2637                                       struct i965_gpe_resource *gpe_resource)
2638 {
2639     gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_DS_REF_SURFACE_STATE);
2640 }
2641
2642 static void
2643 gen9_vdenc_vdenc_pipe_buf_addr_state(VADriverContextP ctx,
2644                                      struct encode_state *encode_state,
2645                                      struct intel_encoder_context *encoder_context)
2646 {
2647     struct i965_driver_data *i965 = i965_driver_data(ctx);
2648     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2649     struct intel_batchbuffer *batch = encoder_context->base.batch;
2650
2651     BEGIN_BCS_BATCH(batch, 37);
2652
2653     OUT_BCS_BATCH(batch, VDENC_PIPE_BUF_ADDR_STATE | (37 - 2));
2654
2655     /* DW1-6 for DS FWD REF0/REF1 */
2656     OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
2657     OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
2658
2659     /* DW7-9 for DS BWD REF0, ignored on SKL */
2660     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2661
2662     /* DW10-12 for uncompressed input data */
2663     OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);
2664
2665     /* DW13-DW15 for streamin data */
2666     if (vdenc_context->vdenc_streamin_enable)
2667         OUT_BUFFER_3DW(batch, vdenc_context->vdenc_streamin_res.bo, 0, 0, 0);
2668     else
2669         OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2670
2671     /* DW16-DW18 for row scratch buffer */
2672     OUT_BUFFER_3DW(batch, vdenc_context->vdenc_row_store_scratch_res.bo, 1, 0, 0);
2673
2674     /* DW19-DW21, ignored on SKL */
2675     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2676
2677     /* DW22-DW27 for FWD REF0/REF1 */
2678     OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
2679     OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
2680
2681     /* DW28-DW30 for FWD REF2, ignored on SKL */
2682     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2683
2684     /* DW31-DW33 for BWD REF0, ignored on SKL */
2685     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2686
2687     /* DW34-DW36 for VDEnc statistics streamout */
2688     OUT_BUFFER_3DW(batch, vdenc_context->vdenc_statistics_res.bo, 1, 0, 0);
2689
2690     ADVANCE_BCS_BATCH(batch);
2691 }
2692
2693 static void
2694 gen9_vdenc_vdenc_const_qpt_state(VADriverContextP ctx,
2695                                  struct encode_state *encode_state,
2696                                  struct intel_encoder_context *encoder_context)
2697 {
2698     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2699     struct intel_batchbuffer *batch = encoder_context->base.batch;
2700
2701     BEGIN_BCS_BATCH(batch, 61);
2702
2703     OUT_BCS_BATCH(batch, VDENC_CONST_QPT_STATE | (61 - 2));
2704
2705     if (vdenc_context->frame_type == VDENC_FRAME_I) {
2706         /* DW1-DW11 */
2707         intel_batchbuffer_data(batch, (void *)vdenc_const_qp_lambda, sizeof(vdenc_const_qp_lambda));
2708
2709         /* DW12-DW25 */
2710         intel_batchbuffer_data(batch, (void *)vdenc_const_skip_threshold, sizeof(vdenc_const_skip_threshold));
2711
2712         /* DW26-DW39 */
2713         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_0, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0));
2714
2715         /* DW40-DW46 */
2716         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_1, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1));
2717
2718         /* DW47-DW53 */
2719         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_2, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2));
2720
2721         /* DW54-DW60 */
2722         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_3, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3));
2723     } else {
2724         int i;
2725         uint16_t tmp_vdenc_skip_threshold_p[28];
2726
2727         memcpy(&tmp_vdenc_skip_threshold_p, vdenc_const_skip_threshold_p, sizeof(vdenc_const_skip_threshold_p));
2728
2729         for (i = 0; i < 28; i++) {
2730             tmp_vdenc_skip_threshold_p[i] *= 3;
2731         }
2732
2733         /* DW1-DW11 */
2734         intel_batchbuffer_data(batch, (void *)vdenc_const_qp_lambda_p, sizeof(vdenc_const_qp_lambda_p));
2735
2736         /* DW12-DW25 */
2737         intel_batchbuffer_data(batch, (void *)tmp_vdenc_skip_threshold_p, sizeof(vdenc_const_skip_threshold_p));
2738
2739         /* DW26-DW39 */
2740         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_0_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0_p));
2741
2742         /* DW40-DW46 */
2743         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_1_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1_p));
2744
2745         /* DW47-DW53 */
2746         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_2_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2_p));
2747
2748         /* DW54-DW60 */
2749         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_3_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3_p));
2750     }
2751
2752     ADVANCE_BCS_BATCH(batch);
2753 }
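
/*
 * DW budget check for VDENC_CONST_QPT_STATE: 11 (QP lambda) + 14 (skip
 * thresholds) + 14 + 7 + 7 + 7 (the four SIC forward-transform coefficient
 * threshold tables) = 60 payload DWs, plus the command header DW, which is
 * why both branches fill exactly the 61 DWs reserved by
 * BEGIN_BCS_BATCH(batch, 61).
 */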
2754
2755 static void
2756 gen9_vdenc_vdenc_walker_state(VADriverContextP ctx,
2757                               struct encode_state *encode_state,
2758                               struct intel_encoder_context *encoder_context)
2759 {
2760     struct intel_batchbuffer *batch = encoder_context->base.batch;
2761
2762     BEGIN_BCS_BATCH(batch, 2);
2763
2764     OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (2 - 2));
2765     OUT_BCS_BATCH(batch, 0); /* All fields are set to 0 */
2766
2767     ADVANCE_BCS_BATCH(batch);
2768 }
2769
2770 static void
2771 gen95_vdenc_vdenc_weightsoffsets_state(VADriverContextP ctx,
2772                                        struct encode_state *encode_state,
2773                                        struct intel_encoder_context *encoder_context,
2774                                        VAEncSliceParameterBufferH264 *slice_param)
2775 {
2776     struct intel_batchbuffer *batch = encoder_context->base.batch;
2777     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2778
2779     BEGIN_BCS_BATCH(batch, 3);
2780
2781     OUT_BCS_BATCH(batch, VDENC_WEIGHTSOFFSETS_STATE | (3 - 2));
2782
2783     if (pic_param->pic_fields.bits.weighted_pred_flag == 1) {
2784         OUT_BCS_BATCH(batch, (slice_param->luma_offset_l0[1] << 24 |
2785                               slice_param->luma_weight_l0[1] << 16 |
2786                               slice_param->luma_offset_l0[0] << 8 |
2787                               slice_param->luma_weight_l0[0] << 0));
2788         OUT_BCS_BATCH(batch, (slice_param->luma_offset_l0[2] << 8 |
2789                               slice_param->luma_weight_l0[2] << 0));
2790     } else {
2791         OUT_BCS_BATCH(batch, (0 << 24 |
2792                               1 << 16 |
2793                               0 << 8 |
2794                               1 << 0));
2795         OUT_BCS_BATCH(batch, (0 << 8 |
2796                               1 << 0));
2797     }
2798
2799
2800     ADVANCE_BCS_BATCH(batch);
2801 }
2802
2803 static void
2804 gen95_vdenc_vdenc_walker_state(VADriverContextP ctx,
2805                                struct encode_state *encode_state,
2806                                struct intel_encoder_context *encoder_context,
2807                                VAEncSliceParameterBufferH264 *slice_param,
2808                                VAEncSliceParameterBufferH264 *next_slice_param)
2809 {
2810     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2811     struct intel_batchbuffer *batch = encoder_context->base.batch;
2812     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2813     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
2814     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
2815     int luma_log2_weight_denom, weighted_pred_idc;
2816
2817     slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
2818     slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
2819
2820     if (next_slice_param) {
2821         next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
2822         next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
2823     } else {
2824         next_slice_hor_pos = 0;
2825         next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
2826     }
2827
2828     if (slice_type == SLICE_TYPE_P)
2829         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
2830     else
2831         weighted_pred_idc = 0;
2832
2833     if (weighted_pred_idc == 1)
2834         luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
2835     else
2836         luma_log2_weight_denom = 0;
2837
2838     BEGIN_BCS_BATCH(batch, 4);
2839
2840     OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (4 - 2));
2841     OUT_BCS_BATCH(batch, (slice_hor_pos << 16 |
2842                           slice_ver_pos));
2843     OUT_BCS_BATCH(batch, (next_slice_hor_pos << 16 |
2844                           next_slice_ver_pos));
2845     OUT_BCS_BATCH(batch, luma_log2_weight_denom);
2846
2847     ADVANCE_BCS_BATCH(batch);
2848 }
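
/*
 * Slice positions are derived from the starting macroblock address in
 * raster order.  For example, with a 1920-wide frame frame_width_in_mbs is
 * 120, so a slice whose macroblock_address is 3600 starts at
 * slice_hor_pos = 3600 % 120 = 0 and slice_ver_pos = 3600 / 120 = 30, i.e.
 * at the left edge of MB row 30.
 */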
2849
2850 static void
2851 gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
2852                            struct encode_state *encode_state,
2853                            struct intel_encoder_context *encoder_context)
2854 {
2855     struct intel_batchbuffer *batch = encoder_context->base.batch;
2856     struct gen9_vdenc_img_state vdenc_img_cmd;
2857
2858     gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, &vdenc_img_cmd, 1);
2859
2860     BEGIN_BCS_BATCH(batch, (sizeof(vdenc_img_cmd) >> 2));
2861     intel_batchbuffer_data(batch, &vdenc_img_cmd, sizeof(vdenc_img_cmd));
2862     ADVANCE_BCS_BATCH(batch);
2863 }
2864
2865 static void
2866 gen9_vdenc_mfx_avc_insert_object(VADriverContextP ctx,
2867                                  struct intel_encoder_context *encoder_context,
2868                                  unsigned int *insert_data, int length_in_dws, int data_bits_in_last_dw,
2869                                  int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
2870                                  int slice_header_indicator)
2871 {
2872     struct intel_batchbuffer *batch = encoder_context->base.batch;
2873
2874     if (data_bits_in_last_dw == 0)
2875         data_bits_in_last_dw = 32;
2876
2877     BEGIN_BCS_BATCH(batch, length_in_dws + 2);
2878
2879     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (length_in_dws));
2880     OUT_BCS_BATCH(batch,
2881                   (0 << 16) |   /* always start at offset 0 */
2882                   (slice_header_indicator << 14) |
2883                   (data_bits_in_last_dw << 8) |
2884                   (skip_emul_byte_count << 4) |
2885                   (!!emulation_flag << 3) |
2886                   ((!!is_last_header) << 2) |
2887                   ((!!is_end_of_slice) << 1) |
2888                   (0 << 0));    /* TODO: check this flag */
2889     intel_batchbuffer_data(batch, insert_data, length_in_dws * 4);
2890
2891     ADVANCE_BCS_BATCH(batch);
2892 }
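
/*
 * Callers pass the header length in DWs plus the number of valid bits in
 * the last DW, both derived from the bit length of the packed header.  For
 * example, a 9-byte NAL unit (72 bits) is inserted with
 * length_in_dws = ALIGN(72, 32) >> 5 = 3 and
 * data_bits_in_last_dw = 72 & 0x1f = 8 (a value of 0 is treated as a full
 * 32-bit DW above).
 */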
2893
2894 static void
2895 gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
2896                                             struct encode_state *encode_state,
2897                                             struct intel_encoder_context *encoder_context,
2898                                             int slice_index)
2899 {
2900     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2901     struct i965_driver_data *i965 = i965_driver_data(ctx);
2902     VAEncPackedHeaderParameterBuffer *param = NULL;
2903     unsigned int length_in_bits;
2904     unsigned int *header_data = NULL;
2905     int count, i, start_index;
2906     int slice_header_index;
2907     unsigned int insert_one_zero_byte = 0;
2908
2909     if (encode_state->slice_header_index[slice_index] == 0)
2910         slice_header_index = -1;
2911     else
2912         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
2913
2914     count = encode_state->slice_rawdata_count[slice_index];
2915     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
2916
2917     for (i = 0; i < count; i++) {
2918         unsigned int skip_emul_byte_cnt;
2919
2920         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
2921
2922         param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
2923
2924         /* skip the slice header packed data type as it is inserted last */
2925         if (param->type == VAEncPackedHeaderSlice)
2926             continue;
2927
2928         length_in_bits = param->bit_length;
2929
2930         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2931
2932         /* as the slice header is still required, the last header flag is set to
2933          * zero.
2934          */
2935         gen9_vdenc_mfx_avc_insert_object(ctx,
2936                                          encoder_context,
2937                                          header_data,
2938                                          ALIGN(length_in_bits, 32) >> 5,
2939                                          length_in_bits & 0x1f,
2940                                          skip_emul_byte_cnt,
2941                                          0,
2942                                          0,
2943                                          !param->has_emulation_bytes,
2944                                          0);
2945
2946     }
2947
2948     if (!vdenc_context->is_frame_level_vdenc) {
2949         insert_one_zero_byte = 1;
2950     }
2951
2952     /* Insert one zero byte before the slice header if no other NAL unit is inserted; required on KBL */
2953     if (insert_one_zero_byte) {
2954         unsigned int insert_data[] = { 0, };
2955
2956         gen9_vdenc_mfx_avc_insert_object(ctx,
2957                                          encoder_context,
2958                                          insert_data,
2959                                          1,
2960                                          8,
2961                                          1,
2962                                          0, 0, 0, 0);
2963     }
2964
2965     if (slice_header_index == -1) {
2966         VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
2967         VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2968         VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
2969         unsigned char *slice_header = NULL, *slice_header1 = NULL;
2970         int slice_header_length_in_bits = 0;
2971         uint32_t saved_macroblock_address = 0;
2972
2973         /* No slice header data is passed. And the driver needs to generate it */
2974         /* For the Normal H264 */
2975
2976         if (slice_index &&
2977             IS_KBL(i965->intel.device_info)) {
2978             saved_macroblock_address = slice_params->macroblock_address;
2979             slice_params->macroblock_address = 0;
2980         }
2981
2982         slice_header_length_in_bits = build_avc_slice_header(seq_param,
2983                                                              pic_param,
2984                                                              slice_params,
2985                                                              &slice_header);
2986
2987         slice_header1 = slice_header;
2988
2989         if (slice_index &&
2990             IS_KBL(i965->intel.device_info)) {
2991             slice_params->macroblock_address = saved_macroblock_address;
2992         }
2993
2994         if (insert_one_zero_byte) {
2995             slice_header1 += 1;
2996             slice_header_length_in_bits -= 8;
2997         }
2998
2999         gen9_vdenc_mfx_avc_insert_object(ctx,
3000                                          encoder_context,
3001                                          (unsigned int *)slice_header1,
3002                                          ALIGN(slice_header_length_in_bits, 32) >> 5,
3003                                          slice_header_length_in_bits & 0x1f,
3004                                          5,  /* first 5 bytes are start code + nal unit type */
3005                                          1, 0, 1,
3006                                          1);
3007
3008         free(slice_header);
3009     } else {
3010         unsigned int skip_emul_byte_cnt;
3011         unsigned char *slice_header1 = NULL;
3012
3013         if (slice_index &&
3014             IS_KBL(i965->intel.device_info)) {
3015             slice_header_index = (encode_state->slice_header_index[0] & SLICE_PACKED_DATA_INDEX_MASK);
3016         }
3017
3018         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
3019
3020         param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
3021         length_in_bits = param->bit_length;
3022
3023         slice_header1 = (unsigned char *)header_data;
3024
3025         if (insert_one_zero_byte) {
3026             slice_header1 += 1;
3027             length_in_bits -= 8;
3028         }
3029
3030         /* as the slice header is the last header data for one slice,
3031          * the last header flag is set to one.
3032          */
3033         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3034
3035         if (insert_one_zero_byte)
3036             skip_emul_byte_cnt -= 1;
3037
3038         gen9_vdenc_mfx_avc_insert_object(ctx,
3039                                          encoder_context,
3040                                          (unsigned int *)slice_header1,
3041                                          ALIGN(length_in_bits, 32) >> 5,
3042                                          length_in_bits & 0x1f,
3043                                          skip_emul_byte_cnt,
3044                                          1,
3045                                          0,
3046                                          !param->has_emulation_bytes,
3047                                          1);
3048     }
3049
3050     return;
3051 }
3052
3053 static void
3054 gen9_vdenc_mfx_avc_insert_headers(VADriverContextP ctx,
3055                                   struct encode_state *encode_state,
3056                                   struct intel_encoder_context *encoder_context,
3057                                   VAEncSliceParameterBufferH264 *slice_param,
3058                                   int slice_index)
3059 {
3060     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3061     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
3062     unsigned int internal_rate_mode = vdenc_context->internal_rate_mode;
3063     unsigned int skip_emul_byte_cnt;
3064
3065     if (slice_index == 0) {
3066
3067         if (encode_state->packed_header_data[idx]) {
3068             VAEncPackedHeaderParameterBuffer *param = NULL;
3069             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3070             unsigned int length_in_bits;
3071
3072             assert(encode_state->packed_header_param[idx]);
3073             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3074             length_in_bits = param->bit_length;
3075
3076             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3077             gen9_vdenc_mfx_avc_insert_object(ctx,
3078                                              encoder_context,
3079                                              header_data,
3080                                              ALIGN(length_in_bits, 32) >> 5,
3081                                              length_in_bits & 0x1f,
3082                                              skip_emul_byte_cnt,
3083                                              0,
3084                                              0,
3085                                              !param->has_emulation_bytes,
3086                                              0);
3087
3088         }
3089
3090         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
3091
3092         if (encode_state->packed_header_data[idx]) {
3093             VAEncPackedHeaderParameterBuffer *param = NULL;
3094             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3095             unsigned int length_in_bits;
3096
3097             assert(encode_state->packed_header_param[idx]);
3098             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3099             length_in_bits = param->bit_length;
3100
3101             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3102
3103             gen9_vdenc_mfx_avc_insert_object(ctx,
3104                                              encoder_context,
3105                                              header_data,
3106                                              ALIGN(length_in_bits, 32) >> 5,
3107                                              length_in_bits & 0x1f,
3108                                              skip_emul_byte_cnt,
3109                                              0,
3110                                              0,
3111                                              !param->has_emulation_bytes,
3112                                              0);
3113
3114         }
3115
3116         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
3117
3118         if (encode_state->packed_header_data[idx]) {
3119             VAEncPackedHeaderParameterBuffer *param = NULL;
3120             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3121             unsigned int length_in_bits;
3122
3123             assert(encode_state->packed_header_param[idx]);
3124             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3125             length_in_bits = param->bit_length;
3126
3127             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3128             gen9_vdenc_mfx_avc_insert_object(ctx,
3129                                              encoder_context,
3130                                              header_data,
3131                                              ALIGN(length_in_bits, 32) >> 5,
3132                                              length_in_bits & 0x1f,
3133                                              skip_emul_byte_cnt,
3134                                              0,
3135                                              0,
3136                                              !param->has_emulation_bytes,
3137                                              0);
3138
3139         } else if (internal_rate_mode == I965_BRC_CBR) {
3140             /* TODO: insert others */
3141         }
3142     }
3143
3144     gen9_vdenc_mfx_avc_insert_slice_packed_data(ctx,
3145                                                 encode_state,
3146                                                 encoder_context,
3147                                                 slice_index);
3148 }
3149
3150 static void
3151 gen9_vdenc_mfx_avc_slice_state(VADriverContextP ctx,
3152                                struct encode_state *encode_state,
3153                                struct intel_encoder_context *encoder_context,
3154                                VAEncPictureParameterBufferH264 *pic_param,
3155                                VAEncSliceParameterBufferH264 *slice_param,
3156                                VAEncSliceParameterBufferH264 *next_slice_param,
3157                                int slice_index)
3158 {
3159     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3160     struct intel_batchbuffer *batch = encoder_context->base.batch;
3161     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
3162     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
3163     unsigned char correct[6], grow, shrink;
3164     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
3165     int max_qp_n, max_qp_p;
3166     int i;
3167     int weighted_pred_idc = 0;
3168     int num_ref_l0 = 0, num_ref_l1 = 0;
3169     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3170     int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; /* TODO: fix for CBR&VBR */
3171     int inter_rounding = 0;
3172
3173     if (vdenc_context->internal_rate_mode != I965_BRC_CQP)
3174         inter_rounding = 3;
3175
3176     slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
3177     slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
3178
3179     if (next_slice_param) {
3180         next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
3181         next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
3182     } else {
3183         next_slice_hor_pos = 0;
3184         next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
3185     }
3186
3187     if (slice_type == SLICE_TYPE_I) {
3188         luma_log2_weight_denom = 0;
3189         chroma_log2_weight_denom = 0;
3190     } else if (slice_type == SLICE_TYPE_P) {
3191         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
3192         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
3193
3194         if (slice_param->num_ref_idx_active_override_flag)
3195             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3196     } else if (slice_type == SLICE_TYPE_B) {
3197         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
3198         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
3199         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
3200
3201         if (slice_param->num_ref_idx_active_override_flag) {
3202             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3203             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
3204         }
3205
3206         if (weighted_pred_idc == 2) {
3207             /* 8.4.3 - Derivation process for prediction weights (8-279) */
3208             luma_log2_weight_denom = 5;
3209             chroma_log2_weight_denom = 5;
3210         }
3211     }
3212
3213     max_qp_n = 0;       /* TODO: update it */
3214     max_qp_p = 0;       /* TODO: update it */
3215     grow = 0;           /* TODO: update it */
3216     shrink = 0;         /* TODO: update it */
3217
3218     for (i = 0; i < 6; i++)
3219         correct[i] = 0; /* TODO: update it */
3220
3221     BEGIN_BCS_BATCH(batch, 11);
3222
3223     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
3224     OUT_BCS_BATCH(batch, slice_type);
3225     OUT_BCS_BATCH(batch,
3226                   (num_ref_l0 << 16) |
3227                   (num_ref_l1 << 24) |
3228                   (chroma_log2_weight_denom << 8) |
3229                   (luma_log2_weight_denom << 0));
3230     OUT_BCS_BATCH(batch,
3231                   (weighted_pred_idc << 30) |
3232                   (slice_param->direct_spatial_mv_pred_flag << 29) |
3233                   (slice_param->disable_deblocking_filter_idc << 27) |
3234                   (slice_param->cabac_init_idc << 24) |
3235                   (slice_qp << 16) |
3236                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
3237                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
3238
3239     OUT_BCS_BATCH(batch,
3240                   slice_ver_pos << 24 |
3241                   slice_hor_pos << 16 |
3242                   slice_param->macroblock_address);
3243     OUT_BCS_BATCH(batch,
3244                   next_slice_ver_pos << 16 |
3245                   next_slice_hor_pos);
3246
3247     OUT_BCS_BATCH(batch,
3248                   (0 << 31) |           /* TODO: ignore it for VDENC ??? */
3249                   (!slice_param->macroblock_address << 30) |    /* ResetRateControlCounter */
3250                   (2 << 28) |           /* Loose Rate Control */
3251                   (0 << 24) |           /* RC Stable Tolerance */
3252                   (0 << 23) |           /* RC Panic Enable */
3253                   (1 << 22) |           /* CBP mode */
3254                   (0 << 21) |           /* MB Type Direct Conversion, 0: Enable, 1: Disable */
3255                   (0 << 20) |           /* MB Type Skip Conversion, 0: Enable, 1: Disable */
3256                   (!next_slice_param << 19) |                   /* Is Last Slice */
3257                   (0 << 18) |           /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
3258                   (1 << 17) |           /* HeaderPresentFlag */
3259                   (1 << 16) |           /* SliceData PresentFlag */
3260                   (0 << 15) |           /* TailPresentFlag, TODO: check it on VDEnc  */
3261                   (1 << 13) |           /* RBSP NAL TYPE */
3262                   (slice_index << 4) |
3263                   (1 << 12));           /* CabacZeroWordInsertionEnable */
3264
3265     OUT_BCS_BATCH(batch, vdenc_context->compressed_bitstream.start_offset);
3266
3267     OUT_BCS_BATCH(batch,
3268                   (max_qp_n << 24) |     /* Target QP - 24 is lowest QP */
3269                   (max_qp_p << 16) |     /* Target QP + 20 is highest QP */
3270                   (shrink << 8) |
3271                   (grow << 0));
3272     OUT_BCS_BATCH(batch,
3273                   (1 << 31) |
3274                   (inter_rounding << 28) |
3275                   (1 << 27) |
3276                   (5 << 24) |
3277                   (correct[5] << 20) |
3278                   (correct[4] << 16) |
3279                   (correct[3] << 12) |
3280                   (correct[2] << 8) |
3281                   (correct[1] << 4) |
3282                   (correct[0] << 0));
3283     OUT_BCS_BATCH(batch, 0);
3284
3285     ADVANCE_BCS_BATCH(batch);
3286 }
3287
3288 static uint8_t
3289 gen9_vdenc_mfx_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
3290 {
3291     unsigned int is_long_term =
3292         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
3293     unsigned int is_top_field =
3294         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
3295     unsigned int is_bottom_field =
3296         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
3297
3298     return ((is_long_term                         << 6) |
3299             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
3300             (frame_store_id                       << 1) |
3301             ((is_top_field ^ 1) & is_bottom_field));
3302 }
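
/*
 * Worked example: a short-term, frame (non-field) reference kept in frame
 * store 2 gives (0 << 6) | (1 << 5) | (2 << 1) | 0 = 0x24, while unused
 * entries in MFX_AVC_REF_IDX_STATE below stay at the "invalid" pattern 0x80.
 */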
3303
3304 static void
3305 gen9_vdenc_mfx_avc_ref_idx_state(VADriverContextP ctx,
3306                                  struct encode_state *encode_state,
3307                                  struct intel_encoder_context *encoder_context,
3308                                  VAEncSliceParameterBufferH264 *slice_param)
3309 {
3310     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3311     struct intel_batchbuffer *batch = encoder_context->base.batch;
3312     VAPictureH264 *ref_pic;
3313     int i, slice_type, ref_idx_shift;
3314     unsigned int fwd_ref_entry;
3315
3316     fwd_ref_entry = 0x80808080;
3317     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3318
3319     for (i = 0; i < MAX(vdenc_context->num_refs[0], 3); i++) {
3320         ref_pic = &slice_param->RefPicList0[i];
3321         ref_idx_shift = i * 8;
3322
3323         fwd_ref_entry &= ~(0xFF << ref_idx_shift);
3324         fwd_ref_entry += (gen9_vdenc_mfx_get_ref_idx_state(ref_pic, vdenc_context->list_ref_idx[0][i]) << ref_idx_shift);
3325     }
3326
3327     if (slice_type == SLICE_TYPE_P) {
3328         BEGIN_BCS_BATCH(batch, 10);
3329         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
3330         OUT_BCS_BATCH(batch, 0);                        // L0
3331         OUT_BCS_BATCH(batch, fwd_ref_entry);
3332
3333         for (i = 0; i < 7; i++) {
3334             OUT_BCS_BATCH(batch, 0x80808080);
3335         }
3336
3337         ADVANCE_BCS_BATCH(batch);
3338     }
3339
3340     if (slice_type == SLICE_TYPE_B) {
3341         /* VDEnc on SKL doesn't support BWD */
3342         assert(0);
3343     }
3344 }
3345
3346 static void
3347 gen9_vdenc_mfx_avc_weightoffset_state(VADriverContextP ctx,
3348                                       struct encode_state *encode_state,
3349                                       struct intel_encoder_context *encoder_context,
3350                                       VAEncPictureParameterBufferH264 *pic_param,
3351                                       VAEncSliceParameterBufferH264 *slice_param)
3352 {
3353     struct intel_batchbuffer *batch = encoder_context->base.batch;
3354     int i, slice_type;
3355     short weightoffsets[32 * 6];
3356
3357     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3358
3359     if (slice_type == SLICE_TYPE_P &&
3360         pic_param->pic_fields.bits.weighted_pred_flag == 1) {
3361
3362         for (i = 0; i < 32; i++) {
3363             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
3364             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
3365             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
3366             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
3367             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
3368             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
3369         }
3370
3371         BEGIN_BCS_BATCH(batch, 98);
3372         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
3373         OUT_BCS_BATCH(batch, 0);
3374         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
3375
3376         ADVANCE_BCS_BATCH(batch);
3377     }
3378
3379     if (slice_type == SLICE_TYPE_B) {
3380         /* VDEnc on SKL doesn't support BWD */
3381         assert(0);
3382     }
3383 }
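
/*
 * Layout check: each L0 entry above is 6 int16 values (luma weight/offset,
 * Cb weight/offset, Cr weight/offset), so 32 entries take 32 * 6 * 2 = 384
 * bytes = 96 DWs, which together with the command header and the list
 * selector DW accounts for the 98 DWs programmed for
 * MFX_AVC_WEIGHTOFFSET_STATE.
 */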
3384
3385 static void
3386 gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
3387                                 struct encode_state *encode_state,
3388                                 struct intel_encoder_context *encoder_context,
3389                                 VAEncSliceParameterBufferH264 *slice_param,
3390                                 VAEncSliceParameterBufferH264 *next_slice_param,
3391                                 int slice_index)
3392 {
3393     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3394     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
3395
3396     gen9_vdenc_mfx_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param);
3397     gen9_vdenc_mfx_avc_weightoffset_state(ctx,
3398                                           encode_state,
3399                                           encoder_context,
3400                                           pic_param,
3401                                           slice_param);
3402     gen9_vdenc_mfx_avc_slice_state(ctx,
3403                                    encode_state,
3404                                    encoder_context,
3405                                    pic_param,
3406                                    slice_param,
3407                                    next_slice_param,
3408                                    slice_index);
3409     gen9_vdenc_mfx_avc_insert_headers(ctx,
3410                                       encode_state,
3411                                       encoder_context,
3412                                       slice_param,
3413                                       slice_index);
3414
3415     if (!vdenc_context->is_frame_level_vdenc) {
3416         gen95_vdenc_vdenc_weightsoffsets_state(ctx,
3417                                                encode_state,
3418                                                encoder_context,
3419                                                slice_param);
3420         gen95_vdenc_vdenc_walker_state(ctx,
3421                                        encode_state,
3422                                        encoder_context,
3423                                        slice_param,
3424                                        next_slice_param);
3425     }
3426 }
3427
3428 static void
3429 gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
3430                                 struct encode_state *encode_state,
3431                                 struct intel_encoder_context *encoder_context)
3432 {
3433     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3434     struct intel_batchbuffer *batch = encoder_context->base.batch;
3435     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
3436     VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
3437     int i, j;
3438     int slice_index = 0;
3439     int has_tail = 0;                   /* TODO: check it later */
3440
3441     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3442         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3443
3444         if (j == encode_state->num_slice_params_ext - 1)
3445             next_slice_group_param = NULL;
3446         else
3447             next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
3448
3449         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
3450             if (i < encode_state->slice_params_ext[j]->num_elements - 1)
3451                 next_slice_param = slice_param + 1;
3452             else
3453                 next_slice_param = next_slice_group_param;
3454
3455             gen9_vdenc_mfx_avc_single_slice(ctx,
3456                                             encode_state,
3457                                             encoder_context,
3458                                             slice_param,
3459                                             next_slice_param,
3460                                             slice_index);
3461
3462             if (vdenc_context->is_frame_level_vdenc)
3463                 break;
3464             else {
3465                 struct vd_pipeline_flush_parameter pipeline_flush_params;
3466                 int insert_mi_flush;
3467
3468                 memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
3469
3470                 if (next_slice_group_param) {
3471                     pipeline_flush_params.mfx_pipeline_done = 1;
3472                     insert_mi_flush = 1;
3473                 } else if (i < encode_state->slice_params_ext[j]->num_elements - 1) {
3474                     pipeline_flush_params.mfx_pipeline_done = 1;
3475                     insert_mi_flush = 1;
3476                 } else {
3477                     pipeline_flush_params.mfx_pipeline_done = !has_tail;
3478                     insert_mi_flush = 0;
3479                 }
3480
3481                 pipeline_flush_params.vdenc_pipeline_done = 1;
3482                 pipeline_flush_params.vdenc_pipeline_command_flush = 1;
3483                 pipeline_flush_params.vd_command_message_parser_done = 1;
3484                 gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
3485
3486                 if (insert_mi_flush) {
3487                     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3488                     mi_flush_dw_params.video_pipeline_cache_invalidate = 0;
3489                     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3490                 }
3491             }
3492
3493             slice_param++;
3494             slice_index++;
3495         }
3496
3497         if (vdenc_context->is_frame_level_vdenc)
3498             break;
3499     }
3500
3501     if (vdenc_context->is_frame_level_vdenc) {
3502         struct vd_pipeline_flush_parameter pipeline_flush_params;
3503
3504         gen9_vdenc_vdenc_walker_state(ctx, encode_state, encoder_context);
3505
3506         memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
3507         pipeline_flush_params.mfx_pipeline_done = !has_tail;
3508         pipeline_flush_params.vdenc_pipeline_done = 1;
3509         pipeline_flush_params.vdenc_pipeline_command_flush = 1;
3510         pipeline_flush_params.vd_command_message_parser_done = 1;
3511         gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
3512     }
3513
3514     if (has_tail) {
3515         /* TODO: insert a tail if required */
3516     }
3517
3518     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3519     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
3520     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3521 }
3522
3523 static void
3524 gen9_vdenc_mfx_vdenc_pipeline(VADriverContextP ctx,
3525                               struct encode_state *encode_state,
3526                               struct intel_encoder_context *encoder_context)
3527 {
3528     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3529     struct intel_batchbuffer *batch = encoder_context->base.batch;
3530     struct gpe_mi_batch_buffer_start_parameter mi_batch_buffer_start_params;
3531
3532     if (vdenc_context->brc_enabled) {
3533         struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
3534
3535         memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
3536         mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
3537         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
3538     }
3539
3540     if (vdenc_context->current_pass) {
3541         struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
3542
3543         memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
3544         mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status_res.bo;
3545         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
3546     }
3547
3548     gen9_vdenc_mfx_pipe_mode_select(ctx, encode_state, encoder_context);
3549
3550     gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res, 0);
3551     gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res, 4);
3552     gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res, 5);
3553
3554     gen9_vdenc_mfx_pipe_buf_addr_state(ctx, encoder_context);
3555     gen9_vdenc_mfx_ind_obj_base_addr_state(ctx, encoder_context);
3556     gen9_vdenc_mfx_bsp_buf_base_addr_state(ctx, encoder_context);
3557
3558     gen9_vdenc_vdenc_pipe_mode_select(ctx, encode_state, encoder_context);
3559     gen9_vdenc_vdenc_src_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res);
3560     gen9_vdenc_vdenc_ref_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res);
3561     gen9_vdenc_vdenc_ds_ref_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res);
3562     gen9_vdenc_vdenc_pipe_buf_addr_state(ctx, encode_state, encoder_context);
3563     gen9_vdenc_vdenc_const_qpt_state(ctx, encode_state, encoder_context);
3564
3565     if (!vdenc_context->brc_enabled) {
3566         gen9_vdenc_mfx_avc_img_state(ctx, encode_state, encoder_context);
3567         gen9_vdenc_vdenc_img_state(ctx, encode_state, encoder_context);
3568     } else {
3569         memset(&mi_batch_buffer_start_params, 0, sizeof(mi_batch_buffer_start_params));
3570         mi_batch_buffer_start_params.is_second_level = 1; /* Must be the second level batch buffer */
3571         mi_batch_buffer_start_params.bo = vdenc_context->second_level_batch_res.bo;
3572         gen8_gpe_mi_batch_buffer_start(ctx, batch, &mi_batch_buffer_start_params);
3573     }
3574
3575     gen9_vdenc_mfx_avc_qm_state(ctx, encoder_context);
3576     gen9_vdenc_mfx_avc_fqm_state(ctx, encoder_context);
3577
3578     gen9_vdenc_mfx_vdenc_avc_slices(ctx, encode_state, encoder_context);
3579 }
3580
3581 static void
3582 gen9_vdenc_context_brc_prepare(struct encode_state *encode_state,
3583                                struct intel_encoder_context *encoder_context)
3584 {
3585     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3586     unsigned int rate_control_mode = encoder_context->rate_control_mode;
3587
3588     switch (rate_control_mode & 0x7f) {
3589     case VA_RC_CBR:
3590         vdenc_context->internal_rate_mode = I965_BRC_CBR;
3591         break;
3592
3593     case VA_RC_VBR:
3594         vdenc_context->internal_rate_mode = I965_BRC_VBR;
3595         break;
3596
3597     case VA_RC_CQP:
3598     default:
3599         vdenc_context->internal_rate_mode = I965_BRC_CQP;
3600         break;
3601     }
3602 }
3603
3604 static void
3605 gen9_vdenc_read_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3606 {
3607     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3608     struct intel_batchbuffer *batch = encoder_context->base.batch;
3609     struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
3610     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
3611     unsigned int base_offset = vdenc_context->status_bffuer.base_offset;
3612     int i;
3613
3614     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3615     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3616
3617     memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
3618     mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
3619     mi_store_register_mem_params.bo = vdenc_context->status_bffuer.res.bo;
3620     mi_store_register_mem_params.offset = base_offset + vdenc_context->status_bffuer.bytes_per_frame_offset;
3621     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3622
3623     /* Update DMEM buffer for BRC Update */
3624     for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
3625         mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
3626         mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
3627         mi_store_register_mem_params.offset = 5 * sizeof(uint32_t);
3628         gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3629
3630         mi_store_register_mem_params.mmio_offset = MFC_IMAGE_STATUS_CTRL_REG; /* TODO: fix it if VDBOX2 is used */
3631         mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
3632         mi_store_register_mem_params.offset = 7 * sizeof(uint32_t);
3633         gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3634     }
3635 }
3636
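/* B slices are not handled by this VDEnc AVC path, so reject them up front. */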
3637 static VAStatus
3638 gen9_vdenc_avc_check_capability(VADriverContextP ctx,
3639                                 struct encode_state *encode_state,
3640                                 struct intel_encoder_context *encoder_context)
3641 {
3642     VAEncSliceParameterBufferH264 *slice_param;
3643     int i, j;
3644
3645     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3646         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3647
3648         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
3649             if (slice_param->slice_type == SLICE_TYPE_B)
3650                 return VA_STATUS_ERROR_UNKNOWN;
3651
3652             slice_param++;
3653         }
3654     }
3655
3656     return VA_STATUS_SUCCESS;
3657 }
3658
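/*
 * Per-frame entry point: validate the parameters, prepare the surfaces and
 * buffers, then submit one batch per BRC pass.  With BRC enabled, each pass
 * runs the HuC BRC init/reset kernel (first frame or after a reset request)
 * and the BRC update kernel before programming the MFX/VDEnc pipeline and
 * the status read-back.
 */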
3659 static VAStatus
3660 gen9_vdenc_avc_encode_picture(VADriverContextP ctx,
3661                               VAProfile profile,
3662                               struct encode_state *encode_state,
3663                               struct intel_encoder_context *encoder_context)
3664 {
3665     VAStatus va_status;
3666     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3667     struct intel_batchbuffer *batch = encoder_context->base.batch;
3668
3669     va_status = gen9_vdenc_avc_check_capability(ctx, encode_state, encoder_context);
3670
3671     if (va_status != VA_STATUS_SUCCESS)
3672         return va_status;
3673
3674     gen9_vdenc_avc_prepare(ctx, profile, encode_state, encoder_context);
3675
3676     for (vdenc_context->current_pass = 0; vdenc_context->current_pass < vdenc_context->num_passes; vdenc_context->current_pass++) {
3677         vdenc_context->is_first_pass = (vdenc_context->current_pass == 0);
3678         vdenc_context->is_last_pass = (vdenc_context->current_pass == (vdenc_context->num_passes - 1));
3679
3680         intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
3681
3682         intel_batchbuffer_emit_mi_flush(batch);
3683
3684         if (vdenc_context->brc_enabled) {
3685             if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset)
3686                 gen9_vdenc_huc_brc_init_reset(ctx, encode_state, encoder_context);
3687
3688             gen9_vdenc_huc_brc_update(ctx, encode_state, encoder_context);
3689             intel_batchbuffer_emit_mi_flush(batch);
3690         }
3691
3692         gen9_vdenc_mfx_vdenc_pipeline(ctx, encode_state, encoder_context);
3693         gen9_vdenc_read_status(ctx, encoder_context);
3694
3695         intel_batchbuffer_end_atomic(batch);
3696         intel_batchbuffer_flush(batch);
3697
3698         vdenc_context->brc_initted = 1;
3699         vdenc_context->brc_need_reset = 0;
3700     }
3701
3702     return VA_STATUS_SUCCESS;
3703 }
3704
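/* Encoding entry point registered with the encoder context; only AVC (H.264) profiles are supported. */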
3705 static VAStatus
3706 gen9_vdenc_pipeline(VADriverContextP ctx,
3707                     VAProfile profile,
3708                     struct encode_state *encode_state,
3709                     struct intel_encoder_context *encoder_context)
3710 {
3711     VAStatus vaStatus;
3712
3713     switch (profile) {
3714     case VAProfileH264ConstrainedBaseline:
3715     case VAProfileH264Main:
3716     case VAProfileH264High:
3717         vaStatus = gen9_vdenc_avc_encode_picture(ctx, profile, encode_state, encoder_context);
3718         break;
3719
3720     default:
3721         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
3722         break;
3723     }
3724
3725     return vaStatus;
3726 }
3727
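/* Release every GPE resource held by the VDEnc context, including the per-frame surface references. */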
3728 static void
3729 gen9_vdenc_free_resources(struct gen9_vdenc_context *vdenc_context)
3730 {
3731     int i;
3732
3733     i965_free_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
3734     i965_free_gpe_resource(&vdenc_context->brc_history_buffer_res);
3735     i965_free_gpe_resource(&vdenc_context->brc_stream_in_res);
3736     i965_free_gpe_resource(&vdenc_context->brc_stream_out_res);
3737     i965_free_gpe_resource(&vdenc_context->huc_dummy_res);
3738
3739     for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++)
3740         i965_free_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
3741
3742     i965_free_gpe_resource(&vdenc_context->vdenc_statistics_res);
3743     i965_free_gpe_resource(&vdenc_context->pak_statistics_res);
3744     i965_free_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
3745     i965_free_gpe_resource(&vdenc_context->hme_detection_summary_buffer_res);
3746     i965_free_gpe_resource(&vdenc_context->brc_constant_data_res);
3747     i965_free_gpe_resource(&vdenc_context->second_level_batch_res);
3748
3749     i965_free_gpe_resource(&vdenc_context->huc_status_res);
3750     i965_free_gpe_resource(&vdenc_context->huc_status2_res);
3751
3752     i965_free_gpe_resource(&vdenc_context->recon_surface_res);
3753     i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
3754     i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
3755     i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
3756
3757     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
3758         i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
3759         i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
3760     }
3761
3762     i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
3763     i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
3764     i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
3765
3766     i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
3767     i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
3768     i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
3769     i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
3770
3771     i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
3772 }
3773
3774 static void
3775 gen9_vdenc_context_destroy(void *context)
3776 {
3777     struct gen9_vdenc_context *vdenc_context = context;
3778
3779     gen9_vdenc_free_resources(vdenc_context);
3780
3781     free(vdenc_context);
3782 }
3783
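/*
 * Allocate the buffers that live for the whole encoding context: HuC BRC
 * DMEM/history/stream buffers, statistics buffers, the BRC constant data and
 * the second-level batch buffer holding the image states.  Per-frame surfaces
 * (reconstructed, reference and bitstream buffers) are set up elsewhere, per
 * frame (see gen9_vdenc_avc_prepare()).
 */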
3784 static void
3785 gen9_vdenc_allocate_resources(VADriverContextP ctx,
3786                               struct intel_encoder_context *encoder_context,
3787                               struct gen9_vdenc_context *vdenc_context)
3788 {
3789     struct i965_driver_data *i965 = i965_driver_data(ctx);
3790     int i;
3791
3792     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_init_reset_dmem_res,
3793                                 ALIGN(sizeof(struct huc_brc_init_dmem), 64),
3794                                 "HuC Init&Reset DMEM buffer");
3795
3796     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_history_buffer_res,
3797                                 ALIGN(HUC_BRC_HISTORY_BUFFER_SIZE, 0x1000),
3798                                 "HuC History buffer");
3799
3800     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_in_res,
3801                                 ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3802                                 "HuC Stream In buffer");
3803
3804     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_out_res,
3805                                 ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3806                                 "HuC Stream Out buffer");
3807
3808     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_dummy_res,
3809                                 0x1000,
3810                                 "HuC dummy buffer");
3811
3812     for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
3813         ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_update_dmem_res[i],
3814                                     ALIGN(sizeof(struct huc_brc_update_dmem), 64),
3815                                     "HuC BRC Update buffer");
3816         i965_zero_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
3817     }
3818
3819     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_statistics_res,
3820                                 ALIGN(VDENC_STATISTICS_SIZE, 0x1000),
3821                                 "VDENC statistics buffer");
3822
3823     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->pak_statistics_res,
3824                                 ALIGN(PAK_STATISTICS_SIZE, 0x1000),
3825                                 "PAK statistics buffer");
3826
3827     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_avc_image_state_res,
3828                                 ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3829                                 "VDENC/AVC image state buffer");
3830
3831     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->hme_detection_summary_buffer_res,
3832                                 ALIGN(HME_DETECTION_SUMMARY_BUFFER_SIZE, 0x1000),
3833                                 "HME summary buffer");
3834
3835     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_constant_data_res,
3836                                 ALIGN(BRC_CONSTANT_DATA_SIZE, 0x1000),
3837                                 "BRC constant buffer");
3838
3839     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->second_level_batch_res,
3840                                 ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3841                                 "Second level batch buffer");
3842
3843     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status_res,
3844                                 0x1000,
3845                                 "HuC Status buffer");
3846
3847     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status2_res,
3848                                 0x1000,
3849                                 "HuC Status2 buffer");
3850 }
3851
3852 static void
3853 gen9_vdenc_hw_interfaces_init(VADriverContextP ctx,
3854                               struct intel_encoder_context *encoder_context,
3855                               struct gen9_vdenc_context *vdenc_context)
3856 {
3857     vdenc_context->is_frame_level_vdenc = 1;
3858 }
3859
3860 static void
3861 gen95_vdenc_hw_interfaces_init(VADriverContextP ctx,
3862                                struct intel_encoder_context *encoder_context,
3863                                struct gen9_vdenc_context *vdenc_context)
3864 {
3865     vdenc_context->use_extended_pak_obj_cmd = 1;
3866 }
3867
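/*
 * Pick the per-generation VDEnc configuration: KBL (Gen9.5) uses the extended
 * PAK object command, while the other Gen9 parts program VDEnc at frame level.
 */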
3868 static void
3869 vdenc_hw_interfaces_init(VADriverContextP ctx,
3870                          struct intel_encoder_context *encoder_context,
3871                          struct gen9_vdenc_context *vdenc_context)
3872 {
3873     struct i965_driver_data *i965 = i965_driver_data(ctx);
3874
3875     if (IS_KBL(i965->intel.device_info)) {
3876         gen95_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
3877     } else {
3878         gen9_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
3879     }
3880 }
3881
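/* Report the encoded frame size recorded in the status data back through the coded buffer segment. */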
3882 static VAStatus
3883 gen9_vdenc_context_get_status(VADriverContextP ctx,
3884                               struct intel_encoder_context *encoder_context,
3885                               struct i965_coded_buffer_segment *coded_buffer_segment)
3886 {
3887     struct gen9_vdenc_status *vdenc_status = (struct gen9_vdenc_status *)coded_buffer_segment->codec_private_data;
3888
3889     coded_buffer_segment->base.size = vdenc_status->bytes_per_frame;
3890
3891     return VA_STATUS_SUCCESS;
3892 }
3893
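/*
 * Create and initialize the VDEnc context, allocate its long-lived resources,
 * and hook the pipeline, BRC preparation, status query and destructor into
 * the encoder context.
 */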
3894 Bool
3895 gen9_vdenc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3896 {
3897     struct gen9_vdenc_context *vdenc_context = calloc(1, sizeof(struct gen9_vdenc_context));
3898
3899     if (!vdenc_context)
3900         return False;
3901
3902     vdenc_context->brc_initted = 0;
3903     vdenc_context->brc_need_reset = 0;
3904     vdenc_context->is_low_delay = 0;
3905     vdenc_context->current_pass = 0;
3906     vdenc_context->num_passes = 1;
3907     vdenc_context->vdenc_streamin_enable = 0;
3908     vdenc_context->vdenc_pak_threshold_check_enable = 0;
3909     vdenc_context->is_frame_level_vdenc = 0;
3910
3911     vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
3912     gen9_vdenc_allocate_resources(ctx, encoder_context, vdenc_context);
3913
3914     encoder_context->mfc_context = vdenc_context;
3915     encoder_context->mfc_context_destroy = gen9_vdenc_context_destroy;
3916     encoder_context->mfc_pipeline = gen9_vdenc_pipeline;
3917     encoder_context->mfc_brc_prepare = gen9_vdenc_context_brc_prepare;
3918     encoder_context->get_status = gen9_vdenc_context_get_status;
3919
3920     return True;
3921 }