OSDN Git Service

Add vdenc common commands for CNL
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_vdenc.c
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41 #include "intel_media.h"
42 #include "gen9_vdenc.h"
43
44 extern int
45 intel_avc_enc_slice_type_fixup(int slice_type);
46
47 static const int8_t dist_qp_adj_tab_i_vbr[81] = {
48     +0,  0,  0,  0, 0, 3, 4, 6, 8,
49     +0,  0,  0,  0, 0, 2, 3, 5, 7,
50     -1,  0,  0,  0, 0, 2, 2, 4, 5,
51     -1, -1,  0,  0, 0, 1, 2, 2, 4,
52     -2, -2, -1,  0, 0, 0, 1, 2, 4,
53     -2, -2, -1,  0, 0, 0, 1, 2, 4,
54     -3, -2, -1, -1, 0, 0, 1, 2, 5,
55     -3, -2, -1, -1, 0, 0, 2, 4, 7,
56     -4, -3, -2, -1, 0, 1, 3, 5, 8,
57 };
58
59 static const int8_t dist_qp_adj_tab_p_vbr[81] = {
60     -1,  0,  0,  0, 0, 1, 1, 2, 3,
61     -1, -1,  0,  0, 0, 1, 1, 2, 3,
62     -2, -1, -1,  0, 0, 1, 1, 2, 3,
63     -3, -2, -2, -1, 0, 0, 1, 2, 3,
64     -3, -2, -1, -1, 0, 0, 1, 2, 3,
65     -3, -2, -1, -1, 0, 0, 1, 2, 3,
66     -3, -2, -1, -1, 0, 0, 1, 2, 3,
67     -3, -2, -1, -1, 0, 0, 1, 2, 3,
68     -3, -2, -1, -1, 0, 0, 1, 2, 3,
69 };
70
71 static const int8_t dist_qp_adj_tab_b_vbr[81] = {
72     +0,  0,  0,  0, 0, 2, 3, 3, 4,
73     +0,  0,  0,  0, 0, 2, 3, 3, 4,
74     -1,  0,  0,  0, 0, 2, 2, 3, 3,
75     -1, -1,  0,  0, 0, 1, 2, 2, 2,
76     -1, -1, -1,  0, 0, 0, 1, 2, 2,
77     -2, -1, -1,  0, 0, 0, 0, 1, 2,
78     -2, -1, -1, -1, 0, 0, 0, 1, 3,
79     -2, -2, -1, -1, 0, 0, 1, 1, 3,
80     -2, -2, -1, -1, 0, 1, 1, 2, 4,
81 };
82
83 static const int8_t buf_rate_adj_tab_i_vbr[72] = {
84     -4, -20, -28, -36, -40, -44, -48, -80,
85     +0,  -8, -12, -20, -24, -28, -32, -36,
86     +0,   0,  -8, -16, -20, -24, -28, -32,
87     +8,   4,   0,   0,  -8, -16, -24, -28,
88     32,  24,  16,   2,  -4,  -8, -16, -20,
89     36,  32,  28,  16,   8,   0,  -4,  -8,
90     40,  36,  24,  20,  16,   8,   0,  -8,
91     48,  40,  28,  24,  20,  12,   0,  -4,
92     64,  48,  28,  20,  16,  12,   8,   4,
93 };
94
95 static const int8_t buf_rate_adj_tab_p_vbr[72] = {
96     -8, -24, -32, -44, -48, -56, -64, -80,
97     -8, -16, -32, -40, -44, -52, -56, -64,
98     +0,   0, -16, -28, -36, -40, -44, -48,
99     +8,   4,   0,   0,  -8, -16, -24, -36,
100     20,  12,   4,   0,  -8,  -8,  -8, -16,
101     24,  16,   8,   8,   8,   0,  -4,  -8,
102     40,  36,  24,  20,  16,   8,   0,  -8,
103     48,  40,  28,  24,  20,  12,   0,  -4,
104     64,  48,  28,  20,  16,  12,   8,   4,
105 };
106
107 static const int8_t buf_rate_adj_tab_b_vbr[72] = {
108     0,  -4, -8, -16, -24, -32, -40, -48,
109     1,   0, -4,  -8, -16, -24, -32, -40,
110     4,   2,  0,  -1,  -3,  -8, -16, -24,
111     8,   4,  2,   0,  -1,  -4,  -8, -16,
112     20, 16,  4,   0,  -1,  -4,  -8, -16,
113     24, 20, 16,   8,   4,   0,  -4,  -8,
114     28, 24, 20,  16,   8,   4,   0,  -8,
115     32, 24, 20,  16,   8,   4,   0,  -4,
116     64, 48, 28,  20,  16,  12,   8,   4,
117 };
118
/* Default constant-data blob handed to the HuC BRC "update" kernel on Gen9.
 *
 * Layout is fixed by struct huc_brc_update_constant_data (declared in the
 * header); do not reorder fields or resize tables.  The rate/QP adjustment
 * tables are 8x8 grids, the dist_qp tables 9x9 grids, and the *_intra_* /
 * *_inter_* / hme_mv_cost tables are per-QP mode-cost LUTs (42 entries
 * each).  Exact index semantics live in the HuC firmware interface —
 * NOTE(review): confirm there before changing any value.
 */
static const struct huc_brc_update_constant_data gen9_brc_update_constant_data = {
    .global_rate_qp_adj_tab_i = {
        48, 40, 32,  24,  16,   8,   0,  -8,
        40, 32, 24,  16,   8,   0,  -8, -16,
        32, 24, 16,   8,   0,  -8, -16, -24,
        24, 16,  8,   0,  -8, -16, -24, -32,
        16, 8,   0,  -8, -16, -24, -32, -40,
        8,  0,  -8, -16, -24, -32, -40, -48,
        0, -8, -16, -24, -32, -40, -48, -56,
        48, 40, 32,  24,  16,   8,   0,  -8,
    },

    .global_rate_qp_adj_tab_p = {
        48,  40,  32,  24,  16,  8,    0,  -8,
        40,  32,  24,  16,   8,  0,   -8, -16,
        16,   8,   8,   4,  -8, -16, -16, -24,
        8,    0,   0,  -8, -16, -16, -16, -24,
        8,    0,   0, -24, -32, -32, -32, -48,
        0,  -16, -16, -24, -32, -48, -56, -64,
        -8, -16, -32, -32, -48, -48, -56, -64,
        -16, -32, -48, -48, -48, -56, -64, -80,
    },

    .global_rate_qp_adj_tab_b = {
        48, 40, 32, 24,  16,   8,   0,  -8,
        40, 32, 24, 16,  8,    0,  -8, -16,
        32, 24, 16,  8,  0,   -8, -16, -24,
        24, 16, 8,   0, -8,   -8, -16, -24,
        16, 8,  0,   0, -8,  -16, -24, -32,
        16, 8,  0,   0, -8,  -16, -24, -32,
        0, -8, -8, -16, -32, -48, -56, -64,
        0, -8, -8, -16, -32, -48, -56, -64
    },

    /* Distortion bucket boundaries (8 thresholds + 2 padding zeros);
     * identical for I/P/B. */
    .dist_threshld_i = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
    .dist_threshld_p = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
    .dist_threshld_b = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },

    /* Same values as the file-scope dist_qp_adj_tab_{i,p,b}_vbr tables. */
    .dist_qp_adj_tab_i = {
        0,   0,  0,  0,  0,  3,  4,  6,  8,
        0,   0,  0,  0,  0,  2,  3,  5,  7,
        -1,  0,  0,  0,  0,  2,  2,  4,  5,
        -1, -1,  0,  0,  0,  1,  2,  2,  4,
        -2, -2, -1,  0,  0,  0,  1,  2,  4,
        -2, -2, -1,  0,  0,  0,  1,  2,  4,
        -3, -2, -1, -1,  0,  0,  1,  2,  5,
        -3, -2, -1, -1,  0,  0,  2,  4,  7,
        -4, -3, -2, -1,  0,  1,  3,  5,  8,
    },

    .dist_qp_adj_tab_p = {
        -1,   0,  0,  0,  0,  1,  1,  2,  3,
        -1,  -1,  0,  0,  0,  1,  1,  2,  3,
        -2,  -1, -1,  0,  0,  1,  1,  2,  3,
        -3,  -2, -2, -1,  0,  0,  1,  2,  3,
        -3,  -2, -1, -1,  0,  0,  1,  2,  3,
        -3,  -2, -1, -1,  0,  0,  1,  2,  3,
        -3,  -2, -1, -1,  0,  0,  1,  2,  3,
        -3,  -2, -1, -1,  0,  0,  1,  2,  3,
        -3,  -2, -1, -1,  0,  0,  1,  2,  3,
    },

    .dist_qp_adj_tab_b = {
        0,   0,  0,  0, 0, 2, 3, 3, 4,
        0,   0,  0,  0, 0, 2, 3, 3, 4,
        -1,  0,  0,  0, 0, 2, 2, 3, 3,
        -1, -1,  0,  0, 0, 1, 2, 2, 2,
        -1, -1, -1,  0, 0, 0, 1, 2, 2,
        -2, -1, -1,  0, 0, 0, 0, 1, 2,
        -2, -1, -1, -1, 0, 0, 0, 1, 3,
        -2, -2, -1, -1, 0, 0, 1, 1, 3,
        -2, -2, -1, -1, 0, 1, 1, 2, 4,
    },

    /* default table for non lowdelay */
    .buf_rate_adj_tab_i = {
        -4, -20, -28, -36, -40, -44, -48, -80,
        0,   -8, -12, -20, -24, -28, -32, -36,
        0,    0,  -8, -16, -20, -24, -28, -32,
        8,    4,   0,   0,  -8, -16, -24, -28,
        32,  24,  16,   2,  -4,  -8, -16, -20,
        36,  32,  28,  16,   8,   0,  -4,  -8,
        40,  36,  24,  20,  16,   8,   0,  -8,
        48,  40,  28,  24,  20,  12,   0,  -4,
        64,  48,  28,  20,  16,  12,   8,   4,
    },

    /* default table for non lowdelay */
    .buf_rate_adj_tab_p = {
        -8, -24, -32, -44, -48, -56, -64, -80,
        -8, -16, -32, -40, -44, -52, -56, -64,
        0,    0, -16, -28, -36, -40, -44, -48,
        8,    4,   0,   0,  -8, -16, -24, -36,
        20,  12,   4,   0,  -8,  -8,  -8, -16,
        24,  16,   8,   8,   8,   0,  -4,  -8,
        40,  36,  24,  20,  16,   8,   0,  -8,
        48,  40,  28,  24,  20,  12,   0,  -4,
        64,  48,  28,  20,  16,  12,   8,   4,
    },

    /* default table for non lowdelay */
    .buf_rate_adj_tab_b = {
        0,  -4, -8, -16, -24, -32, -40, -48,
        1,   0, -4,  -8, -16, -24, -32, -40,
        4,   2,  0,  -1,  -3,  -8, -16, -24,
        8,   4,  2,   0,  -1,  -4,  -8, -16,
        20, 16,  4,   0,  -1,  -4,  -8, -16,
        24, 20, 16,   8,   4,   0,  -4,  -8,
        28, 24, 20,  16,   8,   4,   0,  -8,
        32, 24, 20,  16,   8,   4,   0,  -4,
        64, 48, 28,  20,  16,  12,   8,   4,
    },

    /* Min/max/SCG frame-size factors — presumably scaled multiples of the
     * target frame size used for clamping; TODO confirm units in HuC docs. */
    .frame_size_min_tab_p = { 1, 2, 4, 6, 8, 10, 16, 16, 16 },
    .frame_size_min_tab_i = { 1, 2, 4, 8, 16, 20, 24, 32, 36 },

    .frame_size_max_tab_p = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
    .frame_size_max_tab_i = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },

    .frame_size_scg_tab_p = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
    .frame_size_scg_tab_i = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },

    /* Per-QP mode cost LUTs for I frames (values in the 4.4 shift/mantissa
     * cost encoding — see map_44_lut_value below). */
    .i_intra_non_pred = {
        0x0e, 0x0e, 0x0e, 0x18, 0x19, 0x1b, 0x1c, 0x0d, 0x0f, 0x18, 0x19, 0x0d, 0x0f, 0x0f,
        0x0c, 0x0e, 0x0c, 0x0c, 0x0a, 0x0a, 0x0b, 0x0a, 0x0a, 0x0a, 0x09, 0x09, 0x08, 0x08,
        0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x07, 0x07, 0x07, 0x07, 0x07,
    },

    .i_intra_16x16 = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    },

    .i_intra_8x8 = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
        0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x04, 0x04, 0x04, 0x04, 0x06, 0x06, 0x06,
        0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07,
    },

    .i_intra_4x4 = {
        0x2e, 0x2e, 0x2e, 0x38, 0x39, 0x3a, 0x3b, 0x2c, 0x2e, 0x38, 0x39, 0x2d, 0x2f, 0x38,
        0x2e, 0x38, 0x2e, 0x38, 0x2f, 0x2e, 0x38, 0x38, 0x38, 0x38, 0x2f, 0x2f, 0x2f, 0x2e,
        0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x1e, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x0e, 0x0d,
    },

    .i_intra_chroma = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    },

    /* Per-QP mode cost LUTs for P frames. */
    .p_intra_non_pred = {
        0x06, 0x06, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x07,
        0x07, 0x07, 0x06, 0x07, 0x07, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
        0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    },

    .p_intra_16x16 = {
        0x1b, 0x1b, 0x1b, 0x1c, 0x1e, 0x28, 0x29, 0x1a, 0x1b, 0x1c, 0x1e, 0x1a, 0x1c, 0x1d,
        0x1b, 0x1c, 0x1c, 0x1c, 0x1c, 0x1b, 0x1c, 0x1c, 0x1d, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c,
        0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
    },

    .p_intra_8x8 = {
        0x1d, 0x1d, 0x1d, 0x1e, 0x28, 0x29, 0x2a, 0x1b, 0x1d, 0x1e, 0x28, 0x1c, 0x1d, 0x1f,
        0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1d, 0x1f, 0x1e, 0x1e, 0x1e, 0x1d, 0x1e, 0x1e, 0x1d,
        0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e,
    },

    .p_intra_4x4 = {
        0x38, 0x38, 0x38, 0x39, 0x3a, 0x3b, 0x3d, 0x2e, 0x38, 0x39, 0x3a, 0x2f, 0x39, 0x3a,
        0x38, 0x39, 0x38, 0x39, 0x39, 0x38, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
        0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
    },

    .p_intra_chroma = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    },

    .p_inter_16x8 = {
        0x07, 0x07, 0x07, 0x08, 0x09, 0x0b, 0x0c, 0x06, 0x07, 0x09, 0x0a, 0x07, 0x08, 0x09,
        0x08, 0x09, 0x08, 0x09, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x08, 0x08, 0x08, 0x08,
        0x08, 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
    },

    .p_inter_8x8 = {
        0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x02, 0x02, 0x02, 0x03, 0x02, 0x02, 0x02,
        0x02, 0x03, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
        0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    },

    .p_inter_16x16 = {
        0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
        0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
        0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    },

    .p_ref_id = {
        0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
        0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
        0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04
    },

    /* HME motion-vector cost per MV-magnitude bucket (8 buckets), each a
     * per-QP LUT of 42 entries. */
    .hme_mv_cost = {
        /* mv = 0 */
        {
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        },

        /* mv <= 16 */
        {
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        },

        /* mv <= 32 */
        {
            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
        },

        /* mv <= 64 */
        {
            0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
            0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
            0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
        },

        /* mv <= 128 */
        {
            0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
            0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
            0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
        },

        /* mv <= 256 */
        {
            0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
            0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
            0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
        },

        /* mv <= 512 */
        {
            0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
            0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
            0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
        },

        /* mv <= 1024 */
        {
            0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
            0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
            0x1a, 0x1a, 0x1a, 0x1a, 0x1f, 0x2a, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d
        },
    },
};
383
/* 11 DWs */
/* Per-QP lambda (RD cost weight) table for I frames: 42 QP entries plus
 * two bytes of padding.  Identical values to vdenc_const_qp_lambda_p
 * below. */
static const uint8_t vdenc_const_qp_lambda[44] = {
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
    0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
    0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
    0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
    0x4a, 0x53, 0x00, 0x00
};
392
/* I-frame skip / SIC coefficient thresholds.  These tables are
 * intentionally all-zero (skip and coefficient suppression disabled for
 * intra frames; compare the populated *_p variants below).
 *
 * Fix: the initializers were empty braces "{ }", which is a GNU extension
 * and a constraint violation in ISO C before C23.  A single explicit 0 is
 * strictly conforming; all remaining elements are implicitly
 * zero-initialized (C11 6.7.9p21). */

/* 14 DWs */
static const uint16_t vdenc_const_skip_threshold[28] = {
    0
};

/* 14 DWs */
static const uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0[28] = {
    0
};

/* 7 DWs */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1[28] = {
    0
};

/* 7 DWs */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2[28] = {
    0
};

/* 7 DWs */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3[28] = {
    0
};
417
/* P frame */
/* 11 DWs */
/* Per-QP lambda table for P frames (same values as the I-frame table
 * above). */
static const uint8_t vdenc_const_qp_lambda_p[44] = {
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
    0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
    0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
    0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
    0x4a, 0x53, 0x00, 0x00
};

/* 14 DWs */
/* Per-QP skip-decision thresholds for P frames (26 QP entries + 2 pad). */
static const uint16_t vdenc_const_skip_threshold_p[28] = {
    0x0000, 0x0000, 0x0000, 0x0000, 0x0002, 0x0004, 0x0007, 0x000b,
    0x0011, 0x0019, 0x0023, 0x0032, 0x0044, 0x005b, 0x0077, 0x0099,
    0x00c2, 0x00f1, 0x0128, 0x0168, 0x01b0, 0x0201, 0x025c, 0x02c2,
    0x0333, 0x03b0, 0x0000, 0x0000
};

/* 14 DWs */
/* SIC forward-transform coefficient thresholds for P frames, sets 0-3;
 * per-QP values consumed by the VDEnc hardware — semantics defined in the
 * VDEnc PRM. */
static const uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0_p[28] = {
    0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
    0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x3f,
    0x4e, 0x51, 0x5b, 0x63, 0x6f, 0x7f, 0x00, 0x00
};

/* 7 DWs */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1_p[28] = {
    0x03, 0x04, 0x05, 0x05, 0x07, 0x09, 0x0b, 0x0e, 0x12, 0x17,
    0x1c, 0x21, 0x27, 0x2c, 0x33, 0x3b, 0x41, 0x51, 0x5c, 0x1a,
    0x1e, 0x21, 0x22, 0x26, 0x2c, 0x30, 0x00, 0x00
};

/* 7 DWs */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2_p[28] = {
    0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
    0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x0f,
    0x13, 0x14, 0x16, 0x18, 0x1b, 0x1f, 0x00, 0x00
};

/* 7 DWs */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3_p[28] = {
    0x04, 0x05, 0x06, 0x09, 0x0b, 0x0d, 0x12, 0x16, 0x1b, 0x23,
    0x2c, 0x33, 0x3d, 0x45, 0x4f, 0x5b, 0x66, 0x7f, 0x8e, 0x2a,
    0x2f, 0x32, 0x37, 0x3c, 0x45, 0x4c, 0x00, 0x00
};
463
/* BRC deviation thresholds: negative/positive buffer-deviation breakpoints
 * (presumably fractions of the VBV buffer) for I frames, P/B frames, and
 * the VBR case — TODO confirm units against the BRC init code that
 * consumes these. */
static const double
vdenc_brc_dev_threshi0_fp_neg[4] = { 0.80, 0.60, 0.34, 0.2 };

static const double
vdenc_brc_dev_threshi0_fp_pos[4] = { 0.2, 0.4, 0.66, 0.9 };

static const double
vdenc_brc_dev_threshpb0_fp_neg[4] = { 0.90, 0.66, 0.46, 0.3 };

static const double
vdenc_brc_dev_threshpb0_fp_pos[4] = { 0.3, 0.46, 0.70, 0.90 };

static const double
vdenc_brc_dev_threshvbr0_neg[4] = { 0.90, 0.70, 0.50, 0.3 };

static const double
vdenc_brc_dev_threshvbr0_pos[4] = { 0.4, 0.5, 0.75, 0.90 };

/* Estimated-rate bucket thresholds for P and I frames. */
static const unsigned char
vdenc_brc_estrate_thresh_p0[7] = { 4, 8, 12, 16, 20, 24, 28 };

static const unsigned char
vdenc_brc_estrate_thresh_i0[7] = { 4, 8, 12, 16, 20, 24, 28 };

/* Frame-count breakpoints after which global rate adjustment kicks in,
 * with the matching multiplier/divisor pairs below. */
static const uint16_t
vdenc_brc_start_global_adjust_frame[4] = { 10, 50, 100, 150 };

/* Achieved/target rate ratio breakpoints (percent) ... */
static const uint8_t
vdenc_brc_global_rate_ratio_threshold[7] = { 80, 90, 95, 101, 105, 115, 130};

static const uint8_t
vdenc_brc_start_global_adjust_mult[5] = { 1, 1, 3, 2, 1 };

static const uint8_t
vdenc_brc_start_global_adjust_div[5] = { 40, 5, 5, 3, 1 };

/* ... and the QP delta applied for each of the 8 resulting ratio buckets. */
static const int8_t
vdenc_brc_global_rate_ratio_threshold_qp[8] = { -3, -2, -1, 0, 1, 1, 2, 3 };
502
/* Mode-decision cost LUTs, indexed [slice type][LUT mode][QP 0-51].
 * Slice type 0 = intra slice, 1 = P slice (no B table here).  Values are
 * raw costs that get packed via map_44_lut_value() before being programmed;
 * the LUTMODE_* row meanings are given in the per-row comments. */
static const int vdenc_mode_const[2][12][52] = {
    //INTRASLICE
    {
        //LUTMODE_INTRA_NONPRED
        {
            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,         //QP=[0 ~12]
            16, 18, 22, 24, 13, 15, 16, 18, 13, 15, 15, 12, 14,         //QP=[13~25]
            12, 12, 10, 10, 11, 10, 10, 10, 9, 9, 8, 8, 8,              //QP=[26~38]
            8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7,                      //QP=[39~51]
        },

        //LUTMODE_INTRA_16x16, LUTMODE_INTRA
        {
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[0 ~12]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[13~25]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[26~38]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[39~51]
        },

        //LUTMODE_INTRA_8x8
        {
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  //QP=[0 ~12]
            0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,  //QP=[13~25]
            1, 1, 1, 1, 1, 4, 4, 4, 4, 6, 6, 6, 6,  //QP=[26~38]
            6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7,  //QP=[39~51]
        },

        //LUTMODE_INTRA_4x4
        {
            56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,   //QP=[0 ~12]
            64, 72, 80, 88, 48, 56, 64, 72, 53, 59, 64, 56, 64,   //QP=[13~25]
            57, 64, 58, 55, 64, 64, 64, 64, 59, 59, 60, 57, 50,   //QP=[26~38]
            46, 42, 38, 34, 31, 27, 23, 22, 19, 18, 16, 14, 13,   //QP=[39~51]
        },

        /* Inter modes are unused on an intra slice — all-zero rows. */
        //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
        { 0, },

        //LUTMODE_INTER_8X8Q
        { 0, },

        //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16x8_FIELD
        { 0, },

        //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8X8_FIELD
        { 0, },

        //LUTMODE_INTER_16x16, LUTMODE_INTER
        { 0, },

        //LUTMODE_INTER_BWD
        { 0, },

        //LUTMODE_REF_ID
        { 0, },

        //LUTMODE_INTRA_CHROMA
        { 0, },
    },

    //PREDSLICE
    {
        //LUTMODE_INTRA_NONPRED
        {
            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,     //QP=[0 ~12]
            7, 8, 9, 10, 5, 6, 7, 8, 6, 7, 7, 7, 7,    //QP=[13~25]
            6, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7,     //QP=[26~38]
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     //QP=[39~51]
        },

        //LUTMODE_INTRA_16x16, LUTMODE_INTRA
        {
            21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
            24, 28, 31, 35, 19, 21, 24, 28, 20, 24, 25, 21, 24,
            24, 24, 24, 21, 24, 24, 26, 24, 24, 24, 24, 24, 24,
            24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,

        },

        //LUTMODE_INTRA_8x8
        {
            26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,   //QP=[0 ~12]
            28, 32, 36, 40, 22, 26, 28, 32, 24, 26, 30, 26, 28,   //QP=[13~25]
            26, 28, 26, 26, 30, 28, 28, 28, 26, 28, 28, 26, 28,   //QP=[26~38]
            28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,   //QP=[39~51]
        },

        //LUTMODE_INTRA_4x4
        {
            64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,   //QP=[0 ~12]
            72, 80, 88, 104, 56, 64, 72, 80, 58, 68, 76, 64, 68,  //QP=[13~25]
            64, 68, 68, 64, 70, 70, 70, 70, 68, 68, 68, 68, 68,   //QP=[26~38]
            68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,   //QP=[39~51]
        },

        //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
        {
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,      //QP=[0 ~12]
            8, 9, 11, 12, 6, 7, 9, 10, 7, 8, 9, 8, 9,   //QP=[13~25]
            8, 9, 8, 8, 9, 9, 9, 9, 8, 8, 8, 8, 8,      //QP=[26~38]
            8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,      //QP=[39~51]
        },

        //LUTMODE_INTER_8X8Q
        {
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,   //QP=[0 ~12]
            2, 3, 3, 3, 2, 2, 2, 3, 2, 2, 2, 2, 3,   //QP=[13~25]
            2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   //QP=[26~38]
            3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   //QP=[39~51]
        },

        //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16X8_FIELD
        {
            5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[0 ~12]
            5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[13~25]
            5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[26~38]
            5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[39~51]
        },

        //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8x8_FIELD
        {
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[0 ~12]
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[13~25]
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[26~38]
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[39~51]
        },

        //LUTMODE_INTER_16x16, LUTMODE_INTER
        {
            5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[0 ~12]
            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   //QP=[13~25]
            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   //QP=[26~38]
            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   //QP=[39~51]
        },

        //LUTMODE_INTER_BWD
        {
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[0 ~12]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[13~25]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[26~38]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[39~51]
        },

        //LUTMODE_REF_ID
        {
            4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[0 ~12]
            4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[13~25]
            4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[26~38]
            4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[39~51]
        },

        //LUTMODE_INTRA_CHROMA
        {
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[0 ~12]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[13~25]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[26~38]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[39~51]
        },
    },
};
663
/* Quarter-pel MV cost / skip-bias values for P slices, 8 entries —
 * presumably one per MV-magnitude bucket, matching vdenc_hme_cost below;
 * TODO confirm against the VDEnc cost-state programming. */
static const int vdenc_mv_cost_skipbias_qpel[8] = {
    //PREDSLICE
    0, 6, 6, 9, 10, 13, 14, 16
};
668
/* HME motion-vector cost table, indexed [MV-magnitude bucket][QP 0-51].
 * The 8 buckets cover mv = 0, <=16, <=32, <=64, <=128, <=256, <=512,
 * <=1024 (quarter-pel units per the VDEnc spec — confirm).  Raw costs;
 * packed with map_44_lut_value() before hardware programming. */
static const int vdenc_hme_cost[8][52] = {
    //mv=0
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[0 ~12]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[13 ~25]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[26 ~38]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[39 ~51]
    },
    //mv<=16
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[0 ~12]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[13 ~25]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[26 ~38]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[39 ~51]
    },
    //mv<=32
    {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[0 ~12]
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[13 ~25]
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[26 ~38]
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[39 ~51]
    },
    //mv<=64
    {
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[0 ~12]
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[13 ~25]
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[26 ~38]
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[39 ~51]
    },
    //mv<=128
    {
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[0 ~12]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[13 ~25]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[26 ~38]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[39 ~51]
    },
    //mv<=256
    {
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[0 ~12]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[13 ~25]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[26 ~38]
        10, 10, 10, 10, 20, 30, 40, 50, 50, 50, 50, 50, 50,     //QP=[39 ~51]
    },
    //mv<=512
    {
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[0 ~12]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[13 ~25]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[26 ~38]
        20, 20, 20, 40, 60, 80, 100, 100, 100, 100, 100, 100, 100,     //QP=[39 ~51]
    },

    //mv<=1024
    {
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[0 ~12]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[13 ~25]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[26 ~38]
        20, 20, 30, 50, 100, 200, 200, 200, 200, 200, 200, 200, 200,     //QP=[39 ~51]
    },
};
728
/* Emit a 2-DW (64-bit) buffer address into the BCS batch: a 64-bit
 * relocation when `bo` is non-NULL (with `is_target` selecting whether the
 * GPU writes the buffer, i.e. the write domain), or two zero DWs when no
 * buffer is bound.  `delta` is the byte offset added to the relocated
 * address.  Multi-statement macro wrapped in do/while(0) so it behaves as
 * one statement. */
#define OUT_BUFFER_2DW(batch, bo, is_target, delta)  do {               \
        if (bo) {                                                       \
            OUT_BCS_RELOC64(batch,                                      \
                            bo,                                         \
                            I915_GEM_DOMAIN_RENDER,                     \
                            is_target ? I915_GEM_DOMAIN_RENDER : 0,     \
                            delta);                                     \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
    } while (0)
741
/* Like OUT_BUFFER_2DW but appends a third DW carrying the MOCS (memory
 * object cache control) value.  NOTE: relies on a variable named `i965`
 * being in scope at every expansion site, and the `attr` parameter is
 * accepted but unused. */
#define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr)  do { \
        OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
        OUT_BCS_BATCH(batch, i965->intel.mocs_state);                             \
    } while (0)
746
/*
 * Allocate a linear (untiled) 1D GPE buffer of `bfsize` bytes and fill
 * in the i965_gpe_resource descriptor `buffer` in place; `des` is the
 * human-readable name attached to the allocation.  Relies on an `i965`
 * variable being in scope at the expansion site.
 */
#define ALLOC_VDENC_BUFFER_RESOURCE(buffer, bfsize, des) do {   \
        buffer.type = I965_GPE_RESOURCE_BUFFER;                 \
        buffer.width = bfsize;                                  \
        buffer.height = 1;                                      \
        buffer.pitch = buffer.width;                            \
        buffer.size = buffer.pitch;                             \
        buffer.tiling = I915_TILING_NONE;                       \
        i965_allocate_gpe_resource(i965->intel.bufmgr,          \
                                   &buffer,                     \
                                   bfsize,                      \
                                   (des));                      \
    } while (0)
759
/*
 * Map an H.264 level (expressed as level_idc, i.e. level * 10) to the
 * vertical motion-vector range used at that level.  Levels that fall
 * between the listed thresholds get the range of the next threshold up;
 * values below 1.0 share the level-2.0 range.
 */
static int
gen9_vdenc_get_max_vmv_range(int level)
{
    if (level == 10)
        return 256;

    if (level <= 20)
        return 512;

    if (level <= 30)
        return 1024;

    return 2048;
}
776
777 static unsigned char
778 map_44_lut_value(unsigned int v, unsigned char max)
779 {
780     unsigned int maxcost;
781     int d;
782     unsigned char ret;
783
784     if (v == 0) {
785         return 0;
786     }
787
788     maxcost = ((max & 15) << (max >> 4));
789
790     if (v >= maxcost) {
791         return max;
792     }
793
794     d = (int)(log((double)v) / log(2.0)) - 3;
795
796     if (d < 0) {
797         d = 0;
798     }
799
800     ret = (unsigned char)((d << 4) + (int)((v + (d == 0 ? 0 : (1 << (d - 1)))) >> d));
801     ret = (ret & 0xf) == 0 ? (ret | 8) : ret;
802
803     return ret;
804 }
805
806 static void
807 gen9_vdenc_update_misc_parameters(VADriverContextP ctx,
808                                   struct encode_state *encode_state,
809                                   struct intel_encoder_context *encoder_context)
810 {
811     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
812     int i;
813
814     vdenc_context->gop_size = encoder_context->brc.gop_size;
815     vdenc_context->ref_dist = encoder_context->brc.num_bframes_in_gop + 1;
816
817     if (vdenc_context->internal_rate_mode != I965_BRC_CQP &&
818         encoder_context->brc.need_reset) {
819         /* So far, vdenc doesn't support temporal layer */
820         vdenc_context->framerate = encoder_context->brc.framerate[0];
821
822         vdenc_context->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
823         vdenc_context->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
824
825         vdenc_context->max_bit_rate = encoder_context->brc.bits_per_second[0];
826         vdenc_context->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
827         vdenc_context->brc_need_reset = (vdenc_context->brc_initted && encoder_context->brc.need_reset);
828
829         if (vdenc_context->internal_rate_mode == I965_BRC_CBR) {
830             vdenc_context->min_bit_rate = vdenc_context->max_bit_rate;
831             vdenc_context->target_bit_rate = vdenc_context->max_bit_rate;
832         } else {
833             assert(vdenc_context->internal_rate_mode == I965_BRC_VBR);
834             vdenc_context->min_bit_rate = vdenc_context->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
835             vdenc_context->target_bit_rate = vdenc_context->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
836         }
837     }
838
839     vdenc_context->mb_brc_enabled = 1;
840     vdenc_context->num_roi = MIN(encoder_context->brc.num_roi, 3);
841     vdenc_context->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
842     vdenc_context->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
843     vdenc_context->vdenc_streamin_enable = !!vdenc_context->num_roi;
844
845     for (i = 0; i < vdenc_context->num_roi; i++) {
846         vdenc_context->roi[i].left = encoder_context->brc.roi[i].left >> 4;
847         vdenc_context->roi[i].right = encoder_context->brc.roi[i].right >> 4;
848         vdenc_context->roi[i].top = encoder_context->brc.roi[i].top >> 4;
849         vdenc_context->roi[i].bottom = encoder_context->brc.roi[i].bottom >> 4;
850         vdenc_context->roi[i].value = encoder_context->brc.roi[i].value;
851     }
852 }
853
/*
 * Refresh the per-frame encoding state from the sequence/picture
 * parameter buffers: frame geometry, 4x-downscaled geometry, BRC
 * enablement and the number of PAK passes for this frame.
 */
static void
gen9_vdenc_update_parameters(VADriverContextP ctx,
                             VAProfile profile,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

    /* 8x8 transform is only honored for High profile. */
    if (profile == VAProfileH264High)
        vdenc_context->transform_8x8_mode_enable = !!pic_param->pic_fields.bits.transform_8x8_mode_flag;
    else
        vdenc_context->transform_8x8_mode_enable = 0;

    vdenc_context->frame_width_in_mbs = seq_param->picture_width_in_mbs;
    vdenc_context->frame_height_in_mbs = seq_param->picture_height_in_mbs;

    vdenc_context->frame_width = vdenc_context->frame_width_in_mbs * 16;
    vdenc_context->frame_height = vdenc_context->frame_height_in_mbs * 16;

    /* Geometry of the 4x-downscaled surface.  NOTE(review): the
     * height is first halved per field, then re-aligned to 32 lines and
     * doubled -- presumably to satisfy field alignment requirements of
     * the scaling/HME hardware; confirm against the kernel docs. */
    vdenc_context->down_scaled_width_in_mb4x = WIDTH_IN_MACROBLOCKS(vdenc_context->frame_width / SCALE_FACTOR_4X);
    vdenc_context->down_scaled_height_in_mb4x = HEIGHT_IN_MACROBLOCKS(vdenc_context->frame_height / SCALE_FACTOR_4X);
    vdenc_context->down_scaled_width_4x = vdenc_context->down_scaled_width_in_mb4x * 16;
    vdenc_context->down_scaled_height_4x = ((vdenc_context->down_scaled_height_in_mb4x + 1) >> 1) * 16;
    vdenc_context->down_scaled_height_4x = ALIGN(vdenc_context->down_scaled_height_4x, 32) << 1;

    gen9_vdenc_update_misc_parameters(ctx, encode_state, encoder_context);

    vdenc_context->current_pass = 0;
    vdenc_context->num_passes = 1;

    /* BRC is only active for CBR/VBR ... */
    if (vdenc_context->internal_rate_mode == I965_BRC_CBR ||
        vdenc_context->internal_rate_mode == I965_BRC_VBR)
        vdenc_context->brc_enabled = 1;
    else
        vdenc_context->brc_enabled = 0;

    /* ... and only if every rate/HRD parameter it needs is non-zero. */
    if (vdenc_context->brc_enabled &&
        (!vdenc_context->init_vbv_buffer_fullness_in_bit ||
         !vdenc_context->vbv_buffer_size_in_bit ||
         !vdenc_context->max_bit_rate ||
         !vdenc_context->target_bit_rate ||
         !vdenc_context->framerate.num ||
         !vdenc_context->framerate.den))
        vdenc_context->brc_enabled = 0;

    if (!vdenc_context->brc_enabled) {
        /* CQP: clear the rate parameters, single PAK pass. */
        vdenc_context->target_bit_rate = 0;
        vdenc_context->max_bit_rate = 0;
        vdenc_context->min_bit_rate = 0;
        vdenc_context->init_vbv_buffer_fullness_in_bit = 0;
        vdenc_context->vbv_buffer_size_in_bit = 0;
    } else {
        /* BRC may re-run PAK to converge on the target rate. */
        vdenc_context->num_passes = NUM_OF_BRC_PAK_PASSES;
    }
}
911
912 static void
913 gen9_vdenc_avc_calculate_mode_cost(VADriverContextP ctx,
914                                    struct encode_state *encode_state,
915                                    struct intel_encoder_context *encoder_context,
916                                    int qp)
917 {
918     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
919     unsigned int frame_type = vdenc_context->frame_type;
920
921     memset(vdenc_context->mode_cost, 0, sizeof(vdenc_context->mode_cost));
922     memset(vdenc_context->mv_cost, 0, sizeof(vdenc_context->mv_cost));
923     memset(vdenc_context->hme_mv_cost, 0, sizeof(vdenc_context->hme_mv_cost));
924
925     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_NONPRED] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_NONPRED][qp]), 0x6f);
926     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_16x16][qp]), 0x8f);
927     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_8x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_8x8][qp]), 0x8f);
928     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_4x4] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_4x4][qp]), 0x8f);
929
930     if (frame_type == VDENC_FRAME_P) {
931         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x16][qp]), 0x8f);
932         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x8][qp]), 0x8f);
933         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X8Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X8Q][qp]), 0x6f);
934         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X4Q][qp]), 0x6f);
935         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_4X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_4X4Q][qp]), 0x6f);
936         vdenc_context->mode_cost[VDENC_LUTMODE_REF_ID] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_REF_ID][qp]), 0x6f);
937
938         vdenc_context->mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[0]), 0x6f);
939         vdenc_context->mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[1]), 0x6f);
940         vdenc_context->mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[2]), 0x6f);
941         vdenc_context->mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[3]), 0x6f);
942         vdenc_context->mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[4]), 0x6f);
943         vdenc_context->mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[5]), 0x6f);
944         vdenc_context->mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[6]), 0x6f);
945         vdenc_context->mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[7]), 0x6f);
946
947         vdenc_context->hme_mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_hme_cost[0][qp]), 0x6f);
948         vdenc_context->hme_mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_hme_cost[1][qp]), 0x6f);
949         vdenc_context->hme_mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_hme_cost[2][qp]), 0x6f);
950         vdenc_context->hme_mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_hme_cost[3][qp]), 0x6f);
951         vdenc_context->hme_mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_hme_cost[4][qp]), 0x6f);
952         vdenc_context->hme_mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_hme_cost[5][qp]), 0x6f);
953         vdenc_context->hme_mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_hme_cost[6][qp]), 0x6f);
954         vdenc_context->hme_mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_hme_cost[7][qp]), 0x6f);
955     }
956 }
957
958 static void
959 gen9_vdenc_update_roi_in_streamin_state(VADriverContextP ctx,
960                                         struct intel_encoder_context *encoder_context)
961 {
962     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
963     struct gen9_vdenc_streamin_state *streamin_state;
964     int row, col, i;
965
966     if (!vdenc_context->num_roi)
967         return;
968
969     streamin_state = (struct gen9_vdenc_streamin_state *)i965_map_gpe_resource(&vdenc_context->vdenc_streamin_res);
970
971     if (!streamin_state)
972         return;
973
974     for (col = 0;  col < vdenc_context->frame_width_in_mbs; col++) {
975         for (row = 0; row < vdenc_context->frame_height_in_mbs; row++) {
976             streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = 0; /* non-ROI region */
977
978             /* The last one has higher priority */
979             for (i = vdenc_context->num_roi - 1; i >= 0; i--) {
980                 if ((col >= vdenc_context->roi[i].left && col <= vdenc_context->roi[i].right) &&
981                     (row >= vdenc_context->roi[i].top && row <= vdenc_context->roi[i].bottom)) {
982                     streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = i + 1;
983
984                     break;
985                 }
986             }
987         }
988     }
989
990     i965_unmap_gpe_resource(&vdenc_context->vdenc_streamin_res);
991 }
992
993 static VAStatus
994 gen9_vdenc_avc_prepare(VADriverContextP ctx,
995                        VAProfile profile,
996                        struct encode_state *encode_state,
997                        struct intel_encoder_context *encoder_context)
998 {
999     struct i965_driver_data *i965 = i965_driver_data(ctx);
1000     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1001     struct i965_coded_buffer_segment *coded_buffer_segment;
1002     struct object_surface *obj_surface;
1003     struct object_buffer *obj_buffer;
1004     VAEncPictureParameterBufferH264 *pic_param;
1005     VAEncSliceParameterBufferH264 *slice_param;
1006     VDEncAvcSurface *vdenc_avc_surface;
1007     dri_bo *bo;
1008     int i, j, enable_avc_ildb = 0;
1009     int qp;
1010     char *pbuffer;
1011
1012     gen9_vdenc_update_parameters(ctx, profile, encode_state, encoder_context);
1013
1014     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
1015         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
1016         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
1017
1018         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
1019             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1020                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1021                    (slice_param->slice_type == SLICE_TYPE_P) ||
1022                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1023                    (slice_param->slice_type == SLICE_TYPE_B));
1024
1025             if (slice_param->disable_deblocking_filter_idc != 1) {
1026                 enable_avc_ildb = 1;
1027                 break;
1028             }
1029
1030             slice_param++;
1031         }
1032     }
1033
1034     /* Setup current frame */
1035     obj_surface = encode_state->reconstructed_object;
1036     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1037
1038     if (obj_surface->private_data == NULL) {
1039         vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
1040         assert(vdenc_avc_surface);
1041
1042         vdenc_avc_surface->ctx = ctx;
1043         i965_CreateSurfaces(ctx,
1044                             vdenc_context->down_scaled_width_4x,
1045                             vdenc_context->down_scaled_height_4x,
1046                             VA_RT_FORMAT_YUV420,
1047                             1,
1048                             &vdenc_avc_surface->scaled_4x_surface_id);
1049         vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
1050         assert(vdenc_avc_surface->scaled_4x_surface_obj);
1051         i965_check_alloc_surface_bo(ctx,
1052                                     vdenc_avc_surface->scaled_4x_surface_obj,
1053                                     1,
1054                                     VA_FOURCC_NV12,
1055                                     SUBSAMPLE_YUV420);
1056
1057         obj_surface->private_data = (void *)vdenc_avc_surface;
1058         obj_surface->free_private_data = (void *)vdenc_free_avc_surface;
1059     }
1060
1061     vdenc_avc_surface = (VDEncAvcSurface *)obj_surface->private_data;
1062     assert(vdenc_avc_surface->scaled_4x_surface_obj);
1063
1064     /* Reconstructed surfaces */
1065     i965_free_gpe_resource(&vdenc_context->recon_surface_res);
1066     i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
1067     i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
1068     i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
1069
1070     i965_object_surface_to_2d_gpe_resource(&vdenc_context->recon_surface_res, obj_surface);
1071     i965_object_surface_to_2d_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res, vdenc_avc_surface->scaled_4x_surface_obj);
1072
1073     if (enable_avc_ildb) {
1074         i965_object_surface_to_2d_gpe_resource(&vdenc_context->post_deblocking_output_res, obj_surface);
1075     } else {
1076         i965_object_surface_to_2d_gpe_resource(&vdenc_context->pre_deblocking_output_res, obj_surface);
1077     }
1078
1079
1080     /* Reference surfaces */
1081     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
1082         assert(ARRAY_ELEMS(vdenc_context->list_reference_res) ==
1083                ARRAY_ELEMS(vdenc_context->list_scaled_4x_reference_res));
1084         i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
1085         i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
1086         obj_surface = encode_state->reference_objects[i];
1087
1088         if (obj_surface && obj_surface->bo) {
1089             i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_reference_res[i], obj_surface);
1090
1091             if (obj_surface->private_data == NULL) {
1092                 vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
1093                 assert(vdenc_avc_surface);
1094
1095                 vdenc_avc_surface->ctx = ctx;
1096                 i965_CreateSurfaces(ctx,
1097                                     vdenc_context->down_scaled_width_4x,
1098                                     vdenc_context->down_scaled_height_4x,
1099                                     VA_RT_FORMAT_YUV420,
1100                                     1,
1101                                     &vdenc_avc_surface->scaled_4x_surface_id);
1102                 vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
1103                 assert(vdenc_avc_surface->scaled_4x_surface_obj);
1104                 i965_check_alloc_surface_bo(ctx,
1105                                             vdenc_avc_surface->scaled_4x_surface_obj,
1106                                             1,
1107                                             VA_FOURCC_NV12,
1108                                             SUBSAMPLE_YUV420);
1109
1110                 obj_surface->private_data = vdenc_avc_surface;
1111                 obj_surface->free_private_data = gen_free_avc_surface;
1112             }
1113
1114             vdenc_avc_surface = obj_surface->private_data;
1115             i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i], vdenc_avc_surface->scaled_4x_surface_obj);
1116         }
1117     }
1118
1119     /* Input YUV surface */
1120     i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
1121     i965_object_surface_to_2d_gpe_resource(&vdenc_context->uncompressed_input_surface_res, encode_state->input_yuv_object);
1122
1123     /* Encoded bitstream */
1124     obj_buffer = encode_state->coded_buf_object;
1125     bo = obj_buffer->buffer_store->bo;
1126     i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
1127     i965_dri_object_to_buffer_gpe_resource(&vdenc_context->compressed_bitstream.res, bo);
1128     vdenc_context->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
1129     vdenc_context->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
1130
1131     /* Status buffer */
1132     i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
1133     i965_dri_object_to_buffer_gpe_resource(&vdenc_context->status_bffuer.res, bo);
1134     vdenc_context->status_bffuer.base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
1135     vdenc_context->status_bffuer.size = ALIGN(sizeof(struct gen9_vdenc_status), 64);
1136     vdenc_context->status_bffuer.bytes_per_frame_offset = offsetof(struct gen9_vdenc_status, bytes_per_frame);
1137     assert(vdenc_context->status_bffuer.base_offset + vdenc_context->status_bffuer.size <
1138            vdenc_context->compressed_bitstream.start_offset);
1139
1140     dri_bo_map(bo, 1);
1141
1142     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
1143     coded_buffer_segment->mapped = 0;
1144     coded_buffer_segment->codec = encoder_context->codec;
1145     coded_buffer_segment->status_support = 1;
1146
1147     pbuffer = bo->virtual;
1148     pbuffer += vdenc_context->status_bffuer.base_offset;
1149     memset(pbuffer, 0, vdenc_context->status_bffuer.size);
1150
1151     dri_bo_unmap(bo);
1152
1153     i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
1154     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_intra_row_store_scratch_res,
1155                                 vdenc_context->frame_width_in_mbs * 64,
1156                                 "Intra row store scratch buffer");
1157
1158     i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
1159     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_deblocking_filter_row_store_scratch_res,
1160                                 vdenc_context->frame_width_in_mbs * 256,
1161                                 "Deblocking filter row store scratch buffer");
1162
1163     i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
1164     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_bsd_mpc_row_store_scratch_res,
1165                                 vdenc_context->frame_width_in_mbs * 128,
1166                                 "BSD/MPC row store scratch buffer");
1167
1168     i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
1169     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_row_store_scratch_res,
1170                                 vdenc_context->frame_width_in_mbs * 64,
1171                                 "VDENC row store scratch buffer");
1172
1173     assert(sizeof(struct gen9_vdenc_streamin_state) == 64);
1174     i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
1175     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_streamin_res,
1176                                 vdenc_context->frame_width_in_mbs *
1177                                 vdenc_context->frame_height_in_mbs *
1178                                 sizeof(struct gen9_vdenc_streamin_state),
1179                                 "VDENC StreamIn buffer");
1180
1181     /*
1182      * Calculate the index for each reference surface in list0 for the first slice
1183      * TODO: other slices
1184      */
1185     pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1186     slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1187
1188     vdenc_context->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
1189
1190     if (slice_param->num_ref_idx_active_override_flag)
1191         vdenc_context->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
1192
1193     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_ref_idx[0]); i++) {
1194         vdenc_context->list_ref_idx[0][i] = 0xFF;
1195     }
1196
1197     if (vdenc_context->num_refs[0] > ARRAY_ELEMS(vdenc_context->list_ref_idx[0]))
1198         return VA_STATUS_ERROR_INVALID_VALUE;
1199
1200     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_ref_idx[0]); i++) {
1201         VAPictureH264 *va_pic;
1202
1203         assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(vdenc_context->list_ref_idx[0]));
1204
1205         if (i >= vdenc_context->num_refs[0])
1206             continue;
1207
1208         va_pic = &slice_param->RefPicList0[i];
1209
1210         for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
1211             obj_surface = encode_state->reference_objects[j];
1212
1213             if (obj_surface &&
1214                 obj_surface->bo &&
1215                 obj_surface->base.id == va_pic->picture_id) {
1216
1217                 assert(obj_surface->base.id != VA_INVALID_SURFACE);
1218                 vdenc_context->list_ref_idx[0][i] = j;
1219
1220                 break;
1221             }
1222         }
1223     }
1224
1225     if (slice_param->slice_type == SLICE_TYPE_I ||
1226         slice_param->slice_type == SLICE_TYPE_SI)
1227         vdenc_context->frame_type = VDENC_FRAME_I;
1228     else
1229         vdenc_context->frame_type = VDENC_FRAME_P;
1230
1231     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1232
1233     gen9_vdenc_avc_calculate_mode_cost(ctx, encode_state, encoder_context, qp);
1234     gen9_vdenc_update_roi_in_streamin_state(ctx, encoder_context);
1235
1236     return VA_STATUS_SUCCESS;
1237 }
1238
1239 static void
1240 gen9_vdenc_huc_pipe_mode_select(VADriverContextP ctx,
1241                                 struct intel_encoder_context *encoder_context,
1242                                 struct huc_pipe_mode_select_parameter *params)
1243 {
1244     struct intel_batchbuffer *batch = encoder_context->base.batch;
1245
1246     BEGIN_BCS_BATCH(batch, 3);
1247
1248     OUT_BCS_BATCH(batch, HUC_PIPE_MODE_SELECT | (3 - 2));
1249     OUT_BCS_BATCH(batch,
1250                   (params->huc_stream_object_enable << 10) |
1251                   (params->indirect_stream_out_enable << 4));
1252     OUT_BCS_BATCH(batch,
1253                   params->media_soft_reset_counter);
1254
1255     ADVANCE_BCS_BATCH(batch);
1256 }
1257
1258 static void
1259 gen9_vdenc_huc_imem_state(VADriverContextP ctx,
1260                           struct intel_encoder_context *encoder_context,
1261                           struct huc_imem_state_parameter *params)
1262 {
1263     struct intel_batchbuffer *batch = encoder_context->base.batch;
1264
1265     BEGIN_BCS_BATCH(batch, 5);
1266
1267     OUT_BCS_BATCH(batch, HUC_IMEM_STATE | (5 - 2));
1268     OUT_BCS_BATCH(batch, 0);
1269     OUT_BCS_BATCH(batch, 0);
1270     OUT_BCS_BATCH(batch, 0);
1271     OUT_BCS_BATCH(batch, params->huc_firmware_descriptor);
1272
1273     ADVANCE_BCS_BATCH(batch);
1274 }
1275
/*
 * Emit HUC_DMEM_STATE: points the HuC at its data memory (DMEM) image
 * -- source buffer address, destination base inside DMEM, and length.
 * The local `i965` is required by OUT_BUFFER_3DW for the MOCS DWord.
 */
static void
gen9_vdenc_huc_dmem_state(VADriverContextP ctx,
                          struct intel_encoder_context *encoder_context,
                          struct huc_dmem_state_parameter *params)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, HUC_DMEM_STATE | (6 - 2));
    OUT_BUFFER_3DW(batch, params->huc_data_source_res->bo, 0, 0, 0);   /* DW1..3: source address + MOCS */
    OUT_BCS_BATCH(batch, params->huc_data_destination_base_address);
    OUT_BCS_BATCH(batch, params->huc_data_length);

    ADVANCE_BCS_BATCH(batch);
}
1293
1294 /*
1295 static void
1296 gen9_vdenc_huc_cfg_state(VADriverContextP ctx,
1297                          struct intel_encoder_context *encoder_context,
1298                          struct huc_cfg_state_parameter *params)
1299 {
1300     struct intel_batchbuffer *batch = encoder_context->base.batch;
1301
1302     BEGIN_BCS_BATCH(batch, 2);
1303
1304     OUT_BCS_BATCH(batch, HUC_CFG_STATE | (2 - 2));
1305     OUT_BCS_BATCH(batch, !!params->force_reset);
1306
1307     ADVANCE_BCS_BATCH(batch);
1308 }
1309 */
/*
 * Emit HUC_VIRTUAL_ADDR_STATE: binds the 16 HuC surface regions.  Each
 * region occupies 3 DWords (64-bit address + MOCS); regions without a
 * backing bo are emitted as zeros.  `is_target` marks regions the HuC
 * writes.  The local `i965` is required by OUT_BUFFER_3DW.
 */
static void
gen9_vdenc_huc_virtual_addr_state(VADriverContextP ctx,
                                  struct intel_encoder_context *encoder_context,
                                  struct huc_virtual_addr_parameter *params)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 49);

    OUT_BCS_BATCH(batch, HUC_VIRTUAL_ADDR_STATE | (49 - 2));

    for (i = 0; i < 16; i++) {
        if (params->regions[i].huc_surface_res && params->regions[i].huc_surface_res->bo)
            OUT_BUFFER_3DW(batch,
                           params->regions[i].huc_surface_res->bo,
                           !!params->regions[i].is_target, 0, 0);
        else
            OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
1334
/*
 * Emit HUC_IND_OBJ_BASE_ADDR_STATE: sets the indirect stream-in and
 * stream-out object base addresses.  The access upper-bound entries
 * (2 DWords each) are left zero, i.e. unbounded.  The local `i965` is
 * required by OUT_BUFFER_3DW.
 */
static void
gen9_vdenc_huc_ind_obj_base_addr_state(VADriverContextP ctx,
                                       struct intel_encoder_context *encoder_context,
                                       struct huc_ind_obj_base_addr_parameter *params)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, HUC_IND_OBJ_BASE_ADDR_STATE | (11 - 2));

    /* DW1..3: stream-in base (read-only for the HuC). */
    if (params->huc_indirect_stream_in_object_res)
        OUT_BUFFER_3DW(batch,
                       params->huc_indirect_stream_in_object_res->bo,
                       0, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */

    /* DW6..8: stream-out base (write target). */
    if (params->huc_indirect_stream_out_object_res)
        OUT_BUFFER_3DW(batch,
                       params->huc_indirect_stream_out_object_res->bo,
                       1, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */

    ADVANCE_BCS_BATCH(batch);
}
1367
/*
 * Snapshot the HuC status into the huc_status2 buffer: DWord 0 holds an
 * immediate mask with bit 6 set, DWord 1 (offset 4) receives the value
 * of the VCS HUC_STATUS2 MMIO register.  NOTE(review): presumably a
 * later consumer ANDs the two to test HuC completion -- confirm with
 * the code that reads this buffer.
 */
static void
gen9_vdenc_huc_store_huc_status2(VADriverContextP ctx,
                                 struct intel_encoder_context *encoder_context)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;

    /* Write HUC_STATUS2 mask (1 << 6) */
    memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
    mi_store_data_imm_params.bo = vdenc_context->huc_status2_res.bo;
    mi_store_data_imm_params.offset = 0;
    mi_store_data_imm_params.dw0 = (1 << 6);
    gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);

    /* Store HUC_STATUS2 */
    memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
    mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS2;
    mi_store_register_mem_params.bo = vdenc_context->huc_status2_res.bo;
    mi_store_register_mem_params.offset = 4;
    gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
}
1391
/*
 * Emit HUC_STREAM_OBJECT: describes one unit of work for the HuC --
 * the indirect stream-in window (length + start address), the optional
 * stream-out start address, and the bitstream-processing controls
 * (emulation-prevention removal, start-code search and the three
 * start-code bytes to match).
 */
static void
gen9_vdenc_huc_stream_object(VADriverContextP ctx,
                             struct intel_encoder_context *encoder_context,
                             struct huc_stream_object_parameter *params)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, HUC_STREAM_OBJECT | (5 - 2));
    OUT_BCS_BATCH(batch, params->indirect_stream_in_data_length);
    OUT_BCS_BATCH(batch,
                  (1 << 31) |   /* Must be 1 */
                  params->indirect_stream_in_start_address);
    OUT_BCS_BATCH(batch, params->indirect_stream_out_start_address);
    OUT_BCS_BATCH(batch,
                  (!!params->huc_bitstream_enable << 29) |
                  (params->length_mode << 27) |
                  (!!params->stream_out << 26) |
                  (!!params->emulation_prevention_byte_removal << 25) |
                  (!!params->start_code_search_engine << 24) |
                  (params->start_code_byte2 << 16) |
                  (params->start_code_byte1 << 8) |
                  params->start_code_byte0);

    ADVANCE_BCS_BATCH(batch);
}
1419
/*
 * Emit a HUC_START command (2 DWs), kicking off execution of the loaded
 * HuC kernel.  DW1 bit 0 marks whether this is the last stream object of
 * the workload.
 */
static void
gen9_vdenc_huc_start(VADriverContextP ctx,
                     struct intel_encoder_context *encoder_context,
                     struct huc_start_parameter *params)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 2);

    OUT_BCS_BATCH(batch, HUC_START | (2 - 2));
    OUT_BCS_BATCH(batch, !!params->last_stream_object);

    ADVANCE_BCS_BATCH(batch);
}
1434
/*
 * Emit a VD_PIPELINE_FLUSH command (2 DWs).  DW1 packs per-engine flush
 * requests (bits 16-19: HEVC/VDEnc/MFL/MFX command flush) and per-engine
 * "wait for done" conditions (bits 0-4), exactly as set in @params.
 */
static void
gen9_vdenc_vd_pipeline_flush(VADriverContextP ctx,
                             struct intel_encoder_context *encoder_context,
                             struct vd_pipeline_flush_parameter *params)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 2);

    OUT_BCS_BATCH(batch, VD_PIPELINE_FLUSH | (2 - 2));
    OUT_BCS_BATCH(batch,
                  params->mfx_pipeline_command_flush << 19 |
                  params->mfl_pipeline_command_flush << 18 |
                  params->vdenc_pipeline_command_flush << 17 |
                  params->hevc_pipeline_command_flush << 16 |
                  params->vd_command_message_parser_done << 4 |
                  params->mfx_pipeline_done << 3 |
                  params->mfl_pipeline_done << 2 |
                  params->vdenc_pipeline_done << 1 |
                  params->hevc_pipeline_done);

    ADVANCE_BCS_BATCH(batch);
}
1458
/*
 * Return MaxMBPS (the maximum macroblock processing rate, in MB/s) for an
 * H.264 level.  @level_idc is the level number times ten (e.g. 41 for
 * level 4.1), matching the values from ITU-T H.264 Annex A, Table A-1.
 * Unknown or lower levels fall back to the level 2.0 rate (11880).
 *
 * Fix vs. the original: dropped the stray ';' after the function's closing
 * brace (an empty top-level declaration that warns under -pedantic).
 */
static int
gen9_vdenc_get_max_mbps(int level_idc)
{
    switch (level_idc) {
    case 20:
        return 11880;
    case 21:
        return 19800;
    case 22:
        return 20250;
    case 30:
        return 40500;
    case 31:
        return 108000;
    case 32:
        return 216000;
    case 40:
    case 41:
        return 245760;
    case 42:
        return 522240;
    case 50:
        return 589824;
    case 51:
        return 983040;
    case 52:
        return 2073600;
    default:
        return 11880;   /* fall back to the level 2.0 rate */
    }
}
1516
1517 static unsigned int
1518 gen9_vdenc_get_profile_level_max_frame(VADriverContextP ctx,
1519                                        struct intel_encoder_context *encoder_context,
1520                                        int level_idc)
1521 {
1522     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1523     double bits_per_mb, tmpf;
1524     int max_mbps, num_mb_per_frame;
1525     uint64_t max_byte_per_frame0, max_byte_per_frame1;
1526     unsigned int ret;
1527
1528     if (level_idc >= 31 && level_idc <= 40)
1529         bits_per_mb = 96.0;
1530     else
1531         bits_per_mb = 192.0;
1532
1533     max_mbps = gen9_vdenc_get_max_mbps(level_idc);
1534     num_mb_per_frame = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs;
1535
1536     tmpf = (double)num_mb_per_frame;
1537
1538     if (tmpf < max_mbps / 172.0)
1539         tmpf = max_mbps / 172.0;
1540
1541     max_byte_per_frame0 = (uint64_t)(tmpf * bits_per_mb);
1542     max_byte_per_frame1 = (uint64_t)(((double)max_mbps * vdenc_context->framerate.den) /
1543                                      (double)vdenc_context->framerate.num * bits_per_mb);
1544
1545     /* TODO: check VAEncMiscParameterTypeMaxFrameSize */
1546     ret = (unsigned int)MIN(max_byte_per_frame0, max_byte_per_frame1);
1547     ret = (unsigned int)MIN(ret, vdenc_context->frame_height * vdenc_context->frame_height);
1548
1549     return ret;
1550 }
1551
1552 static int
1553 gen9_vdenc_calculate_initial_qp(VADriverContextP ctx,
1554                                 struct encode_state *encode_state,
1555                                 struct intel_encoder_context *encoder_context)
1556 {
1557     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1558     float x0 = 0, y0 = 1.19f, x1 = 1.75f, y1 = 1.75f;
1559     unsigned frame_size;
1560     int qp, delat_qp;
1561
1562     frame_size = (vdenc_context->frame_width * vdenc_context->frame_height * 3 / 2);
1563     qp = (int)(1.0 / 1.2 * pow(10.0,
1564                                (log10(frame_size * 2.0 / 3.0 * vdenc_context->framerate.num /
1565                                       ((double)vdenc_context->target_bit_rate * vdenc_context->framerate.den)) - x0) *
1566                                (y1 - y0) / (x1 - x0) + y0) + 0.5);
1567     qp += 2;
1568     delat_qp = (int)(9 - (vdenc_context->vbv_buffer_size_in_bit * ((double)vdenc_context->framerate.num) /
1569                           ((double)vdenc_context->target_bit_rate * vdenc_context->framerate.den)));
1570     if (delat_qp > 0)
1571         qp += delat_qp;
1572
1573     qp = CLAMP(1, 51, qp);
1574     qp--;
1575
1576     if (qp < 0)
1577         qp = 1;
1578
1579     return qp;
1580 }
1581
1582 static void
1583 gen9_vdenc_update_huc_brc_init_dmem(VADriverContextP ctx,
1584                                     struct encode_state *encode_state,
1585                                     struct intel_encoder_context *encoder_context)
1586 {
1587     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1588     struct huc_brc_init_dmem *dmem;
1589     VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1590     double input_bits_per_frame, bps_ratio;
1591     int i;
1592
1593     vdenc_context->brc_init_reset_input_bits_per_frame =
1594         ((double)vdenc_context->max_bit_rate * vdenc_context->framerate.den) / vdenc_context->framerate.num;
1595     vdenc_context->brc_init_current_target_buf_full_in_bits = vdenc_context->brc_init_reset_input_bits_per_frame;
1596     vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
1597
1598     dmem = (struct huc_brc_init_dmem *)i965_map_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1599
1600     if (!dmem)
1601         return;
1602
1603     memset(dmem, 0, sizeof(*dmem));
1604
1605     dmem->brc_func = vdenc_context->brc_initted ? 2 : 0;
1606
1607     dmem->frame_width = vdenc_context->frame_width;
1608     dmem->frame_height = vdenc_context->frame_height;
1609
1610     dmem->target_bitrate = vdenc_context->target_bit_rate;
1611     dmem->min_rate = vdenc_context->min_bit_rate;
1612     dmem->max_rate = vdenc_context->max_bit_rate;
1613     dmem->buffer_size = vdenc_context->vbv_buffer_size_in_bit;
1614     dmem->init_buffer_fullness = vdenc_context->init_vbv_buffer_fullness_in_bit;
1615
1616     if (dmem->init_buffer_fullness > vdenc_context->init_vbv_buffer_fullness_in_bit)
1617         dmem->init_buffer_fullness = vdenc_context->vbv_buffer_size_in_bit;
1618
1619     if (vdenc_context->internal_rate_mode == I965_BRC_CBR)
1620         dmem->brc_flag |= 0x10;
1621     else if (vdenc_context->internal_rate_mode == I965_BRC_VBR)
1622         dmem->brc_flag |= 0x20;
1623
1624     dmem->frame_rate_m = vdenc_context->framerate.num;
1625     dmem->frame_rate_d = vdenc_context->framerate.den;
1626
1627     dmem->profile_level_max_frame = gen9_vdenc_get_profile_level_max_frame(ctx, encoder_context, seq_param->level_idc);
1628
1629     if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1630         dmem->num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
1631
1632     dmem->min_qp = 10;
1633     dmem->max_qp = 51;
1634
1635     input_bits_per_frame = ((double)vdenc_context->max_bit_rate * vdenc_context->framerate.den) / vdenc_context->framerate.num;
1636     bps_ratio = input_bits_per_frame /
1637                 ((double)vdenc_context->vbv_buffer_size_in_bit * vdenc_context->framerate.den / vdenc_context->framerate.num);
1638
1639     if (bps_ratio < 0.1)
1640         bps_ratio = 0.1;
1641
1642     if (bps_ratio > 3.5)
1643         bps_ratio = 3.5;
1644
1645     for (i = 0; i < 4; i++) {
1646         dmem->dev_thresh_pb0[i] = (char)(-50 * pow(vdenc_brc_dev_threshpb0_fp_neg[i], bps_ratio));
1647         dmem->dev_thresh_pb0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshpb0_fp_pos[i], bps_ratio));
1648
1649         dmem->dev_thresh_i0[i] = (char)(-50 * pow(vdenc_brc_dev_threshi0_fp_neg[i], bps_ratio));
1650         dmem->dev_thresh_i0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshi0_fp_pos[i], bps_ratio));
1651
1652         dmem->dev_thresh_vbr0[i] = (char)(-50 * pow(vdenc_brc_dev_threshvbr0_neg[i], bps_ratio));
1653         dmem->dev_thresh_vbr0[i + 4] = (char)(100 * pow(vdenc_brc_dev_threshvbr0_pos[i], bps_ratio));
1654     }
1655
1656     dmem->init_qp_ip = gen9_vdenc_calculate_initial_qp(ctx, encode_state, encoder_context);
1657
1658     if (vdenc_context->mb_brc_enabled) {
1659         dmem->mb_qp_ctrl = 1;
1660         dmem->dist_qp_delta[0] = -5;
1661         dmem->dist_qp_delta[1] = -2;
1662         dmem->dist_qp_delta[2] = 2;
1663         dmem->dist_qp_delta[3] = 5;
1664     }
1665
1666     dmem->slice_size_ctrl_en = 0;       /* TODO: add support for slice size control */
1667
1668     dmem->oscillation_qp_delta = 0;     /* TODO: add support */
1669     dmem->first_iframe_no_hrd_check = 0;/* TODO: add support */
1670
1671     // 2nd re-encode pass if possible
1672     if (vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs >= (3840 * 2160 / 256)) {
1673         dmem->top_qp_delta_thr_for_2nd_pass = 5;
1674         dmem->bottom_qp_delta_thr_for_2nd_pass = 5;
1675         dmem->top_frame_size_threshold_for_2nd_pass = 80;
1676         dmem->bottom_frame_size_threshold_for_2nd_pass = 80;
1677     } else {
1678         dmem->top_qp_delta_thr_for_2nd_pass = 2;
1679         dmem->bottom_qp_delta_thr_for_2nd_pass = 1;
1680         dmem->top_frame_size_threshold_for_2nd_pass = 32;
1681         dmem->bottom_frame_size_threshold_for_2nd_pass = 24;
1682     }
1683
1684     dmem->qp_select_for_first_pass = 1;
1685     dmem->mb_header_compensation = 1;
1686     dmem->delta_qp_adaptation = 1;
1687     dmem->max_crf_quality_factor = 52;
1688
1689     dmem->crf_quality_factor = 0;               /* TODO: add support for CRF */
1690     dmem->scenario_info = 0;
1691
1692     memcpy(&dmem->estrate_thresh_i0, vdenc_brc_estrate_thresh_i0, sizeof(dmem->estrate_thresh_i0));
1693     memcpy(&dmem->estrate_thresh_p0, vdenc_brc_estrate_thresh_p0, sizeof(dmem->estrate_thresh_p0));
1694
1695     i965_unmap_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1696 }
1697
/*
 * Run the HuC BRC init/reset kernel.  The command sequence is
 * order-critical: load the firmware descriptor (IMEM), select the HuC
 * pipe, upload the DMEM parameters, bind the history surface, point the
 * indirect-object base at a dummy stream, emit a (dummy) stream object,
 * record HUC_STATUS2, kick the kernel with HUC_START, then flush the
 * pipeline and invalidate the video cache.
 */
static void
gen9_vdenc_huc_brc_init_reset(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct huc_pipe_mode_select_parameter pipe_mode_select_params;
    struct huc_imem_state_parameter imem_state_params;
    struct huc_dmem_state_parameter dmem_state_params;
    struct huc_virtual_addr_parameter virtual_addr_params;
    struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
    struct huc_stream_object_parameter stream_object_params;
    struct huc_start_parameter start_params;
    struct vd_pipeline_flush_parameter pipeline_flush_params;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_params;

    /* Seed the host-side fullness tracker from the configured VBV state. */
    vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;

    /* Select the BRC init/reset firmware kernel. */
    memset(&imem_state_params, 0, sizeof(imem_state_params));
    imem_state_params.huc_firmware_descriptor = HUC_BRC_INIT_RESET;
    gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);

    memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
    gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);

    /* Fill the DMEM buffer on the CPU, then upload it to HuC DMEM. */
    gen9_vdenc_update_huc_brc_init_dmem(ctx, encode_state, encoder_context);
    memset(&dmem_state_params, 0, sizeof(dmem_state_params));
    dmem_state_params.huc_data_source_res = &vdenc_context->brc_init_reset_dmem_res;
    dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
    dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_init_dmem), 64);
    gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);

    /* Region 0 (output): the BRC history buffer the kernel initializes. */
    memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
    virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
    virtual_addr_params.regions[0].is_target = 1;
    gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);

    /* No real bitstream input for init/reset: use the dummy resource. */
    memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
    ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
    ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
    gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);

    /* Minimal (1-byte) stream object to satisfy the command sequence. */
    memset(&stream_object_params, 0, sizeof(stream_object_params));
    stream_object_params.indirect_stream_in_data_length = 1;
    stream_object_params.indirect_stream_in_start_address = 0;
    gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);

    gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);

    memset(&start_params, 0, sizeof(start_params));
    start_params.last_stream_object = 1;
    gen9_vdenc_huc_start(ctx, encoder_context, &start_params);

    /* Wait for the HuC workload before letting later commands proceed. */
    memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
    pipeline_flush_params.hevc_pipeline_done = 1;
    pipeline_flush_params.hevc_pipeline_command_flush = 1;
    gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);

    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
    mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
    gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
}
1761
/*
 * Fill the per-pass HuC BRC update DMEM block: the running target buffer
 * fullness, adjustment tables, frame type and pass bookkeeping the BRC
 * update kernel consumes before each PAK pass.
 *
 * NOTE(review): unlike the init DMEM, this buffer is NOT memset here —
 * presumably fields not written below are meant to stay zero/stale from
 * the buffer's allocation or a previous pass; verify against the HuC
 * kernel's expectations.
 */
static void
gen9_vdenc_update_huc_update_dmem(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct huc_brc_update_dmem *dmem;
    int i, num_p_in_gop = 0;

    dmem = (struct huc_brc_update_dmem *)i965_map_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);

    if (!dmem)
        return;

    /* 1 = BRC update */
    dmem->brc_func = 1;

    /* Advance the target fullness by one frame's budget, but only once per
     * frame (on pass 0), not on re-encode passes. */
    if (vdenc_context->brc_initted && (vdenc_context->current_pass == 0)) {
        vdenc_context->brc_init_previous_target_buf_full_in_bits =
            (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits);
        vdenc_context->brc_init_current_target_buf_full_in_bits += vdenc_context->brc_init_reset_input_bits_per_frame;
        vdenc_context->brc_target_size += vdenc_context->brc_init_reset_input_bits_per_frame;
    }

    /* Wrap the fullness tracker around the VBV buffer size. */
    if (vdenc_context->brc_target_size > vdenc_context->vbv_buffer_size_in_bit)
        vdenc_context->brc_target_size -= vdenc_context->vbv_buffer_size_in_bit;

    dmem->target_size = vdenc_context->brc_target_size;

    dmem->peak_tx_bits_per_frame = (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits - vdenc_context->brc_init_previous_target_buf_full_in_bits);

    dmem->target_slice_size = 0;        // TODO: add support for slice size control

    memcpy(dmem->start_global_adjust_frame, vdenc_brc_start_global_adjust_frame, sizeof(dmem->start_global_adjust_frame));
    memcpy(dmem->global_rate_ratio_threshold, vdenc_brc_global_rate_ratio_threshold, sizeof(dmem->global_rate_ratio_threshold));

    dmem->current_frame_type = (vdenc_context->frame_type + 2) % 3;      // I frame:2, P frame:0, B frame:1

    memcpy(dmem->start_global_adjust_mult, vdenc_brc_start_global_adjust_mult, sizeof(dmem->start_global_adjust_mult));
    memcpy(dmem->start_global_adjust_div, vdenc_brc_start_global_adjust_div, sizeof(dmem->start_global_adjust_div));
    memcpy(dmem->global_rate_ratio_threshold_qp, vdenc_brc_global_rate_ratio_threshold_qp, sizeof(dmem->global_rate_ratio_threshold_qp));

    dmem->current_pak_pass = vdenc_context->current_pass;
    dmem->max_num_passes = 2;

    /* Scene-change detection thresholds (intra-MB percentage, out of 256). */
    dmem->scene_change_detect_enable = 1;
    dmem->scene_change_prev_intra_percent_threshold = 96;
    dmem->scene_change_cur_intra_perent_threshold = 192;

    if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
        num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;

    for (i = 0; i < 2; i++)
        dmem->scene_change_width[i] = MIN((num_p_in_gop + 1) / 5, 6);

    if (vdenc_context->is_low_delay)
        dmem->ip_average_coeff = 0;
    else
        dmem->ip_average_coeff = 128;

    dmem->skip_frame_size = 0;
    dmem->num_of_frames_skipped = 0;

    dmem->roi_source = 0;               // TODO: add support for dirty ROI
    dmem->hme_detection_enable = 0;     // TODO: support HME kernel
    dmem->hme_cost_enable = 1;

    /* Size in bytes of the second-level batch the kernel patches. */
    dmem->second_level_batchbuffer_size = 228;

    i965_unmap_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
}
1830
/*
 * Build an MFX_AVC_IMG_STATE command image in @pstate (not emitted here;
 * the caller writes it into a batch or second-level buffer).  Values come
 * from the VA sequence/picture parameters plus VDEnc-mode fixed settings.
 * @use_huc selects BRC-domain rate control (HuC patches the image state).
 */
static void
gen9_vdenc_init_mfx_avc_img_state(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context,
                                  struct gen9_mfx_avc_img_state *pstate,
                                  int use_huc)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

    memset(pstate, 0, sizeof(*pstate));

    /* DW0: command opcode + DWord length */
    pstate->dw0.value = (MFX_AVC_IMG_STATE | (sizeof(*pstate) / 4 - 2));

    pstate->dw1.frame_size_in_mbs_minus1 = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs - 1;

    pstate->dw2.frame_width_in_mbs_minus1 = vdenc_context->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = vdenc_context->frame_height_in_mbs - 1;

    /* Progressive frame encode only (image_structure 0 = frame). */
    pstate->dw3.image_structure = 0;
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.brc_domain_rate_control_enable = !!use_huc;
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = vdenc_context->transform_8x8_mode_enable;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* Always 0 in VDEnc mode */
    pstate->dw4.minimum_frame_size = 0;

    /* DW5: per-frame size/overrun checks used by the multi-pass PAK logic */
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;             /* Always 0 in VDEnc mode */
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;

    /* Per-MB bit-size ceilings checked by the flags above */
    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    /* DW10/DW11: frame bitrate window (max left wide open, deltas zero) */
    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;

    pstate->dw12.vad_error_logic = 1;
    /* TODO: set paramters DW19/DW20 for slices */
}
1909
/*
 * Build a VDENC_IMG_STATE command image in @pstate.  First programs
 * frame-type-dependent defaults (mode/MV cost tables, search settings),
 * then the common frame geometry and slice/QP fields, and finally — when
 * @update_cost is set — overwrites the cost tables with the values the
 * driver computed for the current QP (vdenc_context->mode_cost/mv_cost/
 * hme_mv_cost).  ROI zone deltas are only programmed for CQP; for
 * CBR/VBR the HuC kernel patches them instead.
 */
static void
gen9_vdenc_init_vdenc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen9_vdenc_img_state *pstate,
                                int update_cost)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;

    memset(pstate, 0, sizeof(*pstate));

    /* DW0: command opcode + DWord length */
    pstate->dw0.value = (VDENC_IMG_STATE | (sizeof(*pstate) / 4 - 2));

    /* Frame-type-dependent defaults: I frames need no inter search setup. */
    if (vdenc_context->frame_type == VDENC_FRAME_I) {
        pstate->dw4.intra_sad_measure_adjustment = 2;
        pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;

        pstate->dw5.cre_prefetch_enable = 1;

        /* Default intra mode costs (overridden below if update_cost) */
        pstate->dw9.mode0_cost = 10;
        pstate->dw9.mode1_cost = 0;
        pstate->dw9.mode2_cost = 3;
        pstate->dw9.mode3_cost = 30;

        pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
        pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
        pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;

        pstate->dw22.small_mb_size_in_word = 0xff;
        pstate->dw22.large_mb_size_in_word = 0xff;

        pstate->dw27.max_hmv_r = 0x2000;
        pstate->dw27.max_vmv_r = 0x200;

        pstate->dw33.qp_range_check_upper_bound = 0x33;
        pstate->dw33.qp_range_check_lower_bound = 0x0a;
        pstate->dw33.qp_range_check_value = 0x0f;
    } else {
        pstate->dw2.bidirectional_weight = 0x20;

        /* Inter search: quarter-pel, adjusted SAD, all sub-partitions off-mask */
        pstate->dw4.subpel_mode = 3;
        pstate->dw4.bme_disable_for_fbr_message = 1;
        pstate->dw4.inter_sad_measure_adjustment = 2;
        pstate->dw4.intra_sad_measure_adjustment = 2;
        pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;

        pstate->dw5.cre_prefetch_enable = 1;

        pstate->dw8.non_skip_zero_mv_const_added = 1;
        pstate->dw8.non_skip_mb_mode_const_added = 1;
        pstate->dw8.ref_id_cost_mode_select = 1;

        /* Default mode/MV costs (overridden below if update_cost) */
        pstate->dw9.mode0_cost = 7;
        pstate->dw9.mode1_cost = 26;
        pstate->dw9.mode2_cost = 30;
        pstate->dw9.mode3_cost = 57;

        pstate->dw10.mode4_cost = 8;
        pstate->dw10.mode5_cost = 2;
        pstate->dw10.mode6_cost = 4;
        pstate->dw10.mode7_cost = 6;

        pstate->dw11.mode8_cost = 5;
        pstate->dw11.mode9_cost = 0;
        pstate->dw11.ref_id_cost = 4;
        pstate->dw11.chroma_intra_mode_cost = 0;

        pstate->dw12_13.mv_cost.dw0.mv0_cost = 0;
        pstate->dw12_13.mv_cost.dw0.mv1_cost = 6;
        pstate->dw12_13.mv_cost.dw0.mv2_cost = 6;
        pstate->dw12_13.mv_cost.dw0.mv3_cost = 9;
        pstate->dw12_13.mv_cost.dw1.mv4_cost = 10;
        pstate->dw12_13.mv_cost.dw1.mv5_cost = 13;
        pstate->dw12_13.mv_cost.dw1.mv6_cost = 14;
        pstate->dw12_13.mv_cost.dw1.mv7_cost = 24;

        pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
        pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
        pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;

        pstate->dw22.small_mb_size_in_word = 0xff;
        pstate->dw22.large_mb_size_in_word = 0xff;

        pstate->dw27.max_hmv_r = 0x2000;
        pstate->dw27.max_vmv_r = 0x200;

        /* Distortion zone boundaries for the cost-center logic */
        pstate->dw31.offset0_for_zone0_neg_zone1_boundary = 800;

        pstate->dw32.offset1_for_zone1_neg_zone2_boundary = 1600;
        pstate->dw32.offset2_for_zone2_neg_zone3_boundary = 2400;

        pstate->dw33.qp_range_check_upper_bound = 0x33;
        pstate->dw33.qp_range_check_lower_bound = 0x0a;
        pstate->dw33.qp_range_check_value = 0x0f;

        pstate->dw34.midpoint_distortion = 0x640;
    }

    /* ROI will be updated in HuC kernel for CBR/VBR */
    if (!vdenc_context->brc_enabled && vdenc_context->num_roi) {
        pstate->dw34.roi_enable = 1;

        pstate->dw30.roi_qp_adjustment_for_zone1 = CLAMP(-8, 7, vdenc_context->roi[0].value);

        if (vdenc_context->num_roi > 1)
            pstate->dw30.roi_qp_adjustment_for_zone2 = CLAMP(-8, 7, vdenc_context->roi[1].value);

        if (vdenc_context->num_roi > 2)
            pstate->dw30.roi_qp_adjustment_for_zone3 = CLAMP(-8, 7, vdenc_context->roi[2].value);
    }

    pstate->dw1.transform_8x8_flag = vdenc_context->transform_8x8_mode_enable;
    pstate->dw1.extended_pak_obj_cmd_enable = !!vdenc_context->use_extended_pak_obj_cmd;

    pstate->dw3.picture_width = vdenc_context->frame_width_in_mbs;

    pstate->dw4.forward_transform_skip_check_enable = 1; /* TODO: double-check it */

    pstate->dw5.picture_height_minus1 = vdenc_context->frame_height_in_mbs - 1;
    pstate->dw5.picture_type = vdenc_context->frame_type;
    pstate->dw5.constrained_intra_prediction_flag  = pic_param->pic_fields.bits.constrained_intra_pred_flag;

    /* With a single reference there is nothing for HME ref1 to search. */
    if (vdenc_context->frame_type == VDENC_FRAME_P) {
        pstate->dw5.hme_ref1_disable = vdenc_context->num_refs[0] == 1 ? 1 : 0;
    }

    pstate->dw5.mb_slice_threshold_value = 0;

    pstate->dw6.slice_macroblock_height_minus1 = vdenc_context->frame_height_in_mbs - 1; /* single slice onlye */

    if (pstate->dw1.transform_8x8_flag)
        pstate->dw8.luma_intra_partition_mask = 0;
    else
        pstate->dw8.luma_intra_partition_mask = (1 << 1); /* disable transform_8x8 */

    pstate->dw14.qp_prime_y = pic_param->pic_init_qp + slice_param->slice_qp_delta;      /* TODO: check whether it is OK to use the first slice only */

    /* Replace the defaults with the QP-derived cost tables. */
    if (update_cost) {
        pstate->dw9.mode0_cost = vdenc_context->mode_cost[0];
        pstate->dw9.mode1_cost = vdenc_context->mode_cost[1];
        pstate->dw9.mode2_cost = vdenc_context->mode_cost[2];
        pstate->dw9.mode3_cost = vdenc_context->mode_cost[3];

        pstate->dw10.mode4_cost = vdenc_context->mode_cost[4];
        pstate->dw10.mode5_cost = vdenc_context->mode_cost[5];
        pstate->dw10.mode6_cost = vdenc_context->mode_cost[6];
        pstate->dw10.mode7_cost = vdenc_context->mode_cost[7];

        pstate->dw11.mode8_cost = vdenc_context->mode_cost[8];
        pstate->dw11.mode9_cost = vdenc_context->mode_cost[9];
        pstate->dw11.ref_id_cost = vdenc_context->mode_cost[10];
        pstate->dw11.chroma_intra_mode_cost = vdenc_context->mode_cost[11];

        pstate->dw12_13.mv_cost.dw0.mv0_cost = vdenc_context->mv_cost[0];
        pstate->dw12_13.mv_cost.dw0.mv1_cost = vdenc_context->mv_cost[1];
        pstate->dw12_13.mv_cost.dw0.mv2_cost = vdenc_context->mv_cost[2];
        pstate->dw12_13.mv_cost.dw0.mv3_cost = vdenc_context->mv_cost[3];
        pstate->dw12_13.mv_cost.dw1.mv4_cost = vdenc_context->mv_cost[4];
        pstate->dw12_13.mv_cost.dw1.mv5_cost = vdenc_context->mv_cost[5];
        pstate->dw12_13.mv_cost.dw1.mv6_cost = vdenc_context->mv_cost[6];
        pstate->dw12_13.mv_cost.dw1.mv7_cost = vdenc_context->mv_cost[7];

        pstate->dw28_29.hme_mv_cost.dw0.mv0_cost = vdenc_context->hme_mv_cost[0];
        pstate->dw28_29.hme_mv_cost.dw0.mv1_cost = vdenc_context->hme_mv_cost[1];
        pstate->dw28_29.hme_mv_cost.dw0.mv2_cost = vdenc_context->hme_mv_cost[2];
        pstate->dw28_29.hme_mv_cost.dw0.mv3_cost = vdenc_context->hme_mv_cost[3];
        pstate->dw28_29.hme_mv_cost.dw1.mv4_cost = vdenc_context->hme_mv_cost[4];
        pstate->dw28_29.hme_mv_cost.dw1.mv5_cost = vdenc_context->hme_mv_cost[5];
        pstate->dw28_29.hme_mv_cost.dw1.mv6_cost = vdenc_context->hme_mv_cost[6];
        pstate->dw28_29.hme_mv_cost.dw1.mv7_cost = vdenc_context->hme_mv_cost[7];
    }

    /* Level-derived vertical MV range overrides the fixed default above. */
    pstate->dw27.max_vmv_r = gen9_vdenc_get_max_vmv_range(seq_param->level_idc);

    pstate->dw34.image_state_qp_override = (vdenc_context->internal_rate_mode == I965_BRC_CQP) ? 1 : 0;

    /* TODO: check rolling I */

    /* TODO: handle ROI */

    /* TODO: check stream in support */
}
2095
2096 static void
2097 gen9_vdenc_init_img_states(VADriverContextP ctx,
2098                            struct encode_state *encode_state,
2099                            struct intel_encoder_context *encoder_context)
2100 {
2101     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2102     struct gen9_mfx_avc_img_state *mfx_img_cmd;
2103     struct gen9_vdenc_img_state *vdenc_img_cmd;
2104     char *pbuffer;
2105
2106     pbuffer = i965_map_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
2107
2108     if (!pbuffer)
2109         return;
2110
2111     mfx_img_cmd = (struct gen9_mfx_avc_img_state *)pbuffer;
2112     gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, mfx_img_cmd, 1);
2113     pbuffer += sizeof(*mfx_img_cmd);
2114
2115     vdenc_img_cmd = (struct gen9_vdenc_img_state *)pbuffer;
2116     gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, vdenc_img_cmd, 0);
2117     pbuffer += sizeof(*vdenc_img_cmd);
2118
2119     /* Add batch buffer end command */
2120     *((unsigned int *)pbuffer) = MI_BATCH_BUFFER_END;
2121
2122     i965_unmap_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
2123 }
2124
2125 static void
2126 gen9_vdenc_huc_brc_update_constant_data(VADriverContextP ctx,
2127                                         struct encode_state *encode_state,
2128                                         struct intel_encoder_context *encoder_context)
2129 {
2130     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2131     struct huc_brc_update_constant_data *brc_buffer;
2132     int i, j;
2133
2134     brc_buffer = (struct huc_brc_update_constant_data *)
2135                  i965_map_gpe_resource(&vdenc_context->brc_constant_data_res);
2136
2137     if (!brc_buffer)
2138         return;
2139
2140     memcpy(brc_buffer, &gen9_brc_update_constant_data, sizeof(gen9_brc_update_constant_data));
2141
2142     for (i = 0; i < 8; i++) {
2143         for (j = 0; j < 42; j++) {
2144             brc_buffer->hme_mv_cost[i][j] = map_44_lut_value((vdenc_hme_cost[i][j + 10]), 0x6f);
2145         }
2146     }
2147
2148     if (vdenc_context->internal_rate_mode == I965_BRC_VBR) {
2149         memcpy(brc_buffer->dist_qp_adj_tab_i, dist_qp_adj_tab_i_vbr, sizeof(dist_qp_adj_tab_i_vbr));
2150         memcpy(brc_buffer->dist_qp_adj_tab_p, dist_qp_adj_tab_p_vbr, sizeof(dist_qp_adj_tab_p_vbr));
2151         memcpy(brc_buffer->dist_qp_adj_tab_b, dist_qp_adj_tab_b_vbr, sizeof(dist_qp_adj_tab_b_vbr));
2152         memcpy(brc_buffer->buf_rate_adj_tab_i, buf_rate_adj_tab_i_vbr, sizeof(buf_rate_adj_tab_i_vbr));
2153         memcpy(brc_buffer->buf_rate_adj_tab_p, buf_rate_adj_tab_p_vbr, sizeof(buf_rate_adj_tab_p_vbr));
2154         memcpy(brc_buffer->buf_rate_adj_tab_b, buf_rate_adj_tab_b_vbr, sizeof(buf_rate_adj_tab_b_vbr));
2155     }
2156
2157
2158     i965_unmap_gpe_resource(&vdenc_context->brc_constant_data_res);
2159 }
2160
/* Program the HuC to run the BRC-update firmware kernel for the current
 * pass: load the firmware, point it at its DMEM parameters and surfaces,
 * kick it off, then flush and store HUC_STATUS for the conditional
 * pass-skip logic. Command order matters; do not reorder. */
static void
gen9_vdenc_huc_brc_update(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct huc_pipe_mode_select_parameter pipe_mode_select_params;
    struct huc_imem_state_parameter imem_state_params;
    struct huc_dmem_state_parameter dmem_state_params;
    struct huc_virtual_addr_parameter virtual_addr_params;
    struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
    struct huc_stream_object_parameter stream_object_params;
    struct huc_start_parameter start_params;
    struct vd_pipeline_flush_parameter pipeline_flush_params;
    struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_params;

    /* Invalidate the video pipeline caches before touching the HuC. */
    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
    mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
    gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);

    if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset) {
        struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;

        /* Conditionally terminate the batch based on the HUC_STATUS2 value
         * stored by a previous pass (see gen9_vdenc_huc_store_huc_status2);
         * presumably this bails out when the HuC is not usable — confirm
         * against the HuC programming notes. */
        memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
        mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
        gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
    }

    /* Prepare the image-state buffer (region 3 below) the kernel reads. */
    gen9_vdenc_init_img_states(ctx, encode_state, encoder_context);

    memset(&imem_state_params, 0, sizeof(imem_state_params));
    imem_state_params.huc_firmware_descriptor = HUC_BRC_UPDATE;
    gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);

    memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
    gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);

    /* DMEM carries the per-pass BRC update parameters. */
    gen9_vdenc_update_huc_update_dmem(ctx, encoder_context);
    memset(&dmem_state_params, 0, sizeof(dmem_state_params));
    dmem_state_params.huc_data_source_res = &vdenc_context->brc_update_dmem_res[vdenc_context->current_pass];
    dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
    dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_update_dmem), 64);
    gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);

    /* Bind the kernel's input/output surfaces; is_target marks the
     * regions the kernel writes. */
    gen9_vdenc_huc_brc_update_constant_data(ctx, encode_state, encoder_context);
    memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
    virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
    virtual_addr_params.regions[0].is_target = 1;
    virtual_addr_params.regions[1].huc_surface_res = &vdenc_context->vdenc_statistics_res;
    virtual_addr_params.regions[2].huc_surface_res = &vdenc_context->pak_statistics_res;
    virtual_addr_params.regions[3].huc_surface_res = &vdenc_context->vdenc_avc_image_state_res;
    virtual_addr_params.regions[4].huc_surface_res = &vdenc_context->hme_detection_summary_buffer_res;
    virtual_addr_params.regions[4].is_target = 1;
    virtual_addr_params.regions[5].huc_surface_res = &vdenc_context->brc_constant_data_res;
    virtual_addr_params.regions[6].huc_surface_res = &vdenc_context->second_level_batch_res;
    virtual_addr_params.regions[6].is_target = 1;
    gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);

    /* The BRC kernel takes no real stream input; feed it a dummy object. */
    memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
    ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
    ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
    gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);

    memset(&stream_object_params, 0, sizeof(stream_object_params));
    stream_object_params.indirect_stream_in_data_length = 1;
    stream_object_params.indirect_stream_in_start_address = 0;
    gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);

    gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);

    memset(&start_params, 0, sizeof(start_params));
    start_params.last_stream_object = 1;
    gen9_vdenc_huc_start(ctx, encoder_context, &start_params);

    /* NOTE(review): the HEVC flush bits are set even though this is the AVC
     * path — presumably they act as the generic VD pipeline flush here;
     * confirm against the VD_PIPELINE_FLUSH description. */
    memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
    pipeline_flush_params.hevc_pipeline_done = 1;
    pipeline_flush_params.hevc_pipeline_command_flush = 1;
    gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);

    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
    mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
    gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);

    /* Store HUC_STATUS */
    memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
    mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS;
    mi_store_register_mem_params.bo = vdenc_context->huc_status_res.bo;
    gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);

    /* Write HUC_STATUS mask (1 << 31) */
    memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
    mi_store_data_imm_params.bo = vdenc_context->huc_status_res.bo;
    mi_store_data_imm_params.offset = 4;
    mi_store_data_imm_params.dw0 = (1 << 31);
    gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
}
2260
/* Emit MFX_PIPE_MODE_SELECT configuring the MFX engine for AVC encoding
 * in VDEnc mode with statistics stream-out enabled. */
static void
gen9_vdenc_mfx_pipe_mode_select(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (1 << 29) |
                  (MFX_LONG_MODE << 17) |       /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) |
                  (1 << 13) |                   /* VDEnc mode */
                  ((!!vdenc_context->post_deblocking_output_res.bo) << 9)  |    /* Post Deblocking Output */
                  ((!!vdenc_context->pre_deblocking_output_res.bo) << 8)  |     /* Pre Deblocking Output */
                  (1 << 7)  |                   /* Scaled surface enable */
                  (1 << 6)  |                   /* Frame statistics stream out enable, always '1' in VDEnc mode */
                  (1 << 4)  |                   /* encoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
2289
/* Emit MFX_SURFACE_STATE for one tiled NV12 (planar 4:2:0, interleaved
 * U/V) surface identified by @id. */
static void
gen9_vdenc_mfx_surface_state(VADriverContextP ctx,
                             struct intel_encoder_context *encoder_context,
                             struct i965_gpe_resource *gpe_resource,
                             int id)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, id);
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) |    /* 420 planar YUV surface */
                  (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) |    /* pitch */
                  (0 << 2)  |                           /* must be 0 for interleave U/V */
                  (1 << 1)  |                           /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for V(cr); same as Cb for interleaved U/V */
    ADVANCE_BCS_BATCH(batch);
}
2321
/* Emit MFX_PIPE_BUF_ADDR_STATE binding the pixel/reference/scratch
 * buffers used by the MFX engine. Gen10 grows the command by three DWs. */
static void
gen9_vdenc_mfx_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i;

    if (IS_GEN10(i965->intel.device_info)) {
        BEGIN_BCS_BATCH(batch, 68);
        OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (68 - 2));
    } else {
        BEGIN_BCS_BATCH(batch, 65);
        OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));
    }


    /* the DW1-3 is for pre_deblocking */
    OUT_BUFFER_3DW(batch, vdenc_context->pre_deblocking_output_res.bo, 1, 0, 0);

    /* the DW4-6 is for the post_deblocking */
    OUT_BUFFER_3DW(batch, vdenc_context->post_deblocking_output_res.bo, 1, 0, 0);

    /* the DW7-9 is for the uncompressed_picture */
    OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);

    /* the DW10-12 is for PAK information (write) */
    OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 1, 0, 0);

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BUFFER_3DW(batch, vdenc_context->mfx_intra_row_store_scratch_res.bo, 1, 0, 0);

    /* the DW16-18 is for the deblocking filter */
    OUT_BUFFER_3DW(batch, vdenc_context->mfx_deblocking_filter_row_store_scratch_res.bo, 1, 0, 0);

    /* the DW 19-50 is for Reference pictures*/
    for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
        OUT_BUFFER_2DW(batch, vdenc_context->list_reference_res[i].bo, 0, 0);
    }

    /* DW 51, reference picture attributes */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* The DW 52-54 is for PAK information (read) */
    OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 0, 0, 0);

    /* the DW 55-57 is the ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* the DW 58-60 is the second ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW 61, memory compress enable & mode */
    OUT_BCS_BATCH(batch, 0);

    /* the DW 62-64 is the 4x Down Scaling surface */
    OUT_BUFFER_3DW(batch, vdenc_context->scaled_4x_recon_surface_res.bo, 1, 0, 0);


    /* Gen10: DW 65-67, extra address fields not used here */
    if (IS_GEN10(i965->intel.device_info)) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
2389
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE; only the PAK-BSE output (compressed
 * bitstream) addresses are meaningful in VDEnc encode mode. */
static void
gen9_vdenc_mfx_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* The DW1-5 is for the MFX indirect bistream offset, ignore for VDEnc mode */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* the DW6-10 is for MFX Indirect MV Object Base Address, ignore for VDEnc mode */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
     * Note: an offset is specified in MFX_AVC_SLICE_STATE
     */
    OUT_BUFFER_3DW(batch,
                   vdenc_context->compressed_bitstream.res.bo,
                   1,
                   0,
                   0);
    OUT_BUFFER_2DW(batch,
                   vdenc_context->compressed_bitstream.res.bo,
                   1,
                   vdenc_context->compressed_bitstream.end_offset);

    ADVANCE_BCS_BATCH(batch);
}
2431
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE; the encoder only needs the BSD/MPC
 * row-store scratch buffer. */
static void
gen9_vdenc_mfx_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* The DW1-3 is for bsd/mpc row store scratch buffer */
    OUT_BUFFER_3DW(batch, vdenc_context->mfx_bsd_mpc_row_store_scratch_res.bo, 1, 0, 0);

    /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    ADVANCE_BCS_BATCH(batch);
}
2454
2455 static void
2456 gen9_vdenc_mfx_qm_state(VADriverContextP ctx,
2457                         int qm_type,
2458                         unsigned int *qm,
2459                         int qm_length,
2460                         struct intel_encoder_context *encoder_context)
2461 {
2462     struct intel_batchbuffer *batch = encoder_context->base.batch;
2463     unsigned int qm_buffer[16];
2464
2465     assert(qm_length <= 16);
2466     assert(sizeof(*qm) == 4);
2467     memcpy(qm_buffer, qm, qm_length * 4);
2468
2469     BEGIN_BCS_BATCH(batch, 18);
2470     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
2471     OUT_BCS_BATCH(batch, qm_type << 0);
2472     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
2473     ADVANCE_BCS_BATCH(batch);
2474 }
2475
2476 static void
2477 gen9_vdenc_mfx_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2478 {
2479     /* TODO: add support for non flat matrix */
2480     unsigned int qm[16] = {
2481         0x10101010, 0x10101010, 0x10101010, 0x10101010,
2482         0x10101010, 0x10101010, 0x10101010, 0x10101010,
2483         0x10101010, 0x10101010, 0x10101010, 0x10101010,
2484         0x10101010, 0x10101010, 0x10101010, 0x10101010
2485     };
2486
2487     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
2488     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
2489     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
2490     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
2491 }
2492
2493 static void
2494 gen9_vdenc_mfx_fqm_state(VADriverContextP ctx,
2495                          int fqm_type,
2496                          unsigned int *fqm,
2497                          int fqm_length,
2498                          struct intel_encoder_context *encoder_context)
2499 {
2500     struct intel_batchbuffer *batch = encoder_context->base.batch;
2501     unsigned int fqm_buffer[32];
2502
2503     assert(fqm_length <= 32);
2504     assert(sizeof(*fqm) == 4);
2505     memcpy(fqm_buffer, fqm, fqm_length * 4);
2506
2507     BEGIN_BCS_BATCH(batch, 34);
2508     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
2509     OUT_BCS_BATCH(batch, fqm_type << 0);
2510     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
2511     ADVANCE_BCS_BATCH(batch);
2512 }
2513
2514 static void
2515 gen9_vdenc_mfx_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2516 {
2517     /* TODO: add support for non flat matrix */
2518     unsigned int qm[32] = {
2519         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2520         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2521         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2522         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2523         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2524         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2525         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2526         0x10001000, 0x10001000, 0x10001000, 0x10001000
2527     };
2528
2529     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
2530     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
2531     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
2532     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
2533 }
2534
/* Build a MFX_AVC_IMG_STATE command on the stack and copy it verbatim
 * into the batch buffer. */
static void
gen9_vdenc_mfx_avc_img_state(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_mfx_avc_img_state mfx_img_cmd;

    gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &mfx_img_cmd, 0);

    /* Command size is in DWs (struct size / 4). */
    BEGIN_BCS_BATCH(batch, (sizeof(mfx_img_cmd) >> 2));
    intel_batchbuffer_data(batch, &mfx_img_cmd, sizeof(mfx_img_cmd));
    ADVANCE_BCS_BATCH(batch);
}
2549
/* Emit VDENC_PIPE_MODE_SELECT configuring the VDEnc engine for AVC. */
static void
gen9_vdenc_vdenc_pipe_mode_select(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 2);

    OUT_BCS_BATCH(batch, VDENC_PIPE_MODE_SELECT | (2 - 2));
    OUT_BCS_BATCH(batch,
                  (vdenc_context->vdenc_streamin_enable << 9) |
                  (vdenc_context->vdenc_pak_threshold_check_enable << 8) |
                  (1 << 7)  |                   /* Tlb prefetch enable */
                  (1 << 5)  |                   /* Frame Statistics Stream-Out Enable */
                  (VDENC_CODEC_AVC << 0));

    ADVANCE_BCS_BATCH(batch);
}
2570
/* Emit one VDEnc surface-state command (@vdenc_surface_cmd selects
 * SRC/REF/DS_REF) describing a tiled NV12 surface. */
static void
gen9_vdenc_vdenc_surface_state(VADriverContextP ctx,
                               struct intel_encoder_context *encoder_context,
                               struct i965_gpe_resource *gpe_resource,
                               int vdenc_surface_cmd)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, vdenc_surface_cmd | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (VDENC_SURFACE_PLANAR_420_8 << 28) |  /* 420 planar YUV surface only on SKL */
                  (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) |    /* pitch */
                  (0 << 2)  |                           /* must be 0 for interleave U/V */
                  (1 << 1)  |                           /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for v(cr) */

    ADVANCE_BCS_BATCH(batch);
}
2602
/* Emit VDENC_SRC_SURFACE_STATE for the raw input picture. */
static void
gen9_vdenc_vdenc_src_surface_state(VADriverContextP ctx,
                                   struct intel_encoder_context *encoder_context,
                                   struct i965_gpe_resource *gpe_resource)
{
    gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_SRC_SURFACE_STATE);
}
2610
/* Emit VDENC_REF_SURFACE_STATE for a full-resolution reference picture. */
static void
gen9_vdenc_vdenc_ref_surface_state(VADriverContextP ctx,
                                   struct intel_encoder_context *encoder_context,
                                   struct i965_gpe_resource *gpe_resource)
{
    gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_REF_SURFACE_STATE);
}
2618
/* Emit VDENC_DS_REF_SURFACE_STATE for a downscaled reference picture. */
static void
gen9_vdenc_vdenc_ds_ref_surface_state(VADriverContextP ctx,
                                      struct intel_encoder_context *encoder_context,
                                      struct i965_gpe_resource *gpe_resource)
{
    gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_DS_REF_SURFACE_STATE);
}
2626
/* Emit VDENC_PIPE_BUF_ADDR_STATE (Gen9/SKL layout, 37 DWs) binding the
 * downscaled and full-resolution references, input, stream-in, scratch,
 * and statistics stream-out buffers. A ref index of 0xFF means "no
 * reference in that slot". */
static void
gen9_vdenc_vdenc_pipe_buf_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 37);

    OUT_BCS_BATCH(batch, VDENC_PIPE_BUF_ADDR_STATE | (37 - 2));

    /* DW1-6 for DS FWD REF0/REF1 */

    if (vdenc_context->list_ref_idx[0][0] != 0xFF)
        OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    if (vdenc_context->list_ref_idx[0][1] != 0xFF)
        OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW7-9 for DS BWD REF0, ignored on SKL */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW10-12 for uncompressed input data */
    OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);

    /* DW13-DW15 for streamin data */
    if (vdenc_context->vdenc_streamin_enable)
        OUT_BUFFER_3DW(batch, vdenc_context->vdenc_streamin_res.bo, 0, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW16-DW18 for row scratch buffer */
    OUT_BUFFER_3DW(batch, vdenc_context->vdenc_row_store_scratch_res.bo, 1, 0, 0);

    /* DW19-DW21, ignored on SKL */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW22-DW27 for FWD REF0/REF1 */

    if (vdenc_context->list_ref_idx[0][0] != 0xFF)
        OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    if (vdenc_context->list_ref_idx[0][1] != 0xFF)
        OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW28-DW30 for FWD REF2, ignored on SKL */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW31-DW33 for BDW REF0, ignored on SKL */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW34-DW36 for VDEnc statistics streamout */
    OUT_BUFFER_3DW(batch, vdenc_context->vdenc_statistics_res.bo, 1, 0, 0);

    ADVANCE_BCS_BATCH(batch);
}
2693
/* Emit VDENC_PIPE_BUF_ADDR_STATE (Gen10/CNL layout, 62 DWs). Same
 * bindings as the Gen9 variant plus a third forward reference slot and
 * additional trailing address fields that are left unused here. */
static void
gen10_vdenc_vdenc_pipe_buf_addr_state(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 62);

    OUT_BCS_BATCH(batch, VDENC_PIPE_BUF_ADDR_STATE | (62 - 2));

    /* DW1-6 for DS FWD REF0/REF1 */
    if (vdenc_context->list_ref_idx[0][0] != 0xFF)
        OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][0]].bo,
                       0, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    if (vdenc_context->list_ref_idx[0][1] != 0xFF)
        OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][1]].bo,
                       0, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW7-9 for DS BWD REF0. B-frame is not supported */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW10-12 for uncompressed input data */
    OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);

    /* DW13-DW15 for streamin data */
    if (vdenc_context->vdenc_streamin_enable)
        OUT_BUFFER_3DW(batch, vdenc_context->vdenc_streamin_res.bo, 0, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW16-DW18 for row scratch buffer */
    OUT_BUFFER_3DW(batch, vdenc_context->vdenc_row_store_scratch_res.bo, 1, 0, 0);

    /* DW19-DW21, Not used */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW22-DW27 for FWD REF0/REF1 */
    if (vdenc_context->list_ref_idx[0][0] != 0xFF)
        OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    if (vdenc_context->list_ref_idx[0][1] != 0xFF)
        OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW28-DW30 for FWD REF2 */
    if (vdenc_context->list_ref_idx[0][2] != 0xFF)
        OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][2]].bo, 0, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW31-DW33 for BDW REF0. Ignored*/
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW34-DW36 for VDEnc statistics streamout */
    OUT_BUFFER_3DW(batch, vdenc_context->vdenc_statistics_res.bo, 1, 0, 0);

    /* DW37..DW39. Not used */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW40..DW42. Not used */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW43..DW45. Not used */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW46..DW48. Not used */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW49..DW51. Not used */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW52..DW54. Not used */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW55..DW57. Not used */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW58..DW60. Not used */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW 61. Not used */
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
2789
2790 static void
2791 gen9_vdenc_vdenc_const_qpt_state(VADriverContextP ctx,
2792                                  struct encode_state *encode_state,
2793                                  struct intel_encoder_context *encoder_context)
2794 {
2795     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2796     struct intel_batchbuffer *batch = encoder_context->base.batch;
2797
2798     BEGIN_BCS_BATCH(batch, 61);
2799
2800     OUT_BCS_BATCH(batch, VDENC_CONST_QPT_STATE | (61 - 2));
2801
2802     if (vdenc_context->frame_type == VDENC_FRAME_I) {
2803         /* DW1-DW11 */
2804         intel_batchbuffer_data(batch, (void *)vdenc_const_qp_lambda, sizeof(vdenc_const_qp_lambda));
2805
2806         /* DW12-DW25 */
2807         intel_batchbuffer_data(batch, (void *)vdenc_const_skip_threshold, sizeof(vdenc_const_skip_threshold));
2808
2809         /* DW26-DW39 */
2810         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_0, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0));
2811
2812         /* DW40-DW46 */
2813         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_1, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1));
2814
2815         /* DW47-DW53 */
2816         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_2, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2));
2817
2818         /* DW54-DW60 */
2819         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_3, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3));
2820     } else {
2821         int i;
2822         uint16_t tmp_vdenc_skip_threshold_p[28];
2823
2824         memcpy(&tmp_vdenc_skip_threshold_p, vdenc_const_skip_threshold_p, sizeof(vdenc_const_skip_threshold_p));
2825
2826         for (i = 0; i < 28; i++) {
2827             tmp_vdenc_skip_threshold_p[i] *= 3;
2828         }
2829
2830         /* DW1-DW11 */
2831         intel_batchbuffer_data(batch, (void *)vdenc_const_qp_lambda_p, sizeof(vdenc_const_qp_lambda_p));
2832
2833         /* DW12-DW25 */
2834         intel_batchbuffer_data(batch, (void *)tmp_vdenc_skip_threshold_p, sizeof(vdenc_const_skip_threshold_p));
2835
2836         /* DW26-DW39 */
2837         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_0_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0_p));
2838
2839         /* DW40-DW46 */
2840         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_1_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1_p));
2841
2842         /* DW47-DW53 */
2843         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_2_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2_p));
2844
2845         /* DW54-DW60 */
2846         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_3_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3_p));
2847     }
2848
2849     ADVANCE_BCS_BATCH(batch);
2850 }
2851
2852 static void
2853 gen9_vdenc_vdenc_walker_state(VADriverContextP ctx,
2854                               struct encode_state *encode_state,
2855                               struct intel_encoder_context *encoder_context)
2856 {
2857     struct intel_batchbuffer *batch = encoder_context->base.batch;
2858
2859     BEGIN_BCS_BATCH(batch, 2);
2860
2861     OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (2 - 2));
2862     OUT_BCS_BATCH(batch, 0); /* All fields are set to 0 */
2863
2864     ADVANCE_BCS_BATCH(batch);
2865 }
2866
2867 static void
2868 gen95_vdenc_vdecn_weihgtsoffsets_state(VADriverContextP ctx,
2869                                        struct encode_state *encode_state,
2870                                        struct intel_encoder_context *encoder_context,
2871                                        VAEncSliceParameterBufferH264 *slice_param)
2872 {
2873     struct i965_driver_data *i965 = i965_driver_data(ctx);
2874     struct intel_batchbuffer *batch = encoder_context->base.batch;
2875     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2876
2877     if (IS_GEN10(i965->intel.device_info)) {
2878         BEGIN_BCS_BATCH(batch, 5);
2879         OUT_BCS_BATCH(batch, VDENC_WEIGHTSOFFSETS_STATE | (5 - 2));
2880     } else {
2881         BEGIN_BCS_BATCH(batch, 3);
2882         OUT_BCS_BATCH(batch, VDENC_WEIGHTSOFFSETS_STATE | (3 - 2));
2883     }
2884
2885     if (pic_param->pic_fields.bits.weighted_pred_flag == 1) {
2886         OUT_BCS_BATCH(batch, (slice_param->luma_offset_l0[1] << 24 |
2887                               slice_param->luma_weight_l0[1] << 16 |
2888                               slice_param->luma_offset_l0[0] << 8 |
2889                               slice_param->luma_weight_l0[0] << 0));
2890         OUT_BCS_BATCH(batch, (slice_param->luma_offset_l0[2] << 8 |
2891                               slice_param->luma_weight_l0[2] << 0));
2892     } else {
2893         OUT_BCS_BATCH(batch, (0 << 24 |
2894                               1 << 16 |
2895                               0 << 8 |
2896                               1 << 0));
2897         OUT_BCS_BATCH(batch, (0 << 8 |
2898                               1 << 0));
2899     }
2900
2901     if (IS_GEN10(i965->intel.device_info)) {
2902         OUT_BCS_BATCH(batch, (0 << 24 |
2903                               1 << 16 |
2904                               0 << 8 |
2905                               1 << 0));
2906         OUT_BCS_BATCH(batch, (0 << 24 |
2907                               1 << 16 |
2908                               0 << 8 |
2909                               1 << 0));
2910     }
2911
2912     ADVANCE_BCS_BATCH(batch);
2913 }
2914
2915 static void
2916 gen95_vdenc_vdenc_walker_state(VADriverContextP ctx,
2917                                struct encode_state *encode_state,
2918                                struct intel_encoder_context *encoder_context,
2919                                VAEncSliceParameterBufferH264 *slice_param,
2920                                VAEncSliceParameterBufferH264 *next_slice_param)
2921 {
2922     struct i965_driver_data *i965 = i965_driver_data(ctx);
2923     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2924     struct intel_batchbuffer *batch = encoder_context->base.batch;
2925     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2926     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
2927     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
2928     int luma_log2_weight_denom, weighted_pred_idc;
2929
2930     slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
2931     slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
2932
2933     if (next_slice_param) {
2934         next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
2935         next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
2936     } else {
2937         next_slice_hor_pos = 0;
2938         next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
2939     }
2940
2941     if (slice_type == SLICE_TYPE_P)
2942         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
2943     else
2944         weighted_pred_idc = 0;
2945
2946     if (weighted_pred_idc == 1)
2947         luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
2948     else
2949         luma_log2_weight_denom = 0;
2950
2951     if (IS_GEN10(i965->intel.device_info)) {
2952         BEGIN_BCS_BATCH(batch, 6);
2953         OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (6 - 2));
2954     } else {
2955         BEGIN_BCS_BATCH(batch, 4);
2956         OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (4 - 2));
2957     }
2958
2959     OUT_BCS_BATCH(batch, (slice_hor_pos << 16 |
2960                           slice_ver_pos));
2961     OUT_BCS_BATCH(batch, (next_slice_hor_pos << 16 |
2962                           next_slice_ver_pos));
2963     OUT_BCS_BATCH(batch, luma_log2_weight_denom);
2964
2965     if (IS_GEN10(i965->intel.device_info)) {
2966         /* Not used for VDENC H264 */
2967         OUT_BCS_BATCH(batch, 0);
2968         OUT_BCS_BATCH(batch, 0);
2969     }
2970
2971     ADVANCE_BCS_BATCH(batch);
2972 }
2973
2974 static void
2975 gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
2976                            struct encode_state *encode_state,
2977                            struct intel_encoder_context *encoder_context)
2978 {
2979     struct intel_batchbuffer *batch = encoder_context->base.batch;
2980     struct gen9_vdenc_img_state vdenc_img_cmd;
2981
2982     gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, &vdenc_img_cmd, 1);
2983
2984     BEGIN_BCS_BATCH(batch, (sizeof(vdenc_img_cmd) >> 2));
2985     intel_batchbuffer_data(batch, &vdenc_img_cmd, sizeof(vdenc_img_cmd));
2986     ADVANCE_BCS_BATCH(batch);
2987 }
2988
2989 static void
2990 gen9_vdenc_mfx_avc_insert_object(VADriverContextP ctx,
2991                                  struct intel_encoder_context *encoder_context,
2992                                  unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
2993                                  int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
2994                                  int slice_header_indicator)
2995 {
2996     struct intel_batchbuffer *batch = encoder_context->base.batch;
2997
2998     if (data_bits_in_last_dw == 0)
2999         data_bits_in_last_dw = 32;
3000
3001     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
3002
3003     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
3004     OUT_BCS_BATCH(batch,
3005                   (0 << 16) |   /* always start at offset 0 */
3006                   (slice_header_indicator << 14) |
3007                   (data_bits_in_last_dw << 8) |
3008                   (skip_emul_byte_count << 4) |
3009                   (!!emulation_flag << 3) |
3010                   ((!!is_last_header) << 2) |
3011                   ((!!is_end_of_slice) << 1) |
3012                   (0 << 0));    /* TODO: check this flag */
3013     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
3014
3015     ADVANCE_BCS_BATCH(batch);
3016 }
3017
static void
gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
                                            struct encode_state *encode_state,
                                            struct intel_encoder_context *encoder_context,
                                            int slice_index)
{
    /* Emit all packed-header data belonging to one slice into the batch:
     * first any non-slice-header raw packed data attached to the slice,
     * then (in MB-level VDEnc mode) a single zero byte, and finally the
     * slice header itself -- either generated by the driver or taken from
     * the user-supplied packed slice header buffer.
     */
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    int count, i, start_index;
    int slice_header_index;             /* -1 when no packed slice header was supplied */
    unsigned int insert_one_zero_byte = 0;

    if (encode_state->slice_header_index[slice_index] == 0)
        slice_header_index = -1;
    else
        slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    count = encode_state->slice_rawdata_count[slice_index];
    start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);

        /* skip the slice header packed data type as it is lastly inserted */
        if (param->type == VAEncPackedHeaderSlice)
            continue;

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* as the slice header is still required, the last header flag is set to
         * zero.
         */
        gen9_vdenc_mfx_avc_insert_object(ctx,
                                         encoder_context,
                                         header_data,
                                         ALIGN(length_in_bits, 32) >> 5,
                                         length_in_bits & 0x1f,
                                         skip_emul_byte_cnt,
                                         0,
                                         0,
                                         !param->has_emulation_bytes,
                                         0);

    }

    /* The extra zero byte is only emitted in MB-level (non-frame-level)
     * VDEnc mode. */
    if (!vdenc_context->is_frame_level_vdenc) {
        insert_one_zero_byte = 1;
    }

    /* Insert one zero byte before the slice header if no any other NAL unit is inserted, required on KBL */
    if (insert_one_zero_byte) {
        unsigned int insert_data[] = { 0, };

        gen9_vdenc_mfx_avc_insert_object(ctx,
                                         encoder_context,
                                         insert_data,
                                         1,
                                         8,     /* 8 valid bits: exactly one byte */
                                         1,
                                         0, 0, 0, 0);
    }

    if (slice_header_index == -1) {
        VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
        unsigned char *slice_header = NULL, *slice_header1 = NULL;
        int slice_header_length_in_bits = 0;
        uint32_t saved_macroblock_address = 0;

        /* No slice header data is passed. And the driver needs to generate it */
        /* For the Normal H264 */

        /* On KBL/GLK/GEN10, non-first slices build their header with
         * macroblock_address forced to 0 (presumably so first_mb_in_slice is
         * coded as 0 -- TODO confirm); the original value is restored below. */
        if (slice_index &&
            (IS_KBL(i965->intel.device_info) ||
             IS_GLK(i965->intel.device_info) ||
             IS_GEN10(i965->intel.device_info))) {
            saved_macroblock_address = slice_params->macroblock_address;
            slice_params->macroblock_address = 0;
        }

        slice_header_length_in_bits = build_avc_slice_header(seq_param,
                                                             pic_param,
                                                             slice_params,
                                                             &slice_header);

        slice_header1 = slice_header;

        if (slice_index &&
            (IS_KBL(i965->intel.device_info) ||
             IS_GLK(i965->intel.device_info) ||
             IS_GEN10(i965->intel.device_info))) {
            slice_params->macroblock_address = saved_macroblock_address;
        }

        /* A zero byte was already emitted above, so skip the header's first
         * byte to compensate. */
        if (insert_one_zero_byte) {
            slice_header1 += 1;
            slice_header_length_in_bits -= 8;
        }

        gen9_vdenc_mfx_avc_insert_object(ctx,
                                         encoder_context,
                                         (unsigned int *)slice_header1,
                                         ALIGN(slice_header_length_in_bits, 32) >> 5,
                                         slice_header_length_in_bits & 0x1f,
                                         5,  /* first 5 bytes are start code + nal unit type */
                                         1, 0, 1,
                                         1);

        free(slice_header);
    } else {
        unsigned int skip_emul_byte_cnt;
        unsigned char *slice_header1 = NULL;

        /* On KBL/GLK/GEN10, later slices reuse the first slice's packed
         * header buffer. */
        if (slice_index &&
            (IS_KBL(i965->intel.device_info) ||
             IS_GLK(i965->intel.device_info) ||
             IS_GEN10(i965->intel.device_info))) {
            slice_header_index = (encode_state->slice_header_index[0] & SLICE_PACKED_DATA_INDEX_MASK);
        }

        header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
        length_in_bits = param->bit_length;

        slice_header1 = (unsigned char *)header_data;

        /* As above: compensate for the zero byte emitted earlier */
        if (insert_one_zero_byte) {
            slice_header1 += 1;
            length_in_bits -= 8;
        }

        /* as the slice header is the last header data for one slice,
         * the last header flag is set to one.
         */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        if (insert_one_zero_byte)
            skip_emul_byte_cnt -= 1;

        gen9_vdenc_mfx_avc_insert_object(ctx,
                                         encoder_context,
                                         (unsigned int *)slice_header1,
                                         ALIGN(length_in_bits, 32) >> 5,
                                         length_in_bits & 0x1f,
                                         skip_emul_byte_cnt,
                                         1,
                                         0,
                                         !param->has_emulation_bytes,
                                         1);
    }

    return;
}
3182
3183 static void
3184 gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
3185                                  struct encode_state *encode_state,
3186                                  struct intel_encoder_context *encoder_context,
3187                                  VAEncSliceParameterBufferH264 *slice_param,
3188                                  int slice_index)
3189 {
3190     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3191     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
3192     unsigned int internal_rate_mode = vdenc_context->internal_rate_mode;
3193     unsigned int skip_emul_byte_cnt;
3194
3195     if (slice_index == 0) {
3196
3197         if (encode_state->packed_header_data[idx]) {
3198             VAEncPackedHeaderParameterBuffer *param = NULL;
3199             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3200             unsigned int length_in_bits;
3201
3202             assert(encode_state->packed_header_param[idx]);
3203             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3204             length_in_bits = param->bit_length;
3205
3206             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3207             gen9_vdenc_mfx_avc_insert_object(ctx,
3208                                              encoder_context,
3209                                              header_data,
3210                                              ALIGN(length_in_bits, 32) >> 5,
3211                                              length_in_bits & 0x1f,
3212                                              skip_emul_byte_cnt,
3213                                              0,
3214                                              0,
3215                                              !param->has_emulation_bytes,
3216                                              0);
3217
3218         }
3219
3220         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
3221
3222         if (encode_state->packed_header_data[idx]) {
3223             VAEncPackedHeaderParameterBuffer *param = NULL;
3224             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3225             unsigned int length_in_bits;
3226
3227             assert(encode_state->packed_header_param[idx]);
3228             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3229             length_in_bits = param->bit_length;
3230
3231             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3232
3233             gen9_vdenc_mfx_avc_insert_object(ctx,
3234                                              encoder_context,
3235                                              header_data,
3236                                              ALIGN(length_in_bits, 32) >> 5,
3237                                              length_in_bits & 0x1f,
3238                                              skip_emul_byte_cnt,
3239                                              0,
3240                                              0,
3241                                              !param->has_emulation_bytes,
3242                                              0);
3243
3244         }
3245
3246         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
3247
3248         if (encode_state->packed_header_data[idx]) {
3249             VAEncPackedHeaderParameterBuffer *param = NULL;
3250             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3251             unsigned int length_in_bits;
3252
3253             assert(encode_state->packed_header_param[idx]);
3254             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3255             length_in_bits = param->bit_length;
3256
3257             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3258             gen9_vdenc_mfx_avc_insert_object(ctx,
3259                                              encoder_context,
3260                                              header_data,
3261                                              ALIGN(length_in_bits, 32) >> 5,
3262                                              length_in_bits & 0x1f,
3263                                              skip_emul_byte_cnt,
3264                                              0,
3265                                              0,
3266                                              !param->has_emulation_bytes,
3267                                              0);
3268
3269         } else if (internal_rate_mode == I965_BRC_CBR) {
3270             /* TODO: insert others */
3271         }
3272     }
3273
3274     gen9_vdenc_mfx_avc_insert_slice_packed_data(ctx,
3275                                                 encode_state,
3276                                                 encoder_context,
3277                                                 slice_index);
3278 }
3279
static void
gen9_vdenc_mfx_avc_slice_state(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context,
                               VAEncPictureParameterBufferH264 *pic_param,
                               VAEncSliceParameterBufferH264 *slice_param,
                               VAEncSliceParameterBufferH264 *next_slice_param,
                               int slice_index)
{
    /* Program the 11-DW MFX_AVC_SLICE_STATE command for one slice: slice
     * type, reference list sizes, weighted-prediction denominators, QP and
     * deblocking controls, slice MB extents and the MFX rate-control fields.
     */
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
    unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
    unsigned char correct[6], grow, shrink;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int max_qp_n, max_qp_p;
    int i;
    int weighted_pred_idc = 0;
    int num_ref_l0 = 0, num_ref_l1 = 0;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; /* TODO: fix for CBR&VBR */
    int inter_rounding = 0;

    /* Inter rounding 3 whenever a BRC mode (anything but CQP) is active */
    if (vdenc_context->internal_rate_mode != I965_BRC_CQP)
        inter_rounding = 3;

    slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
    slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;

    /* The end position of this slice is the start of the next one, or the
     * frame bottom for the last slice. */
    if (next_slice_param) {
        next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
        next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
    } else {
        next_slice_hor_pos = 0;
        next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
    }

    /* Derive weighted prediction controls and active reference counts from
     * the picture parameters, with per-slice overrides where flagged. */
    if (slice_type == SLICE_TYPE_I) {
        luma_log2_weight_denom = 0;
        chroma_log2_weight_denom = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag)
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (slice_type == SLICE_TYPE_B) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag) {
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (weighted_pred_idc == 2) {
            /* 8.4.3 - Derivation process for prediction weights (8-279) */
            luma_log2_weight_denom = 5;
            chroma_log2_weight_denom = 5;
        }
    }

    /* MFX rate-control knobs; not wired up yet for VDEnc */
    max_qp_n = 0;       /* TODO: update it */
    max_qp_p = 0;       /* TODO: update it */
    grow = 0;           /* TODO: update it */
    shrink = 0;         /* TODO: update it */

    for (i = 0; i < 6; i++)
        correct[i] = 0; /* TODO: update it */

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    /* DW1: slice type */
    OUT_BCS_BATCH(batch, slice_type);
    /* DW2: reference counts and log2 weight denominators */
    OUT_BCS_BATCH(batch,
                  (num_ref_l0 << 16) |
                  (num_ref_l1 << 24) |
                  (chroma_log2_weight_denom << 8) |
                  (luma_log2_weight_denom << 0));
    /* DW3: prediction/deblocking/CABAC controls and slice QP */
    OUT_BCS_BATCH(batch,
                  (weighted_pred_idc << 30) |
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  (slice_qp << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));

    /* DW4: slice start position (MB units) */
    OUT_BCS_BATCH(batch,
                  slice_ver_pos << 24 |
                  slice_hor_pos << 16 |
                  slice_param->macroblock_address);
    /* DW5: next slice start position (MB units) */
    OUT_BCS_BATCH(batch,
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);

    /* DW6: stream-out / rate-control flags */
    OUT_BCS_BATCH(batch,
                  (0 << 31) |           /* TODO: ignore it for VDENC ??? */
                  (!slice_param->macroblock_address << 30) |    /* ResetRateControlCounter */
                  (2 << 28) |       /* Loose Rate Control */
                  (0 << 24) |           /* RC Stable Tolerance */
                  (0 << 23) |           /* RC Panic Enable */
                  (1 << 22) |           /* CBP mode */
                  (0 << 21) |           /* MB Type Direct Conversion, 0: Enable, 1: Disable */
                  (0 << 20) |           /* MB Type Skip Conversion, 0: Enable, 1: Disable */
                  (!next_slice_param << 19) |                   /* Is Last Slice */
                  (0 << 18) |           /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
                  (1 << 17) |           /* HeaderPresentFlag */
                  (1 << 16) |           /* SliceData PresentFlag */
                  (0 << 15) |           /* TailPresentFlag, TODO: check it on VDEnc  */
                  (1 << 13) |           /* RBSP NAL TYPE */
                  (slice_index << 4) |
                  (1 << 12));           /* CabacZeroWordInsertionEnable */

    /* DW7: output bitstream start offset */
    OUT_BCS_BATCH(batch, vdenc_context->compressed_bitstream.start_offset);

    /* DW8: QP clamp and grow/shrink controls (all zero for now, see TODOs) */
    OUT_BCS_BATCH(batch,
                  (max_qp_n << 24) |     /*Target QP - 24 is lowest QP*/
                  (max_qp_p << 16) |     /*Target QP + 20 is highest QP*/
                  (shrink << 8) |
                  (grow << 0));
    /* DW9: rounding and RC correction factors */
    OUT_BCS_BATCH(batch,
                  (1 << 31) |
                  (inter_rounding << 28) |
                  (1 << 27) |
                  (5 << 24) |
                  (correct[5] << 20) |
                  (correct[4] << 16) |
                  (correct[3] << 12) |
                  (correct[2] << 8) |
                  (correct[1] << 4) |
                  (correct[0] << 0));
    /* DW10: reserved */
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
3417
3418 static uint8_t
3419 gen9_vdenc_mfx_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
3420 {
3421     unsigned int is_long_term =
3422         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
3423     unsigned int is_top_field =
3424         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
3425     unsigned int is_bottom_field =
3426         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
3427
3428     return ((is_long_term                         << 6) |
3429             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
3430             (frame_store_id                       << 1) |
3431             ((is_top_field ^ 1) & is_bottom_field));
3432 }
3433
/*
 * Program MFX_AVC_REF_IDX_STATE with the forward (L0) reference list.
 *
 * Each active L0 entry is packed into one byte (see
 * gen9_vdenc_mfx_get_ref_idx_state above); 0x80 marks an unused slot.
 * Only P slices are emitted here; VDEnc on this platform has no backward
 * prediction, so a B slice is a caller error.
 */
static void
gen9_vdenc_mfx_avc_ref_idx_state(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context,
                                 VAEncSliceParameterBufferH264 *slice_param)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    VAPictureH264 *ref_pic;
    int i, slice_type, ref_idx_shift;
    unsigned int fwd_ref_entry;

    /* 0x80 in every byte: all four slots start out "invalid/unused". */
    fwd_ref_entry = 0x80808080;
    slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

    /* Pack at most 3 L0 references, one byte per entry.  NOTE(review):
     * the limit of 3 looks like a VDEnc restriction -- confirm vs PRM. */
    for (i = 0; i < MIN(vdenc_context->num_refs[0], 3); i++) {
        ref_pic = &slice_param->RefPicList0[i];
        ref_idx_shift = i * 8;

        /* 0xFF: no frame store was mapped for this ref; keep 0x80. */
        if (vdenc_context->list_ref_idx[0][i] == 0xFF)
            continue;

        fwd_ref_entry &= ~(0xFF << ref_idx_shift);
        fwd_ref_entry += (gen9_vdenc_mfx_get_ref_idx_state(ref_pic, vdenc_context->list_ref_idx[0][i]) << ref_idx_shift);
    }

    if (slice_type == SLICE_TYPE_P) {
        BEGIN_BCS_BATCH(batch, 10);
        OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
        OUT_BCS_BATCH(batch, 0);                        // L0
        OUT_BCS_BATCH(batch, fwd_ref_entry);

        /* Remaining 7 DWs: reference slots 4..31, all unused. */
        for (i = 0; i < 7; i++) {
            OUT_BCS_BATCH(batch, 0x80808080);
        }

        ADVANCE_BCS_BATCH(batch);
    }

    if (slice_type == SLICE_TYPE_B) {
        /* VDEnc on SKL doesn't support backward (B) prediction */
        assert(0);
    }
}
3478
/*
 * Program MFX_AVC_WEIGHTOFFSET_STATE for the L0 list when explicit
 * weighted prediction is enabled on a P slice.
 *
 * Payload layout: 32 entries x 6 int16 values each --
 *   { luma weight, luma offset, Cb weight, Cb offset, Cr weight, Cr offset }
 * = 96 DWs, plus 2 header DWs -> a 98-DW command.
 */
static void
gen9_vdenc_mfx_avc_weightoffset_state(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      VAEncPictureParameterBufferH264 *pic_param,
                                      VAEncSliceParameterBufferH264 *slice_param)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i, slice_type;
    short weightoffsets[32 * 6];

    slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

    if (slice_type == SLICE_TYPE_P &&
        pic_param->pic_fields.bits.weighted_pred_flag == 1) {

        /* Interleave the per-reference luma/chroma weights and offsets. */
        for (i = 0; i < 32; i++) {
            weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
            weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
            weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
            weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
            weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
            weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
        }

        BEGIN_BCS_BATCH(batch, 98);
        OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
        OUT_BCS_BATCH(batch, 0);  /* DW1: presumably selects the L0 table -- confirm vs PRM */
        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));

        ADVANCE_BCS_BATCH(batch);
    }

    if (slice_type == SLICE_TYPE_B) {
        /* VDEnc on SKL doesn't support backward (B) prediction */
        assert(0);
    }
}
3517
/*
 * Emit the per-slice command sequence: L0 reference mapping, weighted
 * prediction table, MFX slice state and packed slice headers.  On the
 * slice-level VDEnc path (is_frame_level_vdenc == 0, gen9.5+) the
 * per-slice VDEnc weights/offsets and walker state follow.
 * NOTE: keep this emission order -- it is the programming sequence the
 * rest of the pipeline relies on.
 */
static void
gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                VAEncSliceParameterBufferH264 *slice_param,
                                VAEncSliceParameterBufferH264 *next_slice_param,
                                int slice_index)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

    gen9_vdenc_mfx_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param);
    gen9_vdenc_mfx_avc_weightoffset_state(ctx,
                                          encode_state,
                                          encoder_context,
                                          pic_param,
                                          slice_param);
    gen9_vdenc_mfx_avc_slice_state(ctx,
                                   encode_state,
                                   encoder_context,
                                   pic_param,
                                   slice_param,
                                   next_slice_param,
                                   slice_index);
    gen9_vdenc_mfx_avc_inset_headers(ctx,
                                     encode_state,
                                     encoder_context,
                                     slice_param,
                                     slice_index);

    if (!vdenc_context->is_frame_level_vdenc) {
        /* (sic) misspelled helper name, defined elsewhere in this file */
        gen95_vdenc_vdecn_weihgtsoffsets_state(ctx,
                                               encode_state,
                                               encoder_context,
                                               slice_param);
        gen95_vdenc_vdenc_walker_state(ctx,
                                       encode_state,
                                       encoder_context,
                                       slice_param,
                                       next_slice_param);
    }
}
3560
/*
 * Emit MFX + VDEnc commands for every slice of the frame.
 *
 * Frame-level VDEnc (gen9): only the first slice is programmed via
 * gen9_vdenc_mfx_avc_single_slice() (both loops break after it); then a
 * single frame-wide VDENC_WALKER_STATE plus one VD_PIPELINE_FLUSH close
 * the frame.
 *
 * Slice-level VDEnc (gen9.5+): every slice is programmed individually
 * and followed by its own VD_PIPELINE_FLUSH, with an MI_FLUSH_DW
 * separating consecutive slices.
 */
static void
gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
    VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    int i, j;
    int slice_index = 0;
    int has_tail = 0;                   /* TODO: check it later */

    for (j = 0; j < encode_state->num_slice_params_ext; j++) {
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        /* Look ahead to the next slice group (NULL on the last one). */
        if (j == encode_state->num_slice_params_ext - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            /* next_slice_param: following element in this group, first of
             * the next group, or NULL at the very end of the frame. */
            if (i < encode_state->slice_params_ext[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen9_vdenc_mfx_avc_single_slice(ctx,
                                            encode_state,
                                            encoder_context,
                                            slice_param,
                                            next_slice_param,
                                            slice_index);

            if (vdenc_context->is_frame_level_vdenc)
                break;
            else {
                struct vd_pipeline_flush_parameter pipeline_flush_params;
                int insert_mi_flush;

                memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));

                /* More slices follow (in this group or the next): mark the
                 * MFX pipeline done and separate slices with MI_FLUSH_DW.
                 * On the very last slice, MFX done only if no tail data. */
                if (next_slice_group_param) {
                    pipeline_flush_params.mfx_pipeline_done = 1;
                    insert_mi_flush = 1;
                } else if (i < encode_state->slice_params_ext[j]->num_elements - 1) {
                    pipeline_flush_params.mfx_pipeline_done = 1;
                    insert_mi_flush = 1;
                } else {
                    pipeline_flush_params.mfx_pipeline_done = !has_tail;
                    insert_mi_flush = 0;
                }

                pipeline_flush_params.vdenc_pipeline_done = 1;
                pipeline_flush_params.vdenc_pipeline_command_flush = 1;
                pipeline_flush_params.vd_command_message_parser_done = 1;
                gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);

                if (insert_mi_flush) {
                    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
                    mi_flush_dw_params.video_pipeline_cache_invalidate = 0;
                    gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
                }
            }

            slice_param++;
            slice_index++;
        }

        if (vdenc_context->is_frame_level_vdenc)
            break;
    }

    if (vdenc_context->is_frame_level_vdenc) {
        struct vd_pipeline_flush_parameter pipeline_flush_params;

        /* One walker state covers the whole frame. */
        gen9_vdenc_vdenc_walker_state(ctx, encode_state, encoder_context);

        memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
        pipeline_flush_params.mfx_pipeline_done = !has_tail;
        pipeline_flush_params.vdenc_pipeline_done = 1;
        pipeline_flush_params.vdenc_pipeline_command_flush = 1;
        pipeline_flush_params.vd_command_message_parser_done = 1;
        gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
    }

    if (has_tail) {
        /* TODO: insert a tail if required */
    }

    /* Final flush with cache invalidate before the status read-back. */
    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
    mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
    gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
}
3655
/*
 * Build the complete MFX + VDEnc command stream for one PAK pass.
 *
 * BRC gating: with BRC enabled, an MI_CONDITIONAL_BATCH_BUFFER_END on the
 * HuC status2 buffer lets the hardware skip the rest of the batch, and
 * the image state is taken from the HuC-written second-level batch buffer
 * instead of being programmed directly.  On re-passes (current_pass > 0)
 * a second conditional end on the HuC status buffer is added.
 */
static void
gen9_vdenc_mfx_vdenc_pipeline(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gpe_mi_batch_buffer_start_parameter mi_batch_buffer_start_params;

    if (vdenc_context->brc_enabled) {
        struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;

        memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
        mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
        gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
    }

    if (vdenc_context->current_pass) {
        struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;

        memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
        mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status_res.bo;
        gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
    }

    gen9_vdenc_mfx_pipe_mode_select(ctx, encode_state, encoder_context);

    /* MFX surfaces: reconstructed (id 0), raw input (id 4), 4x-scaled
     * reconstructed (id 5). */
    gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res, 0);
    gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res, 4);
    gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res, 5);

    gen9_vdenc_mfx_pipe_buf_addr_state(ctx, encoder_context);
    gen9_vdenc_mfx_ind_obj_base_addr_state(ctx, encoder_context);
    gen9_vdenc_mfx_bsp_buf_base_addr_state(ctx, encoder_context);

    gen9_vdenc_vdenc_pipe_mode_select(ctx, encode_state, encoder_context);
    gen9_vdenc_vdenc_src_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res);
    gen9_vdenc_vdenc_ref_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res);
    gen9_vdenc_vdenc_ds_ref_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res);

    /* CNL (gen10) uses an extended pipe-buf-addr layout. */
    if (IS_GEN10(i965->intel.device_info))
        gen10_vdenc_vdenc_pipe_buf_addr_state(ctx, encode_state, encoder_context);
    else
        gen9_vdenc_vdenc_pipe_buf_addr_state(ctx, encode_state, encoder_context);

    gen9_vdenc_vdenc_const_qpt_state(ctx, encode_state, encoder_context);

    if (!vdenc_context->brc_enabled) {
        /* CQP: program the image state directly. */
        gen9_vdenc_mfx_avc_img_state(ctx, encode_state, encoder_context);
        gen9_vdenc_vdenc_img_state(ctx, encode_state, encoder_context);
    } else {
        /* BRC: chain to the HuC-updated image state. */
        memset(&mi_batch_buffer_start_params, 0, sizeof(mi_batch_buffer_start_params));
        mi_batch_buffer_start_params.is_second_level = 1; /* Must be the second level batch buffer */
        mi_batch_buffer_start_params.bo = vdenc_context->second_level_batch_res.bo;
        gen8_gpe_mi_batch_buffer_start(ctx, batch, &mi_batch_buffer_start_params);
    }

    gen9_vdenc_mfx_avc_qm_state(ctx, encoder_context);
    gen9_vdenc_mfx_avc_fqm_state(ctx, encoder_context);

    gen9_vdenc_mfx_vdenc_avc_slices(ctx, encode_state, encoder_context);
}
3719
3720 static void
3721 gen9_vdenc_context_brc_prepare(struct encode_state *encode_state,
3722                                struct intel_encoder_context *encoder_context)
3723 {
3724     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3725     unsigned int rate_control_mode = encoder_context->rate_control_mode;
3726
3727     switch (rate_control_mode & 0x7f) {
3728     case VA_RC_CBR:
3729         vdenc_context->internal_rate_mode = I965_BRC_CBR;
3730         break;
3731
3732     case VA_RC_VBR:
3733         vdenc_context->internal_rate_mode = I965_BRC_VBR;
3734         break;
3735
3736     case VA_RC_CQP:
3737     default:
3738         vdenc_context->internal_rate_mode = I965_BRC_CQP;
3739         break;
3740     }
3741 }
3742
/*
 * Append commands that capture the PAK results once the frame completes.
 *
 * The frame byte count (MFC_BITSTREAM_BYTECOUNT_FRAME_REG) is stored into
 * the driver status buffer; for each potential BRC pass the byte count
 * and image-status control registers are also stored into that pass's
 * BRC update DMEM so the HuC kernel can evaluate the pass.
 * NOTE(review): the DW offsets 5 and 7 are assumed to match fields of
 * struct huc_brc_update_dmem -- confirm against gen9_vdenc.h.
 */
static void
gen9_vdenc_read_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
    unsigned int base_offset = vdenc_context->status_bffuer.base_offset; /* (sic) field name */
    int i;

    /* Ensure all prior work is complete before sampling registers. */
    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
    gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);

    memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
    mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
    mi_store_register_mem_params.bo = vdenc_context->status_bffuer.res.bo;
    mi_store_register_mem_params.offset = base_offset + vdenc_context->status_bffuer.bytes_per_frame_offset;
    gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);

    /* Update DMEM buffer for BRC Update */
    for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
        mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
        mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
        mi_store_register_mem_params.offset = 5 * sizeof(uint32_t);
        gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);

        mi_store_register_mem_params.mmio_offset = MFC_IMAGE_STATUS_CTRL_REG; /* TODO: fix it if VDBOX2 is used */
        mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
        mi_store_register_mem_params.offset = 7 * sizeof(uint32_t);
        gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
    }
}
3775
3776 static VAStatus
3777 gen9_vdenc_avc_check_capability(VADriverContextP ctx,
3778                                 struct encode_state *encode_state,
3779                                 struct intel_encoder_context *encoder_context)
3780 {
3781     VAEncSliceParameterBufferH264 *slice_param;
3782     int i, j;
3783
3784     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3785         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3786
3787         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
3788             if (slice_param->slice_type == SLICE_TYPE_B)
3789                 return VA_STATUS_ERROR_UNKNOWN;
3790
3791             slice_param++;
3792         }
3793     }
3794
3795     return VA_STATUS_SUCCESS;
3796 }
3797
/*
 * Top-level AVC VDEnc entry point: validate the frame, prepare resources,
 * then run up to num_passes PAK passes.  Each pass builds and flushes its
 * own BCS batch; with BRC enabled, the hardware may skip a pass via the
 * conditional batch-buffer-end programmed in
 * gen9_vdenc_mfx_vdenc_pipeline().
 */
static VAStatus
gen9_vdenc_avc_encode_picture(VADriverContextP ctx,
                              VAProfile profile,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    va_status = gen9_vdenc_avc_check_capability(ctx, encode_state, encoder_context);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    gen9_vdenc_avc_prepare(ctx, profile, encode_state, encoder_context);

    for (vdenc_context->current_pass = 0; vdenc_context->current_pass < vdenc_context->num_passes; vdenc_context->current_pass++) {
        vdenc_context->is_first_pass = (vdenc_context->current_pass == 0);
        vdenc_context->is_last_pass = (vdenc_context->current_pass == (vdenc_context->num_passes - 1));

        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);

        intel_batchbuffer_emit_mi_flush(batch);

        if (vdenc_context->brc_enabled) {
            /* (Re)initialize BRC on the first frame or after a reset,
             * then run the per-pass HuC BRC update. */
            if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset)
                gen9_vdenc_huc_brc_init_reset(ctx, encode_state, encoder_context);

            gen9_vdenc_huc_brc_update(ctx, encode_state, encoder_context);
            intel_batchbuffer_emit_mi_flush(batch);
        }

        gen9_vdenc_mfx_vdenc_pipeline(ctx, encode_state, encoder_context);
        gen9_vdenc_read_status(ctx, encoder_context);

        intel_batchbuffer_end_atomic(batch);
        intel_batchbuffer_flush(batch);

        vdenc_context->brc_initted = 1;
        vdenc_context->brc_need_reset = 0;
    }

    return VA_STATUS_SUCCESS;
}
3843
3844 static VAStatus
3845 gen9_vdenc_pipeline(VADriverContextP ctx,
3846                     VAProfile profile,
3847                     struct encode_state *encode_state,
3848                     struct intel_encoder_context *encoder_context)
3849 {
3850     VAStatus vaStatus;
3851
3852     switch (profile) {
3853     case VAProfileH264ConstrainedBaseline:
3854     case VAProfileH264Main:
3855     case VAProfileH264High:
3856         vaStatus = gen9_vdenc_avc_encode_picture(ctx, profile, encode_state, encoder_context);
3857         break;
3858
3859     default:
3860         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
3861         break;
3862     }
3863
3864     return vaStatus;
3865 }
3866
3867 static void
3868 gen9_vdenc_free_resources(struct gen9_vdenc_context *vdenc_context)
3869 {
3870     int i;
3871
3872     i965_free_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
3873     i965_free_gpe_resource(&vdenc_context->brc_history_buffer_res);
3874     i965_free_gpe_resource(&vdenc_context->brc_stream_in_res);
3875     i965_free_gpe_resource(&vdenc_context->brc_stream_out_res);
3876     i965_free_gpe_resource(&vdenc_context->huc_dummy_res);
3877
3878     for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++)
3879         i965_free_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
3880
3881     i965_free_gpe_resource(&vdenc_context->vdenc_statistics_res);
3882     i965_free_gpe_resource(&vdenc_context->pak_statistics_res);
3883     i965_free_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
3884     i965_free_gpe_resource(&vdenc_context->hme_detection_summary_buffer_res);
3885     i965_free_gpe_resource(&vdenc_context->brc_constant_data_res);
3886     i965_free_gpe_resource(&vdenc_context->second_level_batch_res);
3887
3888     i965_free_gpe_resource(&vdenc_context->huc_status_res);
3889     i965_free_gpe_resource(&vdenc_context->huc_status2_res);
3890
3891     i965_free_gpe_resource(&vdenc_context->recon_surface_res);
3892     i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
3893     i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
3894     i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
3895
3896     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
3897         i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
3898         i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
3899     }
3900
3901     i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
3902     i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
3903     i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
3904
3905     i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
3906     i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
3907     i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
3908     i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
3909
3910     i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
3911 }
3912
/* Destructor installed on encoder_context->mfc_context_destroy: release
 * all GPE resources, then the context allocation itself. */
static void
gen9_vdenc_context_destroy(void *context)
{
    struct gen9_vdenc_context *vdenc_ctx = (struct gen9_vdenc_context *)context;

    gen9_vdenc_free_resources(vdenc_ctx);
    free(vdenc_ctx);
}
3922
3923 static void
3924 gen9_vdenc_allocate_resources(VADriverContextP ctx,
3925                               struct intel_encoder_context *encoder_context,
3926                               struct gen9_vdenc_context *vdenc_context)
3927 {
3928     struct i965_driver_data *i965 = i965_driver_data(ctx);
3929     int i;
3930
3931     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_init_reset_dmem_res,
3932                                 ALIGN(sizeof(struct huc_brc_init_dmem), 64),
3933                                 "HuC Init&Reset DMEM buffer");
3934
3935     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_history_buffer_res,
3936                                 ALIGN(HUC_BRC_HISTORY_BUFFER_SIZE, 0x1000),
3937                                 "HuC History buffer");
3938
3939     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_in_res,
3940                                 ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3941                                 "HuC Stream In buffer");
3942
3943     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_out_res,
3944                                 ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3945                                 "HuC Stream Out buffer");
3946
3947     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_dummy_res,
3948                                 0x1000,
3949                                 "HuC dummy buffer");
3950
3951     for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
3952         ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_update_dmem_res[i],
3953                                     ALIGN(sizeof(struct huc_brc_update_dmem), 64),
3954                                     "HuC BRC Update buffer");
3955         i965_zero_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
3956     }
3957
3958     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_statistics_res,
3959                                 ALIGN(VDENC_STATISTICS_SIZE, 0x1000),
3960                                 "VDENC statistics buffer");
3961
3962     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->pak_statistics_res,
3963                                 ALIGN(PAK_STATISTICS_SIZE, 0x1000),
3964                                 "PAK statistics buffer");
3965
3966     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_avc_image_state_res,
3967                                 ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3968                                 "VDENC/AVC image state buffer");
3969
3970     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->hme_detection_summary_buffer_res,
3971                                 ALIGN(HME_DETECTION_SUMMARY_BUFFER_SIZE, 0x1000),
3972                                 "HME summary buffer");
3973
3974     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_constant_data_res,
3975                                 ALIGN(BRC_CONSTANT_DATA_SIZE, 0x1000),
3976                                 "BRC constant buffer");
3977
3978     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->second_level_batch_res,
3979                                 ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3980                                 "Second level batch buffer");
3981
3982     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status_res,
3983                                 0x1000,
3984                                 "HuC Status buffer");
3985
3986     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status2_res,
3987                                 0x1000,
3988                                 "HuC Status buffer");
3989 }
3990
/*
 * Gen9 defaults: VDEnc runs at frame level, i.e. the whole frame is
 * covered by a single walker state (see gen9_vdenc_mfx_vdenc_avc_slices).
 */
static void
gen9_vdenc_hw_interfaces_init(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context,
                              struct gen9_vdenc_context *vdenc_context)
{
    vdenc_context->is_frame_level_vdenc = 1;
}
3998
/*
 * Gen9.5+ (KBL/GLK/CNL): enable the extended PAK object command.
 * is_frame_level_vdenc stays 0 (set in gen9_vdenc_context_init), so
 * slices are walked individually in gen9_vdenc_mfx_vdenc_avc_slices().
 */
static void
gen95_vdenc_hw_interfaces_init(VADriverContextP ctx,
                               struct intel_encoder_context *encoder_context,
                               struct gen9_vdenc_context *vdenc_context)
{
    vdenc_context->use_extended_pak_obj_cmd = 1;
}
4006
4007 static void
4008 vdenc_hw_interfaces_init(VADriverContextP ctx,
4009                          struct intel_encoder_context *encoder_context,
4010                          struct gen9_vdenc_context *vdenc_context)
4011 {
4012     struct i965_driver_data *i965 = i965_driver_data(ctx);
4013
4014     if (IS_KBL(i965->intel.device_info) ||
4015         IS_GLK(i965->intel.device_info) ||
4016         IS_GEN10(i965->intel.device_info)) {
4017         gen95_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
4018     } else {
4019         gen9_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
4020     }
4021 }
4022
4023 static VAStatus
4024 gen9_vdenc_context_get_status(VADriverContextP ctx,
4025                               struct intel_encoder_context *encoder_context,
4026                               struct i965_coded_buffer_segment *coded_buffer_segment)
4027 {
4028     struct gen9_vdenc_status *vdenc_status = (struct gen9_vdenc_status *)coded_buffer_segment->codec_private_data;
4029
4030     coded_buffer_segment->base.size = vdenc_status->bytes_per_frame;
4031
4032     return VA_STATUS_SUCCESS;
4033 }
4034
4035 Bool
4036 gen9_vdenc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
4037 {
4038     struct gen9_vdenc_context *vdenc_context = calloc(1, sizeof(struct gen9_vdenc_context));
4039
4040     if (!vdenc_context)
4041         return False;
4042
4043     vdenc_context->brc_initted = 0;
4044     vdenc_context->brc_need_reset = 0;
4045     vdenc_context->is_low_delay = 0;
4046     vdenc_context->current_pass = 0;
4047     vdenc_context->num_passes = 1;
4048     vdenc_context->vdenc_streamin_enable = 0;
4049     vdenc_context->vdenc_pak_threshold_check_enable = 0;
4050     vdenc_context->is_frame_level_vdenc = 0;
4051
4052     vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
4053     gen9_vdenc_allocate_resources(ctx, encoder_context, vdenc_context);
4054
4055     encoder_context->mfc_context = vdenc_context;
4056     encoder_context->mfc_context_destroy = gen9_vdenc_context_destroy;
4057     encoder_context->mfc_pipeline = gen9_vdenc_pipeline;
4058     encoder_context->mfc_brc_prepare = gen9_vdenc_context_brc_prepare;
4059     encoder_context->get_status = gen9_vdenc_context_get_status;
4060
4061     return True;
4062 }