[android-x86/hardware-intel-common-vaapi.git] src/gen9_vdenc.c
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41 #include "intel_media.h"
42 #include "gen9_vdenc.h"
43
44 extern int
45 intel_avc_enc_slice_type_fixup(int slice_type);
46
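/*
 * BRC buffer-rate adjustment tables for the low-delay case, laid out as
 * 9 rows of 8 entries per frame type (I/P/B).  Judging by the names, the
 * row/column appear to select a buffer-fullness range and an estimated-rate
 * range, and the entry is the adjustment applied by the HuC rate controller.
 */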
47 static const uint8_t buf_rate_adj_tab_i_lowdelay[72] = {
48     0,   0, -8, -12, -16, -20, -28, -36,
49     0,   0, -4,  -8, -12, -16, -24, -32,
50     4,   2,  0,  -1,  -3,  -8, -16, -24,
51     8,   4,  2,   0,  -1,  -4,  -8, -16,
52     20, 16,  4,   0,  -1,  -4,  -8, -16,
53     24, 20, 16,   8,   4,   0,  -4,  -8,
54     28, 24, 20,  16,   8,   4,   0,  -8,
55     32, 24, 20,  16,   8,   4,   0,  -4,
56     64, 48, 28,  20,   16, 12,   8,   4,
57 };
58
59 static const uint8_t buf_rate_adj_tab_p_lowdelay[72] = {
60     -8, -24, -32, -40, -44, -48, -52, -80,
61     -8, -16, -32, -40, -40, -44, -44, -56,
62     0,    0, -12, -20, -24, -28, -32, -36,
63     8,    4,   0,   0,  -8, -16, -24, -32,
64     32,  16,   8,   4,  -4,  -8, -16, -20,
65     36,  24,  16,   8,   4,  -2,  -4,  -8,
66     40,  36,  24,  20,  16,   8,   0,  -8,
67     48,  40,  28,  24,  20,  12,   0,  -4,
68     64,  48,  28,  20,  16,  12,   8,   4,
69 };
70
71 static const uint8_t buf_rate_adj_tab_b_lowdelay[72] = {
72     0,  -4, -8, -16, -24, -32, -40, -48,
73     1,   0, -4,  -8, -16, -24, -32, -40,
74     4,   2,  0,  -1,  -3,  -8, -16, -24,
75     8,   4,  2,   0,  -1,  -4,  -8, -16,
76     20, 16,  4,   0,  -1,  -4,  -8, -16,
77     24, 20, 16,   8,   4,   0,  -4,  -8,
78     28, 24, 20,  16,   8,   4,   0,  -8,
79     32, 24, 20,  16,   8,   4,   0,  -4,
80     64, 48, 28,  20,  16,  12,   8,   4,
81 };
82
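/*
 * Distortion-based QP adjustment tables for VBR, 9x9 per frame type: the
 * signed entry appears to be the QP delta chosen from a distortion range
 * (row) and a rate range (column).
 */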
83 static const int8_t dist_qp_adj_tab_i_vbr[81] = {
84     +0,  0,  0,  0, 0, 3, 4, 6, 8,
85     +0,  0,  0,  0, 0, 2, 3, 5, 7,
86     -1,  0,  0,  0, 0, 2, 2, 4, 5,
87     -1, -1,  0,  0, 0, 1, 2, 2, 4,
88     -2, -2, -1,  0, 0, 0, 1, 2, 4,
89     -2, -2, -1,  0, 0, 0, 1, 2, 4,
90     -3, -2, -1, -1, 0, 0, 1, 2, 5,
91     -3, -2, -1, -1, 0, 0, 2, 4, 7,
92     -4, -3, -2, -1, 0, 1, 3, 5, 8,
93 };
94
95 static const int8_t dist_qp_adj_tab_p_vbr[81] = {
96     -1,  0,  0,  0, 0, 1, 1, 2, 3,
97     -1, -1,  0,  0, 0, 1, 1, 2, 3,
98     -2, -1, -1,  0, 0, 1, 1, 2, 3,
99     -3, -2, -2, -1, 0, 0, 1, 2, 3,
100     -3, -2, -1, -1, 0, 0, 1, 2, 3,
101     -3, -2, -1, -1, 0, 0, 1, 2, 3,
102     -3, -2, -1, -1, 0, 0, 1, 2, 3,
103     -3, -2, -1, -1, 0, 0, 1, 2, 3,
104     -3, -2, -1, -1, 0, 0, 1, 2, 3,
105 };
106
107 static const int8_t dist_qp_adj_tab_b_vbr[81] = {
108     +0,  0,  0,  0, 0, 2, 3, 3, 4,
109     +0,  0,  0,  0, 0, 2, 3, 3, 4,
110     -1,  0,  0,  0, 0, 2, 2, 3, 3,
111     -1, -1,  0,  0, 0, 1, 2, 2, 2,
112     -1, -1, -1,  0, 0, 0, 1, 2, 2,
113     -2, -1, -1,  0, 0, 0, 0, 1, 2,
114     -2, -1, -1, -1, 0, 0, 0, 1, 3,
115     -2, -2, -1, -1, 0, 0, 1, 1, 3,
116     -2, -2, -1, -1, 0, 1, 1, 2, 4,
117 };
118
119 static const int8_t buf_rate_adj_tab_i_vbr[72] = {
120     -4, -20, -28, -36, -40, -44, -48, -80,
121     +0,  -8, -12, -20, -24, -28, -32, -36,
122     +0,   0,  -8, -16, -20, -24, -28, -32,
123     +8,   4,   0,   0,  -8, -16, -24, -28,
124     32,  24,  16,   2,  -4,  -8, -16, -20,
125     36,  32,  28,  16,   8,   0,  -4,  -8,
126     40,  36,  24,  20,  16,   8,   0,  -8,
127     48,  40,  28,  24,  20,  12,   0,  -4,
128     64,  48,  28,  20,  16,  12,   8,   4,
129 };
130
131 static const int8_t buf_rate_adj_tab_p_vbr[72] = {
132     -8, -24, -32, -44, -48, -56, -64, -80,
133     -8, -16, -32, -40, -44, -52, -56, -64,
134     +0,   0, -16, -28, -36, -40, -44, -48,
135     +8,   4,   0,   0,  -8, -16, -24, -36,
136     20,  12,   4,   0,  -8,  -8,  -8, -16,
137     24,  16,   8,   8,   8,   0,  -4,  -8,
138     40,  36,  24,  20,  16,   8,   0,  -8,
139     48,  40,  28,  24,  20,  12,   0,  -4,
140     64,  48,  28,  20,  16,  12,   8,   4,
141 };
142
143 static const int8_t buf_rate_adj_tab_b_vbr[72] = {
144     0,  -4, -8, -16, -24, -32, -40, -48,
145     1,   0, -4,  -8, -16, -24, -32, -40,
146     4,   2,  0,  -1,  -3,  -8, -16, -24,
147     8,   4,  2,   0,  -1,  -4,  -8, -16,
148     20, 16,  4,   0,  -1,  -4,  -8, -16,
149     24, 20, 16,   8,   4,   0,  -4,  -8,
150     28, 24, 20,  16,   8,   4,   0,  -8,
151     32, 24, 20,  16,   8,   4,   0,  -4,
152     64, 48, 28,  20,  16,  12,   8,   4,
153 };
154
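/*
 * Constant data block handed to the HuC BRC update kernel: the global
 * rate/QP adjustment tables, distortion thresholds, frame-size limits and
 * the per-QP intra/inter/reference/HME cost tables used by VDENC.
 */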
155 static const struct huc_brc_update_constant_data
156         gen9_brc_update_constant_data = {
157     .global_rate_qp_adj_tab_i = {
158         48, 40, 32,  24,  16,   8,   0,  -8,
159         40, 32, 24,  16,   8,   0,  -8, -16,
160         32, 24, 16,   8,   0,  -8, -16, -24,
161         24, 16,  8,   0,  -8, -16, -24, -32,
162         16, 8,   0,  -8, -16, -24, -32, -40,
163         8,  0,  -8, -16, -24, -32, -40, -48,
164         0, -8, -16, -24, -32, -40, -48, -56,
165         48, 40, 32,  24,  16,   8,   0,  -8,
166     },
167
168     .global_rate_qp_adj_tab_p = {
169         48,  40,  32,  24,  16,  8,    0,  -8,
170         40,  32,  24,  16,   8,  0,   -8, -16,
171         16,   8,   8,   4,  -8, -16, -16, -24,
172         8,    0,   0,  -8, -16, -16, -16, -24,
173         8,    0,   0, -24, -32, -32, -32, -48,
174         0,  -16, -16, -24, -32, -48, -56, -64,
175         -8, -16, -32, -32, -48, -48, -56, -64,
176         -16, -32, -48, -48, -48, -56, -64, -80,
177     },
178
179     .global_rate_qp_adj_tab_b = {
180         48, 40, 32, 24,  16,   8,   0,  -8,
181         40, 32, 24, 16,  8,    0,  -8, -16,
182         32, 24, 16,  8,  0,   -8, -16, -24,
183         24, 16, 8,   0, -8,   -8, -16, -24,
184         16, 8,  0,   0, -8,  -16, -24, -32,
185         16, 8,  0,   0, -8,  -16, -24, -32,
186         0, -8, -8, -16, -32, -48, -56, -64,
187         0, -8, -8, -16, -32, -48, -56, -64
188     },
189
190     .dist_threshld_i = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
191     .dist_threshld_p = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
192     .dist_threshld_b = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
193
194     .dist_qp_adj_tab_i = {
195         0,   0,  0,  0,  0,  3,  4,  6,  8,
196         0,   0,  0,  0,  0,  2,  3,  5,  7,
197         -1,  0,  0,  0,  0,  2,  2,  4,  5,
198         -1, -1,  0,  0,  0,  1,  2,  2,  4,
199         -2, -2, -1,  0,  0,  0,  1,  2,  4,
200         -2, -2, -1,  0,  0,  0,  1,  2,  4,
201         -3, -2, -1, -1,  0,  0,  1,  2,  5,
202         -3, -2, -1, -1,  0,  0,  2,  4,  7,
203         -4, -3, -2, -1,  0,  1,  3,  5,  8,
204     },
205
206     .dist_qp_adj_tab_p = {
207         -1,   0,  0,  0,  0,  1,  1,  2,  3,
208         -1,  -1,  0,  0,  0,  1,  1,  2,  3,
209         -2,  -1, -1,  0,  0,  1,  1,  2,  3,
210         -3,  -2, -2, -1,  0,  0,  1,  2,  3,
211         -3,  -2, -1, -1,  0,  0,  1,  2,  3,
212         -3,  -2, -1, -1,  0,  0,  1,  2,  3,
213         -3,  -2, -1, -1,  0,  0,  1,  2,  3,
214         -3,  -2, -1, -1,  0,  0,  1,  2,  3,
215         -3,  -2, -1, -1,  0,  0,  1,  2,  3,
216     },
217
218     .dist_qp_adj_tab_b = {
219         0,   0,  0,  0, 0, 2, 3, 3, 4,
220         0,   0,  0,  0, 0, 2, 3, 3, 4,
221         -1,  0,  0,  0, 0, 2, 2, 3, 3,
222         -1, -1,  0,  0, 0, 1, 2, 2, 2,
223         -1, -1, -1,  0, 0, 0, 1, 2, 2,
224         -2, -1, -1,  0, 0, 0, 0, 1, 2,
225         -2, -1, -1, -1, 0, 0, 0, 1, 3,
226         -2, -2, -1, -1, 0, 0, 1, 1, 3,
227         -2, -2, -1, -1, 0, 1, 1, 2, 4,
228     },
229
230     /* default table for the non-low-delay case */
231     .buf_rate_adj_tab_i = {
232         -4, -20, -28, -36, -40, -44, -48, -80,
233         0,   -8, -12, -20, -24, -28, -32, -36,
234         0,    0,  -8, -16, -20, -24, -28, -32,
235         8,    4,   0,   0,  -8, -16, -24, -28,
236         32,  24,  16,   2,  -4,  -8, -16, -20,
237         36,  32,  28,  16,   8,   0,  -4,  -8,
238         40,  36,  24,  20,  16,   8,   0,  -8,
239         48,  40,  28,  24,  20,  12,   0,  -4,
240         64,  48,  28,  20,  16,  12,   8,   4,
241     },
242
243     /* default table for the non-low-delay case */
244     .buf_rate_adj_tab_p = {
245         -8, -24, -32, -44, -48, -56, -64, -80,
246         -8, -16, -32, -40, -44, -52, -56, -64,
247         0,    0, -16, -28, -36, -40, -44, -48,
248         8,    4,   0,   0,  -8, -16, -24, -36,
249         20,  12,   4,   0,  -8,  -8,  -8, -16,
250         24,  16,   8,   8,   8,   0,  -4,  -8,
251         40,  36,  24,  20,  16,   8,   0,  -8,
252         48,  40,  28,  24,  20,  12,   0,  -4,
253         64,  48,  28,  20,  16,  12,   8,   4,
254     },
255
256     /* default table for the non-low-delay case */
257     .buf_rate_adj_tab_b = {
258         0,  -4, -8, -16, -24, -32, -40, -48,
259         1,   0, -4,  -8, -16, -24, -32, -40,
260         4,   2,  0,  -1,  -3,  -8, -16, -24,
261         8,   4,  2,   0,  -1,  -4,  -8, -16,
262         20, 16,  4,   0,  -1,  -4,  -8, -16,
263         24, 20, 16,   8,   4,   0,  -4,  -8,
264         28, 24, 20,  16,   8,   4,   0,  -8,
265         32, 24, 20,  16,   8,   4,   0,  -4,
266         64, 48, 28,  20,  16,  12,   8,   4,
267     },
268
269     .frame_size_min_tab_p = { 1, 2, 4, 6, 8, 10, 16, 16, 16 },
270     .frame_size_min_tab_i = { 1, 2, 4, 8, 16, 20, 24, 32, 36 },
271
272     .frame_size_max_tab_p = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
273     .frame_size_max_tab_i = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
274
275     .frame_size_scg_tab_p = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
276     .frame_size_scg_tab_i = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
277
278     .i_intra_non_pred = {
279         0x0e, 0x0e, 0x0e, 0x18, 0x19, 0x1b, 0x1c, 0x0d, 0x0f, 0x18, 0x19, 0x0d, 0x0f, 0x0f,
280         0x0c, 0x0e, 0x0c, 0x0c, 0x0a, 0x0a, 0x0b, 0x0a, 0x0a, 0x0a, 0x09, 0x09, 0x08, 0x08,
281         0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x07, 0x07, 0x07, 0x07, 0x07,
282     },
283
284     .i_intra_16x16 = {
285         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
286         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
287         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
288     },
289
290     .i_intra_8x8 = {
291         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
292         0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x04, 0x04, 0x04, 0x04, 0x06, 0x06, 0x06,
293         0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07,
294     },
295
296     .i_intra_4x4 = {
297         0x2e, 0x2e, 0x2e, 0x38, 0x39, 0x3a, 0x3b, 0x2c, 0x2e, 0x38, 0x39, 0x2d, 0x2f, 0x38,
298         0x2e, 0x38, 0x2e, 0x38, 0x2f, 0x2e, 0x38, 0x38, 0x38, 0x38, 0x2f, 0x2f, 0x2f, 0x2e,
299         0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x1e, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x0e, 0x0d,
300     },
301
302     .i_intra_chroma = {
303         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
304         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
305         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
306     },
307
308     .p_intra_non_pred = {
309         0x06, 0x06, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x07,
310         0x07, 0x07, 0x06, 0x07, 0x07, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
311         0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
312     },
313
314     .p_intra_16x16 = {
315         0x1b, 0x1b, 0x1b, 0x1c, 0x1e, 0x28, 0x29, 0x1a, 0x1b, 0x1c, 0x1e, 0x1a, 0x1c, 0x1d,
316         0x1b, 0x1c, 0x1c, 0x1c, 0x1c, 0x1b, 0x1c, 0x1c, 0x1d, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c,
317         0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
318     },
319
320     .p_intra_8x8 = {
321         0x1d, 0x1d, 0x1d, 0x1e, 0x28, 0x29, 0x2a, 0x1b, 0x1d, 0x1e, 0x28, 0x1c, 0x1d, 0x1f,
322         0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1d, 0x1f, 0x1e, 0x1e, 0x1e, 0x1d, 0x1e, 0x1e, 0x1d,
323         0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e,
324     },
325
326     .p_intra_4x4 = {
327         0x38, 0x38, 0x38, 0x39, 0x3a, 0x3b, 0x3d, 0x2e, 0x38, 0x39, 0x3a, 0x2f, 0x39, 0x3a,
328         0x38, 0x39, 0x38, 0x39, 0x39, 0x38, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
329         0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
330     },
331
332     .p_intra_chroma = {
333         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
334         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
335         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
336     },
337
338     .p_inter_16x8 = {
339         0x07, 0x07, 0x07, 0x08, 0x09, 0x0b, 0x0c, 0x06, 0x07, 0x09, 0x0a, 0x07, 0x08, 0x09,
340         0x08, 0x09, 0x08, 0x09, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x08, 0x08, 0x08, 0x08,
341         0x08, 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
342     },
343
344     .p_inter_8x8 = {
345         0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x02, 0x02, 0x02, 0x03, 0x02, 0x02, 0x02,
346         0x02, 0x03, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
347         0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
348     },
349
350     .p_inter_16x16 = {
351         0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
352         0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
353         0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
354     },
355
356     .p_ref_id = {
357         0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
358         0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
359         0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04
360     },
361
362     .hme_mv_cost = {
363         /* mv = 0 */
364         {
365             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
366             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
367             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
368         },
369
370         /* mv <= 16 */
371         {
372             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
373             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
374             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
375         },
376
377         /* mv <= 32 */
378         {
379             0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
380             0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
381             0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
382         },
383
384         /* mv <= 64 */
385         {
386             0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
387             0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
388             0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
389         },
390
391         /* mv <= 128 */
392         {
393             0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
394             0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
395             0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
396         },
397
398         /* mv <= 256 */
399         {
400             0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
401             0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
402             0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
403         },
404
405         /* mv <= 512 */
406         {
407             0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
408             0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
409             0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
410         },
411
412         /* mv <= 1024 */
413         {
414             0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
415             0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
416             0x1a, 0x1a, 0x1a, 0x1a, 0x1f, 0x2a, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d
417         },
418     },
419 };
420
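/*
 * Per-QP constant tables for I slices.  The "N DWs" comments give each
 * array's size in 32-bit dwords as packed into the VDENC const buffer; the
 * arrays with empty initializers simply stay all-zero for I slices.
 */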
421 /* 11 DWs */
422 static const uint8_t vdenc_const_qp_lambda[44] = {
423     0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
424     0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
425     0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
426     0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
427     0x4a, 0x53, 0x00, 0x00
428 };
429
430 /* 14 DWs */
431 static const uint16_t vdenc_const_skip_threshold[28] = {
432
433 };
434
435 /* 14 DWs */
436 static const uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0[28] = {
437
438 };
439
440 /* 7 DWs */
441 static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1[28] = {
442
443 };
444
445 /* 7 DWs */
446 static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2[28] = {
447
448 };
449
450 /* 7 DWs */
451 static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3[28] = {
452
453 };
454
455 /* P frame */
456 /* 11 DWs */
457 static const uint8_t vdenc_const_qp_lambda_p[44] = {
458     0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
459     0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
460     0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
461     0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
462     0x4a, 0x53, 0x00, 0x00
463 };
464
465 /* 14 DWs */
466 static const uint16_t vdenc_const_skip_threshold_p[28] = {
467     0x0000, 0x0000, 0x0000, 0x0000, 0x0002, 0x0004, 0x0007, 0x000b,
468     0x0011, 0x0019, 0x0023, 0x0032, 0x0044, 0x005b, 0x0077, 0x0099,
469     0x00c2, 0x00f1, 0x0128, 0x0168, 0x01b0, 0x0201, 0x025c, 0x02c2,
470     0x0333, 0x03b0, 0x0000, 0x0000
471 };
472
473 /* 14 DWs */
474 static const uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0_p[28] = {
475     0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
476     0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x3f,
477     0x4e, 0x51, 0x5b, 0x63, 0x6f, 0x7f, 0x00, 0x00
478 };
479
480 /* 7 DWs */
481 static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1_p[28] = {
482     0x03, 0x04, 0x05, 0x05, 0x07, 0x09, 0x0b, 0x0e, 0x12, 0x17,
483     0x1c, 0x21, 0x27, 0x2c, 0x33, 0x3b, 0x41, 0x51, 0x5c, 0x1a,
484     0x1e, 0x21, 0x22, 0x26, 0x2c, 0x30, 0x00, 0x00
485 };
486
487 /* 7 DWs */
488 static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2_p[28] = {
489     0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
490     0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x0f,
491     0x13, 0x14, 0x16, 0x18, 0x1b, 0x1f, 0x00, 0x00
492 };
493
494 /* 7 DWs */
495 static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3_p[28] = {
496     0x04, 0x05, 0x06, 0x09, 0x0b, 0x0d, 0x12, 0x16, 0x1b, 0x23,
497     0x2c, 0x33, 0x3d, 0x45, 0x4f, 0x5b, 0x66, 0x7f, 0x8e, 0x2a,
498     0x2f, 0x32, 0x37, 0x3c, 0x45, 0x4c, 0x00, 0x00
499 };
500
501 static const double
502 vdenc_brc_dev_threshi0_fp_neg[4] = { 0.80, 0.60, 0.34, 0.2 };
503
504 static const double
505 vdenc_brc_dev_threshi0_fp_pos[4] = { 0.2, 0.4, 0.66, 0.9 };
506
507 static const double
508 vdenc_brc_dev_threshpb0_fp_neg[4] = { 0.90, 0.66, 0.46, 0.3 };
509
510 static const double
511 vdenc_brc_dev_threshpb0_fp_pos[4] = { 0.3, 0.46, 0.70, 0.90 };
512
513 static const double
514 vdenc_brc_dev_threshvbr0_neg[4] = { 0.90, 0.70, 0.50, 0.3 };
515
516 static const double
517 vdenc_brc_dev_threshvbr0_pos[4] = { 0.4, 0.5, 0.75, 0.90 };
518
519 static const unsigned char
520 vdenc_brc_estrate_thresh_p0[7] = { 4, 8, 12, 16, 20, 24, 28 };
521
522 static const unsigned char
523 vdenc_brc_estrate_thresh_i0[7] = { 4, 8, 12, 16, 20, 24, 28 };
524
525 static const uint16_t
526 vdenc_brc_start_global_adjust_frame[4] = { 10, 50, 100, 150 };
527
528 static const uint8_t
529 vdenc_brc_global_rate_ratio_threshold[7] = { 80, 90, 95, 101, 105, 115, 130 };
530
531 static const uint8_t
532 vdenc_brc_start_global_adjust_mult[5] = { 1, 1, 3, 2, 1 };
533
534 static const uint8_t
535 vdenc_brc_start_global_adjust_div[5] = { 40, 5, 5, 3, 1 };
536
537 static const int8_t
538 vdenc_brc_global_rate_ratio_threshold_qp[8] = { -3, -2, -1, 0, 1, 1, 2, 3 };
539
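/*
 * vdenc_mode_const[slice_type][lut_mode][qp]: raw mode costs per QP (0-51)
 * for intra slices ([0]) and predicted slices ([1]).  The values are run
 * through map_44_lut_value() in gen9_vdenc_avc_calculate_mode_cost() before
 * they are programmed into the VDENC cost LUTs.
 */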
540 static const int vdenc_mode_const[2][12][52] = {
541     //INTRASLICE
542     {
543         //LUTMODE_INTRA_NONPRED
544         {
545             14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,         //QP=[0 ~12]
546             16, 18, 22, 24, 13, 15, 16, 18, 13, 15, 15, 12, 14,         //QP=[13~25]
547             12, 12, 10, 10, 11, 10, 10, 10, 9, 9, 8, 8, 8,              //QP=[26~38]
548             8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7,                      //QP=[39~51]
549         },
550
551         //LUTMODE_INTRA_16x16, LUTMODE_INTRA
552         {
553             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[0 ~12]
554             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[13~25]
555             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[26~38]
556             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[39~51]
557         },
558
559         //LUTMODE_INTRA_8x8
560         {
561             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  //QP=[0 ~12]
562             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,  //QP=[13~25]
563             1, 1, 1, 1, 1, 4, 4, 4, 4, 6, 6, 6, 6,  //QP=[26~38]
564             6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7,  //QP=[39~51]
565         },
566
567         //LUTMODE_INTRA_4x4
568         {
569             56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,   //QP=[0 ~12]
570             64, 72, 80, 88, 48, 56, 64, 72, 53, 59, 64, 56, 64,   //QP=[13~25]
571             57, 64, 58, 55, 64, 64, 64, 64, 59, 59, 60, 57, 50,   //QP=[26~38]
572             46, 42, 38, 34, 31, 27, 23, 22, 19, 18, 16, 14, 13,   //QP=[39~51]
573         },
574
575         //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
576         { 0, },
577
578         //LUTMODE_INTER_8X8Q
579         { 0, },
580
581         //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16x8_FIELD
582         { 0, },
583
584         //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8X8_FIELD
585         { 0, },
586
587         //LUTMODE_INTER_16x16, LUTMODE_INTER
588         { 0, },
589
590         //LUTMODE_INTER_BWD
591         { 0, },
592
593         //LUTMODE_REF_ID
594         { 0, },
595
596         //LUTMODE_INTRA_CHROMA
597         { 0, },
598     },
599
600     //PREDSLICE
601     {
602         //LUTMODE_INTRA_NONPRED
603         {
604             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,     //QP=[0 ~12]
605             7, 8, 9, 10, 5, 6, 7, 8, 6, 7, 7, 7, 7,    //QP=[13~25]
606             6, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7,     //QP=[26~38]
607             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     //QP=[39~51]
608         },
609
610         //LUTMODE_INTRA_16x16, LUTMODE_INTRA
611         {
612             21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
613             24, 28, 31, 35, 19, 21, 24, 28, 20, 24, 25, 21, 24,
614             24, 24, 24, 21, 24, 24, 26, 24, 24, 24, 24, 24, 24,
615             24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
616
617         },
618
619         //LUTMODE_INTRA_8x8
620         {
621             26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,   //QP=[0 ~12]
622             28, 32, 36, 40, 22, 26, 28, 32, 24, 26, 30, 26, 28,   //QP=[13~25]
623             26, 28, 26, 26, 30, 28, 28, 28, 26, 28, 28, 26, 28,   //QP=[26~38]
624             28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,   //QP=[39~51]
625         },
626
627         //LUTMODE_INTRA_4x4
628         {
629             64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,   //QP=[0 ~12]
630             72, 80, 88, 104, 56, 64, 72, 80, 58, 68, 76, 64, 68,  //QP=[13~25]
631             64, 68, 68, 64, 70, 70, 70, 70, 68, 68, 68, 68, 68,   //QP=[26~38]
632             68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,   //QP=[39~51]
633         },
634
635         //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
636         {
637             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,      //QP=[0 ~12]
638             8, 9, 11, 12, 6, 7, 9, 10, 7, 8, 9, 8, 9,   //QP=[13~25]
639             8, 9, 8, 8, 9, 9, 9, 9, 8, 8, 8, 8, 8,      //QP=[26~38]
640             8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,      //QP=[39~51]
641         },
642
643         //LUTMODE_INTER_8X8Q
644         {
645             2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,   //QP=[0 ~12]
646             2, 3, 3, 3, 2, 2, 2, 3, 2, 2, 2, 2, 3,   //QP=[13~25]
647             2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   //QP=[26~38]
648             3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   //QP=[39~51]
649         },
650
651         //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16X8_FIELD
652         {
653             5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[0 ~12]
654             5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[13~25]
655             5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[26~38]
656             5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[39~51]
657         },
658
659         //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8x8_FIELD
660         {
661             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[0 ~12]
662             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[13~25]
663             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[26~38]
664             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[39~51]
665         },
666
667         //LUTMODE_INTER_16x16, LUTMODE_INTER
668         {
669             5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[0 ~12]
670             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   //QP=[13~25]
671             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   //QP=[26~38]
672             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   //QP=[39~51]
673         },
674
675         //LUTMODE_INTER_BWD
676         {
677             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[0 ~12]
678             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[13~25]
679             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[26~38]
680             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[39~51]
681         },
682
683         //LUTMODE_REF_ID
684         {
685             4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[0 ~12]
686             4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[13~25]
687             4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[26~38]
688             4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[39~51]
689         },
690
691         //LUTMODE_INTRA_CHROMA
692         {
693             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[0 ~12]
694             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[13~25]
695             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[26~38]
696             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[39~51]
697         },
698     },
699 };
700
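/*
 * Quarter-pel MV cost biases and per-QP HME MV costs for P slices; like
 * vdenc_mode_const, both tables are converted with map_44_lut_value()
 * in gen9_vdenc_avc_calculate_mode_cost().
 */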
701 static const int vdenc_mv_cost_skipbias_qpel[8] = {
702     //PREDSLICE
703     0, 6, 6, 9, 10, 13, 14, 16
704 };
705
706 static const int vdenc_hme_cost[8][52] = {
707     //mv=0
708     {
709         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[0 ~12]
710         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[13 ~25]
711         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[26 ~38]
712         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[39 ~51]
713     },
714     //mv<=16
715     {
716         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[0 ~12]
717         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[13 ~25]
718         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[26 ~38]
719         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[39 ~51]
720     },
721     //mv<=32
722     {
723         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[0 ~12]
724         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[13 ~25]
725         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[26 ~38]
726         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[39 ~51]
727     },
728     //mv<=64
729     {
730         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[0 ~12]
731         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[13 ~25]
732         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[26 ~38]
733         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[39 ~51]
734     },
735     //mv<=128
736     {
737         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[0 ~12]
738         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[13 ~25]
739         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[26 ~38]
740         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[39 ~51]
741     },
742     //mv<=256
743     {
744         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[0 ~12]
745         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[13 ~25]
746         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[26 ~38]
747         10, 10, 10, 10, 20, 30, 40, 50, 50, 50, 50, 50, 50,     //QP=[39 ~51]
748     },
749     //mv<=512
750     {
751         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[0 ~12]
752         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[13 ~25]
753         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[26 ~38]
754         20, 20, 20, 40, 60, 80, 100, 100, 100, 100, 100, 100, 100,     //QP=[39 ~51]
755     },
756
757     //mv<=1024
758     {
759         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[0 ~12]
760         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[13 ~25]
761         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[26 ~38]
762         20, 20, 30, 50, 100, 200, 200, 200, 200, 200, 200, 200, 200,     //QP=[39 ~51]
763     },
764 };
765
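/*
 * Batch helpers: OUT_BUFFER_2DW emits a 64-bit relocation for "bo" (or two
 * zero dwords when no buffer is bound), and OUT_BUFFER_3DW additionally
 * emits the MOCS dword.  The "attr" argument is currently unused, and
 * OUT_BUFFER_3DW expects an "i965" driver-data pointer in scope at the
 * call site.
 */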
766 #define OUT_BUFFER_2DW(batch, bo, is_target, delta)  do {               \
767         if (bo) {                                                       \
768             OUT_BCS_RELOC64(batch,                                      \
769                             bo,                                         \
770                             I915_GEM_DOMAIN_RENDER,                     \
771                             is_target ? I915_GEM_DOMAIN_RENDER : 0,     \
772                             delta);                                     \
773         } else {                                                        \
774             OUT_BCS_BATCH(batch, 0);                                    \
775             OUT_BCS_BATCH(batch, 0);                                    \
776         }                                                               \
777     } while (0)
778
779 #define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr)  do { \
780         OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
781         OUT_BCS_BATCH(batch, i965->intel.mocs_state);                             \
782     } while (0)
783
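/*
 * Allocate a linear (untiled) GPE buffer of "bfsize" bytes; "des" is the
 * description string passed to i965_allocate_gpe_resource().  Used below
 * for the row-store scratch buffers and the VDENC StreamIn buffer.
 */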
784 #define ALLOC_VDENC_BUFFER_RESOURCE(buffer, bfsize, des) do {   \
785         buffer.type = I965_GPE_RESOURCE_BUFFER;                 \
786         buffer.width = bfsize;                                  \
787         buffer.height = 1;                                      \
788         buffer.pitch = buffer.width;                            \
789         buffer.size = buffer.pitch;                             \
790         buffer.tiling = I915_TILING_NONE;                       \
791         i965_allocate_gpe_resource(i965->intel.bufmgr,          \
792                                    &buffer,                     \
793                                    bfsize,                      \
794                                    (des));                      \
795     } while (0)
796
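/*
 * Maximum vertical MV range in quarter-pel units derived from the H.264
 * level, roughly following MaxVmvR in Table A-1 of the spec.
 */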
797 static int
798 gen9_vdenc_get_max_vmv_range(int level)
799 {
800     int max_vmv_range = 512;
801
802     if (level == 10)
803         max_vmv_range = 256;
804     else if (level <= 20)
805         max_vmv_range = 512;
806     else if (level <= 30)
807         max_vmv_range = 1024;
808     else
809         max_vmv_range = 2048;
810
811     return max_vmv_range;
812 }
813
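/*
 * Convert a linear cost into the 4.4 "shift.base" format used by the VDENC
 * cost LUTs: the high nibble is a shift count, the low nibble a 4-bit base,
 * so a stored byte decodes back to roughly (v & 0xf) << (v >> 4).  "max" is
 * the largest encodable value for the LUT in question.  For example,
 * map_44_lut_value(20, 0x6f) returns 0x1a, which decodes to 10 << 1 == 20.
 */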
814 static unsigned char
815 map_44_lut_value(unsigned int v, unsigned char max)
816 {
817     unsigned int maxcost;
818     int d;
819     unsigned char ret;
820
821     if (v == 0) {
822         return 0;
823     }
824
825     maxcost = ((max & 15) << (max >> 4));
826
827     if (v >= maxcost) {
828         return max;
829     }
830
831     d = (int)(log((double)v) / log(2.0)) - 3;
832
833     if (d < 0) {
834         d = 0;
835     }
836
837     ret = (unsigned char)((d << 4) + (int)((v + (d == 0 ? 0 : (1 << (d - 1)))) >> d));
838     ret = (ret & 0xf) == 0 ? (ret | 8) : ret;
839
840     return ret;
841 }
842
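/*
 * Pull the per-frame rate-control settings (GOP structure, frame rate, HRD
 * buffer, bit rates and ROIs) from the common encoder BRC block into the
 * VDENC context; bit rates are tracked in kbps here.
 */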
843 static void
844 gen9_vdenc_update_misc_parameters(VADriverContextP ctx,
845                                   struct encode_state *encode_state,
846                                   struct intel_encoder_context *encoder_context)
847 {
848     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
849     int i;
850
851     vdenc_context->gop_size = encoder_context->brc.gop_size;
852     vdenc_context->ref_dist = encoder_context->brc.num_bframes_in_gop + 1;
853
854     if (vdenc_context->internal_rate_mode != I965_BRC_CQP &&
855         encoder_context->brc.need_reset) {
856         /* So far, vdenc doesn't support temporal layers */
857         vdenc_context->framerate = encoder_context->brc.framerate[0];
858
859         vdenc_context->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
860         vdenc_context->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
861
862         vdenc_context->max_bit_rate = ALIGN(encoder_context->brc.bits_per_second[0], 1000) / 1000;
863         vdenc_context->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
864         vdenc_context->brc_need_reset = (vdenc_context->brc_initted && encoder_context->brc.need_reset);
865
866         if (vdenc_context->internal_rate_mode == I965_BRC_CBR) {
867             vdenc_context->min_bit_rate = vdenc_context->max_bit_rate;
868             vdenc_context->target_bit_rate = vdenc_context->max_bit_rate;
869         } else {
870             assert(vdenc_context->internal_rate_mode == I965_BRC_VBR);
871             vdenc_context->min_bit_rate = vdenc_context->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
872             vdenc_context->target_bit_rate = vdenc_context->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
873         }
874     }
875
876     vdenc_context->mb_brc_enabled = 1;
877     vdenc_context->num_roi = MIN(encoder_context->brc.num_roi, 3);
878     vdenc_context->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
879     vdenc_context->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
880     vdenc_context->vdenc_streamin_enable = !!vdenc_context->num_roi;
881
882     for (i = 0; i < vdenc_context->num_roi; i++) {
883         vdenc_context->roi[i].left = encoder_context->brc.roi[i].left >> 4;
884         vdenc_context->roi[i].right = encoder_context->brc.roi[i].right >> 4;
885         vdenc_context->roi[i].top = encoder_context->brc.roi[i].top >> 4;
886         vdenc_context->roi[i].bottom = encoder_context->brc.roi[i].bottom >> 4;
887         vdenc_context->roi[i].value = encoder_context->brc.roi[i].value;
888     }
889 }
890
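/*
 * Refresh the per-frame geometry (MB counts, 4x downscaled size), decide
 * how many PAK passes to run, and sanity-check the BRC inputs, disabling
 * BRC for this frame when any required rate parameter is missing.
 */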
891 static void
892 gen9_vdenc_update_parameters(VADriverContextP ctx,
893                              VAProfile profile,
894                              struct encode_state *encode_state,
895                              struct intel_encoder_context *encoder_context)
896 {
897     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
898     VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
899     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
900
901     if (profile == VAProfileH264High)
902         vdenc_context->transform_8x8_mode_enable = !!pic_param->pic_fields.bits.transform_8x8_mode_flag;
903     else
904         vdenc_context->transform_8x8_mode_enable = 0;
905
906     vdenc_context->frame_width_in_mbs = seq_param->picture_width_in_mbs;
907     vdenc_context->frame_height_in_mbs = seq_param->picture_height_in_mbs;
908
909     vdenc_context->frame_width = vdenc_context->frame_width_in_mbs * 16;
910     vdenc_context->frame_height = vdenc_context->frame_height_in_mbs * 16;
911
912     vdenc_context->down_scaled_width_in_mb4x = WIDTH_IN_MACROBLOCKS(vdenc_context->frame_width / SCALE_FACTOR_4X);
913     vdenc_context->down_scaled_height_in_mb4x = HEIGHT_IN_MACROBLOCKS(vdenc_context->frame_height / SCALE_FACTOR_4X);
914     vdenc_context->down_scaled_width_4x = vdenc_context->down_scaled_width_in_mb4x * 16;
915     vdenc_context->down_scaled_height_4x = ((vdenc_context->down_scaled_height_in_mb4x + 1) >> 1) * 16;
916     vdenc_context->down_scaled_height_4x = ALIGN(vdenc_context->down_scaled_height_4x, 32) << 1;
917
918     gen9_vdenc_update_misc_parameters(ctx, encode_state, encoder_context);
919
920     vdenc_context->current_pass = 0;
921     vdenc_context->num_passes = 1;
922
923     if (vdenc_context->internal_rate_mode == I965_BRC_CBR ||
924         vdenc_context->internal_rate_mode == I965_BRC_VBR)
925         vdenc_context->brc_enabled = 1;
926     else
927         vdenc_context->brc_enabled = 0;
928
929     if (vdenc_context->brc_enabled &&
930         (!vdenc_context->init_vbv_buffer_fullness_in_bit ||
931          !vdenc_context->vbv_buffer_size_in_bit ||
932          !vdenc_context->max_bit_rate ||
933          !vdenc_context->target_bit_rate ||
934          !vdenc_context->framerate.num ||
935          !vdenc_context->framerate.den))
936         vdenc_context->brc_enabled = 0;
937
938     if (!vdenc_context->brc_enabled) {
939         vdenc_context->target_bit_rate = 0;
940         vdenc_context->max_bit_rate = 0;
941         vdenc_context->min_bit_rate = 0;
942         vdenc_context->init_vbv_buffer_fullness_in_bit = 0;
943         vdenc_context->vbv_buffer_size_in_bit = 0;
944     } else {
945         vdenc_context->num_passes = NUM_OF_BRC_PAK_PASSES;
946     }
947 }
948
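/*
 * Fill the mode/MV/HME cost arrays for the given QP.  Only P frames get the
 * inter, reference-ID and HME costs; for I frames those entries stay zero.
 */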
949 static void
950 gen9_vdenc_avc_calculate_mode_cost(VADriverContextP ctx,
951                                    struct encode_state *encode_state,
952                                    struct intel_encoder_context *encoder_context,
953                                    int qp)
954 {
955     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
956     unsigned int frame_type = vdenc_context->frame_type;
957
958     memset(vdenc_context->mode_cost, 0, sizeof(vdenc_context->mode_cost));
959     memset(vdenc_context->mv_cost, 0, sizeof(vdenc_context->mv_cost));
960     memset(vdenc_context->hme_mv_cost, 0, sizeof(vdenc_context->hme_mv_cost));
961
962     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_NONPRED] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_NONPRED][qp]), 0x6f);
963     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_16x16][qp]), 0x8f);
964     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_8x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_8x8][qp]), 0x8f);
965     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_4x4] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_4x4][qp]), 0x8f);
966
967     if (frame_type == VDENC_FRAME_P) {
968         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x16][qp]), 0x8f);
969         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x8][qp]), 0x8f);
970         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X8Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X8Q][qp]), 0x6f);
971         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X4Q][qp]), 0x6f);
972         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_4X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_4X4Q][qp]), 0x6f);
973         vdenc_context->mode_cost[VDENC_LUTMODE_REF_ID] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_REF_ID][qp]), 0x6f);
974
975         vdenc_context->mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[0]), 0x6f);
976         vdenc_context->mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[1]), 0x6f);
977         vdenc_context->mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[2]), 0x6f);
978         vdenc_context->mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[3]), 0x6f);
979         vdenc_context->mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[4]), 0x6f);
980         vdenc_context->mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[5]), 0x6f);
981         vdenc_context->mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[6]), 0x6f);
982         vdenc_context->mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[7]), 0x6f);
983
984         vdenc_context->hme_mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_hme_cost[0][qp]), 0x6f);
985         vdenc_context->hme_mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_hme_cost[1][qp]), 0x6f);
986         vdenc_context->hme_mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_hme_cost[2][qp]), 0x6f);
987         vdenc_context->hme_mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_hme_cost[3][qp]), 0x6f);
988         vdenc_context->hme_mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_hme_cost[4][qp]), 0x6f);
989         vdenc_context->hme_mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_hme_cost[5][qp]), 0x6f);
990         vdenc_context->hme_mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_hme_cost[6][qp]), 0x6f);
991         vdenc_context->hme_mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_hme_cost[7][qp]), 0x6f);
992     }
993 }
994
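/*
 * Write the ROI selection into the per-macroblock StreamIn records: every MB
 * has a 64-byte gen9_vdenc_streamin_state entry, and where ROIs overlap the
 * one with the highest index wins.
 */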
995 static void
996 gen9_vdenc_update_roi_in_streamin_state(VADriverContextP ctx,
997                                         struct intel_encoder_context *encoder_context)
998 {
999     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1000     struct gen9_vdenc_streamin_state *streamin_state;
1001     int row, col, i;
1002
1003     if (!vdenc_context->num_roi)
1004         return;
1005
1006     streamin_state = (struct gen9_vdenc_streamin_state *)i965_map_gpe_resource(&vdenc_context->vdenc_streamin_res);
1007
1008     if (!streamin_state)
1009         return;
1010
1011     for (col = 0;  col < vdenc_context->frame_width_in_mbs; col++) {
1012         for (row = 0; row < vdenc_context->frame_height_in_mbs; row++) {
1013             streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = 0; /* non-ROI region */
1014
1015             /* The last matching ROI has the highest priority */
1016             for (i = vdenc_context->num_roi - 1; i >= 0; i--) {
1017                 if ((col >= vdenc_context->roi[i].left && col <= vdenc_context->roi[i].right) &&
1018                     (row >= vdenc_context->roi[i].top && row <= vdenc_context->roi[i].bottom)) {
1019                     streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = i + 1;
1020
1021                     break;
1022                 }
1023             }
1024         }
1025     }
1026
1027     i965_unmap_gpe_resource(&vdenc_context->vdenc_streamin_res);
1028 }
1029
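/*
 * Per-frame preparation: update the frame parameters, (re)bind the
 * reconstructed, 4x-scaled, reference and input surfaces as GPE resources,
 * carve the status-report region out of the coded buffer, allocate the
 * row-store and StreamIn scratch buffers, and build the list0 reference
 * index mapping for the first slice.
 */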
1030 static VAStatus
1031 gen9_vdenc_avc_prepare(VADriverContextP ctx,
1032                        VAProfile profile,
1033                        struct encode_state *encode_state,
1034                        struct intel_encoder_context *encoder_context)
1035 {
1036     struct i965_driver_data *i965 = i965_driver_data(ctx);
1037     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1038     struct i965_coded_buffer_segment *coded_buffer_segment;
1039     struct object_surface *obj_surface;
1040     struct object_buffer *obj_buffer;
1041     VAEncPictureParameterBufferH264 *pic_param;
1042     VAEncSliceParameterBufferH264 *slice_param;
1043     VDEncAvcSurface *vdenc_avc_surface;
1044     dri_bo *bo;
1045     int i, j, enable_avc_ildb = 0;
1046     int qp;
1047     char *pbuffer;
1048
1049     gen9_vdenc_update_parameters(ctx, profile, encode_state, encoder_context);
1050
1051     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
1052         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
1053         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
1054
1055         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
1056             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1057                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1058                    (slice_param->slice_type == SLICE_TYPE_P) ||
1059                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1060                    (slice_param->slice_type == SLICE_TYPE_B));
1061
1062             if (slice_param->disable_deblocking_filter_idc != 1) {
1063                 enable_avc_ildb = 1;
1064                 break;
1065             }
1066
1067             slice_param++;
1068         }
1069     }
1070
1071     /* Set up the current frame */
1072     obj_surface = encode_state->reconstructed_object;
1073     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1074
1075     if (obj_surface->private_data == NULL) {
1076         vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
1077         assert(vdenc_avc_surface);
1078
1079         vdenc_avc_surface->ctx = ctx;
1080         i965_CreateSurfaces(ctx,
1081                             vdenc_context->down_scaled_width_4x,
1082                             vdenc_context->down_scaled_height_4x,
1083                             VA_RT_FORMAT_YUV420,
1084                             1,
1085                             &vdenc_avc_surface->scaled_4x_surface_id);
1086         vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
1087         assert(vdenc_avc_surface->scaled_4x_surface_obj);
1088         i965_check_alloc_surface_bo(ctx,
1089                                     vdenc_avc_surface->scaled_4x_surface_obj,
1090                                     1,
1091                                     VA_FOURCC_NV12,
1092                                     SUBSAMPLE_YUV420);
1093
1094         obj_surface->private_data = (void *)vdenc_avc_surface;
1095         obj_surface->free_private_data = (void *)vdenc_free_avc_surface;
1096     }
1097
1098     vdenc_avc_surface = (VDEncAvcSurface *)obj_surface->private_data;
1099     assert(vdenc_avc_surface->scaled_4x_surface_obj);
1100
1101     /* Reconstructed surfaces */
1102     i965_free_gpe_resource(&vdenc_context->recon_surface_res);
1103     i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
1104     i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
1105     i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
1106
1107     i965_object_surface_to_2d_gpe_resource(&vdenc_context->recon_surface_res, obj_surface);
1108     i965_object_surface_to_2d_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res, vdenc_avc_surface->scaled_4x_surface_obj);
1109
1110     if (enable_avc_ildb) {
1111         i965_object_surface_to_2d_gpe_resource(&vdenc_context->post_deblocking_output_res, obj_surface);
1112     } else {
1113         i965_object_surface_to_2d_gpe_resource(&vdenc_context->pre_deblocking_output_res, obj_surface);
1114     }
1115
1116
1117     /* Reference surfaces */
1118     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
1119         assert(ARRAY_ELEMS(vdenc_context->list_reference_res) ==
1120                ARRAY_ELEMS(vdenc_context->list_scaled_4x_reference_res));
1121         i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
1122         i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
1123         obj_surface = encode_state->reference_objects[i];
1124
1125         if (obj_surface && obj_surface->bo) {
1126             i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_reference_res[i], obj_surface);
1127
1128             if (obj_surface->private_data == NULL) {
1129                 vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
1130                 assert(vdenc_avc_surface);
1131
1132                 vdenc_avc_surface->ctx = ctx;
1133                 i965_CreateSurfaces(ctx,
1134                                     vdenc_context->down_scaled_width_4x,
1135                                     vdenc_context->down_scaled_height_4x,
1136                                     VA_RT_FORMAT_YUV420,
1137                                     1,
1138                                     &vdenc_avc_surface->scaled_4x_surface_id);
1139                 vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
1140                 assert(vdenc_avc_surface->scaled_4x_surface_obj);
1141                 i965_check_alloc_surface_bo(ctx,
1142                                             vdenc_avc_surface->scaled_4x_surface_obj,
1143                                             1,
1144                                             VA_FOURCC_NV12,
1145                                             SUBSAMPLE_YUV420);
1146
1147                 obj_surface->private_data = vdenc_avc_surface;
1148                 obj_surface->free_private_data = vdenc_free_avc_surface;
1149             }
1150
1151             vdenc_avc_surface = obj_surface->private_data;
1152             i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i], vdenc_avc_surface->scaled_4x_surface_obj);
1153         }
1154     }
1155
1156     /* Input YUV surface */
1157     i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
1158     i965_object_surface_to_2d_gpe_resource(&vdenc_context->uncompressed_input_surface_res, encode_state->input_yuv_object);
1159
1160     /* Encoded bitstream */
1161     obj_buffer = encode_state->coded_buf_object;
1162     bo = obj_buffer->buffer_store->bo;
1163     i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
1164     i965_dri_object_to_buffer_gpe_resource(&vdenc_context->compressed_bitstream.res, bo);
1165     vdenc_context->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
1166     vdenc_context->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
1167
1168     /* Status buffer */
1169     i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
1170     i965_dri_object_to_buffer_gpe_resource(&vdenc_context->status_bffuer.res, bo);
1171     vdenc_context->status_bffuer.base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
1172     vdenc_context->status_bffuer.size = ALIGN(sizeof(struct gen9_vdenc_status), 64);
1173     vdenc_context->status_bffuer.bytes_per_frame_offset = offsetof(struct gen9_vdenc_status, bytes_per_frame);
1174     assert(vdenc_context->status_bffuer.base_offset + vdenc_context->status_bffuer.size <
1175            vdenc_context->compressed_bitstream.start_offset);
1176
1177     dri_bo_map(bo, 1);
1178
1179     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
1180     coded_buffer_segment->mapped = 0;
1181     coded_buffer_segment->codec = encoder_context->codec;
1182     coded_buffer_segment->status_support = 1;
1183
1184     pbuffer = bo->virtual;
1185     pbuffer += vdenc_context->status_bffuer.base_offset;
1186     memset(pbuffer, 0, vdenc_context->status_bffuer.size);
1187
1188     dri_bo_unmap(bo);
1189
1190     i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
1191     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_intra_row_store_scratch_res,
1192                                 vdenc_context->frame_width_in_mbs * 64,
1193                                 "Intra row store scratch buffer");
1194
1195     i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
1196     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_deblocking_filter_row_store_scratch_res,
1197                                 vdenc_context->frame_width_in_mbs * 256,
1198                                 "Deblocking filter row store scratch buffer");
1199
1200     i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
1201     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_bsd_mpc_row_store_scratch_res,
1202                                 vdenc_context->frame_width_in_mbs * 128,
1203                                 "BSD/MPC row store scratch buffer");
1204
1205     i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
1206     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_row_store_scratch_res,
1207                                 vdenc_context->frame_width_in_mbs * 64,
1208                                 "VDENC row store scratch buffer");
1209
1210     assert(sizeof(struct gen9_vdenc_streamin_state) == 64);
1211     i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
1212     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_streamin_res,
1213                                 vdenc_context->frame_width_in_mbs *
1214                                 vdenc_context->frame_height_in_mbs *
1215                                 sizeof(struct gen9_vdenc_streamin_state),
1216                                 "VDENC StreamIn buffer");
1217
1218     /*
1219      * Calculate the index for each reference surface in list0 for the first slice
1220      * TODO: other slices
1221      */
1222     pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1223     slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1224
1225     vdenc_context->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
1226
1227     if (slice_param->num_ref_idx_active_override_flag)
1228         vdenc_context->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
1229
1230     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_ref_idx[0]); i++) {
1231         vdenc_context->list_ref_idx[0][i] = 0xFF;
1232     }
1233
1234     if (vdenc_context->num_refs[0] > ARRAY_ELEMS(vdenc_context->list_ref_idx[0]))
1235         return VA_STATUS_ERROR_INVALID_VALUE;
1236
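    /*
     * Map each entry of RefPicList0 to the index of the matching surface in
     * encode_state->reference_objects; entries beyond num_refs[0] keep the
     * 0xFF marker set above.
     */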
1237     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_ref_idx[0]); i++) {
1238         VAPictureH264 *va_pic;
1239
1240         assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(vdenc_context->list_ref_idx[0]));
1241
1242         if (i >= vdenc_context->num_refs[0])
1243             continue;
1244
1245         va_pic = &slice_param->RefPicList0[i];
1246
1247         for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
1248             obj_surface = encode_state->reference_objects[j];
1249
1250             if (obj_surface &&
1251                 obj_surface->bo &&
1252                 obj_surface->base.id == va_pic->picture_id) {
1253
1254                 assert(obj_surface->base.id != VA_INVALID_SURFACE);
1255                 vdenc_context->list_ref_idx[0][i] = j;
1256
1257                 break;
1258             }
1259         }
1260     }
1261
1262     if (slice_param->slice_type == SLICE_TYPE_I ||
1263         slice_param->slice_type == SLICE_TYPE_SI)
1264         vdenc_context->frame_type = VDENC_FRAME_I;
1265     else
1266         vdenc_context->frame_type = VDENC_FRAME_P;
1267
1268     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1269
1270     gen9_vdenc_avc_calculate_mode_cost(ctx, encode_state, encoder_context, qp);
1271     gen9_vdenc_update_roi_in_streamin_state(ctx, encoder_context);
1272
1273     return VA_STATUS_SUCCESS;
1274 }
1275
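/*
 * Helpers for programming the HuC micro-controller used by the BRC:
 * HUC_PIPE_MODE_SELECT, HUC_IMEM_STATE (firmware descriptor), HUC_DMEM_STATE
 * (per-launch DMEM payload), HUC_VIRTUAL_ADDR_STATE (the 16 surface regions),
 * HUC_IND_OBJ_BASE_ADDR_STATE, HUC_STREAM_OBJECT and HUC_START.
 */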
1276 static void
1277 gen9_vdenc_huc_pipe_mode_select(VADriverContextP ctx,
1278                                 struct intel_encoder_context *encoder_context,
1279                                 struct huc_pipe_mode_select_parameter *params)
1280 {
1281     struct intel_batchbuffer *batch = encoder_context->base.batch;
1282
1283     BEGIN_BCS_BATCH(batch, 3);
1284
1285     OUT_BCS_BATCH(batch, HUC_PIPE_MODE_SELECT | (3 - 2));
1286     OUT_BCS_BATCH(batch,
1287                   (params->huc_stream_object_enable << 10) |
1288                   (params->indirect_stream_out_enable << 4));
1289     OUT_BCS_BATCH(batch,
1290                   params->media_soft_reset_counter);
1291
1292     ADVANCE_BCS_BATCH(batch);
1293 }
1294
1295 static void
1296 gen9_vdenc_huc_imem_state(VADriverContextP ctx,
1297                           struct intel_encoder_context *encoder_context,
1298                           struct huc_imem_state_parameter *params)
1299 {
1300     struct intel_batchbuffer *batch = encoder_context->base.batch;
1301
1302     BEGIN_BCS_BATCH(batch, 5);
1303
1304     OUT_BCS_BATCH(batch, HUC_IMEM_STATE | (5 - 2));
1305     OUT_BCS_BATCH(batch, 0);
1306     OUT_BCS_BATCH(batch, 0);
1307     OUT_BCS_BATCH(batch, 0);
1308     OUT_BCS_BATCH(batch, params->huc_firmware_descriptor);
1309
1310     ADVANCE_BCS_BATCH(batch);
1311 }
1312
1313 static void
1314 gen9_vdenc_huc_dmem_state(VADriverContextP ctx,
1315                           struct intel_encoder_context *encoder_context,
1316                           struct huc_dmem_state_parameter *params)
1317 {
1318     struct i965_driver_data *i965 = i965_driver_data(ctx);
1319     struct intel_batchbuffer *batch = encoder_context->base.batch;
1320
1321     BEGIN_BCS_BATCH(batch, 6);
1322
1323     OUT_BCS_BATCH(batch, HUC_DMEM_STATE | (6 - 2));
1324     OUT_BUFFER_3DW(batch, params->huc_data_source_res->bo, 0, 0, 0);
1325     OUT_BCS_BATCH(batch, params->huc_data_destination_base_address);
1326     OUT_BCS_BATCH(batch, params->huc_data_length);
1327
1328     ADVANCE_BCS_BATCH(batch);
1329 }
1330
1331 /*
1332 static void
1333 gen9_vdenc_huc_cfg_state(VADriverContextP ctx,
1334                          struct intel_encoder_context *encoder_context,
1335                          struct huc_cfg_state_parameter *params)
1336 {
1337     struct intel_batchbuffer *batch = encoder_context->base.batch;
1338
1339     BEGIN_BCS_BATCH(batch, 2);
1340
1341     OUT_BCS_BATCH(batch, HUC_CFG_STATE | (2 - 2));
1342     OUT_BCS_BATCH(batch, !!params->force_reset);
1343
1344     ADVANCE_BCS_BATCH(batch);
1345 }
1346 */
1347 static void
1348 gen9_vdenc_huc_virtual_addr_state(VADriverContextP ctx,
1349                                   struct intel_encoder_context *encoder_context,
1350                                   struct huc_virtual_addr_parameter *params)
1351 {
1352     struct i965_driver_data *i965 = i965_driver_data(ctx);
1353     struct intel_batchbuffer *batch = encoder_context->base.batch;
1354     int i;
1355
1356     BEGIN_BCS_BATCH(batch, 49);
1357
1358     OUT_BCS_BATCH(batch, HUC_VIRTUAL_ADDR_STATE | (49 - 2));
1359
1360     for (i = 0; i < 16; i++) {
1361         if (params->regions[i].huc_surface_res && params->regions[i].huc_surface_res->bo)
1362             OUT_BUFFER_3DW(batch,
1363                            params->regions[i].huc_surface_res->bo,
1364                            !!params->regions[i].is_target, 0, 0);
1365         else
1366             OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1367     }
1368
1369     ADVANCE_BCS_BATCH(batch);
1370 }
1371
1372 static void
1373 gen9_vdenc_huc_ind_obj_base_addr_state(VADriverContextP ctx,
1374                                        struct intel_encoder_context *encoder_context,
1375                                        struct huc_ind_obj_base_addr_parameter *params)
1376 {
1377     struct i965_driver_data *i965 = i965_driver_data(ctx);
1378     struct intel_batchbuffer *batch = encoder_context->base.batch;
1379
1380     BEGIN_BCS_BATCH(batch, 11);
1381
1382     OUT_BCS_BATCH(batch, HUC_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
1383
1384     if (params->huc_indirect_stream_in_object_res)
1385         OUT_BUFFER_3DW(batch,
1386                        params->huc_indirect_stream_in_object_res->bo,
1387                        0, 0, 0);
1388     else
1389         OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1390
1391     OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */
1392
1393     if (params->huc_indirect_stream_out_object_res)
1394         OUT_BUFFER_3DW(batch,
1395                        params->huc_indirect_stream_out_object_res->bo,
1396                        1, 0, 0);
1397     else
1398         OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1399
1400     OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */
1401
1402     ADVANCE_BCS_BATCH(batch);
1403 }
1404
1405 static void
1406 gen9_vdenc_huc_store_huc_status2(VADriverContextP ctx,
1407                                  struct intel_encoder_context *encoder_context)
1408 {
1409     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1410     struct intel_batchbuffer *batch = encoder_context->base.batch;
1411     struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
1412     struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
1413
1414     /* Write HUC_STATUS2 mask (1 << 6) */
1415     memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
1416     mi_store_data_imm_params.bo = vdenc_context->huc_status2_res.bo;
1417     mi_store_data_imm_params.offset = 0;
1418     mi_store_data_imm_params.dw0 = (1 << 6);
1419     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
1420
1421     /* Store HUC_STATUS2 */
1422     memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
1423     mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS2;
1424     mi_store_register_mem_params.bo = vdenc_context->huc_status2_res.bo;
1425     mi_store_register_mem_params.offset = 4;
1426     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
1427 }
1428
1429 static void
1430 gen9_vdenc_huc_stream_object(VADriverContextP ctx,
1431                              struct intel_encoder_context *encoder_context,
1432                              struct huc_stream_object_parameter *params)
1433 {
1434     struct intel_batchbuffer *batch = encoder_context->base.batch;
1435
1436     BEGIN_BCS_BATCH(batch, 5);
1437
1438     OUT_BCS_BATCH(batch, HUC_STREAM_OBJECT | (5 - 2));
1439     OUT_BCS_BATCH(batch, params->indirect_stream_in_data_length);
1440     OUT_BCS_BATCH(batch,
1441                   (1 << 31) |   /* Must be 1 */
1442                   params->indirect_stream_in_start_address);
1443     OUT_BCS_BATCH(batch, params->indirect_stream_out_start_address);
1444     OUT_BCS_BATCH(batch,
1445                   (!!params->huc_bitstream_enable << 29) |
1446                   (params->length_mode << 27) |
1447                   (!!params->stream_out << 26) |
1448                   (!!params->emulation_prevention_byte_removal << 25) |
1449                   (!!params->start_code_search_engine << 24) |
1450                   (params->start_code_byte2 << 16) |
1451                   (params->start_code_byte1 << 8) |
1452                   params->start_code_byte0);
1453
1454     ADVANCE_BCS_BATCH(batch);
1455 }
1456
1457 static void
1458 gen9_vdenc_huc_start(VADriverContextP ctx,
1459                      struct intel_encoder_context *encoder_context,
1460                      struct huc_start_parameter *params)
1461 {
1462     struct intel_batchbuffer *batch = encoder_context->base.batch;
1463
1464     BEGIN_BCS_BATCH(batch, 2);
1465
1466     OUT_BCS_BATCH(batch, HUC_START | (2 - 2));
1467     OUT_BCS_BATCH(batch, !!params->last_stream_object);
1468
1469     ADVANCE_BCS_BATCH(batch);
1470 }
1471
1472 static void
1473 gen9_vdenc_vd_pipeline_flush(VADriverContextP ctx,
1474                              struct intel_encoder_context *encoder_context,
1475                              struct vd_pipeline_flush_parameter *params)
1476 {
1477     struct intel_batchbuffer *batch = encoder_context->base.batch;
1478
1479     BEGIN_BCS_BATCH(batch, 2);
1480
1481     OUT_BCS_BATCH(batch, VD_PIPELINE_FLUSH | (2 - 2));
1482     OUT_BCS_BATCH(batch,
1483                   params->mfx_pipeline_command_flush << 19 |
1484                   params->mfl_pipeline_command_flush << 18 |
1485                   params->vdenc_pipeline_command_flush << 17 |
1486                   params->hevc_pipeline_command_flush << 16 |
1487                   params->vd_command_message_parser_done << 4 |
1488                   params->mfx_pipeline_done << 3 |
1489                   params->mfl_pipeline_done << 2 |
1490                   params->vdenc_pipeline_done << 1 |
1491                   params->hevc_pipeline_done);
1492
1493     ADVANCE_BCS_BATCH(batch);
1494 }
1495
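/*
 * Maximum macroblock processing rate for a given level_idc.  The values
 * correspond to MaxMBPS from H.264 Annex A, Table A-1, for levels 2.0
 * through 5.2; anything else falls back to the level 2.0 limit of 11880.
 */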
1496 static int
1497 gen9_vdenc_get_max_mbps(int level_idc)
1498 {
1499     int max_mbps = 11880;
1500
1501     switch (level_idc) {
1502     case 20:
1503         max_mbps = 11880;
1504         break;
1505
1506     case 21:
1507         max_mbps = 19800;
1508         break;
1509
1510     case 22:
1511         max_mbps = 20250;
1512         break;
1513
1514     case 30:
1515         max_mbps = 40500;
1516         break;
1517
1518     case 31:
1519         max_mbps = 108000;
1520         break;
1521
1522     case 32:
1523         max_mbps = 216000;
1524         break;
1525
1526     case 40:
1527     case 41:
1528         max_mbps = 245760;
1529         break;
1530
1531     case 42:
1532         max_mbps = 522240;
1533         break;
1534
1535     case 50:
1536         max_mbps = 589824;
1537         break;
1538
1539     case 51:
1540         max_mbps = 983040;
1541         break;
1542
1543     case 52:
1544         max_mbps = 2073600;
1545         break;
1546
1547     default:
1548         break;
1549     }
1550
1551     return max_mbps;
1552 }
1553
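/*
 * Derive a per-frame size budget from the level limits: one bound from a
 * bits-per-MB allowance, one from MaxMBPS spread over a single frame period.
 * As a rough illustration (assumed numbers, not taken from the spec):
 * 1920x1080 at 30 fps and level 4.0 gives 120 * 68 = 8160 MBs,
 * MaxMBPS = 245760 and bits_per_mb = 96.0, so bound0 = 8160 * 96 = 783360
 * and bound1 = 245760 / 30 * 96 = 786432; the smaller value is returned
 * after clamping to the frame area.
 */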
1554 static unsigned int
1555 gen9_vdenc_get_profile_level_max_frame(VADriverContextP ctx,
1556                                        struct intel_encoder_context *encoder_context,
1557                                        int level_idc)
1558 {
1559     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1560     double bits_per_mb, tmpf;
1561     int max_mbps, num_mb_per_frame;
1562     uint64_t max_byte_per_frame0, max_byte_per_frame1;
1563     unsigned int ret;
1564
1565     if (level_idc >= 31 && level_idc <= 40)
1566         bits_per_mb = 96.0;
1567     else
1568         bits_per_mb = 192.0;
1569
1570     max_mbps = gen9_vdenc_get_max_mbps(level_idc);
1571     num_mb_per_frame = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs;
1572
1573     tmpf = (double)num_mb_per_frame;
1574
1575     if (tmpf < max_mbps / 172.0)
1576         tmpf = max_mbps / 172.0;
1577
1578     max_byte_per_frame0 = (uint64_t)(tmpf * bits_per_mb);
1579     max_byte_per_frame1 = (uint64_t)(((double)max_mbps * vdenc_context->framerate.den) /
1580                                      (double)vdenc_context->framerate.num * bits_per_mb);
1581
1582     /* TODO: check VAEncMiscParameterTypeMaxFrameSize */
1583     ret = (unsigned int)MIN(max_byte_per_frame0, max_byte_per_frame1);
1584     ret = (unsigned int)MIN(ret, vdenc_context->frame_width * vdenc_context->frame_height);
1585
1586     return ret;
1587 }
1588
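/*
 * Estimate a starting QP from how many source samples each target bit has
 * to cover (the inverse of bits per pixel), interpolated on a log10 scale
 * between the (x0, y0) and (x1, y1) anchors below; the QP is then raised
 * further when the VBV buffer is small relative to the bitrate.
 */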
1589 static int
1590 gen9_vdenc_calculate_initial_qp(VADriverContextP ctx,
1591                                 struct encode_state *encode_state,
1592                                 struct intel_encoder_context *encoder_context)
1593 {
1594     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1595     float x0 = 0, y0 = 1.19f, x1 = 1.75f, y1 = 1.75f;
1596     unsigned frame_size;
1597     int qp, delta_qp;
1598
1599     frame_size = (vdenc_context->frame_width * vdenc_context->frame_height * 3 / 2);
1600     qp = (int)(1.0 / 1.2 * pow(10.0,
1601                                (log10(frame_size * 2.0 / 3.0 * vdenc_context->framerate.num /
1602                                       ((double)vdenc_context->target_bit_rate * 1000.0 * vdenc_context->framerate.den)) - x0) *
1603                                (y1 - y0) / (x1 - x0) + y0) + 0.5);
1604     qp += 2;
1605     delta_qp = (int)(9 - (vdenc_context->vbv_buffer_size_in_bit * ((double)vdenc_context->framerate.num) /
1606                           ((double)vdenc_context->target_bit_rate * 1000.0 * vdenc_context->framerate.den)));
1607     if (delta_qp > 0)
1608         qp += delta_qp;
1609
1610     qp = CLAMP(1, 51, qp);
1611     qp--;
1612
1613     if (qp < 0)
1614         qp = 1;
1615
1616     return qp;
1617 }
1618
1619 static void
1620 gen9_vdenc_update_huc_brc_init_dmem(VADriverContextP ctx,
1621                                     struct encode_state *encode_state,
1622                                     struct intel_encoder_context *encoder_context)
1623 {
1624     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1625     struct huc_brc_init_dmem *dmem;
1626     VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1627     double input_bits_per_frame, bps_ratio;
1628     int i;
1629
1630     vdenc_context->brc_init_reset_input_bits_per_frame =
1631         ((double)vdenc_context->max_bit_rate * 1000.0 * vdenc_context->framerate.den) / vdenc_context->framerate.num;
1632     vdenc_context->brc_init_current_target_buf_full_in_bits = vdenc_context->brc_init_reset_input_bits_per_frame;
1633     vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
1634
1635     dmem = (struct huc_brc_init_dmem *)i965_map_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1636
1637     if (!dmem)
1638         return;
1639
1640     memset(dmem, 0, sizeof(*dmem));
1641
1642     dmem->brc_func = vdenc_context->brc_initted ? 2 : 0;   /* 0: BRC init, 2: BRC reset */
1643
1644     dmem->frame_width = vdenc_context->frame_width;
1645     dmem->frame_height = vdenc_context->frame_height;
1646
1647     dmem->target_bitrate = vdenc_context->target_bit_rate * 1000;
1648     dmem->min_rate = vdenc_context->min_bit_rate * 1000;
1649     dmem->max_rate = vdenc_context->max_bit_rate * 1000;
1650     dmem->buffer_size = vdenc_context->vbv_buffer_size_in_bit;
1651     dmem->init_buffer_fullness = vdenc_context->init_vbv_buffer_fullness_in_bit;
1652
1653     if (dmem->init_buffer_fullness > vdenc_context->vbv_buffer_size_in_bit)
1654         dmem->init_buffer_fullness = vdenc_context->vbv_buffer_size_in_bit;
1655
1656     if (vdenc_context->internal_rate_mode == I965_BRC_CBR)
1657         dmem->brc_flag |= 0x10;
1658     else if (vdenc_context->internal_rate_mode == I965_BRC_VBR)
1659         dmem->brc_flag |= 0x20;
1660
1661     dmem->frame_rate_m = vdenc_context->framerate.num;
1662     dmem->frame_rate_d = vdenc_context->framerate.den;
1663
1664     dmem->profile_level_max_frame = gen9_vdenc_get_profile_level_max_frame(ctx, encoder_context, seq_param->level_idc);
1665
1666     if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1667         dmem->num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
1668
1669     dmem->min_qp = 10;
1670     dmem->max_qp = 51;
1671
1672     input_bits_per_frame = ((double)vdenc_context->max_bit_rate * 1000.0 * vdenc_context->framerate.den) / vdenc_context->framerate.num;
1673     bps_ratio = input_bits_per_frame /
1674                 ((double)vdenc_context->vbv_buffer_size_in_bit * vdenc_context->framerate.den / vdenc_context->framerate.num);
1675
1676     if (bps_ratio < 0.1)
1677         bps_ratio = 0.1;
1678
1679     if (bps_ratio > 3.5)
1680         bps_ratio = 3.5;
1681
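    /*
     * Scale the PAK deviation thresholds with bps_ratio (roughly the target
     * bitrate relative to the VBV buffer size, clamped to [0.1, 3.5] above);
     * presumably this tightens the thresholds when the buffer only holds a
     * fraction of a second of video.
     */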
1682     for (i = 0; i < 4; i++) {
1683         dmem->dev_thresh_pb0[i] = (char)(-50 * pow(vdenc_brc_dev_threshpb0_fp_neg[i], bps_ratio));
1684         dmem->dev_thresh_pb0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshpb0_fp_pos[i], bps_ratio));
1685
1686         dmem->dev_thresh_i0[i] = (char)(-50 * pow(vdenc_brc_dev_threshi0_fp_neg[i], bps_ratio));
1687         dmem->dev_thresh_i0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshi0_fp_pos[i], bps_ratio));
1688
1689         dmem->dev_thresh_vbr0[i] = (char)(-50 * pow(vdenc_brc_dev_threshvbr0_neg[i], bps_ratio));
1690         dmem->dev_thresh_vbr0[i + 4] = (char)(100 * pow(vdenc_brc_dev_threshvbr0_pos[i], bps_ratio));
1691     }
1692
1693     dmem->init_qp_ip = gen9_vdenc_calculate_initial_qp(ctx, encode_state, encoder_context);
1694
1695     if (vdenc_context->mb_brc_enabled) {
1696         dmem->mb_qp_ctrl = 1;
1697         dmem->dist_qp_delta[0] = -5;
1698         dmem->dist_qp_delta[1] = -2;
1699         dmem->dist_qp_delta[2] = 2;
1700         dmem->dist_qp_delta[3] = 5;
1701     }
1702
1703     dmem->slice_size_ctrl_en = 0;       /* TODO: add support for slice size control */
1704
1705     dmem->oscillation_qp_delta = 0;     /* TODO: add support */
1706     dmem->first_iframe_no_hrd_check = 0;/* TODO: add support */
1707
1708     // 2nd re-encode pass if possible
1709     if (vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs >= (3840 * 2160 / 256)) {
1710         dmem->top_qp_delta_thr_for_2nd_pass = 5;
1711         dmem->bottom_qp_delta_thr_for_2nd_pass = 5;
1712         dmem->top_frame_size_threshold_for_2nd_pass = 80;
1713         dmem->bottom_frame_size_threshold_for_2nd_pass = 80;
1714     } else {
1715         dmem->top_qp_delta_thr_for_2nd_pass = 2;
1716         dmem->bottom_qp_delta_thr_for_2nd_pass = 1;
1717         dmem->top_frame_size_threshold_for_2nd_pass = 32;
1718         dmem->bottom_frame_size_threshold_for_2nd_pass = 24;
1719     }
1720
1721     dmem->qp_select_for_first_pass = 1;
1722     dmem->mb_header_compensation = 1;
1723     dmem->delta_qp_adaptation = 1;
1724     dmem->max_crf_quality_factor = 52;
1725
1726     dmem->crf_quality_factor = 0;               /* TODO: add support for CRF */
1727     dmem->scenario_info = 0;
1728
1729     memcpy(&dmem->estrate_thresh_i0, vdenc_brc_estrate_thresh_i0, sizeof(dmem->estrate_thresh_i0));
1730     memcpy(&dmem->estrate_thresh_p0, vdenc_brc_estrate_thresh_p0, sizeof(dmem->estrate_thresh_p0));
1731
1732     i965_unmap_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1733 }
1734
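/*
 * Submit the HuC BRC init/reset workload: load the BRC_INIT_RESET firmware
 * descriptor, point DMEM at the init/reset parameter block filled in above,
 * expose the BRC history buffer as region 0, kick HUC_START and flush the
 * pipeline.
 */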
1735 static void
1736 gen9_vdenc_huc_brc_init_reset(VADriverContextP ctx,
1737                               struct encode_state *encode_state,
1738                               struct intel_encoder_context *encoder_context)
1739 {
1740     struct intel_batchbuffer *batch = encoder_context->base.batch;
1741     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1742     struct huc_pipe_mode_select_parameter pipe_mode_select_params;
1743     struct huc_imem_state_parameter imem_state_params;
1744     struct huc_dmem_state_parameter dmem_state_params;
1745     struct huc_virtual_addr_parameter virtual_addr_params;
1746     struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
1747     struct huc_stream_object_parameter stream_object_params;
1748     struct huc_start_parameter start_params;
1749     struct vd_pipeline_flush_parameter pipeline_flush_params;
1750     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
1751
1752     vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
1753
1754     memset(&imem_state_params, 0, sizeof(imem_state_params));
1755     imem_state_params.huc_firmware_descriptor = HUC_BRC_INIT_RESET;
1756     gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);
1757
1758     memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
1759     gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);
1760
1761     gen9_vdenc_update_huc_brc_init_dmem(ctx, encode_state, encoder_context);
1762     memset(&dmem_state_params, 0, sizeof(dmem_state_params));
1763     dmem_state_params.huc_data_source_res = &vdenc_context->brc_init_reset_dmem_res;
1764     dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
1765     dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_init_dmem), 64);
1766     gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);
1767
1768     memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
1769     virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
1770     virtual_addr_params.regions[0].is_target = 1;
1771     gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);
1772
1773     memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
1774     ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
1775     ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
1776     gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);
1777
1778     memset(&stream_object_params, 0, sizeof(stream_object_params));
1779     stream_object_params.indirect_stream_in_data_length = 1;
1780     stream_object_params.indirect_stream_in_start_address = 0;
1781     gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);
1782
1783     gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);
1784
1785     memset(&start_params, 0, sizeof(start_params));
1786     start_params.last_stream_object = 1;
1787     gen9_vdenc_huc_start(ctx, encoder_context, &start_params);
1788
1789     memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
1790     pipeline_flush_params.hevc_pipeline_done = 1;
1791     pipeline_flush_params.hevc_pipeline_command_flush = 1;
1792     gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
1793
1794     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
1795     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
1796     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
1797 }
1798
1799 static void
1800 gen9_vdenc_update_huc_update_dmem(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1801 {
1802     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1803     struct huc_brc_update_dmem *dmem;
1804     int i, num_p_in_gop = 0;
1805
1806     dmem = (struct huc_brc_update_dmem *)i965_map_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
1807
1808     if (!dmem)
1809         return;
1810
1811     dmem->brc_func = 1;         /* BRC update */
1812
1813     if (vdenc_context->brc_initted && (vdenc_context->current_pass == 0)) {
1814         vdenc_context->brc_init_previous_target_buf_full_in_bits =
1815             (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits);
1816         vdenc_context->brc_init_current_target_buf_full_in_bits += vdenc_context->brc_init_reset_input_bits_per_frame;
1817         vdenc_context->brc_target_size += vdenc_context->brc_init_reset_input_bits_per_frame;
1818     }
1819
1820     if (vdenc_context->brc_target_size > vdenc_context->vbv_buffer_size_in_bit)
1821         vdenc_context->brc_target_size -= vdenc_context->vbv_buffer_size_in_bit;
1822
1823     dmem->target_size = vdenc_context->brc_target_size;
1824
1825     dmem->peak_tx_bits_per_frame = (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits - vdenc_context->brc_init_previous_target_buf_full_in_bits);
1826
1827     dmem->target_slice_size = 0;        // TODO: add support for slice size control
1828
1829     memcpy(dmem->start_global_adjust_frame, vdenc_brc_start_global_adjust_frame, sizeof(dmem->start_global_adjust_frame));
1830     memcpy(dmem->global_rate_ratio_threshold, vdenc_brc_global_rate_ratio_threshold, sizeof(dmem->global_rate_ratio_threshold));
1831
1832     dmem->current_frame_type = (vdenc_context->frame_type + 2) % 3;      // I frame:2, P frame:0, B frame:1
1833
1834     memcpy(dmem->start_global_adjust_mult, vdenc_brc_start_global_adjust_mult, sizeof(dmem->start_global_adjust_mult));
1835     memcpy(dmem->start_global_adjust_div, vdenc_brc_start_global_adjust_div, sizeof(dmem->start_global_adjust_div));
1836     memcpy(dmem->global_rate_ratio_threshold_qp, vdenc_brc_global_rate_ratio_threshold_qp, sizeof(dmem->global_rate_ratio_threshold_qp));
1837
1838     dmem->current_pak_pass = vdenc_context->current_pass;
1839     dmem->max_num_passes = 2;
1840
1841     dmem->scene_change_detect_enable = 1;
1842     dmem->scene_change_prev_intra_percent_threshold = 96;
1843     dmem->scene_change_cur_intra_perent_threshold = 192;
1844
1845     if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1846         num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
1847
1848     for (i = 0; i < 2; i++)
1849         dmem->scene_change_width[i] = MIN((num_p_in_gop + 1) / 5, 6);
1850
1851     if (vdenc_context->is_low_delay)
1852         dmem->ip_average_coeff = 0;
1853     else
1854         dmem->ip_average_coeff = 128;
1855
1856     dmem->skip_frame_size = 0;
1857     dmem->num_of_frames_skipped = 0;
1858
1859     dmem->roi_source = 0;               // TODO: add support for dirty ROI
1860     dmem->hme_detection_enable = 0;     // TODO: support HME kernel
1861     dmem->hme_cost_enable = 1;
1862
1863     dmem->second_level_batchbuffer_size = 228;
1864
1865     i965_unmap_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
1866 }
1867
1868 static void
1869 gen9_vdenc_init_mfx_avc_img_state(VADriverContextP ctx,
1870                                   struct encode_state *encode_state,
1871                                   struct intel_encoder_context *encoder_context,
1872                                   struct gen9_mfx_avc_img_state *pstate,
1873                                   int use_huc)
1874 {
1875     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1876     VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1877     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1878
1879     memset(pstate, 0, sizeof(*pstate));
1880
1881     pstate->dw0.value = (MFX_AVC_IMG_STATE | (sizeof(*pstate) / 4 - 2));
1882
1883     pstate->dw1.frame_size_in_mbs_minus1 = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs - 1;
1884
1885     pstate->dw2.frame_width_in_mbs_minus1 = vdenc_context->frame_width_in_mbs - 1;
1886     pstate->dw2.frame_height_in_mbs_minus1 = vdenc_context->frame_height_in_mbs - 1;
1887
1888     pstate->dw3.image_structure = 0;
1889     pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
1890     pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
1891     pstate->dw3.brc_domain_rate_control_enable = !!use_huc;
1892     pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
1893     pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
1894
1895     pstate->dw4.field_picture_flag = 0;
1896     pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
1897     pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
1898     pstate->dw4.transform_8x8_idct_mode_flag = vdenc_context->transform_8x8_mode_enable;
1899     pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
1900     pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
1901     pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
1902     pstate->dw4.mb_mv_format_flag = 1;
1903     pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
1904     pstate->dw4.mv_unpacked_flag = 1;
1905     pstate->dw4.insert_test_flag = 0;
1906     pstate->dw4.load_slice_pointer_flag = 0;
1907     pstate->dw4.macroblock_stat_enable = 0;        /* Always 0 in VDEnc mode */
1908     pstate->dw4.minimum_frame_size = 0;
1909
1910     pstate->dw5.intra_mb_max_bit_flag = 1;
1911     pstate->dw5.inter_mb_max_bit_flag = 1;
1912     pstate->dw5.frame_size_over_flag = 1;
1913     pstate->dw5.frame_size_under_flag = 1;
1914     pstate->dw5.intra_mb_ipcm_flag = 1;
1915     pstate->dw5.mb_rate_ctrl_flag = 0;             /* Always 0 in VDEnc mode */
1916     pstate->dw5.non_first_pass_flag = 0;
1917     pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
1918     pstate->dw5.aq_chroma_disable = 1;
1919
1920     pstate->dw6.intra_mb_max_size = 2700;
1921     pstate->dw6.inter_mb_max_size = 4095;
1922
1923     pstate->dw8.slice_delta_qp_max0 = 0;
1924     pstate->dw8.slice_delta_qp_max1 = 0;
1925     pstate->dw8.slice_delta_qp_max2 = 0;
1926     pstate->dw8.slice_delta_qp_max3 = 0;
1927
1928     pstate->dw9.slice_delta_qp_min0 = 0;
1929     pstate->dw9.slice_delta_qp_min1 = 0;
1930     pstate->dw9.slice_delta_qp_min2 = 0;
1931     pstate->dw9.slice_delta_qp_min3 = 0;
1932
1933     pstate->dw10.frame_bitrate_min = 0;
1934     pstate->dw10.frame_bitrate_min_unit = 1;
1935     pstate->dw10.frame_bitrate_min_unit_mode = 1;
1936     pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
1937     pstate->dw10.frame_bitrate_max_unit = 1;
1938     pstate->dw10.frame_bitrate_max_unit_mode = 1;
1939
1940     pstate->dw11.frame_bitrate_min_delta = 0;
1941     pstate->dw11.frame_bitrate_max_delta = 0;
1942
1943     pstate->dw12.vad_error_logic = 1;
1944     /* TODO: set parameters DW19/DW20 for slices */
1945 }
1946
1947 static void
1948 gen9_vdenc_init_vdenc_img_state(VADriverContextP ctx,
1949                                 struct encode_state *encode_state,
1950                                 struct intel_encoder_context *encoder_context,
1951                                 struct gen9_vdenc_img_state *pstate,
1952                                 int update_cost)
1953 {
1954     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1955     VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1956     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1957     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1958
1959     memset(pstate, 0, sizeof(*pstate));
1960
1961     pstate->dw0.value = (VDENC_IMG_STATE | (sizeof(*pstate) / 4 - 2));
1962
1963     if (vdenc_context->frame_type == VDENC_FRAME_I) {
1964         pstate->dw4.intra_sad_measure_adjustment = 2;
1965         pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;
1966
1967         pstate->dw5.cre_prefetch_enable = 1;
1968
1969         pstate->dw9.mode0_cost = 10;
1970         pstate->dw9.mode1_cost = 0;
1971         pstate->dw9.mode2_cost = 3;
1972         pstate->dw9.mode3_cost = 30;
1973
1974         pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
1975         pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
1976         pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;
1977
1978         pstate->dw22.small_mb_size_in_word = 0xff;
1979         pstate->dw22.large_mb_size_in_word = 0xff;
1980
1981         pstate->dw27.max_hmv_r = 0x2000;
1982         pstate->dw27.max_vmv_r = 0x200;
1983
1984         pstate->dw33.qp_range_check_upper_bound = 0x33;
1985         pstate->dw33.qp_range_check_lower_bound = 0x0a;
1986         pstate->dw33.qp_range_check_value = 0x0f;
1987     } else {
1988         pstate->dw2.bidirectional_weight = 0x20;
1989
1990         pstate->dw4.subpel_mode = 3;
1991         pstate->dw4.bme_disable_for_fbr_message = 1;
1992         pstate->dw4.inter_sad_measure_adjustment = 2;
1993         pstate->dw4.intra_sad_measure_adjustment = 2;
1994         pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;
1995
1996         pstate->dw5.cre_prefetch_enable = 1;
1997
1998         pstate->dw8.non_skip_zero_mv_const_added = 1;
1999         pstate->dw8.non_skip_mb_mode_const_added = 1;
2000         pstate->dw8.ref_id_cost_mode_select = 1;
2001
2002         pstate->dw9.mode0_cost = 7;
2003         pstate->dw9.mode1_cost = 26;
2004         pstate->dw9.mode2_cost = 30;
2005         pstate->dw9.mode3_cost = 57;
2006
2007         pstate->dw10.mode4_cost = 8;
2008         pstate->dw10.mode5_cost = 2;
2009         pstate->dw10.mode6_cost = 4;
2010         pstate->dw10.mode7_cost = 6;
2011
2012         pstate->dw11.mode8_cost = 5;
2013         pstate->dw11.mode9_cost = 0;
2014         pstate->dw11.ref_id_cost = 4;
2015         pstate->dw11.chroma_intra_mode_cost = 0;
2016
2017         pstate->dw12_13.mv_cost.dw0.mv0_cost = 0;
2018         pstate->dw12_13.mv_cost.dw0.mv1_cost = 6;
2019         pstate->dw12_13.mv_cost.dw0.mv2_cost = 6;
2020         pstate->dw12_13.mv_cost.dw0.mv3_cost = 9;
2021         pstate->dw12_13.mv_cost.dw1.mv4_cost = 10;
2022         pstate->dw12_13.mv_cost.dw1.mv5_cost = 13;
2023         pstate->dw12_13.mv_cost.dw1.mv6_cost = 14;
2024         pstate->dw12_13.mv_cost.dw1.mv7_cost = 24;
2025
2026         pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
2027         pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
2028         pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;
2029
2030         pstate->dw22.small_mb_size_in_word = 0xff;
2031         pstate->dw22.large_mb_size_in_word = 0xff;
2032
2033         pstate->dw27.max_hmv_r = 0x2000;
2034         pstate->dw27.max_vmv_r = 0x200;
2035
2036         pstate->dw31.offset0_for_zone0_neg_zone1_boundary = 800;
2037
2038         pstate->dw32.offset1_for_zone1_neg_zone2_boundary = 1600;
2039         pstate->dw32.offset2_for_zone2_neg_zone3_boundary = 2400;
2040
2041         pstate->dw33.qp_range_check_upper_bound = 0x33;
2042         pstate->dw33.qp_range_check_lower_bound = 0x0a;
2043         pstate->dw33.qp_range_check_value = 0x0f;
2044
2045         pstate->dw34.midpoint_distortion = 0x640;
2046     }
2047
2048     /* ROI will be updated in HuC kernel for CBR/VBR */
2049     if (!vdenc_context->brc_enabled && vdenc_context->num_roi) {
2050         pstate->dw34.roi_enable = 1;
2051
2052         pstate->dw30.roi_qp_adjustment_for_zone1 = CLAMP(-8, 7, vdenc_context->roi[0].value);
2053
2054         if (vdenc_context->num_roi > 1)
2055             pstate->dw30.roi_qp_adjustment_for_zone2 = CLAMP(-8, 7, vdenc_context->roi[1].value);
2056
2057         if (vdenc_context->num_roi > 2)
2058             pstate->dw30.roi_qp_adjustment_for_zone3 = CLAMP(-8, 7, vdenc_context->roi[2].value);
2059     }
2060
2061     pstate->dw1.transform_8x8_flag = vdenc_context->transform_8x8_mode_enable;
2062     pstate->dw1.extended_pak_obj_cmd_enable = !!vdenc_context->use_extended_pak_obj_cmd;
2063
2064     pstate->dw3.picture_width = vdenc_context->frame_width_in_mbs;
2065
2066     pstate->dw4.forward_transform_skip_check_enable = 1; /* TODO: double-check it */
2067
2068     pstate->dw5.picture_height_minus1 = vdenc_context->frame_height_in_mbs - 1;
2069     pstate->dw5.picture_type = vdenc_context->frame_type;
2070     pstate->dw5.constrained_intra_prediction_flag  = pic_param->pic_fields.bits.constrained_intra_pred_flag;
2071
2072     if (vdenc_context->frame_type == VDENC_FRAME_P) {
2073         pstate->dw5.hme_ref1_disable = vdenc_context->num_refs[0] == 1 ? 1 : 0;
2074     }
2075
2076     pstate->dw5.mb_slice_threshold_value = 0;
2077
2078     pstate->dw6.slice_macroblock_height_minus1 = vdenc_context->frame_height_in_mbs - 1; /* single slice only */
2079
2080     if (pstate->dw1.transform_8x8_flag)
2081         pstate->dw8.luma_intra_partition_mask = 0;
2082     else
2083         pstate->dw8.luma_intra_partition_mask = (1 << 1); /* disable transform_8x8 */
2084
2085     pstate->dw14.qp_prime_y = pic_param->pic_init_qp + slice_param->slice_qp_delta;      /* TODO: check whether it is OK to use the first slice only */
2086
2087     if (update_cost) {
2088         pstate->dw9.mode0_cost = vdenc_context->mode_cost[0];
2089         pstate->dw9.mode1_cost = vdenc_context->mode_cost[1];
2090         pstate->dw9.mode2_cost = vdenc_context->mode_cost[2];
2091         pstate->dw9.mode3_cost = vdenc_context->mode_cost[3];
2092
2093         pstate->dw10.mode4_cost = vdenc_context->mode_cost[4];
2094         pstate->dw10.mode5_cost = vdenc_context->mode_cost[5];
2095         pstate->dw10.mode6_cost = vdenc_context->mode_cost[6];
2096         pstate->dw10.mode7_cost = vdenc_context->mode_cost[7];
2097
2098         pstate->dw11.mode8_cost = vdenc_context->mode_cost[8];
2099         pstate->dw11.mode9_cost = vdenc_context->mode_cost[9];
2100         pstate->dw11.ref_id_cost = vdenc_context->mode_cost[10];
2101         pstate->dw11.chroma_intra_mode_cost = vdenc_context->mode_cost[11];
2102
2103         pstate->dw12_13.mv_cost.dw0.mv0_cost = vdenc_context->mv_cost[0];
2104         pstate->dw12_13.mv_cost.dw0.mv1_cost = vdenc_context->mv_cost[1];
2105         pstate->dw12_13.mv_cost.dw0.mv2_cost = vdenc_context->mv_cost[2];
2106         pstate->dw12_13.mv_cost.dw0.mv3_cost = vdenc_context->mv_cost[3];
2107         pstate->dw12_13.mv_cost.dw1.mv4_cost = vdenc_context->mv_cost[4];
2108         pstate->dw12_13.mv_cost.dw1.mv5_cost = vdenc_context->mv_cost[5];
2109         pstate->dw12_13.mv_cost.dw1.mv6_cost = vdenc_context->mv_cost[6];
2110         pstate->dw12_13.mv_cost.dw1.mv7_cost = vdenc_context->mv_cost[7];
2111
2112         pstate->dw28_29.hme_mv_cost.dw0.mv0_cost = vdenc_context->hme_mv_cost[0];
2113         pstate->dw28_29.hme_mv_cost.dw0.mv1_cost = vdenc_context->hme_mv_cost[1];
2114         pstate->dw28_29.hme_mv_cost.dw0.mv2_cost = vdenc_context->hme_mv_cost[2];
2115         pstate->dw28_29.hme_mv_cost.dw0.mv3_cost = vdenc_context->hme_mv_cost[3];
2116         pstate->dw28_29.hme_mv_cost.dw1.mv4_cost = vdenc_context->hme_mv_cost[4];
2117         pstate->dw28_29.hme_mv_cost.dw1.mv5_cost = vdenc_context->hme_mv_cost[5];
2118         pstate->dw28_29.hme_mv_cost.dw1.mv6_cost = vdenc_context->hme_mv_cost[6];
2119         pstate->dw28_29.hme_mv_cost.dw1.mv7_cost = vdenc_context->hme_mv_cost[7];
2120     }
2121
2122     pstate->dw27.max_vmv_r = gen9_vdenc_get_max_vmv_range(seq_param->level_idc);
2123
2124     pstate->dw34.image_state_qp_override = (vdenc_context->internal_rate_mode == I965_BRC_CQP) ? 1 : 0;
2125
2126     /* TODO: check rolling I */
2127
2128     /* TODO: handle ROI */
2129
2130     /* TODO: check stream in support */
2131 }
2132
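/*
 * Pack the MFX_AVC_IMG_STATE and VDENC_IMG_STATE commands (plus a batch
 * buffer end) into vdenc_avc_image_state_res.  During the HuC BRC update
 * below this buffer is handed to the firmware as an input region, and the
 * firmware is expected to emit the adjusted image states into the
 * second-level batch buffer.
 */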
2133 static void
2134 gen9_vdenc_init_img_states(VADriverContextP ctx,
2135                            struct encode_state *encode_state,
2136                            struct intel_encoder_context *encoder_context)
2137 {
2138     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2139     struct gen9_mfx_avc_img_state *mfx_img_cmd;
2140     struct gen9_vdenc_img_state *vdenc_img_cmd;
2141     char *pbuffer;
2142
2143     pbuffer = i965_map_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
2144
2145     if (!pbuffer)
2146         return;
2147
2148     mfx_img_cmd = (struct gen9_mfx_avc_img_state *)pbuffer;
2149     gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, mfx_img_cmd, 1);
2150     pbuffer += sizeof(*mfx_img_cmd);
2151
2152     vdenc_img_cmd = (struct gen9_vdenc_img_state *)pbuffer;
2153     gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, vdenc_img_cmd, 0);
2154     pbuffer += sizeof(*vdenc_img_cmd);
2155
2156     /* Add batch buffer end command */
2157     *((unsigned int *)pbuffer) = MI_BATCH_BUFFER_END;
2158
2159     i965_unmap_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
2160 }
2161
2162 static void
2163 gen9_vdenc_huc_brc_update_constant_data(VADriverContextP ctx,
2164                                         struct encode_state *encode_state,
2165                                         struct intel_encoder_context *encoder_context)
2166 {
2167     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2168     struct huc_brc_update_constant_data *brc_buffer;
2169     int i, j;
2170
2171     brc_buffer = (struct huc_brc_update_constant_data *)
2172                  i965_map_gpe_resource(&vdenc_context->brc_constant_data_res);
2173
2174     if (!brc_buffer)
2175         return;
2176
2177     memcpy(brc_buffer, &gen9_brc_update_constant_data, sizeof(gen9_brc_update_constant_data));
2178
2179     for (i = 0; i < 8; i++) {
2180         for (j = 0; j < 42; j++) {
2181             brc_buffer->hme_mv_cost[i][j] = map_44_lut_value((vdenc_hme_cost[i][j + 10]), 0x6f);
2182         }
2183     }
2184
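    /*
     * gen9_brc_update_constant_data provides the default tables; for VBR the
     * distortion-QP and buffer-rate adjustment tables are overridden with the
     * VBR variants below.
     */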
2185     if (vdenc_context->internal_rate_mode == I965_BRC_VBR) {
2186         memcpy(brc_buffer->dist_qp_adj_tab_i, dist_qp_adj_tab_i_vbr, sizeof(dist_qp_adj_tab_i_vbr));
2187         memcpy(brc_buffer->dist_qp_adj_tab_p, dist_qp_adj_tab_p_vbr, sizeof(dist_qp_adj_tab_p_vbr));
2188         memcpy(brc_buffer->dist_qp_adj_tab_b, dist_qp_adj_tab_b_vbr, sizeof(dist_qp_adj_tab_b_vbr));
2189         memcpy(brc_buffer->buf_rate_adj_tab_i, buf_rate_adj_tab_i_vbr, sizeof(buf_rate_adj_tab_i_vbr));
2190         memcpy(brc_buffer->buf_rate_adj_tab_p, buf_rate_adj_tab_p_vbr, sizeof(buf_rate_adj_tab_p_vbr));
2191         memcpy(brc_buffer->buf_rate_adj_tab_b, buf_rate_adj_tab_b_vbr, sizeof(buf_rate_adj_tab_b_vbr));
2192     }
2193
2194
2195     i965_unmap_gpe_resource(&vdenc_context->brc_constant_data_res);
2196 }
2197
2198 static void
2199 gen9_vdenc_huc_brc_update(VADriverContextP ctx,
2200                           struct encode_state *encode_state,
2201                           struct intel_encoder_context *encoder_context)
2202 {
2203     struct intel_batchbuffer *batch = encoder_context->base.batch;
2204     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2205     struct huc_pipe_mode_select_parameter pipe_mode_select_params;
2206     struct huc_imem_state_parameter imem_state_params;
2207     struct huc_dmem_state_parameter dmem_state_params;
2208     struct huc_virtual_addr_parameter virtual_addr_params;
2209     struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
2210     struct huc_stream_object_parameter stream_object_params;
2211     struct huc_start_parameter start_params;
2212     struct vd_pipeline_flush_parameter pipeline_flush_params;
2213     struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
2214     struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
2215     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
2216
2217     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
2218     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
2219     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
2220
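    /*
     * On the first frame (or after a BRC reset request) a conditional batch
     * buffer end is issued against huc_status2_res, which holds the (1 << 6)
     * mask and the HUC_STATUS2 value stored by
     * gen9_vdenc_huc_store_huc_status2(); presumably this aborts the BRC
     * update when the HuC firmware has not reported itself ready.
     */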
2221     if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset) {
2222         struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
2223
2224         memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
2225         mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
2226         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
2227     }
2228
2229     gen9_vdenc_init_img_states(ctx, encode_state, encoder_context);
2230
2231     memset(&imem_state_params, 0, sizeof(imem_state_params));
2232     imem_state_params.huc_firmware_descriptor = HUC_BRC_UPDATE;
2233     gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);
2234
2235     memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
2236     gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);
2237
2238     gen9_vdenc_update_huc_update_dmem(ctx, encoder_context);
2239     memset(&dmem_state_params, 0, sizeof(dmem_state_params));
2240     dmem_state_params.huc_data_source_res = &vdenc_context->brc_update_dmem_res[vdenc_context->current_pass];
2241     dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
2242     dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_update_dmem), 64);
2243     gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);
2244
2245     gen9_vdenc_huc_brc_update_constant_data(ctx, encode_state, encoder_context);
2246     memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
2247     virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
2248     virtual_addr_params.regions[0].is_target = 1;
2249     virtual_addr_params.regions[1].huc_surface_res = &vdenc_context->vdenc_statistics_res;
2250     virtual_addr_params.regions[2].huc_surface_res = &vdenc_context->pak_statistics_res;
2251     virtual_addr_params.regions[3].huc_surface_res = &vdenc_context->vdenc_avc_image_state_res;
2252     virtual_addr_params.regions[4].huc_surface_res = &vdenc_context->hme_detection_summary_buffer_res;
2253     virtual_addr_params.regions[4].is_target = 1;
2254     virtual_addr_params.regions[5].huc_surface_res = &vdenc_context->brc_constant_data_res;
2255     virtual_addr_params.regions[6].huc_surface_res = &vdenc_context->second_level_batch_res;
2256     virtual_addr_params.regions[6].is_target = 1;
2257     gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);
2258
2259     memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
2260     ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
2261     ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
2262     gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);
2263
2264     memset(&stream_object_params, 0, sizeof(stream_object_params));
2265     stream_object_params.indirect_stream_in_data_length = 1;
2266     stream_object_params.indirect_stream_in_start_address = 0;
2267     gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);
2268
2269     gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);
2270
2271     memset(&start_params, 0, sizeof(start_params));
2272     start_params.last_stream_object = 1;
2273     gen9_vdenc_huc_start(ctx, encoder_context, &start_params);
2274
2275     memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
2276     pipeline_flush_params.hevc_pipeline_done = 1;
2277     pipeline_flush_params.hevc_pipeline_command_flush = 1;
2278     gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
2279
2280     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
2281     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
2282     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
2283
2284     /* Store HUC_STATUS */
2285     memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
2286     mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS;
2287     mi_store_register_mem_params.bo = vdenc_context->huc_status_res.bo;
2288     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
2289
2290     /* Write HUC_STATUS mask (1 << 31) */
2291     memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
2292     mi_store_data_imm_params.bo = vdenc_context->huc_status_res.bo;
2293     mi_store_data_imm_params.offset = 4;
2294     mi_store_data_imm_params.dw0 = (1 << 31);
2295     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
2296 }
2297
2298 static void
2299 gen9_vdenc_mfx_pipe_mode_select(VADriverContextP ctx,
2300                                 struct encode_state *encode_state,
2301                                 struct intel_encoder_context *encoder_context)
2302 {
2303     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2304     struct intel_batchbuffer *batch = encoder_context->base.batch;
2305
2306     BEGIN_BCS_BATCH(batch, 5);
2307
2308     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2309     OUT_BCS_BATCH(batch,
2310                   (1 << 29) |
2311                   (MFX_LONG_MODE << 17) |       /* Must be long format for encoder */
2312                   (MFD_MODE_VLD << 15) |
2313                   (1 << 13) |                   /* VDEnc mode */
2314                   ((!!vdenc_context->post_deblocking_output_res.bo) << 9)  |    /* Post Deblocking Output */
2315                   ((!!vdenc_context->pre_deblocking_output_res.bo) << 8)  |     /* Pre Deblocking Output */
2316                   (1 << 7)  |                   /* Scaled surface enable */
2317                   (1 << 6)  |                   /* Frame statistics stream out enable, always '1' in VDEnc mode */
2318                   (1 << 4)  |                   /* encoding mode */
2319                   (MFX_FORMAT_AVC << 0));
2320     OUT_BCS_BATCH(batch, 0);
2321     OUT_BCS_BATCH(batch, 0);
2322     OUT_BCS_BATCH(batch, 0);
2323
2324     ADVANCE_BCS_BATCH(batch);
2325 }
2326
2327 static void
2328 gen9_vdenc_mfx_surface_state(VADriverContextP ctx,
2329                              struct intel_encoder_context *encoder_context,
2330                              struct i965_gpe_resource *gpe_resource,
2331                              int id)
2332 {
2333     struct intel_batchbuffer *batch = encoder_context->base.batch;
2334
2335     BEGIN_BCS_BATCH(batch, 6);
2336
2337     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2338     OUT_BCS_BATCH(batch, id);
2339     OUT_BCS_BATCH(batch,
2340                   ((gpe_resource->height - 1) << 18) |
2341                   ((gpe_resource->width - 1) << 4));
2342     OUT_BCS_BATCH(batch,
2343                   (MFX_SURFACE_PLANAR_420_8 << 28) |    /* 420 planar YUV surface */
2344                   (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
2345                   ((gpe_resource->pitch - 1) << 3) |    /* pitch */
2346                   (0 << 2)  |                           /* must be 0 for interleave U/V */
2347                   (1 << 1)  |                           /* must be tiled */
2348                   (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
2349     OUT_BCS_BATCH(batch,
2350                   (0 << 16) |                   /* must be 0 for interleave U/V */
2351                   (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
2352     OUT_BCS_BATCH(batch,
2353                   (0 << 16) |                   /* must be 0 for interleave U/V */
2354                   (gpe_resource->y_cb_offset));         /* y offset for V(cr), same as Cb for interleaved U/V */
2355
2356     ADVANCE_BCS_BATCH(batch);
2357 }
2358
2359 static void
2360 gen9_vdenc_mfx_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2361 {
2362     struct i965_driver_data *i965 = i965_driver_data(ctx);
2363     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2364     struct intel_batchbuffer *batch = encoder_context->base.batch;
2365     int i;
2366
2367     BEGIN_BCS_BATCH(batch, 65);
2368
2369     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));
2370
2371     /* the DW1-3 is for pre_deblocking */
2372     OUT_BUFFER_3DW(batch, vdenc_context->pre_deblocking_output_res.bo, 1, 0, 0);
2373
2374     /* the DW4-6 is for the post_deblocking */
2375     OUT_BUFFER_3DW(batch, vdenc_context->post_deblocking_output_res.bo, 1, 0, 0);
2376
2377     /* the DW7-9 is for the uncompressed_picture */
2378     OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);
2379
2380     /* the DW10-12 is for PAK information (write) */
2381     OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 1, 0, 0);
2382
2383     /* the DW13-15 is for the intra_row_store_scratch */
2384     OUT_BUFFER_3DW(batch, vdenc_context->mfx_intra_row_store_scratch_res.bo, 1, 0, 0);
2385
2386     /* the DW16-18 is for the deblocking filter */
2387     OUT_BUFFER_3DW(batch, vdenc_context->mfx_deblocking_filter_row_store_scratch_res.bo, 1, 0, 0);
2388
2389     /* the DW 19-50 is for reference pictures */
2390     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
2391         OUT_BUFFER_2DW(batch, vdenc_context->list_reference_res[i].bo, 0, 0);
2392     }
2393
2394     /* DW 51, reference picture attributes */
2395     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2396
2397     /* The DW 52-54 is for PAK information (read) */
2398     OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 0, 0, 0);
2399
2400     /* the DW 55-57 is the ILDB buffer */
2401     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2402
2403     /* the DW 58-60 is the second ILDB buffer */
2404     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2405
2406     /* DW 61, memory compress enable & mode */
2407     OUT_BCS_BATCH(batch, 0);
2408
2409     /* the DW 62-64 is the 4x Down Scaling surface */
2410     OUT_BUFFER_3DW(batch, vdenc_context->scaled_4x_recon_surface_res.bo, 1, 0, 0);
2411
2412     ADVANCE_BCS_BATCH(batch);
2413 }
2414
2415 static void
2416 gen9_vdenc_mfx_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2417 {
2418     struct i965_driver_data *i965 = i965_driver_data(ctx);
2419     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2420     struct intel_batchbuffer *batch = encoder_context->base.batch;
2421
2422     BEGIN_BCS_BATCH(batch, 26);
2423
2424     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
2425     /* The DW1-5 is for the MFX indirect bitstream offset, ignore for VDEnc mode */
2426     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2427     OUT_BUFFER_2DW(batch, NULL, 0, 0);
2428
2429     /* the DW6-10 is for MFX Indirect MV Object Base Address, ignore for VDEnc mode */
2430     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2431     OUT_BUFFER_2DW(batch, NULL, 0, 0);
2432
2433     /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
2434     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2435     OUT_BUFFER_2DW(batch, NULL, 0, 0);
2436
2437     /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
2438     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2439     OUT_BUFFER_2DW(batch, NULL, 0, 0);
2440
2441     /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
2442      * Note: an offset is specified in MFX_AVC_SLICE_STATE
2443      */
2444     OUT_BUFFER_3DW(batch,
2445                    vdenc_context->compressed_bitstream.res.bo,
2446                    1,
2447                    0,
2448                    0);
2449     OUT_BUFFER_2DW(batch,
2450                    vdenc_context->compressed_bitstream.res.bo,
2451                    1,
2452                    vdenc_context->compressed_bitstream.end_offset);
2453
2454     ADVANCE_BCS_BATCH(batch);
2455 }
2456
2457 static void
2458 gen9_vdenc_mfx_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2459 {
2460     struct i965_driver_data *i965 = i965_driver_data(ctx);
2461     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2462     struct intel_batchbuffer *batch = encoder_context->base.batch;
2463
2464     BEGIN_BCS_BATCH(batch, 10);
2465
2466     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2467
2468     /* DW1-3 are for the BSD/MPC row store scratch buffer */
2469     OUT_BUFFER_3DW(batch, vdenc_context->mfx_bsd_mpc_row_store_scratch_res.bo, 1, 0, 0);
2470
2471     /* DW4-6 are for the MPR Row Store Scratch Buffer Base Address, ignored for the encoder */
2472     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2473
2474     /* DW7-9 are for the Bitplane Read Buffer Base Address, ignored for the encoder */
2475     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2476
2477     ADVANCE_BCS_BATCH(batch);
2478 }
2479
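/*
 * MFX_QM_STATE uploads one set of AVC quantizer matrices as packed bytes,
 * four coefficients per DWORD. The AVC 4x4 matrix type carries three 16-byte
 * scaling lists (12 DWs) and the 8x8 type one 64-byte list (16 DWs), which
 * is why the callers below pass 12 or 16 as qm_length while the command
 * itself is always 18 DWs long (layout inferred from the qm_length values
 * used below, not re-checked against the PRM).
 */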
2480 static void
2481 gen9_vdenc_mfx_qm_state(VADriverContextP ctx,
2482                         int qm_type,
2483                         unsigned int *qm,
2484                         int qm_length,
2485                         struct intel_encoder_context *encoder_context)
2486 {
2487     struct intel_batchbuffer *batch = encoder_context->base.batch;
2488     unsigned int qm_buffer[16];
2489
2490     assert(qm_length <= 16);
2491     assert(sizeof(*qm) == 4);
2492     memcpy(qm_buffer, qm, qm_length * 4);
2493
2494     BEGIN_BCS_BATCH(batch, 18);
2495     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
2496     OUT_BCS_BATCH(batch, qm_type << 0);
2497     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
2498     ADVANCE_BCS_BATCH(batch);
2499 }
2500
2501 static void
2502 gen9_vdenc_mfx_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2503 {
2504     /* TODO: add support for non flat matrix */
2505     unsigned int qm[16] = {
2506         0x10101010, 0x10101010, 0x10101010, 0x10101010,
2507         0x10101010, 0x10101010, 0x10101010, 0x10101010,
2508         0x10101010, 0x10101010, 0x10101010, 0x10101010,
2509         0x10101010, 0x10101010, 0x10101010, 0x10101010
2510     };
2511
2512     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
2513     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
2514     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
2515     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
2516 }
2517
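/*
 * MFX_FQM_STATE uploads the forward (encode-side) quantizer matrices as
 * 16-bit entries, two per DWORD: 24 DWs for the three AVC 4x4 lists and
 * 32 DWs for one 8x8 list. For the flat matrix used below every entry is
 * 0x1000, i.e. 0x10000 / 16, presumably the fixed-point reciprocal of the
 * flat QM value 16 (assumption; not re-checked against the PRM).
 */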
2518 static void
2519 gen9_vdenc_mfx_fqm_state(VADriverContextP ctx,
2520                          int fqm_type,
2521                          unsigned int *fqm,
2522                          int fqm_length,
2523                          struct intel_encoder_context *encoder_context)
2524 {
2525     struct intel_batchbuffer *batch = encoder_context->base.batch;
2526     unsigned int fqm_buffer[32];
2527
2528     assert(fqm_length <= 32);
2529     assert(sizeof(*fqm) == 4);
2530     memcpy(fqm_buffer, fqm, fqm_length * 4);
2531
2532     BEGIN_BCS_BATCH(batch, 34);
2533     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
2534     OUT_BCS_BATCH(batch, fqm_type << 0);
2535     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
2536     ADVANCE_BCS_BATCH(batch);
2537 }
2538
2539 static void
2540 gen9_vdenc_mfx_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2541 {
2542     /* TODO: add support for non flat matrix */
2543     unsigned int qm[32] = {
2544         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2545         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2546         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2547         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2548         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2549         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2550         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2551         0x10001000, 0x10001000, 0x10001000, 0x10001000
2552     };
2553
2554     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
2555     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
2556     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
2557     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
2558 }
2559
2560 static void
2561 gen9_vdenc_mfx_avc_img_state(VADriverContextP ctx,
2562                              struct encode_state *encode_state,
2563                              struct intel_encoder_context *encoder_context)
2564 {
2565     struct intel_batchbuffer *batch = encoder_context->base.batch;
2566     struct gen9_mfx_avc_img_state mfx_img_cmd;
2567
2568     gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &mfx_img_cmd, 0);
2569
2570     BEGIN_BCS_BATCH(batch, (sizeof(mfx_img_cmd) >> 2));
2571     intel_batchbuffer_data(batch, &mfx_img_cmd, sizeof(mfx_img_cmd));
2572     ADVANCE_BCS_BATCH(batch);
2573 }
2574
2575 static void
2576 gen9_vdenc_vdenc_pipe_mode_select(VADriverContextP ctx,
2577                                   struct encode_state *encode_state,
2578                                   struct intel_encoder_context *encoder_context)
2579 {
2580     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2581     struct intel_batchbuffer *batch = encoder_context->base.batch;
2582
2583     BEGIN_BCS_BATCH(batch, 2);
2584
2585     OUT_BCS_BATCH(batch, VDENC_PIPE_MODE_SELECT | (2 - 2));
2586     OUT_BCS_BATCH(batch,
2587                   (vdenc_context->vdenc_streamin_enable << 9) |
2588                   (vdenc_context->vdenc_pak_threshold_check_enable << 8) |
2589                   (1 << 7)  |                   /* TLB prefetch enable */
2590                   (1 << 5)  |                   /* Frame Statistics Stream-Out Enable */
2591                   (VDENC_CODEC_AVC << 0));
2592
2593     ADVANCE_BCS_BATCH(batch);
2594 }
2595
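/*
 * Common helper for the three VDEnc surface-state commands (SRC, REF and
 * DS REF). The surface is described as planar 4:2:0 with interleaved U/V
 * (NV12) in a Y-major tiled layout; y_cb_offset is the row offset of the
 * chroma plane and is programmed for both the Cb and the Cr DWORD because
 * the two chroma planes are interleaved.
 */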
2596 static void
2597 gen9_vdenc_vdenc_surface_state(VADriverContextP ctx,
2598                                struct intel_encoder_context *encoder_context,
2599                                struct i965_gpe_resource *gpe_resource,
2600                                int vdenc_surface_cmd)
2601 {
2602     struct intel_batchbuffer *batch = encoder_context->base.batch;
2603
2604     BEGIN_BCS_BATCH(batch, 6);
2605
2606     OUT_BCS_BATCH(batch, vdenc_surface_cmd | (6 - 2));
2607     OUT_BCS_BATCH(batch, 0);
2608     OUT_BCS_BATCH(batch,
2609                   ((gpe_resource->height - 1) << 18) |
2610                   ((gpe_resource->width - 1) << 4));
2611     OUT_BCS_BATCH(batch,
2612                   (VDENC_SURFACE_PLANAR_420_8 << 28) |  /* 420 planar YUV surface only on SKL */
2613                   (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
2614                   ((gpe_resource->pitch - 1) << 3) |    /* pitch */
2615                   (0 << 2)  |                           /* must be 0 for interleave U/V */
2616                   (1 << 1)  |                           /* must be tiled */
2617                   (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
2618     OUT_BCS_BATCH(batch,
2619                   (0 << 16) |                   /* must be 0 for interleave U/V */
2620                   (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
2621     OUT_BCS_BATCH(batch,
2622                   (0 << 16) |                   /* must be 0 for interleave U/V */
2623                   (gpe_resource->y_cb_offset));         /* y offset for v(cr) */
2624
2625     ADVANCE_BCS_BATCH(batch);
2626 }
2627
2628 static void
2629 gen9_vdenc_vdenc_src_surface_state(VADriverContextP ctx,
2630                                    struct intel_encoder_context *encoder_context,
2631                                    struct i965_gpe_resource *gpe_resource)
2632 {
2633     gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_SRC_SURFACE_STATE);
2634 }
2635
2636 static void
2637 gen9_vdenc_vdenc_ref_surface_state(VADriverContextP ctx,
2638                                    struct intel_encoder_context *encoder_context,
2639                                    struct i965_gpe_resource *gpe_resource)
2640 {
2641     gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_REF_SURFACE_STATE);
2642 }
2643
2644 static void
2645 gen9_vdenc_vdenc_ds_ref_surface_state(VADriverContextP ctx,
2646                                       struct intel_encoder_context *encoder_context,
2647                                       struct i965_gpe_resource *gpe_resource)
2648 {
2649     gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_DS_REF_SURFACE_STATE);
2650 }
2651
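/*
 * VDENC_PIPE_BUF_ADDR_STATE supplies the VDEnc-side buffers: the 4x
 * downscaled and full-resolution forward references (at most two on SKL),
 * the uncompressed source picture, the optional stream-in buffer, the VDEnc
 * row-store scratch buffer and the statistics stream-out buffer. Slots that
 * do not apply here (backward references, a third forward reference) are
 * programmed with NULL.
 */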
2652 static void
2653 gen9_vdenc_vdenc_pipe_buf_addr_state(VADriverContextP ctx,
2654                                      struct encode_state *encode_state,
2655                                      struct intel_encoder_context *encoder_context)
2656 {
2657     struct i965_driver_data *i965 = i965_driver_data(ctx);
2658     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2659     struct intel_batchbuffer *batch = encoder_context->base.batch;
2660
2661     BEGIN_BCS_BATCH(batch, 37);
2662
2663     OUT_BCS_BATCH(batch, VDENC_PIPE_BUF_ADDR_STATE | (37 - 2));
2664
2665     /* DW1-6 for DS FWD REF0/REF1 */
2666
2667     if (vdenc_context->list_ref_idx[0][0] != 0xFF)
2668         OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
2669     else
2670         OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2671
2672     if (vdenc_context->list_ref_idx[0][1] != 0xFF)
2673         OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
2674     else
2675         OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2676
2677     /* DW7-9 for DS BWD REF0, ignored on SKL */
2678     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2679
2680     /* DW10-12 for uncompressed input data */
2681     OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);
2682
2683     /* DW13-DW15 for streamin data */
2684     if (vdenc_context->vdenc_streamin_enable)
2685         OUT_BUFFER_3DW(batch, vdenc_context->vdenc_streamin_res.bo, 0, 0, 0);
2686     else
2687         OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2688
2689     /* DW16-DW18 for row scratch buffer */
2690     OUT_BUFFER_3DW(batch, vdenc_context->vdenc_row_store_scratch_res.bo, 1, 0, 0);
2691
2692     /* DW19-DW21, ignored on SKL */
2693     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2694
2695     /* DW22-DW27 for FWD REF0/REF1 */
2696
2697     if (vdenc_context->list_ref_idx[0][0] != 0xFF)
2698         OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
2699     else
2700         OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2701
2702     if (vdenc_context->list_ref_idx[0][1] != 0xFF)
2703         OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
2704     else
2705         OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2706
2707     /* DW28-DW30 for FWD REF2, ignored on SKL */
2708     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2709
2710     /* DW31-DW33 for BWD REF0, ignored on SKL */
2711     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2712
2713     /* DW34-DW36 for VDEnc statistics streamout */
2714     OUT_BUFFER_3DW(batch, vdenc_context->vdenc_statistics_res.bo, 1, 0, 0);
2715
2716     ADVANCE_BCS_BATCH(batch);
2717 }
2718
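/*
 * VDENC_CONST_QPT_STATE loads the per-QP constant tables: QP/lambda costs,
 * skip thresholds and the SIC forward-transform coefficient thresholds.
 * Separate tables are used for I and P frames; for P frames the skip
 * thresholds are additionally scaled by 3 before being programmed (the
 * factor follows the existing tables, its derivation is not documented
 * here).
 */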
2719 static void
2720 gen9_vdenc_vdenc_const_qpt_state(VADriverContextP ctx,
2721                                  struct encode_state *encode_state,
2722                                  struct intel_encoder_context *encoder_context)
2723 {
2724     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2725     struct intel_batchbuffer *batch = encoder_context->base.batch;
2726
2727     BEGIN_BCS_BATCH(batch, 61);
2728
2729     OUT_BCS_BATCH(batch, VDENC_CONST_QPT_STATE | (61 - 2));
2730
2731     if (vdenc_context->frame_type == VDENC_FRAME_I) {
2732         /* DW1-DW11 */
2733         intel_batchbuffer_data(batch, (void *)vdenc_const_qp_lambda, sizeof(vdenc_const_qp_lambda));
2734
2735         /* DW12-DW25 */
2736         intel_batchbuffer_data(batch, (void *)vdenc_const_skip_threshold, sizeof(vdenc_const_skip_threshold));
2737
2738         /* DW26-DW39 */
2739         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_0, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0));
2740
2741         /* DW40-DW46 */
2742         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_1, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1));
2743
2744         /* DW47-DW53 */
2745         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_2, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2));
2746
2747         /* DW54-DW60 */
2748         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_3, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3));
2749     } else {
2750         int i;
2751         uint16_t tmp_vdenc_skip_threshold_p[28];
2752
2753         memcpy(&tmp_vdenc_skip_threshold_p, vdenc_const_skip_threshold_p, sizeof(vdenc_const_skip_threshold_p));
2754
2755         for (i = 0; i < 28; i++) {
2756             tmp_vdenc_skip_threshold_p[i] *= 3;
2757         }
2758
2759         /* DW1-DW11 */
2760         intel_batchbuffer_data(batch, (void *)vdenc_const_qp_lambda_p, sizeof(vdenc_const_qp_lambda_p));
2761
2762         /* DW12-DW25 */
2763         intel_batchbuffer_data(batch, (void *)tmp_vdenc_skip_threshold_p, sizeof(vdenc_const_skip_threshold_p));
2764
2765         /* DW26-DW39 */
2766         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_0_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0_p));
2767
2768         /* DW40-DW46 */
2769         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_1_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1_p));
2770
2771         /* DW47-DW53 */
2772         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_2_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2_p));
2773
2774         /* DW54-DW60 */
2775         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_3_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3_p));
2776     }
2777
2778     ADVANCE_BCS_BATCH(batch);
2779 }
2780
2781 static void
2782 gen9_vdenc_vdenc_walker_state(VADriverContextP ctx,
2783                               struct encode_state *encode_state,
2784                               struct intel_encoder_context *encoder_context)
2785 {
2786     struct intel_batchbuffer *batch = encoder_context->base.batch;
2787
2788     BEGIN_BCS_BATCH(batch, 2);
2789
2790     OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (2 - 2));
2791     OUT_BCS_BATCH(batch, 0); /* All fields are set to 0 */
2792
2793     ADVANCE_BCS_BATCH(batch);
2794 }
2795
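/*
 * Gen9.5 slice-level path: VDENC_WEIGHTSOFFSETS_STATE packs the explicit L0
 * luma weights and offsets for up to three references, one byte per field,
 * two references per DWORD (offset << 24 | weight << 16 | offset << 8 |
 * weight). When weighted prediction is disabled the unity weight 1 and
 * offset 0 are programmed instead.
 */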
2796 static void
2797 gen95_vdenc_vdenc_weightsoffsets_state(VADriverContextP ctx,
2798                                        struct encode_state *encode_state,
2799                                        struct intel_encoder_context *encoder_context,
2800                                        VAEncSliceParameterBufferH264 *slice_param)
2801 {
2802     struct intel_batchbuffer *batch = encoder_context->base.batch;
2803     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2804
2805     BEGIN_BCS_BATCH(batch, 3);
2806
2807     OUT_BCS_BATCH(batch, VDENC_WEIGHTSOFFSETS_STATE | (3 - 2));
2808
2809     if (pic_param->pic_fields.bits.weighted_pred_flag == 1) {
2810         OUT_BCS_BATCH(batch, (slice_param->luma_offset_l0[1] << 24 |
2811                               slice_param->luma_weight_l0[1] << 16 |
2812                               slice_param->luma_offset_l0[0] << 8 |
2813                               slice_param->luma_weight_l0[0] << 0));
2814         OUT_BCS_BATCH(batch, (slice_param->luma_offset_l0[2] << 8 |
2815                               slice_param->luma_weight_l0[2] << 0));
2816     } else {
2817         OUT_BCS_BATCH(batch, (0 << 24 |
2818                               1 << 16 |
2819                               0 << 8 |
2820                               1 << 0));
2821         OUT_BCS_BATCH(batch, (0 << 8 |
2822                               1 << 0));
2823     }
2824
2825
2826     ADVANCE_BCS_BATCH(batch);
2827 }
2828
2829 static void
2830 gen95_vdenc_vdenc_walker_state(VADriverContextP ctx,
2831                                struct encode_state *encode_state,
2832                                struct intel_encoder_context *encoder_context,
2833                                VAEncSliceParameterBufferH264 *slice_param,
2834                                VAEncSliceParameterBufferH264 *next_slice_param)
2835 {
2836     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2837     struct intel_batchbuffer *batch = encoder_context->base.batch;
2838     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2839     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
2840     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
2841     int luma_log2_weight_denom, weighted_pred_idc;
2842
2843     slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
2844     slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
2845
2846     if (next_slice_param) {
2847         next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
2848         next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
2849     } else {
2850         next_slice_hor_pos = 0;
2851         next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
2852     }
2853
2854     if (slice_type == SLICE_TYPE_P)
2855         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
2856     else
2857         weighted_pred_idc = 0;
2858
2859     if (weighted_pred_idc == 1)
2860         luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
2861     else
2862         luma_log2_weight_denom = 0;
2863
2864     BEGIN_BCS_BATCH(batch, 4);
2865
2866     OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (4 - 2));
2867     OUT_BCS_BATCH(batch, (slice_hor_pos << 16 |
2868                           slice_ver_pos));
2869     OUT_BCS_BATCH(batch, (next_slice_hor_pos << 16 |
2870                           next_slice_ver_pos));
2871     OUT_BCS_BATCH(batch, luma_log2_weight_denom);
2872
2873     ADVANCE_BCS_BATCH(batch);
2874 }
2875
2876 static void
2877 gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
2878                            struct encode_state *encode_state,
2879                            struct intel_encoder_context *encoder_context)
2880 {
2881     struct intel_batchbuffer *batch = encoder_context->base.batch;
2882     struct gen9_vdenc_img_state vdenc_img_cmd;
2883
2884     gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, &vdenc_img_cmd, 1);
2885
2886     BEGIN_BCS_BATCH(batch, (sizeof(vdenc_img_cmd) >> 2));
2887     intel_batchbuffer_data(batch, &vdenc_img_cmd, sizeof(vdenc_img_cmd));
2888     ADVANCE_BCS_BATCH(batch);
2889 }
2890
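/*
 * MFX_INSERT_OBJECT copies raw header bytes into the output bitstream.
 * A data_bits_in_last_dw of 0 means the last DWORD is fully used (32 bits),
 * skip_emul_byte_count exempts the leading start-code/NAL-header bytes from
 * emulation prevention, emulation_flag asks the hardware to insert the 0x03
 * emulation-prevention bytes when the packed data does not already contain
 * them, and slice_header_indicator marks the payload as a slice header for
 * VDEnc PAK (interpretation follows how the callers pass these flags).
 */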
2891 static void
2892 gen9_vdenc_mfx_avc_insert_object(VADriverContextP ctx,
2893                                  struct intel_encoder_context *encoder_context,
2894                                  unsigned int *insert_data, int length_in_dws, int data_bits_in_last_dw,
2895                                  int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
2896                                  int slice_header_indicator)
2897 {
2898     struct intel_batchbuffer *batch = encoder_context->base.batch;
2899
2900     if (data_bits_in_last_dw == 0)
2901         data_bits_in_last_dw = 32;
2902
2903     BEGIN_BCS_BATCH(batch, length_in_dws + 2);
2904
2905     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (length_in_dws));
2906     OUT_BCS_BATCH(batch,
2907                   (0 << 16) |   /* always start at offset 0 */
2908                   (slice_header_indicator << 14) |
2909                   (data_bits_in_last_dw << 8) |
2910                   (skip_emul_byte_count << 4) |
2911                   (!!emulation_flag << 3) |
2912                   ((!!is_last_header) << 2) |
2913                   ((!!is_end_of_slice) << 1) |
2914                   (0 << 0));    /* TODO: check this flag */
2915     intel_batchbuffer_data(batch, insert_data, length_in_dws * 4);
2916
2917     ADVANCE_BCS_BATCH(batch);
2918 }
2919
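/*
 * Emits the packed data that precedes one slice: first every packed header
 * attached to the slice except the slice header itself, then, on the
 * slice-level (non frame-level) VDEnc path, the single zero byte that
 * KBL/GLK require in front of the slice header, and finally the slice
 * header, either taken from the packed-header buffers or generated by the
 * driver with build_avc_slice_header() when the application supplied none.
 */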
2920 static void
2921 gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
2922                                             struct encode_state *encode_state,
2923                                             struct intel_encoder_context *encoder_context,
2924                                             int slice_index)
2925 {
2926     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2927     struct i965_driver_data *i965 = i965_driver_data(ctx);
2928     VAEncPackedHeaderParameterBuffer *param = NULL;
2929     unsigned int length_in_bits;
2930     unsigned int *header_data = NULL;
2931     int count, i, start_index;
2932     int slice_header_index;
2933     unsigned int insert_one_zero_byte = 0;
2934
2935     if (encode_state->slice_header_index[slice_index] == 0)
2936         slice_header_index = -1;
2937     else
2938         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
2939
2940     count = encode_state->slice_rawdata_count[slice_index];
2941     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
2942
2943     for (i = 0; i < count; i++) {
2944         unsigned int skip_emul_byte_cnt;
2945
2946         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
2947
2948         param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
2949
2950         /* skip the slice header packed data type as it is inserted last */
2951         if (param->type == VAEncPackedHeaderSlice)
2952             continue;
2953
2954         length_in_bits = param->bit_length;
2955
2956         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2957
2958         /* as the slice header is still required, the last header flag is set to
2959          * zero.
2960          */
2961         gen9_vdenc_mfx_avc_insert_object(ctx,
2962                                          encoder_context,
2963                                          header_data,
2964                                          ALIGN(length_in_bits, 32) >> 5,
2965                                          length_in_bits & 0x1f,
2966                                          skip_emul_byte_cnt,
2967                                          0,
2968                                          0,
2969                                          !param->has_emulation_bytes,
2970                                          0);
2971
2972     }
2973
2974     if (!vdenc_context->is_frame_level_vdenc) {
2975         insert_one_zero_byte = 1;
2976     }
2977
2978     /* Insert one zero byte before the slice header if no other NAL unit is inserted, required on KBL */
2979     if (insert_one_zero_byte) {
2980         unsigned int insert_data[] = { 0, };
2981
2982         gen9_vdenc_mfx_avc_insert_object(ctx,
2983                                          encoder_context,
2984                                          insert_data,
2985                                          1,
2986                                          8,
2987                                          1,
2988                                          0, 0, 0, 0);
2989     }
2990
2991     if (slice_header_index == -1) {
2992         VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
2993         VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2994         VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
2995         unsigned char *slice_header = NULL, *slice_header1 = NULL;
2996         int slice_header_length_in_bits = 0;
2997         uint32_t saved_macroblock_address = 0;
2998
2999         /* No slice header data is passed, so the driver needs to generate it */
3000         /* (the normal H.264 case) */
3001
3002         if (slice_index &&
3003             (IS_KBL(i965->intel.device_info) ||
3004              IS_GLK(i965->intel.device_info))) {
3005             saved_macroblock_address = slice_params->macroblock_address;
3006             slice_params->macroblock_address = 0;
3007         }
3008
3009         slice_header_length_in_bits = build_avc_slice_header(seq_param,
3010                                                              pic_param,
3011                                                              slice_params,
3012                                                              &slice_header);
3013
3014         slice_header1 = slice_header;
3015
3016         if (slice_index &&
3017             (IS_KBL(i965->intel.device_info) ||
3018              IS_GLK(i965->intel.device_info))) {
3019             slice_params->macroblock_address = saved_macroblock_address;
3020         }
3021
3022         if (insert_one_zero_byte) {
3023             slice_header1 += 1;
3024             slice_header_length_in_bits -= 8;
3025         }
3026
3027         gen9_vdenc_mfx_avc_insert_object(ctx,
3028                                          encoder_context,
3029                                          (unsigned int *)slice_header1,
3030                                          ALIGN(slice_header_length_in_bits, 32) >> 5,
3031                                          slice_header_length_in_bits & 0x1f,
3032                                          5,  /* first 5 bytes are start code + nal unit type */
3033                                          1, 0, 1,
3034                                          1);
3035
3036         free(slice_header);
3037     } else {
3038         unsigned int skip_emul_byte_cnt;
3039         unsigned char *slice_header1 = NULL;
3040
3041         if (slice_index &&
3042             (IS_KBL(i965->intel.device_info) ||
3043              IS_GLK(i965->intel.device_info))) {
3044             slice_header_index = (encode_state->slice_header_index[0] & SLICE_PACKED_DATA_INDEX_MASK);
3045         }
3046
3047         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
3048
3049         param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
3050         length_in_bits = param->bit_length;
3051
3052         slice_header1 = (unsigned char *)header_data;
3053
3054         if (insert_one_zero_byte) {
3055             slice_header1 += 1;
3056             length_in_bits -= 8;
3057         }
3058
3059         /* as the slice header is the last header data for one slice,
3060          * the last header flag is set to one.
3061          */
3062         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3063
3064         if (insert_one_zero_byte)
3065             skip_emul_byte_cnt -= 1;
3066
3067         gen9_vdenc_mfx_avc_insert_object(ctx,
3068                                          encoder_context,
3069                                          (unsigned int *)slice_header1,
3070                                          ALIGN(length_in_bits, 32) >> 5,
3071                                          length_in_bits & 0x1f,
3072                                          skip_emul_byte_cnt,
3073                                          1,
3074                                          0,
3075                                          !param->has_emulation_bytes,
3076                                          1);
3077     }
3078
3079     return;
3080 }
3081
3082 static void
3083 gen9_vdenc_mfx_avc_insert_headers(VADriverContextP ctx,
3084                                  struct encode_state *encode_state,
3085                                  struct intel_encoder_context *encoder_context,
3086                                  VAEncSliceParameterBufferH264 *slice_param,
3087                                  int slice_index)
3088 {
3089     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3090     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
3091     unsigned int internal_rate_mode = vdenc_context->internal_rate_mode;
3092     unsigned int skip_emul_byte_cnt;
3093
3094     if (slice_index == 0) {
3095
3096         if (encode_state->packed_header_data[idx]) {
3097             VAEncPackedHeaderParameterBuffer *param = NULL;
3098             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3099             unsigned int length_in_bits;
3100
3101             assert(encode_state->packed_header_param[idx]);
3102             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3103             length_in_bits = param->bit_length;
3104
3105             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3106             gen9_vdenc_mfx_avc_insert_object(ctx,
3107                                              encoder_context,
3108                                              header_data,
3109                                              ALIGN(length_in_bits, 32) >> 5,
3110                                              length_in_bits & 0x1f,
3111                                              skip_emul_byte_cnt,
3112                                              0,
3113                                              0,
3114                                              !param->has_emulation_bytes,
3115                                              0);
3116
3117         }
3118
3119         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
3120
3121         if (encode_state->packed_header_data[idx]) {
3122             VAEncPackedHeaderParameterBuffer *param = NULL;
3123             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3124             unsigned int length_in_bits;
3125
3126             assert(encode_state->packed_header_param[idx]);
3127             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3128             length_in_bits = param->bit_length;
3129
3130             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3131
3132             gen9_vdenc_mfx_avc_insert_object(ctx,
3133                                              encoder_context,
3134                                              header_data,
3135                                              ALIGN(length_in_bits, 32) >> 5,
3136                                              length_in_bits & 0x1f,
3137                                              skip_emul_byte_cnt,
3138                                              0,
3139                                              0,
3140                                              !param->has_emulation_bytes,
3141                                              0);
3142
3143         }
3144
3145         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
3146
3147         if (encode_state->packed_header_data[idx]) {
3148             VAEncPackedHeaderParameterBuffer *param = NULL;
3149             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3150             unsigned int length_in_bits;
3151
3152             assert(encode_state->packed_header_param[idx]);
3153             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3154             length_in_bits = param->bit_length;
3155
3156             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3157             gen9_vdenc_mfx_avc_insert_object(ctx,
3158                                              encoder_context,
3159                                              header_data,
3160                                              ALIGN(length_in_bits, 32) >> 5,
3161                                              length_in_bits & 0x1f,
3162                                              skip_emul_byte_cnt,
3163                                              0,
3164                                              0,
3165                                              !param->has_emulation_bytes,
3166                                              0);
3167
3168         } else if (internal_rate_mode == I965_BRC_CBR) {
3169             /* TODO: insert others */
3170         }
3171     }
3172
3173     gen9_vdenc_mfx_avc_insert_slice_packed_data(ctx,
3174                                                 encode_state,
3175                                                 encoder_context,
3176                                                 slice_index);
3177 }
3178
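/*
 * MFX_AVC_SLICE_STATE for PAK. slice_qp is pic_init_qp plus the slice QP
 * delta, which is only exact for CQP (the TODO below notes that CBR/VBR
 * still needs fixing here). The MFX-internal rate-control fields (grow,
 * shrink, correct[], QP clamps) are left at zero, presumably because rate
 * control is handled by HuC/VDEnc rather than the legacy MFX panic mode
 * (assumption based on the TODO markers below).
 */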
3179 static void
3180 gen9_vdenc_mfx_avc_slice_state(VADriverContextP ctx,
3181                                struct encode_state *encode_state,
3182                                struct intel_encoder_context *encoder_context,
3183                                VAEncPictureParameterBufferH264 *pic_param,
3184                                VAEncSliceParameterBufferH264 *slice_param,
3185                                VAEncSliceParameterBufferH264 *next_slice_param,
3186                                int slice_index)
3187 {
3188     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3189     struct intel_batchbuffer *batch = encoder_context->base.batch;
3190     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
3191     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
3192     unsigned char correct[6], grow, shrink;
3193     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
3194     int max_qp_n, max_qp_p;
3195     int i;
3196     int weighted_pred_idc = 0;
3197     int num_ref_l0 = 0, num_ref_l1 = 0;
3198     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3199     int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; /* TODO: fix for CBR & VBR */
3200     int inter_rounding = 0;
3201
3202     if (vdenc_context->internal_rate_mode != I965_BRC_CQP)
3203         inter_rounding = 3;
3204
3205     slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
3206     slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
3207
3208     if (next_slice_param) {
3209         next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
3210         next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
3211     } else {
3212         next_slice_hor_pos = 0;
3213         next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
3214     }
3215
3216     if (slice_type == SLICE_TYPE_I) {
3217         luma_log2_weight_denom = 0;
3218         chroma_log2_weight_denom = 0;
3219     } else if (slice_type == SLICE_TYPE_P) {
3220         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
3221         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
3222
3223         if (slice_param->num_ref_idx_active_override_flag)
3224             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3225     } else if (slice_type == SLICE_TYPE_B) {
3226         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
3227         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
3228         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
3229
3230         if (slice_param->num_ref_idx_active_override_flag) {
3231             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3232             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
3233         }
3234
3235         if (weighted_pred_idc == 2) {
3236             /* 8.4.3 - Derivation process for prediction weights (8-279) */
3237             luma_log2_weight_denom = 5;
3238             chroma_log2_weight_denom = 5;
3239         }
3240     }
3241
3242     max_qp_n = 0;       /* TODO: update it */
3243     max_qp_p = 0;       /* TODO: update it */
3244     grow = 0;           /* TODO: update it */
3245     shrink = 0;         /* TODO: update it */
3246
3247     for (i = 0; i < 6; i++)
3248         correct[i] = 0; /* TODO: update it */
3249
3250     BEGIN_BCS_BATCH(batch, 11);
3251
3252     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
3253     OUT_BCS_BATCH(batch, slice_type);
3254     OUT_BCS_BATCH(batch,
3255                   (num_ref_l0 << 16) |
3256                   (num_ref_l1 << 24) |
3257                   (chroma_log2_weight_denom << 8) |
3258                   (luma_log2_weight_denom << 0));
3259     OUT_BCS_BATCH(batch,
3260                   (weighted_pred_idc << 30) |
3261                   (slice_param->direct_spatial_mv_pred_flag << 29) |
3262                   (slice_param->disable_deblocking_filter_idc << 27) |
3263                   (slice_param->cabac_init_idc << 24) |
3264                   (slice_qp << 16) |
3265                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
3266                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
3267
3268     OUT_BCS_BATCH(batch,
3269                   slice_ver_pos << 24 |
3270                   slice_hor_pos << 16 |
3271                   slice_param->macroblock_address);
3272     OUT_BCS_BATCH(batch,
3273                   next_slice_ver_pos << 16 |
3274                   next_slice_hor_pos);
3275
3276     OUT_BCS_BATCH(batch,
3277                   (0 << 31) |           /* TODO: ignore it for VDENC ??? */
3278                   (!slice_param->macroblock_address << 30) |    /* ResetRateControlCounter */
3279                   (2 << 28) |       /* Loose Rate Control */
3280                   (0 << 24) |           /* RC Stable Tolerance */
3281                   (0 << 23) |           /* RC Panic Enable */
3282                   (1 << 22) |           /* CBP mode */
3283                   (0 << 21) |           /* MB Type Direct Conversion, 0: Enable, 1: Disable */
3284                   (0 << 20) |           /* MB Type Skip Conversion, 0: Enable, 1: Disable */
3285                   (!next_slice_param << 19) |                   /* Is Last Slice */
3286                   (0 << 18) |           /* BitstreamOutputFlag: Compressed Bitstream Output Disable, 0: enable, 1: disable */
3287                   (1 << 17) |           /* HeaderPresentFlag */
3288                   (1 << 16) |           /* SliceData PresentFlag */
3289                   (0 << 15) |           /* TailPresentFlag, TODO: check it on VDEnc  */
3290                   (1 << 13) |           /* RBSP NAL TYPE */
3291                   (slice_index << 4) |
3292                   (1 << 12));           /* CabacZeroWordInsertionEnable */
3293
3294     OUT_BCS_BATCH(batch, vdenc_context->compressed_bitstream.start_offset);
3295
3296     OUT_BCS_BATCH(batch,
3297                   (max_qp_n << 24) |     /* Target QP - 24 is the lowest QP */
3298                   (max_qp_p << 16) |     /* Target QP + 20 is the highest QP */
3299                   (shrink << 8) |
3300                   (grow << 0));
3301     OUT_BCS_BATCH(batch,
3302                   (1 << 31) |
3303                   (inter_rounding << 28) |
3304                   (1 << 27) |
3305                   (5 << 24) |
3306                   (correct[5] << 20) |
3307                   (correct[4] << 16) |
3308                   (correct[3] << 12) |
3309                   (correct[2] << 8) |
3310                   (correct[1] << 4) |
3311                   (correct[0] << 0));
3312     OUT_BCS_BATCH(batch, 0);
3313
3314     ADVANCE_BCS_BATCH(batch);
3315 }
3316
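/*
 * Packs one entry of the MFX_AVC_REF_IDX_STATE table: bit 6 is the
 * long-term flag, bit 5 is set when the entry is a frame rather than a
 * single field, bits 4:1 carry the frame store index and bit 0 selects the
 * bottom field.
 */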
3317 static uint8_t
3318 gen9_vdenc_mfx_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
3319 {
3320     unsigned int is_long_term =
3321         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
3322     unsigned int is_top_field =
3323         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
3324     unsigned int is_bottom_field =
3325         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
3326
3327     return ((is_long_term                         << 6) |
3328             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
3329             (frame_store_id                       << 1) |
3330             ((is_top_field ^ 1) & is_bottom_field));
3331 }
3332
3333 static void
3334 gen9_vdenc_mfx_avc_ref_idx_state(VADriverContextP ctx,
3335                                  struct encode_state *encode_state,
3336                                  struct intel_encoder_context *encoder_context,
3337                                  VAEncSliceParameterBufferH264 *slice_param)
3338 {
3339     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3340     struct intel_batchbuffer *batch = encoder_context->base.batch;
3341     VAPictureH264 *ref_pic;
3342     int i, slice_type, ref_idx_shift;
3343     unsigned int fwd_ref_entry;
3344
3345     fwd_ref_entry = 0x80808080;
3346     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3347
3348     for (i = 0; i < MIN(vdenc_context->num_refs[0], 3); i++) {
3349         ref_pic = &slice_param->RefPicList0[i];
3350         ref_idx_shift = i * 8;
3351
3352         if (vdenc_context->list_ref_idx[0][i] == 0xFF)
3353             continue;
3354
3355         fwd_ref_entry &= ~(0xFF << ref_idx_shift);
3356         fwd_ref_entry += (gen9_vdenc_mfx_get_ref_idx_state(ref_pic, vdenc_context->list_ref_idx[0][i]) << ref_idx_shift);
3357     }
3358
3359     if (slice_type == SLICE_TYPE_P) {
3360         BEGIN_BCS_BATCH(batch, 10);
3361         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
3362         OUT_BCS_BATCH(batch, 0);                        // L0
3363         OUT_BCS_BATCH(batch, fwd_ref_entry);
3364
3365         for (i = 0; i < 7; i++) {
3366             OUT_BCS_BATCH(batch, 0x80808080);
3367         }
3368
3369         ADVANCE_BCS_BATCH(batch);
3370     }
3371
3372     if (slice_type == SLICE_TYPE_B) {
3373         /* VDEnc on SKL doesn't support backward (B) prediction */
3374         assert(0);
3375     }
3376 }
3377
3378 static void
3379 gen9_vdenc_mfx_avc_weightoffset_state(VADriverContextP ctx,
3380                                       struct encode_state *encode_state,
3381                                       struct intel_encoder_context *encoder_context,
3382                                       VAEncPictureParameterBufferH264 *pic_param,
3383                                       VAEncSliceParameterBufferH264 *slice_param)
3384 {
3385     struct intel_batchbuffer *batch = encoder_context->base.batch;
3386     int i, slice_type;
3387     short weightoffsets[32 * 6];
3388
3389     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3390
3391     if (slice_type == SLICE_TYPE_P &&
3392         pic_param->pic_fields.bits.weighted_pred_flag == 1) {
3393
3394         for (i = 0; i < 32; i++) {
3395             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
3396             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
3397             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
3398             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
3399             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
3400             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
3401         }
3402
3403         BEGIN_BCS_BATCH(batch, 98);
3404         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
3405         OUT_BCS_BATCH(batch, 0);
3406         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
3407
3408         ADVANCE_BCS_BATCH(batch);
3409     }
3410
3411     if (slice_type == SLICE_TYPE_B) {
3412         /* VDEnc on SKL doesn't support backward (B) prediction */
3413         assert(0);
3414     }
3415 }
3416
3417 static void
3418 gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
3419                                 struct encode_state *encode_state,
3420                                 struct intel_encoder_context *encoder_context,
3421                                 VAEncSliceParameterBufferH264 *slice_param,
3422                                 VAEncSliceParameterBufferH264 *next_slice_param,
3423                                 int slice_index)
3424 {
3425     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3426     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
3427
3428     gen9_vdenc_mfx_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param);
3429     gen9_vdenc_mfx_avc_weightoffset_state(ctx,
3430                                           encode_state,
3431                                           encoder_context,
3432                                           pic_param,
3433                                           slice_param);
3434     gen9_vdenc_mfx_avc_slice_state(ctx,
3435                                    encode_state,
3436                                    encoder_context,
3437                                    pic_param,
3438                                    slice_param,
3439                                    next_slice_param,
3440                                    slice_index);
3441     gen9_vdenc_mfx_avc_insert_headers(ctx,
3442                                      encode_state,
3443                                      encoder_context,
3444                                      slice_param,
3445                                      slice_index);
3446
3447     if (!vdenc_context->is_frame_level_vdenc) {
3448         gen95_vdenc_vdenc_weightsoffsets_state(ctx,
3449                                                encode_state,
3450                                                encoder_context,
3451                                                slice_param);
3452         gen95_vdenc_vdenc_walker_state(ctx,
3453                                        encode_state,
3454                                        encoder_context,
3455                                        slice_param,
3456                                        next_slice_param);
3457     }
3458 }
3459
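/*
 * Walks all slices of the frame. On the frame-level VDEnc path only the
 * first slice is programmed and a single VDENC_WALKER_STATE covers the
 * whole frame (assumption: frame-level VDEnc encodes the frame as one
 * slice); on the slice-level path every slice gets its own VDEnc
 * weights/walker states followed by a VD_PIPELINE_FLUSH, with an
 * MI_FLUSH_DW between slices. A final MI_FLUSH_DW with video pipeline
 * cache invalidation closes the sequence.
 */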
3460 static void
3461 gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
3462                                 struct encode_state *encode_state,
3463                                 struct intel_encoder_context *encoder_context)
3464 {
3465     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3466     struct intel_batchbuffer *batch = encoder_context->base.batch;
3467     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
3468     VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
3469     int i, j;
3470     int slice_index = 0;
3471     int has_tail = 0;                   /* TODO: check it later */
3472
3473     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3474         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3475
3476         if (j == encode_state->num_slice_params_ext - 1)
3477             next_slice_group_param = NULL;
3478         else
3479             next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
3480
3481         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
3482             if (i < encode_state->slice_params_ext[j]->num_elements - 1)
3483                 next_slice_param = slice_param + 1;
3484             else
3485                 next_slice_param = next_slice_group_param;
3486
3487             gen9_vdenc_mfx_avc_single_slice(ctx,
3488                                             encode_state,
3489                                             encoder_context,
3490                                             slice_param,
3491                                             next_slice_param,
3492                                             slice_index);
3493
3494             if (vdenc_context->is_frame_level_vdenc)
3495                 break;
3496             else {
3497                 struct vd_pipeline_flush_parameter pipeline_flush_params;
3498                 int insert_mi_flush;
3499
3500                 memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
3501
3502                 if (next_slice_group_param) {
3503                     pipeline_flush_params.mfx_pipeline_done = 1;
3504                     insert_mi_flush = 1;
3505                 } else if (i < encode_state->slice_params_ext[j]->num_elements - 1) {
3506                     pipeline_flush_params.mfx_pipeline_done = 1;
3507                     insert_mi_flush = 1;
3508                 } else {
3509                     pipeline_flush_params.mfx_pipeline_done = !has_tail;
3510                     insert_mi_flush = 0;
3511                 }
3512
3513                 pipeline_flush_params.vdenc_pipeline_done = 1;
3514                 pipeline_flush_params.vdenc_pipeline_command_flush = 1;
3515                 pipeline_flush_params.vd_command_message_parser_done = 1;
3516                 gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
3517
3518                 if (insert_mi_flush) {
3519                     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3520                     mi_flush_dw_params.video_pipeline_cache_invalidate = 0;
3521                     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3522                 }
3523             }
3524
3525             slice_param++;
3526             slice_index++;
3527         }
3528
3529         if (vdenc_context->is_frame_level_vdenc)
3530             break;
3531     }
3532
3533     if (vdenc_context->is_frame_level_vdenc) {
3534         struct vd_pipeline_flush_parameter pipeline_flush_params;
3535
3536         gen9_vdenc_vdenc_walker_state(ctx, encode_state, encoder_context);
3537
3538         memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
3539         pipeline_flush_params.mfx_pipeline_done = !has_tail;
3540         pipeline_flush_params.vdenc_pipeline_done = 1;
3541         pipeline_flush_params.vdenc_pipeline_command_flush = 1;
3542         pipeline_flush_params.vd_command_message_parser_done = 1;
3543         gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
3544     }
3545
3546     if (has_tail) {
3547         /* TODO: insert a tail if required */
3548     }
3549
3550     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3551     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
3552     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3553 }
3554
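/*
 * Builds the MFX + VDEnc command sequence for one PAK pass. With BRC
 * enabled (or on a re-pass) the batch starts with
 * MI_CONDITIONAL_BATCH_BUFFER_END checks on the HuC status buffers so the
 * pass can terminate early, and the IMG states are executed from the
 * HuC-patched second-level batch instead of being emitted directly; all
 * remaining states (pipe mode, surfaces, buffer addresses, QM/FQM and the
 * per-slice states) are emitted inline for every pass.
 */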
3555 static void
3556 gen9_vdenc_mfx_vdenc_pipeline(VADriverContextP ctx,
3557                               struct encode_state *encode_state,
3558                               struct intel_encoder_context *encoder_context)
3559 {
3560     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3561     struct intel_batchbuffer *batch = encoder_context->base.batch;
3562     struct gpe_mi_batch_buffer_start_parameter mi_batch_buffer_start_params;
3563
3564     if (vdenc_context->brc_enabled) {
3565         struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
3566
3567         memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
3568         mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
3569         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
3570     }
3571
3572     if (vdenc_context->current_pass) {
3573         struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
3574
3575         memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
3576         mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status_res.bo;
3577         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
3578     }
3579
3580     gen9_vdenc_mfx_pipe_mode_select(ctx, encode_state, encoder_context);
3581
3582     gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res, 0);
3583     gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res, 4);
3584     gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res, 5);
3585
3586     gen9_vdenc_mfx_pipe_buf_addr_state(ctx, encoder_context);
3587     gen9_vdenc_mfx_ind_obj_base_addr_state(ctx, encoder_context);
3588     gen9_vdenc_mfx_bsp_buf_base_addr_state(ctx, encoder_context);
3589
3590     gen9_vdenc_vdenc_pipe_mode_select(ctx, encode_state, encoder_context);
3591     gen9_vdenc_vdenc_src_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res);
3592     gen9_vdenc_vdenc_ref_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res);
3593     gen9_vdenc_vdenc_ds_ref_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res);
3594     gen9_vdenc_vdenc_pipe_buf_addr_state(ctx, encode_state, encoder_context);
3595     gen9_vdenc_vdenc_const_qpt_state(ctx, encode_state, encoder_context);
3596
3597     if (!vdenc_context->brc_enabled) {
3598         gen9_vdenc_mfx_avc_img_state(ctx, encode_state, encoder_context);
3599         gen9_vdenc_vdenc_img_state(ctx, encode_state, encoder_context);
3600     } else {
3601         memset(&mi_batch_buffer_start_params, 0, sizeof(mi_batch_buffer_start_params));
3602         mi_batch_buffer_start_params.is_second_level = 1; /* Must be the second level batch buffer */
3603         mi_batch_buffer_start_params.bo = vdenc_context->second_level_batch_res.bo;
3604         gen8_gpe_mi_batch_buffer_start(ctx, batch, &mi_batch_buffer_start_params);
3605     }
3606
3607     gen9_vdenc_mfx_avc_qm_state(ctx, encoder_context);
3608     gen9_vdenc_mfx_avc_fqm_state(ctx, encoder_context);
3609
3610     gen9_vdenc_mfx_vdenc_avc_slices(ctx, encode_state, encoder_context);
3611 }
3612
3613 static void
3614 gen9_vdenc_context_brc_prepare(struct encode_state *encode_state,
3615                                struct intel_encoder_context *encoder_context)
3616 {
3617     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3618     unsigned int rate_control_mode = encoder_context->rate_control_mode;
3619
3620     switch (rate_control_mode & 0x7f) {
3621     case VA_RC_CBR:
3622         vdenc_context->internal_rate_mode = I965_BRC_CBR;
3623         break;
3624
3625     case VA_RC_VBR:
3626         vdenc_context->internal_rate_mode = I965_BRC_VBR;
3627         break;
3628
3629     case VA_RC_CQP:
3630     default:
3631         vdenc_context->internal_rate_mode = I965_BRC_CQP;
3632         break;
3633     }
3634 }
3635
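/*
 * After the PAK pass, MFC_BITSTREAM_BYTECOUNT_FRAME is stored into the
 * status buffer (bytes-per-frame slot) and, together with
 * MFC_IMAGE_STATUS_CTRL, into DW5/DW7 of every BRC-update DMEM buffer,
 * presumably so the next HuC BRC update pass can read the size and PAK
 * status of the previous frame (the DW5/DW7 destinations follow the
 * offsets used below).
 */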
3636 static void
3637 gen9_vdenc_read_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3638 {
3639     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3640     struct intel_batchbuffer *batch = encoder_context->base.batch;
3641     struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
3642     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
3643     unsigned int base_offset = vdenc_context->status_bffuer.base_offset;
3644     int i;
3645
3646     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3647     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3648
3649     memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
3650     mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
3651     mi_store_register_mem_params.bo = vdenc_context->status_bffuer.res.bo;
3652     mi_store_register_mem_params.offset = base_offset + vdenc_context->status_bffuer.bytes_per_frame_offset;
3653     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3654
3655     /* Update the BRC Update DMEM buffer of each PAK pass with the PAK frame byte count and image status control register values */
3656     for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
3657         mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
3658         mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
3659         mi_store_register_mem_params.offset = 5 * sizeof(uint32_t);
3660         gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3661
3662         mi_store_register_mem_params.mmio_offset = MFC_IMAGE_STATUS_CTRL_REG; /* TODO: fix it if VDBOX2 is used */
3663         mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
3664         mi_store_register_mem_params.offset = 7 * sizeof(uint32_t);
3665         gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3666     }
3667 }
3668
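/* This VDENC path does not handle B slices, so fail early if any slice is of type B. */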
3669 static VAStatus
3670 gen9_vdenc_avc_check_capability(VADriverContextP ctx,
3671                                 struct encode_state *encode_state,
3672                                 struct intel_encoder_context *encoder_context)
3673 {
3674     VAEncSliceParameterBufferH264 *slice_param;
3675     int i, j;
3676
3677     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3678         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3679
3680         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
3681             if (slice_param->slice_type == SLICE_TYPE_B)
3682                 return VA_STATUS_ERROR_UNKNOWN;
3683
3684             slice_param++;
3685         }
3686     }
3687
3688     return VA_STATUS_SUCCESS;
3689 }
3690
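/*
 * Encode one frame. Resources are prepared once, then the MFX/VDENC pipeline
 * is submitted on the BSD ring for each pass. With BRC enabled, every pass is
 * preceded by a HuC BRC init/reset (on the first frame or after a reset
 * request) and a HuC BRC update before the pipeline commands are emitted.
 */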
3691 static VAStatus
3692 gen9_vdenc_avc_encode_picture(VADriverContextP ctx,
3693                               VAProfile profile,
3694                               struct encode_state *encode_state,
3695                               struct intel_encoder_context *encoder_context)
3696 {
3697     VAStatus va_status;
3698     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3699     struct intel_batchbuffer *batch = encoder_context->base.batch;
3700
3701     va_status = gen9_vdenc_avc_check_capability(ctx, encode_state, encoder_context);
3702
3703     if (va_status != VA_STATUS_SUCCESS)
3704         return va_status;
3705
3706     gen9_vdenc_avc_prepare(ctx, profile, encode_state, encoder_context);
3707
3708     for (vdenc_context->current_pass = 0; vdenc_context->current_pass < vdenc_context->num_passes; vdenc_context->current_pass++) {
3709         vdenc_context->is_first_pass = (vdenc_context->current_pass == 0);
3710         vdenc_context->is_last_pass = (vdenc_context->current_pass == (vdenc_context->num_passes - 1));
3711
3712         intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
3713
3714         intel_batchbuffer_emit_mi_flush(batch);
3715
3716         if (vdenc_context->brc_enabled) {
3717             if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset)
3718                 gen9_vdenc_huc_brc_init_reset(ctx, encode_state, encoder_context);
3719
3720             gen9_vdenc_huc_brc_update(ctx, encode_state, encoder_context);
3721             intel_batchbuffer_emit_mi_flush(batch);
3722         }
3723
3724         gen9_vdenc_mfx_vdenc_pipeline(ctx, encode_state, encoder_context);
3725         gen9_vdenc_read_status(ctx, encoder_context);
3726
3727         intel_batchbuffer_end_atomic(batch);
3728         intel_batchbuffer_flush(batch);
3729
3730         vdenc_context->brc_initted = 1;
3731         vdenc_context->brc_need_reset = 0;
3732     }
3733
3734     return VA_STATUS_SUCCESS;
3735 }
3736
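/*
 * Encoder pipeline entry point registered as mfc_pipeline; only the H.264
 * Constrained Baseline, Main and High profiles are supported.
 */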
3737 static VAStatus
3738 gen9_vdenc_pipeline(VADriverContextP ctx,
3739                     VAProfile profile,
3740                     struct encode_state *encode_state,
3741                     struct intel_encoder_context *encoder_context)
3742 {
3743     VAStatus va_status;
3744
3745     switch (profile) {
3746     case VAProfileH264ConstrainedBaseline:
3747     case VAProfileH264Main:
3748     case VAProfileH264High:
3749         va_status = gen9_vdenc_avc_encode_picture(ctx, profile, encode_state, encoder_context);
3750         break;
3751
3752     default:
3753         va_status = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
3754         break;
3755     }
3756
3757     return va_status;
3758 }
3759
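/* Free every GPE resource held by the VDENC context. */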
3760 static void
3761 gen9_vdenc_free_resources(struct gen9_vdenc_context *vdenc_context)
3762 {
3763     int i;
3764
3765     i965_free_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
3766     i965_free_gpe_resource(&vdenc_context->brc_history_buffer_res);
3767     i965_free_gpe_resource(&vdenc_context->brc_stream_in_res);
3768     i965_free_gpe_resource(&vdenc_context->brc_stream_out_res);
3769     i965_free_gpe_resource(&vdenc_context->huc_dummy_res);
3770
3771     for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++)
3772         i965_free_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
3773
3774     i965_free_gpe_resource(&vdenc_context->vdenc_statistics_res);
3775     i965_free_gpe_resource(&vdenc_context->pak_statistics_res);
3776     i965_free_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
3777     i965_free_gpe_resource(&vdenc_context->hme_detection_summary_buffer_res);
3778     i965_free_gpe_resource(&vdenc_context->brc_constant_data_res);
3779     i965_free_gpe_resource(&vdenc_context->second_level_batch_res);
3780
3781     i965_free_gpe_resource(&vdenc_context->huc_status_res);
3782     i965_free_gpe_resource(&vdenc_context->huc_status2_res);
3783
3784     i965_free_gpe_resource(&vdenc_context->recon_surface_res);
3785     i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
3786     i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
3787     i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
3788
3789     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
3790         i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
3791         i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
3792     }
3793
3794     i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
3795     i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
3796     i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
3797
3798     i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
3799     i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
3800     i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
3801     i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
3802
3803     i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
3804 }
3805
3806 static void
3807 gen9_vdenc_context_destroy(void *context)
3808 {
3809     struct gen9_vdenc_context *vdenc_context = context;
3810
3811     gen9_vdenc_free_resources(vdenc_context);
3812
3813     free(vdenc_context);
3814 }
3815
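/*
 * Allocate the fixed-size GPU buffers used by the HuC BRC kernels and the
 * VDENC/PAK pipeline. The frame-size dependent surfaces (recon, bitstream,
 * row-store scratch buffers) are managed separately and released in
 * gen9_vdenc_free_resources().
 */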
3816 static void
3817 gen9_vdenc_allocate_resources(VADriverContextP ctx,
3818                               struct intel_encoder_context *encoder_context,
3819                               struct gen9_vdenc_context *vdenc_context)
3820 {
3821     struct i965_driver_data *i965 = i965_driver_data(ctx);
3822     int i;
3823
3824     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_init_reset_dmem_res,
3825                                 ALIGN(sizeof(struct huc_brc_init_dmem), 64),
3826                                 "HuC Init&Reset DMEM buffer");
3827
3828     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_history_buffer_res,
3829                                 ALIGN(HUC_BRC_HISTORY_BUFFER_SIZE, 0x1000),
3830                                 "HuC History buffer");
3831
3832     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_in_res,
3833                                 ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3834                                 "HuC Stream In buffer");
3835
3836     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_out_res,
3837                                 ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3838                                 "HuC Stream Out buffer");
3839
3840     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_dummy_res,
3841                                 0x1000,
3842                                 "HuC dummy buffer");
3843
3844     for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
3845         ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_update_dmem_res[i],
3846                                     ALIGN(sizeof(struct huc_brc_update_dmem), 64),
3847                                     "HuC BRC Update buffer");
3848         i965_zero_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
3849     }
3850
3851     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_statistics_res,
3852                                 ALIGN(VDENC_STATISTICS_SIZE, 0x1000),
3853                                 "VDENC statistics buffer");
3854
3855     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->pak_statistics_res,
3856                                 ALIGN(PAK_STATISTICS_SIZE, 0x1000),
3857                                 "PAK statistics buffer");
3858
3859     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_avc_image_state_res,
3860                                 ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3861                                 "VDENC/AVC image state buffer");
3862
3863     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->hme_detection_summary_buffer_res,
3864                                 ALIGN(HME_DETECTION_SUMMARY_BUFFER_SIZE, 0x1000),
3865                                 "HME summary buffer");
3866
3867     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_constant_data_res,
3868                                 ALIGN(BRC_CONSTANT_DATA_SIZE, 0x1000),
3869                                 "BRC constant buffer");
3870
3871     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->second_level_batch_res,
3872                                 ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3873                                 "Second level batch buffer");
3874
3875     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status_res,
3876                                 0x1000,
3877                                 "HuC Status buffer");
3878
3879     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status2_res,
3880                                 0x1000,
3881                                 "HuC Status2 buffer");
3882 }
3883
3884 static void
3885 gen9_vdenc_hw_interfaces_init(VADriverContextP ctx,
3886                               struct intel_encoder_context *encoder_context,
3887                               struct gen9_vdenc_context *vdenc_context)
3888 {
3889     vdenc_context->is_frame_level_vdenc = 1;
3890 }
3891
3892 static void
3893 gen95_vdenc_hw_interfaces_init(VADriverContextP ctx,
3894                                struct intel_encoder_context *encoder_context,
3895                                struct gen9_vdenc_context *vdenc_context)
3896 {
3897     vdenc_context->use_extended_pak_obj_cmd = 1;
3898 }
3899
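/*
 * Select the per-platform VDENC behaviour: KBL and GLK use the extended PAK
 * object command, while other Gen9 platforms run frame-level VDENC.
 */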
3900 static void
3901 vdenc_hw_interfaces_init(VADriverContextP ctx,
3902                          struct intel_encoder_context *encoder_context,
3903                          struct gen9_vdenc_context *vdenc_context)
3904 {
3905     struct i965_driver_data *i965 = i965_driver_data(ctx);
3906
3907     if (IS_KBL(i965->intel.device_info) ||
3908         IS_GLK(i965->intel.device_info)) {
3909         gen95_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
3910     } else {
3911         gen9_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
3912     }
3913 }
3914
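/*
 * Report the encoded frame size back through the coded buffer segment;
 * bytes_per_frame is filled in by the status read-back emitted in
 * gen9_vdenc_read_status().
 */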
3915 static VAStatus
3916 gen9_vdenc_context_get_status(VADriverContextP ctx,
3917                               struct intel_encoder_context *encoder_context,
3918                               struct i965_coded_buffer_segment *coded_buffer_segment)
3919 {
3920     struct gen9_vdenc_status *vdenc_status = (struct gen9_vdenc_status *)coded_buffer_segment->codec_private_data;
3921
3922     coded_buffer_segment->base.size = vdenc_status->bytes_per_frame;
3923
3924     return VA_STATUS_SUCCESS;
3925 }
3926
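/*
 * Create and initialize the VDENC encoder context, allocate its static
 * resources, and register the pipeline, BRC prepare, status and destroy
 * hooks with the common encoder context.
 */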
3927 Bool
3928 gen9_vdenc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3929 {
3930     struct gen9_vdenc_context *vdenc_context = calloc(1, sizeof(struct gen9_vdenc_context));
3931
3932     if (!vdenc_context)
3933         return False;
3934
3935     vdenc_context->brc_initted = 0;
3936     vdenc_context->brc_need_reset = 0;
3937     vdenc_context->is_low_delay = 0;
3938     vdenc_context->current_pass = 0;
3939     vdenc_context->num_passes = 1;
3940     vdenc_context->vdenc_streamin_enable = 0;
3941     vdenc_context->vdenc_pak_threshold_check_enable = 0;
3942     vdenc_context->is_frame_level_vdenc = 0;
3943
3944     vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
3945     gen9_vdenc_allocate_resources(ctx, encoder_context, vdenc_context);
3946
3947     encoder_context->mfc_context = vdenc_context;
3948     encoder_context->mfc_context_destroy = gen9_vdenc_context_destroy;
3949     encoder_context->mfc_pipeline = gen9_vdenc_pipeline;
3950     encoder_context->mfc_brc_prepare = gen9_vdenc_context_brc_prepare;
3951     encoder_context->get_status = gen9_vdenc_context_get_status;
3952
3953     return True;
3954 }