OSDN Git Service

Unify the coding style in the driver
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_vdenc.c
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41 #include "intel_media.h"
42 #include "gen9_vdenc.h"
43
/*
 * Normalizes a slice_type value for AVC encoding.  Presumably defined in
 * i965_encoder_utils.c -- the prototype is declared locally here instead of
 * being exported from a shared header.  NOTE(review): consider moving this
 * declaration into a header so the definition and its users stay in sync.
 */
extern int
intel_avc_enc_slice_type_fixup(int slice_type);
46
/*
 * 9x8 signed buffer-rate QP-adjustment table for I frames in low-delay BRC
 * mode.  Row/column semantics (buffer-fullness vs. rate-ratio buckets) are
 * inferred from the table name -- NOTE(review): confirm against the HuC BRC
 * update kernel.
 *
 * Declared int8_t (was uint8_t): the table holds negative deltas, and the
 * matching buf_rate_adj_tab_*_vbr tables are already int8_t.  The in-memory
 * byte pattern is unchanged (two's complement), so raw-byte consumers such
 * as memcpy into the HuC constant buffer are unaffected.
 */
static const int8_t buf_rate_adj_tab_i_lowdelay[72] = {
    0,   0, -8, -12, -16, -20, -28, -36,
    0,   0, -4,  -8, -12, -16, -24, -32,
    4,   2,  0,  -1,  -3,  -8, -16, -24,
    8,   4,  2,   0,  -1,  -4,  -8, -16,
    20, 16,  4,   0,  -1,  -4,  -8, -16,
    24, 20, 16,   8,   4,   0,  -4,  -8,
    28, 24, 20,  16,   8,   4,   0,  -8,
    32, 24, 20,  16,   8,   4,   0,  -4,
    64, 48, 28,  20,  16,  12,   8,   4,
};
58
/*
 * 9x8 signed buffer-rate QP-adjustment table for P frames in low-delay BRC
 * mode (companion of buf_rate_adj_tab_i_lowdelay).
 *
 * Declared int8_t (was uint8_t): the table holds negative deltas, matching
 * the *_vbr tables' element type.  Two's-complement byte pattern is
 * identical to the previous declaration, so raw-byte consumers see no
 * change.
 */
static const int8_t buf_rate_adj_tab_p_lowdelay[72] = {
    -8, -24, -32, -40, -44, -48, -52, -80,
    -8, -16, -32, -40, -40, -44, -44, -56,
    0,    0, -12, -20, -24, -28, -32, -36,
    8,    4,   0,   0,  -8, -16, -24, -32,
    32,  16,   8,   4,  -4,  -8, -16, -20,
    36,  24,  16,   8,   4,  -2,  -4,  -8,
    40,  36,  24,  20,  16,   8,   0,  -8,
    48,  40,  28,  24,  20,  12,   0,  -4,
    64,  48,  28,  20,  16,  12,   8,   4,
};
70
/*
 * 9x8 signed buffer-rate QP-adjustment table for B frames in low-delay BRC
 * mode (companion of buf_rate_adj_tab_i_lowdelay).
 *
 * Declared int8_t (was uint8_t): the table holds negative deltas, matching
 * the *_vbr tables' element type.  Two's-complement byte pattern is
 * identical to the previous declaration, so raw-byte consumers see no
 * change.
 */
static const int8_t buf_rate_adj_tab_b_lowdelay[72] = {
    0,  -4, -8, -16, -24, -32, -40, -48,
    1,   0, -4,  -8, -16, -24, -32, -40,
    4,   2,  0,  -1,  -3,  -8, -16, -24,
    8,   4,  2,   0,  -1,  -4,  -8, -16,
    20, 16,  4,   0,  -1,  -4,  -8, -16,
    24, 20, 16,   8,   4,   0,  -4,  -8,
    28, 24, 20,  16,   8,   4,   0,  -8,
    32, 24, 20,  16,   8,   4,   0,  -4,
    64, 48, 28,  20,  16,  12,   8,   4,
};
82
/*
 * 9x9 signed QP-delta table for I frames under VBR rate control.
 * NOTE(review): row/column semantics (distortion vs. rate buckets) are
 * inferred from the table name -- confirm against the HuC BRC update kernel.
 */
static const int8_t dist_qp_adj_tab_i_vbr[81] = {
    +0,  0,  0,  0, 0, 3, 4, 6, 8,
    +0,  0,  0,  0, 0, 2, 3, 5, 7,
    -1,  0,  0,  0, 0, 2, 2, 4, 5,
    -1, -1,  0,  0, 0, 1, 2, 2, 4,
    -2, -2, -1,  0, 0, 0, 1, 2, 4,
    -2, -2, -1,  0, 0, 0, 1, 2, 4,
    -3, -2, -1, -1, 0, 0, 1, 2, 5,
    -3, -2, -1, -1, 0, 0, 2, 4, 7,
    -4, -3, -2, -1, 0, 1, 3, 5, 8,
};
94
/*
 * 9x9 signed QP-delta table for P frames under VBR rate control
 * (companion of dist_qp_adj_tab_i_vbr; same indexing).
 */
static const int8_t dist_qp_adj_tab_p_vbr[81] = {
    -1,  0,  0,  0, 0, 1, 1, 2, 3,
    -1, -1,  0,  0, 0, 1, 1, 2, 3,
    -2, -1, -1,  0, 0, 1, 1, 2, 3,
    -3, -2, -2, -1, 0, 0, 1, 2, 3,
    -3, -2, -1, -1, 0, 0, 1, 2, 3,
    -3, -2, -1, -1, 0, 0, 1, 2, 3,
    -3, -2, -1, -1, 0, 0, 1, 2, 3,
    -3, -2, -1, -1, 0, 0, 1, 2, 3,
    -3, -2, -1, -1, 0, 0, 1, 2, 3,
};
106
/*
 * 9x9 signed QP-delta table for B frames under VBR rate control
 * (companion of dist_qp_adj_tab_i_vbr; same indexing).
 */
static const int8_t dist_qp_adj_tab_b_vbr[81] = {
    +0,  0,  0,  0, 0, 2, 3, 3, 4,
    +0,  0,  0,  0, 0, 2, 3, 3, 4,
    -1,  0,  0,  0, 0, 2, 2, 3, 3,
    -1, -1,  0,  0, 0, 1, 2, 2, 2,
    -1, -1, -1,  0, 0, 0, 1, 2, 2,
    -2, -1, -1,  0, 0, 0, 0, 1, 2,
    -2, -1, -1, -1, 0, 0, 0, 1, 3,
    -2, -2, -1, -1, 0, 0, 1, 1, 3,
    -2, -2, -1, -1, 0, 1, 1, 2, 4,
};
118
/*
 * 9x8 signed buffer-rate QP-adjustment table for I frames under VBR.
 * NOTE(review): bucket semantics inferred from the name -- confirm against
 * the HuC BRC update kernel users of this table.
 */
static const int8_t buf_rate_adj_tab_i_vbr[72] = {
    -4, -20, -28, -36, -40, -44, -48, -80,
    +0,  -8, -12, -20, -24, -28, -32, -36,
    +0,   0,  -8, -16, -20, -24, -28, -32,
    +8,   4,   0,   0,  -8, -16, -24, -28,
    32,  24,  16,   2,  -4,  -8, -16, -20,
    36,  32,  28,  16,   8,   0,  -4,  -8,
    40,  36,  24,  20,  16,   8,   0,  -8,
    48,  40,  28,  24,  20,  12,   0,  -4,
    64,  48,  28,  20,  16,  12,   8,   4,
};
130
/*
 * 9x8 signed buffer-rate QP-adjustment table for P frames under VBR
 * (companion of buf_rate_adj_tab_i_vbr; same indexing).
 */
static const int8_t buf_rate_adj_tab_p_vbr[72] = {
    -8, -24, -32, -44, -48, -56, -64, -80,
    -8, -16, -32, -40, -44, -52, -56, -64,
    +0,   0, -16, -28, -36, -40, -44, -48,
    +8,   4,   0,   0,  -8, -16, -24, -36,
    20,  12,   4,   0,  -8,  -8,  -8, -16,
    24,  16,   8,   8,   8,   0,  -4,  -8,
    40,  36,  24,  20,  16,   8,   0,  -8,
    48,  40,  28,  24,  20,  12,   0,  -4,
    64,  48,  28,  20,  16,  12,   8,   4,
};
142
/*
 * 9x8 signed buffer-rate QP-adjustment table for B frames under VBR
 * (companion of buf_rate_adj_tab_i_vbr; same indexing).
 */
static const int8_t buf_rate_adj_tab_b_vbr[72] = {
    0,  -4, -8, -16, -24, -32, -40, -48,
    1,   0, -4,  -8, -16, -24, -32, -40,
    4,   2,  0,  -1,  -3,  -8, -16, -24,
    8,   4,  2,   0,  -1,  -4,  -8, -16,
    20, 16,  4,   0,  -1,  -4,  -8, -16,
    24, 20, 16,   8,   4,   0,  -4,  -8,
    28, 24, 20,  16,   8,   4,   0,  -8,
    32, 24, 20,  16,   8,   4,   0,  -4,
    64, 48, 28,  20,  16,  12,   8,   4,
};
154
/*
 * Default constant-data blob for the HuC BRC update pass.  The layout is
 * dictated by struct huc_brc_update_constant_data (gen9_vdenc.h).  The
 * buf_rate_adj_tab_* members hold the non-lowdelay defaults; the standalone
 * *_lowdelay tables above presumably replace them when low-delay BRC is in
 * use -- NOTE(review): confirm against the code that fills the HuC buffer.
 */
static const struct huc_brc_update_constant_data
        gen9_brc_update_constant_data = {
    /* 8x8 global rate-vs-QP adjustment tables, one per frame type */
    .global_rate_qp_adj_tab_i = {
        48, 40, 32,  24,  16,   8,   0,  -8,
        40, 32, 24,  16,   8,   0,  -8, -16,
        32, 24, 16,   8,   0,  -8, -16, -24,
        24, 16,  8,   0,  -8, -16, -24, -32,
        16, 8,   0,  -8, -16, -24, -32, -40,
        8,  0,  -8, -16, -24, -32, -40, -48,
        0, -8, -16, -24, -32, -40, -48, -56,
        48, 40, 32,  24,  16,   8,   0,  -8,
    },

    .global_rate_qp_adj_tab_p = {
        48,  40,  32,  24,  16,  8,    0,  -8,
        40,  32,  24,  16,   8,  0,   -8, -16,
        16,   8,   8,   4,  -8, -16, -16, -24,
        8,    0,   0,  -8, -16, -16, -16, -24,
        8,    0,   0, -24, -32, -32, -32, -48,
        0,  -16, -16, -24, -32, -48, -56, -64,
        -8, -16, -32, -32, -48, -48, -56, -64,
        -16, -32, -48, -48, -48, -56, -64, -80,
    },

    .global_rate_qp_adj_tab_b = {
        48, 40, 32, 24,  16,   8,   0,  -8,
        40, 32, 24, 16,  8,    0,  -8, -16,
        32, 24, 16,  8,  0,   -8, -16, -24,
        24, 16, 8,   0, -8,   -8, -16, -24,
        16, 8,  0,   0, -8,  -16, -24, -32,
        16, 8,  0,   0, -8,  -16, -24, -32,
        0, -8, -8, -16, -32, -48, -56, -64,
        0, -8, -8, -16, -32, -48, -56, -64
    },

    /* Distortion bucket boundaries; identical for I, P and B frames */
    .dist_threshld_i = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
    .dist_threshld_p = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
    .dist_threshld_b = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },

    /* 9x9 distortion-vs-QP adjustment tables; values match the standalone
     * dist_qp_adj_tab_*_vbr tables above */
    .dist_qp_adj_tab_i = {
        0,   0,  0,  0,  0,  3,  4,  6,  8,
        0,   0,  0,  0,  0,  2,  3,  5,  7,
        -1,  0,  0,  0,  0,  2,  2,  4,  5,
        -1, -1,  0,  0,  0,  1,  2,  2,  4,
        -2, -2, -1,  0,  0,  0,  1,  2,  4,
        -2, -2, -1,  0,  0,  0,  1,  2,  4,
        -3, -2, -1, -1,  0,  0,  1,  2,  5,
        -3, -2, -1, -1,  0,  0,  2,  4,  7,
        -4, -3, -2, -1,  0,  1,  3,  5,  8,
    },

    .dist_qp_adj_tab_p = {
        -1,   0,  0,  0,  0,  1,  1,  2,  3,
        -1,  -1,  0,  0,  0,  1,  1,  2,  3,
        -2,  -1, -1,  0,  0,  1,  1,  2,  3,
        -3,  -2, -2, -1,  0,  0,  1,  2,  3,
        -3,  -2, -1, -1,  0,  0,  1,  2,  3,
        -3,  -2, -1, -1,  0,  0,  1,  2,  3,
        -3,  -2, -1, -1,  0,  0,  1,  2,  3,
        -3,  -2, -1, -1,  0,  0,  1,  2,  3,
        -3,  -2, -1, -1,  0,  0,  1,  2,  3,
    },

    .dist_qp_adj_tab_b = {
        0,   0,  0,  0, 0, 2, 3, 3, 4,
        0,   0,  0,  0, 0, 2, 3, 3, 4,
        -1,  0,  0,  0, 0, 2, 2, 3, 3,
        -1, -1,  0,  0, 0, 1, 2, 2, 2,
        -1, -1, -1,  0, 0, 0, 1, 2, 2,
        -2, -1, -1,  0, 0, 0, 0, 1, 2,
        -2, -1, -1, -1, 0, 0, 0, 1, 3,
        -2, -2, -1, -1, 0, 0, 1, 1, 3,
        -2, -2, -1, -1, 0, 1, 1, 2, 4,
    },

    /* default table for non lowdelay */
    .buf_rate_adj_tab_i = {
        -4, -20, -28, -36, -40, -44, -48, -80,
        0,   -8, -12, -20, -24, -28, -32, -36,
        0,    0,  -8, -16, -20, -24, -28, -32,
        8,    4,   0,   0,  -8, -16, -24, -28,
        32,  24,  16,   2,  -4,  -8, -16, -20,
        36,  32,  28,  16,   8,   0,  -4,  -8,
        40,  36,  24,  20,  16,   8,   0,  -8,
        48,  40,  28,  24,  20,  12,   0,  -4,
        64,  48,  28,  20,  16,  12,   8,   4,
    },

    /* default table for non lowdelay */
    .buf_rate_adj_tab_p = {
        -8, -24, -32, -44, -48, -56, -64, -80,
        -8, -16, -32, -40, -44, -52, -56, -64,
        0,    0, -16, -28, -36, -40, -44, -48,
        8,    4,   0,   0,  -8, -16, -24, -36,
        20,  12,   4,   0,  -8,  -8,  -8, -16,
        24,  16,   8,   8,   8,   0,  -4,  -8,
        40,  36,  24,  20,  16,   8,   0,  -8,
        48,  40,  28,  24,  20,  12,   0,  -4,
        64,  48,  28,  20,  16,  12,   8,   4,
    },

    /* default table for non lowdelay */
    .buf_rate_adj_tab_b = {
        0,  -4, -8, -16, -24, -32, -40, -48,
        1,   0, -4,  -8, -16, -24, -32, -40,
        4,   2,  0,  -1,  -3,  -8, -16, -24,
        8,   4,  2,   0,  -1,  -4,  -8, -16,
        20, 16,  4,   0,  -1,  -4,  -8, -16,
        24, 20, 16,   8,   4,   0,  -4,  -8,
        28, 24, 20,  16,   8,   4,   0,  -8,
        32, 24, 20,  16,   8,   4,   0,  -4,
        64, 48, 28,  20,  16,  12,   8,   4,
    },

    /* Frame-size guard tables (min/max/SCG), per-field-name semantics */
    .frame_size_min_tab_p = { 1, 2, 4, 6, 8, 10, 16, 16, 16 },
    .frame_size_min_tab_i = { 1, 2, 4, 8, 16, 20, 24, 32, 36 },

    .frame_size_max_tab_p = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
    .frame_size_max_tab_i = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },

    .frame_size_scg_tab_p = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
    .frame_size_scg_tab_i = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },

    /* 42-byte per-QP mode cost tables (3 rows x 14); semantics per the
     * field names (I/P slice, intra/inter modes) */
    .i_intra_non_pred = {
        0x0e, 0x0e, 0x0e, 0x18, 0x19, 0x1b, 0x1c, 0x0d, 0x0f, 0x18, 0x19, 0x0d, 0x0f, 0x0f,
        0x0c, 0x0e, 0x0c, 0x0c, 0x0a, 0x0a, 0x0b, 0x0a, 0x0a, 0x0a, 0x09, 0x09, 0x08, 0x08,
        0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x07, 0x07, 0x07, 0x07, 0x07,
    },

    .i_intra_16x16 = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    },

    .i_intra_8x8 = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
        0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x04, 0x04, 0x04, 0x04, 0x06, 0x06, 0x06,
        0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07,
    },

    .i_intra_4x4 = {
        0x2e, 0x2e, 0x2e, 0x38, 0x39, 0x3a, 0x3b, 0x2c, 0x2e, 0x38, 0x39, 0x2d, 0x2f, 0x38,
        0x2e, 0x38, 0x2e, 0x38, 0x2f, 0x2e, 0x38, 0x38, 0x38, 0x38, 0x2f, 0x2f, 0x2f, 0x2e,
        0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x1e, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x0e, 0x0d,
    },

    .i_intra_chroma = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    },

    .p_intra_non_pred = {
        0x06, 0x06, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x07,
        0x07, 0x07, 0x06, 0x07, 0x07, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
        0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
    },

    .p_intra_16x16 = {
        0x1b, 0x1b, 0x1b, 0x1c, 0x1e, 0x28, 0x29, 0x1a, 0x1b, 0x1c, 0x1e, 0x1a, 0x1c, 0x1d,
        0x1b, 0x1c, 0x1c, 0x1c, 0x1c, 0x1b, 0x1c, 0x1c, 0x1d, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c,
        0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
    },

    .p_intra_8x8 = {
        0x1d, 0x1d, 0x1d, 0x1e, 0x28, 0x29, 0x2a, 0x1b, 0x1d, 0x1e, 0x28, 0x1c, 0x1d, 0x1f,
        0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1d, 0x1f, 0x1e, 0x1e, 0x1e, 0x1d, 0x1e, 0x1e, 0x1d,
        0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e,
    },

    .p_intra_4x4 = {
        0x38, 0x38, 0x38, 0x39, 0x3a, 0x3b, 0x3d, 0x2e, 0x38, 0x39, 0x3a, 0x2f, 0x39, 0x3a,
        0x38, 0x39, 0x38, 0x39, 0x39, 0x38, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
        0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
    },

    .p_intra_chroma = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    },

    .p_inter_16x8 = {
        0x07, 0x07, 0x07, 0x08, 0x09, 0x0b, 0x0c, 0x06, 0x07, 0x09, 0x0a, 0x07, 0x08, 0x09,
        0x08, 0x09, 0x08, 0x09, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x08, 0x08, 0x08, 0x08,
        0x08, 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
    },

    .p_inter_8x8 = {
        0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x02, 0x02, 0x02, 0x03, 0x02, 0x02, 0x02,
        0x02, 0x03, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
        0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
    },

    .p_inter_16x16 = {
        0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
        0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
        0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
    },

    .p_ref_id = {
        0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
        0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
        0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04
    },

    /* HME MV cost per MV-magnitude bucket (see the mv<=N comments) */
    .hme_mv_cost = {
        /* mv = 0 */
        {
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        },

        /* mv <= 16 */
        {
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        },

        /* mv <= 32 */
        {
            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
            0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
        },

        /* mv <= 64 */
        {
            0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
            0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
            0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
        },

        /* mv <= 128 */
        {
            0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
            0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
            0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
        },

        /* mv <= 256 */
        {
            0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
            0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
            0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
        },

        /* mv <= 512 */
        {
            0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
            0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
            0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
        },

        /* mv <= 1024 */
        {
            0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
            0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
            0x1a, 0x1a, 0x1a, 0x1a, 0x1f, 0x2a, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d
        },
    },
};
420
/* 11 DWs */
/*
 * Per-QP lambda values (44 bytes, last two are padding).  Currently
 * identical to vdenc_const_qp_lambda_p; presumably kept separate so the
 * I- and P-frame defaults can diverge independently.
 */
static const uint8_t vdenc_const_qp_lambda[44] = {
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
    0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
    0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
    0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
    0x4a, 0x53, 0x00, 0x00
};
429
/* 14 DWs */
/*
 * Per-QP skip thresholds (all zero for this table; the P-frame variant
 * vdenc_const_skip_threshold_p carries real values).  "{ 0 }" replaces the
 * previous empty-brace initializer, which is a GNU extension and invalid in
 * standard C before C23; remaining elements are zero-filled per C11 6.7.9.
 */
static const uint16_t vdenc_const_skip_threshold[28] = {
    0
};
434
/* 14 DWs */
/*
 * SIC forward-transform coefficient thresholds, set 0 (all zero here; see
 * the _0_p variant for the P-frame values).  "{ 0 }" replaces the previous
 * empty-brace initializer (GNU extension, invalid in C11); remaining
 * elements are zero-filled.
 */
static const uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0[28] = {
    0
};
439
/* 7 DWs */
/*
 * SIC forward-transform coefficient thresholds, set 1 (all zero here; see
 * the _1_p variant for the P-frame values).  "{ 0 }" replaces the previous
 * empty-brace initializer (GNU extension, invalid in C11).
 */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1[28] = {
    0
};
444
/* 7 DWs */
/*
 * SIC forward-transform coefficient thresholds, set 2 (all zero here; see
 * the _2_p variant for the P-frame values).  "{ 0 }" replaces the previous
 * empty-brace initializer (GNU extension, invalid in C11).
 */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2[28] = {
    0
};
449
/* 7 DWs */
/*
 * SIC forward-transform coefficient thresholds, set 3 (all zero here; see
 * the _3_p variant for the P-frame values).  "{ 0 }" replaces the previous
 * empty-brace initializer (GNU extension, invalid in C11).
 */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3[28] = {
    0
};
454
/* P frame */
/* 11 DWs */
/*
 * Per-QP lambda values for P frames (44 bytes, last two are padding).
 * Currently identical to the I-frame table vdenc_const_qp_lambda.
 */
static const uint8_t vdenc_const_qp_lambda_p[44] = {
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
    0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
    0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
    0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
    0x4a, 0x53, 0x00, 0x00
};
464
/* 14 DWs */
/*
 * Per-QP macroblock skip thresholds for P frames (26 values + 2 padding
 * entries to fill 14 DWs).
 */
static const uint16_t vdenc_const_skip_threshold_p[28] = {
    0x0000, 0x0000, 0x0000, 0x0000, 0x0002, 0x0004, 0x0007, 0x000b,
    0x0011, 0x0019, 0x0023, 0x0032, 0x0044, 0x005b, 0x0077, 0x0099,
    0x00c2, 0x00f1, 0x0128, 0x0168, 0x01b0, 0x0201, 0x025c, 0x02c2,
    0x0333, 0x03b0, 0x0000, 0x0000
};
472
/* 14 DWs */
/*
 * SIC forward-transform coefficient thresholds for P frames, set 0
 * (uint16_t elements, hence 14 DWs; 26 values + 2 padding entries).
 */
static const uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0_p[28] = {
    0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
    0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x3f,
    0x4e, 0x51, 0x5b, 0x63, 0x6f, 0x7f, 0x00, 0x00
};
479
/* 7 DWs */
/*
 * SIC forward-transform coefficient thresholds for P frames, set 1
 * (byte elements, 26 values + 2 padding entries).
 */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1_p[28] = {
    0x03, 0x04, 0x05, 0x05, 0x07, 0x09, 0x0b, 0x0e, 0x12, 0x17,
    0x1c, 0x21, 0x27, 0x2c, 0x33, 0x3b, 0x41, 0x51, 0x5c, 0x1a,
    0x1e, 0x21, 0x22, 0x26, 0x2c, 0x30, 0x00, 0x00
};
486
/* 7 DWs */
/*
 * SIC forward-transform coefficient thresholds for P frames, set 2
 * (byte elements, 26 values + 2 padding entries).
 */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2_p[28] = {
    0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
    0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x0f,
    0x13, 0x14, 0x16, 0x18, 0x1b, 0x1f, 0x00, 0x00
};
493
/* 7 DWs */
/*
 * SIC forward-transform coefficient thresholds for P frames, set 3
 * (byte elements, 26 values + 2 padding entries).
 */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3_p[28] = {
    0x04, 0x05, 0x06, 0x09, 0x0b, 0x0d, 0x12, 0x16, 0x1b, 0x23,
    0x2c, 0x33, 0x3d, 0x45, 0x4f, 0x5b, 0x66, 0x7f, 0x8e, 0x2a,
    0x2f, 0x32, 0x37, 0x3c, 0x45, 0x4c, 0x00, 0x00
};
500
/*
 * BRC tuning scalars and small lookup tables.  Field semantics below are
 * inferred from the identifier names -- NOTE(review): confirm exact units
 * and indexing against the HuC BRC kernel that consumes them.
 */

/* Frame-size deviation thresholds, I frames, negative side (fractions). */
static const double
vdenc_brc_dev_threshi0_fp_neg[4] = { 0.80, 0.60, 0.34, 0.2 };

/* Frame-size deviation thresholds, I frames, positive side (fractions). */
static const double
vdenc_brc_dev_threshi0_fp_pos[4] = { 0.2, 0.4, 0.66, 0.9 };

/* Deviation thresholds, P/B frames, negative side. */
static const double
vdenc_brc_dev_threshpb0_fp_neg[4] = { 0.90, 0.66, 0.46, 0.3 };

/* Deviation thresholds, P/B frames, positive side. */
static const double
vdenc_brc_dev_threshpb0_fp_pos[4] = { 0.3, 0.46, 0.70, 0.90 };

/* VBR-specific deviation thresholds, negative side. */
static const double
vdenc_brc_dev_threshvbr0_neg[4] = { 0.90, 0.70, 0.50, 0.3 };

/* VBR-specific deviation thresholds, positive side. */
static const double
vdenc_brc_dev_threshvbr0_pos[4] = { 0.4, 0.5, 0.75, 0.90 };

/* Estimated-rate thresholds for P frames. */
static const unsigned char
vdenc_brc_estrate_thresh_p0[7] = { 4, 8, 12, 16, 20, 24, 28 };

/* Estimated-rate thresholds for I frames (same defaults as P). */
static const unsigned char
vdenc_brc_estrate_thresh_i0[7] = { 4, 8, 12, 16, 20, 24, 28 };

/* Frame counts after which global adjustment starts/steps. */
static const uint16_t
vdenc_brc_start_global_adjust_frame[4] = { 10, 50, 100, 150 };

/* Global rate-ratio bucket boundaries (percent of target). */
static const uint8_t
vdenc_brc_global_rate_ratio_threshold[7] = { 80, 90, 95, 101, 105, 115, 130};

/* Multiplier per global-adjust stage. */
static const uint8_t
vdenc_brc_start_global_adjust_mult[5] = { 1, 1, 3, 2, 1 };

/* Divisor per global-adjust stage. */
static const uint8_t
vdenc_brc_start_global_adjust_div[5] = { 40, 5, 5, 3, 1 };

/* QP delta applied per rate-ratio bucket (8 buckets for 7 boundaries). */
static const int8_t
vdenc_brc_global_rate_ratio_threshold_qp[8] = { -3, -2, -1, 0, 1, 1, 2, 3 };
539
/*
 * Per-slice-type mode cost LUTs: first index selects the slice type
 * (0 = INTRASLICE, 1 = PREDSLICE per the inline comments), second index the
 * LUT mode, third index the QP (52 entries, QP 0..51).  Rows written as
 * "{ 0, }" are fully zero-filled.
 */
static const int vdenc_mode_const[2][12][52] = {
    //INTRASLICE
    {
        //LUTMODE_INTRA_NONPRED
        {
            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,         //QP=[0 ~12]
            16, 18, 22, 24, 13, 15, 16, 18, 13, 15, 15, 12, 14,         //QP=[13~25]
            12, 12, 10, 10, 11, 10, 10, 10, 9, 9, 8, 8, 8,              //QP=[26~38]
            8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7,                      //QP=[39~51]
        },

        //LUTMODE_INTRA_16x16, LUTMODE_INTRA
        {
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[0 ~12]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[13~25]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[26~38]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[39~51]
        },

        //LUTMODE_INTRA_8x8
        {
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  //QP=[0 ~12]
            0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,  //QP=[13~25]
            1, 1, 1, 1, 1, 4, 4, 4, 4, 6, 6, 6, 6,  //QP=[26~38]
            6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7,  //QP=[39~51]
        },

        //LUTMODE_INTRA_4x4
        {
            56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,   //QP=[0 ~12]
            64, 72, 80, 88, 48, 56, 64, 72, 53, 59, 64, 56, 64,   //QP=[13~25]
            57, 64, 58, 55, 64, 64, 64, 64, 59, 59, 60, 57, 50,   //QP=[26~38]
            46, 42, 38, 34, 31, 27, 23, 22, 19, 18, 16, 14, 13,   //QP=[39~51]
        },

        //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
        { 0, },

        //LUTMODE_INTER_8X8Q
        { 0, },

        //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16x8_FIELD
        { 0, },

        //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8X8_FIELD
        { 0, },

        //LUTMODE_INTER_16x16, LUTMODE_INTER
        { 0, },

        //LUTMODE_INTER_BWD
        { 0, },

        //LUTMODE_REF_ID
        { 0, },

        //LUTMODE_INTRA_CHROMA
        { 0, },
    },

    //PREDSLICE
    {
        //LUTMODE_INTRA_NONPRED
        {
            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,     //QP=[0 ~12]
            7, 8, 9, 10, 5, 6, 7, 8, 6, 7, 7, 7, 7,    //QP=[13~25]
            6, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7,     //QP=[26~38]
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     //QP=[39~51]
        },

        //LUTMODE_INTRA_16x16, LUTMODE_INTRA
        {
            21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
            24, 28, 31, 35, 19, 21, 24, 28, 20, 24, 25, 21, 24,
            24, 24, 24, 21, 24, 24, 26, 24, 24, 24, 24, 24, 24,
            24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,

        },

        //LUTMODE_INTRA_8x8
        {
            26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,   //QP=[0 ~12]
            28, 32, 36, 40, 22, 26, 28, 32, 24, 26, 30, 26, 28,   //QP=[13~25]
            26, 28, 26, 26, 30, 28, 28, 28, 26, 28, 28, 26, 28,   //QP=[26~38]
            28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,   //QP=[39~51]
        },

        //LUTMODE_INTRA_4x4
        {
            64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,   //QP=[0 ~12]
            72, 80, 88, 104, 56, 64, 72, 80, 58, 68, 76, 64, 68,  //QP=[13~25]
            64, 68, 68, 64, 70, 70, 70, 70, 68, 68, 68, 68, 68,   //QP=[26~38]
            68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,   //QP=[39~51]
        },

        //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
        {
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,      //QP=[0 ~12]
            8, 9, 11, 12, 6, 7, 9, 10, 7, 8, 9, 8, 9,   //QP=[13~25]
            8, 9, 8, 8, 9, 9, 9, 9, 8, 8, 8, 8, 8,      //QP=[26~38]
            8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,      //QP=[39~51]
        },

        //LUTMODE_INTER_8X8Q
        {
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,   //QP=[0 ~12]
            2, 3, 3, 3, 2, 2, 2, 3, 2, 2, 2, 2, 3,   //QP=[13~25]
            2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   //QP=[26~38]
            3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   //QP=[39~51]
        },

        //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16X8_FIELD
        {
            5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[0 ~12]
            5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[13~25]
            5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[26~38]
            5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[39~51]
        },

        //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8x8_FIELD
        {
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[0 ~12]
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[13~25]
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[26~38]
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[39~51]
        },

        //LUTMODE_INTER_16x16, LUTMODE_INTER
        {
            5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[0 ~12]
            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   //QP=[13~25]
            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   //QP=[26~38]
            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   //QP=[39~51]
        },

        //LUTMODE_INTER_BWD
        {
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[0 ~12]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[13~25]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[26~38]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[39~51]
        },

        //LUTMODE_REF_ID
        {
            4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[0 ~12]
            4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[13~25]
            4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[26~38]
            4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[39~51]
        },

        //LUTMODE_INTRA_CHROMA
        {
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[0 ~12]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[13~25]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[26~38]
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[39~51]
        },
    },
};
700
/*
 * Skip-bias MV cost for P slices, in quarter-pel units per the _qpel
 * suffix (8 entries; NOTE(review): index semantics not visible here --
 * confirm at the use site).
 */
static const int vdenc_mv_cost_skipbias_qpel[8] = {
    //PREDSLICE
    0, 6, 6, 9, 10, 13, 14, 16
};
705
/*
 * HME motion-vector cost table: first index is the MV-magnitude bucket
 * (see the mv<=N comments), second index the QP (52 entries, QP 0..51).
 * Similar in shape to the byte-valued hme_mv_cost member of
 * gen9_brc_update_constant_data, but with 52 per-QP entries.
 */
static const int vdenc_hme_cost[8][52] = {
    //mv=0
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[0 ~12]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[13 ~25]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[26 ~38]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[39 ~51]
    },
    //mv<=16
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[0 ~12]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[13 ~25]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[26 ~38]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[39 ~51]
    },
    //mv<=32
    {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[0 ~12]
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[13 ~25]
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[26 ~38]
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[39 ~51]
    },
    //mv<=64
    {
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[0 ~12]
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[13 ~25]
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[26 ~38]
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[39 ~51]
    },
    //mv<=128
    {
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[0 ~12]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[13 ~25]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[26 ~38]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[39 ~51]
    },
    //mv<=256
    {
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[0 ~12]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[13 ~25]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[26 ~38]
        10, 10, 10, 10, 20, 30, 40, 50, 50, 50, 50, 50, 50,     //QP=[39 ~51]
    },
    //mv<=512
    {
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[0 ~12]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[13 ~25]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[26 ~38]
        20, 20, 20, 40, 60, 80, 100, 100, 100, 100, 100, 100, 100,     //QP=[39 ~51]
    },

    //mv<=1024
    {
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[0 ~12]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[13 ~25]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[26 ~38]
        20, 20, 30, 50, 100, 200, 200, 200, 200, 200, 200, 200, 200,     //QP=[39 ~51]
    },
};
765
/*
 * Emit a 2-dword buffer address into the BCS batch: a 64-bit relocation
 * when @bo is non-NULL, or two zero dwords (NULL address) otherwise.
 * @is_target non-zero marks the buffer as a GPU write target (render
 * write domain); @delta is the byte offset added to the relocation.
 */
#define OUT_BUFFER_2DW(batch, bo, is_target, delta)  do {               \
        if (bo) {                                                       \
            OUT_BCS_RELOC64(batch,                                      \
                            bo,                                         \
                            I915_GEM_DOMAIN_RENDER,                     \
                            is_target ? I915_GEM_DOMAIN_RENDER : 0,     \
                            delta);                                     \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
    } while (0)
778
/*
 * Emit a 3-dword buffer address: the 2-dword address/relocation followed
 * by a dword of MOCS (memory object cache control) state.  @attr is
 * currently unused.  NOTE: expects a local 'i965' driver-data pointer to
 * be in scope at every expansion site.
 */
#define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr)  do { \
        OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
        OUT_BCS_BATCH(batch, i965->intel.mocs_state);                             \
    } while (0)
783
/*
 * Allocate a linear (untiled), 1-row buffer GPE resource of @bfsize bytes
 * described by the debug string @des.  width/pitch/size are all set to
 * @bfsize since the buffer is treated as a flat byte array.  Expects a
 * local 'i965' driver-data pointer in scope at the expansion site.
 */
#define ALLOC_VDENC_BUFFER_RESOURCE(buffer, bfsize, des) do {   \
        buffer.type = I965_GPE_RESOURCE_BUFFER;                 \
        buffer.width = bfsize;                                  \
        buffer.height = 1;                                      \
        buffer.pitch = buffer.width;                            \
        buffer.size = buffer.pitch;                             \
        buffer.tiling = I915_TILING_NONE;                       \
        i965_allocate_gpe_resource(i965->intel.bufmgr,          \
                                   &buffer,                     \
                                   bfsize,                      \
                                   (des));                      \
    } while (0)
796
/*
 * Return the maximum vertical motion-vector range for an H.264 level,
 * in quarter-pel units (4x the spec's MaxVmvR: 64/128/256/512 for level
 * 1 / up to 2.0 / up to 3.0 / above 3.0 respectively).
 */
static int
gen9_vdenc_get_max_vmv_range(int level)
{
    if (level == 10)
        return 256;

    if (level <= 20)
        return 512;

    if (level <= 30)
        return 1024;

    return 2048;
}
813
814 static unsigned char
815 map_44_lut_value(unsigned int v, unsigned char max)
816 {
817     unsigned int maxcost;
818     int d;
819     unsigned char ret;
820
821     if (v == 0) {
822         return 0;
823     }
824
825     maxcost = ((max & 15) << (max >> 4));
826
827     if (v >= maxcost) {
828         return max;
829     }
830
831     d = (int)(log((double)v) / log(2.0)) - 3;
832
833     if (d < 0) {
834         d = 0;
835     }
836
837     ret = (unsigned char)((d << 4) + (int)((v + (d == 0 ? 0 : (1 << (d - 1)))) >> d));
838     ret = (ret & 0xf) == 0 ? (ret | 8) : ret;
839
840     return ret;
841 }
842
843 static void
844 gen9_vdenc_update_misc_parameters(VADriverContextP ctx,
845                                   struct encode_state *encode_state,
846                                   struct intel_encoder_context *encoder_context)
847 {
848     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
849     int i;
850
851     vdenc_context->gop_size = encoder_context->brc.gop_size;
852     vdenc_context->ref_dist = encoder_context->brc.num_bframes_in_gop + 1;
853
854     if (vdenc_context->internal_rate_mode != I965_BRC_CQP &&
855         encoder_context->brc.need_reset) {
856         /* So far, vdenc doesn't support temporal layer */
857         vdenc_context->framerate = encoder_context->brc.framerate[0];
858
859         vdenc_context->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
860         vdenc_context->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
861
862         vdenc_context->max_bit_rate = ALIGN(encoder_context->brc.bits_per_second[0], 1000) / 1000;
863         vdenc_context->mb_brc_enabled = encoder_context->brc.mb_rate_control[0];
864         vdenc_context->brc_need_reset = (vdenc_context->brc_initted && encoder_context->brc.need_reset);
865
866         if (vdenc_context->internal_rate_mode == I965_BRC_CBR) {
867             vdenc_context->min_bit_rate = vdenc_context->max_bit_rate;
868             vdenc_context->target_bit_rate = vdenc_context->max_bit_rate;
869         } else {
870             assert(vdenc_context->internal_rate_mode == I965_BRC_VBR);
871             vdenc_context->min_bit_rate = vdenc_context->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
872             vdenc_context->target_bit_rate = vdenc_context->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
873         }
874     }
875
876     vdenc_context->mb_brc_enabled = 1;
877     vdenc_context->num_roi = MIN(encoder_context->brc.num_roi, 3);
878     vdenc_context->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
879     vdenc_context->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
880     vdenc_context->vdenc_streamin_enable = !!vdenc_context->num_roi;
881
882     for (i = 0; i < vdenc_context->num_roi; i++) {
883         vdenc_context->roi[i].left = encoder_context->brc.roi[i].left >> 4;
884         vdenc_context->roi[i].right = encoder_context->brc.roi[i].right >> 4;
885         vdenc_context->roi[i].top = encoder_context->brc.roi[i].top >> 4;
886         vdenc_context->roi[i].bottom = encoder_context->brc.roi[i].top >> 4;
887         vdenc_context->roi[i].value = encoder_context->brc.roi[i].value;
888     }
889 }
890
891 static void
892 gen9_vdenc_update_parameters(VADriverContextP ctx,
893                              VAProfile profile,
894                              struct encode_state *encode_state,
895                              struct intel_encoder_context *encoder_context)
896 {
897     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
898     VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
899     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
900
901     if (profile == VAProfileH264High)
902         vdenc_context->transform_8x8_mode_enable = !!pic_param->pic_fields.bits.transform_8x8_mode_flag;
903     else
904         vdenc_context->transform_8x8_mode_enable = 0;
905
906     vdenc_context->frame_width_in_mbs = seq_param->picture_width_in_mbs;
907     vdenc_context->frame_height_in_mbs = seq_param->picture_height_in_mbs;
908
909     vdenc_context->frame_width = vdenc_context->frame_width_in_mbs * 16;
910     vdenc_context->frame_height = vdenc_context->frame_height_in_mbs * 16;
911
912     vdenc_context->down_scaled_width_in_mb4x = WIDTH_IN_MACROBLOCKS(vdenc_context->frame_width / SCALE_FACTOR_4X);
913     vdenc_context->down_scaled_height_in_mb4x = HEIGHT_IN_MACROBLOCKS(vdenc_context->frame_height / SCALE_FACTOR_4X);
914     vdenc_context->down_scaled_width_4x = vdenc_context->down_scaled_width_in_mb4x * 16;
915     vdenc_context->down_scaled_height_4x = ((vdenc_context->down_scaled_height_in_mb4x + 1) >> 1) * 16;
916     vdenc_context->down_scaled_height_4x = ALIGN(vdenc_context->down_scaled_height_4x, 32) << 1;
917
918     gen9_vdenc_update_misc_parameters(ctx, encode_state, encoder_context);
919
920     vdenc_context->current_pass = 0;
921     vdenc_context->num_passes = 1;
922
923     if (vdenc_context->internal_rate_mode == I965_BRC_CBR ||
924         vdenc_context->internal_rate_mode == I965_BRC_VBR)
925         vdenc_context->brc_enabled = 1;
926     else
927         vdenc_context->brc_enabled = 0;
928
929     if (vdenc_context->brc_enabled &&
930         (!vdenc_context->init_vbv_buffer_fullness_in_bit ||
931          !vdenc_context->vbv_buffer_size_in_bit ||
932          !vdenc_context->max_bit_rate ||
933          !vdenc_context->target_bit_rate ||
934          !vdenc_context->framerate.num ||
935          !vdenc_context->framerate.den))
936         vdenc_context->brc_enabled = 0;
937
938     if (!vdenc_context->brc_enabled) {
939         vdenc_context->target_bit_rate = 0;
940         vdenc_context->max_bit_rate = 0;
941         vdenc_context->min_bit_rate = 0;
942         vdenc_context->init_vbv_buffer_fullness_in_bit = 0;
943         vdenc_context->vbv_buffer_size_in_bit = 0;
944     } else {
945         vdenc_context->num_passes = NUM_OF_BRC_PAK_PASSES;
946     }
947 }
948
949 static void
950 gen9_vdenc_avc_calculate_mode_cost(VADriverContextP ctx,
951                                    struct encode_state *encode_state,
952                                    struct intel_encoder_context *encoder_context,
953                                    int qp)
954 {
955     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
956     unsigned int frame_type = vdenc_context->frame_type;
957
958     memset(vdenc_context->mode_cost, 0, sizeof(vdenc_context->mode_cost));
959     memset(vdenc_context->mv_cost, 0, sizeof(vdenc_context->mv_cost));
960     memset(vdenc_context->hme_mv_cost, 0, sizeof(vdenc_context->hme_mv_cost));
961
962     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_NONPRED] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_NONPRED][qp]), 0x6f);
963     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_16x16][qp]), 0x8f);
964     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_8x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_8x8][qp]), 0x8f);
965     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_4x4] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_4x4][qp]), 0x8f);
966
967     if (frame_type == VDENC_FRAME_P) {
968         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x16][qp]), 0x8f);
969         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x8][qp]), 0x8f);
970         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X8Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X8Q][qp]), 0x6f);
971         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X4Q][qp]), 0x6f);
972         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_4X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_4X4Q][qp]), 0x6f);
973         vdenc_context->mode_cost[VDENC_LUTMODE_REF_ID] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_REF_ID][qp]), 0x6f);
974
975         vdenc_context->mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[0]), 0x6f);
976         vdenc_context->mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[1]), 0x6f);
977         vdenc_context->mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[2]), 0x6f);
978         vdenc_context->mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[3]), 0x6f);
979         vdenc_context->mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[4]), 0x6f);
980         vdenc_context->mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[5]), 0x6f);
981         vdenc_context->mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[6]), 0x6f);
982         vdenc_context->mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[7]), 0x6f);
983
984         vdenc_context->hme_mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_hme_cost[0][qp]), 0x6f);
985         vdenc_context->hme_mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_hme_cost[1][qp]), 0x6f);
986         vdenc_context->hme_mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_hme_cost[2][qp]), 0x6f);
987         vdenc_context->hme_mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_hme_cost[3][qp]), 0x6f);
988         vdenc_context->hme_mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_hme_cost[4][qp]), 0x6f);
989         vdenc_context->hme_mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_hme_cost[5][qp]), 0x6f);
990         vdenc_context->hme_mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_hme_cost[6][qp]), 0x6f);
991         vdenc_context->hme_mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_hme_cost[7][qp]), 0x6f);
992     }
993 }
994
995 static void
996 gen9_vdenc_update_roi_in_streamin_state(VADriverContextP ctx,
997                                         struct intel_encoder_context *encoder_context)
998 {
999     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1000     struct gen9_vdenc_streamin_state *streamin_state;
1001     int row, col, i;
1002
1003     if (!vdenc_context->num_roi)
1004         return;
1005
1006     streamin_state = (struct gen9_vdenc_streamin_state *)i965_map_gpe_resource(&vdenc_context->vdenc_streamin_res);
1007
1008     if (!streamin_state)
1009         return;
1010
1011     for (col = 0;  col < vdenc_context->frame_width_in_mbs; col++) {
1012         for (row = 0; row < vdenc_context->frame_height_in_mbs; row++) {
1013             streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = 0; /* non-ROI region */
1014
1015             /* The last one has higher priority */
1016             for (i = vdenc_context->num_roi - 1; i >= 0; i--) {
1017                 if ((col >= vdenc_context->roi[i].left && col <= vdenc_context->roi[i].right) &&
1018                     (row >= vdenc_context->roi[i].top && row <= vdenc_context->roi[i].bottom)) {
1019                     streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = i + 1;
1020
1021                     break;
1022                 }
1023             }
1024         }
1025     }
1026
1027     i965_unmap_gpe_resource(&vdenc_context->vdenc_streamin_res);
1028 }
1029
/*
 * Per-frame preparation for VDENC AVC encoding: refreshes the derived
 * parameters, detects whether deblocking is enabled anywhere in the
 * frame, (re)binds all GPE resources (reconstructed, scaled, reference,
 * input, bitstream, status and scratch buffers), resolves the list0
 * reference indices for the first slice, determines the frame type and
 * programs the QP-dependent mode costs and ROI stream-in data.
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_INVALID_VALUE when the
 * requested number of list0 references exceeds the driver's table size.
 */
static VAStatus
gen9_vdenc_avc_prepare(VADriverContextP ctx,
                       VAProfile profile,
                       struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct i965_coded_buffer_segment *coded_buffer_segment;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    VAEncPictureParameterBufferH264 *pic_param;
    VAEncSliceParameterBufferH264 *slice_param;
    VDEncAvcSurface *vdenc_avc_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    int qp;
    char *pbuffer;

    gen9_vdenc_update_parameters(ctx, profile, encode_state, encoder_context);

    /* Deblocking is considered enabled if any slice in the frame does
     * not explicitly disable it (disable_deblocking_filter_idc != 1). */
    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    /* Setup current frame */
    obj_surface = encode_state->reconstructed_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* Lazily attach a 4x-downscaled companion surface to the
     * reconstructed surface; it lives as the surface's private data. */
    if (obj_surface->private_data == NULL) {
        vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
        assert(vdenc_avc_surface);

        vdenc_avc_surface->ctx = ctx;
        i965_CreateSurfaces(ctx,
                            vdenc_context->down_scaled_width_4x,
                            vdenc_context->down_scaled_height_4x,
                            VA_RT_FORMAT_YUV420,
                            1,
                            &vdenc_avc_surface->scaled_4x_surface_id);
        vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
        assert(vdenc_avc_surface->scaled_4x_surface_obj);
        i965_check_alloc_surface_bo(ctx,
                                    vdenc_avc_surface->scaled_4x_surface_obj,
                                    1,
                                    VA_FOURCC_NV12,
                                    SUBSAMPLE_YUV420);

        obj_surface->private_data = (void *)vdenc_avc_surface;
        obj_surface->free_private_data = (void *)vdenc_free_avc_surface;
    }

    vdenc_avc_surface = (VDEncAvcSurface *)obj_surface->private_data;
    assert(vdenc_avc_surface->scaled_4x_surface_obj);

    /* Reconstructed surfaces */
    i965_free_gpe_resource(&vdenc_context->recon_surface_res);
    i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
    i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
    i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);

    i965_object_surface_to_2d_gpe_resource(&vdenc_context->recon_surface_res, obj_surface);
    i965_object_surface_to_2d_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res, vdenc_avc_surface->scaled_4x_surface_obj);

    /* Only one of the pre/post deblocking outputs is bound, depending on
     * whether the in-loop deblocking filter is active for this frame. */
    if (enable_avc_ildb) {
        i965_object_surface_to_2d_gpe_resource(&vdenc_context->post_deblocking_output_res, obj_surface);
    } else {
        i965_object_surface_to_2d_gpe_resource(&vdenc_context->pre_deblocking_output_res, obj_surface);
    }


    /* Reference surfaces */
    for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
        assert(ARRAY_ELEMS(vdenc_context->list_reference_res) ==
               ARRAY_ELEMS(vdenc_context->list_scaled_4x_reference_res));
        i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
        i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
        obj_surface = encode_state->reference_objects[i];

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_reference_res[i], obj_surface);

            /* Same lazy 4x-downscaled companion surface as above */
            if (obj_surface->private_data == NULL) {
                vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
                assert(vdenc_avc_surface);

                vdenc_avc_surface->ctx = ctx;
                i965_CreateSurfaces(ctx,
                                    vdenc_context->down_scaled_width_4x,
                                    vdenc_context->down_scaled_height_4x,
                                    VA_RT_FORMAT_YUV420,
                                    1,
                                    &vdenc_avc_surface->scaled_4x_surface_id);
                vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
                assert(vdenc_avc_surface->scaled_4x_surface_obj);
                i965_check_alloc_surface_bo(ctx,
                                            vdenc_avc_surface->scaled_4x_surface_obj,
                                            1,
                                            VA_FOURCC_NV12,
                                            SUBSAMPLE_YUV420);

                obj_surface->private_data = vdenc_avc_surface;
                /* NOTE(review): the reconstructed surface above uses
                 * vdenc_free_avc_surface but this path uses
                 * gen_free_avc_surface — confirm both destructors are
                 * compatible with VDEncAvcSurface. */
                obj_surface->free_private_data = gen_free_avc_surface;
            }

            vdenc_avc_surface = obj_surface->private_data;
            i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i], vdenc_avc_surface->scaled_4x_surface_obj);
        }
    }

    /* Input YUV surface */
    i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
    i965_object_surface_to_2d_gpe_resource(&vdenc_context->uncompressed_input_surface_res, encode_state->input_yuv_object);

    /* Encoded bitstream: compressed data starts after the coded-buffer
     * header; the end offset is padded back from the buffer size. */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
    i965_dri_object_to_buffer_gpe_resource(&vdenc_context->compressed_bitstream.res, bo);
    vdenc_context->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
    vdenc_context->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);

    /* Status buffer: shares the coded buffer's bo, placed in the
     * codec-private area before the bitstream data.  ("bffuer" is a
     * long-standing typo in the struct field name, kept as-is.) */
    i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
    i965_dri_object_to_buffer_gpe_resource(&vdenc_context->status_bffuer.res, bo);
    vdenc_context->status_bffuer.base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
    vdenc_context->status_bffuer.size = ALIGN(sizeof(struct gen9_vdenc_status), 64);
    vdenc_context->status_bffuer.bytes_per_frame_offset = offsetof(struct gen9_vdenc_status, bytes_per_frame);
    assert(vdenc_context->status_bffuer.base_offset + vdenc_context->status_bffuer.size <
           vdenc_context->compressed_bitstream.start_offset);

    /* Initialize the coded-buffer header and zero the status area */
    dri_bo_map(bo, 1);

    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    coded_buffer_segment->status_support = 1;

    pbuffer = bo->virtual;
    pbuffer += vdenc_context->status_bffuer.base_offset;
    memset(pbuffer, 0, vdenc_context->status_bffuer.size);

    dri_bo_unmap(bo);

    /* Row-store scratch buffers, sized per macroblock row width */
    i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
    ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_intra_row_store_scratch_res,
                                vdenc_context->frame_width_in_mbs * 64,
                                "Intra row store scratch buffer");

    i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
    ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_deblocking_filter_row_store_scratch_res,
                                vdenc_context->frame_width_in_mbs * 256,
                                "Deblocking filter row store scratch buffer");

    i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
    ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_bsd_mpc_row_store_scratch_res,
                                vdenc_context->frame_width_in_mbs * 128,
                                "BSD/MPC row store scratch buffer");

    i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
    ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_row_store_scratch_res,
                                vdenc_context->frame_width_in_mbs * 64,
                                "VDENC row store scratch buffer");

    /* StreamIn buffer: one 64-byte state entry per macroblock */
    assert(sizeof(struct gen9_vdenc_streamin_state) == 64);
    i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
    ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_streamin_res,
                                vdenc_context->frame_width_in_mbs *
                                vdenc_context->frame_height_in_mbs *
                                sizeof(struct gen9_vdenc_streamin_state),
                                "VDENC StreamIn buffer");

    /*
     * Calculate the index for each reference surface in list0 for the first slice
     * TODO: other slices
     */
    pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;

    vdenc_context->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;

    if (slice_param->num_ref_idx_active_override_flag)
        vdenc_context->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;

    /* 0xFF marks an unused list0 slot */
    for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_ref_idx[0]); i++) {
        vdenc_context->list_ref_idx[0][i] = 0xFF;
    }

    if (vdenc_context->num_refs[0] > ARRAY_ELEMS(vdenc_context->list_ref_idx[0]))
        return VA_STATUS_ERROR_INVALID_VALUE;

    /* Map each RefPicList0 entry to its index in reference_objects[] */
    for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_ref_idx[0]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(vdenc_context->list_ref_idx[0]));

        if (i >= vdenc_context->num_refs[0])
            continue;

        va_pic = &slice_param->RefPicList0[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                vdenc_context->list_ref_idx[0][i] = j;

                break;
            }
        }
    }

    /* Frame type is derived from the first slice's type */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI)
        vdenc_context->frame_type = VDENC_FRAME_I;
    else
        vdenc_context->frame_type = VDENC_FRAME_P;

    qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;

    gen9_vdenc_avc_calculate_mode_cost(ctx, encode_state, encoder_context, qp);
    gen9_vdenc_update_roi_in_streamin_state(ctx, encoder_context);

    return VA_STATUS_SUCCESS;
}
1275
/*
 * Emit the 3-dword HUC_PIPE_MODE_SELECT command: stream-object mode
 * (bit 10), indirect stream-out enable (bit 4), and the media soft
 * reset counter.
 */
static void
gen9_vdenc_huc_pipe_mode_select(VADriverContextP ctx,
                                struct intel_encoder_context *encoder_context,
                                struct huc_pipe_mode_select_parameter *params)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 3);

    OUT_BCS_BATCH(batch, HUC_PIPE_MODE_SELECT | (3 - 2));
    OUT_BCS_BATCH(batch,
                  (params->huc_stream_object_enable << 10) |
                  (params->indirect_stream_out_enable << 4));
    OUT_BCS_BATCH(batch,
                  params->media_soft_reset_counter);

    ADVANCE_BCS_BATCH(batch);
}
1294
/*
 * Emit the 5-dword HUC_IMEM_STATE command, selecting which HuC firmware
 * to load by descriptor; the three middle dwords are reserved/zero.
 */
static void
gen9_vdenc_huc_imem_state(VADriverContextP ctx,
                          struct intel_encoder_context *encoder_context,
                          struct huc_imem_state_parameter *params)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, HUC_IMEM_STATE | (5 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, params->huc_firmware_descriptor);

    ADVANCE_BCS_BATCH(batch);
}
1312
/*
 * Emit the 6-dword HUC_DMEM_STATE command: source buffer address for
 * the HuC data (read-only relocation + MOCS via OUT_BUFFER_3DW, which
 * needs the local 'i965'), the destination base inside HuC DMEM, and
 * the data length.
 */
static void
gen9_vdenc_huc_dmem_state(VADriverContextP ctx,
                          struct intel_encoder_context *encoder_context,
                          struct huc_dmem_state_parameter *params)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, HUC_DMEM_STATE | (6 - 2));
    OUT_BUFFER_3DW(batch, params->huc_data_source_res->bo, 0, 0, 0);
    OUT_BCS_BATCH(batch, params->huc_data_destination_base_address);
    OUT_BCS_BATCH(batch, params->huc_data_length);

    ADVANCE_BCS_BATCH(batch);
}
1330
1331 /*
1332 static void
1333 gen9_vdenc_huc_cfg_state(VADriverContextP ctx,
1334                          struct intel_encoder_context *encoder_context,
1335                          struct huc_cfg_state_parameter *params)
1336 {
1337     struct intel_batchbuffer *batch = encoder_context->base.batch;
1338
1339     BEGIN_BCS_BATCH(batch, 2);
1340
1341     OUT_BCS_BATCH(batch, HUC_CFG_STATE | (2 - 2));
1342     OUT_BCS_BATCH(batch, !!params->force_reset);
1343
1344     ADVANCE_BCS_BATCH(batch);
1345 }
1346 */
/*
 * Emit the 49-dword HUC_VIRTUAL_ADDR_STATE command: 16 surface regions
 * of 3 dwords each (address relocation + MOCS), plus the header.
 * Unbound regions are programmed as NULL (zero) addresses; a region's
 * is_target flag marks it writable by the HuC.
 */
static void
gen9_vdenc_huc_virtual_addr_state(VADriverContextP ctx,
                                  struct intel_encoder_context *encoder_context,
                                  struct huc_virtual_addr_parameter *params)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 49);

    OUT_BCS_BATCH(batch, HUC_VIRTUAL_ADDR_STATE | (49 - 2));

    for (i = 0; i < 16; i++) {
        if (params->regions[i].huc_surface_res && params->regions[i].huc_surface_res->bo)
            OUT_BUFFER_3DW(batch,
                           params->regions[i].huc_surface_res->bo,
                           !!params->regions[i].is_target, 0, 0);
        else
            OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
1371
/*
 * Emit the 11-dword HUC_IND_OBJ_BASE_ADDR_STATE command: the indirect
 * stream-in base (read-only) and stream-out base (write target), each
 * followed by a 2-dword upper-bound address that is left NULL so the
 * access upper bound is ignored.
 */
static void
gen9_vdenc_huc_ind_obj_base_addr_state(VADriverContextP ctx,
                                       struct intel_encoder_context *encoder_context,
                                       struct huc_ind_obj_base_addr_parameter *params)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, HUC_IND_OBJ_BASE_ADDR_STATE | (11 - 2));

    if (params->huc_indirect_stream_in_object_res)
        OUT_BUFFER_3DW(batch,
                       params->huc_indirect_stream_in_object_res->bo,
                       0, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */

    if (params->huc_indirect_stream_out_object_res)
        OUT_BUFFER_3DW(batch,
                       params->huc_indirect_stream_out_object_res->bo,
                       1, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */

    ADVANCE_BCS_BATCH(batch);
}
1404
/*
 * Snapshot the HuC status into the huc_status2 resource: dword 0 is
 * written with the mask bit (1 << 6) via MI_STORE_DATA_IMM, and dword 1
 * receives the live VCS0_HUC_STATUS2 register value via
 * MI_STORE_REGISTER_MEM, so a later read can test the masked bit.
 */
static void
gen9_vdenc_huc_store_huc_status2(VADriverContextP ctx,
                                 struct intel_encoder_context *encoder_context)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;

    /* Write HUC_STATUS2 mask (1 << 6) */
    memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
    mi_store_data_imm_params.bo = vdenc_context->huc_status2_res.bo;
    mi_store_data_imm_params.offset = 0;
    mi_store_data_imm_params.dw0 = (1 << 6);
    gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);

    /* Store HUC_STATUS2 */
    memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
    mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS2;
    mi_store_register_mem_params.bo = vdenc_context->huc_status2_res.bo;
    mi_store_register_mem_params.offset = 4;
    gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
}
1428
/*
 * Emit HUC_STREAM_OBJECT (5 DWs), describing the region of the indirect
 * stream-in buffer the HuC consumes, the stream-out start address, and
 * the start-code / bitstream handling controls.
 */
static void
gen9_vdenc_huc_stream_object(VADriverContextP ctx,
                             struct intel_encoder_context *encoder_context,
                             struct huc_stream_object_parameter *params)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, HUC_STREAM_OBJECT | (5 - 2));
    OUT_BCS_BATCH(batch, params->indirect_stream_in_data_length);
    OUT_BCS_BATCH(batch,
                  (1 << 31) |   /* Must be 1 */
                  params->indirect_stream_in_start_address);
    OUT_BCS_BATCH(batch, params->indirect_stream_out_start_address);
    /* DW4: control flags plus the three start-code bytes */
    OUT_BCS_BATCH(batch,
                  (!!params->huc_bitstream_enable << 29) |
                  (params->length_mode << 27) |
                  (!!params->stream_out << 26) |
                  (!!params->emulation_prevention_byte_removal << 25) |
                  (!!params->start_code_search_engine << 24) |
                  (params->start_code_byte2 << 16) |
                  (params->start_code_byte1 << 8) |
                  params->start_code_byte0);

    ADVANCE_BCS_BATCH(batch);
}
1456
/*
 * Emit HUC_START (2 DWs) to kick off HuC execution.  The single payload
 * bit tells the HuC whether this is the last stream object of the
 * workload.
 */
static void
gen9_vdenc_huc_start(VADriverContextP ctx,
                     struct intel_encoder_context *encoder_context,
                     struct huc_start_parameter *params)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 2);

    OUT_BCS_BATCH(batch, HUC_START | (2 - 2));
    OUT_BCS_BATCH(batch, !!params->last_stream_object);

    ADVANCE_BCS_BATCH(batch);
}
1471
/*
 * Emit VD_PIPELINE_FLUSH (2 DWs).  DW1 packs the per-engine
 * command-flush enables (bits 16-19) and the pipeline-done wait
 * conditions (bits 0-4) taken from params.
 */
static void
gen9_vdenc_vd_pipeline_flush(VADriverContextP ctx,
                             struct intel_encoder_context *encoder_context,
                             struct vd_pipeline_flush_parameter *params)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 2);

    OUT_BCS_BATCH(batch, VD_PIPELINE_FLUSH | (2 - 2));
    OUT_BCS_BATCH(batch,
                  params->mfx_pipeline_command_flush << 19 |
                  params->mfl_pipeline_command_flush << 18 |
                  params->vdenc_pipeline_command_flush << 17 |
                  params->hevc_pipeline_command_flush << 16 |
                  params->vd_command_message_parser_done << 4 |
                  params->mfx_pipeline_done << 3 |
                  params->mfl_pipeline_done << 2 |
                  params->vdenc_pipeline_done << 1 |
                  params->hevc_pipeline_done);

    ADVANCE_BCS_BATCH(batch);
}
1495
/*
 * Map an H.264 level_idc to the level's MaxMBPS limit (maximum
 * macroblock processing rate, in MB/s) per Table A-1 of the H.264
 * specification.  Levels not listed fall back to the level 2.0
 * limit of 11880 MB/s.
 *
 * Fix: dropped the stray ';' that followed the closing brace.
 */
static int
gen9_vdenc_get_max_mbps(int level_idc)
{
    int max_mbps = 11880;

    switch (level_idc) {
    case 20:
        max_mbps = 11880;
        break;

    case 21:
        max_mbps = 19800;
        break;

    case 22:
        max_mbps = 20250;
        break;

    case 30:
        max_mbps = 40500;
        break;

    case 31:
        max_mbps = 108000;
        break;

    case 32:
        max_mbps = 216000;
        break;

    case 40:
    case 41:
        /* Levels 4.0 and 4.1 share the same MaxMBPS */
        max_mbps = 245760;
        break;

    case 42:
        max_mbps = 522240;
        break;

    case 50:
        max_mbps = 589824;
        break;

    case 51:
        max_mbps = 983040;
        break;

    case 52:
        max_mbps = 2073600;
        break;

    default:
        break;
    }

    return max_mbps;
}
1553
1554 static unsigned int
1555 gen9_vdenc_get_profile_level_max_frame(VADriverContextP ctx,
1556                                        struct intel_encoder_context *encoder_context,
1557                                        int level_idc)
1558 {
1559     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1560     double bits_per_mb, tmpf;
1561     int max_mbps, num_mb_per_frame;
1562     uint64_t max_byte_per_frame0, max_byte_per_frame1;
1563     unsigned int ret;
1564
1565     if (level_idc >= 31 && level_idc <= 40)
1566         bits_per_mb = 96.0;
1567     else
1568         bits_per_mb = 192.0;
1569
1570     max_mbps = gen9_vdenc_get_max_mbps(level_idc);
1571     num_mb_per_frame = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs;
1572
1573     tmpf = (double)num_mb_per_frame;
1574
1575     if (tmpf < max_mbps / 172.0)
1576         tmpf = max_mbps / 172.0;
1577
1578     max_byte_per_frame0 = (uint64_t)(tmpf * bits_per_mb);
1579     max_byte_per_frame1 = (uint64_t)(((double)max_mbps * vdenc_context->framerate.den) /
1580                                      (double)vdenc_context->framerate.num * bits_per_mb);
1581
1582     /* TODO: check VAEncMiscParameterTypeMaxFrameSize */
1583     ret = (unsigned int)MIN(max_byte_per_frame0, max_byte_per_frame1);
1584     ret = (unsigned int)MIN(ret, vdenc_context->frame_height * vdenc_context->frame_height);
1585
1586     return ret;
1587 }
1588
1589 static int
1590 gen9_vdenc_calculate_initial_qp(VADriverContextP ctx,
1591                                 struct encode_state *encode_state,
1592                                 struct intel_encoder_context *encoder_context)
1593 {
1594     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1595     float x0 = 0, y0 = 1.19f, x1 = 1.75f, y1 = 1.75f;
1596     unsigned frame_size;
1597     int qp, delat_qp;
1598
1599     frame_size = (vdenc_context->frame_width * vdenc_context->frame_height * 3 / 2);
1600     qp = (int)(1.0 / 1.2 * pow(10.0,
1601                                (log10(frame_size * 2.0 / 3.0 * vdenc_context->framerate.num /
1602                                       ((double)vdenc_context->target_bit_rate * 1000.0 * vdenc_context->framerate.den)) - x0) *
1603                                (y1 - y0) / (x1 - x0) + y0) + 0.5);
1604     qp += 2;
1605     delat_qp = (int)(9 - (vdenc_context->vbv_buffer_size_in_bit * ((double)vdenc_context->framerate.num) /
1606                           ((double)vdenc_context->target_bit_rate * 1000.0 * vdenc_context->framerate.den)));
1607     if (delat_qp > 0)
1608         qp += delat_qp;
1609
1610     qp = CLAMP(1, 51, qp);
1611     qp--;
1612
1613     if (qp < 0)
1614         qp = 1;
1615
1616     return qp;
1617 }
1618
1619 static void
1620 gen9_vdenc_update_huc_brc_init_dmem(VADriverContextP ctx,
1621                                     struct encode_state *encode_state,
1622                                     struct intel_encoder_context *encoder_context)
1623 {
1624     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1625     struct huc_brc_init_dmem *dmem;
1626     VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1627     double input_bits_per_frame, bps_ratio;
1628     int i;
1629
1630     vdenc_context->brc_init_reset_input_bits_per_frame =
1631         ((double)vdenc_context->max_bit_rate * 1000.0 * vdenc_context->framerate.den) / vdenc_context->framerate.num;
1632     vdenc_context->brc_init_current_target_buf_full_in_bits = vdenc_context->brc_init_reset_input_bits_per_frame;
1633     vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
1634
1635     dmem = (struct huc_brc_init_dmem *)i965_map_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1636
1637     if (!dmem)
1638         return;
1639
1640     memset(dmem, 0, sizeof(*dmem));
1641
1642     dmem->brc_func = vdenc_context->brc_initted ? 2 : 0;
1643
1644     dmem->frame_width = vdenc_context->frame_width;
1645     dmem->frame_height = vdenc_context->frame_height;
1646
1647     dmem->target_bitrate = vdenc_context->target_bit_rate * 1000;
1648     dmem->min_rate = vdenc_context->min_bit_rate * 1000;
1649     dmem->max_rate = vdenc_context->max_bit_rate * 1000;
1650     dmem->buffer_size = vdenc_context->vbv_buffer_size_in_bit;
1651     dmem->init_buffer_fullness = vdenc_context->init_vbv_buffer_fullness_in_bit;
1652
1653     if (dmem->init_buffer_fullness > vdenc_context->init_vbv_buffer_fullness_in_bit)
1654         dmem->init_buffer_fullness = vdenc_context->vbv_buffer_size_in_bit;
1655
1656     if (vdenc_context->internal_rate_mode == I965_BRC_CBR)
1657         dmem->brc_flag |= 0x10;
1658     else if (vdenc_context->internal_rate_mode == I965_BRC_VBR)
1659         dmem->brc_flag |= 0x20;
1660
1661     dmem->frame_rate_m = vdenc_context->framerate.num;
1662     dmem->frame_rate_d = vdenc_context->framerate.den;
1663
1664     dmem->profile_level_max_frame = gen9_vdenc_get_profile_level_max_frame(ctx, encoder_context, seq_param->level_idc);
1665
1666     if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1667         dmem->num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
1668
1669     dmem->min_qp = 10;
1670     dmem->max_qp = 51;
1671
1672     input_bits_per_frame = ((double)vdenc_context->max_bit_rate * 1000.0 * vdenc_context->framerate.den) / vdenc_context->framerate.num;
1673     bps_ratio = input_bits_per_frame /
1674                 ((double)vdenc_context->vbv_buffer_size_in_bit * vdenc_context->framerate.den / vdenc_context->framerate.num);
1675
1676     if (bps_ratio < 0.1)
1677         bps_ratio = 0.1;
1678
1679     if (bps_ratio > 3.5)
1680         bps_ratio = 3.5;
1681
1682     for (i = 0; i < 4; i++) {
1683         dmem->dev_thresh_pb0[i] = (char)(-50 * pow(vdenc_brc_dev_threshpb0_fp_neg[i], bps_ratio));
1684         dmem->dev_thresh_pb0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshpb0_fp_pos[i], bps_ratio));
1685
1686         dmem->dev_thresh_i0[i] = (char)(-50 * pow(vdenc_brc_dev_threshi0_fp_neg[i], bps_ratio));
1687         dmem->dev_thresh_i0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshi0_fp_pos[i], bps_ratio));
1688
1689         dmem->dev_thresh_vbr0[i] = (char)(-50 * pow(vdenc_brc_dev_threshvbr0_neg[i], bps_ratio));
1690         dmem->dev_thresh_vbr0[i + 4] = (char)(100 * pow(vdenc_brc_dev_threshvbr0_pos[i], bps_ratio));
1691     }
1692
1693     dmem->init_qp_ip = gen9_vdenc_calculate_initial_qp(ctx, encode_state, encoder_context);
1694
1695     if (vdenc_context->mb_brc_enabled) {
1696         dmem->mb_qp_ctrl = 1;
1697         dmem->dist_qp_delta[0] = -5;
1698         dmem->dist_qp_delta[1] = -2;
1699         dmem->dist_qp_delta[2] = 2;
1700         dmem->dist_qp_delta[3] = 5;
1701     }
1702
1703     dmem->slice_size_ctrl_en = 0;       /* TODO: add support for slice size control */
1704
1705     dmem->oscillation_qp_delta = 0;     /* TODO: add support */
1706     dmem->first_iframe_no_hrd_check = 0;/* TODO: add support */
1707
1708     // 2nd re-encode pass if possible
1709     if (vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs >= (3840 * 2160 / 256)) {
1710         dmem->top_qp_delta_thr_for_2nd_pass = 5;
1711         dmem->bottom_qp_delta_thr_for_2nd_pass = 5;
1712         dmem->top_frame_size_threshold_for_2nd_pass = 80;
1713         dmem->bottom_frame_size_threshold_for_2nd_pass = 80;
1714     } else {
1715         dmem->top_qp_delta_thr_for_2nd_pass = 2;
1716         dmem->bottom_qp_delta_thr_for_2nd_pass = 1;
1717         dmem->top_frame_size_threshold_for_2nd_pass = 32;
1718         dmem->bottom_frame_size_threshold_for_2nd_pass = 24;
1719     }
1720
1721     dmem->qp_select_for_first_pass = 1;
1722     dmem->mb_header_compensation = 1;
1723     dmem->delta_qp_adaptation = 1;
1724     dmem->max_crf_quality_factor = 52;
1725
1726     dmem->crf_quality_factor = 0;               /* TODO: add support for CRF */
1727     dmem->scenario_info = 0;
1728
1729     memcpy(&dmem->estrate_thresh_i0, vdenc_brc_estrate_thresh_i0, sizeof(dmem->estrate_thresh_i0));
1730     memcpy(&dmem->estrate_thresh_p0, vdenc_brc_estrate_thresh_p0, sizeof(dmem->estrate_thresh_p0));
1731
1732     i965_unmap_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1733 }
1734
/*
 * Submit the HuC BRC init/reset workload: load the BRC_INIT_RESET
 * firmware, point the HuC at the freshly filled init DMEM and the BRC
 * history buffer, start it, then flush the VD pipeline.  The command
 * order below is fixed by the HuC programming sequence.
 */
static void
gen9_vdenc_huc_brc_init_reset(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct huc_pipe_mode_select_parameter pipe_mode_select_params;
    struct huc_imem_state_parameter imem_state_params;
    struct huc_dmem_state_parameter dmem_state_params;
    struct huc_virtual_addr_parameter virtual_addr_params;
    struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
    struct huc_stream_object_parameter stream_object_params;
    struct huc_start_parameter start_params;
    struct vd_pipeline_flush_parameter pipeline_flush_params;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_params;

    vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;

    /* Select the BRC init/reset firmware */
    memset(&imem_state_params, 0, sizeof(imem_state_params));
    imem_state_params.huc_firmware_descriptor = HUC_BRC_INIT_RESET;
    gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);

    memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
    gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);

    /* Fill and bind the init DMEM */
    gen9_vdenc_update_huc_brc_init_dmem(ctx, encode_state, encoder_context);
    memset(&dmem_state_params, 0, sizeof(dmem_state_params));
    dmem_state_params.huc_data_source_res = &vdenc_context->brc_init_reset_dmem_res;
    dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
    dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_init_dmem), 64);
    gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);

    /* Region 0: BRC history buffer, written by the firmware */
    memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
    virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
    virtual_addr_params.regions[0].is_target = 1;
    gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);

    /* Dummy stream-in object; init/reset produces no stream output */
    memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
    ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
    ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
    gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);

    memset(&stream_object_params, 0, sizeof(stream_object_params));
    stream_object_params.indirect_stream_in_data_length = 1;
    stream_object_params.indirect_stream_in_start_address = 0;
    gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);

    gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);

    memset(&start_params, 0, sizeof(start_params));
    start_params.last_stream_object = 1;
    gen9_vdenc_huc_start(ctx, encoder_context, &start_params);

    /* Wait for the HuC workload before letting later commands run */
    memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
    pipeline_flush_params.hevc_pipeline_done = 1;
    pipeline_flush_params.hevc_pipeline_command_flush = 1;
    gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);

    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
    mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
    gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
}
1798
/*
 * Fill the per-pass HuC BRC update DMEM and advance the driver's target
 * buffer-fullness model.  Unlike the init DMEM this buffer is not
 * memset first — fields not written here keep their previous contents.
 */
static void
gen9_vdenc_update_huc_update_dmem(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct huc_brc_update_dmem *dmem;
    int i, num_p_in_gop = 0;

    dmem = (struct huc_brc_update_dmem *)i965_map_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);

    if (!dmem)
        return;

    /* 1 = BRC frame update */
    dmem->brc_func = 1;

    /* Advance the fullness model once per frame (first pass only) */
    if (vdenc_context->brc_initted && (vdenc_context->current_pass == 0)) {
        vdenc_context->brc_init_previous_target_buf_full_in_bits =
            (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits);
        vdenc_context->brc_init_current_target_buf_full_in_bits += vdenc_context->brc_init_reset_input_bits_per_frame;
        vdenc_context->brc_target_size += vdenc_context->brc_init_reset_input_bits_per_frame;
    }

    /* Wrap the target size around the VBV buffer */
    if (vdenc_context->brc_target_size > vdenc_context->vbv_buffer_size_in_bit)
        vdenc_context->brc_target_size -= vdenc_context->vbv_buffer_size_in_bit;

    dmem->target_size = vdenc_context->brc_target_size;

    dmem->peak_tx_bits_per_frame = (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits - vdenc_context->brc_init_previous_target_buf_full_in_bits);

    dmem->target_slice_size = 0;        // TODO: add support for slice size control

    /* Global-adjustment and rate-ratio lookup tables for the firmware */
    memcpy(dmem->start_global_adjust_frame, vdenc_brc_start_global_adjust_frame, sizeof(dmem->start_global_adjust_frame));
    memcpy(dmem->global_rate_ratio_threshold, vdenc_brc_global_rate_ratio_threshold, sizeof(dmem->global_rate_ratio_threshold));

    dmem->current_frame_type = (vdenc_context->frame_type + 2) % 3;      // I frame:2, P frame:0, B frame:1

    memcpy(dmem->start_global_adjust_mult, vdenc_brc_start_global_adjust_mult, sizeof(dmem->start_global_adjust_mult));
    memcpy(dmem->start_global_adjust_div, vdenc_brc_start_global_adjust_div, sizeof(dmem->start_global_adjust_div));
    memcpy(dmem->global_rate_ratio_threshold_qp, vdenc_brc_global_rate_ratio_threshold_qp, sizeof(dmem->global_rate_ratio_threshold_qp));

    dmem->current_pak_pass = vdenc_context->current_pass;
    dmem->max_num_passes = 2;

    dmem->scene_change_detect_enable = 1;
    dmem->scene_change_prev_intra_percent_threshold = 96;
    dmem->scene_change_cur_intra_perent_threshold = 192;

    if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
        num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;

    for (i = 0; i < 2; i++)
        dmem->scene_change_width[i] = MIN((num_p_in_gop + 1) / 5, 6);

    if (vdenc_context->is_low_delay)
        dmem->ip_average_coeff = 0;
    else
        dmem->ip_average_coeff = 128;

    dmem->skip_frame_size = 0;
    dmem->num_of_frames_skipped = 0;

    dmem->roi_source = 0;               // TODO: add support for dirty ROI
    dmem->hme_detection_enable = 0;     // TODO: support HME kernel
    dmem->hme_cost_enable = 1;

    dmem->second_level_batchbuffer_size = 228;

    i965_unmap_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
}
1867
/*
 * Build the MFX_AVC_IMG_STATE command payload in pstate from the
 * sequence/picture parameters.  use_huc enables the BRC domain rate
 * control bit (the HuC firmware then owns rate control).
 */
static void
gen9_vdenc_init_mfx_avc_img_state(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context,
                                  struct gen9_mfx_avc_img_state *pstate,
                                  int use_huc)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

    memset(pstate, 0, sizeof(*pstate));

    /* Command header: opcode plus DW length (total DWs - 2) */
    pstate->dw0.value = (MFX_AVC_IMG_STATE | (sizeof(*pstate) / 4 - 2));

    pstate->dw1.frame_size_in_mbs_minus1 = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs - 1;

    pstate->dw2.frame_width_in_mbs_minus1 = vdenc_context->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = vdenc_context->frame_height_in_mbs - 1;

    pstate->dw3.image_structure = 0;    /* frame picture only */
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.brc_domain_rate_control_enable = !!use_huc;
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = vdenc_context->transform_8x8_mode_enable;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* Always 0 in VDEnc mode */
    pstate->dw4.minimum_frame_size = 0;

    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;             /* Always 0 in VDEnc mode */
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;

    /* Per-MB bit-size limits used with the dw5 max-bit flags */
    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;

    pstate->dw12.vad_error_logic = 1;
    /* TODO: set parameters DW19/DW20 for slices */
}
1946
/*
 * Build the VDENC_IMG_STATE command payload in pstate.  Mode/MV cost
 * tables differ for I vs P frames; when update_cost is set the costs
 * computed by the BRC (stored in the vdenc context) override the
 * per-frame-type defaults.  ROI zone deltas are programmed here only
 * for CQP — with BRC enabled the HuC firmware updates them instead.
 */
static void
gen9_vdenc_init_vdenc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen9_vdenc_img_state *pstate,
                                int update_cost)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;

    memset(pstate, 0, sizeof(*pstate));

    /* Command header: opcode plus DW length (total DWs - 2) */
    pstate->dw0.value = (VDENC_IMG_STATE | (sizeof(*pstate) / 4 - 2));

    if (vdenc_context->frame_type == VDENC_FRAME_I) {
        /* I-frame defaults: intra-only search and cost setup */
        pstate->dw4.intra_sad_measure_adjustment = 2;
        pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;

        pstate->dw5.cre_prefetch_enable = 1;

        pstate->dw9.mode0_cost = 10;
        pstate->dw9.mode1_cost = 0;
        pstate->dw9.mode2_cost = 3;
        pstate->dw9.mode3_cost = 30;

        pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
        pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
        pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;

        pstate->dw22.small_mb_size_in_word = 0xff;
        pstate->dw22.large_mb_size_in_word = 0xff;

        pstate->dw27.max_hmv_r = 0x2000;
        pstate->dw27.max_vmv_r = 0x200;

        pstate->dw33.qp_range_check_upper_bound = 0x33;
        pstate->dw33.qp_range_check_lower_bound = 0x0a;
        pstate->dw33.qp_range_check_value = 0x0f;
    } else {
        /* P-frame defaults: inter search plus mode/MV cost tables */
        pstate->dw2.bidirectional_weight = 0x20;

        pstate->dw4.subpel_mode = 3;
        pstate->dw4.bme_disable_for_fbr_message = 1;
        pstate->dw4.inter_sad_measure_adjustment = 2;
        pstate->dw4.intra_sad_measure_adjustment = 2;
        pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;

        pstate->dw5.cre_prefetch_enable = 1;

        pstate->dw8.non_skip_zero_mv_const_added = 1;
        pstate->dw8.non_skip_mb_mode_const_added = 1;
        pstate->dw8.ref_id_cost_mode_select = 1;

        pstate->dw9.mode0_cost = 7;
        pstate->dw9.mode1_cost = 26;
        pstate->dw9.mode2_cost = 30;
        pstate->dw9.mode3_cost = 57;

        pstate->dw10.mode4_cost = 8;
        pstate->dw10.mode5_cost = 2;
        pstate->dw10.mode6_cost = 4;
        pstate->dw10.mode7_cost = 6;

        pstate->dw11.mode8_cost = 5;
        pstate->dw11.mode9_cost = 0;
        pstate->dw11.ref_id_cost = 4;
        pstate->dw11.chroma_intra_mode_cost = 0;

        pstate->dw12_13.mv_cost.dw0.mv0_cost = 0;
        pstate->dw12_13.mv_cost.dw0.mv1_cost = 6;
        pstate->dw12_13.mv_cost.dw0.mv2_cost = 6;
        pstate->dw12_13.mv_cost.dw0.mv3_cost = 9;
        pstate->dw12_13.mv_cost.dw1.mv4_cost = 10;
        pstate->dw12_13.mv_cost.dw1.mv5_cost = 13;
        pstate->dw12_13.mv_cost.dw1.mv6_cost = 14;
        pstate->dw12_13.mv_cost.dw1.mv7_cost = 24;

        pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
        pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
        pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;

        pstate->dw22.small_mb_size_in_word = 0xff;
        pstate->dw22.large_mb_size_in_word = 0xff;

        pstate->dw27.max_hmv_r = 0x2000;
        pstate->dw27.max_vmv_r = 0x200;

        pstate->dw31.offset0_for_zone0_neg_zone1_boundary = 800;

        pstate->dw32.offset1_for_zone1_neg_zone2_boundary = 1600;
        pstate->dw32.offset2_for_zone2_neg_zone3_boundary = 2400;

        pstate->dw33.qp_range_check_upper_bound = 0x33;
        pstate->dw33.qp_range_check_lower_bound = 0x0a;
        pstate->dw33.qp_range_check_value = 0x0f;

        pstate->dw34.midpoint_distortion = 0x640;
    }

    /* ROI will be updated in HuC kernel for CBR/VBR */
    if (!vdenc_context->brc_enabled && vdenc_context->num_roi) {
        pstate->dw34.roi_enable = 1;

        /* Zone QP deltas are clamped to the signed 4-bit range [-8, 7] */
        pstate->dw30.roi_qp_adjustment_for_zone1 = CLAMP(-8, 7, vdenc_context->roi[0].value);

        if (vdenc_context->num_roi > 1)
            pstate->dw30.roi_qp_adjustment_for_zone2 = CLAMP(-8, 7, vdenc_context->roi[1].value);

        if (vdenc_context->num_roi > 2)
            pstate->dw30.roi_qp_adjustment_for_zone3 = CLAMP(-8, 7, vdenc_context->roi[2].value);
    }

    pstate->dw1.transform_8x8_flag = vdenc_context->transform_8x8_mode_enable;
    pstate->dw1.extended_pak_obj_cmd_enable = !!vdenc_context->use_extended_pak_obj_cmd;

    pstate->dw3.picture_width = vdenc_context->frame_width_in_mbs;

    pstate->dw4.forward_transform_skip_check_enable = 1; /* TODO: double-check it */

    pstate->dw5.picture_height_minus1 = vdenc_context->frame_height_in_mbs - 1;
    pstate->dw5.picture_type = vdenc_context->frame_type;
    pstate->dw5.constrained_intra_prediction_flag  = pic_param->pic_fields.bits.constrained_intra_pred_flag;

    /* With a single reference, skip the second HME reference search */
    if (vdenc_context->frame_type == VDENC_FRAME_P) {
        pstate->dw5.hme_ref1_disable = vdenc_context->num_refs[0] == 1 ? 1 : 0;
    }

    pstate->dw5.mb_slice_threshold_value = 0;

    pstate->dw6.slice_macroblock_height_minus1 = vdenc_context->frame_height_in_mbs - 1; /* single slice only */

    if (pstate->dw1.transform_8x8_flag)
        pstate->dw8.luma_intra_partition_mask = 0;
    else
        pstate->dw8.luma_intra_partition_mask = (1 << 1); /* disable transform_8x8 */

    pstate->dw14.qp_prime_y = pic_param->pic_init_qp + slice_param->slice_qp_delta;      /* TODO: check whether it is OK to use the first slice only */

    /* BRC-computed costs override the defaults set above */
    if (update_cost) {
        pstate->dw9.mode0_cost = vdenc_context->mode_cost[0];
        pstate->dw9.mode1_cost = vdenc_context->mode_cost[1];
        pstate->dw9.mode2_cost = vdenc_context->mode_cost[2];
        pstate->dw9.mode3_cost = vdenc_context->mode_cost[3];

        pstate->dw10.mode4_cost = vdenc_context->mode_cost[4];
        pstate->dw10.mode5_cost = vdenc_context->mode_cost[5];
        pstate->dw10.mode6_cost = vdenc_context->mode_cost[6];
        pstate->dw10.mode7_cost = vdenc_context->mode_cost[7];

        pstate->dw11.mode8_cost = vdenc_context->mode_cost[8];
        pstate->dw11.mode9_cost = vdenc_context->mode_cost[9];
        pstate->dw11.ref_id_cost = vdenc_context->mode_cost[10];
        pstate->dw11.chroma_intra_mode_cost = vdenc_context->mode_cost[11];

        pstate->dw12_13.mv_cost.dw0.mv0_cost = vdenc_context->mv_cost[0];
        pstate->dw12_13.mv_cost.dw0.mv1_cost = vdenc_context->mv_cost[1];
        pstate->dw12_13.mv_cost.dw0.mv2_cost = vdenc_context->mv_cost[2];
        pstate->dw12_13.mv_cost.dw0.mv3_cost = vdenc_context->mv_cost[3];
        pstate->dw12_13.mv_cost.dw1.mv4_cost = vdenc_context->mv_cost[4];
        pstate->dw12_13.mv_cost.dw1.mv5_cost = vdenc_context->mv_cost[5];
        pstate->dw12_13.mv_cost.dw1.mv6_cost = vdenc_context->mv_cost[6];
        pstate->dw12_13.mv_cost.dw1.mv7_cost = vdenc_context->mv_cost[7];

        pstate->dw28_29.hme_mv_cost.dw0.mv0_cost = vdenc_context->hme_mv_cost[0];
        pstate->dw28_29.hme_mv_cost.dw0.mv1_cost = vdenc_context->hme_mv_cost[1];
        pstate->dw28_29.hme_mv_cost.dw0.mv2_cost = vdenc_context->hme_mv_cost[2];
        pstate->dw28_29.hme_mv_cost.dw0.mv3_cost = vdenc_context->hme_mv_cost[3];
        pstate->dw28_29.hme_mv_cost.dw1.mv4_cost = vdenc_context->hme_mv_cost[4];
        pstate->dw28_29.hme_mv_cost.dw1.mv5_cost = vdenc_context->hme_mv_cost[5];
        pstate->dw28_29.hme_mv_cost.dw1.mv6_cost = vdenc_context->hme_mv_cost[6];
        pstate->dw28_29.hme_mv_cost.dw1.mv7_cost = vdenc_context->hme_mv_cost[7];
    }

    /* Vertical MV range is constrained by the level (Table A-1) */
    pstate->dw27.max_vmv_r = gen9_vdenc_get_max_vmv_range(seq_param->level_idc);

    pstate->dw34.image_state_qp_override = (vdenc_context->internal_rate_mode == I965_BRC_CQP) ? 1 : 0;

    /* TODO: check rolling I */

    /* TODO: handle ROI */

    /* TODO: check stream in support */
}
2132
2133 static void
2134 gen9_vdenc_init_img_states(VADriverContextP ctx,
2135                            struct encode_state *encode_state,
2136                            struct intel_encoder_context *encoder_context)
2137 {
2138     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2139     struct gen9_mfx_avc_img_state *mfx_img_cmd;
2140     struct gen9_vdenc_img_state *vdenc_img_cmd;
2141     char *pbuffer;
2142
2143     pbuffer = i965_map_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
2144
2145     if (!pbuffer)
2146         return;
2147
2148     mfx_img_cmd = (struct gen9_mfx_avc_img_state *)pbuffer;
2149     gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, mfx_img_cmd, 1);
2150     pbuffer += sizeof(*mfx_img_cmd);
2151
2152     vdenc_img_cmd = (struct gen9_vdenc_img_state *)pbuffer;
2153     gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, vdenc_img_cmd, 0);
2154     pbuffer += sizeof(*vdenc_img_cmd);
2155
2156     /* Add batch buffer end command */
2157     *((unsigned int *)pbuffer) = MI_BATCH_BUFFER_END;
2158
2159     i965_unmap_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
2160 }
2161
2162 static void
2163 gen9_vdenc_huc_brc_update_constant_data(VADriverContextP ctx,
2164                                         struct encode_state *encode_state,
2165                                         struct intel_encoder_context *encoder_context)
2166 {
2167     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2168     struct huc_brc_update_constant_data *brc_buffer;
2169
2170     brc_buffer = (struct huc_brc_update_constant_data *)
2171                  i965_map_gpe_resource(&vdenc_context->brc_constant_data_res);
2172
2173     if (!brc_buffer)
2174         return;
2175
2176     memcpy(brc_buffer, &gen9_brc_update_constant_data, sizeof(gen9_brc_update_constant_data));
2177
2178     if (vdenc_context->internal_rate_mode == I965_BRC_VBR) {
2179         memcpy(brc_buffer->dist_qp_adj_tab_i, dist_qp_adj_tab_i_vbr, sizeof(dist_qp_adj_tab_i_vbr));
2180         memcpy(brc_buffer->dist_qp_adj_tab_p, dist_qp_adj_tab_p_vbr, sizeof(dist_qp_adj_tab_p_vbr));
2181         memcpy(brc_buffer->dist_qp_adj_tab_b, dist_qp_adj_tab_b_vbr, sizeof(dist_qp_adj_tab_b_vbr));
2182         memcpy(brc_buffer->buf_rate_adj_tab_i, buf_rate_adj_tab_i_vbr, sizeof(buf_rate_adj_tab_i_vbr));
2183         memcpy(brc_buffer->buf_rate_adj_tab_p, buf_rate_adj_tab_p_vbr, sizeof(buf_rate_adj_tab_p_vbr));
2184         memcpy(brc_buffer->buf_rate_adj_tab_b, buf_rate_adj_tab_b_vbr, sizeof(buf_rate_adj_tab_b_vbr));
2185     }
2186
2187
2188     i965_unmap_gpe_resource(&vdenc_context->brc_constant_data_res);
2189 }
2190
/*
 * Emit the HuC-based BRC update pass for the current frame/pass.
 *
 * Sequence: flush the video pipeline, conditionally end the batch based
 * on the BRC-init status, rebuild the image-state second-level batch and
 * BRC constant tables, program the HuC (IMEM firmware, per-pass DMEM,
 * surface regions, stream object), start the firmware, flush again, and
 * finally save HUC_STATUS plus a (1 << 31) mask word so later stages can
 * test whether the HuC run completed.
 */
static void
gen9_vdenc_huc_brc_update(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct huc_pipe_mode_select_parameter pipe_mode_select_params;
    struct huc_imem_state_parameter imem_state_params;
    struct huc_dmem_state_parameter dmem_state_params;
    struct huc_virtual_addr_parameter virtual_addr_params;
    struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
    struct huc_stream_object_parameter stream_object_params;
    struct huc_start_parameter start_params;
    struct vd_pipeline_flush_parameter pipeline_flush_params;
    struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_params;

    /* Flush with video pipeline cache invalidation before reprogramming */
    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
    mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
    gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);

    /* Until BRC init has completed (or while a reset is pending), end the
     * batch conditionally on the value in huc_status2_res — presumably
     * written by the BRC init pass; confirm the exact semantics there.
     */
    if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset) {
        struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;

        memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
        mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
        gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
    }

    /* Rebuild the MFX/VDEnc image-state second-level batch for this frame */
    gen9_vdenc_init_img_states(ctx, encode_state, encoder_context);

    /* Load the BRC-update firmware */
    memset(&imem_state_params, 0, sizeof(imem_state_params));
    imem_state_params.huc_firmware_descriptor = HUC_BRC_UPDATE;
    gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);

    memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
    gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);

    /* DMEM carries the per-pass BRC update parameters */
    gen9_vdenc_update_huc_update_dmem(ctx, encoder_context);
    memset(&dmem_state_params, 0, sizeof(dmem_state_params));
    dmem_state_params.huc_data_source_res = &vdenc_context->brc_update_dmem_res[vdenc_context->current_pass];
    dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
    dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_update_dmem), 64);
    gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);

    /* Bind the HuC surface regions; those flagged is_target are outputs */
    gen9_vdenc_huc_brc_update_constant_data(ctx, encode_state, encoder_context);
    memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
    virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
    virtual_addr_params.regions[0].is_target = 1;
    virtual_addr_params.regions[1].huc_surface_res = &vdenc_context->vdenc_statistics_res;
    virtual_addr_params.regions[2].huc_surface_res = &vdenc_context->pak_statistics_res;
    virtual_addr_params.regions[3].huc_surface_res = &vdenc_context->vdenc_avc_image_state_res;
    virtual_addr_params.regions[4].huc_surface_res = &vdenc_context->hme_detection_summary_buffer_res;
    virtual_addr_params.regions[4].is_target = 1;
    virtual_addr_params.regions[5].huc_surface_res = &vdenc_context->brc_constant_data_res;
    virtual_addr_params.regions[6].huc_surface_res = &vdenc_context->second_level_batch_res;
    virtual_addr_params.regions[6].is_target = 1;
    gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);

    /* Stream-in points at a dummy buffer; a single unit is consumed */
    memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
    ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
    ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
    gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);

    memset(&stream_object_params, 0, sizeof(stream_object_params));
    stream_object_params.indirect_stream_in_data_length = 1;
    stream_object_params.indirect_stream_in_start_address = 0;
    gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);

    gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);

    /* Kick off the firmware */
    memset(&start_params, 0, sizeof(start_params));
    start_params.last_stream_object = 1;
    gen9_vdenc_huc_start(ctx, encoder_context, &start_params);

    memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
    pipeline_flush_params.hevc_pipeline_done = 1;
    pipeline_flush_params.hevc_pipeline_command_flush = 1;
    gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);

    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
    mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
    gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);

    /* Store HUC_STATUS */
    memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
    mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS;
    mi_store_register_mem_params.bo = vdenc_context->huc_status_res.bo;
    gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);

    /* Write HUC_STATUS mask (1 << 31) */
    memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
    mi_store_data_imm_params.bo = vdenc_context->huc_status_res.bo;
    mi_store_data_imm_params.offset = 4;
    mi_store_data_imm_params.dw0 = (1 << 31);
    gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
}
2290
/*
 * Emit MFX_PIPE_MODE_SELECT (5 DWords): puts the MFX engine in AVC
 * encoding mode with VDEnc enabled.  Pre-/post-deblocking output writes
 * are enabled only when the corresponding context buffer exists; the
 * scaled surface and frame-statistics stream-out are always on (the
 * latter is required in VDEnc mode).
 */
static void
gen9_vdenc_mfx_pipe_mode_select(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (1 << 29) |
                  (MFX_LONG_MODE << 17) |       /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) |
                  (1 << 13) |                   /* VDEnc mode */
                  ((!!vdenc_context->post_deblocking_output_res.bo) << 9)  |    /* Post Deblocking Output */
                  ((!!vdenc_context->pre_deblocking_output_res.bo) << 8)  |     /* Pre Deblocking Output */
                  (1 << 7)  |                   /* Scaled surface enable */
                  (1 << 6)  |                   /* Frame statistics stream out enable, always '1' in VDEnc mode */
                  (1 << 4)  |                   /* encoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
2319
/*
 * Emit MFX_SURFACE_STATE (6 DWords) for a planar 4:2:0 8-bit surface
 * with interleaved U/V, Y-major tiled.  'id' is written into DW1 (the
 * surface id slot being described); geometry comes from gpe_resource.
 */
static void
gen9_vdenc_mfx_surface_state(VADriverContextP ctx,
                             struct intel_encoder_context *encoder_context,
                             struct i965_gpe_resource *gpe_resource,
                             int id)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, id);
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) |    /* 420 planar YUV surface */
                  (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) |    /* pitch */
                  (0 << 2)  |                           /* must be 0 for interleave U/V */
                  (1 << 1)  |                           /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for V(cr), same as U(cb) with interleaved U/V */

    ADVANCE_BCS_BATCH(batch);
}
2351
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (65 DWords): binds every frame-level
 * buffer the MFX/PAK engine touches — deblocking outputs, the raw input,
 * PAK statistics (written at DW10-12, re-read at DW52-54), row-store
 * scratch buffers, the reconstructed reference list and the 4x
 * downscaled reconstruction.  The per-DW layout is annotated below.
 */
static void
gen9_vdenc_mfx_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 65);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));

    /* the DW1-3 is for pre_deblocking */
    OUT_BUFFER_3DW(batch, vdenc_context->pre_deblocking_output_res.bo, 1, 0, 0);

    /* the DW4-6 is for the post_deblocking */
    OUT_BUFFER_3DW(batch, vdenc_context->post_deblocking_output_res.bo, 1, 0, 0);

    /* the DW7-9 is for the uncompressed_picture */
    OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);

    /* the DW10-12 is for PAK information (write) */
    OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 1, 0, 0);

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BUFFER_3DW(batch, vdenc_context->mfx_intra_row_store_scratch_res.bo, 1, 0, 0);

    /* the DW16-18 is for the deblocking filter */
    OUT_BUFFER_3DW(batch, vdenc_context->mfx_deblocking_filter_row_store_scratch_res.bo, 1, 0, 0);

    /* the DW 19-50 is for Reference pictures*/
    for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
        OUT_BUFFER_2DW(batch, vdenc_context->list_reference_res[i].bo, 0, 0);
    }

    /* DW 51, reference picture attributes (MOCS cacheability) */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* The DW 52-54 is for PAK information (read) */
    OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 0, 0, 0);

    /* the DW 55-57 is the ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* the DW 58-60 is the second ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW 61, memory compress enable & mode */
    OUT_BCS_BATCH(batch, 0);

    /* the DW 62-64 is the 4x Down Scaling surface */
    OUT_BUFFER_3DW(batch, vdenc_context->scaled_4x_recon_surface_res.bo, 1, 0, 0);

    ADVANCE_BCS_BATCH(batch);
}
2407
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 DWords).  Only the PAK-BSE
 * section (DW21-25) is programmed: it points the compressed bitstream
 * output at compressed_bitstream.res with an upper bound at end_offset.
 * All other indirect objects are unused in VDEnc encoding mode.
 * NOTE(review): 'i965' is referenced by the OUT_BUFFER_* macros.
 */
static void
gen9_vdenc_mfx_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* The DW1-5 is for the MFX indirect bistream offset, ignore for VDEnc mode */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* the DW6-10 is for MFX Indirect MV Object Base Address, ignore for VDEnc mode */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
     * Note: an offset is specified in MFX_AVC_SLICE_STATE
     */
    OUT_BUFFER_3DW(batch,
                   vdenc_context->compressed_bitstream.res.bo,
                   1,
                   0,
                   0);
    OUT_BUFFER_2DW(batch,
                   vdenc_context->compressed_bitstream.res.bo,
                   1,
                   vdenc_context->compressed_bitstream.end_offset);

    ADVANCE_BCS_BATCH(batch);
}
2449
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 DWords): only the BSD/MPC
 * row-store scratch buffer is needed for encoding; the MPR row-store
 * and bitplane-read buffers are decoder-only and left NULL.
 * NOTE(review): 'i965' is referenced by the OUT_BUFFER_3DW macro.
 */
static void
gen9_vdenc_mfx_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* The DW1-3 is for bsd/mpc row store scratch buffer */
    OUT_BUFFER_3DW(batch, vdenc_context->mfx_bsd_mpc_row_store_scratch_res.bo, 1, 0, 0);

    /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    ADVANCE_BCS_BATCH(batch);
}
2472
2473 static void
2474 gen9_vdenc_mfx_qm_state(VADriverContextP ctx,
2475                         int qm_type,
2476                         unsigned int *qm,
2477                         int qm_length,
2478                         struct intel_encoder_context *encoder_context)
2479 {
2480     struct intel_batchbuffer *batch = encoder_context->base.batch;
2481     unsigned int qm_buffer[16];
2482
2483     assert(qm_length <= 16);
2484     assert(sizeof(*qm) == 4);
2485     memcpy(qm_buffer, qm, qm_length * 4);
2486
2487     BEGIN_BCS_BATCH(batch, 18);
2488     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
2489     OUT_BCS_BATCH(batch, qm_type << 0);
2490     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
2491     ADVANCE_BCS_BATCH(batch);
2492 }
2493
2494 static void
2495 gen9_vdenc_mfx_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2496 {
2497     /* TODO: add support for non flat matrix */
2498     unsigned int qm[16] = {
2499         0x10101010, 0x10101010, 0x10101010, 0x10101010,
2500         0x10101010, 0x10101010, 0x10101010, 0x10101010,
2501         0x10101010, 0x10101010, 0x10101010, 0x10101010,
2502         0x10101010, 0x10101010, 0x10101010, 0x10101010
2503     };
2504
2505     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
2506     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
2507     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
2508     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
2509 }
2510
2511 static void
2512 gen9_vdenc_mfx_fqm_state(VADriverContextP ctx,
2513                          int fqm_type,
2514                          unsigned int *fqm,
2515                          int fqm_length,
2516                          struct intel_encoder_context *encoder_context)
2517 {
2518     struct intel_batchbuffer *batch = encoder_context->base.batch;
2519     unsigned int fqm_buffer[32];
2520
2521     assert(fqm_length <= 32);
2522     assert(sizeof(*fqm) == 4);
2523     memcpy(fqm_buffer, fqm, fqm_length * 4);
2524
2525     BEGIN_BCS_BATCH(batch, 34);
2526     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
2527     OUT_BCS_BATCH(batch, fqm_type << 0);
2528     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
2529     ADVANCE_BCS_BATCH(batch);
2530 }
2531
2532 static void
2533 gen9_vdenc_mfx_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2534 {
2535     /* TODO: add support for non flat matrix */
2536     unsigned int qm[32] = {
2537         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2538         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2539         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2540         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2541         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2542         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2543         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2544         0x10001000, 0x10001000, 0x10001000, 0x10001000
2545     };
2546
2547     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
2548     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
2549     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
2550     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
2551 }
2552
2553 static void
2554 gen9_vdenc_mfx_avc_img_state(VADriverContextP ctx,
2555                              struct encode_state *encode_state,
2556                              struct intel_encoder_context *encoder_context)
2557 {
2558     struct intel_batchbuffer *batch = encoder_context->base.batch;
2559     struct gen9_mfx_avc_img_state mfx_img_cmd;
2560
2561     gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &mfx_img_cmd, 0);
2562
2563     BEGIN_BCS_BATCH(batch, (sizeof(mfx_img_cmd) >> 2));
2564     intel_batchbuffer_data(batch, &mfx_img_cmd, sizeof(mfx_img_cmd));
2565     ADVANCE_BCS_BATCH(batch);
2566 }
2567
/*
 * Emit VDENC_PIPE_MODE_SELECT (2 DWords): enables the AVC VDEnc pipe
 * with TLB prefetch and frame-statistics stream-out; stream-in and PAK
 * threshold check come from the per-frame context flags.
 */
static void
gen9_vdenc_vdenc_pipe_mode_select(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 2);

    OUT_BCS_BATCH(batch, VDENC_PIPE_MODE_SELECT | (2 - 2));
    OUT_BCS_BATCH(batch,
                  (vdenc_context->vdenc_streamin_enable << 9) |
                  (vdenc_context->vdenc_pak_threshold_check_enable << 8) |
                  (1 << 7)  |                   /* Tlb prefetch enable */
                  (1 << 5)  |                   /* Frame Statistics Stream-Out Enable */
                  (VDENC_CODEC_AVC << 0));

    ADVANCE_BCS_BATCH(batch);
}
2588
/*
 * Emit one VDEnc surface state command (src/ref/ds-ref — selected by
 * vdenc_surface_cmd) for a planar 4:2:0 8-bit surface with interleaved
 * U/V, Y-major tiled, described by gpe_resource.
 */
static void
gen9_vdenc_vdenc_surface_state(VADriverContextP ctx,
                               struct intel_encoder_context *encoder_context,
                               struct i965_gpe_resource *gpe_resource,
                               int vdenc_surface_cmd)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, vdenc_surface_cmd | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (VDENC_SURFACE_PLANAR_420_8 << 28) |  /* 420 planar YUV surface only on SKL */
                  (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) |    /* pitch */
                  (0 << 2)  |                           /* must be 0 for interleave U/V */
                  (1 << 1)  |                           /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for v(cr) */

    ADVANCE_BCS_BATCH(batch);
}
2620
/* Emit VDENC_SRC_SURFACE_STATE for the source (input) surface */
static void
gen9_vdenc_vdenc_src_surface_state(VADriverContextP ctx,
                                   struct intel_encoder_context *encoder_context,
                                   struct i965_gpe_resource *gpe_resource)
{
    gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_SRC_SURFACE_STATE);
}
2628
/* Emit VDENC_REF_SURFACE_STATE for a full-size reference surface */
static void
gen9_vdenc_vdenc_ref_surface_state(VADriverContextP ctx,
                                   struct intel_encoder_context *encoder_context,
                                   struct i965_gpe_resource *gpe_resource)
{
    gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_REF_SURFACE_STATE);
}
2636
/* Emit VDENC_DS_REF_SURFACE_STATE for a downscaled reference surface */
static void
gen9_vdenc_vdenc_ds_ref_surface_state(VADriverContextP ctx,
                                      struct intel_encoder_context *encoder_context,
                                      struct i965_gpe_resource *gpe_resource)
{
    gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_DS_REF_SURFACE_STATE);
}
2644
/*
 * Emit VDENC_PIPE_BUF_ADDR_STATE (37 DWords): binds the 4x downscaled
 * and full-size forward references, the uncompressed input, optional
 * stream-in data, the row-store scratch and the statistics stream-out
 * buffer.  A list_ref_idx entry of 0xFF marks an unused reference slot
 * and programs a NULL address; several slots are unused on SKL (see the
 * per-DW comments).  NOTE(review): 'i965' is referenced by the
 * OUT_BUFFER_3DW macro.
 */
static void
gen9_vdenc_vdenc_pipe_buf_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 37);

    OUT_BCS_BATCH(batch, VDENC_PIPE_BUF_ADDR_STATE | (37 - 2));

    /* DW1-6 for DS FWD REF0/REF1 */

    if (vdenc_context->list_ref_idx[0][0] != 0xFF)
        OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    if (vdenc_context->list_ref_idx[0][1] != 0xFF)
        OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW7-9 for DS BWD REF0, ignored on SKL */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW10-12 for uncompressed input data */
    OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);

    /* DW13-DW15 for streamin data */
    if (vdenc_context->vdenc_streamin_enable)
        OUT_BUFFER_3DW(batch, vdenc_context->vdenc_streamin_res.bo, 0, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW16-DW18 for row scratch buffer */
    OUT_BUFFER_3DW(batch, vdenc_context->vdenc_row_store_scratch_res.bo, 1, 0, 0);

    /* DW19-DW21, ignored on SKL */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW22-DW27 for FWD REF0/REF1 */

    /* NOTE(review): these also program the 4x-scaled resources — confirm
     * against the PRM whether full-size references are expected here. */
    if (vdenc_context->list_ref_idx[0][0] != 0xFF)
        OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    if (vdenc_context->list_ref_idx[0][1] != 0xFF)
        OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
    else
        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW28-DW30 for FWD REF2, ignored on SKL */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW31-DW33 for BDW REF0, ignored on SKL */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW34-DW36 for VDEnc statistics streamout */
    OUT_BUFFER_3DW(batch, vdenc_context->vdenc_statistics_res.bo, 1, 0, 0);

    ADVANCE_BCS_BATCH(batch);
}
2711
2712 static void
2713 gen9_vdenc_vdenc_const_qpt_state(VADriverContextP ctx,
2714                                  struct encode_state *encode_state,
2715                                  struct intel_encoder_context *encoder_context)
2716 {
2717     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2718     struct intel_batchbuffer *batch = encoder_context->base.batch;
2719
2720     BEGIN_BCS_BATCH(batch, 61);
2721
2722     OUT_BCS_BATCH(batch, VDENC_CONST_QPT_STATE | (61 - 2));
2723
2724     if (vdenc_context->frame_type == VDENC_FRAME_I) {
2725         /* DW1-DW11 */
2726         intel_batchbuffer_data(batch, (void *)vdenc_const_qp_lambda, sizeof(vdenc_const_qp_lambda));
2727
2728         /* DW12-DW25 */
2729         intel_batchbuffer_data(batch, (void *)vdenc_const_skip_threshold, sizeof(vdenc_const_skip_threshold));
2730
2731         /* DW26-DW39 */
2732         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_0, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0));
2733
2734         /* DW40-DW46 */
2735         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_1, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1));
2736
2737         /* DW47-DW53 */
2738         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_2, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2));
2739
2740         /* DW54-DW60 */
2741         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_3, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3));
2742     } else {
2743         int i;
2744         uint16_t tmp_vdenc_skip_threshold_p[28];
2745
2746         memcpy(&tmp_vdenc_skip_threshold_p, vdenc_const_skip_threshold_p, sizeof(vdenc_const_skip_threshold_p));
2747
2748         for (i = 0; i < 28; i++) {
2749             tmp_vdenc_skip_threshold_p[i] *= 3;
2750         }
2751
2752         /* DW1-DW11 */
2753         intel_batchbuffer_data(batch, (void *)vdenc_const_qp_lambda_p, sizeof(vdenc_const_qp_lambda_p));
2754
2755         /* DW12-DW25 */
2756         intel_batchbuffer_data(batch, (void *)tmp_vdenc_skip_threshold_p, sizeof(vdenc_const_skip_threshold_p));
2757
2758         /* DW26-DW39 */
2759         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_0_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0_p));
2760
2761         /* DW40-DW46 */
2762         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_1_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1_p));
2763
2764         /* DW47-DW53 */
2765         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_2_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2_p));
2766
2767         /* DW54-DW60 */
2768         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_3_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3_p));
2769     }
2770
2771     ADVANCE_BCS_BATCH(batch);
2772 }
2773
2774 static void
2775 gen9_vdenc_vdenc_walker_state(VADriverContextP ctx,
2776                               struct encode_state *encode_state,
2777                               struct intel_encoder_context *encoder_context)
2778 {
2779     struct intel_batchbuffer *batch = encoder_context->base.batch;
2780
2781     BEGIN_BCS_BATCH(batch, 2);
2782
2783     OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (2 - 2));
2784     OUT_BCS_BATCH(batch, 0); /* All fields are set to 0 */
2785
2786     ADVANCE_BCS_BATCH(batch);
2787 }
2788
2789 static void
2790 gen95_vdenc_vdecn_weihgtsoffsets_state(VADriverContextP ctx,
2791                                        struct encode_state *encode_state,
2792                                        struct intel_encoder_context *encoder_context,
2793                                        VAEncSliceParameterBufferH264 *slice_param)
2794 {
2795     struct intel_batchbuffer *batch = encoder_context->base.batch;
2796     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2797
2798     BEGIN_BCS_BATCH(batch, 3);
2799
2800     OUT_BCS_BATCH(batch, VDENC_WEIGHTSOFFSETS_STATE | (3 - 2));
2801
2802     if (pic_param->pic_fields.bits.weighted_pred_flag == 1) {
2803         OUT_BCS_BATCH(batch, (slice_param->luma_offset_l0[1] << 24 |
2804                               slice_param->luma_weight_l0[1] << 16 |
2805                               slice_param->luma_offset_l0[0] << 8 |
2806                               slice_param->luma_weight_l0[0] << 0));
2807         OUT_BCS_BATCH(batch, (slice_param->luma_offset_l0[2] << 8 |
2808                               slice_param->luma_weight_l0[2] << 0));
2809     } else {
2810         OUT_BCS_BATCH(batch, (0 << 24 |
2811                               1 << 16 |
2812                               0 << 8 |
2813                               1 << 0));
2814         OUT_BCS_BATCH(batch, (0 << 8 |
2815                               1 << 0));
2816     }
2817
2818
2819     ADVANCE_BCS_BATCH(batch);
2820 }
2821
2822 static void
2823 gen95_vdenc_vdenc_walker_state(VADriverContextP ctx,
2824                                struct encode_state *encode_state,
2825                                struct intel_encoder_context *encoder_context,
2826                                VAEncSliceParameterBufferH264 *slice_param,
2827                                VAEncSliceParameterBufferH264 *next_slice_param)
2828 {
2829     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2830     struct intel_batchbuffer *batch = encoder_context->base.batch;
2831     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2832     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
2833     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
2834     int luma_log2_weight_denom, weighted_pred_idc;
2835
2836     slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
2837     slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
2838
2839     if (next_slice_param) {
2840         next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
2841         next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
2842     } else {
2843         next_slice_hor_pos = 0;
2844         next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
2845     }
2846
2847     if (slice_type == SLICE_TYPE_P)
2848         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
2849     else
2850         weighted_pred_idc = 0;
2851
2852     if (weighted_pred_idc == 1)
2853         luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
2854     else
2855         luma_log2_weight_denom = 0;
2856
2857     BEGIN_BCS_BATCH(batch, 4);
2858
2859     OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (4 - 2));
2860     OUT_BCS_BATCH(batch, (slice_hor_pos << 16 |
2861                           slice_ver_pos));
2862     OUT_BCS_BATCH(batch, (next_slice_hor_pos << 16 |
2863                           next_slice_ver_pos));
2864     OUT_BCS_BATCH(batch, luma_log2_weight_denom);
2865
2866     ADVANCE_BCS_BATCH(batch);
2867 }
2868
2869 static void
2870 gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
2871                            struct encode_state *encode_state,
2872                            struct intel_encoder_context *encoder_context)
2873 {
2874     struct intel_batchbuffer *batch = encoder_context->base.batch;
2875     struct gen9_vdenc_img_state vdenc_img_cmd;
2876
2877     gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, &vdenc_img_cmd, 1);
2878
2879     BEGIN_BCS_BATCH(batch, (sizeof(vdenc_img_cmd) >> 2));
2880     intel_batchbuffer_data(batch, &vdenc_img_cmd, sizeof(vdenc_img_cmd));
2881     ADVANCE_BCS_BATCH(batch);
2882 }
2883
2884 static void
2885 gen9_vdenc_mfx_avc_insert_object(VADriverContextP ctx,
2886                                  struct intel_encoder_context *encoder_context,
2887                                  unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
2888                                  int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
2889                                  int slice_header_indicator)
2890 {
2891     struct intel_batchbuffer *batch = encoder_context->base.batch;
2892
2893     if (data_bits_in_last_dw == 0)
2894         data_bits_in_last_dw = 32;
2895
2896     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
2897
2898     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
2899     OUT_BCS_BATCH(batch,
2900                   (0 << 16) |   /* always start at offset 0 */
2901                   (slice_header_indicator << 14) |
2902                   (data_bits_in_last_dw << 8) |
2903                   (skip_emul_byte_count << 4) |
2904                   (!!emulation_flag << 3) |
2905                   ((!!is_last_header) << 2) |
2906                   ((!!is_end_of_slice) << 1) |
2907                   (0 << 0));    /* TODO: check this flag */
2908     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
2909
2910     ADVANCE_BCS_BATCH(batch);
2911 }
2912
/*
 * Insert the packed data belonging to one slice into the bitstream:
 * first all raw packed buffers attached to the slice (everything except the
 * slice header itself), then the slice header -- either generated by the
 * driver (when no packed slice header was supplied) or copied from the
 * application's packed header buffers.
 *
 * On slice-level VDEnc a single zero byte is emitted in front of the slice
 * header (required on KBL); the pre-built header pointer/length are then
 * adjusted by one byte to compensate.
 */
static void
gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
                                            struct encode_state *encode_state,
                                            struct intel_encoder_context *encoder_context,
                                            int slice_index)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    int count, i, start_index;
    int slice_header_index;
    unsigned int insert_one_zero_byte = 0;

    /* -1 means no packed slice header was provided by the application */
    if (encode_state->slice_header_index[slice_index] == 0)
        slice_header_index = -1;
    else
        slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    count = encode_state->slice_rawdata_count[slice_index];
    start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    /* First pass: insert all non-slice-header packed buffers for this slice */
    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);

        /* skip the slice header packed data type as it is lastly inserted */
        if (param->type == VAEncPackedHeaderSlice)
            continue;

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* as the slice header is still required, the last header flag is set to
         * zero.
         */
        gen9_vdenc_mfx_avc_insert_object(ctx,
                                         encoder_context,
                                         header_data,
                                         ALIGN(length_in_bits, 32) >> 5,
                                         length_in_bits & 0x1f,
                                         skip_emul_byte_cnt,
                                         0,
                                         0,
                                         !param->has_emulation_bytes,
                                         0);

    }

    /* Slice-level VDEnc always needs the extra zero byte */
    if (!vdenc_context->is_frame_level_vdenc) {
        insert_one_zero_byte = 1;
    }

    /* Insert one zero byte before the slice header if no any other NAL unit is inserted, required on KBL */
    if (insert_one_zero_byte) {
        unsigned int insert_data[] = { 0, };

        gen9_vdenc_mfx_avc_insert_object(ctx,
                                         encoder_context,
                                         insert_data,
                                         1,
                                         8,
                                         1,
                                         0, 0, 0, 0);
    }

    if (slice_header_index == -1) {
        VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
        unsigned char *slice_header = NULL, *slice_header1 = NULL;
        int slice_header_length_in_bits = 0;
        uint32_t saved_macroblock_address = 0;

        /* No slice header data is passed. And the driver needs to generate it */
        /* For the Normal H264 */

        /* NOTE(review): on KBL/GLK, slices after the first are built with
         * macroblock_address forced to 0 (restored below) -- presumably the
         * hardware supplies the real slice position; confirm against the
         * slice-level VDEnc programming model.
         */
        if (slice_index &&
            (IS_KBL(i965->intel.device_info) ||
             IS_GLK(i965->intel.device_info))) {
            saved_macroblock_address = slice_params->macroblock_address;
            slice_params->macroblock_address = 0;
        }

        /* build_avc_slice_header() allocates slice_header; freed below */
        slice_header_length_in_bits = build_avc_slice_header(seq_param,
                                                             pic_param,
                                                             slice_params,
                                                             &slice_header);

        slice_header1 = slice_header;

        if (slice_index &&
            (IS_KBL(i965->intel.device_info) ||
             IS_GLK(i965->intel.device_info))) {
            slice_params->macroblock_address = saved_macroblock_address;
        }

        /* Skip the first header byte: the separate zero byte already stands in for it */
        if (insert_one_zero_byte) {
            slice_header1 += 1;
            slice_header_length_in_bits -= 8;
        }

        gen9_vdenc_mfx_avc_insert_object(ctx,
                                         encoder_context,
                                         (unsigned int *)slice_header1,
                                         ALIGN(slice_header_length_in_bits, 32) >> 5,
                                         slice_header_length_in_bits & 0x1f,
                                         5,  /* first 5 bytes are start code + nal unit type */
                                         1, 0, 1,
                                         1);

        free(slice_header);
    } else {
        unsigned int skip_emul_byte_cnt;
        unsigned char *slice_header1 = NULL;

        /* On KBL/GLK, reuse the packed slice header of slice 0 for every slice */
        if (slice_index &&
            (IS_KBL(i965->intel.device_info) ||
             IS_GLK(i965->intel.device_info))) {
            slice_header_index = (encode_state->slice_header_index[0] & SLICE_PACKED_DATA_INDEX_MASK);
        }

        header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
        length_in_bits = param->bit_length;

        slice_header1 = (unsigned char *)header_data;

        /* Skip the first header byte: the separate zero byte already stands in for it */
        if (insert_one_zero_byte) {
            slice_header1 += 1;
            length_in_bits -= 8;
        }

        /* as the slice header is the last header data for one slice,
         * the last header flag is set to one.
         */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        if (insert_one_zero_byte)
            skip_emul_byte_cnt -= 1;

        gen9_vdenc_mfx_avc_insert_object(ctx,
                                         encoder_context,
                                         (unsigned int *)slice_header1,
                                         ALIGN(length_in_bits, 32) >> 5,
                                         length_in_bits & 0x1f,
                                         skip_emul_byte_cnt,
                                         1,
                                         0,
                                         !param->has_emulation_bytes,
                                         1);
    }

    return;
}
3074
3075 static void
3076 gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
3077                                  struct encode_state *encode_state,
3078                                  struct intel_encoder_context *encoder_context,
3079                                  VAEncSliceParameterBufferH264 *slice_param,
3080                                  int slice_index)
3081 {
3082     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3083     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
3084     unsigned int internal_rate_mode = vdenc_context->internal_rate_mode;
3085     unsigned int skip_emul_byte_cnt;
3086
3087     if (slice_index == 0) {
3088
3089         if (encode_state->packed_header_data[idx]) {
3090             VAEncPackedHeaderParameterBuffer *param = NULL;
3091             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3092             unsigned int length_in_bits;
3093
3094             assert(encode_state->packed_header_param[idx]);
3095             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3096             length_in_bits = param->bit_length;
3097
3098             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3099             gen9_vdenc_mfx_avc_insert_object(ctx,
3100                                              encoder_context,
3101                                              header_data,
3102                                              ALIGN(length_in_bits, 32) >> 5,
3103                                              length_in_bits & 0x1f,
3104                                              skip_emul_byte_cnt,
3105                                              0,
3106                                              0,
3107                                              !param->has_emulation_bytes,
3108                                              0);
3109
3110         }
3111
3112         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
3113
3114         if (encode_state->packed_header_data[idx]) {
3115             VAEncPackedHeaderParameterBuffer *param = NULL;
3116             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3117             unsigned int length_in_bits;
3118
3119             assert(encode_state->packed_header_param[idx]);
3120             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3121             length_in_bits = param->bit_length;
3122
3123             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3124
3125             gen9_vdenc_mfx_avc_insert_object(ctx,
3126                                              encoder_context,
3127                                              header_data,
3128                                              ALIGN(length_in_bits, 32) >> 5,
3129                                              length_in_bits & 0x1f,
3130                                              skip_emul_byte_cnt,
3131                                              0,
3132                                              0,
3133                                              !param->has_emulation_bytes,
3134                                              0);
3135
3136         }
3137
3138         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
3139
3140         if (encode_state->packed_header_data[idx]) {
3141             VAEncPackedHeaderParameterBuffer *param = NULL;
3142             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3143             unsigned int length_in_bits;
3144
3145             assert(encode_state->packed_header_param[idx]);
3146             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3147             length_in_bits = param->bit_length;
3148
3149             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3150             gen9_vdenc_mfx_avc_insert_object(ctx,
3151                                              encoder_context,
3152                                              header_data,
3153                                              ALIGN(length_in_bits, 32) >> 5,
3154                                              length_in_bits & 0x1f,
3155                                              skip_emul_byte_cnt,
3156                                              0,
3157                                              0,
3158                                              !param->has_emulation_bytes,
3159                                              0);
3160
3161         } else if (internal_rate_mode == I965_BRC_CBR) {
3162             /* TODO: insert others */
3163         }
3164     }
3165
3166     gen9_vdenc_mfx_avc_insert_slice_packed_data(ctx,
3167                                                 encode_state,
3168                                                 encoder_context,
3169                                                 slice_index);
3170 }
3171
/*
 * Emit the 11-DW MFX_AVC_SLICE_STATE command for one slice.
 *
 * Derives the per-slice fields from the picture/slice parameters: slice
 * MB position (and the next slice's, or the frame bottom for the last
 * slice), weighted-prediction denominators, and active reference counts.
 * The rate-control fields (max QP window, grow/shrink, correct[]) are
 * currently hard-coded to zero -- see the TODOs below.
 */
static void
gen9_vdenc_mfx_avc_slice_state(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context,
                               VAEncPictureParameterBufferH264 *pic_param,
                               VAEncSliceParameterBufferH264 *slice_param,
                               VAEncSliceParameterBufferH264 *next_slice_param,
                               int slice_index)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
    unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
    unsigned char correct[6], grow, shrink;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int max_qp_n, max_qp_p;
    int i;
    int weighted_pred_idc = 0;
    int num_ref_l0 = 0, num_ref_l1 = 0;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; // TODO: fix for CBR&VBR */
    int inter_rounding = 0;

    /* Any BRC mode other than CQP uses rounding 3 for inter blocks */
    if (vdenc_context->internal_rate_mode != I965_BRC_CQP)
        inter_rounding = 3;

    slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
    slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;

    if (next_slice_param) {
        next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
        next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
    } else {
        /* Last slice: extends to the bottom of the frame */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
    }

    if (slice_type == SLICE_TYPE_I) {
        luma_log2_weight_denom = 0;
        chroma_log2_weight_denom = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;

        /* Slice-level override of the active reference count */
        if (slice_param->num_ref_idx_active_override_flag)
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (slice_type == SLICE_TYPE_B) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag) {
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (weighted_pred_idc == 2) {
            /* 8.4.3 - Derivation process for prediction weights (8-279) */
            luma_log2_weight_denom = 5;
            chroma_log2_weight_denom = 5;
        }
    }

    max_qp_n = 0;       /* TODO: update it */
    max_qp_p = 0;       /* TODO: update it */
    grow = 0;           /* TODO: update it */
    shrink = 0;         /* TODO: update it */

    for (i = 0; i < 6; i++)
        correct[i] = 0; /* TODO: update it */

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    /* DW2: active reference counts and weight denominators */
    OUT_BCS_BATCH(batch,
                  (num_ref_l0 << 16) |
                  (num_ref_l1 << 24) |
                  (chroma_log2_weight_denom << 8) |
                  (luma_log2_weight_denom << 0));
    /* DW3: prediction/deblocking/CABAC controls and slice QP */
    OUT_BCS_BATCH(batch,
                  (weighted_pred_idc << 30) |
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  (slice_qp << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));

    /* DW4/DW5: this slice's start MB position and the next slice's */
    OUT_BCS_BATCH(batch,
                  slice_ver_pos << 24 |
                  slice_hor_pos << 16 |
                  slice_param->macroblock_address);
    OUT_BCS_BATCH(batch,
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);

    OUT_BCS_BATCH(batch,
                  (0 << 31) |           /* TODO: ignore it for VDENC ??? */
                  (!slice_param->macroblock_address << 30) |    /* ResetRateControlCounter */
                  (2 << 28) |       /* Loose Rate Control */
                  (0 << 24) |           /* RC Stable Tolerance */
                  (0 << 23) |           /* RC Panic Enable */
                  (1 << 22) |           /* CBP mode */
                  (0 << 21) |           /* MB Type Direct Conversion, 0: Enable, 1: Disable */
                  (0 << 20) |           /* MB Type Skip Conversion, 0: Enable, 1: Disable */
                  (!next_slice_param << 19) |                   /* Is Last Slice */
                  (0 << 18) |           /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
                  (1 << 17) |           /* HeaderPresentFlag */
                  (1 << 16) |           /* SliceData PresentFlag */
                  (0 << 15) |           /* TailPresentFlag, TODO: check it on VDEnc  */
                  (1 << 13) |           /* RBSP NAL TYPE */
                  (slice_index << 4) |
                  (1 << 12));           /* CabacZeroWordInsertionEnable */

    OUT_BCS_BATCH(batch, vdenc_context->compressed_bitstream.start_offset);

    /* DW8: rate-control QP window and grow/shrink thresholds (all zero for now) */
    OUT_BCS_BATCH(batch,
                  (max_qp_n << 24) |     /*Target QP - 24 is lowest QP*/
                  (max_qp_p << 16) |     /*Target QP + 20 is highest QP*/
                  (shrink << 8) |
                  (grow << 0));
    OUT_BCS_BATCH(batch,
                  (1 << 31) |
                  (inter_rounding << 28) |
                  (1 << 27) |
                  (5 << 24) |
                  (correct[5] << 20) |
                  (correct[4] << 16) |
                  (correct[3] << 12) |
                  (correct[2] << 8) |
                  (correct[1] << 4) |
                  (correct[0] << 0));
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
3309
3310 static uint8_t
3311 gen9_vdenc_mfx_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
3312 {
3313     unsigned int is_long_term =
3314         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
3315     unsigned int is_top_field =
3316         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
3317     unsigned int is_bottom_field =
3318         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
3319
3320     return ((is_long_term                         << 6) |
3321             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
3322             (frame_store_id                       << 1) |
3323             ((is_top_field ^ 1) & is_bottom_field));
3324 }
3325
3326 static void
3327 gen9_vdenc_mfx_avc_ref_idx_state(VADriverContextP ctx,
3328                                  struct encode_state *encode_state,
3329                                  struct intel_encoder_context *encoder_context,
3330                                  VAEncSliceParameterBufferH264 *slice_param)
3331 {
3332     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3333     struct intel_batchbuffer *batch = encoder_context->base.batch;
3334     VAPictureH264 *ref_pic;
3335     int i, slice_type, ref_idx_shift;
3336     unsigned int fwd_ref_entry;
3337
3338     fwd_ref_entry = 0x80808080;
3339     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3340
3341     for (i = 0; i < MIN(vdenc_context->num_refs[0], 3); i++) {
3342         ref_pic = &slice_param->RefPicList0[i];
3343         ref_idx_shift = i * 8;
3344
3345         if (vdenc_context->list_ref_idx[0][i] == 0xFF)
3346             continue;
3347
3348         fwd_ref_entry &= ~(0xFF << ref_idx_shift);
3349         fwd_ref_entry += (gen9_vdenc_mfx_get_ref_idx_state(ref_pic, vdenc_context->list_ref_idx[0][i]) << ref_idx_shift);
3350     }
3351
3352     if (slice_type == SLICE_TYPE_P) {
3353         BEGIN_BCS_BATCH(batch, 10);
3354         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
3355         OUT_BCS_BATCH(batch, 0);                        // L0
3356         OUT_BCS_BATCH(batch, fwd_ref_entry);
3357
3358         for (i = 0; i < 7; i++) {
3359             OUT_BCS_BATCH(batch, 0x80808080);
3360         }
3361
3362         ADVANCE_BCS_BATCH(batch);
3363     }
3364
3365     if (slice_type == SLICE_TYPE_B) {
3366         /* VDEnc on SKL doesn't support BDW */
3367         assert(0);
3368     }
3369 }
3370
3371 static void
3372 gen9_vdenc_mfx_avc_weightoffset_state(VADriverContextP ctx,
3373                                       struct encode_state *encode_state,
3374                                       struct intel_encoder_context *encoder_context,
3375                                       VAEncPictureParameterBufferH264 *pic_param,
3376                                       VAEncSliceParameterBufferH264 *slice_param)
3377 {
3378     struct intel_batchbuffer *batch = encoder_context->base.batch;
3379     int i, slice_type;
3380     short weightoffsets[32 * 6];
3381
3382     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3383
3384     if (slice_type == SLICE_TYPE_P &&
3385         pic_param->pic_fields.bits.weighted_pred_flag == 1) {
3386
3387         for (i = 0; i < 32; i++) {
3388             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
3389             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
3390             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
3391             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
3392             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
3393             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
3394         }
3395
3396         BEGIN_BCS_BATCH(batch, 98);
3397         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
3398         OUT_BCS_BATCH(batch, 0);
3399         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
3400
3401         ADVANCE_BCS_BATCH(batch);
3402     }
3403
3404     if (slice_type == SLICE_TYPE_B) {
3405         /* VDEnc on SKL doesn't support BWD */
3406         assert(0);
3407     }
3408 }
3409
/*
 * Emit all per-slice commands for one AVC slice: MFX reference-index,
 * weight/offset and slice state, the packed/inserted headers, and — when
 * running in slice-level VDEnc mode — the matching VDENC weight/offset and
 * walker states.  next_slice_param is NULL for the last slice of the frame.
 */
static void
gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                VAEncSliceParameterBufferH264 *slice_param,
                                VAEncSliceParameterBufferH264 *next_slice_param,
                                int slice_index)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

    /* MFX states must precede the slice state / header insertion below. */
    gen9_vdenc_mfx_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param);
    gen9_vdenc_mfx_avc_weightoffset_state(ctx,
                                          encode_state,
                                          encoder_context,
                                          pic_param,
                                          slice_param);
    gen9_vdenc_mfx_avc_slice_state(ctx,
                                   encode_state,
                                   encoder_context,
                                   pic_param,
                                   slice_param,
                                   next_slice_param,
                                   slice_index);
    gen9_vdenc_mfx_avc_inset_headers(ctx,
                                     encode_state,
                                     encoder_context,
                                     slice_param,
                                     slice_index);

    /* Slice-level VDEnc (Gen9.5+) additionally programs per-slice VDENC
     * states; frame-level VDEnc emits the walker state once per frame in
     * gen9_vdenc_mfx_vdenc_avc_slices() instead. */
    if (!vdenc_context->is_frame_level_vdenc) {
        gen95_vdenc_vdecn_weihgtsoffsets_state(ctx,
                                               encode_state,
                                               encoder_context,
                                               slice_param);
        gen95_vdenc_vdenc_walker_state(ctx,
                                       encode_state,
                                       encoder_context,
                                       slice_param,
                                       next_slice_param);
    }
}
3452
3453 static void
3454 gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
3455                                 struct encode_state *encode_state,
3456                                 struct intel_encoder_context *encoder_context)
3457 {
3458     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3459     struct intel_batchbuffer *batch = encoder_context->base.batch;
3460     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
3461     VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
3462     int i, j;
3463     int slice_index = 0;
3464     int has_tail = 0;                   /* TODO: check it later */
3465
3466     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3467         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3468
3469         if (j == encode_state->num_slice_params_ext - 1)
3470             next_slice_group_param = NULL;
3471         else
3472             next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
3473
3474         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
3475             if (i < encode_state->slice_params_ext[j]->num_elements - 1)
3476                 next_slice_param = slice_param + 1;
3477             else
3478                 next_slice_param = next_slice_group_param;
3479
3480             gen9_vdenc_mfx_avc_single_slice(ctx,
3481                                             encode_state,
3482                                             encoder_context,
3483                                             slice_param,
3484                                             next_slice_param,
3485                                             slice_index);
3486
3487             if (vdenc_context->is_frame_level_vdenc)
3488                 break;
3489             else {
3490                 struct vd_pipeline_flush_parameter pipeline_flush_params;
3491                 int insert_mi_flush;
3492
3493                 memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
3494
3495                 if (next_slice_group_param) {
3496                     pipeline_flush_params.mfx_pipeline_done = 1;
3497                     insert_mi_flush = 1;
3498                 } else if (i < encode_state->slice_params_ext[j]->num_elements - 1) {
3499                     pipeline_flush_params.mfx_pipeline_done = 1;
3500                     insert_mi_flush = 1;
3501                 } else {
3502                     pipeline_flush_params.mfx_pipeline_done = !has_tail;
3503                     insert_mi_flush = 0;
3504                 }
3505
3506                 pipeline_flush_params.vdenc_pipeline_done = 1;
3507                 pipeline_flush_params.vdenc_pipeline_command_flush = 1;
3508                 pipeline_flush_params.vd_command_message_parser_done = 1;
3509                 gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
3510
3511                 if (insert_mi_flush) {
3512                     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3513                     mi_flush_dw_params.video_pipeline_cache_invalidate = 0;
3514                     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3515                 }
3516             }
3517
3518             slice_param++;
3519             slice_index++;
3520         }
3521
3522         if (vdenc_context->is_frame_level_vdenc)
3523             break;
3524     }
3525
3526     if (vdenc_context->is_frame_level_vdenc) {
3527         struct vd_pipeline_flush_parameter pipeline_flush_params;
3528
3529         gen9_vdenc_vdenc_walker_state(ctx, encode_state, encoder_context);
3530
3531         memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
3532         pipeline_flush_params.mfx_pipeline_done = !has_tail;
3533         pipeline_flush_params.vdenc_pipeline_done = 1;
3534         pipeline_flush_params.vdenc_pipeline_command_flush = 1;
3535         pipeline_flush_params.vd_command_message_parser_done = 1;
3536         gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
3537     }
3538
3539     if (has_tail) {
3540         /* TODO: insert a tail if required */
3541     }
3542
3543     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3544     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
3545     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3546 }
3547
/*
 * Build the full MFX + VDENC command sequence for one encoding pass.
 * The command order below follows the required hardware programming order
 * (pipe mode select, surface states, buffer address states, image states,
 * QM/FQM states, then slices) and must not be reordered.
 */
static void
gen9_vdenc_mfx_vdenc_pipeline(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gpe_mi_batch_buffer_start_parameter mi_batch_buffer_start_params;

    /* With BRC enabled, let the hardware terminate the batch early based on
     * the HuC status2 condition (e.g. HuC did not complete). */
    if (vdenc_context->brc_enabled) {
        struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;

        memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
        mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
        gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
    }

    /* On re-encode passes, skip the pass if the HuC status says so. */
    if (vdenc_context->current_pass) {
        struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;

        memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
        mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status_res.bo;
        gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
    }

    gen9_vdenc_mfx_pipe_mode_select(ctx, encode_state, encoder_context);

    /* Surface ids: 0 = reconstructed, 4 = raw input, 5 = 4x-scaled recon. */
    gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res, 0);
    gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res, 4);
    gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res, 5);

    gen9_vdenc_mfx_pipe_buf_addr_state(ctx, encoder_context);
    gen9_vdenc_mfx_ind_obj_base_addr_state(ctx, encoder_context);
    gen9_vdenc_mfx_bsp_buf_base_addr_state(ctx, encoder_context);

    gen9_vdenc_vdenc_pipe_mode_select(ctx, encode_state, encoder_context);
    gen9_vdenc_vdenc_src_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res);
    gen9_vdenc_vdenc_ref_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res);
    gen9_vdenc_vdenc_ds_ref_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res);
    gen9_vdenc_vdenc_pipe_buf_addr_state(ctx, encode_state, encoder_context);
    gen9_vdenc_vdenc_const_qpt_state(ctx, encode_state, encoder_context);

    /* CQP: emit the image states directly.  BRC: the HuC firmware patched the
     * image states into a second-level batch buffer — execute that instead. */
    if (!vdenc_context->brc_enabled) {
        gen9_vdenc_mfx_avc_img_state(ctx, encode_state, encoder_context);
        gen9_vdenc_vdenc_img_state(ctx, encode_state, encoder_context);
    } else {
        memset(&mi_batch_buffer_start_params, 0, sizeof(mi_batch_buffer_start_params));
        mi_batch_buffer_start_params.is_second_level = 1; /* Must be the second level batch buffer */
        mi_batch_buffer_start_params.bo = vdenc_context->second_level_batch_res.bo;
        gen8_gpe_mi_batch_buffer_start(ctx, batch, &mi_batch_buffer_start_params);
    }

    gen9_vdenc_mfx_avc_qm_state(ctx, encoder_context);
    gen9_vdenc_mfx_avc_fqm_state(ctx, encoder_context);

    gen9_vdenc_mfx_vdenc_avc_slices(ctx, encode_state, encoder_context);
}
3605
3606 static void
3607 gen9_vdenc_context_brc_prepare(struct encode_state *encode_state,
3608                                struct intel_encoder_context *encoder_context)
3609 {
3610     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3611     unsigned int rate_control_mode = encoder_context->rate_control_mode;
3612
3613     switch (rate_control_mode & 0x7f) {
3614     case VA_RC_CBR:
3615         vdenc_context->internal_rate_mode = I965_BRC_CBR;
3616         break;
3617
3618     case VA_RC_VBR:
3619         vdenc_context->internal_rate_mode = I965_BRC_VBR;
3620         break;
3621
3622     case VA_RC_CQP:
3623     default:
3624         vdenc_context->internal_rate_mode = I965_BRC_CQP;
3625         break;
3626     }
3627 }
3628
/*
 * After the PAK pass completes, copy hardware status registers into memory:
 * the frame byte count goes into the status buffer (read back later by
 * gen9_vdenc_context_get_status()) and into each per-pass BRC-update DMEM
 * buffer so the HuC BRC firmware can see the previous pass's results.
 * Note: "status_bffuer" is a pre-existing typo in the context struct field.
 */
static void
gen9_vdenc_read_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
    unsigned int base_offset = vdenc_context->status_bffuer.base_offset;
    int i;

    /* Make sure preceding commands have completed before sampling the MMIO
     * registers. */
    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
    gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);

    memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
    mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
    mi_store_register_mem_params.bo = vdenc_context->status_bffuer.res.bo;
    mi_store_register_mem_params.offset = base_offset + vdenc_context->status_bffuer.bytes_per_frame_offset;
    gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);

    /* Update DMEM buffer for BRC Update */
    for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
        /* DMEM dword 5: frame byte count; dword 7: image status control. */
        mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
        mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
        mi_store_register_mem_params.offset = 5 * sizeof(uint32_t);
        gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);

        mi_store_register_mem_params.mmio_offset = MFC_IMAGE_STATUS_CTRL_REG; /* TODO: fix it if VDBOX2 is used */
        mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
        mi_store_register_mem_params.offset = 7 * sizeof(uint32_t);
        gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
    }
}
3661
3662 static VAStatus
3663 gen9_vdenc_avc_check_capability(VADriverContextP ctx,
3664                                 struct encode_state *encode_state,
3665                                 struct intel_encoder_context *encoder_context)
3666 {
3667     VAEncSliceParameterBufferH264 *slice_param;
3668     int i, j;
3669
3670     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3671         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3672
3673         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
3674             if (slice_param->slice_type == SLICE_TYPE_B)
3675                 return VA_STATUS_ERROR_UNKNOWN;
3676
3677             slice_param++;
3678         }
3679     }
3680
3681     return VA_STATUS_SUCCESS;
3682 }
3683
/*
 * Top-level AVC encode entry: validate capability, prepare surfaces and
 * resources, then run one or more PAK passes.  Each pass builds a complete
 * BCS batch (optionally preceded by the HuC BRC init/update kernels) and
 * flushes it; multi-pass re-encoding is gated on the HuC status via the
 * conditional batch-buffer-end commands in gen9_vdenc_mfx_vdenc_pipeline().
 */
static VAStatus
gen9_vdenc_avc_encode_picture(VADriverContextP ctx,
                              VAProfile profile,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    va_status = gen9_vdenc_avc_check_capability(ctx, encode_state, encoder_context);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    gen9_vdenc_avc_prepare(ctx, profile, encode_state, encoder_context);

    for (vdenc_context->current_pass = 0; vdenc_context->current_pass < vdenc_context->num_passes; vdenc_context->current_pass++) {
        vdenc_context->is_first_pass = (vdenc_context->current_pass == 0);
        vdenc_context->is_last_pass = (vdenc_context->current_pass == (vdenc_context->num_passes - 1));

        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);

        intel_batchbuffer_emit_mi_flush(batch);

        /* BRC: (re)initialize the HuC state if needed, then run the BRC
         * update kernel which patches the image states for this pass. */
        if (vdenc_context->brc_enabled) {
            if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset)
                gen9_vdenc_huc_brc_init_reset(ctx, encode_state, encoder_context);

            gen9_vdenc_huc_brc_update(ctx, encode_state, encoder_context);
            intel_batchbuffer_emit_mi_flush(batch);
        }

        gen9_vdenc_mfx_vdenc_pipeline(ctx, encode_state, encoder_context);
        gen9_vdenc_read_status(ctx, encoder_context);

        intel_batchbuffer_end_atomic(batch);
        intel_batchbuffer_flush(batch);

        /* BRC state persists across frames once initialized. */
        vdenc_context->brc_initted = 1;
        vdenc_context->brc_need_reset = 0;
    }

    return VA_STATUS_SUCCESS;
}
3729
3730 static VAStatus
3731 gen9_vdenc_pipeline(VADriverContextP ctx,
3732                     VAProfile profile,
3733                     struct encode_state *encode_state,
3734                     struct intel_encoder_context *encoder_context)
3735 {
3736     VAStatus vaStatus;
3737
3738     switch (profile) {
3739     case VAProfileH264ConstrainedBaseline:
3740     case VAProfileH264Main:
3741     case VAProfileH264High:
3742         vaStatus = gen9_vdenc_avc_encode_picture(ctx, profile, encode_state, encoder_context);
3743         break;
3744
3745     default:
3746         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
3747         break;
3748     }
3749
3750     return vaStatus;
3751 }
3752
3753 static void
3754 gen9_vdenc_free_resources(struct gen9_vdenc_context *vdenc_context)
3755 {
3756     int i;
3757
3758     i965_free_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
3759     i965_free_gpe_resource(&vdenc_context->brc_history_buffer_res);
3760     i965_free_gpe_resource(&vdenc_context->brc_stream_in_res);
3761     i965_free_gpe_resource(&vdenc_context->brc_stream_out_res);
3762     i965_free_gpe_resource(&vdenc_context->huc_dummy_res);
3763
3764     for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++)
3765         i965_free_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
3766
3767     i965_free_gpe_resource(&vdenc_context->vdenc_statistics_res);
3768     i965_free_gpe_resource(&vdenc_context->pak_statistics_res);
3769     i965_free_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
3770     i965_free_gpe_resource(&vdenc_context->hme_detection_summary_buffer_res);
3771     i965_free_gpe_resource(&vdenc_context->brc_constant_data_res);
3772     i965_free_gpe_resource(&vdenc_context->second_level_batch_res);
3773
3774     i965_free_gpe_resource(&vdenc_context->huc_status_res);
3775     i965_free_gpe_resource(&vdenc_context->huc_status2_res);
3776
3777     i965_free_gpe_resource(&vdenc_context->recon_surface_res);
3778     i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
3779     i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
3780     i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
3781
3782     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
3783         i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
3784         i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
3785     }
3786
3787     i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
3788     i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
3789     i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
3790
3791     i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
3792     i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
3793     i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
3794     i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
3795
3796     i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
3797 }
3798
/*
 * mfc_context_destroy callback: release all GPE resources, then the
 * context structure itself.
 */
static void
gen9_vdenc_context_destroy(void *context)
{
    struct gen9_vdenc_context *vdenc_context = (struct gen9_vdenc_context *)context;

    gen9_vdenc_free_resources(vdenc_context);
    free(vdenc_context);
}
3808
3809 static void
3810 gen9_vdenc_allocate_resources(VADriverContextP ctx,
3811                               struct intel_encoder_context *encoder_context,
3812                               struct gen9_vdenc_context *vdenc_context)
3813 {
3814     struct i965_driver_data *i965 = i965_driver_data(ctx);
3815     int i;
3816
3817     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_init_reset_dmem_res,
3818                                 ALIGN(sizeof(struct huc_brc_init_dmem), 64),
3819                                 "HuC Init&Reset DMEM buffer");
3820
3821     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_history_buffer_res,
3822                                 ALIGN(HUC_BRC_HISTORY_BUFFER_SIZE, 0x1000),
3823                                 "HuC History buffer");
3824
3825     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_in_res,
3826                                 ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3827                                 "HuC Stream In buffer");
3828
3829     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_out_res,
3830                                 ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3831                                 "HuC Stream Out buffer");
3832
3833     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_dummy_res,
3834                                 0x1000,
3835                                 "HuC dummy buffer");
3836
3837     for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
3838         ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_update_dmem_res[i],
3839                                     ALIGN(sizeof(struct huc_brc_update_dmem), 64),
3840                                     "HuC BRC Update buffer");
3841         i965_zero_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
3842     }
3843
3844     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_statistics_res,
3845                                 ALIGN(VDENC_STATISTICS_SIZE, 0x1000),
3846                                 "VDENC statistics buffer");
3847
3848     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->pak_statistics_res,
3849                                 ALIGN(PAK_STATISTICS_SIZE, 0x1000),
3850                                 "PAK statistics buffer");
3851
3852     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_avc_image_state_res,
3853                                 ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3854                                 "VDENC/AVC image state buffer");
3855
3856     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->hme_detection_summary_buffer_res,
3857                                 ALIGN(HME_DETECTION_SUMMARY_BUFFER_SIZE, 0x1000),
3858                                 "HME summary buffer");
3859
3860     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_constant_data_res,
3861                                 ALIGN(BRC_CONSTANT_DATA_SIZE, 0x1000),
3862                                 "BRC constant buffer");
3863
3864     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->second_level_batch_res,
3865                                 ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3866                                 "Second level batch buffer");
3867
3868     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status_res,
3869                                 0x1000,
3870                                 "HuC Status buffer");
3871
3872     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status2_res,
3873                                 0x1000,
3874                                 "HuC Status buffer");
3875 }
3876
3877 static void
3878 gen9_vdenc_hw_interfaces_init(VADriverContextP ctx,
3879                               struct intel_encoder_context *encoder_context,
3880                               struct gen9_vdenc_context *vdenc_context)
3881 {
3882     vdenc_context->is_frame_level_vdenc = 1;
3883 }
3884
3885 static void
3886 gen95_vdenc_hw_interfaces_init(VADriverContextP ctx,
3887                                struct intel_encoder_context *encoder_context,
3888                                struct gen9_vdenc_context *vdenc_context)
3889 {
3890     vdenc_context->use_extended_pak_obj_cmd = 1;
3891 }
3892
3893 static void
3894 vdenc_hw_interfaces_init(VADriverContextP ctx,
3895                          struct intel_encoder_context *encoder_context,
3896                          struct gen9_vdenc_context *vdenc_context)
3897 {
3898     struct i965_driver_data *i965 = i965_driver_data(ctx);
3899
3900     if (IS_KBL(i965->intel.device_info) ||
3901         IS_GLK(i965->intel.device_info)) {
3902         gen95_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
3903     } else {
3904         gen9_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
3905     }
3906 }
3907
3908 static VAStatus
3909 gen9_vdenc_context_get_status(VADriverContextP ctx,
3910                               struct intel_encoder_context *encoder_context,
3911                               struct i965_coded_buffer_segment *coded_buffer_segment)
3912 {
3913     struct gen9_vdenc_status *vdenc_status = (struct gen9_vdenc_status *)coded_buffer_segment->codec_private_data;
3914
3915     coded_buffer_segment->base.size = vdenc_status->bytes_per_frame;
3916
3917     return VA_STATUS_SUCCESS;
3918 }
3919
3920 Bool
3921 gen9_vdenc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3922 {
3923     struct gen9_vdenc_context *vdenc_context = calloc(1, sizeof(struct gen9_vdenc_context));
3924
3925     if (!vdenc_context)
3926         return False;
3927
3928     vdenc_context->brc_initted = 0;
3929     vdenc_context->brc_need_reset = 0;
3930     vdenc_context->is_low_delay = 0;
3931     vdenc_context->current_pass = 0;
3932     vdenc_context->num_passes = 1;
3933     vdenc_context->vdenc_streamin_enable = 0;
3934     vdenc_context->vdenc_pak_threshold_check_enable = 0;
3935     vdenc_context->is_frame_level_vdenc = 0;
3936
3937     vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
3938     gen9_vdenc_allocate_resources(ctx, encoder_context, vdenc_context);
3939
3940     encoder_context->mfc_context = vdenc_context;
3941     encoder_context->mfc_context_destroy = gen9_vdenc_context_destroy;
3942     encoder_context->mfc_pipeline = gen9_vdenc_pipeline;
3943     encoder_context->mfc_brc_prepare = gen9_vdenc_context_brc_prepare;
3944     encoder_context->get_status = gen9_vdenc_context_get_status;
3945
3946     return True;
3947 }