[android-x86/hardware-intel-common-vaapi.git] / src / gen9_vdenc.c
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41 #include "intel_media.h"
42 #include "gen9_vdenc.h"
43
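/*
 * Low-delay BRC adjustment tables. Each table appears to be a 9x8 matrix:
 * the row is presumably picked from the current buffer-fullness range and
 * the column from the estimated-rate range, and the entry is the signed
 * adjustment handed to the HuC BRC update kernel for I/P/B frames.
 */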
44 static const uint8_t buf_rate_adj_tab_i_lowdelay[72] = {
45     0,   0, -8, -12, -16, -20, -28, -36,
46     0,   0, -4,  -8, -12, -16, -24, -32,
47     4,   2,  0,  -1,  -3,  -8, -16, -24,
48     8,   4,  2,   0,  -1,  -4,  -8, -16,
49     20, 16,  4,   0,  -1,  -4,  -8, -16,
50     24, 20, 16,   8,   4,   0,  -4,  -8,
51     28, 24, 20,  16,   8,   4,   0,  -8,
52     32, 24, 20,  16,   8,   4,   0,  -4,
53     64, 48, 28,  20,   16, 12,   8,   4,
54 };
55
56 static const uint8_t buf_rate_adj_tab_p_lowdelay[72] = {
57     -8, -24, -32, -40, -44, -48, -52, -80,
58     -8, -16, -32, -40, -40, -44, -44, -56,
59     0,    0, -12, -20, -24, -28, -32, -36,
60     8,    4,   0,   0,  -8, -16, -24, -32,
61     32,  16,   8,   4,  -4,  -8, -16, -20,
62     36,  24,  16,   8,   4,  -2,  -4,  -8,
63     40,  36,  24,  20,  16,   8,   0,  -8,
64     48,  40,  28,  24,  20,  12,   0,  -4,
65     64,  48,  28,  20,  16,  12,   8,   4,
66 };
67
68 static const uint8_t buf_rate_adj_tab_b_lowdelay[72] = {
69     0,  -4, -8, -16, -24, -32, -40, -48,
70     1,   0, -4,  -8, -16, -24, -32, -40,
71     4,   2,  0,  -1,  -3,  -8, -16, -24,
72     8,   4,  2,   0,  -1,  -4,  -8, -16,
73     20, 16,  4,   0,  -1,  -4,  -8, -16,
74     24, 20, 16,   8,   4,   0,  -4,  -8,
75     28, 24, 20,  16,   8,   4,   0,  -8,
76     32, 24, 20,  16,   8,   4,   0,  -4,
77     64, 48, 28,  20,  16,  12,   8,   4,
78 };
79
80 static const int8_t dist_qp_adj_tab_i_vbr[81] = {
81     +0,  0,  0,  0, 0, 3, 4, 6, 8,
82     +0,  0,  0,  0, 0, 2, 3, 5, 7,
83     -1,  0,  0,  0, 0, 2, 2, 4, 5,
84     -1, -1,  0,  0, 0, 1, 2, 2, 4,
85     -2, -2, -1,  0, 0, 0, 1, 2, 4,
86     -2, -2, -1,  0, 0, 0, 1, 2, 4,
87     -3, -2, -1, -1, 0, 0, 1, 2, 5,
88     -3, -2, -1, -1, 0, 0, 2, 4, 7,
89     -4, -3, -2, -1, 0, 1, 3, 5, 8,
90 };
91
92 static const int8_t dist_qp_adj_tab_p_vbr[81] = {
93     -1,  0,  0,  0, 0, 1, 1, 2, 3,
94     -1, -1,  0,  0, 0, 1, 1, 2, 3,
95     -2, -1, -1,  0, 0, 1, 1, 2, 3,
96     -3, -2, -2, -1, 0, 0, 1, 2, 3,
97     -3, -2, -1, -1, 0, 0, 1, 2, 3,
98     -3, -2, -1, -1, 0, 0, 1, 2, 3,
99     -3, -2, -1, -1, 0, 0, 1, 2, 3,
100     -3, -2, -1, -1, 0, 0, 1, 2, 3,
101     -3, -2, -1, -1, 0, 0, 1, 2, 3,
102 };
103
104 static const int8_t dist_qp_adj_tab_b_vbr[81] = {
105     +0,  0,  0,  0, 0, 2, 3, 3, 4,
106     +0,  0,  0,  0, 0, 2, 3, 3, 4,
107     -1,  0,  0,  0, 0, 2, 2, 3, 3,
108     -1, -1,  0,  0, 0, 1, 2, 2, 2,
109     -1, -1, -1,  0, 0, 0, 1, 2, 2,
110     -2, -1, -1,  0, 0, 0, 0, 1, 2,
111     -2, -1, -1, -1, 0, 0, 0, 1, 3,
112     -2, -2, -1, -1, 0, 0, 1, 1, 3,
113     -2, -2, -1, -1, 0, 1, 1, 2, 4,
114 };
115
116 static const int8_t buf_rate_adj_tab_i_vbr[72] = {
117     -4, -20, -28, -36, -40, -44, -48, -80,
118     +0,  -8, -12, -20, -24, -28, -32, -36,
119     +0,   0,  -8, -16, -20, -24, -28, -32,
120     +8,   4,   0,   0,  -8, -16, -24, -28,
121     32,  24,  16,   2,  -4,  -8, -16, -20,
122     36,  32,  28,  16,   8,   0,  -4,  -8,
123     40,  36,  24,  20,  16,   8,   0,  -8,
124     48,  40,  28,  24,  20,  12,   0,  -4,
125     64,  48,  28,  20,  16,  12,   8,   4,
126 };
127
128 static const int8_t buf_rate_adj_tab_p_vbr[72] = {
129     -8, -24, -32, -44, -48, -56, -64, -80,
130     -8, -16, -32, -40, -44, -52, -56, -64,
131     +0,   0, -16, -28, -36, -40, -44, -48,
132     +8,   4,   0,   0,  -8, -16, -24, -36,
133     20,  12,   4,   0,  -8,  -8,  -8, -16,
134     24,  16,   8,   8,   8,   0,  -4,  -8,
135     40,  36,  24,  20,  16,   8,   0,  -8,
136     48,  40,  28,  24,  20,  12,   0,  -4,
137     64,  48,  28,  20,  16,  12,   8,   4,
138 };
139
140 static const int8_t buf_rate_adj_tab_b_vbr[72] = {
141     0,  -4, -8, -16, -24, -32, -40, -48,
142     1,   0, -4,  -8, -16, -24, -32, -40,
143     4,   2,  0,  -1,  -3,  -8, -16, -24,
144     8,   4,  2,   0,  -1,  -4,  -8, -16,
145     20, 16,  4,   0,  -1,  -4,  -8, -16,
146     24, 20, 16,   8,   4,   0,  -4,  -8,
147     28, 24, 20,  16,   8,   4,   0,  -8,
148     32, 24, 20,  16,   8,   4,   0,  -4,
149     64, 48, 28,  20,  16,  12,   8,   4,
150 };
151
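/*
 * Driver defaults for the HuC BRC update constant buffer (see
 * struct huc_brc_update_constant_data). The per-QP cost tables near the end
 * (i_intra_*, p_intra_*, p_inter_*, p_ref_id, hme_mv_cost) appear to be
 * pre-packed in the same 4.4 "shift.mantissa" format that
 * map_44_lut_value() below produces.
 */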
152 static struct huc_brc_update_constant_data
153 gen9_brc_update_constant_data = {
154     .global_rate_qp_adj_tab_i = {
155         48, 40, 32,  24,  16,   8,   0,  -8,
156         40, 32, 24,  16,   8,   0,  -8, -16,
157         32, 24, 16,   8,   0,  -8, -16, -24,
158         24, 16,  8,   0,  -8, -16, -24, -32,
159         16, 8,   0,  -8, -16, -24, -32, -40,
160         8,  0,  -8, -16, -24, -32, -40, -48,
161         0, -8, -16, -24, -32, -40, -48, -56,
162         48, 40, 32,  24,  16,   8,   0,  -8,
163     },
164
165     .global_rate_qp_adj_tab_p = {
166         48,  40,  32,  24,  16,  8,    0,  -8,
167         40,  32,  24,  16,   8,  0,   -8, -16,
168         16,   8,   8,   4,  -8, -16, -16, -24,
169         8,    0,   0,  -8, -16, -16, -16, -24,
170         8,    0,   0, -24, -32, -32, -32, -48,
171         0,  -16, -16, -24, -32, -48, -56, -64,
172         -8, -16, -32, -32, -48, -48, -56, -64,
173         -16,-32, -48, -48, -48, -56, -64, -80,
174     },
175
176     .global_rate_qp_adj_tab_b = {
177         48, 40, 32, 24,  16,   8,   0,  -8,
178         40, 32, 24, 16,  8,    0,  -8, -16,
179         32, 24, 16,  8,  0,   -8, -16, -24,
180         24, 16, 8,   0, -8,   -8, -16, -24,
181         16, 8,  0,   0, -8,  -16, -24, -32,
182         16, 8,  0,   0, -8,  -16, -24, -32,
183         0, -8, -8, -16, -32, -48, -56, -64,
184         0, -8, -8, -16, -32, -48, -56, -64
185     },
186
187     .dist_threshld_i = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
188     .dist_threshld_p = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
189     .dist_threshld_b = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
190
191     .dist_qp_adj_tab_i = {
192         0,   0,  0,  0,  0,  3,  4,  6,  8,
193         0,   0,  0,  0,  0,  2,  3,  5,  7,
194         -1,  0,  0,  0,  0,  2,  2,  4,  5,
195         -1, -1,  0,  0,  0,  1,  2,  2,  4,
196         -2, -2, -1,  0,  0,  0,  1,  2,  4,
197         -2, -2, -1,  0,  0,  0,  1,  2,  4,
198         -3, -2, -1, -1,  0,  0,  1,  2,  5,
199         -3, -2, -1, -1,  0,  0,  2,  4,  7,
200         -4, -3, -2, -1,  0,  1,  3,  5,  8,
201     },
202
203     .dist_qp_adj_tab_p = {
204         -1,   0,  0,  0,  0,  1,  1,  2,  3,
205         -1,  -1,  0,  0,  0,  1,  1,  2,  3,
206         -2,  -1, -1,  0,  0,  1,  1,  2,  3,
207         -3,  -2, -2, -1,  0,  0,  1,  2,  3,
208         -3,  -2, -1, -1,  0,  0,  1,  2,  3,
209         -3,  -2, -1, -1,  0,  0,  1,  2,  3,
210         -3,  -2, -1, -1,  0,  0,  1,  2,  3,
211         -3,  -2, -1, -1,  0,  0,  1,  2,  3,
212         -3,  -2, -1, -1,  0,  0,  1,  2,  3,
213     },
214
215     .dist_qp_adj_tab_b = {
216         0,   0,  0,  0, 0, 2, 3, 3, 4,
217         0,   0,  0,  0, 0, 2, 3, 3, 4,
218         -1,  0,  0,  0, 0, 2, 2, 3, 3,
219         -1, -1,  0,  0, 0, 1, 2, 2, 2,
220         -1, -1, -1,  0, 0, 0, 1, 2, 2,
221         -2, -1, -1,  0, 0, 0, 0, 1, 2,
222         -2, -1, -1, -1, 0, 0, 0, 1, 3,
223         -2, -2, -1, -1, 0, 0, 1, 1, 3,
224         -2, -2, -1, -1, 0, 1, 1, 2, 4,
225     },
226
227     /* default table for the non-low-delay case */
228     .buf_rate_adj_tab_i = {
229         -4, -20, -28, -36, -40, -44, -48, -80,
230         0,   -8, -12, -20, -24, -28, -32, -36,
231         0,    0,  -8, -16, -20, -24, -28, -32,
232         8,    4,   0,   0,  -8, -16, -24, -28,
233         32,  24,  16,   2,  -4,  -8, -16, -20,
234         36,  32,  28,  16,   8,   0,  -4,  -8,
235         40,  36,  24,  20,  16,   8,   0,  -8,
236         48,  40,  28,  24,  20,  12,   0,  -4,
237         64,  48,  28,  20,  16,  12,   8,   4,
238     },
239
240     /* default table for the non-low-delay case */
241     .buf_rate_adj_tab_p = {
242         -8, -24, -32, -44, -48, -56, -64, -80,
243         -8, -16, -32, -40, -44, -52, -56, -64,
244         0,    0, -16, -28, -36, -40, -44, -48,
245         8,    4,   0,   0,  -8, -16, -24, -36,
246         20,  12,   4,   0,  -8,  -8,  -8, -16,
247         24,  16,   8,   8,   8,   0,  -4,  -8,
248         40,  36,  24,  20,  16,   8,   0,  -8,
249         48,  40,  28,  24,  20,  12,   0,  -4,
250         64,  48,  28,  20,  16,  12,   8,   4,
251     },
252
253     /* default table for the non-low-delay case */
254     .buf_rate_adj_tab_b = {
255         0,  -4, -8, -16, -24, -32, -40, -48,
256         1,   0, -4,  -8, -16, -24, -32, -40,
257         4,   2,  0,  -1,  -3,  -8, -16, -24,
258         8,   4,  2,   0,  -1,  -4,  -8, -16,
259         20, 16,  4,   0,  -1,  -4,  -8, -16,
260         24, 20, 16,   8,   4,   0,  -4,  -8,
261         28, 24, 20,  16,   8,   4,   0,  -8,
262         32, 24, 20,  16,   8,   4,   0,  -4,
263         64, 48, 28,  20,  16,  12,   8,   4,
264     },
265
266     .frame_size_min_tab_p = { 1, 2, 4, 6, 8, 10, 16, 16, 16 },
267     .frame_size_min_tab_i = { 1, 2, 4, 8, 16, 20, 24, 32, 36 },
268
269     .frame_size_max_tab_p = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
270     .frame_size_max_tab_i = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
271
272     .frame_size_scg_tab_p = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
273     .frame_size_scg_tab_i = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
274
275     .i_intra_non_pred = {
276         0x0e, 0x0e, 0x0e, 0x18, 0x19, 0x1b, 0x1c, 0x0d, 0x0f, 0x18, 0x19, 0x0d, 0x0f, 0x0f,
277         0x0c, 0x0e, 0x0c, 0x0c, 0x0a, 0x0a, 0x0b, 0x0a, 0x0a, 0x0a, 0x09, 0x09, 0x08, 0x08,
278         0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x07, 0x07, 0x07, 0x07, 0x07,
279     },
280
281     .i_intra_16x16 = {
282         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
283         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
284         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
285     },
286
287     .i_intra_8x8 = {
288         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
289         0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x04, 0x04, 0x04, 0x04, 0x06, 0x06, 0x06,
290         0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07,
291     },
292
293     .i_intra_4x4 = {
294         0x2e, 0x2e, 0x2e, 0x38, 0x39, 0x3a, 0x3b, 0x2c, 0x2e, 0x38, 0x39, 0x2d, 0x2f, 0x38,
295         0x2e, 0x38, 0x2e, 0x38, 0x2f, 0x2e, 0x38, 0x38, 0x38, 0x38, 0x2f, 0x2f, 0x2f, 0x2e,
296         0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x1e, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x0e, 0x0d,
297     },
298
299     .i_intra_chroma = {
300         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
301         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
302         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
303     },
304
305     .p_intra_non_pred = {
306         0x06, 0x06, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x07,
307         0x07, 0x07, 0x06, 0x07, 0x07, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
308         0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
309     },
310
311     .p_intra_16x16 = {
312         0x1b, 0x1b, 0x1b, 0x1c, 0x1e, 0x28, 0x29, 0x1a, 0x1b, 0x1c, 0x1e, 0x1a, 0x1c, 0x1d,
313         0x1b, 0x1c, 0x1c, 0x1c, 0x1c, 0x1b, 0x1c, 0x1c, 0x1d, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c,
314         0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
315     },
316
317     .p_intra_8x8 = {
318         0x1d, 0x1d, 0x1d, 0x1e, 0x28, 0x29, 0x2a, 0x1b, 0x1d, 0x1e, 0x28, 0x1c, 0x1d, 0x1f,
319         0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1d, 0x1f, 0x1e, 0x1e, 0x1e, 0x1d, 0x1e, 0x1e, 0x1d,
320         0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e,
321     },
322
323     .p_intra_4x4 = {
324         0x38, 0x38, 0x38, 0x39, 0x3a, 0x3b, 0x3d, 0x2e, 0x38, 0x39, 0x3a, 0x2f, 0x39, 0x3a,
325         0x38, 0x39, 0x38, 0x39, 0x39, 0x38, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
326         0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
327     },
328
329     .p_intra_chroma = {
330         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
331         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
332         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
333     },
334
335     .p_inter_16x8 = {
336         0x07, 0x07, 0x07, 0x08, 0x09, 0x0b, 0x0c, 0x06, 0x07, 0x09, 0x0a, 0x07, 0x08, 0x09,
337         0x08, 0x09, 0x08, 0x09, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x08, 0x08, 0x08, 0x08,
338         0x08, 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
339     },
340
341     .p_inter_8x8 = {
342         0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x02, 0x02, 0x02, 0x03, 0x02, 0x02, 0x02,
343         0x02, 0x03, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
344         0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
345     },
346
347     .p_inter_16x16 = {
348         0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
349         0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
350         0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
351     },
352
353     .p_ref_id = {
354         0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
355         0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
356         0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04
357     },
358
359     .hme_mv_cost = {
360         /* mv = 0 */
361         {
362             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
363             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
364             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
365         },
366
367         /* mv <= 16 */
368         {
369             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
370             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
371             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
372         },
373
374         /* mv <= 32 */
375         {
376             0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
377             0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
378             0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
379         },
380
381         /* mv <= 64 */
382         {
383             0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
384             0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
385             0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
386         },
387
388         /* mv <= 128 */
389         {
390             0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
391             0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
392             0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
393         },
394
395         /* mv <= 256 */
396         {
397             0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
398             0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
399             0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x1a, 0x1f, 0x2a, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d
400         },
401
402         /* mv <= 512 */
403         {
404             0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
405             0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
406             0x1a, 0x1a, 0x1a, 0x1a, 0x2a, 0x2f, 0x3a, 0x3d, 0x3d, 0x3d, 0x3d, 0x3d, 0x3d, 0x3d,
407         },
408
409         /* mv <= 1024 */
410         {
411             0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
412             0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
413             0x1a, 0x1a, 0x1a, 0x1f, 0x2d, 0x3d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d,
414         },
415     },
416 };
417
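/*
 * Per-QP lambda table, presumably the I-slice variant given the "P frame"
 * tables further down. The values roughly double every 6 QP steps, i.e.
 * they follow the usual ~2^(QP/6) lambda scaling, padded out to 11 DWs.
 */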
418 /* 11 DWs */
419 static uint8_t vdenc_const_qp_lambda[44] = {
420     0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
421     0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
422     0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
423     0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
424     0x4a, 0x53, 0x00, 0x00
425 };
426
427 /* 14 DWs */
428 static uint16_t vdenc_const_skip_threshold[28] = {
429
430 };
431
432 /* 14 DWs */
433 static uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0[28] = {
434
435 };
436
437 /* 7 DWs */
438 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1[28] = {
439
440 };
441
442 /* 7 DWs */
443 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2[28] = {
444
445 };
446
447 /* 7 DWs */
448 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3[28] = {
449
450 };
451
452 /* P frame */
453 /* 11 DWs */
454 static uint8_t vdenc_const_qp_lambda_p[44] = {
455     0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
456     0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
457     0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
458     0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
459     0x4a, 0x53, 0x00, 0x00
460 };
461
462 /* 14 DWs */
463 static uint16_t vdenc_const_skip_threshold_p[28] = {
464     0x0000, 0x0000, 0x0000, 0x0000, 0x0002, 0x0004, 0x0007, 0x000b,
465     0x0011, 0x0019, 0x0023, 0x0032, 0x0044, 0x005b, 0x0077, 0x0099,
466     0x00c2, 0x00f1, 0x0128, 0x0168, 0x01b0, 0x0201, 0x025c, 0x02c2,
467     0x0333, 0x03b0, 0x0000, 0x0000
468 };
469
470 /* 14 DWs */
471 static uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0_p[28] = {
472     0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
473     0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x3f,
474     0x4e, 0x51, 0x5b, 0x63, 0x6f, 0x7f, 0x00, 0x00
475 };
476
477 /* 7 DWs */
478 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1_p[28] = {
479     0x03, 0x04, 0x05, 0x05, 0x07, 0x09, 0x0b, 0x0e, 0x12, 0x17,
480     0x1c, 0x21, 0x27, 0x2c, 0x33, 0x3b, 0x41, 0x51, 0x5c, 0x1a,
481     0x1e, 0x21, 0x22, 0x26, 0x2c, 0x30, 0x00, 0x00
482 };
483
484 /* 7 DWs */
485 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2_p[28] = {
486     0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
487     0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x0f,
488     0x13, 0x14, 0x16, 0x18, 0x1b, 0x1f, 0x00, 0x00
489 };
490
491 /* 7 DWs */
492 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3_p[28] = {
493     0x04, 0x05, 0x06, 0x09, 0x0b, 0x0d, 0x12, 0x16, 0x1b, 0x23,
494     0x2c, 0x33, 0x3d, 0x45, 0x4f, 0x5b, 0x66, 0x7f, 0x8e, 0x2a,
495     0x2f, 0x32, 0x37, 0x3c, 0x45, 0x4c, 0x00, 0x00
496 };
497
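/*
 * BRC tuning defaults: fractional deviation thresholds, estimated-rate
 * thresholds and global rate-ratio adjustment steps, presumably copied into
 * the HuC BRC init/update data elsewhere in this file.
 */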
498 static const double
499 vdenc_brc_dev_threshi0_fp_neg[4] = { 0.80, 0.60, 0.34, 0.2 };
500
501 static const double
502 vdenc_brc_dev_threshi0_fp_pos[4] = { 0.2, 0.4, 0.66, 0.9 };
503
504 static const double
505 vdenc_brc_dev_threshpb0_fp_neg[4] = { 0.90, 0.66, 0.46, 0.3 };
506
507 static const double
508 vdenc_brc_dev_threshpb0_fp_pos[4] = { 0.3, 0.46, 0.70, 0.90 };
509
510 static const double
511 vdenc_brc_dev_threshvbr0_neg[4] = { 0.90, 0.70, 0.50, 0.3 };
512
513 static const double
514 vdenc_brc_dev_threshvbr0_pos[4] = { 0.4, 0.5, 0.75, 0.90 };
515
516 static const unsigned char
517 vdenc_brc_estrate_thresh_p0[7] = { 4, 8, 12, 16, 20, 24, 28 };
518
519 static const unsigned char
520 vdenc_brc_estrate_thresh_i0[7] = { 4, 8, 12, 16, 20, 24, 28 };
521
522 static const uint16_t
523 vdenc_brc_start_global_adjust_frame[4] = { 10, 50, 100, 150 };
524
525 static const uint8_t
526 vdenc_brc_global_rate_ratio_threshold[7] = { 80, 90, 95, 101, 105, 115, 130};
527
528 static const uint8_t
529 vdenc_brc_start_global_adjust_mult[5] = { 1, 1, 3, 2, 1 };
530
531 static const uint8_t
532 vdenc_brc_start_global_adjust_div[5] = { 40, 5, 5, 3, 1 };
533
534 static const int8_t
535 vdenc_brc_global_rate_ratio_threshold_qp[8] = { -3, -2, -1, 0, 1, 1, 2, 3 };
536
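/*
 * Raw (unpacked) mode cost table, indexed as
 * vdenc_mode_const[slice type: 0 = intra, 1 = predicted][LUT mode][QP 0..51].
 * gen9_vdenc_avc_calculate_mode_cost() runs these through map_44_lut_value()
 * before they end up in the VDENC cost state.
 */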
537 const int vdenc_mode_const[2][12][52] = {
538     //INTRASLICE
539     {
540         //LUTMODE_INTRA_NONPRED
541         {
542             14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,         //QP=[0 ~12]
543             16, 18, 22, 24, 13, 15, 16, 18, 13, 15, 15, 12, 14,         //QP=[13~25]
544             12, 12, 10, 10, 11, 10, 10, 10, 9, 9, 8, 8, 8,              //QP=[26~38]
545             8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7,                      //QP=[39~51]
546         },
547
548         //LUTMODE_INTRA_16x16, LUTMODE_INTRA
549         {
550             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[0 ~12]
551             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[13~25]
552             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[26~38]
553             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[39~51]
554         },
555
556         //LUTMODE_INTRA_8x8
557         {
558             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  //QP=[0 ~12]
559             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,  //QP=[13~25]
560             1, 1, 1, 1, 1, 4, 4, 4, 4, 6, 6, 6, 6,  //QP=[26~38]
561             6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7,  //QP=[39~51]
562         },
563
564         //LUTMODE_INTRA_4x4
565         {
566             56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,   //QP=[0 ~12]
567             64, 72, 80, 88, 48, 56, 64, 72, 53, 59, 64, 56, 64,   //QP=[13~25]
568             57, 64, 58, 55, 64, 64, 64, 64, 59, 59, 60, 57, 50,   //QP=[26~38]
569             46, 42, 38, 34, 31, 27, 23, 22, 19, 18, 16, 14, 13,   //QP=[39~51]
570         },
571
572         //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
573         { 0, },
574
575         //LUTMODE_INTER_8X8Q
576         { 0, },
577
578         //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16x8_FIELD
579         { 0, },
580
581         //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8X8_FIELD
582         { 0, },
583
584         //LUTMODE_INTER_16x16, LUTMODE_INTER
585         { 0, },
586
587         //LUTMODE_INTER_BWD
588         { 0, },
589
590         //LUTMODE_REF_ID
591         { 0, },
592
593         //LUTMODE_INTRA_CHROMA
594         { 0, },
595     },
596
597     //PREDSLICE
598     {
599         //LUTMODE_INTRA_NONPRED
600         {
601             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,     //QP=[0 ~12]
602             7, 8, 9, 10, 5, 6, 7, 8, 6, 7, 7, 7, 7,    //QP=[13~25]
603             6, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7,     //QP=[26~38]
604             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     //QP=[39~51]
605         },
606
607         //LUTMODE_INTRA_16x16, LUTMODE_INTRA
608         {
609             21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
610             24, 28, 31, 35, 19, 21, 24, 28, 20, 24, 25, 21, 24,
611             24, 24, 24, 21, 24, 24, 26, 24, 24, 24, 24, 24, 24,
612             24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
613
614         },
615
616         //LUTMODE_INTRA_8x8
617         {
618             26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,   //QP=[0 ~12]
619             28, 32, 36, 40, 22, 26, 28, 32, 24, 26, 30, 26, 28,   //QP=[13~25]
620             26, 28, 26, 26, 30, 28, 28, 28, 26, 28, 28, 26, 28,   //QP=[26~38]
621             28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,   //QP=[39~51]
622         },
623
624         //LUTMODE_INTRA_4x4
625         {
626             64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,   //QP=[0 ~12]
627             72, 80, 88, 104, 56, 64, 72, 80, 58, 68, 76, 64, 68,  //QP=[13~25]
628             64, 68, 68, 64, 70, 70, 70, 70, 68, 68, 68, 68, 68,   //QP=[26~38]
629             68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,   //QP=[39~51]
630         },
631
632         //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
633         {
634             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,      //QP=[0 ~12]
635             8, 9, 11, 12, 6, 7, 9, 10, 7, 8, 9, 8, 9,   //QP=[13~25]
636             8, 9, 8, 8, 9, 9, 9, 9, 8, 8, 8, 8, 8,      //QP=[26~38]
637             8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,      //QP=[39~51]
638         },
639
640         //LUTMODE_INTER_8X8Q
641         {
642             2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,   //QP=[0 ~12]
643             2, 3, 3, 3, 2, 2, 2, 3, 2, 2, 2, 2, 3,   //QP=[13~25]
644             2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   //QP=[26~38]
645             3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   //QP=[39~51]
646         },
647
648         //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16X8_FIELD
649         {
650             5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[0 ~12]
651             5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[13~25]
652             5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[26~38]
653             5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[39~51]
654         },
655
656         //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8x8_FIELD
657         {
658             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[0 ~12]
659             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[13~25]
660             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[26~38]
661             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[39~51]
662         },
663
664         //LUTMODE_INTER_16x16, LUTMODE_INTER
665         {
666             5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[0 ~12]
667             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   //QP=[13~25]
668             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   //QP=[26~38]
669             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   //QP=[39~51]
670         },
671
672         //LUTMODE_INTER_BWD
673         {
674             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[0 ~12]
675             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[13~25]
676             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[26~38]
677             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[39~51]
678         },
679
680         //LUTMODE_REF_ID
681         {
682             4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[0 ~12]
683             4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[13~25]
684             4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[26~38]
685             4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[39~51]
686         },
687
688         //LUTMODE_INTRA_CHROMA
689         {
690             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[0 ~12]
691             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[13~25]
692             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[26~38]
693             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[39~51]
694         },
695     },
696 };
697
698 const int vdenc_mv_cost_skipbias_qpel[8] = {
699     //PREDSLICE
700     0, 6, 6, 9, 10, 13, 14, 16
701 };
702
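/*
 * Raw HME MV cost per MV-distance bucket (mv = 0, <= 16, ... <= 1024) and QP;
 * like vdenc_mode_const these are packed with map_44_lut_value() before use.
 */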
703 const int vdenc_hme_cost[8][52] = {
704     //mv=0
705     {
706         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[0 ~12]
707         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[13 ~25]
708         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[26 ~38]
709         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[39 ~51]
710     },
711     //mv<=16
712     {
713         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[0 ~12]
714         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[13 ~25]
715         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[26 ~38]
716         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[39 ~51]
717     },
718     //mv<=32
719     {
720         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[0 ~12]
721         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[13 ~25]
722         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[26 ~38]
723         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[39 ~51]
724     },
725     //mv<=64
726     {
727         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[0 ~12]
728         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[13 ~25]
729         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[26 ~38]
730         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[39 ~51]
731     },
732     //mv<=128
733     {
734         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[0 ~12]
735         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[13 ~25]
736         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[26 ~38]
737         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[39 ~51]
738     },
739     //mv<=256
740     {
741         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[0 ~12]
742         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[13 ~25]
743         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[26 ~38]
744         10, 10, 10, 10, 20, 30, 40, 50, 50, 50, 50, 50, 50,     //QP=[39 ~51]
745     },
746     //mv<=512
747     {
748         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[0 ~12]
749         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[13 ~25]
750         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[26 ~38]
751         20, 20, 20, 40, 60, 80, 100, 100, 100, 100, 100, 100, 100,     //QP=[39 ~51]
752     },
753
754     //mv<=1024
755     {
756         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[0 ~12]
757         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[13 ~25]
758         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[26 ~38]
759         20, 20, 30, 50, 100, 200, 200, 200, 200, 200, 200, 200, 200,     //QP=[39 ~51]
760     },
761 };
762
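/*
 * Batch helpers: OUT_BUFFER_2DW emits either a 64-bit relocation for bo or
 * two zero DWs when no buffer is bound; OUT_BUFFER_3DW additionally emits
 * the memory attribute DW.
 */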
763 #define OUT_BUFFER_2DW(batch, bo, is_target, delta)  do {               \
764         if (bo) {                                                       \
765             OUT_BCS_RELOC64(batch,                                      \
766                             bo,                                         \
767                             I915_GEM_DOMAIN_RENDER,                     \
768                             is_target ? I915_GEM_DOMAIN_RENDER : 0,     \
769                             delta);                                     \
770         } else {                                                        \
771             OUT_BCS_BATCH(batch, 0);                                    \
772             OUT_BCS_BATCH(batch, 0);                                    \
773         }                                                               \
774     } while (0)
775
776 #define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr)  do { \
777         OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
778         OUT_BCS_BATCH(batch, attr);                             \
779     } while (0)
780
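/*
 * Allocate a linear (1D) GPE buffer of bfsize bytes: width, pitch and size
 * all equal the byte size, height is 1 and no tiling is used.
 */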
781 #define ALLOC_VDENC_BUFFER_RESOURCE(buffer, bfsize, des) do {   \
782         buffer.type = I965_GPE_RESOURCE_BUFFER;                 \
783         buffer.width = bfsize;                                  \
784         buffer.height = 1;                                      \
785         buffer.pitch = buffer.width;                            \
786         buffer.size = buffer.pitch;                             \
787         buffer.tiling = I915_TILING_NONE;                       \
788         i965_allocate_gpe_resource(i965->intel.bufmgr,          \
789                                    &buffer,                     \
790                                    (des));                      \
791     } while (0)
792
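/*
 * Maximum vertical MV range in quarter-pel units for a given H.264 level
 * (the argument is presumably level_idc, e.g. 30 for level 3.0). The buckets
 * roughly match the MaxVmvR limits of the spec: +-64 full pel for level 1.0,
 * +-128 up to 2.0, +-256 up to 3.0 and +-512 above that.
 */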
793 static int
794 gen9_vdenc_get_max_vmv_range(int level)
795 {
796     int max_vmv_range = 512;
797
798     if (level == 10)
799         max_vmv_range = 256;
800     else if (level <= 20)
801         max_vmv_range = 512;
802     else if (level <= 30)
803         max_vmv_range = 1024;
804     else
805         max_vmv_range = 2048;
806
807     return max_vmv_range;
808 }
809
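/*
 * Pack a linear cost value into the 4.4 pseudo-exponential format used by
 * the VDENC cost LUTs: the high nibble is a shift, the low nibble a
 * mantissa, so a packed byte b decodes back to (b & 0xf) << (b >> 4).
 * max is the largest representable packed value. For example,
 * map_44_lut_value(20, 0x6f) returns 0x1a, which decodes to 10 << 1 = 20.
 */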
810 static unsigned char
811 map_44_lut_value(unsigned int v, unsigned char max)
812 {
813     unsigned int maxcost;
814     int d;
815     unsigned char ret;
816
817     if (v == 0) {
818         return 0;
819     }
820
821     maxcost = ((max & 15) << (max >> 4));
822
823     if (v >= maxcost) {
824         return max;
825     }
826
827     d = (int)(log((double)v) / log(2.0)) - 3;
828
829     if (d < 0) {
830         d = 0;
831     }
832
833     ret = (unsigned char)((d << 4) + (int)((v + (d == 0 ? 0 : (1 << (d - 1)))) >> d));
834     ret =  (ret & 0xf) == 0 ? (ret | 8) : ret;
835
836     return ret;
837 }
838
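/*
 * Rate-control parameters from VAEncMiscParameterRateControl. Bit rates are
 * tracked in kbps (bits_per_second rounded up to a multiple of 1000 and then
 * divided by 1000). For CBR, min == target == max; for VBR with e.g.
 * target_percentage = 95, target becomes 95% of max and min becomes
 * (2 * 95 - 100)% = 90% of max.
 */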
839 static void
840 gen9_vdenc_update_rate_control_parameters(VADriverContextP ctx,
841                                           struct intel_encoder_context *encoder_context,
842                                           VAEncMiscParameterRateControl *misc)
843 {
844     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
845
846     vdenc_context->max_bit_rate = ALIGN(misc->bits_per_second, 1000) / 1000;
847     vdenc_context->mb_brc_enabled = 0;
848
849     if (vdenc_context->internal_rate_mode == I965_BRC_CBR) {
850         vdenc_context->min_bit_rate = vdenc_context->max_bit_rate;
851         vdenc_context->mb_brc_enabled = (misc->rc_flags.bits.mb_rate_control < 2);
852
853         if (vdenc_context->target_bit_rate != vdenc_context->max_bit_rate) {
854             vdenc_context->target_bit_rate = vdenc_context->max_bit_rate;
855             vdenc_context->brc_need_reset = 1;
856         }
857     } else if (vdenc_context->internal_rate_mode == I965_BRC_VBR) {
858         vdenc_context->min_bit_rate = vdenc_context->max_bit_rate * (2 * misc->target_percentage - 100) / 100;
859         vdenc_context->mb_brc_enabled = (misc->rc_flags.bits.mb_rate_control < 2);
860
861         if (vdenc_context->target_bit_rate != vdenc_context->max_bit_rate * misc->target_percentage / 100) {
862             vdenc_context->target_bit_rate = vdenc_context->max_bit_rate * misc->target_percentage / 100;
863             vdenc_context->brc_need_reset = 1;
864         }
865     }
866 }
867
868 static void
869 gen9_vdenc_update_hrd_parameters(VADriverContextP ctx,
870                                  struct intel_encoder_context *encoder_context,
871                                  VAEncMiscParameterHRD *misc)
872 {
873     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
874
875     if (vdenc_context->internal_rate_mode == I965_BRC_CQP)
876         return;
877
878     vdenc_context->vbv_buffer_size_in_bit = misc->buffer_size;
879     vdenc_context->init_vbv_buffer_fullness_in_bit = misc->initial_buffer_fullness;
880 }
881
882 static void
883 gen9_vdenc_update_framerate_parameters(VADriverContextP ctx,
884                                        struct intel_encoder_context *encoder_context,
885                                        VAEncMiscParameterFrameRate *misc)
886 {
887     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
888
889     vdenc_context->frames_per_100s = misc->framerate; /* misc->framerate is multiple of 100 */
890 }
891
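/*
 * ROI parameters: at most 3 regions are honoured. The rectangles arrive in
 * pixel coordinates and are converted to macroblock units (divided by 16)
 * for the VDENC stream-in surface.
 */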
892 static void
893 gen9_vdenc_update_roi_parameters(VADriverContextP ctx,
894                                  struct intel_encoder_context *encoder_context,
895                                  VAEncMiscParameterBufferROI *misc)
896 {
897     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
898     int i;
899
900     if (!misc || !misc->roi) {
901         vdenc_context->num_roi = 0;
902         return;
903     }
904
905     vdenc_context->num_roi = MIN(misc->num_roi, 3);
906     vdenc_context->max_delta_qp = misc->max_delta_qp;
907     vdenc_context->min_delta_qp = misc->min_delta_qp;
908     vdenc_context->vdenc_streamin_enable = !!vdenc_context->num_roi;
909
910     for (i = 0; i < vdenc_context->num_roi; i++) {
911         vdenc_context->roi[i].left = misc->roi->roi_rectangle.x;
912         vdenc_context->roi[i].right = vdenc_context->roi[i].left + misc->roi->roi_rectangle.width;
913         vdenc_context->roi[i].top = misc->roi->roi_rectangle.y;
914         vdenc_context->roi[i].bottom = vdenc_context->roi[i].top + misc->roi->roi_rectangle.height;
915         vdenc_context->roi[i].value = misc->roi->roi_value;
916
917         vdenc_context->roi[i].left /= 16;
918         vdenc_context->roi[i].right /= 16;
919         vdenc_context->roi[i].top /= 16;
920         vdenc_context->roi[i].bottom /= 16;
921     }
922 }
923
924 static void
925 gen9_vdenc_update_misc_parameters(VADriverContextP ctx,
926                                   struct encode_state *encode_state,
927                                   struct intel_encoder_context *encoder_context)
928 {
929     int i;
930     VAEncMiscParameterBuffer *misc_param;
931
932     for (i = 0; i < ARRAY_ELEMS(encode_state->misc_param); i++) {
933         if (!encode_state->misc_param[i] || !encode_state->misc_param[i]->buffer)
934             continue;
935
936         misc_param = (VAEncMiscParameterBuffer *)encode_state->misc_param[i]->buffer;
937
938         switch (misc_param->type) {
939         case VAEncMiscParameterTypeFrameRate:
940             gen9_vdenc_update_framerate_parameters(ctx,
941                                                    encoder_context,
942                                                    (VAEncMiscParameterFrameRate *)misc_param->data);
943             break;
944
945         case VAEncMiscParameterTypeRateControl:
946             gen9_vdenc_update_rate_control_parameters(ctx,
947                                                       encoder_context,
948                                                       (VAEncMiscParameterRateControl *)misc_param->data);
949             break;
950
951         case VAEncMiscParameterTypeHRD:
952             gen9_vdenc_update_hrd_parameters(ctx,
953                                              encoder_context,
954                                              (VAEncMiscParameterHRD *)misc_param->data);
955             break;
956
957         case VAEncMiscParameterTypeROI:
958             gen9_vdenc_update_roi_parameters(ctx,
959                                              encoder_context,
960                                              (VAEncMiscParameterBufferROI *)misc_param->data);
961             break;
962
963         default:
964             break;
965         }
966     }
967 }
968
969 static void
970 gen9_vdenc_update_parameters(VADriverContextP ctx,
971                              VAProfile profile,
972                              struct encode_state *encode_state,
973                              struct intel_encoder_context *encoder_context)
974 {
975     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
976     VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
977     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
978
979     if (profile == VAProfileH264High)
980         vdenc_context->transform_8x8_mode_enable = !!pic_param->pic_fields.bits.transform_8x8_mode_flag;
981     else
982         vdenc_context->transform_8x8_mode_enable = 0;
983
984     vdenc_context->frame_width_in_mbs = seq_param->picture_width_in_mbs;
985     vdenc_context->frame_height_in_mbs = seq_param->picture_height_in_mbs;
986
987     vdenc_context->frame_width = vdenc_context->frame_width_in_mbs * 16;
988     vdenc_context->frame_height = vdenc_context->frame_height_in_mbs * 16;
989
990     vdenc_context->down_scaled_width_in_mb4x = WIDTH_IN_MACROBLOCKS(vdenc_context->frame_width / SCALE_FACTOR_4X);
991     vdenc_context->down_scaled_height_in_mb4x = HEIGHT_IN_MACROBLOCKS(vdenc_context->frame_height / SCALE_FACTOR_4X);
992     vdenc_context->down_scaled_width_4x = vdenc_context->down_scaled_width_in_mb4x * 16;
993     vdenc_context->down_scaled_height_4x = ((vdenc_context->down_scaled_height_in_mb4x + 1) >> 1) * 16;
994     vdenc_context->down_scaled_height_4x = ALIGN(vdenc_context->down_scaled_height_4x, 32) << 1;
995
996     if (vdenc_context->internal_rate_mode == I965_BRC_CBR) {
997         vdenc_context->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
998         vdenc_context->max_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
999         vdenc_context->min_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
1000     }
1001
1002     vdenc_context->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
1003     vdenc_context->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
1004     vdenc_context->frames_per_100s = 3000; /* 30fps */
1005     vdenc_context->gop_size = seq_param->intra_period;
1006     vdenc_context->ref_dist = seq_param->ip_period;
1007     vdenc_context->vdenc_streamin_enable = 0;
1008
1009     gen9_vdenc_update_misc_parameters(ctx, encode_state, encoder_context);
1010
1011     vdenc_context->current_pass = 0;
1012     vdenc_context->num_passes = 1;
1013
1014     if (vdenc_context->internal_rate_mode == I965_BRC_CBR ||
1015         vdenc_context->internal_rate_mode == I965_BRC_VBR)
1016         vdenc_context->brc_enabled = 1;
1017     else
1018         vdenc_context->brc_enabled = 0;
1019
1020     if (vdenc_context->brc_enabled &&
1021         (!vdenc_context->init_vbv_buffer_fullness_in_bit ||
1022          !vdenc_context->vbv_buffer_size_in_bit ||
1023          !vdenc_context->max_bit_rate ||
1024          !vdenc_context->target_bit_rate ||
1025          !vdenc_context->frames_per_100s))
1026         vdenc_context->brc_enabled = 0;
1027
1028     if (!vdenc_context->brc_enabled) {
1029         vdenc_context->target_bit_rate = 0;
1030         vdenc_context->max_bit_rate = 0;
1031         vdenc_context->min_bit_rate = 0;
1032         vdenc_context->init_vbv_buffer_fullness_in_bit = 0;
1033         vdenc_context->vbv_buffer_size_in_bit = 0;
1034     } else {
1035         vdenc_context->num_passes = NUM_OF_BRC_PAK_PASSES;
1036     }
1037 }
1038
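/*
 * Build the per-frame mode/MV/HME cost arrays for the given QP: raw costs
 * from vdenc_mode_const, vdenc_mv_cost_skipbias_qpel and vdenc_hme_cost are
 * packed into the 4.4 format via map_44_lut_value(). The inter and reference
 * costs are only filled in for P frames.
 */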
1039 static void
1040 gen9_vdenc_avc_calculate_mode_cost(VADriverContextP ctx,
1041                                    struct encode_state *encode_state,
1042                                    struct intel_encoder_context *encoder_context,
1043                                    int qp)
1044 {
1045     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1046     unsigned int frame_type = vdenc_context->frame_type;
1047
1048     memset(vdenc_context->mode_cost, 0, sizeof(vdenc_context->mode_cost));
1049     memset(vdenc_context->mv_cost, 0, sizeof(vdenc_context->mv_cost));
1050     memset(vdenc_context->hme_mv_cost, 0, sizeof(vdenc_context->hme_mv_cost));
1051
1052     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_NONPRED] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_NONPRED][qp]), 0x6f);
1053     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_16x16][qp]), 0x8f);
1054     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_8x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_8x8][qp]), 0x8f);
1055     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_4x4] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_4x4][qp]), 0x8f);
1056
1057     if (frame_type == VDENC_FRAME_P) {
1058         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x16][qp]), 0x8f);
1059         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x8][qp]), 0x8f);
1060         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X8Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X8Q][qp]), 0x6f);
1061         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X4Q][qp]), 0x6f);
1062         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_4X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_4X4Q][qp]), 0x6f);
1063         vdenc_context->mode_cost[VDENC_LUTMODE_REF_ID] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_REF_ID][qp]), 0x6f);
1064
1065         vdenc_context->mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[0]), 0x6f);
1066         vdenc_context->mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[1]), 0x6f);
1067         vdenc_context->mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[2]), 0x6f);
1068         vdenc_context->mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[3]), 0x6f);
1069         vdenc_context->mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[4]), 0x6f);
1070         vdenc_context->mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[5]), 0x6f);
1071         vdenc_context->mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[6]), 0x6f);
1072         vdenc_context->mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[7]), 0x6f);
1073
1074         vdenc_context->hme_mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_hme_cost[0][qp]), 0x6f);
1075         vdenc_context->hme_mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_hme_cost[1][qp]), 0x6f);
1076         vdenc_context->hme_mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_hme_cost[2][qp]), 0x6f);
1077         vdenc_context->hme_mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_hme_cost[3][qp]), 0x6f);
1078         vdenc_context->hme_mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_hme_cost[4][qp]), 0x6f);
1079         vdenc_context->hme_mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_hme_cost[5][qp]), 0x6f);
1080         vdenc_context->hme_mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_hme_cost[6][qp]), 0x6f);
1081         vdenc_context->hme_mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_hme_cost[7][qp]), 0x6f);
1082     }
1083 }
1084
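/*
 * Fill the per-macroblock stream-in state with the ROI selection: 0 marks a
 * non-ROI macroblock and i + 1 selects ROI i. Where regions overlap, the one
 * defined last wins (see the priority note in the inner loop).
 */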
1085 static void
1086 gen9_vdenc_update_roi_in_streamin_state(VADriverContextP ctx,
1087                                         struct intel_encoder_context *encoder_context)
1088 {
1089     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1090     struct gen9_vdenc_streamin_state *streamin_state;
1091     int row, col, i;
1092
1093     if (!vdenc_context->num_roi)
1094         return;
1095
1096     streamin_state = (struct gen9_vdenc_streamin_state *)i965_map_gpe_resource(&vdenc_context->vdenc_streamin_res);
1097
1098     if (!streamin_state)
1099         return;
1100
1101     for (col = 0;  col < vdenc_context->frame_width_in_mbs; col++) {
1102         for (row = 0; row < vdenc_context->frame_height_in_mbs; row++) {
1103             streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = 0; /* non-ROI region */
1104
1105             /* The last one has higher priority */
1106             for (i = vdenc_context->num_roi - 1; i >= 0; i--) {
1107                 if ((col >= vdenc_context->roi[i].left && col <= vdenc_context->roi[i].right) &&
1108                     (row >= vdenc_context->roi[i].top && row <= vdenc_context->roi[i].bottom)) {
1109                     streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = i + 1;
1110
1111                     break;
1112                 }
1113             }
1114         }
1115     }
1116
1117     i965_unmap_gpe_resource(&vdenc_context->vdenc_streamin_res);
1118 }
1119
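/*
 * Per-frame preparation: refresh the derived encoding parameters, (re)bind
 * the reconstructed, reference and 4x down-scaled surfaces as GPE resources,
 * point the compressed bitstream and status buffer at the coded buffer, and
 * (re)allocate the MFX/VDENC row-store scratch buffers and the stream-in
 * buffer.
 */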
1120 static VAStatus
1121 gen9_vdenc_avc_prepare(VADriverContextP ctx,
1122                        VAProfile profile,
1123                        struct encode_state *encode_state,
1124                        struct intel_encoder_context *encoder_context)
1125 {
1126     struct i965_driver_data *i965 = i965_driver_data(ctx);
1127     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1128     struct i965_coded_buffer_segment *coded_buffer_segment;
1129     struct object_surface *obj_surface;
1130     struct object_buffer *obj_buffer;
1131     VAEncPictureParameterBufferH264 *pic_param;
1132     VAEncSliceParameterBufferH264 *slice_param;
1133     VDEncAvcSurface *vdenc_avc_surface;
1134     dri_bo *bo;
1135     int i, j, enable_avc_ildb = 0;
1136     int qp;
1137     char *pbuffer;
1138
1139     gen9_vdenc_update_parameters(ctx, profile, encode_state, encoder_context);
1140
1141     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
1142         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
1143         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
1144
1145         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
1146             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1147                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1148                    (slice_param->slice_type == SLICE_TYPE_P) ||
1149                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1150                    (slice_param->slice_type == SLICE_TYPE_B));
1151
1152             if (slice_param->disable_deblocking_filter_idc != 1) {
1153                 enable_avc_ildb = 1;
1154                 break;
1155             }
1156
1157             slice_param++;
1158         }
1159     }
1160
1161     /* Setup current frame */
1162     obj_surface = encode_state->reconstructed_object;
1163     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1164
1165     if (obj_surface->private_data == NULL) {
1166         vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
1167         assert(vdenc_avc_surface);
1168
1169         vdenc_avc_surface->ctx = ctx;
1170         i965_CreateSurfaces(ctx,
1171                             vdenc_context->down_scaled_width_4x,
1172                             vdenc_context->down_scaled_height_4x,
1173                             VA_RT_FORMAT_YUV420,
1174                             1,
1175                             &vdenc_avc_surface->scaled_4x_surface_id);
1176         vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
1177         assert(vdenc_avc_surface->scaled_4x_surface_obj);
1178         i965_check_alloc_surface_bo(ctx,
1179                                     vdenc_avc_surface->scaled_4x_surface_obj,
1180                                     1,
1181                                     VA_FOURCC_NV12,
1182                                     SUBSAMPLE_YUV420);
1183
1184         obj_surface->private_data = (void *)vdenc_avc_surface;
1185         obj_surface->free_private_data = (void *)vdenc_free_avc_surface;
1186     }
1187
1188     vdenc_avc_surface = (VDEncAvcSurface *)obj_surface->private_data;
1189     assert(vdenc_avc_surface->scaled_4x_surface_obj);
1190
1191     /* Reconstructed surfaces */
1192     i965_free_gpe_resource(&vdenc_context->recon_surface_res);
1193     i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
1194     i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
1195     i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
1196
1197     i965_object_surface_to_2d_gpe_resource(&vdenc_context->recon_surface_res, obj_surface);
1198     i965_object_surface_to_2d_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res, vdenc_avc_surface->scaled_4x_surface_obj);
1199
1200     if (enable_avc_ildb) {
1201         i965_object_surface_to_2d_gpe_resource(&vdenc_context->post_deblocking_output_res, obj_surface);
1202     } else {
1203         i965_object_surface_to_2d_gpe_resource(&vdenc_context->pre_deblocking_output_res, obj_surface);
1204     }
1205
1206
1207     /* Reference surfaces */
1208     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
1209         assert(ARRAY_ELEMS(vdenc_context->list_reference_res) ==
1210                ARRAY_ELEMS(vdenc_context->list_scaled_4x_reference_res));
1211         i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
1212         i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
1213         obj_surface = encode_state->reference_objects[i];
1214
1215         if (obj_surface && obj_surface->bo) {
1216             i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_reference_res[i], obj_surface);
1217
1218             if (obj_surface->private_data == NULL) {
1219                 vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
1220                 assert(vdenc_avc_surface);
1221
1222                 vdenc_avc_surface->ctx = ctx;
1223                 i965_CreateSurfaces(ctx,
1224                                     vdenc_context->down_scaled_width_4x,
1225                                     vdenc_context->down_scaled_height_4x,
1226                                     VA_RT_FORMAT_YUV420,
1227                                     1,
1228                                     &vdenc_avc_surface->scaled_4x_surface_id);
1229                 vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
1230                 assert(vdenc_avc_surface->scaled_4x_surface_obj);
1231                 i965_check_alloc_surface_bo(ctx,
1232                                             vdenc_avc_surface->scaled_4x_surface_obj,
1233                                             1,
1234                                             VA_FOURCC_NV12,
1235                                             SUBSAMPLE_YUV420);
1236
1237                 obj_surface->private_data = vdenc_avc_surface;
1238                 obj_surface->free_private_data = vdenc_free_avc_surface;
1239             }
1240
1241             vdenc_avc_surface = obj_surface->private_data;
1242             i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i], vdenc_avc_surface->scaled_4x_surface_obj);
1243         }
1244     }
1245
1246     /* Input YUV surface */
1247     i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
1248     i965_object_surface_to_2d_gpe_resource(&vdenc_context->uncompressed_input_surface_res, encode_state->input_yuv_object);
1249
1250     /* Encoded bitstream */
1251     obj_buffer = encode_state->coded_buf_object;
1252     bo = obj_buffer->buffer_store->bo;
1253     i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
1254     i965_dri_object_to_buffer_gpe_resource(&vdenc_context->compressed_bitstream.res, bo);
1255     vdenc_context->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
1256     vdenc_context->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
1257
1258     /* Status buffer */
1259     i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
1260     i965_dri_object_to_buffer_gpe_resource(&vdenc_context->status_bffuer.res, bo);
1261     vdenc_context->status_bffuer.base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
1262     vdenc_context->status_bffuer.size = ALIGN(sizeof(struct gen9_vdenc_status), 64);
1263     vdenc_context->status_bffuer.bytes_per_frame_offset = offsetof(struct gen9_vdenc_status, bytes_per_frame);
1264     assert(vdenc_context->status_bffuer.base_offset + vdenc_context->status_bffuer.size <
1265            vdenc_context->compressed_bitstream.start_offset);
1266
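    /* Initialize the coded buffer segment header and clear the status area so
     * stale per-frame status is never reported. */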
1267     dri_bo_map(bo, 1);
1268
1269     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
1270     coded_buffer_segment->mapped = 0;
1271     coded_buffer_segment->codec = encoder_context->codec;
1272     coded_buffer_segment->status_support = 1;
1273
1274     pbuffer = bo->virtual;
1275     pbuffer += vdenc_context->status_bffuer.base_offset;
1276     memset(pbuffer, 0, vdenc_context->status_bffuer.size);
1277
1278     dri_bo_unmap(bo);
1279
1280     i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
1281     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_intra_row_store_scratch_res,
1282                                 vdenc_context->frame_width_in_mbs * 64,
1283                                 "Intra row store scratch buffer");
1284
1285     i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
1286     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_deblocking_filter_row_store_scratch_res,
1287                                 vdenc_context->frame_width_in_mbs * 256,
1288                                 "Deblocking filter row store scratch buffer");
1289
1290     i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
1291     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_bsd_mpc_row_store_scratch_res,
1292                                 vdenc_context->frame_width_in_mbs * 128,
1293                                 "BSD/MPC row store scratch buffer");
1294
1295     i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
1296     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_row_store_scratch_res,
1297                                 vdenc_context->frame_width_in_mbs * 64,
1298                                 "VDENC row store scratch buffer");
1299
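    /* VDEnc stream-in buffer: one 64-byte gen9_vdenc_streamin_state record per
     * macroblock (used for ROI programming below). */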
1300     assert(sizeof(struct gen9_vdenc_streamin_state) == 64);
1301     i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
1302     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_streamin_res,
1303                                 vdenc_context->frame_width_in_mbs *
1304                                 vdenc_context->frame_height_in_mbs *
1305                                 sizeof(struct gen9_vdenc_streamin_state),
1306                                 "VDENC StreamIn buffer");
1307
1308     /*
1309      * Calculate the index for each reference surface in list0 for the first slice
1310      * TODO: other slices
1311      */
1312     pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1313     slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1314
1315     vdenc_context->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
1316
1317     if (slice_param->num_ref_idx_active_override_flag)
1318         vdenc_context->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
1319
1320     if (vdenc_context->num_refs[0] > ARRAY_ELEMS(vdenc_context->list_ref_idx[0]))
1321         return VA_STATUS_ERROR_INVALID_VALUE;
1322
1323     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_ref_idx[0]); i++) {
1324         VAPictureH264 *va_pic;
1325
1326         assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(vdenc_context->list_ref_idx[0]));
1327         vdenc_context->list_ref_idx[0][i] = 0;
1328
1329         if (i >= vdenc_context->num_refs[0])
1330             continue;
1331
1332         va_pic = &slice_param->RefPicList0[i];
1333
1334         for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
1335             obj_surface = encode_state->reference_objects[j];
1336
1337             if (obj_surface &&
1338                 obj_surface->bo &&
1339                 obj_surface->base.id == va_pic->picture_id) {
1340
1341                 assert(obj_surface->base.id != VA_INVALID_SURFACE);
1342                 vdenc_context->list_ref_idx[0][i] = j;
1343
1344                 break;
1345             }
1346         }
1347     }
1348
1349     if (slice_param->slice_type == SLICE_TYPE_I ||
1350         slice_param->slice_type == SLICE_TYPE_SI)
1351         vdenc_context->frame_type = VDENC_FRAME_I;
1352     else
1353         vdenc_context->frame_type = VDENC_FRAME_P;
1354
1355     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1356
1357     gen9_vdenc_avc_calculate_mode_cost(ctx, encode_state, encoder_context, qp);
1358     gen9_vdenc_update_roi_in_streamin_state(ctx, encoder_context);
1359
1360     return VA_STATUS_SUCCESS;
1361 }
1362
1363 static void
1364 gen9_vdenc_huc_pipe_mode_select(VADriverContextP ctx,
1365                                 struct intel_encoder_context *encoder_context,
1366                                 struct huc_pipe_mode_select_parameter *params)
1367 {
1368     struct intel_batchbuffer *batch = encoder_context->base.batch;
1369
1370     BEGIN_BCS_BATCH(batch, 3);
1371
1372     OUT_BCS_BATCH(batch, HUC_PIPE_MODE_SELECT | (3 - 2));
1373     OUT_BCS_BATCH(batch,
1374                   (params->huc_stream_object_enable << 10) |
1375                   (params->indirect_stream_out_enable << 4));
1376     OUT_BCS_BATCH(batch,
1377                   params->media_soft_reset_counter);
1378
1379     ADVANCE_BCS_BATCH(batch);
1380 }
1381
1382 static void
1383 gen9_vdenc_huc_imem_state(VADriverContextP ctx,
1384                           struct intel_encoder_context *encoder_context,
1385                           struct huc_imem_state_parameter *params)
1386 {
1387     struct intel_batchbuffer *batch = encoder_context->base.batch;
1388
1389     BEGIN_BCS_BATCH(batch, 5);
1390
1391     OUT_BCS_BATCH(batch, HUC_IMEM_STATE | (5 - 2));
1392     OUT_BCS_BATCH(batch, 0);
1393     OUT_BCS_BATCH(batch, 0);
1394     OUT_BCS_BATCH(batch, 0);
1395     OUT_BCS_BATCH(batch, params->huc_firmware_descriptor);
1396
1397     ADVANCE_BCS_BATCH(batch);
1398 }
1399
1400 static void
1401 gen9_vdenc_huc_dmem_state(VADriverContextP ctx,
1402                           struct intel_encoder_context *encoder_context,
1403                           struct huc_dmem_state_parameter *params)
1404 {
1405     struct intel_batchbuffer *batch = encoder_context->base.batch;
1406
1407     BEGIN_BCS_BATCH(batch, 6);
1408
1409     OUT_BCS_BATCH(batch, HUC_DMEM_STATE | (6 - 2));
1410     OUT_BUFFER_3DW(batch, params->huc_data_source_res->bo, 0, 0, 0);
1411     OUT_BCS_BATCH(batch, params->huc_data_destination_base_address);
1412     OUT_BCS_BATCH(batch, params->huc_data_length);
1413
1414     ADVANCE_BCS_BATCH(batch);
1415 }
1416
1417 /*
1418 static void
1419 gen9_vdenc_huc_cfg_state(VADriverContextP ctx,
1420                          struct intel_encoder_context *encoder_context,
1421                          struct huc_cfg_state_parameter *params)
1422 {
1423     struct intel_batchbuffer *batch = encoder_context->base.batch;
1424
1425     BEGIN_BCS_BATCH(batch, 2);
1426
1427     OUT_BCS_BATCH(batch, HUC_CFG_STATE | (2 - 2));
1428     OUT_BCS_BATCH(batch, !!params->force_reset);
1429
1430     ADVANCE_BCS_BATCH(batch);
1431 }
1432 */
1433 static void
1434 gen9_vdenc_huc_virtual_addr_state(VADriverContextP ctx,
1435                                   struct intel_encoder_context *encoder_context,
1436                                   struct huc_virtual_addr_parameter *params)
1437 {
1438     struct intel_batchbuffer *batch = encoder_context->base.batch;
1439     int i;
1440
1441     BEGIN_BCS_BATCH(batch, 49);
1442
1443     OUT_BCS_BATCH(batch, HUC_VIRTUAL_ADDR_STATE | (49 - 2));
1444
1445     for (i = 0; i < 16; i++) {
1446         if (params->regions[i].huc_surface_res && params->regions[i].huc_surface_res->bo)
1447             OUT_BUFFER_3DW(batch,
1448                            params->regions[i].huc_surface_res->bo,
1449                            !!params->regions[i].is_target, 0, 0);
1450         else
1451             OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1452     }
1453
1454     ADVANCE_BCS_BATCH(batch);
1455 }
1456
1457 static void
1458 gen9_vdenc_huc_ind_obj_base_addr_state(VADriverContextP ctx,
1459                                        struct intel_encoder_context *encoder_context,
1460                                        struct huc_ind_obj_base_addr_parameter *params)
1461 {
1462     struct intel_batchbuffer *batch = encoder_context->base.batch;
1463
1464     BEGIN_BCS_BATCH(batch, 11);
1465
1466     OUT_BCS_BATCH(batch, HUC_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
1467
1468     if (params->huc_indirect_stream_in_object_res)
1469         OUT_BUFFER_3DW(batch,
1470                        params->huc_indirect_stream_in_object_res->bo,
1471                        0, 0, 0);
1472     else
1473         OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1474
1475     OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */
1476
1477     if (params->huc_indirect_stream_out_object_res)
1478         OUT_BUFFER_3DW(batch,
1479                        params->huc_indirect_stream_out_object_res->bo,
1480                        1, 0, 0);
1481     else
1482         OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1483
1484     OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */
1485
1486     ADVANCE_BCS_BATCH(batch);
1487 }
1488
1489 static void
1490 gen9_vdenc_huc_store_huc_status2(VADriverContextP ctx,
1491                                  struct intel_encoder_context *encoder_context)
1492 {
1493     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1494     struct intel_batchbuffer *batch = encoder_context->base.batch;
1495     struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
1496     struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
1497
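    /* huc_status2_res layout: DW0 holds the compare mask (bit 6) written here,
     * DW1 the HUC_STATUS2 register value; the pair is later consumed by
     * MI_CONDITIONAL_BATCH_BUFFER_END to verify that the HuC kernel ran. */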
1498     /* Write HUC_STATUS2 mask (1 << 6) */
1499     memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
1500     mi_store_data_imm_params.bo = vdenc_context->huc_status2_res.bo;
1501     mi_store_data_imm_params.offset = 0;
1502     mi_store_data_imm_params.dw0 = (1 << 6);
1503     gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
1504
1505     /* Store HUC_STATUS2 */
1506     memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
1507     mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS2;
1508     mi_store_register_mem_params.bo = vdenc_context->huc_status2_res.bo;
1509     mi_store_register_mem_params.offset = 4;
1510     gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
1511 }
1512
1513 static void
1514 gen9_vdenc_huc_stream_object(VADriverContextP ctx,
1515                              struct intel_encoder_context *encoder_context,
1516                              struct huc_stream_object_parameter *params)
1517 {
1518     struct intel_batchbuffer *batch = encoder_context->base.batch;
1519
1520     BEGIN_BCS_BATCH(batch, 5);
1521
1522     OUT_BCS_BATCH(batch, HUC_STREAM_OBJECT | (5 - 2));
1523     OUT_BCS_BATCH(batch, params->indirect_stream_in_data_length);
1524     OUT_BCS_BATCH(batch,
1525                   (1 << 31) |   /* Must be 1 */
1526                   params->indirect_stream_in_start_address);
1527     OUT_BCS_BATCH(batch, params->indirect_stream_out_start_address);
1528     OUT_BCS_BATCH(batch,
1529                   (!!params->huc_bitstream_enable << 29) |
1530                   (params->length_mode << 27) |
1531                   (!!params->stream_out << 26) |
1532                   (!!params->emulation_prevention_byte_removal << 25) |
1533                   (!!params->start_code_search_engine << 24) |
1534                   (params->start_code_byte2 << 16) |
1535                   (params->start_code_byte1 << 8) |
1536                   params->start_code_byte0);
1537
1538     ADVANCE_BCS_BATCH(batch);
1539 }
1540
1541 static void
1542 gen9_vdenc_huc_start(VADriverContextP ctx,
1543                      struct intel_encoder_context *encoder_context,
1544                      struct huc_start_parameter *params)
1545 {
1546     struct intel_batchbuffer *batch = encoder_context->base.batch;
1547
1548     BEGIN_BCS_BATCH(batch, 2);
1549
1550     OUT_BCS_BATCH(batch, HUC_START | (2 - 2));
1551     OUT_BCS_BATCH(batch, !!params->last_stream_object);
1552
1553     ADVANCE_BCS_BATCH(batch);
1554 }
1555
1556 static void
1557 gen9_vdenc_vd_pipeline_flush(VADriverContextP ctx,
1558                              struct intel_encoder_context *encoder_context,
1559                              struct vd_pipeline_flush_parameter *params)
1560 {
1561     struct intel_batchbuffer *batch = encoder_context->base.batch;
1562
1563     BEGIN_BCS_BATCH(batch, 2);
1564
1565     OUT_BCS_BATCH(batch, VD_PIPELINE_FLUSH | (2 - 2));
1566     OUT_BCS_BATCH(batch,
1567                   params->mfx_pipeline_command_flush << 19 |
1568                   params->mfl_pipeline_command_flush << 18 |
1569                   params->vdenc_pipeline_command_flush << 17 |
1570                   params->hevc_pipeline_command_flush << 16 |
1571                   params->vd_command_message_parser_done << 4 |
1572                   params->mfx_pipeline_done << 3 |
1573                   params->mfl_pipeline_done << 2 |
1574                   params->vdenc_pipeline_done << 1 |
1575                   params->hevc_pipeline_done);
1576
1577     ADVANCE_BCS_BATCH(batch);
1578 }
1579
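/*
 * Maximum macroblock processing rate (MaxMBPS) per H.264 level, cf. Table A-1
 * of the H.264 spec; unknown levels fall back to 11880.
 */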
1580 static int
1581 gen9_vdenc_get_max_mbps(int level_idc)
1582 {
1583     int max_mbps = 11880;
1584
1585     switch (level_idc) {
1586     case 20:
1587         max_mbps = 11880;
1588         break;
1589
1590     case 21:
1591         max_mbps = 19800;
1592         break;
1593
1594     case 22:
1595         max_mbps = 20250;
1596         break;
1597
1598     case 30:
1599         max_mbps = 40500;
1600         break;
1601
1602     case 31:
1603         max_mbps = 108000;
1604         break;
1605
1606     case 32:
1607         max_mbps = 216000;
1608         break;
1609
1610     case 40:
1611     case 41:
1612         max_mbps = 245760;
1613         break;
1614
1615     case 42:
1616         max_mbps = 522240;
1617         break;
1618
1619     case 50:
1620         max_mbps = 589824;
1621         break;
1622
1623     case 51:
1624         max_mbps = 983040;
1625         break;
1626
1627     case 52:
1628         max_mbps = 2073600;
1629         break;
1630
1631     default:
1632         break;
1633     }
1634
1635     return max_mbps;
1636 }
1637
1638 static unsigned int
1639 gen9_vdenc_get_profile_level_max_frame(VADriverContextP ctx,
1640                                        struct intel_encoder_context *encoder_context,
1641                                        int level_idc)
1642 {
1643     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1644     double bits_per_mb, tmpf;
1645     int max_mbps, num_mb_per_frame;
1646     uint64_t max_byte_per_frame0, max_byte_per_frame1;
1647     unsigned int ret;
1648
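    /*
     * Despite the name, bits_per_mb is used as a byte budget per macroblock:
     * it appears to be the 384 bytes of raw 4:2:0 data per MB divided by the
     * level's minimum compression ratio (MinCR 4 for levels 3.1-4.0, 2 otherwise).
     */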
1649     if (level_idc >= 31 && level_idc <= 40)
1650         bits_per_mb = 96.0;
1651     else
1652         bits_per_mb = 192.0;
1653
1654     max_mbps = gen9_vdenc_get_max_mbps(level_idc);
1655     num_mb_per_frame = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs;
1656
1657     tmpf = (double)num_mb_per_frame;
1658
1659     if (tmpf < max_mbps / 172.0)
1660         tmpf = max_mbps / 172.0;
1661
1662     max_byte_per_frame0 = (uint64_t)(tmpf * bits_per_mb);
1663     max_byte_per_frame1 = (uint64_t)(((double)max_mbps * 100) / vdenc_context->frames_per_100s * bits_per_mb);
1664
1665     /* TODO: check VAEncMiscParameterTypeMaxFrameSize */
1666     ret = (unsigned int)MIN(max_byte_per_frame0, max_byte_per_frame1);
1667     ret = (unsigned int)MIN(ret, vdenc_context->frame_width * vdenc_context->frame_height); /* cap by the frame area */
1668
1669     return ret;
1670 }
1671
1672 static int
1673 gen9_vdenc_calculate_initial_qp(VADriverContextP ctx,
1674                                 struct encode_state *encode_state,
1675                                 struct intel_encoder_context *encoder_context)
1676 {
1677     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1678     float x0 = 0, y0 = 1.19f, x1 = 1.75f, y1 = 1.75f;
1679     unsigned frame_size;
1680     int qp, delta_qp;
1681
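    /*
     * Heuristic: estimate the initial QP from the ratio of target bits per
     * frame to the raw frame size, interpolating linearly in the log domain
     * between (x0, y0) and (x1, y1), then raise it further when the VBV
     * buffer is small relative to the per-frame budget.
     */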
1682     frame_size = (vdenc_context->frame_width * vdenc_context->frame_height * 3 / 2);
1683     qp = (int)(1.0 / 1.2 * pow(10.0,
1684                                (log10(frame_size * 2.0 / 3.0 * ((float)vdenc_context->frames_per_100s) /
1685                                       ((float)(vdenc_context->target_bit_rate * 1000) * 100)) - x0) *
1686                                (y1 - y0) / (x1 - x0) + y0) + 0.5);
1687     qp += 2;
1688     delta_qp = (int)(9 - (vdenc_context->vbv_buffer_size_in_bit * ((float)vdenc_context->frames_per_100s) /
1689                           ((float)(vdenc_context->target_bit_rate * 1000) * 100)));
1690     if (delta_qp > 0)
1691         qp += delta_qp;
1692
1693     qp = CLAMP(1, 51, qp);
1694     qp--;
1695
1696     if (qp < 0)
1697         qp = 1;
1698
1699     return qp;
1700 }
1701
1702 static void
1703 gen9_vdenc_update_huc_brc_init_dmem(VADriverContextP ctx,
1704                                     struct encode_state *encode_state,
1705                                     struct intel_encoder_context *encoder_context)
1706 {
1707     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1708     struct huc_brc_init_dmem *dmem;
1709     VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1710     double input_bits_per_frame, bps_ratio;
1711     int i;
1712
1713     vdenc_context->brc_init_reset_input_bits_per_frame = ((double)(vdenc_context->max_bit_rate * 1000) * 100) / vdenc_context->frames_per_100s;
1714     vdenc_context->brc_init_current_target_buf_full_in_bits = vdenc_context->brc_init_reset_input_bits_per_frame;
1715     vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
1716
1717     dmem = (struct huc_brc_init_dmem *)i965_map_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1718
1719     if (!dmem)
1720         return;
1721
1722     memset(dmem, 0, sizeof(*dmem));
1723
1724     dmem->brc_func = vdenc_context->brc_initted ? 2 : 0;
1725
1726     dmem->frame_width = vdenc_context->frame_width;
1727     dmem->frame_height = vdenc_context->frame_height;
1728
1729     dmem->target_bitrate = vdenc_context->target_bit_rate * 1000;
1730     dmem->min_rate = vdenc_context->min_bit_rate * 1000;
1731     dmem->max_rate = vdenc_context->max_bit_rate * 1000;
1732     dmem->buffer_size = vdenc_context->vbv_buffer_size_in_bit;
1733     dmem->init_buffer_fullness = vdenc_context->init_vbv_buffer_fullness_in_bit;
1734
1735     if (dmem->init_buffer_fullness > vdenc_context->vbv_buffer_size_in_bit)
1736         dmem->init_buffer_fullness = vdenc_context->vbv_buffer_size_in_bit;
1737
1738     if (vdenc_context->internal_rate_mode == I965_BRC_CBR)
1739         dmem->brc_flag |= 0x10;
1740     else if (vdenc_context->internal_rate_mode == I965_BRC_VBR)
1741         dmem->brc_flag |= 0x20;
1742
1743     dmem->frame_rate_m = vdenc_context->frames_per_100s;
1744     dmem->frame_rate_d = 100;
1745
1746     dmem->profile_level_max_frame = gen9_vdenc_get_profile_level_max_frame(ctx, encoder_context, seq_param->level_idc);
1747
1748     if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1749         dmem->num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
1750
1751     dmem->min_qp = 10;
1752     dmem->max_qp = 51;
1753
1754     input_bits_per_frame = ((double)vdenc_context->max_bit_rate * 1000 * 100) / vdenc_context->frames_per_100s;
1755     bps_ratio = input_bits_per_frame / ((double)vdenc_context->vbv_buffer_size_in_bit * 100 / vdenc_context->frames_per_100s);
1756
1757     if (bps_ratio < 0.1)
1758         bps_ratio = 0.1;
1759
1760     if (bps_ratio > 3.5)
1761         bps_ratio = 3.5;
1762
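    /* Scale the PAK deviation thresholds by the bits-per-frame/buffer ratio
     * computed above (clamped to [0.1, 3.5]). */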
1763     for (i = 0; i < 4; i++) {
1764         dmem->dev_thresh_pb0[i] = (char)(-50 * pow(vdenc_brc_dev_threshpb0_fp_neg[i], bps_ratio));
1765         dmem->dev_thresh_pb0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshpb0_fp_pos[i], bps_ratio));
1766
1767         dmem->dev_thresh_i0[i] = (char)(-50 * pow(vdenc_brc_dev_threshi0_fp_neg[i], bps_ratio));
1768         dmem->dev_thresh_i0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshi0_fp_pos[i], bps_ratio));
1769
1770         dmem->dev_thresh_vbr0[i] = (char)(-50 * pow(vdenc_brc_dev_threshvbr0_neg[i], bps_ratio));
1771         dmem->dev_thresh_vbr0[i + 4] = (char)(100 * pow(vdenc_brc_dev_threshvbr0_pos[i], bps_ratio));
1772     }
1773
1774     dmem->init_qp_ip = gen9_vdenc_calculate_initial_qp(ctx, encode_state, encoder_context);
1775
1776     if (vdenc_context->mb_brc_enabled) {
1777         dmem->mb_qp_ctrl = 1;
1778         dmem->dist_qp_delta[0] = -5;
1779         dmem->dist_qp_delta[1] = -2;
1780         dmem->dist_qp_delta[2] = 2;
1781         dmem->dist_qp_delta[3] = 5;
1782     }
1783
1784     dmem->slice_size_ctrl_en = 0;       /* TODO: add support for slice size control */
1785
1786     dmem->oscillation_qp_delta = 0;     /* TODO: add support */
1787     dmem->first_iframe_no_hrd_check = 0; /* TODO: add support */
1788
1789     // 2nd re-encode pass if possible
1790     if (vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs >= (3840 * 2160 / 256)) {
1791         dmem->top_qp_delta_thr_for_2nd_pass = 5;
1792         dmem->bottom_qp_delta_thr_for_2nd_pass = 5;
1793         dmem->top_frame_size_threshold_for_2nd_pass = 80;
1794         dmem->bottom_frame_size_threshold_for_2nd_pass = 80;
1795     } else {
1796         dmem->top_qp_delta_thr_for_2nd_pass = 2;
1797         dmem->bottom_qp_delta_thr_for_2nd_pass = 1;
1798         dmem->top_frame_size_threshold_for_2nd_pass = 32;
1799         dmem->bottom_frame_size_threshold_for_2nd_pass = 24;
1800     }
1801
1802     dmem->qp_select_for_first_pass = 1;
1803     dmem->mb_header_compensation = 1;
1804     dmem->delta_qp_adaptation = 1;
1805     dmem->max_crf_quality_factor = 52;
1806
1807     dmem->crf_quality_factor = 0;               /* TODO: add support for CRF */
1808     dmem->scenario_info = 0;
1809
1810     memcpy(&dmem->estrate_thresh_i0, vdenc_brc_estrate_thresh_i0, sizeof(dmem->estrate_thresh_i0));
1811     memcpy(&dmem->estrate_thresh_p0, vdenc_brc_estrate_thresh_p0, sizeof(dmem->estrate_thresh_p0));
1812
1813     i965_unmap_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1814 }
1815
1816 static void
1817 gen9_vdenc_huc_brc_init_reset(VADriverContextP ctx,
1818                               struct encode_state *encode_state,
1819                               struct intel_encoder_context *encoder_context)
1820 {
1821     struct intel_batchbuffer *batch = encoder_context->base.batch;
1822     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1823     struct huc_pipe_mode_select_parameter pipe_mode_select_params;
1824     struct huc_imem_state_parameter imem_state_params;
1825     struct huc_dmem_state_parameter dmem_state_params;
1826     struct huc_virtual_addr_parameter virtual_addr_params;
1827     struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
1828     struct huc_stream_object_parameter stream_object_params;
1829     struct huc_start_parameter start_params;
1830     struct vd_pipeline_flush_parameter pipeline_flush_params;
1831     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
1832
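    /*
     * BRC init/reset is performed by a HuC kernel: load the firmware
     * descriptor, point DMEM at the init parameters, bind the BRC history
     * buffer as the only output region, then kick the kernel with a minimal
     * dummy stream object and flush the pipeline.
     */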
1833     vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
1834
1835     memset(&imem_state_params, 0, sizeof(imem_state_params));
1836     imem_state_params.huc_firmware_descriptor = HUC_BRC_INIT_RESET;
1837     gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);
1838
1839     memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
1840     gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);
1841
1842     gen9_vdenc_update_huc_brc_init_dmem(ctx, encode_state, encoder_context);
1843     memset(&dmem_state_params, 0, sizeof(dmem_state_params));
1844     dmem_state_params.huc_data_source_res = &vdenc_context->brc_init_reset_dmem_res;
1845     dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
1846     dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_init_dmem), 64);
1847     gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);
1848
1849     memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
1850     virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
1851     virtual_addr_params.regions[0].is_target = 1;
1852     gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);
1853
1854     memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
1855     ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
1856     ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
1857     gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);
1858
1859     memset(&stream_object_params, 0, sizeof(stream_object_params));
1860     stream_object_params.indirect_stream_in_data_length = 1;
1861     stream_object_params.indirect_stream_in_start_address = 0;
1862     gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);
1863
1864     gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);
1865
1866     memset(&start_params, 0, sizeof(start_params));
1867     start_params.last_stream_object = 1;
1868     gen9_vdenc_huc_start(ctx, encoder_context, &start_params);
1869
1870     memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
1871     pipeline_flush_params.hevc_pipeline_done = 1;
1872     pipeline_flush_params.hevc_pipeline_command_flush = 1;
1873     gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
1874
1875     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
1876     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
1877     gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
1878 }
1879
1880 static void
1881 gen9_vdenc_update_huc_update_dmem(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1882 {
1883     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1884     struct huc_brc_update_dmem *dmem;
1885     int i, num_p_in_gop = 0;
1886
1887     dmem = (struct huc_brc_update_dmem *)i965_map_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
1888
1889     if (!dmem)
1890         return;
1891
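    /* brc_func selects the HuC operation: 0 = init, 2 = reset (see
     * gen9_vdenc_update_huc_brc_init_dmem()), 1 = per-frame update. */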
1892     dmem->brc_func = 1;
1893
1894     if (vdenc_context->brc_initted && (vdenc_context->current_pass == 0)) {
1895         vdenc_context->brc_init_previous_target_buf_full_in_bits =
1896             (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits);
1897         vdenc_context->brc_init_current_target_buf_full_in_bits += vdenc_context->brc_init_reset_input_bits_per_frame;
1898         vdenc_context->brc_target_size += vdenc_context->brc_init_reset_input_bits_per_frame;
1899     }
1900
1901     if (vdenc_context->brc_target_size > vdenc_context->vbv_buffer_size_in_bit)
1902         vdenc_context->brc_target_size -= vdenc_context->vbv_buffer_size_in_bit;
1903
1904     dmem->target_size = vdenc_context->brc_target_size;
1905
1906     dmem->peak_tx_bits_per_frame = (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits - vdenc_context->brc_init_previous_target_buf_full_in_bits);
1907
1908     dmem->target_slice_size = 0;        // TODO: add support for slice size control
1909
1910     memcpy(dmem->start_global_adjust_frame, vdenc_brc_start_global_adjust_frame, sizeof(dmem->start_global_adjust_frame));
1911     memcpy(dmem->global_rate_ratio_threshold, vdenc_brc_global_rate_ratio_threshold, sizeof(dmem->global_rate_ratio_threshold));
1912
1913     dmem->current_frame_type = (vdenc_context->frame_type + 2) % 3;      // I frame:2, P frame:0, B frame:1
1914
1915     memcpy(dmem->start_global_adjust_mult, vdenc_brc_start_global_adjust_mult, sizeof(dmem->start_global_adjust_mult));
1916     memcpy(dmem->start_global_adjust_div, vdenc_brc_start_global_adjust_div, sizeof(dmem->start_global_adjust_div));
1917     memcpy(dmem->global_rate_ratio_threshold_qp, vdenc_brc_global_rate_ratio_threshold_qp, sizeof(dmem->global_rate_ratio_threshold_qp));
1918
1919     dmem->current_pak_pass = vdenc_context->current_pass;
1920     dmem->max_num_passes = 2;
1921
1922     dmem->scene_change_detect_enable = 1;
1923     dmem->scene_change_prev_intra_percent_threshold = 96;
1924     dmem->scene_change_cur_intra_perent_threshold = 192;
1925
1926     if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1927         num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
1928
1929     for (i = 0; i < 2; i++)
1930         dmem->scene_change_width[i] = MIN((num_p_in_gop + 1) / 5, 6);
1931
1932     if (vdenc_context->is_low_delay)
1933         dmem->ip_average_coeff = 0;
1934     else
1935         dmem->ip_average_coeff = 128;
1936
1937     dmem->skip_frame_size = 0;
1938     dmem->num_of_frames_skipped = 0;
1939
1940     dmem->roi_source = 0;               // TODO: add support for dirty ROI
1941     dmem->hme_detection_enable = 0;     // TODO: support HME kernel
1942     dmem->hme_cost_enable = 1;
1943
1944     dmem->second_level_batchbuffer_size = 228;
1945
1946     i965_unmap_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
1947 }
1948
1949 static void
1950 gen9_vdenc_init_mfx_avc_img_state(VADriverContextP ctx,
1951                                   struct encode_state *encode_state,
1952                                   struct intel_encoder_context *encoder_context,
1953                                   struct gen9_mfx_avc_img_state *pstate)
1954 {
1955     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1956     VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1957     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1958
1959     memset(pstate, 0, sizeof(*pstate));
1960
1961     pstate->dw0.value = (MFX_AVC_IMG_STATE | (sizeof(*pstate) / 4 - 2));
1962
1963     pstate->dw1.frame_size_in_mbs_minus1 = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs - 1;
1964
1965     pstate->dw2.frame_width_in_mbs_minus1 = vdenc_context->frame_width_in_mbs - 1;
1966     pstate->dw2.frame_height_in_mbs_minus1 = vdenc_context->frame_height_in_mbs - 1;
1967
1968     pstate->dw3.image_structure = 0;
1969     pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
1970     pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
1971     pstate->dw3.brc_domain_rate_control_enable = 1;
1972     pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
1973     pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
1974
1975     pstate->dw4.field_picture_flag = 0;
1976     pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
1977     pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
1978     pstate->dw4.transform_8x8_idct_mode_flag = vdenc_context->transform_8x8_mode_enable;
1979     pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
1980     pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
1981     pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
1982     pstate->dw4.mb_mv_format_flag = 1;
1983     pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
1984     pstate->dw4.mv_unpacked_flag = 1;
1985     pstate->dw4.insert_test_flag = 0;
1986     pstate->dw4.load_slice_pointer_flag = 0;
1987     pstate->dw4.macroblock_stat_enable = 0;        /* Always 0 in VDEnc mode */
1988     pstate->dw4.minimum_frame_size = 0;
1989
1990     pstate->dw5.intra_mb_max_bit_flag = 1;
1991     pstate->dw5.inter_mb_max_bit_flag = 1;
1992     pstate->dw5.frame_size_over_flag = 1;
1993     pstate->dw5.frame_size_under_flag = 1;
1994     pstate->dw5.intra_mb_ipcm_flag = 1;
1995     pstate->dw5.mb_rate_ctrl_flag = 0;             /* Always 0 in VDEnc mode */
1996     pstate->dw5.non_first_pass_flag = 0;
1997     pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
1998     pstate->dw5.aq_chroma_disable = 1;
1999
2000     pstate->dw6.intra_mb_max_size = 2700;
2001     pstate->dw6.inter_mb_max_size = 4095;
2002
2003     pstate->dw8.slice_delta_qp_max0 = 0;
2004     pstate->dw8.slice_delta_qp_max1 = 0;
2005     pstate->dw8.slice_delta_qp_max2 = 0;
2006     pstate->dw8.slice_delta_qp_max3 = 0;
2007
2008     pstate->dw9.slice_delta_qp_min0 = 0;
2009     pstate->dw9.slice_delta_qp_min1 = 0;
2010     pstate->dw9.slice_delta_qp_min2 = 0;
2011     pstate->dw9.slice_delta_qp_min3 = 0;
2012
2013     pstate->dw10.frame_bitrate_min = 0;
2014     pstate->dw10.frame_bitrate_min_unit = 1;
2015     pstate->dw10.frame_bitrate_min_unit_mode = 1;
2016     pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
2017     pstate->dw10.frame_bitrate_max_unit = 1;
2018     pstate->dw10.frame_bitrate_max_unit_mode = 1;
2019
2020     pstate->dw11.frame_bitrate_min_delta = 0;
2021     pstate->dw11.frame_bitrate_max_delta = 0;
2022
2023     pstate->dw12.vad_error_logic = 1;
2024     /* TODO: set parameters DW19/DW20 for slices */
2025 }
2026
2027 static void
2028 gen9_vdenc_init_vdenc_img_state(VADriverContextP ctx,
2029                                 struct encode_state *encode_state,
2030                                 struct intel_encoder_context *encoder_context,
2031                                 struct gen9_vdenc_img_state *pstate,
2032                                 int update_cost)
2033 {
2034     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2035     VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
2036     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2037     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
2038
2039     memset(pstate, 0, sizeof(*pstate));
2040
2041     pstate->dw0.value = (VDENC_IMG_STATE | (sizeof(*pstate) / 4 - 2));
2042
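    /* Frame-type specific defaults; when update_cost is set, the mode and MV
     * costs below are overwritten with the values computed by
     * gen9_vdenc_avc_calculate_mode_cost(). */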
2043     if (vdenc_context->frame_type == VDENC_FRAME_I) {
2044         pstate->dw4.intra_sad_measure_adjustment = 2;
2045         pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;
2046
2047         pstate->dw5.cre_prefetch_enable = 1;
2048
2049         pstate->dw9.mode0_cost = 10;
2050         pstate->dw9.mode1_cost = 0;
2051         pstate->dw9.mode2_cost = 3;
2052         pstate->dw9.mode3_cost = 30;
2053
2054         pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
2055         pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
2056         pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;
2057
2058         pstate->dw22.small_mb_size_in_word = 0xff;
2059         pstate->dw22.large_mb_size_in_word = 0xff;
2060
2061         pstate->dw27.max_hmv_r = 0x2000;
2062         pstate->dw27.max_vmv_r = 0x200;
2063
2064         pstate->dw33.qp_range_check_upper_bound = 0x33;
2065         pstate->dw33.qp_range_check_lower_bound = 0x0a;
2066         pstate->dw33.qp_range_check_value = 0x0f;
2067     } else {
2068         pstate->dw2.bidirectional_weight = 0x20;
2069
2070         pstate->dw4.subpel_mode = 3;
2071         pstate->dw4.bme_disable_for_fbr_message = 1;
2072         pstate->dw4.inter_sad_measure_adjustment = 2;
2073         pstate->dw4.intra_sad_measure_adjustment = 2;
2074         pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;
2075
2076         pstate->dw5.cre_prefetch_enable = 1;
2077
2078         pstate->dw8.non_skip_zero_mv_const_added = 1;
2079         pstate->dw8.non_skip_mb_mode_const_added = 1;
2080         pstate->dw8.ref_id_cost_mode_select = 1;
2081
2082         pstate->dw9.mode0_cost = 7;
2083         pstate->dw9.mode1_cost = 26;
2084         pstate->dw9.mode2_cost = 30;
2085         pstate->dw9.mode3_cost = 57;
2086
2087         pstate->dw10.mode4_cost = 8;
2088         pstate->dw10.mode5_cost = 2;
2089         pstate->dw10.mode6_cost = 4;
2090         pstate->dw10.mode7_cost = 6;
2091
2092         pstate->dw11.mode8_cost = 5;
2093         pstate->dw11.mode9_cost = 0;
2094         pstate->dw11.ref_id_cost = 4;
2095         pstate->dw11.chroma_intra_mode_cost = 0;
2096
2097         pstate->dw12_13.mv_cost.dw0.mv0_cost = 0;
2098         pstate->dw12_13.mv_cost.dw0.mv1_cost = 6;
2099         pstate->dw12_13.mv_cost.dw0.mv2_cost = 6;
2100         pstate->dw12_13.mv_cost.dw0.mv3_cost = 9;
2101         pstate->dw12_13.mv_cost.dw1.mv4_cost = 10;
2102         pstate->dw12_13.mv_cost.dw1.mv5_cost = 13;
2103         pstate->dw12_13.mv_cost.dw1.mv6_cost = 14;
2104         pstate->dw12_13.mv_cost.dw1.mv7_cost = 24;
2105
2106         pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
2107         pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
2108         pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;
2109
2110         pstate->dw22.small_mb_size_in_word = 0xff;
2111         pstate->dw22.large_mb_size_in_word = 0xff;
2112
2113         pstate->dw27.max_hmv_r = 0x2000;
2114         pstate->dw27.max_vmv_r = 0x200;
2115
2116         pstate->dw31.offset0_for_zone0_neg_zone1_boundary = 800;
2117
2118         pstate->dw32.offset1_for_zone1_neg_zone2_boundary = 1600;
2119         pstate->dw32.offset2_for_zone2_neg_zone3_boundary = 2400;
2120
2121         pstate->dw33.qp_range_check_upper_bound = 0x33;
2122         pstate->dw33.qp_range_check_lower_bound = 0x0a;
2123         pstate->dw33.qp_range_check_value = 0x0f;
2124
2125         pstate->dw34.midpoint_distortion = 0x640;
2126     }
2127
2128     /* ROI will be updated in HuC kernel for CBR/VBR */
2129     if (!vdenc_context->brc_enabled && vdenc_context->num_roi) {
2130         pstate->dw34.roi_enable = 1;
2131
2132         pstate->dw30.roi_qp_adjustment_for_zone1 = CLAMP(-8, 7, vdenc_context->roi[0].value);
2133
2134         if (vdenc_context->num_roi > 1)
2135             pstate->dw30.roi_qp_adjustment_for_zone2 = CLAMP(-8, 7, vdenc_context->roi[1].value);
2136
2137         if (vdenc_context->num_roi > 2)
2138             pstate->dw30.roi_qp_adjustment_for_zone3 = CLAMP(-8, 7, vdenc_context->roi[2].value);
2139     }
2140
2141     pstate->dw1.transform_8x8_flag = vdenc_context->transform_8x8_mode_enable;
2142
2143     pstate->dw3.picture_width = vdenc_context->frame_width_in_mbs;
2144
2145     pstate->dw4.forward_transform_skip_check_enable = 1; /* TODO: double-check it */
2146
2147     pstate->dw5.picture_height_minus1 = vdenc_context->frame_height_in_mbs - 1;
2148     pstate->dw5.picture_type = vdenc_context->frame_type;
2149     pstate->dw5.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
2150
2151     if (vdenc_context->frame_type == VDENC_FRAME_P) {
2152         pstate->dw5.hme_ref1_disable = vdenc_context->num_refs[0] == 1 ? 1 : 0;
2153     }
2154
2155     pstate->dw5.mb_slice_threshold_value = 0;
2156
2157     pstate->dw6.slice_macroblock_height_minus1 = vdenc_context->frame_height_in_mbs - 1; /* single slice only */
2158
2159     if (pstate->dw1.transform_8x8_flag)
2160         pstate->dw8.luma_intra_partition_mask = 0;
2161     else
2162         pstate->dw8.luma_intra_partition_mask = (1 << 1); /* disable transform_8x8 */
2163
2164     pstate->dw14.qp_prime_y = pic_param->pic_init_qp + slice_param->slice_qp_delta;      /* TODO: check whether it is OK to use the first slice only */
2165
2166     if (update_cost) {
2167         pstate->dw9.mode0_cost = vdenc_context->mode_cost[0];
2168         pstate->dw9.mode1_cost = vdenc_context->mode_cost[1];
2169         pstate->dw9.mode2_cost = vdenc_context->mode_cost[2];
2170         pstate->dw9.mode3_cost = vdenc_context->mode_cost[3];
2171
2172         pstate->dw10.mode4_cost = vdenc_context->mode_cost[4];
2173         pstate->dw10.mode5_cost = vdenc_context->mode_cost[5];
2174         pstate->dw10.mode6_cost = vdenc_context->mode_cost[6];
2175         pstate->dw10.mode7_cost = vdenc_context->mode_cost[7];
2176
2177         pstate->dw11.mode8_cost = vdenc_context->mode_cost[8];
2178         pstate->dw11.mode9_cost = vdenc_context->mode_cost[9];
2179         pstate->dw11.ref_id_cost = vdenc_context->mode_cost[10];
2180         pstate->dw11.chroma_intra_mode_cost = vdenc_context->mode_cost[11];
2181
2182         pstate->dw12_13.mv_cost.dw0.mv0_cost = vdenc_context->mv_cost[0];
2183         pstate->dw12_13.mv_cost.dw0.mv1_cost = vdenc_context->mv_cost[1];
2184         pstate->dw12_13.mv_cost.dw0.mv2_cost = vdenc_context->mv_cost[2];
2185         pstate->dw12_13.mv_cost.dw0.mv3_cost = vdenc_context->mv_cost[3];
2186         pstate->dw12_13.mv_cost.dw1.mv4_cost = vdenc_context->mv_cost[4];
2187         pstate->dw12_13.mv_cost.dw1.mv5_cost = vdenc_context->mv_cost[5];
2188         pstate->dw12_13.mv_cost.dw1.mv6_cost = vdenc_context->mv_cost[6];
2189         pstate->dw12_13.mv_cost.dw1.mv7_cost = vdenc_context->mv_cost[7];
2190
2191         pstate->dw28_29.hme_mv_cost.dw0.mv0_cost = vdenc_context->hme_mv_cost[0];
2192         pstate->dw28_29.hme_mv_cost.dw0.mv1_cost = vdenc_context->hme_mv_cost[1];
2193         pstate->dw28_29.hme_mv_cost.dw0.mv2_cost = vdenc_context->hme_mv_cost[2];
2194         pstate->dw28_29.hme_mv_cost.dw0.mv3_cost = vdenc_context->hme_mv_cost[3];
2195         pstate->dw28_29.hme_mv_cost.dw1.mv4_cost = vdenc_context->hme_mv_cost[4];
2196         pstate->dw28_29.hme_mv_cost.dw1.mv5_cost = vdenc_context->hme_mv_cost[5];
2197         pstate->dw28_29.hme_mv_cost.dw1.mv6_cost = vdenc_context->hme_mv_cost[6];
2198         pstate->dw28_29.hme_mv_cost.dw1.mv7_cost = vdenc_context->hme_mv_cost[7];
2199     }
2200
2201     pstate->dw27.max_vmv_r = gen9_vdenc_get_max_vmv_range(seq_param->level_idc);
2202
2203     pstate->dw34.image_state_qp_override = (vdenc_context->internal_rate_mode == I965_BRC_CQP) ? 1 : 0;
2204
2205     /* TODO: check rolling I */
2206
2207     /* TODO: handle ROI */
2208
2209     /* TODO: check stream in support */
2210 }
2211
2212 static void
2213 gen9_vdenc_init_img_states(VADriverContextP ctx,
2214                            struct encode_state *encode_state,
2215                            struct intel_encoder_context *encoder_context)
2216 {
2217     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2218     struct gen9_mfx_avc_img_state *mfx_img_cmd;
2219     struct gen9_vdenc_img_state *vdenc_img_cmd;
2220     char *pbuffer;
2221
2222     pbuffer = i965_map_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
2223
2224     if (!pbuffer)
2225         return;
2226
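    /* The buffer holds an MFX_AVC_IMG_STATE / VDENC_IMG_STATE pair terminated
     * by MI_BATCH_BUFFER_END; the BRC HuC kernel reads it (region 3) and
     * writes the updated image states into the second-level batch (region 6). */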
2227     mfx_img_cmd = (struct gen9_mfx_avc_img_state *)pbuffer;
2228     gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, mfx_img_cmd);
2229     pbuffer += sizeof(*mfx_img_cmd);
2230
2231     vdenc_img_cmd = (struct gen9_vdenc_img_state *)pbuffer;
2232     gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, vdenc_img_cmd, 0);
2233     pbuffer += sizeof(*vdenc_img_cmd);
2234
2235     /* Add batch buffer end command */
2236     *((unsigned int *)pbuffer) = MI_BATCH_BUFFER_END;
2237
2238     i965_unmap_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
2239 }
2240
2241 static void
2242 gen9_vdenc_huc_brc_update_constant_data(VADriverContextP ctx,
2243                                         struct encode_state *encode_state,
2244                                         struct intel_encoder_context *encoder_context)
2245 {
2246     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2247     char *pbuffer;
2248
2249     pbuffer = i965_map_gpe_resource(&vdenc_context->brc_constant_data_res);
2250
2251     if (!pbuffer)
2252         return;
2253
2254     if (vdenc_context->internal_rate_mode == I965_BRC_VBR) {
2255         memcpy(gen9_brc_update_constant_data.dist_qp_adj_tab_i, dist_qp_adj_tab_i_vbr, sizeof(dist_qp_adj_tab_i_vbr));
2256         memcpy(gen9_brc_update_constant_data.dist_qp_adj_tab_p, dist_qp_adj_tab_p_vbr, sizeof(dist_qp_adj_tab_p_vbr));
2257         memcpy(gen9_brc_update_constant_data.dist_qp_adj_tab_b, dist_qp_adj_tab_b_vbr, sizeof(dist_qp_adj_tab_b_vbr));
2258         memcpy(gen9_brc_update_constant_data.buf_rate_adj_tab_i, buf_rate_adj_tab_i_vbr, sizeof(buf_rate_adj_tab_i_vbr));
2259         memcpy(gen9_brc_update_constant_data.buf_rate_adj_tab_p, buf_rate_adj_tab_p_vbr, sizeof(buf_rate_adj_tab_p_vbr));
2260         memcpy(gen9_brc_update_constant_data.buf_rate_adj_tab_b, buf_rate_adj_tab_b_vbr, sizeof(buf_rate_adj_tab_b_vbr));
2261     }
2262
2263     memcpy(pbuffer, &gen9_brc_update_constant_data, sizeof(gen9_brc_update_constant_data));
2264
2265     i965_unmap_gpe_resource(&vdenc_context->brc_constant_data_res);
2266 }
2267
2268 static void
2269 gen9_vdenc_huc_brc_update(VADriverContextP ctx,
2270                           struct encode_state *encode_state,
2271                           struct intel_encoder_context *encoder_context)
2272 {
2273     struct intel_batchbuffer *batch = encoder_context->base.batch;
2274     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2275     struct huc_pipe_mode_select_parameter pipe_mode_select_params;
2276     struct huc_imem_state_parameter imem_state_params;
2277     struct huc_dmem_state_parameter dmem_state_params;
2278     struct huc_virtual_addr_parameter virtual_addr_params;
2279     struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
2280     struct huc_stream_object_parameter stream_object_params;
2281     struct huc_start_parameter start_params;
2282     struct vd_pipeline_flush_parameter pipeline_flush_params;
2283     struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
2284     struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
2285     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
2286
2287     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
2288     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
2289     gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
2290
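    /* When BRC was just initialized or reset, use the HUC_STATUS2 mask/value
     * pair written by gen9_vdenc_huc_store_huc_status2() to end the batch
     * early if the HuC kernel did not actually run. */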
2291     if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset) {
2292         struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
2293
2294         memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
2295         mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
2296         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
2297     }
2298
2299     gen9_vdenc_init_img_states(ctx, encode_state, encoder_context);
2300
2301     memset(&imem_state_params, 0, sizeof(imem_state_params));
2302     imem_state_params.huc_firmware_descriptor = HUC_BRC_UPDATE;
2303     gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);
2304
2305     memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
2306     gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);
2307
2308     gen9_vdenc_update_huc_update_dmem(ctx, encoder_context);
2309     memset(&dmem_state_params, 0, sizeof(dmem_state_params));
2310     dmem_state_params.huc_data_source_res = &vdenc_context->brc_update_dmem_res[vdenc_context->current_pass];
2311     dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
2312     dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_update_dmem), 64);
2313     gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);
2314
2315     gen9_vdenc_huc_brc_update_constant_data(ctx, encode_state, encoder_context);
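    /*
     * HuC BRC update surface regions: 0 BRC history (in/out), 1 VDEnc
     * statistics, 2 PAK statistics, 3 input image states, 4 HME detection
     * summary (out), 5 BRC constant data, 6 second-level batch that receives
     * the updated image states (out).
     */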
2316     memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
2317     virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
2318     virtual_addr_params.regions[0].is_target = 1;
2319     virtual_addr_params.regions[1].huc_surface_res = &vdenc_context->vdenc_statistics_res;
2320     virtual_addr_params.regions[2].huc_surface_res = &vdenc_context->pak_statistics_res;
2321     virtual_addr_params.regions[3].huc_surface_res = &vdenc_context->vdenc_avc_image_state_res;
2322     virtual_addr_params.regions[4].huc_surface_res = &vdenc_context->hme_detection_summary_buffer_res;
2323     virtual_addr_params.regions[4].is_target = 1;
2324     virtual_addr_params.regions[5].huc_surface_res = &vdenc_context->brc_constant_data_res;
2325     virtual_addr_params.regions[6].huc_surface_res = &vdenc_context->second_level_batch_res;
2326     virtual_addr_params.regions[6].is_target = 1;
2327     gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);
2328
2329     memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
2330     ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
2331     ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
2332     gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);
2333
2334     memset(&stream_object_params, 0, sizeof(stream_object_params));
2335     stream_object_params.indirect_stream_in_data_length = 1;
2336     stream_object_params.indirect_stream_in_start_address = 0;
2337     gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);
2338
2339     gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);
2340
2341     memset(&start_params, 0, sizeof(start_params));
2342     start_params.last_stream_object = 1;
2343     gen9_vdenc_huc_start(ctx, encoder_context, &start_params);
2344
2345     memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
2346     pipeline_flush_params.hevc_pipeline_done = 1;
2347     pipeline_flush_params.hevc_pipeline_command_flush = 1;
2348     gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
2349
2350     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
2351     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
2352     gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
2353
2354     /* Store HUC_STATUS */
2355     memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
2356     mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS;
2357     mi_store_register_mem_params.bo = vdenc_context->huc_status_res.bo;
2358     gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
2359
2360     /* Write HUC_STATUS mask (1 << 31) */
2361     memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
2362     mi_store_data_imm_params.bo = vdenc_context->huc_status_res.bo;
2363     mi_store_data_imm_params.offset = 4;
2364     mi_store_data_imm_params.dw0 = (1 << 31);
2365     gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
2366 }
2367
2368 static void
2369 gen9_vdenc_mfx_pipe_mode_select(VADriverContextP ctx,
2370                                 struct encode_state *encode_state,
2371                                 struct intel_encoder_context *encoder_context)
2372 {
2373     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2374     struct intel_batchbuffer *batch = encoder_context->base.batch;
2375
2376     BEGIN_BCS_BATCH(batch, 5);
2377
2378     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2379     OUT_BCS_BATCH(batch,
2380                   (1 << 29) |
2381                   (MFX_LONG_MODE << 17) |       /* Must be long format for encoder */
2382                   (MFD_MODE_VLD << 15) |
2383                   (1 << 13) |                   /* VDEnc mode */
2384                   ((!!vdenc_context->post_deblocking_output_res.bo) << 9)  |    /* Post Deblocking Output */
2385                   ((!!vdenc_context->pre_deblocking_output_res.bo) << 8)  |     /* Pre Deblocking Output */
2386                   (1 << 7)  |                   /* Scaled surface enable */
2387                   (1 << 6)  |                   /* Frame statistics stream out enable, always '1' in VDEnc mode */
2388                   (1 << 4)  |                   /* encoding mode */
2389                   (MFX_FORMAT_AVC << 0));
2390     OUT_BCS_BATCH(batch, 0);
2391     OUT_BCS_BATCH(batch, 0);
2392     OUT_BCS_BATCH(batch, 0);
2393
2394     ADVANCE_BCS_BATCH(batch);
2395 }
2396
2397 static void
2398 gen9_vdenc_mfx_surface_state(VADriverContextP ctx,
2399                              struct intel_encoder_context *encoder_context,
2400                              struct i965_gpe_resource *gpe_resource,
2401                              int id)
2402 {
2403     struct intel_batchbuffer *batch = encoder_context->base.batch;
2404
2405     BEGIN_BCS_BATCH(batch, 6);
2406
2407     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2408     OUT_BCS_BATCH(batch, id);
2409     OUT_BCS_BATCH(batch,
2410                   ((gpe_resource->height - 1) << 18) |
2411                   ((gpe_resource->width - 1) << 4));
2412     OUT_BCS_BATCH(batch,
2413                   (MFX_SURFACE_PLANAR_420_8 << 28) |    /* 420 planar YUV surface */
2414                   (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
2415                   ((gpe_resource->pitch - 1) << 3) |    /* pitch */
2416                   (0 << 2)  |                           /* must be 0 for interleave U/V */
2417                   (1 << 1)  |                           /* must be tiled */
2418                   (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
2419     OUT_BCS_BATCH(batch,
2420                   (0 << 16) |                           /* must be 0 for interleave U/V */
2421                   (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
2422     OUT_BCS_BATCH(batch,
2423                   (0 << 16) |                           /* must be 0 for interleave U/V */
2424                   (gpe_resource->y_cb_offset));         /* y offset for V(cr); same as Cb for interleaved NV12 */
2425
2426     ADVANCE_BCS_BATCH(batch);
2427 }
2428
2429 static void
2430 gen9_vdenc_mfx_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2431 {
2432     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2433     struct intel_batchbuffer *batch = encoder_context->base.batch;
2434     int i;
2435
2436     BEGIN_BCS_BATCH(batch, 65);
2437
2438     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));
2439
2440     /* the DW1-3 is for pre_deblocking */
2441     OUT_BUFFER_3DW(batch, vdenc_context->pre_deblocking_output_res.bo, 1, 0, 0);
2442
2443     /* the DW4-6 is for the post_deblocking */
2444     OUT_BUFFER_3DW(batch, vdenc_context->post_deblocking_output_res.bo, 1, 0, 0);
2445
2446     /* the DW7-9 is for the uncompressed_picture */
2447     OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);
2448
2449     /* the DW10-12 is for PAK information (write) */
2450     OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 1, 0, 0);
2451
2452     /* the DW13-15 is for the intra_row_store_scratch */
2453     OUT_BUFFER_3DW(batch, vdenc_context->mfx_intra_row_store_scratch_res.bo, 1, 0, 0);
2454
2455     /* the DW16-18 is for the deblocking filter */
2456     OUT_BUFFER_3DW(batch, vdenc_context->mfx_deblocking_filter_row_store_scratch_res.bo, 1, 0, 0);
2457
2458     /* the DW 19-50 is for Reference pictures*/
2459     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
2460         OUT_BUFFER_2DW(batch, vdenc_context->list_reference_res[i].bo, 0, 0);
2461     }
2462
2463     /* DW 51, reference picture attributes */
2464     OUT_BCS_BATCH(batch, 0);
2465
2466     /* The DW 52-54 is for PAK information (read) */
2467     OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 0, 0, 0);
2468
2469     /* the DW 55-57 is the ILDB buffer */
2470     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2471
2472     /* the DW 58-60 is the second ILDB buffer */
2473     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2474
2475     /* DW 61, memory compress enable & mode */
2476     OUT_BCS_BATCH(batch, 0);
2477
2478     /* the DW 62-64 is the 4x Down Scaling surface */
2479     OUT_BUFFER_3DW(batch, vdenc_context->scaled_4x_recon_surface_res.bo, 0, 0, 0);
2480
2481     ADVANCE_BCS_BATCH(batch);
2482 }
2483
2484 static void
2485 gen9_vdenc_mfx_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2486 {
2487     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2488     struct intel_batchbuffer *batch = encoder_context->base.batch;
2489
2490     BEGIN_BCS_BATCH(batch, 26);
2491
2492     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
2493     /* The DW1-5 is for the MFX indirect bitstream offset, ignore for VDEnc mode */
2494     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2495     OUT_BUFFER_2DW(batch, NULL, 0, 0);
2496
2497     /* the DW6-10 is for MFX Indirect MV Object Base Address, ignore for VDEnc mode */
2498     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2499     OUT_BUFFER_2DW(batch, NULL, 0, 0);
2500
2501     /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
2502     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2503     OUT_BUFFER_2DW(batch, NULL, 0, 0);
2504
2505     /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
2506     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2507     OUT_BUFFER_2DW(batch, NULL, 0, 0);
2508
2509     /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
2510      * Note: an offset is specified in MFX_AVC_SLICE_STATE
2511      */
2512     OUT_BUFFER_3DW(batch,
2513                    vdenc_context->compressed_bitstream.res.bo,
2514                    1,
2515                    0,
2516                    0);
2517     OUT_BUFFER_2DW(batch,
2518                    vdenc_context->compressed_bitstream.res.bo,
2519                    1,
2520                    vdenc_context->compressed_bitstream.end_offset);
2521
2522     ADVANCE_BCS_BATCH(batch);
2523 }
2524
2525 static void
2526 gen9_vdenc_mfx_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2527 {
2528     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2529     struct intel_batchbuffer *batch = encoder_context->base.batch;
2530
2531     BEGIN_BCS_BATCH(batch, 10);
2532
2533     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2534
2535     /* The DW1-3 is for bsd/mpc row store scratch buffer */
2536     OUT_BUFFER_3DW(batch, vdenc_context->mfx_bsd_mpc_row_store_scratch_res.bo, 1, 0, 0);
2537
2538     /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
2539     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2540
2541     /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
2542     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2543
2544     ADVANCE_BCS_BATCH(batch);
2545 }
2546
2547 static void
2548 gen9_vdenc_mfx_qm_state(VADriverContextP ctx,
2549                         int qm_type,
2550                         unsigned int *qm,
2551                         int qm_length,
2552                         struct intel_encoder_context *encoder_context)
2553 {
2554     struct intel_batchbuffer *batch = encoder_context->base.batch;
2555     unsigned int qm_buffer[16];
2556
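    /*
     * Only qm_length dwords are copied below, but 16 dwords are always emitted
     * into the batch, so any trailing dwords come from uninitialized stack
     * memory; zeroing qm_buffer first would make that padding deterministic.
     */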
2557     assert(qm_length <= 16);
2558     assert(sizeof(*qm) == 4);
2559     memcpy(qm_buffer, qm, qm_length * 4);
2560
2561     BEGIN_BCS_BATCH(batch, 18);
2562     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
2563     OUT_BCS_BATCH(batch, qm_type << 0);
2564     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
2565     ADVANCE_BCS_BATCH(batch);
2566 }
2567
2568 static void
2569 gen9_vdenc_mfx_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2570 {
2571     /* TODO: add support for non-flat matrices */
2572     unsigned int qm[16] = {
2573         0x10101010, 0x10101010, 0x10101010, 0x10101010,
2574         0x10101010, 0x10101010, 0x10101010, 0x10101010,
2575         0x10101010, 0x10101010, 0x10101010, 0x10101010,
2576         0x10101010, 0x10101010, 0x10101010, 0x10101010
2577     };
2578
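    /*
     * Each dword packs four 8-bit matrix entries, so 0x10101010 is the flat
     * default matrix of all 16s.  The lengths below match the AVC layout of
     * MFX_QM_STATE: 12 dwords (3 x 16 bytes) for the three 4x4 scaling lists,
     * 16 dwords (64 bytes) for one 8x8 list.
     */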
2579     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
2580     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
2581     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
2582     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
2583 }
2584
2585 static void
2586 gen9_vdenc_mfx_fqm_state(VADriverContextP ctx,
2587                          int fqm_type,
2588                          unsigned int *fqm,
2589                          int fqm_length,
2590                          struct intel_encoder_context *encoder_context)
2591 {
2592     struct intel_batchbuffer *batch = encoder_context->base.batch;
2593     unsigned int fqm_buffer[32];
2594
2595     assert(fqm_length <= 32);
2596     assert(sizeof(*fqm) == 4);
2597     memcpy(fqm_buffer, fqm, fqm_length * 4);
2598
2599     BEGIN_BCS_BATCH(batch, 34);
2600     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
2601     OUT_BCS_BATCH(batch, fqm_type << 0);
2602     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
2603     ADVANCE_BCS_BATCH(batch);
2604 }
2605
2606 static void
2607 gen9_vdenc_mfx_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2608 {
2609     /* TODO: add support for non-flat matrices */
2610     unsigned int qm[32] = {
2611         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2612         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2613         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2614         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2615         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2616         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2617         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2618         0x10001000, 0x10001000, 0x10001000, 0x10001000
2619     };
2620
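    /*
     * FQM entries are 16-bit reciprocal values (roughly 65536 / QM), so a flat
     * QM of 16 gives 0x1000 per entry, two entries per dword: 24 dwords cover
     * the three 4x4 lists (3 x 32 bytes) and 32 dwords one 8x8 list (128
     * bytes), matching the MFX_FQM_STATE layout.
     */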
2621     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
2622     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
2623     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
2624     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
2625 }
2626
2627 static void
2628 gen9_vdenc_mfx_avc_img_state(VADriverContextP ctx,
2629                              struct encode_state *encode_state,
2630                              struct intel_encoder_context *encoder_context)
2631 {
2632     struct intel_batchbuffer *batch = encoder_context->base.batch;
2633     struct gen9_mfx_avc_img_state mfx_img_cmd;
2634
2635     gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &mfx_img_cmd);
2636
2637     BEGIN_BCS_BATCH(batch, (sizeof(mfx_img_cmd) >> 2));
2638     intel_batchbuffer_data(batch, &mfx_img_cmd, sizeof(mfx_img_cmd));
2639     ADVANCE_BCS_BATCH(batch);
2640 }
2641
2642 static void
2643 gen9_vdenc_vdenc_pipe_mode_select(VADriverContextP ctx,
2644                                   struct encode_state *encode_state,
2645                                   struct intel_encoder_context *encoder_context)
2646 {
2647     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2648     struct intel_batchbuffer *batch = encoder_context->base.batch;
2649
2650     BEGIN_BCS_BATCH(batch, 2);
2651
2652     OUT_BCS_BATCH(batch, VDENC_PIPE_MODE_SELECT | (2 - 2));
2653     OUT_BCS_BATCH(batch,
2654                   (vdenc_context->vdenc_streamin_enable << 9) |
2655                   (vdenc_context->vdenc_pak_threshold_check_enable << 8) |
2656                   (1 << 7)  |                   /* Tlb prefetch enable */
2657                   (1 << 5)  |                   /* Frame Statistics Stream-Out Enable */
2658                   (VDENC_CODEC_AVC << 0));
2659
2660     ADVANCE_BCS_BATCH(batch);
2661 }
2662
2663 static void
2664 gen9_vdenc_vdenc_surface_state(VADriverContextP ctx,
2665                                struct intel_encoder_context *encoder_context,
2666                                struct i965_gpe_resource *gpe_resource,
2667                                int vdenc_surface_cmd)
2668 {
2669     struct intel_batchbuffer *batch = encoder_context->base.batch;
2670
2671     BEGIN_BCS_BATCH(batch, 6);
2672
2673     OUT_BCS_BATCH(batch, vdenc_surface_cmd | (6 - 2));
2674     OUT_BCS_BATCH(batch, 0);
2675     OUT_BCS_BATCH(batch,
2676                   ((gpe_resource->height - 1) << 18) |
2677                   ((gpe_resource->width - 1) << 4));
2678     OUT_BCS_BATCH(batch,
2679                   (VDENC_SURFACE_PLANAR_420_8 << 28) |  /* 420 planar YUV surface only on SKL */
2680                   (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
2681                   ((gpe_resource->pitch - 1) << 3) |    /* pitch */
2682                   (0 << 2)  |                           /* must be 0 for interleave U/V */
2683                   (1 << 1)  |                           /* must be tiled */
2684                   (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
2685     OUT_BCS_BATCH(batch,
2686                   (0 << 16) |                           /* must be 0 for interleave U/V */
2687                   (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
2688     OUT_BCS_BATCH(batch,
2689                   (0 << 16) |                           /* must be 0 for interleave U/V */
2690                   (gpe_resource->y_cb_offset));         /* y offset for V(cr) */
2691
2692     ADVANCE_BCS_BATCH(batch);
2693 }
2694
2695 static void
2696 gen9_vdenc_vdenc_src_surface_state(VADriverContextP ctx,
2697                                    struct intel_encoder_context *encoder_context,
2698                                    struct i965_gpe_resource *gpe_resource)
2699 {
2700     gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_SRC_SURFACE_STATE);
2701 }
2702
2703 static void
2704 gen9_vdenc_vdenc_ref_surface_state(VADriverContextP ctx,
2705                                    struct intel_encoder_context *encoder_context,
2706                                    struct i965_gpe_resource *gpe_resource)
2707 {
2708     gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_REF_SURFACE_STATE);
2709 }
2710
2711 static void
2712 gen9_vdenc_vdenc_ds_ref_surface_state(VADriverContextP ctx,
2713                                       struct intel_encoder_context *encoder_context,
2714                                       struct i965_gpe_resource *gpe_resource)
2715 {
2716     gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_DS_REF_SURFACE_STATE);
2717 }
2718
2719 static void
2720 gen9_vdenc_vdenc_pipe_buf_addr_state(VADriverContextP ctx,
2721                                      struct encode_state *encode_state,
2722                                      struct intel_encoder_context *encoder_context)
2723 {
2724     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2725     struct intel_batchbuffer *batch = encoder_context->base.batch;
2726
2727     BEGIN_BCS_BATCH(batch, 37);
2728
2729     OUT_BCS_BATCH(batch, VDENC_PIPE_BUF_ADDR_STATE | (37 - 2));
2730
2731     /* DW1-6 for DS FWD REF0/REF1 */
2732     OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
2733     OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
2734
2735     /* DW7-9 for DS BWD REF0, ignored on SKL */
2736     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2737
2738     /* DW10-12 for uncompressed input data */
2739     OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);
2740
2741     /* DW13-DW15 for streamin data */
2742     if (vdenc_context->vdenc_streamin_enable)
2743         OUT_BUFFER_3DW(batch, vdenc_context->vdenc_streamin_res.bo, 0, 0, 0);
2744     else
2745         OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2746
2747     /* DW16-DW18 for row scratch buffer */
2748     OUT_BUFFER_3DW(batch, vdenc_context->vdenc_row_store_scratch_res.bo, 1, 0, 0);
2749
2750     /* DW19-DW21, ignored on SKL */
2751     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2752
2753     /* DW22-DW27 for FWD REF0/REF1 */
2754     OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
2755     OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
2756
2757     /* DW28-DW30 for FWD REF2, ignored on SKL */
2758     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2759
2760     /* DW31-DW33 for BWD REF0, ignored on SKL */
2761     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2762
2763     /* DW34-DW36 for VDEnc statistics streamout */
2764     OUT_BUFFER_3DW(batch, vdenc_context->vdenc_statistics_res.bo, 1, 0, 0);
2765
2766     ADVANCE_BCS_BATCH(batch);
2767 }
2768
2769 static void
2770 gen9_vdenc_vdenc_const_qpt_state(VADriverContextP ctx,
2771                                  struct encode_state *encode_state,
2772                                  struct intel_encoder_context *encoder_context)
2773 {
2774     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2775     struct intel_batchbuffer *batch = encoder_context->base.batch;
2776
2777     BEGIN_BCS_BATCH(batch, 61);
2778
2779     OUT_BCS_BATCH(batch, VDENC_CONST_QPT_STATE | (61 - 2));
2780
2781     if (vdenc_context->frame_type == VDENC_FRAME_I) {
2782         /* DW1-DW11 */
2783         intel_batchbuffer_data(batch, vdenc_const_qp_lambda, sizeof(vdenc_const_qp_lambda));
2784
2785         /* DW12-DW25 */
2786         intel_batchbuffer_data(batch, vdenc_const_skip_threshold, sizeof(vdenc_const_skip_threshold));
2787
2788         /* DW26-DW39 */
2789         intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_0, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0));
2790
2791         /* DW40-DW46 */
2792         intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_1, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1));
2793
2794         /* DW47-DW53 */
2795         intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_2, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2));
2796
2797         /* DW54-DW60 */
2798         intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_3, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3));
2799     } else {
2800         int i;
2801
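        /*
         * This scales the shared table in place: if vdenc_const_skip_threshold_p
         * is a writable static array, the scaling compounds on every non-I frame,
         * which may or may not be intended.
         */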
2802         for (i = 0; i < 28; i++) {
2803             vdenc_const_skip_threshold_p[i] *= 3;
2804         }
2805
2806         /* DW1-DW11 */
2807         intel_batchbuffer_data(batch, vdenc_const_qp_lambda_p, sizeof(vdenc_const_qp_lambda_p));
2808
2809         /* DW12-DW25 */
2810         intel_batchbuffer_data(batch, vdenc_const_skip_threshold_p, sizeof(vdenc_const_skip_threshold_p));
2811
2812         /* DW26-DW39 */
2813         intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_0_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0_p));
2814
2815         /* DW40-DW46 */
2816         intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_1_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1_p));
2817
2818         /* DW47-DW53 */
2819         intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_2_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2_p));
2820
2821         /* DW54-DW60 */
2822         intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_3_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3_p));
2823     }
2824
2825     ADVANCE_BCS_BATCH(batch);
2826 }
2827
2828 static void
2829 gen9_vdenc_vdenc_walker_state(VADriverContextP ctx,
2830                               struct encode_state *encode_state,
2831                               struct intel_encoder_context *encoder_context)
2832 {
2833     struct intel_batchbuffer *batch = encoder_context->base.batch;
2834
2835     BEGIN_BCS_BATCH(batch, 2);
2836
2837     OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (2 - 2));
2838     OUT_BCS_BATCH(batch, 0); /* All fields are set to 0 */
2839
2840     ADVANCE_BCS_BATCH(batch);
2841 }
2842
2843 static void
2844 gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
2845                            struct encode_state *encode_state,
2846                            struct intel_encoder_context *encoder_context)
2847 {
2848     struct intel_batchbuffer *batch = encoder_context->base.batch;
2849     struct gen9_vdenc_img_state vdenc_img_cmd;
2850
2851     gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, &vdenc_img_cmd, 1);
2852
2853     BEGIN_BCS_BATCH(batch, (sizeof(vdenc_img_cmd) >> 2));
2854     intel_batchbuffer_data(batch, &vdenc_img_cmd, sizeof(vdenc_img_cmd));
2855     ADVANCE_BCS_BATCH(batch);
2856 }
2857
2858 extern int
2859 intel_avc_enc_slice_type_fixup(int slice_type);
2860
2861 static void
2862 gen9_vdenc_mfx_avc_insert_object(VADriverContextP ctx,
2863                                  struct intel_encoder_context *encoder_context,
2864                                  unsigned int *insert_data, int length_in_dws, int data_bits_in_last_dw,
2865                                  int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
2866                                  int slice_header_indicator)
2867 {
2868     struct intel_batchbuffer *batch = encoder_context->base.batch;
2869
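    /* Callers pass (bit_length & 0x1f), so 0 here means the last dword is fully used. */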
2870     if (data_bits_in_last_dw == 0)
2871         data_bits_in_last_dw = 32;
2872
2873     BEGIN_BCS_BATCH(batch, length_in_dws + 2);
2874
2875     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (length_in_dws));
2876     OUT_BCS_BATCH(batch,
2877                   (0 << 16) |   /* always start at offset 0 */
2878                   (slice_header_indicator << 14) |
2879                   (data_bits_in_last_dw << 8) |
2880                   (skip_emul_byte_count << 4) |
2881                   (!!emulation_flag << 3) |
2882                   ((!!is_last_header) << 2) |
2883                   ((!!is_end_of_slice) << 1) |
2884                   (0 << 0));    /* TODO: check this flag */
2885     intel_batchbuffer_data(batch, insert_data, length_in_dws * 4);
2886
2887     ADVANCE_BCS_BATCH(batch);
2888 }
2889
2890 static void
2891 gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
2892                                             struct encode_state *encode_state,
2893                                             struct intel_encoder_context *encoder_context,
2894                                             int slice_index)
2895 {
2896     VAEncPackedHeaderParameterBuffer *param = NULL;
2897     unsigned int length_in_bits;
2898     unsigned int *header_data = NULL;
2899     int count, i, start_index;
2900     int slice_header_index;
2901
2902     if (encode_state->slice_header_index[slice_index] == 0)
2903         slice_header_index = -1;
2904     else
2905         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
2906
2907     count = encode_state->slice_rawdata_count[slice_index];
2908     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
2909
2910     for (i = 0; i < count; i++) {
2911         unsigned int skip_emul_byte_cnt;
2912
2913         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
2914
2915         param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
2916
2917         /* skip the slice header packed data type as it is inserted last */
2918         if (param->type == VAEncPackedHeaderSlice)
2919             continue;
2920
2921         length_in_bits = param->bit_length;
2922
2923         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2924
2925         /* as the slice header is still required, the last header flag is set to
2926          * zero.
2927          */
2928         gen9_vdenc_mfx_avc_insert_object(ctx,
2929                                          encoder_context,
2930                                          header_data,
2931                                          ALIGN(length_in_bits, 32) >> 5,
2932                                          length_in_bits & 0x1f,
2933                                          skip_emul_byte_cnt,
2934                                          0,
2935                                          0,
2936                                          !param->has_emulation_bytes,
2937                                          0);
2938     }
2939
2940     if (slice_header_index == -1) {
2941         VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
2942         VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2943         VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
2944         unsigned char *slice_header = NULL;
2945         int slice_header_length_in_bits = 0;
2946
2947         /* No slice header data was passed, so the driver needs to generate it */
2948         /* for the normal H.264 case */
2949         slice_header_length_in_bits = build_avc_slice_header(seq_param,
2950                                                              pic_param,
2951                                                              slice_params,
2952                                                              &slice_header);
2953         gen9_vdenc_mfx_avc_insert_object(ctx,
2954                                          encoder_context,
2955                                          (unsigned int *)slice_header,
2956                                          ALIGN(slice_header_length_in_bits, 32) >> 5,
2957                                          slice_header_length_in_bits & 0x1f,
2958                                          5,  /* first 5 bytes are start code + nal unit type */
2959                                          1, 0, 1,
2960                                          1);
2961
2962         free(slice_header);
2963     } else {
2964         unsigned int skip_emul_byte_cnt;
2965
2966         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
2967
2968         param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
2969         length_in_bits = param->bit_length;
2970
2971         /* as the slice header is the last header data for one slice,
2972          * the last header flag is set to one.
2973          */
2974         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2975
2976         gen9_vdenc_mfx_avc_insert_object(ctx,
2977                                          encoder_context,
2978                                          header_data,
2979                                          ALIGN(length_in_bits, 32) >> 5,
2980                                          length_in_bits & 0x1f,
2981                                          skip_emul_byte_cnt,
2982                                          1,
2983                                          0,
2984                                          !param->has_emulation_bytes,
2985                                          1);
2986     }
2987
2988     return;
2989 }
2990
2991 static void
2992 gen9_vdenc_mfx_avc_insert_headers(VADriverContextP ctx,
2993                                  struct encode_state *encode_state,
2994                                  struct intel_encoder_context *encoder_context,
2995                                  VAEncSliceParameterBufferH264 *slice_param,
2996                                  int slice_index)
2997 {
2998     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2999     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
3000     unsigned int internal_rate_mode = vdenc_context->internal_rate_mode;
3001     unsigned int skip_emul_byte_cnt;
3002
3003     if (slice_index == 0) {
3004         if (encode_state->packed_header_data[idx]) {
3005             VAEncPackedHeaderParameterBuffer *param = NULL;
3006             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3007             unsigned int length_in_bits;
3008
3009             assert(encode_state->packed_header_param[idx]);
3010             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3011             length_in_bits = param->bit_length;
3012
3013             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3014             gen9_vdenc_mfx_avc_insert_object(ctx,
3015                                              encoder_context,
3016                                              header_data,
3017                                              ALIGN(length_in_bits, 32) >> 5,
3018                                              length_in_bits & 0x1f,
3019                                              skip_emul_byte_cnt,
3020                                              0,
3021                                              0,
3022                                              !param->has_emulation_bytes,
3023                                              0);
3024         }
3025
3026         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
3027
3028         if (encode_state->packed_header_data[idx]) {
3029             VAEncPackedHeaderParameterBuffer *param = NULL;
3030             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3031             unsigned int length_in_bits;
3032
3033             assert(encode_state->packed_header_param[idx]);
3034             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3035             length_in_bits = param->bit_length;
3036
3037             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3038
3039             gen9_vdenc_mfx_avc_insert_object(ctx,
3040                                              encoder_context,
3041                                              header_data,
3042                                              ALIGN(length_in_bits, 32) >> 5,
3043                                              length_in_bits & 0x1f,
3044                                              skip_emul_byte_cnt,
3045                                              0,
3046                                              0,
3047                                              !param->has_emulation_bytes,
3048                                              0);
3049         }
3050
3051         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
3052
3053         if (encode_state->packed_header_data[idx]) {
3054             VAEncPackedHeaderParameterBuffer *param = NULL;
3055             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3056             unsigned int length_in_bits;
3057
3058             assert(encode_state->packed_header_param[idx]);
3059             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3060             length_in_bits = param->bit_length;
3061
3062             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3063             gen9_vdenc_mfx_avc_insert_object(ctx,
3064                                              encoder_context,
3065                                              header_data,
3066                                              ALIGN(length_in_bits, 32) >> 5,
3067                                              length_in_bits & 0x1f,
3068                                              skip_emul_byte_cnt,
3069                                              0,
3070                                              0,
3071                                              !param->has_emulation_bytes,
3072                                              0);
3073         } else if (internal_rate_mode == I965_BRC_CBR) {
3074             /* TODO: insert others */
3075         }
3076     }
3077
3078     gen9_vdenc_mfx_avc_insert_slice_packed_data(ctx,
3079                                                 encode_state,
3080                                                 encoder_context,
3081                                                 slice_index);
3082 }
3083
3084 static void
3085 gen9_vdenc_mfx_avc_slice_state(VADriverContextP ctx,
3086                                struct encode_state *encode_state,
3087                                struct intel_encoder_context *encoder_context,
3088                                VAEncPictureParameterBufferH264 *pic_param,
3089                                VAEncSliceParameterBufferH264 *slice_param,
3090                                VAEncSliceParameterBufferH264 *next_slice_param)
3091 {
3092     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3093     struct intel_batchbuffer *batch = encoder_context->base.batch;
3094     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
3095     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
3096     unsigned char correct[6], grow, shrink;
3097     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
3098     int max_qp_n, max_qp_p;
3099     int i;
3100     int weighted_pred_idc = 0;
3101     int num_ref_l0 = 0, num_ref_l1 = 0;
3102     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3103     int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; /* TODO: fix for CBR & VBR */
3104
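    /*
     * The macroblock address is a raster-scan index, so the column is
     * address % width_in_mbs and the row is address / width_in_mbs.  For a
     * 1920x1080 frame (frame_width_in_mbs = 120), MB address 250 gives
     * slice_hor_pos = 10 and slice_ver_pos = 2.
     */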
3105     slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
3106     slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
3107
3108     if (next_slice_param) {
3109         next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
3110         next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
3111     } else {
3112         next_slice_hor_pos = 0;
3113         next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
3114     }
3115
3116     if (slice_type == SLICE_TYPE_I) {
3117         luma_log2_weight_denom = 0;
3118         chroma_log2_weight_denom = 0;
3119     } else if (slice_type == SLICE_TYPE_P) {
3120         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
3121         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
3122
3123         if (slice_param->num_ref_idx_active_override_flag)
3124             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3125     } else if (slice_type == SLICE_TYPE_B) {
3126         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
3127         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
3128         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
3129
3130         if (slice_param->num_ref_idx_active_override_flag) {
3131             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3132             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
3133         }
3134
3135         if (weighted_pred_idc == 2) {
3136             /* 8.4.3 - Derivation process for prediction weights (8-279) */
3137             luma_log2_weight_denom = 5;
3138             chroma_log2_weight_denom = 5;
3139         }
3140     }
3141
3142     max_qp_n = 0;       /* TODO: update it */
3143     max_qp_p = 0;       /* TODO: update it */
3144     grow = 0;           /* TODO: update it */
3145     shrink = 0;         /* TODO: update it */
3146
3147     for (i = 0; i < 6; i++)
3148         correct[i] = 0; /* TODO: update it */
3149
3150     BEGIN_BCS_BATCH(batch, 11);
3151
3152     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
3153     OUT_BCS_BATCH(batch, slice_type);
3154     OUT_BCS_BATCH(batch,
3155                   (num_ref_l0 << 16) |
3156                   (num_ref_l1 << 24) |
3157                   (chroma_log2_weight_denom << 8) |
3158                   (luma_log2_weight_denom << 0));
3159     OUT_BCS_BATCH(batch,
3160                   (weighted_pred_idc << 30) |
3161                   (slice_param->direct_spatial_mv_pred_flag << 29) |
3162                   (slice_param->disable_deblocking_filter_idc << 27) |
3163                   (slice_param->cabac_init_idc << 24) |
3164                   (slice_qp << 16) |
3165                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
3166                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
3167
3168     OUT_BCS_BATCH(batch,
3169                   slice_ver_pos << 24 |
3170                   slice_hor_pos << 16 |
3171                   slice_param->macroblock_address);
3172     OUT_BCS_BATCH(batch,
3173                   next_slice_ver_pos << 16 |
3174                   next_slice_hor_pos);
3175
3176     OUT_BCS_BATCH(batch,
3177                   (0 << 31) |           /* TODO: ignore it for VDENC ??? */
3178                   (!slice_param->macroblock_address << 30) |    /* ResetRateControlCounter */
3179                   (2 << 28) |           /* Loose Rate Control */
3180                   (0 << 24) |           /* RC Stable Tolerance */
3181                   (0 << 23) |           /* RC Panic Enable */
3182                   (1 << 22) |           /* CBP mode */
3183                   (0 << 21) |           /* MB Type Direct Conversion, 0: Enable, 1: Disable */
3184                   (0 << 20) |           /* MB Type Skip Conversion, 0: Enable, 1: Disable */
3185                   (!next_slice_param << 19) |                   /* Is Last Slice */
3186                   (0 << 18) |           /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
3187                   (1 << 17) |           /* HeaderPresentFlag */
3188                   (1 << 16) |           /* SliceData PresentFlag */
3189                   (0 << 15) |           /* TailPresentFlag, TODO: check it on VDEnc  */
3190                   (1 << 13) |           /* RBSP NAL TYPE */
3191                   (1 << 12));           /* CabacZeroWordInsertionEnable */
3192
3193     OUT_BCS_BATCH(batch, vdenc_context->compressed_bitstream.start_offset);
3194
3195     OUT_BCS_BATCH(batch,
3196                   (max_qp_n << 24) |     /*Target QP - 24 is lowest QP*/
3197                   (max_qp_p << 16) |     /*Target QP + 20 is highest QP*/
3198                   (shrink << 8) |
3199                   (grow << 0));
3200     OUT_BCS_BATCH(batch,
3201                   (1 << 31) |
3202                   (3 << 28) |
3203                   (1 << 27) |
3204                   (5 << 24) |
3205                   (correct[5] << 20) |
3206                   (correct[4] << 16) |
3207                   (correct[3] << 12) |
3208                   (correct[2] << 8) |
3209                   (correct[1] << 4) |
3210                   (correct[0] << 0));
3211     OUT_BCS_BATCH(batch, 0);
3212
3213     ADVANCE_BCS_BATCH(batch);
3214 }
3215
3216 static uint8_t
3217 gen9_vdenc_mfx_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
3218 {
3219     unsigned int is_long_term =
3220         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
3221     unsigned int is_top_field =
3222         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
3223     unsigned int is_bottom_field =
3224         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
3225
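    /*
     * Packed reference entry, as computed below: bit 6 = long-term flag,
     * bit 5 = 1 for a frame picture (neither or both field flags set),
     * bits 4:1 = frame store id, bit 0 = 1 for a bottom-field picture.
     */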
3226     return ((is_long_term                         << 6) |
3227             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
3228             (frame_store_id                       << 1) |
3229             ((is_top_field ^ 1) & is_bottom_field));
3230 }
3231
3232 static void
3233 gen9_vdenc_mfx_avc_ref_idx_state(VADriverContextP ctx,
3234                                  struct encode_state *encode_state,
3235                                  struct intel_encoder_context *encoder_context,
3236                                  VAEncSliceParameterBufferH264 *slice_param)
3237 {
3238     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3239     struct intel_batchbuffer *batch = encoder_context->base.batch;
3240     VAPictureH264 *ref_pic;
3241     int i, slice_type, ref_idx_shift;
3242     unsigned int fwd_ref_entry;
3243
3244     fwd_ref_entry = 0x80808080;
3245     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3246
3247     for (i = 0; i < MAX(vdenc_context->num_refs[0], 2); i++) {
3248         ref_pic = &slice_param->RefPicList0[i];
3249         ref_idx_shift = vdenc_context->list_ref_idx[0][i] * 8;
3250
3251         fwd_ref_entry &= ~(0xFF << ref_idx_shift);
3252         fwd_ref_entry += (gen9_vdenc_mfx_get_ref_idx_state(ref_pic, vdenc_context->list_ref_idx[0][i]) << ref_idx_shift);
3253     }
3254
3255     if (slice_type == SLICE_TYPE_P) {
3256         BEGIN_BCS_BATCH(batch, 10);
3257         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
3258         OUT_BCS_BATCH(batch, 0);                        // L0
3259         OUT_BCS_BATCH(batch, fwd_ref_entry);
3260
3261         for (i = 0; i < 7; i++) {
3262             OUT_BCS_BATCH(batch, 0x80808080);
3263         }
3264
3265         ADVANCE_BCS_BATCH(batch);
3266     }
3267
3268     if (slice_type == SLICE_TYPE_B) {
3269         /* VDEnc on SKL doesn't support BWD */
3270         assert(0);
3271     }
3272 }
3273
3274 static void
3275 gen9_vdenc_mfx_avc_weightoffset_state(VADriverContextP ctx,
3276                                       struct encode_state *encode_state,
3277                                       struct intel_encoder_context *encoder_context,
3278                                       VAEncPictureParameterBufferH264 *pic_param,
3279                                       VAEncSliceParameterBufferH264 *slice_param)
3280 {
3281     struct intel_batchbuffer *batch = encoder_context->base.batch;
3282     int i, slice_type;
3283     short weightoffsets[32 * 6];
3284
3285     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3286
3287     if (slice_type == SLICE_TYPE_P &&
3288         pic_param->pic_fields.bits.weighted_pred_flag == 1) {
3289
3290         for (i = 0; i < 32; i++) {
3291             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
3292             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
3293             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
3294             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
3295             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
3296             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
3297         }
3298
3299         BEGIN_BCS_BATCH(batch, 98);
3300         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
3301         OUT_BCS_BATCH(batch, 0);
3302         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
3303
3304         ADVANCE_BCS_BATCH(batch);
3305     }
3306
3307     if (slice_type == SLICE_TYPE_B) {
3308         /* VDEnc on SKL doesn't support BWD */
3309         assert(0);
3310     }
3311 }
3312
3313 static void
3314 gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
3315                                 struct encode_state *encode_state,
3316                                 struct intel_encoder_context *encoder_context,
3317                                 VAEncSliceParameterBufferH264 *slice_param,
3318                                 VAEncSliceParameterBufferH264 *next_slice_param,
3319                                 int slice_index)
3320 {
3321     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
3322
3323     gen9_vdenc_mfx_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param);
3324     gen9_vdenc_mfx_avc_weightoffset_state(ctx,
3325                                           encode_state,
3326                                           encoder_context,
3327                                           pic_param,
3328                                           slice_param);
3329     gen9_vdenc_mfx_avc_slice_state(ctx,
3330                                    encode_state,
3331                                    encoder_context,
3332                                    pic_param,
3333                                    slice_param,
3334                                    next_slice_param);
3335     gen9_vdenc_mfx_avc_insert_headers(ctx,
3336                                      encode_state,
3337                                      encoder_context,
3338                                      slice_param,
3339                                      slice_index);
3340 }
3341
3342 static void
3343 gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
3344                                 struct encode_state *encode_state,
3345                                 struct intel_encoder_context *encoder_context)
3346 {
3347     struct intel_batchbuffer *batch = encoder_context->base.batch;
3348     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
3349     VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
3350     int i, j;
3351     int slice_index = 0;
3352     int is_frame_level_vdenc = 1;       /* TODO: check it for SKL */
3353     int has_tail = 0;                   /* TODO: check it later */
3354
3355     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3356         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3357
3358         if (j == encode_state->num_slice_params_ext - 1)
3359             next_slice_group_param = NULL;
3360         else
3361             next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
3362
3363         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
3364             if (i < encode_state->slice_params_ext[j]->num_elements - 1)
3365                 next_slice_param = slice_param + 1;
3366             else
3367                 next_slice_param = next_slice_group_param;
3368
3369             gen9_vdenc_mfx_avc_single_slice(ctx,
3370                                             encode_state,
3371                                             encoder_context,
3372                                             slice_param,
3373                                             next_slice_param,
3374                                             slice_index);
3375             slice_param++;
3376             slice_index++;
3377
3378             if (is_frame_level_vdenc)
3379                 break;
3380             else {
3381                 /* TODO: remove assert(0) and add other commands here */
3382                 assert(0);
3383             }
3384         }
3385
3386         if (is_frame_level_vdenc)
3387             break;
3388     }
3389
3390     if (is_frame_level_vdenc) {
3391         struct vd_pipeline_flush_parameter pipeline_flush_params;
3392
3393         gen9_vdenc_vdenc_walker_state(ctx, encode_state, encoder_context);
3394
3395         memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
3396         pipeline_flush_params.mfx_pipeline_done = !has_tail;
3397         pipeline_flush_params.vdenc_pipeline_done = 1;
3398         pipeline_flush_params.vdenc_pipeline_command_flush = 1;
3399         pipeline_flush_params.vd_command_message_parser_done = 1;
3400         gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
3401     }
3402
3403     if (has_tail) {
3404         /* TODO: insert a tail if required */
3405     }
3406
3407     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3408     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
3409     gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3410 }
3411
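/*
 * Builds the full MFX + VDEnc command sequence for one PAK pass.  With BRC
 * enabled, the MFX/VDEnc image states are pulled in from the HuC-updated
 * second-level batch buffer instead of being programmed directly, and the
 * MI_CONDITIONAL_BATCH_BUFFER_END commands at the top allow the pass to be
 * skipped based on the HuC status buffers.
 */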
3412 static void
3413 gen9_vdenc_mfx_vdenc_pipeline(VADriverContextP ctx,
3414                               struct encode_state *encode_state,
3415                               struct intel_encoder_context *encoder_context)
3416 {
3417     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3418     struct intel_batchbuffer *batch = encoder_context->base.batch;
3419     struct gpe_mi_batch_buffer_start_parameter mi_batch_buffer_start_params;
3420
3421     if (vdenc_context->brc_enabled) {
3422         struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
3423
3424         memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
3425         mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
3426         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
3427     }
3428
3429     if (vdenc_context->current_pass) {
3430         struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
3431
3432         memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
3433         mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status_res.bo;
3434         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
3435     }
3436
3437     gen9_vdenc_mfx_pipe_mode_select(ctx, encode_state, encoder_context);
3438
3439     gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res, 0);
3440     gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res, 4);
3441     gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res, 5);
3442
3443     gen9_vdenc_mfx_pipe_buf_addr_state(ctx, encoder_context);
3444     gen9_vdenc_mfx_ind_obj_base_addr_state(ctx, encoder_context);
3445     gen9_vdenc_mfx_bsp_buf_base_addr_state(ctx, encoder_context);
3446
3447     gen9_vdenc_vdenc_pipe_mode_select(ctx, encode_state, encoder_context);
3448     gen9_vdenc_vdenc_src_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res);
3449     gen9_vdenc_vdenc_ref_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res);
3450     gen9_vdenc_vdenc_ds_ref_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res);
3451     gen9_vdenc_vdenc_pipe_buf_addr_state(ctx, encode_state, encoder_context);
3452     gen9_vdenc_vdenc_const_qpt_state(ctx, encode_state, encoder_context);
3453
3454     if (!vdenc_context->brc_enabled) {
3455         gen9_vdenc_mfx_avc_img_state(ctx, encode_state, encoder_context);
3456         gen9_vdenc_vdenc_img_state(ctx, encode_state, encoder_context);
3457     } else {
3458         memset(&mi_batch_buffer_start_params, 0, sizeof(mi_batch_buffer_start_params));
3459         mi_batch_buffer_start_params.is_second_level = 1; /* Must be the second level batch buffer */
3460         mi_batch_buffer_start_params.bo = vdenc_context->second_level_batch_res.bo;
3461         gen9_gpe_mi_batch_buffer_start(ctx, batch, &mi_batch_buffer_start_params);
3462     }
3463
3464     gen9_vdenc_mfx_avc_qm_state(ctx, encoder_context);
3465     gen9_vdenc_mfx_avc_fqm_state(ctx, encoder_context);
3466
3467     gen9_vdenc_mfx_vdenc_avc_slices(ctx, encode_state, encoder_context);
3468 }
3469
3470 static void
3471 gen9_vdenc_context_brc_prepare(struct encode_state *encode_state,
3472                                struct intel_encoder_context *encoder_context)
3473 {
3474     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3475     unsigned int rate_control_mode = encoder_context->rate_control_mode;
3476
3477     switch (rate_control_mode & 0x7f) {
3478     case VA_RC_CBR:
3479         vdenc_context->internal_rate_mode = I965_BRC_CBR;
3480         break;
3481
3482     case VA_RC_VBR:
3483         vdenc_context->internal_rate_mode = I965_BRC_VBR;
3484         break;
3485
3486     case VA_RC_CQP:
3487     default:
3488         vdenc_context->internal_rate_mode = I965_BRC_CQP;
3489         break;
3490     }
3491 }
3492
3493 static void
3494 gen9_vdenc_read_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3495 {
3496     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3497     struct intel_batchbuffer *batch = encoder_context->base.batch;
3498     struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
3499     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
3500     unsigned int base_offset = vdenc_context->status_bffuer.base_offset;
3501     int i;
3502
3503     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3504     gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3505
3506     memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
3507     mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
3508     mi_store_register_mem_params.bo = vdenc_context->status_bffuer.res.bo;
3509     mi_store_register_mem_params.offset = base_offset + vdenc_context->status_bffuer.bytes_per_frame_offset;
3510     gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3511
3512     /* Update DMEM buffer for BRC Update */
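    /*
     * The two register reads land at dword offsets 5 and 7 of each pass's BRC
     * update DMEM, which presumably correspond to the frame byte count and
     * image status control fields consumed by the HuC BRC update kernel.
     */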
3513     for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
3514         mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
3515         mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
3516         mi_store_register_mem_params.offset = 5 * sizeof(uint32_t);
3517         gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3518
3519         mi_store_register_mem_params.mmio_offset = MFC_IMAGE_STATUS_CTRL_REG; /* TODO: fix it if VDBOX2 is used */
3520         mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
3521         mi_store_register_mem_params.offset = 7 * sizeof(uint32_t);
3522         gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3523     }
3524 }
3525
3526 static VAStatus
3527 gen9_vdenc_avc_check_capability(VADriverContextP ctx,
3528                                 struct encode_state *encode_state,
3529                                 struct intel_encoder_context *encoder_context)
3530 {
3531     VAEncSliceParameterBufferH264 *slice_param;
3532     int i, j;
3533
3534     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3535         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3536
3537         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
3538             if (slice_param->slice_type == SLICE_TYPE_B)
3539                 return VA_STATUS_ERROR_UNKNOWN;
3540
3541             slice_param++;
3542         }
3543     }
3544
3545     return VA_STATUS_SUCCESS;
3546 }
3547
3548 static VAStatus
3549 gen9_vdenc_avc_encode_picture(VADriverContextP ctx,
3550                               VAProfile profile,
3551                               struct encode_state *encode_state,
3552                               struct intel_encoder_context *encoder_context)
3553 {
3554     VAStatus va_status;
3555     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3556     struct intel_batchbuffer *batch = encoder_context->base.batch;
3557
3558     va_status = gen9_vdenc_avc_check_capability(ctx, encode_state, encoder_context);
3559
3560     if (va_status != VA_STATUS_SUCCESS)
3561         return va_status;
3562
3563     gen9_vdenc_avc_prepare(ctx, profile, encode_state, encoder_context);
3564
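    /*
     * Multi-pass PAK loop: every pass re-emits the whole MFX/VDEnc pipeline.
     * With BRC enabled, the conditional batch-buffer-end emitted for
     * current_pass > 0 (see gen9_vdenc_mfx_vdenc_pipeline) is what allows later
     * passes to be skipped when the HuC status indicates no re-encode is needed.
     */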
3565     for (vdenc_context->current_pass = 0; vdenc_context->current_pass < vdenc_context->num_passes; vdenc_context->current_pass++) {
3566         vdenc_context->is_first_pass = (vdenc_context->current_pass == 0);
3567         vdenc_context->is_last_pass = (vdenc_context->current_pass == (vdenc_context->num_passes - 1));
3568
3569         intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3570         intel_batchbuffer_emit_mi_flush(batch);
3571
3572         if (vdenc_context->brc_enabled) {
3573             if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset)
3574                 gen9_vdenc_huc_brc_init_reset(ctx, encode_state, encoder_context);
3575
3576             gen9_vdenc_huc_brc_update(ctx, encode_state, encoder_context);
3577             intel_batchbuffer_emit_mi_flush(batch);
3578         }
3579
3580         gen9_vdenc_mfx_vdenc_pipeline(ctx, encode_state, encoder_context);
3581         gen9_vdenc_read_status(ctx, encoder_context);
3582
3583         intel_batchbuffer_end_atomic(batch);
3584         intel_batchbuffer_flush(batch);
3585
3586         vdenc_context->brc_initted = 1;
3587         vdenc_context->brc_need_reset = 0;
3588     }
3589
3590     return VA_STATUS_SUCCESS;
3591 }
3592
3593 static VAStatus
3594 gen9_vdenc_pipeline(VADriverContextP ctx,
3595                     VAProfile profile,
3596                     struct encode_state *encode_state,
3597                     struct intel_encoder_context *encoder_context)
3598 {
3599     VAStatus vaStatus;
3600
3601     switch (profile) {
3602     case VAProfileH264ConstrainedBaseline:
3603     case VAProfileH264Main:
3604     case VAProfileH264High:
3605         vaStatus = gen9_vdenc_avc_encode_picture(ctx, profile, encode_state, encoder_context);
3606         break;
3607
3608     default:
3609         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
3610         break;
3611     }
3612
3613     return vaStatus;
3614 }
3615
3616 static void
3617 gen9_vdenc_free_resources(struct gen9_vdenc_context *vdenc_context)
3618 {
3619     int i;
3620
3621     i965_free_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
3622     i965_free_gpe_resource(&vdenc_context->brc_history_buffer_res);
3623     i965_free_gpe_resource(&vdenc_context->brc_stream_in_res);
3624     i965_free_gpe_resource(&vdenc_context->brc_stream_out_res);
3625     i965_free_gpe_resource(&vdenc_context->huc_dummy_res);
3626
3627     for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++)
3628         i965_free_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
3629
3630     i965_free_gpe_resource(&vdenc_context->vdenc_statistics_res);
3631     i965_free_gpe_resource(&vdenc_context->pak_statistics_res);
3632     i965_free_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
3633     i965_free_gpe_resource(&vdenc_context->hme_detection_summary_buffer_res);
3634     i965_free_gpe_resource(&vdenc_context->brc_constant_data_res);
3635     i965_free_gpe_resource(&vdenc_context->second_level_batch_res);
3636
3637     i965_free_gpe_resource(&vdenc_context->huc_status_res);
3638     i965_free_gpe_resource(&vdenc_context->huc_status2_res);
3639
3640     i965_free_gpe_resource(&vdenc_context->recon_surface_res);
3641     i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
3642     i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
3643     i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
3644
3645     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
3646         i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
3647         i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
3648     }
3649
3650     i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
3651     i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
3652     i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
3653
3654     i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
3655     i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
3656     i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
3657     i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
3658
3659     i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
3660 }
3661
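/* mfc_context_destroy hook: free all GPE resources, then the context itself. */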
3662 static void
3663 gen9_vdenc_context_destroy(void *context)
3664 {
3665     struct gen9_vdenc_context *vdenc_context = context;
3666
3667     gen9_vdenc_free_resources(vdenc_context);
3668
3669     free(vdenc_context);
3670 }
3671
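/*
 * Allocate the fixed-size buffers that live for the lifetime of the context:
 * HuC BRC DMEM/history/stream buffers, VDENC/PAK statistics, the VDENC/AVC
 * image state, the HME summary buffer, BRC constant data, the second-level
 * batch buffer and the HuC status buffers.  Surface-sized resources (recon,
 * references, bitstream) are not allocated here.
 */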
3672 static void
3673 gen9_vdenc_allocate_resources(VADriverContextP ctx,
3674                               struct intel_encoder_context *encoder_context,
3675                               struct gen9_vdenc_context *vdenc_context)
3676 {
3677     struct i965_driver_data *i965 = i965_driver_data(ctx);
3678     int i;
3679
3680     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_init_reset_dmem_res,
3681                                 ALIGN(sizeof(struct huc_brc_init_dmem), 64),
3682                                 "HuC Init&Reset DMEM buffer");
3683
3684     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_history_buffer_res,
3685                                 ALIGN(HUC_BRC_HISTORY_BUFFER_SIZE, 0x1000),
3686                                 "HuC History buffer");
3687
3688     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_in_res,
3689                                 ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3690                                 "HuC Stream In buffer");
3691
3692     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_out_res,
3693                                 ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3694                                 "HuC Stream Out buffer");
3695
3696     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_dummy_res,
3697                                 0x1000,
3698                                 "HuC dummy buffer");
3699
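    /* One BRC update DMEM buffer per PAK pass, cleared up front. */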
3700     for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
3701         ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_update_dmem_res[i],
3702                                     ALIGN(sizeof(struct huc_brc_update_dmem), 64),
3703                                     "HuC BRC Update buffer");
3704         i965_zero_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
3705     }
3706
3707     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_statistics_res,
3708                                 ALIGN(VDENC_STATISTICS_SIZE, 0x1000),
3709                                 "VDENC statistics buffer");
3710
3711     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->pak_statistics_res,
3712                                 ALIGN(PAK_STATISTICS_SIZE, 0x1000),
3713                                 "PAK statistics buffer");
3714
3715     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_avc_image_state_res,
3716                                 ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3717                                 "VDENC/AVC image state buffer");
3718
3719     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->hme_detection_summary_buffer_res,
3720                                 ALIGN(HME_DETECTION_SUMMARY_BUFFER_SIZE, 0x1000),
3721                                 "HME summary buffer");
3722
3723     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_constant_data_res,
3724                                 ALIGN(BRC_CONSTANT_DATA_SIZE, 0x1000),
3725                                 "BRC constant buffer");
3726
3727     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->second_level_batch_res,
3728                                 ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3729                                 "Second level batch buffer");
3730
3731     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status_res,
3732                                 0x1000,
3733                                 "HuC Status buffer");
3734
3735     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status2_res,
3736                                 0x1000,
3737                                 "HuC Status2 buffer");
3738 }
3739
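/*
 * get_status hook: report the encoded frame size stored in the coded buffer
 * segment's private (struct gen9_vdenc_status) data as the segment size.
 */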
3740 static VAStatus
3741 gen9_vdenc_context_get_status(VADriverContextP ctx,
3742                               struct intel_encoder_context *encoder_context,
3743                               struct i965_coded_buffer_segment *coded_buffer_segment)
3744 {
3745     struct gen9_vdenc_status *vdenc_status = (struct gen9_vdenc_status *)coded_buffer_segment->codec_private_data;
3746
3747     coded_buffer_segment->base.size = vdenc_status->bytes_per_frame;
3748
3749     return VA_STATUS_SUCCESS;
3750 }
3751
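/*
 * Create the VDEnc encoder context, allocate its long-lived resources and
 * register the pipeline, BRC-prepare, status-query and destroy hooks with
 * the encoder context.
 */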
3752 Bool
3753 gen9_vdenc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3754 {
3755     struct gen9_vdenc_context *vdenc_context = calloc(1, sizeof(struct gen9_vdenc_context));
3756
3757     if (!vdenc_context)
3758         return False;
3759
3760     vdenc_context->brc_initted = 0;
3761     vdenc_context->brc_need_reset = 0;
3762     vdenc_context->is_low_delay = 0;
3763     vdenc_context->current_pass = 0;
3764     vdenc_context->num_passes = 1;
3765     vdenc_context->vdenc_streamin_enable = 0;
3766     vdenc_context->vdenc_pak_threshold_check_enable = 0;
3767
3768     gen9_vdenc_allocate_resources(ctx, encoder_context, vdenc_context);
3769
3770     encoder_context->mfc_context = vdenc_context;
3771     encoder_context->mfc_context_destroy = gen9_vdenc_context_destroy;
3772     encoder_context->mfc_pipeline = gen9_vdenc_pipeline;
3773     encoder_context->mfc_brc_prepare = gen9_vdenc_context_brc_prepare;
3774     encoder_context->get_status = gen9_vdenc_context_get_status;
3775
3776     return True;
3777 }