1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41 #include "intel_media.h"
42 #include "gen9_vdenc.h"
43
44 static const int8_t buf_rate_adj_tab_i_lowdelay[72] = {
45     0,   0, -8, -12, -16, -20, -28, -36,
46     0,   0, -4,  -8, -12, -16, -24, -32,
47     4,   2,  0,  -1,  -3,  -8, -16, -24,
48     8,   4,  2,   0,  -1,  -4,  -8, -16,
49     20, 16,  4,   0,  -1,  -4,  -8, -16,
50     24, 20, 16,   8,   4,   0,  -4,  -8,
51     28, 24, 20,  16,   8,   4,   0,  -8,
52     32, 24, 20,  16,   8,   4,   0,  -4,
53     64, 48, 28,  20,   16, 12,   8,   4,
54 };
55
56 static const int8_t buf_rate_adj_tab_p_lowdelay[72] = {
57     -8, -24, -32, -40, -44, -48, -52, -80,
58     -8, -16, -32, -40, -40, -44, -44, -56,
59     0,    0, -12, -20, -24, -28, -32, -36,
60     8,    4,   0,   0,  -8, -16, -24, -32,
61     32,  16,   8,   4,  -4,  -8, -16, -20,
62     36,  24,  16,   8,   4,  -2,  -4,  -8,
63     40,  36,  24,  20,  16,   8,   0,  -8,
64     48,  40,  28,  24,  20,  12,   0,  -4,
65     64,  48,  28,  20,  16,  12,   8,   4,
66 };
67
68 static const int8_t buf_rate_adj_tab_b_lowdelay[72] = {
69     0,  -4, -8, -16, -24, -32, -40, -48,
70     1,   0, -4,  -8, -16, -24, -32, -40,
71     4,   2,  0,  -1,  -3,  -8, -16, -24,
72     8,   4,  2,   0,  -1,  -4,  -8, -16,
73     20, 16,  4,   0,  -1,  -4,  -8, -16,
74     24, 20, 16,   8,   4,   0,  -4,  -8,
75     28, 24, 20,  16,   8,   4,   0,  -8,
76     32, 24, 20,  16,   8,   4,   0,  -4,
77     64, 48, 28,  20,  16,  12,   8,   4,
78 };
79
80 static const int8_t dist_qp_adj_tab_i_vbr[81] = {
81     +0,  0,  0,  0, 0, 3, 4, 6, 8,
82     +0,  0,  0,  0, 0, 2, 3, 5, 7,
83     -1,  0,  0,  0, 0, 2, 2, 4, 5,
84     -1, -1,  0,  0, 0, 1, 2, 2, 4,
85     -2, -2, -1,  0, 0, 0, 1, 2, 4,
86     -2, -2, -1,  0, 0, 0, 1, 2, 4,
87     -3, -2, -1, -1, 0, 0, 1, 2, 5,
88     -3, -2, -1, -1, 0, 0, 2, 4, 7,
89     -4, -3, -2, -1, 0, 1, 3, 5, 8,
90 };
91
92 static const int8_t dist_qp_adj_tab_p_vbr[81] = {
93     -1,  0,  0,  0, 0, 1, 1, 2, 3,
94     -1, -1,  0,  0, 0, 1, 1, 2, 3,
95     -2, -1, -1,  0, 0, 1, 1, 2, 3,
96     -3, -2, -2, -1, 0, 0, 1, 2, 3,
97     -3, -2, -1, -1, 0, 0, 1, 2, 3,
98     -3, -2, -1, -1, 0, 0, 1, 2, 3,
99     -3, -2, -1, -1, 0, 0, 1, 2, 3,
100     -3, -2, -1, -1, 0, 0, 1, 2, 3,
101     -3, -2, -1, -1, 0, 0, 1, 2, 3,
102 };
103
104 static const int8_t dist_qp_adj_tab_b_vbr[81] = {
105     +0,  0,  0,  0, 0, 2, 3, 3, 4,
106     +0,  0,  0,  0, 0, 2, 3, 3, 4,
107     -1,  0,  0,  0, 0, 2, 2, 3, 3,
108     -1, -1,  0,  0, 0, 1, 2, 2, 2,
109     -1, -1, -1,  0, 0, 0, 1, 2, 2,
110     -2, -1, -1,  0, 0, 0, 0, 1, 2,
111     -2, -1, -1, -1, 0, 0, 0, 1, 3,
112     -2, -2, -1, -1, 0, 0, 1, 1, 3,
113     -2, -2, -1, -1, 0, 1, 1, 2, 4,
114 };
115
116 static const int8_t buf_rate_adj_tab_i_vbr[72] = {
117     -4, -20, -28, -36, -40, -44, -48, -80,
118     +0,  -8, -12, -20, -24, -28, -32, -36,
119     +0,   0,  -8, -16, -20, -24, -28, -32,
120     +8,   4,   0,   0,  -8, -16, -24, -28,
121     32,  24,  16,   2,  -4,  -8, -16, -20,
122     36,  32,  28,  16,   8,   0,  -4,  -8,
123     40,  36,  24,  20,  16,   8,   0,  -8,
124     48,  40,  28,  24,  20,  12,   0,  -4,
125     64,  48,  28,  20,  16,  12,   8,   4,
126 };
127
128 static const int8_t buf_rate_adj_tab_p_vbr[72] = {
129     -8, -24, -32, -44, -48, -56, -64, -80,
130     -8, -16, -32, -40, -44, -52, -56, -64,
131     +0,   0, -16, -28, -36, -40, -44, -48,
132     +8,   4,   0,   0,  -8, -16, -24, -36,
133     20,  12,   4,   0,  -8,  -8,  -8, -16,
134     24,  16,   8,   8,   8,   0,  -4,  -8,
135     40,  36,  24,  20,  16,   8,   0,  -8,
136     48,  40,  28,  24,  20,  12,   0,  -4,
137     64,  48,  28,  20,  16,  12,   8,   4,
138 };
139
140 static const int8_t buf_rate_adj_tab_b_vbr[72] = {
141     0,  -4, -8, -16, -24, -32, -40, -48,
142     1,   0, -4,  -8, -16, -24, -32, -40,
143     4,   2,  0,  -1,  -3,  -8, -16, -24,
144     8,   4,  2,   0,  -1,  -4,  -8, -16,
145     20, 16,  4,   0,  -1,  -4,  -8, -16,
146     24, 20, 16,   8,   4,   0,  -4,  -8,
147     28, 24, 20,  16,   8,   4,   0,  -8,
148     32, 24, 20,  16,   8,   4,   0,  -4,
149     64, 48, 28,  20,  16,  12,   8,   4,
150 };
151
152 static struct huc_brc_update_constant_data
153 gen9_brc_update_constant_data = {
154     .global_rate_qp_adj_tab_i = {
155         48, 40, 32,  24,  16,   8,   0,  -8,
156         40, 32, 24,  16,   8,   0,  -8, -16,
157         32, 24, 16,   8,   0,  -8, -16, -24,
158         24, 16,  8,   0,  -8, -16, -24, -32,
159         16, 8,   0,  -8, -16, -24, -32, -40,
160         8,  0,  -8, -16, -24, -32, -40, -48,
161         0, -8, -16, -24, -32, -40, -48, -56,
162         48, 40, 32,  24,  16,   8,   0,  -8,
163     },
164
165     .global_rate_qp_adj_tab_p = {
166         48,  40,  32,  24,  16,  8,    0,  -8,
167         40,  32,  24,  16,   8,  0,   -8, -16,
168         16,   8,   8,   4,  -8, -16, -16, -24,
169         8,    0,   0,  -8, -16, -16, -16, -24,
170         8,    0,   0, -24, -32, -32, -32, -48,
171         0,  -16, -16, -24, -32, -48, -56, -64,
172         -8, -16, -32, -32, -48, -48, -56, -64,
173         -16,-32, -48, -48, -48, -56, -64, -80,
174     },
175
176     .global_rate_qp_adj_tab_b = {
177         48, 40, 32, 24,  16,   8,   0,  -8,
178         40, 32, 24, 16,  8,    0,  -8, -16,
179         32, 24, 16,  8,  0,   -8, -16, -24,
180         24, 16, 8,   0, -8,   -8, -16, -24,
181         16, 8,  0,   0, -8,  -16, -24, -32,
182         16, 8,  0,   0, -8,  -16, -24, -32,
183         0, -8, -8, -16, -32, -48, -56, -64,
184         0, -8, -8, -16, -32, -48, -56, -64
185     },
186
187     .dist_threshld_i = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
188     .dist_threshld_p = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
189     .dist_threshld_b = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
190
191     .dist_qp_adj_tab_i = {
192         0,   0,  0,  0,  0,  3,  4,  6,  8,
193         0,   0,  0,  0,  0,  2,  3,  5,  7,
194         -1,  0,  0,  0,  0,  2,  2,  4,  5,
195         -1, -1,  0,  0,  0,  1,  2,  2,  4,
196         -2, -2, -1,  0,  0,  0,  1,  2,  4,
197         -2, -2, -1,  0,  0,  0,  1,  2,  4,
198         -3, -2, -1, -1,  0,  0,  1,  2,  5,
199         -3, -2, -1, -1,  0,  0,  2,  4,  7,
200         -4, -3, -2, -1,  0,  1,  3,  5,  8,
201     },
202
203     .dist_qp_adj_tab_p = {
204         -1,   0,  0,  0,  0,  1,  1,  2,  3,
205         -1,  -1,  0,  0,  0,  1,  1,  2,  3,
206         -2,  -1, -1,  0,  0,  1,  1,  2,  3,
207         -3,  -2, -2, -1,  0,  0,  1,  2,  3,
208         -3,  -2, -1, -1,  0,  0,  1,  2,  3,
209         -3,  -2, -1, -1,  0,  0,  1,  2,  3,
210         -3,  -2, -1, -1,  0,  0,  1,  2,  3,
211         -3,  -2, -1, -1,  0,  0,  1,  2,  3,
212         -3,  -2, -1, -1,  0,  0,  1,  2,  3,
213     },
214
215     .dist_qp_adj_tab_b = {
216         0,   0,  0,  0, 0, 2, 3, 3, 4,
217         0,   0,  0,  0, 0, 2, 3, 3, 4,
218         -1,  0,  0,  0, 0, 2, 2, 3, 3,
219         -1, -1,  0,  0, 0, 1, 2, 2, 2,
220         -1, -1, -1,  0, 0, 0, 1, 2, 2,
221         -2, -1, -1,  0, 0, 0, 0, 1, 2,
222         -2, -1, -1, -1, 0, 0, 0, 1, 3,
223         -2, -2, -1, -1, 0, 0, 1, 1, 3,
224         -2, -2, -1, -1, 0, 1, 1, 2, 4,
225     },
226
227     /* default table for non lowdelay */
228     .buf_rate_adj_tab_i = {
229         -4, -20, -28, -36, -40, -44, -48, -80,
230         0,   -8, -12, -20, -24, -28, -32, -36,
231         0,    0,  -8, -16, -20, -24, -28, -32,
232         8,    4,   0,   0,  -8, -16, -24, -28,
233         32,  24,  16,   2,  -4,  -8, -16, -20,
234         36,  32,  28,  16,   8,   0,  -4,  -8,
235         40,  36,  24,  20,  16,   8,   0,  -8,
236         48,  40,  28,  24,  20,  12,   0,  -4,
237         64,  48,  28,  20,  16,  12,   8,   4,
238     },
239
240     /* default table for non lowdelay */
241     .buf_rate_adj_tab_p = {
242         -8, -24, -32, -44, -48, -56, -64, -80,
243         -8, -16, -32, -40, -44, -52, -56, -64,
244         0,    0, -16, -28, -36, -40, -44, -48,
245         8,    4,   0,   0,  -8, -16, -24, -36,
246         20,  12,   4,   0,  -8,  -8,  -8, -16,
247         24,  16,   8,   8,   8,   0,  -4,  -8,
248         40,  36,  24,  20,  16,   8,   0,  -8,
249         48,  40,  28,  24,  20,  12,   0,  -4,
250         64,  48,  28,  20,  16,  12,   8,   4,
251     },
252
253     /* default table for non lowdelay */
254     .buf_rate_adj_tab_b = {
255         0,  -4, -8, -16, -24, -32, -40, -48,
256         1,   0, -4,  -8, -16, -24, -32, -40,
257         4,   2,  0,  -1,  -3,  -8, -16, -24,
258         8,   4,  2,   0,  -1,  -4,  -8, -16,
259         20, 16,  4,   0,  -1,  -4,  -8, -16,
260         24, 20, 16,   8,   4,   0,  -4,  -8,
261         28, 24, 20,  16,   8,   4,   0,  -8,
262         32, 24, 20,  16,   8,   4,   0,  -4,
263         64, 48, 28,  20,  16,  12,   8,   4,
264     },
265
266     .frame_size_min_tab_p = { 1, 2, 4, 6, 8, 10, 16, 16, 16 },
267     .frame_size_min_tab_i = { 1, 2, 4, 8, 16, 20, 24, 32, 36 },
268
269     .frame_size_max_tab_p = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
270     .frame_size_max_tab_i = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
271
272     .frame_size_scg_tab_p = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
273     .frame_size_scg_tab_i = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
274
275     .i_intra_non_pred = {
276         0x0e, 0x0e, 0x0e, 0x18, 0x19, 0x1b, 0x1c, 0x0d, 0x0f, 0x18, 0x19, 0x0d, 0x0f, 0x0f,
277         0x0c, 0x0e, 0x0c, 0x0c, 0x0a, 0x0a, 0x0b, 0x0a, 0x0a, 0x0a, 0x09, 0x09, 0x08, 0x08,
278         0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x07, 0x07, 0x07, 0x07, 0x07,
279     },
280
281     .i_intra_16x16 = {
282         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
283         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
284         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
285     },
286
287     .i_intra_8x8 = {
288         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
289         0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x04, 0x04, 0x04, 0x04, 0x06, 0x06, 0x06,
290         0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07,
291     },
292
293     .i_intra_4x4 = {
294         0x2e, 0x2e, 0x2e, 0x38, 0x39, 0x3a, 0x3b, 0x2c, 0x2e, 0x38, 0x39, 0x2d, 0x2f, 0x38,
295         0x2e, 0x38, 0x2e, 0x38, 0x2f, 0x2e, 0x38, 0x38, 0x38, 0x38, 0x2f, 0x2f, 0x2f, 0x2e,
296         0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x1e, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x0e, 0x0d,
297     },
298
299     .i_intra_chroma = {
300         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
301         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
302         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
303     },
304
305     .p_intra_non_pred = {
306         0x06, 0x06, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x07,
307         0x07, 0x07, 0x06, 0x07, 0x07, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
308         0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
309     },
310
311     .p_intra_16x16 = {
312         0x1b, 0x1b, 0x1b, 0x1c, 0x1e, 0x28, 0x29, 0x1a, 0x1b, 0x1c, 0x1e, 0x1a, 0x1c, 0x1d,
313         0x1b, 0x1c, 0x1c, 0x1c, 0x1c, 0x1b, 0x1c, 0x1c, 0x1d, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c,
314         0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
315     },
316
317     .p_intra_8x8 = {
318         0x1d, 0x1d, 0x1d, 0x1e, 0x28, 0x29, 0x2a, 0x1b, 0x1d, 0x1e, 0x28, 0x1c, 0x1d, 0x1f,
319         0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1d, 0x1f, 0x1e, 0x1e, 0x1e, 0x1d, 0x1e, 0x1e, 0x1d,
320         0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e,
321     },
322
323     .p_intra_4x4 = {
324         0x38, 0x38, 0x38, 0x39, 0x3a, 0x3b, 0x3d, 0x2e, 0x38, 0x39, 0x3a, 0x2f, 0x39, 0x3a,
325         0x38, 0x39, 0x38, 0x39, 0x39, 0x38, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
326         0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
327     },
328
329     .p_intra_chroma = {
330         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
331         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
332         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
333     },
334
335     .p_inter_16x8 = {
336         0x07, 0x07, 0x07, 0x08, 0x09, 0x0b, 0x0c, 0x06, 0x07, 0x09, 0x0a, 0x07, 0x08, 0x09,
337         0x08, 0x09, 0x08, 0x09, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x08, 0x08, 0x08, 0x08,
338         0x08, 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
339     },
340
341     .p_inter_8x8 = {
342         0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x02, 0x02, 0x02, 0x03, 0x02, 0x02, 0x02,
343         0x02, 0x03, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
344         0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
345     },
346
347     .p_inter_16x16 = {
348         0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
349         0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
350         0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
351     },
352
353     .p_ref_id = {
354         0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
355         0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
356         0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04
357     },
358
359     .hme_mv_cost = {
360         /* mv = 0 */
361         {
362             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
363             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
364             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
365         },
366
367         /* mv <= 16 */
368         {
369             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
370             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
371             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
372         },
373
374         /* mv <= 32 */
375         {
376             0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
377             0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
378             0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
379         },
380
381         /* mv <= 64 */
382         {
383             0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
384             0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
385             0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
386         },
387
388         /* mv <= 128 */
389         {
390             0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
391             0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
392             0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
393         },
394
395         /* mv <= 256 */
396         {
397             0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
398             0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
399             0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x1a, 0x1f, 0x2a, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d
400         },
401
402         /* mv <= 512 */
403         {
404             0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
405             0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
406             0x1a, 0x1a, 0x1a, 0x1a, 0x2a, 0x2f, 0x3a, 0x3d, 0x3d, 0x3d, 0x3d, 0x3d, 0x3d, 0x3d,
407         },
408
409         /* mv <= 1024 */
410         {
411             0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
412             0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
413             0x1a, 0x1a, 0x1a, 0x1f, 0x2d, 0x3d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d,
414         },
415     },
416 };
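/*
 * Editor's note: this initializer mirrors the field layout of
 * struct huc_brc_update_constant_data and is presumably copied verbatim
 * into the HuC BRC update constant buffer.  The buf_rate_adj_tab_*_lowdelay
 * tables above appear intended to replace the three "non lowdelay"
 * buf_rate_adj_tab_* members when a low-delay BRC mode is selected.
 */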
417
418 /* 11 DWs */
419 static uint8_t vdenc_const_qp_lambda[44] = {
420     0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
421     0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
422     0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
423     0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
424     0x4a, 0x53, 0x00, 0x00
425 };
426
427 /* 14 DWs */
428 static uint16_t vdenc_const_skip_threshold[28] = {
429
430 };
431
432 /* 14 DWs */
433 static uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0[28] = {
434
435 };
436
437 /* 7 DWs */
438 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1[28] = {
439
440 };
441
442 /* 7 DWs */
443 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2[28] = {
444
445 };
446
447 /* 7 DWs */
448 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3[28] = {
449
450 };
451
452 /* P frame */
453 /* 11 DWs */
454 static uint8_t vdenc_const_qp_lambda_p[44] = {
455     0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
456     0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
457     0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
458     0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
459     0x4a, 0x53, 0x00, 0x00
460 };
461
462 /* 14 DWs */
463 static uint16_t vdenc_const_skip_threshold_p[28] = {
464     0x0000, 0x0000, 0x0000, 0x0000, 0x0002, 0x0004, 0x0007, 0x000b,
465     0x0011, 0x0019, 0x0023, 0x0032, 0x0044, 0x005b, 0x0077, 0x0099,
466     0x00c2, 0x00f1, 0x0128, 0x0168, 0x01b0, 0x0201, 0x025c, 0x02c2,
467     0x0333, 0x03b0, 0x0000, 0x0000
468 };
469
470 /* 14 DWs */
471 static uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0_p[28] = {
472     0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
473     0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x3f,
474     0x4e, 0x51, 0x5b, 0x63, 0x6f, 0x7f, 0x00, 0x00
475 };
476
477 /* 7 DWs */
478 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1_p[28] = {
479     0x03, 0x04, 0x05, 0x05, 0x07, 0x09, 0x0b, 0x0e, 0x12, 0x17,
480     0x1c, 0x21, 0x27, 0x2c, 0x33, 0x3b, 0x41, 0x51, 0x5c, 0x1a,
481     0x1e, 0x21, 0x22, 0x26, 0x2c, 0x30, 0x00, 0x00
482 };
483
484 /* 7 DWs */
485 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2_p[28] = {
486     0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
487     0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x0f,
488     0x13, 0x14, 0x16, 0x18, 0x1b, 0x1f, 0x00, 0x00
489 };
490
491 /* 7 DWs */
492 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3_p[28] = {
493     0x04, 0x05, 0x06, 0x09, 0x0b, 0x0d, 0x12, 0x16, 0x1b, 0x23,
494     0x2c, 0x33, 0x3d, 0x45, 0x4f, 0x5b, 0x66, 0x7f, 0x8e, 0x2a,
495     0x2f, 0x32, 0x37, 0x3c, 0x45, 0x4c, 0x00, 0x00
496 };
497
498 static const double
499 vdenc_brc_dev_threshi0_fp_neg[4] = { 0.80, 0.60, 0.34, 0.2 };
500
501 static const double
502 vdenc_brc_dev_threshi0_fp_pos[4] = { 0.2, 0.4, 0.66, 0.9 };
503
504 static const double
505 vdenc_brc_dev_threshpb0_fp_neg[4] = { 0.90, 0.66, 0.46, 0.3 };
506
507 static const double
508 vdenc_brc_dev_threshpb0_fp_pos[4] = { 0.3, 0.46, 0.70, 0.90 };
509
510 static const double
511 vdenc_brc_dev_threshvbr0_neg[4] = { 0.90, 0.70, 0.50, 0.3 };
512
513 static const double
514 vdenc_brc_dev_threshvbr0_pos[4] = { 0.4, 0.5, 0.75, 0.90 };
515
516 static const unsigned char
517 vdenc_brc_estrate_thresh_p0[7] = { 4, 8, 12, 16, 20, 24, 28 };
518
519 static const unsigned char
520 vdenc_brc_estrate_thresh_i0[7] = { 4, 8, 12, 16, 20, 24, 28 };
521
522 static const uint16_t
523 vdenc_brc_start_global_adjust_frame[4] = { 10, 50, 100, 150 };
524
525 static const uint8_t
526 vdenc_brc_global_rate_ratio_threshold[7] = { 80, 90, 95, 101, 105, 115, 130};
527
528 static const uint8_t
529 vdenc_brc_start_global_adjust_mult[5] = { 1, 1, 3, 2, 1 };
530
531 static const uint8_t
532 vdenc_brc_start_global_adjust_div[5] = { 40, 5, 5, 3, 1 };
533
534 static const int8_t
535 vdenc_brc_global_rate_ratio_threshold_qp[8] = { -3, -2, -1, 0, 1, 1, 2, 3 };
536
537 const int vdenc_mode_const[2][12][52] = {
538     //INTRASLICE
539     {
540         //LUTMODE_INTRA_NONPRED
541         {
542             14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,         //QP=[0 ~12]
543             16, 18, 22, 24, 13, 15, 16, 18, 13, 15, 15, 12, 14,         //QP=[13~25]
544             12, 12, 10, 10, 11, 10, 10, 10, 9, 9, 8, 8, 8,              //QP=[26~38]
545             8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7,                      //QP=[39~51]
546         },
547
548         //LUTMODE_INTRA_16x16, LUTMODE_INTRA
549         {
550             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[0 ~12]
551             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[13~25]
552             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[26~38]
553             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[39~51]
554         },
555
556         //LUTMODE_INTRA_8x8
557         {
558             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  //QP=[0 ~12]
559             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,  //QP=[13~25]
560             1, 1, 1, 1, 1, 4, 4, 4, 4, 6, 6, 6, 6,  //QP=[26~38]
561             6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7,  //QP=[39~51]
562         },
563
564         //LUTMODE_INTRA_4x4
565         {
566             56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56,   //QP=[0 ~12]
567             64, 72, 80, 88, 48, 56, 64, 72, 53, 59, 64, 56, 64,   //QP=[13~25]
568             57, 64, 58, 55, 64, 64, 64, 64, 59, 59, 60, 57, 50,   //QP=[26~38]
569             46, 42, 38, 34, 31, 27, 23, 22, 19, 18, 16, 14, 13,   //QP=[39~51]
570         },
571
572         //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
573         { 0, },
574
575         //LUTMODE_INTER_8X8Q
576         { 0, },
577
578         //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16x8_FIELD
579         { 0, },
580
581         //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8X8_FIELD
582         { 0, },
583
584         //LUTMODE_INTER_16x16, LUTMODE_INTER
585         { 0, },
586
587         //LUTMODE_INTER_BWD
588         { 0, },
589
590         //LUTMODE_REF_ID
591         { 0, },
592
593         //LUTMODE_INTRA_CHROMA
594         { 0, },
595     },
596
597     //PREDSLICE
598     {
599         //LUTMODE_INTRA_NONPRED
600         {
601             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,     //QP=[0 ~12]
602             7, 8, 9, 10, 5, 6, 7, 8, 6, 7, 7, 7, 7,    //QP=[13~25]
603             6, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7,     //QP=[26~38]
604             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     //QP=[39~51]
605         },
606
607         //LUTMODE_INTRA_16x16, LUTMODE_INTRA
608         {
609             21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
610             24, 28, 31, 35, 19, 21, 24, 28, 20, 24, 25, 21, 24,
611             24, 24, 24, 21, 24, 24, 26, 24, 24, 24, 24, 24, 24,
612             24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
613
614         },
615
616         //LUTMODE_INTRA_8x8
617         {
618             26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,   //QP=[0 ~12]
619             28, 32, 36, 40, 22, 26, 28, 32, 24, 26, 30, 26, 28,   //QP=[13~25]
620             26, 28, 26, 26, 30, 28, 28, 28, 26, 28, 28, 26, 28,   //QP=[26~38]
621             28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,   //QP=[39~51]
622         },
623
624         //LUTMODE_INTRA_4x4
625         {
626             64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,   //QP=[0 ~12]
627             72, 80, 88, 104, 56, 64, 72, 80, 58, 68, 76, 64, 68,  //QP=[13~25]
628             64, 68, 68, 64, 70, 70, 70, 70, 68, 68, 68, 68, 68,   //QP=[26~38]
629             68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,   //QP=[39~51]
630         },
631
632         //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
633         {
634             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,      //QP=[0 ~12]
635             8, 9, 11, 12, 6, 7, 9, 10, 7, 8, 9, 8, 9,   //QP=[13~25]
636             8, 9, 8, 8, 9, 9, 9, 9, 8, 8, 8, 8, 8,      //QP=[26~38]
637             8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,      //QP=[39~51]
638         },
639
640         //LUTMODE_INTER_8X8Q
641         {
642             2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,   //QP=[0 ~12]
643             2, 3, 3, 3, 2, 2, 2, 3, 2, 2, 2, 2, 3,   //QP=[13~25]
644             2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   //QP=[26~38]
645             3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   //QP=[39~51]
646         },
647
648         //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16X8_FIELD
649         {
650             5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[0 ~12]
651             5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[13~25]
652             5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[26~38]
653             5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[39~51]
654         },
655
656         //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8x8_FIELD
657         {
658             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[0 ~12]
659             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[13~25]
660             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[26~38]
661             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,   //QP=[39~51]
662         },
663
664         //LUTMODE_INTER_16x16, LUTMODE_INTER
665         {
666             5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,   //QP=[0 ~12]
667             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   //QP=[13~25]
668             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   //QP=[26~38]
669             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,   //QP=[39~51]
670         },
671
672         //LUTMODE_INTER_BWD
673         {
674             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[0 ~12]
675             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[13~25]
676             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[26~38]
677             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[39~51]
678         },
679
680         //LUTMODE_REF_ID
681         {
682             4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[0 ~12]
683             4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[13~25]
684             4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[26~38]
685             4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,    //QP=[39~51]
686         },
687
688         //LUTMODE_INTRA_CHROMA
689         {
690             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[0 ~12]
691             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[13~25]
692             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[26~38]
693             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    //QP=[39~51]
694         },
695     },
696 };
697
698 const int vdenc_mv_cost_skipbias_qpel[8] = {
699     //PREDSLICE
700     0, 6, 6, 9, 10, 13, 14, 16
701 };
702
703 const int vdenc_hme_cost[8][52] = {
704     //mv=0
705     {
706         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[0 ~12]
707         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[13 ~25]
708         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[26 ~38]
709         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[39 ~51]
710     },
711     //mv<=16
712     {
713         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[0 ~12]
714         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[13 ~25]
715         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[26 ~38]
716         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[39 ~51]
717     },
718     //mv<=32
719     {
720         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[0 ~12]
721         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[13 ~25]
722         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[26 ~38]
723         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[39 ~51]
724     },
725     //mv<=64
726     {
727         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[0 ~12]
728         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[13 ~25]
729         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[26 ~38]
730         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[39 ~51]
731     },
732     //mv<=128
733     {
734         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[0 ~12]
735         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[13 ~25]
736         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[26 ~38]
737         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[39 ~51]
738     },
739     //mv<=256
740     {
741         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[0 ~12]
742         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[13 ~25]
743         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,     //QP=[26 ~38]
744         10, 10, 10, 10, 20, 30, 40, 50, 50, 50, 50, 50, 50,     //QP=[39 ~51]
745     },
746     //mv<=512
747     {
748         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[0 ~12]
749         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[13 ~25]
750         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[26 ~38]
751         20, 20, 20, 40, 60, 80, 100, 100, 100, 100, 100, 100, 100,     //QP=[39 ~51]
752     },
753
754     //mv<=1024
755     {
756         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[0 ~12]
757         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[13 ~25]
758         20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,     //QP=[26 ~38]
759         20, 20, 30, 50, 100, 200, 200, 200, 200, 200, 200, 200, 200,     //QP=[39 ~51]
760     },
761 };
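/*
 * The cost tables above hold plain linear values: vdenc_mode_const is
 * indexed by [slice type (0 = intra slice, 1 = P slice)][LUT mode][QP 0..51],
 * vdenc_mv_cost_skipbias_qpel by MV-cost index 0..7, and vdenc_hme_cost by
 * [MV range bucket][QP 0..51].  They are not programmed into the hardware
 * directly; gen9_vdenc_avc_calculate_mode_cost() below runs each entry
 * through map_44_lut_value() to get the packed 4.4 shift/mantissa byte.
 */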
762
763 #define OUT_BUFFER_2DW(batch, bo, is_target, delta)  do {               \
764         if (bo) {                                                       \
765             OUT_BCS_RELOC64(batch,                                      \
766                             bo,                                         \
767                             I915_GEM_DOMAIN_RENDER,                     \
768                             is_target ? I915_GEM_DOMAIN_RENDER : 0,     \
769                             delta);                                     \
770         } else {                                                        \
771             OUT_BCS_BATCH(batch, 0);                                    \
772             OUT_BCS_BATCH(batch, 0);                                    \
773         }                                                               \
774     } while (0)
775
776 #define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr)  do { \
777         OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
778         OUT_BCS_BATCH(batch, attr);                             \
779     } while (0)
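/*
 * OUT_BUFFER_2DW emits the two DWs of a 64-bit graphics address field: a
 * relocation when a buffer object is provided, or two zero DWs when the
 * address is unused.  OUT_BUFFER_3DW appends the attribute DW (typically
 * the MOCS/memory attribute value) that follows the address in the gen9
 * VDBOX state commands built below.
 */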
780
781 #define ALLOC_VDENC_BUFFER_RESOURCE(buffer, bfsize, des) do {   \
782         buffer.type = I965_GPE_RESOURCE_BUFFER;                 \
783         buffer.width = bfsize;                                  \
784         buffer.height = 1;                                      \
785         buffer.pitch = buffer.width;                            \
786         buffer.size = buffer.pitch;                             \
787         buffer.tiling = I915_TILING_NONE;                       \
788         i965_allocate_gpe_resource(i965->intel.bufmgr,          \
789                                    &buffer,                     \
790                                    bfsize,                      \
791                                    (des));                      \
792     } while (0)
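/*
 * ALLOC_VDENC_BUFFER_RESOURCE describes a linear (untiled) 1D buffer and
 * allocates its backing bo through i965_allocate_gpe_resource(); it relies
 * on a local `i965` driver-data pointer being in scope at the call site.
 * A minimal usage sketch with a hypothetical resource and size:
 *
 *     struct i965_gpe_resource res = {};
 *     ALLOC_VDENC_BUFFER_RESOURCE(res, 4096, "hypothetical scratch buffer");
 *     ...
 *     i965_free_gpe_resource(&res);
 */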
793
794 static int
795 gen9_vdenc_get_max_vmv_range(int level)
796 {
797     int max_vmv_range = 512;
798
799     if (level == 10)
800         max_vmv_range = 256;
801     else if (level <= 20)
802         max_vmv_range = 512;
803     else if (level <= 30)
804         max_vmv_range = 1024;
805     else
806         max_vmv_range = 2048;
807
808     return max_vmv_range;
809 }
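/*
 * The value returned here is the maximum vertical MV range in quarter-pel
 * units, following Table A-1 of the H.264 specification: +/-64 luma samples
 * (256 quarter-pel) up to level 1.0, +/-128 up to level 2.0, +/-256 up to
 * level 3.0, and +/-512 for level 3.1 and above.
 */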
810
811 static unsigned char
812 map_44_lut_value(unsigned int v, unsigned char max)
813 {
814     unsigned int maxcost;
815     int d;
816     unsigned char ret;
817
818     if (v == 0) {
819         return 0;
820     }
821
822     maxcost = ((max & 15) << (max >> 4));
823
824     if (v >= maxcost) {
825         return max;
826     }
827
828     d = (int)(log((double)v) / log(2.0)) - 3;
829
830     if (d < 0) {
831         d = 0;
832     }
833
834     ret = (unsigned char)((d << 4) + (int)((v + (d == 0 ? 0 : (1 << (d - 1)))) >> d));
835     ret =  (ret & 0xf) == 0 ? (ret | 8) : ret;
836
837     return ret;
838 }
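/*
 * map_44_lut_value() packs a linear cost into the "4.4" byte used by the
 * VDENC cost LUTs: the high nibble is a shift, the low nibble a mantissa,
 * and the byte decodes back to roughly (mantissa << shift).  `max` is the
 * largest allowed encoding (0x6f decodes to 15 << 6 = 960, 0x8f to
 * 15 << 8 = 3840).  For example, map_44_lut_value(200, 0x6f) yields 0x4d,
 * i.e. 13 << 4 = 208, the closest representable value after rounding.
 */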
839
840 static void
841 gen9_vdenc_update_rate_control_parameters(VADriverContextP ctx,
842                                           struct intel_encoder_context *encoder_context,
843                                           VAEncMiscParameterRateControl *misc)
844 {
845     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
846
847     vdenc_context->max_bit_rate = ALIGN(misc->bits_per_second, 1000) / 1000;
848     vdenc_context->mb_brc_enabled = 0;
849
850     if (vdenc_context->internal_rate_mode == I965_BRC_CBR) {
851         vdenc_context->min_bit_rate = vdenc_context->max_bit_rate;
852         vdenc_context->mb_brc_enabled = (misc->rc_flags.bits.mb_rate_control < 2);
853
854         if (vdenc_context->target_bit_rate != vdenc_context->max_bit_rate) {
855             vdenc_context->target_bit_rate = vdenc_context->max_bit_rate;
856             vdenc_context->brc_need_reset = 1;
857         }
858     } else if (vdenc_context->internal_rate_mode == I965_BRC_VBR) {
859         vdenc_context->min_bit_rate = vdenc_context->max_bit_rate * (2 * misc->target_percentage - 100) / 100;
860         vdenc_context->mb_brc_enabled = (misc->rc_flags.bits.mb_rate_control < 2);
861
862         if (vdenc_context->target_bit_rate != vdenc_context->max_bit_rate * misc->target_percentage / 100) {
863             vdenc_context->target_bit_rate = vdenc_context->max_bit_rate * misc->target_percentage / 100;
864             vdenc_context->brc_need_reset = 1;
865         }
866     }
867 }
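/*
 * Bit rates are tracked in kbps (bits_per_second rounded up to a multiple
 * of 1000, then divided by 1000).  CBR forces min = target = max, while
 * VBR derives the window from target_percentage: with target_percentage of
 * 95, for instance, target becomes 95% and min (2 * 95 - 100)% = 90% of the
 * max rate.  A change of the target rate sets brc_need_reset so that the
 * BRC state is reset for the following frames.
 */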
868
869 static void
870 gen9_vdenc_update_hrd_parameters(VADriverContextP ctx,
871                                  struct intel_encoder_context *encoder_context,
872                                  VAEncMiscParameterHRD *misc)
873 {
874     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
875
876     if (vdenc_context->internal_rate_mode == I965_BRC_CQP)
877         return;
878
879     vdenc_context->vbv_buffer_size_in_bit = misc->buffer_size;
880     vdenc_context->init_vbv_buffer_fullness_in_bit = misc->initial_buffer_fullness;
881 }
882
883 static void
884 gen9_vdenc_update_framerate_parameters(VADriverContextP ctx,
885                                        struct intel_encoder_context *encoder_context,
886                                        VAEncMiscParameterFrameRate *misc)
887 {
888     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
889
890     vdenc_context->frames_per_100s = misc->framerate; /* misc->framerate is multiple of 100 */
891 }
892
893 static void
894 gen9_vdenc_update_roi_parameters(VADriverContextP ctx,
895                                  struct intel_encoder_context *encoder_context,
896                                  VAEncMiscParameterBufferROI *misc)
897 {
898     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
899     int i;
900
901     if (!misc || !misc->roi) {
902         vdenc_context->num_roi = 0;
903         return;
904     }
905
906     vdenc_context->num_roi = MIN(misc->num_roi, 3);
907     vdenc_context->max_delta_qp = misc->max_delta_qp;
908     vdenc_context->min_delta_qp = misc->min_delta_qp;
909     vdenc_context->vdenc_streamin_enable = !!vdenc_context->num_roi;
910
911     for (i = 0; i < vdenc_context->num_roi; i++) {
912         vdenc_context->roi[i].left = misc->roi[i].roi_rectangle.x;
913         vdenc_context->roi[i].right = vdenc_context->roi[i].left + misc->roi[i].roi_rectangle.width;
914         vdenc_context->roi[i].top = misc->roi[i].roi_rectangle.y;
915         vdenc_context->roi[i].bottom = vdenc_context->roi[i].top + misc->roi[i].roi_rectangle.height;
916         vdenc_context->roi[i].value = misc->roi[i].roi_value;
917
918         vdenc_context->roi[i].left /= 16;
919         vdenc_context->roi[i].right /= 16;
920         vdenc_context->roi[i].top /= 16;
921         vdenc_context->roi[i].bottom /= 16;
922     }
923 }
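/*
 * ROI rectangles arrive in pixels and are converted to macroblock units by
 * dividing each edge by 16; a hypothetical 64x48 ROI at (32, 16), for
 * instance, becomes left = 2, right = 6, top = 1, bottom = 4.  At most
 * three ROIs are kept, and VDENC stream-in is enabled only when at least
 * one ROI is present.
 */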
924
925 static void
926 gen9_vdenc_update_misc_parameters(VADriverContextP ctx,
927                                   struct encode_state *encode_state,
928                                   struct intel_encoder_context *encoder_context)
929 {
930     int i;
931     VAEncMiscParameterBuffer *misc_param;
932
933     for (i = 0; i < ARRAY_ELEMS(encode_state->misc_param); i++) {
934         if (!encode_state->misc_param[i] || !encode_state->misc_param[i]->buffer)
935             continue;
936
937         misc_param = (VAEncMiscParameterBuffer *)encode_state->misc_param[i]->buffer;
938
939         switch (misc_param->type) {
940         case VAEncMiscParameterTypeFrameRate:
941             gen9_vdenc_update_framerate_parameters(ctx,
942                                                    encoder_context,
943                                                    (VAEncMiscParameterFrameRate *)misc_param->data);
944             break;
945
946         case VAEncMiscParameterTypeRateControl:
947             gen9_vdenc_update_rate_control_parameters(ctx,
948                                                       encoder_context,
949                                                       (VAEncMiscParameterRateControl *)misc_param->data);
950             break;
951
952         case VAEncMiscParameterTypeHRD:
953             gen9_vdenc_update_hrd_parameters(ctx,
954                                              encoder_context,
955                                              (VAEncMiscParameterHRD *)misc_param->data);
956             break;
957
958         case VAEncMiscParameterTypeROI:
959             gen9_vdenc_update_roi_parameters(ctx,
960                                              encoder_context,
961                                              (VAEncMiscParameterBufferROI *)misc_param->data);
962             break;
963
964         default:
965             break;
966         }
967     }
968 }
969
970 static void
971 gen9_vdenc_update_parameters(VADriverContextP ctx,
972                              VAProfile profile,
973                              struct encode_state *encode_state,
974                              struct intel_encoder_context *encoder_context)
975 {
976     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
977     VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
978     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
979
980     if (profile == VAProfileH264High)
981         vdenc_context->transform_8x8_mode_enable = !!pic_param->pic_fields.bits.transform_8x8_mode_flag;
982     else
983         vdenc_context->transform_8x8_mode_enable = 0;
984
985     vdenc_context->frame_width_in_mbs = seq_param->picture_width_in_mbs;
986     vdenc_context->frame_height_in_mbs = seq_param->picture_height_in_mbs;
987
988     vdenc_context->frame_width = vdenc_context->frame_width_in_mbs * 16;
989     vdenc_context->frame_height = vdenc_context->frame_height_in_mbs * 16;
990
991     vdenc_context->down_scaled_width_in_mb4x = WIDTH_IN_MACROBLOCKS(vdenc_context->frame_width / SCALE_FACTOR_4X);
992     vdenc_context->down_scaled_height_in_mb4x = HEIGHT_IN_MACROBLOCKS(vdenc_context->frame_height / SCALE_FACTOR_4X);
993     vdenc_context->down_scaled_width_4x = vdenc_context->down_scaled_width_in_mb4x * 16;
994     vdenc_context->down_scaled_height_4x = ((vdenc_context->down_scaled_height_in_mb4x + 1) >> 1) * 16;
995     vdenc_context->down_scaled_height_4x = ALIGN(vdenc_context->down_scaled_height_4x, 32) << 1;
996
997     if (vdenc_context->internal_rate_mode == I965_BRC_CBR) {
998         vdenc_context->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
999         vdenc_context->max_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
1000         vdenc_context->min_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
1001     }
1002
1003     vdenc_context->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
1004     vdenc_context->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
1005     vdenc_context->frames_per_100s = 3000; /* 30fps */
1006     vdenc_context->gop_size = seq_param->intra_period;
1007     vdenc_context->ref_dist = seq_param->ip_period;
1008     vdenc_context->vdenc_streamin_enable = 0;
1009
1010     gen9_vdenc_update_misc_parameters(ctx, encode_state, encoder_context);
1011
1012     vdenc_context->current_pass = 0;
1013     vdenc_context->num_passes = 1;
1014
1015     if (vdenc_context->internal_rate_mode == I965_BRC_CBR ||
1016         vdenc_context->internal_rate_mode == I965_BRC_VBR)
1017         vdenc_context->brc_enabled = 1;
1018     else
1019         vdenc_context->brc_enabled = 0;
1020
1021     if (vdenc_context->brc_enabled &&
1022         (!vdenc_context->init_vbv_buffer_fullness_in_bit ||
1023          !vdenc_context->vbv_buffer_size_in_bit ||
1024          !vdenc_context->max_bit_rate ||
1025          !vdenc_context->target_bit_rate ||
1026          !vdenc_context->frames_per_100s))
1027         vdenc_context->brc_enabled = 0;
1028
1029     if (!vdenc_context->brc_enabled) {
1030         vdenc_context->target_bit_rate = 0;
1031         vdenc_context->max_bit_rate = 0;
1032         vdenc_context->min_bit_rate = 0;
1033         vdenc_context->init_vbv_buffer_fullness_in_bit = 0;
1034         vdenc_context->vbv_buffer_size_in_bit = 0;
1035     } else {
1036         vdenc_context->num_passes = NUM_OF_BRC_PAK_PASSES;
1037     }
1038 }
1039
1040 static void
1041 gen9_vdenc_avc_calculate_mode_cost(VADriverContextP ctx,
1042                                    struct encode_state *encode_state,
1043                                    struct intel_encoder_context *encoder_context,
1044                                    int qp)
1045 {
1046     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1047     unsigned int frame_type = vdenc_context->frame_type;
1048
1049     memset(vdenc_context->mode_cost, 0, sizeof(vdenc_context->mode_cost));
1050     memset(vdenc_context->mv_cost, 0, sizeof(vdenc_context->mv_cost));
1051     memset(vdenc_context->hme_mv_cost, 0, sizeof(vdenc_context->hme_mv_cost));
1052
1053     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_NONPRED] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_NONPRED][qp]), 0x6f);
1054     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_16x16][qp]), 0x8f);
1055     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_8x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_8x8][qp]), 0x8f);
1056     vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_4x4] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_4x4][qp]), 0x8f);
1057
1058     if (frame_type == VDENC_FRAME_P) {
1059         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x16][qp]), 0x8f);
1060         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x8][qp]), 0x8f);
1061         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X8Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X8Q][qp]), 0x6f);
1062         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X4Q][qp]), 0x6f);
1063         vdenc_context->mode_cost[VDENC_LUTMODE_INTER_4X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_4X4Q][qp]), 0x6f);
1064         vdenc_context->mode_cost[VDENC_LUTMODE_REF_ID] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_REF_ID][qp]), 0x6f);
1065
1066         vdenc_context->mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[0]), 0x6f);
1067         vdenc_context->mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[1]), 0x6f);
1068         vdenc_context->mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[2]), 0x6f);
1069         vdenc_context->mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[3]), 0x6f);
1070         vdenc_context->mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[4]), 0x6f);
1071         vdenc_context->mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[5]), 0x6f);
1072         vdenc_context->mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[6]), 0x6f);
1073         vdenc_context->mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[7]), 0x6f);
1074
1075         vdenc_context->hme_mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_hme_cost[0][qp]), 0x6f);
1076         vdenc_context->hme_mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_hme_cost[1][qp]), 0x6f);
1077         vdenc_context->hme_mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_hme_cost[2][qp]), 0x6f);
1078         vdenc_context->hme_mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_hme_cost[3][qp]), 0x6f);
1079         vdenc_context->hme_mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_hme_cost[4][qp]), 0x6f);
1080         vdenc_context->hme_mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_hme_cost[5][qp]), 0x6f);
1081         vdenc_context->hme_mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_hme_cost[6][qp]), 0x6f);
1082         vdenc_context->hme_mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_hme_cost[7][qp]), 0x6f);
1083     }
1084 }
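/*
 * Only the intra mode costs are filled in for I frames; P frames also get
 * the inter mode, MV and HME costs.  The second argument to
 * map_44_lut_value() caps the encoded byte: 0x8f (15 << 8) for the larger
 * intra/inter block-mode costs and 0x6f (15 << 6) for the non-predicted
 * intra, 8x8/8x4/4x4 inter, ref-id, MV and HME entries.
 */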
1085
1086 static void
1087 gen9_vdenc_update_roi_in_streamin_state(VADriverContextP ctx,
1088                                         struct intel_encoder_context *encoder_context)
1089 {
1090     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1091     struct gen9_vdenc_streamin_state *streamin_state;
1092     int row, col, i;
1093
1094     if (!vdenc_context->num_roi)
1095         return;
1096
1097     streamin_state = (struct gen9_vdenc_streamin_state *)i965_map_gpe_resource(&vdenc_context->vdenc_streamin_res);
1098
1099     if (!streamin_state)
1100         return;
1101
1102     for (col = 0;  col < vdenc_context->frame_width_in_mbs; col++) {
1103         for (row = 0; row < vdenc_context->frame_height_in_mbs; row++) {
1104             streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = 0; /* non-ROI region */
1105
1106             /* The last one has higher priority */
1107             for (i = vdenc_context->num_roi - 1; i >= 0; i--) {
1108                 if ((col >= vdenc_context->roi[i].left && col <= vdenc_context->roi[i].right) &&
1109                     (row >= vdenc_context->roi[i].top && row <= vdenc_context->roi[i].bottom)) {
1110                     streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = i + 1;
1111
1112                     break;
1113                 }
1114             }
1115         }
1116     }
1117
1118     i965_unmap_gpe_resource(&vdenc_context->vdenc_streamin_res);
1119 }
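/*
 * The stream-in buffer holds one 64-byte gen9_vdenc_streamin_state entry
 * per macroblock in row-major order (row * frame_width_in_mbs + col).
 * roi_selection 0 marks a normal MB and values 1..3 select one of the up
 * to three ROI regions; where ROIs overlap, the highest-numbered one wins
 * because the loop above scans them from last to first.
 */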
1120
1121 static VAStatus
1122 gen9_vdenc_avc_prepare(VADriverContextP ctx,
1123                        VAProfile profile,
1124                        struct encode_state *encode_state,
1125                        struct intel_encoder_context *encoder_context)
1126 {
1127     struct i965_driver_data *i965 = i965_driver_data(ctx);
1128     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1129     struct i965_coded_buffer_segment *coded_buffer_segment;
1130     struct object_surface *obj_surface;
1131     struct object_buffer *obj_buffer;
1132     VAEncPictureParameterBufferH264 *pic_param;
1133     VAEncSliceParameterBufferH264 *slice_param;
1134     VDEncAvcSurface *vdenc_avc_surface;
1135     dri_bo *bo;
1136     int i, j, enable_avc_ildb = 0;
1137     int qp;
1138     char *pbuffer;
1139
1140     gen9_vdenc_update_parameters(ctx, profile, encode_state, encoder_context);
1141
1142     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
1143         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
1144         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
1145
1146         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
1147             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1148                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1149                    (slice_param->slice_type == SLICE_TYPE_P) ||
1150                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1151                    (slice_param->slice_type == SLICE_TYPE_B));
1152
1153             if (slice_param->disable_deblocking_filter_idc != 1) {
1154                 enable_avc_ildb = 1;
1155                 break;
1156             }
1157
1158             slice_param++;
1159         }
1160     }
1161
1162     /* Setup current frame */
1163     obj_surface = encode_state->reconstructed_object;
1164     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1165
1166     if (obj_surface->private_data == NULL) {
1167         vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
1168         assert(vdenc_avc_surface);
1169
1170         vdenc_avc_surface->ctx = ctx;
1171         i965_CreateSurfaces(ctx,
1172                             vdenc_context->down_scaled_width_4x,
1173                             vdenc_context->down_scaled_height_4x,
1174                             VA_RT_FORMAT_YUV420,
1175                             1,
1176                             &vdenc_avc_surface->scaled_4x_surface_id);
1177         vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
1178         assert(vdenc_avc_surface->scaled_4x_surface_obj);
1179         i965_check_alloc_surface_bo(ctx,
1180                                     vdenc_avc_surface->scaled_4x_surface_obj,
1181                                     1,
1182                                     VA_FOURCC_NV12,
1183                                     SUBSAMPLE_YUV420);
1184
1185         obj_surface->private_data = (void *)vdenc_avc_surface;
1186         obj_surface->free_private_data = (void *)vdenc_free_avc_surface;
1187     }
1188
1189     vdenc_avc_surface = (VDEncAvcSurface *)obj_surface->private_data;
1190     assert(vdenc_avc_surface->scaled_4x_surface_obj);
1191
1192     /* Reconstructed surfaces */
1193     i965_free_gpe_resource(&vdenc_context->recon_surface_res);
1194     i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
1195     i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
1196     i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
1197
1198     i965_object_surface_to_2d_gpe_resource(&vdenc_context->recon_surface_res, obj_surface);
1199     i965_object_surface_to_2d_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res, vdenc_avc_surface->scaled_4x_surface_obj);
1200
1201     if (enable_avc_ildb) {
1202         i965_object_surface_to_2d_gpe_resource(&vdenc_context->post_deblocking_output_res, obj_surface);
1203     } else {
1204         i965_object_surface_to_2d_gpe_resource(&vdenc_context->pre_deblocking_output_res, obj_surface);
1205     }
1206
1207
1208     /* Reference surfaces */
1209     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
1210         assert(ARRAY_ELEMS(vdenc_context->list_reference_res) ==
1211                ARRAY_ELEMS(vdenc_context->list_scaled_4x_reference_res));
1212         i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
1213         i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
1214         obj_surface = encode_state->reference_objects[i];
1215
1216         if (obj_surface && obj_surface->bo) {
1217             i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_reference_res[i], obj_surface);
1218
1219             if (obj_surface->private_data == NULL) {
1220                 vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
1221                 assert(vdenc_avc_surface);
1222
1223                 vdenc_avc_surface->ctx = ctx;
1224                 i965_CreateSurfaces(ctx,
1225                                     vdenc_context->down_scaled_width_4x,
1226                                     vdenc_context->down_scaled_height_4x,
1227                                     VA_RT_FORMAT_YUV420,
1228                                     1,
1229                                     &vdenc_avc_surface->scaled_4x_surface_id);
1230                 vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
1231                 assert(vdenc_avc_surface->scaled_4x_surface_obj);
1232                 i965_check_alloc_surface_bo(ctx,
1233                                             vdenc_avc_surface->scaled_4x_surface_obj,
1234                                             1,
1235                                             VA_FOURCC_NV12,
1236                                             SUBSAMPLE_YUV420);
1237
1238                 obj_surface->private_data = vdenc_avc_surface;
1239                 obj_surface->free_private_data = (void *)vdenc_free_avc_surface;
1240             }
1241
1242             vdenc_avc_surface = obj_surface->private_data;
1243             i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i], vdenc_avc_surface->scaled_4x_surface_obj);
1244         }
1245     }
1246
1247     /* Input YUV surface */
1248     i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
1249     i965_object_surface_to_2d_gpe_resource(&vdenc_context->uncompressed_input_surface_res, encode_state->input_yuv_object);
1250
1251     /* Encoded bitstream */
1252     obj_buffer = encode_state->coded_buf_object;
1253     bo = obj_buffer->buffer_store->bo;
1254     i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
1255     i965_dri_object_to_buffer_gpe_resource(&vdenc_context->compressed_bitstream.res, bo);
1256     vdenc_context->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
1257     vdenc_context->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
1258
1259     /* Status buffer */
1260     i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
1261     i965_dri_object_to_buffer_gpe_resource(&vdenc_context->status_bffuer.res, bo);
1262     vdenc_context->status_bffuer.base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
1263     vdenc_context->status_bffuer.size = ALIGN(sizeof(struct gen9_vdenc_status), 64);
1264     vdenc_context->status_bffuer.bytes_per_frame_offset = offsetof(struct gen9_vdenc_status, bytes_per_frame);
1265     assert(vdenc_context->status_bffuer.base_offset + vdenc_context->status_bffuer.size <
1266            vdenc_context->compressed_bitstream.start_offset);
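         /*
          * The status buffer shares the coded buffer BO with the bitstream:
          * it sits in the i965_coded_buffer_segment header area, and the
          * assert above guarantees it does not overlap the bitstream region
          * starting at compressed_bitstream.start_offset.
          */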
1267
1268     dri_bo_map(bo, 1);
1269
1270     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
1271     coded_buffer_segment->mapped = 0;
1272     coded_buffer_segment->codec = encoder_context->codec;
1273     coded_buffer_segment->status_support = 1;
1274
1275     pbuffer = bo->virtual;
1276     pbuffer += vdenc_context->status_bffuer.base_offset;
1277     memset(pbuffer, 0, vdenc_context->status_bffuer.size);
1278
1279     dri_bo_unmap(bo);
1280
1281     i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
1282     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_intra_row_store_scratch_res,
1283                                 vdenc_context->frame_width_in_mbs * 64,
1284                                 "Intra row store scratch buffer");
1285
1286     i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
1287     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_deblocking_filter_row_store_scratch_res,
1288                                 vdenc_context->frame_width_in_mbs * 256,
1289                                 "Deblocking filter row store scratch buffer");
1290
1291     i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
1292     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_bsd_mpc_row_store_scratch_res,
1293                                 vdenc_context->frame_width_in_mbs * 128,
1294                                 "BSD/MPC row store scratch buffer");
1295
1296     i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
1297     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_row_store_scratch_res,
1298                                 vdenc_context->frame_width_in_mbs * 64,
1299                                 "VDENC row store scratch buffer");
1300
1301     assert(sizeof(struct gen9_vdenc_streamin_state) == 64);
1302     i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
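         /*
          * One 64-byte gen9_vdenc_streamin_state entry per macroblock. As an
          * illustration only: a 1920x1080 input is 120x68 MBs, i.e.
          * 8160 * 64 = 522240 bytes of StreamIn data.
          */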
1303     ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_streamin_res,
1304                                 vdenc_context->frame_width_in_mbs *
1305                                 vdenc_context->frame_height_in_mbs *
1306                                 sizeof(struct gen9_vdenc_streamin_state),
1307                                 "VDENC StreamIn buffer");
1308
1309     /*
1310      * Calculate the index for each reference surface in list0 for the first slice
1311      * TODO: other slices
1312      */
1313     pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1314     slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1315
1316     vdenc_context->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
1317
1318     if (slice_param->num_ref_idx_active_override_flag)
1319         vdenc_context->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
1320
1321     if (vdenc_context->num_refs[0] > ARRAY_ELEMS(vdenc_context->list_ref_idx[0]))
1322         return VA_STATUS_ERROR_INVALID_VALUE;
1323
1324     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_ref_idx[0]); i++) {
1325         VAPictureH264 *va_pic;
1326
1327         assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(vdenc_context->list_ref_idx[0]));
1328         vdenc_context->list_ref_idx[0][i] = 0;
1329
1330         if (i >= vdenc_context->num_refs[0])
1331             continue;
1332
1333         va_pic = &slice_param->RefPicList0[i];
1334
1335         for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
1336             obj_surface = encode_state->reference_objects[j];
1337
1338             if (obj_surface &&
1339                 obj_surface->bo &&
1340                 obj_surface->base.id == va_pic->picture_id) {
1341
1342                 assert(obj_surface->base.id != VA_INVALID_SURFACE);
1343                 vdenc_context->list_ref_idx[0][i] = j;
1344
1345                 break;
1346             }
1347         }
1348     }
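         /*
          * At this point list_ref_idx[0][i] maps the i-th RefPicList0 entry
          * of the first slice to its slot in encode_state->reference_objects
          * (entries beyond num_refs[0] default to 0).
          */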
1349
1350     if (slice_param->slice_type == SLICE_TYPE_I ||
1351         slice_param->slice_type == SLICE_TYPE_SI)
1352         vdenc_context->frame_type = VDENC_FRAME_I;
1353     else
1354         vdenc_context->frame_type = VDENC_FRAME_P;
1355
1356     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1357
1358     gen9_vdenc_avc_calculate_mode_cost(ctx, encode_state, encoder_context, qp);
1359     gen9_vdenc_update_roi_in_streamin_state(ctx, encoder_context);
1360
1361     return VA_STATUS_SUCCESS;
1362 }
1363
1364 static void
1365 gen9_vdenc_huc_pipe_mode_select(VADriverContextP ctx,
1366                                 struct intel_encoder_context *encoder_context,
1367                                 struct huc_pipe_mode_select_parameter *params)
1368 {
1369     struct intel_batchbuffer *batch = encoder_context->base.batch;
1370
1371     BEGIN_BCS_BATCH(batch, 3);
1372
1373     OUT_BCS_BATCH(batch, HUC_PIPE_MODE_SELECT | (3 - 2));
1374     OUT_BCS_BATCH(batch,
1375                   (params->huc_stream_object_enable << 10) |
1376                   (params->indirect_stream_out_enable << 4));
1377     OUT_BCS_BATCH(batch,
1378                   params->media_soft_reset_counter);
1379
1380     ADVANCE_BCS_BATCH(batch);
1381 }
1382
1383 static void
1384 gen9_vdenc_huc_imem_state(VADriverContextP ctx,
1385                           struct intel_encoder_context *encoder_context,
1386                           struct huc_imem_state_parameter *params)
1387 {
1388     struct intel_batchbuffer *batch = encoder_context->base.batch;
1389
1390     BEGIN_BCS_BATCH(batch, 5);
1391
1392     OUT_BCS_BATCH(batch, HUC_IMEM_STATE | (5 - 2));
1393     OUT_BCS_BATCH(batch, 0);
1394     OUT_BCS_BATCH(batch, 0);
1395     OUT_BCS_BATCH(batch, 0);
1396     OUT_BCS_BATCH(batch, params->huc_firmware_descriptor);
1397
1398     ADVANCE_BCS_BATCH(batch);
1399 }
1400
1401 static void
1402 gen9_vdenc_huc_dmem_state(VADriverContextP ctx,
1403                           struct intel_encoder_context *encoder_context,
1404                           struct huc_dmem_state_parameter *params)
1405 {
1406     struct intel_batchbuffer *batch = encoder_context->base.batch;
1407
1408     BEGIN_BCS_BATCH(batch, 6);
1409
1410     OUT_BCS_BATCH(batch, HUC_DMEM_STATE | (6 - 2));
1411     OUT_BUFFER_3DW(batch, params->huc_data_source_res->bo, 0, 0, 0);
1412     OUT_BCS_BATCH(batch, params->huc_data_destination_base_address);
1413     OUT_BCS_BATCH(batch, params->huc_data_length);
1414
1415     ADVANCE_BCS_BATCH(batch);
1416 }
1417
1418 /*
1419 static void
1420 gen9_vdenc_huc_cfg_state(VADriverContextP ctx,
1421                          struct intel_encoder_context *encoder_context,
1422                          struct huc_cfg_state_parameter *params)
1423 {
1424     struct intel_batchbuffer *batch = encoder_context->base.batch;
1425
1426     BEGIN_BCS_BATCH(batch, 2);
1427
1428     OUT_BCS_BATCH(batch, HUC_CFG_STATE | (2 - 2));
1429     OUT_BCS_BATCH(batch, !!params->force_reset);
1430
1431     ADVANCE_BCS_BATCH(batch);
1432 }
1433 */
1434 static void
1435 gen9_vdenc_huc_virtual_addr_state(VADriverContextP ctx,
1436                                   struct intel_encoder_context *encoder_context,
1437                                   struct huc_virtual_addr_parameter *params)
1438 {
1439     struct intel_batchbuffer *batch = encoder_context->base.batch;
1440     int i;
1441
1442     BEGIN_BCS_BATCH(batch, 49);
1443
1444     OUT_BCS_BATCH(batch, HUC_VIRTUAL_ADDR_STATE | (49 - 2));
1445
1446     for (i = 0; i < 16; i++) {
1447         if (params->regions[i].huc_surface_res && params->regions[i].huc_surface_res->bo)
1448             OUT_BUFFER_3DW(batch,
1449                            params->regions[i].huc_surface_res->bo,
1450                            !!params->regions[i].is_target, 0, 0);
1451         else
1452             OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1453     }
1454
1455     ADVANCE_BCS_BATCH(batch);
1456 }
1457
1458 static void
1459 gen9_vdenc_huc_ind_obj_base_addr_state(VADriverContextP ctx,
1460                                        struct intel_encoder_context *encoder_context,
1461                                        struct huc_ind_obj_base_addr_parameter *params)
1462 {
1463     struct intel_batchbuffer *batch = encoder_context->base.batch;
1464
1465     BEGIN_BCS_BATCH(batch, 11);
1466
1467     OUT_BCS_BATCH(batch, HUC_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
1468
1469     if (params->huc_indirect_stream_in_object_res)
1470         OUT_BUFFER_3DW(batch,
1471                        params->huc_indirect_stream_in_object_res->bo,
1472                        0, 0, 0);
1473     else
1474         OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1475
1476     OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */
1477
1478     if (params->huc_indirect_stream_out_object_res)
1479         OUT_BUFFER_3DW(batch,
1480                        params->huc_indirect_stream_out_object_res->bo,
1481                        1, 0, 0);
1482     else
1483         OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1484
1485     OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */
1486
1487     ADVANCE_BCS_BATCH(batch);
1488 }
1489
1490 static void
1491 gen9_vdenc_huc_store_huc_status2(VADriverContextP ctx,
1492                                  struct intel_encoder_context *encoder_context)
1493 {
1494     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1495     struct intel_batchbuffer *batch = encoder_context->base.batch;
1496     struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
1497     struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
1498
1499     /* Write HUC_STATUS2 mask (1 << 6) */
1500     memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
1501     mi_store_data_imm_params.bo = vdenc_context->huc_status2_res.bo;
1502     mi_store_data_imm_params.offset = 0;
1503     mi_store_data_imm_params.dw0 = (1 << 6);
1504     gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
1505
1506     /* Store HUC_STATUS2 */
1507     memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
1508     mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS2;
1509     mi_store_register_mem_params.bo = vdenc_context->huc_status2_res.bo;
1510     mi_store_register_mem_params.offset = 4;
1511     gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
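
         /*
          * The mask/value pair written above (mask at offset 0, HUC_STATUS2
          * at offset 4) is consumed later by MI_CONDITIONAL_BATCH_BUFFER_END
          * in gen9_vdenc_huc_brc_update(), presumably as a check that the
          * HuC firmware actually loaded.
          */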
1512 }
1513
1514 static void
1515 gen9_vdenc_huc_stream_object(VADriverContextP ctx,
1516                              struct intel_encoder_context *encoder_context,
1517                              struct huc_stream_object_parameter *params)
1518 {
1519     struct intel_batchbuffer *batch = encoder_context->base.batch;
1520
1521     BEGIN_BCS_BATCH(batch, 5);
1522
1523     OUT_BCS_BATCH(batch, HUC_STREAM_OBJECT | (5 - 2));
1524     OUT_BCS_BATCH(batch, params->indirect_stream_in_data_length);
1525     OUT_BCS_BATCH(batch,
1526                   (1 << 31) |   /* Must be 1 */
1527                   params->indirect_stream_in_start_address);
1528     OUT_BCS_BATCH(batch, params->indirect_stream_out_start_address);
1529     OUT_BCS_BATCH(batch,
1530                   (!!params->huc_bitstream_enable << 29) |
1531                   (params->length_mode << 27) |
1532                   (!!params->stream_out << 26) |
1533                   (!!params->emulation_prevention_byte_removal << 25) |
1534                   (!!params->start_code_search_engine << 24) |
1535                   (params->start_code_byte2 << 16) |
1536                   (params->start_code_byte1 << 8) |
1537                   params->start_code_byte0);
1538
1539     ADVANCE_BCS_BATCH(batch);
1540 }
1541
1542 static void
1543 gen9_vdenc_huc_start(VADriverContextP ctx,
1544                      struct intel_encoder_context *encoder_context,
1545                      struct huc_start_parameter *params)
1546 {
1547     struct intel_batchbuffer *batch = encoder_context->base.batch;
1548
1549     BEGIN_BCS_BATCH(batch, 2);
1550
1551     OUT_BCS_BATCH(batch, HUC_START | (2 - 2));
1552     OUT_BCS_BATCH(batch, !!params->last_stream_object);
1553
1554     ADVANCE_BCS_BATCH(batch);
1555 }
1556
1557 static void
1558 gen9_vdenc_vd_pipeline_flush(VADriverContextP ctx,
1559                              struct intel_encoder_context *encoder_context,
1560                              struct vd_pipeline_flush_parameter *params)
1561 {
1562     struct intel_batchbuffer *batch = encoder_context->base.batch;
1563
1564     BEGIN_BCS_BATCH(batch, 2);
1565
1566     OUT_BCS_BATCH(batch, VD_PIPELINE_FLUSH | (2 - 2));
1567     OUT_BCS_BATCH(batch,
1568                   params->mfx_pipeline_command_flush << 19 |
1569                   params->mfl_pipeline_command_flush << 18 |
1570                   params->vdenc_pipeline_command_flush << 17 |
1571                   params->hevc_pipeline_command_flush << 16 |
1572                   params->vd_command_message_parser_done << 4 |
1573                   params->mfx_pipeline_done << 3 |
1574                   params->mfl_pipeline_done << 2 |
1575                   params->vdenc_pipeline_done << 1 |
1576                   params->hevc_pipeline_done);
1577
1578     ADVANCE_BCS_BATCH(batch);
1579 }
1580
1581 static int
1582 gen9_vdenc_get_max_mbps(int level_idc)
1583 {
1584     int max_mbps = 11880;
1585
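         /*
          * The table below matches MaxMBPS from Table A-1 of the H.264 spec
          * for levels 2.0 through 5.2; unlisted levels (including 1.0-1.3)
          * fall back to the level 2.0 value.
          */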
1586     switch (level_idc) {
1587     case 20:
1588         max_mbps = 11880;
1589         break;
1590
1591     case 21:
1592         max_mbps = 19800;
1593         break;
1594
1595     case 22:
1596         max_mbps = 20250;
1597         break;
1598
1599     case 30:
1600         max_mbps = 40500;
1601         break;
1602
1603     case 31:
1604         max_mbps = 108000;
1605         break;
1606
1607     case 32:
1608         max_mbps = 216000;
1609         break;
1610
1611     case 40:
1612     case 41:
1613         max_mbps = 245760;
1614         break;
1615
1616     case 42:
1617         max_mbps = 522240;
1618         break;
1619
1620     case 50:
1621         max_mbps = 589824;
1622         break;
1623
1624     case 51:
1625         max_mbps = 983040;
1626         break;
1627
1628     case 52:
1629         max_mbps = 2073600;
1630         break;
1631
1632     default:
1633         break;
1634     }
1635
1636     return max_mbps;
1637 }
1638
1639 static unsigned int
1640 gen9_vdenc_get_profile_level_max_frame(VADriverContextP ctx,
1641                                        struct intel_encoder_context *encoder_context,
1642                                        int level_idc)
1643 {
1644     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1645     double bits_per_mb, tmpf;
1646     int max_mbps, num_mb_per_frame;
1647     uint64_t max_byte_per_frame0, max_byte_per_frame1;
1648     unsigned int ret;
1649
1650     if (level_idc >= 31 && level_idc <= 40)
1651         bits_per_mb = 96.0;
1652     else
1653         bits_per_mb = 192.0;
1654
1655     max_mbps = gen9_vdenc_get_max_mbps(level_idc);
1656     num_mb_per_frame = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs;
1657
1658     tmpf = (double)num_mb_per_frame;
1659
1660     if (tmpf < max_mbps / 172.0)
1661         tmpf = max_mbps / 172.0;
1662
1663     max_byte_per_frame0 = (uint64_t)(tmpf * bits_per_mb);
1664     max_byte_per_frame1 = (uint64_t)(((double)max_mbps * 100) / vdenc_context->frames_per_100s * bits_per_mb);
1665
1666     /* TODO: check VAEncMiscParameterTypeMaxFrameSize */
1667     ret = (unsigned int)MIN(max_byte_per_frame0, max_byte_per_frame1);
1668     ret = (unsigned int)MIN(ret, vdenc_context->frame_width * vdenc_context->frame_height);
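         /*
          * Illustrative numbers only: for level_idc 41 at 1080p/30fps
          * (frames_per_100s == 3000), bits_per_mb is 192.0, max_mbps is
          * 245760 and a frame holds 8160 MBs, so
          * max_byte_per_frame0 = 8160 * 192 = 1566720 and
          * max_byte_per_frame1 = (245760 * 100 / 3000) * 192 = 1572864;
          * the smaller value, further capped by the frame size, is returned.
          */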
1669
1670     return ret;
1671 }
1672
1673 static int
1674 gen9_vdenc_calculate_initial_qp(VADriverContextP ctx,
1675                                 struct encode_state *encode_state,
1676                                 struct intel_encoder_context *encoder_context)
1677 {
1678     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1679     float x0 = 0, y0 = 1.19f, x1 = 1.75f, y1 = 1.75f;
1680     unsigned frame_size;
1681     int qp, delta_qp;
1682
1683     frame_size = (vdenc_context->frame_width * vdenc_context->frame_height * 3 / 2);
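         /*
          * frame_size * 2 / 3 is the luma pixel count, so the log10()
          * argument below is roughly pixels per bit at the target rate; the
          * QP is a log-linear interpolation between (x0, y0) and (x1, y1) in
          * that domain. Illustrative numbers only: 1080p at 30fps and 4 Mbps
          * gives a ratio of about 15.6, which works out to an initial QP of
          * roughly 31 before the +2 and VBV-based adjustments below.
          */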
1684     qp = (int)(1.0 / 1.2 * pow(10.0,
1685                                (log10(frame_size * 2.0 / 3.0 * ((float)vdenc_context->frames_per_100s) /
1686                                       ((float)(vdenc_context->target_bit_rate * 1000) * 100)) - x0) *
1687                                (y1 - y0) / (x1 - x0) + y0) + 0.5);
1688     qp += 2;
1689     delta_qp = (int)(9 - (vdenc_context->vbv_buffer_size_in_bit * ((float)vdenc_context->frames_per_100s) /
1690                           ((float)(vdenc_context->target_bit_rate * 1000) * 100)));
1691     if (delta_qp > 0)
1692         qp += delta_qp;
1693
1694     qp = CLAMP(1, 51, qp);
1695     qp--;
1696
1697     if (qp < 0)
1698         qp = 1;
1699
1700     return qp;
1701 }
1702
1703 static void
1704 gen9_vdenc_update_huc_brc_init_dmem(VADriverContextP ctx,
1705                                     struct encode_state *encode_state,
1706                                     struct intel_encoder_context *encoder_context)
1707 {
1708     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1709     struct huc_brc_init_dmem *dmem;
1710     VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1711     double input_bits_per_frame, bps_ratio;
1712     int i;
1713
1714     vdenc_context->brc_init_reset_input_bits_per_frame = ((double)(vdenc_context->max_bit_rate * 1000) * 100) / vdenc_context->frames_per_100s;
1715     vdenc_context->brc_init_current_target_buf_full_in_bits = vdenc_context->brc_init_reset_input_bits_per_frame;
1716     vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
1717
1718     dmem = (struct huc_brc_init_dmem *)i965_map_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1719
1720     if (!dmem)
1721         return;
1722
1723     memset(dmem, 0, sizeof(*dmem));
1724
1725     dmem->brc_func = vdenc_context->brc_initted ? 2 : 0;
1726
1727     dmem->frame_width = vdenc_context->frame_width;
1728     dmem->frame_height = vdenc_context->frame_height;
1729
1730     dmem->target_bitrate = vdenc_context->target_bit_rate * 1000;
1731     dmem->min_rate = vdenc_context->min_bit_rate * 1000;
1732     dmem->max_rate = vdenc_context->max_bit_rate * 1000;
1733     dmem->buffer_size = vdenc_context->vbv_buffer_size_in_bit;
1734     dmem->init_buffer_fullness = vdenc_context->init_vbv_buffer_fullness_in_bit;
1735
1736     if (dmem->init_buffer_fullness > vdenc_context->vbv_buffer_size_in_bit)
1737         dmem->init_buffer_fullness = vdenc_context->vbv_buffer_size_in_bit;
1738
1739     if (vdenc_context->internal_rate_mode == I965_BRC_CBR)
1740         dmem->brc_flag |= 0x10;
1741     else if (vdenc_context->internal_rate_mode == I965_BRC_VBR)
1742         dmem->brc_flag |= 0x20;
1743
1744     dmem->frame_rate_m = vdenc_context->frames_per_100s;
1745     dmem->frame_rate_d = 100;
1746
1747     dmem->profile_level_max_frame = gen9_vdenc_get_profile_level_max_frame(ctx, encoder_context, seq_param->level_idc);
1748
1749     if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1750         dmem->num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
1751
1752     dmem->min_qp = 10;
1753     dmem->max_qp = 51;
1754
1755     input_bits_per_frame = ((double)vdenc_context->max_bit_rate * 1000 * 100) / vdenc_context->frames_per_100s;
1756     bps_ratio = input_bits_per_frame / ((double)vdenc_context->vbv_buffer_size_in_bit * 100 / vdenc_context->frames_per_100s);
1757
1758     if (bps_ratio < 0.1)
1759         bps_ratio = 0.1;
1760
1761     if (bps_ratio > 3.5)
1762         bps_ratio = 3.5;
1763
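         /*
          * The PB/I/VBR deviation thresholds are scaled non-linearly with
          * the buffer-to-bitrate ratio: each table entry is used as a base
          * raised to the bps_ratio clamped above.
          */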
1764     for (i = 0; i < 4; i++) {
1765         dmem->dev_thresh_pb0[i] = (char)(-50 * pow(vdenc_brc_dev_threshpb0_fp_neg[i], bps_ratio));
1766         dmem->dev_thresh_pb0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshpb0_fp_pos[i], bps_ratio));
1767
1768         dmem->dev_thresh_i0[i] = (char)(-50 * pow(vdenc_brc_dev_threshi0_fp_neg[i], bps_ratio));
1769         dmem->dev_thresh_i0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshi0_fp_pos[i], bps_ratio));
1770
1771         dmem->dev_thresh_vbr0[i] = (char)(-50 * pow(vdenc_brc_dev_threshvbr0_neg[i], bps_ratio));
1772         dmem->dev_thresh_vbr0[i + 4] = (char)(100 * pow(vdenc_brc_dev_threshvbr0_pos[i], bps_ratio));
1773     }
1774
1775     dmem->init_qp_ip = gen9_vdenc_calculate_initial_qp(ctx, encode_state, encoder_context);
1776
1777     if (vdenc_context->mb_brc_enabled) {
1778         dmem->mb_qp_ctrl = 1;
1779         dmem->dist_qp_delta[0] = -5;
1780         dmem->dist_qp_delta[1] = -2;
1781         dmem->dist_qp_delta[2] = 2;
1782         dmem->dist_qp_delta[3] = 5;
1783     }
1784
1785     dmem->slice_size_ctrl_en = 0;       /* TODO: add support for slice size control */
1786
1787     dmem->oscillation_qp_delta = 0;     /* TODO: add support */
1788     dmem->first_iframe_no_hrd_check = 0; /* TODO: add support */
1789
1790     // 2nd re-encode pass if possible
1791     if (vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs >= (3840 * 2160 / 256)) {
1792         dmem->top_qp_delta_thr_for_2nd_pass = 5;
1793         dmem->bottom_qp_delta_thr_for_2nd_pass = 5;
1794         dmem->top_frame_size_threshold_for_2nd_pass = 80;
1795         dmem->bottom_frame_size_threshold_for_2nd_pass = 80;
1796     } else {
1797         dmem->top_qp_delta_thr_for_2nd_pass = 2;
1798         dmem->bottom_qp_delta_thr_for_2nd_pass = 1;
1799         dmem->top_frame_size_threshold_for_2nd_pass = 32;
1800         dmem->bottom_frame_size_threshold_for_2nd_pass = 24;
1801     }
1802
1803     dmem->qp_select_for_first_pass = 1;
1804     dmem->mb_header_compensation = 1;
1805     dmem->delta_qp_adaptation = 1;
1806     dmem->max_crf_quality_factor = 52;
1807
1808     dmem->crf_quality_factor = 0;               /* TODO: add support for CRF */
1809     dmem->scenario_info = 0;
1810
1811     memcpy(&dmem->estrate_thresh_i0, vdenc_brc_estrate_thresh_i0, sizeof(dmem->estrate_thresh_i0));
1812     memcpy(&dmem->estrate_thresh_p0, vdenc_brc_estrate_thresh_p0, sizeof(dmem->estrate_thresh_p0));
1813
1814     i965_unmap_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1815 }
1816
1817 static void
1818 gen9_vdenc_huc_brc_init_reset(VADriverContextP ctx,
1819                               struct encode_state *encode_state,
1820                               struct intel_encoder_context *encoder_context)
1821 {
1822     struct intel_batchbuffer *batch = encoder_context->base.batch;
1823     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1824     struct huc_pipe_mode_select_parameter pipe_mode_select_params;
1825     struct huc_imem_state_parameter imem_state_params;
1826     struct huc_dmem_state_parameter dmem_state_params;
1827     struct huc_virtual_addr_parameter virtual_addr_params;
1828     struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
1829     struct huc_stream_object_parameter stream_object_params;
1830     struct huc_start_parameter start_params;
1831     struct vd_pipeline_flush_parameter pipeline_flush_params;
1832     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
1833
1834     vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
1835
1836     memset(&imem_state_params, 0, sizeof(imem_state_params));
1837     imem_state_params.huc_firmware_descriptor = HUC_BRC_INIT_RESET;
1838     gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);
1839
1840     memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
1841     gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);
1842
1843     gen9_vdenc_update_huc_brc_init_dmem(ctx, encode_state, encoder_context);
1844     memset(&dmem_state_params, 0, sizeof(dmem_state_params));
1845     dmem_state_params.huc_data_source_res = &vdenc_context->brc_init_reset_dmem_res;
1846     dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
1847     dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_init_dmem), 64);
1848     gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);
1849
1850     memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
1851     virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
1852     virtual_addr_params.regions[0].is_target = 1;
1853     gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);
1854
1855     memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
1856     ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
1857     ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
1858     gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);
1859
1860     memset(&stream_object_params, 0, sizeof(stream_object_params));
1861     stream_object_params.indirect_stream_in_data_length = 1;
1862     stream_object_params.indirect_stream_in_start_address = 0;
1863     gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);
1864
1865     gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);
1866
1867     memset(&start_params, 0, sizeof(start_params));
1868     start_params.last_stream_object = 1;
1869     gen9_vdenc_huc_start(ctx, encoder_context, &start_params);
1870
1871     memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
1872     pipeline_flush_params.hevc_pipeline_done = 1;
1873     pipeline_flush_params.hevc_pipeline_command_flush = 1;
1874     gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
1875
1876     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
1877     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
1878     gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
1879 }
1880
1881 static void
1882 gen9_vdenc_update_huc_update_dmem(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1883 {
1884     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1885     struct huc_brc_update_dmem *dmem;
1886     int i, num_p_in_gop = 0;
1887
1888     dmem = (struct huc_brc_update_dmem *)i965_map_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
1889
1890     if (!dmem)
1891         return;
1892
1893     dmem->brc_func = 1;
1894
1895     if (vdenc_context->brc_initted && (vdenc_context->current_pass == 0)) {
1896         vdenc_context->brc_init_previous_target_buf_full_in_bits =
1897             (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits);
1898         vdenc_context->brc_init_current_target_buf_full_in_bits += vdenc_context->brc_init_reset_input_bits_per_frame;
1899         vdenc_context->brc_target_size += vdenc_context->brc_init_reset_input_bits_per_frame;
1900     }
1901
1902     if (vdenc_context->brc_target_size > vdenc_context->vbv_buffer_size_in_bit)
1903         vdenc_context->brc_target_size -= vdenc_context->vbv_buffer_size_in_bit;
1904
1905     dmem->target_size = vdenc_context->brc_target_size;
1906
1907     dmem->peak_tx_bits_per_frame = (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits - vdenc_context->brc_init_previous_target_buf_full_in_bits);
1908
1909     dmem->target_slice_size = 0;        // TODO: add support for slice size control
1910
1911     memcpy(dmem->start_global_adjust_frame, vdenc_brc_start_global_adjust_frame, sizeof(dmem->start_global_adjust_frame));
1912     memcpy(dmem->global_rate_ratio_threshold, vdenc_brc_global_rate_ratio_threshold, sizeof(dmem->global_rate_ratio_threshold));
1913
1914     dmem->current_frame_type = (vdenc_context->frame_type + 2) % 3;      // I frame:2, P frame:0, B frame:1
1915
1916     memcpy(dmem->start_global_adjust_mult, vdenc_brc_start_global_adjust_mult, sizeof(dmem->start_global_adjust_mult));
1917     memcpy(dmem->start_global_adjust_div, vdenc_brc_start_global_adjust_div, sizeof(dmem->start_global_adjust_div));
1918     memcpy(dmem->global_rate_ratio_threshold_qp, vdenc_brc_global_rate_ratio_threshold_qp, sizeof(dmem->global_rate_ratio_threshold_qp));
1919
1920     dmem->current_pak_pass = vdenc_context->current_pass;
1921     dmem->max_num_passes = 2;
1922
1923     dmem->scene_change_detect_enable = 1;
1924     dmem->scene_change_prev_intra_percent_threshold = 96;
1925     dmem->scene_change_cur_intra_perent_threshold = 192;
1926
1927     if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1928         num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
1929
1930     for (i = 0; i < 2; i++)
1931         dmem->scene_change_width[i] = MIN((num_p_in_gop + 1) / 5, 6);
1932
1933     if (vdenc_context->is_low_delay)
1934         dmem->ip_average_coeff = 0;
1935     else
1936         dmem->ip_average_coeff = 128;
1937
1938     dmem->skip_frame_size = 0;
1939     dmem->num_of_frames_skipped = 0;
1940
1941     dmem->roi_source = 0;               // TODO: add support for dirty ROI
1942     dmem->hme_detection_enable = 0;     // TODO: support HME kernel
1943     dmem->hme_cost_enable = 1;
1944
1945     dmem->second_level_batchbuffer_size = 228;
1946
1947     i965_unmap_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
1948 }
1949
1950 static void
1951 gen9_vdenc_init_mfx_avc_img_state(VADriverContextP ctx,
1952                                   struct encode_state *encode_state,
1953                                   struct intel_encoder_context *encoder_context,
1954                                   struct gen9_mfx_avc_img_state *pstate)
1955 {
1956     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1957     VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1958     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1959
1960     memset(pstate, 0, sizeof(*pstate));
1961
1962     pstate->dw0.value = (MFX_AVC_IMG_STATE | (sizeof(*pstate) / 4 - 2));
1963
1964     pstate->dw1.frame_size_in_mbs_minus1 = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs - 1;
1965
1966     pstate->dw2.frame_width_in_mbs_minus1 = vdenc_context->frame_width_in_mbs - 1;
1967     pstate->dw2.frame_height_in_mbs_minus1 = vdenc_context->frame_height_in_mbs - 1;
1968
1969     pstate->dw3.image_structure = 0;
1970     pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
1971     pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
1972     pstate->dw3.brc_domain_rate_control_enable = 1;
1973     pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
1974     pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
1975
1976     pstate->dw4.field_picture_flag = 0;
1977     pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
1978     pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
1979     pstate->dw4.transform_8x8_idct_mode_flag = vdenc_context->transform_8x8_mode_enable;
1980     pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
1981     pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
1982     pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
1983     pstate->dw4.mb_mv_format_flag = 1;
1984     pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
1985     pstate->dw4.mv_unpacked_flag = 1;
1986     pstate->dw4.insert_test_flag = 0;
1987     pstate->dw4.load_slice_pointer_flag = 0;
1988     pstate->dw4.macroblock_stat_enable = 0;        /* Always 0 in VDEnc mode */
1989     pstate->dw4.minimum_frame_size = 0;
1990
1991     pstate->dw5.intra_mb_max_bit_flag = 1;
1992     pstate->dw5.inter_mb_max_bit_flag = 1;
1993     pstate->dw5.frame_size_over_flag = 1;
1994     pstate->dw5.frame_size_under_flag = 1;
1995     pstate->dw5.intra_mb_ipcm_flag = 1;
1996     pstate->dw5.mb_rate_ctrl_flag = 0;             /* Always 0 in VDEnc mode */
1997     pstate->dw5.non_first_pass_flag = 0;
1998     pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
1999     pstate->dw5.aq_chroma_disable = 1;
2000
2001     pstate->dw6.intra_mb_max_size = 2700;
2002     pstate->dw6.inter_mb_max_size = 4095;
2003
2004     pstate->dw8.slice_delta_qp_max0 = 0;
2005     pstate->dw8.slice_delta_qp_max1 = 0;
2006     pstate->dw8.slice_delta_qp_max2 = 0;
2007     pstate->dw8.slice_delta_qp_max3 = 0;
2008
2009     pstate->dw9.slice_delta_qp_min0 = 0;
2010     pstate->dw9.slice_delta_qp_min1 = 0;
2011     pstate->dw9.slice_delta_qp_min2 = 0;
2012     pstate->dw9.slice_delta_qp_min3 = 0;
2013
2014     pstate->dw10.frame_bitrate_min = 0;
2015     pstate->dw10.frame_bitrate_min_unit = 1;
2016     pstate->dw10.frame_bitrate_min_unit_mode = 1;
2017     pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
2018     pstate->dw10.frame_bitrate_max_unit = 1;
2019     pstate->dw10.frame_bitrate_max_unit_mode = 1;
2020
2021     pstate->dw11.frame_bitrate_min_delta = 0;
2022     pstate->dw11.frame_bitrate_max_delta = 0;
2023
2024     pstate->dw12.vad_error_logic = 1;
2025     /* TODO: set parameters DW19/DW20 for slices */
2026 }
2027
2028 static void
2029 gen9_vdenc_init_vdenc_img_state(VADriverContextP ctx,
2030                                 struct encode_state *encode_state,
2031                                 struct intel_encoder_context *encoder_context,
2032                                 struct gen9_vdenc_img_state *pstate,
2033                                 int update_cost)
2034 {
2035     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2036     VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
2037     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2038     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
2039
2040     memset(pstate, 0, sizeof(*pstate));
2041
2042     pstate->dw0.value = (VDENC_IMG_STATE | (sizeof(*pstate) / 4 - 2));
2043
2044     if (vdenc_context->frame_type == VDENC_FRAME_I) {
2045         pstate->dw4.intra_sad_measure_adjustment = 2;
2046         pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;
2047
2048         pstate->dw5.cre_prefetch_enable = 1;
2049
2050         pstate->dw9.mode0_cost = 10;
2051         pstate->dw9.mode1_cost = 0;
2052         pstate->dw9.mode2_cost = 3;
2053         pstate->dw9.mode3_cost = 30;
2054
2055         pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
2056         pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
2057         pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;
2058
2059         pstate->dw22.small_mb_size_in_word = 0xff;
2060         pstate->dw22.large_mb_size_in_word = 0xff;
2061
2062         pstate->dw27.max_hmv_r = 0x2000;
2063         pstate->dw27.max_vmv_r = 0x200;
2064
2065         pstate->dw33.qp_range_check_upper_bound = 0x33;
2066         pstate->dw33.qp_range_check_lower_bound = 0x0a;
2067         pstate->dw33.qp_range_check_value = 0x0f;
2068     } else {
2069         pstate->dw2.bidirectional_weight = 0x20;
2070
2071         pstate->dw4.subpel_mode = 3;
2072         pstate->dw4.bme_disable_for_fbr_message = 1;
2073         pstate->dw4.inter_sad_measure_adjustment = 2;
2074         pstate->dw4.intra_sad_measure_adjustment = 2;
2075         pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;
2076
2077         pstate->dw5.cre_prefetch_enable = 1;
2078
2079         pstate->dw8.non_skip_zero_mv_const_added = 1;
2080         pstate->dw8.non_skip_mb_mode_const_added = 1;
2081         pstate->dw8.ref_id_cost_mode_select = 1;
2082
2083         pstate->dw9.mode0_cost = 7;
2084         pstate->dw9.mode1_cost = 26;
2085         pstate->dw9.mode2_cost = 30;
2086         pstate->dw9.mode3_cost = 57;
2087
2088         pstate->dw10.mode4_cost = 8;
2089         pstate->dw10.mode5_cost = 2;
2090         pstate->dw10.mode6_cost = 4;
2091         pstate->dw10.mode7_cost = 6;
2092
2093         pstate->dw11.mode8_cost = 5;
2094         pstate->dw11.mode9_cost = 0;
2095         pstate->dw11.ref_id_cost = 4;
2096         pstate->dw11.chroma_intra_mode_cost = 0;
2097
2098         pstate->dw12_13.mv_cost.dw0.mv0_cost = 0;
2099         pstate->dw12_13.mv_cost.dw0.mv1_cost = 6;
2100         pstate->dw12_13.mv_cost.dw0.mv2_cost = 6;
2101         pstate->dw12_13.mv_cost.dw0.mv3_cost = 9;
2102         pstate->dw12_13.mv_cost.dw1.mv4_cost = 10;
2103         pstate->dw12_13.mv_cost.dw1.mv5_cost = 13;
2104         pstate->dw12_13.mv_cost.dw1.mv6_cost = 14;
2105         pstate->dw12_13.mv_cost.dw1.mv7_cost = 24;
2106
2107         pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
2108         pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
2109         pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;
2110
2111         pstate->dw22.small_mb_size_in_word = 0xff;
2112         pstate->dw22.large_mb_size_in_word = 0xff;
2113
2114         pstate->dw27.max_hmv_r = 0x2000;
2115         pstate->dw27.max_vmv_r = 0x200;
2116
2117         pstate->dw31.offset0_for_zone0_neg_zone1_boundary = 800;
2118
2119         pstate->dw32.offset1_for_zone1_neg_zone2_boundary = 1600;
2120         pstate->dw32.offset2_for_zone2_neg_zone3_boundary = 2400;
2121
2122         pstate->dw33.qp_range_check_upper_bound = 0x33;
2123         pstate->dw33.qp_range_check_lower_bound = 0x0a;
2124         pstate->dw33.qp_range_check_value = 0x0f;
2125
2126         pstate->dw34.midpoint_distortion = 0x640;
2127     }
2128
2129     /* ROI will be updated in HuC kernel for CBR/VBR */
2130     if (!vdenc_context->brc_enabled && vdenc_context->num_roi) {
2131         pstate->dw34.roi_enable = 1;
2132
2133         pstate->dw30.roi_qp_adjustment_for_zone1 = CLAMP(-8, 7, vdenc_context->roi[0].value);
2134
2135         if (vdenc_context->num_roi > 1)
2136             pstate->dw30.roi_qp_adjustment_for_zone2 = CLAMP(-8, 7, vdenc_context->roi[1].value);
2137
2138         if (vdenc_context->num_roi > 2)
2139             pstate->dw30.roi_qp_adjustment_for_zone3 = CLAMP(-8, 7, vdenc_context->roi[2].value);
2140     }
2141
2142     pstate->dw1.transform_8x8_flag = vdenc_context->transform_8x8_mode_enable;
2143
2144     pstate->dw3.picture_width = vdenc_context->frame_width_in_mbs;
2145
2146     pstate->dw4.forward_transform_skip_check_enable = 1; /* TODO: double-check it */
2147
2148     pstate->dw5.picture_height_minus1 = vdenc_context->frame_height_in_mbs - 1;
2149     pstate->dw5.picture_type = vdenc_context->frame_type;
2150     pstate->dw5.constrained_intra_prediction_flag  = pic_param->pic_fields.bits.constrained_intra_pred_flag;
2151
2152     if (vdenc_context->frame_type == VDENC_FRAME_P) {
2153         pstate->dw5.hme_ref1_disable = vdenc_context->num_refs[0] == 1 ? 1 : 0;
2154     }
2155
2156     pstate->dw5.mb_slice_threshold_value = 0;
2157
2158     pstate->dw6.slice_macroblock_height_minus1 = vdenc_context->frame_height_in_mbs - 1; /* single slice only */
2159
2160     if (pstate->dw1.transform_8x8_flag)
2161         pstate->dw8.luma_intra_partition_mask = 0;
2162     else
2163         pstate->dw8.luma_intra_partition_mask = (1 << 1); /* disable transform_8x8 */
2164
2165     pstate->dw14.qp_prime_y = pic_param->pic_init_qp + slice_param->slice_qp_delta;      /* TODO: check whether it is OK to use the first slice only */
2166
2167     if (update_cost) {
2168         pstate->dw9.mode0_cost = vdenc_context->mode_cost[0];
2169         pstate->dw9.mode1_cost = vdenc_context->mode_cost[1];
2170         pstate->dw9.mode2_cost = vdenc_context->mode_cost[2];
2171         pstate->dw9.mode3_cost = vdenc_context->mode_cost[3];
2172
2173         pstate->dw10.mode4_cost = vdenc_context->mode_cost[4];
2174         pstate->dw10.mode5_cost = vdenc_context->mode_cost[5];
2175         pstate->dw10.mode6_cost = vdenc_context->mode_cost[6];
2176         pstate->dw10.mode7_cost = vdenc_context->mode_cost[7];
2177
2178         pstate->dw11.mode8_cost = vdenc_context->mode_cost[8];
2179         pstate->dw11.mode9_cost = vdenc_context->mode_cost[9];
2180         pstate->dw11.ref_id_cost = vdenc_context->mode_cost[10];
2181         pstate->dw11.chroma_intra_mode_cost = vdenc_context->mode_cost[11];
2182
2183         pstate->dw12_13.mv_cost.dw0.mv0_cost = vdenc_context->mv_cost[0];
2184         pstate->dw12_13.mv_cost.dw0.mv1_cost = vdenc_context->mv_cost[1];
2185         pstate->dw12_13.mv_cost.dw0.mv2_cost = vdenc_context->mv_cost[2];
2186         pstate->dw12_13.mv_cost.dw0.mv3_cost = vdenc_context->mv_cost[3];
2187         pstate->dw12_13.mv_cost.dw1.mv4_cost = vdenc_context->mv_cost[4];
2188         pstate->dw12_13.mv_cost.dw1.mv5_cost = vdenc_context->mv_cost[5];
2189         pstate->dw12_13.mv_cost.dw1.mv6_cost = vdenc_context->mv_cost[6];
2190         pstate->dw12_13.mv_cost.dw1.mv7_cost = vdenc_context->mv_cost[7];
2191
2192         pstate->dw28_29.hme_mv_cost.dw0.mv0_cost = vdenc_context->hme_mv_cost[0];
2193         pstate->dw28_29.hme_mv_cost.dw0.mv1_cost = vdenc_context->hme_mv_cost[1];
2194         pstate->dw28_29.hme_mv_cost.dw0.mv2_cost = vdenc_context->hme_mv_cost[2];
2195         pstate->dw28_29.hme_mv_cost.dw0.mv3_cost = vdenc_context->hme_mv_cost[3];
2196         pstate->dw28_29.hme_mv_cost.dw1.mv4_cost = vdenc_context->hme_mv_cost[4];
2197         pstate->dw28_29.hme_mv_cost.dw1.mv5_cost = vdenc_context->hme_mv_cost[5];
2198         pstate->dw28_29.hme_mv_cost.dw1.mv6_cost = vdenc_context->hme_mv_cost[6];
2199         pstate->dw28_29.hme_mv_cost.dw1.mv7_cost = vdenc_context->hme_mv_cost[7];
2200     }
2201
2202     pstate->dw27.max_vmv_r = gen9_vdenc_get_max_vmv_range(seq_param->level_idc);
2203
2204     pstate->dw34.image_state_qp_override = (vdenc_context->internal_rate_mode == I965_BRC_CQP) ? 1 : 0;
2205
2206     /* TODO: check rolling I */
2207
2208     /* TODO: handle ROI */
2209
2210     /* TODO: check stream in support */
2211 }
2212
2213 static void
2214 gen9_vdenc_init_img_states(VADriverContextP ctx,
2215                            struct encode_state *encode_state,
2216                            struct intel_encoder_context *encoder_context)
2217 {
2218     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2219     struct gen9_mfx_avc_img_state *mfx_img_cmd;
2220     struct gen9_vdenc_img_state *vdenc_img_cmd;
2221     char *pbuffer;
2222
2223     pbuffer = i965_map_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
2224
2225     if (!pbuffer)
2226         return;
2227
2228     mfx_img_cmd = (struct gen9_mfx_avc_img_state *)pbuffer;
2229     gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, mfx_img_cmd);
2230     pbuffer += sizeof(*mfx_img_cmd);
2231
2232     vdenc_img_cmd = (struct gen9_vdenc_img_state *)pbuffer;
2233     gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, vdenc_img_cmd, 0);
2234     pbuffer += sizeof(*vdenc_img_cmd);
2235
2236     /* Add batch buffer end command */
2237     *((unsigned int *)pbuffer) = MI_BATCH_BUFFER_END;
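
         /*
          * This buffer (MFX_AVC_IMG_STATE + VDENC_IMG_STATE + batch buffer
          * end) is fed to the HuC BRC update pass as an input region; the
          * firmware is expected to write the adjusted image states into the
          * second-level batch buffer (see the region setup in
          * gen9_vdenc_huc_brc_update()).
          */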
2238
2239     i965_unmap_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
2240 }
2241
2242 static void
2243 gen9_vdenc_huc_brc_update_constant_data(VADriverContextP ctx,
2244                                         struct encode_state *encode_state,
2245                                         struct intel_encoder_context *encoder_context)
2246 {
2247     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2248     char *pbuffer;
2249
2250     pbuffer = i965_map_gpe_resource(&vdenc_context->brc_constant_data_res);
2251
2252     if (!pbuffer)
2253         return;
2254
2255     if (vdenc_context->internal_rate_mode == I965_BRC_VBR) {
2256         memcpy(gen9_brc_update_constant_data.dist_qp_adj_tab_i, dist_qp_adj_tab_i_vbr, sizeof(dist_qp_adj_tab_i_vbr));
2257         memcpy(gen9_brc_update_constant_data.dist_qp_adj_tab_p, dist_qp_adj_tab_p_vbr, sizeof(dist_qp_adj_tab_p_vbr));
2258         memcpy(gen9_brc_update_constant_data.dist_qp_adj_tab_b, dist_qp_adj_tab_b_vbr, sizeof(dist_qp_adj_tab_b_vbr));
2259         memcpy(gen9_brc_update_constant_data.buf_rate_adj_tab_i, buf_rate_adj_tab_i_vbr, sizeof(buf_rate_adj_tab_i_vbr));
2260         memcpy(gen9_brc_update_constant_data.buf_rate_adj_tab_p, buf_rate_adj_tab_p_vbr, sizeof(buf_rate_adj_tab_p_vbr));
2261         memcpy(gen9_brc_update_constant_data.buf_rate_adj_tab_b, buf_rate_adj_tab_b_vbr, sizeof(buf_rate_adj_tab_b_vbr));
2262     }
2263
2264     memcpy(pbuffer, &gen9_brc_update_constant_data, sizeof(gen9_brc_update_constant_data));
2265
2266     i965_unmap_gpe_resource(&vdenc_context->brc_constant_data_res);
2267 }
2268
2269 static void
2270 gen9_vdenc_huc_brc_update(VADriverContextP ctx,
2271                           struct encode_state *encode_state,
2272                           struct intel_encoder_context *encoder_context)
2273 {
2274     struct intel_batchbuffer *batch = encoder_context->base.batch;
2275     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2276     struct huc_pipe_mode_select_parameter pipe_mode_select_params;
2277     struct huc_imem_state_parameter imem_state_params;
2278     struct huc_dmem_state_parameter dmem_state_params;
2279     struct huc_virtual_addr_parameter virtual_addr_params;
2280     struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
2281     struct huc_stream_object_parameter stream_object_params;
2282     struct huc_start_parameter start_params;
2283     struct vd_pipeline_flush_parameter pipeline_flush_params;
2284     struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
2285     struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
2286     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
2287
2288     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
2289     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
2290     gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
2291
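         /*
          * Right after a BRC init/reset, MI_CONDITIONAL_BATCH_BUFFER_END
          * against huc_status2_res (filled by
          * gen9_vdenc_huc_store_huc_status2()) presumably guards against the
          * HuC firmware not being loaded, ending the batch early so the
          * update below is skipped.
          */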
2292     if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset) {
2293         struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
2294
2295         memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
2296         mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
2297         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
2298     }
2299
2300     gen9_vdenc_init_img_states(ctx, encode_state, encoder_context);
2301
2302     memset(&imem_state_params, 0, sizeof(imem_state_params));
2303     imem_state_params.huc_firmware_descriptor = HUC_BRC_UPDATE;
2304     gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);
2305
2306     memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
2307     gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);
2308
2309     gen9_vdenc_update_huc_update_dmem(ctx, encoder_context);
2310     memset(&dmem_state_params, 0, sizeof(dmem_state_params));
2311     dmem_state_params.huc_data_source_res = &vdenc_context->brc_update_dmem_res[vdenc_context->current_pass];
2312     dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
2313     dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_update_dmem), 64);
2314     gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);
2315
2316     gen9_vdenc_huc_brc_update_constant_data(ctx, encode_state, encoder_context);
2317     memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
2318     virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
2319     virtual_addr_params.regions[0].is_target = 1;
2320     virtual_addr_params.regions[1].huc_surface_res = &vdenc_context->vdenc_statistics_res;
2321     virtual_addr_params.regions[2].huc_surface_res = &vdenc_context->pak_statistics_res;
2322     virtual_addr_params.regions[3].huc_surface_res = &vdenc_context->vdenc_avc_image_state_res;
2323     virtual_addr_params.regions[4].huc_surface_res = &vdenc_context->hme_detection_summary_buffer_res;
2324     virtual_addr_params.regions[4].is_target = 1;
2325     virtual_addr_params.regions[5].huc_surface_res = &vdenc_context->brc_constant_data_res;
2326     virtual_addr_params.regions[6].huc_surface_res = &vdenc_context->second_level_batch_res;
2327     virtual_addr_params.regions[6].is_target = 1;
2328     gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);
2329
2330     memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
2331     ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
2332     ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
2333     gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);
2334
2335     memset(&stream_object_params, 0, sizeof(stream_object_params));
2336     stream_object_params.indirect_stream_in_data_length = 1;
2337     stream_object_params.indirect_stream_in_start_address = 0;
2338     gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);
2339
2340     gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);
2341
2342     memset(&start_params, 0, sizeof(start_params));
2343     start_params.last_stream_object = 1;
2344     gen9_vdenc_huc_start(ctx, encoder_context, &start_params);
2345
2346     memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
2347     pipeline_flush_params.hevc_pipeline_done = 1;
2348     pipeline_flush_params.hevc_pipeline_command_flush = 1;
2349     gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
2350
2351     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
2352     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
2353     gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
2354
2355     /* Store HUC_STATUS */
2356     memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
2357     mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS;
2358     mi_store_register_mem_params.bo = vdenc_context->huc_status_res.bo;
2359     gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
2360
2361     /* Write HUC_STATUS mask (1 << 31) */
2362     memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
2363     mi_store_data_imm_params.bo = vdenc_context->huc_status_res.bo;
2364     mi_store_data_imm_params.offset = 4;
2365     mi_store_data_imm_params.dw0 = (1 << 31);
2366     gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
2367 }
2368
2369 static void
2370 gen9_vdenc_mfx_pipe_mode_select(VADriverContextP ctx,
2371                                 struct encode_state *encode_state,
2372                                 struct intel_encoder_context *encoder_context)
2373 {
2374     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2375     struct intel_batchbuffer *batch = encoder_context->base.batch;
2376
2377     BEGIN_BCS_BATCH(batch, 5);
2378
2379     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2380     OUT_BCS_BATCH(batch,
2381                   (1 << 29) |
2382                   (MFX_LONG_MODE << 17) |       /* Must be long format for encoder */
2383                   (MFD_MODE_VLD << 15) |
2384                   (1 << 13) |                   /* VDEnc mode */
2385                   ((!!vdenc_context->post_deblocking_output_res.bo) << 9)  |    /* Post Deblocking Output */
2386                   ((!!vdenc_context->pre_deblocking_output_res.bo) << 8)  |     /* Pre Deblocking Output */
2387                   (1 << 7)  |                   /* Scaled surface enable */
2388                   (1 << 6)  |                   /* Frame statistics stream out enable, always '1' in VDEnc mode */
2389                   (1 << 4)  |                   /* encoding mode */
2390                   (MFX_FORMAT_AVC << 0));
2391     OUT_BCS_BATCH(batch, 0);
2392     OUT_BCS_BATCH(batch, 0);
2393     OUT_BCS_BATCH(batch, 0);
2394
2395     ADVANCE_BCS_BATCH(batch);
2396 }
2397
2398 static void
2399 gen9_vdenc_mfx_surface_state(VADriverContextP ctx,
2400                              struct intel_encoder_context *encoder_context,
2401                              struct i965_gpe_resource *gpe_resource,
2402                              int id)
2403 {
2404     struct intel_batchbuffer *batch = encoder_context->base.batch;
2405
2406     BEGIN_BCS_BATCH(batch, 6);
2407
2408     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2409     OUT_BCS_BATCH(batch, id);
2410     OUT_BCS_BATCH(batch,
2411                   ((gpe_resource->height - 1) << 18) |
2412                   ((gpe_resource->width - 1) << 4));
2413     OUT_BCS_BATCH(batch,
2414                   (MFX_SURFACE_PLANAR_420_8 << 28) |    /* 420 planar YUV surface */
2415                   (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
2416                   ((gpe_resource->pitch - 1) << 3) |    /* pitch */
2417                   (0 << 2)  |                           /* must be 0 for interleave U/V */
2418                   (1 << 1)  |                           /* must be tiled */
2419                   (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
2420     OUT_BCS_BATCH(batch,
2421                   (0 << 16) |                           /* must be 0 for interleave U/V */
2422                   (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
2423     OUT_BCS_BATCH(batch,
2424                   (0 << 16) |                           /* must be 0 for interleave U/V */
2425                   (gpe_resource->y_cb_offset));         /* y offset for V(cr) */
2426
2427     ADVANCE_BCS_BATCH(batch);
2428 }
2429
2430 static void
2431 gen9_vdenc_mfx_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2432 {
2433     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2434     struct intel_batchbuffer *batch = encoder_context->base.batch;
2435     int i;
2436
2437     BEGIN_BCS_BATCH(batch, 65);
2438
2439     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));
2440
2441     /* the DW1-3 is for pre_deblocking */
2442     OUT_BUFFER_3DW(batch, vdenc_context->pre_deblocking_output_res.bo, 1, 0, 0);
2443
2444     /* the DW4-6 is for the post_deblocking */
2445     OUT_BUFFER_3DW(batch, vdenc_context->post_deblocking_output_res.bo, 1, 0, 0);
2446
2447     /* the DW7-9 is for the uncompressed_picture */
2448     OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);
2449
2450     /* the DW10-12 is for PAK information (write) */
2451     OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 1, 0, 0);
2452
2453     /* the DW13-15 is for the intra_row_store_scratch */
2454     OUT_BUFFER_3DW(batch, vdenc_context->mfx_intra_row_store_scratch_res.bo, 1, 0, 0);
2455
2456     /* the DW16-18 is for the deblocking filter */
2457     OUT_BUFFER_3DW(batch, vdenc_context->mfx_deblocking_filter_row_store_scratch_res.bo, 1, 0, 0);
2458
2459     /* the DW 19-50 is for Reference pictures*/
2460     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
2461         OUT_BUFFER_2DW(batch, vdenc_context->list_reference_res[i].bo, 0, 0);
2462     }
2463
2464     /* DW 51, reference picture attributes */
2465     OUT_BCS_BATCH(batch, 0);
2466
2467     /* The DW 52-54 is for PAK information (read) */
2468     OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 0, 0, 0);
2469
2470     /* the DW 55-57 is the ILDB buffer */
2471     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2472
2473     /* the DW 58-60 is the second ILDB buffer */
2474     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2475
2476     /* DW 61, memory compress enable & mode */
2477     OUT_BCS_BATCH(batch, 0);
2478
2479     /* the DW 62-64 is the 4x Down Scaling surface */
2480     OUT_BUFFER_3DW(batch, vdenc_context->scaled_4x_recon_surface_res.bo, 0, 0, 0);
2481
2482     ADVANCE_BCS_BATCH(batch);
2483 }
2484
2485 static void
2486 gen9_vdenc_mfx_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2487 {
2488     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2489     struct intel_batchbuffer *batch = encoder_context->base.batch;
2490
2491     BEGIN_BCS_BATCH(batch, 26);
2492
2493     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
2494     /* The DW1-5 is for the MFX indirect bitstream offset, ignore for VDEnc mode */
2495     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2496     OUT_BUFFER_2DW(batch, NULL, 0, 0);
2497
2498     /* the DW6-10 is for MFX Indirect MV Object Base Address, ignore for VDEnc mode */
2499     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2500     OUT_BUFFER_2DW(batch, NULL, 0, 0);
2501
2502     /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
2503     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2504     OUT_BUFFER_2DW(batch, NULL, 0, 0);
2505
2506     /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
2507     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2508     OUT_BUFFER_2DW(batch, NULL, 0, 0);
2509
2510     /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
2511      * Note: an offset is specified in MFX_AVC_SLICE_STATE
2512      */
2513     OUT_BUFFER_3DW(batch,
2514                    vdenc_context->compressed_bitstream.res.bo,
2515                    1,
2516                    0,
2517                    0);
2518     OUT_BUFFER_2DW(batch,
2519                    vdenc_context->compressed_bitstream.res.bo,
2520                    1,
2521                    vdenc_context->compressed_bitstream.end_offset);
2522
2523     ADVANCE_BCS_BATCH(batch);
2524 }
2525
2526 static void
2527 gen9_vdenc_mfx_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2528 {
2529     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2530     struct intel_batchbuffer *batch = encoder_context->base.batch;
2531
2532     BEGIN_BCS_BATCH(batch, 10);
2533
2534     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2535
2536     /* The DW1-3 is for bsd/mpc row store scratch buffer */
2537     OUT_BUFFER_3DW(batch, vdenc_context->mfx_bsd_mpc_row_store_scratch_res.bo, 1, 0, 0);
2538
2539     /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
2540     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2541
2542     /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
2543     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2544
2545     ADVANCE_BCS_BATCH(batch);
2546 }
2547
2548 static void
2549 gen9_vdenc_mfx_qm_state(VADriverContextP ctx,
2550                         int qm_type,
2551                         unsigned int *qm,
2552                         int qm_length,
2553                         struct intel_encoder_context *encoder_context)
2554 {
2555     struct intel_batchbuffer *batch = encoder_context->base.batch;
2556     unsigned int qm_buffer[16] = { 0 };  /* zero the tail: MFX_QM_STATE always uploads 16 DWs */
2557
2558     assert(qm_length <= 16);
2559     assert(sizeof(*qm) == 4);
2560     memcpy(qm_buffer, qm, qm_length * 4);
2561
2562     BEGIN_BCS_BATCH(batch, 18);
2563     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
2564     OUT_BCS_BATCH(batch, qm_type << 0);
2565     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
2566     ADVANCE_BCS_BATCH(batch);
2567 }
2568
2569 static void
2570 gen9_vdenc_mfx_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2571 {
2572     /* TODO: add support for non flat matrix */
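    /* Flat scaling matrix: every byte is 16 (0x10), i.e. the default AVC
     * scaling list, packed four entries per DWord for MFX_QM_STATE. */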
2573     unsigned int qm[16] = {
2574         0x10101010, 0x10101010, 0x10101010, 0x10101010,
2575         0x10101010, 0x10101010, 0x10101010, 0x10101010,
2576         0x10101010, 0x10101010, 0x10101010, 0x10101010,
2577         0x10101010, 0x10101010, 0x10101010, 0x10101010
2578     };
2579
2580     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
2581     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
2582     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
2583     gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
2584 }
2585
2586 static void
2587 gen9_vdenc_mfx_fqm_state(VADriverContextP ctx,
2588                          int fqm_type,
2589                          unsigned int *fqm,
2590                          int fqm_length,
2591                          struct intel_encoder_context *encoder_context)
2592 {
2593     struct intel_batchbuffer *batch = encoder_context->base.batch;
2594     unsigned int fqm_buffer[32] = { 0 };  /* zero the tail: MFX_FQM_STATE always uploads 32 DWs */
2595
2596     assert(fqm_length <= 32);
2597     assert(sizeof(*fqm) == 4);
2598     memcpy(fqm_buffer, fqm, fqm_length * 4);
2599
2600     BEGIN_BCS_BATCH(batch, 34);
2601     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
2602     OUT_BCS_BATCH(batch, fqm_type << 0);
2603     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
2604     ADVANCE_BCS_BATCH(batch);
2605 }
2606
2607 static void
2608 gen9_vdenc_mfx_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2609 {
2610     /* TODO: add support for non flat matrix */
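    /* Flat forward quantizer matrix: each 16-bit entry is 0x1000, which is
     * presumably 65536 / 16, i.e. the reciprocal of the flat scaling value
     * above, packed two entries per DWord for MFX_FQM_STATE. */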
2611     unsigned int qm[32] = {
2612         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2613         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2614         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2615         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2616         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2617         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2618         0x10001000, 0x10001000, 0x10001000, 0x10001000,
2619         0x10001000, 0x10001000, 0x10001000, 0x10001000
2620     };
2621
2622     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
2623     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
2624     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
2625     gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
2626 }
2627
2628 static void
2629 gen9_vdenc_mfx_avc_img_state(VADriverContextP ctx,
2630                              struct encode_state *encode_state,
2631                              struct intel_encoder_context *encoder_context)
2632 {
2633     struct intel_batchbuffer *batch = encoder_context->base.batch;
2634     struct gen9_mfx_avc_img_state mfx_img_cmd;
2635
2636     gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &mfx_img_cmd);
2637
2638     BEGIN_BCS_BATCH(batch, (sizeof(mfx_img_cmd) >> 2));
2639     intel_batchbuffer_data(batch, &mfx_img_cmd, sizeof(mfx_img_cmd));
2640     ADVANCE_BCS_BATCH(batch);
2641 }
2642
2643 static void
2644 gen9_vdenc_vdenc_pipe_mode_select(VADriverContextP ctx,
2645                                   struct encode_state *encode_state,
2646                                   struct intel_encoder_context *encoder_context)
2647 {
2648     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2649     struct intel_batchbuffer *batch = encoder_context->base.batch;
2650
2651     BEGIN_BCS_BATCH(batch, 2);
2652
2653     OUT_BCS_BATCH(batch, VDENC_PIPE_MODE_SELECT | (2 - 2));
2654     OUT_BCS_BATCH(batch,
2655                   (vdenc_context->vdenc_streamin_enable << 9) |
2656                   (vdenc_context->vdenc_pak_threshold_check_enable << 8) |
2657                   (1 << 7)  |                   /* Tlb prefetch enable */
2658                   (1 << 5)  |                   /* Frame Statistics Stream-Out Enable */
2659                   (VDENC_CODEC_AVC << 0));
2660
2661     ADVANCE_BCS_BATCH(batch);
2662 }
2663
2664 static void
2665 gen9_vdenc_vdenc_surface_state(VADriverContextP ctx,
2666                                struct intel_encoder_context *encoder_context,
2667                                struct i965_gpe_resource *gpe_resource,
2668                                int vdenc_surface_cmd)
2669 {
2670     struct intel_batchbuffer *batch = encoder_context->base.batch;
2671
2672     BEGIN_BCS_BATCH(batch, 6);
2673
2674     OUT_BCS_BATCH(batch, vdenc_surface_cmd | (6 - 2));
2675     OUT_BCS_BATCH(batch, 0);
2676     OUT_BCS_BATCH(batch,
2677                   ((gpe_resource->height - 1) << 18) |
2678                   ((gpe_resource->width - 1) << 4));
2679     OUT_BCS_BATCH(batch,
2680                   (VDENC_SURFACE_PLANAR_420_8 << 28) |  /* 420 planar YUV surface only on SKL */
2681                   (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
2682                   ((gpe_resource->pitch - 1) << 3) |    /* pitch */
2683                   (0 << 2)  |                           /* must be 0 for interleave U/V */
2684                   (1 << 1)  |                           /* must be tiled */
2685                   (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
2686     OUT_BCS_BATCH(batch,
2687                   (0 << 16) |                           /* must be 0 for interleave U/V */
2688                   (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
2689     OUT_BCS_BATCH(batch,
2690                   (0 << 16) |                           /* must be 0 for interleave U/V */
2691                   (gpe_resource->y_cb_offset));         /* y offset for v(cr) */
2692
2693     ADVANCE_BCS_BATCH(batch);
2694 }
2695
2696 static void
2697 gen9_vdenc_vdenc_src_surface_state(VADriverContextP ctx,
2698                                    struct intel_encoder_context *encoder_context,
2699                                    struct i965_gpe_resource *gpe_resource)
2700 {
2701     gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_SRC_SURFACE_STATE);
2702 }
2703
2704 static void
2705 gen9_vdenc_vdenc_ref_surface_state(VADriverContextP ctx,
2706                                    struct intel_encoder_context *encoder_context,
2707                                    struct i965_gpe_resource *gpe_resource)
2708 {
2709     gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_REF_SURFACE_STATE);
2710 }
2711
2712 static void
2713 gen9_vdenc_vdenc_ds_ref_surface_state(VADriverContextP ctx,
2714                                       struct intel_encoder_context *encoder_context,
2715                                       struct i965_gpe_resource *gpe_resource)
2716 {
2717     gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_DS_REF_SURFACE_STATE);
2718 }
2719
2720 static void
2721 gen9_vdenc_vdenc_pipe_buf_addr_state(VADriverContextP ctx,
2722                                      struct encode_state *encode_state,
2723                                      struct intel_encoder_context *encoder_context)
2724 {
2725     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2726     struct intel_batchbuffer *batch = encoder_context->base.batch;
2727
2728     BEGIN_BCS_BATCH(batch, 37);
2729
2730     OUT_BCS_BATCH(batch, VDENC_PIPE_BUF_ADDR_STATE | (37 - 2));
2731
2732     /* DW1-6 for DS FWD REF0/REF1 */
2733     OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
2734     OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
2735
2736     /* DW7-9 for DS BWD REF0, ignored on SKL */
2737     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2738
2739     /* DW10-12 for uncompressed input data */
2740     OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);
2741
2742     /* DW13-DW15 for streamin data */
2743     if (vdenc_context->vdenc_streamin_enable)
2744         OUT_BUFFER_3DW(batch, vdenc_context->vdenc_streamin_res.bo, 0, 0, 0);
2745     else
2746         OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2747
2748     /* DW16-DW18 for row scratch buffer */
2749     OUT_BUFFER_3DW(batch, vdenc_context->vdenc_row_store_scratch_res.bo, 1, 0, 0);
2750
2751     /* DW19-DW21, ignored on SKL */
2752     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2753
2754     /* DW22-DW27 for FWD REF0/REF1 */
2755     OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
2756     OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
2757
2758     /* DW28-DW30 for FWD REF2, ignored on SKL */
2759     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2760
2761     /* DW31-DW33 for BWD REF0, ignored on SKL */
2762     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2763
2764     /* DW34-DW36 for VDEnc statistics streamout */
2765     OUT_BUFFER_3DW(batch, vdenc_context->vdenc_statistics_res.bo, 1, 0, 0);
2766
2767     ADVANCE_BCS_BATCH(batch);
2768 }
2769
2770 static void
2771 gen9_vdenc_vdenc_const_qpt_state(VADriverContextP ctx,
2772                                  struct encode_state *encode_state,
2773                                  struct intel_encoder_context *encoder_context)
2774 {
2775     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2776     struct intel_batchbuffer *batch = encoder_context->base.batch;
2777
2778     BEGIN_BCS_BATCH(batch, 61);
2779
2780     OUT_BCS_BATCH(batch, VDENC_CONST_QPT_STATE | (61 - 2));
2781
2782     if (vdenc_context->frame_type == VDENC_FRAME_I) {
2783         /* DW1-DW11 */
2784         intel_batchbuffer_data(batch, vdenc_const_qp_lambda, sizeof(vdenc_const_qp_lambda));
2785
2786         /* DW12-DW25 */
2787         intel_batchbuffer_data(batch, vdenc_const_skip_threshold, sizeof(vdenc_const_skip_threshold));
2788
2789         /* DW26-DW39 */
2790         intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_0, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0));
2791
2792         /* DW40-DW46 */
2793         intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_1, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1));
2794
2795         /* DW47-DW53 */
2796         intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_2, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2));
2797
2798         /* DW54-DW60 */
2799         intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_3, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3));
2800     } else {
2801         int i;
2802
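        /* Note: this scales the shared vdenc_const_skip_threshold_p table in
         * place, so the thresholds grow by a factor of 3 on every P frame
         * encoded with this table; they are not reset per frame. */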
2803         for (i = 0; i < 28; i++) {
2804             vdenc_const_skip_threshold_p[i] *= 3;
2805         }
2806
2807         /* DW1-DW11 */
2808         intel_batchbuffer_data(batch, vdenc_const_qp_lambda_p, sizeof(vdenc_const_qp_lambda_p));
2809
2810         /* DW12-DW25 */
2811         intel_batchbuffer_data(batch, vdenc_const_skip_threshold_p, sizeof(vdenc_const_skip_threshold_p));
2812
2813         /* DW26-DW39 */
2814         intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_0_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0_p));
2815
2816         /* DW40-DW46 */
2817         intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_1_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1_p));
2818
2819         /* DW47-DW53 */
2820         intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_2_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2_p));
2821
2822         /* DW54-DW60 */
2823         intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_3_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3_p));
2824     }
2825
2826     ADVANCE_BCS_BATCH(batch);
2827 }
2828
2829 static void
2830 gen9_vdenc_vdenc_walker_state(VADriverContextP ctx,
2831                               struct encode_state *encode_state,
2832                               struct intel_encoder_context *encoder_context)
2833 {
2834     struct intel_batchbuffer *batch = encoder_context->base.batch;
2835
2836     BEGIN_BCS_BATCH(batch, 2);
2837
2838     OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (2 - 2));
2839     OUT_BCS_BATCH(batch, 0); /* All fields are set to 0 */
2840
2841     ADVANCE_BCS_BATCH(batch);
2842 }
2843
2844 static void
2845 gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
2846                            struct encode_state *encode_state,
2847                            struct intel_encoder_context *encoder_context)
2848 {
2849     struct intel_batchbuffer *batch = encoder_context->base.batch;
2850     struct gen9_vdenc_img_state vdenc_img_cmd;
2851
2852     gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, &vdenc_img_cmd, 1);
2853
2854     BEGIN_BCS_BATCH(batch, (sizeof(vdenc_img_cmd) >> 2));
2855     intel_batchbuffer_data(batch, &vdenc_img_cmd, sizeof(vdenc_img_cmd));
2856     ADVANCE_BCS_BATCH(batch);
2857 }
2858
2859 extern int
2860 intel_avc_enc_slice_type_fixup(int slice_type);
2861
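/*
 * Emit an MFX_INSERT_OBJECT command so the PAK writes packed header bits
 * directly into the output bitstream.  DW1 describes the payload: the number
 * of valid bits in the last DWord (0 means a full 32 bits), how many leading
 * bytes to skip for emulation-prevention handling, and the emulation-byte /
 * last-header / end-of-slice / slice-header flags.  The header DWords follow
 * as inline data.
 */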
2862 static void
2863 gen9_vdenc_mfx_avc_insert_object(VADriverContextP ctx,
2864                                  struct intel_encoder_context *encoder_context,
2865                                  unsigned int *insert_data, int length_in_dws, int data_bits_in_last_dw,
2866                                  int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
2867                                  int slice_header_indicator)
2868 {
2869     struct intel_batchbuffer *batch = encoder_context->base.batch;
2870
2871     if (data_bits_in_last_dw == 0)
2872         data_bits_in_last_dw = 32;
2873
2874     BEGIN_BCS_BATCH(batch, length_in_dws + 2);
2875
2876     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (length_in_dws));
2877     OUT_BCS_BATCH(batch,
2878                   (0 << 16) |   /* always start at offset 0 */
2879                   (slice_header_indicator << 14) |
2880                   (data_bits_in_last_dw << 8) |
2881                   (skip_emul_byte_count << 4) |
2882                   (!!emulation_flag << 3) |
2883                   ((!!is_last_header) << 2) |
2884                   ((!!is_end_of_slice) << 1) |
2885                   (0 << 0));    /* TODO: check this flag */
2886     intel_batchbuffer_data(batch, insert_data, length_in_dws * 4);
2887
2888     ADVANCE_BCS_BATCH(batch);
2889 }
2890
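/*
 * Insert the per-slice packed data: all raw packed buffers attached to the
 * slice are emitted first (the slice header type is skipped here), then the
 * slice header itself is inserted as the last header, either from the
 * application-provided packed slice header or from one built by the driver.
 */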
2891 static void
2892 gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
2893                                             struct encode_state *encode_state,
2894                                             struct intel_encoder_context *encoder_context,
2895                                             int slice_index)
2896 {
2897     VAEncPackedHeaderParameterBuffer *param = NULL;
2898     unsigned int length_in_bits;
2899     unsigned int *header_data = NULL;
2900     int count, i, start_index;
2901     int slice_header_index;
2902
2903     if (encode_state->slice_header_index[slice_index] == 0)
2904         slice_header_index = -1;
2905     else
2906         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
2907
2908     count = encode_state->slice_rawdata_count[slice_index];
2909     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
2910
2911     for (i = 0; i < count; i++) {
2912         unsigned int skip_emul_byte_cnt;
2913
2914         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
2915
2916         param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
2917
2918         /* skip the slice header packed data type as it is inserted last */
2919         if (param->type == VAEncPackedHeaderSlice)
2920             continue;
2921
2922         length_in_bits = param->bit_length;
2923
2924         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2925
2926         /* as the slice header is still required, the last header flag is set to
2927          * zero.
2928          */
2929         gen9_vdenc_mfx_avc_insert_object(ctx,
2930                                          encoder_context,
2931                                          header_data,
2932                                          ALIGN(length_in_bits, 32) >> 5,
2933                                          length_in_bits & 0x1f,
2934                                          skip_emul_byte_cnt,
2935                                          0,
2936                                          0,
2937                                          !param->has_emulation_bytes,
2938                                          0);
2939     }
2940
2941     if (slice_header_index == -1) {
2942         VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
2943         VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2944         VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
2945         unsigned char *slice_header = NULL;
2946         int slice_header_length_in_bits = 0;
2947
2948         /* No slice header data is passed, so the driver needs to generate it */
2949         /* For the Normal H264 */
2950         slice_header_length_in_bits = build_avc_slice_header(seq_param,
2951                                                              pic_param,
2952                                                              slice_params,
2953                                                              &slice_header);
2954         gen9_vdenc_mfx_avc_insert_object(ctx,
2955                                          encoder_context,
2956                                          (unsigned int *)slice_header,
2957                                          ALIGN(slice_header_length_in_bits, 32) >> 5,
2958                                          slice_header_length_in_bits & 0x1f,
2959                                          5,  /* first 5 bytes are start code + nal unit type */
2960                                          1, 0, 1,
2961                                          1);
2962
2963         free(slice_header);
2964     } else {
2965         unsigned int skip_emul_byte_cnt;
2966
2967         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
2968
2969         param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
2970         length_in_bits = param->bit_length;
2971
2972         /* as the slice header is the last header data for one slice,
2973          * the last header flag is set to one.
2974          */
2975         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2976
2977         gen9_vdenc_mfx_avc_insert_object(ctx,
2978                                          encoder_context,
2979                                          header_data,
2980                                          ALIGN(length_in_bits, 32) >> 5,
2981                                          length_in_bits & 0x1f,
2982                                          skip_emul_byte_cnt,
2983                                          1,
2984                                          0,
2985                                          !param->has_emulation_bytes,
2986                                          1);
2987     }
2988
2989     return;
2990 }
2991
2992 static void
2993 gen9_vdenc_mfx_avc_insert_headers(VADriverContextP ctx,
2994                                  struct encode_state *encode_state,
2995                                  struct intel_encoder_context *encoder_context,
2996                                  VAEncSliceParameterBufferH264 *slice_param,
2997                                  int slice_index)
2998 {
2999     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3000     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
3001     unsigned int internal_rate_mode = vdenc_context->internal_rate_mode;
3002     unsigned int skip_emul_byte_cnt;
3003
3004     if (slice_index == 0) {
3005         if (encode_state->packed_header_data[idx]) {
3006             VAEncPackedHeaderParameterBuffer *param = NULL;
3007             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3008             unsigned int length_in_bits;
3009
3010             assert(encode_state->packed_header_param[idx]);
3011             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3012             length_in_bits = param->bit_length;
3013
3014             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3015             gen9_vdenc_mfx_avc_insert_object(ctx,
3016                                              encoder_context,
3017                                              header_data,
3018                                              ALIGN(length_in_bits, 32) >> 5,
3019                                              length_in_bits & 0x1f,
3020                                              skip_emul_byte_cnt,
3021                                              0,
3022                                              0,
3023                                              !param->has_emulation_bytes,
3024                                              0);
3025         }
3026
3027         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
3028
3029         if (encode_state->packed_header_data[idx]) {
3030             VAEncPackedHeaderParameterBuffer *param = NULL;
3031             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3032             unsigned int length_in_bits;
3033
3034             assert(encode_state->packed_header_param[idx]);
3035             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3036             length_in_bits = param->bit_length;
3037
3038             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3039
3040             gen9_vdenc_mfx_avc_insert_object(ctx,
3041                                              encoder_context,
3042                                              header_data,
3043                                              ALIGN(length_in_bits, 32) >> 5,
3044                                              length_in_bits & 0x1f,
3045                                              skip_emul_byte_cnt,
3046                                              0,
3047                                              0,
3048                                              !param->has_emulation_bytes,
3049                                              0);
3050         }
3051
3052         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
3053
3054         if (encode_state->packed_header_data[idx]) {
3055             VAEncPackedHeaderParameterBuffer *param = NULL;
3056             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3057             unsigned int length_in_bits;
3058
3059             assert(encode_state->packed_header_param[idx]);
3060             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3061             length_in_bits = param->bit_length;
3062
3063             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3064             gen9_vdenc_mfx_avc_insert_object(ctx,
3065                                              encoder_context,
3066                                              header_data,
3067                                              ALIGN(length_in_bits, 32) >> 5,
3068                                              length_in_bits & 0x1f,
3069                                              skip_emul_byte_cnt,
3070                                              0,
3071                                              0,
3072                                              !param->has_emulation_bytes,
3073                                              0);
3074         } else if (internal_rate_mode == I965_BRC_CBR) {
3075             /* TODO: insert others */
3076         }
3077     }
3078
3079     gen9_vdenc_mfx_avc_insert_slice_packed_data(ctx,
3080                                                 encode_state,
3081                                                 encoder_context,
3082                                                 slice_index);
3083 }
3084
3085 static void
3086 gen9_vdenc_mfx_avc_slice_state(VADriverContextP ctx,
3087                                struct encode_state *encode_state,
3088                                struct intel_encoder_context *encoder_context,
3089                                VAEncPictureParameterBufferH264 *pic_param,
3090                                VAEncSliceParameterBufferH264 *slice_param,
3091                                VAEncSliceParameterBufferH264 *next_slice_param)
3092 {
3093     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3094     struct intel_batchbuffer *batch = encoder_context->base.batch;
3095     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
3096     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
3097     unsigned char correct[6], grow, shrink;
3098     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
3099     int max_qp_n, max_qp_p;
3100     int i;
3101     int weighted_pred_idc = 0;
3102     int num_ref_l0 = 0, num_ref_l1 = 0;
3103     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3104     int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; /* TODO: fix for CBR & VBR */
3105
3106     slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
3107     slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
3108
3109     if (next_slice_param) {
3110         next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
3111         next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
3112     } else {
3113         next_slice_hor_pos = 0;
3114         next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
3115     }
3116
3117     if (slice_type == SLICE_TYPE_I) {
3118         luma_log2_weight_denom = 0;
3119         chroma_log2_weight_denom = 0;
3120     } else if (slice_type == SLICE_TYPE_P) {
3121         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
3122         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
3123
3124         if (slice_param->num_ref_idx_active_override_flag)
3125             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3126     } else if (slice_type == SLICE_TYPE_B) {
3127         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
3128         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
3129         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
3130
3131         if (slice_param->num_ref_idx_active_override_flag) {
3132             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3133             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
3134         }
3135
3136         if (weighted_pred_idc == 2) {
3137             /* 8.4.3 - Derivation process for prediction weights (8-279) */
3138             luma_log2_weight_denom = 5;
3139             chroma_log2_weight_denom = 5;
3140         }
3141     }
3142
3143     max_qp_n = 0;       /* TODO: update it */
3144     max_qp_p = 0;       /* TODO: update it */
3145     grow = 0;           /* TODO: update it */
3146     shrink = 0;         /* TODO: update it */
3147
3148     for (i = 0; i < 6; i++)
3149         correct[i] = 0; /* TODO: update it */
3150
3151     BEGIN_BCS_BATCH(batch, 11);
3152
3153     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
3154     OUT_BCS_BATCH(batch, slice_type);
3155     OUT_BCS_BATCH(batch,
3156                   (num_ref_l0 << 16) |
3157                   (num_ref_l1 << 24) |
3158                   (chroma_log2_weight_denom << 8) |
3159                   (luma_log2_weight_denom << 0));
3160     OUT_BCS_BATCH(batch,
3161                   (weighted_pred_idc << 30) |
3162                   (slice_param->direct_spatial_mv_pred_flag << 29) |
3163                   (slice_param->disable_deblocking_filter_idc << 27) |
3164                   (slice_param->cabac_init_idc << 24) |
3165                   (slice_qp << 16) |
3166                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
3167                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
3168
3169     OUT_BCS_BATCH(batch,
3170                   slice_ver_pos << 24 |
3171                   slice_hor_pos << 16 |
3172                   slice_param->macroblock_address);
3173     OUT_BCS_BATCH(batch,
3174                   next_slice_ver_pos << 16 |
3175                   next_slice_hor_pos);
3176
3177     OUT_BCS_BATCH(batch,
3178                   (0 << 31) |           /* TODO: ignore it for VDENC ??? */
3179                   (!slice_param->macroblock_address << 30) |    /* ResetRateControlCounter */
3180                   (2 << 28) |           /* Loose Rate Control */
3181                   (0 << 24) |           /* RC Stable Tolerance */
3182                   (0 << 23) |           /* RC Panic Enable */
3183                   (1 << 22) |           /* CBP mode */
3184                   (0 << 21) |           /* MB Type Direct Conversion, 0: Enable, 1: Disable */
3185                   (0 << 20) |           /* MB Type Skip Conversion, 0: Enable, 1: Disable */
3186                   (!next_slice_param << 19) |                   /* Is Last Slice */
3187                   (0 << 18) |           /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
3188                   (1 << 17) |           /* HeaderPresentFlag */
3189                   (1 << 16) |           /* SliceData PresentFlag */
3190                   (0 << 15) |           /* TailPresentFlag, TODO: check it on VDEnc  */
3191                   (1 << 13) |           /* RBSP NAL TYPE */
3192                   (1 << 12));           /* CabacZeroWordInsertionEnable */
3193
3194     OUT_BCS_BATCH(batch, vdenc_context->compressed_bitstream.start_offset);
3195
3196     OUT_BCS_BATCH(batch,
3197                   (max_qp_n << 24) |     /*Target QP - 24 is lowest QP*/
3198                   (max_qp_p << 16) |     /*Target QP + 20 is highest QP*/
3199                   (shrink << 8) |
3200                   (grow << 0));
3201     OUT_BCS_BATCH(batch,
3202                   (1 << 31) |
3203                   (3 << 28) |
3204                   (1 << 27) |
3205                   (5 << 24) |
3206                   (correct[5] << 20) |
3207                   (correct[4] << 16) |
3208                   (correct[3] << 12) |
3209                   (correct[2] << 8) |
3210                   (correct[1] << 4) |
3211                   (correct[0] << 0));
3212     OUT_BCS_BATCH(batch, 0);
3213
3214     ADVANCE_BCS_BATCH(batch);
3215 }
3216
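/*
 * Pack one reference entry byte for MFX_AVC_REF_IDX_STATE: bit 6 is the
 * long-term flag, bit 5 is set for a frame picture (neither or both field
 * flags), bits 4:1 hold the frame store index and bit 0 is set for a
 * bottom-field-only reference.
 */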
3217 static uint8_t
3218 gen9_vdenc_mfx_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
3219 {
3220     unsigned int is_long_term =
3221         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
3222     unsigned int is_top_field =
3223         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
3224     unsigned int is_bottom_field =
3225         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
3226
3227     return ((is_long_term                         << 6) |
3228             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
3229             (frame_store_id                       << 1) |
3230             ((is_top_field ^ 1) & is_bottom_field));
3231 }
3232
3233 static void
3234 gen9_vdenc_mfx_avc_ref_idx_state(VADriverContextP ctx,
3235                                  struct encode_state *encode_state,
3236                                  struct intel_encoder_context *encoder_context,
3237                                  VAEncSliceParameterBufferH264 *slice_param)
3238 {
3239     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3240     struct intel_batchbuffer *batch = encoder_context->base.batch;
3241     VAPictureH264 *ref_pic;
3242     int i, slice_type, ref_idx_shift;
3243     unsigned int fwd_ref_entry;
3244
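    /* Start with 0x80 in every entry byte (marks a non-existing reference),
     * then patch in the bytes for the active forward references below. */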
3245     fwd_ref_entry = 0x80808080;
3246     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3247
3248     for (i = 0; i < MAX(vdenc_context->num_refs[0], 2); i++) {
3249         ref_pic = &slice_param->RefPicList0[i];
3250         ref_idx_shift = vdenc_context->list_ref_idx[0][i] * 8;
3251
3252         fwd_ref_entry &= ~(0xFF << ref_idx_shift);
3253         fwd_ref_entry += (gen9_vdenc_mfx_get_ref_idx_state(ref_pic, vdenc_context->list_ref_idx[0][i]) << ref_idx_shift);
3254     }
3255
3256     if (slice_type == SLICE_TYPE_P) {
3257         BEGIN_BCS_BATCH(batch, 10);
3258         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
3259         OUT_BCS_BATCH(batch, 0);                        // L0
3260         OUT_BCS_BATCH(batch, fwd_ref_entry);
3261
3262         for (i = 0; i < 7; i++) {
3263             OUT_BCS_BATCH(batch, 0x80808080);
3264         }
3265
3266         ADVANCE_BCS_BATCH(batch);
3267     }
3268
3269     if (slice_type == SLICE_TYPE_B) {
3270         /* VDEnc on SKL doesn't support BWD */
3271         assert(0);
3272     }
3273 }
3274
3275 static void
3276 gen9_vdenc_mfx_avc_weightoffset_state(VADriverContextP ctx,
3277                                       struct encode_state *encode_state,
3278                                       struct intel_encoder_context *encoder_context,
3279                                       VAEncPictureParameterBufferH264 *pic_param,
3280                                       VAEncSliceParameterBufferH264 *slice_param)
3281 {
3282     struct intel_batchbuffer *batch = encoder_context->base.batch;
3283     int i, slice_type;
3284     short weightoffsets[32 * 6];
3285
3286     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3287
3288     if (slice_type == SLICE_TYPE_P &&
3289         pic_param->pic_fields.bits.weighted_pred_flag == 1) {
3290
3291         for (i = 0; i < 32; i++) {
3292             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
3293             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
3294             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
3295             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
3296             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
3297             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
3298         }
3299
3300         BEGIN_BCS_BATCH(batch, 98);
3301         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
3302         OUT_BCS_BATCH(batch, 0);
3303         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
3304
3305         ADVANCE_BCS_BATCH(batch);
3306     }
3307
3308     if (slice_type == SLICE_TYPE_B) {
3309         /* VDEnc on SKL doesn't support BWD */
3310         assert(0);
3311     }
3312 }
3313
3314 static void
3315 gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
3316                                 struct encode_state *encode_state,
3317                                 struct intel_encoder_context *encoder_context,
3318                                 VAEncSliceParameterBufferH264 *slice_param,
3319                                 VAEncSliceParameterBufferH264 *next_slice_param,
3320                                 int slice_index)
3321 {
3322     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
3323
3324     gen9_vdenc_mfx_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param);
3325     gen9_vdenc_mfx_avc_weightoffset_state(ctx,
3326                                           encode_state,
3327                                           encoder_context,
3328                                           pic_param,
3329                                           slice_param);
3330     gen9_vdenc_mfx_avc_slice_state(ctx,
3331                                    encode_state,
3332                                    encoder_context,
3333                                    pic_param,
3334                                    slice_param,
3335                                    next_slice_param);
3336     gen9_vdenc_mfx_avc_insert_headers(ctx,
3337                                      encode_state,
3338                                      encoder_context,
3339                                      slice_param,
3340                                      slice_index);
3341 }
3342
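/*
 * Emit the per-slice MFX/VDEnc commands.  In frame-level VDEnc mode (the
 * only mode handled here) just the first slice is programmed, then a single
 * VDENC_WALKER_STATE plus a VD_PIPELINE_FLUSH finish the frame, followed by
 * an MI_FLUSH_DW that invalidates the video pipeline cache.
 */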
3343 static void
3344 gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
3345                                 struct encode_state *encode_state,
3346                                 struct intel_encoder_context *encoder_context)
3347 {
3348     struct intel_batchbuffer *batch = encoder_context->base.batch;
3349     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
3350     VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
3351     int i, j;
3352     int slice_index = 0;
3353     int is_frame_level_vdenc = 1;       /* TODO: check it for SKL */
3354     int has_tail = 0;                   /* TODO: check it later */
3355
3356     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3357         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3358
3359         if (j == encode_state->num_slice_params_ext - 1)
3360             next_slice_group_param = NULL;
3361         else
3362             next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
3363
3364         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
3365             if (i < encode_state->slice_params_ext[j]->num_elements - 1)
3366                 next_slice_param = slice_param + 1;
3367             else
3368                 next_slice_param = next_slice_group_param;
3369
3370             gen9_vdenc_mfx_avc_single_slice(ctx,
3371                                             encode_state,
3372                                             encoder_context,
3373                                             slice_param,
3374                                             next_slice_param,
3375                                             slice_index);
3376             slice_param++;
3377             slice_index++;
3378
3379             if (is_frame_level_vdenc)
3380                 break;
3381             else {
3382                 /* TODO: remove assert(0) and add other commands here */
3383                 assert(0);
3384             }
3385         }
3386
3387         if (is_frame_level_vdenc)
3388             break;
3389     }
3390
3391     if (is_frame_level_vdenc) {
3392         struct vd_pipeline_flush_parameter pipeline_flush_params;
3393
3394         gen9_vdenc_vdenc_walker_state(ctx, encode_state, encoder_context);
3395
3396         memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
3397         pipeline_flush_params.mfx_pipeline_done = !has_tail;
3398         pipeline_flush_params.vdenc_pipeline_done = 1;
3399         pipeline_flush_params.vdenc_pipeline_command_flush = 1;
3400         pipeline_flush_params.vd_command_message_parser_done = 1;
3401         gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
3402     }
3403
3404     if (has_tail) {
3405         /* TODO: insert a tail if required */
3406     }
3407
3408     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3409     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
3410     gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3411 }
3412
3413 static void
3414 gen9_vdenc_mfx_vdenc_pipeline(VADriverContextP ctx,
3415                               struct encode_state *encode_state,
3416                               struct intel_encoder_context *encoder_context)
3417 {
3418     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3419     struct intel_batchbuffer *batch = encoder_context->base.batch;
3420     struct gpe_mi_batch_buffer_start_parameter mi_batch_buffer_start_params;
3421
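    /* With BRC enabled, MI_CONDITIONAL_BATCH_BUFFER_END against the HuC
     * status buffers lets the GPU stop processing the rest of this batch:
     * presumably the first check bails out if the HuC BRC kernel did not
     * complete, and the per-pass check below skips a re-encode pass when
     * the previous PAK pass already met the BRC limits. */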
3422     if (vdenc_context->brc_enabled) {
3423         struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
3424
3425         memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
3426         mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
3427         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
3428     }
3429
3430     if (vdenc_context->current_pass) {
3431         struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
3432
3433         memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
3434         mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status_res.bo;
3435         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
3436     }
3437
3438     gen9_vdenc_mfx_pipe_mode_select(ctx, encode_state, encoder_context);
3439
3440     gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res, 0);
3441     gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res, 4);
3442     gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res, 5);
3443
3444     gen9_vdenc_mfx_pipe_buf_addr_state(ctx, encoder_context);
3445     gen9_vdenc_mfx_ind_obj_base_addr_state(ctx, encoder_context);
3446     gen9_vdenc_mfx_bsp_buf_base_addr_state(ctx, encoder_context);
3447
3448     gen9_vdenc_vdenc_pipe_mode_select(ctx, encode_state, encoder_context);
3449     gen9_vdenc_vdenc_src_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res);
3450     gen9_vdenc_vdenc_ref_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res);
3451     gen9_vdenc_vdenc_ds_ref_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res);
3452     gen9_vdenc_vdenc_pipe_buf_addr_state(ctx, encode_state, encoder_context);
3453     gen9_vdenc_vdenc_const_qpt_state(ctx, encode_state, encoder_context);
3454
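    /* In CQP mode the driver emits the MFX/VDEnc image states directly;
     * with BRC enabled they are fetched from the second-level batch buffer,
     * which is expected to have been filled by the HuC BRC update step. */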
3455     if (!vdenc_context->brc_enabled) {
3456         gen9_vdenc_mfx_avc_img_state(ctx, encode_state, encoder_context);
3457         gen9_vdenc_vdenc_img_state(ctx, encode_state, encoder_context);
3458     } else {
3459         memset(&mi_batch_buffer_start_params, 0, sizeof(mi_batch_buffer_start_params));
3460         mi_batch_buffer_start_params.is_second_level = 1; /* Must be the second level batch buffer */
3461         mi_batch_buffer_start_params.bo = vdenc_context->second_level_batch_res.bo;
3462         gen9_gpe_mi_batch_buffer_start(ctx, batch, &mi_batch_buffer_start_params);
3463     }
3464
3465     gen9_vdenc_mfx_avc_qm_state(ctx, encoder_context);
3466     gen9_vdenc_mfx_avc_fqm_state(ctx, encoder_context);
3467
3468     gen9_vdenc_mfx_vdenc_avc_slices(ctx, encode_state, encoder_context);
3469 }
3470
3471 static void
3472 gen9_vdenc_context_brc_prepare(struct encode_state *encode_state,
3473                                struct intel_encoder_context *encoder_context)
3474 {
3475     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3476     unsigned int rate_control_mode = encoder_context->rate_control_mode;
3477
3478     switch (rate_control_mode & 0x7f) {
3479     case VA_RC_CBR:
3480         vdenc_context->internal_rate_mode = I965_BRC_CBR;
3481         break;
3482
3483     case VA_RC_VBR:
3484         vdenc_context->internal_rate_mode = I965_BRC_VBR;
3485         break;
3486
3487     case VA_RC_CQP:
3488     default:
3489         vdenc_context->internal_rate_mode = I965_BRC_CQP;
3490         break;
3491     }
3492 }
3493
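/*
 * Capture the PAK status after each pass: the frame byte count register is
 * stored into the status buffer, and the byte count and image status control
 * registers are also copied into each per-pass BRC update DMEM (DWord
 * offsets 5 and 7, presumably matching fields of struct huc_brc_update_dmem)
 * for the next HuC BRC update.
 */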
3494 static void
3495 gen9_vdenc_read_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3496 {
3497     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3498     struct intel_batchbuffer *batch = encoder_context->base.batch;
3499     struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
3500     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
3501     unsigned int base_offset = vdenc_context->status_bffuer.base_offset;
3502     int i;
3503
3504     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3505     gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3506
3507     memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
3508     mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
3509     mi_store_register_mem_params.bo = vdenc_context->status_bffuer.res.bo;
3510     mi_store_register_mem_params.offset = base_offset + vdenc_context->status_bffuer.bytes_per_frame_offset;
3511     gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3512
3513     /* Update DMEM buffer for BRC Update */
3514     for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
3515         mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
3516         mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
3517         mi_store_register_mem_params.offset = 5 * sizeof(uint32_t);
3518         gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3519
3520         mi_store_register_mem_params.mmio_offset = MFC_IMAGE_STATUS_CTRL_REG; /* TODO: fix it if VDBOX2 is used */
3521         mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
3522         mi_store_register_mem_params.offset = 7 * sizeof(uint32_t);
3523         gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3524     }
3525 }
3526
3527 static VAStatus
3528 gen9_vdenc_avc_check_capability(VADriverContextP ctx,
3529                                 struct encode_state *encode_state,
3530                                 struct intel_encoder_context *encoder_context)
3531 {
3532     VAEncSliceParameterBufferH264 *slice_param;
3533     int i, j;
3534
3535     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3536         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3537
3538         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
3539             if (slice_param->slice_type == SLICE_TYPE_B)
3540                 return VA_STATUS_ERROR_UNKNOWN;
3541
3542             slice_param++;
3543         }
3544     }
3545
3546     return VA_STATUS_SUCCESS;
3547 }
3548
3549 static VAStatus
3550 gen9_vdenc_avc_encode_picture(VADriverContextP ctx,
3551                               VAProfile profile,
3552                               struct encode_state *encode_state,
3553                               struct intel_encoder_context *encoder_context)
3554 {
3555     VAStatus va_status;
3556     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3557     struct intel_batchbuffer *batch = encoder_context->base.batch;
3558
3559     va_status = gen9_vdenc_avc_check_capability(ctx, encode_state, encoder_context);
3560
3561     if (va_status != VA_STATUS_SUCCESS)
3562         return va_status;
3563
3564     gen9_vdenc_avc_prepare(ctx, profile, encode_state, encoder_context);
3565
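    /* Multi-pass PAK loop: every pass re-runs the HuC BRC update (when BRC
     * is enabled) and the full MFX/VDEnc pipeline; passes after the first
     * are expected to be skipped on the GPU by the conditional
     * batch-buffer-end checks in gen9_vdenc_mfx_vdenc_pipeline() once the
     * BRC target is met. */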
3566     for (vdenc_context->current_pass = 0; vdenc_context->current_pass < vdenc_context->num_passes; vdenc_context->current_pass++) {
3567         vdenc_context->is_first_pass = (vdenc_context->current_pass == 0);
3568         vdenc_context->is_last_pass = (vdenc_context->current_pass == (vdenc_context->num_passes - 1));
3569
3570         intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3571         intel_batchbuffer_emit_mi_flush(batch);
3572
3573         if (vdenc_context->brc_enabled) {
3574             if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset)
3575                 gen9_vdenc_huc_brc_init_reset(ctx, encode_state, encoder_context);
3576
3577             gen9_vdenc_huc_brc_update(ctx, encode_state, encoder_context);
3578             intel_batchbuffer_emit_mi_flush(batch);
3579         }
3580
3581         gen9_vdenc_mfx_vdenc_pipeline(ctx, encode_state, encoder_context);
3582         gen9_vdenc_read_status(ctx, encoder_context);
3583
3584         intel_batchbuffer_end_atomic(batch);
3585         intel_batchbuffer_flush(batch);
3586
3587         vdenc_context->brc_initted = 1;
3588         vdenc_context->brc_need_reset = 0;
3589     }
3590
3591     return VA_STATUS_SUCCESS;
3592 }
3593
3594 static VAStatus
3595 gen9_vdenc_pipeline(VADriverContextP ctx,
3596                     VAProfile profile,
3597                     struct encode_state *encode_state,
3598                     struct intel_encoder_context *encoder_context)
3599 {
3600     VAStatus vaStatus;
3601
3602     switch (profile) {
3603     case VAProfileH264ConstrainedBaseline:
3604     case VAProfileH264Main:
3605     case VAProfileH264High:
3606         vaStatus = gen9_vdenc_avc_encode_picture(ctx, profile, encode_state, encoder_context);
3607         break;
3608
3609     default:
3610         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
3611         break;
3612     }
3613
3614     return vaStatus;
3615 }
3616
3617 static void
3618 gen9_vdenc_free_resources(struct gen9_vdenc_context *vdenc_context)
3619 {
3620     int i;
3621
3622     i965_free_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
3623     i965_free_gpe_resource(&vdenc_context->brc_history_buffer_res);
3624     i965_free_gpe_resource(&vdenc_context->brc_stream_in_res);
3625     i965_free_gpe_resource(&vdenc_context->brc_stream_out_res);
3626     i965_free_gpe_resource(&vdenc_context->huc_dummy_res);
3627
3628     for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++)
3629         i965_free_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
3630
3631     i965_free_gpe_resource(&vdenc_context->vdenc_statistics_res);
3632     i965_free_gpe_resource(&vdenc_context->pak_statistics_res);
3633     i965_free_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
3634     i965_free_gpe_resource(&vdenc_context->hme_detection_summary_buffer_res);
3635     i965_free_gpe_resource(&vdenc_context->brc_constant_data_res);
3636     i965_free_gpe_resource(&vdenc_context->second_level_batch_res);
3637
3638     i965_free_gpe_resource(&vdenc_context->huc_status_res);
3639     i965_free_gpe_resource(&vdenc_context->huc_status2_res);
3640
3641     i965_free_gpe_resource(&vdenc_context->recon_surface_res);
3642     i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
3643     i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
3644     i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
3645
3646     for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
3647         i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
3648         i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
3649     }
3650
3651     i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
3652     i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
3653     i965_free_gpe_resource(&vdenc_context->status_bffuer.res);

    i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
    i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
    i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
    i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);

    i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
}

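/* mfc_context_destroy hook: free all GPE resources, then the context itself. */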
static void
gen9_vdenc_context_destroy(void *context)
{
    struct gen9_vdenc_context *vdenc_context = context;

    gen9_vdenc_free_resources(vdenc_context);

    free(vdenc_context);
}

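/*
 * Allocate the frame-independent GPE buffers used by the HuC BRC firmware
 * and the VDENC/PAK pipeline. ALLOC_VDENC_BUFFER_RESOURCE() is a helper
 * macro presumably defined earlier in this file; roughly (a sketch, not the
 * exact expansion), it fills in an i965_gpe_resource descriptor and backs it
 * with a buffer object of the requested size, ending in something like
 *
 *     i965_allocate_gpe_resource(i965->intel.bufmgr, &res, size, description);
 *
 * so each call below pairs a context field with its size and a debug name.
 */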
static void
gen9_vdenc_allocate_resources(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context,
                              struct gen9_vdenc_context *vdenc_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_init_reset_dmem_res,
                                ALIGN(sizeof(struct huc_brc_init_dmem), 64),
                                "HuC Init&Reset DMEM buffer");

    ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_history_buffer_res,
                                ALIGN(HUC_BRC_HISTORY_BUFFER_SIZE, 0x1000),
                                "HuC History buffer");

    ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_in_res,
                                ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
                                "HuC Stream In buffer");

    ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_out_res,
                                ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
                                "HuC Stream Out buffer");

    ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_dummy_res,
                                0x1000,
                                "HuC dummy buffer");

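    /* One BRC update DMEM buffer per potential PAK pass, zeroed up front. */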
    for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
        ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_update_dmem_res[i],
                                    ALIGN(sizeof(struct huc_brc_update_dmem), 64),
                                    "HuC BRC Update buffer");
        i965_zero_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
    }

    ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_statistics_res,
                                ALIGN(VDENC_STATISTICS_SIZE, 0x1000),
                                "VDENC statistics buffer");

    ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->pak_statistics_res,
                                ALIGN(PAK_STATISTICS_SIZE, 0x1000),
                                "PAK statistics buffer");

    ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_avc_image_state_res,
                                ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
                                "VDENC/AVC image state buffer");

    ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->hme_detection_summary_buffer_res,
                                ALIGN(HME_DETECTION_SUMMARY_BUFFER_SIZE, 0x1000),
                                "HME summary buffer");

    ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_constant_data_res,
                                ALIGN(BRC_CONSTANT_DATA_SIZE, 0x1000),
                                "BRC constant buffer");

    ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->second_level_batch_res,
                                ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
                                "Second level batch buffer");

    ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status_res,
                                0x1000,
                                "HuC Status buffer");

    ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status2_res,
                                0x1000,
                                "HuC Status2 buffer");
}

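/*
 * get_status hook: bytes_per_frame lives in the codec-private area of the
 * coded buffer segment (presumably filled in on the GPU side by the
 * status-read commands emitted from gen9_vdenc_read_status()); report it
 * back as the coded segment size.
 */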
static VAStatus
gen9_vdenc_context_get_status(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context,
                              struct i965_coded_buffer_segment *coded_buffer_segment)
{
    struct gen9_vdenc_status *vdenc_status = (struct gen9_vdenc_status *)coded_buffer_segment->codec_private_data;

    coded_buffer_segment->base.size = vdenc_status->bytes_per_frame;

    return VA_STATUS_SUCCESS;
}

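/*
 * Create the VDENC context and wire it into the encoder vtable. The caller
 * is expected to be the per-generation encoder setup code; a hypothetical
 * usage sketch (names are illustrative, not taken from this file):
 *
 *     if (!gen9_vdenc_context_init(ctx, encoder_context))
 *         return VA_STATUS_ERROR_ALLOCATION_FAILED;
 *
 * BRC state starts out "not initialized", so the first encoded frame always
 * runs the HuC BRC init/reset step when rate control is enabled.
 */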
Bool
gen9_vdenc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct gen9_vdenc_context *vdenc_context = calloc(1, sizeof(struct gen9_vdenc_context));

    if (!vdenc_context)
        return False;

    vdenc_context->brc_initted = 0;
    vdenc_context->brc_need_reset = 0;
    vdenc_context->is_low_delay = 0;
    vdenc_context->current_pass = 0;
    vdenc_context->num_passes = 1;
    vdenc_context->vdenc_streamin_enable = 0;
    vdenc_context->vdenc_pak_threshold_check_enable = 0;

    gen9_vdenc_allocate_resources(ctx, encoder_context, vdenc_context);

    encoder_context->mfc_context = vdenc_context;
    encoder_context->mfc_context_destroy = gen9_vdenc_context_destroy;
    encoder_context->mfc_pipeline = gen9_vdenc_pipeline;
    encoder_context->mfc_brc_prepare = gen9_vdenc_context_brc_prepare;
    encoder_context->get_status = gen9_vdenc_context_get_status;

    return True;
}