2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41 #include "intel_media.h"
42 #include "gen9_vdenc.h"
45 intel_avc_enc_slice_type_fixup(int slice_type);
/* Low-delay BRC buffer-rate QP adjustment for I frames:
 * 9 buffer-fullness rows x 8 rate-ratio columns.
 * NOTE(review): negative initializers wrap to two's-complement in uint8_t;
 * presumably consumed as signed bytes by the HuC firmware — confirm. */
static const uint8_t buf_rate_adj_tab_i_lowdelay[72] = {
    0,   0,  -8, -12, -16, -20, -28, -36,
    0,   0,  -4,  -8, -12, -16, -24, -32,
    4,   2,   0,  -1,  -3,  -8, -16, -24,
    8,   4,   2,   0,  -1,  -4,  -8, -16,
    20, 16,   4,   0,  -1,  -4,  -8, -16,
    24, 20,  16,   8,   4,   0,  -4,  -8,
    28, 24,  20,  16,   8,   4,   0,  -8,
    32, 24,  20,  16,   8,   4,   0,  -4,
    64, 48,  28,  20,  16,  12,   8,   4,
};
/* Low-delay BRC buffer-rate QP adjustment for P frames (9x8).
 * NOTE(review): negative values stored as two's-complement in uint8_t. */
static const uint8_t buf_rate_adj_tab_p_lowdelay[72] = {
    -8, -24, -32, -40, -44, -48, -52, -80,
    -8, -16, -32, -40, -40, -44, -44, -56,
    0,    0, -12, -20, -24, -28, -32, -36,
    8,    4,   0,   0,  -8, -16, -24, -32,
    32,  16,   8,   4,  -4,  -8, -16, -20,
    36,  24,  16,   8,   4,  -2,  -4,  -8,
    40,  36,  24,  20,  16,   8,   0,  -8,
    48,  40,  28,  24,  20,  12,   0,  -4,
    64,  48,  28,  20,  16,  12,   8,   4,
};
/* Low-delay BRC buffer-rate QP adjustment for B frames (9x8).
 * NOTE(review): negative values stored as two's-complement in uint8_t. */
static const uint8_t buf_rate_adj_tab_b_lowdelay[72] = {
    0,  -4,  -8, -16, -24, -32, -40, -48,
    1,   0,  -4,  -8, -16, -24, -32, -40,
    4,   2,   0,  -1,  -3,  -8, -16, -24,
    8,   4,   2,   0,  -1,  -4,  -8, -16,
    20, 16,   4,   0,  -1,  -4,  -8, -16,
    24, 20,  16,   8,   4,   0,  -4,  -8,
    28, 24,  20,  16,   8,   4,   0,  -8,
    32, 24,  20,  16,   8,   4,   0,  -4,
    64, 48,  28,  20,  16,  12,   8,   4,
};
/* VBR distortion-based QP adjustment for I frames: 9x9 signed deltas. */
static const int8_t dist_qp_adj_tab_i_vbr[81] = {
    0,   0,  0,  0, 0, 3, 4, 6, 8,
    0,   0,  0,  0, 0, 2, 3, 5, 7,
    -1,  0,  0,  0, 0, 2, 2, 4, 5,
    -1, -1,  0,  0, 0, 1, 2, 2, 4,
    -2, -2, -1,  0, 0, 0, 1, 2, 4,
    -2, -2, -1,  0, 0, 0, 1, 2, 4,
    -3, -2, -1, -1, 0, 0, 1, 2, 5,
    -3, -2, -1, -1, 0, 0, 2, 4, 7,
    -4, -3, -2, -1, 0, 1, 3, 5, 8,
};
/* VBR distortion-based QP adjustment for P frames: 9x9 signed deltas. */
static const int8_t dist_qp_adj_tab_p_vbr[81] = {
    -1,  0,  0,  0, 0, 1, 1, 2, 3,
    -1, -1,  0,  0, 0, 1, 1, 2, 3,
    -2, -1, -1,  0, 0, 1, 1, 2, 3,
    -3, -2, -2, -1, 0, 0, 1, 2, 3,
    -3, -2, -1, -1, 0, 0, 1, 2, 3,
    -3, -2, -1, -1, 0, 0, 1, 2, 3,
    -3, -2, -1, -1, 0, 0, 1, 2, 3,
    -3, -2, -1, -1, 0, 0, 1, 2, 3,
    -3, -2, -1, -1, 0, 0, 1, 2, 3,
};
/* VBR distortion-based QP adjustment for B frames: 9x9 signed deltas. */
static const int8_t dist_qp_adj_tab_b_vbr[81] = {
    0,   0,  0,  0, 0, 2, 3, 3, 4,
    0,   0,  0,  0, 0, 2, 3, 3, 4,
    -1,  0,  0,  0, 0, 2, 2, 3, 3,
    -1, -1,  0,  0, 0, 1, 2, 2, 2,
    -1, -1, -1,  0, 0, 0, 1, 2, 2,
    -2, -1, -1,  0, 0, 0, 0, 1, 2,
    -2, -1, -1, -1, 0, 0, 0, 1, 3,
    -2, -2, -1, -1, 0, 0, 1, 1, 3,
    -2, -2, -1, -1, 0, 1, 1, 2, 4,
};
/* VBR buffer-rate QP adjustment for I frames: 9x8 signed deltas. */
static const int8_t buf_rate_adj_tab_i_vbr[72] = {
    -4, -20, -28, -36, -40, -44, -48, -80,
    0,   -8, -12, -20, -24, -28, -32, -36,
    0,    0,  -8, -16, -20, -24, -28, -32,
    8,    4,   0,   0,  -8, -16, -24, -28,
    32,  24,  16,   2,  -4,  -8, -16, -20,
    36,  32,  28,  16,   8,   0,  -4,  -8,
    40,  36,  24,  20,  16,   8,   0,  -8,
    48,  40,  28,  24,  20,  12,   0,  -4,
    64,  48,  28,  20,  16,  12,   8,   4,
};
/* VBR buffer-rate QP adjustment for P frames: 9x8 signed deltas. */
static const int8_t buf_rate_adj_tab_p_vbr[72] = {
    -8, -24, -32, -44, -48, -56, -64, -80,
    -8, -16, -32, -40, -44, -52, -56, -64,
    0,    0, -16, -28, -36, -40, -44, -48,
    8,    4,   0,   0,  -8, -16, -24, -36,
    20,  12,   4,   0,  -8,  -8,  -8, -16,
    24,  16,   8,   8,   8,   0,  -4,  -8,
    40,  36,  24,  20,  16,   8,   0,  -8,
    48,  40,  28,  24,  20,  12,   0,  -4,
    64,  48,  28,  20,  16,  12,   8,   4,
};
/* VBR buffer-rate QP adjustment for B frames: 9x8 signed deltas. */
static const int8_t buf_rate_adj_tab_b_vbr[72] = {
    0,  -4,  -8, -16, -24, -32, -40, -48,
    1,   0,  -4,  -8, -16, -24, -32, -40,
    4,   2,   0,  -1,  -3,  -8, -16, -24,
    8,   4,   2,   0,  -1,  -4,  -8, -16,
    20, 16,   4,   0,  -1,  -4,  -8, -16,
    24, 20,  16,   8,   4,   0,  -4,  -8,
    28, 24,  20,  16,   8,   4,   0,  -8,
    32, 24,  20,  16,   8,   4,   0,  -4,
    64, 48,  28,  20,  16,  12,   8,   4,
};
155 static const struct huc_brc_update_constant_data
156 gen9_brc_update_constant_data = {
157 .global_rate_qp_adj_tab_i = {
158 48, 40, 32, 24, 16, 8, 0, -8,
159 40, 32, 24, 16, 8, 0, -8, -16,
160 32, 24, 16, 8, 0, -8, -16, -24,
161 24, 16, 8, 0, -8, -16, -24, -32,
162 16, 8, 0, -8, -16, -24, -32, -40,
163 8, 0, -8, -16, -24, -32, -40, -48,
164 0, -8, -16, -24, -32, -40, -48, -56,
165 48, 40, 32, 24, 16, 8, 0, -8,
168 .global_rate_qp_adj_tab_p = {
169 48, 40, 32, 24, 16, 8, 0, -8,
170 40, 32, 24, 16, 8, 0, -8, -16,
171 16, 8, 8, 4, -8, -16, -16, -24,
172 8, 0, 0, -8, -16, -16, -16, -24,
173 8, 0, 0, -24, -32, -32, -32, -48,
174 0, -16, -16, -24, -32, -48, -56, -64,
175 -8, -16, -32, -32, -48, -48, -56, -64,
176 -16,-32, -48, -48, -48, -56, -64, -80,
179 .global_rate_qp_adj_tab_b = {
180 48, 40, 32, 24, 16, 8, 0, -8,
181 40, 32, 24, 16, 8, 0, -8, -16,
182 32, 24, 16, 8, 0, -8, -16, -24,
183 24, 16, 8, 0, -8, -8, -16, -24,
184 16, 8, 0, 0, -8, -16, -24, -32,
185 16, 8, 0, 0, -8, -16, -24, -32,
186 0, -8, -8, -16, -32, -48, -56, -64,
187 0, -8, -8, -16, -32, -48, -56, -64
190 .dist_threshld_i = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
191 .dist_threshld_p = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
192 .dist_threshld_b = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
194 .dist_qp_adj_tab_i = {
195 0, 0, 0, 0, 0, 3, 4, 6, 8,
196 0, 0, 0, 0, 0, 2, 3, 5, 7,
197 -1, 0, 0, 0, 0, 2, 2, 4, 5,
198 -1, -1, 0, 0, 0, 1, 2, 2, 4,
199 -2, -2, -1, 0, 0, 0, 1, 2, 4,
200 -2, -2, -1, 0, 0, 0, 1, 2, 4,
201 -3, -2, -1, -1, 0, 0, 1, 2, 5,
202 -3, -2, -1, -1, 0, 0, 2, 4, 7,
203 -4, -3, -2, -1, 0, 1, 3, 5, 8,
206 .dist_qp_adj_tab_p = {
207 -1, 0, 0, 0, 0, 1, 1, 2, 3,
208 -1, -1, 0, 0, 0, 1, 1, 2, 3,
209 -2, -1, -1, 0, 0, 1, 1, 2, 3,
210 -3, -2, -2, -1, 0, 0, 1, 2, 3,
211 -3, -2, -1, -1, 0, 0, 1, 2, 3,
212 -3, -2, -1, -1, 0, 0, 1, 2, 3,
213 -3, -2, -1, -1, 0, 0, 1, 2, 3,
214 -3, -2, -1, -1, 0, 0, 1, 2, 3,
215 -3, -2, -1, -1, 0, 0, 1, 2, 3,
218 .dist_qp_adj_tab_b = {
219 0, 0, 0, 0, 0, 2, 3, 3, 4,
220 0, 0, 0, 0, 0, 2, 3, 3, 4,
221 -1, 0, 0, 0, 0, 2, 2, 3, 3,
222 -1, -1, 0, 0, 0, 1, 2, 2, 2,
223 -1, -1, -1, 0, 0, 0, 1, 2, 2,
224 -2, -1, -1, 0, 0, 0, 0, 1, 2,
225 -2, -1, -1, -1, 0, 0, 0, 1, 3,
226 -2, -2, -1, -1, 0, 0, 1, 1, 3,
227 -2, -2, -1, -1, 0, 1, 1, 2, 4,
230 /* default table for non lowdelay */
231 .buf_rate_adj_tab_i = {
232 -4, -20, -28, -36, -40, -44, -48, -80,
233 0, -8, -12, -20, -24, -28, -32, -36,
234 0, 0, -8, -16, -20, -24, -28, -32,
235 8, 4, 0, 0, -8, -16, -24, -28,
236 32, 24, 16, 2, -4, -8, -16, -20,
237 36, 32, 28, 16, 8, 0, -4, -8,
238 40, 36, 24, 20, 16, 8, 0, -8,
239 48, 40, 28, 24, 20, 12, 0, -4,
240 64, 48, 28, 20, 16, 12, 8, 4,
243 /* default table for non lowdelay */
244 .buf_rate_adj_tab_p = {
245 -8, -24, -32, -44, -48, -56, -64, -80,
246 -8, -16, -32, -40, -44, -52, -56, -64,
247 0, 0, -16, -28, -36, -40, -44, -48,
248 8, 4, 0, 0, -8, -16, -24, -36,
249 20, 12, 4, 0, -8, -8, -8, -16,
250 24, 16, 8, 8, 8, 0, -4, -8,
251 40, 36, 24, 20, 16, 8, 0, -8,
252 48, 40, 28, 24, 20, 12, 0, -4,
253 64, 48, 28, 20, 16, 12, 8, 4,
256 /* default table for non lowdelay */
257 .buf_rate_adj_tab_b = {
258 0, -4, -8, -16, -24, -32, -40, -48,
259 1, 0, -4, -8, -16, -24, -32, -40,
260 4, 2, 0, -1, -3, -8, -16, -24,
261 8, 4, 2, 0, -1, -4, -8, -16,
262 20, 16, 4, 0, -1, -4, -8, -16,
263 24, 20, 16, 8, 4, 0, -4, -8,
264 28, 24, 20, 16, 8, 4, 0, -8,
265 32, 24, 20, 16, 8, 4, 0, -4,
266 64, 48, 28, 20, 16, 12, 8, 4,
269 .frame_size_min_tab_p = { 1, 2, 4, 6, 8, 10, 16, 16, 16 },
270 .frame_size_min_tab_i = { 1, 2, 4, 8, 16, 20, 24, 32, 36 },
272 .frame_size_max_tab_p = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
273 .frame_size_max_tab_i = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
275 .frame_size_scg_tab_p = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
276 .frame_size_scg_tab_i = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
278 .i_intra_non_pred = {
279 0x0e, 0x0e, 0x0e, 0x18, 0x19, 0x1b, 0x1c, 0x0d, 0x0f, 0x18, 0x19, 0x0d, 0x0f, 0x0f,
280 0x0c, 0x0e, 0x0c, 0x0c, 0x0a, 0x0a, 0x0b, 0x0a, 0x0a, 0x0a, 0x09, 0x09, 0x08, 0x08,
281 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x07, 0x07, 0x07, 0x07, 0x07,
285 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
286 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
287 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
291 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
292 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x04, 0x04, 0x04, 0x04, 0x06, 0x06, 0x06,
293 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07,
297 0x2e, 0x2e, 0x2e, 0x38, 0x39, 0x3a, 0x3b, 0x2c, 0x2e, 0x38, 0x39, 0x2d, 0x2f, 0x38,
298 0x2e, 0x38, 0x2e, 0x38, 0x2f, 0x2e, 0x38, 0x38, 0x38, 0x38, 0x2f, 0x2f, 0x2f, 0x2e,
299 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x1e, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x0e, 0x0d,
303 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
304 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
305 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
308 .p_intra_non_pred = {
309 0x06, 0x06, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x07,
310 0x07, 0x07, 0x06, 0x07, 0x07, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
311 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
315 0x1b, 0x1b, 0x1b, 0x1c, 0x1e, 0x28, 0x29, 0x1a, 0x1b, 0x1c, 0x1e, 0x1a, 0x1c, 0x1d,
316 0x1b, 0x1c, 0x1c, 0x1c, 0x1c, 0x1b, 0x1c, 0x1c, 0x1d, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c,
317 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
321 0x1d, 0x1d, 0x1d, 0x1e, 0x28, 0x29, 0x2a, 0x1b, 0x1d, 0x1e, 0x28, 0x1c, 0x1d, 0x1f,
322 0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1d, 0x1f, 0x1e, 0x1e, 0x1e, 0x1d, 0x1e, 0x1e, 0x1d,
323 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e,
327 0x38, 0x38, 0x38, 0x39, 0x3a, 0x3b, 0x3d, 0x2e, 0x38, 0x39, 0x3a, 0x2f, 0x39, 0x3a,
328 0x38, 0x39, 0x38, 0x39, 0x39, 0x38, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
329 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
333 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
334 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
335 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
339 0x07, 0x07, 0x07, 0x08, 0x09, 0x0b, 0x0c, 0x06, 0x07, 0x09, 0x0a, 0x07, 0x08, 0x09,
340 0x08, 0x09, 0x08, 0x09, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x08, 0x08, 0x08, 0x08,
341 0x08, 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
345 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x02, 0x02, 0x02, 0x03, 0x02, 0x02, 0x02,
346 0x02, 0x03, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
347 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
351 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
352 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
353 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
357 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
358 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
359 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04
365 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
366 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
367 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
372 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
373 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
374 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
379 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
380 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
381 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
386 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
387 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
388 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
393 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
394 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
395 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
400 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
401 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
402 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
407 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
408 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
409 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
414 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
415 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
416 0x1a, 0x1a, 0x1a, 0x1a, 0x1f, 0x2a, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d
/* Per-QP lambda costs (indexed by QP, two trailing zero pads).
 * NOTE(review): indexing by QP assumed from sibling per-QP tables — confirm. */
static const uint8_t vdenc_const_qp_lambda[44] = {
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
    0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
    0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
    0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
    0x4a, 0x53, 0x00, 0x00
};
431 static const uint16_t vdenc_const_skip_threshold[28] = {
436 static const uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0[28] = {
441 static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1[28] = {
446 static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2[28] = {
451 static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3[28] = {
/* Per-QP lambda costs for P slices (same values as the I-slice table,
 * two trailing zero pads). */
static const uint8_t vdenc_const_qp_lambda_p[44] = {
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
    0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
    0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
    0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
    0x4a, 0x53, 0x00, 0x00
};
/* Per-QP skip thresholds for P slices (two trailing zero pads). */
static const uint16_t vdenc_const_skip_threshold_p[28] = {
    0x0000, 0x0000, 0x0000, 0x0000, 0x0002, 0x0004, 0x0007, 0x000b,
    0x0011, 0x0019, 0x0023, 0x0032, 0x0044, 0x005b, 0x0077, 0x0099,
    0x00c2, 0x00f1, 0x0128, 0x0168, 0x01b0, 0x0201, 0x025c, 0x02c2,
    0x0333, 0x03b0, 0x0000, 0x0000
};
/* SIC forward-transform coefficient threshold set 0, P slices
 * (two trailing zero pads). */
static const uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0_p[28] = {
    0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
    0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x3f,
    0x4e, 0x51, 0x5b, 0x63, 0x6f, 0x7f, 0x00, 0x00
};
/* SIC forward-transform coefficient threshold set 1, P slices
 * (two trailing zero pads). */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1_p[28] = {
    0x03, 0x04, 0x05, 0x05, 0x07, 0x09, 0x0b, 0x0e, 0x12, 0x17,
    0x1c, 0x21, 0x27, 0x2c, 0x33, 0x3b, 0x41, 0x51, 0x5c, 0x1a,
    0x1e, 0x21, 0x22, 0x26, 0x2c, 0x30, 0x00, 0x00
};
/* SIC forward-transform coefficient threshold set 2, P slices
 * (two trailing zero pads). */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2_p[28] = {
    0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
    0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x0f,
    0x13, 0x14, 0x16, 0x18, 0x1b, 0x1f, 0x00, 0x00
};
/* SIC forward-transform coefficient threshold set 3, P slices
 * (two trailing zero pads). */
static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3_p[28] = {
    0x04, 0x05, 0x06, 0x09, 0x0b, 0x0d, 0x12, 0x16, 0x1b, 0x23,
    0x2c, 0x33, 0x3d, 0x45, 0x4f, 0x5b, 0x66, 0x7f, 0x8e, 0x2a,
    0x2f, 0x32, 0x37, 0x3c, 0x45, 0x4c, 0x00, 0x00
};
502 vdenc_brc_dev_threshi0_fp_neg[4] = { 0.80, 0.60, 0.34, 0.2 };
505 vdenc_brc_dev_threshi0_fp_pos[4] = { 0.2, 0.4, 0.66, 0.9 };
508 vdenc_brc_dev_threshpb0_fp_neg[4] = { 0.90, 0.66, 0.46, 0.3 };
511 vdenc_brc_dev_threshpb0_fp_pos[4] = { 0.3, 0.46, 0.70, 0.90 };
514 vdenc_brc_dev_threshvbr0_neg[4] = { 0.90, 0.70, 0.50, 0.3 };
517 vdenc_brc_dev_threshvbr0_pos[4] = { 0.4, 0.5, 0.75, 0.90 };
/* BRC estimated-rate thresholds for P frames. */
static const unsigned char
vdenc_brc_estrate_thresh_p0[7] = { 4, 8, 12, 16, 20, 24, 28 };
/* BRC estimated-rate thresholds for I frames (same values as P). */
static const unsigned char
vdenc_brc_estrate_thresh_i0[7] = { 4, 8, 12, 16, 20, 24, 28 };
/* Frame counts at which BRC begins global rate adjustment. */
static const uint16_t
vdenc_brc_start_global_adjust_frame[4] = { 10, 50, 100, 150 };
529 vdenc_brc_global_rate_ratio_threshold[7] = { 80, 90, 95, 101, 105, 115, 130};
532 vdenc_brc_start_global_adjust_mult[5] = { 1, 1, 3, 2, 1 };
535 vdenc_brc_start_global_adjust_div[5] = { 40, 5, 5, 3, 1 };
538 vdenc_brc_global_rate_ratio_threshold_qp[8] = { -3, -2, -1, 0, 1, 1, 2, 3 };
540 static const int vdenc_mode_const[2][12][52] = {
543 //LUTMODE_INTRA_NONPRED
545 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, //QP=[0 ~12]
546 16, 18, 22, 24, 13, 15, 16, 18, 13, 15, 15, 12, 14, //QP=[13~25]
547 12, 12, 10, 10, 11, 10, 10, 10, 9, 9, 8, 8, 8, //QP=[26~38]
548 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, //QP=[39~51]
551 //LUTMODE_INTRA_16x16, LUTMODE_INTRA
553 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
554 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13~25]
555 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26~38]
556 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39~51]
561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
562 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, //QP=[13~25]
563 1, 1, 1, 1, 1, 4, 4, 4, 4, 6, 6, 6, 6, //QP=[26~38]
564 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, //QP=[39~51]
569 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, //QP=[0 ~12]
570 64, 72, 80, 88, 48, 56, 64, 72, 53, 59, 64, 56, 64, //QP=[13~25]
571 57, 64, 58, 55, 64, 64, 64, 64, 59, 59, 60, 57, 50, //QP=[26~38]
572 46, 42, 38, 34, 31, 27, 23, 22, 19, 18, 16, 14, 13, //QP=[39~51]
575 //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
581 //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16x8_FIELD
584 //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8X8_FIELD
587 //LUTMODE_INTER_16x16, LUTMODE_INTER
596 //LUTMODE_INTRA_CHROMA
602 //LUTMODE_INTRA_NONPRED
604 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[0 ~12]
605 7, 8, 9, 10, 5, 6, 7, 8, 6, 7, 7, 7, 7, //QP=[13~25]
606 6, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[26~38]
607 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[39~51]
610 //LUTMODE_INTRA_16x16, LUTMODE_INTRA
612 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
613 24, 28, 31, 35, 19, 21, 24, 28, 20, 24, 25, 21, 24,
614 24, 24, 24, 21, 24, 24, 26, 24, 24, 24, 24, 24, 24,
615 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
621 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, //QP=[0 ~12]
622 28, 32, 36, 40, 22, 26, 28, 32, 24, 26, 30, 26, 28, //QP=[13~25]
623 26, 28, 26, 26, 30, 28, 28, 28, 26, 28, 28, 26, 28, //QP=[26~38]
624 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, //QP=[39~51]
629 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, //QP=[0 ~12]
630 72, 80, 88, 104, 56, 64, 72, 80, 58, 68, 76, 64, 68, //QP=[13~25]
631 64, 68, 68, 64, 70, 70, 70, 70, 68, 68, 68, 68, 68, //QP=[26~38]
632 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, //QP=[39~51]
635 //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
637 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[0 ~12]
638 8, 9, 11, 12, 6, 7, 9, 10, 7, 8, 9, 8, 9, //QP=[13~25]
639 8, 9, 8, 8, 9, 9, 9, 9, 8, 8, 8, 8, 8, //QP=[26~38]
640 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, //QP=[39~51]
645 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, //QP=[0 ~12]
646 2, 3, 3, 3, 2, 2, 2, 3, 2, 2, 2, 2, 3, //QP=[13~25]
647 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, //QP=[26~38]
648 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, //QP=[39~51]
651 //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16X8_FIELD
653 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[0 ~12]
654 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[13~25]
655 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[26~38]
656 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[39~51]
659 //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8x8_FIELD
661 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[0 ~12]
662 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[13~25]
663 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[26~38]
664 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[39~51]
667 //LUTMODE_INTER_16x16, LUTMODE_INTER
669 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[0 ~12]
670 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[13~25]
671 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[26~38]
672 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[39~51]
677 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
678 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13~25]
679 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26~38]
680 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39~51]
685 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[0 ~12]
686 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[13~25]
687 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[26~38]
688 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[39~51]
691 //LUTMODE_INTRA_CHROMA
693 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
694 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13~25]
695 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26~38]
696 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39~51]
/* Skip-bias MV costs in quarter-pel units, one entry per MV cost bin. */
static const int vdenc_mv_cost_skipbias_qpel[8] = {
    0, 6, 6, 9, 10, 13, 14, 16
};
/* HME motion-vector costs: 8 cost bins x 52 QP entries.
 * Lower bins are flat across QP; the top bins ramp up at high QP. */
static const int vdenc_hme_cost[8][52] = {
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[0 ~12]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[13 ~25]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[26 ~38]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[39 ~51]
    },
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[0 ~12]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[13 ~25]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[26 ~38]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     //QP=[39 ~51]
    },
    {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[0 ~12]
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[13 ~25]
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[26 ~38]
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,     //QP=[39 ~51]
    },
    {
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[0 ~12]
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[13 ~25]
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[26 ~38]
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     //QP=[39 ~51]
    },
    {
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,    //QP=[0 ~12]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,    //QP=[13 ~25]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,    //QP=[26 ~38]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,    //QP=[39 ~51]
    },
    {
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,    //QP=[0 ~12]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,    //QP=[13 ~25]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,    //QP=[26 ~38]
        10, 10, 10, 10, 20, 30, 40, 50, 50, 50, 50, 50, 50,    //QP=[39 ~51]
    },
    {
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,    //QP=[0 ~12]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,    //QP=[13 ~25]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,    //QP=[26 ~38]
        20, 20, 20, 40, 60, 80, 100, 100, 100, 100, 100, 100, 100, //QP=[39 ~51]
    },
    {
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,    //QP=[0 ~12]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,    //QP=[13 ~25]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,    //QP=[26 ~38]
        20, 20, 30, 50, 100, 200, 200, 200, 200, 200, 200, 200, 200, //QP=[39 ~51]
    },
};
766 #define OUT_BUFFER_2DW(batch, bo, is_target, delta) do { \
768 OUT_BCS_RELOC64(batch, \
770 I915_GEM_DOMAIN_RENDER, \
771 is_target ? I915_GEM_DOMAIN_RENDER : 0, \
774 OUT_BCS_BATCH(batch, 0); \
775 OUT_BCS_BATCH(batch, 0); \
/* Emit a 3-DWORD buffer reference: the 2-DWORD relocation pair plus a
 * MOCS (cache attribute) DWORD. NOTE(review): the 'attr' argument is
 * unused here — MOCS always comes from i965->intel.mocs_state; confirm
 * callers do not expect 'attr' to be honored. */
#define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr) do {  \
        OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
        OUT_BCS_BATCH(batch, i965->intel.mocs_state);           \
    } while (0)
784 #define ALLOC_VDENC_BUFFER_RESOURCE(buffer, bfsize, des) do { \
785 buffer.type = I965_GPE_RESOURCE_BUFFER; \
786 buffer.width = bfsize; \
788 buffer.pitch = buffer.width; \
789 buffer.size = buffer.pitch; \
790 buffer.tiling = I915_TILING_NONE; \
791 i965_allocate_gpe_resource(i965->intel.bufmgr, \
/* Return the maximum vertical MV range for a given H.264 level
 * (level_idc, e.g. 30 for level 3.0). Values follow the level-based
 * MV range limits of H.264 Annex A Table A-1 — NOTE(review): units
 * assumed to match hardware expectation (quarter-pel); confirm vs PRM. */
static int
gen9_vdenc_get_max_vmv_range(int level)
{
    int max_vmv_range = 512;

    if (level <= 10)
        max_vmv_range = 256;
    else if (level <= 20)
        max_vmv_range = 512;
    else if (level <= 30)
        max_vmv_range = 1024;
    else
        max_vmv_range = 2048;

    return max_vmv_range;
}
815 map_44_lut_value(unsigned int v, unsigned char max)
817 unsigned int maxcost;
825 maxcost = ((max & 15) << (max >> 4));
831 d = (int)(log((double)v) / log(2.0)) - 3;
837 ret = (unsigned char)((d << 4) + (int)((v + (d == 0 ? 0 : (1 << (d - 1)))) >> d));
838 ret = (ret & 0xf) == 0 ? (ret | 8) : ret;
844 gen9_vdenc_update_misc_parameters(VADriverContextP ctx,
845 struct encode_state *encode_state,
846 struct intel_encoder_context *encoder_context)
848 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
851 vdenc_context->gop_size = encoder_context->brc.gop_size;
852 vdenc_context->ref_dist = encoder_context->brc.num_bframes_in_gop + 1;
854 if (vdenc_context->internal_rate_mode != I965_BRC_CQP &&
855 encoder_context->brc.need_reset) {
856 /* So far, vdenc doesn't support temporal layer */
857 vdenc_context->framerate = encoder_context->brc.framerate[0];
859 vdenc_context->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
860 vdenc_context->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
862 vdenc_context->max_bit_rate = ALIGN(encoder_context->brc.bits_per_second[0], 1000) / 1000;
863 vdenc_context->mb_brc_enabled = encoder_context->brc.mb_rate_control[0];
864 vdenc_context->brc_need_reset = (vdenc_context->brc_initted && encoder_context->brc.need_reset);
866 if (vdenc_context->internal_rate_mode == I965_BRC_CBR) {
867 vdenc_context->min_bit_rate = vdenc_context->max_bit_rate;
868 vdenc_context->target_bit_rate = vdenc_context->max_bit_rate;
870 assert(vdenc_context->internal_rate_mode == I965_BRC_VBR);
871 vdenc_context->min_bit_rate = vdenc_context->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
872 vdenc_context->target_bit_rate = vdenc_context->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
876 vdenc_context->mb_brc_enabled = 1;
877 vdenc_context->num_roi = MIN(encoder_context->brc.num_roi, 3);
878 vdenc_context->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
879 vdenc_context->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
880 vdenc_context->vdenc_streamin_enable = !!vdenc_context->num_roi;
882 for (i = 0; i < vdenc_context->num_roi; i++) {
883 vdenc_context->roi[i].left = encoder_context->brc.roi[i].left >> 4;
884 vdenc_context->roi[i].right = encoder_context->brc.roi[i].right >> 4;
885 vdenc_context->roi[i].top = encoder_context->brc.roi[i].top >> 4;
886 vdenc_context->roi[i].bottom = encoder_context->brc.roi[i].top >> 4;
887 vdenc_context->roi[i].value = encoder_context->brc.roi[i].value;
892 gen9_vdenc_update_parameters(VADriverContextP ctx,
894 struct encode_state *encode_state,
895 struct intel_encoder_context *encoder_context)
897 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
898 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
899 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
901 if (profile == VAProfileH264High)
902 vdenc_context->transform_8x8_mode_enable = !!pic_param->pic_fields.bits.transform_8x8_mode_flag;
904 vdenc_context->transform_8x8_mode_enable = 0;
906 vdenc_context->frame_width_in_mbs = seq_param->picture_width_in_mbs;
907 vdenc_context->frame_height_in_mbs = seq_param->picture_height_in_mbs;
909 vdenc_context->frame_width = vdenc_context->frame_width_in_mbs * 16;
910 vdenc_context->frame_height = vdenc_context->frame_height_in_mbs * 16;
912 vdenc_context->down_scaled_width_in_mb4x = WIDTH_IN_MACROBLOCKS(vdenc_context->frame_width / SCALE_FACTOR_4X);
913 vdenc_context->down_scaled_height_in_mb4x = HEIGHT_IN_MACROBLOCKS(vdenc_context->frame_height / SCALE_FACTOR_4X);
914 vdenc_context->down_scaled_width_4x = vdenc_context->down_scaled_width_in_mb4x * 16;
915 vdenc_context->down_scaled_height_4x = ((vdenc_context->down_scaled_height_in_mb4x + 1) >> 1) * 16;
916 vdenc_context->down_scaled_height_4x = ALIGN(vdenc_context->down_scaled_height_4x, 32) << 1;
918 gen9_vdenc_update_misc_parameters(ctx, encode_state, encoder_context);
920 vdenc_context->current_pass = 0;
921 vdenc_context->num_passes = 1;
923 if (vdenc_context->internal_rate_mode == I965_BRC_CBR ||
924 vdenc_context->internal_rate_mode == I965_BRC_VBR)
925 vdenc_context->brc_enabled = 1;
927 vdenc_context->brc_enabled = 0;
929 if (vdenc_context->brc_enabled &&
930 (!vdenc_context->init_vbv_buffer_fullness_in_bit ||
931 !vdenc_context->vbv_buffer_size_in_bit ||
932 !vdenc_context->max_bit_rate ||
933 !vdenc_context->target_bit_rate ||
934 !vdenc_context->framerate.num ||
935 !vdenc_context->framerate.den))
936 vdenc_context->brc_enabled = 0;
938 if (!vdenc_context->brc_enabled) {
939 vdenc_context->target_bit_rate = 0;
940 vdenc_context->max_bit_rate = 0;
941 vdenc_context->min_bit_rate = 0;
942 vdenc_context->init_vbv_buffer_fullness_in_bit = 0;
943 vdenc_context->vbv_buffer_size_in_bit = 0;
945 vdenc_context->num_passes = NUM_OF_BRC_PAK_PASSES;
950 gen9_vdenc_avc_calculate_mode_cost(VADriverContextP ctx,
951 struct encode_state *encode_state,
952 struct intel_encoder_context *encoder_context,
955 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
956 unsigned int frame_type = vdenc_context->frame_type;
958 memset(vdenc_context->mode_cost, 0, sizeof(vdenc_context->mode_cost));
959 memset(vdenc_context->mv_cost, 0, sizeof(vdenc_context->mv_cost));
960 memset(vdenc_context->hme_mv_cost, 0, sizeof(vdenc_context->hme_mv_cost));
962 vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_NONPRED] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_NONPRED][qp]), 0x6f);
963 vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_16x16][qp]), 0x8f);
964 vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_8x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_8x8][qp]), 0x8f);
965 vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_4x4] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_4x4][qp]), 0x8f);
967 if (frame_type == VDENC_FRAME_P) {
968 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x16][qp]), 0x8f);
969 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x8][qp]), 0x8f);
970 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X8Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X8Q][qp]), 0x6f);
971 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X4Q][qp]), 0x6f);
972 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_4X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_4X4Q][qp]), 0x6f);
973 vdenc_context->mode_cost[VDENC_LUTMODE_REF_ID] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_REF_ID][qp]), 0x6f);
975 vdenc_context->mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[0]), 0x6f);
976 vdenc_context->mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[1]), 0x6f);
977 vdenc_context->mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[2]), 0x6f);
978 vdenc_context->mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[3]), 0x6f);
979 vdenc_context->mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[4]), 0x6f);
980 vdenc_context->mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[5]), 0x6f);
981 vdenc_context->mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[6]), 0x6f);
982 vdenc_context->mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[7]), 0x6f);
984 vdenc_context->hme_mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_hme_cost[0][qp]), 0x6f);
985 vdenc_context->hme_mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_hme_cost[1][qp]), 0x6f);
986 vdenc_context->hme_mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_hme_cost[2][qp]), 0x6f);
987 vdenc_context->hme_mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_hme_cost[3][qp]), 0x6f);
988 vdenc_context->hme_mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_hme_cost[4][qp]), 0x6f);
989 vdenc_context->hme_mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_hme_cost[5][qp]), 0x6f);
990 vdenc_context->hme_mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_hme_cost[6][qp]), 0x6f);
991 vdenc_context->hme_mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_hme_cost[7][qp]), 0x6f);
996 gen9_vdenc_update_roi_in_streamin_state(VADriverContextP ctx,
997 struct intel_encoder_context *encoder_context)
999 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1000 struct gen9_vdenc_streamin_state *streamin_state;
1003 if (!vdenc_context->num_roi)
1006 streamin_state = (struct gen9_vdenc_streamin_state *)i965_map_gpe_resource(&vdenc_context->vdenc_streamin_res);
1008 if (!streamin_state)
1011 for (col = 0; col < vdenc_context->frame_width_in_mbs; col++) {
1012 for (row = 0; row < vdenc_context->frame_height_in_mbs; row++) {
1013 streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = 0; /* non-ROI region */
1015 /* The last one has higher priority */
1016 for (i = vdenc_context->num_roi - 1; i >= 0; i--) {
1017 if ((col >= vdenc_context->roi[i].left && col <= vdenc_context->roi[i].right) &&
1018 (row >= vdenc_context->roi[i].top && row <= vdenc_context->roi[i].bottom)) {
1019 streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = i + 1;
1027 i965_unmap_gpe_resource(&vdenc_context->vdenc_streamin_res);
/*
 * Per-frame preparation for VDENC AVC encoding: refreshes the GPE resources
 * (reconstructed/reference/input surfaces, bitstream and status buffers,
 * row-store scratch buffers, StreamIn buffer), resolves the list0 reference
 * indexes for the first slice, derives the frame type and slice QP, then
 * recomputes the mode-cost tables and ROI StreamIn state.
 * Returns VA_STATUS_SUCCESS or an error status.
 */
1031 gen9_vdenc_avc_prepare(VADriverContextP ctx,
1033 struct encode_state *encode_state,
1034 struct intel_encoder_context *encoder_context)
1036 struct i965_driver_data *i965 = i965_driver_data(ctx);
1037 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1038 struct i965_coded_buffer_segment *coded_buffer_segment;
1039 struct object_surface *obj_surface;
1040 struct object_buffer *obj_buffer;
1041 VAEncPictureParameterBufferH264 *pic_param;
1042 VAEncSliceParameterBufferH264 *slice_param;
1043 VDEncAvcSurface *vdenc_avc_surface;
1045 int i, j, enable_avc_ildb = 0;
1049 gen9_vdenc_update_parameters(ctx, profile, encode_state, encoder_context);
/* Deblocking is considered enabled for the whole frame as soon as one
 * slice does not fully disable it (disable_deblocking_filter_idc != 1). */
1051 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
1052 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
1053 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
1055 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
1056 assert((slice_param->slice_type == SLICE_TYPE_I) ||
1057 (slice_param->slice_type == SLICE_TYPE_SI) ||
1058 (slice_param->slice_type == SLICE_TYPE_P) ||
1059 (slice_param->slice_type == SLICE_TYPE_SP) ||
1060 (slice_param->slice_type == SLICE_TYPE_B));
1062 if (slice_param->disable_deblocking_filter_idc != 1) {
1063 enable_avc_ildb = 1;
1071 /* Setup current frame */
1072 obj_surface = encode_state->reconstructed_object;
1073 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Lazily attach a VDEncAvcSurface (with its 4x downscaled companion
 * surface used by HME) to the reconstructed surface. */
1075 if (obj_surface->private_data == NULL) {
1076 vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
1077 assert(vdenc_avc_surface);
1079 vdenc_avc_surface->ctx = ctx;
1080 i965_CreateSurfaces(ctx,
1081 vdenc_context->down_scaled_width_4x,
1082 vdenc_context->down_scaled_height_4x,
1083 VA_RT_FORMAT_YUV420,
1085 &vdenc_avc_surface->scaled_4x_surface_id);
1086 vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
1087 assert(vdenc_avc_surface->scaled_4x_surface_obj);
1088 i965_check_alloc_surface_bo(ctx,
1089 vdenc_avc_surface->scaled_4x_surface_obj,
1094 obj_surface->private_data = (void *)vdenc_avc_surface;
1095 obj_surface->free_private_data = (void *)vdenc_free_avc_surface;
1098 vdenc_avc_surface = (VDEncAvcSurface *)obj_surface->private_data;
1099 assert(vdenc_avc_surface->scaled_4x_surface_obj);
1101 /* Reconstructed surfaces */
1102 i965_free_gpe_resource(&vdenc_context->recon_surface_res);
1103 i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
1104 i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
1105 i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
1107 i965_object_surface_to_2d_gpe_resource(&vdenc_context->recon_surface_res, obj_surface);
1108 i965_object_surface_to_2d_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res, vdenc_avc_surface->scaled_4x_surface_obj);
/* Only one of the two deblocking output resources is bound, depending on
 * whether the in-loop deblocking filter is active for this frame. */
1110 if (enable_avc_ildb) {
1111 i965_object_surface_to_2d_gpe_resource(&vdenc_context->post_deblocking_output_res, obj_surface);
1113 i965_object_surface_to_2d_gpe_resource(&vdenc_context->pre_deblocking_output_res, obj_surface);
1117 /* Reference surfaces */
1118 for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
1119 assert(ARRAY_ELEMS(vdenc_context->list_reference_res) ==
1120 ARRAY_ELEMS(vdenc_context->list_scaled_4x_reference_res));
1121 i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
1122 i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
1123 obj_surface = encode_state->reference_objects[i];
1125 if (obj_surface && obj_surface->bo) {
1126 i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_reference_res[i], obj_surface);
1128 if (obj_surface->private_data == NULL) {
1129 vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
1130 assert(vdenc_avc_surface);
1132 vdenc_avc_surface->ctx = ctx;
1133 i965_CreateSurfaces(ctx,
1134 vdenc_context->down_scaled_width_4x,
1135 vdenc_context->down_scaled_height_4x,
1136 VA_RT_FORMAT_YUV420,
1138 &vdenc_avc_surface->scaled_4x_surface_id);
1139 vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
1140 assert(vdenc_avc_surface->scaled_4x_surface_obj);
1141 i965_check_alloc_surface_bo(ctx,
1142 vdenc_avc_surface->scaled_4x_surface_obj,
1147 obj_surface->private_data = vdenc_avc_surface;
/* NOTE(review): this allocation is a VDEncAvcSurface, yet the destructor
 * registered here is gen_free_avc_surface, while the reconstructed surface
 * above registers vdenc_free_avc_surface for the same type. Confirm which
 * destructor matches VDEncAvcSurface — this looks like a mismatch that
 * could leak or misfree the scaled 4x surface. */
1148 obj_surface->free_private_data = gen_free_avc_surface;
1151 vdenc_avc_surface = obj_surface->private_data;
1152 i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i], vdenc_avc_surface->scaled_4x_surface_obj);
1156 /* Input YUV surface */
1157 i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
1158 i965_object_surface_to_2d_gpe_resource(&vdenc_context->uncompressed_input_surface_res, encode_state->input_yuv_object);
1160 /* Encoded bitstream */
1161 obj_buffer = encode_state->coded_buf_object;
1162 bo = obj_buffer->buffer_store->bo;
1163 i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
1164 i965_dri_object_to_buffer_gpe_resource(&vdenc_context->compressed_bitstream.res, bo);
1165 vdenc_context->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
1166 vdenc_context->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
/* The status buffer shares the coded buffer BO; it lives in the
 * codec_private_data area ahead of the bitstream start offset.
 * ("bffuer" is a pre-existing field-name typo kept for consistency.) */
1169 i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
1170 i965_dri_object_to_buffer_gpe_resource(&vdenc_context->status_bffuer.res, bo);
1171 vdenc_context->status_bffuer.base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
1172 vdenc_context->status_bffuer.size = ALIGN(sizeof(struct gen9_vdenc_status), 64);
1173 vdenc_context->status_bffuer.bytes_per_frame_offset = offsetof(struct gen9_vdenc_status, bytes_per_frame);
1174 assert(vdenc_context->status_bffuer.base_offset + vdenc_context->status_bffuer.size <
1175 vdenc_context->compressed_bitstream.start_offset);
1179 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
1180 coded_buffer_segment->mapped = 0;
1181 coded_buffer_segment->codec = encoder_context->codec;
1182 coded_buffer_segment->status_support = 1;
/* Zero the status area so stale data from a previous frame is not read. */
1184 pbuffer = bo->virtual;
1185 pbuffer += vdenc_context->status_bffuer.base_offset;
1186 memset(pbuffer, 0, vdenc_context->status_bffuer.size);
/* Row-store scratch buffers are sized per MB row of the frame. */
1190 i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
1191 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_intra_row_store_scratch_res,
1192 vdenc_context->frame_width_in_mbs * 64,
1193 "Intra row store scratch buffer");
1195 i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
1196 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_deblocking_filter_row_store_scratch_res,
1197 vdenc_context->frame_width_in_mbs * 256,
1198 "Deblocking filter row store scratch buffer");
1200 i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
1201 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_bsd_mpc_row_store_scratch_res,
1202 vdenc_context->frame_width_in_mbs * 128,
1203 "BSD/MPC row store scratch buffer");
1205 i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
1206 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_row_store_scratch_res,
1207 vdenc_context->frame_width_in_mbs * 64,
1208 "VDENC row store scratch buffer");
/* One 64-byte StreamIn record per macroblock. */
1210 assert(sizeof(struct gen9_vdenc_streamin_state) == 64);
1211 i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
1212 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_streamin_res,
1213 vdenc_context->frame_width_in_mbs *
1214 vdenc_context->frame_height_in_mbs *
1215 sizeof(struct gen9_vdenc_streamin_state),
1216 "VDENC StreamIn buffer");
1219 * Calculate the index for each reference surface in list0 for the first slice
1220 * TODO: other slices
1222 pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1223 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1225 vdenc_context->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
1227 if (slice_param->num_ref_idx_active_override_flag)
1228 vdenc_context->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
1230 if (vdenc_context->num_refs[0] > ARRAY_ELEMS(vdenc_context->list_ref_idx[0]))
1231 return VA_STATUS_ERROR_INVALID_VALUE;
/* Map each RefPicList0 entry to its slot in reference_objects by surface id. */
1233 for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_ref_idx[0]); i++) {
1234 VAPictureH264 *va_pic;
1236 assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(vdenc_context->list_ref_idx[0]));
1237 vdenc_context->list_ref_idx[0][i] = 0;
1239 if (i >= vdenc_context->num_refs[0])
1242 va_pic = &slice_param->RefPicList0[i];
1244 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
1245 obj_surface = encode_state->reference_objects[j];
1249 obj_surface->base.id == va_pic->picture_id) {
1251 assert(obj_surface->base.id != VA_INVALID_SURFACE);
1252 vdenc_context->list_ref_idx[0][i] = j;
/* Frame type is derived from the first slice only (no B-frame support here). */
1259 if (slice_param->slice_type == SLICE_TYPE_I ||
1260 slice_param->slice_type == SLICE_TYPE_SI)
1261 vdenc_context->frame_type = VDENC_FRAME_I;
1263 vdenc_context->frame_type = VDENC_FRAME_P;
1265 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1267 gen9_vdenc_avc_calculate_mode_cost(ctx, encode_state, encoder_context, qp);
1268 gen9_vdenc_update_roi_in_streamin_state(ctx, encoder_context);
1270 return VA_STATUS_SUCCESS;
1274 gen9_vdenc_huc_pipe_mode_select(VADriverContextP ctx,
1275 struct intel_encoder_context *encoder_context,
1276 struct huc_pipe_mode_select_parameter *params)
1278 struct intel_batchbuffer *batch = encoder_context->base.batch;
1280 BEGIN_BCS_BATCH(batch, 3);
1282 OUT_BCS_BATCH(batch, HUC_PIPE_MODE_SELECT | (3 - 2));
1283 OUT_BCS_BATCH(batch,
1284 (params->huc_stream_object_enable << 10) |
1285 (params->indirect_stream_out_enable << 4));
1286 OUT_BCS_BATCH(batch,
1287 params->media_soft_reset_counter);
1289 ADVANCE_BCS_BATCH(batch);
1293 gen9_vdenc_huc_imem_state(VADriverContextP ctx,
1294 struct intel_encoder_context *encoder_context,
1295 struct huc_imem_state_parameter *params)
1297 struct intel_batchbuffer *batch = encoder_context->base.batch;
1299 BEGIN_BCS_BATCH(batch, 5);
1301 OUT_BCS_BATCH(batch, HUC_IMEM_STATE | (5 - 2));
1302 OUT_BCS_BATCH(batch, 0);
1303 OUT_BCS_BATCH(batch, 0);
1304 OUT_BCS_BATCH(batch, 0);
1305 OUT_BCS_BATCH(batch, params->huc_firmware_descriptor);
1307 ADVANCE_BCS_BATCH(batch);
1311 gen9_vdenc_huc_dmem_state(VADriverContextP ctx,
1312 struct intel_encoder_context *encoder_context,
1313 struct huc_dmem_state_parameter *params)
1315 struct i965_driver_data *i965 = i965_driver_data(ctx);
1316 struct intel_batchbuffer *batch = encoder_context->base.batch;
1318 BEGIN_BCS_BATCH(batch, 6);
1320 OUT_BCS_BATCH(batch, HUC_DMEM_STATE | (6 - 2));
1321 OUT_BUFFER_3DW(batch, params->huc_data_source_res->bo, 0, 0, 0);
1322 OUT_BCS_BATCH(batch, params->huc_data_destination_base_address);
1323 OUT_BCS_BATCH(batch, params->huc_data_length);
1325 ADVANCE_BCS_BATCH(batch);
1330 gen9_vdenc_huc_cfg_state(VADriverContextP ctx,
1331 struct intel_encoder_context *encoder_context,
1332 struct huc_cfg_state_parameter *params)
1334 struct intel_batchbuffer *batch = encoder_context->base.batch;
1336 BEGIN_BCS_BATCH(batch, 2);
1338 OUT_BCS_BATCH(batch, HUC_CFG_STATE | (2 - 2));
1339 OUT_BCS_BATCH(batch, !!params->force_reset);
1341 ADVANCE_BCS_BATCH(batch);
1345 gen9_vdenc_huc_virtual_addr_state(VADriverContextP ctx,
1346 struct intel_encoder_context *encoder_context,
1347 struct huc_virtual_addr_parameter *params)
1349 struct i965_driver_data *i965 = i965_driver_data(ctx);
1350 struct intel_batchbuffer *batch = encoder_context->base.batch;
1353 BEGIN_BCS_BATCH(batch, 49);
1355 OUT_BCS_BATCH(batch, HUC_VIRTUAL_ADDR_STATE | (49 - 2));
1357 for (i = 0; i < 16; i++) {
1358 if (params->regions[i].huc_surface_res && params->regions[i].huc_surface_res->bo)
1359 OUT_BUFFER_3DW(batch,
1360 params->regions[i].huc_surface_res->bo,
1361 !!params->regions[i].is_target, 0, 0);
1363 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1366 ADVANCE_BCS_BATCH(batch);
1370 gen9_vdenc_huc_ind_obj_base_addr_state(VADriverContextP ctx,
1371 struct intel_encoder_context *encoder_context,
1372 struct huc_ind_obj_base_addr_parameter *params)
1374 struct i965_driver_data *i965 = i965_driver_data(ctx);
1375 struct intel_batchbuffer *batch = encoder_context->base.batch;
1377 BEGIN_BCS_BATCH(batch, 11);
1379 OUT_BCS_BATCH(batch, HUC_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
1381 if (params->huc_indirect_stream_in_object_res)
1382 OUT_BUFFER_3DW(batch,
1383 params->huc_indirect_stream_in_object_res->bo,
1386 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1388 OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */
1390 if (params->huc_indirect_stream_out_object_res)
1391 OUT_BUFFER_3DW(batch,
1392 params->huc_indirect_stream_out_object_res->bo,
1395 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1397 OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */
1399 ADVANCE_BCS_BATCH(batch);
1403 gen9_vdenc_huc_store_huc_status2(VADriverContextP ctx,
1404 struct intel_encoder_context *encoder_context)
1406 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1407 struct intel_batchbuffer *batch = encoder_context->base.batch;
1408 struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
1409 struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
1411 /* Write HUC_STATUS2 mask (1 << 6) */
1412 memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
1413 mi_store_data_imm_params.bo = vdenc_context->huc_status2_res.bo;
1414 mi_store_data_imm_params.offset = 0;
1415 mi_store_data_imm_params.dw0 = (1 << 6);
1416 gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
1418 /* Store HUC_STATUS2 */
1419 memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
1420 mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS2;
1421 mi_store_register_mem_params.bo = vdenc_context->huc_status2_res.bo;
1422 mi_store_register_mem_params.offset = 4;
1423 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
1427 gen9_vdenc_huc_stream_object(VADriverContextP ctx,
1428 struct intel_encoder_context *encoder_context,
1429 struct huc_stream_object_parameter *params)
1431 struct intel_batchbuffer *batch = encoder_context->base.batch;
1433 BEGIN_BCS_BATCH(batch, 5);
1435 OUT_BCS_BATCH(batch, HUC_STREAM_OBJECT | (5 - 2));
1436 OUT_BCS_BATCH(batch, params->indirect_stream_in_data_length);
1437 OUT_BCS_BATCH(batch,
1438 (1 << 31) | /* Must be 1 */
1439 params->indirect_stream_in_start_address);
1440 OUT_BCS_BATCH(batch, params->indirect_stream_out_start_address);
1441 OUT_BCS_BATCH(batch,
1442 (!!params->huc_bitstream_enable << 29) |
1443 (params->length_mode << 27) |
1444 (!!params->stream_out << 26) |
1445 (!!params->emulation_prevention_byte_removal << 25) |
1446 (!!params->start_code_search_engine << 24) |
1447 (params->start_code_byte2 << 16) |
1448 (params->start_code_byte1 << 8) |
1449 params->start_code_byte0);
1451 ADVANCE_BCS_BATCH(batch);
1455 gen9_vdenc_huc_start(VADriverContextP ctx,
1456 struct intel_encoder_context *encoder_context,
1457 struct huc_start_parameter *params)
1459 struct intel_batchbuffer *batch = encoder_context->base.batch;
1461 BEGIN_BCS_BATCH(batch, 2);
1463 OUT_BCS_BATCH(batch, HUC_START | (2 - 2));
1464 OUT_BCS_BATCH(batch, !!params->last_stream_object);
1466 ADVANCE_BCS_BATCH(batch);
1470 gen9_vdenc_vd_pipeline_flush(VADriverContextP ctx,
1471 struct intel_encoder_context *encoder_context,
1472 struct vd_pipeline_flush_parameter *params)
1474 struct intel_batchbuffer *batch = encoder_context->base.batch;
1476 BEGIN_BCS_BATCH(batch, 2);
1478 OUT_BCS_BATCH(batch, VD_PIPELINE_FLUSH | (2 - 2));
1479 OUT_BCS_BATCH(batch,
1480 params->mfx_pipeline_command_flush << 19 |
1481 params->mfl_pipeline_command_flush << 18 |
1482 params->vdenc_pipeline_command_flush << 17 |
1483 params->hevc_pipeline_command_flush << 16 |
1484 params->vd_command_message_parser_done << 4 |
1485 params->mfx_pipeline_done << 3 |
1486 params->mfl_pipeline_done << 2 |
1487 params->vdenc_pipeline_done << 1 |
1488 params->hevc_pipeline_done);
1490 ADVANCE_BCS_BATCH(batch);
/*
 * Return the maximum macroblock processing rate (MaxMBPS) for the given
 * H.264 level_idc. Defaults to 11880 (the level 2.0 limit) when the level
 * is not matched by the switch below.
 * NOTE(review): the switch's case arms are outside this view — verify the
 * per-level values against H.264 Annex A, Table A-1.
 */
1494 gen9_vdenc_get_max_mbps(int level_idc)
1496 int max_mbps = 11880;
1498 switch (level_idc) {
1552 gen9_vdenc_get_profile_level_max_frame(VADriverContextP ctx,
1553 struct intel_encoder_context *encoder_context,
1556 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1557 double bits_per_mb, tmpf;
1558 int max_mbps, num_mb_per_frame;
1559 uint64_t max_byte_per_frame0, max_byte_per_frame1;
1562 if (level_idc >= 31 && level_idc <= 40)
1565 bits_per_mb = 192.0;
1567 max_mbps = gen9_vdenc_get_max_mbps(level_idc);
1568 num_mb_per_frame = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs;
1570 tmpf = (double)num_mb_per_frame;
1572 if (tmpf < max_mbps / 172.0)
1573 tmpf = max_mbps / 172.0;
1575 max_byte_per_frame0 = (uint64_t)(tmpf * bits_per_mb);
1576 max_byte_per_frame1 = (uint64_t)(((double)max_mbps * vdenc_context->framerate.den) /
1577 (double)vdenc_context->framerate.num * bits_per_mb);
1579 /* TODO: check VAEncMiscParameterTypeMaxFrameSize */
1580 ret = (unsigned int)MIN(max_byte_per_frame0, max_byte_per_frame1);
1581 ret = (unsigned int)MIN(ret, vdenc_context->frame_height * vdenc_context->frame_height);
/*
 * Estimate an initial QP for BRC from the target bitrate, frame rate and
 * raw NV12 frame size (width * height * 3/2), using a log-linear model
 * between the anchor points (x0, y0) and (x1, y1), then adjust it by a
 * VBV-derived delta and clamp it into the valid H.264 QP range [1, 51].
 * NOTE(review): the declaration of qp/delat_qp and the tail of the function
 * (application of delat_qp, final return) are outside this view.
 */
1587 gen9_vdenc_calculate_initial_qp(VADriverContextP ctx,
1588 struct encode_state *encode_state,
1589 struct intel_encoder_context *encoder_context)
1591 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1592 float x0 = 0, y0 = 1.19f, x1 = 1.75f, y1 = 1.75f;
1593 unsigned frame_size;
/* frame_size: bytes of one uncompressed NV12 frame. */
1596 frame_size = (vdenc_context->frame_width * vdenc_context->frame_height * 3 / 2);
/* Interpolate log10(bits-per-frame ratio) between the two anchors. */
1597 qp = (int)(1.0 / 1.2 * pow(10.0,
1598 (log10(frame_size * 2.0 / 3.0 * vdenc_context->framerate.num /
1599 ((double)vdenc_context->target_bit_rate * 1000.0 * vdenc_context->framerate.den)) - x0) *
1600 (y1 - y0) / (x1 - x0) + y0) + 0.5);
/* Larger VBV buffers (relative to the per-frame budget) reduce the delta. */
1602 delat_qp = (int)(9 - (vdenc_context->vbv_buffer_size_in_bit * ((double)vdenc_context->framerate.num) /
1603 ((double)vdenc_context->target_bit_rate * 1000.0 * vdenc_context->framerate.den)));
/* CLAMP(min, max, value): keep qp within the valid range. */
1607 qp = CLAMP(1, 51, qp);
/*
 * Populate the HuC BRC init/reset DMEM block from the encoder context:
 * rate-control targets, frame geometry, deviation thresholds scaled by the
 * bits-per-frame / VBV ratio, initial QP, MB-BRC deltas and second-pass
 * thresholds. The buffer is mapped, zeroed, filled and unmapped here.
 */
1617 gen9_vdenc_update_huc_brc_init_dmem(VADriverContextP ctx,
1618 struct encode_state *encode_state,
1619 struct intel_encoder_context *encoder_context)
1621 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1622 struct huc_brc_init_dmem *dmem;
1623 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1624 double input_bits_per_frame, bps_ratio;
/* Per-frame bit budget derived from max bitrate (kbps) and frame rate. */
1627 vdenc_context->brc_init_reset_input_bits_per_frame =
1628 ((double)vdenc_context->max_bit_rate * 1000.0 * vdenc_context->framerate.den) / vdenc_context->framerate.num;
1629 vdenc_context->brc_init_current_target_buf_full_in_bits = vdenc_context->brc_init_reset_input_bits_per_frame;
1630 vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
1632 dmem = (struct huc_brc_init_dmem *)i965_map_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1637 memset(dmem, 0, sizeof(*dmem));
/* brc_func: 2 = reset of an already-initialized BRC, 0 = first init. */
1639 dmem->brc_func = vdenc_context->brc_initted ? 2 : 0;
1641 dmem->frame_width = vdenc_context->frame_width;
1642 dmem->frame_height = vdenc_context->frame_height;
/* Rates are kept in bits/s inside the DMEM (context stores kbps). */
1644 dmem->target_bitrate = vdenc_context->target_bit_rate * 1000;
1645 dmem->min_rate = vdenc_context->min_bit_rate * 1000;
1646 dmem->max_rate = vdenc_context->max_bit_rate * 1000;
1647 dmem->buffer_size = vdenc_context->vbv_buffer_size_in_bit;
1648 dmem->init_buffer_fullness = vdenc_context->init_vbv_buffer_fullness_in_bit;
/* NOTE(review): this clamp compares against init_vbv_buffer_fullness_in_bit,
 * the very value just assigned above, so the condition can never be true.
 * The clamp target on the next line indicates the intended bound is
 * vbv_buffer_size_in_bit (fullness must not exceed the buffer size) —
 * confirm and fix the comparison. */
1650 if (dmem->init_buffer_fullness > vdenc_context->init_vbv_buffer_fullness_in_bit)
1651 dmem->init_buffer_fullness = vdenc_context->vbv_buffer_size_in_bit;
/* brc_flag: 0x10 = CBR, 0x20 = VBR. */
1653 if (vdenc_context->internal_rate_mode == I965_BRC_CBR)
1654 dmem->brc_flag |= 0x10;
1655 else if (vdenc_context->internal_rate_mode == I965_BRC_VBR)
1656 dmem->brc_flag |= 0x20;
1658 dmem->frame_rate_m = vdenc_context->framerate.num;
1659 dmem->frame_rate_d = vdenc_context->framerate.den;
1661 dmem->profile_level_max_frame = gen9_vdenc_get_profile_level_max_frame(ctx, encoder_context, seq_param->level_idc);
1663 if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1664 dmem->num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
/* bps_ratio: per-frame bit budget relative to the VBV drain per frame;
 * clamped to [0.1, 3.5] before scaling the deviation thresholds below. */
1669 input_bits_per_frame = ((double)vdenc_context->max_bit_rate * 1000.0 * vdenc_context->framerate.den) / vdenc_context->framerate.num;
1670 bps_ratio = input_bits_per_frame /
1671 ((double)vdenc_context->vbv_buffer_size_in_bit * vdenc_context->framerate.den / vdenc_context->framerate.num);
1673 if (bps_ratio < 0.1)
1676 if (bps_ratio > 3.5)
/* Deviation thresholds: 4 negative then 4 positive entries per table,
 * each base constant raised to the bps_ratio power. */
1679 for (i = 0; i < 4; i++) {
1680 dmem->dev_thresh_pb0[i] = (char)(-50 * pow(vdenc_brc_dev_threshpb0_fp_neg[i], bps_ratio));
1681 dmem->dev_thresh_pb0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshpb0_fp_pos[i], bps_ratio));
1683 dmem->dev_thresh_i0[i] = (char)(-50 * pow(vdenc_brc_dev_threshi0_fp_neg[i], bps_ratio));
1684 dmem->dev_thresh_i0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshi0_fp_pos[i], bps_ratio));
1686 dmem->dev_thresh_vbr0[i] = (char)(-50 * pow(vdenc_brc_dev_threshvbr0_neg[i], bps_ratio));
1687 dmem->dev_thresh_vbr0[i + 4] = (char)(100 * pow(vdenc_brc_dev_threshvbr0_pos[i], bps_ratio));
1690 dmem->init_qp_ip = gen9_vdenc_calculate_initial_qp(ctx, encode_state, encoder_context);
/* Per-MB QP control: distortion-bucket QP deltas used when MB BRC is on. */
1692 if (vdenc_context->mb_brc_enabled) {
1693 dmem->mb_qp_ctrl = 1;
1694 dmem->dist_qp_delta[0] = -5;
1695 dmem->dist_qp_delta[1] = -2;
1696 dmem->dist_qp_delta[2] = 2;
1697 dmem->dist_qp_delta[3] = 5;
1700 dmem->slice_size_ctrl_en = 0; /* TODO: add support for slice size control */
1702 dmem->oscillation_qp_delta = 0; /* TODO: add support */
1703 dmem->first_iframe_no_hrd_check = 0;/* TODO: add support */
1705 // 2nd re-encode pass if possible
/* 4K-class frames (>= 3840x2160 in MBs) get looser second-pass triggers. */
1706 if (vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs >= (3840 * 2160 / 256)) {
1707 dmem->top_qp_delta_thr_for_2nd_pass = 5;
1708 dmem->bottom_qp_delta_thr_for_2nd_pass = 5;
1709 dmem->top_frame_size_threshold_for_2nd_pass = 80;
1710 dmem->bottom_frame_size_threshold_for_2nd_pass = 80;
1712 dmem->top_qp_delta_thr_for_2nd_pass = 2;
1713 dmem->bottom_qp_delta_thr_for_2nd_pass = 1;
1714 dmem->top_frame_size_threshold_for_2nd_pass = 32;
1715 dmem->bottom_frame_size_threshold_for_2nd_pass = 24;
1718 dmem->qp_select_for_first_pass = 1;
1719 dmem->mb_header_compensation = 1;
1720 dmem->delta_qp_adaptation = 1;
1721 dmem->max_crf_quality_factor = 52;
1723 dmem->crf_quality_factor = 0; /* TODO: add support for CRF */
1724 dmem->scenario_info = 0;
1726 memcpy(&dmem->estrate_thresh_i0, vdenc_brc_estrate_thresh_i0, sizeof(dmem->estrate_thresh_i0));
1727 memcpy(&dmem->estrate_thresh_p0, vdenc_brc_estrate_thresh_p0, sizeof(dmem->estrate_thresh_p0));
1729 i965_unmap_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
/*
 * Build the HuC BRC init/reset command sequence on the video ring:
 * IMEM_STATE (select BRC init/reset firmware) -> PIPE_MODE_SELECT ->
 * DMEM_STATE (upload the init DMEM) -> VIRTUAL_ADDR_STATE (history buffer
 * as the write target) -> IND_OBJ_BASE_ADDR_STATE (dummy stream-in) ->
 * STREAM_OBJECT -> status readback -> HUC_START -> VD_PIPELINE_FLUSH ->
 * MI_FLUSH_DW. The command order is mandated by the HuC programming model;
 * do not reorder.
 */
1733 gen9_vdenc_huc_brc_init_reset(VADriverContextP ctx,
1734 struct encode_state *encode_state,
1735 struct intel_encoder_context *encoder_context)
1737 struct intel_batchbuffer *batch = encoder_context->base.batch;
1738 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1739 struct huc_pipe_mode_select_parameter pipe_mode_select_params;
1740 struct huc_imem_state_parameter imem_state_params;
1741 struct huc_dmem_state_parameter dmem_state_params;
1742 struct huc_virtual_addr_parameter virtual_addr_params;
1743 struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
1744 struct huc_stream_object_parameter stream_object_params;
1745 struct huc_start_parameter start_params;
1746 struct vd_pipeline_flush_parameter pipeline_flush_params;
1747 struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
/* Reset the running BRC target to the initial VBV fullness. */
1749 vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
/* Load the BRC init/reset HuC firmware. */
1751 memset(&imem_state_params, 0, sizeof(imem_state_params));
1752 imem_state_params.huc_firmware_descriptor = HUC_BRC_INIT_RESET;
1753 gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);
1755 memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
1756 gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);
/* Fill and upload the init DMEM block (length must be 64-byte aligned). */
1758 gen9_vdenc_update_huc_brc_init_dmem(ctx, encode_state, encoder_context);
1759 memset(&dmem_state_params, 0, sizeof(dmem_state_params));
1760 dmem_state_params.huc_data_source_res = &vdenc_context->brc_init_reset_dmem_res;
1761 dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
1762 dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_init_dmem), 64);
1763 gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);
/* Region 0: BRC history buffer, written by the firmware. */
1765 memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
1766 virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
1767 virtual_addr_params.regions[0].is_target = 1;
1768 gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);
/* The init pass consumes no real bitstream: point stream-in at a dummy. */
1770 memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
1771 ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
1772 ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
1773 gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);
1775 memset(&stream_object_params, 0, sizeof(stream_object_params));
1776 stream_object_params.indirect_stream_in_data_length = 1;
1777 stream_object_params.indirect_stream_in_start_address = 0;
1778 gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);
/* Capture HUC_STATUS2 so completion can be checked later. */
1780 gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);
1782 memset(&start_params, 0, sizeof(start_params));
1783 start_params.last_stream_object = 1;
1784 gen9_vdenc_huc_start(ctx, encoder_context, &start_params);
/* Drain the HEVC/VDENC pipe before invalidating video caches. */
1786 memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
1787 pipeline_flush_params.hevc_pipeline_done = 1;
1788 pipeline_flush_params.hevc_pipeline_command_flush = 1;
1789 gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
1791 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
1792 mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
1793 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
/*
 * Populate the per-pass HuC BRC update DMEM block: advance the running
 * VBV target on the first PAK pass, then fill the frame-type, pass,
 * scene-change and global-adjustment parameters consumed by the BRC
 * update firmware. Maps and unmaps the DMEM resource for the current pass.
 */
1797 gen9_vdenc_update_huc_update_dmem(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1799 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1800 struct huc_brc_update_dmem *dmem;
1801 int i, num_p_in_gop = 0;
1803 dmem = (struct huc_brc_update_dmem *)i965_map_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
/* Only advance the VBV model once per frame (pass 0), not per PAK pass. */
1810 if (vdenc_context->brc_initted && (vdenc_context->current_pass == 0)) {
1811 vdenc_context->brc_init_previous_target_buf_full_in_bits =
1812 (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits);
1813 vdenc_context->brc_init_current_target_buf_full_in_bits += vdenc_context->brc_init_reset_input_bits_per_frame;
1814 vdenc_context->brc_target_size += vdenc_context->brc_init_reset_input_bits_per_frame;
/* Wrap the target when it overflows the VBV buffer size. */
1817 if (vdenc_context->brc_target_size > vdenc_context->vbv_buffer_size_in_bit)
1818 vdenc_context->brc_target_size -= vdenc_context->vbv_buffer_size_in_bit;
1820 dmem->target_size = vdenc_context->brc_target_size;
1822 dmem->peak_tx_bits_per_frame = (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits - vdenc_context->brc_init_previous_target_buf_full_in_bits);
1824 dmem->target_slice_size = 0; // TODO: add support for slice size control
1826 memcpy(dmem->start_global_adjust_frame, vdenc_brc_start_global_adjust_frame, sizeof(dmem->start_global_adjust_frame));
1827 memcpy(dmem->global_rate_ratio_threshold, vdenc_brc_global_rate_ratio_threshold, sizeof(dmem->global_rate_ratio_threshold));
/* Remap driver frame type (I/P/B) to the firmware's encoding. */
1829 dmem->current_frame_type = (vdenc_context->frame_type + 2) % 3; // I frame:2, P frame:0, B frame:1
1831 memcpy(dmem->start_global_adjust_mult, vdenc_brc_start_global_adjust_mult, sizeof(dmem->start_global_adjust_mult));
1832 memcpy(dmem->start_global_adjust_div, vdenc_brc_start_global_adjust_div, sizeof(dmem->start_global_adjust_div));
1833 memcpy(dmem->global_rate_ratio_threshold_qp, vdenc_brc_global_rate_ratio_threshold_qp, sizeof(dmem->global_rate_ratio_threshold_qp));
1835 dmem->current_pak_pass = vdenc_context->current_pass;
1836 dmem->max_num_passes = 2;
1838 dmem->scene_change_detect_enable = 1;
1839 dmem->scene_change_prev_intra_percent_threshold = 96;
1840 dmem->scene_change_cur_intra_perent_threshold = 192;
1842 if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1843 num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
/* Scene-change window scales with GOP length, capped at 6. */
1845 for (i = 0; i < 2; i++)
1846 dmem->scene_change_width[i] = MIN((num_p_in_gop + 1) / 5, 6);
1848 if (vdenc_context->is_low_delay)
1849 dmem->ip_average_coeff = 0;
1851 dmem->ip_average_coeff = 128;
1853 dmem->skip_frame_size = 0;
1854 dmem->num_of_frames_skipped = 0;
1856 dmem->roi_source = 0; // TODO: add support for dirty ROI
1857 dmem->hme_detection_enable = 0; // TODO: support HME kernel
1858 dmem->hme_cost_enable = 1;
1860 dmem->second_level_batchbuffer_size = 228;
1862 i965_unmap_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
1866 gen9_vdenc_init_mfx_avc_img_state(VADriverContextP ctx,
1867 struct encode_state *encode_state,
1868 struct intel_encoder_context *encoder_context,
1869 struct gen9_mfx_avc_img_state *pstate,
1872 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1873 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1874 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1876 memset(pstate, 0, sizeof(*pstate));
1878 pstate->dw0.value = (MFX_AVC_IMG_STATE | (sizeof(*pstate) / 4 - 2));
1880 pstate->dw1.frame_size_in_mbs_minus1 = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs - 1;
1882 pstate->dw2.frame_width_in_mbs_minus1 = vdenc_context->frame_width_in_mbs - 1;
1883 pstate->dw2.frame_height_in_mbs_minus1 = vdenc_context->frame_height_in_mbs - 1;
1885 pstate->dw3.image_structure = 0;
1886 pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
1887 pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
1888 pstate->dw3.brc_domain_rate_control_enable = !!use_huc;
1889 pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
1890 pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
1892 pstate->dw4.field_picture_flag = 0;
1893 pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
1894 pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
1895 pstate->dw4.transform_8x8_idct_mode_flag = vdenc_context->transform_8x8_mode_enable;
1896 pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
1897 pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
1898 pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
1899 pstate->dw4.mb_mv_format_flag = 1;
1900 pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
1901 pstate->dw4.mv_unpacked_flag = 1;
1902 pstate->dw4.insert_test_flag = 0;
1903 pstate->dw4.load_slice_pointer_flag = 0;
1904 pstate->dw4.macroblock_stat_enable = 0; /* Always 0 in VDEnc mode */
1905 pstate->dw4.minimum_frame_size = 0;
1907 pstate->dw5.intra_mb_max_bit_flag = 1;
1908 pstate->dw5.inter_mb_max_bit_flag = 1;
1909 pstate->dw5.frame_size_over_flag = 1;
1910 pstate->dw5.frame_size_under_flag = 1;
1911 pstate->dw5.intra_mb_ipcm_flag = 1;
1912 pstate->dw5.mb_rate_ctrl_flag = 0; /* Always 0 in VDEnc mode */
1913 pstate->dw5.non_first_pass_flag = 0;
1914 pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
1915 pstate->dw5.aq_chroma_disable = 1;
1917 pstate->dw6.intra_mb_max_size = 2700;
1918 pstate->dw6.inter_mb_max_size = 4095;
1920 pstate->dw8.slice_delta_qp_max0 = 0;
1921 pstate->dw8.slice_delta_qp_max1 = 0;
1922 pstate->dw8.slice_delta_qp_max2 = 0;
1923 pstate->dw8.slice_delta_qp_max3 = 0;
1925 pstate->dw9.slice_delta_qp_min0 = 0;
1926 pstate->dw9.slice_delta_qp_min1 = 0;
1927 pstate->dw9.slice_delta_qp_min2 = 0;
1928 pstate->dw9.slice_delta_qp_min3 = 0;
1930 pstate->dw10.frame_bitrate_min = 0;
1931 pstate->dw10.frame_bitrate_min_unit = 1;
1932 pstate->dw10.frame_bitrate_min_unit_mode = 1;
1933 pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
1934 pstate->dw10.frame_bitrate_max_unit = 1;
1935 pstate->dw10.frame_bitrate_max_unit_mode = 1;
1937 pstate->dw11.frame_bitrate_min_delta = 0;
1938 pstate->dw11.frame_bitrate_max_delta = 0;
1940 pstate->dw12.vad_error_logic = 1;
1941 /* TODO: set paramters DW19/DW20 for slices */
1945 gen9_vdenc_init_vdenc_img_state(VADriverContextP ctx,
1946 struct encode_state *encode_state,
1947 struct intel_encoder_context *encoder_context,
1948 struct gen9_vdenc_img_state *pstate,
1951 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1952 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1953 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1954 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1956 memset(pstate, 0, sizeof(*pstate));
1958 pstate->dw0.value = (VDENC_IMG_STATE | (sizeof(*pstate) / 4 - 2));
1960 if (vdenc_context->frame_type == VDENC_FRAME_I) {
1961 pstate->dw4.intra_sad_measure_adjustment = 2;
1962 pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;
1964 pstate->dw5.cre_prefetch_enable = 1;
1966 pstate->dw9.mode0_cost = 10;
1967 pstate->dw9.mode1_cost = 0;
1968 pstate->dw9.mode2_cost = 3;
1969 pstate->dw9.mode3_cost = 30;
1971 pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
1972 pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
1973 pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;
1975 pstate->dw22.small_mb_size_in_word = 0xff;
1976 pstate->dw22.large_mb_size_in_word = 0xff;
1978 pstate->dw27.max_hmv_r = 0x2000;
1979 pstate->dw27.max_vmv_r = 0x200;
1981 pstate->dw33.qp_range_check_upper_bound = 0x33;
1982 pstate->dw33.qp_range_check_lower_bound = 0x0a;
1983 pstate->dw33.qp_range_check_value = 0x0f;
1985 pstate->dw2.bidirectional_weight = 0x20;
1987 pstate->dw4.subpel_mode = 3;
1988 pstate->dw4.bme_disable_for_fbr_message = 1;
1989 pstate->dw4.inter_sad_measure_adjustment = 2;
1990 pstate->dw4.intra_sad_measure_adjustment = 2;
1991 pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;
1993 pstate->dw5.cre_prefetch_enable = 1;
1995 pstate->dw8.non_skip_zero_mv_const_added = 1;
1996 pstate->dw8.non_skip_mb_mode_const_added = 1;
1997 pstate->dw8.ref_id_cost_mode_select = 1;
1999 pstate->dw9.mode0_cost = 7;
2000 pstate->dw9.mode1_cost = 26;
2001 pstate->dw9.mode2_cost = 30;
2002 pstate->dw9.mode3_cost = 57;
2004 pstate->dw10.mode4_cost = 8;
2005 pstate->dw10.mode5_cost = 2;
2006 pstate->dw10.mode6_cost = 4;
2007 pstate->dw10.mode7_cost = 6;
2009 pstate->dw11.mode8_cost = 5;
2010 pstate->dw11.mode9_cost = 0;
2011 pstate->dw11.ref_id_cost = 4;
2012 pstate->dw11.chroma_intra_mode_cost = 0;
2014 pstate->dw12_13.mv_cost.dw0.mv0_cost = 0;
2015 pstate->dw12_13.mv_cost.dw0.mv1_cost = 6;
2016 pstate->dw12_13.mv_cost.dw0.mv2_cost = 6;
2017 pstate->dw12_13.mv_cost.dw0.mv3_cost = 9;
2018 pstate->dw12_13.mv_cost.dw1.mv4_cost = 10;
2019 pstate->dw12_13.mv_cost.dw1.mv5_cost = 13;
2020 pstate->dw12_13.mv_cost.dw1.mv6_cost = 14;
2021 pstate->dw12_13.mv_cost.dw1.mv7_cost = 24;
2023 pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
2024 pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
2025 pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;
2027 pstate->dw22.small_mb_size_in_word = 0xff;
2028 pstate->dw22.large_mb_size_in_word = 0xff;
2030 pstate->dw27.max_hmv_r = 0x2000;
2031 pstate->dw27.max_vmv_r = 0x200;
2033 pstate->dw31.offset0_for_zone0_neg_zone1_boundary = 800;
2035 pstate->dw32.offset1_for_zone1_neg_zone2_boundary = 1600;
2036 pstate->dw32.offset2_for_zone2_neg_zone3_boundary = 2400;
2038 pstate->dw33.qp_range_check_upper_bound = 0x33;
2039 pstate->dw33.qp_range_check_lower_bound = 0x0a;
2040 pstate->dw33.qp_range_check_value = 0x0f;
2042 pstate->dw34.midpoint_distortion = 0x640;
2045 /* ROI will be updated in HuC kernel for CBR/VBR */
2046 if (!vdenc_context->brc_enabled && vdenc_context->num_roi) {
2047 pstate->dw34.roi_enable = 1;
2049 pstate->dw30.roi_qp_adjustment_for_zone1 = CLAMP(-8, 7, vdenc_context->roi[0].value);
2051 if (vdenc_context->num_roi > 1)
2052 pstate->dw30.roi_qp_adjustment_for_zone2 = CLAMP(-8, 7, vdenc_context->roi[1].value);
2054 if (vdenc_context->num_roi > 2)
2055 pstate->dw30.roi_qp_adjustment_for_zone3 = CLAMP(-8, 7, vdenc_context->roi[2].value);
2058 pstate->dw1.transform_8x8_flag = vdenc_context->transform_8x8_mode_enable;
2059 pstate->dw1.extended_pak_obj_cmd_enable = !!vdenc_context->use_extended_pak_obj_cmd;
2061 pstate->dw3.picture_width = vdenc_context->frame_width_in_mbs;
2063 pstate->dw4.forward_transform_skip_check_enable = 1; /* TODO: double-check it */
2065 pstate->dw5.picture_height_minus1 = vdenc_context->frame_height_in_mbs - 1;
2066 pstate->dw5.picture_type = vdenc_context->frame_type;
2067 pstate->dw5.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
2069 if (vdenc_context->frame_type == VDENC_FRAME_P) {
2070 pstate->dw5.hme_ref1_disable = vdenc_context->num_refs[0] == 1 ? 1 : 0;
2073 pstate->dw5.mb_slice_threshold_value = 0;
2075 pstate->dw6.slice_macroblock_height_minus1 = vdenc_context->frame_height_in_mbs - 1; /* single slice onlye */
2077 if (pstate->dw1.transform_8x8_flag)
2078 pstate->dw8.luma_intra_partition_mask = 0;
2080 pstate->dw8.luma_intra_partition_mask = (1 << 1); /* disable transform_8x8 */
2082 pstate->dw14.qp_prime_y = pic_param->pic_init_qp + slice_param->slice_qp_delta; /* TODO: check whether it is OK to use the first slice only */
2085 pstate->dw9.mode0_cost = vdenc_context->mode_cost[0];
2086 pstate->dw9.mode1_cost = vdenc_context->mode_cost[1];
2087 pstate->dw9.mode2_cost = vdenc_context->mode_cost[2];
2088 pstate->dw9.mode3_cost = vdenc_context->mode_cost[3];
2090 pstate->dw10.mode4_cost = vdenc_context->mode_cost[4];
2091 pstate->dw10.mode5_cost = vdenc_context->mode_cost[5];
2092 pstate->dw10.mode6_cost = vdenc_context->mode_cost[6];
2093 pstate->dw10.mode7_cost = vdenc_context->mode_cost[7];
2095 pstate->dw11.mode8_cost = vdenc_context->mode_cost[8];
2096 pstate->dw11.mode9_cost = vdenc_context->mode_cost[9];
2097 pstate->dw11.ref_id_cost = vdenc_context->mode_cost[10];
2098 pstate->dw11.chroma_intra_mode_cost = vdenc_context->mode_cost[11];
2100 pstate->dw12_13.mv_cost.dw0.mv0_cost = vdenc_context->mv_cost[0];
2101 pstate->dw12_13.mv_cost.dw0.mv1_cost = vdenc_context->mv_cost[1];
2102 pstate->dw12_13.mv_cost.dw0.mv2_cost = vdenc_context->mv_cost[2];
2103 pstate->dw12_13.mv_cost.dw0.mv3_cost = vdenc_context->mv_cost[3];
2104 pstate->dw12_13.mv_cost.dw1.mv4_cost = vdenc_context->mv_cost[4];
2105 pstate->dw12_13.mv_cost.dw1.mv5_cost = vdenc_context->mv_cost[5];
2106 pstate->dw12_13.mv_cost.dw1.mv6_cost = vdenc_context->mv_cost[6];
2107 pstate->dw12_13.mv_cost.dw1.mv7_cost = vdenc_context->mv_cost[7];
2109 pstate->dw28_29.hme_mv_cost.dw0.mv0_cost = vdenc_context->hme_mv_cost[0];
2110 pstate->dw28_29.hme_mv_cost.dw0.mv1_cost = vdenc_context->hme_mv_cost[1];
2111 pstate->dw28_29.hme_mv_cost.dw0.mv2_cost = vdenc_context->hme_mv_cost[2];
2112 pstate->dw28_29.hme_mv_cost.dw0.mv3_cost = vdenc_context->hme_mv_cost[3];
2113 pstate->dw28_29.hme_mv_cost.dw1.mv4_cost = vdenc_context->hme_mv_cost[4];
2114 pstate->dw28_29.hme_mv_cost.dw1.mv5_cost = vdenc_context->hme_mv_cost[5];
2115 pstate->dw28_29.hme_mv_cost.dw1.mv6_cost = vdenc_context->hme_mv_cost[6];
2116 pstate->dw28_29.hme_mv_cost.dw1.mv7_cost = vdenc_context->hme_mv_cost[7];
2119 pstate->dw27.max_vmv_r = gen9_vdenc_get_max_vmv_range(seq_param->level_idc);
2121 pstate->dw34.image_state_qp_override = (vdenc_context->internal_rate_mode == I965_BRC_CQP) ? 1 : 0;
2123 /* TODO: check rolling I */
2125 /* TODO: handle ROI */
2127 /* TODO: check stream in support */
2131 gen9_vdenc_init_img_states(VADriverContextP ctx,
2132 struct encode_state *encode_state,
2133 struct intel_encoder_context *encoder_context)
2135 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2136 struct gen9_mfx_avc_img_state *mfx_img_cmd;
2137 struct gen9_vdenc_img_state *vdenc_img_cmd;
2140 pbuffer = i965_map_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
2145 mfx_img_cmd = (struct gen9_mfx_avc_img_state *)pbuffer;
2146 gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, mfx_img_cmd, 1);
2147 pbuffer += sizeof(*mfx_img_cmd);
2149 vdenc_img_cmd = (struct gen9_vdenc_img_state *)pbuffer;
2150 gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, vdenc_img_cmd, 0);
2151 pbuffer += sizeof(*vdenc_img_cmd);
2153 /* Add batch buffer end command */
2154 *((unsigned int *)pbuffer) = MI_BATCH_BUFFER_END;
2156 i965_unmap_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
2160 gen9_vdenc_huc_brc_update_constant_data(VADriverContextP ctx,
2161 struct encode_state *encode_state,
2162 struct intel_encoder_context *encoder_context)
2164 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2165 struct huc_brc_update_constant_data *brc_buffer;
2167 brc_buffer = (struct huc_brc_update_constant_data *)
2168 i965_map_gpe_resource(&vdenc_context->brc_constant_data_res);
2173 memcpy(brc_buffer, &gen9_brc_update_constant_data, sizeof(gen9_brc_update_constant_data));
2175 if (vdenc_context->internal_rate_mode == I965_BRC_VBR) {
2176 memcpy(brc_buffer->dist_qp_adj_tab_i, dist_qp_adj_tab_i_vbr, sizeof(dist_qp_adj_tab_i_vbr));
2177 memcpy(brc_buffer->dist_qp_adj_tab_p, dist_qp_adj_tab_p_vbr, sizeof(dist_qp_adj_tab_p_vbr));
2178 memcpy(brc_buffer->dist_qp_adj_tab_b, dist_qp_adj_tab_b_vbr, sizeof(dist_qp_adj_tab_b_vbr));
2179 memcpy(brc_buffer->buf_rate_adj_tab_i, buf_rate_adj_tab_i_vbr, sizeof(buf_rate_adj_tab_i_vbr));
2180 memcpy(brc_buffer->buf_rate_adj_tab_p, buf_rate_adj_tab_p_vbr, sizeof(buf_rate_adj_tab_p_vbr));
2181 memcpy(brc_buffer->buf_rate_adj_tab_b, buf_rate_adj_tab_b_vbr, sizeof(buf_rate_adj_tab_b_vbr));
2185 i965_unmap_gpe_resource(&vdenc_context->brc_constant_data_res);
2189 gen9_vdenc_huc_brc_update(VADriverContextP ctx,
2190 struct encode_state *encode_state,
2191 struct intel_encoder_context *encoder_context)
2193 struct intel_batchbuffer *batch = encoder_context->base.batch;
2194 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2195 struct huc_pipe_mode_select_parameter pipe_mode_select_params;
2196 struct huc_imem_state_parameter imem_state_params;
2197 struct huc_dmem_state_parameter dmem_state_params;
2198 struct huc_virtual_addr_parameter virtual_addr_params;
2199 struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
2200 struct huc_stream_object_parameter stream_object_params;
2201 struct huc_start_parameter start_params;
2202 struct vd_pipeline_flush_parameter pipeline_flush_params;
2203 struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
2204 struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
2205 struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
2207 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
2208 mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
2209 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
2211 if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset) {
2212 struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
2214 memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
2215 mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
2216 gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
2219 gen9_vdenc_init_img_states(ctx, encode_state, encoder_context);
2221 memset(&imem_state_params, 0, sizeof(imem_state_params));
2222 imem_state_params.huc_firmware_descriptor = HUC_BRC_UPDATE;
2223 gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);
2225 memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
2226 gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);
2228 gen9_vdenc_update_huc_update_dmem(ctx, encoder_context);
2229 memset(&dmem_state_params, 0, sizeof(dmem_state_params));
2230 dmem_state_params.huc_data_source_res = &vdenc_context->brc_update_dmem_res[vdenc_context->current_pass];
2231 dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
2232 dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_update_dmem), 64);
2233 gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);
2235 gen9_vdenc_huc_brc_update_constant_data(ctx, encode_state, encoder_context);
2236 memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
2237 virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
2238 virtual_addr_params.regions[0].is_target = 1;
2239 virtual_addr_params.regions[1].huc_surface_res = &vdenc_context->vdenc_statistics_res;
2240 virtual_addr_params.regions[2].huc_surface_res = &vdenc_context->pak_statistics_res;
2241 virtual_addr_params.regions[3].huc_surface_res = &vdenc_context->vdenc_avc_image_state_res;
2242 virtual_addr_params.regions[4].huc_surface_res = &vdenc_context->hme_detection_summary_buffer_res;
2243 virtual_addr_params.regions[4].is_target = 1;
2244 virtual_addr_params.regions[5].huc_surface_res = &vdenc_context->brc_constant_data_res;
2245 virtual_addr_params.regions[6].huc_surface_res = &vdenc_context->second_level_batch_res;
2246 virtual_addr_params.regions[6].is_target = 1;
2247 gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);
2249 memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
2250 ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
2251 ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
2252 gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);
2254 memset(&stream_object_params, 0, sizeof(stream_object_params));
2255 stream_object_params.indirect_stream_in_data_length = 1;
2256 stream_object_params.indirect_stream_in_start_address = 0;
2257 gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);
2259 gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);
2261 memset(&start_params, 0, sizeof(start_params));
2262 start_params.last_stream_object = 1;
2263 gen9_vdenc_huc_start(ctx, encoder_context, &start_params);
2265 memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
2266 pipeline_flush_params.hevc_pipeline_done = 1;
2267 pipeline_flush_params.hevc_pipeline_command_flush = 1;
2268 gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
2270 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
2271 mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
2272 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
2274 /* Store HUC_STATUS */
2275 memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
2276 mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS;
2277 mi_store_register_mem_params.bo = vdenc_context->huc_status_res.bo;
2278 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
2280 /* Write HUC_STATUS mask (1 << 31) */
2281 memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
2282 mi_store_data_imm_params.bo = vdenc_context->huc_status_res.bo;
2283 mi_store_data_imm_params.offset = 4;
2284 mi_store_data_imm_params.dw0 = (1 << 31);
2285 gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
2289 gen9_vdenc_mfx_pipe_mode_select(VADriverContextP ctx,
2290 struct encode_state *encode_state,
2291 struct intel_encoder_context *encoder_context)
2293 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2294 struct intel_batchbuffer *batch = encoder_context->base.batch;
2296 BEGIN_BCS_BATCH(batch, 5);
2298 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2299 OUT_BCS_BATCH(batch,
2301 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
2302 (MFD_MODE_VLD << 15) |
2303 (1 << 13) | /* VDEnc mode */
2304 ((!!vdenc_context->post_deblocking_output_res.bo) << 9) | /* Post Deblocking Output */
2305 ((!!vdenc_context->pre_deblocking_output_res.bo) << 8) | /* Pre Deblocking Output */
2306 (1 << 7) | /* Scaled surface enable */
2307 (1 << 6) | /* Frame statistics stream out enable, always '1' in VDEnc mode */
2308 (1 << 4) | /* encoding mode */
2309 (MFX_FORMAT_AVC << 0));
2310 OUT_BCS_BATCH(batch, 0);
2311 OUT_BCS_BATCH(batch, 0);
2312 OUT_BCS_BATCH(batch, 0);
2314 ADVANCE_BCS_BATCH(batch);
2318 gen9_vdenc_mfx_surface_state(VADriverContextP ctx,
2319 struct intel_encoder_context *encoder_context,
2320 struct i965_gpe_resource *gpe_resource,
2323 struct intel_batchbuffer *batch = encoder_context->base.batch;
2325 BEGIN_BCS_BATCH(batch, 6);
2327 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2328 OUT_BCS_BATCH(batch, id);
2329 OUT_BCS_BATCH(batch,
2330 ((gpe_resource->height - 1) << 18) |
2331 ((gpe_resource->width - 1) << 4));
2332 OUT_BCS_BATCH(batch,
2333 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2334 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
2335 ((gpe_resource->pitch - 1) << 3) | /* pitch */
2336 (0 << 2) | /* must be 0 for interleave U/V */
2337 (1 << 1) | /* must be tiled */
2338 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
2339 OUT_BCS_BATCH(batch,
2340 (0 << 16) | /* must be 0 for interleave U/V */
2341 (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
2342 OUT_BCS_BATCH(batch,
2343 (0 << 16) | /* must be 0 for interleave U/V */
2344 (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
2346 ADVANCE_BCS_BATCH(batch);
2350 gen9_vdenc_mfx_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2352 struct i965_driver_data *i965 = i965_driver_data(ctx);
2353 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2354 struct intel_batchbuffer *batch = encoder_context->base.batch;
2357 BEGIN_BCS_BATCH(batch, 65);
2359 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));
2361 /* the DW1-3 is for pre_deblocking */
2362 OUT_BUFFER_3DW(batch, vdenc_context->pre_deblocking_output_res.bo, 1, 0, 0);
2364 /* the DW4-6 is for the post_deblocking */
2365 OUT_BUFFER_3DW(batch, vdenc_context->post_deblocking_output_res.bo, 1, 0, 0);
2367 /* the DW7-9 is for the uncompressed_picture */
2368 OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);
2370 /* the DW10-12 is for PAK information (write) */
2371 OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 1, 0, 0);
2373 /* the DW13-15 is for the intra_row_store_scratch */
2374 OUT_BUFFER_3DW(batch, vdenc_context->mfx_intra_row_store_scratch_res.bo, 1, 0, 0);
2376 /* the DW16-18 is for the deblocking filter */
2377 OUT_BUFFER_3DW(batch, vdenc_context->mfx_deblocking_filter_row_store_scratch_res.bo, 1, 0, 0);
2379 /* the DW 19-50 is for Reference pictures*/
2380 for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
2381 OUT_BUFFER_2DW(batch, vdenc_context->list_reference_res[i].bo, 0, 0);
2384 /* DW 51, reference picture attributes */
2385 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2387 /* The DW 52-54 is for PAK information (read) */
2388 OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 0, 0, 0);
2390 /* the DW 55-57 is the ILDB buffer */
2391 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2393 /* the DW 58-60 is the second ILDB buffer */
2394 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2396 /* DW 61, memory compress enable & mode */
2397 OUT_BCS_BATCH(batch, 0);
2399 /* the DW 62-64 is the 4x Down Scaling surface */
2400 OUT_BUFFER_3DW(batch, vdenc_context->scaled_4x_recon_surface_res.bo, 1, 0, 0);
2402 ADVANCE_BCS_BATCH(batch);
2406 gen9_vdenc_mfx_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2408 struct i965_driver_data *i965 = i965_driver_data(ctx);
2409 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2410 struct intel_batchbuffer *batch = encoder_context->base.batch;
2412 BEGIN_BCS_BATCH(batch, 26);
2414 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
2415 /* The DW1-5 is for the MFX indirect bistream offset, ignore for VDEnc mode */
2416 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2417 OUT_BUFFER_2DW(batch, NULL, 0, 0);
2419 /* the DW6-10 is for MFX Indirect MV Object Base Address, ignore for VDEnc mode */
2420 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2421 OUT_BUFFER_2DW(batch, NULL, 0, 0);
2423 /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
2424 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2425 OUT_BUFFER_2DW(batch, NULL, 0, 0);
2427 /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
2428 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2429 OUT_BUFFER_2DW(batch, NULL, 0, 0);
2431 /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
2432 * Note: an offset is specified in MFX_AVC_SLICE_STATE
2434 OUT_BUFFER_3DW(batch,
2435 vdenc_context->compressed_bitstream.res.bo,
2439 OUT_BUFFER_2DW(batch,
2440 vdenc_context->compressed_bitstream.res.bo,
2442 vdenc_context->compressed_bitstream.end_offset);
2444 ADVANCE_BCS_BATCH(batch);
2448 gen9_vdenc_mfx_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2450 struct i965_driver_data *i965 = i965_driver_data(ctx);
2451 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2452 struct intel_batchbuffer *batch = encoder_context->base.batch;
2454 BEGIN_BCS_BATCH(batch, 10);
2456 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2458 /* The DW1-3 is for bsd/mpc row store scratch buffer */
2459 OUT_BUFFER_3DW(batch, vdenc_context->mfx_bsd_mpc_row_store_scratch_res.bo, 1, 0, 0);
2461 /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
2462 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2464 /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
2465 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2467 ADVANCE_BCS_BATCH(batch);
2471 gen9_vdenc_mfx_qm_state(VADriverContextP ctx,
2475 struct intel_encoder_context *encoder_context)
2477 struct intel_batchbuffer *batch = encoder_context->base.batch;
2478 unsigned int qm_buffer[16];
2480 assert(qm_length <= 16);
2481 assert(sizeof(*qm) == 4);
2482 memcpy(qm_buffer, qm, qm_length * 4);
2484 BEGIN_BCS_BATCH(batch, 18);
2485 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
2486 OUT_BCS_BATCH(batch, qm_type << 0);
2487 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
2488 ADVANCE_BCS_BATCH(batch);
2492 gen9_vdenc_mfx_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2494 /* TODO: add support for non flat matrix */
2495 unsigned int qm[16] = {
2496 0x10101010, 0x10101010, 0x10101010, 0x10101010,
2497 0x10101010, 0x10101010, 0x10101010, 0x10101010,
2498 0x10101010, 0x10101010, 0x10101010, 0x10101010,
2499 0x10101010, 0x10101010, 0x10101010, 0x10101010
2502 gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
2503 gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
2504 gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
2505 gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
2509 gen9_vdenc_mfx_fqm_state(VADriverContextP ctx,
2513 struct intel_encoder_context *encoder_context)
2515 struct intel_batchbuffer *batch = encoder_context->base.batch;
2516 unsigned int fqm_buffer[32];
2518 assert(fqm_length <= 32);
2519 assert(sizeof(*fqm) == 4);
2520 memcpy(fqm_buffer, fqm, fqm_length * 4);
2522 BEGIN_BCS_BATCH(batch, 34);
2523 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
2524 OUT_BCS_BATCH(batch, fqm_type << 0);
2525 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
2526 ADVANCE_BCS_BATCH(batch);
2530 gen9_vdenc_mfx_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2532 /* TODO: add support for non flat matrix */
2533 unsigned int qm[32] = {
2534 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2535 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2536 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2537 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2538 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2539 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2540 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2541 0x10001000, 0x10001000, 0x10001000, 0x10001000
2544 gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
2545 gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
2546 gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
2547 gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
2551 gen9_vdenc_mfx_avc_img_state(VADriverContextP ctx,
2552 struct encode_state *encode_state,
2553 struct intel_encoder_context *encoder_context)
2555 struct intel_batchbuffer *batch = encoder_context->base.batch;
2556 struct gen9_mfx_avc_img_state mfx_img_cmd;
2558 gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &mfx_img_cmd, 0);
2560 BEGIN_BCS_BATCH(batch, (sizeof(mfx_img_cmd) >> 2));
2561 intel_batchbuffer_data(batch, &mfx_img_cmd, sizeof(mfx_img_cmd));
2562 ADVANCE_BCS_BATCH(batch);
2566 gen9_vdenc_vdenc_pipe_mode_select(VADriverContextP ctx,
2567 struct encode_state *encode_state,
2568 struct intel_encoder_context *encoder_context)
2570 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2571 struct intel_batchbuffer *batch = encoder_context->base.batch;
2573 BEGIN_BCS_BATCH(batch, 2);
2575 OUT_BCS_BATCH(batch, VDENC_PIPE_MODE_SELECT | (2 - 2));
2576 OUT_BCS_BATCH(batch,
2577 (vdenc_context->vdenc_streamin_enable << 9) |
2578 (vdenc_context->vdenc_pak_threshold_check_enable << 8) |
2579 (1 << 7) | /* Tlb prefetch enable */
2580 (1 << 5) | /* Frame Statistics Stream-Out Enable */
2581 (VDENC_CODEC_AVC << 0));
2583 ADVANCE_BCS_BATCH(batch);
2587 gen9_vdenc_vdenc_surface_state(VADriverContextP ctx,
2588 struct intel_encoder_context *encoder_context,
2589 struct i965_gpe_resource *gpe_resource,
2590 int vdenc_surface_cmd)
2592 struct intel_batchbuffer *batch = encoder_context->base.batch;
2594 BEGIN_BCS_BATCH(batch, 6);
2596 OUT_BCS_BATCH(batch, vdenc_surface_cmd | (6 - 2));
2597 OUT_BCS_BATCH(batch, 0);
2598 OUT_BCS_BATCH(batch,
2599 ((gpe_resource->height - 1) << 18) |
2600 ((gpe_resource->width - 1) << 4));
2601 OUT_BCS_BATCH(batch,
2602 (VDENC_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface only on SKL */
2603 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
2604 ((gpe_resource->pitch - 1) << 3) | /* pitch */
2605 (0 << 2) | /* must be 0 for interleave U/V */
2606 (1 << 1) | /* must be tiled */
2607 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
2608 OUT_BCS_BATCH(batch,
2609 (0 << 16) | /* must be 0 for interleave U/V */
2610 (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
2611 OUT_BCS_BATCH(batch,
2612 (0 << 16) | /* must be 0 for interleave U/V */
2613 (gpe_resource->y_cb_offset)); /* y offset for v(cr) */
2615 ADVANCE_BCS_BATCH(batch);
2619 gen9_vdenc_vdenc_src_surface_state(VADriverContextP ctx,
2620 struct intel_encoder_context *encoder_context,
2621 struct i965_gpe_resource *gpe_resource)
2623 gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_SRC_SURFACE_STATE);
2627 gen9_vdenc_vdenc_ref_surface_state(VADriverContextP ctx,
2628 struct intel_encoder_context *encoder_context,
2629 struct i965_gpe_resource *gpe_resource)
2631 gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_REF_SURFACE_STATE);
2635 gen9_vdenc_vdenc_ds_ref_surface_state(VADriverContextP ctx,
2636 struct intel_encoder_context *encoder_context,
2637 struct i965_gpe_resource *gpe_resource)
2639 gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_DS_REF_SURFACE_STATE);
/* Program VDENC_PIPE_BUF_ADDR_STATE (37 DWs): binds every buffer address
 * the VDEnc pipeline reads or writes for this frame -- down-scaled forward
 * references, the uncompressed source, the optional stream-in buffer, the
 * row-store scratch, the full-resolution forward references and the
 * statistics stream-out.  Slots unused on SKL are programmed as NULL. */
2643 gen9_vdenc_vdenc_pipe_buf_addr_state(VADriverContextP ctx,
2644 struct encode_state *encode_state,
2645 struct intel_encoder_context *encoder_context)
2647     struct i965_driver_data *i965 = i965_driver_data(ctx);
2648     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2649     struct intel_batchbuffer *batch = encoder_context->base.batch;
2651     BEGIN_BCS_BATCH(batch, 37);
2653     OUT_BCS_BATCH(batch, VDENC_PIPE_BUF_ADDR_STATE | (37 - 2));
2655     /* DW1-6 for DS FWD REF0/REF1 */
2656     OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
2657     OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
2659     /* DW7-9 for DS BWD REF0, ignored on SKL */
2660     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2662     /* DW10-12 for uncompressed input data */
2663     OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);
2665     /* DW13-DW15 for streamin data */
2666     if (vdenc_context->vdenc_streamin_enable)
2667         OUT_BUFFER_3DW(batch, vdenc_context->vdenc_streamin_res.bo, 0, 0, 0);
    /* NOTE(review): this listing elides a line between 2667 and 2669 --
     * presumably an "else" so the NULL slot below is the fallback when
     * stream-in is disabled; verify against the full source. */
2669         OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2671     /* DW16-DW18 for row scratch buffer */
2672     OUT_BUFFER_3DW(batch, vdenc_context->vdenc_row_store_scratch_res.bo, 1, 0, 0);
2674     /* DW19-DW21, ignored on SKL */
2675     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2677     /* DW22-DW27 for FWD REF0/REF1 */
2678     OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
2679     OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
2681     /* DW28-DW30 for FWD REF2, ignored on SKL */
2682     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2684     /* DW31-DW33 for BDW REF0, ignored on SKL */
2685     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2687     /* DW34-DW36 for VDEnc statistics streamout */
2688     OUT_BUFFER_3DW(batch, vdenc_context->vdenc_statistics_res.bo, 1, 0, 0);
2690     ADVANCE_BCS_BATCH(batch);
/* Program VDENC_CONST_QPT_STATE (61 DWs): uploads the per-QP constant
 * tables (lambda, skip thresholds and the four SIC forward-transform
 * coefficient threshold tables).  I frames use the intra tables; all
 * other frame types use the P-frame tables, with the P skip thresholds
 * scaled by 3 into a local copy before upload. */
2694 gen9_vdenc_vdenc_const_qpt_state(VADriverContextP ctx,
2695 struct encode_state *encode_state,
2696 struct intel_encoder_context *encoder_context)
2698     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2699     struct intel_batchbuffer *batch = encoder_context->base.batch;
2701     BEGIN_BCS_BATCH(batch, 61);
2703     OUT_BCS_BATCH(batch, VDENC_CONST_QPT_STATE | (61 - 2));
2705     if (vdenc_context->frame_type == VDENC_FRAME_I) {
2707         intel_batchbuffer_data(batch, (void *)vdenc_const_qp_lambda, sizeof(vdenc_const_qp_lambda));
2710         intel_batchbuffer_data(batch, (void *)vdenc_const_skip_threshold, sizeof(vdenc_const_skip_threshold));
2713         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_0, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0));
2716         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_1, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1));
2719         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_2, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2));
2722         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_3, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3));
    /* P-frame path: build a scaled copy of the skip-threshold table. */
2725         uint16_t tmp_vdenc_skip_threshold_p[28];
2727         memcpy(&tmp_vdenc_skip_threshold_p, vdenc_const_skip_threshold_p, sizeof(vdenc_const_skip_threshold_p));
2729         for (i = 0; i < 28; i++) {
2730             tmp_vdenc_skip_threshold_p[i] *= 3;
2734         intel_batchbuffer_data(batch, (void *)vdenc_const_qp_lambda_p, sizeof(vdenc_const_qp_lambda_p));
2737         intel_batchbuffer_data(batch, (void *)tmp_vdenc_skip_threshold_p, sizeof(vdenc_const_skip_threshold_p));
2740         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_0_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0_p));
2743         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_1_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1_p));
2746         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_2_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2_p));
2749         intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_3_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3_p));
2752     ADVANCE_BCS_BATCH(batch);
/* Program the frame-level VDENC_WALKER_STATE (2 DWs).  All walker fields
 * are zero, i.e. the walker covers the whole frame; the per-slice variant
 * lives in gen95_vdenc_vdenc_walker_state(). */
2756 gen9_vdenc_vdenc_walker_state(VADriverContextP ctx,
2757 struct encode_state *encode_state,
2758 struct intel_encoder_context *encoder_context)
2760     struct intel_batchbuffer *batch = encoder_context->base.batch;
2762     BEGIN_BCS_BATCH(batch, 2);
2764     OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (2 - 2));
2765     OUT_BCS_BATCH(batch, 0); /* All fields are set to 0 */
2767     ADVANCE_BCS_BATCH(batch);
/* Program VDENC_WEIGHTSOFFSETS_STATE (3 DWs) with the L0 luma weights and
 * offsets for explicit weighted prediction; when weighted prediction is
 * off, default weights are written instead (the else branch is elided in
 * this listing between lines 2789 and 2791).
 * NOTE(review): the function name misspells "vdenc" as "vdecn" and
 * "weightsoffsets" as "weihgtsoffsets"; renaming requires updating the
 * caller in gen9_vdenc_mfx_avc_single_slice() as well -- TODO. */
2771 gen95_vdenc_vdecn_weihgtsoffsets_state(VADriverContextP ctx,
2772 struct encode_state *encode_state,
2773 struct intel_encoder_context *encoder_context,
2774 VAEncSliceParameterBufferH264 *slice_param)
2776     struct intel_batchbuffer *batch = encoder_context->base.batch;
2777     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2779     BEGIN_BCS_BATCH(batch, 3);
2781     OUT_BCS_BATCH(batch, VDENC_WEIGHTSOFFSETS_STATE | (3 - 2));
2783     if (pic_param->pic_fields.bits.weighted_pred_flag == 1) {
2784         OUT_BCS_BATCH(batch, (slice_param->luma_offset_l0[1] << 24 |
2785 slice_param->luma_weight_l0[1] << 16 |
2786 slice_param->luma_offset_l0[0] << 8 |
2787 slice_param->luma_weight_l0[0] << 0));
2788         OUT_BCS_BATCH(batch, (slice_param->luma_offset_l0[2] << 8 |
2789 slice_param->luma_weight_l0[2] << 0));
2791         OUT_BCS_BATCH(batch, (0 << 24 |
2795         OUT_BCS_BATCH(batch, (0 << 8 |
2800     ADVANCE_BCS_BATCH(batch);
/* Program the per-slice VDENC_WALKER_STATE (4 DWs) used in slice-level
 * VDEnc mode (gen9.5+): start MB position of this slice, start MB
 * position of the next slice (or frame end for the last slice), and the
 * luma log2 weight denominator when explicit weighted prediction is
 * active on a P slice. */
2804 gen95_vdenc_vdenc_walker_state(VADriverContextP ctx,
2805 struct encode_state *encode_state,
2806 struct intel_encoder_context *encoder_context,
2807 VAEncSliceParameterBufferH264 *slice_param,
2808 VAEncSliceParameterBufferH264 *next_slice_param)
2810     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2811     struct intel_batchbuffer *batch = encoder_context->base.batch;
2812     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2813     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
2814     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
2815     int luma_log2_weight_denom, weighted_pred_idc;
    /* Convert the linear start MB address into (x, y) MB coordinates. */
2817     slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
2818     slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
2820     if (next_slice_param) {
2821         next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
2822         next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
    /* Last slice: the "next" position is the bottom of the frame. */
2824         next_slice_hor_pos = 0;
2825         next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
    /* Weighted prediction only applies to P slices on this path. */
2828     if (slice_type == SLICE_TYPE_P)
2829         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
2831         weighted_pred_idc = 0;
2833     if (weighted_pred_idc == 1)
2834         luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
2836         luma_log2_weight_denom = 0;
2838     BEGIN_BCS_BATCH(batch, 4);
2840     OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (4 - 2));
2841     OUT_BCS_BATCH(batch, (slice_hor_pos << 16 |
2843     OUT_BCS_BATCH(batch, (next_slice_hor_pos << 16 |
2844 next_slice_ver_pos));
2845     OUT_BCS_BATCH(batch, luma_log2_weight_denom);
2847     ADVANCE_BCS_BATCH(batch);
/* Emit VDENC_IMG_STATE: fills a local gen9_vdenc_img_state command
 * structure via gen9_vdenc_init_vdenc_img_state() (last arg 1 = emitted
 * directly into the ring rather than a second-level batch) and copies it
 * into the batch wholesale. */
2851 gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
2852 struct encode_state *encode_state,
2853 struct intel_encoder_context *encoder_context)
2855     struct intel_batchbuffer *batch = encoder_context->base.batch;
2856     struct gen9_vdenc_img_state vdenc_img_cmd;
2858     gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, &vdenc_img_cmd, 1);
2860     BEGIN_BCS_BATCH(batch, (sizeof(vdenc_img_cmd) >> 2));
2861     intel_batchbuffer_data(batch, &vdenc_img_cmd, sizeof(vdenc_img_cmd));
2862     ADVANCE_BCS_BATCH(batch);
/* Emit MFX_INSERT_OBJECT: inserts raw header/bitstream data (SPS/PPS/SEI/
 * slice headers, etc.) into the output stream.  data_bits_in_last_dw == 0
 * is normalized to 32, meaning the last DW is fully used.
 * NOTE(review): "lenght_in_dws" misspells "length"; fixing it means
 * touching every caller -- TODO. */
2866 gen9_vdenc_mfx_avc_insert_object(VADriverContextP ctx,
2867 struct intel_encoder_context *encoder_context,
2868 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
2869 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
2870 int slice_header_indicator)
2872     struct intel_batchbuffer *batch = encoder_context->base.batch;
2874     if (data_bits_in_last_dw == 0)
2875         data_bits_in_last_dw = 32;
2877     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
2879     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
2880     OUT_BCS_BATCH(batch,
2881 (0 << 16) | /* always start at offset 0 */
2882 (slice_header_indicator << 14) |
2883 (data_bits_in_last_dw << 8) |
2884 (skip_emul_byte_count << 4) |
2885 (!!emulation_flag << 3) |
2886 ((!!is_last_header) << 2) |
2887 ((!!is_end_of_slice) << 1) |
2888 (0 << 0)); /* TODO: check this flag */
2889     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
2891     ADVANCE_BCS_BATCH(batch);
/* Insert all packed header data attached to one slice, in three steps:
 * 1) every packed-raw-data buffer except the slice header itself;
 * 2) (KBL slice-level VDEnc workaround) one zero byte ahead of the slice
 *    header when no other NAL unit precedes it;
 * 3) the slice header -- either driver-built via build_avc_slice_header()
 *    when the app supplied none, or the app's packed slice header.
 * The slice header is always inserted last with the last-header flag set. */
2895 gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
2896 struct encode_state *encode_state,
2897 struct intel_encoder_context *encoder_context,
2900     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2901     struct i965_driver_data *i965 = i965_driver_data(ctx);
2902     VAEncPackedHeaderParameterBuffer *param = NULL;
2903     unsigned int length_in_bits;
2904     unsigned int *header_data = NULL;
2905     int count, i, start_index;
2906     int slice_header_index;
2907     unsigned int insert_one_zero_byte = 0;
    /* Index 0 is the "no packed slice header" sentinel. */
2909     if (encode_state->slice_header_index[slice_index] == 0)
2910         slice_header_index = -1;
2912         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
2914     count = encode_state->slice_rawdata_count[slice_index];
2915     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
2917     for (i = 0; i < count; i++) {
2918         unsigned int skip_emul_byte_cnt;
2920         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
2922         param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
2924         /* skip the slice header packed data type as it is lastly inserted */
2925         if (param->type == VAEncPackedHeaderSlice)
2928         length_in_bits = param->bit_length;
2930         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2932         /* as the slice header is still required, the last header flag is set to
2935         gen9_vdenc_mfx_avc_insert_object(ctx,
2938 ALIGN(length_in_bits, 32) >> 5,
2939 length_in_bits & 0x1f,
2943 !param->has_emulation_bytes,
    /* Slice-level VDEnc (gen9.5+) requires the extra zero byte. */
2948     if (!vdenc_context->is_frame_level_vdenc) {
2949         insert_one_zero_byte = 1;
2952     /* Insert one zero byte before the slice header if no any other NAL unit is inserted, required on KBL */
2953     if (insert_one_zero_byte) {
2954         unsigned int insert_data[] = { 0, };
2956         gen9_vdenc_mfx_avc_insert_object(ctx,
    /* No packed slice header supplied: build one from the parameter buffers. */
2965     if (slice_header_index == -1) {
2966         VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
2967         VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2968         VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
2969         unsigned char *slice_header = NULL, *slice_header1 = NULL;
2970         int slice_header_length_in_bits = 0;
2971         uint32_t saved_macroblock_address = 0;
2973         /* No slice header data is passed. And the driver needs to generate it */
2974         /* For the Normal H264 */
    /* KBL: temporarily force first_mb_in_slice to 0 while building the
     * header, then restore it. */
2977 IS_KBL(i965->intel.device_info)) {
2978             saved_macroblock_address = slice_params->macroblock_address;
2979             slice_params->macroblock_address = 0;
2982         slice_header_length_in_bits = build_avc_slice_header(seq_param,
2987         slice_header1 = slice_header;
2990 IS_KBL(i965->intel.device_info)) {
2991             slice_params->macroblock_address = saved_macroblock_address;
    /* The zero byte already emitted replaces the header's first byte. */
2994         if (insert_one_zero_byte) {
2996             slice_header_length_in_bits -= 8;
2999         gen9_vdenc_mfx_avc_insert_object(ctx,
3001 (unsigned int *)slice_header1,
3002 ALIGN(slice_header_length_in_bits, 32) >> 5,
3003 slice_header_length_in_bits & 0x1f,
3004 5, /* first 5 bytes are start code + nal unit type */
    /* Application-supplied packed slice header path. */
3010         unsigned int skip_emul_byte_cnt;
3011         unsigned char *slice_header1 = NULL;
3014 IS_KBL(i965->intel.device_info)) {
3015             slice_header_index = (encode_state->slice_header_index[0] & SLICE_PACKED_DATA_INDEX_MASK);
3018         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
3020         param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
3021         length_in_bits = param->bit_length;
3023         slice_header1 = (unsigned char *)header_data;
3025         if (insert_one_zero_byte) {
3027             length_in_bits -= 8;
3030         /* as the slice header is the last header data for one slice,
3031          * the last header flag is set to one.
3033         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3035         if (insert_one_zero_byte)
3036             skip_emul_byte_cnt -= 1;
3038         gen9_vdenc_mfx_avc_insert_object(ctx,
3040 (unsigned int *)slice_header1,
3041 ALIGN(length_in_bits, 32) >> 5,
3042 length_in_bits & 0x1f,
3046 !param->has_emulation_bytes,
/* Insert all bitstream headers for one slice: on the first slice of the
 * frame, the packed SPS, PPS and SEI buffers (if the application supplied
 * them); then the per-slice packed data via
 * gen9_vdenc_mfx_avc_insert_slice_packed_data().
 * NOTE(review): "inset" in the name should be "insert"; renaming requires
 * updating the caller in gen9_vdenc_mfx_avc_single_slice() -- TODO. */
3054 gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
3055 struct encode_state *encode_state,
3056 struct intel_encoder_context *encoder_context,
3057 VAEncSliceParameterBufferH264 *slice_param,
3060     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3061     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
3062     unsigned int internal_rate_mode = vdenc_context->internal_rate_mode;
3063     unsigned int skip_emul_byte_cnt;
    /* SPS/PPS/SEI are frame-level: emit them only ahead of slice 0. */
3065     if (slice_index == 0) {
3067         if (encode_state->packed_header_data[idx]) {
3068             VAEncPackedHeaderParameterBuffer *param = NULL;
3069             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3070             unsigned int length_in_bits;
3072             assert(encode_state->packed_header_param[idx]);
3073             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3074             length_in_bits = param->bit_length;
3076             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3077             gen9_vdenc_mfx_avc_insert_object(ctx,
3080 ALIGN(length_in_bits, 32) >> 5,
3081 length_in_bits & 0x1f,
3085 !param->has_emulation_bytes,
3090         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
3092         if (encode_state->packed_header_data[idx]) {
3093             VAEncPackedHeaderParameterBuffer *param = NULL;
3094             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3095             unsigned int length_in_bits;
3097             assert(encode_state->packed_header_param[idx]);
3098             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3099             length_in_bits = param->bit_length;
3101             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3103             gen9_vdenc_mfx_avc_insert_object(ctx,
3106 ALIGN(length_in_bits, 32) >> 5,
3107 length_in_bits & 0x1f,
3111 !param->has_emulation_bytes,
3116         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
3118         if (encode_state->packed_header_data[idx]) {
3119             VAEncPackedHeaderParameterBuffer *param = NULL;
3120             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3121             unsigned int length_in_bits;
3123             assert(encode_state->packed_header_param[idx]);
3124             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3125             length_in_bits = param->bit_length;
3127             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3128             gen9_vdenc_mfx_avc_insert_object(ctx,
3131 ALIGN(length_in_bits, 32) >> 5,
3132 length_in_bits & 0x1f,
3136 !param->has_emulation_bytes,
3139         } else if (internal_rate_mode == I965_BRC_CBR) {
3140             /* TODO: insert others */
3144     gen9_vdenc_mfx_avc_insert_slice_packed_data(ctx,
/* Program MFX_AVC_SLICE_STATE (11 DWs) for one slice: slice type,
 * reference counts, weighted-prediction denominators, start/next MB
 * positions, rate-control flags and QP bounds.  Several RC fields
 * (max_qp_n/max_qp_p, grow/shrink, correct[]) are placeholders still to
 * be wired up (see the TODOs below). */
3151 gen9_vdenc_mfx_avc_slice_state(VADriverContextP ctx,
3152 struct encode_state *encode_state,
3153 struct intel_encoder_context *encoder_context,
3154 VAEncPictureParameterBufferH264 *pic_param,
3155 VAEncSliceParameterBufferH264 *slice_param,
3156 VAEncSliceParameterBufferH264 *next_slice_param,
3159     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3160     struct intel_batchbuffer *batch = encoder_context->base.batch;
3161     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
3162     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
3163     unsigned char correct[6], grow, shrink;
3164     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
3165     int max_qp_n, max_qp_p;
3167     int weighted_pred_idc = 0;
3168     int num_ref_l0 = 0, num_ref_l1 = 0;
3169     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3170     int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; // TODO: fix for CBR&VBR */
3171     int inter_rounding = 0;
3173     if (vdenc_context->internal_rate_mode != I965_BRC_CQP)
    /* Start MB address -> (x, y) MB coordinates for this and next slice. */
3176     slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
3177     slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
3179     if (next_slice_param) {
3180         next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
3181         next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
3183         next_slice_hor_pos = 0;
3184         next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
    /* Derive active reference counts and weighted-prediction mode, with
     * the slice-level override taking precedence over the PPS values. */
3187     if (slice_type == SLICE_TYPE_I) {
3188         luma_log2_weight_denom = 0;
3189         chroma_log2_weight_denom = 0;
3190     } else if (slice_type == SLICE_TYPE_P) {
3191         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
3192         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
3194         if (slice_param->num_ref_idx_active_override_flag)
3195             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3196     } else if (slice_type == SLICE_TYPE_B) {
3197         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
3198         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
3199         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
3201         if (slice_param->num_ref_idx_active_override_flag) {
3202             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3203             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
3206         if (weighted_pred_idc == 2) {
3207             /* 8.4.3 - Derivation process for prediction weights (8-279) */
3208             luma_log2_weight_denom = 5;
3209             chroma_log2_weight_denom = 5;
3213     max_qp_n = 0; /* TODO: update it */
3214     max_qp_p = 0; /* TODO: update it */
3215     grow = 0; /* TODO: update it */
3216     shrink = 0; /* TODO: update it */
3218     for (i = 0; i < 6; i++)
3219         correct[i] = 0; /* TODO: update it */
3221     BEGIN_BCS_BATCH(batch, 11);
3223     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
3224     OUT_BCS_BATCH(batch, slice_type);
3225     OUT_BCS_BATCH(batch,
3226 (num_ref_l0 << 16) |
3227 (num_ref_l1 << 24) |
3228 (chroma_log2_weight_denom << 8) |
3229 (luma_log2_weight_denom << 0));
3230     OUT_BCS_BATCH(batch,
3231 (weighted_pred_idc << 30) |
3232 (slice_param->direct_spatial_mv_pred_flag << 29) |
3233 (slice_param->disable_deblocking_filter_idc << 27) |
3234 (slice_param->cabac_init_idc << 24) |
3236 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
3237 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
3239     OUT_BCS_BATCH(batch,
3240 slice_ver_pos << 24 |
3241 slice_hor_pos << 16 |
3242 slice_param->macroblock_address);
3243     OUT_BCS_BATCH(batch,
3244 next_slice_ver_pos << 16 |
3245 next_slice_hor_pos);
3247     OUT_BCS_BATCH(batch,
3248 (0 << 31) | /* TODO: ignore it for VDENC ??? */
3249 (!slice_param->macroblock_address << 30) | /* ResetRateControlCounter */
3250 (2 << 28) | /* Loose Rate Control */
3251 (0 << 24) | /* RC Stable Tolerance */
3252 (0 << 23) | /* RC Panic Enable */
3253 (1 << 22) | /* CBP mode */
3254 (0 << 21) | /* MB Type Direct Conversion, 0: Enable, 1: Disable */
3255 (0 << 20) | /* MB Type Skip Conversion, 0: Enable, 1: Disable */
3256 (!next_slice_param << 19) | /* Is Last Slice */
3257 (0 << 18) | /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
3258 (1 << 17) | /* HeaderPresentFlag */
3259 (1 << 16) | /* SliceData PresentFlag */
3260 (0 << 15) | /* TailPresentFlag, TODO: check it on VDEnc */
3261 (1 << 13) | /* RBSP NAL TYPE */
3262 (slice_index << 4) |
3263 (1 << 12)); /* CabacZeroWordInsertionEnable */
3265     OUT_BCS_BATCH(batch, vdenc_context->compressed_bitstream.start_offset);
3267     OUT_BCS_BATCH(batch,
3268 (max_qp_n << 24) | /*Target QP - 24 is lowest QP*/
3269 (max_qp_p << 16) | /*Target QP + 20 is highest QP*/
3272     OUT_BCS_BATCH(batch,
3274 (inter_rounding << 28) |
3277 (correct[5] << 20) |
3278 (correct[4] << 16) |
3279 (correct[3] << 12) |
3283     OUT_BCS_BATCH(batch, 0);
3285     ADVANCE_BCS_BATCH(batch);
/* Build one byte of the MFX_AVC_REF_IDX_STATE entry for a reference
 * picture: bit 6 = long-term flag, bit 5 = frame (not field) flag,
 * bits 4:1 = frame store index, bit 0 = bottom-field-only flag. */
3289 gen9_vdenc_mfx_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
3291     unsigned int is_long_term =
3292 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
3293     unsigned int is_top_field =
3294 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
3295     unsigned int is_bottom_field =
3296 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
3298     return ((is_long_term << 6) |
3299 ((is_top_field ^ is_bottom_field ^ 1) << 5) |
3300 (frame_store_id << 1) |
3301 ((is_top_field ^ 1) & is_bottom_field));
/* Program MFX_AVC_REF_IDX_STATE (10 DWs) for the L0 list of a P slice.
 * 0x80808080 marks all four byte-entries invalid; valid entries for the
 * active references are then patched in.  B slices are not handled --
 * VDEnc on SKL has no backward prediction. */
3305 gen9_vdenc_mfx_avc_ref_idx_state(VADriverContextP ctx,
3306 struct encode_state *encode_state,
3307 struct intel_encoder_context *encoder_context,
3308 VAEncSliceParameterBufferH264 *slice_param)
3310     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3311     struct intel_batchbuffer *batch = encoder_context->base.batch;
3312     VAPictureH264 *ref_pic;
3313     int i, slice_type, ref_idx_shift;
3314     unsigned int fwd_ref_entry;
3316     fwd_ref_entry = 0x80808080;
3317     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    /* NOTE(review): MAX() forces at least 3 iterations even when fewer L0
     * refs exist; if num_refs[0] could ever exceed 4 the byte shift would
     * pass the 32-bit width of fwd_ref_entry -- confirm num_refs[0] <= 4
     * (and whether MIN() was intended). */
3319     for (i = 0; i < MAX(vdenc_context->num_refs[0], 3); i++) {
3320         ref_pic = &slice_param->RefPicList0[i];
3321         ref_idx_shift = i * 8;
3323         fwd_ref_entry &= ~(0xFF << ref_idx_shift);
3324         fwd_ref_entry += (gen9_vdenc_mfx_get_ref_idx_state(ref_pic, vdenc_context->list_ref_idx[0][i]) << ref_idx_shift);
3327     if (slice_type == SLICE_TYPE_P) {
3328         BEGIN_BCS_BATCH(batch, 10);
3329         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
3330         OUT_BCS_BATCH(batch, 0); // L0
3331         OUT_BCS_BATCH(batch, fwd_ref_entry);
    /* Remaining 7 DWs: all entries invalid. */
3333         for (i = 0; i < 7; i++) {
3334             OUT_BCS_BATCH(batch, 0x80808080);
3337         ADVANCE_BCS_BATCH(batch);
3340     if (slice_type == SLICE_TYPE_B) {
3341         /* VDEnc on SKL doesn't support BDW */
/* Program MFX_AVC_WEIGHTOFFSET_STATE (98 DWs = header + list id + 96 DWs
 * of 32 entries x {luma w/o, Cb w/o, Cr w/o} as int16) for explicit
 * weighted prediction on P slices.  B slices are skipped -- VDEnc on SKL
 * has no backward prediction. */
3347 gen9_vdenc_mfx_avc_weightoffset_state(VADriverContextP ctx,
3348 struct encode_state *encode_state,
3349 struct intel_encoder_context *encoder_context,
3350 VAEncPictureParameterBufferH264 *pic_param,
3351 VAEncSliceParameterBufferH264 *slice_param)
3353     struct intel_batchbuffer *batch = encoder_context->base.batch;
3355     short weightoffsets[32 * 6];
3357     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3359     if (slice_type == SLICE_TYPE_P &&
3360 pic_param->pic_fields.bits.weighted_pred_flag == 1) {
    /* Interleave weights and offsets in the layout the command expects. */
3362         for (i = 0; i < 32; i++) {
3363             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
3364             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
3365             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
3366             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
3367             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
3368             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
3371         BEGIN_BCS_BATCH(batch, 98);
3372         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
3373         OUT_BCS_BATCH(batch, 0);
3374         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
3376         ADVANCE_BCS_BATCH(batch);
3379     if (slice_type == SLICE_TYPE_B) {
3380         /* VDEnc on SKL doesn't support BWD */
/* Emit all per-slice commands for one slice, in hardware-required order:
 * REF_IDX_STATE, WEIGHTOFFSET_STATE, SLICE_STATE, packed headers, and --
 * in slice-level VDEnc mode (gen9.5+) -- the VDEnc per-slice weights/
 * offsets and walker state. */
3386 gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
3387 struct encode_state *encode_state,
3388 struct intel_encoder_context *encoder_context,
3389 VAEncSliceParameterBufferH264 *slice_param,
3390 VAEncSliceParameterBufferH264 *next_slice_param,
3393     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3394     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
3396     gen9_vdenc_mfx_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param);
3397     gen9_vdenc_mfx_avc_weightoffset_state(ctx,
3402     gen9_vdenc_mfx_avc_slice_state(ctx,
3409     gen9_vdenc_mfx_avc_inset_headers(ctx,
3415     if (!vdenc_context->is_frame_level_vdenc) {
3416         gen95_vdenc_vdecn_weihgtsoffsets_state(ctx,
3420         gen95_vdenc_vdenc_walker_state(ctx,
/* Walk every slice group and slice, emitting the per-slice commands via
 * gen9_vdenc_mfx_avc_single_slice().  In slice-level VDEnc mode a
 * VD_PIPELINE_FLUSH (and, between slices, an MI_FLUSH_DW) is emitted
 * after each slice; in frame-level mode a single VDENC_WALKER_STATE plus
 * one VD_PIPELINE_FLUSH follows the whole loop.  Ends with a cache-
 * invalidating MI_FLUSH_DW. */
3429 gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
3430 struct encode_state *encode_state,
3431 struct intel_encoder_context *encoder_context)
3433     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3434     struct intel_batchbuffer *batch = encoder_context->base.batch;
3435     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
3436     VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
3438     int slice_index = 0;
3439     int has_tail = 0; /* TODO: check it later */
3441     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3442         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3444         if (j == encode_state->num_slice_params_ext - 1)
3445             next_slice_group_param = NULL;
3447             next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
3449         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
    /* The "next" slice is the following element, or the first slice of
     * the next group for the last element. */
3450             if (i < encode_state->slice_params_ext[j]->num_elements - 1)
3451                 next_slice_param = slice_param + 1;
3453                 next_slice_param = next_slice_group_param;
3455             gen9_vdenc_mfx_avc_single_slice(ctx,
3462             if (vdenc_context->is_frame_level_vdenc)
    /* Slice-level VDEnc: flush the pipeline after every slice. */
3465                 struct vd_pipeline_flush_parameter pipeline_flush_params;
3466                 int insert_mi_flush;
3468                 memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
3470                 if (next_slice_group_param) {
3471                     pipeline_flush_params.mfx_pipeline_done = 1;
3472                     insert_mi_flush = 1;
3473                 } else if (i < encode_state->slice_params_ext[j]->num_elements - 1) {
3474                     pipeline_flush_params.mfx_pipeline_done = 1;
3475                     insert_mi_flush = 1;
3477                     pipeline_flush_params.mfx_pipeline_done = !has_tail;
3478                     insert_mi_flush = 0;
3481                 pipeline_flush_params.vdenc_pipeline_done = 1;
3482                 pipeline_flush_params.vdenc_pipeline_command_flush = 1;
3483                 pipeline_flush_params.vd_command_message_parser_done = 1;
3484                 gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
3486                 if (insert_mi_flush) {
3487                     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3488                     mi_flush_dw_params.video_pipeline_cache_invalidate = 0;
3489                     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3497     if (vdenc_context->is_frame_level_vdenc)
3501     if (vdenc_context->is_frame_level_vdenc) {
3502         struct vd_pipeline_flush_parameter pipeline_flush_params;
3504         gen9_vdenc_vdenc_walker_state(ctx, encode_state, encoder_context);
3506         memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
3507         pipeline_flush_params.mfx_pipeline_done = !has_tail;
3508         pipeline_flush_params.vdenc_pipeline_done = 1;
3509         pipeline_flush_params.vdenc_pipeline_command_flush = 1;
3510         pipeline_flush_params.vd_command_message_parser_done = 1;
3511         gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
3515     /* TODO: insert a tail if required */
3518     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3519     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
3520     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
/* Build the full MFX + VDEnc command sequence for one encoding pass:
 * optional conditional batch-buffer-ends gated on the HuC BRC status
 * registers, MFX pipe/surface/buffer state, VDEnc pipe/surface/buffer
 * state, image state (inline for CQP, via the HuC-patched second-level
 * batch when BRC is enabled), QM/FQM matrices and finally all slices. */
3524 gen9_vdenc_mfx_vdenc_pipeline(VADriverContextP ctx,
3525 struct encode_state *encode_state,
3526 struct intel_encoder_context *encoder_context)
3528     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3529     struct intel_batchbuffer *batch = encoder_context->base.batch;
3530     struct gpe_mi_batch_buffer_start_parameter mi_batch_buffer_start_params;
    /* BRC: abort the batch early if HuC reported a failure in status2. */
3532     if (vdenc_context->brc_enabled) {
3533         struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
3535         memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
3536         mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
3537         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
    /* Repak passes: skip this pass when the previous one already met the
     * target (HuC status register check). */
3540     if (vdenc_context->current_pass) {
3541         struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
3543         memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
3544         mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status_res.bo;
3545         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
3548     gen9_vdenc_mfx_pipe_mode_select(ctx, encode_state, encoder_context);
    /* MFX surface ids: 0 = reconstructed, 4 = source, 5 = 4x scaled recon. */
3550     gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res, 0);
3551     gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res, 4);
3552     gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res, 5);
3554     gen9_vdenc_mfx_pipe_buf_addr_state(ctx, encoder_context);
3555     gen9_vdenc_mfx_ind_obj_base_addr_state(ctx, encoder_context);
3556     gen9_vdenc_mfx_bsp_buf_base_addr_state(ctx, encoder_context);
3558     gen9_vdenc_vdenc_pipe_mode_select(ctx, encode_state, encoder_context);
3559     gen9_vdenc_vdenc_src_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res);
3560     gen9_vdenc_vdenc_ref_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res);
3561     gen9_vdenc_vdenc_ds_ref_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res);
3562     gen9_vdenc_vdenc_pipe_buf_addr_state(ctx, encode_state, encoder_context);
3563     gen9_vdenc_vdenc_const_qpt_state(ctx, encode_state, encoder_context);
3565     if (!vdenc_context->brc_enabled) {
3566         gen9_vdenc_mfx_avc_img_state(ctx, encode_state, encoder_context);
3567         gen9_vdenc_vdenc_img_state(ctx, encode_state, encoder_context);
    /* BRC: image state comes from the HuC-updated second-level batch. */
3569         memset(&mi_batch_buffer_start_params, 0, sizeof(mi_batch_buffer_start_params));
3570         mi_batch_buffer_start_params.is_second_level = 1; /* Must be the second level batch buffer */
3571         mi_batch_buffer_start_params.bo = vdenc_context->second_level_batch_res.bo;
3572         gen8_gpe_mi_batch_buffer_start(ctx, batch, &mi_batch_buffer_start_params);
3575     gen9_vdenc_mfx_avc_qm_state(ctx, encoder_context);
3576     gen9_vdenc_mfx_avc_fqm_state(ctx, encoder_context);
3578     gen9_vdenc_mfx_vdenc_avc_slices(ctx, encode_state, encoder_context);
/* Map the VA-API rate-control mode (low 7 bits of rate_control_mode) to
 * the driver's internal BRC mode: CBR, VBR, or CQP as the default.  The
 * case labels are elided in this listing. */
3582 gen9_vdenc_context_brc_prepare(struct encode_state *encode_state,
3583 struct intel_encoder_context *encoder_context)
3585     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3586     unsigned int rate_control_mode = encoder_context->rate_control_mode;
3588     switch (rate_control_mode & 0x7f) {
3590         vdenc_context->internal_rate_mode = I965_BRC_CBR;
3594         vdenc_context->internal_rate_mode = I965_BRC_VBR;
3599         vdenc_context->internal_rate_mode = I965_BRC_CQP;
/* After encoding: flush, then store the MFX bitstream byte count into the
 * status buffer, and mirror the byte count and image status control
 * registers into each per-pass BRC-update DMEM buffer (fixed DWord
 * offsets 5 and 7) so the HuC BRC kernel can read the PAK results.
 * NOTE(review): "status_bffuer" misspells "status_buffer" -- the struct
 * member is declared elsewhere, so fixing it is a cross-file rename. */
3605 gen9_vdenc_read_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3607     struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3608     struct intel_batchbuffer *batch = encoder_context->base.batch;
3609     struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
3610     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
3611     unsigned int base_offset = vdenc_context->status_bffuer.base_offset;
3614     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3615     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3617     memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
3618     mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
3619     mi_store_register_mem_params.bo = vdenc_context->status_bffuer.res.bo;
3620     mi_store_register_mem_params.offset = base_offset + vdenc_context->status_bffuer.bytes_per_frame_offset;
3621     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3623     /* Update DMEM buffer for BRC Update */
3624     for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
3625         mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
3626         mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
3627         mi_store_register_mem_params.offset = 5 * sizeof(uint32_t);
3628         gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3630         mi_store_register_mem_params.mmio_offset = MFC_IMAGE_STATUS_CTRL_REG; /* TODO: fix it if VDBOX2 is used */
3631         mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
3632         mi_store_register_mem_params.offset = 7 * sizeof(uint32_t);
3633         gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
/*
 * Validate the submitted slice parameters against what this VDENC path
 * can encode: B slices are rejected (this encoder only handles I/P).
 * Returns VA_STATUS_SUCCESS when all slices are acceptable,
 * VA_STATUS_ERROR_UNKNOWN on the first B slice found.
 * NOTE(review): the declarations of i/j and the slice_param advance are
 * on lines elided from this excerpt.
 */
3638 gen9_vdenc_avc_check_capability(VADriverContextP ctx,
3639 struct encode_state *encode_state,
3640 struct intel_encoder_context *encoder_context)
3642 VAEncSliceParameterBufferH264 *slice_param;
/* Walk every slice-parameter buffer and every slice element inside it. */
3645 for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3646 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3648 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
3649 if (slice_param->slice_type == SLICE_TYPE_B)
3650 return VA_STATUS_ERROR_UNKNOWN;
3656 return VA_STATUS_SUCCESS;
/*
 * Top-level AVC encode entry for one picture.  Rejects unsupported slice
 * types, prepares per-frame resources, then runs the (possibly multi-pass)
 * HuC-BRC + MFX/VDENC pipeline, flushing one batch buffer per pass.
 * NOTE(review): the VAProfile parameter line, the va_status declaration,
 * and the early "return va_status" are on lines elided from this excerpt.
 */
3660 gen9_vdenc_avc_encode_picture(VADriverContextP ctx,
3662 struct encode_state *encode_state,
3663 struct intel_encoder_context *encoder_context)
3666 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3667 struct intel_batchbuffer *batch = encoder_context->base.batch;
/* Bail out before touching hardware if the stream uses B slices. */
3669 va_status = gen9_vdenc_avc_check_capability(ctx, encode_state, encoder_context);
3671 if (va_status != VA_STATUS_SUCCESS)
/* Allocate/bind per-frame surfaces and buffers for this picture. */
3674 gen9_vdenc_avc_prepare(ctx, profile, encode_state, encoder_context);
/* One hardware submission per BRC PAK pass. */
3676 for (vdenc_context->current_pass = 0; vdenc_context->current_pass < vdenc_context->num_passes; vdenc_context->current_pass++) {
3677 vdenc_context->is_first_pass = (vdenc_context->current_pass == 0);
3678 vdenc_context->is_last_pass = (vdenc_context->current_pass == (vdenc_context->num_passes - 1));
/* All commands for this pass go to the BSD/video ring. */
3680 intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
3682 intel_batchbuffer_emit_mi_flush(batch);
3684 if (vdenc_context->brc_enabled) {
/* (Re)program the HuC BRC kernel on first use or after a reset request. */
3685 if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset)
3686 gen9_vdenc_huc_brc_init_reset(ctx, encode_state, encoder_context);
3688 gen9_vdenc_huc_brc_update(ctx, encode_state, encoder_context);
3689 intel_batchbuffer_emit_mi_flush(batch);
/* Emit the MFX/VDENC command stream, then capture PAK status registers. */
3692 gen9_vdenc_mfx_vdenc_pipeline(ctx, encode_state, encoder_context);
3693 gen9_vdenc_read_status(ctx, encoder_context);
3695 intel_batchbuffer_end_atomic(batch);
3696 intel_batchbuffer_flush(batch);
/* BRC is now initialized; clear any pending reset for the next frame. */
3698 vdenc_context->brc_initted = 1;
3699 vdenc_context->brc_need_reset = 0;
3702 return VA_STATUS_SUCCESS;
/*
 * mfc_pipeline hook: dispatch on the VA profile.  Only H.264
 * (Constrained Baseline / Main / High) is handled by this VDENC backend;
 * anything else yields VA_STATUS_ERROR_UNSUPPORTED_PROFILE.
 * NOTE(review): the profile parameter line, vaStatus declaration, switch
 * header, and final return are on lines elided from this excerpt.
 */
3706 gen9_vdenc_pipeline(VADriverContextP ctx,
3708 struct encode_state *encode_state,
3709 struct intel_encoder_context *encoder_context)
3714 case VAProfileH264ConstrainedBaseline:
3715 case VAProfileH264Main:
3716 case VAProfileH264High:
3717 vaStatus = gen9_vdenc_avc_encode_picture(ctx, profile, encode_state, encoder_context);
/* default: unsupported profile. */
3721 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
/*
 * Release every GPE resource owned by the VDENC context: HuC BRC buffers,
 * per-pass DMEM buffers, statistics/state buffers, reconstructed and
 * reference surfaces, the bitstream/status buffers, and the MFX/VDENC
 * row-store scratch buffers.  Safe to call on a zero-initialized context
 * (i965_free_gpe_resource tolerates unallocated resources, as the call
 * sites here assume).  Counterpart of gen9_vdenc_allocate_resources plus
 * the per-frame surface setup.
 * NOTE(review): the 'int i;' declaration is on a line elided here.
 */
3729 gen9_vdenc_free_resources(struct gen9_vdenc_context *vdenc_context)
/* HuC BRC kernel buffers. */
3733 i965_free_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
3734 i965_free_gpe_resource(&vdenc_context->brc_history_buffer_res);
3735 i965_free_gpe_resource(&vdenc_context->brc_stream_in_res);
3736 i965_free_gpe_resource(&vdenc_context->brc_stream_out_res);
3737 i965_free_gpe_resource(&vdenc_context->huc_dummy_res);
/* One BRC-update DMEM buffer per PAK pass. */
3739 for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++)
3740 i965_free_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
/* Statistics / image-state / second-level batch buffers. */
3742 i965_free_gpe_resource(&vdenc_context->vdenc_statistics_res);
3743 i965_free_gpe_resource(&vdenc_context->pak_statistics_res);
3744 i965_free_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
3745 i965_free_gpe_resource(&vdenc_context->hme_detection_summary_buffer_res);
3746 i965_free_gpe_resource(&vdenc_context->brc_constant_data_res);
3747 i965_free_gpe_resource(&vdenc_context->second_level_batch_res);
/* HuC status readback buffers. */
3749 i965_free_gpe_resource(&vdenc_context->huc_status_res);
3750 i965_free_gpe_resource(&vdenc_context->huc_status2_res);
/* Reconstructed / deblocking output surfaces. */
3752 i965_free_gpe_resource(&vdenc_context->recon_surface_res);
3753 i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
3754 i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
3755 i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
/* Reference lists (full-size and 4x-scaled), released pairwise. */
3757 for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
3758 i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
3759 i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
/* Input surface, output bitstream, status buffer ("bffuer" [sic] is the field name). */
3762 i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
3763 i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
3764 i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
/* MFX/VDENC row-store scratch buffers. */
3766 i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
3767 i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
3768 i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
3769 i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
3771 i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
/*
 * mfc_context_destroy hook: free all GPU resources held by the VDENC
 * context, then the context itself.  'context' is the void* stored in
 * encoder_context->mfc_context by gen9_vdenc_context_init.
 */
3775 gen9_vdenc_context_destroy(void *context)
3777 struct gen9_vdenc_context *vdenc_context = context;
3779 gen9_vdenc_free_resources(vdenc_context);
3781 free(vdenc_context);
/*
 * Allocate the frame-independent GPE buffers used by the HuC BRC kernel
 * and the VDENC/PAK pipeline.  Each call goes through the
 * ALLOC_VDENC_BUFFER_RESOURCE macro (defined elsewhere in this file),
 * which allocates a buffer of the given size with the given debug name.
 * Per-frame surfaces are allocated separately in the prepare path.
 * NOTE(review): the 'int i;' declaration and the size arguments of the
 * huc_dummy/huc_status allocations are on lines elided from this excerpt.
 */
3785 gen9_vdenc_allocate_resources(VADriverContextP ctx,
3786 struct intel_encoder_context *encoder_context,
3787 struct gen9_vdenc_context *vdenc_context)
3789 struct i965_driver_data *i965 = i965_driver_data(ctx);
/* HuC BRC init/reset DMEM, 64-byte aligned as the HuC interface expects. */
3792 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_init_reset_dmem_res,
3793 ALIGN(sizeof(struct huc_brc_init_dmem), 64),
3794 "HuC Init&Reset DMEM buffer");
/* Persistent BRC history carried across frames. */
3796 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_history_buffer_res,
3797 ALIGN(HUC_BRC_HISTORY_BUFFER_SIZE, 0x1000),
3798 "HuC History buffer");
3800 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_in_res,
3801 ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3802 "HuC Stream In buffer");
3804 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_out_res,
3805 ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3806 "HuC Stream Out buffer");
3808 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_dummy_res,
3810 "HuC dummy buffer");
/* One BRC-update DMEM per PAK pass, zeroed so stale data never reaches the HuC. */
3812 for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
3813 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_update_dmem_res[i],
3814 ALIGN(sizeof(struct huc_brc_update_dmem), 64),
3815 "HuC BRC Update buffer");
3816 i965_zero_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
/* Per-frame statistics produced by VDENC and PAK, consumed by BRC update. */
3819 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_statistics_res,
3820 ALIGN(VDENC_STATISTICS_SIZE, 0x1000),
3821 "VDENC statistics buffer");
3823 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->pak_statistics_res,
3824 ALIGN(PAK_STATISTICS_SIZE, 0x1000),
3825 "PAK statistics buffer");
3827 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_avc_image_state_res,
3828 ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3829 "VDENC/AVC image state buffer");
3831 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->hme_detection_summary_buffer_res,
3832 ALIGN(HME_DETECTION_SUMMARY_BUFFER_SIZE, 0x1000),
3833 "HME summary buffer");
3835 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_constant_data_res,
3836 ALIGN(BRC_CONSTANT_DATA_SIZE, 0x1000),
3837 "BRC constant buffer");
/* Second-level batch (holds the image state emitted by BRC); sized like the image state. */
3839 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->second_level_batch_res,
3840 ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3841 "Second level batch buffer");
/* HuC status readback buffers. */
3843 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status_res,
3845 "HuC Status buffer");
3847 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status2_res,
3849 "HuC Status buffer");
/*
 * Gen9 (SKL/BXT) hardware-interface defaults: this generation drives
 * VDENC at frame-level granularity.
 */
3853 gen9_vdenc_hw_interfaces_init(VADriverContextP ctx,
3854 struct intel_encoder_context *encoder_context,
3855 struct gen9_vdenc_context *vdenc_context)
3857 vdenc_context->is_frame_level_vdenc = 1;
/*
 * Gen9.5 (KBL) hardware-interface setup: enables the extended PAK object
 * command on top of the gen9 defaults.
 * NOTE(review): the expected call into gen9_vdenc_hw_interfaces_init is
 * on a line elided from this excerpt — confirm against the full file.
 */
3861 gen95_vdenc_hw_interfaces_init(VADriverContextP ctx,
3862 struct intel_encoder_context *encoder_context,
3863 struct gen9_vdenc_context *vdenc_context)
3865 vdenc_context->use_extended_pak_obj_cmd = 1;
/*
 * Select the per-generation hardware-interface configuration: KBL takes
 * the gen9.5 variant, everything else the plain gen9 variant.
 * NOTE(review): the 'else' keyword between the two calls is on a line
 * elided from this excerpt.
 */
3869 vdenc_hw_interfaces_init(VADriverContextP ctx,
3870 struct intel_encoder_context *encoder_context,
3871 struct gen9_vdenc_context *vdenc_context)
3873 struct i965_driver_data *i965 = i965_driver_data(ctx);
3875 if (IS_KBL(i965->intel.device_info)) {
3876 gen95_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
3878 gen9_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
/*
 * get_status hook: publish the encoded frame size.  The per-frame byte
 * count was written into the coded-buffer's private data by the PAK
 * status readback (gen9_vdenc_read_status); copy it into the segment
 * size reported to the application.
 */
3883 gen9_vdenc_context_get_status(VADriverContextP ctx,
3884 struct intel_encoder_context *encoder_context,
3885 struct i965_coded_buffer_segment *coded_buffer_segment)
3887 struct gen9_vdenc_status *vdenc_status = (struct gen9_vdenc_status *)coded_buffer_segment->codec_private_data;
3889 coded_buffer_segment->base.size = vdenc_status->bytes_per_frame;
3891 return VA_STATUS_SUCCESS;
3895 gen9_vdenc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3897 struct gen9_vdenc_context *vdenc_context = calloc(1, sizeof(struct gen9_vdenc_context));
3902 vdenc_context->brc_initted = 0;
3903 vdenc_context->brc_need_reset = 0;
3904 vdenc_context->is_low_delay = 0;
3905 vdenc_context->current_pass = 0;
3906 vdenc_context->num_passes = 1;
3907 vdenc_context->vdenc_streamin_enable = 0;
3908 vdenc_context->vdenc_pak_threshold_check_enable = 0;
3909 vdenc_context->is_frame_level_vdenc = 0;
3911 vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
3912 gen9_vdenc_allocate_resources(ctx, encoder_context, vdenc_context);
3914 encoder_context->mfc_context = vdenc_context;
3915 encoder_context->mfc_context_destroy = gen9_vdenc_context_destroy;
3916 encoder_context->mfc_pipeline = gen9_vdenc_pipeline;
3917 encoder_context->mfc_brc_prepare = gen9_vdenc_context_brc_prepare;
3918 encoder_context->get_status = gen9_vdenc_context_get_status;