2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41 #include "intel_media.h"
42 #include "gen9_vdenc.h"
45 intel_avc_enc_slice_type_fixup(int slice_type);
47 static const int8_t dist_qp_adj_tab_i_vbr[81] = {
48 +0, 0, 0, 0, 0, 3, 4, 6, 8,
49 +0, 0, 0, 0, 0, 2, 3, 5, 7,
50 -1, 0, 0, 0, 0, 2, 2, 4, 5,
51 -1, -1, 0, 0, 0, 1, 2, 2, 4,
52 -2, -2, -1, 0, 0, 0, 1, 2, 4,
53 -2, -2, -1, 0, 0, 0, 1, 2, 4,
54 -3, -2, -1, -1, 0, 0, 1, 2, 5,
55 -3, -2, -1, -1, 0, 0, 2, 4, 7,
56 -4, -3, -2, -1, 0, 1, 3, 5, 8,
/* Signed QP-delta table for P frames under VBR rate control.
 * Same 9x9 row-major layout as dist_qp_adj_tab_i_vbr; P deltas are
 * smaller in magnitude than the I-frame deltas. */
59 static const int8_t dist_qp_adj_tab_p_vbr[81] = {
60 -1, 0, 0, 0, 0, 1, 1, 2, 3,
61 -1, -1, 0, 0, 0, 1, 1, 2, 3,
62 -2, -1, -1, 0, 0, 1, 1, 2, 3,
63 -3, -2, -2, -1, 0, 0, 1, 2, 3,
64 -3, -2, -1, -1, 0, 0, 1, 2, 3,
65 -3, -2, -1, -1, 0, 0, 1, 2, 3,
66 -3, -2, -1, -1, 0, 0, 1, 2, 3,
67 -3, -2, -1, -1, 0, 0, 1, 2, 3,
68 -3, -2, -1, -1, 0, 0, 1, 2, 3,
/* Signed QP-delta table for B frames under VBR rate control.
 * Same 9x9 row-major layout as dist_qp_adj_tab_i_vbr. */
71 static const int8_t dist_qp_adj_tab_b_vbr[81] = {
72 +0, 0, 0, 0, 0, 2, 3, 3, 4,
73 +0, 0, 0, 0, 0, 2, 3, 3, 4,
74 -1, 0, 0, 0, 0, 2, 2, 3, 3,
75 -1, -1, 0, 0, 0, 1, 2, 2, 2,
76 -1, -1, -1, 0, 0, 0, 1, 2, 2,
77 -2, -1, -1, 0, 0, 0, 0, 1, 2,
78 -2, -1, -1, -1, 0, 0, 0, 1, 3,
79 -2, -2, -1, -1, 0, 0, 1, 1, 3,
80 -2, -2, -1, -1, 0, 1, 1, 2, 4,
/* Buffer/rate adjustment table for I frames under VBR rate control.
 * 72 entries laid out as a 9x8 grid, row-major; presumably indexed by a
 * buffer-fullness bucket and a rate bucket -- TODO confirm against the
 * BRC consumer (not visible in this chunk). */
83 static const int8_t buf_rate_adj_tab_i_vbr[72] = {
84 -4, -20, -28, -36, -40, -44, -48, -80,
85 +0, -8, -12, -20, -24, -28, -32, -36,
86 +0, 0, -8, -16, -20, -24, -28, -32,
87 +8, 4, 0, 0, -8, -16, -24, -28,
88 32, 24, 16, 2, -4, -8, -16, -20,
89 36, 32, 28, 16, 8, 0, -4, -8,
90 40, 36, 24, 20, 16, 8, 0, -8,
91 48, 40, 28, 24, 20, 12, 0, -4,
92 64, 48, 28, 20, 16, 12, 8, 4,
/* Buffer/rate adjustment table for P frames under VBR rate control.
 * Same 9x8 row-major layout as buf_rate_adj_tab_i_vbr. */
95 static const int8_t buf_rate_adj_tab_p_vbr[72] = {
96 -8, -24, -32, -44, -48, -56, -64, -80,
97 -8, -16, -32, -40, -44, -52, -56, -64,
98 +0, 0, -16, -28, -36, -40, -44, -48,
99 +8, 4, 0, 0, -8, -16, -24, -36,
100 20, 12, 4, 0, -8, -8, -8, -16,
101 24, 16, 8, 8, 8, 0, -4, -8,
102 40, 36, 24, 20, 16, 8, 0, -8,
103 48, 40, 28, 24, 20, 12, 0, -4,
104 64, 48, 28, 20, 16, 12, 8, 4,
/* Buffer/rate adjustment table for B frames under VBR rate control.
 * Same 9x8 row-major layout as buf_rate_adj_tab_i_vbr. */
107 static const int8_t buf_rate_adj_tab_b_vbr[72] = {
108 0, -4, -8, -16, -24, -32, -40, -48,
109 1, 0, -4, -8, -16, -24, -32, -40,
110 4, 2, 0, -1, -3, -8, -16, -24,
111 8, 4, 2, 0, -1, -4, -8, -16,
112 20, 16, 4, 0, -1, -4, -8, -16,
113 24, 20, 16, 8, 4, 0, -4, -8,
114 28, 24, 20, 16, 8, 4, 0, -8,
115 32, 24, 20, 16, 8, 4, 0, -4,
116 64, 48, 28, 20, 16, 12, 8, 4,
119 static const struct huc_brc_update_constant_data gen9_brc_update_constant_data = {
120 .global_rate_qp_adj_tab_i = {
121 48, 40, 32, 24, 16, 8, 0, -8,
122 40, 32, 24, 16, 8, 0, -8, -16,
123 32, 24, 16, 8, 0, -8, -16, -24,
124 24, 16, 8, 0, -8, -16, -24, -32,
125 16, 8, 0, -8, -16, -24, -32, -40,
126 8, 0, -8, -16, -24, -32, -40, -48,
127 0, -8, -16, -24, -32, -40, -48, -56,
128 48, 40, 32, 24, 16, 8, 0, -8,
131 .global_rate_qp_adj_tab_p = {
132 48, 40, 32, 24, 16, 8, 0, -8,
133 40, 32, 24, 16, 8, 0, -8, -16,
134 16, 8, 8, 4, -8, -16, -16, -24,
135 8, 0, 0, -8, -16, -16, -16, -24,
136 8, 0, 0, -24, -32, -32, -32, -48,
137 0, -16, -16, -24, -32, -48, -56, -64,
138 -8, -16, -32, -32, -48, -48, -56, -64,
139 -16, -32, -48, -48, -48, -56, -64, -80,
142 .global_rate_qp_adj_tab_b = {
143 48, 40, 32, 24, 16, 8, 0, -8,
144 40, 32, 24, 16, 8, 0, -8, -16,
145 32, 24, 16, 8, 0, -8, -16, -24,
146 24, 16, 8, 0, -8, -8, -16, -24,
147 16, 8, 0, 0, -8, -16, -24, -32,
148 16, 8, 0, 0, -8, -16, -24, -32,
149 0, -8, -8, -16, -32, -48, -56, -64,
150 0, -8, -8, -16, -32, -48, -56, -64
153 .dist_threshld_i = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
154 .dist_threshld_p = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
155 .dist_threshld_b = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
157 .dist_qp_adj_tab_i = {
158 0, 0, 0, 0, 0, 3, 4, 6, 8,
159 0, 0, 0, 0, 0, 2, 3, 5, 7,
160 -1, 0, 0, 0, 0, 2, 2, 4, 5,
161 -1, -1, 0, 0, 0, 1, 2, 2, 4,
162 -2, -2, -1, 0, 0, 0, 1, 2, 4,
163 -2, -2, -1, 0, 0, 0, 1, 2, 4,
164 -3, -2, -1, -1, 0, 0, 1, 2, 5,
165 -3, -2, -1, -1, 0, 0, 2, 4, 7,
166 -4, -3, -2, -1, 0, 1, 3, 5, 8,
169 .dist_qp_adj_tab_p = {
170 -1, 0, 0, 0, 0, 1, 1, 2, 3,
171 -1, -1, 0, 0, 0, 1, 1, 2, 3,
172 -2, -1, -1, 0, 0, 1, 1, 2, 3,
173 -3, -2, -2, -1, 0, 0, 1, 2, 3,
174 -3, -2, -1, -1, 0, 0, 1, 2, 3,
175 -3, -2, -1, -1, 0, 0, 1, 2, 3,
176 -3, -2, -1, -1, 0, 0, 1, 2, 3,
177 -3, -2, -1, -1, 0, 0, 1, 2, 3,
178 -3, -2, -1, -1, 0, 0, 1, 2, 3,
181 .dist_qp_adj_tab_b = {
182 0, 0, 0, 0, 0, 2, 3, 3, 4,
183 0, 0, 0, 0, 0, 2, 3, 3, 4,
184 -1, 0, 0, 0, 0, 2, 2, 3, 3,
185 -1, -1, 0, 0, 0, 1, 2, 2, 2,
186 -1, -1, -1, 0, 0, 0, 1, 2, 2,
187 -2, -1, -1, 0, 0, 0, 0, 1, 2,
188 -2, -1, -1, -1, 0, 0, 0, 1, 3,
189 -2, -2, -1, -1, 0, 0, 1, 1, 3,
190 -2, -2, -1, -1, 0, 1, 1, 2, 4,
193 /* default table for non lowdelay */
194 .buf_rate_adj_tab_i = {
195 -4, -20, -28, -36, -40, -44, -48, -80,
196 0, -8, -12, -20, -24, -28, -32, -36,
197 0, 0, -8, -16, -20, -24, -28, -32,
198 8, 4, 0, 0, -8, -16, -24, -28,
199 32, 24, 16, 2, -4, -8, -16, -20,
200 36, 32, 28, 16, 8, 0, -4, -8,
201 40, 36, 24, 20, 16, 8, 0, -8,
202 48, 40, 28, 24, 20, 12, 0, -4,
203 64, 48, 28, 20, 16, 12, 8, 4,
206 /* default table for non lowdelay */
207 .buf_rate_adj_tab_p = {
208 -8, -24, -32, -44, -48, -56, -64, -80,
209 -8, -16, -32, -40, -44, -52, -56, -64,
210 0, 0, -16, -28, -36, -40, -44, -48,
211 8, 4, 0, 0, -8, -16, -24, -36,
212 20, 12, 4, 0, -8, -8, -8, -16,
213 24, 16, 8, 8, 8, 0, -4, -8,
214 40, 36, 24, 20, 16, 8, 0, -8,
215 48, 40, 28, 24, 20, 12, 0, -4,
216 64, 48, 28, 20, 16, 12, 8, 4,
219 /* default table for non lowdelay */
220 .buf_rate_adj_tab_b = {
221 0, -4, -8, -16, -24, -32, -40, -48,
222 1, 0, -4, -8, -16, -24, -32, -40,
223 4, 2, 0, -1, -3, -8, -16, -24,
224 8, 4, 2, 0, -1, -4, -8, -16,
225 20, 16, 4, 0, -1, -4, -8, -16,
226 24, 20, 16, 8, 4, 0, -4, -8,
227 28, 24, 20, 16, 8, 4, 0, -8,
228 32, 24, 20, 16, 8, 4, 0, -4,
229 64, 48, 28, 20, 16, 12, 8, 4,
232 .frame_size_min_tab_p = { 1, 2, 4, 6, 8, 10, 16, 16, 16 },
233 .frame_size_min_tab_i = { 1, 2, 4, 8, 16, 20, 24, 32, 36 },
235 .frame_size_max_tab_p = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
236 .frame_size_max_tab_i = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
238 .frame_size_scg_tab_p = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
239 .frame_size_scg_tab_i = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
241 .i_intra_non_pred = {
242 0x0e, 0x0e, 0x0e, 0x18, 0x19, 0x1b, 0x1c, 0x0d, 0x0f, 0x18, 0x19, 0x0d, 0x0f, 0x0f,
243 0x0c, 0x0e, 0x0c, 0x0c, 0x0a, 0x0a, 0x0b, 0x0a, 0x0a, 0x0a, 0x09, 0x09, 0x08, 0x08,
244 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x07, 0x07, 0x07, 0x07, 0x07,
248 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
249 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
250 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
254 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
255 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x04, 0x04, 0x04, 0x04, 0x06, 0x06, 0x06,
256 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07,
260 0x2e, 0x2e, 0x2e, 0x38, 0x39, 0x3a, 0x3b, 0x2c, 0x2e, 0x38, 0x39, 0x2d, 0x2f, 0x38,
261 0x2e, 0x38, 0x2e, 0x38, 0x2f, 0x2e, 0x38, 0x38, 0x38, 0x38, 0x2f, 0x2f, 0x2f, 0x2e,
262 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x1e, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x0e, 0x0d,
266 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
267 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
268 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
271 .p_intra_non_pred = {
272 0x06, 0x06, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x07,
273 0x07, 0x07, 0x06, 0x07, 0x07, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
274 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
278 0x1b, 0x1b, 0x1b, 0x1c, 0x1e, 0x28, 0x29, 0x1a, 0x1b, 0x1c, 0x1e, 0x1a, 0x1c, 0x1d,
279 0x1b, 0x1c, 0x1c, 0x1c, 0x1c, 0x1b, 0x1c, 0x1c, 0x1d, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c,
280 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
284 0x1d, 0x1d, 0x1d, 0x1e, 0x28, 0x29, 0x2a, 0x1b, 0x1d, 0x1e, 0x28, 0x1c, 0x1d, 0x1f,
285 0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1d, 0x1f, 0x1e, 0x1e, 0x1e, 0x1d, 0x1e, 0x1e, 0x1d,
286 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e,
290 0x38, 0x38, 0x38, 0x39, 0x3a, 0x3b, 0x3d, 0x2e, 0x38, 0x39, 0x3a, 0x2f, 0x39, 0x3a,
291 0x38, 0x39, 0x38, 0x39, 0x39, 0x38, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
292 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
296 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
297 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
298 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
302 0x07, 0x07, 0x07, 0x08, 0x09, 0x0b, 0x0c, 0x06, 0x07, 0x09, 0x0a, 0x07, 0x08, 0x09,
303 0x08, 0x09, 0x08, 0x09, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x08, 0x08, 0x08, 0x08,
304 0x08, 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
308 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x02, 0x02, 0x02, 0x03, 0x02, 0x02, 0x02,
309 0x02, 0x03, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
310 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
314 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
315 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
316 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
320 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
321 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
322 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04
328 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
329 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
330 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
335 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
336 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
337 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
342 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
343 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
344 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
349 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
350 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
351 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
356 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
357 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
358 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
363 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
364 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
365 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
370 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
371 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
372 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
377 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
378 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
379 0x1a, 0x1a, 0x1a, 0x1a, 0x1f, 0x2a, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d
/* Per-QP lambda (cost multiplier) table used to program VDENC cost state.
 * 44 entries: values rise roughly geometrically with QP; the trailing two
 * zero entries appear to be padding -- TODO confirm against the consumer. */
385 static const uint8_t vdenc_const_qp_lambda[44] = {
386 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
387 0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
388 0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
389 0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
390 0x4a, 0x53, 0x00, 0x00
394 static const uint16_t vdenc_const_skip_threshold[28] = {
399 static const uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0[28] = {
404 static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1[28] = {
409 static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2[28] = {
414 static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3[28] = {
/* Per-QP lambda table for P frames; the values are identical to the
 * vdenc_const_qp_lambda table above (kept as a separate symbol so the
 * I and P tables can diverge independently). */
420 static const uint8_t vdenc_const_qp_lambda_p[44] = {
421 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
422 0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
423 0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
424 0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
425 0x4a, 0x53, 0x00, 0x00
/* Per-QP skip-decision threshold table for P frames (28 entries;
 * trailing zeros appear to be padding). Thresholds grow monotonically
 * with QP, making skip progressively cheaper at higher QP. */
429 static const uint16_t vdenc_const_skip_threshold_p[28] = {
430 0x0000, 0x0000, 0x0000, 0x0000, 0x0002, 0x0004, 0x0007, 0x000b,
431 0x0011, 0x0019, 0x0023, 0x0032, 0x0044, 0x005b, 0x0077, 0x0099,
432 0x00c2, 0x00f1, 0x0128, 0x0168, 0x01b0, 0x0201, 0x025c, 0x02c2,
433 0x0333, 0x03b0, 0x0000, 0x0000
/* SIC forward-transform coefficient threshold, set 0, P frames
 * (28 entries; trailing zeros appear to be padding). */
437 static const uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0_p[28] = {
438 0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
439 0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x3f,
440 0x4e, 0x51, 0x5b, 0x63, 0x6f, 0x7f, 0x00, 0x00
/* SIC forward-transform coefficient threshold, set 1, P frames
 * (28 entries; trailing zeros appear to be padding). */
444 static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1_p[28] = {
445 0x03, 0x04, 0x05, 0x05, 0x07, 0x09, 0x0b, 0x0e, 0x12, 0x17,
446 0x1c, 0x21, 0x27, 0x2c, 0x33, 0x3b, 0x41, 0x51, 0x5c, 0x1a,
447 0x1e, 0x21, 0x22, 0x26, 0x2c, 0x30, 0x00, 0x00
/* SIC forward-transform coefficient threshold, set 2, P frames
 * (28 entries; trailing zeros appear to be padding). */
451 static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2_p[28] = {
452 0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
453 0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x0f,
454 0x13, 0x14, 0x16, 0x18, 0x1b, 0x1f, 0x00, 0x00
/* SIC forward-transform coefficient threshold, set 3, P frames
 * (28 entries; trailing zeros appear to be padding). */
458 static const uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3_p[28] = {
459 0x04, 0x05, 0x06, 0x09, 0x0b, 0x0d, 0x12, 0x16, 0x1b, 0x23,
460 0x2c, 0x33, 0x3d, 0x45, 0x4f, 0x5b, 0x66, 0x7f, 0x8e, 0x2a,
461 0x2f, 0x32, 0x37, 0x3c, 0x45, 0x4c, 0x00, 0x00
465 vdenc_brc_dev_threshi0_fp_neg[4] = { 0.80, 0.60, 0.34, 0.2 };
468 vdenc_brc_dev_threshi0_fp_pos[4] = { 0.2, 0.4, 0.66, 0.9 };
471 vdenc_brc_dev_threshpb0_fp_neg[4] = { 0.90, 0.66, 0.46, 0.3 };
474 vdenc_brc_dev_threshpb0_fp_pos[4] = { 0.3, 0.46, 0.70, 0.90 };
477 vdenc_brc_dev_threshvbr0_neg[4] = { 0.90, 0.70, 0.50, 0.3 };
480 vdenc_brc_dev_threshvbr0_pos[4] = { 0.4, 0.5, 0.75, 0.90 };
/* BRC estimated-rate thresholds for P frames (bucket boundaries used by
 * the rate-adjustment logic; consumer not visible in this chunk). */
482 static const unsigned char
483 vdenc_brc_estrate_thresh_p0[7] = { 4, 8, 12, 16, 20, 24, 28 };
/* BRC estimated-rate thresholds for I frames; currently identical to
 * the P-frame thresholds but kept as a separate symbol. */
485 static const unsigned char
486 vdenc_brc_estrate_thresh_i0[7] = { 4, 8, 12, 16, 20, 24, 28 };
/* Frame counts at which BRC begins applying successive global
 * adjustments (consumer not visible in this chunk). */
488 static const uint16_t
489 vdenc_brc_start_global_adjust_frame[4] = { 10, 50, 100, 150 };
492 vdenc_brc_global_rate_ratio_threshold[7] = { 80, 90, 95, 101, 105, 115, 130};
495 vdenc_brc_start_global_adjust_mult[5] = { 1, 1, 3, 2, 1 };
498 vdenc_brc_start_global_adjust_div[5] = { 40, 5, 5, 3, 1 };
501 vdenc_brc_global_rate_ratio_threshold_qp[8] = { -3, -2, -1, 0, 1, 1, 2, 3 };
503 static const int vdenc_mode_const[2][12][52] = {
506 //LUTMODE_INTRA_NONPRED
508 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, //QP=[0 ~12]
509 16, 18, 22, 24, 13, 15, 16, 18, 13, 15, 15, 12, 14, //QP=[13~25]
510 12, 12, 10, 10, 11, 10, 10, 10, 9, 9, 8, 8, 8, //QP=[26~38]
511 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, //QP=[39~51]
514 //LUTMODE_INTRA_16x16, LUTMODE_INTRA
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13~25]
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26~38]
519 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39~51]
524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
525 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, //QP=[13~25]
526 1, 1, 1, 1, 1, 4, 4, 4, 4, 6, 6, 6, 6, //QP=[26~38]
527 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, //QP=[39~51]
532 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, //QP=[0 ~12]
533 64, 72, 80, 88, 48, 56, 64, 72, 53, 59, 64, 56, 64, //QP=[13~25]
534 57, 64, 58, 55, 64, 64, 64, 64, 59, 59, 60, 57, 50, //QP=[26~38]
535 46, 42, 38, 34, 31, 27, 23, 22, 19, 18, 16, 14, 13, //QP=[39~51]
538 //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
544 //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16x8_FIELD
547 //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8X8_FIELD
550 //LUTMODE_INTER_16x16, LUTMODE_INTER
559 //LUTMODE_INTRA_CHROMA
565 //LUTMODE_INTRA_NONPRED
567 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[0 ~12]
568 7, 8, 9, 10, 5, 6, 7, 8, 6, 7, 7, 7, 7, //QP=[13~25]
569 6, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[26~38]
570 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[39~51]
573 //LUTMODE_INTRA_16x16, LUTMODE_INTRA
575 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
576 24, 28, 31, 35, 19, 21, 24, 28, 20, 24, 25, 21, 24,
577 24, 24, 24, 21, 24, 24, 26, 24, 24, 24, 24, 24, 24,
578 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
584 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, //QP=[0 ~12]
585 28, 32, 36, 40, 22, 26, 28, 32, 24, 26, 30, 26, 28, //QP=[13~25]
586 26, 28, 26, 26, 30, 28, 28, 28, 26, 28, 28, 26, 28, //QP=[26~38]
587 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, //QP=[39~51]
592 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, //QP=[0 ~12]
593 72, 80, 88, 104, 56, 64, 72, 80, 58, 68, 76, 64, 68, //QP=[13~25]
594 64, 68, 68, 64, 70, 70, 70, 70, 68, 68, 68, 68, 68, //QP=[26~38]
595 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, //QP=[39~51]
598 //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
600 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[0 ~12]
601 8, 9, 11, 12, 6, 7, 9, 10, 7, 8, 9, 8, 9, //QP=[13~25]
602 8, 9, 8, 8, 9, 9, 9, 9, 8, 8, 8, 8, 8, //QP=[26~38]
603 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, //QP=[39~51]
608 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, //QP=[0 ~12]
609 2, 3, 3, 3, 2, 2, 2, 3, 2, 2, 2, 2, 3, //QP=[13~25]
610 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, //QP=[26~38]
611 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, //QP=[39~51]
614 //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16X8_FIELD
616 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[0 ~12]
617 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[13~25]
618 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[26~38]
619 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[39~51]
622 //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8x8_FIELD
624 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[0 ~12]
625 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[13~25]
626 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[26~38]
627 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[39~51]
630 //LUTMODE_INTER_16x16, LUTMODE_INTER
632 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[0 ~12]
633 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[13~25]
634 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[26~38]
635 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[39~51]
640 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
641 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13~25]
642 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26~38]
643 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39~51]
648 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[0 ~12]
649 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[13~25]
650 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[26~38]
651 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[39~51]
654 //LUTMODE_INTRA_CHROMA
656 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
657 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13~25]
658 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26~38]
659 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39~51]
/* Quarter-pel MV cost table with skip bias, fed through map_44_lut_value()
 * into vdenc_context->mv_cost (see gen9_vdenc_avc_calculate_mode_cost). */
664 static const int vdenc_mv_cost_skipbias_qpel[8] = {
666 0, 6, 6, 9, 10, 13, 14, 16
669 static const int vdenc_hme_cost[8][52] = {
672 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
673 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13 ~25]
674 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26 ~38]
675 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39 ~51]
679 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
680 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13 ~25]
681 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26 ~38]
682 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39 ~51]
686 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //QP=[0 ~12]
687 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //QP=[13 ~25]
688 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //QP=[26 ~38]
689 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //QP=[39 ~51]
693 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[0 ~12]
694 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[13 ~25]
695 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[26 ~38]
696 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[39 ~51]
700 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[0 ~12]
701 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[13 ~25]
702 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[26 ~38]
703 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[39 ~51]
707 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[0 ~12]
708 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[13 ~25]
709 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[26 ~38]
710 10, 10, 10, 10, 20, 30, 40, 50, 50, 50, 50, 50, 50, //QP=[39 ~51]
714 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[0 ~12]
715 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[13 ~25]
716 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[26 ~38]
717 20, 20, 20, 40, 60, 80, 100, 100, 100, 100, 100, 100, 100, //QP=[39 ~51]
722 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[0 ~12]
723 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[13 ~25]
724 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[26 ~38]
725 20, 20, 30, 50, 100, 200, 200, 200, 200, 200, 200, 200, 200, //QP=[39 ~51]
729 #define OUT_BUFFER_2DW(batch, bo, is_target, delta) do { \
731 OUT_BCS_RELOC64(batch, \
733 I915_GEM_DOMAIN_RENDER, \
734 is_target ? I915_GEM_DOMAIN_RENDER : 0, \
737 OUT_BCS_BATCH(batch, 0); \
738 OUT_BCS_BATCH(batch, 0); \
742 #define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr) do { \
743 OUT_BUFFER_2DW(batch, bo, is_target, delta); \
744 OUT_BCS_BATCH(batch, i965->intel.mocs_state); \
747 #define ALLOC_VDENC_BUFFER_RESOURCE(buffer, bfsize, des) do { \
748 buffer.type = I965_GPE_RESOURCE_BUFFER; \
749 buffer.width = bfsize; \
751 buffer.pitch = buffer.width; \
752 buffer.size = buffer.pitch; \
753 buffer.tiling = I915_TILING_NONE; \
754 i965_allocate_gpe_resource(i965->intel.bufmgr, \
761 gen9_vdenc_get_max_vmv_range(int level)
763 int max_vmv_range = 512;
767 else if (level <= 20)
769 else if (level <= 30)
770 max_vmv_range = 1024;
772 max_vmv_range = 2048;
774 return max_vmv_range;
778 map_44_lut_value(unsigned int v, unsigned char max)
780 unsigned int maxcost;
788 maxcost = ((max & 15) << (max >> 4));
794 d = (int)(log((double)v) / log(2.0)) - 3;
800 ret = (unsigned char)((d << 4) + (int)((v + (d == 0 ? 0 : (1 << (d - 1)))) >> d));
801 ret = (ret & 0xf) == 0 ? (ret | 8) : ret;
807 gen9_vdenc_update_misc_parameters(VADriverContextP ctx,
808 struct encode_state *encode_state,
809 struct intel_encoder_context *encoder_context)
811 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
814 vdenc_context->gop_size = encoder_context->brc.gop_size;
815 vdenc_context->ref_dist = encoder_context->brc.num_bframes_in_gop + 1;
817 if (vdenc_context->internal_rate_mode != I965_BRC_CQP &&
818 encoder_context->brc.need_reset) {
819 /* So far, vdenc doesn't support temporal layer */
820 vdenc_context->framerate = encoder_context->brc.framerate[0];
822 vdenc_context->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
823 vdenc_context->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
825 vdenc_context->max_bit_rate = encoder_context->brc.bits_per_second[0];
826 vdenc_context->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
827 vdenc_context->brc_need_reset = (vdenc_context->brc_initted && encoder_context->brc.need_reset);
829 if (vdenc_context->internal_rate_mode == I965_BRC_CBR) {
830 vdenc_context->min_bit_rate = vdenc_context->max_bit_rate;
831 vdenc_context->target_bit_rate = vdenc_context->max_bit_rate;
833 assert(vdenc_context->internal_rate_mode == I965_BRC_VBR);
834 vdenc_context->min_bit_rate = vdenc_context->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
835 vdenc_context->target_bit_rate = vdenc_context->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
839 vdenc_context->mb_brc_enabled = 1;
840 vdenc_context->num_roi = MIN(encoder_context->brc.num_roi, 3);
841 vdenc_context->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
842 vdenc_context->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
843 vdenc_context->vdenc_streamin_enable = !!vdenc_context->num_roi;
845 for (i = 0; i < vdenc_context->num_roi; i++) {
846 vdenc_context->roi[i].left = encoder_context->brc.roi[i].left >> 4;
847 vdenc_context->roi[i].right = encoder_context->brc.roi[i].right >> 4;
848 vdenc_context->roi[i].top = encoder_context->brc.roi[i].top >> 4;
849 vdenc_context->roi[i].bottom = encoder_context->brc.roi[i].bottom >> 4;
850 vdenc_context->roi[i].value = encoder_context->brc.roi[i].value;
855 gen9_vdenc_update_parameters(VADriverContextP ctx,
857 struct encode_state *encode_state,
858 struct intel_encoder_context *encoder_context)
860 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
861 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
862 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
864 if (profile == VAProfileH264High)
865 vdenc_context->transform_8x8_mode_enable = !!pic_param->pic_fields.bits.transform_8x8_mode_flag;
867 vdenc_context->transform_8x8_mode_enable = 0;
869 vdenc_context->frame_width_in_mbs = seq_param->picture_width_in_mbs;
870 vdenc_context->frame_height_in_mbs = seq_param->picture_height_in_mbs;
872 vdenc_context->frame_width = vdenc_context->frame_width_in_mbs * 16;
873 vdenc_context->frame_height = vdenc_context->frame_height_in_mbs * 16;
875 vdenc_context->down_scaled_width_in_mb4x = WIDTH_IN_MACROBLOCKS(vdenc_context->frame_width / SCALE_FACTOR_4X);
876 vdenc_context->down_scaled_height_in_mb4x = HEIGHT_IN_MACROBLOCKS(vdenc_context->frame_height / SCALE_FACTOR_4X);
877 vdenc_context->down_scaled_width_4x = vdenc_context->down_scaled_width_in_mb4x * 16;
878 vdenc_context->down_scaled_height_4x = ((vdenc_context->down_scaled_height_in_mb4x + 1) >> 1) * 16;
879 vdenc_context->down_scaled_height_4x = ALIGN(vdenc_context->down_scaled_height_4x, 32) << 1;
881 gen9_vdenc_update_misc_parameters(ctx, encode_state, encoder_context);
883 vdenc_context->current_pass = 0;
884 vdenc_context->num_passes = 1;
886 if (vdenc_context->internal_rate_mode == I965_BRC_CBR ||
887 vdenc_context->internal_rate_mode == I965_BRC_VBR)
888 vdenc_context->brc_enabled = 1;
890 vdenc_context->brc_enabled = 0;
892 if (vdenc_context->brc_enabled &&
893 (!vdenc_context->init_vbv_buffer_fullness_in_bit ||
894 !vdenc_context->vbv_buffer_size_in_bit ||
895 !vdenc_context->max_bit_rate ||
896 !vdenc_context->target_bit_rate ||
897 !vdenc_context->framerate.num ||
898 !vdenc_context->framerate.den))
899 vdenc_context->brc_enabled = 0;
901 if (!vdenc_context->brc_enabled) {
902 vdenc_context->target_bit_rate = 0;
903 vdenc_context->max_bit_rate = 0;
904 vdenc_context->min_bit_rate = 0;
905 vdenc_context->init_vbv_buffer_fullness_in_bit = 0;
906 vdenc_context->vbv_buffer_size_in_bit = 0;
908 vdenc_context->num_passes = NUM_OF_BRC_PAK_PASSES;
913 gen9_vdenc_avc_calculate_mode_cost(VADriverContextP ctx,
914 struct encode_state *encode_state,
915 struct intel_encoder_context *encoder_context,
918 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
919 unsigned int frame_type = vdenc_context->frame_type;
921 memset(vdenc_context->mode_cost, 0, sizeof(vdenc_context->mode_cost));
922 memset(vdenc_context->mv_cost, 0, sizeof(vdenc_context->mv_cost));
923 memset(vdenc_context->hme_mv_cost, 0, sizeof(vdenc_context->hme_mv_cost));
925 vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_NONPRED] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_NONPRED][qp]), 0x6f);
926 vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_16x16][qp]), 0x8f);
927 vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_8x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_8x8][qp]), 0x8f);
928 vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_4x4] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_4x4][qp]), 0x8f);
930 if (frame_type == VDENC_FRAME_P) {
931 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x16][qp]), 0x8f);
932 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x8][qp]), 0x8f);
933 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X8Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X8Q][qp]), 0x6f);
934 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X4Q][qp]), 0x6f);
935 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_4X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_4X4Q][qp]), 0x6f);
936 vdenc_context->mode_cost[VDENC_LUTMODE_REF_ID] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_REF_ID][qp]), 0x6f);
938 vdenc_context->mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[0]), 0x6f);
939 vdenc_context->mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[1]), 0x6f);
940 vdenc_context->mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[2]), 0x6f);
941 vdenc_context->mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[3]), 0x6f);
942 vdenc_context->mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[4]), 0x6f);
943 vdenc_context->mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[5]), 0x6f);
944 vdenc_context->mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[6]), 0x6f);
945 vdenc_context->mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[7]), 0x6f);
947 vdenc_context->hme_mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_hme_cost[0][qp]), 0x6f);
948 vdenc_context->hme_mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_hme_cost[1][qp]), 0x6f);
949 vdenc_context->hme_mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_hme_cost[2][qp]), 0x6f);
950 vdenc_context->hme_mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_hme_cost[3][qp]), 0x6f);
951 vdenc_context->hme_mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_hme_cost[4][qp]), 0x6f);
952 vdenc_context->hme_mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_hme_cost[5][qp]), 0x6f);
953 vdenc_context->hme_mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_hme_cost[6][qp]), 0x6f);
954 vdenc_context->hme_mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_hme_cost[7][qp]), 0x6f);
959 gen9_vdenc_update_roi_in_streamin_state(VADriverContextP ctx,
960 struct intel_encoder_context *encoder_context)
962 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
963 struct gen9_vdenc_streamin_state *streamin_state;
966 if (!vdenc_context->num_roi)
969 streamin_state = (struct gen9_vdenc_streamin_state *)i965_map_gpe_resource(&vdenc_context->vdenc_streamin_res);
974 for (col = 0; col < vdenc_context->frame_width_in_mbs; col++) {
975 for (row = 0; row < vdenc_context->frame_height_in_mbs; row++) {
976 streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = 0; /* non-ROI region */
978 /* The last one has higher priority */
979 for (i = vdenc_context->num_roi - 1; i >= 0; i--) {
980 if ((col >= vdenc_context->roi[i].left && col <= vdenc_context->roi[i].right) &&
981 (row >= vdenc_context->roi[i].top && row <= vdenc_context->roi[i].bottom)) {
982 streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = i + 1;
990 i965_unmap_gpe_resource(&vdenc_context->vdenc_streamin_res);
/*
 * Per-frame preparation for the gen9 VDEnc AVC pipeline: binds the
 * reconstructed/reference/input surfaces and the coded buffer to GPE
 * resources, (re)allocates the MFX/VDEnc row-store scratch buffers and the
 * StreamIn buffer, builds the list0 reference index table, and derives the
 * frame type and slice QP that feed the mode-cost tables.
 *
 * NOTE(review): this chunk was extracted with original line numbers fused
 * into the text and several lines dropped (braces, the "bo"/"pbuffer"/"qp"
 * local declarations, dri_bo map/unmap calls, break/else lines).  The code
 * below is left byte-identical; only comments are added.
 */
994 gen9_vdenc_avc_prepare(VADriverContextP ctx,
996 struct encode_state *encode_state,
997 struct intel_encoder_context *encoder_context)
999 struct i965_driver_data *i965 = i965_driver_data(ctx);
1000 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1001 struct i965_coded_buffer_segment *coded_buffer_segment;
1002 struct object_surface *obj_surface;
1003 struct object_buffer *obj_buffer;
1004 VAEncPictureParameterBufferH264 *pic_param;
1005 VAEncSliceParameterBufferH264 *slice_param;
1006 VDEncAvcSurface *vdenc_avc_surface;
1008 int i, j, enable_avc_ildb = 0;
1012 gen9_vdenc_update_parameters(ctx, profile, encode_state, encoder_context);
/* Deblocking is considered enabled unless every slice sets
 * disable_deblocking_filter_idc == 1 */
1014 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
1015 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
1016 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
1018 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
1019 assert((slice_param->slice_type == SLICE_TYPE_I) ||
1020 (slice_param->slice_type == SLICE_TYPE_SI) ||
1021 (slice_param->slice_type == SLICE_TYPE_P) ||
1022 (slice_param->slice_type == SLICE_TYPE_SP) ||
1023 (slice_param->slice_type == SLICE_TYPE_B));
1025 if (slice_param->disable_deblocking_filter_idc != 1) {
1026 enable_avc_ildb = 1;
1034 /* Setup current frame */
1035 obj_surface = encode_state->reconstructed_object;
1036 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Lazily create the private 4x-downscaled companion surface for the
 * reconstructed frame — presumably consumed by HME; confirm upstream */
1038 if (obj_surface->private_data == NULL) {
1039 vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
1040 assert(vdenc_avc_surface);
1042 vdenc_avc_surface->ctx = ctx;
1043 i965_CreateSurfaces(ctx,
1044 vdenc_context->down_scaled_width_4x,
1045 vdenc_context->down_scaled_height_4x,
1046 VA_RT_FORMAT_YUV420,
1048 &vdenc_avc_surface->scaled_4x_surface_id);
1049 vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
1050 assert(vdenc_avc_surface->scaled_4x_surface_obj);
1051 i965_check_alloc_surface_bo(ctx,
1052 vdenc_avc_surface->scaled_4x_surface_obj,
/* Ownership of vdenc_avc_surface transfers to the surface; freed via
 * free_private_data */
1057 obj_surface->private_data = (void *)vdenc_avc_surface;
1058 obj_surface->free_private_data = (void *)vdenc_free_avc_surface;
1061 vdenc_avc_surface = (VDEncAvcSurface *)obj_surface->private_data;
1062 assert(vdenc_avc_surface->scaled_4x_surface_obj);
1064 /* Reconstructed surfaces */
1065 i965_free_gpe_resource(&vdenc_context->recon_surface_res);
1066 i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
1067 i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
1068 i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
1070 i965_object_surface_to_2d_gpe_resource(&vdenc_context->recon_surface_res, obj_surface);
1071 i965_object_surface_to_2d_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res, vdenc_avc_surface->scaled_4x_surface_obj);
/* Route the reconstructed output through the pre- or post-deblocking path
 * depending on whether any slice enables deblocking */
1073 if (enable_avc_ildb) {
1074 i965_object_surface_to_2d_gpe_resource(&vdenc_context->post_deblocking_output_res, obj_surface);
1076 i965_object_surface_to_2d_gpe_resource(&vdenc_context->pre_deblocking_output_res, obj_surface);
1080 /* Reference surfaces */
1081 for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
1082 assert(ARRAY_ELEMS(vdenc_context->list_reference_res) ==
1083 ARRAY_ELEMS(vdenc_context->list_scaled_4x_reference_res));
1084 i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
1085 i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
1086 obj_surface = encode_state->reference_objects[i];
1088 if (obj_surface && obj_surface->bo) {
1089 i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_reference_res[i], obj_surface);
/* Same lazy 4x-downscaled companion surface for each reference */
1091 if (obj_surface->private_data == NULL) {
1092 vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
1093 assert(vdenc_avc_surface);
1095 vdenc_avc_surface->ctx = ctx;
1096 i965_CreateSurfaces(ctx,
1097 vdenc_context->down_scaled_width_4x,
1098 vdenc_context->down_scaled_height_4x,
1099 VA_RT_FORMAT_YUV420,
1101 &vdenc_avc_surface->scaled_4x_surface_id);
1102 vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
1103 assert(vdenc_avc_surface->scaled_4x_surface_obj);
1104 i965_check_alloc_surface_bo(ctx,
1105 vdenc_avc_surface->scaled_4x_surface_obj,
1110 obj_surface->private_data = vdenc_avc_surface;
/* NOTE(review): the current-frame path uses vdenc_free_avc_surface while
 * this path uses gen_free_avc_surface — confirm which is intended */
1111 obj_surface->free_private_data = gen_free_avc_surface;
1114 vdenc_avc_surface = obj_surface->private_data;
1115 i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i], vdenc_avc_surface->scaled_4x_surface_obj);
1119 /* Input YUV surface */
1120 i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
1121 i965_object_surface_to_2d_gpe_resource(&vdenc_context->uncompressed_input_surface_res, encode_state->input_yuv_object);
1123 /* Encoded bitstream */
1124 obj_buffer = encode_state->coded_buf_object;
1125 bo = obj_buffer->buffer_store->bo;
1126 i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
1127 i965_dri_object_to_buffer_gpe_resource(&vdenc_context->compressed_bitstream.res, bo);
1128 vdenc_context->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
1129 vdenc_context->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
/* Status buffer lives in the same BO, in the coded-buffer header ahead of
 * the bitstream data ("bffuer" is a pre-existing field-name typo) */
1132 i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
1133 i965_dri_object_to_buffer_gpe_resource(&vdenc_context->status_bffuer.res, bo);
1134 vdenc_context->status_bffuer.base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
1135 vdenc_context->status_bffuer.size = ALIGN(sizeof(struct gen9_vdenc_status), 64);
1136 vdenc_context->status_bffuer.bytes_per_frame_offset = offsetof(struct gen9_vdenc_status, bytes_per_frame);
1137 assert(vdenc_context->status_bffuer.base_offset + vdenc_context->status_bffuer.size <
1138 vdenc_context->compressed_bitstream.start_offset);
/* assumes bo->virtual is valid here — the dri_bo map/unmap calls around
 * this section are not visible in this chunk; confirm upstream */
1142 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
1143 coded_buffer_segment->mapped = 0;
1144 coded_buffer_segment->codec = encoder_context->codec;
1145 coded_buffer_segment->status_support = 1;
1147 pbuffer = bo->virtual;
1148 pbuffer += vdenc_context->status_bffuer.base_offset;
1149 memset(pbuffer, 0, vdenc_context->status_bffuer.size);
/* Row-store scratch buffers, sized per MB column */
1153 i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
1154 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_intra_row_store_scratch_res,
1155 vdenc_context->frame_width_in_mbs * 64,
1156 "Intra row store scratch buffer");
1158 i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
1159 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_deblocking_filter_row_store_scratch_res,
1160 vdenc_context->frame_width_in_mbs * 256,
1161 "Deblocking filter row store scratch buffer");
1163 i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
1164 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_bsd_mpc_row_store_scratch_res,
1165 vdenc_context->frame_width_in_mbs * 128,
1166 "BSD/MPC row store scratch buffer");
1168 i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
1169 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_row_store_scratch_res,
1170 vdenc_context->frame_width_in_mbs * 64,
1171 "VDENC row store scratch buffer");
/* One 64-byte StreamIn entry per macroblock */
1173 assert(sizeof(struct gen9_vdenc_streamin_state) == 64);
1174 i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
1175 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_streamin_res,
1176 vdenc_context->frame_width_in_mbs *
1177 vdenc_context->frame_height_in_mbs *
1178 sizeof(struct gen9_vdenc_streamin_state),
1179 "VDENC StreamIn buffer");
1182 * Calculate the index for each reference surface in list0 for the first slice
1183 * TODO: other slices
1185 pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1186 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
/* Picture-level active count, overridden per-slice when the flag is set */
1188 vdenc_context->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
1190 if (slice_param->num_ref_idx_active_override_flag)
1191 vdenc_context->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
/* 0xFF marks an unused list0 slot */
1193 for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_ref_idx[0]); i++) {
1194 vdenc_context->list_ref_idx[0][i] = 0xFF;
1197 if (vdenc_context->num_refs[0] > ARRAY_ELEMS(vdenc_context->list_ref_idx[0]))
1198 return VA_STATUS_ERROR_INVALID_VALUE;
/* Map each RefPicList0 entry to its slot in reference_objects[] */
1200 for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_ref_idx[0]); i++) {
1201 VAPictureH264 *va_pic;
1203 assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(vdenc_context->list_ref_idx[0]));
1205 if (i >= vdenc_context->num_refs[0])
1208 va_pic = &slice_param->RefPicList0[i];
1210 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
1211 obj_surface = encode_state->reference_objects[j];
1215 obj_surface->base.id == va_pic->picture_id) {
1217 assert(obj_surface->base.id != VA_INVALID_SURFACE);
1218 vdenc_context->list_ref_idx[0][i] = j;
/* Frame type and slice QP feed the mode-cost table selection below */
1225 if (slice_param->slice_type == SLICE_TYPE_I ||
1226 slice_param->slice_type == SLICE_TYPE_SI)
1227 vdenc_context->frame_type = VDENC_FRAME_I;
1229 vdenc_context->frame_type = VDENC_FRAME_P;
1231 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1233 gen9_vdenc_avc_calculate_mode_cost(ctx, encode_state, encoder_context, qp);
1234 gen9_vdenc_update_roi_in_streamin_state(ctx, encoder_context);
1236 return VA_STATUS_SUCCESS;
1240 gen9_vdenc_huc_pipe_mode_select(VADriverContextP ctx,
1241 struct intel_encoder_context *encoder_context,
1242 struct huc_pipe_mode_select_parameter *params)
1244 struct intel_batchbuffer *batch = encoder_context->base.batch;
1246 BEGIN_BCS_BATCH(batch, 3);
1248 OUT_BCS_BATCH(batch, HUC_PIPE_MODE_SELECT | (3 - 2));
1249 OUT_BCS_BATCH(batch,
1250 (params->huc_stream_object_enable << 10) |
1251 (params->indirect_stream_out_enable << 4));
1252 OUT_BCS_BATCH(batch,
1253 params->media_soft_reset_counter);
1255 ADVANCE_BCS_BATCH(batch);
1259 gen9_vdenc_huc_imem_state(VADriverContextP ctx,
1260 struct intel_encoder_context *encoder_context,
1261 struct huc_imem_state_parameter *params)
1263 struct intel_batchbuffer *batch = encoder_context->base.batch;
1265 BEGIN_BCS_BATCH(batch, 5);
1267 OUT_BCS_BATCH(batch, HUC_IMEM_STATE | (5 - 2));
1268 OUT_BCS_BATCH(batch, 0);
1269 OUT_BCS_BATCH(batch, 0);
1270 OUT_BCS_BATCH(batch, 0);
1271 OUT_BCS_BATCH(batch, params->huc_firmware_descriptor);
1273 ADVANCE_BCS_BATCH(batch);
1277 gen9_vdenc_huc_dmem_state(VADriverContextP ctx,
1278 struct intel_encoder_context *encoder_context,
1279 struct huc_dmem_state_parameter *params)
1281 struct i965_driver_data *i965 = i965_driver_data(ctx);
1282 struct intel_batchbuffer *batch = encoder_context->base.batch;
1284 BEGIN_BCS_BATCH(batch, 6);
1286 OUT_BCS_BATCH(batch, HUC_DMEM_STATE | (6 - 2));
1287 OUT_BUFFER_3DW(batch, params->huc_data_source_res->bo, 0, 0, 0);
1288 OUT_BCS_BATCH(batch, params->huc_data_destination_base_address);
1289 OUT_BCS_BATCH(batch, params->huc_data_length);
1291 ADVANCE_BCS_BATCH(batch);
1296 gen9_vdenc_huc_cfg_state(VADriverContextP ctx,
1297 struct intel_encoder_context *encoder_context,
1298 struct huc_cfg_state_parameter *params)
1300 struct intel_batchbuffer *batch = encoder_context->base.batch;
1302 BEGIN_BCS_BATCH(batch, 2);
1304 OUT_BCS_BATCH(batch, HUC_CFG_STATE | (2 - 2));
1305 OUT_BCS_BATCH(batch, !!params->force_reset);
1307 ADVANCE_BCS_BATCH(batch);
1311 gen9_vdenc_huc_virtual_addr_state(VADriverContextP ctx,
1312 struct intel_encoder_context *encoder_context,
1313 struct huc_virtual_addr_parameter *params)
1315 struct i965_driver_data *i965 = i965_driver_data(ctx);
1316 struct intel_batchbuffer *batch = encoder_context->base.batch;
1319 BEGIN_BCS_BATCH(batch, 49);
1321 OUT_BCS_BATCH(batch, HUC_VIRTUAL_ADDR_STATE | (49 - 2));
1323 for (i = 0; i < 16; i++) {
1324 if (params->regions[i].huc_surface_res && params->regions[i].huc_surface_res->bo)
1325 OUT_BUFFER_3DW(batch,
1326 params->regions[i].huc_surface_res->bo,
1327 !!params->regions[i].is_target, 0, 0);
1329 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1332 ADVANCE_BCS_BATCH(batch);
1336 gen9_vdenc_huc_ind_obj_base_addr_state(VADriverContextP ctx,
1337 struct intel_encoder_context *encoder_context,
1338 struct huc_ind_obj_base_addr_parameter *params)
1340 struct i965_driver_data *i965 = i965_driver_data(ctx);
1341 struct intel_batchbuffer *batch = encoder_context->base.batch;
1343 BEGIN_BCS_BATCH(batch, 11);
1345 OUT_BCS_BATCH(batch, HUC_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
1347 if (params->huc_indirect_stream_in_object_res)
1348 OUT_BUFFER_3DW(batch,
1349 params->huc_indirect_stream_in_object_res->bo,
1352 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1354 OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */
1356 if (params->huc_indirect_stream_out_object_res)
1357 OUT_BUFFER_3DW(batch,
1358 params->huc_indirect_stream_out_object_res->bo,
1361 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1363 OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */
1365 ADVANCE_BCS_BATCH(batch);
1369 gen9_vdenc_huc_store_huc_status2(VADriverContextP ctx,
1370 struct intel_encoder_context *encoder_context)
1372 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1373 struct intel_batchbuffer *batch = encoder_context->base.batch;
1374 struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
1375 struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
1377 /* Write HUC_STATUS2 mask (1 << 6) */
1378 memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
1379 mi_store_data_imm_params.bo = vdenc_context->huc_status2_res.bo;
1380 mi_store_data_imm_params.offset = 0;
1381 mi_store_data_imm_params.dw0 = (1 << 6);
1382 gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
1384 /* Store HUC_STATUS2 */
1385 memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
1386 mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS2;
1387 mi_store_register_mem_params.bo = vdenc_context->huc_status2_res.bo;
1388 mi_store_register_mem_params.offset = 4;
1389 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
1393 gen9_vdenc_huc_stream_object(VADriverContextP ctx,
1394 struct intel_encoder_context *encoder_context,
1395 struct huc_stream_object_parameter *params)
1397 struct intel_batchbuffer *batch = encoder_context->base.batch;
1399 BEGIN_BCS_BATCH(batch, 5);
1401 OUT_BCS_BATCH(batch, HUC_STREAM_OBJECT | (5 - 2));
1402 OUT_BCS_BATCH(batch, params->indirect_stream_in_data_length);
1403 OUT_BCS_BATCH(batch,
1404 (1 << 31) | /* Must be 1 */
1405 params->indirect_stream_in_start_address);
1406 OUT_BCS_BATCH(batch, params->indirect_stream_out_start_address);
1407 OUT_BCS_BATCH(batch,
1408 (!!params->huc_bitstream_enable << 29) |
1409 (params->length_mode << 27) |
1410 (!!params->stream_out << 26) |
1411 (!!params->emulation_prevention_byte_removal << 25) |
1412 (!!params->start_code_search_engine << 24) |
1413 (params->start_code_byte2 << 16) |
1414 (params->start_code_byte1 << 8) |
1415 params->start_code_byte0);
1417 ADVANCE_BCS_BATCH(batch);
1421 gen9_vdenc_huc_start(VADriverContextP ctx,
1422 struct intel_encoder_context *encoder_context,
1423 struct huc_start_parameter *params)
1425 struct intel_batchbuffer *batch = encoder_context->base.batch;
1427 BEGIN_BCS_BATCH(batch, 2);
1429 OUT_BCS_BATCH(batch, HUC_START | (2 - 2));
1430 OUT_BCS_BATCH(batch, !!params->last_stream_object);
1432 ADVANCE_BCS_BATCH(batch);
1436 gen9_vdenc_vd_pipeline_flush(VADriverContextP ctx,
1437 struct intel_encoder_context *encoder_context,
1438 struct vd_pipeline_flush_parameter *params)
1440 struct intel_batchbuffer *batch = encoder_context->base.batch;
1442 BEGIN_BCS_BATCH(batch, 2);
1444 OUT_BCS_BATCH(batch, VD_PIPELINE_FLUSH | (2 - 2));
1445 OUT_BCS_BATCH(batch,
1446 params->mfx_pipeline_command_flush << 19 |
1447 params->mfl_pipeline_command_flush << 18 |
1448 params->vdenc_pipeline_command_flush << 17 |
1449 params->hevc_pipeline_command_flush << 16 |
1450 params->vd_command_message_parser_done << 4 |
1451 params->mfx_pipeline_done << 3 |
1452 params->mfl_pipeline_done << 2 |
1453 params->vdenc_pipeline_done << 1 |
1454 params->hevc_pipeline_done);
1456 ADVANCE_BCS_BATCH(batch);
/*
 * Return the maximum macroblock processing rate (MaxMBPS) for the given
 * H.264 level_idc, defaulting to 11880.
 *
 * NOTE(review): the body of the switch (the per-level MaxMBPS table, cf.
 * H.264 Annex A Table A-1) and the final return were dropped by the
 * extraction; only the visible lines are kept here.
 */
1460 gen9_vdenc_get_max_mbps(int level_idc)
1462 int max_mbps = 11880;
1464 switch (level_idc) {
/*
 * Compute the maximum allowed frame size (in bytes) implied by the H.264
 * profile/level: the minimum of an area-based bound (MBs-per-frame *
 * bits-per-MB) and a rate-based bound (MaxMBPS scaled by the frame
 * period), further capped by the frame dimensions.
 *
 * NOTE(review): several lines were dropped by the extraction (the function
 * opening brace, the level_idc parameter line, the "ret" declaration, the
 * if-branch assigning bits_per_mb for levels 3.1-4.0, the "else", and the
 * final return).  Code is left byte-identical; only comments are added.
 */
1518 gen9_vdenc_get_profile_level_max_frame(VADriverContextP ctx,
1519 struct intel_encoder_context *encoder_context,
1522 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1523 double bits_per_mb, tmpf;
1524 int max_mbps, num_mb_per_frame;
1525 uint64_t max_byte_per_frame0, max_byte_per_frame1;
/* Levels 3.1-4.0 take a different bits-per-MB budget (branch body dropped
 * by the extraction); 192.0 below is the other branch's value */
1528 if (level_idc >= 31 && level_idc <= 40)
1531 bits_per_mb = 192.0;
1533 max_mbps = gen9_vdenc_get_max_mbps(level_idc);
1534 num_mb_per_frame = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs;
/* Lower-bound the MB count by MaxMBPS/172, mirroring the spec's minimum
 * compression-ratio constraint */
1536 tmpf = (double)num_mb_per_frame;
1538 if (tmpf < max_mbps / 172.0)
1539 tmpf = max_mbps / 172.0;
1541 max_byte_per_frame0 = (uint64_t)(tmpf * bits_per_mb);
/* Rate-based bound: MaxMBPS * frame period (den/num) * bits-per-MB */
1542 max_byte_per_frame1 = (uint64_t)(((double)max_mbps * vdenc_context->framerate.den) /
1543 (double)vdenc_context->framerate.num * bits_per_mb);
1545 /* TODO: check VAEncMiscParameterTypeMaxFrameSize */
1546 ret = (unsigned int)MIN(max_byte_per_frame0, max_byte_per_frame1);
/* NOTE(review): frame_height * frame_height looks suspicious — a
 * frame_width * frame_height cap is the expected form; confirm upstream */
1547 ret = (unsigned int)MIN(ret, vdenc_context->frame_height * vdenc_context->frame_height);
/*
 * Heuristically estimate the initial I/P QP for BRC from the raw frame
 * size, frame rate and target bitrate, via a log-linear interpolation
 * between the (x0, y0)-(x1, y1) control points, then adjust by a VBV-
 * buffer-derived delta and clamp to the valid H.264 QP range.
 *
 * NOTE(review): the extraction dropped the opening brace, the "qp" and
 * "delat_qp" ("delta", sic) declarations, the clamping of delat_qp, the
 * qp += delat_qp step and the final return.  Code is left byte-identical;
 * only comments are added.
 */
1553 gen9_vdenc_calculate_initial_qp(VADriverContextP ctx,
1554 struct encode_state *encode_state,
1555 struct intel_encoder_context *encoder_context)
1557 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1558 float x0 = 0, y0 = 1.19f, x1 = 1.75f, y1 = 1.75f;
1559 unsigned frame_size;
/* NV12 raw frame size: width * height * 3/2 bytes */
1562 frame_size = (vdenc_context->frame_width * vdenc_context->frame_height * 3 / 2);
/* Interpolate log10(bits-per-frame ratio) between the control points and
 * map back through pow(10, ...); +0.5 rounds to nearest */
1563 qp = (int)(1.0 / 1.2 * pow(10.0,
1564 (log10(frame_size * 2.0 / 3.0 * vdenc_context->framerate.num /
1565 ((double)vdenc_context->target_bit_rate * vdenc_context->framerate.den)) - x0) *
1566 (y1 - y0) / (x1 - x0) + y0) + 0.5);
/* Larger VBV buffers (in frames) reduce the QP bump */
1568 delat_qp = (int)(9 - (vdenc_context->vbv_buffer_size_in_bit * ((double)vdenc_context->framerate.num) /
1569 ((double)vdenc_context->target_bit_rate * vdenc_context->framerate.den)));
1573 qp = CLAMP(1, 51, qp);
/*
 * Populate the HuC BRC init/reset DMEM block with the rate-control
 * configuration for this stream: bitrates, VBV buffer state, frame rate,
 * GOP shape, deviation thresholds scaled by the buffer/bitrate ratio, the
 * initial QP estimate and the two-pass re-encode thresholds.
 *
 * NOTE(review): the extraction fused original line numbers into the text
 * and dropped several lines (opening brace, "int i;", the NULL-map guard,
 * the else-branch of the num_p_in_gop computation, and the bps_ratio
 * clamps to [0.1, 3.5]).  Code is left byte-identical; only comments are
 * added.
 */
1583 gen9_vdenc_update_huc_brc_init_dmem(VADriverContextP ctx,
1584 struct encode_state *encode_state,
1585 struct intel_encoder_context *encoder_context)
1587 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1588 struct huc_brc_init_dmem *dmem;
1589 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1590 double input_bits_per_frame, bps_ratio;
/* Seed the software-side BRC tracking state before filling DMEM */
1593 vdenc_context->brc_init_reset_input_bits_per_frame =
1594 ((double)vdenc_context->max_bit_rate * vdenc_context->framerate.den) / vdenc_context->framerate.num;
1595 vdenc_context->brc_init_current_target_buf_full_in_bits = vdenc_context->brc_init_reset_input_bits_per_frame;
1596 vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
1598 dmem = (struct huc_brc_init_dmem *)i965_map_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1603 memset(dmem, 0, sizeof(*dmem));
/* 0 = init, 2 = reset of an already-initialized BRC */
1605 dmem->brc_func = vdenc_context->brc_initted ? 2 : 0;
1607 dmem->frame_width = vdenc_context->frame_width;
1608 dmem->frame_height = vdenc_context->frame_height;
1610 dmem->target_bitrate = vdenc_context->target_bit_rate;
1611 dmem->min_rate = vdenc_context->min_bit_rate;
1612 dmem->max_rate = vdenc_context->max_bit_rate;
1613 dmem->buffer_size = vdenc_context->vbv_buffer_size_in_bit;
1614 dmem->init_buffer_fullness = vdenc_context->init_vbv_buffer_fullness_in_bit;
/* NOTE(review): comparing init_buffer_fullness against the same value it
 * was just assigned looks odd — upstream presumably caps the fullness at
 * the buffer size; confirm */
1616 if (dmem->init_buffer_fullness > vdenc_context->init_vbv_buffer_fullness_in_bit)
1617 dmem->init_buffer_fullness = vdenc_context->vbv_buffer_size_in_bit;
/* brc_flag: 0x10 = CBR, 0x20 = VBR */
1619 if (vdenc_context->internal_rate_mode == I965_BRC_CBR)
1620 dmem->brc_flag |= 0x10;
1621 else if (vdenc_context->internal_rate_mode == I965_BRC_VBR)
1622 dmem->brc_flag |= 0x20;
1624 dmem->frame_rate_m = vdenc_context->framerate.num;
1625 dmem->frame_rate_d = vdenc_context->framerate.den;
1627 dmem->profile_level_max_frame = gen9_vdenc_get_profile_level_max_frame(ctx, encoder_context, seq_param->level_idc);
1629 if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1630 dmem->num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
/* Scale the deviation thresholds by how many frames fit in the VBV buffer
 * (bits-per-frame / buffer size); the clamps on bps_ratio were dropped by
 * the extraction */
1635 input_bits_per_frame = ((double)vdenc_context->max_bit_rate * vdenc_context->framerate.den) / vdenc_context->framerate.num;
1636 bps_ratio = input_bits_per_frame /
1637 ((double)vdenc_context->vbv_buffer_size_in_bit * vdenc_context->framerate.den / vdenc_context->framerate.num);
1639 if (bps_ratio < 0.1)
1642 if (bps_ratio > 3.5)
1645 for (i = 0; i < 4; i++) {
1646 dmem->dev_thresh_pb0[i] = (char)(-50 * pow(vdenc_brc_dev_threshpb0_fp_neg[i], bps_ratio));
1647 dmem->dev_thresh_pb0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshpb0_fp_pos[i], bps_ratio));
1649 dmem->dev_thresh_i0[i] = (char)(-50 * pow(vdenc_brc_dev_threshi0_fp_neg[i], bps_ratio));
1650 dmem->dev_thresh_i0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshi0_fp_pos[i], bps_ratio));
1652 dmem->dev_thresh_vbr0[i] = (char)(-50 * pow(vdenc_brc_dev_threshvbr0_neg[i], bps_ratio));
1653 dmem->dev_thresh_vbr0[i + 4] = (char)(100 * pow(vdenc_brc_dev_threshvbr0_pos[i], bps_ratio));
1656 dmem->init_qp_ip = gen9_vdenc_calculate_initial_qp(ctx, encode_state, encoder_context);
/* Per-MB QP control with distortion-based QP deltas */
1658 if (vdenc_context->mb_brc_enabled) {
1659 dmem->mb_qp_ctrl = 1;
1660 dmem->dist_qp_delta[0] = -5;
1661 dmem->dist_qp_delta[1] = -2;
1662 dmem->dist_qp_delta[2] = 2;
1663 dmem->dist_qp_delta[3] = 5;
1666 dmem->slice_size_ctrl_en = 0; /* TODO: add support for slice size control */
1668 dmem->oscillation_qp_delta = 0; /* TODO: add support */
1669 dmem->first_iframe_no_hrd_check = 0;/* TODO: add support */
1671 // 2nd re-encode pass if possible
/* 4K-and-larger frames get looser thresholds for triggering a 2nd pass */
1672 if (vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs >= (3840 * 2160 / 256)) {
1673 dmem->top_qp_delta_thr_for_2nd_pass = 5;
1674 dmem->bottom_qp_delta_thr_for_2nd_pass = 5;
1675 dmem->top_frame_size_threshold_for_2nd_pass = 80;
1676 dmem->bottom_frame_size_threshold_for_2nd_pass = 80;
1678 dmem->top_qp_delta_thr_for_2nd_pass = 2;
1679 dmem->bottom_qp_delta_thr_for_2nd_pass = 1;
1680 dmem->top_frame_size_threshold_for_2nd_pass = 32;
1681 dmem->bottom_frame_size_threshold_for_2nd_pass = 24;
1684 dmem->qp_select_for_first_pass = 1;
1685 dmem->mb_header_compensation = 1;
1686 dmem->delta_qp_adaptation = 1;
1687 dmem->max_crf_quality_factor = 52;
1689 dmem->crf_quality_factor = 0; /* TODO: add support for CRF */
1690 dmem->scenario_info = 0;
1692 memcpy(&dmem->estrate_thresh_i0, vdenc_brc_estrate_thresh_i0, sizeof(dmem->estrate_thresh_i0));
1693 memcpy(&dmem->estrate_thresh_p0, vdenc_brc_estrate_thresh_p0, sizeof(dmem->estrate_thresh_p0));
1695 i965_unmap_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1699 gen9_vdenc_huc_brc_init_reset(VADriverContextP ctx,
1700 struct encode_state *encode_state,
1701 struct intel_encoder_context *encoder_context)
1703 struct intel_batchbuffer *batch = encoder_context->base.batch;
1704 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1705 struct huc_pipe_mode_select_parameter pipe_mode_select_params;
1706 struct huc_imem_state_parameter imem_state_params;
1707 struct huc_dmem_state_parameter dmem_state_params;
1708 struct huc_virtual_addr_parameter virtual_addr_params;
1709 struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
1710 struct huc_stream_object_parameter stream_object_params;
1711 struct huc_start_parameter start_params;
1712 struct vd_pipeline_flush_parameter pipeline_flush_params;
1713 struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
1715 vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
1717 memset(&imem_state_params, 0, sizeof(imem_state_params));
1718 imem_state_params.huc_firmware_descriptor = HUC_BRC_INIT_RESET;
1719 gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);
1721 memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
1722 gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);
1724 gen9_vdenc_update_huc_brc_init_dmem(ctx, encode_state, encoder_context);
1725 memset(&dmem_state_params, 0, sizeof(dmem_state_params));
1726 dmem_state_params.huc_data_source_res = &vdenc_context->brc_init_reset_dmem_res;
1727 dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
1728 dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_init_dmem), 64);
1729 gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);
1731 memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
1732 virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
1733 virtual_addr_params.regions[0].is_target = 1;
1734 gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);
1736 memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
1737 ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
1738 ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
1739 gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);
1741 memset(&stream_object_params, 0, sizeof(stream_object_params));
1742 stream_object_params.indirect_stream_in_data_length = 1;
1743 stream_object_params.indirect_stream_in_start_address = 0;
1744 gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);
1746 gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);
1748 memset(&start_params, 0, sizeof(start_params));
1749 start_params.last_stream_object = 1;
1750 gen9_vdenc_huc_start(ctx, encoder_context, &start_params);
1752 memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
1753 pipeline_flush_params.hevc_pipeline_done = 1;
1754 pipeline_flush_params.hevc_pipeline_command_flush = 1;
1755 gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
1757 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
1758 mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
1759 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
/*
 * Populate the per-pass HuC BRC update DMEM block: advance the VBV target
 * buffer fullness (first pass only), copy the global-adjust / rate-ratio
 * tuning tables, and set the frame type, PAK pass number and scene-change
 * parameters for the update kernel.
 *
 * NOTE(review): the extraction fused original line numbers into the text
 * and dropped lines after the map call (presumably a NULL guard and the
 * brc_func assignment).  Code is left byte-identical; only comments added.
 */
1763 gen9_vdenc_update_huc_update_dmem(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1765 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1766 struct huc_brc_update_dmem *dmem;
1767 int i, num_p_in_gop = 0;
1769 dmem = (struct huc_brc_update_dmem *)i965_map_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
/* Advance the target buffer fullness once per frame (pass 0 only) */
1776 if (vdenc_context->brc_initted && (vdenc_context->current_pass == 0)) {
1777 vdenc_context->brc_init_previous_target_buf_full_in_bits =
1778 (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits);
1779 vdenc_context->brc_init_current_target_buf_full_in_bits += vdenc_context->brc_init_reset_input_bits_per_frame;
1780 vdenc_context->brc_target_size += vdenc_context->brc_init_reset_input_bits_per_frame;
/* Wrap the target within the VBV buffer size */
1783 if (vdenc_context->brc_target_size > vdenc_context->vbv_buffer_size_in_bit)
1784 vdenc_context->brc_target_size -= vdenc_context->vbv_buffer_size_in_bit;
1786 dmem->target_size = vdenc_context->brc_target_size;
1788 dmem->peak_tx_bits_per_frame = (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits - vdenc_context->brc_init_previous_target_buf_full_in_bits);
1790 dmem->target_slice_size = 0; // TODO: add support for slice size control
1792 memcpy(dmem->start_global_adjust_frame, vdenc_brc_start_global_adjust_frame, sizeof(dmem->start_global_adjust_frame));
1793 memcpy(dmem->global_rate_ratio_threshold, vdenc_brc_global_rate_ratio_threshold, sizeof(dmem->global_rate_ratio_threshold));
1795 dmem->current_frame_type = (vdenc_context->frame_type + 2) % 3; // I frame:2, P frame:0, B frame:1
1797 memcpy(dmem->start_global_adjust_mult, vdenc_brc_start_global_adjust_mult, sizeof(dmem->start_global_adjust_mult));
1798 memcpy(dmem->start_global_adjust_div, vdenc_brc_start_global_adjust_div, sizeof(dmem->start_global_adjust_div));
1799 memcpy(dmem->global_rate_ratio_threshold_qp, vdenc_brc_global_rate_ratio_threshold_qp, sizeof(dmem->global_rate_ratio_threshold_qp));
1801 dmem->current_pak_pass = vdenc_context->current_pass;
1802 dmem->max_num_passes = 2;
1804 dmem->scene_change_detect_enable = 1;
1805 dmem->scene_change_prev_intra_percent_threshold = 96;
1806 dmem->scene_change_cur_intra_perent_threshold = 192;
1808 if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1809 num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
/* Scene-change window scales with GOP length, capped at 6 */
1811 for (i = 0; i < 2; i++)
1812 dmem->scene_change_width[i] = MIN((num_p_in_gop + 1) / 5, 6);
/* Low-delay streams disable I/P averaging */
1814 if (vdenc_context->is_low_delay)
1815 dmem->ip_average_coeff = 0;
1817 dmem->ip_average_coeff = 128;
1819 dmem->skip_frame_size = 0;
1820 dmem->num_of_frames_skipped = 0;
1822 dmem->roi_source = 0; // TODO: add support for dirty ROI
1823 dmem->hme_detection_enable = 0; // TODO: support HME kernel
1824 dmem->hme_cost_enable = 1;
1826 dmem->second_level_batchbuffer_size = 228;
1828 i965_unmap_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
/*
 * Fill in a MFX_AVC_IMG_STATE command structure for VDEnc AVC encoding.
 *
 * The command header (DW0) and all image-level fields are derived from the
 * VA-API sequence/picture parameter buffers and the vdenc context.
 *
 * NOTE(review): this extract appears to be missing a few physical lines of
 * the original file (the storage-class/return-type line, the trailing
 * parameter read below as `use_huc`, and the enclosing braces); all code
 * tokens shown are kept byte-identical.
 */
gen9_vdenc_init_mfx_avc_img_state(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context,
                                  struct gen9_mfx_avc_img_state *pstate,
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

    /* Zero the whole command first so every field not set below defaults to 0 */
    memset(pstate, 0, sizeof(*pstate));

    /* DW0: command opcode plus DWord length (length excludes the first two DWs) */
    pstate->dw0.value = (MFX_AVC_IMG_STATE | (sizeof(*pstate) / 4 - 2));

    /* DW1/DW2: frame dimensions in macroblock units */
    pstate->dw1.frame_size_in_mbs_minus1 = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs - 1;

    pstate->dw2.frame_width_in_mbs_minus1 = vdenc_context->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = vdenc_context->frame_height_in_mbs - 1;

    /* DW3: progressive frame (image_structure = 0); prediction weighting and
     * chroma QP offsets come straight from the picture parameter buffer.
     * BRC-domain rate control is only enabled when HuC firmware drives BRC. */
    pstate->dw3.image_structure = 0;
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.brc_domain_rate_control_enable = !!use_huc;
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    /* DW4: frame coding controls mirrored from the sequence/picture parameters */
    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = vdenc_context->transform_8x8_mode_enable;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* Always 0 in VDEnc mode */
    pstate->dw4.minimum_frame_size = 0;

    /* DW5: enable the per-MB and per-frame size checks so PAK can flag
     * over/under-sized output; AQ is left disabled for luma and chroma. */
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;             /* Always 0 in VDEnc mode */
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;

    /* DW6: per-MB bit-size ceilings used with the max-bit flags above */
    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    /* DW8/DW9: no slice-level delta-QP limits */
    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    /* DW10/DW11: frame bitrate window; max is set to the field's full range */
    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;

    pstate->dw12.vad_error_logic = 1;
    /* TODO: set parameters DW19/DW20 for slices */
/*
 * Fill in a VDENC_IMG_STATE command structure.
 *
 * Frame-type-dependent cost/search defaults are programmed first (I frames
 * vs. others), then the common image-level fields, and finally the per-frame
 * mode/MV/HME costs computed by the driver.
 *
 * NOTE(review): this extract appears to be missing physical lines of the
 * original (the return-type line, a trailing parameter, the `else` arm of
 * the frame-type branch, and several braces); code tokens are kept
 * byte-identical.
 */
gen9_vdenc_init_vdenc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen9_vdenc_img_state *pstate,
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;

    memset(pstate, 0, sizeof(*pstate));

    /* DW0: command opcode plus DWord length (excludes the first two DWs) */
    pstate->dw0.value = (VDENC_IMG_STATE | (sizeof(*pstate) / 4 - 2));

    /* Intra-frame defaults: intra SAD adjustment, partition mask,
     * intra mode costs and QP range check bounds */
    if (vdenc_context->frame_type == VDENC_FRAME_I) {
        pstate->dw4.intra_sad_measure_adjustment = 2;
        pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;

        pstate->dw5.cre_prefetch_enable = 1;

        pstate->dw9.mode0_cost = 10;
        pstate->dw9.mode1_cost = 0;
        pstate->dw9.mode2_cost = 3;
        pstate->dw9.mode3_cost = 30;

        pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
        pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
        pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;

        pstate->dw22.small_mb_size_in_word = 0xff;
        pstate->dw22.large_mb_size_in_word = 0xff;

        pstate->dw27.max_hmv_r = 0x2000;
        pstate->dw27.max_vmv_r = 0x200;

        pstate->dw33.qp_range_check_upper_bound = 0x33;
        pstate->dw33.qp_range_check_lower_bound = 0x0a;
        pstate->dw33.qp_range_check_value = 0x0f;

    /* NOTE(review): an `} else {` separating the non-I defaults below
     * appears to have been dropped from this extract -- confirm against
     * the original file. Inter defaults: bidirectional weight, sub-pel
     * search, inter SAD adjustment and inter mode/MV costs. */
        pstate->dw2.bidirectional_weight = 0x20;

        pstate->dw4.subpel_mode = 3;
        pstate->dw4.bme_disable_for_fbr_message = 1;
        pstate->dw4.inter_sad_measure_adjustment = 2;
        pstate->dw4.intra_sad_measure_adjustment = 2;
        pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;

        pstate->dw5.cre_prefetch_enable = 1;

        pstate->dw8.non_skip_zero_mv_const_added = 1;
        pstate->dw8.non_skip_mb_mode_const_added = 1;
        pstate->dw8.ref_id_cost_mode_select = 1;

        pstate->dw9.mode0_cost = 7;
        pstate->dw9.mode1_cost = 26;
        pstate->dw9.mode2_cost = 30;
        pstate->dw9.mode3_cost = 57;

        pstate->dw10.mode4_cost = 8;
        pstate->dw10.mode5_cost = 2;
        pstate->dw10.mode6_cost = 4;
        pstate->dw10.mode7_cost = 6;

        pstate->dw11.mode8_cost = 5;
        pstate->dw11.mode9_cost = 0;
        pstate->dw11.ref_id_cost = 4;
        pstate->dw11.chroma_intra_mode_cost = 0;

        pstate->dw12_13.mv_cost.dw0.mv0_cost = 0;
        pstate->dw12_13.mv_cost.dw0.mv1_cost = 6;
        pstate->dw12_13.mv_cost.dw0.mv2_cost = 6;
        pstate->dw12_13.mv_cost.dw0.mv3_cost = 9;
        pstate->dw12_13.mv_cost.dw1.mv4_cost = 10;
        pstate->dw12_13.mv_cost.dw1.mv5_cost = 13;
        pstate->dw12_13.mv_cost.dw1.mv6_cost = 14;
        pstate->dw12_13.mv_cost.dw1.mv7_cost = 24;

        pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
        pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
        pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;

        pstate->dw22.small_mb_size_in_word = 0xff;
        pstate->dw22.large_mb_size_in_word = 0xff;

        pstate->dw27.max_hmv_r = 0x2000;
        pstate->dw27.max_vmv_r = 0x200;

        /* Distortion-zone boundaries used by the dynamic slice logic */
        pstate->dw31.offset0_for_zone0_neg_zone1_boundary = 800;

        pstate->dw32.offset1_for_zone1_neg_zone2_boundary = 1600;
        pstate->dw32.offset2_for_zone2_neg_zone3_boundary = 2400;

        pstate->dw33.qp_range_check_upper_bound = 0x33;
        pstate->dw33.qp_range_check_lower_bound = 0x0a;
        pstate->dw33.qp_range_check_value = 0x0f;

        pstate->dw34.midpoint_distortion = 0x640;

    /* ROI will be updated in HuC kernel for CBR/VBR */
    if (!vdenc_context->brc_enabled && vdenc_context->num_roi) {
        pstate->dw34.roi_enable = 1;

        /* Per-zone QP adjustments clamped to the signed 4-bit field range */
        pstate->dw30.roi_qp_adjustment_for_zone1 = CLAMP(-8, 7, vdenc_context->roi[0].value);

        if (vdenc_context->num_roi > 1)
            pstate->dw30.roi_qp_adjustment_for_zone2 = CLAMP(-8, 7, vdenc_context->roi[1].value);

        if (vdenc_context->num_roi > 2)
            pstate->dw30.roi_qp_adjustment_for_zone3 = CLAMP(-8, 7, vdenc_context->roi[2].value);

    /* Common (frame-type independent) settings */
    pstate->dw1.transform_8x8_flag = vdenc_context->transform_8x8_mode_enable;
    pstate->dw1.extended_pak_obj_cmd_enable = !!vdenc_context->use_extended_pak_obj_cmd;

    pstate->dw3.picture_width = vdenc_context->frame_width_in_mbs;

    pstate->dw4.forward_transform_skip_check_enable = 1; /* TODO: double-check it */

    pstate->dw5.picture_height_minus1 = vdenc_context->frame_height_in_mbs - 1;
    pstate->dw5.picture_type = vdenc_context->frame_type;
    pstate->dw5.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;

    /* HME ref1 is pointless when only one forward reference exists */
    if (vdenc_context->frame_type == VDENC_FRAME_P) {
        pstate->dw5.hme_ref1_disable = vdenc_context->num_refs[0] == 1 ? 1 : 0;

    pstate->dw5.mb_slice_threshold_value = 0;

    pstate->dw6.slice_macroblock_height_minus1 = vdenc_context->frame_height_in_mbs - 1; /* single slice only */

    /* Luma intra partition mask: bit 1 set disables the 8x8 transform path */
    if (pstate->dw1.transform_8x8_flag)
        pstate->dw8.luma_intra_partition_mask = 0;

        pstate->dw8.luma_intra_partition_mask = (1 << 1); /* disable transform_8x8 */

    pstate->dw14.qp_prime_y = pic_param->pic_init_qp + slice_param->slice_qp_delta;      /* TODO: check whether it is OK to use the first slice only */

    /* NOTE(review): the conditional guarding the driver-computed cost block
     * below appears truncated in this extract -- confirm against the
     * original file. */
    pstate->dw9.mode0_cost = vdenc_context->mode_cost[0];
    pstate->dw9.mode1_cost = vdenc_context->mode_cost[1];
    pstate->dw9.mode2_cost = vdenc_context->mode_cost[2];
    pstate->dw9.mode3_cost = vdenc_context->mode_cost[3];

    pstate->dw10.mode4_cost = vdenc_context->mode_cost[4];
    pstate->dw10.mode5_cost = vdenc_context->mode_cost[5];
    pstate->dw10.mode6_cost = vdenc_context->mode_cost[6];
    pstate->dw10.mode7_cost = vdenc_context->mode_cost[7];

    pstate->dw11.mode8_cost = vdenc_context->mode_cost[8];
    pstate->dw11.mode9_cost = vdenc_context->mode_cost[9];
    pstate->dw11.ref_id_cost = vdenc_context->mode_cost[10];
    pstate->dw11.chroma_intra_mode_cost = vdenc_context->mode_cost[11];

    pstate->dw12_13.mv_cost.dw0.mv0_cost = vdenc_context->mv_cost[0];
    pstate->dw12_13.mv_cost.dw0.mv1_cost = vdenc_context->mv_cost[1];
    pstate->dw12_13.mv_cost.dw0.mv2_cost = vdenc_context->mv_cost[2];
    pstate->dw12_13.mv_cost.dw0.mv3_cost = vdenc_context->mv_cost[3];
    pstate->dw12_13.mv_cost.dw1.mv4_cost = vdenc_context->mv_cost[4];
    pstate->dw12_13.mv_cost.dw1.mv5_cost = vdenc_context->mv_cost[5];
    pstate->dw12_13.mv_cost.dw1.mv6_cost = vdenc_context->mv_cost[6];
    pstate->dw12_13.mv_cost.dw1.mv7_cost = vdenc_context->mv_cost[7];

    pstate->dw28_29.hme_mv_cost.dw0.mv0_cost = vdenc_context->hme_mv_cost[0];
    pstate->dw28_29.hme_mv_cost.dw0.mv1_cost = vdenc_context->hme_mv_cost[1];
    pstate->dw28_29.hme_mv_cost.dw0.mv2_cost = vdenc_context->hme_mv_cost[2];
    pstate->dw28_29.hme_mv_cost.dw0.mv3_cost = vdenc_context->hme_mv_cost[3];
    pstate->dw28_29.hme_mv_cost.dw1.mv4_cost = vdenc_context->hme_mv_cost[4];
    pstate->dw28_29.hme_mv_cost.dw1.mv5_cost = vdenc_context->hme_mv_cost[5];
    pstate->dw28_29.hme_mv_cost.dw1.mv6_cost = vdenc_context->hme_mv_cost[6];
    pstate->dw28_29.hme_mv_cost.dw1.mv7_cost = vdenc_context->hme_mv_cost[7];

    /* Vertical MV range is constrained by the stream's level */
    pstate->dw27.max_vmv_r = gen9_vdenc_get_max_vmv_range(seq_param->level_idc);

    /* In CQP mode the image-state QP overrides the BRC-computed QP */
    pstate->dw34.image_state_qp_override = (vdenc_context->internal_rate_mode == I965_BRC_CQP) ? 1 : 0;

    /* TODO: check rolling I */

    /* TODO: handle ROI */

    /* TODO: check stream in support */
/*
 * Build the second-level image-state buffer consumed by the HuC BRC kernel:
 * a MFX_AVC_IMG_STATE command, followed by a VDENC_IMG_STATE command,
 * terminated by MI_BATCH_BUFFER_END.
 *
 * NOTE(review): the declaration of `pbuffer` (and any NULL-map check) appears
 * truncated from this extract; code tokens are kept byte-identical.
 */
gen9_vdenc_init_img_states(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct gen9_mfx_avc_img_state *mfx_img_cmd;
    struct gen9_vdenc_img_state *vdenc_img_cmd;

    pbuffer = i965_map_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);

    /* First command: MFX_AVC_IMG_STATE (last arg 1: HuC/BRC-domain variant) */
    mfx_img_cmd = (struct gen9_mfx_avc_img_state *)pbuffer;
    gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, mfx_img_cmd, 1);
    pbuffer += sizeof(*mfx_img_cmd);

    /* Second command: VDENC_IMG_STATE */
    vdenc_img_cmd = (struct gen9_vdenc_img_state *)pbuffer;
    gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, vdenc_img_cmd, 0);
    pbuffer += sizeof(*vdenc_img_cmd);

    /* Add batch buffer end command */
    *((unsigned int *)pbuffer) = MI_BATCH_BUFFER_END;

    i965_unmap_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
/*
 * Populate the HuC BRC-update constant-data surface: copy the static default
 * table, fill the 8x42 HME MV cost LUT, and (for VBR) override the
 * distortion-QP and buffer-rate adjustment tables with the VBR variants.
 *
 * NOTE(review): the declarations of `i`/`j` (and any NULL-map check, plus
 * the loop closing braces) appear truncated from this extract; code tokens
 * are kept byte-identical.
 */
gen9_vdenc_huc_brc_update_constant_data(VADriverContextP ctx,
                                        struct encode_state *encode_state,
                                        struct intel_encoder_context *encoder_context)
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct huc_brc_update_constant_data *brc_buffer;

    brc_buffer = (struct huc_brc_update_constant_data *)
        i965_map_gpe_resource(&vdenc_context->brc_constant_data_res);

    /* Start from the static defaults, then patch the mode-dependent tables */
    memcpy(brc_buffer, &gen9_brc_update_constant_data, sizeof(gen9_brc_update_constant_data));

    /* HME MV cost LUT: source table is offset by 10 columns; values are
     * packed through map_44_lut_value() with a 0x6f cap */
    for (i = 0; i < 8; i++) {
        for (j = 0; j < 42; j++) {
            brc_buffer->hme_mv_cost[i][j] = map_44_lut_value((vdenc_hme_cost[i][j + 10]), 0x6f);

    /* VBR-specific QP/rate adjustment tables replace the defaults */
    if (vdenc_context->internal_rate_mode == I965_BRC_VBR) {
        memcpy(brc_buffer->dist_qp_adj_tab_i, dist_qp_adj_tab_i_vbr, sizeof(dist_qp_adj_tab_i_vbr));
        memcpy(brc_buffer->dist_qp_adj_tab_p, dist_qp_adj_tab_p_vbr, sizeof(dist_qp_adj_tab_p_vbr));
        memcpy(brc_buffer->dist_qp_adj_tab_b, dist_qp_adj_tab_b_vbr, sizeof(dist_qp_adj_tab_b_vbr));
        memcpy(brc_buffer->buf_rate_adj_tab_i, buf_rate_adj_tab_i_vbr, sizeof(buf_rate_adj_tab_i_vbr));
        memcpy(brc_buffer->buf_rate_adj_tab_p, buf_rate_adj_tab_p_vbr, sizeof(buf_rate_adj_tab_p_vbr));
        memcpy(brc_buffer->buf_rate_adj_tab_b, buf_rate_adj_tab_b_vbr, sizeof(buf_rate_adj_tab_b_vbr));

    i965_unmap_gpe_resource(&vdenc_context->brc_constant_data_res);
/*
 * Submit the HuC BRC-update pass: program the HuC (firmware, DMEM, surface
 * bindings), kick it, flush the VD pipeline, and store HUC_STATUS plus its
 * check mask so later MI_CONDITIONAL_BATCH_BUFFER_END commands can test it.
 */
gen9_vdenc_huc_brc_update(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct huc_pipe_mode_select_parameter pipe_mode_select_params;
    struct huc_imem_state_parameter imem_state_params;
    struct huc_dmem_state_parameter dmem_state_params;
    struct huc_virtual_addr_parameter virtual_addr_params;
    struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
    struct huc_stream_object_parameter stream_object_params;
    struct huc_start_parameter start_params;
    struct vd_pipeline_flush_parameter pipeline_flush_params;
    struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_params;

    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
    mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
    gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);

    /* On the first frame (or after a reset), bail out of the batch early if
     * the BRC-init HuC pass recorded a failure in huc_status2 */
    if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset) {
        struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;

        memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
        mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
        gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);

    /* Prepare the image-state input buffer the HuC kernel will patch */
    gen9_vdenc_init_img_states(ctx, encode_state, encoder_context);

    /* Load the BRC-update firmware */
    memset(&imem_state_params, 0, sizeof(imem_state_params));
    imem_state_params.huc_firmware_descriptor = HUC_BRC_UPDATE;
    gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);

    memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
    gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);

    /* DMEM: per-pass BRC update parameters */
    gen9_vdenc_update_huc_update_dmem(ctx, encoder_context);
    memset(&dmem_state_params, 0, sizeof(dmem_state_params));
    dmem_state_params.huc_data_source_res = &vdenc_context->brc_update_dmem_res[vdenc_context->current_pass];
    dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
    dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_update_dmem), 64);
    gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);

    /* Surface bindings: regions marked is_target are written by the HuC */
    gen9_vdenc_huc_brc_update_constant_data(ctx, encode_state, encoder_context);
    memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
    virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
    virtual_addr_params.regions[0].is_target = 1;
    virtual_addr_params.regions[1].huc_surface_res = &vdenc_context->vdenc_statistics_res;
    virtual_addr_params.regions[2].huc_surface_res = &vdenc_context->pak_statistics_res;
    virtual_addr_params.regions[3].huc_surface_res = &vdenc_context->vdenc_avc_image_state_res;
    virtual_addr_params.regions[4].huc_surface_res = &vdenc_context->hme_detection_summary_buffer_res;
    virtual_addr_params.regions[4].is_target = 1;
    virtual_addr_params.regions[5].huc_surface_res = &vdenc_context->brc_constant_data_res;
    virtual_addr_params.regions[6].huc_surface_res = &vdenc_context->second_level_batch_res;
    virtual_addr_params.regions[6].is_target = 1;
    gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);

    /* The HuC requires a stream-in object even though it is unused here */
    memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
    ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
    ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
    gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);

    memset(&stream_object_params, 0, sizeof(stream_object_params));
    stream_object_params.indirect_stream_in_data_length = 1;
    stream_object_params.indirect_stream_in_start_address = 0;
    gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);

    gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);

    memset(&start_params, 0, sizeof(start_params));
    start_params.last_stream_object = 1;
    gen9_vdenc_huc_start(ctx, encoder_context, &start_params);

    /* NOTE(review): the hevc_* flush flags in an AVC path look suspicious
     * but are kept as-is -- confirm against the VD_PIPELINE_FLUSH spec */
    memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
    pipeline_flush_params.hevc_pipeline_done = 1;
    pipeline_flush_params.hevc_pipeline_command_flush = 1;
    gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);

    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
    mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
    gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);

    /* Store HUC_STATUS */
    memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
    mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS;
    mi_store_register_mem_params.bo = vdenc_context->huc_status_res.bo;
    gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);

    /* Write HUC_STATUS mask (1 << 31) */
    memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
    mi_store_data_imm_params.bo = vdenc_context->huc_status_res.bo;
    mi_store_data_imm_params.offset = 4;
    mi_store_data_imm_params.dw0 = (1 << 31);
    gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
/*
 * Emit MFX_PIPE_MODE_SELECT configuring the MFX engine for AVC encode in
 * VDEnc mode; deblocking output destinations follow the allocated resources.
 */
gen9_vdenc_mfx_pipe_mode_select(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) |   /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) |
                  (1 << 13) |               /* VDEnc mode */
                  ((!!vdenc_context->post_deblocking_output_res.bo) << 9) |     /* Post Deblocking Output */
                  ((!!vdenc_context->pre_deblocking_output_res.bo) << 8) |      /* Pre Deblocking Output */
                  (1 << 7) |                /* Scaled surface enable */
                  (1 << 6) |                /* Frame statistics stream out enable, always '1' in VDEnc mode */
                  (1 << 4) |                /* encoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE describing a tiled NV12 (planar 4:2:0, interleaved
 * U/V) surface for the MFX engine.
 *
 * NOTE(review): a trailing parameter (read below as `id`) appears truncated
 * from this extract; code tokens are kept byte-identical.
 */
gen9_vdenc_mfx_surface_state(VADriverContextP ctx,
                             struct intel_encoder_context *encoder_context,
                             struct i965_gpe_resource *gpe_resource,
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, id);
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) |    /* 420 planar YUV surface */
                  (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) |    /* pitch */
                  (0 << 2) |                            /* must be 0 for interleave U/V */
                  (1 << 1) |                            /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset)); /* y offset for V(cr); same as U(cb) for interleaved U/V */

    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE binding all MFX surfaces/buffers. Gen10 uses
 * a 68-DW layout (3 extra trailing DWs); Gen9 uses 65 DWs.
 *
 * NOTE(review): the declaration of loop index `i` and some closing braces
 * appear truncated from this extract; code tokens are kept byte-identical.
 */
gen9_vdenc_mfx_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    /* Command length differs between Gen9 (65 DWs) and Gen10 (68 DWs) */
    if (IS_GEN10(i965->intel.device_info)) {
        BEGIN_BCS_BATCH(batch, 68);
        OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (68 - 2));
        BEGIN_BCS_BATCH(batch, 65);
        OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));

    /* the DW1-3 is for pre_deblocking */
    OUT_BUFFER_3DW(batch, vdenc_context->pre_deblocking_output_res.bo, 1, 0, 0);

    /* the DW4-6 is for the post_deblocking */
    OUT_BUFFER_3DW(batch, vdenc_context->post_deblocking_output_res.bo, 1, 0, 0);

    /* the DW7-9 is for the uncompressed_picture */
    OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);

    /* the DW10-12 is for PAK information (write) */
    OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 1, 0, 0);

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BUFFER_3DW(batch, vdenc_context->mfx_intra_row_store_scratch_res.bo, 1, 0, 0);

    /* the DW16-18 is for the deblocking filter */
    OUT_BUFFER_3DW(batch, vdenc_context->mfx_deblocking_filter_row_store_scratch_res.bo, 1, 0, 0);

    /* the DW 19-50 is for Reference pictures*/
    for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
        OUT_BUFFER_2DW(batch, vdenc_context->list_reference_res[i].bo, 0, 0);

    /* DW 51, reference picture attributes */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* The DW 52-54 is for PAK information (read) */
    OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 0, 0, 0);

    /* the DW 55-57 is the ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* the DW 58-60 is the second ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW 61, memory compress enable & mode */
    OUT_BCS_BATCH(batch, 0);

    /* the DW 62-64 is the 4x Down Scaling surface */
    OUT_BUFFER_3DW(batch, vdenc_context->scaled_4x_recon_surface_res.bo, 1, 0, 0);

    /* Gen10-only trailing DWs (65-67) are left zero */
    if (IS_GEN10(i965->intel.device_info)) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE. Only the PAK-BSE (compressed bitstream)
 * base/upper-bound addresses matter for VDEnc encode; all decoder-oriented
 * indirect objects are programmed NULL.
 *
 * NOTE(review): some argument lines of the final OUT_BUFFER_3DW/2DW calls
 * appear truncated from this extract; code tokens are kept byte-identical.
 */
gen9_vdenc_mfx_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* The DW1-5 is for the MFX indirect bistream offset, ignore for VDEnc mode */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* the DW6-10 is for MFX Indirect MV Object Base Address, ignore for VDEnc mode */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
     * Note: an offset is specified in MFX_AVC_SLICE_STATE
     */
    OUT_BUFFER_3DW(batch,
                   vdenc_context->compressed_bitstream.res.bo,
    OUT_BUFFER_2DW(batch,
                   vdenc_context->compressed_bitstream.res.bo,
                   vdenc_context->compressed_bitstream.end_offset);

    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE: only the BSD/MPC row-store scratch
 * buffer is needed for encode; decoder-only buffers are NULL.
 */
gen9_vdenc_mfx_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* The DW1-3 is for bsd/mpc row store scratch buffer */
    OUT_BUFFER_3DW(batch, vdenc_context->mfx_bsd_mpc_row_store_scratch_res.bo, 1, 0, 0);

    /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * Emit one MFX_QM_STATE command. The caller's matrix data (qm_length DWs,
 * at most 16) is copied into a 16-DW staging buffer; the remaining DWs are
 * emitted uninitialized padding to fill the fixed 18-DW command.
 *
 * NOTE(review): the middle parameter lines (qm type, matrix pointer and
 * length -- read below as `qm_type`, `qm`, `qm_length`) appear truncated
 * from this extract; code tokens are kept byte-identical.
 */
gen9_vdenc_mfx_qm_state(VADriverContextP ctx,
                        struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    unsigned int qm_buffer[16];

    assert(qm_length <= 16);
    assert(sizeof(*qm) == 4);
    memcpy(qm_buffer, qm, qm_length * 4);

    BEGIN_BCS_BATCH(batch, 18);
    OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
    OUT_BCS_BATCH(batch, qm_type << 0);
    intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
    ADVANCE_BCS_BATCH(batch);
/*
 * Program all four AVC quantization-matrix sets with a flat matrix (every
 * coefficient 0x10). 4x4 sets carry 12 DWs (3 lists x 16 bytes), 8x8 sets
 * carry 16 DWs (64 bytes).
 */
gen9_vdenc_mfx_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
    /* TODO: add support for non flat matrix */
    unsigned int qm[16] = {
        0x10101010, 0x10101010, 0x10101010, 0x10101010,
        0x10101010, 0x10101010, 0x10101010, 0x10101010,
        0x10101010, 0x10101010, 0x10101010, 0x10101010,
        0x10101010, 0x10101010, 0x10101010, 0x10101010

    gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
    gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
    gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
    gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
/*
 * Emit one MFX_FQM_STATE command (forward quantization matrices, 16-bit
 * coefficients). Up to 32 DWs of caller data are staged; the fixed 34-DW
 * command is always emitted.
 *
 * NOTE(review): the middle parameter lines (read below as `fqm_type`,
 * `fqm`, `fqm_length`) appear truncated from this extract; code tokens are
 * kept byte-identical.
 */
gen9_vdenc_mfx_fqm_state(VADriverContextP ctx,
                         struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    unsigned int fqm_buffer[32];

    assert(fqm_length <= 32);
    assert(sizeof(*fqm) == 4);
    memcpy(fqm_buffer, fqm, fqm_length * 4);

    BEGIN_BCS_BATCH(batch, 34);
    OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
    OUT_BCS_BATCH(batch, fqm_type << 0);
    intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
    ADVANCE_BCS_BATCH(batch);
/*
 * Program all four AVC forward-quantization-matrix sets with a flat matrix
 * (every 16-bit coefficient 0x1000). 4x4 sets carry 24 DWs, 8x8 sets 32 DWs.
 */
gen9_vdenc_mfx_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
    /* TODO: add support for non flat matrix */
    unsigned int qm[32] = {
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000

    gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
    gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
    gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
    gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
/*
 * Build a MFX_AVC_IMG_STATE command on the stack (non-HuC variant: last
 * argument 0) and emit it into the BCS batch verbatim.
 */
gen9_vdenc_mfx_avc_img_state(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_mfx_avc_img_state mfx_img_cmd;

    gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &mfx_img_cmd, 0);

    BEGIN_BCS_BATCH(batch, (sizeof(mfx_img_cmd) >> 2));
    intel_batchbuffer_data(batch, &mfx_img_cmd, sizeof(mfx_img_cmd));
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit VDENC_PIPE_MODE_SELECT configuring the VDEnc engine for AVC, with
 * stream-in and PAK-threshold-check taken from the context.
 */
gen9_vdenc_vdenc_pipe_mode_select(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 2);

    OUT_BCS_BATCH(batch, VDENC_PIPE_MODE_SELECT | (2 - 2));
    OUT_BCS_BATCH(batch,
                  (vdenc_context->vdenc_streamin_enable << 9) |
                  (vdenc_context->vdenc_pak_threshold_check_enable << 8) |
                  (1 << 7) |  /* Tlb prefetch enable */
                  (1 << 5) |  /* Frame Statistics Stream-Out Enable */
                  (VDENC_CODEC_AVC << 0));

    ADVANCE_BCS_BATCH(batch);
/*
 * Emit a VDEnc surface-state command (src/ref/ds-ref selected by
 * vdenc_surface_cmd) describing a tiled NV12 surface.
 */
gen9_vdenc_vdenc_surface_state(VADriverContextP ctx,
                               struct intel_encoder_context *encoder_context,
                               struct i965_gpe_resource *gpe_resource,
                               int vdenc_surface_cmd)
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, vdenc_surface_cmd | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (VDENC_SURFACE_PLANAR_420_8 << 28) |  /* 420 planar YUV surface only on SKL */
                  (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) |    /* pitch */
                  (0 << 2) |                            /* must be 0 for interleave U/V */
                  (1 << 1) |                            /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset)); /* y offset for v(cr) */

    ADVANCE_BCS_BATCH(batch);
/* Thin wrapper: emit the source-picture VDEnc surface state. */
gen9_vdenc_vdenc_src_surface_state(VADriverContextP ctx,
                                   struct intel_encoder_context *encoder_context,
                                   struct i965_gpe_resource *gpe_resource)
    gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_SRC_SURFACE_STATE);
/* Thin wrapper: emit a reference-picture VDEnc surface state. */
gen9_vdenc_vdenc_ref_surface_state(VADriverContextP ctx,
                                   struct intel_encoder_context *encoder_context,
                                   struct i965_gpe_resource *gpe_resource)
    gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_REF_SURFACE_STATE);
/* Thin wrapper: emit a down-scaled reference VDEnc surface state. */
gen9_vdenc_vdenc_ds_ref_surface_state(VADriverContextP ctx,
                                      struct intel_encoder_context *encoder_context,
                                      struct i965_gpe_resource *gpe_resource)
    gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_DS_REF_SURFACE_STATE);
/*
 * Emit VDENC_PIPE_BUF_ADDR_STATE (Gen9 37-DW layout) binding the down-scaled
 * and full-resolution forward references, the input surface, optional
 * stream-in data, row-store scratch and the statistics stream-out buffer.
 * A ref slot of 0xFF in list_ref_idx means "no reference" and is bound NULL.
 */
gen9_vdenc_vdenc_pipe_buf_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 37);

    OUT_BCS_BATCH(batch, VDENC_PIPE_BUF_ADDR_STATE | (37 - 2));

    /* DW1-6 for DS FWD REF0/REF1 */

    if (vdenc_context->list_ref_idx[0][0] != 0xFF)
        OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);

        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    if (vdenc_context->list_ref_idx[0][1] != 0xFF)
        OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);

        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW7-9 for DS BWD REF0, ignored on SKL */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW10-12 for uncompressed input data */
    OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);

    /* DW13-DW15 for streamin data */
    if (vdenc_context->vdenc_streamin_enable)
        OUT_BUFFER_3DW(batch, vdenc_context->vdenc_streamin_res.bo, 0, 0, 0);

        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW16-DW18 for row scratch buffer */
    OUT_BUFFER_3DW(batch, vdenc_context->vdenc_row_store_scratch_res.bo, 1, 0, 0);

    /* DW19-DW21, ignored on SKL */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW22-DW27 for FWD REF0/REF1 */

    if (vdenc_context->list_ref_idx[0][0] != 0xFF)
        OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);

        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    if (vdenc_context->list_ref_idx[0][1] != 0xFF)
        OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);

        OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW28-DW30 for FWD REF2, ignored on SKL */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW31-DW33 for BDW REF0, ignored on SKL */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW34-DW36 for VDEnc statistics streamout */
    OUT_BUFFER_3DW(batch, vdenc_context->vdenc_statistics_res.bo, 1, 0, 0);

    ADVANCE_BCS_BATCH(batch);
2695 gen10_vdenc_vdenc_pipe_buf_addr_state(VADriverContextP ctx,
2696 struct encode_state *encode_state,
2697 struct intel_encoder_context *encoder_context)
2699 struct i965_driver_data *i965 = i965_driver_data(ctx);
2700 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2701 struct intel_batchbuffer *batch = encoder_context->base.batch;
2703 BEGIN_BCS_BATCH(batch, 62);
2705 OUT_BCS_BATCH(batch, VDENC_PIPE_BUF_ADDR_STATE | (62 - 2));
2707 /* DW1-6 for DS FWD REF0/REF1 */
2708 if (vdenc_context->list_ref_idx[0][0] != 0xFF)
2709 OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][0]].bo,
2712 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2714 if (vdenc_context->list_ref_idx[0][1] != 0xFF)
2715 OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][1]].bo,
2718 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2720 /* DW7-9 for DS BWD REF0. B-frame is not supported */
2721 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2723 /* DW10-12 for uncompressed input data */
2724 OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);
2726 /* DW13-DW15 for streamin data */
2727 if (vdenc_context->vdenc_streamin_enable)
2728 OUT_BUFFER_3DW(batch, vdenc_context->vdenc_streamin_res.bo, 0, 0, 0);
2730 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2732 /* DW16-DW18 for row scratch buffer */
2733 OUT_BUFFER_3DW(batch, vdenc_context->vdenc_row_store_scratch_res.bo, 1, 0, 0);
2735 /* DW19-DW21, Not used */
2736 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2738 /* DW22-DW27 for FWD REF0/REF1 */
2739 if (vdenc_context->list_ref_idx[0][0] != 0xFF)
2740 OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
2742 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2744 if (vdenc_context->list_ref_idx[0][1] != 0xFF)
2745 OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
2747 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2749 if (vdenc_context->list_ref_idx[0][2] != 0xFF)
2750 OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][2]].bo, 0, 0, 0);
2752 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2754 /* DW31-DW33 for BDW REF0. Ignored*/
2755 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2757 /* DW34-DW36 for VDEnc statistics streamout */
2758 OUT_BUFFER_3DW(batch, vdenc_context->vdenc_statistics_res.bo, 1, 0, 0);
2760 /* DW37..DW39. Not used */
2761 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2763 /* DW40..DW42. Not used */
2764 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2766 /* DW43..DW45. Not used */
2767 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2769 /* DW46..DW48. Not used */
2770 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2772 /* DW49..DW51. Not used */
2773 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2775 /* DW52..DW54. Not used */
2776 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2778 /* DW55..DW57. Not used */
2779 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2781 /* DW58..DW60. Not used */
2782 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2784 /* DW 61. Not used */
2785 OUT_BCS_BATCH(batch, 0);
2787 ADVANCE_BCS_BATCH(batch);
2791 gen9_vdenc_vdenc_const_qpt_state(VADriverContextP ctx,
2792 struct encode_state *encode_state,
2793 struct intel_encoder_context *encoder_context)
2795 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2796 struct intel_batchbuffer *batch = encoder_context->base.batch;
2798 BEGIN_BCS_BATCH(batch, 61);
2800 OUT_BCS_BATCH(batch, VDENC_CONST_QPT_STATE | (61 - 2));
2802 if (vdenc_context->frame_type == VDENC_FRAME_I) {
2804 intel_batchbuffer_data(batch, (void *)vdenc_const_qp_lambda, sizeof(vdenc_const_qp_lambda));
2807 intel_batchbuffer_data(batch, (void *)vdenc_const_skip_threshold, sizeof(vdenc_const_skip_threshold));
2810 intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_0, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0));
2813 intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_1, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1));
2816 intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_2, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2));
2819 intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_3, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3));
2822 uint16_t tmp_vdenc_skip_threshold_p[28];
2824 memcpy(&tmp_vdenc_skip_threshold_p, vdenc_const_skip_threshold_p, sizeof(vdenc_const_skip_threshold_p));
2826 for (i = 0; i < 28; i++) {
2827 tmp_vdenc_skip_threshold_p[i] *= 3;
2831 intel_batchbuffer_data(batch, (void *)vdenc_const_qp_lambda_p, sizeof(vdenc_const_qp_lambda_p));
2834 intel_batchbuffer_data(batch, (void *)tmp_vdenc_skip_threshold_p, sizeof(vdenc_const_skip_threshold_p));
2837 intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_0_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0_p));
2840 intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_1_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1_p));
2843 intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_2_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2_p));
2846 intel_batchbuffer_data(batch, (void *)vdenc_const_sic_forward_transform_coeff_threshold_3_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3_p));
2849 ADVANCE_BCS_BATCH(batch);
2853 gen9_vdenc_vdenc_walker_state(VADriverContextP ctx,
2854 struct encode_state *encode_state,
2855 struct intel_encoder_context *encoder_context)
2857 struct intel_batchbuffer *batch = encoder_context->base.batch;
2859 BEGIN_BCS_BATCH(batch, 2);
2861 OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (2 - 2));
2862 OUT_BCS_BATCH(batch, 0); /* All fields are set to 0 */
2864 ADVANCE_BCS_BATCH(batch);
/*
 * Emit VDENC_WEIGHTSOFFSETS_STATE: per-reference L0 luma weight/offset
 * pairs for explicit weighted prediction.  GEN10 uses a 5-DW command,
 * earlier Gen9.5 parts a 3-DW one.
 * NOTE(review): several continuation lines (else branches and the default
 * weight/offset bit values) appear to be elided in this listing; comments
 * below describe only what is visible.
 */
2868 gen95_vdenc_vdecn_weihgtsoffsets_state(VADriverContextP ctx,
2869 struct encode_state *encode_state,
2870 struct intel_encoder_context *encoder_context,
2871 VAEncSliceParameterBufferH264 *slice_param)
2873 struct i965_driver_data *i965 = i965_driver_data(ctx);
2874 struct intel_batchbuffer *batch = encoder_context->base.batch;
2875 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
/* The GEN10 command carries two extra DWs compared to the Gen9.5 variant */
2877 if (IS_GEN10(i965->intel.device_info)) {
2878 BEGIN_BCS_BATCH(batch, 5);
2879 OUT_BCS_BATCH(batch, VDENC_WEIGHTSOFFSETS_STATE | (5 - 2));
2881 BEGIN_BCS_BATCH(batch, 3);
2882 OUT_BCS_BATCH(batch, VDENC_WEIGHTSOFFSETS_STATE | (3 - 2));
/* Explicit weighted prediction: pack the L0 luma offset/weight pairs
 * for references 0..2 into DW1/DW2 (offset in the high byte, weight in
 * the low byte of each 16-bit field). */
2885 if (pic_param->pic_fields.bits.weighted_pred_flag == 1) {
2886 OUT_BCS_BATCH(batch, (slice_param->luma_offset_l0[1] << 24 |
2887 slice_param->luma_weight_l0[1] << 16 |
2888 slice_param->luma_offset_l0[0] << 8 |
2889 slice_param->luma_weight_l0[0] << 0));
2890 OUT_BCS_BATCH(batch, (slice_param->luma_offset_l0[2] << 8 |
2891 slice_param->luma_weight_l0[2] << 0));
/* Non-weighted case: defaults — presumably offset 0 / weight 1 per field,
 * but the remaining bit lines are elided here; verify against upstream. */
2893 OUT_BCS_BATCH(batch, (0 << 24 |
2897 OUT_BCS_BATCH(batch, (0 << 8 |
/* GEN10-only extra DWs — presumably the (unused) backward-reference
 * weights; values elided in this listing, TODO confirm. */
2901 if (IS_GEN10(i965->intel.device_info)) {
2902 OUT_BCS_BATCH(batch, (0 << 24 |
2906 OUT_BCS_BATCH(batch, (0 << 24 |
2912 ADVANCE_BCS_BATCH(batch);
2916 gen95_vdenc_vdenc_walker_state(VADriverContextP ctx,
2917 struct encode_state *encode_state,
2918 struct intel_encoder_context *encoder_context,
2919 VAEncSliceParameterBufferH264 *slice_param,
2920 VAEncSliceParameterBufferH264 *next_slice_param)
2922 struct i965_driver_data *i965 = i965_driver_data(ctx);
2923 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2924 struct intel_batchbuffer *batch = encoder_context->base.batch;
2925 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2926 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
2927 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
2928 int luma_log2_weight_denom, weighted_pred_idc;
2930 slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
2931 slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
2933 if (next_slice_param) {
2934 next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
2935 next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
2937 next_slice_hor_pos = 0;
2938 next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
2941 if (slice_type == SLICE_TYPE_P)
2942 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
2944 weighted_pred_idc = 0;
2946 if (weighted_pred_idc == 1)
2947 luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
2949 luma_log2_weight_denom = 0;
2951 if (IS_GEN10(i965->intel.device_info)) {
2952 BEGIN_BCS_BATCH(batch, 6);
2953 OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (6 - 2));
2955 BEGIN_BCS_BATCH(batch, 4);
2956 OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (4 - 2));
2959 OUT_BCS_BATCH(batch, (slice_hor_pos << 16 |
2961 OUT_BCS_BATCH(batch, (next_slice_hor_pos << 16 |
2962 next_slice_ver_pos));
2963 OUT_BCS_BATCH(batch, luma_log2_weight_denom);
2965 if (IS_GEN10(i965->intel.device_info)) {
2966 /* Not used for VDENC H264 */
2967 OUT_BCS_BATCH(batch, 0);
2968 OUT_BCS_BATCH(batch, 0);
2971 ADVANCE_BCS_BATCH(batch);
2975 gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
2976 struct encode_state *encode_state,
2977 struct intel_encoder_context *encoder_context)
2979 struct intel_batchbuffer *batch = encoder_context->base.batch;
2980 struct gen9_vdenc_img_state vdenc_img_cmd;
2982 gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, &vdenc_img_cmd, 1);
2984 BEGIN_BCS_BATCH(batch, (sizeof(vdenc_img_cmd) >> 2));
2985 intel_batchbuffer_data(batch, &vdenc_img_cmd, sizeof(vdenc_img_cmd));
2986 ADVANCE_BCS_BATCH(batch);
2990 gen9_vdenc_mfx_avc_insert_object(VADriverContextP ctx,
2991 struct intel_encoder_context *encoder_context,
2992 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
2993 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
2994 int slice_header_indicator)
2996 struct intel_batchbuffer *batch = encoder_context->base.batch;
2998 if (data_bits_in_last_dw == 0)
2999 data_bits_in_last_dw = 32;
3001 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
3003 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
3004 OUT_BCS_BATCH(batch,
3005 (0 << 16) | /* always start at offset 0 */
3006 (slice_header_indicator << 14) |
3007 (data_bits_in_last_dw << 8) |
3008 (skip_emul_byte_count << 4) |
3009 (!!emulation_flag << 3) |
3010 ((!!is_last_header) << 2) |
3011 ((!!is_end_of_slice) << 1) |
3012 (0 << 0)); /* TODO: check this flag */
3013 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
3015 ADVANCE_BCS_BATCH(batch);
/*
 * Insert the per-slice packed header data for one slice: first every
 * application-supplied raw packed header except the slice header, then an
 * optional single zero byte (slice-level VDEnc workaround), and finally the
 * slice header itself — either driver-generated (when the app passed none)
 * or the app-supplied packed slice header.
 * NOTE(review): this listing appears to have dropped a number of lines
 * (else branches, call-argument continuations, closing braces); comments
 * below describe only what is visible.
 */
3019 gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
3020 struct encode_state *encode_state,
3021 struct intel_encoder_context *encoder_context,
3024 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3025 struct i965_driver_data *i965 = i965_driver_data(ctx);
3026 VAEncPackedHeaderParameterBuffer *param = NULL;
3027 unsigned int length_in_bits;
3028 unsigned int *header_data = NULL;
3029 int count, i, start_index;
3030 int slice_header_index;
3031 unsigned int insert_one_zero_byte = 0;
/* -1 marks "no app-supplied packed slice header for this slice" */
3033 if (encode_state->slice_header_index[slice_index] == 0)
3034 slice_header_index = -1;
3036 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
3038 count = encode_state->slice_rawdata_count[slice_index];
3039 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
/* First pass: emit all raw packed headers attached to this slice */
3041 for (i = 0; i < count; i++) {
3042 unsigned int skip_emul_byte_cnt;
3044 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
3046 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
3048 /* skip the slice header packed data type as it is lastly inserted */
3049 if (param->type == VAEncPackedHeaderSlice)
3052 length_in_bits = param->bit_length;
3054 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3056 /* as the slice header is still required, the last header flag is set to
3059 gen9_vdenc_mfx_avc_insert_object(ctx,
3062 ALIGN(length_in_bits, 32) >> 5,
3063 length_in_bits & 0x1f,
3067 !param->has_emulation_bytes,
/* Slice-level VDEnc needs one zero byte prepended to the slice header */
3072 if (!vdenc_context->is_frame_level_vdenc) {
3073 insert_one_zero_byte = 1;
3076 /* Insert one zero byte before the slice header if no any other NAL unit is inserted, required on KBL */
3077 if (insert_one_zero_byte) {
3078 unsigned int insert_data[] = { 0, };
3080 gen9_vdenc_mfx_avc_insert_object(ctx,
/* No app slice header: the driver builds one itself */
3089 if (slice_header_index == -1) {
3090 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
3091 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
3092 VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
3093 unsigned char *slice_header = NULL, *slice_header1 = NULL;
3094 int slice_header_length_in_bits = 0;
3095 uint32_t saved_macroblock_address = 0;
3097 /* No slice header data is passed. And the driver needs to generate it */
3098 /* For the Normal H264 */
/* KBL/GLK/GEN10: temporarily zero the MB address while building the
 * header, then restore it (hardware fills the real first-MB value) */
3101 (IS_KBL(i965->intel.device_info) ||
3102 IS_GLK(i965->intel.device_info) ||
3103 IS_GEN10(i965->intel.device_info))) {
3104 saved_macroblock_address = slice_params->macroblock_address;
3105 slice_params->macroblock_address = 0;
3108 slice_header_length_in_bits = build_avc_slice_header(seq_param,
3113 slice_header1 = slice_header;
3116 (IS_KBL(i965->intel.device_info) ||
3117 IS_GLK(i965->intel.device_info) ||
3118 IS_GEN10(i965->intel.device_info))) {
3119 slice_params->macroblock_address = saved_macroblock_address;
/* The zero byte already emitted replaces the header's first byte */
3122 if (insert_one_zero_byte) {
3124 slice_header_length_in_bits -= 8;
3127 gen9_vdenc_mfx_avc_insert_object(ctx,
3129 (unsigned int *)slice_header1,
3130 ALIGN(slice_header_length_in_bits, 32) >> 5,
3131 slice_header_length_in_bits & 0x1f,
3132 5, /* first 5 bytes are start code + nal unit type */
/* App-supplied packed slice header path */
3138 unsigned int skip_emul_byte_cnt;
3139 unsigned char *slice_header1 = NULL;
3142 (IS_KBL(i965->intel.device_info) ||
3143 IS_GLK(i965->intel.device_info) ||
3144 IS_GEN10(i965->intel.device_info))) {
3145 slice_header_index = (encode_state->slice_header_index[0] & SLICE_PACKED_DATA_INDEX_MASK);
3148 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
3150 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
3151 length_in_bits = param->bit_length;
3153 slice_header1 = (unsigned char *)header_data;
3155 if (insert_one_zero_byte) {
3157 length_in_bits -= 8;
3160 /* as the slice header is the last header data for one slice,
3161 * the last header flag is set to one.
3163 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3165 if (insert_one_zero_byte)
3166 skip_emul_byte_cnt -= 1;
3168 gen9_vdenc_mfx_avc_insert_object(ctx,
3170 (unsigned int *)slice_header1,
3171 ALIGN(length_in_bits, 32) >> 5,
3172 length_in_bits & 0x1f,
3176 !param->has_emulation_bytes,
/*
 * Insert the bitstream headers for one slice: for the first slice of the
 * frame, emit the app-supplied packed SPS, PPS and SEI (in that order) via
 * MFX_INSERT_OBJECT; then delegate to
 * gen9_vdenc_mfx_avc_insert_slice_packed_data() for the per-slice data.
 * NOTE(review): call-argument continuation lines and some braces appear to
 * be elided in this listing; comments describe only what is visible.
 */
3184 gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
3185 struct encode_state *encode_state,
3186 struct intel_encoder_context *encoder_context,
3187 VAEncSliceParameterBufferH264 *slice_param,
3190 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3191 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
3192 unsigned int internal_rate_mode = vdenc_context->internal_rate_mode;
3193 unsigned int skip_emul_byte_cnt;
/* SPS/PPS/SEI only go in front of the first slice of the frame */
3195 if (slice_index == 0) {
3197 if (encode_state->packed_header_data[idx]) {
3198 VAEncPackedHeaderParameterBuffer *param = NULL;
3199 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3200 unsigned int length_in_bits;
3202 assert(encode_state->packed_header_param[idx]);
3203 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3204 length_in_bits = param->bit_length;
3206 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3207 gen9_vdenc_mfx_avc_insert_object(ctx,
3210 ALIGN(length_in_bits, 32) >> 5,
3211 length_in_bits & 0x1f,
3215 !param->has_emulation_bytes,
/* Packed PPS, same emission pattern as the SPS above */
3220 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
3222 if (encode_state->packed_header_data[idx]) {
3223 VAEncPackedHeaderParameterBuffer *param = NULL;
3224 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3225 unsigned int length_in_bits;
3227 assert(encode_state->packed_header_param[idx]);
3228 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3229 length_in_bits = param->bit_length;
3231 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3233 gen9_vdenc_mfx_avc_insert_object(ctx,
3236 ALIGN(length_in_bits, 32) >> 5,
3237 length_in_bits & 0x1f,
3241 !param->has_emulation_bytes,
/* Packed SEI, same emission pattern */
3246 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
3248 if (encode_state->packed_header_data[idx]) {
3249 VAEncPackedHeaderParameterBuffer *param = NULL;
3250 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3251 unsigned int length_in_bits;
3253 assert(encode_state->packed_header_param[idx]);
3254 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3255 length_in_bits = param->bit_length;
3257 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3258 gen9_vdenc_mfx_avc_insert_object(ctx,
3261 ALIGN(length_in_bits, 32) >> 5,
3262 length_in_bits & 0x1f,
3266 !param->has_emulation_bytes,
3269 } else if (internal_rate_mode == I965_BRC_CBR) {
3270 /* TODO: insert others */
/* Per-slice raw headers + slice header */
3274 gen9_vdenc_mfx_avc_insert_slice_packed_data(ctx,
/*
 * Emit MFX_AVC_SLICE_STATE (11 DWs) for one slice: slice type, reference
 * counts, weighted-prediction denominators, deblocking parameters, slice
 * and next-slice MB positions, rate-control fields and the output offset
 * of the compressed bitstream.
 * NOTE(review): several packed-DW continuation lines (e.g. the slice QP
 * field of DW3 and parts of DW8/DW9) appear to be elided in this listing;
 * comments describe only what is visible.
 */
3281 gen9_vdenc_mfx_avc_slice_state(VADriverContextP ctx,
3282 struct encode_state *encode_state,
3283 struct intel_encoder_context *encoder_context,
3284 VAEncPictureParameterBufferH264 *pic_param,
3285 VAEncSliceParameterBufferH264 *slice_param,
3286 VAEncSliceParameterBufferH264 *next_slice_param,
3289 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3290 struct intel_batchbuffer *batch = encoder_context->base.batch;
3291 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
3292 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
3293 unsigned char correct[6], grow, shrink;
3294 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
3295 int max_qp_n, max_qp_p;
3297 int weighted_pred_idc = 0;
3298 int num_ref_l0 = 0, num_ref_l1 = 0;
3299 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3300 int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; // TODO: fix for CBR&VBR */
3301 int inter_rounding = 0;
3303 if (vdenc_context->internal_rate_mode != I965_BRC_CQP)
/* Convert linear MB start addresses into (x, y) MB coordinates */
3306 slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
3307 slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
3309 if (next_slice_param) {
3310 next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
3311 next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
3313 next_slice_hor_pos = 0;
3314 next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
/* Per-slice-type weighted prediction and active reference counts */
3317 if (slice_type == SLICE_TYPE_I) {
3318 luma_log2_weight_denom = 0;
3319 chroma_log2_weight_denom = 0;
3320 } else if (slice_type == SLICE_TYPE_P) {
3321 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
3322 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
3324 if (slice_param->num_ref_idx_active_override_flag)
3325 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3326 } else if (slice_type == SLICE_TYPE_B) {
3327 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
3328 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
3329 num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
3331 if (slice_param->num_ref_idx_active_override_flag) {
3332 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3333 num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
3336 if (weighted_pred_idc == 2) {
3337 /* 8.4.3 - Derivation process for prediction weights (8-279) */
3338 luma_log2_weight_denom = 5;
3339 chroma_log2_weight_denom = 5;
/* Rate-control tuning fields — placeholders for now */
3343 max_qp_n = 0; /* TODO: update it */
3344 max_qp_p = 0; /* TODO: update it */
3345 grow = 0; /* TODO: update it */
3346 shrink = 0; /* TODO: update it */
3348 for (i = 0; i < 6; i++)
3349 correct[i] = 0; /* TODO: update it */
3351 BEGIN_BCS_BATCH(batch, 11);
3353 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
3354 OUT_BCS_BATCH(batch, slice_type);
3355 OUT_BCS_BATCH(batch,
3356 (num_ref_l0 << 16) |
3357 (num_ref_l1 << 24) |
3358 (chroma_log2_weight_denom << 8) |
3359 (luma_log2_weight_denom << 0));
/* DW3: prediction/deblocking controls; the slice QP bit line appears
 * elided in this listing */
3360 OUT_BCS_BATCH(batch,
3361 (weighted_pred_idc << 30) |
3362 (slice_param->direct_spatial_mv_pred_flag << 29) |
3363 (slice_param->disable_deblocking_filter_idc << 27) |
3364 (slice_param->cabac_init_idc << 24) |
3366 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
3367 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
/* DW4/DW5: current and next slice start MB positions */
3369 OUT_BCS_BATCH(batch,
3370 slice_ver_pos << 24 |
3371 slice_hor_pos << 16 |
3372 slice_param->macroblock_address);
3373 OUT_BCS_BATCH(batch,
3374 next_slice_ver_pos << 16 |
3375 next_slice_hor_pos);
/* DW6: stream output / rate-control control flags */
3377 OUT_BCS_BATCH(batch,
3378 (0 << 31) | /* TODO: ignore it for VDENC ??? */
3379 (!slice_param->macroblock_address << 30) | /* ResetRateControlCounter */
3380 (2 << 28) | /* Loose Rate Control */
3381 (0 << 24) | /* RC Stable Tolerance */
3382 (0 << 23) | /* RC Panic Enable */
3383 (1 << 22) | /* CBP mode */
3384 (0 << 21) | /* MB Type Direct Conversion, 0: Enable, 1: Disable */
3385 (0 << 20) | /* MB Type Skip Conversion, 0: Enable, 1: Disable */
3386 (!next_slice_param << 19) | /* Is Last Slice */
3387 (0 << 18) | /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
3388 (1 << 17) | /* HeaderPresentFlag */
3389 (1 << 16) | /* SliceData PresentFlag */
3390 (0 << 15) | /* TailPresentFlag, TODO: check it on VDEnc */
3391 (1 << 13) | /* RBSP NAL TYPE */
3392 (slice_index << 4) |
3393 (1 << 12)); /* CabacZeroWordInsertionEnable */
3395 OUT_BCS_BATCH(batch, vdenc_context->compressed_bitstream.start_offset);
/* DW8/DW9: QP clamps and correction factors — some bit lines elided */
3397 OUT_BCS_BATCH(batch,
3398 (max_qp_n << 24) | /*Target QP - 24 is lowest QP*/
3399 (max_qp_p << 16) | /*Target QP + 20 is highest QP*/
3402 OUT_BCS_BATCH(batch,
3404 (inter_rounding << 28) |
3407 (correct[5] << 20) |
3408 (correct[4] << 16) |
3409 (correct[3] << 12) |
3413 OUT_BCS_BATCH(batch, 0);
3415 ADVANCE_BCS_BATCH(batch);
3419 gen9_vdenc_mfx_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
3421 unsigned int is_long_term =
3422 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
3423 unsigned int is_top_field =
3424 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
3425 unsigned int is_bottom_field =
3426 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
3428 return ((is_long_term << 6) |
3429 ((is_top_field ^ is_bottom_field ^ 1) << 5) |
3430 (frame_store_id << 1) |
3431 ((is_top_field ^ 1) & is_bottom_field));
/*
 * Emit MFX_AVC_REF_IDX_STATE for the L0 reference list of a P slice: pack
 * up to three forward references (one byte each, via
 * gen9_vdenc_mfx_get_ref_idx_state()) into the first DW of the list,
 * leaving unused entries at the "invalid" value 0x80.
 * NOTE(review): the loop-control statement after the 0xFF check and some
 * closing braces appear to be elided in this listing.
 */
3435 gen9_vdenc_mfx_avc_ref_idx_state(VADriverContextP ctx,
3436 struct encode_state *encode_state,
3437 struct intel_encoder_context *encoder_context,
3438 VAEncSliceParameterBufferH264 *slice_param)
3440 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3441 struct intel_batchbuffer *batch = encoder_context->base.batch;
3442 VAPictureH264 *ref_pic;
3443 int i, slice_type, ref_idx_shift;
3444 unsigned int fwd_ref_entry;
/* 0x80 per byte = "unused reference" sentinel for the hardware */
3446 fwd_ref_entry = 0x80808080;
3447 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* At most 3 forward references are supported by VDEnc */
3449 for (i = 0; i < MIN(vdenc_context->num_refs[0], 3); i++) {
3450 ref_pic = &slice_param->RefPicList0[i];
3451 ref_idx_shift = i * 8;
/* skip entries without a valid frame-store mapping (elided statement) */
3453 if (vdenc_context->list_ref_idx[0][i] == 0xFF)
3456 fwd_ref_entry &= ~(0xFF << ref_idx_shift);
3457 fwd_ref_entry += (gen9_vdenc_mfx_get_ref_idx_state(ref_pic, vdenc_context->list_ref_idx[0][i]) << ref_idx_shift);
3460 if (slice_type == SLICE_TYPE_P) {
3461 BEGIN_BCS_BATCH(batch, 10);
3462 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
3463 OUT_BCS_BATCH(batch, 0); // L0
3464 OUT_BCS_BATCH(batch, fwd_ref_entry);
/* remaining 7 DWs of the list are all "unused" */
3466 for (i = 0; i < 7; i++) {
3467 OUT_BCS_BATCH(batch, 0x80808080);
3470 ADVANCE_BCS_BATCH(batch);
3473 if (slice_type == SLICE_TYPE_B) {
3474 /* VDEnc on SKL doesn't support BDW */
/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE (98 DWs) when explicit weighted
 * prediction is enabled for a P slice: a table of 32 L0 entries, each
 * holding the luma weight/offset pair and both chroma weight/offset pairs.
 * NOTE(review): the declarations of `i`/`slice_type` and some braces
 * appear to be elided in this listing.
 */
3480 gen9_vdenc_mfx_avc_weightoffset_state(VADriverContextP ctx,
3481 struct encode_state *encode_state,
3482 struct intel_encoder_context *encoder_context,
3483 VAEncPictureParameterBufferH264 *pic_param,
3484 VAEncSliceParameterBufferH264 *slice_param)
3486 struct intel_batchbuffer *batch = encoder_context->base.batch;
/* 32 refs x (luma w/o + Cb w/o + Cr w/o) = 32 x 6 16-bit entries */
3488 short weightoffsets[32 * 6];
3490 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3492 if (slice_type == SLICE_TYPE_P &&
3493 pic_param->pic_fields.bits.weighted_pred_flag == 1) {
3495 for (i = 0; i < 32; i++) {
3496 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
3497 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
3498 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
3499 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
3500 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
3501 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
3504 BEGIN_BCS_BATCH(batch, 98);
3505 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
3506 OUT_BCS_BATCH(batch, 0);
3507 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
3509 ADVANCE_BCS_BATCH(batch);
3512 if (slice_type == SLICE_TYPE_B) {
3513 /* VDEnc on SKL doesn't support BWD */
/*
 * Emit all per-slice state for one slice, in order: reference index list,
 * weight/offset table, MFX slice state, bitstream header insertion, and —
 * for slice-level VDEnc only — the per-slice VDEnc weights/offsets and
 * walker state.
 * NOTE(review): call-argument continuation lines appear to be elided in
 * this listing.
 */
3519 gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
3520 struct encode_state *encode_state,
3521 struct intel_encoder_context *encoder_context,
3522 VAEncSliceParameterBufferH264 *slice_param,
3523 VAEncSliceParameterBufferH264 *next_slice_param,
3526 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3527 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
3529 gen9_vdenc_mfx_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param);
3530 gen9_vdenc_mfx_avc_weightoffset_state(ctx,
3535 gen9_vdenc_mfx_avc_slice_state(ctx,
3542 gen9_vdenc_mfx_avc_inset_headers(ctx,
/* Slice-level VDEnc additionally programs per-slice VDEnc commands */
3548 if (!vdenc_context->is_frame_level_vdenc) {
3549 gen95_vdenc_vdecn_weihgtsoffsets_state(ctx,
3553 gen95_vdenc_vdenc_walker_state(ctx,
/*
 * Emit the MFX/VDEnc commands for every slice of the frame.  For
 * slice-level VDEnc a VD pipeline flush (and, between slices, an MI flush)
 * is emitted after each slice; for frame-level VDEnc a single trailing
 * VDENC_WALKER_STATE plus pipeline flush covers the whole frame.  Ends
 * with an MI_FLUSH_DW that invalidates the video pipeline cache.
 * NOTE(review): several control-flow lines (loop braces, else branches)
 * appear to be elided in this listing.
 */
3562 gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
3563 struct encode_state *encode_state,
3564 struct intel_encoder_context *encoder_context)
3566 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3567 struct intel_batchbuffer *batch = encoder_context->base.batch;
3568 struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
3569 VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
3571 int slice_index = 0;
3572 int has_tail = 0; /* TODO: check it later */
/* Outer loop: slice parameter buffers; inner loop: slices per buffer */
3574 for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3575 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3577 if (j == encode_state->num_slice_params_ext - 1)
3578 next_slice_group_param = NULL;
3580 next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
3582 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
3583 if (i < encode_state->slice_params_ext[j]->num_elements - 1)
3584 next_slice_param = slice_param + 1;
3586 next_slice_param = next_slice_group_param;
3588 gen9_vdenc_mfx_avc_single_slice(ctx,
/* Frame-level VDEnc: no per-slice flush needed (elided branch) */
3595 if (vdenc_context->is_frame_level_vdenc)
3598 struct vd_pipeline_flush_parameter pipeline_flush_params;
3599 int insert_mi_flush;
3601 memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
/* MFX pipeline done on every slice except the very last one */
3603 if (next_slice_group_param) {
3604 pipeline_flush_params.mfx_pipeline_done = 1;
3605 insert_mi_flush = 1;
3606 } else if (i < encode_state->slice_params_ext[j]->num_elements - 1) {
3607 pipeline_flush_params.mfx_pipeline_done = 1;
3608 insert_mi_flush = 1;
3610 pipeline_flush_params.mfx_pipeline_done = !has_tail;
3611 insert_mi_flush = 0;
3614 pipeline_flush_params.vdenc_pipeline_done = 1;
3615 pipeline_flush_params.vdenc_pipeline_command_flush = 1;
3616 pipeline_flush_params.vd_command_message_parser_done = 1;
3617 gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
3619 if (insert_mi_flush) {
3620 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3621 mi_flush_dw_params.video_pipeline_cache_invalidate = 0;
3622 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
/* Frame-level VDEnc: one walker + flush for the whole frame */
3630 if (vdenc_context->is_frame_level_vdenc)
3634 if (vdenc_context->is_frame_level_vdenc) {
3635 struct vd_pipeline_flush_parameter pipeline_flush_params;
3637 gen9_vdenc_vdenc_walker_state(ctx, encode_state, encoder_context);
3639 memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
3640 pipeline_flush_params.mfx_pipeline_done = !has_tail;
3641 pipeline_flush_params.vdenc_pipeline_done = 1;
3642 pipeline_flush_params.vdenc_pipeline_command_flush = 1;
3643 pipeline_flush_params.vd_command_message_parser_done = 1;
3644 gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
3648 /* TODO: insert a tail if required */
3651 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3652 mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
3653 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3657 gen9_vdenc_mfx_vdenc_pipeline(VADriverContextP ctx,
3658 struct encode_state *encode_state,
3659 struct intel_encoder_context *encoder_context)
3661 struct i965_driver_data *i965 = i965_driver_data(ctx);
3662 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3663 struct intel_batchbuffer *batch = encoder_context->base.batch;
3664 struct gpe_mi_batch_buffer_start_parameter mi_batch_buffer_start_params;
3666 if (vdenc_context->brc_enabled) {
3667 struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
3669 memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
3670 mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
3671 gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
3674 if (vdenc_context->current_pass) {
3675 struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
3677 memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
3678 mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status_res.bo;
3679 gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
3682 gen9_vdenc_mfx_pipe_mode_select(ctx, encode_state, encoder_context);
3684 gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res, 0);
3685 gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res, 4);
3686 gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res, 5);
3688 gen9_vdenc_mfx_pipe_buf_addr_state(ctx, encoder_context);
3689 gen9_vdenc_mfx_ind_obj_base_addr_state(ctx, encoder_context);
3690 gen9_vdenc_mfx_bsp_buf_base_addr_state(ctx, encoder_context);
3692 gen9_vdenc_vdenc_pipe_mode_select(ctx, encode_state, encoder_context);
3693 gen9_vdenc_vdenc_src_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res);
3694 gen9_vdenc_vdenc_ref_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res);
3695 gen9_vdenc_vdenc_ds_ref_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res);
3697 if (IS_GEN10(i965->intel.device_info))
3698 gen10_vdenc_vdenc_pipe_buf_addr_state(ctx, encode_state, encoder_context);
3700 gen9_vdenc_vdenc_pipe_buf_addr_state(ctx, encode_state, encoder_context);
3702 gen9_vdenc_vdenc_const_qpt_state(ctx, encode_state, encoder_context);
3704 if (!vdenc_context->brc_enabled) {
3705 gen9_vdenc_mfx_avc_img_state(ctx, encode_state, encoder_context);
3706 gen9_vdenc_vdenc_img_state(ctx, encode_state, encoder_context);
3708 memset(&mi_batch_buffer_start_params, 0, sizeof(mi_batch_buffer_start_params));
3709 mi_batch_buffer_start_params.is_second_level = 1; /* Must be the second level batch buffer */
3710 mi_batch_buffer_start_params.bo = vdenc_context->second_level_batch_res.bo;
3711 gen8_gpe_mi_batch_buffer_start(ctx, batch, &mi_batch_buffer_start_params);
3714 gen9_vdenc_mfx_avc_qm_state(ctx, encoder_context);
3715 gen9_vdenc_mfx_avc_fqm_state(ctx, encoder_context);
3717 gen9_vdenc_mfx_vdenc_avc_slices(ctx, encode_state, encoder_context);
3721 gen9_vdenc_context_brc_prepare(struct encode_state *encode_state,
3722 struct intel_encoder_context *encoder_context)
3724 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3725 unsigned int rate_control_mode = encoder_context->rate_control_mode;
3727 switch (rate_control_mode & 0x7f) {
3729 vdenc_context->internal_rate_mode = I965_BRC_CBR;
3733 vdenc_context->internal_rate_mode = I965_BRC_VBR;
3738 vdenc_context->internal_rate_mode = I965_BRC_CQP;
3744 gen9_vdenc_read_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3746 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3747 struct intel_batchbuffer *batch = encoder_context->base.batch;
3748 struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
3749 struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
3750 unsigned int base_offset = vdenc_context->status_bffuer.base_offset;
3753 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3754 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3756 memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
3757 mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
3758 mi_store_register_mem_params.bo = vdenc_context->status_bffuer.res.bo;
3759 mi_store_register_mem_params.offset = base_offset + vdenc_context->status_bffuer.bytes_per_frame_offset;
3760 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3762 /* Update DMEM buffer for BRC Update */
3763 for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
3764 mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
3765 mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
3766 mi_store_register_mem_params.offset = 5 * sizeof(uint32_t);
3767 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3769 mi_store_register_mem_params.mmio_offset = MFC_IMAGE_STATUS_CTRL_REG; /* TODO: fix it if VDBOX2 is used */
3770 mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
3771 mi_store_register_mem_params.offset = 7 * sizeof(uint32_t);
3772 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3777 gen9_vdenc_avc_check_capability(VADriverContextP ctx,
3778 struct encode_state *encode_state,
3779 struct intel_encoder_context *encoder_context)
3781 VAEncSliceParameterBufferH264 *slice_param;
3784 for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3785 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3787 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
3788 if (slice_param->slice_type == SLICE_TYPE_B)
3789 return VA_STATUS_ERROR_UNKNOWN;
3795 return VA_STATUS_SUCCESS;
3799 gen9_vdenc_avc_encode_picture(VADriverContextP ctx,
3801 struct encode_state *encode_state,
3802 struct intel_encoder_context *encoder_context)
3805 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3806 struct intel_batchbuffer *batch = encoder_context->base.batch;
3808 va_status = gen9_vdenc_avc_check_capability(ctx, encode_state, encoder_context);
3810 if (va_status != VA_STATUS_SUCCESS)
3813 gen9_vdenc_avc_prepare(ctx, profile, encode_state, encoder_context);
3815 for (vdenc_context->current_pass = 0; vdenc_context->current_pass < vdenc_context->num_passes; vdenc_context->current_pass++) {
3816 vdenc_context->is_first_pass = (vdenc_context->current_pass == 0);
3817 vdenc_context->is_last_pass = (vdenc_context->current_pass == (vdenc_context->num_passes - 1));
3819 intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
3821 intel_batchbuffer_emit_mi_flush(batch);
3823 if (vdenc_context->brc_enabled) {
3824 if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset)
3825 gen9_vdenc_huc_brc_init_reset(ctx, encode_state, encoder_context);
3827 gen9_vdenc_huc_brc_update(ctx, encode_state, encoder_context);
3828 intel_batchbuffer_emit_mi_flush(batch);
3831 gen9_vdenc_mfx_vdenc_pipeline(ctx, encode_state, encoder_context);
3832 gen9_vdenc_read_status(ctx, encoder_context);
3834 intel_batchbuffer_end_atomic(batch);
3835 intel_batchbuffer_flush(batch);
3837 vdenc_context->brc_initted = 1;
3838 vdenc_context->brc_need_reset = 0;
3841 return VA_STATUS_SUCCESS;
3845 gen9_vdenc_pipeline(VADriverContextP ctx,
3847 struct encode_state *encode_state,
3848 struct intel_encoder_context *encoder_context)
3853 case VAProfileH264ConstrainedBaseline:
3854 case VAProfileH264Main:
3855 case VAProfileH264High:
3856 vaStatus = gen9_vdenc_avc_encode_picture(ctx, profile, encode_state, encoder_context);
3860 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
3868 gen9_vdenc_free_resources(struct gen9_vdenc_context *vdenc_context)
3872 i965_free_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
3873 i965_free_gpe_resource(&vdenc_context->brc_history_buffer_res);
3874 i965_free_gpe_resource(&vdenc_context->brc_stream_in_res);
3875 i965_free_gpe_resource(&vdenc_context->brc_stream_out_res);
3876 i965_free_gpe_resource(&vdenc_context->huc_dummy_res);
3878 for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++)
3879 i965_free_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
3881 i965_free_gpe_resource(&vdenc_context->vdenc_statistics_res);
3882 i965_free_gpe_resource(&vdenc_context->pak_statistics_res);
3883 i965_free_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
3884 i965_free_gpe_resource(&vdenc_context->hme_detection_summary_buffer_res);
3885 i965_free_gpe_resource(&vdenc_context->brc_constant_data_res);
3886 i965_free_gpe_resource(&vdenc_context->second_level_batch_res);
3888 i965_free_gpe_resource(&vdenc_context->huc_status_res);
3889 i965_free_gpe_resource(&vdenc_context->huc_status2_res);
3891 i965_free_gpe_resource(&vdenc_context->recon_surface_res);
3892 i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
3893 i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
3894 i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
3896 for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
3897 i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
3898 i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
3901 i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
3902 i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
3903 i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
3905 i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
3906 i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
3907 i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
3908 i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
3910 i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
/*
 * Destructor installed as encoder_context->mfc_context_destroy: free all
 * GPE resources, then the context itself.
 */
static void
gen9_vdenc_context_destroy(void *context)
{
    struct gen9_vdenc_context *vdenc_context = context;

    gen9_vdenc_free_resources(vdenc_context);

    free(vdenc_context);
}
3924 gen9_vdenc_allocate_resources(VADriverContextP ctx,
3925 struct intel_encoder_context *encoder_context,
3926 struct gen9_vdenc_context *vdenc_context)
3928 struct i965_driver_data *i965 = i965_driver_data(ctx);
3931 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_init_reset_dmem_res,
3932 ALIGN(sizeof(struct huc_brc_init_dmem), 64),
3933 "HuC Init&Reset DMEM buffer");
3935 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_history_buffer_res,
3936 ALIGN(HUC_BRC_HISTORY_BUFFER_SIZE, 0x1000),
3937 "HuC History buffer");
3939 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_in_res,
3940 ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3941 "HuC Stream In buffer");
3943 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_out_res,
3944 ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3945 "HuC Stream Out buffer");
3947 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_dummy_res,
3949 "HuC dummy buffer");
3951 for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
3952 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_update_dmem_res[i],
3953 ALIGN(sizeof(struct huc_brc_update_dmem), 64),
3954 "HuC BRC Update buffer");
3955 i965_zero_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
3958 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_statistics_res,
3959 ALIGN(VDENC_STATISTICS_SIZE, 0x1000),
3960 "VDENC statistics buffer");
3962 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->pak_statistics_res,
3963 ALIGN(PAK_STATISTICS_SIZE, 0x1000),
3964 "PAK statistics buffer");
3966 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_avc_image_state_res,
3967 ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3968 "VDENC/AVC image state buffer");
3970 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->hme_detection_summary_buffer_res,
3971 ALIGN(HME_DETECTION_SUMMARY_BUFFER_SIZE, 0x1000),
3972 "HME summary buffer");
3974 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_constant_data_res,
3975 ALIGN(BRC_CONSTANT_DATA_SIZE, 0x1000),
3976 "BRC constant buffer");
3978 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->second_level_batch_res,
3979 ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3980 "Second level batch buffer");
3982 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status_res,
3984 "HuC Status buffer");
3986 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status2_res,
3988 "HuC Status buffer");
3992 gen9_vdenc_hw_interfaces_init(VADriverContextP ctx,
3993 struct intel_encoder_context *encoder_context,
3994 struct gen9_vdenc_context *vdenc_context)
3996 vdenc_context->is_frame_level_vdenc = 1;
4000 gen95_vdenc_hw_interfaces_init(VADriverContextP ctx,
4001 struct intel_encoder_context *encoder_context,
4002 struct gen9_vdenc_context *vdenc_context)
4004 vdenc_context->use_extended_pak_obj_cmd = 1;
4008 vdenc_hw_interfaces_init(VADriverContextP ctx,
4009 struct intel_encoder_context *encoder_context,
4010 struct gen9_vdenc_context *vdenc_context)
4012 struct i965_driver_data *i965 = i965_driver_data(ctx);
4014 if (IS_KBL(i965->intel.device_info) ||
4015 IS_GLK(i965->intel.device_info) ||
4016 IS_GEN10(i965->intel.device_info)) {
4017 gen95_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
4019 gen9_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
4024 gen9_vdenc_context_get_status(VADriverContextP ctx,
4025 struct intel_encoder_context *encoder_context,
4026 struct i965_coded_buffer_segment *coded_buffer_segment)
4028 struct gen9_vdenc_status *vdenc_status = (struct gen9_vdenc_status *)coded_buffer_segment->codec_private_data;
4030 coded_buffer_segment->base.size = vdenc_status->bytes_per_frame;
4032 return VA_STATUS_SUCCESS;
4036 gen9_vdenc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
4038 struct gen9_vdenc_context *vdenc_context = calloc(1, sizeof(struct gen9_vdenc_context));
4043 vdenc_context->brc_initted = 0;
4044 vdenc_context->brc_need_reset = 0;
4045 vdenc_context->is_low_delay = 0;
4046 vdenc_context->current_pass = 0;
4047 vdenc_context->num_passes = 1;
4048 vdenc_context->vdenc_streamin_enable = 0;
4049 vdenc_context->vdenc_pak_threshold_check_enable = 0;
4050 vdenc_context->is_frame_level_vdenc = 0;
4052 vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
4053 gen9_vdenc_allocate_resources(ctx, encoder_context, vdenc_context);
4055 encoder_context->mfc_context = vdenc_context;
4056 encoder_context->mfc_context_destroy = gen9_vdenc_context_destroy;
4057 encoder_context->mfc_pipeline = gen9_vdenc_pipeline;
4058 encoder_context->mfc_brc_prepare = gen9_vdenc_context_brc_prepare;
4059 encoder_context->get_status = gen9_vdenc_context_get_status;