2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

#include "intel_batchbuffer.h"
#include "i965_defines.h"
#include "i965_structs.h"
#include "i965_drv_video.h"
#include "i965_encoder.h"
#include "i965_encoder_utils.h"
#include "intel_media.h"
#include "gen9_vdenc.h"
44 static const uint8_t buf_rate_adj_tab_i_lowdelay[72] = {
45 0, 0, -8, -12, -16, -20, -28, -36,
46 0, 0, -4, -8, -12, -16, -24, -32,
47 4, 2, 0, -1, -3, -8, -16, -24,
48 8, 4, 2, 0, -1, -4, -8, -16,
49 20, 16, 4, 0, -1, -4, -8, -16,
50 24, 20, 16, 8, 4, 0, -4, -8,
51 28, 24, 20, 16, 8, 4, 0, -8,
52 32, 24, 20, 16, 8, 4, 0, -4,
53 64, 48, 28, 20, 16, 12, 8, 4,
56 static const uint8_t buf_rate_adj_tab_p_lowdelay[72] = {
57 -8, -24, -32, -40, -44, -48, -52, -80,
58 -8, -16, -32, -40, -40, -44, -44, -56,
59 0, 0, -12, -20, -24, -28, -32, -36,
60 8, 4, 0, 0, -8, -16, -24, -32,
61 32, 16, 8, 4, -4, -8, -16, -20,
62 36, 24, 16, 8, 4, -2, -4, -8,
63 40, 36, 24, 20, 16, 8, 0, -8,
64 48, 40, 28, 24, 20, 12, 0, -4,
65 64, 48, 28, 20, 16, 12, 8, 4,
68 static const uint8_t buf_rate_adj_tab_b_lowdelay[72] = {
69 0, -4, -8, -16, -24, -32, -40, -48,
70 1, 0, -4, -8, -16, -24, -32, -40,
71 4, 2, 0, -1, -3, -8, -16, -24,
72 8, 4, 2, 0, -1, -4, -8, -16,
73 20, 16, 4, 0, -1, -4, -8, -16,
74 24, 20, 16, 8, 4, 0, -4, -8,
75 28, 24, 20, 16, 8, 4, 0, -8,
76 32, 24, 20, 16, 8, 4, 0, -4,
77 64, 48, 28, 20, 16, 12, 8, 4,
80 static const int8_t dist_qp_adj_tab_i_vbr[81] = {
81 +0, 0, 0, 0, 0, 3, 4, 6, 8,
82 +0, 0, 0, 0, 0, 2, 3, 5, 7,
83 -1, 0, 0, 0, 0, 2, 2, 4, 5,
84 -1, -1, 0, 0, 0, 1, 2, 2, 4,
85 -2, -2, -1, 0, 0, 0, 1, 2, 4,
86 -2, -2, -1, 0, 0, 0, 1, 2, 4,
87 -3, -2, -1, -1, 0, 0, 1, 2, 5,
88 -3, -2, -1, -1, 0, 0, 2, 4, 7,
89 -4, -3, -2, -1, 0, 1, 3, 5, 8,
92 static const int8_t dist_qp_adj_tab_p_vbr[81] = {
93 -1, 0, 0, 0, 0, 1, 1, 2, 3,
94 -1, -1, 0, 0, 0, 1, 1, 2, 3,
95 -2, -1, -1, 0, 0, 1, 1, 2, 3,
96 -3, -2, -2, -1, 0, 0, 1, 2, 3,
97 -3, -2, -1, -1, 0, 0, 1, 2, 3,
98 -3, -2, -1, -1, 0, 0, 1, 2, 3,
99 -3, -2, -1, -1, 0, 0, 1, 2, 3,
100 -3, -2, -1, -1, 0, 0, 1, 2, 3,
101 -3, -2, -1, -1, 0, 0, 1, 2, 3,
104 static const int8_t dist_qp_adj_tab_b_vbr[81] = {
105 +0, 0, 0, 0, 0, 2, 3, 3, 4,
106 +0, 0, 0, 0, 0, 2, 3, 3, 4,
107 -1, 0, 0, 0, 0, 2, 2, 3, 3,
108 -1, -1, 0, 0, 0, 1, 2, 2, 2,
109 -1, -1, -1, 0, 0, 0, 1, 2, 2,
110 -2, -1, -1, 0, 0, 0, 0, 1, 2,
111 -2, -1, -1, -1, 0, 0, 0, 1, 3,
112 -2, -2, -1, -1, 0, 0, 1, 1, 3,
113 -2, -2, -1, -1, 0, 1, 1, 2, 4,
116 static const int8_t buf_rate_adj_tab_i_vbr[72] = {
117 -4, -20, -28, -36, -40, -44, -48, -80,
118 +0, -8, -12, -20, -24, -28, -32, -36,
119 +0, 0, -8, -16, -20, -24, -28, -32,
120 +8, 4, 0, 0, -8, -16, -24, -28,
121 32, 24, 16, 2, -4, -8, -16, -20,
122 36, 32, 28, 16, 8, 0, -4, -8,
123 40, 36, 24, 20, 16, 8, 0, -8,
124 48, 40, 28, 24, 20, 12, 0, -4,
125 64, 48, 28, 20, 16, 12, 8, 4,
128 static const int8_t buf_rate_adj_tab_p_vbr[72] = {
129 -8, -24, -32, -44, -48, -56, -64, -80,
130 -8, -16, -32, -40, -44, -52, -56, -64,
131 +0, 0, -16, -28, -36, -40, -44, -48,
132 +8, 4, 0, 0, -8, -16, -24, -36,
133 20, 12, 4, 0, -8, -8, -8, -16,
134 24, 16, 8, 8, 8, 0, -4, -8,
135 40, 36, 24, 20, 16, 8, 0, -8,
136 48, 40, 28, 24, 20, 12, 0, -4,
137 64, 48, 28, 20, 16, 12, 8, 4,
140 static const int8_t buf_rate_adj_tab_b_vbr[72] = {
141 0, -4, -8, -16, -24, -32, -40, -48,
142 1, 0, -4, -8, -16, -24, -32, -40,
143 4, 2, 0, -1, -3, -8, -16, -24,
144 8, 4, 2, 0, -1, -4, -8, -16,
145 20, 16, 4, 0, -1, -4, -8, -16,
146 24, 20, 16, 8, 4, 0, -4, -8,
147 28, 24, 20, 16, 8, 4, 0, -8,
148 32, 24, 20, 16, 8, 4, 0, -4,
149 64, 48, 28, 20, 16, 12, 8, 4,
/*
 * Default constant data handed to the HuC BRC-update kernel each frame:
 * global/distortion QP adjustment tables, buffer-rate adjustment tables,
 * frame-size min/max/SCG tables and per-QP intra/inter cost tables.
 *
 * NOTE(review): this span is a garbled extraction -- every line carries a
 * fused original line number, several designated-initializer field names,
 * inner "}," separators and the struct's closing "};" were dropped.  The
 * code below is intentionally left byte-identical; restore it from the
 * pristine file before building.
 */
152 static struct huc_brc_update_constant_data
153 gen9_brc_update_constant_data = {
154 .global_rate_qp_adj_tab_i = {
155 48, 40, 32, 24, 16, 8, 0, -8,
156 40, 32, 24, 16, 8, 0, -8, -16,
157 32, 24, 16, 8, 0, -8, -16, -24,
158 24, 16, 8, 0, -8, -16, -24, -32,
159 16, 8, 0, -8, -16, -24, -32, -40,
160 8, 0, -8, -16, -24, -32, -40, -48,
161 0, -8, -16, -24, -32, -40, -48, -56,
162 48, 40, 32, 24, 16, 8, 0, -8,
165 .global_rate_qp_adj_tab_p = {
166 48, 40, 32, 24, 16, 8, 0, -8,
167 40, 32, 24, 16, 8, 0, -8, -16,
168 16, 8, 8, 4, -8, -16, -16, -24,
169 8, 0, 0, -8, -16, -16, -16, -24,
170 8, 0, 0, -24, -32, -32, -32, -48,
171 0, -16, -16, -24, -32, -48, -56, -64,
172 -8, -16, -32, -32, -48, -48, -56, -64,
173 -16,-32, -48, -48, -48, -56, -64, -80,
176 .global_rate_qp_adj_tab_b = {
177 48, 40, 32, 24, 16, 8, 0, -8,
178 40, 32, 24, 16, 8, 0, -8, -16,
179 32, 24, 16, 8, 0, -8, -16, -24,
180 24, 16, 8, 0, -8, -8, -16, -24,
181 16, 8, 0, 0, -8, -16, -24, -32,
182 16, 8, 0, 0, -8, -16, -24, -32,
183 0, -8, -8, -16, -32, -48, -56, -64,
184 0, -8, -8, -16, -32, -48, -56, -64
/* Distortion thresholds: 8 ascending bucket bounds + 2 padding zeros. */
187 .dist_threshld_i = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
188 .dist_threshld_p = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
189 .dist_threshld_b = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
191 .dist_qp_adj_tab_i = {
192 0, 0, 0, 0, 0, 3, 4, 6, 8,
193 0, 0, 0, 0, 0, 2, 3, 5, 7,
194 -1, 0, 0, 0, 0, 2, 2, 4, 5,
195 -1, -1, 0, 0, 0, 1, 2, 2, 4,
196 -2, -2, -1, 0, 0, 0, 1, 2, 4,
197 -2, -2, -1, 0, 0, 0, 1, 2, 4,
198 -3, -2, -1, -1, 0, 0, 1, 2, 5,
199 -3, -2, -1, -1, 0, 0, 2, 4, 7,
200 -4, -3, -2, -1, 0, 1, 3, 5, 8,
203 .dist_qp_adj_tab_p = {
204 -1, 0, 0, 0, 0, 1, 1, 2, 3,
205 -1, -1, 0, 0, 0, 1, 1, 2, 3,
206 -2, -1, -1, 0, 0, 1, 1, 2, 3,
207 -3, -2, -2, -1, 0, 0, 1, 2, 3,
208 -3, -2, -1, -1, 0, 0, 1, 2, 3,
209 -3, -2, -1, -1, 0, 0, 1, 2, 3,
210 -3, -2, -1, -1, 0, 0, 1, 2, 3,
211 -3, -2, -1, -1, 0, 0, 1, 2, 3,
212 -3, -2, -1, -1, 0, 0, 1, 2, 3,
215 .dist_qp_adj_tab_b = {
216 0, 0, 0, 0, 0, 2, 3, 3, 4,
217 0, 0, 0, 0, 0, 2, 3, 3, 4,
218 -1, 0, 0, 0, 0, 2, 2, 3, 3,
219 -1, -1, 0, 0, 0, 1, 2, 2, 2,
220 -1, -1, -1, 0, 0, 0, 1, 2, 2,
221 -2, -1, -1, 0, 0, 0, 0, 1, 2,
222 -2, -1, -1, -1, 0, 0, 0, 1, 3,
223 -2, -2, -1, -1, 0, 0, 1, 1, 3,
224 -2, -2, -1, -1, 0, 1, 1, 2, 4,
227 /* default table for non lowdelay */
228 .buf_rate_adj_tab_i = {
229 -4, -20, -28, -36, -40, -44, -48, -80,
230 0, -8, -12, -20, -24, -28, -32, -36,
231 0, 0, -8, -16, -20, -24, -28, -32,
232 8, 4, 0, 0, -8, -16, -24, -28,
233 32, 24, 16, 2, -4, -8, -16, -20,
234 36, 32, 28, 16, 8, 0, -4, -8,
235 40, 36, 24, 20, 16, 8, 0, -8,
236 48, 40, 28, 24, 20, 12, 0, -4,
237 64, 48, 28, 20, 16, 12, 8, 4,
240 /* default table for non lowdelay */
241 .buf_rate_adj_tab_p = {
242 -8, -24, -32, -44, -48, -56, -64, -80,
243 -8, -16, -32, -40, -44, -52, -56, -64,
244 0, 0, -16, -28, -36, -40, -44, -48,
245 8, 4, 0, 0, -8, -16, -24, -36,
246 20, 12, 4, 0, -8, -8, -8, -16,
247 24, 16, 8, 8, 8, 0, -4, -8,
248 40, 36, 24, 20, 16, 8, 0, -8,
249 48, 40, 28, 24, 20, 12, 0, -4,
250 64, 48, 28, 20, 16, 12, 8, 4,
253 /* default table for non lowdelay */
254 .buf_rate_adj_tab_b = {
255 0, -4, -8, -16, -24, -32, -40, -48,
256 1, 0, -4, -8, -16, -24, -32, -40,
257 4, 2, 0, -1, -3, -8, -16, -24,
258 8, 4, 2, 0, -1, -4, -8, -16,
259 20, 16, 4, 0, -1, -4, -8, -16,
260 24, 20, 16, 8, 4, 0, -4, -8,
261 28, 24, 20, 16, 8, 4, 0, -8,
262 32, 24, 20, 16, 8, 4, 0, -4,
263 64, 48, 28, 20, 16, 12, 8, 4,
/* Per-bucket frame-size bounds (units defined by the HuC kernel). */
266 .frame_size_min_tab_p = { 1, 2, 4, 6, 8, 10, 16, 16, 16 },
267 .frame_size_min_tab_i = { 1, 2, 4, 8, 16, 20, 24, 32, 36 },
269 .frame_size_max_tab_p = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
270 .frame_size_max_tab_i = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
272 .frame_size_scg_tab_p = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
273 .frame_size_scg_tab_i = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
/* Per-QP (0..41 shown) mode cost tables; the 4.4-format values below are
 * consumed by the encoder kernel.  NOTE(review): the field-name lines for
 * several of the following 42-entry tables were dropped by the extraction. */
275 .i_intra_non_pred = {
276 0x0e, 0x0e, 0x0e, 0x18, 0x19, 0x1b, 0x1c, 0x0d, 0x0f, 0x18, 0x19, 0x0d, 0x0f, 0x0f,
277 0x0c, 0x0e, 0x0c, 0x0c, 0x0a, 0x0a, 0x0b, 0x0a, 0x0a, 0x0a, 0x09, 0x09, 0x08, 0x08,
278 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x07, 0x07, 0x07, 0x07, 0x07,
282 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
283 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
284 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
288 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
289 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x04, 0x04, 0x04, 0x04, 0x06, 0x06, 0x06,
290 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07,
294 0x2e, 0x2e, 0x2e, 0x38, 0x39, 0x3a, 0x3b, 0x2c, 0x2e, 0x38, 0x39, 0x2d, 0x2f, 0x38,
295 0x2e, 0x38, 0x2e, 0x38, 0x2f, 0x2e, 0x38, 0x38, 0x38, 0x38, 0x2f, 0x2f, 0x2f, 0x2e,
296 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x1e, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x0e, 0x0d,
300 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
301 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
302 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
305 .p_intra_non_pred = {
306 0x06, 0x06, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x07,
307 0x07, 0x07, 0x06, 0x07, 0x07, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
308 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
312 0x1b, 0x1b, 0x1b, 0x1c, 0x1e, 0x28, 0x29, 0x1a, 0x1b, 0x1c, 0x1e, 0x1a, 0x1c, 0x1d,
313 0x1b, 0x1c, 0x1c, 0x1c, 0x1c, 0x1b, 0x1c, 0x1c, 0x1d, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c,
314 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
318 0x1d, 0x1d, 0x1d, 0x1e, 0x28, 0x29, 0x2a, 0x1b, 0x1d, 0x1e, 0x28, 0x1c, 0x1d, 0x1f,
319 0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1d, 0x1f, 0x1e, 0x1e, 0x1e, 0x1d, 0x1e, 0x1e, 0x1d,
320 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e,
324 0x38, 0x38, 0x38, 0x39, 0x3a, 0x3b, 0x3d, 0x2e, 0x38, 0x39, 0x3a, 0x2f, 0x39, 0x3a,
325 0x38, 0x39, 0x38, 0x39, 0x39, 0x38, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
326 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
330 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
331 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
332 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
336 0x07, 0x07, 0x07, 0x08, 0x09, 0x0b, 0x0c, 0x06, 0x07, 0x09, 0x0a, 0x07, 0x08, 0x09,
337 0x08, 0x09, 0x08, 0x09, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x08, 0x08, 0x08, 0x08,
338 0x08, 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
342 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x02, 0x02, 0x02, 0x03, 0x02, 0x02, 0x02,
343 0x02, 0x03, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
344 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
348 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
349 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
350 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
354 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
355 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
356 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04
362 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
363 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
364 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
369 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
370 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
371 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
376 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
377 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
378 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
383 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
384 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
385 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
390 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
391 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
392 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
397 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
398 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
399 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x1a, 0x1f, 0x2a, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d
404 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
405 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
406 0x1a, 0x1a, 0x1a, 0x1a, 0x2a, 0x2f, 0x3a, 0x3d, 0x3d, 0x3d, 0x3d, 0x3d, 0x3d, 0x3d,
411 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
412 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
413 0x1a, 0x1a, 0x1a, 0x1f, 0x2d, 0x3d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d,
419 static uint8_t vdenc_const_qp_lambda[44] = {
420 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
421 0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
422 0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
423 0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
424 0x4a, 0x53, 0x00, 0x00
/*
 * NOTE(review): the initializer bodies (and closing "};") of the five
 * I-frame tables below were dropped by the extraction -- only the
 * declaration lines survive, each with a fused original line number.
 * Code left byte-identical; the data must be restored from the pristine
 * file.  The *_p siblings further down are intact for reference.
 */
428 static uint16_t vdenc_const_skip_threshold[28] = {
433 static uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0[28] = {
438 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1[28] = {
443 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2[28] = {
448 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3[28] = {
454 static uint8_t vdenc_const_qp_lambda_p[44] = {
455 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
456 0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
457 0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
458 0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
459 0x4a, 0x53, 0x00, 0x00
463 static uint16_t vdenc_const_skip_threshold_p[28] = {
464 0x0000, 0x0000, 0x0000, 0x0000, 0x0002, 0x0004, 0x0007, 0x000b,
465 0x0011, 0x0019, 0x0023, 0x0032, 0x0044, 0x005b, 0x0077, 0x0099,
466 0x00c2, 0x00f1, 0x0128, 0x0168, 0x01b0, 0x0201, 0x025c, 0x02c2,
467 0x0333, 0x03b0, 0x0000, 0x0000
471 static uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0_p[28] = {
472 0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
473 0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x3f,
474 0x4e, 0x51, 0x5b, 0x63, 0x6f, 0x7f, 0x00, 0x00
478 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1_p[28] = {
479 0x03, 0x04, 0x05, 0x05, 0x07, 0x09, 0x0b, 0x0e, 0x12, 0x17,
480 0x1c, 0x21, 0x27, 0x2c, 0x33, 0x3b, 0x41, 0x51, 0x5c, 0x1a,
481 0x1e, 0x21, 0x22, 0x26, 0x2c, 0x30, 0x00, 0x00
485 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2_p[28] = {
486 0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
487 0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x0f,
488 0x13, 0x14, 0x16, 0x18, 0x1b, 0x1f, 0x00, 0x00
492 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3_p[28] = {
493 0x04, 0x05, 0x06, 0x09, 0x0b, 0x0d, 0x12, 0x16, 0x1b, 0x23,
494 0x2c, 0x33, 0x3d, 0x45, 0x4f, 0x5b, 0x66, 0x7f, 0x8e, 0x2a,
495 0x2f, 0x32, 0x37, 0x3c, 0x45, 0x4c, 0x00, 0x00
/*
 * BRC deviation thresholds (fractions of the target) used when deciding
 * how aggressively to correct under-/over-shoot for I, P/B and VBR cases.
 * NOTE(review): the "static const double" type lines were dropped by the
 * extraction and have been restored here -- confirm against the pristine
 * file (the literals are double-precision).
 */
static const double
vdenc_brc_dev_threshi0_fp_neg[4] = { 0.80, 0.60, 0.34, 0.2 };

static const double
vdenc_brc_dev_threshi0_fp_pos[4] = { 0.2, 0.4, 0.66, 0.9 };

static const double
vdenc_brc_dev_threshpb0_fp_neg[4] = { 0.90, 0.66, 0.46, 0.3 };

static const double
vdenc_brc_dev_threshpb0_fp_pos[4] = { 0.3, 0.46, 0.70, 0.90 };

static const double
vdenc_brc_dev_threshvbr0_neg[4] = { 0.90, 0.70, 0.50, 0.3 };

static const double
vdenc_brc_dev_threshvbr0_pos[4] = { 0.4, 0.5, 0.75, 0.90 };
516 static const unsigned char
517 vdenc_brc_estrate_thresh_p0[7] = { 4, 8, 12, 16, 20, 24, 28 };
519 static const unsigned char
520 vdenc_brc_estrate_thresh_i0[7] = { 4, 8, 12, 16, 20, 24, 28 };
522 static const uint16_t
523 vdenc_brc_start_global_adjust_frame[4] = { 10, 50, 100, 150 };
526 vdenc_brc_global_rate_ratio_threshold[7] = { 80, 90, 95, 101, 105, 115, 130};
529 vdenc_brc_start_global_adjust_mult[5] = { 1, 1, 3, 2, 1 };
532 vdenc_brc_start_global_adjust_div[5] = { 40, 5, 5, 3, 1 };
535 vdenc_brc_global_rate_ratio_threshold_qp[8] = { -3, -2, -1, 0, 1, 1, 2, 3 };
/*
 * Per-frame-type ([I][P]) x per-LUT-mode x per-QP (0..51) mode cost
 * constants consumed by gen9_vdenc_avc_calculate_mode_cost() via
 * map_44_lut_value().
 *
 * NOTE(review): this span is a garbled extraction -- every line carries a
 * fused original line number, the inner "{"/"}," group delimiters and
 * several complete 52-entry sub-tables (the ones where only the
 * //LUTMODE_* comment survives) were dropped.  Code left byte-identical;
 * restore from the pristine file before building.
 */
537 const int vdenc_mode_const[2][12][52] = {
540 //LUTMODE_INTRA_NONPRED
542 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, //QP=[0 ~12]
543 16, 18, 22, 24, 13, 15, 16, 18, 13, 15, 15, 12, 14, //QP=[13~25]
544 12, 12, 10, 10, 11, 10, 10, 10, 9, 9, 8, 8, 8, //QP=[26~38]
545 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, //QP=[39~51]
548 //LUTMODE_INTRA_16x16, LUTMODE_INTRA
550 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
551 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13~25]
552 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26~38]
553 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39~51]
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
559 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, //QP=[13~25]
560 1, 1, 1, 1, 1, 4, 4, 4, 4, 6, 6, 6, 6, //QP=[26~38]
561 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, //QP=[39~51]
566 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, //QP=[0 ~12]
567 64, 72, 80, 88, 48, 56, 64, 72, 53, 59, 64, 56, 64, //QP=[13~25]
568 57, 64, 58, 55, 64, 64, 64, 64, 59, 59, 60, 57, 50, //QP=[26~38]
569 46, 42, 38, 34, 31, 27, 23, 22, 19, 18, 16, 14, 13, //QP=[39~51]
572 //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
578 //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16x8_FIELD
581 //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8X8_FIELD
584 //LUTMODE_INTER_16x16, LUTMODE_INTER
593 //LUTMODE_INTRA_CHROMA
599 //LUTMODE_INTRA_NONPRED
601 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[0 ~12]
602 7, 8, 9, 10, 5, 6, 7, 8, 6, 7, 7, 7, 7, //QP=[13~25]
603 6, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[26~38]
604 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[39~51]
607 //LUTMODE_INTRA_16x16, LUTMODE_INTRA
609 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
610 24, 28, 31, 35, 19, 21, 24, 28, 20, 24, 25, 21, 24,
611 24, 24, 24, 21, 24, 24, 26, 24, 24, 24, 24, 24, 24,
612 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
618 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, //QP=[0 ~12]
619 28, 32, 36, 40, 22, 26, 28, 32, 24, 26, 30, 26, 28, //QP=[13~25]
620 26, 28, 26, 26, 30, 28, 28, 28, 26, 28, 28, 26, 28, //QP=[26~38]
621 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, //QP=[39~51]
626 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, //QP=[0 ~12]
627 72, 80, 88, 104, 56, 64, 72, 80, 58, 68, 76, 64, 68, //QP=[13~25]
628 64, 68, 68, 64, 70, 70, 70, 70, 68, 68, 68, 68, 68, //QP=[26~38]
629 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, //QP=[39~51]
632 //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
634 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[0 ~12]
635 8, 9, 11, 12, 6, 7, 9, 10, 7, 8, 9, 8, 9, //QP=[13~25]
636 8, 9, 8, 8, 9, 9, 9, 9, 8, 8, 8, 8, 8, //QP=[26~38]
637 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, //QP=[39~51]
642 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, //QP=[0 ~12]
643 2, 3, 3, 3, 2, 2, 2, 3, 2, 2, 2, 2, 3, //QP=[13~25]
644 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, //QP=[26~38]
645 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, //QP=[39~51]
648 //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16X8_FIELD
650 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[0 ~12]
651 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[13~25]
652 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[26~38]
653 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[39~51]
656 //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8x8_FIELD
658 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[0 ~12]
659 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[13~25]
660 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[26~38]
661 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[39~51]
664 //LUTMODE_INTER_16x16, LUTMODE_INTER
666 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[0 ~12]
667 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[13~25]
668 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[26~38]
669 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[39~51]
674 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
675 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13~25]
676 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26~38]
677 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39~51]
682 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[0 ~12]
683 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[13~25]
684 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[26~38]
685 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[39~51]
688 //LUTMODE_INTRA_CHROMA
690 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
691 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13~25]
692 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26~38]
693 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39~51]
/*
 * MV cost skip-bias table in quarter-pel units, indexed by MV magnitude
 * bucket; fed through map_44_lut_value() into the VDEnc MV cost state.
 * Fixed: stripped fused line numbers and restored the dropped "};".
 */
const int vdenc_mv_cost_skipbias_qpel[8] = {
    0, 6, 6, 9, 10, 13, 14, 16
};
/*
 * HME MV cost table: 8 MV-magnitude buckets x 52 QPs.  Costs are flat for
 * low QPs and ramp up at high QPs in the largest buckets.
 * Fixed: stripped fused line numbers, restored the dropped per-bucket
 * brace groups and the closing "};" (all 8 x 52 values were present).
 */
const int vdenc_hme_cost[8][52] = {
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,             //QP=[0 ~12]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,             //QP=[13~25]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,             //QP=[26~38]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,             //QP=[39~51]
    },
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,             //QP=[0 ~12]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,             //QP=[13~25]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,             //QP=[26~38]
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,             //QP=[39~51]
    },
    {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,             //QP=[0 ~12]
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,             //QP=[13~25]
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,             //QP=[26~38]
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,             //QP=[39~51]
    },
    {
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,             //QP=[0 ~12]
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,             //QP=[13~25]
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,             //QP=[26~38]
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,             //QP=[39~51]
    },
    {
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[0 ~12]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[13~25]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[26~38]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[39~51]
    },
    {
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[0 ~12]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[13~25]
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[26~38]
        10, 10, 10, 10, 20, 30, 40, 50, 50, 50, 50, 50, 50, //QP=[39~51]
    },
    {
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[0 ~12]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[13~25]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[26~38]
        20, 20, 20, 40, 60, 80, 100, 100, 100, 100, 100, 100, 100, //QP=[39~51]
    },
    {
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[0 ~12]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[13~25]
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[26~38]
        20, 20, 30, 50, 100, 200, 200, 200, 200, 200, 200, 200, 200, //QP=[39~51]
    },
};
/*
 * Emit a 64-bit buffer address (2 dwords) into the BCS batch.  A non-NULL
 * bo gets a 64-bit relocation (write domain enabled when is_target);
 * a NULL bo emits two zero dwords as a null address.
 * NOTE(review): the if (bo)/else structure and trailing "} while (0)"
 * were dropped by the extraction and restored here -- the surviving lines
 * (RELOC64 call plus two zero dwords) only make sense under a null check.
 */
#define OUT_BUFFER_2DW(batch, bo, is_target, delta) do {                \
        if (bo) {                                                       \
            OUT_BCS_RELOC64(batch,                                      \
                            bo,                                         \
                            I915_GEM_DOMAIN_RENDER,                     \
                            is_target ? I915_GEM_DOMAIN_RENDER : 0,     \
                            delta);                                     \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
    } while (0)
/*
 * Emit a 64-bit buffer address followed by one attribute dword
 * (e.g. MOCS/arbitration bits) -- 3 dwords total.
 * Fixed: stripped fused line numbers and restored the dropped
 * "} while (0)" terminator.
 */
#define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr) do {  \
        OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
        OUT_BCS_BATCH(batch, attr);                             \
    } while (0)
/*
 * Describe and allocate a linear (untiled) 1D GPE buffer of bfsize bytes.
 * NOTE(review): several lines were dropped by the extraction; the
 * buffer.height assignment and the i965_allocate_gpe_resource() trailing
 * arguments were restored from the call's signature -- confirm against
 * the pristine file.  Also wrapped in do/while (0) so the macro is safe
 * in unbraced if/else bodies.
 */
#define ALLOC_VDENC_BUFFER_RESOURCE(buffer, bfsize, des) do {   \
        buffer.type = I965_GPE_RESOURCE_BUFFER;                 \
        buffer.width = bfsize;                                  \
        buffer.height = 1;                                      \
        buffer.pitch = buffer.width;                            \
        buffer.size = buffer.pitch;                             \
        buffer.tiling = I915_TILING_NONE;                       \
        i965_allocate_gpe_resource(i965->intel.bufmgr,          \
                                   &buffer,                     \
                                   bfsize,                      \
                                   (des));                      \
    } while (0)
/*
 * Return the maximum vertical MV range (in quarter-pel units) allowed for
 * the given H.264 level (level given as level_idc / 10, e.g. 30 = level 3.0).
 * NOTE(review): the return type, braces and the "level <= 10" branch were
 * dropped by the extraction and restored from the surviving else-if chain
 * -- confirm against the pristine file.
 */
static int
gen9_vdenc_get_max_vmv_range(int level)
{
    int max_vmv_range = 512;

    if (level <= 10)
        max_vmv_range = 256;
    else if (level <= 20)
        max_vmv_range = 512;
    else if (level <= 30)
        max_vmv_range = 1024;
    else
        max_vmv_range = 2048;

    return max_vmv_range;
}
/*
 * Pack a linear cost v into the hardware 4.4 LUT format:
 * high nibble = shift d, low nibble = v rounded right-shifted by d.
 * Returns 0 for v == 0 and clamps to max (itself in 4.4 format) once v
 * reaches the largest value max can represent.
 *
 * Fixes: restored the dropped return type, braces, early returns and
 * return statement; replaced the original log()/log(2.0) computation of
 * floor(log2(v)) with an exact integer bit-scan (same result for the
 * intended integer inputs, immune to FP rounding and needs no libm).
 */
static unsigned char
map_44_lut_value(unsigned int v, unsigned char max)
{
    unsigned int maxcost;
    unsigned int t;
    int d;
    unsigned char ret;

    if (v == 0)
        return 0;

    /* Largest value representable under the cap: mantissa << shift. */
    maxcost = ((max & 15) << (max >> 4));

    if (v >= maxcost)
        return max;

    /* d = max(0, floor(log2(v)) - 3): count bits of v >> 4. */
    d = 0;
    for (t = v >> 4; t != 0; t >>= 1)
        d++;

    /* Round-to-nearest when shifting the mantissa down by d. */
    ret = (unsigned char)((d << 4) + (int)((v + (d == 0 ? 0 : (1 << (d - 1)))) >> d));
    /* Avoid a zero mantissa: force the mid-point bit instead. */
    ret = (ret & 0xf) == 0 ? (ret | 8) : ret;

    return ret;
}
841 gen9_vdenc_update_rate_control_parameters(VADriverContextP ctx,
842 struct intel_encoder_context *encoder_context,
843 VAEncMiscParameterRateControl *misc)
845 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
847 vdenc_context->max_bit_rate = ALIGN(misc->bits_per_second, 1000) / 1000;
848 vdenc_context->mb_brc_enabled = 0;
850 if (vdenc_context->internal_rate_mode == I965_BRC_CBR) {
851 vdenc_context->min_bit_rate = vdenc_context->max_bit_rate;
852 vdenc_context->mb_brc_enabled = (misc->rc_flags.bits.mb_rate_control < 2);
854 if (vdenc_context->target_bit_rate != vdenc_context->max_bit_rate) {
855 vdenc_context->target_bit_rate = vdenc_context->max_bit_rate;
856 vdenc_context->brc_need_reset = 1;
858 } else if (vdenc_context->internal_rate_mode == I965_BRC_VBR) {
859 vdenc_context->min_bit_rate = vdenc_context->max_bit_rate * (2 * misc->target_percentage - 100) / 100;
860 vdenc_context->mb_brc_enabled = (misc->rc_flags.bits.mb_rate_control < 2);
862 if (vdenc_context->target_bit_rate != vdenc_context->max_bit_rate * misc->target_percentage / 100) {
863 vdenc_context->target_bit_rate = vdenc_context->max_bit_rate * misc->target_percentage / 100;
864 vdenc_context->brc_need_reset = 1;
870 gen9_vdenc_update_hrd_parameters(VADriverContextP ctx,
871 struct intel_encoder_context *encoder_context,
872 VAEncMiscParameterHRD *misc)
874 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
876 if (vdenc_context->internal_rate_mode == I965_BRC_CQP)
879 vdenc_context->vbv_buffer_size_in_bit = misc->buffer_size;
880 vdenc_context->init_vbv_buffer_fullness_in_bit = misc->initial_buffer_fullness;
884 gen9_vdenc_update_framerate_parameters(VADriverContextP ctx,
885 struct intel_encoder_context *encoder_context,
886 VAEncMiscParameterFrameRate *misc)
888 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
890 vdenc_context->frames_per_100s = misc->framerate; /* misc->framerate is multiple of 100 */
894 gen9_vdenc_update_roi_parameters(VADriverContextP ctx,
895 struct intel_encoder_context *encoder_context,
896 VAEncMiscParameterBufferROI *misc)
898 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
901 if (!misc || !misc->roi) {
902 vdenc_context->num_roi = 0;
906 vdenc_context->num_roi = MIN(misc->num_roi, 3);
907 vdenc_context->max_delta_qp = misc->max_delta_qp;
908 vdenc_context->min_delta_qp = misc->min_delta_qp;
909 vdenc_context->vdenc_streamin_enable = (vdenc_context->num_roi == 0);
911 for (i = 0; i < vdenc_context->num_roi; i++) {
912 vdenc_context->roi[i].left = misc->roi->roi_rectangle.x;
913 vdenc_context->roi[i].right = vdenc_context->roi[i].left + misc->roi->roi_rectangle.width;
914 vdenc_context->roi[i].top = misc->roi->roi_rectangle.y;
915 vdenc_context->roi[i].bottom = vdenc_context->roi[i].top + misc->roi->roi_rectangle.height;
916 vdenc_context->roi[i].value = misc->roi->roi_value;
918 vdenc_context->roi[i].left /= 16;
919 vdenc_context->roi[i].right /= 16;
920 vdenc_context->roi[i].top /= 16;
921 vdenc_context->roi[i].bottom /= 16;
926 gen9_vdenc_update_misc_parameters(VADriverContextP ctx,
927 struct encode_state *encode_state,
928 struct intel_encoder_context *encoder_context)
931 VAEncMiscParameterBuffer *misc_param;
933 for (i = 0; i < ARRAY_ELEMS(encode_state->misc_param); i++) {
934 if (!encode_state->misc_param[i] || !encode_state->misc_param[i]->buffer)
937 misc_param = (VAEncMiscParameterBuffer *)encode_state->misc_param[i]->buffer;
939 switch (misc_param->type) {
940 case VAEncMiscParameterTypeFrameRate:
941 gen9_vdenc_update_framerate_parameters(ctx,
943 (VAEncMiscParameterFrameRate *)misc_param->data);
946 case VAEncMiscParameterTypeRateControl:
947 gen9_vdenc_update_rate_control_parameters(ctx,
949 (VAEncMiscParameterRateControl *)misc_param->data);
952 case VAEncMiscParameterTypeHRD:
953 gen9_vdenc_update_hrd_parameters(ctx,
955 (VAEncMiscParameterHRD *)misc_param->data);
958 case VAEncMiscParameterTypeROI:
959 gen9_vdenc_update_roi_parameters(ctx,
961 (VAEncMiscParameterBufferROI *)misc_param->data);
971 gen9_vdenc_update_parameters(VADriverContextP ctx,
973 struct encode_state *encode_state,
974 struct intel_encoder_context *encoder_context)
976 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
977 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
978 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
980 if (profile == VAProfileH264High)
981 vdenc_context->transform_8x8_mode_enable = !!pic_param->pic_fields.bits.transform_8x8_mode_flag;
983 vdenc_context->transform_8x8_mode_enable = 0;
985 vdenc_context->frame_width_in_mbs = seq_param->picture_width_in_mbs;
986 vdenc_context->frame_height_in_mbs = seq_param->picture_height_in_mbs;
988 vdenc_context->frame_width = vdenc_context->frame_width_in_mbs * 16;
989 vdenc_context->frame_height = vdenc_context->frame_height_in_mbs * 16;
991 vdenc_context->down_scaled_width_in_mb4x = WIDTH_IN_MACROBLOCKS(vdenc_context->frame_width / SCALE_FACTOR_4X);
992 vdenc_context->down_scaled_height_in_mb4x = HEIGHT_IN_MACROBLOCKS(vdenc_context->frame_height / SCALE_FACTOR_4X);
993 vdenc_context->down_scaled_width_4x = vdenc_context->down_scaled_width_in_mb4x * 16;
994 vdenc_context->down_scaled_height_4x = ((vdenc_context->down_scaled_height_in_mb4x + 1) >> 1) * 16;
995 vdenc_context->down_scaled_height_4x = ALIGN(vdenc_context->down_scaled_height_4x, 32) << 1;
997 if (vdenc_context->internal_rate_mode == I965_BRC_CBR) {
998 vdenc_context->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
999 vdenc_context->max_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
1000 vdenc_context->min_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
1003 vdenc_context->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
1004 vdenc_context->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
1005 vdenc_context->frames_per_100s = 3000; /* 30fps */
1006 vdenc_context->gop_size = seq_param->intra_period;
1007 vdenc_context->ref_dist = seq_param->ip_period;
1008 vdenc_context->vdenc_streamin_enable = 0;
1010 gen9_vdenc_update_misc_parameters(ctx, encode_state, encoder_context);
1012 vdenc_context->current_pass = 0;
1013 vdenc_context->num_passes = 1;
1015 if (vdenc_context->internal_rate_mode == I965_BRC_CBR ||
1016 vdenc_context->internal_rate_mode == I965_BRC_VBR)
1017 vdenc_context->brc_enabled = 1;
1019 vdenc_context->brc_enabled = 0;
1021 if (vdenc_context->brc_enabled &&
1022 (!vdenc_context->init_vbv_buffer_fullness_in_bit ||
1023 !vdenc_context->vbv_buffer_size_in_bit ||
1024 !vdenc_context->max_bit_rate ||
1025 !vdenc_context->target_bit_rate ||
1026 !vdenc_context->frames_per_100s))
1027 vdenc_context->brc_enabled = 0;
1029 if (!vdenc_context->brc_enabled) {
1030 vdenc_context->target_bit_rate = 0;
1031 vdenc_context->max_bit_rate = 0;
1032 vdenc_context->min_bit_rate = 0;
1033 vdenc_context->init_vbv_buffer_fullness_in_bit = 0;
1034 vdenc_context->vbv_buffer_size_in_bit = 0;
1036 vdenc_context->num_passes = NUM_OF_BRC_PAK_PASSES;
/*
 * Program the per-frame VDENC cost lookup tables (mode costs, MV costs and
 * HME MV costs) for the given slice QP.  Values are taken from the constant
 * tables (vdenc_mode_const, vdenc_mv_cost_skipbias_qpel, vdenc_hme_cost) and
 * packed into the hardware's 4.4 LUT format via map_44_lut_value().
 * NOTE(review): the return-type line and the trailing "int qp)" parameter
 * line are not visible in this excerpt — confirm against the full file.
 */
1041 gen9_vdenc_avc_calculate_mode_cost(VADriverContextP ctx,
1042 struct encode_state *encode_state,
1043 struct intel_encoder_context *encoder_context,
1046 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1047 unsigned int frame_type = vdenc_context->frame_type;
/* Clear all three tables first; entries not set below stay zero. */
1049 memset(vdenc_context->mode_cost, 0, sizeof(vdenc_context->mode_cost));
1050 memset(vdenc_context->mv_cost, 0, sizeof(vdenc_context->mv_cost));
1051 memset(vdenc_context->hme_mv_cost, 0, sizeof(vdenc_context->hme_mv_cost));
/* Intra mode costs are programmed for every frame type. */
1053 vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_NONPRED] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_NONPRED][qp]), 0x6f);
1054 vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_16x16][qp]), 0x8f);
1055 vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_8x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_8x8][qp]), 0x8f);
1056 vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_4x4] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_4x4][qp]), 0x8f);
/* Inter mode, MV and HME costs apply to P frames only (no B-frame support
 * visible in this path). */
1058 if (frame_type == VDENC_FRAME_P) {
1059 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x16][qp]), 0x8f);
1060 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x8][qp]), 0x8f);
1061 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X8Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X8Q][qp]), 0x6f);
1062 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X4Q][qp]), 0x6f);
1063 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_4X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_4X4Q][qp]), 0x6f);
1064 vdenc_context->mode_cost[VDENC_LUTMODE_REF_ID] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_REF_ID][qp]), 0x6f);
/* MV costs use fixed skip-bias values (QP-independent). */
1066 vdenc_context->mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[0]), 0x6f);
1067 vdenc_context->mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[1]), 0x6f);
1068 vdenc_context->mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[2]), 0x6f);
1069 vdenc_context->mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[3]), 0x6f);
1070 vdenc_context->mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[4]), 0x6f);
1071 vdenc_context->mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[5]), 0x6f);
1072 vdenc_context->mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[6]), 0x6f);
1073 vdenc_context->mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[7]), 0x6f);
/* HME MV costs are QP-dependent. */
1075 vdenc_context->hme_mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_hme_cost[0][qp]), 0x6f);
1076 vdenc_context->hme_mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_hme_cost[1][qp]), 0x6f);
1077 vdenc_context->hme_mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_hme_cost[2][qp]), 0x6f);
1078 vdenc_context->hme_mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_hme_cost[3][qp]), 0x6f);
1079 vdenc_context->hme_mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_hme_cost[4][qp]), 0x6f);
1080 vdenc_context->hme_mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_hme_cost[5][qp]), 0x6f);
1081 vdenc_context->hme_mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_hme_cost[6][qp]), 0x6f);
1082 vdenc_context->hme_mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_hme_cost[7][qp]), 0x6f);
/*
 * Write the per-macroblock ROI selection index into the VDENC StreamIn
 * buffer.  Every MB starts as 0 (non-ROI); an MB inside ROI[i] gets the
 * 1-based index i+1.  Iterating from the last ROI down to the first means
 * the FIRST matching (lowest-index) ROI ends up winning the final write —
 * the in-code comment says the last one has higher priority; the loop
 * direction achieves that by letting lower indices overwrite later.
 * No-op when no ROI is configured or the StreamIn buffer cannot be mapped
 * (early returns; their statement lines are not visible in this excerpt).
 */
1087 gen9_vdenc_update_roi_in_streamin_state(VADriverContextP ctx,
1088 struct intel_encoder_context *encoder_context)
1090 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1091 struct gen9_vdenc_streamin_state *streamin_state;
/* Nothing to do when the app configured no ROI regions. */
1094 if (!vdenc_context->num_roi)
1097 streamin_state = (struct gen9_vdenc_streamin_state *)i965_map_gpe_resource(&vdenc_context->vdenc_streamin_res);
/* Mapping can fail; bail out rather than dereference NULL. */
1099 if (!streamin_state)
/* One streamin_state entry per MB, row-major (row * width + col). */
1102 for (col = 0; col < vdenc_context->frame_width_in_mbs; col++) {
1103 for (row = 0; row < vdenc_context->frame_height_in_mbs; row++) {
1104 streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = 0; /* non-ROI region */
1106 /* The last one has higher priority */
1107 for (i = vdenc_context->num_roi - 1; i >= 0; i--) {
/* ROI bounds are inclusive and expressed in MB units. */
1108 if ((col >= vdenc_context->roi[i].left && col <= vdenc_context->roi[i].right) &&
1109 (row >= vdenc_context->roi[i].top && row <= vdenc_context->roi[i].bottom)) {
1110 streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = i + 1;
1118 i965_unmap_gpe_resource(&vdenc_context->vdenc_streamin_res);
/*
 * Per-frame preparation for the VDENC AVC pipeline:
 *   1. refresh encode parameters from the sequence/picture buffers;
 *   2. detect whether in-loop deblocking is enabled on any slice;
 *   3. (re)bind reconstructed, 4x-downscaled, reference and input surfaces
 *      as GPE resources;
 *   4. set up the coded-buffer bitstream region and the status buffer that
 *      shares the same bo;
 *   5. (re)allocate the MFX/VDENC row-store scratch buffers and the
 *      StreamIn buffer;
 *   6. resolve the list0 reference indices for the first slice, derive the
 *      frame type and slice QP, and program the mode-cost and ROI state.
 * Returns a VAStatus (VA_STATUS_SUCCESS on success).
 * NOTE(review): several declaration/branch lines (e.g. "dri_bo *bo",
 * "char *pbuffer", "int qp", else branches and closing braces) are not
 * visible in this excerpt — confirm against the full file.
 */
1122 gen9_vdenc_avc_prepare(VADriverContextP ctx,
1124 struct encode_state *encode_state,
1125 struct intel_encoder_context *encoder_context)
1127 struct i965_driver_data *i965 = i965_driver_data(ctx);
1128 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1129 struct i965_coded_buffer_segment *coded_buffer_segment;
1130 struct object_surface *obj_surface;
1131 struct object_buffer *obj_buffer;
1132 VAEncPictureParameterBufferH264 *pic_param;
1133 VAEncSliceParameterBufferH264 *slice_param;
1134 VDEncAvcSurface *vdenc_avc_surface;
1136 int i, j, enable_avc_ildb = 0;
1140 gen9_vdenc_update_parameters(ctx, profile, encode_state, encoder_context);
/* Deblocking is enabled for the frame if ANY slice has
 * disable_deblocking_filter_idc != 1; stop scanning once found. */
1142 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
1143 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
1144 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
1146 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
1147 assert((slice_param->slice_type == SLICE_TYPE_I) ||
1148 (slice_param->slice_type == SLICE_TYPE_SI) ||
1149 (slice_param->slice_type == SLICE_TYPE_P) ||
1150 (slice_param->slice_type == SLICE_TYPE_SP) ||
1151 (slice_param->slice_type == SLICE_TYPE_B));
1153 if (slice_param->disable_deblocking_filter_idc != 1) {
1154 enable_avc_ildb = 1;
1162 /* Setup current frame */
1163 obj_surface = encode_state->reconstructed_object;
1164 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Lazily attach a VDEncAvcSurface (holding the 4x-downscaled companion
 * surface used for HME) to the reconstructed surface. */
1166 if (obj_surface->private_data == NULL) {
1167 vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
1168 assert(vdenc_avc_surface);
1170 vdenc_avc_surface->ctx = ctx;
1171 i965_CreateSurfaces(ctx,
1172 vdenc_context->down_scaled_width_4x,
1173 vdenc_context->down_scaled_height_4x,
1174 VA_RT_FORMAT_YUV420,
1176 &vdenc_avc_surface->scaled_4x_surface_id);
1177 vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
1178 assert(vdenc_avc_surface->scaled_4x_surface_obj);
1179 i965_check_alloc_surface_bo(ctx,
1180 vdenc_avc_surface->scaled_4x_surface_obj,
1185 obj_surface->private_data = (void *)vdenc_avc_surface;
1186 obj_surface->free_private_data = (void *)vdenc_free_avc_surface;
1189 vdenc_avc_surface = (VDEncAvcSurface *)obj_surface->private_data;
1190 assert(vdenc_avc_surface->scaled_4x_surface_obj);
1192 /* Reconstructed surfaces */
1193 i965_free_gpe_resource(&vdenc_context->recon_surface_res);
1194 i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
1195 i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
1196 i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
1198 i965_object_surface_to_2d_gpe_resource(&vdenc_context->recon_surface_res, obj_surface);
1199 i965_object_surface_to_2d_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res, vdenc_avc_surface->scaled_4x_surface_obj);
/* Recon output goes through the post-deblocking path when deblocking is
 * on, the pre-deblocking path otherwise (else branch not visible here). */
1201 if (enable_avc_ildb) {
1202 i965_object_surface_to_2d_gpe_resource(&vdenc_context->post_deblocking_output_res, obj_surface);
1204 i965_object_surface_to_2d_gpe_resource(&vdenc_context->pre_deblocking_output_res, obj_surface);
1208 /* Reference surfaces */
1209 for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
1210 assert(ARRAY_ELEMS(vdenc_context->list_reference_res) ==
1211 ARRAY_ELEMS(vdenc_context->list_scaled_4x_reference_res));
1212 i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
1213 i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
1214 obj_surface = encode_state->reference_objects[i];
1216 if (obj_surface && obj_surface->bo) {
1217 i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_reference_res[i], obj_surface);
/* Same lazy 4x-scaled companion allocation as for the recon surface. */
1219 if (obj_surface->private_data == NULL) {
1220 vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
1221 assert(vdenc_avc_surface);
1223 vdenc_avc_surface->ctx = ctx;
1224 i965_CreateSurfaces(ctx,
1225 vdenc_context->down_scaled_width_4x,
1226 vdenc_context->down_scaled_height_4x,
1227 VA_RT_FORMAT_YUV420,
1229 &vdenc_avc_surface->scaled_4x_surface_id);
1230 vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
1231 assert(vdenc_avc_surface->scaled_4x_surface_obj);
1232 i965_check_alloc_surface_bo(ctx,
1233 vdenc_avc_surface->scaled_4x_surface_obj,
/* NOTE(review): this branch sets free_private_data to
 * gen_free_avc_surface while the recon path above uses
 * vdenc_free_avc_surface — verify the mismatch is intentional. */
1238 obj_surface->private_data = vdenc_avc_surface;
1239 obj_surface->free_private_data = gen_free_avc_surface;
1242 vdenc_avc_surface = obj_surface->private_data;
1243 i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i], vdenc_avc_surface->scaled_4x_surface_obj);
1247 /* Input YUV surface */
1248 i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
1249 i965_object_surface_to_2d_gpe_resource(&vdenc_context->uncompressed_input_surface_res, encode_state->input_yuv_object);
1251 /* Encoded bitstream */
1252 obj_buffer = encode_state->coded_buf_object;
1253 bo = obj_buffer->buffer_store->bo;
1254 i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
1255 i965_dri_object_to_buffer_gpe_resource(&vdenc_context->compressed_bitstream.res, bo);
/* The bitstream starts after the coded-buffer header; the end is the
 * buffer size minus a 4KB reserve, rounded to a 4KB boundary. */
1256 vdenc_context->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
1257 vdenc_context->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
/* Status buffer lives in the same bo, inside the coded-buffer header
 * (note: "bffuer" is a pre-existing typo in the struct member name). */
1260 i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
1261 i965_dri_object_to_buffer_gpe_resource(&vdenc_context->status_bffuer.res, bo);
1262 vdenc_context->status_bffuer.base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
1263 vdenc_context->status_bffuer.size = ALIGN(sizeof(struct gen9_vdenc_status), 64);
1264 vdenc_context->status_bffuer.bytes_per_frame_offset = offsetof(struct gen9_vdenc_status, bytes_per_frame);
1265 assert(vdenc_context->status_bffuer.base_offset + vdenc_context->status_bffuer.size <
1266 vdenc_context->compressed_bitstream.start_offset);
/* Initialize the coded-buffer header and zero the status area via a CPU
 * mapping of the bo (the map/unmap lines are not visible here). */
1270 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
1271 coded_buffer_segment->mapped = 0;
1272 coded_buffer_segment->codec = encoder_context->codec;
1273 coded_buffer_segment->status_support = 1;
1275 pbuffer = bo->virtual;
1276 pbuffer += vdenc_context->status_bffuer.base_offset;
1277 memset(pbuffer, 0, vdenc_context->status_bffuer.size);
/* Row-store scratch buffers, all sized per MB row. */
1281 i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
1282 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_intra_row_store_scratch_res,
1283 vdenc_context->frame_width_in_mbs * 64,
1284 "Intra row store scratch buffer");
1286 i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
1287 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_deblocking_filter_row_store_scratch_res,
1288 vdenc_context->frame_width_in_mbs * 256,
1289 "Deblocking filter row store scratch buffer");
1291 i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
1292 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_bsd_mpc_row_store_scratch_res,
1293 vdenc_context->frame_width_in_mbs * 128,
1294 "BSD/MPC row store scratch buffer");
1296 i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
1297 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_row_store_scratch_res,
1298 vdenc_context->frame_width_in_mbs * 64,
1299 "VDENC row store scratch buffer");
/* One 64-byte streamin_state entry per macroblock. */
1301 assert(sizeof(struct gen9_vdenc_streamin_state) == 64);
1302 i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
1303 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_streamin_res,
1304 vdenc_context->frame_width_in_mbs *
1305 vdenc_context->frame_height_in_mbs *
1306 sizeof(struct gen9_vdenc_streamin_state),
1307 "VDENC StreamIn buffer");
1310 * Calculate the index for each reference surface in list0 for the first slice
1311 * TODO: other slices
1313 pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1314 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1316 vdenc_context->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
/* Slice-level override takes precedence over the picture-level count. */
1318 if (slice_param->num_ref_idx_active_override_flag)
1319 vdenc_context->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
1321 if (vdenc_context->num_refs[0] > ARRAY_ELEMS(vdenc_context->list_ref_idx[0]))
1322 return VA_STATUS_ERROR_INVALID_VALUE;
/* Map each RefPicList0 entry to its index in reference_objects[]. */
1324 for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_ref_idx[0]); i++) {
1325 VAPictureH264 *va_pic;
1327 assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(vdenc_context->list_ref_idx[0]));
1328 vdenc_context->list_ref_idx[0][i] = 0;
1330 if (i >= vdenc_context->num_refs[0])
1333 va_pic = &slice_param->RefPicList0[i];
1335 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
1336 obj_surface = encode_state->reference_objects[j];
1340 obj_surface->base.id == va_pic->picture_id) {
1342 assert(obj_surface->base.id != VA_INVALID_SURFACE);
1343 vdenc_context->list_ref_idx[0][i] = j;
/* Frame type follows the first slice's type (else branch not visible). */
1350 if (slice_param->slice_type == SLICE_TYPE_I ||
1351 slice_param->slice_type == SLICE_TYPE_SI)
1352 vdenc_context->frame_type = VDENC_FRAME_I;
1354 vdenc_context->frame_type = VDENC_FRAME_P;
1356 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1358 gen9_vdenc_avc_calculate_mode_cost(ctx, encode_state, encoder_context, qp);
1359 gen9_vdenc_update_roi_in_streamin_state(ctx, encoder_context);
1361 return VA_STATUS_SUCCESS;
/*
 * Emit a 3-DW HUC_PIPE_MODE_SELECT command on the BCS (video) ring:
 * DW1 enables the stream object (bit 10) and indirect stream-out (bit 4),
 * DW2 programs the media soft-reset counter.
 */
1365 gen9_vdenc_huc_pipe_mode_select(VADriverContextP ctx,
1366 struct intel_encoder_context *encoder_context,
1367 struct huc_pipe_mode_select_parameter *params)
1369 struct intel_batchbuffer *batch = encoder_context->base.batch;
1371 BEGIN_BCS_BATCH(batch, 3);
/* DW length field encodes (total DWs - 2). */
1373 OUT_BCS_BATCH(batch, HUC_PIPE_MODE_SELECT | (3 - 2));
1374 OUT_BCS_BATCH(batch,
1375 (params->huc_stream_object_enable << 10) |
1376 (params->indirect_stream_out_enable << 4));
1377 OUT_BCS_BATCH(batch,
1378 params->media_soft_reset_counter);
1380 ADVANCE_BCS_BATCH(batch);
/*
 * Emit a 5-DW HUC_IMEM_STATE command selecting which HuC firmware kernel
 * to load/run, identified by params->huc_firmware_descriptor (DW4).
 * DW1-DW3 are reserved/zero here.
 */
1384 gen9_vdenc_huc_imem_state(VADriverContextP ctx,
1385 struct intel_encoder_context *encoder_context,
1386 struct huc_imem_state_parameter *params)
1388 struct intel_batchbuffer *batch = encoder_context->base.batch;
1390 BEGIN_BCS_BATCH(batch, 5);
1392 OUT_BCS_BATCH(batch, HUC_IMEM_STATE | (5 - 2));
1393 OUT_BCS_BATCH(batch, 0);
1394 OUT_BCS_BATCH(batch, 0);
1395 OUT_BCS_BATCH(batch, 0);
1396 OUT_BCS_BATCH(batch, params->huc_firmware_descriptor);
1398 ADVANCE_BCS_BATCH(batch);
/*
 * Emit a 6-DW HUC_DMEM_STATE command: a 3-DW relocated source buffer
 * address (the DMEM data prepared by the driver), the destination base
 * address inside HuC DMEM, and the transfer length.
 */
1402 gen9_vdenc_huc_dmem_state(VADriverContextP ctx,
1403 struct intel_encoder_context *encoder_context,
1404 struct huc_dmem_state_parameter *params)
1406 struct intel_batchbuffer *batch = encoder_context->base.batch;
1408 BEGIN_BCS_BATCH(batch, 6);
1410 OUT_BCS_BATCH(batch, HUC_DMEM_STATE | (6 - 2));
/* 3-DW address: bo + read/write flag + delta (see OUT_BUFFER_3DW). */
1411 OUT_BUFFER_3DW(batch, params->huc_data_source_res->bo, 0, 0, 0);
1412 OUT_BCS_BATCH(batch, params->huc_data_destination_base_address);
1413 OUT_BCS_BATCH(batch, params->huc_data_length);
1415 ADVANCE_BCS_BATCH(batch);
/*
 * Emit a 2-DW HUC_CFG_STATE command; DW1 bit 0 forces a HuC reset when
 * params->force_reset is non-zero.
 */
1420 gen9_vdenc_huc_cfg_state(VADriverContextP ctx,
1421 struct intel_encoder_context *encoder_context,
1422 struct huc_cfg_state_parameter *params)
1424 struct intel_batchbuffer *batch = encoder_context->base.batch;
1426 BEGIN_BCS_BATCH(batch, 2);
1428 OUT_BCS_BATCH(batch, HUC_CFG_STATE | (2 - 2));
1429 OUT_BCS_BATCH(batch, !!params->force_reset);
1431 ADVANCE_BCS_BATCH(batch);
/*
 * Emit a 49-DW HUC_VIRTUAL_ADDR_STATE command binding up to 16 surface
 * regions (3 DWs each) that the HuC kernel may read or, when is_target
 * is set, write.  Unused regions are programmed as NULL addresses.
 */
1435 gen9_vdenc_huc_virtual_addr_state(VADriverContextP ctx,
1436 struct intel_encoder_context *encoder_context,
1437 struct huc_virtual_addr_parameter *params)
1439 struct intel_batchbuffer *batch = encoder_context->base.batch;
1442 BEGIN_BCS_BATCH(batch, 49);
1444 OUT_BCS_BATCH(batch, HUC_VIRTUAL_ADDR_STATE | (49 - 2));
1446 for (i = 0; i < 16; i++) {
1447 if (params->regions[i].huc_surface_res && params->regions[i].huc_surface_res->bo)
/* is_target selects a write-enabled relocation for this region. */
1448 OUT_BUFFER_3DW(batch,
1449 params->regions[i].huc_surface_res->bo,
1450 !!params->regions[i].is_target, 0, 0);
/* Else branch (not visible here): program a NULL region. */
1452 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1455 ADVANCE_BCS_BATCH(batch);
/*
 * Emit an 11-DW HUC_IND_OBJ_BASE_ADDR_STATE command programming the base
 * addresses of the indirect stream-in (read-only) and stream-out
 * (writable) objects; each base is followed by a 2-DW ignored upper-bound.
 */
1459 gen9_vdenc_huc_ind_obj_base_addr_state(VADriverContextP ctx,
1460 struct intel_encoder_context *encoder_context,
1461 struct huc_ind_obj_base_addr_parameter *params)
1463 struct intel_batchbuffer *batch = encoder_context->base.batch;
1465 BEGIN_BCS_BATCH(batch, 11);
1467 OUT_BCS_BATCH(batch, HUC_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
1469 if (params->huc_indirect_stream_in_object_res)
1470 OUT_BUFFER_3DW(batch,
1471 params->huc_indirect_stream_in_object_res->bo,
/* Else branch (not visible here): NULL stream-in address. */
1474 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1476 OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */
1478 if (params->huc_indirect_stream_out_object_res)
1479 OUT_BUFFER_3DW(batch,
1480 params->huc_indirect_stream_out_object_res->bo,
/* Else branch (not visible here): NULL stream-out address. */
1483 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1485 OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */
1487 ADVANCE_BCS_BATCH(batch);
/*
 * Snapshot the HuC HUC_STATUS2 MMIO register into the huc_status2_res
 * buffer: DW0 of the buffer receives the constant mask (1 << 6) via
 * MI_STORE_DATA_IMM, DW1 (offset 4) receives the live register value via
 * MI_STORE_REGISTER_MEM.  The driver can later compare the two to tell
 * whether the HuC kernel signalled completion.
 */
1491 gen9_vdenc_huc_store_huc_status2(VADriverContextP ctx,
1492 struct intel_encoder_context *encoder_context)
1494 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1495 struct intel_batchbuffer *batch = encoder_context->base.batch;
1496 struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
1497 struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
1499 /* Write HUC_STATUS2 mask (1 << 6) */
1500 memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
1501 mi_store_data_imm_params.bo = vdenc_context->huc_status2_res.bo;
1502 mi_store_data_imm_params.offset = 0;
1503 mi_store_data_imm_params.dw0 = (1 << 6);
1504 gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
1506 /* Store HUC_STATUS2 */
1507 memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
1508 mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS2;
1509 mi_store_register_mem_params.bo = vdenc_context->huc_status2_res.bo;
1510 mi_store_register_mem_params.offset = 4;
1511 gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
/*
 * Emit a 5-DW HUC_STREAM_OBJECT command describing the indirect input
 * stream the HuC kernel consumes: length, start addresses, and DW4
 * control bits (bitstream enable, length mode, stream-out, emulation
 * prevention byte removal, start-code search and the 3 start-code bytes).
 */
1515 gen9_vdenc_huc_stream_object(VADriverContextP ctx,
1516 struct intel_encoder_context *encoder_context,
1517 struct huc_stream_object_parameter *params)
1519 struct intel_batchbuffer *batch = encoder_context->base.batch;
1521 BEGIN_BCS_BATCH(batch, 5);
1523 OUT_BCS_BATCH(batch, HUC_STREAM_OBJECT | (5 - 2));
1524 OUT_BCS_BATCH(batch, params->indirect_stream_in_data_length);
1525 OUT_BCS_BATCH(batch,
1526 (1 << 31) | /* Must be 1 */
1527 params->indirect_stream_in_start_address);
1528 OUT_BCS_BATCH(batch, params->indirect_stream_out_start_address);
1529 OUT_BCS_BATCH(batch,
1530 (!!params->huc_bitstream_enable << 29) |
1531 (params->length_mode << 27) |
1532 (!!params->stream_out << 26) |
1533 (!!params->emulation_prevention_byte_removal << 25) |
1534 (!!params->start_code_search_engine << 24) |
1535 (params->start_code_byte2 << 16) |
1536 (params->start_code_byte1 << 8) |
1537 params->start_code_byte0);
1539 ADVANCE_BCS_BATCH(batch);
/*
 * Emit a 2-DW HUC_START command; DW1 bit 0 flags the last stream object,
 * telling the HuC kernel it may finish after processing it.
 */
1543 gen9_vdenc_huc_start(VADriverContextP ctx,
1544 struct intel_encoder_context *encoder_context,
1545 struct huc_start_parameter *params)
1547 struct intel_batchbuffer *batch = encoder_context->base.batch;
1549 BEGIN_BCS_BATCH(batch, 2);
1551 OUT_BCS_BATCH(batch, HUC_START | (2 - 2));
1552 OUT_BCS_BATCH(batch, !!params->last_stream_object);
1554 ADVANCE_BCS_BATCH(batch);
/*
 * Emit a 2-DW VD_PIPELINE_FLUSH command.  DW1 packs the per-engine
 * command-flush bits (19:16 = MFX/MFL/VDENC/HEVC) and the pipeline-done /
 * parser-done bits (4:0); callers set only the bits relevant to the
 * engine they need to synchronize.
 */
1558 gen9_vdenc_vd_pipeline_flush(VADriverContextP ctx,
1559 struct intel_encoder_context *encoder_context,
1560 struct vd_pipeline_flush_parameter *params)
1562 struct intel_batchbuffer *batch = encoder_context->base.batch;
1564 BEGIN_BCS_BATCH(batch, 2);
1566 OUT_BCS_BATCH(batch, VD_PIPELINE_FLUSH | (2 - 2));
1567 OUT_BCS_BATCH(batch,
1568 params->mfx_pipeline_command_flush << 19 |
1569 params->mfl_pipeline_command_flush << 18 |
1570 params->vdenc_pipeline_command_flush << 17 |
1571 params->hevc_pipeline_command_flush << 16 |
1572 params->vd_command_message_parser_done << 4 |
1573 params->mfx_pipeline_done << 3 |
1574 params->mfl_pipeline_done << 2 |
1575 params->vdenc_pipeline_done << 1 |
1576 params->hevc_pipeline_done);
1578 ADVANCE_BCS_BATCH(batch);
/*
 * Return the maximum macroblock processing rate (MaxMBPS) for the given
 * H.264 level_idc; 11880 is the default (level 1.x baseline value).
 * NOTE(review): the per-level switch body is not visible in this excerpt —
 * the cases presumably follow the H.264 Annex A level-limit table; confirm
 * against the full file.
 */
1582 gen9_vdenc_get_max_mbps(int level_idc)
1584 int max_mbps = 11880;
1586 switch (level_idc) {
1640 gen9_vdenc_get_profile_level_max_frame(VADriverContextP ctx,
1641 struct intel_encoder_context *encoder_context,
1644 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1645 double bits_per_mb, tmpf;
1646 int max_mbps, num_mb_per_frame;
1647 uint64_t max_byte_per_frame0, max_byte_per_frame1;
1650 if (level_idc >= 31 && level_idc <= 40)
1653 bits_per_mb = 192.0;
1655 max_mbps = gen9_vdenc_get_max_mbps(level_idc);
1656 num_mb_per_frame = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs;
1658 tmpf = (double)num_mb_per_frame;
1660 if (tmpf < max_mbps / 172.0)
1661 tmpf = max_mbps / 172.0;
1663 max_byte_per_frame0 = (uint64_t)(tmpf * bits_per_mb);
1664 max_byte_per_frame1 = (uint64_t)(((double)max_mbps * 100) / vdenc_context->frames_per_100s *bits_per_mb);
1666 /* TODO: check VAEncMiscParameterTypeMaxFrameSize */
1667 ret = (unsigned int)MIN(max_byte_per_frame0, max_byte_per_frame1);
1668 ret = (unsigned int)MIN(ret, vdenc_context->frame_height * vdenc_context->frame_height);
/*
 * Heuristically derive the initial I/P QP for BRC from the uncompressed
 * frame size (NV12: w*h*3/2 bytes), the frame rate and the target bitrate,
 * via a log10-domain linear interpolation between the anchor points
 * (x0, y0) and (x1, y1); a delta derived from the VBV buffer size is
 * computed as well.  The QP is clamped to the valid H.264 range [1, 51].
 * NOTE(review): the "int qp, delat_qp;" declaration, the application of
 * delat_qp and the final return are not visible in this excerpt — confirm
 * against the full file.
 */
1674 gen9_vdenc_calculate_initial_qp(VADriverContextP ctx,
1675 struct encode_state *encode_state,
1676 struct intel_encoder_context *encoder_context)
1678 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1679 float x0 = 0, y0 = 1.19f, x1 = 1.75f, y1 = 1.75f;
1680 unsigned frame_size;
/* NV12 frame: luma (w*h) plus half-size interleaved chroma. */
1683 frame_size = (vdenc_context->frame_width * vdenc_context->frame_height * 3 / 2);
/* frames_per_100s / (bitrate_kbps * 1000 * 100) == 1 / bits-per-frame. */
1684 qp = (int)(1.0 / 1.2 * pow(10.0,
1685 (log10(frame_size * 2.0 / 3.0 * ((float)vdenc_context->frames_per_100s) /
1686 ((float)(vdenc_context->target_bit_rate * 1000) * 100)) - x0) *
1687 (y1 - y0) / (x1 - x0) + y0) + 0.5);
/* Larger VBV buffers (relative to bits-per-frame) reduce the delta. */
1689 delat_qp = (int)(9 - (vdenc_context->vbv_buffer_size_in_bit * ((float)vdenc_context->frames_per_100s) /
1690 ((float)(vdenc_context->target_bit_rate * 1000) * 100)));
1694 qp = CLAMP(1, 51, qp);
1704 gen9_vdenc_update_huc_brc_init_dmem(VADriverContextP ctx,
1705 struct encode_state *encode_state,
1706 struct intel_encoder_context *encoder_context)
1708 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1709 struct huc_brc_init_dmem *dmem;
1710 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1711 double input_bits_per_frame, bps_ratio;
1714 vdenc_context->brc_init_reset_input_bits_per_frame = ((double)(vdenc_context->max_bit_rate * 1000) * 100) / vdenc_context->frames_per_100s;
1715 vdenc_context->brc_init_current_target_buf_full_in_bits = vdenc_context->brc_init_reset_input_bits_per_frame;
1716 vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
1718 dmem = (struct huc_brc_init_dmem *)i965_map_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1723 memset(dmem, 0, sizeof(*dmem));
1725 dmem->brc_func = vdenc_context->brc_initted ? 2 : 0;
1727 dmem->frame_width = vdenc_context->frame_width;
1728 dmem->frame_height = vdenc_context->frame_height;
1730 dmem->target_bitrate = vdenc_context->target_bit_rate * 1000;
1731 dmem->min_rate = vdenc_context->min_bit_rate * 1000;
1732 dmem->max_rate = vdenc_context->max_bit_rate * 1000;
1733 dmem->buffer_size = vdenc_context->vbv_buffer_size_in_bit;
1734 dmem->init_buffer_fullness = vdenc_context->init_vbv_buffer_fullness_in_bit;
1736 if (dmem->init_buffer_fullness > vdenc_context->init_vbv_buffer_fullness_in_bit)
1737 dmem->init_buffer_fullness = vdenc_context->vbv_buffer_size_in_bit;
1739 if (vdenc_context->internal_rate_mode == I965_BRC_CBR)
1740 dmem->brc_flag |= 0x10;
1741 else if (vdenc_context->internal_rate_mode == I965_BRC_VBR)
1742 dmem->brc_flag |= 0x20;
1744 dmem->frame_rate_m = vdenc_context->frames_per_100s;
1745 dmem->frame_rate_d = 100;
1747 dmem->profile_level_max_frame = gen9_vdenc_get_profile_level_max_frame(ctx, encoder_context, seq_param->level_idc);
1749 if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1750 dmem->num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
1755 input_bits_per_frame = ((double)vdenc_context->max_bit_rate * 1000 * 100) / vdenc_context->frames_per_100s;
1756 bps_ratio = input_bits_per_frame / ((double)vdenc_context->vbv_buffer_size_in_bit * 100 / vdenc_context->frames_per_100s);
1758 if (bps_ratio < 0.1)
1761 if (bps_ratio > 3.5)
1764 for (i = 0; i < 4; i++) {
1765 dmem->dev_thresh_pb0[i] = (char)(-50 * pow(vdenc_brc_dev_threshpb0_fp_neg[i], bps_ratio));
1766 dmem->dev_thresh_pb0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshpb0_fp_pos[i], bps_ratio));
1768 dmem->dev_thresh_i0[i] = (char)(-50 * pow(vdenc_brc_dev_threshi0_fp_neg[i], bps_ratio));
1769 dmem->dev_thresh_i0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshi0_fp_pos[i], bps_ratio));
1771 dmem->dev_thresh_vbr0[i] = (char)(-50 * pow(vdenc_brc_dev_threshvbr0_neg[i], bps_ratio));
1772 dmem->dev_thresh_vbr0[i + 4] = (char)(100 * pow(vdenc_brc_dev_threshvbr0_pos[i], bps_ratio));
1775 dmem->init_qp_ip = gen9_vdenc_calculate_initial_qp(ctx, encode_state, encoder_context);
1777 if (vdenc_context->mb_brc_enabled) {
1778 dmem->mb_qp_ctrl = 1;
1779 dmem->dist_qp_delta[0] = -5;
1780 dmem->dist_qp_delta[1] = -2;
1781 dmem->dist_qp_delta[2] = 2;
1782 dmem->dist_qp_delta[3] = 5;
1785 dmem->slice_size_ctrl_en = 0; /* TODO: add support for slice size control */
1787 dmem->oscillation_qp_delta = 0; /* TODO: add support */
1788 dmem->first_iframe_no_hrd_check = 0;/* TODO: add support */
1790 // 2nd re-encode pass if possible
1791 if (vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs >= (3840 * 2160 / 256)) {
1792 dmem->top_qp_delta_thr_for_2nd_pass = 5;
1793 dmem->bottom_qp_delta_thr_for_2nd_pass = 5;
1794 dmem->top_frame_size_threshold_for_2nd_pass = 80;
1795 dmem->bottom_frame_size_threshold_for_2nd_pass = 80;
1797 dmem->top_qp_delta_thr_for_2nd_pass = 2;
1798 dmem->bottom_qp_delta_thr_for_2nd_pass = 1;
1799 dmem->top_frame_size_threshold_for_2nd_pass = 32;
1800 dmem->bottom_frame_size_threshold_for_2nd_pass = 24;
1803 dmem->qp_select_for_first_pass = 1;
1804 dmem->mb_header_compensation = 1;
1805 dmem->delta_qp_adaptation = 1;
1806 dmem->max_crf_quality_factor = 52;
1808 dmem->crf_quality_factor = 0; /* TODO: add support for CRF */
1809 dmem->scenario_info = 0;
1811 memcpy(&dmem->estrate_thresh_i0, vdenc_brc_estrate_thresh_i0, sizeof(dmem->estrate_thresh_i0));
1812 memcpy(&dmem->estrate_thresh_p0, vdenc_brc_estrate_thresh_p0, sizeof(dmem->estrate_thresh_p0));
1814 i965_unmap_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
/*
 * Build and submit the HuC BRC init/reset command sequence on the BCS
 * ring: load the BRC_INIT_RESET firmware (IMEM), select the pipe mode,
 * upload the DMEM parameter block, bind the BRC history buffer as the
 * writable region 0, point the indirect stream-in at a dummy buffer,
 * issue a 1-byte stream object, snapshot HUC_STATUS2, start the kernel,
 * then flush the VD pipeline and invalidate the video pipeline cache.
 */
1818 gen9_vdenc_huc_brc_init_reset(VADriverContextP ctx,
1819 struct encode_state *encode_state,
1820 struct intel_encoder_context *encoder_context)
1822 struct intel_batchbuffer *batch = encoder_context->base.batch;
1823 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1824 struct huc_pipe_mode_select_parameter pipe_mode_select_params;
1825 struct huc_imem_state_parameter imem_state_params;
1826 struct huc_dmem_state_parameter dmem_state_params;
1827 struct huc_virtual_addr_parameter virtual_addr_params;
1828 struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
1829 struct huc_stream_object_parameter stream_object_params;
1830 struct huc_start_parameter start_params;
1831 struct vd_pipeline_flush_parameter pipeline_flush_params;
1832 struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
/* Reset the driver-side BRC target to the initial VBV fullness. */
1834 vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
/* Select and load the BRC init/reset firmware kernel. */
1836 memset(&imem_state_params, 0, sizeof(imem_state_params));
1837 imem_state_params.huc_firmware_descriptor = HUC_BRC_INIT_RESET;
1838 gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);
1840 memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
1841 gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);
/* Refresh the DMEM contents, then upload them to HuC DMEM. */
1843 gen9_vdenc_update_huc_brc_init_dmem(ctx, encode_state, encoder_context);
1844 memset(&dmem_state_params, 0, sizeof(dmem_state_params));
1845 dmem_state_params.huc_data_source_res = &vdenc_context->brc_init_reset_dmem_res;
1846 dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
1847 dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_init_dmem), 64);
1848 gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);
/* Region 0 = BRC history buffer, written by the kernel. */
1850 memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
1851 virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
1852 virtual_addr_params.regions[0].is_target = 1;
1853 gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);
/* The init kernel has no real input stream; feed it a dummy buffer. */
1855 memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
1856 ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
1857 ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
1858 gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);
/* Minimal 1-byte stream object to trigger execution. */
1860 memset(&stream_object_params, 0, sizeof(stream_object_params));
1861 stream_object_params.indirect_stream_in_data_length = 1;
1862 stream_object_params.indirect_stream_in_start_address = 0;
1863 gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);
/* Record HUC_STATUS2 so completion can be checked later. */
1865 gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);
1867 memset(&start_params, 0, sizeof(start_params));
1868 start_params.last_stream_object = 1;
1869 gen9_vdenc_huc_start(ctx, encoder_context, &start_params);
/* Wait for the HEVC/HuC pipe before continuing. */
1871 memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
1872 pipeline_flush_params.hevc_pipeline_done = 1;
1873 pipeline_flush_params.hevc_pipeline_command_flush = 1;
1874 gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
1876 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
1877 mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
1878 gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
/*
 * Fill the HuC BRC-update DMEM buffer for the current PAK pass.
 *
 * Maps brc_update_dmem_res[current_pass], writes the per-frame BRC inputs
 * (HRD target fullness, frame type, global-adjust tables, scene-change
 * parameters, pass bookkeeping) for the HUC_BRC_UPDATE firmware, then
 * unmaps the resource.
 */
gen9_vdenc_update_huc_update_dmem(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct huc_brc_update_dmem *dmem;
    int i, num_p_in_gop = 0;
    dmem = (struct huc_brc_update_dmem *)i965_map_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
    /* Advance the HRD target-buffer fullness once per frame: only on the
     * first PAK pass, and only after BRC init has already run. */
    if (vdenc_context->brc_initted && (vdenc_context->current_pass == 0)) {
        vdenc_context->brc_init_previous_target_buf_full_in_bits =
            (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits);
        vdenc_context->brc_init_current_target_buf_full_in_bits += vdenc_context->brc_init_reset_input_bits_per_frame;
        vdenc_context->brc_target_size += vdenc_context->brc_init_reset_input_bits_per_frame;
    /* Wrap the target size inside the VBV buffer. */
    if (vdenc_context->brc_target_size > vdenc_context->vbv_buffer_size_in_bit)
        vdenc_context->brc_target_size -= vdenc_context->vbv_buffer_size_in_bit;
    dmem->target_size = vdenc_context->brc_target_size;
    /* Bits the HRD model expects to drain this frame (delta of fullness). */
    dmem->peak_tx_bits_per_frame = (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits - vdenc_context->brc_init_previous_target_buf_full_in_bits);
    dmem->target_slice_size = 0; // TODO: add support for slice size control
    memcpy(dmem->start_global_adjust_frame, vdenc_brc_start_global_adjust_frame, sizeof(dmem->start_global_adjust_frame));
    memcpy(dmem->global_rate_ratio_threshold, vdenc_brc_global_rate_ratio_threshold, sizeof(dmem->global_rate_ratio_threshold));
    /* Remap the driver's frame-type enum to the firmware's encoding. */
    dmem->current_frame_type = (vdenc_context->frame_type + 2) % 3; // I frame:2, P frame:0, B frame:1
    memcpy(dmem->start_global_adjust_mult, vdenc_brc_start_global_adjust_mult, sizeof(dmem->start_global_adjust_mult));
    memcpy(dmem->start_global_adjust_div, vdenc_brc_start_global_adjust_div, sizeof(dmem->start_global_adjust_div));
    memcpy(dmem->global_rate_ratio_threshold_qp, vdenc_brc_global_rate_ratio_threshold_qp, sizeof(dmem->global_rate_ratio_threshold_qp));
    dmem->current_pak_pass = vdenc_context->current_pass;
    dmem->max_num_passes = 2;
    /* Scene-change detection thresholds (intra-MB percentage, /256 units). */
    dmem->scene_change_detect_enable = 1;
    dmem->scene_change_prev_intra_percent_threshold = 96;
    dmem->scene_change_cur_intra_perent_threshold = 192;
    if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
        num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
    /* Scene-change window width scales with GOP length, capped at 6. */
    for (i = 0; i < 2; i++)
        dmem->scene_change_width[i] = MIN((num_p_in_gop + 1) / 5, 6);
    if (vdenc_context->is_low_delay)
        dmem->ip_average_coeff = 0;
        dmem->ip_average_coeff = 128;
    dmem->skip_frame_size = 0;
    dmem->num_of_frames_skipped = 0;
    dmem->roi_source = 0;                                       // TODO: add support for dirty ROI
    dmem->hme_detection_enable = 0;                             // TODO: support HME kernel
    dmem->hme_cost_enable = 1;
    /* Size (in DWs? bytes? — matches the SLB built by gen9_vdenc_init_img_states;
     * NOTE(review): unit not visible here, confirm against HuC firmware ABI). */
    dmem->second_level_batchbuffer_size = 228;
    i965_unmap_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
/*
 * Build an MFX_AVC_IMG_STATE command in memory (not emitted to a batch here).
 *
 * The resulting structure is either copied into the BCS batch directly
 * (gen9_vdenc_mfx_avc_img_state) or placed in the second-level batch that
 * the HuC BRC firmware patches between passes.  All fields are derived from
 * the VA-API sequence/picture parameters and the vdenc context.
 */
gen9_vdenc_init_mfx_avc_img_state(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context,
                                  struct gen9_mfx_avc_img_state *pstate)
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    memset(pstate, 0, sizeof(*pstate));
    /* DW0: command opcode + payload length in DWs (total DWs - 2). */
    pstate->dw0.value = (MFX_AVC_IMG_STATE | (sizeof(*pstate) / 4 - 2));
    pstate->dw1.frame_size_in_mbs_minus1 = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs - 1;
    pstate->dw2.frame_width_in_mbs_minus1 = vdenc_context->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = vdenc_context->frame_height_in_mbs - 1;
    /* DW3: picture-level coding controls (progressive frame assumed). */
    pstate->dw3.image_structure = 0;
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.brc_domain_rate_control_enable = 1;
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
    /* DW4: frame structure and entropy-coding mode. */
    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = vdenc_context->transform_8x8_mode_enable;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* Always 0 in VDEnc mode */
    pstate->dw4.minimum_frame_size = 0;
    /* DW5: per-MB/per-frame size-check enables used by multi-pass PAK. */
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;             /* Always 0 in VDEnc mode */
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;
    /* DW6: per-MB bit-size limits (in bits) for the flags enabled above. */
    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;
    /* DW8/DW9: per-pass slice delta-QP bounds, unused (single-QP PAK). */
    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;
    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;
    /* DW10/DW11: frame bitrate min/max windows (max left wide open). */
    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;
    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;
    pstate->dw12.vad_error_logic = 1;
    /* TODO: set parameters DW19/DW20 for slices */
/*
 * Build a VDENC_IMG_STATE command in memory.
 *
 * Like gen9_vdenc_init_mfx_avc_img_state(), the structure is either emitted
 * straight into the BCS batch or written into the second-level batch the HuC
 * BRC firmware updates.  I frames get intra-only cost defaults; otherwise the
 * inter search/cost defaults are programmed, and the mode/MV/HME cost tables
 * can be overridden from the vdenc context (BRC-maintained values).
 */
gen9_vdenc_init_vdenc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen9_vdenc_img_state *pstate,
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    memset(pstate, 0, sizeof(*pstate));
    /* DW0: command opcode + payload length in DWs (total DWs - 2). */
    pstate->dw0.value = (VDENC_IMG_STATE | (sizeof(*pstate) / 4 - 2));
    /* Intra-only defaults for I frames. */
    if (vdenc_context->frame_type == VDENC_FRAME_I) {
        pstate->dw4.intra_sad_measure_adjustment = 2;
        pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;
        pstate->dw5.cre_prefetch_enable = 1;
        pstate->dw9.mode0_cost = 10;
        pstate->dw9.mode1_cost = 0;
        pstate->dw9.mode2_cost = 3;
        pstate->dw9.mode3_cost = 30;
        pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
        pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
        pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;
        pstate->dw22.small_mb_size_in_word = 0xff;
        pstate->dw22.large_mb_size_in_word = 0xff;
        pstate->dw27.max_hmv_r = 0x2000;
        pstate->dw27.max_vmv_r = 0x200;
        pstate->dw33.qp_range_check_upper_bound = 0x33;
        pstate->dw33.qp_range_check_lower_bound = 0x0a;
        pstate->dw33.qp_range_check_value = 0x0f;
        /* Inter (P) defaults: sub-pel ME, SAD adjustments, cost tables. */
        pstate->dw2.bidirectional_weight = 0x20;
        pstate->dw4.subpel_mode = 3;
        pstate->dw4.bme_disable_for_fbr_message = 1;
        pstate->dw4.inter_sad_measure_adjustment = 2;
        pstate->dw4.intra_sad_measure_adjustment = 2;
        pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;
        pstate->dw5.cre_prefetch_enable = 1;
        pstate->dw8.non_skip_zero_mv_const_added = 1;
        pstate->dw8.non_skip_mb_mode_const_added = 1;
        pstate->dw8.ref_id_cost_mode_select = 1;
        pstate->dw9.mode0_cost = 7;
        pstate->dw9.mode1_cost = 26;
        pstate->dw9.mode2_cost = 30;
        pstate->dw9.mode3_cost = 57;
        pstate->dw10.mode4_cost = 8;
        pstate->dw10.mode5_cost = 2;
        pstate->dw10.mode6_cost = 4;
        pstate->dw10.mode7_cost = 6;
        pstate->dw11.mode8_cost = 5;
        pstate->dw11.mode9_cost = 0;
        pstate->dw11.ref_id_cost = 4;
        pstate->dw11.chroma_intra_mode_cost = 0;
        pstate->dw12_13.mv_cost.dw0.mv0_cost = 0;
        pstate->dw12_13.mv_cost.dw0.mv1_cost = 6;
        pstate->dw12_13.mv_cost.dw0.mv2_cost = 6;
        pstate->dw12_13.mv_cost.dw0.mv3_cost = 9;
        pstate->dw12_13.mv_cost.dw1.mv4_cost = 10;
        pstate->dw12_13.mv_cost.dw1.mv5_cost = 13;
        pstate->dw12_13.mv_cost.dw1.mv6_cost = 14;
        pstate->dw12_13.mv_cost.dw1.mv7_cost = 24;
        pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
        pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
        pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;
        pstate->dw22.small_mb_size_in_word = 0xff;
        pstate->dw22.large_mb_size_in_word = 0xff;
        pstate->dw27.max_hmv_r = 0x2000;
        pstate->dw27.max_vmv_r = 0x200;
        /* Distortion-zone boundaries for the FBR/zone logic. */
        pstate->dw31.offset0_for_zone0_neg_zone1_boundary = 800;
        pstate->dw32.offset1_for_zone1_neg_zone2_boundary = 1600;
        pstate->dw32.offset2_for_zone2_neg_zone3_boundary = 2400;
        pstate->dw33.qp_range_check_upper_bound = 0x33;
        pstate->dw33.qp_range_check_lower_bound = 0x0a;
        pstate->dw33.qp_range_check_value = 0x0f;
        pstate->dw34.midpoint_distortion = 0x640;
    /* ROI will be updated in HuC kernel for CBR/VBR */
    if (!vdenc_context->brc_enabled && vdenc_context->num_roi) {
        pstate->dw34.roi_enable = 1;
        /* ROI delta-QP is a 4-bit signed field: clamp to [-8, 7]. */
        pstate->dw30.roi_qp_adjustment_for_zone1 = CLAMP(-8, 7, vdenc_context->roi[0].value);
        if (vdenc_context->num_roi > 1)
            pstate->dw30.roi_qp_adjustment_for_zone2 = CLAMP(-8, 7, vdenc_context->roi[1].value);
        if (vdenc_context->num_roi > 2)
            pstate->dw30.roi_qp_adjustment_for_zone3 = CLAMP(-8, 7, vdenc_context->roi[2].value);
    pstate->dw1.transform_8x8_flag = vdenc_context->transform_8x8_mode_enable;
    pstate->dw3.picture_width = vdenc_context->frame_width_in_mbs;
    pstate->dw4.forward_transform_skip_check_enable = 1; /* TODO: double-check it */
    pstate->dw5.picture_height_minus1 = vdenc_context->frame_height_in_mbs - 1;
    pstate->dw5.picture_type = vdenc_context->frame_type;
    pstate->dw5.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    if (vdenc_context->frame_type == VDENC_FRAME_P) {
        /* Skip the second HME reference when only one L0 reference exists. */
        pstate->dw5.hme_ref1_disable = vdenc_context->num_refs[0] == 1 ? 1 : 0;
    pstate->dw5.mb_slice_threshold_value = 0;
    pstate->dw6.slice_macroblock_height_minus1 = vdenc_context->frame_height_in_mbs - 1; /* single slice only */
    if (pstate->dw1.transform_8x8_flag)
        pstate->dw8.luma_intra_partition_mask = 0;
        pstate->dw8.luma_intra_partition_mask = (1 << 1); /* disable transform_8x8 */
    pstate->dw14.qp_prime_y = pic_param->pic_init_qp + slice_param->slice_qp_delta;      /* TODO: check whether it is OK to use the first slice only */
        /* Override the default cost tables with the BRC-maintained values. */
        pstate->dw9.mode0_cost = vdenc_context->mode_cost[0];
        pstate->dw9.mode1_cost = vdenc_context->mode_cost[1];
        pstate->dw9.mode2_cost = vdenc_context->mode_cost[2];
        pstate->dw9.mode3_cost = vdenc_context->mode_cost[3];
        pstate->dw10.mode4_cost = vdenc_context->mode_cost[4];
        pstate->dw10.mode5_cost = vdenc_context->mode_cost[5];
        pstate->dw10.mode6_cost = vdenc_context->mode_cost[6];
        pstate->dw10.mode7_cost = vdenc_context->mode_cost[7];
        pstate->dw11.mode8_cost = vdenc_context->mode_cost[8];
        pstate->dw11.mode9_cost = vdenc_context->mode_cost[9];
        pstate->dw11.ref_id_cost = vdenc_context->mode_cost[10];
        pstate->dw11.chroma_intra_mode_cost = vdenc_context->mode_cost[11];
        pstate->dw12_13.mv_cost.dw0.mv0_cost = vdenc_context->mv_cost[0];
        pstate->dw12_13.mv_cost.dw0.mv1_cost = vdenc_context->mv_cost[1];
        pstate->dw12_13.mv_cost.dw0.mv2_cost = vdenc_context->mv_cost[2];
        pstate->dw12_13.mv_cost.dw0.mv3_cost = vdenc_context->mv_cost[3];
        pstate->dw12_13.mv_cost.dw1.mv4_cost = vdenc_context->mv_cost[4];
        pstate->dw12_13.mv_cost.dw1.mv5_cost = vdenc_context->mv_cost[5];
        pstate->dw12_13.mv_cost.dw1.mv6_cost = vdenc_context->mv_cost[6];
        pstate->dw12_13.mv_cost.dw1.mv7_cost = vdenc_context->mv_cost[7];
        pstate->dw28_29.hme_mv_cost.dw0.mv0_cost = vdenc_context->hme_mv_cost[0];
        pstate->dw28_29.hme_mv_cost.dw0.mv1_cost = vdenc_context->hme_mv_cost[1];
        pstate->dw28_29.hme_mv_cost.dw0.mv2_cost = vdenc_context->hme_mv_cost[2];
        pstate->dw28_29.hme_mv_cost.dw0.mv3_cost = vdenc_context->hme_mv_cost[3];
        pstate->dw28_29.hme_mv_cost.dw1.mv4_cost = vdenc_context->hme_mv_cost[4];
        pstate->dw28_29.hme_mv_cost.dw1.mv5_cost = vdenc_context->hme_mv_cost[5];
        pstate->dw28_29.hme_mv_cost.dw1.mv6_cost = vdenc_context->hme_mv_cost[6];
        pstate->dw28_29.hme_mv_cost.dw1.mv7_cost = vdenc_context->hme_mv_cost[7];
    /* Vertical MV range is bounded by the H.264 level of the stream. */
    pstate->dw27.max_vmv_r = gen9_vdenc_get_max_vmv_range(seq_param->level_idc);
    pstate->dw34.image_state_qp_override = (vdenc_context->internal_rate_mode == I965_BRC_CQP) ? 1 : 0;
    /* TODO: check rolling I */
    /* TODO: handle ROI */
    /* TODO: check stream in support */
/*
 * Build the second-level batch buffer (SLB) the HuC BRC firmware consumes:
 * MFX_AVC_IMG_STATE followed by VDENC_IMG_STATE, terminated with
 * MI_BATCH_BUFFER_END.  The firmware patches this buffer between PAK passes.
 */
gen9_vdenc_init_img_states(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct gen9_mfx_avc_img_state *mfx_img_cmd;
    struct gen9_vdenc_img_state *vdenc_img_cmd;
    pbuffer = i965_map_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
    /* First command: MFX_AVC_IMG_STATE. */
    mfx_img_cmd = (struct gen9_mfx_avc_img_state *)pbuffer;
    gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, mfx_img_cmd);
    pbuffer += sizeof(*mfx_img_cmd);
    /* Second command: VDENC_IMG_STATE. */
    vdenc_img_cmd = (struct gen9_vdenc_img_state *)pbuffer;
    gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, vdenc_img_cmd, 0);
    pbuffer += sizeof(*vdenc_img_cmd);
    /* Add batch buffer end command */
    *((unsigned int *)pbuffer) = MI_BATCH_BUFFER_END;
    i965_unmap_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
/*
 * Upload the BRC-update constant tables (QP/rate adjustment tables) into
 * the HuC constant-data surface.
 *
 * NOTE(review): for VBR this first patches the shared template
 * gen9_brc_update_constant_data in place before copying it out — the
 * template therefore stays VBR-flavored for subsequent contexts unless
 * something else restores it; confirm this is intended.
 */
gen9_vdenc_huc_brc_update_constant_data(VADriverContextP ctx,
                                        struct encode_state *encode_state,
                                        struct intel_encoder_context *encoder_context)
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    pbuffer = i965_map_gpe_resource(&vdenc_context->brc_constant_data_res);
    /* Swap in the VBR variants of the distortion/buffer-rate tables. */
    if (vdenc_context->internal_rate_mode == I965_BRC_VBR) {
        memcpy(gen9_brc_update_constant_data.dist_qp_adj_tab_i, dist_qp_adj_tab_i_vbr, sizeof(dist_qp_adj_tab_i_vbr));
        memcpy(gen9_brc_update_constant_data.dist_qp_adj_tab_p, dist_qp_adj_tab_p_vbr, sizeof(dist_qp_adj_tab_p_vbr));
        memcpy(gen9_brc_update_constant_data.dist_qp_adj_tab_b, dist_qp_adj_tab_b_vbr, sizeof(dist_qp_adj_tab_b_vbr));
        memcpy(gen9_brc_update_constant_data.buf_rate_adj_tab_i, buf_rate_adj_tab_i_vbr, sizeof(buf_rate_adj_tab_i_vbr));
        memcpy(gen9_brc_update_constant_data.buf_rate_adj_tab_p, buf_rate_adj_tab_p_vbr, sizeof(buf_rate_adj_tab_p_vbr));
        memcpy(gen9_brc_update_constant_data.buf_rate_adj_tab_b, buf_rate_adj_tab_b_vbr, sizeof(buf_rate_adj_tab_b_vbr));
    memcpy(pbuffer, &gen9_brc_update_constant_data, sizeof(gen9_brc_update_constant_data));
    i965_unmap_gpe_resource(&vdenc_context->brc_constant_data_res);
2270 gen9_vdenc_huc_brc_update(VADriverContextP ctx,
2271 struct encode_state *encode_state,
2272 struct intel_encoder_context *encoder_context)
2274 struct intel_batchbuffer *batch = encoder_context->base.batch;
2275 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2276 struct huc_pipe_mode_select_parameter pipe_mode_select_params;
2277 struct huc_imem_state_parameter imem_state_params;
2278 struct huc_dmem_state_parameter dmem_state_params;
2279 struct huc_virtual_addr_parameter virtual_addr_params;
2280 struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
2281 struct huc_stream_object_parameter stream_object_params;
2282 struct huc_start_parameter start_params;
2283 struct vd_pipeline_flush_parameter pipeline_flush_params;
2284 struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
2285 struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
2286 struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
2288 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
2289 mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
2290 gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
2292 if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset) {
2293 struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
2295 memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
2296 mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
2297 gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
2300 gen9_vdenc_init_img_states(ctx, encode_state, encoder_context);
2302 memset(&imem_state_params, 0, sizeof(imem_state_params));
2303 imem_state_params.huc_firmware_descriptor = HUC_BRC_UPDATE;
2304 gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);
2306 memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
2307 gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);
2309 gen9_vdenc_update_huc_update_dmem(ctx, encoder_context);
2310 memset(&dmem_state_params, 0, sizeof(dmem_state_params));
2311 dmem_state_params.huc_data_source_res = &vdenc_context->brc_update_dmem_res[vdenc_context->current_pass];
2312 dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
2313 dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_update_dmem), 64);
2314 gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);
2316 gen9_vdenc_huc_brc_update_constant_data(ctx, encode_state, encoder_context);
2317 memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
2318 virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
2319 virtual_addr_params.regions[0].is_target = 1;
2320 virtual_addr_params.regions[1].huc_surface_res = &vdenc_context->vdenc_statistics_res;
2321 virtual_addr_params.regions[2].huc_surface_res = &vdenc_context->pak_statistics_res;
2322 virtual_addr_params.regions[3].huc_surface_res = &vdenc_context->vdenc_avc_image_state_res;
2323 virtual_addr_params.regions[4].huc_surface_res = &vdenc_context->hme_detection_summary_buffer_res;
2324 virtual_addr_params.regions[4].is_target = 1;
2325 virtual_addr_params.regions[5].huc_surface_res = &vdenc_context->brc_constant_data_res;
2326 virtual_addr_params.regions[6].huc_surface_res = &vdenc_context->second_level_batch_res;
2327 virtual_addr_params.regions[6].is_target = 1;
2328 gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);
2330 memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
2331 ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
2332 ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
2333 gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);
2335 memset(&stream_object_params, 0, sizeof(stream_object_params));
2336 stream_object_params.indirect_stream_in_data_length = 1;
2337 stream_object_params.indirect_stream_in_start_address = 0;
2338 gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);
2340 gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);
2342 memset(&start_params, 0, sizeof(start_params));
2343 start_params.last_stream_object = 1;
2344 gen9_vdenc_huc_start(ctx, encoder_context, &start_params);
2346 memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
2347 pipeline_flush_params.hevc_pipeline_done = 1;
2348 pipeline_flush_params.hevc_pipeline_command_flush = 1;
2349 gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
2351 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
2352 mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
2353 gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
2355 /* Store HUC_STATUS */
2356 memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
2357 mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS;
2358 mi_store_register_mem_params.bo = vdenc_context->huc_status_res.bo;
2359 gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
2361 /* Write HUC_STATUS mask (1 << 31) */
2362 memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
2363 mi_store_data_imm_params.bo = vdenc_context->huc_status_res.bo;
2364 mi_store_data_imm_params.offset = 4;
2365 mi_store_data_imm_params.dw0 = (1 << 31);
2366 gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
/*
 * Emit MFX_PIPE_MODE_SELECT (5 DWs): AVC encode, long-format commands,
 * VDEnc mode, with pre/post deblocking outputs enabled only when their
 * backing BOs exist.
 */
gen9_vdenc_mfx_pipe_mode_select(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) |       /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) |
                  (1 << 13) |                   /* VDEnc mode */
                  ((!!vdenc_context->post_deblocking_output_res.bo) << 9) |     /* Post Deblocking Output */
                  ((!!vdenc_context->pre_deblocking_output_res.bo) << 8) |      /* Pre Deblocking Output */
                  (1 << 7) |                    /* Scaled surface enable */
                  (1 << 6) |                    /* Frame statistics stream out enable, always '1' in VDEnc mode */
                  (1 << 4) |                    /* encoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE (6 DWs) for an interleaved-UV (NV12-style),
 * Y-tiled planar 4:2:0 surface.  `id` (trailing parameter) selects the
 * MFX surface slot this state describes.
 */
gen9_vdenc_mfx_surface_state(VADriverContextP ctx,
                             struct intel_encoder_context *encoder_context,
                             struct i965_gpe_resource *gpe_resource,
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, id);
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) |    /* 420 planar YUV surface */
                  (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) |    /* pitch */
                  (0 << 2) |                            /* must be 0 for interleave U/V */
                  (1 << 1) |                            /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                           /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                           /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for V(cr); same as Cb for interleaved U/V */
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (65 DWs): all buffer addresses the MFX/PAK
 * pipeline reads and writes for this frame.
 */
gen9_vdenc_mfx_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    BEGIN_BCS_BATCH(batch, 65);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));
    /* the DW1-3 is for pre_deblocking */
    OUT_BUFFER_3DW(batch, vdenc_context->pre_deblocking_output_res.bo, 1, 0, 0);
    /* the DW4-6 is for the post_deblocking */
    OUT_BUFFER_3DW(batch, vdenc_context->post_deblocking_output_res.bo, 1, 0, 0);
    /* the DW7-9 is for the uncompressed_picture */
    OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);
    /* the DW10-12 is for PAK information (write) */
    OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 1, 0, 0);
    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BUFFER_3DW(batch, vdenc_context->mfx_intra_row_store_scratch_res.bo, 1, 0, 0);
    /* the DW16-18 is for the deblocking filter */
    OUT_BUFFER_3DW(batch, vdenc_context->mfx_deblocking_filter_row_store_scratch_res.bo, 1, 0, 0);
    /* the DW 19-50 is for Reference pictures*/
    for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
        OUT_BUFFER_2DW(batch, vdenc_context->list_reference_res[i].bo, 0, 0);
    /* DW 51, reference picture attributes */
    OUT_BCS_BATCH(batch, 0);
    /* The DW 52-54 is for PAK information (read) */
    OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 0, 0, 0);
    /* the DW 55-57 is the ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    /* the DW 58-60 is the second ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    /* DW 61, memory compress enable & mode */
    OUT_BCS_BATCH(batch, 0);
    /* the DW 62-64 is the 4x Down Scaling surface */
    OUT_BUFFER_3DW(batch, vdenc_context->scaled_4x_recon_surface_res.bo, 0, 0, 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 DWs).  Only the PAK-BSE section
 * (DW21-25, the compressed-bitstream destination) is meaningful for VDEnc
 * encoding; the decoder-oriented sections are programmed NULL.
 */
gen9_vdenc_mfx_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    BEGIN_BCS_BATCH(batch, 26);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* The DW1-5 is for the MFX indirect bitstream offset, ignore for VDEnc mode */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);
    /* the DW6-10 is for MFX Indirect MV Object Base Address, ignore for VDEnc mode */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);
    /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);
    /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);
    /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
     * Note: an offset is specified in MFX_AVC_SLICE_STATE
    OUT_BUFFER_3DW(batch,
                   vdenc_context->compressed_bitstream.res.bo,
    OUT_BUFFER_2DW(batch,
                   vdenc_context->compressed_bitstream.res.bo,
                   vdenc_context->compressed_bitstream.end_offset);
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 DWs).  Only the BSD/MPC row-store
 * scratch buffer is needed when encoding.
 */
gen9_vdenc_mfx_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
    /* The DW1-3 is for bsd/mpc row store scratch buffer */
    OUT_BUFFER_3DW(batch, vdenc_context->mfx_bsd_mpc_row_store_scratch_res.bo, 1, 0, 0);
    /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    ADVANCE_BCS_BATCH(batch);
2549 gen9_vdenc_mfx_qm_state(VADriverContextP ctx,
2553 struct intel_encoder_context *encoder_context)
2555 struct intel_batchbuffer *batch = encoder_context->base.batch;
2556 unsigned int qm_buffer[16];
2558 assert(qm_length <= 16);
2559 assert(sizeof(*qm) == 4);
2560 memcpy(qm_buffer, qm, qm_length * 4);
2562 BEGIN_BCS_BATCH(batch, 18);
2563 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
2564 OUT_BCS_BATCH(batch, qm_type << 0);
2565 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
2566 ADVANCE_BCS_BATCH(batch);
2570 gen9_vdenc_mfx_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2572 /* TODO: add support for non flat matrix */
2573 unsigned int qm[16] = {
2574 0x10101010, 0x10101010, 0x10101010, 0x10101010,
2575 0x10101010, 0x10101010, 0x10101010, 0x10101010,
2576 0x10101010, 0x10101010, 0x10101010, 0x10101010,
2577 0x10101010, 0x10101010, 0x10101010, 0x10101010
2580 gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
2581 gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
2582 gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
2583 gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
2587 gen9_vdenc_mfx_fqm_state(VADriverContextP ctx,
2591 struct intel_encoder_context *encoder_context)
2593 struct intel_batchbuffer *batch = encoder_context->base.batch;
2594 unsigned int fqm_buffer[32];
2596 assert(fqm_length <= 32);
2597 assert(sizeof(*fqm) == 4);
2598 memcpy(fqm_buffer, fqm, fqm_length * 4);
2600 BEGIN_BCS_BATCH(batch, 34);
2601 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
2602 OUT_BCS_BATCH(batch, fqm_type << 0);
2603 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
2604 ADVANCE_BCS_BATCH(batch);
2608 gen9_vdenc_mfx_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2610 /* TODO: add support for non flat matrix */
2611 unsigned int qm[32] = {
2612 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2613 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2614 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2615 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2616 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2617 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2618 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2619 0x10001000, 0x10001000, 0x10001000, 0x10001000
2622 gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
2623 gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
2624 gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
2625 gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
2629 gen9_vdenc_mfx_avc_img_state(VADriverContextP ctx,
2630 struct encode_state *encode_state,
2631 struct intel_encoder_context *encoder_context)
2633 struct intel_batchbuffer *batch = encoder_context->base.batch;
2634 struct gen9_mfx_avc_img_state mfx_img_cmd;
2636 gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &mfx_img_cmd);
2638 BEGIN_BCS_BATCH(batch, (sizeof(mfx_img_cmd) >> 2));
2639 intel_batchbuffer_data(batch, &mfx_img_cmd, sizeof(mfx_img_cmd));
2640 ADVANCE_BCS_BATCH(batch);
/*
 * Emit VDENC_PIPE_MODE_SELECT (2 DWs): AVC codec, TLB prefetch and frame
 * statistics stream-out enabled, stream-in/PAK-threshold-check taken from
 * the vdenc context.
 */
gen9_vdenc_vdenc_pipe_mode_select(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    BEGIN_BCS_BATCH(batch, 2);
    OUT_BCS_BATCH(batch, VDENC_PIPE_MODE_SELECT | (2 - 2));
    OUT_BCS_BATCH(batch,
                  (vdenc_context->vdenc_streamin_enable << 9) |
                  (vdenc_context->vdenc_pak_threshold_check_enable << 8) |
                  (1 << 7) |                    /* Tlb prefetch enable */
                  (1 << 5) |                    /* Frame Statistics Stream-Out Enable */
                  (VDENC_CODEC_AVC << 0));
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit a 6-DW VDEnc surface-state command for an interleaved-UV, Y-tiled
 * planar 4:2:0 surface.  `vdenc_surface_cmd` selects which of the VDEnc
 * surface commands is emitted (SRC / REF / DS_REF); the payload layout is
 * identical for all three.
 */
gen9_vdenc_vdenc_surface_state(VADriverContextP ctx,
                               struct intel_encoder_context *encoder_context,
                               struct i965_gpe_resource *gpe_resource,
                               int vdenc_surface_cmd)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, vdenc_surface_cmd | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (VDENC_SURFACE_PLANAR_420_8 << 28) |  /* 420 planar YUV surface only on SKL */
                  (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) |    /* pitch */
                  (0 << 2) |                            /* must be 0 for interleave U/V */
                  (1 << 1) |                            /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                           /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                           /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for v(cr) */
    ADVANCE_BCS_BATCH(batch);
/* Emit the VDEnc surface state for the raw (uncompressed) input surface. */
gen9_vdenc_vdenc_src_surface_state(VADriverContextP ctx,
                                   struct intel_encoder_context *encoder_context,
                                   struct i965_gpe_resource *gpe_resource)
gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_SRC_SURFACE_STATE);
/* Emit the VDEnc surface state for the full-resolution reference surface. */
gen9_vdenc_vdenc_ref_surface_state(VADriverContextP ctx,
                                   struct intel_encoder_context *encoder_context,
                                   struct i965_gpe_resource *gpe_resource)
gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_REF_SURFACE_STATE);
/* Emit the VDEnc surface state for the 4x down-scaled reference surface. */
gen9_vdenc_vdenc_ds_ref_surface_state(VADriverContextP ctx,
                                      struct intel_encoder_context *encoder_context,
                                      struct i965_gpe_resource *gpe_resource)
gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_DS_REF_SURFACE_STATE);
/* Emit VDENC_PIPE_BUF_ADDR_STATE (37 DWs): all buffer addresses the VDEnc
 * pipe needs — down-scaled and full-size references, raw input, optional
 * stream-in, row-store scratch and the statistics streamout buffer.
 * Slots unsupported on SKL (BWD refs, third FWD ref) are programmed NULL. */
gen9_vdenc_vdenc_pipe_buf_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
struct intel_batchbuffer *batch = encoder_context->base.batch;

BEGIN_BCS_BATCH(batch, 37);

OUT_BCS_BATCH(batch, VDENC_PIPE_BUF_ADDR_STATE | (37 - 2));

/* DW1-6 for DS FWD REF0/REF1 */
OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);

/* DW7-9 for DS BWD REF0, ignored on SKL */
OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

/* DW10-12 for uncompressed input data */
OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);

/* DW13-DW15 for streamin data */
if (vdenc_context->vdenc_streamin_enable)
    OUT_BUFFER_3DW(batch, vdenc_context->vdenc_streamin_res.bo, 0, 0, 0);
/* NOTE(review): an `else` is expected before the NULL slot below so that
 * exactly one 3-DW entry is emitted for DW13-15 (the 37-DW total requires
 * it) — confirm it is present in the full file. */
OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

/* DW16-DW18 for row scratch buffer */
OUT_BUFFER_3DW(batch, vdenc_context->vdenc_row_store_scratch_res.bo, 1, 0, 0);

/* DW19-DW21, ignored on SKL */
OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

/* DW22-DW27 for FWD REF0/REF1 */
OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);

/* DW28-DW30 for FWD REF2, ignored on SKL */
OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

/* DW31-DW33 for BDW REF0, ignored on SKL */
OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

/* DW34-DW36 for VDEnc statistics streamout */
OUT_BUFFER_3DW(batch, vdenc_context->vdenc_statistics_res.bo, 1, 0, 0);

ADVANCE_BCS_BATCH(batch);
/* Emit VDENC_CONST_QPT_STATE (61 DWs): per-QP lambda, skip-threshold and
 * SIC forward-transform coefficient-threshold tables.  The I-frame tables
 * and the P-frame (_p) tables are distinct constant arrays. */
gen9_vdenc_vdenc_const_qpt_state(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
struct intel_batchbuffer *batch = encoder_context->base.batch;

BEGIN_BCS_BATCH(batch, 61);

OUT_BCS_BATCH(batch, VDENC_CONST_QPT_STATE | (61 - 2));

if (vdenc_context->frame_type == VDENC_FRAME_I) {
    intel_batchbuffer_data(batch, vdenc_const_qp_lambda, sizeof(vdenc_const_qp_lambda));
    intel_batchbuffer_data(batch, vdenc_const_skip_threshold, sizeof(vdenc_const_skip_threshold));
    intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_0, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0));
    intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_1, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1));
    intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_2, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2));
    intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_3, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3));
/* NOTE(review): this loop scales the shared P skip-threshold table in
 * place; repeated calls would compound the scaling unless it is guarded
 * (e.g. by a transform-8x8 condition) — confirm against the full file. */
for (i = 0; i < 28; i++) {
    vdenc_const_skip_threshold_p[i] *= 3;
intel_batchbuffer_data(batch, vdenc_const_qp_lambda_p, sizeof(vdenc_const_qp_lambda_p));
intel_batchbuffer_data(batch, vdenc_const_skip_threshold_p, sizeof(vdenc_const_skip_threshold_p));
intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_0_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0_p));
intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_1_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1_p));
intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_2_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2_p));
intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_3_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3_p));

ADVANCE_BCS_BATCH(batch);
/* Emit VDENC_WALKER_STATE (2 DWs) with all fields zeroed — the default
 * walker configuration for frame-level VDEnc. */
gen9_vdenc_vdenc_walker_state(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
struct intel_batchbuffer *batch = encoder_context->base.batch;

BEGIN_BCS_BATCH(batch, 2);

OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (2 - 2));
OUT_BCS_BATCH(batch, 0); /* All fields are set to 0 */

ADVANCE_BCS_BATCH(batch);
/* Emit VDENC_IMG_STATE: the command struct is built by
 * gen9_vdenc_init_vdenc_img_state() (last arg 1 presumably marks direct
 * batch emission rather than the BRC second-level buffer — confirm) and
 * copied verbatim into the batch. */
gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
struct intel_batchbuffer *batch = encoder_context->base.batch;
struct gen9_vdenc_img_state vdenc_img_cmd;

gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, &vdenc_img_cmd, 1);

BEGIN_BCS_BATCH(batch, (sizeof(vdenc_img_cmd) >> 2));
intel_batchbuffer_data(batch, &vdenc_img_cmd, sizeof(vdenc_img_cmd));
ADVANCE_BCS_BATCH(batch);
2860 intel_avc_enc_slice_type_fixup(int slice_type);
/* Emit MFX_INSERT_OBJECT: insert raw bitstream data (headers or a slice
 * header) into the output.  data_bits_in_last_dw == 0 means the last DW is
 * full (32 valid bits).  skip_emul_byte_count bytes at the start are exempt
 * from emulation-byte insertion; slice_header_indicator marks slice-header
 * payloads.  (Note: "lenght_in_dws" typo is kept — it is the in-file
 * parameter name used by all callers.) */
gen9_vdenc_mfx_avc_insert_object(VADriverContextP ctx,
                                 struct intel_encoder_context *encoder_context,
                                 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
                                 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
                                 int slice_header_indicator)
struct intel_batchbuffer *batch = encoder_context->base.batch;

if (data_bits_in_last_dw == 0)
    data_bits_in_last_dw = 32;

BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);

OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
OUT_BCS_BATCH(batch,
              (0 << 16) | /* always start at offset 0 */
              (slice_header_indicator << 14) |
              (data_bits_in_last_dw << 8) |
              (skip_emul_byte_count << 4) |
              (!!emulation_flag << 3) |
              ((!!is_last_header) << 2) |
              ((!!is_end_of_slice) << 1) |
              (0 << 0)); /* TODO: check this flag */
intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);

ADVANCE_BCS_BATCH(batch);
/* Insert all packed raw data attached to one slice, then the slice header
 * itself.  Raw (non-slice-header) packed buffers go out first; the slice
 * header is inserted last — either a user-supplied packed header, or one
 * built by the driver via build_avc_slice_header() when none was given. */
gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
                                            struct encode_state *encode_state,
                                            struct intel_encoder_context *encoder_context,
VAEncPackedHeaderParameterBuffer *param = NULL;
unsigned int length_in_bits;
unsigned int *header_data = NULL;
int count, i, start_index;
int slice_header_index;

/* slice_header_index == -1 means no packed slice header was supplied */
if (encode_state->slice_header_index[slice_index] == 0)
    slice_header_index = -1;
slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

count = encode_state->slice_rawdata_count[slice_index];
start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

for (i = 0; i < count; i++) {
    unsigned int skip_emul_byte_cnt;

    header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
    param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);

    /* skip the slice header packed data type as it is lastly inserted */
    if (param->type == VAEncPackedHeaderSlice)
    length_in_bits = param->bit_length;
    skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

    /* as the slice header is still required, the last header flag is set to
    gen9_vdenc_mfx_avc_insert_object(ctx,
                                     ALIGN(length_in_bits, 32) >> 5,
                                     length_in_bits & 0x1f,
                                     !param->has_emulation_bytes,

if (slice_header_index == -1) {
    VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
    unsigned char *slice_header = NULL;
    int slice_header_length_in_bits = 0;

    /* No slice header data is passed. And the driver needs to generate it */
    /* For the Normal H264 */
    slice_header_length_in_bits = build_avc_slice_header(seq_param,
    gen9_vdenc_mfx_avc_insert_object(ctx,
                                     (unsigned int *)slice_header,
                                     ALIGN(slice_header_length_in_bits, 32) >> 5,
                                     slice_header_length_in_bits & 0x1f,
                                     5, /* first 5 bytes are start code + nal unit type */
    unsigned int skip_emul_byte_cnt;

    header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
    param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
    length_in_bits = param->bit_length;

    /* as the slice header is the last header data for one slice,
     * the last header flag is set to one.
    skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

    gen9_vdenc_mfx_avc_insert_object(ctx,
                                     ALIGN(length_in_bits, 32) >> 5,
                                     length_in_bits & 0x1f,
                                     !param->has_emulation_bytes,
/* Insert the bitstream headers for one slice.  On the first slice of the
 * frame, SPS, PPS and SEI packed headers (when supplied by the app) are
 * inserted; under CBR an SEI may be driver-generated instead.  Then the
 * slice-level packed data is inserted.  ("inset" in the name is a historic
 * typo — kept, as it is the in-file identifier.) */
gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context,
                                 VAEncSliceParameterBufferH264 *slice_param,
struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
unsigned int internal_rate_mode = vdenc_context->internal_rate_mode;
unsigned int skip_emul_byte_cnt;

if (slice_index == 0) {
    /* SPS */
    if (encode_state->packed_header_data[idx]) {
        VAEncPackedHeaderParameterBuffer *param = NULL;
        unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
        unsigned int length_in_bits;

        assert(encode_state->packed_header_param[idx]);
        param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
        gen9_vdenc_mfx_avc_insert_object(ctx,
                                         ALIGN(length_in_bits, 32) >> 5,
                                         length_in_bits & 0x1f,
                                         !param->has_emulation_bytes,

    /* PPS */
    idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);

    if (encode_state->packed_header_data[idx]) {
        VAEncPackedHeaderParameterBuffer *param = NULL;
        unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
        unsigned int length_in_bits;

        assert(encode_state->packed_header_param[idx]);
        param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        gen9_vdenc_mfx_avc_insert_object(ctx,
                                         ALIGN(length_in_bits, 32) >> 5,
                                         length_in_bits & 0x1f,
                                         !param->has_emulation_bytes,

    /* SEI */
    idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);

    if (encode_state->packed_header_data[idx]) {
        VAEncPackedHeaderParameterBuffer *param = NULL;
        unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
        unsigned int length_in_bits;

        assert(encode_state->packed_header_param[idx]);
        param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
        gen9_vdenc_mfx_avc_insert_object(ctx,
                                         ALIGN(length_in_bits, 32) >> 5,
                                         length_in_bits & 0x1f,
                                         !param->has_emulation_bytes,
    } else if (internal_rate_mode == I965_BRC_CBR) {
        /* TODO: insert others */

gen9_vdenc_mfx_avc_insert_slice_packed_data(ctx,
3086 gen9_vdenc_mfx_avc_slice_state(VADriverContextP ctx,
3087 struct encode_state *encode_state,
3088 struct intel_encoder_context *encoder_context,
3089 VAEncPictureParameterBufferH264 *pic_param,
3090 VAEncSliceParameterBufferH264 *slice_param,
3091 VAEncSliceParameterBufferH264 *next_slice_param)
3093 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3094 struct intel_batchbuffer *batch = encoder_context->base.batch;
3095 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
3096 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
3097 unsigned char correct[6], grow, shrink;
3098 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
3099 int max_qp_n, max_qp_p;
3101 int weighted_pred_idc = 0;
3102 int num_ref_l0 = 0, num_ref_l1 = 0;
3103 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3104 int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; // TODO: fix for CBR&VBR */
3106 slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
3107 slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_height_in_mbs;
3109 if (next_slice_param) {
3110 next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
3111 next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_height_in_mbs;
3113 next_slice_hor_pos = 0;
3114 next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
3117 if (slice_type == SLICE_TYPE_I) {
3118 luma_log2_weight_denom = 0;
3119 chroma_log2_weight_denom = 0;
3120 } else if (slice_type == SLICE_TYPE_P) {
3121 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
3122 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
3124 if (slice_param->num_ref_idx_active_override_flag)
3125 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3126 } else if (slice_type == SLICE_TYPE_B) {
3127 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
3128 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
3129 num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
3131 if (slice_param->num_ref_idx_active_override_flag) {
3132 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3133 num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
3136 if (weighted_pred_idc == 2) {
3137 /* 8.4.3 - Derivation process for prediction weights (8-279) */
3138 luma_log2_weight_denom = 5;
3139 chroma_log2_weight_denom = 5;
3143 max_qp_n = 0; /* TODO: update it */
3144 max_qp_p = 0; /* TODO: update it */
3145 grow = 0; /* TODO: update it */
3146 shrink = 0; /* TODO: update it */
3148 for (i = 0; i < 6; i++)
3149 correct[i] = 0; /* TODO: update it */
3151 BEGIN_BCS_BATCH(batch, 11);
3153 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
3154 OUT_BCS_BATCH(batch, slice_type);
3155 OUT_BCS_BATCH(batch,
3156 (num_ref_l0 << 16) |
3157 (num_ref_l1 << 24) |
3158 (chroma_log2_weight_denom << 8) |
3159 (luma_log2_weight_denom << 0));
3160 OUT_BCS_BATCH(batch,
3161 (weighted_pred_idc << 30) |
3162 (slice_param->direct_spatial_mv_pred_flag << 29) |
3163 (slice_param->disable_deblocking_filter_idc << 27) |
3164 (slice_param->cabac_init_idc << 24) |
3166 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
3167 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
3169 OUT_BCS_BATCH(batch,
3170 slice_ver_pos << 24 |
3171 slice_hor_pos << 16 |
3172 slice_param->macroblock_address);
3173 OUT_BCS_BATCH(batch,
3174 next_slice_ver_pos << 16 |
3175 next_slice_hor_pos);
3177 OUT_BCS_BATCH(batch,
3178 (0 << 31) | /* TODO: ignore it for VDENC ??? */
3179 (!slice_param->macroblock_address << 30) | /* ResetRateControlCounter */
3180 (2 << 28) | /* Loose Rate Control */
3181 (0 << 24) | /* RC Stable Tolerance */
3182 (0 << 23) | /* RC Panic Enable */
3183 (1 << 22) | /* CBP mode */
3184 (0 << 21) | /* MB Type Direct Conversion, 0: Enable, 1: Disable */
3185 (0 << 20) | /* MB Type Skip Conversion, 0: Enable, 1: Disable */
3186 (!next_slice_param << 19) | /* Is Last Slice */
3187 (0 << 18) | /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
3188 (1 << 17) | /* HeaderPresentFlag */
3189 (1 << 16) | /* SliceData PresentFlag */
3190 (0 << 15) | /* TailPresentFlag, TODO: check it on VDEnc */
3191 (1 << 13) | /* RBSP NAL TYPE */
3192 (1 << 12)); /* CabacZeroWordInsertionEnable */
3194 OUT_BCS_BATCH(batch, vdenc_context->compressed_bitstream.start_offset);
3196 OUT_BCS_BATCH(batch,
3197 (max_qp_n << 24) | /*Target QP - 24 is lowest QP*/
3198 (max_qp_p << 16) | /*Target QP + 20 is highest QP*/
3201 OUT_BCS_BATCH(batch,
3206 (correct[5] << 20) |
3207 (correct[4] << 16) |
3208 (correct[3] << 12) |
3212 OUT_BCS_BATCH(batch, 0);
3214 ADVANCE_BCS_BATCH(batch);
3218 gen9_vdenc_mfx_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
3220 unsigned int is_long_term =
3221 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
3222 unsigned int is_top_field =
3223 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
3224 unsigned int is_bottom_field =
3225 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
3227 return ((is_long_term << 6) |
3228 ((is_top_field ^ is_bottom_field ^ 1) << 5) |
3229 (frame_store_id << 1) |
3230 ((is_top_field ^ 1) & is_bottom_field));
/* Emit MFX_AVC_REF_IDX_STATE (10 DWs) for list L0 of a P slice.  Each
 * active L0 reference contributes one packed byte (see
 * gen9_vdenc_mfx_get_ref_idx_state) at the slot given by list_ref_idx;
 * unused slots keep the 0x80 "invalid" marker.  B slices are not
 * supported by VDEnc on SKL. */
gen9_vdenc_mfx_avc_ref_idx_state(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context,
                                 VAEncSliceParameterBufferH264 *slice_param)
struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
struct intel_batchbuffer *batch = encoder_context->base.batch;
VAPictureH264 *ref_pic;
int i, slice_type, ref_idx_shift;
unsigned int fwd_ref_entry;

fwd_ref_entry = 0x80808080; /* all four slots start as invalid (0x80) */
slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

for (i = 0; i < MAX(vdenc_context->num_refs[0], 2); i++) {
    ref_pic = &slice_param->RefPicList0[i];
    ref_idx_shift = vdenc_context->list_ref_idx[0][i] * 8;

    /* clear the slot's invalid marker, then install the packed entry */
    fwd_ref_entry &= ~(0xFF << ref_idx_shift);
    fwd_ref_entry += (gen9_vdenc_mfx_get_ref_idx_state(ref_pic, vdenc_context->list_ref_idx[0][i]) << ref_idx_shift);

if (slice_type == SLICE_TYPE_P) {
    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
    OUT_BCS_BATCH(batch, 0); // L0
    OUT_BCS_BATCH(batch, fwd_ref_entry);

    for (i = 0; i < 7; i++) {
        OUT_BCS_BATCH(batch, 0x80808080);

    ADVANCE_BCS_BATCH(batch);

if (slice_type == SLICE_TYPE_B) {
    /* VDEnc on SKL doesn't support BDW */
/* Emit MFX_AVC_WEIGHTOFFSET_STATE (98 DWs) when explicit weighted
 * prediction is enabled for a P slice: 32 L0 entries of
 * {luma weight, luma offset, Cb weight, Cb offset, Cr weight, Cr offset}.
 * B slices are not supported by VDEnc on SKL. */
gen9_vdenc_mfx_avc_weightoffset_state(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      VAEncPictureParameterBufferH264 *pic_param,
                                      VAEncSliceParameterBufferH264 *slice_param)
struct intel_batchbuffer *batch = encoder_context->base.batch;
short weightoffsets[32 * 6];

slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

if (slice_type == SLICE_TYPE_P &&
    pic_param->pic_fields.bits.weighted_pred_flag == 1) {

    for (i = 0; i < 32; i++) {
        weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
        weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
        weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
        weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
        weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
        weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];

    BEGIN_BCS_BATCH(batch, 98);
    OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
    OUT_BCS_BATCH(batch, 0); /* list L0 */
    intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));

    ADVANCE_BCS_BATCH(batch);

if (slice_type == SLICE_TYPE_B) {
    /* VDEnc on SKL doesn't support BWD */
/* Emit all MFX commands for one slice: reference list, weight/offset
 * table, slice state, then the packed headers/slice header data. */
gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                VAEncSliceParameterBufferH264 *slice_param,
                                VAEncSliceParameterBufferH264 *next_slice_param,
VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

gen9_vdenc_mfx_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param);
gen9_vdenc_mfx_avc_weightoffset_state(ctx,
gen9_vdenc_mfx_avc_slice_state(ctx,
gen9_vdenc_mfx_avc_inset_headers(ctx,
/* Walk all slice parameter buffers and emit the per-slice MFX commands,
 * then (for frame-level VDEnc) the walker state and a full VD pipeline
 * flush, and finally an MI_FLUSH_DW invalidating the video pipeline
 * cache. */
gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
struct intel_batchbuffer *batch = encoder_context->base.batch;
struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
int slice_index = 0;
int is_frame_level_vdenc = 1; /* TODO: check it for SKL */
int has_tail = 0; /* TODO: check it later */

for (j = 0; j < encode_state->num_slice_params_ext; j++) {
    slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

    /* look ahead to the first slice of the next group (NULL for the last) */
    if (j == encode_state->num_slice_params_ext - 1)
        next_slice_group_param = NULL;
    next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;

    for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
        if (i < encode_state->slice_params_ext[j]->num_elements - 1)
            next_slice_param = slice_param + 1;
        next_slice_param = next_slice_group_param;

        gen9_vdenc_mfx_avc_single_slice(ctx,
        if (is_frame_level_vdenc)
        /* TODO: remove assert(0) and add other commands here */

if (is_frame_level_vdenc)
if (is_frame_level_vdenc) {
    struct vd_pipeline_flush_parameter pipeline_flush_params;

    gen9_vdenc_vdenc_walker_state(ctx, encode_state, encoder_context);

    memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
    pipeline_flush_params.mfx_pipeline_done = !has_tail;
    pipeline_flush_params.vdenc_pipeline_done = 1;
    pipeline_flush_params.vdenc_pipeline_command_flush = 1;
    pipeline_flush_params.vd_command_message_parser_done = 1;
    gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);

/* TODO: insert a tail if required */

memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
/* Build the full MFX + VDEnc command sequence for one PAK pass.  With BRC,
 * conditional batch-buffer-ends abort the pass early based on the HuC
 * status registers, and the image states come from the HuC-written
 * second-level batch instead of being emitted inline. */
gen9_vdenc_mfx_vdenc_pipeline(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
struct intel_batchbuffer *batch = encoder_context->base.batch;
struct gpe_mi_batch_buffer_start_parameter mi_batch_buffer_start_params;

if (vdenc_context->brc_enabled) {
    /* skip the pass if the HuC reports via status2 that it is not needed */
    struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;

    memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
    mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
    gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);

if (vdenc_context->current_pass) {
    /* re-encoding pass: bail out if the previous pass already succeeded */
    struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;

    memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
    mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status_res.bo;
    gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);

gen9_vdenc_mfx_pipe_mode_select(ctx, encode_state, encoder_context);

/* MFX surfaces: recon (0), raw input (4), 4x-scaled recon (5) */
gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res, 0);
gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res, 4);
gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res, 5);

gen9_vdenc_mfx_pipe_buf_addr_state(ctx, encoder_context);
gen9_vdenc_mfx_ind_obj_base_addr_state(ctx, encoder_context);
gen9_vdenc_mfx_bsp_buf_base_addr_state(ctx, encoder_context);

gen9_vdenc_vdenc_pipe_mode_select(ctx, encode_state, encoder_context);
gen9_vdenc_vdenc_src_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res);
gen9_vdenc_vdenc_ref_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res);
gen9_vdenc_vdenc_ds_ref_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res);
gen9_vdenc_vdenc_pipe_buf_addr_state(ctx, encode_state, encoder_context);
gen9_vdenc_vdenc_const_qpt_state(ctx, encode_state, encoder_context);

if (!vdenc_context->brc_enabled) {
    /* CQP: emit the image states directly */
    gen9_vdenc_mfx_avc_img_state(ctx, encode_state, encoder_context);
    gen9_vdenc_vdenc_img_state(ctx, encode_state, encoder_context);
    /* BRC: chain to the HuC-updated second-level batch holding them */
    memset(&mi_batch_buffer_start_params, 0, sizeof(mi_batch_buffer_start_params));
    mi_batch_buffer_start_params.is_second_level = 1; /* Must be the second level batch buffer */
    mi_batch_buffer_start_params.bo = vdenc_context->second_level_batch_res.bo;
    gen9_gpe_mi_batch_buffer_start(ctx, batch, &mi_batch_buffer_start_params);

gen9_vdenc_mfx_avc_qm_state(ctx, encoder_context);
gen9_vdenc_mfx_avc_fqm_state(ctx, encoder_context);

gen9_vdenc_mfx_vdenc_avc_slices(ctx, encode_state, encoder_context);
/* Map the VA rate-control mode (low 7 bits of rate_control_mode) to the
 * driver's internal BRC mode: CBR, VBR or (default) CQP. */
gen9_vdenc_context_brc_prepare(struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context)
struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
unsigned int rate_control_mode = encoder_context->rate_control_mode;

switch (rate_control_mode & 0x7f) {
    vdenc_context->internal_rate_mode = I965_BRC_CBR;
    vdenc_context->internal_rate_mode = I965_BRC_VBR;
    vdenc_context->internal_rate_mode = I965_BRC_CQP;
/* After PAK completes: flush, then copy the frame byte count from the MFC
 * MMIO register into the status buffer, and copy byte count + image status
 * control into each per-pass BRC-update DMEM buffer so the next HuC BRC
 * update can read them.  ("status_bffuer" is a historic typo in the struct
 * field name; kept as-is.) */
gen9_vdenc_read_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
struct intel_batchbuffer *batch = encoder_context->base.batch;
struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
unsigned int base_offset = vdenc_context->status_bffuer.base_offset;

memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);

memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
mi_store_register_mem_params.bo = vdenc_context->status_bffuer.res.bo;
mi_store_register_mem_params.offset = base_offset + vdenc_context->status_bffuer.bytes_per_frame_offset;
gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);

/* Update DMEM buffer for BRC Update */
for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
    /* DMEM DW5: frame byte count */
    mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
    mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
    mi_store_register_mem_params.offset = 5 * sizeof(uint32_t);
    gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);

    /* DMEM DW7: image status control */
    mi_store_register_mem_params.mmio_offset = MFC_IMAGE_STATUS_CTRL_REG; /* TODO: fix it if VDBOX2 is used */
    mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
    mi_store_register_mem_params.offset = 7 * sizeof(uint32_t);
    gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
/* Reject configurations VDEnc cannot encode on this platform: any B slice
 * in any slice parameter buffer fails with VA_STATUS_ERROR_UNKNOWN. */
gen9_vdenc_avc_check_capability(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
VAEncSliceParameterBufferH264 *slice_param;

for (j = 0; j < encode_state->num_slice_params_ext; j++) {
    slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

    for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
        if (slice_param->slice_type == SLICE_TYPE_B)
            return VA_STATUS_ERROR_UNKNOWN;

return VA_STATUS_SUCCESS;
/* Encode one AVC picture: validate capability, prepare resources, then run
 * up to num_passes BRC/PAK passes.  Each pass builds its own atomic BCS
 * batch (with HuC BRC init/reset + update when BRC is enabled) and flushes
 * it; brc_initted/brc_need_reset are updated after the passes. */
gen9_vdenc_avc_encode_picture(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
struct intel_batchbuffer *batch = encoder_context->base.batch;

va_status = gen9_vdenc_avc_check_capability(ctx, encode_state, encoder_context);

if (va_status != VA_STATUS_SUCCESS)

gen9_vdenc_avc_prepare(ctx, profile, encode_state, encoder_context);

for (vdenc_context->current_pass = 0; vdenc_context->current_pass < vdenc_context->num_passes; vdenc_context->current_pass++) {
    vdenc_context->is_first_pass = (vdenc_context->current_pass == 0);
    vdenc_context->is_last_pass = (vdenc_context->current_pass == (vdenc_context->num_passes - 1));

    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);

    if (vdenc_context->brc_enabled) {
        /* HuC BRC: full init/reset the first time (or on reset), then a
         * per-pass update */
        if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset)
            gen9_vdenc_huc_brc_init_reset(ctx, encode_state, encoder_context);

        gen9_vdenc_huc_brc_update(ctx, encode_state, encoder_context);
        intel_batchbuffer_emit_mi_flush(batch);

    gen9_vdenc_mfx_vdenc_pipeline(ctx, encode_state, encoder_context);
    gen9_vdenc_read_status(ctx, encoder_context);

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

    vdenc_context->brc_initted = 1;
    vdenc_context->brc_need_reset = 0;

return VA_STATUS_SUCCESS;
/* Encoder entry point: dispatch by VA profile.  Only H.264 (constrained
 * baseline / main / high) is handled; anything else is rejected. */
gen9_vdenc_pipeline(VADriverContextP ctx,
                    struct encode_state *encode_state,
                    struct intel_encoder_context *encoder_context)
case VAProfileH264ConstrainedBaseline:
case VAProfileH264Main:
case VAProfileH264High:
    vaStatus = gen9_vdenc_avc_encode_picture(ctx, profile, encode_state, encoder_context);
    vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
/* Release every GPE resource owned by the VDEnc context: BRC buffers,
 * statistics, surfaces, reference lists, bitstream/status buffers and all
 * row-store scratch buffers. */
gen9_vdenc_free_resources(struct gen9_vdenc_context *vdenc_context)
i965_free_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
i965_free_gpe_resource(&vdenc_context->brc_history_buffer_res);
i965_free_gpe_resource(&vdenc_context->brc_stream_in_res);
i965_free_gpe_resource(&vdenc_context->brc_stream_out_res);
i965_free_gpe_resource(&vdenc_context->huc_dummy_res);

for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++)
    i965_free_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);

i965_free_gpe_resource(&vdenc_context->vdenc_statistics_res);
i965_free_gpe_resource(&vdenc_context->pak_statistics_res);
i965_free_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
i965_free_gpe_resource(&vdenc_context->hme_detection_summary_buffer_res);
i965_free_gpe_resource(&vdenc_context->brc_constant_data_res);
i965_free_gpe_resource(&vdenc_context->second_level_batch_res);

i965_free_gpe_resource(&vdenc_context->huc_status_res);
i965_free_gpe_resource(&vdenc_context->huc_status2_res);

i965_free_gpe_resource(&vdenc_context->recon_surface_res);
i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);

for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
    i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
    i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);

i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
i965_free_gpe_resource(&vdenc_context->status_bffuer.res);

i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);

i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
/*
 * Destroy hook installed as encoder_context->mfc_context_destroy.
 * Frees all GPE resources held by the VDEnc context, then the context
 * itself.
 */
static void
gen9_vdenc_context_destroy(void *context)
{
    struct gen9_vdenc_context *vdenc_context = context;

    /* Guard against a NULL context so the hook is safe to invoke on a
     * partially-initialized encoder: gen9_vdenc_free_resources()
     * dereferences the pointer unconditionally. */
    if (!vdenc_context)
        return;

    gen9_vdenc_free_resources(vdenc_context);
    free(vdenc_context);
}
3674 gen9_vdenc_allocate_resources(VADriverContextP ctx,
3675 struct intel_encoder_context *encoder_context,
3676 struct gen9_vdenc_context *vdenc_context)
3678 struct i965_driver_data *i965 = i965_driver_data(ctx);
3681 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_init_reset_dmem_res,
3682 ALIGN(sizeof(struct huc_brc_init_dmem), 64),
3683 "HuC Init&Reset DMEM buffer");
3685 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_history_buffer_res,
3686 ALIGN(HUC_BRC_HISTORY_BUFFER_SIZE, 0x1000),
3687 "HuC History buffer");
3689 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_in_res,
3690 ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3691 "HuC Stream In buffer");
3693 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_out_res,
3694 ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3695 "HuC Stream Out buffer");
3697 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_dummy_res,
3699 "HuC dummy buffer");
3701 for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
3702 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_update_dmem_res[i],
3703 ALIGN(sizeof(struct huc_brc_update_dmem), 64),
3704 "HuC BRC Update buffer");
3705 i965_zero_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
3708 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_statistics_res,
3709 ALIGN(VDENC_STATISTICS_SIZE, 0x1000),
3710 "VDENC statistics buffer");
3712 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->pak_statistics_res,
3713 ALIGN(PAK_STATISTICS_SIZE, 0x1000),
3714 "PAK statistics buffer");
3716 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_avc_image_state_res,
3717 ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3718 "VDENC/AVC image state buffer");
3720 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->hme_detection_summary_buffer_res,
3721 ALIGN(HME_DETECTION_SUMMARY_BUFFER_SIZE, 0x1000),
3722 "HME summary buffer");
3724 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_constant_data_res,
3725 ALIGN(BRC_CONSTANT_DATA_SIZE, 0x1000),
3726 "BRC constant buffer");
3728 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->second_level_batch_res,
3729 ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3730 "Second level batch buffer");
3732 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status_res,
3734 "HuC Status buffer");
3736 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status2_res,
3738 "HuC Status buffer");
3742 gen9_vdenc_context_get_status(VADriverContextP ctx,
3743 struct intel_encoder_context *encoder_context,
3744 struct i965_coded_buffer_segment *coded_buffer_segment)
3746 struct gen9_vdenc_status *vdenc_status = (struct gen9_vdenc_status *)coded_buffer_segment->codec_private_data;
3748 coded_buffer_segment->base.size = vdenc_status->bytes_per_frame;
3750 return VA_STATUS_SUCCESS;
3754 gen9_vdenc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3756 struct gen9_vdenc_context *vdenc_context = calloc(1, sizeof(struct gen9_vdenc_context));
3761 vdenc_context->brc_initted = 0;
3762 vdenc_context->brc_need_reset = 0;
3763 vdenc_context->is_low_delay = 0;
3764 vdenc_context->current_pass = 0;
3765 vdenc_context->num_passes = 1;
3766 vdenc_context->vdenc_streamin_enable = 0;
3767 vdenc_context->vdenc_pak_threshold_check_enable = 0;
3769 gen9_vdenc_allocate_resources(ctx, encoder_context, vdenc_context);
3771 encoder_context->mfc_context = vdenc_context;
3772 encoder_context->mfc_context_destroy = gen9_vdenc_context_destroy;
3773 encoder_context->mfc_pipeline = gen9_vdenc_pipeline;
3774 encoder_context->mfc_brc_prepare = gen9_vdenc_context_brc_prepare;
3775 encoder_context->get_status = gen9_vdenc_context_get_status;