2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41 #include "intel_media.h"
42 #include "gen9_vdenc.h"
/*
 * BRC buffer-rate QP-adjustment deltas for I frames, low-delay mode.
 * 9 rows x 8 columns (72 entries).
 * NOTE(review): negative initializers in a uint8_t array wrap to their
 * two's-complement byte value (C unsigned conversion); the consumer
 * presumably reinterprets them as signed deltas — confirm against the
 * HuC BRC DMEM layout.
 */
static const uint8_t buf_rate_adj_tab_i_lowdelay[72] = {
    0,   0,  -8, -12, -16, -20, -28, -36,
    0,   0,  -4,  -8, -12, -16, -24, -32,
    4,   2,   0,  -1,  -3,  -8, -16, -24,
    8,   4,   2,   0,  -1,  -4,  -8, -16,
    20, 16,   4,   0,  -1,  -4,  -8, -16,
    24, 20,  16,   8,   4,   0,  -4,  -8,
    28, 24,  20,  16,   8,   4,   0,  -8,
    32, 24,  20,  16,   8,   4,   0,  -4,
    64, 48,  28,  20,  16,  12,   8,   4,
};
/*
 * BRC buffer-rate QP-adjustment deltas for P frames, low-delay mode.
 * 9 rows x 8 columns (72 entries).
 * NOTE(review): negative initializers wrap in uint8_t; consumer presumably
 * treats entries as signed bytes — confirm against the HuC BRC DMEM layout.
 */
static const uint8_t buf_rate_adj_tab_p_lowdelay[72] = {
    -8, -24, -32, -40, -44, -48, -52, -80,
    -8, -16, -32, -40, -40, -44, -44, -56,
    0,   0, -12, -20, -24, -28, -32, -36,
    8,   4,   0,   0,  -8, -16, -24, -32,
    32, 16,   8,   4,  -4,  -8, -16, -20,
    36, 24,  16,   8,   4,  -2,  -4,  -8,
    40, 36,  24,  20,  16,   8,   0,  -8,
    48, 40,  28,  24,  20,  12,   0,  -4,
    64, 48,  28,  20,  16,  12,   8,   4,
};
/*
 * BRC buffer-rate QP-adjustment deltas for B frames, low-delay mode.
 * 9 rows x 8 columns (72 entries).
 * NOTE(review): negative initializers wrap in uint8_t; consumer presumably
 * treats entries as signed bytes — confirm against the HuC BRC DMEM layout.
 */
static const uint8_t buf_rate_adj_tab_b_lowdelay[72] = {
    0,  -4,  -8, -16, -24, -32, -40, -48,
    1,   0,  -4,  -8, -16, -24, -32, -40,
    4,   2,   0,  -1,  -3,  -8, -16, -24,
    8,   4,   2,   0,  -1,  -4,  -8, -16,
    20, 16,   4,   0,  -1,  -4,  -8, -16,
    24, 20,  16,   8,   4,   0,  -4,  -8,
    28, 24,  20,  16,   8,   4,   0,  -8,
    32, 24,  20,  16,   8,   4,   0,  -4,
    64, 48,  28,  20,  16,  12,   8,   4,
};
/*
 * VBR distortion-based QP adjustment for I frames: 9x9 (81 entries),
 * signed deltas added to the frame QP by the BRC.
 */
static const int8_t dist_qp_adj_tab_i_vbr[81] = {
    0,   0,  0,  0,  0,  3,  4,  6,  8,
    0,   0,  0,  0,  0,  2,  3,  5,  7,
    -1,  0,  0,  0,  0,  2,  2,  4,  5,
    -1, -1,  0,  0,  0,  1,  2,  2,  4,
    -2, -2, -1,  0,  0,  0,  1,  2,  4,
    -2, -2, -1,  0,  0,  0,  1,  2,  4,
    -3, -2, -1, -1,  0,  0,  1,  2,  5,
    -3, -2, -1, -1,  0,  0,  2,  4,  7,
    -4, -3, -2, -1,  0,  1,  3,  5,  8,
};
/*
 * VBR distortion-based QP adjustment for P frames: 9x9 (81 entries),
 * signed deltas added to the frame QP by the BRC.
 */
static const int8_t dist_qp_adj_tab_p_vbr[81] = {
    -1,  0,  0,  0,  0,  1,  1,  2,  3,
    -1, -1,  0,  0,  0,  1,  1,  2,  3,
    -2, -1, -1,  0,  0,  1,  1,  2,  3,
    -3, -2, -2, -1,  0,  0,  1,  2,  3,
    -3, -2, -1, -1,  0,  0,  1,  2,  3,
    -3, -2, -1, -1,  0,  0,  1,  2,  3,
    -3, -2, -1, -1,  0,  0,  1,  2,  3,
    -3, -2, -1, -1,  0,  0,  1,  2,  3,
    -3, -2, -1, -1,  0,  0,  1,  2,  3,
};
/*
 * VBR distortion-based QP adjustment for B frames: 9x9 (81 entries),
 * signed deltas added to the frame QP by the BRC.
 */
static const int8_t dist_qp_adj_tab_b_vbr[81] = {
    0,   0,  0,  0,  0,  2,  3,  3,  4,
    0,   0,  0,  0,  0,  2,  3,  3,  4,
    -1,  0,  0,  0,  0,  2,  2,  3,  3,
    -1, -1,  0,  0,  0,  1,  2,  2,  2,
    -1, -1, -1,  0,  0,  0,  1,  2,  2,
    -2, -1, -1,  0,  0,  0,  0,  1,  2,
    -2, -1, -1, -1,  0,  0,  0,  1,  3,
    -2, -2, -1, -1,  0,  0,  1,  1,  3,
    -2, -2, -1, -1,  0,  1,  1,  2,  4,
};
/*
 * VBR buffer-rate QP-adjustment deltas for I frames: 9x8 (72 entries).
 */
static const int8_t buf_rate_adj_tab_i_vbr[72] = {
    -4, -20, -28, -36, -40, -44, -48, -80,
    0,   -8, -12, -20, -24, -28, -32, -36,
    0,    0,  -8, -16, -20, -24, -28, -32,
    8,    4,   0,   0,  -8, -16, -24, -28,
    32,  24,  16,   2,  -4,  -8, -16, -20,
    36,  32,  28,  16,   8,   0,  -4,  -8,
    40,  36,  24,  20,  16,   8,   0,  -8,
    48,  40,  28,  24,  20,  12,   0,  -4,
    64,  48,  28,  20,  16,  12,   8,   4,
};
/*
 * VBR buffer-rate QP-adjustment deltas for P frames: 9x8 (72 entries).
 */
static const int8_t buf_rate_adj_tab_p_vbr[72] = {
    -8, -24, -32, -44, -48, -56, -64, -80,
    -8, -16, -32, -40, -44, -52, -56, -64,
    0,    0, -16, -28, -36, -40, -44, -48,
    8,    4,   0,   0,  -8, -16, -24, -36,
    20,  12,   4,   0,  -8,  -8,  -8, -16,
    24,  16,   8,   8,   8,   0,  -4,  -8,
    40,  36,  24,  20,  16,   8,   0,  -8,
    48,  40,  28,  24,  20,  12,   0,  -4,
    64,  48,  28,  20,  16,  12,   8,   4,
};
/*
 * VBR buffer-rate QP-adjustment deltas for B frames: 9x8 (72 entries).
 */
static const int8_t buf_rate_adj_tab_b_vbr[72] = {
    0,  -4,  -8, -16, -24, -32, -40, -48,
    1,   0,  -4,  -8, -16, -24, -32, -40,
    4,   2,   0,  -1,  -3,  -8, -16, -24,
    8,   4,   2,   0,  -1,  -4,  -8, -16,
    20, 16,   4,   0,  -1,  -4,  -8, -16,
    24, 20,  16,   8,   4,   0,  -4,  -8,
    28, 24,  20,  16,   8,   4,   0,  -8,
    32, 24,  20,  16,   8,   4,   0,  -4,
    64, 48,  28,  20,  16,  12,   8,   4,
};
152 static struct huc_brc_update_constant_data
153 gen9_brc_update_constant_data = {
154 .global_rate_qp_adj_tab_i = {
155 48, 40, 32, 24, 16, 8, 0, -8,
156 40, 32, 24, 16, 8, 0, -8, -16,
157 32, 24, 16, 8, 0, -8, -16, -24,
158 24, 16, 8, 0, -8, -16, -24, -32,
159 16, 8, 0, -8, -16, -24, -32, -40,
160 8, 0, -8, -16, -24, -32, -40, -48,
161 0, -8, -16, -24, -32, -40, -48, -56,
162 48, 40, 32, 24, 16, 8, 0, -8,
165 .global_rate_qp_adj_tab_p = {
166 48, 40, 32, 24, 16, 8, 0, -8,
167 40, 32, 24, 16, 8, 0, -8, -16,
168 16, 8, 8, 4, -8, -16, -16, -24,
169 8, 0, 0, -8, -16, -16, -16, -24,
170 8, 0, 0, -24, -32, -32, -32, -48,
171 0, -16, -16, -24, -32, -48, -56, -64,
172 -8, -16, -32, -32, -48, -48, -56, -64,
173 -16,-32, -48, -48, -48, -56, -64, -80,
176 .global_rate_qp_adj_tab_b = {
177 48, 40, 32, 24, 16, 8, 0, -8,
178 40, 32, 24, 16, 8, 0, -8, -16,
179 32, 24, 16, 8, 0, -8, -16, -24,
180 24, 16, 8, 0, -8, -8, -16, -24,
181 16, 8, 0, 0, -8, -16, -24, -32,
182 16, 8, 0, 0, -8, -16, -24, -32,
183 0, -8, -8, -16, -32, -48, -56, -64,
184 0, -8, -8, -16, -32, -48, -56, -64
187 .dist_threshld_i = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
188 .dist_threshld_p = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
189 .dist_threshld_b = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
191 .dist_qp_adj_tab_i = {
192 0, 0, 0, 0, 0, 3, 4, 6, 8,
193 0, 0, 0, 0, 0, 2, 3, 5, 7,
194 -1, 0, 0, 0, 0, 2, 2, 4, 5,
195 -1, -1, 0, 0, 0, 1, 2, 2, 4,
196 -2, -2, -1, 0, 0, 0, 1, 2, 4,
197 -2, -2, -1, 0, 0, 0, 1, 2, 4,
198 -3, -2, -1, -1, 0, 0, 1, 2, 5,
199 -3, -2, -1, -1, 0, 0, 2, 4, 7,
200 -4, -3, -2, -1, 0, 1, 3, 5, 8,
203 .dist_qp_adj_tab_p = {
204 -1, 0, 0, 0, 0, 1, 1, 2, 3,
205 -1, -1, 0, 0, 0, 1, 1, 2, 3,
206 -2, -1, -1, 0, 0, 1, 1, 2, 3,
207 -3, -2, -2, -1, 0, 0, 1, 2, 3,
208 -3, -2, -1, -1, 0, 0, 1, 2, 3,
209 -3, -2, -1, -1, 0, 0, 1, 2, 3,
210 -3, -2, -1, -1, 0, 0, 1, 2, 3,
211 -3, -2, -1, -1, 0, 0, 1, 2, 3,
212 -3, -2, -1, -1, 0, 0, 1, 2, 3,
215 .dist_qp_adj_tab_b = {
216 0, 0, 0, 0, 0, 2, 3, 3, 4,
217 0, 0, 0, 0, 0, 2, 3, 3, 4,
218 -1, 0, 0, 0, 0, 2, 2, 3, 3,
219 -1, -1, 0, 0, 0, 1, 2, 2, 2,
220 -1, -1, -1, 0, 0, 0, 1, 2, 2,
221 -2, -1, -1, 0, 0, 0, 0, 1, 2,
222 -2, -1, -1, -1, 0, 0, 0, 1, 3,
223 -2, -2, -1, -1, 0, 0, 1, 1, 3,
224 -2, -2, -1, -1, 0, 1, 1, 2, 4,
227 /* default table for non lowdelay */
228 .buf_rate_adj_tab_i = {
229 -4, -20, -28, -36, -40, -44, -48, -80,
230 0, -8, -12, -20, -24, -28, -32, -36,
231 0, 0, -8, -16, -20, -24, -28, -32,
232 8, 4, 0, 0, -8, -16, -24, -28,
233 32, 24, 16, 2, -4, -8, -16, -20,
234 36, 32, 28, 16, 8, 0, -4, -8,
235 40, 36, 24, 20, 16, 8, 0, -8,
236 48, 40, 28, 24, 20, 12, 0, -4,
237 64, 48, 28, 20, 16, 12, 8, 4,
240 /* default table for non lowdelay */
241 .buf_rate_adj_tab_p = {
242 -8, -24, -32, -44, -48, -56, -64, -80,
243 -8, -16, -32, -40, -44, -52, -56, -64,
244 0, 0, -16, -28, -36, -40, -44, -48,
245 8, 4, 0, 0, -8, -16, -24, -36,
246 20, 12, 4, 0, -8, -8, -8, -16,
247 24, 16, 8, 8, 8, 0, -4, -8,
248 40, 36, 24, 20, 16, 8, 0, -8,
249 48, 40, 28, 24, 20, 12, 0, -4,
250 64, 48, 28, 20, 16, 12, 8, 4,
253 /* default table for non lowdelay */
254 .buf_rate_adj_tab_b = {
255 0, -4, -8, -16, -24, -32, -40, -48,
256 1, 0, -4, -8, -16, -24, -32, -40,
257 4, 2, 0, -1, -3, -8, -16, -24,
258 8, 4, 2, 0, -1, -4, -8, -16,
259 20, 16, 4, 0, -1, -4, -8, -16,
260 24, 20, 16, 8, 4, 0, -4, -8,
261 28, 24, 20, 16, 8, 4, 0, -8,
262 32, 24, 20, 16, 8, 4, 0, -4,
263 64, 48, 28, 20, 16, 12, 8, 4,
266 .frame_size_min_tab_p = { 1, 2, 4, 6, 8, 10, 16, 16, 16 },
267 .frame_size_min_tab_i = { 1, 2, 4, 8, 16, 20, 24, 32, 36 },
269 .frame_size_max_tab_p = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
270 .frame_size_max_tab_i = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
272 .frame_size_scg_tab_p = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
273 .frame_size_scg_tab_i = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
275 .i_intra_non_pred = {
276 0x0e, 0x0e, 0x0e, 0x18, 0x19, 0x1b, 0x1c, 0x0d, 0x0f, 0x18, 0x19, 0x0d, 0x0f, 0x0f,
277 0x0c, 0x0e, 0x0c, 0x0c, 0x0a, 0x0a, 0x0b, 0x0a, 0x0a, 0x0a, 0x09, 0x09, 0x08, 0x08,
278 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x07, 0x07, 0x07, 0x07, 0x07,
282 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
283 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
284 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
288 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
289 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x04, 0x04, 0x04, 0x04, 0x06, 0x06, 0x06,
290 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07,
294 0x2e, 0x2e, 0x2e, 0x38, 0x39, 0x3a, 0x3b, 0x2c, 0x2e, 0x38, 0x39, 0x2d, 0x2f, 0x38,
295 0x2e, 0x38, 0x2e, 0x38, 0x2f, 0x2e, 0x38, 0x38, 0x38, 0x38, 0x2f, 0x2f, 0x2f, 0x2e,
296 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x1e, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x0e, 0x0d,
300 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
301 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
302 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
305 .p_intra_non_pred = {
306 0x06, 0x06, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x07,
307 0x07, 0x07, 0x06, 0x07, 0x07, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
308 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
312 0x1b, 0x1b, 0x1b, 0x1c, 0x1e, 0x28, 0x29, 0x1a, 0x1b, 0x1c, 0x1e, 0x1a, 0x1c, 0x1d,
313 0x1b, 0x1c, 0x1c, 0x1c, 0x1c, 0x1b, 0x1c, 0x1c, 0x1d, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c,
314 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
318 0x1d, 0x1d, 0x1d, 0x1e, 0x28, 0x29, 0x2a, 0x1b, 0x1d, 0x1e, 0x28, 0x1c, 0x1d, 0x1f,
319 0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1d, 0x1f, 0x1e, 0x1e, 0x1e, 0x1d, 0x1e, 0x1e, 0x1d,
320 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e,
324 0x38, 0x38, 0x38, 0x39, 0x3a, 0x3b, 0x3d, 0x2e, 0x38, 0x39, 0x3a, 0x2f, 0x39, 0x3a,
325 0x38, 0x39, 0x38, 0x39, 0x39, 0x38, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
326 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
330 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
331 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
332 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
336 0x07, 0x07, 0x07, 0x08, 0x09, 0x0b, 0x0c, 0x06, 0x07, 0x09, 0x0a, 0x07, 0x08, 0x09,
337 0x08, 0x09, 0x08, 0x09, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x08, 0x08, 0x08, 0x08,
338 0x08, 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
342 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x02, 0x02, 0x02, 0x03, 0x02, 0x02, 0x02,
343 0x02, 0x03, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
344 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
348 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
349 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
350 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
354 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
355 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
356 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04
362 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
363 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
364 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
369 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
370 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
371 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
376 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
377 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
378 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
383 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
384 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
385 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
390 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
391 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
392 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
397 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
398 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
399 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x1a, 0x1f, 0x2a, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d
404 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
405 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
406 0x1a, 0x1a, 0x1a, 0x1a, 0x2a, 0x2f, 0x3a, 0x3d, 0x3d, 0x3d, 0x3d, 0x3d, 0x3d, 0x3d,
411 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
412 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
413 0x1a, 0x1a, 0x1a, 0x1f, 0x2d, 0x3d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d,
/*
 * Per-QP lambda values for VDEnc cost calculation (I slices).
 * Indices 0..41 hold data; the last two entries are zero padding.
 */
static uint8_t vdenc_const_qp_lambda[44] = {
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
    0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
    0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
    0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
    0x4a, 0x53, 0x00, 0x00
};
428 static uint16_t vdenc_const_skip_threshold[28] = {
433 static uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0[28] = {
438 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1[28] = {
443 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2[28] = {
448 static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3[28] = {
/*
 * Per-QP lambda values for VDEnc cost calculation (P slices).
 * Indices 0..41 hold data; the last two entries are zero padding.
 */
static uint8_t vdenc_const_qp_lambda_p[44] = {
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
    0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
    0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
    0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
    0x4a, 0x53, 0x00, 0x00
};
/*
 * Per-QP skip thresholds for P slices (26 data entries + 2 zero padding).
 */
static uint16_t vdenc_const_skip_threshold_p[28] = {
    0x0000, 0x0000, 0x0000, 0x0000, 0x0002, 0x0004, 0x0007, 0x000b,
    0x0011, 0x0019, 0x0023, 0x0032, 0x0044, 0x005b, 0x0077, 0x0099,
    0x00c2, 0x00f1, 0x0128, 0x0168, 0x01b0, 0x0201, 0x025c, 0x02c2,
    0x0333, 0x03b0, 0x0000, 0x0000
};
/*
 * SIC forward-transform coefficient thresholds, set 0, P slices
 * (26 data entries + 2 zero padding).
 */
static uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0_p[28] = {
    0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
    0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x3f,
    0x4e, 0x51, 0x5b, 0x63, 0x6f, 0x7f, 0x00, 0x00
};
/*
 * SIC forward-transform coefficient thresholds, set 1, P slices
 * (26 data entries + 2 zero padding).
 */
static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1_p[28] = {
    0x03, 0x04, 0x05, 0x05, 0x07, 0x09, 0x0b, 0x0e, 0x12, 0x17,
    0x1c, 0x21, 0x27, 0x2c, 0x33, 0x3b, 0x41, 0x51, 0x5c, 0x1a,
    0x1e, 0x21, 0x22, 0x26, 0x2c, 0x30, 0x00, 0x00
};
/*
 * SIC forward-transform coefficient thresholds, set 2, P slices
 * (26 data entries + 2 zero padding).
 */
static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2_p[28] = {
    0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
    0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x0f,
    0x13, 0x14, 0x16, 0x18, 0x1b, 0x1f, 0x00, 0x00
};
/*
 * SIC forward-transform coefficient thresholds, set 3, P slices
 * (26 data entries + 2 zero padding).
 */
static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3_p[28] = {
    0x04, 0x05, 0x06, 0x09, 0x0b, 0x0d, 0x12, 0x16, 0x1b, 0x23,
    0x2c, 0x33, 0x3d, 0x45, 0x4f, 0x5b, 0x66, 0x7f, 0x8e, 0x2a,
    0x2f, 0x32, 0x37, 0x3c, 0x45, 0x4c, 0x00, 0x00
};
499 vdenc_brc_dev_threshi0_fp_neg[4] = { 0.80, 0.60, 0.34, 0.2 };
502 vdenc_brc_dev_threshi0_fp_pos[4] = { 0.2, 0.4, 0.66, 0.9 };
505 vdenc_brc_dev_threshpb0_fp_neg[4] = { 0.90, 0.66, 0.46, 0.3 };
508 vdenc_brc_dev_threshpb0_fp_pos[4] = { 0.3, 0.46, 0.70, 0.90 };
511 vdenc_brc_dev_threshvbr0_neg[4] = { 0.90, 0.70, 0.50, 0.3 };
514 vdenc_brc_dev_threshvbr0_pos[4] = { 0.4, 0.5, 0.75, 0.90 };
/* BRC estimated-rate thresholds for P frames. */
static const unsigned char
vdenc_brc_estrate_thresh_p0[7] = { 4, 8, 12, 16, 20, 24, 28 };
/* BRC estimated-rate thresholds for I frames. */
static const unsigned char
vdenc_brc_estrate_thresh_i0[7] = { 4, 8, 12, 16, 20, 24, 28 };
/* Frame counts at which BRC global adjustment kicks in. */
static const uint16_t
vdenc_brc_start_global_adjust_frame[4] = { 10, 50, 100, 150 };
526 vdenc_brc_global_rate_ratio_threshold[7] = { 80, 90, 95, 101, 105, 115, 130};
529 vdenc_brc_start_global_adjust_mult[5] = { 1, 1, 3, 2, 1 };
532 vdenc_brc_start_global_adjust_div[5] = { 40, 5, 5, 3, 1 };
535 vdenc_brc_global_rate_ratio_threshold_qp[8] = { -3, -2, -1, 0, 1, 1, 2, 3 };
537 const int vdenc_mode_const[2][12][52] = {
540 //LUTMODE_INTRA_NONPRED
542 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, //QP=[0 ~12]
543 16, 18, 22, 24, 13, 15, 16, 18, 13, 15, 15, 12, 14, //QP=[13~25]
544 12, 12, 10, 10, 11, 10, 10, 10, 9, 9, 8, 8, 8, //QP=[26~38]
545 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, //QP=[39~51]
548 //LUTMODE_INTRA_16x16, LUTMODE_INTRA
550 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
551 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13~25]
552 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26~38]
553 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39~51]
558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
559 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, //QP=[13~25]
560 1, 1, 1, 1, 1, 4, 4, 4, 4, 6, 6, 6, 6, //QP=[26~38]
561 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, //QP=[39~51]
566 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, //QP=[0 ~12]
567 64, 72, 80, 88, 48, 56, 64, 72, 53, 59, 64, 56, 64, //QP=[13~25]
568 57, 64, 58, 55, 64, 64, 64, 64, 59, 59, 60, 57, 50, //QP=[26~38]
569 46, 42, 38, 34, 31, 27, 23, 22, 19, 18, 16, 14, 13, //QP=[39~51]
572 //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
578 //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16x8_FIELD
581 //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8X8_FIELD
584 //LUTMODE_INTER_16x16, LUTMODE_INTER
593 //LUTMODE_INTRA_CHROMA
599 //LUTMODE_INTRA_NONPRED
601 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[0 ~12]
602 7, 8, 9, 10, 5, 6, 7, 8, 6, 7, 7, 7, 7, //QP=[13~25]
603 6, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[26~38]
604 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[39~51]
607 //LUTMODE_INTRA_16x16, LUTMODE_INTRA
609 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
610 24, 28, 31, 35, 19, 21, 24, 28, 20, 24, 25, 21, 24,
611 24, 24, 24, 21, 24, 24, 26, 24, 24, 24, 24, 24, 24,
612 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
618 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, //QP=[0 ~12]
619 28, 32, 36, 40, 22, 26, 28, 32, 24, 26, 30, 26, 28, //QP=[13~25]
620 26, 28, 26, 26, 30, 28, 28, 28, 26, 28, 28, 26, 28, //QP=[26~38]
621 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, //QP=[39~51]
626 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, //QP=[0 ~12]
627 72, 80, 88, 104, 56, 64, 72, 80, 58, 68, 76, 64, 68, //QP=[13~25]
628 64, 68, 68, 64, 70, 70, 70, 70, 68, 68, 68, 68, 68, //QP=[26~38]
629 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, //QP=[39~51]
632 //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
634 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[0 ~12]
635 8, 9, 11, 12, 6, 7, 9, 10, 7, 8, 9, 8, 9, //QP=[13~25]
636 8, 9, 8, 8, 9, 9, 9, 9, 8, 8, 8, 8, 8, //QP=[26~38]
637 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, //QP=[39~51]
642 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, //QP=[0 ~12]
643 2, 3, 3, 3, 2, 2, 2, 3, 2, 2, 2, 2, 3, //QP=[13~25]
644 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, //QP=[26~38]
645 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, //QP=[39~51]
648 //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16X8_FIELD
650 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[0 ~12]
651 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[13~25]
652 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[26~38]
653 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[39~51]
656 //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8x8_FIELD
658 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[0 ~12]
659 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[13~25]
660 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[26~38]
661 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[39~51]
664 //LUTMODE_INTER_16x16, LUTMODE_INTER
666 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[0 ~12]
667 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[13~25]
668 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[26~38]
669 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[39~51]
674 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
675 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13~25]
676 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26~38]
677 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39~51]
682 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[0 ~12]
683 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[13~25]
684 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[26~38]
685 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[39~51]
688 //LUTMODE_INTRA_CHROMA
690 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
691 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13~25]
692 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26~38]
693 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39~51]
/*
 * MV cost skip-bias values in quarter-pel units, indexed by cost bucket.
 */
const int vdenc_mv_cost_skipbias_qpel[8] = {
    0, 6, 6, 9, 10, 13, 14, 16
};
/*
 * HME (hierarchical motion estimation) cost table, indexed
 * [cost-bucket][QP 0..51].
 * NOTE(review): the meaning of the 8 buckets (predictor distance classes)
 * is inferred from the surrounding VDEnc cost tables — confirm against the
 * VDEnc kernel interface documentation.
 */
const int vdenc_hme_cost[8][52] = {
    /* bucket 0 */
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         /* QP 0..12  */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         /* QP 13..25 */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         /* QP 26..38 */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         /* QP 39..51 */
    },
    /* bucket 1 */
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    },
    /* bucket 2 */
    {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    },
    /* bucket 3 */
    {
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    },
    /* bucket 4 */
    {
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    },
    /* bucket 5 — cost ramps up at high QP */
    {
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 20, 30, 40, 50, 50, 50, 50, 50, 50,
    },
    /* bucket 6 — cost ramps up at high QP */
    {
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
        20, 20, 20, 40, 60, 80, 100, 100, 100, 100, 100, 100, 100,
    },
    /* bucket 7 — cost ramps up at high QP */
    {
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
        20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
        20, 20, 30, 50, 100, 200, 200, 200, 200, 200, 200, 200, 200,
    },
};
763 #define OUT_BUFFER_2DW(batch, bo, is_target, delta) do { \
765 OUT_BCS_RELOC64(batch, \
767 I915_GEM_DOMAIN_RENDER, \
768 is_target ? I915_GEM_DOMAIN_RENDER : 0, \
771 OUT_BCS_BATCH(batch, 0); \
772 OUT_BCS_BATCH(batch, 0); \
/*
 * Emit a 3-dword buffer reference into the batch: the 2-dword address
 * pair (or zero placeholders when bo is NULL, per OUT_BUFFER_2DW)
 * followed by one attribute dword.
 */
#define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr) do {  \
        OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
        OUT_BCS_BATCH(batch, attr);                             \
    } while (0)
781 #define ALLOC_VDENC_BUFFER_RESOURCE(buffer, bfsize, des) { \
782 buffer.type = I965_GPE_RESOURCE_BUFFER; \
783 buffer.width = bfsize; \
785 buffer.pitch = buffer.width; \
786 buffer.size = buffer.pitch; \
787 buffer.tiling = I915_TILING_NONE; \
788 i965_allocate_gpe_resource(i965->intel.bufmgr, \
/*
 * Map an H.264 level_idc (10, 20, 30, ...) to the maximum vertical MV
 * range used by VDEnc.  Defaults to 512 and widens with higher levels.
 * NOTE(review): the level <= 10 branch value (256) is reconstructed from
 * the visible default/threshold structure — confirm against H.264
 * Table A-1 level limits.
 */
static int
gen9_vdenc_get_max_vmv_range(int level)
{
    int max_vmv_range = 512;

    if (level <= 10)
        max_vmv_range = 256;
    else if (level <= 20)
        max_vmv_range = 512;
    else if (level <= 30)
        max_vmv_range = 1024;
    else
        max_vmv_range = 2048;

    return max_vmv_range;
}
811 map_44_lut_value(unsigned int v, unsigned char max)
813 unsigned int maxcost;
821 maxcost = ((max & 15) << (max >> 4));
827 d = (int)(log((double)v) / log(2.0)) - 3;
833 ret = (unsigned char)((d << 4) + (int)((v + (d == 0 ? 0 : (1 << (d - 1)))) >> d));
834 ret = (ret & 0xf) == 0 ? (ret | 8) : ret;
840 gen9_vdenc_update_rate_control_parameters(VADriverContextP ctx,
841 struct intel_encoder_context *encoder_context,
842 VAEncMiscParameterRateControl *misc)
844 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
846 vdenc_context->max_bit_rate = ALIGN(misc->bits_per_second, 1000) / 1000;
847 vdenc_context->mb_brc_enabled = 0;
849 if (vdenc_context->internal_rate_mode == I965_BRC_CBR) {
850 vdenc_context->min_bit_rate = vdenc_context->max_bit_rate;
851 vdenc_context->mb_brc_enabled = (misc->rc_flags.bits.mb_rate_control < 2);
853 if (vdenc_context->target_bit_rate != vdenc_context->max_bit_rate) {
854 vdenc_context->target_bit_rate = vdenc_context->max_bit_rate;
855 vdenc_context->brc_need_reset = 1;
857 } else if (vdenc_context->internal_rate_mode == I965_BRC_VBR) {
858 vdenc_context->min_bit_rate = vdenc_context->max_bit_rate * (2 * misc->target_percentage - 100) / 100;
859 vdenc_context->mb_brc_enabled = (misc->rc_flags.bits.mb_rate_control < 2);
861 if (vdenc_context->target_bit_rate != vdenc_context->max_bit_rate * misc->target_percentage / 100) {
862 vdenc_context->target_bit_rate = vdenc_context->max_bit_rate * misc->target_percentage / 100;
863 vdenc_context->brc_need_reset = 1;
869 gen9_vdenc_update_hrd_parameters(VADriverContextP ctx,
870 struct intel_encoder_context *encoder_context,
871 VAEncMiscParameterHRD *misc)
873 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
875 if (vdenc_context->internal_rate_mode == I965_BRC_CQP)
878 vdenc_context->vbv_buffer_size_in_bit = misc->buffer_size;
879 vdenc_context->init_vbv_buffer_fullness_in_bit = misc->initial_buffer_fullness;
883 gen9_vdenc_update_framerate_parameters(VADriverContextP ctx,
884 struct intel_encoder_context *encoder_context,
885 VAEncMiscParameterFrameRate *misc)
887 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
889 vdenc_context->frames_per_100s = misc->framerate; /* misc->framerate is multiple of 100 */
893 gen9_vdenc_update_roi_parameters(VADriverContextP ctx,
894 struct intel_encoder_context *encoder_context,
895 VAEncMiscParameterBufferROI *misc)
897 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
900 if (!misc || !misc->roi) {
901 vdenc_context->num_roi = 0;
905 vdenc_context->num_roi = MIN(misc->num_roi, 3);
906 vdenc_context->max_delta_qp = misc->max_delta_qp;
907 vdenc_context->min_delta_qp = misc->min_delta_qp;
908 vdenc_context->vdenc_streamin_enable = (vdenc_context->num_roi == 0);
910 for (i = 0; i < vdenc_context->num_roi; i++) {
911 vdenc_context->roi[i].left = misc->roi->roi_rectangle.x;
912 vdenc_context->roi[i].right = vdenc_context->roi[i].left + misc->roi->roi_rectangle.width;
913 vdenc_context->roi[i].top = misc->roi->roi_rectangle.y;
914 vdenc_context->roi[i].bottom = vdenc_context->roi[i].top + misc->roi->roi_rectangle.height;
915 vdenc_context->roi[i].value = misc->roi->roi_value;
917 vdenc_context->roi[i].left /= 16;
918 vdenc_context->roi[i].right /= 16;
919 vdenc_context->roi[i].top /= 16;
920 vdenc_context->roi[i].bottom /= 16;
925 gen9_vdenc_update_misc_parameters(VADriverContextP ctx,
926 struct encode_state *encode_state,
927 struct intel_encoder_context *encoder_context)
930 VAEncMiscParameterBuffer *misc_param;
932 for (i = 0; i < ARRAY_ELEMS(encode_state->misc_param); i++) {
933 if (!encode_state->misc_param[i] || !encode_state->misc_param[i]->buffer)
936 misc_param = (VAEncMiscParameterBuffer *)encode_state->misc_param[i]->buffer;
938 switch (misc_param->type) {
939 case VAEncMiscParameterTypeFrameRate:
940 gen9_vdenc_update_framerate_parameters(ctx,
942 (VAEncMiscParameterFrameRate *)misc_param->data);
945 case VAEncMiscParameterTypeRateControl:
946 gen9_vdenc_update_rate_control_parameters(ctx,
948 (VAEncMiscParameterRateControl *)misc_param->data);
951 case VAEncMiscParameterTypeHRD:
952 gen9_vdenc_update_hrd_parameters(ctx,
954 (VAEncMiscParameterHRD *)misc_param->data);
957 case VAEncMiscParameterTypeROI:
958 gen9_vdenc_update_roi_parameters(ctx,
960 (VAEncMiscParameterBufferROI *)misc_param->data);
970 gen9_vdenc_update_parameters(VADriverContextP ctx,
972 struct encode_state *encode_state,
973 struct intel_encoder_context *encoder_context)
975 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
976 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
977 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
979 if (profile == VAProfileH264High)
980 vdenc_context->transform_8x8_mode_enable = !!pic_param->pic_fields.bits.transform_8x8_mode_flag;
982 vdenc_context->transform_8x8_mode_enable = 0;
984 vdenc_context->frame_width_in_mbs = seq_param->picture_width_in_mbs;
985 vdenc_context->frame_height_in_mbs = seq_param->picture_height_in_mbs;
987 vdenc_context->frame_width = vdenc_context->frame_width_in_mbs * 16;
988 vdenc_context->frame_height = vdenc_context->frame_height_in_mbs * 16;
990 vdenc_context->down_scaled_width_in_mb4x = WIDTH_IN_MACROBLOCKS(vdenc_context->frame_width / SCALE_FACTOR_4X);
991 vdenc_context->down_scaled_height_in_mb4x = HEIGHT_IN_MACROBLOCKS(vdenc_context->frame_height / SCALE_FACTOR_4X);
992 vdenc_context->down_scaled_width_4x = vdenc_context->down_scaled_width_in_mb4x * 16;
993 vdenc_context->down_scaled_height_4x = ((vdenc_context->down_scaled_height_in_mb4x + 1) >> 1) * 16;
994 vdenc_context->down_scaled_height_4x = ALIGN(vdenc_context->down_scaled_height_4x, 32) << 1;
996 if (vdenc_context->internal_rate_mode == I965_BRC_CBR) {
997 vdenc_context->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
998 vdenc_context->max_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
999 vdenc_context->min_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
1002 vdenc_context->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
1003 vdenc_context->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
1004 vdenc_context->frames_per_100s = 3000; /* 30fps */
1005 vdenc_context->gop_size = seq_param->intra_period;
1006 vdenc_context->ref_dist = seq_param->ip_period;
1007 vdenc_context->vdenc_streamin_enable = 0;
1009 gen9_vdenc_update_misc_parameters(ctx, encode_state, encoder_context);
1011 vdenc_context->current_pass = 0;
1012 vdenc_context->num_passes = 1;
1014 if (vdenc_context->internal_rate_mode == I965_BRC_CBR ||
1015 vdenc_context->internal_rate_mode == I965_BRC_VBR)
1016 vdenc_context->brc_enabled = 1;
1018 vdenc_context->brc_enabled = 0;
1020 if (vdenc_context->brc_enabled &&
1021 (!vdenc_context->init_vbv_buffer_fullness_in_bit ||
1022 !vdenc_context->vbv_buffer_size_in_bit ||
1023 !vdenc_context->max_bit_rate ||
1024 !vdenc_context->target_bit_rate ||
1025 !vdenc_context->frames_per_100s))
1026 vdenc_context->brc_enabled = 0;
1028 if (!vdenc_context->brc_enabled) {
1029 vdenc_context->target_bit_rate = 0;
1030 vdenc_context->max_bit_rate = 0;
1031 vdenc_context->min_bit_rate = 0;
1032 vdenc_context->init_vbv_buffer_fullness_in_bit = 0;
1033 vdenc_context->vbv_buffer_size_in_bit = 0;
1035 vdenc_context->num_passes = NUM_OF_BRC_PAK_PASSES;
1040 gen9_vdenc_avc_calculate_mode_cost(VADriverContextP ctx,
1041 struct encode_state *encode_state,
1042 struct intel_encoder_context *encoder_context,
1045 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1046 unsigned int frame_type = vdenc_context->frame_type;
1048 memset(vdenc_context->mode_cost, 0, sizeof(vdenc_context->mode_cost));
1049 memset(vdenc_context->mv_cost, 0, sizeof(vdenc_context->mv_cost));
1050 memset(vdenc_context->hme_mv_cost, 0, sizeof(vdenc_context->hme_mv_cost));
1052 vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_NONPRED] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_NONPRED][qp]), 0x6f);
1053 vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_16x16][qp]), 0x8f);
1054 vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_8x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_8x8][qp]), 0x8f);
1055 vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_4x4] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_4x4][qp]), 0x8f);
1057 if (frame_type == VDENC_FRAME_P) {
1058 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x16][qp]), 0x8f);
1059 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x8][qp]), 0x8f);
1060 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X8Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X8Q][qp]), 0x6f);
1061 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X4Q][qp]), 0x6f);
1062 vdenc_context->mode_cost[VDENC_LUTMODE_INTER_4X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_4X4Q][qp]), 0x6f);
1063 vdenc_context->mode_cost[VDENC_LUTMODE_REF_ID] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_REF_ID][qp]), 0x6f);
1065 vdenc_context->mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[0]), 0x6f);
1066 vdenc_context->mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[1]), 0x6f);
1067 vdenc_context->mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[2]), 0x6f);
1068 vdenc_context->mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[3]), 0x6f);
1069 vdenc_context->mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[4]), 0x6f);
1070 vdenc_context->mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[5]), 0x6f);
1071 vdenc_context->mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[6]), 0x6f);
1072 vdenc_context->mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[7]), 0x6f);
1074 vdenc_context->hme_mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_hme_cost[0][qp]), 0x6f);
1075 vdenc_context->hme_mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_hme_cost[1][qp]), 0x6f);
1076 vdenc_context->hme_mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_hme_cost[2][qp]), 0x6f);
1077 vdenc_context->hme_mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_hme_cost[3][qp]), 0x6f);
1078 vdenc_context->hme_mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_hme_cost[4][qp]), 0x6f);
1079 vdenc_context->hme_mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_hme_cost[5][qp]), 0x6f);
1080 vdenc_context->hme_mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_hme_cost[6][qp]), 0x6f);
1081 vdenc_context->hme_mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_hme_cost[7][qp]), 0x6f);
1086 gen9_vdenc_update_roi_in_streamin_state(VADriverContextP ctx,
1087 struct intel_encoder_context *encoder_context)
1089 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1090 struct gen9_vdenc_streamin_state *streamin_state;
1093 if (!vdenc_context->num_roi)
1096 streamin_state = (struct gen9_vdenc_streamin_state *)i965_map_gpe_resource(&vdenc_context->vdenc_streamin_res);
1098 if (!streamin_state)
1101 for (col = 0; col < vdenc_context->frame_width_in_mbs; col++) {
1102 for (row = 0; row < vdenc_context->frame_height_in_mbs; row++) {
1103 streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = 0; /* non-ROI region */
1105 /* The last one has higher priority */
1106 for (i = vdenc_context->num_roi - 1; i >= 0; i--) {
1107 if ((col >= vdenc_context->roi[i].left && col <= vdenc_context->roi[i].right) &&
1108 (row >= vdenc_context->roi[i].top && row <= vdenc_context->roi[i].bottom)) {
1109 streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = i + 1;
1117 i965_unmap_gpe_resource(&vdenc_context->vdenc_streamin_res);
/*
 * Per-frame preparation for the VDEnc AVC pipeline: binds the reconstructed,
 * reference, input and coded-buffer surfaces to GPE resources, (re)allocates
 * the MFX/VDEnc row-store and StreamIn scratch buffers, resolves the list0
 * reference indices for the first slice, and derives the frame type, QP,
 * mode costs and ROI StreamIn contents.
 * NOTE(review): this listing drops blank/brace-only lines; several elided
 * lines (e.g. the `VAProfile profile` parameter, `dri_bo *bo`, `char
 * *pbuffer`, `int qp` declarations and some call-argument tails) are not
 * visible here — confirm against the full file.
 */
1121 gen9_vdenc_avc_prepare(VADriverContextP ctx,
1123 struct encode_state *encode_state,
1124 struct intel_encoder_context *encoder_context)
1126 struct i965_driver_data *i965 = i965_driver_data(ctx);
1127 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1128 struct i965_coded_buffer_segment *coded_buffer_segment;
1129 struct object_surface *obj_surface;
1130 struct object_buffer *obj_buffer;
1131 VAEncPictureParameterBufferH264 *pic_param;
1132 VAEncSliceParameterBufferH264 *slice_param;
1133 VDEncAvcSurface *vdenc_avc_surface;
1135 int i, j, enable_avc_ildb = 0;
1139 gen9_vdenc_update_parameters(ctx, profile, encode_state, encoder_context);
     /* Deblocking is enabled for the frame as soon as any slice does not
      * disable it (disable_deblocking_filter_idc != 1). */
1141 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
1142 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
1143 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
1145 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
1146 assert((slice_param->slice_type == SLICE_TYPE_I) ||
1147 (slice_param->slice_type == SLICE_TYPE_SI) ||
1148 (slice_param->slice_type == SLICE_TYPE_P) ||
1149 (slice_param->slice_type == SLICE_TYPE_SP) ||
1150 (slice_param->slice_type == SLICE_TYPE_B));
1152 if (slice_param->disable_deblocking_filter_idc != 1) {
1153 enable_avc_ildb = 1;
1161 /* Setup current frame */
1162 obj_surface = encode_state->reconstructed_object;
1163 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
     /* Lazily attach a VDEncAvcSurface (holding the 4x-downscaled surface
      * used for HME) to the reconstructed surface. */
1165 if (obj_surface->private_data == NULL) {
1166 vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
1167 assert(vdenc_avc_surface);
1169 vdenc_avc_surface->ctx = ctx;
1170 i965_CreateSurfaces(ctx,
1171 vdenc_context->down_scaled_width_4x,
1172 vdenc_context->down_scaled_height_4x,
1173 VA_RT_FORMAT_YUV420,
1175 &vdenc_avc_surface->scaled_4x_surface_id);
1176 vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
1177 assert(vdenc_avc_surface->scaled_4x_surface_obj);
1178 i965_check_alloc_surface_bo(ctx,
1179 vdenc_avc_surface->scaled_4x_surface_obj,
1184 obj_surface->private_data = (void *)vdenc_avc_surface;
1185 obj_surface->free_private_data = (void *)vdenc_free_avc_surface;
1188 vdenc_avc_surface = (VDEncAvcSurface *)obj_surface->private_data;
1189 assert(vdenc_avc_surface->scaled_4x_surface_obj);
1191 /* Reconstructed surfaces */
1192 i965_free_gpe_resource(&vdenc_context->recon_surface_res);
1193 i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
1194 i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
1195 i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
1197 i965_object_surface_to_2d_gpe_resource(&vdenc_context->recon_surface_res, obj_surface);
1198 i965_object_surface_to_2d_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res, vdenc_avc_surface->scaled_4x_surface_obj);
     /* The reconstructed surface is written either post- or pre-deblocking
      * depending on whether the loop filter is active for this frame. */
1200 if (enable_avc_ildb) {
1201 i965_object_surface_to_2d_gpe_resource(&vdenc_context->post_deblocking_output_res, obj_surface);
1203 i965_object_surface_to_2d_gpe_resource(&vdenc_context->pre_deblocking_output_res, obj_surface);
1207 /* Reference surfaces */
1208 for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
1209 assert(ARRAY_ELEMS(vdenc_context->list_reference_res) ==
1210 ARRAY_ELEMS(vdenc_context->list_scaled_4x_reference_res));
1211 i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
1212 i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
1213 obj_surface = encode_state->reference_objects[i];
1215 if (obj_surface && obj_surface->bo) {
1216 i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_reference_res[i], obj_surface);
     /* Same lazy 4x-downscaled surface attachment as for the current frame. */
1218 if (obj_surface->private_data == NULL) {
1219 vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
1220 assert(vdenc_avc_surface);
1222 vdenc_avc_surface->ctx = ctx;
1223 i965_CreateSurfaces(ctx,
1224 vdenc_context->down_scaled_width_4x,
1225 vdenc_context->down_scaled_height_4x,
1226 VA_RT_FORMAT_YUV420,
1228 &vdenc_avc_surface->scaled_4x_surface_id);
1229 vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
1230 assert(vdenc_avc_surface->scaled_4x_surface_obj);
1231 i965_check_alloc_surface_bo(ctx,
1232 vdenc_avc_surface->scaled_4x_surface_obj,
1237 obj_surface->private_data = vdenc_avc_surface;
     /* NOTE(review): here the destructor is gen_free_avc_surface, while the
      * reconstructed-surface path above installs vdenc_free_avc_surface for
      * the same VDEncAvcSurface payload — confirm which destructor is
      * correct; a mismatch would free the private data with the wrong
      * routine. */
1238 obj_surface->free_private_data = gen_free_avc_surface;
1241 vdenc_avc_surface = obj_surface->private_data;
1242 i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i], vdenc_avc_surface->scaled_4x_surface_obj);
1246 /* Input YUV surface */
1247 i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
1248 i965_object_surface_to_2d_gpe_resource(&vdenc_context->uncompressed_input_surface_res, encode_state->input_yuv_object);
1250 /* Encoded bitstream */
1251 obj_buffer = encode_state->coded_buf_object;
1252 bo = obj_buffer->buffer_store->bo;
1253 i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
1254 i965_dri_object_to_buffer_gpe_resource(&vdenc_context->compressed_bitstream.res, bo);
1255 vdenc_context->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
1256 vdenc_context->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
     /* Status buffer lives in the coded-buffer header, before the bitstream
      * data ("bffuer" is a pre-existing typo in the field name). */
1259 i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
1260 i965_dri_object_to_buffer_gpe_resource(&vdenc_context->status_bffuer.res, bo);
1261 vdenc_context->status_bffuer.base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
1262 vdenc_context->status_bffuer.size = ALIGN(sizeof(struct gen9_vdenc_status), 64);
1263 vdenc_context->status_bffuer.bytes_per_frame_offset = offsetof(struct gen9_vdenc_status, bytes_per_frame);
1264 assert(vdenc_context->status_bffuer.base_offset + vdenc_context->status_bffuer.size <
1265 vdenc_context->compressed_bitstream.start_offset);
     /* Initialize the coded-buffer segment header and zero the status area
      * (bo mapping/unmapping lines are elided in this listing). */
1269 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
1270 coded_buffer_segment->mapped = 0;
1271 coded_buffer_segment->codec = encoder_context->codec;
1272 coded_buffer_segment->status_support = 1;
1274 pbuffer = bo->virtual;
1275 pbuffer += vdenc_context->status_bffuer.base_offset;
1276 memset(pbuffer, 0, vdenc_context->status_bffuer.size);
     /* Per-frame scratch buffers, sized by frame width in macroblocks. */
1280 i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
1281 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_intra_row_store_scratch_res,
1282 vdenc_context->frame_width_in_mbs * 64,
1283 "Intra row store scratch buffer");
1285 i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
1286 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_deblocking_filter_row_store_scratch_res,
1287 vdenc_context->frame_width_in_mbs * 256,
1288 "Deblocking filter row store scratch buffer");
1290 i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
1291 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_bsd_mpc_row_store_scratch_res,
1292 vdenc_context->frame_width_in_mbs * 128,
1293 "BSD/MPC row store scratch buffer");
1295 i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
1296 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_row_store_scratch_res,
1297 vdenc_context->frame_width_in_mbs * 64,
1298 "VDENC row store scratch buffer");
     /* One 64-byte StreamIn record per macroblock. */
1300 assert(sizeof(struct gen9_vdenc_streamin_state) == 64);
1301 i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
1302 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_streamin_res,
1303 vdenc_context->frame_width_in_mbs *
1304 vdenc_context->frame_height_in_mbs *
1305 sizeof(struct gen9_vdenc_streamin_state),
1306 "VDENC StreamIn buffer");
1309 * Calculate the index for each reference surface in list0 for the first slice
1310 * TODO: other slices
1312 pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1313 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1315 vdenc_context->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
1317 if (slice_param->num_ref_idx_active_override_flag)
1318 vdenc_context->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
1320 if (vdenc_context->num_refs[0] > ARRAY_ELEMS(vdenc_context->list_ref_idx[0]))
1321 return VA_STATUS_ERROR_INVALID_VALUE;
     /* Map every RefPicList0 entry to its index in reference_objects[]. */
1323 for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_ref_idx[0]); i++) {
1324 VAPictureH264 *va_pic;
1326 assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(vdenc_context->list_ref_idx[0]));
1327 vdenc_context->list_ref_idx[0][i] = 0;
1329 if (i >= vdenc_context->num_refs[0])
1332 va_pic = &slice_param->RefPicList0[i];
1334 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
1335 obj_surface = encode_state->reference_objects[j];
1339 obj_surface->base.id == va_pic->picture_id) {
1341 assert(obj_surface->base.id != VA_INVALID_SURFACE);
1342 vdenc_context->list_ref_idx[0][i] = j;
     /* Derive the frame type from the first slice's type. */
1349 if (slice_param->slice_type == SLICE_TYPE_I ||
1350 slice_param->slice_type == SLICE_TYPE_SI)
1351 vdenc_context->frame_type = VDENC_FRAME_I;
1353 vdenc_context->frame_type = VDENC_FRAME_P;
1355 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1357 gen9_vdenc_avc_calculate_mode_cost(ctx, encode_state, encoder_context, qp);
1358 gen9_vdenc_update_roi_in_streamin_state(ctx, encoder_context);
1360 return VA_STATUS_SUCCESS;
1364 gen9_vdenc_huc_pipe_mode_select(VADriverContextP ctx,
1365 struct intel_encoder_context *encoder_context,
1366 struct huc_pipe_mode_select_parameter *params)
1368 struct intel_batchbuffer *batch = encoder_context->base.batch;
1370 BEGIN_BCS_BATCH(batch, 3);
1372 OUT_BCS_BATCH(batch, HUC_PIPE_MODE_SELECT | (3 - 2));
1373 OUT_BCS_BATCH(batch,
1374 (params->huc_stream_object_enable << 10) |
1375 (params->indirect_stream_out_enable << 4));
1376 OUT_BCS_BATCH(batch,
1377 params->media_soft_reset_counter);
1379 ADVANCE_BCS_BATCH(batch);
1383 gen9_vdenc_huc_imem_state(VADriverContextP ctx,
1384 struct intel_encoder_context *encoder_context,
1385 struct huc_imem_state_parameter *params)
1387 struct intel_batchbuffer *batch = encoder_context->base.batch;
1389 BEGIN_BCS_BATCH(batch, 5);
1391 OUT_BCS_BATCH(batch, HUC_IMEM_STATE | (5 - 2));
1392 OUT_BCS_BATCH(batch, 0);
1393 OUT_BCS_BATCH(batch, 0);
1394 OUT_BCS_BATCH(batch, 0);
1395 OUT_BCS_BATCH(batch, params->huc_firmware_descriptor);
1397 ADVANCE_BCS_BATCH(batch);
1401 gen9_vdenc_huc_dmem_state(VADriverContextP ctx,
1402 struct intel_encoder_context *encoder_context,
1403 struct huc_dmem_state_parameter *params)
1405 struct intel_batchbuffer *batch = encoder_context->base.batch;
1407 BEGIN_BCS_BATCH(batch, 6);
1409 OUT_BCS_BATCH(batch, HUC_DMEM_STATE | (6 - 2));
1410 OUT_BUFFER_3DW(batch, params->huc_data_source_res->bo, 0, 0, 0);
1411 OUT_BCS_BATCH(batch, params->huc_data_destination_base_address);
1412 OUT_BCS_BATCH(batch, params->huc_data_length);
1414 ADVANCE_BCS_BATCH(batch);
1419 gen9_vdenc_huc_cfg_state(VADriverContextP ctx,
1420 struct intel_encoder_context *encoder_context,
1421 struct huc_cfg_state_parameter *params)
1423 struct intel_batchbuffer *batch = encoder_context->base.batch;
1425 BEGIN_BCS_BATCH(batch, 2);
1427 OUT_BCS_BATCH(batch, HUC_CFG_STATE | (2 - 2));
1428 OUT_BCS_BATCH(batch, !!params->force_reset);
1430 ADVANCE_BCS_BATCH(batch);
1434 gen9_vdenc_huc_virtual_addr_state(VADriverContextP ctx,
1435 struct intel_encoder_context *encoder_context,
1436 struct huc_virtual_addr_parameter *params)
1438 struct intel_batchbuffer *batch = encoder_context->base.batch;
1441 BEGIN_BCS_BATCH(batch, 49);
1443 OUT_BCS_BATCH(batch, HUC_VIRTUAL_ADDR_STATE | (49 - 2));
1445 for (i = 0; i < 16; i++) {
1446 if (params->regions[i].huc_surface_res && params->regions[i].huc_surface_res->bo)
1447 OUT_BUFFER_3DW(batch,
1448 params->regions[i].huc_surface_res->bo,
1449 !!params->regions[i].is_target, 0, 0);
1451 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1454 ADVANCE_BCS_BATCH(batch);
/*
 * Emit HUC_IND_OBJ_BASE_ADDR_STATE (11 dwords): base addresses for the HuC
 * indirect stream-in and stream-out objects; the access-upper-bound pairs
 * are left NULL (unbounded).
 * NOTE(review): the trailing argument lines of both OUT_BUFFER_3DW calls
 * (is_target/offset flags) are elided in this listing — confirm them
 * against the full file.
 */
1458 gen9_vdenc_huc_ind_obj_base_addr_state(VADriverContextP ctx,
1459 struct intel_encoder_context *encoder_context,
1460 struct huc_ind_obj_base_addr_parameter *params)
1462 struct intel_batchbuffer *batch = encoder_context->base.batch;
1464 BEGIN_BCS_BATCH(batch, 11);
1466 OUT_BCS_BATCH(batch, HUC_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
     /* Stream-in object (input to the HuC), NULL when not provided. */
1468 if (params->huc_indirect_stream_in_object_res)
1469 OUT_BUFFER_3DW(batch,
1470 params->huc_indirect_stream_in_object_res->bo,
1473 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1475 OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */
     /* Stream-out object (written by the HuC), NULL when not provided. */
1477 if (params->huc_indirect_stream_out_object_res)
1478 OUT_BUFFER_3DW(batch,
1479 params->huc_indirect_stream_out_object_res->bo,
1482 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1484 OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */
1486 ADVANCE_BCS_BATCH(batch);
1490 gen9_vdenc_huc_store_huc_status2(VADriverContextP ctx,
1491 struct intel_encoder_context *encoder_context)
1493 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1494 struct intel_batchbuffer *batch = encoder_context->base.batch;
1495 struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
1496 struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
1498 /* Write HUC_STATUS2 mask (1 << 6) */
1499 memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
1500 mi_store_data_imm_params.bo = vdenc_context->huc_status2_res.bo;
1501 mi_store_data_imm_params.offset = 0;
1502 mi_store_data_imm_params.dw0 = (1 << 6);
1503 gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
1505 /* Store HUC_STATUS2 */
1506 memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
1507 mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS2;
1508 mi_store_register_mem_params.bo = vdenc_context->huc_status2_res.bo;
1509 mi_store_register_mem_params.offset = 4;
1510 gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
1514 gen9_vdenc_huc_stream_object(VADriverContextP ctx,
1515 struct intel_encoder_context *encoder_context,
1516 struct huc_stream_object_parameter *params)
1518 struct intel_batchbuffer *batch = encoder_context->base.batch;
1520 BEGIN_BCS_BATCH(batch, 5);
1522 OUT_BCS_BATCH(batch, HUC_STREAM_OBJECT | (5 - 2));
1523 OUT_BCS_BATCH(batch, params->indirect_stream_in_data_length);
1524 OUT_BCS_BATCH(batch,
1525 (1 << 31) | /* Must be 1 */
1526 params->indirect_stream_in_start_address);
1527 OUT_BCS_BATCH(batch, params->indirect_stream_out_start_address);
1528 OUT_BCS_BATCH(batch,
1529 (!!params->huc_bitstream_enable << 29) |
1530 (params->length_mode << 27) |
1531 (!!params->stream_out << 26) |
1532 (!!params->emulation_prevention_byte_removal << 25) |
1533 (!!params->start_code_search_engine << 24) |
1534 (params->start_code_byte2 << 16) |
1535 (params->start_code_byte1 << 8) |
1536 params->start_code_byte0);
1538 ADVANCE_BCS_BATCH(batch);
1542 gen9_vdenc_huc_start(VADriverContextP ctx,
1543 struct intel_encoder_context *encoder_context,
1544 struct huc_start_parameter *params)
1546 struct intel_batchbuffer *batch = encoder_context->base.batch;
1548 BEGIN_BCS_BATCH(batch, 2);
1550 OUT_BCS_BATCH(batch, HUC_START | (2 - 2));
1551 OUT_BCS_BATCH(batch, !!params->last_stream_object);
1553 ADVANCE_BCS_BATCH(batch);
1557 gen9_vdenc_vd_pipeline_flush(VADriverContextP ctx,
1558 struct intel_encoder_context *encoder_context,
1559 struct vd_pipeline_flush_parameter *params)
1561 struct intel_batchbuffer *batch = encoder_context->base.batch;
1563 BEGIN_BCS_BATCH(batch, 2);
1565 OUT_BCS_BATCH(batch, VD_PIPELINE_FLUSH | (2 - 2));
1566 OUT_BCS_BATCH(batch,
1567 params->mfx_pipeline_command_flush << 19 |
1568 params->mfl_pipeline_command_flush << 18 |
1569 params->vdenc_pipeline_command_flush << 17 |
1570 params->hevc_pipeline_command_flush << 16 |
1571 params->vd_command_message_parser_done << 4 |
1572 params->mfx_pipeline_done << 3 |
1573 params->mfl_pipeline_done << 2 |
1574 params->vdenc_pipeline_done << 1 |
1575 params->hevc_pipeline_done);
1577 ADVANCE_BCS_BATCH(batch);
/*
 * Map an H.264 level_idc value to the level's maximum macroblock processing
 * rate (MaxMBPS); 11880 is the default when the switch below does not
 * override it.
 * NOTE(review): the case labels of this switch are not visible in this
 * chunk (the listing jumps from line 1585 to 1639) — the per-level values
 * presumably follow H.264 Annex A Table A-1; confirm against the full file.
 */
1581 gen9_vdenc_get_max_mbps(int level_idc)
1583 int max_mbps = 11880;
1585 switch (level_idc) {
1639 gen9_vdenc_get_profile_level_max_frame(VADriverContextP ctx,
1640 struct intel_encoder_context *encoder_context,
1643 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1644 double bits_per_mb, tmpf;
1645 int max_mbps, num_mb_per_frame;
1646 uint64_t max_byte_per_frame0, max_byte_per_frame1;
1649 if (level_idc >= 31 && level_idc <= 40)
1652 bits_per_mb = 192.0;
1654 max_mbps = gen9_vdenc_get_max_mbps(level_idc);
1655 num_mb_per_frame = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs;
1657 tmpf = (double)num_mb_per_frame;
1659 if (tmpf < max_mbps / 172.0)
1660 tmpf = max_mbps / 172.0;
1662 max_byte_per_frame0 = (uint64_t)(tmpf * bits_per_mb);
1663 max_byte_per_frame1 = (uint64_t)(((double)max_mbps * 100) / vdenc_context->frames_per_100s *bits_per_mb);
1665 /* TODO: check VAEncMiscParameterTypeMaxFrameSize */
1666 ret = (unsigned int)MIN(max_byte_per_frame0, max_byte_per_frame1);
1667 ret = (unsigned int)MIN(ret, vdenc_context->frame_height * vdenc_context->frame_height);
/*
 * Estimate the initial I/P QP for BRC from the target bits-per-frame:
 * a log-linear interpolation between the control points (x0, y0) and
 * (x1, y1) in log10(bits-per-pixel) space, then adjusted by a delta
 * derived from the VBV buffer size and clamped to the valid QP range.
 * NOTE(review): the declarations of `qp` and `delat_qp` [sic] and the
 * lines that apply delat_qp to qp (original lines 1680-1681, 1690-1692)
 * are elided in this listing — confirm against the full file.
 */
1673 gen9_vdenc_calculate_initial_qp(VADriverContextP ctx,
1674 struct encode_state *encode_state,
1675 struct intel_encoder_context *encoder_context)
1677 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1678 float x0 = 0, y0 = 1.19f, x1 = 1.75f, y1 = 1.75f;
1679 unsigned frame_size;
     /* NV12 frame size in bytes: width * height * 3/2. */
1682 frame_size = (vdenc_context->frame_width * vdenc_context->frame_height * 3 / 2);
     /* frames_per_100s / 100 is the frame rate; target_bit_rate is in kbps. */
1683 qp = (int)(1.0 / 1.2 * pow(10.0,
1684 (log10(frame_size * 2.0 / 3.0 * ((float)vdenc_context->frames_per_100s) /
1685 ((float)(vdenc_context->target_bit_rate * 1000) * 100)) - x0) *
1686 (y1 - y0) / (x1 - x0) + y0) + 0.5);
     /* Larger VBV buffers (relative to the per-frame budget) reduce the delta. */
1688 delat_qp = (int)(9 - (vdenc_context->vbv_buffer_size_in_bit * ((float)vdenc_context->frames_per_100s) /
1689 ((float)(vdenc_context->target_bit_rate * 1000) * 100)));
1693 qp = CLAMP(1, 51, qp);
/*
 * Fill the HuC BRC init/reset DMEM parameter block from the current encoder
 * state: rate-control targets, HRD buffer model, frame rate, GOP structure,
 * deviation thresholds and multi-pass re-encode thresholds.
 */
1703 gen9_vdenc_update_huc_brc_init_dmem(VADriverContextP ctx,
1704 struct encode_state *encode_state,
1705 struct intel_encoder_context *encoder_context)
1707 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1708 struct huc_brc_init_dmem *dmem;
1709 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1710 double input_bits_per_frame, bps_ratio;
     /* Per-frame bit budget: max_bit_rate is in kbps, frames_per_100s is
      * the frame rate scaled by 100. */
1713 vdenc_context->brc_init_reset_input_bits_per_frame = ((double)(vdenc_context->max_bit_rate * 1000) * 100) / vdenc_context->frames_per_100s;
1714 vdenc_context->brc_init_current_target_buf_full_in_bits = vdenc_context->brc_init_reset_input_bits_per_frame;
1715 vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
1717 dmem = (struct huc_brc_init_dmem *)i965_map_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1722 memset(dmem, 0, sizeof(*dmem));
     /* brc_func: 0 = init on first use, 2 = reset once BRC has been initted. */
1724 dmem->brc_func = vdenc_context->brc_initted ? 2 : 0;
1726 dmem->frame_width = vdenc_context->frame_width;
1727 dmem->frame_height = vdenc_context->frame_height;
     /* Rates are kept in kbps in the context; the HuC expects bps. */
1729 dmem->target_bitrate = vdenc_context->target_bit_rate * 1000;
1730 dmem->min_rate = vdenc_context->min_bit_rate * 1000;
1731 dmem->max_rate = vdenc_context->max_bit_rate * 1000;
1732 dmem->buffer_size = vdenc_context->vbv_buffer_size_in_bit;
1733 dmem->init_buffer_fullness = vdenc_context->init_vbv_buffer_fullness_in_bit;
     /* NOTE(review): this compares init_buffer_fullness against the very
      * field it was just assigned from, so the condition can never be true,
      * yet the clamp value on the next line is vbv_buffer_size_in_bit — the
      * comparison was presumably meant to be against vbv_buffer_size_in_bit
      * (clamp fullness to the buffer size). Confirm and fix. */
1735 if (dmem->init_buffer_fullness > vdenc_context->init_vbv_buffer_fullness_in_bit)
1736 dmem->init_buffer_fullness = vdenc_context->vbv_buffer_size_in_bit;
     /* brc_flag: 0x10 = CBR, 0x20 = VBR. */
1738 if (vdenc_context->internal_rate_mode == I965_BRC_CBR)
1739 dmem->brc_flag |= 0x10;
1740 else if (vdenc_context->internal_rate_mode == I965_BRC_VBR)
1741 dmem->brc_flag |= 0x20;
     /* Frame rate as a rational: frames_per_100s / 100. */
1743 dmem->frame_rate_m = vdenc_context->frames_per_100s;
1744 dmem->frame_rate_d = 100;
1746 dmem->profile_level_max_frame = gen9_vdenc_get_profile_level_max_frame(ctx, encoder_context, seq_param->level_idc);
1748 if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1749 dmem->num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
     /* bps_ratio: per-frame input bits relative to the per-frame buffer
      * drain; it shapes the deviation thresholds below. */
1754 input_bits_per_frame = ((double)vdenc_context->max_bit_rate * 1000 * 100) / vdenc_context->frames_per_100s;
1755 bps_ratio = input_bits_per_frame / ((double)vdenc_context->vbv_buffer_size_in_bit * 100 / vdenc_context->frames_per_100s);
     /* Clamp bps_ratio to [0.1, 3.5] (assignment lines elided in this listing). */
1757 if (bps_ratio < 0.1)
1760 if (bps_ratio > 3.5)
     /* Deviation thresholds: 4 negative then 4 positive entries per table. */
1763 for (i = 0; i < 4; i++) {
1764 dmem->dev_thresh_pb0[i] = (char)(-50 * pow(vdenc_brc_dev_threshpb0_fp_neg[i], bps_ratio));
1765 dmem->dev_thresh_pb0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshpb0_fp_pos[i], bps_ratio));
1767 dmem->dev_thresh_i0[i] = (char)(-50 * pow(vdenc_brc_dev_threshi0_fp_neg[i], bps_ratio));
1768 dmem->dev_thresh_i0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshi0_fp_pos[i], bps_ratio));
1770 dmem->dev_thresh_vbr0[i] = (char)(-50 * pow(vdenc_brc_dev_threshvbr0_neg[i], bps_ratio));
1771 dmem->dev_thresh_vbr0[i + 4] = (char)(100 * pow(vdenc_brc_dev_threshvbr0_pos[i], bps_ratio));
1774 dmem->init_qp_ip = gen9_vdenc_calculate_initial_qp(ctx, encode_state, encoder_context);
     /* Optional per-MB QP control with distortion-based QP deltas. */
1776 if (vdenc_context->mb_brc_enabled) {
1777 dmem->mb_qp_ctrl = 1;
1778 dmem->dist_qp_delta[0] = -5;
1779 dmem->dist_qp_delta[1] = -2;
1780 dmem->dist_qp_delta[2] = 2;
1781 dmem->dist_qp_delta[3] = 5;
1784 dmem->slice_size_ctrl_en = 0; /* TODO: add support for slice size control */
1786 dmem->oscillation_qp_delta = 0; /* TODO: add support */
1787 dmem->first_iframe_no_hrd_check = 0;/* TODO: add support */
1789 // 2nd re-encode pass if possible
     /* Looser 2nd-pass trigger thresholds at 4K-class resolutions and above. */
1790 if (vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs >= (3840 * 2160 / 256)) {
1791 dmem->top_qp_delta_thr_for_2nd_pass = 5;
1792 dmem->bottom_qp_delta_thr_for_2nd_pass = 5;
1793 dmem->top_frame_size_threshold_for_2nd_pass = 80;
1794 dmem->bottom_frame_size_threshold_for_2nd_pass = 80;
1796 dmem->top_qp_delta_thr_for_2nd_pass = 2;
1797 dmem->bottom_qp_delta_thr_for_2nd_pass = 1;
1798 dmem->top_frame_size_threshold_for_2nd_pass = 32;
1799 dmem->bottom_frame_size_threshold_for_2nd_pass = 24;
1802 dmem->qp_select_for_first_pass = 1;
1803 dmem->mb_header_compensation = 1;
1804 dmem->delta_qp_adaptation = 1;
1805 dmem->max_crf_quality_factor = 52;
1807 dmem->crf_quality_factor = 0; /* TODO: add support for CRF */
1808 dmem->scenario_info = 0;
1810 memcpy(&dmem->estrate_thresh_i0, vdenc_brc_estrate_thresh_i0, sizeof(dmem->estrate_thresh_i0));
1811 memcpy(&dmem->estrate_thresh_p0, vdenc_brc_estrate_thresh_p0, sizeof(dmem->estrate_thresh_p0));
1813 i965_unmap_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1817 gen9_vdenc_huc_brc_init_reset(VADriverContextP ctx,
1818 struct encode_state *encode_state,
1819 struct intel_encoder_context *encoder_context)
1821 struct intel_batchbuffer *batch = encoder_context->base.batch;
1822 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1823 struct huc_pipe_mode_select_parameter pipe_mode_select_params;
1824 struct huc_imem_state_parameter imem_state_params;
1825 struct huc_dmem_state_parameter dmem_state_params;
1826 struct huc_virtual_addr_parameter virtual_addr_params;
1827 struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
1828 struct huc_stream_object_parameter stream_object_params;
1829 struct huc_start_parameter start_params;
1830 struct vd_pipeline_flush_parameter pipeline_flush_params;
1831 struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
1833 vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
1835 memset(&imem_state_params, 0, sizeof(imem_state_params));
1836 imem_state_params.huc_firmware_descriptor = HUC_BRC_INIT_RESET;
1837 gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);
1839 memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
1840 gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);
1842 gen9_vdenc_update_huc_brc_init_dmem(ctx, encode_state, encoder_context);
1843 memset(&dmem_state_params, 0, sizeof(dmem_state_params));
1844 dmem_state_params.huc_data_source_res = &vdenc_context->brc_init_reset_dmem_res;
1845 dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
1846 dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_init_dmem), 64);
1847 gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);
1849 memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
1850 virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
1851 virtual_addr_params.regions[0].is_target = 1;
1852 gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);
1854 memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
1855 ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
1856 ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
1857 gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);
1859 memset(&stream_object_params, 0, sizeof(stream_object_params));
1860 stream_object_params.indirect_stream_in_data_length = 1;
1861 stream_object_params.indirect_stream_in_start_address = 0;
1862 gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);
1864 gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);
1866 memset(&start_params, 0, sizeof(start_params));
1867 start_params.last_stream_object = 1;
1868 gen9_vdenc_huc_start(ctx, encoder_context, &start_params);
1870 memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
1871 pipeline_flush_params.hevc_pipeline_done = 1;
1872 pipeline_flush_params.hevc_pipeline_command_flush = 1;
1873 gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
1875 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
1876 mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
1877 gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
1881 gen9_vdenc_update_huc_update_dmem(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1883 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1884 struct huc_brc_update_dmem *dmem;
1885 int i, num_p_in_gop = 0;
1887 dmem = (struct huc_brc_update_dmem *)i965_map_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
1894 if (vdenc_context->brc_initted && (vdenc_context->current_pass == 0)) {
1895 vdenc_context->brc_init_previous_target_buf_full_in_bits =
1896 (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits);
1897 vdenc_context->brc_init_current_target_buf_full_in_bits += vdenc_context->brc_init_reset_input_bits_per_frame;
1898 vdenc_context->brc_target_size += vdenc_context->brc_init_reset_input_bits_per_frame;
1901 if (vdenc_context->brc_target_size > vdenc_context->vbv_buffer_size_in_bit)
1902 vdenc_context->brc_target_size -= vdenc_context->vbv_buffer_size_in_bit;
1904 dmem->target_size = vdenc_context->brc_target_size;
1906 dmem->peak_tx_bits_per_frame = (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits - vdenc_context->brc_init_previous_target_buf_full_in_bits);
1908 dmem->target_slice_size = 0; // TODO: add support for slice size control
1910 memcpy(dmem->start_global_adjust_frame, vdenc_brc_start_global_adjust_frame, sizeof(dmem->start_global_adjust_frame));
1911 memcpy(dmem->global_rate_ratio_threshold, vdenc_brc_global_rate_ratio_threshold, sizeof(dmem->global_rate_ratio_threshold));
1913 dmem->current_frame_type = (vdenc_context->frame_type + 2) % 3; // I frame:2, P frame:0, B frame:1
1915 memcpy(dmem->start_global_adjust_mult, vdenc_brc_start_global_adjust_mult, sizeof(dmem->start_global_adjust_mult));
1916 memcpy(dmem->start_global_adjust_div, vdenc_brc_start_global_adjust_div, sizeof(dmem->start_global_adjust_div));
1917 memcpy(dmem->global_rate_ratio_threshold_qp, vdenc_brc_global_rate_ratio_threshold_qp, sizeof(dmem->global_rate_ratio_threshold_qp));
1919 dmem->current_pak_pass = vdenc_context->current_pass;
1920 dmem->max_num_passes = 2;
1922 dmem->scene_change_detect_enable = 1;
1923 dmem->scene_change_prev_intra_percent_threshold = 96;
1924 dmem->scene_change_cur_intra_perent_threshold = 192;
1926 if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1927 num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
1929 for (i = 0; i < 2; i++)
1930 dmem->scene_change_width[i] = MIN((num_p_in_gop + 1) / 5, 6);
1932 if (vdenc_context->is_low_delay)
1933 dmem->ip_average_coeff = 0;
1935 dmem->ip_average_coeff = 128;
1937 dmem->skip_frame_size = 0;
1938 dmem->num_of_frames_skipped = 0;
1940 dmem->roi_source = 0; // TODO: add support for dirty ROI
1941 dmem->hme_detection_enable = 0; // TODO: support HME kernel
1942 dmem->hme_cost_enable = 1;
1944 dmem->second_level_batchbuffer_size = 228;
1946 i965_unmap_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
1950 gen9_vdenc_init_mfx_avc_img_state(VADriverContextP ctx,
1951 struct encode_state *encode_state,
1952 struct intel_encoder_context *encoder_context,
1953 struct gen9_mfx_avc_img_state *pstate)
1955 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1956 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1957 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1959 memset(pstate, 0, sizeof(*pstate));
1961 pstate->dw0.value = (MFX_AVC_IMG_STATE | (sizeof(*pstate) / 4 - 2));
1963 pstate->dw1.frame_size_in_mbs_minus1 = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs - 1;
1965 pstate->dw2.frame_width_in_mbs_minus1 = vdenc_context->frame_width_in_mbs - 1;
1966 pstate->dw2.frame_height_in_mbs_minus1 = vdenc_context->frame_height_in_mbs - 1;
1968 pstate->dw3.image_structure = 0;
1969 pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
1970 pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
1971 pstate->dw3.brc_domain_rate_control_enable = 1;
1972 pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
1973 pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
1975 pstate->dw4.field_picture_flag = 0;
1976 pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
1977 pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
1978 pstate->dw4.transform_8x8_idct_mode_flag = vdenc_context->transform_8x8_mode_enable;
1979 pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
1980 pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
1981 pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
1982 pstate->dw4.mb_mv_format_flag = 1;
1983 pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
1984 pstate->dw4.mv_unpacked_flag = 1;
1985 pstate->dw4.insert_test_flag = 0;
1986 pstate->dw4.load_slice_pointer_flag = 0;
1987 pstate->dw4.macroblock_stat_enable = 0; /* Always 0 in VDEnc mode */
1988 pstate->dw4.minimum_frame_size = 0;
1990 pstate->dw5.intra_mb_max_bit_flag = 1;
1991 pstate->dw5.inter_mb_max_bit_flag = 1;
1992 pstate->dw5.frame_size_over_flag = 1;
1993 pstate->dw5.frame_size_under_flag = 1;
1994 pstate->dw5.intra_mb_ipcm_flag = 1;
1995 pstate->dw5.mb_rate_ctrl_flag = 0; /* Always 0 in VDEnc mode */
1996 pstate->dw5.non_first_pass_flag = 0;
1997 pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
1998 pstate->dw5.aq_chroma_disable = 1;
2000 pstate->dw6.intra_mb_max_size = 2700;
2001 pstate->dw6.inter_mb_max_size = 4095;
2003 pstate->dw8.slice_delta_qp_max0 = 0;
2004 pstate->dw8.slice_delta_qp_max1 = 0;
2005 pstate->dw8.slice_delta_qp_max2 = 0;
2006 pstate->dw8.slice_delta_qp_max3 = 0;
2008 pstate->dw9.slice_delta_qp_min0 = 0;
2009 pstate->dw9.slice_delta_qp_min1 = 0;
2010 pstate->dw9.slice_delta_qp_min2 = 0;
2011 pstate->dw9.slice_delta_qp_min3 = 0;
2013 pstate->dw10.frame_bitrate_min = 0;
2014 pstate->dw10.frame_bitrate_min_unit = 1;
2015 pstate->dw10.frame_bitrate_min_unit_mode = 1;
2016 pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
2017 pstate->dw10.frame_bitrate_max_unit = 1;
2018 pstate->dw10.frame_bitrate_max_unit_mode = 1;
2020 pstate->dw11.frame_bitrate_min_delta = 0;
2021 pstate->dw11.frame_bitrate_max_delta = 0;
2023 pstate->dw12.vad_error_logic = 1;
2024 /* TODO: set paramters DW19/DW20 for slices */
2028 gen9_vdenc_init_vdenc_img_state(VADriverContextP ctx,
2029 struct encode_state *encode_state,
2030 struct intel_encoder_context *encoder_context,
2031 struct gen9_vdenc_img_state *pstate,
2034 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2035 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
2036 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2037 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
2039 memset(pstate, 0, sizeof(*pstate));
2041 pstate->dw0.value = (VDENC_IMG_STATE | (sizeof(*pstate) / 4 - 2));
2043 if (vdenc_context->frame_type == VDENC_FRAME_I) {
2044 pstate->dw4.intra_sad_measure_adjustment = 2;
2045 pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;
2047 pstate->dw5.cre_prefetch_enable = 1;
2049 pstate->dw9.mode0_cost = 10;
2050 pstate->dw9.mode1_cost = 0;
2051 pstate->dw9.mode2_cost = 3;
2052 pstate->dw9.mode3_cost = 30;
2054 pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
2055 pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
2056 pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;
2058 pstate->dw22.small_mb_size_in_word = 0xff;
2059 pstate->dw22.large_mb_size_in_word = 0xff;
2061 pstate->dw27.max_hmv_r = 0x2000;
2062 pstate->dw27.max_vmv_r = 0x200;
2064 pstate->dw33.qp_range_check_upper_bound = 0x33;
2065 pstate->dw33.qp_range_check_lower_bound = 0x0a;
2066 pstate->dw33.qp_range_check_value = 0x0f;
2068 pstate->dw2.bidirectional_weight = 0x20;
2070 pstate->dw4.subpel_mode = 3;
2071 pstate->dw4.bme_disable_for_fbr_message = 1;
2072 pstate->dw4.inter_sad_measure_adjustment = 2;
2073 pstate->dw4.intra_sad_measure_adjustment = 2;
2074 pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;
2076 pstate->dw5.cre_prefetch_enable = 1;
2078 pstate->dw8.non_skip_zero_mv_const_added = 1;
2079 pstate->dw8.non_skip_mb_mode_const_added = 1;
2080 pstate->dw8.ref_id_cost_mode_select = 1;
2082 pstate->dw9.mode0_cost = 7;
2083 pstate->dw9.mode1_cost = 26;
2084 pstate->dw9.mode2_cost = 30;
2085 pstate->dw9.mode3_cost = 57;
2087 pstate->dw10.mode4_cost = 8;
2088 pstate->dw10.mode5_cost = 2;
2089 pstate->dw10.mode6_cost = 4;
2090 pstate->dw10.mode7_cost = 6;
2092 pstate->dw11.mode8_cost = 5;
2093 pstate->dw11.mode9_cost = 0;
2094 pstate->dw11.ref_id_cost = 4;
2095 pstate->dw11.chroma_intra_mode_cost = 0;
2097 pstate->dw12_13.mv_cost.dw0.mv0_cost = 0;
2098 pstate->dw12_13.mv_cost.dw0.mv1_cost = 6;
2099 pstate->dw12_13.mv_cost.dw0.mv2_cost = 6;
2100 pstate->dw12_13.mv_cost.dw0.mv3_cost = 9;
2101 pstate->dw12_13.mv_cost.dw1.mv4_cost = 10;
2102 pstate->dw12_13.mv_cost.dw1.mv5_cost = 13;
2103 pstate->dw12_13.mv_cost.dw1.mv6_cost = 14;
2104 pstate->dw12_13.mv_cost.dw1.mv7_cost = 24;
2106 pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
2107 pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
2108 pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;
2110 pstate->dw22.small_mb_size_in_word = 0xff;
2111 pstate->dw22.large_mb_size_in_word = 0xff;
2113 pstate->dw27.max_hmv_r = 0x2000;
2114 pstate->dw27.max_vmv_r = 0x200;
2116 pstate->dw31.offset0_for_zone0_neg_zone1_boundary = 800;
2118 pstate->dw32.offset1_for_zone1_neg_zone2_boundary = 1600;
2119 pstate->dw32.offset2_for_zone2_neg_zone3_boundary = 2400;
2121 pstate->dw33.qp_range_check_upper_bound = 0x33;
2122 pstate->dw33.qp_range_check_lower_bound = 0x0a;
2123 pstate->dw33.qp_range_check_value = 0x0f;
2125 pstate->dw34.midpoint_distortion = 0x640;
2128 /* ROI will be updated in HuC kernel for CBR/VBR */
2129 if (!vdenc_context->brc_enabled && vdenc_context->num_roi) {
2130 pstate->dw34.roi_enable = 1;
2132 pstate->dw30.roi_qp_adjustment_for_zone1 = CLAMP(-8, 7, vdenc_context->roi[0].value);
2134 if (vdenc_context->num_roi > 1)
2135 pstate->dw30.roi_qp_adjustment_for_zone2 = CLAMP(-8, 7, vdenc_context->roi[1].value);
2137 if (vdenc_context->num_roi > 2)
2138 pstate->dw30.roi_qp_adjustment_for_zone3 = CLAMP(-8, 7, vdenc_context->roi[2].value);
2141 pstate->dw1.transform_8x8_flag = vdenc_context->transform_8x8_mode_enable;
2143 pstate->dw3.picture_width = vdenc_context->frame_width_in_mbs;
2145 pstate->dw4.forward_transform_skip_check_enable = 1; /* TODO: double-check it */
2147 pstate->dw5.picture_height_minus1 = vdenc_context->frame_height_in_mbs - 1;
2148 pstate->dw5.picture_type = vdenc_context->frame_type;
2149 pstate->dw5.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
2151 if (vdenc_context->frame_type == VDENC_FRAME_P) {
2152 pstate->dw5.hme_ref1_disable = vdenc_context->num_refs[0] == 1 ? 1 : 0;
2155 pstate->dw5.mb_slice_threshold_value = 0;
2157 pstate->dw6.slice_macroblock_height_minus1 = vdenc_context->frame_height_in_mbs - 1; /* single slice onlye */
2159 if (pstate->dw1.transform_8x8_flag)
2160 pstate->dw8.luma_intra_partition_mask = 0;
2162 pstate->dw8.luma_intra_partition_mask = (1 << 1); /* disable transform_8x8 */
2164 pstate->dw14.qp_prime_y = pic_param->pic_init_qp + slice_param->slice_qp_delta; /* TODO: check whether it is OK to use the first slice only */
2167 pstate->dw9.mode0_cost = vdenc_context->mode_cost[0];
2168 pstate->dw9.mode1_cost = vdenc_context->mode_cost[1];
2169 pstate->dw9.mode2_cost = vdenc_context->mode_cost[2];
2170 pstate->dw9.mode3_cost = vdenc_context->mode_cost[3];
2172 pstate->dw10.mode4_cost = vdenc_context->mode_cost[4];
2173 pstate->dw10.mode5_cost = vdenc_context->mode_cost[5];
2174 pstate->dw10.mode6_cost = vdenc_context->mode_cost[6];
2175 pstate->dw10.mode7_cost = vdenc_context->mode_cost[7];
2177 pstate->dw11.mode8_cost = vdenc_context->mode_cost[8];
2178 pstate->dw11.mode9_cost = vdenc_context->mode_cost[9];
2179 pstate->dw11.ref_id_cost = vdenc_context->mode_cost[10];
2180 pstate->dw11.chroma_intra_mode_cost = vdenc_context->mode_cost[11];
2182 pstate->dw12_13.mv_cost.dw0.mv0_cost = vdenc_context->mv_cost[0];
2183 pstate->dw12_13.mv_cost.dw0.mv1_cost = vdenc_context->mv_cost[1];
2184 pstate->dw12_13.mv_cost.dw0.mv2_cost = vdenc_context->mv_cost[2];
2185 pstate->dw12_13.mv_cost.dw0.mv3_cost = vdenc_context->mv_cost[3];
2186 pstate->dw12_13.mv_cost.dw1.mv4_cost = vdenc_context->mv_cost[4];
2187 pstate->dw12_13.mv_cost.dw1.mv5_cost = vdenc_context->mv_cost[5];
2188 pstate->dw12_13.mv_cost.dw1.mv6_cost = vdenc_context->mv_cost[6];
2189 pstate->dw12_13.mv_cost.dw1.mv7_cost = vdenc_context->mv_cost[7];
2191 pstate->dw28_29.hme_mv_cost.dw0.mv0_cost = vdenc_context->hme_mv_cost[0];
2192 pstate->dw28_29.hme_mv_cost.dw0.mv1_cost = vdenc_context->hme_mv_cost[1];
2193 pstate->dw28_29.hme_mv_cost.dw0.mv2_cost = vdenc_context->hme_mv_cost[2];
2194 pstate->dw28_29.hme_mv_cost.dw0.mv3_cost = vdenc_context->hme_mv_cost[3];
2195 pstate->dw28_29.hme_mv_cost.dw1.mv4_cost = vdenc_context->hme_mv_cost[4];
2196 pstate->dw28_29.hme_mv_cost.dw1.mv5_cost = vdenc_context->hme_mv_cost[5];
2197 pstate->dw28_29.hme_mv_cost.dw1.mv6_cost = vdenc_context->hme_mv_cost[6];
2198 pstate->dw28_29.hme_mv_cost.dw1.mv7_cost = vdenc_context->hme_mv_cost[7];
2201 pstate->dw27.max_vmv_r = gen9_vdenc_get_max_vmv_range(seq_param->level_idc);
2203 pstate->dw34.image_state_qp_override = (vdenc_context->internal_rate_mode == I965_BRC_CQP) ? 1 : 0;
2205 /* TODO: check rolling I */
2207 /* TODO: handle ROI */
2209 /* TODO: check stream in support */
2213 gen9_vdenc_init_img_states(VADriverContextP ctx,
2214 struct encode_state *encode_state,
2215 struct intel_encoder_context *encoder_context)
2217 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2218 struct gen9_mfx_avc_img_state *mfx_img_cmd;
2219 struct gen9_vdenc_img_state *vdenc_img_cmd;
2222 pbuffer = i965_map_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
2227 mfx_img_cmd = (struct gen9_mfx_avc_img_state *)pbuffer;
2228 gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, mfx_img_cmd);
2229 pbuffer += sizeof(*mfx_img_cmd);
2231 vdenc_img_cmd = (struct gen9_vdenc_img_state *)pbuffer;
2232 gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, vdenc_img_cmd, 0);
2233 pbuffer += sizeof(*vdenc_img_cmd);
2235 /* Add batch buffer end command */
2236 *((unsigned int *)pbuffer) = MI_BATCH_BUFFER_END;
2238 i965_unmap_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
2242 gen9_vdenc_huc_brc_update_constant_data(VADriverContextP ctx,
2243 struct encode_state *encode_state,
2244 struct intel_encoder_context *encoder_context)
2246 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2249 pbuffer = i965_map_gpe_resource(&vdenc_context->brc_constant_data_res);
2254 if (vdenc_context->internal_rate_mode == I965_BRC_VBR) {
2255 memcpy(gen9_brc_update_constant_data.dist_qp_adj_tab_i, dist_qp_adj_tab_i_vbr, sizeof(dist_qp_adj_tab_i_vbr));
2256 memcpy(gen9_brc_update_constant_data.dist_qp_adj_tab_p, dist_qp_adj_tab_p_vbr, sizeof(dist_qp_adj_tab_p_vbr));
2257 memcpy(gen9_brc_update_constant_data.dist_qp_adj_tab_b, dist_qp_adj_tab_b_vbr, sizeof(dist_qp_adj_tab_b_vbr));
2258 memcpy(gen9_brc_update_constant_data.buf_rate_adj_tab_i, buf_rate_adj_tab_i_vbr, sizeof(buf_rate_adj_tab_i_vbr));
2259 memcpy(gen9_brc_update_constant_data.buf_rate_adj_tab_p, buf_rate_adj_tab_p_vbr, sizeof(buf_rate_adj_tab_p_vbr));
2260 memcpy(gen9_brc_update_constant_data.buf_rate_adj_tab_b, buf_rate_adj_tab_b_vbr, sizeof(buf_rate_adj_tab_b_vbr));
2263 memcpy(pbuffer, &gen9_brc_update_constant_data, sizeof(gen9_brc_update_constant_data));
2265 i965_unmap_gpe_resource(&vdenc_context->brc_constant_data_res);
2269 gen9_vdenc_huc_brc_update(VADriverContextP ctx,
2270 struct encode_state *encode_state,
2271 struct intel_encoder_context *encoder_context)
2273 struct intel_batchbuffer *batch = encoder_context->base.batch;
2274 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2275 struct huc_pipe_mode_select_parameter pipe_mode_select_params;
2276 struct huc_imem_state_parameter imem_state_params;
2277 struct huc_dmem_state_parameter dmem_state_params;
2278 struct huc_virtual_addr_parameter virtual_addr_params;
2279 struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
2280 struct huc_stream_object_parameter stream_object_params;
2281 struct huc_start_parameter start_params;
2282 struct vd_pipeline_flush_parameter pipeline_flush_params;
2283 struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
2284 struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
2285 struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
2287 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
2288 mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
2289 gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
2291 if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset) {
2292 struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
2294 memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
2295 mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
2296 gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
2299 gen9_vdenc_init_img_states(ctx, encode_state, encoder_context);
2301 memset(&imem_state_params, 0, sizeof(imem_state_params));
2302 imem_state_params.huc_firmware_descriptor = HUC_BRC_UPDATE;
2303 gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);
2305 memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
2306 gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);
2308 gen9_vdenc_update_huc_update_dmem(ctx, encoder_context);
2309 memset(&dmem_state_params, 0, sizeof(dmem_state_params));
2310 dmem_state_params.huc_data_source_res = &vdenc_context->brc_update_dmem_res[vdenc_context->current_pass];
2311 dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
2312 dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_update_dmem), 64);
2313 gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);
2315 gen9_vdenc_huc_brc_update_constant_data(ctx, encode_state, encoder_context);
2316 memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
2317 virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
2318 virtual_addr_params.regions[0].is_target = 1;
2319 virtual_addr_params.regions[1].huc_surface_res = &vdenc_context->vdenc_statistics_res;
2320 virtual_addr_params.regions[2].huc_surface_res = &vdenc_context->pak_statistics_res;
2321 virtual_addr_params.regions[3].huc_surface_res = &vdenc_context->vdenc_avc_image_state_res;
2322 virtual_addr_params.regions[4].huc_surface_res = &vdenc_context->hme_detection_summary_buffer_res;
2323 virtual_addr_params.regions[4].is_target = 1;
2324 virtual_addr_params.regions[5].huc_surface_res = &vdenc_context->brc_constant_data_res;
2325 virtual_addr_params.regions[6].huc_surface_res = &vdenc_context->second_level_batch_res;
2326 virtual_addr_params.regions[6].is_target = 1;
2327 gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);
2329 memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
2330 ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
2331 ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
2332 gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);
2334 memset(&stream_object_params, 0, sizeof(stream_object_params));
2335 stream_object_params.indirect_stream_in_data_length = 1;
2336 stream_object_params.indirect_stream_in_start_address = 0;
2337 gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);
2339 gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);
2341 memset(&start_params, 0, sizeof(start_params));
2342 start_params.last_stream_object = 1;
2343 gen9_vdenc_huc_start(ctx, encoder_context, &start_params);
2345 memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
2346 pipeline_flush_params.hevc_pipeline_done = 1;
2347 pipeline_flush_params.hevc_pipeline_command_flush = 1;
2348 gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
2350 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
2351 mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
2352 gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
2354 /* Store HUC_STATUS */
2355 memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
2356 mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS;
2357 mi_store_register_mem_params.bo = vdenc_context->huc_status_res.bo;
2358 gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
2360 /* Write HUC_STATUS mask (1 << 31) */
2361 memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
2362 mi_store_data_imm_params.bo = vdenc_context->huc_status_res.bo;
2363 mi_store_data_imm_params.offset = 4;
2364 mi_store_data_imm_params.dw0 = (1 << 31);
2365 gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
/* Emit MFX_PIPE_MODE_SELECT (5 dwords) configuring the MFX engine for AVC
 * VDEnc encoding: long-format mode, VDEnc mode, pre/post-deblocking outputs
 * enabled only when their buffers exist, scaled surface and frame
 * statistics stream-out always on.
 * NOTE(review): the DW1 expression may continue on a line not visible in
 * this chunk (the embedded numbering skips one line after the
 * OUT_BCS_BATCH open) — code kept byte-identical; verify against upstream. */
2369 gen9_vdenc_mfx_pipe_mode_select(VADriverContextP ctx,
2370 struct encode_state *encode_state,
2371 struct intel_encoder_context *encoder_context)
2373 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2374 struct intel_batchbuffer *batch = encoder_context->base.batch;
2376 BEGIN_BCS_BATCH(batch, 5);
2378 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2379 OUT_BCS_BATCH(batch,
2381 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
2382 (MFD_MODE_VLD << 15) |
2383 (1 << 13) | /* VDEnc mode */
2384 ((!!vdenc_context->post_deblocking_output_res.bo) << 9) | /* Post Deblocking Output */
2385 ((!!vdenc_context->pre_deblocking_output_res.bo) << 8) | /* Pre Deblocking Output */
2386 (1 << 7) | /* Scaled surface enable */
2387 (1 << 6) | /* Frame statistics stream out enable, always '1' in VDEnc mode */
2388 (1 << 4) | /* encoding mode */
2389 (MFX_FORMAT_AVC << 0));
/* DW2-4: reserved / unused for this configuration */
2390 OUT_BCS_BATCH(batch, 0);
2391 OUT_BCS_BATCH(batch, 0);
2392 OUT_BCS_BATCH(batch, 0);
2394 ADVANCE_BCS_BATCH(batch);
2398 gen9_vdenc_mfx_surface_state(VADriverContextP ctx,
2399 struct intel_encoder_context *encoder_context,
2400 struct i965_gpe_resource *gpe_resource,
2403 struct intel_batchbuffer *batch = encoder_context->base.batch;
2405 BEGIN_BCS_BATCH(batch, 6);
2407 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2408 OUT_BCS_BATCH(batch, id);
2409 OUT_BCS_BATCH(batch,
2410 ((gpe_resource->height - 1) << 18) |
2411 ((gpe_resource->width - 1) << 4));
2412 OUT_BCS_BATCH(batch,
2413 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2414 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
2415 ((gpe_resource->pitch - 1) << 3) | /* pitch */
2416 (0 << 2) | /* must be 0 for interleave U/V */
2417 (1 << 1) | /* must be tiled */
2418 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
2419 OUT_BCS_BATCH(batch,
2420 (0 << 16) | /* must be 0 for interleave U/V */
2421 (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
2422 OUT_BCS_BATCH(batch,
2423 (0 << 16) | /* must be 0 for interleave U/V */
2424 (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
2426 ADVANCE_BCS_BATCH(batch);
2430 gen9_vdenc_mfx_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2432 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2433 struct intel_batchbuffer *batch = encoder_context->base.batch;
2436 BEGIN_BCS_BATCH(batch, 65);
2438 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));
2440 /* the DW1-3 is for pre_deblocking */
2441 OUT_BUFFER_3DW(batch, vdenc_context->pre_deblocking_output_res.bo, 1, 0, 0);
2443 /* the DW4-6 is for the post_deblocking */
2444 OUT_BUFFER_3DW(batch, vdenc_context->post_deblocking_output_res.bo, 1, 0, 0);
2446 /* the DW7-9 is for the uncompressed_picture */
2447 OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);
2449 /* the DW10-12 is for PAK information (write) */
2450 OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 1, 0, 0);
2452 /* the DW13-15 is for the intra_row_store_scratch */
2453 OUT_BUFFER_3DW(batch, vdenc_context->mfx_intra_row_store_scratch_res.bo, 1, 0, 0);
2455 /* the DW16-18 is for the deblocking filter */
2456 OUT_BUFFER_3DW(batch, vdenc_context->mfx_deblocking_filter_row_store_scratch_res.bo, 1, 0, 0);
2458 /* the DW 19-50 is for Reference pictures*/
2459 for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
2460 OUT_BUFFER_2DW(batch, vdenc_context->list_reference_res[i].bo, 0, 0);
2463 /* DW 51, reference picture attributes */
2464 OUT_BCS_BATCH(batch, 0);
2466 /* The DW 52-54 is for PAK information (read) */
2467 OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 0, 0, 0);
2469 /* the DW 55-57 is the ILDB buffer */
2470 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2472 /* the DW 58-60 is the second ILDB buffer */
2473 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2475 /* DW 61, memory compress enable & mode */
2476 OUT_BCS_BATCH(batch, 0);
2478 /* the DW 62-64 is the 4x Down Scaling surface */
2479 OUT_BUFFER_3DW(batch, vdenc_context->scaled_4x_recon_surface_res.bo, 0, 0, 0);
2481 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords).  Only the PAK-BSE object
 * (DW21-25, the compressed bitstream destination) is programmed; the other
 * indirect objects are unused in VDEnc encode mode.
 * NOTE(review): the trailing arguments of the final OUT_BUFFER_3DW /
 * OUT_BUFFER_2DW calls continue on lines not visible in this chunk (the
 * embedded numbering skips 2514-2516 and 2519) — code kept byte-identical. */
2485 gen9_vdenc_mfx_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2487 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2488 struct intel_batchbuffer *batch = encoder_context->base.batch;
2490 BEGIN_BCS_BATCH(batch, 26);
2492 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
2493 /* The DW1-5 is for the MFX indirect bistream offset, ignore for VDEnc mode */
2494 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2495 OUT_BUFFER_2DW(batch, NULL, 0, 0);
2497 /* the DW6-10 is for MFX Indirect MV Object Base Address, ignore for VDEnc mode */
2498 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2499 OUT_BUFFER_2DW(batch, NULL, 0, 0);
2501 /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
2502 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2503 OUT_BUFFER_2DW(batch, NULL, 0, 0);
2505 /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
2506 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2507 OUT_BUFFER_2DW(batch, NULL, 0, 0);
2509 /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
2510 * Note: an offset is specified in MFX_AVC_SLICE_STATE
2512 OUT_BUFFER_3DW(batch,
2513 vdenc_context->compressed_bitstream.res.bo,
2517 OUT_BUFFER_2DW(batch,
2518 vdenc_context->compressed_bitstream.res.bo,
2520 vdenc_context->compressed_bitstream.end_offset);
2522 ADVANCE_BCS_BATCH(batch);
2526 gen9_vdenc_mfx_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2528 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2529 struct intel_batchbuffer *batch = encoder_context->base.batch;
2531 BEGIN_BCS_BATCH(batch, 10);
2533 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2535 /* The DW1-3 is for bsd/mpc row store scratch buffer */
2536 OUT_BUFFER_3DW(batch, vdenc_context->mfx_bsd_mpc_row_store_scratch_res.bo, 1, 0, 0);
2538 /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
2539 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2541 /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
2542 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2544 ADVANCE_BCS_BATCH(batch);
2548 gen9_vdenc_mfx_qm_state(VADriverContextP ctx,
2552 struct intel_encoder_context *encoder_context)
2554 struct intel_batchbuffer *batch = encoder_context->base.batch;
2555 unsigned int qm_buffer[16];
2557 assert(qm_length <= 16);
2558 assert(sizeof(*qm) == 4);
2559 memcpy(qm_buffer, qm, qm_length * 4);
2561 BEGIN_BCS_BATCH(batch, 18);
2562 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
2563 OUT_BCS_BATCH(batch, qm_type << 0);
2564 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
2565 ADVANCE_BCS_BATCH(batch);
2569 gen9_vdenc_mfx_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2571 /* TODO: add support for non flat matrix */
2572 unsigned int qm[16] = {
2573 0x10101010, 0x10101010, 0x10101010, 0x10101010,
2574 0x10101010, 0x10101010, 0x10101010, 0x10101010,
2575 0x10101010, 0x10101010, 0x10101010, 0x10101010,
2576 0x10101010, 0x10101010, 0x10101010, 0x10101010
2579 gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
2580 gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
2581 gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
2582 gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
2586 gen9_vdenc_mfx_fqm_state(VADriverContextP ctx,
2590 struct intel_encoder_context *encoder_context)
2592 struct intel_batchbuffer *batch = encoder_context->base.batch;
2593 unsigned int fqm_buffer[32];
2595 assert(fqm_length <= 32);
2596 assert(sizeof(*fqm) == 4);
2597 memcpy(fqm_buffer, fqm, fqm_length * 4);
2599 BEGIN_BCS_BATCH(batch, 34);
2600 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
2601 OUT_BCS_BATCH(batch, fqm_type << 0);
2602 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
2603 ADVANCE_BCS_BATCH(batch);
2607 gen9_vdenc_mfx_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2609 /* TODO: add support for non flat matrix */
2610 unsigned int qm[32] = {
2611 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2612 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2613 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2614 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2615 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2616 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2617 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2618 0x10001000, 0x10001000, 0x10001000, 0x10001000
2621 gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
2622 gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
2623 gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
2624 gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
2628 gen9_vdenc_mfx_avc_img_state(VADriverContextP ctx,
2629 struct encode_state *encode_state,
2630 struct intel_encoder_context *encoder_context)
2632 struct intel_batchbuffer *batch = encoder_context->base.batch;
2633 struct gen9_mfx_avc_img_state mfx_img_cmd;
2635 gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &mfx_img_cmd);
2637 BEGIN_BCS_BATCH(batch, (sizeof(mfx_img_cmd) >> 2));
2638 intel_batchbuffer_data(batch, &mfx_img_cmd, sizeof(mfx_img_cmd));
2639 ADVANCE_BCS_BATCH(batch);
2643 gen9_vdenc_vdenc_pipe_mode_select(VADriverContextP ctx,
2644 struct encode_state *encode_state,
2645 struct intel_encoder_context *encoder_context)
2647 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2648 struct intel_batchbuffer *batch = encoder_context->base.batch;
2650 BEGIN_BCS_BATCH(batch, 2);
2652 OUT_BCS_BATCH(batch, VDENC_PIPE_MODE_SELECT | (2 - 2));
2653 OUT_BCS_BATCH(batch,
2654 (vdenc_context->vdenc_streamin_enable << 9) |
2655 (vdenc_context->vdenc_pak_threshold_check_enable << 8) |
2656 (1 << 7) | /* Tlb prefetch enable */
2657 (1 << 5) | /* Frame Statistics Stream-Out Enable */
2658 (VDENC_CODEC_AVC << 0));
2660 ADVANCE_BCS_BATCH(batch);
/* Shared emitter for the three VDEnc surface-state commands (6 DWs each).
 * vdenc_surface_cmd selects SRC/REF/DS_REF; geometry (width/height/pitch)
 * and the NV12-style Y/CbCr offsets come from the gpe_resource.  The
 * surface is always described as tiled Y-major, interleaved-UV planar 420.
 */
2664 gen9_vdenc_vdenc_surface_state(VADriverContextP ctx,
2665 struct intel_encoder_context *encoder_context,
2666 struct i965_gpe_resource *gpe_resource,
2667 int vdenc_surface_cmd)
2669 struct intel_batchbuffer *batch = encoder_context->base.batch;
2671 BEGIN_BCS_BATCH(batch, 6);
2673 OUT_BCS_BATCH(batch, vdenc_surface_cmd | (6 - 2));
2674 OUT_BCS_BATCH(batch, 0);
2675 OUT_BCS_BATCH(batch,
2676 ((gpe_resource->height - 1) << 18) |
2677 ((gpe_resource->width - 1) << 4));
2678 OUT_BCS_BATCH(batch,
2679 (VDENC_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface only on SKL */
2680 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
2681 ((gpe_resource->pitch - 1) << 3) | /* pitch */
2682 (0 << 2) | /* must be 0 for interleave U/V */
2683 (1 << 1) | /* must be tiled */
2684 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
2685 OUT_BCS_BATCH(batch,
2686 (0 << 16) | /* must be 0 for interleave U/V */
2687 (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
2688 OUT_BCS_BATCH(batch,
2689 (0 << 16) | /* must be 0 for interleave U/V */
2690 (gpe_resource->y_cb_offset)); /* y offset for v(cr); same as Cb for interleaved UV */
2692 ADVANCE_BCS_BATCH(batch);
/* Thin wrapper: emit the VDEnc source (raw input) surface state. */
2696 gen9_vdenc_vdenc_src_surface_state(VADriverContextP ctx,
2697 struct intel_encoder_context *encoder_context,
2698 struct i965_gpe_resource *gpe_resource)
2700 gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_SRC_SURFACE_STATE);
/* Thin wrapper: emit the VDEnc full-resolution reference surface state. */
2704 gen9_vdenc_vdenc_ref_surface_state(VADriverContextP ctx,
2705 struct intel_encoder_context *encoder_context,
2706 struct i965_gpe_resource *gpe_resource)
2708 gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_REF_SURFACE_STATE);
/* Thin wrapper: emit the VDEnc downscaled (4x) reference surface state. */
2712 gen9_vdenc_vdenc_ds_ref_surface_state(VADriverContextP ctx,
2713 struct intel_encoder_context *encoder_context,
2714 struct i965_gpe_resource *gpe_resource)
2716 gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_DS_REF_SURFACE_STATE);
/* Emit VDENC_PIPE_BUF_ADDR_STATE (37 DWs): programs every buffer address
 * the VDEnc pipe reads/writes — downscaled and full-res forward
 * references (selected through list_ref_idx[0][0..1]), the raw input,
 * optional stream-in data, the row-store scratch and the statistics
 * stream-out.  Backward-reference slots are NULL (not supported on SKL).
 */
2720 gen9_vdenc_vdenc_pipe_buf_addr_state(VADriverContextP ctx,
2721 struct encode_state *encode_state,
2722 struct intel_encoder_context *encoder_context)
2724 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2725 struct intel_batchbuffer *batch = encoder_context->base.batch;
2727 BEGIN_BCS_BATCH(batch, 37);
2729 OUT_BCS_BATCH(batch, VDENC_PIPE_BUF_ADDR_STATE | (37 - 2));
2731 /* DW1-6 for DS FWD REF0/REF1 */
2732 OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
2733 OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
2735 /* DW7-9 for DS BWD REF0, ignored on SKL */
2736 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2738 /* DW10-12 for uncompressed input data */
2739 OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);
2741 /* DW13-DW15 for streamin data */
2742 if (vdenc_context->vdenc_streamin_enable)
2743 OUT_BUFFER_3DW(batch, vdenc_context->vdenc_streamin_res.bo, 0, 0, 0);
/* NOTE(review): an "else" line (orig. 2744) appears to have been dropped by
 * extraction here — without it DW13-15 would be emitted twice when
 * stream-in is enabled, overflowing the 37-DW batch.  Verify upstream. */
2745 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2747 /* DW16-DW18 for row scratch buffer */
2748 OUT_BUFFER_3DW(batch, vdenc_context->vdenc_row_store_scratch_res.bo, 1, 0, 0);
2750 /* DW19-DW21, ignored on SKL */
2751 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2753 /* DW22-DW27 for FWD REF0/REF1 */
2754 OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
2755 OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
2757 /* DW28-DW30 for FWD REF2, ignored on SKL */
2758 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2760 /* DW31-DW33 for BDW REF0, ignored on SKL */
2761 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2763 /* DW34-DW36 for VDEnc statistics streamout */
2764 OUT_BUFFER_3DW(batch, vdenc_context->vdenc_statistics_res.bo, 1, 0, 0);
2766 ADVANCE_BCS_BATCH(batch);
/* Emit VDENC_CONST_QPT_STATE (61 DWs): uploads the per-QP lambda, skip
 * threshold and SIC forward-transform coefficient threshold tables.
 * I frames use the intra table set; other frame types use the *_p set
 * (with the P skip thresholds scaled by 3 in place).
 * NOTE(review): several lines (orig. 2776-2801, incl. braces and the
 * declaration of "i") were dropped by extraction; the visible code is a
 * partial view of the original if/else.
 */
2770 gen9_vdenc_vdenc_const_qpt_state(VADriverContextP ctx,
2771 struct encode_state *encode_state,
2772 struct intel_encoder_context *encoder_context)
2774 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2775 struct intel_batchbuffer *batch = encoder_context->base.batch;
2777 BEGIN_BCS_BATCH(batch, 61);
2779 OUT_BCS_BATCH(batch, VDENC_CONST_QPT_STATE | (61 - 2));
2781 if (vdenc_context->frame_type == VDENC_FRAME_I) {
2783 intel_batchbuffer_data(batch, vdenc_const_qp_lambda, sizeof(vdenc_const_qp_lambda));
2786 intel_batchbuffer_data(batch, vdenc_const_skip_threshold, sizeof(vdenc_const_skip_threshold));
2789 intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_0, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0));
2792 intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_1, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1));
2795 intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_2, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2));
2798 intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_3, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3));
2802 for (i = 0; i < 28; i++) {
2803 vdenc_const_skip_threshold_p[i] *= 3; /* mutates the shared P-frame table in place */
2807 intel_batchbuffer_data(batch, vdenc_const_qp_lambda_p, sizeof(vdenc_const_qp_lambda_p));
2810 intel_batchbuffer_data(batch, vdenc_const_skip_threshold_p, sizeof(vdenc_const_skip_threshold_p));
2813 intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_0_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0_p));
2816 intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_1_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1_p));
2819 intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_2_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2_p));
2822 intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_3_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3_p));
2825 ADVANCE_BCS_BATCH(batch);
/* Emit VDENC_WALKER_STATE (2 DWs) with all walker fields zeroed, which
 * starts the VDEnc walker over the whole frame. */
2829 gen9_vdenc_vdenc_walker_state(VADriverContextP ctx,
2830 struct encode_state *encode_state,
2831 struct intel_encoder_context *encoder_context)
2833 struct intel_batchbuffer *batch = encoder_context->base.batch;
2835 BEGIN_BCS_BATCH(batch, 2);
2837 OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (2 - 2));
2838 OUT_BCS_BATCH(batch, 0); /* All fields are set to 0 */
2840 ADVANCE_BCS_BATCH(batch);
/* Emit the VDENC_IMG_STATE command: build a gen9_vdenc_img_state struct
 * (final arg 1 — presumably "update/current pass" flag, see init helper)
 * and copy it verbatim into the BCS batch. */
2844 gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
2845 struct encode_state *encode_state,
2846 struct intel_encoder_context *encoder_context)
2848 struct intel_batchbuffer *batch = encoder_context->base.batch;
2849 struct gen9_vdenc_img_state vdenc_img_cmd;
2851 gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, &vdenc_img_cmd, 1);
2853 BEGIN_BCS_BATCH(batch, (sizeof(vdenc_img_cmd) >> 2)); /* command length in DWs */
2854 intel_batchbuffer_data(batch, &vdenc_img_cmd, sizeof(vdenc_img_cmd));
2855 ADVANCE_BCS_BATCH(batch);
2859 intel_avc_enc_slice_type_fixup(int slice_type);
/* Emit MFX_INSERT_OBJECT: inserts raw header/bitstream DWs into the
 * coded stream.  data_bits_in_last_dw==0 means the last DW is full (32
 * bits).  skip_emul_byte_count bytes are exempt from emulation-byte
 * insertion; emulation_flag enables 0x03 emulation-prevention insertion.
 * NOTE(review): "lenght_in_dws" is a long-standing typo for
 * "length_in_dws" (kept as-is; renaming would touch every caller).
 */
2862 gen9_vdenc_mfx_avc_insert_object(VADriverContextP ctx,
2863 struct intel_encoder_context *encoder_context,
2864 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
2865 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
2866 int slice_header_indicator)
2868 struct intel_batchbuffer *batch = encoder_context->base.batch;
2870 if (data_bits_in_last_dw == 0)
2871 data_bits_in_last_dw = 32;
2873 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
2875 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws)); /* DW count field = total - 2 */
2876 OUT_BCS_BATCH(batch,
2877 (0 << 16) | /* always start at offset 0 */
2878 (slice_header_indicator << 14) |
2879 (data_bits_in_last_dw << 8) |
2880 (skip_emul_byte_count << 4) |
2881 (!!emulation_flag << 3) |
2882 ((!!is_last_header) << 2) |
2883 ((!!is_end_of_slice) << 1) |
2884 (0 << 0)); /* TODO: check this flag */
2885 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
2887 ADVANCE_BCS_BATCH(batch);
/* Insert the packed per-slice data for one slice:
 *  1) every raw packed buffer attached to the slice (skipping the packed
 *     slice header, which must come last),
 *  2) then the slice header itself — either the application-provided
 *     packed header (slice_header_index >= 0) or one generated by
 *     build_avc_slice_header() when none was supplied.
 * NOTE(review): many argument lines of the gen9_vdenc_mfx_avc_insert_object
 * calls (orig. 2929-2939, 2950-2962, 2977-2990) and a "slice_index"
 * parameter line were dropped by extraction — this is a partial view.
 */
2891 gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
2892 struct encode_state *encode_state,
2893 struct intel_encoder_context *encoder_context,
2896 VAEncPackedHeaderParameterBuffer *param = NULL;
2897 unsigned int length_in_bits;
2898 unsigned int *header_data = NULL;
2899 int count, i, start_index;
2900 int slice_header_index;
2902 if (encode_state->slice_header_index[slice_index] == 0)
2903 slice_header_index = -1; /* no packed slice header supplied — generate one below */
2905 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
2907 count = encode_state->slice_rawdata_count[slice_index];
2908 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
2910 for (i = 0; i < count; i++) {
2911 unsigned int skip_emul_byte_cnt;
2913 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
2915 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
2917 /* skip the slice header packed data type as it is lastly inserted */
2918 if (param->type == VAEncPackedHeaderSlice)
2921 length_in_bits = param->bit_length;
2923 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2925 /* as the slice header is still required, the last header flag is set to
2928 gen9_vdenc_mfx_avc_insert_object(ctx,
2931 ALIGN(length_in_bits, 32) >> 5,
2932 length_in_bits & 0x1f,
2936 !param->has_emulation_bytes,
2940 if (slice_header_index == -1) {
2941 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
2942 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2943 VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
2944 unsigned char *slice_header = NULL;
2945 int slice_header_length_in_bits = 0;
2947 /* No slice header data is passed. And the driver needs to generate it */
2948 /* For the Normal H264 */
2949 slice_header_length_in_bits = build_avc_slice_header(seq_param,
2953 gen9_vdenc_mfx_avc_insert_object(ctx,
2955 (unsigned int *)slice_header,
2956 ALIGN(slice_header_length_in_bits, 32) >> 5,
2957 slice_header_length_in_bits & 0x1f,
2958 5, /* first 5 bytes are start code + nal unit type */
2964 unsigned int skip_emul_byte_cnt;
2966 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
2968 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
2969 length_in_bits = param->bit_length;
2971 /* as the slice header is the last header data for one slice,
2972 * the last header flag is set to one.
2974 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2976 gen9_vdenc_mfx_avc_insert_object(ctx,
2979 ALIGN(length_in_bits, 32) >> 5,
2980 length_in_bits & 0x1f,
2984 !param->has_emulation_bytes,
/* Insert the bitstream headers for one slice.  For slice 0 only, the
 * packed SPS, PPS and SEI buffers (if the application supplied them) are
 * inserted first; then the per-slice packed data is inserted for every
 * slice via gen9_vdenc_mfx_avc_insert_slice_packed_data().
 * NOTE(review): "inset" in the function name is a typo for "insert"
 * (kept — renaming would touch callers).  Several argument lines of the
 * insert_object calls were dropped by extraction (orig. 3015-3025,
 * 3040-3050, 3064-3072, 3079-3082).
 */
2992 gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
2993 struct encode_state *encode_state,
2994 struct intel_encoder_context *encoder_context,
2995 VAEncSliceParameterBufferH264 *slice_param,
2998 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2999 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
3000 unsigned int internal_rate_mode = vdenc_context->internal_rate_mode;
3001 unsigned int skip_emul_byte_cnt;
3003 if (slice_index == 0) {
3004 if (encode_state->packed_header_data[idx]) {
3005 VAEncPackedHeaderParameterBuffer *param = NULL;
3006 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3007 unsigned int length_in_bits;
3009 assert(encode_state->packed_header_param[idx]);
3010 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3011 length_in_bits = param->bit_length;
3013 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3014 gen9_vdenc_mfx_avc_insert_object(ctx,
3017 ALIGN(length_in_bits, 32) >> 5,
3018 length_in_bits & 0x1f,
3022 !param->has_emulation_bytes,
3026 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
3028 if (encode_state->packed_header_data[idx]) {
3029 VAEncPackedHeaderParameterBuffer *param = NULL;
3030 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3031 unsigned int length_in_bits;
3033 assert(encode_state->packed_header_param[idx]);
3034 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3035 length_in_bits = param->bit_length;
3037 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3039 gen9_vdenc_mfx_avc_insert_object(ctx,
3042 ALIGN(length_in_bits, 32) >> 5,
3043 length_in_bits & 0x1f,
3047 !param->has_emulation_bytes,
3051 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
3053 if (encode_state->packed_header_data[idx]) {
3054 VAEncPackedHeaderParameterBuffer *param = NULL;
3055 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3056 unsigned int length_in_bits;
3058 assert(encode_state->packed_header_param[idx]);
3059 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3060 length_in_bits = param->bit_length;
3062 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3063 gen9_vdenc_mfx_avc_insert_object(ctx,
3066 ALIGN(length_in_bits, 32) >> 5,
3067 length_in_bits & 0x1f,
3071 !param->has_emulation_bytes,
3073 } else if (internal_rate_mode == I965_BRC_CBR) {
3074 /* TODO: insert others */
3078 gen9_vdenc_mfx_avc_insert_slice_packed_data(ctx,
/* Emit MFX_AVC_SLICE_STATE (11 DWs) for one slice: derives slice/next-
 * slice MB positions, weighted-prediction state and reference counts from
 * the VA picture/slice parameters.  Rate-control fields (max_qp, grow,
 * shrink, correct[]) are currently hard-coded to 0 (see TODOs).
 * NOTE(review): extraction dropped several lines here (orig. 3091, 3099,
 * 3164, 3198-3210), including some DW payload lines — partial view.
 */
3085 gen9_vdenc_mfx_avc_slice_state(VADriverContextP ctx,
3086 struct encode_state *encode_state,
3087 struct intel_encoder_context *encoder_context,
3088 VAEncPictureParameterBufferH264 *pic_param,
3089 VAEncSliceParameterBufferH264 *slice_param,
3090 VAEncSliceParameterBufferH264 *next_slice_param)
3092 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3093 struct intel_batchbuffer *batch = encoder_context->base.batch;
3094 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
3095 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
3096 unsigned char correct[6], grow, shrink;
3097 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
3098 int max_qp_n, max_qp_p;
3100 int weighted_pred_idc = 0;
3101 int num_ref_l0 = 0, num_ref_l1 = 0;
3102 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3103 int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; // TODO: fix for CBR&VBR */
3105 slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
3106 slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_height_in_mbs; /* NOTE(review): vertical MB position is normally address / frame_width_in_mbs — dividing by frame_height_in_mbs looks wrong; confirm against upstream */
3108 if (next_slice_param) {
3109 next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
3110 next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_height_in_mbs; /* NOTE(review): same suspicious divisor as above */
3112 next_slice_hor_pos = 0;
3113 next_slice_ver_pos = vdenc_context->frame_height_in_mbs; /* one row past the last slice */
3116 if (slice_type == SLICE_TYPE_I) {
3117 luma_log2_weight_denom = 0;
3118 chroma_log2_weight_denom = 0;
3119 } else if (slice_type == SLICE_TYPE_P) {
3120 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
3121 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
3123 if (slice_param->num_ref_idx_active_override_flag)
3124 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3125 } else if (slice_type == SLICE_TYPE_B) {
3126 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
3127 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
3128 num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
3130 if (slice_param->num_ref_idx_active_override_flag) {
3131 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3132 num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
3135 if (weighted_pred_idc == 2) {
3136 /* 8.4.3 - Derivation process for prediction weights (8-279) */
3137 luma_log2_weight_denom = 5;
3138 chroma_log2_weight_denom = 5;
3142 max_qp_n = 0; /* TODO: update it */
3143 max_qp_p = 0; /* TODO: update it */
3144 grow = 0; /* TODO: update it */
3145 shrink = 0; /* TODO: update it */
3147 for (i = 0; i < 6; i++)
3148 correct[i] = 0; /* TODO: update it */
3150 BEGIN_BCS_BATCH(batch, 11);
3152 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
3153 OUT_BCS_BATCH(batch, slice_type);
3154 OUT_BCS_BATCH(batch,
3155 (num_ref_l0 << 16) |
3156 (num_ref_l1 << 24) |
3157 (chroma_log2_weight_denom << 8) |
3158 (luma_log2_weight_denom << 0));
3159 OUT_BCS_BATCH(batch,
3160 (weighted_pred_idc << 30) |
3161 (slice_param->direct_spatial_mv_pred_flag << 29) |
3162 (slice_param->disable_deblocking_filter_idc << 27) |
3163 (slice_param->cabac_init_idc << 24) |
3165 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
3166 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
3168 OUT_BCS_BATCH(batch,
3169 slice_ver_pos << 24 |
3170 slice_hor_pos << 16 |
3171 slice_param->macroblock_address);
3172 OUT_BCS_BATCH(batch,
3173 next_slice_ver_pos << 16 |
3174 next_slice_hor_pos);
3176 OUT_BCS_BATCH(batch,
3177 (0 << 31) | /* TODO: ignore it for VDENC ??? */
3178 (!slice_param->macroblock_address << 30) | /* ResetRateControlCounter */
3179 (2 << 28) | /* Loose Rate Control */
3180 (0 << 24) | /* RC Stable Tolerance */
3181 (0 << 23) | /* RC Panic Enable */
3182 (1 << 22) | /* CBP mode */
3183 (0 << 21) | /* MB Type Direct Conversion, 0: Enable, 1: Disable */
3184 (0 << 20) | /* MB Type Skip Conversion, 0: Enable, 1: Disable */
3185 (!next_slice_param << 19) | /* Is Last Slice */
3186 (0 << 18) | /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
3187 (1 << 17) | /* HeaderPresentFlag */
3188 (1 << 16) | /* SliceData PresentFlag */
3189 (0 << 15) | /* TailPresentFlag, TODO: check it on VDEnc */
3190 (1 << 13) | /* RBSP NAL TYPE */
3191 (1 << 12)); /* CabacZeroWordInsertionEnable */
3193 OUT_BCS_BATCH(batch, vdenc_context->compressed_bitstream.start_offset);
3195 OUT_BCS_BATCH(batch,
3196 (max_qp_n << 24) | /*Target QP - 24 is lowest QP*/
3197 (max_qp_p << 16) | /*Target QP + 20 is highest QP*/
3200 OUT_BCS_BATCH(batch,
3205 (correct[5] << 20) |
3206 (correct[4] << 16) |
3207 (correct[3] << 12) |
3211 OUT_BCS_BATCH(batch, 0);
3213 ADVANCE_BCS_BATCH(batch);
/* Pack one MFX_AVC_REF_IDX_STATE reference entry byte from a VA picture:
 * bit 6 = long-term flag, bit 5 = "frame" (neither or both field flags
 * set), bits 4:1 = frame store id, bit 0 = bottom-field-only flag. */
3217 gen9_vdenc_mfx_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
3219 unsigned int is_long_term =
3220 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
3221 unsigned int is_top_field =
3222 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
3223 unsigned int is_bottom_field =
3224 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
3226 return ((is_long_term << 6) |
3227 ((is_top_field ^ is_bottom_field ^ 1) << 5) | /* 1 when frame (not a single field) */
3228 (frame_store_id << 1) |
3229 ((is_top_field ^ 1) & is_bottom_field)); /* 1 only for a bottom field */
/* Emit MFX_AVC_REF_IDX_STATE for list L0 of a P slice.  Builds the L0
 * entry DW from RefPicList0 (unused slots stay 0x80 = invalid) and pads
 * the remaining 7 DWs with 0x80808080.  B slices are intentionally not
 * handled: VDEnc on SKL has no backward references.
 */
3233 gen9_vdenc_mfx_avc_ref_idx_state(VADriverContextP ctx,
3234 struct encode_state *encode_state,
3235 struct intel_encoder_context *encoder_context,
3236 VAEncSliceParameterBufferH264 *slice_param)
3238 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3239 struct intel_batchbuffer *batch = encoder_context->base.batch;
3240 VAPictureH264 *ref_pic;
3241 int i, slice_type, ref_idx_shift;
3242 unsigned int fwd_ref_entry;
3244 fwd_ref_entry = 0x80808080; /* all four slots marked invalid initially */
3245 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3247 for (i = 0; i < MAX(vdenc_context->num_refs[0], 2); i++) {
3248 ref_pic = &slice_param->RefPicList0[i];
3249 ref_idx_shift = vdenc_context->list_ref_idx[0][i] * 8; /* byte slot for this ref */
3251 fwd_ref_entry &= ~(0xFF << ref_idx_shift); /* clear the 0x80 placeholder */
3252 fwd_ref_entry += (gen9_vdenc_mfx_get_ref_idx_state(ref_pic, vdenc_context->list_ref_idx[0][i]) << ref_idx_shift);
3255 if (slice_type == SLICE_TYPE_P) {
3256 BEGIN_BCS_BATCH(batch, 10);
3257 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); /* 10 DWs total => length field 8 */
3258 OUT_BCS_BATCH(batch, 0); // L0
3259 OUT_BCS_BATCH(batch, fwd_ref_entry);
3261 for (i = 0; i < 7; i++) {
3262 OUT_BCS_BATCH(batch, 0x80808080); /* remaining entries: invalid */
3265 ADVANCE_BCS_BATCH(batch);
3268 if (slice_type == SLICE_TYPE_B) {
3269 /* VDEnc on SKL doesn't support BDW */
/* Emit MFX_AVC_WEIGHTOFFSET_STATE (98 DWs) for list L0 when explicit
 * weighted prediction is enabled on a P slice.  Packs the 32 L0 entries
 * as (luma weight, luma offset, Cb weight, Cb offset, Cr weight,
 * Cr offset) int16 sextuplets.  B slices are not handled (no backward
 * prediction on SKL VDEnc).
 */
3275 gen9_vdenc_mfx_avc_weightoffset_state(VADriverContextP ctx,
3276 struct encode_state *encode_state,
3277 struct intel_encoder_context *encoder_context,
3278 VAEncPictureParameterBufferH264 *pic_param,
3279 VAEncSliceParameterBufferH264 *slice_param)
3281 struct intel_batchbuffer *batch = encoder_context->base.batch;
3283 short weightoffsets[32 * 6];
3285 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3287 if (slice_type == SLICE_TYPE_P &&
3288 pic_param->pic_fields.bits.weighted_pred_flag == 1) {
3290 for (i = 0; i < 32; i++) {
3291 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
3292 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
3293 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
3294 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
3295 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
3296 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
3299 BEGIN_BCS_BATCH(batch, 98);
3300 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
3301 OUT_BCS_BATCH(batch, 0); /* list L0 */
3302 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
3304 ADVANCE_BCS_BATCH(batch);
3307 if (slice_type == SLICE_TYPE_B) {
3308 /* VDEnc on SKL doesn't support BWD */
/* Program everything needed for one slice, in order: ref-idx state,
 * weight/offset state, slice state, then the header/packed-data
 * insertion.  Argument lines of the latter three calls (orig. 3325-3340)
 * were dropped by extraction.
 */
3314 gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
3315 struct encode_state *encode_state,
3316 struct intel_encoder_context *encoder_context,
3317 VAEncSliceParameterBufferH264 *slice_param,
3318 VAEncSliceParameterBufferH264 *next_slice_param,
3321 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
3323 gen9_vdenc_mfx_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param);
3324 gen9_vdenc_mfx_avc_weightoffset_state(ctx,
3329 gen9_vdenc_mfx_avc_slice_state(ctx,
3335 gen9_vdenc_mfx_avc_inset_headers(ctx,
/* Walk all slice parameter buffers (outer loop over buffers, inner loop
 * over elements), programming each slice via
 * gen9_vdenc_mfx_avc_single_slice().  In frame-level VDEnc mode a single
 * VDENC_WALKER_STATE plus a VD pipeline flush follows all slices; the
 * per-slice path is unimplemented (see dropped assert/TODO around orig.
 * 3379-3387).  Ends with an MI_FLUSH_DW that invalidates the video
 * pipeline cache.
 */
3343 gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
3344 struct encode_state *encode_state,
3345 struct intel_encoder_context *encoder_context)
3347 struct intel_batchbuffer *batch = encoder_context->base.batch;
3348 struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
3349 VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
3351 int slice_index = 0;
3352 int is_frame_level_vdenc = 1; /* TODO: check it for SKL */
3353 int has_tail = 0; /* TODO: check it later */
3355 for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3356 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3358 if (j == encode_state->num_slice_params_ext - 1)
3359 next_slice_group_param = NULL; /* last buffer: no following group */
3361 next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
3363 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
3364 if (i < encode_state->slice_params_ext[j]->num_elements - 1)
3365 next_slice_param = slice_param + 1;
3367 next_slice_param = next_slice_group_param;
3369 gen9_vdenc_mfx_avc_single_slice(ctx,
3378 if (is_frame_level_vdenc)
3381 /* TODO: remove assert(0) and add other commands here */
3386 if (is_frame_level_vdenc)
3390 if (is_frame_level_vdenc) {
3391 struct vd_pipeline_flush_parameter pipeline_flush_params;
3393 gen9_vdenc_vdenc_walker_state(ctx, encode_state, encoder_context);
3395 memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
3396 pipeline_flush_params.mfx_pipeline_done = !has_tail; /* MFX done unless a tail follows */
3397 pipeline_flush_params.vdenc_pipeline_done = 1;
3398 pipeline_flush_params.vdenc_pipeline_command_flush = 1;
3399 pipeline_flush_params.vd_command_message_parser_done = 1;
3400 gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
3404 /* TODO: insert a tail if required */
3407 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3408 mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
3409 gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
/* Build the full MFX+VDEnc command sequence for one PAK pass:
 *  - conditional batch-buffer-end checks against the HuC status BOs so
 *    the pass is skipped when BRC says it isn't needed,
 *  - MFX pipe/surface/buffer-address state, then VDEnc pipe/surface/
 *    buffer-address and constant-QP-table state,
 *  - image state: emitted directly when BRC is off, otherwise executed
 *    from the HuC-written second-level batch buffer,
 *  - QM/FQM matrices and finally the per-slice commands.
 */
3413 gen9_vdenc_mfx_vdenc_pipeline(VADriverContextP ctx,
3414 struct encode_state *encode_state,
3415 struct intel_encoder_context *encoder_context)
3417 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3418 struct intel_batchbuffer *batch = encoder_context->base.batch;
3419 struct gpe_mi_batch_buffer_start_parameter mi_batch_buffer_start_params;
3421 if (vdenc_context->brc_enabled) {
3422 struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
3424 memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
3425 mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
3426 gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
3429 if (vdenc_context->current_pass) { /* re-encode passes: gate on HuC status */
3430 struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
3432 memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
3433 mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status_res.bo;
3434 gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
3437 gen9_vdenc_mfx_pipe_mode_select(ctx, encode_state, encoder_context);
3439 gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res, 0);
3440 gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res, 4);
3441 gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res, 5);
3443 gen9_vdenc_mfx_pipe_buf_addr_state(ctx, encoder_context);
3444 gen9_vdenc_mfx_ind_obj_base_addr_state(ctx, encoder_context);
3445 gen9_vdenc_mfx_bsp_buf_base_addr_state(ctx, encoder_context);
3447 gen9_vdenc_vdenc_pipe_mode_select(ctx, encode_state, encoder_context);
3448 gen9_vdenc_vdenc_src_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res);
3449 gen9_vdenc_vdenc_ref_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res);
3450 gen9_vdenc_vdenc_ds_ref_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res);
3451 gen9_vdenc_vdenc_pipe_buf_addr_state(ctx, encode_state, encoder_context);
3452 gen9_vdenc_vdenc_const_qpt_state(ctx, encode_state, encoder_context);
3454 if (!vdenc_context->brc_enabled) {
3455 gen9_vdenc_mfx_avc_img_state(ctx, encode_state, encoder_context);
3456 gen9_vdenc_vdenc_img_state(ctx, encode_state, encoder_context);
3458 memset(&mi_batch_buffer_start_params, 0, sizeof(mi_batch_buffer_start_params));
3459 mi_batch_buffer_start_params.is_second_level = 1; /* Must be the second level batch buffer */
3460 mi_batch_buffer_start_params.bo = vdenc_context->second_level_batch_res.bo;
3461 gen9_gpe_mi_batch_buffer_start(ctx, batch, &mi_batch_buffer_start_params);
3464 gen9_vdenc_mfx_avc_qm_state(ctx, encoder_context);
3465 gen9_vdenc_mfx_avc_fqm_state(ctx, encoder_context);
3467 gen9_vdenc_mfx_vdenc_avc_slices(ctx, encode_state, encoder_context);
/* Map the VA rate-control mode (low 7 bits of rate_control_mode) onto
 * the driver's internal BRC mode: CBR, VBR, or CQP as the fallback.
 * (The case labels were dropped by extraction between the visible lines.)
 */
3471 gen9_vdenc_context_brc_prepare(struct encode_state *encode_state,
3472 struct intel_encoder_context *encoder_context)
3474 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3475 unsigned int rate_control_mode = encoder_context->rate_control_mode;
3477 switch (rate_control_mode & 0x7f) {
3479 vdenc_context->internal_rate_mode = I965_BRC_CBR;
3483 vdenc_context->internal_rate_mode = I965_BRC_VBR;
3488 vdenc_context->internal_rate_mode = I965_BRC_CQP;
/* After PAK: flush, then store the frame byte count MMIO into the status
 * buffer, and copy byte-count + image-status-control registers into each
 * BRC-update DMEM buffer (offsets 5 and 7 DWs) so the HuC BRC-update
 * kernel of a later pass can read them.
 * NOTE(review): "status_bffuer" is a typo for "status_buffer" baked into
 * the context struct — kept as-is.
 */
3494 gen9_vdenc_read_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3496 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3497 struct intel_batchbuffer *batch = encoder_context->base.batch;
3498 struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
3499 struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
3500 unsigned int base_offset = vdenc_context->status_bffuer.base_offset;
3503 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3504 gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3506 memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
3507 mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
3508 mi_store_register_mem_params.bo = vdenc_context->status_bffuer.res.bo;
3509 mi_store_register_mem_params.offset = base_offset + vdenc_context->status_bffuer.bytes_per_frame_offset;
3510 gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3512 /* Update DMEM buffer for BRC Update */
3513 for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
3514 mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
3515 mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
3516 mi_store_register_mem_params.offset = 5 * sizeof(uint32_t); /* DMEM slot for frame byte count */
3517 gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3519 mi_store_register_mem_params.mmio_offset = MFC_IMAGE_STATUS_CTRL_REG; /* TODO: fix it if VDBOX2 is used */
3520 mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
3521 mi_store_register_mem_params.offset = 7 * sizeof(uint32_t); /* DMEM slot for image status control */
3522 gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
/* Reject frames containing any B slice: SKL VDEnc has no backward
 * prediction.  Returns VA_STATUS_ERROR_UNKNOWN on a B slice, otherwise
 * VA_STATUS_SUCCESS.  (The slice_param++ advance line was dropped by
 * extraction from the inner loop.)
 */
3527 gen9_vdenc_avc_check_capability(VADriverContextP ctx,
3528 struct encode_state *encode_state,
3529 struct intel_encoder_context *encoder_context)
3531 VAEncSliceParameterBufferH264 *slice_param;
3534 for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3535 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3537 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
3538 if (slice_param->slice_type == SLICE_TYPE_B)
3539 return VA_STATUS_ERROR_UNKNOWN;
3545 return VA_STATUS_SUCCESS;
/* Top-level AVC encode entry for one picture: capability check, resource
 * preparation, then up to num_passes BRC/PAK passes.  Each pass builds an
 * atomic BCS batch: optional HuC BRC init/reset (first frame or reset)
 * and BRC update, then the MFX+VDEnc pipeline and status readback.
 * After the loop, BRC is marked initialized and the reset request cleared.
 */
3549 gen9_vdenc_avc_encode_picture(VADriverContextP ctx,
3551 struct encode_state *encode_state,
3552 struct intel_encoder_context *encoder_context)
3555 struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3556 struct intel_batchbuffer *batch = encoder_context->base.batch;
3558 va_status = gen9_vdenc_avc_check_capability(ctx, encode_state, encoder_context);
3560 if (va_status != VA_STATUS_SUCCESS)
3563 gen9_vdenc_avc_prepare(ctx, profile, encode_state, encoder_context);
3565 for (vdenc_context->current_pass = 0; vdenc_context->current_pass < vdenc_context->num_passes; vdenc_context->current_pass++) {
3566 vdenc_context->is_first_pass = (vdenc_context->current_pass == 0);
3567 vdenc_context->is_last_pass = (vdenc_context->current_pass == (vdenc_context->num_passes - 1));
3569 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3570 intel_batchbuffer_emit_mi_flush(batch);
3572 if (vdenc_context->brc_enabled) {
3573 if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset)
3574 gen9_vdenc_huc_brc_init_reset(ctx, encode_state, encoder_context);
3576 gen9_vdenc_huc_brc_update(ctx, encode_state, encoder_context);
3577 intel_batchbuffer_emit_mi_flush(batch);
3580 gen9_vdenc_mfx_vdenc_pipeline(ctx, encode_state, encoder_context);
3581 gen9_vdenc_read_status(ctx, encoder_context);
3583 intel_batchbuffer_end_atomic(batch);
3584 intel_batchbuffer_flush(batch);
3586 vdenc_context->brc_initted = 1;
3587 vdenc_context->brc_need_reset = 0;
3590 return VA_STATUS_SUCCESS;
/* Dispatch by VA profile: the three H.264 profiles go to the AVC encode
 * path; anything else is rejected as unsupported.  (The switch/default
 * framing lines were dropped by extraction.)
 */
3594 gen9_vdenc_pipeline(VADriverContextP ctx,
3596 struct encode_state *encode_state,
3597 struct intel_encoder_context *encoder_context)
3602 case VAProfileH264ConstrainedBaseline:
3603 case VAProfileH264Main:
3604 case VAProfileH264High:
3605 vaStatus = gen9_vdenc_avc_encode_picture(ctx, profile, encode_state, encoder_context);
3609 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
/* Release every GPE resource owned by the vdenc context: BRC buffers
 * (incl. per-pass DMEM), statistics/status buffers, surfaces, reference
 * lists, bitstream/status output, and the MFX/VDEnc row-store scratch
 * buffers.  Does NOT free the context struct itself (see
 * gen9_vdenc_context_destroy).
 */
3617 gen9_vdenc_free_resources(struct gen9_vdenc_context *vdenc_context)
3621 i965_free_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
3622 i965_free_gpe_resource(&vdenc_context->brc_history_buffer_res);
3623 i965_free_gpe_resource(&vdenc_context->brc_stream_in_res);
3624 i965_free_gpe_resource(&vdenc_context->brc_stream_out_res);
3625 i965_free_gpe_resource(&vdenc_context->huc_dummy_res);
3627 for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++)
3628 i965_free_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
3630 i965_free_gpe_resource(&vdenc_context->vdenc_statistics_res);
3631 i965_free_gpe_resource(&vdenc_context->pak_statistics_res);
3632 i965_free_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
3633 i965_free_gpe_resource(&vdenc_context->hme_detection_summary_buffer_res);
3634 i965_free_gpe_resource(&vdenc_context->brc_constant_data_res);
3635 i965_free_gpe_resource(&vdenc_context->second_level_batch_res);
3637 i965_free_gpe_resource(&vdenc_context->huc_status_res);
3638 i965_free_gpe_resource(&vdenc_context->huc_status2_res);
3640 i965_free_gpe_resource(&vdenc_context->recon_surface_res);
3641 i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
3642 i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
3643 i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
3645 for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
3646 i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
3647 i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
3650 i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
3651 i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
3652 i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
3654 i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
3655 i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
3656 i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
3657 i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
3659 i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
/*
 * mfc_context_destroy callback: release all GPE resources held by the
 * VDEnc context, then the context allocation itself.
 */
static void
gen9_vdenc_context_destroy(void *context)
{
    struct gen9_vdenc_context *vdenc_context = context;

    gen9_vdenc_free_resources(vdenc_context);
    free(vdenc_context);
}
3673 gen9_vdenc_allocate_resources(VADriverContextP ctx,
3674 struct intel_encoder_context *encoder_context,
3675 struct gen9_vdenc_context *vdenc_context)
3677 struct i965_driver_data *i965 = i965_driver_data(ctx);
3680 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_init_reset_dmem_res,
3681 ALIGN(sizeof(struct huc_brc_init_dmem), 64),
3682 "HuC Init&Reset DMEM buffer");
3684 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_history_buffer_res,
3685 ALIGN(HUC_BRC_HISTORY_BUFFER_SIZE, 0x1000),
3686 "HuC History buffer");
3688 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_in_res,
3689 ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3690 "HuC Stream In buffer");
3692 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_out_res,
3693 ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3694 "HuC Stream Out buffer");
3696 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_dummy_res,
3698 "HuC dummy buffer");
3700 for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
3701 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_update_dmem_res[i],
3702 ALIGN(sizeof(struct huc_brc_update_dmem), 64),
3703 "HuC BRC Update buffer");
3704 i965_zero_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
3707 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_statistics_res,
3708 ALIGN(VDENC_STATISTICS_SIZE, 0x1000),
3709 "VDENC statistics buffer");
3711 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->pak_statistics_res,
3712 ALIGN(PAK_STATISTICS_SIZE, 0x1000),
3713 "PAK statistics buffer");
3715 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_avc_image_state_res,
3716 ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3717 "VDENC/AVC image state buffer");
3719 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->hme_detection_summary_buffer_res,
3720 ALIGN(HME_DETECTION_SUMMARY_BUFFER_SIZE, 0x1000),
3721 "HME summary buffer");
3723 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_constant_data_res,
3724 ALIGN(BRC_CONSTANT_DATA_SIZE, 0x1000),
3725 "BRC constant buffer");
3727 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->second_level_batch_res,
3728 ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3729 "Second level batch buffer");
3731 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status_res,
3733 "HuC Status buffer");
3735 ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status2_res,
3737 "HuC Status buffer");
3741 gen9_vdenc_context_get_status(VADriverContextP ctx,
3742 struct intel_encoder_context *encoder_context,
3743 struct i965_coded_buffer_segment *coded_buffer_segment)
3745 struct gen9_vdenc_status *vdenc_status = (struct gen9_vdenc_status *)coded_buffer_segment->codec_private_data;
3747 coded_buffer_segment->base.size = vdenc_status->bytes_per_frame;
3749 return VA_STATUS_SUCCESS;
3753 gen9_vdenc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3755 struct gen9_vdenc_context *vdenc_context = calloc(1, sizeof(struct gen9_vdenc_context));
3760 vdenc_context->brc_initted = 0;
3761 vdenc_context->brc_need_reset = 0;
3762 vdenc_context->is_low_delay = 0;
3763 vdenc_context->current_pass = 0;
3764 vdenc_context->num_passes = 1;
3765 vdenc_context->vdenc_streamin_enable = 0;
3766 vdenc_context->vdenc_pak_threshold_check_enable = 0;
3768 gen9_vdenc_allocate_resources(ctx, encoder_context, vdenc_context);
3770 encoder_context->mfc_context = vdenc_context;
3771 encoder_context->mfc_context_destroy = gen9_vdenc_context_destroy;
3772 encoder_context->mfc_pipeline = gen9_vdenc_pipeline;
3773 encoder_context->mfc_brc_prepare = gen9_vdenc_context_brc_prepare;
3774 encoder_context->get_status = gen9_vdenc_context_get_status;