OSDN Git Service

Remove implicit truncations from int to bit-field
[android-x86/hardware-intel-common-vaapi.git] / src / i965_encoder_vp8.c
1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang, Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <math.h>
34 #include <time.h>
35 #include <assert.h>
36
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
39
40 #include "i965_defines.h"
41 #include "i965_drv_video.h"
42 #include "i965_encoder.h"
43 #include "i965_gpe_utils.h"
44 #include "i965_encoder_vp8.h"
45 #include "vp8_probs.h"
46 #include "vpx_quant.h"
47
/*
 * File-local numeric constants. None of them is referenced inside the
 * portion of the file visible here, so the notes below are limited to what
 * the values themselves show.
 * NOTE(review): VP8_BRC_KBPS presumably converts between kbps and bps;
 * confirm against its users.
 * BRC_KERNEL_CBR and BRC_KERNEL_VBR are distinct single-bit values
 * (0x10 and 0x20).
 */
48 #define SCALE_FACTOR_4X                 4
49 #define SCALE_FACTOR_16X                16
50
51 #define MAX_VP8_ENCODER_SURFACES        128
52
53 #define MAX_URB_SIZE                    2048 /* In register */
54 #define NUM_KERNELS_PER_GPE_CONTEXT     1
55
56 #define VP8_BRC_KBPS                    1000
57
58 #define BRC_KERNEL_CBR                  0x0010
59 #define BRC_KERNEL_VBR                  0x0020
60
/*
 * Kernel descriptor table dimensioned by NUM_VP8_BRC_RESET. Two entries are
 * spelled out, identified by VP8_BRC_INIT and VP8_BRC_RESET.
 * Each entry supplies five initializers: a label string, an identifier
 * constant, then NULL, 0 and NULL.
 * NOTE(review): struct i965_kernel is declared outside this view; confirm
 * there which fields the NULL / 0 / NULL initializers land in and where
 * they get filled in.
 */
61 struct i965_kernel vp8_kernels_brc_init_reset[NUM_VP8_BRC_RESET] = {
62     {
63         "VP8 BRC Init",
64         VP8_BRC_INIT,
65         NULL,
66         0,
67         NULL
68     },
69
70     {
71         "VP8 BRC Reset",
72         VP8_BRC_RESET,
73         NULL,
74         0,
75         NULL
76     },
77 };
78
/*
 * Kernel descriptor table dimensioned by NUM_VP8_SCALING. Two entries are
 * spelled out, identified by VP8_SCALING_4X and VP8_SCALING_16X.
 * Each entry supplies a label string, an identifier constant, then NULL, 0
 * and NULL.
 * NOTE(review): the second label reads "VP8 SCALE 16" while the first reads
 * "VP8 SCALE 4X"; the missing "X" looks like a typo, but the string is left
 * as-is because its consumers are not visible here.
 */
79 struct i965_kernel vp8_kernels_scaling[NUM_VP8_SCALING] = {
80     {
81         "VP8 SCALE 4X",
82         VP8_SCALING_4X,
83         NULL,
84         0,
85         NULL
86     },
87
88     {
89         "VP8 SCALE 16",
90         VP8_SCALING_16X,
91         NULL,
92         0,
93         NULL
94     },
95 };
96
/*
 * Kernel descriptor table dimensioned by NUM_VP8_ME. Two entries are
 * spelled out, identified by VP8_ME_4X and VP8_ME_16X.
 * Each entry supplies a label string, an identifier constant, then NULL, 0
 * and NULL.
 * NOTE(review): the second label reads "VP8 ME 16" while the first reads
 * "VP8 ME 4X"; the missing "X" looks like a typo, but the string is left
 * as-is because its consumers are not visible here.
 */
97 struct i965_kernel vp8_kernels_me[NUM_VP8_ME] = {
98     {
99         "VP8 ME 4X",
100         VP8_ME_4X,
101         NULL,
102         0,
103         NULL
104     },
105
106     {
107         "VP8 ME 16",
108         VP8_ME_16X,
109         NULL,
110         0,
111         NULL
112     },
113 };
114
/*
 * Kernel descriptor table dimensioned by NUM_VP8_MBENC. Four entries are
 * spelled out, in this order: I-frame distortion, I-frame luma, I-frame
 * chroma and P-frame (per their labels and identifier constants).
 * Each entry supplies a label string, an identifier constant, then NULL, 0
 * and NULL.
 * NOTE(review): presumably the entry order must match the numeric order of
 * the VP8_MBENC_* constants; confirm where those are declared.
 */
115 struct i965_kernel vp8_kernels_mbenc[NUM_VP8_MBENC] = {
116     {
117         "VP8 MBEnc I Frame Dist",
118         VP8_MBENC_I_FRAME_DIST,
119         NULL,
120         0,
121         NULL
122     },
123
124     {
125         "VP8 MBEnc I Frame Luma",
126         VP8_MBENC_I_FRAME_LUMA,
127         NULL,
128         0,
129         NULL
130     },
131
132     {
133         "VP8 MBEnc I Frame Chroma",
134         VP8_MBENC_I_FRAME_CHROMA,
135         NULL,
136         0,
137         NULL
138     },
139
140     {
141         "VP8 MBEnc P Frame",
142         VP8_MBENC_P_FRAME,
143         NULL,
144         0,
145         NULL
146     },
147 };
148
/*
 * Kernel descriptor table dimensioned by NUM_VP8_MPU with a single entry,
 * identified by VP8_MPU. The entry supplies a label string, the identifier
 * constant, then NULL, 0 and NULL.
 */
149 struct i965_kernel vp8_kernels_mpu[NUM_VP8_MPU] = {
150     {
151         "VP8 MPU",
152         VP8_MPU,
153         NULL,
154         0,
155         NULL
156     },
157 };
158
/*
 * Kernel descriptor table dimensioned by NUM_VP8_TPU with a single entry,
 * identified by VP8_TPU. The entry supplies a label string, the identifier
 * constant, then NULL, 0 and NULL.
 */
159 struct i965_kernel vp8_kernels_tpu[NUM_VP8_TPU] = {
160     {
161         "VP8 TPU",
162         VP8_TPU,
163         NULL,
164         0,
165         NULL
166     },
167 };
168
/*
 * Kernel descriptor table dimensioned by NUM_VP8_BRC_UPDATE with a single
 * entry, identified by VP8_BRC_UPDATE. The entry supplies a label string,
 * the identifier constant, then NULL, 0 and NULL.
 */
169 struct i965_kernel vp8_kernels_brc_update[NUM_VP8_BRC_UPDATE] = {
170     {
171         "VP8 BRC Update",
172         VP8_BRC_UPDATE,
173         NULL,
174         0,
175         NULL
176     },
177 };
178
/*
 * Eight-entry lookup table in which entry i equals the number of set bits
 * in i (0, 1, 1, 2, 1, 2, 2, 3).
 * NOTE(review): presumably indexed by a 3-bit mask of enabled reference
 * frames to obtain the reference count; confirm against the callers.
 */
179 static const unsigned char
180 vp8_num_refs[8] = {
181     0, 1, 1, 2, 1, 2, 2, 3
182 };
183
/*
 * Eight rows of sixteen 32-bit words, one row per "MEMethod" value 0..7 as
 * labelled by the inline comments.
 * Observations from the data itself:
 *   - rows 0, 1 and 6 are identical;
 *   - rows 4 and 5 are identical;
 *   - row 2 is all zeros.
 * NOTE(review): how the words are interpreted by the consumer is not
 * visible in this part of the file.
 */
184 static const unsigned int
185 vp8_search_path[8][16] = {
186     // MEMethod: 0
187     {
188         0x120FF10F, 0x1E22E20D, 0x20E2FF10, 0x2EDD06FC, 0x11D33FF1, 0xEB1FF33D, 0x4EF1F1F1, 0xF1F21211,
189         0x0DFFFFE0, 0x11201F1F, 0x1105F1CF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
190     },
191     // MEMethod: 1
192     {
193         0x120FF10F, 0x1E22E20D, 0x20E2FF10, 0x2EDD06FC, 0x11D33FF1, 0xEB1FF33D, 0x4EF1F1F1, 0xF1F21211,
194         0x0DFFFFE0, 0x11201F1F, 0x1105F1CF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
195     },
196     // MEMethod: 2
197     {
198         0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
199         0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
200     },
201     // MEMethod: 3
202     {
203         0x01010101, 0x11010101, 0x01010101, 0x11010101, 0x01010101, 0x11010101, 0x01010101, 0x11010101,
204         0x01010101, 0x11010101, 0x01010101, 0x00010101, 0x00000000, 0x00000000, 0x00000000, 0x00000000
205     },
206     // MEMethod: 4
207     {
208         0x0101F00F, 0x0F0F1010, 0xF0F0F00F, 0x01010101, 0x10101010, 0x0F0F0F0F, 0xF0F0F00F, 0x0101F0F0,
209         0x01010101, 0x10101010, 0x0F0F1010, 0x0F0F0F0F, 0xF0F0F00F, 0xF0F0F0F0, 0x00000000, 0x00000000
210     },
211     // MEMethod: 5
212     {
213         0x0101F00F, 0x0F0F1010, 0xF0F0F00F, 0x01010101, 0x10101010, 0x0F0F0F0F, 0xF0F0F00F, 0x0101F0F0,
214         0x01010101, 0x10101010, 0x0F0F1010, 0x0F0F0F0F, 0xF0F0F00F, 0xF0F0F0F0, 0x00000000, 0x00000000
215     },
216     // MEMethod: 6
217     {
218         0x120FF10F, 0x1E22E20D, 0x20E2FF10, 0x2EDD06FC, 0x11D33FF1, 0xEB1FF33D, 0x4EF1F1F1, 0xF1F21211,
219         0x0DFFFFE0, 0x11201F1F, 0x1105F1CF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
220     },
221     // MEMethod: 7
222     {
223         0x1F11F10F, 0x2E22E2FE, 0x20E220DF, 0x2EDD06FC, 0x11D33FF1, 0xEB1FF33D, 0x02F1F1F1, 0x1F201111,
224         0xF1EFFF0C, 0xF01104F1, 0x10FF0A50, 0x000FF1C0, 0x00000000, 0x00000000, 0x00000000, 0x00000000
225     }
226 };
227
/*
 * Table of four byte-sized values per row, with the row dimension given by
 * NUM_QP_VP8; 128 rows are spelled out here.
 * The last column reaches 0xff in the final rows and stays there.
 * NOTE(review): presumably one row per quantizer index, each holding four
 * I-frame VME cost values; the meaning of the individual columns is not
 * visible in this part of the file.
 */
228 static const unsigned char
229 i_frame_vme_costs_vp8[NUM_QP_VP8][4] = {
230     {0x05, 0x1f, 0x02, 0x09},
231     {0x05, 0x1f, 0x02, 0x09},
232     {0x08, 0x2b, 0x03, 0x0e},
233     {0x08, 0x2b, 0x03, 0x0e},
234     {0x0a, 0x2f, 0x04, 0x12},
235     {0x0a, 0x2f, 0x04, 0x12},
236     {0x0d, 0x39, 0x05, 0x17},
237     {0x0d, 0x39, 0x05, 0x17},
238     {0x0d, 0x39, 0x05, 0x17},
239     {0x0f, 0x3b, 0x06, 0x1b},
240     {0x0f, 0x3b, 0x06, 0x1b},
241     {0x19, 0x3d, 0x07, 0x20},
242     {0x19, 0x3d, 0x07, 0x20},
243     {0x1a, 0x3f, 0x08, 0x24},
244     {0x1a, 0x3f, 0x08, 0x24},
245     {0x1a, 0x3f, 0x08, 0x24},
246     {0x1b, 0x48, 0x09, 0x29},
247     {0x1b, 0x48, 0x09, 0x29},
248     {0x1d, 0x49, 0x09, 0x2d},
249     {0x1d, 0x49, 0x09, 0x2d},
250     {0x1d, 0x49, 0x09, 0x2d},
251     {0x1d, 0x49, 0x09, 0x2d},
252     {0x1e, 0x4a, 0x0a, 0x32},
253     {0x1e, 0x4a, 0x0a, 0x32},
254     {0x1e, 0x4a, 0x0a, 0x32},
255     {0x1e, 0x4a, 0x0a, 0x32},
256     {0x1f, 0x4b, 0x0b, 0x36},
257     {0x1f, 0x4b, 0x0b, 0x36},
258     {0x1f, 0x4b, 0x0b, 0x36},
259     {0x28, 0x4c, 0x0c, 0x3b},
260     {0x28, 0x4c, 0x0c, 0x3b},
261     {0x29, 0x4d, 0x0d, 0x3f},
262     {0x29, 0x4d, 0x0d, 0x3f},
263     {0x29, 0x4e, 0x0e, 0x44},
264     {0x29, 0x4e, 0x0e, 0x44},
265     {0x2a, 0x4f, 0x0f, 0x48},
266     {0x2a, 0x4f, 0x0f, 0x48},
267     {0x2b, 0x58, 0x10, 0x4d},
268     {0x2b, 0x58, 0x10, 0x4d},
269     {0x2b, 0x58, 0x11, 0x51},
270     {0x2b, 0x58, 0x11, 0x51},
271     {0x2b, 0x58, 0x11, 0x51},
272     {0x2c, 0x58, 0x12, 0x56},
273     {0x2c, 0x58, 0x12, 0x56},
274     {0x2c, 0x59, 0x13, 0x5a},
275     {0x2c, 0x59, 0x13, 0x5a},
276     {0x2d, 0x59, 0x14, 0x5f},
277     {0x2d, 0x59, 0x14, 0x5f},
278     {0x2e, 0x5a, 0x15, 0x63},
279     {0x2e, 0x5a, 0x15, 0x63},
280     {0x2e, 0x5a, 0x16, 0x68},
281     {0x2e, 0x5a, 0x16, 0x68},
282     {0x2e, 0x5a, 0x16, 0x68},
283     {0x2f, 0x5b, 0x17, 0x6c},
284     {0x2f, 0x5b, 0x17, 0x6c},
285     {0x38, 0x5b, 0x18, 0x71},
286     {0x38, 0x5b, 0x18, 0x71},
287     {0x38, 0x5c, 0x19, 0x76},
288     {0x38, 0x5c, 0x19, 0x76},
289     {0x38, 0x5c, 0x1a, 0x7a},
290     {0x38, 0x5c, 0x1a, 0x7a},
291     {0x39, 0x5d, 0x1a, 0x7f},
292     {0x39, 0x5d, 0x1a, 0x7f},
293     {0x39, 0x5d, 0x1b, 0x83},
294     {0x39, 0x5d, 0x1b, 0x83},
295     {0x39, 0x5e, 0x1c, 0x88},
296     {0x39, 0x5e, 0x1c, 0x88},
297     {0x3a, 0x5e, 0x1d, 0x8c},
298     {0x3a, 0x5e, 0x1d, 0x8c},
299     {0x3a, 0x5f, 0x1e, 0x91},
300     {0x3a, 0x5f, 0x1e, 0x91},
301     {0x3a, 0x5f, 0x1f, 0x95},
302     {0x3a, 0x5f, 0x1f, 0x95},
303     {0x3a, 0x68, 0x20, 0x9a},
304     {0x3a, 0x68, 0x20, 0x9a},
305     {0x3b, 0x68, 0x21, 0x9e},
306     {0x3b, 0x68, 0x21, 0x9e},
307     {0x3b, 0x68, 0x22, 0xa3},
308     {0x3b, 0x68, 0x22, 0xa3},
309     {0x3b, 0x68, 0x23, 0xa7},
310     {0x3b, 0x68, 0x23, 0xa7},
311     {0x3c, 0x68, 0x24, 0xac},
312     {0x3c, 0x68, 0x24, 0xac},
313     {0x3c, 0x68, 0x24, 0xac},
314     {0x3c, 0x69, 0x25, 0xb0},
315     {0x3c, 0x69, 0x25, 0xb0},
316     {0x3c, 0x69, 0x26, 0xb5},
317     {0x3c, 0x69, 0x26, 0xb5},
318     {0x3d, 0x69, 0x27, 0xb9},
319     {0x3d, 0x69, 0x27, 0xb9},
320     {0x3d, 0x69, 0x28, 0xbe},
321     {0x3d, 0x69, 0x28, 0xbe},
322     {0x3d, 0x6a, 0x29, 0xc2},
323     {0x3d, 0x6a, 0x29, 0xc2},
324     {0x3e, 0x6a, 0x2a, 0xc7},
325     {0x3e, 0x6a, 0x2a, 0xc7},
326     {0x3e, 0x6a, 0x2b, 0xcb},
327     {0x3e, 0x6a, 0x2b, 0xd0},
328     {0x3f, 0x6b, 0x2c, 0xd4},
329     {0x3f, 0x6b, 0x2d, 0xd9},
330     {0x3f, 0x6b, 0x2e, 0xdd},
331     {0x48, 0x6b, 0x2f, 0xe2},
332     {0x48, 0x6b, 0x2f, 0xe2},
333     {0x48, 0x6c, 0x30, 0xe6},
334     {0x48, 0x6c, 0x31, 0xeb},
335     {0x48, 0x6c, 0x32, 0xf0},
336     {0x48, 0x6c, 0x33, 0xf4},
337     {0x48, 0x6c, 0x34, 0xf9},
338     {0x49, 0x6d, 0x35, 0xfd},
339     {0x49, 0x6d, 0x36, 0xff},
340     {0x49, 0x6d, 0x37, 0xff},
341     {0x49, 0x6d, 0x38, 0xff},
342     {0x49, 0x6e, 0x3a, 0xff},
343     {0x49, 0x6e, 0x3b, 0xff},
344     {0x4a, 0x6e, 0x3c, 0xff},
345     {0x4a, 0x6f, 0x3d, 0xff},
346     {0x4a, 0x6f, 0x3d, 0xff},
347     {0x4a, 0x6f, 0x3e, 0xff},
348     {0x4a, 0x6f, 0x3f, 0xff},
349     {0x4a, 0x6f, 0x40, 0xff},
350     {0x4b, 0x78, 0x41, 0xff},
351     {0x4b, 0x78, 0x42, 0xff},
352     {0x4b, 0x78, 0x43, 0xff},
353     {0x4b, 0x78, 0x44, 0xff},
354     {0x4b, 0x78, 0x46, 0xff},
355     {0x4c, 0x78, 0x47, 0xff},
356     {0x4c, 0x79, 0x49, 0xff},
357     {0x4c, 0x79, 0x4a, 0xff}
358 };
359
/*
 * Eight-entry byte lookup table.
 * NOTE(review): the index and the meaning of the stored values are not
 * visible in this part of the file; presumably indexed by the same 3-bit
 * reference mask as the other 8-entry table nearby -- confirm at the
 * call site.
 */
360 static const unsigned char
361 mainref_table_vp8[8] = {
362     0, 1, 2, 9, 3, 13, 14, 57
363 };
364
/*
 * Table of seven 32-bit values per row, with the row dimension given by
 * NUM_QP_VP8; 128 rows are spelled out here.
 * Observations from the data itself:
 *   - columns 1 and 2 hold 0x6f6f6f6f and 0x0000006f in every row;
 *   - columns 5 and 6 never exceed 0xff and sit at 0xff in the final rows.
 * NOTE(review): presumably one row per quantizer index; the meaning of the
 * individual columns is not visible in this part of the file.
 */
365 static const unsigned int
366 cost_table_vp8[NUM_QP_VP8][7] = {
367     {0x398f0500, 0x6f6f6f6f, 0x0000006f, 0x06040402, 0x1a0c0907, 0x08, 0x0e},
368     {0x3b8f0600, 0x6f6f6f6f, 0x0000006f, 0x06040402, 0x1a0c0907, 0x0a, 0x11},
369     {0x3e8f0700, 0x6f6f6f6f, 0x0000006f, 0x06040402, 0x1a0c0907, 0x0c, 0x14},
370     {0x488f0800, 0x6f6f6f6f, 0x0000006f, 0x06040402, 0x1a0c0907, 0x0f, 0x18},
371     {0x498f0a00, 0x6f6f6f6f, 0x0000006f, 0x0d080805, 0x291b190e, 0x11, 0x1b},
372     {0x4a8f0b00, 0x6f6f6f6f, 0x0000006f, 0x0d080805, 0x291b190e, 0x13, 0x1e},
373     {0x4b8f0c00, 0x6f6f6f6f, 0x0000006f, 0x0d080805, 0x291b190e, 0x15, 0x22},
374     {0x4b8f0c00, 0x6f6f6f6f, 0x0000006f, 0x0d080805, 0x291b190e, 0x15, 0x22},
375     {0x4d8f0d00, 0x6f6f6f6f, 0x0000006f, 0x0d080805, 0x291b190e, 0x17, 0x25},
376     {0x4e8f0e00, 0x6f6f6f6f, 0x0000006f, 0x190b0c07, 0x2e281e1a, 0x19, 0x29},
377     {0x4f8f0f00, 0x6f6f6f6f, 0x0000006f, 0x190b0c07, 0x2e281e1a, 0x1b, 0x2c},
378     {0x588f1800, 0x6f6f6f6f, 0x0000006f, 0x190b0c07, 0x2e281e1a, 0x1d, 0x2f},
379     {0x588f1900, 0x6f6f6f6f, 0x0000006f, 0x190b0c07, 0x2e281e1a, 0x1f, 0x33},
380     {0x598f1900, 0x6f6f6f6f, 0x0000006f, 0x1c0f0f0a, 0x392b291e, 0x21, 0x36},
381     {0x5a8f1a00, 0x6f6f6f6f, 0x0000006f, 0x1c0f0f0a, 0x392b291e, 0x23, 0x3a},
382     {0x5a8f1a00, 0x6f6f6f6f, 0x0000006f, 0x1c0f0f0a, 0x392b291e, 0x23, 0x3a},
383     {0x5a8f1a00, 0x6f6f6f6f, 0x0000006f, 0x1c0f0f0a, 0x392b291e, 0x25, 0x3d},
384     {0x5b8f1b00, 0x6f6f6f6f, 0x0000006f, 0x1c0f0f0a, 0x392b291e, 0x27, 0x40},
385     {0x5b8f1c00, 0x6f6f6f6f, 0x0000006f, 0x2819190c, 0x3c2e2b29, 0x2a, 0x44},
386     {0x5b8f1c00, 0x6f6f6f6f, 0x0000006f, 0x2819190c, 0x3c2e2b29, 0x2a, 0x44},
387     {0x5c8f1c00, 0x6f6f6f6f, 0x0000006f, 0x2819190c, 0x3c2e2b29, 0x2c, 0x47},
388     {0x5c8f1c00, 0x6f6f6f6f, 0x0000006f, 0x2819190c, 0x3c2e2b29, 0x2c, 0x47},
389     {0x5d8f1d00, 0x6f6f6f6f, 0x0000006f, 0x2819190c, 0x3c2e2b29, 0x2e, 0x4a},
390     {0x5d8f1d00, 0x6f6f6f6f, 0x0000006f, 0x2819190c, 0x3c2e2b29, 0x2e, 0x4a},
391     {0x5d8f1d00, 0x6f6f6f6f, 0x0000006f, 0x2819190c, 0x3c2e2b29, 0x30, 0x4e},
392     {0x5d8f1d00, 0x6f6f6f6f, 0x0000006f, 0x2819190c, 0x3c2e2b29, 0x30, 0x4e},
393     {0x5e8f1e00, 0x6f6f6f6f, 0x0000006f, 0x291b1b0f, 0x3e382e2a, 0x32, 0x51},
394     {0x5e8f1f00, 0x6f6f6f6f, 0x0000006f, 0x291b1b0f, 0x3e382e2a, 0x34, 0x55},
395     {0x5e8f1f00, 0x6f6f6f6f, 0x0000006f, 0x291b1b0f, 0x3e382e2a, 0x34, 0x55},
396     {0x5f8f1f00, 0x6f6f6f6f, 0x0000006f, 0x291b1b0f, 0x3e382e2a, 0x36, 0x58},
397     {0x688f2800, 0x6f6f6f6f, 0x0000006f, 0x291b1b0f, 0x3e382e2a, 0x38, 0x5b},
398     {0x688f2800, 0x6f6f6f6f, 0x0000006f, 0x2b1d1d18, 0x483a382c, 0x3a, 0x5f},
399     {0x688f2800, 0x6f6f6f6f, 0x0000006f, 0x2b1d1d18, 0x483a382c, 0x3c, 0x62},
400     {0x688f2900, 0x6f6f6f6f, 0x0000006f, 0x2b1d1d18, 0x483a382c, 0x3e, 0x65},
401     {0x698f2900, 0x6f6f6f6f, 0x0000006f, 0x2b1d1d18, 0x483a382c, 0x40, 0x69},
402     {0x698f2900, 0x6f6f6f6f, 0x0000006f, 0x2c1f1f19, 0x493b392e, 0x43, 0x6c},
403     {0x698f2900, 0x6f6f6f6f, 0x0000006f, 0x2c1f1f19, 0x493b392e, 0x45, 0x70},
404     {0x6a8f2a00, 0x6f6f6f6f, 0x0000006f, 0x2c1f1f19, 0x493b392e, 0x47, 0x73},
405     {0x6a8f2a00, 0x6f6f6f6f, 0x0000006f, 0x2c1f1f19, 0x493b392e, 0x49, 0x76},
406     {0x6a8f2a00, 0x6f6f6f6f, 0x0000006f, 0x2e28281b, 0x4b3d3a38, 0x4b, 0x7a},
407     {0x6b8f2b00, 0x6f6f6f6f, 0x0000006f, 0x2e28281b, 0x4b3d3a38, 0x4d, 0x7d},
408     {0x6b8f2b00, 0x6f6f6f6f, 0x0000006f, 0x2e28281b, 0x4b3d3a38, 0x4d, 0x7d},
409     {0x6b8f2b00, 0x6f6f6f6f, 0x0000006f, 0x2e28281b, 0x4b3d3a38, 0x4f, 0x81},
410     {0x6b8f2b00, 0x6f6f6f6f, 0x0000006f, 0x2e28281b, 0x4b3d3a38, 0x51, 0x84},
411     {0x6b8f2c00, 0x6f6f6f6f, 0x0000006f, 0x2f29291c, 0x4c3e3b38, 0x53, 0x87},
412     {0x6c8f2c00, 0x6f6f6f6f, 0x0000006f, 0x2f29291c, 0x4c3e3b38, 0x55, 0x8b},
413     {0x6c8f2c00, 0x6f6f6f6f, 0x0000006f, 0x2f29291c, 0x4c3e3b38, 0x57, 0x8e},
414     {0x6c8f2c00, 0x6f6f6f6f, 0x0000006f, 0x2f29291c, 0x4c3e3b38, 0x59, 0x91},
415     {0x6d8f2d00, 0x6f6f6f6f, 0x0000006f, 0x382a2a1d, 0x4d483c39, 0x5b, 0x95},
416     {0x6d8f2d00, 0x6f6f6f6f, 0x0000006f, 0x382a2a1d, 0x4d483c39, 0x5e, 0x98},
417     {0x6d8f2d00, 0x6f6f6f6f, 0x0000006f, 0x382a2a1d, 0x4d483c39, 0x60, 0x9c},
418     {0x6d8f2d00, 0x6f6f6f6f, 0x0000006f, 0x382a2a1d, 0x4d483c39, 0x60, 0x9c},
419     {0x6d8f2e00, 0x6f6f6f6f, 0x0000006f, 0x382a2a1d, 0x4d483c39, 0x62, 0x9f},
420     {0x6e8f2e00, 0x6f6f6f6f, 0x0000006f, 0x392b2b1e, 0x4e483e3a, 0x64, 0xa2},
421     {0x6e8f2e00, 0x6f6f6f6f, 0x0000006f, 0x392b2b1e, 0x4e483e3a, 0x66, 0xa6},
422     {0x6e8f2e00, 0x6f6f6f6f, 0x0000006f, 0x392b2b1e, 0x4e483e3a, 0x68, 0xa9},
423     {0x6f8f2f00, 0x6f6f6f6f, 0x0000006f, 0x392b2b1e, 0x4e483e3a, 0x6a, 0xad},
424     {0x6f8f2f00, 0x6f6f6f6f, 0x0000006f, 0x3a2c2c1f, 0x4f493f3b, 0x6c, 0xb0},
425     {0x6f8f2f00, 0x6f6f6f6f, 0x0000006f, 0x3a2c2c1f, 0x4f493f3b, 0x6e, 0xb3},
426     {0x788f3800, 0x6f6f6f6f, 0x0000006f, 0x3a2c2c1f, 0x4f493f3b, 0x70, 0xb7},
427     {0x788f3800, 0x6f6f6f6f, 0x0000006f, 0x3a2c2c1f, 0x4f493f3b, 0x72, 0xba},
428     {0x788f3800, 0x6f6f6f6f, 0x0000006f, 0x3b2d2d28, 0x584a483c, 0x74, 0xbd},
429     {0x788f3800, 0x6f6f6f6f, 0x0000006f, 0x3b2d2d28, 0x584a483c, 0x76, 0xc1},
430     {0x788f3800, 0x6f6f6f6f, 0x0000006f, 0x3b2d2d28, 0x584a483c, 0x79, 0xc4},
431     {0x788f3800, 0x6f6f6f6f, 0x0000006f, 0x3b2d2d28, 0x584a483c, 0x7b, 0xc8},
432     {0x788f3800, 0x6f6f6f6f, 0x0000006f, 0x3b2e2e29, 0x594b483d, 0x7d, 0xcb},
433     {0x798f3900, 0x6f6f6f6f, 0x0000006f, 0x3b2e2e29, 0x594b483d, 0x7f, 0xce},
434     {0x798f3900, 0x6f6f6f6f, 0x0000006f, 0x3b2e2e29, 0x594b483d, 0x81, 0xd2},
435     {0x798f3900, 0x6f6f6f6f, 0x0000006f, 0x3b2e2e29, 0x594b483d, 0x83, 0xd5},
436     {0x798f3900, 0x6f6f6f6f, 0x0000006f, 0x3c2f2f29, 0x594b493e, 0x85, 0xd9},
437     {0x798f3900, 0x6f6f6f6f, 0x0000006f, 0x3c2f2f29, 0x594b493e, 0x87, 0xdc},
438     {0x798f3900, 0x6f6f6f6f, 0x0000006f, 0x3c2f2f29, 0x594b493e, 0x89, 0xdf},
439     {0x798f3a00, 0x6f6f6f6f, 0x0000006f, 0x3c2f2f29, 0x594b493e, 0x8b, 0xe3},
440     {0x7a8f3a00, 0x6f6f6f6f, 0x0000006f, 0x3d38382a, 0x5a4c493f, 0x8d, 0xe6},
441     {0x7a8f3a00, 0x6f6f6f6f, 0x0000006f, 0x3d38382a, 0x5a4c493f, 0x8f, 0xe9},
442     {0x7a8f3a00, 0x6f6f6f6f, 0x0000006f, 0x3d38382a, 0x5a4c493f, 0x91, 0xed},
443     {0x7a8f3a00, 0x6f6f6f6f, 0x0000006f, 0x3d38382a, 0x5a4c493f, 0x94, 0xf0},
444     {0x7a8f3a00, 0x6f6f6f6f, 0x0000006f, 0x3e38382b, 0x5b4d4a48, 0x96, 0xf4},
445     {0x7a8f3a00, 0x6f6f6f6f, 0x0000006f, 0x3e38382b, 0x5b4d4a48, 0x98, 0xf7},
446     {0x7b8f3b00, 0x6f6f6f6f, 0x0000006f, 0x3e38382b, 0x5b4d4a48, 0x9a, 0xfa},
447     {0x7b8f3b00, 0x6f6f6f6f, 0x0000006f, 0x3e38382b, 0x5b4d4a48, 0x9c, 0xfe},
448     {0x7b8f3b00, 0x6f6f6f6f, 0x0000006f, 0x3f38392b, 0x5b4d4b48, 0x9e, 0xff},
449     {0x7b8f3b00, 0x6f6f6f6f, 0x0000006f, 0x3f38392b, 0x5b4d4b48, 0x9e, 0xff},
450     {0x7b8f3b00, 0x6f6f6f6f, 0x0000006f, 0x3f38392b, 0x5b4d4b48, 0xa0, 0xff},
451     {0x7b8f3b00, 0x6f6f6f6f, 0x0000006f, 0x3f38392b, 0x5b4d4b48, 0xa2, 0xff},
452     {0x7b8f3b00, 0x6f6f6f6f, 0x0000006f, 0x3f38392b, 0x5b4d4b48, 0xa4, 0xff},
453     {0x7b8f3b00, 0x6f6f6f6f, 0x0000006f, 0x3f39392c, 0x5c4e4b48, 0xa6, 0xff},
454     {0x7c8f3c00, 0x6f6f6f6f, 0x0000006f, 0x3f39392c, 0x5c4e4b48, 0xa8, 0xff},
455     {0x7c8f3c00, 0x6f6f6f6f, 0x0000006f, 0x3f39392c, 0x5c4e4b48, 0xaa, 0xff},
456     {0x7c8f3c00, 0x6f6f6f6f, 0x0000006f, 0x3f39392c, 0x5c4e4b48, 0xac, 0xff},
457     {0x7c8f3c00, 0x6f6f6f6f, 0x0000006f, 0x48393a2c, 0x5c4f4c49, 0xaf, 0xff},
458     {0x7c8f3c00, 0x6f6f6f6f, 0x0000006f, 0x48393a2c, 0x5c4f4c49, 0xb1, 0xff},
459     {0x7c8f3c00, 0x6f6f6f6f, 0x0000006f, 0x48393a2c, 0x5c4f4c49, 0xb3, 0xff},
460     {0x7c8f3c00, 0x6f6f6f6f, 0x0000006f, 0x48393a2c, 0x5c4f4c49, 0xb5, 0xff},
461     {0x7d8f3d00, 0x6f6f6f6f, 0x0000006f, 0x483a3a2d, 0x5d584c49, 0xb7, 0xff},
462     {0x7d8f3d00, 0x6f6f6f6f, 0x0000006f, 0x483a3a2d, 0x5d584c49, 0xb9, 0xff},
463     {0x7d8f3d00, 0x6f6f6f6f, 0x0000006f, 0x483a3a2d, 0x5d584c49, 0xbd, 0xff},
464     {0x7d8f3d00, 0x6f6f6f6f, 0x0000006f, 0x493a3b2e, 0x5e584d4a, 0xc1, 0xff},
465     {0x7e8f3e00, 0x6f6f6f6f, 0x0000006f, 0x493a3b2e, 0x5e584d4a, 0xc5, 0xff},
466     {0x7e8f3e00, 0x6f6f6f6f, 0x0000006f, 0x493b3b2e, 0x5e584e4a, 0xc8, 0xff},
467     {0x7e8f3e00, 0x6f6f6f6f, 0x0000006f, 0x493b3b2e, 0x5e584e4a, 0xcc, 0xff},
468     {0x7e8f3e00, 0x6f6f6f6f, 0x0000006f, 0x493b3c2f, 0x5f594e4b, 0xd0, 0xff},
469     {0x7f8f3f00, 0x6f6f6f6f, 0x0000006f, 0x493b3c2f, 0x5f594e4b, 0xd2, 0xff},
470     {0x7f8f3f00, 0x6f6f6f6f, 0x0000006f, 0x493b3c2f, 0x5f594e4b, 0xd4, 0xff},
471     {0x7f8f3f00, 0x6f6f6f6f, 0x0000006f, 0x4a3c3c2f, 0x5f594f4b, 0xd8, 0xff},
472     {0x7f8f3f00, 0x6f6f6f6f, 0x0000006f, 0x4a3c3c2f, 0x5f594f4b, 0xdc, 0xff},
473     {0x888f4800, 0x6f6f6f6f, 0x0000006f, 0x4a3c3d38, 0x68594f4c, 0xe0, 0xff},
474     {0x888f4800, 0x6f6f6f6f, 0x0000006f, 0x4a3c3d38, 0x68594f4c, 0xe5, 0xff},
475     {0x888f4800, 0x6f6f6f6f, 0x0000006f, 0x4b3d3d38, 0x685a584c, 0xe9, 0xff},
476     {0x888f4800, 0x6f6f6f6f, 0x0000006f, 0x4b3d3d38, 0x685a584c, 0xed, 0xff},
477     {0x888f4800, 0x6f6f6f6f, 0x0000006f, 0x4b3d3e38, 0x685a584c, 0xf1, 0xff},
478     {0x888f4800, 0x6f6f6f6f, 0x0000006f, 0x4b3d3e38, 0x685a584c, 0xf5, 0xff},
479     {0x898f4900, 0x6f6f6f6f, 0x0000006f, 0x4b3e3e39, 0x695b584d, 0xfe, 0xff},
480     {0x898f4900, 0x6f6f6f6f, 0x0000006f, 0x4c3e3e39, 0x695b594d, 0xff, 0xff},
481     {0x898f4900, 0x6f6f6f6f, 0x0000006f, 0x4c3e3e39, 0x695b594d, 0xff, 0xff},
482     {0x898f4900, 0x6f6f6f6f, 0x0000006f, 0x4c3f3f39, 0x695b594e, 0xff, 0xff},
483     {0x898f4900, 0x6f6f6f6f, 0x0000006f, 0x4c3f3f39, 0x695b594e, 0xff, 0xff},
484     {0x898f4900, 0x6f6f6f6f, 0x0000006f, 0x4d3f3f3a, 0x6a5c594e, 0xff, 0xff},
485     {0x898f4900, 0x6f6f6f6f, 0x0000006f, 0x4d3f3f3a, 0x6a5c594e, 0xff, 0xff},
486     {0x8a8f4a00, 0x6f6f6f6f, 0x0000006f, 0x4d48483a, 0x6a5c594f, 0xff, 0xff},
487     {0x8a8f4a00, 0x6f6f6f6f, 0x0000006f, 0x4d48483a, 0x6a5c594f, 0xff, 0xff},
488     {0x8a8f4a00, 0x6f6f6f6f, 0x0000006f, 0x4d48483a, 0x6a5c5a4f, 0xff, 0xff},
489     {0x8a8f4a00, 0x6f6f6f6f, 0x0000006f, 0x4d48483a, 0x6a5c5a4f, 0xff, 0xff},
490     {0x8a8f4a00, 0x6f6f6f6f, 0x0000006f, 0x4e48483a, 0x6a5d5a58, 0xff, 0xff},
491     {0x8b8f4b00, 0x6f6f6f6f, 0x0000006f, 0x4e48483b, 0x6b5d5a58, 0xff, 0xff},
492     {0x8b8f4b00, 0x6f6f6f6f, 0x0000006f, 0x4e48483b, 0x6b5d5a58, 0xff, 0xff},
493     {0x8b8f4b00, 0x6f6f6f6f, 0x0000006f, 0x4f48493b, 0x6b5d5b58, 0xff, 0xff},
494     {0x8b8f4b00, 0x6f6f6f6f, 0x0000006f, 0x4f49493b, 0x6b5e5b58, 0xff, 0xff}
495 };
496
/*
 * All-zero table. Fourteen initializers are written out; the remaining
 * elements of the 56-element array are zero as well, because C
 * zero-initializes the unspecified tail of an aggregate.
 * NOTE(review): the element type is unsigned int, whereas the other
 * 56-element tables nearby are unsigned char. Fourteen 4-byte words cover
 * 56 bytes, so the dimension may have been meant as 14 -- confirm against
 * the code that reads this table before changing it.
 */
497 static const unsigned int single_su_vp8[56] = {
498     0x00000000, 0x00000000, 0x00000000, 0x00000000,
499     0x00000000, 0x00000000, 0x00000000, 0x00000000,
500     0x00000000, 0x00000000, 0x00000000, 0x00000000,
501     0x00000000, 0x00000000
502 };
503
/*
 * 56-byte table made of runs of the four values 0x0F, 0xF0, 0x01 and 0x10,
 * repeated in that order (the "L -> U -> R -> D" comment below); each pair
 * of runs is one entry longer than the previous pair
 * (1, 1, 2, 2, 3, 3, ... 7, 7), which adds up to 56 entries.
 * Read as little-endian 32-bit words, the 56 bytes equal the first 14
 * words of the "MEMethod: 4" row of vp8_search_path.
 * NOTE(review): presumably each byte encodes one step of a search pattern;
 * the nibble layout is not visible in this part of the file.
 */
504 static const unsigned char full_spiral_48x40_vp8[56] = {
505     // L -> U -> R -> D
506     0x0F,
507     0xF0,
508     0x01, 0x01,
509     0x10, 0x10,
510     0x0F, 0x0F, 0x0F,
511     0xF0, 0xF0, 0xF0,
512     0x01, 0x01, 0x01, 0x01,
513     0x10, 0x10, 0x10, 0x10,
514     0x0F, 0x0F, 0x0F, 0x0F, 0x0F,
515     0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
516     0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
517     0x10, 0x10, 0x10, 0x10, 0x10, 0x10,       // The last 0x10 steps outside the search window.
518     0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, // These are outside the search window.
519     0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0
520 };
521
/*
 * 56-byte table laid out as 14 rows of four bytes. The first five rows are
 * 0x11 followed by three 0x01, the sixth row is four 0x01, and the
 * remaining eight rows are zero.
 * NOTE(review): presumably each byte encodes one step of a raster-order
 * search pattern; the nibble layout is not visible in this part of the
 * file.
 */
522 static const unsigned char raster_scan_48x40_vp8[56] = {
523     0x11, 0x01, 0x01, 0x01,
524     0x11, 0x01, 0x01, 0x01,
525     0x11, 0x01, 0x01, 0x01,
526     0x11, 0x01, 0x01, 0x01,
527     0x11, 0x01, 0x01, 0x01,
528     0x01, 0x01, 0x01, 0x01,
529     0x00, 0x00, 0x00, 0x00,
530     0x00, 0x00, 0x00, 0x00,
531     0x00, 0x00, 0x00, 0x00,
532     0x00, 0x00, 0x00, 0x00,
533     0x00, 0x00, 0x00, 0x00,
534     0x00, 0x00, 0x00, 0x00,
535     0x00, 0x00, 0x00, 0x00,
536     0x00, 0x00, 0x00, 0x00
537 };
538
/*
 * 56-byte table: 44 non-trivial bytes followed by 12 zero bytes.
 * The trailing number on each row is the 1-based position of the entry
 * that starts the next row (4 entries -> 5, 8 entries -> 9, ...).
 * Read as little-endian 32-bit words, the 56 bytes equal the first 14
 * words of the "MEMethod: 0" row of vp8_search_path.
 * NOTE(review): presumably each byte encodes one step of a diamond search
 * pattern; the nibble layout is not visible in this part of the file.
 */
539 static const unsigned char diamond_vp8[56] = {
540     0x0F, 0xF1, 0x0F, 0x12,//5
541     0x0D, 0xE2, 0x22, 0x1E,//9
542     0x10, 0xFF, 0xE2, 0x20,//13
543     0xFC, 0x06, 0xDD,//16
544     0x2E, 0xF1, 0x3F, 0xD3, 0x11, 0x3D, 0xF3, 0x1F,//24
545     0xEB, 0xF1, 0xF1, 0xF1,//28
546     0x4E, 0x11, 0x12, 0xF2, 0xF1,//33
547     0xE0, 0xFF, 0xFF, 0x0D, 0x1F, 0x1F,//39
548     0x20, 0x11, 0xCF, 0xF1, 0x05, 0x11,//45
549     0x00, 0x00, 0x00, 0x00, 0x00, 0x00,//51
550     0x00, 0x00, 0x00, 0x00, 0x00, 0x00
551 };
552
/*
 * Three-dimensional table of 16-bit values: 6 groups, each holding 4 pairs.
 * NOTE(review): presumably the outer index is a motion-vector reference
 * context and each pair holds two alternative costs; the index meanings
 * are not visible in this part of the file.
 */
553 static const unsigned short
554 mv_ref_cost_context_vp8[6][4][2] = {
555     {   {1328, 10},
556         {2047, 1},
557         {2047, 1},
558         {214, 304},
559     },
560     {   {1072, 21},
561         {979, 27},
562         {1072, 21},
563         {321, 201},
564     },
565     {   {235, 278},
566         {511, 107},
567         {553, 93},
568         {488, 115},
569     },
570     {   {534, 99},
571         {560, 92},
572         {255, 257},
573         {505, 109},
574     },
575     {   {174, 361},
576         {238, 275},
577         {255, 257},
578         {744, 53},
579     },
580     {   {32, 922},
581         {113, 494},
582         {255, 257},
583         {816, 43},
584     },
585 };
586
/*
 * Threshold table dimensioned by NUM_QP_VP8; 128 values are spelled out
 * (8 rows of 16), rising from 111 to 1208.
 * NOTE(review): presumably indexed by quantizer index; how the threshold
 * is applied is not visible in this part of the file.
 */
587 static const unsigned int
588 new_mv_skip_threshold_vp8[NUM_QP_VP8] = {
589     111, 120, 129, 137, 146, 155, 163, 172, 180, 189, 198, 206, 215, 224, 232, 241,
590     249, 258, 267, 275, 284, 293, 301, 310, 318, 327, 336, 344, 353, 362, 370, 379,
591     387, 396, 405, 413, 422, 431, 439, 448, 456, 465, 474, 482, 491, 500, 508, 517,
592     525, 534, 543, 551, 560, 569, 577, 586, 594, 603, 612, 620, 629, 638, 646, 655,
593     663, 672, 681, 689, 698, 707, 715, 724, 733, 741, 750, 758, 767, 776, 784, 793,
594     802, 810, 819, 827, 836, 845, 853, 862, 871, 879, 888, 896, 905, 914, 922, 931,
595     940, 948, 957, 965, 974, 983, 991, 1000, 1009, 1017, 1026, 1034, 1043, 1052, 1060, 1069,
596     1078, 1086, 1095, 1103, 1112, 1121, 1129, 1138, 1147, 1155, 1164, 1172, 1181, 1190, 1198, 1208
597 };
598
/*
 * Ten-entry table of 16-bit values; the first five entries are non-zero
 * and the last five are zero.
 * NOTE(review): presumably the five non-zero entries are per-mode luma
 * costs and the rest is padding; confirm against the code that copies
 * this table.
 */
599 static const unsigned short
600 mb_mode_cost_luma_vp8[10] = {
601     657,    869,    915,    917,    208,    0,      0,      0,      0,      0
602 };
603
604
605 static const unsigned short
606 block_mode_cost_vp8[10][10][10] = {
607     {
608         {37,  1725,  1868,  1151,  1622,  2096,  2011,  1770,  2218,  2128  },
609         {139,  759,  1683,  911,  1455,  1846,  1570,  1295,  1792,  1648   },
610         {560,  1383,  408,  639,  1612,  1174,  1562,  1736,  847,  991     },
611         {191,  1293,  1299,  466,  1774,  1840,  1784,  1691,  1698,  1505  },
612         {211,  1624,  1294,  779,  714,  1622,  2222,  1554,  1706,  903    },
613         {297,  1259,  1098,  1062,  1583,  618,  1053,  1889,  851,  1127   },
614         {275,  703,  1356,  1111,  1597,  1075,  656,  1529,  1531,  1275   },
615         {150,  1046,  1760,  1039,  1353,  1981,  2174,  728,  1730,  1379  },
616         {516,  1414,  741,  1045,  1495,  738,  1288,  1619,  442,  1200    },
617         {424,  1365,  706,  825,  1197,  1453,  1191,  1462,  1186,  519    },
618     },
619     {
620         {393,  515,  1491,  549,  1598,  1524,  964,  1126,  1651,  2172    },
621         {693,  237,  1954,  641,  1525,  2073,  1183,  971,  1973,  2235    },
622         {560,  739,  855,  836,  1224,  1115,  966,  839,  1076,  767       },
623         {657,  368,  1406,  425,  1672,  1853,  1210,  1125,  1969,  1542   },
624         {321,  1056,  1776,  774,  803,  3311,  1265,  1177,  1366,  636    },
625         {693,  510,  949,  877,  1049,  658,  882,  1178,  1515,  1111      },
626         {744,  377,  1278,  958,  1576,  1168,  477,  1146,  1838,  1501    },
627         {488,  477,  1767,  973,  1107,  1511,  1773,  486,  1527,  1449    },
628         {744,  1004,  695,  1012,  1326,  834,  1215,  774,  724,  704      },
629         {522,  567,  1036,  1082,  1039,  1333,  873,  1135,  1189,  677    },
630     },
631     {
632         {103,  1441,  1000,  864,  1513,  1928,  1832,  1916,  1663,  1567  },
633         {304,  872,  1100,  515,  1416,  1417,  3463,  1051,  1305,  1227   },
634         {684,  2176,  242,  729,  1867,  1496,  2056,  1544,  1038,  930    },
635         {534,  1198,  669,  300,  1805,  1377,  2165,  1894,  1249,  1153   },
636         {346,  1602,  1178,  612,  997,  3381,  1335,  1328,  997,  646     },
637         {393,  1027,  649,  813,  1276,  945,  1545,  1278,  875,  1031     },
638         {528,  996,  930,  617,  1086,  1190,  621,  2760,  787,  1347      },
639         {216,  873,  1595,  738,  1339,  3896,  3898,  743,  1343,  1605    },
640         {675,  1580,  543,  749,  1859,  1245,  1589,  2377,  384,  1075    },
641         {594,  1163,  415,  684,  1474,  1080,  1491,  1478,  1077,  801    },
642     },
643     {
644         {238,  1131,  1483,  398,  1510,  1651,  1495,  1545,  1970,  2090  },
645         {499,  456,  1499,  449,  1558,  1691,  1272,  969,  2114,  2116    },
646         {675,  1386,  318,  645,  1449,  1588,  1666,  1925,  979,  859     },
647         {467,  957,  1223,  238,  1825,  1704,  1608,  1560,  1665,  1376   },
648         {331,  1460,  1238,  627,  787,  1882,  3928,  1544,  1897,  579    },
649         {457,  1038,  903,  784,  1158,  725,  955,  1517,  842,  1016      },
650         {505,  497,  1131,  812,  1508,  1206,  703,  1072,  1254,  1256    },
651         {397,  741,  1336,  642,  1506,  1852,  1340,  599,  1854,  1000    },
652         {625,  1212,  597,  750,  1291,  1057,  1401,  1401,  527,  954     },
653         {499,  1041,  654,  752,  1299,  1217,  1605,  1424,  1377,  505    },
654     },
655     {
656         {263,  1094,  1218,  602,  938,  1487,  1231,  1016,  1724,  1448   },
657         {452,  535,  1728,  562,  1008,  1471,  1473,  873,  3182,  1136    },
658         {553,  1570,  935,  1093,  826,  1339,  879,  1007,  1006,  476     },
659         {365,  900,  1050,  582,  866,  1398,  1236,  1123,  1608,  1039    },
660         {294,  2044,  1790,  1143,  430,  1642,  3688,  1549,  2080,  704   },
661         {703,  1210,  958,  815,  1211,  960,  623,  2455,  815,  559       },
662         {675,  574,  862,  1261,  866,  864,  761,  1267,  1014,  936       },
663         {342,  1254,  1857,  989,  612,  1856,  1858,  553,  1840,  1037    },
664         {553,  1316,  811,  1072,  1068,  728,  1328,  1317,  1064,  475    },
665         {288,  1303,  1167,  1167,  823,  1634,  1636,  2497,  1294,  491   },
666     },
667     {
668         {227,  1059,  1369,  1066,  1505,  740,  970,  1511,  972,  1775    },
669         {516,  587,  1033,  646,  1188,  748,  978,  1445,  1294,  1450     },
670         {684,  1048,  663,  747,  1126,  826,  1386,  1128,  635,  924      },
671         {494,  814,  933,  510,  1606,  951,  878,  1344,  1031,  1347      },
672         {553,  1071,  1327,  726,  809,  3376,  1330,  1324,  1062,  407    },
673         {625,  1120,  988,  1121,  1197,  347,  1064,  1308,  862,  1206    },
674         {633,  853,  1657,  1073,  1662,  634,  460,  1405,  811,  1155     },
675         {505,  621,  1394,  876,  1394,  876,  878,  795,  878,  1399       },
676         {684,  1302,  968,  1704,  1280,  561,  972,  1713,  387,  1104     },
677         {397,  1447,  1060,  867,  957,  1058,  749,  1475,  1210,  660     },
678     },
679     {
680         {331,  933,  1647,  761,  1647,  998,  513,  1402,  1461,  2219     },
681         {573,  485,  1968,  641,  1570,  1198,  588,  1086,  1382,  1982    },
682         {790,  942,  570,  790,  1607,  1005,  938,  1193,  714,  751       },
683         {511,  745,  1152,  492,  1878,  1206,  596,  1867,  1617,  1157    },
684         {452,  1308,  896,  896,  451,  1308,  3354,  1301,  1306,  794     },
685         {693,  670,  1072,  1020,  1687,  566,  488,  1432,  1096,  3142    },
686         {778,  566,  1993,  1283,  3139,  1251,  227,  1378,  1784,  1447   },
687         {393,  937,  1091,  934,  939,  1348,  1092,  579,  1351,  1095     },
688         {560,  1013,  1007,  1014,  1011,  644,  1165,  1155,  605,  1016   },
689         {567,  627,  997,  793,  2562,  998,  849,  1260,  922,  748        },
690     },
691     {
692         {338,  762,  1868,  717,  1247,  1757,  1263,  535,  1751,  2162    },
693         {488,  442,  3235,  756,  1658,  1814,  1264,  528,  1857,  2119    },
694         {522,  1087,  840,  1103,  843,  1354,  1098,  888,  946,  588      },
695         {483,  688,  1502,  651,  1213,  1446,  1397,  491,  1908,  1253    },
696         {452,  1386,  1910,  1175,  298,  1507,  3553,  930,  1904,  905    },
697         {713,  839,  716,  715,  932,  719,  931,  848,  3088,  1042        },
698         {516,  495,  1331,  1340,  1331,  1069,  665,  702,  1593,  1337    },
699         {401,  977,  2167,  1537,  1069,  1764,  3810,  259,  3624,  1578   },
700         {560,  1104,  601,  1371,  965,  658,  2704,  779,  967,  969       },
701         {547,  1057,  801,  1141,  1133,  1397,  937,  605,  1252,  631     },
702     },
703     {
704         {163,  1240,  925,  983,  1653,  1321,  1353,  1566,  946,  1601    },
705         {401,  726,  758,  836,  1241,  926,  1656,  795,  1394,  1396      },
706         {905,  1073,  366,  876,  1436,  1576,  1732,  2432,  459,  1019    },
707         {594,  922,  835,  417,  1387,  1124,  1098,  2042,  843,  1023     },
708         {415,  1262,  860,  1274,  758,  1272,  3318,  1010,  1276,  503    },
709         {641,  1018,  1020,  1095,  1619,  667,  1371,  2348,  397,  849    },
710         {560,  817,  903,  1014,  1420,  695,  756,  904,  821,  1421       },
711         {406,  596,  1001,  993,  1257,  1258,  1260,  746,  1002,  1264    },
712         {979,  1371,  780,  1188,  1693,  1024,  1286,  1699,  183,  1405   },
713         {733,  1292,  458,  884,  1554,  889,  1151,  1286,  738,  740      },
714     },
715     {
716         {109,  1377,  1177,  933,  1140,  1928,  1639,  1705,  1861,  1292  },
717         {342,  570,  1081,  638,  1154,  1231,  1339,  1342,  1750,  1494   },
718         {560,  1203,  345,  767,  1325,  1681,  1425,  1905,  1205,  786    },
719         {406,  1027,  1011,  410,  1306,  1901,  1389,  1636,  1493,  776   },
720         {206,  1329,  1337,  1037,  802,  1600,  3646,  1451,  1603,  693   },
721         {472,  1167,  758,  911,  1424,  703,  2749,  1428,  703,  764      },
722         {342,  780,  1139,  889,  1290,  1139,  781,  1544,  957,  1042     },
723         {227,  888,  1039,  929,  988,  3753,  1707,  818,  1710,  1306     },
724         {767,  1055,  627,  725,  1312,  980,  1065,  1324,  599,  811      },
725         {304,  1372,  888,  1173,  979,  1578,  1580,  1974,  1318,  482    },
726     }
727 };
728
/*
 * 576-byte constant blob (36 rows of 16 bytes).
 *
 * NOTE(review): judging by the identifier this packs the BRC QP-adjustment,
 * distortion-threshold and max-frame-threshold tables for I/P/B frames; the
 * internal layout is not described in this file section - confirm against
 * the consumer before editing individual bytes.
 *
 * The comment in front of each row is the byte offset of its first element.
 */
static const unsigned char
brc_qpadjustment_distthreshold_maxframethreshold_distqpadjustment_ipb_vp8[576] = {
    /* 0x000 */ 0x01, 0x03, 0x05, 0x07, 0x09, 0x01, 0x02, 0x03, 0x05, 0x07, 0x00, 0x00, 0x01, 0x02, 0x04, 0x00,
    /* 0x010 */ 0x00, 0x00, 0x01, 0x02, 0xff, 0x00, 0x00, 0x00, 0x01, 0xfd, 0xfe, 0xff, 0x00, 0x00, 0xfb, 0xfc,
    /* 0x020 */ 0xfe, 0xff, 0x00, 0xf9, 0xfa, 0xfc, 0xfe, 0xff, 0xf7, 0xf9, 0xfb, 0xfe, 0xff, 0x00, 0x04, 0x1e,
    /* 0x030 */ 0x3c, 0x50, 0x78, 0x8c, 0xc8, 0xff, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x040 */ 0x01, 0x02, 0x05, 0x08, 0x0a, 0x01, 0x02, 0x04, 0x06, 0x08, 0x00, 0x01, 0x02, 0x04, 0x06, 0x00,
    /* 0x050 */ 0x00, 0x00, 0x01, 0x02, 0xff, 0x00, 0x00, 0x00, 0x01, 0xfe, 0xff, 0xff, 0x00, 0x00, 0xfd, 0xfe,
    /* 0x060 */ 0xff, 0xff, 0x00, 0xfb, 0xfd, 0xfe, 0xff, 0x00, 0xf9, 0xfa, 0xfc, 0xfe, 0xff, 0x00, 0x04, 0x1e,
    /* 0x070 */ 0x3c, 0x50, 0x78, 0x8c, 0xc8, 0xff, 0x04, 0x05, 0x06, 0x06, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x080 */ 0x01, 0x02, 0x05, 0x08, 0x0a, 0x01, 0x02, 0x04, 0x06, 0x08, 0x00, 0x01, 0x02, 0x04, 0x06, 0x00,
    /* 0x090 */ 0x00, 0x00, 0x01, 0x02, 0xff, 0x00, 0x00, 0x00, 0x01, 0xfe, 0xff, 0xff, 0x00, 0x00, 0xfd, 0xfe,
    /* 0x0a0 */ 0xff, 0xff, 0x00, 0xfb, 0xfd, 0xfe, 0xff, 0x00, 0xf9, 0xfa, 0xfc, 0xfe, 0xff, 0x00, 0x02, 0x14,
    /* 0x0b0 */ 0x28, 0x46, 0x82, 0xa0, 0xc8, 0xff, 0x04, 0x05, 0x06, 0x06, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x0c0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x06, 0x08, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x05,
    /* 0x0d0 */ 0x07, 0x09, 0xff, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x06, 0x07, 0xfe, 0xff, 0x00, 0x00, 0x00,
    /* 0x0e0 */ 0x01, 0x02, 0x03, 0x05, 0xfd, 0xfe, 0xff, 0x00, 0x00, 0x00, 0x01, 0x03, 0x05, 0xfc, 0xfe, 0xff,
    /* 0x0f0 */ 0x00, 0x00, 0x00, 0x01, 0x03, 0x05, 0xfb, 0xfd, 0xfe, 0xff, 0x00, 0x00, 0x01, 0x03, 0x05, 0xfa,
    /* 0x100 */ 0xfc, 0xfe, 0xff, 0x00, 0x00, 0x01, 0x03, 0x05, 0xfa, 0xfc, 0xfe, 0xff, 0x00, 0x00, 0x01, 0x03,
    /* 0x110 */ 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x120 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x130 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x140 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x05, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x05,
    /* 0x150 */ 0x06, 0x08, 0xff, 0x00, 0x00, 0x00, 0x00, 0x03, 0x05, 0x07, 0x08, 0xfe, 0xff, 0x00, 0x00, 0x00,
    /* 0x160 */ 0x02, 0x04, 0x05, 0x06, 0xfd, 0xfe, 0xff, 0x00, 0x00, 0x00, 0x01, 0x04, 0x05, 0xfc, 0xfe, 0xff,
    /* 0x170 */ 0x00, 0x00, 0x00, 0x01, 0x04, 0x05, 0xfc, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x04, 0x05, 0xfc,
    /* 0x180 */ 0xfd, 0xfe, 0xff, 0x00, 0x00, 0x00, 0x01, 0x05, 0xfb, 0xfc, 0xfe, 0xff, 0x00, 0x00, 0x00, 0x01,
    /* 0x190 */ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x1a0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x1b0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x1c0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x05, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x05,
    /* 0x1d0 */ 0x06, 0x08, 0xff, 0x00, 0x00, 0x00, 0x00, 0x03, 0x05, 0x07, 0x08, 0xfe, 0xff, 0x00, 0x00, 0x00,
    /* 0x1e0 */ 0x02, 0x04, 0x05, 0x06, 0xfd, 0xfe, 0xff, 0x00, 0x00, 0x00, 0x01, 0x04, 0x05, 0xfc, 0xfe, 0xff,
    /* 0x1f0 */ 0x00, 0x00, 0x00, 0x01, 0x04, 0x05, 0xfc, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x04, 0x05, 0xfc,
    /* 0x200 */ 0xfd, 0xfe, 0xff, 0x00, 0x00, 0x00, 0x01, 0x05, 0xfb, 0xfc, 0xfe, 0xff, 0x00, 0x00, 0x00, 0x01,
    /* 0x210 */ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x220 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x230 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
768
/*
 * Constant 128 x 4 byte table (512 bytes in total).
 *
 * NOTE(review): the name suggests these are I-frame cost values consumed by
 * the BRC path; the meaning of the row index and of the four columns is not
 * visible in this file section - confirm against the consumer.
 *
 * The value sequence restarts at rows 32, 64 and 96; row indices are
 * annotated every eight rows to ease navigation.
 */
static const unsigned char
brc_iframe_cost_vp8[128][4] = {
    /*   0 */
    { 0x05, 0x05, 0x08, 0x08 },
    { 0x0a, 0x0a, 0x0d, 0x0d },
    { 0x0d, 0x0f, 0x0f, 0x19 },
    { 0x19, 0x1a, 0x1a, 0x1a },
    { 0x1b, 0x1b, 0x1d, 0x1d },
    { 0x1d, 0x1d, 0x1e, 0x1e },
    { 0x1e, 0x1e, 0x1f, 0x1f },
    { 0x1f, 0x28, 0x28, 0x29 },
    /*   8 */
    { 0x29, 0x29, 0x29, 0x2a },
    { 0x2a, 0x2b, 0x2b, 0x2b },
    { 0x2b, 0x2b, 0x2c, 0x2c },
    { 0x2c, 0x2c, 0x2d, 0x2d },
    { 0x2e, 0x2e, 0x2e, 0x2e },
    { 0x2e, 0x2f, 0x2f, 0x38 },
    { 0x38, 0x38, 0x38, 0x38 },
    { 0x38, 0x39, 0x39, 0x39 },
    /*  16 */
    { 0x39, 0x39, 0x39, 0x3a },
    { 0x3a, 0x3a, 0x3a, 0x3a },
    { 0x3a, 0x3a, 0x3a, 0x3b },
    { 0x3b, 0x3b, 0x3b, 0x3b },
    { 0x3b, 0x3c, 0x3c, 0x3c },
    { 0x3c, 0x3c, 0x3c, 0x3c },
    { 0x3d, 0x3d, 0x3d, 0x3d },
    { 0x3d, 0x3d, 0x3e, 0x3e },
    /*  24 */
    { 0x3e, 0x3e, 0x3f, 0x3f },
    { 0x3f, 0x48, 0x48, 0x48 },
    { 0x48, 0x48, 0x48, 0x48 },
    { 0x49, 0x49, 0x49, 0x49 },
    { 0x49, 0x49, 0x4a, 0x4a },
    { 0x4a, 0x4a, 0x4a, 0x4a },
    { 0x4b, 0x4b, 0x4b, 0x4b },
    { 0x4b, 0x4c, 0x4c, 0x4c },
    /*  32 */
    { 0x1f, 0x1f, 0x2b, 0x2b },
    { 0x2f, 0x2f, 0x39, 0x39 },
    { 0x39, 0x3b, 0x3b, 0x3d },
    { 0x3d, 0x3f, 0x3f, 0x3f },
    { 0x48, 0x48, 0x49, 0x49 },
    { 0x49, 0x49, 0x4a, 0x4a },
    { 0x4a, 0x4a, 0x4b, 0x4b },
    { 0x4b, 0x4c, 0x4c, 0x4d },
    /*  40 */
    { 0x4d, 0x4e, 0x4e, 0x4f },
    { 0x4f, 0x58, 0x58, 0x58 },
    { 0x58, 0x58, 0x58, 0x58 },
    { 0x59, 0x59, 0x59, 0x59 },
    { 0x5a, 0x5a, 0x5a, 0x5a },
    { 0x5a, 0x5b, 0x5b, 0x5b },
    { 0x5b, 0x5c, 0x5c, 0x5c },
    { 0x5c, 0x5d, 0x5d, 0x5d },
    /*  48 */
    { 0x5d, 0x5e, 0x5e, 0x5e },
    { 0x5e, 0x5f, 0x5f, 0x5f },
    { 0x5f, 0x68, 0x68, 0x68 },
    { 0x68, 0x68, 0x68, 0x68 },
    { 0x68, 0x68, 0x68, 0x68 },
    { 0x69, 0x69, 0x69, 0x69 },
    { 0x69, 0x69, 0x69, 0x69 },
    { 0x6a, 0x6a, 0x6a, 0x6a },
    /*  56 */
    { 0x6a, 0x6a, 0x6b, 0x6b },
    { 0x6b, 0x6b, 0x6b, 0x6c },
    { 0x6c, 0x6c, 0x6c, 0x6c },
    { 0x6d, 0x6d, 0x6d, 0x6d },
    { 0x6e, 0x6e, 0x6e, 0x6f },
    { 0x6f, 0x6f, 0x6f, 0x6f },
    { 0x78, 0x78, 0x78, 0x78 },
    { 0x78, 0x78, 0x79, 0x79 },
    /*  64 */
    { 0x02, 0x02, 0x03, 0x03 },
    { 0x04, 0x04, 0x05, 0x05 },
    { 0x05, 0x06, 0x06, 0x07 },
    { 0x07, 0x08, 0x08, 0x08 },
    { 0x09, 0x09, 0x09, 0x09 },
    { 0x09, 0x09, 0x0a, 0x0a },
    { 0x0a, 0x0a, 0x0b, 0x0b },
    { 0x0b, 0x0c, 0x0c, 0x0d },
    /*  72 */
    { 0x0d, 0x0e, 0x0e, 0x0f },
    { 0x0f, 0x10, 0x10, 0x11 },
    { 0x11, 0x11, 0x12, 0x12 },
    { 0x13, 0x13, 0x14, 0x14 },
    { 0x15, 0x15, 0x16, 0x16 },
    { 0x16, 0x17, 0x17, 0x18 },
    { 0x18, 0x19, 0x19, 0x1a },
    { 0x1a, 0x1a, 0x1a, 0x1b },
    /*  80 */
    { 0x1b, 0x1c, 0x1c, 0x1d },
    { 0x1d, 0x1e, 0x1e, 0x1f },
    { 0x1f, 0x20, 0x20, 0x21 },
    { 0x21, 0x22, 0x22, 0x23 },
    { 0x23, 0x24, 0x24, 0x24 },
    { 0x25, 0x25, 0x26, 0x26 },
    { 0x27, 0x27, 0x28, 0x28 },
    { 0x29, 0x29, 0x2a, 0x2a },
    /*  88 */
    { 0x2b, 0x2b, 0x2c, 0x2d },
    { 0x2e, 0x2f, 0x2f, 0x30 },
    { 0x31, 0x32, 0x33, 0x34 },
    { 0x35, 0x36, 0x37, 0x38 },
    { 0x3a, 0x3b, 0x3c, 0x3d },
    { 0x3d, 0x3e, 0x3f, 0x40 },
    { 0x41, 0x42, 0x43, 0x44 },
    { 0x46, 0x47, 0x49, 0x4a },
    /*  96 */
    { 0x09, 0x09, 0x0e, 0x0e },
    { 0x12, 0x12, 0x17, 0x17 },
    { 0x17, 0x1b, 0x1b, 0x20 },
    { 0x20, 0x24, 0x24, 0x24 },
    { 0x29, 0x29, 0x2d, 0x2d },
    { 0x2d, 0x2d, 0x32, 0x32 },
    { 0x32, 0x32, 0x36, 0x36 },
    { 0x36, 0x3b, 0x3b, 0x3f },
    /* 104 */
    { 0x3f, 0x44, 0x44, 0x48 },
    { 0x48, 0x4d, 0x4d, 0x51 },
    { 0x51, 0x51, 0x56, 0x56 },
    { 0x5a, 0x5a, 0x5f, 0x5f },
    { 0x63, 0x63, 0x68, 0x68 },
    { 0x68, 0x6c, 0x6c, 0x71 },
    { 0x71, 0x76, 0x76, 0x7a },
    { 0x7a, 0x7f, 0x7f, 0x83 },
    /* 112 */
    { 0x83, 0x88, 0x88, 0x8c },
    { 0x8c, 0x91, 0x91, 0x95 },
    { 0x95, 0x9a, 0x9a, 0x9e },
    { 0x9e, 0xa3, 0xa3, 0xa7 },
    { 0xa7, 0xac, 0xac, 0xac },
    { 0xb0, 0xb0, 0xb5, 0xb5 },
    { 0xb9, 0xb9, 0xbe, 0xbe },
    { 0xc2, 0xc2, 0xc7, 0xc7 },
    /* 120 */
    { 0xcb, 0xd0, 0xd4, 0xd9 },
    { 0xdd, 0xe2, 0xe2, 0xe6 },
    { 0xeb, 0xf0, 0xf4, 0xf9 },
    { 0xfd, 0xff, 0xff, 0xff },
    { 0xff, 0xff, 0xff, 0xff },
    { 0xff, 0xff, 0xff, 0xff },
    { 0xff, 0xff, 0xff, 0xff },
    { 0xff, 0xff, 0xff, 0xff },
};
900
/*
 * Constant table of 256 32-bit words.
 *
 * NOTE(review): the name suggests packed P-frame cost values for the BRC
 * path; how the four bytes of each word and the index are interpreted is
 * not visible in this file section - confirm against the consumer.
 *
 * Each source line below holds one run of identical consecutive entries;
 * the trailing comment gives the index range covered by that run.  The
 * value sequence restarts at index 128.
 */
static const unsigned int
brc_pframe_cost_vp8[256] = {
    0x06040402, 0x06040402, 0x06040402, 0x06040402,                 /*   0 -   3 */
    0x0d080805, 0x0d080805, 0x0d080805, 0x0d080805, 0x0d080805,     /*   4 -   8 */
    0x190b0c07, 0x190b0c07, 0x190b0c07, 0x190b0c07,                 /*   9 -  12 */
    0x1c0f0f0a, 0x1c0f0f0a, 0x1c0f0f0a, 0x1c0f0f0a, 0x1c0f0f0a,     /*  13 -  17 */
    0x2819190c, 0x2819190c, 0x2819190c, 0x2819190c,
    0x2819190c, 0x2819190c, 0x2819190c, 0x2819190c,                 /*  18 -  25 */
    0x291b1b0f, 0x291b1b0f, 0x291b1b0f, 0x291b1b0f, 0x291b1b0f,     /*  26 -  30 */
    0x2b1d1d18, 0x2b1d1d18, 0x2b1d1d18, 0x2b1d1d18,                 /*  31 -  34 */
    0x2c1f1f19, 0x2c1f1f19, 0x2c1f1f19, 0x2c1f1f19,                 /*  35 -  38 */
    0x2e28281b, 0x2e28281b, 0x2e28281b, 0x2e28281b, 0x2e28281b,     /*  39 -  43 */
    0x2f29291c, 0x2f29291c, 0x2f29291c, 0x2f29291c,                 /*  44 -  47 */
    0x382a2a1d, 0x382a2a1d, 0x382a2a1d, 0x382a2a1d, 0x382a2a1d,     /*  48 -  52 */
    0x392b2b1e, 0x392b2b1e, 0x392b2b1e, 0x392b2b1e,                 /*  53 -  56 */
    0x3a2c2c1f, 0x3a2c2c1f, 0x3a2c2c1f, 0x3a2c2c1f,                 /*  57 -  60 */
    0x3b2d2d28, 0x3b2d2d28, 0x3b2d2d28, 0x3b2d2d28,                 /*  61 -  64 */
    0x3b2e2e29, 0x3b2e2e29, 0x3b2e2e29, 0x3b2e2e29,                 /*  65 -  68 */
    0x3c2f2f29, 0x3c2f2f29, 0x3c2f2f29, 0x3c2f2f29,                 /*  69 -  72 */
    0x3d38382a, 0x3d38382a, 0x3d38382a, 0x3d38382a,                 /*  73 -  76 */
    0x3e38382b, 0x3e38382b, 0x3e38382b, 0x3e38382b,                 /*  77 -  80 */
    0x3f38392b, 0x3f38392b, 0x3f38392b, 0x3f38392b, 0x3f38392b,     /*  81 -  85 */
    0x3f39392c, 0x3f39392c, 0x3f39392c, 0x3f39392c,                 /*  86 -  89 */
    0x48393a2c, 0x48393a2c, 0x48393a2c, 0x48393a2c,                 /*  90 -  93 */
    0x483a3a2d, 0x483a3a2d, 0x483a3a2d,                             /*  94 -  96 */
    0x493a3b2e, 0x493a3b2e,                                         /*  97 -  98 */
    0x493b3b2e, 0x493b3b2e,                                         /*  99 - 100 */
    0x493b3c2f, 0x493b3c2f, 0x493b3c2f,                             /* 101 - 103 */
    0x4a3c3c2f, 0x4a3c3c2f,                                         /* 104 - 105 */
    0x4a3c3d38, 0x4a3c3d38,                                         /* 106 - 107 */
    0x4b3d3d38, 0x4b3d3d38,                                         /* 108 - 109 */
    0x4b3d3e38, 0x4b3d3e38,                                         /* 110 - 111 */
    0x4b3e3e39,                                                     /* 112       */
    0x4c3e3e39, 0x4c3e3e39,                                         /* 113 - 114 */
    0x4c3f3f39, 0x4c3f3f39,                                         /* 115 - 116 */
    0x4d3f3f3a, 0x4d3f3f3a,                                         /* 117 - 118 */
    0x4d48483a, 0x4d48483a, 0x4d48483a, 0x4d48483a,                 /* 119 - 122 */
    0x4e48483a,                                                     /* 123       */
    0x4e48483b, 0x4e48483b,                                         /* 124 - 125 */
    0x4f48493b,                                                     /* 126       */
    0x4f49493b,                                                     /* 127       */
    0x1a0c0907, 0x1a0c0907, 0x1a0c0907, 0x1a0c0907,                 /* 128 - 131 */
    0x291b190e, 0x291b190e, 0x291b190e, 0x291b190e, 0x291b190e,     /* 132 - 136 */
    0x2e281e1a, 0x2e281e1a, 0x2e281e1a, 0x2e281e1a,                 /* 137 - 140 */
    0x392b291e, 0x392b291e, 0x392b291e, 0x392b291e, 0x392b291e,     /* 141 - 145 */
    0x3c2e2b29, 0x3c2e2b29, 0x3c2e2b29, 0x3c2e2b29,
    0x3c2e2b29, 0x3c2e2b29, 0x3c2e2b29, 0x3c2e2b29,                 /* 146 - 153 */
    0x3e382e2a, 0x3e382e2a, 0x3e382e2a, 0x3e382e2a, 0x3e382e2a,     /* 154 - 158 */
    0x483a382c, 0x483a382c, 0x483a382c, 0x483a382c,                 /* 159 - 162 */
    0x493b392e, 0x493b392e, 0x493b392e, 0x493b392e,                 /* 163 - 166 */
    0x4b3d3a38, 0x4b3d3a38, 0x4b3d3a38, 0x4b3d3a38, 0x4b3d3a38,     /* 167 - 171 */
    0x4c3e3b38, 0x4c3e3b38, 0x4c3e3b38, 0x4c3e3b38,                 /* 172 - 175 */
    0x4d483c39, 0x4d483c39, 0x4d483c39, 0x4d483c39, 0x4d483c39,     /* 176 - 180 */
    0x4e483e3a, 0x4e483e3a, 0x4e483e3a, 0x4e483e3a,                 /* 181 - 184 */
    0x4f493f3b, 0x4f493f3b, 0x4f493f3b, 0x4f493f3b,                 /* 185 - 188 */
    0x584a483c, 0x584a483c, 0x584a483c, 0x584a483c,                 /* 189 - 192 */
    0x594b483d, 0x594b483d, 0x594b483d, 0x594b483d,                 /* 193 - 196 */
    0x594b493e, 0x594b493e, 0x594b493e, 0x594b493e,                 /* 197 - 200 */
    0x5a4c493f, 0x5a4c493f, 0x5a4c493f, 0x5a4c493f,                 /* 201 - 204 */
    0x5b4d4a48, 0x5b4d4a48, 0x5b4d4a48, 0x5b4d4a48,                 /* 205 - 208 */
    0x5b4d4b48, 0x5b4d4b48, 0x5b4d4b48, 0x5b4d4b48, 0x5b4d4b48,     /* 209 - 213 */
    0x5c4e4b48, 0x5c4e4b48, 0x5c4e4b48, 0x5c4e4b48,                 /* 214 - 217 */
    0x5c4f4c49, 0x5c4f4c49, 0x5c4f4c49, 0x5c4f4c49,                 /* 218 - 221 */
    0x5d584c49, 0x5d584c49, 0x5d584c49,                             /* 222 - 224 */
    0x5e584d4a, 0x5e584d4a,                                         /* 225 - 226 */
    0x5e584e4a, 0x5e584e4a,                                         /* 227 - 228 */
    0x5f594e4b, 0x5f594e4b, 0x5f594e4b,                             /* 229 - 231 */
    0x5f594f4b, 0x5f594f4b,                                         /* 232 - 233 */
    0x68594f4c, 0x68594f4c,                                         /* 234 - 235 */
    0x685a584c, 0x685a584c, 0x685a584c, 0x685a584c,                 /* 236 - 239 */
    0x695b584d,                                                     /* 240       */
    0x695b594d, 0x695b594d,                                         /* 241 - 242 */
    0x695b594e, 0x695b594e,                                         /* 243 - 244 */
    0x6a5c594e, 0x6a5c594e,                                         /* 245 - 246 */
    0x6a5c594f, 0x6a5c594f,                                         /* 247 - 248 */
    0x6a5c5a4f, 0x6a5c5a4f,                                         /* 249 - 250 */
    0x6a5d5a58,                                                     /* 251       */
    0x6b5d5a58, 0x6b5d5a58,                                         /* 252 - 253 */
    0x6b5d5b58,                                                     /* 254       */
    0x6b5e5b58,                                                     /* 255       */
};
1160
/*
 * Constant table of 16-bit thresholds.
 *
 * The array is declared with 256 elements but only the first 128 have
 * explicit initialisers; per the C initialisation rules elements 128..255
 * are therefore zero.
 *
 * NOTE(review): 128 entries of 2 bytes happen to occupy 256 bytes, so the
 * declared length may have been meant as a byte count - confirm against
 * the consumer before changing either the length or the data.
 */
static const unsigned short
brc_skip_mv_threshold_vp8[256] = {
    /*   0 */  111,  120,  129,  137,  146,  155,  163,  172,  180,  189,  198,  206,  215,  224,  232,  241,
    /*  16 */  249,  258,  267,  275,  284,  293,  301,  310,  318,  327,  336,  344,  353,  362,  370,  379,
    /*  32 */  387,  396,  405,  413,  422,  431,  439,  448,  456,  465,  474,  482,  491,  500,  508,  517,
    /*  48 */  525,  534,  543,  551,  560,  569,  577,  586,  594,  603,  612,  620,  629,  638,  646,  655,
    /*  64 */  663,  672,  681,  689,  698,  707,  715,  724,  733,  741,  750,  758,  767,  776,  784,  793,
    /*  80 */  802,  810,  819,  827,  836,  845,  853,  862,  871,  879,  888,  896,  905,  914,  922,  931,
    /*  96 */  940,  948,  957,  965,  974,  983,  991, 1000, 1009, 1017, 1026, 1034, 1043, 1052, 1060, 1069,
    /* 112 */ 1078, 1086, 1095, 1103, 1112, 1121, 1129, 1138, 1147, 1155, 1164, 1172, 1181, 1190, 1198, 1208
};
1172
1173 void
1174 i965_encoder_vp8_check_motion_estimation(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1175 {
1176     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
1177
1178     if (vp8_context->down_scaled_width_4x < vp8_context->min_scaled_dimension  ||
1179         vp8_context->down_scaled_width_in_mb4x < vp8_context->min_scaled_dimension_in_mbs ||
1180         vp8_context->down_scaled_height_4x < vp8_context->min_scaled_dimension ||
1181         vp8_context->down_scaled_height_in_mb4x < vp8_context->min_scaled_dimension_in_mbs) {
1182
1183         vp8_context->hme_16x_supported = 0;
1184
1185         if (vp8_context->down_scaled_width_4x < vp8_context->min_scaled_dimension  ||
1186             vp8_context->down_scaled_width_in_mb4x < vp8_context->min_scaled_dimension_in_mbs) {
1187
1188             vp8_context->down_scaled_width_4x = vp8_context->min_scaled_dimension;
1189             vp8_context->down_scaled_width_in_mb4x = vp8_context->min_scaled_dimension_in_mbs;
1190         }
1191
1192         if (vp8_context->down_scaled_height_4x < vp8_context->min_scaled_dimension ||
1193             vp8_context->down_scaled_height_in_mb4x < vp8_context->min_scaled_dimension_in_mbs) {
1194
1195             vp8_context->down_scaled_height_4x = vp8_context->min_scaled_dimension;
1196             vp8_context->down_scaled_height_in_mb4x = vp8_context->min_scaled_dimension_in_mbs;
1197         }
1198     } else if (vp8_context->down_scaled_width_16x < vp8_context->min_scaled_dimension ||
1199                vp8_context->down_scaled_width_in_mb16x < vp8_context->min_scaled_dimension_in_mbs ||
1200                vp8_context->down_scaled_height_16x < vp8_context->min_scaled_dimension ||
1201                vp8_context->down_scaled_height_in_mb16x < vp8_context->min_scaled_dimension_in_mbs) {
1202
1203         if (vp8_context->down_scaled_width_16x < vp8_context->min_scaled_dimension ||
1204             vp8_context->down_scaled_width_in_mb16x < vp8_context->min_scaled_dimension_in_mbs) {
1205
1206             vp8_context->down_scaled_width_16x = vp8_context->min_scaled_dimension;
1207             vp8_context->down_scaled_width_in_mb16x = vp8_context->min_scaled_dimension_in_mbs;
1208         }
1209
1210         if (vp8_context->down_scaled_height_16x < vp8_context->min_scaled_dimension ||
1211             vp8_context->down_scaled_height_in_mb16x < vp8_context->min_scaled_dimension_in_mbs) {
1212
1213             vp8_context->down_scaled_height_16x = vp8_context->min_scaled_dimension;
1214             vp8_context->down_scaled_height_in_mb16x = vp8_context->min_scaled_dimension_in_mbs;
1215         }
1216     }
1217 }
1218
1219 static void
1220 i965_encoder_vp8_free_surfaces(void **data)
1221 {
1222     struct i965_encoder_vp8_surface *vp8_surface;
1223
1224     if (!data || !(*data))
1225         return;
1226
1227     vp8_surface = *data;
1228
1229     if (vp8_surface->scaled_4x_surface_obj) {
1230         i965_DestroySurfaces(vp8_surface->ctx, &vp8_surface->scaled_4x_surface_id, 1);
1231         vp8_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
1232         vp8_surface->scaled_4x_surface_obj = NULL;
1233     }
1234
1235     if (vp8_surface->scaled_16x_surface_obj) {
1236         i965_DestroySurfaces(vp8_surface->ctx, &vp8_surface->scaled_16x_surface_id, 1);
1237         vp8_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
1238         vp8_surface->scaled_16x_surface_obj = NULL;
1239     }
1240 }
1241
1242 static void
1243 i965_encoder_vp8_allocate_surfaces(VADriverContextP ctx,
1244                                    struct intel_encoder_context *encoder_context,
1245                                    struct object_surface *obj_surface,
1246                                    int forced_free)
1247 {
1248     struct i965_driver_data *i965 = i965_driver_data(ctx);
1249     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
1250     struct i965_encoder_vp8_surface *vp8_surface;
1251     int down_scaled_width_4x, down_scaled_height_4x;
1252     int down_scaled_width_16x, down_scaled_height_16x;
1253
1254     if (!obj_surface)
1255         return;
1256
1257     if (obj_surface->private_data && obj_surface->free_private_data) {
1258         if (forced_free && obj_surface->free_private_data != i965_encoder_vp8_free_surfaces)
1259             obj_surface->free_private_data(obj_surface->private_data);
1260         else
1261             return;
1262     }
1263
1264     vp8_surface = calloc(1, sizeof(struct i965_encoder_vp8_surface));
1265
1266     if (!vp8_surface) {
1267         obj_surface->private_data = NULL;
1268         obj_surface->free_private_data = NULL;
1269
1270         return;
1271     }
1272
1273     vp8_surface->ctx = ctx;
1274
1275     down_scaled_width_4x = vp8_context->down_scaled_width_4x;
1276     down_scaled_height_4x = vp8_context->down_scaled_height_4x;
1277     i965_CreateSurfaces(ctx,
1278                         down_scaled_width_4x,
1279                         down_scaled_height_4x,
1280                         VA_RT_FORMAT_YUV420,
1281                         1,
1282                         &vp8_surface->scaled_4x_surface_id);
1283     vp8_surface->scaled_4x_surface_obj = SURFACE(vp8_surface->scaled_4x_surface_id);
1284
1285     if (vp8_surface->scaled_4x_surface_obj)
1286         i965_check_alloc_surface_bo(ctx, vp8_surface->scaled_4x_surface_obj, 1,
1287                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
1288
1289     down_scaled_width_16x = vp8_context->down_scaled_width_16x;
1290     down_scaled_height_16x = vp8_context->down_scaled_height_16x;
1291     i965_CreateSurfaces(ctx,
1292                         down_scaled_width_16x,
1293                         down_scaled_height_16x,
1294                         VA_RT_FORMAT_YUV420,
1295                         1,
1296                         &vp8_surface->scaled_16x_surface_id);
1297     vp8_surface->scaled_16x_surface_obj = SURFACE(vp8_surface->scaled_16x_surface_id);
1298
1299     if (vp8_surface->scaled_16x_surface_obj)
1300         i965_check_alloc_surface_bo(ctx, vp8_surface->scaled_16x_surface_obj, 1,
1301                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
1302
1303     obj_surface->private_data = vp8_surface;
1304     obj_surface->free_private_data = i965_encoder_vp8_free_surfaces;
1305 }
1306
1307 static void
1308 i965_encoder_vp8_read_encode_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1309 {
1310     struct intel_batchbuffer *batch = encoder_context->base.batch;
1311     struct i965_encoder_vp8_context *vp8_context = encoder_context->mfc_context;
1312     struct i965_encoder_vp8_encode_status_buffer *encode_status_buffer = &vp8_context->encode_status_buffer;
1313     struct i965_gpe_table *gpe = vp8_context->gpe_table;
1314     struct gpe_mi_store_register_mem_parameter mi_store_register_mem_param;
1315     struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
1316     unsigned int base_offset;
1317
1318     base_offset = encode_status_buffer->base_offset;
1319
1320     memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
1321     gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
1322
1323     memset(&mi_store_register_mem_param, 0, sizeof(mi_store_register_mem_param));
1324     mi_store_register_mem_param.bo = encode_status_buffer->bo;
1325     mi_store_register_mem_param.offset = base_offset + encode_status_buffer->bitstream_byte_count_offset;
1326     mi_store_register_mem_param.mmio_offset = vp8_context->vdbox_mmio_base + VP8_MFC_BITSTREAM_BYTECOUNT_FRAME_REG_OFFSET;
1327     gpe->mi_store_register_mem(ctx, batch, &mi_store_register_mem_param);
1328
1329     mi_store_register_mem_param.offset = base_offset + encode_status_buffer->image_status_mask_offset;
1330     mi_store_register_mem_param.mmio_offset = vp8_context->vdbox_mmio_base + VP8_MFC_IMAGE_STATUS_MASK_REG_OFFSET;
1331     gpe->mi_store_register_mem(ctx, batch, &mi_store_register_mem_param);
1332
1333     mi_store_register_mem_param.offset = base_offset + encode_status_buffer->image_status_ctrl_offset;
1334     mi_store_register_mem_param.mmio_offset = vp8_context->vdbox_mmio_base + VP8_MFC_IMAGE_STATUS_CTRL_REG_OFFSET;
1335     gpe->mi_store_register_mem(ctx, batch, &mi_store_register_mem_param);
1336
1337     memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
1338     gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
1339 }
1340
1341 static void
1342 i965_encoder_vp8_read_pak_statistics(VADriverContextP ctx,
1343                                      struct intel_encoder_context *encoder_context,
1344                                      int ipass)
1345 {
1346     struct intel_batchbuffer *batch = encoder_context->base.batch;
1347     struct i965_encoder_vp8_context *vp8_context = encoder_context->mfc_context;
1348     struct i965_gpe_table *gpe = vp8_context->gpe_table;
1349     struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
1350     struct gpe_mi_store_register_mem_parameter mi_store_register_mem_param;
1351     struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
1352
1353     memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
1354     gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
1355
1356     if (ipass < vp8_context->num_brc_pak_passes) {
1357         memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
1358         mi_store_data_imm_param.bo = vp8_context->brc_pak_statistics_buffer.bo;
1359         mi_store_data_imm_param.offset = sizeof(unsigned int) * 2;
1360         mi_store_data_imm_param.dw0 = (ipass + 1) << 8;
1361         gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);
1362     }
1363
1364     memset(&mi_store_register_mem_param, 0, sizeof(mi_store_register_mem_param));
1365     mi_store_register_mem_param.bo = vp8_context->brc_pak_statistics_buffer.bo;
1366     mi_store_register_mem_param.offset = 0;
1367     mi_store_register_mem_param.mmio_offset = vp8_context->vdbox_mmio_base + VP8_MFC_BITSTREAM_BYTECOUNT_FRAME_REG_OFFSET;
1368     gpe->mi_store_register_mem(ctx, batch, &mi_store_register_mem_param);
1369
1370     if (ipass == 0) {
1371         mi_store_register_mem_param.offset = sizeof(unsigned int) * 4;
1372         mi_store_register_mem_param.mmio_offset = vp8_context->vdbox_mmio_base + VP8_MFX_BRC_CUMULATIVE_DQ_INDEX01_REG_OFFSET;
1373         gpe->mi_store_register_mem(ctx, batch, &mi_store_register_mem_param);
1374     }
1375
1376     mi_store_register_mem_param.offset = sizeof(unsigned int) * 5;
1377     mi_store_register_mem_param.mmio_offset = vp8_context->vdbox_mmio_base + VP8_MFX_BRC_DQ_INDEX_REG_OFFSET;
1378     gpe->mi_store_register_mem(ctx, batch, &mi_store_register_mem_param);
1379
1380     mi_store_register_mem_param.offset = sizeof(unsigned int) * 6;
1381     mi_store_register_mem_param.mmio_offset = vp8_context->vdbox_mmio_base + VP8_MFX_BRC_D_LOOP_FILTER_REG_OFFSET;
1382     gpe->mi_store_register_mem(ctx, batch, &mi_store_register_mem_param);
1383
1384     mi_store_register_mem_param.offset = sizeof(unsigned int) * 9;
1385     mi_store_register_mem_param.mmio_offset = vp8_context->vdbox_mmio_base + VP8_MFX_BRC_CUMULATIVE_DQ_INDEX01_REG_OFFSET;
1386     gpe->mi_store_register_mem(ctx, batch, &mi_store_register_mem_param);
1387
1388     mi_store_register_mem_param.offset = sizeof(unsigned int) * 10;
1389     mi_store_register_mem_param.mmio_offset = vp8_context->vdbox_mmio_base + VP8_MFX_BRC_CUMULATIVE_DQ_INDEX23_REG_OFFSET;
1390     gpe->mi_store_register_mem(ctx, batch, &mi_store_register_mem_param);
1391
1392     mi_store_register_mem_param.offset = sizeof(unsigned int) * 11;
1393     mi_store_register_mem_param.mmio_offset = vp8_context->vdbox_mmio_base + VP8_MFX_BRC_CUMULATIVE_D_LOOP_FILTER01_REG_OFFSET;
1394     gpe->mi_store_register_mem(ctx, batch, &mi_store_register_mem_param);
1395
1396     mi_store_register_mem_param.offset = sizeof(unsigned int) * 12;
1397     mi_store_register_mem_param.mmio_offset = vp8_context->vdbox_mmio_base + VP8_MFX_BRC_CUMULATIVE_D_LOOP_FILTER23_REG_OFFSET;
1398     gpe->mi_store_register_mem(ctx, batch, &mi_store_register_mem_param);
1399
1400     mi_store_register_mem_param.offset = sizeof(unsigned int) * 13;
1401     mi_store_register_mem_param.mmio_offset = vp8_context->vdbox_mmio_base + VP8_MFX_BRC_CONVERGENCE_STATUS_REG_OFFSET;
1402     gpe->mi_store_register_mem(ctx, batch, &mi_store_register_mem_param);
1403 }
1404
1405 static void
1406 i965_encoder_vp8_gpe_context_init_once(VADriverContextP ctx,
1407                                        struct i965_gpe_context *gpe_context,
1408                                        struct vp8_encoder_kernel_parameters *kernel_params,
1409                                        unsigned int idrt_entry_size)
1410 {
1411     struct i965_driver_data *i965 = i965_driver_data(ctx);
1412
1413     gpe_context->curbe.length = kernel_params->curbe_size; // in bytes
1414
1415     gpe_context->sampler.entry_size = 0;
1416     gpe_context->sampler.max_entries = 0;
1417
1418     gpe_context->idrt.entry_size = idrt_entry_size;
1419     gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
1420
1421     gpe_context->surface_state_binding_table.max_entries = MAX_VP8_ENCODER_SURFACES;
1422     gpe_context->surface_state_binding_table.binding_table_offset = 0;
1423     gpe_context->surface_state_binding_table.surface_state_offset = gpe_context->surface_state_binding_table.binding_table_offset +
1424                                                                     ALIGN(MAX_VP8_ENCODER_SURFACES * 4, 64);
1425     gpe_context->surface_state_binding_table.length = ALIGN(MAX_VP8_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_VP8_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN8, 64);
1426
1427     if (i965->intel.eu_total > 0)
1428         gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
1429     else
1430         gpe_context->vfe_state.max_num_threads = 112;
1431
1432     gpe_context->vfe_state.curbe_allocation_size = ALIGN(gpe_context->curbe.length, 32) >> 5; // in registers
1433     gpe_context->vfe_state.urb_entry_size = MAX(1, (ALIGN(kernel_params->inline_data_size, 32) +
1434                                                     ALIGN(kernel_params->external_data_size, 32)) >> 5); // in registers
1435     gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
1436                                               gpe_context->vfe_state.curbe_allocation_size -
1437                                               ((gpe_context->idrt.entry_size >> 5) *
1438                                                gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
1439     gpe_context->vfe_state.num_urb_entries = CLAMP(gpe_context->vfe_state.num_urb_entries, 1, 64);
1440     gpe_context->vfe_state.gpgpu_mode = 0;
1441 }
1442
1443 static void
1444 i965_encoder_vp8_gpe_context_vfe_scoreboard_init(struct i965_gpe_context *gpe_context, struct vp8_encoder_scoreboard_parameters *scoreboard_params)
1445 {
1446     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_params->mask;
1447     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_params->type;
1448     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_params->enable;
1449
1450     // Scoreboard 0
1451     gpe_context->vfe_desc6.scoreboard1.delta_x0 = -1;
1452     gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0;
1453
1454     // Scoreboard 1
1455     gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0;
1456     gpe_context->vfe_desc6.scoreboard1.delta_y1 = -1;
1457
1458     // Scoreboard 2
1459     gpe_context->vfe_desc6.scoreboard1.delta_x2 = 1;
1460     gpe_context->vfe_desc6.scoreboard1.delta_y2 = -1;
1461
1462     // Scoreboard 3
1463     gpe_context->vfe_desc6.scoreboard1.delta_x3 = -1;
1464     gpe_context->vfe_desc6.scoreboard1.delta_y3 = -1;
1465
1466     // Scoreboard 4
1467     gpe_context->vfe_desc7.scoreboard2.delta_x4 = -1;
1468     gpe_context->vfe_desc7.scoreboard2.delta_y4 = 1;
1469
1470     // Scoreboard 5
1471     gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0;
1472     gpe_context->vfe_desc7.scoreboard2.delta_y5 = -2;
1473     // Scoreboard 6
1474     gpe_context->vfe_desc7.scoreboard2.delta_x6 = 1;
1475     gpe_context->vfe_desc7.scoreboard2.delta_y6 = -2;
1476     // Scoreboard 7
1477     gpe_context->vfe_desc7.scoreboard2.delta_x6 = -1;
1478     gpe_context->vfe_desc7.scoreboard2.delta_y6 = -2;
1479 }
1480
1481 static void
1482 i965_add_dri_buffer_gpe_surface(VADriverContextP ctx,
1483                                 struct intel_encoder_context *encoder_context,
1484                                 struct i965_gpe_context *gpe_context,
1485                                 dri_bo *bo,
1486                                 int is_raw_buffer,
1487                                 unsigned int size,
1488                                 unsigned int offset,
1489                                 int index)
1490 {
1491     struct i965_gpe_resource gpe_resource;
1492
1493     i965_dri_object_to_buffer_gpe_resource(&gpe_resource, bo);
1494     i965_add_buffer_gpe_surface(ctx,
1495                                 gpe_context,
1496                                 &gpe_resource,
1497                                 is_raw_buffer,
1498                                 size,
1499                                 offset,
1500                                 index);
1501
1502     i965_free_gpe_resource(&gpe_resource);
1503 }
1504
1505 static void
1506 i965_add_dri_buffer_2d_gpe_surface(VADriverContextP ctx,
1507                                    struct intel_encoder_context *encoder_context,
1508                                    struct i965_gpe_context *gpe_context,
1509                                    dri_bo *bo,
1510                                    unsigned int width,
1511                                    unsigned int height,
1512                                    unsigned int pitch,
1513                                    int is_media_block_rw,
1514                                    unsigned int format,
1515                                    int index)
1516 {
1517     struct i965_gpe_resource gpe_resource;
1518
1519     i965_dri_object_to_2d_gpe_resource(&gpe_resource, bo, width, height, pitch);
1520     i965_add_buffer_2d_gpe_surface(ctx,
1521                                    gpe_context,
1522                                    &gpe_resource,
1523                                    is_media_block_rw,
1524                                    format,
1525                                    index);
1526
1527     i965_free_gpe_resource(&gpe_resource);
1528 }
1529
1530 static void
1531 i965_run_kernel_media_object(VADriverContextP ctx,
1532                              struct intel_encoder_context *encoder_context,
1533                              struct i965_gpe_context *gpe_context,
1534                              int media_function,
1535                              struct gpe_media_object_parameter *param)
1536 {
1537     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
1538     struct i965_gpe_table *gpe = vp8_context->gpe_table;
1539     struct intel_batchbuffer *batch = encoder_context->base.batch;
1540
1541     intel_batchbuffer_start_atomic(batch, 0x1000);
1542
1543     intel_batchbuffer_emit_mi_flush(batch);
1544     gpe->pipeline_setup(ctx, gpe_context, batch);
1545     gpe->media_object(ctx, gpe_context, batch, param);
1546     gpe->media_state_flush(ctx, gpe_context, batch);
1547     gpe->pipeline_end(ctx, gpe_context, batch);
1548
1549     intel_batchbuffer_end_atomic(batch);
1550
1551     intel_batchbuffer_flush(batch);
1552 }
1553
1554 static void
1555 i965_init_media_object_walker_parameters(struct intel_encoder_context *encoder_context,
1556                                          struct vp8_encoder_kernel_walker_parameter *kernel_walker_param,
1557                                          struct gpe_media_object_walker_parameter *walker_param)
1558 {
1559     memset(walker_param, 0, sizeof(*walker_param));
1560
1561     walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
1562
1563     walker_param->block_resolution.x = kernel_walker_param->resolution_x;
1564     walker_param->block_resolution.y = kernel_walker_param->resolution_y;
1565
1566     walker_param->global_resolution.x = kernel_walker_param->resolution_x;
1567     walker_param->global_resolution.y = kernel_walker_param->resolution_y;
1568
1569     walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
1570     walker_param->global_outer_loop_stride.y = 0;
1571
1572     walker_param->global_inner_loop_unit.x = 0;
1573     walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
1574
1575     walker_param->local_loop_exec_count = 0xFFFF;  //MAX VALUE
1576     walker_param->global_loop_exec_count = 0xFFFF;  //MAX VALUE
1577
1578     if (kernel_walker_param->no_dependency) {
1579         walker_param->scoreboard_mask = 0;
1580
1581         // Raster scan walking pattern
1582         walker_param->local_outer_loop_stride.x = 0;
1583         walker_param->local_outer_loop_stride.y = 1;
1584         walker_param->local_inner_loop_unit.x = 1;
1585         walker_param->local_inner_loop_unit.y = 0;
1586         walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
1587         walker_param->local_end.y = 0;
1588     } else {
1589         walker_param->local_end.x = 0;
1590         walker_param->local_end.y = 0;
1591
1592         if (kernel_walker_param->walker_degree == VP8_ENCODER_46_DEGREE) {
1593             // 46 degree
1594             walker_param->scoreboard_mask = kernel_walker_param->scoreboard_mask;
1595             walker_param->local_outer_loop_stride.x = 1;
1596             walker_param->local_outer_loop_stride.y = 0;
1597             walker_param->local_inner_loop_unit.x = -1;
1598             walker_param->local_inner_loop_unit.y = 1;
1599         } else if (kernel_walker_param->walker_degree == VP8_ENCODER_45Z_DEGREE) {
1600             // 45z degree
1601             walker_param->scoreboard_mask = 0x0F;
1602
1603             walker_param->global_loop_exec_count = 0x3FF;
1604             walker_param->local_loop_exec_count = 0x3FF;
1605
1606             walker_param->global_resolution.x = (unsigned int)(kernel_walker_param->resolution_x / 2.f) + 1;
1607             walker_param->global_resolution.y = 2 * kernel_walker_param->resolution_y;
1608
1609             walker_param->global_start.x = 0;
1610             walker_param->global_start.y = 0;
1611
1612             walker_param->global_outer_loop_stride.x = walker_param->global_resolution.x;
1613             walker_param->global_outer_loop_stride.y = 0;
1614
1615             walker_param->global_inner_loop_unit.x = 0;
1616             walker_param->global_inner_loop_unit.y = walker_param->global_resolution.y;
1617
1618             walker_param->block_resolution.x = walker_param->global_resolution.x;
1619             walker_param->block_resolution.y = walker_param->global_resolution.y;
1620
1621             walker_param->local_start.x = 0;
1622             walker_param->local_start.y = 0;
1623
1624             walker_param->local_outer_loop_stride.x = 1;
1625             walker_param->local_outer_loop_stride.y = 0;
1626
1627             walker_param->local_inner_loop_unit.x = -1;
1628             walker_param->local_inner_loop_unit.y = 4;
1629
1630             walker_param->middle_loop_extra_steps = 3;
1631             walker_param->mid_loop_unit_x = 0;
1632             walker_param->mid_loop_unit_y = 1;
1633         } else if (kernel_walker_param->walker_degree == VP8_ENCODER_45_DEGREE) {
1634             // 45 degree
1635             walker_param->scoreboard_mask = 0x03;
1636             walker_param->local_outer_loop_stride.x = 1;
1637             walker_param->local_outer_loop_stride.y = 0;
1638             walker_param->local_inner_loop_unit.x = -1;
1639             walker_param->local_inner_loop_unit.y = 1;
1640         } else {
1641             // 26 degree
1642             walker_param->scoreboard_mask = 0x0F;
1643             walker_param->local_outer_loop_stride.x = 1;
1644             walker_param->local_outer_loop_stride.y = 0;
1645             walker_param->local_inner_loop_unit.x = -2;
1646             walker_param->local_inner_loop_unit.y = 1;
1647         }
1648     }
1649 }
1650
1651 static void
1652 i965_run_kernel_media_object_walker(VADriverContextP ctx,
1653                                     struct intel_encoder_context *encoder_context,
1654                                     struct i965_gpe_context *gpe_context,
1655                                     int media_function,
1656                                     struct gpe_media_object_walker_parameter *param)
1657 {
1658     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
1659     struct i965_gpe_table *gpe = vp8_context->gpe_table;
1660     struct intel_batchbuffer *batch = encoder_context->base.batch;
1661
1662     intel_batchbuffer_start_atomic(batch, 0x1000);
1663
1664     intel_batchbuffer_emit_mi_flush(batch);
1665     gpe->pipeline_setup(ctx, gpe_context, batch);
1666     gpe->media_object_walker(ctx, gpe_context, batch, param);
1667     gpe->media_state_flush(ctx, gpe_context, batch);
1668     gpe->pipeline_end(ctx, gpe_context, batch);
1669
1670     intel_batchbuffer_end_atomic(batch);
1671
1672     intel_batchbuffer_flush(batch);
1673 }
1674
1675 static void
1676 i965_encoder_vp8_vme_init_mpu_tpu_buffer(VADriverContextP ctx,
1677                                          struct intel_encoder_context *encoder_context,
1678                                          struct i965_encoder_vp8_context *vp8_context)
1679 {
1680     char *pbuffer = NULL;
1681
1682     i965_zero_gpe_resource(&vp8_context->pak_mpu_tpu_mode_probs_buffer);
1683     i965_zero_gpe_resource(&vp8_context->pak_mpu_tpu_ref_mode_probs_buffer);
1684
1685     pbuffer = i965_map_gpe_resource(&vp8_context->pak_mpu_tpu_ref_coeff_probs_buffer);
1686
1687     if (!pbuffer)
1688         return;
1689
1690     memcpy(pbuffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
1691     i965_unmap_gpe_resource(&vp8_context->pak_mpu_tpu_ref_coeff_probs_buffer);
1692
1693     pbuffer = i965_map_gpe_resource(&vp8_context->pak_mpu_tpu_entropy_cost_table_buffer);
1694
1695     if (!pbuffer)
1696         return;
1697
1698     memcpy(pbuffer, vp8_prob_cost, sizeof(vp8_prob_cost));
1699     i965_unmap_gpe_resource(&vp8_context->pak_mpu_tpu_entropy_cost_table_buffer);
1700
1701     pbuffer = i965_map_gpe_resource(&vp8_context->pak_mpu_tpu_pak_token_update_flags_buffer);
1702
1703     if (!pbuffer)
1704         return;
1705
1706     memcpy(pbuffer, vp8_probs_update_flag, sizeof(vp8_probs_update_flag));
1707     i965_unmap_gpe_resource(&vp8_context->pak_mpu_tpu_pak_token_update_flags_buffer);
1708
1709     pbuffer = i965_map_gpe_resource(&vp8_context->pak_mpu_tpu_default_token_probability_buffer);
1710
1711     if (!pbuffer)
1712         return;
1713
1714     memcpy(pbuffer, vp8_coef_update_probs, sizeof(vp8_coef_update_probs));
1715     i965_unmap_gpe_resource(&vp8_context->pak_mpu_tpu_default_token_probability_buffer);
1716
1717     pbuffer = i965_map_gpe_resource(&vp8_context->pak_mpu_tpu_key_frame_token_probability_buffer);
1718
1719     if (!pbuffer)
1720         return;
1721
1722     memcpy(pbuffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
1723     i965_unmap_gpe_resource(&vp8_context->pak_mpu_tpu_key_frame_token_probability_buffer);
1724
1725     pbuffer = i965_map_gpe_resource(&vp8_context->pak_mpu_tpu_updated_token_probability_buffer);
1726
1727     if (!pbuffer)
1728         return;
1729
1730     memcpy(pbuffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
1731     i965_unmap_gpe_resource(&vp8_context->pak_mpu_tpu_updated_token_probability_buffer);
1732 }
1733
1734 #define ALLOC_VP8_RESOURCE_BUFFER(buffer, bufsize, des)         \
1735     do {                                                        \
1736         vp8_context->buffer.type = I965_GPE_RESOURCE_BUFFER;    \
1737         vp8_context->buffer.width = (bufsize);                  \
1738         vp8_context->buffer.height = 1;                         \
1739         vp8_context->buffer.pitch = vp8_context->buffer.width;  \
1740         vp8_context->buffer.size = vp8_context->buffer.pitch *  \
1741             vp8_context->buffer.height;                         \
1742         vp8_context->buffer.tiling = I915_TILING_NONE;          \
1743         i965_allocate_gpe_resource(i965->intel.bufmgr,          \
1744                                    &vp8_context->buffer,        \
1745                                    vp8_context->buffer.size,    \
1746                                    (des));                      \
1747     } while (0)
1748
1749 static void
1750 i965_encoder_vp8_vme_allocate_resources(VADriverContextP ctx,
1751                                         struct intel_encoder_context *encoder_context,
1752                                         struct i965_encoder_vp8_context *vp8_context)
1753 {
1754     struct i965_driver_data *i965 = i965_driver_data(ctx);
1755     unsigned int frame_size_in_mbs = vp8_context->frame_width_in_mbs *
1756                                      vp8_context->frame_height_in_mbs;
1757
1758     vp8_context->mv_offset = ALIGN((frame_size_in_mbs * 16 * 4), 4096);
1759     vp8_context->mb_coded_buffer_size = vp8_context->mv_offset + (frame_size_in_mbs * 16 * sizeof(unsigned int));
1760
1761     ALLOC_VP8_RESOURCE_BUFFER(reference_frame_mb_count_buffer, 32, "Reference frame mb count buffer");
1762
1763     vp8_context->mb_mode_cost_luma_buffer.type = I965_GPE_RESOURCE_2D;
1764     vp8_context->mb_mode_cost_luma_buffer.width = ALIGN((sizeof(short) * 10), 64);
1765     vp8_context->mb_mode_cost_luma_buffer.height = 1;
1766     vp8_context->mb_mode_cost_luma_buffer.pitch = vp8_context->mb_mode_cost_luma_buffer.width;
1767     vp8_context->mb_mode_cost_luma_buffer.size = vp8_context->mb_mode_cost_luma_buffer.pitch *
1768                                                  vp8_context->mb_mode_cost_luma_buffer.height;
1769     vp8_context->mb_mode_cost_luma_buffer.tiling = I915_TILING_NONE;
1770     i965_allocate_gpe_resource(i965->intel.bufmgr,
1771                                &vp8_context->mb_mode_cost_luma_buffer,
1772                                vp8_context->mb_mode_cost_luma_buffer.size,
1773                                "MB mode cost luma buffer");
1774
1775     vp8_context->block_mode_cost_buffer.type = I965_GPE_RESOURCE_2D;
1776     vp8_context->block_mode_cost_buffer.width = ALIGN((sizeof(short) * 10 * 10 * 10), 64);
1777     vp8_context->block_mode_cost_buffer.height = 1;
1778     vp8_context->block_mode_cost_buffer.pitch = vp8_context->block_mode_cost_buffer.width;
1779     vp8_context->block_mode_cost_buffer.size = vp8_context->block_mode_cost_buffer.pitch *
1780                                                vp8_context->block_mode_cost_buffer.height;
1781     vp8_context->block_mode_cost_buffer.tiling = I915_TILING_NONE;
1782     i965_allocate_gpe_resource(i965->intel.bufmgr,
1783                                &vp8_context->block_mode_cost_buffer,
1784                                vp8_context->block_mode_cost_buffer.size,
1785                                "Block mode cost luma buffer");
1786
1787     ALLOC_VP8_RESOURCE_BUFFER(chroma_recon_buffer, frame_size_in_mbs * 64, "Chroma recon buffer");
1788
1789     vp8_context->per_mb_quant_data_buffer.type = I965_GPE_RESOURCE_2D;
1790     vp8_context->per_mb_quant_data_buffer.width = ALIGN((vp8_context->frame_width_in_mbs * 4), 64);
1791     vp8_context->per_mb_quant_data_buffer.height = vp8_context->frame_height_in_mbs;
1792     vp8_context->per_mb_quant_data_buffer.pitch = vp8_context->per_mb_quant_data_buffer.width;
1793     vp8_context->per_mb_quant_data_buffer.size = vp8_context->per_mb_quant_data_buffer.pitch *
1794                                                  vp8_context->per_mb_quant_data_buffer.height;
1795     vp8_context->per_mb_quant_data_buffer.tiling = I915_TILING_NONE;
1796     i965_allocate_gpe_resource(i965->intel.bufmgr,
1797                                &vp8_context->per_mb_quant_data_buffer,
1798                                vp8_context->per_mb_quant_data_buffer.size,
1799                                "Per MB quant data buffer");
1800
1801     ALLOC_VP8_RESOURCE_BUFFER(pred_mv_data_buffer, frame_size_in_mbs * 4 * sizeof(unsigned int), "Pred mv data buffer");
1802     ALLOC_VP8_RESOURCE_BUFFER(mode_cost_update_buffer, 16 * sizeof(unsigned int), "Mode cost update buffer");
1803
1804     /*
1805      * BRC buffers
1806      */
1807     ALLOC_VP8_RESOURCE_BUFFER(brc_history_buffer, VP8_BRC_HISTORY_BUFFER_SIZE, "BRC history buffer");
1808     i965_zero_gpe_resource(&vp8_context->brc_history_buffer);
1809
1810     vp8_context->brc_segment_map_buffer.type = I965_GPE_RESOURCE_2D;
1811     vp8_context->brc_segment_map_buffer.width = vp8_context->frame_width_in_mbs;
1812     vp8_context->brc_segment_map_buffer.height = vp8_context->frame_height_in_mbs;
1813     vp8_context->brc_segment_map_buffer.pitch = vp8_context->brc_segment_map_buffer.width;
1814     vp8_context->brc_segment_map_buffer.size = vp8_context->brc_segment_map_buffer.pitch *
1815                                                vp8_context->brc_segment_map_buffer.height;
1816     vp8_context->brc_segment_map_buffer.tiling = I915_TILING_NONE;
1817     i965_allocate_gpe_resource(i965->intel.bufmgr,
1818                                &vp8_context->brc_segment_map_buffer,
1819                                vp8_context->brc_segment_map_buffer.size,
1820                                "BRC segment map buffer");
1821
1822     vp8_context->brc_distortion_buffer.type = I965_GPE_RESOURCE_2D;
1823     vp8_context->brc_distortion_buffer.width = ALIGN((vp8_context->down_scaled_width_in_mb4x * 8), 64);
1824     vp8_context->brc_distortion_buffer.height = 2 * ALIGN((vp8_context->down_scaled_height_in_mb4x * 4), 8);
1825     vp8_context->brc_distortion_buffer.pitch = vp8_context->brc_distortion_buffer.width;
1826     vp8_context->brc_distortion_buffer.size = vp8_context->brc_distortion_buffer.pitch *
1827                                               vp8_context->brc_distortion_buffer.height;
1828     vp8_context->brc_distortion_buffer.tiling = I915_TILING_NONE;
1829     i965_allocate_gpe_resource(i965->intel.bufmgr,
1830                                &vp8_context->brc_distortion_buffer,
1831                                vp8_context->brc_distortion_buffer.size,
1832                                "BRC distortion buffer");
1833     i965_zero_gpe_resource(&vp8_context->brc_distortion_buffer);
1834
1835     ALLOC_VP8_RESOURCE_BUFFER(brc_pak_statistics_buffer, sizeof(struct vp8_brc_pak_statistics), "BRC pak statistics buffer");
1836     i965_zero_gpe_resource(&vp8_context->brc_pak_statistics_buffer);
1837
1838     ALLOC_VP8_RESOURCE_BUFFER(brc_vp8_cfg_command_read_buffer, VP8_BRC_IMG_STATE_SIZE_PER_PASS * VP8_BRC_MAXIMUM_NUM_PASSES, "BRC VP8 configuration command read buffer");
1839     i965_zero_gpe_resource(&vp8_context->brc_vp8_cfg_command_read_buffer);
1840
1841     ALLOC_VP8_RESOURCE_BUFFER(brc_vp8_cfg_command_write_buffer, VP8_BRC_IMG_STATE_SIZE_PER_PASS * VP8_BRC_MAXIMUM_NUM_PASSES, "BRC VP8 configuration command write buffer");
1842     i965_zero_gpe_resource(&vp8_context->brc_vp8_cfg_command_write_buffer);
1843
1844     ALLOC_VP8_RESOURCE_BUFFER(brc_vp8_constant_data_buffer, VP8_BRC_CONSTANT_DATA_SIZE, "BRC VP8 constant data buffer");
1845     i965_zero_gpe_resource(&vp8_context->brc_vp8_constant_data_buffer);
1846
1847     ALLOC_VP8_RESOURCE_BUFFER(brc_pak_statistics_dump_buffer, vp8_context->num_brc_pak_passes * sizeof(unsigned int) * 12, "BRC pak statistics buffer");
1848     i965_zero_gpe_resource(&vp8_context->brc_pak_statistics_dump_buffer);
1849
1850     vp8_context->me_4x_mv_data_buffer.type = I965_GPE_RESOURCE_2D;
1851     vp8_context->me_4x_mv_data_buffer.width = vp8_context->down_scaled_width_in_mb4x * 32;
1852     vp8_context->me_4x_mv_data_buffer.height = vp8_context->down_scaled_height_in_mb4x * 4 * 4;
1853     vp8_context->me_4x_mv_data_buffer.pitch = ALIGN(vp8_context->me_4x_mv_data_buffer.width, 64);
1854     vp8_context->me_4x_mv_data_buffer.size = vp8_context->me_4x_mv_data_buffer.pitch *
1855                                              vp8_context->me_4x_mv_data_buffer.height;
1856     vp8_context->me_4x_mv_data_buffer.tiling = I915_TILING_NONE;
1857     i965_allocate_gpe_resource(i965->intel.bufmgr,
1858                                &vp8_context->me_4x_mv_data_buffer,
1859                                vp8_context->me_4x_mv_data_buffer.size,
1860                                "ME 4x MV Data buffer");
1861
1862     vp8_context->me_4x_distortion_buffer.type = I965_GPE_RESOURCE_2D;
1863     vp8_context->me_4x_distortion_buffer.width = vp8_context->down_scaled_width_in_mb4x * 8;
1864     vp8_context->me_4x_distortion_buffer.height = vp8_context->down_scaled_height_in_mb4x * 4 * 4;
1865     vp8_context->me_4x_distortion_buffer.pitch = ALIGN(vp8_context->me_4x_distortion_buffer.width, 64);
1866     vp8_context->me_4x_distortion_buffer.size = vp8_context->me_4x_distortion_buffer.pitch *
1867                                                 vp8_context->me_4x_distortion_buffer.height;
1868     vp8_context->me_4x_distortion_buffer.tiling = I915_TILING_NONE;
1869     i965_allocate_gpe_resource(i965->intel.bufmgr,
1870                                &vp8_context->me_4x_distortion_buffer,
1871                                vp8_context->me_4x_distortion_buffer.size,
1872                                "ME 4x Distortion buffer");
1873
1874     vp8_context->me_16x_mv_data_buffer.type = I965_GPE_RESOURCE_2D;
1875     vp8_context->me_16x_mv_data_buffer.width = ALIGN((vp8_context->down_scaled_width_in_mb16x * 32), 64);
1876     vp8_context->me_16x_mv_data_buffer.height = vp8_context->down_scaled_height_in_mb16x * 4 * VP8_ME_MV_DATA_SIZE_MULTIPLIER;
1877     vp8_context->me_16x_mv_data_buffer.pitch = vp8_context->me_16x_mv_data_buffer.width;
1878     vp8_context->me_16x_mv_data_buffer.size = vp8_context->me_16x_mv_data_buffer.pitch *
1879                                               vp8_context->me_16x_mv_data_buffer.height;
1880     vp8_context->me_16x_mv_data_buffer.tiling = I915_TILING_NONE;
1881     i965_allocate_gpe_resource(i965->intel.bufmgr,
1882                                &vp8_context->me_16x_mv_data_buffer,
1883                                vp8_context->me_16x_mv_data_buffer.size,
1884                                "ME 16x MV Data buffer");
1885
1886     ALLOC_VP8_RESOURCE_BUFFER(histogram_buffer, VP8_HISTOGRAM_SIZE, "Histogram buffer");
1887     ALLOC_VP8_RESOURCE_BUFFER(pak_intra_row_store_scratch_buffer, vp8_context->frame_width_in_mbs * 64, "Intra row store scratch buffer");
1888     ALLOC_VP8_RESOURCE_BUFFER(pak_deblocking_filter_row_store_scratch_buffer, vp8_context->frame_width_in_mbs * 4 * 64, "Deblocking filter row store scratch buffer");
1889     ALLOC_VP8_RESOURCE_BUFFER(pak_mpc_row_store_scratch_buffer, vp8_context->frame_width_in_mbs * 2 * 64, "MPC row store scratch buffer");
1890     ALLOC_VP8_RESOURCE_BUFFER(pak_stream_out_buffer, frame_size_in_mbs * 16, "stream out buffer");
1891     ALLOC_VP8_RESOURCE_BUFFER(pak_frame_header_buffer, VP8_FRAME_HEADER_SIZE, "Frame header buffer");
1892     ALLOC_VP8_RESOURCE_BUFFER(pak_intermediate_buffer, frame_size_in_mbs * 256 * 2 + frame_size_in_mbs * 64 + VP8_INTERMEDIATE_PARTITION0_SIZE, "Intermediate buffer");
1893     ALLOC_VP8_RESOURCE_BUFFER(pak_mpu_tpu_mode_probs_buffer, VP8_MODE_PROPABILITIES_SIZE, "Mode probs buffer");
1894     ALLOC_VP8_RESOURCE_BUFFER(pak_mpu_tpu_ref_mode_probs_buffer, VP8_MODE_PROPABILITIES_SIZE, "Ref mode probs buffer");
1895     ALLOC_VP8_RESOURCE_BUFFER(pak_mpu_tpu_coeff_probs_buffer, VP8_COEFFS_PROPABILITIES_SIZE, "Coeff probs buffer");
1896     ALLOC_VP8_RESOURCE_BUFFER(pak_mpu_tpu_ref_coeff_probs_buffer, VP8_COEFFS_PROPABILITIES_SIZE, "Ref coeff probs buffer");
1897     ALLOC_VP8_RESOURCE_BUFFER(pak_mpu_tpu_token_bits_data_buffer, VP8_TOKEN_BITS_DATA_SIZE, "Token bits data buffer");
1898     i965_zero_gpe_resource(&vp8_context->pak_mpu_tpu_token_bits_data_buffer);
1899     ALLOC_VP8_RESOURCE_BUFFER(pak_mpu_tpu_picture_state_buffer, VP8_PICTURE_STATE_SIZE, "Picture state buffer");
1900     ALLOC_VP8_RESOURCE_BUFFER(pak_mpu_tpu_mpu_bitstream_buffer, VP8_MPU_BITSTREAM_SIZE, "Mpu bitstream buffer");
1901     ALLOC_VP8_RESOURCE_BUFFER(pak_mpu_tpu_tpu_bitstream_buffer, VP8_TPU_BITSTREAM_SIZE, "Tpu bitstream buffer");
1902     ALLOC_VP8_RESOURCE_BUFFER(pak_mpu_tpu_entropy_cost_table_buffer, VP8_ENTROPY_COST_TABLE_SIZE, "Entropy cost buffer");
1903     ALLOC_VP8_RESOURCE_BUFFER(pak_mpu_tpu_pak_token_statistics_buffer, VP8_TOKEN_STATISTICS_SIZE, "Pak token statistics buffer");
1904     ALLOC_VP8_RESOURCE_BUFFER(pak_mpu_tpu_pak_token_update_flags_buffer, VP8_COEFFS_PROPABILITIES_SIZE, "Pak token update flags buffer");
1905     ALLOC_VP8_RESOURCE_BUFFER(pak_mpu_tpu_default_token_probability_buffer, VP8_COEFFS_PROPABILITIES_SIZE, "Default token probability buffer");
1906     ALLOC_VP8_RESOURCE_BUFFER(pak_mpu_tpu_key_frame_token_probability_buffer, VP8_COEFFS_PROPABILITIES_SIZE, "Key frame token probability buffer");
1907     ALLOC_VP8_RESOURCE_BUFFER(pak_mpu_tpu_updated_token_probability_buffer, VP8_COEFFS_PROPABILITIES_SIZE, "Updated token probability buffer");
1908     ALLOC_VP8_RESOURCE_BUFFER(pak_mpu_tpu_hw_token_probability_pak_pass_2_buffer, VP8_COEFFS_PROPABILITIES_SIZE, "Hw token probability pak pass 2 buffer");
1909     ALLOC_VP8_RESOURCE_BUFFER(pak_mpu_tpu_repak_decision_buffer, VP8_REPAK_DECISION_BUF_SIZE, "Tpu repak decision buffer");
1910
1911     i965_encoder_vp8_vme_init_mpu_tpu_buffer(ctx, encoder_context, vp8_context);
1912
1913     ALLOC_VP8_RESOURCE_BUFFER(mb_coded_buffer, vp8_context->mb_coded_buffer_size, "MB coded buffer");
1914 }
1915
1916 #undef ALLOC_VP8_RESOURCE_BUFFER
1917
1918 static void
1919 i965_encoder_vp8_vme_free_resources(struct i965_encoder_vp8_context *vp8_context)
1920 {
1921     i965_free_gpe_resource(&vp8_context->reference_frame_mb_count_buffer);
1922     i965_free_gpe_resource(&vp8_context->mb_mode_cost_luma_buffer);
1923     i965_free_gpe_resource(&vp8_context->block_mode_cost_buffer);
1924     i965_free_gpe_resource(&vp8_context->chroma_recon_buffer);
1925     i965_free_gpe_resource(&vp8_context->per_mb_quant_data_buffer);
1926     i965_free_gpe_resource(&vp8_context->pred_mv_data_buffer);
1927     i965_free_gpe_resource(&vp8_context->mode_cost_update_buffer);
1928
1929     i965_free_gpe_resource(&vp8_context->brc_history_buffer);
1930     i965_free_gpe_resource(&vp8_context->brc_segment_map_buffer);
1931     i965_free_gpe_resource(&vp8_context->brc_distortion_buffer);
1932     i965_free_gpe_resource(&vp8_context->brc_pak_statistics_buffer);
1933     i965_free_gpe_resource(&vp8_context->brc_vp8_cfg_command_read_buffer);
1934     i965_free_gpe_resource(&vp8_context->brc_vp8_cfg_command_write_buffer);
1935     i965_free_gpe_resource(&vp8_context->brc_vp8_constant_data_buffer);
1936     i965_free_gpe_resource(&vp8_context->brc_pak_statistics_dump_buffer);
1937
1938     i965_free_gpe_resource(&vp8_context->me_4x_mv_data_buffer);
1939     i965_free_gpe_resource(&vp8_context->me_4x_distortion_buffer);
1940     i965_free_gpe_resource(&vp8_context->me_16x_mv_data_buffer);
1941
1942     i965_free_gpe_resource(&vp8_context->histogram_buffer);
1943
1944     i965_free_gpe_resource(&vp8_context->pak_intra_row_store_scratch_buffer);
1945     i965_free_gpe_resource(&vp8_context->pak_deblocking_filter_row_store_scratch_buffer);
1946     i965_free_gpe_resource(&vp8_context->pak_mpc_row_store_scratch_buffer);
1947     i965_free_gpe_resource(&vp8_context->pak_stream_out_buffer);
1948     i965_free_gpe_resource(&vp8_context->pak_frame_header_buffer);
1949     i965_free_gpe_resource(&vp8_context->pak_intermediate_buffer);
1950     i965_free_gpe_resource(&vp8_context->pak_mpu_tpu_mode_probs_buffer);
1951     i965_free_gpe_resource(&vp8_context->pak_mpu_tpu_ref_mode_probs_buffer);
1952     i965_free_gpe_resource(&vp8_context->pak_mpu_tpu_coeff_probs_buffer);
1953     i965_free_gpe_resource(&vp8_context->pak_mpu_tpu_ref_coeff_probs_buffer);
1954     i965_free_gpe_resource(&vp8_context->pak_mpu_tpu_token_bits_data_buffer);
1955     i965_free_gpe_resource(&vp8_context->pak_mpu_tpu_picture_state_buffer);
1956     i965_free_gpe_resource(&vp8_context->pak_mpu_tpu_mpu_bitstream_buffer);
1957     i965_free_gpe_resource(&vp8_context->pak_mpu_tpu_tpu_bitstream_buffer);
1958     i965_free_gpe_resource(&vp8_context->pak_mpu_tpu_entropy_cost_table_buffer);
1959     i965_free_gpe_resource(&vp8_context->pak_mpu_tpu_pak_token_statistics_buffer);
1960     i965_free_gpe_resource(&vp8_context->pak_mpu_tpu_pak_token_update_flags_buffer);
1961     i965_free_gpe_resource(&vp8_context->pak_mpu_tpu_default_token_probability_buffer);
1962     i965_free_gpe_resource(&vp8_context->pak_mpu_tpu_key_frame_token_probability_buffer);
1963     i965_free_gpe_resource(&vp8_context->pak_mpu_tpu_updated_token_probability_buffer);
1964     i965_free_gpe_resource(&vp8_context->pak_mpu_tpu_hw_token_probability_pak_pass_2_buffer);
1965     i965_free_gpe_resource(&vp8_context->pak_mpu_tpu_repak_decision_buffer);
1966
1967     i965_free_gpe_resource(&vp8_context->mb_coded_buffer);
1968 }
1969
1970 static void
1971 i965_encoder_vp8_update_internal_rc_mode(VADriverContextP ctx,
1972                                          struct encode_state *encode_state,
1973                                          struct intel_encoder_context *encoder_context)
1974 {
1975     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
1976
1977     if (encoder_context->rate_control_mode & VA_RC_CBR)
1978         vp8_context->internal_rate_mode = I965_BRC_CBR;
1979     else if (encoder_context->rate_control_mode & VA_RC_VBR)
1980         vp8_context->internal_rate_mode = I965_BRC_VBR;
1981     else
1982         vp8_context->internal_rate_mode = I965_BRC_CQP;
1983 }
1984
1985 static void
1986 i965_encoder_vp8_get_sequence_parameter(VADriverContextP ctx,
1987                                         struct encode_state *encode_state,
1988                                         struct intel_encoder_context *encoder_context)
1989 {
1990     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
1991
1992     /*
1993      * It is required to update frame width and height for each frame
1994      */
1995     if (encoder_context->frame_width_in_pixel != vp8_context->picture_width ||
1996         encoder_context->frame_height_in_pixel != vp8_context->picture_height) {
1997         vp8_context->picture_width = encoder_context->frame_width_in_pixel;
1998         vp8_context->picture_height = encoder_context->frame_height_in_pixel;
1999
2000         vp8_context->frame_width_in_mbs = WIDTH_IN_MACROBLOCKS(vp8_context->picture_width);
2001         vp8_context->frame_height_in_mbs = HEIGHT_IN_MACROBLOCKS(vp8_context->picture_height);
2002
2003         vp8_context->frame_width = vp8_context->frame_width_in_mbs * 16;
2004         vp8_context->frame_height = vp8_context->frame_height_in_mbs * 16;
2005
2006         vp8_context->down_scaled_width_in_mb4x = WIDTH_IN_MACROBLOCKS(vp8_context->frame_width / SCALE_FACTOR_4X);
2007         vp8_context->down_scaled_height_in_mb4x = HEIGHT_IN_MACROBLOCKS(vp8_context->frame_height / SCALE_FACTOR_4X);
2008         vp8_context->down_scaled_width_4x = vp8_context->down_scaled_width_in_mb4x * 16;
2009         vp8_context->down_scaled_height_4x = vp8_context->down_scaled_height_in_mb4x * 16;
2010
2011         vp8_context->down_scaled_width_in_mb16x = WIDTH_IN_MACROBLOCKS(vp8_context->frame_width / SCALE_FACTOR_16X);
2012         vp8_context->down_scaled_height_in_mb16x = HEIGHT_IN_MACROBLOCKS(vp8_context->frame_height / SCALE_FACTOR_16X);
2013         vp8_context->down_scaled_width_16x = vp8_context->down_scaled_width_in_mb16x * 16;
2014         vp8_context->down_scaled_height_16x = vp8_context->down_scaled_height_in_mb16x * 16;
2015
2016         i965_encoder_vp8_check_motion_estimation(ctx, encoder_context);
2017
2018         i965_encoder_vp8_vme_free_resources(vp8_context);
2019         i965_encoder_vp8_vme_allocate_resources(ctx, encoder_context, vp8_context);
2020     }
2021
2022     vp8_context->num_passes = 0;
2023     vp8_context->repak_pass_iter_val = 0;
2024     vp8_context->ref_ctrl_optimization_done = 0;
2025 }
2026
2027 static void
2028 i965_encoder_vp8_get_picture_parameter(VADriverContextP ctx,
2029                                        struct encode_state *encode_state,
2030                                        struct intel_encoder_context *encoder_context)
2031 {
2032     struct i965_driver_data *i965 = i965_driver_data(ctx);
2033     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
2034     struct object_surface *obj_surface;
2035     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2036     VAQMatrixBufferVP8 *quant_params = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
2037     int is_intra, i;
2038     unsigned int average_qp = 0;
2039     unsigned char brc_enabled = (vp8_context->internal_rate_mode == I965_BRC_CBR ||
2040                                  vp8_context->internal_rate_mode == I965_BRC_VBR);
2041
2042     vp8_context->frame_type = pic_param->pic_flags.bits.frame_type ? MPEG_P_PICTURE : MPEG_I_PICTURE;
2043     is_intra = (vp8_context->frame_type == MPEG_I_PICTURE);
2044
2045     if (is_intra) {
2046         vp8_context->ref_frame_ctrl = 0;
2047     } else {
2048         vp8_context->ref_frame_ctrl =
2049             ((!pic_param->ref_flags.bits.no_ref_last) |
2050              (!pic_param->ref_flags.bits.no_ref_gf << 1) |
2051              (!pic_param->ref_flags.bits.no_ref_arf << 2));
2052     }
2053
2054     vp8_context->hme_enabled = (vp8_context->hme_supported && !is_intra && vp8_context->ref_frame_ctrl != 0);
2055     vp8_context->hme_16x_enabled = (vp8_context->hme_16x_supported && !is_intra);
2056
2057     if (pic_param->ref_last_frame != VA_INVALID_SURFACE) {
2058         obj_surface = SURFACE(pic_param->ref_last_frame);
2059
2060         if (obj_surface && obj_surface->bo)
2061             vp8_context->ref_last_frame = obj_surface;
2062         else
2063             vp8_context->ref_last_frame = NULL;
2064     } else {
2065         vp8_context->ref_last_frame = NULL;
2066     }
2067
2068     if (pic_param->ref_gf_frame != VA_INVALID_SURFACE) {
2069         obj_surface = SURFACE(pic_param->ref_gf_frame);
2070
2071         if (obj_surface && obj_surface->bo)
2072             vp8_context->ref_gf_frame = obj_surface;
2073         else
2074             vp8_context->ref_gf_frame = NULL;
2075     } else {
2076         vp8_context->ref_gf_frame = NULL;
2077     }
2078
2079     if (pic_param->ref_arf_frame != VA_INVALID_SURFACE) {
2080         obj_surface = SURFACE(pic_param->ref_arf_frame);
2081
2082         if (obj_surface && obj_surface->bo)
2083             vp8_context->ref_arf_frame = obj_surface;
2084         else
2085             vp8_context->ref_arf_frame = NULL;
2086     } else {
2087         vp8_context->ref_arf_frame = NULL;
2088     }
2089
2090     vp8_context->brc_distortion_buffer_need_reset = 0;
2091
2092     if (brc_enabled) {
2093         if (is_intra) {
2094             vp8_context->brc_distortion_buffer_need_reset = 1;
2095         } else {
2096             if (vp8_context->frame_num % vp8_context->gop_size == 1) {
2097                 vp8_context->brc_distortion_buffer_need_reset = 1;
2098             }
2099         }
2100     }
2101
2102     if (pic_param->pic_flags.bits.segmentation_enabled) {
2103         for (i = 0; i < VP8_MAX_SEGMENTS; i++) {
2104             average_qp += quant_params->quantization_index[i] + quant_params->quantization_index_delta[i];
2105         }
2106
2107         average_qp = average_qp / VP8_MAX_SEGMENTS;
2108     } else {
2109         average_qp += quant_params->quantization_index[0] + quant_params->quantization_index_delta[0];
2110     }
2111
2112     if (is_intra) {
2113         vp8_context->average_i_frame_qp = average_qp;
2114     } else {
2115         vp8_context->average_p_frame_qp = average_qp;
2116     }
2117
2118     if (brc_enabled && vp8_context->multiple_pass_brc_supported)
2119         vp8_context->num_brc_pak_passes = VP8_BRC_MINIMUM_NUM_PASSES;
2120     else
2121         vp8_context->num_brc_pak_passes = VP8_BRC_SINGLE_PASS;
2122
2123     vp8_context->num_passes = 0;
2124     vp8_context->min_pak_passes = 1;
2125     vp8_context->repak_pass_iter_val = 0;
2126
2127     if (encoder_context->quality_level == ENCODER_DEFAULT_QUALITY) {
2128         vp8_context->num_passes = 1;
2129         vp8_context->min_pak_passes = 2;
2130     } else if (encoder_context->quality_level == ENCODER_LOW_QUALITY) {
2131         vp8_context->num_passes = 0;
2132         vp8_context->min_pak_passes = 1;
2133     } else {
2134         vp8_context->num_passes = 0;
2135         vp8_context->min_pak_passes = 1;
2136     }
2137
2138     if (!vp8_context->repak_supported) {
2139         vp8_context->num_passes = 0;
2140         vp8_context->min_pak_passes = 1;
2141     }
2142
2143     if (brc_enabled)
2144         vp8_context->num_passes += (vp8_context->num_brc_pak_passes - 1);
2145
2146     if (vp8_context->repak_supported && vp8_context->min_pak_passes > 1)
2147         vp8_context->repak_pass_iter_val = vp8_context->num_passes;
2148 }
2149
2150 static void
2151 i965_encoder_vp8_get_misc_parameters(VADriverContextP ctx,
2152                                      struct encode_state *encode_state,
2153                                      struct intel_encoder_context *encoder_context)
2154 {
2155     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
2156
2157     if (vp8_context->internal_rate_mode == I965_BRC_CQP) {
2158         vp8_context->init_vbv_buffer_fullness_in_bit = 0;
2159         vp8_context->vbv_buffer_size_in_bit = 0;
2160         vp8_context->target_bit_rate = 0;
2161         vp8_context->max_bit_rate = 0;
2162         vp8_context->min_bit_rate = 0;
2163         vp8_context->brc_need_reset = 0;
2164     } else {
2165         vp8_context->gop_size = encoder_context->brc.gop_size;
2166
2167         if (encoder_context->brc.need_reset) {
2168             vp8_context->framerate = encoder_context->brc.framerate[0];
2169             vp8_context->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
2170             vp8_context->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
2171             vp8_context->max_bit_rate = encoder_context->brc.bits_per_second[0]; // currently only one layer is supported
2172             vp8_context->brc_need_reset = (vp8_context->brc_initted && encoder_context->brc.need_reset);
2173
2174             if (vp8_context->internal_rate_mode == I965_BRC_CBR) {
2175                 vp8_context->min_bit_rate = vp8_context->max_bit_rate;
2176                 vp8_context->target_bit_rate = vp8_context->max_bit_rate;
2177             } else {
2178                 assert(vp8_context->internal_rate_mode == I965_BRC_VBR);
2179
2180                 if (encoder_context->brc.target_percentage[0] <= 50)
2181                     vp8_context->min_bit_rate = 0;
2182                 else
2183                     vp8_context->min_bit_rate = vp8_context->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
2184
2185                 vp8_context->target_bit_rate = vp8_context->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
2186             }
2187         }
2188     }
2189
2190     if (encoder_context->quality_level == ENCODER_LOW_QUALITY)
2191         vp8_context->hme_16x_supported = 0;
2192 }
2193
2194 static VAStatus
2195 i965_encoder_vp8_get_paramters(VADriverContextP ctx,
2196                                struct encode_state *encode_state,
2197                                struct intel_encoder_context *encoder_context)
2198 {
2199     VAQMatrixBufferVP8 *quant_params = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
2200     struct i965_encoder_vp8_surface *vp8_surface;
2201
2202     i965_encoder_vp8_update_internal_rc_mode(ctx, encode_state, encoder_context);
2203     i965_encoder_vp8_get_sequence_parameter(ctx, encode_state, encoder_context);
2204     i965_encoder_vp8_get_misc_parameters(ctx, encode_state, encoder_context);
2205     i965_encoder_vp8_get_picture_parameter(ctx, encode_state, encoder_context);
2206
2207     i965_encoder_vp8_allocate_surfaces(ctx, encoder_context, encode_state->reconstructed_object, 1);
2208     vp8_surface = encode_state->reconstructed_object->private_data;
2209     vp8_surface->qp_index = quant_params->quantization_index[0];
2210
2211     return VA_STATUS_SUCCESS;
2212 }
2213
2214 static VAStatus
2215 i965_encoder_vp8_vme_gpe_kernel_init(VADriverContextP ctx,
2216                                      struct encode_state *encode_state,
2217                                      struct intel_encoder_context *encoder_context)
2218 {
2219     struct i965_driver_data *i965 = i965_driver_data(ctx);
2220     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
2221     struct i965_gpe_table *gpe = vp8_context->gpe_table;
2222     struct i965_encoder_vp8_mbenc_context *mbenc_context = &vp8_context->mbenc_context;
2223     struct i965_encoder_vp8_mpu_context *mpu_context = &vp8_context->mpu_context;
2224     struct i965_encoder_vp8_tpu_context *tpu_context = &vp8_context->tpu_context;
2225     struct gpe_dynamic_state_parameter ds_param;
2226     int i;
2227
2228     /*
2229      * BRC will update MBEnc curbe data buffer, so initialize GPE context for
2230      * MBEnc first
2231      */
2232     for (i = 0; i < NUM_VP8_MBENC; i++) {
2233         gpe->context_init(ctx, &mbenc_context->gpe_contexts[i]);
2234     }
2235
2236     /*
2237      * VP8_MBENC_I_FRAME_LUMA and VP8_MBENC_I_FRAME_CHROMA will use the same
2238      * the dynamic state buffer,
2239      */
2240     ds_param.bo_size = ALIGN(MAX(sizeof(struct vp8_mbenc_i_frame_curbe_data), sizeof(struct vp8_mbenc_p_frame_curbe_data)), 64) +
2241                        vp8_context->idrt_entry_size * 2;
2242     mbenc_context->luma_chroma_dynamic_buffer = dri_bo_alloc(i965->intel.bufmgr,
2243                                                              "IFrame Luma & CHROMA curbe buffer",
2244                                                              ds_param.bo_size,
2245                                                              0x1000);
2246
2247     /*
2248      * VP8_MBENC_I_FRAME_LUMA and VP8_MBENC_I_FRAME_CHROMA will share the same
2249      * the curbe data buffer
2250      */
2251     ds_param.bo = mbenc_context->luma_chroma_dynamic_buffer;
2252     ds_param.curbe_offset = 0;
2253     ds_param.idrt_offset = ALIGN(MAX(sizeof(struct vp8_mbenc_i_frame_curbe_data), sizeof(struct vp8_mbenc_p_frame_curbe_data)), 64);
2254     ds_param.sampler_offset = ds_param.bo_size;
2255     gpe->set_dynamic_buffer(ctx, &mbenc_context->gpe_contexts[VP8_MBENC_I_FRAME_LUMA], &ds_param);
2256
2257     ds_param.idrt_offset = ds_param.idrt_offset + vp8_context->idrt_entry_size;
2258     gpe->set_dynamic_buffer(ctx, &mbenc_context->gpe_contexts[VP8_MBENC_I_FRAME_CHROMA], &ds_param);
2259
2260     /*
2261      * BRC will update MPU curbe data buffer, so initialize GPE context for
2262      * MPU first
2263      */
2264     gpe->context_init(ctx, &mpu_context->gpe_contexts[0]);
2265     ds_param.bo_size = ALIGN(sizeof(struct vp8_mpu_curbe_data), 64) + vp8_context->idrt_entry_size;
2266     mpu_context->dynamic_buffer = dri_bo_alloc(i965->intel.bufmgr,
2267                                                "MPU dynamic buffer",
2268                                                ds_param.bo_size,
2269                                                0x1000);
2270
2271     ds_param.bo = mpu_context->dynamic_buffer;
2272     ds_param.curbe_offset = 0;
2273     ds_param.idrt_offset = ALIGN(sizeof(struct vp8_mpu_curbe_data), 64);
2274     ds_param.sampler_offset = ds_param.bo_size;
2275     gpe->set_dynamic_buffer(ctx, &mpu_context->gpe_contexts[0], &ds_param);
2276
2277     /*
2278      * BRC will update TPU curbe data buffer, so initialize GPE context for
2279      * TPU first
2280      */
2281     gpe->context_init(ctx, &tpu_context->gpe_contexts[0]);
2282     ds_param.bo_size = ALIGN(sizeof(struct vp8_tpu_curbe_data), 64) + vp8_context->idrt_entry_size;
2283     tpu_context->dynamic_buffer = dri_bo_alloc(i965->intel.bufmgr,
2284                                                "MPU dynamic buffer",
2285                                                ds_param.bo_size,
2286                                                0x1000);
2287
2288     ds_param.bo = tpu_context->dynamic_buffer;
2289     ds_param.curbe_offset = 0;
2290     ds_param.idrt_offset = ALIGN(sizeof(struct vp8_tpu_curbe_data), 64);
2291     ds_param.sampler_offset = ds_param.bo_size;
2292     gpe->set_dynamic_buffer(ctx, &tpu_context->gpe_contexts[0], &ds_param);
2293
2294     return VA_STATUS_SUCCESS;
2295 }
2296
2297 static void
2298 i965_encoder_vp8_vme_brc_init_reset_set_curbe(VADriverContextP ctx,
2299                                               struct encode_state *encode_state,
2300                                               struct intel_encoder_context *encoder_context,
2301                                               struct i965_gpe_context *gpe_context)
2302 {
2303     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
2304     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2305     struct vp8_brc_init_reset_curbe_data *pcmd = i965_gpe_context_map_curbe(gpe_context);
2306     double input_bits_per_frame, bps_ratio;
2307
2308     if (!pcmd)
2309         return;
2310
2311     memset(pcmd, 0, sizeof(*pcmd));
2312
2313     pcmd->dw0.profile_level_max_frame = vp8_context->frame_width * vp8_context->frame_height;
2314     pcmd->dw1.init_buf_full_in_bits = vp8_context->init_vbv_buffer_fullness_in_bit;
2315     pcmd->dw2.buf_size_in_bits = vp8_context->vbv_buffer_size_in_bit;
2316     pcmd->dw3.average_bitrate = (vp8_context->target_bit_rate + VP8_BRC_KBPS - 1) / VP8_BRC_KBPS * VP8_BRC_KBPS;
2317     pcmd->dw4.max_bitrate = (vp8_context->max_bit_rate + VP8_BRC_KBPS - 1) / VP8_BRC_KBPS * VP8_BRC_KBPS;
2318     pcmd->dw6.frame_rate_m = vp8_context->framerate.num;
2319     pcmd->dw7.frame_rate_d = vp8_context->framerate.den;
2320     pcmd->dw8.brc_flag = 0;
2321     pcmd->dw8.gop_minus1 = vp8_context->gop_size - 1;
2322
2323     if (vp8_context->internal_rate_mode == I965_BRC_CBR) {
2324         pcmd->dw4.max_bitrate = pcmd->dw3.average_bitrate;
2325
2326         pcmd->dw8.brc_flag = pcmd->dw8.brc_flag | BRC_KERNEL_CBR;
2327     } else if (vp8_context->internal_rate_mode == I965_BRC_VBR) {
2328         if (pcmd->dw4.max_bitrate < pcmd->dw3.average_bitrate) {
2329             pcmd->dw4.max_bitrate = 2 * pcmd->dw3.average_bitrate;
2330         }
2331
2332         pcmd->dw8.brc_flag = pcmd->dw8.brc_flag | BRC_KERNEL_VBR;
2333     }
2334
2335     input_bits_per_frame =
2336         ((double)(pcmd->dw4.max_bitrate) * (double)(pcmd->dw7.frame_rate_d) /
2337          (double)(pcmd->dw6.frame_rate_m));
2338
2339     if (pcmd->dw2.buf_size_in_bits < (unsigned int)input_bits_per_frame * 4) {
2340         pcmd->dw2.buf_size_in_bits = (unsigned int)input_bits_per_frame * 4;
2341     }
2342
2343     if (pcmd->dw1.init_buf_full_in_bits == 0) {
2344         pcmd->dw1.init_buf_full_in_bits = 7 * pcmd->dw2.buf_size_in_bits / 8;
2345     }
2346
2347     if (pcmd->dw1.init_buf_full_in_bits < (unsigned int)(input_bits_per_frame * 2)) {
2348         pcmd->dw1.init_buf_full_in_bits = (unsigned int)(input_bits_per_frame * 2);
2349     }
2350
2351     if (pcmd->dw1.init_buf_full_in_bits > pcmd->dw2.buf_size_in_bits) {
2352         pcmd->dw1.init_buf_full_in_bits = pcmd->dw2.buf_size_in_bits;
2353     }
2354
2355     bps_ratio = input_bits_per_frame / ((double)(pcmd->dw2.buf_size_in_bits) / 30);
2356     bps_ratio = (bps_ratio < 0.1) ? 0.1 : (bps_ratio > 3.5) ? 3.5 : bps_ratio;
2357
2358     pcmd->dw9.frame_width_in_bytes = vp8_context->frame_width;
2359     pcmd->dw10.frame_height_in_bytes = vp8_context->frame_height;
2360     pcmd->dw10.avbr_accuracy = 30;
2361     pcmd->dw11.avbr_convergence = 150;
2362     pcmd->dw11.min_qp = pic_param->clamp_qindex_low;
2363     pcmd->dw12.max_qp = pic_param->clamp_qindex_high;
2364     pcmd->dw12.level_qp = 60;
2365
2366     // DW13 default 100
2367     pcmd->dw13.max_section_pct = 100;
2368     pcmd->dw13.under_shoot_cbr_pct = 115;
2369
2370     // DW14 default 100
2371     pcmd->dw14.min_section_pct = 100;
2372     pcmd->dw14.vbr_bias_pct = 100;
2373     pcmd->dw15.instant_rate_threshold_0_for_p = 30;
2374     pcmd->dw15.instant_rate_threshold_1_for_p = 50;
2375     pcmd->dw15.instant_rate_threshold_2_for_p = 70;
2376     pcmd->dw15.instant_rate_threshold_3_for_p = 120;
2377
2378     pcmd->dw17.instant_rate_threshold_0_for_i = 30;
2379     pcmd->dw17.instant_rate_threshold_1_for_i = 50;
2380     pcmd->dw17.instant_rate_threshold_2_for_i = 90;
2381     pcmd->dw17.instant_rate_threshold_3_for_i = 115;
2382     pcmd->dw18.deviation_threshold_0_for_p = (unsigned int)(-50 * pow(0.9, bps_ratio));
2383     pcmd->dw18.deviation_threshold_1_for_p = (unsigned int)(-50 * pow(0.66, bps_ratio));
2384     pcmd->dw18.deviation_threshold_2_for_p = (unsigned int)(-50 * pow(0.46, bps_ratio));
2385     pcmd->dw18.deviation_threshold_3_for_p = (unsigned int)(-50 * pow(0.3, bps_ratio));
2386     pcmd->dw19.deviation_threshold_4_for_p = (unsigned int)(50 * pow(0.3, bps_ratio));
2387     pcmd->dw19.deviation_threshold_5_for_p = (unsigned int)(50 * pow(0.46, bps_ratio));
2388     pcmd->dw19.deviation_threshold_6_for_p = (unsigned int)(50 * pow(0.7, bps_ratio));
2389     pcmd->dw19.deviation_threshold_7_for_p = (unsigned int)(50 * pow(0.9, bps_ratio));
2390     pcmd->dw20.deviation_threshold_0_for_vbr = (unsigned int)(-50 * pow(0.9, bps_ratio));
2391     pcmd->dw20.deviation_threshold_1_for_vbr = (unsigned int)(-50 * pow(0.7, bps_ratio));
2392     pcmd->dw20.deviation_threshold_2_for_vbr = (unsigned int)(-50 * pow(0.5, bps_ratio));
2393     pcmd->dw20.deviation_threshold_3_for_vbr = (unsigned int)(-50 * pow(0.3, bps_ratio));
2394     pcmd->dw21.deviation_threshold_4_for_vbr = (unsigned int)(100 * pow(0.4, bps_ratio));
2395     pcmd->dw21.deviation_threshold_5_for_vbr = (unsigned int)(100 * pow(0.5, bps_ratio));
2396     pcmd->dw21.deviation_threshold_6_for_vbr = (unsigned int)(100 * pow(0.75, bps_ratio));
2397     pcmd->dw21.deviation_threshold_7_for_vbr = (unsigned int)(100 * pow(0.9, bps_ratio));
2398     pcmd->dw22.deviation_threshold_0_for_i = (unsigned int)(-50 * pow(0.8, bps_ratio));
2399     pcmd->dw22.deviation_threshold_1_for_i = (unsigned int)(-50 * pow(0.6, bps_ratio));
2400     pcmd->dw22.deviation_threshold_2_for_i = (unsigned int)(-50 * pow(0.34, bps_ratio));
2401     pcmd->dw22.deviation_threshold_3_for_i = (unsigned int)(-50 * pow(0.2, bps_ratio));
2402     pcmd->dw23.deviation_threshold_4_for_i = (unsigned int)(50 * pow(0.2, bps_ratio));
2403     pcmd->dw23.deviation_threshold_5_for_i = (unsigned int)(50 * pow(0.4, bps_ratio));
2404     pcmd->dw23.deviation_threshold_6_for_i = (unsigned int)(50 * pow(0.66, bps_ratio));
2405     pcmd->dw23.deviation_threshold_7_for_i = (unsigned int)(50 * pow(0.9, bps_ratio));
2406
2407     // Default: 1
2408     pcmd->dw24.num_t_levels = 1;
2409
2410     if (!vp8_context->brc_initted) {
2411         vp8_context->brc_init_current_target_buf_full_in_bits = pcmd->dw1.init_buf_full_in_bits;
2412     }
2413
2414     vp8_context->brc_init_reset_buf_size_in_bits = pcmd->dw2.buf_size_in_bits;
2415     vp8_context->brc_init_reset_input_bits_per_frame = input_bits_per_frame;
2416
2417     pcmd->dw26.history_buffer_bti = VP8_BTI_BRC_INIT_RESET_HISTORY;
2418     pcmd->dw27.distortion_buffer_bti = VP8_BTI_BRC_INIT_RESET_DISTORTION;
2419
2420     i965_gpe_context_unmap_curbe(gpe_context);
2421 }
2422
2423 static void
2424 i965_encoder_vp8_vme_brc_init_reset_add_surfaces(VADriverContextP ctx,
2425                                                  struct encode_state *encode_state,
2426                                                  struct intel_encoder_context *encoder_context,
2427                                                  struct i965_gpe_context *gpe_context)
2428 {
2429     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
2430
2431     i965_add_buffer_gpe_surface(ctx,
2432                                 gpe_context,
2433                                 &vp8_context->brc_history_buffer,
2434                                 0,
2435                                 vp8_context->brc_history_buffer.size,
2436                                 0,
2437                                 VP8_BTI_BRC_INIT_RESET_HISTORY);
2438
2439     i965_add_buffer_2d_gpe_surface(ctx,
2440                                    gpe_context,
2441                                    &vp8_context->brc_distortion_buffer,
2442                                    1,
2443                                    I965_SURFACEFORMAT_R8_UNORM,
2444                                    VP8_BTI_BRC_INIT_RESET_DISTORTION);
2445 }
2446
2447 static VAStatus
2448 i965_encoder_vp8_vme_brc_init_reset(VADriverContextP ctx,
2449                                     struct encode_state *encode_state,
2450                                     struct intel_encoder_context *encoder_context)
2451 {
2452     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
2453     struct i965_encoder_vp8_brc_init_reset_context *init_reset_context = &vp8_context->brc_init_reset_context;
2454     struct i965_gpe_table *gpe = vp8_context->gpe_table;
2455     struct gpe_media_object_parameter media_object_param;
2456     struct i965_gpe_context *gpe_context;
2457     int gpe_index = VP8_BRC_INIT;
2458     int media_function = VP8_MEDIA_STATE_BRC_INIT_RESET;
2459
2460     if (vp8_context->brc_initted)
2461         gpe_index = VP8_BRC_RESET;
2462
2463     gpe_context = &init_reset_context->gpe_contexts[gpe_index];
2464
2465     gpe->context_init(ctx, gpe_context);
2466     gpe->reset_binding_table(ctx, gpe_context);
2467     i965_encoder_vp8_vme_brc_init_reset_set_curbe(ctx, encode_state, encoder_context, gpe_context);
2468     i965_encoder_vp8_vme_brc_init_reset_add_surfaces(ctx, encode_state, encoder_context, gpe_context);
2469     gpe->setup_interface_data(ctx, gpe_context);
2470
2471     memset(&media_object_param, 0, sizeof(media_object_param));
2472     i965_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
2473
2474     return VA_STATUS_SUCCESS;
2475 }
2476
2477 static void
2478 i965_encoder_vp8_vme_scaling_set_curbe(VADriverContextP ctx,
2479                                        struct encode_state *encode_state,
2480                                        struct intel_encoder_context *encoder_context,
2481                                        struct i965_gpe_context *gpe_context,
2482                                        struct scaling_curbe_parameters *params)
2483 {
2484     struct vp8_scaling_curbe_data *pcmd = i965_gpe_context_map_curbe(gpe_context);
2485
2486     if (!pcmd)
2487         return;
2488
2489     memset(pcmd, 0, sizeof(*pcmd));
2490
2491     pcmd->dw0.input_picture_width = params->input_picture_width;
2492     pcmd->dw0.input_picture_height = params->input_picture_height;
2493
2494     if (!params->is_field_picture) {
2495         pcmd->dw1.input_y_bti_frame = VP8_BTI_SCALING_FRAME_SRC_Y;
2496         pcmd->dw2.output_y_bti_frame = VP8_BTI_SCALING_FRAME_DST_Y;
2497     } else {
2498         pcmd->dw1.input_y_bti_top_field = VP8_BTI_SCALING_FIELD_TOP_SRC_Y;
2499         pcmd->dw2.output_y_bti_top_field = VP8_BTI_SCALING_FIELD_TOP_DST_Y;
2500         pcmd->dw3.input_y_bti_bottom_field = VP8_BTI_SCALING_FIELD_BOT_SRC_Y;
2501         pcmd->dw4.output_y_bti_bottom_field = VP8_BTI_SCALING_FIELD_BOT_DST_Y;
2502     }
2503
2504     if (params->flatness_check_enabled) {
2505         pcmd->dw5.flatness_threshold = 128;
2506         pcmd->dw6.enable_mb_flatness_check = 1;
2507
2508         if (!params->is_field_picture) {
2509             pcmd->dw8.flatness_output_bti_frame = VP8_BTI_SCALING_FRAME_FLATNESS_DST;
2510         } else {
2511             pcmd->dw8.flatness_output_bti_top_field = VP8_BTI_SCALING_FIELD_TOP_FLATNESS_DST;
2512             pcmd->dw9.flatness_output_bti_bottom_field = VP8_BTI_SCALING_FIELD_BOT_FLATNESS_DST;
2513         }
2514     } else {
2515         pcmd->dw6.enable_mb_flatness_check = 0;
2516     }
2517
2518     pcmd->dw6.enable_mb_variance_output = params->mb_variance_output_enabled;
2519     pcmd->dw6.enable_mb_pixel_average_output = params->mb_pixel_average_output_enabled;
2520
2521     if (params->mb_variance_output_enabled || params->mb_pixel_average_output_enabled) {
2522         if (!params->is_field_picture) {
2523             pcmd->dw10.mbv_proc_stats_bti_frame = VP8_BTI_SCALING_FRAME_MBVPROCSTATS_DST;
2524         } else {
2525             pcmd->dw10.mbv_proc_stats_bti_top_field = VP8_BTI_SCALING_FIELD_TOP_MBVPROCSTATS_DST;
2526             pcmd->dw11.mbv_proc_stats_bti_bottom_field = VP8_BTI_SCALING_FIELD_BOT_MBVPROCSTATS_DST;
2527         }
2528     }
2529
2530     i965_gpe_context_unmap_curbe(gpe_context);
2531 }
2532
2533 static void
2534 i965_encoder_vp8_vme_scaling_add_surfaces(VADriverContextP ctx,
2535                                           struct encode_state *encode_state,
2536                                           struct intel_encoder_context *encoder_context,
2537                                           struct i965_gpe_context *gpe_context,
2538                                           struct scaling_surface_parameters *params)
2539 {
2540     i965_add_2d_gpe_surface(ctx,
2541                             gpe_context,
2542                             params->input_obj_surface,
2543                             0,
2544                             1,
2545                             I965_SURFACEFORMAT_R32_UNORM,
2546                             VP8_BTI_SCALING_FRAME_SRC_Y);
2547     i965_add_2d_gpe_surface(ctx,
2548                             gpe_context,
2549                             params->output_obj_surface,
2550                             0,
2551                             1,
2552                             I965_SURFACEFORMAT_R32_UNORM,
2553                             VP8_BTI_SCALING_FRAME_DST_Y);
2554 }
2555
2556 static VAStatus
2557 i965_encoder_vp8_vme_scaling(VADriverContextP ctx,
2558                              struct encode_state *encode_state,
2559                              struct intel_encoder_context *encoder_context,
2560                              int scaling_16x_enabled)
2561 {
2562     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
2563     struct i965_encoder_vp8_scaling_context *scaling_context = &vp8_context->scaling_context;
2564     struct i965_gpe_table *gpe = vp8_context->gpe_table;
2565     struct gpe_media_object_walker_parameter media_object_walker_param;
2566     struct i965_gpe_context *gpe_context;
2567     struct scaling_curbe_parameters scaling_curbe_params;
2568     struct scaling_surface_parameters scaling_surface_params;
2569     struct vp8_encoder_kernel_walker_parameter kernel_walker_param;
2570     struct object_surface *input_obj_surface, *output_obj_surface;
2571     struct i965_encoder_vp8_surface *vp8_surface;
2572     unsigned int input_frame_width, input_frame_height, output_frame_width, output_frame_height;
2573     unsigned int down_scaled_width_in_mbs, down_scaled_height_in_mbs;
2574     int gpe_index, media_function;
2575
2576     vp8_surface = encode_state->reconstructed_object->private_data;
2577
2578     if (scaling_16x_enabled) {
2579         gpe_index = VP8_SCALING_16X;
2580         media_function = VP8_MEDIA_STATE_16X_SCALING;
2581
2582         down_scaled_width_in_mbs = vp8_context->down_scaled_width_in_mb16x;
2583         down_scaled_height_in_mbs = vp8_context->down_scaled_height_in_mb16x;
2584
2585         input_obj_surface = vp8_surface->scaled_4x_surface_obj;
2586         input_frame_width = vp8_context->down_scaled_width_4x;
2587         input_frame_height = vp8_context->down_scaled_height_4x;
2588
2589         output_obj_surface = vp8_surface->scaled_16x_surface_obj;
2590         output_frame_width = vp8_context->down_scaled_width_16x;
2591         output_frame_height = vp8_context->down_scaled_height_16x;
2592     } else {
2593         gpe_index = VP8_SCALING_4X;
2594         media_function = VP8_MEDIA_STATE_4X_SCALING;
2595
2596         down_scaled_width_in_mbs = vp8_context->down_scaled_width_in_mb4x;
2597         down_scaled_height_in_mbs = vp8_context->down_scaled_height_in_mb4x;
2598
2599         input_obj_surface = encode_state->input_yuv_object;
2600         input_frame_width = vp8_context->picture_width;         /* the orignal width */
2601         input_frame_height = vp8_context->picture_height;       /* the orignal height */
2602
2603         output_obj_surface = vp8_surface->scaled_4x_surface_obj;
2604         output_frame_width = vp8_context->down_scaled_width_4x;
2605         output_frame_height = vp8_context->down_scaled_height_4x;
2606     }
2607
2608     gpe_context = &scaling_context->gpe_contexts[gpe_index];
2609
2610     gpe->context_init(ctx, gpe_context);
2611     gpe->reset_binding_table(ctx, gpe_context);
2612
2613     memset(&scaling_curbe_params, 0, sizeof(scaling_curbe_params));
2614     scaling_curbe_params.input_picture_width = input_frame_width;
2615     scaling_curbe_params.input_picture_height = input_frame_height;
2616     scaling_curbe_params.is_field_picture = 0;
2617     scaling_curbe_params.flatness_check_enabled = 0;
2618     scaling_curbe_params.mb_variance_output_enabled = 0;
2619     scaling_curbe_params.mb_pixel_average_output_enabled = 0;
2620     i965_encoder_vp8_vme_scaling_set_curbe(ctx, encode_state, encoder_context, gpe_context, &scaling_curbe_params);
2621
2622     scaling_surface_params.input_obj_surface = input_obj_surface;
2623     scaling_surface_params.input_width = input_frame_width;
2624     scaling_surface_params.input_height = input_frame_height;
2625     scaling_surface_params.output_obj_surface = output_obj_surface;
2626     scaling_surface_params.output_width = output_frame_width;
2627     scaling_surface_params.output_height = output_frame_height;
2628     i965_encoder_vp8_vme_scaling_add_surfaces(ctx, encode_state, encoder_context, gpe_context, &scaling_surface_params);
2629
2630     gpe->setup_interface_data(ctx, gpe_context);
2631
2632     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2633     kernel_walker_param.resolution_x = down_scaled_width_in_mbs * 2; /* 8x8 level */
2634     kernel_walker_param.resolution_y = down_scaled_height_in_mbs * 2;
2635     kernel_walker_param.no_dependency = 1;
2636     i965_init_media_object_walker_parameters(encoder_context, &kernel_walker_param, &media_object_walker_param);
2637
2638     i965_run_kernel_media_object_walker(ctx, encoder_context, gpe_context, media_function, &media_object_walker_param);
2639
2640     return VA_STATUS_SUCCESS;
2641 }
2642
2643 static void
2644 i965_encoder_vp8_vme_me_set_curbe(VADriverContextP ctx,
2645                                   struct encode_state *encode_state,
2646                                   struct intel_encoder_context *encoder_context,
2647                                   struct i965_gpe_context *gpe_context,
2648                                   struct me_curbe_parameters *params)
2649 {
2650     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
2651     struct vp8_me_curbe_data *pcmd = i965_gpe_context_map_curbe(gpe_context);
2652     int me_mode, me_method;
2653
2654     if (!pcmd)
2655         return;
2656
2657     if (vp8_context->hme_16x_enabled) {
2658         if (params->use_16x_me)
2659             me_mode = VP8_ME_MODE_ME16X_BEFORE_ME4X;
2660         else
2661             me_mode = VP8_ME_MODE_ME4X_AFTER_ME16X;
2662     } else {
2663         me_mode = VP8_ME_MODE_ME4X_ONLY;
2664     }
2665
2666     memset(pcmd, 0, sizeof(*pcmd));
2667
2668     pcmd->dw1.max_num_mvs = 0x10;
2669     pcmd->dw1.bi_weight = 0;
2670
2671     pcmd->dw2.max_num_su = 57;
2672     pcmd->dw2.max_len_sp = 57;
2673
2674     pcmd->dw3.sub_mb_part_mask = 0x77;
2675     pcmd->dw3.inter_sad = 0;
2676     pcmd->dw3.intra_sad = 0;
2677     pcmd->dw3.bme_disable_fbr = 1;
2678     pcmd->dw3.sub_pel_mode = 3;
2679
2680     pcmd->dw4.picture_height_minus1 = params->down_scaled_height_in_mbs - 1;
2681     pcmd->dw4.picture_width = params->down_scaled_width_in_mbs;
2682
2683     if (pcmd->dw4.picture_height_minus1 < 2)
2684         pcmd->dw4.picture_height_minus1 = 2;
2685
2686     if (pcmd->dw4.picture_width < 3)
2687         pcmd->dw4.picture_width = 3;
2688
2689     pcmd->dw5.ref_height = 40;
2690     pcmd->dw5.ref_width = 48;
2691
2692     pcmd->dw6.me_mode = me_mode;
2693
2694     if (encoder_context->quality_level == ENCODER_DEFAULT_QUALITY)
2695         pcmd->dw6.super_combine_dist = 5;
2696     else if (encoder_context->quality_level == ENCODER_LOW_QUALITY)
2697         pcmd->dw6.super_combine_dist = 0;
2698     else
2699         pcmd->dw6.super_combine_dist = 1;
2700
2701     pcmd->dw6.max_vmv_range = 0x7fc;
2702
2703     pcmd->dw13.num_ref_idx_l0_minus1 = vp8_num_refs[vp8_context->ref_frame_ctrl] - 1;
2704     pcmd->dw13.num_ref_idx_l1_minus1 = 0;
2705
2706     me_method = (encoder_context->quality_level == ENCODER_DEFAULT_QUALITY) ? 6 : 4;
2707     memcpy(&pcmd->dw16, vp8_search_path[me_method], 14 * sizeof(pcmd->dw16));
2708
2709     pcmd->dw32.vp8_me_mv_output_data_bti = VP8_BTI_ME_MV_DATA;
2710     pcmd->dw33.vp8_me_mv_input_data_bti = VP8_BTI_16X_ME_MV_DATA;
2711     pcmd->dw34.vp8_me_distorion_bti = VP8_BTI_ME_DISTORTION;
2712     pcmd->dw35.vp8_me_min_dist_brc_bti = VP8_BTI_ME_MIN_DIST_BRC_DATA;
2713     pcmd->dw36.vp8_me_forward_ref_bti = VP8_BTI_VME_INTER_PRED;
2714
2715     i965_gpe_context_unmap_curbe(gpe_context);
2716 }
2717
2718 static void
2719 i965_encoder_vp8_vme_me_add_surfaces(VADriverContextP ctx,
2720                                      struct encode_state *encode_state,
2721                                      struct intel_encoder_context *encoder_context,
2722                                      struct i965_gpe_context *gpe_context,
2723                                      struct me_surface_parameters *params)
2724 {
2725     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
2726     struct i965_encoder_vp8_surface *vp8_surface;
2727     struct i965_gpe_resource *me_gpe_buffer, *me_brc_distortion_buffer;
2728     struct object_surface *obj_surface;
2729     unsigned char brc_enabled = (vp8_context->internal_rate_mode == I965_BRC_CBR ||
2730                                  vp8_context->internal_rate_mode == I965_BRC_VBR);
2731
2732     if (brc_enabled)
2733         me_brc_distortion_buffer = &vp8_context->brc_distortion_buffer;
2734     else
2735         me_brc_distortion_buffer = &vp8_context->me_4x_distortion_buffer;
2736
2737     if (params->use_16x_me) {
2738         me_gpe_buffer = &vp8_context->me_16x_mv_data_buffer;
2739     } else {
2740         me_gpe_buffer = &vp8_context->me_4x_mv_data_buffer;
2741     }
2742
2743     i965_add_buffer_2d_gpe_surface(ctx,
2744                                    gpe_context,
2745                                    me_gpe_buffer,
2746                                    1,
2747                                    I965_SURFACEFORMAT_R8_UNORM,
2748                                    VP8_BTI_ME_MV_DATA);
2749
2750     if (vp8_context->hme_16x_enabled) {
2751         me_gpe_buffer = &vp8_context->me_16x_mv_data_buffer;
2752         i965_add_buffer_2d_gpe_surface(ctx,
2753                                        gpe_context,
2754                                        me_gpe_buffer,
2755                                        1,
2756                                        I965_SURFACEFORMAT_R8_UNORM,
2757                                        VP8_BTI_16X_ME_MV_DATA);
2758     }
2759
2760     if (!params->use_16x_me) {
2761         me_gpe_buffer = &vp8_context->me_4x_distortion_buffer;
2762         i965_add_buffer_2d_gpe_surface(ctx,
2763                                        gpe_context,
2764                                        me_gpe_buffer,
2765                                        1,
2766                                        I965_SURFACEFORMAT_R8_UNORM,
2767                                        VP8_BTI_ME_DISTORTION);
2768
2769         me_gpe_buffer = me_brc_distortion_buffer;
2770         i965_add_buffer_2d_gpe_surface(ctx,
2771                                        gpe_context,
2772                                        me_gpe_buffer,
2773                                        1,
2774                                        I965_SURFACEFORMAT_R8_UNORM,
2775                                        VP8_BTI_ME_MIN_DIST_BRC_DATA);
2776     }
2777
2778     vp8_surface = encode_state->reconstructed_object->private_data;
2779     assert(vp8_surface);
2780
2781     if (params->use_16x_me) {
2782         obj_surface = vp8_surface->scaled_16x_surface_obj;
2783     } else {
2784         obj_surface = vp8_surface->scaled_4x_surface_obj;
2785     }
2786
2787     i965_add_adv_gpe_surface(ctx,
2788                              gpe_context,
2789                              obj_surface,
2790                              VP8_BTI_VME_INTER_PRED);
2791
2792     if (vp8_context->ref_last_frame != NULL &&
2793         vp8_context->ref_last_frame->bo != NULL) {
2794         vp8_surface = vp8_context->ref_last_frame->private_data;
2795         obj_surface = NULL;
2796
2797         if (vp8_surface) {
2798             if (params->use_16x_me) {
2799                 obj_surface = vp8_surface->scaled_16x_surface_obj;
2800             } else {
2801                 obj_surface = vp8_surface->scaled_4x_surface_obj;
2802             }
2803         }
2804
2805         if (obj_surface) {
2806             i965_add_adv_gpe_surface(ctx,
2807                                      gpe_context,
2808                                      obj_surface,
2809                                      VP8_BTI_ME_REF1_PIC);
2810         }
2811     }
2812
2813     if (vp8_context->ref_gf_frame != NULL &&
2814         vp8_context->ref_gf_frame->bo != NULL) {
2815         vp8_surface = vp8_context->ref_gf_frame->private_data;
2816         obj_surface = NULL;
2817
2818         if (vp8_surface) {
2819             if (params->use_16x_me) {
2820                 obj_surface = vp8_surface->scaled_16x_surface_obj;
2821             } else {
2822                 obj_surface = vp8_surface->scaled_4x_surface_obj;
2823             }
2824         }
2825
2826         if (obj_surface) {
2827             switch (vp8_context->ref_frame_ctrl) {
2828             case 2:
2829             case 6:
2830                 i965_add_adv_gpe_surface(ctx,
2831                                          gpe_context,
2832                                          obj_surface,
2833                                          VP8_BTI_ME_REF1_PIC);
2834                 break;
2835
2836             case 3:
2837             case 7:
2838                 i965_add_adv_gpe_surface(ctx,
2839                                          gpe_context,
2840                                          obj_surface,
2841                                          VP8_BTI_ME_REF2_PIC);
2842                 break;
2843             }
2844         }
2845     }
2846
2847     if (vp8_context->ref_arf_frame != NULL &&
2848         vp8_context->ref_arf_frame->bo != NULL) {
2849         vp8_surface = vp8_context->ref_arf_frame->private_data;
2850         obj_surface = NULL;
2851
2852         if (vp8_surface) {
2853             if (params->use_16x_me) {
2854                 obj_surface = vp8_surface->scaled_16x_surface_obj;
2855             } else {
2856                 obj_surface = vp8_surface->scaled_4x_surface_obj;
2857             }
2858         }
2859
2860         if (obj_surface) {
2861             switch (vp8_context->ref_frame_ctrl) {
2862             case 4:
2863                 i965_add_adv_gpe_surface(ctx,
2864                                          gpe_context,
2865                                          obj_surface,
2866                                          VP8_BTI_ME_REF1_PIC);
2867                 break;
2868
2869             case 5:
2870             case 6:
2871                 i965_add_adv_gpe_surface(ctx,
2872                                          gpe_context,
2873                                          obj_surface,
2874                                          VP8_BTI_ME_REF2_PIC);
2875                 break;
2876
2877             case 7:
2878                 i965_add_adv_gpe_surface(ctx,
2879                                          gpe_context,
2880                                          obj_surface,
2881                                          VP8_BTI_ME_REF3_PIC);
2882                 break;
2883             }
2884         }
2885     }
2886 }
2887
2888 static void
2889 i965_encoder_vp8_vme_init_brc_distorion_buffer(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2890 {
2891     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
2892
2893     i965_zero_gpe_resource(&vp8_context->brc_distortion_buffer);
2894 }
2895
2896 static VAStatus
2897 i965_encoder_vp8_vme_me(VADriverContextP ctx,
2898                         struct encode_state *encode_state,
2899                         struct intel_encoder_context *encoder_context,
2900                         int use_16x_me)
2901 {
2902     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
2903     struct i965_encoder_vp8_me_context *me_context = &vp8_context->me_context;
2904     struct i965_gpe_table *gpe = vp8_context->gpe_table;
2905     struct gpe_media_object_walker_parameter media_object_walker_param;
2906     struct vp8_encoder_kernel_walker_parameter kernel_walker_params;
2907     struct me_curbe_parameters me_curbe_params;
2908     struct i965_gpe_context *gpe_context;
2909     struct me_surface_parameters me_surface_params;
2910     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2911     unsigned int down_scaled_width_in_mbs, down_scaled_height_in_mbs;
2912     unsigned int ref_frame_flag_final, ref_frame_flag;
2913     int gpe_index, media_function;
2914
2915     if (vp8_context->frame_type == MPEG_P_PICTURE) {
2916         ref_frame_flag = VP8_REF_FLAG_ALL;
2917
2918         if (pic_param->ref_last_frame == pic_param->ref_gf_frame) {
2919             ref_frame_flag &= ~VP8_REF_FLAG_GOLDEN;
2920         }
2921
2922         if (pic_param->ref_last_frame == pic_param->ref_arf_frame) {
2923             ref_frame_flag &= ~VP8_REF_FLAG_ALT;
2924         }
2925
2926         if (pic_param->ref_gf_frame == pic_param->ref_arf_frame) {
2927             ref_frame_flag &= ~VP8_REF_FLAG_ALT;
2928         }
2929     } else {
2930         ref_frame_flag = VP8_REF_FLAG_LAST;
2931     }
2932
2933     switch (vp8_context->ref_frame_ctrl) {
2934     case 0:
2935         ref_frame_flag_final = VP8_REF_FLAG_NONE;
2936         break;
2937
2938     case 1:
2939         ref_frame_flag_final = VP8_REF_FLAG_LAST;       // Last Ref only
2940         break;
2941
2942     case 2:
2943         ref_frame_flag_final = VP8_REF_FLAG_GOLDEN;     // Gold Ref only
2944         break;
2945
2946     case 4:
2947         ref_frame_flag_final = VP8_REF_FLAG_ALT;        // Alt Ref only
2948         break;
2949
2950     default:
2951         ref_frame_flag_final = ref_frame_flag;
2952     }
2953
2954     vp8_context->ref_frame_ctrl = ref_frame_flag_final;
2955     vp8_context->ref_ctrl_optimization_done = 1;
2956
2957     if (use_16x_me) {
2958         gpe_index = VP8_ME_16X;
2959         media_function = VP8_MEDIA_STATE_16X_ME;
2960         down_scaled_width_in_mbs = vp8_context->down_scaled_width_in_mb16x;
2961         down_scaled_height_in_mbs = vp8_context->down_scaled_height_in_mb16x;
2962     } else {
2963         gpe_index = VP8_ME_4X;
2964         media_function = VP8_MEDIA_STATE_4X_ME;
2965         down_scaled_width_in_mbs = vp8_context->down_scaled_width_in_mb4x;
2966         down_scaled_height_in_mbs = vp8_context->down_scaled_height_in_mb4x;
2967     }
2968
2969     gpe_context = &me_context->gpe_contexts[gpe_index];
2970
2971     gpe->context_init(ctx, gpe_context);
2972     gpe->reset_binding_table(ctx, gpe_context);
2973
2974     memset(&me_curbe_params, 0, sizeof(me_curbe_params));
2975     me_curbe_params.down_scaled_width_in_mbs = down_scaled_width_in_mbs;
2976     me_curbe_params.down_scaled_height_in_mbs = down_scaled_height_in_mbs;
2977     me_curbe_params.use_16x_me = use_16x_me;
2978     i965_encoder_vp8_vme_me_set_curbe(ctx, encode_state, encoder_context, gpe_context, &me_curbe_params);
2979
2980     if (vp8_context->brc_distortion_buffer_need_reset && !use_16x_me) {
2981         i965_encoder_vp8_vme_init_brc_distorion_buffer(ctx, encoder_context);
2982     }
2983
2984     memset(&me_surface_params, 0, sizeof(me_surface_params));
2985     me_surface_params.use_16x_me = use_16x_me;
2986     i965_encoder_vp8_vme_me_add_surfaces(ctx, encode_state, encoder_context, gpe_context, &me_surface_params);
2987
2988     gpe->setup_interface_data(ctx, gpe_context);
2989
2990     memset(&kernel_walker_params, 0, sizeof(kernel_walker_params));
2991     kernel_walker_params.resolution_x = down_scaled_width_in_mbs;
2992     kernel_walker_params.resolution_y = down_scaled_height_in_mbs;
2993     kernel_walker_params.no_dependency = 1;
2994     i965_init_media_object_walker_parameters(encoder_context, &kernel_walker_params, &media_object_walker_param);
2995
2996     i965_run_kernel_media_object_walker(ctx, encoder_context, gpe_context, media_function, &media_object_walker_param);
2997
2998     return VA_STATUS_SUCCESS;
2999 }
3000
/*
 * Compute a clamped quantizer index into `index`.
 *
 * Expects a pointer named `quant_param` in the enclosing scope providing
 * quantization_index[] and quantization_index_delta[].  The base index and
 * the delta are summed in a signed int and clamped to [0, MAX_QP_VP8]
 * BEFORE being stored: callers pass an unsigned short, so storing the raw
 * sum first would turn a negative sum into a large positive value that
 * then clamps to MAX_QP_VP8 instead of 0.
 */
#define QUANT_INDEX(index, q_index, q_index_delta)                      \
    do {                                                                \
        int quant_index_sum_ = (int)quant_param->quantization_index[q_index] + \
                               (int)quant_param->quantization_index_delta[q_index_delta]; \
        (index) = CLAMP(0, MAX_QP_VP8, quant_index_sum_);               \
    } while (0)
3006
3007 static void
3008 i965_encoder_vp8_vme_mbenc_set_i_frame_curbe(VADriverContextP ctx,
3009                                              struct encode_state *encode_state,
3010                                              struct intel_encoder_context *encoder_context,
3011                                              struct i965_gpe_context *gpe_context)
3012 {
3013     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
3014     struct vp8_mbenc_i_frame_curbe_data *pcmd = i965_gpe_context_map_curbe(gpe_context);
3015     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3016     VAQMatrixBufferVP8 *quant_param = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
3017     unsigned int segmentation_enabled = pic_param->pic_flags.bits.segmentation_enabled;
3018     unsigned short y_quanta_dc_idx, uv_quanta_dc_idx, uv_quanta_ac_idx;
3019
3020     if (!pcmd)
3021         return;
3022
3023     memset(pcmd, 0, sizeof(*pcmd));
3024
3025     pcmd->dw0.frame_width = vp8_context->frame_width;
3026     pcmd->dw0.frame_height = vp8_context->frame_height;
3027
3028     pcmd->dw1.frame_type = 0; /* key frame */
3029     pcmd->dw1.enable_segmentation = segmentation_enabled;
3030     pcmd->dw1.enable_hw_intra_prediction = (encoder_context->quality_level == ENCODER_LOW_QUALITY) ? 1 : 0;
3031     pcmd->dw1.enable_chroma_ip_enhancement = 1; /* Cannot be disabled */
3032     pcmd->dw1.enable_debug_dumps = 0;
3033     pcmd->dw1.enable_mpu_histogram_update = 1;
3034     pcmd->dw1.vme_distortion_measure = 2; /* HAAR transform */
3035     pcmd->dw1.vme_enable_tm_check = 0;
3036
3037     QUANT_INDEX(y_quanta_dc_idx, 0, 0);
3038     pcmd->dw2.lambda_seg_0 = (unsigned short)((quant_dc_vp8[y_quanta_dc_idx] * quant_dc_vp8[y_quanta_dc_idx]) / 4);
3039
3040     if (segmentation_enabled) {
3041         QUANT_INDEX(y_quanta_dc_idx, 1, 0);
3042         pcmd->dw2.lambda_seg_1 = (unsigned short)((quant_dc_vp8[y_quanta_dc_idx] * quant_dc_vp8[y_quanta_dc_idx]) / 4);
3043
3044         QUANT_INDEX(y_quanta_dc_idx, 2, 0);
3045         pcmd->dw3.lambda_seg_2 = (unsigned short)((quant_dc_vp8[y_quanta_dc_idx] * quant_dc_vp8[y_quanta_dc_idx]) / 4);
3046
3047         QUANT_INDEX(y_quanta_dc_idx, 3, 0);
3048         pcmd->dw3.lambda_seg_3 = (unsigned short)((quant_dc_vp8[y_quanta_dc_idx] * quant_dc_vp8[y_quanta_dc_idx]) / 4);
3049     }
3050
3051     pcmd->dw4.all_dc_bias_segment_0 = DC_BIAS_SEGMENT_DEFAULT_VAL_VP8;
3052
3053     if (segmentation_enabled) {
3054         pcmd->dw4.all_dc_bias_segment_1 = DC_BIAS_SEGMENT_DEFAULT_VAL_VP8;
3055         pcmd->dw5.all_dc_bias_segment_2 = DC_BIAS_SEGMENT_DEFAULT_VAL_VP8;
3056         pcmd->dw5.all_dc_bias_segment_3 = DC_BIAS_SEGMENT_DEFAULT_VAL_VP8;
3057     }
3058
3059     QUANT_INDEX(uv_quanta_dc_idx, 0, 1);
3060     pcmd->dw6.chroma_dc_de_quant_segment_0 = quant_dc_vp8[uv_quanta_dc_idx];
3061
3062     if (segmentation_enabled) {
3063         QUANT_INDEX(uv_quanta_dc_idx, 1, 1);
3064         pcmd->dw6.chroma_dc_de_quant_segment_1 = quant_dc_vp8[uv_quanta_dc_idx];
3065
3066         QUANT_INDEX(uv_quanta_dc_idx, 2, 1);
3067         pcmd->dw7.chroma_dc_de_quant_segment_2 = quant_dc_vp8[uv_quanta_dc_idx];
3068
3069         QUANT_INDEX(uv_quanta_dc_idx, 3, 1);
3070         pcmd->dw7.chroma_dc_de_quant_segment_3 = quant_dc_vp8[uv_quanta_dc_idx];
3071     }
3072
3073     QUANT_INDEX(uv_quanta_ac_idx, 0, 2);
3074     pcmd->dw8.chroma_ac_de_quant_segment0 = quant_ac_vp8[uv_quanta_ac_idx];
3075     pcmd->dw10.chroma_ac0_threshold0_segment0 = (unsigned short)((((((1) << 16) -
3076                                                                     1) * 1.0 / ((1 << 16) / quant_ac_vp8[uv_quanta_ac_idx]) -
3077                                                                    ((48 * quant_ac_vp8[uv_quanta_ac_idx]) >> 7)) *
3078                                                                   (1 << 13) + 3400) / 2217.0);
3079     pcmd->dw10.chroma_ac0_threshold1_segment0 = (unsigned short)((((((2) << 16) -
3080                                                                     1) * 1.0 / ((1 << 16) / quant_ac_vp8[uv_quanta_ac_idx]) -
3081                                                                    ((48 * quant_ac_vp8[uv_quanta_ac_idx]) >> 7)) *
3082                                                                   (1 << 13) + 3400) / 2217.0);
3083
3084     if (segmentation_enabled) {
3085         QUANT_INDEX(uv_quanta_ac_idx, 1, 2);
3086         pcmd->dw8.chroma_ac_de_quant_segment1 = quant_ac_vp8[uv_quanta_ac_idx];
3087         pcmd->dw10.chroma_ac0_threshold0_segment0 = (unsigned short)((((((1) << 16) -
3088                                                                         1) * 1.0 / ((1 << 16) /
3089                                                                                     quant_ac_vp8[uv_quanta_ac_idx]) -
3090                                                                        ((48 * quant_ac_vp8[uv_quanta_ac_idx]) >> 7)) *
3091                                                                       (1 << 13) + 3400) / 2217.0);
3092         pcmd->dw10.chroma_ac0_threshold1_segment0 = (unsigned short)((((((2) << 16) -
3093                                                                         1) * 1.0 / ((1 << 16) /
3094                                                                                     quant_ac_vp8[uv_quanta_ac_idx]) -
3095                                                                        ((48 * quant_ac_vp8[uv_quanta_ac_idx]) >> 7)) *
3096                                                                       (1 << 13) + 3400) / 2217.0);
3097
3098         QUANT_INDEX(uv_quanta_ac_idx, 2, 2);
3099         pcmd->dw9.chroma_ac_de_quant_segment2 = quant_ac_vp8[uv_quanta_ac_idx];
3100         pcmd->dw12.chroma_ac0_threshold0_segment2 = (unsigned short)((((((1) << 16) -
3101                                                                         1) * 1.0 / ((1 << 16) /
3102                                                                                     quant_ac_vp8[uv_quanta_ac_idx]) -
3103                                                                        ((48 * quant_ac_vp8[uv_quanta_ac_idx]) >> 7)) *
3104                                                                       (1 << 13) + 3400) / 2217.0);
3105         pcmd->dw12.chroma_ac0_threshold1_segment2 = (unsigned short)((((((2) << 16) -
3106                                                                         1) * 1.0 / ((1 << 16) /
3107                                                                                     quant_ac_vp8[uv_quanta_ac_idx]) -
3108                                                                        ((48 * quant_ac_vp8[uv_quanta_ac_idx]) >> 7)) *
3109                                                                       (1 << 13) + 3400) / 2217.0);
3110
3111         QUANT_INDEX(uv_quanta_ac_idx, 3, 2);
3112         pcmd->dw9.chroma_ac_de_quant_segment3 = quant_ac_vp8[uv_quanta_ac_idx];
3113         pcmd->dw13.chroma_ac0_threshold0_segment3 = (unsigned short)((((((1) << 16) -
3114                                                                         1) * 1.0 / ((1 << 16) /
3115                                                                                     quant_ac_vp8[uv_quanta_ac_idx]) -
3116                                                                        ((48 * quant_ac_vp8[uv_quanta_ac_idx]) >> 7)) *
3117                                                                       (1 << 13) + 3400) / 2217.0);
3118         pcmd->dw13.chroma_ac0_threshold1_segment3 = (unsigned short)((((((2) << 16) -
3119                                                                         1) * 1.0 / ((1 << 16) /
3120                                                                                     quant_ac_vp8[uv_quanta_ac_idx]) -
3121                                                                        ((48 * quant_ac_vp8[uv_quanta_ac_idx]) >> 7)) *
3122                                                                       (1 << 13) + 3400) / 2217.0);
3123     }
3124
3125     QUANT_INDEX(uv_quanta_dc_idx, 0, 1);
3126     pcmd->dw14.chroma_dc_threshold0_segment0 = (((1) << 16) - 1) / ((1 << 16) / quant_dc_vp8[uv_quanta_dc_idx]) -
3127                                                ((48 * quant_dc_vp8[uv_quanta_dc_idx]) >> 7);
3128     pcmd->dw14.chroma_dc_threshold1_segment0 = (((2) << 16) - 1) / ((1 << 16) / quant_dc_vp8[uv_quanta_dc_idx]) -
3129                                                ((48 * quant_dc_vp8[uv_quanta_dc_idx]) >> 7);
3130     pcmd->dw15.chroma_dc_threshold2_segment0 = (((3) << 16) - 1) / ((1 << 16) / quant_dc_vp8[uv_quanta_dc_idx]) -
3131                                                ((48 * quant_dc_vp8[uv_quanta_dc_idx]) >> 7);
3132     pcmd->dw15.chroma_dc_threshold3_segment0 = (((4) << 16) - 1) / ((1 << 16) / quant_dc_vp8[uv_quanta_dc_idx]) -
3133                                                ((48 * quant_dc_vp8[uv_quanta_dc_idx]) >> 7);
3134
3135     if (segmentation_enabled) {
3136         QUANT_INDEX(uv_quanta_dc_idx, 1, 1);
3137         pcmd->dw16.chroma_dc_threshold0_segment1 = (((1) << 16) - 1) / ((1 << 16) / quant_dc_vp8[uv_quanta_dc_idx]) -
3138                                                    ((48 * quant_dc_vp8[uv_quanta_dc_idx]) >> 7);
3139         pcmd->dw16.chroma_dc_threshold1_segment1 = (((2) << 16) - 1) / ((1 << 16) / quant_dc_vp8[uv_quanta_dc_idx]) -
3140                                                    ((48 * quant_dc_vp8[uv_quanta_dc_idx]) >> 7);
3141         pcmd->dw17.chroma_dc_threshold2_segment1 = (((3) << 16) - 1) / ((1 << 16) / quant_dc_vp8[uv_quanta_dc_idx]) -
3142                                                    ((48 * quant_dc_vp8[uv_quanta_dc_idx]) >> 7);
3143         pcmd->dw17.chroma_dc_threshold3_segment1 = (((4) << 16) - 1) / ((1 << 16) / quant_dc_vp8[uv_quanta_dc_idx]) -
3144                                                    ((48 * quant_dc_vp8[uv_quanta_dc_idx]) >> 7);
3145
3146         QUANT_INDEX(uv_quanta_dc_idx, 2, 1);
3147         pcmd->dw18.chroma_dc_threshold0_segment2 = (((1) << 16) - 1) / ((1 << 16) / quant_dc_vp8[uv_quanta_dc_idx]) -
3148                                                    ((48 * quant_dc_vp8[uv_quanta_dc_idx]) >> 7);
3149         pcmd->dw18.chroma_dc_threshold1_segment2 = (((2) << 16) - 1) / ((1 << 16) / quant_dc_vp8[uv_quanta_dc_idx]) -
3150                                                    ((48 * quant_dc_vp8[uv_quanta_dc_idx]) >> 7);
3151         pcmd->dw19.chroma_dc_threshold2_segment2 = (((3) << 16) - 1) / ((1 << 16) / quant_dc_vp8[uv_quanta_dc_idx]) -
3152                                                    ((48 * quant_dc_vp8[uv_quanta_dc_idx]) >> 7);
3153         pcmd->dw19.chroma_dc_threshold3_segment2 = (((4) << 16) - 1) / ((1 << 16) / quant_dc_vp8[uv_quanta_dc_idx]) -
3154                                                    ((48 * quant_dc_vp8[uv_quanta_dc_idx]) >> 7);
3155
3156         QUANT_INDEX(uv_quanta_dc_idx, 3, 1);
3157         pcmd->dw20.chroma_dc_threshold0_segment3 = (((1) << 16) - 1) / ((1 << 16) / quant_dc_vp8[uv_quanta_dc_idx]) -
3158                                                    ((48 * quant_dc_vp8[uv_quanta_dc_idx]) >> 7);
3159         pcmd->dw20.chroma_dc_threshold1_segment3 = (((2) << 16) - 1) / ((1 << 16) / quant_dc_vp8[uv_quanta_dc_idx]) -
3160                                                    ((48 * quant_dc_vp8[uv_quanta_dc_idx]) >> 7);
3161         pcmd->dw21.chroma_dc_threshold2_segment3 = (((3) << 16) - 1) / ((1 << 16) / quant_dc_vp8[uv_quanta_dc_idx]) -
3162                                                    ((48 * quant_dc_vp8[uv_quanta_dc_idx]) >> 7);
3163         pcmd->dw21.chroma_dc_threshold3_segment3 = (((4) << 16) - 1) / ((1 << 16) / quant_dc_vp8[uv_quanta_dc_idx]) -
3164                                                    ((48 * quant_dc_vp8[uv_quanta_dc_idx]) >> 7);
3165     }
3166
3167     QUANT_INDEX(uv_quanta_ac_idx, 0, 2);
3168     pcmd->dw22.chroma_ac1_threshold_segment0 = ((1 << (16)) - 1) / ((1 << 16) / quant_ac_vp8[uv_quanta_ac_idx]) -
3169                                                ((48 * quant_ac_vp8[uv_quanta_ac_idx]) >> 7);
3170
3171     if (segmentation_enabled) {
3172         QUANT_INDEX(uv_quanta_ac_idx, 1, 2);
3173         pcmd->dw22.chroma_ac1_threshold_segment1 = ((1 << (16)) - 1) / ((1 << 16) / quant_ac_vp8[uv_quanta_ac_idx]) -
3174                                                    ((48 * quant_ac_vp8[uv_quanta_ac_idx]) >> 7);
3175
3176         QUANT_INDEX(uv_quanta_ac_idx, 2, 2);
3177         pcmd->dw23.chroma_ac1_threshold_segment2 = ((1 << (16)) - 1) / ((1 << 16) / quant_ac_vp8[uv_quanta_ac_idx]) -
3178                                                    ((48 * quant_ac_vp8[uv_quanta_ac_idx]) >> 7);
3179
3180         QUANT_INDEX(uv_quanta_ac_idx, 3, 2);
3181         pcmd->dw23.chroma_ac1_threshold_segment3 =
3182             ((1 << (16)) - 1) / ((1 << 16) / quant_ac_vp8[uv_quanta_ac_idx]) -
3183             ((48 * quant_ac_vp8[uv_quanta_ac_idx]) >> 7);
3184     }
3185
3186     QUANT_INDEX(uv_quanta_dc_idx, 0, 0);
3187     pcmd->dw24.vme_16x16_cost_segment0 = i_frame_vme_costs_vp8[uv_quanta_dc_idx & 0x7F][0];
3188     pcmd->dw25.vme_4x4_cost_segment0 = i_frame_vme_costs_vp8[uv_quanta_dc_idx & 0x7F][1];
3189     pcmd->dw26.vme_16x16_non_dc_penalty_segment0 = i_frame_vme_costs_vp8[uv_quanta_dc_idx & 0x7F][2];
3190     pcmd->dw27.vme_4x4_non_dc_penalty_segment0 = i_frame_vme_costs_vp8[uv_quanta_dc_idx & 0x7F][3];
3191
3192     if (segmentation_enabled) {
3193         QUANT_INDEX(uv_quanta_dc_idx, 1, 0);
3194         pcmd->dw24.vme_16x16_cost_segment1 = i_frame_vme_costs_vp8[uv_quanta_dc_idx & 0x7F][0];
3195         pcmd->dw25.vme_4x4_cost_segment1 = i_frame_vme_costs_vp8[uv_quanta_dc_idx & 0x7F][1];
3196         pcmd->dw26.vme_16x16_non_dc_penalty_segment1 = i_frame_vme_costs_vp8[uv_quanta_dc_idx & 0x7F][2];
3197         pcmd->dw27.vme_4x4_non_dc_penalty_segment1 = i_frame_vme_costs_vp8[uv_quanta_dc_idx & 0x7F][3];
3198
3199         QUANT_INDEX(uv_quanta_dc_idx, 2, 0);
3200         pcmd->dw24.vme_16x16_cost_segment2 = i_frame_vme_costs_vp8[uv_quanta_dc_idx & 0x7F][0];
3201         pcmd->dw25.vme_4x4_cost_segment2 = i_frame_vme_costs_vp8[uv_quanta_dc_idx & 0x7F][1];
3202         pcmd->dw26.vme_16x16_non_dc_penalty_segment2 = i_frame_vme_costs_vp8[uv_quanta_dc_idx & 0x7F][2];
3203         pcmd->dw27.vme_4x4_non_dc_penalty_segment2 = i_frame_vme_costs_vp8[uv_quanta_dc_idx & 0x7F][3];
3204
3205         QUANT_INDEX(uv_quanta_dc_idx, 3, 0);
3206         pcmd->dw24.vme_16x16_cost_segment3 = i_frame_vme_costs_vp8[uv_quanta_dc_idx & 0x7F][0];
3207         pcmd->dw25.vme_4x4_cost_segment3 = i_frame_vme_costs_vp8[uv_quanta_dc_idx & 0x7F][1];
3208         pcmd->dw26.vme_16x16_non_dc_penalty_segment3 = i_frame_vme_costs_vp8[uv_quanta_dc_idx & 0x7F][2];
3209         pcmd->dw27.vme_4x4_non_dc_penalty_segment3 = i_frame_vme_costs_vp8[uv_quanta_dc_idx & 0x7F][3];
3210     }
3211
3212     pcmd->dw32.mb_enc_per_mb_out_data_surf_bti = VP8_BTI_MBENC_PER_MB_OUT;
3213     pcmd->dw33.mb_enc_curr_y_bti = VP8_BTI_MBENC_CURR_Y;
3214     pcmd->dw34.mb_enc_curr_uv_bti = VP8_BTI_MBENC_CURR_Y;
3215     pcmd->dw35.mb_mode_cost_luma_bti = VP8_BTI_MBENC_MB_MODE_COST_LUMA;
3216     pcmd->dw36.mb_enc_block_mode_cost_bti = VP8_BTI_MBENC_BLOCK_MODE_COST;
3217     pcmd->dw37.chroma_recon_surf_bti = VP8_BTI_MBENC_CHROMA_RECON;
3218     pcmd->dw38.segmentation_map_bti = VP8_BTI_MBENC_SEGMENTATION_MAP;
3219     pcmd->dw39.histogram_bti = VP8_BTI_MBENC_HISTOGRAM;
3220     pcmd->dw40.mb_enc_vme_debug_stream_out_bti = VP8_BTI_MBENC_I_VME_DEBUG_STREAMOUT;
3221     pcmd->dw41.vme_bti = VP8_BTI_MBENC_VME;
3222     pcmd->dw42.idist_surface_bti = VP8_BTI_MBENC_IDIST;
3223     pcmd->dw43.curr_y_down_scaled_bti = VP8_BTI_MBENC_CURR_Y_DOWNSCALED;
3224     pcmd->dw44.vme_coarse_intra_bti = VP8_BTI_MBENC_VME_COARSE_INTRA;
3225
3226     i965_gpe_context_unmap_curbe(gpe_context);
3227 }
3228
3229 static void
3230 i965_encoder_vp8_vme_mbenc_set_p_frame_curbe(VADriverContextP ctx,
3231                                              struct encode_state *encode_state,
3232                                              struct intel_encoder_context *encoder_context,
3233                                              struct i965_gpe_context *gpe_context)
3234 {
3235     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
3236     struct vp8_mbenc_p_frame_curbe_data *pcmd = i965_gpe_context_map_curbe(gpe_context);
3237     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3238     VAQMatrixBufferVP8 *quant_param = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
3239     unsigned int segmentation_enabled = pic_param->pic_flags.bits.segmentation_enabled;
3240     unsigned short qp_seg0, qp_seg1, qp_seg2, qp_seg3;
3241     unsigned char me_method = (encoder_context->quality_level == ENCODER_DEFAULT_QUALITY) ? 6 : 4;
3242
3243     if (!pcmd)
3244         return;
3245
3246     memset(pcmd, 0, sizeof(*pcmd));
3247
3248     QUANT_INDEX(qp_seg0, 0, 0);
3249     QUANT_INDEX(qp_seg1, 0, 0);
3250     QUANT_INDEX(qp_seg2, 0, 0);
3251     QUANT_INDEX(qp_seg3, 3, 0);
3252
3253     pcmd->dw0.frame_width = vp8_context->frame_width;
3254     pcmd->dw0.frame_height = vp8_context->frame_height;
3255
3256     pcmd->dw1.frame_type = 1;   // P-frame
3257     pcmd->dw1.multiple_pred = (encoder_context->quality_level == ENCODER_DEFAULT_QUALITY) ? 1 :
3258                               ((encoder_context->quality_level == ENCODER_LOW_QUALITY) ? 0 : 2);
3259     pcmd->dw1.hme_enable = vp8_context->hme_enabled;
3260     pcmd->dw1.hme_combine_overlap = 1;
3261     pcmd->dw1.enable_temporal_scalability = 0;
3262     pcmd->dw1.ref_frame_flags = vp8_context->ref_frame_ctrl;
3263     pcmd->dw1.enable_segmentation = segmentation_enabled;
3264     pcmd->dw1.enable_segmentation_info_update = 1;
3265     pcmd->dw1.multi_reference_qp_check = 0;
3266     pcmd->dw1.mode_cost_enable_flag = 1;
3267     pcmd->dw1.main_ref = mainref_table_vp8[vp8_context->ref_frame_ctrl];
3268
3269     pcmd->dw2.lambda_intra_segment0 = quant_dc_vp8[qp_seg0];
3270     pcmd->dw2.lambda_inter_segment0 = (quant_dc_vp8[qp_seg0] >> 2);
3271
3272     pcmd->dw3.lambda_intra_segment1 = quant_dc_vp8[qp_seg1];
3273     pcmd->dw3.lambda_inter_segment1 = (quant_dc_vp8[qp_seg1] >> 2);
3274
3275     pcmd->dw4.lambda_intra_segment2 = quant_dc_vp8[qp_seg2];
3276     pcmd->dw4.lambda_inter_segment2 = (quant_dc_vp8[qp_seg2] >> 2);
3277
3278     pcmd->dw5.lambda_intra_segment3 = quant_dc_vp8[qp_seg3];
3279     pcmd->dw5.lambda_inter_segment3 = (quant_dc_vp8[qp_seg3] >> 2);
3280
3281     pcmd->dw6.reference_frame_sign_bias_3 = pic_param->pic_flags.bits.sign_bias_golden;
3282     pcmd->dw6.reference_frame_sign_bias_2 = pic_param->pic_flags.bits.sign_bias_alternate;
3283     pcmd->dw6.reference_frame_sign_bias_1 = pic_param->pic_flags.bits.sign_bias_golden ^ pic_param->pic_flags.bits.sign_bias_alternate;
3284     pcmd->dw6.reference_frame_sign_bias_0 = 0;
3285
3286     pcmd->dw7.raw_dist_threshold = (encoder_context->quality_level == ENCODER_DEFAULT_QUALITY) ? 50 :
3287                                    ((encoder_context->quality_level == ENCODER_LOW_QUALITY) ? 0 : 100);
3288     pcmd->dw7.temporal_layer_id = 0;
3289
3290     pcmd->dw8.early_ime_successful_stop_threshold = 0;
3291     pcmd->dw8.adaptive_search_enable = (encoder_context->quality_level != ENCODER_LOW_QUALITY) ? 1 : 0;
3292     pcmd->dw8.skip_mode_enable = 1;
3293     pcmd->dw8.bidirectional_mix_disbale = 0;
3294     pcmd->dw8.transform8x8_flag_for_inter_enable = 0;
3295     pcmd->dw8.early_ime_success_enable = 0;
3296
3297     pcmd->dw9.ref_pixel_bias_enable = 0;
3298     pcmd->dw9.unidirection_mix_enable = 0;
3299     pcmd->dw9.bidirectional_weight = 0;
3300     pcmd->dw9.ref_id_polarity_bits = 0;
3301     pcmd->dw9.max_num_of_motion_vectors = 0;
3302
3303     pcmd->dw10.max_fixed_search_path_length = (encoder_context->quality_level == ENCODER_DEFAULT_QUALITY) ? 25 :
3304                                               ((encoder_context->quality_level == ENCODER_LOW_QUALITY) ? 9 : 57);
3305     pcmd->dw10.maximum_search_path_length = 57;
3306
3307     pcmd->dw11.submacro_block_subPartition_mask = 0;
3308     pcmd->dw11.intra_sad_measure_adjustment = 2;
3309     pcmd->dw11.inter_sad_measure_adjustment = 2;
3310     pcmd->dw11.block_based_skip_enable = 0;
3311     pcmd->dw11.bme_disable_for_fbr_message = 0;
3312     pcmd->dw11.forward_trans_form_skip_check_enable = 0;
3313     pcmd->dw11.process_inter_chroma_pixels_mode = 0;
3314     pcmd->dw11.disable_field_cache_allocation = 0;
3315     pcmd->dw11.skip_mode_type = 0;
3316     pcmd->dw11.sub_pel_mode = 3;
3317     pcmd->dw11.dual_search_path_option = 0;
3318     pcmd->dw11.search_control = 0;
3319     pcmd->dw11.reference_access = 0;
3320     pcmd->dw11.source_access = 0;
3321     pcmd->dw11.inter_mb_type_road_map = 0;
3322     pcmd->dw11.source_block_size = 0;
3323
3324     pcmd->dw12.reference_search_windows_height = (encoder_context->quality_level != ENCODER_LOW_QUALITY) ? 40 : 28;
3325     pcmd->dw12.reference_search_windows_width = (encoder_context->quality_level != ENCODER_LOW_QUALITY) ? 48 : 28;
3326
3327     pcmd->dw13.mode_0_3_cost_seg0 = cost_table_vp8[qp_seg0][0];
3328     pcmd->dw14.mode_4_7_cost_seg0 = cost_table_vp8[qp_seg0][1];
3329     pcmd->dw15.mode_8_9_ref_id_chroma_cost_seg0 = cost_table_vp8[qp_seg0][2];
3330
3331     switch (me_method) {
3332     case 2:
3333         memcpy(&(pcmd->dw16), single_su_vp8, sizeof(single_su_vp8));
3334         break;
3335
3336     case 3:
3337         memcpy(&(pcmd->dw16), raster_scan_48x40_vp8, sizeof(raster_scan_48x40_vp8));
3338         break;
3339
3340     case 4:
3341     case 5:
3342         memcpy(&(pcmd->dw16), full_spiral_48x40_vp8, sizeof(full_spiral_48x40_vp8));
3343         break;
3344
3345     case 6:
3346     default:
3347         memcpy(&(pcmd->dw16), diamond_vp8, sizeof(diamond_vp8));
3348         break;
3349     }
3350
3351     pcmd->dw30.mv_0_3_cost_seg0 = cost_table_vp8[qp_seg0][3];
3352     pcmd->dw31.mv_4_7_cost_seg0 = cost_table_vp8[qp_seg0][4];
3353
3354     pcmd->dw32.bilinear_enable = 0;
3355     pcmd->dw32.intra_16x16_no_dc_penalty_segment0 = cost_table_vp8[qp_seg0][5];
3356     pcmd->dw32.intra_16x16_no_dc_penalty_segment1 = cost_table_vp8[qp_seg1][5];
3357
3358     pcmd->dw33.intra_16x16_no_dc_penalty_segment2 = cost_table_vp8[qp_seg2][5];
3359     pcmd->dw33.intra_16x16_no_dc_penalty_segment3 = cost_table_vp8[qp_seg3][5];
3360     pcmd->dw33.hme_combine_len = 8;
3361
3362     /* dw34 to dw 57 */
3363     memcpy(&(pcmd->dw34), mv_ref_cost_context_vp8, 24 * sizeof(unsigned int));
3364
3365     pcmd->dw58.enc_cost_16x16 = 0;
3366     pcmd->dw58.enc_cost_16x8 = 0x73c;
3367
3368     pcmd->dw59.enc_cost_8x8 = 0x365;
3369     pcmd->dw59.enc_cost_4x4 = 0xdc9;
3370
3371     pcmd->dw60.frame_count_probability_ref_frame_cost_0 = 0x0204;
3372     pcmd->dw60.frame_count_probability_ref_frame_cost_1 = 0x006a;
3373
3374     pcmd->dw61.frame_count_probability_ref_frame_cost_2 = 0x0967;
3375     pcmd->dw61.frame_count_probability_ref_frame_cost_3 = 0x0969;
3376
3377     switch (vp8_context->frame_num % vp8_context->gop_size) {
3378     case 1:
3379         pcmd->dw62.average_qp_of_last_ref_frame = quant_dc_vp8[vp8_context->average_i_frame_qp];
3380         pcmd->dw62.average_qp_of_gold_ref_frame = pcmd->dw62.average_qp_of_last_ref_frame;
3381         pcmd->dw62.average_qp_of_alt_ref_frame  = pcmd->dw62.average_qp_of_last_ref_frame;
3382         break;
3383
3384     case 2:
3385         pcmd->dw62.average_qp_of_last_ref_frame = quant_dc_vp8[vp8_context->average_p_frame_qp];
3386         pcmd->dw62.average_qp_of_gold_ref_frame = quant_dc_vp8[vp8_context->average_i_frame_qp];
3387         pcmd->dw62.average_qp_of_alt_ref_frame  = pcmd->dw62.average_qp_of_gold_ref_frame;
3388         break;
3389
3390     case 3:
3391         pcmd->dw62.average_qp_of_last_ref_frame = quant_dc_vp8[vp8_context->average_p_frame_qp];
3392         pcmd->dw62.average_qp_of_gold_ref_frame = quant_dc_vp8[vp8_context->average_p_frame_qp];
3393         pcmd->dw62.average_qp_of_alt_ref_frame  = quant_dc_vp8[vp8_context->average_i_frame_qp];
3394         break;
3395
3396     default:
3397         pcmd->dw62.average_qp_of_last_ref_frame = quant_dc_vp8[vp8_context->average_p_frame_qp];
3398         pcmd->dw62.average_qp_of_gold_ref_frame = pcmd->dw62.average_qp_of_last_ref_frame;
3399         pcmd->dw62.average_qp_of_alt_ref_frame  = pcmd->dw62.average_qp_of_last_ref_frame;
3400         break;
3401     }
3402
3403     pcmd->dw63.intra_4x4_no_dc_penalty_segment0 = cost_table_vp8[qp_seg0][6];
3404     pcmd->dw63.intra_4x4_no_dc_penalty_segment1 = cost_table_vp8[qp_seg1][6];
3405     pcmd->dw63.intra_4x4_no_dc_penalty_segment2 = cost_table_vp8[qp_seg2][6];
3406     pcmd->dw63.intra_4x4_no_dc_penalty_segment3 = cost_table_vp8[qp_seg3][6];
3407
3408     pcmd->dw64.mode_0_3_cost_seg1 = cost_table_vp8[qp_seg1][0];
3409     pcmd->dw65.mode_4_7_cost_seg1 = cost_table_vp8[qp_seg1][1];
3410     pcmd->dw66.mode_8_9_ref_id_chroma_cost_seg1 = cost_table_vp8[qp_seg1][2];
3411
3412     pcmd->dw67.mv_0_3_cost_seg1 = cost_table_vp8[qp_seg1][3];
3413     pcmd->dw68.mv_4_7_cost_seg1 = cost_table_vp8[qp_seg1][4];
3414
3415     pcmd->dw69.mode_0_3_cost_seg2 = cost_table_vp8[qp_seg2][0];
3416     pcmd->dw70.mode_4_7_cost_seg2 = cost_table_vp8[qp_seg2][1];
3417     pcmd->dw71.mode_8_9_ref_id_chroma_cost_seg2 = cost_table_vp8[qp_seg2][2];
3418
3419     pcmd->dw72.mv_0_3_cost_seg2 = cost_table_vp8[qp_seg2][3];
3420     pcmd->dw73.mv_4_7_cost_seg2 = cost_table_vp8[qp_seg2][4];
3421
3422     pcmd->dw74.mode_0_3_cost_seg3 = cost_table_vp8[qp_seg3][0];
3423     pcmd->dw75.mode_4_7_cost_seg3 = cost_table_vp8[qp_seg3][1];
3424     pcmd->dw76.mode_8_9_ref_id_chroma_cost_seg3 = cost_table_vp8[qp_seg3][2];
3425
3426     pcmd->dw77.mv_0_3_cost_seg3 = cost_table_vp8[qp_seg3][3];
3427     pcmd->dw78.mv_4_7_cost_seg3 = cost_table_vp8[qp_seg3][4];
3428
3429     pcmd->dw79.new_mv_skip_threshold_segment0 = new_mv_skip_threshold_vp8[qp_seg0];
3430     pcmd->dw79.new_mv_skip_threshold_segment1 = new_mv_skip_threshold_vp8[qp_seg1];
3431     pcmd->dw80.new_mv_skip_threshold_segment2 = new_mv_skip_threshold_vp8[qp_seg2];
3432     pcmd->dw80.new_mv_skip_threshold_segment3 = new_mv_skip_threshold_vp8[qp_seg3];
3433
3434     pcmd->dw81.per_mb_output_data_surface_bti = VP8_BTI_MBENC_PER_MB_OUT;
3435     pcmd->dw82.current_picture_y_surface_bti = VP8_BTI_MBENC_CURR_Y;
3436     pcmd->dw83.current_picture_interleaved_uv_surface_bti = VP8_BTI_MBENC_CURR_Y;
3437     pcmd->dw84.hme_mv_data_surface_bti = VP8_BTI_MBENC_MV_DATA_FROM_ME;
3438     pcmd->dw85.mv_data_surface_bti = VP8_BTI_MBENC_IND_MV_DATA;
3439     pcmd->dw86.mb_count_per_reference_frame_bti = VP8_BTI_MBENC_REF_MB_COUNT;
3440     pcmd->dw87.vme_inter_prediction_bti = VP8_BTI_MBENC_INTER_PRED;
3441     pcmd->dw88.active_ref1_bti = VP8_BTI_MBENC_REF1_PIC;
3442     pcmd->dw89.active_ref2_bti = VP8_BTI_MBENC_REF2_PIC;
3443     pcmd->dw90.active_ref3_bti = VP8_BTI_MBENC_REF3_PIC;
3444     pcmd->dw91.per_mb_quant_data_bti = VP8_BTI_MBENC_P_PER_MB_QUANT;
3445     pcmd->dw92.segment_map_bti = VP8_BTI_MBENC_SEGMENTATION_MAP;
3446     pcmd->dw93.inter_prediction_distortion_bti = VP8_BTI_MBENC_INTER_PRED_DISTORTION;
3447     pcmd->dw94.histogram_bti = VP8_BTI_MBENC_HISTOGRAM;
3448     pcmd->dw95.pred_mv_data_bti = VP8_BTI_MBENC_PRED_MV_DATA;
3449     pcmd->dw96.mode_cost_update_bti = VP8_BTI_MBENC_MODE_COST_UPDATE;
3450     pcmd->dw97.kernel_debug_dump_bti = VP8_BTI_MBENC_P_VME_DEBUG_STREAMOUT;
3451
3452     i965_gpe_context_unmap_curbe(gpe_context);
3453 }
3454
3455 #undef QUANT_INDEX
3456
3457 static void
3458 i965_encoder_vp8_vme_mbenc_set_curbe(VADriverContextP ctx,
3459                                      struct encode_state *encode_state,
3460                                      struct intel_encoder_context *encoder_context,
3461                                      struct i965_gpe_context *gpe_context)
3462 {
3463     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
3464
3465     if (vp8_context->frame_type == MPEG_I_PICTURE)
3466         i965_encoder_vp8_vme_mbenc_set_i_frame_curbe(ctx, encode_state, encoder_context, gpe_context);
3467     else
3468         i965_encoder_vp8_vme_mbenc_set_p_frame_curbe(ctx, encode_state, encoder_context, gpe_context);
3469 }
3470
3471 static void
3472 i965_encoder_vp8_vme_mbenc_add_surfaces(VADriverContextP ctx,
3473                                         struct encode_state *encode_state,
3474                                         struct intel_encoder_context *encoder_context,
3475                                         struct i965_gpe_context *gpe_context,
3476                                         struct mbenc_surface_parameters *params)
3477 {
3478     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
3479     struct i965_encoder_vp8_surface *vp8_surface;
3480     struct object_surface *obj_surface;
3481     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3482     unsigned int size = vp8_context->frame_width_in_mbs * vp8_context->frame_height_in_mbs * 16;
3483     unsigned int segmentation_enabled = pic_param->pic_flags.bits.segmentation_enabled;
3484
3485     /* Per MB output data buffer */
3486     i965_add_buffer_gpe_surface(ctx,
3487                                 gpe_context,
3488                                 &vp8_context->mb_coded_buffer,
3489                                 0,
3490                                 size,
3491                                 0,
3492                                 VP8_BTI_MBENC_PER_MB_OUT);
3493
3494     /* Current input surface Y & UV */
3495     i965_add_2d_gpe_surface(ctx,
3496                             gpe_context,
3497                             encode_state->input_yuv_object,
3498                             0,
3499                             1,
3500                             I965_SURFACEFORMAT_R8_UNORM,
3501                             VP8_BTI_MBENC_CURR_Y);
3502
3503     i965_add_2d_gpe_surface(ctx,
3504                             gpe_context,
3505                             encode_state->input_yuv_object,
3506                             1,
3507                             1,
3508                             I965_SURFACEFORMAT_R8_UNORM,
3509                             VP8_BTI_MBENC_CURR_UV);
3510
3511     /* Current surface for VME */
3512     i965_add_adv_gpe_surface(ctx,
3513                              gpe_context,
3514                              encode_state->input_yuv_object,
3515                              VP8_BTI_MBENC_VME);
3516
3517     if (segmentation_enabled) {
3518         /* TODO check the internal segmetation buffer */
3519         dri_bo *bo = NULL;
3520
3521         if (encode_state->encmb_map)
3522             bo = encode_state->encmb_map->bo;
3523
3524         if (bo) {
3525             i965_add_dri_buffer_2d_gpe_surface(ctx,
3526                                                encoder_context,
3527                                                gpe_context,
3528                                                bo,
3529                                                vp8_context->frame_width_in_mbs,
3530                                                vp8_context->frame_height_in_mbs,
3531                                                vp8_context->frame_width_in_mbs,
3532                                                0,
3533                                                I965_SURFACEFORMAT_R8_UNORM,
3534                                                VP8_BTI_MBENC_SEGMENTATION_MAP);
3535         }
3536     }
3537
3538     /* Histogram buffer */
3539     size = VP8_HISTOGRAM_SIZE;
3540     i965_add_buffer_gpe_surface(ctx,
3541                                 gpe_context,
3542                                 &vp8_context->histogram_buffer,
3543                                 1,
3544                                 size,
3545                                 0,
3546                                 VP8_BTI_MBENC_HISTOGRAM);
3547
3548     if (vp8_context->frame_type == MPEG_I_PICTURE) {
3549         i965_add_buffer_2d_gpe_surface(ctx,
3550                                        gpe_context,
3551                                        &vp8_context->mb_mode_cost_luma_buffer,
3552                                        0,
3553                                        I965_SURFACEFORMAT_R8_UNORM,
3554                                        VP8_BTI_MBENC_MB_MODE_COST_LUMA);
3555
3556         i965_add_buffer_2d_gpe_surface(ctx,
3557                                        gpe_context,
3558                                        &vp8_context->block_mode_cost_buffer,
3559                                        0,
3560                                        I965_SURFACEFORMAT_R8_UNORM,
3561                                        VP8_BTI_MBENC_BLOCK_MODE_COST);
3562
3563         /* Chroma recon buffer */
3564         size = vp8_context->frame_width_in_mbs * vp8_context->frame_height_in_mbs * 64;
3565         i965_add_buffer_gpe_surface(ctx,
3566                                     gpe_context,
3567                                     &vp8_context->chroma_recon_buffer,
3568                                     0,
3569                                     size,
3570                                     0,
3571                                     VP8_BTI_MBENC_CHROMA_RECON);
3572
3573         if (params->i_frame_dist_in_use) {
3574             i965_add_buffer_2d_gpe_surface(ctx,
3575                                            gpe_context,
3576                                            params->me_brc_distortion_buffer,
3577                                            1,
3578                                            I965_SURFACEFORMAT_R8_UNORM,
3579                                            VP8_BTI_MBENC_IDIST);
3580
3581
3582             vp8_surface = encode_state->reconstructed_object->private_data;
3583             assert(vp8_surface);
3584
3585             if (vp8_surface && vp8_surface->scaled_4x_surface_obj) {
3586                 obj_surface = vp8_surface->scaled_4x_surface_obj;
3587             } else
3588                 obj_surface = NULL;
3589
3590             if (obj_surface) {
3591                 i965_add_2d_gpe_surface(ctx,
3592                                         gpe_context,
3593                                         obj_surface,
3594                                         0,
3595                                         0,
3596                                         I965_SURFACEFORMAT_R8_UNORM,
3597                                         VP8_BTI_MBENC_CURR_Y_DOWNSCALED);
3598
3599                 i965_add_adv_gpe_surface(ctx,
3600                                          gpe_context,
3601                                          obj_surface,
3602                                          VP8_BTI_MBENC_VME_COARSE_INTRA);
3603             }
3604         }
3605     } else {
3606         size = vp8_context->frame_width_in_mbs * vp8_context->frame_height_in_mbs * 64;
3607
3608         i965_add_buffer_gpe_surface(ctx,
3609                                     gpe_context,
3610                                     &vp8_context->mb_coded_buffer,
3611                                     1,
3612                                     size,
3613                                     vp8_context->mv_offset,
3614                                     VP8_BTI_MBENC_IND_MV_DATA);
3615
3616         if (vp8_context->hme_enabled) {
3617             i965_add_buffer_2d_gpe_surface(ctx,
3618                                            gpe_context,
3619                                            &vp8_context->me_4x_mv_data_buffer,
3620                                            1,
3621                                            I965_SURFACEFORMAT_R8_UNORM,
3622                                            VP8_BTI_MBENC_MV_DATA_FROM_ME);
3623         }
3624
3625         i965_add_buffer_gpe_surface(ctx,
3626                                     gpe_context,
3627                                     &vp8_context->reference_frame_mb_count_buffer,
3628                                     0,
3629                                     32, /* sizeof(unsigned int) * 8 */
3630                                     0,
3631                                     VP8_BTI_MBENC_REF_MB_COUNT);
3632
3633         i965_add_adv_gpe_surface(ctx,
3634                                  gpe_context,
3635                                  encode_state->input_yuv_object,
3636                                  VP8_BTI_MBENC_INTER_PRED);
3637
3638         if (vp8_context->ref_last_frame &&
3639             vp8_context->ref_last_frame->bo) {
3640             obj_surface = vp8_context->ref_last_frame;
3641
3642             switch (vp8_context->ref_frame_ctrl) {
3643             case 1:
3644             case 3:
3645             case 5:
3646             case 7:
3647                 i965_add_adv_gpe_surface(ctx,
3648                                          gpe_context,
3649                                          obj_surface,
3650                                          VP8_BTI_MBENC_REF1_PIC);
3651                 break;
3652             }
3653         }
3654
3655         if (vp8_context->ref_gf_frame &&
3656             vp8_context->ref_gf_frame->bo) {
3657             obj_surface = vp8_context->ref_gf_frame;
3658
3659             switch (vp8_context->ref_frame_ctrl) {
3660             case 2:
3661             case 6:
3662                 i965_add_adv_gpe_surface(ctx,
3663                                          gpe_context,
3664                                          obj_surface,
3665                                          VP8_BTI_MBENC_REF1_PIC);
3666                 break;
3667
3668             case 3:
3669             case 7:
3670                 i965_add_adv_gpe_surface(ctx,
3671                                          gpe_context,
3672                                          obj_surface,
3673                                          VP8_BTI_MBENC_REF2_PIC);
3674                 break;
3675             }
3676         }
3677
3678         if (vp8_context->ref_arf_frame &&
3679             vp8_context->ref_arf_frame->bo) {
3680             obj_surface = vp8_context->ref_arf_frame;
3681
3682             switch (vp8_context->ref_frame_ctrl) {
3683             case 4:
3684                 i965_add_adv_gpe_surface(ctx,
3685                                          gpe_context,
3686                                          obj_surface,
3687                                          VP8_BTI_MBENC_REF1_PIC);
3688                 break;
3689
3690             case 5:
3691             case 6:
3692                 i965_add_adv_gpe_surface(ctx,
3693                                          gpe_context,
3694                                          obj_surface,
3695                                          VP8_BTI_MBENC_REF2_PIC);
3696                 break;
3697
3698             case 7:
3699                 i965_add_adv_gpe_surface(ctx,
3700                                          gpe_context,
3701                                          obj_surface,
3702                                          VP8_BTI_MBENC_REF3_PIC);
3703                 break;
3704             }
3705         }
3706
3707         i965_add_buffer_2d_gpe_surface(ctx,
3708                                        gpe_context,
3709                                        &vp8_context->per_mb_quant_data_buffer,
3710                                        1,
3711                                        I965_SURFACEFORMAT_R8_UNORM,
3712                                        VP8_BTI_MBENC_P_PER_MB_QUANT);
3713
3714         i965_add_buffer_2d_gpe_surface(ctx,
3715                                        gpe_context,
3716                                        &vp8_context->me_4x_distortion_buffer,
3717                                        0,
3718                                        I965_SURFACEFORMAT_R8_UNORM,
3719                                        VP8_BTI_MBENC_INTER_PRED_DISTORTION);
3720
3721         size = vp8_context->frame_width_in_mbs * vp8_context->frame_height_in_mbs * 16;
3722         i965_add_buffer_gpe_surface(ctx,
3723                                     gpe_context,
3724                                     &vp8_context->pred_mv_data_buffer,
3725                                     0,
3726                                     size,
3727                                     0,
3728                                     VP8_BTI_MBENC_PRED_MV_DATA);
3729
3730         size = 16 * sizeof(unsigned int);
3731         i965_add_buffer_gpe_surface(ctx,
3732                                     gpe_context,
3733                                     &vp8_context->mode_cost_update_buffer,
3734                                     1,
3735                                     size,
3736                                     0,
3737                                     VP8_BTI_MBENC_MODE_COST_UPDATE);
3738     }
3739 }
3740
3741 static void
3742 i965_encoder_vp8_vme_mbenc_init_constant_buffer(VADriverContextP ctx,
3743                                                 struct encode_state *encode_state,
3744                                                 struct intel_encoder_context *encoder_context)
3745 {
3746     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
3747     char *pbuffer = NULL;
3748
3749     i965_zero_gpe_resource(&vp8_context->mb_mode_cost_luma_buffer);
3750     i965_zero_gpe_resource(&vp8_context->block_mode_cost_buffer);
3751
3752     pbuffer = i965_map_gpe_resource(&vp8_context->mb_mode_cost_luma_buffer);
3753
3754     if (!pbuffer)
3755         return;
3756
3757     memcpy(pbuffer, mb_mode_cost_luma_vp8, sizeof(mb_mode_cost_luma_vp8));
3758     i965_unmap_gpe_resource(&vp8_context->mb_mode_cost_luma_buffer);
3759
3760     pbuffer = i965_map_gpe_resource(&vp8_context->block_mode_cost_buffer);
3761
3762     if (!pbuffer)
3763         return;
3764
3765     memcpy(pbuffer, block_mode_cost_vp8, sizeof(block_mode_cost_vp8));
3766     i965_unmap_gpe_resource(&vp8_context->block_mode_cost_buffer);
3767 }
3768
3769 static VAStatus
3770 i965_encoder_vp8_vme_mbenc(VADriverContextP ctx,
3771                            struct encode_state *encode_state,
3772                            struct intel_encoder_context *encoder_context,
3773                            int is_phase2,
3774                            int is_iframe_dist)
3775 {
3776     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
3777     struct i965_encoder_vp8_mbenc_context *mbenc_context = &vp8_context->mbenc_context;
3778     struct i965_gpe_table *gpe = vp8_context->gpe_table;
3779     struct i965_gpe_context *gpe_context;
3780     struct mbenc_surface_parameters mbenc_surface_params;
3781     struct gpe_media_object_walker_parameter media_object_walker_param;
3782     struct vp8_encoder_kernel_walker_parameter kernel_walker_param;
3783     int is_intra = (vp8_context->frame_type == MPEG_I_PICTURE);
3784     int gpe_index, media_function;
3785
3786     if (is_iframe_dist) {
3787         gpe_index = VP8_MBENC_I_FRAME_DIST;
3788         media_function = VP8_MEDIA_STATE_ENC_I_FRAME_DIST;
3789     } else if (!is_phase2) {
3790         if (is_intra) {
3791             gpe_index = VP8_MBENC_I_FRAME_LUMA;
3792             media_function = VP8_MEDIA_STATE_ENC_I_FRAME_LUMA;
3793         } else {
3794             gpe_index = VP8_MBENC_P_FRAME;
3795             media_function = VP8_MEDIA_STATE_ENC_P_FRAME;
3796         }
3797     } else {
3798         gpe_index = VP8_MBENC_I_FRAME_CHROMA;
3799         media_function = VP8_MEDIA_STATE_ENC_I_FRAME_CHROMA;
3800     }
3801
3802     gpe_context = &mbenc_context->gpe_contexts[gpe_index];
3803
3804     if (!is_phase2 || (is_phase2 && vp8_context->brc_mbenc_phase1_ignored)) {
3805         if (!vp8_context->mbenc_curbe_updated_in_brc_update || is_iframe_dist) {
3806             VAEncPictureParameterBufferVP8 *pic_param =
3807                 (VAEncPictureParameterBufferVP8 *) encode_state->pic_param_ext->buffer;
3808             unsigned int ref_frame_flag_final, ref_frame_flag;
3809
3810             if (!vp8_context->ref_ctrl_optimization_done) {
3811                 if (!is_intra) {
3812                     ref_frame_flag = VP8_REF_FLAG_ALL;
3813
3814                     if (pic_param->ref_last_frame == pic_param->ref_gf_frame) {
3815                         ref_frame_flag &= ~VP8_REF_FLAG_GOLDEN;
3816                     }
3817
3818                     if (pic_param->ref_last_frame == pic_param->ref_arf_frame) {
3819                         ref_frame_flag &= ~VP8_REF_FLAG_ALT;
3820                     }
3821
3822                     if (pic_param->ref_gf_frame == pic_param->ref_arf_frame) {
3823                         ref_frame_flag &= ~VP8_REF_FLAG_ALT;
3824                     }
3825                 } else {
3826                     ref_frame_flag = VP8_REF_FLAG_LAST;
3827                 }
3828
3829                 switch (vp8_context->ref_frame_ctrl) {
3830                 case 0:
3831                     ref_frame_flag_final = VP8_REF_FLAG_NONE;
3832                     break;
3833
3834                 case 1:
3835                     ref_frame_flag_final = VP8_REF_FLAG_LAST;
3836                     break;
3837
3838                 case 2:
3839                     ref_frame_flag_final = VP8_REF_FLAG_GOLDEN;
3840                     break;
3841
3842                 case 4:
3843                     ref_frame_flag_final = VP8_REF_FLAG_ALT;
3844                     break;
3845
3846                 default:
3847                     ref_frame_flag_final = ref_frame_flag;
3848                 }
3849
3850                 vp8_context->ref_frame_ctrl = ref_frame_flag_final;
3851             }
3852
3853             i965_encoder_vp8_vme_mbenc_set_curbe(ctx, encode_state, encoder_context, gpe_context);
3854         }
3855
3856         if (is_intra) {
3857             i965_encoder_vp8_vme_mbenc_init_constant_buffer(ctx, encode_state, encoder_context);
3858         }
3859
3860         if (vp8_context->brc_distortion_buffer_need_reset && is_iframe_dist) {
3861             i965_encoder_vp8_vme_init_brc_distorion_buffer(ctx, encoder_context);
3862         }
3863     }
3864
3865     if (!is_phase2 || (is_phase2 && vp8_context->brc_mbenc_phase1_ignored)) {
3866         i965_zero_gpe_resource(&vp8_context->histogram_buffer);
3867     }
3868
3869     gpe->reset_binding_table(ctx, gpe_context);
3870
3871     memset(&mbenc_surface_params, 0, sizeof(mbenc_surface_params));
3872     mbenc_surface_params.i_frame_dist_in_use = is_iframe_dist;
3873
3874     if (is_iframe_dist)
3875         mbenc_surface_params.me_brc_distortion_buffer = &vp8_context->brc_distortion_buffer;
3876     else
3877         mbenc_surface_params.me_brc_distortion_buffer = &vp8_context->me_4x_distortion_buffer;
3878
3879     i965_encoder_vp8_vme_mbenc_add_surfaces(ctx, encode_state, encoder_context, gpe_context, &mbenc_surface_params);
3880
3881     gpe->setup_interface_data(ctx, gpe_context);
3882
3883     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3884
3885     kernel_walker_param.use_scoreboard = vp8_context->use_hw_scoreboard;
3886
3887     if (is_iframe_dist) {
3888         kernel_walker_param.resolution_x = vp8_context->down_scaled_width_in_mb4x;
3889         kernel_walker_param.resolution_y = vp8_context->down_scaled_height_in_mb4x;
3890     } else {
3891         kernel_walker_param.resolution_x = vp8_context->frame_width_in_mbs;
3892         kernel_walker_param.resolution_y = vp8_context->frame_height_in_mbs;
3893     }
3894
3895     if (is_intra && !is_phase2)
3896         kernel_walker_param.no_dependency = 1;
3897     else
3898         kernel_walker_param.walker_degree = VP8_ENCODER_45_DEGREE;
3899
3900     i965_init_media_object_walker_parameters(encoder_context, &kernel_walker_param, &media_object_walker_param);
3901
3902     i965_run_kernel_media_object_walker(ctx, encoder_context, gpe_context, media_function, &media_object_walker_param);
3903
3904     return VA_STATUS_SUCCESS;
3905 }
3906
3907 static void
3908 i965_encoder_vp8_vme_brc_update_set_curbe(VADriverContextP ctx,
3909                                           struct encode_state *encode_state,
3910                                           struct intel_encoder_context *encoder_context,
3911                                           struct i965_gpe_context *gpe_context)
3912 {
3913     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
3914     struct vp8_brc_update_curbe_data *pcmd = i965_gpe_context_map_curbe(gpe_context);
3915     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3916     VAQMatrixBufferVP8 *quant_param = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
3917     int is_intra = (vp8_context->frame_type == MPEG_I_PICTURE);
3918
3919     if (!pcmd)
3920         return;
3921
3922     memset(pcmd, 0, sizeof(*pcmd));
3923
3924     pcmd->dw2.picture_header_size = 0;
3925
3926     pcmd->dw3.start_global_adjust_frame0 = 10;
3927     pcmd->dw3.start_global_adjust_frame1 = 50;
3928
3929     pcmd->dw4.start_global_adjust_frame2 = 100;
3930     pcmd->dw4.start_global_adjust_frame3 = 150;
3931
3932     pcmd->dw5.target_size_flag = 0;
3933
3934     if (vp8_context->brc_init_current_target_buf_full_in_bits > (double)vp8_context->brc_init_reset_buf_size_in_bits) {
3935         vp8_context->brc_init_current_target_buf_full_in_bits -= (double)vp8_context->brc_init_reset_buf_size_in_bits;
3936         pcmd->dw5.target_size_flag = 1;
3937     }
3938
3939     pcmd->dw0.target_size = (unsigned int)vp8_context->brc_init_current_target_buf_full_in_bits;
3940
3941     pcmd->dw5.curr_frame_type = is_intra ? 2 : 0;
3942     pcmd->dw5.brc_flag = 16 * vp8_context->internal_rate_mode;
3943     pcmd->dw5.max_num_paks = vp8_context->num_brc_pak_passes;
3944
3945     pcmd->dw6.tid = 0;
3946     pcmd->dw6.num_t_levels = 1;
3947
3948     pcmd->dw8.start_global_adjust_mult0 = 1;
3949     pcmd->dw8.start_global_adjust_mult1 = 1;
3950     pcmd->dw8.start_global_adjust_mult2 = 3;
3951     pcmd->dw8.start_global_adjust_mult3 = 2;
3952
3953     pcmd->dw9.start_global_adjust_div0 = 40;
3954     pcmd->dw9.start_global_adjust_div1 = 5;
3955     pcmd->dw9.start_global_adjust_div2 = 5;
3956     pcmd->dw9.start_global_adjust_mult4 = 1;
3957
3958     pcmd->dw10.start_global_adjust_div3 = 3;
3959     pcmd->dw10.start_global_adjust_div4 = 1;
3960     pcmd->dw10.qp_threshold0 = 20;
3961     pcmd->dw10.qp_threshold1 = 40;
3962
3963     pcmd->dw11.qp_threshold2 = 60;
3964     pcmd->dw11.qp_threshold3 = 90;
3965     pcmd->dw11.g_rate_ratio_threshold0 = 40;
3966     pcmd->dw11.g_rate_ratio_threshold1 = 75;
3967
3968     pcmd->dw12.g_rate_ratio_threshold2 = 97;
3969     pcmd->dw12.g_rate_ratio_threshold3 = 103;
3970     pcmd->dw12.g_rate_ratio_threshold4 = 125;
3971     pcmd->dw12.g_rate_ratio_threshold5 = 160;
3972
3973     pcmd->dw13.g_rate_ratio_threshold_qp0 = -3;
3974     pcmd->dw13.g_rate_ratio_threshold_qp1 = -2;
3975     pcmd->dw13.g_rate_ratio_threshold_qp2 = -1;
3976     pcmd->dw13.g_rate_ratio_threshold_qp3 = 0;
3977
3978     pcmd->dw14.g_rate_ratio_threshold_qp4 = 1;
3979     pcmd->dw14.g_rate_ratio_threshold_qp5 = 2;
3980     pcmd->dw14.g_rate_ratio_threshold_qp6 = 3;
3981     pcmd->dw14.index_of_previous_qp = 0;
3982
3983     pcmd->dw15.frame_width_in_mb = vp8_context->frame_width_in_mbs;
3984     pcmd->dw15.frame_height_in_mb = vp8_context->frame_height_in_mbs;
3985
3986     pcmd->dw16.p_frame_qp_seg0 = quant_param->quantization_index[0];
3987     pcmd->dw16.p_frame_qp_seg1 = quant_param->quantization_index[1];
3988     pcmd->dw16.p_frame_qp_seg2 = quant_param->quantization_index[2];
3989     pcmd->dw16.p_frame_qp_seg3 = quant_param->quantization_index[3];
3990
3991     pcmd->dw17.key_frame_qp_seg0 = quant_param->quantization_index[0];
3992     pcmd->dw17.key_frame_qp_seg1 = quant_param->quantization_index[1];
3993     pcmd->dw17.key_frame_qp_seg2 = quant_param->quantization_index[2];
3994     pcmd->dw17.key_frame_qp_seg3 = quant_param->quantization_index[3];
3995
3996     pcmd->dw18.qdelta_plane0 = 0;
3997     pcmd->dw18.qdelta_plane1 = 0;
3998     pcmd->dw18.qdelta_plane2 = 0;
3999     pcmd->dw18.qdelta_plane3 = 0;
4000
4001     pcmd->dw19.qdelta_plane4 = 0;
4002     pcmd->dw19.main_ref = is_intra ? 0 : mainref_table_vp8[vp8_context->ref_frame_ctrl];
4003     pcmd->dw19.ref_frame_flags = is_intra ? 0 : vp8_context->ref_frame_ctrl;
4004
4005     pcmd->dw20.seg_on = pic_param->pic_flags.bits.segmentation_enabled;
4006     pcmd->dw20.brc_method = vp8_context->internal_rate_mode;
4007     pcmd->dw20.mb_rc = 0;
4008
4009     pcmd->dw20.vme_intra_prediction = (encoder_context->quality_level == ENCODER_LOW_QUALITY) ? 1 : 0;
4010
4011     pcmd->dw22.historyt_buffer_bti = VP8_BTI_BRC_UPDATE_HISTORY;
4012     pcmd->dw23.pak_statistics_bti = VP8_BTI_BRC_UPDATE_PAK_STATISTICS_OUTPUT;
4013     pcmd->dw24.mfx_vp8_encoder_cfg_read_bti = VP8_BTI_BRC_UPDATE_MFX_ENCODER_CFG_READ;
4014     pcmd->dw25.mfx_vp8_encoder_cfg_write_bti = VP8_BTI_BRC_UPDATE_MFX_ENCODER_CFG_WRITE;
4015     pcmd->dw26.mbenc_curbe_read_bti = VP8_BTI_BRC_UPDATE_MBENC_CURBE_READ;
4016     pcmd->dw27.mbenc_curbe_write_bti = VP8_BTI_BRC_UPDATE_MBENC_CURBE_WRITE;
4017     pcmd->dw28.distortion_bti = VP8_BTI_BRC_UPDATE_DISTORTION_SURFACE;
4018     pcmd->dw29.constant_data_bti = VP8_BTI_BRC_UPDATE_CONSTANT_DATA;
4019     pcmd->dw30.segment_map_bti = VP8_BTI_BRC_UPDATE_SEGMENT_MAP;
4020     pcmd->dw31.mpu_curbe_read_bti = VP8_BTI_BRC_UPDATE_MPU_CURBE_READ;
4021     pcmd->dw32.mpu_curbe_write_bti = VP8_BTI_BRC_UPDATE_MPU_CURBE_WRITE;
4022     pcmd->dw33.tpu_curbe_read_bti = VP8_BTI_BRC_UPDATE_TPU_CURBE_READ;
4023     pcmd->dw34.tpu_curbe_write_bti = VP8_BTI_BRC_UPDATE_TPU_CURBE_WRITE;
4024
4025     vp8_context->brc_init_current_target_buf_full_in_bits += vp8_context->brc_init_reset_input_bits_per_frame;
4026
4027     i965_gpe_context_unmap_curbe(gpe_context);
4028 }
4029
4030 static void
4031 i965_encoder_vp8_vme_mpu_set_curbe(VADriverContextP ctx,
4032                                    struct encode_state *encode_state,
4033                                    struct intel_encoder_context *encoder_context,
4034                                    struct i965_gpe_context *gpe_context);
4035 static void
4036 i965_encoder_vp8_pak_tpu_set_curbe(VADriverContextP ctx,
4037                                    struct encode_state *encode_state,
4038                                    struct intel_encoder_context *encoder_context,
4039                                    struct i965_gpe_context *gpe_context);
4040
4041 static void
4042 i965_encoder_vp8_vme_brc_update_add_surfaces(VADriverContextP ctx,
4043                                              struct encode_state *encode_state,
4044                                              struct intel_encoder_context *encoder_context,
4045                                              struct i965_gpe_context *gpe_context,
4046                                              struct brc_update_surface_parameters *params)
4047 {
4048     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
4049     unsigned int size;
4050     int is_intra = (vp8_context->frame_type == MPEG_I_PICTURE);
4051
4052     /* BRC history buffer */
4053     size = VP8_BRC_HISTORY_BUFFER_SIZE;
4054     i965_add_buffer_gpe_surface(ctx,
4055                                 gpe_context,
4056                                 &vp8_context->brc_history_buffer,
4057                                 0,
4058                                 size,
4059                                 0,
4060                                 VP8_BTI_BRC_UPDATE_HISTORY);
4061
4062     /* PAK Statistics buffer */
4063     size = sizeof(struct vp8_brc_pak_statistics);
4064     i965_add_buffer_gpe_surface(ctx,
4065                                 gpe_context,
4066                                 &vp8_context->brc_pak_statistics_buffer,
4067                                 0,
4068                                 size,
4069                                 0,
4070                                 VP8_BTI_BRC_UPDATE_PAK_STATISTICS_OUTPUT);
4071
4072     /* Encoder CFG command surface - read only */
4073     size = VP8_BRC_IMG_STATE_SIZE_PER_PASS * VP8_BRC_MAXIMUM_NUM_PASSES;
4074     i965_add_buffer_gpe_surface(ctx,
4075                                 gpe_context,
4076                                 &vp8_context->brc_vp8_cfg_command_write_buffer,
4077                                 0,
4078                                 size,
4079                                 0,
4080                                 VP8_BTI_BRC_UPDATE_MFX_ENCODER_CFG_READ);
4081
4082     /* Encoder CFG command surface - write only */
4083     i965_add_buffer_gpe_surface(ctx,
4084                                 gpe_context,
4085                                 &vp8_context->brc_vp8_cfg_command_write_buffer,
4086                                 0,
4087                                 size,
4088                                 0,
4089                                 VP8_BTI_BRC_UPDATE_MFX_ENCODER_CFG_WRITE);
4090
4091     /* MBEnc CURBE Buffer - read only */
4092     size = ALIGN(params->mbenc_gpe_context->curbe.length, 64);
4093     i965_add_dri_buffer_gpe_surface(ctx,
4094                                     encoder_context,
4095                                     gpe_context,
4096                                     params->mbenc_gpe_context->curbe.bo,
4097                                     0,
4098                                     size,
4099                                     params->mbenc_gpe_context->curbe.offset,
4100                                     VP8_BTI_BRC_UPDATE_MBENC_CURBE_READ);
4101
4102     /* MBEnc CURBE Buffer - write only */
4103     i965_add_dri_buffer_gpe_surface(ctx,
4104                                     encoder_context,
4105                                     gpe_context,
4106                                     params->mbenc_gpe_context->curbe.bo,
4107                                     0,
4108                                     size,
4109                                     params->mbenc_gpe_context->curbe.offset,
4110                                     VP8_BTI_BRC_UPDATE_MBENC_CURBE_WRITE);
4111
4112     /* BRC Distortion data buffer - input/output */
4113     i965_add_buffer_2d_gpe_surface(ctx,
4114                                    gpe_context,
4115                                    is_intra ? &vp8_context->brc_distortion_buffer : &vp8_context->me_4x_distortion_buffer,
4116                                    1,
4117                                    I965_SURFACEFORMAT_R8_UNORM,
4118                                    VP8_BTI_BRC_UPDATE_DISTORTION_SURFACE);
4119
4120     /* Constant Data Surface */
4121     size = VP8_BRC_CONSTANT_DATA_SIZE;
4122     i965_add_buffer_gpe_surface(ctx,
4123                                 gpe_context,
4124                                 &vp8_context->brc_vp8_constant_data_buffer,
4125                                 0,
4126                                 size,
4127                                 0,
4128                                 VP8_BTI_BRC_UPDATE_CONSTANT_DATA);
4129
4130     /* Segmap surface */
4131     i965_add_buffer_2d_gpe_surface(ctx,
4132                                    gpe_context,
4133                                    &vp8_context->brc_segment_map_buffer,
4134                                    0,
4135                                    I965_SURFACEFORMAT_R8_UNORM,
4136                                    VP8_BTI_BRC_UPDATE_SEGMENT_MAP);
4137
4138     /* MPU CURBE Buffer - read only */
4139     size = ALIGN(params->mpu_gpe_context->curbe.length, 64);
4140     i965_add_dri_buffer_gpe_surface(ctx,
4141                                     encoder_context,
4142                                     gpe_context,
4143                                     params->mpu_gpe_context->curbe.bo,
4144                                     0,
4145                                     size,
4146                                     params->mpu_gpe_context->curbe.offset,
4147                                     VP8_BTI_BRC_UPDATE_MPU_CURBE_READ);
4148
4149     /* MPU CURBE Buffer - write only */
4150     size = ALIGN(params->mpu_gpe_context->curbe.length, 64);
4151     i965_add_dri_buffer_gpe_surface(ctx,
4152                                     encoder_context,
4153                                     gpe_context,
4154                                     params->mpu_gpe_context->curbe.bo,
4155                                     0,
4156                                     size,
4157                                     params->mpu_gpe_context->curbe.offset,
4158                                     VP8_BTI_BRC_UPDATE_MPU_CURBE_WRITE);
4159
4160     /* TPU CURBE Buffer - read only */
4161     size = ALIGN(params->tpu_gpe_context->curbe.length, 64);
4162     i965_add_dri_buffer_gpe_surface(ctx,
4163                                     encoder_context,
4164                                     gpe_context,
4165                                     params->tpu_gpe_context->curbe.bo,
4166                                     0,
4167                                     size,
4168                                     params->tpu_gpe_context->curbe.offset,
4169                                     VP8_BTI_BRC_UPDATE_TPU_CURBE_READ);
4170
4171     /* TPU CURBE Buffer - write only */
4172     size = ALIGN(params->tpu_gpe_context->curbe.length, 64);
4173     i965_add_dri_buffer_gpe_surface(ctx,
4174                                     encoder_context,
4175                                     gpe_context,
4176                                     params->tpu_gpe_context->curbe.bo,
4177                                     0,
4178                                     size,
4179                                     params->tpu_gpe_context->curbe.offset,
4180                                     VP8_BTI_BRC_UPDATE_TPU_CURBE_WRITE);
4181 }
4182
4183 static void
4184 i965_encoder_vp8_vme_init_brc_update_constant_data(VADriverContextP ctx,
4185                                                    struct encode_state *encode_state,
4186                                                    struct intel_encoder_context *encoder_context)
4187 {
4188     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
4189     char *pbuffer;
4190
4191     pbuffer = i965_map_gpe_resource(&vp8_context->brc_vp8_constant_data_buffer);
4192
4193     if (!pbuffer)
4194         return;
4195
4196     memcpy(pbuffer,
4197            brc_qpadjustment_distthreshold_maxframethreshold_distqpadjustment_ipb_vp8,
4198            sizeof(brc_qpadjustment_distthreshold_maxframethreshold_distqpadjustment_ipb_vp8));
4199     pbuffer += sizeof(brc_qpadjustment_distthreshold_maxframethreshold_distqpadjustment_ipb_vp8);
4200
4201     memcpy(pbuffer, brc_iframe_cost_vp8, sizeof(brc_iframe_cost_vp8));
4202     pbuffer += sizeof(brc_iframe_cost_vp8);
4203
4204     memcpy(pbuffer, brc_pframe_cost_vp8, sizeof(brc_pframe_cost_vp8));
4205     pbuffer += sizeof(brc_pframe_cost_vp8);
4206
4207     memcpy(pbuffer, quant_dc_vp8, sizeof(quant_dc_vp8));
4208     pbuffer += sizeof(quant_dc_vp8);
4209
4210     memcpy(pbuffer, quant_ac_vp8, sizeof(quant_ac_vp8));
4211     pbuffer += sizeof(quant_ac_vp8);
4212
4213     memcpy(pbuffer, brc_skip_mv_threshold_vp8, sizeof(brc_skip_mv_threshold_vp8));
4214
4215     i965_unmap_gpe_resource(&vp8_context->brc_vp8_constant_data_buffer);
4216 }
4217
4218 static void
4219 i965_encoder_vp8_vme_init_mfx_config_command(VADriverContextP ctx,
4220                                              struct encode_state *encode_state,
4221                                              struct intel_encoder_context *encoder_context,
4222                                              struct vp8_mpu_encoder_config_parameters *params);
4223
4224 static VAStatus
4225 i965_encoder_vp8_vme_brc_update(VADriverContextP ctx,
4226                                 struct encode_state *encode_state,
4227                                 struct intel_encoder_context *encoder_context)
4228 {
4229     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
4230     struct i965_encoder_vp8_brc_update_context *brc_update_context = &vp8_context->brc_update_context;
4231     struct i965_encoder_vp8_mbenc_context *mbenc_context = &vp8_context->mbenc_context;
4232     struct i965_encoder_vp8_mpu_context *mpu_context = &vp8_context->mpu_context;
4233     struct i965_encoder_vp8_tpu_context *tpu_context = &vp8_context->tpu_context;
4234     struct i965_gpe_table *gpe = vp8_context->gpe_table;
4235     struct i965_gpe_context *gpe_context, *mbenc_gpe_context, *mpu_gpe_context, *tpu_gpe_context;
4236     struct brc_update_surface_parameters brc_update_surface_params;
4237     struct gpe_media_object_parameter media_object_param;
4238     struct vp8_mpu_encoder_config_parameters config_params;
4239     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
4240     unsigned int ref_frame_flag_final, ref_frame_flag;
4241     int is_intra = (vp8_context->frame_type == MPEG_I_PICTURE);
4242     int media_function = VP8_MEDIA_STATE_BRC_UPDATE;
4243     int i;
4244
4245     gpe_context = &brc_update_context->gpe_contexts[0];
4246
4247     if (is_intra)
4248         mbenc_gpe_context = &mbenc_context->gpe_contexts[VP8_MBENC_I_FRAME_LUMA];
4249     else
4250         mbenc_gpe_context = &mbenc_context->gpe_contexts[VP8_MBENC_P_FRAME];
4251
4252     mpu_gpe_context = &mpu_context->gpe_contexts[0];
4253     tpu_gpe_context = &tpu_context->gpe_contexts[0];
4254
4255     if (!is_intra) {
4256         ref_frame_flag = VP8_REF_FLAG_ALL;
4257
4258         if (pic_param->ref_last_frame == pic_param->ref_gf_frame) {
4259             ref_frame_flag &= ~VP8_REF_FLAG_GOLDEN;
4260         }
4261
4262         if (pic_param->ref_last_frame == pic_param->ref_arf_frame) {
4263             ref_frame_flag &= ~VP8_REF_FLAG_ALT;
4264         }
4265
4266         if (pic_param->ref_gf_frame == pic_param->ref_arf_frame) {
4267             ref_frame_flag &= ~VP8_REF_FLAG_ALT;
4268         }
4269     } else {
4270         ref_frame_flag = VP8_REF_FLAG_LAST;
4271     }
4272
4273     switch (vp8_context->ref_frame_ctrl) {
4274     case 0:
4275         ref_frame_flag_final = VP8_REF_FLAG_NONE;
4276         break;
4277
4278     case 1:
4279         ref_frame_flag_final = VP8_REF_FLAG_LAST;       // Last Ref only
4280         break;
4281
4282     case 2:
4283         ref_frame_flag_final = VP8_REF_FLAG_GOLDEN;     // Gold Ref only
4284         break;
4285
4286     case 4:
4287         ref_frame_flag_final = VP8_REF_FLAG_ALT;        // Alt Ref only
4288         break;
4289
4290     default:
4291         ref_frame_flag_final = ref_frame_flag;
4292     }
4293
4294     vp8_context->ref_frame_ctrl = ref_frame_flag_final;
4295     i965_encoder_vp8_vme_mbenc_set_curbe(ctx, encode_state, encoder_context, mbenc_gpe_context);
4296     vp8_context->mbenc_curbe_updated_in_brc_update = 1;
4297
4298     /* Set MPU & TPU curbe here */
4299     i965_encoder_vp8_vme_mpu_set_curbe(ctx, encode_state, encoder_context, mpu_gpe_context);
4300     vp8_context->mpu_curbe_updated_in_brc_update = 1;
4301
4302     i965_encoder_vp8_pak_tpu_set_curbe(ctx, encode_state, encoder_context, tpu_gpe_context);
4303     vp8_context->tpu_curbe_updated_in_brc_update = 1;
4304
4305     gpe->context_init(ctx, gpe_context);
4306     gpe->reset_binding_table(ctx, gpe_context);
4307
4308     i965_encoder_vp8_vme_brc_update_set_curbe(ctx, encode_state, encoder_context, gpe_context);
4309
4310     if (vp8_context->brc_constant_buffer_supported) {
4311         i965_encoder_vp8_vme_init_brc_update_constant_data(ctx, encode_state, encoder_context);
4312     }
4313
4314     memset(&config_params, 0, sizeof(config_params));
4315     config_params.buffer_size = VP8_HEADER_METADATA_SIZE;
4316     config_params.config_buffer = &vp8_context->brc_vp8_cfg_command_write_buffer;
4317
4318     for (i = 0; i < VP8_BRC_MAXIMUM_NUM_PASSES; i++) {
4319         config_params.is_first_pass = !i;
4320         config_params.command_offset = i * VP8_HEADER_METADATA_SIZE;
4321         i965_encoder_vp8_vme_init_mfx_config_command(ctx, encode_state, encoder_context, &config_params);
4322     }
4323
4324     vp8_context->mfx_encoder_config_command_initialized = 1;
4325
4326     memset(&brc_update_surface_params, 0, sizeof(brc_update_surface_params));
4327     brc_update_surface_params.mbenc_gpe_context = mbenc_gpe_context;
4328     brc_update_surface_params.mpu_gpe_context = mpu_gpe_context;
4329     brc_update_surface_params.tpu_gpe_context = tpu_gpe_context;
4330     i965_encoder_vp8_vme_brc_update_add_surfaces(ctx, encode_state, encoder_context, gpe_context, &brc_update_surface_params);
4331
4332     gpe->setup_interface_data(ctx, gpe_context);
4333
4334     memset(&media_object_param, 0, sizeof(media_object_param));
4335     i965_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
4336
4337     return VA_STATUS_SUCCESS;
4338 }
4339
4340 static void
4341 i965_encoder_vp8_vme_mpu_set_curbe(VADriverContextP ctx,
4342                                    struct encode_state *encode_state,
4343                                    struct intel_encoder_context *encoder_context,
4344                                    struct i965_gpe_context *gpe_context)
4345 {
4346     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
4347     struct vp8_mpu_curbe_data *pcmd = i965_gpe_context_map_curbe(gpe_context);
4348     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
4349     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
4350     VAQMatrixBufferVP8 *quant_param = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
4351
4352     if (!pcmd)
4353         return;
4354
4355     memset(pcmd, 0, sizeof(*pcmd));
4356
4357     pcmd->dw0.frame_width = vp8_context->frame_width;
4358     pcmd->dw0.frame_height = vp8_context->frame_height;
4359
4360     pcmd->dw1.frame_type = pic_param->pic_flags.bits.frame_type;
4361     pcmd->dw1.version = pic_param->pic_flags.bits.version;
4362     pcmd->dw1.show_frame = pic_param->pic_flags.bits.show_frame;
4363     pcmd->dw1.horizontal_scale_code = seq_param->frame_width_scale;
4364     pcmd->dw1.vertical_scale_code = seq_param->frame_height_scale;
4365     pcmd->dw1.color_space_type = pic_param->pic_flags.bits.color_space;
4366     pcmd->dw1.clamp_type = pic_param->pic_flags.bits.clamping_type;
4367     pcmd->dw1.partition_num_l2 = pic_param->pic_flags.bits.num_token_partitions;
4368     pcmd->dw1.enable_segmentation = pic_param->pic_flags.bits.segmentation_enabled;
4369     pcmd->dw1.seg_map_update = pic_param->pic_flags.bits.segmentation_enabled ? pic_param->pic_flags.bits.update_mb_segmentation_map : 0;
4370     pcmd->dw1.segmentation_feature_update = pic_param->pic_flags.bits.update_segment_feature_data;
4371     pcmd->dw1.segmentation_feature_mode = 1;
4372     pcmd->dw1.loop_filter_type = pic_param->pic_flags.bits.loop_filter_type;
4373     pcmd->dw1.sharpness_level = pic_param->sharpness_level;
4374     pcmd->dw1.loop_filter_adjustment_on = pic_param->pic_flags.bits.loop_filter_adj_enable;
4375     pcmd->dw1.mb_no_coeffiscient_skip = pic_param->pic_flags.bits.mb_no_coeff_skip;
4376     pcmd->dw1.golden_reference_copy_flag = pic_param->pic_flags.bits.copy_buffer_to_golden;
4377     pcmd->dw1.alternate_reference_copy_flag = pic_param->pic_flags.bits.copy_buffer_to_alternate;
4378     pcmd->dw1.last_frame_update = pic_param->pic_flags.bits.refresh_last;
4379     pcmd->dw1.sign_bias_golden = pic_param->pic_flags.bits.sign_bias_golden;
4380     pcmd->dw1.sign_bias_alt_ref = pic_param->pic_flags.bits.sign_bias_alternate;
4381     pcmd->dw1.refresh_entropy_p = pic_param->pic_flags.bits.refresh_entropy_probs;
4382
4383     pcmd->dw2.loop_filter_level = pic_param->loop_filter_level[0];
4384     pcmd->dw2.qindex = quant_param->quantization_index[0];
4385     pcmd->dw2.y1_dc_qindex = quant_param->quantization_index_delta[0];
4386     pcmd->dw2.y2_dc_qindex = quant_param->quantization_index_delta[3];
4387
4388     pcmd->dw3.y2_ac_qindex = quant_param->quantization_index_delta[4];
4389     pcmd->dw3.uv_dc_qindex = quant_param->quantization_index_delta[1];
4390     pcmd->dw3.uv_ac_qindex = quant_param->quantization_index_delta[2];
4391     pcmd->dw3.feature_data0_segment0 = quant_param->quantization_index[0];
4392
4393     pcmd->dw4.feature_data0_segment1 = quant_param->quantization_index[1];
4394     pcmd->dw4.feature_data0_segment2 = quant_param->quantization_index[2];
4395     pcmd->dw4.feature_data0_segment3 = quant_param->quantization_index[3];
4396     pcmd->dw4.feature_data1_segment0 = pic_param->loop_filter_level[0];
4397
4398     pcmd->dw5.feature_data1_segment1 = pic_param->loop_filter_level[1];
4399     pcmd->dw5.feature_data1_segment2 = pic_param->loop_filter_level[2];
4400     pcmd->dw5.feature_data1_segment3 = pic_param->loop_filter_level[3];
4401     pcmd->dw5.ref_lf_delta0 = pic_param->ref_lf_delta[0];
4402
4403     pcmd->dw6.ref_lf_delta1 = pic_param->ref_lf_delta[1];
4404     pcmd->dw6.ref_lf_delta2 = pic_param->ref_lf_delta[2];
4405     pcmd->dw6.ref_lf_delta3 = pic_param->ref_lf_delta[3];
4406     pcmd->dw6.mode_lf_delta0 = pic_param->mode_lf_delta[0];
4407
4408     pcmd->dw7.mode_lf_delta1 = pic_param->mode_lf_delta[1];
4409     pcmd->dw7.mode_lf_delta2 = pic_param->mode_lf_delta[2];
4410     pcmd->dw7.mode_lf_delta3 = pic_param->mode_lf_delta[3];
4411     pcmd->dw7.mc_filter_select = pic_param->pic_flags.bits.version > 0 ? 1 : 0;
4412     pcmd->dw7.chroma_full_pixel_mc_filter_mode = pic_param->pic_flags.bits.version < 3 ? 0 : 1;
4413     pcmd->dw7.max_num_pak_passes = vp8_context->num_brc_pak_passes;
4414     pcmd->dw7.forced_token_surface_read = 1;
4415     pcmd->dw7.mode_cost_enable_flag = 1;
4416
4417     pcmd->dw8.num_t_levels = 1;
4418     pcmd->dw8.temporal_layer_id = 0;
4419
4420     pcmd->dw12.histogram_bti = VP8_BTI_MPU_HISTOGRAM;
4421     pcmd->dw13.reference_mode_probability_bti = VP8_BTI_MPU_REF_MODE_PROBABILITY;
4422     pcmd->dw14.mode_probability_bti = VP8_BTI_MPU_CURR_MODE_PROBABILITY;
4423     pcmd->dw15.reference_token_probability_bti = VP8_BTI_MPU_REF_TOKEN_PROBABILITY;
4424     pcmd->dw16.token_probability_bti = VP8_BTI_MPU_CURR_TOKEN_PROBABILITY;
4425     pcmd->dw17.frame_header_bitstream_bti = VP8_BTI_MPU_HEADER_BITSTREAM;
4426     pcmd->dw18.header_meta_data_bti = VP8_BTI_MPU_HEADER_METADATA;
4427     pcmd->dw19.picture_state_bti = VP8_BTI_MPU_PICTURE_STATE;
4428     pcmd->dw20.mpu_bitstream_bti = VP8_BTI_MPU_MPU_BITSTREAM;
4429     pcmd->dw21.token_bits_data_bti = VP8_BTI_MPU_TOKEN_BITS_DATA_TABLE;
4430     pcmd->dw22.kernel_debug_dump_bti = VP8_BTI_MPU_VME_DEBUG_STREAMOUT;
4431     pcmd->dw23.entropy_cost_bti = VP8_BTI_MPU_ENTROPY_COST_TABLE;
4432     pcmd->dw24.mode_cost_update_bti = VP8_BTI_MPU_MODE_COST_UPDATE;
4433
4434     i965_gpe_context_unmap_curbe(gpe_context);
4435 }
4436
4437 static void
4438 i965_encoder_vp8_vme_mpu_add_surfaces(VADriverContextP ctx,
4439                                       struct encode_state *encode_state,
4440                                       struct intel_encoder_context *encoder_context,
4441                                       struct i965_gpe_context *gpe_context)
4442 {
4443     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
4444     unsigned int size;
4445     unsigned char brc_enabled = (vp8_context->internal_rate_mode == I965_BRC_CBR ||
4446                                  vp8_context->internal_rate_mode == I965_BRC_VBR);
4447
4448     /* Histogram buffer */
4449     size = VP8_HISTOGRAM_SIZE;
4450     i965_add_buffer_gpe_surface(ctx,
4451                                 gpe_context,
4452                                 &vp8_context->histogram_buffer,
4453                                 1,
4454                                 size,
4455                                 0,
4456                                 VP8_BTI_MPU_HISTOGRAM);
4457
4458     // Reference mode probability
4459     size = VP8_MODE_PROPABILITIES_SIZE;
4460     i965_add_buffer_gpe_surface(ctx,
4461                                 gpe_context,
4462                                 &vp8_context->pak_mpu_tpu_ref_mode_probs_buffer,
4463                                 1,
4464                                 size,
4465                                 0,
4466                                 VP8_BTI_MPU_REF_MODE_PROBABILITY);
4467
4468     // Mode probability
4469     i965_add_buffer_gpe_surface(ctx,
4470                                 gpe_context,
4471                                 &vp8_context->pak_mpu_tpu_mode_probs_buffer,
4472                                 1,
4473                                 size,
4474                                 0,
4475                                 VP8_BTI_MPU_CURR_MODE_PROBABILITY);
4476
4477     // Reference Token probability
4478     size = VP8_COEFFS_PROPABILITIES_SIZE;
4479     i965_add_buffer_gpe_surface(ctx,
4480                                 gpe_context,
4481                                 &vp8_context->pak_mpu_tpu_ref_coeff_probs_buffer,
4482                                 1,
4483                                 size,
4484                                 0,
4485                                 VP8_BTI_MPU_REF_TOKEN_PROBABILITY);
4486
4487     // Token probability
4488     i965_add_buffer_gpe_surface(ctx,
4489                                 gpe_context,
4490                                 &vp8_context->pak_mpu_tpu_coeff_probs_buffer,
4491                                 1,
4492                                 size,
4493                                 0,
4494                                 VP8_BTI_MPU_CURR_TOKEN_PROBABILITY);
4495
4496     // Frame header
4497     size = VP8_FRAME_HEADER_SIZE;
4498     i965_add_buffer_gpe_surface(ctx,
4499                                 gpe_context,
4500                                 &vp8_context->pak_frame_header_buffer,
4501                                 0,
4502                                 size,
4503                                 0,
4504                                 VP8_BTI_MPU_HEADER_BITSTREAM);
4505
4506     // Header Metadata
4507     size = VP8_HEADER_METADATA_SIZE;
4508
4509     if (brc_enabled) {
4510         i965_add_buffer_gpe_surface(ctx,
4511                                     gpe_context,
4512                                     &vp8_context->brc_vp8_cfg_command_write_buffer,
4513                                     0,
4514                                     size,
4515                                     0,
4516                                     VP8_BTI_MPU_HEADER_METADATA);
4517     } else {
4518         i965_add_buffer_gpe_surface(ctx,
4519                                     gpe_context,
4520                                     &vp8_context->pak_mpu_tpu_picture_state_buffer,
4521                                     0,
4522                                     size,
4523                                     VP8_HEADER_METADATA_OFFSET,
4524                                     VP8_BTI_MPU_HEADER_METADATA);
4525     }
4526
4527     // Picture state MFX_VP8_PIC_STATE
4528     size = 38 * sizeof(unsigned int);
4529     i965_add_buffer_gpe_surface(ctx,
4530                                 gpe_context,
4531                                 &vp8_context->pak_mpu_tpu_picture_state_buffer,
4532                                 0,
4533                                 size,
4534                                 0,
4535                                 VP8_BTI_MPU_PICTURE_STATE);
4536
4537     // Mpu Bitstream
4538     size = VP8_MPU_BITSTREAM_SIZE;
4539     i965_add_buffer_gpe_surface(ctx,
4540                                 gpe_context,
4541                                 &vp8_context->pak_mpu_tpu_mpu_bitstream_buffer,
4542                                 0,
4543                                 size,
4544                                 0,
4545                                 VP8_BTI_MPU_MPU_BITSTREAM);
4546
4547     // Token bits Data Surface
4548     size = VP8_TOKEN_BITS_DATA_SIZE;
4549     i965_add_buffer_gpe_surface(ctx,
4550                                 gpe_context,
4551                                 &vp8_context->pak_mpu_tpu_token_bits_data_buffer,
4552                                 1,
4553                                 size,
4554                                 0,
4555                                 VP8_BTI_MPU_TOKEN_BITS_DATA_TABLE);
4556
4557     // Entropy cost table
4558     size = VP8_ENTROPY_COST_TABLE_SIZE;
4559     i965_add_buffer_gpe_surface(ctx,
4560                                 gpe_context,
4561                                 &vp8_context->pak_mpu_tpu_entropy_cost_table_buffer,
4562                                 1,
4563                                 size,
4564                                 0,
4565                                 VP8_BTI_MPU_ENTROPY_COST_TABLE);
4566
4567     //Mode Cost Update Surface
4568     size = 16 * sizeof(unsigned int);
4569     i965_add_buffer_gpe_surface(ctx,
4570                                 gpe_context,
4571                                 &vp8_context->mode_cost_update_buffer,
4572                                 0,
4573                                 size,
4574                                 0,
4575                                 VP8_BTI_MPU_MODE_COST_UPDATE);
4576 }
4577
4578 static void
4579 i965_encoder_vp8_vme_update_key_frame_mpu_tpu_buffer(VADriverContextP ctx,
4580                                                      struct encode_state *encode_state,
4581                                                      struct intel_encoder_context *encoder_context)
4582 {
4583     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
4584     char *key_buffer, *pbuffer;
4585
4586     key_buffer = i965_map_gpe_resource(&vp8_context->pak_mpu_tpu_key_frame_token_probability_buffer);
4587
4588     if (!key_buffer)
4589         return;
4590
4591     pbuffer = i965_map_gpe_resource(&vp8_context->pak_mpu_tpu_coeff_probs_buffer);
4592
4593     if (!pbuffer) {
4594         i965_unmap_gpe_resource(&vp8_context->pak_mpu_tpu_key_frame_token_probability_buffer);
4595
4596         return;
4597     }
4598
4599     memcpy(pbuffer, key_buffer, VP8_COEFFS_PROPABILITIES_SIZE);
4600     i965_unmap_gpe_resource(&vp8_context->pak_mpu_tpu_coeff_probs_buffer);
4601     i965_unmap_gpe_resource(&vp8_context->pak_mpu_tpu_key_frame_token_probability_buffer);
4602
4603     pbuffer = i965_map_gpe_resource(&vp8_context->pak_mpu_tpu_ref_coeff_probs_buffer);
4604
4605     if (!pbuffer)
4606         return;
4607
4608     memcpy(pbuffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
4609     i965_unmap_gpe_resource(&vp8_context->pak_mpu_tpu_ref_coeff_probs_buffer);
4610
4611     pbuffer = i965_map_gpe_resource(&vp8_context->pak_mpu_tpu_hw_token_probability_pak_pass_2_buffer);
4612
4613     if (!pbuffer)
4614         return;
4615
4616     memcpy(pbuffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
4617     i965_unmap_gpe_resource(&vp8_context->pak_mpu_tpu_hw_token_probability_pak_pass_2_buffer);
4618 }
4619
4620 static void
4621 i965_encoder_vp8_vme_init_mfx_config_command(VADriverContextP ctx,
4622                                              struct encode_state *encode_state,
4623                                              struct intel_encoder_context *encoder_context,
4624                                              struct vp8_mpu_encoder_config_parameters *params)
4625 {
4626     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
4627     struct vp8_mfx_encoder_cfg_cmd *pcmd;
4628     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
4629     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
4630     VAQMatrixBufferVP8 *quant_param = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
4631     unsigned int segmentation_enabled = pic_param->pic_flags.bits.segmentation_enabled;
4632     int i;
4633     char *pbuffer;
4634     unsigned char brc_enabled = (vp8_context->internal_rate_mode == I965_BRC_CBR ||
4635                                  vp8_context->internal_rate_mode == I965_BRC_VBR);
4636
4637     pbuffer = i965_map_gpe_resource(params->config_buffer);
4638
4639     if (!pbuffer)
4640         return;
4641
4642     pbuffer += params->command_offset;
4643     memset(pbuffer, 0, params->buffer_size);
4644
4645     pcmd = (struct vp8_mfx_encoder_cfg_cmd *)pbuffer;
4646
4647     pcmd->dw0.value = (MFX_VP8_ENCODER_CFG | (sizeof(*pcmd) / 4 - 2));
4648
4649     pcmd->dw1.rate_control_initial_pass = params->is_first_pass ? 1 : 0;
4650     pcmd->dw1.per_segment_delta_qindex_loop_filter_disable  = (params->is_first_pass || !brc_enabled);
4651     pcmd->dw1.token_statistics_output_enable = 1;
4652
4653     if (segmentation_enabled) {
4654         for (i = 1; i < 4; i++) {
4655             if ((quant_param->quantization_index[i] != quant_param->quantization_index[0]) ||
4656                 (pic_param->loop_filter_level[i] != pic_param->loop_filter_level[0])) {
4657                 pcmd->dw1.update_segment_feature_data_flag = 1;
4658                 break;
4659             }
4660         }
4661     }
4662
4663     if (brc_enabled) {
4664         pcmd->dw2.max_frame_bit_count_rate_control_enable_mask = 1;
4665         pcmd->dw2.min_frame_bit_count_rate_control_enable_mask = 1;
4666     }
4667
4668     pcmd->dw22.show_frame = pic_param->pic_flags.bits.show_frame;
4669     pcmd->dw22.bitstream_format_version = pic_param->pic_flags.bits.version;
4670
4671     pcmd->dw23.horizontal_size_code = ((seq_param->frame_width_scale << 14) | seq_param->frame_width);
4672     pcmd->dw23.vertical_size_code = ((seq_param->frame_height_scale << 14) | seq_param->frame_height);
4673
4674     //Add batch buffer end command
4675     pbuffer += sizeof(*pcmd);
4676     *((unsigned int *)pbuffer) = MI_BATCH_BUFFER_END;
4677
4678     i965_unmap_gpe_resource(params->config_buffer);
4679 }
4680
4681 static VAStatus
4682 i965_encoder_vp8_vme_mpu(VADriverContextP ctx,
4683                          struct encode_state *encode_state,
4684                          struct intel_encoder_context *encoder_context)
4685 {
4686     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
4687     struct i965_encoder_vp8_mpu_context *mpu_context = &vp8_context->mpu_context;
4688     struct i965_gpe_table *gpe = vp8_context->gpe_table;
4689     struct gpe_media_object_parameter media_object_param;
4690     struct i965_gpe_context *gpe_context;
4691     struct vp8_mpu_encoder_config_parameters config_params;
4692     int media_function = VP8_MEDIA_STATE_MPU;
4693
4694     gpe_context = &mpu_context->gpe_contexts[0];
4695     /* gpe->context_init(ctx, gpe_context); */
4696     gpe->reset_binding_table(ctx, gpe_context);
4697
4698     if (vp8_context->frame_type == MPEG_I_PICTURE)
4699         i965_encoder_vp8_vme_update_key_frame_mpu_tpu_buffer(ctx, encode_state, encoder_context);
4700
4701     if (!vp8_context->mfx_encoder_config_command_initialized) {
4702         memset(&config_params, 0, sizeof(config_params));
4703         config_params.is_first_pass = !vp8_context->curr_pass;
4704         config_params.command_offset = VP8_HEADER_METADATA_OFFSET;
4705         config_params.buffer_size = VP8_PICTURE_STATE_SIZE;
4706         config_params.config_buffer = &vp8_context->pak_mpu_tpu_picture_state_buffer;
4707         i965_encoder_vp8_vme_init_mfx_config_command(ctx, encode_state, encoder_context, &config_params);
4708     }
4709
4710     if (!vp8_context->mpu_curbe_updated_in_brc_update)
4711         i965_encoder_vp8_vme_mpu_set_curbe(ctx, encode_state, encoder_context, gpe_context);
4712
4713     i965_encoder_vp8_vme_mpu_add_surfaces(ctx, encode_state, encoder_context, gpe_context);
4714     gpe->setup_interface_data(ctx, gpe_context);
4715
4716     memset(&media_object_param, 0, sizeof(media_object_param));
4717     i965_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
4718
4719     return VA_STATUS_SUCCESS;
4720 }
4721
4722 static VAStatus
4723 i965_encoder_vp8_vme_gpe_kernel_function(VADriverContextP ctx,
4724                                          struct encode_state *encode_state,
4725                                          struct intel_encoder_context *encoder_context)
4726 {
4727     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
4728     int is_intra = (vp8_context->frame_type == MPEG_I_PICTURE);
4729     unsigned char brc_enabled = (vp8_context->internal_rate_mode == I965_BRC_CBR ||
4730                                  vp8_context->internal_rate_mode == I965_BRC_VBR);
4731     unsigned char scaling_enabled = vp8_context->hme_supported;
4732     unsigned char scaling_16x_enabled = vp8_context->hme_16x_supported;
4733     unsigned char hme_enabled = vp8_context->hme_enabled;
4734     unsigned char hme_16x_enabled = vp8_context->hme_16x_enabled;
4735
4736     if (brc_enabled) {
4737         if (!vp8_context->brc_initted || vp8_context->brc_need_reset) {
4738             i965_encoder_vp8_vme_brc_init_reset(ctx, encode_state, encoder_context);
4739         }
4740     }
4741
4742     if (scaling_enabled) {
4743         i965_encoder_vp8_vme_scaling(ctx, encode_state, encoder_context, 0);
4744
4745         if (scaling_16x_enabled)
4746             i965_encoder_vp8_vme_scaling(ctx, encode_state, encoder_context, 1);
4747     }
4748
4749     if (hme_enabled) {
4750         if (hme_16x_enabled)
4751             i965_encoder_vp8_vme_me(ctx, encode_state, encoder_context, 1);
4752
4753         i965_encoder_vp8_vme_me(ctx, encode_state, encoder_context, 0);
4754     }
4755
4756     if (brc_enabled) {
4757         if (is_intra) {
4758             i965_encoder_vp8_vme_mbenc(ctx, encode_state, encoder_context, 0, 1);
4759         }
4760
4761         i965_encoder_vp8_vme_brc_update(ctx, encode_state, encoder_context);
4762     }
4763
4764     vp8_context->brc_initted = 1;
4765     vp8_context->brc_mbenc_phase1_ignored = 0;
4766
4767     if (is_intra && encoder_context->quality_level == ENCODER_LOW_QUALITY) {
4768         vp8_context->brc_mbenc_phase1_ignored = 1;
4769     } else {
4770         i965_encoder_vp8_vme_mbenc(ctx, encode_state, encoder_context, 0, 0);
4771     }
4772
4773     if (is_intra) {
4774         i965_encoder_vp8_vme_mbenc(ctx, encode_state, encoder_context, 1, 0);
4775     }
4776
4777     i965_encoder_vp8_vme_mpu(ctx, encode_state, encoder_context);
4778
4779     return VA_STATUS_SUCCESS;
4780 }
4781
4782 static VAStatus
4783 i965_encoder_vp8_vme_gpe_kernel_final(VADriverContextP ctx,
4784                                       struct encode_state *encode_state,
4785                                       struct intel_encoder_context *encoder_context)
4786 {
4787     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
4788     struct i965_encoder_vp8_mbenc_context *mbenc_context = &vp8_context->mbenc_context;
4789     struct i965_encoder_vp8_mpu_context *mpu_context = &vp8_context->mpu_context;
4790
4791     dri_bo_unreference(mbenc_context->luma_chroma_dynamic_buffer);
4792     mbenc_context->luma_chroma_dynamic_buffer = NULL;
4793
4794     dri_bo_unreference(mpu_context->dynamic_buffer);
4795     mpu_context->dynamic_buffer = NULL;
4796
4797     return VA_STATUS_SUCCESS;
4798 }
4799
4800 static void
4801 i965_encoder_vp8_vme_set_status_buffer(VADriverContextP ctx,
4802                                        struct encode_state *encode_state,
4803                                        struct intel_encoder_context *encoder_context)
4804 {
4805     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
4806     struct i965_encoder_vp8_encode_status_buffer *encode_status_buffer = &vp8_context->encode_status_buffer;
4807     struct vp8_encode_status *encode_status;
4808     char *pbuffer;
4809
4810     dri_bo_unreference(encode_status_buffer->bo);
4811     encode_status_buffer->bo = encode_state->coded_buf_object->buffer_store->bo;
4812     dri_bo_reference(encode_status_buffer->bo);
4813
4814     encode_status_buffer->base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
4815     encode_status_buffer->size = ALIGN(sizeof(struct vp8_encode_status), sizeof(unsigned int) * 2);
4816
4817     encode_status_buffer->bitstream_byte_count_offset = offsetof(struct vp8_encode_status, bitstream_byte_count_per_frame);
4818     encode_status_buffer->image_status_mask_offset = offsetof(struct vp8_encode_status, image_status_mask);
4819     encode_status_buffer->image_status_ctrl_offset = offsetof(struct vp8_encode_status, image_status_ctrl);
4820
4821     dri_bo_map(encode_status_buffer->bo, 1);
4822
4823     if (!encode_status_buffer->bo->virtual)
4824         return;
4825
4826     pbuffer = encode_status_buffer->bo->virtual;
4827     pbuffer += encode_status_buffer->base_offset;
4828     encode_status = (struct vp8_encode_status *)pbuffer;
4829     memset(encode_status, 0, sizeof(*encode_status));
4830
4831     dri_bo_unmap(encode_status_buffer->bo);
4832 }
4833
4834 static VAStatus
4835 i965_encoder_vp8_vme_pipeline(VADriverContextP ctx,
4836                               VAProfile profile,
4837                               struct encode_state *encode_state,
4838                               struct intel_encoder_context *encoder_context)
4839 {
4840     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
4841
4842     vp8_context->is_render_context = 1;
4843
4844     i965_encoder_vp8_vme_set_status_buffer(ctx, encode_state, encoder_context);
4845
4846     i965_encoder_vp8_get_paramters(ctx, encode_state, encoder_context);
4847
4848     i965_encoder_vp8_vme_gpe_kernel_init(ctx, encode_state, encoder_context);
4849     i965_encoder_vp8_vme_gpe_kernel_function(ctx, encode_state, encoder_context);
4850     i965_encoder_vp8_vme_gpe_kernel_final(ctx, encode_state, encoder_context);
4851
4852     vp8_context->frame_num++;
4853     vp8_context->brc_need_reset = 0;
4854
4855     vp8_context->mbenc_curbe_updated_in_brc_update = 0;
4856     vp8_context->mpu_curbe_updated_in_brc_update = 0;
4857     vp8_context->mfx_encoder_config_command_initialized = 0;
4858
4859     return VA_STATUS_SUCCESS;
4860 }
4861
4862 static void
4863 i965_encoder_vp8_vme_kernel_context_destroy(struct i965_encoder_vp8_context *vp8_context)
4864 {
4865     struct i965_gpe_table *gpe = vp8_context->gpe_table;
4866     int i;
4867
4868     for (i = 0; i < NUM_VP8_BRC_RESET; i++)
4869         gpe->context_destroy(&vp8_context->brc_init_reset_context.gpe_contexts[i]);
4870
4871     for (i = 0; i < NUM_VP8_SCALING; i++)
4872         gpe->context_destroy(&vp8_context->scaling_context.gpe_contexts[i]);
4873
4874     for (i = 0; i < NUM_VP8_ME; i++)
4875         gpe->context_destroy(&vp8_context->me_context.gpe_contexts[i]);
4876
4877     for (i = 0; i < NUM_VP8_MBENC; i++)
4878         gpe->context_destroy(&vp8_context->mbenc_context.gpe_contexts[i]);
4879
4880     for (i = 0; i < NUM_VP8_BRC_UPDATE; i++)
4881         gpe->context_destroy(&vp8_context->brc_update_context.gpe_contexts[i]);
4882
4883     for (i = 0; i < NUM_VP8_MPU; i++)
4884         gpe->context_destroy(&vp8_context->mpu_context.gpe_contexts[i]);
4885
4886     i965_encoder_vp8_vme_free_resources(vp8_context);
4887 }
4888
4889 static void
4890 i965_encoder_vp8_vme_context_destroy(void *context)
4891 {
4892     struct i965_encoder_vp8_context *vp8_context = context;
4893     struct i965_encoder_vp8_encode_status_buffer *encode_status_buffer = &vp8_context->encode_status_buffer;
4894
4895     i965_encoder_vp8_vme_kernel_context_destroy(vp8_context);
4896
4897     dri_bo_unreference(encode_status_buffer->bo);
4898     encode_status_buffer->bo = NULL;
4899
4900     free(vp8_context);
4901 }
4902
4903 static void
4904 i965_encoder_vp8_vme_brc_init_reset_context_init(VADriverContextP ctx,
4905                                                  struct i965_encoder_vp8_context *vp8_context,
4906                                                  struct i965_encoder_vp8_brc_init_reset_context *brc_init_reset_context)
4907 {
4908     struct i965_gpe_table *gpe = vp8_context->gpe_table;
4909     struct i965_gpe_context *gpe_context = NULL;
4910     struct vp8_encoder_kernel_parameters kernel_params;
4911     struct vp8_encoder_scoreboard_parameters scoreboard_params;
4912     int i;
4913
4914     kernel_params.curbe_size = sizeof(struct vp8_brc_init_reset_curbe_data);
4915     kernel_params.inline_data_size = 0;
4916     kernel_params.external_data_size = 0;
4917
4918     memset(&scoreboard_params, 0, sizeof(scoreboard_params));
4919     scoreboard_params.mask = 0xFF;
4920     scoreboard_params.enable = vp8_context->use_hw_scoreboard;
4921     scoreboard_params.type = vp8_context->use_hw_non_stalling_scoreborad;
4922
4923     for (i = 0; i < NUM_VP8_BRC_RESET; i++) {
4924         gpe_context = &brc_init_reset_context->gpe_contexts[i];
4925         i965_encoder_vp8_gpe_context_init_once(ctx, gpe_context, &kernel_params, vp8_context->idrt_entry_size);
4926         i965_encoder_vp8_gpe_context_vfe_scoreboard_init(gpe_context, &scoreboard_params);
4927         gpe->load_kernels(ctx,
4928                           gpe_context,
4929                           &vp8_kernels_brc_init_reset[i],
4930                           1);
4931     }
4932 }
4933
4934 static void
4935 i965_encoder_vp8_vme_scaling_context_init(VADriverContextP ctx,
4936                                           struct i965_encoder_vp8_context *vp8_context,
4937                                           struct i965_encoder_vp8_scaling_context *scaling_context)
4938 {
4939     struct i965_gpe_table *gpe = vp8_context->gpe_table;
4940     struct i965_gpe_context *gpe_context = NULL;
4941     struct vp8_encoder_kernel_parameters kernel_params;
4942     struct vp8_encoder_scoreboard_parameters scoreboard_params;
4943     int i;
4944
4945     kernel_params.curbe_size = sizeof(struct vp8_scaling_curbe_data);
4946     kernel_params.inline_data_size = 0;
4947     kernel_params.external_data_size = 0;
4948
4949     memset(&scoreboard_params, 0, sizeof(scoreboard_params));
4950     scoreboard_params.mask = 0xFF;
4951     scoreboard_params.enable = vp8_context->use_hw_scoreboard;
4952     scoreboard_params.type = vp8_context->use_hw_non_stalling_scoreborad;
4953
4954     for (i = 0; i < NUM_VP8_SCALING; i++) {
4955         gpe_context = &scaling_context->gpe_contexts[i];
4956         i965_encoder_vp8_gpe_context_init_once(ctx, gpe_context, &kernel_params, vp8_context->idrt_entry_size);
4957         i965_encoder_vp8_gpe_context_vfe_scoreboard_init(gpe_context, &scoreboard_params);
4958         gpe->load_kernels(ctx,
4959                           gpe_context,
4960                           &vp8_kernels_scaling[i],
4961                           1);
4962     }
4963 }
4964
4965 static void
4966 i965_encoder_vp8_vme_me_context_init(VADriverContextP ctx,
4967                                      struct i965_encoder_vp8_context *vp8_context,
4968                                      struct i965_encoder_vp8_me_context *me_context)
4969 {
4970     struct i965_gpe_table *gpe = vp8_context->gpe_table;
4971     struct i965_gpe_context *gpe_context = NULL;
4972     struct vp8_encoder_kernel_parameters kernel_params;
4973     struct vp8_encoder_scoreboard_parameters scoreboard_params;
4974     int i;
4975
4976     kernel_params.curbe_size = sizeof(struct vp8_me_curbe_data);
4977     kernel_params.inline_data_size = 0;
4978     kernel_params.external_data_size = 0;
4979
4980     memset(&scoreboard_params, 0, sizeof(scoreboard_params));
4981     scoreboard_params.mask = 0xFF;
4982     scoreboard_params.enable = vp8_context->use_hw_scoreboard;
4983     scoreboard_params.type = vp8_context->use_hw_non_stalling_scoreborad;
4984
4985     for (i = 0; i < NUM_VP8_ME; i++) {
4986         gpe_context = &me_context->gpe_contexts[i];
4987         i965_encoder_vp8_gpe_context_init_once(ctx, gpe_context, &kernel_params, vp8_context->idrt_entry_size);
4988         i965_encoder_vp8_gpe_context_vfe_scoreboard_init(gpe_context, &scoreboard_params);
4989         gpe->load_kernels(ctx,
4990                           gpe_context,
4991                           &vp8_kernels_me[i],
4992                           1);
4993     }
4994 }
4995
4996 static void
4997 i965_encoder_vp8_vme_mbenc_context_init(VADriverContextP ctx,
4998                                         struct i965_encoder_vp8_context *vp8_context,
4999                                         struct i965_encoder_vp8_mbenc_context *mbenc_context)
5000 {
5001     struct i965_gpe_table *gpe = vp8_context->gpe_table;
5002     struct i965_gpe_context *gpe_context = NULL;
5003     struct vp8_encoder_kernel_parameters kernel_params;
5004     struct vp8_encoder_scoreboard_parameters scoreboard_params;
5005     int i;
5006
5007     kernel_params.curbe_size = MAX(sizeof(struct vp8_mbenc_i_frame_curbe_data), sizeof(struct vp8_mbenc_p_frame_curbe_data));
5008     kernel_params.inline_data_size = 0;
5009     kernel_params.external_data_size = 0;
5010
5011     memset(&scoreboard_params, 0, sizeof(scoreboard_params));
5012     scoreboard_params.mask = 0xFF;
5013     scoreboard_params.enable = vp8_context->use_hw_scoreboard;
5014     scoreboard_params.type = vp8_context->use_hw_non_stalling_scoreborad;
5015
5016     for (i = 0; i < NUM_VP8_MBENC; i++) {
5017         gpe_context = &mbenc_context->gpe_contexts[i];
5018         i965_encoder_vp8_gpe_context_init_once(ctx, gpe_context, &kernel_params, vp8_context->idrt_entry_size);
5019         i965_encoder_vp8_gpe_context_vfe_scoreboard_init(gpe_context, &scoreboard_params);
5020         gpe->load_kernels(ctx,
5021                           gpe_context,
5022                           &vp8_kernels_mbenc[i],
5023                           1);
5024     }
5025 }
5026
5027 static void
5028 i965_encoder_vp8_vme_brc_update_context_init(VADriverContextP ctx,
5029                                              struct i965_encoder_vp8_context *vp8_context,
5030                                              struct i965_encoder_vp8_brc_update_context *brc_update_context)
5031 {
5032     struct i965_gpe_table *gpe = vp8_context->gpe_table;
5033     struct i965_gpe_context *gpe_context = NULL;
5034     struct vp8_encoder_kernel_parameters kernel_params;
5035     struct vp8_encoder_scoreboard_parameters scoreboard_params;
5036     int i;
5037
5038     kernel_params.curbe_size = sizeof(struct vp8_brc_update_curbe_data);
5039     kernel_params.inline_data_size = 0;
5040     kernel_params.external_data_size = 0;
5041
5042     memset(&scoreboard_params, 0, sizeof(scoreboard_params));
5043     scoreboard_params.mask = 0xFF;
5044     scoreboard_params.enable = vp8_context->use_hw_scoreboard;
5045     scoreboard_params.type = vp8_context->use_hw_non_stalling_scoreborad;
5046
5047     for (i = 0; i < NUM_VP8_BRC_UPDATE; i++) {
5048         gpe_context = &brc_update_context->gpe_contexts[i];
5049         i965_encoder_vp8_gpe_context_init_once(ctx, gpe_context, &kernel_params, vp8_context->idrt_entry_size);
5050         i965_encoder_vp8_gpe_context_vfe_scoreboard_init(gpe_context, &scoreboard_params);
5051         gpe->load_kernels(ctx,
5052                           gpe_context,
5053                           &vp8_kernels_brc_update[i],
5054                           1);
5055     }
5056 }
5057
5058 static void
5059 i965_encoder_vp8_vme_mpu_context_init(VADriverContextP ctx,
5060                                       struct i965_encoder_vp8_context *vp8_context,
5061                                       struct i965_encoder_vp8_mpu_context *mpu_context)
5062 {
5063     struct i965_gpe_table *gpe = vp8_context->gpe_table;
5064     struct i965_gpe_context *gpe_context = NULL;
5065     struct vp8_encoder_kernel_parameters kernel_params;
5066     struct vp8_encoder_scoreboard_parameters scoreboard_params;
5067     int i;
5068
5069     kernel_params.curbe_size = sizeof(struct vp8_mpu_curbe_data);
5070     kernel_params.inline_data_size = 0;
5071     kernel_params.external_data_size = 0;
5072
5073     memset(&scoreboard_params, 0, sizeof(scoreboard_params));
5074     scoreboard_params.mask = 0xFF;
5075     scoreboard_params.enable = vp8_context->use_hw_scoreboard;
5076     scoreboard_params.type = vp8_context->use_hw_non_stalling_scoreborad;
5077
5078     for (i = 0; i < NUM_VP8_MPU; i++) {
5079         gpe_context = &mpu_context->gpe_contexts[i];
5080         i965_encoder_vp8_gpe_context_init_once(ctx, gpe_context, &kernel_params, vp8_context->idrt_entry_size);
5081         i965_encoder_vp8_gpe_context_vfe_scoreboard_init(gpe_context, &scoreboard_params);
5082         gpe->load_kernels(ctx,
5083                           gpe_context,
5084                           &vp8_kernels_mpu[i],
5085                           1);
5086     }
5087 }
5088
5089 static Bool
5090 i965_encoder_vp8_vme_var_init(VADriverContextP ctx,
5091                               struct intel_encoder_context *encoder_context,
5092                               struct i965_encoder_vp8_context *vp8_context)
5093 {
5094     struct i965_driver_data *i965 = i965_driver_data(ctx);
5095
5096     vp8_context->mocs = i965->intel.mocs_state;
5097
5098     vp8_context->gpe_table = &i965->gpe_table;
5099
5100     vp8_context->min_scaled_dimension = 48;
5101     vp8_context->min_scaled_dimension_in_mbs = WIDTH_IN_MACROBLOCKS(vp8_context->min_scaled_dimension);
5102
5103     vp8_context->vdbox_idc = BSD_RING0;
5104     vp8_context->vdbox_mmio_base = VDBOX0_MMIO_BASE;
5105
5106     /* TODO: This is a WA for VDBOX loading balance only, */
5107     if (i965->intel.has_bsd2) {
5108         srandom(time(NULL));
5109         vp8_context->vdbox_idc = (random() % 2 ? BSD_RING1 : BSD_RING0);
5110     }
5111
5112     if (vp8_context->vdbox_idc == BSD_RING1)
5113         vp8_context->vdbox_mmio_base = VDBOX1_MMIO_BASE;
5114
5115     vp8_context->frame_type = MPEG_I_PICTURE;
5116
5117     vp8_context->use_hw_scoreboard = 1;
5118     vp8_context->use_hw_non_stalling_scoreborad = 1; /* default: non-stalling */
5119     vp8_context->brc_distortion_buffer_supported = 1;
5120     vp8_context->brc_constant_buffer_supported = 1;
5121     vp8_context->repak_supported = 1;
5122     vp8_context->multiple_pass_brc_supported = 0;
5123     vp8_context->is_first_frame = 1;
5124     vp8_context->is_first_two_frame = 1;
5125     vp8_context->gop_size = 30;
5126     vp8_context->hme_supported = 1;
5127     vp8_context->hme_16x_supported = 1;
5128     vp8_context->hme_enabled = 0;
5129     vp8_context->hme_16x_enabled = 0;
5130     vp8_context->brc_initted = 0;
5131     vp8_context->frame_num = 0;
5132     vp8_context->framerate = (struct intel_fraction) {
5133         30, 1
5134     };
5135
5136     return True;
5137 }
5138
5139 static Bool
5140 i965_encoder_vp8_vme_kernels_context_init(VADriverContextP ctx,
5141                                           struct intel_encoder_context *encoder_context,
5142                                           struct i965_encoder_vp8_context *vp8_context)
5143 {
5144     i965_encoder_vp8_vme_brc_init_reset_context_init(ctx, vp8_context, &vp8_context->brc_init_reset_context);
5145     i965_encoder_vp8_vme_scaling_context_init(ctx, vp8_context, &vp8_context->scaling_context);
5146     i965_encoder_vp8_vme_me_context_init(ctx, vp8_context, &vp8_context->me_context);
5147     i965_encoder_vp8_vme_mbenc_context_init(ctx, vp8_context, &vp8_context->mbenc_context);
5148     i965_encoder_vp8_vme_brc_update_context_init(ctx, vp8_context, &vp8_context->brc_update_context);
5149     i965_encoder_vp8_vme_mpu_context_init(ctx, vp8_context, &vp8_context->mpu_context);
5150
5151     return True;
5152 }
5153
5154 extern Bool
5155 gen8_encoder_vp8_context_init(VADriverContextP, struct intel_encoder_context *, struct i965_encoder_vp8_context *);
5156
5157 extern Bool
5158 gen9_encoder_vp8_context_init(VADriverContextP, struct intel_encoder_context *, struct i965_encoder_vp8_context *);
5159
5160 extern Bool
5161 gen10_encoder_vp8_context_init(VADriverContextP, struct intel_encoder_context *, struct i965_encoder_vp8_context *);
5162
5163 Bool
5164 i965_encoder_vp8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5165 {
5166     struct i965_driver_data *i965 = i965_driver_data(ctx);
5167     struct i965_encoder_vp8_context *vp8_context = NULL;
5168
5169     vp8_context = calloc(1, sizeof(struct i965_encoder_vp8_context));
5170
5171     if (!vp8_context)
5172         return False;
5173
5174     i965_encoder_vp8_vme_var_init(ctx, encoder_context, vp8_context);
5175
5176     if (IS_CHERRYVIEW(i965->intel.device_info))
5177         gen8_encoder_vp8_context_init(ctx, encoder_context, vp8_context);
5178     else if (IS_GEN9(i965->intel.device_info)) {
5179         gen9_encoder_vp8_context_init(ctx, encoder_context, vp8_context);
5180     } else if (IS_GEN10(i965->intel.device_info)) {
5181         gen10_encoder_vp8_context_init(ctx, encoder_context, vp8_context);
5182     } else {
5183         free(vp8_context);
5184
5185         return False;
5186     }
5187
5188     i965_encoder_vp8_vme_kernels_context_init(ctx, encoder_context, vp8_context);
5189
5190     encoder_context->vme_context = vp8_context;
5191     encoder_context->vme_pipeline = i965_encoder_vp8_vme_pipeline;
5192     encoder_context->vme_context_destroy = i965_encoder_vp8_vme_context_destroy;
5193
5194     return True;
5195 }
5196
5197 /*
5198  * PAK part
5199  */
/* PAK pre-pipeline hook: intentionally empty, there is nothing to do. */
static void
i965_encoder_vp8_pak_pre_pipeline(struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
{
    (void)encode_state;
    (void)encoder_context;
}
5206
5207 static void
5208 i965_encoder_vp8_pak_kernels_context_destroy(struct i965_encoder_vp8_context *vp8_context)
5209 {
5210     struct i965_gpe_table *gpe = vp8_context->gpe_table;
5211     int i;
5212
5213     for (i = 0; i < NUM_VP8_TPU; i++)
5214         gpe->context_destroy(&vp8_context->tpu_context.gpe_contexts[i]);
5215 }
5216
5217
5218 static void
5219 i965_encoder_vp8_pak_context_destroy(void *context)
5220 {
5221     struct i965_encoder_vp8_context *vp8_context = context;
5222     int i;
5223
5224     dri_bo_unreference(vp8_context->post_deblocking_output.bo);
5225     vp8_context->post_deblocking_output.bo = NULL;
5226
5227     dri_bo_unreference(vp8_context->pre_deblocking_output.bo);
5228     vp8_context->pre_deblocking_output.bo = NULL;
5229
5230     dri_bo_unreference(vp8_context->uncompressed_picture_source.bo);
5231     vp8_context->uncompressed_picture_source.bo = NULL;
5232
5233     dri_bo_unreference(vp8_context->indirect_pak_bse_object.bo);
5234     vp8_context->indirect_pak_bse_object.bo = NULL;
5235
5236     for (i = 0; i < MAX_MFX_REFERENCE_SURFACES; i++) {
5237         dri_bo_unreference(vp8_context->reference_surfaces[i].bo);
5238         vp8_context->reference_surfaces[i].bo = NULL;
5239     }
5240
5241     i965_encoder_vp8_pak_kernels_context_destroy(vp8_context);
5242
5243     /* vme & pak same the same structure, so don't free the context here */
5244 }
5245
5246 static void
5247 i965_encoder_vp8_pak_pipe_mode_select(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5248 {
5249     struct intel_batchbuffer *batch = encoder_context->base.batch;
5250     struct i965_encoder_vp8_context *vp8_context = encoder_context->mfc_context;
5251
5252     BEGIN_BCS_BATCH(batch, 5);
5253
5254     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
5255     OUT_BCS_BATCH(batch,
5256                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
5257                   (MFD_MODE_VLD << 15) |  /* VLD mode */
5258                   ((!!vp8_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
5259                   ((!!vp8_context->pre_deblocking_output.bo) << 8)  |  /* Pre Deblocking Output */
5260                   (1 << 4)  | /* encoding mode */
5261                   (MFX_FORMAT_VP8 << 0));
5262     OUT_BCS_BATCH(batch, 0);
5263     OUT_BCS_BATCH(batch, 0);
5264     OUT_BCS_BATCH(batch, 0);
5265
5266     ADVANCE_BCS_BATCH(batch);
5267 }
5268
5269 static void
5270 i965_encoder_vp8_pak_surface_state(VADriverContextP ctx,
5271                                    struct object_surface *obj_surface,
5272                                    int id,
5273                                    struct intel_encoder_context *encoder_context)
5274 {
5275     struct intel_batchbuffer *batch = encoder_context->base.batch;
5276
5277     BEGIN_BCS_BATCH(batch, 6);
5278
5279     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
5280     OUT_BCS_BATCH(batch, id);
5281     OUT_BCS_BATCH(batch,
5282                   ((obj_surface->orig_height - 1) << 18) |
5283                   ((obj_surface->orig_width - 1) << 4));
5284     OUT_BCS_BATCH(batch,
5285                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
5286                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
5287                   ((obj_surface->width - 1) << 3) |  /* pitch */
5288                   (0 << 2)  | /* must be 0 for interleave U/V */
5289                   (1 << 1)  | /* must be tiled */
5290                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
5291     OUT_BCS_BATCH(batch,
5292                   (0 << 16) |            /* must be 0 for interleave U/V */
5293                   (obj_surface->height));        /* y offset for U(cb) */
5294     OUT_BCS_BATCH(batch, 0);
5295
5296     ADVANCE_BCS_BATCH(batch);
5297 }
5298
/*
 * PAK_OUT_BUFFER_2DW - emit a two-DWord buffer address.
 *
 * When buf_bo is non-NULL a 64-bit relocation against it is emitted with
 * I915_GEM_DOMAIN_RENDER as read domain, and I915_GEM_DOMAIN_RENDER as write
 * domain only if is_target is non-zero; otherwise two zero DWords are
 * emitted. A variable named `batch` must be in scope at the call site.
 *
 * Every argument is parenthesized so that expression arguments (for example
 * a conditional expression passed as is_target) cannot re-associate with the
 * operators used inside the macro. Note that buf_bo is evaluated more than
 * once, so it must not have side effects.
 */
#define PAK_OUT_BUFFER_2DW(buf_bo, is_target, delta)  do {              \
        if (buf_bo) {                                                   \
            OUT_BCS_RELOC64(batch,                                      \
                            (buf_bo),                                   \
                            I915_GEM_DOMAIN_RENDER,                     \
                            (is_target) ? I915_GEM_DOMAIN_RENDER : 0,   \
                            (delta));                                   \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
    } while (0)

/*
 * PAK_OUT_BUFFER_3DW - emit a two-DWord buffer address (see
 * PAK_OUT_BUFFER_2DW) followed by one attribute DWord.
 */
#define PAK_OUT_BUFFER_3DW(buf_bo, is_target, delta, attr)  do {        \
        PAK_OUT_BUFFER_2DW(buf_bo, is_target, delta);                   \
        OUT_BCS_BATCH(batch, (attr));                                   \
    } while (0)
5316
5317
5318
5319 static void
5320 i965_encoder_vp8_pak_pipe_buf_addr_state(VADriverContextP ctx,
5321                                          struct intel_encoder_context *encoder_context)
5322 {
5323     struct intel_batchbuffer *batch = encoder_context->base.batch;
5324     struct i965_encoder_vp8_context *vp8_context = encoder_context->mfc_context;
5325     int i;
5326
5327     BEGIN_BCS_BATCH(batch, 61);
5328
5329     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
5330
5331     /* the DW1-3 is for pre_deblocking */
5332     PAK_OUT_BUFFER_3DW(vp8_context->pre_deblocking_output.bo, 1, 0, vp8_context->mocs);
5333
5334     /* the DW4-6 is for the post_deblocking */
5335     PAK_OUT_BUFFER_3DW(vp8_context->post_deblocking_output.bo, 1, 0, vp8_context->mocs);
5336
5337     /* the DW7-9 is for the uncompressed_picture */
5338     PAK_OUT_BUFFER_3DW(vp8_context->uncompressed_picture_source.bo, 0, 0, vp8_context->mocs);
5339
5340     /* the DW10-12 is for the mb status */
5341     PAK_OUT_BUFFER_3DW(vp8_context->pak_stream_out_buffer.bo, 1, 0, vp8_context->mocs);
5342
5343     /* the DW13-15 is for the intra_row_store_scratch */
5344     PAK_OUT_BUFFER_3DW(vp8_context->pak_intra_row_store_scratch_buffer.bo, 1, 0, vp8_context->mocs);
5345
5346     /* the DW16-18 is for the deblocking filter */
5347     PAK_OUT_BUFFER_3DW(vp8_context->pak_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, vp8_context->mocs);
5348
5349     /* the DW 19-50 is for Reference pictures*/
5350     for (i = 0; i < ARRAY_ELEMS(vp8_context->reference_surfaces); i++) {
5351         PAK_OUT_BUFFER_2DW(vp8_context->reference_surfaces[i].bo, 0, 0);
5352     }
5353
5354     /* DW 51 */
5355     OUT_BCS_BATCH(batch, vp8_context->mocs);
5356
5357     /* The DW 52-54 is for the MB status buffer */
5358     PAK_OUT_BUFFER_3DW(NULL, 0, 0, 0);
5359
5360     /* the DW 55-57 is the ILDB buffer */
5361     PAK_OUT_BUFFER_3DW(NULL, 0, 0, 0);
5362
5363     /* the DW 58-60 is the second ILDB buffer */
5364     PAK_OUT_BUFFER_3DW(NULL, 0, 0, 0);
5365
5366     ADVANCE_BCS_BATCH(batch);
5367 }
5368
5369 static void
5370 i965_encoder_vp8_pak_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5371 {
5372     struct intel_batchbuffer *batch = encoder_context->base.batch;
5373     struct i965_encoder_vp8_context *vp8_context = encoder_context->mfc_context;
5374     int vme_size = ALIGN((vp8_context->mb_coded_buffer_size - vp8_context->mv_offset), 0x1000);
5375
5376     BEGIN_BCS_BATCH(batch, 26);
5377
5378     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
5379
5380     /* the DW1-5 is for the MFX indirect bistream */
5381     PAK_OUT_BUFFER_3DW(vp8_context->indirect_pak_bse_object.bo, 1, vp8_context->indirect_pak_bse_object.offset, vp8_context->mocs);
5382     PAK_OUT_BUFFER_2DW(vp8_context->indirect_pak_bse_object.bo, 1, vp8_context->indirect_pak_bse_object.end_offset);
5383
5384     /* the DW6-10 is for MFX Indirect MV Object Base Address */
5385     PAK_OUT_BUFFER_3DW(vp8_context->mb_coded_buffer.bo, 0, vp8_context->mv_offset, vp8_context->mocs);
5386     PAK_OUT_BUFFER_2DW(vp8_context->mb_coded_buffer.bo, 0, (vp8_context->mv_offset + vme_size));
5387
5388     /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
5389     PAK_OUT_BUFFER_3DW(NULL, 0, 0, 0);
5390     PAK_OUT_BUFFER_2DW(NULL, 0, 0);
5391
5392     /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
5393     PAK_OUT_BUFFER_3DW(NULL, 0, 0, 0);
5394     PAK_OUT_BUFFER_2DW(NULL, 0, 0);
5395
5396     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
5397     PAK_OUT_BUFFER_3DW(vp8_context->indirect_pak_bse_object.bo, 1, vp8_context->indirect_pak_bse_object.offset, vp8_context->mocs);
5398     PAK_OUT_BUFFER_2DW(vp8_context->indirect_pak_bse_object.bo, 1, vp8_context->indirect_pak_bse_object.end_offset);
5399
5400     ADVANCE_BCS_BATCH(batch);
5401 }
5402
5403 static void
5404 i965_encoder_vp8_pak_bsp_buf_base_addr_state(VADriverContextP ctx,
5405                                              struct encode_state *encode_state,
5406                                              struct intel_encoder_context *encoder_context)
5407 {
5408     struct intel_batchbuffer *batch = encoder_context->base.batch;
5409     struct i965_encoder_vp8_context *vp8_context = encoder_context->mfc_context;
5410     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
5411     int num_partitions = (1 << pic_param->pic_flags.bits.num_token_partitions);
5412     int offset;
5413     unsigned int token_size = vp8_context->frame_width * vp8_context->frame_height * 2;
5414     unsigned int part_size = token_size / num_partitions;
5415     unsigned int part0_size = (vp8_context->frame_width * vp8_context->frame_height) / 4 + VP8_INTERMEDIATE_PARTITION0_SIZE;
5416
5417     BEGIN_BCS_BATCH(batch, 32);
5418     OUT_BCS_BATCH(batch, MFX_VP8_BSP_BUF_BASE_ADDR_STATE | (32 - 2));
5419
5420     /* The 4th parameter in PAK_OUT_BUFFER_3DW() is not a MOCS index for this command per doc */
5421     /* DW1-3 */
5422     PAK_OUT_BUFFER_3DW(vp8_context->pak_frame_header_buffer.bo, 1, 0, 0);
5423     /* DW4-6 */
5424     PAK_OUT_BUFFER_3DW(vp8_context->pak_intermediate_buffer.bo, 1, 0, 0);
5425
5426     /* DW7-DW14 */
5427     offset = ALIGN(part0_size, 64);
5428     OUT_BCS_BATCH(batch, offset);
5429     offset = ALIGN(offset + part_size, 64);
5430     OUT_BCS_BATCH(batch, offset);
5431     offset = ALIGN(offset + part_size, 64);
5432     OUT_BCS_BATCH(batch, offset);
5433     offset = ALIGN(offset + part_size, 64);
5434     OUT_BCS_BATCH(batch, offset);
5435     offset = ALIGN(offset + part_size, 64);
5436     OUT_BCS_BATCH(batch, offset);
5437     offset = ALIGN(offset + part_size, 64);
5438     OUT_BCS_BATCH(batch, offset);
5439     offset = ALIGN(offset + part_size, 64);
5440     OUT_BCS_BATCH(batch, offset);
5441     offset = ALIGN(offset + part_size, 64);
5442     OUT_BCS_BATCH(batch, offset);
5443
5444     /* DW15 */
5445     OUT_BCS_BATCH(batch, token_size + part0_size);
5446
5447     /* DW16-18 */
5448     PAK_OUT_BUFFER_3DW(vp8_context->indirect_pak_bse_object.bo, 1, vp8_context->indirect_pak_bse_object.offset, 0);
5449
5450     /* DW19 */
5451     OUT_BCS_BATCH(batch, 0);
5452
5453     /* DW20-22 */
5454     PAK_OUT_BUFFER_3DW(NULL, 0, 0, 0);
5455
5456     /* DW23-25 */
5457     if (vp8_context->repak_pass_iter_val > 0 &&
5458         vp8_context->frame_type == MPEG_I_PICTURE &&
5459         vp8_context->repak_pass_iter_val == vp8_context->curr_pass)
5460         PAK_OUT_BUFFER_3DW(vp8_context->pak_mpu_tpu_key_frame_token_probability_buffer.bo, 1, 0, 0);
5461     else
5462         PAK_OUT_BUFFER_3DW(vp8_context->pak_mpu_tpu_coeff_probs_buffer.bo, 1, 0, 0);
5463
5464     /* DW26-28 */
5465     PAK_OUT_BUFFER_3DW(vp8_context->pak_mpu_tpu_pak_token_statistics_buffer.bo, 1, 0, 0);
5466
5467     /* DW29-31 */
5468     PAK_OUT_BUFFER_3DW(vp8_context->pak_mpc_row_store_scratch_buffer.bo, 1, 0, 0);
5469
5470     ADVANCE_BCS_BATCH(batch);
5471 }
5472
5473 static void
5474 i965_encoder_vp8_pak_insert_batch_buffers(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5475 {
5476     struct intel_batchbuffer *batch = encoder_context->base.batch;
5477     struct i965_encoder_vp8_context *vp8_context = encoder_context->mfc_context;
5478     struct i965_gpe_table *gpe = vp8_context->gpe_table;
5479     struct gpe_mi_batch_buffer_start_parameter batch_param;
5480     unsigned char brc_enabled = (vp8_context->internal_rate_mode == I965_BRC_CBR ||
5481                                  vp8_context->internal_rate_mode == I965_BRC_VBR);
5482
5483     memset(&batch_param, 0, sizeof(batch_param));
5484     batch_param.bo = vp8_context->pak_mpu_tpu_picture_state_buffer.bo;
5485     batch_param.is_second_level = 1; /* Must be the second batch buffer */
5486     gpe->mi_batch_buffer_start(ctx, batch, &batch_param);
5487
5488     if (brc_enabled) {
5489         batch_param.bo = vp8_context->brc_vp8_cfg_command_write_buffer.bo;
5490
5491         if (vp8_context->repak_pass_iter_val == 0) {
5492             batch_param.offset = vp8_context->curr_pass * VP8_BRC_IMG_STATE_SIZE_PER_PASS;
5493         } else {
5494
5495             if (vp8_context->repak_pass_iter_val == vp8_context->curr_pass)
5496                 batch_param.offset = 0;
5497             else
5498                 batch_param.offset = vp8_context->curr_pass * VP8_BRC_IMG_STATE_SIZE_PER_PASS;
5499         }
5500
5501         gpe->mi_batch_buffer_start(ctx, batch, &batch_param);
5502     }
5503
5504     batch_param.bo = vp8_context->mb_coded_buffer.bo;
5505     gpe->mi_batch_buffer_start(ctx, batch, &batch_param);
5506 }
5507
5508 static void
5509 i965_encoder_vp8_pak_picture_level(VADriverContextP ctx,
5510                                    struct encode_state *encode_state,
5511                                    struct intel_encoder_context *encoder_context)
5512 {
5513     struct intel_batchbuffer *batch = encoder_context->base.batch;
5514     struct i965_encoder_vp8_context *vp8_context = encoder_context->mfc_context;
5515     struct i965_encoder_vp8_encode_status_buffer *encode_status_buffer = &vp8_context->encode_status_buffer;
5516     struct i965_gpe_table *gpe = vp8_context->gpe_table;
5517     struct gpe_mi_conditional_batch_buffer_end_parameter mi_param;
5518     unsigned char brc_enabled = (vp8_context->internal_rate_mode == I965_BRC_CBR ||
5519                                  vp8_context->internal_rate_mode == I965_BRC_VBR);
5520
5521     if (brc_enabled &&
5522         vp8_context->curr_pass > 0 &&
5523         (vp8_context->curr_pass < vp8_context->repak_pass_iter_val ||
5524          vp8_context->repak_pass_iter_val == 0)) {
5525         memset(&mi_param, 0, sizeof(mi_param));
5526         mi_param.bo = encode_status_buffer->bo;
5527         mi_param.offset = (encode_status_buffer->base_offset +
5528                            encode_status_buffer->image_status_mask_offset);
5529         gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_param);
5530     }
5531
5532     if ((vp8_context->repak_pass_iter_val > 0) && (vp8_context->curr_pass == vp8_context->repak_pass_iter_val)) {
5533         memset(&mi_param, 0, sizeof(mi_param));
5534         mi_param.bo = vp8_context->pak_mpu_tpu_repak_decision_buffer.bo;
5535         mi_param.offset = 0;
5536         gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_param);
5537     }
5538
5539     i965_encoder_vp8_pak_pipe_mode_select(ctx, encoder_context);
5540     i965_encoder_vp8_pak_surface_state(ctx, encode_state->reconstructed_object, 0, encoder_context);
5541     i965_encoder_vp8_pak_surface_state(ctx, encode_state->input_yuv_object, 4, encoder_context);
5542     i965_encoder_vp8_pak_pipe_buf_addr_state(ctx, encoder_context);
5543     i965_encoder_vp8_pak_ind_obj_base_addr_state(ctx, encoder_context);
5544     i965_encoder_vp8_pak_bsp_buf_base_addr_state(ctx, encode_state, encoder_context);
5545     i965_encoder_vp8_pak_insert_batch_buffers(ctx, encoder_context);
5546 }
5547
5548 static void
5549 i965_encoder_vp8_pak_set_pak_status_in_tpu_curbe(VADriverContextP ctx,
5550                                                  struct intel_encoder_context *encoder_context,
5551                                                  int ipass)
5552 {
5553     struct intel_batchbuffer *batch = encoder_context->base.batch;
5554     struct i965_encoder_vp8_context *vp8_context = encoder_context->mfc_context;
5555     struct i965_encoder_vp8_tpu_context *tpu_context = &vp8_context->tpu_context;
5556     struct i965_gpe_table *gpe = vp8_context->gpe_table;
5557     struct i965_gpe_context *tpu_gpe_context;
5558     struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
5559     struct gpe_mi_store_register_mem_parameter mi_store_register_mem_param;
5560
5561     tpu_gpe_context = &tpu_context->gpe_contexts[0];
5562
5563     memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
5564     mi_store_data_imm_param.bo = tpu_gpe_context->curbe.bo;
5565     mi_store_data_imm_param.offset = tpu_gpe_context->curbe.offset + sizeof(unsigned int) * 6;
5566     mi_store_data_imm_param.dw0 = (vp8_context->curr_pass + 1) << 8;
5567
5568     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);
5569
5570     if (ipass == 0) {
5571         memset(&mi_store_register_mem_param, 0, sizeof(mi_store_register_mem_param));
5572         mi_store_register_mem_param.bo = tpu_gpe_context->curbe.bo;
5573         mi_store_register_mem_param.offset = tpu_gpe_context->curbe.offset + sizeof(unsigned int) * 8;
5574         mi_store_register_mem_param.mmio_offset = vp8_context->vdbox_mmio_base + VP8_MFX_BRC_CUMULATIVE_DQ_INDEX01_REG_OFFSET;
5575         gpe->mi_store_register_mem(ctx, batch, &mi_store_register_mem_param);
5576
5577         mi_store_register_mem_param.offset = tpu_gpe_context->curbe.offset + sizeof(unsigned int) * 9;
5578         mi_store_register_mem_param.mmio_offset = vp8_context->vdbox_mmio_base + VP8_MFX_BRC_CUMULATIVE_DQ_INDEX23_REG_OFFSET;
5579         gpe->mi_store_register_mem(ctx, batch, &mi_store_register_mem_param);
5580
5581         mi_store_register_mem_param.offset = tpu_gpe_context->curbe.offset + sizeof(unsigned int) * 10;
5582         mi_store_register_mem_param.mmio_offset = vp8_context->vdbox_mmio_base + VP8_MFX_BRC_CUMULATIVE_D_LOOP_FILTER01_REG_OFFSET;
5583         gpe->mi_store_register_mem(ctx, batch, &mi_store_register_mem_param);
5584
5585         mi_store_register_mem_param.offset = tpu_gpe_context->curbe.offset + sizeof(unsigned int) * 11;
5586         mi_store_register_mem_param.mmio_offset = vp8_context->vdbox_mmio_base + VP8_MFX_BRC_CUMULATIVE_D_LOOP_FILTER23_REG_OFFSET;
5587         gpe->mi_store_register_mem(ctx, batch, &mi_store_register_mem_param);
5588     }
5589 }
5590
5591 static void
5592 i965_encoder_vp8_pak_slice_level_brc(VADriverContextP ctx,
5593                                      struct intel_encoder_context *encoder_context,
5594                                      int ipass)
5595 {
5596     struct i965_encoder_vp8_context *vp8_context = encoder_context->mfc_context;
5597     unsigned int *pbuffer;
5598
5599     i965_encoder_vp8_read_pak_statistics(ctx, encoder_context, ipass);
5600
5601     pbuffer = i965_map_gpe_resource(&vp8_context->pak_mpu_tpu_picture_state_buffer);
5602
5603     if (!pbuffer)
5604         return;
5605
5606     pbuffer += 38;
5607     *pbuffer = 0x05000000;
5608     i965_unmap_gpe_resource(&vp8_context->pak_mpu_tpu_picture_state_buffer);
5609
5610     i965_encoder_vp8_pak_set_pak_status_in_tpu_curbe(ctx, encoder_context, ipass);
5611 }
5612
5613 static void
5614 i965_encoder_vp8_pak_slice_level(VADriverContextP ctx,
5615                                  struct encode_state *encode_state,
5616                                  struct intel_encoder_context *encoder_context)
5617 {
5618     struct i965_encoder_vp8_context *vp8_context = encoder_context->mfc_context;
5619     unsigned char brc_enabled = (vp8_context->internal_rate_mode == I965_BRC_CBR ||
5620                                  vp8_context->internal_rate_mode == I965_BRC_VBR);
5621     unsigned int *pbuffer;
5622
5623     i965_encoder_vp8_read_encode_status(ctx, encoder_context);
5624
5625     if (vp8_context->num_brc_pak_passes == VP8_BRC_SINGLE_PASS) {
5626         if (brc_enabled) {
5627             i965_encoder_vp8_read_pak_statistics(ctx, encoder_context, vp8_context->curr_pass);
5628
5629             /* Workaround: */
5630             pbuffer = i965_map_gpe_resource(&vp8_context->pak_mpu_tpu_picture_state_buffer);
5631
5632             if (!pbuffer)
5633                 return;
5634
5635             pbuffer += 38;
5636             *pbuffer = 0x05000000;
5637             i965_unmap_gpe_resource(&vp8_context->pak_mpu_tpu_picture_state_buffer);
5638         }
5639
5640         vp8_context->submit_batchbuffer = 1;
5641     } else {
5642         if ((brc_enabled) &&
5643             ((vp8_context->curr_pass < vp8_context->num_passes && vp8_context->repak_pass_iter_val > 0) ||
5644              (vp8_context->curr_pass <= vp8_context->num_passes && vp8_context->repak_pass_iter_val == 0))) {
5645             i965_encoder_vp8_pak_slice_level_brc(ctx, encoder_context, vp8_context->curr_pass);
5646
5647             if (vp8_context->tpu_required)
5648                 vp8_context->submit_batchbuffer = 1;
5649             else
5650                 vp8_context->submit_batchbuffer = 0;
5651         } else {
5652             if (brc_enabled) {
5653                 i965_encoder_vp8_read_pak_statistics(ctx, encoder_context, vp8_context->curr_pass);
5654             }
5655
5656             pbuffer = i965_map_gpe_resource(&vp8_context->pak_mpu_tpu_picture_state_buffer);
5657
5658             if (!pbuffer)
5659                 return;
5660
5661             pbuffer += 38;
5662             *pbuffer = 0x05000000;
5663             i965_map_gpe_resource(&vp8_context->pak_mpu_tpu_picture_state_buffer);
5664
5665             vp8_context->submit_batchbuffer = 1;
5666         }
5667     }
5668 }
5669
5670 static void
5671 i965_encoder_vp8_pak_tpu_set_curbe(VADriverContextP ctx,
5672                                    struct encode_state *encode_state,
5673                                    struct intel_encoder_context *encoder_context,
5674                                    struct i965_gpe_context *gpe_context)
5675 {
5676     struct i965_encoder_vp8_context *vp8_context = encoder_context->mfc_context;
5677     struct vp8_tpu_curbe_data *pcmd = i965_gpe_context_map_curbe(gpe_context);
5678     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
5679     VAQMatrixBufferVP8 *quant_param = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
5680
5681     if (!pcmd)
5682         return;
5683
5684     memset(pcmd, 0, sizeof(*pcmd));
5685
5686     pcmd->dw0.mbs_in_frame = vp8_context->frame_width_in_mbs * vp8_context->frame_height_in_mbs;
5687
5688     pcmd->dw1.frame_type = pic_param->pic_flags.bits.frame_type;
5689     pcmd->dw1.enable_segmentation = pic_param->pic_flags.bits.segmentation_enabled;
5690     pcmd->dw1.rebinarization_frame_hdr = (vp8_context->repak_pass_iter_val ? 1 : 0);
5691
5692     pcmd->dw1.refresh_entropy_p = pic_param->pic_flags.bits.refresh_entropy_probs;
5693     pcmd->dw1.mb_no_coeffiscient_skip = pic_param->pic_flags.bits.mb_no_coeff_skip;
5694
5695     pcmd->dw3.max_qp = pic_param->clamp_qindex_high;
5696     pcmd->dw3.min_qp = pic_param->clamp_qindex_low;
5697
5698     pcmd->dw4.loop_filter_level_segment0 = pic_param->loop_filter_level[0];
5699     pcmd->dw4.loop_filter_level_segment1 = pic_param->loop_filter_level[1];
5700     pcmd->dw4.loop_filter_level_segment2 = pic_param->loop_filter_level[2];
5701     pcmd->dw4.loop_filter_level_segment3 = pic_param->loop_filter_level[3];
5702
5703     pcmd->dw5.quantization_index_segment0 = quant_param->quantization_index[0];
5704     pcmd->dw5.quantization_index_segment1 = quant_param->quantization_index[1];
5705     pcmd->dw5.quantization_index_segment2 = quant_param->quantization_index[2];
5706     pcmd->dw5.quantization_index_segment3 = quant_param->quantization_index[3];
5707
5708     pcmd->dw6.pak_pass_num = (vp8_context->internal_rate_mode > 0 ? vp8_context->num_brc_pak_passes : 0) << 8;
5709
5710     if (vp8_context->repak_pass_iter_val > 0) { // TODO: more check
5711         pcmd->dw7.skip_cost_delta_threshold = 100;
5712         pcmd->dw7.token_cost_delta_threshold = 50;
5713     } else {
5714         pcmd->dw7.skip_cost_delta_threshold = 0;
5715         pcmd->dw7.token_cost_delta_threshold = 0;
5716     }
5717
5718     pcmd->dw12.pak_token_statistics_bti = VP8_BTI_TPU_PAK_TOKEN_STATISTICS;
5719     pcmd->dw13.token_update_flags_bti = VP8_BTI_TPU_TOKEN_UPDATE_FLAGS;
5720     pcmd->dw14.entropy_cost_table_bti = VP8_BTI_TPU_ENTROPY_COST_TABLE;
5721     pcmd->dw15.frame_header_bitstream_bti = VP8_BTI_TPU_HEADER_BITSTREAM;
5722     pcmd->dw16.default_token_probability_bti = VP8_BTI_TPU_DEFAULT_TOKEN_PROBABILITY;
5723     pcmd->dw17.picture_state_bti = VP8_BTI_TPU_PICTURE_STATE;
5724     pcmd->dw18.mpu_curbe_data_bti = VP8_BTI_TPU_MPU_CURBE_DATA;
5725     pcmd->dw19.header_meta_data_bti = VP8_BTI_TPU_HEADER_METADATA;
5726     pcmd->dw20.token_probability_bti = VP8_BTI_TPU_TOKEN_PROBABILITY;
5727     pcmd->dw21.pak_hardware_token_probability_pass1_bti = VP8_BTI_TPU_PAK_HW_PASS1_PROBABILITY;
5728     pcmd->dw22.key_frame_token_probability_bti = VP8_BTI_TPU_KEY_TOKEN_PROBABILITY;
5729     pcmd->dw23.updated_token_probability_bti = VP8_BTI_TPU_UPDATED_TOKEN_PROBABILITY;
5730     pcmd->dw24.pak_hardware_token_probability_pass2_bti = VP8_BTI_TPU_PAK_HW_PASS2_PROBABILITY;
5731     pcmd->dw25.kernel_debug_dump_bti = VP8_BTI_TPU_VME_DEBUG_STREAMOUT;
5732     pcmd->dw26.repak_decision_surface_bti = VP8_BTI_TPU_REPAK_DECISION;
5733
5734     i965_gpe_context_unmap_curbe(gpe_context);
5735 }
5736
5737 static void
5738 i965_encoder_vp8_tpu_add_surfaces(VADriverContextP ctx,
5739                                   struct encode_state *encode_state,
5740                                   struct intel_encoder_context *encoder_context,
5741                                   struct i965_gpe_context *gpe_context)
5742 {
5743     struct i965_encoder_vp8_context *vp8_context = encoder_context->mfc_context;
5744     unsigned int size;
5745     unsigned char brc_enabled = (vp8_context->internal_rate_mode == I965_BRC_CBR ||
5746                                  vp8_context->internal_rate_mode == I965_BRC_VBR);
5747
5748     // Pak token statistics
5749     size = VP8_TOKEN_STATISTICS_SIZE;
5750     i965_add_buffer_gpe_surface(ctx,
5751                                 gpe_context,
5752                                 &vp8_context->pak_mpu_tpu_pak_token_statistics_buffer,
5753                                 1,
5754                                 size,
5755                                 0,
5756                                 VP8_BTI_TPU_PAK_TOKEN_STATISTICS);
5757
5758     // Pak token Update flags
5759     size = VP8_COEFFS_PROPABILITIES_SIZE;
5760     i965_add_buffer_gpe_surface(ctx,
5761                                 gpe_context,
5762                                 &vp8_context->pak_mpu_tpu_pak_token_update_flags_buffer,
5763                                 0,
5764                                 size,
5765                                 0,
5766                                 VP8_BTI_TPU_TOKEN_UPDATE_FLAGS);
5767
5768     // Entropy cost
5769     size = VP8_ENTROPY_COST_TABLE_SIZE;
5770     i965_add_buffer_gpe_surface(ctx,
5771                                 gpe_context,
5772                                 &vp8_context->pak_mpu_tpu_entropy_cost_table_buffer,
5773                                 1,
5774                                 size,
5775                                 0,
5776                                 VP8_BTI_TPU_ENTROPY_COST_TABLE);
5777
5778     // Frame header
5779     size = VP8_FRAME_HEADER_SIZE;
5780     i965_add_buffer_gpe_surface(ctx,
5781                                 gpe_context,
5782                                 &vp8_context->pak_frame_header_buffer,
5783                                 0,
5784                                 size,
5785                                 0,
5786                                 VP8_BTI_TPU_HEADER_BITSTREAM);
5787
5788     // Default token token probability
5789     size = VP8_COEFFS_PROPABILITIES_SIZE;
5790     i965_add_buffer_gpe_surface(ctx,
5791                                 gpe_context,
5792                                 &vp8_context->pak_mpu_tpu_default_token_probability_buffer,
5793                                 0,
5794                                 size,
5795                                 0,
5796                                 VP8_BTI_TPU_DEFAULT_TOKEN_PROBABILITY);
5797
5798     // Picture state surface
5799     size = VP8_PICTURE_STATE_SIZE;
5800     i965_add_buffer_gpe_surface(ctx,
5801                                 gpe_context,
5802                                 &vp8_context->pak_mpu_tpu_picture_state_buffer,
5803                                 0,
5804                                 size,
5805                                 0,
5806                                 VP8_BTI_TPU_PICTURE_STATE);
5807
5808     // MPU Curbe info from TPU
5809     size = VP8_TOKEN_BITS_DATA_SIZE;
5810     i965_add_buffer_gpe_surface(ctx,
5811                                 gpe_context,
5812                                 &vp8_context->pak_mpu_tpu_token_bits_data_buffer,
5813                                 0,
5814                                 size,
5815                                 0,
5816                                 VP8_BTI_TPU_MPU_CURBE_DATA);
5817
5818     // Encoder CFG command surface
5819     size = VP8_HEADER_METADATA_SIZE;
5820
5821     if (brc_enabled) {
5822         i965_add_buffer_gpe_surface(ctx,
5823                                     gpe_context,
5824                                     &vp8_context->brc_vp8_cfg_command_write_buffer,
5825                                     0,
5826                                     size,
5827                                     0,
5828                                     VP8_BTI_TPU_HEADER_METADATA);
5829     } else {
5830         i965_add_buffer_gpe_surface(ctx,
5831                                     gpe_context,
5832                                     &vp8_context->pak_mpu_tpu_picture_state_buffer,
5833                                     0,
5834                                     size,
5835                                     VP8_HEADER_METADATA_OFFSET,
5836                                     VP8_BTI_TPU_HEADER_METADATA);
5837     }
5838
5839     // Current frame token probability
5840     size = VP8_COEFFS_PROPABILITIES_SIZE;
5841     i965_add_buffer_gpe_surface(ctx,
5842                                 gpe_context,
5843                                 &vp8_context->pak_mpu_tpu_coeff_probs_buffer,
5844                                 0,
5845                                 size,
5846                                 0,
5847                                 VP8_BTI_TPU_TOKEN_PROBABILITY);
5848
5849     // Hardware token probability pass 1
5850     i965_add_buffer_gpe_surface(ctx,
5851                                 gpe_context,
5852                                 &vp8_context->pak_mpu_tpu_ref_coeff_probs_buffer,
5853                                 0,
5854                                 size,
5855                                 0,
5856                                 VP8_BTI_TPU_PAK_HW_PASS1_PROBABILITY);
5857
5858     // key frame token probability
5859     i965_add_buffer_gpe_surface(ctx,
5860                                 gpe_context,
5861                                 &vp8_context->pak_mpu_tpu_updated_token_probability_buffer,
5862                                 0,
5863                                 size,
5864                                 0,
5865                                 VP8_BTI_TPU_UPDATED_TOKEN_PROBABILITY);
5866
5867     // update token probability
5868     i965_add_buffer_gpe_surface(ctx,
5869                                 gpe_context,
5870                                 &vp8_context->pak_mpu_tpu_key_frame_token_probability_buffer,
5871                                 0,
5872                                 size,
5873                                 0,
5874                                 VP8_BTI_TPU_KEY_TOKEN_PROBABILITY);
5875
5876     // Hardware token probability pass 2
5877     i965_add_buffer_gpe_surface(ctx,
5878                                 gpe_context,
5879                                 &vp8_context->pak_mpu_tpu_hw_token_probability_pak_pass_2_buffer,
5880                                 0,
5881                                 size,
5882                                 0,
5883                                 VP8_BTI_TPU_PAK_HW_PASS2_PROBABILITY);
5884
5885     // Repak Decision
5886     i965_add_buffer_gpe_surface(ctx,
5887                                 gpe_context,
5888                                 &vp8_context->pak_mpu_tpu_repak_decision_buffer,
5889                                 0,
5890                                 size,
5891                                 0,
5892                                 VP8_BTI_TPU_REPAK_DECISION);
5893 }
5894
5895 static void
5896 i965_encoder_vp8_pak_tpu(VADriverContextP ctx,
5897                          struct encode_state *encode_state,
5898                          struct intel_encoder_context *encoder_context)
5899 {
5900     struct i965_encoder_vp8_context *vp8_context = encoder_context->mfc_context;
5901     struct i965_encoder_vp8_tpu_context *tpu_context = &vp8_context->tpu_context;
5902     struct i965_gpe_table *gpe = vp8_context->gpe_table;
5903     struct gpe_media_object_parameter media_object_param;
5904     struct i965_gpe_context *gpe_context;
5905     int media_function = VP8_MEDIA_STATE_TPU;
5906
5907     gpe_context = &tpu_context->gpe_contexts[0];
5908     /* gpe->context_init(ctx, gpe_context); */
5909     gpe->reset_binding_table(ctx, gpe_context);
5910
5911     if (!vp8_context->tpu_curbe_updated_in_brc_update)
5912         i965_encoder_vp8_pak_tpu_set_curbe(ctx, encode_state, encoder_context, gpe_context);
5913
5914     i965_encoder_vp8_tpu_add_surfaces(ctx, encode_state, encoder_context, gpe_context);
5915     gpe->setup_interface_data(ctx, gpe_context);
5916
5917     memset(&media_object_param, 0, sizeof(media_object_param));
5918     i965_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
5919 }
5920
/*
 * Point dst_bo at src_bo and drop the reference dst_bo held before.
 * When is_ref_bo is non-zero a reference is taken on the new buffer.
 *
 * The new reference is acquired before the old one is released, so the
 * macro stays safe when dst_bo and src_bo already name the same buffer and
 * dst_bo holds its last reference. src_bo and is_ref_bo are evaluated
 * exactly once; dst_bo must be an lvalue without side effects because it
 * is expanded twice.
 */
#define PAK_REFERENCE_BO(dst_bo, src_bo, is_ref_bo)     \
    do {                                                \
        dri_bo *pak_new_bo_ = (src_bo);                 \
        if (is_ref_bo)                                  \
            dri_bo_reference(pak_new_bo_);              \
        dri_bo_unreference(dst_bo);                     \
        (dst_bo) = pak_new_bo_;                         \
    } while (0)
5928
5929 static void
5930 i965_encoder_vp8_pak_pipeline_prepare(VADriverContextP ctx,
5931                                       struct encode_state *encode_state,
5932                                       struct intel_encoder_context *encoder_context)
5933 {
5934     struct i965_encoder_vp8_context *vp8_context = encoder_context->mfc_context;
5935     struct object_surface *obj_surface;
5936     struct object_buffer *obj_buffer;
5937     struct i965_coded_buffer_segment *coded_buffer_segment;
5938     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
5939     dri_bo *bo;
5940     int i;
5941
5942     /* reconstructed surface */
5943     obj_surface = encode_state->reconstructed_object;
5944     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5945
5946     if (pic_param->loop_filter_level[0] == 0) {
5947         PAK_REFERENCE_BO(vp8_context->pre_deblocking_output.bo, obj_surface->bo, 1);
5948         PAK_REFERENCE_BO(vp8_context->post_deblocking_output.bo, NULL, 0);
5949     } else {
5950         PAK_REFERENCE_BO(vp8_context->pre_deblocking_output.bo, NULL, 0);
5951         PAK_REFERENCE_BO(vp8_context->post_deblocking_output.bo, obj_surface->bo, 1);
5952     }
5953
5954     /* set vp8 reference frames */
5955     for (i = 0; i < ARRAY_ELEMS(vp8_context->reference_surfaces); i++) {
5956         obj_surface = encode_state->reference_objects[i];
5957
5958         if (obj_surface && obj_surface->bo) {
5959             PAK_REFERENCE_BO(vp8_context->reference_surfaces[i].bo, obj_surface->bo, 1);
5960         } else {
5961             PAK_REFERENCE_BO(vp8_context->reference_surfaces[i].bo, NULL, 0);
5962         }
5963     }
5964
5965     /* input YUV surface */
5966     obj_surface = encode_state->input_yuv_object;
5967     PAK_REFERENCE_BO(vp8_context->uncompressed_picture_source.bo, obj_surface->bo, 1);
5968
5969     /* coded buffer */
5970     obj_buffer = encode_state->coded_buf_object;
5971     bo = obj_buffer->buffer_store->bo;
5972     vp8_context->indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
5973     vp8_context->indirect_pak_bse_object.end_offset = ALIGN((obj_buffer->size_element - 0x1000), 0x1000);
5974     PAK_REFERENCE_BO(vp8_context->indirect_pak_bse_object.bo, bo, 1);
5975
5976     /* set the internal flag to 0 to indicate the coded size is unknown */
5977     dri_bo_map(bo, 1);
5978
5979     if (!bo->virtual)
5980         return;
5981
5982     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
5983     coded_buffer_segment->mapped = 0;
5984     coded_buffer_segment->codec = encoder_context->codec;
5985     coded_buffer_segment->status_support = 1;
5986     dri_bo_unmap(bo);
5987 }
5988
5989 static VAStatus
5990 i965_encoder_vp8_pak_pipeline_final(VADriverContextP ctx,
5991                                     struct encode_state *encode_state,
5992                                     struct intel_encoder_context *encoder_context)
5993 {
5994     struct i965_encoder_vp8_context *vp8_context = encoder_context->mfc_context;
5995     struct i965_encoder_vp8_tpu_context *tpu_context = &vp8_context->tpu_context;
5996
5997     dri_bo_unreference(tpu_context->dynamic_buffer);
5998     tpu_context->dynamic_buffer = NULL;
5999
6000     return VA_STATUS_SUCCESS;
6001 }
6002
6003 #undef PAK_REFERENCE_BO
6004
6005 static VAStatus
6006 i965_encoder_vp8_pak_pipeline(VADriverContextP ctx,
6007                               VAProfile profile,
6008                               struct encode_state *encode_state,
6009                               struct intel_encoder_context *encoder_context)
6010 {
6011     struct i965_encoder_vp8_context *vp8_context = encoder_context->mfc_context;
6012     struct intel_batchbuffer *batch = encoder_context->base.batch;
6013
6014     i965_encoder_vp8_pak_pipeline_prepare(ctx, encode_state, encoder_context);
6015
6016     vp8_context->is_render_context = 0;
6017     vp8_context->submit_batchbuffer = 1;
6018
6019     for (vp8_context->curr_pass = 0; vp8_context->curr_pass <= vp8_context->num_passes; vp8_context->curr_pass++) {
6020         vp8_context->tpu_required = ((vp8_context->curr_pass == (vp8_context->num_passes - 1) &&
6021                                       vp8_context->repak_pass_iter_val > 0) ||
6022                                      (vp8_context->curr_pass == vp8_context->num_passes &&
6023                                       vp8_context->repak_pass_iter_val == 0));
6024
6025         if (vp8_context->submit_batchbuffer)
6026             intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, vp8_context->vdbox_idc);
6027
6028         intel_batchbuffer_emit_mi_flush(batch);
6029         i965_encoder_vp8_pak_picture_level(ctx, encode_state, encoder_context);
6030         i965_encoder_vp8_pak_slice_level(ctx, encode_state, encoder_context);
6031
6032         if (vp8_context->submit_batchbuffer) {
6033             intel_batchbuffer_end_atomic(batch);
6034             intel_batchbuffer_flush(batch);
6035         }
6036
6037         if (vp8_context->tpu_required) {
6038             assert(vp8_context->submit_batchbuffer);
6039             i965_encoder_vp8_pak_tpu(ctx, encode_state, encoder_context);
6040         }
6041     }
6042
6043     if (!vp8_context->is_first_frame && vp8_context->is_first_two_frame)
6044         vp8_context->is_first_two_frame = 0;
6045
6046     vp8_context->is_first_frame = 0;
6047     vp8_context->tpu_curbe_updated_in_brc_update = 0;
6048
6049     i965_encoder_vp8_pak_pipeline_final(ctx, encode_state, encoder_context);
6050
6051     return VA_STATUS_SUCCESS;
6052 }
6053
6054 static void
6055 i965_encoder_vp8_pak_tpu_context_init(VADriverContextP ctx,
6056                                       struct i965_encoder_vp8_context *vp8_context,
6057                                       struct i965_encoder_vp8_tpu_context *tpu_context)
6058 {
6059     struct i965_gpe_table *gpe = vp8_context->gpe_table;
6060     struct i965_gpe_context *gpe_context = NULL;
6061     struct vp8_encoder_kernel_parameters kernel_params;
6062     struct vp8_encoder_scoreboard_parameters scoreboard_params;
6063     int i;
6064
6065     kernel_params.curbe_size = sizeof(struct vp8_tpu_curbe_data);
6066     kernel_params.inline_data_size = 0;
6067     kernel_params.external_data_size = 0;
6068
6069     memset(&scoreboard_params, 0, sizeof(scoreboard_params));
6070     scoreboard_params.mask = 0xFF;
6071     scoreboard_params.enable = vp8_context->use_hw_scoreboard;
6072     scoreboard_params.type = vp8_context->use_hw_non_stalling_scoreborad;
6073
6074     for (i = 0; i < NUM_VP8_TPU; i++) {
6075         gpe_context = &tpu_context->gpe_contexts[i];
6076         i965_encoder_vp8_gpe_context_init_once(ctx, gpe_context, &kernel_params, vp8_context->idrt_entry_size);
6077         i965_encoder_vp8_gpe_context_vfe_scoreboard_init(gpe_context, &scoreboard_params);
6078         gpe->load_kernels(ctx,
6079                           gpe_context,
6080                           &vp8_kernels_tpu[i],
6081                           1);
6082     }
6083 }
6084
6085 static void
6086 i965_encoder_vp8_pak_kernels_context_init(VADriverContextP ctx,
6087                                           struct intel_encoder_context *encoder_context,
6088                                           struct i965_encoder_vp8_context *vp8_context)
6089 {
6090     i965_encoder_vp8_pak_tpu_context_init(ctx, vp8_context, &vp8_context->tpu_context);
6091 }
6092
6093 static VAStatus
6094 i965_encoder_vp8_get_status(VADriverContextP ctx,
6095                             struct intel_encoder_context *encoder_context,
6096                             struct i965_coded_buffer_segment *coded_buffer_segment)
6097 {
6098     struct vp8_encode_status *encode_state = (struct vp8_encode_status *)coded_buffer_segment->codec_private_data;
6099
6100     coded_buffer_segment->base.size = encode_state->bitstream_byte_count_per_frame;
6101
6102     return VA_STATUS_SUCCESS;
6103 }
6104
6105 Bool
6106 i965_encoder_vp8_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
6107 {
6108     /* VME & PAK share the same context */
6109     struct i965_encoder_vp8_context *vp8_context = encoder_context->vme_context;
6110
6111     assert(vp8_context);
6112     i965_encoder_vp8_pak_kernels_context_init(ctx, encoder_context, vp8_context);
6113
6114     encoder_context->mfc_context = vp8_context;
6115     encoder_context->mfc_context_destroy = i965_encoder_vp8_pak_context_destroy;
6116     encoder_context->mfc_pipeline = i965_encoder_vp8_pak_pipeline;
6117     encoder_context->mfc_brc_prepare = i965_encoder_vp8_pak_pre_pipeline;
6118     encoder_context->get_status = i965_encoder_vp8_get_status;
6119
6120     return True;
6121 }