OSDN Git Service

i965_drv_video: add support for H264 on Clarkdale/Arrandale
[android-x86/hardware-intel-common-libva.git] / i965_drv_video / shaders / h264 / mc / inter_Header.inc
1 /*\r
2  * Header file for all AVC INTER prediction kernels\r
3  * Copyright © <2010>, Intel Corporation.\r
4  *\r
5  * This program is licensed under the terms and conditions of the\r
6  * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
7  * http://www.opensource.org/licenses/eclipse-1.0.php.\r
8  *\r
9  */\r
10 #if !defined(__INTER_HEADER__)  // Make sure this file is only included once\r
11 #define __INTER_HEADER__\r
12 \r
13 // Module name: inter_header.inc\r
14 //\r
15 // Header file for all AVC INTER prediction kernels\r
16 //\r
17 \r
18 #define INTER_KERNEL\r
19 \r
20 //-------------------------------------------------------------------------------------------\r
21 // TODO: The following definitions will be merged with the above definitions later\r
22 //-------------------------------------------------------------------------------------------\r
23 \r
24 \r
25 //------------ Input parameters & bit masks\r
26 \r
27 // SW WA (software workaround) for weighted prediction - 2007/09/06   \r
28 //.declare      guwR1                   Base=r1 ElementSize=2 Type=uw   \r
29 //.declare      guwW128                 Base=r63.13 ElementSize=2 Type=uw\r
30 \r
31 #ifdef DEV_ILK\r
32 // #define SW_W_128             // SW WA for special Weight=128 case is left disabled on DEV_ILK; uncomment this line to enable it\r
33 #else   // Pre DEV_ILK\r
34 #define SW_W_128                // Enable SW WA for special Weight=128 case.\r
35 #endif  // DEV_ILK\r
36 \r
37 #ifdef  SW_W_128\r
38 .declare        gudW128                 Base=r1.0 ElementSize=4 Type=ud         // Declared only when the WA is enabled; gwMBTYPE/gLOOP_SUBMB are then placed at r8.6 instead of r1.0\r
39 #else\r
40 #endif  // SW_W_128\r
41 \r
42 #define         gORIX                   r3.4                            // :ub, X origin\r
43 #define         gORIY                   r3.5                            // :ub, Y origin\r
44 \r
45 #define         gCBP                    r3.9                            // :ub, CBP (0, 0, Y0, Y1, Y2, Y3, Cb, Cr), listed MSB first\r
46 #define         nCBPY_MASK              0x3c                            // Bits 5..2 of gCBP: Y0..Y3\r
47 #define         nCBPU_MASK              0x2                             // Bit 1 of gCBP: Cb\r
48 #define         nCBPV_MASK              0x1                             // Bit 0 of gCBP: Cr\r
49 \r
50 #define         gFIELDFLAGS             r3.1                            // :uw - To compute message descriptor for write\r
51 \r
52 #define         gMBTYPE                 r3.1                            // :ub, MB type (same byte as gFIELDMBFLAG)\r
53 #define         nMBTYPE_MASK    0x1f\r
54 #define         gFIELDMBFLAG    r3.1                            // :ub, Field MB flag (same byte as gMBTYPE)\r
55 #define         nFIELDMB_MASK   0x40\r
56 #define         gMBPARITY               r3.3                            // :ub, Bottom field flag\r
57 #define         nMBPARITY_MASK  0x01\r
58 \r
59 #define         gWPREDFLAG              r3.0                            // :ub, Weighted pred flag\r
60 #define         nWBIDIR_MASK    0xc0\r
61 \r
62 #define         gSUBMB_SHAPE    r3.12                           // :ub, Sub-MB shape\r
63 #define         gSUBMB_MODE             r3.13                           // :ub, Sub-MB prediction mode\r
64 .declare        guwSUBMB_SHAPE_MODE     Base=r3.6 ElementSize=2 Type=uw         // Word 6 of r3 = bytes 12-13: gSUBMB_SHAPE and gSUBMB_MODE viewed as one :uw\r
65 \r
66 #define         gYWDENOM                r3.14                           // :ub, Luma log2 weight denom\r
67 #define         gCWDENOM                r3.15                           // :ub, Chroma log2 weight denom\r
68 \r
69 #define         gADDR                   r3.24                           // :ub, Register addresses of error data / MV\r
70 \r
71 .declare        gubBIDX                 Base=r3.16 ElementSize=1 Type=ub        // Starts at byte 16 of r3, matching nOFFSET_BIDX (32*3+4*4)\r
72 \r
73 #define         gWGT                    r8                                      // Weights/offsets; r8 matches nOFFSET_WGT (32*8)\r
74 .declare    gdWGT                       Base=r8  ElementSize=4 Type=d\r
75 .declare    gwWGT                       Base=r8  ElementSize=2 Type=w\r
76 #define         gMV                             r4                                      // MVs; r4 matches nOFFSET_MV (32*4)\r
77 .declare    gwMV                        Base=r4  ElementSize=2 Type=w\r
78 .declare    gdMV                        Base=r4  ElementSize=4 Type=d\r
79 \r
80 .declare        gwERRORY                Base=r10 ElementSize=2 Type=w           // Luma (Y) error data, word view; 16 GRFs (r10-r25), matches nOFFSET_ERRORY\r
81 .declare        gubERRORY               Base=r10 ElementSize=1 Type=ub          // Byte view of the same luma error GRFs\r
82 .declare        gwERRORC                Base=r26 ElementSize=2 Type=w           // Chroma (C) error data, word view; 8 GRFs (r26-r33), matches nOFFSET_ERRORC\r
83 .declare        gubERRORC               Base=r26 ElementSize=1 Type=ub          // Byte view of the same chroma error GRFs; ElementSize is 1 to agree with Type=ub (as for gubERRORY)\r
84 \r
85 //------------ Address registers (all names alias sub-registers of a0; several names share the same sub-register)\r
86 #define         pMSGDSC                 a0.0                            // ud: Must be the leading dword of the address register\r
87 #define         pREF                    a0.0\r
88 \r
89 #define         pBIDX                   a0.2                            // Low word of pWGT_BIDX (a0.1:ud)\r
90 #define         pWGT                    a0.3                            // High word of pWGT_BIDX (a0.1:ud)\r
91 #define         pERRORYC                a0.2                            // :ud, spans pERRORY (a0.4) and pERRORC (a0.5)\r
92 #define         pERRORY                 a0.4\r
93 #define         pERRORC                 a0.5\r
94 #define         pMV                             a0.6                            // Low word of pRECON_MV (a0.3:ud)\r
95 \r
96 #define         pWGT_BIDX               a0.1                            // :ud, WGT & BIDX (words a0.3 and a0.2); see nOFFSET_WGT_BIDX\r
97 #define         pRECON_MV               a0.3                            // :ud, RECON & MV (words a0.7 and a0.6); see nOFFSET_RECON_MV\r
98 \r
99 #define         pREF0                   a0.0                            // :uw\r
100 #define         pREF0D                  a0.0                            // :ud, spans pREF0 and pREF1\r
101 #define         pREF1                   a0.1\r
102 #define         pREF2                   a0.2\r
103 #define         pREF2D                  a0.1                            // :ud, spans pREF2 and pREF3\r
104 #define         pREF3                   a0.3\r
105 #define         pREF4                   a0.4\r
106 #define         pREF4D                  a0.2                            // :ud, spans pREF4 and pREF5\r
107 #define         pREF5                   a0.5\r
108 #define         pREF6                   a0.6\r
109 #define         pREF6D                  a0.3                            // :ud, spans pREF6 and pREF7\r
110 #define         pREF7                   a0.7\r
111 \r
112 #define         pRES                    a0.6\r
113 #define         pRESD                   a0.3                            // :ud, spans pRES and pRESULT\r
114 #define         pRESULT                 a0.7\r
115 \r
116 #define         p0                              a0.0\r
117 #define         p1                              a0.1\r
118 \r
119 //------------ Constants for static/inline/indirect (each offset = 32 * GRF number + offset inside that GRF)\r
120 #define         nOFFSET_BIDX    112                                     // = 32*3+4*4 (gubBIDX, r3.16)\r
121 \r
122 #define         nOFFSET_WGT             256                                     // = 32*8 (gWGT, r8)\r
123 #define         nOFFSET_WGT_BIDX 0x01000070                     // = (256<<16)+112 = (nOFFSET_WGT<<16)+nOFFSET_BIDX\r
124 #define         nOFFSET_ERROR   0x03400140                      // = ((320+128*4)<<16)+320 = (nOFFSET_ERRORC<<16)+nOFFSET_ERRORY\r
125 #define         nOFFSET_ERRORY  0x0140                          // = 32*10 (gwERRORY, r10)\r
126 #define         nOFFSET_ERRORC  0x0340                          // = 32*26 (gwERRORC, r26)\r
127 #define         nOFFSET_MV              128                                     // = 32*4 (gMV, r4)\r
128 #define         nOFFSET_RECON_MV 0x04400080                     // = (1088<<16)+128 = ((32*34)<<16)+nOFFSET_MV  // TODO: OFFSET_RECON is obsolete\r
129 \r
130 //------------ Constants for kernel internal variables (each offset = 32 * GRF number + offset inside that GRF)\r
131 #define         nOFFSET_INTPY0  0x0640                          // = 32*50 (guwINTPY0, r50)\r
132 #define         nOFFSET_INTPY1  0x0780                          // = 32*60 (guwINTPY1, r60)\r
133 #define         nOFFSET_INTPC0  0x06c0                          // = 32*54 (guwINTPC0, r54)\r
134 #define         nOFFSET_INTPC1  0x0480                          // = 32*36 (guwINTPC1, r36)\r
135 #define         nOFFSET_INTP0   0x06c00640                      // = (nOFFSET_INTPC0<<16)|nOFFSET_INTPY0\r
136 #define         nOFFSET_INTP1   0x04800780                      // = (nOFFSET_INTPC1<<16)|nOFFSET_INTPY1\r
137 \r
138 #define         nOFFSET_INTERIM         0x0480                          // = 32*36 (gINTERIM, r36)\r
139 #define         nOFFSET_INTERIM2        0x04A00480                      // = ((32*37)<<16)|(32*36)\r
140 #define         nOFFSET_INTERIM3        0x04A00480                      // = ((32*36+32)<<16)|(32*36); same value as nOFFSET_INTERIM2\r
141 #define         nOFFSET_INTERIM4        0x04A00490                      // = ((32*37)<<16)|(32*36+16)\r
142 \r
143 #define         nOFFSET_INTERIM4x4              0x04C0                  // = 32*38 (gINTERIM4x4, r38)\r
144 #define         nOFFSET_INTERIM4x4_4    0x04E004D0              // = ((32*38+32)<<16)|(32*38+16)\r
145 #define         nOFFSET_INTERIM4x4_5    0x04D004C0              // = ((32*38+16)<<16)|(32*38)\r
146 #define         nOFFSET_INTERIM4x4_6    0x04E004C0              // = ((32*38+32)<<16)|(32*38)\r
147 #define         nOFFSET_INTERIM4x4_7    0x04D004C8              // = ((32*38+16)<<16)|(32*38+8)\r
148 #define         nOFFSET_INTERIM4x4_8    0x04E004D8              // = ((32*38+32)<<16)|(32*38+24)\r
149 #define         nOFFSET_INTERIM4x4_9    0x04F004E8              // = ((32*38+48)<<16)|(32*38+40)\r
150 \r
151 #define         nOFFSET_RES             0x540                           // = 32*42\r
152 #define         nOFFSET_REF             0x560                           // = 32*43 (gudREF, r43)\r
153 #define         nOFFSET_REFC    0x700                           // = 32*56 (gudREFC, r56)\r
154 \r
155                         // Binding table index\r
156 #define         nBDIX_DESTY             0                               // Presumably the destination luma (Y) surface - confirm against the binding table setup\r
157 #define         nBDIX_DESTC             1                               // Presumably the destination chroma (C) surface - confirm against the binding table setup\r
158 #define         nBI_LC_DIFF             0x10                            // Binding table index diff between luma and chroma\r
159 \r
160 #define         nGRFWIB                 32                              // Presumably GRF width in bytes; the nOFFSET_* constants are multiples of this value per GRF\r
161 #define         nGRFHWIB                16                              // Half of nGRFWIB\r
162 \r
163 //------------ Regions (typed views over GRFs; several views share a base register and therefore the same storage)\r
164 \r
165 .declare    gudREF                      Base=r43 ElementSize=4 SrcRegion=<16;16,1> Type=ud      // r43 matches nOFFSET_REF (32*43)\r
166 .declare    gubREF                      Base=r43 ElementSize=1 Type=ub  // Byte view of gudREF\r
167 .declare    gudREFC                     Base=r56 ElementSize=4 SrcRegion=<16;16,1> Type=ud      // r56 matches nOFFSET_REFC (32*56)\r
168 \r
169 // 16x16 handling\r
170 .declare    gudREF21x21         Base=r58 ElementSize=4 SrcRegion=<16;16,1> Type=ud\r
171 .declare    gudREF18x10         Base=r66 ElementSize=4 SrcRegion=<16;16,1> Type=ud\r
172 .declare    gubREF18x10         Base=r66 ElementSize=1 SrcRegion=<16;16,1> Type=ub      // Byte view of gudREF18x10\r
173 \r
174 \r
175 \r
176 .declare    gudREF16x16         Base=r38 ElementSize=4 Type=ud                  // 8 GRFs (r38-r45)\r
177 .declare    gubREF16x16         Base=r38 ElementSize=1 Type=ub  // Byte view of gudREF16x16\r
178 .declare    gudREFC16x8         Base=r46 ElementSize=4 Type=ud                  // 4 GRFs (r46-r49)\r
179 .declare    gubREFC16x8         Base=r46 ElementSize=1 Type=ub  // Byte view of gudREFC16x8\r
180 \r
181 // TODO\r
182 .declare    gubAVG                      Base=r56 ElementSize=1 Type=ub  // Same base register (r56) as gudREFC\r
183 .declare        gubREFY_BWD             Base=r64 ElementSize=1 Type=ub\r
184 .declare        gubREFC_BWD             Base=r72 ElementSize=1 Type=ub\r
185 \r
186 \r
187 .declare    guwINTPY0           Base=r50 ElementSize=2 SrcRegion=<16;16,1> Type=uw      // r50 matches nOFFSET_INTPY0 (32*50)\r
188 .declare        gudINTPY0               Base=r50 ElementSize=4 Type=ud\r
189 .declare    gubINTPY0           Base=r50 ElementSize=1 SrcRegion=<32;16,2> Type=ub      // Byte view with horizontal stride 2\r
190 .declare    guwINTPY1           Base=r60 ElementSize=2 SrcRegion=<16;16,1> Type=uw      // r60 matches nOFFSET_INTPY1 (32*60)\r
191 .declare        gudINTPY1               Base=r60 ElementSize=4 Type=ud\r
192 .declare    gubINTPY1           Base=r60 ElementSize=1 SrcRegion=<32;16,2> Type=ub      // Byte view with horizontal stride 2\r
193 .declare    guwYPRED            Base=r50 ElementSize=2 SrcRegion=<8;8,1> Type=uw        // Same base register (r50) as guwINTPY0\r
194 .declare    gubYPRED            Base=r50 ElementSize=1 SrcRegion=<32;16,2> Type=ub\r
195 \r
196 .declare    guwINTPC0           Base=r54 ElementSize=2 SrcRegion=<16;16,1> Type=uw      // r54 matches nOFFSET_INTPC0 (32*54)\r
197 .declare    gwINTPC0            Base=r54 ElementSize=2 SrcRegion=<16;16,1> Type=w       // Signed-word view of guwINTPC0\r
198 .declare        gudINTPC0               Base=r54 ElementSize=4 Type=ud\r
199 .declare    gubINTPC0           Base=r54 ElementSize=1 SrcRegion=<32;16,2> Type=ub\r
200 .declare    guwINTPC1           Base=r36 ElementSize=2 SrcRegion=<16;16,1> Type=uw      // r36 matches nOFFSET_INTPC1 (32*36); same register as gINTERIM\r
201 .declare        gudINTPC1               Base=r36 ElementSize=4 Type=ud\r
202 .declare    gubINTPC1           Base=r36 ElementSize=1 SrcRegion=<32;16,2> Type=ub\r
203 .declare    guwCPRED            Base=r54 ElementSize=2 SrcRegion=<16;8,2> Type=uw       // Same base register (r54) as guwINTPC0\r
204 .declare    gubCPRED            Base=r54 ElementSize=1 SrcRegion=<32;8,4> Type=ub\r
205 \r
206 #define         gINTERIM                r36                             // r36 matches nOFFSET_INTERIM (32*36)\r
207 .declare        gubINTERIM_BUF  Base=r36 ElementSize=1 SrcRegion=<32;16,2> Type=ub\r
208 #define         gINTERIM4x4             r38                             // r38 matches nOFFSET_INTERIM4x4 (32*38)\r
209 .declare        gubINTERIM4x4_BUF Base=r38 ElementSize=1 SrcRegion=<32;16,2> Type=ub\r
210 .declare        gwINTERIM4x4_BUF Base=r38 ElementSize=2  Type=w\r
211 \r
212 .declare        gubINTERIM_BUF2 Base=r42 ElementSize=1 SrcRegion=<8;4,2> Type=ub\r
213 .declare        gwINTERIM_BUF2  Base=r42 ElementSize=2 SrcRegion=<16;16,1> Type=w       \r
214 .declare        guwINTERIM_BUF2 Base=r42 ElementSize=2 Type=uw  \r
215 \r
216 .declare        gwINTERIM_BUF3  Base=r38 ElementSize=2 SrcRegion=<16;16,1> Type=w               // 2 GRFs; same base register (r38) as gINTERIM4x4\r
217 .declare        gubINTERIM_BUF3 Base=r38 ElementSize=1 Type=ub                                                  \r
218 \r
219 .declare        gwTEMP                  Base=r42 ElementSize=2 SrcRegion=<16;16,1> Type=w       // Same base register (r42) as gwINTERIM_BUF2\r
220 \r
221 //------------ General registers\r
222 \r
223 #define         gX                              r3.2                            // w; word 2 of r3 overlays bytes 4-5 (gORIX and gORIY)\r
224 #define         gY                              r3.3                            // w; word 3 of r3 = bytes 6-7\r
225 \r
226 #define         gMSGDSC_R               r3.6                            // ud; dword 6 of r3 = bytes 24-27, which include gADDR (r3.24 :ub)\r
227 #define         gMSGDSC_W               r3.7                            // ud; dword 7 of r3 = bytes 28-31\r
228 \r
229 #ifdef  SW_W_128\r
230 .declare        gwMBTYPE                Base=r8.6 ElementSize=2 Type=w                  // Shared with gLOOP_SUBMB; placed in r8 because gudW128 is declared at r1.0 when the WA is enabled\r
231 \r
232 // TODO\r
233 #define         gLOOP_SUBMB             r8.6\r
234 #define         gLOOP_SUBMBPT   r8.7\r
235 #define         gLOOP_DIR               r9.6\r
236 #define         gLOOPCNT                r9.7                            // Loop counter for submodules\r
237 #else\r
238 .declare        gwMBTYPE                Base=r1.0 ElementSize=2 Type=w                  // Shared with gLOOP_SUBMB; r1.0 is usable here because gudW128 is not declared\r
239 \r
240 // TODO\r
241 #define         gLOOP_SUBMB             r1.0\r
242 #define         gLOOP_SUBMBPT   r1.1\r
243 #define         gLOOP_DIR               r8.7\r
244 #define         gLOOPCNT                r9.7                            // Loop counter for submodules\r
245 #endif  // SW_W_128\r
246 \r
247 #define         gW0                             r34.6                           // Temporary WORD \r
248 #define         gW1                             r34.7                           // Temporary WORD \r
249 #define         gW2                             r34.8                           // Temporary WORD \r
250 #define         gW3                             r34.9                           // Temporary WORD \r
251 #define         gD0                             r34.3                           // Temporary DWORD; dword 3 of r34 = words 6-7, i.e. the same storage as gW0/gW1\r
252 \r
253 #define         gW4                             r34.15\r
254 \r
255 //\r
256 \r
257 #define         gMVX_INT                r34.0                           // :w\r
258 #define         gMVY_INT                r34.1                           // :w\r
259 #define         gMVX_FRAC               r34.2                           // :w\r
260 #define         gMVY_FRAC               r34.3                           // :w\r
261 #define         gMVX_FRACC              r34.4                           // :w\r
262 #define         gMVY_FRACC              r34.5                           // :w\r
263 \r
264 #define         gpINTPY                 r34.10\r
265 #define         gpINTPC                 r34.11\r
266 #define         gpINTP                  r34.5                           // DW; dword 5 of r34 = words 10-11, i.e. gpINTPY (low) and gpINTPC (high) together\r
267 \r
268 #define         gPREDFLAG               r34.12\r
269 #define         gBIDX                   r34.13\r
270 #define         gREFPARITY              r34.14\r
271 #define         gCHRMVADJ               r1.14\r
272 #define         gPARITY                 r1.15\r
273 #define         gCBP_MASK               r1.1                            // NOTE(review): r1.1 is also gLOOP_SUBMBPT when SW_W_128 is not defined - confirm the two never conflict\r
274 \r
275 #define         gMVSTEP                 r1.13\r
276 \r
277 #define         gpADDR                  r1.2                            // :uw (8 words), i.e. words 2-9 of r1\r
278 \r
279 #define         gSHAPETEMP              r8.15                           // :uw\r
280 \r
281 #define         gCOEFA                  r42.0                           // r42 is also the base register of gwTEMP and the gINTERIM_BUF2 views\r
282 #define         gCOEFB                  r42.1                           \r
283 #define         gCOEFC                  r42.2                           \r
284 #define         gCOEFD                  r42.3\r
285 \r
286 // Weighted prediction (r43-r46 below are also covered by the gudREF, gudREF16x16 and gudREFC16x8 region declarations)\r
287 #define         gPREDFLAG0              r46.0\r
288 #define         gPREDFLAG1              r46.2\r
289 \r
290 #define         gWEIGHTFLAG             r43.2\r
291 #define         gBIPRED                 r43.3\r
292 #define         gYADD                   r43.4\r
293 #define         gCADD                   r43.5\r
294 #define         gYSHIFT                 r43.6\r
295 #define         gCSHIFT                 r43.7\r
296 \r
297 #define         gOFFSET                 r44.0\r
298 #define         gUOFFSET                r44.1\r
299 #define         gVOFFSET                r44.2\r
300 \r
301 #define         gWT0                    r45.0\r
302 #define         gO0                             r45.1\r
303 #define         gWT1                    r45.2\r
304 #define         gO1                             r45.3\r
305 #define         gUW0                    r45.4\r
306 #define         gUO0                    r45.5\r
307 #define         gUW1                    r45.6\r
308 #define         gUO1                    r45.7   \r
309 #define         gVW0                    r45.8   \r
310 #define         gVO0                    r45.9   \r
311 #define         gVW1                    r45.10  \r
312 #define         gVO1                    r45.11\r
313 \r
314 #define         gWT0_D                  r45.0                           // Presumably the dword view covering gWT0/gO0 (if those are words) - TODO confirm\r
315 #define         gUW0_D                  r45.2                           // Presumably the dword view covering gUW0/gUO0 (if those are words) - TODO confirm\r
316 \r
317 //------------ Message-related Registers & constants\r
318 #define         gMSGSRC                 r2                                      // Message Source\r
319 \r
320 #define         mMSGHDR                 m1              \r
321 #define         mMSGHDRY                m1              \r
322 #define         mMSGHDRC                m2              \r
323 #define         mMSGHDR1                m1              \r
324 #define         mMSGHDR2                m2              \r
325 #define         mMSGHDR3                m3              \r
326 #define         mMSGHDR4                m4              \r
327 #define         mMSGHDRYW               m1              \r
328 #define         mMSGHDRCW               m10             \r
329 \r
330 #ifdef DEV_ILK\r
331         // NOTE(review): this bit layout matches the pre-DEV_ILK values (0x0410xxxx), not the DEV_ILK values below - TODO confirm the ILK layout: 0000 0100(read)  0001(msg len) xxxx(resp len) 1010 (sampler cache) xxxx (field/frame) xxxx xxxx (bidx)\r
332 #define         nDWBRMSGDSC_SC          0x0208A002      // DWORD Block Read Message Descriptor through Data Port, Sampler Cache\r
333 #define         nDWBRMSGDSC_SC_TF       0x0208E602      // DWORD Block Read Message Descriptor through Data Port, Sampler Cache (_TF: presumably top field)\r
334 #define         nDWBRMSGDSC_SC_BF       0x0208E702      // DWORD Block Read Message Descriptor through Data Port, Sampler Cache (_BF: presumably bottom field)\r
335         // NOTE(review): this bit layout matches the pre-DEV_ILK values (0x0510xxxx), not the DEV_ILK values below - TODO confirm the ILK layout: 0000 0101(write) 0001(msg len) xxxx(resp len) 0010 (render cache)  xxxx (field/frame) xxxx xxxx (bidx)\r
336 #define         nDWBWMSGDSC             0x02082000  // DWORD Block Write Message Descriptor through Data Port, Render Cache\r
337 #define         nDWBWMSGDSC_TF  0x02082600  // DWORD Block Write Message Descriptor through Data Port, Render Cache (_TF: presumably top field)\r
338 #define         nDWBWMSGDSC_BF  0x02082700  // DWORD Block Write Message Descriptor through Data Port, Render Cache (_BF: presumably bottom field)\r
339 \r
340 #else   // Pre DEV_ILK\r
341         // 0000 0100(read)  0001(msg len) xxxx(resp len) 1010 (sampler cache) xxxx (field/frame) xxxx xxxx (bidx)\r
342 #define         nDWBRMSGDSC_SC          0x0410A002      // DWORD Block Read Message Descriptor through Data Port, Sampler Cache\r
343 #define         nDWBRMSGDSC_SC_TF       0x0410A602      // DWORD Block Read Message Descriptor through Data Port, Sampler Cache (_TF: presumably top field)\r
344 #define         nDWBRMSGDSC_SC_BF       0x0410A702      // DWORD Block Read Message Descriptor through Data Port, Sampler Cache (_BF: presumably bottom field)\r
345         // 0000 0101(write) 0001(msg len) xxxx(resp len) 0010 (render cache)  xxxx (field/frame) xxxx xxxx (bidx)\r
346 #define         nDWBWMSGDSC             0x05102000  // DWORD Block Write Message Descriptor through Data Port, Render Cache\r
347 #define         nDWBWMSGDSC_TF  0x05102600  // DWORD Block Write Message Descriptor through Data Port, Render Cache (_TF: presumably top field)\r
348 #define         nDWBWMSGDSC_BF  0x05102700  // DWORD Block Write Message Descriptor through Data Port, Render Cache (_BF: presumably bottom field)\r
349 #endif  // DEV_ILK\r
350 \r
351 #define         nDWB_FIELD_MASK 0x0600\r
352                                                                                 \r
353 // message data payload (each payload starts in the message register right after its write header: m2 after mMSGHDRYW, m11 after mMSGHDRCW)\r
354 .declare    mbMSGPAYLOADY       Base=m2  ElementSize=1 SrcRegion=REGION(16,1) Type=b\r
355 .declare    mbMSGPAYLOADC       Base=m11 ElementSize=1 SrcRegion=REGION(16,1) Type=b\r
356 \r
357 // Destination registers for write commit (r10/r11 are also the first two GRFs of the gwERRORY block)\r
358 #define         gREG_WRITE_COMMIT_Y             r10.0\r
359 #define         gREG_WRITE_COMMIT_UV    r11.0\r
360 \r
361 #define RETURN_REG_INTER        r1.5            // Return pointer for all sub-routine calls (type DWORD); written by CALL_INTER, read back by RETURN_INTER\r
362 \r
363 #define CALL_INTER(subFunc, skipInst)   add (1) RETURN_REG_INTER<1>:ud   ip:ud  1+skipInst*INST_SIZE \n\\r
364                                 jmpi (1) subFunc\r
365 #define RETURN_INTER            mov (1) ip:ud   RETURN_REG_INTER<0;1,0>:ud              // Return to calling module by loading ip from RETURN_REG_INTER\r
366 \r
367 \r
368 // End of inter_header.inc\r
369 \r
370 #endif  // !defined(__INTER_HEADER__)\r
371 \r