OSDN Git Service

i965_drv_video: add support for H264 on Clarkdale/Arrandale
[android-x86/hardware-intel-common-libva.git] / i965_drv_video / shaders / h264 / ildb / AVC_ILDB_Root_Mbaff_Y.asm
1 /*\r
2  * Copyright © <2010>, Intel Corporation.\r
3  *\r
4  * This program is licensed under the terms and conditions of the\r
5  * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
6  * http://www.opensource.org/licenses/eclipse-1.0.php.\r
7  *\r
8  */\r
9 /////////////////////////////////////////////////////////////////////////////////////\r
10 // Kernel name: AVC_ILDB_Root_Mbaff_Y.asm\r
11 //\r
12 //  Root kernel serves as a scheduler for child threads.\r
13 //\r
14 //\r
15 //      ***** Note *****\r
16 //      The initial design bundles one MB pair per thread and shares AVC_ILDB_MB_Dep_Check.asm\r
17 //      with the non-MBAFF kernels.\r
18 //\r
19 //      Optimization will be done later, putting top and bottom MBs on separate threads.\r
20 //\r
21 //\r
22 /////////////////////////////////////////////////////////////////////////////////////\r
23 //\r
24 //  $Revision: 1 $\r
25 //  $Date: 10/19/06 5:06p $\r
26 //\r
27 \r
28 // ----------------------------------------------------\r
29 //  AVC_ILDB_ROOT_MBAFF_Y\r
30 // ----------------------------------------------------\r
// Define AVC_ILDB before the shared setup/include files below are pulled in.\r
31 #define AVC_ILDB\r
32 \r
// Start of the luma (Y) MBAFF root kernel.\r
33 .kernel AVC_ILDB_ROOT_MBAFF_Y\r
34 #if defined(COMBINED_KERNEL)\r
// Combined-kernel builds additionally emit an entry label here.\r
// NOTE(review): the label is AVC_ILDB_ROOT_Y although the kernel is named\r
// AVC_ILDB_ROOT_MBAFF_Y - confirm this is the name the combined kernel jumps to.\r
35 ILDB_LABEL(AVC_ILDB_ROOT_Y):\r
36 #endif\r
37 \r
// Shared definitions; the symbolic register names used below (ChildParam, CT_R0Hdr,\r
// URBOffset, ...) are not defined in this file - presumably they come from these includes.\r
38 #include "setupVPKernel.asm"\r
39 #include "AVC_ILDB.inc"\r
40 \r
41 \r
// Debug-only section: everything up to the matching #endif is assembled only when _DEBUG is defined.\r
42 #if defined(_DEBUG) \r
43 \r
44 /////////////////////////////////////////////////////////////////////////////////////\r
45 // Init URB space for running on RTL, so that reads of not-yet-written URB entries are satisfied.\r
46 // To be removed for the production release.\r
47 \r
48 \r
// NOTE: the URB init loop below is entirely commented out; it is kept for reference only.\r
49 //mov (8) m1:ud                 0x11111111:ud\r
50 //mov (8) m2:ud                 0x22222222:ud \r
51 //mov (8) m3:ud                 0x33333333:ud\r
52 //mov (8) m4:ud                 0x44444444:ud \r
53 \r
54 //mov (1)       Temp1_W:w       0:w\r
55 \r
56 //ILDB_INIT_URB:\r
57 //mul (1)       URBOffset:w                             Temp1_W:w               4:w\r
58 //shl (1) URBWriteMsgDescLow:uw         URBOffset:w             4:w             // Msg descriptor: URB write dest offset (9:4)\r
59 //mov (1) URBWriteMsgDescHigh:uw        0x0650:uw                               // Msg descriptor: URB write 5 MRFs (m0 - m4)\r
60 //#include "writeURB.asm"\r
61 \r
62 //add           (1)             Temp1_W:w       Temp1_W:w       1:w                             // Increase block count\r
63 //cmp.l.f0.0 (1)        null            Temp1_W:w       MBsCntY:w               // Check the block count limit\r
64 //(f0.0) jmpi           ILDB_INIT_URB                                                   // Loop back\r
65 \r
66 /////////////////////////////////////////////////////////////////////////////////////\r
67 \r
68 \r
// The only live instruction in this debug section: write the marker value 0xEFF0 into\r
// EntrySignature. Presumably used to recognise kernel entry when debugging - confirm.\r
69 mov             (1)             EntrySignature:w                        0xEFF0:w\r
70 \r
71 #endif\r
72 //----------------------------------------------------------------------------------------------------------------\r
73 \r
74 // Set global variables\r
75 mov (32)        ChildParam:uw                   0:uw                                                            // Zero 32 words starting at ChildParam (resets local variables)\r
76 //mul   (1)             TotalBlocks:w                   MBsCntX:w               MBsCntY:w                       // Total # of MB pairs\r
77 //add   (1)             GatewayApertureE:w              MBsCntY:w               GatewayApertureB:w      // Aperture End = aperture Head + BlockCntY\r
78 \r
79 \r
80 // 2 URB entries for Y:\r
81 // Entry 0 - Child thread R0Hdr\r
82 // Entry 1 - input parameter to child kernel (child r1)\r
83 \r
// Override any earlier definition of URB_ENTRIES_PER_MB with the luma value (2).\r
84 #undef          URB_ENTRIES_PER_MB\r
85 #define         URB_ENTRIES_PER_MB              2\r
86 \r
87 // URB_ENTRIES_PER_MB in a different form; the final desired format is (URB_ENTRIES_PER_MB-1) << 10\r
// Computed in two steps: load (URB_ENTRIES_PER_MB-1), then shift it left by 10.\r
88 mov (1)         URB_EntriesPerMB_2:w            URB_ENTRIES_PER_MB-1:w\r
89 shl (1)         URB_EntriesPerMB_2:w            URB_EntriesPerMB_2:w    10:w\r
90 \r
91 mov     (1)             ChildThreadsID:uw               1:uw                                    // ChildThreadsID = 1, used for the chroma root\r
92 \r
93 shr (1)         ThreadLimit:w           MaxThreads:w            1:w             // ThreadLimit = MaxThreads >> 1: initial luma thread limit is 50%\r
94 mul     (1)             TotalBlocks:w           MBsCntX:w               MBsCntY:w       // TotalBlocks = MBsCntX * MBsCntY; MBs to be processed count down from TotalBlocks\r
95 \r
96 //***** Init CT_R0Hdr fields that are common to all threads *************************\r
97 mov (8)         CT_R0Hdr.0:ud                   r0.0<8;8,1>:ud                          // Copy all 8 dwords of the root R0 header as the starting point\r
98 mov (1)         CT_R0Hdr.7:ud                   r0.6:ud                                         // Copy Parent Thread Cnt (root r0.6 -> child header dword 7); JJ did the change on 06/20/2006\r
99 mov (1)         CT_R0Hdr.31:ub                  0:w                                                     // Reset the highest byte (byte 31, i.e. the top byte of dword 7 just written)\r
// Clear dword 3 of the child header.\r
// NOTE(review): this immediate has no explicit type suffix, unlike the neighbouring\r
// instructions - confirm the assembler's default type is the intended one.\r
100 mov (1)         CT_R0Hdr.3:ud                   0x00000000       \r
101 mov (1)         CT_R0Hdr.6:uw                   sr0.0:uw                                        // sr0.0: state reg contains general thread states, e.g. EUID/TID.\r
102 \r
103 //***** Init ChildParam fields that are common to all threads ***********************\r
104 mov (8)         ChildParam<1>:ud        RootParam<8;8,1>:ud             // Copy all root parameters (8 dwords)\r
// NOTE(review): the next mov has execution size 4, so it zeroes four consecutive words\r
// starting at CurCol, not only CurCol and CurRow - confirm the two following words are meant to be cleared.\r
105 mov (4)         CurCol<1>:w                     0:w                                             // Reset CurCol, CurRow\r
// Two-wide add: subtracts 1 from the two words at MBsCntX and stores them at LastCol.\r
// Assumes MBsCntY directly follows MBsCntX and LastRow directly follows LastCol - TODO confirm against the register map.\r
106 add     (2)             LastCol<1>:w            MBsCntX<2;2,1>:w                -1:w    // Get LastCol and LastRow\r
107 \r
// URB write message descriptor, built from MSG_LEN(2) and URBWMSGDSC (both defined outside this file).\r
108 mov (1)         URBWriteMsgDesc:ud              MSG_LEN(2)+URBWMSGDSC:ud\r
109 \r
110 //===================================================================================\r
111 \r
112 #include "AVC_ILDB_OpenGateway.asm"             // Open root thread gateway for receiving notification \r
113 \r
// The chroma URB offset is 240 when DEV_CL is defined and 320 otherwise.\r
114 #if defined(DEV_CL)     \r
115         mov     (1)             URBOffset:uw            240:uw  // Use chroma URB offset to spawn chroma root\r
116 #else\r
117         mov     (1)             URBOffset:uw            320:uw  // Use chroma URB offset to spawn chroma root\r
118 #endif\r
119 \r
120 #include "AVC_ILDB_SpawnChromaRoot.asm" // Spawn chroma root\r
121 \r
// Switch from the chroma root's settings to the luma children's: URB offset 0, thread IDs start at 2.\r
122 mov     (1)             URBOffset:uw            0:uw    // Use luma URB offset to spawn luma child \r
123 mov     (1)             ChildThreadsID:uw       2:uw    // Starting ChildThreadsID for luma child threads\r
124 \r
125 #include "AVC_ILDB_Dep_Check.asm"       // Check dependency and spawn all MBs\r
126 \r
127 // Wait for UV root thread to finish\r
// Busy-wait: the compare sets f0.0 while ThreadLimit < MaxThreads, and the predicated jmpi\r
// loops back to the label for as long as f0.0 is set.\r
// ThreadLimit was initialised to MaxThreads >> 1 earlier in this kernel; nothing in this file\r
// raises it, so presumably the chroma root's completion notification does - TODO confirm.\r
128 ILDB_LABEL(WAIT_FOR_UV):\r
129 cmp.l.f0.0 (1) null:w   ThreadLimit:w           MaxThreads:w\r
130 (f0.0)  jmpi    ILDB_LABEL(WAIT_FOR_UV)\r
131 \r
132 #include "AVC_ILDB_CloseGateway.asm"    // Close root thread gateway \r
133 \r
134 END_THREAD                                                              // End of root thread\r
135 \r
// The code/kernel terminators are emitted only for the standalone build; they are omitted\r
// when COMBINED_KERNEL is defined.\r
136 #if !defined(COMBINED_KERNEL)           // For standalone kernel only\r
137 .end_code\r
138 \r
139 .end_kernel\r
140 #endif\r