2 * Copyright © <2010>, Intel Corporation.
\r
4 * This program is licensed under the terms and conditions of the
\r
5 * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
\r
6 * http://www.opensource.org/licenses/eclipse-1.0.php.
\r
9 /////////////////////////////////////////////////////////////////////////////////////
\r
10 // Kernel name: AVC_ILDB_Root_Mbaff.asm
\r
12 // Root kernel serves as a scheduler for child threads.
\r
16 // The initial design bundles an MB pair for each thread, and shares AVC_ILDB_MB_Dep_Check.asm
\r
17 // with non mbaff kernels.
\r
19 // Optimization will be done later, putting top and bottom MBs on separate threads.
\r
22 /////////////////////////////////////////////////////////////////////////////////////
\r
25 // $Date: 10/19/06 5:06p $
\r
28 // ----------------------------------------------------
\r
29 // AVC_ILDB_ROOT_MBAFF_Y
\r
30 // ----------------------------------------------------
\r
// Kernel declaration for the luma (Y) MBAFF root kernel.
// NOTE(review): the #endif that closes the COMBINED_KERNEL conditional below is not
// visible in this listing -- confirm against the original file.
33 .kernel AVC_ILDB_ROOT_MBAFF_Y
\r
// The AVC_ILDB_ROOT_Y entry label is guarded by COMBINED_KERNEL.
34 #if defined(COMBINED_KERNEL)
\r
35 ILDB_LABEL(AVC_ILDB_ROOT_Y):
\r
// Textual includes; presumably they supply the symbols (ChildParam, CT_R0Hdr, MBsCntX, ...)
// used by the instructions below -- confirm in the included files.
38 #include "setupVPKernel.asm"
\r
39 #include "AVC_ILDB.inc"
\r
42 #if defined(_DEBUG)
\r
44 /////////////////////////////////////////////////////////////////////////////////////
\r
45 // Init URB space for running on RTL. This satisfies reads of unwritten URB entries.
\r
46 // Will remove it for production release.
\r
49 //mov (8) m1:ud 0x11111111:ud
\r
50 //mov (8) m2:ud 0x22222222:ud
\r
51 //mov (8) m3:ud 0x33333333:ud
\r
52 //mov (8) m4:ud 0x44444444:ud
\r
54 //mov (1) Temp1_W:w 0:w
\r
57 //mul (1) URBOffset:w Temp1_W:w 4:w
\r
58 //shl (1) URBWriteMsgDescLow:uw URBOffset:w 4:w // Msg descriptor: URB write dest offset (9:4)
\r
59 //mov (1) URBWriteMsgDescHigh:uw 0x0650:uw // Msg descriptor: URB write 5 MRFs (m0 - m4)
\r
60 //#include "writeURB.asm"
\r
62 //add (1) Temp1_W:w Temp1_W:w 1:w // Increase block count
\r
63 //cmp.l.f0.0 (1) null Temp1_W:w MBsCntY:w // Check the block count limit
\r
64 //(f0.0) jmpi ILDB_INIT_URB // Loop back
\r
66 /////////////////////////////////////////////////////////////////////////////////////
\r
// Store the constant 0xEFF0 in EntrySignature.
// NOTE(review): presumably a marker showing that the kernel was entered -- confirm.
69 mov (1) EntrySignature:w 0xEFF0:w
\r
72 //----------------------------------------------------------------------------------------------------------------
\r
74 // Set global variable
\r
// Clear 32 consecutive words starting at ChildParam; individual fields are filled in afterwards.
75 mov (32) ChildParam:uw 0:uw // Reset local variables
\r
76 //mul (1) TotalBlocks:w MBsCntX:w MBsCntY:w // Total # of MB pairs
\r
77 //add (1) GatewayApertureE:w MBsCntY:w GatewayApertureB:w // Aperture End = aperture Head + BlockCntY
\r
80 // 2 URB entries for Y:
\r
81 // Entry 0 - Child thread R0Hdr
\r
82 // Entry 1 - input parameter to child kernel (child r1)
\r
// Override any earlier definition so that URB_ENTRIES_PER_MB is 2 from here on.
84 #undef URB_ENTRIES_PER_MB
\r
85 #define URB_ENTRIES_PER_MB 2
\r
87 // URB_ENTRIES_PER_MB in a different form; the final desired format is (URB_ENTRIES_PER_MB-1) << 10
\r
// Done in two steps: load (URB_ENTRIES_PER_MB-1), then shift left by 10.
// With URB_ENTRIES_PER_MB = 2 the result is 1 << 10 = 0x0400.
88 mov (1) URB_EntriesPerMB_2:w URB_ENTRIES_PER_MB-1:w
\r
89 shl (1) URB_EntriesPerMB_2:w URB_EntriesPerMB_2:w 10:w
\r
// ID 1 is used for the chroma root spawned later; luma children start at ID 2 (set further below).
91 mov (1) ChildThreadsID:uw 1:uw // ChildThreadsID for chroma root
\r
// ThreadLimit = MaxThreads >> 1, i.e. half of MaxThreads.
93 shr (1) ThreadLimit:w MaxThreads:w 1:w // Initial luma thread limit to 50%
\r
// TotalBlocks = MBsCntX * MBsCntY.
94 mul (1) TotalBlocks:w MBsCntX:w MBsCntY:w // MBs to be processed count down from TotalBlocks
\r
96 //***** Init CT_R0Hdr fields that are common to all threads *************************
\r
// Start from a copy of all 8 dwords of the root thread's r0, then patch individual fields.
97 mov (8) CT_R0Hdr.0:ud r0.0<8;8,1>:ud // Init to root R0 header
\r
// Dword 7 of the child header receives dword 6 of the root r0.
98 mov (1) CT_R0Hdr.7:ud r0.6:ud // Copy Parent Thread Cnt; JJ did the change on 06/20/2006
\r
// Byte 31 is the top byte of dword 7, which was just written above.
99 mov (1) CT_R0Hdr.31:ub 0:w // Reset the highest byte
\r
// Zero dword 3 before part of it is filled from sr0.0 on the next instruction.
100 mov (1) CT_R0Hdr.3:ud 0x00000000
\r
// NOTE(review): with type-sized sub-register indexing, .6:uw is the low word of dword 3,
// so the high word stays zero -- confirm the intended header layout.
101 mov (1) CT_R0Hdr.6:uw sr0.0:uw // sr0.0: state reg contains general thread states, e.g. EUID/TID.
\r
103 //***** Init ChildParam fields that are common to all threads ***********************
\r
// Copy 8 dwords from RootParam into ChildParam, then override the per-thread fields below.
104 mov (8) ChildParam<1>:ud RootParam<8;8,1>:ud // Copy all root parameters
\r
// NOTE(review): this clears 4 consecutive words starting at CurCol, while the trailing comment
// names only CurCol and CurRow -- confirm which fields follow them in the parameter layout.
105 mov (4) CurCol<1>:w 0:w // Reset CurCol, CurRow
\r
// Two-element add: the two consecutive words at MBsCntX, each minus 1, are stored to the two
// consecutive words at LastCol. Presumably MBsCntY follows MBsCntX and LastRow follows
// LastCol -- confirm in AVC_ILDB.inc.
106 add (2) LastCol<1>:w MBsCntX<2;2,1>:w -1:w // Get LastCol and LastRow
\r
// URB write message descriptor = MSG_LEN(2) + URBWMSGDSC (both defined outside this listing).
108 mov (1) URBWriteMsgDesc:ud MSG_LEN(2)+URBWMSGDSC:ud
\r
110 //===================================================================================
\r
112 #include "AVC_ILDB_OpenGateway.asm" // Open root thread gateway for receiving notification
\r
// NOTE(review): in this listing both URBOffset writes below follow the #if with no visible
// #else/#endif, so as shown the second write (320) always overrides the first (240).
// The two values look like DEV_CL / non-DEV_CL alternatives -- confirm against the original file.
114 #if defined(DEV_CL)
\r
115 mov (1) URBOffset:uw 240:uw // Use chroma URB offset to spawn chroma root
\r
117 mov (1) URBOffset:uw 320:uw // Use chroma URB offset to spawn chroma root
\r
// The included file is inlined here, after URBOffset and ChildThreadsID (= 1) have been set
// for the chroma root.
120 #include "AVC_ILDB_SpawnChromaRoot.asm" // Spawn chroma root
\r
// Switch URBOffset and ChildThreadsID to the luma values before the dependency-check code runs.
122 mov (1) URBOffset:uw 0:uw // Use luma URB offset to spawn luma child
\r
123 mov (1) ChildThreadsID:uw 2:uw // Starting ChildThreadsID for luma child threads
\r
125 #include "AVC_ILDB_Dep_Check.asm" // Check dependency and spawn all MBs
\r
127 // Wait for UV root thread to finish
\r
// Busy-wait: the compare sets f0.0 while ThreadLimit < MaxThreads, and the predicated jump
// loops back to the label for as long as that holds.
// NOTE(review): neither operand is written inside the loop, so the exit depends on ThreadLimit
// (or MaxThreads) being updated from outside, presumably via the gateway opened earlier -- confirm.
128 ILDB_LABEL(WAIT_FOR_UV):
\r
129 cmp.l.f0.0 (1) null:w ThreadLimit:w MaxThreads:w
\r
130 (f0.0) jmpi ILDB_LABEL(WAIT_FOR_UV)
\r
// Reached only after the wait loop exits.
132 #include "AVC_ILDB_CloseGateway.asm" // Close root thread gateway
\r
134 END_THREAD // End of root thread
\r
136 #if !defined(COMBINED_KERNEL) // For standalone kernel only
\r