OSDN Git Service

i965_drv_video: add support for H264 on Clarkdale/Arrandale
[android-x86/hardware-intel-common-libva.git] / i965_drv_video / shaders / h264 / mc / weightedPred.asm
1 /*\r
2  * Weighted prediction of luminance and chrominance\r
3  * Copyright © <2010>, Intel Corporation.\r
4  *\r
5  * This program is licensed under the terms and conditions of the\r
6  * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at\r
7  * http://www.opensource.org/licenses/eclipse-1.0.php.\r
8  *\r
9  */\r
10 // Kernel name: WeightedPred.asm\r
11 //\r
12 // Weighted prediction of luminance and chrominance\r
13 //\r
14 \r
15 \r
16 //#if !defined(__WeightedPred__)                // Make sure this is only included once\r
17 //#define __WeightedPred__\r
18 \r
19 \r
// Entry dispatch: pick the weighted path or one of the two default (unweighted) paths.
//   f0.0 <- set when (gWPREDFLAG & nWBIDIR_MASK) == 0; the masked value is kept in gWEIGHTFLAG.
//   f0.1 <- set when gPREDFLAG == 2.
// Masked flag non-zero      -> WeightedPred.
// Otherwise gPREDFLAG == 2  -> DefaultWeightedPred_BiPred.
// Otherwise                 -> fall through into DefaultWeightedPred_UniPred.
// NOTE(review): gPREDFLAG == 2 presumably means "both reference lists used" - confirm against the header that defines it.
20         and.z.f0.0 (1) gWEIGHTFLAG:w    gWPREDFLAG:ub                                   nWBIDIR_MASK:w\r
21         cmp.e.f0.1 (1) null:w                   gPREDFLAG:w                                             2:w\r
22         (-f0.0) jmpi INTERLABEL(WeightedPred)\r
23         (f0.1) jmpi INTERLABEL(DefaultWeightedPred_BiPred)\r
24         \r
25 INTERLABEL(DefaultWeightedPred_UniPred):\r
// Default (unweighted) prediction from a single reference.
// Reached by fall-through from the entry dispatch, and by a jump from WeightedPred
// when gWEIGHTFLAG == 0x80 but gPREDFLAG != 2.
26 \r
           // gPREDFLAG == 0: nothing is copied, go straight to the exit label.
           // NOTE(review): presumably the first-reference samples already sit in the prediction buffer - confirm.
27         cmp.e.f0.0 (1) null:w                   gPREDFLAG:w                                             0:w\r
28         (f0.0) jmpi INTERLABEL(Return_WeightedPred)\r
29 \r
30         // luma: copy the gubINTPY1 samples into gubYPRED (destination region <2>), two compressed 32-wide moves\r
31         mov (32)        gubYPRED(0)<2>          gubINTPY1(0)    {Compr}\r
32         mov (32)        gubYPRED(2)<2>          gubINTPY1(2)    {Compr}\r
33 \r
34 #ifndef MONO\r
35         // chroma: same copy from gubINTPC1 into gubCPRED; compiled out when MONO is defined\r
36         mov (32)        gubCPRED(0)<2>          gubINTPC1(0)    {Compr}\r
37 #endif\r
38 \r
           // Done: skip over the bi-pred and weighted paths.
39         jmpi INTERLABEL(Return_WeightedPred)\r
40         \r
41 INTERLABEL(DefaultWeightedPred_BiPred):\r
// Default (unweighted) bi-prediction: each output sample is the saturated average (avg.sat)
// of the corresponding samples of the two interpolated inputs (INTPx0 and INTPx1).
42 \r
43         // luma: gubYPRED = avg.sat(gubINTPY0, gubINTPY1), two compressed 32-wide operations\r
44         avg.sat (32) gubYPRED(0)<2>             gubINTPY0(0)                                    gubINTPY1(0)    {Compr}\r
45         avg.sat (32) gubYPRED(2)<2>             gubINTPY0(2)                                    gubINTPY1(2)    {Compr}\r
46         \r
47 #ifndef MONO\r
48         // chroma: gubCPRED = avg.sat(gubINTPC0, gubINTPC1); compiled out when MONO is defined\r
49         avg.sat (32) gubCPRED(0)<2>             gubINTPC0(0)                                    gubINTPC1(0)    {Compr}\r
50 #endif\r
51         \r
           // Done: skip over the weighted paths.
52         jmpi INTERLABEL(Return_WeightedPred)\r
53         \r
54 INTERLABEL(WeightedPred):\r
// Weighted prediction entry (masked weight flag in gWEIGHTFLAG is non-zero).
//   gWEIGHTFLAG != 0x80 -> WeightedPred_Explicit.
//   gWEIGHTFLAG == 0x80 -> handled here with fixed rounding add 32, shift 6 and zero offset.
// NOTE(review): 0x80 presumably selects implicit weighting (H.264 weighted_bipred_idc == 2) - confirm.
55         cmp.e.f0.1 (1) null:w                   gWEIGHTFLAG:w                                   0x80:w\r
56         (-f0.1) jmpi INTERLABEL(WeightedPred_Explicit)\r
57         \r
           // On this path, anything other than gPREDFLAG == 2 is handed to the default single-reference path.
58         cmp.e.f0.0 (1) null:w                   gPREDFLAG:w                                             2:w\r
59         (-f0.0) jmpi INTERLABEL(DefaultWeightedPred_UniPred)\r
60 \r
           // Parameters consumed by WeightedPred_LOOP:
           //   two words at gYADD   = 32 (rounding term)
           //   two words at gYSHIFT = 6  (right shift)
           //   four words at gOFFSET = 0
           //   gWT0 (eight words, destination stride 2) = the first two words at r[pWGT,0], repeated
           // NOTE(review): the second word of gYADD/gYSHIFT is presumably what the loop reads as gCADD/gCSHIFT - confirm.
           // NOTE(review): NoDDClr/NoDDChk are destination-dependency hints; presumably both moves hit the same GRF - confirm.
61         mov (2)         gYADD<1>:w                      32:w                                                            {NoDDClr}       \r
62         mov (2)         gYSHIFT<1>:w            6:w                                                                     {NoDDChk}\r
63         mov (4)         gOFFSET<1>:w            0:w\r
64         mov (8)         gWT0<2>:w                       r[pWGT,0]<0;2,1>:w\r
65         \r
           // Skip the explicit-parameter setup and go straight to the shared multiply/accumulate loop.
66         jmpi INTERLABEL(WeightedPred_LOOP)\r
67         \r
68         // Explicit Prediction: build weights, rounding add, shift and offset from the table at pWGT, then fall into WeightedPred_LOOP\r
69 INTERLABEL(WeightedPred_Explicit):\r
70         \r
71         // WA for weighted prediction - 2007/09/06: set f0.1 per channel where the weight is flagged as 128\r
72 #ifdef  SW_W_128                // CTG SW WA\r
73         cmp.e.f0.1 (8) null:ud                  r[pWGT,0]<8;8,1>:uw                             gudW128(0)<0;1,0>\r
74 #else                                   // ILK HW solution\r
75         and.ne.f0.1 (8) null:uw                 r[pWGT,12]<0;1,0>:ub                    0x88848421:v    // Expand W=128 flag to all components. 2 MSB are don't care\r
76 #endif  \r
           // gBIPRED (two words) = gPREDFLAG >> 1; f0.0 is set when that result is non-zero.
           // NOTE(review): f0.0 set presumably means bi-prediction - confirm.
77         asr.nz.f0.0 (2) gBIPRED<1>:w    gPREDFLAG<0;1,0>:w                              1:w\r
           // Scale the masked weight flag down by 6 bits; it is used below as the base of the rounding term.
78         asr (1)         gWEIGHTFLAG:w           gWEIGHTFLAG:w                                   6:w     \r
           // Per-list enable multipliers:
           //   f0.0 clear: gPREDFLAG1 = gPREDFLAG and gPREDFLAG0 = 1 - gPREDFLAG1
           //   f0.0 set:   four consecutive words starting at gPREDFLAG0 are written with 1 (two dwords 0x00010001)
79         (-f0.0) mov (2) gPREDFLAG1<1>:w gPREDFLAG<0;1,0>:w                                                              \r
80         (f0.0) mov (2)  gPREDFLAG0<1>:ud 0x00010001:ud\r
81         (-f0.0) add (2) gPREDFLAG0<1>:w -gPREDFLAG1<2;2,1>:w                    1:w\r
82         \r
83         // WA for weighted prediction - 2007/09/06: flagged channels get dword 0x00000080 (low word 128, high word 0); the rest load signed bytes from pWGT\r
84         (f0.1) mov (8)  gWT0<1>:ud              0x00000080:ud\r
           // Unflagged channels: sign-extend every second byte from pWGT+0 into gWT0 and from pWGT+1 into gO0 (word stride 2).
85         (-f0.1) mov (8) gWT0<2>:w               r[pWGT,0]<16;8,2>:b\r
86         (-f0.1) mov (8) gO0<2>:w                r[pWGT,1]<16;8,2>:b\r
           // Multiply all 16 words at gWT0 by the repeating 4-word pattern at gPREDFLAG0.
           // NOTE(review): presumably this zeroes the weight/offset of an unused reference list - confirm.
87         mul (16)                gWT0<1>:w               gWT0<16;16,1>:w                                 gPREDFLAG0<0;4,1>:w\r
88 \r
89         // Compute addition (rounding term), two channels driven by the two bytes at gYWDENOM\r
           //   denom != 0: gW0 = (gWEIGHTFLAG << denom) >> 1, then gYADD = gW0 << gBIPRED
           //   denom == 0: gW0 = 0, then gYADD = (0 << gBIPRED) + gBIPRED
90         cmp.e.f0.1 (2) null<1>:w                gYWDENOM<2;2,1>:ub                              0:w\r
91         (-f0.1) shl (2) gW0<1>:w                gWEIGHTFLAG<0;1,0>:w                    gYWDENOM<2;2,1>:ub\r
92         (f0.1) mov (2) gW0<1>:w                 0:w\r
93         (-f0.1) asr (2) gW0<1>:w                gW0<2;2,1>:w                                    1:w\r
94         shl (2)         gYADD<1>:w                      gW0<2;2,1>:w                                    gBIPRED<0;1,0>:w\r
95         (f0.1) add (2)  gYADD<1>:w              gYADD<2;2,1>:w                                  gBIPRED<0;1,0>:w\r
96         \r
97         // Compute shift: gYSHIFT = denom + gBIPRED (two channels)\r
98         add (2)         gYSHIFT<1>:w            gYWDENOM<2;2,1>:ub                              gBIPRED<0;1,0>:w\r
99         \r
100         // Compute offset: gOFFSET = (gO0 + gO1 + gBIPRED) >> gBIPRED, four channels read with stride 4\r
101         add (4)         acc0<1>:w                       gO0<16;4,4>:w                                   gO1<16;4,4>:w\r
102         add (4)         acc0<1>:w                       acc0<4;4,1>:w                                   gBIPRED<0;1,0>:w\r
103         asr (4)         gOFFSET<1>:w            acc0<4;4,1>:w                                   gBIPRED<0;1,0>:w\r
104 \r
105 INTERLABEL(WeightedPred_LOOP):  \r
// Shared weighted-sample computation. It uses the weights, rounding add, shift and offset prepared
// by WeightedPred (fixed 32 / 6 / 0) or by WeightedPred_Explicit. Per sample:
//   pred = sat( ((in0 * w0 + in1 * w1 + add) >> shift) + offset )
106         // luma\r
            // NOTE(review): $for(0;<4;2) appears to be a preprocessor loop that expands the body with %1 = 0 and %1 = 2 - confirm.
107         $for(0;<4;2) {  \r
            // acc0/acc1 = INTPY0 * gWT0 for two consecutive rows (%1 and %1+1) ...
108         mul (16)        acc0<1>:w                       gubINTPY0(%1)                                   gWT0<0;1,0>:w\r
109         mul (16)        acc1<1>:w                       gubINTPY0(%1+1)                                 gWT0<0;1,0>:w\r
            // ... plus INTPY1 * gWT1 ...
110         mac (16)        acc0<1>:w                       gubINTPY1(%1)                                   gWT1<0;1,0>:w\r
111         mac (16)        acc1<1>:w                       gubINTPY1(%1+1)                                 gWT1<0;1,0>:w\r
            // ... plus the rounding term.
112         add (16)        acc0<1>:w                       acc0<16;16,1>:w                                 gYADD:w\r
113         add (16)        acc1<1>:w                       acc1<16;16,1>:w                                 gYADD:w\r
114         // Accumulator cannot be used as destination for ASR, so the shifted result goes to gwINTERIM_BUF3\r
115         asr (16)        gwINTERIM_BUF3(0)<1> acc0<16;16,1>:w                            gYSHIFT:w\r
116         asr (16)        gwINTERIM_BUF3(1)<1> acc1<16;16,1>:w                            gYSHIFT:w\r
            // Add the offset and saturate on the write into the luma prediction buffer.
117         add.sat (16) gubYPRED(%1)<2>    gwINTERIM_BUF3(0)                               gOFFSET:w\r
118         add.sat (16) gubYPRED(%1+1)<2>  gwINTERIM_BUF3(1)                               gOFFSET:w\r
119         }       \r
120 \r
121 #ifndef MONO\r
122         // chroma: same multiply/accumulate/round/shift/offset sequence, done once (no $for); compiled out when MONO is defined\r
            // The <0;2,4> weight regions and the <0;2,1> offset region alternate between two values across channels.
            // NOTE(review): presumably the U and V parameters for interleaved chroma samples - confirm.
123         mul (16)        acc0<1>:w                       gubINTPC0(0)                                    gUW0<0;2,4>:w\r
124         mul (16)        acc1<1>:w                       gubINTPC0(1)                                    gUW0<0;2,4>:w\r
125         mac (16)        acc0<1>:w                       gubINTPC1(0)                                    gUW1<0;2,4>:w\r
126         mac (16)        acc1<1>:w                       gubINTPC1(1)                                    gUW1<0;2,4>:w\r
127         add (16)        acc0<1>:w                       acc0<16;16,1>:w                                 gCADD:w\r
128         add (16)        acc1<1>:w                       acc1<16;16,1>:w                                 gCADD:w\r
129         // Accumulator cannot be used as destination for ASR, so the shifted result goes to gwINTERIM_BUF3\r
130         asr (16)        gwINTERIM_BUF3(0)<1> acc0<16;16,1>:w                            gCSHIFT:w\r
131         asr (16)        gwINTERIM_BUF3(1)<1> acc1<16;16,1>:w                            gCSHIFT:w\r
132         add.sat (16) gubCPRED(0)<2>             gwINTERIM_BUF3(0)                               gUOFFSET<0;2,1>:w\r
133         add.sat (16) gubCPRED(1)<2>             gwINTERIM_BUF3(1)                               gUOFFSET<0;2,1>:w\r
134 #endif\r
135 \r
136 \r
// Common exit label: every prediction path above ends here, by jump or by fall-through.
// No instruction follows the label in this file.
// NOTE(review): execution presumably continues in whatever includes this fragment - confirm.
137 INTERLABEL(Return_WeightedPred):\r
138 \r
139         \r
140 //#endif        // !defined(__WeightedPred__)\r