/*
 * Weighted prediction of luminance and chrominance
 * Copyright © <2010>, Intel Corporation.
 *
 * This program is licensed under the terms and conditions of the
 * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
 * http://www.opensource.org/licenses/eclipse-1.0.php.
 */
\r
// Kernel name: WeightedPred.asm
//
// Weighted prediction of luminance and chrominance
//

//#if !defined(__WeightedPred__) // Make sure this is only included once
//#define __WeightedPred__
\r
// Entry: choose the prediction path.
//   gWEIGHTFLAG = gWPREDFLAG & nWBIDIR_MASK   (f0.0 is set when the result is zero)
//   f0.1 is set when gPREDFLAG == 2
// A non-zero gWEIGHTFLAG goes to WeightedPred. Otherwise gPREDFLAG == 2 goes
// to DefaultWeightedPred_BiPred, and every other case falls through to
// DefaultWeightedPred_UniPred, which follows immediately.
	and.z.f0.0 (1)	gWEIGHTFLAG:w	gWPREDFLAG:ub	nWBIDIR_MASK:w
	cmp.e.f0.1 (1)	null:w		gPREDFLAG:w	2:w
	(-f0.0) jmpi	INTERLABEL(WeightedPred)
	(f0.1) jmpi	INTERLABEL(DefaultWeightedPred_BiPred)
\r
// Default prediction from a single source: plain copy, no weights applied.
// Entered by fall-through from the dispatch code above and by the jmpi in
// the WeightedPred block when gPREDFLAG != 2.
INTERLABEL(DefaultWeightedPred_UniPred):

	// gPREDFLAG == 0: leave through the exit label without touching the
	// prediction buffers.
	// NOTE(review): presumably the wanted samples are already in place in
	// that case - confirm against the caller.
	cmp.e.f0.0 (1)	null:w		gPREDFLAG:w	0:w
	(f0.0) jmpi	INTERLABEL(Return_WeightedPred)

	// Y: copy gubINTPY1 into gubYPRED as two 32-wide compressed moves.
	mov (32)	gubYPRED(0)<2>	gubINTPY1(0)	{Compr}
	mov (32)	gubYPRED(2)<2>	gubINTPY1(2)	{Compr}

	// C: copy gubINTPC1 into gubCPRED.
	mov (32)	gubCPRED(0)<2>	gubINTPC1(0)	{Compr}

	jmpi INTERLABEL(Return_WeightedPred)
\r
// Default prediction from two sources: saturating average of the INTP*0 and
// INTP*1 buffers, no weights applied. Entered from the dispatch code when
// gWEIGHTFLAG is zero and gPREDFLAG == 2.
INTERLABEL(DefaultWeightedPred_BiPred):

	// Y: gubYPRED = avg.sat(gubINTPY0, gubINTPY1), two 32-wide compressed ops.
	avg.sat (32)	gubYPRED(0)<2>	gubINTPY0(0)	gubINTPY1(0)	{Compr}
	avg.sat (32)	gubYPRED(2)<2>	gubINTPY0(2)	gubINTPY1(2)	{Compr}

	// C: gubCPRED = avg.sat(gubINTPC0, gubINTPC1).
	avg.sat (32)	gubCPRED(0)<2>	gubINTPC0(0)	gubINTPC1(0)	{Compr}

	jmpi INTERLABEL(Return_WeightedPred)
\r
// Weighted prediction, reached when gWEIGHTFLAG (already masked by the
// dispatch code) is non-zero.
//   gWEIGHTFLAG != 0x80          -> WeightedPred_Explicit
//   gWEIGHTFLAG == 0x80, and
//     gPREDFLAG != 2             -> DefaultWeightedPred_UniPred (plain copy)
//     gPREDFLAG == 2             -> fixed parameters below, then the weighting code
// NOTE(review): the 0x80 case looks like H.264 implicit weighting (rounding
// 32, shift 6, zero offset) - confirm against the flag definitions.
INTERLABEL(WeightedPred):
	cmp.e.f0.1 (1)	null:w		gWEIGHTFLAG:w	0x80:w
	(-f0.1) jmpi	INTERLABEL(WeightedPred_Explicit)

	cmp.e.f0.0 (1)	null:w		gPREDFLAG:w	2:w
	(-f0.0) jmpi	INTERLABEL(DefaultWeightedPred_UniPred)

	// Fixed rounding add (32), shift (6) and offset (0).
	// NOTE(review): the 2-wide writes presumably also fill the chroma
	// neighbours (gCADD / gCSHIFT) that the weighting code reads - confirm
	// the register layout.
	mov (2)		gYADD<1>:w	32:w	{NoDDClr}
	mov (2)		gYSHIFT<1>:w	6:w	{NoDDChk}
	mov (4)		gOFFSET<1>:w	0:w

	// Replicate the two words at r[pWGT,0] across eight stride-2 word slots
	// starting at gWT0.
	mov (8)		gWT0<2>:w	r[pWGT,0]<0;2,1>:w

	jmpi INTERLABEL(WeightedPred_LOOP)
\r
// Explicit Prediction
//
// Derives the parameters consumed by the weighting code at WeightedPred_LOOP
// (weights at gWT0, rounding add gYADD, shift gYSHIFT, offset gOFFSET) from
// the table addressed through pWGT, then falls through into that code.
INTERLABEL(WeightedPred_Explicit):

	// WA for weighted prediction - 2007/09/06
	// f0.1 <- per-component "W = 128" flag, produced by one of two variants.
	// The conditional is closed right after the variant-specific instruction
	// so that the parameter setup below is assembled for both variants.
#ifdef SW_W_128		// CTG SW WA
	cmp.e.f0.1 (8)	null:ud		r[pWGT,0]<8;8,1>:uw	gudW128(0)<0;1,0>
#else			// ILK HW solution
	and.ne.f0.1 (8)	null:uw		r[pWGT,12]<0;1,0>:ub	0x88848421:v	// Expand W=128 flag to all components. 2 MSB are don't care
#endif	// SW_W_128

	// gBIPRED = gPREDFLAG >> 1; f0.0 is set where that result is non-zero.
	asr.nz.f0.0 (2)	gBIPRED<1>:w	gPREDFLAG<0;1,0>:w	1:w
	asr (1)		gWEIGHTFLAG:w	gWEIGHTFLAG:w		6:w

	// Per-source enable flags:
	//   f0.0 clear: gPREDFLAG1 = gPREDFLAG, gPREDFLAG0 = 1 - gPREDFLAG1
	//   f0.0 set:   every word of the two dwords at gPREDFLAG0 becomes 1
	(-f0.0) mov (2)	gPREDFLAG1<1>:w	gPREDFLAG<0;1,0>:w
	(f0.0) mov (2)	gPREDFLAG0<1>:ud	0x00010001:ud
	(-f0.0) add (2)	gPREDFLAG0<1>:w	-gPREDFLAG1<2;2,1>:w	1:w

	// WA for weighted prediction - 2007/09/06
	// Where f0.1 is set, the dword at gWT0 becomes 0x00000080 (low word 128,
	// high word 0). Elsewhere the signed bytes of the pWGT table are widened
	// to words: even bytes into the stride-2 slots at gWT0, odd bytes into
	// the stride-2 slots at gO0.
	(f0.1) mov (8)	gWT0<1>:ud	0x00000080:ud
	(-f0.1) mov (8)	gWT0<2>:w	r[pWGT,0]<16;8,2>:b
	(-f0.1) mov (8)	gO0<2>:w	r[pWGT,1]<16;8,2>:b

	// Scale the 16 words at gWT0 by the 4-word pattern at gPREDFLAG0,
	// repeated across the vector.
	mul (16)	gWT0<1>:w	gWT0<16;16,1>:w		gPREDFLAG0<0;4,1>:w

	// Rounding term, computed for two lanes from the two bytes at gYWDENOM.
	// NOTE(review): the lanes are presumably luma and chroma - confirm.
	//   denom == 0 (f0.1 set): gW0 = 0
	//   otherwise:             gW0 = (gWEIGHTFLAG << denom) >> 1
	cmp.e.f0.1 (2)	null<1>:w	gYWDENOM<2;2,1>:ub	0:w
	(-f0.1) shl (2)	gW0<1>:w	gWEIGHTFLAG<0;1,0>:w	gYWDENOM<2;2,1>:ub
	(f0.1) mov (2)	gW0<1>:w	0:w
	(-f0.1) asr (2)	gW0<1>:w	gW0<2;2,1>:w		1:w

	// gYADD = gW0 << gBIPRED, plus gBIPRED where denom == 0.
	shl (2)		gYADD<1>:w	gW0<2;2,1>:w		gBIPRED<0;1,0>:w
	(f0.1) add (2)	gYADD<1>:w	gYADD<2;2,1>:w		gBIPRED<0;1,0>:w

	// gYSHIFT = denom + gBIPRED
	add (2)		gYSHIFT<1>:w	gYWDENOM<2;2,1>:ub	gBIPRED<0;1,0>:w

	// gOFFSET = (gO0 + gO1 + gBIPRED) >> gBIPRED, four lanes.
	add (4)		acc0<1>:w	gO0<16;4,4>:w		gO1<16;4,4>:w
	add (4)		acc0<1>:w	acc0<4;4,1>:w		gBIPRED<0;1,0>:w
	asr (4)		gOFFSET<1>:w	acc0<4;4,1>:w		gBIPRED<0;1,0>:w
\r
// Apply the weights. Entered by fall-through from WeightedPred_Explicit and
// by the jmpi at the end of the WeightedPred block. Nothing visible here jumps
// back to this label; the repetition is a preprocessor expansion.
INTERLABEL(WeightedPred_LOOP):

	// Y: gubYPRED = sat(((INTPY0 * gWT0 + INTPY1 * gWT1 + gYADD) >> gYSHIFT) + gOFFSET)
	// The body addresses its operands through %1 / %1+1, so it must sit
	// inside a $for expansion. %1 = 0, 2 handles indices 0..3, the same range
	// the default paths cover with 32-wide compressed writes to gubYPRED(0)
	// and gubYPRED(2).
	$for(0; <4; 2) {
	mul (16)	acc0<1>:w	gubINTPY0(%1)		gWT0<0;1,0>:w
	mul (16)	acc1<1>:w	gubINTPY0(%1+1)		gWT0<0;1,0>:w
	mac (16)	acc0<1>:w	gubINTPY1(%1)		gWT1<0;1,0>:w
	mac (16)	acc1<1>:w	gubINTPY1(%1+1)		gWT1<0;1,0>:w
	add (16)	acc0<1>:w	acc0<16;16,1>:w		gYADD:w
	add (16)	acc1<1>:w	acc1<16;16,1>:w		gYADD:w
	// Accumulator cannot be used as destination for ASR
	asr (16)	gwINTERIM_BUF3(0)<1>	acc0<16;16,1>:w	gYSHIFT:w
	asr (16)	gwINTERIM_BUF3(1)<1>	acc1<16;16,1>:w	gYSHIFT:w
	add.sat (16)	gubYPRED(%1)<2>		gwINTERIM_BUF3(0)	gOFFSET:w
	add.sat (16)	gubYPRED(%1+1)<2>	gwINTERIM_BUF3(1)	gOFFSET:w
	}

	// C: same computation with gUW0 / gUW1, gCADD, gCSHIFT and gUOFFSET. It
	// uses fixed indices 0 and 1, so it stays outside the $for expansion.
	// The <0;2,4> and <0;2,1> regions alternate between two values across
	// the lanes.
	// NOTE(review): the two values are presumably the U and V entries for
	// interleaved chroma samples - confirm.
	mul (16)	acc0<1>:w	gubINTPC0(0)		gUW0<0;2,4>:w
	mul (16)	acc1<1>:w	gubINTPC0(1)		gUW0<0;2,4>:w
	mac (16)	acc0<1>:w	gubINTPC1(0)		gUW1<0;2,4>:w
	mac (16)	acc1<1>:w	gubINTPC1(1)		gUW1<0;2,4>:w
	add (16)	acc0<1>:w	acc0<16;16,1>:w		gCADD:w
	add (16)	acc1<1>:w	acc1<16;16,1>:w		gCADD:w
	// Accumulator cannot be used as destination for ASR
	asr (16)	gwINTERIM_BUF3(0)<1>	acc0<16;16,1>:w	gCSHIFT:w
	asr (16)	gwINTERIM_BUF3(1)<1>	acc1<16;16,1>:w	gCSHIFT:w
	add.sat (16)	gubCPRED(0)<2>		gwINTERIM_BUF3(0)	gUOFFSET<0;2,1>:w
	add.sat (16)	gubCPRED(1)<2>		gwINTERIM_BUF3(1)	gUOFFSET<0;2,1>:w

// Common exit label for every prediction path in this file.
INTERLABEL(Return_WeightedPred):
\r
//#endif // !defined(__WeightedPred__)
\r