2 * Intra predict 8X8 luma block
\r
3 * Copyright © <2010>, Intel Corporation.
\r
5 * This program is licensed under the terms and conditions of the
\r
6 * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
\r
7 * http://www.opensource.org/licenses/eclipse-1.0.php.
\r
10 #if !defined(__INTRA_PRED_8X8_Y__) // Make sure this is only included once
\r
11 #define __INTRA_PRED_8X8_Y__
\r
13 // Module name: intra_Pred_8X8_Y.asm
\r
15 // Intra predict 8X8 luma block
\r
17 //--------------------------------------------------------------------------
\r
20 // REF_TOP: Top reference data stored in BYTE with p[-1,-1] at REF_TOP(0,-1), p[-1,-1] and [15,-1] adjusted
\r
21 // REF_LEFT: Left reference data stored in BYTE with p[-1,0] at REF_LEFT(0,2), REF_LEFT(0,1) (p[-1,-1]) adjusted
\r
22 // PRED_MODE: Intra prediction mode stored in 4 LSBs
\r
23 // INTRA_PRED_AVAIL: Top/Left available flag, (Bit0: Left, Bit1: Top)
\r
27 // REG_INTRA_8X8_PRED: Predicted 8X8 block data
\r
28 //--------------------------------------------------------------------------
\r
// Scratch-register aliases used by the reference filtering and by several prediction modes below.
30 #define INTRA_REF REG_INTRA_TEMP_1
\r
31 #define REF_TMP REG_INTRA_TEMP_2
\r
// Both reference arrays are smoothed with a 3-tap (1,2,1) filter, (a + 2*b + c + 2) >> 2,
// computed in the accumulator as 16 word lanes and then packed back to bytes.
35 // Reference sample filtering
\r
37 // Set up boundary pixels for unified filtering
\r
// Replicating the last sample lets the last lane use the same 3-tap formula as the interior lanes.
38 mov (1) REF_TOP(0,16)<1> REF_TOP(0,15)REGION(1,0) // p[16,-1] = p[15,-1]
\r
39 mov (8) REF_LEFT(0,2+8)<1> REF_LEFT(0,2+7)REGION(1,0) // p[-1,8] = p[-1,7]
\r
41 // Top reference sample filtering (!!Consider instruction compression later)
\r
42 add (16) acc0<1>:w REF_TOP(0,-1)REGION(16,1) 2:w // p[x-1,-1]+2
\r
43 mac (16) acc0<1>:w REF_TOP(0)REGION(16,1) 2:w // p[x-1,-1]+2*p[x,-1]+2
\r
44 mac (16) acc0<1>:w REF_TOP(0,1)REGION(16,1) 1:w // p[x-1,-1]+2*p[x,-1]+p[x+1,-1]+2
\r
45 shr (16) REF_TMP<1>:w acc0:w 2:w // (p[x-1,-1]+2*p[x,-1]+p[x+1,-1]+2)>>2
\r
// The left filter starts one element before p[-1,-1], so result lane 0 is the filtered corner
// and lanes 1.. are the filtered left samples.
47 // Left reference sample filtering
\r
48 add (16) acc0<1>:w REF_LEFT(0)REGION(16,1) 2:w // p[-1,y-1]+2
\r
49 mac (16) acc0<1>:w REF_LEFT(0,1)REGION(16,1) 2:w // p[-1,y-1]+2*p[-1,y]+2
\r
50 mac (16) acc0<1>:w REF_LEFT(0,2)REGION(16,1) 1:w // p[-1,y-1]+2*p[-1,y]+p[-1,y+1]+2
\r
51 shr (16) INTRA_REF<1>:w acc0:w 2:w // (p[-1,y-1]+2*p[-1,y]+p[-1,y+1]+2)>>2
\r
// The word results are packed back to bytes by reading them as :ub with a byte stride of 2.
// After this step the filtered left samples start at REF_LEFT(0), not at REF_LEFT(0,2).
53 // Re-assign filtered reference samples
\r
54 mov (16) REF_TOP(0)<1> REF_TMP<32;16,2>:ub // p'[x,-1], x=0...15
\r
55 mov (8) REF_LEFT(0)<1> INTRA_REF.2<16;8,2>:ub // p'[-1,y], y=0...7 (byte offset 2 skips result lane 0, the corner)
\r
56 mov (1) REF_TOP(0,-1)<1> INTRA_REF<0;1,0>:ub // p'[-1,-1] (result lane 0 of the left filter)
\r
// Dispatch to the prediction routine selected by the 4 LSBs of PRED_MODE.
58 // Select intra_8x8 prediction mode
\r
60 and (1) PINTRAPRED_Y<1>:w PRED_MODE<0;1,0>:w 0x0F:w // Keep only the 4-bit mode; the result is used as the index for the indirect read below
\r
// The jump operand is first copied into a general register instead of being read
// register-indirect by jmpi itself.
// NOTE(review): the byte at r[PINTRAPRED_Y, INTRA_8X8_OFFSET] is presumably a per-mode jump
// distance from a table set up outside this file - confirm.
// REG_INTRA_TEMP_1 is also INTRA_REF; its filtered left samples were already copied out above.
61 // WA for "jmpi" restriction
\r
62 mov (1) REG_INTRA_TEMP_1<1>:ud r[PINTRAPRED_Y, INTRA_8X8_OFFSET]:ub
\r
63 jmpi (1) REG_INTRA_TEMP_1<0;1,0>:d
\r
// NOTE(review): the label and the closing brace of this loop are not visible in this view.
// Since every row is predicted from the top reference only, this is presumably the vertical mode - confirm.
// Loop variables: %1 runs 0..3 in steps of 1, %2 is the byte offset and advances by 32 per iteration.
// Each iteration adds 16 error words to the prediction and stores the saturated bytes with a stride of 2.
69 $for(0,0; <4; 1,32) {
\r
70 add.sat (16) r[PPREDBUF_Y,%2]<2>:ub r[PERROR,%2]<16;16,1>:w REF_TOP(0)<0;8,1> // <0;8,1>: the same 8 top samples are reused for both 8-sample rows of this iteration
\r
// Horizontal prediction: every pixel of a row is predicted from that row's left reference sample.
// NOTE(review): the closing brace of the loop is not visible in this view.
75 INTRA_8X8_HORIZONTAL:
\r
// %1 = 0,2,4,6 selects the left sample of the first row handled; %2 is the byte offset (+32 per iteration).
76 $for(0,0; <8; 2,32) {
\r
77 add.sat (16) r[PPREDBUF_Y,%2]<2>:ub r[PERROR,%2]<16;16,1>:w REF_LEFT(0,%1)<1;8,0> // <1;8,0>: one left sample replicated across 8 lanes, the next sample for the next row
\r
// NOTE(review): the label for this routine is not visible in this view; the comments below identify it as DC prediction.
// The top/left substitutions below let one summation sequence cover every availability
// case (both, top only, left only, neither).
83 // Rearrange reference samples for unified DC prediction code
\r
85 and.nz.f0.0 (16) NULLREG INTRA_PRED_AVAIL<0;1,0>:w 2:w // Top macroblock available for intra prediction?
\r
86 and.nz.f0.1 (8) NULLREG INTRA_PRED_AVAIL<0;1,0>:w 1:w // Left macroblock available for intra prediction?
\r
// The leading '-' inverts the predicate, so each of these moves runs only when the tested neighbour is NOT available.
87 (-f0.0.any16h) mov (16) REF_TOP_W(0)<1> 0x8080:uw // Top unavailable: fill the top reference with 0x80 bytes
\r
88 (-f0.1.any8h) mov (8) REF_LEFT(0)<1> REF_TOP(0)REGION(8,1) // Left unavailable: use the top samples (or the 0x80 fill) as the left reference
\r
89 (-f0.0.any8h) mov (8) REF_TOP(0)<1> REF_LEFT(0)REGION(8,1) // Top unavailable: use the left samples as the top reference
\r
// Sum 8 top + 8 left samples by tree reduction, then round and divide by 16.
91 // Perform DC prediction
\r
93 add (8) PRED_YW(15)<1> REF_TOP(0)REGION(8,1) REF_LEFT(0)REGION(8,1) // 8 partial sums: top[i] + left[i]
\r
94 add (4) PRED_YW(15)<1> PRED_YW(15)REGION(4,1) PRED_YW(15,4)REGION(4,1) // 8 -> 4 partial sums
\r
95 add (2) PRED_YW(15)<1> PRED_YW(15)REGION(2,1) PRED_YW(15,2)REGION(2,1) // 4 -> 2 partial sums
\r
96 add (16) acc0<1>:w PRED_YW(15)REGION(1,0) PRED_YW(15,1)REGION(1,0) // Final sum, written to all 16 lanes
\r
97 add (16) acc0<1>:w acc0:w 8:w // +8 for rounding
\r
98 shr (16) REG_INTRA_TEMP_0<1>:w acc0:w 4:w // >>4: mean of the 16 samples, in every lane
\r
// Add the DC value to the error data, 16 samples per iteration (%2 = byte offset, +32 per iteration).
// NOTE(review): the closing brace of the loop is not visible in this view.
101 $for(0,0; <4; 1,32) {
\r
102 add.sat (16) r[PPREDBUF_Y,%2]<2>:ub r[PERROR,%2]<16;16,1>:w REG_INTRA_TEMP_0<16;16,1>:w
\r
// Diagonal-down-left prediction: 3-tap filter the top reference, then each row reads the
// filtered array one sample further along than the row above.
// NOTE(review): the closing brace of the loop is not visible in this view.
107 INTRA_8X8_DIAG_DOWN_LEFT:
\r
// The loop below reads filtered lanes 0..14 only (largest start offset 6, +1 for the second row, +7 within the row).
108 mov (8) REF_TOP(0,16)<1> REF_TOP(0,15)REGION(8,1) // p[16,-1] = p[15,-1]
\r
109 add (16) acc0<1>:w REF_TOP(0,2)REGION(16,1) 2:w // p[x+2]+2
\r
110 mac (16) acc0<1>:w REF_TOP(0,1)REGION(16,1) 2:w // 2*p[x+1]+p[x+2]+2
\r
111 mac (16) acc0<1>:w REF_TOP(0)REGION(16,1) 1:w // p[x]+2*p[x+1]+p[x+2]+2
\r
112 shr (16) REG_INTRA_TEMP_0<1>:w acc0<16;16,1>:w 2:w // (p[x]+2*p[x+1]+p[x+2]+2)>>2
\r
// %1 = 0,2,4,6 is the starting word of the first row handled; %2 is the byte offset (+32 per iteration).
115 $for(0,0; <8; 2,32) {
\r
116 add.sat (16) r[PPREDBUF_Y,%2]<2>:ub r[PERROR,%2]<16;16,1>:w REG_INTRA_TEMP_0.%1<1;8,1>:w // <1;8,1>: 8 consecutive samples per row, the second row starts one sample later
\r
// Diagonal-down-right prediction: build one contiguous reference array (reversed left samples,
// then corner and top samples), 3-tap filter it, and read rows out through an address register.
// NOTE(review): the closing brace of the loop is not visible in this view.
121 INTRA_8X8_DIAG_DOWN_RIGHT:
\r
// Same aliases as defined near the top of the file; repeated here with identical values.
122 #define INTRA_REF REG_INTRA_TEMP_1
\r
123 #define REF_TMP REG_INTRA_TEMP_2
\r
// Each shl replicates one dword of the left reference into 4 lanes and shifts every lane by its own count.
// The mov below then collects byte 3 of each of the 8 result dwords.
// NOTE(review): INV_SHIFT is defined elsewhere; the shift counts are presumably chosen so that
// byte 3 of successive lanes holds the source bytes in reverse order - confirm.
125 // Set inverse shift count
\r
126 shl (4) REF_TMP<1>:ud REF_LEFT_D(0,1)REGION(1,0) INV_SHIFT<4;4,1>:b // Reverse order bottom 4 pixels of left ref.
\r
127 shl (4) REF_TMP.4<1>:ud REF_LEFT_D(0)REGION(1,0) INV_SHIFT<4;4,1>:b // Reverse order top 4 pixels of left ref.
\r
128 mov (8) INTRA_REF<1>:ub REF_TMP.3<32;8,4>:ub // Gather byte 3 of every dword: 8 left samples into INTRA_REF bytes 0..7
\r
129 mov (16) INTRA_REF.8<1>:ub REF_TOP(0,-1)REGION(16,1) // INTRA_REF holds all reference data
\r
// 3-tap filter over the combined array; index x here counts bytes of INTRA_REF.
131 add (16) acc0<1>:w INTRA_REF.2<16;16,1>:ub 2:w // p[x+2]+2
\r
132 mac (16) acc0<1>:w INTRA_REF.1<16;16,1>:ub 2:w // 2*p[x+1]+p[x+2]+2
\r
133 mac (16) acc0<1>:w INTRA_REF<16;16,1>:ub 1:w // p[x]+2*p[x+1]+p[x+2]+2
\r
134 shr (16) INTRA_REF<1>:w acc0<16;16,1>:w 2:w // (p[x]+2*p[x+1]+p[x+2]+2)>>2 (overwrites the byte array with word results)
\r
// PBWDCOPY_8 receives two values: a base derived from INTRA_TEMP_1*GRFWIB plus the two INV_TRANS48 offsets.
// NOTE(review): INV_TRANS48 is defined elsewhere; presumably these are the start addresses of the
// two rows handled per instruction inside INTRA_REF - confirm.
136 // Store data in reversed order
\r
137 add (2) PBWDCOPY_8<1>:w INV_TRANS48<2;2,1>:b INTRA_TEMP_1*GRFWIB:w // Must match with INTRA_REF
\r
// %2 starts at byte offset 96 and decreases by 32, so the output is written from the last
// 16-sample group to the first; %1 = 0,2,4,6 becomes an immediate byte offset of %1*2 (one word per step of %1).
140 $for(0,96; <8; 2,-32) {
\r
141 add.sat (16) r[PPREDBUF_Y,%2]<2>:ub r[PBWDCOPY_8,%1*2]<8,1>:w r[PERROR,%2]<16;16,1>:w
\r
// Vertical-right prediction: combines 2-tap averages and 3-tap filtered values of the
// combined (reversed left + corner + top) reference array, interleaved as bytes in INTRA_REF.
// NOTE(review): the closing brace of the loop is not visible in this view.
146 INTRA_8X8_VERT_RIGHT:
\r
// Same aliases as defined near the top of the file; repeated here with identical values.
147 #define INTRA_REF REG_INTRA_TEMP_1
\r
148 #define REF_TMP REG_INTRA_TEMP_2
\r
// NOTE(review): REF_TMP1 is not referenced anywhere in the visible part of this file.
149 #define REF_TMP1 REG_INTRA_TEMP_3
\r
// Each shl replicates one dword of the left reference into 4 lanes and shifts every lane by its own count.
// The mov below then collects byte 3 of each of the 8 result dwords.
// NOTE(review): INV_SHIFT is defined elsewhere; the shift counts are presumably chosen so that
// byte 3 of successive lanes holds the source bytes in reverse order - confirm.
151 // Set inverse shift count
\r
152 shl (4) REF_TMP<1>:ud REF_LEFT_D(0,1)REGION(1,0) INV_SHIFT<4;4,1>:b // Reverse order bottom 4 pixels of left ref.
\r
153 shl (4) REF_TMP.4<1>:ud REF_LEFT_D(0)REGION(1,0) INV_SHIFT<4;4,1>:b // Reverse order top 4 pixels of left ref.
\r
154 mov (8) INTRA_REF<1>:ub REF_TMP.3<32;8,4>:ub // Gather byte 3 of every dword: 8 left samples into INTRA_REF bytes 0..7
\r
155 mov (16) INTRA_REF.8<1>:ub REF_TOP(0,-1)REGION(16,1) // INTRA_REF holds all reference data
\r
// 2-tap averages of neighbouring top samples, starting at the corner (INTRA_REF byte 8).
158 avg (16) PRED_YW(14)<1> INTRA_REF.8<16;16,1> INTRA_REF.9<16;16,1> // avg(p[x-1],p[x])
\r
// 3-tap filter over the combined array, starting at INTRA_REF byte 1.
160 add (16) acc0<1>:w INTRA_REF.3<16;16,1>:ub 2:w // p[x]+2
\r
161 mac (16) acc0<1>:w INTRA_REF.2<16;16,1>:ub 2:w // 2*p[x-1]+p[x]+2
\r
162 mac (16) acc0<1>:w INTRA_REF.1<16;16,1>:ub 1:w // p[x-2]+2*p[x-1]+p[x]+2
\r
163 shr (16) REF_TMP<1>:w acc0:w 2:w // (p[x-2]+2*p[x-1]+p[x]+2)>>2
\r
// Repack into INTRA_REF as bytes: filtered values at bytes 0.. and at the even bytes from 6,
// averages at the odd bytes from 7 (the second mov rewrites bytes 6 and up written by the first).
165 mov (8) INTRA_REF<1>:ub REF_TMP<16;8,2>:ub // Keep zVR = -1,-2,-3,-4,-5,-6,-7 sequentially
\r
166 mov (8) INTRA_REF.6<2>:ub REF_TMP.12<16;8,2>:ub // Keep zVR = -1,1,3,5,7,9,11,13 at even byte
\r
167 mov (8) INTRA_REF.7<2>:ub PRED_Y(14)REGION(8,2) // Combining zVR = 0,2,4,6,8,10,12,14 at odd byte
\r
// PBWDCOPY_8 receives two values: a base derived from INTRA_TEMP_1*GRFWIB plus the two INV_TRANS8 offsets.
// NOTE(review): INV_TRANS8 is defined elsewhere; presumably these are the start addresses of the
// two rows handled per instruction inside INTRA_REF - confirm.
169 add (2) PBWDCOPY_8<1>:w INV_TRANS8<2;2,1>:b INTRA_TEMP_1*GRFWIB:w // Must match with INTRA_REF
\r
// %2 starts at byte offset 96 and decreases by 32, so the output is written from the last
// 16-sample group to the first; %1 = 0,2,4,6 is the immediate byte offset into INTRA_REF.
172 $for(0,96; <8; 2,-32) {
\r
173 add.sat (16) r[PPREDBUF_Y,%2]<2>:ub r[PBWDCOPY_8,%1]<8,2>:ub r[PERROR,%2]<16;16,1>:w // <8,2>: 8 samples per row taken from every second byte
\r
// Horizontal-down prediction: combines 2-tap averages and 3-tap filtered values of the
// combined (reversed left + corner + top) reference array, interleaved as bytes in INTRA_REF.
// INTRA_REF and REF_TMP are the aliases for REG_INTRA_TEMP_1 / REG_INTRA_TEMP_2 defined earlier in the file.
// NOTE(review): the closing brace of the loop is not visible in this view.
178 INTRA_8X8_HOR_DOWN:
\r
// Each shl replicates one dword of the left reference into 4 lanes and shifts every lane by its own count.
// The mov below then collects byte 3 of each of the 8 result dwords.
// NOTE(review): INV_SHIFT is defined elsewhere; the shift counts are presumably chosen so that
// byte 3 of successive lanes holds the source bytes in reverse order - confirm.
179 // Set inverse shift count
\r
180 shl (4) REF_TMP<1>:ud REF_LEFT_D(0,1)REGION(1,0) INV_SHIFT<4;4,1>:b // Reverse order bottom 4 pixels of left ref.
\r
181 shl (4) REF_TMP.4<1>:ud REF_LEFT_D(0)REGION(1,0) INV_SHIFT<4;4,1>:b // Reverse order top 4 pixels of left ref.
\r
182 mov (8) INTRA_REF<1>:ub REF_TMP.3<16;4,4>:ub // Gather bytes 3,7,...,31 (<16;4,4> addresses the same bytes as the <32;8,4> form used by the other modes)
\r
183 mov (16) INTRA_REF.8<1>:ub REF_TOP(0,-1)REGION(16,1) // INTRA_REF holds all reference data
\r
// 3-tap filter over the combined array; the word results go to PRED_YW(14).
186 add (16) acc0<1>:w INTRA_REF.2<16;16,1>:ub 2:w // p[y]+2
\r
187 mac (16) acc0<1>:w INTRA_REF.1<16;16,1>:ub 2:w // 2*p[y-1]+p[y]+2
\r
188 mac (16) acc0<1>:w INTRA_REF.0<16;16,1>:ub 1:w // p[y-2]+2*p[y-1]+p[y]+2
\r
189 shr (16) PRED_YW(14)<1> acc0:w 2:w // (p[y-2]+2*p[y-1]+p[y]+2)>>2
\r
// 2-tap averages, written over INTRA_REF as words; each result occupies an even byte plus the byte after it.
191 avg (16) INTRA_REF<1>:w INTRA_REF<16;16,1>:ub INTRA_REF.1<16;16,1>:ub // avg(p[y-1],p[y])
\r
// Interleave: the odd bytes of INTRA_REF receive the 3-tap results, the even bytes keep the averages.
193 mov (8) INTRA_REF.1<2>:ub PRED_Y(14)REGION(8,2) // Combining odd pixels to form byte type
\r
// NOTE(review): "zVR" in the next comment presumably should read "zHD" for this mode - confirm.
194 mov (8) INTRA_REF.16<1>:ub PRED_Y(14,16)REGION(8,2) // Keep zVR = -2,-3,-4,-5,-6,-7 unchanged
\r
195 // Now INTRA_REF.0 - INTRA_REF.21 contain predicted data
\r
// PBWDCOPY_8 receives two values: a base derived from INTRA_TEMP_1*GRFWIB plus the two INV_TRANS48 offsets.
// NOTE(review): INV_TRANS48 is defined elsewhere; presumably these are the start addresses of the
// two rows handled per instruction inside INTRA_REF - confirm.
197 add (2) PBWDCOPY_8<1>:w INV_TRANS48<2;2,1>:b INTRA_TEMP_1*GRFWIB:w // Must match with INTRA_REF
\r
// %2 starts at byte offset 96 and decreases by 32, so the output is written from the last
// 16-sample group to the first; %1 = 0,4,8,12 is the immediate byte offset into INTRA_REF.
200 $for(0,96; <13; 4,-32) {
\r
201 add.sat (16) r[PPREDBUF_Y,%2]<2>:ub r[PBWDCOPY_8,%1]<8,1>:ub r[PERROR,%2]<16;16,1>:w // <8,1>: 8 consecutive bytes per row
\r
// Vertical-left prediction: 2-tap averages of the top reference go to PRED_YW(14) and
// 3-tap filtered values go to PRED_YW(15); each loop iteration reads 8 words from each of them.
// NOTE(review): the closing brace of the loop is not visible in this view.
206 INTRA_8X8_VERT_LEFT:
\r
208 avg (16) PRED_YW(14)<1> REF_TOP(0)REGION(16,1) REF_TOP(0,1)REGION(16,1) // avg(p[x],p[x+1])
\r
210 add (16) acc0<1>:w REF_TOP(0,2)REGION(16,1) 2:w // p[x+2]+2
\r
211 mac (16) acc0<1>:w REF_TOP(0,1)REGION(16,1) 2:w // 2*p[x+1]+p[x+2]+2
\r
212 mac (16) acc0<1>:w REF_TOP(0)REGION(16,1) 1:w // p[x]+2*p[x+1]+p[x+2]+2
\r
213 shr (16) PRED_YW(15)<1> acc0<1>:w 2:w // (p[x]+2*p[x+1]+p[x+2]+2)>>2
\r
// %1 = 0..3 is the starting word inside both result registers; %2 is the byte offset (+32 per iteration).
// The region <16;8,1> takes 8 words at %1 and 8 more words 16 words later.
// NOTE(review): this assumes PRED_YW(15) starts exactly 16 words after PRED_YW(14) - confirm.
216 $for(0,0; <4; 1,32) {
\r
217 add.sat (16) r[PPREDBUF_Y,%2]<2>:ub PRED_YW(14,%1)<16;8,1> r[PERROR,%2]<16;16,1>:w
\r
// NOTE(review): the label for this routine is not visible in this view. It predicts from the
// left reference only, walking downwards with replicated padding, so it is presumably the
// horizontal-up mode - confirm. The closing brace of the loop is also not visible.
// Padding the left reference lets the last lanes use the same formulas as the interior lanes.
223 // Set extra left reference pixels for unified prediction
\r
224 mov (8) REF_LEFT(0,8)<1> REF_LEFT(0,7)REGION(1,0) // Copy p[-1,7] to p[-1,y],y=8...15
\r
// 2-tap averages go to PRED_YW(14), 3-tap filtered values go to PRED_YW(15).
227 avg (16) PRED_YW(14)<1> REF_LEFT(0)REGION(16,1) REF_LEFT(0,1)REGION(16,1) // avg(p[y],p[y+1])
\r
229 add (16) acc0<1>:w REF_LEFT(0,2)REGION(16,1) 2:w // p[y+2]+2
\r
230 mac (16) acc0<1>:w REF_LEFT(0,1)REGION(16,1) 2:w // 2*p[y+1]+p[y+2]+2
\r
231 mac (16) acc0<1>:w REF_LEFT(0)REGION(16,1) 1:w // p[y]+2*p[y+1]+p[y+2]+2
\r
232 shr (16) PRED_YW(15)<1> acc0<1>:w 2:w // (p[y]+2*p[y+1]+p[y+2]+2)>>2
\r
// After the merge, PRED_Y(14) holds averages at even bytes and 3-tap results at odd bytes.
234 // Merge even/odd pixels
\r
235 // The predicted data need to be stored in byte type (22 bytes are required)
\r
236 mov (16) PRED_Y(14,1)<2> PRED_Y(15)REGION(16,2) // Every second byte of the 3-tap results goes into the odd bytes of PRED_Y(14)
\r
// %1 = 0..3 gives a start byte of %1*4; %2 is the byte offset (+32 per iteration).
239 $for(0,0; <4; 1,32) {
\r
240 add.sat (16) r[PPREDBUF_Y,%2]<2>:ub PRED_Y(14,%1*4)<2;8,1> r[PERROR,%2]<16;16,1>:w // <2;8,1>: 8 consecutive bytes per row, the second row starts 2 bytes later
\r
244 // End of intra_Pred_8X8_Y
\r
246 #endif // !defined(__INTRA_PRED_8X8_Y__)
\r