Compute partial IDCT of single row.
shift = left-shift amount
a1 = source address
- a3 = row[2,0]
+ a3 = row[2,0] <= 2 cycles
a4 = row[3,1]
+ ip = w42 <= 2 cycles
Output in registers v1--v8
*/
.macro idct_row shift
- ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */
ldr lr, [pc, #(w46-.-8)] /* lr = W4 | (W6 << 16) */
mov a2, #(1<<(\shift-1))
smlad v1, a3, ip, a2
cmpeq lr, a3, lsr #16
beq 1f
str a2, [sp, #-4]!
+ ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */
cmp lr, #0
beq 2f
stmfd sp!, {a2, lr}
ldr a3, [a1] /* a3 = row[2,0] */
+ ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */
ldr a4, [a1, #8] /* a4 = row[3,1] */
idct_row COL_SHIFT
ldr a2, [sp], #4
stmfd sp!, {a2, a3, lr}
ldr a3, [a1] /* a3 = row[2,0] */
+ ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */
ldr a4, [a1, #8] /* a4 = row[3,1] */
idct_row COL_SHIFT
ldmfd sp!, {a2, a3}
stmfd sp!, {a2, a3, lr}
ldr a3, [a1] /* a3 = row[2,0] */
+ ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */
ldr a4, [a1, #8] /* a4 = row[3,1] */
idct_row COL_SHIFT
ldmfd sp!, {a2, a3}