OSDN Git Service

r600g: fix check_and_set_bank_swizzle
author Vadim Girlin <vadimgirlin@gmail.com>
Wed, 24 Aug 2011 20:32:54 +0000 (00:32 +0400)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 25 Aug 2011 20:58:06 +0000 (16:58 -0400)
We need to do a full check when not all bank swizzles in the group are forced
(e.g. when trying to merge an interp_* group with the next instruction).

Note: This is a candidate for the 7.11 branch.

Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
src/gallium/drivers/r600/r600_asm.c

index 0311b56..604cb60 100644 (file)
@@ -696,15 +696,19 @@ static int check_and_set_bank_swizzle(struct r600_bytecode *bc,
 {
        struct alu_bank_swizzle bs;
        int bank_swizzle[5];
-       int i, r = 0, forced = 0;
+       int i, r = 0, forced = 1;
        boolean scalar_only = bc->chip_class == CAYMAN ? false : true;
        int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
 
        for (i = 0; i < max_slots; i++) {
-               if (slots[i] && slots[i]->bank_swizzle_force) {
-                       slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
-                       forced = 1;
+               if (slots[i]) {
+                       if (slots[i]->bank_swizzle_force) {
+                               slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
+                       } else {
+                               forced = 0;
+                       }
                }
+
                if (i < 4 && slots[i])
                        scalar_only = false;
        }
@@ -714,7 +718,11 @@ static int check_and_set_bank_swizzle(struct r600_bytecode *bc,
        /* Just check every possible combination of bank swizzle.
         * Not very efficent, but works on the first try in most of the cases. */
        for (i = 0; i < 4; i++)
-               bank_swizzle[i] = SQ_ALU_VEC_012;
+               if (!slots[i] || !slots[i]->bank_swizzle_force)
+                       bank_swizzle[i] = SQ_ALU_VEC_012;
+               else
+                       bank_swizzle[i] = slots[i]->bank_swizzle;
+
        bank_swizzle[4] = SQ_ALU_SCL_210;
        while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
 
@@ -751,11 +759,13 @@ static int check_and_set_bank_swizzle(struct r600_bytecode *bc,
                        bank_swizzle[4]++;
                } else {
                        for (i = 0; i < max_slots; i++) {
-                               bank_swizzle[i]++;
-                               if (bank_swizzle[i] <= SQ_ALU_VEC_210)
-                                       break;
-                               else
-                                       bank_swizzle[i] = SQ_ALU_VEC_012;
+                               if (!slots[i] || !slots[i]->bank_swizzle_force) {
+                                       bank_swizzle[i]++;
+                                       if (bank_swizzle[i] <= SQ_ALU_VEC_210)
+                                               break;
+                                       else
+                                               bank_swizzle[i] = SQ_ALU_VEC_012;
+                               }
                        }
                }
        }