From 80b27ca2cd8cd2bb2937baa441c43a396887cc03 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 24 Aug 2014 14:05:37 -0700 Subject: [PATCH] vc4: Switch to using native integers. There were troubles with bools without using native integers (st_glsl_to_tgsi seemed to think bool true was 1.0f sometimes, when as a uniform it's stored as ~0), and since I've got native integers other than divide, I might as well just support them. --- src/gallium/drivers/vc4/vc4_program.c | 171 ++++++++++++++++++++++++++++++++- src/gallium/drivers/vc4/vc4_qir.c | 16 ++- src/gallium/drivers/vc4/vc4_qir.h | 34 +++++++ src/gallium/drivers/vc4/vc4_qpu_emit.c | 12 +++ src/gallium/drivers/vc4/vc4_screen.c | 1 + 5 files changed, 228 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index aaa7eb346f3..82766ff1fc6 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -30,6 +30,7 @@ #include "util/u_memory.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" #include "vc4_context.h" #include "vc4_qpu.h" @@ -129,7 +130,8 @@ qir_uniform_f(struct tgsi_to_qir *trans, float f) } static struct qreg -get_src(struct tgsi_to_qir *trans, struct tgsi_src_register *src, int i) +get_src(struct tgsi_to_qir *trans, unsigned tgsi_op, + struct tgsi_src_register *src, int i) { struct qcompile *c = trans->c; struct qreg r = c->undef; @@ -182,8 +184,17 @@ get_src(struct tgsi_to_qir *trans, struct tgsi_src_register *src, int i) if (src->Absolute) r = qir_FMAXABS(c, r, r); - if (src->Negate) - r = qir_FSUB(c, qir_uniform_f(trans, 0), r); + if (src->Negate) { + switch (tgsi_opcode_infer_src_type(tgsi_op)) { + case TGSI_TYPE_SIGNED: + case TGSI_TYPE_UNSIGNED: + r = qir_SUB(c, qir_uniform_ui(trans, 0), r); + break; + default: + r = qir_FSUB(c, qir_uniform_f(trans, 0.0), r); + break; + } + } return r; }; @@ -249,6 +260,51 @@ tgsi_to_qir_alu(struct tgsi_to_qir *trans, } static struct qreg +tgsi_to_qir_umul(struct tgsi_to_qir *trans, + struct tgsi_full_instruction *tgsi_inst, + enum qop op, struct qreg *src, int i) +{ + struct qcompile *c = trans->c; + + struct qreg src0_hi = qir_SHR(c, src[0 * 4 + i], + qir_uniform_ui(trans, 16)); + struct qreg src0_lo = qir_AND(c, src[0 * 4 + i], + qir_uniform_ui(trans, 0xffff)); + struct qreg src1_hi = qir_SHR(c, src[1 * 4 + i], + qir_uniform_ui(trans, 16)); + struct qreg src1_lo = qir_AND(c, src[1 * 4 + i], + qir_uniform_ui(trans, 0xffff)); + + struct qreg hilo = qir_MUL24(c, src0_hi, src1_lo); + struct qreg lohi = qir_MUL24(c, src0_lo, src1_hi); + struct qreg lolo = qir_MUL24(c, src0_lo, src1_lo); + + return qir_ADD(c, lolo, qir_SHL(c, + qir_ADD(c, hilo, lohi), + qir_uniform_ui(trans, 16))); +} + +static struct qreg +tgsi_to_qir_idiv(struct tgsi_to_qir *trans, + struct tgsi_full_instruction *tgsi_inst, + enum qop op, struct qreg *src, int i) +{ + struct qcompile *c = trans->c; + return qir_FTOI(c, qir_FMUL(c, + qir_ITOF(c, src[0 * 4 + i]), + qir_RCP(c, qir_ITOF(c, src[1 * 4 + i])))); +} + +static struct qreg +tgsi_to_qir_ineg(struct tgsi_to_qir *trans, + struct tgsi_full_instruction *tgsi_inst, + enum qop op, struct qreg *src, int i) +{ + struct qcompile *c = trans->c; + return qir_SUB(c, qir_uniform_ui(trans, 0), src[0 * 4 + i]); +} + +static struct qreg tgsi_to_qir_seq(struct tgsi_to_qir *trans, struct tgsi_full_instruction *tgsi_inst, enum qop op, struct qreg *src, int i) @@ -289,6 +345,86 @@ tgsi_to_qir_sge(struct tgsi_to_qir *trans, } static struct qreg +tgsi_to_qir_fseq(struct tgsi_to_qir *trans, + struct tgsi_full_instruction *tgsi_inst, + enum qop op, struct qreg *src, int i) +{ + struct qcompile *c = trans->c; + qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i])); + return qir_SEL_X_0_ZS(c, qir_uniform_ui(trans, ~0)); +} + +static struct qreg +tgsi_to_qir_fsne(struct tgsi_to_qir *trans, + struct tgsi_full_instruction *tgsi_inst, + enum qop op, struct qreg *src, int i) +{ + struct qcompile *c = trans->c; + qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i])); + return qir_SEL_X_0_ZC(c, qir_uniform_ui(trans, ~0)); +} + +static struct qreg +tgsi_to_qir_fslt(struct tgsi_to_qir *trans, + struct tgsi_full_instruction *tgsi_inst, + enum qop op, struct qreg *src, int i) +{ + struct qcompile *c = trans->c; + qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i])); + return qir_SEL_X_0_NS(c, qir_uniform_ui(trans, ~0)); +} + +static struct qreg +tgsi_to_qir_fsge(struct tgsi_to_qir *trans, + struct tgsi_full_instruction *tgsi_inst, + enum qop op, struct qreg *src, int i) +{ + struct qcompile *c = trans->c; + qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i])); + return qir_SEL_X_0_NC(c, qir_uniform_ui(trans, ~0)); +} + +static struct qreg +tgsi_to_qir_useq(struct tgsi_to_qir *trans, + struct tgsi_full_instruction *tgsi_inst, + enum qop op, struct qreg *src, int i) +{ + struct qcompile *c = trans->c; + qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i])); + return qir_SEL_X_0_ZS(c, qir_uniform_ui(trans, ~0)); +} + +static struct qreg +tgsi_to_qir_usne(struct tgsi_to_qir *trans, + struct tgsi_full_instruction *tgsi_inst, + enum qop op, struct qreg *src, int i) +{ + struct qcompile *c = trans->c; + qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i])); + return qir_SEL_X_0_ZC(c, qir_uniform_ui(trans, ~0)); +} + +static struct qreg +tgsi_to_qir_islt(struct tgsi_to_qir *trans, + struct tgsi_full_instruction *tgsi_inst, + enum qop op, struct qreg *src, int i) +{ + struct qcompile *c = trans->c; + qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i])); + return qir_SEL_X_0_NS(c, qir_uniform_ui(trans, ~0)); +} + +static struct qreg +tgsi_to_qir_isge(struct tgsi_to_qir *trans, + struct tgsi_full_instruction *tgsi_inst, + enum qop op, struct qreg *src, int i) +{ + struct qcompile *c = trans->c; + qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i])); + return qir_SEL_X_0_NC(c, qir_uniform_ui(trans, ~0)); +} + +static struct qreg tgsi_to_qir_cmp(struct tgsi_to_qir *trans, struct tgsi_full_instruction *tgsi_inst, enum qop op, struct qreg *src, int i) @@ -754,11 +890,37 @@ emit_tgsi_instruction(struct tgsi_to_qir *trans, [TGSI_OPCODE_SUB] = { QOP_FSUB, tgsi_to_qir_alu }, [TGSI_OPCODE_MIN] = { QOP_FMIN, tgsi_to_qir_alu }, [TGSI_OPCODE_MAX] = { QOP_FMAX, tgsi_to_qir_alu }, + [TGSI_OPCODE_F2I] = { QOP_FTOI, tgsi_to_qir_alu }, + [TGSI_OPCODE_I2F] = { QOP_ITOF, tgsi_to_qir_alu }, + [TGSI_OPCODE_UADD] = { QOP_ADD, tgsi_to_qir_alu }, + [TGSI_OPCODE_USHR] = { QOP_SHR, tgsi_to_qir_alu }, + [TGSI_OPCODE_ISHR] = { QOP_ASR, tgsi_to_qir_alu }, + [TGSI_OPCODE_SHL] = { QOP_SHL, tgsi_to_qir_alu }, + [TGSI_OPCODE_IMIN] = { QOP_MIN, tgsi_to_qir_alu }, + [TGSI_OPCODE_IMAX] = { QOP_MAX, tgsi_to_qir_alu }, + [TGSI_OPCODE_AND] = { QOP_AND, tgsi_to_qir_alu }, + [TGSI_OPCODE_OR] = { QOP_OR, tgsi_to_qir_alu }, + [TGSI_OPCODE_XOR] = { QOP_XOR, tgsi_to_qir_alu }, + [TGSI_OPCODE_NOT] = { QOP_NOT, tgsi_to_qir_alu }, + + [TGSI_OPCODE_UMUL] = { 0, tgsi_to_qir_umul }, + [TGSI_OPCODE_IDIV] = { 0, tgsi_to_qir_idiv }, + [TGSI_OPCODE_INEG] = { 0, tgsi_to_qir_ineg }, + [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu }, [TGSI_OPCODE_SEQ] = { 0, tgsi_to_qir_seq }, [TGSI_OPCODE_SNE] = { 0, tgsi_to_qir_sne }, [TGSI_OPCODE_SGE] = { 0, tgsi_to_qir_sge }, [TGSI_OPCODE_SLT] = { 0, tgsi_to_qir_slt }, + [TGSI_OPCODE_FSEQ] = { 0, tgsi_to_qir_fseq }, + [TGSI_OPCODE_FSNE] = { 0, tgsi_to_qir_fsne }, + [TGSI_OPCODE_FSGE] = { 0, tgsi_to_qir_fsge }, + [TGSI_OPCODE_FSLT] = { 0, tgsi_to_qir_fslt }, + [TGSI_OPCODE_USEQ] = { 0, tgsi_to_qir_useq }, + [TGSI_OPCODE_USNE] = { 0, tgsi_to_qir_usne }, + [TGSI_OPCODE_ISGE] = { 0, tgsi_to_qir_isge }, + [TGSI_OPCODE_ISLT] = { 0, tgsi_to_qir_islt }, + [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp }, [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad }, [TGSI_OPCODE_DP2] = { 0, tgsi_to_qir_dp2 }, @@ -787,7 +949,8 @@ emit_tgsi_instruction(struct tgsi_to_qir *trans, for (int s = 0; s < 3; s++) { for (int i = 0; i < 4; i++) { src_regs[4 * s + i] = - get_src(trans, &tgsi_inst->Src[s].Register, i); + get_src(trans, tgsi_inst->Instruction.Opcode, + &tgsi_inst->Src[s].Register, i); } } diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index 72149908422..93f97c219f7 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -38,10 +38,24 @@ static const struct qir_op_info qir_op_info[] = { [QOP_FADD] = { "fadd", 1, 2 }, [QOP_FSUB] = { "fsub", 1, 2 }, [QOP_FMUL] = { "fmul", 1, 2 }, + [QOP_MUL24] = { "mul24", 1, 2 }, [QOP_FMIN] = { "fmin", 1, 2 }, [QOP_FMAX] = { "fmax", 1, 2 }, [QOP_FMINABS] = { "fminabs", 1, 2 }, [QOP_FMAXABS] = { "fmaxabs", 1, 2 }, + [QOP_FTOI] = { "ftoi", 1, 1 }, + [QOP_ITOF] = { "itof", 1, 1 }, + [QOP_ADD] = { "add", 1, 2 }, + [QOP_SUB] = { "sub", 1, 2 }, + [QOP_SHR] = { "shr", 1, 2 }, + [QOP_ASR] = { "asr", 1, 2 }, + [QOP_SHL] = { "shl", 1, 2 }, + [QOP_MIN] = { "min", 1, 2 }, + [QOP_MAX] = { "max", 1, 2 }, + [QOP_AND] = { "and", 1, 2 }, + [QOP_OR] = { "or", 1, 2 }, + [QOP_XOR] = { "xor", 1, 2 }, + [QOP_NOT] = { "not", 1, 1 }, [QOP_SF] = { "sf", 0, 1 }, [QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1 }, @@ -53,8 +67,6 @@ static const struct qir_op_info qir_op_info[] = { [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2 }, [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2 }, - [QOP_FTOI] = { "ftoi", 1, 1 }, - [QOP_ITOF] = { "itof", 1, 1 }, [QOP_RCP] = { "rcp", 1, 1 }, [QOP_RSQ] = { "rsq", 1, 1 }, [QOP_EXP2] = { "exp2", 1, 2 }, diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index 99df99c1a07..2e210c3bd60 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -49,10 +49,22 @@ enum qop { QOP_FADD, QOP_FSUB, QOP_FMUL, + QOP_MUL24, QOP_FMIN, QOP_FMAX, QOP_FMINABS, QOP_FMAXABS, + QOP_ADD, + QOP_SUB, + QOP_SHL, + QOP_SHR, + QOP_ASR, + QOP_MIN, + QOP_MAX, + QOP_AND, + QOP_OR, + QOP_XOR, + QOP_NOT, /* Sets the flag register according to src. */ QOP_SF, @@ -270,6 +282,7 @@ QIR_ALU1(MOV) QIR_ALU2(FADD) QIR_ALU2(FSUB) QIR_ALU2(FMUL) +QIR_ALU2(MUL24) QIR_NODST_1(SF) QIR_ALU1(SEL_X_0_ZS) QIR_ALU1(SEL_X_0_ZC) @@ -285,6 +298,19 @@ QIR_ALU2(FMINABS) QIR_ALU2(FMAXABS) QIR_ALU1(FTOI) QIR_ALU1(ITOF) + +QIR_ALU2(ADD) +QIR_ALU2(SUB) +QIR_ALU2(SHL) +QIR_ALU2(SHR) +QIR_ALU2(ASR) +QIR_ALU2(MIN) +QIR_ALU2(MAX) +QIR_ALU2(AND) +QIR_ALU2(OR) +QIR_ALU2(XOR) +QIR_ALU1(NOT) + QIR_ALU1(RCP) QIR_ALU1(RSQ) QIR_ALU1(EXP2) @@ -310,4 +336,12 @@ qir_R4_UNPACK(struct qcompile *c, int i) return t; } +static inline struct qreg +qir_SEL_X_0_COND(struct qcompile *c, int i) +{ + struct qreg t = qir_get_temp(c); + qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef)); + return t; +} + #endif /* VC4_QIR_H */ diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 6d2c34f2d1f..579bfdc1aa1 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -280,8 +280,20 @@ vc4_generate_code(struct qcompile *c) A(FMAXABS), A(FTOI), A(ITOF), + A(ADD), + A(SUB), + A(SHL), + A(SHR), + A(ASR), + A(MIN), + A(MAX), + A(AND), + A(OR), + A(XOR), + A(NOT), M(FMUL), + M(MUL24), }; struct qpu_reg src[4]; diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 46cd4c55b97..b0f97103dee 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -291,6 +291,7 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 0; case PIPE_SHADER_CAP_INTEGERS: + return 1; case PIPE_SHADER_CAP_DOUBLES: return 0; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: -- 2.11.0