return;
}
- if (insn->op == OP_SET)
+ if (insn->op == OP_SET || insn->op == OP_SET_AND ||
+ insn->op == OP_SET_OR || insn->op == OP_SET_XOR)
insn->asCmp()->setCond = reverseCondCode(insn->asCmp()->setCond);
else
if (insn->op == OP_SLCT)
if (i->op == OP_CALL) // calls have args as sources, they must be in regs
continue;
+ if (i->op == OP_PFETCH) // pfetch expects arg1 to be a reg
+ continue;
+
if (i->srcExists(1))
checkSwapSrc01(i);
virtual bool visit(BasicBlock *);
void expr(Instruction *, ImmediateValue&, ImmediateValue&);
+ void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
void opnd(Instruction *, ImmediateValue&, int s);
void unary(Instruction *, const ImmediateValue&);
if (i->op == OP_MOV || i->op == OP_CALL)
continue;
- ImmediateValue src0, src1;
+ ImmediateValue src0, src1, src2;
+ if (i->srcExists(2) &&
+ i->src(0).getImmediate(src0) &&
+ i->src(1).getImmediate(src1) &&
+ i->src(2).getImmediate(src2))
+ expr(i, src0, src1, src2);
+ else
if (i->srcExists(1) &&
i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1))
expr(i, src0, src1);
b->data.f32 = 0.0f;
}
switch (i->dType) {
- case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break;
+ case TYPE_F32:
+ res.data.f32 = a->data.f32 * b->data.f32 * exp2f(i->postFactor);
+ break;
case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break;
case TYPE_S32:
- case TYPE_U32: res.data.u32 = a->data.u32 * b->data.u32; break;
+ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+ res.data.s32 = ((int64_t)a->data.s32 * b->data.s32) >> 32;
+ break;
+ }
+ /* fallthrough */
+ case TYPE_U32:
+ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+ res.data.u32 = ((uint64_t)a->data.u32 * b->data.u32) >> 32;
+ break;
+ }
+ res.data.u32 = a->data.u32 * b->data.u32; break;
default:
return;
}
return;
res.data.u32 = a->data.u32;
break;
+ case OP_EXTBF: {
+ int offset = b->data.u32 & 0xff;
+ int width = (b->data.u32 >> 8) & 0xff;
+ int rshift = offset;
+ int lshift = 0;
+ if (width == 0) {
+ res.data.u32 = 0;
+ break;
+ }
+ if (width + offset < 32) {
+ rshift = 32 - width;
+ lshift = 32 - width - offset;
+ }
+ if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
+ res.data.u32 = util_bitreverse(a->data.u32);
+ else
+ res.data.u32 = a->data.u32;
+ switch (i->dType) {
+ case TYPE_S32: res.data.s32 = (res.data.s32 << lshift) >> rshift; break;
+ case TYPE_U32: res.data.u32 = (res.data.u32 << lshift) >> rshift; break;
+ default:
+ return;
+ }
+ break;
+ }
+ case OP_POPCNT:
+ res.data.u32 = util_bitcount(a->data.u32 & b->data.u32);
+ break;
+ case OP_PFETCH:
+ // The two arguments to pfetch are logically added together. Normally
+ // the second argument will not be constant, but that can happen.
+ res.data.u32 = a->data.u32 + b->data.u32;
+ break;
default:
return;
}
i->src(0).mod = Modifier(0);
i->src(1).mod = Modifier(0);
+ i->postFactor = 0;
i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32));
i->setSrc(1, NULL);
i->getSrc(0)->reg.data = res.data;
- if (i->op == OP_MAD || i->op == OP_FMA) {
+ switch (i->op) {
+ case OP_MAD:
+ case OP_FMA: {
i->op = OP_ADD;
i->setSrc(1, i->getSrc(0));
ImmediateValue src0;
if (i->src(0).getImmediate(src0))
expr(i, src0, *i->getSrc(1)->asImm());
- } else {
- i->op = OP_MOV;
+ if (i->saturate && !prog->getTarget()->isSatSupported(i)) {
+ bld.setPosition(i, false);
+ i->setSrc(1, bld.loadImm(NULL, res.data.u32));
+ }
+ break;
+ }
+ case OP_PFETCH:
+ // Leave PFETCH alone... we just folded its 2 args into 1.
+ break;
+ default:
+ i->op = i->saturate ? OP_SAT : OP_MOV; /* SAT handled by unary() */
+ break;
}
+ i->subOp = 0;
+}
+
+/* Fold an instruction whose three sources are all immediates.
+ *
+ * Only OP_INSBF is handled; any other opcode is left untouched.
+ *
+ * For OP_INSBF the second source packs the field description: bits 0..7
+ * hold the bit offset and bits 8..15 hold the field width.  The result
+ * takes the low 'width' bits of the first source, placed at 'offset', and
+ * every bit outside that field from the third source.
+ *
+ * On success the instruction is rewritten in place as an OP_MOV of the
+ * computed immediate and foldCount is incremented.
+ */
+void
+ConstantFolding::expr(Instruction *i,
+                      ImmediateValue &imm0,
+                      ImmediateValue &imm1,
+                      ImmediateValue &imm2)
+{
+   struct Storage *const a = &imm0.reg, *const b = &imm1.reg, *const c = &imm2.reg;
+   struct Storage res;
+
+   memset(&res.data, 0, sizeof(res.data));
+
+   switch (i->op) {
+   case OP_INSBF: {
+      const unsigned offset = b->data.u32 & 0xff;
+      const unsigned width = (b->data.u32 >> 8) & 0xff;
+
+      // Both fields are 8 bits wide, so they can exceed what a 32-bit shift
+      // can express.  Shifting by >= 32 is undefined in C++, so do not fold
+      // such encodings at compile time; the instruction is left as it was.
+      if (offset > 31 || width > 32)
+         return;
+
+      // A full-word field cannot be built with (1 << 32) - 1; spell out the
+      // all-ones mask.  Unsigned arithmetic keeps the truncating shifts
+      // below well-defined.
+      const uint32_t field = (width == 32) ? ~0u : ((1u << width) - 1u);
+      const uint32_t bitmask = field << offset;
+
+      res.data.u32 = ((a->data.u32 << offset) & bitmask) | (c->data.u32 & ~bitmask);
+      break;
+   }
+   default:
+      return;
+   }
+
+   ++foldCount;
+
+   // The folded value already accounts for the sources; drop any modifiers
+   // before replacing them.
+   i->src(0).mod = Modifier(0);
+   i->src(1).mod = Modifier(0);
+   i->src(2).mod = Modifier(0);
+
+   i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32));
+   i->setSrc(1, NULL);
+   i->setSrc(2, NULL);
+
+   i->getSrc(0)->reg.data = res.data;
+
+   i->op = OP_MOV;
}
void
switch (i->op) {
case OP_NEG: res.data.f32 = -imm.reg.data.f32; break;
case OP_ABS: res.data.f32 = fabsf(imm.reg.data.f32); break;
+ case OP_SAT: res.data.f32 = CLAMP(imm.reg.data.f32, 0.0f, 1.0f); break;
case OP_RCP: res.data.f32 = 1.0f / imm.reg.data.f32; break;
case OP_RSQ: res.data.f32 = 1.0f / sqrtf(imm.reg.data.f32); break;
case OP_LG2: res.data.f32 = log2f(imm.reg.data.f32); break;
Instruction *insn;
Instruction *mul1 = NULL; // mul1 before mul2
int e = 0;
- float f = imm2.reg.data.f32;
+ float f = imm2.reg.data.f32 * exp2f(mul2->postFactor);
ImmediateValue imm1;
assert(mul2->op == OP_MUL && mul2->dType == TYPE_F32);
mul1->setSrc(s1, bld.loadImm(NULL, f * imm1.reg.data.f32));
mul1->src(s1).mod = Modifier(0);
mul2->def(0).replace(mul1->getDef(0), false);
+ mul1->saturate = mul2->saturate;
} else
if (prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) {
// c = mul a, b
mul2->def(0).replace(mul1->getDef(0), false);
if (f < 0)
mul1->src(0).mod *= Modifier(NV50_IR_MOD_NEG);
+ mul1->saturate = mul2->saturate;
}
- mul1->saturate = mul2->saturate;
return;
}
}
// b = mul a, imm
// d = mul b, c -> d = mul_x_imm a, c
int s2, t2;
- insn = mul2->getDef(0)->uses.front()->getInsn();
+ insn = (*mul2->getDef(0)->uses.begin())->getInsn();
if (!insn)
return;
mul1 = mul2;
{
const int t = !s;
const operation op = i->op;
+ Instruction *newi = i;
switch (i->op) {
case OP_MUL:
if (i->dType == TYPE_F32)
tryCollapseChainedMULs(i, s, imm0);
+ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+ assert(!isFloatType(i->sType));
+ if (imm0.isInteger(1) && i->dType == TYPE_S32) {
+ bld.setPosition(i, false);
+ // Need to set to the sign value, which is a compare.
+ newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0),
+ TYPE_S32, i->getSrc(t), bld.mkImm(0));
+ delete_Instruction(prog, i);
+ } else if (imm0.isInteger(0) || imm0.isInteger(1)) {
+ // The high bits can't be set in this case (either mul by 0 or
+ // unsigned by 1)
+ i->op = OP_MOV;
+ i->subOp = 0;
+ i->setSrc(0, new_ImmediateValue(prog, 0u));
+ i->src(0).mod = Modifier(0);
+ i->setSrc(1, NULL);
+ } else if (!imm0.isNegative() && imm0.isPow2()) {
+ // Translate into a shift
+ imm0.applyLog2();
+ i->op = OP_SHR;
+ i->subOp = 0;
+ imm0.reg.data.u32 = 32 - imm0.reg.data.u32;
+ i->setSrc(0, i->getSrc(t));
+ i->src(0).mod = i->src(t).mod;
+ i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32));
+ i->src(1).mod = 0;
+ }
+ } else
if (imm0.isInteger(0)) {
i->op = OP_MOV;
i->setSrc(0, new_ImmediateValue(prog, 0u));
i->src(0).mod = Modifier(0);
+ i->postFactor = 0;
i->setSrc(1, NULL);
} else
- if (imm0.isInteger(1) || imm0.isInteger(-1)) {
+ if (!i->postFactor && (imm0.isInteger(1) || imm0.isInteger(-1))) {
if (imm0.isNegative())
i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG);
i->op = i->src(t).mod.getOp();
i->src(0).mod = 0;
i->setSrc(1, NULL);
} else
- if (imm0.isInteger(2) || imm0.isInteger(-2)) {
+ if (!i->postFactor && (imm0.isInteger(2) || imm0.isInteger(-2))) {
if (imm0.isNegative())
i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG);
i->op = OP_ADD;
i->src(1).mod = 0;
}
break;
+ case OP_MAD:
+ if (imm0.isInteger(0)) {
+ i->setSrc(0, i->getSrc(2));
+ i->src(0).mod = i->src(2).mod;
+ i->setSrc(1, NULL);
+ i->setSrc(2, NULL);
+ i->op = i->src(0).mod.getOp();
+ if (i->op != OP_CVT)
+ i->src(0).mod = 0;
+ } else
+ if (imm0.isInteger(1) || imm0.isInteger(-1)) {
+ if (imm0.isNegative())
+ i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG);
+ if (s == 0) {
+ i->setSrc(0, i->getSrc(1));
+ i->src(0).mod = i->src(1).mod;
+ }
+ i->setSrc(1, i->getSrc(2));
+ i->src(1).mod = i->src(2).mod;
+ i->setSrc(2, NULL);
+ i->op = OP_ADD;
+ }
+ break;
case OP_ADD:
if (i->usesFlags())
break;
else
tA = tB;
tB = s ? bld.getSSA() : i->getDef(0);
- bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA);
+ newi = bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA);
if (s)
bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
tA = bld.getSSA();
bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, TYPE_S32, i->getSrc(0), bld.mkImm(0));
tD = (d < 0) ? bld.getSSA() : i->getDef(0)->asLValue();
- bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA);
+ newi = bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA);
if (d < 0)
bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB);
case OP_ABS:
case OP_NEG:
+ case OP_SAT:
case OP_LG2:
case OP_RCP:
case OP_SQRT:
case OP_EX2:
unary(i, imm0);
break;
+ case OP_BFIND: {
+ int32_t res;
+ switch (i->dType) {
+ case TYPE_S32: res = util_last_bit_signed(imm0.reg.data.s32) - 1; break;
+ case TYPE_U32: res = util_last_bit(imm0.reg.data.u32) - 1; break;
+ default:
+ return;
+ }
+ if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT && res >= 0)
+ res = 31 - res;
+ bld.setPosition(i, false); /* make sure bld is init'ed */
+ i->setSrc(0, bld.mkImm(res));
+ i->setSrc(1, NULL);
+ i->op = OP_MOV;
+ i->subOp = 0;
+ break;
+ }
+ case OP_POPCNT: {
+ // Only deal with 1-arg POPCNT here
+ if (i->srcExists(1))
+ break;
+ uint32_t res = util_bitcount(imm0.reg.data.u32);
+ i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res));
+ i->setSrc(1, NULL);
+ i->op = OP_MOV;
+ break;
+ }
default:
return;
}
- if (i->op != op)
+ if (newi->op != op)
foldCount++;
}
return true;
// try to attach join to previous instruction
- Instruction *insn = bb->getExit();
- if (insn && insn->op == OP_JOIN && !insn->getPredicate()) {
- insn = insn->prev;
- if (insn && !insn->getPredicate() &&
- !insn->asFlow() &&
- insn->op != OP_TEXBAR &&
- !isTextureOp(insn->op) && // probably just nve4
- !isSurfaceOp(insn->op) && // not confirmed
- insn->op != OP_LINTERP && // probably just nve4
- insn->op != OP_PINTERP && // probably just nve4
- ((insn->op != OP_LOAD && insn->op != OP_STORE) ||
- typeSizeof(insn->dType) <= 4) &&
- !insn->isNop()) {
- insn->join = 1;
- bb->remove(bb->getExit());
- return true;
+ if (prog->getTarget()->hasJoin) {
+ Instruction *insn = bb->getExit();
+ if (insn && insn->op == OP_JOIN && !insn->getPredicate()) {
+ insn = insn->prev;
+ if (insn && !insn->getPredicate() &&
+ !insn->asFlow() &&
+ insn->op != OP_TEXBAR &&
+ !isTextureOp(insn->op) && // probably just nve4
+ !isSurfaceOp(insn->op) && // not confirmed
+ insn->op != OP_LINTERP && // probably just nve4
+ insn->op != OP_PINTERP && // probably just nve4
+ ((insn->op != OP_LOAD && insn->op != OP_STORE) ||
+ (typeSizeof(insn->dType) <= 4 && !insn->src(0).isIndirect(0))) &&
+ !insn->isNop()) {
+ insn->join = 1;
+ bb->remove(bb->getExit());
+ return true;
+ }
}
}