// that stack slot.
if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) {
assert(Var->getWeight().isZero());
- if (!SpillVar->getLinkedTo()->hasReg()) {
+ if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
VariablesLinkedToSpillSlots.push_back(Var);
continue;
}
}
Operand *TargetX8632::loOperand(Operand *Operand) {
- assert(Operand->getType() == IceType_i64);
- if (Operand->getType() != IceType_i64)
+ assert(Operand->getType() == IceType_i64 ||
+ Operand->getType() == IceType_f64);
+ if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
return Operand;
if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
split64(Var);
}
Operand *TargetX8632::hiOperand(Operand *Operand) {
- assert(Operand->getType() == IceType_i64);
- if (Operand->getType() != IceType_i64)
+ assert(Operand->getType() == IceType_i64 ||
+ Operand->getType() == IceType_f64);
+ if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
return Operand;
if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
split64(Var);
// a_lo.i32 = t_lo.i32
// t_hi.i32 = hi(s.f64)
// a_hi.i32 = t_hi.i32
- SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(IceType_f64);
- SpillVar->setLinkedTo(llvm::dyn_cast<Variable>(Src0RM));
- Variable *Spill = SpillVar;
- Spill->setWeight(RegWeight::Zero);
- _movq(Spill, Src0RM);
+ Operand *SpillLo, *SpillHi;
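+ // If Src0RM is a variable, write it through a stack slot with movq and
+ // read the slot back as two 32-bit halves; otherwise (e.g. a memory
+ // operand produced by load folding) the two halves can be read directly.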
+ if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
+ SpillVariable *SpillVar =
+ Func->makeVariable<SpillVariable>(IceType_f64);
+ SpillVar->setLinkedTo(Src0Var);
+ Variable *Spill = SpillVar;
+ Spill->setWeight(RegWeight::Zero);
+ _movq(Spill, Src0RM);
+ SpillLo = VariableSplit::create(Func, Spill, VariableSplit::Low);
+ SpillHi = VariableSplit::create(Func, Spill, VariableSplit::High);
+ } else {
+ SpillLo = loOperand(Src0RM);
+ SpillHi = hiOperand(Src0RM);
+ }
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Variable *T_Lo = makeReg(IceType_i32);
Variable *T_Hi = makeReg(IceType_i32);
- VariableSplit *SpillLo =
- VariableSplit::create(Func, Spill, VariableSplit::Low);
- VariableSplit *SpillHi =
- VariableSplit::create(Func, Spill, VariableSplit::High);
_mov(T_Lo, SpillLo);
_mov(DestLo, T_Lo);
case IceType_f64: {
Src0 = legalize(Src0);
assert(Src0->getType() == IceType_i64);
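+ // If Src0 is a memory operand (e.g. a folded load), movq can copy the
+ // 64-bit value through an xmm register without going through a stack
+ // slot.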
+ if (llvm::isa<OperandX8632Mem>(Src0)) {
+ Variable *T = Func->makeVariable(Dest->getType());
+ _movq(T, Src0);
+ _movq(Dest, T);
+ break;
+ }
// a.f64 = bitcast b.i64 ==>
// t_lo.i32 = b_lo.i32
// FakeDef(s.f64)
} // anonymous namespace
-void TargetX8632::lowerLoad(const InstLoad *Inst) {
+void TargetX8632::lowerLoad(const InstLoad *Load) {
// A Load instruction can be treated the same as an Assign
// instruction, after the source operand is transformed into an
// OperandX8632Mem operand. Note that the address mode
// optimization already creates an OperandX8632Mem operand, so it
// doesn't need another level of transformation.
- Type Ty = Inst->getDest()->getType();
- Operand *Src0 = FormMemoryOperand(Inst->getSourceAddress(), Ty);
+ Type Ty = Load->getDest()->getType();
+ Operand *Src0 = FormMemoryOperand(Load->getSourceAddress(), Ty);
// Fuse this load with a subsequent Arithmetic instruction in the
// following situations:
// a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b
// a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true
//
+ // Fuse this load with a subsequent Cast instruction:
+ // a=[mem]; b=cast(a) ==> b=cast([mem]) if last use of a
+ //
// TODO: Clean up and test thoroughly.
// (E.g., if there is an mfence-all, make sure the load ends up on the
// same side of the fence).
// load instruction's dest variable, and that instruction ends that
// variable's live range, then make the substitution. Deal with
// commutativity optimization in the arithmetic instruction lowering.
- InstArithmetic *NewArith = nullptr;
- if (InstArithmetic *Arith =
- llvm::dyn_cast_or_null<InstArithmetic>(Context.getNextInst())) {
- Variable *DestLoad = Inst->getDest();
- Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0));
- Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1));
- if (Src1Arith == DestLoad && Arith->isLastUse(Src1Arith) &&
- DestLoad != Src0Arith) {
- NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
- Arith->getSrc(0), Src0);
- } else if (Src0Arith == DestLoad && Arith->isCommutative() &&
- Arith->isLastUse(Src0Arith) && DestLoad != Src1Arith) {
- NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(),
- Arith->getSrc(1), Src0);
- }
- if (NewArith) {
- Arith->setDeleted();
- Context.advanceNext();
- lowerArithmetic(NewArith);
- return;
+ //
+ // TODO(stichnot): Do load fusing as a separate pass. Run it before
+ // the bool folding pass. Modify Ice::Inst to allow src operands to
+ // be replaced, including updating Inst::LiveRangesEnded, to avoid
+ // having to manually clone most of each instruction type.
+ Inst *NextInst = Context.getNextInst();
+ Variable *DestLoad = Load->getDest();
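+ // Fusing is only legal when the next instruction ends the loaded
+ // value's live range; otherwise the load result is still needed later.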
+ if (NextInst && NextInst->isLastUse(DestLoad)) {
+ if (auto *Arith = llvm::dyn_cast<InstArithmetic>(NextInst)) {
+ InstArithmetic *NewArith = nullptr;
+ Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0));
+ Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1));
+ if (Src1Arith == DestLoad && DestLoad != Src0Arith) {
+ NewArith = InstArithmetic::create(
+ Func, Arith->getOp(), Arith->getDest(), Arith->getSrc(0), Src0);
+ } else if (Src0Arith == DestLoad && Arith->isCommutative() &&
+ DestLoad != Src1Arith) {
+ NewArith = InstArithmetic::create(
+ Func, Arith->getOp(), Arith->getDest(), Arith->getSrc(1), Src0);
+ }
+ if (NewArith) {
+ Arith->setDeleted();
+ Context.advanceNext();
+ lowerArithmetic(NewArith);
+ return;
+ }
+ } else if (auto *Cast = llvm::dyn_cast<InstCast>(NextInst)) {
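+ // Fold the load into the cast when the cast's source is exactly the
+ // just-loaded variable.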
+ Variable *Src0Cast = llvm::dyn_cast<Variable>(Cast->getSrc(0));
+ if (Src0Cast == DestLoad) {
+ InstCast *NewCast =
+ InstCast::create(Func, Cast->getCastKind(), Cast->getDest(), Src0);
+ Cast->setDeleted();
+ Context.advanceNext();
+ lowerCast(NewCast);
+ return;
+ }
}
}
- InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);
+ InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
lowerAssign(Assign);
}
--- /dev/null
+; Tests desired and undesired folding of load instructions into cast
+; instructions. The folding is only done when liveness analysis is performed,
+; so only O2 is tested.
+
+; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 | FileCheck %s
+
+; Not testing trunc or 32-bit bitcast because the lowered code uses pretty
+; much the same mov instructions regardless of whether folding is done.
+
+define internal i32 @zext_fold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to i8*
+ %load = load i8* %addr, align 1
+ %result = zext i8 %load to i32
+ ret i32 %result
+}
+; CHECK-LABEL: zext_fold
+; CHECK: movzx {{.*}},BYTE PTR [{{.*}}+0xc8]
+
+define internal i32 @zext_nofold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to i8*
+ %load = load i8* %addr, align 1
+ %tmp1 = zext i8 %load to i32
+ %tmp2 = zext i8 %load to i32
+ %result = add i32 %tmp1, %tmp2
+ ret i32 %result
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: zext_nofold
+; CHECK-NOT: movzx {{.*}},BYTE PTR [{{.*}}+0xc8]
+
+define internal i32 @sext_fold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to i8*
+ %load = load i8* %addr, align 1
+ %result = sext i8 %load to i32
+ ret i32 %result
+}
+; CHECK-LABEL: sext_fold
+; CHECK: movsx {{.*}},BYTE PTR [{{.*}}+0xc8]
+
+define internal i32 @sext_nofold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to i8*
+ %load = load i8* %addr, align 1
+ %tmp1 = sext i8 %load to i32
+ %tmp2 = sext i8 %load to i32
+ %result = add i32 %tmp1, %tmp2
+ ret i32 %result
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: sext_nofold
+; CHECK-NOT: movsx {{.*}},BYTE PTR [{{.*}}+0xc8]
+
+define internal float @fptrunc_fold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to double*
+ %load = load double* %addr, align 8
+ %result = fptrunc double %load to float
+ ret float %result
+}
+; CHECK-LABEL: fptrunc_fold
+; CHECK: cvtsd2ss {{.*}},QWORD PTR [{{.*}}+0xc8]
+
+define internal float @fptrunc_nofold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to double*
+ %load = load double* %addr, align 8
+ %tmp1 = fptrunc double %load to float
+ %tmp2 = fptrunc double %load to float
+ %result = fadd float %tmp1, %tmp2
+ ret float %result
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: fptrunc_nofold
+; CHECK-NOT: cvtsd2ss {{.*}},QWORD PTR [{{.*}}+0xc8]
+
+define internal double @fpext_fold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to float*
+ %load = load float* %addr, align 4
+ %result = fpext float %load to double
+ ret double %result
+}
+; CHECK-LABEL: fpext_fold
+; CHECK: cvtss2sd {{.*}},DWORD PTR [{{.*}}+0xc8]
+
+define internal double @fpext_nofold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to float*
+ %load = load float* %addr, align 4
+ %tmp1 = fpext float %load to double
+ %tmp2 = fpext float %load to double
+ %result = fadd double %tmp1, %tmp2
+ ret double %result
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: fpext_nofold
+; CHECK-NOT: cvtss2sd {{.*}},DWORD PTR [{{.*}}+0xc8]
+
+define internal i32 @fptoui_fold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to double*
+ %load = load double* %addr, align 8
+ %result = fptoui double %load to i16
+ %result2 = zext i16 %result to i32
+ ret i32 %result2
+}
+; CHECK-LABEL: fptoui_fold
+; CHECK: cvttsd2si {{.*}},QWORD PTR [{{.*}}+0xc8]
+
+define internal i32 @fptoui_nofold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to double*
+ %load = load double* %addr, align 8
+ %tmp1 = fptoui double %load to i16
+ %tmp2 = fptoui double %load to i16
+ %result = add i16 %tmp1, %tmp2
+ %result2 = zext i16 %result to i32
+ ret i32 %result2
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: fptoui_nofold
+; CHECK-NOT: cvttsd2si {{.*}},QWORD PTR [{{.*}}+0xc8]
+
+define internal i32 @fptosi_fold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to double*
+ %load = load double* %addr, align 8
+ %result = fptosi double %load to i16
+ %result2 = zext i16 %result to i32
+ ret i32 %result2
+}
+; CHECK-LABEL: fptosi_fold
+; CHECK: cvttsd2si {{.*}},QWORD PTR [{{.*}}+0xc8]
+
+define internal i32 @fptosi_nofold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to double*
+ %load = load double* %addr, align 8
+ %tmp1 = fptosi double %load to i16
+ %tmp2 = fptosi double %load to i16
+ %result = add i16 %tmp1, %tmp2
+ %result2 = zext i16 %result to i32
+ ret i32 %result2
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: fptosi_nofold
+; CHECK-NOT: cvttsd2si {{.*}},QWORD PTR [{{.*}}+0xc8]
+
+define internal double @uitofp_fold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to i16*
+ %load = load i16* %addr, align 1
+ %result = uitofp i16 %load to double
+ ret double %result
+}
+; CHECK-LABEL: uitofp_fold
+; CHECK: movzx {{.*}},WORD PTR [{{.*}}+0xc8]
+
+define internal double @uitofp_nofold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to i16*
+ %load = load i16* %addr, align 1
+ %tmp1 = uitofp i16 %load to double
+ %tmp2 = uitofp i16 %load to double
+ %result = fadd double %tmp1, %tmp2
+ ret double %result
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: uitofp_nofold
+; CHECK-NOT: movzx {{.*}},WORD PTR [{{.*}}+0xc8]
+
+define internal double @sitofp_fold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to i16*
+ %load = load i16* %addr, align 1
+ %result = sitofp i16 %load to double
+ ret double %result
+}
+; CHECK-LABEL: sitofp_fold
+; CHECK: movsx {{.*}},WORD PTR [{{.*}}+0xc8]
+
+define internal double @sitofp_nofold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to i16*
+ %load = load i16* %addr, align 1
+ %tmp1 = sitofp i16 %load to double
+ %tmp2 = sitofp i16 %load to double
+ %result = fadd double %tmp1, %tmp2
+ ret double %result
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: sitofp_nofold
+; CHECK-NOT: movsx {{.*}},WORD PTR [{{.*}}+0xc8]
+
+define internal double @bitcast_i64_fold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to i64*
+ %load = load i64* %addr, align 1
+ %result = bitcast i64 %load to double
+ ret double %result
+}
+; CHECK-LABEL: bitcast_i64_fold
+; CHECK: movq {{.*}},QWORD PTR [{{.*}}+0xc8]
+
+define internal double @bitcast_i64_nofold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to i64*
+ %load = load i64* %addr, align 1
+ %tmp1 = bitcast i64 %load to double
+ %tmp2 = bitcast i64 %load to double
+ %result = fadd double %tmp1, %tmp2
+ ret double %result
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: bitcast_i64_nofold
+; CHECK-NOT: movq {{.*}},QWORD PTR [{{.*}}+0xc8]
+
+define internal i64 @bitcast_double_fold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to double*
+ %load = load double* %addr, align 8
+ %result = bitcast double %load to i64
+ ret i64 %result
+}
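+; With folding, the double->i64 bitcast reads the two 32-bit halves
+; directly from memory, so no 64-bit (QWORD) load should appear.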
+; CHECK-LABEL: bitcast_double_fold
+; CHECK-NOT: QWORD PTR
+; CHECK: mov {{.*}},DWORD PTR [{{.*}}+0xc8]
+; CHECK: mov {{.*}},DWORD PTR [{{.*}}+0xcc]
+; CHECK-NOT: QWORD PTR
+
+define internal i64 @bitcast_double_nofold(i32 %arg) {
+entry:
+ %ptr = add i32 %arg, 200
+ %addr = inttoptr i32 %ptr to double*
+ %load = load double* %addr, align 8
+ %tmp1 = bitcast double %load to i64
+ %tmp2 = bitcast double %load to i64
+ %result = add i64 %tmp1, %tmp2
+ ret i64 %result
+}
+; Test that load folding does not happen.
+; CHECK-LABEL: bitcast_double_nofold
+; CHECK: QWORD PTR
+; CHECK: QWORD PTR