return TypeX8632Attributes[Ty].FldString;
}
+CondX86::BrCond InstX8632::getOppositeCondition(CondX86::BrCond Cond) {
+ // Simple table lookup: each branch condition stores its logical
+ // inverse (e.g. Br_e <-> Br_ne) in InstX8632BrAttributes.
+ return InstX8632BrAttributes[Cond].Opposite;
+}
+
OperandX8632Mem::OperandX8632Mem(Cfg *Func, Type Ty, Variable *Base,
Constant *Offset, Variable *Index,
uint16_t Shift, SegmentRegisters SegmentReg)
// condition, swap the targets, and set new fallthrough to nullptr.
if (getTargetTrue() == NextNode) {
assert(Condition != CondX86::Br_None);
- Condition = InstX8632BrAttributes[Condition].Opposite;
+ // Use the shared helper rather than indexing the attribute table
+ // directly, so all condition inversions go through one place.
+ Condition = getOppositeCondition(Condition);
TargetTrue = getTargetFalse();
TargetFalse = nullptr;
return true;
assert(Condition != CondX86::Br_None);
assert(getDest()->hasReg());
assert(getSrcSize() == 2);
- // Only need the reg/reg form now.
- const auto SrcVar = llvm::cast<Variable>(getSrc(1));
- assert(SrcVar->hasReg());
- assert(SrcVar->getType() == IceType_i32);
+ // The source may now be a register, a stack slot, or a memory operand.
+ // Only 16- and 32-bit operands are representable by cmov.
+ Operand *Src = getSrc(1);
+ Type SrcTy = Src->getType();
+ assert(SrcTy == IceType_i16 || SrcTy == IceType_i32);
X8632::AssemblerX8632 *Asm = Func->getAssembler<X8632::AssemblerX8632>();
- Asm->cmov(Condition, RegX8632::getEncodedGPR(getDest()->getRegNum()),
- RegX8632::getEncodedGPR(SrcVar->getRegNum()));
+ if (const auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
+ if (SrcVar->hasReg()) {
+ // reg/reg form.
+ Asm->cmov(SrcTy, Condition,
+ RegX8632::getEncodedGPR(getDest()->getRegNum()),
+ RegX8632::getEncodedGPR(SrcVar->getRegNum()));
+ } else {
+ // reg/stack-slot form: translate the spilled Variable into an
+ // assembler address via the target lowering.
+ Asm->cmov(SrcTy, Condition,
+ RegX8632::getEncodedGPR(getDest()->getRegNum()),
+ static_cast<TargetX8632 *>(Func->getTarget())
+ ->stackVarToAsmOperand(SrcVar));
+ }
+ } else if (const auto Mem = llvm::dyn_cast<OperandX8632Mem>(Src)) {
+ // NOTE(review): `const auto` could be `const auto *` for consistency
+ // with the SrcVar cast above.
+ // reg/mem form; only the default segment is expected here.
+ assert(Mem->getSegmentRegister() == OperandX8632Mem::DefaultSegment);
+ Asm->cmov(SrcTy, Condition, RegX8632::getEncodedGPR(getDest()->getRegNum()),
+ Mem->toAsmAddress(Asm));
+ } else {
+ llvm_unreachable("Unexpected operand type");
+ }
}
void InstX8632Cmov::dump(const Cfg *Func) const {
static const char *getWidthString(Type Ty);
static const char *getFldString(Type Ty);
+ // Returns the logical inverse of the given branch condition
+ // (e.g. Br_e <-> Br_ne), looked up in InstX8632BrAttributes.
+ static CondX86::BrCond getOppositeCondition(CondX86::BrCond Cond);
void dump(const Cfg *Func) const override;
protected:
void TargetX8632::lowerSelect(const InstSelect *Inst) {
Variable *Dest = Inst->getDest();
+ Type DestTy = Dest->getType();
Operand *SrcT = Inst->getTrueOperand();
Operand *SrcF = Inst->getFalseOperand();
Operand *Condition = Inst->getCondition();
- if (isVectorType(Dest->getType())) {
+ if (isVectorType(DestTy)) {
Type SrcTy = SrcT->getType();
Variable *T = makeReg(SrcTy);
Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
return;
}
+ // Defaults for the unfolded case: the compare operands are filled in
+ // below as "cmp Condition, 0" and the cmov/branch condition is Br_ne.
+ CondX86::BrCond Cond = CondX86::Br_ne;
+ Operand *CmpOpnd0 = nullptr;
+ Operand *CmpOpnd1 = nullptr;
// Handle folding opportunities.
if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
assert(Producer->isDeleted());
default:
break;
case BoolFolding::PK_Icmp32: {
- // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
- InstX8632Label *Label = InstX8632Label::create(Func, this);
- Operand *Src0 = Producer->getSrc(0);
- Operand *Src1 = legalize(Producer->getSrc(1));
- Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
- _cmp(Src0RM, Src1);
- // This is the same code as below (for both i64 and non-i64),
- // except without the _cmp instruction and with a different
- // branch condition. TODO(stichnot): refactor.
- if (Dest->getType() == IceType_i64) {
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm);
- Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm);
- _mov(DestLo, SrcLoRI);
- _mov(DestHi, SrcHiRI);
- _br(getIcmp32Mapping(Cmp->getCondition()), Label);
- Operand *SrcFLo = loOperand(SrcF);
- Operand *SrcFHi = hiOperand(SrcF);
- SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm);
- SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm);
- _mov_nonkillable(DestLo, SrcLoRI);
- _mov_nonkillable(DestHi, SrcHiRI);
- } else {
- SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
- _mov(Dest, SrcT);
- _br(getIcmp32Mapping(Cmp->getCondition()), Label);
- SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
- _mov_nonkillable(Dest, SrcF);
- }
- Context.insert(Label);
- return;
- }
+ // Folded icmp producer: take its operands/condition directly so a
+ // single _cmp below feeds the cmov (or branch) lowering.
+ Cond = getIcmp32Mapping(Cmp->getCondition());
+ CmpOpnd1 = legalize(Producer->getSrc(1));
+ CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1);
+ } break;
}
}
+ // No producer was folded: compare the i1 condition value against zero.
+ if (CmpOpnd0 == nullptr) {
+ CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);
+ CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
+ }
+ assert(CmpOpnd0);
+ assert(CmpOpnd1);
- // a=d?b:c ==> cmp d,0; a=b; jne L1; a=c; L1:
- Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
- Constant *Zero = Ctx->getConstantZero(IceType_i32);
- InstX8632Label *Label = InstX8632Label::create(Func, this);
-
- if (Dest->getType() == IceType_i64) {
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm);
- Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm);
- _cmp(ConditionRM, Zero);
- _mov(DestLo, SrcLoRI);
- _mov(DestHi, SrcHiRI);
- _br(CondX86::Br_ne, Label);
- Operand *SrcFLo = loOperand(SrcF);
- Operand *SrcFHi = hiOperand(SrcF);
- SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm);
- SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm);
- _mov_nonkillable(DestLo, SrcLoRI);
- _mov_nonkillable(DestHi, SrcHiRI);
- } else {
- _cmp(ConditionRM, Zero);
+ _cmp(CmpOpnd0, CmpOpnd1);
+ if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
+ // The cmov instruction doesn't allow 8-bit or FP operands, so
+ // we need explicit control flow.
+ // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
+ InstX8632Label *Label = InstX8632Label::create(Func, this);
SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
_mov(Dest, SrcT);
- _br(CondX86::Br_ne, Label);
+ _br(Cond, Label);
SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
_mov_nonkillable(Dest, SrcF);
+ Context.insert(Label);
+ return;
+ }
+ // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
+ // But if SrcT is immediate, we might be able to do better, as
+ // the cmov instruction doesn't allow an immediate operand:
+ // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
+ if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
+ std::swap(SrcT, SrcF);
+ Cond = InstX8632::getOppositeCondition(Cond);
+ }
+ if (DestTy == IceType_i64) {
+ // i64 is lowered as two 32-bit cmov sequences sharing one _cmp.
+ // Set the low portion.
+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+ Variable *TLo = nullptr;
+ Operand *SrcFLo = legalize(loOperand(SrcF));
+ _mov(TLo, SrcFLo);
+ Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
+ _cmov(TLo, SrcTLo, Cond);
+ _mov(DestLo, TLo);
+ // Set the high portion.
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+ Variable *THi = nullptr;
+ Operand *SrcFHi = legalize(hiOperand(SrcF));
+ _mov(THi, SrcFHi);
+ Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem);
+ _cmov(THi, SrcTHi, Cond);
+ _mov(DestHi, THi);
+ return;
}
- Context.insert(Label);
+ // 16/32-bit scalar: plain mov/cmov/mov sequence.
+ assert(DestTy == IceType_i16 || DestTy == IceType_i32);
+ Variable *T = nullptr;
+ SrcF = legalize(SrcF);
+ _mov(T, SrcF);
+ SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
+ _cmov(T, SrcT, Cond);
+ _mov(Dest, T);
}
void TargetX8632::lowerStore(const InstStore *Inst) {
EmitOperand(dst, src);
}
-void AssemblerX8632::cmov(CondX86::BrCond cond, GPRRegister dst,
+// Emits cmov<cond> dst, src (register/register form). Ty selects the
+// operand size: i16 gets the 0x66 operand-size override prefix; only
+// i16 and i32 are supported.
+void AssemblerX8632::cmov(Type Ty, CondX86::BrCond cond, GPRRegister dst,
GPRRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ if (Ty == IceType_i16)
+ EmitOperandSizeOverride();
+ else
+ assert(Ty == IceType_i32);
+ // Opcode is 0F 40+cc (CMOVcc).
EmitUint8(0x0F);
EmitUint8(0x40 + cond);
EmitRegisterOperand(dst, src);
}
+// Emits cmov<cond> dst, src (register/memory form). Ty selects the
+// operand size: i16 gets the 0x66 operand-size override prefix; only
+// i16 and i32 are supported.
+void AssemblerX8632::cmov(Type Ty, CondX86::BrCond cond, GPRRegister dst,
+ const Address &src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ if (Ty == IceType_i16)
+ EmitOperandSizeOverride();
+ else
+ assert(Ty == IceType_i32);
+ // Opcode is 0F 40+cc (CMOVcc), followed by the memory operand.
+ EmitUint8(0x0F);
+ EmitUint8(0x40 + cond);
+ EmitOperand(dst, src);
+}
+
void AssemblerX8632::rep_movsb() {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF3);
void lea(Type Ty, GPRRegister dst, const Address &src);
- void cmov(CondX86::BrCond cond, GPRRegister dst, GPRRegister src);
+ // cmov supports i16 and i32 operand sizes only (no 8-bit CMOVcc
+ // encoding exists).
+ void cmov(Type Ty, CondX86::BrCond cond, GPRRegister dst, GPRRegister src);
+ void cmov(Type Ty, CondX86::BrCond cond, GPRRegister dst, const Address &src);
void rep_movsb();
declare void @call_target()
@global_byte = internal global [1 x i8] zeroinitializer
@global_short = internal global [2 x i8] zeroinitializer
+@global_int = internal global [4 x i8] zeroinitializer
; A direct call sequence uses the right mask and register-call sequence.
define void @test_direct_call() {
; A zero-byte instruction (e.g. local label definition) at a bundle
; boundary should not trigger nop padding.
-define void @label_at_boundary(i32 %arg) {
+define void @label_at_boundary(i32 %arg, float %farg1, float %farg2) {
entry:
- %cmp = icmp eq i32 %arg, 0
+ %argi8 = trunc i32 %arg to i8
call void @call_target()
; bundle boundary
%addr_short = bitcast [2 x i8]* @global_short to i16*
- store i16 0, i16* %addr_short, align 1 ; 9-byte instruction
- %blah = select i1 %cmp, i32 3, i32 5 ; 23-byte lowering sequence
+ %addr_int = bitcast [4 x i8]* @global_int to i32*
+ store i32 0, i32* %addr_int, align 1 ; 10-byte instruction
+ %blah = select i1 true, i8 %argi8, i8 %argi8 ; 22-byte lowering sequence
; label is here
- store i16 0, i16* %addr_short, align 1 ; 9-byte instruction
+ store i16 0, i16* %addr_short, align 1 ; 9-byte instruction
ret void
}
; CHECK-LABEL: label_at_boundary
; CHECK: call
-; We rely on the hideous 4-instruction 23-byte Om1 lowering sequence for select.
-; CHECK-NEXT: 20: {{.*}} mov WORD PTR
-; CHECK-NEXT: 29: {{.*}} cmp BYTE PTR
-; CHECK-NEXT: 2e: {{.*}} mov DWORD PTR
+; We rely on a particular 7-instruction 22-byte Om1 lowering sequence
+; for select.
+; CHECK-NEXT: 20: {{.*}} mov DWORD PTR
+; CHECK-NEXT: 2a: {{.*}} mov {{.*}},0x1
+; CHECK-NEXT: 2c: {{.*}} cmp {{.*}},0x0
+; CHECK-NEXT: 2e: {{.*}} mov {{.*}},BYTE PTR
+; CHECK-NEXT: 32: {{.*}} mov BYTE PTR
; CHECK-NEXT: 36: {{.*}} jne 40
-; CHECK-NEXT: 38: {{.*}} mov DWORD PTR
+; CHECK-NEXT: 38: {{.*}} mov {{.*}},BYTE PTR
+; CHECK-NEXT: 3c: {{.*}} mov BYTE PTR
; CHECK-NEXT: 40: {{.*}} mov WORD PTR
; Bundle lock without padding.
; CHECK: cmp
; CHECK: jb
; CHECK: cmp
-; CHECK: jne
+; CHECK: cmovne
;
; OPTM1-LABEL: select64VarVar
; OPTM1: cmp
; OPTM1: cmp
; OPTM1: jb
; OPTM1: cmp
-; OPTM1: jne
+; OPTM1: cmovne
define internal i64 @select64VarConst(i64 %a, i64 %b) {
entry:
; CHECK: cmp
; CHECK: jb
; CHECK: cmp
-; CHECK: jne
+; CHECK: cmovne
;
; OPTM1-LABEL: select64VarConst
; OPTM1: cmp
; OPTM1: cmp
; OPTM1: jb
; OPTM1: cmp
-; OPTM1: jne
+; OPTM1: cmovne
define internal i64 @select64ConstVar(i64 %a, i64 %b) {
entry:
; CHECK: cmp
; CHECK: jb
; CHECK: cmp
-; CHECK: jne
+; CHECK: cmove
;
; OPTM1-LABEL: select64ConstVar
; OPTM1: cmp
; OPTM1: cmp
; OPTM1: jb
; OPTM1: cmp
-; OPTM1: jne
+; OPTM1: cmove
define internal void @icmpEq64Imm() {
entry:
; CHECK-LABEL: fold_cmp_select
; CHECK: cmp
-; CHECK: jl
-; CHECK: mov
+; CHECK: cmovl
; 64-bit cmp/select folding.
; CHECK-LABEL: fold_cmp_select_64
; CHECK: cmp
-; CHECK: jl
-; CHECK: mov
-; CHECK: mov
+; CHECK: cmovl
+; CHECK: cmovl
; Cmp/select folding with intervening instructions.
; CHECK-NOT: cmp
; CHECK: call
; CHECK: cmp
-; CHECK: jl
-; CHECK: mov
+; CHECK: cmovl
; Cmp/multi-select folding.
; CHECK-LABEL: fold_cmp_select_multi
; CHECK: cmp
-; CHECK: jl
+; CHECK: cmovl
; CHECK: cmp
-; CHECK: jl
+; CHECK: cmovl
; CHECK: cmp
-; CHECK: jl
+; CHECK: cmovge
; CHECK: add
; CHECK: add
; CHECK-LABEL: no_fold_cmp_select_multi_liveout
; CHECK: set
; CHECK: cmp
-; CHECK: jne
+; CHECK: cmovne
; CHECK: cmp
-; CHECK: jne
+; CHECK: cmovne
; CHECK: cmp
-; CHECK: jne
+; CHECK: cmove
; CHECK: add
; CHECK: add
; CHECK-LABEL: no_fold_cmp_select_multi_non_whitelist
; CHECK: set
; CHECK: cmp
-; CHECK: jne
+; CHECK: cmovne
; CHECK: cmp
-; CHECK: jne
+; CHECK: cmovne
; CHECK: cmp
-; CHECK: jne
+; CHECK: cmove
; CHECK: movzx
; CHECK: add
; CHECK: add