ReMatPICStubLoad("remat-pic-stub-load",
                 cl::desc("Re-materialize load from stub in PIC mode"),
                 cl::init(false), cl::Hidden);
+static cl::opt<unsigned>
+PartialRegUpdateClearance("partial-reg-update-clearance",
+                          cl::desc("Clearance between two register writes "
+                                   "for inserting XOR to avoid partial "
+                                   "register update"),
+                          cl::init(64), cl::Hidden);
+static cl::opt<unsigned>
+UndefRegClearance("undef-reg-clearance",
+                  cl::desc("How many idle instructions we would like before "
+                           "certain undef register reads"),
+                  cl::init(64), cl::Hidden);
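Both new options are cl::Hidden, so they only appear under llc -help-hidden, but they can be set on the command line of any build that contains this patch. A quick way to experiment with the thresholds (the input file name here is just a placeholder):

    llc -mtriple=x86_64-unknown-linux-gnu \
        -partial-reg-update-clearance=16 -undef-reg-clearance=16 \
        input.ll -o -

Setting both back to 16 reproduces the previously hard-coded behavior.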
enum {
  // Select which memory operand is being unfolded.
...
      return 0;
  }
-  // If any of the preceding 16 instructions are reading Reg, insert a
-  // dependency breaking instruction. The magic number is based on a few
-  // Nehalem experiments.
-  return 16;
+  // If any instructions in the clearance range are reading Reg, insert a
+  // dependency breaking instruction, which is inexpensive and is likely to
+  // be hidden in other instructions' cycles.
+  return PartialRegUpdateClearance;
}
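For context (not part of this patch): the value returned here is only a preference. The dependency-breaking pass (ExecutionDepsFix at the time of this change) tracks how many instructions have issued since each register was last written, and inserts the XOR only when that distance is smaller than the requested clearance. A minimal sketch of that comparison, with hypothetical names for the bookkeeping values, not the actual pass code:

    // Simplified sketch of the consumer side; not the actual pass code.
    // 'LastDefDistance' is hypothetical: the number of instructions issued
    // since the register in question was last written.
    static bool shouldBreakDependency(unsigned Pref, unsigned LastDefDistance) {
      if (Pref == 0)
        return false;             // the target asked for no clearance at all
      // Break only when the previous write is recent enough to stall the new
      // instruction; otherwise the old value has long since retired.
      return Pref > LastDefDistance;
    }

With the defaults above, any write within the previous 64 instructions now triggers a dependency break, versus 16 before this patch.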
// Return true for any instruction that copies the high bits of the first source
// operand into the unused high bits of the destination operand.
  const MachineOperand &MO = MI->getOperand(OpNum);
  if (MO.isUndef() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
-    // Use the same magic number as getPartialRegUpdateClearance.
-    return 16;
+    return UndefRegClearance;
  }
  return 0;
}
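The remaining hunks update the affected codegen tests. cvtsi2ss writes only the low 32 bits of its destination: the SSE form leaves the upper bits of the xmm register untouched, and the VEX forms copy them from the first source operand, which is undef in this code. Either way the convert inherits a dependency on whatever last wrote that register. With the default clearance raised from 16 to 64, the pass now considers these registers too recently written and first inserts an xorps/vxorps of the register with itself; the hardware recognizes that as a zeroing idiom with no input dependency, so the false dependency is broken, as the added lines below show.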
; AVX1-NEXT: .LBB45_10:
; AVX1-NEXT: shrq %rax
; AVX1-NEXT: orq %rax, %rcx
+; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX2-NEXT: .LBB45_10:
; AVX2-NEXT: shrq %rax
; AVX2-NEXT: orq %rax, %rcx
+; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0
; AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX1-NEXT: .LBB74_10:
; AVX1-NEXT: shrq %rax
; AVX1-NEXT: orq %rax, %rcx
+; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX2-NEXT: .LBB74_10:
; AVX2-NEXT: shrq %rax
; AVX2-NEXT: orq %rax, %rcx
+; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0
; AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; SSE-NEXT: .LBB78_10:
; SSE-NEXT: shrq %rax
; SSE-NEXT: orq %rax, %rcx
+; SSE-NEXT: xorps %xmm5, %xmm5
; SSE-NEXT: cvtsi2ssq %rcx, %xmm5
; SSE-NEXT: addss %xmm5, %xmm5
; SSE-NEXT: .LBB78_12:
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB78_16
; SSE-NEXT: # BB#17:
+; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
; SSE-NEXT: jmp .LBB78_18
; SSE-NEXT: .LBB78_16:
; SSE-NEXT: shrq %rax
; SSE-NEXT: orq %rax, %rcx
+; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rcx, %xmm1
; SSE-NEXT: addss %xmm1, %xmm1
; SSE-NEXT: .LBB78_18:
; AVX1-NEXT: testq %rax, %rax
; AVX1-NEXT: js .LBB78_19
; AVX1-NEXT: # BB#20:
+; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm5
; AVX1-NEXT: jmp .LBB78_21
; AVX1-NEXT: .LBB78_19:
; AVX1-NEXT: shrq %rax
; AVX1-NEXT: orq %rax, %rcx
+; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm5
; AVX1-NEXT: .LBB78_21:
; AVX2-NEXT: testq %rax, %rax
; AVX2-NEXT: js .LBB78_19
; AVX2-NEXT: # BB#20:
+; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm5
; AVX2-NEXT: jmp .LBB78_21
; AVX2-NEXT: .LBB78_19:
; AVX2-NEXT: shrq %rax
; AVX2-NEXT: orq %rax, %rcx
+; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0
; AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm5
; AVX2-NEXT: .LBB78_21: