assert((isPPC64 || !MustSaveCR) &&
"Prologue CR saving supported only in 64-bit mode");
+ // Check if we can move the stack update instruction (stdu) down the prologue
+ // past the callee saves. Hopefully this will avoid the situation where the
+ // saves stall waiting for the store-with-update's stack update to complete.
+ MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+ bool MovingStackUpdateDown = false;
+ // This optimization has a number of guards. At this point we are being very
+ // cautious and we do not try to do this when we have a fast call or
+ // we are using PIC base or we are using a frame pointer or a base pointer.
+ // It would be possible to turn on this optimization under these conditions
+ // as well but it would require further modifications to the prologue and
+ // epilogue. For example, if we want to turn on this optimization for
+ // functions that use frame pointers we would have to take into consideration
+ // the fact that spills to the stack may be using r30 instead of r1.
+ // Aside from that we need to have a non-zero frame and we need to have a
+ // non-large frame size. Notice that we did not use !isLargeFrame but we used
+ // isInt<16>(FrameSize) instead. This is important because this guard has to
+ // be identical to the one in the epilogue and in the epilogue the variable
+ // is defined as bool isLargeFrame = !isInt<16>(FrameSize);
+ if (FrameSize && !FI->hasFastCall() && !FI->usesPICBase() && !HasFP &&
+ !HasBP && isInt<16>(FrameSize)) {
+ const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
+ for (int i=0; i<Info.size(); i++) {
+ int FrIdx = Info[i].getFrameIdx();
+ if (FrIdx < 0) {
+ if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
+ MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
+ StackUpdateLoc++;
+ MovingStackUpdateDown = true;
+ }
+ }
+ }
+ }
+
// If we need to spill the CR and the LR but we don't have two separate
// registers available, we must spill them one at a time
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
}
if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, StoreInst)
+ BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
.addReg(ScratchReg, getKillRegState(true))
.addImm(LROffset)
.addReg(SPReg);
HasSTUX = true;
} else if (!isLargeFrame) {
- BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg)
+ BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
.addReg(SPReg)
.addImm(NegFrameSize)
.addReg(SPReg);
}
int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+ // We have changed the object offset above but we do not want to change
+ // the actual offsets in the CFI instruction so we have to undo the
+ // offset change here.
+ if (MovingStackUpdateDown)
+ Offset -= NegFrameSize;
+
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
unsigned RBReg = SPReg;
unsigned SPAdd = 0;
+ // Check if we can move the stack update instruction up the epilogue
+ // past the callee saves. This will allow the move to LR instruction
+ // to be executed before the restores of the callee saves which means
+ // that the callee saves can hide the latency from the MTLR instruction.
+ MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+ bool MovingStackUpdateUp = false;
+ if (FrameSize && !FI->hasFastCall() && !FI->usesPICBase() && !HasFP &&
+ !HasBP && !isLargeFrame) {
+ const std::vector< CalleeSavedInfo > & Info = MFI.getCalleeSavedInfo();
+ for (int i=0; i<Info.size(); i++) {
+ int FrIdx = Info[i].getFrameIdx();
+ if (FrIdx < 0) {
+ if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
+ StackUpdateLoc--;
+ MovingStackUpdateUp = true;
+ }
+ }
+ }
+ }
+
if (FrameSize) {
// In the prologue, the loaded (or persistent) stack pointer value is
// offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
}
} else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
if (HasRedZone) {
- BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
.addReg(SPReg)
.addImm(FrameSize);
} else {
.addReg(FPReg);
RBReg = FPReg;
}
- BuildMI(MBB, MBBI, dl, LoadInst, RBReg)
+ BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
.addImm(0)
.addReg(SPReg);
}
// a base register anyway, because it may happen to be R0.
bool LoadedLR = false;
if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
- BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
+ BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
.addImm(LROffset+SPAdd)
.addReg(RBReg);
LoadedLR = true;
.addReg(TempReg, getKillRegState(i == e-1));
if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg);
+ BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
// call optimization
define noalias i8* @_ZN2CC3funEv(%class.CC* %this) {
; CHECK-LABEL: _ZN2CC3funEv:
; CHECK: mflr 0
-; CHECK-NEXT: std 0, 16(1)
-; CHECK-NEXT: stdu 1, -48(1)
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
-; CHECK-NEXT: std 30, 32(1)
+; CHECK-NEXT: std 30, -16(1)
+; CHECK-NEXT: std 0, 16(1)
+; CHECK-NEXT: stdu 1, -48(1)
; CHECK-NEXT: mr 30, 3
; CHECK-NEXT: ld 12, 0(30)
; CHECK-NEXT: std 2, 24(1)
; CHECK-NEXT: mr 3, 30
; CHECK-NEXT: bl _ZN2CC3barEPi
; CHECK-NEXT: nop
-; CHECK: ld 30, 32(1)
-; CHECK-NEXT: li 3, 0
+; CHECK: li 3, 0
; CHECK-NEXT: addi 1, 1, 48
; CHECK-NEXT: ld 0, 16(1)
; CHECK-NEXT: mtlr 0
+; CHECK: ld 30, -16(1)
; CHECK-NEXT: blr
entry:
%foo = getelementptr inbounds %class.CC, %class.CC* %this, i64 0, i32 0, i32 0
; stfd 14, 416(1)
; After the fix by patch D34337:
+; CHECK-LE:std 15, -280(1)
+; CHECK-LE:stfd 14, -144(1)
; CHECK-LE: stdu 1, -528(1)
-; CHECK-LE:std 15, 248(1)
-; CHECK-LE:stfd 14, 384(1)
+; CHECK-BE:std 15, -280(1)
+; CHECK-BE:stfd 14, -144(1)
; CHECK-BE: stdu 1, -544(1)
-; CHECK-BE:std 15, 264(1)
-; CHECK-BE:stfd 14, 400(1)
}
define signext i32 @foo() {
;
; Epilogue code.
; CHECK: mtlr {{[0-9]+}}
-; CHECK-NEXT: blr
+; CHECK: blr
;
; ENABLE: .[[ELSE_LABEL]]: # %if.else
; Shift second argument by one and store into returned register.
; Next BB
; CHECK: %for.end
; CHECK: mtlr {{[0-9]+}}
-; CHECK-NEXT: blr
+; CHECK: blr
define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
entry:
br label %for.preheader
; Make sure we save the link register
; CHECK: mflr {{[0-9]+}}
;
-; DISABLE: cmplwi 0, 3, 0
-; DISABLE-NEXT: std
+; DISABLE: std
; DISABLE-NEXT: std
+; DISABLE: cmplwi 0, 3, 0
; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
;
; Loop preheader
; DISABLE: .[[EPILOG_BB]]: # %if.end
; Epilog code
; CHECK: mtlr {{[0-9]+}}
-; CHECK-NEXT: blr
+; CHECK: blr
;
; ENABLE: .[[ELSE_LABEL]]: # %if.else
; Shift second argument by one and store into returned register.
; Make sure we save the link register
; CHECK: mflr {{[0-9]+}}
;
-; DISABLE: cmplwi 0, 3, 0
-; DISABLE-NEXT: std
+; DISABLE: std
; DISABLE-NEXT: std
+; DISABLE: cmplwi 0, 3, 0
; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: bl somethingElse
;
; Epilogue code.
; CHECK: mtlr {{[0-9]+}}
-; CHECK-NEXT: blr
+; CHECK: blr
;
; ENABLE: .[[ELSE_LABEL]]: # %if.else
; Shift second argument by one and store into returned register.
entry:
; CHECK-LABEL: test_foo:
-; CHECK: stdu 1, {{-?[0-9]+}}(1)
+; CHECK-DAG: stdu 1, {{-?[0-9]+}}(1)
; CHECK-DAG: mr [[BACKUP_3:[0-9]+]], 3
; CHECK-DAG: mr [[BACKUP_4:[0-9]+]], 4
; CHECK-DAG: mr [[BACKUP_5:[0-9]+]], 5
; CHECK-DAG: mr [[BACKUP_8:[0-9]+]], 8
; CHECK-DAG: mr [[BACKUP_9:[0-9]+]], 9
; CHECK-DAG: mr [[BACKUP_10:[0-9]+]], 10
-; CHECK-DAG: std [[BACKUP_3]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_4]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_5]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_6]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_7]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_8]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_9]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_10]], {{[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_3]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_4]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_5]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_6]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_7]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_8]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_9]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_10]], {{-?[0-9]+}}(1)
; CHECK: bl __tls_get_addr
; CHECK-DAG: stw 3, 0([[BACKUP_3]])
; CHECK-DAG: stw 3, 0([[BACKUP_4]])
define <4 x i32> @testSpill(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: testSpill:
-; CHECK: li 11, 80
-; CHECK: li 12, 96
-; CHECK: li 3, 48
-; CHECK: li 10, 64
-; CHECK: stxvd2x 62, 1, 11 # 16-byte Folded Spill
-; CHECK: stxvd2x 63, 1, 12 # 16-byte Folded Spill
-; CHECK: stxvd2x 60, 1, 3 # 16-byte Folded Spill
-; CHECK: stxvd2x 61, 1, 10 # 16-byte Folded Spill
-; CHECK: li 9, 96
-; CHECK: li 10, 80
-; CHECK: li 11, 64
-; CHECK: li 12, 48
-; CHECK: lxvd2x 63, 1, 9 # 16-byte Folded Reload
-; CHECK: lxvd2x 62, 1, 10 # 16-byte Folded Reload
-; CHECK: lxvd2x 61, 1, 11 # 16-byte Folded Reload
-; CHECK: lxvd2x 60, 1, 12 # 16-byte Folded Reload
+; CHECK-DAG: li [[REG64:[0-9]+]], -64
+; CHECK-DAG: li [[REG48:[0-9]+]], -48
+; CHECK-DAG: li [[REG32:[0-9]+]], -32
+; CHECK-DAG: li [[REG16:[0-9]+]], -16
+; CHECK-NOT: li
+; CHECK-DAG: stxvd2x 60, 1, [[REG64]] # 16-byte Folded Spill
+; CHECK-DAG: stxvd2x 61, 1, [[REG48]] # 16-byte Folded Spill
+; CHECK-DAG: stxvd2x 62, 1, [[REG32]] # 16-byte Folded Spill
+; CHECK-DAG: stxvd2x 63, 1, [[REG16]] # 16-byte Folded Spill
+; CHECK: std 0, 16(1)
+; CHECK-DAG: li [[REG16:[0-9]+]], -16
+; CHECK-DAG: li [[REG32:[0-9]+]], -32
+; CHECK-DAG: li [[REG48:[0-9]+]], -48
+; CHECK-DAG: li [[REG64:[0-9]+]], -64
; CHECK: mtlr 0
+; CHECK-DAG: lxvd2x 63, 1, [[REG16]] # 16-byte Folded Reload
+; CHECK-DAG: lxvd2x 62, 1, [[REG32]] # 16-byte Folded Reload
+; CHECK-DAG: lxvd2x 61, 1, [[REG48]] # 16-byte Folded Reload
+; CHECK-DAG: lxvd2x 60, 1, [[REG64]] # 16-byte Folded Reload
; CHECK-NEXT: blr
;
; CHECK-PWR9-LABEL: testSpill:
-; CHECK-PWR9: stxv 62, 80(1) # 16-byte Folded Spill
-; CHECK-PWR9: stxv 63, 96(1) # 16-byte Folded Spill
-; CHECK-PWR9: stxv 60, 48(1) # 16-byte Folded Spill
-; CHECK-PWR9: stxv 61, 64(1) # 16-byte Folded Spill
-; CHECK-PWR9: lxv 63, 96(1) # 16-byte Folded Reload
-; CHECK-PWR9: lxv 62, 80(1) # 16-byte Folded Reload
-; CHECK-PWR9: lxv 61, 64(1) # 16-byte Folded Reload
-; CHECK-PWR9: lxv 60, 48(1) # 16-byte Folded Reload
+; CHECK-PWR9-DAG: stxv 60, -64(1) # 16-byte Folded Spill
+; CHECK-PWR9-DAG: stxv 61, -48(1) # 16-byte Folded Spill
+; CHECK-PWR9-DAG: stxv 62, -32(1) # 16-byte Folded Spill
+; CHECK-PWR9-DAG: stxv 63, -16(1) # 16-byte Folded Spill
; CHECK-PWR9: mtlr 0
+; CHECK-PWR9-DAG: lxv 63, -16(1) # 16-byte Folded Reload
+; CHECK-PWR9-DAG: lxv 62, -32(1) # 16-byte Folded Reload
+; CHECK-PWR9-DAG: lxv 61, -48(1) # 16-byte Folded Reload
+; CHECK-PWR9-DAG: lxv 60, -64(1) # 16-byte Folded Reload
; CHECK-PWR9-NEXT: blr
entry: