/// or
/// DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
/// to TargetPassConfig::createMachineScheduler() to have an effect.
+ ///
+ /// \p BaseOps1 and \p BaseOps2 are memory operands of two memory operations.
+ /// \p NumLoads is the number of loads that will be in the cluster if this
+ /// hook returns true.
virtual bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
ArrayRef<const MachineOperand *> BaseOps2,
unsigned NumLoads) const {
SUnit *SUb = MemOpRecords[Idx+1].SU;
if (TII->shouldClusterMemOps(MemOpRecords[Idx].BaseOps,
MemOpRecords[Idx + 1].BaseOps,
- ClusterLength)) {
+ ClusterLength + 1)) {
if (SUa->NodeNum > SUb->NodeNum)
std::swap(SUa, SUb);
if (DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) ||
(isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) ||
(isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) {
- const unsigned MaxGlobalLoadCluster = 6;
+ const unsigned MaxGlobalLoadCluster = 7;
if (NumLoads > MaxGlobalLoadCluster)
return false;
? MRI.getRegClass(Reg)
: RI.getPhysRegClass(Reg);
- return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
+ // FIXME: 1 should not be subtracted from NumLoads. This is to match the
+ // behavior of clusterNeighboringMemOps, which was previously passing the
+ // cluster length less 1. LoadClusterThreshold should be tuned instead.
+ return ((NumLoads - 1) * (RI.getRegSizeInBits(*DstRC) / 8)) <=
+ LoadClusterThreshold;
}
// FIXME: This behaves strangely. If, for example, you have 32 load + stores,