static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp);
static unsigned long perf_nmi_window;
+/* AMD Event 0xFFF: Merge. Used with Large Increment per Cycle events */
+#define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL)
+#define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE)
+
static __initconst const u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
else if (event->attr.exclude_guest)
event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
+ if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw))
+ event->hw.flags |= PERF_X86_EVENT_PAIR;
+
return 0;
}
return &unconstrained;
}
+static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (is_counter_pair(hwc))
+ --cpuc->n_pair;
+}
+
static ssize_t amd_event_sysfs_show(char *page, u64 config)
{
u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
PERF_X86_EVENT_PAIR);
x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
+ x86_pmu.put_event_constraints = amd_put_event_constraints_f17h;
+ x86_pmu.perf_ctr_pair_en = AMD_MERGE_EVENT_ENABLE;
x86_pmu.flags |= PMU_FL_PAIR;
}
int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
u64 val;
if (!test_bit(idx, cpuc->active_mask))
continue;
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
wrmsrl(x86_pmu_config_addr(idx), val);
+ if (is_counter_pair(hwc))
+ wrmsrl(x86_pmu_config_addr(idx + 1), 0);
}
}
int counter; /* counter index */
int unassigned; /* number of events to be assigned left */
int nr_gp; /* number of GP counters used */
- unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ u64 used;
};
/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
sched->saved_states--;
sched->state = sched->saved[sched->saved_states];
- /* continue with next counter: */
- clear_bit(sched->state.counter++, sched->state.used);
+ /* this assignment didn't work out */
+ /* XXX broken vs EVENT_PAIR */
+ sched->state.used &= ~BIT_ULL(sched->state.counter);
+
+ /* try the next one */
+ sched->state.counter++;
return true;
}
if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
idx = INTEL_PMC_IDX_FIXED;
for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
- if (!__test_and_set_bit(idx, sched->state.used))
- goto done;
+ u64 mask = BIT_ULL(idx);
+
+ if (sched->state.used & mask)
+ continue;
+
+ sched->state.used |= mask;
+ goto done;
}
}
/* Grab the first unused counter starting with idx */
idx = sched->state.counter;
for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
- if (!__test_and_set_bit(idx, sched->state.used)) {
- if (sched->state.nr_gp++ >= sched->max_gp)
- return false;
+ u64 mask = BIT_ULL(idx);
- goto done;
- }
+ if (c->flags & PERF_X86_EVENT_PAIR)
+ mask |= mask << 1;
+
+ if (sched->state.used & mask)
+ continue;
+
+ if (sched->state.nr_gp++ >= sched->max_gp)
+ return false;
+
+ sched->state.used |= mask;
+ goto done;
}
return false;
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
struct event_constraint *c;
- unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
struct perf_event *e;
int n0, i, wmin, wmax, unsched = 0;
struct hw_perf_event *hwc;
-
- bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+ u64 used_mask = 0;
/*
* Compute the number of events already present; see x86_pmu_add(),
* fastpath, try to reuse previous register
*/
for (i = 0; i < n; i++) {
+ u64 mask;
+
hwc = &cpuc->event_list[i]->hw;
c = cpuc->event_constraint[i];
if (!test_bit(hwc->idx, c->idxmsk))
break;
+ mask = BIT_ULL(hwc->idx);
+ if (is_counter_pair(hwc))
+ mask |= mask << 1;
+
/* not already used */
- if (test_bit(hwc->idx, used_mask))
+ if (used_mask & mask)
break;
- __set_bit(hwc->idx, used_mask);
+ used_mask |= mask;
+
if (assign)
assign[i] = hwc->idx;
}
READ_ONCE(cpuc->excl_cntrs->exclusive_present))
gpmax /= 2;
+ /*
+ * Reduce the amount of available counters to allow fitting
+ * the extra Merge events needed by large increment events.
+ */
+ if (x86_pmu.flags & PMU_FL_PAIR) {
+ gpmax = x86_pmu.num_counters - cpuc->n_pair;
+ WARN_ON(gpmax <= 0);
+ }
+
unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
wmax, gpmax, assign);
}
return -EINVAL;
cpuc->event_list[n] = leader;
n++;
+ if (is_counter_pair(&leader->hw))
+ cpuc->n_pair++;
}
if (!dogrp)
return n;
cpuc->event_list[n] = event;
n++;
+ if (is_counter_pair(&event->hw))
+ cpuc->n_pair++;
}
return n;
}
wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
/*
+ * Clear the Merge event counter's upper 16 bits since
+ * we currently declare a 48-bit counter width
+ */
+ if (is_counter_pair(hwc))
+ wrmsrl(x86_pmu_event_addr(idx + 1), 0);
+
+ /*
* Due to erratum on certan cpu we need
* a second write to be sure the register
* is updated properly
struct amd_nb *amd_nb;
/* Inverted mask of bits to clear in the perf_ctr ctrl registers */
u64 perf_ctr_virt_mask;
+ int n_pair; /* Large increment events */
void *kfree_on_online[X86_PERF_KFREE_MAX];
};
* AMD bits
*/
unsigned int amd_nb_constraints : 1;
+ u64 perf_ctr_pair_en;
/*
* Extra registers for events
void x86_pmu_disable_all(void);
+static inline bool is_counter_pair(struct hw_perf_event *hwc)
+{
+ return hwc->flags & PERF_X86_EVENT_PAIR;
+}
+
static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
u64 enable_mask)
{
if (hwc->extra_reg.reg)
wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
+
+ /*
+ * Add enabled Merge event on next counter
+ * if large increment event being enabled on this counter
+ */
+ if (is_counter_pair(hwc))
+ wrmsrl(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en);
+
wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
}
struct hw_perf_event *hwc = &event->hw;
wrmsrl(hwc->config_base, hwc->config);
+
+ if (is_counter_pair(hwc))
+ wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0);
}
void x86_pmu_enable_event(struct perf_event *event);