
Merge branch 'x86/urgent' into x86/cache, to pick up dependent fix
author Ingo Molnar <mingo@kernel.org>
Tue, 9 Oct 2018 06:50:10 +0000 (08:50 +0200)
committer Ingo Molnar <mingo@kernel.org>
Tue, 9 Oct 2018 06:50:10 +0000 (08:50 +0200)
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/include/asm/perf_event.h
arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c

  #define INTEL_ARCH_EVENT_MASK \
        (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)
  
+ #define AMD64_L3_SLICE_SHIFT                          48
+ #define AMD64_L3_SLICE_MASK                           \
+       ((0xFULL) << AMD64_L3_SLICE_SHIFT)
+ #define AMD64_L3_THREAD_SHIFT                         56
+ #define AMD64_L3_THREAD_MASK                          \
+       ((0xFFULL) << AMD64_L3_THREAD_SHIFT)
  #define X86_RAW_EVENT_MASK            \
        (ARCH_PERFMON_EVENTSEL_EVENT |  \
         ARCH_PERFMON_EVENTSEL_UMASK |  \
@@@ -270,7 -278,6 +278,7 @@@ struct perf_guest_switch_msr 
  extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
  extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
  extern void perf_check_microcode(void);
 +extern int x86_perf_rdpmc_index(struct perf_event *event);
  #else
  static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
  {
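
The AMD64_L3_SLICE_* and AMD64_L3_THREAD_* definitions added above are plain OR-able bit fields of the L3 PMC event select (a 4-bit slice field at bit 48, an 8-bit thread field at bit 56). A minimal sketch, using a helper name that is not part of this diff, of how a raw config could be widened to count across every slice and thread:

/*
 * Hypothetical helper, not taken from this commit: set every bit in
 * the slice and thread fields so the counter aggregates over all of
 * them.  Relies on the mask definitions added above.
 */
static inline u64 amd64_l3_config_all(u64 config)
{
	return config | AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK;
}
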
@@@ -17,7 -17,6 +17,7 @@@
  #include <linux/debugfs.h>
  #include <linux/kthread.h>
  #include <linux/mman.h>
 +#include <linux/perf_event.h>
  #include <linux/pm_qos.h>
  #include <linux/slab.h>
  #include <linux/uaccess.h>
@@@ -27,7 -26,6 +27,7 @@@
  #include <asm/intel_rdt_sched.h>
  #include <asm/perf_event.h>
  
 +#include "../../events/perf_event.h" /* For X86_CONFIG() */
  #include "intel_rdt.h"
  
  #define CREATE_TRACE_POINTS
@@@ -108,6 -106,16 +108,6 @@@ static u64 get_prefetch_disable_bits(vo
        return 0;
  }
  
 -/*
 - * Helper to write 64bit value to MSR without tracing. Used when
 - * use of the cache should be restricted and use of registers used
 - * for local variables avoided.
 - */
 -static inline void pseudo_wrmsrl_notrace(unsigned int msr, u64 val)
 -{
 -      __wrmsr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32));
 -}
 -
  /**
   * pseudo_lock_minor_get - Obtain available minor number
   * @minor: Pointer to where new minor number will be stored
@@@ -789,25 -797,27 +789,27 @@@ int rdtgroup_locksetup_exit(struct rdtg
  /**
   * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
   * @d: RDT domain
-  * @_cbm: CBM to test
+  * @cbm: CBM to test
   *
-  * @d represents a cache instance and @_cbm a capacity bitmask that is
-  * considered for it. Determine if @_cbm overlaps with any existing
+  * @d represents a cache instance and @cbm a capacity bitmask that is
+  * considered for it. Determine if @cbm overlaps with any existing
   * pseudo-locked region on @d.
   *
-  * Return: true if @_cbm overlaps with pseudo-locked region on @d, false
+  * @cbm is unsigned long, even if only 32 bits are used, to make the
+  * bitmap functions work correctly.
+  *
+  * Return: true if @cbm overlaps with pseudo-locked region on @d, false
   * otherwise.
   */
- bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm)
+ bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
  {
-       unsigned long *cbm = (unsigned long *)&_cbm;
-       unsigned long *cbm_b;
        unsigned int cbm_len;
+       unsigned long cbm_b;
  
        if (d->plr) {
                cbm_len = d->plr->r->cache.cbm_len;
-               cbm_b = (unsigned long *)&d->plr->cbm;
-               if (bitmap_intersects(cbm, cbm_b, cbm_len))
+               cbm_b = d->plr->cbm;
+               if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
                        return true;
        }
        return false;
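
This change, like the matching ones in intel_rdt_rdtgroup.c further down, follows a single pattern: the bitmap helpers walk unsigned long words, so casting a u32's address to unsigned long * lets bitmap_intersects() read past the variable on 64-bit. A stand-alone sketch of the safe pattern, with hypothetical names and assuming <linux/bitmap.h>:

/*
 * Illustrative only: copy each 32-bit CBM into a local unsigned long
 * so the bitmap helper never reads beyond the variable it is given.
 */
static bool cbm_overlaps_example(u32 a, u32 b, unsigned int cbm_len)
{
	unsigned long _a = a, _b = b;

	return bitmap_intersects(&_a, &_b, cbm_len);
}
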
@@@ -878,14 -888,31 +880,14 @@@ static int measure_cycles_lat_fn(void *
        struct pseudo_lock_region *plr = _plr;
        unsigned long i;
        u64 start, end;
 -#ifdef CONFIG_KASAN
 -      /*
 -       * The registers used for local register variables are also used
 -       * when KASAN is active. When KASAN is active we use a regular
 -       * variable to ensure we always use a valid pointer to access memory.
 -       * The cost is that accessing this pointer, which could be in
 -       * cache, will be included in the measurement of memory read latency.
 -       */
        void *mem_r;
 -#else
 -#ifdef CONFIG_X86_64
 -      register void *mem_r asm("rbx");
 -#else
 -      register void *mem_r asm("ebx");
 -#endif /* CONFIG_X86_64 */
 -#endif /* CONFIG_KASAN */
  
        local_irq_disable();
        /*
 -       * The wrmsr call may be reordered with the assignment below it.
 -       * Call wrmsr as directly as possible to avoid tracing clobbering
 -       * local register variable used for memory pointer.
 +       * Disable hardware prefetchers.
         */
 -      __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
 -      mem_r = plr->kmem;
 +      wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
 +      mem_r = READ_ONCE(plr->kmem);
        /*
         * Dummy execute of the time measurement to load the needed
         * instructions into the L1 instruction cache.
        return 0;
  }
  
 -static int measure_cycles_perf_fn(void *_plr)
 +/*
 + * Create a perf_event_attr for the hit and miss perf events that will
 + * be used during the performance measurement. A perf_event maintains
 + * a pointer to its perf_event_attr so a unique attribute structure is
 + * created for each perf_event.
 + *
 + * The actual configuration of the event is set right before use in order
 + * to use the X86_CONFIG macro.
 + */
 +static struct perf_event_attr perf_miss_attr = {
 +      .type           = PERF_TYPE_RAW,
 +      .size           = sizeof(struct perf_event_attr),
 +      .pinned         = 1,
 +      .disabled       = 0,
 +      .exclude_user   = 1,
 +};
 +
 +static struct perf_event_attr perf_hit_attr = {
 +      .type           = PERF_TYPE_RAW,
 +      .size           = sizeof(struct perf_event_attr),
 +      .pinned         = 1,
 +      .disabled       = 0,
 +      .exclude_user   = 1,
 +};
 +
 +struct residency_counts {
 +      u64 miss_before, hits_before;
 +      u64 miss_after,  hits_after;
 +};
 +
 +static int measure_residency_fn(struct perf_event_attr *miss_attr,
 +                              struct perf_event_attr *hit_attr,
 +                              struct pseudo_lock_region *plr,
 +                              struct residency_counts *counts)
  {
 -      unsigned long long l3_hits = 0, l3_miss = 0;
 -      u64 l3_hit_bits = 0, l3_miss_bits = 0;
 -      struct pseudo_lock_region *plr = _plr;
 -      unsigned long long l2_hits, l2_miss;
 -      u64 l2_hit_bits, l2_miss_bits;
 -      unsigned long i;
 -#ifdef CONFIG_KASAN
 -      /*
 -       * The registers used for local register variables are also used
 -       * when KASAN is active. When KASAN is active we use regular variables
 -       * at the cost of including cache access latency to these variables
 -       * in the measurements.
 -       */
 +      u64 hits_before = 0, hits_after = 0, miss_before = 0, miss_after = 0;
 +      struct perf_event *miss_event, *hit_event;
 +      int hit_pmcnum, miss_pmcnum;
        unsigned int line_size;
        unsigned int size;
 +      unsigned long i;
        void *mem_r;
 -#else
 -      register unsigned int line_size asm("esi");
 -      register unsigned int size asm("edi");
 -#ifdef CONFIG_X86_64
 -      register void *mem_r asm("rbx");
 -#else
 -      register void *mem_r asm("ebx");
 -#endif /* CONFIG_X86_64 */
 -#endif /* CONFIG_KASAN */
 +      u64 tmp;
 +
 +      miss_event = perf_event_create_kernel_counter(miss_attr, plr->cpu,
 +                                                    NULL, NULL, NULL);
 +      if (IS_ERR(miss_event))
 +              goto out;
 +
 +      hit_event = perf_event_create_kernel_counter(hit_attr, plr->cpu,
 +                                                   NULL, NULL, NULL);
 +      if (IS_ERR(hit_event))
 +              goto out_miss;
 +
 +      local_irq_disable();
 +      /*
 +       * Check any possible error state of events used by performing
 +       * one local read.
 +       */
 +      if (perf_event_read_local(miss_event, &tmp, NULL, NULL)) {
 +              local_irq_enable();
 +              goto out_hit;
 +      }
 +      if (perf_event_read_local(hit_event, &tmp, NULL, NULL)) {
 +              local_irq_enable();
 +              goto out_hit;
 +      }
 +
 +      /*
 +       * Disable hardware prefetchers.
 +       */
 +      wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
 +
 +      /* Initialize rest of local variables */
 +      /*
 +       * Performance event has been validated right before this with
 +       * interrupts disabled - it is thus safe to read the counter index.
 +       */
 +      miss_pmcnum = x86_perf_rdpmc_index(miss_event);
 +      hit_pmcnum = x86_perf_rdpmc_index(hit_event);
 +      line_size = READ_ONCE(plr->line_size);
 +      mem_r = READ_ONCE(plr->kmem);
 +      size = READ_ONCE(plr->size);
 +
 +      /*
 +       * Read counter variables twice - first to load the instructions
 +       * used in L1 cache, second to capture accurate value that does not
 +       * include cache misses incurred because of instruction loads.
 +       */
 +      rdpmcl(hit_pmcnum, hits_before);
 +      rdpmcl(miss_pmcnum, miss_before);
 +      /*
 +       * From SDM: Performing back-to-back fast reads are not guaranteed
 +       * to be monotonic.
 +       * Use LFENCE to ensure all previous instructions are retired
 +       * before proceeding.
 +       */
 +      rmb();
 +      rdpmcl(hit_pmcnum, hits_before);
 +      rdpmcl(miss_pmcnum, miss_before);
 +      /*
 +       * Use LFENCE to ensure all previous instructions are retired
 +       * before proceeding.
 +       */
 +      rmb();
 +      for (i = 0; i < size; i += line_size) {
 +              /*
 +               * Add a barrier to prevent speculative execution of this
 +               * loop reading beyond the end of the buffer.
 +               */
 +              rmb();
 +              asm volatile("mov (%0,%1,1), %%eax\n\t"
 +                           :
 +                           : "r" (mem_r), "r" (i)
 +                           : "%eax", "memory");
 +      }
 +      /*
 +       * Use LFENCE to ensure all previous instructions are retired
 +       * before proceeding.
 +       */
 +      rmb();
 +      rdpmcl(hit_pmcnum, hits_after);
 +      rdpmcl(miss_pmcnum, miss_after);
 +      /*
 +       * Use LFENCE to ensure all previous instructions are retired
 +       * before proceeding.
 +       */
 +      rmb();
 +      /* Re-enable hardware prefetchers */
 +      wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
 +      local_irq_enable();
 +out_hit:
 +      perf_event_release_kernel(hit_event);
 +out_miss:
 +      perf_event_release_kernel(miss_event);
 +out:
 +      /*
 +       * All counts will be zero on failure.
 +       */
 +      counts->miss_before = miss_before;
 +      counts->hits_before = hits_before;
 +      counts->miss_after  = miss_after;
 +      counts->hits_after  = hits_after;
 +      return 0;
 +}
 +
 +static int measure_l2_residency(void *_plr)
 +{
 +      struct pseudo_lock_region *plr = _plr;
 +      struct residency_counts counts = {0};
  
        /*
         * Non-architectural event for the Goldmont Microarchitecture
         * from Intel x86 Architecture Software Developer Manual (SDM):
         * MEM_LOAD_UOPS_RETIRED D1H (event number)
         * Umask values:
 -       *     L1_HIT   01H
         *     L2_HIT   02H
 -       *     L1_MISS  08H
         *     L2_MISS  10H
 -       *
 -       * On Broadwell Microarchitecture the MEM_LOAD_UOPS_RETIRED event
 -       * has two "no fix" errata associated with it: BDM35 and BDM100. On
 -       * this platform we use the following events instead:
 -       *  L2_RQSTS 24H (Documented in https://download.01.org/perfmon/BDW/)
 -       *       REFERENCES FFH
 -       *       MISS       3FH
 -       *  LONGEST_LAT_CACHE 2EH (Documented in SDM)
 -       *       REFERENCE 4FH
 -       *       MISS      41H
         */
 -
 -      /*
 -       * Start by setting flags for IA32_PERFEVTSELx:
 -       *     OS  (Operating system mode)  0x2
 -       *     INT (APIC interrupt enable)  0x10
 -       *     EN  (Enable counter)         0x40
 -       *
 -       * Then add the Umask value and event number to select performance
 -       * event.
 -       */
 -
        switch (boot_cpu_data.x86_model) {
        case INTEL_FAM6_ATOM_GOLDMONT:
        case INTEL_FAM6_ATOM_GEMINI_LAKE:
 -              l2_hit_bits = (0x52ULL << 16) | (0x2 << 8) | 0xd1;
 -              l2_miss_bits = (0x52ULL << 16) | (0x10 << 8) | 0xd1;
 -              break;
 -      case INTEL_FAM6_BROADWELL_X:
 -              /* On BDW the l2_hit_bits count references, not hits */
 -              l2_hit_bits = (0x52ULL << 16) | (0xff << 8) | 0x24;
 -              l2_miss_bits = (0x52ULL << 16) | (0x3f << 8) | 0x24;
 -              /* On BDW the l3_hit_bits count references, not hits */
 -              l3_hit_bits = (0x52ULL << 16) | (0x4f << 8) | 0x2e;
 -              l3_miss_bits = (0x52ULL << 16) | (0x41 << 8) | 0x2e;
 +              perf_miss_attr.config = X86_CONFIG(.event = 0xd1,
 +                                                 .umask = 0x10);
 +              perf_hit_attr.config = X86_CONFIG(.event = 0xd1,
 +                                                .umask = 0x2);
                break;
        default:
                goto out;
        }
  
 -      local_irq_disable();
 +      measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);
        /*
 -       * Call wrmsr direcly to avoid the local register variables from
 -       * being overwritten due to reordering of their assignment with
 -       * the wrmsr calls.
 +       * If a failure prevented the measurements from succeeding
 +       * tracepoints will still be written and all counts will be zero.
         */
 -      __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
 -      /* Disable events and reset counters */
 -      pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0, 0x0);
 -      pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x0);
 -      pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0, 0x0);
 -      pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 1, 0x0);
 -      if (l3_hit_bits > 0) {
 -              pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x0);
 -              pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3, 0x0);
 -              pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 2, 0x0);
 -              pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 3, 0x0);
 -      }
 -      /* Set and enable the L2 counters */
 -      pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0, l2_hit_bits);
 -      pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1, l2_miss_bits);
 -      if (l3_hit_bits > 0) {
 -              pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2,
 -                                    l3_hit_bits);
 -              pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3,
 -                                    l3_miss_bits);
 -      }
 -      mem_r = plr->kmem;
 -      size = plr->size;
 -      line_size = plr->line_size;
 -      for (i = 0; i < size; i += line_size) {
 -              asm volatile("mov (%0,%1,1), %%eax\n\t"
 -                           :
 -                           : "r" (mem_r), "r" (i)
 -                           : "%eax", "memory");
 -      }
 +      trace_pseudo_lock_l2(counts.hits_after - counts.hits_before,
 +                           counts.miss_after - counts.miss_before);
 +out:
 +      plr->thread_done = 1;
 +      wake_up_interruptible(&plr->lock_thread_wq);
 +      return 0;
 +}
 +
 +static int measure_l3_residency(void *_plr)
 +{
 +      struct pseudo_lock_region *plr = _plr;
 +      struct residency_counts counts = {0};
 +
        /*
 -       * Call wrmsr directly (no tracing) to not influence
 -       * the cache access counters as they are disabled.
 +       * On Broadwell Microarchitecture the MEM_LOAD_UOPS_RETIRED event
 +       * has two "no fix" errata associated with it: BDM35 and BDM100. On
 +       * this platform the following events are used instead:
 +       * LONGEST_LAT_CACHE 2EH (Documented in SDM)
 +       *       REFERENCE 4FH
 +       *       MISS      41H
         */
 -      pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0,
 -                            l2_hit_bits & ~(0x40ULL << 16));
 -      pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1,
 -                            l2_miss_bits & ~(0x40ULL << 16));
 -      if (l3_hit_bits > 0) {
 -              pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2,
 -                                    l3_hit_bits & ~(0x40ULL << 16));
 -              pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3,
 -                                    l3_miss_bits & ~(0x40ULL << 16));
 -      }
 -      l2_hits = native_read_pmc(0);
 -      l2_miss = native_read_pmc(1);
 -      if (l3_hit_bits > 0) {
 -              l3_hits = native_read_pmc(2);
 -              l3_miss = native_read_pmc(3);
 +
 +      switch (boot_cpu_data.x86_model) {
 +      case INTEL_FAM6_BROADWELL_X:
 +              /* On BDW the hit event counts references, not hits */
 +              perf_hit_attr.config = X86_CONFIG(.event = 0x2e,
 +                                                .umask = 0x4f);
 +              perf_miss_attr.config = X86_CONFIG(.event = 0x2e,
 +                                                 .umask = 0x41);
 +              break;
 +      default:
 +              goto out;
        }
 -      wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
 -      local_irq_enable();
 +
 +      measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);
        /*
 -       * On BDW we count references and misses, need to adjust. Sometimes
 -       * the "hits" counter is a bit more than the references, for
 -       * example, x references but x + 1 hits. To not report invalid
 -       * hit values in this case we treat that as misses eaqual to
 -       * references.
 +       * If a failure prevented the measurements from succeeding
 +       * tracepoints will still be written and all counts will be zero.
         */
 -      if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X)
 -              l2_hits -= (l2_miss > l2_hits ? l2_hits : l2_miss);
 -      trace_pseudo_lock_l2(l2_hits, l2_miss);
 -      if (l3_hit_bits > 0) {
 -              if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X)
 -                      l3_hits -= (l3_miss > l3_hits ? l3_hits : l3_miss);
 -              trace_pseudo_lock_l3(l3_hits, l3_miss);
 +
 +      counts.miss_after -= counts.miss_before;
 +      if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X) {
 +              /*
 +               * On BDW references and misses are counted, need to adjust.
 +               * Sometimes the "hits" counter is a bit more than the
 +               * references, for example, x references but x + 1 hits.
 +               * To not report invalid hit values in this case we treat
 +               * that as misses equal to references.
 +               */
 +              /* First compute the number of cache references measured */
 +              counts.hits_after -= counts.hits_before;
 +              /* Next convert references to cache hits */
 +              counts.hits_after -= min(counts.miss_after, counts.hits_after);
 +      } else {
 +              counts.hits_after -= counts.hits_before;
        }
  
 +      trace_pseudo_lock_l3(counts.hits_after, counts.miss_after);
  out:
        plr->thread_done = 1;
        wake_up_interruptible(&plr->lock_thread_wq);
@@@ -1179,20 -1123,13 +1181,20 @@@ static int pseudo_lock_measure_cycles(s
                goto out;
        }
  
 +      plr->cpu = cpu;
 +
        if (sel == 1)
                thread = kthread_create_on_node(measure_cycles_lat_fn, plr,
                                                cpu_to_node(cpu),
                                                "pseudo_lock_measure/%u",
                                                cpu);
        else if (sel == 2)
 -              thread = kthread_create_on_node(measure_cycles_perf_fn, plr,
 +              thread = kthread_create_on_node(measure_l2_residency, plr,
 +                                              cpu_to_node(cpu),
 +                                              "pseudo_lock_measure/%u",
 +                                              cpu);
 +      else if (sel == 3)
 +              thread = kthread_create_on_node(measure_l3_residency, plr,
                                                cpu_to_node(cpu),
                                                "pseudo_lock_measure/%u",
                                                cpu);
@@@ -1236,7 -1173,7 +1238,7 @@@ static ssize_t pseudo_lock_measure_trig
        buf[buf_size] = '\0';
        ret = kstrtoint(buf, 10, &sel);
        if (ret == 0) {
 -              if (sel != 1)
 +              if (sel != 1 && sel != 2 && sel != 3)
                        return -EINVAL;
                ret = debugfs_file_get(file->f_path.dentry);
                if (ret)
@@@ -975,33 -975,34 +975,34 @@@ static int rdtgroup_mode_show(struct ke
   * is false then overlaps with any resource group or hardware entities
   * will be considered.
   *
+  * @cbm is unsigned long, even if only 32 bits are used, to make the
+  * bitmap functions work correctly.
+  *
   * Return: false if CBM does not overlap, true if it does.
   */
  bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
-                          u32 _cbm, int closid, bool exclusive)
+                          unsigned long cbm, int closid, bool exclusive)
  {
-       unsigned long *cbm = (unsigned long *)&_cbm;
-       unsigned long *ctrl_b;
        enum rdtgrp_mode mode;
+       unsigned long ctrl_b;
        u32 *ctrl;
        int i;
  
        /* Check for any overlap with regions used by hardware directly */
        if (!exclusive) {
-               if (bitmap_intersects(cbm,
-                                     (unsigned long *)&r->cache.shareable_bits,
-                                     r->cache.cbm_len))
+               ctrl_b = r->cache.shareable_bits;
+               if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
                        return true;
        }
  
        /* Check for overlap with other resource groups */
        ctrl = d->ctrl_val;
        for (i = 0; i < closids_supported(); i++, ctrl++) {
-               ctrl_b = (unsigned long *)ctrl;
+               ctrl_b = *ctrl;
                mode = rdtgroup_mode_by_closid(i);
                if (closid_allocated(i) && i != closid &&
                    mode != RDT_MODE_PSEUDO_LOCKSETUP) {
-                       if (bitmap_intersects(cbm, ctrl_b, r->cache.cbm_len)) {
+                       if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
                                if (exclusive) {
                                        if (mode == RDT_MODE_EXCLUSIVE)
                                                return true;
   * computed by first dividing the total cache size by the CBM length to
   * determine how many bytes each bit in the bitmask represents. The result
   * is multiplied with the number of bits set in the bitmask.
+  *
+  * @cbm is unsigned long, even if only 32 bits are used to make the
+  * bitmap functions work correctly.
   */
  unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
-                                 struct rdt_domain *d, u32 cbm)
+                                 struct rdt_domain *d, unsigned long cbm)
  {
        struct cpu_cacheinfo *ci;
        unsigned int size = 0;
        int num_b, i;
  
-       num_b = bitmap_weight((unsigned long *)&cbm, r->cache.cbm_len);
+       num_b = bitmap_weight(&cbm, r->cache.cbm_len);
        ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
        for (i = 0; i < ci->num_leaves; i++) {
                if (ci->info_list[i].level == r->cache_level) {
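
The computation described in the comment above (divide the cache size by the CBM length to get bytes per bit, then multiply by the number of set bits) reduces to a one-liner; a hedged sketch with hypothetical names, not taken from this diff:

/*
 * Illustrative only: each CBM bit represents cache_size / cbm_len
 * bytes, so the allocation size is that granule times the popcount
 * of the CBM (hweight_long() from <linux/bitops.h>).
 */
static unsigned int cbm_to_size_example(unsigned int cache_size,
					unsigned int cbm_len,
					unsigned long cbm)
{
	return cache_size / cbm_len * hweight_long(cbm);
}
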
@@@ -2353,6 -2357,7 +2357,7 @@@ static int rdtgroup_init_alloc(struct r
        u32 used_b = 0, unused_b = 0;
        u32 closid = rdtgrp->closid;
        struct rdt_resource *r;
+       unsigned long tmp_cbm;
        enum rdtgrp_mode mode;
        struct rdt_domain *d;
        int i, ret;
                         * modify the CBM based on system availability.
                         */
                        cbm_ensure_valid(&d->new_ctrl, r);
-                       if (bitmap_weight((unsigned long *) &d->new_ctrl,
-                                         r->cache.cbm_len) <
-                                       r->cache.min_cbm_bits) {
+                       /*
+                        * Assign the u32 CBM to an unsigned long to ensure
+                        * that bitmap_weight() does not access out-of-bound
+                        * memory.
+                        */
+                       tmp_cbm = d->new_ctrl;
+                       if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) <
+                           r->cache.min_cbm_bits) {
                                rdt_last_cmd_printf("no space on %s:%d\n",
                                                    r->name, d->id);
                                return -ENOSPC;
@@@ -2795,13 -2805,6 +2805,13 @@@ static int rdtgroup_show_options(struc
  {
        if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
                seq_puts(seq, ",cdp");
 +
 +      if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
 +              seq_puts(seq, ",cdpl2");
 +
 +      if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA]))
 +              seq_puts(seq, ",mba_MBps");
 +
        return 0;
  }