
Merge branch 'x86/urgent' into x86/cache, to pick up dependent fix
author Ingo Molnar <mingo@kernel.org>
Tue, 9 Oct 2018 06:50:10 +0000 (08:50 +0200)
committer Ingo Molnar <mingo@kernel.org>
Tue, 9 Oct 2018 06:50:10 +0000 (08:50 +0200)
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/include/asm/perf_event.h
arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c

  #define INTEL_ARCH_EVENT_MASK \
        (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)
  
+ #define AMD64_L3_SLICE_SHIFT                          48
+ #define AMD64_L3_SLICE_MASK                           \
+       ((0xFULL) << AMD64_L3_SLICE_SHIFT)
+ #define AMD64_L3_THREAD_SHIFT                         56
+ #define AMD64_L3_THREAD_MASK                          \
+       ((0xFFULL) << AMD64_L3_THREAD_SHIFT)
  #define X86_RAW_EVENT_MASK            \
        (ARCH_PERFMON_EVENTSEL_EVENT |  \
         ARCH_PERFMON_EVENTSEL_UMASK |  \
@@@ -270,7 -278,6 +278,7 @@@ struct perf_guest_switch_msr 
  extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
  extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
  extern void perf_check_microcode(void);
 +extern int x86_perf_rdpmc_index(struct perf_event *event);
  #else
  static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
  {
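
The AMD64_L3_SLICE_* and AMD64_L3_THREAD_* definitions added above are plain OR-able bit fields of the L3 PMC event select (a 4-bit slice field at bit 48, an 8-bit thread field at bit 56). A minimal sketch, using a helper name that is not part of this diff, of how a raw config could be widened to count across every slice and thread:

/*
 * Hypothetical helper, not taken from this commit: set every bit in
 * the slice and thread fields so the counter aggregates over all of
 * them.  Relies on the mask definitions added above.
 */
static inline u64 amd64_l3_config_all(u64 config)
{
	return config | AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK;
}
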
@@@ -17,7 -17,6 +17,7 @@@
  #include <linux/debugfs.h>
  #include <linux/kthread.h>
  #include <linux/mman.h>
 +#include <linux/perf_event.h>
  #include <linux/pm_qos.h>
  #include <linux/slab.h>
  #include <linux/uaccess.h>
@@@ -27,7 -26,6 +27,7 @@@
  #include <asm/intel_rdt_sched.h>
  #include <asm/perf_event.h>
  
 +#include "../../events/perf_event.h" /* For X86_CONFIG() */
  #include "intel_rdt.h"
  
  #define CREATE_TRACE_POINTS
@@@ -108,6 -106,16 +108,6 @@@ static u64 get_prefetch_disable_bits(vo
        return 0;
  }
  
 -/*
 - * Helper to write 64bit value to MSR without tracing. Used when
 - * use of the cache should be restricted and use of registers used
 - * for local variables avoided.
 - */
 -static inline void pseudo_wrmsrl_notrace(unsigned int msr, u64 val)
 -{
 -      __wrmsr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32));
 -}
 -
  /**
   * pseudo_lock_minor_get - Obtain available minor number
   * @minor: Pointer to where new minor number will be stored
@@@ -789,25 -797,27 +789,27 @@@ int rdtgroup_locksetup_exit(struct rdtg
  /**
   * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
   * @d: RDT domain
-  * @_cbm: CBM to test
+  * @cbm: CBM to test
   *
-  * @d represents a cache instance and @_cbm a capacity bitmask that is
-  * considered for it. Determine if @_cbm overlaps with any existing
+  * @d represents a cache instance and @cbm a capacity bitmask that is
+  * considered for it. Determine if @cbm overlaps with any existing
   * pseudo-locked region on @d.
   *
-  * Return: true if @_cbm overlaps with pseudo-locked region on @d, false
+  * @cbm is unsigned long, even if only 32 bits are used, to make the
+  * bitmap functions work correctly.
+  *
+  * Return: true if @cbm overlaps with pseudo-locked region on @d, false
   * otherwise.
   */
- bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm)
+ bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
  {
-       unsigned long *cbm = (unsigned long *)&_cbm;
-       unsigned long *cbm_b;
        unsigned int cbm_len;
+       unsigned long cbm_b;
  
        if (d->plr) {
                cbm_len = d->plr->r->cache.cbm_len;
-               cbm_b = (unsigned long *)&d->plr->cbm;
-               if (bitmap_intersects(cbm, cbm_b, cbm_len))
+               cbm_b = d->plr->cbm;
+               if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
                        return true;
        }
        return false;
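
This change, like the matching ones in intel_rdt_rdtgroup.c further down, follows a single pattern: the bitmap helpers walk unsigned long words, so casting a u32's address to unsigned long * lets bitmap_intersects() read past the variable on 64-bit. A stand-alone sketch of the safe pattern, with hypothetical names and assuming <linux/bitmap.h>:

/*
 * Illustrative only: copy each 32-bit CBM into a local unsigned long
 * so the bitmap helper never reads beyond the variable it is given.
 */
static bool cbm_overlaps_example(u32 a, u32 b, unsigned int cbm_len)
{
	unsigned long _a = a, _b = b;

	return bitmap_intersects(&_a, &_b, cbm_len);
}
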
@@@ -878,14 -888,31 +880,14 @@@ static int measure_cycles_lat_fn(void *
        struct pseudo_lock_region *plr = _plr;
        unsigned long i;
        u64 start, end;
 -#ifdef CONFIG_KASAN
 -      /*
 -       * The registers used for local register variables are also used
 -       * when KASAN is active. When KASAN is active we use a regular
 -       * variable to ensure we always use a valid pointer to access memory.
 -       * The cost is that accessing this pointer, which could be in
 -       * cache, will be included in the measurement of memory read latency.
 -       */
        void *mem_r;
 -#else
 -#ifdef CONFIG_X86_64
 -      register void *mem_r asm("rbx");
 -#else
 -      register void *mem_r asm("ebx");
 -#endif /* CONFIG_X86_64 */
 -#endif /* CONFIG_KASAN */
  
        local_irq_disable();
        /*
 -       * The wrmsr call may be reordered with the assignment below it.
 -       * Call wrmsr as directly as possible to avoid tracing clobbering
 -       * local register variable used for memory pointer.
 +       * Disable hardware prefetchers.
         */
 -      __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
 -      mem_r = plr->kmem;
 +      wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
 +      mem_r = READ_ONCE(plr->kmem);
        /*
         * Dummy execute of the time measurement to load the needed
         * instructions into the L1 instruction cache.
        return 0;
  }
  
 -static int measure_cycles_perf_fn(void *_plr)
 +/*
 + * Create a perf_event_attr for the hit and miss perf events that will
 + * be used during the performance measurement. A perf_event maintains
 + * a pointer to its perf_event_attr so a unique attribute structure is
 + * created for each perf_event.
 + *
 + * The actual configuration of the event is set right before use in order
 + * to use the X86_CONFIG macro.
 + */
 +static struct perf_event_attr perf_miss_attr = {
 +      .type           = PERF_TYPE_RAW,
 +      .size           = sizeof(struct perf_event_attr),
 +      .pinned         = 1,
 +      .disabled       = 0,
 +      .exclude_user   = 1,
 +};
 +
 +static struct perf_event_attr perf_hit_attr = {
 +      .type           = PERF_TYPE_RAW,
 +      .size           = sizeof(struct perf_event_attr),
 +      .pinned         = 1,
 +      .disabled       = 0,
 +      .exclude_user   = 1,
 +};
 +
 +struct residency_counts {
 +      u64 miss_before, hits_before;
 +      u64 miss_after,  hits_after;
 +};
 +
 +static int measure_residency_fn(struct perf_event_attr *miss_attr,
 +                              struct perf_event_attr *hit_attr,
 +                              struct pseudo_lock_region *plr,
 +                              struct residency_counts *counts)
  {
 -      unsigned long long l3_hits = 0, l3_miss = 0;
 -      u64 l3_hit_bits = 0, l3_miss_bits = 0;
 -      struct pseudo_lock_region *plr = _plr;
 -      unsigned long long l2_hits, l2_miss;
 -      u64 l2_hit_bits, l2_miss_bits;
 -      unsigned long i;
 -#ifdef CONFIG_KASAN
 -      /*
 -       * The registers used for local register variables are also used
 -       * when KASAN is active. When KASAN is active we use regular variables
 -       * at the cost of including cache access latency to these variables
 -       * in the measurements.
 -       */
 +      u64 hits_before = 0, hits_after = 0, miss_before = 0, miss_after = 0;
 +      struct perf_event *miss_event, *hit_event;
 +      int hit_pmcnum, miss_pmcnum;
        unsigned int line_size;
        unsigned int size;
 +      unsigned long i;
        void *mem_r;
 -#else
 -      register unsigned int line_size asm("esi");
 -      register unsigned int size asm("edi");
 -#ifdef CONFIG_X86_64
 -      register void *mem_r asm("rbx");
 -#else
 -      register void *mem_r asm("ebx");
 -#endif /* CONFIG_X86_64 */
 -#endif /* CONFIG_KASAN */
 +      u64 tmp;
 +
 +      miss_event = perf_event_create_kernel_counter(miss_attr, plr->cpu,
 +                                                    NULL, NULL, NULL);
 +      if (IS_ERR(miss_event))
 +              goto out;
 +
 +      hit_event = perf_event_create_kernel_counter(hit_attr, plr->cpu,
 +                                                   NULL, NULL, NULL);
 +      if (IS_ERR(hit_event))
 +              goto out_miss;
 +
 +      local_irq_disable();
 +      /*
 +       * Check any possible error state of events used by performing
 +       * one local read.
 +       */
 +      if (perf_event_read_local(miss_event, &tmp, NULL, NULL)) {
 +              local_irq_enable();
 +              goto out_hit;
 +      }
 +      if (perf_event_read_local(hit_event, &tmp, NULL, NULL)) {
 +              local_irq_enable();
 +              goto out_hit;
 +      }
 +
 +      /*
 +       * Disable hardware prefetchers.
 +       */
 +      wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
 +
 +      /* Initialize rest of local variables */
 +      /*
 +       * Performance event has been validated right before this with
 +       * interrupts disabled - it is thus safe to read the counter index.
 +       */
 +      miss_pmcnum = x86_perf_rdpmc_index(miss_event);
 +      hit_pmcnum = x86_perf_rdpmc_index(hit_event);
 +      line_size = READ_ONCE(plr->line_size);
 +      mem_r = READ_ONCE(plr->kmem);
 +      size = READ_ONCE(plr->size);
 +
 +      /*
 +       * Read counter variables twice - first to load the instructions
 +       * used in L1 cache, second to capture accurate value that does not
 +       * include cache misses incurred because of instruction loads.
 +       */
 +      rdpmcl(hit_pmcnum, hits_before);
 +      rdpmcl(miss_pmcnum, miss_before);
 +      /*
 +       * From SDM: Performing back-to-back fast reads are not guaranteed
 +       * to be monotonic.
 +       * Use LFENCE to ensure all previous instructions are retired
 +       * before proceeding.
 +       */
 +      rmb();
 +      rdpmcl(hit_pmcnum, hits_before);
 +      rdpmcl(miss_pmcnum, miss_before);
 +      /*
 +       * Use LFENCE to ensure all previous instructions are retired
 +       * before proceeding.
 +       */
 +      rmb();
 +      for (i = 0; i < size; i += line_size) {
 +              /*
 +               * Add a barrier to prevent speculative execution of this
 +               * loop reading beyond the end of the buffer.
 +               */
 +              rmb();
 +              asm volatile("mov (%0,%1,1), %%eax\n\t"
 +                           :
 +                           : "r" (mem_r), "r" (i)
 +                           : "%eax", "memory");
 +      }
 +      /*
 +       * Use LFENCE to ensure all previous instructions are retired
 +       * before proceeding.
 +       */
 +      rmb();
 +      rdpmcl(hit_pmcnum, hits_after);
 +      rdpmcl(miss_pmcnum, miss_after);
 +      /*
 +       * Use LFENCE to ensure all previous instructions are retired
 +       * before proceeding.
 +       */
 +      rmb();
 +      /* Re-enable hardware prefetchers */
 +      wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
 +      local_irq_enable();
 +out_hit:
 +      perf_event_release_kernel(hit_event);
 +out_miss:
 +      perf_event_release_kernel(miss_event);
 +out:
 +      /*
 +       * All counts will be zero on failure.
 +       */
 +      counts->miss_before = miss_before;
 +      counts->hits_before = hits_before;
 +      counts->miss_after  = miss_after;
 +      counts->hits_after  = hits_after;
 +      return 0;
 +}
 +
 +static int measure_l2_residency(void *_plr)
 +{
 +      struct pseudo_lock_region *plr = _plr;
 +      struct residency_counts counts = {0};
  
        /*
         * Non-architectural event for the Goldmont Microarchitecture
         * from Intel x86 Architecture Software Developer Manual (SDM):
         * MEM_LOAD_UOPS_RETIRED D1H (event number)
         * Umask values:
 -       *     L1_HIT   01H
         *     L2_HIT   02H
 -       *     L1_MISS  08H
         *     L2_MISS  10H
 -       *
 -       * On Broadwell Microarchitecture the MEM_LOAD_UOPS_RETIRED event
 -       * has two "no fix" errata associated with it: BDM35 and BDM100. On
 -       * this platform we use the following events instead:
 -       *  L2_RQSTS 24H (Documented in https://download.01.org/perfmon/BDW/)
 -       *       REFERENCES FFH
 -       *       MISS       3FH
 -       *  LONGEST_LAT_CACHE 2EH (Documented in SDM)
 -       *       REFERENCE 4FH
 -       *       MISS      41H
         */
 -
 -      /*
 -       * Start by setting flags for IA32_PERFEVTSELx:
 -       *     OS  (Operating system mode)  0x2
 -       *     INT (APIC interrupt enable)  0x10
 -       *     EN  (Enable counter)         0x40
 -       *
 -       * Then add the Umask value and event number to select performance
 -       * event.
 -       */
 -
        switch (boot_cpu_data.x86_model) {
        case INTEL_FAM6_ATOM_GOLDMONT:
        case INTEL_FAM6_ATOM_GEMINI_LAKE:
 -              l2_hit_bits = (0x52ULL << 16) | (0x2 << 8) | 0xd1;
 -              l2_miss_bits = (0x52ULL << 16) | (0x10 << 8) | 0xd1;
 -              break;
 -      case INTEL_FAM6_BROADWELL_X:
 -              /* On BDW the l2_hit_bits count references, not hits */
 -              l2_hit_bits = (0x52ULL << 16) | (0xff << 8) | 0x24;
 -              l2_miss_bits = (0x52ULL << 16) | (0x3f << 8) | 0x24;
 -              /* On BDW the l3_hit_bits count references, not hits */
 -              l3_hit_bits = (0x52ULL << 16) | (0x4f << 8) | 0x2e;
 -              l3_miss_bits = (0x52ULL << 16) | (0x41 << 8) | 0x2e;
 +              perf_miss_attr.config = X86_CONFIG(.event = 0xd1,
 +                                                 .umask = 0x10);
 +              perf_hit_attr.config = X86_CONFIG(.event = 0xd1,
 +                                                .umask = 0x2);
                break;
        default:
                goto out;
        }
  
 -      local_irq_disable();
 +      measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);
        /*
 -       * Call wrmsr direcly to avoid the local register variables from
 -       * being overwritten due to reordering of their assignment with
 -       * the wrmsr calls.
 +       * If a failure prevented the measurements from succeeding
 +       * tracepoints will still be written and all counts will be zero.
         */
 -      __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
 -      /* Disable events and reset counters */
 -      pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0, 0x0);
 -      pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x0);
 -      pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0, 0x0);
 -      pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 1, 0x0);
 -      if (l3_hit_bits > 0) {
 -              pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x0);
 -              pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3, 0x0);
 -              pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 2, 0x0);
 -              pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 3, 0x0);
 -      }
 -      /* Set and enable the L2 counters */
 -      pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0, l2_hit_bits);
 -      pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1, l2_miss_bits);
 -      if (l3_hit_bits > 0) {
 -              pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2,
 -                                    l3_hit_bits);
 -              pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3,
 -                                    l3_miss_bits);
 -      }
 -      mem_r = plr->kmem;
 -      size = plr->size;
 -      line_size = plr->line_size;
 -      for (i = 0; i < size; i += line_size) {
 -              asm volatile("mov (%0,%1,1), %%eax\n\t"
 -                           :
 -                           : "r" (mem_r), "r" (i)
 -                           : "%eax", "memory");
 -      }
 +      trace_pseudo_lock_l2(counts.hits_after - counts.hits_before,
 +                           counts.miss_after - counts.miss_before);
 +out:
 +      plr->thread_done = 1;
 +      wake_up_interruptible(&plr->lock_thread_wq);
 +      return 0;
 +}
 +
 +static int measure_l3_residency(void *_plr)
 +{
 +      struct pseudo_lock_region *plr = _plr;
 +      struct residency_counts counts = {0};
 +
        /*
 -       * Call wrmsr directly (no tracing) to not influence
 -       * the cache access counters as they are disabled.
 +       * On Broadwell Microarchitecture the MEM_LOAD_UOPS_RETIRED event
 +       * has two "no fix" errata associated with it: BDM35 and BDM100. On
 +       * this platform the following events are used instead:
 +       * LONGEST_LAT_CACHE 2EH (Documented in SDM)
 +       *       REFERENCE 4FH
 +       *       MISS      41H
         */
 -      pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0,
 -                            l2_hit_bits & ~(0x40ULL << 16));
 -      pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1,
 -                            l2_miss_bits & ~(0x40ULL << 16));
 -      if (l3_hit_bits > 0) {
 -              pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 2,
 -                                    l3_hit_bits & ~(0x40ULL << 16));
 -              pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3,
 -                                    l3_miss_bits & ~(0x40ULL << 16));
 -      }
 -      l2_hits = native_read_pmc(0);
 -      l2_miss = native_read_pmc(1);
 -      if (l3_hit_bits > 0) {
 -              l3_hits = native_read_pmc(2);
 -              l3_miss = native_read_pmc(3);
 +
 +      switch (boot_cpu_data.x86_model) {
 +      case INTEL_FAM6_BROADWELL_X:
 +              /* On BDW the hit event counts references, not hits */
 +              perf_hit_attr.config = X86_CONFIG(.event = 0x2e,
 +                                                .umask = 0x4f);
 +              perf_miss_attr.config = X86_CONFIG(.event = 0x2e,
 +                                                 .umask = 0x41);
 +              break;
 +      default:
 +              goto out;
        }
 -      wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
 -      local_irq_enable();
 +
 +      measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);
        /*
 -       * On BDW we count references and misses, need to adjust. Sometimes
 -       * the "hits" counter is a bit more than the references, for
 -       * example, x references but x + 1 hits. To not report invalid
 -       * hit values in this case we treat that as misses eaqual to
 -       * references.
 +       * If a failure prevented the measurements from succeeding
 +       * tracepoints will still be written and all counts will be zero.
         */
 -      if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X)
 -              l2_hits -= (l2_miss > l2_hits ? l2_hits : l2_miss);
 -      trace_pseudo_lock_l2(l2_hits, l2_miss);
 -      if (l3_hit_bits > 0) {
 -              if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X)
 -                      l3_hits -= (l3_miss > l3_hits ? l3_hits : l3_miss);
 -              trace_pseudo_lock_l3(l3_hits, l3_miss);
 +
 +      counts.miss_after -= counts.miss_before;
 +      if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X) {
 +              /*
 +               * On BDW references and misses are counted, need to adjust.
 +               * Sometimes the "hits" counter is a bit more than the
 +               * references, for example, x references but x + 1 hits.
 +               * To not report invalid hit values in this case we treat
 +               * that as misses equal to references.
 +               */
 +              /* First compute the number of cache references measured */
 +              counts.hits_after -= counts.hits_before;
 +              /* Next convert references to cache hits */
 +              counts.hits_after -= min(counts.miss_after, counts.hits_after);
 +      } else {
 +              counts.hits_after -= counts.hits_before;
        }
  
 +      trace_pseudo_lock_l3(counts.hits_after, counts.miss_after);
  out:
        plr->thread_done = 1;
        wake_up_interruptible(&plr->lock_thread_wq);
@@@ -1179,20 -1123,13 +1181,20 @@@ static int pseudo_lock_measure_cycles(s
                goto out;
        }
  
 +      plr->cpu = cpu;
 +
        if (sel == 1)
                thread = kthread_create_on_node(measure_cycles_lat_fn, plr,
                                                cpu_to_node(cpu),
                                                "pseudo_lock_measure/%u",
                                                cpu);
        else if (sel == 2)
 -              thread = kthread_create_on_node(measure_cycles_perf_fn, plr,
 +              thread = kthread_create_on_node(measure_l2_residency, plr,
 +                                              cpu_to_node(cpu),
 +                                              "pseudo_lock_measure/%u",
 +                                              cpu);
 +      else if (sel == 3)
 +              thread = kthread_create_on_node(measure_l3_residency, plr,
                                                cpu_to_node(cpu),
                                                "pseudo_lock_measure/%u",
                                                cpu);
@@@ -1236,7 -1173,7 +1238,7 @@@ static ssize_t pseudo_lock_measure_trig
        buf[buf_size] = '\0';
        ret = kstrtoint(buf, 10, &sel);
        if (ret == 0) {
 -              if (sel != 1)
 +              if (sel != 1 && sel != 2 && sel != 3)
                        return -EINVAL;
                ret = debugfs_file_get(file->f_path.dentry);
                if (ret)
@@@ -975,33 -975,34 +975,34 @@@ static int rdtgroup_mode_show(struct ke
   * is false then overlaps with any resource group or hardware entities
   * will be considered.
   *
+  * @cbm is unsigned long, even if only 32 bits are used, to make the
+  * bitmap functions work correctly.
+  *
   * Return: false if CBM does not overlap, true if it does.
   */
  bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
-                          u32 _cbm, int closid, bool exclusive)
+                          unsigned long cbm, int closid, bool exclusive)
  {
-       unsigned long *cbm = (unsigned long *)&_cbm;
-       unsigned long *ctrl_b;
        enum rdtgrp_mode mode;
+       unsigned long ctrl_b;
        u32 *ctrl;
        int i;
  
        /* Check for any overlap with regions used by hardware directly */
        if (!exclusive) {
-               if (bitmap_intersects(cbm,
-                                     (unsigned long *)&r->cache.shareable_bits,
-                                     r->cache.cbm_len))
+               ctrl_b = r->cache.shareable_bits;
+               if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
                        return true;
        }
  
        /* Check for overlap with other resource groups */
        ctrl = d->ctrl_val;
        for (i = 0; i < closids_supported(); i++, ctrl++) {
-               ctrl_b = (unsigned long *)ctrl;
+               ctrl_b = *ctrl;
                mode = rdtgroup_mode_by_closid(i);
                if (closid_allocated(i) && i != closid &&
                    mode != RDT_MODE_PSEUDO_LOCKSETUP) {
-                       if (bitmap_intersects(cbm, ctrl_b, r->cache.cbm_len)) {
+                       if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
                                if (exclusive) {
                                        if (mode == RDT_MODE_EXCLUSIVE)
                                                return true;
   * computed by first dividing the total cache size by the CBM length to
   * determine how many bytes each bit in the bitmask represents. The result
   * is multiplied with the number of bits set in the bitmask.
+  *
+  * @cbm is unsigned long, even if only 32 bits are used to make the
+  * bitmap functions work correctly.
   */
  unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
-                                 struct rdt_domain *d, u32 cbm)
+                                 struct rdt_domain *d, unsigned long cbm)
  {
        struct cpu_cacheinfo *ci;
        unsigned int size = 0;
        int num_b, i;
  
-       num_b = bitmap_weight((unsigned long *)&cbm, r->cache.cbm_len);
+       num_b = bitmap_weight(&cbm, r->cache.cbm_len);
        ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
        for (i = 0; i < ci->num_leaves; i++) {
                if (ci->info_list[i].level == r->cache_level) {
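
The computation described in the comment above (divide the cache size by the CBM length to get bytes per bit, then multiply by the number of set bits) reduces to a one-liner; a hedged sketch with hypothetical names, not taken from this diff:

/*
 * Illustrative only: each CBM bit represents cache_size / cbm_len
 * bytes, so the allocation size is that granule times the popcount
 * of the CBM (hweight_long() from <linux/bitops.h>).
 */
static unsigned int cbm_to_size_example(unsigned int cache_size,
					unsigned int cbm_len,
					unsigned long cbm)
{
	return cache_size / cbm_len * hweight_long(cbm);
}
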
@@@ -2353,6 -2357,7 +2357,7 @@@ static int rdtgroup_init_alloc(struct r
        u32 used_b = 0, unused_b = 0;
        u32 closid = rdtgrp->closid;
        struct rdt_resource *r;
+       unsigned long tmp_cbm;
        enum rdtgrp_mode mode;
        struct rdt_domain *d;
        int i, ret;
                         * modify the CBM based on system availability.
                         */
                        cbm_ensure_valid(&d->new_ctrl, r);
-                       if (bitmap_weight((unsigned long *) &d->new_ctrl,
-                                         r->cache.cbm_len) <
-                                       r->cache.min_cbm_bits) {
+                       /*
+                        * Assign the u32 CBM to an unsigned long to ensure
+                        * that bitmap_weight() does not access out-of-bound
+                        * memory.
+                        */
+                       tmp_cbm = d->new_ctrl;
+                       if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) <
+                           r->cache.min_cbm_bits) {
                                rdt_last_cmd_printf("no space on %s:%d\n",
                                                    r->name, d->id);
                                return -ENOSPC;
@@@ -2795,13 -2805,6 +2805,13 @@@ static int rdtgroup_show_options(struc
  {
        if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
                seq_puts(seq, ",cdp");
 +
 +      if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
 +              seq_puts(seq, ",cdpl2");
 +
 +      if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA]))
 +              seq_puts(seq, ",mba_MBps");
 +
        return 0;
  }