
Merge tag 'v4.4.22' into android-4.4.y
author Dmitry Shmidt <dimitrysh@google.com>
Mon, 26 Sep 2016 17:37:43 +0000 (10:37 -0700)
committer Dmitry Shmidt <dimitrysh@google.com>
Mon, 26 Sep 2016 17:37:43 +0000 (10:37 -0700)
This is the 4.4.22 stable release

Change-Id: Id49e3c87d2cacb2fa85d85a17226f718f4a5ac28

arch/arm64/include/asm/spinlock.h
arch/x86/include/asm/uaccess.h
drivers/md/dm-crypt.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/proc/base.c
include/linux/mm.h
kernel/cpuset.c
kernel/fork.c
kernel/sched/core.c
net/ipv6/addrconf.c

diff --combined arch/arm64/include/asm/spinlock.h
   * The memory barriers are implicit with the load-acquire and store-release
   * instructions.
   */
 +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 +{
 +      unsigned int tmp;
 +      arch_spinlock_t lockval;
  
 -#define arch_spin_unlock_wait(lock) \
 -      do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
 +      asm volatile(
 +"     sevl\n"
 +"1:   wfe\n"
 +"2:   ldaxr   %w0, %2\n"
 +"     eor     %w1, %w0, %w0, ror #16\n"
 +"     cbnz    %w1, 1b\n"
 +      ARM64_LSE_ATOMIC_INSN(
 +      /* LL/SC */
 +"     stxr    %w1, %w0, %2\n"
 +"     cbnz    %w1, 2b\n", /* Serialise against any concurrent lockers */
 +      /* LSE atomics */
 +"     nop\n"
 +"     nop\n")
 +      : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
 +      :
 +      : "memory");
 +}
  
  #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
  
@@@ -331,4 -312,14 +331,14 @@@ static inline int arch_read_trylock(arc
  #define arch_read_relax(lock) cpu_relax()
  #define arch_write_relax(lock)        cpu_relax()
  
+ /*
+  * Accesses appearing in program order before a spin_lock() operation
+  * can be reordered with accesses inside the critical section, by virtue
+  * of arch_spin_lock being constructed using acquire semantics.
+  *
+  * In cases where this is problematic (e.g. try_to_wake_up), an
+  * smp_mb__before_spinlock() can restore the required ordering.
+  */
+ #define smp_mb__before_spinlock()     smp_mb()
  #endif /* __ASM_SPINLOCK_H */
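
For illustration only, a minimal sketch of the pattern the new comment describes (the lock, flag and function names are invented, not taken from this patch): a store issued before taking a lock can be reordered into the critical section, because arch_spin_lock() provides only acquire semantics; callers that need the store ordered first, as try_to_wake_up() does, place smp_mb__before_spinlock() in between.

    #include <linux/compiler.h>
    #include <linux/spinlock.h>

    /* Hedged sketch; everything below is illustrative only. */
    static DEFINE_SPINLOCK(example_lock);
    static int example_flag;

    static void example_publish_then_lock(void)
    {
            WRITE_ONCE(example_flag, 1);    /* must not be reordered into the section */
            smp_mb__before_spinlock();      /* smp_mb() on arm64 with this patch */
            spin_lock(&example_lock);
            /* accesses here may assume the store above is ordered before them */
            spin_unlock(&example_lock);
    }
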
diff --combined arch/x86/include/asm/uaccess.h
@@@ -134,9 -134,6 +134,9 @@@ extern int __get_user_4(void)
  extern int __get_user_8(void);
  extern int __get_user_bad(void);
  
 +#define __uaccess_begin() stac()
 +#define __uaccess_end()   clac()
 +
  /*
   * This is a type: either unsigned long, if the argument fits into
   * that type, or otherwise unsigned long long.
@@@ -196,10 -193,10 +196,10 @@@ __typeof__(__builtin_choose_expr(sizeof
  
  #ifdef CONFIG_X86_32
  #define __put_user_asm_u64(x, addr, err, errret)                      \
 -      asm volatile(ASM_STAC "\n"                                      \
 +      asm volatile("\n"                                               \
                     "1:        movl %%eax,0(%2)\n"                     \
                     "2:        movl %%edx,4(%2)\n"                     \
 -                   "3: " ASM_CLAC "\n"                                \
 +                   "3:"                                               \
                     ".section .fixup,\"ax\"\n"                         \
                     "4:        movl %3,%0\n"                           \
                     "  jmp 3b\n"                                       \
                     : "A" (x), "r" (addr), "i" (errret), "0" (err))
  
  #define __put_user_asm_ex_u64(x, addr)                                        \
 -      asm volatile(ASM_STAC "\n"                                      \
 +      asm volatile("\n"                                               \
                     "1:        movl %%eax,0(%1)\n"                     \
                     "2:        movl %%edx,4(%1)\n"                     \
 -                   "3: " ASM_CLAC "\n"                                \
 +                   "3:"                                               \
                     _ASM_EXTABLE_EX(1b, 2b)                            \
                     _ASM_EXTABLE_EX(2b, 3b)                            \
                     : : "A" (x), "r" (addr))
@@@ -307,10 -304,6 +307,10 @@@ do {                                                                     
        }                                                               \
  } while (0)
  
 +/*
 + * This doesn't do __uaccess_begin/end - the exception handling
 + * around it must do that.
 + */
  #define __put_user_size_ex(x, ptr, size)                              \
  do {                                                                  \
        __chk_user_ptr(ptr);                                            \
@@@ -365,9 -358,9 +365,9 @@@ do {                                                                       
  } while (0)
  
  #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret)     \
 -      asm volatile(ASM_STAC "\n"                                      \
 +      asm volatile("\n"                                               \
                     "1:        mov"itype" %2,%"rtype"1\n"              \
 -                   "2: " ASM_CLAC "\n"                                \
 +                   "2:\n"                                             \
                     ".section .fixup,\"ax\"\n"                         \
                     "3:        mov %3,%0\n"                            \
                     "  xor"itype" %"rtype"1,%"rtype"1\n"               \
                     : "=r" (err), ltype(x)                             \
                     : "m" (__m(addr)), "i" (errret), "0" (err))
  
 +/*
 + * This doesn't do __uaccess_begin/end - the exception handling
 + * around it must do that.
 + */
  #define __get_user_size_ex(x, ptr, size)                              \
  do {                                                                  \
        __chk_user_ptr(ptr);                                            \
  #define __get_user_asm_ex(x, addr, itype, rtype, ltype)                       \
        asm volatile("1:        mov"itype" %1,%"rtype"0\n"              \
                     "2:\n"                                             \
-                    _ASM_EXTABLE_EX(1b, 2b)                            \
+                    ".section .fixup,\"ax\"\n"                         \
+                      "3:xor"itype" %"rtype"0,%"rtype"0\n"             \
+                    "  jmp 2b\n"                                       \
+                    ".previous\n"                                      \
+                    _ASM_EXTABLE_EX(1b, 3b)                            \
                     : ltype(x) : "m" (__m(addr)))
  
  #define __put_user_nocheck(x, ptr, size)                      \
  ({                                                            \
        int __pu_err;                                           \
 +      __uaccess_begin();                                      \
        __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \
 +      __uaccess_end();                                        \
        __builtin_expect(__pu_err, 0);                          \
  })
  
  ({                                                                    \
        int __gu_err;                                                   \
        unsigned long __gu_val;                                         \
 +      __uaccess_begin();                                              \
        __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT);    \
 +      __uaccess_end();                                                \
        (x) = (__force __typeof__(*(ptr)))__gu_val;                     \
        __builtin_expect(__gu_err, 0);                                  \
  })
@@@ -438,9 -427,9 +442,9 @@@ struct __large_struct { unsigned long b
   * aliasing issues.
   */
  #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret)     \
 -      asm volatile(ASM_STAC "\n"                                      \
 +      asm volatile("\n"                                               \
                     "1:        mov"itype" %"rtype"1,%2\n"              \
 -                   "2: " ASM_CLAC "\n"                                \
 +                   "2:\n"                                             \
                     ".section .fixup,\"ax\"\n"                         \
                     "3:        mov %3,%0\n"                            \
                     "  jmp 2b\n"                                       \
   */
  #define uaccess_try   do {                                            \
        current_thread_info()->uaccess_err = 0;                         \
 -      stac();                                                         \
 +      __uaccess_begin();                                              \
        barrier();
  
  #define uaccess_catch(err)                                            \
 -      clac();                                                         \
 +      __uaccess_end();                                                \
        (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0);    \
  } while (0)
  
@@@ -562,13 -551,12 +566,13 @@@ extern void __cmpxchg_wrong_size(void
        __typeof__(ptr) __uval = (uval);                                \
        __typeof__(*(ptr)) __old = (old);                               \
        __typeof__(*(ptr)) __new = (new);                               \
 +      __uaccess_begin();                                              \
        switch (size) {                                                 \
        case 1:                                                         \
        {                                                               \
 -              asm volatile("\t" ASM_STAC "\n"                         \
 +              asm volatile("\n"                                       \
                        "1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n"          \
 -                      "2:\t" ASM_CLAC "\n"                            \
 +                      "2:\n"                                          \
                        "\t.section .fixup, \"ax\"\n"                   \
                        "3:\tmov     %3, %0\n"                          \
                        "\tjmp     2b\n"                                \
        }                                                               \
        case 2:                                                         \
        {                                                               \
 -              asm volatile("\t" ASM_STAC "\n"                         \
 +              asm volatile("\n"                                       \
                        "1:\t" LOCK_PREFIX "cmpxchgw %4, %2\n"          \
 -                      "2:\t" ASM_CLAC "\n"                            \
 +                      "2:\n"                                          \
                        "\t.section .fixup, \"ax\"\n"                   \
                        "3:\tmov     %3, %0\n"                          \
                        "\tjmp     2b\n"                                \
        }                                                               \
        case 4:                                                         \
        {                                                               \
 -              asm volatile("\t" ASM_STAC "\n"                         \
 +              asm volatile("\n"                                       \
                        "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"          \
 -                      "2:\t" ASM_CLAC "\n"                            \
 +                      "2:\n"                                          \
                        "\t.section .fixup, \"ax\"\n"                   \
                        "3:\tmov     %3, %0\n"                          \
                        "\tjmp     2b\n"                                \
                if (!IS_ENABLED(CONFIG_X86_64))                         \
                        __cmpxchg_wrong_size();                         \
                                                                        \
 -              asm volatile("\t" ASM_STAC "\n"                         \
 +              asm volatile("\n"                                       \
                        "1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n"          \
 -                      "2:\t" ASM_CLAC "\n"                            \
 +                      "2:\n"                                          \
                        "\t.section .fixup, \"ax\"\n"                   \
                        "3:\tmov     %3, %0\n"                          \
                        "\tjmp     2b\n"                                \
        default:                                                        \
                __cmpxchg_wrong_size();                                 \
        }                                                               \
 +      __uaccess_end();                                                \
        *__uval = __old;                                                \
        __ret;                                                          \
  })
@@@ -706,7 -693,7 +710,7 @@@ __copy_from_user_overflow(int size, uns
  
  #endif
  
 -static inline unsigned long __must_check
 +static __always_inline unsigned long __must_check
  copy_from_user(void *to, const void __user *from, unsigned long n)
  {
        int sz = __compiletime_object_size(to);
         * case, and do only runtime checking for non-constant sizes.
         */
  
 -      if (likely(sz < 0 || sz >= n))
 +      if (likely(sz < 0 || sz >= n)) {
 +              check_object_size(to, n, false);
                n = _copy_from_user(to, from, n);
 -      else if(__builtin_constant_p(n))
 +      } else if (__builtin_constant_p(n))
                copy_from_user_overflow();
        else
                __copy_from_user_overflow(sz, n);
        return n;
  }
  
 -static inline unsigned long __must_check
 +static __always_inline unsigned long __must_check
  copy_to_user(void __user *to, const void *from, unsigned long n)
  {
        int sz = __compiletime_object_size(from);
        might_fault();
  
        /* See the comment in copy_from_user() above. */
 -      if (likely(sz < 0 || sz >= n))
 +      if (likely(sz < 0 || sz >= n)) {
 +              check_object_size(from, n, true);
                n = _copy_to_user(to, from, n);
 -      else if(__builtin_constant_p(n))
 +      } else if (__builtin_constant_p(n))
                copy_to_user_overflow();
        else
                __copy_to_user_overflow(sz, n);
  #undef __copy_from_user_overflow
  #undef __copy_to_user_overflow
  
 +/*
 + * The "unsafe" user accesses aren't really "unsafe", but the naming
 + * is a big fat warning: you have to not only do the access_ok()
 + * checking before using them, but you have to surround them with the
 + * user_access_begin/end() pair.
 + */
 +#define user_access_begin()   __uaccess_begin()
 +#define user_access_end()     __uaccess_end()
 +
 +#define unsafe_put_user(x, ptr, err_label)                                    \
 +do {                                                                          \
 +      int __pu_err;                                                           \
 +      __put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT);         \
 +      if (unlikely(__pu_err)) goto err_label;                                 \
 +} while (0)
 +
 +#define unsafe_get_user(x, ptr, err_label)                                    \
 +do {                                                                          \
 +      int __gu_err;                                                           \
 +      unsigned long __gu_val;                                                 \
 +      __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT);    \
 +      (x) = (__force __typeof__(*(ptr)))__gu_val;                             \
 +      if (unlikely(__gu_err)) goto err_label;                                 \
 +} while (0)
 +
  #endif /* _ASM_X86_UACCESS_H */
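
For illustration, a hedged sketch (function and variable names invented) of how the new unsafe accessors are meant to be used together with the user_access_begin()/user_access_end() bracket that the comment above requires: access_ok() is checked first, and the window is closed on the error path as well.

    #include <linux/uaccess.h>

    /* Hypothetical helper: replace *uval with newval, returning the previous
     * value through *oldval.  Sketch only, using the error-label convention
     * of unsafe_get_user()/unsafe_put_user(). */
    static int example_swap_user_int(int __user *uval, int newval, int *oldval)
    {
            if (!access_ok(VERIFY_WRITE, uval, sizeof(*uval)))
                    return -EFAULT;

            user_access_begin();                    /* opens the window (stac) */
            unsafe_get_user(*oldval, uval, efault); /* branches to efault on a fault */
            unsafe_put_user(newval, uval, efault);
            user_access_end();                      /* closes the window (clac) */
            return 0;

    efault:
            user_access_end();                      /* must also close on failure */
            return -EFAULT;
    }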
  
diff --combined drivers/md/dm-crypt.c
@@@ -1864,24 -1864,16 +1864,24 @@@ static int crypt_ctr(struct dm_target *
        }
  
        ret = -ENOMEM;
 -      cc->io_queue = alloc_workqueue("kcryptd_io", WQ_MEM_RECLAIM, 1);
 +      cc->io_queue = alloc_workqueue("kcryptd_io",
 +                                     WQ_HIGHPRI |
 +                                     WQ_MEM_RECLAIM,
 +                                     1);
        if (!cc->io_queue) {
                ti->error = "Couldn't create kcryptd io queue";
                goto bad;
        }
  
        if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
 -              cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
 +              cc->crypt_queue = alloc_workqueue("kcryptd",
 +                                                WQ_HIGHPRI |
 +                                                WQ_MEM_RECLAIM, 1);
        else
 -              cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
 +              cc->crypt_queue = alloc_workqueue("kcryptd",
 +                                                WQ_HIGHPRI |
 +                                                WQ_MEM_RECLAIM |
 +                                                WQ_UNBOUND,
                                                  num_online_cpus());
        if (!cc->crypt_queue) {
                ti->error = "Couldn't create kcryptd queue";
@@@ -1928,6 -1920,13 +1928,13 @@@ static int crypt_map(struct dm_target *
                return DM_MAPIO_REMAPPED;
        }
  
+       /*
+        * Check if bio is too large, split as needed.
+        */
+       if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_PAGES << PAGE_SHIFT)) &&
+           bio_data_dir(bio) == WRITE)
+               dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT));
        io = dm_per_bio_data(bio, cc->per_bio_data_size);
        crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
        io->ctx.req = (struct ablkcipher_request *)(io + 1);
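
As a worked example of the new size check (the constants are assumed, not taken from the patch): with 4 KiB pages (PAGE_SHIFT == 12), BIO_MAX_PAGES == 256 and 512-byte sectors (SECTOR_SHIFT == 9), write bios larger than 1 MiB get split, and dm_accept_partial_bio() is asked to take at most 2048 sectors:

    #include <stdio.h>

    int main(void)
    {
            /* Assumed: BIO_MAX_PAGES == 256, PAGE_SHIFT == 12, SECTOR_SHIFT == 9 */
            unsigned long max_bytes   = 256UL << 12;    /* BIO_MAX_PAGES << PAGE_SHIFT */
            unsigned long max_sectors = max_bytes >> 9; /* ... >> SECTOR_SHIFT         */

            printf("split threshold: %lu bytes = %lu sectors\n", max_bytes, max_sectors);
            return 0;   /* prints: split threshold: 1048576 bytes = 2048 sectors */
    }
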
diff --combined fs/ext4/ioctl.c
@@@ -587,13 -587,11 +587,13 @@@ resizefs_out
                return err;
        }
  
 +      case FIDTRIM:
        case FITRIM:
        {
                struct request_queue *q = bdev_get_queue(sb->s_bdev);
                struct fstrim_range range;
                int ret = 0;
 +              int flags  = cmd == FIDTRIM ? BLKDEV_DISCARD_SECURE : 0;
  
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
                if (!blk_queue_discard(q))
                        return -EOPNOTSUPP;
  
 +              if ((flags & BLKDEV_DISCARD_SECURE) && !blk_queue_secdiscard(q))
 +                      return -EOPNOTSUPP;
                if (copy_from_user(&range, (struct fstrim_range __user *)arg,
                    sizeof(range)))
                        return -EFAULT;
  
                range.minlen = max((unsigned int)range.minlen,
                                   q->limits.discard_granularity);
 -              ret = ext4_trim_fs(sb, &range);
 +              ret = ext4_trim_fs(sb, &range, flags);
                if (ret < 0)
                        return ret;
  
                        goto encryption_policy_out;
                }
  
+               err = mnt_want_write_file(filp);
+               if (err)
+                       goto encryption_policy_out;
                err = ext4_process_policy(&policy, inode);
+               mnt_drop_write_file(filp);
  encryption_policy_out:
                return err;
  #else
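
For illustration, a hedged userspace sketch of issuing the new secure-trim request. The calling convention is the same as for FITRIM; FIDTRIM is an Android extension and is assumed to be exported by the kernel headers in use, so the example only issues it when the macro is defined and falls back to an ordinary FITRIM otherwise. The mount point is an example, and CAP_SYS_ADMIN is required either way.

    #include <fcntl.h>
    #include <limits.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/fs.h>   /* struct fstrim_range, FITRIM (and FIDTRIM if exported) */

    int main(void)
    {
            struct fstrim_range range;
            int fd = open("/mnt", O_RDONLY);        /* any directory on the ext4 fs */

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            memset(&range, 0, sizeof(range));
            range.len = ULLONG_MAX;                 /* trim free space in the whole fs */
    #ifdef FIDTRIM
            if (ioctl(fd, FIDTRIM, &range))         /* discard with BLKDEV_DISCARD_SECURE */
                    perror("FIDTRIM");
    #else
            if (ioctl(fd, FITRIM, &range))          /* plain discard as a fallback */
                    perror("FITRIM");
    #endif
            close(fd);
            return 0;
    }
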
diff --combined fs/ext4/mballoc.c
@@@ -815,7 -815,7 +815,7 @@@ static void mb_regenerate_buddy(struct 
   * for this page; do not hold this lock when calling this routine!
   */
  
- static int ext4_mb_init_cache(struct page *page, char *incore)
+ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
  {
        ext4_group_t ngroups;
        int blocksize;
        /* allocate buffer_heads to read bitmaps */
        if (groups_per_page > 1) {
                i = sizeof(struct buffer_head *) * groups_per_page;
-               bh = kzalloc(i, GFP_NOFS);
+               bh = kzalloc(i, gfp);
                if (bh == NULL) {
                        err = -ENOMEM;
                        goto out;
@@@ -983,7 -983,7 +983,7 @@@ out
   * are on the same page e4b->bd_buddy_page is NULL and return value is 0.
   */
  static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
-               ext4_group_t group, struct ext4_buddy *e4b)
+               ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
  {
        struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
        int block, pnum, poff;
        block = group * 2;
        pnum = block / blocks_per_page;
        poff = block % blocks_per_page;
-       page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+       page = find_or_create_page(inode->i_mapping, pnum, gfp);
        if (!page)
                return -ENOMEM;
        BUG_ON(page->mapping != inode->i_mapping);
  
        block++;
        pnum = block / blocks_per_page;
-       page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+       page = find_or_create_page(inode->i_mapping, pnum, gfp);
        if (!page)
                return -ENOMEM;
        BUG_ON(page->mapping != inode->i_mapping);
@@@ -1042,7 -1042,7 +1042,7 @@@ static void ext4_mb_put_buddy_page_lock
   * calling this routine!
   */
  static noinline_for_stack
- int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
+ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
  {
  
        struct ext4_group_info *this_grp;
         * The call to ext4_mb_get_buddy_page_lock will mark the
         * page accessed.
         */
-       ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
+       ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
        if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
                /*
                 * somebody initialized the group
        }
  
        page = e4b.bd_bitmap_page;
-       ret = ext4_mb_init_cache(page, NULL);
+       ret = ext4_mb_init_cache(page, NULL, gfp);
        if (ret)
                goto err;
        if (!PageUptodate(page)) {
        }
        /* init buddy cache */
        page = e4b.bd_buddy_page;
-       ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
+       ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
        if (ret)
                goto err;
        if (!PageUptodate(page)) {
@@@ -1109,8 -1109,8 +1109,8 @@@ err
   * calling this routine!
   */
  static noinline_for_stack int
- ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
-                                       struct ext4_buddy *e4b)
+ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
+                      struct ext4_buddy *e4b, gfp_t gfp)
  {
        int blocks_per_page;
        int block;
                 * we need full data about the group
                 * to make a good selection
                 */
-               ret = ext4_mb_init_group(sb, group);
+               ret = ext4_mb_init_group(sb, group, gfp);
                if (ret)
                        return ret;
        }
                         * wait for it to initialize.
                         */
                        page_cache_release(page);
-               page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+               page = find_or_create_page(inode->i_mapping, pnum, gfp);
                if (page) {
                        BUG_ON(page->mapping != inode->i_mapping);
                        if (!PageUptodate(page)) {
-                               ret = ext4_mb_init_cache(page, NULL);
+                               ret = ext4_mb_init_cache(page, NULL, gfp);
                                if (ret) {
                                        unlock_page(page);
                                        goto err;
        if (page == NULL || !PageUptodate(page)) {
                if (page)
                        page_cache_release(page);
-               page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+               page = find_or_create_page(inode->i_mapping, pnum, gfp);
                if (page) {
                        BUG_ON(page->mapping != inode->i_mapping);
                        if (!PageUptodate(page)) {
-                               ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
+                               ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
+                                                        gfp);
                                if (ret) {
                                        unlock_page(page);
                                        goto err;
        return ret;
  }
  
+ static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+                             struct ext4_buddy *e4b)
+ {
+       return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
+ }
  static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
  {
        if (e4b->bd_bitmap_page)
@@@ -2047,7 -2054,7 +2054,7 @@@ static int ext4_mb_good_group(struct ex
  
        /* We only do this if the grp has never been initialized */
        if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
-               int ret = ext4_mb_init_group(ac->ac_sb, group);
+               int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
                if (ret)
                        return ret;
        }
@@@ -2763,8 -2770,7 +2770,8 @@@ int ext4_mb_release(struct super_block 
  }
  
  static inline int ext4_issue_discard(struct super_block *sb,
 -              ext4_group_t block_group, ext4_grpblk_t cluster, int count)
 +              ext4_group_t block_group, ext4_grpblk_t cluster, int count,
 +              unsigned long flags)
  {
        ext4_fsblk_t discard_block;
  
        count = EXT4_C2B(EXT4_SB(sb), count);
        trace_ext4_discard_blocks(sb,
                        (unsigned long long) discard_block, count);
 -      return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
 +      return sb_issue_discard(sb, discard_block, count, GFP_NOFS, flags);
  }
  
  /*
@@@ -2795,7 -2801,7 +2802,7 @@@ static void ext4_free_data_callback(str
        if (test_opt(sb, DISCARD)) {
                err = ext4_issue_discard(sb, entry->efd_group,
                                         entry->efd_start_cluster,
 -                                       entry->efd_count);
 +                                       entry->efd_count, 0);
                if (err && err != -EOPNOTSUPP)
                        ext4_msg(sb, KERN_WARNING, "discard request in"
                                 " group:%d block:%d count:%d failed"
@@@ -4809,7 -4815,9 +4816,9 @@@ do_more
  #endif
        trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
  
-       err = ext4_mb_load_buddy(sb, block_group, &e4b);
+       /* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */
+       err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
+                                    GFP_NOFS|__GFP_NOFAIL);
        if (err)
                goto error_return;
  
                 * them with group lock_held
                 */
                if (test_opt(sb, DISCARD)) {
 -                      err = ext4_issue_discard(sb, block_group, bit, count);
 +                      err = ext4_issue_discard(sb, block_group, bit, count,
 +                                               0);
                        if (err && err != -EOPNOTSUPP)
                                ext4_msg(sb, KERN_WARNING, "discard request in"
                                         " group:%d block:%d count:%lu failed"
@@@ -5035,15 -5042,13 +5044,15 @@@ error_return
   * @count:    number of blocks to TRIM
   * @group:    alloc. group we are working with
   * @e4b:      ext4 buddy for the group
 + * @blkdev_flags: flags for the block device
   *
   * Trim "count" blocks starting at "start" in the "group". To assure that no
   * one will allocate those blocks, mark it as used in buddy bitmap. This must
   * be called under the group lock.
   */
  static int ext4_trim_extent(struct super_block *sb, int start, int count,
 -                           ext4_group_t group, struct ext4_buddy *e4b)
 +                          ext4_group_t group, struct ext4_buddy *e4b,
 +                          unsigned long blkdev_flags)
  __releases(bitlock)
  __acquires(bitlock)
  {
         */
        mb_mark_used(e4b, &ex);
        ext4_unlock_group(sb, group);
 -      ret = ext4_issue_discard(sb, group, start, count);
 +      ret = ext4_issue_discard(sb, group, start, count, blkdev_flags);
        ext4_lock_group(sb, group);
        mb_free_blocks(NULL, e4b, start, ex.fe_len);
        return ret;
   * @start:            first group block to examine
   * @max:              last group block to examine
   * @minblocks:                minimum extent block count
 + * @blkdev_flags:     flags for the block device
   *
   * ext4_trim_all_free walks through group's buddy bitmap searching for free
   * extents. When the free block is found, ext4_trim_extent is called to TRIM
  static ext4_grpblk_t
  ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
                   ext4_grpblk_t start, ext4_grpblk_t max,
 -                 ext4_grpblk_t minblocks)
 +                 ext4_grpblk_t minblocks, unsigned long blkdev_flags)
  {
        void *bitmap;
        ext4_grpblk_t next, count = 0, free_count = 0;
  
                if ((next - start) >= minblocks) {
                        ret = ext4_trim_extent(sb, start,
 -                                             next - start, group, &e4b);
 +                                             next - start, group, &e4b,
 +                                             blkdev_flags);
                        if (ret && ret != -EOPNOTSUPP)
                                break;
                        ret = 0;
@@@ -5168,7 -5171,6 +5177,7 @@@ out
   * ext4_trim_fs() -- trim ioctl handle function
   * @sb:                       superblock for filesystem
   * @range:            fstrim_range structure
 + * @blkdev_flags:     flags for the block device
   *
   * start:     First Byte to trim
   * len:               number of Bytes to trim from start
   * start to start+len. For each such a group ext4_trim_all_free function
   * is invoked to trim all free space.
   */
 -int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 +int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range,
 +                      unsigned long blkdev_flags)
  {
        struct ext4_group_info *grp;
        ext4_group_t group, first_group, last_group;
                grp = ext4_get_group_info(sb, group);
                /* We only do this if the grp has never been initialized */
                if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
-                       ret = ext4_mb_init_group(sb, group);
+                       ret = ext4_mb_init_group(sb, group, GFP_NOFS);
                        if (ret)
                                break;
                }
  
                if (grp->bb_free >= minlen) {
                        cnt = ext4_trim_all_free(sb, group, first_cluster,
 -                                              end, minlen);
 +                                              end, minlen, blkdev_flags);
                        if (cnt < 0) {
                                ret = cnt;
                                break;
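
For illustration, a hypothetical helper (not code from this patch) capturing the intent behind the gfp plumbing above: ordinary callers keep using GFP_NOFS, while the block-freeing path additionally passes __GFP_NOFAIL, since failing to load the buddy bitmap while freeing would leak the blocks being released.

    #include <linux/slab.h>
    #include <linux/types.h>

    /* Hypothetical helper: pick allocation flags the way the call sites above do. */
    static void *example_alloc_group_metadata(size_t size, bool freeing_blocks)
    {
            gfp_t gfp = GFP_NOFS;

            if (freeing_blocks)
                    gfp |= __GFP_NOFAIL;    /* retry forever rather than lose freed blocks */

            return kzalloc(size, gfp);
    }
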
diff --combined fs/proc/base.c
@@@ -1545,18 -1545,13 +1545,13 @@@ static const struct file_operations pro
  static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
  {
        struct task_struct *task;
-       struct mm_struct *mm;
        struct file *exe_file;
  
        task = get_proc_task(d_inode(dentry));
        if (!task)
                return -ENOENT;
-       mm = get_task_mm(task);
+       exe_file = get_task_exe_file(task);
        put_task_struct(task);
-       if (!mm)
-               return -ENOENT;
-       exe_file = get_mm_exe_file(mm);
-       mmput(mm);
        if (exe_file) {
                *exe_path = exe_file->f_path;
                path_get(&exe_file->f_path);
@@@ -2245,92 -2240,6 +2240,92 @@@ static const struct file_operations pro
        .release        = seq_release_private,
  };
  
 +static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
 +                                      size_t count, loff_t *offset)
 +{
 +      struct inode *inode = file_inode(file);
 +      struct task_struct *p;
 +      u64 slack_ns;
 +      int err;
 +
 +      err = kstrtoull_from_user(buf, count, 10, &slack_ns);
 +      if (err < 0)
 +              return err;
 +
 +      p = get_proc_task(inode);
 +      if (!p)
 +              return -ESRCH;
 +
 +      if (p != current) {
 +              if (!capable(CAP_SYS_NICE)) {
 +                      count = -EPERM;
 +                      goto out;
 +              }
 +
 +              err = security_task_setscheduler(p);
 +              if (err) {
 +                      count = err;
 +                      goto out;
 +              }
 +      }
 +
 +      task_lock(p);
 +      if (slack_ns == 0)
 +              p->timer_slack_ns = p->default_timer_slack_ns;
 +      else
 +              p->timer_slack_ns = slack_ns;
 +      task_unlock(p);
 +
 +out:
 +      put_task_struct(p);
 +
 +      return count;
 +}
 +
 +static int timerslack_ns_show(struct seq_file *m, void *v)
 +{
 +      struct inode *inode = m->private;
 +      struct task_struct *p;
 +      int err = 0;
 +
 +      p = get_proc_task(inode);
 +      if (!p)
 +              return -ESRCH;
 +
 +      if (p != current) {
 +
 +              if (!capable(CAP_SYS_NICE)) {
 +                      err = -EPERM;
 +                      goto out;
 +              }
 +              err = security_task_getscheduler(p);
 +              if (err)
 +                      goto out;
 +      }
 +
 +      task_lock(p);
 +      seq_printf(m, "%llu\n", p->timer_slack_ns);
 +      task_unlock(p);
 +
 +out:
 +      put_task_struct(p);
 +
 +      return err;
 +}
 +
 +static int timerslack_ns_open(struct inode *inode, struct file *filp)
 +{
 +      return single_open(filp, timerslack_ns_show, inode);
 +}
 +
 +static const struct file_operations proc_pid_set_timerslack_ns_operations = {
 +      .open           = timerslack_ns_open,
 +      .read           = seq_read,
 +      .write          = timerslack_ns_write,
 +      .llseek         = seq_lseek,
 +      .release        = single_release,
 +};
 +
  static int proc_pident_instantiate(struct inode *dir,
        struct dentry *dentry, struct task_struct *task, const void *ptr)
  {
@@@ -2881,8 -2790,8 +2876,8 @@@ static const struct pid_entry tgid_base
        ONE("cgroup",  S_IRUGO, proc_cgroup_show),
  #endif
        ONE("oom_score",  S_IRUGO, proc_oom_score),
 -      REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adj_operations),
 -      REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
 +      REG("oom_adj",    S_IRUSR, proc_oom_adj_operations),
 +      REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations),
  #ifdef CONFIG_AUDITSYSCALL
        REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
        REG("sessionid",  S_IRUGO, proc_sessionid_operations),
  #ifdef CONFIG_CHECKPOINT_RESTORE
        REG("timers",     S_IRUGO, proc_timers_operations),
  #endif
 +      REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations),
  };
  
  static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@@ -3166,44 -3074,6 +3161,44 @@@ int proc_pid_readdir(struct file *file
  }
  
  /*
 + * proc_tid_comm_permission is a special permission function exclusively
 + * used for the node /proc/<pid>/task/<tid>/comm.
 + * It bypasses generic permission checks in the case where a task of the same
 + * task group attempts to access the node.
 + * The rationale behind this is that glibc and bionic access this node for
 + * cross thread naming (pthread_set/getname_np(!self)). However, if
 + * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0,
 + * which locks out the cross thread naming implementation.
 + * This function makes sure that the node is always accessible for members of
 + * same thread group.
 + */
 +static int proc_tid_comm_permission(struct inode *inode, int mask)
 +{
 +      bool is_same_tgroup;
 +      struct task_struct *task;
 +
 +      task = get_proc_task(inode);
 +      if (!task)
 +              return -ESRCH;
 +      is_same_tgroup = same_thread_group(current, task);
 +      put_task_struct(task);
 +
 +      if (likely(is_same_tgroup && !(mask & MAY_EXEC))) {
 +              /* This file (/proc/<pid>/task/<tid>/comm) can always be
 +               * read or written by the members of the corresponding
 +               * thread group.
 +               */
 +              return 0;
 +      }
 +
 +      return generic_permission(inode, mask);
 +}
 +
 +static const struct inode_operations proc_tid_comm_inode_operations = {
 +              .permission = proc_tid_comm_permission,
 +};
 +
 +/*
   * Tasks
   */
  static const struct pid_entry tid_base_stuff[] = {
  #ifdef CONFIG_SCHED_DEBUG
        REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
  #endif
 -      REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
 +      NOD("comm",      S_IFREG|S_IRUGO|S_IWUSR,
 +                       &proc_tid_comm_inode_operations,
 +                       &proc_pid_set_comm_operations, {}),
  #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
        ONE("syscall",   S_IRUSR, proc_pid_syscall),
  #endif
        ONE("cgroup",  S_IRUGO, proc_cgroup_show),
  #endif
        ONE("oom_score", S_IRUGO, proc_oom_score),
 -      REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adj_operations),
 -      REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
 +      REG("oom_adj",   S_IRUSR, proc_oom_adj_operations),
 +      REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations),
  #ifdef CONFIG_AUDITSYSCALL
        REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
        REG("sessionid",  S_IRUGO, proc_sessionid_operations),
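
For illustration, a hedged userspace sketch of the new per-process interface added above: /proc/<pid>/timerslack_ns accepts a slack value in nanoseconds (0 restores the task's default) and reads back the current value; writing to another task's file additionally requires CAP_SYS_NICE, so the sketch only adjusts the calling process.

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[64];
            ssize_t n;
            int fd = open("/proc/self/timerslack_ns", O_RDWR);

            if (fd < 0) {
                    perror("open");         /* kernel without this file */
                    return 1;
            }
            if (write(fd, "50000", 5) != 5) /* request 50 us of timer slack */
                    perror("write");
            if (lseek(fd, 0, SEEK_SET) == 0 &&
                (n = read(fd, buf, sizeof(buf) - 1)) > 0) {
                    buf[n] = '\0';
                    printf("timerslack_ns: %s", buf);
            }
            close(fd);
            return 0;
    }
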
diff --combined include/linux/mm.h
@@@ -51,17 -51,6 +51,17 @@@ extern int sysctl_legacy_va_layout
  #define sysctl_legacy_va_layout 0
  #endif
  
 +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
 +extern const int mmap_rnd_bits_min;
 +extern const int mmap_rnd_bits_max;
 +extern int mmap_rnd_bits __read_mostly;
 +#endif
 +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
 +extern const int mmap_rnd_compat_bits_min;
 +extern const int mmap_rnd_compat_bits_max;
 +extern int mmap_rnd_compat_bits __read_mostly;
 +#endif
 +
  #include <asm/page.h>
  #include <asm/pgtable.h>
  #include <asm/processor.h>
@@@ -1070,7 -1059,6 +1070,7 @@@ extern void pagefault_out_of_memory(voi
  extern void show_free_areas(unsigned int flags);
  extern bool skip_free_areas_node(unsigned int flags, int nid);
  
 +void shmem_set_file(struct vm_area_struct *vma, struct file *file);
  int shmem_zero_setup(struct vm_area_struct *);
  #ifdef CONFIG_SHMEM
  bool shmem_mapping(struct address_space *mapping);
@@@ -1878,7 -1866,7 +1878,7 @@@ extern int vma_adjust(struct vm_area_st
  extern struct vm_area_struct *vma_merge(struct mm_struct *,
        struct vm_area_struct *prev, unsigned long addr, unsigned long end,
        unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
 -      struct mempolicy *, struct vm_userfaultfd_ctx);
 +      struct mempolicy *, struct vm_userfaultfd_ctx, const char __user *);
  extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
  extern int split_vma(struct mm_struct *,
        struct vm_area_struct *, unsigned long addr, int new_below);
@@@ -1910,6 -1898,7 +1910,7 @@@ extern void mm_drop_all_locks(struct mm
  
  extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
  extern struct file *get_mm_exe_file(struct mm_struct *mm);
+ extern struct file *get_task_exe_file(struct task_struct *task);
  
  extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
  extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
diff --combined kernel/cpuset.c
@@@ -98,7 -98,6 +98,7 @@@ struct cpuset 
  
        /* user-configured CPUs and Memory Nodes allow to tasks */
        cpumask_var_t cpus_allowed;
 +      cpumask_var_t cpus_requested;
        nodemask_t mems_allowed;
  
        /* effective CPUs and Memory Nodes allow to tasks */
@@@ -387,7 -386,7 +387,7 @@@ static void cpuset_update_task_spread_f
  
  static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
  {
 -      return  cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
 +      return  cpumask_subset(p->cpus_requested, q->cpus_requested) &&
                nodes_subset(p->mems_allowed, q->mems_allowed) &&
                is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
                is_mem_exclusive(p) <= is_mem_exclusive(q);
@@@ -487,7 -486,7 +487,7 @@@ static int validate_change(struct cpuse
        cpuset_for_each_child(c, css, par) {
                if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
                    c != cur &&
 -                  cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
 +                  cpumask_intersects(trial->cpus_requested, c->cpus_requested))
                        goto out;
                if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
                    c != cur &&
@@@ -946,18 -945,17 +946,18 @@@ static int update_cpumask(struct cpuse
        if (!*buf) {
                cpumask_clear(trialcs->cpus_allowed);
        } else {
 -              retval = cpulist_parse(buf, trialcs->cpus_allowed);
 +              retval = cpulist_parse(buf, trialcs->cpus_requested);
                if (retval < 0)
                        return retval;
  
 -              if (!cpumask_subset(trialcs->cpus_allowed,
 -                                  top_cpuset.cpus_allowed))
 +              if (!cpumask_subset(trialcs->cpus_requested, cpu_present_mask))
                        return -EINVAL;
 +
 +              cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested, cpu_active_mask);
        }
  
        /* Nothing to do if the cpus didn't change */
 -      if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
 +      if (cpumask_equal(cs->cpus_requested, trialcs->cpus_requested))
                return 0;
  
        retval = validate_change(cs, trialcs);
  
        spin_lock_irq(&callback_lock);
        cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
 +      cpumask_copy(cs->cpus_requested, trialcs->cpus_requested);
        spin_unlock_irq(&callback_lock);
  
        /* use trialcs->cpus_allowed as a temp variable */
@@@ -1757,7 -1754,7 +1757,7 @@@ static int cpuset_common_seq_show(struc
  
        switch (type) {
        case FILE_CPULIST:
 -              seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed));
 +              seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_requested));
                break;
        case FILE_MEMLIST:
                seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed));
@@@ -1946,14 -1943,11 +1946,14 @@@ cpuset_css_alloc(struct cgroup_subsys_s
                return ERR_PTR(-ENOMEM);
        if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL))
                goto free_cs;
 +      if (!alloc_cpumask_var(&cs->cpus_requested, GFP_KERNEL))
 +              goto free_allowed;
        if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL))
 -              goto free_cpus;
 +              goto free_requested;
  
        set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
        cpumask_clear(cs->cpus_allowed);
 +      cpumask_clear(cs->cpus_requested);
        nodes_clear(cs->mems_allowed);
        cpumask_clear(cs->effective_cpus);
        nodes_clear(cs->effective_mems);
  
        return &cs->css;
  
 -free_cpus:
 +free_requested:
 +      free_cpumask_var(cs->cpus_requested);
 +free_allowed:
        free_cpumask_var(cs->cpus_allowed);
  free_cs:
        kfree(cs);
@@@ -2027,7 -2019,6 +2027,7 @@@ static int cpuset_css_online(struct cgr
        cs->mems_allowed = parent->mems_allowed;
        cs->effective_mems = parent->mems_allowed;
        cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
 +      cpumask_copy(cs->cpus_requested, parent->cpus_requested);
        cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
        spin_unlock_irq(&callback_lock);
  out_unlock:
@@@ -2062,7 -2053,6 +2062,7 @@@ static void cpuset_css_free(struct cgro
  
        free_cpumask_var(cs->effective_cpus);
        free_cpumask_var(cs->cpus_allowed);
 +      free_cpumask_var(cs->cpus_requested);
        kfree(cs);
  }
  
@@@ -2084,34 -2074,31 +2084,49 @@@ static void cpuset_bind(struct cgroup_s
        mutex_unlock(&cpuset_mutex);
  }
  
 +static int cpuset_allow_attach(struct cgroup_taskset *tset)
 +{
 +      const struct cred *cred = current_cred(), *tcred;
 +      struct task_struct *task;
 +      struct cgroup_subsys_state *css;
 +
 +      cgroup_taskset_for_each(task, css, tset) {
 +              tcred = __task_cred(task);
 +
 +              if ((current != task) && !capable(CAP_SYS_ADMIN) &&
 +                   cred->euid.val != tcred->uid.val && cred->euid.val != tcred->suid.val)
 +                      return -EACCES;
 +      }
 +
 +      return 0;
 +}
 +
+ /*
+  * Make sure the new task conforms to the current state of its parent,
+  * which could have been changed by cpuset just after it inherits the
+  * state from the parent and before it sits on the cgroup's task list.
+  */
+ void cpuset_fork(struct task_struct *task)
+ {
+       if (task_css_is_root(task, cpuset_cgrp_id))
+               return;
+       set_cpus_allowed_ptr(task, &current->cpus_allowed);
+       task->mems_allowed = current->mems_allowed;
+ }
  struct cgroup_subsys cpuset_cgrp_subsys = {
        .css_alloc      = cpuset_css_alloc,
        .css_online     = cpuset_css_online,
        .css_offline    = cpuset_css_offline,
        .css_free       = cpuset_css_free,
        .can_attach     = cpuset_can_attach,
 +      .allow_attach   = cpuset_allow_attach,
        .cancel_attach  = cpuset_cancel_attach,
        .attach         = cpuset_attach,
        .post_attach    = cpuset_post_attach,
        .bind           = cpuset_bind,
+       .fork           = cpuset_fork,
        .legacy_cftypes = files,
        .early_init     = 1,
  };
@@@ -2130,11 -2117,8 +2145,11 @@@ int __init cpuset_init(void
                BUG();
        if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL))
                BUG();
 +      if (!alloc_cpumask_var(&top_cpuset.cpus_requested, GFP_KERNEL))
 +              BUG();
  
        cpumask_setall(top_cpuset.cpus_allowed);
 +      cpumask_setall(top_cpuset.cpus_requested);
        nodes_setall(top_cpuset.mems_allowed);
        cpumask_setall(top_cpuset.effective_cpus);
        nodes_setall(top_cpuset.effective_mems);
@@@ -2268,7 -2252,7 +2283,7 @@@ retry
                goto retry;
        }
  
 -      cpumask_and(&new_cpus, cs->cpus_allowed, parent_cs(cs)->effective_cpus);
 +      cpumask_and(&new_cpus, cs->cpus_requested, parent_cs(cs)->effective_cpus);
        nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems);
  
        cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
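
For illustration, a hedged userspace sketch of what the new cpus_requested mask changes in behaviour: the CPU list written to cpuset.cpus must be a subset of the CPUs present in the system, but it is remembered and read back verbatim even if some of those CPUs are offline (only the effective mask is intersected with the active CPUs). The cgroup mount point and group name below are assumptions.

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[64];
            ssize_t n;
            int fd = open("/sys/fs/cgroup/cpuset/example/cpuset.cpus", O_RDWR);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            if (write(fd, "0-3", 3) < 0)    /* must be a subset of present CPUs */
                    perror("write");
            if (lseek(fd, 0, SEEK_SET) == 0 &&
                (n = read(fd, buf, sizeof(buf) - 1)) > 0) {
                    buf[n] = '\0';
                    /* reports "0-3" even if, say, cpu3 is currently offline */
                    printf("requested cpus: %s", buf);
            }
            close(fd);
            return 0;
    }
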
diff --combined kernel/fork.c
@@@ -764,6 -764,29 +764,29 @@@ struct file *get_mm_exe_file(struct mm_
  EXPORT_SYMBOL(get_mm_exe_file);
  
  /**
+  * get_task_exe_file - acquire a reference to the task's executable file
+  *
+  * Returns %NULL if task's mm (if any) has no associated executable file or
+  * this is a kernel thread with borrowed mm (see the comment above get_task_mm).
+  * User must release file via fput().
+  */
+ struct file *get_task_exe_file(struct task_struct *task)
+ {
+       struct file *exe_file = NULL;
+       struct mm_struct *mm;
+       task_lock(task);
+       mm = task->mm;
+       if (mm) {
+               if (!(task->flags & PF_KTHREAD))
+                       exe_file = get_mm_exe_file(mm);
+       }
+       task_unlock(task);
+       return exe_file;
+ }
+ EXPORT_SYMBOL(get_task_exe_file);
+ /**
   * get_task_mm - acquire a reference to the task's mm
   *
   * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
@@@ -800,8 -823,7 +823,8 @@@ struct mm_struct *mm_access(struct task
  
        mm = get_task_mm(task);
        if (mm && mm != current->mm &&
 -                      !ptrace_may_access(task, mode)) {
 +                      !ptrace_may_access(task, mode) &&
 +                      !capable(CAP_SYS_RESOURCE)) {
                mmput(mm);
                mm = ERR_PTR(-EACCES);
        }
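
For illustration, a minimal kernel-side sketch (helper name invented) of the calling convention documented above: unlike the old get_task_mm()/get_mm_exe_file() sequence that proc_exe_link() used, the caller is handed only a file reference, and releases it with fput().

    #include <linux/file.h>
    #include <linux/mm.h>
    #include <linux/sched.h>

    /* Hypothetical helper: report whether @task currently has an executable file. */
    static bool example_task_has_exe(struct task_struct *task)
    {
            struct file *exe_file = get_task_exe_file(task);

            if (!exe_file)
                    return false;   /* no mm, or a kernel thread with a borrowed mm */
            /* exe_file->f_path could be inspected here, as proc_exe_link() does */
            fput(exe_file);         /* drop the reference we were handed */
            return true;
    }
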
diff --combined kernel/sched/core.c
@@@ -89,7 -89,6 +89,7 @@@
  
  #define CREATE_TRACE_POINTS
  #include <trace/events/sched.h>
 +#include "walt.h"
  
  DEFINE_MUTEX(sched_domains_mutex);
  DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@@ -288,18 -287,6 +288,18 @@@ int sysctl_sched_rt_runtime = 950000
  /* cpus with isolated domains */
  cpumask_var_t cpu_isolated_map;
  
 +struct rq *
 +lock_rq_of(struct task_struct *p, unsigned long *flags)
 +{
 +      return task_rq_lock(p, flags);
 +}
 +
 +void
 +unlock_rq_of(struct rq *rq, struct task_struct *p, unsigned long *flags)
 +{
 +      task_rq_unlock(rq, p, flags);
 +}
 +
  /*
   * this_rq_lock - lock this runqueue and disable interrupts.
   */
@@@ -1089,9 -1076,7 +1089,9 @@@ static struct rq *move_queued_task(stru
  
        dequeue_task(rq, p, 0);
        p->on_rq = TASK_ON_RQ_MIGRATING;
 +      double_lock_balance(rq, cpu_rq(new_cpu));
        set_task_cpu(p, new_cpu);
 +      double_unlock_balance(rq, cpu_rq(new_cpu));
        raw_spin_unlock(&rq->lock);
  
        rq = cpu_rq(new_cpu);
@@@ -1315,8 -1300,6 +1315,8 @@@ void set_task_cpu(struct task_struct *p
                        p->sched_class->migrate_task_rq(p);
                p->se.nr_migrations++;
                perf_event_task_migrate(p);
 +
 +              walt_fixup_busy_time(p, new_cpu);
        }
  
        __set_task_cpu(p, new_cpu);
@@@ -1945,10 -1928,6 +1945,10 @@@ try_to_wake_up(struct task_struct *p, u
  {
        unsigned long flags;
        int cpu, success = 0;
 +#ifdef CONFIG_SMP
 +      struct rq *rq;
 +      u64 wallclock;
 +#endif
  
        /*
         * If we are going to wake up a thread waiting for CONDITION we
        success = 1; /* we're going to change ->state */
        cpu = task_cpu(p);
  
+       /*
+        * Ensure we load p->on_rq _after_ p->state, otherwise it would
+        * be possible to, falsely, observe p->on_rq == 0 and get stuck
+        * in smp_cond_load_acquire() below.
+        *
+        * sched_ttwu_pending()                 try_to_wake_up()
+        *   [S] p->on_rq = 1;                  [L] P->state
+        *       UNLOCK rq->lock  -----.
+        *                              \
+        *                               +---   RMB
+        * schedule()                   /
+        *       LOCK rq->lock    -----'
+        *       UNLOCK rq->lock
+        *
+        * [task p]
+        *   [S] p->state = UNINTERRUPTIBLE     [L] p->on_rq
+        *
+        * Pairs with the UNLOCK+LOCK on rq->lock from the
+        * last wakeup of our task and the schedule that got our task
+        * current.
+        */
+       smp_rmb();
        if (p->on_rq && ttwu_remote(p, wake_flags))
                goto stat;
  
         */
        smp_rmb();
  
 +      rq = cpu_rq(task_cpu(p));
 +
 +      raw_spin_lock(&rq->lock);
 +      wallclock = walt_ktime_clock();
 +      walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
 +      walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
 +      raw_spin_unlock(&rq->lock);
 +
        p->sched_contributes_to_load = !!task_contributes_to_load(p);
        p->state = TASK_WAKING;
  
                p->sched_class->task_waking(p);
  
        cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
 +
        if (task_cpu(p) != cpu) {
                wake_flags |= WF_MIGRATED;
                set_task_cpu(p, cpu);
        }
 +
  #endif /* CONFIG_SMP */
  
        ttwu_queue(p, cpu);
@@@ -2075,13 -2066,8 +2097,13 @@@ static void try_to_wake_up_local(struc
  
        trace_sched_waking(p);
  
 -      if (!task_on_rq_queued(p))
 +      if (!task_on_rq_queued(p)) {
 +              u64 wallclock = walt_ktime_clock();
 +
 +              walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
 +              walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
                ttwu_activate(rq, p, ENQUEUE_WAKEUP);
 +      }
  
        ttwu_do_wakeup(rq, p, 0);
        ttwu_stat(p, smp_processor_id(), 0);
@@@ -2147,7 -2133,6 +2169,7 @@@ static void __sched_fork(unsigned long 
        p->se.nr_migrations             = 0;
        p->se.vruntime                  = 0;
        INIT_LIST_HEAD(&p->se.group_node);
 +      walt_init_new_task_load(p);
  
  #ifdef CONFIG_SCHEDSTATS
        memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@@ -2415,9 -2400,6 +2437,9 @@@ void wake_up_new_task(struct task_struc
        struct rq *rq;
  
        raw_spin_lock_irqsave(&p->pi_lock, flags);
 +
 +      walt_init_new_task_load(p);
 +
        /* Initialize new task's runnable average */
        init_entity_runnable_average(&p->se);
  #ifdef CONFIG_SMP
  #endif
  
        rq = __task_rq_lock(p);
 -      activate_task(rq, p, 0);
 +      walt_mark_task_starting(p);
 +      activate_task(rq, p, ENQUEUE_WAKEUP_NEW);
        p->on_rq = TASK_ON_RQ_QUEUED;
        trace_sched_wakeup_new(p);
        check_preempt_curr(rq, p, WF_FORK);
@@@ -2812,36 -2793,6 +2834,36 @@@ unsigned long nr_iowait_cpu(int cpu
        return atomic_read(&this->nr_iowait);
  }
  
 +#ifdef CONFIG_CPU_QUIET
 +u64 nr_running_integral(unsigned int cpu)
 +{
 +      unsigned int seqcnt;
 +      u64 integral;
 +      struct rq *q;
 +
 +      if (cpu >= nr_cpu_ids)
 +              return 0;
 +
 +      q = cpu_rq(cpu);
 +
 +      /*
 +       * Update average to avoid reading a stale value if there were
 +       * no run-queue changes for a long time. On the other hand if
 +       * the changes are happening right now, just read current value
 +       * directly.
 +       */
 +
 +      seqcnt = read_seqcount_begin(&q->ave_seqcnt);
 +      integral = do_nr_running_integral(q);
 +      if (read_seqcount_retry(&q->ave_seqcnt, seqcnt)) {
 +              read_seqcount_begin(&q->ave_seqcnt);
 +              integral = q->nr_running_integral;
 +      }
 +
 +      return integral;
 +}
 +#endif
 +
  void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
  {
        struct rq *rq = this_rq();
@@@ -2928,93 -2879,6 +2950,93 @@@ unsigned long long task_sched_runtime(s
        return ns;
  }
  
 +#ifdef CONFIG_CPU_FREQ_GOV_SCHED
 +
 +static inline
 +unsigned long add_capacity_margin(unsigned long cpu_capacity)
 +{
 +      cpu_capacity  = cpu_capacity * capacity_margin;
 +      cpu_capacity /= SCHED_CAPACITY_SCALE;
 +      return cpu_capacity;
 +}
 +
 +static inline
 +unsigned long sum_capacity_reqs(unsigned long cfs_cap,
 +                              struct sched_capacity_reqs *scr)
 +{
 +      unsigned long total = add_capacity_margin(cfs_cap + scr->rt);
 +      return total += scr->dl;
 +}
 +
 +static void sched_freq_tick_pelt(int cpu)
 +{
 +      unsigned long cpu_utilization = capacity_max;
 +      unsigned long capacity_curr = capacity_curr_of(cpu);
 +      struct sched_capacity_reqs *scr;
 +
 +      scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
 +      if (sum_capacity_reqs(cpu_utilization, scr) < capacity_curr)
 +              return;
 +
 +      /*
 +       * To make free room for a task that is building up its "real"
 +       * utilization and to harm its performance the least, request
 +       * a jump to a higher OPP as soon as the margin of free capacity
 +       * is impacted (specified by capacity_margin).
 +       */
 +      set_cfs_cpu_capacity(cpu, true, cpu_utilization);
 +}
 +
 +#ifdef CONFIG_SCHED_WALT
 +static void sched_freq_tick_walt(int cpu)
 +{
 +      unsigned long cpu_utilization = cpu_util(cpu);
 +      unsigned long capacity_curr = capacity_curr_of(cpu);
 +
 +      if (walt_disabled || !sysctl_sched_use_walt_cpu_util)
 +              return sched_freq_tick_pelt(cpu);
 +
 +      /*
 +       * Add a margin to the WALT utilization.
 +       * NOTE: WALT tracks a single CPU signal for all the scheduling
 +       * classes, thus this margin is going to be added to the DL class as
 +       * well, which is something we do not do in sched_freq_tick_pelt case.
 +       */
 +      cpu_utilization = add_capacity_margin(cpu_utilization);
 +      if (cpu_utilization <= capacity_curr)
 +              return;
 +
 +      /*
 +       * It is likely that the load is growing, so we keep the added
 +       * margin in our request as an extra boost.
 +       */
 +      set_cfs_cpu_capacity(cpu, true, cpu_utilization);
 +}
 +#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu)
 +#else
 +#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu)
 +#endif /* CONFIG_SCHED_WALT */
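Editor's note: to make the WALT path concrete with hypothetical numbers (same 1280/1024 margin assumption as above): if cpu_util() reports 700 while the current OPP provides capacity_curr == 800, the margined utilization is 700 * 1280 / 1024 == 875; since 875 > 800, set_cfs_cpu_capacity() is called with 875 and a higher OPP is requested. Had cpu_util() been 600, the margined value 750 would not exceed 800 and the tick would request nothing.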
 +
 +static void sched_freq_tick(int cpu)
 +{
 +      unsigned long capacity_orig, capacity_curr;
 +
 +      if (!sched_freq())
 +              return;
 +
 +      capacity_orig = capacity_orig_of(cpu);
 +      capacity_curr = capacity_curr_of(cpu);
 +      if (capacity_curr == capacity_orig)
 +              return;
 +
 +      _sched_freq_tick(cpu);
 +}
 +#else
 +static inline void sched_freq_tick(int cpu) { }
 +#endif /* CONFIG_CPU_FREQ_GOV_SCHED */
 +
  /*
   * This function gets called by the timer code, with HZ frequency.
   * We call it with interrupts disabled.
@@@ -3028,14 -2892,10 +3050,14 @@@ void scheduler_tick(void
        sched_clock_tick();
  
        raw_spin_lock(&rq->lock);
 +      walt_set_window_start(rq);
        update_rq_clock(rq);
        curr->sched_class->task_tick(rq, curr, 0);
        update_cpu_load_active(rq);
 +      walt_update_task_ravg(rq->curr, rq, TASK_UPDATE,
 +                      walt_ktime_clock(), 0);
        calc_global_load_tick(rq);
 +      sched_freq_tick(cpu);
        raw_spin_unlock(&rq->lock);
  
        perf_event_task_tick();
@@@ -3272,7 -3132,6 +3294,7 @@@ static void __sched notrace __schedule(
        unsigned long *switch_count;
        struct rq *rq;
        int cpu;
 +      u64 wallclock;
  
        cpu = smp_processor_id();
        rq = cpu_rq(cpu);
                update_rq_clock(rq);
  
        next = pick_next_task(rq, prev);
 +      wallclock = walt_ktime_clock();
 +      walt_update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
 +      walt_update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
        clear_tsk_need_resched(prev);
        clear_preempt_need_resched();
        rq->clock_skip_update = 0;
@@@ -5163,7 -5019,6 +5185,7 @@@ void init_idle(struct task_struct *idle
        raw_spin_lock(&rq->lock);
  
        __sched_fork(0, idle);
 +
        idle->state = TASK_RUNNING;
        idle->se.exec_start = sched_clock();
  
@@@ -5546,60 -5401,9 +5568,60 @@@ set_table_entry(struct ctl_table *entry
  }
  
  static struct ctl_table *
 +sd_alloc_ctl_energy_table(struct sched_group_energy *sge)
 +{
 +      struct ctl_table *table = sd_alloc_ctl_entry(5);
 +
 +      if (table == NULL)
 +              return NULL;
 +
 +      set_table_entry(&table[0], "nr_idle_states", &sge->nr_idle_states,
 +                      sizeof(int), 0644, proc_dointvec_minmax, false);
 +      set_table_entry(&table[1], "idle_states", &sge->idle_states[0].power,
 +                      sge->nr_idle_states*sizeof(struct idle_state), 0644,
 +                      proc_doulongvec_minmax, false);
 +      set_table_entry(&table[2], "nr_cap_states", &sge->nr_cap_states,
 +                      sizeof(int), 0644, proc_dointvec_minmax, false);
 +      set_table_entry(&table[3], "cap_states", &sge->cap_states[0].cap,
 +                      sge->nr_cap_states*sizeof(struct capacity_state), 0644,
 +                      proc_doulongvec_minmax, false);
 +
 +      return table;
 +}
 +
 +static struct ctl_table *
 +sd_alloc_ctl_group_table(struct sched_group *sg)
 +{
 +      struct ctl_table *table = sd_alloc_ctl_entry(2);
 +
 +      if (table == NULL)
 +              return NULL;
 +
 +      table->procname = kstrdup("energy", GFP_KERNEL);
 +      table->mode = 0555;
 +      table->child = sd_alloc_ctl_energy_table((struct sched_group_energy *)sg->sge);
 +
 +      return table;
 +}
 +
 +static struct ctl_table *
  sd_alloc_ctl_domain_table(struct sched_domain *sd)
  {
 -      struct ctl_table *table = sd_alloc_ctl_entry(14);
 +      struct ctl_table *table;
 +      unsigned int nr_entries = 14;
 +      int i = 0;
 +      struct sched_group *sg = sd->groups;
 +
 +      if (sg->sge) {
 +              int nr_sgs = 0;
 +
 +              /* Walk the circular group list once to count the groups. */
 +              do {} while (nr_sgs++, sg = sg->next, sg != sd->groups);
 +
 +              nr_entries += nr_sgs;
 +      }
 +
 +      table = sd_alloc_ctl_entry(nr_entries);
  
        if (table == NULL)
                return NULL;
                sizeof(long), 0644, proc_doulongvec_minmax, false);
        set_table_entry(&table[12], "name", sd->name,
                CORENAME_MAX_SIZE, 0444, proc_dostring, false);
 -      /* &table[13] is terminator */
 +      sg = sd->groups;
 +      if (sg->sge) {
 +              char buf[32];
 +              struct ctl_table *entry = &table[13];
 +
 +              do {
 +                      snprintf(buf, 32, "group%d", i);
 +                      entry->procname = kstrdup(buf, GFP_KERNEL);
 +                      entry->mode = 0555;
 +                      entry->child = sd_alloc_ctl_group_table(sg);
 +              } while (entry++, i++, sg = sg->next, sg != sd->groups);
 +      }
 +      /* &table[nr_entries-1] is terminator */
  
        return table;
  }
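Editor's note: with these entries in place, each sched-domain directory should gain one groupN sub-directory per sched group, each exposing the energy tables allocated above. On a typical system the result would look roughly like the listing below; the /proc/sys/kernel/sched_domain/cpuX/domainY prefix is the pre-existing layout, and the exact depth depends on the platform's topology:

    /proc/sys/kernel/sched_domain/cpu0/domain0/group0/energy/nr_idle_states
    /proc/sys/kernel/sched_domain/cpu0/domain0/group0/energy/idle_states
    /proc/sys/kernel/sched_domain/cpu0/domain0/group0/energy/nr_cap_states
    /proc/sys/kernel/sched_domain/cpu0/domain0/group0/energy/cap_states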
@@@ -5760,9 -5552,6 +5782,9 @@@ migration_call(struct notifier_block *n
        switch (action & ~CPU_TASKS_FROZEN) {
  
        case CPU_UP_PREPARE:
 +              raw_spin_lock_irqsave(&rq->lock, flags);
 +              walt_set_window_start(rq);
 +              raw_spin_unlock_irqrestore(&rq->lock, flags);
                rq->calc_load_update = calc_load_update;
                account_reset_rq(rq);
                break;
                sched_ttwu_pending();
                /* Update our root-domain */
                raw_spin_lock_irqsave(&rq->lock, flags);
 +              walt_migrate_sync_cpu(cpu);
                if (rq->rd) {
                        BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
                        set_rq_offline(rq);
@@@ -5955,7 -5743,7 +5977,7 @@@ static int sched_domain_debug_one(struc
                printk(KERN_CONT " %*pbl",
                       cpumask_pr_args(sched_group_cpus(group)));
                if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
 -                      printk(KERN_CONT " (cpu_capacity = %d)",
 +                      printk(KERN_CONT " (cpu_capacity = %lu)",
                                group->sgc->capacity);
                }
  
@@@ -6016,8 -5804,7 +6038,8 @@@ static int sd_degenerate(struct sched_d
                         SD_BALANCE_EXEC |
                         SD_SHARE_CPUCAPACITY |
                         SD_SHARE_PKG_RESOURCES |
 -                       SD_SHARE_POWERDOMAIN)) {
 +                       SD_SHARE_POWERDOMAIN |
 +                       SD_SHARE_CAP_STATES)) {
                if (sd->groups != sd->groups->next)
                        return 0;
        }
@@@ -6049,8 -5836,7 +6071,8 @@@ sd_parent_degenerate(struct sched_domai
                                SD_SHARE_CPUCAPACITY |
                                SD_SHARE_PKG_RESOURCES |
                                SD_PREFER_SIBLING |
 -                              SD_SHARE_POWERDOMAIN);
 +                              SD_SHARE_POWERDOMAIN |
 +                              SD_SHARE_CAP_STATES);
                if (nr_node_ids == 1)
                        pflags &= ~SD_SERIALIZE;
        }
@@@ -6129,8 -5915,6 +6151,8 @@@ static int init_rootdomain(struct root_
  
        if (cpupri_init(&rd->cpupri) != 0)
                goto free_rto_mask;
 +
 +      init_max_cpu_capacity(&rd->max_cpu_capacity);
        return 0;
  
  free_rto_mask:
@@@ -6236,13 -6020,11 +6258,13 @@@ DEFINE_PER_CPU(int, sd_llc_id)
  DEFINE_PER_CPU(struct sched_domain *, sd_numa);
  DEFINE_PER_CPU(struct sched_domain *, sd_busy);
  DEFINE_PER_CPU(struct sched_domain *, sd_asym);
 +DEFINE_PER_CPU(struct sched_domain *, sd_ea);
 +DEFINE_PER_CPU(struct sched_domain *, sd_scs);
  
  static void update_top_cache_domain(int cpu)
  {
        struct sched_domain *sd;
 -      struct sched_domain *busy_sd = NULL;
 +      struct sched_domain *busy_sd = NULL, *ea_sd = NULL;
        int id = cpu;
        int size = 1;
  
  
        sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
        rcu_assign_pointer(per_cpu(sd_asym, cpu), sd);
 +
 +      for_each_domain(cpu, sd) {
 +              if (sd->groups->sge)
 +                      ea_sd = sd;
 +              else
 +                      break;
 +      }
 +      rcu_assign_pointer(per_cpu(sd_ea, cpu), ea_sd);
 +
 +      sd = highest_flag_domain(cpu, SD_SHARE_CAP_STATES);
 +      rcu_assign_pointer(per_cpu(sd_scs, cpu), sd);
  }
  
  /*
@@@ -6434,7 -6205,6 +6456,7 @@@ build_overlap_sched_groups(struct sched
                 * die on a /0 trap.
                 */
                sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
 +              sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
  
                /*
                 * Make sure the first group of this domain contains the
@@@ -6564,66 -6334,6 +6586,66 @@@ static void init_sched_groups_capacity(
  }
  
  /*
 + * Check that the sched domain energy data provided for each cpu is
 + * consistent across all cpus within the mask.
 + */
 +static inline void check_sched_energy_data(int cpu, sched_domain_energy_f fn,
 +                                         const struct cpumask *cpumask)
 +{
 +      const struct sched_group_energy * const sge = fn(cpu);
 +      struct cpumask mask;
 +      int i;
 +
 +      if (cpumask_weight(cpumask) <= 1)
 +              return;
 +
 +      cpumask_xor(&mask, cpumask, get_cpu_mask(cpu));
 +
 +      for_each_cpu(i, &mask) {
 +              const struct sched_group_energy * const e = fn(i);
 +              int y;
 +
 +              BUG_ON(e->nr_idle_states != sge->nr_idle_states);
 +
 +              for (y = 0; y < (e->nr_idle_states); y++) {
 +                      BUG_ON(e->idle_states[y].power !=
 +                                      sge->idle_states[y].power);
 +              }
 +
 +              BUG_ON(e->nr_cap_states != sge->nr_cap_states);
 +
 +              for (y = 0; y < (e->nr_cap_states); y++) {
 +                      BUG_ON(e->cap_states[y].cap != sge->cap_states[y].cap);
 +                      BUG_ON(e->cap_states[y].power !=
 +                                      sge->cap_states[y].power);
 +              }
 +      }
 +}
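Editor's note: for reference, the data being cross-checked here is the platform's per-group energy model. A hypothetical table could look like the sketch below; the field names follow the accesses made in this function (nr_idle_states, idle_states[].power, nr_cap_states, cap_states[].cap/.power), while the struct definitions and all numbers are assumed for illustration:

    static struct idle_state cluster_idle_states[] = {
        { .power = 25 },                /* core in WFI          */
        { .power =  0 },                /* cluster powered down */
    };

    static struct capacity_state cluster_cap_states[] = {
        { .cap = 235, .power = 33 },    /* lowest OPP  */
        { .cap = 446, .power = 95 },    /* highest OPP */
    };

    static struct sched_group_energy cluster_energy = {
        .nr_idle_states = ARRAY_SIZE(cluster_idle_states),
        .idle_states    = cluster_idle_states,
        .nr_cap_states  = ARRAY_SIZE(cluster_cap_states),
        .cap_states     = cluster_cap_states,
    };

check_sched_energy_data() would BUG if, say, cap_states[1].power differed between two cpus that are supposed to share this table.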
 +
 +static void init_sched_energy(int cpu, struct sched_domain *sd,
 +                            sched_domain_energy_f fn)
 +{
 +      if (!(fn && fn(cpu)))
 +              return;
 +
 +      if (cpu != group_balance_cpu(sd->groups))
 +              return;
 +
 +      if (sd->child && !sd->child->groups->sge) {
 +              pr_err("BUG: EAS setup broken for CPU%d\n", cpu);
 +#ifdef CONFIG_SCHED_DEBUG
 +              pr_err("     energy data on %s but not on %s domain\n",
 +                      sd->name, sd->child->name);
 +#endif
 +              return;
 +      }
 +
 +      check_sched_energy_data(cpu, fn, sched_group_cpus(sd->groups));
 +
 +      sd->groups->sge = fn(cpu);
 +}
 +
 +/*
   * Initializers for schedule domains
   * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
   */
@@@ -6731,7 -6441,6 +6753,7 @@@ static int sched_domains_curr_level
   * SD_SHARE_PKG_RESOURCES - describes shared caches
   * SD_NUMA                - describes NUMA topologies
   * SD_SHARE_POWERDOMAIN   - describes shared power domain
 + * SD_SHARE_CAP_STATES    - describes shared capacity states
   *
   * Odd one out:
   * SD_ASYM_PACKING        - describes SMT quirks
         SD_SHARE_PKG_RESOURCES |       \
         SD_NUMA |                      \
         SD_ASYM_PACKING |              \
 -       SD_SHARE_POWERDOMAIN)
 +       SD_SHARE_POWERDOMAIN |         \
 +       SD_SHARE_CAP_STATES)
  
  static struct sched_domain *
  sd_init(struct sched_domain_topology_level *tl, int cpu)
@@@ -7292,7 -7000,6 +7314,7 @@@ static int build_sched_domains(const st
        enum s_alloc alloc_state;
        struct sched_domain *sd;
        struct s_data d;
 +      struct rq *rq = NULL;
        int i, ret = -ENOMEM;
  
        alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
  
        /* Calculate CPU capacity for physical packages and nodes */
        for (i = nr_cpumask_bits-1; i >= 0; i--) {
 +              struct sched_domain_topology_level *tl = sched_domain_topology;
 +
                if (!cpumask_test_cpu(i, cpu_map))
                        continue;
  
 -              for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
 +              for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent, tl++) {
 +                      init_sched_energy(i, sd, tl->energy);
                        claim_allocations(i, sd);
                        init_sched_groups_capacity(i, sd);
                }
        /* Attach the domains */
        rcu_read_lock();
        for_each_cpu(i, cpu_map) {
 +              rq = cpu_rq(i);
                sd = *per_cpu_ptr(d.sd, i);
                cpu_attach_domain(sd, d.rd, i);
        }
@@@ -7628,7 -7331,6 +7650,7 @@@ void __init sched_init_smp(void
  {
        cpumask_var_t non_isolated_cpus;
  
 +      walt_init_cpu_efficiency();
        alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
        alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
  
@@@ -7806,11 -7508,6 +7828,11 @@@ void __init sched_init(void
                rq->idle_stamp = 0;
                rq->avg_idle = 2*sysctl_sched_migration_cost;
                rq->max_idle_balance_cost = sysctl_sched_migration_cost;
 +#ifdef CONFIG_SCHED_WALT
 +              rq->cur_irqload = 0;
 +              rq->avg_irqload = 0;
 +              rq->irqload_ts = 0;
 +#endif
  
                INIT_LIST_HEAD(&rq->cfs_tasks);
  
@@@ -7874,14 -7571,6 +7896,14 @@@ static inline int preempt_count_equals(
        return (nested == preempt_offset);
  }
  
 +static int __might_sleep_init_called;
 +int __init __might_sleep_init(void)
 +{
 +      __might_sleep_init_called = 1;
 +      return 0;
 +}
 +early_initcall(__might_sleep_init);
 +
  void __might_sleep(const char *file, int line, int preempt_offset)
  {
        /*
@@@ -7906,10 -7595,8 +7928,10 @@@ void ___might_sleep(const char *file, i
  
        rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
        if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
 -           !is_idle_task(current)) ||
 -          system_state != SYSTEM_RUNNING || oops_in_progress)
 +           !is_idle_task(current)) || oops_in_progress)
 +              return;
 +      if (system_state != SYSTEM_RUNNING &&
 +          (!__might_sleep_init_called || system_state != SYSTEM_BOOTING))
                return;
        if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
                return;
@@@ -8934,7 -8621,6 +8956,7 @@@ struct cgroup_subsys cpu_cgrp_subsys = 
        .fork           = cpu_cgroup_fork,
        .can_attach     = cpu_cgroup_can_attach,
        .attach         = cpu_cgroup_attach,
 +      .allow_attach   = subsys_cgroup_allow_attach,
        .legacy_cftypes = cpu_files,
        .early_init     = 1,
  };
diff --combined net/ipv6/addrconf.c
@@@ -205,7 -205,6 +205,7 @@@ static struct ipv6_devconf ipv6_devcon
        .accept_ra_rt_info_max_plen = 0,
  #endif
  #endif
 +      .accept_ra_rt_table     = 0,
        .proxy_ndp              = 0,
        .accept_source_route    = 0,    /* we do not accept RH0 by default. */
        .disable_ipv6           = 0,
@@@ -250,7 -249,6 +250,7 @@@ static struct ipv6_devconf ipv6_devconf
        .accept_ra_rt_info_max_plen = 0,
  #endif
  #endif
 +      .accept_ra_rt_table     = 0,
        .proxy_ndp              = 0,
        .accept_source_route    = 0,    /* we do not accept RH0 by default. */
        .disable_ipv6           = 0,
@@@ -1900,6 -1898,7 +1900,7 @@@ errdad
        spin_unlock_bh(&ifp->lock);
  
        addrconf_mod_dad_work(ifp, 0);
+       in6_ifa_put(ifp);
  }
  
  /* Join to solicited addr multicast group.
@@@ -2147,31 -2146,6 +2148,31 @@@ static void  __ipv6_try_regen_rndid(str
                __ipv6_regen_rndid(idev);
  }
  
 +/* Determines into what table to put autoconf PIO/RIO/default routes
 + * learned on this device.
 + *
 + * - If 0, use the same table for every device. This puts routes into
 + *   one of RT_TABLE_{PREFIX,INFO,DFLT} depending on the type of route
 + *   (but note that these three are currently all equal to
 + *   RT6_TABLE_MAIN).
 + * - If > 0, use the specified table.
 + * - If < 0, put routes into table dev->ifindex + (-rt_table).
 + */
 +u32 addrconf_rt_table(const struct net_device *dev, u32 default_table)
 +{
 +      struct inet6_dev *idev = in6_dev_get(dev);
 +      int sysctl = idev->cnf.accept_ra_rt_table;
 +      u32 table;
 +
 +      if (sysctl == 0)
 +              table = default_table;
 +      else if (sysctl > 0)
 +              table = (u32) sysctl;
 +      else
 +              table = (unsigned) dev->ifindex + (-sysctl);
 +      in6_dev_put(idev);
 +      return table;
 +}
 +
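Editor's note: a brief illustration of the three cases documented above, for a hypothetical interface with ifindex 3 whose per-device sysctl (net.ipv6.conf.<iface>.accept_ra_rt_table, added further down in this patch) has been set as follows:

    /* accept_ra_rt_table ==     0  ->  default_table (RT6_TABLE_MAIN today) */
    /* accept_ra_rt_table ==   100  ->  table 100                            */
    /* accept_ra_rt_table == -1000  ->  table 3 + 1000 == 1003               */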
  /*
   *    Add prefix route.
   */
@@@ -2181,7 -2155,7 +2182,7 @@@ addrconf_prefix_route(struct in6_addr *
                      unsigned long expires, u32 flags)
  {
        struct fib6_config cfg = {
 -              .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX,
 +              .fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_PREFIX),
                .fc_metric = IP6_RT_PRIO_ADDRCONF,
                .fc_ifindex = dev->ifindex,
                .fc_expires = expires,
@@@ -2214,7 -2188,7 +2215,7 @@@ static struct rt6_info *addrconf_get_pr
        struct fib6_node *fn;
        struct rt6_info *rt = NULL;
        struct fib6_table *table;
 -      u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX;
 +      u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_PREFIX);
  
        table = fib6_get_table(dev_net(dev), tb_id);
        if (!table)
@@@ -3636,6 -3610,7 +3637,7 @@@ static void addrconf_dad_work(struct wo
                addrconf_dad_begin(ifp);
                goto out;
        } else if (action == DAD_ABORT) {
+               in6_ifa_hold(ifp);
                addrconf_dad_stop(ifp, 1);
                goto out;
        }
@@@ -4690,7 -4665,6 +4692,7 @@@ static inline void ipv6_store_devconf(s
        array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
  #endif
  #endif
 +      array[DEVCONF_ACCEPT_RA_RT_TABLE] = cnf->accept_ra_rt_table;
        array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
        array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
  #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
@@@ -5658,13 -5632,6 +5660,13 @@@ static struct addrconf_sysctl_tabl
  #endif
  #endif
                {
 +                      .procname       = "accept_ra_rt_table",
 +                      .data           = &ipv6_devconf.accept_ra_rt_table,
 +                      .maxlen         = sizeof(int),
 +                      .mode           = 0644,
 +                      .proc_handler   = proc_dointvec,
 +              },
 +              {
                        .procname       = "proxy_ndp",
                        .data           = &ipv6_devconf.proxy_ndp,
                        .maxlen         = sizeof(int),