Merge tag 'v4.4.22' into android-4.4.y

author Dmitry Shmidt <dimitrysh@google.com>

Mon, 26 Sep 2016 17:37:43 +0000 (10:37 -0700)

committer Dmitry Shmidt <dimitrysh@google.com>

Mon, 26 Sep 2016 17:37:43 +0000 (10:37 -0700)
author Dmitry Shmidt <dimitrysh@google.com>
Mon, 26 Sep 2016 17:37:43 +0000 (10:37 -0700)
committer Dmitry Shmidt <dimitrysh@google.com>
Mon, 26 Sep 2016 17:37:43 +0000 (10:37 -0700)
diff --combined arch/arm64/include/asm/spinlock.h

index fc9682b,499e8de..53ee219
--- 1/arch/arm64/include/asm/spinlock.h
--- 2/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@@ -26,28 -26,9 +26,28 @@@
    * The memory barriers are implicit with the load-acquire and store-release
    * instructions.
    */
+ +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+ +{
+ +      unsigned int tmp;
+ +      arch_spinlock_t lockval;
   
- -#define arch_spin_unlock_wait(lock) \
- -      do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+ +      asm volatile(
+ +"     sevl\n"
+ +"1:   wfe\n"
+ +"2:   ldaxr   %w0, %2\n"
+ +"     eor     %w1, %w0, %w0, ror #16\n"
+ +"     cbnz    %w1, 1b\n"
+ +      ARM64_LSE_ATOMIC_INSN(
+ +      /* LL/SC */
+ +"     stxr    %w1, %w0, %2\n"
+ +"     cbnz    %w1, 2b\n", /* Serialise against any concurrent lockers */
+ +      /* LSE atomics */
+ +"     nop\n"
+ +"     nop\n")
+ +      : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
+ +      :
+ +      : "memory");
+ +}
   
   #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
   
@@@ -331,4 -312,14 +331,14 @@@ static inline int arch_read_trylock(arc
   #define arch_read_relax(lock) cpu_relax()
   #define arch_write_relax(lock)        cpu_relax()
   
+ /*
+  * Accesses appearing in program order before a spin_lock() operation
+  * can be reordered with accesses inside the critical section, by virtue
+  * of arch_spin_lock being constructed using acquire semantics.
+  *
+  * In cases where this is problematic (e.g. try_to_wake_up), an
+  * smp_mb__before_spinlock() can restore the required ordering.
+  */
+ #define smp_mb__before_spinlock()     smp_mb()
+ 
   #endif /* __ASM_SPINLOCK_H */
diff --combined arch/x86/include/asm/uaccess.h

index dbe64f2,d42252c..b8ff6ab
--- 1/arch/x86/include/asm/uaccess.h
--- 2/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@@ -134,9 -134,6 +134,9 @@@ extern int __get_user_4(void)
   extern int __get_user_8(void);
   extern int __get_user_bad(void);
   
+ +#define __uaccess_begin() stac()
+ +#define __uaccess_end()   clac()
+ +
   /*
    * This is a type: either unsigned long, if the argument fits into
    * that type, or otherwise unsigned long long.
@@@ -196,10 -193,10 +196,10 @@@ __typeof__(__builtin_choose_expr(sizeof
   
   #ifdef CONFIG_X86_32
   #define __put_user_asm_u64(x, addr, err, errret)                      \
- -      asm volatile(ASM_STAC "\n"                                      \
+ +      asm volatile("\n"                                               \
                      "1:        movl %%eax,0(%2)\n"                     \
                      "2:        movl %%edx,4(%2)\n"                     \
- -                   "3: " ASM_CLAC "\n"                                \
+ +                   "3:"                                               \
                      ".section .fixup,\"ax\"\n"                         \
                      "4:        movl %3,%0\n"                           \
                      "  jmp 3b\n"                                       \
@@@ -210,10 -207,10 +210,10 @@@
                      : "A" (x), "r" (addr), "i" (errret), "0" (err))
   
   #define __put_user_asm_ex_u64(x, addr)                                        \
- -      asm volatile(ASM_STAC "\n"                                      \
+ +      asm volatile("\n"                                               \
                      "1:        movl %%eax,0(%1)\n"                     \
                      "2:        movl %%edx,4(%1)\n"                     \
- -                   "3: " ASM_CLAC "\n"                                \
+ +                   "3:"                                               \
                      _ASM_EXTABLE_EX(1b, 2b)                            \
                      _ASM_EXTABLE_EX(2b, 3b)                            \
                      : : "A" (x), "r" (addr))
@@@ -307,10 -304,6 +307,10 @@@ do {                                                                     
         }                                                               \
   } while (0)
   
+ +/*
+ + * This doesn't do __uaccess_begin/end - the exception handling
+ + * around it must do that.
+ + */
   #define __put_user_size_ex(x, ptr, size)                              \
   do {                                                                  \
         __chk_user_ptr(ptr);                                            \
@@@ -365,9 -358,9 +365,9 @@@ do {                                                                       
   } while (0)
   
   #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret)     \
- -      asm volatile(ASM_STAC "\n"                                      \
+ +      asm volatile("\n"                                               \
                      "1:        mov"itype" %2,%"rtype"1\n"              \
- -                   "2: " ASM_CLAC "\n"                                \
+ +                   "2:\n"                                             \
                      ".section .fixup,\"ax\"\n"                         \
                      "3:        mov %3,%0\n"                            \
                      "  xor"itype" %"rtype"1,%"rtype"1\n"               \
@@@ -377,10 -370,6 +377,10 @@@
                      : "=r" (err), ltype(x)                             \
                      : "m" (__m(addr)), "i" (errret), "0" (err))
   
+ +/*
+ + * This doesn't do __uaccess_begin/end - the exception handling
+ + * around it must do that.
+ + */
   #define __get_user_size_ex(x, ptr, size)                              \
   do {                                                                  \
         __chk_user_ptr(ptr);                                            \
@@@ -405,15 -394,17 +405,19 @@@
   #define __get_user_asm_ex(x, addr, itype, rtype, ltype)                       \
         asm volatile("1:        mov"itype" %1,%"rtype"0\n"              \
                      "2:\n"                                             \
-                    _ASM_EXTABLE_EX(1b, 2b)                            \
+                    ".section .fixup,\"ax\"\n"                         \
+                      "3:xor"itype" %"rtype"0,%"rtype"0\n"             \
+                    "  jmp 2b\n"                                       \
+                    ".previous\n"                                      \
+                    _ASM_EXTABLE_EX(1b, 3b)                            \
                      : ltype(x) : "m" (__m(addr)))
   
   #define __put_user_nocheck(x, ptr, size)                      \
   ({                                                            \
         int __pu_err;                                           \
+ +      __uaccess_begin();                                      \
         __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \
+ +      __uaccess_end();                                        \
         __builtin_expect(__pu_err, 0);                          \
   })
   
@@@ -421,9 -412,7 +425,9 @@@
   ({                                                                    \
         int __gu_err;                                                   \
         unsigned long __gu_val;                                         \
+ +      __uaccess_begin();                                              \
         __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT);    \
+ +      __uaccess_end();                                                \
         (x) = (__force __typeof__(*(ptr)))__gu_val;                     \
         __builtin_expect(__gu_err, 0);                                  \
   })
@@@ -438,9 -427,9 +442,9 @@@ struct __large_struct { unsigned long b
    * aliasing issues.
    */
   #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret)     \
- -      asm volatile(ASM_STAC "\n"                                      \
+ +      asm volatile("\n"                                               \
                      "1:        mov"itype" %"rtype"1,%2\n"              \
- -                   "2: " ASM_CLAC "\n"                                \
+ +                   "2:\n"                                             \
                      ".section .fixup,\"ax\"\n"                         \
                      "3:        mov %3,%0\n"                            \
                      "  jmp 2b\n"                                       \
@@@ -460,11 -449,11 +464,11 @@@
    */
   #define uaccess_try   do {                                            \
         current_thread_info()->uaccess_err = 0;                         \
- -      stac();                                                         \
+ +      __uaccess_begin();                                              \
         barrier();
   
   #define uaccess_catch(err)                                            \
- -      clac();                                                         \
+ +      __uaccess_end();                                                \
         (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0);    \
   } while (0)
   
@@@ -562,13 -551,12 +566,13 @@@ extern void __cmpxchg_wrong_size(void
         __typeof__(ptr) __uval = (uval);                                \
         __typeof__(*(ptr)) __old = (old);                               \
         __typeof__(*(ptr)) __new = (new);                               \
+ +      __uaccess_begin();                                              \
         switch (size) {                                                 \
         case 1:                                                         \
         {                                                               \
- -              asm volatile("\t" ASM_STAC "\n"                         \
+ +              asm volatile("\n"                                       \
                         "1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n"          \
- -                      "2:\t" ASM_CLAC "\n"                            \
+ +                      "2:\n"                                          \
                         "\t.section .fixup, \"ax\"\n"                   \
                         "3:\tmov     %3, %0\n"                          \
                         "\tjmp     2b\n"                                \
@@@ -582,9 -570,9 +586,9 @@@
         }                                                               \
         case 2:                                                         \
         {                                                               \
- -              asm volatile("\t" ASM_STAC "\n"                         \
+ +              asm volatile("\n"                                       \
                         "1:\t" LOCK_PREFIX "cmpxchgw %4, %2\n"          \
- -                      "2:\t" ASM_CLAC "\n"                            \
+ +                      "2:\n"                                          \
                         "\t.section .fixup, \"ax\"\n"                   \
                         "3:\tmov     %3, %0\n"                          \
                         "\tjmp     2b\n"                                \
@@@ -598,9 -586,9 +602,9 @@@
         }                                                               \
         case 4:                                                         \
         {                                                               \
- -              asm volatile("\t" ASM_STAC "\n"                         \
+ +              asm volatile("\n"                                       \
                         "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"          \
- -                      "2:\t" ASM_CLAC "\n"                            \
+ +                      "2:\n"                                          \
                         "\t.section .fixup, \"ax\"\n"                   \
                         "3:\tmov     %3, %0\n"                          \
                         "\tjmp     2b\n"                                \
@@@ -617,9 -605,9 +621,9 @@@
                 if (!IS_ENABLED(CONFIG_X86_64))                         \
                         __cmpxchg_wrong_size();                         \
                                                                         \
- -              asm volatile("\t" ASM_STAC "\n"                         \
+ +              asm volatile("\n"                                       \
                         "1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n"          \
- -                      "2:\t" ASM_CLAC "\n"                            \
+ +                      "2:\n"                                          \
                         "\t.section .fixup, \"ax\"\n"                   \
                         "3:\tmov     %3, %0\n"                          \
                         "\tjmp     2b\n"                                \
@@@ -634,7 -622,6 +638,7 @@@
         default:                                                        \
                 __cmpxchg_wrong_size();                                 \
         }                                                               \
+ +      __uaccess_end();                                                \
         *__uval = __old;                                                \
         __ret;                                                          \
   })
@@@ -706,7 -693,7 +710,7 @@@ __copy_from_user_overflow(int size, uns
   
   #endif
   
- -static inline unsigned long __must_check
+ +static __always_inline unsigned long __must_check
   copy_from_user(void *to, const void __user *from, unsigned long n)
   {
         int sz = __compiletime_object_size(to);
@@@ -731,10 -718,9 +735,10 @@@
          * case, and do only runtime checking for non-constant sizes.
          */
   
- -      if (likely(sz < 0 || sz >= n))
+ +      if (likely(sz < 0 || sz >= n)) {
+ +              check_object_size(to, n, false);
                 n = _copy_from_user(to, from, n);
- -      else if(__builtin_constant_p(n))
+ +      } else if (__builtin_constant_p(n))
                 copy_from_user_overflow();
         else
                 __copy_from_user_overflow(sz, n);
@@@ -742,7 -728,7 +746,7 @@@
         return n;
   }
   
- -static inline unsigned long __must_check
+ +static __always_inline unsigned long __must_check
   copy_to_user(void __user *to, const void *from, unsigned long n)
   {
         int sz = __compiletime_object_size(from);
@@@ -750,10 -736,9 +754,10 @@@
         might_fault();
   
         /* See the comment in copy_from_user() above. */
- -      if (likely(sz < 0 || sz >= n))
+ +      if (likely(sz < 0 || sz >= n)) {
+ +              check_object_size(from, n, true);
                 n = _copy_to_user(to, from, n);
- -      else if(__builtin_constant_p(n))
+ +      } else if (__builtin_constant_p(n))
                 copy_to_user_overflow();
         else
                 __copy_to_user_overflow(sz, n);
@@@ -764,30 -749,5 +768,30 @@@
   #undef __copy_from_user_overflow
   #undef __copy_to_user_overflow
   
+ +/*
+ + * The "unsafe" user accesses aren't really "unsafe", but the naming
+ + * is a big fat warning: you have to not only do the access_ok()
+ + * checking before using them, but you have to surround them with the
+ + * user_access_begin/end() pair.
+ + */
+ +#define user_access_begin()   __uaccess_begin()
+ +#define user_access_end()     __uaccess_end()
+ +
+ +#define unsafe_put_user(x, ptr, err_label)                                    \
+ +do {                                                                          \
+ +      int __pu_err;                                                           \
+ +      __put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT);         \
+ +      if (unlikely(__pu_err)) goto err_label;                                 \
+ +} while (0)
+ +
+ +#define unsafe_get_user(x, ptr, err_label)                                    \
+ +do {                                                                          \
+ +      int __gu_err;                                                           \
+ +      unsigned long __gu_val;                                                 \
+ +      __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT);    \
+ +      (x) = (__force __typeof__(*(ptr)))__gu_val;                             \
+ +      if (unlikely(__gu_err)) goto err_label;                                 \
+ +} while (0)
+ +
   #endif /* _ASM_X86_UACCESS_H */
   
diff --combined drivers/md/dm-crypt.c

index e85bcae,51eda72..e6a0bcb
--- 1/drivers/md/dm-crypt.c
--- 2/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@@ -1864,24 -1864,16 +1864,24 @@@ static int crypt_ctr(struct dm_target *
         }
   
         ret = -ENOMEM;
- -      cc->io_queue = alloc_workqueue("kcryptd_io", WQ_MEM_RECLAIM, 1);
+ +      cc->io_queue = alloc_workqueue("kcryptd_io",
+ +                                     WQ_HIGHPRI |
+ +                                     WQ_MEM_RECLAIM,
+ +                                     1);
         if (!cc->io_queue) {
                 ti->error = "Couldn't create kcryptd io queue";
                 goto bad;
         }
   
         if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
- -              cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
+ +              cc->crypt_queue = alloc_workqueue("kcryptd",
+ +                                                WQ_HIGHPRI |
+ +                                                WQ_MEM_RECLAIM, 1);
         else
- -              cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
+ +              cc->crypt_queue = alloc_workqueue("kcryptd",
+ +                                                WQ_HIGHPRI |
+ +                                                WQ_MEM_RECLAIM |
+ +                                                WQ_UNBOUND,
                                                   num_online_cpus());
         if (!cc->crypt_queue) {
                 ti->error = "Couldn't create kcryptd queue";
@@@ -1928,6 -1920,13 +1928,13 @@@ static int crypt_map(struct dm_target *
                 return DM_MAPIO_REMAPPED;
         }
   
+       /*
+        * Check if bio is too large, split as needed.
+        */
+       if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_PAGES << PAGE_SHIFT)) &&
+           bio_data_dir(bio) == WRITE)
+               dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT));
+ 
         io = dm_per_bio_data(bio, cc->per_bio_data_size);
         crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
         io->ctx.req = (struct ablkcipher_request *)(io + 1);
diff --combined fs/ext4/ioctl.c

index 95315b1,1fb12f9..7e97487
--- 1/fs/ext4/ioctl.c
--- 2/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@@ -587,13 -587,11 +587,13 @@@ resizefs_out
                 return err;
         }
   
+ +      case FIDTRIM:
         case FITRIM:
         {
                 struct request_queue *q = bdev_get_queue(sb->s_bdev);
                 struct fstrim_range range;
                 int ret = 0;
+ +              int flags  = cmd == FIDTRIM ? BLKDEV_DISCARD_SECURE : 0;
   
                 if (!capable(CAP_SYS_ADMIN))
                         return -EPERM;
@@@ -601,15 -599,13 +601,15 @@@
                 if (!blk_queue_discard(q))
                         return -EOPNOTSUPP;
   
+ +              if ((flags & BLKDEV_DISCARD_SECURE) && !blk_queue_secdiscard(q))
+ +                      return -EOPNOTSUPP;
                 if (copy_from_user(&range, (struct fstrim_range __user *)arg,
                     sizeof(range)))
                         return -EFAULT;
   
                 range.minlen = max((unsigned int)range.minlen,
                                    q->limits.discard_granularity);
- -              ret = ext4_trim_fs(sb, &range);
+ +              ret = ext4_trim_fs(sb, &range, flags);
                 if (ret < 0)
                         return ret;
   
@@@ -633,7 -629,13 +633,13 @@@
                         goto encryption_policy_out;
                 }
   
+               err = mnt_want_write_file(filp);
+               if (err)
+                       goto encryption_policy_out;
+ 
                 err = ext4_process_policy(&policy, inode);
+ 
+               mnt_drop_write_file(filp);
   encryption_policy_out:
                 return err;
   #else
diff --combined fs/ext4/mballoc.c

index a0daca4,3c7f0c4..0b1c978
--- 1/fs/ext4/mballoc.c
--- 2/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@@ -815,7 -815,7 +815,7 @@@ static void mb_regenerate_buddy(struct 
    * for this page; do not hold this lock when calling this routine!
    */
   
- static int ext4_mb_init_cache(struct page *page, char *incore)
+ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
   {
         ext4_group_t ngroups;
         int blocksize;
@@@ -848,7 -848,7 +848,7 @@@
         /* allocate buffer_heads to read bitmaps */
         if (groups_per_page > 1) {
                 i = sizeof(struct buffer_head *) * groups_per_page;
-               bh = kzalloc(i, GFP_NOFS);
+               bh = kzalloc(i, gfp);
                 if (bh == NULL) {
                         err = -ENOMEM;
                         goto out;
@@@ -983,7 -983,7 +983,7 @@@ out
    * are on the same page e4b->bd_buddy_page is NULL and return value is 0.
    */
   static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
-               ext4_group_t group, struct ext4_buddy *e4b)
+               ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
   {
         struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
         int block, pnum, poff;
@@@ -1002,7 -1002,7 +1002,7 @@@
         block = group * 2;
         pnum = block / blocks_per_page;
         poff = block % blocks_per_page;
-       page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+       page = find_or_create_page(inode->i_mapping, pnum, gfp);
         if (!page)
                 return -ENOMEM;
         BUG_ON(page->mapping != inode->i_mapping);
@@@ -1016,7 -1016,7 +1016,7 @@@
   
         block++;
         pnum = block / blocks_per_page;
-       page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+       page = find_or_create_page(inode->i_mapping, pnum, gfp);
         if (!page)
                 return -ENOMEM;
         BUG_ON(page->mapping != inode->i_mapping);
@@@ -1042,7 -1042,7 +1042,7 @@@ static void ext4_mb_put_buddy_page_lock
    * calling this routine!
    */
   static noinline_for_stack
- int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
+ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
   {
   
         struct ext4_group_info *this_grp;
@@@ -1062,7 -1062,7 +1062,7 @@@
          * The call to ext4_mb_get_buddy_page_lock will mark the
          * page accessed.
          */
-       ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
+       ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
         if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
                 /*
                  * somebody initialized the group
@@@ -1072,7 -1072,7 +1072,7 @@@
         }
   
         page = e4b.bd_bitmap_page;
-       ret = ext4_mb_init_cache(page, NULL);
+       ret = ext4_mb_init_cache(page, NULL, gfp);
         if (ret)
                 goto err;
         if (!PageUptodate(page)) {
@@@ -1091,7 -1091,7 +1091,7 @@@
         }
         /* init buddy cache */
         page = e4b.bd_buddy_page;
-       ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
+       ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
         if (ret)
                 goto err;
         if (!PageUptodate(page)) {
@@@ -1109,8 -1109,8 +1109,8 @@@ err
    * calling this routine!
    */
   static noinline_for_stack int
- ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
-                                       struct ext4_buddy *e4b)
+ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
+                      struct ext4_buddy *e4b, gfp_t gfp)
   {
         int blocks_per_page;
         int block;
@@@ -1140,7 -1140,7 +1140,7 @@@
                  * we need full data about the group
                  * to make a good selection
                  */
-               ret = ext4_mb_init_group(sb, group);
+               ret = ext4_mb_init_group(sb, group, gfp);
                 if (ret)
                         return ret;
         }
@@@ -1168,11 -1168,11 +1168,11 @@@
                          * wait for it to initialize.
                          */
                         page_cache_release(page);
-               page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+               page = find_or_create_page(inode->i_mapping, pnum, gfp);
                 if (page) {
                         BUG_ON(page->mapping != inode->i_mapping);
                         if (!PageUptodate(page)) {
-                               ret = ext4_mb_init_cache(page, NULL);
+                               ret = ext4_mb_init_cache(page, NULL, gfp);
                                 if (ret) {
                                         unlock_page(page);
                                         goto err;
@@@ -1204,11 -1204,12 +1204,12 @@@
         if (page == NULL || !PageUptodate(page)) {
                 if (page)
                         page_cache_release(page);
-               page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+               page = find_or_create_page(inode->i_mapping, pnum, gfp);
                 if (page) {
                         BUG_ON(page->mapping != inode->i_mapping);
                         if (!PageUptodate(page)) {
-                               ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
+                               ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
+                                                        gfp);
                                 if (ret) {
                                         unlock_page(page);
                                         goto err;
@@@ -1247,6 -1248,12 +1248,12 @@@ err
         return ret;
   }
   
+ static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+                             struct ext4_buddy *e4b)
+ {
+       return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
+ }
+ 
   static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
   {
         if (e4b->bd_bitmap_page)
@@@ -2047,7 -2054,7 +2054,7 @@@ static int ext4_mb_good_group(struct ex
   
         /* We only do this if the grp has never been initialized */
         if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
-               int ret = ext4_mb_init_group(ac->ac_sb, group);
+               int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
                 if (ret)
                         return ret;
         }
@@@ -2763,8 -2770,7 +2770,8 @@@ int ext4_mb_release(struct super_block 
   }
   
   static inline int ext4_issue_discard(struct super_block *sb,
- -              ext4_group_t block_group, ext4_grpblk_t cluster, int count)
+ +              ext4_group_t block_group, ext4_grpblk_t cluster, int count,
+ +              unsigned long flags)
   {
         ext4_fsblk_t discard_block;
   
@@@ -2773,7 -2779,7 +2780,7 @@@
         count = EXT4_C2B(EXT4_SB(sb), count);
         trace_ext4_discard_blocks(sb,
                         (unsigned long long) discard_block, count);
- -      return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
+ +      return sb_issue_discard(sb, discard_block, count, GFP_NOFS, flags);
   }
   
   /*
@@@ -2795,7 -2801,7 +2802,7 @@@ static void ext4_free_data_callback(str
         if (test_opt(sb, DISCARD)) {
                 err = ext4_issue_discard(sb, entry->efd_group,
                                          entry->efd_start_cluster,
- -                                       entry->efd_count);
+ +                                       entry->efd_count, 0);
                 if (err && err != -EOPNOTSUPP)
                         ext4_msg(sb, KERN_WARNING, "discard request in"
                                  " group:%d block:%d count:%d failed"
@@@ -4809,7 -4815,9 +4816,9 @@@ do_more
   #endif
         trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
   
-       err = ext4_mb_load_buddy(sb, block_group, &e4b);
+       /* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */
+       err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
+                                    GFP_NOFS|__GFP_NOFAIL);
         if (err)
                 goto error_return;
   
@@@ -4838,8 -4846,7 +4847,8 @@@
                  * them with group lock_held
                  */
                 if (test_opt(sb, DISCARD)) {
- -                      err = ext4_issue_discard(sb, block_group, bit, count);
+ +                      err = ext4_issue_discard(sb, block_group, bit, count,
+ +                                               0);
                         if (err && err != -EOPNOTSUPP)
                                 ext4_msg(sb, KERN_WARNING, "discard request in"
                                          " group:%d block:%d count:%lu failed"
@@@ -5035,15 -5042,13 +5044,15 @@@ error_return
    * @count:    number of blocks to TRIM
    * @group:    alloc. group we are working with
    * @e4b:      ext4 buddy for the group
+ + * @blkdev_flags: flags for the block device
    *
    * Trim "count" blocks starting at "start" in the "group". To assure that no
    * one will allocate those blocks, mark it as used in buddy bitmap. This must
    * be called with under the group lock.
    */
   static int ext4_trim_extent(struct super_block *sb, int start, int count,
- -                           ext4_group_t group, struct ext4_buddy *e4b)
+ +                          ext4_group_t group, struct ext4_buddy *e4b,
+ +                          unsigned long blkdev_flags)
   __releases(bitlock)
   __acquires(bitlock)
   {
@@@ -5064,7 -5069,7 +5073,7 @@@
          */
         mb_mark_used(e4b, &ex);
         ext4_unlock_group(sb, group);
- -      ret = ext4_issue_discard(sb, group, start, count);
+ +      ret = ext4_issue_discard(sb, group, start, count, blkdev_flags);
         ext4_lock_group(sb, group);
         mb_free_blocks(NULL, e4b, start, ex.fe_len);
         return ret;
@@@ -5077,7 -5082,6 +5086,7 @@@
    * @start:            first group block to examine
    * @max:              last group block to examine
    * @minblocks:                minimum extent block count
+ + * @blkdev_flags:     flags for the block device
    *
    * ext4_trim_all_free walks through group's buddy bitmap searching for free
    * extents. When the free block is found, ext4_trim_extent is called to TRIM
@@@ -5092,7 -5096,7 +5101,7 @@@
   static ext4_grpblk_t
   ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
                    ext4_grpblk_t start, ext4_grpblk_t max,
- -                 ext4_grpblk_t minblocks)
+ +                 ext4_grpblk_t minblocks, unsigned long blkdev_flags)
   {
         void *bitmap;
         ext4_grpblk_t next, count = 0, free_count = 0;
@@@ -5125,8 -5129,7 +5134,8 @@@
   
                 if ((next - start) >= minblocks) {
                         ret = ext4_trim_extent(sb, start,
- -                                             next - start, group, &e4b);
+ +                                             next - start, group, &e4b,
+ +                                             blkdev_flags);
                         if (ret && ret != -EOPNOTSUPP)
                                 break;
                         ret = 0;
@@@ -5168,7 -5171,6 +5177,7 @@@ out
    * ext4_trim_fs() -- trim ioctl handle function
    * @sb:                       superblock for filesystem
    * @range:            fstrim_range structure
+ + * @blkdev_flags:     flags for the block device
    *
    * start:     First Byte to trim
    * len:               number of Bytes to trim from start
@@@ -5177,8 -5179,7 +5186,8 @@@
    * start to start+len. For each such group the ext4_trim_all_free function
    * is invoked to trim all free space.
    */
- -int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+ +int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range,
+ +                      unsigned long blkdev_flags)
   {
         struct ext4_group_info *grp;
         ext4_group_t group, first_group, last_group;
@@@ -5218,7 -5219,7 +5227,7 @@@
                 grp = ext4_get_group_info(sb, group);
                 /* We only do this if the grp has never been initialized */
                 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
-                       ret = ext4_mb_init_group(sb, group);
+                       ret = ext4_mb_init_group(sb, group, GFP_NOFS);
                         if (ret)
                                 break;
                 }
@@@ -5234,7 -5235,7 +5243,7 @@@
   
                 if (grp->bb_free >= minlen) {
                         cnt = ext4_trim_all_free(sb, group, first_cluster,
- -                                              end, minlen);
+ +                                              end, minlen, blkdev_flags);
                         if (cnt < 0) {
                                 ret = cnt;
                                 break;
diff --combined fs/proc/base.c

index df715a0,d2b8c75..0c9ea52
--- 1/fs/proc/base.c
--- 2/fs/proc/base.c
+++ b/fs/proc/base.c
@@@ -1545,18 -1545,13 +1545,13 @@@ static const struct file_operations pro
   static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
   {
         struct task_struct *task;
-       struct mm_struct *mm;
         struct file *exe_file;
   
         task = get_proc_task(d_inode(dentry));
         if (!task)
                 return -ENOENT;
-       mm = get_task_mm(task);
+       exe_file = get_task_exe_file(task);
         put_task_struct(task);
-       if (!mm)
-               return -ENOENT;
-       exe_file = get_mm_exe_file(mm);
-       mmput(mm);
         if (exe_file) {
                 *exe_path = exe_file->f_path;
                 path_get(&exe_file->f_path);
@@@ -2245,92 -2240,6 +2240,92 @@@ static const struct file_operations pro
         .release        = seq_release_private,
   };
   
+ +static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
+ +                                      size_t count, loff_t *offset)
+ +{
+ +      struct inode *inode = file_inode(file);
+ +      struct task_struct *p;
+ +      u64 slack_ns;
+ +      int err;
+ +
+ +      err = kstrtoull_from_user(buf, count, 10, &slack_ns);
+ +      if (err < 0)
+ +              return err;
+ +
+ +      p = get_proc_task(inode);
+ +      if (!p)
+ +              return -ESRCH;
+ +
+ +      if (p != current) {
+ +              if (!capable(CAP_SYS_NICE)) {
+ +                      count = -EPERM;
+ +                      goto out;
+ +              }
+ +
+ +              err = security_task_setscheduler(p);
+ +              if (err) {
+ +                      count = err;
+ +                      goto out;
+ +              }
+ +      }
+ +
+ +      task_lock(p);
+ +      if (slack_ns == 0)
+ +              p->timer_slack_ns = p->default_timer_slack_ns;
+ +      else
+ +              p->timer_slack_ns = slack_ns;
+ +      task_unlock(p);
+ +
+ +out:
+ +      put_task_struct(p);
+ +
+ +      return count;
+ +}
+ +
+ +static int timerslack_ns_show(struct seq_file *m, void *v)
+ +{
+ +      struct inode *inode = m->private;
+ +      struct task_struct *p;
+ +      int err = 0;
+ +
+ +      p = get_proc_task(inode);
+ +      if (!p)
+ +              return -ESRCH;
+ +
+ +      if (p != current) {
+ +
+ +              if (!capable(CAP_SYS_NICE)) {
+ +                      err = -EPERM;
+ +                      goto out;
+ +              }
+ +              err = security_task_getscheduler(p);
+ +              if (err)
+ +                      goto out;
+ +      }
+ +
+ +      task_lock(p);
+ +      seq_printf(m, "%llu\n", p->timer_slack_ns);
+ +      task_unlock(p);
+ +
+ +out:
+ +      put_task_struct(p);
+ +
+ +      return err;
+ +}
+ +
+ +static int timerslack_ns_open(struct inode *inode, struct file *filp)
+ +{
+ +      return single_open(filp, timerslack_ns_show, inode);
+ +}
+ +
+ +static const struct file_operations proc_pid_set_timerslack_ns_operations = {
+ +      .open           = timerslack_ns_open,
+ +      .read           = seq_read,
+ +      .write          = timerslack_ns_write,
+ +      .llseek         = seq_lseek,
+ +      .release        = single_release,
+ +};
+ +
   static int proc_pident_instantiate(struct inode *dir,
         struct dentry *dentry, struct task_struct *task, const void *ptr)
   {
@@@ -2881,8 -2790,8 +2876,8 @@@ static const struct pid_entry tgid_base
         ONE("cgroup",  S_IRUGO, proc_cgroup_show),
   #endif
         ONE("oom_score",  S_IRUGO, proc_oom_score),
- -      REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adj_operations),
- -      REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
+ +      REG("oom_adj",    S_IRUSR, proc_oom_adj_operations),
+ +      REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations),
   #ifdef CONFIG_AUDITSYSCALL
         REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
         REG("sessionid",  S_IRUGO, proc_sessionid_operations),
@@@ -2908,7 -2817,6 +2903,7 @@@
   #ifdef CONFIG_CHECKPOINT_RESTORE
         REG("timers",     S_IRUGO, proc_timers_operations),
   #endif
+ +      REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations),
   };
   
   static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@@ -3166,44 -3074,6 +3161,44 @@@ int proc_pid_readdir(struct file *file
   }
   
   /*
+ + * proc_tid_comm_permission is a special permission function exclusively
+ + * used for the node /proc/<pid>/task/<tid>/comm.
+ + * It bypasses generic permission checks in the case where a task of the same
+ + * thread group attempts to access the node.
+ + * The rationale behind this is that glibc and bionic access this node for
+ + * cross thread naming (pthread_set/getname_np(!self)). However, if
+ + * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0,
+ + * which locks out the cross thread naming implementation.
+ + * This function makes sure that the node is always accessible for members of
+ + * the same thread group.
+ + */
+ +static int proc_tid_comm_permission(struct inode *inode, int mask)
+ +{
+ +      bool is_same_tgroup;
+ +      struct task_struct *task;
+ +
+ +      task = get_proc_task(inode);
+ +      if (!task)
+ +              return -ESRCH;
+ +      is_same_tgroup = same_thread_group(current, task);
+ +      put_task_struct(task);
+ +
+ +      if (likely(is_same_tgroup && !(mask & MAY_EXEC))) {
+ +              /* This file (/proc/<pid>/task/<tid>/comm) can always be
+ +               * read or written by the members of the corresponding
+ +               * thread group.
+ +               */
+ +              return 0;
+ +      }
+ +
+ +      return generic_permission(inode, mask);
+ +}
+ +
+ +static const struct inode_operations proc_tid_comm_inode_operations = {
+ +              .permission = proc_tid_comm_permission,
+ +};
+ +
+ +/*
    * Tasks
    */
   static const struct pid_entry tid_base_stuff[] = {
@@@ -3221,9 -3091,7 +3216,9 @@@
   #ifdef CONFIG_SCHED_DEBUG
         REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
   #endif
- -      REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
+ +      NOD("comm",      S_IFREG|S_IRUGO|S_IWUSR,
+ +                       &proc_tid_comm_inode_operations,
+ +                       &proc_pid_set_comm_operations, {}),
   #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
         ONE("syscall",   S_IRUSR, proc_pid_syscall),
   #endif
@@@ -3270,8 -3138,8 +3265,8 @@@
         ONE("cgroup",  S_IRUGO, proc_cgroup_show),
   #endif
         ONE("oom_score", S_IRUGO, proc_oom_score),
- -      REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adj_operations),
- -      REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
+ +      REG("oom_adj",   S_IRUSR, proc_oom_adj_operations),
+ +      REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations),
   #ifdef CONFIG_AUDITSYSCALL
         REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
         REG("sessionid",  S_IRUGO, proc_sessionid_operations),
diff --combined include/linux/mm.h

index 3ea8620,cfebb74..b009ab1
--- 1/include/linux/mm.h
--- 2/include/linux/mm.h
+++ b/include/linux/mm.h
@@@ -51,17 -51,6 +51,17 @@@ extern int sysctl_legacy_va_layout
   #define sysctl_legacy_va_layout 0
   #endif
   
+ +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
+ +extern const int mmap_rnd_bits_min;
+ +extern const int mmap_rnd_bits_max;
+ +extern int mmap_rnd_bits __read_mostly;
+ +#endif
+ +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
+ +extern const int mmap_rnd_compat_bits_min;
+ +extern const int mmap_rnd_compat_bits_max;
+ +extern int mmap_rnd_compat_bits __read_mostly;
+ +#endif
+ +
   #include <asm/page.h>
   #include <asm/pgtable.h>
   #include <asm/processor.h>
@@@ -1070,7 -1059,6 +1070,7 @@@ extern void pagefault_out_of_memory(voi
   extern void show_free_areas(unsigned int flags);
   extern bool skip_free_areas_node(unsigned int flags, int nid);
   
+ +void shmem_set_file(struct vm_area_struct *vma, struct file *file);
   int shmem_zero_setup(struct vm_area_struct *);
   #ifdef CONFIG_SHMEM
   bool shmem_mapping(struct address_space *mapping);
@@@ -1878,7 -1866,7 +1878,7 @@@ extern int vma_adjust(struct vm_area_st
   extern struct vm_area_struct *vma_merge(struct mm_struct *,
         struct vm_area_struct *prev, unsigned long addr, unsigned long end,
         unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
- -      struct mempolicy *, struct vm_userfaultfd_ctx);
+ +      struct mempolicy *, struct vm_userfaultfd_ctx, const char __user *);
   extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
   extern int split_vma(struct mm_struct *,
         struct vm_area_struct *, unsigned long addr, int new_below);
@@@ -1910,6 -1898,7 +1910,7 @@@ extern void mm_drop_all_locks(struct mm
   
   extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
   extern struct file *get_mm_exe_file(struct mm_struct *mm);
+ extern struct file *get_task_exe_file(struct task_struct *task);
   
   extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
   extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
diff --combined kernel/cpuset.c

index e2e294d,e120bd9..f4d6afa
--- 1/kernel/cpuset.c
--- 2/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@@ -98,7 -98,6 +98,7 @@@ struct cpuset 
   
         /* user-configured CPUs and Memory Nodes allowed to tasks */
         cpumask_var_t cpus_allowed;
+ +      cpumask_var_t cpus_requested;
         nodemask_t mems_allowed;
   
         /* effective CPUs and Memory Nodes allowed to tasks */
@@@ -387,7 -386,7 +387,7 @@@ static void cpuset_update_task_spread_f
   
   static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
   {
- -      return  cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
+ +      return  cpumask_subset(p->cpus_requested, q->cpus_requested) &&
                 nodes_subset(p->mems_allowed, q->mems_allowed) &&
                 is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
                 is_mem_exclusive(p) <= is_mem_exclusive(q);
@@@ -487,7 -486,7 +487,7 @@@ static int validate_change(struct cpuse
         cpuset_for_each_child(c, css, par) {
                 if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
                     c != cur &&
- -                  cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
+ +                  cpumask_intersects(trial->cpus_requested, c->cpus_requested))
                         goto out;
                 if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
                     c != cur &&
@@@ -946,18 -945,17 +946,18 @@@ static int update_cpumask(struct cpuse
         if (!*buf) {
                 cpumask_clear(trialcs->cpus_allowed);
         } else {
- -              retval = cpulist_parse(buf, trialcs->cpus_allowed);
+ +              retval = cpulist_parse(buf, trialcs->cpus_requested);
                 if (retval < 0)
                         return retval;
   
- -              if (!cpumask_subset(trialcs->cpus_allowed,
- -                                  top_cpuset.cpus_allowed))
+ +              if (!cpumask_subset(trialcs->cpus_requested, cpu_present_mask))
                         return -EINVAL;
+ +
+ +              cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested, cpu_active_mask);
         }
   
         /* Nothing to do if the cpus didn't change */
- -      if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
+ +      if (cpumask_equal(cs->cpus_requested, trialcs->cpus_requested))
                 return 0;
   
         retval = validate_change(cs, trialcs);
@@@ -966,7 -964,6 +966,7 @@@
   
         spin_lock_irq(&callback_lock);
         cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
+ +      cpumask_copy(cs->cpus_requested, trialcs->cpus_requested);
         spin_unlock_irq(&callback_lock);
   
         /* use trialcs->cpus_allowed as a temp variable */
@@@ -1757,7 -1754,7 +1757,7 @@@ static int cpuset_common_seq_show(struc
   
         switch (type) {
         case FILE_CPULIST:
- -              seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed));
+ +              seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_requested));
                 break;
         case FILE_MEMLIST:
                 seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed));
@@@ -1946,14 -1943,11 +1946,14 @@@ cpuset_css_alloc(struct cgroup_subsys_s
                 return ERR_PTR(-ENOMEM);
         if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL))
                 goto free_cs;
+ +      if (!alloc_cpumask_var(&cs->cpus_requested, GFP_KERNEL))
+ +              goto free_allowed;
         if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL))
- -              goto free_cpus;
+ +              goto free_requested;
   
         set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
         cpumask_clear(cs->cpus_allowed);
+ +      cpumask_clear(cs->cpus_requested);
         nodes_clear(cs->mems_allowed);
         cpumask_clear(cs->effective_cpus);
         nodes_clear(cs->effective_mems);
@@@ -1962,9 -1956,7 +1962,9 @@@
   
         return &cs->css;
   
- -free_cpus:
+ +free_requested:
+ +      free_cpumask_var(cs->cpus_requested);
+ +free_allowed:
         free_cpumask_var(cs->cpus_allowed);
   free_cs:
         kfree(cs);
@@@ -2027,7 -2019,6 +2027,7 @@@ static int cpuset_css_online(struct cgr
         cs->mems_allowed = parent->mems_allowed;
         cs->effective_mems = parent->mems_allowed;
         cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
+ +      cpumask_copy(cs->cpus_requested, parent->cpus_requested);
         cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
         spin_unlock_irq(&callback_lock);
   out_unlock:
@@@ -2062,7 -2053,6 +2062,7 @@@ static void cpuset_css_free(struct cgro
   
         free_cpumask_var(cs->effective_cpus);
         free_cpumask_var(cs->cpus_allowed);
+ +      free_cpumask_var(cs->cpus_requested);
         kfree(cs);
   }
   
@@@ -2084,34 -2074,31 +2084,49 @@@ static void cpuset_bind(struct cgroup_s
         mutex_unlock(&cpuset_mutex);
   }
   
+ +static int cpuset_allow_attach(struct cgroup_taskset *tset)
+ +{
+ +      const struct cred *cred = current_cred(), *tcred;
+ +      struct task_struct *task;
+ +      struct cgroup_subsys_state *css;
+ +
+ +      cgroup_taskset_for_each(task, css, tset) {
+ +              tcred = __task_cred(task);
+ +
+ +              if ((current != task) && !capable(CAP_SYS_ADMIN) &&
+ +                   cred->euid.val != tcred->uid.val && cred->euid.val != tcred->suid.val)
+ +                      return -EACCES;
+ +      }
+ +
+ +      return 0;
+ +}
+ +
+ /*
+  * Make sure the new task conforms to the current state of its parent,
+  * which could have been changed by cpuset just after it inherits the
+  * state from the parent and before it sits on the cgroup's task list.
+  */
+ void cpuset_fork(struct task_struct *task)
+ {
+       if (task_css_is_root(task, cpuset_cgrp_id))
+               return;
+ 
+       set_cpus_allowed_ptr(task, &current->cpus_allowed);
+       task->mems_allowed = current->mems_allowed;
+ }
+ 
   struct cgroup_subsys cpuset_cgrp_subsys = {
         .css_alloc      = cpuset_css_alloc,
         .css_online     = cpuset_css_online,
         .css_offline    = cpuset_css_offline,
         .css_free       = cpuset_css_free,
         .can_attach     = cpuset_can_attach,
+ +      .allow_attach   = cpuset_allow_attach,
         .cancel_attach  = cpuset_cancel_attach,
         .attach         = cpuset_attach,
         .post_attach    = cpuset_post_attach,
         .bind           = cpuset_bind,
+       .fork           = cpuset_fork,
         .legacy_cftypes = files,
         .early_init     = 1,
   };
@@@ -2130,11 -2117,8 +2145,11 @@@ int __init cpuset_init(void
                 BUG();
         if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL))
                 BUG();
+ +      if (!alloc_cpumask_var(&top_cpuset.cpus_requested, GFP_KERNEL))
+ +              BUG();
   
         cpumask_setall(top_cpuset.cpus_allowed);
+ +      cpumask_setall(top_cpuset.cpus_requested);
         nodes_setall(top_cpuset.mems_allowed);
         cpumask_setall(top_cpuset.effective_cpus);
         nodes_setall(top_cpuset.effective_mems);
@@@ -2268,7 -2252,7 +2283,7 @@@ retry
                 goto retry;
         }
   
- -      cpumask_and(&new_cpus, cs->cpus_allowed, parent_cs(cs)->effective_cpus);
+ +      cpumask_and(&new_cpus, cs->cpus_requested, parent_cs(cs)->effective_cpus);
         nodes_and(new_mems, cs->mems_allowed, parent_cs(cs)->effective_mems);
   
         cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
diff --combined kernel/fork.c

index d6a6da5,8860d1f..a1d1632
--- 1/kernel/fork.c
--- 2/kernel/fork.c
+++ b/kernel/fork.c
@@@ -764,6 -764,29 +764,29 @@@ struct file *get_mm_exe_file(struct mm_
   EXPORT_SYMBOL(get_mm_exe_file);
   
   /**
+  * get_task_exe_file - acquire a reference to the task's executable file
+  *
+  * Returns %NULL if task's mm (if any) has no associated executable file or
+  * this is a kernel thread with borrowed mm (see the comment above get_task_mm).
+  * User must release file via fput().
+  */
+ struct file *get_task_exe_file(struct task_struct *task)
+ {
+       struct file *exe_file = NULL;
+       struct mm_struct *mm;
+ 
+       task_lock(task);
+       mm = task->mm;
+       if (mm) {
+               if (!(task->flags & PF_KTHREAD))
+                       exe_file = get_mm_exe_file(mm);
+       }
+       task_unlock(task);
+       return exe_file;
+ }
+ EXPORT_SYMBOL(get_task_exe_file);
+ 
+ /**
    * get_task_mm - acquire a reference to the task's mm
    *
    * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
@@@ -800,8 -823,7 +823,8 @@@ struct mm_struct *mm_access(struct task
   
         mm = get_task_mm(task);
         if (mm && mm != current->mm &&
- -                      !ptrace_may_access(task, mode)) {
+ +                      !ptrace_may_access(task, mode) &&
+ +                      !capable(CAP_SYS_RESOURCE)) {
                 mmput(mm);
                 mm = ERR_PTR(-EACCES);
         }
diff --combined kernel/sched/core.c

index 778335a,20253db..01cb249
--- 1/kernel/sched/core.c
--- 2/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@@ -89,7 -89,6 +89,7 @@@
   
   #define CREATE_TRACE_POINTS
   #include <trace/events/sched.h>
+ +#include "walt.h"
   
   DEFINE_MUTEX(sched_domains_mutex);
   DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@@ -288,18 -287,6 +288,18 @@@ int sysctl_sched_rt_runtime = 950000
   /* cpus with isolated domains */
   cpumask_var_t cpu_isolated_map;
   
+ +struct rq *
+ +lock_rq_of(struct task_struct *p, unsigned long *flags)
+ +{
+ +      return task_rq_lock(p, flags);
+ +}
+ +
+ +void
+ +unlock_rq_of(struct rq *rq, struct task_struct *p, unsigned long *flags)
+ +{
+ +      task_rq_unlock(rq, p, flags);
+ +}
+ +
   /*
    * this_rq_lock - lock this runqueue and disable interrupts.
    */
@@@ -1089,9 -1076,7 +1089,9 @@@ static struct rq *move_queued_task(stru
   
         dequeue_task(rq, p, 0);
         p->on_rq = TASK_ON_RQ_MIGRATING;
+ +      double_lock_balance(rq, cpu_rq(new_cpu));
         set_task_cpu(p, new_cpu);
+ +      double_unlock_balance(rq, cpu_rq(new_cpu));
         raw_spin_unlock(&rq->lock);
   
         rq = cpu_rq(new_cpu);
@@@ -1315,8 -1300,6 +1315,8 @@@ void set_task_cpu(struct task_struct *p
                         p->sched_class->migrate_task_rq(p);
                 p->se.nr_migrations++;
                 perf_event_task_migrate(p);
+ +
+ +              walt_fixup_busy_time(p, new_cpu);
         }
   
         __set_task_cpu(p, new_cpu);
@@@ -1945,10 -1928,6 +1945,10 @@@ try_to_wake_up(struct task_struct *p, u
   {
         unsigned long flags;
         int cpu, success = 0;
+ +#ifdef CONFIG_SMP
+ +      struct rq *rq;
+ +      u64 wallclock;
+ +#endif
   
         /*
          * If we are going to wake up a thread waiting for CONDITION we
@@@ -1966,6 -1945,28 +1966,28 @@@
         success = 1; /* we're going to change ->state */
         cpu = task_cpu(p);
   
+       /*
+        * Ensure we load p->on_rq _after_ p->state, otherwise it would
+        * be possible to, falsely, observe p->on_rq == 0 and get stuck
+        * in smp_cond_load_acquire() below.
+        *
+        * sched_ttwu_pending()                 try_to_wake_up()
+        *   [S] p->on_rq = 1;                  [L] p->state
+        *       UNLOCK rq->lock  -----.
+        *                              \
+        *                               +---   RMB
+        * schedule()                   /
+        *       LOCK rq->lock    -----'
+        *       UNLOCK rq->lock
+        *
+        * [task p]
+        *   [S] p->state = UNINTERRUPTIBLE     [L] p->on_rq
+        *
+        * Pairs with the UNLOCK+LOCK on rq->lock from the
+        * last wakeup of our task and the schedule that got our task
+        * current.
+        */
+       smp_rmb();
         if (p->on_rq && ttwu_remote(p, wake_flags))
                 goto stat;
   
@@@ -2006,14 -2007,6 +2028,14 @@@
          */
         smp_rmb();
   
+ +      rq = cpu_rq(task_cpu(p));
+ +
+ +      raw_spin_lock(&rq->lock);
+ +      wallclock = walt_ktime_clock();
+ +      walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
+ +      walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
+ +      raw_spin_unlock(&rq->lock);
+ +
         p->sched_contributes_to_load = !!task_contributes_to_load(p);
         p->state = TASK_WAKING;
   
@@@ -2021,12 -2014,10 +2043,12 @@@
                 p->sched_class->task_waking(p);
   
         cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
+ +
         if (task_cpu(p) != cpu) {
                 wake_flags |= WF_MIGRATED;
                 set_task_cpu(p, cpu);
         }
+ +
   #endif /* CONFIG_SMP */
   
         ttwu_queue(p, cpu);
@@@ -2075,13 -2066,8 +2097,13 @@@ static void try_to_wake_up_local(struc
   
         trace_sched_waking(p);
   
- -      if (!task_on_rq_queued(p))
+ +      if (!task_on_rq_queued(p)) {
+ +              u64 wallclock = walt_ktime_clock();
+ +
+ +              walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
+ +              walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
                 ttwu_activate(rq, p, ENQUEUE_WAKEUP);
+ +      }
   
         ttwu_do_wakeup(rq, p, 0);
         ttwu_stat(p, smp_processor_id(), 0);
@@@ -2147,7 -2133,6 +2169,7 @@@ static void __sched_fork(unsigned long 
         p->se.nr_migrations             = 0;
         p->se.vruntime                  = 0;
         INIT_LIST_HEAD(&p->se.group_node);
+ +      walt_init_new_task_load(p);
   
   #ifdef CONFIG_SCHEDSTATS
         memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@@ -2415,9 -2400,6 +2437,9 @@@ void wake_up_new_task(struct task_struc
         struct rq *rq;
   
         raw_spin_lock_irqsave(&p->pi_lock, flags);
+ +
+ +      walt_init_new_task_load(p);
+ +
         /* Initialize new task's runnable average */
         init_entity_runnable_average(&p->se);
   #ifdef CONFIG_SMP
@@@ -2430,8 -2412,7 +2452,8 @@@
   #endif
   
         rq = __task_rq_lock(p);
- -      activate_task(rq, p, 0);
+ +      walt_mark_task_starting(p);
+ +      activate_task(rq, p, ENQUEUE_WAKEUP_NEW);
         p->on_rq = TASK_ON_RQ_QUEUED;
         trace_sched_wakeup_new(p);
         check_preempt_curr(rq, p, WF_FORK);
@@@ -2812,36 -2793,6 +2834,36 @@@ unsigned long nr_iowait_cpu(int cpu
         return atomic_read(&this->nr_iowait);
   }
   
+ +#ifdef CONFIG_CPU_QUIET
+ +u64 nr_running_integral(unsigned int cpu)
+ +{
+ +      unsigned int seqcnt;
+ +      u64 integral;
+ +      struct rq *q;
+ +
+ +      if (cpu >= nr_cpu_ids)
+ +              return 0;
+ +
+ +      q = cpu_rq(cpu);
+ +
+ +      /*
+ +       * Update average to avoid reading a stale value if there were
+ +       * no run-queue changes for a long time. On the other hand, if
+ +       * the changes are happening right now, just read the current value
+ +       * directly.
+ +       */
+ +
+ +      seqcnt = read_seqcount_begin(&q->ave_seqcnt);
+ +      integral = do_nr_running_integral(q);
+ +      if (read_seqcount_retry(&q->ave_seqcnt, seqcnt)) {
+ +              read_seqcount_begin(&q->ave_seqcnt);
+ +              integral = q->nr_running_integral;
+ +      }
+ +
+ +      return integral;
+ +}
+ +#endif
+ +
   void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
   {
         struct rq *rq = this_rq();
@@@ -2928,93 -2879,6 +2950,93 @@@ unsigned long long task_sched_runtime(s
         return ns;
   }
   
+ +#ifdef CONFIG_CPU_FREQ_GOV_SCHED
+ +
+ +static inline
+ +unsigned long add_capacity_margin(unsigned long cpu_capacity)
+ +{
+ +      cpu_capacity  = cpu_capacity * capacity_margin;
+ +      cpu_capacity /= SCHED_CAPACITY_SCALE;
+ +      return cpu_capacity;
+ +}
+ +
+ +static inline
+ +unsigned long sum_capacity_reqs(unsigned long cfs_cap,
+ +                              struct sched_capacity_reqs *scr)
+ +{
+ +      unsigned long total = add_capacity_margin(cfs_cap + scr->rt);
+ +      return total += scr->dl;
+ +}
+ +
+ +static void sched_freq_tick_pelt(int cpu)
+ +{
+ +      unsigned long cpu_utilization = capacity_max;
+ +      unsigned long capacity_curr = capacity_curr_of(cpu);
+ +      struct sched_capacity_reqs *scr;
+ +
+ +      scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
+ +      if (sum_capacity_reqs(cpu_utilization, scr) < capacity_curr)
+ +              return;
+ +
+ +      /*
+ +       * To make free room for a task that is building up its "real"
+ +       * utilization and to harm its performance the least, request
+ +       * a jump to a higher OPP as soon as the margin of free capacity
+ +       * is impacted (specified by capacity_margin).
+ +       */
+ +      set_cfs_cpu_capacity(cpu, true, cpu_utilization);
+ +}
+ +
+ +#ifdef CONFIG_SCHED_WALT
+ +static void sched_freq_tick_walt(int cpu)
+ +{
+ +      unsigned long cpu_utilization = cpu_util(cpu);
+ +      unsigned long capacity_curr = capacity_curr_of(cpu);
+ +
+ +      if (walt_disabled || !sysctl_sched_use_walt_cpu_util)
+ +              return sched_freq_tick_pelt(cpu);
+ +
+ +      /*
+ +       * Add a margin to the WALT utilization.
+ +       * NOTE: WALT tracks a single CPU signal for all the scheduling
+ +       * classes, thus this margin is going to be added to the DL class as
+ +       * well, which is something we do not do in sched_freq_tick_pelt case.
+ +       */
+ +      cpu_utilization = add_capacity_margin(cpu_utilization);
+ +      if (cpu_utilization <= capacity_curr)
+ +              return;
+ +
+ +      /*
+ +       * It is likely that the load is growing so we
+ +       * keep the added margin in our request as an
+ +       * extra boost.
+ +       */
+ +      set_cfs_cpu_capacity(cpu, true, cpu_utilization);
+ +
+ +}
+ +#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu)
+ +#else
+ +#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu)
+ +#endif /* CONFIG_SCHED_WALT */
+ +
+ +static void sched_freq_tick(int cpu)
+ +{
+ +      unsigned long capacity_orig, capacity_curr;
+ +
+ +      if (!sched_freq())
+ +              return;
+ +
+ +      capacity_orig = capacity_orig_of(cpu);
+ +      capacity_curr = capacity_curr_of(cpu);
+ +      if (capacity_curr == capacity_orig)
+ +              return;
+ +
+ +      _sched_freq_tick(cpu);
+ +}
+ +#else
+ +static inline void sched_freq_tick(int cpu) { }
+ +#endif /* CONFIG_CPU_FREQ_GOV_SCHED */
+ +
   /*
    * This function gets called by the timer code, with HZ frequency.
    * We call it with interrupts disabled.
@@@ -3028,14 -2892,10 +3050,14 @@@ void scheduler_tick(void
         sched_clock_tick();
   
         raw_spin_lock(&rq->lock);
+ +      walt_set_window_start(rq);
         update_rq_clock(rq);
         curr->sched_class->task_tick(rq, curr, 0);
         update_cpu_load_active(rq);
+ +      walt_update_task_ravg(rq->curr, rq, TASK_UPDATE,
+ +                      walt_ktime_clock(), 0);
         calc_global_load_tick(rq);
+ +      sched_freq_tick(cpu);
         raw_spin_unlock(&rq->lock);
   
         perf_event_task_tick();
@@@ -3272,7 -3132,6 +3294,7 @@@ static void __sched notrace __schedule(
         unsigned long *switch_count;
         struct rq *rq;
         int cpu;
+ +      u64 wallclock;
   
         cpu = smp_processor_id();
         rq = cpu_rq(cpu);
@@@ -3334,9 -3193,6 +3356,9 @@@
                 update_rq_clock(rq);
   
         next = pick_next_task(rq, prev);
+ +      wallclock = walt_ktime_clock();
+ +      walt_update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
+ +      walt_update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
         clear_tsk_need_resched(prev);
         clear_preempt_need_resched();
         rq->clock_skip_update = 0;
@@@ -5163,7 -5019,6 +5185,7 @@@ void init_idle(struct task_struct *idle
         raw_spin_lock(&rq->lock);
   
         __sched_fork(0, idle);
+ +
         idle->state = TASK_RUNNING;
         idle->se.exec_start = sched_clock();
   
@@@ -5546,60 -5401,9 +5568,60 @@@ set_table_entry(struct ctl_table *entry
   }
   
   static struct ctl_table *
+ +sd_alloc_ctl_energy_table(struct sched_group_energy *sge)
+ +{
+ +      struct ctl_table *table = sd_alloc_ctl_entry(5);
+ +
+ +      if (table == NULL)
+ +              return NULL;
+ +
+ +      set_table_entry(&table[0], "nr_idle_states", &sge->nr_idle_states,
+ +                      sizeof(int), 0644, proc_dointvec_minmax, false);
+ +      set_table_entry(&table[1], "idle_states", &sge->idle_states[0].power,
+ +                      sge->nr_idle_states*sizeof(struct idle_state), 0644,
+ +                      proc_doulongvec_minmax, false);
+ +      set_table_entry(&table[2], "nr_cap_states", &sge->nr_cap_states,
+ +                      sizeof(int), 0644, proc_dointvec_minmax, false);
+ +      set_table_entry(&table[3], "cap_states", &sge->cap_states[0].cap,
+ +                      sge->nr_cap_states*sizeof(struct capacity_state), 0644,
+ +                      proc_doulongvec_minmax, false);
+ +
+ +      return table;
+ +}
+ +
+ +/*
+ + * Build the sysctl table for one sched_group: a single "energy" directory
+ + * whose child is the table built from the group's energy data.
+ + * Returns NULL if the two-slot table cannot be allocated.
+ + */
+ +static struct ctl_table *
+ +sd_alloc_ctl_group_table(struct sched_group *sg)
+ +{
+ +      struct ctl_table *dir = sd_alloc_ctl_entry(2);
+ +
+ +      if (!dir)
+ +              return NULL;
+ +
+ +      /* Only slot 0 is populated; slot 1 is left as allocated. */
+ +      dir[0].procname = kstrdup("energy", GFP_KERNEL);
+ +      dir[0].mode = 0555;
+ +      dir[0].child = sd_alloc_ctl_energy_table((struct sched_group_energy *)sg->sge);
+ +
+ +      return dir;
+ +}
+ +
+ +static struct ctl_table *
   sd_alloc_ctl_domain_table(struct sched_domain *sd)
   {
- -      struct ctl_table *table = sd_alloc_ctl_entry(14);
+ +      struct ctl_table *table;
+ +      unsigned int nr_entries = 14;
+ +
+ +      int i = 0;
+ +      struct sched_group *sg = sd->groups;
+ +
+ +      if (sg->sge) {
+ +              int nr_sgs = 0;
+ +
+ +              do {} while (nr_sgs++, sg = sg->next, sg != sd->groups);
+ +
+ +              nr_entries += nr_sgs;
+ +      }
+ +
+ +      table = sd_alloc_ctl_entry(nr_entries);
   
         if (table == NULL)
                 return NULL;
@@@ -5632,19 -5436,7 +5654,19 @@@
                 sizeof(long), 0644, proc_doulongvec_minmax, false);
         set_table_entry(&table[12], "name", sd->name,
                 CORENAME_MAX_SIZE, 0444, proc_dostring, false);
- -      /* &table[13] is terminator */
+ +      sg = sd->groups;
+ +      if (sg->sge) {
+ +              char buf[32];
+ +              struct ctl_table *entry = &table[13];
+ +
+ +              do {
+ +                      snprintf(buf, 32, "group%d", i);
+ +                      entry->procname = kstrdup(buf, GFP_KERNEL);
+ +                      entry->mode = 0555;
+ +                      entry->child = sd_alloc_ctl_group_table(sg);
+ +              } while (entry++, i++, sg = sg->next, sg != sd->groups);
+ +      }
+ +      /* &table[nr_entries-1] is terminator */
   
         return table;
   }
@@@ -5760,9 -5552,6 +5782,9 @@@ migration_call(struct notifier_block *n
         switch (action & ~CPU_TASKS_FROZEN) {
   
         case CPU_UP_PREPARE:
+ +              raw_spin_lock_irqsave(&rq->lock, flags);
+ +              walt_set_window_start(rq);
+ +              raw_spin_unlock_irqrestore(&rq->lock, flags);
                 rq->calc_load_update = calc_load_update;
                 account_reset_rq(rq);
                 break;
@@@ -5783,7 -5572,6 +5805,7 @@@
                 sched_ttwu_pending();
                 /* Update our root-domain */
                 raw_spin_lock_irqsave(&rq->lock, flags);
+ +              walt_migrate_sync_cpu(cpu);
                 if (rq->rd) {
                         BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
                         set_rq_offline(rq);
@@@ -5955,7 -5743,7 +5977,7 @@@ static int sched_domain_debug_one(struc
                 printk(KERN_CONT " %*pbl",
                        cpumask_pr_args(sched_group_cpus(group)));
                 if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
- -                      printk(KERN_CONT " (cpu_capacity = %d)",
+ +                      printk(KERN_CONT " (cpu_capacity = %lu)",
                                 group->sgc->capacity);
                 }
   
@@@ -6016,8 -5804,7 +6038,8 @@@ static int sd_degenerate(struct sched_d
                          SD_BALANCE_EXEC |
                          SD_SHARE_CPUCAPACITY |
                          SD_SHARE_PKG_RESOURCES |
- -                       SD_SHARE_POWERDOMAIN)) {
+ +                       SD_SHARE_POWERDOMAIN |
+ +                       SD_SHARE_CAP_STATES)) {
                 if (sd->groups != sd->groups->next)
                         return 0;
         }
@@@ -6049,8 -5836,7 +6071,8 @@@ sd_parent_degenerate(struct sched_domai
                                 SD_SHARE_CPUCAPACITY |
                                 SD_SHARE_PKG_RESOURCES |
                                 SD_PREFER_SIBLING |
- -                              SD_SHARE_POWERDOMAIN);
+ +                              SD_SHARE_POWERDOMAIN |
+ +                              SD_SHARE_CAP_STATES);
                 if (nr_node_ids == 1)
                         pflags &= ~SD_SERIALIZE;
         }
@@@ -6129,8 -5915,6 +6151,8 @@@ static int init_rootdomain(struct root_
   
         if (cpupri_init(&rd->cpupri) != 0)
                 goto free_rto_mask;
+ +
+ +      init_max_cpu_capacity(&rd->max_cpu_capacity);
         return 0;
   
   free_rto_mask:
@@@ -6236,13 -6020,11 +6258,13 @@@ DEFINE_PER_CPU(int, sd_llc_id)
   DEFINE_PER_CPU(struct sched_domain *, sd_numa);
   DEFINE_PER_CPU(struct sched_domain *, sd_busy);
   DEFINE_PER_CPU(struct sched_domain *, sd_asym);
+ +DEFINE_PER_CPU(struct sched_domain *, sd_ea);
+ +DEFINE_PER_CPU(struct sched_domain *, sd_scs);
   
   static void update_top_cache_domain(int cpu)
   {
         struct sched_domain *sd;
- -      struct sched_domain *busy_sd = NULL;
+ +      struct sched_domain *busy_sd = NULL, *ea_sd = NULL;
         int id = cpu;
         int size = 1;
   
@@@ -6263,17 -6045,6 +6285,17 @@@
   
         sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
         rcu_assign_pointer(per_cpu(sd_asym, cpu), sd);
+ +
+ +      for_each_domain(cpu, sd) {
+ +              if (sd->groups->sge)
+ +                      ea_sd = sd;
+ +              else
+ +                      break;
+ +      }
+ +      rcu_assign_pointer(per_cpu(sd_ea, cpu), ea_sd);
+ +
+ +      sd = highest_flag_domain(cpu, SD_SHARE_CAP_STATES);
+ +      rcu_assign_pointer(per_cpu(sd_scs, cpu), sd);
   }
   
   /*
@@@ -6434,7 -6205,6 +6456,7 @@@ build_overlap_sched_groups(struct sched
                  * die on a /0 trap.
                  */
                 sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
+ +              sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
   
                 /*
                  * Make sure the first group of this domain contains the
@@@ -6564,66 -6334,6 +6586,66 @@@ static void init_sched_groups_capacity(
   }
   
   /*
+ + * Check that the per-cpu provided sd energy data is consistent for all cpus
+ + * within the mask.
+ + */
+ +static inline void check_sched_energy_data(int cpu, sched_domain_energy_f fn,
+ +                                         const struct cpumask *cpumask)
+ +{
+ +      /* Energy data reported for @cpu is the reference for all comparisons. */
+ +      const struct sched_group_energy * const ref = fn(cpu);
+ +      struct cpumask others;
+ +      int other_cpu;
+ +
+ +      /* With fewer than two CPUs in the mask there is nothing to compare. */
+ +      if (cpumask_weight(cpumask) < 2)
+ +              return;
+ +
+ +      /* XOR @cpumask with the mask obtained for @cpu to form the walk set. */
+ +      cpumask_xor(&others, cpumask, get_cpu_mask(cpu));
+ +
+ +      for_each_cpu(other_cpu, &others) {
+ +              const struct sched_group_energy * const cur = fn(other_cpu);
+ +              int idx;
+ +
+ +              /* Idle states: same count, same power for every state. */
+ +              BUG_ON(cur->nr_idle_states != ref->nr_idle_states);
+ +
+ +              for (idx = 0; idx < (cur->nr_idle_states); idx++) {
+ +                      BUG_ON(cur->idle_states[idx].power !=
+ +                                      ref->idle_states[idx].power);
+ +              }
+ +
+ +              /* Capacity states: same count, same cap and power per state. */
+ +              BUG_ON(cur->nr_cap_states != ref->nr_cap_states);
+ +
+ +              for (idx = 0; idx < (cur->nr_cap_states); idx++) {
+ +                      BUG_ON(cur->cap_states[idx].cap != ref->cap_states[idx].cap);
+ +                      BUG_ON(cur->cap_states[idx].power !=
+ +                                      ref->cap_states[idx].power);
+ +              }
+ +      }
+ +}
+ +
+ +static void init_sched_energy(int cpu, struct sched_domain *sd,
+ +                            sched_domain_energy_f fn)
+ +{
+ +      if (!(fn && fn(cpu)))
+ +              return;
+ +
+ +      if (cpu != group_balance_cpu(sd->groups))
+ +              return;
+ +
+ +      if (sd->child && !sd->child->groups->sge) {
+ +              pr_err("BUG: EAS setup broken for CPU%d\n", cpu);
+ +#ifdef CONFIG_SCHED_DEBUG
+ +              pr_err("     energy data on %s but not on %s domain\n",
+ +                      sd->name, sd->child->name);
+ +#endif
+ +              return;
+ +      }
+ +
+ +      check_sched_energy_data(cpu, fn, sched_group_cpus(sd->groups));
+ +
+ +      sd->groups->sge = fn(cpu);
+ +}
+ +
+ +/*
    * Initializers for schedule domains
    * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
    */
@@@ -6731,7 -6441,6 +6753,7 @@@ static int sched_domains_curr_level
    * SD_SHARE_PKG_RESOURCES - describes shared caches
    * SD_NUMA                - describes NUMA topologies
    * SD_SHARE_POWERDOMAIN   - describes shared power domain
+ + * SD_SHARE_CAP_STATES    - describes shared capacity states
    *
    * Odd one out:
    * SD_ASYM_PACKING        - describes SMT quirks
@@@ -6741,8 -6450,7 +6763,8 @@@
          SD_SHARE_PKG_RESOURCES |       \
          SD_NUMA |                      \
          SD_ASYM_PACKING |              \
- -       SD_SHARE_POWERDOMAIN)
+ +       SD_SHARE_POWERDOMAIN |         \
+ +       SD_SHARE_CAP_STATES)
   
   static struct sched_domain *
   sd_init(struct sched_domain_topology_level *tl, int cpu)
@@@ -7292,7 -7000,6 +7314,7 @@@ static int build_sched_domains(const st
         enum s_alloc alloc_state;
         struct sched_domain *sd;
         struct s_data d;
+ +      struct rq *rq = NULL;
         int i, ret = -ENOMEM;
   
         alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
@@@ -7331,13 -7038,10 +7353,13 @@@
   
         /* Calculate CPU capacity for physical packages and nodes */
         for (i = nr_cpumask_bits-1; i >= 0; i--) {
+ +              struct sched_domain_topology_level *tl = sched_domain_topology;
+ +
                 if (!cpumask_test_cpu(i, cpu_map))
                         continue;
   
- -              for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+ +              for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent, tl++) {
+ +                      init_sched_energy(i, sd, tl->energy);
                         claim_allocations(i, sd);
                         init_sched_groups_capacity(i, sd);
                 }
@@@ -7346,7 -7050,6 +7368,7 @@@
         /* Attach the domains */
         rcu_read_lock();
         for_each_cpu(i, cpu_map) {
+ +              rq = cpu_rq(i);
                 sd = *per_cpu_ptr(d.sd, i);
                 cpu_attach_domain(sd, d.rd, i);
         }
@@@ -7628,7 -7331,6 +7650,7 @@@ void __init sched_init_smp(void
   {
         cpumask_var_t non_isolated_cpus;
   
+ +      walt_init_cpu_efficiency();
         alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
         alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
   
@@@ -7806,11 -7508,6 +7828,11 @@@ void __init sched_init(void
                 rq->idle_stamp = 0;
                 rq->avg_idle = 2*sysctl_sched_migration_cost;
                 rq->max_idle_balance_cost = sysctl_sched_migration_cost;
+ +#ifdef CONFIG_SCHED_WALT
+ +              rq->cur_irqload = 0;
+ +              rq->avg_irqload = 0;
+ +              rq->irqload_ts = 0;
+ +#endif
   
                 INIT_LIST_HEAD(&rq->cfs_tasks);
   
@@@ -7874,14 -7571,6 +7896,14 @@@ static inline int preempt_count_equals(
         return (nested == preempt_offset);
   }
   
+ +static int __might_sleep_init_called; /* 0 until __might_sleep_init() runs; tested in ___might_sleep() */
+ +int __init __might_sleep_init(void)
+ +{
+ +      __might_sleep_init_called = 1; /* from here on ___might_sleep() no longer bails out during SYSTEM_BOOTING */
+ +      return 0;
+ +}
+ +early_initcall(__might_sleep_init); /* registered as an early initcall */
+ +
   void __might_sleep(const char *file, int line, int preempt_offset)
   {
         /*
@@@ -7906,10 -7595,8 +7928,10 @@@ void ___might_sleep(const char *file, i
   
         rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
         if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
- -           !is_idle_task(current)) ||
- -          system_state != SYSTEM_RUNNING || oops_in_progress)
+ +           !is_idle_task(current)) || oops_in_progress)
+ +              return;
+ +      if (system_state != SYSTEM_RUNNING &&
+ +          (!__might_sleep_init_called || system_state != SYSTEM_BOOTING))
                 return;
         if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
                 return;
@@@ -8934,7 -8621,6 +8956,7 @@@ struct cgroup_subsys cpu_cgrp_subsys = 
         .fork           = cpu_cgroup_fork,
         .can_attach     = cpu_cgroup_can_attach,
         .attach         = cpu_cgroup_attach,
+ +      .allow_attach   = subsys_cgroup_allow_attach,
         .legacy_cftypes = cpu_files,
         .early_init     = 1,
   };
diff --combined net/ipv6/addrconf.c

index 3cdf591,036b39e..563a91f
--- 1/net/ipv6/addrconf.c
--- 2/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@@ -205,7 -205,6 +205,7 @@@ static struct ipv6_devconf ipv6_devcon
         .accept_ra_rt_info_max_plen = 0,
   #endif
   #endif
+ +      .accept_ra_rt_table     = 0,
         .proxy_ndp              = 0,
         .accept_source_route    = 0,    /* we do not accept RH0 by default. */
         .disable_ipv6           = 0,
@@@ -250,7 -249,6 +250,7 @@@ static struct ipv6_devconf ipv6_devconf
         .accept_ra_rt_info_max_plen = 0,
   #endif
   #endif
+ +      .accept_ra_rt_table     = 0,
         .proxy_ndp              = 0,
         .accept_source_route    = 0,    /* we do not accept RH0 by default. */
         .disable_ipv6           = 0,
@@@ -1900,6 -1898,7 +1900,7 @@@ errdad
         spin_unlock_bh(&ifp->lock);
   
         addrconf_mod_dad_work(ifp, 0);
+       in6_ifa_put(ifp);
   }
   
   /* Join to solicited addr multicast group.
@@@ -2147,31 -2146,6 +2148,31 @@@ static void  __ipv6_try_regen_rndid(str
                 __ipv6_regen_rndid(idev);
   }
   
+ +/*
+ + * Determines into what table to put autoconf PIO/RIO/default routes
+ + * learned on this device, based on the device's accept_ra_rt_table setting:
+ + *
+ + * - If 0, use the same table for every device. This puts routes into
+ + *   one of RT_TABLE_{PREFIX,INFO,DFLT} depending on the type of route
+ + *   (but note that these three are currently all equal to
+ + *   RT6_TABLE_MAIN).
+ + * - If > 0, use the specified table.
+ + * - If < 0, put routes into table dev->ifindex + (-rt_table).
+ + *
+ + * @dev:           device the route was learned on
+ + * @default_table: table to use when the setting is 0, or when @dev has no
+ + *                 inet6_dev to read the setting from
+ + *
+ + * Returns the routing table id to use.
+ + */
+ +u32 addrconf_rt_table(const struct net_device *dev, u32 default_table)
+ +{
+ +      struct inet6_dev *idev = in6_dev_get(dev);
+ +      int sysctl;
+ +      u32 table;
+ +
+ +      /*
+ +       * in6_dev_get() returns NULL when the device has no IPv6 state
+ +       * attached; fall back to the caller's default instead of
+ +       * dereferencing a NULL pointer. No reference was taken, so there
+ +       * is nothing to put.
+ +       */
+ +      if (!idev)
+ +              return default_table;
+ +
+ +      sysctl = idev->cnf.accept_ra_rt_table;
+ +      if (sysctl == 0)
+ +              table = default_table;
+ +      else if (sysctl > 0)
+ +              table = (u32) sysctl;
+ +      else
+ +              table = (unsigned) dev->ifindex + (-sysctl);
+ +
+ +      /* Drop the reference taken by in6_dev_get(). */
+ +      in6_dev_put(idev);
+ +      return table;
+ +}
+ +
   /*
    *    Add prefix route.
    */
@@@ -2181,7 -2155,7 +2182,7 @@@ addrconf_prefix_route(struct in6_addr *
                       unsigned long expires, u32 flags)
   {
         struct fib6_config cfg = {
- -              .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX,
+ +              .fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_PREFIX),
                 .fc_metric = IP6_RT_PRIO_ADDRCONF,
                 .fc_ifindex = dev->ifindex,
                 .fc_expires = expires,
@@@ -2214,7 -2188,7 +2215,7 @@@ static struct rt6_info *addrconf_get_pr
         struct fib6_node *fn;
         struct rt6_info *rt = NULL;
         struct fib6_table *table;
- -      u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX;
+ +      u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_PREFIX);
   
         table = fib6_get_table(dev_net(dev), tb_id);
         if (!table)
@@@ -3636,6 -3610,7 +3637,7 @@@ static void addrconf_dad_work(struct wo
                 addrconf_dad_begin(ifp);
                 goto out;
         } else if (action == DAD_ABORT) {
+               in6_ifa_hold(ifp);
                 addrconf_dad_stop(ifp, 1);
                 goto out;
         }
@@@ -4690,7 -4665,6 +4692,7 @@@ static inline void ipv6_store_devconf(s
         array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
   #endif
   #endif
+ +      array[DEVCONF_ACCEPT_RA_RT_TABLE] = cnf->accept_ra_rt_table;
         array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
         array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
   #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
@@@ -5658,13 -5632,6 +5660,13 @@@ static struct addrconf_sysctl_tabl
   #endif
   #endif
                 {
+ +                      .procname       = "accept_ra_rt_table",
+ +                      .data           = &ipv6_devconf.accept_ra_rt_table,
+ +                      .maxlen         = sizeof(int),
+ +                      .mode           = 0644,
+ +                      .proc_handler   = proc_dointvec,
+ +              },
+ +              {
                         .procname       = "proxy_ndp",
                         .data           = &ipv6_devconf.proxy_ndp,
                         .maxlen         = sizeof(int),
author	Dmitry Shmidt <dimitrysh@google.com>
	Mon, 26 Sep 2016 17:37:43 +0000 (10:37 -0700)
committer	Dmitry Shmidt <dimitrysh@google.com>
	Mon, 26 Sep 2016 17:37:43 +0000 (10:37 -0700)
		1	2
arch/arm64/include/asm/spinlock.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/uaccess.h	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/dm-crypt.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/ioctl.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/mballoc.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/proc/base.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/mm.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/cpuset.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/fork.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched/core.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/ipv6/addrconf.c	patch \|	diff1 \|	diff2 \|	blob \| history