net: Fix skb->csum update in inet_proto_csum_replace16().

[sagit-ice-cold/kernel_xiaomi_msm8998.git] / kernel / futex.c
diff --git a/kernel/futex.c b/kernel/futex.c

index 684d754..15d850f 100644 (file)
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -470,6 +470,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
         unsigned long address = (unsigned long)uaddr;
         struct mm_struct *mm = current->mm;
         struct page *page, *page_head;
+       struct address_space *mapping;
         int err, ro = 0;
  
         /*
@@ -555,7 +556,19 @@ again:
         }
  #endif
  
-       lock_page(page_head);
+       /*
+        * The treatment of mapping from this point on is critical. The page
+        * lock protects many things but in this context the page lock
+        * stabilizes mapping, prevents inode freeing in the shared
+        * file-backed region case and guards against movement to swap cache.
+        *
+        * Strictly speaking the page lock is not needed in all cases being
+        * considered here and page lock forces unnecessary serialization.
+        * From this point on, mapping will be re-verified if necessary and
+        * page lock will be acquired only if it is unavoidable.
+        */
+
+       mapping = READ_ONCE(page_head->mapping);
  
         /*
          * If page_head->mapping is NULL, then it cannot be a PageAnon
@@ -572,18 +585,31 @@ again:
          * shmem_writepage move it from filecache to swapcache beneath us:
          * an unlikely race, but we do need to retry for page_head->mapping.
          */
-       if (!page_head->mapping) {
-               int shmem_swizzled = PageSwapCache(page_head);
+       if (unlikely(!mapping)) {
+               int shmem_swizzled;
+
+               /*
+                * Page lock is required to identify which special case above
+                * applies. If this is really a shmem page then the page lock
+                * will prevent unexpected transitions.
+                */
+               lock_page(page_head);
+               shmem_swizzled = PageSwapCache(page_head) || page_head->mapping;
                 unlock_page(page_head);
                 put_page(page_head);
+
                 if (shmem_swizzled)
                         goto again;
+
                 return -EFAULT;
         }
  
         /*
          * Private mappings are handled in a simple way.
          *
+        * If the futex key is stored on an anonymous page, then the associated
+        * object is the mm which is implicitly pinned by the calling process.
+        *
          * NOTE: When userspace waits on a MAP_SHARED mapping, even if
          * it's a read-only handle, it's expected that futexes attach to
          * the object not the particular process.
@@ -601,16 +627,75 @@ again:
                 key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
                 key->private.mm = mm;
                 key->private.address = address;
+
+               get_futex_key_refs(key); /* implies smp_mb(); (B) */
+
         } else {
+               struct inode *inode;
+
+               /*
+                * The associated futex object in this case is the inode and
+                * the page->mapping must be traversed. Ordinarily this should
+                * be stabilised under page lock but it's not strictly
+                * necessary in this case as we just want to pin the inode, not
+                * update the radix tree or anything like that.
+                *
+                * The RCU read lock is taken as the inode is finally freed
+                * under RCU. If the mapping still matches expectations then the
+                * mapping->host can be safely accessed as being a valid inode.
+                */
+               rcu_read_lock();
+
+               if (READ_ONCE(page_head->mapping) != mapping) {
+                       rcu_read_unlock();
+                       put_page(page_head);
+
+                       goto again;
+               }
+
+               inode = READ_ONCE(mapping->host);
+               if (!inode) {
+                       rcu_read_unlock();
+                       put_page(page_head);
+
+                       goto again;
+               }
+
+               /*
+                * Take a reference unless it is about to be freed. Previously
+                * this reference was taken by ihold under the page lock
+                * pinning the inode in place so i_lock was unnecessary. The
+                * only way for this check to fail is if the inode was
+                * truncated in parallel which is almost certainly an
+                * application bug. In such a case, just retry.
+                *
+                * We are not calling into get_futex_key_refs() in file-backed
+                * cases, therefore a successful atomic_inc return below will
+                * guarantee that get_futex_key() will still imply smp_mb(); (B).
+                */
+               if (!atomic_inc_not_zero(&inode->i_count)) {
+                       rcu_read_unlock();
+                       put_page(page_head);
+
+                       goto again;
+               }
+
+               /* Should be impossible but let's be paranoid for now */
+               if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
+                       err = -EFAULT;
+                       rcu_read_unlock();
+                       iput(inode);
+
+                       goto out;
+               }
+
                 key->both.offset |= FUT_OFF_INODE; /* inode-based key */
-               key->shared.inode = page_head->mapping->host;
+               key->shared.inode = inode;
                 key->shared.pgoff = basepage_index(page);
+               rcu_read_unlock();
         }
  
-       get_futex_key_refs(key); /* implies MB (B) */
-
  out:
-       unlock_page(page_head);
         put_page(page_head);
         return err;
  }
@@ -1244,10 +1329,20 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
         if (unlikely(should_fail_futex(true)))
                 ret = -EFAULT;
  
-       if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
+       if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) {
                 ret = -EFAULT;
-       else if (curval != uval)
-               ret = -EINVAL;
+       } else if (curval != uval) {
+               /*
+                * If an unconditional UNLOCK_PI operation (user space did not
+                * try the TID->0 transition) raced with a waiter setting the
+                * FUTEX_WAITERS flag between get_user() and locking the hash
+                * bucket lock, retry the operation.
+                */
+               if ((FUTEX_TID_MASK & curval) == uval)
+                       ret = -EAGAIN;
+               else
+                       ret = -EINVAL;
+       }
         if (ret) {
                 raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
                 return ret;
@@ -1358,6 +1453,45 @@ out:
         return ret;
  }
  
+static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
+{ /* Decode encoded_op, run the op on *uaddr, then test the value it hands back. */
+       unsigned int op =         (encoded_op & 0x70000000) >> 28; /* bits 28-30 */
+       unsigned int cmp =        (encoded_op & 0x0f000000) >> 24; /* bits 24-27 */
+       int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11); /* bits 12-23 */
+       int cmparg = sign_extend32(encoded_op & 0x00000fff, 11); /* bits 0-11 */
+       int oldval, ret;
+
+       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) { /* oparg is a shift count */
+               if (oparg < 0 || oparg > 31) /* only shifts 0..31 are accepted */
+                       return -EINVAL;
+               oparg = 1 << oparg; /* operand becomes the single bit 1 << count */
+       }
+
+       if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) /* need a writable u32 */
+               return -EFAULT;
+
+       ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr); /* oldval: presumably the pre-op value (confirm) */
+       if (ret) /* non-zero from the arch helper is returned unchanged */
+               return ret;
+
+       switch (cmp) { /* each known case returns 1 if the comparison holds, else 0 */
+       case FUTEX_OP_CMP_EQ:
+               return oldval == cmparg;
+       case FUTEX_OP_CMP_NE:
+               return oldval != cmparg;
+       case FUTEX_OP_CMP_LT:
+               return oldval < cmparg;
+       case FUTEX_OP_CMP_GE:
+               return oldval >= cmparg;
+       case FUTEX_OP_CMP_LE:
+               return oldval <= cmparg;
+       case FUTEX_OP_CMP_GT:
+               return oldval > cmparg;
+       default: /* unrecognised comparison code */
+               return -ENOSYS;
+       }
+}
+
  /*
   * Wake up all waiters hashed on the physical page that is mapped
   * to this virtual address:
@@ -1474,8 +1608,8 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
         if (likely(&hb1->chain != &hb2->chain)) {
                 plist_del(&q->list, &hb1->chain);
                 hb_waiters_dec(hb1);
-               plist_add(&q->list, &hb2->chain);
                 hb_waiters_inc(hb2);
+               plist_add(&q->list, &hb2->chain);
                 q->lock_ptr = &hb2->lock;
         }
         get_futex_key_refs(key2);
@@ -1611,6 +1745,9 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
         struct futex_q *this, *next;
         WAKE_Q(wake_q);
  
+       if (nr_wake < 0 || nr_requeue < 0)
+               return -EINVAL;
+
         if (requeue_pi) {
                 /*
                  * Requeue PI only works on two distinct uaddrs. This
@@ -1929,8 +2066,12 @@ static int unqueue_me(struct futex_q *q)
  
         /* In the common case we don't take the spinlock, which is nice. */
  retry:
-       lock_ptr = q->lock_ptr;
-       barrier();
+       /*
+        * q->lock_ptr can change between this read and the following spin_lock.
+        * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
+        * optimizing lock_ptr out of the logic below.
+        */
+       lock_ptr = READ_ONCE(q->lock_ptr);
         if (lock_ptr != NULL) {
                 spin_lock(lock_ptr);
                 /*
@@ -2538,6 +2679,15 @@ retry:
                 if (ret == -EFAULT)
                         goto pi_faulted;
                 /*
+                * An unconditional UNLOCK_PI op raced against a waiter
+                * setting the FUTEX_WAITERS bit. Try again.
+                */
+               if (ret == -EAGAIN) {
+                       spin_unlock(&hb->lock);
+                       put_futex_key(&key);
+                       goto retry;
+               }
+               /*
                  * wake_futex_pi has detected invalid state. Tell user
                  * space.
                  */
@@ -2671,7 +2821,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
  {
         struct hrtimer_sleeper timeout, *to = NULL;
         struct rt_mutex_waiter rt_waiter;
-       struct rt_mutex *pi_mutex = NULL;
         struct futex_hash_bucket *hb;
         union futex_key key2 = FUTEX_KEY_INIT;
         struct futex_q q = futex_q_init;
@@ -2755,9 +2904,18 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                 if (q.pi_state && (q.pi_state->owner != current)) {
                         spin_lock(q.lock_ptr);
                         ret = fixup_pi_state_owner(uaddr2, &q, current);
+                       if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current)
+                               rt_mutex_unlock(&q.pi_state->pi_mutex);
+                       /*
+                        * Drop the reference to the pi state which
+                        * the requeue_pi() code acquired for us.
+                        */
+                       free_pi_state(q.pi_state);
                         spin_unlock(q.lock_ptr);
                 }
         } else {
+               struct rt_mutex *pi_mutex;
+
                 /*
                  * We have been woken up by futex_unlock_pi(), a timeout, or a
                  * signal.  futex_unlock_pi() will not destroy the lock_ptr nor
@@ -2765,10 +2923,13 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                  */
                 WARN_ON(!q.pi_state);
                 pi_mutex = &q.pi_state->pi_mutex;
-               ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter);
-               debug_rt_mutex_free_waiter(&rt_waiter);
+               ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
  
                 spin_lock(q.lock_ptr);
+               if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
+                       ret = 0;
+
+               debug_rt_mutex_free_waiter(&rt_waiter);
                 /*
                  * Fixup the pi_state owner and possibly acquire the lock if we
                  * haven't already.
@@ -2781,18 +2942,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                 if (res)
                         ret = (res < 0) ? res : 0;
  
+               /*
+                * If fixup_pi_state_owner() faulted and was unable to handle
+                * the fault, unlock the rt_mutex and return the fault to
+                * userspace.
+                */
+               if (ret && rt_mutex_owner(pi_mutex) == current)
+                       rt_mutex_unlock(pi_mutex);
+
                 /* Unqueue and drop the lock. */
                 unqueue_me_pi(&q);
         }
  
-       /*
-        * If fixup_pi_state_owner() faulted and was unable to handle the
-        * fault, unlock the rt_mutex and return the fault to userspace.
-        */
-       if (ret == -EFAULT) {
-               if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
-                       rt_mutex_unlock(pi_mutex);
-       } else if (ret == -EINTR) {
+       if (ret == -EINTR) {
                 /*
                  * We've already been requeued, but cannot restart by calling
                  * futex_lock_pi() directly. We could restart this syscall, but
@@ -2881,7 +3043,7 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
         }
  
         ret = -EPERM;
-       if (!ptrace_may_access(p, PTRACE_MODE_READ))
+       if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
                 goto err_unlock;
  
         head = p->robust_list;
@@ -2905,6 +3067,10 @@ int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
  {
         u32 uval, uninitialized_var(nval), mval;
  
+       /* Futex address must be 32bit aligned */
+       if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0)
+               return -1;
+
  retry:
         if (get_user(uval, uaddr))
                 return -1;
@@ -3175,4 +3341,4 @@ static int __init futex_init(void)
  
         return 0;
  }
-__initcall(futex_init);
+core_initcall(futex_init);