diff --git a/kernel/futex.c b/kernel/futex.c
index eaa3a8d..15d850f 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -470,6 +470,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
        unsigned long address = (unsigned long)uaddr;
        struct mm_struct *mm = current->mm;
        struct page *page, *page_head;
+       struct address_space *mapping;
        int err, ro = 0;
 
        /*
@@ -555,7 +556,19 @@ again:
        }
 #endif
 
-       lock_page(page_head);
+       /*
+        * The treatment of mapping from this point on is critical. The page
+        * lock protects many things but in this context the page lock
+        * stabilizes mapping, prevents inode freeing in the shared
+        * file-backed region case and guards against movement to swap cache.
+        *
+        * Strictly speaking the page lock is not needed in all cases being
+        * considered here and the page lock forces unnecessary serialization.
+        * From this point on, mapping will be re-verified if necessary and
+        * the page lock will be acquired only if it is unavoidable.
+        */
+
+       mapping = READ_ONCE(page_head->mapping);
 
        /*
         * If page_head->mapping is NULL, then it cannot be a PageAnon
@@ -572,18 +585,31 @@ again:
         * shmem_writepage move it from filecache to swapcache beneath us:
         * an unlikely race, but we do need to retry for page_head->mapping.
         */
-       if (!page_head->mapping) {
-               int shmem_swizzled = PageSwapCache(page_head);
+       if (unlikely(!mapping)) {
+               int shmem_swizzled;
+
+               /*
+                * Page lock is required to identify which special case above
+                * applies. If this is really a shmem page then the page lock
+                * will prevent unexpected transitions.
+                */
+               lock_page(page_head);
+               shmem_swizzled = PageSwapCache(page_head) || page_head->mapping;
                unlock_page(page_head);
                put_page(page_head);
+
                if (shmem_swizzled)
                        goto again;
+
                return -EFAULT;
        }
 
        /*
         * Private mappings are handled in a simple way.
         *
+        * If the futex key is stored on an anonymous page, then the associated
+        * object is the mm which is implicitly pinned by the calling process.
+        *
         * NOTE: When userspace waits on a MAP_SHARED mapping, even if
         * it's a read-only handle, it's expected that futexes attach to
         * the object not the particular process.
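Note the widened retry condition in the hunk above: shmem_swizzled is true not
only when the page sits in the swap cache, but also when page_head->mapping has
become non-NULL again by the time the page lock is acquired. In both cases the
mapping only went away transiently, so the lookup is retried rather than failed
with -EFAULT.
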
@@ -601,16 +627,75 @@ again:
                key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
                key->private.mm = mm;
                key->private.address = address;
+
+               get_futex_key_refs(key); /* implies smp_mb(); (B) */
+
        } else {
+               struct inode *inode;
+
+               /*
+                * The associated futex object in this case is the inode and
+                * the page->mapping must be traversed. Ordinarily this should
+                * be stabilised under page lock but it's not strictly
+                * necessary in this case as we just want to pin the inode, not
+                * update the radix tree or anything like that.
+                *
+                * The RCU read lock is taken as the inode is finally freed
+                * under RCU. If the mapping still matches expectations then the
+                * mapping->host can be safely accessed as being a valid inode.
+                */
+               rcu_read_lock();
+
+               if (READ_ONCE(page_head->mapping) != mapping) {
+                       rcu_read_unlock();
+                       put_page(page_head);
+
+                       goto again;
+               }
+
+               inode = READ_ONCE(mapping->host);
+               if (!inode) {
+                       rcu_read_unlock();
+                       put_page(page_head);
+
+                       goto again;
+               }
+
+               /*
+                * Take a reference unless it is about to be freed. Previously
+                * this reference was taken by ihold() under the page lock,
+                * which pinned the inode in place so that i_lock was
+                * unnecessary. The only way for this check to fail is if the
+                * inode was truncated in parallel, which is almost certainly
+                * an application bug. In such a case, just retry.
+                *
+                * We are not calling into get_futex_key_refs() in file-backed
+                * cases, therefore a successful atomic_inc return below will
+                * guarantee that get_futex_key() will still imply smp_mb(); (B).
+                */
+               if (!atomic_inc_not_zero(&inode->i_count)) {
+                       rcu_read_unlock();
+                       put_page(page_head);
+
+                       goto again;
+               }
+
+               /* Should be impossible but let's be paranoid for now */
+               if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
+                       err = -EFAULT;
+                       rcu_read_unlock();
+                       iput(inode);
+
+                       goto out;
+               }
+
                key->both.offset |= FUT_OFF_INODE; /* inode-based key */
-               key->shared.inode = page_head->mapping->host;
+               key->shared.inode = inode;
                key->shared.pgoff = basepage_index(page);
+               rcu_read_unlock();
        }
 
-       get_futex_key_refs(key); /* implies MB (B) */
-
 out:
-       unlock_page(page_head);
        put_page(page_head);
        return err;
 }
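
The RCU reasoning above relies on the VFS freeing inodes only after a grace
period, so a mapping->host pointer observed under rcu_read_lock() cannot be
reused before the atomic_inc_not_zero() liveness check runs. A condensed
sketch of the free side (simplified from fs/inode.c, not a verbatim copy;
details vary by kernel version):

	/* Simplified sketch of the VFS inode free path. */
	static void i_callback(struct rcu_head *head)
	{
		struct inode *inode = container_of(head, struct inode, i_rcu);

		kmem_cache_free(inode_cachep, inode);	/* memory reused only here */
	}

	static void destroy_inode(struct inode *inode)
	{
		/* ... superblock-specific teardown elided ... */
		call_rcu(&inode->i_rcu, i_callback);	/* defer the free past readers */
	}
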
@@ -1368,6 +1453,45 @@ out:
        return ret;
 }
 
+static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
+{
+       unsigned int op =         (encoded_op & 0x70000000) >> 28;
+       unsigned int cmp =        (encoded_op & 0x0f000000) >> 24;
+       int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
+       int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
+       int oldval, ret;
+
+       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
+               if (oparg < 0 || oparg > 31)
+                       return -EINVAL;
+               oparg = 1 << oparg;
+       }
+
+       if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+               return -EFAULT;
+
+       ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
+       if (ret)
+               return ret;
+
+       switch (cmp) {
+       case FUTEX_OP_CMP_EQ:
+               return oldval == cmparg;
+       case FUTEX_OP_CMP_NE:
+               return oldval != cmparg;
+       case FUTEX_OP_CMP_LT:
+               return oldval < cmparg;
+       case FUTEX_OP_CMP_GE:
+               return oldval >= cmparg;
+       case FUTEX_OP_CMP_LE:
+               return oldval <= cmparg;
+       case FUTEX_OP_CMP_GT:
+               return oldval > cmparg;
+       default:
+               return -ENOSYS;
+       }
+}
+
 /*
  * Wake up all waiters hashed on the physical page that is mapped
  * to this virtual address:
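
For reference, the bit layout unpacked in futex_atomic_op_inuser() mirrors the
FUTEX_OP() packing macro that userspace uses to build encoded_op, as it appears
(modulo whitespace) in include/uapi/linux/futex.h:

	#define FUTEX_OP(op, oparg, cmp, cmparg) \
		(((op & 0xf) << 28) | ((cmp & 0xf) << 24) | \
		 ((oparg & 0xfff) << 12) | (cmparg & 0xfff))

For example, FUTEX_OP(FUTEX_OP_ADD, 1, FUTEX_OP_CMP_GT, 0) has FUTEX_WAKE_OP
atomically add 1 to the second futex word and wake its waiters only if the old
value was greater than 0.
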
@@ -1484,8 +1608,8 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
        if (likely(&hb1->chain != &hb2->chain)) {
                plist_del(&q->list, &hb1->chain);
                hb_waiters_dec(hb1);
-               plist_add(&q->list, &hb2->chain);
                hb_waiters_inc(hb2);
+               plist_add(&q->list, &hb2->chain);
                q->lock_ptr = &hb2->lock;
        }
        get_futex_key_refs(key2);
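
The swapped order matters because futex_wake() consults the per-bucket waiter
count before taking the bucket lock. Incrementing hb2's count before the plist
insertion ensures a concurrent waker can never observe a zero count while a
requeued waiter is already reachable on hb2's chain. Condensed shape of the
waker's fast path (simplified from futex_wake() in this file):

	hb = hash_futex(&key);
	if (!hb_waiters_pending(hb))	/* lockless counter check */
		goto out_put_key;	/* nothing visible to wake */
	spin_lock(&hb->lock);
	/* ... walk hb->chain and wake matching waiters ... */
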
@@ -1621,6 +1745,9 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
        struct futex_q *this, *next;
        WAKE_Q(wake_q);
 
+       if (nr_wake < 0 || nr_requeue < 0)
+               return -EINVAL;
+
        if (requeue_pi) {
                /*
                 * Requeue PI only works on two distinct uaddrs. This
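
Both counts arrive unchecked from the futex() syscall's val and val2 arguments,
so a negative value could previously flow straight into the requeue loop's
task-count comparisons. An illustrative userspace call (hypothetical, not part
of the patch) that used to reach this code:

	/* nr_wake = 1, nr_requeue = -1 */
	syscall(SYS_futex, &f1, FUTEX_CMP_REQUEUE, 1, -1, &f2, f1_expected);
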
@@ -1939,8 +2066,12 @@ static int unqueue_me(struct futex_q *q)
 
        /* In the common case we don't take the spinlock, which is nice. */
 retry:
-       lock_ptr = q->lock_ptr;
-       barrier();
+       /*
+        * q->lock_ptr can change between this read and the following spin_lock.
+        * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
+        * optimizing lock_ptr out of the logic below.
+        */
+       lock_ptr = READ_ONCE(q->lock_ptr);
        if (lock_ptr != NULL) {
                spin_lock(lock_ptr);
                /*
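
Both the old barrier() and the new READ_ONCE() guard against a compiler
transformation rather than CPU reordering: with a plain load the compiler may
discard the local variable and reload q->lock_ptr later, effectively turning
the code into:

	/* What the compiler could legally emit with a plain load: */
	if (q->lock_ptr != NULL)	/* first load observes non-NULL */
		spin_lock(q->lock_ptr);	/* reload may see NULL if a waker
					 * has just cleared lock_ptr */
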
@@ -2690,7 +2821,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 {
        struct hrtimer_sleeper timeout, *to = NULL;
        struct rt_mutex_waiter rt_waiter;
-       struct rt_mutex *pi_mutex = NULL;
        struct futex_hash_bucket *hb;
        union futex_key key2 = FUTEX_KEY_INIT;
        struct futex_q q = futex_q_init;
@@ -2774,6 +2904,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                if (q.pi_state && (q.pi_state->owner != current)) {
                        spin_lock(q.lock_ptr);
                        ret = fixup_pi_state_owner(uaddr2, &q, current);
+                       if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current)
+                               rt_mutex_unlock(&q.pi_state->pi_mutex);
                        /*
                         * Drop the reference to the pi state which
                         * the requeue_pi() code acquired for us.
@@ -2782,6 +2914,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                        spin_unlock(q.lock_ptr);
                }
        } else {
+               struct rt_mutex *pi_mutex;
+
                /*
                 * We have been woken up by futex_unlock_pi(), a timeout, or a
                 * signal.  futex_unlock_pi() will not destroy the lock_ptr nor
@@ -2789,10 +2923,13 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                 */
                WARN_ON(!q.pi_state);
                pi_mutex = &q.pi_state->pi_mutex;
-               ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter);
-               debug_rt_mutex_free_waiter(&rt_waiter);
+               ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
 
                spin_lock(q.lock_ptr);
+               if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
+                       ret = 0;
+
+               debug_rt_mutex_free_waiter(&rt_waiter);
                /*
                 * Fixup the pi_state owner and possibly acquire the lock if we
                 * haven't already.
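
This hunk assumes the split API that replaced rt_mutex_finish_proxy_lock() in
kernel/locking/rtmutex.c: rt_mutex_wait_proxy_lock() blocks until the lock is
acquired, a signal arrives or the timeout expires, and
rt_mutex_cleanup_proxy_lock() then reports whether the waiter could still be
removed. A false return means the lock was acquired despite the error, which
is why ret is converted back to 0 above.
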
@@ -2805,18 +2942,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                if (res)
                        ret = (res < 0) ? res : 0;
 
+               /*
+                * If fixup_pi_state_owner() faulted and was unable to handle
+                * the fault, unlock the rt_mutex and return the fault to
+                * userspace.
+                */
+               if (ret && rt_mutex_owner(pi_mutex) == current)
+                       rt_mutex_unlock(pi_mutex);
+
                /* Unqueue and drop the lock. */
                unqueue_me_pi(&q);
        }
 
-       /*
-        * If fixup_pi_state_owner() faulted and was unable to handle the
-        * fault, unlock the rt_mutex and return the fault to userspace.
-        */
-       if (ret == -EFAULT) {
-               if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
-                       rt_mutex_unlock(pi_mutex);
-       } else if (ret == -EINTR) {
+       if (ret == -EINTR) {
                /*
                 * We've already been requeued, but cannot restart by calling
                 * futex_lock_pi() directly. We could restart this syscall, but
@@ -2929,6 +3067,10 @@ int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
 {
        u32 uval, uninitialized_var(nval), mval;
 
+       /* Futex address must be 32-bit aligned */
+       if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0)
+               return -1;
+
 retry:
        if (get_user(uval, uaddr))
                return -1;
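
Since sizeof(*uaddr) is 4, the modulo reduces to masking the low two address
bits; an equivalent spelling of the same check:

	if ((unsigned long)uaddr & (sizeof(*uaddr) - 1))
		return -1;	/* unaligned futex word in the robust list */
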
@@ -3199,4 +3341,4 @@ static int __init futex_init(void)
 
        return 0;
 }
-__initcall(futex_init);
+core_initcall(futex_init);