lockdep: Fix file mode of lock_stat
[safe/jmp/linux-2.6] / kernel / futex.c
index 157bfcd..0672ff8 100644 (file)
@@ -199,6 +199,7 @@ static void drop_futex_key_refs(union futex_key *key)
  * @uaddr: virtual address of the futex
  * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
  * @key: address where result is stored.
+ * @rw: mapping needs to be read/write (values: VERIFY_READ, VERIFY_WRITE)
  *
  * Returns a negative error code or 0
  * The key words are stored in *key on success.
@@ -209,7 +210,8 @@ static void drop_futex_key_refs(union futex_key *key)
  *
  * lock_page() might sleep, the caller should not hold a spinlock.
  */
-static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
+static int
+get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 {
        unsigned long address = (unsigned long)uaddr;
        struct mm_struct *mm = current->mm;
@@ -232,7 +234,7 @@ static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
         *        but access_ok() should be faster than find_vma()
         */
        if (!fshared) {
-               if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
+               if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
                        return -EFAULT;
                key->private.mm = mm;
                key->private.address = address;
@@ -241,10 +243,11 @@ static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
        }
 
 again:
-       err = get_user_pages_fast(address, 1, 0, &page);
+       err = get_user_pages_fast(address, 1, rw == VERIFY_WRITE, &page);
        if (err < 0)
                return err;
 
+       page = compound_head(page);
        lock_page(page);
        if (!page->mapping) {
                unlock_page(page);
@@ -282,6 +285,25 @@ void put_futex_key(int fshared, union futex_key *key)
        drop_futex_key_refs(key);
 }
 
+/*
+ * fault_in_user_writeable - fault in user address and verify RW access
+ * @uaddr:     pointer to faulting user space address
+ *
+ * Slow path to fix up the fault we just took in the atomic write
+ * access to @uaddr. There is no generic non-destructive write to a
+ * user address, and we know we faulted with pagefaults disabled, so
+ * skip the #PF overhead and call get_user_pages() right away.
+ *
+ * Returns 0 on success, or the negative get_user_pages() error code.
+ * NOTE(review): mmap_sem is not taken around get_user_pages() - verify.
+ */
+static int fault_in_user_writeable(u32 __user *uaddr)
+{
+       int ret = get_user_pages(current, current->mm, (unsigned long)uaddr,
+                                1, 1, 0, NULL, NULL);
+       return ret < 0 ? ret : 0;
+}
+
 /**
  * futex_top_waiter() - Return the highest priority waiter on a futex
  * @hb:     the hash bucket the futex_q's reside in
@@ -834,7 +856,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
        if (!bitset)
                return -EINVAL;
 
-       ret = get_futex_key(uaddr, fshared, &key);
+       ret = get_futex_key(uaddr, fshared, &key, VERIFY_READ);
        if (unlikely(ret != 0))
                goto out;
 
@@ -880,10 +902,10 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
        int ret, op_ret;
 
 retry:
-       ret = get_futex_key(uaddr1, fshared, &key1);
+       ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ);
        if (unlikely(ret != 0))
                goto out;
-       ret = get_futex_key(uaddr2, fshared, &key2);
+       ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
        if (unlikely(ret != 0))
                goto out_put_key1;
 
@@ -894,7 +916,6 @@ retry:
 retry_private:
        op_ret = futex_atomic_op_inuser(op, uaddr2);
        if (unlikely(op_ret < 0)) {
-               u32 dummy;
 
                double_unlock_hb(hb1, hb2);
 
@@ -912,7 +933,7 @@ retry_private:
                        goto out_put_keys;
                }
 
-               ret = get_user(dummy, uaddr2);
+               ret = fault_in_user_writeable(uaddr2);
                if (ret)
                        goto out_put_keys;
 
@@ -1131,10 +1152,11 @@ retry:
                pi_state = NULL;
        }
 
-       ret = get_futex_key(uaddr1, fshared, &key1);
+       ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ);
        if (unlikely(ret != 0))
                goto out;
-       ret = get_futex_key(uaddr2, fshared, &key2);
+       ret = get_futex_key(uaddr2, fshared, &key2,
+                           requeue_pi ? VERIFY_WRITE : VERIFY_READ);
        if (unlikely(ret != 0))
                goto out_put_key1;
 
@@ -1201,7 +1223,7 @@ retry_private:
                        double_unlock_hb(hb1, hb2);
                        put_futex_key(fshared, &key2);
                        put_futex_key(fshared, &key1);
-                       ret = get_user(curval2, uaddr2);
+                       ret = fault_in_user_writeable(uaddr2);
                        if (!ret)
                                goto retry;
                        goto out;
@@ -1267,7 +1289,12 @@ retry_private:
 out_unlock:
        double_unlock_hb(hb1, hb2);
 
-       /* drop_futex_key_refs() must be called outside the spinlocks. */
+       /*
+        * drop_futex_key_refs() must be called outside the spinlocks. During
+        * the requeue we moved futex_q's from the hash bucket at key1 to the
+        * one at key2 and updated their key pointer.  We no longer need to
+        * hold the references to key1.
+        */
        while (--drop_count >= 0)
                drop_futex_key_refs(&key1);
 
@@ -1474,7 +1501,7 @@ retry:
 handle_fault:
        spin_unlock(q->lock_ptr);
 
-       ret = get_user(uval, uaddr);
+       ret = fault_in_user_writeable(uaddr);
 
        spin_lock(q->lock_ptr);
 
@@ -1499,7 +1526,6 @@ handle_fault:
 #define FLAGS_HAS_TIMEOUT      0x04
 
 static long futex_wait_restart(struct restart_block *restart);
-static long futex_lock_pi_restart(struct restart_block *restart);
 
 /**
  * fixup_owner() - Post lock pi_state and corner case management
@@ -1660,7 +1686,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
         */
 retry:
        q->key = FUTEX_KEY_INIT;
-       ret = get_futex_key(uaddr, fshared, &q->key);
+       ret = get_futex_key(uaddr, fshared, &q->key, VERIFY_READ);
        if (unlikely(ret != 0))
                return ret;
 
@@ -1800,7 +1826,6 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
 {
        struct hrtimer_sleeper timeout, *to = NULL;
        struct futex_hash_bucket *hb;
-       u32 uval;
        struct futex_q q;
        int res, ret;
 
@@ -1819,7 +1844,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
        q.rt_waiter = NULL;
 retry:
        q.key = FUTEX_KEY_INIT;
-       ret = get_futex_key(uaddr, fshared, &q.key);
+       ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE);
        if (unlikely(ret != 0))
                goto out;
 
@@ -1902,16 +1927,9 @@ out:
        return ret != -EINTR ? ret : -ERESTARTNOINTR;
 
 uaddr_faulted:
-       /*
-        * We have to r/w  *(int __user *)uaddr, and we have to modify it
-        * atomically.  Therefore, if we continue to fault after get_user()
-        * below, we need to handle the fault ourselves, while still holding
-        * the mmap_sem.  This can occur if the uaddr is under contention as
-        * we have to drop the mmap_sem in order to call get_user().
-        */
        queue_unlock(&q, hb);
 
-       ret = get_user(uval, uaddr);
+       ret = fault_in_user_writeable(uaddr);
        if (ret)
                goto out_put_key;
 
@@ -1922,21 +1940,6 @@ uaddr_faulted:
        goto retry;
 }
 
-static long futex_lock_pi_restart(struct restart_block *restart)
-{
-       u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
-       ktime_t t, *tp = NULL;
-       int fshared = restart->futex.flags & FLAGS_SHARED;
-
-       if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
-               t.tv64 = restart->futex.time;
-               tp = &t;
-       }
-       restart->fn = do_no_restart_syscall;
-
-       return (long)futex_lock_pi(uaddr, fshared, restart->futex.val, tp, 0);
-}
-
 /*
  * Userspace attempted a TID -> 0 atomic transition, and failed.
  * This is the in-kernel slowpath: we look up the PI state (if any),
@@ -1960,7 +1963,7 @@ retry:
        if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
                return -EPERM;
 
-       ret = get_futex_key(uaddr, fshared, &key);
+       ret = get_futex_key(uaddr, fshared, &key, VERIFY_WRITE);
        if (unlikely(ret != 0))
                goto out;
 
@@ -2021,17 +2024,10 @@ out:
        return ret;
 
 pi_faulted:
-       /*
-        * We have to r/w  *(int __user *)uaddr, and we have to modify it
-        * atomically.  Therefore, if we continue to fault after get_user()
-        * below, we need to handle the fault ourselves, while still holding
-        * the mmap_sem.  This can occur if the uaddr is under contention as
-        * we have to drop the mmap_sem in order to call get_user().
-        */
        spin_unlock(&hb->lock);
        put_futex_key(fshared, &key);
 
-       ret = get_user(uval, uaddr);
+       ret = fault_in_user_writeable(uaddr);
        if (!ret)
                goto retry;
 
@@ -2052,7 +2048,7 @@ pi_faulted:
  *
  * Returns
  *  0 - no early wakeup detected
- * <0 - -ETIMEDOUT or -ERESTARTSYS (FIXME: or ERESTARTNOINTR?)
+ * <0 - -ETIMEDOUT or -ERESTARTNOINTR
  */
 static inline
 int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
@@ -2079,15 +2075,8 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
 
                if (timeout && !timeout->task)
                        ret = -ETIMEDOUT;
-               else {
-                       /*
-                        * We expect signal_pending(current), but another
-                        * thread may have handled it for us already.
-                        */
-                       /* FIXME: ERESTARTSYS or ERESTARTNOINTR?  Do we care if
-                        * the user specified SA_RESTART or not? */
-                       ret = -ERESTARTSYS;
-               }
+               else
+                       ret = -ERESTARTNOINTR;
        }
        return ret;
 }
@@ -2140,12 +2129,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
        struct hrtimer_sleeper timeout, *to = NULL;
        struct rt_mutex_waiter rt_waiter;
        struct rt_mutex *pi_mutex = NULL;
-       struct restart_block *restart;
        struct futex_hash_bucket *hb;
        union futex_key key2;
        struct futex_q q;
        int res, ret;
-       u32 uval;
 
        if (!bitset)
                return -EINVAL;
@@ -2171,16 +2158,14 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
        q.rt_waiter = &rt_waiter;
 
        key2 = FUTEX_KEY_INIT;
-       ret = get_futex_key(uaddr2, fshared, &key2);
+       ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
        if (unlikely(ret != 0))
                goto out;
 
        /* Prepare to wait on uaddr. */
        ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
-       if (ret) {
-               put_futex_key(fshared, &key2);
-               goto out;
-       }
+       if (ret)
+               goto out_key2;
 
        /* Queue the futex_q, drop the hb lock, wait for wakeup. */
        futex_wait_queue_me(hb, &q, to);
@@ -2246,34 +2231,22 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
                if (rt_mutex_owner(pi_mutex) == current)
                        rt_mutex_unlock(pi_mutex);
        } else if (ret == -EINTR) {
-               ret = -EFAULT;
-               if (get_user(uval, uaddr2))
-                       goto out_put_keys;
-
                /*
-                * We've already been requeued, so restart by calling
-                * futex_lock_pi() directly, rather then returning to this
-                * function.
+                * We've already been requeued, but we have no way to
+                * restart by calling futex_lock_pi() directly. We
+                * could restart the syscall, but that will look at
+                * the user space value and return right away. So we
+                * drop back with EWOULDBLOCK to tell user space that
+                * "val" has been changed. That is the same as what
+                * the restart of the syscall would do in
+                * futex_wait_setup().
                 */
-               ret = -ERESTART_RESTARTBLOCK;
-               restart = &current_thread_info()->restart_block;
-               restart->fn = futex_lock_pi_restart;
-               restart->futex.uaddr = (u32 *)uaddr2;
-               restart->futex.val = uval;
-               restart->futex.flags = 0;
-               if (abs_time) {
-                       restart->futex.flags |= FLAGS_HAS_TIMEOUT;
-                       restart->futex.time = abs_time->tv64;
-               }
-
-               if (fshared)
-                       restart->futex.flags |= FLAGS_SHARED;
-               if (clockrt)
-                       restart->futex.flags |= FLAGS_CLOCKRT;
+               ret = -EWOULDBLOCK;
        }
 
 out_put_keys:
        put_futex_key(fshared, &q.key);
+out_key2:
        put_futex_key(fshared, &key2);
 
 out: