if (err < 0)
return err;
+ page = compound_head(page);
lock_page(page);
if (!page->mapping) {
unlock_page(page);
drop_futex_key_refs(key);
}
+/*
+ * fault_in_user_writeable - fault in user address and verify RW access
+ * @uaddr:	pointer to faulting user space address
+ *
+ * Slow path to fix up the fault we just took in the atomic write
+ * access to @uaddr.
+ *
+ * We have no generic implementation of a non-destructive write to the
+ * user address. We know that we faulted in the atomic pagefault
+ * disabled section so we can as well avoid the #PF overhead by
+ * calling get_user_pages() right away.
+ *
+ * get_user_pages() has to be called with mm->mmap_sem held, so it is
+ * taken for reading around the call. This can sleep: callers must have
+ * dropped their hash bucket locks before calling.
+ *
+ * Returns 0 when the page was faulted in writeable, otherwise the
+ * negative error code reported by get_user_pages().
+ */
+static int fault_in_user_writeable(u32 __user *uaddr)
+{
+	struct mm_struct *mm = current->mm;
+	int ret;
+
+	down_read(&mm->mmap_sem);
+	/* One page, write access requested, no force, no page/vma return. */
+	ret = get_user_pages(current, mm, (unsigned long)uaddr,
+			     1, 1, 0, NULL, NULL);
+	up_read(&mm->mmap_sem);
+
+	/* A positive return is the number of pages pinned; map it to 0. */
+	return ret < 0 ? ret : 0;
+}
+
/**
* futex_top_waiter() - Return the highest priority waiter on a futex
* @hb: the hash bucket the futex_q's reside in
retry_private:
op_ret = futex_atomic_op_inuser(op, uaddr2);
if (unlikely(op_ret < 0)) {
- u32 dummy;
double_unlock_hb(hb1, hb2);
goto out_put_keys;
}
- ret = get_user(dummy, uaddr2);
+ ret = fault_in_user_writeable(uaddr2);
if (ret)
goto out_put_keys;
* requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
* q: the futex_q
* key: the key of the requeue target futex
+ * hb: the hash_bucket of the requeue target futex
*
* During futex_requeue, with requeue_pi=1, it is possible to acquire the
* target futex if it is uncontended or via a lock steal. Set the futex_q key
* to the requeue target futex so the waiter can detect the wakeup on the right
* futex, but remove it from the hb and NULL the rt_waiter so it can detect
- * atomic lock acquisition. Must be called with the q->lock_ptr held.
+ * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock
+ * to protect access to the pi_state to fixup the owner later. Must be called
+ * with both q->lock_ptr and hb->lock held.
*/
static inline
-void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
+void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
+ struct futex_hash_bucket *hb)
{
drop_futex_key_refs(&q->key);
get_futex_key_refs(key);
WARN_ON(!q->rt_waiter);
q->rt_waiter = NULL;
+ q->lock_ptr = &hb->lock; /* later pi_state owner fixup runs under the target hb->lock */
+#ifdef CONFIG_DEBUG_PI_LIST
+ q->list.plist.lock = &hb->lock; /* keep the plist debug lock pointer in step with q->lock_ptr */
+#endif /* CONFIG_DEBUG_PI_LIST */
+
wake_up_state(q->task, TASK_NORMAL);
}
ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
set_waiters);
if (ret == 1)
- requeue_pi_wake_futex(top_waiter, key2);
+ requeue_pi_wake_futex(top_waiter, key2, hb2);
return ret;
}
double_unlock_hb(hb1, hb2);
put_futex_key(fshared, &key2);
put_futex_key(fshared, &key1);
- ret = get_user(curval2, uaddr2);
+ ret = fault_in_user_writeable(uaddr2);
if (!ret)
goto retry;
goto out;
if (!match_futex(&this->key, &key1))
continue;
- WARN_ON(!requeue_pi && this->rt_waiter);
- WARN_ON(requeue_pi && !this->rt_waiter);
+ /*
+ * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
+ * be paired with each other and no other futex ops.
+ */
+ if ((requeue_pi && !this->rt_waiter) ||
+ (!requeue_pi && this->rt_waiter)) {
+ ret = -EINVAL;
+ break;
+ }
/*
* Wake nr_wake waiters. For requeue_pi, if we acquired the
this->task, 1);
if (ret == 1) {
/* We got the lock. */
- requeue_pi_wake_futex(this, &key2);
+ requeue_pi_wake_futex(this, &key2, hb2);
continue;
} else if (ret) {
/* -EDEADLK */
handle_fault:
spin_unlock(q->lock_ptr);
- ret = get_user(uval, uaddr);
+ ret = fault_in_user_writeable(uaddr);
spin_lock(q->lock_ptr);
#define FLAGS_HAS_TIMEOUT 0x04
static long futex_wait_restart(struct restart_block *restart);
-static long futex_lock_pi_restart(struct restart_block *restart);
/**
* fixup_owner() - Post lock pi_state and corner case management
{
struct hrtimer_sleeper timeout, *to = NULL;
struct futex_hash_bucket *hb;
- u32 uval;
struct futex_q q;
int res, ret;
return ret != -EINTR ? ret : -ERESTARTNOINTR;
uaddr_faulted:
- /*
- * We have to r/w *(int __user *)uaddr, and we have to modify it
- * atomically. Therefore, if we continue to fault after get_user()
- * below, we need to handle the fault ourselves, while still holding
- * the mmap_sem. This can occur if the uaddr is under contention as
- * we have to drop the mmap_sem in order to call get_user().
- */
queue_unlock(&q, hb);
- ret = get_user(uval, uaddr);
+ ret = fault_in_user_writeable(uaddr);
if (ret)
goto out_put_key;
goto retry;
}
-static long futex_lock_pi_restart(struct restart_block *restart)
-{
- u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
- ktime_t t, *tp = NULL;
- int fshared = restart->futex.flags & FLAGS_SHARED;
-
- if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
- t.tv64 = restart->futex.time;
- tp = &t;
- }
- restart->fn = do_no_restart_syscall;
-
- return (long)futex_lock_pi(uaddr, fshared, restart->futex.val, tp, 0);
-}
-
/*
* Userspace attempted a TID -> 0 atomic transition, and failed.
* This is the in-kernel slowpath: we look up the PI state (if any),
return ret;
pi_faulted:
- /*
- * We have to r/w *(int __user *)uaddr, and we have to modify it
- * atomically. Therefore, if we continue to fault after get_user()
- * below, we need to handle the fault ourselves, while still holding
- * the mmap_sem. This can occur if the uaddr is under contention as
- * we have to drop the mmap_sem in order to call get_user().
- */
spin_unlock(&hb->lock);
put_futex_key(fshared, &key);
- ret = get_user(uval, uaddr);
+ ret = fault_in_user_writeable(uaddr);
if (!ret)
goto retry;
*
* Returns
* 0 - no early wakeup detected
- * <0 - -ETIMEDOUT or -ERESTARTSYS (FIXME: or ERESTARTNOINTR?)
+ * <0 - -ETIMEDOUT or -ERESTARTNOINTR
*/
static inline
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
if (timeout && !timeout->task)
ret = -ETIMEDOUT;
- else {
- /*
- * We expect signal_pending(current), but another
- * thread may have handled it for us already.
- */
- /* FIXME: ERESTARTSYS or ERESTARTNOINTR? Do we care if
- * the user specified SA_RESTART or not? */
- ret = -ERESTARTSYS;
- }
+ else
+ ret = -ERESTARTNOINTR;
}
return ret;
}
struct hrtimer_sleeper timeout, *to = NULL;
struct rt_mutex_waiter rt_waiter;
struct rt_mutex *pi_mutex = NULL;
- struct restart_block *restart;
struct futex_hash_bucket *hb;
union futex_key key2;
struct futex_q q;
int res, ret;
- u32 uval;
if (!bitset)
return -EINVAL;
/* Prepare to wait on uaddr. */
ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
- if (ret) {
- put_futex_key(fshared, &key2);
- goto out;
- }
+ if (ret)
+ goto out_key2;
/* Queue the futex_q, drop the hb lock, wait for wakeup. */
futex_wait_queue_me(hb, &q, to);
if (rt_mutex_owner(pi_mutex) == current)
rt_mutex_unlock(pi_mutex);
} else if (ret == -EINTR) {
- ret = -EFAULT;
- if (get_user(uval, uaddr2))
- goto out_put_keys;
-
/*
- * We've already been requeued, so restart by calling
- * futex_lock_pi() directly, rather then returning to this
- * function.
+ * We've already been requeued, but we have no way to
+ * restart by calling futex_lock_pi() directly. We
+ * could restart the syscall, but that will look at
+ * the user space value and return right away. So we
+ * drop back with EWOULDBLOCK to tell user space that
+ * "val" has been changed. That is the same result a
+ * restart of the syscall would produce in
+ * futex_wait_setup().
*/
- ret = -ERESTART_RESTARTBLOCK;
- restart = &current_thread_info()->restart_block;
- restart->fn = futex_lock_pi_restart;
- restart->futex.uaddr = (u32 *)uaddr2;
- restart->futex.val = uval;
- restart->futex.flags = 0;
- if (abs_time) {
- restart->futex.flags |= FLAGS_HAS_TIMEOUT;
- restart->futex.time = abs_time->tv64;
- }
-
- if (fshared)
- restart->futex.flags |= FLAGS_SHARED;
- if (clockrt)
- restart->futex.flags |= FLAGS_CLOCKRT;
+ ret = -EWOULDBLOCK;
}
out_put_keys:
put_futex_key(fshared, &q.key);
+out_key2:
put_futex_key(fshared, &key2);
out: