#include <linux/syscalls.h>
#include <linux/signal.h>
#include <linux/module.h>
+#include <linux/magic.h>
+#include <linux/pid.h>
+#include <linux/nsproxy.h>
+
#include <asm/futex.h>
#include "rtmutex_common.h"
-#ifdef CONFIG_DEBUG_RT_MUTEXES
-# include "rtmutex-debug.h"
-#else
-# include "rtmutex.h"
-#endif
+int __read_mostly futex_cmpxchg_enabled;
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
struct futex_pi_state *pi_state;
struct task_struct *task;
- /*
- * This waiter is used in case of requeue from a
- * normal futex to a PI-futex
- */
- struct rt_mutex_waiter waiter;
+ /* Bitset for the optional bitmasked wakeup */
+ u32 bitset;
};
/*
static struct vfsmount *futex_mnt;
/*
+ * Take mm->mmap_sem, when futex is shared
+ *
+ * @fshared: semaphore to take for reading, or NULL, in which case
+ *           nothing is taken and the call is a no-op.
+ *
+ * The matching release is futex_unlock_mm().
+ */
+static inline void futex_lock_mm(struct rw_semaphore *fshared)
+{
+ if (fshared)
+ down_read(fshared);
+}
+
+/*
+ * Release mm->mmap_sem, when the futex is shared
+ *
+ * @fshared: semaphore whose read side is dropped with up_read(), or
+ *           NULL, in which case there is nothing to release.
+ */
+static inline void futex_unlock_mm(struct rw_semaphore *fshared)
+{
+ if (fshared)
+ up_read(fshared);
+}
+
+/*
* We hash on the keys returned from get_futex_key (see below).
*/
static struct futex_hash_bucket *hash_futex(union futex_key *key)
 * For other futexes, it points to &current->mm->mmap_sem and
* caller must have taken the reader lock. but NOT any spinlocks.
*/
-int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
- union futex_key *key)
+static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
+ union futex_key *key)
{
unsigned long address = (unsigned long)uaddr;
struct mm_struct *mm = current->mm;
if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
- /* Save the user address in the ley */
- key->uaddr = uaddr;
-
/*
* Private mappings are handled in a simple way.
*
}
return err;
}
-EXPORT_SYMBOL_GPL(get_futex_key);
/*
* Take a reference to the resource addressed by a key.
* Can be called while holding spinlocks.
*
*/
-inline void get_futex_key_refs(union futex_key *key)
+static void get_futex_key_refs(union futex_key *key)
{
- if (key->both.ptr == 0)
+ if (key->both.ptr == NULL)
return;
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
break;
}
}
-EXPORT_SYMBOL_GPL(get_futex_key_refs);
/*
* Drop a reference to the resource addressed by a key.
* The hash bucket spinlock must not be held.
*/
-void drop_futex_key_refs(union futex_key *key)
+static void drop_futex_key_refs(union futex_key *key)
{
- if (key->both.ptr == 0)
+ if (!key->both.ptr)
return;
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
break;
}
}
-EXPORT_SYMBOL_GPL(drop_futex_key_refs);
-static inline int get_futex_value_locked(u32 *dest, u32 __user *from)
+static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
+{
+ u32 curval; /* value reported by the cmpxchg; callers test it against -EFAULT */
+
+ pagefault_disable(); /* page faults stay disabled across the cmpxchg on uaddr */
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+ pagefault_enable();
+
+ return curval;
+}
+
+static int get_futex_value_locked(u32 *dest, u32 __user *from)
{
int ret;
vma = find_vma(mm, address);
if (vma && address >= vma->vm_start &&
(vma->vm_flags & VM_WRITE)) {
- switch (handle_mm_fault(mm, vma, address, 1)) {
- case VM_FAULT_MINOR:
- ret = 0;
- current->min_flt++;
- break;
- case VM_FAULT_MAJOR:
+ int fault;
+ fault = handle_mm_fault(mm, vma, address, 1);
+ if (unlikely((fault & VM_FAULT_ERROR))) {
+#if 0
+ /* XXX: let's do this when we verify it is OK */
+ if (ret & VM_FAULT_OOM)
+ ret = -ENOMEM;
+#endif
+ } else {
ret = 0;
- current->maj_flt++;
- break;
+ if (fault & VM_FAULT_MAJOR)
+ current->maj_flt++;
+ else
+ current->min_flt++;
}
}
if (!fshared)
struct task_struct *p;
rcu_read_lock();
- p = find_task_by_pid(pid);
- if (!p)
- goto out_unlock;
- if ((current->euid != p->euid) && (current->euid != p->uid)) {
- p = NULL;
- goto out_unlock;
- }
- get_task_struct(p);
-out_unlock:
+ p = find_task_by_vpid(pid);
+ if (!p || ((current->euid != p->euid) && (current->euid != p->uid)))
+ p = ERR_PTR(-ESRCH);
+ else
+ get_task_struct(p);
+
rcu_read_unlock();
return p;
struct futex_hash_bucket *hb;
union futex_key key;
+ if (!futex_cmpxchg_enabled)
+ return;
/*
* We are a ZOMBIE and nobody can enqueue itself on
* pi_state_list anymore, but we have to be careful
if (!(uval & FUTEX_OWNER_DIED)) {
int ret = 0;
- newval = FUTEX_WAITERS | new_owner->pid;
- /* Keep the FUTEX_WAITER_REQUEUED flag if it was set */
- newval |= (uval & FUTEX_WAITER_REQUEUED);
+ newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
- pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
- pagefault_enable();
+ curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
if (curval == -EFAULT)
ret = -EFAULT;
- if (curval != uval)
+ else if (curval != uval)
ret = -EINVAL;
if (ret) {
spin_unlock(&pi_state->pi_mutex.wait_lock);
* There is no waiter, so we unlock the futex. The owner died
* bit has not to be preserved here. We are the owner:
*/
- pagefault_disable();
- oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0);
- pagefault_enable();
+ oldval = cmpxchg_futex_value_locked(uaddr, uval, 0);
if (oldval == -EFAULT)
return oldval;
* to this virtual address:
*/
static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
- int nr_wake)
+ int nr_wake, u32 bitset)
{
struct futex_hash_bucket *hb;
struct futex_q *this, *next;
union futex_key key;
int ret;
- if (fshared)
- down_read(fshared);
+ if (!bitset)
+ return -EINVAL;
+
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr, fshared, &key);
if (unlikely(ret != 0))
ret = -EINVAL;
break;
}
- wake_futex(this);
- if (++ret >= nr_wake)
- break;
- }
- }
-
- spin_unlock(&hb->lock);
-out:
- if (fshared)
- up_read(fshared);
- return ret;
-}
-
-/*
- * Called from futex_requeue_pi.
- * Set FUTEX_WAITERS and FUTEX_WAITER_REQUEUED flags on the
- * PI-futex value; search its associated pi_state if an owner exist
- * or create a new one without owner.
- */
-static inline int
-lookup_pi_state_for_requeue(u32 __user *uaddr, struct futex_hash_bucket *hb,
- union futex_key *key,
- struct futex_pi_state **pi_state)
-{
- u32 curval, uval, newval;
-
-retry:
- /*
- * We can't handle a fault cleanly because we can't
- * release the locks here. Simply return the fault.
- */
- if (get_futex_value_locked(&curval, uaddr))
- return -EFAULT;
- /* set the flags FUTEX_WAITERS and FUTEX_WAITER_REQUEUED */
- if ((curval & (FUTEX_WAITERS | FUTEX_WAITER_REQUEUED))
- != (FUTEX_WAITERS | FUTEX_WAITER_REQUEUED)) {
- /*
- * No waiters yet, we prepare the futex to have some waiters.
- */
-
- uval = curval;
- newval = uval | FUTEX_WAITERS | FUTEX_WAITER_REQUEUED;
-
- pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
- pagefault_enable();
-
- if (unlikely(curval == -EFAULT))
- return -EFAULT;
- if (unlikely(curval != uval))
- goto retry;
- }
-
- if (!(curval & FUTEX_TID_MASK)
- || lookup_pi_state(curval, hb, key, pi_state)) {
- /* the futex has no owner (yet) or the lookup failed:
- allocate one pi_state without owner */
-
- *pi_state = alloc_pi_state();
+ /* Check if one of the bits is set in both bitsets */
+ if (!(this->bitset & bitset))
+ continue;
- /* Already stores the key: */
- (*pi_state)->key = *key;
-
- /* init the mutex without owner */
- __rt_mutex_init(&(*pi_state)->pi_mutex, NULL);
- }
-
- return 0;
-}
-
-/*
- * Keep the first nr_wake waiter from futex1, wake up one,
- * and requeue the next nr_requeue waiters following hashed on
- * one physical page to another physical page (PI-futex uaddr2)
- */
-static int futex_requeue_pi(u32 __user *uaddr1,
- struct rw_semaphore *fshared,
- u32 __user *uaddr2,
- int nr_wake, int nr_requeue, u32 *cmpval)
-{
- union futex_key key1, key2;
- struct futex_hash_bucket *hb1, *hb2;
- struct plist_head *head1;
- struct futex_q *this, *next;
- struct futex_pi_state *pi_state2 = NULL;
- struct rt_mutex_waiter *waiter, *top_waiter = NULL;
- struct rt_mutex *lock2 = NULL;
- int ret, drop_count = 0;
-
- if (refill_pi_state_cache())
- return -ENOMEM;
-
-retry:
- /*
- * First take all the futex related locks:
- */
- if (fshared)
- down_read(fshared);
-
- ret = get_futex_key(uaddr1, fshared, &key1);
- if (unlikely(ret != 0))
- goto out;
- ret = get_futex_key(uaddr2, fshared, &key2);
- if (unlikely(ret != 0))
- goto out;
-
- hb1 = hash_futex(&key1);
- hb2 = hash_futex(&key2);
-
- double_lock_hb(hb1, hb2);
-
- if (likely(cmpval != NULL)) {
- u32 curval;
-
- ret = get_futex_value_locked(&curval, uaddr1);
-
- if (unlikely(ret)) {
- spin_unlock(&hb1->lock);
- if (hb1 != hb2)
- spin_unlock(&hb2->lock);
-
- /*
- * If we would have faulted, release mmap_sem, fault
- * it in and start all over again.
- */
- if (fshared)
- up_read(fshared);
-
- ret = get_user(curval, uaddr1);
-
- if (!ret)
- goto retry;
-
- return ret;
- }
- if (curval != *cmpval) {
- ret = -EAGAIN;
- goto out_unlock;
- }
- }
-
- head1 = &hb1->chain;
- plist_for_each_entry_safe(this, next, head1, list) {
- if (!match_futex (&this->key, &key1))
- continue;
- if (++ret <= nr_wake) {
wake_futex(this);
- } else {
- /*
- * FIRST: get and set the pi_state
- */
- if (!pi_state2) {
- int s;
- /* do this only the first time we requeue someone */
- s = lookup_pi_state_for_requeue(uaddr2, hb2,
- &key2, &pi_state2);
- if (s) {
- ret = s;
- goto out_unlock;
- }
-
- lock2 = &pi_state2->pi_mutex;
- spin_lock(&lock2->wait_lock);
-
- /* Save the top waiter of the wait_list */
- if (rt_mutex_has_waiters(lock2))
- top_waiter = rt_mutex_top_waiter(lock2);
- } else
- atomic_inc(&pi_state2->refcount);
-
-
- this->pi_state = pi_state2;
-
- /*
- * SECOND: requeue futex_q to the correct hashbucket
- */
-
- /*
- * If key1 and key2 hash to the same bucket, no need to
- * requeue.
- */
- if (likely(head1 != &hb2->chain)) {
- plist_del(&this->list, &hb1->chain);
- plist_add(&this->list, &hb2->chain);
- this->lock_ptr = &hb2->lock;
-#ifdef CONFIG_DEBUG_PI_LIST
- this->list.plist.lock = &hb2->lock;
-#endif
- }
- this->key = key2;
- get_futex_key_refs(&key2);
- drop_count++;
-
-
- /*
- * THIRD: queue it to lock2
- */
- spin_lock_irq(&this->task->pi_lock);
- waiter = &this->waiter;
- waiter->task = this->task;
- waiter->lock = lock2;
- plist_node_init(&waiter->list_entry, this->task->prio);
- plist_node_init(&waiter->pi_list_entry, this->task->prio);
- plist_add(&waiter->list_entry, &lock2->wait_list);
- this->task->pi_blocked_on = waiter;
- spin_unlock_irq(&this->task->pi_lock);
-
- if (ret - nr_wake >= nr_requeue)
+ if (++ret >= nr_wake)
break;
}
}
- /* If we've requeued some tasks and the top_waiter of the rt_mutex
- has changed, we must adjust the priority of the owner, if any */
- if (drop_count) {
- struct task_struct *owner = rt_mutex_owner(lock2);
- if (owner &&
- (top_waiter != (waiter = rt_mutex_top_waiter(lock2)))) {
- int chain_walk = 0;
-
- spin_lock_irq(&owner->pi_lock);
- if (top_waiter)
- plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
- else
- /*
- * There was no waiters before the requeue,
- * the flag must be updated
- */
- mark_rt_mutex_waiters(lock2);
-
- plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
- __rt_mutex_adjust_prio(owner);
- if (owner->pi_blocked_on) {
- chain_walk = 1;
- get_task_struct(owner);
- }
-
- spin_unlock_irq(&owner->pi_lock);
- spin_unlock(&lock2->wait_lock);
-
- if (chain_walk)
- rt_mutex_adjust_prio_chain(owner, 0, lock2, NULL,
- current);
- } else {
- /* No owner or the top_waiter does not change */
- mark_rt_mutex_waiters(lock2);
- spin_unlock(&lock2->wait_lock);
- }
- }
-
-out_unlock:
- spin_unlock(&hb1->lock);
- if (hb1 != hb2)
- spin_unlock(&hb2->lock);
-
- /* drop_futex_key_refs() must be called outside the spinlocks. */
- while (--drop_count >= 0)
- drop_futex_key_refs(&key1);
-
+ spin_unlock(&hb->lock);
out:
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret;
}
int ret, op_ret, attempt = 0;
retryfull:
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr1, fshared, &key1);
if (unlikely(ret != 0))
*/
if (attempt++) {
ret = futex_handle_fault((unsigned long)uaddr2,
- fshared, attempt);
+ fshared, attempt);
if (ret)
goto out;
goto retry;
* If we would have faulted, release mmap_sem,
* fault it in and start all over again.
*/
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
ret = get_user(dummy, uaddr2);
if (ret)
if (hb1 != hb2)
spin_unlock(&hb2->lock);
out:
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
+
return ret;
}
int ret, drop_count = 0;
retry:
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr1, fshared, &key1);
if (unlikely(ret != 0))
* If we would have faulted, release mmap_sem, fault
* it in and start all over again.
*/
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
ret = get_user(curval, uaddr1);
drop_futex_key_refs(&key1);
out:
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret;
}
retry:
lock_ptr = q->lock_ptr;
barrier();
- if (lock_ptr != 0) {
+ if (lock_ptr != NULL) {
spin_lock(lock_ptr);
/*
* q->lock_ptr can change between reading it and
}
/*
- * Fixup the pi_state owner with current.
+ * Fixup the pi_state owner with the new owner.
*
* Must be called with hash bucket lock held and mm->sem held for non
* private futexes.
*/
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
- struct task_struct *curr)
+ struct task_struct *newowner)
{
- u32 newtid = curr->pid | FUTEX_WAITERS;
+ u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
struct futex_pi_state *pi_state = q->pi_state;
u32 uval, curval, newval;
int ret;
} else
newtid |= FUTEX_OWNER_DIED;
- pi_state->owner = curr;
+ pi_state->owner = newowner;
- spin_lock_irq(&curr->pi_lock);
+ spin_lock_irq(&newowner->pi_lock);
WARN_ON(!list_empty(&pi_state->list));
- list_add(&pi_state->list, &curr->pi_state_list);
- spin_unlock_irq(&curr->pi_lock);
+ list_add(&pi_state->list, &newowner->pi_state_list);
+ spin_unlock_irq(&newowner->pi_lock);
/*
* We own it, so we have to replace the pending owner
while (!ret) {
newval = (uval & FUTEX_OWNER_DIED) | newtid;
- newval |= (uval & FUTEX_WAITER_REQUEUED);
- pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr,
- uval, newval);
- pagefault_enable();
+ curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
if (curval == -EFAULT)
ret = -EFAULT;
/*
* In case we must use restart_block to restart a futex_wait,
- * we encode in the 'arg3' shared capability
+ * we encode the shared capability in 'flags'
*/
-#define ARG3_SHARED 1
+#define FLAGS_SHARED 1
static long futex_wait_restart(struct restart_block *restart);
+
static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
- u32 val, ktime_t *abs_time)
+ u32 val, ktime_t *abs_time, u32 bitset)
{
struct task_struct *curr = current;
DECLARE_WAITQUEUE(wait, curr);
struct futex_q q;
u32 uval;
int ret;
- struct hrtimer_sleeper t, *to = NULL;
+ struct hrtimer_sleeper t;
int rem = 0;
+ if (!bitset)
+ return -EINVAL;
+
q.pi_state = NULL;
+ q.bitset = bitset;
retry:
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr, fshared, &q.key);
if (unlikely(ret != 0))
* If we would have faulted, release mmap_sem, fault it in and
* start all over again.
*/
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
ret = get_user(uval, uaddr);
if (uval != val)
goto out_unlock_release_sem;
- /*
- * This rt_mutex_waiter structure is prepared here and will
- * be used only if this task is requeued from a normal futex to
- * a PI-futex with futex_requeue_pi.
- */
- debug_rt_mutex_init_waiter(&q.waiter);
- q.waiter.task = NULL;
-
/* Only actually queue if *uaddr contained val. */
__queue_me(&q, hb);
* Now the futex is queued and we have checked the data, we
* don't want to hold mmap_sem while we sleep.
*/
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
/*
* There might have been scheduling since the queue_me(), as we
if (!abs_time)
schedule();
else {
- to = &t;
hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
hrtimer_init_sleeper(&t, current);
t.timer.expires = *abs_time;
hrtimer_start(&t.timer, t.timer.expires, HRTIMER_MODE_ABS);
+ if (!hrtimer_active(&t.timer))
+ t.task = NULL;
/*
* the timer could have already expired, in which
* we are the only user of it.
*/
- if (q.pi_state) {
- /*
- * We were woken but have been requeued on a PI-futex.
- * We have to complete the lock acquisition by taking
- * the rtmutex.
- */
-
- struct rt_mutex *lock = &q.pi_state->pi_mutex;
-
- spin_lock(&lock->wait_lock);
- if (unlikely(q.waiter.task)) {
- remove_waiter(lock, &q.waiter);
- }
- spin_unlock(&lock->wait_lock);
-
- if (rem)
- ret = -ETIMEDOUT;
- else
- ret = rt_mutex_timed_lock(lock, to, 1);
-
- if (fshared)
- down_read(fshared);
- spin_lock(q.lock_ptr);
-
- /*
- * Got the lock. We might not be the anticipated owner if we
- * did a lock-steal - fix up the PI-state in that case.
- */
- if (!ret && q.pi_state->owner != curr) {
- /*
- * We MUST play with the futex we were requeued on,
- * NOT the current futex.
- * We can retrieve it from the key of the pi_state
- */
- uaddr = q.pi_state->key.uaddr;
-
- ret = fixup_pi_state_owner(uaddr, &q, curr);
- } else {
- /*
- * Catch the rare case, where the lock was released
- * when we were on the way back before we locked
- * the hash bucket.
- */
- if (ret && q.pi_state->owner == curr) {
- if (rt_mutex_trylock(&q.pi_state->pi_mutex))
- ret = 0;
- }
- }
-
- /* Unqueue and drop the lock */
- unqueue_me_pi(&q);
- if (fshared)
- up_read(fshared);
-
- debug_rt_mutex_free_waiter(&q.waiter);
-
- return ret;
- }
-
- debug_rt_mutex_free_waiter(&q.waiter);
-
/* If we were woken (and unqueued), we succeeded, whatever. */
if (!unqueue_me(&q))
return 0;
struct restart_block *restart;
 restart = &current_thread_info()->restart_block;
restart->fn = futex_wait_restart;
- restart->arg0 = (unsigned long)uaddr;
- restart->arg1 = (unsigned long)val;
- restart->arg2 = (unsigned long)abs_time;
- restart->arg3 = 0;
+ restart->futex.uaddr = (u32 *)uaddr;
+ restart->futex.val = val;
+ restart->futex.time = abs_time->tv64;
+ restart->futex.bitset = bitset;
+ restart->futex.flags = 0;
+
if (fshared)
- restart->arg3 |= ARG3_SHARED;
+ restart->futex.flags |= FLAGS_SHARED;
return -ERESTART_RESTARTBLOCK;
}
queue_unlock(&q, hb);
out_release_sem:
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret;
}
static long futex_wait_restart(struct restart_block *restart)
{
- u32 __user *uaddr = (u32 __user *)restart->arg0;
- u32 val = (u32)restart->arg1;
- ktime_t *abs_time = (ktime_t *)restart->arg2;
+ u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
struct rw_semaphore *fshared = NULL;
+ ktime_t t;
+ t.tv64 = restart->futex.time;
restart->fn = do_no_restart_syscall;
- if (restart->arg3 & ARG3_SHARED)
+ if (restart->futex.flags & FLAGS_SHARED)
fshared = ¤t->mm->mmap_sem;
- return (long)futex_wait(uaddr, fshared, val, abs_time);
+ return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
+ restart->futex.bitset);
}
-static void set_pi_futex_owner(struct futex_hash_bucket *hb,
- union futex_key *key, struct task_struct *p)
-{
- struct plist_head *head;
- struct futex_q *this, *next;
- struct futex_pi_state *pi_state = NULL;
- struct rt_mutex *lock;
-
- /* Search a waiter that should already exists */
-
- head = &hb->chain;
-
- plist_for_each_entry_safe(this, next, head, list) {
- if (match_futex (&this->key, key)) {
- pi_state = this->pi_state;
- break;
- }
- }
-
- BUG_ON(!pi_state);
-
- /* set p as pi_state's owner */
- lock = &pi_state->pi_mutex;
-
- spin_lock(&lock->wait_lock);
- spin_lock_irq(&p->pi_lock);
-
- list_add(&pi_state->list, &p->pi_state_list);
- pi_state->owner = p;
-
-
- /* set p as pi_mutex's owner */
- debug_rt_mutex_proxy_lock(lock, p);
- WARN_ON(rt_mutex_owner(lock));
- rt_mutex_set_owner(lock, p, 0);
- rt_mutex_deadlock_account_lock(lock, p);
-
- plist_add(&rt_mutex_top_waiter(lock)->pi_list_entry,
- &p->pi_waiters);
- __rt_mutex_adjust_prio(p);
-
- spin_unlock_irq(&p->pi_lock);
- spin_unlock(&lock->wait_lock);
-}
-
/*
* Userspace tried a 0 -> TID atomic transition of the futex value
* and failed. The kernel side here does the whole locking operation:
q.pi_state = NULL;
retry:
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr, fshared, &q.key);
if (unlikely(ret != 0))
* (by doing a 0 -> TID atomic cmpxchg), while holding all
* the locks. It will most likely not succeed.
*/
- newval = current->pid;
+ newval = task_pid_vnr(current);
- pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval);
- pagefault_enable();
+ curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
if (unlikely(curval == -EFAULT))
goto uaddr_faulted;
* Detect deadlocks. In case of REQUEUE_PI this is a valid
* situation and we return success to user space.
*/
- if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
- if (!(curval & FUTEX_WAITER_REQUEUED))
- ret = -EDEADLK;
+ if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) {
+ ret = -EDEADLK;
goto out_unlock_release_sem;
}
/*
* There are two cases, where a futex might have no owner (the
- * owner TID is 0): OWNER_DIED or REQUEUE. We take over the
- * futex in this case. We also do an unconditional take over,
- * when the owner of the futex died.
+ * owner TID is 0): OWNER_DIED. We take over the futex in this
+ * case. We also do an unconditional take over, when the owner
+ * of the futex died.
*
* This is safe as we are protected by the hash bucket lock !
*/
if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
- /* Keep the OWNER_DIED and REQUEUE bits */
- newval = (curval & ~FUTEX_TID_MASK) | current->pid;
+ /* Keep the OWNER_DIED bit */
+ newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(current);
ownerdied = 0;
lock_taken = 1;
}
- pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
- pagefault_enable();
+ curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
if (unlikely(curval == -EFAULT))
goto uaddr_faulted;
goto retry_locked;
/*
- * We took the lock due to requeue or owner died take over.
+ * We took the lock due to owner died take over.
*/
- if (unlikely(lock_taken)) {
- /* For requeue we need to fixup the pi_futex */
- if (curval & FUTEX_WAITER_REQUEUED)
- set_pi_futex_owner(hb, &q.key, curr);
+ if (unlikely(lock_taken))
goto out_unlock_release_sem;
- }
/*
* We dont have the lock. Look up the PI state (or create it if
* exit to complete.
*/
queue_unlock(&q, hb);
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
cond_resched();
goto retry;
* Now the futex is queued and we have checked the data, we
* don't want to hold mmap_sem while we sleep.
*/
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
WARN_ON(!q.pi_state);
/*
ret = ret ? 0 : -EWOULDBLOCK;
}
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
spin_lock(q.lock_ptr);
if (!ret) {
* when we were on the way back before we locked the
* hash bucket.
*/
- if (q.pi_state->owner == curr &&
- rt_mutex_trylock(&q.pi_state->pi_mutex)) {
- ret = 0;
+ if (q.pi_state->owner == curr) {
+ /*
+ * Try to get the rt_mutex now. This might
+ * fail as some other task acquired the
+ * rt_mutex after we removed ourself from the
+ * rt_mutex waiters list.
+ */
+ if (rt_mutex_trylock(&q.pi_state->pi_mutex))
+ ret = 0;
+ else {
+ /*
+ * pi_state is incorrect, some other
+ * task did a lock steal and we
+ * returned due to timeout or signal
+ * without taking the rt_mutex. Too
+ * late. We can access the
+ * rt_mutex_owner without locking, as
+ * the other task is now blocked on
+ * the hash bucket lock. Fix the state
+ * up.
+ */
+ struct task_struct *owner;
+ int res;
+
+ owner = rt_mutex_owner(&q.pi_state->pi_mutex);
+ res = fixup_pi_state_owner(uaddr, &q, owner);
+
+ /* propagate -EFAULT, if the fixup failed */
+ if (res)
+ ret = res;
+ }
} else {
/*
* Paranoia check. If we did not take the lock
/* Unqueue and drop the lock */
unqueue_me_pi(&q);
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret != -EINTR ? ret : -ERESTARTNOINTR;
queue_unlock(&q, hb);
out_release_sem:
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret;
uaddr_faulted:
goto retry_unlocked;
}
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
ret = get_user(uval, uaddr);
if (!ret && (uval != -EFAULT))
/*
* We release only a lock we actually own:
*/
- if ((uval & FUTEX_TID_MASK) != current->pid)
+ if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
return -EPERM;
/*
* First take all the futex related locks:
*/
- if (fshared)
- down_read(fshared);
+ futex_lock_mm(fshared);
ret = get_futex_key(uaddr, fshared, &key);
if (unlikely(ret != 0))
* again. If it succeeds then we can return without waking
* anyone else up:
*/
- if (!(uval & FUTEX_OWNER_DIED)) {
- pagefault_disable();
- uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
- pagefault_enable();
- }
+ if (!(uval & FUTEX_OWNER_DIED))
+ uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0);
+
if (unlikely(uval == -EFAULT))
goto pi_faulted;
* Rare case: we managed to release the lock atomically,
* no need to wake anyone else up:
*/
- if (unlikely(uval == current->pid))
+ if (unlikely(uval == task_pid_vnr(current)))
goto out_unlock;
/*
out_unlock:
spin_unlock(&hb->lock);
out:
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
return ret;
attempt);
if (ret)
goto out;
+ uval = 0;
goto retry_unlocked;
}
- if (fshared)
- up_read(fshared);
+ futex_unlock_mm(fshared);
ret = get_user(uval, uaddr);
if (!ret && (uval != -EFAULT))
if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) {
printk(KERN_WARNING "Process `%s' used FUTEX_FD, which "
- "will be removed from the kernel in June 2007\n",
- current->comm);
+ "will be removed from the kernel in June 2007\n",
+ current->comm);
}
ret = -EINVAL;
sys_set_robust_list(struct robust_list_head __user *head,
size_t len)
{
+ if (!futex_cmpxchg_enabled)
+ return -ENOSYS;
/*
* The kernel knows only one size for now:
*/
struct robust_list_head __user *head;
unsigned long ret;
+ if (!futex_cmpxchg_enabled)
+ return -ENOSYS;
+
if (!pid)
head = current->robust_list;
else {
ret = -ESRCH;
rcu_read_lock();
- p = find_task_by_pid(pid);
+ p = find_task_by_vpid(pid);
if (!p)
goto err_unlock;
ret = -EPERM;
if (get_user(uval, uaddr))
return -1;
- if ((uval & FUTEX_TID_MASK) == curr->pid) {
+ if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
/*
* Ok, this dying thread is truly holding a futex
* of interest. Set the OWNER_DIED bit atomically
* userspace.
*/
mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
- /* Also keep the FUTEX_WAITER_REQUEUED flag if set */
- mval |= (uval & FUTEX_WAITER_REQUEUED);
nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
if (nval == -EFAULT)
* Wake robust non-PI futexes here. The wakeup of
* PI futexes happens in exit_pi_state():
*/
- if (!pi) {
- if (uval & FUTEX_WAITERS)
- futex_wake(uaddr, &curr->mm->mmap_sem, 1);
- }
+ if (!pi && (uval & FUTEX_WAITERS))
+ futex_wake(uaddr, &curr->mm->mmap_sem, 1,
+ FUTEX_BITSET_MATCH_ANY);
}
return 0;
}
void exit_robust_list(struct task_struct *curr)
{
struct robust_list_head __user *head = curr->robust_list;
- struct robust_list __user *entry, *pending;
- unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+ struct robust_list __user *entry, *next_entry, *pending;
+ unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
unsigned long futex_offset;
+ int rc;
+
+ if (!futex_cmpxchg_enabled)
+ return;
/*
* Fetch the list head (which was registered earlier, via
if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
return;
- if (pending)
- handle_futex_death((void __user *)pending + futex_offset,
- curr, pip);
-
+ next_entry = NULL; /* avoid warning with gcc */
while (entry != &head->list) {
/*
+ * Fetch the next entry in the list before calling
+ * handle_futex_death:
+ */
+ rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
+ /*
* A pending lock might already be on the list, so
* don't process it twice:
*/
if (handle_futex_death((void __user *)entry + futex_offset,
curr, pi))
return;
- /*
- * Fetch the next entry in the list:
- */
- if (fetch_robust_entry(&entry, &entry->next, &pi))
+ if (rc)
return;
+ entry = next_entry;
+ pi = next_pi;
/*
* Avoid excessively long or circular lists:
*/
cond_resched();
}
+
+ if (pending)
+ handle_futex_death((void __user *)pending + futex_offset,
+ curr, pip);
}
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
u32 __user *uaddr2, u32 val2, u32 val3)
{
- int ret;
+ int ret = -ENOSYS;
int cmd = op & FUTEX_CMD_MASK;
struct rw_semaphore *fshared = NULL;
switch (cmd) {
case FUTEX_WAIT:
- ret = futex_wait(uaddr, fshared, val, timeout);
+ val3 = FUTEX_BITSET_MATCH_ANY;
+ case FUTEX_WAIT_BITSET:
+ ret = futex_wait(uaddr, fshared, val, timeout, val3);
break;
case FUTEX_WAKE:
- ret = futex_wake(uaddr, fshared, val);
+ val3 = FUTEX_BITSET_MATCH_ANY;
+ case FUTEX_WAKE_BITSET:
+ ret = futex_wake(uaddr, fshared, val, val3);
break;
case FUTEX_FD:
/* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */
ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
break;
case FUTEX_LOCK_PI:
- ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
+ if (futex_cmpxchg_enabled)
+ ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
break;
case FUTEX_UNLOCK_PI:
- ret = futex_unlock_pi(uaddr, fshared);
+ if (futex_cmpxchg_enabled)
+ ret = futex_unlock_pi(uaddr, fshared);
break;
case FUTEX_TRYLOCK_PI:
- ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
- break;
- case FUTEX_CMP_REQUEUE_PI:
- ret = futex_requeue_pi(uaddr, fshared, uaddr2, val, val2, &val3);
+ if (futex_cmpxchg_enabled)
+ ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
break;
default:
ret = -ENOSYS;
u32 val2 = 0;
int cmd = op & FUTEX_CMD_MASK;
- if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) {
+ if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
+ cmd == FUTEX_WAIT_BITSET)) {
if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
return -EFAULT;
if (!timespec_valid(&ts))
t = timespec_to_ktime(ts);
if (cmd == FUTEX_WAIT)
- t = ktime_add(ktime_get(), t);
+ t = ktime_add_safe(ktime_get(), t);
tp = &t;
}
/*
* requeue parameter in 'utime' if cmd == FUTEX_REQUEUE.
+ * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
*/
- if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE
- || cmd == FUTEX_CMP_REQUEUE_PI)
+ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
+ cmd == FUTEX_WAKE_OP)
val2 = (u32) (unsigned long) utime;
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
int flags, const char *dev_name, void *data,
struct vfsmount *mnt)
{
- return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA, mnt);
+ return get_sb_pseudo(fs_type, "futex", NULL, FUTEXFS_SUPER_MAGIC, mnt);
}
static struct file_system_type futex_fs_type = {
.kill_sb = kill_anon_super,
};
-static int __init init(void)
+static int __init futex_init(void)
{
- int i = register_filesystem(&futex_fs_type);
+ u32 curval;
+ int i;
+ /*
+ * This will fail and we want it. Some arch implementations do
+ * runtime detection of the futex_atomic_cmpxchg_inatomic()
+ * functionality. We want to know that before we call in any
+ * of the complex code paths. Also we want to prevent
+ * registration of robust lists in that case. NULL is
+ * guaranteed to fault and we get -EFAULT on functional
+ * implementation, the non functional ones will return
+ * -ENOSYS.
+ */
+ curval = cmpxchg_futex_value_locked(NULL, 0, 0);
+ if (curval == -EFAULT)
+ futex_cmpxchg_enabled = 1;
+
+ for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
+ plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
+ spin_lock_init(&futex_queues[i].lock);
+ }
+
+ i = register_filesystem(&futex_fs_type);
if (i)
return i;
return PTR_ERR(futex_mnt);
}
- for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
- plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
- spin_lock_init(&futex_queues[i].lock);
- }
return 0;
}
-__initcall(init);
+__initcall(futex_init);