Merge branches 'core/futexes', 'core/locking', 'core/rcu' and 'linus' into core/urgent
author Ingo Molnar <mingo@elte.hu>
Tue, 6 Jan 2009 08:32:11 +0000 (09:32 +0100)
committer Ingo Molnar <mingo@elte.hu>
Tue, 6 Jan 2009 08:32:11 +0000 (09:32 +0100)
kernel/futex.c
lib/percpu_counter.c
mm/backing-dev.c

diff --combined kernel/futex.c
@@@@@ -92,12 -92,12 -92,11 -92,12 +92,12 @@@@@ struct futex_pi_state 
     * A futex_q has a woken state, just like tasks have TASK_RUNNING.
     * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
     * The order of wakeup is always to make the first condition true, then
  -  * wake up q->waiters, then make the second condition true.
  +  * wake up q->waiter, then make the second condition true.
     */
    struct futex_q {
        struct plist_node list;
  -     wait_queue_head_t waiters;
  +     /* There can only be a single waiter */
  +     wait_queue_head_t waiter;
    
        /* Which hash list lock to use: */
        spinlock_t *lock_ptr;
@@@@@ -124,6 -124,6 -123,24 -124,6 +124,6 @@@@@ struct futex_hash_bucket 
    static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
    
    /*
  -  * Take mm->mmap_sem, when futex is shared
  -  */
  - static inline void futex_lock_mm(struct rw_semaphore *fshared)
  - {
  -     if (fshared)
  -             down_read(fshared);
  - }
  - 
  - /*
  -  * Release mm->mmap_sem, when the futex is shared
  -  */
  - static inline void futex_unlock_mm(struct rw_semaphore *fshared)
  - {
  -     if (fshared)
  -             up_read(fshared);
  - }
  - 
  - /*
     * We hash on the keys returned from get_futex_key (see below).
     */
    static struct futex_hash_bucket *hash_futex(union futex_key *key)
@@@@@ -144,45 -144,48 -161,6 -144,45 +144,48 @@@@@ static inline int match_futex(union fut
                && key1->both.offset == key2->both.offset);
    }
    
-  -    if (!key->both.ptr)
  + /*
  +  * Take a reference to the resource addressed by a key.
  +  * Can be called while holding spinlocks.
  +  */
  + static void get_futex_key_refs(union futex_key *key)
  + {
  +     if (!key->both.ptr)
  +             return;
  + 
  +     switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
  +     case FUT_OFF_INODE:
  +             atomic_inc(&key->shared.inode->i_count);
  +             break;
  +     case FUT_OFF_MMSHARED:
  +             atomic_inc(&key->private.mm->mm_count);
  +             break;
  +     }
  + }
  + 
  + /*
  +  * Drop a reference to the resource addressed by a key.
  +  * The hash bucket spinlock must not be held.
  +  */
  + static void drop_futex_key_refs(union futex_key *key)
  + {
+ ++    if (!key->both.ptr) {
+ ++            /* If we're here then we tried to put a key we failed to get */
+ ++            WARN_ON_ONCE(1);
  +             return;
+ ++    }
  + 
  +     switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
  +     case FUT_OFF_INODE:
  +             iput(key->shared.inode);
  +             break;
  +     case FUT_OFF_MMSHARED:
  +             mmdrop(key->private.mm);
  +             break;
  +     }
  + }
  + 
    /**
     * get_futex_key - Get parameters which are the keys for a futex.
     * @uaddr: virtual address of the futex
     * For other futexes, it points to &current->mm->mmap_sem and
     * caller must have taken the reader lock, but NOT any spinlocks.
     */
  - static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
  -                      union futex_key *key)
  + static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
    {
        unsigned long address = (unsigned long)uaddr;
        struct mm_struct *mm = current->mm;
  -     struct vm_area_struct *vma;
        struct page *page;
        int err;
    
                        return -EFAULT;
                key->private.mm = mm;
                key->private.address = address;
  +             get_futex_key_refs(key);
                return 0;
        }
  -     /*
  -      * The futex is hashed differently depending on whether
  -      * it's in a shared or private mapping.  So check vma first.
  -      */
  -     vma = find_extend_vma(mm, address);
  -     if (unlikely(!vma))
  -             return -EFAULT;
    
  -     /*
  -      * Permissions.
  -      */
  -     if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
  -             return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
  + again:
  +     err = get_user_pages_fast(address, 1, 0, &page);
  +     if (err < 0)
  +             return err;
  + 
  +     lock_page(page);
  +     if (!page->mapping) {
  +             unlock_page(page);
  +             put_page(page);
  +             goto again;
  +     }
    
        /*
         * Private mappings are handled in a simple way.
         *
         * NOTE: When userspace waits on a MAP_SHARED mapping, even if
         * it's a read-only handle, it's expected that futexes attach to
  -      * the object not the particular process.  Therefore we use
  -      * VM_MAYSHARE here, not VM_SHARED which is restricted to shared
  -      * mappings of _writable_ handles.
  +      * the object, not the particular process.
         */
  -     if (likely(!(vma->vm_flags & VM_MAYSHARE))) {
  -             key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */
  +     if (PageAnon(page)) {
  +             key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
                key->private.mm = mm;
                key->private.address = address;
  -             return 0;
  -     }
  - 
  -     /*
  -      * Linear file mappings are also simple.
  -      */
  -     key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
  -     key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
  -     if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
  -             key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
  -                                  + vma->vm_pgoff);
  -             return 0;
  +     } else {
  +             key->both.offset |= FUT_OFF_INODE; /* inode-based key */
  +             key->shared.inode = page->mapping->host;
  +             key->shared.pgoff = page->index;
        }
    
  -     /*
  -      * We could walk the page table to read the non-linear
  -      * pte, and get the page index without fetching the page
  -      * from swap.  But that's a lot of code to duplicate here
  -      * for a rare case, so we simply fetch the page.
  -      */
  -     err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
  -     if (err >= 0) {
  -             key->shared.pgoff =
  -                     page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
  -             put_page(page);
  -             return 0;
  -     }
  -     return err;
  - }
  +     get_futex_key_refs(key);
    
  - /*
  -  * Take a reference to the resource addressed by a key.
  -  * Can be called while holding spinlocks.
  -  *
  -  */
  - static void get_futex_key_refs(union futex_key *key)
  - {
  -     if (key->both.ptr == NULL)
  -             return;
  -     switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
  -             case FUT_OFF_INODE:
  -                     atomic_inc(&key->shared.inode->i_count);
  -                     break;
  -             case FUT_OFF_MMSHARED:
  -                     atomic_inc(&key->private.mm->mm_count);
  -                     break;
  -     }
  +     unlock_page(page);
  +     put_page(page);
  +     return 0;
    }
    
  - /*
  -  * Drop a reference to the resource addressed by a key.
  -  * The hash bucket spinlock must not be held.
  -  */
  - static void drop_futex_key_refs(union futex_key *key)
  + static inline
  + void put_futex_key(int fshared, union futex_key *key)
    {
  -     if (!key->both.ptr)
  -             return;
  -     switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
  -             case FUT_OFF_INODE:
  -                     iput(key->shared.inode);
  -                     break;
  -             case FUT_OFF_MMSHARED:
  -                     mmdrop(key->private.mm);
  -                     break;
  -     }
  +     drop_futex_key_refs(key);
    }
    
    static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
@@@@@ -298,8 -301,8 -328,10 -298,8 +301,8 @@@@@ static int get_futex_value_locked(u32 *
    
    /*
     * Fault handling.
  -  * if fshared is non NULL, current->mm->mmap_sem is already held
     */
  - static int futex_handle_fault(unsigned long address,
  -                           struct rw_semaphore *fshared, int attempt)
  + static int futex_handle_fault(unsigned long address, int attempt)
    {
        struct vm_area_struct * vma;
        struct mm_struct *mm = current->mm;
        if (attempt > 2)
                return ret;
    
  -     if (!fshared)
  -             down_read(&mm->mmap_sem);
  +     down_read(&mm->mmap_sem);
        vma = find_vma(mm, address);
        if (vma && address >= vma->vm_start &&
            (vma->vm_flags & VM_WRITE)) {
                                current->min_flt++;
                }
        }
  -     if (!fshared)
  -             up_read(&mm->mmap_sem);
  +     up_read(&mm->mmap_sem);
        return ret;
    }
    
@@@@@ -351,7 -354,7 -385,6 -351,7 +354,7 @@@@@ static int refill_pi_state_cache(void
        /* pi_mutex gets initialized later */
        pi_state->owner = NULL;
        atomic_set(&pi_state->refcount, 1);
  +     pi_state->key = FUTEX_KEY_INIT;
    
        current->pi_state_cache = pi_state;
    
@@@@@ -406,20 -409,13 -439,13 -406,20 +409,20 @@@@@ static void free_pi_state(struct futex_
    static struct task_struct * futex_find_get_task(pid_t pid)
    {
        struct task_struct *p;
 ++     const struct cred *cred = current_cred(), *pcred;
    
        rcu_read_lock();
        p = find_task_by_vpid(pid);
 --     if (!p || ((current->euid != p->euid) && (current->euid != p->uid)))
 ++     if (!p) {
                p = ERR_PTR(-ESRCH);
 --     else
 --             get_task_struct(p);
 ++     } else {
 ++             pcred = __task_cred(p);
 ++             if (cred->euid != pcred->euid &&
 ++                 cred->euid != pcred->uid)
 ++                     p = ERR_PTR(-ESRCH);
 ++             else
 ++                     get_task_struct(p);
 ++     }
    
        rcu_read_unlock();
    
@@@@@ -436,7 -432,7 -462,7 -436,7 +439,7 @@@@@ void exit_pi_state_list(struct task_str
        struct list_head *next, *head = &curr->pi_state_list;
        struct futex_pi_state *pi_state;
        struct futex_hash_bucket *hb;
  -     union futex_key key;
  +     union futex_key key = FUTEX_KEY_INIT;
    
        if (!futex_cmpxchg_enabled)
                return;
@@@@@ -581,7 -577,7 -607,7 -581,7 +584,7 @@@@@ static void wake_futex(struct futex_q *
         * The lock in wake_up_all() is a crucial memory barrier after the
         * plist_del() and also before assigning to q->lock_ptr.
         */
  -     wake_up_all(&q->waiters);
  +     wake_up(&q->waiter);
        /*
         * The waiting task can free the futex_q as soon as this is written,
         * without taking any locks.  This must come last.
@@@@@ -693,17 -689,17 -719,20 -693,17 +696,17 @@@@@ double_lock_hb(struct futex_hash_bucke
     * Wake up all waiters hashed on the physical page that is mapped
     * to this virtual address:
     */
  - static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
  -                   int nr_wake, u32 bitset)
  + static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
    {
        struct futex_hash_bucket *hb;
        struct futex_q *this, *next;
        struct plist_head *head;
  -     union futex_key key;
  +     union futex_key key = FUTEX_KEY_INIT;
        int ret;
    
        if (!bitset)
                return -EINVAL;
    
  -     futex_lock_mm(fshared);
  - 
        ret = get_futex_key(uaddr, fshared, &key);
        if (unlikely(ret != 0))
                goto out;
        }
    
        spin_unlock(&hb->lock);
-  -out:
  +     put_futex_key(fshared, &key);
+  +out:
  -     futex_unlock_mm(fshared);
        return ret;
    }
    
     * to this virtual address:
     */
    static int
  - futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared,
  -           u32 __user *uaddr2,
  + futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
              int nr_wake, int nr_wake2, int op)
    {
  -     union futex_key key1, key2;
  +     union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
        struct futex_hash_bucket *hb1, *hb2;
        struct plist_head *head;
        struct futex_q *this, *next;
        int ret, op_ret, attempt = 0;
    
    retryfull:
  -     futex_lock_mm(fshared);
  - 
        ret = get_futex_key(uaddr1, fshared, &key1);
        if (unlikely(ret != 0))
                goto out;
        ret = get_futex_key(uaddr2, fshared, &key2);
        if (unlikely(ret != 0))
- --            goto out;
+ ++            goto out_put_key1;
    
        hb1 = hash_futex(&key1);
        hb2 = hash_futex(&key2);
                 * but we might get them from range checking
                 */
                ret = op_ret;
- --            goto out;
+ ++            goto out_put_keys;
    #endif
    
                if (unlikely(op_ret != -EFAULT)) {
                        ret = op_ret;
- --                    goto out;
+ ++                    goto out_put_keys;
                }
    
                /*
                 */
                if (attempt++) {
                        ret = futex_handle_fault((unsigned long)uaddr2,
  -                                              fshared, attempt);
  +                                              attempt);
                        if (ret)
- --                            goto out;
+ ++                            goto out_put_keys;
                        goto retry;
                }
    
  -             /*
  -              * If we would have faulted, release mmap_sem,
  -              * fault it in and start all over again.
  -              */
  -             futex_unlock_mm(fshared);
  - 
                ret = get_user(dummy, uaddr2);
                if (ret)
                        return ret;
        spin_unlock(&hb1->lock);
        if (hb1 != hb2)
                spin_unlock(&hb2->lock);
-  -out:
+ ++out_put_keys:
  +     put_futex_key(fshared, &key2);
+ ++out_put_key1:
  +     put_futex_key(fshared, &key1);
-  -
+  +out:
  -     futex_unlock_mm(fshared);
  - 
        return ret;
    }
    
     * Requeue all waiters hashed on one physical page to another
     * physical page.
     */
  - static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
  -                      u32 __user *uaddr2,
  + static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
                         int nr_wake, int nr_requeue, u32 *cmpval)
    {
  -     union futex_key key1, key2;
  +     union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
        struct futex_hash_bucket *hb1, *hb2;
        struct plist_head *head1;
        struct futex_q *this, *next;
        int ret, drop_count = 0;
    
- -- retry:
  -     futex_lock_mm(fshared);
  - 
+ ++retry:
        ret = get_futex_key(uaddr1, fshared, &key1);
        if (unlikely(ret != 0))
                goto out;
        ret = get_futex_key(uaddr2, fshared, &key2);
        if (unlikely(ret != 0))
- --            goto out;
+ ++            goto out_put_key1;
    
        hb1 = hash_futex(&key1);
        hb2 = hash_futex(&key2);
                        if (hb1 != hb2)
                                spin_unlock(&hb2->lock);
    
  -                     /*
  -                      * If we would have faulted, release mmap_sem, fault
  -                      * it in and start all over again.
  -                      */
  -                     futex_unlock_mm(fshared);
  - 
                        ret = get_user(curval, uaddr1);
    
                        if (!ret)
                                goto retry;
    
- --                    return ret;
+ ++                    goto out_put_keys;
                }
                if (curval != *cmpval) {
                        ret = -EAGAIN;
@@@@@ -927,9 -924,11 -973,8 -927,9 +931,11 @@@@@ out_unlock
        while (--drop_count >= 0)
                drop_futex_key_refs(&key1);
    
-  -out:
+ ++out_put_keys:
  +     put_futex_key(fshared, &key2);
+ ++out_put_key1:
  +     put_futex_key(fshared, &key1);
+  +out:
  -     futex_unlock_mm(fshared);
        return ret;
    }
    
@@@@@ -938,7 -937,7 -983,7 -938,7 +944,7 @@@@@ static inline struct futex_hash_bucket 
    {
        struct futex_hash_bucket *hb;
    
  -     init_waitqueue_head(&q->waiters);
  +     init_waitqueue_head(&q->waiter);
    
        get_futex_key_refs(&q->key);
        hb = hash_futex(&q->key);
@@@@@ -990,7 -989,7 -1035,7 -990,7 +996,7 @@@@@ static int unqueue_me(struct futex_q *q
        int ret = 0;
    
        /* In the common case we don't take the spinlock, which is nice. */
- -- retry:
+ ++retry:
        lock_ptr = q->lock_ptr;
        barrier();
        if (lock_ptr != NULL) {
@@@@@ -1051,7 -1050,7 -1096,8 -1051,7 +1057,7 @@@@@ static void unqueue_me_pi(struct futex_
     * private futexes.
     */
    static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
  -                             struct task_struct *newowner,
  -                             struct rw_semaphore *fshared)
  +                             struct task_struct *newowner, int fshared)
    {
        u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
        struct futex_pi_state *pi_state = q->pi_state;
    handle_fault:
        spin_unlock(q->lock_ptr);
    
  -     ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++);
  +     ret = futex_handle_fault((unsigned long)uaddr, attempt++);
    
        spin_lock(q->lock_ptr);
    
     * In case we must use restart_block to restart a futex_wait, we
     * encode the shared capability (and, with FLAGS_CLOCKRT, the clock
     * selection) in 'flags'.
     */
  - #define FLAGS_SHARED  1
  + #define FLAGS_SHARED                0x01
  + #define FLAGS_CLOCKRT               0x02
    
    static long futex_wait_restart(struct restart_block *restart);
    
  - static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
  -                   u32 val, ktime_t *abs_time, u32 bitset)
  + static int futex_wait(u32 __user *uaddr, int fshared,
  +                   u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
    {
        struct task_struct *curr = current;
        DECLARE_WAITQUEUE(wait, curr);
    
        q.pi_state = NULL;
        q.bitset = bitset;
- -- retry:
  -     futex_lock_mm(fshared);
  - 
+ ++retry:
  +     q.key = FUTEX_KEY_INIT;
        ret = get_futex_key(uaddr, fshared, &q.key);
        if (unlikely(ret != 0))
- --            goto out_release_sem;
+ ++            goto out;
    
        hb = queue_lock(&q);
    
    
        if (unlikely(ret)) {
                queue_unlock(&q, hb);
  - 
  -             /*
  -              * If we would have faulted, release mmap_sem, fault it in and
  -              * start all over again.
  -              */
  -             futex_unlock_mm(fshared);
+ ++            put_futex_key(fshared, &q.key);
    
                ret = get_user(uval, uaddr);
    
        }
        ret = -EWOULDBLOCK;
        if (uval != val)
- --            goto out_unlock_release_sem;
+ ++            goto out_unlock_put_key;
    
        /* Only actually queue if *uaddr contained val.  */
        queue_me(&q, hb);
    
        /*
  -      * Now the futex is queued and we have checked the data, we
  -      * don't want to hold mmap_sem while we sleep.
  -      */
  -     futex_unlock_mm(fshared);
  - 
  -     /*
         * There might have been scheduling since the queue_me(), as we
         * cannot hold a spinlock across the get_user() in case it
         * faults, and we cannot just set TASK_INTERRUPTIBLE state when
    
        /* add_wait_queue is the barrier after __set_current_state. */
        __set_current_state(TASK_INTERRUPTIBLE);
  -     add_wait_queue(&q.waiters, &wait);
  +     add_wait_queue(&q.waiter, &wait);
        /*
         * !plist_node_empty() is safe here without any lock.
         * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
                        slack = current->timer_slack_ns;
                        if (rt_task(current))
                                slack = 0;
  -                     hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC,
  -                                             HRTIMER_MODE_ABS);
  +                     hrtimer_init_on_stack(&t.timer,
  +                                           clockrt ? CLOCK_REALTIME :
  +                                           CLOCK_MONOTONIC,
  +                                           HRTIMER_MODE_ABS);
                        hrtimer_init_sleeper(&t, current);
                        hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack);
    
    
                if (fshared)
                        restart->futex.flags |= FLAGS_SHARED;
  +             if (clockrt)
  +                     restart->futex.flags |= FLAGS_CLOCKRT;
                return -ERESTART_RESTARTBLOCK;
        }
    
- -- out_unlock_release_sem:
+ ++out_unlock_put_key:
        queue_unlock(&q, hb);
-  -
-  - out_release_sem:
  +     put_futex_key(fshared, &q.key);
+  +
  -  out_release_sem:
  -     futex_unlock_mm(fshared);
+ ++out:
        return ret;
    }
    
    static long futex_wait_restart(struct restart_block *restart)
    {
        u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
  -     struct rw_semaphore *fshared = NULL;
  +     int fshared = 0;
        ktime_t t;
    
        t.tv64 = restart->futex.time;
        restart->fn = do_no_restart_syscall;
        if (restart->futex.flags & FLAGS_SHARED)
  -             fshared = &current->mm->mmap_sem;
  +             fshared = 1;
        return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
  -                             restart->futex.bitset);
  +                             restart->futex.bitset,
  +                             restart->futex.flags & FLAGS_CLOCKRT);
    }
    
    
     * if there are waiters then it will block, it does PI, etc. (Due to
     * races the kernel might see a 0 value of the futex too.)
     */
  - static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
  + static int futex_lock_pi(u32 __user *uaddr, int fshared,
                         int detect, ktime_t *time, int trylock)
    {
        struct hrtimer_sleeper timeout, *to = NULL;
        }
    
        q.pi_state = NULL;
- -- retry:
  -     futex_lock_mm(fshared);
  - 
+ ++retry:
  +     q.key = FUTEX_KEY_INIT;
        ret = get_futex_key(uaddr, fshared, &q.key);
        if (unlikely(ret != 0))
- --            goto out_release_sem;
+ ++            goto out;
    
- -- retry_unlocked:
+ ++retry_unlocked:
        hb = queue_lock(&q);
    
- -- retry_locked:
+ ++retry_locked:
        ret = lock_taken = 0;
    
        /*
         */
        if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) {
                ret = -EDEADLK;
- --            goto out_unlock_release_sem;
+ ++            goto out_unlock_put_key;
        }
    
        /*
         * Surprise - we got the lock. Just return to userspace:
         */
        if (unlikely(!curval))
- --            goto out_unlock_release_sem;
+ ++            goto out_unlock_put_key;
    
        uval = curval;
    
         * We took the lock due to owner died take over.
         */
        if (unlikely(lock_taken))
- --            goto out_unlock_release_sem;
+ ++            goto out_unlock_put_key;
    
        /*
         * We dont have the lock. Look up the PI state (or create it if
                         * exit to complete.
                         */
                        queue_unlock(&q, hb);
  -                     futex_unlock_mm(fshared);
                        cond_resched();
                        goto retry;
    
                                goto retry_locked;
                        }
                default:
- --                    goto out_unlock_release_sem;
+ ++                    goto out_unlock_put_key;
                }
        }
    
         */
        queue_me(&q, hb);
    
  -     /*
  -      * Now the futex is queued and we have checked the data, we
  -      * don't want to hold mmap_sem while we sleep.
  -      */
  -     futex_unlock_mm(fshared);
  - 
        WARN_ON(!q.pi_state);
        /*
         * Block on the PI mutex:
                ret = ret ? 0 : -EWOULDBLOCK;
        }
    
  -     futex_lock_mm(fshared);
        spin_lock(q.lock_ptr);
    
        if (!ret) {
    
        /* Unqueue and drop the lock */
        unqueue_me_pi(&q);
  -     futex_unlock_mm(fshared);
    
        if (to)
                destroy_hrtimer_on_stack(&to->timer);
        return ret != -EINTR ? ret : -ERESTARTNOINTR;
    
- -- out_unlock_release_sem:
+ ++out_unlock_put_key:
        queue_unlock(&q, hb);
    
- -- out_release_sem:
  -     futex_unlock_mm(fshared);
+ ++out_put_key:
  +     put_futex_key(fshared, &q.key);
+ ++out:
        if (to)
                destroy_hrtimer_on_stack(&to->timer);
        return ret;
    
- -- uaddr_faulted:
+ ++uaddr_faulted:
        /*
  -      * We have to r/w  *(int __user *)uaddr, but we can't modify it
  -      * non-atomically.  Therefore, if get_user below is not
  -      * enough, we need to handle the fault ourselves, while
  -      * still holding the mmap_sem.
  -      *
  -      * ... and hb->lock. :-) --ANK
  +      * We have to r/w  *(int __user *)uaddr, and we have to modify it
  +      * atomically.  Therefore, if we continue to fault after get_user()
  +      * below, we need to handle the fault ourselves, while still holding
  +      * the mmap_sem.  This can occur if the uaddr is under contention as
  +      * we have to drop the mmap_sem in order to call get_user().
         */
        queue_unlock(&q, hb);
    
        if (attempt++) {
  -             ret = futex_handle_fault((unsigned long)uaddr, fshared,
  -                                      attempt);
  +             ret = futex_handle_fault((unsigned long)uaddr, attempt);
                if (ret)
- --                    goto out_release_sem;
+ ++                    goto out_put_key;
                goto retry_unlocked;
        }
    
  -     futex_unlock_mm(fshared);
  - 
        ret = get_user(uval, uaddr);
  -     if (!ret && (uval != -EFAULT))
  +     if (!ret)
                goto retry;
    
        if (to)
     * This is the in-kernel slowpath: we look up the PI state (if any),
     * and do the rt-mutex unlock.
     */
  - static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared)
  + static int futex_unlock_pi(u32 __user *uaddr, int fshared)
    {
        struct futex_hash_bucket *hb;
        struct futex_q *this, *next;
        u32 uval;
        struct plist_head *head;
  -     union futex_key key;
  +     union futex_key key = FUTEX_KEY_INIT;
        int ret, attempt = 0;
    
    retry:
         */
        if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
                return -EPERM;
  -     /*
  -      * First take all the futex related locks:
  -      */
  -     futex_lock_mm(fshared);
    
        ret = get_futex_key(uaddr, fshared, &key);
        if (unlikely(ret != 0))
@@@@@ -1675,31 -1676,31 -1746,35 -1675,31 +1683,31 @@@@@ retry_unlocked
    
    out_unlock:
        spin_unlock(&hb->lock);
- --out:
  -     futex_unlock_mm(fshared);
  +     put_futex_key(fshared, &key);
    
+ ++out:
        return ret;
    
    pi_faulted:
        /*
  -      * We have to r/w  *(int __user *)uaddr, but we can't modify it
  -      * non-atomically.  Therefore, if get_user below is not
  -      * enough, we need to handle the fault ourselves, while
  -      * still holding the mmap_sem.
  -      *
  -      * ... and hb->lock. --ANK
  +      * We have to r/w  *(int __user *)uaddr, and we have to modify it
  +      * atomically.  Therefore, if we continue to fault after get_user()
  +      * below, we need to handle the fault ourselves, while still holding
  +      * the mmap_sem.  This can occur if the uaddr is under contention as
  +      * we have to drop the mmap_sem in order to call get_user().
         */
        spin_unlock(&hb->lock);
    
        if (attempt++) {
  -             ret = futex_handle_fault((unsigned long)uaddr, fshared,
  -                                      attempt);
  +             ret = futex_handle_fault((unsigned long)uaddr, attempt);
                if (ret)
                        goto out;
                uval = 0;
                goto retry_unlocked;
        }
    
  -     futex_unlock_mm(fshared);
  - 
        ret = get_user(uval, uaddr);
  -     if (!ret && (uval != -EFAULT))
  +     if (!ret)
                goto retry;
    
        return ret;
@@@@@ -1754,7 -1755,6 -1829,6 -1754,7 +1762,7 @@@@@ sys_get_robust_list(int pid, struct rob
    {
        struct robust_list_head __user *head;
        unsigned long ret;
 ++     const struct cred *cred = current_cred(), *pcred;
    
        if (!futex_cmpxchg_enabled)
                return -ENOSYS;
                if (!p)
                        goto err_unlock;
                ret = -EPERM;
 --             if ((current->euid != p->euid) && (current->euid != p->uid) &&
 --                             !capable(CAP_SYS_PTRACE))
 ++             pcred = __task_cred(p);
 ++             if (cred->euid != pcred->euid &&
 ++                 cred->euid != pcred->uid &&
 ++                 !capable(CAP_SYS_PTRACE))
                        goto err_unlock;
                head = p->robust_list;
                rcu_read_unlock();
                 * PI futexes happens in exit_pi_state():
                 */
                if (!pi && (uval & FUTEX_WAITERS))
  -                     futex_wake(uaddr, &curr->mm->mmap_sem, 1,
  -                                FUTEX_BITSET_MATCH_ANY);
  +                     futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
        }
        return 0;
    }
@@@@@ -1920,22 -1918,22 -1993,18 -1920,22 +1928,22 @@@@@ void exit_robust_list(struct task_struc
    long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
                u32 __user *uaddr2, u32 val2, u32 val3)
    {
  -     int ret = -ENOSYS;
  +     int clockrt, ret = -ENOSYS;
        int cmd = op & FUTEX_CMD_MASK;
  -     struct rw_semaphore *fshared = NULL;
  +     int fshared = 0;
    
        if (!(op & FUTEX_PRIVATE_FLAG))
  -             fshared = &current->mm->mmap_sem;
  +             fshared = 1;
  + 
  +     clockrt = op & FUTEX_CLOCK_REALTIME;
  +     if (clockrt && cmd != FUTEX_WAIT_BITSET)
  +             return -ENOSYS;
    
        switch (cmd) {
        case FUTEX_WAIT:
                val3 = FUTEX_BITSET_MATCH_ANY;
        case FUTEX_WAIT_BITSET:
  -             ret = futex_wait(uaddr, fshared, val, timeout, val3);
  +             ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt);
                break;
        case FUTEX_WAKE:
                val3 = FUTEX_BITSET_MATCH_ANY;
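
The do_futex() hunk above wires the new FUTEX_CLOCK_REALTIME flag through
to futex_wait() and rejects it for every command except FUTEX_WAIT_BITSET.
A minimal userspace sketch of what this enables on a kernel carrying this
merge (the #define fallback is only for headers predating the flag; the
rest is the long-standing futex(2) ABI):

/* Sketch: absolute, CLOCK_REALTIME-based timeout for a futex wait.
 * Assumes a kernel with this merge; older kernels return ENOSYS. */
#define _GNU_SOURCE
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>

#ifndef FUTEX_CLOCK_REALTIME
#define FUTEX_CLOCK_REALTIME	256	/* value introduced by this merge */
#endif

static int futex_word;

int main(void)
{
	struct timespec abs_timeout;

	clock_gettime(CLOCK_REALTIME, &abs_timeout);
	abs_timeout.tv_sec += 2;	/* absolute wall-clock deadline */

	/* Block while futex_word == 0.  With FUTEX_CLOCK_REALTIME the
	 * absolute timeout is measured against CLOCK_REALTIME, so a
	 * wall-clock jump moves the wakeup; the default is CLOCK_MONOTONIC. */
	if (syscall(SYS_futex, &futex_word,
		    FUTEX_WAIT_BITSET | FUTEX_CLOCK_REALTIME,
		    0, &abs_timeout, NULL, FUTEX_BITSET_MATCH_ANY) == -1)
		perror("futex");	/* ETIMEDOUT after ~2s if never woken */
	return 0;
}

Since the call omits FUTEX_PRIVATE_FLAG, it also exercises the reworked
get_futex_key() above, which now resolves shared keys via
get_user_pages_fast() instead of taking mmap_sem.
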
diff --combined lib/percpu_counter.c
@@@@@ -62,17 -62,20 -62,20 -62,17 +62,17 @@@@@ s64 __percpu_counter_sum(struct percpu_
        for_each_online_cpu(cpu) {
                s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
                ret += *pcount;
 --             *pcount = 0;
        }
 --     fbc->count = ret;
 -- 
        spin_unlock(&fbc->lock);
        return ret;
    }
    EXPORT_SYMBOL(__percpu_counter_sum);
    
-- -static struct lock_class_key percpu_counter_irqsafe;
-- -
-- -int percpu_counter_init(struct percpu_counter *fbc, s64 amount)
++ +int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
++ +                      struct lock_class_key *key)
    {
        spin_lock_init(&fbc->lock);
++ +    lockdep_set_class(&fbc->lock, key);
        fbc->count = amount;
        fbc->counters = alloc_percpu(s32);
        if (!fbc->counters)
    #endif
        return 0;
    }
-- -EXPORT_SYMBOL(percpu_counter_init);
-- -
-- -int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount)
-- -{
-- -    int err;
-- -
-- -    err = percpu_counter_init(fbc, amount);
-- -    if (!err)
-- -            lockdep_set_class(&fbc->lock, &percpu_counter_irqsafe);
-- -    return err;
-- -}
++ +EXPORT_SYMBOL(__percpu_counter_init);
    
    void percpu_counter_destroy(struct percpu_counter *fbc)
    {
        if (!fbc->counters)
                return;
    
 --     free_percpu(fbc->counters);
 --     fbc->counters = NULL;
    #ifdef CONFIG_HOTPLUG_CPU
        mutex_lock(&percpu_counters_lock);
        list_del(&fbc->list);
        mutex_unlock(&percpu_counters_lock);
    #endif
 ++     free_percpu(fbc->counters);
 ++     fbc->counters = NULL;
    }
    EXPORT_SYMBOL(percpu_counter_destroy);
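
Not shown in this combined diff is the include/linux/percpu_counter.h side
of the change; judging from the new __percpu_counter_init(fbc, amount, key)
signature above, the init macro now plants a static lock_class_key per call
site, giving each counter its own lockdep class and making the dedicated
percpu_counter_init_irq() variant unnecessary. Roughly (a sketch of the
idea, not the exact hunk):

/* Sketch of the matching header change: one lockdep class per init site. */
#define percpu_counter_init(fbc, value)					\
	({								\
		static struct lock_class_key __key;			\
									\
		__percpu_counter_init(fbc, value, &__key);		\
	})

The mm/backing-dev.c hunk below shows a caller converted accordingly:
bdi_init() drops the _irq variant and uses the ordinary initializer, which
now carries the per-site key.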
    
diff --combined mm/backing-dev.c
@@@@@ -176,9 -176,6 -176,6 -176,9 +176,9 @@@@@ int bdi_register(struct backing_dev_inf
        int ret = 0;
        struct device *dev;
    
 ++     if (bdi->dev)   /* The driver needs to use separate queues per device */
 ++             goto exit;
 ++ 
        va_start(args, fmt);
        dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
        va_end(args);
@@@@@ -223,7 -220,7 -220,7 -223,7 +223,7 @@@@@ int bdi_init(struct backing_dev_info *b
        bdi->max_prop_frac = PROP_FRAC_BASE;
    
        for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
-- -            err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0);
++ +            err = percpu_counter_init(&bdi->bdi_stat[i], 0);
                if (err)
                        goto err;
        }