Merge branch 'linus' into cont_syslog
[safe/jmp/linux-2.6] / block / cfq-iosched.c
index 9e0df2b..5ff4f48 100644 (file)
@@ -55,6 +55,7 @@ static const int cfq_hist_divisor = 4;
 #define RQ_CIC(rq)             \
        ((struct cfq_io_context *) (rq)->elevator_private)
 #define RQ_CFQQ(rq)            (struct cfq_queue *) ((rq)->elevator_private2)
+#define RQ_CFQG(rq)            (struct cfq_group *) ((rq)->elevator_private3)
 
 static struct kmem_cache *cfq_pool;
 static struct kmem_cache *cfq_ioc_pool;
@@ -63,6 +64,9 @@ static DEFINE_PER_CPU(unsigned long, cfq_ioc_count);
 static struct completion *ioc_gone;
 static DEFINE_SPINLOCK(ioc_gone_lock);
 
+static DEFINE_SPINLOCK(cic_index_lock);
+static DEFINE_IDA(cic_index_ida);
+
 #define CFQ_PRIO_LISTS         IOPRIO_BE_NR
 #define cfq_class_idle(cfqq)   ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
 #define cfq_class_rt(cfqq)     ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
@@ -270,6 +274,7 @@ struct cfq_data {
        unsigned int cfq_latency;
        unsigned int cfq_group_isolation;
 
+       unsigned int cic_index;
        struct list_head cic_list;
 
        /*
@@ -344,7 +349,7 @@ CFQ_CFQQ_FNS(deep);
 CFQ_CFQQ_FNS(wait_busy);
 #undef CFQ_CFQQ_FNS
 
-#ifdef CONFIG_DEBUG_CFQ_IOSCHED
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
        blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \
                        cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \
@@ -429,6 +434,24 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
        cic->cfqq[is_sync] = cfqq;
 }
 
+#define CIC_DEAD_KEY   1ul
+#define CIC_DEAD_INDEX_SHIFT   1
+/* Key stored in a dead cic: the queue's cic_index, tagged with the dead bit. */
+static inline void *cfqd_dead_key(struct cfq_data *cfqd)
+{
+       return (void *)(CIC_DEAD_KEY | (cfqd->cic_index << CIC_DEAD_INDEX_SHIFT));
+}
+
+/*
+ * Owner cfq_data of @cic, or NULL once ->key carries the CIC_DEAD_KEY mark.
+ */
+static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
+{
+       struct cfq_data *owner = cic->key;
+
+       return unlikely((unsigned long) owner & CIC_DEAD_KEY) ? NULL : owner;
+}
+
 /*
  * We regard a request as SYNC, if it's either a read or has the SYNC bit
  * set (in which case it could also be direct WRITE).
@@ -887,7 +910,7 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
 }
 
 static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
-                               struct cfq_queue *cfqq, bool forced)
+                               struct cfq_queue *cfqq)
 {
        struct cfq_rb_root *st = &cfqd->grp_service_tree;
        unsigned int used_sl, charge_sl;
@@ -917,7 +940,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
        cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
                                        st->min_vdisktime);
        blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
-       blkiocg_set_start_empty_time(&cfqg->blkg, forced);
+       blkiocg_set_start_empty_time(&cfqg->blkg);
 }
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
@@ -961,7 +984,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
        for_each_cfqg_st(cfqg, i, j, st)
                *st = CFQ_RB_ROOT;
        RB_CLEAR_NODE(&cfqg->rb_node);
-       blkio_group_init(&cfqg->blkg);
 
        /*
         * Take the initial reference that will be released on destroy
@@ -1002,6 +1024,12 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
        return cfqg;
 }
 
+static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
+{
+       atomic_inc(&cfqg->ref);         /* bump ->ref by one for the caller */
+       return cfqg;                    /* same group, for use in assignments */
+}
+
 static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
 {
        /* Currently, all async queues are mapped to root group */
@@ -1085,6 +1113,12 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
 {
        return &cfqd->root_group;
 }
+
+static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
+{
+       return cfqg;    /* no-op variant: hands the group back, ->ref untouched */
+}
+
 static inline void
 cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
        cfqq->cfqg = cfqg;
@@ -1387,12 +1421,12 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
 {
        elv_rb_del(&cfqq->sort_list, rq);
        cfqq->queued[rq_is_sync(rq)]--;
-       blkiocg_update_io_remove_stats(&cfqq->cfqg->blkg, rq_data_dir(rq),
+       blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq),
                                                rq_is_sync(rq));
        cfq_add_rq_rb(rq);
-       blkiocg_update_io_add_stats(
-                       &cfqq->cfqg->blkg, &cfqq->cfqd->serving_group->blkg,
-                       rq_data_dir(rq), rq_is_sync(rq));
+       blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg,
+                       &cfqq->cfqd->serving_group->blkg, rq_data_dir(rq),
+                       rq_is_sync(rq));
 }
 
 static struct request *
@@ -1448,7 +1482,7 @@ static void cfq_remove_request(struct request *rq)
        cfq_del_rq_rb(rq);
 
        cfqq->cfqd->rq_queued--;
-       blkiocg_update_io_remove_stats(&cfqq->cfqg->blkg, rq_data_dir(rq),
+       blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq),
                                                rq_is_sync(rq));
        if (rq_is_meta(rq)) {
                WARN_ON(!cfqq->meta_pending);
@@ -1484,8 +1518,7 @@ static void cfq_merged_request(struct request_queue *q, struct request *req,
 static void cfq_bio_merged(struct request_queue *q, struct request *req,
                                struct bio *bio)
 {
-       struct cfq_queue *cfqq = RQ_CFQQ(req);
-       blkiocg_update_io_merged_stats(&cfqq->cfqg->blkg, bio_data_dir(bio),
+       blkiocg_update_io_merged_stats(&(RQ_CFQG(req))->blkg, bio_data_dir(bio),
                                        cfq_bio_sync(bio));
 }
 
@@ -1506,7 +1539,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
        if (cfqq->next_rq == next)
                cfqq->next_rq = rq;
        cfq_remove_request(next);
-       blkiocg_update_io_merged_stats(&cfqq->cfqg->blkg, rq_data_dir(next),
+       blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(next),
                                        rq_is_sync(next));
 }
 
@@ -1571,7 +1604,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
  */
 static void
 __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-                   bool timed_out, bool forced)
+                   bool timed_out)
 {
        cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);
 
@@ -1598,7 +1631,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
        }
 
-       cfq_group_served(cfqd, cfqq->cfqg, cfqq, forced);
+       cfq_group_served(cfqd, cfqq->cfqg, cfqq);
 
        if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
                cfq_del_cfqq_rr(cfqd, cfqq);
@@ -1617,13 +1650,12 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        }
 }
 
-static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out,
-                                       bool forced)
+static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out)
 {
        struct cfq_queue *cfqq = cfqd->active_queue;
 
        if (cfqq)
-               __cfq_slice_expired(cfqd, cfqq, timed_out, forced);
+               __cfq_slice_expired(cfqd, cfqq, timed_out);
 }
 
 /*
@@ -2191,7 +2223,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
        }
 
 expire:
-       cfq_slice_expired(cfqd, 0, false);
+       cfq_slice_expired(cfqd, 0);
 new_queue:
        /*
         * Current queue expired. Check if we have to switch to a new
@@ -2217,7 +2249,7 @@ static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
        BUG_ON(!list_empty(&cfqq->fifo));
 
        /* By default cfqq is not expired if it is empty. Do it explicitly */
-       __cfq_slice_expired(cfqq->cfqd, cfqq, 0, true);
+       __cfq_slice_expired(cfqq->cfqd, cfqq, 0);
        return dispatched;
 }
 
@@ -2400,7 +2432,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
            cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
            cfq_class_idle(cfqq))) {
                cfqq->slice_end = jiffies + 1;
-               cfq_slice_expired(cfqd, 0, false);
+               cfq_slice_expired(cfqd, 0);
        }
 
        cfq_log_cfqq(cfqd, cfqq, "dispatched a request");
@@ -2431,7 +2463,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
        orig_cfqg = cfqq->orig_cfqg;
 
        if (unlikely(cfqd->active_queue == cfqq)) {
-               __cfq_slice_expired(cfqd, cfqq, 0, false);
+               __cfq_slice_expired(cfqd, cfqq, 0);
                cfq_schedule_dispatch(cfqd);
        }
 
@@ -2500,11 +2532,12 @@ static void cfq_cic_free(struct cfq_io_context *cic)
 static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
 {
        unsigned long flags;
+       unsigned long dead_key = (unsigned long) cic->key;
 
-       BUG_ON(!cic->dead_key);
+       BUG_ON(!(dead_key & CIC_DEAD_KEY));
 
        spin_lock_irqsave(&ioc->lock, flags);
-       radix_tree_delete(&ioc->radix_root, cic->dead_key);
+       radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT);
        hlist_del_rcu(&cic->cic_list);
        spin_unlock_irqrestore(&ioc->lock, flags);
 
@@ -2527,15 +2560,10 @@ static void cfq_free_io_context(struct io_context *ioc)
        __call_for_each_cic(ioc, cic_free_func);
 }
 
-static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+static void cfq_put_cooperator(struct cfq_queue *cfqq)
 {
        struct cfq_queue *__cfqq, *next;
 
-       if (unlikely(cfqq == cfqd->active_queue)) {
-               __cfq_slice_expired(cfqd, cfqq, 0, false);
-               cfq_schedule_dispatch(cfqd);
-       }
-
        /*
         * If this queue was scheduled to merge with another queue, be
         * sure to drop the reference taken on that queue (and others in
@@ -2551,6 +2579,16 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
                cfq_put_queue(__cfqq);
                __cfqq = next;
        }
+}
+
+static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+       if (unlikely(cfqq == cfqd->active_queue)) {
+               __cfq_slice_expired(cfqd, cfqq, 0);
+               cfq_schedule_dispatch(cfqd);
+       }
+
+       cfq_put_cooperator(cfqq);
 
        cfq_put_queue(cfqq);
 }
@@ -2563,11 +2601,10 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
        list_del_init(&cic->queue_list);
 
        /*
-        * Make sure key == NULL is seen for dead queues
+        * Make sure dead mark is seen for dead queues
         */
        smp_wmb();
-       cic->dead_key = (unsigned long) cic->key;
-       cic->key = NULL;
+       cic->key = cfqd_dead_key(cfqd);
 
        if (ioc->ioc_data == cic)
                rcu_assign_pointer(ioc->ioc_data, NULL);
@@ -2586,7 +2623,7 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
 static void cfq_exit_single_io_context(struct io_context *ioc,
                                       struct cfq_io_context *cic)
 {
-       struct cfq_data *cfqd = cic->key;
+       struct cfq_data *cfqd = cic_to_cfqd(cic);
 
        if (cfqd) {
                struct request_queue *q = cfqd->queue;
@@ -2599,7 +2636,7 @@ static void cfq_exit_single_io_context(struct io_context *ioc,
                 * race between exiting task and queue
                 */
                smp_read_barrier_depends();
-               if (cic->key)
+               if (cic->key == cfqd)
                        __cfq_exit_single_io_context(cfqd, cic);
 
                spin_unlock_irqrestore(q->queue_lock, flags);
@@ -2679,7 +2716,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
 
 static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
 {
-       struct cfq_data *cfqd = cic->key;
+       struct cfq_data *cfqd = cic_to_cfqd(cic);
        struct cfq_queue *cfqq;
        unsigned long flags;
 
@@ -2736,7 +2773,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic)
 {
        struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
-       struct cfq_data *cfqd = cic->key;
+       struct cfq_data *cfqd = cic_to_cfqd(cic);
        unsigned long flags;
        struct request_queue *q;
 
@@ -2873,12 +2910,13 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
        unsigned long flags;
 
        WARN_ON(!list_empty(&cic->queue_list));
+       BUG_ON(cic->key != cfqd_dead_key(cfqd));
 
        spin_lock_irqsave(&ioc->lock, flags);
 
        BUG_ON(ioc->ioc_data == cic);
 
-       radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd);
+       radix_tree_delete(&ioc->radix_root, cfqd->cic_index);
        hlist_del_rcu(&cic->cic_list);
        spin_unlock_irqrestore(&ioc->lock, flags);
 
@@ -2890,7 +2928,6 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 {
        struct cfq_io_context *cic;
        unsigned long flags;
-       void *k;
 
        if (unlikely(!ioc))
                return NULL;
@@ -2907,13 +2944,11 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
        }
 
        do {
-               cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd);
+               cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index);
                rcu_read_unlock();
                if (!cic)
                        break;
-               /* ->key must be copied to avoid race with cfq_exit_queue() */
-               k = cic->key;
-               if (unlikely(!k)) {
+               if (unlikely(cic->key != cfqd)) {
                        cfq_drop_dead_cic(cfqd, ioc, cic);
                        rcu_read_lock();
                        continue;
@@ -2946,7 +2981,7 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
 
                spin_lock_irqsave(&ioc->lock, flags);
                ret = radix_tree_insert(&ioc->radix_root,
-                                               (unsigned long) cfqd, cic);
+                                               cfqd->cic_index, cic);
                if (!ret)
                        hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
                spin_unlock_irqrestore(&ioc->lock, flags);
@@ -3161,7 +3196,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
        cfq_log_cfqq(cfqd, cfqq, "preempt");
-       cfq_slice_expired(cfqd, 1, false);
+       cfq_slice_expired(cfqd, 1);
 
        /*
         * Put the new queue at the front of the of the current list,
@@ -3241,8 +3276,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
        rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]);
        list_add_tail(&rq->queuelist, &cfqq->fifo);
        cfq_add_rq_rb(rq);
-
-       blkiocg_update_io_add_stats(&cfqq->cfqg->blkg,
+       blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg,
                        &cfqd->serving_group->blkg, rq_data_dir(rq),
                        rq_is_sync(rq));
        cfq_rq_enqueued(cfqd, cfqq, rq);
@@ -3373,7 +3407,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
                 * - when there is a close cooperator
                 */
                if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
-                       cfq_slice_expired(cfqd, 1, false);
+                       cfq_slice_expired(cfqd, 1);
                else if (sync && cfqq_empty &&
                         !cfq_close_cooperator(cfqd, cfqq)) {
                        cfqd->noidle_tree_requires_idle |= !rq_noidle(rq);
@@ -3473,6 +3507,10 @@ static void cfq_put_request(struct request *rq)
                rq->elevator_private = NULL;
                rq->elevator_private2 = NULL;
 
+               /* Put down rq reference on cfqg */
+               cfq_put_cfqg(RQ_CFQG(rq));
+               rq->elevator_private3 = NULL;
+
                cfq_put_queue(cfqq);
        }
 }
@@ -3503,6 +3541,9 @@ split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq)
        }
 
        cic_set_cfqq(cic, NULL, 1);
+
+       cfq_put_cooperator(cfqq);
+
        cfq_put_queue(cfqq);
        return NULL;
 }
@@ -3561,6 +3602,7 @@ new_queue:
 
        rq->elevator_private = cic;
        rq->elevator_private2 = cfqq;
+       rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg);
        return 0;
 
 queue_fail:
@@ -3633,7 +3675,7 @@ static void cfq_idle_slice_timer(unsigned long data)
                cfq_clear_cfqq_deep(cfqq);
        }
 expire:
-       cfq_slice_expired(cfqd, timed_out, false);
+       cfq_slice_expired(cfqd, timed_out);
 out_kick:
        cfq_schedule_dispatch(cfqd);
 out_cont:
@@ -3676,7 +3718,7 @@ static void cfq_exit_queue(struct elevator_queue *e)
        spin_lock_irq(q->queue_lock);
 
        if (cfqd->active_queue)
-               __cfq_slice_expired(cfqd, cfqd->active_queue, 0, false);
+               __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
 
        while (!list_empty(&cfqd->cic_list)) {
                struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
@@ -3694,10 +3736,32 @@ static void cfq_exit_queue(struct elevator_queue *e)
 
        cfq_shutdown_timer_wq(cfqd);
 
+       spin_lock(&cic_index_lock);
+       ida_remove(&cic_index_ida, cfqd->cic_index);
+       spin_unlock(&cic_index_lock);
+
        /* Wait for cfqg->blkg->key accessors to exit their grace periods. */
        call_rcu(&cfqd->rcu, cfq_cfqd_free);
 }
 
+/* Get a new id from cic_index_ida; returns it, -ENOMEM or ida_get_new()'s error. */
+static int cfq_alloc_cic_index(void)
+{
+       int id, rc;
+
+       for (;;) {
+               if (!ida_pre_get(&cic_index_ida, GFP_KERNEL))
+                       return -ENOMEM;
+               spin_lock(&cic_index_lock);
+               rc = ida_get_new(&cic_index_ida, &id);
+               spin_unlock(&cic_index_lock);
+               if (!rc)
+                       return id;
+               if (rc != -EAGAIN)      /* only -EAGAIN is retried */
+                       return rc;
+       }
+}
+
 static void *cfq_init_queue(struct request_queue *q)
 {
        struct cfq_data *cfqd;
@@ -3705,10 +3769,16 @@ static void *cfq_init_queue(struct request_queue *q)
        struct cfq_group *cfqg;
        struct cfq_rb_root *st;
 
+       i = cfq_alloc_cic_index();
+       if (i < 0)
+               return NULL;
+
        cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
        if (!cfqd)
                return NULL;
 
+       cfqd->cic_index = i;
+
        /* Init root service tree */
        cfqd->grp_service_tree = CFQ_RB_ROOT;
 
@@ -3727,8 +3797,10 @@ static void *cfq_init_queue(struct request_queue *q)
         * to make sure that cfq_put_cfqg() does not try to kfree root group
         */
        atomic_set(&cfqg->ref, 1);
+       rcu_read_lock();
        blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, (void *)cfqd,
                                        0);
+       rcu_read_unlock();
 #endif
        /*
         * Not strictly needed (since RB_ROOT just clears the node and we
@@ -3774,7 +3846,6 @@ static void *cfq_init_queue(struct request_queue *q)
         * second, in order to have larger depth for async operations.
         */
        cfqd->last_delayed_sync = jiffies - HZ;
-       INIT_RCU_HEAD(&cfqd->rcu);
        return cfqd;
 }
 
@@ -3969,6 +4040,7 @@ static void __exit cfq_exit(void)
         */
        if (elv_ioc_count_read(cfq_ioc_count))
                wait_for_completion(&all_gone);
+       ida_destroy(&cic_index_ida);
        cfq_slab_kill();
 }