X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fsunrpc%2Fsched.c;h=8f459abe97cff614525775560ec355f9d4c69246;hb=b87221de6a4934eda856475a0065688d12973a04;hp=6d87320074b1556f98280dde419157f6288a2cb7;hpb=d9bc125caf592b7d081021f32ce5b717efdf70c8;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6d87320..8f459ab 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include @@ -25,7 +24,6 @@ #ifdef RPC_DEBUG #define RPCDBG_FACILITY RPCDBG_SCHED #define RPC_TASK_MAGIC_ID 0xf00baa -static int rpc_task_id; #endif /* @@ -39,73 +37,49 @@ static struct kmem_cache *rpc_buffer_slabp __read_mostly; static mempool_t *rpc_task_mempool __read_mostly; static mempool_t *rpc_buffer_mempool __read_mostly; -static void __rpc_default_timer(struct rpc_task *task); -static void rpciod_killall(void); static void rpc_async_schedule(struct work_struct *); static void rpc_release_task(struct rpc_task *task); +static void __rpc_queue_timer_fn(unsigned long ptr); /* * RPC tasks sit here while waiting for conditions to improve. */ -static RPC_WAITQ(delay_queue, "delayq"); - -/* - * All RPC tasks are linked into this list - */ -static LIST_HEAD(all_tasks); +static struct rpc_wait_queue delay_queue; /* * rpciod-related stuff */ -static DEFINE_MUTEX(rpciod_mutex); -static unsigned int rpciod_users; struct workqueue_struct *rpciod_workqueue; /* - * Spinlock for other critical sections of code. - */ -static DEFINE_SPINLOCK(rpc_sched_lock); - -/* * Disable the timer for a given RPC task. Should be called with * queue->lock and bh_disabled in order to avoid races within * rpc_run_timer(). */ -static inline void -__rpc_disable_timer(struct rpc_task *task) +static void +__rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task) { + if (task->tk_timeout == 0) + return; dprintk("RPC: %5u disabling timer\n", task->tk_pid); - task->tk_timeout_fn = NULL; task->tk_timeout = 0; + list_del(&task->u.tk_wait.timer_list); + if (list_empty(&queue->timer_list.list)) + del_timer(&queue->timer_list.timer); } -/* - * Run a timeout function. - * We use the callback in order to allow __rpc_wake_up_task() - * and friends to disable the timer synchronously on SMP systems - * without calling del_timer_sync(). The latter could cause a - * deadlock if called while we're holding spinlocks... - */ -static void rpc_run_timer(struct rpc_task *task) +static void +rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires) { - void (*callback)(struct rpc_task *); - - callback = task->tk_timeout_fn; - task->tk_timeout_fn = NULL; - if (callback && RPC_IS_QUEUED(task)) { - dprintk("RPC: %5u running timer\n", task->tk_pid); - callback(task); - } - smp_mb__before_clear_bit(); - clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); - smp_mb__after_clear_bit(); + queue->timer_list.expires = expires; + mod_timer(&queue->timer_list.timer, expires); } /* * Set up a timer for the current task. 
*/ -static inline void -__rpc_add_timer(struct rpc_task *task, rpc_action timer) +static void +__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) { if (!task->tk_timeout) return; @@ -113,27 +87,10 @@ __rpc_add_timer(struct rpc_task *task, rpc_action timer) dprintk("RPC: %5u setting alarm for %lu ms\n", task->tk_pid, task->tk_timeout * 1000 / HZ); - if (timer) - task->tk_timeout_fn = timer; - else - task->tk_timeout_fn = __rpc_default_timer; - set_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); - mod_timer(&task->tk_timer, jiffies + task->tk_timeout); -} - -/* - * Delete any timer for the current task. Because we use del_timer_sync(), - * this function should never be called while holding queue->lock. - */ -static void -rpc_delete_timer(struct rpc_task *task) -{ - if (RPC_IS_QUEUED(task)) - return; - if (test_and_clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate)) { - del_singleshot_timer_sync(&task->tk_timer); - dprintk("RPC: %5u deleting timer\n", task->tk_pid); - } + task->u.tk_wait.expires = jiffies + task->tk_timeout; + if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires)) + rpc_set_queue_timer(queue, task->u.tk_wait.expires); + list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } /* @@ -149,7 +106,7 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r if (unlikely(task->tk_priority > queue->maxpriority)) q = &queue->tasks[queue->maxpriority]; list_for_each_entry(t, q, u.tk_wait.list) { - if (t->tk_cookie == task->tk_cookie) { + if (t->tk_owner == task->tk_owner) { list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links); return; } @@ -175,7 +132,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task * list_add(&task->u.tk_wait.list, &queue->tasks[0]); else list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); - task->u.tk_wait.rpc_waitq = queue; + task->tk_waitqueue = queue; queue->qlen++; rpc_set_queued(task); @@ -195,22 +152,18 @@ static void __rpc_remove_wait_queue_priority(struct rpc_task *task) list_move(&t->u.tk_wait.list, &task->u.tk_wait.list); list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links); } - list_del(&task->u.tk_wait.list); } /* * Remove request from queue. * Note: must be called with spin lock held. 
*/ -static void __rpc_remove_wait_queue(struct rpc_task *task) +static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) { - struct rpc_wait_queue *queue; - queue = task->u.tk_wait.rpc_waitq; - + __rpc_disable_timer(queue, task); if (RPC_IS_PRIORITY(queue)) __rpc_remove_wait_queue_priority(task); - else - list_del(&task->u.tk_wait.list); + list_del(&task->u.tk_wait.list); queue->qlen--; dprintk("RPC: %5u removed from queue %p \"%s\"\n", task->tk_pid, queue, rpc_qname(queue)); @@ -222,27 +175,30 @@ static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int queue->count = 1 << (priority * 2); } -static inline void rpc_set_waitqueue_cookie(struct rpc_wait_queue *queue, unsigned long cookie) +static inline void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid) { - queue->cookie = cookie; + queue->owner = pid; queue->nr = RPC_BATCH_COUNT; } static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue) { rpc_set_waitqueue_priority(queue, queue->maxpriority); - rpc_set_waitqueue_cookie(queue, 0); + rpc_set_waitqueue_owner(queue, 0); } -static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, int maxprio) +static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues) { int i; spin_lock_init(&queue->lock); for (i = 0; i < ARRAY_SIZE(queue->tasks); i++) INIT_LIST_HEAD(&queue->tasks[i]); - queue->maxpriority = maxprio; + queue->maxpriority = nr_queues - 1; rpc_reset_waitqueue_priority(queue); + queue->qlen = 0; + setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue); + INIT_LIST_HEAD(&queue->timer_list.list); #ifdef RPC_DEBUG queue->name = qname; #endif @@ -250,35 +206,56 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname) { - __rpc_init_priority_wait_queue(queue, qname, RPC_PRIORITY_HIGH); + __rpc_init_priority_wait_queue(queue, qname, RPC_NR_PRIORITY); } void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname) { - __rpc_init_priority_wait_queue(queue, qname, 0); + __rpc_init_priority_wait_queue(queue, qname, 1); } -EXPORT_SYMBOL(rpc_init_wait_queue); +EXPORT_SYMBOL_GPL(rpc_init_wait_queue); -static int rpc_wait_bit_interruptible(void *word) +void rpc_destroy_wait_queue(struct rpc_wait_queue *queue) { - if (signal_pending(current)) + del_timer_sync(&queue->timer_list.timer); +} +EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue); + +static int rpc_wait_bit_killable(void *word) +{ + if (fatal_signal_pending(current)) return -ERESTARTSYS; schedule(); return 0; } +#ifdef RPC_DEBUG +static void rpc_task_set_debuginfo(struct rpc_task *task) +{ + static atomic_t rpc_pid; + + task->tk_magic = RPC_TASK_MAGIC_ID; + task->tk_pid = atomic_inc_return(&rpc_pid); +} +#else +static inline void rpc_task_set_debuginfo(struct rpc_task *task) +{ +} +#endif + static void rpc_set_active(struct rpc_task *task) { + struct rpc_clnt *clnt; if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0) return; - spin_lock(&rpc_sched_lock); -#ifdef RPC_DEBUG - task->tk_magic = RPC_TASK_MAGIC_ID; - task->tk_pid = rpc_task_id++; -#endif + rpc_task_set_debuginfo(task); /* Add to global list of all tasks */ - list_add_tail(&task->tk_task, &all_tasks); - spin_unlock(&rpc_sched_lock); + clnt = task->tk_client; + if (clnt != NULL) { + spin_lock(&clnt->cl_lock); + list_add_tail(&task->tk_task, &clnt->cl_tasks); + 
spin_unlock(&clnt->cl_lock); + } } /* @@ -298,11 +275,11 @@ static void rpc_mark_complete_task(struct rpc_task *task) int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *)) { if (action == NULL) - action = rpc_wait_bit_interruptible; + action = rpc_wait_bit_killable; return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, - action, TASK_INTERRUPTIBLE); + action, TASK_KILLABLE); } -EXPORT_SYMBOL(__rpc_wait_for_completion_task); +EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); /* * Make an RPC task runnable. @@ -312,20 +289,14 @@ EXPORT_SYMBOL(__rpc_wait_for_completion_task); */ static void rpc_make_runnable(struct rpc_task *task) { - BUG_ON(task->tk_timeout_fn); rpc_clear_queued(task); if (rpc_test_and_set_running(task)) return; - /* We might have raced */ - if (RPC_IS_QUEUED(task)) { - rpc_clear_running(task); - return; - } if (RPC_IS_ASYNC(task)) { int status; INIT_WORK(&task->u.tk_work, rpc_async_schedule); - status = queue_work(task->tk_workqueue, &task->u.tk_work); + status = queue_work(rpciod_workqueue, &task->u.tk_work); if (status < 0) { printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); task->tk_status = status; @@ -342,7 +313,7 @@ static void rpc_make_runnable(struct rpc_task *task) * as it's on a wait queue. */ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, - rpc_action action, rpc_action timer) + rpc_action action) { dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", task->tk_pid, rpc_qname(q), jiffies); @@ -356,11 +327,11 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, BUG_ON(task->tk_callback != NULL); task->tk_callback = action; - __rpc_add_timer(task, timer); + __rpc_add_timer(q, task); } void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, - rpc_action action, rpc_action timer) + rpc_action action) { /* Mark the task as being activated if so needed */ rpc_set_active(task); @@ -369,17 +340,19 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, * Protect the queue operations. */ spin_lock_bh(&q->lock); - __rpc_sleep_on(q, task, action, timer); + __rpc_sleep_on(q, task, action); spin_unlock_bh(&q->lock); } +EXPORT_SYMBOL_GPL(rpc_sleep_on); /** * __rpc_do_wake_up_task - wake up a single rpc_task + * @queue: wait queue * @task: task to be woken up * * Caller must hold queue->lock, and have cleared the task queued flag. 
*/ -static void __rpc_do_wake_up_task(struct rpc_task *task) +static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task *task) { dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n", task->tk_pid, jiffies); @@ -393,8 +366,7 @@ static void __rpc_do_wake_up_task(struct rpc_task *task) return; } - __rpc_disable_timer(task); - __rpc_remove_wait_queue(task); + __rpc_remove_wait_queue(queue, task); rpc_make_runnable(task); @@ -402,46 +374,31 @@ static void __rpc_do_wake_up_task(struct rpc_task *task) } /* - * Wake up the specified task + * Wake up a queued task while the queue lock is being held */ -static void __rpc_wake_up_task(struct rpc_task *task) +static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (rpc_start_wakeup(task)) { - if (RPC_IS_QUEUED(task)) - __rpc_do_wake_up_task(task); - rpc_finish_wakeup(task); - } + if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue) + __rpc_do_wake_up_task(queue, task); } /* - * Default timeout handler if none specified by user + * Wake up a task on a specific queue */ -static void -__rpc_default_timer(struct rpc_task *task) +void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task) { - dprintk("RPC: %5u timeout (default timer)\n", task->tk_pid); - task->tk_status = -ETIMEDOUT; - rpc_wake_up_task(task); + spin_lock_bh(&queue->lock); + rpc_wake_up_task_queue_locked(queue, task); + spin_unlock_bh(&queue->lock); } +EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task); /* * Wake up the specified task */ -void rpc_wake_up_task(struct rpc_task *task) +static void rpc_wake_up_task(struct rpc_task *task) { - rcu_read_lock_bh(); - if (rpc_start_wakeup(task)) { - if (RPC_IS_QUEUED(task)) { - struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq; - - /* Note: we're already in a bh-safe context */ - spin_lock(&queue->lock); - __rpc_do_wake_up_task(task); - spin_unlock(&queue->lock); - } - rpc_finish_wakeup(task); - } - rcu_read_unlock_bh(); + rpc_wake_up_queued_task(task->tk_waitqueue, task); } /* @@ -453,12 +410,12 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu struct rpc_task *task; /* - * Service a batch of tasks from a single cookie. + * Service a batch of tasks from a single owner. */ q = &queue->tasks[queue->priority]; if (!list_empty(q)) { task = list_entry(q->next, struct rpc_task, u.tk_wait.list); - if (queue->cookie == task->tk_cookie) { + if (queue->owner == task->tk_owner) { if (--queue->nr) goto out; list_move_tail(&task->u.tk_wait.list, q); @@ -467,7 +424,7 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu * Check if we need to switch queues. 
*/ if (--queue->count) - goto new_cookie; + goto new_owner; } /* @@ -489,10 +446,10 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu new_queue: rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0])); -new_cookie: - rpc_set_waitqueue_cookie(queue, task->tk_cookie); +new_owner: + rpc_set_waitqueue_owner(queue, task->tk_owner); out: - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); return task; } @@ -505,19 +462,18 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue) dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue)); - rcu_read_lock_bh(); - spin_lock(&queue->lock); + spin_lock_bh(&queue->lock); if (RPC_IS_PRIORITY(queue)) task = __rpc_wake_up_next_priority(queue); else { task_for_first(task, &queue->tasks[0]) - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); } - spin_unlock(&queue->lock); - rcu_read_unlock_bh(); + spin_unlock_bh(&queue->lock); return task; } +EXPORT_SYMBOL_GPL(rpc_wake_up_next); /** * rpc_wake_up - wake up all rpc_tasks @@ -530,19 +486,18 @@ void rpc_wake_up(struct rpc_wait_queue *queue) struct rpc_task *task, *next; struct list_head *head; - rcu_read_lock_bh(); - spin_lock(&queue->lock); + spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { list_for_each_entry_safe(task, next, head, u.tk_wait.list) - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); if (head == &queue->tasks[0]) break; head--; } - spin_unlock(&queue->lock); - rcu_read_unlock_bh(); + spin_unlock_bh(&queue->lock); } +EXPORT_SYMBOL_GPL(rpc_wake_up); /** * rpc_wake_up_status - wake up all rpc_tasks and set their status value. @@ -556,25 +511,48 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) struct rpc_task *task, *next; struct list_head *head; - rcu_read_lock_bh(); - spin_lock(&queue->lock); + spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { list_for_each_entry_safe(task, next, head, u.tk_wait.list) { task->tk_status = status; - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); } if (head == &queue->tasks[0]) break; head--; } + spin_unlock_bh(&queue->lock); +} +EXPORT_SYMBOL_GPL(rpc_wake_up_status); + +static void __rpc_queue_timer_fn(unsigned long ptr) +{ + struct rpc_wait_queue *queue = (struct rpc_wait_queue *)ptr; + struct rpc_task *task, *n; + unsigned long expires, now, timeo; + + spin_lock(&queue->lock); + expires = now = jiffies; + list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) { + timeo = task->u.tk_wait.expires; + if (time_after_eq(now, timeo)) { + dprintk("RPC: %5u timeout\n", task->tk_pid); + task->tk_status = -ETIMEDOUT; + rpc_wake_up_task_queue_locked(queue, task); + continue; + } + if (expires == now || time_after(expires, timeo)) + expires = timeo; + } + if (!list_empty(&queue->timer_list.list)) + rpc_set_queue_timer(queue, expires); spin_unlock(&queue->lock); - rcu_read_unlock_bh(); } static void __rpc_atrun(struct rpc_task *task) { - rpc_wake_up_task(task); + task->tk_status = 0; } /* @@ -583,17 +561,16 @@ static void __rpc_atrun(struct rpc_task *task) void rpc_delay(struct rpc_task *task, unsigned long delay) { task->tk_timeout = delay; - rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun); + rpc_sleep_on(&delay_queue, task, __rpc_atrun); } +EXPORT_SYMBOL_GPL(rpc_delay); /* * Helper to call task->tk_ops->rpc_call_prepare */ -static void rpc_prepare_task(struct rpc_task *task) +void rpc_prepare_task(struct 
rpc_task *task) { - lock_kernel(); task->tk_ops->rpc_call_prepare(task, task->tk_calldata); - unlock_kernel(); } /* @@ -603,9 +580,7 @@ void rpc_exit_task(struct rpc_task *task) { task->tk_action = NULL; if (task->tk_ops->rpc_call_done != NULL) { - lock_kernel(); task->tk_ops->rpc_call_done(task, task->tk_calldata); - unlock_kernel(); if (task->tk_action != NULL) { WARN_ON(RPC_ASSASSINATED(task)); /* Always release the RPC slot and buffer memory */ @@ -613,15 +588,12 @@ void rpc_exit_task(struct rpc_task *task) } } } -EXPORT_SYMBOL(rpc_exit_task); +EXPORT_SYMBOL_GPL(rpc_exit_task); void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) { - if (ops->rpc_release != NULL) { - lock_kernel(); + if (ops->rpc_release != NULL) ops->rpc_release(calldata); - unlock_kernel(); - } } /* @@ -629,7 +601,9 @@ void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) */ static void __rpc_execute(struct rpc_task *task) { - int status = 0; + struct rpc_wait_queue *queue; + int task_is_async = RPC_IS_ASYNC(task); + int status = 0; dprintk("RPC: %5u __rpc_execute flags=0x%x\n", task->tk_pid, task->tk_flags); @@ -637,27 +611,19 @@ static void __rpc_execute(struct rpc_task *task) BUG_ON(RPC_IS_QUEUED(task)); for (;;) { - /* - * Garbage collection of pending timers... - */ - rpc_delete_timer(task); /* * Execute any pending callback. */ - if (RPC_DO_CALLBACK(task)) { - /* Define a callback save pointer */ + if (task->tk_callback) { void (*save_callback)(struct rpc_task *); /* - * If a callback exists, save it, reset it, - * call it. - * The save is needed to stop from resetting - * another callback set within the callback handler - * - Dave + * We set tk_callback to NULL before calling it, + * in case it sets the tk_callback field itself: */ - save_callback=task->tk_callback; - task->tk_callback=NULL; + save_callback = task->tk_callback; + task->tk_callback = NULL; save_callback(task); } @@ -677,22 +643,31 @@ static void __rpc_execute(struct rpc_task *task) */ if (!RPC_IS_QUEUED(task)) continue; - rpc_clear_running(task); - if (RPC_IS_ASYNC(task)) { - /* Careful! we may have raced... */ - if (RPC_IS_QUEUED(task)) - return; - if (rpc_test_and_set_running(task)) - return; + /* + * The queue->lock protects against races with + * rpc_make_runnable(). + * + * Note that once we clear RPC_TASK_RUNNING on an asynchronous + * rpc_task, rpc_make_runnable() can assign it to a + * different workqueue. We therefore cannot assume that the + * rpc_task pointer may still be dereferenced. 
+ */ + queue = task->tk_waitqueue; + spin_lock_bh(&queue->lock); + if (!RPC_IS_QUEUED(task)) { + spin_unlock_bh(&queue->lock); continue; } + rpc_clear_running(task); + spin_unlock_bh(&queue->lock); + if (task_is_async) + return; /* sync task: sleep here */ dprintk("RPC: %5u sync task going to sleep\n", task->tk_pid); - /* Note: Caller should be using rpc_clnt_sigmask() */ status = out_of_line_wait_on_bit(&task->tk_runstate, - RPC_TASK_QUEUED, rpc_wait_bit_interruptible, - TASK_INTERRUPTIBLE); + RPC_TASK_QUEUED, rpc_wait_bit_killable, + TASK_KILLABLE); if (status == -ERESTARTSYS) { /* * When a sync task receives a signal, it exits with @@ -736,99 +711,121 @@ static void rpc_async_schedule(struct work_struct *work) __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); } +struct rpc_buffer { + size_t len; + char data[]; +}; + /** * rpc_malloc - allocate an RPC buffer * @task: RPC task that will use this buffer * @size: requested byte size * - * We try to ensure that some NFS reads and writes can always proceed - * by using a mempool when allocating 'small' buffers. + * To prevent rpciod from hanging, this allocator never sleeps, + * returning NULL if the request cannot be serviced immediately. + * The caller can arrange to sleep in a way that is safe for rpciod. + * + * Most requests are 'small' (under 2KiB) and can be serviced from a + * mempool, ensuring that NFS reads and writes can always proceed, + * and that there is good locality of reference for these buffers. + * * In order to avoid memory starvation triggering more writebacks of - * NFS requests, we use GFP_NOFS rather than GFP_KERNEL. + * NFS requests, we avoid using GFP_KERNEL. */ -void * rpc_malloc(struct rpc_task *task, size_t size) +void *rpc_malloc(struct rpc_task *task, size_t size) { - struct rpc_rqst *req = task->tk_rqstp; - gfp_t gfp; + struct rpc_buffer *buf; + gfp_t gfp = RPC_IS_SWAPPER(task) ? 
GFP_ATOMIC : GFP_NOWAIT; - if (task->tk_flags & RPC_TASK_SWAPPER) - gfp = GFP_ATOMIC; + size += sizeof(struct rpc_buffer); + if (size <= RPC_BUFFER_MAXSIZE) + buf = mempool_alloc(rpc_buffer_mempool, gfp); else - gfp = GFP_NOFS; - - if (size > RPC_BUFFER_MAXSIZE) { - req->rq_buffer = kmalloc(size, gfp); - if (req->rq_buffer) - req->rq_bufsize = size; - } else { - req->rq_buffer = mempool_alloc(rpc_buffer_mempool, gfp); - if (req->rq_buffer) - req->rq_bufsize = RPC_BUFFER_MAXSIZE; - } - return req->rq_buffer; + buf = kmalloc(size, gfp); + + if (!buf) + return NULL; + + buf->len = size; + dprintk("RPC: %5u allocated buffer of size %zu at %p\n", + task->tk_pid, size, buf); + return &buf->data; } +EXPORT_SYMBOL_GPL(rpc_malloc); /** * rpc_free - free buffer allocated via rpc_malloc - * @task: RPC task with a buffer to be freed + * @buffer: buffer to free * */ -void rpc_free(struct rpc_task *task) +void rpc_free(void *buffer) { - struct rpc_rqst *req = task->tk_rqstp; + size_t size; + struct rpc_buffer *buf; - if (req->rq_buffer) { - if (req->rq_bufsize == RPC_BUFFER_MAXSIZE) - mempool_free(req->rq_buffer, rpc_buffer_mempool); - else - kfree(req->rq_buffer); - req->rq_buffer = NULL; - req->rq_bufsize = 0; - } + if (!buffer) + return; + + buf = container_of(buffer, struct rpc_buffer, data); + size = buf->len; + + dprintk("RPC: freeing buffer of size %zu at %p\n", + size, buf); + + if (size <= RPC_BUFFER_MAXSIZE) + mempool_free(buf, rpc_buffer_mempool); + else + kfree(buf); } +EXPORT_SYMBOL_GPL(rpc_free); /* * Creation and deletion of RPC task structures */ -void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) +static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setup_data) { memset(task, 0, sizeof(*task)); - init_timer(&task->tk_timer); - task->tk_timer.data = (unsigned long) task; - task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer; atomic_set(&task->tk_count, 1); - task->tk_client = clnt; - task->tk_flags = flags; - task->tk_ops = tk_ops; - if (tk_ops->rpc_call_prepare != NULL) - task->tk_action = rpc_prepare_task; - task->tk_calldata = calldata; + task->tk_flags = task_setup_data->flags; + task->tk_ops = task_setup_data->callback_ops; + task->tk_calldata = task_setup_data->callback_data; + INIT_LIST_HEAD(&task->tk_task); /* Initialize retry counters */ task->tk_garb_retry = 2; task->tk_cred_retry = 2; - task->tk_priority = RPC_PRIORITY_NORMAL; - task->tk_cookie = (unsigned long)current; + task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW; + task->tk_owner = current->tgid; /* Initialize workqueue for async tasks */ - task->tk_workqueue = rpciod_workqueue; + task->tk_workqueue = task_setup_data->workqueue; - if (clnt) { - atomic_inc(&clnt->cl_users); - if (clnt->cl_softrtry) + task->tk_client = task_setup_data->rpc_client; + if (task->tk_client != NULL) { + kref_get(&task->tk_client->cl_kref); + if (task->tk_client->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; - if (!clnt->cl_intr) - task->tk_flags |= RPC_TASK_NOINTR; } - BUG_ON(task->tk_ops == NULL); + if (task->tk_ops->rpc_call_prepare != NULL) + task->tk_action = rpc_prepare_task; + + if (task_setup_data->rpc_message != NULL) { + task->tk_msg.rpc_proc = task_setup_data->rpc_message->rpc_proc; + task->tk_msg.rpc_argp = task_setup_data->rpc_message->rpc_argp; + task->tk_msg.rpc_resp = task_setup_data->rpc_message->rpc_resp; + /* Bind the user cred */ + rpcauth_bindcred(task, 
task_setup_data->rpc_message->rpc_cred, task_setup_data->flags); + if (task->tk_action == NULL) + rpc_call_start(task); + } /* starting timestamp */ task->tk_start = jiffies; dprintk("RPC: new task initialized, procpid %u\n", - current->pid); + task_pid_nr(current)); } static struct rpc_task * @@ -837,50 +834,48 @@ rpc_alloc_task(void) return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); } -static void rpc_free_task(struct rcu_head *rcu) -{ - struct rpc_task *task = container_of(rcu, struct rpc_task, u.tk_rcu); - dprintk("RPC: %5u freeing task\n", task->tk_pid); - mempool_free(task, rpc_task_mempool); -} - /* - * Create a new task for the specified client. We have to - * clean up after an allocation failure, as the client may - * have specified "oneshot". + * Create a new task for the specified client. */ -struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) +struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data) { - struct rpc_task *task; + struct rpc_task *task = setup_data->task; + unsigned short flags = 0; - task = rpc_alloc_task(); - if (!task) - goto cleanup; + if (task == NULL) { + task = rpc_alloc_task(); + if (task == NULL) + goto out; + flags = RPC_TASK_DYNAMIC; + } - rpc_init_task(task, clnt, flags, tk_ops, calldata); + rpc_init_task(task, setup_data); + task->tk_flags |= flags; dprintk("RPC: allocated task %p\n", task); - task->tk_flags |= RPC_TASK_DYNAMIC; out: return task; +} + +static void rpc_free_task(struct rpc_task *task) +{ + const struct rpc_call_ops *tk_ops = task->tk_ops; + void *calldata = task->tk_calldata; -cleanup: - /* Check whether to release the client */ - if (clnt) { - printk("rpc_new_task: failed, users=%d, oneshot=%d\n", - atomic_read(&clnt->cl_users), clnt->cl_oneshot); - atomic_inc(&clnt->cl_users); /* pretend we were used ... 
*/ - rpc_release_client(clnt); + if (task->tk_flags & RPC_TASK_DYNAMIC) { + dprintk("RPC: %5u freeing task\n", task->tk_pid); + mempool_free(task, rpc_task_mempool); } - goto out; + rpc_release_calldata(tk_ops, calldata); } +static void rpc_async_release(struct work_struct *work) +{ + rpc_free_task(container_of(work, struct rpc_task, u.tk_work)); +} void rpc_put_task(struct rpc_task *task) { - const struct rpc_call_ops *tk_ops = task->tk_ops; - void *calldata = task->tk_calldata; - if (!atomic_dec_and_test(&task->tk_count)) return; /* Release resources */ @@ -892,11 +887,13 @@ void rpc_put_task(struct rpc_task *task) rpc_release_client(task->tk_client); task->tk_client = NULL; } - if (task->tk_flags & RPC_TASK_DYNAMIC) - call_rcu_bh(&task->u.tk_rcu, rpc_free_task); - rpc_release_calldata(tk_ops, calldata); + if (task->tk_workqueue != NULL) { + INIT_WORK(&task->u.tk_work, rpc_async_release); + queue_work(task->tk_workqueue, &task->u.tk_work); + } else + rpc_free_task(task); } -EXPORT_SYMBOL(rpc_put_task); +EXPORT_SYMBOL_GPL(rpc_put_task); static void rpc_release_task(struct rpc_task *task) { @@ -905,16 +902,15 @@ static void rpc_release_task(struct rpc_task *task) #endif dprintk("RPC: %5u release task\n", task->tk_pid); - /* Remove from global task list */ - spin_lock(&rpc_sched_lock); - list_del(&task->tk_task); - spin_unlock(&rpc_sched_lock); - + if (!list_empty(&task->tk_task)) { + struct rpc_clnt *clnt = task->tk_client; + /* Remove from client task list */ + spin_lock(&clnt->cl_lock); + list_del(&task->tk_task); + spin_unlock(&clnt->cl_lock); + } BUG_ON (RPC_IS_QUEUED(task)); - /* Synchronously delete any running timer */ - rpc_delete_timer(task); - #ifdef RPC_DEBUG task->tk_magic = 0; #endif @@ -924,29 +920,6 @@ static void rpc_release_task(struct rpc_task *task) rpc_put_task(task); } -/** - * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it - * @clnt: pointer to RPC client - * @flags: RPC flags - * @ops: RPC call ops - * @data: user call data - */ -struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, - const struct rpc_call_ops *ops, - void *data) -{ - struct rpc_task *task; - task = rpc_new_task(clnt, flags, ops, data); - if (task == NULL) { - rpc_release_calldata(ops, data); - return ERR_PTR(-ENOMEM); - } - atomic_inc(&task->tk_count); - rpc_execute(task); - return task; -} -EXPORT_SYMBOL(rpc_run_task); - /* * Kill all tasks for the given client. * XXX: kill their descendants as well? @@ -954,143 +927,71 @@ EXPORT_SYMBOL(rpc_run_task); void rpc_killall_tasks(struct rpc_clnt *clnt) { struct rpc_task *rovr; - struct list_head *le; - dprintk("RPC: killing all tasks for client %p\n", clnt); + if (list_empty(&clnt->cl_tasks)) + return; + dprintk("RPC: killing all tasks for client %p\n", clnt); /* * Spin lock all_tasks to prevent changes... */ - spin_lock(&rpc_sched_lock); - alltask_for_each(rovr, le, &all_tasks) { + spin_lock(&clnt->cl_lock); + list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) { if (! 
RPC_IS_ACTIVATED(rovr)) continue; - if (!clnt || rovr->tk_client == clnt) { + if (!(rovr->tk_flags & RPC_TASK_KILLED)) { rovr->tk_flags |= RPC_TASK_KILLED; rpc_exit(rovr, -EIO); rpc_wake_up_task(rovr); } } - spin_unlock(&rpc_sched_lock); + spin_unlock(&clnt->cl_lock); } +EXPORT_SYMBOL_GPL(rpc_killall_tasks); -static DECLARE_MUTEX_LOCKED(rpciod_running); - -static void rpciod_killall(void) +int rpciod_up(void) { - unsigned long flags; - - while (!list_empty(&all_tasks)) { - clear_thread_flag(TIF_SIGPENDING); - rpc_killall_tasks(NULL); - flush_workqueue(rpciod_workqueue); - if (!list_empty(&all_tasks)) { - dprintk("RPC: rpciod_killall: waiting for tasks " - "to exit\n"); - yield(); - } - } + return try_module_get(THIS_MODULE) ? 0 : -EINVAL; +} - spin_lock_irqsave(¤t->sighand->siglock, flags); - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); +void rpciod_down(void) +{ + module_put(THIS_MODULE); } /* - * Start up the rpciod process if it's not already running. + * Start up the rpciod workqueue. */ -int -rpciod_up(void) +static int rpciod_start(void) { struct workqueue_struct *wq; - int error = 0; - mutex_lock(&rpciod_mutex); - dprintk("RPC: rpciod_up: users %u\n", rpciod_users); - rpciod_users++; - if (rpciod_workqueue) - goto out; - /* - * If there's no pid, we should be the first user. - */ - if (rpciod_users > 1) - printk(KERN_WARNING "rpciod_up: no workqueue, %u users??\n", rpciod_users); /* * Create the rpciod thread and wait for it to start. */ - error = -ENOMEM; + dprintk("RPC: creating workqueue rpciod\n"); wq = create_workqueue("rpciod"); - if (wq == NULL) { - printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error); - rpciod_users--; - goto out; - } rpciod_workqueue = wq; - error = 0; -out: - mutex_unlock(&rpciod_mutex); - return error; + return rpciod_workqueue != NULL; } -void -rpciod_down(void) +static void rpciod_stop(void) { - mutex_lock(&rpciod_mutex); - dprintk("RPC: rpciod_down sema %u\n", rpciod_users); - if (rpciod_users) { - if (--rpciod_users) - goto out; - } else - printk(KERN_WARNING "rpciod_down: no users??\n"); + struct workqueue_struct *wq = NULL; - if (!rpciod_workqueue) { - dprintk("RPC: rpciod_down: Nothing to do!\n"); - goto out; - } - rpciod_killall(); + if (rpciod_workqueue == NULL) + return; + dprintk("RPC: destroying workqueue rpciod\n"); - destroy_workqueue(rpciod_workqueue); + wq = rpciod_workqueue; rpciod_workqueue = NULL; - out: - mutex_unlock(&rpciod_mutex); + destroy_workqueue(wq); } -#ifdef RPC_DEBUG -void rpc_show_tasks(void) -{ - struct list_head *le; - struct rpc_task *t; - - spin_lock(&rpc_sched_lock); - if (list_empty(&all_tasks)) { - spin_unlock(&rpc_sched_lock); - return; - } - printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " - "-rpcwait -action- ---ops--\n"); - alltask_for_each(t, le, &all_tasks) { - const char *rpc_waitq = "none"; - - if (RPC_IS_QUEUED(t)) - rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); - - printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", - t->tk_pid, - (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), - t->tk_flags, t->tk_status, - t->tk_client, - (t->tk_client ? 
t->tk_client->cl_prog : 0), - t->tk_rqstp, t->tk_timeout, - rpc_waitq, - t->tk_action, t->tk_ops); - } - spin_unlock(&rpc_sched_lock); -} -#endif - void rpc_destroy_mempool(void) { + rpciod_stop(); if (rpc_buffer_mempool) mempool_destroy(rpc_buffer_mempool); if (rpc_task_mempool) @@ -1099,21 +1000,30 @@ rpc_destroy_mempool(void) kmem_cache_destroy(rpc_task_slabp); if (rpc_buffer_slabp) kmem_cache_destroy(rpc_buffer_slabp); + rpc_destroy_wait_queue(&delay_queue); } int rpc_init_mempool(void) { + /* + * The following is not strictly a mempool initialisation, + * but there is no harm in doing it here + */ + rpc_init_wait_queue(&delay_queue, "delayq"); + if (!rpciod_start()) + goto err_nomem; + rpc_task_slabp = kmem_cache_create("rpc_tasks", sizeof(struct rpc_task), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!rpc_task_slabp) goto err_nomem; rpc_buffer_slabp = kmem_cache_create("rpc_buffers", RPC_BUFFER_MAXSIZE, 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (!rpc_buffer_slabp) goto err_nomem; rpc_task_mempool = mempool_create_slab_pool(RPC_TASK_POOLSIZE,
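
For reference, a minimal sketch (not part of the patch) of how a caller adapts to the rpc_sleep_on() change above: the per-task timer callback argument is gone, a timeout is expressed by setting tk_timeout before sleeping, and expiry is handled by the wait queue's own timer (__rpc_queue_timer_fn() sets tk_status to -ETIMEDOUT and wakes the task). Only rpc_sleep_on(), rpc_wake_up_queued_task() and the tk_timeout/tk_status handling come from the patch; the example_* names and the 5-second timeout are hypothetical.

	#include <linux/sunrpc/sched.h>

	/* Illustrative only -- a caller-owned wait queue, assumed to be
	 * set up elsewhere with rpc_init_wait_queue(&example_pending, "example")
	 * and torn down with rpc_destroy_wait_queue(). */
	static struct rpc_wait_queue example_pending;

	static void example_wait_for_reply(struct rpc_task *task)
	{
		/* Old API: rpc_sleep_on(&example_pending, task, action, timer_fn); */
		task->tk_timeout = 5 * HZ;	/* queue timer fires after 5 seconds */
		rpc_sleep_on(&example_pending, task, NULL);
		/*
		 * On expiry, __rpc_queue_timer_fn() sets tk_status to
		 * -ETIMEDOUT and wakes the task.  An explicit wake-up now
		 * names the queue as well:
		 *
		 *	rpc_wake_up_queued_task(&example_pending, task);
		 */
	}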