X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Feventpoll.c;h=085c5c063420d3dca2153f70ca5d3dfaf48a1962;hb=dd1fd90fe65e2e642f0e58e2ff4849f317a6c43d;hp=e24d137c93b6d960a3ef0b9511e726c054ce6c9a;hpb=296e236e96dddef351a1809c0d414bcddfcf3800;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/eventpoll.c b/fs/eventpoll.c index e24d137..085c5c0 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -71,24 +71,6 @@ * a better scalability. */ -#define DEBUG_EPOLL 0 - -#if DEBUG_EPOLL > 0 -#define DPRINTK(x) printk x -#define DNPRINTK(n, x) do { if ((n) <= DEBUG_EPOLL) printk x; } while (0) -#else /* #if DEBUG_EPOLL > 0 */ -#define DPRINTK(x) (void) 0 -#define DNPRINTK(n, x) (void) 0 -#endif /* #if DEBUG_EPOLL > 0 */ - -#define DEBUG_EPI 0 - -#if DEBUG_EPI != 0 -#define EPI_SLAB_DEBUG (SLAB_DEBUG_FREE | SLAB_RED_ZONE /* | SLAB_POISON */) -#else /* #if DEBUG_EPI != 0 */ -#define EPI_SLAB_DEBUG 0 -#endif /* #if DEBUG_EPI != 0 */ - /* Epoll private bits inside the event mask */ #define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET) @@ -115,8 +97,8 @@ struct epoll_filefd { */ struct nested_call_node { struct list_head llink; - struct task_struct *task; void *cookie; + void *ctx; }; /* @@ -210,7 +192,7 @@ struct eppoll_entry { struct list_head llink; /* The "base" pointer is set to the container "struct epitem" */ - void *base; + struct epitem *base; /* * Wait queue item that will be linked to the target file wait @@ -335,17 +317,17 @@ static void ep_nested_calls_init(struct nested_calls *ncalls) * @nproc: Nested call core function pointer. * @priv: Opaque data to be passed to the @nproc callback. * @cookie: Cookie to be used to identify this nested call. + * @ctx: This instance context. * * Returns: Returns the code returned by the @nproc callback, or -1 if * the maximum recursion limit has been exceeded. */ static int ep_call_nested(struct nested_calls *ncalls, int max_nests, int (*nproc)(void *, void *, int), void *priv, - void *cookie) + void *cookie, void *ctx) { int error, call_nests = 0; unsigned long flags; - struct task_struct *this_task = current; struct list_head *lsthead = &ncalls->tasks_call_list; struct nested_call_node *tncur; struct nested_call_node tnode; @@ -358,20 +340,19 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests, * very much limited. */ list_for_each_entry(tncur, lsthead, llink) { - if (tncur->task == this_task && + if (tncur->ctx == ctx && (tncur->cookie == cookie || ++call_nests > max_nests)) { /* * Ops ... loop detected or maximum nest level reached. * We abort this wake by breaking the cycle itself. */ - spin_unlock_irqrestore(&ncalls->lock, flags); - - return -1; + error = -1; + goto out_unlock; } } /* Add the current task and cookie to the list */ - tnode.task = this_task; + tnode.ctx = ctx; tnode.cookie = cookie; list_add(&tnode.llink, lsthead); @@ -383,14 +364,34 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests, /* Remove the current task from the list */ spin_lock_irqsave(&ncalls->lock, flags); list_del(&tnode.llink); +out_unlock: spin_unlock_irqrestore(&ncalls->lock, flags); return error; } +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static inline void ep_wake_up_nested(wait_queue_head_t *wqueue, + unsigned long events, int subclass) +{ + unsigned long flags; + + spin_lock_irqsave_nested(&wqueue->lock, flags, subclass); + wake_up_locked_poll(wqueue, events); + spin_unlock_irqrestore(&wqueue->lock, flags); +} +#else +static inline void ep_wake_up_nested(wait_queue_head_t *wqueue, + unsigned long events, int subclass) +{ + wake_up_poll(wqueue, events); +} +#endif + static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests) { - wake_up_nested((wait_queue_head_t *) cookie, 1 + call_nests); + ep_wake_up_nested((wait_queue_head_t *) cookie, POLLIN, + 1 + call_nests); return 0; } @@ -406,32 +407,30 @@ static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests) */ static void ep_poll_safewake(wait_queue_head_t *wq) { + int this_cpu = get_cpu(); + ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS, - ep_poll_wakeup_proc, NULL, wq); + ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu); + + put_cpu(); } /* - * This function unregister poll callbacks from the associated file descriptor. - * Since this must be called without holding "ep->lock" the atomic exchange trick - * will protect us from multiple unregister. + * This function unregisters poll callbacks from the associated file + * descriptor. Must be called with "mtx" held (or "epmutex" if called from + * ep_free). */ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) { - int nwait; struct list_head *lsthead = &epi->pwqlist; struct eppoll_entry *pwq; - /* This is called without locks, so we need the atomic exchange */ - nwait = xchg(&epi->nwait, 0); - - if (nwait) { - while (!list_empty(lsthead)) { - pwq = list_first_entry(lsthead, struct eppoll_entry, llink); + while (!list_empty(lsthead)) { + pwq = list_first_entry(lsthead, struct eppoll_entry, llink); - list_del_init(&pwq->llink); - remove_wait_queue(pwq->whead, &pwq->wait); - kmem_cache_free(pwq_cache, pwq); - } + list_del(&pwq->llink); + remove_wait_queue(pwq->whead, &pwq->wait); + kmem_cache_free(pwq_cache, pwq); } } @@ -458,7 +457,7 @@ static int ep_scan_ready_list(struct eventpoll *ep, /* * We need to lock this because we could be hit by - * eventpoll_release_file() and epoll_ctl(EPOLL_CTL_DEL). + * eventpoll_release_file() and epoll_ctl(). */ mutex_lock(&ep->mtx); @@ -567,9 +566,6 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) atomic_dec(&ep->user->epoll_watches); - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n", - current, ep, file)); - return 0; } @@ -625,7 +621,6 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file) if (ep) ep_free(ep); - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep)); return 0; } @@ -671,7 +666,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) * could re-enter here. */ pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS, - ep_poll_readyevents_proc, ep, ep); + ep_poll_readyevents_proc, ep, ep, current); return pollflags != -1 ? pollflags : 0; } @@ -750,8 +745,6 @@ static int ep_alloc(struct eventpoll **pep) *pep = ep; - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n", - current, ep)); return 0; free_uid: @@ -785,9 +778,6 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) } } - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_find(%p) -> %p\n", - current, file, epir)); - return epir; } @@ -803,9 +793,6 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k struct epitem *epi = ep_item_from_wait(wait); struct eventpoll *ep = epi->ep; - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", - current, epi->ffd.file, epi, ep)); - spin_lock_irqsave(&ep->lock, flags); /* @@ -818,6 +805,15 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k goto out_unlock; /* + * Check the events coming with the callback. At this stage, not + * every device reports the events in the "key" parameter of the + * callback. We need to be able to handle both cases here, hence the + * test for "key" != NULL before the event match test. + */ + if (key && !((unsigned long) key & epi->event.events)) + goto out_unlock; + + /* * If we are trasfering events to userspace, we can hold no locks * (because we're accessing user memory, and because of linux f_op->poll() * semantics). All the events that happens during that period of time are @@ -978,9 +974,6 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, if (pwake) ep_poll_safewake(&ep->poll_wait); - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_insert(%p, %p, %d)\n", - current, ep, tfile, fd)); - return 0; error_unregister: @@ -1010,15 +1003,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even { int pwake = 0; unsigned int revents; - unsigned long flags; /* - * Set the new event interest mask before calling f_op->poll(), otherwise - * a potential race might occur. In fact if we do this operation inside - * the lock, an event might happen between the f_op->poll() call and the - * new event set registering. + * Set the new event interest mask before calling f_op->poll(); + * otherwise we might miss an event that happens between the + * f_op->poll() call and the new event set registering. */ epi->event.events = event->events; + epi->event.data = event->data; /* protected by mtx */ /* * Get current event bits. We can safely use the file* here because @@ -1026,16 +1018,12 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even */ revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); - spin_lock_irqsave(&ep->lock, flags); - - /* Copy the data member from inside the lock */ - epi->event.data = event->data; - /* * If the item is "hot" and it is not registered inside the ready * list, push it inside. */ if (revents & event->events) { + spin_lock_irq(&ep->lock); if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); @@ -1045,8 +1033,8 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even if (waitqueue_active(&ep->poll_wait)) pwake++; } + spin_unlock_irq(&ep->lock); } - spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */ if (pwake) @@ -1086,8 +1074,10 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, */ if (revents) { if (__put_user(revents, &uevent->events) || - __put_user(epi->event.data, &uevent->data)) + __put_user(epi->event.data, &uevent->data)) { + list_add(&epi->rdllink, head); return eventcnt ? eventcnt : -EFAULT; + } eventcnt++; uevent++; if (epi->event.events & EPOLLONESHOT) @@ -1197,46 +1187,35 @@ retry: */ SYSCALL_DEFINE1(epoll_create1, int, flags) { - int error, fd = -1; - struct eventpoll *ep; + int error; + struct eventpoll *ep = NULL; /* Check the EPOLL_* constant for consistency. */ BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); if (flags & ~EPOLL_CLOEXEC) return -EINVAL; - - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", - current, flags)); - /* - * Create the internal data structure ( "struct eventpoll" ). + * Create the internal data structure ("struct eventpoll"). */ error = ep_alloc(&ep); - if (error < 0) { - fd = error; - goto error_return; - } - + if (error < 0) + return error; /* * Creates all the items needed to setup an eventpoll file. That is, * a file structure and a free file descriptor. */ - fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, - flags & O_CLOEXEC); - if (fd < 0) + error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, + flags & O_CLOEXEC); + if (error < 0) ep_free(ep); -error_return: - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", - current, flags, fd)); - - return fd; + return error; } SYSCALL_DEFINE1(epoll_create, int, size) { - if (size < 0) + if (size <= 0) return -EINVAL; return sys_epoll_create1(0); @@ -1256,9 +1235,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, struct epitem *epi; struct epoll_event epds; - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p)\n", - current, epfd, op, fd, event)); - error = -EFAULT; if (ep_op_has_event(op) && copy_from_user(&epds, event, sizeof(struct epoll_event))) @@ -1309,7 +1285,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, case EPOLL_CTL_ADD: if (!epi) { epds.events |= POLLERR | POLLHUP; - error = ep_insert(ep, &epds, tfile, fd); } else error = -EEXIST; @@ -1335,8 +1310,6 @@ error_tgt_fput: error_fput: fput(file); error_return: - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p) = %d\n", - current, epfd, op, fd, event, error)); return error; } @@ -1352,9 +1325,6 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, struct file *file; struct eventpoll *ep; - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d)\n", - current, epfd, events, maxevents, timeout)); - /* The maximum number of event must be greater than zero */ if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) return -EINVAL; @@ -1391,8 +1361,6 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, error_fput: fput(file); error_return: - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d) = %d\n", - current, epfd, events, maxevents, timeout, error)); return error; } @@ -1464,13 +1432,11 @@ static int __init eventpoll_init(void) /* Allocates slab cache used to allocate "struct epitem" items */ epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem), - 0, SLAB_HWCACHE_ALIGN|EPI_SLAB_DEBUG|SLAB_PANIC, - NULL); + 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); /* Allocates slab cache used to allocate "struct eppoll_entry" */ pwq_cache = kmem_cache_create("eventpoll_pwq", - sizeof(struct eppoll_entry), 0, - EPI_SLAB_DEBUG|SLAB_PANIC, NULL); + sizeof(struct eppoll_entry), 0, SLAB_PANIC, NULL); return 0; }