X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Feventpoll.c;h=011b9b8c90c6976bb15e5bf50ffaf865fd31892b;hb=1cf6e7d83bf334cc5916137862c920a97aabc018;hp=0d237182d7217876a223a035e6c80afe5988cd18;hpb=cdac75e6f2fec9abc21d0abb4e5d80720eeebb10;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 0d23718..011b9b8 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -102,6 +102,8 @@ #define EP_UNACTIVE_PTR ((void *) -1L) +#define EP_ITEM_COST (sizeof(struct epitem) + sizeof(struct eppoll_entry)) + struct epoll_filefd { struct file *file; int fd; @@ -200,6 +202,9 @@ struct eventpoll { * holding ->lock. */ struct epitem *ovflist; + + /* The user that created the eventpoll descriptor */ + struct user_struct *user; }; /* Wait structure used by the poll hooks */ @@ -227,9 +232,15 @@ struct ep_pqueue { }; /* + * Configuration options available inside /proc/sys/fs/epoll/ + */ +/* Maximum number of epoll watched descriptors, per user */ +static int max_user_watches __read_mostly; + +/* * This mutex is used to serialize ep_free() and eventpoll_release_file(). */ -static struct mutex epmutex; +static DEFINE_MUTEX(epmutex); /* Safe wake up implementation */ static struct poll_safewake psw; @@ -240,6 +251,25 @@ static struct kmem_cache *epi_cache __read_mostly; /* Slab cache used to allocate "struct eppoll_entry" */ static struct kmem_cache *pwq_cache __read_mostly; +#ifdef CONFIG_SYSCTL + +#include + +static int zero; + +ctl_table epoll_table[] = { + { + .procname = "max_user_watches", + .data = &max_user_watches, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .extra1 = &zero, + }, + { .ctl_name = 0 } +}; +#endif /* CONFIG_SYSCTL */ + /* Setup the structure that is used as key for the RB tree */ static inline void ep_set_ffd(struct epoll_filefd *ffd, @@ -402,6 +432,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) /* At this point it is safe to free the eventpoll item */ kmem_cache_free(epi_cache, epi); + atomic_dec(&ep->user->epoll_watches); + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n", current, ep, file)); @@ -449,6 +481,7 @@ static void ep_free(struct eventpoll *ep) mutex_unlock(&epmutex); mutex_destroy(&ep->mtx); + free_uid(ep->user); kfree(ep); } @@ -532,10 +565,15 @@ void eventpoll_release_file(struct file *file) static int ep_alloc(struct eventpoll **pep) { - struct eventpoll *ep = kzalloc(sizeof(*ep), GFP_KERNEL); + int error; + struct user_struct *user; + struct eventpoll *ep; - if (!ep) - return -ENOMEM; + user = get_current_user(); + error = -ENOMEM; + ep = kzalloc(sizeof(*ep), GFP_KERNEL); + if (unlikely(!ep)) + goto free_uid; spin_lock_init(&ep->lock); mutex_init(&ep->mtx); @@ -544,12 +582,17 @@ static int ep_alloc(struct eventpoll **pep) INIT_LIST_HEAD(&ep->rdllist); ep->rbr = RB_ROOT; ep->ovflist = EP_UNACTIVE_PTR; + ep->user = user; *pep = ep; DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n", current, ep)); return 0; + +free_uid: + free_uid(user); + return error; } /* @@ -703,9 +746,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, struct epitem *epi; struct ep_pqueue epq; - error = -ENOMEM; + if (unlikely(atomic_read(&ep->user->epoll_watches) >= + max_user_watches)) + return -ENOSPC; if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL))) - goto error_return; + return -ENOMEM; /* Item initialization follow here ... */ INIT_LIST_HEAD(&epi->rdllink); @@ -735,6 +780,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, * install process. Namely an allocation for a wait queue failed due * high memory pressure. */ + error = -ENOMEM; if (epi->nwait < 0) goto error_unregister; @@ -765,6 +811,8 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, spin_unlock_irqrestore(&ep->lock, flags); + atomic_inc(&ep->user->epoll_watches); + /* We have to call this outside the lock */ if (pwake) ep_poll_safewake(&psw, &ep->poll_wait); @@ -789,7 +837,7 @@ error_unregister: spin_unlock_irqrestore(&ep->lock, flags); kmem_cache_free(epi_cache, epi); -error_return: + return error; } @@ -927,12 +975,16 @@ errxit: /* * During the time we spent in the loop above, some other events * might have been queued by the poll callback. We re-insert them - * here (in case they are not already queued, or they're one-shot). + * inside the main ready-list here. */ for (nepi = ep->ovflist; (epi = nepi) != NULL; nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { - if (!ep_is_linked(&epi->rdllink) && - (epi->event.events & ~EP_PRIVATE_BITS)) + /* + * If the above loop quit with errors, the epoll item might still + * be linked to "txlist", and the list_splice() done below will + * take care of those cases. + */ + if (!ep_is_linked(&epi->rdllink)) list_add_tail(&epi->rdllink, &ep->rdllist); } /* @@ -1041,49 +1093,53 @@ retry: } /* - * It opens an eventpoll file descriptor. The "size" parameter is there - * for historical reasons, when epoll was using an hash instead of an - * RB tree. With the current implementation, the "size" parameter is ignored - * (besides sanity checks). + * Open an eventpoll file descriptor. */ -asmlinkage long sys_epoll_create(int size) +SYSCALL_DEFINE1(epoll_create1, int, flags) { int error, fd = -1; struct eventpoll *ep; - struct inode *inode; - struct file *file; + + /* Check the EPOLL_* constant for consistency. */ + BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); + + if (flags & ~EPOLL_CLOEXEC) + return -EINVAL; DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", - current, size)); + current, flags)); /* - * Sanity check on the size parameter, and create the internal data - * structure ( "struct eventpoll" ). + * Create the internal data structure ( "struct eventpoll" ). */ - error = -EINVAL; - if (size <= 0 || (error = ep_alloc(&ep)) != 0) + error = ep_alloc(&ep); + if (error < 0) { + fd = error; goto error_return; + } /* * Creates all the items needed to setup an eventpoll file. That is, - * a file structure, and inode and a free file descriptor. + * a file structure and a free file descriptor. */ - error = anon_inode_getfd(&fd, &inode, &file, "[eventpoll]", - &eventpoll_fops, ep); - if (error) - goto error_free; + fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, + flags & O_CLOEXEC); + if (fd < 0) + ep_free(ep); +error_return: DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", - current, size, fd)); + current, flags, fd)); return fd; +} -error_free: - ep_free(ep); -error_return: - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", - current, size, error)); - return error; +SYSCALL_DEFINE1(epoll_create, int, size) +{ + if (size < 0) + return -EINVAL; + + return sys_epoll_create1(0); } /* @@ -1091,8 +1147,8 @@ error_return: * the eventpoll file that enables the insertion/removal/change of * file descriptors inside the interest set. */ -asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, - struct epoll_event __user *event) +SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, + struct epoll_event __user *, event) { int error; struct file *file, *tfile; @@ -1189,8 +1245,8 @@ error_return: * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_wait(2). */ -asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, - int maxevents, int timeout) +SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, + int, maxevents, int, timeout) { int error; struct file *file; @@ -1241,15 +1297,15 @@ error_return: return error; } -#ifdef TIF_RESTORE_SIGMASK +#ifdef HAVE_SET_RESTORE_SIGMASK /* * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_pwait(2). */ -asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, - int maxevents, int timeout, const sigset_t __user *sigmask, - size_t sigsetsize) +SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, + int, maxevents, int, timeout, const sigset_t __user *, sigmask, + size_t, sigsetsize) { int error; sigset_t ksigmask, sigsaved; @@ -1279,7 +1335,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, if (error == -EINTR) { memcpy(¤t->saved_sigmask, &sigsaved, sizeof(sigsaved)); - set_thread_flag(TIF_RESTORE_SIGMASK); + set_restore_sigmask(); } else sigprocmask(SIG_SETMASK, &sigsaved, NULL); } @@ -1287,11 +1343,18 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, return error; } -#endif /* #ifdef TIF_RESTORE_SIGMASK */ +#endif /* HAVE_SET_RESTORE_SIGMASK */ static int __init eventpoll_init(void) { - mutex_init(&epmutex); + struct sysinfo si; + + si_meminfo(&si); + /* + * Allows top 4% of lomem to be allocated for epoll watches (per user). + */ + max_user_watches = (((si.totalram - si.totalhigh) / 25) << PAGE_SHIFT) / + EP_ITEM_COST; /* Initialize the structure used to perform safe poll wait head wake ups */ ep_poll_safewake_init(&psw); @@ -1309,4 +1372,3 @@ static int __init eventpoll_init(void) return 0; } fs_initcall(eventpoll_init); -