X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fselect.c;h=73715e90030f0c78059c98c6a4e0abf6b8a04ffa;hb=d554ed895dc8f293cc712c71f14b101ace82579a;hp=fdd8584e536dca956dd1b8a6ec4c68f2ef8ea9f5;hpb=4ce105d30e08fb8a1783c55a0e48aa3fa200c455;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/select.c b/fs/select.c index fdd8584..73715e9 100644 --- a/fs/select.c +++ b/fs/select.c @@ -15,6 +15,7 @@ */ #include +#include #include #include #include @@ -41,23 +42,32 @@ * better solutions.. */ -static unsigned long __estimate_accuracy(struct timespec *tv) +#define MAX_SLACK (100 * NSEC_PER_MSEC) + +static long __estimate_accuracy(struct timespec *tv) { - unsigned long slack; + long slack; int divfactor = 1000; + if (tv->tv_sec < 0) + return 0; + if (task_nice(current) > 0) divfactor = divfactor / 5; + if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor)) + return MAX_SLACK; + slack = tv->tv_nsec / divfactor; slack += tv->tv_sec * (NSEC_PER_SEC/divfactor); - if (slack > 100 * NSEC_PER_MSEC) - slack = 100 * NSEC_PER_MSEC; + if (slack > MAX_SLACK) + return MAX_SLACK; + return slack; } -static unsigned long estimate_accuracy(struct timespec *tv) +static long estimate_accuracy(struct timespec *tv) { unsigned long ret; struct timespec now; @@ -106,11 +116,12 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, void poll_initwait(struct poll_wqueues *pwq) { init_poll_funcptr(&pwq->pt, __pollwait); + pwq->polling_task = current; + pwq->triggered = 0; pwq->error = 0; pwq->table = NULL; pwq->inline_index = 0; } - EXPORT_SYMBOL(poll_initwait); static void free_poll_entry(struct poll_table_entry *entry) @@ -139,12 +150,10 @@ void poll_freewait(struct poll_wqueues *pwq) free_page((unsigned long) old); } } - EXPORT_SYMBOL(poll_freewait); -static struct poll_table_entry *poll_get_entry(poll_table *_p) +static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p) { - struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt); struct poll_table_page *table = p->table; if (p->inline_index < N_INLINE_POLL_ENTRIES) @@ -156,7 +165,6 @@ static struct poll_table_entry *poll_get_entry(poll_table *_p) new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL); if (!new_table) { p->error = -ENOMEM; - __set_current_state(TASK_RUNNING); return NULL; } new_table->entry = new_table->entries; @@ -168,20 +176,86 @@ static struct poll_table_entry *poll_get_entry(poll_table *_p) return table->entry++; } +static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + struct poll_wqueues *pwq = wait->private; + DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task); + + /* + * Although this function is called under waitqueue lock, LOCK + * doesn't imply write barrier and the users expect write + * barrier semantics on wakeup functions. The following + * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() + * and is paired with set_mb() in poll_schedule_timeout. + */ + smp_wmb(); + pwq->triggered = 1; + + /* + * Perform the default wake up operation using a dummy + * waitqueue. + * + * TODO: This is hacky but there currently is no interface to + * pass in @sync. @sync is scheduled to be removed and once + * that happens, wake_up_process() can be used directly. + */ + return default_wake_function(&dummy_wait, mode, sync, key); +} + +static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + struct poll_table_entry *entry; + + entry = container_of(wait, struct poll_table_entry, wait); + if (key && !((unsigned long)key & entry->key)) + return 0; + return __pollwake(wait, mode, sync, key); +} + /* Add a new entry */ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) { - struct poll_table_entry *entry = poll_get_entry(p); + struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt); + struct poll_table_entry *entry = poll_get_entry(pwq); if (!entry) return; get_file(filp); entry->filp = filp; entry->wait_address = wait_address; - init_waitqueue_entry(&entry->wait, current); + entry->key = p->key; + init_waitqueue_func_entry(&entry->wait, pollwake); + entry->wait.private = pwq; add_wait_queue(wait_address, &entry->wait); } +int poll_schedule_timeout(struct poll_wqueues *pwq, int state, + ktime_t *expires, unsigned long slack) +{ + int rc = -EINTR; + + set_current_state(state); + if (!pwq->triggered) + rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); + __set_current_state(TASK_RUNNING); + + /* + * Prepare for the next iteration. + * + * The following set_mb() serves two purposes. First, it's + * the counterpart rmb of the wmb in pollwake() such that data + * written before wake up is always visible after wake up. + * Second, the full barrier guarantees that triggered clearing + * doesn't pass event check of the next iteration. Note that + * this problem doesn't exist for the first iteration as + * add_wait_queue() has full barrier semantics. + */ + set_mb(pwq->triggered, 0); + + return rc; +} +EXPORT_SYMBOL(poll_schedule_timeout); + /** * poll_select_set_timeout - helper function to setup the timeout value * @to: pointer to timespec variable for the final timeout @@ -307,6 +381,18 @@ get_max: #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) #define POLLEX_SET (POLLPRI) +static inline void wait_key_set(poll_table *wait, unsigned long in, + unsigned long out, unsigned long bit) +{ + if (wait) { + wait->key = POLLEX_SET; + if (in & bit) + wait->key |= POLLIN_SET; + if (out & bit) + wait->key |= POLLOUT_SET; + } +} + int do_select(int n, fd_set_bits *fds, struct timespec *end_time) { ktime_t expire, *to = NULL; @@ -330,15 +416,13 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) timed_out = 1; } - if (end_time) + if (end_time && !timed_out) slack = estimate_accuracy(end_time); retval = 0; for (;;) { unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; - set_current_state(TASK_INTERRUPTIBLE); - inp = fds->in; outp = fds->out; exp = fds->ex; rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; @@ -365,20 +449,25 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) if (file) { f_op = file->f_op; mask = DEFAULT_POLLMASK; - if (f_op && f_op->poll) - mask = (*f_op->poll)(file, retval ? NULL : wait); + if (f_op && f_op->poll) { + wait_key_set(wait, in, out, bit); + mask = (*f_op->poll)(file, wait); + } fput_light(file, fput_needed); if ((mask & POLLIN_SET) && (in & bit)) { res_in |= bit; retval++; + wait = NULL; } if ((mask & POLLOUT_SET) && (out & bit)) { res_out |= bit; retval++; + wait = NULL; } if ((mask & POLLEX_SET) && (ex & bit)) { res_ex |= bit; retval++; + wait = NULL; } } } @@ -408,10 +497,10 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) to = &expire; } - if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) + if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE, + to, slack)) timed_out = 1; } - __set_current_state(TASK_RUNNING); poll_freewait(&table); @@ -504,8 +593,8 @@ out_nofds: return ret; } -asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, - fd_set __user *exp, struct timeval __user *tvp) +SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp, + fd_set __user *, exp, struct timeval __user *, tvp) { struct timespec end_time, *to = NULL; struct timeval tv; @@ -516,8 +605,9 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, return -EFAULT; to = &end_time; - if (poll_select_set_timeout(to, tv.tv_sec, - tv.tv_usec * NSEC_PER_USEC)) + if (poll_select_set_timeout(to, + tv.tv_sec + (tv.tv_usec / USEC_PER_SEC), + (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC)) return -EINVAL; } @@ -528,9 +618,9 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, } #ifdef HAVE_SET_RESTORE_SIGMASK -asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, - fd_set __user *exp, struct timespec __user *tsp, - const sigset_t __user *sigmask, size_t sigsetsize) +static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, + fd_set __user *exp, struct timespec __user *tsp, + const sigset_t __user *sigmask, size_t sigsetsize) { sigset_t ksigmask, sigsaved; struct timespec ts, end_time, *to = NULL; @@ -556,7 +646,7 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); } - ret = core_sys_select(n, inp, outp, exp, &end_time); + ret = core_sys_select(n, inp, outp, exp, to); ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); if (ret == -ERESTARTNOHAND) { @@ -582,8 +672,9 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, * which has a pointer to the sigset_t itself followed by a size_t containing * the sigset size. */ -asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp, - fd_set __user *exp, struct timespec __user *tsp, void __user *sig) +SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp, + fd_set __user *, exp, struct timespec __user *, tsp, + void __user *, sig) { size_t sigsetsize = 0; sigset_t __user *up = NULL; @@ -596,7 +687,7 @@ asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp, return -EFAULT; } - return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize); + return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize); } #endif /* HAVE_SET_RESTORE_SIGMASK */ @@ -630,8 +721,12 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) mask = POLLNVAL; if (file != NULL) { mask = DEFAULT_POLLMASK; - if (file->f_op && file->f_op->poll) + if (file->f_op && file->f_op->poll) { + if (pwait) + pwait->key = pollfd->events | + POLLERR | POLLHUP; mask = file->f_op->poll(file, pwait); + } /* Mask out unneeded events. */ mask &= pollfd->events | POLLERR | POLLHUP; fput_light(file, fput_needed); @@ -656,13 +751,12 @@ static int do_poll(unsigned int nfds, struct poll_list *list, timed_out = 1; } - if (end_time) + if (end_time && !timed_out) slack = estimate_accuracy(end_time); for (;;) { struct poll_list *walk; - set_current_state(TASK_INTERRUPTIBLE); for (walk = list; walk != NULL; walk = walk->next) { struct pollfd * pfd, * pfd_end; @@ -705,10 +799,9 @@ static int do_poll(unsigned int nfds, struct poll_list *list, to = &expire; } - if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) + if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack)) timed_out = 1; } - __set_current_state(TASK_RUNNING); return count; } @@ -728,7 +821,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, struct poll_list *walk = head; unsigned long todo = nfds; - if (nfds > current->signal->rlim[RLIMIT_NOFILE].rlim_cur) + if (nfds > rlimit(RLIMIT_NOFILE)) return -EINVAL; len = min_t(unsigned int, nfds, N_STACK_PPS); @@ -802,8 +895,8 @@ static long do_restart_poll(struct restart_block *restart_block) return ret; } -asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, - long timeout_msecs) +SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, + long, timeout_msecs) { struct timespec end_time, *to = NULL; int ret; @@ -837,9 +930,9 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, } #ifdef HAVE_SET_RESTORE_SIGMASK -asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, - struct timespec __user *tsp, const sigset_t __user *sigmask, - size_t sigsetsize) +SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, + struct timespec __user *, tsp, const sigset_t __user *, sigmask, + size_t, sigsetsize) { sigset_t ksigmask, sigsaved; struct timespec ts, end_time, *to = NULL;