X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fsplice.c;h=ac22b00d86c3fab51fe3f39cc6cb50a8e7bbcc59;hb=35f3d14dbbc58447c61e38a162ea10add6b31dc7;hp=128ee36a719b43af307338f14da6cae087891c0f;hpb=f8cc774ce4844811a55e2352f1443055e3994e28;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/splice.c b/fs/splice.c index 128ee36..ac22b00 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -30,6 +30,7 @@ #include #include #include +#include /* * Attempt to steal a page from a pipe buffer. This should perhaps go into @@ -182,8 +183,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, do_wakeup = 0; page_nr = 0; - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); for (;;) { if (!pipe->readers) { @@ -193,8 +193,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, break; } - if (pipe->nrbufs < PIPE_BUFFERS) { - int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1); + if (pipe->nrbufs < pipe->buffers) { + int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); struct pipe_buffer *buf = pipe->bufs + newbuf; buf->page = spd->pages[page_nr]; @@ -214,7 +214,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, if (!--spd->nr_pages) break; - if (pipe->nrbufs < PIPE_BUFFERS) + if (pipe->nrbufs < pipe->buffers) continue; break; @@ -245,15 +245,13 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, pipe->waiting_writers--; } - if (pipe->inode) { - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); - } + if (do_wakeup) { + smp_mb(); + if (waitqueue_active(&pipe->wait)) + wake_up_interruptible(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } while (page_nr < spd_pages) @@ -267,6 +265,36 @@ static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) page_cache_release(spd->pages[i]); } +/* + * Check if we need to grow the arrays holding pages and partial page + * descriptions. + */ +int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) +{ + if (pipe->buffers <= PIPE_DEF_BUFFERS) + return 0; + + spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL); + spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL); + + if (spd->pages && spd->partial) + return 0; + + kfree(spd->pages); + kfree(spd->partial); + return -ENOMEM; +} + +void splice_shrink_spd(struct pipe_inode_info *pipe, + struct splice_pipe_desc *spd) +{ + if (pipe->buffers <= PIPE_DEF_BUFFERS) + return; + + kfree(spd->pages); + kfree(spd->partial); +} + static int __generic_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, @@ -274,8 +302,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, { struct address_space *mapping = in->f_mapping; unsigned int loff, nr_pages, req_pages; - struct page *pages[PIPE_BUFFERS]; - struct partial_page partial[PIPE_BUFFERS]; + struct page *pages[PIPE_DEF_BUFFERS]; + struct partial_page partial[PIPE_DEF_BUFFERS]; struct page *page; pgoff_t index, end_index; loff_t isize; @@ -288,15 +316,18 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, .spd_release = spd_release_page, }; + if (splice_grow_spd(pipe, &spd)) + return -ENOMEM; + index = *ppos >> PAGE_CACHE_SHIFT; loff = *ppos & ~PAGE_CACHE_MASK; req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - nr_pages = min(req_pages, (unsigned)PIPE_BUFFERS); + nr_pages = min(req_pages, pipe->buffers); /* * Lookup the (hopefully) full range of pages we need. */ - spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages); + spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages); index += spd.nr_pages; /* @@ -337,7 +368,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, unlock_page(page); } - pages[spd.nr_pages++] = page; + spd.pages[spd.nr_pages++] = page; index++; } @@ -358,7 +389,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, * this_len is the max we'll use from this page */ this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); - page = pages[page_nr]; + page = spd.pages[page_nr]; if (PageReadahead(page)) page_cache_async_readahead(mapping, &in->f_ra, in, @@ -395,8 +426,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, error = -ENOMEM; break; } - page_cache_release(pages[page_nr]); - pages[page_nr] = page; + page_cache_release(spd.pages[page_nr]); + spd.pages[page_nr] = page; } /* * page was already under io and is now done, great @@ -453,8 +484,8 @@ fill_it: len = this_len; } - partial[page_nr].offset = loff; - partial[page_nr].len = this_len; + spd.partial[page_nr].offset = loff; + spd.partial[page_nr].len = this_len; len -= this_len; loff = 0; spd.nr_pages++; @@ -466,12 +497,13 @@ fill_it: * we got, 'nr_pages' is how many pages are in the map. */ while (page_nr < nr_pages) - page_cache_release(pages[page_nr++]); + page_cache_release(spd.pages[page_nr++]); in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; if (spd.nr_pages) - return splice_to_pipe(pipe, &spd); + error = splice_to_pipe(pipe, &spd); + splice_shrink_spd(pipe, &spd); return error; } @@ -505,14 +537,154 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, len = left; ret = __generic_file_splice_read(in, ppos, pipe, len, flags); - if (ret > 0) + if (ret > 0) { *ppos += ret; + file_accessed(in); + } return ret; } - EXPORT_SYMBOL(generic_file_splice_read); +static const struct pipe_buf_operations default_pipe_buf_ops = { + .can_merge = 0, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .confirm = generic_pipe_buf_confirm, + .release = generic_pipe_buf_release, + .steal = generic_pipe_buf_steal, + .get = generic_pipe_buf_get, +}; + +static ssize_t kernel_readv(struct file *file, const struct iovec *vec, + unsigned long vlen, loff_t offset) +{ + mm_segment_t old_fs; + loff_t pos = offset; + ssize_t res; + + old_fs = get_fs(); + set_fs(get_ds()); + /* The cast to a user pointer is valid due to the set_fs() */ + res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos); + set_fs(old_fs); + + return res; +} + +static ssize_t kernel_write(struct file *file, const char *buf, size_t count, + loff_t pos) +{ + mm_segment_t old_fs; + ssize_t res; + + old_fs = get_fs(); + set_fs(get_ds()); + /* The cast to a user pointer is valid due to the set_fs() */ + res = vfs_write(file, (const char __user *)buf, count, &pos); + set_fs(old_fs); + + return res; +} + +ssize_t default_file_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + unsigned int nr_pages; + unsigned int nr_freed; + size_t offset; + struct page *pages[PIPE_DEF_BUFFERS]; + struct partial_page partial[PIPE_DEF_BUFFERS]; + struct iovec *vec, __vec[PIPE_DEF_BUFFERS]; + pgoff_t index; + ssize_t res; + size_t this_len; + int error; + int i; + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, + .flags = flags, + .ops = &default_pipe_buf_ops, + .spd_release = spd_release_page, + }; + + if (splice_grow_spd(pipe, &spd)) + return -ENOMEM; + + res = -ENOMEM; + vec = __vec; + if (pipe->buffers > PIPE_DEF_BUFFERS) { + vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL); + if (!vec) + goto shrink_ret; + } + + index = *ppos >> PAGE_CACHE_SHIFT; + offset = *ppos & ~PAGE_CACHE_MASK; + nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) { + struct page *page; + + page = alloc_page(GFP_USER); + error = -ENOMEM; + if (!page) + goto err; + + this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset); + vec[i].iov_base = (void __user *) page_address(page); + vec[i].iov_len = this_len; + spd.pages[i] = page; + spd.nr_pages++; + len -= this_len; + offset = 0; + } + + res = kernel_readv(in, vec, spd.nr_pages, *ppos); + if (res < 0) { + error = res; + goto err; + } + + error = 0; + if (!res) + goto err; + + nr_freed = 0; + for (i = 0; i < spd.nr_pages; i++) { + this_len = min_t(size_t, vec[i].iov_len, res); + spd.partial[i].offset = 0; + spd.partial[i].len = this_len; + if (!this_len) { + __free_page(spd.pages[i]); + spd.pages[i] = NULL; + nr_freed++; + } + res -= this_len; + } + spd.nr_pages -= nr_freed; + + res = splice_to_pipe(pipe, &spd); + if (res > 0) + *ppos += res; + +shrink_ret: + if (vec != __vec) + kfree(vec); + splice_shrink_spd(pipe, &spd); + return res; + +err: + for (i = 0; i < spd.nr_pages; i++) + __free_page(spd.pages[i]); + + res = error; + goto shrink_ret; +} +EXPORT_SYMBOL(default_file_splice_read); + /* * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' * using sendpage(). Return the number of bytes sent. @@ -527,9 +699,11 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, ret = buf->ops->confirm(pipe, buf); if (!ret) { more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; - - ret = file->f_op->sendpage(file, buf->page, buf->offset, - sd->len, &pos, more); + if (file->f_op && file->f_op->sendpage) + ret = file->f_op->sendpage(file, buf->page, buf->offset, + sd->len, &pos, more); + else + ret = -EINVAL; } return ret; @@ -617,7 +791,6 @@ static void wakeup_pipe_writers(struct pipe_inode_info *pipe) * @actor: handler that splices the data * * Description: - * This function loops over the pipe and calls @actor to do the * actual moving of a single struct pipe_buffer to the desired * destination. It returns when there's no more buffers left in @@ -661,7 +834,7 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, if (!buf->len) { buf->ops = NULL; ops->release(pipe, buf); - pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); + pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); pipe->nrbufs--; if (pipe->inode) sd->need_wakeup = true; @@ -714,7 +887,7 @@ EXPORT_SYMBOL(splice_from_pipe_next); /** * splice_from_pipe_begin - start splicing from pipe - * @pipe: pipe to splice from + * @sd: information about the splice operation * * Description: * This function should be called before a loop containing @@ -801,11 +974,9 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, .u.file = out, }; - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); ret = __splice_from_pipe(pipe, &sd, actor); - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); return ret; } @@ -837,8 +1008,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, }; ssize_t ret; - if (pipe->inode) - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); + pipe_lock(pipe); splice_from_pipe_begin(&sd); do { @@ -848,39 +1018,30 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); ret = file_remove_suid(out); - if (!ret) + if (!ret) { + file_update_time(out); ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); + } mutex_unlock(&inode->i_mutex); } while (ret > 0); splice_from_pipe_end(pipe, &sd); - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); if (sd.num_spliced) ret = sd.num_spliced; if (ret > 0) { unsigned long nr_pages; + int err; - *ppos += ret; nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - /* - * If file or inode is SYNC and we actually wrote some data, - * sync it. - */ - if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { - int err; - - mutex_lock(&inode->i_mutex); - err = generic_osync_inode(inode, mapping, - OSYNC_METADATA|OSYNC_DATA); - mutex_unlock(&inode->i_mutex); - - if (err) - ret = err; - } + err = generic_write_sync(out, *ppos, ret); + if (err) + ret = err; + else + *ppos += ret; balance_dirty_pages_ratelimited_nr(mapping, nr_pages); } @@ -889,6 +1050,36 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, EXPORT_SYMBOL(generic_file_splice_write); +static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, + struct splice_desc *sd) +{ + int ret; + void *data; + + ret = buf->ops->confirm(pipe, buf); + if (ret) + return ret; + + data = buf->ops->map(pipe, buf, 0); + ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos); + buf->ops->unmap(pipe, buf, data); + + return ret; +} + +static ssize_t default_file_splice_write(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, + size_t len, unsigned int flags) +{ + ssize_t ret; + + ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf); + if (ret > 0) + *ppos += ret; + + return ret; +} + /** * generic_splice_sendpage - splice data from a pipe to a socket * @pipe: pipe to splice from @@ -916,11 +1107,10 @@ EXPORT_SYMBOL(generic_splice_sendpage); static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { + ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, + loff_t *, size_t, unsigned int); int ret; - if (unlikely(!out->f_op || !out->f_op->splice_write)) - return -EINVAL; - if (unlikely(!(out->f_mode & FMODE_WRITE))) return -EBADF; @@ -931,7 +1121,12 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, if (unlikely(ret < 0)) return ret; - return out->f_op->splice_write(pipe, out, ppos, len, flags); + if (out->f_op && out->f_op->splice_write) + splice_write = out->f_op->splice_write; + else + splice_write = default_file_splice_write; + + return splice_write(pipe, out, ppos, len, flags); } /* @@ -941,11 +1136,10 @@ static long do_splice_to(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { + ssize_t (*splice_read)(struct file *, loff_t *, + struct pipe_inode_info *, size_t, unsigned int); int ret; - if (unlikely(!in->f_op || !in->f_op->splice_read)) - return -EINVAL; - if (unlikely(!(in->f_mode & FMODE_READ))) return -EBADF; @@ -953,7 +1147,12 @@ static long do_splice_to(struct file *in, loff_t *ppos, if (unlikely(ret < 0)) return ret; - return in->f_op->splice_read(in, ppos, pipe, len, flags); + if (in->f_op && in->f_op->splice_read) + splice_read = in->f_op->splice_read; + else + splice_read = default_file_splice_read; + + return splice_read(in, ppos, pipe, len, flags); } /** @@ -1062,7 +1261,7 @@ out_release: * If we did an incomplete transfer we must release * the pipe buffers in question: */ - for (i = 0; i < PIPE_BUFFERS; i++) { + for (i = 0; i < pipe->buffers; i++) { struct pipe_buffer *buf = pipe->bufs + i; if (buf->ops) { @@ -1120,6 +1319,9 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, return ret; } +static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, + struct pipe_inode_info *opipe, + size_t len, unsigned int flags); /* * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same * location, so checking ->i_pipe is not enough to verify that this is a @@ -1140,16 +1342,37 @@ static long do_splice(struct file *in, loff_t __user *off_in, struct file *out, loff_t __user *off_out, size_t len, unsigned int flags) { - struct pipe_inode_info *pipe; + struct pipe_inode_info *ipipe; + struct pipe_inode_info *opipe; loff_t offset, *off; long ret; - pipe = pipe_info(in->f_path.dentry->d_inode); - if (pipe) { + ipipe = pipe_info(in->f_path.dentry->d_inode); + opipe = pipe_info(out->f_path.dentry->d_inode); + + if (ipipe && opipe) { + if (off_in || off_out) + return -ESPIPE; + + if (!(in->f_mode & FMODE_READ)) + return -EBADF; + + if (!(out->f_mode & FMODE_WRITE)) + return -EBADF; + + /* Splicing to self would be fun, but... */ + if (ipipe == opipe) + return -EINVAL; + + return splice_pipe_to_pipe(ipipe, opipe, len, flags); + } + + if (ipipe) { if (off_in) return -ESPIPE; if (off_out) { - if (out->f_op->llseek == no_llseek) + if (!out->f_op || !out->f_op->llseek || + out->f_op->llseek == no_llseek) return -EINVAL; if (copy_from_user(&offset, off_out, sizeof(loff_t))) return -EFAULT; @@ -1157,7 +1380,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, } else off = &out->f_pos; - ret = do_splice_from(pipe, out, off, len, flags); + ret = do_splice_from(ipipe, out, off, len, flags); if (off_out && copy_to_user(off_out, off, sizeof(loff_t))) ret = -EFAULT; @@ -1165,12 +1388,12 @@ static long do_splice(struct file *in, loff_t __user *off_in, return ret; } - pipe = pipe_info(out->f_path.dentry->d_inode); - if (pipe) { + if (opipe) { if (off_out) return -ESPIPE; if (off_in) { - if (in->f_op->llseek == no_llseek) + if (!in->f_op || !in->f_op->llseek || + in->f_op->llseek == no_llseek) return -EINVAL; if (copy_from_user(&offset, off_in, sizeof(loff_t))) return -EFAULT; @@ -1178,7 +1401,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, } else off = &in->f_pos; - ret = do_splice_to(in, off, pipe, len, flags); + ret = do_splice_to(in, off, opipe, len, flags); if (off_in && copy_to_user(off_in, off, sizeof(loff_t))) ret = -EFAULT; @@ -1198,7 +1421,8 @@ static long do_splice(struct file *in, loff_t __user *off_in, */ static int get_iovec_page_array(const struct iovec __user *iov, unsigned int nr_vecs, struct page **pages, - struct partial_page *partial, int aligned) + struct partial_page *partial, int aligned, + unsigned int pipe_buffers) { int buffers = 0, error = 0; @@ -1241,8 +1465,8 @@ static int get_iovec_page_array(const struct iovec __user *iov, break; npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (npages > PIPE_BUFFERS - buffers) - npages = PIPE_BUFFERS - buffers; + if (npages > pipe_buffers - buffers) + npages = pipe_buffers - buffers; error = get_user_pages_fast((unsigned long)base, npages, 0, &pages[buffers]); @@ -1277,7 +1501,7 @@ static int get_iovec_page_array(const struct iovec __user *iov, * or if we mapped the max number of pages that we have * room for. */ - if (error < npages || buffers == PIPE_BUFFERS) + if (error < npages || buffers == pipe_buffers) break; nr_vecs--; @@ -1348,8 +1572,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, if (!pipe) return -EBADF; - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); error = ret = 0; while (nr_segs) { @@ -1404,8 +1627,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, iov++; } - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); if (!ret) ret = error; @@ -1422,8 +1644,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, unsigned long nr_segs, unsigned int flags) { struct pipe_inode_info *pipe; - struct page *pages[PIPE_BUFFERS]; - struct partial_page partial[PIPE_BUFFERS]; + struct page *pages[PIPE_DEF_BUFFERS]; + struct partial_page partial[PIPE_DEF_BUFFERS]; struct splice_pipe_desc spd = { .pages = pages, .partial = partial, @@ -1431,17 +1653,25 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, .ops = &user_page_pipe_buf_ops, .spd_release = spd_release_page, }; + long ret; pipe = pipe_info(file->f_path.dentry->d_inode); if (!pipe) return -EBADF; - spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, - flags & SPLICE_F_GIFT); + if (splice_grow_spd(pipe, &spd)) + return -ENOMEM; + + spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages, + spd.partial, flags & SPLICE_F_GIFT, + pipe->buffers); if (spd.nr_pages <= 0) - return spd.nr_pages; + ret = spd.nr_pages; + else + ret = splice_to_pipe(pipe, &spd); - return splice_to_pipe(pipe, &spd); + splice_shrink_spd(pipe, &spd); + return ret; } /* @@ -1521,7 +1751,7 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, * Make sure there's data to read. Wait for input if we can, otherwise * return an appropriate error. */ -static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) +static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) { int ret; @@ -1533,7 +1763,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) return 0; ret = 0; - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); while (!pipe->nrbufs) { if (signal_pending(current)) { @@ -1551,7 +1781,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) pipe_wait(pipe); } - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); return ret; } @@ -1559,7 +1789,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) * Make sure there's writeable room. Wait for room if we can, otherwise * return an appropriate error. */ -static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) +static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) { int ret; @@ -1567,13 +1797,13 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) * Check ->nrbufs without the inode lock first. This function * is speculative anyways, so missing one is ok. */ - if (pipe->nrbufs < PIPE_BUFFERS) + if (pipe->nrbufs < pipe->buffers) return 0; ret = 0; - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); - while (pipe->nrbufs >= PIPE_BUFFERS) { + while (pipe->nrbufs >= pipe->buffers) { if (!pipe->readers) { send_sig(SIGPIPE, current, 0); ret = -EPIPE; @@ -1592,7 +1822,125 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) pipe->waiting_writers--; } - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); + return ret; +} + +/* + * Splice contents of ipipe to opipe. + */ +static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, + struct pipe_inode_info *opipe, + size_t len, unsigned int flags) +{ + struct pipe_buffer *ibuf, *obuf; + int ret = 0, nbuf; + bool input_wakeup = false; + + +retry: + ret = ipipe_prep(ipipe, flags); + if (ret) + return ret; + + ret = opipe_prep(opipe, flags); + if (ret) + return ret; + + /* + * Potential ABBA deadlock, work around it by ordering lock + * grabbing by pipe info address. Otherwise two different processes + * could deadlock (one doing tee from A -> B, the other from B -> A). + */ + pipe_double_lock(ipipe, opipe); + + do { + if (!opipe->readers) { + send_sig(SIGPIPE, current, 0); + if (!ret) + ret = -EPIPE; + break; + } + + if (!ipipe->nrbufs && !ipipe->writers) + break; + + /* + * Cannot make any progress, because either the input + * pipe is empty or the output pipe is full. + */ + if (!ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) { + /* Already processed some buffers, break */ + if (ret) + break; + + if (flags & SPLICE_F_NONBLOCK) { + ret = -EAGAIN; + break; + } + + /* + * We raced with another reader/writer and haven't + * managed to process any buffers. A zero return + * value means EOF, so retry instead. + */ + pipe_unlock(ipipe); + pipe_unlock(opipe); + goto retry; + } + + ibuf = ipipe->bufs + ipipe->curbuf; + nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1); + obuf = opipe->bufs + nbuf; + + if (len >= ibuf->len) { + /* + * Simply move the whole buffer from ipipe to opipe + */ + *obuf = *ibuf; + ibuf->ops = NULL; + opipe->nrbufs++; + ipipe->curbuf = (ipipe->curbuf + 1) & (ipipe->buffers - 1); + ipipe->nrbufs--; + input_wakeup = true; + } else { + /* + * Get a reference to this pipe buffer, + * so we can copy the contents over. + */ + ibuf->ops->get(ipipe, ibuf); + *obuf = *ibuf; + + /* + * Don't inherit the gift flag, we need to + * prevent multiple steals of this page. + */ + obuf->flags &= ~PIPE_BUF_FLAG_GIFT; + + obuf->len = len; + opipe->nrbufs++; + ibuf->offset += obuf->len; + ibuf->len -= obuf->len; + } + ret += obuf->len; + len -= obuf->len; + } while (len); + + pipe_unlock(ipipe); + pipe_unlock(opipe); + + /* + * If we put data in the output pipe, wakeup any potential readers. + */ + if (ret > 0) { + smp_mb(); + if (waitqueue_active(&opipe->wait)) + wake_up_interruptible(&opipe->wait); + kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); + } + if (input_wakeup) + wakeup_pipe_writers(ipipe); + return ret; } @@ -1608,10 +1956,10 @@ static int link_pipe(struct pipe_inode_info *ipipe, /* * Potential ABBA deadlock, work around it by ordering lock - * grabbing by inode address. Otherwise two different processes + * grabbing by pipe info address. Otherwise two different processes * could deadlock (one doing tee from A -> B, the other from B -> A). */ - inode_double_lock(ipipe->inode, opipe->inode); + pipe_double_lock(ipipe, opipe); do { if (!opipe->readers) { @@ -1625,11 +1973,11 @@ static int link_pipe(struct pipe_inode_info *ipipe, * If we have iterated all input buffers or ran out of * output room, break. */ - if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) + if (i >= ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) break; - ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); - nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); + ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (ipipe->buffers-1)); + nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1); /* * Get a reference to this pipe buffer, @@ -1662,7 +2010,8 @@ static int link_pipe(struct pipe_inode_info *ipipe, if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK)) ret = -EAGAIN; - inode_double_unlock(ipipe->inode, opipe->inode); + pipe_unlock(ipipe); + pipe_unlock(opipe); /* * If we put data in the output pipe, wakeup any potential readers. @@ -1699,9 +2048,9 @@ static long do_tee(struct file *in, struct file *out, size_t len, * Keep going, unless we encounter an error. The ipipe/opipe * ordering doesn't really matter. */ - ret = link_ipipe_prep(ipipe, flags); + ret = ipipe_prep(ipipe, flags); if (!ret) { - ret = link_opipe_prep(opipe, flags); + ret = opipe_prep(opipe, flags); if (!ret) ret = link_pipe(ipipe, opipe, len, flags); }