X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fsplice.c;h=eeb1a86a701467fcc90882f803ff7b39585003fc;hb=18ea7e710d2452fa726814a406779188028cf1bf;hp=c804121601b062dfec62b1cc2202e8e9243ce9a7;hpb=cac36bb06efe4880234524e117e0e712b10b1f16;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/splice.c b/fs/splice.c index c804121..eeb1a86 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -28,6 +28,7 @@ #include #include #include +#include /* * Attempt to steal a page from a pipe buffer. This should perhaps go into @@ -85,6 +86,10 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe, buf->flags &= ~PIPE_BUF_FLAG_LRU; } +/* + * Check whether the contents of buf is OK to access. Since the content + * is a page cache page, IO may be in flight. + */ static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { @@ -159,7 +164,7 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = { * @spd: data to fill * * Description: - * @spd contains a map of pages and len/offset tupples, a long with + * @spd contains a map of pages and len/offset tuples, along with * the struct pipe_buf_operations associated with these pages. This * function will link that data to the pipe. * @@ -249,18 +254,23 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, } while (page_nr < spd_pages) - page_cache_release(spd->pages[page_nr++]); + spd->spd_release(spd, page_nr++); return ret; } +static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) +{ + page_cache_release(spd->pages[i]); +} + static int __generic_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { struct address_space *mapping = in->f_mapping; - unsigned int loff, nr_pages; + unsigned int loff, nr_pages, req_pages; struct page *pages[PIPE_BUFFERS]; struct partial_page partial[PIPE_BUFFERS]; struct page *page; @@ -272,32 +282,29 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, .partial = partial, .flags = flags, .ops = &page_cache_pipe_buf_ops, + .spd_release = spd_release_page, }; index = *ppos >> PAGE_CACHE_SHIFT; loff = *ppos & ~PAGE_CACHE_MASK; - nr_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - if (nr_pages > PIPE_BUFFERS) - nr_pages = PIPE_BUFFERS; - - /* - * Don't try to 2nd guess the read-ahead logic, call into - * page_cache_readahead() like the page cache reads would do. - */ - page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages); + req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + nr_pages = min(req_pages, (unsigned)PIPE_BUFFERS); /* * Lookup the (hopefully) full range of pages we need. */ spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages); + index += spd.nr_pages; /* * If find_get_pages_contig() returned fewer pages than we needed, - * allocate the rest and fill in the holes. + * readahead/allocate the rest and fill in the holes. */ + if (spd.nr_pages < nr_pages) + page_cache_sync_readahead(mapping, &in->f_ra, in, + index, req_pages - spd.nr_pages); + error = 0; - index += spd.nr_pages; while (spd.nr_pages < nr_pages) { /* * Page could be there, find_get_pages_contig() breaks on @@ -306,12 +313,6 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, page = find_get_page(mapping, index); if (!page) { /* - * Make sure the read-ahead engine is notified - * about this failure. - */ - handle_ra_miss(mapping, &in->f_ra, index); - - /* * page didn't exist, allocate one. */ page = page_cache_alloc_cold(mapping); @@ -319,7 +320,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, break; error = add_to_page_cache_lru(page, mapping, index, - GFP_KERNEL); + mapping_gfp_mask(mapping)); if (unlikely(error)) { page_cache_release(page); if (error == -EEXIST) @@ -356,6 +357,10 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); page = pages[page_nr]; + if (PageReadahead(page)) + page_cache_async_readahead(mapping, &in->f_ra, in, + page, index, req_pages - page_nr); + /* * If the page isn't uptodate, we may need to start io on it */ @@ -365,8 +370,10 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, * for an in-flight io page */ if (flags & SPLICE_F_NONBLOCK) { - if (TestSetPageLocked(page)) + if (TestSetPageLocked(page)) { + error = -EAGAIN; break; + } } else lock_page(page); @@ -448,6 +455,7 @@ fill_it: */ while (page_nr < nr_pages) page_cache_release(pages[page_nr++]); + in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; if (spd.nr_pages) return splice_to_pipe(pipe, &spd); @@ -473,9 +481,8 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { - ssize_t spliced; - int ret; loff_t isize, left; + int ret; isize = i_size_read(in->f_mapping->host); if (unlikely(*ppos >= isize)) @@ -485,29 +492,9 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, if (unlikely(left < len)) len = left; - ret = 0; - spliced = 0; - while (len) { - ret = __generic_file_splice_read(in, ppos, pipe, len, flags); - - if (ret < 0) - break; - else if (!ret) { - if (spliced) - break; - if (flags & SPLICE_F_NONBLOCK) { - ret = -EAGAIN; - break; - } - } - + ret = __generic_file_splice_read(in, ppos, pipe, len, flags); + if (ret > 0) *ppos += ret; - len -= ret; - spliced += ret; - } - - if (spliced) - return spliced; return ret; } @@ -563,7 +550,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct address_space *mapping = file->f_mapping; unsigned int offset, this_len; struct page *page; - pgoff_t index; + void *fsdata; int ret; /* @@ -573,49 +560,16 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, if (unlikely(ret)) return ret; - index = sd->pos >> PAGE_CACHE_SHIFT; offset = sd->pos & ~PAGE_CACHE_MASK; this_len = sd->len; if (this_len + offset > PAGE_CACHE_SIZE) this_len = PAGE_CACHE_SIZE - offset; -find_page: - page = find_lock_page(mapping, index); - if (!page) { - ret = -ENOMEM; - page = page_cache_alloc_cold(mapping); - if (unlikely(!page)) - goto out_ret; - - /* - * This will also lock the page - */ - ret = add_to_page_cache_lru(page, mapping, index, - GFP_KERNEL); - if (unlikely(ret)) - goto out; - } - - ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len); - if (unlikely(ret)) { - loff_t isize = i_size_read(mapping->host); - - if (ret != AOP_TRUNCATED_PAGE) - unlock_page(page); - page_cache_release(page); - if (ret == AOP_TRUNCATED_PAGE) - goto find_page; - - /* - * prepare_write() may have instantiated a few blocks - * outside i_size. Trim these off again. - */ - if (sd->pos + this_len > isize) - vmtruncate(mapping->host, isize); - - goto out_ret; - } + ret = pagecache_write_begin(file, mapping, sd->pos, this_len, + AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); + if (unlikely(ret)) + goto out; if (buf->page != page) { /* @@ -629,30 +583,9 @@ find_page: kunmap_atomic(dst, KM_USER1); buf->ops->unmap(pipe, buf, src); } - - ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len); - if (ret) { - if (ret == AOP_TRUNCATED_PAGE) { - page_cache_release(page); - goto find_page; - } - if (ret < 0) - goto out; - /* - * Partial write has happened, so 'ret' already initialized by - * number of bytes written, Where is nothing we have to do here. - */ - } else - ret = this_len; - /* - * Return the number of bytes written and mark page as - * accessed, we are now done! - */ - mark_page_accessed(page); + ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len, + page, fsdata); out: - page_cache_release(page); - unlock_page(page); -out_ret: return ret; } @@ -878,13 +811,18 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, { struct address_space *mapping = out->f_mapping; struct inode *inode = mapping->host; + int killsuid, killpriv; ssize_t ret; - int err; + int err = 0; - err = should_remove_suid(out->f_path.dentry); - if (unlikely(err)) { + killpriv = security_inode_need_killpriv(out->f_path.dentry); + killsuid = should_remove_suid(out->f_path.dentry); + if (unlikely(killsuid || killpriv)) { mutex_lock(&inode->i_mutex); - err = __remove_suid(out->f_path.dentry, err); + if (killpriv) + err = security_inode_killpriv(out->f_path.dentry); + if (!err && killsuid) + err = __remove_suid(out->f_path.dentry, killsuid); mutex_unlock(&inode->i_mutex); if (err) return err; @@ -991,7 +929,7 @@ static long do_splice_to(struct file *in, loff_t *ppos, * Description: * This is a special case helper to splice directly between two * points, without requiring an explicit pipe. Internally an allocated - * pipe is cached in the process, and reused during the life time of + * pipe is cached in the process, and reused during the lifetime of * that process. * */ @@ -1047,15 +985,11 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, sd->flags &= ~SPLICE_F_NONBLOCK; while (len) { - size_t read_len, max_read_len; + size_t read_len; + loff_t pos = sd->pos; - /* - * Do at most PIPE_BUFFERS pages worth of transfer: - */ - max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE)); - - ret = do_splice_to(in, &sd->pos, pipe, max_read_len, flags); - if (unlikely(ret < 0)) + ret = do_splice_to(in, &pos, pipe, len, flags); + if (unlikely(ret <= 0)) goto out_release; read_len = ret; @@ -1067,26 +1001,20 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, * could get stuck data in the internal pipe: */ ret = actor(pipe, sd); - if (unlikely(ret < 0)) + if (unlikely(ret <= 0)) goto out_release; bytes += ret; len -= ret; + sd->pos = pos; - /* - * In nonblocking mode, if we got back a short read then - * that was due to either an IO error or due to the - * pagecache entry not being there. In the IO error case - * the _next_ splice attempt will produce a clean IO error - * return value (not a short read), so in both cases it's - * correct to break out of the loop here: - */ - if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len)) - break; + if (ret < read_len) + goto out_release; } +done: pipe->nrbufs = pipe->curbuf = 0; - + file_accessed(in); return bytes; out_release: @@ -1102,16 +1030,11 @@ out_release: buf->ops = NULL; } } - pipe->nrbufs = pipe->curbuf = 0; - /* - * If we transferred some data, return the number of bytes: - */ - if (bytes > 0) - return bytes; - - return ret; + if (!bytes) + bytes = ret; + goto done; } EXPORT_SYMBOL(splice_direct_to_actor); @@ -1148,10 +1071,12 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, .pos = *ppos, .u.file = out, }; - size_t ret; + long ret; ret = splice_direct_to_actor(in, &sd, direct_splice_actor); - *ppos = sd.pos; + if (ret > 0) + *ppos += ret; + return ret; } @@ -1225,6 +1150,36 @@ static long do_splice(struct file *in, loff_t __user *off_in, } /* + * Do a copy-from-user while holding the mmap_semaphore for reading, in a + * manner safe from deadlocking with simultaneous mmap() (grabbing mmap_sem + * for writing) and page faulting on the user memory pointed to by src. + * This assumes that we will very rarely hit the partial != 0 path, or this + * will not be a win. + */ +static int copy_from_user_mmap_sem(void *dst, const void __user *src, size_t n) +{ + int partial; + + if (!access_ok(VERIFY_READ, src, n)) + return -EFAULT; + + pagefault_disable(); + partial = __copy_from_user_inatomic(dst, src, n); + pagefault_enable(); + + /* + * Didn't copy everything, drop the mmap_sem and do a faulting copy + */ + if (unlikely(partial)) { + up_read(¤t->mm->mmap_sem); + partial = copy_from_user(dst, src, n); + down_read(¤t->mm->mmap_sem); + } + + return partial; +} + +/* * Map an iov into an array of pages and offset/length tupples. With the * partial_page structure, we can map several non-contiguous ranges into * our ones pages[] map instead of splitting that operation into pieces. @@ -1237,35 +1192,30 @@ static int get_iovec_page_array(const struct iovec __user *iov, { int buffers = 0, error = 0; - /* - * It's ok to take the mmap_sem for reading, even - * across a "get_user()". - */ down_read(¤t->mm->mmap_sem); while (nr_vecs) { unsigned long off, npages; + struct iovec entry; void __user *base; size_t len; int i; - /* - * Get user address base and length for this iovec. - */ - error = get_user(base, &iov->iov_base); - if (unlikely(error)) - break; - error = get_user(len, &iov->iov_len); - if (unlikely(error)) + error = -EFAULT; + if (copy_from_user_mmap_sem(&entry, iov, sizeof(entry))) break; + base = entry.iov_base; + len = entry.iov_len; + /* * Sanity check this iovec. 0 read succeeds. */ + error = 0; if (unlikely(!len)) break; error = -EFAULT; - if (unlikely(!base)) + if (!access_ok(VERIFY_READ, base, len)) break; /* @@ -1369,10 +1319,10 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len)) ret = -EFAULT; + buf->ops->unmap(pipe, buf, src); out: if (ret > 0) sd->u.userptr += ret; - buf->ops->unmap(pipe, buf, src); return ret; } @@ -1421,6 +1371,11 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, break; } + if (unlikely(!access_ok(VERIFY_WRITE, base, len))) { + error = -EFAULT; + break; + } + sd.len = 0; sd.total_len = len; sd.flags = flags; @@ -1469,6 +1424,7 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, .partial = partial, .flags = flags, .ops = &user_page_pipe_buf_ops, + .spd_release = spd_release_page, }; pipe = pipe_info(file->f_path.dentry->d_inode); @@ -1694,6 +1650,13 @@ static int link_pipe(struct pipe_inode_info *ipipe, i++; } while (len); + /* + * return EAGAIN if we have the potential of some data in the + * future, otherwise just return 0 + */ + if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK)) + ret = -EAGAIN; + inode_double_unlock(ipipe->inode, opipe->inode); /* @@ -1734,11 +1697,8 @@ static long do_tee(struct file *in, struct file *out, size_t len, ret = link_ipipe_prep(ipipe, flags); if (!ret) { ret = link_opipe_prep(opipe, flags); - if (!ret) { + if (!ret) ret = link_pipe(ipipe, opipe, len, flags); - if (!ret && (flags & SPLICE_F_NONBLOCK)) - ret = -EAGAIN; - } } }