#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/splice.h>
+#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/swap.h>
#include <linux/writeback.h>
*/
wait_on_page_writeback(page);
- if (PagePrivate(page))
- try_to_release_page(page, GFP_KERNEL);
+ if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
+ goto out_unlock;
/*
* If we succeeded in removing the mapping, set LRU flag
* Raced with truncate or failed to remove page from current
* address space, unlock and return failure.
*/
+out_unlock:
unlock_page(page);
return 1;
}
break;
error = add_to_page_cache_lru(page, mapping, index,
- GFP_KERNEL);
+ mapping_gfp_mask(mapping));
if (unlikely(error)) {
page_cache_release(page);
if (error == -EEXIST)
* for an in-flight io page
*/
if (flags & SPLICE_F_NONBLOCK) {
- if (TestSetPageLocked(page))
+ if (!trylock_page(page)) {
+ error = -EAGAIN;
break;
+ }
} else
lock_page(page);
/*
- * page was truncated, stop here. if this isn't the
- * first page, we'll just complete what we already
- * added
+ * Page was truncated, or invalidated by the
+ * filesystem. Redo the find/create, but this time the
+ * page is kept locked, so there's no chance of another
+ * race with truncate/invalidate.
*/
if (!page->mapping) {
unlock_page(page);
- break;
+ page = find_or_create_page(mapping, index,
+ mapping_gfp_mask(mapping));
+
+ if (!page) {
+ error = -ENOMEM;
+ break;
+ }
+ page_cache_release(pages[page_nr]);
+ pages[page_nr] = page;
}
/*
* page was already under io and is now done, great
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
- ssize_t spliced;
- int ret;
loff_t isize, left;
+ int ret;
isize = i_size_read(in->f_mapping->host);
if (unlikely(*ppos >= isize))
if (unlikely(left < len))
len = left;
- ret = 0;
- spliced = 0;
- while (len && !spliced) {
- ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
-
- if (ret < 0)
- break;
- else if (!ret) {
- if (spliced)
- break;
- if (flags & SPLICE_F_NONBLOCK) {
- ret = -EAGAIN;
- break;
- }
- }
-
+ ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
+ if (ret > 0)
*ppos += ret;
- len -= ret;
- spliced += ret;
- }
-
- if (spliced)
- return spliced;
return ret;
}
};
/*
- * The actor worker might be calling ->prepare_write and
- * ->commit_write. Most of the time, these expect i_mutex to
+ * The actor worker might be calling ->write_begin and
+ * ->write_end. Most of the time, these expect i_mutex to
* be held. Since this may result in an ABBA deadlock with
* pipe->inode, we have to order lock acquiry here.
*/
ssize_t ret;
int err;
- err = remove_suid(out->f_path.dentry);
+ err = file_remove_suid(out);
if (unlikely(err))
return err;
{
struct address_space *mapping = out->f_mapping;
struct inode *inode = mapping->host;
- int killsuid, killpriv;
+ struct splice_desc sd = {
+ .total_len = len,
+ .flags = flags,
+ .pos = *ppos,
+ .u.file = out,
+ };
ssize_t ret;
- int err = 0;
-
- killpriv = security_inode_need_killpriv(out->f_path.dentry);
- killsuid = should_remove_suid(out->f_path.dentry);
- if (unlikely(killsuid || killpriv)) {
- mutex_lock(&inode->i_mutex);
- if (killpriv)
- err = security_inode_killpriv(out->f_path.dentry);
- if (!err && killsuid)
- err = __remove_suid(out->f_path.dentry, killsuid);
- mutex_unlock(&inode->i_mutex);
- if (err)
- return err;
- }
- ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
+ inode_double_lock(inode, pipe->inode);
+ ret = file_remove_suid(out);
+ if (likely(!ret))
+ ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
+ inode_double_unlock(inode, pipe->inode);
if (ret > 0) {
unsigned long nr_pages;
* sync it.
*/
if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
+ int err;
+
mutex_lock(&inode->i_mutex);
err = generic_osync_inode(inode, mapping,
OSYNC_METADATA|OSYNC_DATA);
if (unlikely(!(out->f_mode & FMODE_WRITE)))
return -EBADF;
+ if (unlikely(out->f_flags & O_APPEND))
+ return -EINVAL;
+
ret = rw_verify_area(WRITE, out, ppos, len);
if (unlikely(ret < 0))
return ret;
while (len) {
size_t read_len;
- loff_t pos = sd->pos;
+ loff_t pos = sd->pos, prev_pos = pos;
ret = do_splice_to(in, &pos, pipe, len, flags);
if (unlikely(ret <= 0))
* could get stuck data in the internal pipe:
*/
ret = actor(pipe, sd);
- if (unlikely(ret <= 0))
+ if (unlikely(ret <= 0)) {
+ sd->pos = prev_pos;
goto out_release;
+ }
bytes += ret;
len -= ret;
sd->pos = pos;
- if (ret < read_len)
+ if (ret < read_len) {
+ sd->pos = prev_pos + ret;
goto out_release;
+ }
}
done:
ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
if (ret > 0)
- *ppos += ret;
+ *ppos = sd.pos;
return ret;
}
}
/*
- * Do a copy-from-user while holding the mmap_semaphore for reading, in a
- * manner safe from deadlocking with simultaneous mmap() (grabbing mmap_sem
- * for writing) and page faulting on the user memory pointed to by src.
- * This assumes that we will very rarely hit the partial != 0 path, or this
- * will not be a win.
- */
-static int copy_from_user_mmap_sem(void *dst, const void __user *src, size_t n)
-{
- int partial;
-
- pagefault_disable();
- partial = __copy_from_user_inatomic(dst, src, n);
- pagefault_enable();
-
- /*
- * Didn't copy everything, drop the mmap_sem and do a faulting copy
- */
- if (unlikely(partial)) {
- up_read(¤t->mm->mmap_sem);
- partial = copy_from_user(dst, src, n);
- down_read(¤t->mm->mmap_sem);
- }
-
- return partial;
-}
-
-/*
* Map an iov into an array of pages and offset/length tupples. With the
* partial_page structure, we can map several non-contiguous ranges into
* our ones pages[] map instead of splitting that operation into pieces.
{
int buffers = 0, error = 0;
- down_read(¤t->mm->mmap_sem);
-
while (nr_vecs) {
unsigned long off, npages;
struct iovec entry;
int i;
error = -EFAULT;
- if (copy_from_user_mmap_sem(&entry, iov, sizeof(entry)))
+ if (copy_from_user(&entry, iov, sizeof(entry)))
break;
base = entry.iov_base;
if (unlikely(!len))
break;
error = -EFAULT;
- if (unlikely(!base))
+ if (!access_ok(VERIFY_READ, base, len))
break;
/*
if (npages > PIPE_BUFFERS - buffers)
npages = PIPE_BUFFERS - buffers;
- error = get_user_pages(current, current->mm,
- (unsigned long) base, npages, 0, 0,
- &pages[buffers], NULL);
+ error = get_user_pages_fast((unsigned long)base, npages,
+ 0, &pages[buffers]);
if (unlikely(error <= 0))
break;
iov++;
}
- up_read(¤t->mm->mmap_sem);
-
if (buffers)
return buffers;
break;
}
+ if (unlikely(!access_ok(VERIFY_WRITE, base, len))) {
+ error = -EFAULT;
+ break;
+ }
+
sd.len = 0;
sd.total_len = len;
sd.flags = flags;
* Currently we punt and implement it as a normal copy, see pipe_to_user().
*
*/
-asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
- unsigned long nr_segs, unsigned int flags)
+SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
+ unsigned long, nr_segs, unsigned int, flags)
{
struct file *file;
long error;
return error;
}
-asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
- int fd_out, loff_t __user *off_out,
- size_t len, unsigned int flags)
+SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
+ int, fd_out, loff_t __user *, off_out,
+ size_t, len, unsigned int, flags)
{
long error;
struct file *in, *out;
i++;
} while (len);
+ /*
+ * return EAGAIN if we have the potential of some data in the
+ * future, otherwise just return 0
+ */
+ if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK))
+ ret = -EAGAIN;
+
inode_double_unlock(ipipe->inode, opipe->inode);
/*
ret = link_ipipe_prep(ipipe, flags);
if (!ret) {
ret = link_opipe_prep(opipe, flags);
- if (!ret) {
+ if (!ret)
ret = link_pipe(ipipe, opipe, len, flags);
- if (!ret && (flags & SPLICE_F_NONBLOCK))
- ret = -EAGAIN;
- }
}
}
return ret;
}
-asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags)
+SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
{
struct file *in;
int error, fput_in;