+/*
+ * Map an iov into an array of pages and offset/length tupples. With the
+ * partial_page structure, we can map several non-contiguous ranges into
+ * our ones pages[] map instead of splitting that operation into pieces.
+ * Could easily be exported as a generic helper for other users, in which
+ * case one would probably want to add a 'max_nr_pages' parameter as well.
+ */
+static int get_iovec_page_array(const struct iovec __user *iov,
+ unsigned int nr_vecs, struct page **pages,
+ struct partial_page *partial, int aligned)
+{
+ int buffers = 0, error = 0;
+
+ while (nr_vecs) {
+ unsigned long off, npages;
+ struct iovec entry;
+ void __user *base;
+ size_t len;
+ int i;
+
+ error = -EFAULT;
+ if (copy_from_user(&entry, iov, sizeof(entry)))
+ break;
+
+ base = entry.iov_base;
+ len = entry.iov_len;
+
+ /*
+ * Sanity check this iovec. 0 read succeeds.
+ */
+ error = 0;
+ if (unlikely(!len))
+ break;
+ error = -EFAULT;
+ if (!access_ok(VERIFY_READ, base, len))
+ break;
+
+ /*
+ * Get this base offset and number of pages, then map
+ * in the user pages.
+ */
+ off = (unsigned long) base & ~PAGE_MASK;
+
+ /*
+ * If asked for alignment, the offset must be zero and the
+ * length a multiple of the PAGE_SIZE.
+ */
+ error = -EINVAL;
+ if (aligned && (off || len & ~PAGE_MASK))
+ break;
+
+ npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (npages > PIPE_BUFFERS - buffers)
+ npages = PIPE_BUFFERS - buffers;
+
+ error = get_user_pages_fast((unsigned long)base, npages,
+ 0, &pages[buffers]);
+
+ if (unlikely(error <= 0))
+ break;
+
+ /*
+ * Fill this contiguous range into the partial page map.
+ */
+ for (i = 0; i < error; i++) {
+ const int plen = min_t(size_t, len, PAGE_SIZE - off);
+
+ partial[buffers].offset = off;
+ partial[buffers].len = plen;
+
+ off = 0;
+ len -= plen;
+ buffers++;
+ }
+
+ /*
+ * We didn't complete this iov, stop here since it probably
+ * means we have to move some of this into a pipe to
+ * be able to continue.
+ */
+ if (len)
+ break;
+
+ /*
+ * Don't continue if we mapped fewer pages than we asked for,
+ * or if we mapped the max number of pages that we have
+ * room for.
+ */
+ if (error < npages || buffers == PIPE_BUFFERS)
+ break;
+
+ nr_vecs--;
+ iov++;
+ }
+
+ if (buffers)
+ return buffers;
+
+ return error;
+}
+
+static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+ struct splice_desc *sd)
+{
+ char *src;
+ int ret;
+
+ ret = buf->ops->confirm(pipe, buf);
+ if (unlikely(ret))
+ return ret;
+
+ /*
+ * See if we can use the atomic maps, by prefaulting in the
+ * pages and doing an atomic copy
+ */
+ if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) {
+ src = buf->ops->map(pipe, buf, 1);
+ ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset,
+ sd->len);
+ buf->ops->unmap(pipe, buf, src);
+ if (!ret) {
+ ret = sd->len;
+ goto out;
+ }
+ }
+
+ /*
+ * No dice, use slow non-atomic map and copy
+ */
+ src = buf->ops->map(pipe, buf, 0);
+
+ ret = sd->len;
+ if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
+ ret = -EFAULT;
+
+ buf->ops->unmap(pipe, buf, src);
+out:
+ if (ret > 0)
+ sd->u.userptr += ret;
+ return ret;
+}
+
+/*
+ * For lack of a better implementation, implement vmsplice() to userspace
+ * as a simple copy of the pipes pages to the user iov.
+ */
+static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
+ unsigned long nr_segs, unsigned int flags)
+{
+ struct pipe_inode_info *pipe;
+ struct splice_desc sd;
+ ssize_t size;
+ int error;
+ long ret;
+
+ pipe = pipe_info(file->f_path.dentry->d_inode);
+ if (!pipe)
+ return -EBADF;
+
+ pipe_lock(pipe);
+
+ error = ret = 0;
+ while (nr_segs) {
+ void __user *base;
+ size_t len;
+
+ /*
+ * Get user address base and length for this iovec.
+ */
+ error = get_user(base, &iov->iov_base);
+ if (unlikely(error))
+ break;
+ error = get_user(len, &iov->iov_len);
+ if (unlikely(error))
+ break;
+
+ /*
+ * Sanity check this iovec. 0 read succeeds.
+ */
+ if (unlikely(!len))
+ break;
+ if (unlikely(!base)) {
+ error = -EFAULT;
+ break;
+ }
+
+ if (unlikely(!access_ok(VERIFY_WRITE, base, len))) {
+ error = -EFAULT;
+ break;
+ }
+
+ sd.len = 0;
+ sd.total_len = len;
+ sd.flags = flags;
+ sd.u.userptr = base;
+ sd.pos = 0;
+
+ size = __splice_from_pipe(pipe, &sd, pipe_to_user);
+ if (size < 0) {
+ if (!ret)
+ ret = size;
+
+ break;
+ }
+
+ ret += size;
+
+ if (size < len)
+ break;
+
+ nr_segs--;
+ iov++;
+ }
+
+ pipe_unlock(pipe);
+
+ if (!ret)
+ ret = error;
+
+ return ret;
+}
+
+/*
+ * vmsplice splices a user address range into a pipe. It can be thought of
+ * as splice-from-memory, where the regular splice is splice-from-file (or
+ * to file). In both cases the output is a pipe, naturally.
+ */
+static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
+ unsigned long nr_segs, unsigned int flags)
+{
+ struct pipe_inode_info *pipe;
+ struct page *pages[PIPE_BUFFERS];
+ struct partial_page partial[PIPE_BUFFERS];
+ struct splice_pipe_desc spd = {
+ .pages = pages,
+ .partial = partial,
+ .flags = flags,
+ .ops = &user_page_pipe_buf_ops,
+ .spd_release = spd_release_page,
+ };
+
+ pipe = pipe_info(file->f_path.dentry->d_inode);
+ if (!pipe)
+ return -EBADF;
+
+ spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
+ flags & SPLICE_F_GIFT);
+ if (spd.nr_pages <= 0)
+ return spd.nr_pages;
+
+ return splice_to_pipe(pipe, &spd);
+}
+
+/*
+ * Note that vmsplice only really supports true splicing _from_ user memory
+ * to a pipe, not the other way around. Splicing from user memory is a simple
+ * operation that can be supported without any funky alignment restrictions
+ * or nasty vm tricks. We simply map in the user memory and fill them into
+ * a pipe. The reverse isn't quite as easy, though. There are two possible
+ * solutions for that:
+ *
+ * - memcpy() the data internally, at which point we might as well just
+ * do a regular read() on the buffer anyway.
+ * - Lots of nasty vm tricks, that are neither fast nor flexible (it
+ * has restriction limitations on both ends of the pipe).
+ *
+ * Currently we punt and implement it as a normal copy, see pipe_to_user().
+ *
+ */
+SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
+ unsigned long, nr_segs, unsigned int, flags)
+{
+ struct file *file;
+ long error;
+ int fput;
+
+ if (unlikely(nr_segs > UIO_MAXIOV))
+ return -EINVAL;
+ else if (unlikely(!nr_segs))
+ return 0;
+
+ error = -EBADF;
+ file = fget_light(fd, &fput);
+ if (file) {
+ if (file->f_mode & FMODE_WRITE)
+ error = vmsplice_to_pipe(file, iov, nr_segs, flags);
+ else if (file->f_mode & FMODE_READ)
+ error = vmsplice_to_user(file, iov, nr_segs, flags);
+
+ fput_light(file, fput);
+ }
+
+ return error;
+}
+
+SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
+ int, fd_out, loff_t __user *, off_out,
+ size_t, len, unsigned int, flags)