diff --git a/fs/splice.c b/fs/splice.c
index 421b3b8..0670c91 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -164,7 +164,7 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
  * @spd:       data to fill
  *
  * Description:
- *    @spd contains a map of pages and len/offset tupples, a long with
+ *    @spd contains a map of pages and len/offset tuples, along with
  *    the struct pipe_buf_operations associated with these pages. This
  *    function will link that data to the pipe.
  *
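For orientation, the descriptor interface documented above is used roughly as sketched below: the caller points spd at its pages[]/partial[] arrays, picks the pipe_buf_operations, and (with this patch) supplies the spd_release hook that splice_to_pipe() calls for pages it could not fit into the pipe. Minimal sketch only, not part of the patch; the helper names are made up, and it assumes fs/splice.c context where PIPE_BUFFERS and page_cache_pipe_buf_ops are visible.

/* Sketch: feed one already-referenced page into a pipe via splice_to_pipe(). */
static void sketch_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
	/* called by splice_to_pipe() for any page it could not queue */
	page_cache_release(spd->pages[i]);
}

static ssize_t splice_one_page_to_pipe(struct pipe_inode_info *pipe,
				       struct page *page, unsigned int offset,
				       unsigned int len, unsigned int flags)
{
	struct page *pages[PIPE_BUFFERS];
	struct partial_page partial[PIPE_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages = pages,
		.partial = partial,
		.nr_pages = 1,
		.flags = flags,
		.ops = &page_cache_pipe_buf_ops,
		.spd_release = sketch_spd_release,	/* hook added in the next hunk */
	};

	pages[0] = page;
	partial[0].offset = offset;
	partial[0].len = len;

	return splice_to_pipe(pipe, &spd);
}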
@@ -254,18 +254,23 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
        }
 
        while (page_nr < spd_pages)
-               page_cache_release(spd->pages[page_nr++]);
+               spd->spd_release(spd, page_nr++);
 
        return ret;
 }
 
+static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
+{
+       page_cache_release(spd->pages[i]);
+}
+
 static int
 __generic_file_splice_read(struct file *in, loff_t *ppos,
                           struct pipe_inode_info *pipe, size_t len,
                           unsigned int flags)
 {
        struct address_space *mapping = in->f_mapping;
-       unsigned int loff, nr_pages;
+       unsigned int loff, nr_pages, req_pages;
        struct page *pages[PIPE_BUFFERS];
        struct partial_page partial[PIPE_BUFFERS];
        struct page *page;
@@ -277,14 +282,13 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
                .partial = partial,
                .flags = flags,
                .ops = &page_cache_pipe_buf_ops,
+               .spd_release = spd_release_page,
        };
 
        index = *ppos >> PAGE_CACHE_SHIFT;
        loff = *ppos & ~PAGE_CACHE_MASK;
-       nr_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
-       if (nr_pages > PIPE_BUFFERS)
-               nr_pages = PIPE_BUFFERS;
+       req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+       nr_pages = min(req_pages, (unsigned)PIPE_BUFFERS);
 
        /*
         * Lookup the (hopefully) full range of pages we need.
@@ -297,8 +301,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
         * readahead/allocate the rest and fill in the holes.
         */
        if (spd.nr_pages < nr_pages)
-               page_cache_readahead_ondemand(mapping, &in->f_ra, in,
-                               NULL, index, nr_pages - spd.nr_pages);
+               page_cache_sync_readahead(mapping, &in->f_ra, in,
+                               index, req_pages - spd.nr_pages);
 
        error = 0;
        while (spd.nr_pages < nr_pages) {
@@ -354,8 +358,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
                page = pages[page_nr];
 
                if (PageReadahead(page))
-                       page_cache_readahead_ondemand(mapping, &in->f_ra, in,
-                                       page, index, nr_pages - page_nr);
+                       page_cache_async_readahead(mapping, &in->f_ra, in,
+                                       page, index, req_pages - page_nr);
 
                /*
                 * If the page isn't uptodate, we may need to start io on it
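The readahead changes above replace the old page_cache_readahead_ondemand() call with the split sync/async interface, and size the request by req_pages (what the caller actually asked for) instead of the PIPE_BUFFERS-clamped nr_pages. The pattern, pulled out of the surrounding function (sketch only; example_get_page is a made-up helper, and __generic_file_splice_read() additionally allocates pages for any holes that readahead leaves behind):

static struct page *example_get_page(struct file *filp, pgoff_t index,
				     unsigned long req_size)
{
	struct address_space *mapping = filp->f_mapping;
	struct page *page;

	page = find_get_page(mapping, index);
	if (!page) {
		/* cache miss: kick off synchronous readahead, then retry */
		page_cache_sync_readahead(mapping, &filp->f_ra, filp,
					  index, req_size);
		page = find_get_page(mapping, index);
	} else if (PageReadahead(page)) {
		/* readahead marker hit: pipeline the next window async */
		page_cache_async_readahead(mapping, &filp->f_ra, filp,
					   page, index, req_size);
	}

	return page;	/* NULL if still not cached; caller must allocate */
}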
@@ -449,7 +453,7 @@ fill_it:
         */
        while (page_nr < nr_pages)
                page_cache_release(pages[page_nr++]);
-       in->f_ra.prev_index = index;
+       in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
 
        if (spd.nr_pages)
                return splice_to_pipe(pipe, &spd);
@@ -565,7 +569,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
        struct address_space *mapping = file->f_mapping;
        unsigned int offset, this_len;
        struct page *page;
-       pgoff_t index;
+       void *fsdata;
        int ret;
 
        /*
@@ -575,49 +579,16 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
        if (unlikely(ret))
                return ret;
 
-       index = sd->pos >> PAGE_CACHE_SHIFT;
        offset = sd->pos & ~PAGE_CACHE_MASK;
 
        this_len = sd->len;
        if (this_len + offset > PAGE_CACHE_SIZE)
                this_len = PAGE_CACHE_SIZE - offset;
 
-find_page:
-       page = find_lock_page(mapping, index);
-       if (!page) {
-               ret = -ENOMEM;
-               page = page_cache_alloc_cold(mapping);
-               if (unlikely(!page))
-                       goto out_ret;
-
-               /*
-                * This will also lock the page
-                */
-               ret = add_to_page_cache_lru(page, mapping, index,
-                                           GFP_KERNEL);
-               if (unlikely(ret))
-                       goto out;
-       }
-
-       ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
-       if (unlikely(ret)) {
-               loff_t isize = i_size_read(mapping->host);
-
-               if (ret != AOP_TRUNCATED_PAGE)
-                       unlock_page(page);
-               page_cache_release(page);
-               if (ret == AOP_TRUNCATED_PAGE)
-                       goto find_page;
-
-               /*
-                * prepare_write() may have instantiated a few blocks
-                * outside i_size.  Trim these off again.
-                */
-               if (sd->pos + this_len > isize)
-                       vmtruncate(mapping->host, isize);
-
-               goto out_ret;
-       }
+       ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
+                               AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
+       if (unlikely(ret))
+               goto out;
 
        if (buf->page != page) {
                /*
@@ -631,30 +602,9 @@ find_page:
                kunmap_atomic(dst, KM_USER1);
                buf->ops->unmap(pipe, buf, src);
        }
-
-       ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
-       if (ret) {
-               if (ret == AOP_TRUNCATED_PAGE) {
-                       page_cache_release(page);
-                       goto find_page;
-               }
-               if (ret < 0)
-                       goto out;
-               /*
-                * Partial write has happened, so 'ret' already initialized by
-                * number of bytes written, Where is nothing we have to do here.
-                */
-       } else
-               ret = this_len;
-       /*
-        * Return the number of bytes written and mark page as
-        * accessed, we are now done!
-        */
-       mark_page_accessed(page);
+       ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
+                               page, fsdata);
 out:
-       page_cache_release(page);
-       unlock_page(page);
-out_ret:
        return ret;
 }
 
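The two hunks above convert pipe_to_file() from the open-coded find_lock_page() + ->prepare_write()/->commit_write() sequence to the newer write_begin/write_end address_space operations, reached through the pagecache_write_begin()/pagecache_write_end() wrappers; page lookup, locking and the old AOP_TRUNCATED_PAGE/i_size error handling move down into those helpers and the filesystem's methods. The resulting pattern, in isolation (sketch only; example_write_to_pagecache is a made-up name, and the caller is assumed to keep [offset, offset+len) within one page, as pipe_to_file() does):

static int example_write_to_pagecache(struct file *file, loff_t pos,
				      const char *src, unsigned int len)
{
	struct address_space *mapping = file->f_mapping;
	unsigned int offset = pos & ~PAGE_CACHE_MASK;
	struct page *page;
	void *fsdata;
	char *dst;
	int ret;

	/* finds/creates and locks the page, prepares the blocks under it */
	ret = pagecache_write_begin(file, mapping, pos, len,
				    AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
	if (unlikely(ret))
		return ret;

	dst = kmap_atomic(page, KM_USER0);
	memcpy(dst + offset, src, len);
	kunmap_atomic(dst, KM_USER0);

	/*
	 * ->write_end commits the copy (dirtying, i_size update) and
	 * unlocks/releases the page; returns bytes written or an error.
	 */
	return pagecache_write_end(file, mapping, pos, len, len, page, fsdata);
}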
@@ -880,13 +830,18 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 {
        struct address_space *mapping = out->f_mapping;
        struct inode *inode = mapping->host;
+       int killsuid, killpriv;
        ssize_t ret;
-       int err;
+       int err = 0;
 
-       err = should_remove_suid(out->f_path.dentry);
-       if (unlikely(err)) {
+       killpriv = security_inode_need_killpriv(out->f_path.dentry);
+       killsuid = should_remove_suid(out->f_path.dentry);
+       if (unlikely(killsuid || killpriv)) {
                mutex_lock(&inode->i_mutex);
-               err = __remove_suid(out->f_path.dentry, err);
+               if (killpriv)
+                       err = security_inode_killpriv(out->f_path.dentry);
+               if (!err && killsuid)
+                       err = __remove_suid(out->f_path.dentry, killsuid);
                mutex_unlock(&inode->i_mutex);
                if (err)
                        return err;
@@ -959,10 +914,6 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
        if (unlikely(ret < 0))
                return ret;
 
-       ret = security_file_permission(out, MAY_WRITE);
-       if (unlikely(ret < 0))
-               return ret;
-
        return out->f_op->splice_write(pipe, out, ppos, len, flags);
 }
 
@@ -985,10 +936,6 @@ static long do_splice_to(struct file *in, loff_t *ppos,
        if (unlikely(ret < 0))
                return ret;
 
-       ret = security_file_permission(in, MAY_READ);
-       if (unlikely(ret < 0))
-               return ret;
-
        return in->f_op->splice_read(in, ppos, pipe, len, flags);
 }
 
@@ -1001,7 +948,7 @@ static long do_splice_to(struct file *in, loff_t *ppos,
  * Description:
  *    This is a special case helper to splice directly between two
  *    points, without requiring an explicit pipe. Internally an allocated
- *    pipe is cached in the process, and reused during the life time of
+ *    pipe is cached in the process, and reused during the lifetime of
  *    that process.
  *
  */
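The "pipe is cached in the process" detail refers to the per-task internal pipe that splice_direct_to_actor() sets up on first use (task_struct carries a splice_pipe field for exactly this). Roughly, as a sketch, with get_task_splice_pipe() being a made-up name and the exact in-kernel code possibly differing in detail:

static struct pipe_inode_info *get_task_splice_pipe(void)
{
	struct pipe_inode_info *pipe = current->splice_pipe;

	if (unlikely(!pipe)) {
		pipe = alloc_pipe_info(NULL);
		if (unlikely(!pipe))
			return NULL;
		/*
		 * No real reader is attached; the data is pulled back out
		 * right after splice_to_pipe(), so pretend there is one.
		 */
		pipe->readers = 1;
		current->splice_pipe = pipe;
	}

	return pipe;
}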
@@ -1084,7 +1031,9 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
                        goto out_release;
        }
 
+done:
        pipe->nrbufs = pipe->curbuf = 0;
+       file_accessed(in);
        return bytes;
 
 out_release:
@@ -1100,16 +1049,11 @@ out_release:
                        buf->ops = NULL;
                }
        }
-       pipe->nrbufs = pipe->curbuf = 0;
-
-       /*
-        * If we transferred some data, return the number of bytes:
-        */
-       if (bytes > 0)
-               return bytes;
 
-       return ret;
+       if (!bytes)
+               bytes = ret;
 
+       goto done;
 }
 EXPORT_SYMBOL(splice_direct_to_actor);
 
@@ -1225,6 +1169,36 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 }
 
 /*
+ * Do a copy-from-user while holding the mmap_semaphore for reading, in a
+ * manner safe from deadlocking with simultaneous mmap() (grabbing mmap_sem
+ * for writing) and page faulting on the user memory pointed to by src.
+ * This assumes that we will very rarely hit the partial != 0 path, or this
+ * will not be a win.
+ */
+static int copy_from_user_mmap_sem(void *dst, const void __user *src, size_t n)
+{
+       int partial;
+
+       if (!access_ok(VERIFY_READ, src, n))
+               return -EFAULT;
+
+       pagefault_disable();
+       partial = __copy_from_user_inatomic(dst, src, n);
+       pagefault_enable();
+
+       /*
+        * Didn't copy everything, drop the mmap_sem and do a faulting copy
+        */
+       if (unlikely(partial)) {
+               up_read(&current->mm->mmap_sem);
+               partial = copy_from_user(dst, src, n);
+               down_read(&current->mm->mmap_sem);
+       }
+
+       return partial;
+}
+
+/*
  * Map an iov into an array of pages and offset/length tupples. With the
  * partial_page structure, we can map several non-contiguous ranges into
  * our ones pages[] map instead of splitting that operation into pieces.
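The deadlock that the copy_from_user_mmap_sem() comment above alludes to is the rwsem reader/queued-writer case: this task already holds mmap_sem for reading, an mmap() in another thread queues for the write lock, and a page fault taken during a plain copy_from_user() would try to take mmap_sem for reading a second time and queue behind that writer, leaving both tasks stuck. The helper avoids this by attempting the copy with page faults disabled and only falling back to a faulting copy after dropping the lock. The same pattern, restated generically (sketch only, copy_user_under_rwsem is a made-up name):

static int copy_user_under_rwsem(struct rw_semaphore *sem, void *dst,
				 const void __user *src, size_t n)
{
	unsigned long left;

	/* fast path: faults disabled, so we can never block on 'sem' here */
	pagefault_disable();
	left = __copy_from_user_inatomic(dst, src, n);
	pagefault_enable();

	if (unlikely(left)) {
		/* slow path: drop the lock, take the faults, reacquire */
		up_read(sem);
		left = copy_from_user(dst, src, n);
		down_read(sem);
	}

	return left ? -EFAULT : 0;
}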
@@ -1237,35 +1211,30 @@ static int get_iovec_page_array(const struct iovec __user *iov,
 {
        int buffers = 0, error = 0;
 
-       /*
-        * It's ok to take the mmap_sem for reading, even
-        * across a "get_user()".
-        */
        down_read(&current->mm->mmap_sem);
 
        while (nr_vecs) {
                unsigned long off, npages;
+               struct iovec entry;
                void __user *base;
                size_t len;
                int i;
 
-               /*
-                * Get user address base and length for this iovec.
-                */
-               error = get_user(base, &iov->iov_base);
-               if (unlikely(error))
-                       break;
-               error = get_user(len, &iov->iov_len);
-               if (unlikely(error))
+               error = -EFAULT;
+               if (copy_from_user_mmap_sem(&entry, iov, sizeof(entry)))
                        break;
 
+               base = entry.iov_base;
+               len = entry.iov_len;
+
                /*
                 * Sanity check this iovec. 0 read succeeds.
                 */
+               error = 0;
                if (unlikely(!len))
                        break;
                error = -EFAULT;
-               if (unlikely(!base))
+               if (!access_ok(VERIFY_READ, base, len))
                        break;
 
                /*
@@ -1369,10 +1338,10 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
        if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
                ret = -EFAULT;
 
+       buf->ops->unmap(pipe, buf, src);
 out:
        if (ret > 0)
                sd->u.userptr += ret;
-       buf->ops->unmap(pipe, buf, src);
        return ret;
 }
 
@@ -1421,6 +1390,11 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
                        break;
                }
 
+               if (unlikely(!access_ok(VERIFY_WRITE, base, len))) {
+                       error = -EFAULT;
+                       break;
+               }
+
                sd.len = 0;
                sd.total_len = len;
                sd.flags = flags;
@@ -1469,6 +1443,7 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
                .partial = partial,
                .flags = flags,
                .ops = &user_page_pipe_buf_ops,
+               .spd_release = spd_release_page,
        };
 
        pipe = pipe_info(file->f_path.dentry->d_inode);
@@ -1694,6 +1669,13 @@ static int link_pipe(struct pipe_inode_info *ipipe,
                i++;
        } while (len);
 
+       /*
+        * return EAGAIN if we have the potential of some data in the
+        * future, otherwise just return 0
+        */
+       if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK))
+               ret = -EAGAIN;
+
        inode_double_unlock(ipipe->inode, opipe->inode);
 
        /*
@@ -1734,11 +1716,8 @@ static long do_tee(struct file *in, struct file *out, size_t len,
                ret = link_ipipe_prep(ipipe, flags);
                if (!ret) {
                        ret = link_opipe_prep(opipe, flags);
-                       if (!ret) {
+                       if (!ret)
                                ret = link_pipe(ipipe, opipe, len, flags);
-                               if (!ret && (flags & SPLICE_F_NONBLOCK))
-                                       ret = -EAGAIN;
-                       }
                }
        }
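From userspace, the last two hunks change what a non-blocking tee() reports on an empty input pipe: -1/EAGAIN is now returned only when more data may still arrive (the ipipe->waiting_writers check above), and 0 otherwise, instead of EAGAIN whenever nothing could be duplicated. A minimal caller illustrating the distinction (userspace sketch, not part of the patch):

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Duplicate whatever is buffered in fd_in into fd_out without consuming it.
 * Returns 1 if the caller should poll and retry, 0 on end-of-stream, -1 on
 * error.
 */
static int tee_nonblock(int fd_in, int fd_out)
{
	for (;;) {
		ssize_t n = tee(fd_in, fd_out, 65536, SPLICE_F_NONBLOCK);

		if (n > 0)
			continue;	/* duplicated n bytes, keep going */
		if (n == 0)
			return 0;	/* nothing buffered and none expected */
		if (errno == EAGAIN)
			return 1;	/* empty but writers remain: retry later */
		perror("tee");
		return -1;
	}
}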