fs/ceph: Use ERR_CAST
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 2d88c80..f06f902 100644
@@ -1,6 +1,7 @@
 #include "ceph_debug.h"
 
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/namei.h>
 #include <linux/writeback.h>
@@ -229,7 +230,7 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
        /* do the open */
        req = prepare_open_request(dir->i_sb, flags, mode);
        if (IS_ERR(req))
-               return ERR_PTR(PTR_ERR(req));
+               return ERR_CAST(req);
        req->r_dentry = dget(dentry);
        req->r_num_caps = 2;
        if (flags & O_CREAT) {
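
Note on the ERR_CAST() conversion above: ERR_PTR(PTR_ERR(req)) round-trips the error pointer through a long and back purely to change its type, while ERR_CAST() states the intent directly. For reference, the helper in include/linux/err.h is essentially:

	static inline void * __must_check ERR_CAST(const void *ptr)
	{
		/* cast away the const */
		return (void *) ptr;
	}
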
@@ -262,6 +263,9 @@ int ceph_release(struct inode *inode, struct file *file)
        kfree(cf->dir_info);
        dput(cf->dentry);
        kmem_cache_free(ceph_file_cachep, cf);
+
+       /* wake up anyone waiting for caps on this inode */
+       wake_up(&ci->i_cap_wq);
        return 0;
 }
 
@@ -313,16 +317,16 @@ void ceph_release_page_vector(struct page **pages, int num_pages)
 /*
  * allocate a vector new pages
  */
-static struct page **alloc_page_vector(int num_pages)
+struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
 {
        struct page **pages;
        int i;
 
-       pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
+       pages = kmalloc(sizeof(*pages) * num_pages, flags);
        if (!pages)
                return ERR_PTR(-ENOMEM);
        for (i = 0; i < num_pages; i++) {
-               pages[i] = alloc_page(GFP_NOFS);
+               pages[i] = __page_cache_alloc(flags);
                if (pages[i] == NULL) {
                        ceph_release_page_vector(pages, i);
                        return ERR_PTR(-ENOMEM);
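
The allocator above becomes a shared helper, ceph_alloc_page_vector(), taking explicit gfp flags instead of hard-coding GFP_NOFS and using __page_cache_alloc() for the individual pages. A minimal usage sketch, following the same IS_ERR/ceph_release_page_vector convention the callers below rely on:

	struct page **pages;

	pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
	if (IS_ERR(pages))
		return PTR_ERR(pages);
	/* ... fill or read the pages ... */
	ceph_release_page_vector(pages, num_pages);
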
@@ -395,23 +399,22 @@ static void zero_page_vector_range(int off, int len, struct page **pages)
 {
        int i = off >> PAGE_CACHE_SHIFT;
 
+       off &= ~PAGE_CACHE_MASK;
+
        dout("zero_page_vector_page %u~%u\n", off, len);
-       BUG_ON(len < PAGE_CACHE_SIZE);
 
        /* leading partial page? */
-       if (off & ~PAGE_CACHE_MASK) {
+       if (off) {
+               int end = min((int)PAGE_CACHE_SIZE, off + len);
                dout("zeroing %d %p head from %d\n", i, pages[i],
-                    (int)(off & ~PAGE_CACHE_MASK));
-               zero_user_segment(pages[i], off & ~PAGE_CACHE_MASK,
-                                 PAGE_CACHE_SIZE);
-               off += PAGE_CACHE_SIZE;
-               off &= PAGE_CACHE_MASK;
+                    (int)off);
+               zero_user_segment(pages[i], off, end);
+               len -= (end - off);
                i++;
        }
        while (len >= PAGE_CACHE_SIZE) {
-               dout("zeroing %d %p\n", i, pages[i]);
+               dout("zeroing %d %p len=%d\n", i, pages[i], len);
                zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
-               off += PAGE_CACHE_SIZE;
                len -= PAGE_CACHE_SIZE;
                i++;
        }
@@ -432,12 +435,13 @@ static void zero_page_vector_range(int off, int len, struct page **pages)
  */
 static int striped_read(struct inode *inode,
                        u64 off, u64 len,
-                       struct page **pages, int num_pages)
+                       struct page **pages, int num_pages,
+                       int *checkeof)
 {
        struct ceph_client *client = ceph_inode_to_client(inode);
        struct ceph_inode_info *ci = ceph_inode(inode);
        u64 pos, this_len;
-       int page_off = off & ~PAGE_CACHE_SIZE; /* first byte's offset in page */
+       int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
        int left, pages_left;
        int read;
        struct page **page_pos;
@@ -493,19 +497,12 @@ more:
                        dout("zero tail\n");
                        zero_page_vector_range(page_off + read, len - read,
                                               pages);
+                       read = len;
                        goto out;
                }
 
                /* check i_size */
-               ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
-               if (ret < 0)
-                       goto out;
-
-               /* hit EOF? */
-               if (pos >= inode->i_size)
-                       goto out;
-
-               goto more;
+               *checkeof = 1;
        }
 
 out:
@@ -522,7 +519,7 @@ out:
  * If the read spans object boundary, just do multiple reads.
  */
 static ssize_t ceph_sync_read(struct file *file, char __user *data,
-                             unsigned len, loff_t *poff)
+                             unsigned len, loff_t *poff, int *checkeof)
 {
        struct inode *inode = file->f_dentry->d_inode;
        struct page **pages;
@@ -542,20 +539,24 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
                 * but it will at least behave sensibly when they are
                 * in sequence.
                 */
-               filemap_write_and_wait(inode->i_mapping);
        } else {
-               pages = alloc_page_vector(num_pages);
+               pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
        }
        if (IS_ERR(pages))
                return PTR_ERR(pages);
 
-       ret = striped_read(inode, off, len, pages, num_pages);
+       ret = filemap_write_and_wait(inode->i_mapping);
+       if (ret < 0)
+               goto done;
+
+       ret = striped_read(inode, off, len, pages, num_pages, checkeof);
 
        if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
                ret = copy_page_vector_to_user(pages, data, off, ret);
        if (ret >= 0)
                *poff = off + ret;
 
+done:
        if (file->f_flags & O_DIRECT)
                put_page_vector(pages, num_pages);
        else
@@ -617,6 +618,16 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
        else
                pos = *offset;
 
+       ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
+       if (ret < 0)
+               return ret;
+
+       ret = invalidate_inode_pages2_range(inode->i_mapping,
+                                           pos >> PAGE_CACHE_SHIFT,
+                                           (pos + left) >> PAGE_CACHE_SHIFT);
+       if (ret < 0)
+               dout("invalidate_inode_pages2_range returned %d\n", ret);
+
        flags = CEPH_OSD_FLAG_ORDERSNAP |
                CEPH_OSD_FLAG_ONDISK |
                CEPH_OSD_FLAG_WRITE;
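
On the new pre-write step above: dirty pages covering the range are flushed first, then invalidate_inode_pages2_range() drops the cached copies so later buffered reads refetch what the OSDs now hold. Note that it takes page indices rather than byte offsets, hence the PAGE_CACHE_SHIFT conversions, and that an invalidation failure is only reported via dout() rather than failing the write. A small sketch of the index math, assuming 4096-byte pages:

	/*
	 * pos = 5000, left = 3000  ->  bytes 5000..7999 are being written:
	 *   start index = 5000 >> PAGE_CACHE_SHIFT          -> page 1
	 *   end index   = (5000 + 3000) >> PAGE_CACHE_SHIFT -> page 1
	 * so only the page actually touched by this write is invalidated.
	 */
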
@@ -638,8 +649,8 @@ more:
                                    do_sync,
                                    ci->i_truncate_seq, ci->i_truncate_size,
                                    &mtime, false, 2);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
+       if (!req)
+               return -ENOMEM;
 
        num_pages = calc_pages_for(pos, len);
 
@@ -654,9 +665,10 @@ more:
                 * throw out any page cache pages in this range. this
                 * may block.
                 */
-               truncate_inode_pages_range(inode->i_mapping, pos, pos+len);
+               truncate_inode_pages_range(inode->i_mapping, pos, 
+                                          (pos+len) | (PAGE_CACHE_SIZE-1));
        } else {
-               pages = alloc_page_vector(num_pages);
+               pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
                if (IS_ERR(pages)) {
                        ret = PTR_ERR(pages);
                        goto out;
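
The rounding added to the truncate call above matters because truncate_inode_pages_range() (at least in kernels of this vintage) expects lend to point at the last byte of a page, so the end of the written range is rounded up to such a boundary with | (PAGE_CACHE_SIZE-1). For example, with 4096-byte pages:

	/*
	 * pos + len = 5000:
	 *   5000 | (PAGE_CACHE_SIZE - 1)  =  5000 | 4095  =  8191
	 * i.e. the inclusive end of the page containing byte 5000, which is
	 * the form of lend that truncate_inode_pages_range() expects.
	 */
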
@@ -732,11 +744,14 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
        size_t len = iov->iov_len;
        struct inode *inode = filp->f_dentry->d_inode;
        struct ceph_inode_info *ci = ceph_inode(inode);
+       void *base = iov->iov_base;
        ssize_t ret;
        int got = 0;
+       int checkeof = 0, read = 0;
 
        dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
             inode, ceph_vinop(inode), pos, (unsigned)len, inode);
+again:
        __ceph_do_pending_vmtruncate(inode);
        ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, CEPH_CAP_FILE_CACHE,
                            &got, -1);
@@ -750,7 +765,7 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
            (iocb->ki_filp->f_flags & O_DIRECT) ||
            (inode->i_sb->s_flags & MS_SYNCHRONOUS))
                /* hmm, this isn't really async... */
-               ret = ceph_sync_read(filp, iov->iov_base, len, ppos);
+               ret = ceph_sync_read(filp, base, len, ppos, &checkeof);
        else
                ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
 
@@ -758,6 +773,23 @@ out:
        dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
             inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
        ceph_put_cap_refs(ci, got);
+
+       if (checkeof && ret >= 0) {
+               int statret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
+
+               /* hit EOF or hole? */
+               if (statret == 0 && *ppos < inode->i_size) {
+                       dout("aio_read sync_read hit hole, reading more\n");
+                       read += ret;
+                       base += ret;
+                       len -= ret;
+                       checkeof = 0;
+                       goto again;
+               }
+       }
+       if (ret >= 0)
+               ret += read;
+
        return ret;
 }
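
The checkeof/again logic added here replaces the getattr that used to live inside striped_read(): when a sync read comes back short because it hit what looked like EOF inside an object, the caller re-checks the real i_size once (CEPH_STAT_CAP_SIZE) and, if the file extends past the current position, accounts the bytes already copied and retries the remainder at base + ret. Roughly, with purely illustrative sizes:

	/*
	 * len = 1048576, first pass returns ret = 65536 with checkeof set:
	 *   getattr shows i_size > *ppos, so there is more to read
	 *   read += 65536;  base += 65536;  len -= 65536;  checkeof = 0;
	 *   goto again;     the retry covers the remaining 983040 bytes
	 * the final return value is read plus the ret of the last pass.
	 */
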
 
@@ -777,10 +809,10 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_dentry->d_inode;
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_osd_client *osdc = &ceph_client(inode->i_sb)->osdc;
+       struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc;
        loff_t endoff = pos + iov->iov_len;
        int got = 0;
-       int ret;
+       int ret, err;
 
        if (ceph_snap(inode) != CEPH_NOSNAP)
                return -EROFS;
@@ -811,9 +843,12 @@ retry_snap:
 
                if ((ret >= 0 || ret == -EIOCBQUEUED) &&
                    ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host)
-                    || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL)))
-                       ret = vfs_fsync_range(file, file->f_path.dentry,
+                    || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
+                       err = vfs_fsync_range(file, file->f_path.dentry,
                                              pos, pos + ret - 1, 1);
+                       if (err < 0)
+                               ret = err;
+               }
        }
        if (ret >= 0) {
                spin_lock(&inode->i_lock);