fs/ceph: Use ERR_CAST
[safe/jmp/linux-2.6] / fs / ceph / dir.c
index 4f74679..a86c1d5 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/spinlock.h>
 #include <linux/fs_struct.h>
 #include <linux/namei.h>
+#include <linux/slab.h>
 #include <linux/sched.h>
 
 #include "super.h"
@@ -50,8 +51,11 @@ int ceph_init_dentry(struct dentry *dentry)
                return -ENOMEM;          /* oh well */
 
        spin_lock(&dentry->d_lock);
-       if (dentry->d_fsdata) /* lost a race */
+       if (dentry->d_fsdata) {
+               /* lost a race */
+               kmem_cache_free(ceph_dentry_cachep, di);
                goto out_unlock;
+       }
        di->dentry = dentry;
        di->lease_session = NULL;
        dentry->d_fsdata = di;
@@ -124,13 +128,16 @@ more:
        dentry = list_entry(p, struct dentry, d_u.d_child);
        di = ceph_dentry(dentry);
        while (1) {
-               dout(" p %p/%p d_subdirs %p/%p\n", p->prev, p->next,
+               dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next,
+                    d_unhashed(dentry) ? "!hashed" : "hashed",
                     parent->d_subdirs.prev, parent->d_subdirs.next);
                if (p == &parent->d_subdirs) {
                        fi->at_end = 1;
                        goto out_unlock;
                }
                if (!d_unhashed(dentry) && dentry->d_inode &&
+                   ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
+                   ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
                    filp->f_pos <= di->offset)
                        break;
                dout(" skipping %p %.*s at %llu (%llu)%s%s\n", dentry,
@@ -168,11 +175,11 @@ more:
        spin_lock(&inode->i_lock);
        spin_lock(&dcache_lock);
 
+       last = dentry;
+
        if (err < 0)
                goto out_unlock;
 
-       last = dentry;
-
        p = p->prev;
        filp->f_pos++;
 
@@ -226,6 +233,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
        u32 ftype;
        struct ceph_mds_reply_info_parsed *rinfo;
        const int max_entries = client->mount_args->max_readdir;
+       const int max_bytes = client->mount_args->max_readdir_bytes;
 
        dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off);
        if (fi->at_end)
@@ -286,8 +294,10 @@ more:
                        CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
 
                /* discard old result, if any */
-               if (fi->last_readdir)
+               if (fi->last_readdir) {
                        ceph_mdsc_put_request(fi->last_readdir);
+                       fi->last_readdir = NULL;
+               }
 
                /* requery frag tree, as the frag topology may have changed */
                frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL);
@@ -307,7 +317,8 @@ more:
                req->r_readdir_offset = fi->next_offset;
                req->r_args.readdir.frag = cpu_to_le32(frag);
                req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
-               req->r_num_caps = max_entries;
+               req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes);
+               req->r_num_caps = max_entries + 1;
                err = ceph_mdsc_do_request(mdsc, NULL, req);
                if (err < 0) {
                        ceph_mdsc_put_request(req);
@@ -330,7 +341,7 @@ more:
                if (req->r_reply_info.dir_end) {
                        kfree(fi->last_name);
                        fi->last_name = NULL;
-                       fi->next_offset = 0;
+                       fi->next_offset = 2;
                } else {
                        rinfo = &req->r_reply_info;
                        err = note_last_dentry(fi,
@@ -473,7 +484,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
 struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
                                  struct dentry *dentry, int err)
 {
-       struct ceph_client *client = ceph_client(dentry->d_sb);
+       struct ceph_client *client = ceph_sb_to_client(dentry->d_sb);
        struct inode *parent = dentry->d_parent->d_inode;
 
        /* .snap dir? */
@@ -484,6 +495,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
                struct inode *inode = ceph_get_snapdir(parent);
                dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
                     dentry, dentry->d_name.len, dentry->d_name.name, inode);
+               BUG_ON(!d_unhashed(dentry));
                d_add(dentry, inode);
                err = 0;
        }
@@ -511,6 +523,12 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
        return dentry;
 }
 
+static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry)
+{
+       return ceph_ino(inode) == CEPH_INO_ROOT &&
+               strncmp(dentry->d_name.name, ".ceph", 5) == 0;
+}
+
 /*
  * Look up a single dir entry.  If there is a lookup intent, inform
  * the MDS so that it gets our 'caps wanted' value in a single op.
@@ -553,9 +571,9 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
                if (strncmp(dentry->d_name.name,
                            client->mount_args->snapdir_name,
                            dentry->d_name.len) &&
+                   !is_root_ceph_dentry(dir, dentry) &&
                    (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
                    (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
-                       di->offset = ci->i_max_offset++;
                        spin_unlock(&dir->i_lock);
                        dout(" dir %p complete, -ENOENT\n", dir);
                        d_add(dentry, NULL);
@@ -569,7 +587,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
                CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
        req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
        if (IS_ERR(req))
-               return ERR_PTR(PTR_ERR(req));
+               return ERR_CAST(req);
        req->r_dentry = dget(dentry);
        req->r_num_caps = 2;
        /* we only need inode linkage */
@@ -867,12 +885,30 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
                 * do_request, above).  If there is no trace, we need
                 * to do it here.
                 */
+
+               /* d_move screws up d_subdirs order */
+               ceph_i_clear(new_dir, CEPH_I_COMPLETE);
+
                d_move(old_dentry, new_dentry);
+
+               /* ensure target dentry is invalidated, despite
+                  rehashing bug in vfs_rename_dir */
+               ceph_invalidate_dentry_lease(new_dentry);
        }
        ceph_mdsc_put_request(req);
        return err;
 }
 
+/*
+ * Ensure a dentry lease will no longer revalidate.
+ */
+void ceph_invalidate_dentry_lease(struct dentry *dentry)
+{
+       spin_lock(&dentry->d_lock);
+       dentry->d_time = jiffies;
+       ceph_dentry(dentry)->lease_shared_gen = 0;
+       spin_unlock(&dentry->d_lock);
+}
 
 /*
  * Check if dentry lease is valid.  If not, delete the lease.  Try to
@@ -911,8 +947,6 @@ static int dentry_lease_is_valid(struct dentry *dentry)
                                di->lease_renew_after = 0;
                                di->lease_renew_from = jiffies;
                        }
-               } else {
-                       __ceph_mdsc_drop_dentry_lease(dentry);
                }
        }
        spin_unlock(&dentry->d_lock);
@@ -952,8 +986,9 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
        struct inode *dir = dentry->d_parent->d_inode;
 
-       dout("d_revalidate %p '%.*s' inode %p\n", dentry,
-            dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
+       dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
+            dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
+            ceph_dentry(dentry)->offset);
 
        /* always trust cached snapped dentries, snapdir dentry */
        if (ceph_snap(dir) != CEPH_NOSNAP) {
@@ -1030,7 +1065,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
        struct ceph_inode_info *ci = ceph_inode(inode);
        int left;
 
-       if (!ceph_test_opt(ceph_client(inode->i_sb), DIRSTAT))
+       if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
                return -EISDIR;
 
        if (!cf->dir_info) {
@@ -1128,11 +1163,11 @@ void ceph_dentry_lru_add(struct dentry *dn)
 {
        struct ceph_dentry_info *di = ceph_dentry(dn);
        struct ceph_mds_client *mdsc;
-       dout("dentry_lru_add %p %p\t%.*s\n",
-                       di, dn, dn->d_name.len, dn->d_name.name);
 
+       dout("dentry_lru_add %p %p '%.*s'\n", di, dn,
+            dn->d_name.len, dn->d_name.name);
        if (di) {
-               mdsc = &ceph_client(dn->d_sb)->mdsc;
+               mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
                spin_lock(&mdsc->dentry_lru_lock);
                list_add_tail(&di->lru, &mdsc->dentry_lru);
                mdsc->num_dentry++;
@@ -1144,11 +1179,11 @@ void ceph_dentry_lru_touch(struct dentry *dn)
 {
        struct ceph_dentry_info *di = ceph_dentry(dn);
        struct ceph_mds_client *mdsc;
-       dout("dentry_lru_touch %p %p\t%.*s\n",
-                       di, dn, dn->d_name.len, dn->d_name.name);
 
+       dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn,
+            dn->d_name.len, dn->d_name.name, di->offset);
        if (di) {
-               mdsc = &ceph_client(dn->d_sb)->mdsc;
+               mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
                spin_lock(&mdsc->dentry_lru_lock);
                list_move_tail(&di->lru, &mdsc->dentry_lru);
                spin_unlock(&mdsc->dentry_lru_lock);
@@ -1160,10 +1195,10 @@ void ceph_dentry_lru_del(struct dentry *dn)
        struct ceph_dentry_info *di = ceph_dentry(dn);
        struct ceph_mds_client *mdsc;
 
-       dout("dentry_lru_del %p %p\t%.*s\n",
-                       di, dn, dn->d_name.len, dn->d_name.name);
+       dout("dentry_lru_del %p %p '%.*s'\n", di, dn,
+            dn->d_name.len, dn->d_name.name);
        if (di) {
-               mdsc = &ceph_client(dn->d_sb)->mdsc;
+               mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc;
                spin_lock(&mdsc->dentry_lru_lock);
                list_del_init(&di->lru);
                mdsc->num_dentry--;