NFS: Fix the resolution problem with nfs_inode_attrs_need_update()
authorTrond Myklebust <Trond.Myklebust@netapp.com>
Tue, 14 Oct 2008 23:16:07 +0000 (19:16 -0400)
committerTrond Myklebust <Trond.Myklebust@netapp.com>
Tue, 14 Oct 2008 23:23:17 +0000 (19:23 -0400)
It appears that 'jiffies' timestamps do not have high enough resolution for
nfs_inode_attrs_need_update(). One problem is that a GETATTR can be
launched within < 1 jiffy of the last operation that updated the attribute.
Another problem is that RPC calls can take < 1 jiffy to execute.

We can fix this by switching the variables to use a simple global counter
that gets incremented every time we start another GETATTR call.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
fs/nfs/dir.c
fs/nfs/inode.c
include/linux/nfs_fs.h
include/linux/nfs_xdr.h

index 49d5654..4807074 100644 (file)
@@ -156,6 +156,7 @@ typedef struct {
        decode_dirent_t decode;
        int             plus;
        unsigned long   timestamp;
+       unsigned long   gencount;
        int             timestamp_valid;
 } nfs_readdir_descriptor_t;
 
@@ -177,7 +178,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
        struct file     *file = desc->file;
        struct inode    *inode = file->f_path.dentry->d_inode;
        struct rpc_cred *cred = nfs_file_cred(file);
-       unsigned long   timestamp;
+       unsigned long   timestamp, gencount;
        int             error;
 
        dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
@@ -186,6 +187,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 
  again:
        timestamp = jiffies;
+       gencount = nfs_inc_attr_generation_counter();
        error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page,
                                          NFS_SERVER(inode)->dtsize, desc->plus);
        if (error < 0) {
@@ -199,6 +201,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
                goto error;
        }
        desc->timestamp = timestamp;
+       desc->gencount = gencount;
        desc->timestamp_valid = 1;
        SetPageUptodate(page);
        /* Ensure consistent page alignment of the data.
@@ -224,9 +227,10 @@ int dir_decode(nfs_readdir_descriptor_t *desc)
        if (IS_ERR(p))
                return PTR_ERR(p);
        desc->ptr = p;
-       if (desc->timestamp_valid)
+       if (desc->timestamp_valid) {
                desc->entry->fattr->time_start = desc->timestamp;
-       else
+               desc->entry->fattr->gencount = desc->gencount;
+       } else
                desc->entry->fattr->valid &= ~NFS_ATTR_FATTR;
        return 0;
 }
@@ -471,7 +475,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
        struct rpc_cred *cred = nfs_file_cred(file);
        struct page     *page = NULL;
        int             status;
-       unsigned long   timestamp;
+       unsigned long   timestamp, gencount;
 
        dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
                        (unsigned long long)*desc->dir_cookie);
@@ -482,6 +486,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
                goto out;
        }
        timestamp = jiffies;
+       gencount = nfs_inc_attr_generation_counter();
        status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred,
                                                *desc->dir_cookie, page,
                                                NFS_SERVER(inode)->dtsize,
@@ -490,6 +495,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
        desc->ptr = kmap(page);         /* matching kunmap in nfs_do_filldir */
        if (status >= 0) {
                desc->timestamp = timestamp;
+               desc->gencount = gencount;
                desc->timestamp_valid = 1;
                if ((status = dir_decode(desc)) == 0)
                        desc->entry->prev_cookie = *desc->dir_cookie;
index de3f11e..116a3bd 100644 (file)
@@ -305,7 +305,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
                        init_special_inode(inode, inode->i_mode, fattr->rdev);
 
                nfsi->read_cache_jiffies = fattr->time_start;
-               nfsi->last_updated = now;
+               nfsi->attr_gencount = fattr->gencount;
                nfsi->cache_change_attribute = now;
                inode->i_atime = fattr->atime;
                inode->i_mtime = fattr->mtime;
@@ -909,6 +909,30 @@ static int nfs_size_need_update(const struct inode *inode, const struct nfs_fatt
        return nfs_size_to_loff_t(fattr->size) > i_size_read(inode);
 }
 
+static unsigned long nfs_attr_generation_counter;
+
+static unsigned long nfs_read_attr_generation_counter(void)
+{
+       smp_rmb();
+       return nfs_attr_generation_counter;
+}
+
+unsigned long nfs_inc_attr_generation_counter(void)
+{
+       unsigned long ret;
+       smp_rmb();
+       ret = ++nfs_attr_generation_counter;
+       smp_wmb();
+       return ret;
+}
+
+void nfs_fattr_init(struct nfs_fattr *fattr)
+{
+       fattr->valid = 0;
+       fattr->time_start = jiffies;
+       fattr->gencount = nfs_inc_attr_generation_counter();
+}
+
 /**
  * nfs_inode_attrs_need_update - check if the inode attributes need updating
  * @inode - pointer to inode
@@ -922,8 +946,7 @@ static int nfs_size_need_update(const struct inode *inode, const struct nfs_fatt
  * catch the case where ctime either didn't change, or went backwards
  * (if someone reset the clock on the server) by looking at whether
  * or not this RPC call was started after the inode was last updated.
- * Note also the check for jiffy wraparound if the last_updated timestamp
- * is later than 'jiffies'.
+ * Note also the check for wraparound of 'attr_gencount'
  *
  * The function returns 'true' if it thinks the attributes in 'fattr' are
  * more recent than the ones cached in the inode.
@@ -933,10 +956,10 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n
 {
        const struct nfs_inode *nfsi = NFS_I(inode);
 
-       return time_after(fattr->time_start, nfsi->last_updated) ||
+       return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
                nfs_ctime_need_update(inode, fattr) ||
                nfs_size_need_update(inode, fattr) ||
-               time_after(nfsi->last_updated, jiffies);
+               ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
 }
 
 static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
@@ -1107,7 +1130,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                }
                /* If ctime has changed we should definitely clear access+acl caches */
                if (!timespec_equal(&inode->i_ctime, &fattr->ctime))
-                       invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+                       invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
        } else if (nfsi->change_attr != fattr->change_attr) {
                dprintk("NFS: change_attr change on server for file %s/%ld\n",
                                inode->i_sb->s_id, inode->i_ino);
@@ -1163,7 +1186,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
                nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
                nfsi->attrtimeo_timestamp = now;
-               nfsi->last_updated = now;
+               nfsi->attr_gencount = nfs_inc_attr_generation_counter();
        } else {
                if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
                        if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
index ca563ee..ac8d023 100644 (file)
@@ -137,7 +137,7 @@ struct nfs_inode {
        unsigned long           attrtimeo_timestamp;
        __u64                   change_attr;            /* v4 only */
 
-       unsigned long           last_updated;
+       unsigned long           attr_gencount;
        /* "Generation counter" for the attribute cache. This is
         * bumped whenever we update the metadata on the
         * server.
@@ -344,15 +344,11 @@ extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ct
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
 extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode);
 extern u64 nfs_compat_user_ino64(u64 fileid);
+extern void nfs_fattr_init(struct nfs_fattr *fattr);
 
 /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
 extern __be32 root_nfs_parse_addr(char *name); /*__init*/
-
-static inline void nfs_fattr_init(struct nfs_fattr *fattr)
-{
-       fattr->valid = 0;
-       fattr->time_start = jiffies;
-}
+extern unsigned long nfs_inc_attr_generation_counter(void);
 
 /*
  * linux/fs/nfs/file.c
index 6ee6ae3..c1c31ac 100644 (file)
@@ -56,6 +56,7 @@ struct nfs_fattr {
        __u64                   change_attr;    /* NFSv4 change attribute */
        __u64                   pre_change_attr;/* pre-op NFSv4 change attribute */
        unsigned long           time_start;
+       unsigned long           gencount;
 };
 
 #define NFS_ATTR_WCC           0x0001          /* pre-op WCC data    */