nilfs2: fix lock order reversal in chcp operation

[safe/jmp/linux-2.6] / fs / ocfs2 / inode.c
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c

index 50dbc48..0297fb8 100644 (file)
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -28,6 +28,7 @@
  #include <linux/slab.h>
  #include <linux/highmem.h>
  #include <linux/pagemap.h>
+#include <linux/quotaops.h>
  
  #include <asm/byteorder.h>
  
@@ -37,6 +38,8 @@
  #include "ocfs2.h"
  
  #include "alloc.h"
+#include "dir.h"
+#include "blockcheck.h"
  #include "dlmglue.h"
  #include "extent_map.h"
  #include "file.h"
@@ -50,6 +53,7 @@
  #include "sysfile.h"
  #include "uptodate.h"
  #include "xattr.h"
+#include "refcounttree.h"
  
  #include "buffer_head_io.h"
  
@@ -110,6 +114,17 @@ void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi)
                 oi->ip_attr |= OCFS2_DIRSYNC_FL;
  }
  
+struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
+{
+       struct ocfs2_find_inode_args args;
+       /* Build the lookup key: the block number plus the inode number derived from it; flags and sysfile type are zeroed. */
+       args.fi_blkno = blkno;
+       args.fi_flags = 0;
+       args.fi_ino = ino_from_blkno(sb, blkno);
+       args.fi_sysfile_type = 0;
+       /* Search with ilookup5(), using blkno as the hash value and ocfs2_find_actor with &args as the matcher; its result is returned unchanged. */
+       return ilookup5(sb, blkno, ocfs2_find_actor, &args);
+}
  struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
                          int sysfile_type)
  {
@@ -201,6 +216,8 @@ bail:
  static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
  {
         struct ocfs2_find_inode_args *args = opaque;
+       static struct lock_class_key ocfs2_quota_ip_alloc_sem_key,
+                                    ocfs2_file_ip_alloc_sem_key;
  
         mlog_entry("inode = %p, opaque = %p\n", inode, opaque);
  
@@ -209,6 +226,15 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
         if (args->fi_sysfile_type != 0)
                 lockdep_set_class(&inode->i_mutex,
                         &ocfs2_sysfile_lock_key[args->fi_sysfile_type]);
+       if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE ||
+           args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE ||
+           args->fi_sysfile_type == LOCAL_USER_QUOTA_SYSTEM_INODE ||
+           args->fi_sysfile_type == LOCAL_GROUP_QUOTA_SYSTEM_INODE)
+               lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
+                                 &ocfs2_quota_ip_alloc_sem_key);
+       else
+               lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
+                                 &ocfs2_file_ip_alloc_sem_key);
  
         mlog_exit(0);
         return 0;
@@ -273,7 +299,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
                      (unsigned long long)OCFS2_I(inode)->ip_blkno,
                      (unsigned long long)le64_to_cpu(fe->i_blkno));
  
-       inode->i_nlink = le16_to_cpu(fe->i_links_count);
+       inode->i_nlink = ocfs2_read_links_count(fe);
  
         if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) {
                 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
@@ -349,6 +375,8 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
  
         ocfs2_set_inode_flags(inode);
  
+       OCFS2_I(inode)->ip_last_used_slot = 0;
+       OCFS2_I(inode)->ip_last_used_group = 0;
         mlog_exit_void();
  }
  
@@ -535,8 +563,9 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
                         goto out;
                 }
  
-               status = ocfs2_journal_access(handle, inode, fe_bh,
-                                             OCFS2_JOURNAL_ACCESS_WRITE);
+               status = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
+                                                fe_bh,
+                                                OCFS2_JOURNAL_ACCESS_WRITE);
                 if (status < 0) {
                         mlog_errno(status);
                         goto out;
@@ -603,7 +632,8 @@ static int ocfs2_remove_inode(struct inode *inode,
                 goto bail;
         }
  
-       handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS);
+       handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS +
+                                  ocfs2_quota_trans_credits(inode->i_sb));
         if (IS_ERR(handle)) {
                 status = PTR_ERR(handle);
                 mlog_errno(status);
@@ -618,8 +648,8 @@ static int ocfs2_remove_inode(struct inode *inode,
         }
  
         /* set the inodes dtime */
-       status = ocfs2_journal_access(handle, inode, di_bh,
-                                     OCFS2_JOURNAL_ACCESS_WRITE);
+       status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+                                        OCFS2_JOURNAL_ACCESS_WRITE);
         if (status < 0) {
                 mlog_errno(status);
                 goto bail_commit;
@@ -634,7 +664,8 @@ static int ocfs2_remove_inode(struct inode *inode,
                 goto bail_commit;
         }
  
-       ocfs2_remove_from_cache(inode, di_bh);
+       ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh);
+       vfs_dq_free_inode(inode);
  
         status = ocfs2_free_dinode(handle, inode_alloc_inode,
                                    inode_alloc_bh, di);
@@ -736,6 +767,15 @@ static int ocfs2_wipe_inode(struct inode *inode,
                 goto bail_unlock_dir;
         }
  
+       /* Remove any dir index tree */
+       if (S_ISDIR(inode->i_mode)) {
+               status = ocfs2_dx_dir_truncate(inode, di_bh);
+               if (status) {
+                       mlog_errno(status);
+                       goto bail_unlock_dir;
+               }
+       }
+
         /*Free extended attribute resources associated with this inode.*/
         status = ocfs2_xattr_remove(inode, di_bh);
         if (status < 0) {
@@ -743,6 +783,12 @@ static int ocfs2_wipe_inode(struct inode *inode,
                 goto bail_unlock_dir;
         }
  
+       status = ocfs2_remove_refcount_tree(inode, di_bh);
+       if (status < 0) {
+               mlog_errno(status);
+               goto bail_unlock_dir;
+       }
+
         status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode,
                                     orphan_dir_bh);
         if (status < 0)
@@ -917,7 +963,10 @@ void ocfs2_delete_inode(struct inode *inode)
  
         mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
  
-       if (is_bad_inode(inode)) {
+       /* When we fail in read_inode() we mark inode as bad. The second test
+        * catches the case when inode allocation fails before allocating
+        * a block for inode. */
+       if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) {
                 mlog(0, "Skipping delete of bad inode\n");
                 goto bail;
         }
@@ -942,6 +991,17 @@ void ocfs2_delete_inode(struct inode *inode)
                 goto bail;
         }
  
+       /*
+        * Synchronize us against ocfs2_get_dentry. We take this in
+        * shared mode so that all nodes can still concurrently
+        * process deletes.
+        */
+       status = ocfs2_nfs_sync_lock(OCFS2_SB(inode->i_sb), 0);
+       if (status < 0) {
+               mlog(ML_ERROR, "getting nfs sync lock(PR) failed %d\n", status);
+               ocfs2_cleanup_delete_inode(inode, 0);
+               goto bail_unblock;
+       }
         /* Lock down the inode. This gives us an up to date view of
          * it's metadata (for verification), and allows us to
          * serialize delete_inode on multiple nodes.
@@ -955,7 +1015,7 @@ void ocfs2_delete_inode(struct inode *inode)
                 if (status != -ENOENT)
                         mlog_errno(status);
                 ocfs2_cleanup_delete_inode(inode, 0);
-               goto bail_unblock;
+               goto bail_unlock_nfs_sync;
         }
  
         /* Query the cluster. This will be the final decision made
@@ -998,6 +1058,10 @@ void ocfs2_delete_inode(struct inode *inode)
  bail_unlock_inode:
         ocfs2_inode_unlock(inode, 1);
         brelse(di_bh);
+
+bail_unlock_nfs_sync:
+       ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0);
+
  bail_unblock:
         status = sigprocmask(SIG_SETMASK, &oldset, NULL);
         if (status < 0)
@@ -1056,13 +1120,14 @@ void ocfs2_clear_inode(struct inode *inode)
         ocfs2_lock_res_free(&oi->ip_inode_lockres);
         ocfs2_lock_res_free(&oi->ip_open_lockres);
  
-       ocfs2_metadata_cache_purge(inode);
+       ocfs2_metadata_cache_exit(INODE_CACHE(inode));
  
-       mlog_bug_on_msg(oi->ip_metadata_cache.ci_num_cached,
+       mlog_bug_on_msg(INODE_CACHE(inode)->ci_num_cached,
                         "Clear inode of %llu, inode has %u cache items\n",
-                       (unsigned long long)oi->ip_blkno, oi->ip_metadata_cache.ci_num_cached);
+                       (unsigned long long)oi->ip_blkno,
+                       INODE_CACHE(inode)->ci_num_cached);
  
-       mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE),
+       mlog_bug_on_msg(!(INODE_CACHE(inode)->ci_flags & OCFS2_CACHE_FL_INLINE),
                         "Clear inode of %llu, inode has a bad flag\n",
                         (unsigned long long)oi->ip_blkno);
  
@@ -1089,9 +1154,7 @@ void ocfs2_clear_inode(struct inode *inode)
                         (unsigned long long)oi->ip_blkno, oi->ip_open_count);
  
         /* Clear all other flags. */
-       oi->ip_flags = OCFS2_INODE_CACHE_INLINE;
-       oi->ip_created_trans = 0;
-       oi->ip_last_trans = 0;
+       oi->ip_flags = 0;
         oi->ip_dir_start_lookup = 0;
         oi->ip_blkno = 0ULL;
  
@@ -1183,8 +1246,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
         mlog_entry("(inode %llu)\n",
                    (unsigned long long)OCFS2_I(inode)->ip_blkno);
  
-       status = ocfs2_journal_access(handle, inode, bh,
-                                     OCFS2_JOURNAL_ACCESS_WRITE);
+       status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
+                                        OCFS2_JOURNAL_ACCESS_WRITE);
         if (status < 0) {
                 mlog_errno(status);
                 goto leave;
@@ -1198,7 +1261,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
         spin_unlock(&OCFS2_I(inode)->ip_lock);
  
         fe->i_size = cpu_to_le64(i_size_read(inode));
-       fe->i_links_count = cpu_to_le16(inode->i_nlink);
+       ocfs2_set_links_count(fe, inode->i_nlink);
         fe->i_uid = cpu_to_le32(inode->i_uid);
         fe->i_gid = cpu_to_le32(inode->i_gid);
         fe->i_mode = cpu_to_le16(inode->i_mode);
@@ -1235,7 +1298,7 @@ void ocfs2_refresh_inode(struct inode *inode,
         OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
         ocfs2_set_inode_flags(inode);
         i_size_write(inode, le64_to_cpu(fe->i_size));
-       inode->i_nlink = le16_to_cpu(fe->i_links_count);
+       inode->i_nlink = ocfs2_read_links_count(fe);
         inode->i_uid = le32_to_cpu(fe->i_uid);
         inode->i_gid = le32_to_cpu(fe->i_gid);
         inode->i_mode = le16_to_cpu(fe->i_mode);
@@ -1256,7 +1319,7 @@ void ocfs2_refresh_inode(struct inode *inode,
  int ocfs2_validate_inode_block(struct super_block *sb,
                                struct buffer_head *bh)
  {
-       int rc = -EINVAL;
+       int rc;
         struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
  
         mlog(0, "Validating dinode %llu\n",
@@ -1264,6 +1327,24 @@ int ocfs2_validate_inode_block(struct super_block *sb,
  
         BUG_ON(!buffer_uptodate(bh));
  
+       /*
+        * If the ecc fails, we return the error but otherwise
+        * leave the filesystem running.  We know any error is
+        * local to this block.
+        */
+       rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check);
+       if (rc) {
+               mlog(ML_ERROR, "Checksum failed for dinode %llu\n",
+                    (unsigned long long)bh->b_blocknr);
+               goto bail;
+       }
+
+       /*
+        * Errors after here are fatal.
+        */
+
+       rc = -EINVAL;
+
         if (!OCFS2_IS_VALID_DINODE(di)) {
                 ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n",
                             (unsigned long long)bh->b_blocknr, 7,
@@ -1306,8 +1387,8 @@ int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
         int rc;
         struct buffer_head *tmp = *bh;
  
-       rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp,
-                              flags, ocfs2_validate_inode_block);
+       rc = ocfs2_read_blocks(INODE_CACHE(inode), OCFS2_I(inode)->ip_blkno,
+                              1, &tmp, flags, ocfs2_validate_inode_block);
  
         /* If ocfs2_read_blocks() got us a new bh, pass it up. */
         if (!rc && !*bh)
@@ -1320,3 +1401,56 @@ int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh)
  {
         return ocfs2_read_inode_block_full(inode, bh, 0);
  }
+
+
+static u64 ocfs2_inode_cache_owner(struct ocfs2_caching_info *ci)
+{
+       struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
+       /* The owner value reported for an inode's caching info is the inode's block number (ip_blkno). */
+       return oi->ip_blkno;
+}
+
+static struct super_block *ocfs2_inode_cache_get_super(struct ocfs2_caching_info *ci)
+{
+       struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
+       /* Return the super block recorded in the VFS inode embedded in the ocfs2_inode_info. */
+       return oi->vfs_inode.i_sb;
+}
+
+static void ocfs2_inode_cache_lock(struct ocfs2_caching_info *ci)
+{
+       struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
+       /* Cache locking for an inode is done with the inode's ip_lock spinlock; released by ocfs2_inode_cache_unlock(). */
+       spin_lock(&oi->ip_lock);
+}
+
+static void ocfs2_inode_cache_unlock(struct ocfs2_caching_info *ci)
+{
+       struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
+       /* Release the ip_lock spinlock taken by ocfs2_inode_cache_lock(). */
+       spin_unlock(&oi->ip_lock);
+}
+
+static void ocfs2_inode_cache_io_lock(struct ocfs2_caching_info *ci)
+{
+       struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
+       /* I/O locking for an inode's cache uses the inode's ip_io_mutex; released by ocfs2_inode_cache_io_unlock(). */
+       mutex_lock(&oi->ip_io_mutex);
+}
+
+static void ocfs2_inode_cache_io_unlock(struct ocfs2_caching_info *ci)
+{
+       struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
+       /* Release the ip_io_mutex taken by ocfs2_inode_cache_io_lock(). */
+       mutex_unlock(&oi->ip_io_mutex);
+}
+
+const struct ocfs2_caching_operations ocfs2_inode_caching_ops = { /* caching callbacks backed by an ocfs2_inode_info */
+       .co_owner               = ocfs2_inode_cache_owner, /* returns oi->ip_blkno */
+       .co_get_super           = ocfs2_inode_cache_get_super, /* returns oi->vfs_inode.i_sb */
+       .co_cache_lock          = ocfs2_inode_cache_lock, /* spin_lock(&oi->ip_lock) */
+       .co_cache_unlock        = ocfs2_inode_cache_unlock, /* spin_unlock(&oi->ip_lock) */
+       .co_io_lock             = ocfs2_inode_cache_io_lock, /* mutex_lock(&oi->ip_io_mutex) */
+       .co_io_unlock           = ocfs2_inode_cache_io_unlock, /* mutex_unlock(&oi->ip_io_mutex) */
+};
+