[EXT4] claim inodes under sb_bgl_lock, track bitmap uptodate state, and add allocation trace markers
[safe/jmp/linux-2.6] / fs / ext4 / ialloc.c
index b47427a..4fb86a0 100644 (file)
@@ -84,7 +84,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
        }
 
        memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
-       mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
+       mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
                        bh->b_data);
 
        return EXT4_INODES_PER_GROUP(sb);
@@ -115,20 +115,40 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
                            block_group, bitmap_blk);
                return NULL;
        }
-       if (buffer_uptodate(bh) &&
-           !(desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
+       if (bitmap_uptodate(bh))
                return bh;
 
        lock_buffer(bh);
+       if (bitmap_uptodate(bh)) {
+               unlock_buffer(bh);
+               return bh;
+       }
        spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
        if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
                ext4_init_inode_bitmap(sb, bh, block_group, desc);
+               set_bitmap_uptodate(bh);
                set_buffer_uptodate(bh);
                spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
                unlock_buffer(bh);
                return bh;
        }
        spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+       if (buffer_uptodate(bh)) {
+               /*
+                * If the group is not marked uninit and the buffer
+                * is uptodate, then the bitmap is uptodate as well.
+                */
+               set_bitmap_uptodate(bh);
+               unlock_buffer(bh);
+               return bh;
+       }
+       /*
+        * Submit the buffer_head for read. We can
+        * safely mark the bitmap as uptodate now.
+        * We do it here so that the bitmap uptodate bit
+        * gets set with the buffer lock held.
+        */
+       set_bitmap_uptodate(bh);
        if (bh_submit_read(bh) < 0) {
                put_bh(bh);
                ext4_error(sb, __func__,
@@ -190,6 +210,11 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 
        ino = inode->i_ino;
        ext4_debug("freeing inode %lu\n", ino);
+       trace_mark(ext4_free_inode,
+                  "dev %s ino %lu mode %d uid %lu gid %lu blocks %llu",
+                  sb->s_id, inode->i_ino, inode->i_mode,
+                  (unsigned long) inode->i_uid, (unsigned long) inode->i_gid,
+                  (unsigned long long) inode->i_blocks);
 
        /*
         * Note: we must free any quota before locking the superblock,
@@ -573,6 +598,79 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 }
 
 /*
+ * Claim the inode from the inode bitmap. If the group
+ * is uninit, we need to take the group's sb_bgl_lock
+ * and clear the uninit flag. The inode bitmap update
+ * and the group desc uninit flag clear should be done
+ * while holding sb_bgl_lock so that ext4_read_inode_bitmap
+ * doesn't race with ext4_claim_inode.
+ */
+static int ext4_claim_inode(struct super_block *sb,
+                       struct buffer_head *inode_bitmap_bh,
+                       unsigned long ino, ext4_group_t group, int mode)
+{
+       int free = 0, retval = 0, count;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
+
+       spin_lock(sb_bgl_lock(sbi, group));
+       if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
+               /* not a free inode */
+               retval = 1;
+               goto err_ret;
+       }
+       ino++;
+       if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
+                       ino > EXT4_INODES_PER_GROUP(sb)) {
+               spin_unlock(sb_bgl_lock(sbi, group));
+               ext4_error(sb, __func__,
+                          "reserved inode or inode > inodes count - "
+                          "block_group = %u, inode=%lu", group,
+                          ino + group * EXT4_INODES_PER_GROUP(sb));
+               return 1;
+       }
+       /* If we didn't allocate from within the initialized part of the inode
+        * table then we need to initialize up to this inode. */
+       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+
+               if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+                       gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+                       /* When marking the block group with
+                        * ~EXT4_BG_INODE_UNINIT we don't want to depend
+                        * on the value of bg_itable_unused even though
+                        * mke2fs could have initialized the same for us.
+                        * Instead we calculated the value below
+                        */
+
+                       free = 0;
+               } else {
+                       free = EXT4_INODES_PER_GROUP(sb) -
+                               ext4_itable_unused_count(sb, gdp);
+               }
+
+               /*
+                * Check the relative inode number against the
+                * last used relative inode number in this group.
+                * If it is greater, we need to update the
+                * bg_itable_unused count.
+                */
+               if (ino > free)
+                       ext4_itable_unused_set(sb, gdp,
+                                       (EXT4_INODES_PER_GROUP(sb) - ino));
+       }
+       count = ext4_free_inodes_count(sb, gdp) - 1;
+       ext4_free_inodes_set(sb, gdp, count);
+       if (S_ISDIR(mode)) {
+               count = ext4_used_dirs_count(sb, gdp) + 1;
+               ext4_used_dirs_set(sb, gdp, count);
+       }
+       gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+err_ret:
+       spin_unlock(sb_bgl_lock(sbi, group));
+       return retval;
+}
+
+/*
  * There are two policies for allocating an inode.  If the new inode is
  * a directory, then a forward search is made for a block group with both
  * free space and a low directory-to-inode ratio; if that fails, then of
@@ -594,7 +692,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
        struct ext4_super_block *es;
        struct ext4_inode_info *ei;
        struct ext4_sb_info *sbi;
-       int ret2, err = 0, count;
+       int ret2, err = 0;
        struct inode *ret;
        ext4_group_t i;
        int free = 0;
@@ -605,6 +703,8 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
                return ERR_PTR(-EPERM);
 
        sb = dir->i_sb;
+       trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
+                  dir->i_ino, mode);
        inode = new_inode(sb);
        if (!inode)
                return ERR_PTR(-ENOMEM);
@@ -658,8 +758,13 @@ repeat_in_this_group:
                        if (err)
                                goto fail;
 
-                       if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group),
-                                               ino, inode_bitmap_bh->b_data)) {
+                       BUFFER_TRACE(group_desc_bh, "get_write_access");
+                       err = ext4_journal_get_write_access(handle,
+                                                               group_desc_bh);
+                       if (err)
+                               goto fail;
+                       if (!ext4_claim_inode(sb, inode_bitmap_bh,
+                                               ino, group, mode)) {
                                /* we won it */
                                BUFFER_TRACE(inode_bitmap_bh,
                                        "call ext4_handle_dirty_metadata");
@@ -668,10 +773,13 @@ repeat_in_this_group:
                                                        inode_bitmap_bh);
                                if (err)
                                        goto fail;
+                               /* bit 0 is inode number 1 */
+                               ino++;
                                goto got;
                        }
                        /* we lost it */
                        ext4_handle_release_buffer(handle, inode_bitmap_bh);
+                       ext4_handle_release_buffer(handle, group_desc_bh);
 
                        if (++ino < EXT4_INODES_PER_GROUP(sb))
                                goto repeat_in_this_group;
@@ -691,22 +799,6 @@ repeat_in_this_group:
        goto out;
 
 got:
-       ino++;
-       if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
-           ino > EXT4_INODES_PER_GROUP(sb)) {
-               ext4_error(sb, __func__,
-                          "reserved inode or inode > inodes count - "
-                          "block_group = %u, inode=%lu", group,
-                          ino + group * EXT4_INODES_PER_GROUP(sb));
-               err = -EIO;
-               goto fail;
-       }
-
-       BUFFER_TRACE(group_desc_bh, "get_write_access");
-       err = ext4_journal_get_write_access(handle, group_desc_bh);
-       if (err)
-               goto fail;
-
        /* We may have to initialize the block bitmap if it isn't already */
        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
            gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
@@ -743,49 +835,10 @@ got:
                if (err)
                        goto fail;
        }
-
-       spin_lock(sb_bgl_lock(sbi, group));
-       /* If we didn't allocate from within the initialized part of the inode
-        * table then we need to initialize up to this inode. */
-       if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
-               if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
-                       gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
-
-                       /* When marking the block group with
-                        * ~EXT4_BG_INODE_UNINIT we don't want to depend
-                        * on the value of bg_itable_unused even though
-                        * mke2fs could have initialized the same for us.
-                        * Instead we calculated the value below
-                        */
-
-                       free = 0;
-               } else {
-                       free = EXT4_INODES_PER_GROUP(sb) -
-                               ext4_itable_unused_count(sb, gdp);
-               }
-
-               /*
-                * Check the relative inode number against the last used
-                * relative inode number in this group. if it is greater
-                * we need to  update the bg_itable_unused count
-                *
-                */
-               if (ino > free)
-                       ext4_itable_unused_set(sb, gdp,
-                                       (EXT4_INODES_PER_GROUP(sb) - ino));
-       }
-
-       count = ext4_free_inodes_count(sb, gdp) - 1;
-       ext4_free_inodes_set(sb, gdp, count);
-       if (S_ISDIR(mode)) {
-               count = ext4_used_dirs_count(sb, gdp) + 1;
-               ext4_used_dirs_set(sb, gdp, count);
-       }
-       gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
-       spin_unlock(sb_bgl_lock(sbi, group));
        BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
        err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
-       if (err) goto fail;
+       if (err)
+               goto fail;
 
        percpu_counter_dec(&sbi->s_freeinodes_counter);
        if (S_ISDIR(mode))
@@ -864,7 +917,7 @@ got:
        if (err)
                goto fail_free_drop;
 
-       if (test_opt(sb, EXTENTS)) {
+       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
                /* set extent flag only for directory, file and normal symlink*/
                if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
                        EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
@@ -879,6 +932,8 @@ got:
        }
 
        ext4_debug("allocating inode %lu\n", inode->i_ino);
+       trace_mark(ext4_allocate_inode, "dev %s ino %lu dir %lu mode %d",
+                  sb->s_id, inode->i_ino, dir->i_ino, mode);
        goto really_out;
 fail:
        ext4_std_error(sb, err);