Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[safe/jmp/linux-2.6] / fs / ext4 / mballoc.c
index 76e5fed..c9900aa 100644 (file)
  * mballoc.c contains the multiblocks allocation routines
  */
 
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/namei.h>
-#include <linux/ext4_jbd2.h>
-#include <linux/ext4_fs.h>
-#include <linux/quotaops.h>
-#include <linux/buffer_head.h>
-#include <linux/module.h>
-#include <linux/swap.h>
-#include <linux/proc_fs.h>
-#include <linux/pagemap.h>
-#include <linux/seq_file.h>
-#include <linux/version.h>
-#include "group.h"
-
+#include "mballoc.h"
 /*
  * MUSTDO:
  *   - test ext4_ext_search_left() and ext4_ext_search_right()
  *
  */
 
-/*
- * with AGGRESSIVE_CHECK allocator runs consistency checks over
- * structures. these checks slow things down a lot
- */
-#define AGGRESSIVE_CHECK__
-
-/*
- * with DOUBLE_CHECK defined mballoc creates persistent in-core
- * bitmaps, maintains and uses them to check for double allocations
- */
-#define DOUBLE_CHECK__
-
-/*
- */
-#define MB_DEBUG__
-#ifdef MB_DEBUG
-#define mb_debug(fmt, a...)    printk(fmt, ##a)
-#else
-#define mb_debug(fmt, a...)
-#endif
-
-/*
- * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
- * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
- */
-#define EXT4_MB_HISTORY
-#define EXT4_MB_HISTORY_ALLOC          1       /* allocation */
-#define EXT4_MB_HISTORY_PREALLOC       2       /* preallocated blocks used */
-#define EXT4_MB_HISTORY_DISCARD                4       /* preallocation discarded */
-#define EXT4_MB_HISTORY_FREE           8       /* free */
-
-#define EXT4_MB_HISTORY_DEFAULT                (EXT4_MB_HISTORY_ALLOC | \
-                                        EXT4_MB_HISTORY_PREALLOC)
-
-/*
- * How long mballoc can look for a best extent (in found extents)
- */
-#define MB_DEFAULT_MAX_TO_SCAN         200
-
-/*
- * How long mballoc must look for a best extent
- */
-#define MB_DEFAULT_MIN_TO_SCAN         10
-
-/*
- * How many groups mballoc will scan looking for the best chunk
- */
-#define MB_DEFAULT_MAX_GROUPS_TO_SCAN  5
-
-/*
- * with 'ext4_mb_stats' allocator will collect stats that will be
- * shown at umount. The collecting costs though!
- */
-#define MB_DEFAULT_STATS               1
-
-/*
- * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
- * by the stream allocator, which purpose is to pack requests
- * as close each to other as possible to produce smooth I/O traffic
- * We use locality group prealloc space for stream request.
- * We can tune the same via /proc/fs/ext4/<parition>/stream_req
- */
-#define MB_DEFAULT_STREAM_THRESHOLD    16      /* 64K */
-
-/*
- * for which requests use 2^N search using buddies
- */
-#define MB_DEFAULT_ORDER2_REQS         2
-
-/*
- * default group prealloc size 512 blocks
- */
-#define MB_DEFAULT_GROUP_PREALLOC      512
-
-static struct kmem_cache *ext4_pspace_cachep;
-
-#ifdef EXT4_BB_MAX_BLOCKS
-#undef EXT4_BB_MAX_BLOCKS
-#endif
-#define EXT4_BB_MAX_BLOCKS     30
-
-struct ext4_free_metadata {
-       ext4_group_t group;
-       unsigned short num;
-       ext4_grpblk_t  blocks[EXT4_BB_MAX_BLOCKS];
-       struct list_head list;
-};
-
-struct ext4_group_info {
-       unsigned long   bb_state;
-       unsigned long   bb_tid;
-       struct ext4_free_metadata *bb_md_cur;
-       unsigned short  bb_first_free;
-       unsigned short  bb_free;
-       unsigned short  bb_fragments;
-       struct          list_head bb_prealloc_list;
-#ifdef DOUBLE_CHECK
-       void            *bb_bitmap;
-#endif
-       unsigned short  bb_counters[];
-};
-
-#define EXT4_GROUP_INFO_NEED_INIT_BIT  0
-#define EXT4_GROUP_INFO_LOCKED_BIT     1
-
-#define EXT4_MB_GRP_NEED_INIT(grp)     \
-       (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
-
-
-struct ext4_prealloc_space {
-       struct list_head        pa_inode_list;
-       struct list_head        pa_group_list;
-       union {
-               struct list_head pa_tmp_list;
-               struct rcu_head pa_rcu;
-       } u;
-       spinlock_t              pa_lock;
-       atomic_t                pa_count;
-       unsigned                pa_deleted;
-       ext4_fsblk_t            pa_pstart;      /* phys. block */
-       ext4_lblk_t             pa_lstart;      /* log. block */
-       unsigned short          pa_len;         /* len of preallocated chunk */
-       unsigned short          pa_free;        /* how many blocks are free */
-       unsigned short          pa_linear;      /* consumed in one direction
-                                                * strictly, for grp prealloc */
-       spinlock_t              *pa_obj_lock;
-       struct inode            *pa_inode;      /* hack, for history only */
-};
-
-
-struct ext4_free_extent {
-       ext4_lblk_t fe_logical;
-       ext4_grpblk_t fe_start;
-       ext4_group_t fe_group;
-       int fe_len;
-};
-
-/*
- * Locality group:
- *   we try to group all related changes together
- *   so that writeback can flush/allocate them together as well
- */
-struct ext4_locality_group {
-       /* for allocator */
-       struct mutex            lg_mutex;       /* to serialize allocates */
-       struct list_head        lg_prealloc_list;/* list of preallocations */
-       spinlock_t              lg_prealloc_lock;
-};
-
-struct ext4_allocation_context {
-       struct inode *ac_inode;
-       struct super_block *ac_sb;
-
-       /* original request */
-       struct ext4_free_extent ac_o_ex;
-
-       /* goal request (after normalization) */
-       struct ext4_free_extent ac_g_ex;
-
-       /* the best found extent */
-       struct ext4_free_extent ac_b_ex;
-
-       /* copy of the bext found extent taken before preallocation efforts */
-       struct ext4_free_extent ac_f_ex;
-
-       /* number of iterations done. we have to track to limit searching */
-       unsigned long ac_ex_scanned;
-       __u16 ac_groups_scanned;
-       __u16 ac_found;
-       __u16 ac_tail;
-       __u16 ac_buddy;
-       __u16 ac_flags;         /* allocation hints */
-       __u8 ac_status;
-       __u8 ac_criteria;
-       __u8 ac_repeats;
-       __u8 ac_2order;         /* if request is to allocate 2^N blocks and
-                                * N > 0, the field stores N, otherwise 0 */
-       __u8 ac_op;             /* operation, for history only */
-       struct page *ac_bitmap_page;
-       struct page *ac_buddy_page;
-       struct ext4_prealloc_space *ac_pa;
-       struct ext4_locality_group *ac_lg;
-};
-
-#define AC_STATUS_CONTINUE     1
-#define AC_STATUS_FOUND                2
-#define AC_STATUS_BREAK                3
-
-struct ext4_mb_history {
-       struct ext4_free_extent orig;   /* orig allocation */
-       struct ext4_free_extent goal;   /* goal allocation */
-       struct ext4_free_extent result; /* result allocation */
-       unsigned pid;
-       unsigned ino;
-       __u16 found;    /* how many extents have been found */
-       __u16 groups;   /* how many groups have been scanned */
-       __u16 tail;     /* what tail broke some buddy */
-       __u16 buddy;    /* buddy the tail ^^^ broke */
-       __u16 flags;
-       __u8 cr:3;      /* which phase the result extent was found at */
-       __u8 op:4;
-       __u8 merged:1;
-};
-
-struct ext4_buddy {
-       struct page *bd_buddy_page;
-       void *bd_buddy;
-       struct page *bd_bitmap_page;
-       void *bd_bitmap;
-       struct ext4_group_info *bd_info;
-       struct super_block *bd_sb;
-       __u16 bd_blkbits;
-       ext4_group_t bd_group;
-};
-#define EXT4_MB_BITMAP(e4b)    ((e4b)->bd_bitmap)
-#define EXT4_MB_BUDDY(e4b)     ((e4b)->bd_buddy)
-
-#ifndef EXT4_MB_HISTORY
-static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
-{
-       return;
-}
-#else
-static void ext4_mb_store_history(struct ext4_allocation_context *ac);
-#endif
-
-#define in_range(b, first, len)        ((b) >= (first) && (b) <= (first) + (len) - 1)
-
-static struct proc_dir_entry *proc_root_ext4;
-struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
-ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
-                       ext4_fsblk_t goal, unsigned long *count, int *errp);
-
-static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
-                                       ext4_group_t group);
-static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
-static void ext4_mb_free_committed_blocks(struct super_block *);
-static void ext4_mb_return_to_preallocation(struct inode *inode,
-                                       struct ext4_buddy *e4b, sector_t block,
-                                       int count);
-static void ext4_mb_put_pa(struct ext4_allocation_context *,
-                       struct super_block *, struct ext4_prealloc_space *pa);
-static int ext4_mb_init_per_dev_proc(struct super_block *sb);
-static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
-
-
-static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
-{
-       struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
-       bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
-}
-
-static inline void ext4_unlock_group(struct super_block *sb,
-                                       ext4_group_t group)
-{
-       struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
-       bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
-}
-
-static inline int ext4_is_group_locked(struct super_block *sb,
-                                       ext4_group_t group)
-{
-       struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
-       return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
-                                               &(grinfo->bb_state));
-}
-
-static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
-                                       struct ext4_free_extent *fex)
+static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
 {
-       ext4_fsblk_t block;
-
-       block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
-                       + fex->fe_start
-                       + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
-       return block;
-}
-
 #if BITS_PER_LONG == 64
-#define mb_correct_addr_and_bit(bit, addr)             \
-{                                                      \
-       bit += ((unsigned long) addr & 7UL) << 3;       \
-       addr = (void *) ((unsigned long) addr & ~7UL);  \
-}
+       *bit += ((unsigned long) addr & 7UL) << 3;
+       addr = (void *) ((unsigned long) addr & ~7UL);
 #elif BITS_PER_LONG == 32
-#define mb_correct_addr_and_bit(bit, addr)             \
-{                                                      \
-       bit += ((unsigned long) addr & 3UL) << 3;       \
-       addr = (void *) ((unsigned long) addr & ~3UL);  \
-}
+       *bit += ((unsigned long) addr & 3UL) << 3;
+       addr = (void *) ((unsigned long) addr & ~3UL);
 #else
 #error "how many bits you are?!"
 #endif
+       return addr;
+}
 
 static inline int mb_test_bit(int bit, void *addr)
 {
@@ -648,39 +351,58 @@ static inline int mb_test_bit(int bit, void *addr)
         * ext4_test_bit on architecture like powerpc
         * needs unsigned long aligned address
         */
-       mb_correct_addr_and_bit(bit, addr);
+       addr = mb_correct_addr_and_bit(&bit, addr);
        return ext4_test_bit(bit, addr);
 }
 
 static inline void mb_set_bit(int bit, void *addr)
 {
-       mb_correct_addr_and_bit(bit, addr);
+       addr = mb_correct_addr_and_bit(&bit, addr);
        ext4_set_bit(bit, addr);
 }
 
 static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)
 {
-       mb_correct_addr_and_bit(bit, addr);
+       addr = mb_correct_addr_and_bit(&bit, addr);
        ext4_set_bit_atomic(lock, bit, addr);
 }
 
 static inline void mb_clear_bit(int bit, void *addr)
 {
-       mb_correct_addr_and_bit(bit, addr);
+       addr = mb_correct_addr_and_bit(&bit, addr);
        ext4_clear_bit(bit, addr);
 }
 
 static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
 {
-       mb_correct_addr_and_bit(bit, addr);
+       addr = mb_correct_addr_and_bit(&bit, addr);
        ext4_clear_bit_atomic(lock, bit, addr);
 }
 
+/* ext4_find_next_zero_bit() for a possibly unaligned addr. The result is
+ * relative to the caller's addr and never exceeds max (max: none found). */
+static inline int mb_find_next_zero_bit(void *addr, int max, int start)
+{
+       int fix = 0, ret;
+       addr = mb_correct_addr_and_bit(&fix, addr);
+       ret = ext4_find_next_zero_bit(addr, max + fix, start + fix) - fix;
+       return ret > max ? max : ret;
+}
+
+/* ext4_find_next_bit() for a possibly unaligned addr. The result is
+ * relative to the caller's addr and never exceeds max (max: none found). */
+static inline int mb_find_next_bit(void *addr, int max, int start)
+{
+       int fix = 0, ret;
+       addr = mb_correct_addr_and_bit(&fix, addr);
+       ret = ext4_find_next_bit(addr, max + fix, start + fix) - fix;
+       return ret > max ? max : ret;
+}
+
 static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
 {
        char *bb;
 
-       /* FIXME!! is this needed */
        BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
        BUG_ON(max == NULL);
 
@@ -718,7 +440,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
                        blocknr +=
                            le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
 
-                       ext4_error(sb, __FUNCTION__, "double-free of inode"
+                       ext4_error(sb, __func__, "double-free of inode"
                                   " %lu's block %llu(bit %u in group %lu)\n",
                                   inode ? inode->i_ino : 0, blocknr,
                                   first + i, e4b->bd_group);
@@ -880,17 +602,17 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
        list_for_each(cur, &grp->bb_prealloc_list) {
                ext4_group_t groupnr;
                struct ext4_prealloc_space *pa;
-               pa = list_entry(cur, struct ext4_prealloc_space, group_list);
-               ext4_get_group_no_and_offset(sb, pa->pstart, &groupnr, &k);
+               pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
+               ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
                MB_CHECK_ASSERT(groupnr == e4b->bd_group);
-               for (i = 0; i < pa->len; i++)
+               for (i = 0; i < pa->pa_len; i++)
                        MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
        }
        return 0;
 }
 #undef MB_CHECK_ASSERT
 #define mb_check_buddy(e4b) __mb_check_buddy(e4b,      \
-                                       __FILE__, __FUNCTION__, __LINE__)
+                                       __FILE__, __func__, __LINE__)
 #else
 #define mb_check_buddy(e4b)
 #endif
@@ -906,7 +628,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
        unsigned short chunk;
        unsigned short border;
 
-       BUG_ON(len >= EXT4_BLOCKS_PER_GROUP(sb));
+       BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));
 
        border = 2 << sb->s_blocksize_bits;
 
@@ -946,12 +668,12 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
 
        /* initialize buddy from bitmap which is aggregation
         * of on-disk bitmap and preallocations */
-       i = ext4_find_next_zero_bit(bitmap, max, 0);
+       i = mb_find_next_zero_bit(bitmap, max, 0);
        grp->bb_first_free = i;
        while (i < max) {
                fragments++;
                first = i;
-               i = ext4_find_next_bit(bitmap, max, i);
+               i = mb_find_next_bit(bitmap, max, i);
                len = i - first;
                free += len;
                if (len > 1)
@@ -959,14 +681,18 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
                else
                        grp->bb_counters[0]++;
                if (i < max)
-                       i = ext4_find_next_zero_bit(bitmap, max, i);
+                       i = mb_find_next_zero_bit(bitmap, max, i);
        }
        grp->bb_fragments = fragments;
 
        if (free != grp->bb_free) {
-               printk(KERN_DEBUG
+               ext4_error(sb, __func__,
                        "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
                        group, free, grp->bb_free);
+               /*
+                * If we intend to continue, we consider the group descriptor
+                * corrupt and update bb_free using the bitmap value
+                */
                grp->bb_free = free;
        }
 
@@ -1146,8 +872,9 @@ out:
        return err;
 }
 
-static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
-               struct ext4_buddy *e4b)
+static noinline_for_stack int
+ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+                                       struct ext4_buddy *e4b)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct inode *inode = sbi->s_buddy_cache;
@@ -1345,7 +1072,7 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                        blocknr +=
                            le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
 
-                       ext4_error(sb, __FUNCTION__, "double-free of inode"
+                       ext4_error(sb, __func__, "double-free of inode"
                                   " %lu's block %llu(bit %u in group %lu)\n",
                                   inode ? inode->i_ino : 0, blocknr, block,
                                   e4b->bd_group);
@@ -1778,7 +1505,7 @@ static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
                buddy = mb_find_buddy(e4b, i, &max);
                BUG_ON(buddy == NULL);
 
-               k = ext4_find_next_zero_bit(buddy, max, 0);
+               k = mb_find_next_zero_bit(buddy, max, 0);
                BUG_ON(k >= max);
 
                ac->ac_found++;
@@ -1818,16 +1545,33 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
        i = e4b->bd_info->bb_first_free;
 
        while (free && ac->ac_status == AC_STATUS_CONTINUE) {
-               i = ext4_find_next_zero_bit(bitmap,
+               i = mb_find_next_zero_bit(bitmap,
                                                EXT4_BLOCKS_PER_GROUP(sb), i);
                if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
-                       BUG_ON(free != 0);
+                       /*
+                        * If we have a corrupt bitmap, we won't find any
+                        * free blocks even though the group info says
+                        * we have free blocks
+                        */
+                       ext4_error(sb, __func__, "%d free blocks as per "
+                                       "group info. But bitmap says 0\n",
+                                       free);
                        break;
                }
 
                mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
                BUG_ON(ex.fe_len <= 0);
-               BUG_ON(free < ex.fe_len);
+               if (free < ex.fe_len) {
+                       ext4_error(sb, __func__, "%d free blocks as per "
+                                       "group info. But got %d blocks\n",
+                                       free, ex.fe_len);
+                       /*
+                        * The number of free blocks differs. This most
+                        * likely indicates that the bitmap is corrupt, so
+                        * exit without claiming the space.
+                        */
+                       break;
+               }
 
                ext4_mb_measure_extent(ac, &ex, e4b);
 
@@ -1926,7 +1670,8 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
        return 0;
 }
 
-static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
+static noinline_for_stack int
+ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
        ext4_group_t group;
        ext4_group_t i;
@@ -2410,17 +2155,10 @@ static void ext4_mb_history_init(struct super_block *sb)
        int i;
 
        if (sbi->s_mb_proc != NULL) {
-               struct proc_dir_entry *p;
-               p = create_proc_entry("mb_history", S_IRUGO, sbi->s_mb_proc);
-               if (p) {
-                       p->proc_fops = &ext4_mb_seq_history_fops;
-                       p->data = sb;
-               }
-               p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
-               if (p) {
-                       p->proc_fops = &ext4_mb_seq_groups_fops;
-                       p->data = sb;
-               }
+               proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc,
+                                &ext4_mb_seq_history_fops, sb);
+               proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc,
+                                &ext4_mb_seq_groups_fops, sb);
        }
 
        sbi->s_mb_history_max = 1000;
@@ -2433,7 +2171,8 @@ static void ext4_mb_history_init(struct super_block *sb)
        /* if we can't allocate history, then we simple won't use it */
 }
 
-static void ext4_mb_store_history(struct ext4_allocation_context *ac)
+static noinline_for_stack void
+ext4_mb_store_history(struct ext4_allocation_context *ac)
 {
        struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
        struct ext4_mb_history h;
@@ -2533,13 +2272,13 @@ static int ext4_mb_init_backend(struct super_block *sb)
                meta_group_info[j] = kzalloc(len, GFP_KERNEL);
                if (meta_group_info[j] == NULL) {
                        printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
-                       i--;
                        goto err_freebuddy;
                }
                desc = ext4_get_group_desc(sb, i, NULL);
                if (desc == NULL) {
                        printk(KERN_ERR
                                "EXT4-fs: can't read descriptor %lu\n", i);
+                       i++;
                        goto err_freebuddy;
                }
                memset(meta_group_info[j], 0, len);
@@ -2579,13 +2318,11 @@ static int ext4_mb_init_backend(struct super_block *sb)
        return 0;
 
 err_freebuddy:
-       while (i >= 0) {
+       while (i-- > 0)
                kfree(ext4_get_group_info(sb, i));
-               i--;
-       }
        i = num_meta_group_infos;
 err_freemeta:
-       while (--i >= 0)
+       while (i-- > 0)
                kfree(sbi->s_group_info[i]);
        iput(sbi->s_buddy_cache);
 err_freesgi:
@@ -2769,7 +2506,8 @@ int ext4_mb_release(struct super_block *sb)
        return 0;
 }
 
-static void ext4_mb_free_committed_blocks(struct super_block *sb)
+static noinline_for_stack void
+ext4_mb_free_committed_blocks(struct super_block *sb)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        int err;
@@ -2828,7 +2566,6 @@ static void ext4_mb_free_committed_blocks(struct super_block *sb)
        mb_debug("freed %u blocks in %u structures\n", count, count2);
 }
 
-#define EXT4_ROOT                      "ext4"
 #define EXT4_MB_STATS_NAME             "stats"
 #define EXT4_MB_MAX_TO_SCAN_NAME       "max_to_scan"
 #define EXT4_MB_MIN_TO_SCAN_NAME       "min_to_scan"
@@ -2902,8 +2639,7 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb)
        struct proc_dir_entry *proc;
        char devname[64];
 
-       snprintf(devname, sizeof(devname) - 1, "%s",
-               bdevname(sb->s_bdev, devname));
+       bdevname(sb->s_bdev, devname);
        sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4);
 
        MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats);
@@ -2937,8 +2673,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
        if (sbi->s_mb_proc == NULL)
                return -EINVAL;
 
-       snprintf(devname, sizeof(devname) - 1, "%s",
-               bdevname(sb->s_bdev, devname));
+       bdevname(sb->s_bdev, devname);
        remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
        remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
        remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
@@ -2959,12 +2694,19 @@ int __init init_ext4_mballoc(void)
        if (ext4_pspace_cachep == NULL)
                return -ENOMEM;
 
+       ext4_ac_cachep =
+               kmem_cache_create("ext4_alloc_context",
+                                    sizeof(struct ext4_allocation_context),
+                                    0, SLAB_RECLAIM_ACCOUNT, NULL);
+       if (ext4_ac_cachep == NULL) {
+               kmem_cache_destroy(ext4_pspace_cachep);
+               return -ENOMEM;
+       }
 #ifdef CONFIG_PROC_FS
-       proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs);
+       proc_root_ext4 = proc_mkdir("fs/ext4", NULL);
        if (proc_root_ext4 == NULL)
-               printk(KERN_ERR "EXT4-fs: Unable to create %s\n", EXT4_ROOT);
+               printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n");
 #endif
-
        return 0;
 }
 
@@ -2972,8 +2714,9 @@ void exit_ext4_mballoc(void)
 {
        /* XXX: synchronize_rcu(); */
        kmem_cache_destroy(ext4_pspace_cachep);
+       kmem_cache_destroy(ext4_ac_cachep);
 #ifdef CONFIG_PROC_FS
-       remove_proc_entry(EXT4_ROOT, proc_root_fs);
+       remove_proc_entry("fs/ext4", NULL);
 #endif
 }
 
@@ -2982,7 +2725,8 @@ void exit_ext4_mballoc(void)
  * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps
  * Returns 0 if success or error code
  */
-static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
+static noinline_for_stack int
+ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                                handle_t *handle)
 {
        struct buffer_head *bitmap_bh = NULL;
@@ -2992,7 +2736,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
        struct ext4_sb_info *sbi;
        struct super_block *sb;
        ext4_fsblk_t block;
-       int err;
+       int err, len;
 
        BUG_ON(ac->ac_status != AC_STATUS_FOUND);
        BUG_ON(ac->ac_b_ex.fe_len <= 0);
@@ -3001,8 +2745,6 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
        sbi = EXT4_SB(sb);
        es = sbi->s_es;
 
-       ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group,
-                       gdp->bg_free_blocks_count);
 
        err = -EIO;
        bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group);
@@ -3018,6 +2760,9 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
        if (!gdp)
                goto out_err;
 
+       ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group,
+                       gdp->bg_free_blocks_count);
+
        err = ext4_journal_get_write_access(handle, gdp_bh);
        if (err)
                goto out_err;
@@ -3026,14 +2771,27 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                + ac->ac_b_ex.fe_start
                + le32_to_cpu(es->s_first_data_block);
 
-       if (block == ext4_block_bitmap(sb, gdp) ||
-                       block == ext4_inode_bitmap(sb, gdp) ||
-                       in_range(block, ext4_inode_table(sb, gdp),
-                               EXT4_SB(sb)->s_itb_per_group)) {
-
-               ext4_error(sb, __FUNCTION__,
+       len = ac->ac_b_ex.fe_len;
+       if (in_range(ext4_block_bitmap(sb, gdp), block, len) ||
+           in_range(ext4_inode_bitmap(sb, gdp), block, len) ||
+           in_range(block, ext4_inode_table(sb, gdp),
+                    EXT4_SB(sb)->s_itb_per_group) ||
+           in_range(block + len - 1, ext4_inode_table(sb, gdp),
+                    EXT4_SB(sb)->s_itb_per_group)) {
+               ext4_error(sb, __func__,
                           "Allocating block in system zone - block = %llu",
                           block);
+               /* File system mounted not to panic on error
+                * Fix the bitmap and repeat the block allocation
+                * We leak some of the blocks here.
+                */
+               mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group),
+                               bitmap_bh->b_data, ac->ac_b_ex.fe_start,
+                               ac->ac_b_ex.fe_len);
+               err = ext4_journal_dirty_metadata(handle, bitmap_bh);
+               if (!err)
+                       err = -EAGAIN;
+               goto out_err;
        }
 #ifdef AGGRESSIVE_CHECK
        {
@@ -3055,9 +2813,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                                                ac->ac_b_ex.fe_group,
                                                gdp));
        }
-       gdp->bg_free_blocks_count =
-               cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
-                               - ac->ac_b_ex.fe_len);
+       le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
        spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
        percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
@@ -3069,7 +2825,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 
 out_err:
        sb->s_dirt = 1;
-       put_bh(bitmap_bh);
+       brelse(bitmap_bh);
        return err;
 }
 
@@ -3091,7 +2847,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
                ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
        else
                ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
-       mb_debug("#%u: goal %lu blocks for locality group\n",
+       mb_debug("#%u: goal %u blocks for locality group\n",
                current->pid, ac->ac_g_ex.fe_len);
 }
 
@@ -3099,15 +2855,16 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
  * Normalization means making request better in terms of
  * size and alignment
  */
-static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
+static noinline_for_stack void
+ext4_mb_normalize_request(struct ext4_allocation_context *ac,
                                struct ext4_allocation_request *ar)
 {
        int bsbits, max;
        ext4_lblk_t end;
-       struct list_head *cur;
        loff_t size, orig_size, start_off;
        ext4_lblk_t start, orig_start;
        struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
+       struct ext4_prealloc_space *pa;
 
        /* do normalize only data requests, metadata requests
           do not need preallocation */
@@ -3137,12 +2894,11 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
        if (size < i_size_read(ac->ac_inode))
                size = i_size_read(ac->ac_inode);
 
-       /* max available blocks in a free group */
-       max = EXT4_BLOCKS_PER_GROUP(ac->ac_sb) - 1 - 1 -
-                               EXT4_SB(ac->ac_sb)->s_itb_per_group;
+       /* max size of free chunks */
+       max = 2 << bsbits;
 
-#define NRL_CHECK_SIZE(req, size, max,bits)    \
-               (req <= (size) || max <= ((size) >> bits))
+#define NRL_CHECK_SIZE(req, size, max, chunk_size)     \
+               (req <= (size) || max <= (chunk_size))
 
        /* first, try to predict filesize */
        /* XXX: should this table be tunable? */
@@ -3161,16 +2917,16 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
                size = 512 * 1024;
        } else if (size <= 1024 * 1024) {
                size = 1024 * 1024;
-       } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, bsbits)) {
+       } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
                start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
-                                               (20 - bsbits)) << 20;
-               size = 1024 * 1024;
-       } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, bsbits)) {
+                                               (21 - bsbits)) << 21;
+               size = 2 * 1024 * 1024;
+       } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
                start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
                                                        (22 - bsbits)) << 22;
                size = 4 * 1024 * 1024;
        } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
-                                       (8<<20)>>bsbits, max, bsbits)) {
+                                       (8<<20)>>bsbits, max, 8 * 1024)) {
                start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
                                                        (23 - bsbits)) << 23;
                size = 8 * 1024 * 1024;
@@ -3193,12 +2949,9 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 
        /* check we don't cross already preallocated blocks */
        rcu_read_lock();
-       list_for_each_rcu(cur, &ei->i_prealloc_list) {
-               struct ext4_prealloc_space *pa;
+       list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
                unsigned long pa_end;
 
-               pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
-
                if (pa->pa_deleted)
                        continue;
                spin_lock(&pa->pa_lock);
@@ -3240,10 +2993,8 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 
        /* XXX: extra loop to check we really don't overlap preallocations */
        rcu_read_lock();
-       list_for_each_rcu(cur, &ei->i_prealloc_list) {
-               struct ext4_prealloc_space *pa;
+       list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
                unsigned long pa_end;
-               pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
                spin_lock(&pa->pa_lock);
                if (pa->pa_deleted == 0) {
                        pa_end = pa->pa_lstart + pa->pa_len;
@@ -3335,7 +3086,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
        BUG_ON(pa->pa_free < len);
        pa->pa_free -= len;
 
-       mb_debug("use %llu/%lu from inode pa %p\n", start, len, pa);
+       mb_debug("use %llu/%u from inode pa %p\n", start, len, pa);
 }
 
 /*
@@ -3344,8 +3095,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
 static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
                                struct ext4_prealloc_space *pa)
 {
-       unsigned len = ac->ac_o_ex.fe_len;
-
+       unsigned int len = ac->ac_o_ex.fe_len;
        ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
                                        &ac->ac_b_ex.fe_group,
                                        &ac->ac_b_ex.fe_start);
@@ -3354,13 +3104,10 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
        ac->ac_pa = pa;
 
        /* we don't correct pa_pstart or pa_plen here to avoid
-        * possible race when tte group is being loaded concurrently
+        * possible race when the group is being loaded concurrently
         * instead we correct pa later, after blocks are marked
-        * in on-disk bitmap -- see ext4_mb_release_context() */
-       /*
-        * FIXME!! but the other CPUs can look at this particular
-        * pa and think that it have enought free blocks if we
-        * don't update pa_free here right ?
+        * in on-disk bitmap -- see ext4_mb_release_context()
+        * Other CPUs are prevented from allocating from this pa by lg_mutex
         */
        mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
 }
@@ -3368,12 +3115,12 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
 /*
  * search goal blocks in preallocated space
  */
-static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
+static noinline_for_stack int
+ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 {
        struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
        struct ext4_locality_group *lg;
        struct ext4_prealloc_space *pa;
-       struct list_head *cur;
 
        /* only data can be preallocated */
        if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3381,8 +3128,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 
        /* first, try per-file preallocation */
        rcu_read_lock();
-       list_for_each_rcu(cur, &ei->i_prealloc_list) {
-               pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
+       list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
 
                /* all fields in this condition don't change,
                 * so we can skip locking for them */
@@ -3414,8 +3160,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
                return 0;
 
        rcu_read_lock();
-       list_for_each_rcu(cur, &lg->lg_prealloc_list) {
-               pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
+       list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
                spin_lock(&pa->pa_lock);
                if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
                        atomic_inc(&pa->pa_count);
@@ -3535,7 +3280,8 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
 /*
  * creates new preallocated space for given inode
  */
-static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
+static noinline_for_stack int
+ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
 {
        struct super_block *sb = ac->ac_sb;
        struct ext4_prealloc_space *pa;
@@ -3622,7 +3368,8 @@ static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
 /*
  * creates new preallocated space for locality group inodes belongs to
  */
-static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
+static noinline_for_stack int
+ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
 {
        struct super_block *sb = ac->ac_sb;
        struct ext4_locality_group *lg;
@@ -3695,11 +3442,11 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
  * the caller MUST hold group/inode locks.
  * TODO: optimize the case when there are no in-core structures yet
  */
-static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
-                               struct buffer_head *bitmap_bh,
-                               struct ext4_prealloc_space *pa)
+static noinline_for_stack int
+ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
+                       struct ext4_prealloc_space *pa,
+                       struct ext4_allocation_context *ac)
 {
-       struct ext4_allocation_context ac;
        struct super_block *sb = e4b->bd_sb;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        unsigned long end;
@@ -3715,15 +3462,17 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
        BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
        end = bit + pa->pa_len;
 
-       ac.ac_sb = sb;
-       ac.ac_inode = pa->pa_inode;
-       ac.ac_op = EXT4_MB_HISTORY_DISCARD;
+       if (ac) {
+               ac->ac_sb = sb;
+               ac->ac_inode = pa->pa_inode;
+               ac->ac_op = EXT4_MB_HISTORY_DISCARD;
+       }
 
        while (bit < end) {
-               bit = ext4_find_next_zero_bit(bitmap_bh->b_data, end, bit);
+               bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
                if (bit >= end)
                        break;
-               next = ext4_find_next_bit(bitmap_bh->b_data, end, bit);
+               next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
                if (next > end)
                        next = end;
                start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit +
@@ -3733,37 +3482,45 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
                                (unsigned) group);
                free += next - bit;
 
-               ac.ac_b_ex.fe_group = group;
-               ac.ac_b_ex.fe_start = bit;
-               ac.ac_b_ex.fe_len = next - bit;
-               ac.ac_b_ex.fe_logical = 0;
-               ext4_mb_store_history(&ac);
+               if (ac) {
+                       ac->ac_b_ex.fe_group = group;
+                       ac->ac_b_ex.fe_start = bit;
+                       ac->ac_b_ex.fe_len = next - bit;
+                       ac->ac_b_ex.fe_logical = 0;
+                       ext4_mb_store_history(ac);
+               }
 
                mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
                bit = next + 1;
        }
        if (free != pa->pa_free) {
-               printk(KERN_ERR "pa %p: logic %lu, phys. %lu, len %lu\n",
+               printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
                        pa, (unsigned long) pa->pa_lstart,
                        (unsigned long) pa->pa_pstart,
                        (unsigned long) pa->pa_len);
-               printk(KERN_ERR "free %u, pa_free %u\n", free, pa->pa_free);
+               ext4_error(sb, __func__, "free %u, pa_free %u\n",
+                                               free, pa->pa_free);
+               /*
+                * pa is already deleted so we use the value obtained
+                * from the bitmap and continue.
+                */
        }
-       BUG_ON(free != pa->pa_free);
        atomic_add(free, &sbi->s_mb_discarded);
 
        return err;
 }
 
-static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
-                               struct ext4_prealloc_space *pa)
+static noinline_for_stack int
+ext4_mb_release_group_pa(struct ext4_buddy *e4b,
+                               struct ext4_prealloc_space *pa,
+                               struct ext4_allocation_context *ac)
 {
-       struct ext4_allocation_context ac;
        struct super_block *sb = e4b->bd_sb;
        ext4_group_t group;
        ext4_grpblk_t bit;
 
-       ac.ac_op = EXT4_MB_HISTORY_DISCARD;
+       if (ac)
+               ac->ac_op = EXT4_MB_HISTORY_DISCARD;
 
        BUG_ON(pa->pa_deleted == 0);
        ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
@@ -3771,13 +3528,15 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
        mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
        atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
 
-       ac.ac_sb = sb;
-       ac.ac_inode = NULL;
-       ac.ac_b_ex.fe_group = group;
-       ac.ac_b_ex.fe_start = bit;
-       ac.ac_b_ex.fe_len = pa->pa_len;
-       ac.ac_b_ex.fe_logical = 0;
-       ext4_mb_store_history(&ac);
+       if (ac) {
+               ac->ac_sb = sb;
+               ac->ac_inode = NULL;
+               ac->ac_b_ex.fe_group = group;
+               ac->ac_b_ex.fe_start = bit;
+               ac->ac_b_ex.fe_len = pa->pa_len;
+               ac->ac_b_ex.fe_logical = 0;
+               ext4_mb_store_history(ac);
+       }
 
        return 0;
 }
@@ -3791,12 +3550,14 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
  * - how many do we discard
  *   1) how many requested
  */
-static int ext4_mb_discard_group_preallocations(struct super_block *sb,
+static noinline_for_stack int
+ext4_mb_discard_group_preallocations(struct super_block *sb,
                                        ext4_group_t group, int needed)
 {
        struct ext4_group_info *grp = ext4_get_group_info(sb, group);
        struct buffer_head *bitmap_bh = NULL;
        struct ext4_prealloc_space *pa, *tmp;
+       struct ext4_allocation_context *ac;
        struct list_head list;
        struct ext4_buddy e4b;
        int err;
@@ -3824,6 +3585,7 @@ static int ext4_mb_discard_group_preallocations(struct super_block *sb,
        grp = ext4_get_group_info(sb, group);
        INIT_LIST_HEAD(&list);
 
+       ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
 repeat:
        ext4_lock_group(sb, group);
        list_for_each_entry_safe(pa, tmp,
@@ -3878,9 +3640,9 @@ repeat:
                spin_unlock(pa->pa_obj_lock);
 
                if (pa->pa_linear)
-                       ext4_mb_release_group_pa(&e4b, pa);
+                       ext4_mb_release_group_pa(&e4b, pa, ac);
                else
-                       ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
+                       ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
 
                list_del(&pa->u.pa_tmp_list);
                call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3888,6 +3650,8 @@ repeat:
 
 out:
        ext4_unlock_group(sb, group);
+       if (ac)
+               kmem_cache_free(ext4_ac_cachep, ac);
        ext4_mb_release_desc(&e4b);
        put_bh(bitmap_bh);
        return free;
@@ -3908,6 +3672,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
        struct super_block *sb = inode->i_sb;
        struct buffer_head *bitmap_bh = NULL;
        struct ext4_prealloc_space *pa, *tmp;
+       struct ext4_allocation_context *ac;
        ext4_group_t group = 0;
        struct list_head list;
        struct ext4_buddy e4b;
@@ -3922,6 +3687,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
 
        INIT_LIST_HEAD(&list);
 
+       ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
 repeat:
        /* first, collect all pa's in the inode */
        spin_lock(&ei->i_prealloc_lock);
@@ -3986,7 +3752,7 @@ repeat:
 
                ext4_lock_group(sb, group);
                list_del(&pa->pa_group_list);
-               ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
+               ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
                ext4_unlock_group(sb, group);
 
                ext4_mb_release_desc(&e4b);
@@ -3995,6 +3761,8 @@ repeat:
                list_del(&pa->u.pa_tmp_list);
                call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
        }
+       if (ac)
+               kmem_cache_free(ext4_ac_cachep, ac);
 }
 
 /*
@@ -4054,7 +3822,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
                        printk(KERN_ERR "PA:%lu:%d:%u \n", i,
                                                        start, pa->pa_len);
                }
-               ext4_lock_group(sb, i);
+               ext4_unlock_group(sb, i);
 
                if (grp->bb_free == 0)
                        continue;
@@ -4113,7 +3881,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
        mutex_lock(&ac->ac_lg->lg_mutex);
 }
 
-static int ext4_mb_initialize_context(struct ext4_allocation_context *ac,
+static noinline_for_stack int
+ext4_mb_initialize_context(struct ext4_allocation_context *ac,
                                struct ext4_allocation_request *ar)
 {
        struct super_block *sb = ar->inode->i_sb;
@@ -4231,7 +4000,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
 ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
                                 struct ext4_allocation_request *ar, int *errp)
 {
-       struct ext4_allocation_context ac;
+       struct ext4_allocation_context *ac = NULL;
        struct ext4_sb_info *sbi;
        struct super_block *sb;
        ext4_fsblk_t block = 0;
@@ -4257,53 +4026,70 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
        }
        inquota = ar->len;
 
+       ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+       if (!ac) {
+               *errp = -ENOMEM;
+               return 0;
+       }
+
        ext4_mb_poll_new_transaction(sb, handle);
 
-       *errp = ext4_mb_initialize_context(&ac, ar);
+       *errp = ext4_mb_initialize_context(ac, ar);
        if (*errp) {
                ar->len = 0;
                goto out;
        }
 
-       ac.ac_op = EXT4_MB_HISTORY_PREALLOC;
-       if (!ext4_mb_use_preallocated(&ac)) {
-
-               ac.ac_op = EXT4_MB_HISTORY_ALLOC;
-               ext4_mb_normalize_request(&ac, ar);
+       ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
+       if (!ext4_mb_use_preallocated(ac)) {
 
+               ac->ac_op = EXT4_MB_HISTORY_ALLOC;
+               ext4_mb_normalize_request(ac, ar);
 repeat:
                /* allocate space in core */
-               ext4_mb_regular_allocator(&ac);
+               ext4_mb_regular_allocator(ac);
 
                /* as we've just preallocated more space than
                 * user requested orinally, we store allocated
                 * space in a special descriptor */
-               if (ac.ac_status == AC_STATUS_FOUND &&
-                               ac.ac_o_ex.fe_len < ac.ac_b_ex.fe_len)
-                       ext4_mb_new_preallocation(&ac);
+               if (ac->ac_status == AC_STATUS_FOUND &&
+                               ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
+                       ext4_mb_new_preallocation(ac);
        }
 
-       if (likely(ac.ac_status == AC_STATUS_FOUND)) {
-               ext4_mb_mark_diskspace_used(&ac, handle);
-               *errp = 0;
-               block = ext4_grp_offs_to_block(sb, &ac.ac_b_ex);
-               ar->len = ac.ac_b_ex.fe_len;
+       if (likely(ac->ac_status == AC_STATUS_FOUND)) {
+               *errp = ext4_mb_mark_diskspace_used(ac, handle);
+               if (*errp ==  -EAGAIN) {
+                       ac->ac_b_ex.fe_group = 0;
+                       ac->ac_b_ex.fe_start = 0;
+                       ac->ac_b_ex.fe_len = 0;
+                       ac->ac_status = AC_STATUS_CONTINUE;
+                       goto repeat;
+               } else if (*errp) {
+                       ac->ac_b_ex.fe_len = 0;
+                       ar->len = 0;
+                       ext4_mb_show_ac(ac);
+               } else {
+                       block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
+                       ar->len = ac->ac_b_ex.fe_len;
+               }
        } else {
-               freed  = ext4_mb_discard_preallocations(sb, ac.ac_o_ex.fe_len);
+               freed  = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
                if (freed)
                        goto repeat;
                *errp = -ENOSPC;
-               ac.ac_b_ex.fe_len = 0;
+               ac->ac_b_ex.fe_len = 0;
                ar->len = 0;
-               ext4_mb_show_ac(&ac);
+               ext4_mb_show_ac(ac);
        }
 
-       ext4_mb_release_context(&ac);
+       ext4_mb_release_context(ac);
 
 out:
        if (ar->len < inquota)
                DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len);
 
+       kmem_cache_free(ext4_ac_cachep, ac);
        return block;
 }
 static void ext4_mb_poll_new_transaction(struct super_block *sb,
@@ -4337,7 +4123,8 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb,
        ext4_mb_free_committed_blocks(sb);
 }
 
-static int ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
+static noinline_for_stack int
+ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
                          ext4_group_t group, ext4_grpblk_t block, int count)
 {
        struct ext4_group_info *db = e4b->bd_info;
@@ -4405,9 +4192,9 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
                        unsigned long block, unsigned long count,
                        int metadata, unsigned long *freed)
 {
-       struct buffer_head *bitmap_bh = 0;
+       struct buffer_head *bitmap_bh = NULL;
        struct super_block *sb = inode->i_sb;
-       struct ext4_allocation_context ac;
+       struct ext4_allocation_context *ac = NULL;
        struct ext4_group_desc *gdp;
        struct ext4_super_block *es;
        unsigned long overflow;
@@ -4428,7 +4215,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
        if (block < le32_to_cpu(es->s_first_data_block) ||
            block + count < block ||
            block + count > ext4_blocks_count(es)) {
-               ext4_error(sb, __FUNCTION__,
+               ext4_error(sb, __func__,
                            "Freeing blocks not in datazone - "
                            "block = %lu, count = %lu", block, count);
                goto error_return;
@@ -4436,9 +4223,12 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
 
        ext4_debug("freeing block %lu\n", block);
 
-       ac.ac_op = EXT4_MB_HISTORY_FREE;
-       ac.ac_inode = inode;
-       ac.ac_sb = sb;
+       ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+       if (ac) {
+               ac->ac_op = EXT4_MB_HISTORY_FREE;
+               ac->ac_inode = inode;
+               ac->ac_sb = sb;
+       }
 
 do_more:
        overflow = 0;
@@ -4466,9 +4256,11 @@ do_more:
            in_range(block + count - 1, ext4_inode_table(sb, gdp),
                      EXT4_SB(sb)->s_itb_per_group)) {
 
-               ext4_error(sb, __FUNCTION__,
+               ext4_error(sb, __func__,
                           "Freeing blocks in system zone - "
                           "Block = %lu, count = %lu", block, count);
+               /* err = 0. ext4_std_error should be a no op */
+               goto error_return;
        }
 
        BUFFER_TRACE(bitmap_bh, "getting write access");
@@ -4504,10 +4296,12 @@ do_more:
        BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
        err = ext4_journal_dirty_metadata(handle, bitmap_bh);
 
-       ac.ac_b_ex.fe_group = block_group;
-       ac.ac_b_ex.fe_start = bit;
-       ac.ac_b_ex.fe_len = count;
-       ext4_mb_store_history(&ac);
+       if (ac) {
+               ac->ac_b_ex.fe_group = block_group;
+               ac->ac_b_ex.fe_start = bit;
+               ac->ac_b_ex.fe_len = count;
+               ext4_mb_store_history(ac);
+       }
 
        if (metadata) {
                /* blocks being freed are metadata. these blocks shouldn't
@@ -4522,8 +4316,7 @@ do_more:
        }
 
        spin_lock(sb_bgl_lock(sbi, block_group));
-       gdp->bg_free_blocks_count =
-               cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
+       le16_add_cpu(&gdp->bg_free_blocks_count, count);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
        spin_unlock(sb_bgl_lock(sbi, block_group));
        percpu_counter_add(&sbi->s_freeblocks_counter, count);
@@ -4548,5 +4341,7 @@ do_more:
 error_return:
        brelse(bitmap_bh);
        ext4_std_error(sb, err);
+       if (ac)
+               kmem_cache_free(ext4_ac_cachep, ac);
        return;
 }