#include <linux/buffer_head.h>
#include "ext4.h"
#include "ext4_jbd2.h"
-#include "group.h"
+#include "mballoc.h"
/*
* balloc.c contains the blocks allocation and deallocation routines
}
static int ext4_group_used_meta_blocks(struct super_block *sb,
- ext4_group_t block_group)
+ ext4_group_t block_group,
+ struct ext4_group_desc *gdp)
{
ext4_fsblk_t tmp;
struct ext4_sb_info *sbi = EXT4_SB(sb);
int used_blocks = sbi->s_itb_per_group + 2;
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
- struct ext4_group_desc *gdp;
- struct buffer_head *bh;
-
- gdp = ext4_get_group_desc(sb, block_group, &bh);
if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp),
block_group))
used_blocks--;
ext4_group_t block_group, struct ext4_group_desc *gdp)
{
int bit, bit_max;
+ ext4_group_t ngroups = ext4_get_groups_count(sb);
unsigned free_blocks, group_blocks;
struct ext4_sb_info *sbi = EXT4_SB(sb);
/* If checksum is bad mark all blocks used to prevent allocation
* essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
- ext4_error(sb, __func__,
- "Checksum bad for group %u", block_group);
- gdp->bg_free_blocks_count = 0;
- gdp->bg_free_inodes_count = 0;
- gdp->bg_itable_unused = 0;
+ ext4_error(sb, "Checksum bad for group %u",
+ block_group);
+ ext4_free_blks_set(sb, gdp, 0);
+ ext4_free_inodes_set(sb, gdp, 0);
+ ext4_itable_unused_set(sb, gdp, 0);
memset(bh->b_data, 0xff, sb->s_blocksize);
return 0;
}
bit_max += ext4_bg_num_gdb(sb, block_group);
}
- if (block_group == sbi->s_groups_count - 1) {
+ if (block_group == ngroups - 1) {
/*
* Even though mke2fs always initialize first and last group
* if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
* to make sure we calculate the right free blocks
*/
group_blocks = ext4_blocks_count(sbi->s_es) -
- le32_to_cpu(sbi->s_es->s_first_data_block) -
- (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1));
+ ext4_group_first_block_no(sb, ngroups - 1);
} else {
group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
}
*/
mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
}
- return free_blocks - ext4_group_used_meta_blocks(sb, block_group);
+ return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
}
* when a file system is mounted (see ext4_fill_super).
*/
-
-#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
-
/**
* ext4_get_group_desc() -- load group descriptor from disk
* @sb: super block
ext4_group_t block_group,
struct buffer_head **bh)
{
- unsigned long group_desc;
- unsigned long offset;
+ unsigned int group_desc;
+ unsigned int offset;
+ ext4_group_t ngroups = ext4_get_groups_count(sb);
struct ext4_group_desc *desc;
struct ext4_sb_info *sbi = EXT4_SB(sb);
- if (block_group >= sbi->s_groups_count) {
- ext4_error(sb, "ext4_get_group_desc",
- "block_group >= groups_count - "
- "block_group = %u, groups_count = %u",
- block_group, sbi->s_groups_count);
+ if (block_group >= ngroups) {
+ ext4_error(sb, "block_group >= groups_count - block_group = %u,"
+ " groups_count = %u", block_group, ngroups);
return NULL;
}
- smp_rmb();
group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
if (!sbi->s_group_desc[group_desc]) {
- ext4_error(sb, "ext4_get_group_desc",
- "Group descriptor not loaded - "
- "block_group = %u, group_desc = %lu, desc = %lu",
+ ext4_error(sb, "Group descriptor not loaded - "
+ "block_group = %u, group_desc = %u, desc = %u",
block_group, group_desc, offset);
return NULL;
}
return 1;
err_out:
- ext4_error(sb, __func__,
- "Invalid block bitmap - "
- "block_group = %d, block = %llu",
+ ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu",
block_group, bitmap_blk);
return 0;
}
bitmap_blk = ext4_block_bitmap(sb, desc);
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
- ext4_error(sb, __func__,
- "Cannot read block bitmap - "
+ ext4_error(sb, "Cannot read block bitmap - "
"block_group = %u, block_bitmap = %llu",
block_group, bitmap_blk);
return NULL;
}
- if (buffer_uptodate(bh) &&
- !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
+
+ if (bitmap_uptodate(bh))
return bh;
lock_buffer(bh);
- spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+ if (bitmap_uptodate(bh)) {
+ unlock_buffer(bh);
+ return bh;
+ }
+ ext4_lock_group(sb, block_group);
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
ext4_init_block_bitmap(sb, bh, block_group, desc);
+ set_bitmap_uptodate(bh);
set_buffer_uptodate(bh);
+ ext4_unlock_group(sb, block_group);
unlock_buffer(bh);
- spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
return bh;
}
- spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+ ext4_unlock_group(sb, block_group);
+ if (buffer_uptodate(bh)) {
+ /*
+ * if not uninit if bh is uptodate,
+ * bitmap is also uptodate
+ */
+ set_bitmap_uptodate(bh);
+ unlock_buffer(bh);
+ return bh;
+ }
+ /*
+ * submit the buffer_head for read. We can
+ * safely mark the bitmap as uptodate now.
+ * We do it here so the bitmap uptodate bit
+ * get set with buffer lock held.
+ */
+ set_bitmap_uptodate(bh);
if (bh_submit_read(bh) < 0) {
put_bh(bh);
- ext4_error(sb, __func__,
- "Cannot read block bitmap - "
+ ext4_error(sb, "Cannot read block bitmap - "
"block_group = %u, block_bitmap = %llu",
block_group, bitmap_blk);
return NULL;
}
/**
- * ext4_free_blocks_sb() -- Free given blocks and update quota
+ * ext4_add_groupblocks() -- Add given blocks to an existing group
* @handle: handle to this transaction
* @sb: super block
- * @block: start physcial block to free
+ * @block: start physcial block to add to the block group
* @count: number of blocks to free
- * @pdquot_freed_blocks: pointer to quota
*
- * XXX This function is only used by the on-line resizing code, which
- * should probably be fixed up to call the mballoc variant. There
- * this needs to be cleaned up later; in fact, I'm not convinced this
- * is 100% correct in the face of the mballoc code. The online resizing
- * code needs to be fixed up to more tightly (and correctly) interlock
- * with the mballoc code.
+ * This marks the blocks as free in the bitmap. We ask the
+ * mballoc to reload the buddy after this by setting group
+ * EXT4_GROUP_INFO_NEED_INIT_BIT flag
*/
-void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
- ext4_fsblk_t block, unsigned long count,
- unsigned long *pdquot_freed_blocks)
+void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
+ ext4_fsblk_t block, unsigned long count)
{
struct buffer_head *bitmap_bh = NULL;
struct buffer_head *gd_bh;
ext4_group_t block_group;
ext4_grpblk_t bit;
- unsigned long i;
- unsigned long overflow;
+ unsigned int i;
struct ext4_group_desc *desc;
struct ext4_super_block *es;
struct ext4_sb_info *sbi;
- int err = 0, ret;
- ext4_grpblk_t group_freed;
+ int err = 0, ret, blk_free_count;
+ ext4_grpblk_t blocks_freed;
+ struct ext4_group_info *grp;
- *pdquot_freed_blocks = 0;
sbi = EXT4_SB(sb);
es = sbi->s_es;
- if (block < le32_to_cpu(es->s_first_data_block) ||
- block + count < block ||
- block + count > ext4_blocks_count(es)) {
- ext4_error(sb, "ext4_free_blocks",
- "Freeing blocks not in datazone - "
- "block = %llu, count = %lu", block, count);
- goto error_return;
- }
+ ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
- ext4_debug("freeing block(s) %llu-%llu\n", block, block + count - 1);
-
-do_more:
- overflow = 0;
ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
+ grp = ext4_get_group_info(sb, block_group);
/*
* Check to see if we are freeing blocks across a group
* boundary.
*/
if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
- overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
- count -= overflow;
+ goto error_return;
}
- brelse(bitmap_bh);
bitmap_bh = ext4_read_block_bitmap(sb, block_group);
if (!bitmap_bh)
goto error_return;
in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
in_range(block + count - 1, ext4_inode_table(sb, desc),
sbi->s_itb_per_group)) {
- ext4_error(sb, "ext4_free_blocks",
- "Freeing blocks in system zones - "
+ ext4_error(sb, "Adding blocks in system zones - "
"Block = %llu, count = %lu",
block, count);
goto error_return;
}
/*
- * We are about to start releasing blocks in the bitmap,
+ * We are about to add blocks to the bitmap,
* so we need undo access.
*/
- /* @@@ check errors */
BUFFER_TRACE(bitmap_bh, "getting undo access");
err = ext4_journal_get_undo_access(handle, bitmap_bh);
if (err)
err = ext4_journal_get_write_access(handle, gd_bh);
if (err)
goto error_return;
-
- jbd_lock_bh_state(bitmap_bh);
-
- for (i = 0, group_freed = 0; i < count; i++) {
- /*
- * An HJ special. This is expensive...
- */
-#ifdef CONFIG_JBD2_DEBUG
- jbd_unlock_bh_state(bitmap_bh);
- {
- struct buffer_head *debug_bh;
- debug_bh = sb_find_get_block(sb, block + i);
- if (debug_bh) {
- BUFFER_TRACE(debug_bh, "Deleted!");
- if (!bh2jh(bitmap_bh)->b_committed_data)
- BUFFER_TRACE(debug_bh,
- "No commited data in bitmap");
- BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
- __brelse(debug_bh);
- }
- }
- jbd_lock_bh_state(bitmap_bh);
-#endif
- if (need_resched()) {
- jbd_unlock_bh_state(bitmap_bh);
- cond_resched();
- jbd_lock_bh_state(bitmap_bh);
- }
- /* @@@ This prevents newly-allocated data from being
- * freed and then reallocated within the same
- * transaction.
- *
- * Ideally we would want to allow that to happen, but to
- * do so requires making jbd2_journal_forget() capable of
- * revoking the queued write of a data block, which
- * implies blocking on the journal lock. *forget()
- * cannot block due to truncate races.
- *
- * Eventually we can fix this by making jbd2_journal_forget()
- * return a status indicating whether or not it was able
- * to revoke the buffer. On successful revoke, it is
- * safe not to set the allocation bit in the committed
- * bitmap, because we know that there is no outstanding
- * activity on the buffer any more and so it is safe to
- * reallocate it.
- */
- BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
- J_ASSERT_BH(bitmap_bh,
- bh2jh(bitmap_bh)->b_committed_data != NULL);
- ext4_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i,
- bh2jh(bitmap_bh)->b_committed_data);
-
- /*
- * We clear the bit in the bitmap after setting the committed
- * data bit, because this is the reverse order to that which
- * the allocator uses.
- */
+ /*
+ * make sure we don't allow a parallel init on other groups in the
+ * same buddy cache
+ */
+ down_write(&grp->alloc_sem);
+ for (i = 0, blocks_freed = 0; i < count; i++) {
BUFFER_TRACE(bitmap_bh, "clear bit");
- if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
+ if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
bit + i, bitmap_bh->b_data)) {
- jbd_unlock_bh_state(bitmap_bh);
- ext4_error(sb, __func__,
- "bit already cleared for block %llu",
+ ext4_error(sb, "bit already cleared for block %llu",
(ext4_fsblk_t)(block + i));
- jbd_lock_bh_state(bitmap_bh);
BUFFER_TRACE(bitmap_bh, "bit already cleared");
} else {
- group_freed++;
+ blocks_freed++;
}
}
- jbd_unlock_bh_state(bitmap_bh);
-
- spin_lock(sb_bgl_lock(sbi, block_group));
- le16_add_cpu(&desc->bg_free_blocks_count, group_freed);
+ ext4_lock_group(sb, block_group);
+ blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
+ ext4_free_blks_set(sb, desc, blk_free_count);
desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
- spin_unlock(sb_bgl_lock(sbi, block_group));
- percpu_counter_add(&sbi->s_freeblocks_counter, count);
+ ext4_unlock_group(sb, block_group);
+ percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
- spin_lock(sb_bgl_lock(sbi, flex_group));
- sbi->s_flex_groups[flex_group].free_blocks += count;
- spin_unlock(sb_bgl_lock(sbi, flex_group));
+ atomic_add(blocks_freed,
+ &sbi->s_flex_groups[flex_group].free_blocks);
}
+ /*
+ * request to reload the buddy with the
+ * new bitmap information
+ */
+ set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
+ grp->bb_free += blocks_freed;
+ up_write(&grp->alloc_sem);
/* We dirtied the bitmap block */
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
/* And the group descriptor block */
BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
- if (!err) err = ret;
- *pdquot_freed_blocks += group_freed;
-
- if (overflow && !err) {
- block += count;
- count = overflow;
- goto do_more;
- }
+ if (!err)
+ err = ret;
sb->s_dirt = 1;
+
error_return:
brelse(bitmap_bh);
ext4_std_error(sb, err);
}
/**
- * ext4_free_blocks() -- Free given blocks and update quota
- * @handle: handle for this transaction
- * @inode: inode
- * @block: start physical block to free
- * @count: number of blocks to count
- * @metadata: Are these metadata blocks
- */
-void ext4_free_blocks(handle_t *handle, struct inode *inode,
- ext4_fsblk_t block, unsigned long count,
- int metadata)
-{
- struct super_block *sb;
- unsigned long dquot_freed_blocks;
-
- /* this isn't the right place to decide whether block is metadata
- * inode.c/extents.c knows better, but for safety ... */
- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
- metadata = 1;
-
- /* We need to make sure we don't reuse
- * block released untill the transaction commit.
- * writeback mode have weak data consistency so
- * don't force data as metadata when freeing block
- * for writeback mode.
- */
- if (metadata == 0 && !ext4_should_writeback_data(inode))
- metadata = 1;
-
- sb = inode->i_sb;
-
- ext4_mb_free_blocks(handle, inode, block, count,
- metadata, &dquot_freed_blocks);
- if (dquot_freed_blocks)
- DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
- return;
-}
-
-/**
* ext4_has_free_blocks()
* @sbi: in-core super block structure.
* @nblocks: number of needed blocks
*/
int ext4_should_retry_alloc(struct super_block *sb, int *retries)
{
- if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || (*retries)++ > 3)
+ if (!ext4_has_free_blocks(EXT4_SB(sb), 1) ||
+ (*retries)++ > 3 ||
+ !EXT4_SB(sb)->s_journal)
return 0;
jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
ret = ext4_mb_new_blocks(handle, &ar, errp);
if (count)
*count = ar.len;
-
/*
- * Account for the allocated meta blocks
+ * Account for the allocated meta blocks. We will never
+ * fail EDQUOT for metdata, but we do account for it.
*/
if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) {
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ dquot_alloc_block_nofail(inode, ar.len);
}
return ret;
}
ext4_fsblk_t desc_count;
struct ext4_group_desc *gdp;
ext4_group_t i;
- ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+ ext4_group_t ngroups = ext4_get_groups_count(sb);
#ifdef EXT4FS_DEBUG
struct ext4_super_block *es;
ext4_fsblk_t bitmap_count;
- unsigned long x;
+ unsigned int x;
struct buffer_head *bitmap_bh = NULL;
es = EXT4_SB(sb)->s_es;
bitmap_count = 0;
gdp = NULL;
- smp_rmb();
for (i = 0; i < ngroups; i++) {
gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
- desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
+ desc_count += ext4_free_blks_count(sb, gdp);
brelse(bitmap_bh);
bitmap_bh = ext4_read_block_bitmap(sb, i);
if (bitmap_bh == NULL)
continue;
x = ext4_count_free(bitmap_bh, sb->s_blocksize);
- printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
- i, le16_to_cpu(gdp->bg_free_blocks_count), x);
+ printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
+ i, ext4_free_blks_count(sb, gdp), x);
bitmap_count += x;
}
brelse(bitmap_bh);
return bitmap_count;
#else
desc_count = 0;
- smp_rmb();
for (i = 0; i < ngroups; i++) {
gdp = ext4_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
- desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
+ desc_count += ext4_free_blks_count(sb, gdp);
}
return desc_count;
static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
ext4_group_t group)
{
- return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0;
+ if (!ext4_bg_has_super(sb, group))
+ return 0;
+
+ if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG))
+ return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
+ else
+ return EXT4_SB(sb)->s_gdb_count;
}
/**