* ext_pblock:
* combine low and high parts of physical block number into ext4_fsblk_t
*/
-static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
+ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
{
ext4_fsblk_t block;
ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
}
-static int ext4_ext_journal_restart(handle_t *handle, int needed)
+static int ext4_ext_truncate_extend_restart(handle_t *handle,
+ struct inode *inode,
+ int needed)
{
int err;
err = ext4_journal_extend(handle, needed);
if (err <= 0)
return err;
- return ext4_journal_restart(handle, needed);
+ err = ext4_truncate_restart_trans(handle, inode, needed);
+ /*
+ * We have dropped i_data_sem, so someone might have re-cached
+ * an extent we are going to truncate.
+ */
+ ext4_ext_invalidate_cache(inode);
+
+ return err;
}
/*
ext4_fsblk_t bg_start;
ext4_fsblk_t last_block;
ext4_grpblk_t colour;
+ ext4_group_t block_group;
+ int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
int depth;
if (path) {
}
/* OK. use inode's group */
- bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
+ block_group = ei->i_block_group;
+ if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
+ /*
+ * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
+ * block groups per flexgroup, reserve the first block
+ * group for directories and special files. Regular
+ * files will start at the second block group. This
+ * tends to speed up directory access and improves
+ * fsck times.
+ */
+ block_group &= ~(flex_size-1);
+ if (S_ISREG(inode->i_mode))
+ block_group++;
+ }
+ bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
+ /*
+ * If we are doing delayed allocation, we don't need to take
+ * colour into account.
+ */
+ if (test_opt(inode->i_sb, DELALLOC))
+ return bg_start;
+
if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
colour = (current->pid % 16) *
(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
return max;
}
-static int __ext4_ext_check_header(const char *function, struct inode *inode,
+static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
+{
+ ext4_fsblk_t block = ext_pblock(ext);
+ int len = ext4_ext_get_actual_len(ext);
+
+ return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
+}
+
+static int ext4_valid_extent_idx(struct inode *inode,
+ struct ext4_extent_idx *ext_idx)
+{
+ ext4_fsblk_t block = idx_pblock(ext_idx);
+
+ return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
+}
+
+static int ext4_valid_extent_entries(struct inode *inode,
+ struct ext4_extent_header *eh,
+ int depth)
+{
+ struct ext4_extent *ext;
+ struct ext4_extent_idx *ext_idx;
+ unsigned short entries;
+ if (eh->eh_entries == 0)
+ return 1;
+
+ entries = le16_to_cpu(eh->eh_entries);
+
+ if (depth == 0) {
+ /* leaf entries */
+ ext = EXT_FIRST_EXTENT(eh);
+ while (entries) {
+ if (!ext4_valid_extent(inode, ext))
+ return 0;
+ ext++;
+ entries--;
+ }
+ } else {
+ ext_idx = EXT_FIRST_INDEX(eh);
+ while (entries) {
+ if (!ext4_valid_extent_idx(inode, ext_idx))
+ return 0;
+ ext_idx++;
+ entries--;
+ }
+ }
+ return 1;
+}
+
+static int __ext4_ext_check(const char *function, struct inode *inode,
struct ext4_extent_header *eh,
int depth)
{
error_msg = "invalid eh_entries";
goto corrupted;
}
+ if (!ext4_valid_extent_entries(inode, eh, depth)) {
+ error_msg = "invalid extent entries";
+ goto corrupted;
+ }
return 0;
corrupted:
ext4_error(inode->i_sb, function,
- "bad header in inode #%lu: %s - magic %x, "
+ "bad header/extent in inode #%lu: %s - magic %x, "
"entries %u, max %u(%u), depth %u(%u)",
inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
return -EIO;
}
-#define ext4_ext_check_header(inode, eh, depth) \
- __ext4_ext_check_header(__func__, inode, eh, depth)
+#define ext4_ext_check(inode, eh, depth) \
+ __ext4_ext_check(__func__, inode, eh, depth)
+
+int ext4_ext_check_inode(struct inode *inode)
+{
+ return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode));
+}
#ifdef EXT_DEBUG
static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block),
idx_pblock(path->p_idx));
} else if (path->p_ext) {
- ext_debug(" %d:%d:%llu ",
+ ext_debug(" %d:[%d]%d:%llu ",
le32_to_cpu(path->p_ext->ee_block),
+ ext4_ext_is_uninitialized(path->p_ext),
ext4_ext_get_actual_len(path->p_ext),
ext_pblock(path->p_ext));
} else
eh = path[depth].p_hdr;
ex = EXT_FIRST_EXTENT(eh);
+ ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino);
+
for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
- ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block),
+ ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
+ ext4_ext_is_uninitialized(ex),
ext4_ext_get_actual_len(ex), ext_pblock(ex));
}
ext_debug("\n");
}
path->p_ext = l - 1;
- ext_debug(" -> %d:%llu:%d ",
+ ext_debug(" -> %d:%llu:[%d]%d ",
le32_to_cpu(path->p_ext->ee_block),
ext_pblock(path->p_ext),
+ ext4_ext_is_uninitialized(path->p_ext),
ext4_ext_get_actual_len(path->p_ext));
#ifdef CHECK_BINSEARCH
eh = ext_inode_hdr(inode);
depth = ext_depth(inode);
- if (ext4_ext_check_header(inode, eh, depth))
- return ERR_PTR(-EIO);
-
/* account possible depth increase */
if (!path) {
i = depth;
/* walk through the tree */
while (i) {
+ int need_to_validate = 0;
+
ext_debug("depth %d: num %d, max %d\n",
ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
path[ppos].p_depth = i;
path[ppos].p_ext = NULL;
- bh = sb_bread(inode->i_sb, path[ppos].p_block);
- if (!bh)
+ bh = sb_getblk(inode->i_sb, path[ppos].p_block);
+ if (unlikely(!bh))
goto err;
-
+ if (!bh_uptodate_or_lock(bh)) {
+ if (bh_submit_read(bh) < 0) {
+ put_bh(bh);
+ goto err;
+ }
+ /* validate the extent entries */
+ need_to_validate = 1;
+ }
eh = ext_block_hdr(bh);
ppos++;
BUG_ON(ppos > depth);
path[ppos].p_hdr = eh;
i--;
- if (ext4_ext_check_header(inode, eh, i))
+ if (need_to_validate && ext4_ext_check(inode, eh, i))
goto err;
}
path[depth].p_ext++;
while (path[depth].p_ext <=
EXT_MAX_EXTENT(path[depth].p_hdr)) {
- ext_debug("move %d:%llu:%d in new leaf %llu\n",
+ ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
le32_to_cpu(path[depth].p_ext->ee_block),
ext_pblock(path[depth].p_ext),
+ ext4_ext_is_uninitialized(path[depth].p_ext),
ext4_ext_get_actual_len(path[depth].p_ext),
newblock);
/*memmove(ex++, path[depth].p_ext++,
struct ext4_extent_idx *ix;
struct ext4_extent *ex;
ext4_fsblk_t block;
- int depth, ee_len;
+ int depth; /* Note, NOT eh_depth; depth from top of tree */
+ int ee_len;
BUG_ON(path == NULL);
depth = path->p_depth;
if (bh == NULL)
return -EIO;
eh = ext_block_hdr(bh);
- if (ext4_ext_check_header(inode, eh, depth)) {
+ /* subtract from p_depth to get proper eh_depth */
+ if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
put_bh(bh);
return -EIO;
}
if (bh == NULL)
return -EIO;
eh = ext_block_hdr(bh);
- if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {
+ if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
put_bh(bh);
return -EIO;
}
return err;
}
-static int
+int
ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
struct ext4_extent *ex2)
{
/* try to insert block into found extent and return */
if (ex && ext4_can_extents_be_merged(inode, ex, newext)) {
- ext_debug("append %d block to %d:%d (from %llu)\n",
+ ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
+ ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext),
le32_to_cpu(ex->ee_block),
+ ext4_ext_is_uninitialized(ex),
ext4_ext_get_actual_len(ex), ext_pblock(ex));
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
if (!nearex) {
/* there is no extent in this leaf, create first one */
- ext_debug("first extent in the leaf: %d:%llu:%d\n",
+ ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n",
le32_to_cpu(newext->ee_block),
ext_pblock(newext),
+ ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext));
path[depth].p_ext = EXT_FIRST_EXTENT(eh);
} else if (le32_to_cpu(newext->ee_block)
len = EXT_MAX_EXTENT(eh) - nearex;
len = (len - 1) * sizeof(struct ext4_extent);
len = len < 0 ? 0 : len;
- ext_debug("insert %d:%llu:%d after: nearest 0x%p, "
+ ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
"move %d from 0x%p to 0x%p\n",
le32_to_cpu(newext->ee_block),
ext_pblock(newext),
+ ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext),
nearex, len, nearex + 1, nearex + 2);
memmove(nearex + 2, nearex + 1, len);
BUG_ON(newext->ee_block == nearex->ee_block);
len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
len = len < 0 ? 0 : len;
- ext_debug("insert %d:%llu:%d before: nearest 0x%p, "
+ ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
"move %d from 0x%p to 0x%p\n",
le32_to_cpu(newext->ee_block),
ext_pblock(newext),
+ ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext),
nearex, len, nearex + 1, nearex + 2);
memmove(nearex + 1, nearex, len);
{
struct ext4_ext_cache *cex;
BUG_ON(len == 0);
+ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
cex = &EXT4_I(inode)->i_cached_extent;
cex->ec_type = type;
cex->ec_block = block;
cex->ec_len = len;
cex->ec_start = start;
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
}
/*
struct ext4_extent *ex)
{
struct ext4_ext_cache *cex;
+ int ret = EXT4_EXT_CACHE_NO;
+ /*
+ * We borrow i_block_reservation_lock to protect i_cached_extent
+ */
+ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
cex = &EXT4_I(inode)->i_cached_extent;
/* has cache valid data? */
if (cex->ec_type == EXT4_EXT_CACHE_NO)
- return EXT4_EXT_CACHE_NO;
+ goto errout;
BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
cex->ec_type != EXT4_EXT_CACHE_EXTENT);
ext_debug("%u cached by %u:%u:%llu\n",
block,
cex->ec_block, cex->ec_len, cex->ec_start);
- return cex->ec_type;
+ ret = cex->ec_type;
}
-
- /* not in cache */
- return EXT4_EXT_CACHE_NO;
+errout:
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ return ret;
}
/*
*/
/* 1 bitmap, 1 block group descriptor */
ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
+ return ret;
}
}
ex = EXT_LAST_EXTENT(eh);
ex_ee_block = le32_to_cpu(ex->ee_block);
- if (ext4_ext_is_uninitialized(ex))
- uninitialized = 1;
ex_ee_len = ext4_ext_get_actual_len(ex);
while (ex >= EXT_FIRST_EXTENT(eh) &&
ex_ee_block + ex_ee_len > start) {
- ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len);
+
+ if (ext4_ext_is_uninitialized(ex))
+ uninitialized = 1;
+ else
+ uninitialized = 0;
+
+ ext_debug("remove ext %u:[%d]%d\n", ex_ee_block,
+ uninitialized, ex_ee_len);
path[depth].p_ext = ex;
a = ex_ee_block > start ? ex_ee_block : start;
}
credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
- err = ext4_ext_journal_restart(handle, credits);
+ err = ext4_ext_truncate_extend_restart(handle, inode, credits);
if (err)
goto out;
return -ENOMEM;
}
path[0].p_hdr = ext_inode_hdr(inode);
- if (ext4_ext_check_header(inode, path[0].p_hdr, depth)) {
+ if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
err = -EIO;
goto out;
}
err = -EIO;
break;
}
- if (ext4_ext_check_header(inode, ext_block_hdr(bh),
+ if (ext4_ext_check(inode, ext_block_hdr(bh),
depth - i - 1)) {
err = -EIO;
break;
len = ee_len;
bio = bio_alloc(GFP_NOIO, len);
- if (!bio)
- return -ENOMEM;
bio->bi_sector = ee_pblock;
bio->bi_bdev = inode->i_sb->s_bdev;
*/
newdepth = ext_depth(inode);
/*
- * update the extent length after successfull insert of the
+ * update the extent length after successful insert of the
* split extent
*/
orig_ex.ee_len = cpu_to_le16(ee_len -
} else if (err)
goto fix_extent_len;
out:
+ ext4_ext_show_leaf(inode, path);
return err ? err : allocated;
fix_extent_len:
int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock,
unsigned int max_blocks, struct buffer_head *bh_result,
- int create, int extend_disksize)
+ int flags)
{
struct ext4_ext_path *path = NULL;
struct ext4_extent_header *eh;
int err = 0, depth, ret, cache_type;
unsigned int allocated = 0;
struct ext4_allocation_request ar;
- loff_t disksize;
__clear_bit(BH_New, &bh_result->b_state);
- ext_debug("blocks %u/%u requested for inode %u\n",
+ ext_debug("blocks %u/%u requested for inode %lu\n",
iblock, max_blocks, inode->i_ino);
/* check in cache */
cache_type = ext4_ext_in_cache(inode, iblock, &newex);
if (cache_type) {
if (cache_type == EXT4_EXT_CACHE_GAP) {
- if (!create) {
+ if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
/*
* block isn't allocated yet and
* user doesn't want to allocate it
newblock = iblock - ee_block + ee_start;
/* number of remaining blocks in the extent */
allocated = ee_len - (iblock - ee_block);
- ext_debug("%u fit into %lu:%d -> %llu\n", iblock,
+ ext_debug("%u fit into %u:%d -> %llu\n", iblock,
ee_block, ee_len, newblock);
/* Do not put uninitialized extent in the cache */
EXT4_EXT_CACHE_EXTENT);
goto out;
}
- if (create == EXT4_CREATE_UNINITIALIZED_EXT)
+ if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)
goto out;
- if (!create) {
+ if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
+ if (allocated > max_blocks)
+ allocated = max_blocks;
/*
* We have blocks reserved already. We
* return allocated blocks so that delalloc
* the buffer head will be unmapped so that
* a read from the block returns 0s.
*/
- if (allocated > max_blocks)
- allocated = max_blocks;
set_buffer_unwritten(bh_result);
+ bh_result->b_bdev = inode->i_sb->s_bdev;
+ bh_result->b_blocknr = newblock;
goto out2;
}
* requested block isn't allocated yet;
* we couldn't try to create block if create flag is zero
*/
- if (!create) {
+ if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
/*
* put just found gap into cache to speed up
* subsequent requests
* EXT_UNINIT_MAX_LEN.
*/
if (max_blocks > EXT_INIT_MAX_LEN &&
- create != EXT4_CREATE_UNINITIALIZED_EXT)
+ !(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
max_blocks = EXT_INIT_MAX_LEN;
else if (max_blocks > EXT_UNINIT_MAX_LEN &&
- create == EXT4_CREATE_UNINITIALIZED_EXT)
+ (flags & EXT4_GET_BLOCKS_UNINIT_EXT))
max_blocks = EXT_UNINIT_MAX_LEN;
/* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
newblock = ext4_mb_new_blocks(handle, &ar, &err);
if (!newblock)
goto out2;
- ext_debug("allocate new block: goal %llu, found %llu/%lu\n",
+ ext_debug("allocate new block: goal %llu, found %llu/%u\n",
ar.goal, newblock, allocated);
/* try to insert new extent into found leaf and return */
ext4_ext_store_pblock(&newex, newblock);
newex.ee_len = cpu_to_le16(ar.len);
- if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */
+ if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) /* Mark uninitialized */
ext4_ext_mark_uninitialized(&newex);
err = ext4_ext_insert_extent(handle, inode, path, &newex);
if (err) {
newblock = ext_pblock(&newex);
allocated = ext4_ext_get_actual_len(&newex);
outnew:
- if (extend_disksize) {
- disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits;
- if (disksize > i_size_read(inode))
- disksize = i_size_read(inode);
- if (disksize > EXT4_I(inode)->i_disksize)
- EXT4_I(inode)->i_disksize = disksize;
- }
-
set_buffer_new(bh_result);
/* Cache only when it is _not_ an uninitialized extent */
- if (create != EXT4_CREATE_UNINITIALIZED_EXT)
+ if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
EXT4_EXT_CACHE_EXTENT);
out:
ret = PTR_ERR(handle);
break;
}
- ret = ext4_get_blocks_wrap(handle, inode, block,
- max_blocks, &map_bh,
- EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
+ map_bh.b_state = 0;
+ ret = ext4_get_blocks(handle, inode, block,
+ max_blocks, &map_bh,
+ EXT4_GET_BLOCKS_CREATE_UNINIT_EXT);
if (ret <= 0) {
#ifdef EXT4FS_DEBUG
WARN_ON(ret <= 0);
printk(KERN_ERR "%s: ext4_ext_get_blocks "
"returned error inode#%lu, block=%u, "
- "max_blocks=%lu", __func__,
+ "max_blocks=%u", __func__,
inode->i_ino, block, max_blocks);
#endif
ext4_mark_inode_dirty(handle, inode);
void *data)
{
struct fiemap_extent_info *fieinfo = data;
- unsigned long blksize_bits = inode->i_sb->s_blocksize_bits;
+ unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
__u64 logical;
__u64 physical;
__u64 length;
*
* XXX this might miss a single-block extent at EXT_MAX_BLOCK
*/
- if (logical + length - 1 == EXT_MAX_BLOCK ||
- ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK)
+ if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
+ newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
+ loff_t size = i_size_read(inode);
+ loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
+
flags |= FIEMAP_EXTENT_LAST;
+ if ((flags & FIEMAP_EXTENT_DELALLOC) &&
+ logical+length > size)
+ length = (size - logical + bs - 1) & ~(bs-1);
+ }
error = fiemap_fill_next_extent(fieinfo, logical, physical,
length, flags);
* Walk the extent tree gathering extent information.
* ext4_ext_fiemap_cb will push extents back to user.
*/
- down_write(&EXT4_I(inode)->i_data_sem);
+ down_read(&EXT4_I(inode)->i_data_sem);
error = ext4_ext_walk_space(inode, start_blk, len_blks,
ext4_ext_fiemap_cb, fieinfo);
- up_write(&EXT4_I(inode)->i_data_sem);
+ up_read(&EXT4_I(inode)->i_data_sem);
}
return error;