#include "buffer_head_io.h"
#define NOT_ALLOC_NEW_GROUP 0
-#define ALLOC_NEW_GROUP 1
+#define ALLOC_NEW_GROUP 0x1
+#define ALLOC_GROUPS_FROM_GLOBAL 0x2
#define OCFS2_MAX_INODES_TO_STEAL 1024
static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
struct inode *alloc_inode,
struct buffer_head *bh,
- u64 max_block);
+ u64 max_block,
+ u64 *last_alloc_group,
+ int flags);
static int ocfs2_cluster_group_search(struct inode *inode,
struct buffer_head *group_bh,
u16 *bg_bit_off);
static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
u32 bits_wanted, u64 max_block,
+ int flags,
struct ocfs2_alloc_context **ac);
void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
struct inode *alloc_inode,
struct buffer_head *bh,
- u64 max_block)
+ u64 max_block,
+ u64 *last_alloc_group,
+ int flags)
{
int status, credits;
struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
cl = &fe->id2.i_chain;
status = ocfs2_reserve_clusters_with_limit(osb,
le16_to_cpu(cl->cl_cpg),
- max_block, &ac);
+ max_block, flags, &ac);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
goto bail;
}
+ if (last_alloc_group && *last_alloc_group != 0) {
+ mlog(0, "use old allocation group %llu for block group alloc\n",
+ (unsigned long long)*last_alloc_group);
+ ac->ac_last_group = *last_alloc_group;
+ }
status = ocfs2_claim_clusters(osb,
handle,
ac,
alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);
status = 0;
+
+ /* save the new last alloc group so that the caller can cache it. */
+ if (last_alloc_group)
+ *last_alloc_group = ac->ac_last_group;
+
bail:
if (handle)
ocfs2_commit_trans(osb, handle);
struct ocfs2_alloc_context *ac,
int type,
u32 slot,
- int alloc_new_group)
+ u64 *last_alloc_group,
+ int flags)
{
int status;
u32 bits_wanted = ac->ac_bits_wanted;
goto bail;
}
- if (alloc_new_group != ALLOC_NEW_GROUP) {
+ if (!(flags & ALLOC_NEW_GROUP)) {
mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, "
"and we don't alloc a new group for it.\n",
slot, bits_wanted, free_bits);
}
status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
- ac->ac_max_block);
+ ac->ac_max_block,
+ last_alloc_group, flags);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
status = ocfs2_reserve_suballoc_bits(osb, (*ac),
EXTENT_ALLOC_SYSTEM_INODE,
- slot, ALLOC_NEW_GROUP);
+ slot, NULL, ALLOC_NEW_GROUP);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
status = ocfs2_reserve_suballoc_bits(osb, ac,
INODE_ALLOC_SYSTEM_INODE,
- slot, NOT_ALLOC_NEW_GROUP);
+ slot, NULL,
+ NOT_ALLOC_NEW_GROUP);
if (status >= 0) {
ocfs2_set_inode_steal_slot(osb, slot);
break;
{
int status;
s16 slot = ocfs2_get_inode_steal_slot(osb);
+ u64 alloc_group;
*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
if (!(*ac)) {
goto inode_steal;
atomic_set(&osb->s_num_inodes_stolen, 0);
+ alloc_group = osb->osb_inode_alloc_group;
status = ocfs2_reserve_suballoc_bits(osb, *ac,
INODE_ALLOC_SYSTEM_INODE,
- osb->slot_num, ALLOC_NEW_GROUP);
+ osb->slot_num,
+ &alloc_group,
+ ALLOC_NEW_GROUP |
+ ALLOC_GROUPS_FROM_GLOBAL);
if (status >= 0) {
status = 0;
+ spin_lock(&osb->osb_lock);
+ osb->osb_inode_alloc_group = alloc_group;
+ spin_unlock(&osb->osb_lock);
+ mlog(0, "after reservation, new allocation group is "
+ "%llu\n", (unsigned long long)alloc_group);
+
/*
* Some inodes must be freed by us, so try to allocate
* from our own next time.
status = ocfs2_reserve_suballoc_bits(osb, ac,
GLOBAL_BITMAP_SYSTEM_INODE,
- OCFS2_INVALID_SLOT,
+ OCFS2_INVALID_SLOT, NULL,
ALLOC_NEW_GROUP);
if (status < 0 && status != -ENOSPC) {
mlog_errno(status);
* things a bit. */
static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
u32 bits_wanted, u64 max_block,
+ int flags,
struct ocfs2_alloc_context **ac)
{
int status;
(*ac)->ac_max_block = max_block;
status = -ENOSPC;
- if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
+ if (!(flags & ALLOC_GROUPS_FROM_GLOBAL) &&
+ ocfs2_alloc_should_use_local(osb, bits_wanted)) {
status = ocfs2_reserve_local_alloc_bits(osb,
bits_wanted,
*ac);
u32 bits_wanted,
struct ocfs2_alloc_context **ac)
{
- return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac);
+ return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0,
+ ALLOC_NEW_GROUP, ac);
}
/*
int nr)
{
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
+ int ret;
if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
return 0;
- if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data)
+
+ if (!buffer_jbd(bg_bh))
return 1;
+ jbd_lock_bh_state(bg_bh);
bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
- return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
+ if (bg)
+ ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
+ else
+ ret = 1;
+ jbd_unlock_bh_state(bg_bh);
+
+ return ret;
}
static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
unsigned int tmp;
int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
struct ocfs2_group_desc *undo_bg = NULL;
+ int cluster_bitmap = 0;
mlog_entry_void();
}
if (ocfs2_is_cluster_bitmap(alloc_inode))
- undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data;
+ cluster_bitmap = 1;
+
+ if (cluster_bitmap) {
+ jbd_lock_bh_state(group_bh);
+ undo_bg = (struct ocfs2_group_desc *)
+ bh2jh(group_bh)->b_committed_data;
+ BUG_ON(!undo_bg);
+ }
tmp = num_bits;
while(tmp--) {
ocfs2_clear_bit((bit_off + tmp),
(unsigned long *) bg->bg_bitmap);
- if (ocfs2_is_cluster_bitmap(alloc_inode))
+ if (cluster_bitmap)
ocfs2_set_bit(bit_off + tmp,
(unsigned long *) undo_bg->bg_bitmap);
}
le16_add_cpu(&bg->bg_free_bits_count, num_bits);
+ if (cluster_bitmap)
+ jbd_unlock_bh_state(group_bh);
+
status = ocfs2_journal_dirty(handle, group_bh);
if (status < 0)
mlog_errno(status);
return ret;
}
+
+/*
+ * Read the inode specified by blkno to get suballoc_slot and
+ * suballoc_bit.
+ */
+static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
+ u16 *suballoc_slot, u16 *suballoc_bit)
+{
+ int status;
+ struct buffer_head *inode_bh = NULL;
+ struct ocfs2_dinode *inode_fe;
+
+ mlog_entry("blkno: %llu\n", (unsigned long long)blkno);
+
+ /* dirty read disk */
+ status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh);
+ if (status < 0) {
+ mlog(ML_ERROR, "read block %llu failed %d\n",
+ (unsigned long long)blkno, status);
+ goto bail;
+ }
+
+ inode_fe = (struct ocfs2_dinode *) inode_bh->b_data;
+ if (!OCFS2_IS_VALID_DINODE(inode_fe)) {
+ mlog(ML_ERROR, "invalid inode %llu requested\n",
+ (unsigned long long)blkno);
+ status = -EINVAL;
+ goto bail;
+ }
+
+ if (le16_to_cpu(inode_fe->i_suballoc_slot) != (u16)OCFS2_INVALID_SLOT &&
+ (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) {
+ mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n",
+ (unsigned long long)blkno,
+ (u32)le16_to_cpu(inode_fe->i_suballoc_slot));
+ status = -EINVAL;
+ goto bail;
+ }
+
+ if (suballoc_slot)
+ *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot);
+ if (suballoc_bit)
+ *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit);
+
+bail:
+ brelse(inode_bh);
+
+ mlog_exit(status);
+ return status;
+}
+
+/*
+ * test whether bit is SET in allocator bitmap or not. on success, 0
+ * is returned and *res is 1 for SET; 0 otherwise. when fails, errno
+ * is returned and *res is meaningless. Call this after you have
+ * cluster locked against suballoc, or you may get a result based on
+ * non-up2date contents
+ */
+static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
+ struct inode *suballoc,
+ struct buffer_head *alloc_bh, u64 blkno,
+ u16 bit, int *res)
+{
+ struct ocfs2_dinode *alloc_fe;
+ struct ocfs2_group_desc *group;
+ struct buffer_head *group_bh = NULL;
+ u64 bg_blkno;
+ int status;
+
+ mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno,
+ (unsigned int)bit);
+
+ alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data;
+ if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) {
+ mlog(ML_ERROR, "suballoc bit %u out of range of %u\n",
+ (unsigned int)bit,
+ ocfs2_bits_per_group(&alloc_fe->id2.i_chain));
+ status = -EINVAL;
+ goto bail;
+ }
+
+ bg_blkno = ocfs2_which_suballoc_group(blkno, bit);
+ status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno,
+ &group_bh);
+ if (status < 0) {
+ mlog(ML_ERROR, "read group %llu failed %d\n",
+ (unsigned long long)bg_blkno, status);
+ goto bail;
+ }
+
+ group = (struct ocfs2_group_desc *) group_bh->b_data;
+ *res = ocfs2_test_bit(bit, (unsigned long *)group->bg_bitmap);
+
+bail:
+ brelse(group_bh);
+
+ mlog_exit(status);
+ return status;
+}
+
+/*
+ * Test if the bit representing this inode (blkno) is set in the
+ * suballocator.
+ *
+ * On success, 0 is returned and *res is 1 for SET; 0 otherwise.
+ *
+ * In the event of failure, a negative value is returned and *res is
+ * meaningless.
+ *
+ * Callers must make sure to hold nfs_sync_lock to prevent
+ * ocfs2_delete_inode() on another node from accessing the same
+ * suballocator concurrently.
+ */
+int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
+{
+ int status;
+ u16 suballoc_bit = 0, suballoc_slot = 0;
+ struct inode *inode_alloc_inode;
+ struct buffer_head *alloc_bh = NULL;
+
+ mlog_entry("blkno: %llu", (unsigned long long)blkno);
+
+ status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
+ &suballoc_bit);
+ if (status < 0) {
+ mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status);
+ goto bail;
+ }
+
+ inode_alloc_inode =
+ ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
+ suballoc_slot);
+ if (!inode_alloc_inode) {
+ /* the error code could be inaccurate, but we are not able to
+ * get the correct one. */
+ status = -EINVAL;
+ mlog(ML_ERROR, "unable to get alloc inode in slot %u\n",
+ (u32)suballoc_slot);
+ goto bail;
+ }
+
+ mutex_lock(&inode_alloc_inode->i_mutex);
+ status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0);
+ if (status < 0) {
+ mutex_unlock(&inode_alloc_inode->i_mutex);
+ mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n",
+ (u32)suballoc_slot, status);
+ goto bail;
+ }
+
+ status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
+ blkno, suballoc_bit, res);
+ if (status < 0)
+ mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
+
+ ocfs2_inode_unlock(inode_alloc_inode, 0);
+ mutex_unlock(&inode_alloc_inode->i_mutex);
+
+ iput(inode_alloc_inode);
+ brelse(alloc_bh);
+bail:
+ mlog_exit(status);
+ return status;
+}