X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Focfs2%2Frefcounttree.c;h=4793f36f6518b25f312274d2fcdc84492de26340;hb=03e62303cf56e87337115f14842321043df2b4bb;hp=e3171c483685df6d44a86006b9cfbcc6823e868f;hpb=110a045aca62f6f564e3b68f89af2a3a5a6ecff2;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index e3171c4..4793f36 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -32,25 +32,42 @@ #include "dlmglue.h" #include "extent_map.h" #include "aops.h" +#include "xattr.h" +#include "namei.h" #include #include -#include #include #include #include #include +#include +#include +#include +#include +#include struct ocfs2_cow_context { struct inode *inode; u32 cow_start; u32 cow_len; - struct ocfs2_extent_tree di_et; - struct ocfs2_caching_info *ref_ci; + struct ocfs2_extent_tree data_et; + struct ocfs2_refcount_tree *ref_tree; struct buffer_head *ref_root_bh; struct ocfs2_alloc_context *meta_ac; struct ocfs2_alloc_context *data_ac; struct ocfs2_cached_dealloc_ctxt dealloc; + void *cow_object; + struct ocfs2_post_refcount *post_refcount; + int extra_credits; + int (*get_clusters)(struct ocfs2_cow_context *context, + u32 v_cluster, u32 *p_cluster, + u32 *num_clusters, + unsigned int *extent_flags); + int (*cow_duplicate_clusters)(handle_t *handle, + struct ocfs2_cow_context *context, + u32 cpos, u32 old_cluster, + u32 new_cluster, u32 new_len); }; static inline struct ocfs2_refcount_tree * @@ -258,7 +275,7 @@ static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb, spin_unlock(&osb->osb_lock); } -void ocfs2_kref_remove_refcount_tree(struct kref *kref) +static void ocfs2_kref_remove_refcount_tree(struct kref *kref) { struct ocfs2_refcount_tree *tree = container_of(kref, struct ocfs2_refcount_tree, rf_getcnt); @@ -506,23 +523,6 @@ out: return ret; } -int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw, - struct ocfs2_refcount_tree **ret_tree, - struct buffer_head **ref_bh) -{ - int ret; - u64 ref_blkno; - - ret = ocfs2_get_refcount_block(inode, &ref_blkno); - if (ret) { - mlog_errno(ret); - return ret; - } - - return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno, - rw, ret_tree, ref_bh); -} - void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb, struct ocfs2_refcount_tree *tree, int rw) { @@ -570,7 +570,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode, struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL; u16 suballoc_bit_start; u32 num_got; - u64 first_blkno; + u64 suballoc_loc, first_blkno; BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL); @@ -596,7 +596,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode, goto out_commit; } - ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, + ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc, &suballoc_bit_start, &num_got, &first_blkno); if (ret) { @@ -625,7 +625,8 @@ static int ocfs2_create_refcount_tree(struct inode *inode, rb = (struct ocfs2_refcount_block *)new_bh->b_data; memset(rb, 0, inode->i_sb->s_blocksize); strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); - rb->rf_suballoc_slot = cpu_to_le16(osb->slot_num); + rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); + rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc); rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); rb->rf_fs_generation = cpu_to_le32(osb->fs_generation); rb->rf_blkno = cpu_to_le64(first_blkno); @@ -790,7 +791,10 @@ int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh) if (le32_to_cpu(rb->rf_count) == 1) { blk = le64_to_cpu(rb->rf_blkno); bit = le16_to_cpu(rb->rf_suballoc_bit); - bg_blkno = ocfs2_which_suballoc_group(blk, bit); + if (rb->rf_suballoc_loc) + bg_blkno = le64_to_cpu(rb->rf_suballoc_loc); + else + bg_blkno = ocfs2_which_suballoc_group(blk, bit); alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, @@ -915,6 +919,139 @@ out: } /* + * Try to remove refcount tree. The mechanism is: + * 1) Check whether i_clusters == 0, if no, exit. + * 2) check whether we have i_xattr_loc in dinode. if yes, exit. + * 3) Check whether we have inline xattr stored outside, if yes, exit. + * 4) Remove the tree. + */ +int ocfs2_try_remove_refcount_tree(struct inode *inode, + struct buffer_head *di_bh) +{ + int ret; + struct ocfs2_inode_info *oi = OCFS2_I(inode); + struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; + + down_write(&oi->ip_xattr_sem); + down_write(&oi->ip_alloc_sem); + + if (oi->ip_clusters) + goto out; + + if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) && di->i_xattr_loc) + goto out; + + if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL && + ocfs2_has_inline_xattr_value_outside(inode, di)) + goto out; + + ret = ocfs2_remove_refcount_tree(inode, di_bh); + if (ret) + mlog_errno(ret); +out: + up_write(&oi->ip_alloc_sem); + up_write(&oi->ip_xattr_sem); + return 0; +} + +/* + * Find the end range for a leaf refcount block indicated by + * el->l_recs[index].e_blkno. + */ +static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci, + struct buffer_head *ref_root_bh, + struct ocfs2_extent_block *eb, + struct ocfs2_extent_list *el, + int index, u32 *cpos_end) +{ + int ret, i, subtree_root; + u32 cpos; + u64 blkno; + struct super_block *sb = ocfs2_metadata_cache_get_super(ci); + struct ocfs2_path *left_path = NULL, *right_path = NULL; + struct ocfs2_extent_tree et; + struct ocfs2_extent_list *tmp_el; + + if (index < le16_to_cpu(el->l_next_free_rec) - 1) { + /* + * We have a extent rec after index, so just use the e_cpos + * of the next extent rec. + */ + *cpos_end = le32_to_cpu(el->l_recs[index+1].e_cpos); + return 0; + } + + if (!eb || (eb && !eb->h_next_leaf_blk)) { + /* + * We are the last extent rec, so any high cpos should + * be stored in this leaf refcount block. + */ + *cpos_end = UINT_MAX; + return 0; + } + + /* + * If the extent block isn't the last one, we have to find + * the subtree root between this extent block and the next + * leaf extent block and get the corresponding e_cpos from + * the subroot. Otherwise we may corrupt the b-tree. + */ + ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); + + left_path = ocfs2_new_path_from_et(&et); + if (!left_path) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + cpos = le32_to_cpu(eb->h_list.l_recs[index].e_cpos); + ret = ocfs2_find_path(ci, left_path, cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + right_path = ocfs2_new_path_from_path(left_path); + if (!right_path) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_find_cpos_for_right_leaf(sb, left_path, &cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_find_path(ci, right_path, cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + subtree_root = ocfs2_find_subtree_root(&et, left_path, + right_path); + + tmp_el = left_path->p_node[subtree_root].el; + blkno = left_path->p_node[subtree_root+1].bh->b_blocknr; + for (i = 0; i < le32_to_cpu(tmp_el->l_next_free_rec); i++) { + if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) { + *cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos); + break; + } + } + + BUG_ON(i == le32_to_cpu(tmp_el->l_next_free_rec)); + +out: + ocfs2_free_path(left_path); + ocfs2_free_path(right_path); + return ret; +} + +/* * Given a cpos and len, try to find the refcount record which contains cpos. * 1. If cpos can be found in one refcount record, return the record. * 2. If cpos can't be found, return a fake record which start from cpos @@ -929,10 +1066,10 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci, struct buffer_head **ret_bh) { int ret = 0, i, found; - u32 low_cpos; + u32 low_cpos, uninitialized_var(cpos_end); struct ocfs2_extent_list *el; - struct ocfs2_extent_rec *tmp, *rec = NULL; - struct ocfs2_extent_block *eb; + struct ocfs2_extent_rec *rec = NULL; + struct ocfs2_extent_block *eb = NULL; struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL; struct super_block *sb = ocfs2_metadata_cache_get_super(ci); struct ocfs2_refcount_block *rb = @@ -980,12 +1117,16 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci, } } - /* adjust len when we have ocfs2_extent_rec after it. */ - if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) { - tmp = &el->l_recs[i+1]; + if (found) { + ret = ocfs2_get_refcount_cpos_end(ci, ref_root_bh, + eb, el, i, &cpos_end); + if (ret) { + mlog_errno(ret); + goto out; + } - if (le32_to_cpu(tmp->e_cpos) < cpos + len) - len = le32_to_cpu(tmp->e_cpos) - cpos; + if (cpos_end < low_cpos + len) + len = cpos_end - low_cpos; } ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno), @@ -1099,7 +1240,7 @@ static void ocfs2_refcount_rec_merge(struct ocfs2_refcount_block *rb, static int ocfs2_change_refcount_rec(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *ref_leaf_bh, - int index, int change) + int index, int merge, int change) { int ret; struct ocfs2_refcount_block *rb = @@ -1128,12 +1269,10 @@ static int ocfs2_change_refcount_rec(handle_t *handle, } le16_add_cpu(&rl->rl_used, -1); - } else + } else if (merge) ocfs2_refcount_rec_merge(rb, index); - ret = ocfs2_journal_dirty(handle, ref_leaf_bh); - if (ret) - mlog_errno(ret); + ocfs2_journal_dirty(handle, ref_leaf_bh); out: return ret; } @@ -1147,7 +1286,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle, int ret; u16 suballoc_bit_start; u32 num_got; - u64 blkno; + u64 suballoc_loc, blkno; struct super_block *sb = ocfs2_metadata_cache_get_super(ci); struct buffer_head *new_bh = NULL; struct ocfs2_refcount_block *new_rb; @@ -1161,7 +1300,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle, goto out; } - ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1, + ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc, &suballoc_bit_start, &num_got, &blkno); if (ret) { @@ -1192,7 +1331,8 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle, memcpy(new_bh->b_data, ref_root_bh->b_data, sb->s_blocksize); new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; - new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num); + new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); + new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc); new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); new_rb->rf_blkno = cpu_to_le64(blkno); new_rb->rf_cpos = cpu_to_le32(0); @@ -1364,7 +1504,7 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh, /* change old and new rl_used accordingly. */ le16_add_cpu(&rl->rl_used, -num_moved); - new_rl->rl_used = cpu_to_le32(num_moved); + new_rl->rl_used = cpu_to_le16(num_moved); sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), sizeof(struct ocfs2_refcount_rec), @@ -1387,7 +1527,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle, int ret; u16 suballoc_bit_start; u32 num_got, new_cpos; - u64 blkno; + u64 suballoc_loc, blkno; struct super_block *sb = ocfs2_metadata_cache_get_super(ci); struct ocfs2_refcount_block *root_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; @@ -1411,7 +1551,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle, goto out; } - ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1, + ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc, &suballoc_bit_start, &num_got, &blkno); if (ret) { @@ -1438,7 +1578,8 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle, new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; memset(new_rb, 0, sb->s_blocksize); strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); - new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num); + new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); + new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc); new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); new_rb->rf_blkno = cpu_to_le64(blkno); @@ -1557,7 +1698,7 @@ static int ocfs2_adjust_refcount_rec(handle_t *handle, * 2 more credits, one for the leaf refcount block, one for * the extent block contains the extent rec. */ - ret = ocfs2_extend_trans(handle, handle->h_buffer_credits + 2); + ret = ocfs2_extend_trans(handle, 2); if (ret < 0) { mlog_errno(ret); goto out; @@ -1604,7 +1745,7 @@ static int ocfs2_insert_refcount_rec(handle_t *handle, struct buffer_head *ref_root_bh, struct buffer_head *ref_leaf_bh, struct ocfs2_refcount_rec *rec, - int index, + int index, int merge, struct ocfs2_alloc_context *meta_ac) { int ret; @@ -1662,13 +1803,10 @@ static int ocfs2_insert_refcount_rec(handle_t *handle, le16_add_cpu(&rf_list->rl_used, 1); - ocfs2_refcount_rec_merge(rb, index); + if (merge) + ocfs2_refcount_rec_merge(rb, index); - ret = ocfs2_journal_dirty(handle, ref_leaf_bh); - if (ret) { - mlog_errno(ret); - goto out; - } + ocfs2_journal_dirty(handle, ref_leaf_bh); if (index == 0) { ret = ocfs2_adjust_refcount_rec(handle, ci, @@ -1696,7 +1834,7 @@ static int ocfs2_split_refcount_rec(handle_t *handle, struct buffer_head *ref_root_bh, struct buffer_head *ref_leaf_bh, struct ocfs2_refcount_rec *split_rec, - int index, + int index, int merge, struct ocfs2_alloc_context *meta_ac, struct ocfs2_cached_dealloc_ctxt *dealloc) { @@ -1742,7 +1880,8 @@ static int ocfs2_split_refcount_rec(handle_t *handle, recs_need++; /* If the leaf block don't have enough record, expand it. */ - if (le16_to_cpu(rf_list->rl_used) + recs_need > rf_list->rl_count) { + if (le16_to_cpu(rf_list->rl_used) + recs_need > + le16_to_cpu(rf_list->rl_count)) { struct ocfs2_refcount_rec tmp_rec; u64 cpos = le64_to_cpu(orig_rec->r_cpos); len = le32_to_cpu(orig_rec->r_clusters); @@ -1804,7 +1943,7 @@ static int ocfs2_split_refcount_rec(handle_t *handle, memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec)); le64_add_cpu(&tail_rec->r_cpos, le32_to_cpu(tail_rec->r_clusters) - len); - tail_rec->r_clusters = le32_to_cpu(len); + tail_rec->r_clusters = cpu_to_le32(len); } /* @@ -1834,12 +1973,11 @@ static int ocfs2_split_refcount_rec(handle_t *handle, le32_to_cpu(split_rec->r_refcount), (unsigned long long)ref_leaf_bh->b_blocknr, index); - ocfs2_refcount_rec_merge(rb, index); + if (merge) + ocfs2_refcount_rec_merge(rb, index); } - ret = ocfs2_journal_dirty(handle, ref_leaf_bh); - if (ret) - mlog_errno(ret); + ocfs2_journal_dirty(handle, ref_leaf_bh); out: brelse(new_bh); @@ -1849,7 +1987,7 @@ out: static int __ocfs2_increase_refcount(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, - u64 cpos, u32 len, + u64 cpos, u32 len, int merge, struct ocfs2_alloc_context *meta_ac, struct ocfs2_cached_dealloc_ctxt *dealloc) { @@ -1889,7 +2027,8 @@ static int __ocfs2_increase_refcount(handle_t *handle, "count %u\n", (unsigned long long)cpos, set_len, le32_to_cpu(rec.r_refcount)); ret = ocfs2_change_refcount_rec(handle, ci, - ref_leaf_bh, index, 1); + ref_leaf_bh, index, + merge, 1); if (ret) { mlog_errno(ret); goto out; @@ -1902,7 +2041,8 @@ static int __ocfs2_increase_refcount(handle_t *handle, set_len); ret = ocfs2_insert_refcount_rec(handle, ci, ref_root_bh, ref_leaf_bh, - &rec, index, meta_ac); + &rec, index, + merge, meta_ac); if (ret) { mlog_errno(ret); goto out; @@ -1920,7 +2060,7 @@ static int __ocfs2_increase_refcount(handle_t *handle, set_len, le32_to_cpu(rec.r_refcount)); ret = ocfs2_split_refcount_rec(handle, ci, ref_root_bh, ref_leaf_bh, - &rec, index, + &rec, index, merge, meta_ac, dealloc); if (ret) { mlog_errno(ret); @@ -1970,6 +2110,7 @@ static int ocfs2_remove_refcount_extent(handle_t *handle, */ ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE, le16_to_cpu(rb->rf_suballoc_slot), + le64_to_cpu(rb->rf_suballoc_loc), le64_to_cpu(rb->rf_blkno), le16_to_cpu(rb->rf_suballoc_bit)); if (ret) { @@ -2013,6 +2154,18 @@ out: return ret; } +int ocfs2_increase_refcount(handle_t *handle, + struct ocfs2_caching_info *ci, + struct buffer_head *ref_root_bh, + u64 cpos, u32 len, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_cached_dealloc_ctxt *dealloc) +{ + return __ocfs2_increase_refcount(handle, ci, ref_root_bh, + cpos, len, 1, + meta_ac, dealloc); +} + static int ocfs2_decrease_refcount_rec(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *ref_root_bh, @@ -2033,7 +2186,7 @@ static int ocfs2_decrease_refcount_rec(handle_t *handle, if (cpos == le64_to_cpu(rec->r_cpos) && len == le32_to_cpu(rec->r_clusters)) ret = ocfs2_change_refcount_rec(handle, ci, - ref_leaf_bh, index, -1); + ref_leaf_bh, index, 1, -1); else { struct ocfs2_refcount_rec split = *rec; split.r_cpos = cpu_to_le64(cpos); @@ -2049,7 +2202,7 @@ static int ocfs2_decrease_refcount_rec(handle_t *handle, le32_to_cpu(rec->r_clusters)); ret = ocfs2_split_refcount_rec(handle, ci, ref_root_bh, ref_leaf_bh, - &split, index, + &split, index, 1, meta_ac, dealloc); } @@ -2361,21 +2514,20 @@ out: * we gonna touch and whether we need to create new blocks. * * Normally the refcount blocks store these refcount should be - * continguous also, so that we can get the number easily. - * As for meta_ac, we will at most add split 2 refcount record and - * 2 more refcount block, so just check it in a rough way. + * contiguous also, so that we can get the number easily. + * We will at most add split 2 refcount records and 2 more + * refcount blocks, so just check it in a rough way. * * Caller must hold refcount tree lock. */ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, - struct buffer_head *di_bh, + u64 refcount_loc, u64 phys_blkno, u32 clusters, int *credits, - struct ocfs2_alloc_context **meta_ac) + int *ref_blocks) { - int ret, ref_blocks = 0; - struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; + int ret; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_tree *tree; @@ -2392,14 +2544,13 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), - le64_to_cpu(di->i_refcount_loc), &tree); + refcount_loc, &tree); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_read_refcount_block(&tree->rf_ci, - le64_to_cpu(di->i_refcount_loc), + ret = ocfs2_read_refcount_block(&tree->rf_ci, refcount_loc, &ref_root_bh); if (ret) { mlog_errno(ret); @@ -2410,21 +2561,14 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, &tree->rf_ci, ref_root_bh, start_cpos, clusters, - &ref_blocks, credits); + ref_blocks, credits); if (ret) { mlog_errno(ret); goto out; } - mlog(0, "reserve new metadata %d, credits = %d\n", - ref_blocks, *credits); - - if (ref_blocks) { - ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), - ref_blocks, meta_ac); - if (ret) - mlog_errno(ret); - } + mlog(0, "reserve new metadata %d blocks, credits = %d\n", + *ref_blocks, *credits); out: brelse(ref_root_bh); @@ -2489,7 +2633,7 @@ static inline unsigned int ocfs2_cow_align_length(struct super_block *sb, * get good I/O from the resulting extent tree. */ static int ocfs2_refcount_cal_cow_clusters(struct inode *inode, - struct buffer_head *di_bh, + struct ocfs2_extent_list *el, u32 cpos, u32 write_len, u32 max_cpos, @@ -2497,8 +2641,6 @@ static int ocfs2_refcount_cal_cow_clusters(struct inode *inode, u32 *cow_len) { int ret = 0; - struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; - struct ocfs2_extent_list *el = &di->id2.i_list; int tree_height = le16_to_cpu(el->l_tree_depth), i; struct buffer_head *eb_bh = NULL; struct ocfs2_extent_block *eb = NULL; @@ -2769,13 +2911,13 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh) return 0; } -static int ocfs2_duplicate_clusters(handle_t *handle, - struct ocfs2_cow_context *context, - u32 cpos, u32 old_cluster, - u32 new_cluster, u32 new_len) +static int ocfs2_duplicate_clusters_by_page(handle_t *handle, + struct ocfs2_cow_context *context, + u32 cpos, u32 old_cluster, + u32 new_cluster, u32 new_len) { int ret = 0, partial; - struct ocfs2_caching_info *ci = context->di_et.et_ci; + struct ocfs2_caching_info *ci = context->data_et.et_ci; struct super_block *sb = ocfs2_metadata_cache_get_super(ci); u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); struct page *page; @@ -2792,7 +2934,7 @@ static int ocfs2_duplicate_clusters(handle_t *handle, while (offset < end) { page_index = offset >> PAGE_CACHE_SHIFT; - map_end = (page_index + 1) << PAGE_CACHE_SHIFT; + map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT; if (map_end > end) map_end = end; @@ -2804,8 +2946,12 @@ static int ocfs2_duplicate_clusters(handle_t *handle, page = grab_cache_page(mapping, page_index); - /* This page can't be dirtied before we CoW it out. */ - BUG_ON(PageDirty(page)); + /* + * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page + * can't be dirtied before we CoW it out. + */ + if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) + BUG_ON(PageDirty(page)); if (!PageUptodate(page)) { ret = block_read_full_page(page, ocfs2_get_block); @@ -2842,6 +2988,61 @@ unlock: return ret; } +static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, + struct ocfs2_cow_context *context, + u32 cpos, u32 old_cluster, + u32 new_cluster, u32 new_len) +{ + int ret = 0; + struct super_block *sb = context->inode->i_sb; + struct ocfs2_caching_info *ci = context->data_et.et_ci; + int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); + u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster); + u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); + struct ocfs2_super *osb = OCFS2_SB(sb); + struct buffer_head *old_bh = NULL; + struct buffer_head *new_bh = NULL; + + mlog(0, "old_cluster %u, new %u, len %u\n", old_cluster, + new_cluster, new_len); + + for (i = 0; i < blocks; i++, old_block++, new_block++) { + new_bh = sb_getblk(osb->sb, new_block); + if (new_bh == NULL) { + ret = -EIO; + mlog_errno(ret); + break; + } + + ocfs2_set_new_buffer_uptodate(ci, new_bh); + + ret = ocfs2_read_block(ci, old_block, &old_bh, NULL); + if (ret) { + mlog_errno(ret); + break; + } + + ret = ocfs2_journal_access(handle, ci, new_bh, + OCFS2_JOURNAL_ACCESS_CREATE); + if (ret) { + mlog_errno(ret); + break; + } + + memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize); + ocfs2_journal_dirty(handle, new_bh); + + brelse(new_bh); + brelse(old_bh); + new_bh = NULL; + old_bh = NULL; + } + + brelse(new_bh); + brelse(old_bh); + return ret; +} + static int ocfs2_clear_ext_refcount(handle_t *handle, struct ocfs2_extent_tree *et, u32 cpos, u32 p_cluster, u32 len, @@ -2909,7 +3110,7 @@ static int ocfs2_replace_clusters(handle_t *handle, unsigned int ext_flags) { int ret; - struct ocfs2_caching_info *ci = context->di_et.et_ci; + struct ocfs2_caching_info *ci = context->data_et.et_ci; u64 ino = ocfs2_metadata_cache_owner(ci); mlog(0, "inode %llu, cpos %u, old %u, new %u, len %u, ext_flags %u\n", @@ -2917,15 +3118,15 @@ static int ocfs2_replace_clusters(handle_t *handle, /*If the old clusters is unwritten, no need to duplicate. */ if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { - ret = ocfs2_duplicate_clusters(handle, context, cpos, - old, new, len); + ret = context->cow_duplicate_clusters(handle, context, cpos, + old, new, len); if (ret) { mlog_errno(ret); goto out; } } - ret = ocfs2_clear_ext_refcount(handle, &context->di_et, + ret = ocfs2_clear_ext_refcount(handle, &context->data_et, cpos, new, len, ext_flags, context->meta_ac, &context->dealloc); if (ret) @@ -2958,7 +3159,7 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb, while (offset < end) { page_index = offset >> PAGE_CACHE_SHIFT; - map_end = (page_index + 1) << PAGE_CACHE_SHIFT; + map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT; if (map_end > end) map_end = end; @@ -2983,6 +3184,15 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb, return ret; } +static int ocfs2_di_get_clusters(struct ocfs2_cow_context *context, + u32 v_cluster, u32 *p_cluster, + u32 *num_clusters, + unsigned int *extent_flags) +{ + return ocfs2_get_clusters(context->inode, v_cluster, p_cluster, + num_clusters, extent_flags); +} + static int ocfs2_make_clusters_writable(struct super_block *sb, struct ocfs2_cow_context *context, u32 cpos, u32 p_cluster, @@ -2994,14 +3204,15 @@ static int ocfs2_make_clusters_writable(struct super_block *sb, struct ocfs2_super *osb = OCFS2_SB(sb); handle_t *handle; struct buffer_head *ref_leaf_bh = NULL; + struct ocfs2_caching_info *ref_ci = &context->ref_tree->rf_ci; struct ocfs2_refcount_rec rec; mlog(0, "cpos %u, p_cluster %u, num_clusters %u, e_flags %u\n", cpos, p_cluster, num_clusters, e_flags); ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters, - &context->di_et, - context->ref_ci, + &context->data_et, + ref_ci, context->ref_root_bh, &context->meta_ac, &context->data_ac, &credits); @@ -3010,6 +3221,10 @@ static int ocfs2_make_clusters_writable(struct super_block *sb, return ret; } + if (context->post_refcount) + credits += context->post_refcount->credits; + + credits += context->extra_credits; handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -3018,8 +3233,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb, } while (num_clusters) { - ret = ocfs2_get_refcount_rec(context->ref_ci, - context->ref_root_bh, + ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh, p_cluster, num_clusters, &rec, &index, &ref_leaf_bh); if (ret) { @@ -3041,7 +3255,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb, */ if (le32_to_cpu(rec.r_refcount) == 1) { delete = 0; - ret = ocfs2_clear_ext_refcount(handle, &context->di_et, + ret = ocfs2_clear_ext_refcount(handle, + &context->data_et, cpos, p_cluster, set_len, e_flags, context->meta_ac, @@ -3053,7 +3268,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb, } else { delete = 1; - ret = __ocfs2_claim_clusters(osb, handle, + ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, set_len, &new_bit, &new_len); @@ -3072,7 +3287,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb, set_len = new_len; } - ret = __ocfs2_decrease_refcount(handle, context->ref_ci, + ret = __ocfs2_decrease_refcount(handle, ref_ci, context->ref_root_bh, p_cluster, set_len, context->meta_ac, @@ -3089,13 +3304,25 @@ static int ocfs2_make_clusters_writable(struct super_block *sb, ref_leaf_bh = NULL; } + /* handle any post_cow action. */ + if (context->post_refcount && context->post_refcount->func) { + ret = context->post_refcount->func(context->inode, handle, + context->post_refcount->para); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + } + /* * Here we should write the new page out first if we are * in write-back mode. */ - ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters); - if (ret) - mlog_errno(ret); + if (context->get_clusters == ocfs2_di_get_clusters) { + ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters); + if (ret) + mlog_errno(ret); + } out_commit: ocfs2_commit_trans(osb, handle); @@ -3114,17 +3341,14 @@ out: return ret; } -static int ocfs2_replace_cow(struct inode *inode, - struct buffer_head *di_bh, - struct buffer_head *ref_root_bh, - struct ocfs2_caching_info *ref_ci, - u32 cow_start, u32 cow_len) +static int ocfs2_replace_cow(struct ocfs2_cow_context *context) { int ret = 0; - u32 p_cluster, num_clusters, start = cow_start; + struct inode *inode = context->inode; + u32 cow_start = context->cow_start, cow_len = context->cow_len; + u32 p_cluster, num_clusters; unsigned int ext_flags; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct ocfs2_cow_context *context; if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) { ocfs2_error(inode->i_sb, "Inode %lu want to use refcount " @@ -3133,26 +3357,11 @@ static int ocfs2_replace_cow(struct inode *inode, return -EROFS; } - context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); - if (!context) { - ret = -ENOMEM; - mlog_errno(ret); - return ret; - } - - context->inode = inode; - context->cow_start = cow_start; - context->cow_len = cow_len; - context->ref_ci = ref_ci; - context->ref_root_bh = ref_root_bh; - ocfs2_init_dealloc_ctxt(&context->dealloc); - ocfs2_init_dinode_extent_tree(&context->di_et, - INODE_CACHE(inode), di_bh); while (cow_len) { - ret = ocfs2_get_clusters(inode, cow_start, &p_cluster, - &num_clusters, &ext_flags); + ret = context->get_clusters(context, cow_start, &p_cluster, + &num_clusters, &ext_flags); if (ret) { mlog_errno(ret); break; @@ -3175,20 +3384,11 @@ static int ocfs2_replace_cow(struct inode *inode, cow_start += num_clusters; } - - /* - * truncate the extent map here since no matter whether we meet with - * any error during the action, we shouldn't trust cached extent map - * any more. - */ - ocfs2_extent_map_trunc(inode, start); - if (ocfs2_dealloc_has_cluster(&context->dealloc)) { ocfs2_schedule_truncate_log_flush(osb, 1); ocfs2_run_deallocs(osb, &context->dealloc); } - kfree(context); return ret; } @@ -3208,10 +3408,11 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_tree *ref_tree; + struct ocfs2_cow_context *context = NULL; BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); - ret = ocfs2_refcount_cal_cow_clusters(inode, di_bh, + ret = ocfs2_refcount_cal_cow_clusters(inode, &di->id2.i_list, cpos, write_len, max_cpos, &cow_start, &cow_len); if (ret) { @@ -3225,6 +3426,13 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, BUG_ON(cow_len == 0); + context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); + if (!context) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 1, &ref_tree, &ref_root_bh); if (ret) { @@ -3232,14 +3440,32 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, goto out; } - ret = ocfs2_replace_cow(inode, di_bh, ref_root_bh, &ref_tree->rf_ci, - cow_start, cow_len); + context->inode = inode; + context->cow_start = cow_start; + context->cow_len = cow_len; + context->ref_tree = ref_tree; + context->ref_root_bh = ref_root_bh; + context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; + context->get_clusters = ocfs2_di_get_clusters; + + ocfs2_init_dinode_extent_tree(&context->data_et, + INODE_CACHE(inode), di_bh); + + ret = ocfs2_replace_cow(context); if (ret) mlog_errno(ret); + /* + * truncate the extent map here since no matter whether we meet with + * any error during the action, we shouldn't trust cached extent map + * any more. + */ + ocfs2_extent_map_trunc(inode, cow_start); + ocfs2_unlock_refcount_tree(osb, ref_tree, 1); brelse(ref_root_bh); out: + kfree(context); return ret; } @@ -3283,33 +3509,195 @@ int ocfs2_refcount_cow(struct inode *inode, return ret; } +static int ocfs2_xattr_value_get_clusters(struct ocfs2_cow_context *context, + u32 v_cluster, u32 *p_cluster, + u32 *num_clusters, + unsigned int *extent_flags) +{ + struct inode *inode = context->inode; + struct ocfs2_xattr_value_root *xv = context->cow_object; + + return ocfs2_xattr_get_clusters(inode, v_cluster, p_cluster, + num_clusters, &xv->xr_list, + extent_flags); +} + /* - * Insert a new extent into refcount tree and mark a extent rec - * as refcounted in the dinode tree. + * Given a xattr value root, calculate the most meta/credits we need for + * refcount tree change if we truncate it to 0. */ -int ocfs2_add_refcount_flag(struct inode *inode, - struct ocfs2_extent_tree *data_et, - struct ocfs2_caching_info *ref_ci, - struct buffer_head *ref_root_bh, - u32 cpos, u32 p_cluster, u32 num_clusters, - struct ocfs2_cached_dealloc_ctxt *dealloc) +int ocfs2_refcounted_xattr_delete_need(struct inode *inode, + struct ocfs2_caching_info *ref_ci, + struct buffer_head *ref_root_bh, + struct ocfs2_xattr_value_root *xv, + int *meta_add, int *credits) { - int ret; - handle_t *handle; - int credits = 1, ref_blocks = 0; - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct ocfs2_alloc_context *meta_ac = NULL; + int ret = 0, index, ref_blocks = 0; + u32 p_cluster, num_clusters; + u32 cpos = 0, clusters = le32_to_cpu(xv->xr_clusters); + struct ocfs2_refcount_block *rb; + struct ocfs2_refcount_rec rec; + struct buffer_head *ref_leaf_bh = NULL; - ret = ocfs2_calc_refcount_meta_credits(inode->i_sb, - ref_ci, ref_root_bh, - p_cluster, num_clusters, - &ref_blocks, &credits); - if (ret) { - mlog_errno(ret); - goto out; - } + while (cpos < clusters) { + ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, + &num_clusters, &xv->xr_list, + NULL); + if (ret) { + mlog_errno(ret); + goto out; + } - mlog(0, "reserve new metadata %d, credits = %d\n", + cpos += num_clusters; + + while (num_clusters) { + ret = ocfs2_get_refcount_rec(ref_ci, ref_root_bh, + p_cluster, num_clusters, + &rec, &index, + &ref_leaf_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + BUG_ON(!rec.r_refcount); + + rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; + + /* + * We really don't know whether the other clusters is in + * this refcount block or not, so just take the worst + * case that all the clusters are in this block and each + * one will split a refcount rec, so totally we need + * clusters * 2 new refcount rec. + */ + if (le64_to_cpu(rb->rf_records.rl_used) + clusters * 2 > + le16_to_cpu(rb->rf_records.rl_count)) + ref_blocks++; + + *credits += 1; + brelse(ref_leaf_bh); + ref_leaf_bh = NULL; + + if (num_clusters <= le32_to_cpu(rec.r_clusters)) + break; + else + num_clusters -= le32_to_cpu(rec.r_clusters); + p_cluster += num_clusters; + } + } + + *meta_add += ref_blocks; + if (!ref_blocks) + goto out; + + rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; + if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) + *credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; + else { + struct ocfs2_extent_tree et; + + ocfs2_init_refcount_extent_tree(&et, ref_ci, ref_root_bh); + *credits += ocfs2_calc_extend_credits(inode->i_sb, + et.et_root_el, + ref_blocks); + } + +out: + brelse(ref_leaf_bh); + return ret; +} + +/* + * Do CoW for xattr. + */ +int ocfs2_refcount_cow_xattr(struct inode *inode, + struct ocfs2_dinode *di, + struct ocfs2_xattr_value_buf *vb, + struct ocfs2_refcount_tree *ref_tree, + struct buffer_head *ref_root_bh, + u32 cpos, u32 write_len, + struct ocfs2_post_refcount *post) +{ + int ret; + struct ocfs2_xattr_value_root *xv = vb->vb_xv; + struct ocfs2_inode_info *oi = OCFS2_I(inode); + struct ocfs2_cow_context *context = NULL; + u32 cow_start, cow_len; + + BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); + + ret = ocfs2_refcount_cal_cow_clusters(inode, &xv->xr_list, + cpos, write_len, UINT_MAX, + &cow_start, &cow_len); + if (ret) { + mlog_errno(ret); + goto out; + } + + BUG_ON(cow_len == 0); + + context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); + if (!context) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + context->inode = inode; + context->cow_start = cow_start; + context->cow_len = cow_len; + context->ref_tree = ref_tree; + context->ref_root_bh = ref_root_bh;; + context->cow_object = xv; + + context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd; + /* We need the extra credits for duplicate_clusters by jbd. */ + context->extra_credits = + ocfs2_clusters_to_blocks(inode->i_sb, 1) * cow_len; + context->get_clusters = ocfs2_xattr_value_get_clusters; + context->post_refcount = post; + + ocfs2_init_xattr_value_extent_tree(&context->data_et, + INODE_CACHE(inode), vb); + + ret = ocfs2_replace_cow(context); + if (ret) + mlog_errno(ret); + +out: + kfree(context); + return ret; +} + +/* + * Insert a new extent into refcount tree and mark a extent rec + * as refcounted in the dinode tree. + */ +int ocfs2_add_refcount_flag(struct inode *inode, + struct ocfs2_extent_tree *data_et, + struct ocfs2_caching_info *ref_ci, + struct buffer_head *ref_root_bh, + u32 cpos, u32 p_cluster, u32 num_clusters, + struct ocfs2_cached_dealloc_ctxt *dealloc, + struct ocfs2_post_refcount *post) +{ + int ret; + handle_t *handle; + int credits = 1, ref_blocks = 0; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_alloc_context *meta_ac = NULL; + + ret = ocfs2_calc_refcount_meta_credits(inode->i_sb, + ref_ci, ref_root_bh, + p_cluster, num_clusters, + &ref_blocks, &credits); + if (ret) { + mlog_errno(ret); + goto out; + } + + mlog(0, "reserve new metadata %d, credits = %d\n", ref_blocks, credits); if (ref_blocks) { @@ -3321,6 +3709,9 @@ int ocfs2_add_refcount_flag(struct inode *inode, } } + if (post) + credits += post->credits; + handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -3337,10 +3728,18 @@ int ocfs2_add_refcount_flag(struct inode *inode, } ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh, - p_cluster, num_clusters, + p_cluster, num_clusters, 0, meta_ac, dealloc); - if (ret) + if (ret) { mlog_errno(ret); + goto out_commit; + } + + if (post && post->func) { + ret = post->func(inode, handle, post->para); + if (ret) + mlog_errno(ret); + } out_commit: ocfs2_commit_trans(osb, handle); @@ -3350,10 +3749,44 @@ out: return ret; } +static int ocfs2_change_ctime(struct inode *inode, + struct buffer_head *di_bh) +{ + int ret; + handle_t *handle; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; + + handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), + OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + + inode->i_ctime = CURRENT_TIME; + di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); + di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + + ocfs2_journal_dirty(handle, di_bh); + +out_commit: + ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); +out: + return ret; +} + static int ocfs2_attach_refcount_tree(struct inode *inode, struct buffer_head *di_bh) { - int ret; + int ret, data_changed = 0; struct buffer_head *ref_root_bh = NULL; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; @@ -3384,6 +3817,9 @@ static int ocfs2_attach_refcount_tree(struct inode *inode, goto out; } + if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) + goto attach_xattr; + ocfs2_init_dinode_extent_tree(&di_et, INODE_CACHE(inode), di_bh); size = i_size_read(inode); @@ -3399,15 +3835,36 @@ static int ocfs2_attach_refcount_tree(struct inode *inode, &ref_tree->rf_ci, ref_root_bh, cpos, p_cluster, num_clusters, - &dealloc); + &dealloc, NULL); if (ret) { mlog_errno(ret); - break; + goto unlock; } + + data_changed = 1; } cpos += num_clusters; } +attach_xattr: + if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) { + ret = ocfs2_xattr_attach_refcount_tree(inode, di_bh, + &ref_tree->rf_ci, + ref_root_bh, + &dealloc); + if (ret) { + mlog_errno(ret); + goto unlock; + } + } + + if (data_changed) { + ret = ocfs2_change_ctime(inode, di_bh); + if (ret) + mlog_errno(ret); + } + +unlock: ocfs2_unlock_refcount_tree(osb, ref_tree, 1); brelse(ref_root_bh); @@ -3457,17 +3914,16 @@ static int ocfs2_add_refcounted_extent(struct inode *inode, } ret = ocfs2_insert_extent(handle, et, cpos, - cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb, - p_cluster)), + ocfs2_clusters_to_blocks(inode->i_sb, p_cluster), num_clusters, ext_flags, meta_ac); if (ret) { mlog_errno(ret); goto out_commit; } - ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh, - p_cluster, num_clusters, - meta_ac, dealloc); + ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh, + p_cluster, num_clusters, + meta_ac, dealloc); if (ret) mlog_errno(ret); @@ -3479,6 +3935,49 @@ out: return ret; } +static int ocfs2_duplicate_inline_data(struct inode *s_inode, + struct buffer_head *s_bh, + struct inode *t_inode, + struct buffer_head *t_bh) +{ + int ret; + handle_t *handle; + struct ocfs2_super *osb = OCFS2_SB(s_inode->i_sb); + struct ocfs2_dinode *s_di = (struct ocfs2_dinode *)s_bh->b_data; + struct ocfs2_dinode *t_di = (struct ocfs2_dinode *)t_bh->b_data; + + BUG_ON(!(OCFS2_I(s_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); + + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access_di(handle, INODE_CACHE(t_inode), t_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + + t_di->id2.i_data.id_count = s_di->id2.i_data.id_count; + memcpy(t_di->id2.i_data.id_data, s_di->id2.i_data.id_data, + le16_to_cpu(s_di->id2.i_data.id_count)); + spin_lock(&OCFS2_I(t_inode)->ip_lock); + OCFS2_I(t_inode)->ip_dyn_features |= OCFS2_INLINE_DATA_FL; + t_di->i_dyn_features = cpu_to_le16(OCFS2_I(t_inode)->ip_dyn_features); + spin_unlock(&OCFS2_I(t_inode)->ip_lock); + + ocfs2_journal_dirty(handle, t_bh); + +out_commit: + ocfs2_commit_trans(osb, handle); +out: + return ret; +} + static int ocfs2_duplicate_extent_list(struct inode *s_inode, struct inode *t_inode, struct buffer_head *t_bh, @@ -3522,10 +4021,88 @@ out: return ret; } +/* + * change the new file's attributes to the src. + * + * reflink creates a snapshot of a file, that means the attributes + * must be identical except for three exceptions - nlink, ino, and ctime. + */ +static int ocfs2_complete_reflink(struct inode *s_inode, + struct buffer_head *s_bh, + struct inode *t_inode, + struct buffer_head *t_bh, + bool preserve) +{ + int ret; + handle_t *handle; + struct ocfs2_dinode *s_di = (struct ocfs2_dinode *)s_bh->b_data; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)t_bh->b_data; + loff_t size = i_size_read(s_inode); + + handle = ocfs2_start_trans(OCFS2_SB(t_inode->i_sb), + OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + return ret; + } + + ret = ocfs2_journal_access_di(handle, INODE_CACHE(t_inode), t_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + + spin_lock(&OCFS2_I(t_inode)->ip_lock); + OCFS2_I(t_inode)->ip_clusters = OCFS2_I(s_inode)->ip_clusters; + OCFS2_I(t_inode)->ip_attr = OCFS2_I(s_inode)->ip_attr; + OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features; + spin_unlock(&OCFS2_I(t_inode)->ip_lock); + i_size_write(t_inode, size); + t_inode->i_blocks = s_inode->i_blocks; + + di->i_xattr_inline_size = s_di->i_xattr_inline_size; + di->i_clusters = s_di->i_clusters; + di->i_size = s_di->i_size; + di->i_dyn_features = s_di->i_dyn_features; + di->i_attr = s_di->i_attr; + + if (preserve) { + t_inode->i_uid = s_inode->i_uid; + t_inode->i_gid = s_inode->i_gid; + t_inode->i_mode = s_inode->i_mode; + di->i_uid = s_di->i_uid; + di->i_gid = s_di->i_gid; + di->i_mode = s_di->i_mode; + + /* + * update time. + * we want mtime to appear identical to the source and + * update ctime. + */ + t_inode->i_ctime = CURRENT_TIME; + + di->i_ctime = cpu_to_le64(t_inode->i_ctime.tv_sec); + di->i_ctime_nsec = cpu_to_le32(t_inode->i_ctime.tv_nsec); + + t_inode->i_mtime = s_inode->i_mtime; + di->i_mtime = s_di->i_mtime; + di->i_mtime_nsec = s_di->i_mtime_nsec; + } + + ocfs2_journal_dirty(handle, t_bh); + +out_commit: + ocfs2_commit_trans(OCFS2_SB(t_inode->i_sb), handle); + return ret; +} + static int ocfs2_create_reflink_node(struct inode *s_inode, struct buffer_head *s_bh, struct inode *t_inode, - struct buffer_head *t_bh) + struct buffer_head *t_bh, + bool preserve) { int ret; struct buffer_head *ref_root_bh = NULL; @@ -3544,6 +4121,14 @@ static int ocfs2_create_reflink_node(struct inode *s_inode, goto out; } + if (OCFS2_I(s_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { + ret = ocfs2_duplicate_inline_data(s_inode, s_bh, + t_inode, t_bh); + if (ret) + mlog_errno(ret); + goto out; + } + ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), 1, &ref_tree, &ref_root_bh); if (ret) { @@ -3555,9 +4140,12 @@ static int ocfs2_create_reflink_node(struct inode *s_inode, ret = ocfs2_duplicate_extent_list(s_inode, t_inode, t_bh, &ref_tree->rf_ci, ref_root_bh, &dealloc); - if (ret) + if (ret) { mlog_errno(ret); + goto out_unlock_refcount; + } +out_unlock_refcount: ocfs2_unlock_refcount_tree(osb, ref_tree, 1); brelse(ref_root_bh); out: @@ -3568,3 +4156,294 @@ out: return ret; } + +static int __ocfs2_reflink(struct dentry *old_dentry, + struct buffer_head *old_bh, + struct inode *new_inode, + bool preserve) +{ + int ret; + struct inode *inode = old_dentry->d_inode; + struct buffer_head *new_bh = NULL; + + ret = filemap_fdatawrite(inode->i_mapping); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_attach_refcount_tree(inode, old_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + mutex_lock(&new_inode->i_mutex); + ret = ocfs2_inode_lock(new_inode, &new_bh, 1); + if (ret) { + mlog_errno(ret); + goto out_unlock; + } + + ret = ocfs2_create_reflink_node(inode, old_bh, + new_inode, new_bh, preserve); + if (ret) { + mlog_errno(ret); + goto inode_unlock; + } + + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_XATTR_FL) { + ret = ocfs2_reflink_xattrs(inode, old_bh, + new_inode, new_bh, + preserve); + if (ret) { + mlog_errno(ret); + goto inode_unlock; + } + } + + ret = ocfs2_complete_reflink(inode, old_bh, + new_inode, new_bh, preserve); + if (ret) + mlog_errno(ret); + +inode_unlock: + ocfs2_inode_unlock(new_inode, 1); + brelse(new_bh); +out_unlock: + mutex_unlock(&new_inode->i_mutex); +out: + if (!ret) { + ret = filemap_fdatawait(inode->i_mapping); + if (ret) + mlog_errno(ret); + } + return ret; +} + +static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry, bool preserve) +{ + int error; + struct inode *inode = old_dentry->d_inode; + struct buffer_head *old_bh = NULL; + struct inode *new_orphan_inode = NULL; + + if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) + return -EOPNOTSUPP; + + error = ocfs2_create_inode_in_orphan(dir, inode->i_mode, + &new_orphan_inode); + if (error) { + mlog_errno(error); + goto out; + } + + error = ocfs2_inode_lock(inode, &old_bh, 1); + if (error) { + mlog_errno(error); + goto out; + } + + down_write(&OCFS2_I(inode)->ip_xattr_sem); + down_write(&OCFS2_I(inode)->ip_alloc_sem); + error = __ocfs2_reflink(old_dentry, old_bh, + new_orphan_inode, preserve); + up_write(&OCFS2_I(inode)->ip_alloc_sem); + up_write(&OCFS2_I(inode)->ip_xattr_sem); + + ocfs2_inode_unlock(inode, 1); + brelse(old_bh); + + if (error) { + mlog_errno(error); + goto out; + } + + /* If the security isn't preserved, we need to re-initialize them. */ + if (!preserve) { + error = ocfs2_init_security_and_acl(dir, new_orphan_inode); + if (error) + mlog_errno(error); + } +out: + if (!error) { + error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode, + new_dentry); + if (error) + mlog_errno(error); + } + + if (new_orphan_inode) { + /* + * We need to open_unlock the inode no matter whether we + * succeed or not, so that other nodes can delete it later. + */ + ocfs2_open_unlock(new_orphan_inode); + if (error) + iput(new_orphan_inode); + } + + return error; +} + +/* + * Below here are the bits used by OCFS2_IOC_REFLINK() to fake + * sys_reflink(). This will go away when vfs_reflink() exists in + * fs/namei.c. + */ + +/* copied from may_create in VFS. */ +static inline int ocfs2_may_create(struct inode *dir, struct dentry *child) +{ + if (child->d_inode) + return -EEXIST; + if (IS_DEADDIR(dir)) + return -ENOENT; + return inode_permission(dir, MAY_WRITE | MAY_EXEC); +} + +/* copied from user_path_parent. */ +static int ocfs2_user_path_parent(const char __user *path, + struct nameidata *nd, char **name) +{ + char *s = getname(path); + int error; + + if (IS_ERR(s)) + return PTR_ERR(s); + + error = path_lookup(s, LOOKUP_PARENT, nd); + if (error) + putname(s); + else + *name = s; + + return error; +} + +/** + * ocfs2_vfs_reflink - Create a reference-counted link + * + * @old_dentry: source dentry + inode + * @dir: directory to create the target + * @new_dentry: target dentry + * @preserve: if true, preserve all file attributes + */ +static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry, bool preserve) +{ + struct inode *inode = old_dentry->d_inode; + int error; + + if (!inode) + return -ENOENT; + + error = ocfs2_may_create(dir, new_dentry); + if (error) + return error; + + if (dir->i_sb != inode->i_sb) + return -EXDEV; + + /* + * A reflink to an append-only or immutable file cannot be created. + */ + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return -EPERM; + + /* Only regular files can be reflinked. */ + if (!S_ISREG(inode->i_mode)) + return -EPERM; + + /* + * If the caller wants to preserve ownership, they require the + * rights to do so. + */ + if (preserve) { + if ((current_fsuid() != inode->i_uid) && !capable(CAP_CHOWN)) + return -EPERM; + if (!in_group_p(inode->i_gid) && !capable(CAP_CHOWN)) + return -EPERM; + } + + /* + * If the caller is modifying any aspect of the attributes, they + * are not creating a snapshot. They need read permission on the + * file. + */ + if (!preserve) { + error = inode_permission(inode, MAY_READ); + if (error) + return error; + } + + mutex_lock(&inode->i_mutex); + dquot_initialize(dir); + error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve); + mutex_unlock(&inode->i_mutex); + if (!error) + fsnotify_create(dir, new_dentry); + return error; +} +/* + * Most codes are copied from sys_linkat. + */ +int ocfs2_reflink_ioctl(struct inode *inode, + const char __user *oldname, + const char __user *newname, + bool preserve) +{ + struct dentry *new_dentry; + struct nameidata nd; + struct path old_path; + int error; + char *to = NULL; + + if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) + return -EOPNOTSUPP; + + error = user_path_at(AT_FDCWD, oldname, 0, &old_path); + if (error) { + mlog_errno(error); + return error; + } + + error = ocfs2_user_path_parent(newname, &nd, &to); + if (error) { + mlog_errno(error); + goto out; + } + + error = -EXDEV; + if (old_path.mnt != nd.path.mnt) + goto out_release; + new_dentry = lookup_create(&nd, 0); + error = PTR_ERR(new_dentry); + if (IS_ERR(new_dentry)) { + mlog_errno(error); + goto out_unlock; + } + + error = mnt_want_write(nd.path.mnt); + if (error) { + mlog_errno(error); + goto out_dput; + } + + error = ocfs2_vfs_reflink(old_path.dentry, + nd.path.dentry->d_inode, + new_dentry, preserve); + mnt_drop_write(nd.path.mnt); +out_dput: + dput(new_dentry); +out_unlock: + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); +out_release: + path_put(&nd.path); + putname(to); +out: + path_put(&old_path); + + return error; +}