/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * refcounttree.c
 *
 * Copyright (C) 2009 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
#define MLOG_MASK_PREFIX ML_REFCOUNT
#include <cluster/masklog.h>
#include "ocfs2.h"
#include "inode.h"
#include "alloc.h"
#include "suballoc.h"
#include "journal.h"
#include "uptodate.h"
#include "super.h"
#include "buffer_head_io.h"
#include "blockcheck.h"
#include "refcounttree.h"
32 static inline struct ocfs2_refcount_tree *
33 cache_info_to_refcount(struct ocfs2_caching_info *ci)
35 return container_of(ci, struct ocfs2_refcount_tree, rf_ci);
38 static int ocfs2_validate_refcount_block(struct super_block *sb,
39 struct buffer_head *bh)
42 struct ocfs2_refcount_block *rb =
43 (struct ocfs2_refcount_block *)bh->b_data;
45 mlog(0, "Validating refcount block %llu\n",
46 (unsigned long long)bh->b_blocknr);
48 BUG_ON(!buffer_uptodate(bh));
51 * If the ecc fails, we return the error but otherwise
52 * leave the filesystem running. We know any error is
53 * local to this block.
55 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &rb->rf_check);
57 mlog(ML_ERROR, "Checksum failed for refcount block %llu\n",
58 (unsigned long long)bh->b_blocknr);
63 if (!OCFS2_IS_VALID_REFCOUNT_BLOCK(rb)) {
65 "Refcount block #%llu has bad signature %.*s",
66 (unsigned long long)bh->b_blocknr, 7,
71 if (le64_to_cpu(rb->rf_blkno) != bh->b_blocknr) {
73 "Refcount block #%llu has an invalid rf_blkno "
75 (unsigned long long)bh->b_blocknr,
76 (unsigned long long)le64_to_cpu(rb->rf_blkno));
80 if (le32_to_cpu(rb->rf_fs_generation) != OCFS2_SB(sb)->fs_generation) {
82 "Refcount block #%llu has an invalid "
83 "rf_fs_generation of #%u",
84 (unsigned long long)bh->b_blocknr,
85 le32_to_cpu(rb->rf_fs_generation));
92 static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci,
94 struct buffer_head **bh)
97 struct buffer_head *tmp = *bh;
99 rc = ocfs2_read_block(ci, rb_blkno, &tmp,
100 ocfs2_validate_refcount_block);
102 /* If ocfs2_read_block() got us a new bh, pass it up. */
109 static u64 ocfs2_refcount_cache_owner(struct ocfs2_caching_info *ci)
111 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
116 static struct super_block *
117 ocfs2_refcount_cache_get_super(struct ocfs2_caching_info *ci)
119 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
124 static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci)
126 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
128 spin_lock(&rf->rf_lock);
131 static void ocfs2_refcount_cache_unlock(struct ocfs2_caching_info *ci)
133 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
135 spin_unlock(&rf->rf_lock);
138 static void ocfs2_refcount_cache_io_lock(struct ocfs2_caching_info *ci)
140 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
142 mutex_lock(&rf->rf_io_mutex);
145 static void ocfs2_refcount_cache_io_unlock(struct ocfs2_caching_info *ci)
147 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
149 mutex_unlock(&rf->rf_io_mutex);
152 static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = {
153 .co_owner = ocfs2_refcount_cache_owner,
154 .co_get_super = ocfs2_refcount_cache_get_super,
155 .co_cache_lock = ocfs2_refcount_cache_lock,
156 .co_cache_unlock = ocfs2_refcount_cache_unlock,
157 .co_io_lock = ocfs2_refcount_cache_io_lock,
158 .co_io_unlock = ocfs2_refcount_cache_io_unlock,
161 static struct ocfs2_refcount_tree *
162 ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno)
164 struct rb_node *n = osb->osb_rf_lock_tree.rb_node;
165 struct ocfs2_refcount_tree *tree = NULL;
168 tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node);
170 if (blkno < tree->rf_blkno)
172 else if (blkno > tree->rf_blkno)
181 /* osb_lock is already locked. */
182 static void ocfs2_insert_refcount_tree(struct ocfs2_super *osb,
183 struct ocfs2_refcount_tree *new)
185 u64 rf_blkno = new->rf_blkno;
186 struct rb_node *parent = NULL;
187 struct rb_node **p = &osb->osb_rf_lock_tree.rb_node;
188 struct ocfs2_refcount_tree *tmp;
193 tmp = rb_entry(parent, struct ocfs2_refcount_tree,
196 if (rf_blkno < tmp->rf_blkno)
198 else if (rf_blkno > tmp->rf_blkno)
201 /* This should never happen! */
202 mlog(ML_ERROR, "Duplicate refcount block %llu found!\n",
203 (unsigned long long)rf_blkno);
208 rb_link_node(&new->rf_node, parent, p);
209 rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree);
212 static void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree)
214 ocfs2_metadata_cache_exit(&tree->rf_ci);
215 ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres);
216 ocfs2_lock_res_free(&tree->rf_lockres);
221 ocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb,
222 struct ocfs2_refcount_tree *tree)
224 rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree);
225 if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree)
226 osb->osb_ref_tree_lru = NULL;
229 static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb,
230 struct ocfs2_refcount_tree *tree)
232 spin_lock(&osb->osb_lock);
233 ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
234 spin_unlock(&osb->osb_lock);
237 void ocfs2_kref_remove_refcount_tree(struct kref *kref)
239 struct ocfs2_refcount_tree *tree =
240 container_of(kref, struct ocfs2_refcount_tree, rf_getcnt);
242 ocfs2_free_refcount_tree(tree);
246 ocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree)
248 kref_get(&tree->rf_getcnt);
252 ocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree)
254 kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree);
257 static inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new,
258 struct super_block *sb)
260 ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops);
261 mutex_init(&new->rf_io_mutex);
263 spin_lock_init(&new->rf_lock);
266 static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb,
267 struct ocfs2_refcount_tree *new,
268 u64 rf_blkno, u32 generation)
270 init_rwsem(&new->rf_sem);
271 ocfs2_refcount_lock_res_init(&new->rf_lockres, osb,
272 rf_blkno, generation);
275 static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
276 struct ocfs2_refcount_tree **ret_tree)
279 struct ocfs2_refcount_tree *tree, *new = NULL;
280 struct buffer_head *ref_root_bh = NULL;
281 struct ocfs2_refcount_block *ref_rb;
283 spin_lock(&osb->osb_lock);
284 if (osb->osb_ref_tree_lru &&
285 osb->osb_ref_tree_lru->rf_blkno == rf_blkno)
286 tree = osb->osb_ref_tree_lru;
288 tree = ocfs2_find_refcount_tree(osb, rf_blkno);
292 spin_unlock(&osb->osb_lock);
294 new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS);
300 new->rf_blkno = rf_blkno;
301 kref_init(&new->rf_getcnt);
302 ocfs2_init_refcount_tree_ci(new, osb->sb);
305 * We need the generation to create the refcount tree lock and since
306 * it isn't changed during the tree modification, we are safe here to
307 * read without protection.
308 * We also have to purge the cache after we create the lock since the
309 * refcount block may have the stale data. It can only be trusted when
310 * we hold the refcount lock.
312 ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh);
315 ocfs2_metadata_cache_exit(&new->rf_ci);
320 ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
321 new->rf_generation = le32_to_cpu(ref_rb->rf_generation);
322 ocfs2_init_refcount_tree_lock(osb, new, rf_blkno,
324 ocfs2_metadata_cache_purge(&new->rf_ci);
326 spin_lock(&osb->osb_lock);
327 tree = ocfs2_find_refcount_tree(osb, rf_blkno);
331 ocfs2_insert_refcount_tree(osb, new);
339 osb->osb_ref_tree_lru = tree;
341 spin_unlock(&osb->osb_lock);
344 ocfs2_free_refcount_tree(new);
350 static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno)
353 struct buffer_head *di_bh = NULL;
354 struct ocfs2_dinode *di;
356 ret = ocfs2_read_inode_block(inode, &di_bh);
362 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
364 di = (struct ocfs2_dinode *)di_bh->b_data;
365 *ref_blkno = le64_to_cpu(di->i_refcount_loc);
371 static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
372 struct ocfs2_refcount_tree *tree, int rw)
376 ret = ocfs2_refcount_lock(tree, rw);
383 down_write(&tree->rf_sem);
385 down_read(&tree->rf_sem);
392 * Lock the refcount tree pointed by ref_blkno and return the tree.
393 * In most case, we lock the tree and read the refcount block.
394 * So read it here if the caller really needs it.
396 * If the tree has been re-created by other node, it will free the
397 * old one and re-create it.
399 int ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
400 u64 ref_blkno, int rw,
401 struct ocfs2_refcount_tree **ret_tree,
402 struct buffer_head **ref_bh)
404 int ret, delete_tree = 0;
405 struct ocfs2_refcount_tree *tree = NULL;
406 struct buffer_head *ref_root_bh = NULL;
407 struct ocfs2_refcount_block *rb;
410 ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree);
416 ocfs2_refcount_tree_get(tree);
418 ret = __ocfs2_lock_refcount_tree(osb, tree, rw);
421 ocfs2_refcount_tree_put(tree);
425 ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
429 ocfs2_unlock_refcount_tree(osb, tree, rw);
430 ocfs2_refcount_tree_put(tree);
434 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
436 * If the refcount block has been freed and re-created, we may need
437 * to recreate the refcount tree also.
439 * Here we just remove the tree from the rb-tree, and the last
440 * kref holder will unlock and delete this refcount_tree.
441 * Then we goto "again" and ocfs2_get_refcount_tree will create
442 * the new refcount tree for us.
444 if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) {
445 if (!tree->rf_removed) {
446 ocfs2_erase_refcount_tree_from_list(osb, tree);
447 tree->rf_removed = 1;
451 ocfs2_unlock_refcount_tree(osb, tree, rw);
453 * We get an extra reference when we create the refcount
454 * tree, so another put will destroy it.
457 ocfs2_refcount_tree_put(tree);
465 *ref_bh = ref_root_bh;
473 int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw,
474 struct ocfs2_refcount_tree **ret_tree,
475 struct buffer_head **ref_bh)
480 ret = ocfs2_get_refcount_block(inode, &ref_blkno);
486 return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno,
487 rw, ret_tree, ref_bh);
490 void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
491 struct ocfs2_refcount_tree *tree, int rw)
494 up_write(&tree->rf_sem);
496 up_read(&tree->rf_sem);
498 ocfs2_refcount_unlock(tree, rw);
499 ocfs2_refcount_tree_put(tree);
502 void ocfs2_purge_refcount_trees(struct ocfs2_super *osb)
504 struct rb_node *node;
505 struct ocfs2_refcount_tree *tree;
506 struct rb_root *root = &osb->osb_rf_lock_tree;
508 while ((node = rb_last(root)) != NULL) {
509 tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node);
511 mlog(0, "Purge tree %llu\n",
512 (unsigned long long) tree->rf_blkno);
514 rb_erase(&tree->rf_node, root);
515 ocfs2_free_refcount_tree(tree);