ocfs2: Basic tree root operation.
[safe/jmp/linux-2.6] / fs / ocfs2 / refcounttree.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * refcounttree.c
5  *
6  * Copyright (C) 2009 Oracle.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public
10  * License version 2 as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License for more details.
16  */
17
18 #define MLOG_MASK_PREFIX ML_REFCOUNT
19 #include <cluster/masklog.h>
20 #include "ocfs2.h"
21 #include "inode.h"
22 #include "alloc.h"
23 #include "suballoc.h"
24 #include "journal.h"
25 #include "uptodate.h"
26 #include "super.h"
27 #include "buffer_head_io.h"
28 #include "blockcheck.h"
29 #include "refcounttree.h"
30 #include "sysfile.h"
31 #include "dlmglue.h"
32
33 static inline struct ocfs2_refcount_tree *
34 cache_info_to_refcount(struct ocfs2_caching_info *ci)
35 {
36         return container_of(ci, struct ocfs2_refcount_tree, rf_ci);
37 }
38
39 static int ocfs2_validate_refcount_block(struct super_block *sb,
40                                          struct buffer_head *bh)
41 {
42         int rc;
43         struct ocfs2_refcount_block *rb =
44                 (struct ocfs2_refcount_block *)bh->b_data;
45
46         mlog(0, "Validating refcount block %llu\n",
47              (unsigned long long)bh->b_blocknr);
48
49         BUG_ON(!buffer_uptodate(bh));
50
51         /*
52          * If the ecc fails, we return the error but otherwise
53          * leave the filesystem running.  We know any error is
54          * local to this block.
55          */
56         rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &rb->rf_check);
57         if (rc) {
58                 mlog(ML_ERROR, "Checksum failed for refcount block %llu\n",
59                      (unsigned long long)bh->b_blocknr);
60                 return rc;
61         }
62
63
64         if (!OCFS2_IS_VALID_REFCOUNT_BLOCK(rb)) {
65                 ocfs2_error(sb,
66                             "Refcount block #%llu has bad signature %.*s",
67                             (unsigned long long)bh->b_blocknr, 7,
68                             rb->rf_signature);
69                 return -EINVAL;
70         }
71
72         if (le64_to_cpu(rb->rf_blkno) != bh->b_blocknr) {
73                 ocfs2_error(sb,
74                             "Refcount block #%llu has an invalid rf_blkno "
75                             "of %llu",
76                             (unsigned long long)bh->b_blocknr,
77                             (unsigned long long)le64_to_cpu(rb->rf_blkno));
78                 return -EINVAL;
79         }
80
81         if (le32_to_cpu(rb->rf_fs_generation) != OCFS2_SB(sb)->fs_generation) {
82                 ocfs2_error(sb,
83                             "Refcount block #%llu has an invalid "
84                             "rf_fs_generation of #%u",
85                             (unsigned long long)bh->b_blocknr,
86                             le32_to_cpu(rb->rf_fs_generation));
87                 return -EINVAL;
88         }
89
90         return 0;
91 }
92
93 static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci,
94                                      u64 rb_blkno,
95                                      struct buffer_head **bh)
96 {
97         int rc;
98         struct buffer_head *tmp = *bh;
99
100         rc = ocfs2_read_block(ci, rb_blkno, &tmp,
101                               ocfs2_validate_refcount_block);
102
103         /* If ocfs2_read_block() got us a new bh, pass it up. */
104         if (!rc && !*bh)
105                 *bh = tmp;
106
107         return rc;
108 }
109
110 static u64 ocfs2_refcount_cache_owner(struct ocfs2_caching_info *ci)
111 {
112         struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
113
114         return rf->rf_blkno;
115 }
116
117 static struct super_block *
118 ocfs2_refcount_cache_get_super(struct ocfs2_caching_info *ci)
119 {
120         struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
121
122         return rf->rf_sb;
123 }
124
125 static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci)
126 {
127         struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
128
129         spin_lock(&rf->rf_lock);
130 }
131
132 static void ocfs2_refcount_cache_unlock(struct ocfs2_caching_info *ci)
133 {
134         struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
135
136         spin_unlock(&rf->rf_lock);
137 }
138
139 static void ocfs2_refcount_cache_io_lock(struct ocfs2_caching_info *ci)
140 {
141         struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
142
143         mutex_lock(&rf->rf_io_mutex);
144 }
145
146 static void ocfs2_refcount_cache_io_unlock(struct ocfs2_caching_info *ci)
147 {
148         struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
149
150         mutex_unlock(&rf->rf_io_mutex);
151 }
152
153 static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = {
154         .co_owner               = ocfs2_refcount_cache_owner,
155         .co_get_super           = ocfs2_refcount_cache_get_super,
156         .co_cache_lock          = ocfs2_refcount_cache_lock,
157         .co_cache_unlock        = ocfs2_refcount_cache_unlock,
158         .co_io_lock             = ocfs2_refcount_cache_io_lock,
159         .co_io_unlock           = ocfs2_refcount_cache_io_unlock,
160 };
161
162 static struct ocfs2_refcount_tree *
163 ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno)
164 {
165         struct rb_node *n = osb->osb_rf_lock_tree.rb_node;
166         struct ocfs2_refcount_tree *tree = NULL;
167
168         while (n) {
169                 tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node);
170
171                 if (blkno < tree->rf_blkno)
172                         n = n->rb_left;
173                 else if (blkno > tree->rf_blkno)
174                         n = n->rb_right;
175                 else
176                         return tree;
177         }
178
179         return NULL;
180 }
181
182 /* osb_lock is already locked. */
183 static void ocfs2_insert_refcount_tree(struct ocfs2_super *osb,
184                                        struct ocfs2_refcount_tree *new)
185 {
186         u64 rf_blkno = new->rf_blkno;
187         struct rb_node *parent = NULL;
188         struct rb_node **p = &osb->osb_rf_lock_tree.rb_node;
189         struct ocfs2_refcount_tree *tmp;
190
191         while (*p) {
192                 parent = *p;
193
194                 tmp = rb_entry(parent, struct ocfs2_refcount_tree,
195                                rf_node);
196
197                 if (rf_blkno < tmp->rf_blkno)
198                         p = &(*p)->rb_left;
199                 else if (rf_blkno > tmp->rf_blkno)
200                         p = &(*p)->rb_right;
201                 else {
202                         /* This should never happen! */
203                         mlog(ML_ERROR, "Duplicate refcount block %llu found!\n",
204                              (unsigned long long)rf_blkno);
205                         BUG();
206                 }
207         }
208
209         rb_link_node(&new->rf_node, parent, p);
210         rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree);
211 }
212
213 static void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree)
214 {
215         ocfs2_metadata_cache_exit(&tree->rf_ci);
216         ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres);
217         ocfs2_lock_res_free(&tree->rf_lockres);
218         kfree(tree);
219 }
220
221 static inline void
222 ocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb,
223                                         struct ocfs2_refcount_tree *tree)
224 {
225         rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree);
226         if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree)
227                 osb->osb_ref_tree_lru = NULL;
228 }
229
230 static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb,
231                                         struct ocfs2_refcount_tree *tree)
232 {
233         spin_lock(&osb->osb_lock);
234         ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
235         spin_unlock(&osb->osb_lock);
236 }
237
238 void ocfs2_kref_remove_refcount_tree(struct kref *kref)
239 {
240         struct ocfs2_refcount_tree *tree =
241                 container_of(kref, struct ocfs2_refcount_tree, rf_getcnt);
242
243         ocfs2_free_refcount_tree(tree);
244 }
245
246 static inline void
247 ocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree)
248 {
249         kref_get(&tree->rf_getcnt);
250 }
251
252 static inline void
253 ocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree)
254 {
255         kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree);
256 }
257
258 static inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new,
259                                                struct super_block *sb)
260 {
261         ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops);
262         mutex_init(&new->rf_io_mutex);
263         new->rf_sb = sb;
264         spin_lock_init(&new->rf_lock);
265 }
266
267 static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb,
268                                         struct ocfs2_refcount_tree *new,
269                                         u64 rf_blkno, u32 generation)
270 {
271         init_rwsem(&new->rf_sem);
272         ocfs2_refcount_lock_res_init(&new->rf_lockres, osb,
273                                      rf_blkno, generation);
274 }
275
276 static struct ocfs2_refcount_tree*
277 ocfs2_allocate_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno)
278 {
279         struct ocfs2_refcount_tree *new;
280
281         new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS);
282         if (!new)
283                 return NULL;
284
285         new->rf_blkno = rf_blkno;
286         kref_init(&new->rf_getcnt);
287         ocfs2_init_refcount_tree_ci(new, osb->sb);
288
289         return new;
290 }
291
292 static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
293                                    struct ocfs2_refcount_tree **ret_tree)
294 {
295         int ret = 0;
296         struct ocfs2_refcount_tree *tree, *new = NULL;
297         struct buffer_head *ref_root_bh = NULL;
298         struct ocfs2_refcount_block *ref_rb;
299
300         spin_lock(&osb->osb_lock);
301         if (osb->osb_ref_tree_lru &&
302             osb->osb_ref_tree_lru->rf_blkno == rf_blkno)
303                 tree = osb->osb_ref_tree_lru;
304         else
305                 tree = ocfs2_find_refcount_tree(osb, rf_blkno);
306         if (tree)
307                 goto out;
308
309         spin_unlock(&osb->osb_lock);
310
311         new = ocfs2_allocate_refcount_tree(osb, rf_blkno);
312         if (!new) {
313                 ret = -ENOMEM;
314                 mlog_errno(ret);
315                 return ret;
316         }
317         /*
318          * We need the generation to create the refcount tree lock and since
319          * it isn't changed during the tree modification, we are safe here to
320          * read without protection.
321          * We also have to purge the cache after we create the lock since the
322          * refcount block may have the stale data. It can only be trusted when
323          * we hold the refcount lock.
324          */
325         ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh);
326         if (ret) {
327                 mlog_errno(ret);
328                 ocfs2_metadata_cache_exit(&new->rf_ci);
329                 kfree(new);
330                 return ret;
331         }
332
333         ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
334         new->rf_generation = le32_to_cpu(ref_rb->rf_generation);
335         ocfs2_init_refcount_tree_lock(osb, new, rf_blkno,
336                                       new->rf_generation);
337         ocfs2_metadata_cache_purge(&new->rf_ci);
338
339         spin_lock(&osb->osb_lock);
340         tree = ocfs2_find_refcount_tree(osb, rf_blkno);
341         if (tree)
342                 goto out;
343
344         ocfs2_insert_refcount_tree(osb, new);
345
346         tree = new;
347         new = NULL;
348
349 out:
350         *ret_tree = tree;
351
352         osb->osb_ref_tree_lru = tree;
353
354         spin_unlock(&osb->osb_lock);
355
356         if (new)
357                 ocfs2_free_refcount_tree(new);
358
359         brelse(ref_root_bh);
360         return ret;
361 }
362
363 static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno)
364 {
365         int ret;
366         struct buffer_head *di_bh = NULL;
367         struct ocfs2_dinode *di;
368
369         ret = ocfs2_read_inode_block(inode, &di_bh);
370         if (ret) {
371                 mlog_errno(ret);
372                 goto out;
373         }
374
375         BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
376
377         di = (struct ocfs2_dinode *)di_bh->b_data;
378         *ref_blkno = le64_to_cpu(di->i_refcount_loc);
379         brelse(di_bh);
380 out:
381         return ret;
382 }
383
384 static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
385                                       struct ocfs2_refcount_tree *tree, int rw)
386 {
387         int ret;
388
389         ret = ocfs2_refcount_lock(tree, rw);
390         if (ret) {
391                 mlog_errno(ret);
392                 goto out;
393         }
394
395         if (rw)
396                 down_write(&tree->rf_sem);
397         else
398                 down_read(&tree->rf_sem);
399
400 out:
401         return ret;
402 }
403
404 /*
405  * Lock the refcount tree pointed by ref_blkno and return the tree.
406  * In most case, we lock the tree and read the refcount block.
407  * So read it here if the caller really needs it.
408  *
409  * If the tree has been re-created by other node, it will free the
410  * old one and re-create it.
411  */
412 int ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
413                              u64 ref_blkno, int rw,
414                              struct ocfs2_refcount_tree **ret_tree,
415                              struct buffer_head **ref_bh)
416 {
417         int ret, delete_tree = 0;
418         struct ocfs2_refcount_tree *tree = NULL;
419         struct buffer_head *ref_root_bh = NULL;
420         struct ocfs2_refcount_block *rb;
421
422 again:
423         ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree);
424         if (ret) {
425                 mlog_errno(ret);
426                 return ret;
427         }
428
429         ocfs2_refcount_tree_get(tree);
430
431         ret = __ocfs2_lock_refcount_tree(osb, tree, rw);
432         if (ret) {
433                 mlog_errno(ret);
434                 ocfs2_refcount_tree_put(tree);
435                 goto out;
436         }
437
438         ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
439                                         &ref_root_bh);
440         if (ret) {
441                 mlog_errno(ret);
442                 ocfs2_unlock_refcount_tree(osb, tree, rw);
443                 ocfs2_refcount_tree_put(tree);
444                 goto out;
445         }
446
447         rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
448         /*
449          * If the refcount block has been freed and re-created, we may need
450          * to recreate the refcount tree also.
451          *
452          * Here we just remove the tree from the rb-tree, and the last
453          * kref holder will unlock and delete this refcount_tree.
454          * Then we goto "again" and ocfs2_get_refcount_tree will create
455          * the new refcount tree for us.
456          */
457         if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) {
458                 if (!tree->rf_removed) {
459                         ocfs2_erase_refcount_tree_from_list(osb, tree);
460                         tree->rf_removed = 1;
461                         delete_tree = 1;
462                 }
463
464                 ocfs2_unlock_refcount_tree(osb, tree, rw);
465                 /*
466                  * We get an extra reference when we create the refcount
467                  * tree, so another put will destroy it.
468                  */
469                 if (delete_tree)
470                         ocfs2_refcount_tree_put(tree);
471                 brelse(ref_root_bh);
472                 ref_root_bh = NULL;
473                 goto again;
474         }
475
476         *ret_tree = tree;
477         if (ref_bh) {
478                 *ref_bh = ref_root_bh;
479                 ref_root_bh = NULL;
480         }
481 out:
482         brelse(ref_root_bh);
483         return ret;
484 }
485
486 int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw,
487                                       struct ocfs2_refcount_tree **ret_tree,
488                                       struct buffer_head **ref_bh)
489 {
490         int ret;
491         u64 ref_blkno;
492
493         ret = ocfs2_get_refcount_block(inode, &ref_blkno);
494         if (ret) {
495                 mlog_errno(ret);
496                 return ret;
497         }
498
499         return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno,
500                                         rw, ret_tree, ref_bh);
501 }
502
503 void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
504                                 struct ocfs2_refcount_tree *tree, int rw)
505 {
506         if (rw)
507                 up_write(&tree->rf_sem);
508         else
509                 up_read(&tree->rf_sem);
510
511         ocfs2_refcount_unlock(tree, rw);
512         ocfs2_refcount_tree_put(tree);
513 }
514
515 void ocfs2_purge_refcount_trees(struct ocfs2_super *osb)
516 {
517         struct rb_node *node;
518         struct ocfs2_refcount_tree *tree;
519         struct rb_root *root = &osb->osb_rf_lock_tree;
520
521         while ((node = rb_last(root)) != NULL) {
522                 tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node);
523
524                 mlog(0, "Purge tree %llu\n",
525                      (unsigned long long) tree->rf_blkno);
526
527                 rb_erase(&tree->rf_node, root);
528                 ocfs2_free_refcount_tree(tree);
529         }
530 }
531
532 /*
533  * Create a refcount tree for an inode.
534  * We take for granted that the inode is already locked.
535  */
536 static int ocfs2_create_refcount_tree(struct inode *inode,
537                                       struct buffer_head *di_bh)
538 {
539         int ret;
540         handle_t *handle = NULL;
541         struct ocfs2_alloc_context *meta_ac = NULL;
542         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
543         struct ocfs2_inode_info *oi = OCFS2_I(inode);
544         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
545         struct buffer_head *new_bh = NULL;
546         struct ocfs2_refcount_block *rb;
547         struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL;
548         u16 suballoc_bit_start;
549         u32 num_got;
550         u64 first_blkno;
551
552         BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
553
554         mlog(0, "create tree for inode %lu\n", inode->i_ino);
555
556         ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
557         if (ret) {
558                 mlog_errno(ret);
559                 goto out;
560         }
561
562         handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_CREATE_CREDITS);
563         if (IS_ERR(handle)) {
564                 ret = PTR_ERR(handle);
565                 mlog_errno(ret);
566                 goto out;
567         }
568
569         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
570                                       OCFS2_JOURNAL_ACCESS_WRITE);
571         if (ret) {
572                 mlog_errno(ret);
573                 goto out_commit;
574         }
575
576         ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
577                                    &suballoc_bit_start, &num_got,
578                                    &first_blkno);
579         if (ret) {
580                 mlog_errno(ret);
581                 goto out_commit;
582         }
583
584         new_tree = ocfs2_allocate_refcount_tree(osb, first_blkno);
585         if (!new_tree) {
586                 ret = -ENOMEM;
587                 mlog_errno(ret);
588                 goto out_commit;
589         }
590
591         new_bh = sb_getblk(inode->i_sb, first_blkno);
592         ocfs2_set_new_buffer_uptodate(&new_tree->rf_ci, new_bh);
593
594         ret = ocfs2_journal_access_rb(handle, &new_tree->rf_ci, new_bh,
595                                       OCFS2_JOURNAL_ACCESS_CREATE);
596         if (ret) {
597                 mlog_errno(ret);
598                 goto out_commit;
599         }
600
601         /* Initialize ocfs2_refcount_block. */
602         rb = (struct ocfs2_refcount_block *)new_bh->b_data;
603         memset(rb, 0, inode->i_sb->s_blocksize);
604         strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
605         rb->rf_suballoc_slot = cpu_to_le16(osb->slot_num);
606         rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
607         rb->rf_fs_generation = cpu_to_le32(osb->fs_generation);
608         rb->rf_blkno = cpu_to_le64(first_blkno);
609         rb->rf_count = cpu_to_le32(1);
610         rb->rf_records.rl_count =
611                         cpu_to_le16(ocfs2_refcount_recs_per_rb(osb->sb));
612         spin_lock(&osb->osb_lock);
613         rb->rf_generation = osb->s_next_generation++;
614         spin_unlock(&osb->osb_lock);
615
616         ocfs2_journal_dirty(handle, new_bh);
617
618         spin_lock(&oi->ip_lock);
619         oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
620         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
621         di->i_refcount_loc = cpu_to_le64(first_blkno);
622         spin_unlock(&oi->ip_lock);
623
624         mlog(0, "created tree for inode %lu, refblock %llu\n",
625              inode->i_ino, (unsigned long long)first_blkno);
626
627         ocfs2_journal_dirty(handle, di_bh);
628
629         /*
630          * We have to init the tree lock here since it will use
631          * the generation number to create it.
632          */
633         new_tree->rf_generation = le32_to_cpu(rb->rf_generation);
634         ocfs2_init_refcount_tree_lock(osb, new_tree, first_blkno,
635                                       new_tree->rf_generation);
636
637         spin_lock(&osb->osb_lock);
638         tree = ocfs2_find_refcount_tree(osb, first_blkno);
639
640         /*
641          * We've just created a new refcount tree in this block.  If
642          * we found a refcount tree on the ocfs2_super, it must be
643          * one we just deleted.  We free the old tree before
644          * inserting the new tree.
645          */
646         BUG_ON(tree && tree->rf_generation == new_tree->rf_generation);
647         if (tree)
648                 ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
649         ocfs2_insert_refcount_tree(osb, new_tree);
650         spin_unlock(&osb->osb_lock);
651         new_tree = NULL;
652         if (tree)
653                 ocfs2_refcount_tree_put(tree);
654
655 out_commit:
656         ocfs2_commit_trans(osb, handle);
657
658 out:
659         if (new_tree) {
660                 ocfs2_metadata_cache_exit(&new_tree->rf_ci);
661                 kfree(new_tree);
662         }
663
664         brelse(new_bh);
665         if (meta_ac)
666                 ocfs2_free_alloc_context(meta_ac);
667
668         return ret;
669 }
670
671 static int ocfs2_set_refcount_tree(struct inode *inode,
672                                    struct buffer_head *di_bh,
673                                    u64 refcount_loc)
674 {
675         int ret;
676         handle_t *handle = NULL;
677         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
678         struct ocfs2_inode_info *oi = OCFS2_I(inode);
679         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
680         struct buffer_head *ref_root_bh = NULL;
681         struct ocfs2_refcount_block *rb;
682         struct ocfs2_refcount_tree *ref_tree;
683
684         BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
685
686         ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
687                                        &ref_tree, &ref_root_bh);
688         if (ret) {
689                 mlog_errno(ret);
690                 return ret;
691         }
692
693         handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_SET_CREDITS);
694         if (IS_ERR(handle)) {
695                 ret = PTR_ERR(handle);
696                 mlog_errno(ret);
697                 goto out;
698         }
699
700         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
701                                       OCFS2_JOURNAL_ACCESS_WRITE);
702         if (ret) {
703                 mlog_errno(ret);
704                 goto out_commit;
705         }
706
707         ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, ref_root_bh,
708                                       OCFS2_JOURNAL_ACCESS_WRITE);
709         if (ret) {
710                 mlog_errno(ret);
711                 goto out_commit;
712         }
713
714         rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
715         le32_add_cpu(&rb->rf_count, 1);
716
717         ocfs2_journal_dirty(handle, ref_root_bh);
718
719         spin_lock(&oi->ip_lock);
720         oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
721         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
722         di->i_refcount_loc = cpu_to_le64(refcount_loc);
723         spin_unlock(&oi->ip_lock);
724         ocfs2_journal_dirty(handle, di_bh);
725
726 out_commit:
727         ocfs2_commit_trans(osb, handle);
728 out:
729         ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
730         brelse(ref_root_bh);
731
732         return ret;
733 }
734
735 int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
736 {
737         int ret, delete_tree = 0;
738         handle_t *handle = NULL;
739         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
740         struct ocfs2_inode_info *oi = OCFS2_I(inode);
741         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
742         struct ocfs2_refcount_block *rb;
743         struct inode *alloc_inode = NULL;
744         struct buffer_head *alloc_bh = NULL;
745         struct buffer_head *blk_bh = NULL;
746         struct ocfs2_refcount_tree *ref_tree;
747         int credits = OCFS2_REFCOUNT_TREE_REMOVE_CREDITS;
748         u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc);
749         u16 bit = 0;
750
751         if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL))
752                 return 0;
753
754         BUG_ON(!ref_blkno);
755         ret = ocfs2_lock_refcount_tree(osb, ref_blkno, 1, &ref_tree, &blk_bh);
756         if (ret) {
757                 mlog_errno(ret);
758                 return ret;
759         }
760
761         rb = (struct ocfs2_refcount_block *)blk_bh->b_data;
762
763         /*
764          * If we are the last user, we need to free the block.
765          * So lock the allocator ahead.
766          */
767         if (le32_to_cpu(rb->rf_count) == 1) {
768                 blk = le64_to_cpu(rb->rf_blkno);
769                 bit = le16_to_cpu(rb->rf_suballoc_bit);
770                 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
771
772                 alloc_inode = ocfs2_get_system_file_inode(osb,
773                                         EXTENT_ALLOC_SYSTEM_INODE,
774                                         le16_to_cpu(rb->rf_suballoc_slot));
775                 if (!alloc_inode) {
776                         ret = -ENOMEM;
777                         mlog_errno(ret);
778                         goto out;
779                 }
780                 mutex_lock(&alloc_inode->i_mutex);
781
782                 ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1);
783                 if (ret) {
784                         mlog_errno(ret);
785                         goto out_mutex;
786                 }
787
788                 credits += OCFS2_SUBALLOC_FREE;
789         }
790
791         handle = ocfs2_start_trans(osb, credits);
792         if (IS_ERR(handle)) {
793                 ret = PTR_ERR(handle);
794                 mlog_errno(ret);
795                 goto out_unlock;
796         }
797
798         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
799                                       OCFS2_JOURNAL_ACCESS_WRITE);
800         if (ret) {
801                 mlog_errno(ret);
802                 goto out_commit;
803         }
804
805         ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, blk_bh,
806                                       OCFS2_JOURNAL_ACCESS_WRITE);
807         if (ret) {
808                 mlog_errno(ret);
809                 goto out_commit;
810         }
811
812         spin_lock(&oi->ip_lock);
813         oi->ip_dyn_features &= ~OCFS2_HAS_REFCOUNT_FL;
814         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
815         di->i_refcount_loc = 0;
816         spin_unlock(&oi->ip_lock);
817         ocfs2_journal_dirty(handle, di_bh);
818
819         le32_add_cpu(&rb->rf_count , -1);
820         ocfs2_journal_dirty(handle, blk_bh);
821
822         if (!rb->rf_count) {
823                 delete_tree = 1;
824                 ocfs2_erase_refcount_tree_from_list(osb, ref_tree);
825                 ret = ocfs2_free_suballoc_bits(handle, alloc_inode,
826                                                alloc_bh, bit, bg_blkno, 1);
827                 if (ret)
828                         mlog_errno(ret);
829         }
830
831 out_commit:
832         ocfs2_commit_trans(osb, handle);
833 out_unlock:
834         if (alloc_inode) {
835                 ocfs2_inode_unlock(alloc_inode, 1);
836                 brelse(alloc_bh);
837         }
838 out_mutex:
839         if (alloc_inode) {
840                 mutex_unlock(&alloc_inode->i_mutex);
841                 iput(alloc_inode);
842         }
843 out:
844         ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
845         if (delete_tree)
846                 ocfs2_refcount_tree_put(ref_tree);
847         brelse(blk_bh);
848
849         return ret;
850 }