2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
20 #include <linux/sched.h>
21 #include <linux/writeback.h>
22 #include <linux/pagemap.h>
25 #include "transaction.h"
/* File-local counter; put_transaction() warns if it is zero when a transaction is freed. */
28 static int total_trans = 0;
/* Slab caches for transaction handles and transactions (declared extern here). */
29 extern struct kmem_cache *btrfs_trans_handle_cachep;
30 extern struct kmem_cache *btrfs_transaction_cachep;
/* Tag numbers set and cleared on entries of fs_info->fs_roots_radix. */
32 #define BTRFS_ROOT_TRANS_TAG 0
33 #define BTRFS_ROOT_DEFRAG_TAG 1
/*
 * Drop one reference on 'transaction'.
 *
 * When use_count reaches zero the transaction is unlinked from the list it
 * sits on, zeroed, and handed back to btrfs_transaction_cachep.  The callers
 * visible in this file invoke it with fs_info->trans_mutex held.
 */
35 static noinline void put_transaction(struct btrfs_transaction *transaction)
/* a put without an outstanding reference points at a refcount bug */
37 WARN_ON(transaction->use_count == 0);
38 transaction->use_count--;
39 if (transaction->use_count == 0) {
40 WARN_ON(total_trans == 0);
42 list_del_init(&transaction->list);
/* zero the structure before it goes back to the slab cache */
43 memset(transaction, 0, sizeof(*transaction));
44 kmem_cache_free(btrfs_transaction_cachep, transaction);
/*
 * Make the caller a writer of the filesystem's running transaction.
 *
 * The visible statements read fs_info->running_transaction, initialise a
 * btrfs_transaction obtained from btrfs_transaction_cachep (bumping
 * fs_info->generation and publishing the result as running_transaction
 * under new_trans_lock), and increment num_writers / num_joined.
 *
 * NOTE(review): the conditionals that separate the "set up a new
 * transaction" statements from the two trailing increments are not visible
 * in this listing -- confirm the branch structure against the full file.
 * The one caller in this file, btrfs_start_transaction(), holds trans_mutex
 * around the call.
 */
48 static noinline int join_transaction(struct btrfs_root *root)
50 struct btrfs_transaction *cur_trans;
51 cur_trans = root->fs_info->running_transaction;
53 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
/* the bumped generation becomes the new transaction's transid below */
57 root->fs_info->generation++;
58 root->fs_info->last_alloc = 0;
59 root->fs_info->last_data_alloc = 0;
/* writer count starts at one, joiner count at zero */
60 cur_trans->num_writers = 1;
61 cur_trans->num_joined = 0;
62 cur_trans->transid = root->fs_info->generation;
63 init_waitqueue_head(&cur_trans->writer_wait);
64 init_waitqueue_head(&cur_trans->commit_wait);
65 cur_trans->in_commit = 0;
/* initial reference; references are dropped through put_transaction() */
66 cur_trans->use_count = 1;
67 cur_trans->commit_done = 0;
68 cur_trans->start_time = get_seconds();
69 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
/* appended at the tail of fs_info->trans_list */
70 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
71 btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree);
72 extent_io_tree_init(&cur_trans->dirty_pages,
73 root->fs_info->btree_inode->i_mapping,
/* publish the transaction as the running one under new_trans_lock */
75 spin_lock(&root->fs_info->new_trans_lock);
76 root->fs_info->running_transaction = cur_trans;
77 spin_unlock(&root->fs_info->new_trans_lock);
/* NOTE(review): presumably the path for joining an already running transaction -- confirm */
79 cur_trans->num_writers++;
80 cur_trans->num_joined++;
/*
 * Note that 'root' is being used in the running transaction.
 *
 * For a root with ref_cows set whose last_trans is older than the running
 * transaction's transid, and whose root item has a non-zero refs field, the
 * root is tagged in fs_roots_radix with both BTRFS_ROOT_TRANS_TAG and
 * BTRFS_ROOT_DEFRAG_TAG and commit_root is set from btrfs_root_node().
 * root->last_trans is set to the running transid.
 *
 * Dereferences fs_info->running_transaction without a NULL check, so a
 * transaction must be running when this is called.
 */
86 static noinline int record_root_in_trans(struct btrfs_root *root)
88 u64 running_trans_id = root->fs_info->running_transaction->transid;
/* only ref_cows roots not yet recorded for this transid */
89 if (root->ref_cows && root->last_trans < running_trans_id) {
/* the extent root is not expected to take this path */
90 WARN_ON(root == root->fs_info->extent_root);
91 if (root->root_item.refs != 0) {
/* add_dirty_roots() looks roots up by this tag */
92 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
93 (unsigned long)root->root_key.objectid,
94 BTRFS_ROOT_TRANS_TAG);
/* btrfs_defrag_dirty_roots() looks roots up by this tag */
95 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
96 (unsigned long)root->root_key.objectid,
97 BTRFS_ROOT_DEFRAG_TAG);
/* add_dirty_roots() later compares commit_root with root->node */
98 root->commit_root = btrfs_root_node(root);
102 root->last_trans = running_trans_id;
/*
 * Allocate a transaction handle and attach it to the running transaction.
 *
 * Under fs_info->trans_mutex the function calls join_transaction(), records
 * 'root' in the transaction, fills in the handle (transid, transaction,
 * blocks_reserved, allocation-exclude fields) and takes one use_count
 * reference on the running transaction for the handle.
 *
 * NOTE(review): the remaining parameter(s), any check of the allocation or
 * of join_transaction()'s result, and the return statement are not visible
 * in this listing; the handle 'h' is presumably what is returned.
 */
107 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
110 struct btrfs_trans_handle *h =
111 kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
114 mutex_lock(&root->fs_info->trans_mutex);
115 ret = join_transaction(root);
118 record_root_in_trans(root);
119 h->transid = root->fs_info->running_transaction->transid;
120 h->transaction = root->fs_info->running_transaction;
121 h->blocks_reserved = num_blocks;
123 h->block_group = NULL;
124 h->alloc_exclude_nr = 0;
125 h->alloc_exclude_start = 0;
/* reference held by the handle; __btrfs_end_transaction() drops one via put_transaction() */
126 root->fs_info->running_transaction->use_count++;
127 mutex_unlock(&root->fs_info->trans_mutex);
/*
 * Block until 'commit' has its commit_done flag set.
 *
 * Takes fs_info->trans_mutex itself, so callers must not hold it.  The task
 * queues on commit->commit_wait in TASK_UNINTERRUPTIBLE state and releases
 * trans_mutex between the re-check of commit_done and re-taking the mutex.
 *
 * NOTE(review): the declaration of 'wait' and the statement that sleeps
 * between the unlock and the lock are not visible in this listing.
 */
131 static noinline int wait_for_commit(struct btrfs_root *root,
132 struct btrfs_transaction *commit)
135 mutex_lock(&root->fs_info->trans_mutex);
136 while(!commit->commit_done) {
137 prepare_to_wait(&commit->commit_wait, &wait,
138 TASK_UNINTERRUPTIBLE);
/* re-check after queueing on commit_wait */
139 if (commit->commit_done)
/* trans_mutex is not held across the gap between these two calls */
141 mutex_unlock(&root->fs_info->trans_mutex);
143 mutex_lock(&root->fs_info->trans_mutex);
145 mutex_unlock(&root->fs_info->trans_mutex);
146 finish_wait(&commit->commit_wait, &wait);
/*
 * Shared implementation of btrfs_end_transaction() and
 * btrfs_end_transaction_throttle().
 *
 * Decrements the running transaction's writer count, wakes a waiter on
 * writer_wait, drops one transaction reference and frees the handle.
 * When 'throttle' is non-zero and the transaction is already in commit,
 * the caller first waits for that commit through wait_for_commit().
 *
 * 'trans' must not be used after this returns: it is zeroed and given back
 * to btrfs_trans_handle_cachep.
 */
150 static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
151 struct btrfs_root *root, int throttle)
153 struct btrfs_transaction *cur_trans;
155 mutex_lock(&root->fs_info->trans_mutex);
156 cur_trans = root->fs_info->running_transaction;
/* the handle is expected to belong to the running transaction */
157 WARN_ON(cur_trans != trans->transaction);
158 WARN_ON(cur_trans->num_writers < 1);
159 cur_trans->num_writers--;
/* btrfs_commit_transaction() sleeps on writer_wait while other writers remain */
161 if (waitqueue_active(&cur_trans->writer_wait))
162 wake_up(&cur_trans->writer_wait);
164 if (cur_trans->in_commit && throttle) {
/* wait_for_commit() takes trans_mutex itself, so release it around the call */
166 mutex_unlock(&root->fs_info->trans_mutex);
167 ret = wait_for_commit(root, cur_trans);
169 mutex_lock(&root->fs_info->trans_mutex);
/* matches the use_count++ done in btrfs_start_transaction() */
172 put_transaction(cur_trans);
173 mutex_unlock(&root->fs_info->trans_mutex);
/* zero the handle before returning it to the slab cache */
174 memset(trans, 0, sizeof(*trans));
175 kmem_cache_free(btrfs_trans_handle_cachep, trans);
/*
 * End a transaction handle without throttling: forwards to
 * __btrfs_end_transaction() with throttle == 0 and returns its result.
 */
179 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
180 struct btrfs_root *root)
182 return __btrfs_end_transaction(trans, root, 0);
/*
 * End a transaction handle with throttling: forwards to
 * __btrfs_end_transaction() with throttle == 1, which makes the caller wait
 * for the commit when the transaction is already in commit.
 */
185 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
186 struct btrfs_root *root)
188 return __btrfs_end_transaction(trans, root, 1);
/*
 * Write the btree pages recorded as dirty for a transaction and wait for
 * the I/O on the btree inode's mapping.
 *
 * Without a handle, or with a handle that has no transaction attached, the
 * whole btree inode mapping is flushed with filemap_write_and_wait().
 * Otherwise ranges are looked up in trans->transaction->dirty_pages,
 * cleared from that tree, and walked one page at a time; pages are looked
 * up locked in the btree inode's mapping and write_one_page() is called on
 * them.  filemap_fdatawait() is called on the mapping as well.
 *
 * NOTE(review): the loop around find_first_extent_bit(), the handling of
 * pages that are not found, and the error propagation are not visible in
 * this listing.
 */
192 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
193 struct btrfs_root *root)
198 struct extent_io_tree *dirty_pages;
200 struct inode *btree_inode = root->fs_info->btree_inode;
/* no transaction to consult: flush the entire mapping */
205 if (!trans || !trans->transaction) {
206 return filemap_write_and_wait(btree_inode->i_mapping);
208 dirty_pages = &trans->transaction->dirty_pages;
210 ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
/* the range is cleared from the tree before its pages are written */
214 clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
215 while(start <= end) {
/* page index that contains 'start'; 'start' then moves to the next page boundary */
216 index = start >> PAGE_CACHE_SHIFT;
217 start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
218 page = find_lock_page(btree_inode->i_mapping, index);
/* the page already has writeback in flight */
221 if (PageWriteback(page)) {
223 wait_on_page_writeback(page);
226 page_cache_release(page);
230 err = write_one_page(page, 0);
233 page_cache_release(page);
236 err = filemap_fdatawait(btree_inode->i_mapping);
/*
 * Bring the root item of 'root' up to date in the tree root.
 *
 * The visible code compares the bytenr stored in root->root_item with
 * root->node->start, stores the current node's level (and, through
 * btrfs_set_root_bytenr(), a new bytenr) in the root item, calls
 * btrfs_update_root() on tree_root, and calls
 * btrfs_write_dirty_block_groups() both before and after.
 *
 * NOTE(review): the enclosing loop, what happens when the bytenr comparison
 * matches, and the arguments on the continuation lines are not visible in
 * this listing; the bytenr written is presumably root->node->start.
 */
242 static int update_cowonly_root(struct btrfs_trans_handle *trans,
243 struct btrfs_root *root)
247 struct btrfs_root *tree_root = root->fs_info->tree_root;
249 btrfs_write_dirty_block_groups(trans, root);
/* bytenr currently recorded in the root item vs. the current root node */
251 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
252 if (old_root_bytenr == root->node->start)
254 btrfs_set_root_bytenr(&root->root_item,
256 btrfs_set_root_level(&root->root_item,
257 btrfs_header_level(root->node));
258 ret = btrfs_update_root(trans, tree_root,
262 btrfs_write_dirty_block_groups(trans, root);
/*
 * Run update_cowonly_root() on every root queued on
 * fs_info->dirty_cowonly_roots, taking entries from the head of the list
 * until the list is empty.
 *
 * The 'root' parameter only supplies fs_info; it is reused as the loop
 * variable.  NOTE(review): the statement that removes 'next' from the list
 * and the return statement are not visible in this listing.
 */
267 int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
268 struct btrfs_root *root)
270 struct btrfs_fs_info *fs_info = root->fs_info;
271 struct list_head *next;
273 while(!list_empty(&fs_info->dirty_cowonly_roots)) {
274 next = fs_info->dirty_cowonly_roots.next;
/* roots are linked through their dirty_list member */
276 root = list_entry(next, struct btrfs_root, dirty_list);
277 update_cowonly_root(trans, root);
/*
 * NOTE(review): the struct header is not visible in this listing; these are
 * presumably the members of struct dirty_root, which the functions below
 * access as dirty->list, dirty->root and dirty->latest_root.
 */
/* links the entry on a list of dirty or dead roots */
283 struct list_head list;
/* the root to drop; add_dirty_roots() stores a copy of the live root here */
284 struct btrfs_root *root;
/* the live root; drop_dirty_roots() adjusts its used-bytes accounting */
285 struct btrfs_root *latest_root;
/*
 * Queue a root for later dropping: allocate a struct dirty_root with
 * GFP_NOFS, record 'latest' as its latest_root and add it at the head of
 * 'dead_list'.
 *
 * NOTE(review): the allocation-failure check, the assignment of
 * dirty->root and the return statement are not visible in this listing.
 */
288 int btrfs_add_dead_root(struct btrfs_root *root,
289 struct btrfs_root *latest,
290 struct list_head *dead_list)
292 struct dirty_root *dirty;
294 dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
298 dirty->latest_root = latest;
299 list_add(&dirty->list, dead_list);
/*
 * Process the roots tagged BTRFS_ROOT_TRANS_TAG in 'radix' and queue
 * struct dirty_root entries on 'list'.
 *
 * Steps visible for each root returned by the tagged gang lookup:
 *  - the TRANS tag is cleared for the root's objectid;
 *  - commit_root is compared with node; for that case the code warns if
 *    node->start differs from the bytenr in the root item, calls
 *    free_extent_buffer() on commit_root and resets the pointer to NULL;
 *  - btrfs_update_root() is called against the tree root;
 *  - a struct dirty_root is allocated together with a byte-for-byte copy of
 *    the root; drop_progress and drop_level in the live root item are
 *    cleared before the copy is made; the copy's node is the old
 *    commit_root and latest_root points at the live root;
 *  - the live root's key offset is set to fs_info->generation, its root
 *    item gets the current node's level, and btrfs_insert_root() is called;
 *  - the copy's root-item refs count is decremented by one and written with
 *    btrfs_update_root();
 *  - list_add() puts the dirty_root entry on 'list'.
 *
 * NOTE(review): loop delimiters, error checks and branch boundaries are not
 * visible in this listing, so the conditions under which each step runs
 * must be confirmed against the full file.
 */
303 static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
304 struct radix_tree_root *radix,
305 struct list_head *list)
307 struct dirty_root *dirty;
/* the gang array has room for 8 roots per lookup */
308 struct btrfs_root *gang[8];
309 struct btrfs_root *root;
316 ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0,
318 BTRFS_ROOT_TRANS_TAG);
321 for (i = 0; i < ret; i++) {
/* the tag was set by record_root_in_trans() */
323 radix_tree_tag_clear(radix,
324 (unsigned long)root->root_key.objectid,
325 BTRFS_ROOT_TRANS_TAG);
/* commit_root and node are the same buffer */
326 if (root->commit_root == root->node) {
327 WARN_ON(root->node->start !=
328 btrfs_root_bytenr(&root->root_item));
329 free_extent_buffer(root->commit_root);
330 root->commit_root = NULL;
332 /* make sure to update the root on disk
333 * so we get any updates to the block used
336 err = btrfs_update_root(trans,
337 root->fs_info->tree_root,
342 dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
344 dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS);
345 BUG_ON(!dirty->root);
347 memset(&root->root_item.drop_progress, 0,
348 sizeof(struct btrfs_disk_key));
349 root->root_item.drop_level = 0;
351 memcpy(dirty->root, root, sizeof(*root));
352 dirty->root->node = root->commit_root;
353 dirty->latest_root = root;
354 root->commit_root = NULL;
356 root->root_key.offset = root->fs_info->generation;
357 btrfs_set_root_bytenr(&root->root_item,
359 btrfs_set_root_level(&root->root_item,
360 btrfs_header_level(root->node));
361 err = btrfs_insert_root(trans, root->fs_info->tree_root,
367 refs = btrfs_root_refs(&dirty->root->root_item);
368 btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
369 err = btrfs_update_root(trans, root->fs_info->tree_root,
370 &dirty->root->root_key,
371 &dirty->root->root_item);
375 list_add(&dirty->list, list);
/*
 * Defragment the leaves of 'root'.
 *
 * 'cacheonly' is passed straight through to btrfs_defrag_leaves().  Work is
 * done inside transactions started on 'root'; after a defrag pass the
 * transaction is ended, btrfs_btree_balance_dirty() is called on the tree
 * root with the handle's blocks_used count, and a new transaction is
 * started.  root->defrag_running is set while defragging and cleared
 * afterwards, and the root's BTRFS_ROOT_DEFRAG_TAG is cleared in
 * fs_roots_radix before the last transaction is ended.
 *
 * NOTE(review): the loop construct, its exit condition and the return
 * statements (including the one for the defrag_running check) are not
 * visible in this listing.
 */
386 int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
388 struct btrfs_fs_info *info = root->fs_info;
390 struct btrfs_trans_handle *trans;
/* a defrag of this root is already marked as running */
394 if (root->defrag_running)
396 trans = btrfs_start_transaction(root, 1);
398 root->defrag_running = 1;
399 ret = btrfs_defrag_leaves(trans, root, cacheonly);
/* read blocks_used before the handle is freed by btrfs_end_transaction() */
400 nr = trans->blocks_used;
401 btrfs_end_transaction(trans, root);
402 btrfs_btree_balance_dirty(info->tree_root, nr);
405 trans = btrfs_start_transaction(root, 1);
409 root->defrag_running = 0;
/* the tag was set by record_root_in_trans() */
411 radix_tree_tag_clear(&info->fs_roots_radix,
412 (unsigned long)root->root_key.objectid,
413 BTRFS_ROOT_DEFRAG_TAG);
414 btrfs_end_transaction(trans, root);
/*
 * Call btrfs_defrag_root() with cacheonly == 1 on the roots tagged
 * BTRFS_ROOT_DEFRAG_TAG in info->fs_roots_radix, and on the extent root.
 *
 * Roots are fetched one at a time (gang array of size 1); 'last' is set to
 * the objectid following the root just handled.
 * NOTE(review): the enclosing loop and the lookup's start index are not
 * visible in this listing; 'last' is presumably where the next lookup
 * resumes.
 */
418 int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info)
420 struct btrfs_root *gang[1];
421 struct btrfs_root *root;
428 ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix,
431 BTRFS_ROOT_DEFRAG_TAG);
434 for (i = 0; i < ret; i++) {
436 last = root->root_key.objectid + 1;
437 btrfs_defrag_root(root, 1);
/* the extent root is defragged with an explicit call */
440 btrfs_defrag_root(info->extent_root, 1);
/*
 * Drop every snapshot queued on 'list'.
 *
 * Each struct dirty_root is taken off the head of the list.  With
 * fs_info->throttles raised and drop_mutex held, btrfs_drop_snapshot() is
 * called on dirty->root inside a transaction started on tree_root.  The
 * result is tested against -EAGAIN; the visible code also writes the root
 * item back with btrfs_update_root(), ends the transaction and releases
 * drop_mutex around btrfs_btree_balance_dirty().
 *
 * Afterwards, under alloc_mutex, the amount by which the dropped root's
 * used-bytes value shrank is subtracted from the latest root's used-bytes
 * value, the root item is removed with btrfs_del_root(), the transaction
 * is ended, and free_extent_buffer() is called on dirty->root->node.
 *
 * NOTE(review): the inner retry loop, error checks and the freeing of the
 * dirty_root entry itself are not visible in this listing.
 */
444 static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
445 struct list_head *list)
447 struct dirty_root *dirty;
448 struct btrfs_trans_handle *trans;
455 while(!list_empty(list)) {
456 struct btrfs_root *root;
458 dirty = list_entry(list->next, struct dirty_root, list);
459 list_del_init(&dirty->list);
/* used bytes of the root before the drop */
461 num_bytes = btrfs_root_used(&dirty->root->root_item);
462 root = dirty->latest_root;
463 atomic_inc(&root->fs_info->throttles);
465 mutex_lock(&root->fs_info->drop_mutex);
467 trans = btrfs_start_transaction(tree_root, 1);
468 ret = btrfs_drop_snapshot(trans, dirty->root);
469 if (ret != -EAGAIN) {
473 err = btrfs_update_root(trans,
475 &dirty->root->root_key,
476 &dirty->root->root_item);
/* read blocks_used before the handle is freed by btrfs_end_transaction() */
479 nr = trans->blocks_used;
480 ret = btrfs_end_transaction(trans, tree_root);
/* drop_mutex is not held across btrfs_btree_balance_dirty() */
483 mutex_unlock(&root->fs_info->drop_mutex);
484 btrfs_btree_balance_dirty(tree_root, nr);
486 mutex_lock(&root->fs_info->drop_mutex);
489 atomic_dec(&root->fs_info->throttles);
491 mutex_lock(&root->fs_info->alloc_mutex);
/* num_bytes becomes (used before drop) - (used after drop) */
492 num_bytes -= btrfs_root_used(&dirty->root->root_item);
493 bytes_used = btrfs_root_used(&root->root_item);
495 record_root_in_trans(root);
496 btrfs_set_root_used(&root->root_item,
497 bytes_used - num_bytes);
499 mutex_unlock(&root->fs_info->alloc_mutex);
501 ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key);
506 mutex_unlock(&root->fs_info->drop_mutex);
508 nr = trans->blocks_used;
509 ret = btrfs_end_transaction(trans, tree_root);
512 free_extent_buffer(dirty->root->node);
516 btrfs_btree_balance_dirty(tree_root, nr);
/*
 * Write out the data of the inodes recorded in the transaction's
 * ordered_inode_tree.
 *
 * Two lookups are visible: inodes found with
 * btrfs_find_first_ordered_inode() have writeback started with
 * filemap_fdatawrite(); inodes found (and removed) with
 * btrfs_find_del_first_ordered_inode() are written and waited on with
 * filemap_write_and_wait().  Only regular files are written, and
 * BTRFS_I(inode)->ordered_writeback is raised around each call.
 *
 * The function unlocks fs_info->trans_mutex before the I/O and re-takes it
 * afterwards, so it expects the mutex to be held on entry.
 * fs_info->throttles is raised for the duration.
 *
 * NOTE(review): the loops around the two lookups, their exit conditions and
 * the remaining inode reference handling are not visible in this listing.
 */
522 int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
523 struct btrfs_root *root)
525 struct btrfs_transaction *cur_trans = trans->transaction;
527 u64 root_objectid = 0;
531 atomic_inc(&root->fs_info->throttles);
/* first lookup: start writeback without waiting */
533 ret = btrfs_find_first_ordered_inode(
534 &cur_trans->ordered_inode_tree,
535 &root_objectid, &objectid, &inode);
/* trans_mutex is not held while data is written */
539 mutex_unlock(&root->fs_info->trans_mutex);
541 if (S_ISREG(inode->i_mode)) {
542 atomic_inc(&BTRFS_I(inode)->ordered_writeback);
543 filemap_fdatawrite(inode->i_mapping);
544 atomic_dec(&BTRFS_I(inode)->ordered_writeback);
548 mutex_lock(&root->fs_info->trans_mutex);
/* second lookup: remove the entry, then write and wait */
553 ret = btrfs_find_del_first_ordered_inode(
554 &cur_trans->ordered_inode_tree,
555 &root_objectid, &objectid, &inode);
558 mutex_unlock(&root->fs_info->trans_mutex);
560 if (S_ISREG(inode->i_mode)) {
561 atomic_inc(&BTRFS_I(inode)->ordered_writeback);
562 filemap_write_and_wait(inode->i_mapping);
563 atomic_dec(&BTRFS_I(inode)->ordered_writeback);
/* NOTE(review): direct i_count decrement -- confirm which reference this balances */
565 atomic_dec(&inode->i_count);
568 mutex_lock(&root->fs_info->trans_mutex);
570 atomic_dec(&root->fs_info->throttles);
/*
 * Create the snapshot described by 'pending'.
 *
 * Visible steps: allocate a temporary root item and fill it with a copy of
 * pending->root's root item; look up a free objectid in the tree root;
 * lock and COW the source root's node, then copy it with btrfs_copy_root()
 * under the new objectid; store the copy's start and level in the new root
 * item and call btrfs_insert_root(); insert a directory item and an inode
 * ref named pending->name into the tree root, passing the inode number of
 * the superblock's root dentry; invalidate the dcache entry for the name;
 * free the temporary root item.
 *
 * NOTE(review): error checks, goto labels and the return statement are not
 * visible in this listing.
 */
574 static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
575 struct btrfs_fs_info *fs_info,
576 struct btrfs_pending_snapshot *pending)
578 struct btrfs_key key;
579 struct btrfs_root_item *new_root_item;
580 struct btrfs_root *tree_root = fs_info->tree_root;
581 struct btrfs_root *root = pending->root;
582 struct extent_buffer *tmp;
583 struct extent_buffer *old;
588 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
589 if (!new_root_item) {
593 ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid);
/* the new root item starts out as a copy of the source root's item */
597 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
599 key.objectid = objectid;
601 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
/* COW the locked root node; 'old' is updated to the buffer that is then copied */
603 old = btrfs_lock_root_node(root);
604 btrfs_cow_block(trans, root, old, NULL, 0, &old);
606 btrfs_copy_root(trans, root, old, &tmp, objectid);
607 btrfs_tree_unlock(old);
608 free_extent_buffer(old);
/* the new root item points at the copied node */
610 btrfs_set_root_bytenr(new_root_item, tmp->start);
611 btrfs_set_root_level(new_root_item, btrfs_header_level(tmp));
612 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
614 btrfs_tree_unlock(tmp);
615 free_extent_buffer(tmp);
620 * insert the directory item
622 key.offset = (u64)-1;
623 namelen = strlen(pending->name);
624 ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
625 pending->name, namelen,
626 root->fs_info->sb->s_root->d_inode->i_ino,
632 ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
633 pending->name, strlen(pending->name), objectid,
634 root->fs_info->sb->s_root->d_inode->i_ino);
636 /* Invalidate existing dcache entry for new snapshot. */
637 btrfs_invalidate_dcache_root(root, pending->name, namelen);
640 kfree(new_root_item);
/*
 * Create every snapshot queued on the transaction's pending_snapshots list.
 *
 * Entries are taken from the head of the list until it is empty; each is
 * passed to create_pending_snapshot(), unlinked, and has its name freed.
 * NOTE(review): the check of 'ret', the freeing of the entry itself and the
 * return statement are not visible in this listing.
 */
644 static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
645 struct btrfs_fs_info *fs_info)
647 struct btrfs_pending_snapshot *pending;
648 struct list_head *head = &trans->transaction->pending_snapshots;
651 while(!list_empty(head)) {
652 pending = list_entry(head->next,
653 struct btrfs_pending_snapshot, list);
654 ret = create_pending_snapshot(trans, fs_info, pending);
656 list_del(&pending->list);
657 kfree(pending->name);
/*
 * Commit the transaction that 'trans' belongs to.
 *
 * Visible sequence:
 *  - If the transaction is already marked in_commit, take a reference, end
 *    this handle, wait for that commit with wait_for_commit() and drop the
 *    reference.
 *  - Otherwise mark the transaction in_commit.  If there is an earlier
 *    entry on fs_info->trans_list whose commit is not done, wait for it.
 *  - Loop: sleep on writer_wait with trans_mutex released, then flush
 *    ordered inodes, until this task is the only writer and num_joined did
 *    not change during the pass.
 *  - Create pending snapshots, collect dirty fs roots into dirty_fs_roots
 *    and commit the COW-only tree roots.
 *  - Clear fs_info->running_transaction under new_trans_lock, update
 *    super_copy (tree root and chunk root location/level, generation) and
 *    copy it into super_for_commit; copy the pinned extents.
 *  - With trans_mutex released: write the transaction's btree pages, write
 *    the super block and finish the extent commit.
 *  - Set commit_done, record last_trans_committed, wake commit_wait, drop
 *    two references on the transaction and free the handle.
 *  - When fs_info->closing is set, drop the collected roots immediately.
 *
 * 'trans' must not be used after this returns.
 * NOTE(review): return statements, error checks and several delimiters are
 * not visible in this listing.
 */
663 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
664 struct btrfs_root *root)
666 unsigned long joined = 0;
667 unsigned long timeout = 1;
668 struct btrfs_transaction *cur_trans;
669 struct btrfs_transaction *prev_trans = NULL;
670 struct btrfs_root *chunk_root = root->fs_info->chunk_root;
671 struct list_head dirty_fs_roots;
672 struct extent_io_tree *pinned_copy;
676 INIT_LIST_HEAD(&dirty_fs_roots);
678 mutex_lock(&root->fs_info->trans_mutex);
/* another task is already committing this transaction: wait for it */
679 if (trans->transaction->in_commit) {
680 cur_trans = trans->transaction;
/* keep the transaction alive after the handle is ended */
681 trans->transaction->use_count++;
682 mutex_unlock(&root->fs_info->trans_mutex);
683 btrfs_end_transaction(trans, root);
685 ret = wait_for_commit(root, cur_trans);
688 mutex_lock(&root->fs_info->trans_mutex);
689 put_transaction(cur_trans);
690 mutex_unlock(&root->fs_info->trans_mutex);
/* private tree that btrfs_copy_pinned() fills in further down */
695 pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS);
699 extent_io_tree_init(pinned_copy,
700 root->fs_info->btree_inode->i_mapping, GFP_NOFS);
/* from here on this task is the committer */
702 trans->transaction->in_commit = 1;
703 printk("trans %Lu in commit\n", trans->transid);
704 cur_trans = trans->transaction;
/* there is an earlier transaction on trans_list */
705 if (cur_trans->list.prev != &root->fs_info->trans_list) {
706 prev_trans = list_entry(cur_trans->list.prev,
707 struct btrfs_transaction, list);
708 if (!prev_trans->commit_done) {
/* hold a reference while waiting without trans_mutex */
709 prev_trans->use_count++;
710 mutex_unlock(&root->fs_info->trans_mutex);
712 wait_for_commit(root, prev_trans);
714 mutex_lock(&root->fs_info->trans_mutex);
715 put_transaction(prev_trans);
/* snapshot of num_joined, compared again in the loop condition */
720 joined = cur_trans->num_joined;
721 WARN_ON(cur_trans != trans->transaction);
722 prepare_to_wait(&cur_trans->writer_wait, &wait,
723 TASK_UNINTERRUPTIBLE);
/* other writers remain: sleep until woken via writer_wait */
725 if (cur_trans->num_writers > 1)
726 timeout = MAX_SCHEDULE_TIMEOUT;
730 mutex_unlock(&root->fs_info->trans_mutex);
732 schedule_timeout(timeout);
734 mutex_lock(&root->fs_info->trans_mutex);
735 finish_wait(&cur_trans->writer_wait, &wait);
736 ret = btrfs_write_ordered_inodes(trans, root);
/* repeat while other writers exist or someone joined during this pass */
738 } while (cur_trans->num_writers > 1 ||
739 (cur_trans->num_joined != joined));
741 ret = create_pending_snapshots(trans, root->fs_info);
744 WARN_ON(cur_trans != trans->transaction);
746 ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
750 ret = btrfs_commit_tree_roots(trans, root);
753 cur_trans = root->fs_info->running_transaction;
/* no transaction is running after this point */
754 spin_lock(&root->fs_info->new_trans_lock);
755 root->fs_info->running_transaction = NULL;
756 spin_unlock(&root->fs_info->new_trans_lock);
/* record the committed tree root and chunk root in the in-memory super block */
757 btrfs_set_super_generation(&root->fs_info->super_copy,
759 btrfs_set_super_root(&root->fs_info->super_copy,
760 root->fs_info->tree_root->node->start);
761 btrfs_set_super_root_level(&root->fs_info->super_copy,
762 btrfs_header_level(root->fs_info->tree_root->node));
764 btrfs_set_super_chunk_root(&root->fs_info->super_copy,
765 chunk_root->node->start);
766 btrfs_set_super_chunk_root_level(&root->fs_info->super_copy,
767 btrfs_header_level(chunk_root->node));
/* copy of super_copy taken while trans_mutex is still held */
768 memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
769 sizeof(root->fs_info->super_copy));
771 btrfs_copy_pinned(root, pinned_copy);
/* the writes below run without trans_mutex */
773 mutex_unlock(&root->fs_info->trans_mutex);
774 ret = btrfs_write_and_wait_transaction(trans, root);
776 write_ctree_super(trans, root);
778 btrfs_finish_extent_commit(trans, root, pinned_copy);
779 mutex_lock(&root->fs_info->trans_mutex);
/* wait_for_commit() loops on this flag */
783 cur_trans->commit_done = 1;
784 printk("trans %Lu done in commit\n", cur_trans->transid);
785 root->fs_info->last_trans_committed = cur_trans->transid;
786 wake_up(&cur_trans->commit_wait);
/*
 * NOTE(review): two references are dropped -- presumably the handle's
 * reference from btrfs_start_transaction() and the initial one set in
 * join_transaction(); confirm.
 */
787 put_transaction(cur_trans);
788 put_transaction(cur_trans);
/* NOTE(review): the two splices are presumably alternatives (if / else) -- confirm */
790 if (root->fs_info->closing)
791 list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);
793 list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
795 mutex_unlock(&root->fs_info->trans_mutex);
/* the handle is freed directly here, not through btrfs_end_transaction() */
796 kmem_cache_free(btrfs_trans_handle_cachep, trans);
798 if (root->fs_info->closing) {
799 drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
/*
 * Drop the roots queued on fs_info->dead_roots.
 *
 * The list is moved onto a private list head under trans_mutex, and
 * drop_dirty_roots() is then called on it without the mutex, only if the
 * private list is non-empty.
 * NOTE(review): the end of the function is not visible in this listing.
 */
804 int btrfs_clean_old_snapshots(struct btrfs_root *root)
806 struct list_head dirty_roots;
807 INIT_LIST_HEAD(&dirty_roots);
/* detach the whole dead_roots list while holding trans_mutex */
809 mutex_lock(&root->fs_info->trans_mutex);
810 list_splice_init(&root->fs_info->dead_roots, &dirty_roots);
811 mutex_unlock(&root->fs_info->trans_mutex);
813 if (!list_empty(&dirty_roots)) {
814 drop_dirty_roots(root, &dirty_roots);