Btrfs: Further reduce the concurrency penalty of defrag and drop_snapshot
[safe/jmp/linux-2.6] / fs / btrfs / tree-defrag.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "print-tree.h"
23 #include "transaction.h"
24
25 static void reada_defrag(struct btrfs_root *root,
26                          struct btrfs_node *node)
27 {
28         int i;
29         u32 nritems;
30         u64 blocknr;
31         int ret;
32
33         nritems = btrfs_header_nritems(&node->header);
34         for (i = 0; i < nritems; i++) {
35                 blocknr = btrfs_node_blockptr(node, i);
36                 ret = readahead_tree_block(root, blocknr);
37                 if (ret)
38                         break;
39         }
40 }
41
42 static int defrag_walk_down(struct btrfs_trans_handle *trans,
43                             struct btrfs_root *root,
44                             struct btrfs_path *path, int *level,
45                             int cache_only)
46 {
47         struct buffer_head *next;
48         struct buffer_head *cur;
49         u64 blocknr;
50         int ret = 0;
51
52         WARN_ON(*level < 0);
53         WARN_ON(*level >= BTRFS_MAX_LEVEL);
54
55         while(*level > 0) {
56                 WARN_ON(*level < 0);
57                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
58                 cur = path->nodes[*level];
59
60                 if (!cache_only && *level > 1 && path->slots[*level] == 0)
61                         reada_defrag(root, btrfs_buffer_node(cur));
62
63                 if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
64                         WARN_ON(1);
65
66                 if (path->slots[*level] >=
67                     btrfs_header_nritems(btrfs_buffer_header(cur)))
68                         break;
69
70                 if (*level == 1) {
71                         ret = btrfs_realloc_node(trans, root,
72                                                  path->nodes[*level],
73                                                  cache_only);
74                         break;
75                 }
76                 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
77                                               path->slots[*level]);
78
79                 if (cache_only) {
80                         next = btrfs_find_tree_block(root, blocknr);
81                         if (!next || !buffer_uptodate(next) ||
82                            buffer_locked(next)) {
83                                 brelse(next);
84                                 path->slots[*level]++;
85                                 continue;
86                         }
87                 } else {
88                         next = read_tree_block(root, blocknr);
89                 }
90                 ret = btrfs_cow_block(trans, root, next, path->nodes[*level],
91                                       path->slots[*level], &next);
92                 BUG_ON(ret);
93                 ret = btrfs_realloc_node(trans, root, next, cache_only);
94                 BUG_ON(ret);
95                 WARN_ON(*level <= 0);
96                 if (path->nodes[*level-1])
97                         btrfs_block_release(root, path->nodes[*level-1]);
98                 path->nodes[*level-1] = next;
99                 *level = btrfs_header_level(btrfs_buffer_header(next));
100                 path->slots[*level] = 0;
101         }
102         WARN_ON(*level < 0);
103         WARN_ON(*level >= BTRFS_MAX_LEVEL);
104         btrfs_block_release(root, path->nodes[*level]);
105         path->nodes[*level] = NULL;
106         *level += 1;
107         WARN_ON(ret);
108         return 0;
109 }
110
111 static int defrag_walk_up(struct btrfs_trans_handle *trans,
112                           struct btrfs_root *root,
113                           struct btrfs_path *path, int *level,
114                           int cache_only)
115 {
116         int i;
117         int slot;
118         struct btrfs_node *node;
119
120         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
121                 slot = path->slots[i];
122                 if (slot < btrfs_header_nritems(
123                     btrfs_buffer_header(path->nodes[i])) - 1) {
124                         path->slots[i]++;
125                         *level = i;
126                         node = btrfs_buffer_node(path->nodes[i]);
127                         WARN_ON(i == 0);
128                         btrfs_disk_key_to_cpu(&root->defrag_progress,
129                                               &node->ptrs[path->slots[i]].key);
130                         root->defrag_level = i;
131                         return 0;
132                 } else {
133                         btrfs_block_release(root, path->nodes[*level]);
134                         path->nodes[*level] = NULL;
135                         *level = i + 1;
136                 }
137         }
138         return 1;
139 }
140
141 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
142                         struct btrfs_root *root, int cache_only)
143 {
144         struct btrfs_path *path = NULL;
145         struct buffer_head *tmp;
146         int ret = 0;
147         int wret;
148         int level;
149         int orig_level;
150         int i;
151
152         if (root->ref_cows == 0) {
153                 goto out;
154         }
155         path = btrfs_alloc_path();
156         if (!path)
157                 return -ENOMEM;
158
159         level = btrfs_header_level(btrfs_buffer_header(root->node));
160         orig_level = level;
161         if (level == 0) {
162                 goto out;
163         }
164         if (root->defrag_progress.objectid == 0) {
165                 get_bh(root->node);
166                 ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp);
167                 BUG_ON(ret);
168                 ret = btrfs_realloc_node(trans, root, root->node, cache_only);
169                 BUG_ON(ret);
170                 path->nodes[level] = root->node;
171                 path->slots[level] = 0;
172         } else {
173                 level = root->defrag_level;
174                 path->lowest_level = level;
175                 wret = btrfs_search_slot(trans, root, &root->defrag_progress,
176                                          path, 0, 1);
177
178                 if (wret < 0) {
179                         ret = wret;
180                         goto out;
181                 }
182                 while(level > 0 && !path->nodes[level])
183                         level--;
184                 if (!path->nodes[level]) {
185                         ret = 0;
186                         goto out;
187                 }
188         }
189
190         while(1) {
191                 wret = defrag_walk_down(trans, root, path, &level, cache_only);
192                 if (wret > 0)
193                         break;
194                 if (wret < 0)
195                         ret = wret;
196
197                 wret = defrag_walk_up(trans, root, path, &level, cache_only);
198                 if (wret > 0)
199                         break;
200                 if (wret < 0)
201                         ret = wret;
202                 ret = -EAGAIN;
203                 break;
204         }
205         for (i = 0; i <= orig_level; i++) {
206                 if (path->nodes[i]) {
207                         btrfs_block_release(root, path->nodes[i]);
208                         path->nodes[i] = 0;
209                 }
210         }
211 out:
212         if (path)
213                 btrfs_free_path(path);
214         if (ret != -EAGAIN) {
215                 memset(&root->defrag_progress, 0,
216                        sizeof(root->defrag_progress));
217         }
218         return ret;
219 }