Btrfs: Add mount -o degraded to allow mounts to continue with missing devices
[safe/jmp/linux-2.6] / fs / btrfs / volumes.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/bio.h>
20 #include <linux/buffer_head.h>
21 #include <linux/blkdev.h>
22 #include <linux/random.h>
23 #include <asm/div64.h>
24 #include "ctree.h"
25 #include "extent_map.h"
26 #include "disk-io.h"
27 #include "transaction.h"
28 #include "print-tree.h"
29 #include "volumes.h"
30
31 struct map_lookup {
32         u64 type;
33         int io_align;
34         int io_width;
35         int stripe_len;
36         int sector_size;
37         int num_stripes;
38         int sub_stripes;
39         struct btrfs_bio_stripe stripes[];
40 };
41
42 #define map_lookup_size(n) (sizeof(struct map_lookup) + \
43                             (sizeof(struct btrfs_bio_stripe) * (n)))
44
/* every filesystem (struct btrfs_fs_devices) registered by a device scan */
static LIST_HEAD(fs_uuids);
/*
 * taken by the scan/open/close/remove paths in this file while they walk
 * or modify the device lists
 */
static DEFINE_MUTEX(uuid_mutex);
47
48 void btrfs_lock_volumes(void)
49 {
50         mutex_lock(&uuid_mutex);
51 }
52
53 void btrfs_unlock_volumes(void)
54 {
55         mutex_unlock(&uuid_mutex);
56 }
57
58 int btrfs_cleanup_fs_uuids(void)
59 {
60         struct btrfs_fs_devices *fs_devices;
61         struct list_head *uuid_cur;
62         struct list_head *devices_cur;
63         struct btrfs_device *dev;
64
65         list_for_each(uuid_cur, &fs_uuids) {
66                 fs_devices = list_entry(uuid_cur, struct btrfs_fs_devices,
67                                         list);
68                 while(!list_empty(&fs_devices->devices)) {
69                         devices_cur = fs_devices->devices.next;
70                         dev = list_entry(devices_cur, struct btrfs_device,
71                                          dev_list);
72                         if (dev->bdev) {
73                                 close_bdev_excl(dev->bdev);
74                         }
75                         list_del(&dev->dev_list);
76                         kfree(dev->name);
77                         kfree(dev);
78                 }
79         }
80         return 0;
81 }
82
83 static struct btrfs_device *__find_device(struct list_head *head, u64 devid,
84                                           u8 *uuid)
85 {
86         struct btrfs_device *dev;
87         struct list_head *cur;
88
89         list_for_each(cur, head) {
90                 dev = list_entry(cur, struct btrfs_device, dev_list);
91                 if (dev->devid == devid &&
92                     (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
93                         return dev;
94                 }
95         }
96         return NULL;
97 }
98
99 static struct btrfs_fs_devices *find_fsid(u8 *fsid)
100 {
101         struct list_head *cur;
102         struct btrfs_fs_devices *fs_devices;
103
104         list_for_each(cur, &fs_uuids) {
105                 fs_devices = list_entry(cur, struct btrfs_fs_devices, list);
106                 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
107                         return fs_devices;
108         }
109         return NULL;
110 }
111
112 static int device_list_add(const char *path,
113                            struct btrfs_super_block *disk_super,
114                            u64 devid, struct btrfs_fs_devices **fs_devices_ret)
115 {
116         struct btrfs_device *device;
117         struct btrfs_fs_devices *fs_devices;
118         u64 found_transid = btrfs_super_generation(disk_super);
119
120         fs_devices = find_fsid(disk_super->fsid);
121         if (!fs_devices) {
122                 fs_devices = kmalloc(sizeof(*fs_devices), GFP_NOFS);
123                 if (!fs_devices)
124                         return -ENOMEM;
125                 INIT_LIST_HEAD(&fs_devices->devices);
126                 INIT_LIST_HEAD(&fs_devices->alloc_list);
127                 list_add(&fs_devices->list, &fs_uuids);
128                 memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
129                 fs_devices->latest_devid = devid;
130                 fs_devices->latest_trans = found_transid;
131                 fs_devices->num_devices = 0;
132                 device = NULL;
133         } else {
134                 device = __find_device(&fs_devices->devices, devid,
135                                        disk_super->dev_item.uuid);
136         }
137         if (!device) {
138                 device = kzalloc(sizeof(*device), GFP_NOFS);
139                 if (!device) {
140                         /* we can safely leave the fs_devices entry around */
141                         return -ENOMEM;
142                 }
143                 device->devid = devid;
144                 memcpy(device->uuid, disk_super->dev_item.uuid,
145                        BTRFS_UUID_SIZE);
146                 device->barriers = 1;
147                 spin_lock_init(&device->io_lock);
148                 device->name = kstrdup(path, GFP_NOFS);
149                 if (!device->name) {
150                         kfree(device);
151                         return -ENOMEM;
152                 }
153                 list_add(&device->dev_list, &fs_devices->devices);
154                 list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
155                 fs_devices->num_devices++;
156         }
157
158         if (found_transid > fs_devices->latest_trans) {
159                 fs_devices->latest_devid = devid;
160                 fs_devices->latest_trans = found_transid;
161         }
162         *fs_devices_ret = fs_devices;
163         return 0;
164 }
165
166 int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
167 {
168         struct list_head *head = &fs_devices->devices;
169         struct list_head *cur;
170         struct btrfs_device *device;
171
172         mutex_lock(&uuid_mutex);
173 again:
174         list_for_each(cur, head) {
175                 device = list_entry(cur, struct btrfs_device, dev_list);
176                 if (!device->in_fs_metadata) {
177 printk("getting rid of extra dev %s\n", device->name);
178                         if (device->bdev)
179                                 close_bdev_excl(device->bdev);
180                         list_del(&device->dev_list);
181                         list_del(&device->dev_alloc_list);
182                         fs_devices->num_devices--;
183                         kfree(device->name);
184                         kfree(device);
185                         goto again;
186                 }
187         }
188         mutex_unlock(&uuid_mutex);
189         return 0;
190 }
191 int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
192 {
193         struct list_head *head = &fs_devices->devices;
194         struct list_head *cur;
195         struct btrfs_device *device;
196
197         mutex_lock(&uuid_mutex);
198         list_for_each(cur, head) {
199                 device = list_entry(cur, struct btrfs_device, dev_list);
200                 if (device->bdev) {
201                         close_bdev_excl(device->bdev);
202                 }
203                 device->bdev = NULL;
204                 device->in_fs_metadata = 0;
205         }
206         mutex_unlock(&uuid_mutex);
207         return 0;
208 }
209
210 int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
211                        int flags, void *holder)
212 {
213         struct block_device *bdev;
214         struct list_head *head = &fs_devices->devices;
215         struct list_head *cur;
216         struct btrfs_device *device;
217         int ret;
218
219         mutex_lock(&uuid_mutex);
220         list_for_each(cur, head) {
221                 device = list_entry(cur, struct btrfs_device, dev_list);
222                 if (device->bdev)
223                         continue;
224
225                 if (!device->name)
226                         continue;
227
228                 bdev = open_bdev_excl(device->name, flags, holder);
229
230                 if (IS_ERR(bdev)) {
231                         printk("open %s failed\n", device->name);
232                         ret = PTR_ERR(bdev);
233                         goto fail;
234                 }
235                 set_blocksize(bdev, 4096);
236                 if (device->devid == fs_devices->latest_devid)
237                         fs_devices->latest_bdev = bdev;
238                 device->bdev = bdev;
239                 device->in_fs_metadata = 0;
240
241         }
242         mutex_unlock(&uuid_mutex);
243         return 0;
244 fail:
245         mutex_unlock(&uuid_mutex);
246         btrfs_close_devices(fs_devices);
247         return ret;
248 }
249
250 int btrfs_scan_one_device(const char *path, int flags, void *holder,
251                           struct btrfs_fs_devices **fs_devices_ret)
252 {
253         struct btrfs_super_block *disk_super;
254         struct block_device *bdev;
255         struct buffer_head *bh;
256         int ret;
257         u64 devid;
258         u64 transid;
259
260         mutex_lock(&uuid_mutex);
261
262         bdev = open_bdev_excl(path, flags, holder);
263
264         if (IS_ERR(bdev)) {
265                 ret = PTR_ERR(bdev);
266                 goto error;
267         }
268
269         ret = set_blocksize(bdev, 4096);
270         if (ret)
271                 goto error_close;
272         bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
273         if (!bh) {
274                 ret = -EIO;
275                 goto error_close;
276         }
277         disk_super = (struct btrfs_super_block *)bh->b_data;
278         if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
279             sizeof(disk_super->magic))) {
280                 ret = -EINVAL;
281                 goto error_brelse;
282         }
283         devid = le64_to_cpu(disk_super->dev_item.devid);
284         transid = btrfs_super_generation(disk_super);
285         if (disk_super->label[0])
286                 printk("device label %s ", disk_super->label);
287         else {
288                 /* FIXME, make a readl uuid parser */
289                 printk("device fsid %llx-%llx ",
290                        *(unsigned long long *)disk_super->fsid,
291                        *(unsigned long long *)(disk_super->fsid + 8));
292         }
293         printk("devid %Lu transid %Lu %s\n", devid, transid, path);
294         ret = device_list_add(path, disk_super, devid, fs_devices_ret);
295
296 error_brelse:
297         brelse(bh);
298 error_close:
299         close_bdev_excl(bdev);
300 error:
301         mutex_unlock(&uuid_mutex);
302         return ret;
303 }
304
305 /*
306  * this uses a pretty simple search, the expectation is that it is
307  * called very infrequently and that a given device has a small number
308  * of extents
309  */
310 static int find_free_dev_extent(struct btrfs_trans_handle *trans,
311                                 struct btrfs_device *device,
312                                 struct btrfs_path *path,
313                                 u64 num_bytes, u64 *start)
314 {
315         struct btrfs_key key;
316         struct btrfs_root *root = device->dev_root;
317         struct btrfs_dev_extent *dev_extent = NULL;
318         u64 hole_size = 0;
319         u64 last_byte = 0;
320         u64 search_start = 0;
321         u64 search_end = device->total_bytes;
322         int ret;
323         int slot = 0;
324         int start_found;
325         struct extent_buffer *l;
326
327         start_found = 0;
328         path->reada = 2;
329
330         /* FIXME use last free of some kind */
331
332         /* we don't want to overwrite the superblock on the drive,
333          * so we make sure to start at an offset of at least 1MB
334          */
335         search_start = max((u64)1024 * 1024, search_start);
336
337         if (root->fs_info->alloc_start + num_bytes <= device->total_bytes)
338                 search_start = max(root->fs_info->alloc_start, search_start);
339
340         key.objectid = device->devid;
341         key.offset = search_start;
342         key.type = BTRFS_DEV_EXTENT_KEY;
343         ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
344         if (ret < 0)
345                 goto error;
346         ret = btrfs_previous_item(root, path, 0, key.type);
347         if (ret < 0)
348                 goto error;
349         l = path->nodes[0];
350         btrfs_item_key_to_cpu(l, &key, path->slots[0]);
351         while (1) {
352                 l = path->nodes[0];
353                 slot = path->slots[0];
354                 if (slot >= btrfs_header_nritems(l)) {
355                         ret = btrfs_next_leaf(root, path);
356                         if (ret == 0)
357                                 continue;
358                         if (ret < 0)
359                                 goto error;
360 no_more_items:
361                         if (!start_found) {
362                                 if (search_start >= search_end) {
363                                         ret = -ENOSPC;
364                                         goto error;
365                                 }
366                                 *start = search_start;
367                                 start_found = 1;
368                                 goto check_pending;
369                         }
370                         *start = last_byte > search_start ?
371                                 last_byte : search_start;
372                         if (search_end <= *start) {
373                                 ret = -ENOSPC;
374                                 goto error;
375                         }
376                         goto check_pending;
377                 }
378                 btrfs_item_key_to_cpu(l, &key, slot);
379
380                 if (key.objectid < device->devid)
381                         goto next;
382
383                 if (key.objectid > device->devid)
384                         goto no_more_items;
385
386                 if (key.offset >= search_start && key.offset > last_byte &&
387                     start_found) {
388                         if (last_byte < search_start)
389                                 last_byte = search_start;
390                         hole_size = key.offset - last_byte;
391                         if (key.offset > last_byte &&
392                             hole_size >= num_bytes) {
393                                 *start = last_byte;
394                                 goto check_pending;
395                         }
396                 }
397                 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) {
398                         goto next;
399                 }
400
401                 start_found = 1;
402                 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
403                 last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
404 next:
405                 path->slots[0]++;
406                 cond_resched();
407         }
408 check_pending:
409         /* we have to make sure we didn't find an extent that has already
410          * been allocated by the map tree or the original allocation
411          */
412         btrfs_release_path(root, path);
413         BUG_ON(*start < search_start);
414
415         if (*start + num_bytes > search_end) {
416                 ret = -ENOSPC;
417                 goto error;
418         }
419         /* check for pending inserts here */
420         return 0;
421
422 error:
423         btrfs_release_path(root, path);
424         return ret;
425 }
426
427 int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
428                           struct btrfs_device *device,
429                           u64 start)
430 {
431         int ret;
432         struct btrfs_path *path;
433         struct btrfs_root *root = device->dev_root;
434         struct btrfs_key key;
435         struct btrfs_key found_key;
436         struct extent_buffer *leaf = NULL;
437         struct btrfs_dev_extent *extent = NULL;
438
439         path = btrfs_alloc_path();
440         if (!path)
441                 return -ENOMEM;
442
443         key.objectid = device->devid;
444         key.offset = start;
445         key.type = BTRFS_DEV_EXTENT_KEY;
446
447         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
448         if (ret > 0) {
449                 ret = btrfs_previous_item(root, path, key.objectid,
450                                           BTRFS_DEV_EXTENT_KEY);
451                 BUG_ON(ret);
452                 leaf = path->nodes[0];
453                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
454                 extent = btrfs_item_ptr(leaf, path->slots[0],
455                                         struct btrfs_dev_extent);
456                 BUG_ON(found_key.offset > start || found_key.offset +
457                        btrfs_dev_extent_length(leaf, extent) < start);
458                 ret = 0;
459         } else if (ret == 0) {
460                 leaf = path->nodes[0];
461                 extent = btrfs_item_ptr(leaf, path->slots[0],
462                                         struct btrfs_dev_extent);
463         }
464         BUG_ON(ret);
465
466         if (device->bytes_used > 0)
467                 device->bytes_used -= btrfs_dev_extent_length(leaf, extent);
468         ret = btrfs_del_item(trans, root, path);
469         BUG_ON(ret);
470
471         btrfs_free_path(path);
472         return ret;
473 }
474
475 int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
476                            struct btrfs_device *device,
477                            u64 chunk_tree, u64 chunk_objectid,
478                            u64 chunk_offset,
479                            u64 num_bytes, u64 *start)
480 {
481         int ret;
482         struct btrfs_path *path;
483         struct btrfs_root *root = device->dev_root;
484         struct btrfs_dev_extent *extent;
485         struct extent_buffer *leaf;
486         struct btrfs_key key;
487
488         WARN_ON(!device->in_fs_metadata);
489         path = btrfs_alloc_path();
490         if (!path)
491                 return -ENOMEM;
492
493         ret = find_free_dev_extent(trans, device, path, num_bytes, start);
494         if (ret) {
495                 goto err;
496         }
497
498         key.objectid = device->devid;
499         key.offset = *start;
500         key.type = BTRFS_DEV_EXTENT_KEY;
501         ret = btrfs_insert_empty_item(trans, root, path, &key,
502                                       sizeof(*extent));
503         BUG_ON(ret);
504
505         leaf = path->nodes[0];
506         extent = btrfs_item_ptr(leaf, path->slots[0],
507                                 struct btrfs_dev_extent);
508         btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
509         btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
510         btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
511
512         write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
513                     (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
514                     BTRFS_UUID_SIZE);
515
516         btrfs_set_dev_extent_length(leaf, extent, num_bytes);
517         btrfs_mark_buffer_dirty(leaf);
518 err:
519         btrfs_free_path(path);
520         return ret;
521 }
522
523 static int find_next_chunk(struct btrfs_root *root, u64 objectid, u64 *offset)
524 {
525         struct btrfs_path *path;
526         int ret;
527         struct btrfs_key key;
528         struct btrfs_chunk *chunk;
529         struct btrfs_key found_key;
530
531         path = btrfs_alloc_path();
532         BUG_ON(!path);
533
534         key.objectid = objectid;
535         key.offset = (u64)-1;
536         key.type = BTRFS_CHUNK_ITEM_KEY;
537
538         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
539         if (ret < 0)
540                 goto error;
541
542         BUG_ON(ret == 0);
543
544         ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
545         if (ret) {
546                 *offset = 0;
547         } else {
548                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
549                                       path->slots[0]);
550                 if (found_key.objectid != objectid)
551                         *offset = 0;
552                 else {
553                         chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
554                                                struct btrfs_chunk);
555                         *offset = found_key.offset +
556                                 btrfs_chunk_length(path->nodes[0], chunk);
557                 }
558         }
559         ret = 0;
560 error:
561         btrfs_free_path(path);
562         return ret;
563 }
564
565 static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path,
566                            u64 *objectid)
567 {
568         int ret;
569         struct btrfs_key key;
570         struct btrfs_key found_key;
571
572         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
573         key.type = BTRFS_DEV_ITEM_KEY;
574         key.offset = (u64)-1;
575
576         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
577         if (ret < 0)
578                 goto error;
579
580         BUG_ON(ret == 0);
581
582         ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
583                                   BTRFS_DEV_ITEM_KEY);
584         if (ret) {
585                 *objectid = 1;
586         } else {
587                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
588                                       path->slots[0]);
589                 *objectid = found_key.offset + 1;
590         }
591         ret = 0;
592 error:
593         btrfs_release_path(root, path);
594         return ret;
595 }
596
597 /*
598  * the device information is stored in the chunk root
599  * the btrfs_device struct should be fully filled in
600  */
601 int btrfs_add_device(struct btrfs_trans_handle *trans,
602                      struct btrfs_root *root,
603                      struct btrfs_device *device)
604 {
605         int ret;
606         struct btrfs_path *path;
607         struct btrfs_dev_item *dev_item;
608         struct extent_buffer *leaf;
609         struct btrfs_key key;
610         unsigned long ptr;
611         u64 free_devid = 0;
612
613         root = root->fs_info->chunk_root;
614
615         path = btrfs_alloc_path();
616         if (!path)
617                 return -ENOMEM;
618
619         ret = find_next_devid(root, path, &free_devid);
620         if (ret)
621                 goto out;
622
623         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
624         key.type = BTRFS_DEV_ITEM_KEY;
625         key.offset = free_devid;
626
627         ret = btrfs_insert_empty_item(trans, root, path, &key,
628                                       sizeof(*dev_item));
629         if (ret)
630                 goto out;
631
632         leaf = path->nodes[0];
633         dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
634
635         device->devid = free_devid;
636         btrfs_set_device_id(leaf, dev_item, device->devid);
637         btrfs_set_device_type(leaf, dev_item, device->type);
638         btrfs_set_device_io_align(leaf, dev_item, device->io_align);
639         btrfs_set_device_io_width(leaf, dev_item, device->io_width);
640         btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
641         btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
642         btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
643         btrfs_set_device_group(leaf, dev_item, 0);
644         btrfs_set_device_seek_speed(leaf, dev_item, 0);
645         btrfs_set_device_bandwidth(leaf, dev_item, 0);
646
647         ptr = (unsigned long)btrfs_device_uuid(dev_item);
648         write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
649         btrfs_mark_buffer_dirty(leaf);
650         ret = 0;
651
652 out:
653         btrfs_free_path(path);
654         return ret;
655 }
656
657 static int btrfs_rm_dev_item(struct btrfs_root *root,
658                              struct btrfs_device *device)
659 {
660         int ret;
661         struct btrfs_path *path;
662         struct block_device *bdev = device->bdev;
663         struct btrfs_device *next_dev;
664         struct btrfs_key key;
665         u64 total_bytes;
666         struct btrfs_fs_devices *fs_devices;
667         struct btrfs_trans_handle *trans;
668
669         root = root->fs_info->chunk_root;
670
671         path = btrfs_alloc_path();
672         if (!path)
673                 return -ENOMEM;
674
675         trans = btrfs_start_transaction(root, 1);
676         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
677         key.type = BTRFS_DEV_ITEM_KEY;
678         key.offset = device->devid;
679
680         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
681         if (ret < 0)
682                 goto out;
683
684         if (ret > 0) {
685                 ret = -ENOENT;
686                 goto out;
687         }
688
689         ret = btrfs_del_item(trans, root, path);
690         if (ret)
691                 goto out;
692
693         /*
694          * at this point, the device is zero sized.  We want to
695          * remove it from the devices list and zero out the old super
696          */
697         list_del_init(&device->dev_list);
698         list_del_init(&device->dev_alloc_list);
699         fs_devices = root->fs_info->fs_devices;
700
701         next_dev = list_entry(fs_devices->devices.next, struct btrfs_device,
702                               dev_list);
703         if (bdev == root->fs_info->sb->s_bdev)
704                 root->fs_info->sb->s_bdev = next_dev->bdev;
705         if (bdev == fs_devices->latest_bdev)
706                 fs_devices->latest_bdev = next_dev->bdev;
707
708         total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
709         btrfs_set_super_total_bytes(&root->fs_info->super_copy,
710                                     total_bytes - device->total_bytes);
711
712         total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
713         btrfs_set_super_num_devices(&root->fs_info->super_copy,
714                                     total_bytes - 1);
715 out:
716         btrfs_free_path(path);
717         btrfs_commit_transaction(trans, root);
718         return ret;
719 }
720
721 int btrfs_rm_device(struct btrfs_root *root, char *device_path)
722 {
723         struct btrfs_device *device;
724         struct block_device *bdev;
725         struct buffer_head *bh = NULL;
726         struct btrfs_super_block *disk_super;
727         u64 all_avail;
728         u64 devid;
729         int ret = 0;
730
731         mutex_lock(&root->fs_info->fs_mutex);
732         mutex_lock(&uuid_mutex);
733
734         all_avail = root->fs_info->avail_data_alloc_bits |
735                 root->fs_info->avail_system_alloc_bits |
736                 root->fs_info->avail_metadata_alloc_bits;
737
738         if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
739             btrfs_super_num_devices(&root->fs_info->super_copy) <= 4) {
740                 printk("btrfs: unable to go below four devices on raid10\n");
741                 ret = -EINVAL;
742                 goto out;
743         }
744
745         if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
746             btrfs_super_num_devices(&root->fs_info->super_copy) <= 2) {
747                 printk("btrfs: unable to go below two devices on raid1\n");
748                 ret = -EINVAL;
749                 goto out;
750         }
751
752         if (strcmp(device_path, "missing") == 0) {
753                 struct list_head *cur;
754                 struct list_head *devices;
755                 struct btrfs_device *tmp;
756
757                 device = NULL;
758                 devices = &root->fs_info->fs_devices->devices;
759                 list_for_each(cur, devices) {
760                         tmp = list_entry(cur, struct btrfs_device, dev_list);
761                         if (tmp->in_fs_metadata && !tmp->bdev) {
762                                 device = tmp;
763                                 break;
764                         }
765                 }
766                 bdev = NULL;
767                 bh = NULL;
768                 disk_super = NULL;
769                 if (!device) {
770                         printk("btrfs: no missing devices found to remove\n");
771                         goto out;
772                 }
773
774         } else {
775                 bdev = open_bdev_excl(device_path, 0,
776                                       root->fs_info->bdev_holder);
777                 if (IS_ERR(bdev)) {
778                         ret = PTR_ERR(bdev);
779                         goto out;
780                 }
781
782                 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
783                 if (!bh) {
784                         ret = -EIO;
785                         goto error_close;
786                 }
787                 disk_super = (struct btrfs_super_block *)bh->b_data;
788                 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
789                     sizeof(disk_super->magic))) {
790                         ret = -ENOENT;
791                         goto error_brelse;
792                 }
793                 if (memcmp(disk_super->fsid, root->fs_info->fsid,
794                            BTRFS_FSID_SIZE)) {
795                         ret = -ENOENT;
796                         goto error_brelse;
797                 }
798                 devid = le64_to_cpu(disk_super->dev_item.devid);
799                 device = btrfs_find_device(root, devid, NULL);
800                 if (!device) {
801                         ret = -ENOENT;
802                         goto error_brelse;
803                 }
804
805         }
806         root->fs_info->fs_devices->num_devices--;
807
808         ret = btrfs_shrink_device(device, 0);
809         if (ret)
810                 goto error_brelse;
811
812
813         ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
814         if (ret)
815                 goto error_brelse;
816
817         if (bh) {
818                 /* make sure this device isn't detected as part of
819                  * the FS anymore
820                  */
821                 memset(&disk_super->magic, 0, sizeof(disk_super->magic));
822                 set_buffer_dirty(bh);
823                 sync_dirty_buffer(bh);
824
825                 brelse(bh);
826         }
827
828         if (device->bdev) {
829                 /* one close for the device struct or super_block */
830                 close_bdev_excl(device->bdev);
831         }
832         if (bdev) {
833                 /* one close for us */
834                 close_bdev_excl(bdev);
835         }
836         kfree(device->name);
837         kfree(device);
838         ret = 0;
839         goto out;
840
841 error_brelse:
842         brelse(bh);
843 error_close:
844         if (bdev)
845                 close_bdev_excl(bdev);
846 out:
847         mutex_unlock(&uuid_mutex);
848         mutex_unlock(&root->fs_info->fs_mutex);
849         return ret;
850 }
851
852 int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
853 {
854         struct btrfs_trans_handle *trans;
855         struct btrfs_device *device;
856         struct block_device *bdev;
857         struct list_head *cur;
858         struct list_head *devices;
859         u64 total_bytes;
860         int ret = 0;
861
862
863         bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder);
864         if (!bdev) {
865                 return -EIO;
866         }
867         mutex_lock(&root->fs_info->fs_mutex);
868         trans = btrfs_start_transaction(root, 1);
869         devices = &root->fs_info->fs_devices->devices;
870         list_for_each(cur, devices) {
871                 device = list_entry(cur, struct btrfs_device, dev_list);
872                 if (device->bdev == bdev) {
873                         ret = -EEXIST;
874                         goto out;
875                 }
876         }
877
878         device = kzalloc(sizeof(*device), GFP_NOFS);
879         if (!device) {
880                 /* we can safely leave the fs_devices entry around */
881                 ret = -ENOMEM;
882                 goto out_close_bdev;
883         }
884
885         device->barriers = 1;
886         generate_random_uuid(device->uuid);
887         spin_lock_init(&device->io_lock);
888         device->name = kstrdup(device_path, GFP_NOFS);
889         if (!device->name) {
890                 kfree(device);
891                 goto out_close_bdev;
892         }
893         device->io_width = root->sectorsize;
894         device->io_align = root->sectorsize;
895         device->sector_size = root->sectorsize;
896         device->total_bytes = i_size_read(bdev->bd_inode);
897         device->dev_root = root->fs_info->dev_root;
898         device->bdev = bdev;
899         device->in_fs_metadata = 1;
900
901         ret = btrfs_add_device(trans, root, device);
902         if (ret)
903                 goto out_close_bdev;
904
905         total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
906         btrfs_set_super_total_bytes(&root->fs_info->super_copy,
907                                     total_bytes + device->total_bytes);
908
909         total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
910         btrfs_set_super_num_devices(&root->fs_info->super_copy,
911                                     total_bytes + 1);
912
913         list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
914         list_add(&device->dev_alloc_list,
915                  &root->fs_info->fs_devices->alloc_list);
916         root->fs_info->fs_devices->num_devices++;
917 out:
918         btrfs_end_transaction(trans, root);
919         mutex_unlock(&root->fs_info->fs_mutex);
920         return ret;
921
922 out_close_bdev:
923         close_bdev_excl(bdev);
924         goto out;
925 }
926
927 int btrfs_update_device(struct btrfs_trans_handle *trans,
928                         struct btrfs_device *device)
929 {
930         int ret;
931         struct btrfs_path *path;
932         struct btrfs_root *root;
933         struct btrfs_dev_item *dev_item;
934         struct extent_buffer *leaf;
935         struct btrfs_key key;
936
937         root = device->dev_root->fs_info->chunk_root;
938
939         path = btrfs_alloc_path();
940         if (!path)
941                 return -ENOMEM;
942
943         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
944         key.type = BTRFS_DEV_ITEM_KEY;
945         key.offset = device->devid;
946
947         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
948         if (ret < 0)
949                 goto out;
950
951         if (ret > 0) {
952                 ret = -ENOENT;
953                 goto out;
954         }
955
956         leaf = path->nodes[0];
957         dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
958
959         btrfs_set_device_id(leaf, dev_item, device->devid);
960         btrfs_set_device_type(leaf, dev_item, device->type);
961         btrfs_set_device_io_align(leaf, dev_item, device->io_align);
962         btrfs_set_device_io_width(leaf, dev_item, device->io_width);
963         btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
964         btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
965         btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
966         btrfs_mark_buffer_dirty(leaf);
967
968 out:
969         btrfs_free_path(path);
970         return ret;
971 }
972
973 int btrfs_grow_device(struct btrfs_trans_handle *trans,
974                       struct btrfs_device *device, u64 new_size)
975 {
976         struct btrfs_super_block *super_copy =
977                 &device->dev_root->fs_info->super_copy;
978         u64 old_total = btrfs_super_total_bytes(super_copy);
979         u64 diff = new_size - device->total_bytes;
980
981         btrfs_set_super_total_bytes(super_copy, old_total + diff);
982         return btrfs_update_device(trans, device);
983 }
984
985 static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
986                             struct btrfs_root *root,
987                             u64 chunk_tree, u64 chunk_objectid,
988                             u64 chunk_offset)
989 {
990         int ret;
991         struct btrfs_path *path;
992         struct btrfs_key key;
993
994         root = root->fs_info->chunk_root;
995         path = btrfs_alloc_path();
996         if (!path)
997                 return -ENOMEM;
998
999         key.objectid = chunk_objectid;
1000         key.offset = chunk_offset;
1001         key.type = BTRFS_CHUNK_ITEM_KEY;
1002
1003         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1004         BUG_ON(ret);
1005
1006         ret = btrfs_del_item(trans, root, path);
1007         BUG_ON(ret);
1008
1009         btrfs_free_path(path);
1010         return 0;
1011 }
1012
1013 int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
1014                         chunk_offset)
1015 {
1016         struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1017         struct btrfs_disk_key *disk_key;
1018         struct btrfs_chunk *chunk;
1019         u8 *ptr;
1020         int ret = 0;
1021         u32 num_stripes;
1022         u32 array_size;
1023         u32 len = 0;
1024         u32 cur;
1025         struct btrfs_key key;
1026
1027         array_size = btrfs_super_sys_array_size(super_copy);
1028
1029         ptr = super_copy->sys_chunk_array;
1030         cur = 0;
1031
1032         while (cur < array_size) {
1033                 disk_key = (struct btrfs_disk_key *)ptr;
1034                 btrfs_disk_key_to_cpu(&key, disk_key);
1035
1036                 len = sizeof(*disk_key);
1037
1038                 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
1039                         chunk = (struct btrfs_chunk *)(ptr + len);
1040                         num_stripes = btrfs_stack_chunk_num_stripes(chunk);
1041                         len += btrfs_chunk_item_size(num_stripes);
1042                 } else {
1043                         ret = -EIO;
1044                         break;
1045                 }
1046                 if (key.objectid == chunk_objectid &&
1047                     key.offset == chunk_offset) {
1048                         memmove(ptr, ptr + len, array_size - (cur + len));
1049                         array_size -= len;
1050                         btrfs_set_super_sys_array_size(super_copy, array_size);
1051                 } else {
1052                         ptr += len;
1053                         cur += len;
1054                 }
1055         }
1056         return ret;
1057 }
1058
1059
1060 int btrfs_relocate_chunk(struct btrfs_root *root,
1061                          u64 chunk_tree, u64 chunk_objectid,
1062                          u64 chunk_offset)
1063 {
1064         struct extent_map_tree *em_tree;
1065         struct btrfs_root *extent_root;
1066         struct btrfs_trans_handle *trans;
1067         struct extent_map *em;
1068         struct map_lookup *map;
1069         int ret;
1070         int i;
1071
1072         printk("btrfs relocating chunk %llu\n",
1073                (unsigned long long)chunk_offset);
1074         root = root->fs_info->chunk_root;
1075         extent_root = root->fs_info->extent_root;
1076         em_tree = &root->fs_info->mapping_tree.map_tree;
1077
1078         /* step one, relocate all the extents inside this chunk */
1079         ret = btrfs_shrink_extent_tree(extent_root, chunk_offset);
1080         BUG_ON(ret);
1081
1082         trans = btrfs_start_transaction(root, 1);
1083         BUG_ON(!trans);
1084
1085         /*
1086          * step two, delete the device extents and the
1087          * chunk tree entries
1088          */
1089         spin_lock(&em_tree->lock);
1090         em = lookup_extent_mapping(em_tree, chunk_offset, 1);
1091         spin_unlock(&em_tree->lock);
1092
1093         BUG_ON(em->start > chunk_offset ||
1094                em->start + em->len < chunk_offset);
1095         map = (struct map_lookup *)em->bdev;
1096
1097         for (i = 0; i < map->num_stripes; i++) {
1098                 ret = btrfs_free_dev_extent(trans, map->stripes[i].dev,
1099                                             map->stripes[i].physical);
1100                 BUG_ON(ret);
1101
1102                 if (map->stripes[i].dev) {
1103                         ret = btrfs_update_device(trans, map->stripes[i].dev);
1104                         BUG_ON(ret);
1105                 }
1106         }
1107         ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
1108                                chunk_offset);
1109
1110         BUG_ON(ret);
1111
1112         if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
1113                 ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
1114                 BUG_ON(ret);
1115         }
1116
1117         spin_lock(&em_tree->lock);
1118         remove_extent_mapping(em_tree, em);
1119         kfree(map);
1120         em->bdev = NULL;
1121
1122         /* once for the tree */
1123         free_extent_map(em);
1124         spin_unlock(&em_tree->lock);
1125
1126         /* once for us */
1127         free_extent_map(em);
1128
1129         btrfs_end_transaction(trans, root);
1130         return 0;
1131 }
1132
1133 static u64 div_factor(u64 num, int factor)
1134 {
1135         if (factor == 10)
1136                 return num;
1137         num *= factor;
1138         do_div(num, 10);
1139         return num;
1140 }
1141
1142
1143 int btrfs_balance(struct btrfs_root *dev_root)
1144 {
1145         int ret;
1146         struct list_head *cur;
1147         struct list_head *devices = &dev_root->fs_info->fs_devices->devices;
1148         struct btrfs_device *device;
1149         u64 old_size;
1150         u64 size_to_free;
1151         struct btrfs_path *path;
1152         struct btrfs_key key;
1153         struct btrfs_chunk *chunk;
1154         struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
1155         struct btrfs_trans_handle *trans;
1156         struct btrfs_key found_key;
1157
1158
1159         dev_root = dev_root->fs_info->dev_root;
1160
1161         mutex_lock(&dev_root->fs_info->fs_mutex);
1162         /* step one make some room on all the devices */
1163         list_for_each(cur, devices) {
1164                 device = list_entry(cur, struct btrfs_device, dev_list);
1165                 old_size = device->total_bytes;
1166                 size_to_free = div_factor(old_size, 1);
1167                 size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
1168                 if (device->total_bytes - device->bytes_used > size_to_free)
1169                         continue;
1170
1171                 ret = btrfs_shrink_device(device, old_size - size_to_free);
1172                 BUG_ON(ret);
1173
1174                 trans = btrfs_start_transaction(dev_root, 1);
1175                 BUG_ON(!trans);
1176
1177                 ret = btrfs_grow_device(trans, device, old_size);
1178                 BUG_ON(ret);
1179
1180                 btrfs_end_transaction(trans, dev_root);
1181         }
1182
1183         /* step two, relocate all the chunks */
1184         path = btrfs_alloc_path();
1185         BUG_ON(!path);
1186
1187         key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1188         key.offset = (u64)-1;
1189         key.type = BTRFS_CHUNK_ITEM_KEY;
1190
1191         while(1) {
1192                 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
1193                 if (ret < 0)
1194                         goto error;
1195
1196                 /*
1197                  * this shouldn't happen, it means the last relocate
1198                  * failed
1199                  */
1200                 if (ret == 0)
1201                         break;
1202
1203                 ret = btrfs_previous_item(chunk_root, path, 0,
1204                                           BTRFS_CHUNK_ITEM_KEY);
1205                 if (ret) {
1206                         break;
1207                 }
1208                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1209                                       path->slots[0]);
1210                 if (found_key.objectid != key.objectid)
1211                         break;
1212                 chunk = btrfs_item_ptr(path->nodes[0],
1213                                        path->slots[0],
1214                                        struct btrfs_chunk);
1215                 key.offset = found_key.offset;
1216                 /* chunk zero is special */
1217                 if (key.offset == 0)
1218                         break;
1219
1220                 ret = btrfs_relocate_chunk(chunk_root,
1221                                            chunk_root->root_key.objectid,
1222                                            found_key.objectid,
1223                                            found_key.offset);
1224                 BUG_ON(ret);
1225                 btrfs_release_path(chunk_root, path);
1226         }
1227         ret = 0;
1228 error:
1229         btrfs_free_path(path);
1230         mutex_unlock(&dev_root->fs_info->fs_mutex);
1231         return ret;
1232 }
1233
1234 /*
1235  * shrinking a device means finding all of the device extents past
1236  * the new size, and then following the back refs to the chunks.
1237  * The chunk relocation code actually frees the device extent
1238  */
1239 int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1240 {
1241         struct btrfs_trans_handle *trans;
1242         struct btrfs_root *root = device->dev_root;
1243         struct btrfs_dev_extent *dev_extent = NULL;
1244         struct btrfs_path *path;
1245         u64 length;
1246         u64 chunk_tree;
1247         u64 chunk_objectid;
1248         u64 chunk_offset;
1249         int ret;
1250         int slot;
1251         struct extent_buffer *l;
1252         struct btrfs_key key;
1253         struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1254         u64 old_total = btrfs_super_total_bytes(super_copy);
1255         u64 diff = device->total_bytes - new_size;
1256
1257
1258         path = btrfs_alloc_path();
1259         if (!path)
1260                 return -ENOMEM;
1261
1262         trans = btrfs_start_transaction(root, 1);
1263         if (!trans) {
1264                 ret = -ENOMEM;
1265                 goto done;
1266         }
1267
1268         path->reada = 2;
1269
1270         device->total_bytes = new_size;
1271         ret = btrfs_update_device(trans, device);
1272         if (ret) {
1273                 btrfs_end_transaction(trans, root);
1274                 goto done;
1275         }
1276         WARN_ON(diff > old_total);
1277         btrfs_set_super_total_bytes(super_copy, old_total - diff);
1278         btrfs_end_transaction(trans, root);
1279
1280         key.objectid = device->devid;
1281         key.offset = (u64)-1;
1282         key.type = BTRFS_DEV_EXTENT_KEY;
1283
1284         while (1) {
1285                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1286                 if (ret < 0)
1287                         goto done;
1288
1289                 ret = btrfs_previous_item(root, path, 0, key.type);
1290                 if (ret < 0)
1291                         goto done;
1292                 if (ret) {
1293                         ret = 0;
1294                         goto done;
1295                 }
1296
1297                 l = path->nodes[0];
1298                 slot = path->slots[0];
1299                 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1300
1301                 if (key.objectid != device->devid)
1302                         goto done;
1303
1304                 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
1305                 length = btrfs_dev_extent_length(l, dev_extent);
1306
1307                 if (key.offset + length <= new_size)
1308                         goto done;
1309
1310                 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
1311                 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
1312                 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
1313                 btrfs_release_path(root, path);
1314
1315                 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
1316                                            chunk_offset);
1317                 if (ret)
1318                         goto done;
1319         }
1320
1321 done:
1322         btrfs_free_path(path);
1323         return ret;
1324 }
1325
1326 int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
1327                            struct btrfs_root *root,
1328                            struct btrfs_key *key,
1329                            struct btrfs_chunk *chunk, int item_size)
1330 {
1331         struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1332         struct btrfs_disk_key disk_key;
1333         u32 array_size;
1334         u8 *ptr;
1335
1336         array_size = btrfs_super_sys_array_size(super_copy);
1337         if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
1338                 return -EFBIG;
1339
1340         ptr = super_copy->sys_chunk_array + array_size;
1341         btrfs_cpu_key_to_disk(&disk_key, key);
1342         memcpy(ptr, &disk_key, sizeof(disk_key));
1343         ptr += sizeof(disk_key);
1344         memcpy(ptr, chunk, item_size);
1345         item_size += sizeof(disk_key);
1346         btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
1347         return 0;
1348 }
1349
1350 static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes,
1351                                int sub_stripes)
1352 {
1353         if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
1354                 return calc_size;
1355         else if (type & BTRFS_BLOCK_GROUP_RAID10)
1356                 return calc_size * (num_stripes / sub_stripes);
1357         else
1358                 return calc_size * num_stripes;
1359 }
1360
1361
1362 int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
1363                       struct btrfs_root *extent_root, u64 *start,
1364                       u64 *num_bytes, u64 type)
1365 {
1366         u64 dev_offset;
1367         struct btrfs_fs_info *info = extent_root->fs_info;
1368         struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
1369         struct btrfs_path *path;
1370         struct btrfs_stripe *stripes;
1371         struct btrfs_device *device = NULL;
1372         struct btrfs_chunk *chunk;
1373         struct list_head private_devs;
1374         struct list_head *dev_list;
1375         struct list_head *cur;
1376         struct extent_map_tree *em_tree;
1377         struct map_lookup *map;
1378         struct extent_map *em;
1379         int min_stripe_size = 1 * 1024 * 1024;
1380         u64 physical;
1381         u64 calc_size = 1024 * 1024 * 1024;
1382         u64 max_chunk_size = calc_size;
1383         u64 min_free;
1384         u64 avail;
1385         u64 max_avail = 0;
1386         u64 percent_max;
1387         int num_stripes = 1;
1388         int min_stripes = 1;
1389         int sub_stripes = 0;
1390         int looped = 0;
1391         int ret;
1392         int index;
1393         int stripe_len = 64 * 1024;
1394         struct btrfs_key key;
1395
1396         if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
1397             (type & BTRFS_BLOCK_GROUP_DUP)) {
1398                 WARN_ON(1);
1399                 type &= ~BTRFS_BLOCK_GROUP_DUP;
1400         }
1401         dev_list = &extent_root->fs_info->fs_devices->alloc_list;
1402         if (list_empty(dev_list))
1403                 return -ENOSPC;
1404
1405         if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
1406                 num_stripes = btrfs_super_num_devices(&info->super_copy);
1407                 min_stripes = 2;
1408         }
1409         if (type & (BTRFS_BLOCK_GROUP_DUP)) {
1410                 num_stripes = 2;
1411                 min_stripes = 2;
1412         }
1413         if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
1414                 num_stripes = min_t(u64, 2,
1415                                   btrfs_super_num_devices(&info->super_copy));
1416                 if (num_stripes < 2)
1417                         return -ENOSPC;
1418                 min_stripes = 2;
1419         }
1420         if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
1421                 num_stripes = btrfs_super_num_devices(&info->super_copy);
1422                 if (num_stripes < 4)
1423                         return -ENOSPC;
1424                 num_stripes &= ~(u32)1;
1425                 sub_stripes = 2;
1426                 min_stripes = 4;
1427         }
1428
1429         if (type & BTRFS_BLOCK_GROUP_DATA) {
1430                 max_chunk_size = 10 * calc_size;
1431                 min_stripe_size = 64 * 1024 * 1024;
1432         } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
1433                 max_chunk_size = 4 * calc_size;
1434                 min_stripe_size = 32 * 1024 * 1024;
1435         } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
1436                 calc_size = 8 * 1024 * 1024;
1437                 max_chunk_size = calc_size * 2;
1438                 min_stripe_size = 1 * 1024 * 1024;
1439         }
1440
1441         path = btrfs_alloc_path();
1442         if (!path)
1443                 return -ENOMEM;
1444
1445         /* we don't want a chunk larger than 10% of the FS */
1446         percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1);
1447         max_chunk_size = min(percent_max, max_chunk_size);
1448
1449 again:
1450         if (calc_size * num_stripes > max_chunk_size) {
1451                 calc_size = max_chunk_size;
1452                 do_div(calc_size, num_stripes);
1453                 do_div(calc_size, stripe_len);
1454                 calc_size *= stripe_len;
1455         }
1456         /* we don't want tiny stripes */
1457         calc_size = max_t(u64, min_stripe_size, calc_size);
1458
1459         do_div(calc_size, stripe_len);
1460         calc_size *= stripe_len;
1461
1462         INIT_LIST_HEAD(&private_devs);
1463         cur = dev_list->next;
1464         index = 0;
1465
1466         if (type & BTRFS_BLOCK_GROUP_DUP)
1467                 min_free = calc_size * 2;
1468         else
1469                 min_free = calc_size;
1470
1471         /* we add 1MB because we never use the first 1MB of the device */
1472         min_free += 1024 * 1024;
1473
1474         /* build a private list of devices we will allocate from */
1475         while(index < num_stripes) {
1476                 device = list_entry(cur, struct btrfs_device, dev_alloc_list);
1477
1478                 if (device->total_bytes > device->bytes_used)
1479                         avail = device->total_bytes - device->bytes_used;
1480                 else
1481                         avail = 0;
1482                 cur = cur->next;
1483
1484                 if (device->in_fs_metadata && avail >= min_free) {
1485                         u64 ignored_start = 0;
1486                         ret = find_free_dev_extent(trans, device, path,
1487                                                    min_free,
1488                                                    &ignored_start);
1489                         if (ret == 0) {
1490                                 list_move_tail(&device->dev_alloc_list,
1491                                                &private_devs);
1492                                 index++;
1493                                 if (type & BTRFS_BLOCK_GROUP_DUP)
1494                                         index++;
1495                         }
1496                 } else if (device->in_fs_metadata && avail > max_avail)
1497                         max_avail = avail;
1498                 if (cur == dev_list)
1499                         break;
1500         }
1501         if (index < num_stripes) {
1502                 list_splice(&private_devs, dev_list);
1503                 if (index >= min_stripes) {
1504                         num_stripes = index;
1505                         if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
1506                                 num_stripes /= sub_stripes;
1507                                 num_stripes *= sub_stripes;
1508                         }
1509                         looped = 1;
1510                         goto again;
1511                 }
1512                 if (!looped && max_avail > 0) {
1513                         looped = 1;
1514                         calc_size = max_avail;
1515                         goto again;
1516                 }
1517                 btrfs_free_path(path);
1518                 return -ENOSPC;
1519         }
1520         key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1521         key.type = BTRFS_CHUNK_ITEM_KEY;
1522         ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
1523                               &key.offset);
1524         if (ret) {
1525                 btrfs_free_path(path);
1526                 return ret;
1527         }
1528
1529         chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
1530         if (!chunk) {
1531                 btrfs_free_path(path);
1532                 return -ENOMEM;
1533         }
1534
1535         map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
1536         if (!map) {
1537                 kfree(chunk);
1538                 btrfs_free_path(path);
1539                 return -ENOMEM;
1540         }
1541         btrfs_free_path(path);
1542         path = NULL;
1543
1544         stripes = &chunk->stripe;
1545         *num_bytes = chunk_bytes_by_type(type, calc_size,
1546                                          num_stripes, sub_stripes);
1547
1548         index = 0;
1549         while(index < num_stripes) {
1550                 struct btrfs_stripe *stripe;
1551                 BUG_ON(list_empty(&private_devs));
1552                 cur = private_devs.next;
1553                 device = list_entry(cur, struct btrfs_device, dev_alloc_list);
1554
1555                 /* loop over this device again if we're doing a dup group */
1556                 if (!(type & BTRFS_BLOCK_GROUP_DUP) ||
1557                     (index == num_stripes - 1))
1558                         list_move_tail(&device->dev_alloc_list, dev_list);
1559
1560                 ret = btrfs_alloc_dev_extent(trans, device,
1561                              info->chunk_root->root_key.objectid,
1562                              BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset,
1563                              calc_size, &dev_offset);
1564                 BUG_ON(ret);
1565                 device->bytes_used += calc_size;
1566                 ret = btrfs_update_device(trans, device);
1567                 BUG_ON(ret);
1568
1569                 map->stripes[index].dev = device;
1570                 map->stripes[index].physical = dev_offset;
1571                 stripe = stripes + index;
1572                 btrfs_set_stack_stripe_devid(stripe, device->devid);
1573                 btrfs_set_stack_stripe_offset(stripe, dev_offset);
1574                 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
1575                 physical = dev_offset;
1576                 index++;
1577         }
1578         BUG_ON(!list_empty(&private_devs));
1579
1580         /* key was set above */
1581         btrfs_set_stack_chunk_length(chunk, *num_bytes);
1582         btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
1583         btrfs_set_stack_chunk_stripe_len(chunk, stripe_len);
1584         btrfs_set_stack_chunk_type(chunk, type);
1585         btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
1586         btrfs_set_stack_chunk_io_align(chunk, stripe_len);
1587         btrfs_set_stack_chunk_io_width(chunk, stripe_len);
1588         btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
1589         btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes);
1590         map->sector_size = extent_root->sectorsize;
1591         map->stripe_len = stripe_len;
1592         map->io_align = stripe_len;
1593         map->io_width = stripe_len;
1594         map->type = type;
1595         map->num_stripes = num_stripes;
1596         map->sub_stripes = sub_stripes;
1597
1598         ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
1599                                 btrfs_chunk_item_size(num_stripes));
1600         BUG_ON(ret);
1601         *start = key.offset;;
1602
1603         em = alloc_extent_map(GFP_NOFS);
1604         if (!em)
1605                 return -ENOMEM;
1606         em->bdev = (struct block_device *)map;
1607         em->start = key.offset;
1608         em->len = *num_bytes;
1609         em->block_start = 0;
1610
1611         if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
1612                 ret = btrfs_add_system_chunk(trans, chunk_root, &key,
1613                                     chunk, btrfs_chunk_item_size(num_stripes));
1614                 BUG_ON(ret);
1615         }
1616         kfree(chunk);
1617
1618         em_tree = &extent_root->fs_info->mapping_tree.map_tree;
1619         spin_lock(&em_tree->lock);
1620         ret = add_extent_mapping(em_tree, em);
1621         spin_unlock(&em_tree->lock);
1622         BUG_ON(ret);
1623         free_extent_map(em);
1624         return ret;
1625 }
1626
1627 void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
1628 {
1629         extent_map_tree_init(&tree->map_tree, GFP_NOFS);
1630 }
1631
1632 void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
1633 {
1634         struct extent_map *em;
1635
1636         while(1) {
1637                 spin_lock(&tree->map_tree.lock);
1638                 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
1639                 if (em)
1640                         remove_extent_mapping(&tree->map_tree, em);
1641                 spin_unlock(&tree->map_tree.lock);
1642                 if (!em)
1643                         break;
1644                 kfree(em->bdev);
1645                 /* once for us */
1646                 free_extent_map(em);
1647                 /* once for the tree */
1648                 free_extent_map(em);
1649         }
1650 }
1651
1652 int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
1653 {
1654         struct extent_map *em;
1655         struct map_lookup *map;
1656         struct extent_map_tree *em_tree = &map_tree->map_tree;
1657         int ret;
1658
1659         spin_lock(&em_tree->lock);
1660         em = lookup_extent_mapping(em_tree, logical, len);
1661         spin_unlock(&em_tree->lock);
1662         BUG_ON(!em);
1663
1664         BUG_ON(em->start > logical || em->start + em->len < logical);
1665         map = (struct map_lookup *)em->bdev;
1666         if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
1667                 ret = map->num_stripes;
1668         else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
1669                 ret = map->sub_stripes;
1670         else
1671                 ret = 1;
1672         free_extent_map(em);
1673         return ret;
1674 }
1675
1676 static int find_live_mirror(struct map_lookup *map, int first, int num,
1677                             int optimal)
1678 {
1679         int i;
1680         if (map->stripes[optimal].dev->bdev)
1681                 return optimal;
1682         for (i = first; i < first + num; i++) {
1683                 if (map->stripes[i].dev->bdev)
1684                         return i;
1685         }
1686         /* we couldn't find one that doesn't fail.  Just return something
1687          * and the io error handling code will clean up eventually
1688          */
1689         return optimal;
1690 }
1691
1692 static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
1693                              u64 logical, u64 *length,
1694                              struct btrfs_multi_bio **multi_ret,
1695                              int mirror_num, struct page *unplug_page)
1696 {
1697         struct extent_map *em;
1698         struct map_lookup *map;
1699         struct extent_map_tree *em_tree = &map_tree->map_tree;
1700         u64 offset;
1701         u64 stripe_offset;
1702         u64 stripe_nr;
1703         int stripes_allocated = 8;
1704         int stripes_required = 1;
1705         int stripe_index;
1706         int i;
1707         int num_stripes;
1708         int max_errors = 0;
1709         struct btrfs_multi_bio *multi = NULL;
1710
1711         if (multi_ret && !(rw & (1 << BIO_RW))) {
1712                 stripes_allocated = 1;
1713         }
1714 again:
1715         if (multi_ret) {
1716                 multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
1717                                 GFP_NOFS);
1718                 if (!multi)
1719                         return -ENOMEM;
1720
1721                 atomic_set(&multi->error, 0);
1722         }
1723
1724         spin_lock(&em_tree->lock);
1725         em = lookup_extent_mapping(em_tree, logical, *length);
1726         spin_unlock(&em_tree->lock);
1727
1728         if (!em && unplug_page)
1729                 return 0;
1730
1731         if (!em) {
1732                 printk("unable to find logical %Lu len %Lu\n", logical, *length);
1733                 BUG();
1734         }
1735
1736         BUG_ON(em->start > logical || em->start + em->len < logical);
1737         map = (struct map_lookup *)em->bdev;
1738         offset = logical - em->start;
1739
1740         if (mirror_num > map->num_stripes)
1741                 mirror_num = 0;
1742
1743         /* if our multi bio struct is too small, back off and try again */
1744         if (rw & (1 << BIO_RW)) {
1745                 if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
1746                                  BTRFS_BLOCK_GROUP_DUP)) {
1747                         stripes_required = map->num_stripes;
1748                         max_errors = 1;
1749                 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
1750                         stripes_required = map->sub_stripes;
1751                         max_errors = 1;
1752                 }
1753         }
1754         if (multi_ret && rw == WRITE &&
1755             stripes_allocated < stripes_required) {
1756                 stripes_allocated = map->num_stripes;
1757                 free_extent_map(em);
1758                 kfree(multi);
1759                 goto again;
1760         }
1761         stripe_nr = offset;
1762         /*
1763          * stripe_nr counts the total number of stripes we have to stride
1764          * to get to this block
1765          */
1766         do_div(stripe_nr, map->stripe_len);
1767
1768         stripe_offset = stripe_nr * map->stripe_len;
1769         BUG_ON(offset < stripe_offset);
1770
1771         /* stripe_offset is the offset of this block in its stripe*/
1772         stripe_offset = offset - stripe_offset;
1773
1774         if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
1775                          BTRFS_BLOCK_GROUP_RAID10 |
1776                          BTRFS_BLOCK_GROUP_DUP)) {
1777                 /* we limit the length of each bio to what fits in a stripe */
1778                 *length = min_t(u64, em->len - offset,
1779                               map->stripe_len - stripe_offset);
1780         } else {
1781                 *length = em->len - offset;
1782         }
1783
1784         if (!multi_ret && !unplug_page)
1785                 goto out;
1786
1787         num_stripes = 1;
1788         stripe_index = 0;
1789         if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
1790                 if (unplug_page || (rw & (1 << BIO_RW)))
1791                         num_stripes = map->num_stripes;
1792                 else if (mirror_num)
1793                         stripe_index = mirror_num - 1;
1794                 else {
1795                         stripe_index = find_live_mirror(map, 0,
1796                                             map->num_stripes,
1797                                             current->pid % map->num_stripes);
1798                 }
1799
1800         } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
1801                 if (rw & (1 << BIO_RW))
1802                         num_stripes = map->num_stripes;
1803                 else if (mirror_num)
1804                         stripe_index = mirror_num - 1;
1805
1806         } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
1807                 int factor = map->num_stripes / map->sub_stripes;
1808
1809                 stripe_index = do_div(stripe_nr, factor);
1810                 stripe_index *= map->sub_stripes;
1811
1812                 if (unplug_page || (rw & (1 << BIO_RW)))
1813                         num_stripes = map->sub_stripes;
1814                 else if (mirror_num)
1815                         stripe_index += mirror_num - 1;
1816                 else {
1817                         stripe_index = find_live_mirror(map, stripe_index,
1818                                               map->sub_stripes, stripe_index +
1819                                               current->pid % map->sub_stripes);
1820                 }
1821         } else {
1822                 /*
1823                  * after this do_div call, stripe_nr is the number of stripes
1824                  * on this device we have to walk to find the data, and
1825                  * stripe_index is the number of our device in the stripe array
1826                  */
1827                 stripe_index = do_div(stripe_nr, map->num_stripes);
1828         }
1829         BUG_ON(stripe_index >= map->num_stripes);
1830
1831         for (i = 0; i < num_stripes; i++) {
1832                 if (unplug_page) {
1833                         struct btrfs_device *device;
1834                         struct backing_dev_info *bdi;
1835
1836                         device = map->stripes[stripe_index].dev;
1837                         if (device->bdev) {
1838                                 bdi = blk_get_backing_dev_info(device->bdev);
1839                                 if (bdi->unplug_io_fn) {
1840                                         bdi->unplug_io_fn(bdi, unplug_page);
1841                                 }
1842                         }
1843                 } else {
1844                         multi->stripes[i].physical =
1845                                 map->stripes[stripe_index].physical +
1846                                 stripe_offset + stripe_nr * map->stripe_len;
1847                         multi->stripes[i].dev = map->stripes[stripe_index].dev;
1848                 }
1849                 stripe_index++;
1850         }
1851         if (multi_ret) {
1852                 *multi_ret = multi;
1853                 multi->num_stripes = num_stripes;
1854                 multi->max_errors = max_errors;
1855         }
1856 out:
1857         free_extent_map(em);
1858         return 0;
1859 }
1860
1861 int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
1862                       u64 logical, u64 *length,
1863                       struct btrfs_multi_bio **multi_ret, int mirror_num)
1864 {
1865         return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
1866                                  mirror_num, NULL);
1867 }
1868
1869 int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
1870                       u64 logical, struct page *page)
1871 {
1872         u64 length = PAGE_CACHE_SIZE;
1873         return __btrfs_map_block(map_tree, READ, logical, &length,
1874                                  NULL, 0, page);
1875 }
1876
1877
1878 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1879 static void end_bio_multi_stripe(struct bio *bio, int err)
1880 #else
1881 static int end_bio_multi_stripe(struct bio *bio,
1882                                    unsigned int bytes_done, int err)
1883 #endif
1884 {
1885         struct btrfs_multi_bio *multi = bio->bi_private;
1886
1887 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1888         if (bio->bi_size)
1889                 return 1;
1890 #endif
1891         if (err)
1892                 atomic_inc(&multi->error);
1893
1894         if (atomic_dec_and_test(&multi->stripes_pending)) {
1895                 bio->bi_private = multi->private;
1896                 bio->bi_end_io = multi->end_io;
1897                 /* only send an error to the higher layers if it is
1898                  * beyond the tolerance of the multi-bio
1899                  */
1900                 if (atomic_read(&multi->error) > multi->max_errors) {
1901                         err = -EIO;
1902                 } else if (err) {
1903                         /*
1904                          * this bio is actually up to date, we didn't
1905                          * go over the max number of errors
1906                          */
1907                         set_bit(BIO_UPTODATE, &bio->bi_flags);
1908                         err = 0;
1909                 }
1910                 kfree(multi);
1911
1912 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1913                 bio_endio(bio, bio->bi_size, err);
1914 #else
1915                 bio_endio(bio, err);
1916 #endif
1917         } else {
1918                 bio_put(bio);
1919         }
1920 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1921         return 0;
1922 #endif
1923 }
1924
1925 int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
1926                   int mirror_num)
1927 {
1928         struct btrfs_mapping_tree *map_tree;
1929         struct btrfs_device *dev;
1930         struct bio *first_bio = bio;
1931         u64 logical = bio->bi_sector << 9;
1932         u64 length = 0;
1933         u64 map_length;
1934         struct btrfs_multi_bio *multi = NULL;
1935         int ret;
1936         int dev_nr = 0;
1937         int total_devs = 1;
1938
1939         length = bio->bi_size;
1940         map_tree = &root->fs_info->mapping_tree;
1941         map_length = length;
1942
1943         ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi,
1944                               mirror_num);
1945         BUG_ON(ret);
1946
1947         total_devs = multi->num_stripes;
1948         if (map_length < length) {
1949                 printk("mapping failed logical %Lu bio len %Lu "
1950                        "len %Lu\n", logical, length, map_length);
1951                 BUG();
1952         }
1953         multi->end_io = first_bio->bi_end_io;
1954         multi->private = first_bio->bi_private;
1955         atomic_set(&multi->stripes_pending, multi->num_stripes);
1956
1957         while(dev_nr < total_devs) {
1958                 if (total_devs > 1) {
1959                         if (dev_nr < total_devs - 1) {
1960                                 bio = bio_clone(first_bio, GFP_NOFS);
1961                                 BUG_ON(!bio);
1962                         } else {
1963                                 bio = first_bio;
1964                         }
1965                         bio->bi_private = multi;
1966                         bio->bi_end_io = end_bio_multi_stripe;
1967                 }
1968                 bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
1969                 dev = multi->stripes[dev_nr].dev;
1970                 if (dev && dev->bdev) {
1971                         bio->bi_bdev = dev->bdev;
1972                         spin_lock(&dev->io_lock);
1973                         dev->total_ios++;
1974                         spin_unlock(&dev->io_lock);
1975                         submit_bio(rw, bio);
1976                 } else {
1977                         bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
1978                         bio->bi_sector = logical >> 9;
1979 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1980                         bio_endio(bio, bio->bi_size, -EIO);
1981 #else
1982                         bio_endio(bio, -EIO);
1983 #endif
1984                 }
1985                 dev_nr++;
1986         }
1987         if (total_devs == 1)
1988                 kfree(multi);
1989         return 0;
1990 }
1991
1992 struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
1993                                        u8 *uuid)
1994 {
1995         struct list_head *head = &root->fs_info->fs_devices->devices;
1996
1997         return __find_device(head, devid, uuid);
1998 }
1999
2000 static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
2001                                             u64 devid, u8 *dev_uuid)
2002 {
2003         struct btrfs_device *device;
2004         struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2005
2006         device = kzalloc(sizeof(*device), GFP_NOFS);
2007         list_add(&device->dev_list,
2008                  &fs_devices->devices);
2009         list_add(&device->dev_alloc_list,
2010                  &fs_devices->alloc_list);
2011         device->barriers = 1;
2012         device->dev_root = root->fs_info->dev_root;
2013         device->devid = devid;
2014         fs_devices->num_devices++;
2015         spin_lock_init(&device->io_lock);
2016         memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
2017         return device;
2018 }
2019
2020
2021 static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
2022                           struct extent_buffer *leaf,
2023                           struct btrfs_chunk *chunk)
2024 {
2025         struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
2026         struct map_lookup *map;
2027         struct extent_map *em;
2028         u64 logical;
2029         u64 length;
2030         u64 devid;
2031         u8 uuid[BTRFS_UUID_SIZE];
2032         int num_stripes;
2033         int ret;
2034         int i;
2035
2036         logical = key->offset;
2037         length = btrfs_chunk_length(leaf, chunk);
2038
2039         spin_lock(&map_tree->map_tree.lock);
2040         em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
2041         spin_unlock(&map_tree->map_tree.lock);
2042
2043         /* already mapped? */
2044         if (em && em->start <= logical && em->start + em->len > logical) {
2045                 free_extent_map(em);
2046                 return 0;
2047         } else if (em) {
2048                 free_extent_map(em);
2049         }
2050
2051         map = kzalloc(sizeof(*map), GFP_NOFS);
2052         if (!map)
2053                 return -ENOMEM;
2054
2055         em = alloc_extent_map(GFP_NOFS);
2056         if (!em)
2057                 return -ENOMEM;
2058         num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
2059         map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
2060         if (!map) {
2061                 free_extent_map(em);
2062                 return -ENOMEM;
2063         }
2064
2065         em->bdev = (struct block_device *)map;
2066         em->start = logical;
2067         em->len = length;
2068         em->block_start = 0;
2069
2070         map->num_stripes = num_stripes;
2071         map->io_width = btrfs_chunk_io_width(leaf, chunk);
2072         map->io_align = btrfs_chunk_io_align(leaf, chunk);
2073         map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
2074         map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
2075         map->type = btrfs_chunk_type(leaf, chunk);
2076         map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
2077         for (i = 0; i < num_stripes; i++) {
2078                 map->stripes[i].physical =
2079                         btrfs_stripe_offset_nr(leaf, chunk, i);
2080                 devid = btrfs_stripe_devid_nr(leaf, chunk, i);
2081                 read_extent_buffer(leaf, uuid, (unsigned long)
2082                                    btrfs_stripe_dev_uuid_nr(chunk, i),
2083                                    BTRFS_UUID_SIZE);
2084                 map->stripes[i].dev = btrfs_find_device(root, devid, uuid);
2085
2086                 if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
2087                         kfree(map);
2088                         free_extent_map(em);
2089                         return -EIO;
2090                 }
2091                 if (!map->stripes[i].dev) {
2092                         map->stripes[i].dev =
2093                                 add_missing_dev(root, devid, uuid);
2094                         if (!map->stripes[i].dev) {
2095                                 kfree(map);
2096                                 free_extent_map(em);
2097                                 return -EIO;
2098                         }
2099                 }
2100                 map->stripes[i].dev->in_fs_metadata = 1;
2101         }
2102
2103         spin_lock(&map_tree->map_tree.lock);
2104         ret = add_extent_mapping(&map_tree->map_tree, em);
2105         spin_unlock(&map_tree->map_tree.lock);
2106         BUG_ON(ret);
2107         free_extent_map(em);
2108
2109         return 0;
2110 }
2111
2112 static int fill_device_from_item(struct extent_buffer *leaf,
2113                                  struct btrfs_dev_item *dev_item,
2114                                  struct btrfs_device *device)
2115 {
2116         unsigned long ptr;
2117
2118         device->devid = btrfs_device_id(leaf, dev_item);
2119         device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2120         device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2121         device->type = btrfs_device_type(leaf, dev_item);
2122         device->io_align = btrfs_device_io_align(leaf, dev_item);
2123         device->io_width = btrfs_device_io_width(leaf, dev_item);
2124         device->sector_size = btrfs_device_sector_size(leaf, dev_item);
2125
2126         ptr = (unsigned long)btrfs_device_uuid(dev_item);
2127         read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
2128
2129         return 0;
2130 }
2131
2132 static int read_one_dev(struct btrfs_root *root,
2133                         struct extent_buffer *leaf,
2134                         struct btrfs_dev_item *dev_item)
2135 {
2136         struct btrfs_device *device;
2137         u64 devid;
2138         int ret;
2139         u8 dev_uuid[BTRFS_UUID_SIZE];
2140
2141         devid = btrfs_device_id(leaf, dev_item);
2142         read_extent_buffer(leaf, dev_uuid,
2143                            (unsigned long)btrfs_device_uuid(dev_item),
2144                            BTRFS_UUID_SIZE);
2145         device = btrfs_find_device(root, devid, dev_uuid);
2146         if (!device) {
2147                 printk("warning devid %Lu missing\n", devid);
2148                 device = add_missing_dev(root, devid, dev_uuid);
2149                 if (!device)
2150                         return -ENOMEM;
2151         }
2152
2153         fill_device_from_item(leaf, dev_item, device);
2154         device->dev_root = root->fs_info->dev_root;
2155         device->in_fs_metadata = 1;
2156         ret = 0;
2157 #if 0
2158         ret = btrfs_open_device(device);
2159         if (ret) {
2160                 kfree(device);
2161         }
2162 #endif
2163         return ret;
2164 }
2165
2166 int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
2167 {
2168         struct btrfs_dev_item *dev_item;
2169
2170         dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
2171                                                      dev_item);
2172         return read_one_dev(root, buf, dev_item);
2173 }
2174
2175 int btrfs_read_sys_array(struct btrfs_root *root)
2176 {
2177         struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
2178         struct extent_buffer *sb;
2179         struct btrfs_disk_key *disk_key;
2180         struct btrfs_chunk *chunk;
2181         u8 *ptr;
2182         unsigned long sb_ptr;
2183         int ret = 0;
2184         u32 num_stripes;
2185         u32 array_size;
2186         u32 len = 0;
2187         u32 cur;
2188         struct btrfs_key key;
2189
2190         sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
2191                                           BTRFS_SUPER_INFO_SIZE);
2192         if (!sb)
2193                 return -ENOMEM;
2194         btrfs_set_buffer_uptodate(sb);
2195         write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
2196         array_size = btrfs_super_sys_array_size(super_copy);
2197
2198         ptr = super_copy->sys_chunk_array;
2199         sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
2200         cur = 0;
2201
2202         while (cur < array_size) {
2203                 disk_key = (struct btrfs_disk_key *)ptr;
2204                 btrfs_disk_key_to_cpu(&key, disk_key);
2205
2206                 len = sizeof(*disk_key); ptr += len;
2207                 sb_ptr += len;
2208                 cur += len;
2209
2210                 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
2211                         chunk = (struct btrfs_chunk *)sb_ptr;
2212                         ret = read_one_chunk(root, &key, sb, chunk);
2213                         if (ret)
2214                                 break;
2215                         num_stripes = btrfs_chunk_num_stripes(sb, chunk);
2216                         len = btrfs_chunk_item_size(num_stripes);
2217                 } else {
2218                         ret = -EIO;
2219                         break;
2220                 }
2221                 ptr += len;
2222                 sb_ptr += len;
2223                 cur += len;
2224         }
2225         free_extent_buffer(sb);
2226         return ret;
2227 }
2228
2229 int btrfs_read_chunk_tree(struct btrfs_root *root)
2230 {
2231         struct btrfs_path *path;
2232         struct extent_buffer *leaf;
2233         struct btrfs_key key;
2234         struct btrfs_key found_key;
2235         int ret;
2236         int slot;
2237
2238         root = root->fs_info->chunk_root;
2239
2240         path = btrfs_alloc_path();
2241         if (!path)
2242                 return -ENOMEM;
2243
2244         /* first we search for all of the device items, and then we
2245          * read in all of the chunk items.  This way we can create chunk
2246          * mappings that reference all of the devices that are afound
2247          */
2248         key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2249         key.offset = 0;
2250         key.type = 0;
2251 again:
2252         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2253         while(1) {
2254                 leaf = path->nodes[0];
2255                 slot = path->slots[0];
2256                 if (slot >= btrfs_header_nritems(leaf)) {
2257                         ret = btrfs_next_leaf(root, path);
2258                         if (ret == 0)
2259                                 continue;
2260                         if (ret < 0)
2261                                 goto error;
2262                         break;
2263                 }
2264                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2265                 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
2266                         if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
2267                                 break;
2268                         if (found_key.type == BTRFS_DEV_ITEM_KEY) {
2269                                 struct btrfs_dev_item *dev_item;
2270                                 dev_item = btrfs_item_ptr(leaf, slot,
2271                                                   struct btrfs_dev_item);
2272                                 ret = read_one_dev(root, leaf, dev_item);
2273                                 BUG_ON(ret);
2274                         }
2275                 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
2276                         struct btrfs_chunk *chunk;
2277                         chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
2278                         ret = read_one_chunk(root, &found_key, leaf, chunk);
2279                 }
2280                 path->slots[0]++;
2281         }
2282         if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
2283                 key.objectid = 0;
2284                 btrfs_release_path(root, path);
2285                 goto again;
2286         }
2287
2288         btrfs_free_path(path);
2289         ret = 0;
2290 error:
2291         return ret;
2292 }
2293