2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
18 #include <linux/sched.h>
19 #include <linux/bio.h>
21 #include "extent_map.h"
23 #include "transaction.h"
24 #include "print-tree.h"
28 struct btrfs_device *dev;
33 * this uses a pretty simple search, the expectation is that it is
34 * called very infrequently and that a given device has a small number of extents
/*
 * find_free_dev_extent - locate an unallocated byte range on a device.
 *
 * Searches device->dev_root for BTRFS_DEV_EXTENT_KEY items whose objectid
 * is device->devid.  While examining items it remembers where the previous
 * extent ended (last_byte) and measures the gap up to the next extent key
 * (hole_size); a gap of at least num_bytes qualifies.  When the leaf runs
 * out of items, *start appears to fall back to the larger of last_byte and
 * search_start.  The chosen offset is reported through *start, and the
 * search is bounded above by device->total_bytes.
 *
 * NOTE(review): the return values and the initial value of search_start
 * are not established by the statements shown here -- confirm.
 */
37 static int find_free_dev_extent(struct btrfs_trans_handle *trans,
38 struct btrfs_device *device,
39 struct btrfs_path *path,
40 u64 num_bytes, u64 *start)
43 struct btrfs_root *root = device->dev_root;
44 struct btrfs_dev_extent *dev_extent = NULL;
48 u64 search_end = device->total_bytes;
52 struct extent_buffer *l;
57 /* FIXME use last free of some kind */
/* dev extent keys are (devid, BTRFS_DEV_EXTENT_KEY, byte offset on device) */
59 key.objectid = device->devid;
60 key.offset = search_start;
61 key.type = BTRFS_DEV_EXTENT_KEY;
62 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
65 ret = btrfs_previous_item(root, path, 0, key.type);
69 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
72 slot = path->slots[0];
73 if (slot >= btrfs_header_nritems(l)) {
74 ret = btrfs_next_leaf(root, path);
81 if (search_start >= search_end) {
85 *start = search_start;
89 *start = last_byte > search_start ?
90 last_byte : search_start;
91 if (search_end <= *start) {
97 btrfs_item_key_to_cpu(l, &key, slot);
/* items keyed by a different devid belong to another device */
99 if (key.objectid < device->devid)
102 if (key.objectid > device->devid)
105 if (key.offset >= search_start && key.offset > last_byte &&
107 if (last_byte < search_start)
108 last_byte = search_start;
/* gap between the end of the previous extent and this key's offset */
109 hole_size = key.offset - last_byte;
110 if (key.offset > last_byte &&
111 hole_size >= num_bytes) {
116 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) {
121 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
/* end of this extent; a later hole cannot start before it */
122 last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
128 /* we have to make sure we didn't find an extent that has already
129 * been allocated by the map tree or the original allocation
131 btrfs_release_path(root, path);
132 BUG_ON(*start < search_start);
134 if (*start + num_bytes >= search_end) {
138 /* check for pending inserts here */
142 btrfs_release_path(root, path);
/*
 * btrfs_alloc_dev_extent - reserve num_bytes on a device and record it.
 *
 * Calls find_free_dev_extent() to choose an offset, which is handed back
 * to the caller through *start.  It then inserts an empty
 * BTRFS_DEV_EXTENT_KEY item for device->devid into device->dev_root and
 * fills in the owner and length fields of the new btrfs_dev_extent.
 * The btrfs_path used for the search and insert is allocated and freed
 * inside this function.
 *
 * NOTE(review): the assignment of key.offset is not among the statements
 * shown here; presumably it is *start -- confirm.
 */
146 int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
147 struct btrfs_device *device,
148 u64 owner, u64 num_bytes, u64 *start)
151 struct btrfs_path *path;
152 struct btrfs_root *root = device->dev_root;
153 struct btrfs_dev_extent *extent;
154 struct extent_buffer *leaf;
155 struct btrfs_key key;
157 path = btrfs_alloc_path();
161 ret = find_free_dev_extent(trans, device, path, num_bytes, start);
165 key.objectid = device->devid;
167 key.type = BTRFS_DEV_EXTENT_KEY;
168 ret = btrfs_insert_empty_item(trans, root, path, &key,
/* fill in the freshly inserted item and mark its leaf dirty */
172 leaf = path->nodes[0];
173 extent = btrfs_item_ptr(leaf, path->slots[0],
174 struct btrfs_dev_extent);
175 btrfs_set_dev_extent_owner(leaf, extent, owner);
176 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
177 btrfs_mark_buffer_dirty(leaf);
179 btrfs_free_path(path);
/*
 * find_next_chunk - compute the value used as the key objectid of a new
 * chunk.
 *
 * Searches root for the largest possible BTRFS_CHUNK_ITEM_KEY (objectid
 * and offset both (u64)-1), steps back to the previous chunk item and
 * stores that item's objectid + offset in *objectid.  Chunk items in this
 * file are keyed with objectid = logical start and offset = length, so
 * the result is the first byte past the last existing chunk.
 *
 * NOTE(review): the behaviour when no chunk item exists yet is not
 * visible here -- confirm.
 */
183 static int find_next_chunk(struct btrfs_root *root, u64 *objectid)
185 struct btrfs_path *path;
187 struct btrfs_key key;
188 struct btrfs_key found_key;
190 path = btrfs_alloc_path();
193 key.objectid = (u64)-1;
194 key.offset = (u64)-1;
195 key.type = BTRFS_CHUNK_ITEM_KEY;
/* read-only lookup: no transaction handle is passed */
197 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
203 ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
207 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
209 *objectid = found_key.objectid + found_key.offset;
213 btrfs_free_path(path);
/*
 * next_device - return the btrfs_device linked after 'last' on the device
 * list 'head'.
 *
 * The candidate is last->next, converted to its containing btrfs_device
 * through the dev_list member.  An empty list is checked for first.
 *
 * NOTE(review): what is returned for an empty list, and how the list head
 * itself is skipped when 'last' is the final entry, is not visible here --
 * confirm.
 */
217 static struct btrfs_device *next_device(struct list_head *head,
218 struct list_head *last)
220 struct list_head *next = last->next;
221 struct btrfs_device *dev;
223 if (list_empty(head))
229 dev = list_entry(next, struct btrfs_device, dev_list);
/*
 * find_next_devid - pick the next unused device id.
 *
 * Searches root for the highest possible BTRFS_DEV_ITEM_KEY under
 * BTRFS_DEV_ITEMS_OBJECTID (offset (u64)-1), steps back to the previous
 * dev item and stores that item's key offset + 1 in *objectid.  The
 * caller supplies the path; it is released here after the lookup.
 *
 * NOTE(review): the result when no dev item exists yet is not visible
 * here -- confirm.
 */
233 static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path,
237 struct btrfs_key key;
238 struct btrfs_key found_key;
240 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
241 key.type = BTRFS_DEV_ITEM_KEY;
242 key.offset = (u64)-1;
244 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
250 ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
255 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* dev item keys carry the device id in the key offset */
257 *objectid = found_key.offset + 1;
261 btrfs_release_path(root, path);
266 * the device information is stored in the chunk root
267 * the btrfs_device struct should be fully filled in
/*
 * btrfs_add_device - insert a dev item describing 'device'.
 *
 * Whatever root is passed in, the work is done on
 * root->fs_info->chunk_root.  A free device id is obtained from
 * find_next_devid() and an empty item keyed
 * (BTRFS_DEV_ITEMS_OBJECTID, BTRFS_DEV_ITEM_KEY, free_devid) is inserted,
 * sized for a btrfs_dev_item plus device->name_len bytes of name.  The
 * scalar fields, the name and the uuid are then copied from 'device' into
 * the item and the leaf is marked dirty.
 *
 * NOTE(review): the item key uses free_devid while the id stored inside
 * the item comes from device->devid; no statement shown here makes the
 * two agree -- confirm that callers keep them in sync.
 */
269 int btrfs_add_device(struct btrfs_trans_handle *trans,
270 struct btrfs_root *root,
271 struct btrfs_device *device)
274 struct btrfs_path *path;
275 struct btrfs_dev_item *dev_item;
276 struct extent_buffer *leaf;
277 struct btrfs_key key;
281 root = root->fs_info->chunk_root;
283 path = btrfs_alloc_path();
287 ret = find_next_devid(root, path, &free_devid);
291 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
292 key.type = BTRFS_DEV_ITEM_KEY;
293 key.offset = free_devid;
/* the variable-length device name is stored in the same item */
295 ret = btrfs_insert_empty_item(trans, root, path, &key,
296 sizeof(*dev_item) + device->name_len);
300 leaf = path->nodes[0];
301 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
303 btrfs_set_device_id(leaf, dev_item, device->devid);
304 btrfs_set_device_type(leaf, dev_item, device->type);
305 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
306 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
307 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
308 btrfs_set_device_rdev(leaf, dev_item, device->rdev);
309 btrfs_set_device_partition(leaf, dev_item, device->partition);
310 btrfs_set_device_name_len(leaf, dev_item, device->name_len);
311 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
312 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
/* name and uuid are byte arrays: copied via their offsets in the leaf */
314 ptr = (unsigned long)btrfs_device_name(dev_item);
315 write_extent_buffer(leaf, device->name, ptr, device->name_len);
317 ptr = (unsigned long)btrfs_device_uuid(dev_item);
318 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);
319 btrfs_mark_buffer_dirty(leaf);
323 btrfs_free_path(path);
/*
 * btrfs_update_device - write a device's in-memory fields back to its
 * existing dev item.
 *
 * Looks up the item keyed
 * (BTRFS_DEV_ITEMS_OBJECTID, BTRFS_DEV_ITEM_KEY, device->devid) in the
 * chunk root reached through device->dev_root->fs_info, then rewrites the
 * id, type, io_align, io_width, sector_size, rdev, partition, total_bytes
 * and bytes_used fields and marks the leaf dirty.  The name, name_len and
 * uuid stored in the item are not rewritten here.
 *
 * NOTE(review): the handling of a failed or inexact search is not visible
 * here -- confirm.
 */
326 int btrfs_update_device(struct btrfs_trans_handle *trans,
327 struct btrfs_device *device)
330 struct btrfs_path *path;
331 struct btrfs_root *root;
332 struct btrfs_dev_item *dev_item;
333 struct extent_buffer *leaf;
334 struct btrfs_key key;
336 root = device->dev_root->fs_info->chunk_root;
338 path = btrfs_alloc_path();
342 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
343 key.type = BTRFS_DEV_ITEM_KEY;
344 key.offset = device->devid;
/* last argument is 1 here, unlike the read-only lookups in this file */
346 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
355 leaf = path->nodes[0];
356 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
358 btrfs_set_device_id(leaf, dev_item, device->devid);
359 btrfs_set_device_type(leaf, dev_item, device->type);
360 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
361 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
362 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
363 btrfs_set_device_rdev(leaf, dev_item, device->rdev);
364 btrfs_set_device_partition(leaf, dev_item, device->partition);
365 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
366 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
367 btrfs_mark_buffer_dirty(leaf);
370 btrfs_free_path(path);
/*
 * btrfs_add_system_chunk - append a chunk to the super block's
 * sys_chunk_array.
 *
 * The entry is written at the current end of the array in
 * root->fs_info->super_copy: first the key converted to on-disk form,
 * then item_size bytes copied from 'chunk'.  The recorded array size
 * grows by sizeof(disk_key) + item_size.
 *
 * NOTE(review): the bounds check compares array_size + item_size against
 * BTRFS_SYSTEM_CHUNK_ARRAY_SIZE, but the bytes written also include
 * sizeof(disk_key); confirm whether the check should account for the key.
 */
374 int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
375 struct btrfs_root *root,
376 struct btrfs_key *key,
377 struct btrfs_chunk *chunk, int item_size)
379 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
380 struct btrfs_disk_key disk_key;
384 array_size = btrfs_super_sys_array_size(super_copy);
385 if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
/* append at the current end of the array: key first, then the chunk */
388 ptr = super_copy->sys_chunk_array + array_size;
389 btrfs_cpu_key_to_disk(&disk_key, key);
390 memcpy(ptr, &disk_key, sizeof(disk_key));
391 ptr += sizeof(disk_key);
392 memcpy(ptr, chunk, item_size);
/* the stored size covers the key as well as the chunk */
393 item_size += sizeof(disk_key);
394 btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
/*
 * btrfs_alloc_chunk - create a new chunk and its logical->physical mapping.
 *
 * Steps visible here:
 *  - find_next_chunk() supplies the key objectid for the new chunk in the
 *    chunk root;
 *  - a btrfs_chunk sized for num_stripes stripes is allocated;
 *  - for each stripe, the device following fs_info->last_device is picked
 *    (and remembered there), calc_size bytes (1GB) are reserved on it with
 *    btrfs_alloc_dev_extent(), device->bytes_used is increased, the dev
 *    item is updated, and the stripe's devid and offset are recorded;
 *  - the chunk item is inserted with key
 *    (objectid, BTRFS_CHUNK_ITEM_KEY, *num_bytes);
 *  - an extent_map covering [key.objectid, key.objectid + key.offset) is
 *    added to fs_info->mapping_tree, with its bdev field carrying a
 *    pointer to the map_lookup for this chunk.
 *
 * Outputs: *start receives the chunk's key objectid and *num_bytes is set
 * to calc_size.
 *
 * NOTE(review): the value of num_stripes, the 'owner' argument passed to
 * btrfs_alloc_dev_extent() and the cleanup on failure are not visible
 * here -- confirm.
 */
398 int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
399 struct btrfs_root *extent_root, u64 *start,
400 u64 *num_bytes, u32 type)
403 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
404 struct btrfs_stripe *stripes;
405 struct btrfs_device *device = NULL;
406 struct btrfs_chunk *chunk;
407 struct list_head *dev_list = &extent_root->fs_info->devices;
408 struct list_head *last_dev = extent_root->fs_info->last_device;
409 struct extent_map_tree *em_tree;
410 struct map_lookup *map;
411 struct extent_map *em;
413 u64 calc_size = 1024 * 1024 * 1024;
417 struct btrfs_key key;
420 ret = find_next_chunk(chunk_root, &key.objectid);
425 chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
429 stripes = &chunk->stripe;
431 *num_bytes = calc_size;
432 while(index < num_stripes) {
/* advance the per-filesystem cursor so successive stripes rotate devices */
433 device = next_device(dev_list, last_dev);
435 last_dev = &device->dev_list;
436 extent_root->fs_info->last_device = last_dev;
438 ret = btrfs_alloc_dev_extent(trans, device,
440 calc_size, &dev_offset);
443 device->bytes_used += calc_size;
444 ret = btrfs_update_device(trans, device);
447 btrfs_set_stack_stripe_devid(stripes + index, device->devid);
448 btrfs_set_stack_stripe_offset(stripes + index, dev_offset);
/* physical keeps the device offset of the most recently filled stripe */
449 physical = dev_offset;
453 /* key.objectid was set above */
454 key.offset = *num_bytes;
455 key.type = BTRFS_CHUNK_ITEM_KEY;
456 btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
457 btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024);
458 btrfs_set_stack_chunk_type(chunk, type);
459 btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
460 btrfs_set_stack_chunk_io_align(chunk, extent_root->sectorsize);
461 btrfs_set_stack_chunk_io_width(chunk, extent_root->sectorsize);
462 btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
464 ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
465 btrfs_chunk_item_size(num_stripes));
467 *start = key.objectid;
469 em = alloc_extent_map(GFP_NOFS);
472 map = kmalloc(sizeof(*map), GFP_NOFS);
/* the bdev field is reused to carry the map_lookup pointer */
478 em->bdev = (struct block_device *)map;
479 em->start = key.objectid;
480 em->len = key.offset;
483 map->physical = physical;
493 em_tree = &extent_root->fs_info->mapping_tree.map_tree;
494 spin_lock(&em_tree->lock);
495 ret = add_extent_mapping(em_tree, em);
497 spin_unlock(&em_tree->lock);
/*
 * btrfs_mapping_init - initialise the extent map tree embedded in a
 * btrfs_mapping_tree, passing GFP_NOFS to extent_map_tree_init().
 */
502 void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
504 extent_map_tree_init(&tree->map_tree, GFP_NOFS);
/*
 * btrfs_mapping_tree_free - remove the extent mappings held by a
 * btrfs_mapping_tree.
 *
 * Under the tree lock, a mapping is looked up over the widest possible
 * range (start 0, length (u64)-1) and removed from the tree.
 *
 * NOTE(review): the enclosing loop and the statements that drop the
 * references and free the attached lookup data are not visible here --
 * confirm.
 */
507 void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
509 struct extent_map *em;
512 spin_lock(&tree->map_tree.lock);
513 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
515 remove_extent_mapping(&tree->map_tree, em);
516 spin_unlock(&tree->map_tree.lock);
522 /* once for the tree */
/*
 * btrfs_map_block - translate a logical address to a physical one.
 *
 * @logical: logical byte address to translate
 * @phys:    receives map->physical plus the offset of 'logical' inside
 *           the mapping
 * @length:  on entry, the length passed to the mapping lookup; on return,
 *           the number of bytes from 'logical' to the end of the mapping
 * @dev:     output for the backing device (the assignment itself is not
 *           among the statements shown here)
 *
 * The lookup and the reads from the mapping happen under em_tree->lock.
 * The map_lookup is recovered from the extent_map's bdev field.
 *
 * NOTE(review): the BUG_ON lets logical == em->start + em->len through,
 * which would yield *length == 0; confirm whether the upper bound was
 * meant to be exclusive.
 */
527 int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
528 u64 logical, u64 *phys, u64 *length,
529 struct btrfs_device **dev)
531 struct extent_map *em;
532 struct map_lookup *map;
533 struct extent_map_tree *em_tree = &map_tree->map_tree;
537 spin_lock(&em_tree->lock);
538 em = lookup_extent_mapping(em_tree, logical, *length);
541 BUG_ON(em->start > logical || em->start + em->len < logical);
542 map = (struct map_lookup *)em->bdev;
543 offset = logical - em->start;
544 *phys = map->physical + offset;
545 *length = em->len - offset;
548 spin_unlock(&em_tree->lock);
/*
 * btrfs_map_bio - redirect a bio from its logical address to the mapped
 * device and physical sector.
 *
 * The logical byte address is bio->bi_sector << 9 and the I/O length is
 * the sum of bv_len over the bio's segments.  btrfs_map_block() supplies
 * the physical address, the mapped length and the device; the bio must
 * fit entirely inside the mapping (BUG_ON otherwise).  bi_sector and
 * bi_bdev are then rewritten.
 *
 * NOTE(review): the initial value given to map_length, any check of the
 * btrfs_map_block() return value, and the submission of the bio are not
 * visible here -- confirm.
 */
552 int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio)
554 struct btrfs_mapping_tree *map_tree;
555 struct btrfs_device *dev;
556 u64 logical = bio->bi_sector << 9;
560 struct bio_vec *bvec;
564 bio_for_each_segment(bvec, bio, i) {
565 length += bvec->bv_len;
567 map_tree = &root->fs_info->mapping_tree;
569 ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev);
570 BUG_ON(map_length < length);
/* convert the physical byte address back to a sector number */
571 bio->bi_sector = physical >> 9;
572 bio->bi_bdev = dev->bdev;
/*
 * btrfs_find_device - look up a device by id on root->fs_info->devices.
 *
 * Entries are compared one by one against 'devid' through their dev_list
 * linkage.  Callers in this file treat a non-NULL result as "device
 * already known".
 *
 * NOTE(review): the loop statement and the not-found return value are not
 * visible here; presumably NULL -- confirm.
 */
577 struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid)
579 struct btrfs_device *dev;
580 struct list_head *cur = root->fs_info->devices.next;
581 struct list_head *head = &root->fs_info->devices;
584 dev = list_entry(cur, struct btrfs_device, dev_list);
585 if (dev->devid == devid)
/*
 * read_one_chunk - add the mapping for one chunk item to the mapping tree.
 *
 * @key:   chunk key; objectid is the logical start, offset the length
 * @leaf:  extent buffer that holds the chunk item
 * @chunk: location of the chunk item inside 'leaf'
 *
 * If a mapping that already covers the logical start exists, the tree is
 * unlocked without adding another (the statements that follow on that
 * path are not visible here).  Otherwise an extent_map and a map_lookup
 * are allocated, the map_lookup is attached through the extent_map's bdev
 * field, its physical offset and device are taken from stripe 0 of the
 * chunk, and the extent_map is inserted under the tree lock.
 *
 * NOTE(review): only stripe 0 is consulted in the statements shown here.
 */
592 static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
593 struct extent_buffer *leaf,
594 struct btrfs_chunk *chunk)
596 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
597 struct map_lookup *map;
598 struct extent_map *em;
604 logical = key->objectid;
605 length = key->offset;
606 spin_lock(&map_tree->map_tree.lock);
607 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
609 /* already mapped? */
610 if (em && em->start <= logical && em->start + em->len > logical) {
612 spin_unlock(&map_tree->map_tree.lock);
617 spin_unlock(&map_tree->map_tree.lock);
619 map = kzalloc(sizeof(*map), GFP_NOFS);
623 em = alloc_extent_map(GFP_NOFS);
/*
 * NOTE(review): 'map' was already allocated with kzalloc() above; this
 * second allocation overwrites that pointer, so the first buffer looks
 * leaked (and the replacement is not zeroed) -- confirm and drop one of
 * the two allocations.
 */
626 map = kmalloc(sizeof(*map), GFP_NOFS);
632 em->bdev = (struct block_device *)map;
637 map->physical = btrfs_stripe_offset_nr(leaf, chunk, 0);
638 devid = btrfs_stripe_devid_nr(leaf, chunk, 0);
639 map->dev = btrfs_find_device(root, devid);
646 spin_lock(&map_tree->map_tree.lock);
647 ret = add_extent_mapping(&map_tree->map_tree, em);
649 spin_unlock(&map_tree->map_tree.lock);
/*
 * fill_device_from_item - copy a dev item into a btrfs_device.
 *
 * Reads the id, total_bytes, bytes_used, type, io_align, io_width,
 * sector_size, rdev, partition and name_len fields from the item at
 * 'dev_item' in 'leaf' into 'device', then copies the uuid.  The name is
 * read into a freshly allocated buffer of name_len + 1 bytes and is
 * NUL-terminated.
 *
 * NOTE(review): the statement that stores the buffer in device->name and
 * the handling of a failed allocation are not visible here -- confirm.
 */
655 static int fill_device_from_item(struct extent_buffer *leaf,
656 struct btrfs_dev_item *dev_item,
657 struct btrfs_device *device)
662 device->devid = btrfs_device_id(leaf, dev_item);
663 device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
664 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
665 device->type = btrfs_device_type(leaf, dev_item);
666 device->io_align = btrfs_device_io_align(leaf, dev_item);
667 device->io_width = btrfs_device_io_width(leaf, dev_item);
668 device->sector_size = btrfs_device_sector_size(leaf, dev_item);
669 device->rdev = btrfs_device_rdev(leaf, dev_item);
670 device->partition = btrfs_device_partition(leaf, dev_item);
671 device->name_len = btrfs_device_name_len(leaf, dev_item);
673 ptr = (unsigned long)btrfs_device_uuid(dev_item);
674 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);
/* one extra byte for the terminator: the on-disk name is length-counted */
676 name = kmalloc(device->name_len + 1, GFP_NOFS);
680 ptr = (unsigned long)btrfs_device_name(dev_item);
681 read_extent_buffer(leaf, name, ptr, device->name_len);
682 name[device->name_len] = '\0';
/*
 * read_one_dev - create the in-memory btrfs_device for one dev item.
 *
 * The device id is read from the item first; if btrfs_find_device()
 * already knows that id, the item is not processed again (the return on
 * that path is not visible here).  Otherwise a btrfs_device is allocated,
 * filled from the item, pointed at fs_info->dev_root and at the super
 * block's block device, linked onto fs_info->devices, given a copy of the
 * item key, and finally opened with btrfs_open_device().
 *
 * NOTE(review): the device is placed on the list before
 * btrfs_open_device() runs, and the result of fill_device_from_item() is
 * not checked in the statements shown here -- confirm the failure paths.
 */
686 static int read_one_dev(struct btrfs_root *root, struct btrfs_key *key,
687 struct extent_buffer *leaf,
688 struct btrfs_dev_item *dev_item)
690 struct btrfs_device *device;
694 devid = btrfs_device_id(leaf, dev_item);
695 if (btrfs_find_device(root, devid))
698 device = kmalloc(sizeof(*device), GFP_NOFS);
702 fill_device_from_item(leaf, dev_item, device);
703 device->dev_root = root->fs_info->dev_root;
704 device->bdev = root->fs_info->sb->s_bdev;
705 list_add(&device->dev_list, &root->fs_info->devices);
706 memcpy(&device->dev_key, key, sizeof(*key));
709 ret = btrfs_open_device(device);
/*
 * btrfs_read_sys_array - load the device and chunk items stored in the
 * super block's sys_chunk_array.
 *
 * Two cursors are set up over the array: 'ptr' points into the in-memory
 * super_copy and is used to decode each btrfs_disk_key, while 'sb_ptr'
 * starts at the array's byte offset inside struct btrfs_super_block and
 * is what gets cast to the item pointers handed, together with the
 * sb_buffer extent buffer, to read_one_dev() / read_one_chunk().
 *
 * Each entry is a disk key followed by either a dev item (fixed part plus
 * name_len bytes of name) or a chunk item (sized by its stripe count);
 * 'len' holds the size of the piece just decoded.
 *
 * NOTE(review): the statements that advance ptr, sb_ptr and cur, and the
 * control flow of the two passes described in the comment inside the
 * function, are not visible here -- confirm.
 */
717 int btrfs_read_sys_array(struct btrfs_root *root)
719 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
720 struct extent_buffer *sb = root->fs_info->sb_buffer;
721 struct btrfs_disk_key *disk_key;
722 struct btrfs_dev_item *dev_item;
723 struct btrfs_chunk *chunk;
724 struct btrfs_key key;
729 unsigned long sb_ptr;
734 array_size = btrfs_super_sys_array_size(super_copy);
737 * we do this loop twice, once for the device items and
738 * once for all of the chunks. This way there are device
739 * structs filled in for every chunk
742 ptr = super_copy->sys_chunk_array;
743 sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
746 while (cur < array_size) {
747 disk_key = (struct btrfs_disk_key *)ptr;
748 btrfs_disk_key_to_cpu(&key, disk_key);
750 len = sizeof(*disk_key);
755 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID &&
756 key.type == BTRFS_DEV_ITEM_KEY) {
757 dev_item = (struct btrfs_dev_item *)sb_ptr;
759 ret = read_one_dev(root, &key, sb, dev_item);
762 len = sizeof(*dev_item);
763 len += btrfs_device_name_len(sb, dev_item);
764 } else if (key.type == BTRFS_CHUNK_ITEM_KEY) {
766 chunk = (struct btrfs_chunk *)sb_ptr;
768 ret = read_one_chunk(root, &key, sb, chunk);
771 num_stripes = btrfs_chunk_num_stripes(sb, chunk);
772 len = btrfs_chunk_item_size(num_stripes);
/*
 * btrfs_read_chunk_tree - build the device list and the chunk mappings
 * from the chunk tree.
 *
 * Whatever root is passed in, the work is done on
 * root->fs_info->chunk_root.  The search starts at
 * BTRFS_DEV_ITEMS_OBJECTID.  While key.objectid still has that value,
 * only BTRFS_DEV_ITEM_KEY items are handed to read_one_dev(); the else
 * branch hands BTRFS_CHUNK_ITEM_KEY items to read_one_chunk().  Leaves
 * are advanced with btrfs_next_leaf() when the slot runs past the last
 * item.
 *
 * NOTE(review): the remaining key fields, the loop statement, and the
 * statements that switch from the device pass to the chunk pass after the
 * path is released are not visible here -- confirm.
 */
787 int btrfs_read_chunk_tree(struct btrfs_root *root)
789 struct btrfs_path *path;
790 struct extent_buffer *leaf;
791 struct btrfs_key key;
792 struct btrfs_key found_key;
796 root = root->fs_info->chunk_root;
798 path = btrfs_alloc_path();
802 /* first we search for all of the device items, and then we
803 * read in all of the chunk items. This way we can create chunk
804 * mappings that reference all of the devices that are afound
806 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
810 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
812 leaf = path->nodes[0];
813 slot = path->slots[0];
814 if (slot >= btrfs_header_nritems(leaf)) {
815 ret = btrfs_next_leaf(root, path);
822 btrfs_item_key_to_cpu(leaf, &found_key, slot);
823 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
824 if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
826 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
827 struct btrfs_dev_item *dev_item;
828 dev_item = btrfs_item_ptr(leaf, slot,
829 struct btrfs_dev_item);
830 ret = read_one_dev(root, &found_key, leaf,
834 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
835 struct btrfs_chunk *chunk;
836 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
837 ret = read_one_chunk(root, &found_key, leaf, chunk);
841 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
843 btrfs_release_path(root, path);
847 btrfs_free_path(path);