X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fblock_dev.c;h=7d822fae7765b2bfdf0978e8ecf1296875073e85;hb=fe0754f0e5c0f070bf82b6e7e5e8fa5a188163fc;hp=44aaba202f78f87083605e11030d1b7613f2ccc0;hpb=09d967c6f32b35eab15b45862ae16e4f06259d8e;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/block_dev.c b/fs/block_dev.c index 44aaba2..7d822fa 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -5,31 +5,34 @@ * Copyright (C) 2001 Andrea Arcangeli SuSE */ -#include #include #include #include #include #include #include -#include #include #include #include #include #include #include +#include #include #include #include #include +#include #include +#include "internal.h" struct bdev_inode { struct block_device bdev; struct inode vfs_inode; }; +static const struct address_space_operations def_blk_aops; + static inline struct bdev_inode *BDEV_I(struct inode *inode) { return container_of(inode, struct bdev_inode, vfs_inode); @@ -55,17 +58,19 @@ static sector_t max_block(struct block_device *bdev) return retval; } -/* Kill _all_ buffers, dirty or not.. */ +/* Kill _all_ buffers and pagecache , dirty or not.. */ static void kill_bdev(struct block_device *bdev) { - invalidate_bdev(bdev, 1); + if (bdev->bd_inode->i_mapping->nrpages == 0) + return; + invalidate_bh_lrus(); truncate_inode_pages(bdev->bd_inode->i_mapping, 0); } int set_blocksize(struct block_device *bdev, int size) { /* Size must be a power of two, and between 512 and PAGE_SIZE */ - if (size > PAGE_SIZE || size < 512 || (size & (size-1))) + if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) return -EINVAL; /* Size cannot be smaller than the size supported by the device */ @@ -178,19 +183,31 @@ static int blkdev_readpage(struct file * file, struct page * page) return block_read_full_page(page, blkdev_get_block); } -static int blkdev_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) +static int blkdev_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return block_prepare_write(page, from, to, blkdev_get_block); + *pagep = NULL; + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + blkdev_get_block); } -static int blkdev_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) +static int blkdev_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { - return block_commit_write(page, from, to); + int ret; + ret = block_write_end(file, mapping, pos, len, copied, page, fsdata); + + unlock_page(page); + page_cache_release(page); + + return ret; } /* * private llseek: - * for a block special file file->f_dentry->d_inode->i_size is zero + * for a block special file file->f_path.dentry->d_inode->i_size is zero * so we compute the size by hand (just as in block_read/write above) */ static loff_t block_llseek(struct file *file, loff_t offset, int origin) @@ -235,11 +252,11 @@ static int block_fsync(struct file *filp, struct dentry *dentry, int datasync) */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock); -static kmem_cache_t * bdev_cachep __read_mostly; +static struct kmem_cache * bdev_cachep __read_mostly; static struct inode *bdev_alloc_inode(struct super_block *sb) { - struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, SLAB_KERNEL); + struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; @@ -253,24 +270,20 @@ static void bdev_destroy_inode(struct inode *inode) kmem_cache_free(bdev_cachep, bdi); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(struct kmem_cache * cachep, void *foo) { struct bdev_inode *ei = (struct bdev_inode *) foo; struct block_device *bdev = &ei->bdev; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) - { - memset(bdev, 0, sizeof(*bdev)); - mutex_init(&bdev->bd_mutex); - mutex_init(&bdev->bd_mount_mutex); - INIT_LIST_HEAD(&bdev->bd_inodes); - INIT_LIST_HEAD(&bdev->bd_list); + memset(bdev, 0, sizeof(*bdev)); + mutex_init(&bdev->bd_mutex); + sema_init(&bdev->bd_mount_sem, 1); + INIT_LIST_HEAD(&bdev->bd_inodes); + INIT_LIST_HEAD(&bdev->bd_list); #ifdef CONFIG_SYSFS - INIT_LIST_HEAD(&bdev->bd_holder_list); + INIT_LIST_HEAD(&bdev->bd_holder_list); #endif - inode_init_once(&ei->vfs_inode); - } + inode_init_once(&ei->vfs_inode); } static inline void __bd_forget(struct inode *inode) @@ -292,7 +305,7 @@ static void bdev_clear_inode(struct inode *inode) spin_unlock(&bdev_lock); } -static struct super_operations bdev_sops = { +static const struct super_operations bdev_sops = { .statfs = simple_statfs, .alloc_inode = bdev_alloc_inode, .destroy_inode = bdev_destroy_inode, @@ -300,10 +313,10 @@ static struct super_operations bdev_sops = { .clear_inode = bdev_clear_inode, }; -static struct super_block *bd_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int bd_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576); + return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt); } static struct file_system_type bd_type = { @@ -321,12 +334,11 @@ void __init bdev_cache_init(void) bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD|SLAB_PANIC), - init_once, NULL); + init_once); err = register_filesystem(&bd_type); if (err) panic("Cannot register bdev pseudo-fs"); bd_mnt = kern_mount(&bd_type); - err = PTR_ERR(bd_mnt); if (IS_ERR(bd_mnt)) panic("Cannot create bdev pseudo-fs"); blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ @@ -392,12 +404,10 @@ EXPORT_SYMBOL(bdget); long nr_blockdev_pages(void) { - struct list_head *p; + struct block_device *bdev; long ret = 0; spin_lock(&bdev_lock); - list_for_each(p, &all_bdevs) { - struct block_device *bdev; - bdev = list_entry(p, struct block_device, bd_list); + list_for_each_entry(bdev, &all_bdevs, bd_list) { ret += bdev->bd_inode->i_mapping->nrpages; } spin_unlock(&bdev_lock); @@ -532,9 +542,9 @@ EXPORT_SYMBOL(bd_release); static struct kobject *bdev_get_kobj(struct block_device *bdev) { if (bdev->bd_contains != bdev) - return kobject_get(&bdev->bd_part->kobj); + return kobject_get(&bdev->bd_part->dev.kobj); else - return kobject_get(&bdev->bd_disk->kobj); + return kobject_get(&bdev->bd_disk->dev.kobj); } static struct kobject *bdev_get_holder(struct block_device *bdev) @@ -545,11 +555,11 @@ static struct kobject *bdev_get_holder(struct block_device *bdev) return kobject_get(bdev->bd_disk->holder_dir); } -static void add_symlink(struct kobject *from, struct kobject *to) +static int add_symlink(struct kobject *from, struct kobject *to) { if (!from || !to) - return; - sysfs_create_link(from, to, kobject_name(to)); + return 0; + return sysfs_create_link(from, to, kobject_name(to)); } static void del_symlink(struct kobject *from, struct kobject *to) @@ -642,38 +652,62 @@ static void free_bd_holder(struct bd_holder *bo) } /** + * find_bd_holder - find matching struct bd_holder from the block device + * + * @bdev: struct block device to be searched + * @bo: target struct bd_holder + * + * Returns matching entry with @bo in @bdev->bd_holder_list. + * If found, increment the reference count and return the pointer. + * If not found, returns NULL. + */ +static struct bd_holder *find_bd_holder(struct block_device *bdev, + struct bd_holder *bo) +{ + struct bd_holder *tmp; + + list_for_each_entry(tmp, &bdev->bd_holder_list, list) + if (tmp->sdir == bo->sdir) { + tmp->count++; + return tmp; + } + + return NULL; +} + +/** * add_bd_holder - create sysfs symlinks for bd_claim() relationship * * @bdev: block device to be bd_claimed * @bo: preallocated and initialized by alloc_bd_holder() * - * If there is no matching entry with @bo in @bdev->bd_holder_list, - * add @bo to the list, create symlinks. + * Add @bo to @bdev->bd_holder_list, create symlinks. * - * Returns 1 if @bo was added to the list. - * Returns 0 if @bo wasn't used by any reason and should be freed. + * Returns 0 if symlinks are created. + * Returns -ve if something fails. */ static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) { - struct bd_holder *tmp; + int err; if (!bo) - return 0; - - list_for_each_entry(tmp, &bdev->bd_holder_list, list) { - if (tmp->sdir == bo->sdir) { - tmp->count++; - return 0; - } - } + return -EINVAL; if (!bd_holder_grab_dirs(bdev, bo)) - return 0; + return -EBUSY; + + err = add_symlink(bo->sdir, bo->sdev); + if (err) + return err; + + err = add_symlink(bo->hdir, bo->hdev); + if (err) { + del_symlink(bo->sdir, bo->sdev); + return err; + } - add_symlink(bo->sdir, bo->sdev); - add_symlink(bo->hdir, bo->hdev); list_add_tail(&bo->list, &bdev->bd_holder_list); - return 1; + return 0; } /** @@ -731,8 +765,8 @@ static struct bd_holder *del_bd_holder(struct block_device *bdev, static int bd_claim_by_kobject(struct block_device *bdev, void *holder, struct kobject *kobj) { - int res; - struct bd_holder *bo; + int err; + struct bd_holder *bo, *found; if (!kobj) return -EINVAL; @@ -742,12 +776,24 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, return -ENOMEM; mutex_lock(&bdev->bd_mutex); - res = bd_claim(bdev, holder); - if (res || !add_bd_holder(bdev, bo)) - free_bd_holder(bo); - mutex_unlock(&bdev->bd_mutex); - return res; + err = bd_claim(bdev, holder); + if (err) + goto fail; + + found = find_bd_holder(bdev, bo); + if (found) + goto fail; + + err = add_bd_holder(bdev, bo); + if (err) + bd_release(bdev); + else + bo = NULL; +fail: + mutex_unlock(&bdev->bd_mutex); + free_bd_holder(bo); + return err; } /** @@ -761,15 +807,12 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, static void bd_release_from_kobject(struct block_device *bdev, struct kobject *kobj) { - struct bd_holder *bo; - if (!kobj) return; mutex_lock(&bdev->bd_mutex); bd_release(bdev); - if ((bo = del_bd_holder(bdev, kobj))) - free_bd_holder(bo); + free_bd_holder(del_bd_holder(bdev, kobj)); mutex_unlock(&bdev->bd_mutex); } @@ -870,7 +913,18 @@ void bd_set_size(struct block_device *bdev, loff_t size) } EXPORT_SYMBOL(bd_set_size); -static int do_open(struct block_device *bdev, struct file *file) +static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, + int for_part); +static int __blkdev_put(struct block_device *bdev, int for_part); + +/* + * bd_mutex locking: + * + * mutex_lock(part->bd_mutex) + * mutex_lock_nested(whole->bd_mutex, 1) + */ + +static int do_open(struct block_device *bdev, struct file *file, int for_part) { struct module *owner = NULL; struct gendisk *disk; @@ -887,7 +941,7 @@ static int do_open(struct block_device *bdev, struct file *file) } owner = disk->fops->owner; - mutex_lock(&bdev->bd_mutex); + mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { bdev->bd_disk = disk; bdev->bd_contains = bdev; @@ -914,25 +968,21 @@ static int do_open(struct block_device *bdev, struct file *file) ret = -ENOMEM; if (!whole) goto out_first; - ret = blkdev_get(whole, file->f_mode, file->f_flags); + BUG_ON(for_part); + ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); if (ret) goto out_first; bdev->bd_contains = whole; - mutex_lock(&whole->bd_mutex); - whole->bd_part_count++; p = disk->part[part - 1]; bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { - whole->bd_part_count--; - mutex_unlock(&whole->bd_mutex); ret = -ENXIO; goto out_first; } - kobject_get(&p->kobj); + kobject_get(&p->dev.kobj); bdev->bd_part = p; bd_set_size(bdev, (loff_t) p->nr_sects << 9); - mutex_unlock(&whole->bd_mutex); } } else { put_disk(disk); @@ -945,13 +995,11 @@ static int do_open(struct block_device *bdev, struct file *file) } if (bdev->bd_invalidated) rescan_partitions(bdev->bd_disk, bdev); - } else { - mutex_lock(&bdev->bd_contains->bd_mutex); - bdev->bd_contains->bd_part_count++; - mutex_unlock(&bdev->bd_contains->bd_mutex); } } bdev->bd_openers++; + if (for_part) + bdev->bd_part_count++; mutex_unlock(&bdev->bd_mutex); unlock_kernel(); return 0; @@ -960,7 +1008,7 @@ out_first: bdev->bd_disk = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) - blkdev_put(bdev->bd_contains); + __blkdev_put(bdev->bd_contains, 1); bdev->bd_contains = NULL; put_disk(disk); module_put(owner); @@ -972,7 +1020,8 @@ out: return ret; } -int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags) +static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, + int for_part) { /* * This crockload is due to bad choice of ->open() type. @@ -984,12 +1033,16 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags) struct dentry fake_dentry = {}; fake_file.f_mode = mode; fake_file.f_flags = flags; - fake_file.f_dentry = &fake_dentry; + fake_file.f_path.dentry = &fake_dentry; fake_dentry.d_inode = bdev->bd_inode; - return do_open(bdev, &fake_file); + return do_open(bdev, &fake_file, for_part); } +int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags) +{ + return __blkdev_get(bdev, mode, flags, 0); +} EXPORT_SYMBOL(blkdev_get); static int blkdev_open(struct inode * inode, struct file * filp) @@ -1006,8 +1059,10 @@ static int blkdev_open(struct inode * inode, struct file * filp) filp->f_flags |= O_LARGEFILE; bdev = bd_acquire(inode); + if (bdev == NULL) + return -ENOMEM; - res = do_open(bdev, filp); + res = do_open(bdev, filp, 0); if (res) return res; @@ -1021,14 +1076,18 @@ static int blkdev_open(struct inode * inode, struct file * filp) return res; } -int blkdev_put(struct block_device *bdev) +static int __blkdev_put(struct block_device *bdev, int for_part) { int ret = 0; struct inode *bd_inode = bdev->bd_inode; struct gendisk *disk = bdev->bd_disk; + struct block_device *victim = NULL; - mutex_lock(&bdev->bd_mutex); + mutex_lock_nested(&bdev->bd_mutex, for_part); lock_kernel(); + if (for_part) + bdev->bd_part_count--; + if (!--bdev->bd_openers) { sync_blockdev(bdev); kill_bdev(bdev); @@ -1036,10 +1095,6 @@ int blkdev_put(struct block_device *bdev) if (bdev->bd_contains == bdev) { if (disk->fops->release) ret = disk->fops->release(bd_inode, NULL); - } else { - mutex_lock(&bdev->bd_contains->bd_mutex); - bdev->bd_contains->bd_part_count--; - mutex_unlock(&bdev->bd_contains->bd_mutex); } if (!bdev->bd_openers) { struct module *owner = disk->fops->owner; @@ -1048,22 +1103,27 @@ int blkdev_put(struct block_device *bdev) module_put(owner); if (bdev->bd_contains != bdev) { - kobject_put(&bdev->bd_part->kobj); + kobject_put(&bdev->bd_part->dev.kobj); bdev->bd_part = NULL; } bdev->bd_disk = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; - if (bdev != bdev->bd_contains) { - blkdev_put(bdev->bd_contains); - } + if (bdev != bdev->bd_contains) + victim = bdev->bd_contains; bdev->bd_contains = NULL; } unlock_kernel(); mutex_unlock(&bdev->bd_mutex); bdput(bdev); + if (victim) + __blkdev_put(victim, 1); return ret; } +int blkdev_put(struct block_device *bdev) +{ + return __blkdev_put(bdev, 0); +} EXPORT_SYMBOL(blkdev_put); static int blkdev_close(struct inode * inode, struct file * filp) @@ -1074,33 +1134,17 @@ static int blkdev_close(struct inode * inode, struct file * filp) return blkdev_put(bdev); } -static ssize_t blkdev_file_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count }; - - return generic_file_write_nolock(file, &local_iov, 1, ppos); -} - -static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) -{ - struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count }; - - return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); -} - static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) { return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); } -struct address_space_operations def_blk_aops = { +static const struct address_space_operations def_blk_aops = { .readpage = blkdev_readpage, .writepage = blkdev_writepage, .sync_page = block_sync_page, - .prepare_write = blkdev_prepare_write, - .commit_write = blkdev_commit_write, + .write_begin = blkdev_write_begin, + .write_end = blkdev_write_end, .writepages = generic_writepages, .direct_IO = blkdev_direct_IO, }; @@ -1109,19 +1153,16 @@ const struct file_operations def_blk_fops = { .open = blkdev_open, .release = blkdev_close, .llseek = block_llseek, - .read = generic_file_read, - .write = blkdev_file_write, + .read = do_sync_read, + .write = do_sync_write, .aio_read = generic_file_aio_read, - .aio_write = blkdev_file_aio_write, + .aio_write = generic_file_aio_write_nolock, .mmap = generic_file_mmap, .fsync = block_fsync, .unlocked_ioctl = block_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = compat_blkdev_ioctl, #endif - .readv = generic_file_readv, - .writev = generic_file_write_nolock, - .sendfile = generic_file_sendfile, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, }; @@ -1161,19 +1202,19 @@ struct block_device *lookup_bdev(const char *path) if (error) return ERR_PTR(error); - inode = nd.dentry->d_inode; + inode = nd.path.dentry->d_inode; error = -ENOTBLK; if (!S_ISBLK(inode->i_mode)) goto fail; error = -EACCES; - if (nd.mnt->mnt_flags & MNT_NODEV) + if (nd.path.mnt->mnt_flags & MNT_NODEV) goto fail; error = -ENOMEM; bdev = bd_acquire(inode); if (!bdev) goto fail; out: - path_release(&nd); + path_put(&nd.path); return bdev; fail: bdev = ERR_PTR(error); @@ -1235,3 +1276,24 @@ void close_bdev_excl(struct block_device *bdev) } EXPORT_SYMBOL(close_bdev_excl); + +int __invalidate_device(struct block_device *bdev) +{ + struct super_block *sb = get_super(bdev); + int res = 0; + + if (sb) { + /* + * no need to lock the super, get_super holds the + * read mutex so the filesystem cannot go away + * under us (->put_super runs with the write lock + * hold). + */ + shrink_dcache_sb(sb); + res = invalidate_inodes(sb); + drop_super(sb); + } + invalidate_bdev(bdev); + return res; +} +EXPORT_SYMBOL(__invalidate_device);