X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fblock_dev.c;h=aff54219e04953386162690941d7e412f3a29c64;hb=a9c701e594669dd49fed448c27c64f20cfacc8a7;hp=aaa8301f43f1dba8bb6934abf6b89c8e0fad9182;hpb=df6c0cd9a872ebf2298f5d66d8c789f62dbe35fc;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/block_dev.c b/fs/block_dev.c index aaa8301..aff5421 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,7 @@ #include #include #include +#include #include #include "internal.h" @@ -30,6 +32,8 @@ struct bdev_inode { struct inode vfs_inode; }; +static const struct address_space_operations def_blk_aops; + static inline struct bdev_inode *BDEV_I(struct inode *inode) { return container_of(inode, struct bdev_inode, vfs_inode); @@ -55,17 +59,19 @@ static sector_t max_block(struct block_device *bdev) return retval; } -/* Kill _all_ buffers, dirty or not.. */ +/* Kill _all_ buffers and pagecache , dirty or not.. */ static void kill_bdev(struct block_device *bdev) { - invalidate_bdev(bdev, 1); + if (bdev->bd_inode->i_mapping->nrpages == 0) + return; + invalidate_bh_lrus(); truncate_inode_pages(bdev->bd_inode->i_mapping, 0); } int set_blocksize(struct block_device *bdev, int size) { /* Size must be a power of two, and between 512 and PAGE_SIZE */ - if (size > PAGE_SIZE || size < 512 || (size & (size-1))) + if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) return -EINVAL; /* Size cannot be smaller than the size supported by the device */ @@ -178,19 +184,31 @@ static int blkdev_readpage(struct file * file, struct page * page) return block_read_full_page(page, blkdev_get_block); } -static int blkdev_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) +static int blkdev_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return block_prepare_write(page, from, to, blkdev_get_block); + *pagep = NULL; + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + blkdev_get_block); } -static int blkdev_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) +static int blkdev_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { - return block_commit_write(page, from, to); + int ret; + ret = block_write_end(file, mapping, pos, len, copied, page, fsdata); + + unlock_page(page); + page_cache_release(page); + + return ret; } /* * private llseek: - * for a block special file file->f_dentry->d_inode->i_size is zero + * for a block special file file->f_path.dentry->d_inode->i_size is zero * so we compute the size by hand (just as in block_read/write above) */ static loff_t block_llseek(struct file *file, loff_t offset, int origin) @@ -235,11 +253,11 @@ static int block_fsync(struct file *filp, struct dentry *dentry, int datasync) */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock); -static kmem_cache_t * bdev_cachep __read_mostly; +static struct kmem_cache * bdev_cachep __read_mostly; static struct inode *bdev_alloc_inode(struct super_block *sb) { - struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, SLAB_KERNEL); + struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; @@ -253,24 +271,20 @@ static void bdev_destroy_inode(struct inode *inode) kmem_cache_free(bdev_cachep, bdi); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void *foo) { struct bdev_inode *ei = (struct bdev_inode *) foo; struct block_device *bdev = &ei->bdev; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) - { - memset(bdev, 0, sizeof(*bdev)); - mutex_init(&bdev->bd_mutex); - mutex_init(&bdev->bd_mount_mutex); - INIT_LIST_HEAD(&bdev->bd_inodes); - INIT_LIST_HEAD(&bdev->bd_list); + memset(bdev, 0, sizeof(*bdev)); + mutex_init(&bdev->bd_mutex); + sema_init(&bdev->bd_mount_sem, 1); + INIT_LIST_HEAD(&bdev->bd_inodes); + INIT_LIST_HEAD(&bdev->bd_list); #ifdef CONFIG_SYSFS - INIT_LIST_HEAD(&bdev->bd_holder_list); + INIT_LIST_HEAD(&bdev->bd_holder_list); #endif - inode_init_once(&ei->vfs_inode); - } + inode_init_once(&ei->vfs_inode); } static inline void __bd_forget(struct inode *inode) @@ -292,7 +306,7 @@ static void bdev_clear_inode(struct inode *inode) spin_unlock(&bdev_lock); } -static struct super_operations bdev_sops = { +static const struct super_operations bdev_sops = { .statfs = simple_statfs, .alloc_inode = bdev_alloc_inode, .destroy_inode = bdev_destroy_inode, @@ -321,12 +335,11 @@ void __init bdev_cache_init(void) bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD|SLAB_PANIC), - init_once, NULL); + init_once); err = register_filesystem(&bd_type); if (err) panic("Cannot register bdev pseudo-fs"); bd_mnt = kern_mount(&bd_type); - err = PTR_ERR(bd_mnt); if (IS_ERR(bd_mnt)) panic("Cannot create bdev pseudo-fs"); blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ @@ -392,12 +405,10 @@ EXPORT_SYMBOL(bdget); long nr_blockdev_pages(void) { - struct list_head *p; + struct block_device *bdev; long ret = 0; spin_lock(&bdev_lock); - list_for_each(p, &all_bdevs) { - struct block_device *bdev; - bdev = list_entry(p, struct block_device, bd_list); + list_for_each_entry(bdev, &all_bdevs, bd_list) { ret += bdev->bd_inode->i_mapping->nrpages; } spin_unlock(&bdev_lock); @@ -532,9 +543,9 @@ EXPORT_SYMBOL(bd_release); static struct kobject *bdev_get_kobj(struct block_device *bdev) { if (bdev->bd_contains != bdev) - return kobject_get(&bdev->bd_part->kobj); + return kobject_get(&bdev->bd_part->dev.kobj); else - return kobject_get(&bdev->bd_disk->kobj); + return kobject_get(&bdev->bd_disk->dev.kobj); } static struct kobject *bdev_get_holder(struct block_device *bdev) @@ -651,7 +662,8 @@ static void free_bd_holder(struct bd_holder *bo) * If found, increment the reference count and return the pointer. * If not found, returns NULL. */ -static int find_bd_holder(struct block_device *bdev, struct bd_holder *bo) +static struct bd_holder *find_bd_holder(struct block_device *bdev, + struct bd_holder *bo) { struct bd_holder *tmp; @@ -677,8 +689,7 @@ static int find_bd_holder(struct block_device *bdev, struct bd_holder *bo) */ static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) { - struct bd_holder *tmp; - int ret; + int err; if (!bo) return -EINVAL; @@ -686,15 +697,18 @@ static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) if (!bd_holder_grab_dirs(bdev, bo)) return -EBUSY; - ret = add_symlink(bo->sdir, bo->sdev); - if (ret == 0) { - ret = add_symlink(bo->hdir, bo->hdev); - if (ret) - del_symlink(bo->sdir, bo->sdev); + err = add_symlink(bo->sdir, bo->sdev); + if (err) + return err; + + err = add_symlink(bo->hdir, bo->hdev); + if (err) { + del_symlink(bo->sdir, bo->sdev); + return err; } - if (ret == 0) - list_add_tail(&bo->list, &bdev->bd_holder_list); - return ret; + + list_add_tail(&bo->list, &bdev->bd_holder_list); + return 0; } /** @@ -752,7 +766,7 @@ static struct bd_holder *del_bd_holder(struct block_device *bdev, static int bd_claim_by_kobject(struct block_device *bdev, void *holder, struct kobject *kobj) { - int res; + int err; struct bd_holder *bo, *found; if (!kobj) @@ -762,22 +776,25 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, if (!bo) return -ENOMEM; - mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION); - res = bd_claim(bdev, holder); - if (res == 0) { - found = find_bd_holder(bdev, bo); - if (found == NULL) { - res = add_bd_holder(bdev, bo); - if (res) - bd_release(bdev); - } - } + mutex_lock(&bdev->bd_mutex); - if (res || found) - free_bd_holder(bo); - mutex_unlock(&bdev->bd_mutex); + err = bd_claim(bdev, holder); + if (err) + goto fail; - return res; + found = find_bd_holder(bdev, bo); + if (found) + goto fail; + + err = add_bd_holder(bdev, bo); + if (err) + bd_release(bdev); + else + bo = NULL; +fail: + mutex_unlock(&bdev->bd_mutex); + free_bd_holder(bo); + return err; } /** @@ -791,15 +808,12 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, static void bd_release_from_kobject(struct block_device *bdev, struct kobject *kobj) { - struct bd_holder *bo; - if (!kobj) return; - mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION); + mutex_lock(&bdev->bd_mutex); bd_release(bdev); - if ((bo = del_bd_holder(bdev, kobj))) - free_bd_holder(bo); + free_bd_holder(del_bd_holder(bdev, kobj)); mutex_unlock(&bdev->bd_mutex); } @@ -854,22 +868,6 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode) EXPORT_SYMBOL(open_by_devnum); -static int -blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags); - -struct block_device *open_partition_by_devnum(dev_t dev, unsigned mode) -{ - struct block_device *bdev = bdget(dev); - int err = -ENOMEM; - int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY; - if (bdev) - err = blkdev_get_partition(bdev, mode, flags); - return err ? ERR_PTR(err) : bdev; -} - -EXPORT_SYMBOL(open_partition_by_devnum); - - /* * This routine checks whether a removable media has been changed, * and invalidates all buffer-cache-entries in that case. This @@ -916,72 +914,39 @@ void bd_set_size(struct block_device *bdev, loff_t size) } EXPORT_SYMBOL(bd_set_size); -static int __blkdev_put(struct block_device *bdev, unsigned int subclass) -{ - int ret = 0; - struct inode *bd_inode = bdev->bd_inode; - struct gendisk *disk = bdev->bd_disk; - - mutex_lock_nested(&bdev->bd_mutex, subclass); - lock_kernel(); - if (!--bdev->bd_openers) { - sync_blockdev(bdev); - kill_bdev(bdev); - } - if (bdev->bd_contains == bdev) { - if (disk->fops->release) - ret = disk->fops->release(bd_inode, NULL); - } else { - mutex_lock_nested(&bdev->bd_contains->bd_mutex, - subclass + 1); - bdev->bd_contains->bd_part_count--; - mutex_unlock(&bdev->bd_contains->bd_mutex); - } - if (!bdev->bd_openers) { - struct module *owner = disk->fops->owner; - - put_disk(disk); - module_put(owner); - - if (bdev->bd_contains != bdev) { - kobject_put(&bdev->bd_part->kobj); - bdev->bd_part = NULL; - } - bdev->bd_disk = NULL; - bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; - if (bdev != bdev->bd_contains) - __blkdev_put(bdev->bd_contains, subclass + 1); - bdev->bd_contains = NULL; - } - unlock_kernel(); - mutex_unlock(&bdev->bd_mutex); - bdput(bdev); - return ret; -} - -int blkdev_put(struct block_device *bdev) -{ - return __blkdev_put(bdev, BD_MUTEX_NORMAL); -} -EXPORT_SYMBOL(blkdev_put); - -int blkdev_put_partition(struct block_device *bdev) -{ - return __blkdev_put(bdev, BD_MUTEX_PARTITION); -} -EXPORT_SYMBOL(blkdev_put_partition); +static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, + int for_part); +static int __blkdev_put(struct block_device *bdev, int for_part); -static int -blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags); +/* + * bd_mutex locking: + * + * mutex_lock(part->bd_mutex) + * mutex_lock_nested(whole->bd_mutex, 1) + */ -static int -do_open(struct block_device *bdev, struct file *file, unsigned int subclass) +static int do_open(struct block_device *bdev, struct file *file, int for_part) { struct module *owner = NULL; struct gendisk *disk; - int ret = -ENXIO; + int ret; int part; + int perm = 0; + + if (file->f_mode & FMODE_READ) + perm |= MAY_READ; + if (file->f_mode & FMODE_WRITE) + perm |= MAY_WRITE; + /* + * hooks: /n/, see "layering violations". + */ + ret = devcgroup_inode_permission(bdev->bd_inode, perm); + if (ret != 0) { + bdput(bdev); + return ret; + } + ret = -ENXIO; file->f_mapping = bdev->bd_inode->i_mapping; lock_kernel(); disk = get_gendisk(bdev->bd_dev, &part); @@ -992,8 +957,7 @@ do_open(struct block_device *bdev, struct file *file, unsigned int subclass) } owner = disk->fops->owner; - mutex_lock_nested(&bdev->bd_mutex, subclass); - + mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { bdev->bd_disk = disk; bdev->bd_contains = bdev; @@ -1020,25 +984,21 @@ do_open(struct block_device *bdev, struct file *file, unsigned int subclass) ret = -ENOMEM; if (!whole) goto out_first; - ret = blkdev_get_whole(whole, file->f_mode, file->f_flags); + BUG_ON(for_part); + ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); if (ret) goto out_first; bdev->bd_contains = whole; - mutex_lock_nested(&whole->bd_mutex, BD_MUTEX_WHOLE); - whole->bd_part_count++; p = disk->part[part - 1]; bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { - whole->bd_part_count--; - mutex_unlock(&whole->bd_mutex); ret = -ENXIO; goto out_first; } - kobject_get(&p->kobj); + kobject_get(&p->dev.kobj); bdev->bd_part = p; bd_set_size(bdev, (loff_t) p->nr_sects << 9); - mutex_unlock(&whole->bd_mutex); } } else { put_disk(disk); @@ -1051,14 +1011,11 @@ do_open(struct block_device *bdev, struct file *file, unsigned int subclass) } if (bdev->bd_invalidated) rescan_partitions(bdev->bd_disk, bdev); - } else { - mutex_lock_nested(&bdev->bd_contains->bd_mutex, - BD_MUTEX_WHOLE); - bdev->bd_contains->bd_part_count++; - mutex_unlock(&bdev->bd_contains->bd_mutex); } } bdev->bd_openers++; + if (for_part) + bdev->bd_part_count++; mutex_unlock(&bdev->bd_mutex); unlock_kernel(); return 0; @@ -1067,7 +1024,7 @@ out_first: bdev->bd_disk = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) - __blkdev_put(bdev->bd_contains, BD_MUTEX_WHOLE); + __blkdev_put(bdev->bd_contains, 1); bdev->bd_contains = NULL; put_disk(disk); module_put(owner); @@ -1079,28 +1036,8 @@ out: return ret; } -int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags) -{ - /* - * This crockload is due to bad choice of ->open() type. - * It will go away. - * For now, block device ->open() routine must _not_ - * examine anything in 'inode' argument except ->i_rdev. - */ - struct file fake_file = {}; - struct dentry fake_dentry = {}; - fake_file.f_mode = mode; - fake_file.f_flags = flags; - fake_file.f_dentry = &fake_dentry; - fake_dentry.d_inode = bdev->bd_inode; - - return do_open(bdev, &fake_file, BD_MUTEX_NORMAL); -} - -EXPORT_SYMBOL(blkdev_get); - -static int -blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags) +static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, + int for_part) { /* * This crockload is due to bad choice of ->open() type. @@ -1112,30 +1049,17 @@ blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags) struct dentry fake_dentry = {}; fake_file.f_mode = mode; fake_file.f_flags = flags; - fake_file.f_dentry = &fake_dentry; + fake_file.f_path.dentry = &fake_dentry; fake_dentry.d_inode = bdev->bd_inode; - return do_open(bdev, &fake_file, BD_MUTEX_WHOLE); + return do_open(bdev, &fake_file, for_part); } -static int -blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags) +int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags) { - /* - * This crockload is due to bad choice of ->open() type. - * It will go away. - * For now, block device ->open() routine must _not_ - * examine anything in 'inode' argument except ->i_rdev. - */ - struct file fake_file = {}; - struct dentry fake_dentry = {}; - fake_file.f_mode = mode; - fake_file.f_flags = flags; - fake_file.f_dentry = &fake_dentry; - fake_dentry.d_inode = bdev->bd_inode; - - return do_open(bdev, &fake_file, BD_MUTEX_PARTITION); + return __blkdev_get(bdev, mode, flags, 0); } +EXPORT_SYMBOL(blkdev_get); static int blkdev_open(struct inode * inode, struct file * filp) { @@ -1154,7 +1078,7 @@ static int blkdev_open(struct inode * inode, struct file * filp) if (bdev == NULL) return -ENOMEM; - res = do_open(bdev, filp, BD_MUTEX_NORMAL); + res = do_open(bdev, filp, 0); if (res) return res; @@ -1168,6 +1092,56 @@ static int blkdev_open(struct inode * inode, struct file * filp) return res; } +static int __blkdev_put(struct block_device *bdev, int for_part) +{ + int ret = 0; + struct inode *bd_inode = bdev->bd_inode; + struct gendisk *disk = bdev->bd_disk; + struct block_device *victim = NULL; + + mutex_lock_nested(&bdev->bd_mutex, for_part); + lock_kernel(); + if (for_part) + bdev->bd_part_count--; + + if (!--bdev->bd_openers) { + sync_blockdev(bdev); + kill_bdev(bdev); + } + if (bdev->bd_contains == bdev) { + if (disk->fops->release) + ret = disk->fops->release(bd_inode, NULL); + } + if (!bdev->bd_openers) { + struct module *owner = disk->fops->owner; + + put_disk(disk); + module_put(owner); + + if (bdev->bd_contains != bdev) { + kobject_put(&bdev->bd_part->dev.kobj); + bdev->bd_part = NULL; + } + bdev->bd_disk = NULL; + bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; + if (bdev != bdev->bd_contains) + victim = bdev->bd_contains; + bdev->bd_contains = NULL; + } + unlock_kernel(); + mutex_unlock(&bdev->bd_mutex); + bdput(bdev); + if (victim) + __blkdev_put(victim, 1); + return ret; +} + +int blkdev_put(struct block_device *bdev) +{ + return __blkdev_put(bdev, 0); +} +EXPORT_SYMBOL(blkdev_put); + static int blkdev_close(struct inode * inode, struct file * filp) { struct block_device *bdev = I_BDEV(filp->f_mapping->host); @@ -1181,12 +1155,12 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); } -const struct address_space_operations def_blk_aops = { +static const struct address_space_operations def_blk_aops = { .readpage = blkdev_readpage, .writepage = blkdev_writepage, .sync_page = block_sync_page, - .prepare_write = blkdev_prepare_write, - .commit_write = blkdev_commit_write, + .write_begin = blkdev_write_begin, + .write_end = blkdev_write_end, .writepages = generic_writepages, .direct_IO = blkdev_direct_IO, }; @@ -1205,7 +1179,6 @@ const struct file_operations def_blk_fops = { #ifdef CONFIG_COMPAT .compat_ioctl = compat_blkdev_ioctl, #endif - .sendfile = generic_file_sendfile, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, }; @@ -1245,24 +1218,25 @@ struct block_device *lookup_bdev(const char *path) if (error) return ERR_PTR(error); - inode = nd.dentry->d_inode; + inode = nd.path.dentry->d_inode; error = -ENOTBLK; if (!S_ISBLK(inode->i_mode)) goto fail; error = -EACCES; - if (nd.mnt->mnt_flags & MNT_NODEV) + if (nd.path.mnt->mnt_flags & MNT_NODEV) goto fail; error = -ENOMEM; bdev = bd_acquire(inode); if (!bdev) goto fail; out: - path_release(&nd); + path_put(&nd.path); return bdev; fail: bdev = ERR_PTR(error); goto out; } +EXPORT_SYMBOL(lookup_bdev); /** * open_bdev_excl - open a block device by name and set it up for use @@ -1336,7 +1310,7 @@ int __invalidate_device(struct block_device *bdev) res = invalidate_inodes(sb); drop_super(sb); } - invalidate_bdev(bdev, 0); + invalidate_bdev(bdev); return res; } EXPORT_SYMBOL(__invalidate_device);