X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fblock_dev.c;h=f45dbc18dd175891950ddb84fffa2bc6ce0df117;hb=6b65c5c61bf86086817a5ed786c8f45755ac83b3;hp=b721955d382ebbd3e8b1467bf02f53ad0ae7b8fa;hpb=c3279d1454cdfed02a557d789d8a6d08ab4cbe70;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/block_dev.c b/fs/block_dev.c index b721955..f45dbc1 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -174,6 +175,152 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, iov, offset, nr_segs, blkdev_get_blocks, NULL); } +/* + * Write out and wait upon all the dirty data associated with a block + * device via its mapping. Does not take the superblock lock. + */ +int sync_blockdev(struct block_device *bdev) +{ + int ret = 0; + + if (bdev) + ret = filemap_write_and_wait(bdev->bd_inode->i_mapping); + return ret; +} +EXPORT_SYMBOL(sync_blockdev); + +/* + * Write out and wait upon all dirty data associated with this + * device. Filesystem data as well as the underlying block + * device. Takes the superblock lock. + */ +int fsync_bdev(struct block_device *bdev) +{ + struct super_block *sb = get_super(bdev); + if (sb) { + int res = fsync_super(sb); + drop_super(sb); + return res; + } + return sync_blockdev(bdev); +} +EXPORT_SYMBOL(fsync_bdev); + +/** + * freeze_bdev -- lock a filesystem and force it into a consistent state + * @bdev: blockdevice to lock + * + * This takes the block device bd_mount_sem to make sure no new mounts + * happen on bdev until thaw_bdev() is called. + * If a superblock is found on this device, we take the s_umount semaphore + * on it to make sure nobody unmounts until the snapshot creation is done. + * The reference counter (bd_fsfreeze_count) guarantees that only the last + * unfreeze process can unfreeze the frozen filesystem actually when multiple + * freeze requests arrive simultaneously. It counts up in freeze_bdev() and + * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze + * actually. + */ +struct super_block *freeze_bdev(struct block_device *bdev) +{ + struct super_block *sb; + int error = 0; + + mutex_lock(&bdev->bd_fsfreeze_mutex); + if (bdev->bd_fsfreeze_count > 0) { + bdev->bd_fsfreeze_count++; + sb = get_super(bdev); + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return sb; + } + bdev->bd_fsfreeze_count++; + + down(&bdev->bd_mount_sem); + sb = get_super(bdev); + if (sb && !(sb->s_flags & MS_RDONLY)) { + sb->s_frozen = SB_FREEZE_WRITE; + smp_wmb(); + + __fsync_super(sb); + + sb->s_frozen = SB_FREEZE_TRANS; + smp_wmb(); + + sync_blockdev(sb->s_bdev); + + if (sb->s_op->freeze_fs) { + error = sb->s_op->freeze_fs(sb); + if (error) { + printk(KERN_ERR + "VFS:Filesystem freeze failed\n"); + sb->s_frozen = SB_UNFROZEN; + drop_super(sb); + up(&bdev->bd_mount_sem); + bdev->bd_fsfreeze_count--; + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return ERR_PTR(error); + } + } + } + + sync_blockdev(bdev); + mutex_unlock(&bdev->bd_fsfreeze_mutex); + + return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */ +} +EXPORT_SYMBOL(freeze_bdev); + +/** + * thaw_bdev -- unlock filesystem + * @bdev: blockdevice to unlock + * @sb: associated superblock + * + * Unlocks the filesystem and marks it writeable again after freeze_bdev(). + */ +int thaw_bdev(struct block_device *bdev, struct super_block *sb) +{ + int error = 0; + + mutex_lock(&bdev->bd_fsfreeze_mutex); + if (!bdev->bd_fsfreeze_count) { + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return -EINVAL; + } + + bdev->bd_fsfreeze_count--; + if (bdev->bd_fsfreeze_count > 0) { + if (sb) + drop_super(sb); + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return 0; + } + + if (sb) { + BUG_ON(sb->s_bdev != bdev); + if (!(sb->s_flags & MS_RDONLY)) { + if (sb->s_op->unfreeze_fs) { + error = sb->s_op->unfreeze_fs(sb); + if (error) { + printk(KERN_ERR + "VFS:Filesystem thaw failed\n"); + sb->s_frozen = SB_FREEZE_TRANS; + bdev->bd_fsfreeze_count++; + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return error; + } + } + sb->s_frozen = SB_UNFROZEN; + smp_wmb(); + wake_up(&sb->s_wait_unfrozen); + } + drop_super(sb); + } + + up(&bdev->bd_mount_sem); + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return 0; +} +EXPORT_SYMBOL(thaw_bdev); + static int blkdev_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, blkdev_get_block, wbc); @@ -285,6 +432,8 @@ static void init_once(void *foo) INIT_LIST_HEAD(&bdev->bd_holder_list); #endif inode_init_once(&ei->vfs_inode); + /* Initialize mutex for freeze. */ + mutex_init(&bdev->bd_fsfreeze_mutex); } static inline void __bd_forget(struct inode *inode) @@ -326,12 +475,13 @@ static struct file_system_type bd_type = { .kill_sb = kill_anon_super, }; -static struct vfsmount *bd_mnt __read_mostly; -struct super_block *blockdev_superblock; +struct super_block *blockdev_superblock __read_mostly; void __init bdev_cache_init(void) { int err; + struct vfsmount *bd_mnt; + bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD|SLAB_PANIC), @@ -373,7 +523,7 @@ struct block_device *bdget(dev_t dev) struct block_device *bdev; struct inode *inode; - inode = iget5_locked(bd_mnt->mnt_sb, hash(dev), + inode = iget5_locked(blockdev_superblock, hash(dev), bdev_test, bdev_set, &dev); if (!inode) @@ -463,7 +613,7 @@ void bd_forget(struct inode *inode) spin_lock(&bdev_lock); if (inode->i_bdev) { - if (inode->i_sb != blockdev_superblock) + if (!sb_is_blkdev_sb(inode->i_sb)) bdev = inode->i_bdev; __bd_forget(inode); } @@ -840,22 +990,45 @@ EXPORT_SYMBOL_GPL(bd_release_from_disk); * to be used for internal purposes. If you ever need it - reconsider * your API. */ -struct block_device *open_by_devnum(dev_t dev, unsigned mode) +struct block_device *open_by_devnum(dev_t dev, fmode_t mode) { struct block_device *bdev = bdget(dev); int err = -ENOMEM; - int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY; if (bdev) - err = blkdev_get(bdev, mode, flags); + err = blkdev_get(bdev, mode); return err ? ERR_PTR(err) : bdev; } EXPORT_SYMBOL(open_by_devnum); /** - * check_disk_size_change - checks for disk size change and adjusts - * bdev size. + * flush_disk - invalidates all buffer-cache entries on a disk + * + * @bdev: struct block device to be flushed * + * Invalidates all buffer-cache entries on a disk. It should be called + * when a disk has been changed -- either by a media change or online + * resize. + */ +static void flush_disk(struct block_device *bdev) +{ + if (__invalidate_device(bdev)) { + char name[BDEVNAME_SIZE] = ""; + + if (bdev->bd_disk) + disk_name(bdev->bd_disk, 0, name); + printk(KERN_WARNING "VFS: busy inodes on changed media or " + "resized disk %s\n", name); + } + + if (!bdev->bd_disk) + return; + if (disk_partitionable(bdev->bd_disk)) + bdev->bd_invalidated = 1; +} + +/** + * check_disk_size_change - checks for disk size change and adjusts bdev size. * @disk: struct gendisk to check * @bdev: struct bdev to adjust. * @@ -876,14 +1049,13 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) "%s: detected capacity change from %lld to %lld\n", name, bdev_size, disk_size); i_size_write(bdev->bd_inode, disk_size); + flush_disk(bdev); } } EXPORT_SYMBOL(check_disk_size_change); /** - * revalidate_disk - wrapper for lower-level driver's revalidate_disk - * call-back - * + * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back * @disk: struct gendisk to be revalidated * * This routine is a wrapper for lower-level driver's revalidate_disk @@ -929,13 +1101,9 @@ int check_disk_change(struct block_device *bdev) if (!bdops->media_changed(bdev->bd_disk)) return 0; - if (__invalidate_device(bdev)) - printk("VFS: busy inodes on changed media.\n"); - + flush_disk(bdev); if (bdops->revalidate_disk) bdops->revalidate_disk(bdev->bd_disk); - if (disk_partitionable(bdev->bd_disk)) - bdev->bd_invalidated = 1; return 1; } @@ -956,9 +1124,7 @@ void bd_set_size(struct block_device *bdev, loff_t size) } EXPORT_SYMBOL(bd_set_size); -static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, - int for_part); -static int __blkdev_put(struct block_device *bdev, int for_part); +static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); /* * bd_mutex locking: @@ -967,17 +1133,16 @@ static int __blkdev_put(struct block_device *bdev, int for_part); * mutex_lock_nested(whole->bd_mutex, 1) */ -static int do_open(struct block_device *bdev, struct file *file, int for_part) +static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) { struct gendisk *disk; - struct hd_struct *part = NULL; int ret; int partno; int perm = 0; - if (file->f_mode & FMODE_READ) + if (mode & FMODE_READ) perm |= MAY_READ; - if (file->f_mode & FMODE_WRITE) + if (mode & FMODE_WRITE) perm |= MAY_WRITE; /* * hooks: /n/, see "layering violations". @@ -988,27 +1153,41 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) return ret; } - ret = -ENXIO; - file->f_mapping = bdev->bd_inode->i_mapping; - lock_kernel(); + restart: + ret = -ENXIO; disk = get_gendisk(bdev->bd_dev, &partno); if (!disk) goto out_unlock_kernel; - part = disk_get_part(disk, partno); - if (!part) - goto out_unlock_kernel; mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { bdev->bd_disk = disk; - bdev->bd_part = part; bdev->bd_contains = bdev; if (!partno) { struct backing_dev_info *bdi; + + ret = -ENXIO; + bdev->bd_part = disk_get_part(disk, partno); + if (!bdev->bd_part) + goto out_clear; + if (disk->fops->open) { - ret = disk->fops->open(bdev->bd_inode, file); + ret = disk->fops->open(bdev, mode); + if (ret == -ERESTARTSYS) { + /* Lost a race with 'disk' being + * deleted, try again. + * See md.c + */ + disk_put_part(bdev->bd_part); + bdev->bd_part = NULL; + module_put(disk->fops->owner); + put_disk(disk); + bdev->bd_disk = NULL; + mutex_unlock(&bdev->bd_mutex); + goto restart; + } if (ret) goto out_clear; } @@ -1028,28 +1207,27 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) if (!whole) goto out_clear; BUG_ON(for_part); - ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); + ret = __blkdev_get(whole, mode, 1); if (ret) goto out_clear; bdev->bd_contains = whole; bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; + bdev->bd_part = disk_get_part(disk, partno); if (!(disk->flags & GENHD_FL_UP) || - !part || !part->nr_sects) { + !bdev->bd_part || !bdev->bd_part->nr_sects) { ret = -ENXIO; goto out_clear; } - bd_set_size(bdev, (loff_t)part->nr_sects << 9); + bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); } } else { - disk_put_part(part); put_disk(disk); module_put(disk->fops->owner); - part = NULL; disk = NULL; if (bdev->bd_contains == bdev) { if (bdev->bd_disk->fops->open) { - ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); + ret = bdev->bd_disk->fops->open(bdev, mode); if (ret) goto out_unlock_bdev; } @@ -1065,18 +1243,18 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) return 0; out_clear: + disk_put_part(bdev->bd_part); bdev->bd_disk = NULL; bdev->bd_part = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) - __blkdev_put(bdev->bd_contains, 1); + __blkdev_put(bdev->bd_contains, mode, 1); bdev->bd_contains = NULL; out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); out_unlock_kernel: unlock_kernel(); - disk_put_part(part); if (disk) module_put(disk->fops->owner); put_disk(disk); @@ -1085,28 +1263,9 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) return ret; } -static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, - int for_part) +int blkdev_get(struct block_device *bdev, fmode_t mode) { - /* - * This crockload is due to bad choice of ->open() type. - * It will go away. - * For now, block device ->open() routine must _not_ - * examine anything in 'inode' argument except ->i_rdev. - */ - struct file fake_file = {}; - struct dentry fake_dentry = {}; - fake_file.f_mode = mode; - fake_file.f_flags = flags; - fake_file.f_path.dentry = &fake_dentry; - fake_dentry.d_inode = bdev->bd_inode; - - return do_open(bdev, &fake_file, for_part); -} - -int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags) -{ - return __blkdev_get(bdev, mode, flags, 0); + return __blkdev_get(bdev, mode, 0); } EXPORT_SYMBOL(blkdev_get); @@ -1123,28 +1282,39 @@ static int blkdev_open(struct inode * inode, struct file * filp) */ filp->f_flags |= O_LARGEFILE; + if (filp->f_flags & O_NDELAY) + filp->f_mode |= FMODE_NDELAY; + if (filp->f_flags & O_EXCL) + filp->f_mode |= FMODE_EXCL; + if ((filp->f_flags & O_ACCMODE) == 3) + filp->f_mode |= FMODE_WRITE_IOCTL; + bdev = bd_acquire(inode); if (bdev == NULL) return -ENOMEM; - res = do_open(bdev, filp, 0); + filp->f_mapping = bdev->bd_inode->i_mapping; + + res = blkdev_get(bdev, filp->f_mode); if (res) return res; - if (!(filp->f_flags & O_EXCL) ) - return 0; + if (filp->f_mode & FMODE_EXCL) { + res = bd_claim(bdev, filp); + if (res) + goto out_blkdev_put; + } - if (!(res = bd_claim(bdev, filp))) - return 0; + return 0; - blkdev_put(bdev); + out_blkdev_put: + blkdev_put(bdev, filp->f_mode); return res; } -static int __blkdev_put(struct block_device *bdev, int for_part) +static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) { int ret = 0; - struct inode *bd_inode = bdev->bd_inode; struct gendisk *disk = bdev->bd_disk; struct block_device *victim = NULL; @@ -1159,7 +1329,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part) } if (bdev->bd_contains == bdev) { if (disk->fops->release) - ret = disk->fops->release(bd_inode, NULL); + ret = disk->fops->release(disk, mode); } if (!bdev->bd_openers) { struct module *owner = disk->fops->owner; @@ -1178,13 +1348,13 @@ static int __blkdev_put(struct block_device *bdev, int for_part) mutex_unlock(&bdev->bd_mutex); bdput(bdev); if (victim) - __blkdev_put(victim, 1); + __blkdev_put(victim, mode, 1); return ret; } -int blkdev_put(struct block_device *bdev) +int blkdev_put(struct block_device *bdev, fmode_t mode) { - return __blkdev_put(bdev, 0); + return __blkdev_put(bdev, mode, 0); } EXPORT_SYMBOL(blkdev_put); @@ -1193,12 +1363,38 @@ static int blkdev_close(struct inode * inode, struct file * filp) struct block_device *bdev = I_BDEV(filp->f_mapping->host); if (bdev->bd_holder == filp) bd_release(bdev); - return blkdev_put(bdev); + return blkdev_put(bdev, filp->f_mode); } static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) { - return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); + struct block_device *bdev = I_BDEV(file->f_mapping->host); + fmode_t mode = file->f_mode; + + /* + * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have + * to updated it before every ioctl. + */ + if (file->f_flags & O_NDELAY) + mode |= FMODE_NDELAY; + else + mode &= ~FMODE_NDELAY; + + return blkdev_ioctl(bdev, mode, cmd, arg); +} + +/* + * Try to release a page associated with block device when the system + * is under memory pressure. + */ +static int blkdev_releasepage(struct page *page, gfp_t wait) +{ + struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super; + + if (super && super->s_op->bdev_try_to_free_page) + return super->s_op->bdev_try_to_free_page(super, page, wait); + + return try_to_free_buffers(page); } static const struct address_space_operations def_blk_aops = { @@ -1208,6 +1404,7 @@ static const struct address_space_operations def_blk_aops = { .write_begin = blkdev_write_begin, .write_end = blkdev_write_end, .writepages = generic_writepages, + .releasepage = blkdev_releasepage, .direct_IO = blkdev_direct_IO, }; @@ -1234,7 +1431,7 @@ int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) int res; mm_segment_t old_fs = get_fs(); set_fs(KERNEL_DS); - res = blkdev_ioctl(bdev->bd_inode, NULL, cmd, arg); + res = blkdev_ioctl(bdev, 0, cmd, arg); set_fs(old_fs); return res; } @@ -1243,40 +1440,39 @@ EXPORT_SYMBOL(ioctl_by_bdev); /** * lookup_bdev - lookup a struct block_device by name + * @pathname: special file representing the block device * - * @path: special file representing the block device - * - * Get a reference to the blockdevice at @path in the current + * Get a reference to the blockdevice at @pathname in the current * namespace if possible and return it. Return ERR_PTR(error) * otherwise. */ -struct block_device *lookup_bdev(const char *path) +struct block_device *lookup_bdev(const char *pathname) { struct block_device *bdev; struct inode *inode; - struct nameidata nd; + struct path path; int error; - if (!path || !*path) + if (!pathname || !*pathname) return ERR_PTR(-EINVAL); - error = path_lookup(path, LOOKUP_FOLLOW, &nd); + error = kern_path(pathname, LOOKUP_FOLLOW, &path); if (error) return ERR_PTR(error); - inode = nd.path.dentry->d_inode; + inode = path.dentry->d_inode; error = -ENOTBLK; if (!S_ISBLK(inode->i_mode)) goto fail; error = -EACCES; - if (nd.path.mnt->mnt_flags & MNT_NODEV) + if (path.mnt->mnt_flags & MNT_NODEV) goto fail; error = -ENOMEM; bdev = bd_acquire(inode); if (!bdev) goto fail; out: - path_put(&nd.path); + path_put(&path); return bdev; fail: bdev = ERR_PTR(error); @@ -1285,32 +1481,29 @@ fail: EXPORT_SYMBOL(lookup_bdev); /** - * open_bdev_excl - open a block device by name and set it up for use + * open_bdev_exclusive - open a block device by name and set it up for use * * @path: special file representing the block device - * @flags: %MS_RDONLY for opening read-only + * @mode: FMODE_... combination to pass be used * @holder: owner for exclusion * * Open the blockdevice described by the special file at @path, claim it * for the @holder. */ -struct block_device *open_bdev_excl(const char *path, int flags, void *holder) +struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) { struct block_device *bdev; - mode_t mode = FMODE_READ; int error = 0; bdev = lookup_bdev(path); if (IS_ERR(bdev)) return bdev; - if (!(flags & MS_RDONLY)) - mode |= FMODE_WRITE; - error = blkdev_get(bdev, mode, 0); + error = blkdev_get(bdev, mode); if (error) return ERR_PTR(error); error = -EACCES; - if (!(flags & MS_RDONLY) && bdev_read_only(bdev)) + if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) goto blkdev_put; error = bd_claim(bdev, holder); if (error) @@ -1319,26 +1512,27 @@ struct block_device *open_bdev_excl(const char *path, int flags, void *holder) return bdev; blkdev_put: - blkdev_put(bdev); + blkdev_put(bdev, mode); return ERR_PTR(error); } -EXPORT_SYMBOL(open_bdev_excl); +EXPORT_SYMBOL(open_bdev_exclusive); /** - * close_bdev_excl - release a blockdevice openen by open_bdev_excl() + * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive() * * @bdev: blockdevice to close + * @mode: mode, must match that used to open. * - * This is the counterpart to open_bdev_excl(). + * This is the counterpart to open_bdev_exclusive(). */ -void close_bdev_excl(struct block_device *bdev) +void close_bdev_exclusive(struct block_device *bdev, fmode_t mode) { bd_release(bdev); - blkdev_put(bdev); + blkdev_put(bdev, mode); } -EXPORT_SYMBOL(close_bdev_excl); +EXPORT_SYMBOL(close_bdev_exclusive); int __invalidate_device(struct block_device *bdev) {