X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fblock_dev.c;h=26e5f50266205a7e7d2069bb92dd5600e77fe61c;hb=578454ff7eab61d13a26b568f99a89a2c9edc881;hp=05865b93f7e1cd32112e04aface1f649e550c6ab;hpb=56b26add02b4bdea81d5e0ebda60db1fe3311ad4;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/block_dev.c b/fs/block_dev.c index 05865b9..26e5f50 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -18,12 +18,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include "internal.h" @@ -75,7 +77,7 @@ int set_blocksize(struct block_device *bdev, int size) return -EINVAL; /* Size cannot be smaller than the size supported by the device */ - if (size < bdev_hardsect_size(bdev)) + if (size < bdev_logical_block_size(bdev)) return -EINVAL; /* Don't change the size if it is same as current */ @@ -105,7 +107,7 @@ EXPORT_SYMBOL(sb_set_blocksize); int sb_min_blocksize(struct super_block *sb, int size) { - int minsize = bdev_hardsect_size(sb->s_bdev); + int minsize = bdev_logical_block_size(sb->s_bdev); if (size < minsize) size = minsize; return sb_set_blocksize(sb, size); @@ -174,6 +176,124 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, iov, offset, nr_segs, blkdev_get_blocks, NULL); } +int __sync_blockdev(struct block_device *bdev, int wait) +{ + if (!bdev) + return 0; + if (!wait) + return filemap_flush(bdev->bd_inode->i_mapping); + return filemap_write_and_wait(bdev->bd_inode->i_mapping); +} + +/* + * Write out and wait upon all the dirty data associated with a block + * device via its mapping. Does not take the superblock lock. + */ +int sync_blockdev(struct block_device *bdev) +{ + return __sync_blockdev(bdev, 1); +} +EXPORT_SYMBOL(sync_blockdev); + +/* + * Write out and wait upon all dirty data associated with this + * device. Filesystem data as well as the underlying block + * device. Takes the superblock lock. + */ +int fsync_bdev(struct block_device *bdev) +{ + struct super_block *sb = get_super(bdev); + if (sb) { + int res = sync_filesystem(sb); + drop_super(sb); + return res; + } + return sync_blockdev(bdev); +} +EXPORT_SYMBOL(fsync_bdev); + +/** + * freeze_bdev -- lock a filesystem and force it into a consistent state + * @bdev: blockdevice to lock + * + * If a superblock is found on this device, we take the s_umount semaphore + * on it to make sure nobody unmounts until the snapshot creation is done. + * The reference counter (bd_fsfreeze_count) guarantees that only the last + * unfreeze process can unfreeze the frozen filesystem actually when multiple + * freeze requests arrive simultaneously. It counts up in freeze_bdev() and + * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze + * actually. + */ +struct super_block *freeze_bdev(struct block_device *bdev) +{ + struct super_block *sb; + int error = 0; + + mutex_lock(&bdev->bd_fsfreeze_mutex); + if (++bdev->bd_fsfreeze_count > 1) { + /* + * We don't even need to grab a reference - the first call + * to freeze_bdev grab an active reference and only the last + * thaw_bdev drops it. + */ + sb = get_super(bdev); + drop_super(sb); + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return sb; + } + + sb = get_active_super(bdev); + if (!sb) + goto out; + error = freeze_super(sb); + if (error) { + deactivate_super(sb); + bdev->bd_fsfreeze_count--; + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return ERR_PTR(error); + } + deactivate_super(sb); + out: + sync_blockdev(bdev); + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return sb; /* thaw_bdev releases s->s_umount */ +} +EXPORT_SYMBOL(freeze_bdev); + +/** + * thaw_bdev -- unlock filesystem + * @bdev: blockdevice to unlock + * @sb: associated superblock + * + * Unlocks the filesystem and marks it writeable again after freeze_bdev(). + */ +int thaw_bdev(struct block_device *bdev, struct super_block *sb) +{ + int error = -EINVAL; + + mutex_lock(&bdev->bd_fsfreeze_mutex); + if (!bdev->bd_fsfreeze_count) + goto out; + + error = 0; + if (--bdev->bd_fsfreeze_count > 0) + goto out; + + if (!sb) + goto out; + + error = thaw_super(sb); + if (error) { + bdev->bd_fsfreeze_count++; + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return error; + } +out: + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return 0; +} +EXPORT_SYMBOL(thaw_bdev); + static int blkdev_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, blkdev_get_block, wbc); @@ -243,10 +363,28 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin) * NULL first argument is nfsd_sync_dir() and that's not a directory. */ -static int block_fsync(struct file *filp, struct dentry *dentry, int datasync) +int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync) { - return sync_blockdev(I_BDEV(filp->f_mapping->host)); + struct inode *bd_inode = filp->f_mapping->host; + struct block_device *bdev = I_BDEV(bd_inode); + int error; + + /* + * There is no need to serialise calls to blkdev_issue_flush with + * i_mutex and doing so causes performance issues with concurrent + * O_SYNC writers to a block device. + */ + mutex_unlock(&bd_inode->i_mutex); + + error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT); + if (error == -EOPNOTSUPP) + error = 0; + + mutex_lock(&bd_inode->i_mutex); + + return error; } +EXPORT_SYMBOL(blkdev_fsync); /* * pseudo-fs @@ -267,7 +405,6 @@ static void bdev_destroy_inode(struct inode *inode) { struct bdev_inode *bdi = BDEV_I(inode); - bdi->bdev.bd_inode_backing_dev_info = NULL; kmem_cache_free(bdev_cachep, bdi); } @@ -278,13 +415,14 @@ static void init_once(void *foo) memset(bdev, 0, sizeof(*bdev)); mutex_init(&bdev->bd_mutex); - sema_init(&bdev->bd_mount_sem, 1); INIT_LIST_HEAD(&bdev->bd_inodes); INIT_LIST_HEAD(&bdev->bd_list); #ifdef CONFIG_SYSFS INIT_LIST_HEAD(&bdev->bd_holder_list); #endif inode_init_once(&ei->vfs_inode); + /* Initialize mutex for freeze. */ + mutex_init(&bdev->bd_fsfreeze_mutex); } static inline void __bd_forget(struct inode *inode) @@ -326,12 +464,13 @@ static struct file_system_type bd_type = { .kill_sb = kill_anon_super, }; -static struct vfsmount *bd_mnt __read_mostly; -struct super_block *blockdev_superblock; +struct super_block *blockdev_superblock __read_mostly; void __init bdev_cache_init(void) { int err; + struct vfsmount *bd_mnt; + bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD|SLAB_PANIC), @@ -342,6 +481,11 @@ void __init bdev_cache_init(void) bd_mnt = kern_mount(&bd_type); if (IS_ERR(bd_mnt)) panic("Cannot create bdev pseudo-fs"); + /* + * This vfsmount structure is only used to obtain the + * blockdev_superblock, so tell kmemleak not to report it. + */ + kmemleak_not_leak(bd_mnt); blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ } @@ -373,7 +517,7 @@ struct block_device *bdget(dev_t dev) struct block_device *bdev; struct inode *inode; - inode = iget5_locked(bd_mnt->mnt_sb, hash(dev), + inode = iget5_locked(blockdev_superblock, hash(dev), bdev_test, bdev_set, &dev); if (!inode) @@ -403,6 +547,16 @@ struct block_device *bdget(dev_t dev) EXPORT_SYMBOL(bdget); +/** + * bdgrab -- Grab a reference to an already referenced block device + * @bdev: Block device to grab a reference to. + */ +struct block_device *bdgrab(struct block_device *bdev) +{ + atomic_inc(&bdev->bd_inode->i_count); + return bdev; +} + long nr_blockdev_pages(void) { struct block_device *bdev; @@ -463,7 +617,7 @@ void bd_forget(struct inode *inode) spin_lock(&bdev_lock); if (inode->i_bdev) { - if (inode->i_sb != blockdev_superblock) + if (!sb_is_blkdev_sb(inode->i_sb)) bdev = inode->i_bdev; __bd_forget(inode); } @@ -473,41 +627,209 @@ void bd_forget(struct inode *inode) iput(bdev->bd_inode); } -int bd_claim(struct block_device *bdev, void *holder) +/** + * bd_may_claim - test whether a block device can be claimed + * @bdev: block device of interest + * @whole: whole block device containing @bdev, may equal @bdev + * @holder: holder trying to claim @bdev + * + * Test whther @bdev can be claimed by @holder. + * + * CONTEXT: + * spin_lock(&bdev_lock). + * + * RETURNS: + * %true if @bdev can be claimed, %false otherwise. + */ +static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, + void *holder) { - int res; - spin_lock(&bdev_lock); - - /* first decide result */ if (bdev->bd_holder == holder) - res = 0; /* already a holder */ + return true; /* already a holder */ else if (bdev->bd_holder != NULL) - res = -EBUSY; /* held by someone else */ + return false; /* held by someone else */ else if (bdev->bd_contains == bdev) - res = 0; /* is a whole device which isn't held */ + return true; /* is a whole device which isn't held */ - else if (bdev->bd_contains->bd_holder == bd_claim) - res = 0; /* is a partition of a device that is being partitioned */ - else if (bdev->bd_contains->bd_holder != NULL) - res = -EBUSY; /* is a partition of a held device */ + else if (whole->bd_holder == bd_claim) + return true; /* is a partition of a device that is being partitioned */ + else if (whole->bd_holder != NULL) + return false; /* is a partition of a held device */ else - res = 0; /* is a partition of an un-held device */ + return true; /* is a partition of an un-held device */ +} + +/** + * bd_prepare_to_claim - prepare to claim a block device + * @bdev: block device of interest + * @whole: the whole device containing @bdev, may equal @bdev + * @holder: holder trying to claim @bdev + * + * Prepare to claim @bdev. This function fails if @bdev is already + * claimed by another holder and waits if another claiming is in + * progress. This function doesn't actually claim. On successful + * return, the caller has ownership of bd_claiming and bd_holder[s]. + * + * CONTEXT: + * spin_lock(&bdev_lock). Might release bdev_lock, sleep and regrab + * it multiple times. + * + * RETURNS: + * 0 if @bdev can be claimed, -EBUSY otherwise. + */ +static int bd_prepare_to_claim(struct block_device *bdev, + struct block_device *whole, void *holder) +{ +retry: + /* if someone else claimed, fail */ + if (!bd_may_claim(bdev, whole, holder)) + return -EBUSY; + + /* if someone else is claiming, wait for it to finish */ + if (whole->bd_claiming && whole->bd_claiming != holder) { + wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0); + DEFINE_WAIT(wait); + + prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); + spin_unlock(&bdev_lock); + schedule(); + finish_wait(wq, &wait); + spin_lock(&bdev_lock); + goto retry; + } + + /* yay, all mine */ + return 0; +} + +/** + * bd_start_claiming - start claiming a block device + * @bdev: block device of interest + * @holder: holder trying to claim @bdev + * + * @bdev is about to be opened exclusively. Check @bdev can be opened + * exclusively and mark that an exclusive open is in progress. Each + * successful call to this function must be matched with a call to + * either bd_claim() or bd_abort_claiming(). If this function + * succeeds, the matching bd_claim() is guaranteed to succeed. + * + * CONTEXT: + * Might sleep. + * + * RETURNS: + * Pointer to the block device containing @bdev on success, ERR_PTR() + * value on failure. + */ +static struct block_device *bd_start_claiming(struct block_device *bdev, + void *holder) +{ + struct gendisk *disk; + struct block_device *whole; + int partno, err; - /* now impose change */ - if (res==0) { + might_sleep(); + + /* + * @bdev might not have been initialized properly yet, look up + * and grab the outer block device the hard way. + */ + disk = get_gendisk(bdev->bd_dev, &partno); + if (!disk) + return ERR_PTR(-ENXIO); + + whole = bdget_disk(disk, 0); + put_disk(disk); + if (!whole) + return ERR_PTR(-ENOMEM); + + /* prepare to claim, if successful, mark claiming in progress */ + spin_lock(&bdev_lock); + + err = bd_prepare_to_claim(bdev, whole, holder); + if (err == 0) { + whole->bd_claiming = holder; + spin_unlock(&bdev_lock); + return whole; + } else { + spin_unlock(&bdev_lock); + bdput(whole); + return ERR_PTR(err); + } +} + +/* releases bdev_lock */ +static void __bd_abort_claiming(struct block_device *whole, void *holder) +{ + BUG_ON(whole->bd_claiming != holder); + whole->bd_claiming = NULL; + wake_up_bit(&whole->bd_claiming, 0); + + spin_unlock(&bdev_lock); + bdput(whole); +} + +/** + * bd_abort_claiming - abort claiming a block device + * @whole: whole block device returned by bd_start_claiming() + * @holder: holder trying to claim @bdev + * + * Abort a claiming block started by bd_start_claiming(). Note that + * @whole is not the block device to be claimed but the whole device + * returned by bd_start_claiming(). + * + * CONTEXT: + * Grabs and releases bdev_lock. + */ +static void bd_abort_claiming(struct block_device *whole, void *holder) +{ + spin_lock(&bdev_lock); + __bd_abort_claiming(whole, holder); /* releases bdev_lock */ +} + +/** + * bd_claim - claim a block device + * @bdev: block device to claim + * @holder: holder trying to claim @bdev + * + * Try to claim @bdev which must have been opened successfully. This + * function may be called with or without preceding + * blk_start_claiming(). In the former case, this function is always + * successful and terminates the claiming block. + * + * CONTEXT: + * Might sleep. + * + * RETURNS: + * 0 if successful, -EBUSY if @bdev is already claimed. + */ +int bd_claim(struct block_device *bdev, void *holder) +{ + struct block_device *whole = bdev->bd_contains; + int res; + + might_sleep(); + + spin_lock(&bdev_lock); + + res = bd_prepare_to_claim(bdev, whole, holder); + if (res == 0) { /* note that for a whole device bd_holders * will be incremented twice, and bd_holder will * be set to bd_claim before being set to holder */ - bdev->bd_contains->bd_holders ++; - bdev->bd_contains->bd_holder = bd_claim; + whole->bd_holders++; + whole->bd_holder = bd_claim; bdev->bd_holders++; bdev->bd_holder = holder; } - spin_unlock(&bdev_lock); + + if (whole->bd_claiming) + __bd_abort_claiming(whole, holder); /* releases bdev_lock */ + else + spin_unlock(&bdev_lock); + return res; } - EXPORT_SYMBOL(bd_claim); void bd_release(struct block_device *bdev) @@ -944,7 +1266,7 @@ EXPORT_SYMBOL(revalidate_disk); int check_disk_change(struct block_device *bdev) { struct gendisk *disk = bdev->bd_disk; - struct block_device_operations * bdops = disk->fops; + const struct block_device_operations *bdops = disk->fops; if (!bdops->media_changed) return 0; @@ -961,7 +1283,7 @@ EXPORT_SYMBOL(check_disk_change); void bd_set_size(struct block_device *bdev, loff_t size) { - unsigned bsize = bdev_hardsect_size(bdev); + unsigned bsize = bdev_logical_block_size(bdev); bdev->bd_inode->i_size = size; while (bsize < PAGE_CACHE_SIZE) { @@ -986,7 +1308,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) { struct gendisk *disk; - struct hd_struct *part = NULL; int ret; int partno; int perm = 0; @@ -1004,26 +1325,41 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) return ret; } - ret = -ENXIO; - lock_kernel(); + restart: + ret = -ENXIO; disk = get_gendisk(bdev->bd_dev, &partno); if (!disk) goto out_unlock_kernel; - part = disk_get_part(disk, partno); - if (!part) - goto out_unlock_kernel; mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { bdev->bd_disk = disk; - bdev->bd_part = part; bdev->bd_contains = bdev; if (!partno) { struct backing_dev_info *bdi; + + ret = -ENXIO; + bdev->bd_part = disk_get_part(disk, partno); + if (!bdev->bd_part) + goto out_clear; + if (disk->fops->open) { ret = disk->fops->open(bdev, mode); + if (ret == -ERESTARTSYS) { + /* Lost a race with 'disk' being + * deleted, try again. + * See md.c + */ + disk_put_part(bdev->bd_part); + bdev->bd_part = NULL; + module_put(disk->fops->owner); + put_disk(disk); + bdev->bd_disk = NULL; + mutex_unlock(&bdev->bd_mutex); + goto restart; + } if (ret) goto out_clear; } @@ -1049,18 +1385,17 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_contains = whole; bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; + bdev->bd_part = disk_get_part(disk, partno); if (!(disk->flags & GENHD_FL_UP) || - !part || !part->nr_sects) { + !bdev->bd_part || !bdev->bd_part->nr_sects) { ret = -ENXIO; goto out_clear; } - bd_set_size(bdev, (loff_t)part->nr_sects << 9); + bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); } } else { - disk_put_part(part); - put_disk(disk); module_put(disk->fops->owner); - part = NULL; + put_disk(disk); disk = NULL; if (bdev->bd_contains == bdev) { if (bdev->bd_disk->fops->open) { @@ -1080,6 +1415,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) return 0; out_clear: + disk_put_part(bdev->bd_part); bdev->bd_disk = NULL; bdev->bd_part = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; @@ -1091,7 +1427,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) out_unlock_kernel: unlock_kernel(); - disk_put_part(part); if (disk) module_put(disk->fops->owner); put_disk(disk); @@ -1108,6 +1443,7 @@ EXPORT_SYMBOL(blkdev_get); static int blkdev_open(struct inode * inode, struct file * filp) { + struct block_device *whole = NULL; struct block_device *bdev; int res; @@ -1130,19 +1466,25 @@ static int blkdev_open(struct inode * inode, struct file * filp) if (bdev == NULL) return -ENOMEM; + if (filp->f_mode & FMODE_EXCL) { + whole = bd_start_claiming(bdev, filp); + if (IS_ERR(whole)) { + bdput(bdev); + return PTR_ERR(whole); + } + } + filp->f_mapping = bdev->bd_inode->i_mapping; res = blkdev_get(bdev, filp->f_mode); - if (res) - return res; - if (!(filp->f_mode & FMODE_EXCL)) - return 0; - - if (!(res = bd_claim(bdev, filp))) - return 0; + if (whole) { + if (res == 0) + BUG_ON(bd_claim(bdev, filp) != 0); + else + bd_abort_claiming(whole, filp); + } - blkdev_put(bdev, filp->f_mode); return res; } @@ -1204,11 +1546,60 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct block_device *bdev = I_BDEV(file->f_mapping->host); fmode_t mode = file->f_mode; + + /* + * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have + * to updated it before every ioctl. + */ if (file->f_flags & O_NDELAY) - mode |= FMODE_NDELAY_NOW; + mode |= FMODE_NDELAY; + else + mode &= ~FMODE_NDELAY; + return blkdev_ioctl(bdev, mode, cmd, arg); } +/* + * Write data to the block device. Only intended for the block device itself + * and the raw driver which basically is a fake block device. + * + * Does not take i_mutex for the write and thus is not for general purpose + * use. + */ +ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct file *file = iocb->ki_filp; + ssize_t ret; + + BUG_ON(iocb->ki_pos != pos); + + ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); + if (ret > 0 || ret == -EIOCBQUEUED) { + ssize_t err; + + err = generic_write_sync(file, pos, ret); + if (err < 0 && ret > 0) + ret = err; + } + return ret; +} +EXPORT_SYMBOL_GPL(blkdev_aio_write); + +/* + * Try to release a page associated with block device when the system + * is under memory pressure. + */ +static int blkdev_releasepage(struct page *page, gfp_t wait) +{ + struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super; + + if (super && super->s_op->bdev_try_to_free_page) + return super->s_op->bdev_try_to_free_page(super, page, wait); + + return try_to_free_buffers(page); +} + static const struct address_space_operations def_blk_aops = { .readpage = blkdev_readpage, .writepage = blkdev_writepage, @@ -1216,6 +1607,7 @@ static const struct address_space_operations def_blk_aops = { .write_begin = blkdev_write_begin, .write_end = blkdev_write_end, .writepages = generic_writepages, + .releasepage = blkdev_releasepage, .direct_IO = blkdev_direct_IO, }; @@ -1226,9 +1618,9 @@ const struct file_operations def_blk_fops = { .read = do_sync_read, .write = do_sync_write, .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write_nolock, + .aio_write = blkdev_aio_write, .mmap = generic_file_mmap, - .fsync = block_fsync, + .fsync = blkdev_fsync, .unlocked_ioctl = block_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = compat_blkdev_ioctl, @@ -1251,39 +1643,39 @@ EXPORT_SYMBOL(ioctl_by_bdev); /** * lookup_bdev - lookup a struct block_device by name - * @path: special file representing the block device + * @pathname: special file representing the block device * * Get a reference to the blockdevice at @pathname in the current * namespace if possible and return it. Return ERR_PTR(error) * otherwise. */ -struct block_device *lookup_bdev(const char *path) +struct block_device *lookup_bdev(const char *pathname) { struct block_device *bdev; struct inode *inode; - struct nameidata nd; + struct path path; int error; - if (!path || !*path) + if (!pathname || !*pathname) return ERR_PTR(-EINVAL); - error = path_lookup(path, LOOKUP_FOLLOW, &nd); + error = kern_path(pathname, LOOKUP_FOLLOW, &path); if (error) return ERR_PTR(error); - inode = nd.path.dentry->d_inode; + inode = path.dentry->d_inode; error = -ENOTBLK; if (!S_ISBLK(inode->i_mode)) goto fail; error = -EACCES; - if (nd.path.mnt->mnt_flags & MNT_NODEV) + if (path.mnt->mnt_flags & MNT_NODEV) goto fail; error = -ENOMEM; bdev = bd_acquire(inode); if (!bdev) goto fail; out: - path_put(&nd.path); + path_put(&path); return bdev; fail: bdev = ERR_PTR(error); @@ -1303,27 +1695,34 @@ EXPORT_SYMBOL(lookup_bdev); */ struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) { - struct block_device *bdev; - int error = 0; + struct block_device *bdev, *whole; + int error; bdev = lookup_bdev(path); if (IS_ERR(bdev)) return bdev; + whole = bd_start_claiming(bdev, holder); + if (IS_ERR(whole)) { + bdput(bdev); + return whole; + } + error = blkdev_get(bdev, mode); if (error) - return ERR_PTR(error); + goto out_abort_claiming; + error = -EACCES; if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) - goto blkdev_put; - error = bd_claim(bdev, holder); - if (error) - goto blkdev_put; + goto out_blkdev_put; + BUG_ON(bd_claim(bdev, holder) != 0); return bdev; - -blkdev_put: + +out_blkdev_put: blkdev_put(bdev, mode); +out_abort_claiming: + bd_abort_claiming(whole, holder); return ERR_PTR(error); }