block: remove duplicate BUG_ON() in bd_finish_claiming()
[safe/jmp/linux-2.6] / fs / block_dev.c
index e59440c..99d6af8 100644 (file)
@@ -172,8 +172,9 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
 
-       return blockdev_direct_IO_no_locking(rw, iocb, inode, I_BDEV(inode),
-                               iov, offset, nr_segs, blkdev_get_blocks, NULL);
+       return blockdev_direct_IO_no_locking_newtrunc(rw, iocb, inode,
+                               I_BDEV(inode), iov, offset, nr_segs,
+                               blkdev_get_blocks, NULL);
 }
 
 int __sync_blockdev(struct block_device *bdev, int wait)
@@ -245,37 +246,14 @@ struct super_block *freeze_bdev(struct block_device *bdev)
        sb = get_active_super(bdev);
        if (!sb)
                goto out;
-       if (sb->s_flags & MS_RDONLY) {
-               sb->s_frozen = SB_FREEZE_TRANS;
-               up_write(&sb->s_umount);
+       error = freeze_super(sb);
+       if (error) {
+               deactivate_super(sb);
+               bdev->bd_fsfreeze_count--;
                mutex_unlock(&bdev->bd_fsfreeze_mutex);
-               return sb;
-       }
-
-       sb->s_frozen = SB_FREEZE_WRITE;
-       smp_wmb();
-
-       sync_filesystem(sb);
-
-       sb->s_frozen = SB_FREEZE_TRANS;
-       smp_wmb();
-
-       sync_blockdev(sb->s_bdev);
-
-       if (sb->s_op->freeze_fs) {
-               error = sb->s_op->freeze_fs(sb);
-               if (error) {
-                       printk(KERN_ERR
-                               "VFS:Filesystem freeze failed\n");
-                       sb->s_frozen = SB_UNFROZEN;
-                       deactivate_locked_super(sb);
-                       bdev->bd_fsfreeze_count--;
-                       mutex_unlock(&bdev->bd_fsfreeze_mutex);
-                       return ERR_PTR(error);
-               }
+               return ERR_PTR(error);
        }
-       up_write(&sb->s_umount);
-
+       deactivate_super(sb);
  out:
        sync_blockdev(bdev);
        mutex_unlock(&bdev->bd_fsfreeze_mutex);
@@ -296,40 +274,22 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 
        mutex_lock(&bdev->bd_fsfreeze_mutex);
        if (!bdev->bd_fsfreeze_count)
-               goto out_unlock;
+               goto out;
 
        error = 0;
        if (--bdev->bd_fsfreeze_count > 0)
-               goto out_unlock;
+               goto out;
 
        if (!sb)
-               goto out_unlock;
-
-       BUG_ON(sb->s_bdev != bdev);
-       down_write(&sb->s_umount);
-       if (sb->s_flags & MS_RDONLY)
-               goto out_unfrozen;
-
-       if (sb->s_op->unfreeze_fs) {
-               error = sb->s_op->unfreeze_fs(sb);
-               if (error) {
-                       printk(KERN_ERR
-                               "VFS:Filesystem thaw failed\n");
-                       sb->s_frozen = SB_FREEZE_TRANS;
-                       bdev->bd_fsfreeze_count++;
-                       mutex_unlock(&bdev->bd_fsfreeze_mutex);
-                       return error;
-               }
-       }
-
-out_unfrozen:
-       sb->s_frozen = SB_UNFROZEN;
-       smp_wmb();
-       wake_up(&sb->s_wait_unfrozen);
+               goto out;
 
-       if (sb)
-               deactivate_locked_super(sb);
-out_unlock:
+       error = thaw_super(sb);
+       if (error) {
+               bdev->bd_fsfreeze_count++;
+               mutex_unlock(&bdev->bd_fsfreeze_mutex);
+               return error;
+       }
+out:
        mutex_unlock(&bdev->bd_fsfreeze_mutex);
        return 0;
 }
@@ -350,8 +310,8 @@ static int blkdev_write_begin(struct file *file, struct address_space *mapping,
                        struct page **pagep, void **fsdata)
 {
        *pagep = NULL;
-       return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-                               blkdev_get_block);
+       return block_write_begin_newtrunc(file, mapping, pos, len, flags,
+                               pagep, fsdata, blkdev_get_block);
 }
 
 static int blkdev_write_end(struct file *file, struct address_space *mapping,
@@ -399,23 +359,25 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
        return retval;
 }
        
-/*
- *     Filp is never NULL; the only case when ->fsync() is called with
- *     NULL first argument is nfsd_sync_dir() and that's not a directory.
- */
-int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync)
+int blkdev_fsync(struct file *filp, int datasync)
 {
-       struct block_device *bdev = I_BDEV(filp->f_mapping->host);
+       struct inode *bd_inode = filp->f_mapping->host;
+       struct block_device *bdev = I_BDEV(bd_inode);
        int error;
 
-       error = sync_blockdev(bdev);
-       if (error)
-               return error;
-       
-       error = blkdev_issue_flush(bdev, NULL);
+       /*
+        * There is no need to serialise calls to blkdev_issue_flush with
+        * i_mutex and doing so causes performance issues with concurrent
+        * O_SYNC writers to a block device.
+        */
+       mutex_unlock(&bd_inode->i_mutex);
+
+       error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT);
        if (error == -EOPNOTSUPP)
                error = 0;
+
+       mutex_lock(&bd_inode->i_mutex);
+
        return error;
 }
 EXPORT_SYMBOL(blkdev_fsync);
@@ -694,11 +656,181 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
 }
 
 /**
+ * bd_prepare_to_claim - prepare to claim a block device
+ * @bdev: block device of interest
+ * @whole: the whole device containing @bdev, may equal @bdev
+ * @holder: holder trying to claim @bdev
+ *
+ * Prepare to claim @bdev.  This function fails if @bdev is already
+ * claimed by another holder and waits if another claiming is in
+ * progress.  This function doesn't actually claim.  On successful
+ * return, the caller has ownership of bd_claiming and bd_holder[s].
+ *
+ * CONTEXT:
+ * spin_lock(&bdev_lock).  Might release bdev_lock, sleep and regrab
+ * it multiple times.
+ *
+ * RETURNS:
+ * 0 if @bdev can be claimed, -EBUSY otherwise.
+ */
+static int bd_prepare_to_claim(struct block_device *bdev,
+                              struct block_device *whole, void *holder)
+{
+retry:
+       /* if someone else claimed, fail */
+       if (!bd_may_claim(bdev, whole, holder))
+               return -EBUSY;
+
+       /* if someone else is claiming, wait for it to finish */
+       if (whole->bd_claiming && whole->bd_claiming != holder) {
+               wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
+               DEFINE_WAIT(wait);
+
+               prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
+               spin_unlock(&bdev_lock);
+               schedule();
+               finish_wait(wq, &wait);
+               spin_lock(&bdev_lock);
+               goto retry;
+       }
+
+       /* yay, all mine */
+       return 0;
+}
+
+/**
+ * bd_start_claiming - start claiming a block device
+ * @bdev: block device of interest
+ * @holder: holder trying to claim @bdev
+ *
+ * @bdev is about to be opened exclusively.  Check @bdev can be opened
+ * exclusively and mark that an exclusive open is in progress.  Each
+ * successful call to this function must be matched with a call to
+ * either bd_finish_claiming() or bd_abort_claiming() (which do not
+ * fail).
+ *
+ * This function is used to gain exclusive access to the block device
+ * without actually causing other exclusive open attempts to fail. It
+ * should be used when the open sequence itself requires exclusive
+ * access but may subsequently fail.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * Pointer to the block device containing @bdev on success, ERR_PTR()
+ * value on failure.
+ */
+static struct block_device *bd_start_claiming(struct block_device *bdev,
+                                             void *holder)
+{
+       struct gendisk *disk;
+       struct block_device *whole;
+       int partno, err;
+
+       might_sleep();
+
+       /*
+        * @bdev might not have been initialized properly yet, look up
+        * and grab the outer block device the hard way.
+        */
+       disk = get_gendisk(bdev->bd_dev, &partno);
+       if (!disk)
+               return ERR_PTR(-ENXIO);
+
+       whole = bdget_disk(disk, 0);
+       module_put(disk->fops->owner);
+       put_disk(disk);
+       if (!whole)
+               return ERR_PTR(-ENOMEM);
+
+       /* prepare to claim, if successful, mark claiming in progress */
+       spin_lock(&bdev_lock);
+
+       err = bd_prepare_to_claim(bdev, whole, holder);
+       if (err == 0) {
+               whole->bd_claiming = holder;
+               spin_unlock(&bdev_lock);
+               return whole;
+       } else {
+               spin_unlock(&bdev_lock);
+               bdput(whole);
+               return ERR_PTR(err);
+       }
+}
+
+/* releases bdev_lock */
+static void __bd_abort_claiming(struct block_device *whole, void *holder)
+{
+       BUG_ON(whole->bd_claiming != holder);
+       whole->bd_claiming = NULL;
+       wake_up_bit(&whole->bd_claiming, 0);
+
+       spin_unlock(&bdev_lock);
+       bdput(whole);
+}
+
+/**
+ * bd_abort_claiming - abort claiming a block device
+ * @whole: whole block device returned by bd_start_claiming()
+ * @holder: holder trying to claim @bdev
+ *
+ * Abort a block device claim started by bd_start_claiming().  Note that
+ * @whole is not the block device to be claimed but the whole device
+ * returned by bd_start_claiming().
+ *
+ * CONTEXT:
+ * Grabs and releases bdev_lock.
+ */
+static void bd_abort_claiming(struct block_device *whole, void *holder)
+{
+       spin_lock(&bdev_lock);
+       __bd_abort_claiming(whole, holder);             /* releases bdev_lock */
+}
+
+/* increment holders when we have a legitimate claim. requires bdev_lock */
+static void __bd_claim(struct block_device *bdev, struct block_device *whole,
+                                       void *holder)
+{
+       /* note that for a whole device bd_holders
+        * will be incremented twice, and bd_holder will
+        * be set to bd_claim before being set to holder
+        */
+       whole->bd_holders++;
+       whole->bd_holder = bd_claim;
+       bdev->bd_holders++;
+       bdev->bd_holder = holder;
+}
+
+/**
+ * bd_finish_claiming - finish claiming a block device
+ * @bdev: block device of interest (passed to bd_start_claiming())
+ * @whole: whole block device returned by bd_start_claiming()
+ * @holder: holder trying to claim @bdev
+ *
+ * Finish a block device claim started by bd_start_claiming().
+ *
+ * CONTEXT:
+ * Grabs and releases bdev_lock.
+ */
+static void bd_finish_claiming(struct block_device *bdev,
+                               struct block_device *whole, void *holder)
+{
+       spin_lock(&bdev_lock);
+       BUG_ON(!bd_may_claim(bdev, whole, holder));
+       __bd_claim(bdev, whole, holder);
+       __bd_abort_claiming(whole, holder); /* not actually an abort */
+}
+
+/**
  * bd_claim - claim a block device
  * @bdev: block device to claim
  * @holder: holder trying to claim @bdev
  *
- * Try to claim @bdev.
+ * Try to claim @bdev which must have been opened successfully.
+ *
+ * CONTEXT:
+ * Might sleep.
  *
  * RETURNS:
  * 0 if successful, -EBUSY if @bdev is already claimed.
@@ -706,23 +838,16 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
 int bd_claim(struct block_device *bdev, void *holder)
 {
        struct block_device *whole = bdev->bd_contains;
-       int res = -EBUSY;
-
-       spin_lock(&bdev_lock);
+       int res;
 
-       if (bd_may_claim(bdev, whole, holder)) {
-               /* note that for a whole device bd_holders
-                * will be incremented twice, and bd_holder will
-                * be set to bd_claim before being set to holder
-                */
-               whole->bd_holders++;
-               whole->bd_holder = bd_claim;
-               bdev->bd_holders++;
-               bdev->bd_holder = holder;
-               res = 0;
-       }
+       might_sleep();
 
+       spin_lock(&bdev_lock);
+       res = bd_prepare_to_claim(bdev, whole, holder);
+       if (res == 0)
+               __bd_claim(bdev, whole, holder);
        spin_unlock(&bdev_lock);
+
        return res;
 }
 EXPORT_SYMBOL(bd_claim);
@@ -1338,6 +1463,7 @@ EXPORT_SYMBOL(blkdev_get);
 
 static int blkdev_open(struct inode * inode, struct file * filp)
 {
+       struct block_device *whole = NULL;
        struct block_device *bdev;
        int res;
 
@@ -1360,22 +1486,25 @@ static int blkdev_open(struct inode * inode, struct file * filp)
        if (bdev == NULL)
                return -ENOMEM;
 
+       if (filp->f_mode & FMODE_EXCL) {
+               whole = bd_start_claiming(bdev, filp);
+               if (IS_ERR(whole)) {
+                       bdput(bdev);
+                       return PTR_ERR(whole);
+               }
+       }
+
        filp->f_mapping = bdev->bd_inode->i_mapping;
 
        res = blkdev_get(bdev, filp->f_mode);
-       if (res)
-               return res;
 
-       if (filp->f_mode & FMODE_EXCL) {
-               res = bd_claim(bdev, filp);
-               if (res)
-                       goto out_blkdev_put;
+       if (whole) {
+               if (res == 0)
+                       bd_finish_claiming(bdev, whole, filp);
+               else
+                       bd_abort_claiming(whole, filp);
        }
 
-       return 0;
-
- out_blkdev_put:
-       blkdev_put(bdev, filp->f_mode);
        return res;
 }
 
@@ -1586,27 +1715,34 @@ EXPORT_SYMBOL(lookup_bdev);
  */
 struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
 {
-       struct block_device *bdev;
-       int error = 0;
+       struct block_device *bdev, *whole;
+       int error;
 
        bdev = lookup_bdev(path);
        if (IS_ERR(bdev))
                return bdev;
 
+       whole = bd_start_claiming(bdev, holder);
+       if (IS_ERR(whole)) {
+               bdput(bdev);
+               return whole;
+       }
+
        error = blkdev_get(bdev, mode);
        if (error)
-               return ERR_PTR(error);
+               goto out_abort_claiming;
+
        error = -EACCES;
        if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
-               goto blkdev_put;
-       error = bd_claim(bdev, holder);
-       if (error)
-               goto blkdev_put;
+               goto out_blkdev_put;
 
+       bd_finish_claiming(bdev, whole, holder);
        return bdev;
-       
-blkdev_put:
+
+out_blkdev_put:
        blkdev_put(bdev, mode);
+out_abort_claiming:
+       bd_abort_claiming(whole, holder);
        return ERR_PTR(error);
 }