*/
#include "dm.h"
-#include "dm-bio-list.h"
#include "dm-uevent.h"
#include <linux/init.h>
/*
* Bits for the md->flags field.
*/
-#define DMF_BLOCK_IO 0
+#define DMF_BLOCK_IO_FOR_SUSPEND 0
#define DMF_SUSPENDED 1
#define DMF_FROZEN 2
#define DMF_FREEING 3
#define DMF_DELETING 4
#define DMF_NOFLUSH_SUSPENDING 5
+#define DMF_QUEUE_IO_TO_THREAD 6
/*
* Work processed by per-device workqueue.
struct mapped_device {
struct rw_semaphore io_lock;
struct mutex suspend_lock;
- spinlock_t pushback_lock;
rwlock_t map_lock;
atomic_t holders;
atomic_t open_count;
wait_queue_head_t wait;
struct work_struct work;
struct bio_list deferred;
- struct bio_list pushback;
+ spinlock_t deferred_lock;
+
+ /*
+ * An error from the barrier request currently being processed.
+ */
+ int barrier_error;
/*
* Processing queue (flush/barriers)
part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
part_stat_unlock();
+ /*
+ * After this is decremented the bio must not be touched if it is
+ * a barrier.
+ */
dm_disk(md)->part0.in_flight = pending =
atomic_dec_return(&md->pending);
/*
* Add the bio to the list of deferred io.
*/
-static int queue_io(struct mapped_device *md, struct bio *bio)
+static void queue_io(struct mapped_device *md, struct bio *bio)
{
down_write(&md->io_lock);
- if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
- up_write(&md->io_lock);
- return 1;
- }
-
+ spin_lock_irq(&md->deferred_lock);
bio_list_add(&md->deferred, bio);
+ spin_unlock_irq(&md->deferred_lock);
+
+ if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags))
+ queue_work(md->wq, &md->work);
up_write(&md->io_lock);
- return 0; /* deferred successfully */
}
/*
if (io->error == DM_ENDIO_REQUEUE) {
/*
* Target requested pushing back the I/O.
- * This must be handled before the sleeper on
- * suspend queue merges the pushback list.
*/
- spin_lock_irqsave(&md->pushback_lock, flags);
+ spin_lock_irqsave(&md->deferred_lock, flags);
if (__noflush_suspending(md))
- bio_list_add(&md->pushback, io->bio);
+ bio_list_add_head(&md->deferred, io->bio);
else
/* noflush suspend was interrupted. */
io->error = -EIO;
- spin_unlock_irqrestore(&md->pushback_lock, flags);
+ spin_unlock_irqrestore(&md->deferred_lock, flags);
}
- end_io_acct(io);
-
io_error = io->error;
bio = io->bio;
- free_io(md, io);
+ if (bio_barrier(bio)) {
+ /*
+ * There can be just one barrier request so we use
+ * a per-device variable for error reporting.
+ * Note that you can't touch the bio after end_io_acct
+ */
+ md->barrier_error = io_error;
+ end_io_acct(io);
+ } else {
+ end_io_acct(io);
- if (io_error != DM_ENDIO_REQUEUE) {
- trace_block_bio_complete(md->queue, bio);
+ if (io_error != DM_ENDIO_REQUEUE) {
+ trace_block_bio_complete(md->queue, bio);
- bio_endio(bio, io_error);
+ bio_endio(bio, io_error);
+ }
}
+
+ free_io(md, io);
}
}
clone->bi_sector = sector;
clone->bi_bdev = bio->bi_bdev;
- clone->bi_rw = bio->bi_rw;
+ clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
clone->bi_vcnt = 1;
clone->bi_size = to_bytes(len);
clone->bi_io_vec->bv_offset = offset;
clone->bi_io_vec->bv_len = clone->bi_size;
clone->bi_flags |= 1 << BIO_CLONED;
+ if (bio_integrity(bio)) {
+ bio_integrity_clone(clone, bio, GFP_NOIO);
+ bio_integrity_trim(clone,
+ bio_sector_offset(bio, idx, offset), len);
+ }
+
return clone;
}
clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
__bio_clone(clone, bio);
+ clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
clone->bi_destructor = dm_bio_destructor;
clone->bi_sector = sector;
clone->bi_idx = idx;
clone->bi_size = to_bytes(len);
clone->bi_flags &= ~(1 << BIO_SEG_VALID);
+ if (bio_integrity(bio)) {
+ bio_integrity_clone(clone, bio, GFP_NOIO);
+
+ if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
+ bio_integrity_trim(clone,
+ bio_sector_offset(bio, idx, 0), len);
+ }
+
return clone;
}
ci.map = dm_get_table(md);
if (unlikely(!ci.map)) {
- bio_io_error(bio);
- return;
- }
- if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) {
- dm_table_put(ci.map);
- bio_endio(bio, -EOPNOTSUPP);
+ if (!bio_barrier(bio))
+ bio_io_error(bio);
+ else
+ md->barrier_error = -EIO;
return;
}
+
ci.md = md;
ci.bio = bio;
ci.io = alloc_io(md);
*/
static int dm_request(struct request_queue *q, struct bio *bio)
{
- int r = -EIO;
int rw = bio_data_dir(bio);
struct mapped_device *md = q->queuedata;
int cpu;
part_stat_unlock();
/*
- * If we're suspended we have to queue
- * this io for later.
+ * If we're suspended or the thread is processing barriers
+ * we have to queue this io for later.
*/
- while (test_bit(DMF_BLOCK_IO, &md->flags)) {
+ if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
+ unlikely(bio_barrier(bio))) {
up_read(&md->io_lock);
- if (bio_rw(bio) != READA)
- r = queue_io(md, bio);
+ if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
+ bio_rw(bio) == READA) {
+ bio_io_error(bio);
+ return 0;
+ }
- if (r <= 0)
- goto out_req;
+ queue_io(md, bio);
- /*
- * We're in a while loop, because someone could suspend
- * before we get to the following read lock.
- */
- down_read(&md->io_lock);
+ return 0;
}
__split_and_process_bio(md, bio);
up_read(&md->io_lock);
return 0;
-
-out_req:
- if (r < 0)
- bio_io_error(bio);
-
- return 0;
}
static void dm_unplug_all(struct request_queue *q)
struct mapped_device *md = congested_data;
struct dm_table *map;
- if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
+ if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
map = dm_get_table(md);
if (map) {
r = dm_table_any_congested(map, bdi_bits);
init_rwsem(&md->io_lock);
mutex_init(&md->suspend_lock);
- spin_lock_init(&md->pushback_lock);
+ spin_lock_init(&md->deferred_lock);
rwlock_init(&md->map_lock);
atomic_set(&md->holders, 1);
atomic_set(&md->open_count, 0);
md->queue->backing_dev_info.congested_fn = dm_any_congested;
md->queue->backing_dev_info.congested_data = md;
blk_queue_make_request(md->queue, dm_request);
+ blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
md->queue->unplug_fn = dm_unplug_all;
blk_queue_merge_bvec(md->queue, dm_merge_bvec);
mempool_destroy(md->tio_pool);
mempool_destroy(md->io_pool);
bioset_free(md->bs);
+ blk_integrity_unregister(md->disk);
del_gendisk(md->disk);
free_minor(minor);
}
EXPORT_SYMBOL_GPL(dm_put);
-static int dm_wait_for_completion(struct mapped_device *md)
+static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
{
int r = 0;
+ DECLARE_WAITQUEUE(wait, current);
+
+ dm_unplug_all(md->queue);
+
+ add_wait_queue(&md->wait, &wait);
while (1) {
- set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(interruptible);
smp_mb();
if (!atomic_read(&md->pending))
break;
- if (signal_pending(current)) {
+ if (interruptible == TASK_INTERRUPTIBLE &&
+ signal_pending(current)) {
r = -EINTR;
break;
}
}
set_current_state(TASK_RUNNING);
+ remove_wait_queue(&md->wait, &wait);
+
return r;
}
-/*
- * Process the deferred bios
- */
-static void __flush_deferred_io(struct mapped_device *md)
+static int dm_flush(struct mapped_device *md)
{
- struct bio *c;
-
- while ((c = bio_list_pop(&md->deferred)))
- __split_and_process_bio(md, c);
-
- clear_bit(DMF_BLOCK_IO, &md->flags);
+ dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
+ return 0;
}
-static void __merge_pushback_list(struct mapped_device *md)
+static void process_barrier(struct mapped_device *md, struct bio *bio)
{
- unsigned long flags;
+ int error = dm_flush(md);
+
+ if (unlikely(error)) {
+ bio_endio(bio, error);
+ return;
+ }
+ if (bio_empty_barrier(bio)) {
+ bio_endio(bio, 0);
+ return;
+ }
+
+ __split_and_process_bio(md, bio);
+
+ error = dm_flush(md);
+
+ if (!error && md->barrier_error)
+ error = md->barrier_error;
- spin_lock_irqsave(&md->pushback_lock, flags);
- clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
- bio_list_merge_head(&md->deferred, &md->pushback);
- bio_list_init(&md->pushback);
- spin_unlock_irqrestore(&md->pushback_lock, flags);
+ if (md->barrier_error != DM_ENDIO_REQUEUE)
+ bio_endio(bio, error);
}
+/*
+ * Process the deferred bios
+ */
static void dm_wq_work(struct work_struct *work)
{
struct mapped_device *md = container_of(work, struct mapped_device,
work);
+ struct bio *c;
down_write(&md->io_lock);
- __flush_deferred_io(md);
+
+ while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
+ spin_lock_irq(&md->deferred_lock);
+ c = bio_list_pop(&md->deferred);
+ spin_unlock_irq(&md->deferred_lock);
+
+ if (!c) {
+ clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
+ break;
+ }
+
+ up_write(&md->io_lock);
+
+ if (bio_barrier(c))
+ process_barrier(md, c);
+ else
+ __split_and_process_bio(md, c);
+
+ down_write(&md->io_lock);
+ }
+
up_write(&md->io_lock);
}
static void dm_queue_flush(struct mapped_device *md)
{
+ clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
+ smp_mb__after_clear_bit();
queue_work(md->wq, &md->work);
- flush_workqueue(md->wq);
}
/*
int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
{
struct dm_table *map = NULL;
- DECLARE_WAITQUEUE(wait, current);
int r = 0;
int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
}
/*
- * First we set the BLOCK_IO flag so no more ios will be mapped.
+ * Here we must make sure that no processes are submitting requests
+ * to target drivers i.e. no one may be executing
+ * __split_and_process_bio. This is called from dm_request and
+ * dm_wq_work.
+ *
+ * To get all processes out of __split_and_process_bio in dm_request,
+ * we take the write lock. To prevent any process from reentering
+ * __split_and_process_bio from dm_request, we set
+ * DMF_QUEUE_IO_TO_THREAD.
+ *
+ * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND
+ * and call flush_workqueue(md->wq). flush_workqueue will wait until
+ * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any
+ * further calls to __split_and_process_bio from dm_wq_work.
*/
down_write(&md->io_lock);
- set_bit(DMF_BLOCK_IO, &md->flags);
-
- add_wait_queue(&md->wait, &wait);
+ set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
+ set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
up_write(&md->io_lock);
- /* unplug */
- if (map)
- dm_table_unplug_all(map);
+ flush_workqueue(md->wq);
/*
- * Wait for the already-mapped ios to complete.
+ * At this point no more requests are entering target request routines.
+ * We call dm_wait_for_completion to wait for all existing requests
+ * to finish.
*/
- r = dm_wait_for_completion(md);
+ r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
down_write(&md->io_lock);
- remove_wait_queue(&md->wait, &wait);
-
if (noflush)
- __merge_pushback_list(md);
+ clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
up_write(&md->io_lock);
/* were we interrupted ? */
goto out; /* pushback list is already flushed, so skip flush */
}
+ /*
+ * If dm_wait_for_completion returned 0, the device is completely
+ * quiescent now. There is no request-processing activity. All new
+ * requests are being added to md->deferred list.
+ */
+
dm_table_postsuspend_targets(map);
set_bit(DMF_SUSPENDED, &md->flags);