* This file is released under the GPL.
*/
-#include "dm-io.h"
+#include <linux/device-mapper.h>
#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
-
-static struct bio_set *_bios;
+#include <linux/dm-io.h>
struct dm_io_client {
mempool_t *pool;
/* FIXME: can we shrink this ? */
struct io {
- unsigned long error;
+ unsigned long error_bits;
+ unsigned long eopnotsupp_bits;
atomic_t count;
struct task_struct *sleeper;
struct dm_io_client *client;
* io. Since async io is likely to be the majority of io we'll
* have the same number of io contexts as bios! (FIXME: must reduce this).
*/
-static unsigned _num_ios;
-static mempool_t *_io_pool;
-
-/*
- * Temporary functions to allow old and new interfaces to co-exist.
- */
-static struct bio_set *bios(struct dm_io_client *client)
-{
- return client ? client->bios : _bios;
-}
-
-static mempool_t *io_pool(struct dm_io_client *client)
-{
- return client ? client->pool : _io_pool;
-}
/*
 * Convert a page count into the number of io contexts provided for it:
 * four contexts per page.
 */
static unsigned int pages_to_ios(unsigned int pages)
{
	unsigned int ios = pages << 2;	/* 4 per page; too many ? */

	return ios;
}
-static int resize_pool(unsigned int new_ios)
-{
- int r = 0;
-
- if (_io_pool) {
- if (new_ios == 0) {
- /* free off the pool */
- mempool_destroy(_io_pool);
- _io_pool = NULL;
- bioset_free(_bios);
-
- } else {
- /* resize the pool */
- r = mempool_resize(_io_pool, new_ios, GFP_KERNEL);
- }
-
- } else {
- /* create new pool */
- _io_pool = mempool_create_kmalloc_pool(new_ios,
- sizeof(struct io));
- if (!_io_pool)
- return -ENOMEM;
-
- _bios = bioset_create(16, 16);
- if (!_bios) {
- mempool_destroy(_io_pool);
- _io_pool = NULL;
- return -ENOMEM;
- }
- }
-
- if (!r)
- _num_ios = new_ios;
-
- return r;
-}
-
-int dm_io_get(unsigned int num_pages)
-{
- return resize_pool(_num_ios + pages_to_ios(num_pages));
-}
-
-void dm_io_put(unsigned int num_pages)
-{
- resize_pool(_num_ios - pages_to_ios(num_pages));
-}
-
/*
* Create a client with mempool and bioset.
*/
if (!client->pool)
goto bad;
- client->bios = bioset_create(16, 16);
+ client->bios = bioset_create(16, 0);
if (!client->bios)
goto bad;
*---------------------------------------------------------------*/
/*
 * Record the completion of one piece of an io.
 *
 * A non-zero error sets bit 'region' in io->error_bits, and additionally in
 * io->eopnotsupp_bits when the error is -EOPNOTSUPP.  The outstanding count
 * is then dropped; whoever drops it to zero finishes the io, either by
 * waking io->sleeper (sync_io() sets it to the waiting task) or, when
 * sleeper is NULL (as async_io() sets it), by calling io->callback with the
 * accumulated error bits and io->context.
 */
static void dec_count(struct io *io, unsigned int region, int error)
{
- if (error)
- set_bit(region, &io->error);
+ if (error) {
+ set_bit(region, &io->error_bits);
+ if (error == -EOPNOTSUPP)
+ set_bit(region, &io->eopnotsupp_bits);
+ }
/* Only the completion that brings the count to zero finishes the io. */
if (atomic_dec_and_test(&io->count)) {
if (io->sleeper)
wake_up_process(io->sleeper);
else {
/*
 * Copy everything the callback needs out of io first: io is
 * handed back to the client's mempool before fn is invoked.
 */
- int r = io->error;
+ unsigned long r = io->error_bits;
io_notify_fn fn = io->callback;
void *context = io->context;
- mempool_free(io, io_pool(io->client));
+ mempool_free(io, io->client->pool);
fn(r, context);
}
}
}
/*
 * bio completion handler; do_region() installs it as bio->bi_end_io.
 *
 * A failed READ has its bio zero-filled.  The bio reference is then dropped
 * and the result is reported for this region through dec_count().
 *
 * NOTE(review): the statements that assign 'io' and 'region' are not
 * visible in this excerpt.
 */
-static int endio(struct bio *bio, unsigned int done, int error)
+static void endio(struct bio *bio, int error)
{
struct io *io;
unsigned region;
- /* keep going until we've finished */
- if (bio->bi_size)
- return 1;
-
if (error && bio_data_dir(bio) == READ)
zero_fill_bio(bio);
bio_put(bio);
/* May complete the whole io if this was the last outstanding piece. */
dec_count(io, region, error);
-
- return 0;
}
/*-----------------------------------------------------------------
{
struct io *io = bio->bi_private;
- bio_free(bio, bios(io->client));
+ bio_free(bio, io->client->bios);
}
/*
/*-----------------------------------------------------------------
* IO routines that accept a list of pages.
*---------------------------------------------------------------*/
-static void do_region(int rw, unsigned int region, struct io_region *where,
+static void do_region(int rw, unsigned region, struct dm_io_region *where,
struct dpages *dp, struct io *io)
{
struct bio *bio;
* bvec for bio_get/set_region() and decrement bi_max_vecs
* to hide it from bio_add_page().
*/
- num_bvecs = (remaining / (PAGE_SIZE >> SECTOR_SHIFT)) + 2;
- bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, bios(io->client));
+ num_bvecs = dm_sector_div_up(remaining,
+ (PAGE_SIZE >> SECTOR_SHIFT));
+ num_bvecs = 1 + min_t(int, bio_get_nr_vecs(where->bdev),
+ num_bvecs);
+ if (unlikely(num_bvecs > BIO_MAX_PAGES))
+ num_bvecs = BIO_MAX_PAGES;
+ bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
bio->bi_sector = where->sector + (where->count - remaining);
bio->bi_bdev = where->bdev;
bio->bi_end_io = endio;
}
static void dispatch_io(int rw, unsigned int num_regions,
- struct io_region *where, struct dpages *dp,
+ struct dm_io_region *where, struct dpages *dp,
struct io *io, int sync)
{
int i;
struct dpages old_pages = *dp;
if (sync)
- rw |= (1 << BIO_RW_SYNC);
+ rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
/*
* For multiple regions we need to be careful to rewind
}
/*
 * Synchronous io: set up an on-stack struct io with the current task as
 * sleeper and wait until its outstanding count has dropped to zero.
 *
 * The wait uses TASK_UNINTERRUPTIBLE and ends only when io.count is zero.
 * If any region failed with -EOPNOTSUPP (recorded by dec_count() in
 * eopnotsupp_bits) and rw had BIO_RW_BARRIER set, the barrier flag is
 * cleared and the whole io is set up and waited for again.
 *
 * Returns -EIO for a multi-region request that is not a WRITE or when any
 * error bit is set, 0 otherwise.  When error_bits is non-NULL it receives
 * the per-region error bitmask.
 *
 * NOTE(review): the step that submits the io between the setup and the
 * wait loop is not visible in this excerpt.
 */
static int sync_io(struct dm_io_client *client, unsigned int num_regions,
- struct io_region *where, int rw, struct dpages *dp,
+ struct dm_io_region *where, int rw, struct dpages *dp,
unsigned long *error_bits)
{
struct io io;
/* Only a WRITE may target more than one region. */
- if (num_regions > 1 && rw != WRITE) {
+ if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
WARN_ON(1);
return -EIO;
}
- io.error = 0;
+retry:
+ io.error_bits = 0;
+ io.eopnotsupp_bits = 0;
atomic_set(&io.count, 1); /* see dispatch_io() */
io.sleeper = current;
io.client = client;
/* Sleep until dec_count() has brought io.count down to zero. */
while (1) {
set_current_state(TASK_UNINTERRUPTIBLE);
- if (!atomic_read(&io.count) || signal_pending(current))
+ if (!atomic_read(&io.count))
break;
io_schedule();
}
set_current_state(TASK_RUNNING);
- if (atomic_read(&io.count))
- return -EINTR;
+ if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
+ rw &= ~(1 << BIO_RW_BARRIER);
+ goto retry;
+ }
if (error_bits)
- *error_bits = io.error;
+ *error_bits = io.error_bits;
- return io.error ? -EIO : 0;
+ return io.error_bits ? -EIO : 0;
}
/*
 * Asynchronous io: take a struct io from the client's mempool and set it up
 * with no sleeper, so that dec_count() finishes the io by invoking the
 * notify callback instead of waking a task.
 *
 * A multi-region request that is not a WRITE is rejected: fn is called with
 * 1 as its error value and -EIO is returned.  Otherwise returns 0.
 *
 * NOTE(review): the statements that store fn/context in the io and submit
 * it are not visible in this excerpt.
 */
static int async_io(struct dm_io_client *client, unsigned int num_regions,
- struct io_region *where, int rw, struct dpages *dp,
+ struct dm_io_region *where, int rw, struct dpages *dp,
io_notify_fn fn, void *context)
{
struct io *io;
/* Only a WRITE may target more than one region. */
- if (num_regions > 1 && rw != WRITE) {
+ if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
WARN_ON(1);
fn(1, context);
return -EIO;
}
- io = mempool_alloc(io_pool(client), GFP_NOIO);
- io->error = 0;
+ io = mempool_alloc(client->pool, GFP_NOIO);
+ io->error_bits = 0;
+ io->eopnotsupp_bits = 0;
atomic_set(&io->count, 1); /* see dispatch_io() */
io->sleeper = NULL;
io->client = client;
return 0;
}
-int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw,
- struct page_list *pl, unsigned int offset,
- unsigned long *error_bits)
-{
- struct dpages dp;
- list_dp_init(&dp, pl, offset);
- return sync_io(NULL, num_regions, where, rw, &dp, error_bits);
-}
-
-int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where, int rw,
- struct bio_vec *bvec, unsigned long *error_bits)
-{
- struct dpages dp;
- bvec_dp_init(&dp, bvec);
- return sync_io(NULL, num_regions, where, rw, &dp, error_bits);
-}
-
-int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw,
- void *data, unsigned long *error_bits)
-{
- struct dpages dp;
- vm_dp_init(&dp, data);
- return sync_io(NULL, num_regions, where, rw, &dp, error_bits);
-}
-
-int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
- struct page_list *pl, unsigned int offset,
- io_notify_fn fn, void *context)
-{
- struct dpages dp;
- list_dp_init(&dp, pl, offset);
- return async_io(NULL, num_regions, where, rw, &dp, fn, context);
-}
-
-int dm_io_async_bvec(unsigned int num_regions, struct io_region *where, int rw,
- struct bio_vec *bvec, io_notify_fn fn, void *context)
-{
- struct dpages dp;
- bvec_dp_init(&dp, bvec);
- return async_io(NULL, num_regions, where, rw, &dp, fn, context);
-}
-
-int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw,
- void *data, io_notify_fn fn, void *context)
-{
- struct dpages dp;
- vm_dp_init(&dp, data);
- return async_io(NULL, num_regions, where, rw, &dp, fn, context);
-}
-
static int dp_init(struct dm_io_request *io_req, struct dpages *dp)
{
/* Set up dpages based on memory type */
}
/*
- * New collapsed (a)synchronous interface
+ * New collapsed (a)synchronous interface.
+ *
+ * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug
+ * the queue with blk_unplug() some time later or set the BIO_RW_SYNCIO and
+ * BIO_RW_UNPLUG bits in io_req->bi_rw (the flags dispatch_io() sets for
+ * synchronous IO). If you fail to do one of these, the IO will be submitted
+ * to the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c.
*/
int dm_io(struct dm_io_request *io_req, unsigned num_regions,
- struct io_region *where, unsigned long *sync_error_bits)
+ struct dm_io_region *where, unsigned long *sync_error_bits)
{
int r;
struct dpages dp;
&dp, io_req->notify.fn, io_req->notify.context);
}
EXPORT_SYMBOL(dm_io);
-
-EXPORT_SYMBOL(dm_io_get);
-EXPORT_SYMBOL(dm_io_put);
-EXPORT_SYMBOL(dm_io_sync);
-EXPORT_SYMBOL(dm_io_async);
-EXPORT_SYMBOL(dm_io_sync_bvec);
-EXPORT_SYMBOL(dm_io_async_bvec);
-EXPORT_SYMBOL(dm_io_sync_vm);
-EXPORT_SYMBOL(dm_io_async_vm);