/*
* Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
*
* This file is released under the GPL.
*/
#include <linux/idr.h>
#include <linux/hdreg.h>
#include <linux/blktrace_api.h>
-#include <linux/smp_lock.h>
+#include <trace/block.h>
#define DM_MSG_PREFIX "core"
static DEFINE_SPINLOCK(_minor_lock);
/*
+ * For bio-based dm.
* One of these is allocated per bio.
*/
struct dm_io {
struct mapped_device *md;
int error;
- struct bio *bio;
atomic_t io_count;
+ struct bio *bio;
unsigned long start_time;
};
/*
+ * For bio-based dm.
* One of these is allocated per target within a bio. Hopefully
* this will be simplified out one day.
*/
union map_info info;
};
+DEFINE_TRACE(block_bio_complete);
+
+/*
+ * For request-based dm.
+ * One of these is allocated per request.
+ */
+struct dm_rq_target_io {
+ struct mapped_device *md;
+ struct dm_target *ti;
+ struct request *orig, clone;
+ int error;
+ union map_info info;
+};
+
+/*
+ * For request-based dm.
+ * One of these is allocated per bio.
+ */
+struct dm_rq_clone_bio_info {
+ struct bio *orig;
+ struct request *rq;
+};
+
union map_info *dm_get_mapinfo(struct bio *bio)
{
if (bio && bio->bi_private)
*/
struct dm_wq_req {
enum {
- DM_WQ_FLUSH_ALL,
DM_WQ_FLUSH_DEFERRED,
} type;
struct work_struct work;
/* forced geometry settings */
struct hd_geometry geometry;
+
+ /* sysfs handle */
+ struct kobject kobj;
};
#define MIN_IOS 256
static struct kmem_cache *_io_cache;
static struct kmem_cache *_tio_cache;
+static struct kmem_cache *_rq_tio_cache;
+static struct kmem_cache *_rq_bio_info_cache;
static int __init local_init(void)
{
- int r;
+ int r = -ENOMEM;
/* allocate a slab for the dm_ios */
_io_cache = KMEM_CACHE(dm_io, 0);
if (!_io_cache)
- return -ENOMEM;
+ return r;
/* allocate a slab for the target ios */
_tio_cache = KMEM_CACHE(dm_target_io, 0);
- if (!_tio_cache) {
- kmem_cache_destroy(_io_cache);
- return -ENOMEM;
- }
+ if (!_tio_cache)
+ goto out_free_io_cache;
+
+ _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
+ if (!_rq_tio_cache)
+ goto out_free_tio_cache;
+
+ _rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0);
+ if (!_rq_bio_info_cache)
+ goto out_free_rq_tio_cache;
r = dm_uevent_init();
- if (r) {
- kmem_cache_destroy(_tio_cache);
- kmem_cache_destroy(_io_cache);
- return r;
- }
+ if (r)
+ goto out_free_rq_bio_info_cache;
_major = major;
r = register_blkdev(_major, _name);
- if (r < 0) {
- kmem_cache_destroy(_tio_cache);
- kmem_cache_destroy(_io_cache);
- dm_uevent_exit();
- return r;
- }
+ if (r < 0)
+ goto out_uevent_exit;
if (!_major)
_major = r;
return 0;
+
+out_uevent_exit:
+ dm_uevent_exit();
+out_free_rq_bio_info_cache:
+ kmem_cache_destroy(_rq_bio_info_cache);
+out_free_rq_tio_cache:
+ kmem_cache_destroy(_rq_tio_cache);
+out_free_tio_cache:
+ kmem_cache_destroy(_tio_cache);
+out_free_io_cache:
+ kmem_cache_destroy(_io_cache);
+
+ return r;
}
static void local_exit(void)
{
+ kmem_cache_destroy(_rq_bio_info_cache);
+ kmem_cache_destroy(_rq_tio_cache);
kmem_cache_destroy(_tio_cache);
kmem_cache_destroy(_io_cache);
unregister_blkdev(_major, _name);
/*
* Block device functions
*/
-static int dm_blk_open(struct inode *inode, struct file *file)
+static int dm_blk_open(struct block_device *bdev, fmode_t mode)
{
struct mapped_device *md;
spin_lock(&_minor_lock);
- md = inode->i_bdev->bd_disk->private_data;
+ md = bdev->bd_disk->private_data;
if (!md)
goto out;
return md ? 0 : -ENXIO;
}
-static int dm_blk_close(struct inode *inode, struct file *file)
+static int dm_blk_close(struct gendisk *disk, fmode_t mode)
{
- struct mapped_device *md;
-
- md = inode->i_bdev->bd_disk->private_data;
+ struct mapped_device *md = disk->private_data;
atomic_dec(&md->open_count);
dm_put(md);
return 0;
return dm_get_geometry(md, geo);
}
-static int dm_blk_ioctl(struct inode *inode, struct file *file,
+static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
- struct mapped_device *md;
- struct dm_table *map;
+ struct mapped_device *md = bdev->bd_disk->private_data;
+ struct dm_table *map = dm_get_table(md);
struct dm_target *tgt;
int r = -ENOTTY;
- /* We don't really need this lock, but we do need 'inode'. */
- unlock_kernel();
-
- md = inode->i_bdev->bd_disk->private_data;
-
- map = dm_get_table(md);
-
if (!map || !dm_table_get_size(map))
goto out;
}
if (tgt->type->ioctl)
- r = tgt->type->ioctl(tgt, inode, file, cmd, arg);
+ r = tgt->type->ioctl(tgt, cmd, arg);
out:
dm_table_put(map);
- lock_kernel();
return r;
}
static void start_io_acct(struct dm_io *io)
{
struct mapped_device *md = io->md;
+ int cpu;
io->start_time = jiffies;
- preempt_disable();
- disk_round_stats(dm_disk(md));
- preempt_enable();
- dm_disk(md)->in_flight = atomic_inc_return(&md->pending);
+ cpu = part_stat_lock();
+ part_round_stats(cpu, &dm_disk(md)->part0);
+ part_stat_unlock();
+ dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending);
}
-static int end_io_acct(struct dm_io *io)
+static void end_io_acct(struct dm_io *io)
{
struct mapped_device *md = io->md;
struct bio *bio = io->bio;
unsigned long duration = jiffies - io->start_time;
- int pending;
+ int pending, cpu;
int rw = bio_data_dir(bio);
- preempt_disable();
- disk_round_stats(dm_disk(md));
- preempt_enable();
- dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending);
+ cpu = part_stat_lock();
+ part_round_stats(cpu, &dm_disk(md)->part0);
+ part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
+ part_stat_unlock();
- disk_stat_add(dm_disk(md), ticks[rw], duration);
+ dm_disk(md)->part0.in_flight = pending =
+ atomic_dec_return(&md->pending);
- return !pending;
+ /* nudge anyone waiting on suspend queue */
+ if (!pending)
+ wake_up(&md->wait);
}
/*
spin_unlock_irqrestore(&io->md->pushback_lock, flags);
}
- if (end_io_acct(io))
- /* nudge anyone waiting on suspend queue */
- wake_up(&io->md->wait);
+ end_io_acct(io);
if (io->error != DM_ENDIO_REQUEUE) {
- blk_add_trace_bio(io->md->queue, io->bio,
- BLK_TA_COMPLETE);
+ trace_block_bio_complete(io->md->queue, io->bio);
bio_endio(io->bio, io->error);
}
if (r == DM_MAPIO_REMAPPED) {
/* the bio has been remapped so dispatch it */
- blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone,
+ trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
tio->io->bio->bi_bdev->bd_dev,
clone->bi_sector, sector);
clone->bi_size = to_bytes(len);
clone->bi_io_vec->bv_offset = offset;
clone->bi_io_vec->bv_len = clone->bi_size;
+ clone->bi_flags |= 1 << BIO_CLONED;
return clone;
}
ci.map = dm_get_table(md);
if (unlikely(!ci.map))
return -EIO;
-
+ if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) {
+ dm_table_put(ci.map);
+ bio_endio(bio, -EOPNOTSUPP);
+ return 0;
+ }
ci.md = md;
ci.bio = bio;
ci.io = alloc_io(md);
* CRUD END
*---------------------------------------------------------------*/
+static int dm_merge_bvec(struct request_queue *q,
+ struct bvec_merge_data *bvm,
+ struct bio_vec *biovec)
+{
+ struct mapped_device *md = q->queuedata;
+ struct dm_table *map = dm_get_table(md);
+ struct dm_target *ti;
+ sector_t max_sectors;
+ int max_size = 0;
+
+ if (unlikely(!map))
+ goto out;
+
+ ti = dm_table_find_target(map, bvm->bi_sector);
+ if (!dm_target_is_valid(ti))
+ goto out_table;
+
+ /*
+ * Find maximum amount of I/O that won't need splitting
+ */
+ max_sectors = min(max_io_len(md, bvm->bi_sector, ti),
+ (sector_t) BIO_MAX_SECTORS);
+ max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
+ if (max_size < 0)
+ max_size = 0;
+
+ /*
+ * merge_bvec_fn() returns number of bytes
+ * it can accept at this offset
+ * max is precomputed maximal io size
+ */
+ if (max_size && ti->type->merge)
+ max_size = ti->type->merge(ti, bvm, biovec, max_size);
+
+out_table:
+ dm_table_put(map);
+
+out:
+ /*
+ * Always allow an entire first page
+ */
+ if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT))
+ max_size = biovec->bv_len;
+
+ return max_size;
+}
+
/*
* The request function that just remaps the bio built up by
* dm_merge_bvec.
int r = -EIO;
int rw = bio_data_dir(bio);
struct mapped_device *md = q->queuedata;
-
- /*
- * There is no use in forwarding any barrier request since we can't
- * guarantee it is (or can be) handled by the targets correctly.
- */
- if (unlikely(bio_barrier(bio))) {
- bio_endio(bio, -EOPNOTSUPP);
- return 0;
- }
+ int cpu;
down_read(&md->io_lock);
- disk_stat_inc(dm_disk(md), ios[rw]);
- disk_stat_add(dm_disk(md), sectors[rw], bio_sectors(bio));
+ cpu = part_stat_lock();
+ part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]);
+ part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio));
+ part_stat_unlock();
/*
* If we're suspended we have to queue
static int dm_any_congested(void *congested_data, int bdi_bits)
{
- int r;
- struct mapped_device *md = (struct mapped_device *) congested_data;
- struct dm_table *map = dm_get_table(md);
+ int r = bdi_bits;
+ struct mapped_device *md = congested_data;
+ struct dm_table *map;
- if (!map || test_bit(DMF_BLOCK_IO, &md->flags))
- r = bdi_bits;
- else
- r = dm_table_any_congested(map, bdi_bits);
+ if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
+ map = dm_get_table(md);
+ if (map) {
+ r = dm_table_any_congested(map, bdi_bits);
+ dm_table_put(map);
+ }
+ }
- dm_table_put(map);
return r;
}
blk_queue_make_request(md->queue, dm_request);
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
md->queue->unplug_fn = dm_unplug_all;
+ blk_queue_merge_bvec(md->queue, dm_merge_bvec);
md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache);
if (!md->io_pool)
if (!md->tio_pool)
goto bad_tio_pool;
- md->bs = bioset_create(16, 16);
+ md->bs = bioset_create(16, 0);
if (!md->bs)
goto bad_no_bioset;
static void free_dev(struct mapped_device *md)
{
- int minor = md->disk->first_minor;
+ int minor = MINOR(disk_devt(md->disk));
if (md->suspended_bdev) {
unlock_fs(md);
list_splice_init(&md->uevent_list, &uevents);
spin_unlock_irqrestore(&md->uevent_lock, flags);
- dm_send_uevents(&uevents, &md->disk->dev.kobj);
+ dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
atomic_inc(&md->event_nr);
wake_up(&md->eventq);
if (md->suspended_bdev)
__set_size(md, size);
- if (size == 0)
+
+ if (!size) {
+ dm_table_destroy(t);
return 0;
+ }
- dm_table_get(t);
dm_table_event_callback(t, event_callback, md);
write_lock(&md->map_lock);
write_lock(&md->map_lock);
md->map = NULL;
write_unlock(&md->map_lock);
- dm_table_put(map);
+ dm_table_destroy(map);
}
/*
if (!md)
return -ENXIO;
+ dm_sysfs_init(md);
+
*result = md;
return 0;
}
md = idr_find(&_minor_idr, minor);
if (md && (md == MINOR_ALLOCED ||
- (dm_disk(md)->first_minor != minor) ||
+ (MINOR(disk_devt(dm_disk(md))) != minor) ||
test_bit(DMF_FREEING, &md->flags))) {
md = NULL;
goto out;
if (atomic_dec_and_lock(&md->holders, &_minor_lock)) {
map = dm_get_table(md);
- idr_replace(&_minor_idr, MINOR_ALLOCED, dm_disk(md)->first_minor);
+ idr_replace(&_minor_idr, MINOR_ALLOCED,
+ MINOR(disk_devt(dm_disk(md))));
set_bit(DMF_FREEING, &md->flags);
spin_unlock(&_minor_lock);
if (!dm_suspended(md)) {
dm_table_presuspend_targets(map);
dm_table_postsuspend_targets(map);
}
- __unbind(md);
+ dm_sysfs_exit(md);
dm_table_put(map);
+ __unbind(md);
free_dev(md);
}
}
down_write(&md->io_lock);
switch (req->type) {
- case DM_WQ_FLUSH_ALL:
- __merge_pushback_list(md);
- /* pass through */
case DM_WQ_FLUSH_DEFERRED:
__flush_deferred_io(md);
break;
if (!md->suspended_bdev) {
DMWARN("bdget failed in dm_suspend");
r = -ENOMEM;
- goto flush_and_out;
+ goto out;
}
/*
set_bit(DMF_SUSPENDED, &md->flags);
-flush_and_out:
- if (r && noflush)
- /*
- * Because there may be already I/Os in the pushback list,
- * flush them before return.
- */
- dm_queue_flush(md, DM_WQ_FLUSH_ALL, NULL);
-
out:
if (r && md->suspended_bdev) {
bdput(md->suspended_bdev);
*---------------------------------------------------------------*/
void dm_kobject_uevent(struct mapped_device *md)
{
- kobject_uevent(&md->disk->dev.kobj, KOBJ_CHANGE);
+ kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE);
}
uint32_t dm_next_uevent_seq(struct mapped_device *md)
return md->disk;
}
+struct kobject *dm_kobject(struct mapped_device *md)
+{
+ return &md->kobj;
+}
+
+/*
+ * struct mapped_device should not be exported outside of dm.c
+ * so use this check to verify that kobj is part of md structure
+ */
+struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
+{
+ struct mapped_device *md;
+
+ md = container_of(kobj, struct mapped_device, kobj);
+ if (&md->kobj != kobj)
+ return NULL;
+
+ dm_get(md);
+ return md;
+}
+
int dm_suspended(struct mapped_device *md)
{
return test_bit(DMF_SUSPENDED, &md->flags);