*/
#include <linux/blkdev.h>
-#include <linux/ctype.h>
#include <linux/device-mapper.h>
#include <linux/delay.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/log2.h>
#include <linux/dm-kcopyd.h>
+#include <linux/workqueue.h>
#include "dm-exception-store.h"
-#include "dm-snap.h"
-#include "dm-bio-list.h"
#define DM_MSG_PREFIX "snapshots"
*/
#define MIN_IOS 256
+#define DM_TRACKED_CHUNK_HASH_SIZE 16
+#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
+ (DM_TRACKED_CHUNK_HASH_SIZE - 1))
+
+struct exception_table { /* hash table of exceptions; struct dm_snapshot embeds one for pending and one for complete */
+ uint32_t hash_mask; /* NOTE(review): presumably (bucket count - 1); the sizing code rounds the bucket count down to a power of two -- confirm */
+ unsigned hash_shift; /* NOTE(review): presumably a shift applied before masking -- confirm against the hash function */
+ struct list_head *table; /* NOTE(review): presumably the array of bucket list heads -- confirm against the allocation code */
+};
+
+struct dm_snapshot { /* per-target state of one snapshot; allocated in snapshot_ctr() and stored in ti->private */
+ struct rw_semaphore lock; /* set up with init_rwsem() in snapshot_ctr(); snapshot_status() takes it for writing */
+
+ struct dm_dev *origin; /* origin device, acquired with dm_get_device() in snapshot_ctr() and released in snapshot_dtr() */
+
+ /*
+  * Link in the origin's list of snapshots. register_snapshot()
+  * keeps that list sorted by chunk size, largest first.
+  */
+ struct list_head list;
+
+ /* You can't use a snapshot if this is 0 (e.g. if full) */
+ int valid;
+
+ /* Origin writes don't trigger exceptions until this is set */
+ int active;
+
+ mempool_t *pending_pool; /* slab mempool backed by pending_cache, MIN_IOS elements reserved */
+
+ atomic_t pending_exceptions_count; /* starts at 0 in snapshot_ctr() */
+
+ struct exception_table pending; /* torn down with pending_cache */
+ struct exception_table complete; /* torn down with exception_cache */
+
+ /*
+ * pe_lock protects all pending_exception operations and access
+ * as well as the snapshot_bios list.
+ */
+ spinlock_t pe_lock;
+
+ /*
+  * The on disk metadata handler. Created by
+  * dm_exception_store_create() before the snapshot is allocated and
+  * released with dm_exception_store_destroy(); it also carries the
+  * COW device, the chunk size/mask/shift and the owning target.
+  */
+ struct dm_exception_store *store;
+
+ struct dm_kcopyd_client *kcopyd_client; /* created with dm_kcopyd_client_create(SNAPSHOT_PAGES, ...) */
+
+ /* Queue of snapshot writes for ksnapd to flush */
+ struct bio_list queued_bios;
+ struct work_struct queued_bios_work; /* work item whose handler is flush_queued_bios() */
+
+ /* Chunks with outstanding reads */
+ mempool_t *tracked_chunk_pool;
+ spinlock_t tracked_chunk_lock; /* NOTE(review): presumably guards tracked_chunk_hash -- confirm */
+ struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; /* buckets are selected with DM_TRACKED_CHUNK_HASH() */
+};
+
static struct workqueue_struct *ksnapd;
static void flush_queued_bios(struct work_struct *work);
+static sector_t chunk_to_sector(struct dm_exception_store *store, /* store supplies the chunk_shift to use */
+ chunk_t chunk)
+{
+ return chunk << store->chunk_shift; /* chunk number -> sector value: shift left by the store's chunk_shift */
+}
+
+static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
+{
+ /*
+ * There is only ever one instance of a particular block
+ * device so we can compare pointers safely.
+ *
+ * Returns non-zero when lhs and rhs are the same pointer and
+ * 0 otherwise; neither pointer is dereferenced.
+ */
+ return lhs == rhs;
+}
+
struct dm_snap_pending_exception {
struct dm_snap_exception e;
*/
static int register_snapshot(struct dm_snapshot *snap)
{
+ struct dm_snapshot *l;
struct origin *o, *new_o;
struct block_device *bdev = snap->origin->bdev;
__insert_origin(o);
}
- list_add_tail(&snap->list, &o->snapshots);
+ /* Sort the list according to chunk size, largest-first smallest-last */
+ list_for_each_entry(l, &o->snapshots, list)
+ if (l->store->chunk_size < snap->store->chunk_size)
+ break;
+ list_add_tail(&snap->list, &l->list);
up_write(&_origins_lock);
return 0;
* Calculate based on the size of the original volume or
* the COW volume...
*/
- cow_dev_size = get_dev_size(s->cow->bdev);
+ cow_dev_size = get_dev_size(s->store->cow->bdev);
origin_dev_size = get_dev_size(s->origin->bdev);
max_buckets = calc_max_buckets();
- hash_size = min(origin_dev_size, cow_dev_size) >> s->chunk_shift;
+ hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift;
hash_size = min(hash_size, max_buckets);
hash_size = rounddown_pow_of_two(hash_size);
}
/*
- * Round a number up to the nearest 'size' boundary. size must
- * be a power of 2.
- */
-static ulong round_up(ulong n, ulong size)
-{
- size--;
- return (n + size) & ~size;
-}
-
-static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg,
- char **error)
-{
- unsigned long chunk_size;
- char *value;
-
- chunk_size = simple_strtoul(chunk_size_arg, &value, 10);
- if (*chunk_size_arg == '\0' || *value != '\0') {
- *error = "Invalid chunk size";
- return -EINVAL;
- }
-
- if (!chunk_size) {
- s->chunk_size = s->chunk_mask = s->chunk_shift = 0;
- return 0;
- }
-
- /*
- * Chunk size must be multiple of page size. Silently
- * round up if it's not.
- */
- chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
-
- /* Check chunk_size is a power of 2 */
- if (!is_power_of_2(chunk_size)) {
- *error = "Chunk size is not a power of 2";
- return -EINVAL;
- }
-
- /* Validate the chunk size against the device block size */
- if (chunk_size % (bdev_hardsect_size(s->cow->bdev) >> 9)) {
- *error = "Chunk size is not a multiple of device blocksize";
- return -EINVAL;
- }
-
- s->chunk_size = chunk_size;
- s->chunk_mask = chunk_size - 1;
- s->chunk_shift = ffs(chunk_size) - 1;
-
- return 0;
-}
-
-/*
* Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
*/
static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
struct dm_snapshot *s;
int i;
int r = -EINVAL;
- char persistent;
char *origin_path;
- char *cow_path;
+ struct dm_exception_store *store;
+ unsigned args_used;
if (argc != 4) {
ti->error = "requires exactly 4 arguments";
r = -EINVAL;
- goto bad1;
+ goto bad_args;
}
origin_path = argv[0];
- cow_path = argv[1];
- persistent = toupper(*argv[2]);
+ argv++;
+ argc--;
- if (persistent != 'P' && persistent != 'N') {
- ti->error = "Persistent flag is not P or N";
+ r = dm_exception_store_create(ti, argc, argv, &args_used, &store);
+ if (r) {
+ ti->error = "Couldn't create exception store";
r = -EINVAL;
- goto bad1;
+ goto bad_args;
}
+ argv += args_used;
+ argc -= args_used;
+
s = kmalloc(sizeof(*s), GFP_KERNEL);
- if (s == NULL) {
+ if (!s) {
ti->error = "Cannot allocate snapshot context private "
"structure";
r = -ENOMEM;
- goto bad1;
+ goto bad_snap;
}
r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
if (r) {
ti->error = "Cannot get origin device";
- goto bad2;
- }
-
- r = dm_get_device(ti, cow_path, 0, 0,
- FMODE_READ | FMODE_WRITE, &s->cow);
- if (r) {
- dm_put_device(ti, s->origin);
- ti->error = "Cannot get COW device";
- goto bad2;
+ goto bad_origin;
}
- r = set_chunk_size(s, argv[3], &ti->error);
- if (r)
- goto bad3;
-
+ s->store = store;
s->valid = 1;
s->active = 0;
atomic_set(&s->pending_exceptions_count, 0);
init_rwsem(&s->lock);
spin_lock_init(&s->pe_lock);
- s->ti = ti;
/* Allocate hash table for COW data */
if (init_hash_tables(s)) {
ti->error = "Unable to allocate hash table space";
r = -ENOMEM;
- goto bad3;
+ goto bad_hash_tables;
}
- r = dm_exception_store_create(argv[2], &s->store);
- if (r) {
- ti->error = "Couldn't create exception store";
- r = -EINVAL;
- goto bad4;
- }
-
- s->store->snap = s;
-
r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
if (r) {
ti->error = "Could not create kcopyd client";
- goto bad5;
+ goto bad_kcopyd;
}
s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache);
if (!s->pending_pool) {
ti->error = "Could not allocate mempool for pending exceptions";
- goto bad6;
+ goto bad_pending_pool;
}
s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS,
bio_list_init(&s->queued_bios);
INIT_WORK(&s->queued_bios_work, flush_queued_bios);
+ if (!s->store->chunk_size) {
+ ti->error = "Chunk size not set";
+ /*
+  * r is not guaranteed to hold an error code at this point, so
+  * set one explicitly; otherwise the constructor could tear
+  * everything down and still return a non-negative value.
+  */
+ r = -EINVAL;
+ goto bad_load_and_register;
+ }
+
/* Add snapshot to the list of snapshots for this origin */
/* Exceptions aren't triggered till snapshot_resume() is called */
if (register_snapshot(s)) {
}
ti->private = s;
- ti->split_io = s->chunk_size;
+ ti->split_io = s->store->chunk_size;
+ ti->num_flush_requests = 1;
return 0;
- bad_load_and_register:
+bad_load_and_register:
mempool_destroy(s->tracked_chunk_pool);
- bad_tracked_chunk_pool:
+bad_tracked_chunk_pool:
mempool_destroy(s->pending_pool);
- bad6:
+bad_pending_pool:
dm_kcopyd_client_destroy(s->kcopyd_client);
- bad5:
- s->store->type->dtr(s->store);
-
- bad4:
+bad_kcopyd:
exit_exception_table(&s->pending, pending_cache);
exit_exception_table(&s->complete, exception_cache);
- bad3:
- dm_put_device(ti, s->cow);
+bad_hash_tables:
dm_put_device(ti, s->origin);
- bad2:
+bad_origin:
kfree(s);
- bad1:
+bad_snap:
+ dm_exception_store_destroy(store);
+
+bad_args:
return r;
}
exit_exception_table(&s->pending, pending_cache);
exit_exception_table(&s->complete, exception_cache);
-
- s->store->type->dtr(s->store);
}
static void snapshot_dtr(struct dm_target *ti)
mempool_destroy(s->pending_pool);
dm_put_device(ti, s->origin);
- dm_put_device(ti, s->cow);
+
+ dm_exception_store_destroy(s->store);
kfree(s);
}
s->valid = 0;
- dm_table_event(s->ti->table);
+ dm_table_event(s->store->ti->table);
}
static void get_pending_exception(struct dm_snap_pending_exception *pe)
dev_size = get_dev_size(bdev);
src.bdev = bdev;
- src.sector = chunk_to_sector(s, pe->e.old_chunk);
- src.count = min(s->chunk_size, dev_size - src.sector);
+ src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
+ src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector);
- dest.bdev = s->cow->bdev;
- dest.sector = chunk_to_sector(s, pe->e.new_chunk);
+ dest.bdev = s->store->cow->bdev;
+ dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
dest.count = src.count;
/* Hand over to kcopyd */
static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e,
struct bio *bio, chunk_t chunk)
{
- bio->bi_bdev = s->cow->bdev;
- bio->bi_sector = chunk_to_sector(s, dm_chunk_number(e->new_chunk) +
- (chunk - e->old_chunk)) +
- (bio->bi_sector & s->chunk_mask);
+ bio->bi_bdev = s->store->cow->bdev;
+ bio->bi_sector = chunk_to_sector(s->store,
+ dm_chunk_number(e->new_chunk) +
+ (chunk - e->old_chunk)) +
+ (bio->bi_sector &
+ s->store->chunk_mask);
}
static int snapshot_map(struct dm_target *ti, struct bio *bio,
chunk_t chunk;
struct dm_snap_pending_exception *pe = NULL;
- chunk = sector_to_chunk(s, bio->bi_sector);
+ if (unlikely(bio_empty_barrier(bio))) {
+ bio->bi_bdev = s->store->cow->bdev;
+ return DM_MAPIO_REMAPPED;
+ }
+
+ chunk = sector_to_chunk(s->store, bio->bi_sector);
/* Full snapshots are not usable */
/* To get here the table must be live so s->active is always set. */
static int snapshot_status(struct dm_target *ti, status_type_t type,
char *result, unsigned int maxlen)
{
+ unsigned sz = 0;
struct dm_snapshot *snap = ti->private;
+ down_write(&snap->lock);
+
switch (type) {
case STATUSTYPE_INFO:
if (!snap->valid)
- snprintf(result, maxlen, "Invalid");
+ DMEMIT("Invalid");
else {
if (snap->store->type->fraction_full) {
sector_t numerator, denominator;
snap->store->type->fraction_full(snap->store,
&numerator,
&denominator);
- snprintf(result, maxlen, "%llu/%llu",
- (unsigned long long)numerator,
- (unsigned long long)denominator);
+ DMEMIT("%llu/%llu",
+ (unsigned long long)numerator,
+ (unsigned long long)denominator);
}
else
- snprintf(result, maxlen, "Unknown");
+ DMEMIT("Unknown");
}
break;
* to make private copies if the output is to
* make sense.
*/
- snprintf(result, maxlen, "%s %s %s %llu",
- snap->origin->name, snap->cow->name,
- snap->store->type->name,
- (unsigned long long)snap->chunk_size);
+ DMEMIT("%s", snap->origin->name);
+ snap->store->type->status(snap->store, type, result + sz,
+ maxlen - sz);
break;
}
+ up_write(&snap->lock);
+
return 0;
}
+static int snapshot_iterate_devices(struct dm_target *ti, /* .iterate_devices hook of the "snapshot" target */
+ iterate_devices_callout_fn fn, void *data)
+{
+ struct dm_snapshot *snap = ti->private;
+
+ return fn(ti, snap->origin, 0, ti->len, data); /* calls fn once, for the origin device only, passing 0 and ti->len; fn's result is returned as-is */
+}
+
+
/*-----------------------------------------------------------------
* Origin methods
*---------------------------------------------------------------*/
goto next_snapshot;
/* Nothing to do if writing beyond end of snapshot */
- if (bio->bi_sector >= dm_table_get_size(snap->ti->table))
+ if (bio->bi_sector >= dm_table_get_size(snap->store->ti->table))
goto next_snapshot;
/*
* Remember, different snapshots can have
* different chunk sizes.
*/
- chunk = sector_to_chunk(snap, bio->bi_sector);
+ chunk = sector_to_chunk(snap->store, bio->bi_sector);
/*
* Check exception table to see if block
}
ti->private = dev;
+ ti->num_flush_requests = 1;
+
return 0;
}
struct dm_dev *dev = ti->private;
bio->bi_bdev = dev->bdev;
+ if (unlikely(bio_empty_barrier(bio)))
+ return DM_MAPIO_REMAPPED;
+
/* Only tell snapshots if this is a write */
return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED;
}
struct dm_dev *dev = ti->private;
struct dm_snapshot *snap;
struct origin *o;
- chunk_t chunk_size = 0;
+ unsigned chunk_size = 0;
down_read(&_origins_lock);
o = __lookup_origin(dev->bdev);
if (o)
list_for_each_entry (snap, &o->snapshots, list)
- chunk_size = min_not_zero(chunk_size, snap->chunk_size);
+ chunk_size = min_not_zero(chunk_size,
+ snap->store->chunk_size);
up_read(&_origins_lock);
ti->split_io = chunk_size;
return 0;
}
+static int origin_iterate_devices(struct dm_target *ti, /* .iterate_devices hook of the "snapshot-origin" target */
+ iterate_devices_callout_fn fn, void *data)
+{
+ struct dm_dev *dev = ti->private; /* for this target ti->private is the dm_dev itself */
+
+ return fn(ti, dev, 0, ti->len, data); /* calls fn once for that device, passing 0 and ti->len; fn's result is returned as-is */
+}
+
static struct target_type origin_target = {
.name = "snapshot-origin",
- .version = {1, 6, 0},
+ .version = {1, 7, 0},
.module = THIS_MODULE,
.ctr = origin_ctr,
.dtr = origin_dtr,
.map = origin_map,
.resume = origin_resume,
.status = origin_status,
+ .iterate_devices = origin_iterate_devices,
};
static struct target_type snapshot_target = {
.name = "snapshot",
- .version = {1, 6, 0},
+ .version = {1, 7, 0},
.module = THIS_MODULE,
.ctr = snapshot_ctr,
.dtr = snapshot_dtr,
.end_io = snapshot_end_io,
.resume = snapshot_resume,
.status = snapshot_status,
+ .iterate_devices = snapshot_iterate_devices,
};
static int __init dm_snapshot_init(void)
r = dm_register_target(&snapshot_target);
if (r) {
DMERR("snapshot target register failed %d", r);
- return r;
+ goto bad_register_snapshot_target;
}
r = dm_register_target(&origin_target);
dm_unregister_target(&origin_target);
bad1:
dm_unregister_target(&snapshot_target);
+
+bad_register_snapshot_target:
+ dm_exception_store_exit();
return r;
}