*/
#include <linux/blkdev.h>
-#include <linux/ctype.h>
#include <linux/device-mapper.h>
#include <linux/delay.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/log2.h>
#include <linux/dm-kcopyd.h>
+#include <linux/workqueue.h>
#include "dm-exception-store.h"
-#include "dm-snap.h"
-#include "dm-bio-list.h"
#define DM_MSG_PREFIX "snapshots"
*/
#define MIN_IOS 256
+#define DM_TRACKED_CHUNK_HASH_SIZE 16
+#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
+ (DM_TRACKED_CHUNK_HASH_SIZE - 1))
+
+struct exception_table {
+ uint32_t hash_mask;
+ unsigned hash_shift;
+ struct list_head *table;
+};
+
+struct dm_snapshot {
+ struct rw_semaphore lock;
+
+ struct dm_dev *origin;
+
+ /* List of snapshots per Origin */
+ struct list_head list;
+
+ /* You can't use a snapshot if this is 0 (e.g. if full) */
+ int valid;
+
+ /* Origin writes don't trigger exceptions until this is set */
+ int active;
+
+ mempool_t *pending_pool;
+
+ atomic_t pending_exceptions_count;
+
+ struct exception_table pending;
+ struct exception_table complete;
+
+ /*
+ * pe_lock protects all pending_exception operations and access
+ * as well as the snapshot_bios list.
+ */
+ spinlock_t pe_lock;
+
+ /* The on disk metadata handler */
+ struct dm_exception_store *store;
+
+ struct dm_kcopyd_client *kcopyd_client;
+
+ /* Queue of snapshot writes for ksnapd to flush */
+ struct bio_list queued_bios;
+ struct work_struct queued_bios_work;
+
+ /* Chunks with outstanding reads */
+ mempool_t *tracked_chunk_pool;
+ spinlock_t tracked_chunk_lock;
+ struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
+};
+
static struct workqueue_struct *ksnapd;
static void flush_queued_bios(struct work_struct *work);
+static sector_t chunk_to_sector(struct dm_exception_store *store,
+ chunk_t chunk)
+{
+ return chunk << store->chunk_shift;
+}
+
+static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
+{
+ /*
+ * There is only ever one instance of a particular block
+ * device so we can compare pointers safely.
+ */
+ return lhs == rhs;
+}
+
struct dm_snap_pending_exception {
struct dm_snap_exception e;
/*
* Allocate room for a suitable hash table.
*/
-static int init_hash_tables(struct dm_snapshot *s, chunk_t chunk_shift)
+static int init_hash_tables(struct dm_snapshot *s)
{
sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
* Calculate based on the size of the original volume or
* the COW volume...
*/
- cow_dev_size = get_dev_size(s->cow->bdev);
+ cow_dev_size = get_dev_size(s->store->cow->bdev);
origin_dev_size = get_dev_size(s->origin->bdev);
max_buckets = calc_max_buckets();
- hash_size = min(origin_dev_size, cow_dev_size) >> chunk_shift;
+ hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift;
hash_size = min(hash_size, max_buckets);
hash_size = rounddown_pow_of_two(hash_size);
}
/*
- * Round a number up to the nearest 'size' boundary. size must
- * be a power of 2.
- */
-static ulong round_up(ulong n, ulong size)
-{
- size--;
- return (n + size) & ~size;
-}
-
-static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg,
- chunk_t *chunk_size, chunk_t *chunk_mask,
- chunk_t *chunk_shift, char **error)
-{
- unsigned long chunk_size_ulong;
- char *value;
-
- chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10);
- if (*chunk_size_arg == '\0' || *value != '\0') {
- *error = "Invalid chunk size";
- return -EINVAL;
- }
-
- if (!chunk_size_ulong) {
- *chunk_size = *chunk_mask = *chunk_shift = 0;
- return 0;
- }
-
- /*
- * Chunk size must be multiple of page size. Silently
- * round up if it's not.
- */
- chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9);
-
- /* Check chunk_size is a power of 2 */
- if (!is_power_of_2(chunk_size_ulong)) {
- *error = "Chunk size is not a power of 2";
- return -EINVAL;
- }
-
- /* Validate the chunk size against the device block size */
- if (chunk_size_ulong % (bdev_hardsect_size(s->cow->bdev) >> 9)) {
- *error = "Chunk size is not a multiple of device blocksize";
- return -EINVAL;
- }
-
- *chunk_size = chunk_size_ulong;
- *chunk_mask = chunk_size_ulong - 1;
- *chunk_shift = ffs(chunk_size_ulong) - 1;
-
- return 0;
-}
-
-/*
* Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
*/
static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
struct dm_snapshot *s;
int i;
int r = -EINVAL;
- char persistent;
char *origin_path;
- char *cow_path;
- chunk_t chunk_size, chunk_mask, chunk_shift;
+ struct dm_exception_store *store;
+ unsigned args_used;
if (argc != 4) {
ti->error = "requires exactly 4 arguments";
r = -EINVAL;
- goto bad1;
+ goto bad_args;
}
origin_path = argv[0];
- cow_path = argv[1];
- persistent = toupper(*argv[2]);
+ argv++;
+ argc--;
- if (persistent != 'P' && persistent != 'N') {
- ti->error = "Persistent flag is not P or N";
+ r = dm_exception_store_create(ti, argc, argv, &args_used, &store);
+ if (r) {
+ ti->error = "Couldn't create exception store";
r = -EINVAL;
- goto bad1;
+ goto bad_args;
}
+ argv += args_used;
+ argc -= args_used;
+
s = kmalloc(sizeof(*s), GFP_KERNEL);
- if (s == NULL) {
+ if (!s) {
ti->error = "Cannot allocate snapshot context private "
"structure";
r = -ENOMEM;
- goto bad1;
+ goto bad_snap;
}
r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
if (r) {
ti->error = "Cannot get origin device";
- goto bad2;
- }
-
- r = dm_get_device(ti, cow_path, 0, 0,
- FMODE_READ | FMODE_WRITE, &s->cow);
- if (r) {
- dm_put_device(ti, s->origin);
- ti->error = "Cannot get COW device";
- goto bad2;
+ goto bad_origin;
}
- r = set_chunk_size(s, argv[3], &chunk_size, &chunk_mask, &chunk_shift,
- &ti->error);
- if (r)
- goto bad3;
-
+ s->store = store;
s->valid = 1;
s->active = 0;
atomic_set(&s->pending_exceptions_count, 0);
spin_lock_init(&s->pe_lock);
/* Allocate hash table for COW data */
- if (init_hash_tables(s, chunk_shift)) {
+ if (init_hash_tables(s)) {
ti->error = "Unable to allocate hash table space";
r = -ENOMEM;
- goto bad3;
+ goto bad_hash_tables;
}
- r = dm_exception_store_create(argv[2], ti, chunk_size, chunk_mask,
- chunk_shift, &s->store);
- if (r) {
- ti->error = "Couldn't create exception store";
- r = -EINVAL;
- goto bad4;
- }
-
- s->store->snap = s;
-
r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
if (r) {
ti->error = "Could not create kcopyd client";
- goto bad5;
+ goto bad_kcopyd;
}
s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache);
if (!s->pending_pool) {
ti->error = "Could not allocate mempool for pending exceptions";
- goto bad6;
+ goto bad_pending_pool;
}
s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS,
ti->private = s;
ti->split_io = s->store->chunk_size;
+ ti->num_flush_requests = 1;
return 0;
- bad_load_and_register:
+bad_load_and_register:
mempool_destroy(s->tracked_chunk_pool);
- bad_tracked_chunk_pool:
+bad_tracked_chunk_pool:
mempool_destroy(s->pending_pool);
- bad6:
+bad_pending_pool:
dm_kcopyd_client_destroy(s->kcopyd_client);
- bad5:
- s->store->type->dtr(s->store);
-
- bad4:
+bad_kcopyd:
exit_exception_table(&s->pending, pending_cache);
exit_exception_table(&s->complete, exception_cache);
- bad3:
- dm_put_device(ti, s->cow);
+bad_hash_tables:
dm_put_device(ti, s->origin);
- bad2:
+bad_origin:
kfree(s);
- bad1:
+bad_snap:
+ dm_exception_store_destroy(store);
+
+bad_args:
return r;
}
exit_exception_table(&s->pending, pending_cache);
exit_exception_table(&s->complete, exception_cache);
-
- s->store->type->dtr(s->store);
}
static void snapshot_dtr(struct dm_target *ti)
mempool_destroy(s->pending_pool);
dm_put_device(ti, s->origin);
- dm_put_device(ti, s->cow);
+
+ dm_exception_store_destroy(s->store);
kfree(s);
}
dev_size = get_dev_size(bdev);
src.bdev = bdev;
- src.sector = chunk_to_sector(s, pe->e.old_chunk);
+ src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
src.count = min(s->store->chunk_size, dev_size - src.sector);
- dest.bdev = s->cow->bdev;
- dest.sector = chunk_to_sector(s, pe->e.new_chunk);
+ dest.bdev = s->store->cow->bdev;
+ dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
dest.count = src.count;
/* Hand over to kcopyd */
static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e,
struct bio *bio, chunk_t chunk)
{
- bio->bi_bdev = s->cow->bdev;
- bio->bi_sector = chunk_to_sector(s, dm_chunk_number(e->new_chunk) +
- (chunk - e->old_chunk)) +
- (bio->bi_sector & s->store->chunk_mask);
+ bio->bi_bdev = s->store->cow->bdev;
+ bio->bi_sector = chunk_to_sector(s->store,
+ dm_chunk_number(e->new_chunk) +
+ (chunk - e->old_chunk)) +
+ (bio->bi_sector &
+ s->store->chunk_mask);
}
static int snapshot_map(struct dm_target *ti, struct bio *bio,
chunk_t chunk;
struct dm_snap_pending_exception *pe = NULL;
- chunk = sector_to_chunk(s, bio->bi_sector);
+ if (unlikely(bio_empty_barrier(bio))) {
+ bio->bi_bdev = s->store->cow->bdev;
+ return DM_MAPIO_REMAPPED;
+ }
+
+ chunk = sector_to_chunk(s->store, bio->bi_sector);
/* Full snapshots are not usable */
/* To get here the table must be live so s->active is always set. */
static int snapshot_status(struct dm_target *ti, status_type_t type,
char *result, unsigned int maxlen)
{
+ unsigned sz = 0;
struct dm_snapshot *snap = ti->private;
switch (type) {
case STATUSTYPE_INFO:
if (!snap->valid)
- snprintf(result, maxlen, "Invalid");
+ DMEMIT("Invalid");
else {
if (snap->store->type->fraction_full) {
sector_t numerator, denominator;
snap->store->type->fraction_full(snap->store,
&numerator,
&denominator);
- snprintf(result, maxlen, "%llu/%llu",
- (unsigned long long)numerator,
- (unsigned long long)denominator);
+ DMEMIT("%llu/%llu",
+ (unsigned long long)numerator,
+ (unsigned long long)denominator);
}
else
- snprintf(result, maxlen, "Unknown");
+ DMEMIT("Unknown");
}
break;
* to make private copies if the output is to
* make sense.
*/
- snprintf(result, maxlen, "%s %s %s %llu",
- snap->origin->name, snap->cow->name,
- snap->store->type->name,
- (unsigned long long)snap->store->chunk_size);
+ DMEMIT("%s", snap->origin->name);
+ snap->store->type->status(snap->store, type, result + sz,
+ maxlen - sz);
break;
}
* Remember, different snapshots can have
* different chunk sizes.
*/
- chunk = sector_to_chunk(snap, bio->bi_sector);
+ chunk = sector_to_chunk(snap->store, bio->bi_sector);
/*
* Check exception table to see if block
}
ti->private = dev;
+ ti->num_flush_requests = 1;
+
return 0;
}
struct dm_dev *dev = ti->private;
bio->bi_bdev = dev->bdev;
+ if (unlikely(bio_empty_barrier(bio)))
+ return DM_MAPIO_REMAPPED;
+
/* Only tell snapshots if this is a write */
return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED;
}