X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=drivers%2Fmd%2Fdm-crypt.c;h=e412980763bded7f0de5f05c01e9a896e254ef7e;hb=f653398c86a1c104f0992bd788dd4bb065449be4;hp=6dbaeee48ced47b45d71b38a8fd6bc9cf5256730;hpb=48527fa7cf7fefb84e9fe03cddd08ddafc9f15f3;p=safe%2Fjmp%2Flinux-2.6 diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 6dbaeee..e412980 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1,11 +1,12 @@ /* * Copyright (C) 2003 Christophe Saout * Copyright (C) 2004 Clemens Fruhwirth - * Copyright (C) 2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved. * * This file is released under the GPL. */ +#include #include #include #include @@ -22,28 +23,16 @@ #include #include -#include "dm.h" +#include #define DM_MSG_PREFIX "crypt" #define MESG_STR(x) x, sizeof(x) /* - * per bio private data - */ -struct crypt_io { - struct dm_target *target; - struct bio *base_bio; - struct bio *first_clone; - struct work_struct work; - atomic_t pending; - int error; - int post_process; -}; - -/* * context holding the current state of a multi-part conversion */ struct convert_context { + struct completion restart; struct bio *bio_in; struct bio *bio_out; unsigned int offset_in; @@ -51,14 +40,36 @@ struct convert_context { unsigned int idx_in; unsigned int idx_out; sector_t sector; - int write; + atomic_t pending; +}; + +/* + * per bio private data + */ +struct dm_crypt_io { + struct dm_target *target; + struct bio *base_bio; + struct work_struct work; + + struct convert_context ctx; + + atomic_t pending; + int error; + sector_t sector; + struct dm_crypt_io *base_io; +}; + +struct dm_crypt_request { + struct convert_context *ctx; + struct scatterlist sg_in; + struct scatterlist sg_out; }; struct crypt_config; struct crypt_iv_operations { int (*ctr)(struct crypt_config *cc, struct dm_target *ti, - const char *opts); + const char *opts); void (*dtr)(struct crypt_config *cc); const char *(*status)(struct crypt_config *cc); int (*generator)(struct crypt_config *cc, u8 *iv, sector_t sector); @@ -74,25 +85,48 @@ struct crypt_config { sector_t start; /* - * pool for per bio private data and - * for encryption buffer pages + * pool for per bio private data, crypto requests and + * encryption requeusts/buffer pages */ mempool_t *io_pool; + mempool_t *req_pool; mempool_t *page_pool; struct bio_set *bs; + struct workqueue_struct *io_queue; + struct workqueue_struct *crypt_queue; + /* * crypto related data */ struct crypt_iv_operations *iv_gen_ops; char *iv_mode; - struct crypto_cipher *iv_gen_private; + union { + struct crypto_cipher *essiv_tfm; + int benbi_shift; + } iv_gen_private; sector_t iv_offset; unsigned int iv_size; + /* + * Layout of each crypto request: + * + * struct ablkcipher_request + * context + * padding + * struct dm_crypt_request + * padding + * IV + * + * The padding is added so that dm_crypt_request and the IV are + * correctly aligned. + */ + unsigned int dmreq_start; + struct ablkcipher_request *req; + char cipher[CRYPTO_MAX_ALG_NAME]; char chainmode[CRYPTO_MAX_ALG_NAME]; - struct crypto_blkcipher *tfm; + struct crypto_ablkcipher *tfm; unsigned long flags; unsigned int key_size; u8 key[0]; @@ -102,13 +136,16 @@ struct crypt_config { #define MIN_POOL_PAGES 32 #define MIN_BIO_PAGES 8 -static kmem_cache_t *_crypt_io_pool; +static struct kmem_cache *_crypt_io_pool; + +static void clone_init(struct dm_crypt_io *, struct bio *); +static void kcryptd_queue_crypt(struct dm_crypt_io *io); /* * Different IV generation algorithms: * * plain: the initial vector is the 32-bit little-endian version of the sector - * number, padded with zeros if neccessary. + * number, padded with zeros if necessary. * * essiv: "encrypted sector|salt initial vector", the sector number is * encrypted with the bulk cipher using a salt as key. The salt @@ -117,6 +154,9 @@ static kmem_cache_t *_crypt_io_pool; * benbi: the 64-bit "big-endian 'narrow block'-count", starting at 1 * (needed for LRW-32-AES and possible other narrow block modes) * + * null: the initial vector is always zero. Provides compatibility with + * obsolete loop_fish2 devices. Do not use for new devices. + * * plumb: unimplemented, see: * http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454 */ @@ -130,7 +170,7 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, sector_t sector) } static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, - const char *opts) + const char *opts) { struct crypto_cipher *essiv_tfm; struct crypto_hash *hash_tfm; @@ -160,7 +200,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, return -ENOMEM; } - sg_set_buf(&sg, cc->key, cc->key_size); + sg_init_one(&sg, cc->key, cc->key_size); desc.tfm = hash_tfm; desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; err = crypto_hash_digest(&desc, &sg, cc->key_size, salt); @@ -168,6 +208,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, if (err) { ti->error = "Error calculating hash in ESSIV"; + kfree(salt); return err; } @@ -179,9 +220,9 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, return PTR_ERR(essiv_tfm); } if (crypto_cipher_blocksize(essiv_tfm) != - crypto_blkcipher_ivsize(cc->tfm)) { + crypto_ablkcipher_ivsize(cc->tfm)) { ti->error = "Block size of ESSIV cipher does " - "not match IV size of block cipher"; + "not match IV size of block cipher"; crypto_free_cipher(essiv_tfm); kfree(salt); return -EINVAL; @@ -195,29 +236,29 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, } kfree(salt); - cc->iv_gen_private = essiv_tfm; + cc->iv_gen_private.essiv_tfm = essiv_tfm; return 0; } static void crypt_iv_essiv_dtr(struct crypt_config *cc) { - crypto_free_cipher(cc->iv_gen_private); - cc->iv_gen_private = NULL; + crypto_free_cipher(cc->iv_gen_private.essiv_tfm); + cc->iv_gen_private.essiv_tfm = NULL; } static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, sector_t sector) { memset(iv, 0, cc->iv_size); *(u64 *)iv = cpu_to_le64(sector); - crypto_cipher_encrypt_one(cc->iv_gen_private, iv, iv); + crypto_cipher_encrypt_one(cc->iv_gen_private.essiv_tfm, iv, iv); return 0; } static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti, const char *opts) { - unsigned int bs = crypto_blkcipher_blocksize(cc->tfm); - int log = long_log2(bs); + unsigned bs = crypto_ablkcipher_blocksize(cc->tfm); + int log = ilog2(bs); /* we need to calculate how far we must shift the sector count * to get the cipher block count, we use this shift in _gen */ @@ -232,21 +273,30 @@ static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti, return -EINVAL; } - cc->iv_gen_private = (void *)(9 - log); + cc->iv_gen_private.benbi_shift = 9 - log; return 0; } static void crypt_iv_benbi_dtr(struct crypt_config *cc) { - cc->iv_gen_private = NULL; } static int crypt_iv_benbi_gen(struct crypt_config *cc, u8 *iv, sector_t sector) { + __be64 val; + memset(iv, 0, cc->iv_size - sizeof(u64)); /* rest is cleared below */ - put_unaligned(cpu_to_be64(((u64)sector << (u32)cc->iv_gen_private) + 1), - (__be64 *)(iv + cc->iv_size - sizeof(u64))); + + val = cpu_to_be64(((u64)sector << cc->iv_gen_private.benbi_shift) + 1); + put_unaligned(val, (__be64 *)(iv + cc->iv_size - sizeof(u64))); + + return 0; +} + +static int crypt_iv_null_gen(struct crypt_config *cc, u8 *iv, sector_t sector) +{ + memset(iv, 0, cc->iv_size); return 0; } @@ -267,165 +317,204 @@ static struct crypt_iv_operations crypt_iv_benbi_ops = { .generator = crypt_iv_benbi_gen }; -static int -crypt_convert_scatterlist(struct crypt_config *cc, struct scatterlist *out, - struct scatterlist *in, unsigned int length, - int write, sector_t sector) +static struct crypt_iv_operations crypt_iv_null_ops = { + .generator = crypt_iv_null_gen +}; + +static void crypt_convert_init(struct crypt_config *cc, + struct convert_context *ctx, + struct bio *bio_out, struct bio *bio_in, + sector_t sector) { - u8 iv[cc->iv_size]; - struct blkcipher_desc desc = { - .tfm = cc->tfm, - .info = iv, - .flags = CRYPTO_TFM_REQ_MAY_SLEEP, - }; - int r; + ctx->bio_in = bio_in; + ctx->bio_out = bio_out; + ctx->offset_in = 0; + ctx->offset_out = 0; + ctx->idx_in = bio_in ? bio_in->bi_idx : 0; + ctx->idx_out = bio_out ? bio_out->bi_idx : 0; + ctx->sector = sector + cc->iv_offset; + init_completion(&ctx->restart); +} + +static struct dm_crypt_request *dmreq_of_req(struct crypt_config *cc, + struct ablkcipher_request *req) +{ + return (struct dm_crypt_request *)((char *)req + cc->dmreq_start); +} + +static struct ablkcipher_request *req_of_dmreq(struct crypt_config *cc, + struct dm_crypt_request *dmreq) +{ + return (struct ablkcipher_request *)((char *)dmreq - cc->dmreq_start); +} + +static int crypt_convert_block(struct crypt_config *cc, + struct convert_context *ctx, + struct ablkcipher_request *req) +{ + struct bio_vec *bv_in = bio_iovec_idx(ctx->bio_in, ctx->idx_in); + struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out); + struct dm_crypt_request *dmreq; + u8 *iv; + int r = 0; + + dmreq = dmreq_of_req(cc, req); + iv = (u8 *)ALIGN((unsigned long)(dmreq + 1), + crypto_ablkcipher_alignmask(cc->tfm) + 1); + + dmreq->ctx = ctx; + sg_init_table(&dmreq->sg_in, 1); + sg_set_page(&dmreq->sg_in, bv_in->bv_page, 1 << SECTOR_SHIFT, + bv_in->bv_offset + ctx->offset_in); + + sg_init_table(&dmreq->sg_out, 1); + sg_set_page(&dmreq->sg_out, bv_out->bv_page, 1 << SECTOR_SHIFT, + bv_out->bv_offset + ctx->offset_out); + + ctx->offset_in += 1 << SECTOR_SHIFT; + if (ctx->offset_in >= bv_in->bv_len) { + ctx->offset_in = 0; + ctx->idx_in++; + } + + ctx->offset_out += 1 << SECTOR_SHIFT; + if (ctx->offset_out >= bv_out->bv_len) { + ctx->offset_out = 0; + ctx->idx_out++; + } if (cc->iv_gen_ops) { - r = cc->iv_gen_ops->generator(cc, iv, sector); + r = cc->iv_gen_ops->generator(cc, iv, ctx->sector); if (r < 0) return r; - - if (write) - r = crypto_blkcipher_encrypt_iv(&desc, out, in, length); - else - r = crypto_blkcipher_decrypt_iv(&desc, out, in, length); - } else { - if (write) - r = crypto_blkcipher_encrypt(&desc, out, in, length); - else - r = crypto_blkcipher_decrypt(&desc, out, in, length); } + ablkcipher_request_set_crypt(req, &dmreq->sg_in, &dmreq->sg_out, + 1 << SECTOR_SHIFT, iv); + + if (bio_data_dir(ctx->bio_in) == WRITE) + r = crypto_ablkcipher_encrypt(req); + else + r = crypto_ablkcipher_decrypt(req); + return r; } -static void -crypt_convert_init(struct crypt_config *cc, struct convert_context *ctx, - struct bio *bio_out, struct bio *bio_in, - sector_t sector, int write) +static void kcryptd_async_done(struct crypto_async_request *async_req, + int error); +static void crypt_alloc_req(struct crypt_config *cc, + struct convert_context *ctx) { - ctx->bio_in = bio_in; - ctx->bio_out = bio_out; - ctx->offset_in = 0; - ctx->offset_out = 0; - ctx->idx_in = bio_in ? bio_in->bi_idx : 0; - ctx->idx_out = bio_out ? bio_out->bi_idx : 0; - ctx->sector = sector + cc->iv_offset; - ctx->write = write; + if (!cc->req) + cc->req = mempool_alloc(cc->req_pool, GFP_NOIO); + ablkcipher_request_set_tfm(cc->req, cc->tfm); + ablkcipher_request_set_callback(cc->req, CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP, + kcryptd_async_done, + dmreq_of_req(cc, cc->req)); } /* * Encrypt / decrypt data from one bio to another one (can be the same one) */ static int crypt_convert(struct crypt_config *cc, - struct convert_context *ctx) + struct convert_context *ctx) { - int r = 0; + int r; + + atomic_set(&ctx->pending, 1); while(ctx->idx_in < ctx->bio_in->bi_vcnt && ctx->idx_out < ctx->bio_out->bi_vcnt) { - struct bio_vec *bv_in = bio_iovec_idx(ctx->bio_in, ctx->idx_in); - struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out); - struct scatterlist sg_in = { - .page = bv_in->bv_page, - .offset = bv_in->bv_offset + ctx->offset_in, - .length = 1 << SECTOR_SHIFT - }; - struct scatterlist sg_out = { - .page = bv_out->bv_page, - .offset = bv_out->bv_offset + ctx->offset_out, - .length = 1 << SECTOR_SHIFT - }; - - ctx->offset_in += sg_in.length; - if (ctx->offset_in >= bv_in->bv_len) { - ctx->offset_in = 0; - ctx->idx_in++; - } - ctx->offset_out += sg_out.length; - if (ctx->offset_out >= bv_out->bv_len) { - ctx->offset_out = 0; - ctx->idx_out++; + crypt_alloc_req(cc, ctx); + + atomic_inc(&ctx->pending); + + r = crypt_convert_block(cc, ctx, cc->req); + + switch (r) { + /* async */ + case -EBUSY: + wait_for_completion(&ctx->restart); + INIT_COMPLETION(ctx->restart); + /* fall through*/ + case -EINPROGRESS: + cc->req = NULL; + ctx->sector++; + continue; + + /* sync */ + case 0: + atomic_dec(&ctx->pending); + ctx->sector++; + cond_resched(); + continue; + + /* error */ + default: + atomic_dec(&ctx->pending); + return r; } - - r = crypt_convert_scatterlist(cc, &sg_out, &sg_in, sg_in.length, - ctx->write, ctx->sector); - if (r < 0) - break; - - ctx->sector++; } - return r; + return 0; } - static void dm_crypt_bio_destructor(struct bio *bio) - { - struct crypt_io *io = bio->bi_private; +static void dm_crypt_bio_destructor(struct bio *bio) +{ + struct dm_crypt_io *io = bio->bi_private; struct crypt_config *cc = io->target->private; bio_free(bio, cc->bs); - } +} /* * Generate a new unfragmented bio with the given size * This should never violate the device limitations - * May return a smaller bio when running out of pages + * May return a smaller bio when running out of pages, indicated by + * *out_of_pages set to 1. */ -static struct bio * -crypt_alloc_buffer(struct crypt_config *cc, unsigned int size, - struct bio *base_bio, unsigned int *bio_vec_idx) +static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size, + unsigned *out_of_pages) { + struct crypt_config *cc = io->target->private; struct bio *clone; unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM; - unsigned int i; - - if (base_bio) { - clone = bio_alloc_bioset(GFP_NOIO, base_bio->bi_max_vecs, cc->bs); - __bio_clone(clone, base_bio); - } else - clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs); + unsigned i, len; + struct page *page; + clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs); if (!clone) return NULL; - clone->bi_destructor = dm_crypt_bio_destructor; - - /* if the last bio was not complete, continue where that one ended */ - clone->bi_idx = *bio_vec_idx; - clone->bi_vcnt = *bio_vec_idx; - clone->bi_size = 0; - clone->bi_flags &= ~(1 << BIO_SEG_VALID); - - /* clone->bi_idx pages have already been allocated */ - size -= clone->bi_idx * PAGE_SIZE; - - for (i = clone->bi_idx; i < nr_iovecs; i++) { - struct bio_vec *bv = bio_iovec_idx(clone, i); + clone_init(io, clone); + *out_of_pages = 0; - bv->bv_page = mempool_alloc(cc->page_pool, gfp_mask); - if (!bv->bv_page) + for (i = 0; i < nr_iovecs; i++) { + page = mempool_alloc(cc->page_pool, gfp_mask); + if (!page) { + *out_of_pages = 1; break; + } /* * if additional pages cannot be allocated without waiting, * return a partially allocated bio, the caller will then try * to allocate additional bios while submitting this partial bio */ - if ((i - clone->bi_idx) == (MIN_BIO_PAGES - 1)) + if (i == (MIN_BIO_PAGES - 1)) gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT; - bv->bv_offset = 0; - if (size > PAGE_SIZE) - bv->bv_len = PAGE_SIZE; - else - bv->bv_len = size; + len = (size > PAGE_SIZE) ? PAGE_SIZE : size; + + if (!bio_add_page(clone, page, len, 0)) { + mempool_free(page, cc->page_pool); + break; + } - clone->bi_size += bv->bv_len; - clone->bi_vcnt++; - size -= bv->bv_len; + size -= len; } if (!clone->bi_size) { @@ -433,42 +522,15 @@ crypt_alloc_buffer(struct crypt_config *cc, unsigned int size, return NULL; } - /* - * Remember the last bio_vec allocated to be able - * to correctly continue after the splitting. - */ - *bio_vec_idx = clone->bi_vcnt; - return clone; } -static void crypt_free_buffer_pages(struct crypt_config *cc, - struct bio *clone, unsigned int bytes) +static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) { - unsigned int i, start, end; + unsigned int i; struct bio_vec *bv; - /* - * This is ugly, but Jens Axboe thinks that using bi_idx in the - * endio function is too dangerous at the moment, so I calculate the - * correct position using bi_vcnt and bi_size. - * The bv_offset and bv_len fields might already be modified but we - * know that we always allocated whole pages. - * A fix to the bi_idx issue in the kernel is in the works, so - * we will hopefully be able to revert to the cleaner solution soon. - */ - i = clone->bi_vcnt - 1; - bv = bio_iovec_idx(clone, i); - end = (i << PAGE_SHIFT) + (bv->bv_offset + bv->bv_len) - clone->bi_size; - start = end - bytes; - - start >>= PAGE_SHIFT; - if (!clone->bi_size) - end = clone->bi_vcnt; - else - end >>= PAGE_SHIFT; - - for (i = start; i < end; i++) { + for (i = 0; i < clone->bi_vcnt; i++) { bv = bio_iovec_idx(clone, i); BUG_ON(!bv->bv_page); mempool_free(bv->bv_page, cc->page_pool); @@ -476,80 +538,97 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, } } +static struct dm_crypt_io *crypt_io_alloc(struct dm_target *ti, + struct bio *bio, sector_t sector) +{ + struct crypt_config *cc = ti->private; + struct dm_crypt_io *io; + + io = mempool_alloc(cc->io_pool, GFP_NOIO); + io->target = ti; + io->base_bio = bio; + io->sector = sector; + io->error = 0; + io->base_io = NULL; + atomic_set(&io->pending, 0); + + return io; +} + +static void crypt_inc_pending(struct dm_crypt_io *io) +{ + atomic_inc(&io->pending); +} + /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. + * If base_io is set, wait for the last fragment to complete. */ -static void dec_pending(struct crypt_io *io, int error) +static void crypt_dec_pending(struct dm_crypt_io *io) { - struct crypt_config *cc = (struct crypt_config *) io->target->private; - - if (error < 0) - io->error = error; + struct crypt_config *cc = io->target->private; + struct bio *base_bio = io->base_bio; + struct dm_crypt_io *base_io = io->base_io; + int error = io->error; if (!atomic_dec_and_test(&io->pending)) return; - if (io->first_clone) - bio_put(io->first_clone); - - bio_endio(io->base_bio, io->base_bio->bi_size, io->error); - mempool_free(io, cc->io_pool); + + if (likely(!base_io)) + bio_endio(base_bio, error); + else { + if (error && !base_io->error) + base_io->error = error; + crypt_dec_pending(base_io); + } } /* - * kcryptd: + * kcryptd/kcryptd_io: * * Needed because it would be very unwise to do decryption in an * interrupt context. + * + * kcryptd performs the actual encryption or decryption. + * + * kcryptd_io performs the IO submission. + * + * They must be separated as otherwise the final stages could be + * starved by new requests which can block in the first stages due + * to memory allocation. */ -static struct workqueue_struct *_kcryptd_workqueue; -static void kcryptd_do_work(struct work_struct *work); - -static void kcryptd_queue_io(struct crypt_io *io) -{ - INIT_WORK(&io->work, kcryptd_do_work); - queue_work(_kcryptd_workqueue, &io->work); -} - -static int crypt_endio(struct bio *clone, unsigned int done, int error) +static void crypt_endio(struct bio *clone, int error) { - struct crypt_io *io = clone->bi_private; + struct dm_crypt_io *io = clone->bi_private; struct crypt_config *cc = io->target->private; - unsigned read_io = bio_data_dir(clone) == READ; + unsigned rw = bio_data_dir(clone); + + if (unlikely(!bio_flagged(clone, BIO_UPTODATE) && !error)) + error = -EIO; /* - * free the processed pages, even if - * it's only a partially completed write + * free the processed pages */ - if (!read_io) - crypt_free_buffer_pages(cc, clone, done); - - /* keep going - not finished yet */ - if (unlikely(clone->bi_size)) - return 1; + if (rw == WRITE) + crypt_free_buffer_pages(cc, clone); - if (!read_io) - goto out; + bio_put(clone); - if (unlikely(!bio_flagged(clone, BIO_UPTODATE))) { - error = -EIO; - goto out; + if (rw == READ && !error) { + kcryptd_queue_crypt(io); + return; } - bio_put(clone); - io->post_process = 1; - kcryptd_queue_io(io); - return 0; + if (unlikely(error)) + io->error = error; -out: - bio_put(clone); - dec_pending(io, error); - return error; + crypt_dec_pending(io); } -static void clone_init(struct crypt_io *io, struct bio *clone) +static void clone_init(struct dm_crypt_io *io, struct bio *clone) { struct crypt_config *cc = io->target->private; @@ -557,16 +636,16 @@ static void clone_init(struct crypt_io *io, struct bio *clone) clone->bi_end_io = crypt_endio; clone->bi_bdev = cc->dev->bdev; clone->bi_rw = io->base_bio->bi_rw; + clone->bi_destructor = dm_crypt_bio_destructor; } -static void process_read(struct crypt_io *io) +static void kcryptd_io_read(struct dm_crypt_io *io) { struct crypt_config *cc = io->target->private; struct bio *base_bio = io->base_bio; struct bio *clone; - sector_t sector = base_bio->bi_sector - io->target->begin; - atomic_inc(&io->pending); + crypt_inc_pending(io); /* * The block layer might modify the bvec array, so always @@ -575,106 +654,228 @@ static void process_read(struct crypt_io *io) */ clone = bio_alloc_bioset(GFP_NOIO, bio_segments(base_bio), cc->bs); if (unlikely(!clone)) { - dec_pending(io, -ENOMEM); + io->error = -ENOMEM; + crypt_dec_pending(io); return; } clone_init(io, clone); - clone->bi_destructor = dm_crypt_bio_destructor; clone->bi_idx = 0; clone->bi_vcnt = bio_segments(base_bio); clone->bi_size = base_bio->bi_size; - clone->bi_sector = cc->start + sector; + clone->bi_sector = cc->start + io->sector; memcpy(clone->bi_io_vec, bio_iovec(base_bio), sizeof(struct bio_vec) * clone->bi_vcnt); generic_make_request(clone); } -static void process_write(struct crypt_io *io) +static void kcryptd_io_write(struct dm_crypt_io *io) +{ + struct bio *clone = io->ctx.bio_out; + generic_make_request(clone); +} + +static void kcryptd_io(struct work_struct *work) +{ + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + + if (bio_data_dir(io->base_bio) == READ) + kcryptd_io_read(io); + else + kcryptd_io_write(io); +} + +static void kcryptd_queue_io(struct dm_crypt_io *io) { struct crypt_config *cc = io->target->private; - struct bio *base_bio = io->base_bio; - struct bio *clone; - struct convert_context ctx; - unsigned remaining = base_bio->bi_size; - sector_t sector = base_bio->bi_sector - io->target->begin; - unsigned bvec_idx = 0; - atomic_inc(&io->pending); + INIT_WORK(&io->work, kcryptd_io); + queue_work(cc->io_queue, &io->work); +} + +static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, + int error, int async) +{ + struct bio *clone = io->ctx.bio_out; + struct crypt_config *cc = io->target->private; + + if (unlikely(error < 0)) { + crypt_free_buffer_pages(cc, clone); + bio_put(clone); + io->error = -EIO; + crypt_dec_pending(io); + return; + } + + /* crypt_convert should have filled the clone bio */ + BUG_ON(io->ctx.idx_out < clone->bi_vcnt); + + clone->bi_sector = cc->start + io->sector; + + if (async) + kcryptd_queue_io(io); + else + generic_make_request(clone); +} + +static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) +{ + struct crypt_config *cc = io->target->private; + struct bio *clone; + struct dm_crypt_io *new_io; + int crypt_finished; + unsigned out_of_pages = 0; + unsigned remaining = io->base_bio->bi_size; + sector_t sector = io->sector; + int r; - crypt_convert_init(cc, &ctx, NULL, base_bio, sector, 1); + /* + * Prevent io from disappearing until this function completes. + */ + crypt_inc_pending(io); + crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector); /* * The allocated buffers can be smaller than the whole bio, * so repeat the whole process until all the data can be handled. */ while (remaining) { - clone = crypt_alloc_buffer(cc, base_bio->bi_size, - io->first_clone, &bvec_idx); + clone = crypt_alloc_buffer(io, remaining, &out_of_pages); if (unlikely(!clone)) { - dec_pending(io, -ENOMEM); - return; + io->error = -ENOMEM; + break; } - ctx.bio_out = clone; + io->ctx.bio_out = clone; + io->ctx.idx_out = 0; + + remaining -= clone->bi_size; + sector += bio_sectors(clone); + + crypt_inc_pending(io); + r = crypt_convert(cc, &io->ctx); + crypt_finished = atomic_dec_and_test(&io->ctx.pending); - if (unlikely(crypt_convert(cc, &ctx) < 0)) { - crypt_free_buffer_pages(cc, clone, clone->bi_size); - bio_put(clone); - dec_pending(io, -EIO); - return; + /* Encryption was already finished, submit io now */ + if (crypt_finished) { + kcryptd_crypt_write_io_submit(io, r, 0); + + /* + * If there was an error, do not try next fragments. + * For async, error is processed in async handler. + */ + if (unlikely(r < 0)) + break; + + io->sector = sector; } - clone_init(io, clone); - clone->bi_sector = cc->start + sector; + /* + * Out of memory -> run queues + * But don't wait if split was due to the io size restriction + */ + if (unlikely(out_of_pages)) + congestion_wait(BLK_RW_ASYNC, HZ/100); + + /* + * With async crypto it is unsafe to share the crypto context + * between fragments, so switch to a new dm_crypt_io structure. + */ + if (unlikely(!crypt_finished && remaining)) { + new_io = crypt_io_alloc(io->target, io->base_bio, + sector); + crypt_inc_pending(new_io); + crypt_convert_init(cc, &new_io->ctx, NULL, + io->base_bio, sector); + new_io->ctx.idx_in = io->ctx.idx_in; + new_io->ctx.offset_in = io->ctx.offset_in; - if (!io->first_clone) { /* - * hold a reference to the first clone, because it - * holds the bio_vec array and that can't be freed - * before all other clones are released + * Fragments after the first use the base_io + * pending count. */ - bio_get(clone); - io->first_clone = clone; + if (!io->base_io) + new_io->base_io = io; + else { + new_io->base_io = io->base_io; + crypt_inc_pending(io->base_io); + crypt_dec_pending(io); + } + + io = new_io; } + } - remaining -= clone->bi_size; - sector += bio_sectors(clone); + crypt_dec_pending(io); +} - /* prevent bio_put of first_clone */ - if (remaining) - atomic_inc(&io->pending); +static void kcryptd_crypt_read_done(struct dm_crypt_io *io, int error) +{ + if (unlikely(error < 0)) + io->error = -EIO; - generic_make_request(clone); + crypt_dec_pending(io); +} - /* out of memory -> run queues */ - if (remaining) - congestion_wait(bio_data_dir(clone), HZ/100); - } +static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) +{ + struct crypt_config *cc = io->target->private; + int r = 0; + + crypt_inc_pending(io); + + crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, + io->sector); + + r = crypt_convert(cc, &io->ctx); + + if (atomic_dec_and_test(&io->ctx.pending)) + kcryptd_crypt_read_done(io, r); + + crypt_dec_pending(io); } -static void process_read_endio(struct crypt_io *io) +static void kcryptd_async_done(struct crypto_async_request *async_req, + int error) { + struct dm_crypt_request *dmreq = async_req->data; + struct convert_context *ctx = dmreq->ctx; + struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx); struct crypt_config *cc = io->target->private; - struct convert_context ctx; - crypt_convert_init(cc, &ctx, io->base_bio, io->base_bio, - io->base_bio->bi_sector - io->target->begin, 0); + if (error == -EINPROGRESS) { + complete(&ctx->restart); + return; + } + + mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool); + + if (!atomic_dec_and_test(&ctx->pending)) + return; - dec_pending(io, crypt_convert(cc, &ctx)); + if (bio_data_dir(io->base_bio) == READ) + kcryptd_crypt_read_done(io, error); + else + kcryptd_crypt_write_io_submit(io, error, 1); } -static void kcryptd_do_work(struct work_struct *work) +static void kcryptd_crypt(struct work_struct *work) { - struct crypt_io *io = container_of(work, struct crypt_io, work); + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); - if (io->post_process) - process_read_endio(io); - else if (bio_data_dir(io->base_bio) == READ) - process_read(io); + if (bio_data_dir(io->base_bio) == READ) + kcryptd_crypt_read_convert(io); else - process_write(io); + kcryptd_crypt_write_convert(io); +} + +static void kcryptd_queue_crypt(struct dm_crypt_io *io) +{ + struct crypt_config *cc = io->target->private; + + INIT_WORK(&io->work, kcryptd_crypt); + queue_work(cc->crypt_queue, &io->work); } /* @@ -728,7 +929,7 @@ static int crypt_set_key(struct crypt_config *cc, char *key) cc->key_size = key_size; /* initial settings */ if ((!key_size && strcmp(key, "-")) || - (key_size && crypt_decode_key(cc->key, key, key_size) < 0)) + (key_size && crypt_decode_key(cc->key, key, key_size) < 0)) return -EINVAL; set_bit(DM_CRYPT_KEY_VALID, &cc->flags); @@ -750,7 +951,7 @@ static int crypt_wipe_key(struct crypt_config *cc) static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct crypt_config *cc; - struct crypto_blkcipher *tfm; + struct crypto_ablkcipher *tfm; char *tmp; char *cipher; char *chainmode; @@ -784,10 +985,10 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (crypt_set_key(cc, argv[1])) { ti->error = "Error decoding key"; - goto bad1; + goto bad_cipher; } - /* Compatiblity mode for old dm-crypt cipher strings */ + /* Compatibility mode for old dm-crypt cipher strings */ if (!chainmode || (strcmp(chainmode, "plain") == 0 && !ivmode)) { chainmode = "cbc"; ivmode = "plain"; @@ -795,19 +996,19 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (strcmp(chainmode, "ecb") && !ivmode) { ti->error = "This chaining mode requires an IV mechanism"; - goto bad1; + goto bad_cipher; } - if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)", chainmode, - cipher) >= CRYPTO_MAX_ALG_NAME) { + if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)", + chainmode, cipher) >= CRYPTO_MAX_ALG_NAME) { ti->error = "Chain mode + cipher name is too long"; - goto bad1; + goto bad_cipher; } - tfm = crypto_alloc_blkcipher(cc->cipher, 0, CRYPTO_ALG_ASYNC); + tfm = crypto_alloc_ablkcipher(cc->cipher, 0, 0); if (IS_ERR(tfm)) { ti->error = "Error allocating crypto tfm"; - goto bad1; + goto bad_cipher; } strcpy(cc->cipher, cipher); @@ -827,20 +1028,22 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->iv_gen_ops = &crypt_iv_essiv_ops; else if (strcmp(ivmode, "benbi") == 0) cc->iv_gen_ops = &crypt_iv_benbi_ops; + else if (strcmp(ivmode, "null") == 0) + cc->iv_gen_ops = &crypt_iv_null_ops; else { ti->error = "Invalid IV mode"; - goto bad2; + goto bad_ivmode; } if (cc->iv_gen_ops && cc->iv_gen_ops->ctr && cc->iv_gen_ops->ctr(cc, ti, ivopts) < 0) - goto bad2; + goto bad_ivmode; - cc->iv_size = crypto_blkcipher_ivsize(tfm); + cc->iv_size = crypto_ablkcipher_ivsize(tfm); if (cc->iv_size) /* at least a 64 bit sector number should fit in our buffer */ cc->iv_size = max(cc->iv_size, - (unsigned int)(sizeof(u64) / sizeof(u8))); + (unsigned int)(sizeof(u64) / sizeof(u8))); else { if (cc->iv_gen_ops) { DMWARN("Selected cipher does not support IVs"); @@ -853,42 +1056,56 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool); if (!cc->io_pool) { ti->error = "Cannot allocate crypt io mempool"; - goto bad3; + goto bad_slab_pool; + } + + cc->dmreq_start = sizeof(struct ablkcipher_request); + cc->dmreq_start += crypto_ablkcipher_reqsize(tfm); + cc->dmreq_start = ALIGN(cc->dmreq_start, crypto_tfm_ctx_alignment()); + cc->dmreq_start += crypto_ablkcipher_alignmask(tfm) & + ~(crypto_tfm_ctx_alignment() - 1); + + cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start + + sizeof(struct dm_crypt_request) + cc->iv_size); + if (!cc->req_pool) { + ti->error = "Cannot allocate crypt request mempool"; + goto bad_req_pool; } + cc->req = NULL; cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0); if (!cc->page_pool) { ti->error = "Cannot allocate page mempool"; - goto bad4; + goto bad_page_pool; } - cc->bs = bioset_create(MIN_IOS, MIN_IOS, 4); + cc->bs = bioset_create(MIN_IOS, 0); if (!cc->bs) { ti->error = "Cannot allocate crypt bioset"; goto bad_bs; } - if (crypto_blkcipher_setkey(tfm, cc->key, key_size) < 0) { + if (crypto_ablkcipher_setkey(tfm, cc->key, key_size) < 0) { ti->error = "Error setting key"; - goto bad5; + goto bad_device; } if (sscanf(argv[2], "%llu", &tmpll) != 1) { ti->error = "Invalid iv_offset sector"; - goto bad5; + goto bad_device; } cc->iv_offset = tmpll; if (sscanf(argv[4], "%llu", &tmpll) != 1) { ti->error = "Invalid device sector"; - goto bad5; + goto bad_device; } cc->start = tmpll; if (dm_get_device(ti, argv[3], cc->start, ti->len, - dm_table_get_mode(ti->table), &cc->dev)) { + dm_table_get_mode(ti->table), &cc->dev)) { ti->error = "Device lookup failed"; - goto bad5; + goto bad_device; } if (ivmode && cc->iv_gen_ops) { @@ -897,30 +1114,50 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->iv_mode = kmalloc(strlen(ivmode) + 1, GFP_KERNEL); if (!cc->iv_mode) { ti->error = "Error kmallocing iv_mode string"; - goto bad5; + goto bad_ivmode_string; } strcpy(cc->iv_mode, ivmode); } else cc->iv_mode = NULL; + cc->io_queue = create_singlethread_workqueue("kcryptd_io"); + if (!cc->io_queue) { + ti->error = "Couldn't create kcryptd io queue"; + goto bad_io_queue; + } + + cc->crypt_queue = create_singlethread_workqueue("kcryptd"); + if (!cc->crypt_queue) { + ti->error = "Couldn't create kcryptd queue"; + goto bad_crypt_queue; + } + + ti->num_flush_requests = 1; ti->private = cc; return 0; -bad5: +bad_crypt_queue: + destroy_workqueue(cc->io_queue); +bad_io_queue: + kfree(cc->iv_mode); +bad_ivmode_string: + dm_put_device(ti, cc->dev); +bad_device: bioset_free(cc->bs); bad_bs: mempool_destroy(cc->page_pool); -bad4: +bad_page_pool: + mempool_destroy(cc->req_pool); +bad_req_pool: mempool_destroy(cc->io_pool); -bad3: +bad_slab_pool: if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) cc->iv_gen_ops->dtr(cc); -bad2: - crypto_free_blkcipher(tfm); -bad1: +bad_ivmode: + crypto_free_ablkcipher(tfm); +bad_cipher: /* Must zero key material before freeing */ - memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8)); - kfree(cc); + kzfree(cc); return -EINVAL; } @@ -928,36 +1165,47 @@ static void crypt_dtr(struct dm_target *ti) { struct crypt_config *cc = (struct crypt_config *) ti->private; + destroy_workqueue(cc->io_queue); + destroy_workqueue(cc->crypt_queue); + + if (cc->req) + mempool_free(cc->req, cc->req_pool); + bioset_free(cc->bs); mempool_destroy(cc->page_pool); + mempool_destroy(cc->req_pool); mempool_destroy(cc->io_pool); kfree(cc->iv_mode); if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) cc->iv_gen_ops->dtr(cc); - crypto_free_blkcipher(cc->tfm); + crypto_free_ablkcipher(cc->tfm); dm_put_device(ti, cc->dev); /* Must zero key material before freeing */ - memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8)); - kfree(cc); + kzfree(cc); } static int crypt_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { - struct crypt_config *cc = ti->private; - struct crypt_io *io; + struct dm_crypt_io *io; + struct crypt_config *cc; - io = mempool_alloc(cc->io_pool, GFP_NOIO); - io->target = ti; - io->base_bio = bio; - io->first_clone = NULL; - io->error = io->post_process = 0; - atomic_set(&io->pending, 0); - kcryptd_queue_io(io); + if (unlikely(bio_empty_barrier(bio))) { + cc = ti->private; + bio->bi_bdev = cc->dev->bdev; + return DM_MAPIO_REMAPPED; + } - return 0; + io = crypt_io_alloc(ti, bio, bio->bi_sector - ti->begin); + + if (bio_data_dir(io->base_bio) == READ) + kcryptd_queue_io(io); + else + kcryptd_queue_crypt(io); + + return DM_MAPIO_SUBMITTED; } static int crypt_status(struct dm_target *ti, status_type_t type, @@ -1050,9 +1298,32 @@ error: return -EINVAL; } +static int crypt_merge(struct dm_target *ti, struct bvec_merge_data *bvm, + struct bio_vec *biovec, int max_size) +{ + struct crypt_config *cc = ti->private; + struct request_queue *q = bdev_get_queue(cc->dev->bdev); + + if (!q->merge_bvec_fn) + return max_size; + + bvm->bi_bdev = cc->dev->bdev; + bvm->bi_sector = cc->start + bvm->bi_sector - ti->begin; + + return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); +} + +static int crypt_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data) +{ + struct crypt_config *cc = ti->private; + + return fn(ti, cc->dev, cc->start, ti->len, data); +} + static struct target_type crypt_target = { .name = "crypt", - .version= {1, 3, 0}, + .version = {1, 7, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, @@ -1062,48 +1333,30 @@ static struct target_type crypt_target = { .preresume = crypt_preresume, .resume = crypt_resume, .message = crypt_message, + .merge = crypt_merge, + .iterate_devices = crypt_iterate_devices, }; static int __init dm_crypt_init(void) { int r; - _crypt_io_pool = kmem_cache_create("dm-crypt_io", - sizeof(struct crypt_io), - 0, 0, NULL, NULL); + _crypt_io_pool = KMEM_CACHE(dm_crypt_io, 0); if (!_crypt_io_pool) return -ENOMEM; - _kcryptd_workqueue = create_workqueue("kcryptd"); - if (!_kcryptd_workqueue) { - r = -ENOMEM; - DMERR("couldn't create kcryptd"); - goto bad1; - } - r = dm_register_target(&crypt_target); if (r < 0) { DMERR("register failed %d", r); - goto bad2; + kmem_cache_destroy(_crypt_io_pool); } - return 0; - -bad2: - destroy_workqueue(_kcryptd_workqueue); -bad1: - kmem_cache_destroy(_crypt_io_pool); return r; } static void __exit dm_crypt_exit(void) { - int r = dm_unregister_target(&crypt_target); - - if (r < 0) - DMERR("unregister failed %d", r); - - destroy_workqueue(_kcryptd_workqueue); + dm_unregister_target(&crypt_target); kmem_cache_destroy(_crypt_io_pool); }