* miss any bits.
*/
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/bitops.h>
#include <linux/kthread.h>
-#include <asm/atomic.h>
#include "raid6.h"
#include <linux/raid/bitmap.h>
const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
#endif
+/*
+ * We maintain a biased count of active stripes in the bottom 16 bits of
+ * bi_phys_segments, and a count of processed stripes in the upper 16 bits
+ */
+static inline int raid5_bi_phys_segments(struct bio *bio)
+{
+ return bio->bi_phys_segments & 0xffff;
+}
+
+static inline int raid5_bi_hw_segments(struct bio *bio)
+{
+ return (bio->bi_phys_segments >> 16) & 0xffff;
+}
+
+static inline int raid5_dec_bi_phys_segments(struct bio *bio)
+{
+ --bio->bi_phys_segments;
+ return raid5_bi_phys_segments(bio);
+}
+
+static inline int raid5_dec_bi_hw_segments(struct bio *bio)
+{
+ unsigned short val = raid5_bi_hw_segments(bio);
+
+ --val;
+ bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio);
+ return val;
+}
+
+static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
+{
+ bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16);
+}
+
static inline int raid6_next_disk(int disk, int raid_disks)
{
disk++;
return 0;
}
-static void raid5_build_block (struct stripe_head *sh, int i);
+static void raid5_build_block(struct stripe_head *sh, int i);
static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int disks)
{
while (rbi && rbi->bi_sector <
dev->sector + STRIPE_SECTORS) {
rbi2 = r5_next_bio(rbi, dev->sector);
- if (--rbi->bi_phys_segments == 0) {
+ if (!raid5_dec_bi_phys_segments(rbi)) {
rbi->bi_next = return_bi;
return_bi = rbi;
}
release_stripe(sh);
}
-static struct dma_async_tx_descriptor *
-ops_run_compute5(struct stripe_head *sh, unsigned long ops_request)
+static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
{
/* kernel stack size limits the total number of disks */
int disks = sh->disks;
ASYNC_TX_XOR_ZERO_DST, NULL,
ops_complete_compute5, sh);
- /* ack now if postxor is not set to be run */
- if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request))
- async_tx_ack(tx);
-
return tx;
}
overlap_clear++;
}
- if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request))
- tx = ops_run_compute5(sh, ops_request);
+ if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) {
+ tx = ops_run_compute5(sh);
+ /* terminate the chain if postxor is not set to be run */
+ if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request))
+ async_tx_ack(tx);
+ }
if (test_bit(STRIPE_OP_PREXOR, &ops_request))
tx = ops_run_prexor(sh, tx);
struct stripe_head *osh, *nsh;
LIST_HEAD(newstripes);
struct disk_info *ndisks;
- int err = 0;
+ int err;
struct kmem_cache *sc;
int i;
if (newsize <= conf->pool_size)
return 0; /* never bother to shrink */
- md_allow_write(conf->mddev);
+ err = md_allow_write(conf->mddev);
+ if (err)
+ return err;
/* Step 1 */
sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
release_stripe(sh);
}
-static void raid5_end_write_request (struct bio *bi, int error)
+static void raid5_end_write_request(struct bio *bi, int error)
{
struct stripe_head *sh = bi->bi_private;
raid5_conf_t *conf = sh->raid_conf;
static sector_t compute_blocknr(struct stripe_head *sh, int i);
-static void raid5_build_block (struct stripe_head *sh, int i)
+static void raid5_build_block(struct stripe_head *sh, int i)
{
struct r5dev *dev = &sh->dev[i];
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
}
set_bit(Faulty, &rdev->flags);
- printk (KERN_ALERT
- "raid5: Disk failure on %s, disabling device.\n"
- "raid5: Operation continuing on %d devices.\n",
- bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded);
+ printk(KERN_ALERT
+ "raid5: Disk failure on %s, disabling device.\n"
+ "raid5: Operation continuing on %d devices.\n",
+ bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded);
}
}
*dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks;
break;
default:
- printk (KERN_CRIT "raid6: unsupported algorithm %d\n",
- conf->algorithm);
+ printk(KERN_CRIT "raid6: unsupported algorithm %d\n",
+ conf->algorithm);
}
break;
}
}
break;
default:
- printk (KERN_CRIT "raid6: unsupported algorithm %d\n",
- conf->algorithm);
+ printk(KERN_CRIT "raid6: unsupported algorithm %d\n",
+ conf->algorithm);
}
break;
}
chunk_number = stripe * data_disks + i;
r_sector = (sector_t)chunk_number * sectors_per_chunk + chunk_offset;
- check = raid5_compute_sector (r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf);
+ check = raid5_compute_sector(r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf);
if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) {
printk(KERN_ERR "compute_blocknr: map not correct\n");
return 0;
}
static void
-handle_write_operations5(struct stripe_head *sh, struct stripe_head_state *s,
+schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
int rcw, int expand)
{
int i, pd_idx = sh->pd_idx, disks = sh->disks;
if (*bip)
bi->bi_next = *bip;
*bip = bi;
- bi->bi_phys_segments ++;
+ bi->bi_phys_segments++;
spin_unlock_irq(&conf->device_lock);
spin_unlock(&sh->lock);
}
static void
-handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh,
+handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh,
struct stripe_head_state *s, int disks,
struct bio **return_bi)
{
sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
clear_bit(BIO_UPTODATE, &bi->bi_flags);
- if (--bi->bi_phys_segments == 0) {
+ if (!raid5_dec_bi_phys_segments(bi)) {
md_write_end(conf->mddev);
bi->bi_next = *return_bi;
*return_bi = bi;
sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
clear_bit(BIO_UPTODATE, &bi->bi_flags);
- if (--bi->bi_phys_segments == 0) {
+ if (!raid5_dec_bi_phys_segments(bi)) {
md_write_end(conf->mddev);
bi->bi_next = *return_bi;
*return_bi = bi;
struct bio *nextbi =
r5_next_bio(bi, sh->dev[i].sector);
clear_bit(BIO_UPTODATE, &bi->bi_flags);
- if (--bi->bi_phys_segments == 0) {
+ if (!raid5_dec_bi_phys_segments(bi)) {
bi->bi_next = *return_bi;
*return_bi = bi;
}
md_wakeup_thread(conf->mddev->thread);
}
-/* __handle_issuing_new_read_requests5 - returns 0 if there are no more disks
- * to process
+/* fetch_block5 - checks the given member device to see if its data needs
+ * to be read or computed to satisfy a request.
+ *
+ * Returns 1 when no more member devices need to be checked, otherwise returns
+ * 0 to tell the loop in handle_stripe_fill5 to continue
*/
-static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
- struct stripe_head_state *s, int disk_idx, int disks)
+static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s,
+ int disk_idx, int disks)
{
struct r5dev *dev = &sh->dev[disk_idx];
struct r5dev *failed_dev = &sh->dev[s->failed_num];
/* is the data in this block needed, and can we get it? */
if (!test_bit(R5_LOCKED, &dev->flags) &&
- !test_bit(R5_UPTODATE, &dev->flags) && (dev->toread ||
- (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
- s->syncing || s->expanding || (s->failed &&
- (failed_dev->toread || (failed_dev->towrite &&
- !test_bit(R5_OVERWRITE, &failed_dev->flags)
- ))))) {
+ !test_bit(R5_UPTODATE, &dev->flags) &&
+ (dev->toread ||
+ (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
+ s->syncing || s->expanding ||
+ (s->failed &&
+ (failed_dev->toread ||
+ (failed_dev->towrite &&
+ !test_bit(R5_OVERWRITE, &failed_dev->flags)))))) {
/* We would like to get this block, possibly by computing it,
* otherwise read it if the backing disk is insync
*/
* subsequent operation.
*/
s->uptodate++;
- return 0; /* uptodate + compute == disks */
+ return 1; /* uptodate + compute == disks */
} else if (test_bit(R5_Insync, &dev->flags)) {
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantread, &dev->flags);
}
}
- return ~0;
+ return 0;
}
-static void handle_issuing_new_read_requests5(struct stripe_head *sh,
+/**
+ * handle_stripe_fill5 - read or compute data to satisfy pending requests.
+ */
+static void handle_stripe_fill5(struct stripe_head *sh,
struct stripe_head_state *s, int disks)
{
int i;
* midst of changing due to a write
*/
if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
- !sh->reconstruct_state) {
+ !sh->reconstruct_state)
for (i = disks; i--; )
- if (__handle_issuing_new_read_requests5(
- sh, s, i, disks) == 0)
+ if (fetch_block5(sh, s, i, disks))
break;
- }
set_bit(STRIPE_HANDLE, &sh->state);
}
-static void handle_issuing_new_read_requests6(struct stripe_head *sh,
+static void handle_stripe_fill6(struct stripe_head *sh,
struct stripe_head_state *s, struct r6_state *r6s,
int disks)
{
}
-/* handle_completed_write_requests
+/* handle_stripe_clean_event
* any written block on an uptodate or failed drive can be returned.
* Note that if we 'wrote' to a failed drive, it will be UPTODATE, but
* never LOCKED, so we don't need to test 'failed' directly.
*/
-static void handle_completed_write_requests(raid5_conf_t *conf,
+static void handle_stripe_clean_event(raid5_conf_t *conf,
struct stripe_head *sh, int disks, struct bio **return_bi)
{
int i;
while (wbi && wbi->bi_sector <
dev->sector + STRIPE_SECTORS) {
wbi2 = r5_next_bio(wbi, dev->sector);
- if (--wbi->bi_phys_segments == 0) {
+ if (!raid5_dec_bi_phys_segments(wbi)) {
md_write_end(conf->mddev);
wbi->bi_next = *return_bi;
*return_bi = wbi;
md_wakeup_thread(conf->mddev->thread);
}
-static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
+static void handle_stripe_dirtying5(raid5_conf_t *conf,
struct stripe_head *sh, struct stripe_head_state *s, int disks)
{
int rmw = 0, rcw = 0, i;
if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
(s->locked == 0 && (rcw == 0 || rmw == 0) &&
!test_bit(STRIPE_BIT_DELAY, &sh->state)))
- handle_write_operations5(sh, s, rcw == 0, 0);
+ schedule_reconstruction5(sh, s, rcw == 0, 0);
}
-static void handle_issuing_new_write_requests6(raid5_conf_t *conf,
+static void handle_stripe_dirtying6(raid5_conf_t *conf,
struct stripe_head *sh, struct stripe_head_state *s,
struct r6_state *r6s, int disks)
{
*
*/
-static void handle_stripe5(struct stripe_head *sh)
+static bool handle_stripe5(struct stripe_head *sh)
{
raid5_conf_t *conf = sh->raid_conf;
int disks = sh->disks, i;
if (dev->written)
s.written++;
rdev = rcu_dereference(conf->disks[i].rdev);
- if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ if (blocked_rdev == NULL &&
+ rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
blocked_rdev = rdev;
atomic_inc(&rdev->nr_pending);
- break;
}
if (!rdev || !test_bit(In_sync, &rdev->flags)) {
/* The ReadError flag will just be confusing now */
rcu_read_unlock();
if (unlikely(blocked_rdev)) {
- set_bit(STRIPE_HANDLE, &sh->state);
- goto unlock;
+ if (s.syncing || s.expanding || s.expanded ||
+ s.to_write || s.written) {
+ set_bit(STRIPE_HANDLE, &sh->state);
+ goto unlock;
+ }
+ /* There is nothing for the blocked_rdev to block */
+ rdev_dec_pending(blocked_rdev, conf->mddev);
+ blocked_rdev = NULL;
}
if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
* need to be failed
*/
if (s.failed > 1 && s.to_read+s.to_write+s.written)
- handle_requests_to_failed_array(conf, sh, &s, disks,
- &return_bi);
+ handle_failed_stripe(conf, sh, &s, disks, &return_bi);
if (s.failed > 1 && s.syncing) {
md_done_sync(conf->mddev, STRIPE_SECTORS,0);
clear_bit(STRIPE_SYNCING, &sh->state);
!test_bit(R5_LOCKED, &dev->flags) &&
test_bit(R5_UPTODATE, &dev->flags)) ||
(s.failed == 1 && s.failed_num == sh->pd_idx)))
- handle_completed_write_requests(conf, sh, disks, &return_bi);
+ handle_stripe_clean_event(conf, sh, disks, &return_bi);
/* Now we might consider reading some blocks, either to check/generate
* parity, or to satisfy requests
*/
if (s.to_read || s.non_overwrite ||
(s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
- handle_issuing_new_read_requests5(sh, &s, disks);
+ handle_stripe_fill5(sh, &s, disks);
/* Now we check to see if any write operations have recently
* completed
* block.
*/
if (s.to_write && !sh->reconstruct_state && !sh->check_state)
- handle_issuing_new_write_requests5(conf, sh, &s, disks);
+ handle_stripe_dirtying5(conf, sh, &s, disks);
/* maybe we need to check and possibly fix the parity for this stripe
* Any reads will already have been scheduled, so we just see if enough
if (sh->reconstruct_state == reconstruct_state_result) {
sh->reconstruct_state = reconstruct_state_idle;
clear_bit(STRIPE_EXPANDING, &sh->state);
- for (i = conf->raid_disks; i--; )
+ for (i = conf->raid_disks; i--; ) {
set_bit(R5_Wantwrite, &sh->dev[i].flags);
- set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_LOCKED, &sh->dev[i].flags);
s.locked++;
+ }
}
if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
sh->disks = conf->raid_disks;
sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
conf->raid_disks);
- handle_write_operations5(sh, &s, 1, 1);
+ schedule_reconstruction5(sh, &s, 1, 1);
} else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
clear_bit(STRIPE_EXPAND_READY, &sh->state);
atomic_dec(&conf->reshape_stripes);
ops_run_io(sh, &s);
return_io(return_bi);
+
+ return blocked_rdev == NULL;
}
-static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
+static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
{
raid6_conf_t *conf = sh->raid_conf;
int disks = sh->disks;
copy_data(0, rbi, dev->page, dev->sector);
rbi2 = r5_next_bio(rbi, dev->sector);
spin_lock_irq(&conf->device_lock);
- if (--rbi->bi_phys_segments == 0) {
+ if (!raid5_dec_bi_phys_segments(rbi)) {
rbi->bi_next = return_bi;
return_bi = rbi;
}
if (dev->written)
s.written++;
rdev = rcu_dereference(conf->disks[i].rdev);
- if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ if (blocked_rdev == NULL &&
+ rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
blocked_rdev = rdev;
atomic_inc(&rdev->nr_pending);
- break;
}
if (!rdev || !test_bit(In_sync, &rdev->flags)) {
/* The ReadError flag will just be confusing now */
rcu_read_unlock();
if (unlikely(blocked_rdev)) {
- set_bit(STRIPE_HANDLE, &sh->state);
- goto unlock;
+ if (s.syncing || s.expanding || s.expanded ||
+ s.to_write || s.written) {
+ set_bit(STRIPE_HANDLE, &sh->state);
+ goto unlock;
+ }
+ /* There is nothing for the blocked_rdev to block */
+ rdev_dec_pending(blocked_rdev, conf->mddev);
+ blocked_rdev = NULL;
}
+
pr_debug("locked=%d uptodate=%d to_read=%d"
" to_write=%d failed=%d failed_num=%d,%d\n",
s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
* might need to be failed
*/
if (s.failed > 2 && s.to_read+s.to_write+s.written)
- handle_requests_to_failed_array(conf, sh, &s, disks,
- &return_bi);
+ handle_failed_stripe(conf, sh, &s, disks, &return_bi);
if (s.failed > 2 && s.syncing) {
md_done_sync(conf->mddev, STRIPE_SECTORS,0);
clear_bit(STRIPE_SYNCING, &sh->state);
( r6s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
&& !test_bit(R5_LOCKED, &qdev->flags)
&& test_bit(R5_UPTODATE, &qdev->flags)))))
- handle_completed_write_requests(conf, sh, disks, &return_bi);
+ handle_stripe_clean_event(conf, sh, disks, &return_bi);
/* Now we might consider reading some blocks, either to check/generate
* parity, or to satisfy requests
*/
if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
(s.syncing && (s.uptodate < disks)) || s.expanding)
- handle_issuing_new_read_requests6(sh, &s, &r6s, disks);
+ handle_stripe_fill6(sh, &s, &r6s, disks);
/* now to consider writing and what else, if anything should be read */
if (s.to_write)
- handle_issuing_new_write_requests6(conf, sh, &s, &r6s, disks);
+ handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
/* maybe we need to check and possibly fix the parity for this stripe
* Any reads will already have been scheduled, so we just see if enough
ops_run_io(sh, &s);
return_io(return_bi);
+
+ return blocked_rdev == NULL;
}
-static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
+/* returns true if the stripe was handled */
+static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page)
{
if (sh->raid_conf->level == 6)
- handle_stripe6(sh, tmp_page);
+ return handle_stripe6(sh, tmp_page);
else
- handle_stripe5(sh);
+ return handle_stripe5(sh);
}
/* We want read requests to align with chunks where possible,
* but write requests don't need to.
*/
-static int raid5_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec)
+static int raid5_mergeable_bvec(struct request_queue *q,
+ struct bvec_merge_data *bvm,
+ struct bio_vec *biovec)
{
mddev_t *mddev = q->queuedata;
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
+ sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
int max;
unsigned int chunk_sectors = mddev->chunk_size >> 9;
- unsigned int bio_sectors = bio->bi_size >> 9;
+ unsigned int bio_sectors = bvm->bi_size >> 9;
- if (bio_data_dir(bio) == WRITE)
+ if ((bvm->bi_rw & 1) == WRITE)
return biovec->bv_len; /* always allow writes to be mergeable */
max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
if(bi) {
conf->retry_read_aligned_list = bi->bi_next;
bi->bi_next = NULL;
+ /*
+ * this sets the active strip count to 1 and the processed
+ * strip count to zero (upper 8 bits)
+ */
bi->bi_phys_segments = 1; /* biased count of active stripes */
- bi->bi_hw_segments = 0; /* count of processed stripes */
}
return bi;
if ((bi->bi_size>>9) > q->max_sectors)
return 0;
blk_recount_segments(q, bi);
- if (bi->bi_phys_segments > q->max_phys_segments ||
- bi->bi_hw_segments > q->max_hw_segments)
+ if (bi->bi_phys_segments > q->max_phys_segments)
return 0;
if (q->merge_bvec_fn)
sector_t logical_sector, last_sector;
struct stripe_head *sh;
const int rw = bio_data_dir(bi);
- int remaining;
+ int cpu, remaining;
if (unlikely(bio_barrier(bi))) {
bio_endio(bi, -EOPNOTSUPP);
md_write_start(mddev, bi);
- disk_stat_inc(mddev->gendisk, ios[rw]);
- disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bi));
+ cpu = part_stat_lock();
+ part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+ part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
+ bio_sectors(bi));
+ part_stat_unlock();
if (rw == READ &&
mddev->reshape_position == MaxSector &&
}
spin_lock_irq(&conf->device_lock);
- remaining = --bi->bi_phys_segments;
+ remaining = raid5_dec_bi_phys_segments(bi);
spin_unlock_irq(&conf->device_lock);
if (remaining == 0) {
j == raid6_next_disk(sh->pd_idx, sh->disks))
continue;
s = compute_blocknr(sh, j);
- if (s < (mddev->array_size<<1)) {
+ if (s < mddev->array_sectors) {
skipped = 1;
continue;
}
clear_bit(STRIPE_INSYNC, &sh->state);
spin_unlock(&sh->lock);
- handle_stripe(sh, NULL);
+ /* wait for any blocked device to be handled */
+ while(unlikely(!handle_stripe(sh, NULL)))
+ ;
release_stripe(sh);
return STRIPE_SECTORS;
sector += STRIPE_SECTORS,
scnt++) {
- if (scnt < raid_bio->bi_hw_segments)
+ if (scnt < raid5_bi_hw_segments(raid_bio))
/* already done this stripe */
continue;
if (!sh) {
/* failed to get a stripe - must wait */
- raid_bio->bi_hw_segments = scnt;
+ raid5_set_bi_hw_segments(raid_bio, scnt);
conf->retry_read_aligned = raid_bio;
return handled;
}
set_bit(R5_ReadError, &sh->dev[dd_idx].flags);
if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
release_stripe(sh);
- raid_bio->bi_hw_segments = scnt;
+ raid5_set_bi_hw_segments(raid_bio, scnt);
conf->retry_read_aligned = raid_bio;
return handled;
}
handled++;
}
spin_lock_irq(&conf->device_lock);
- remaining = --raid_bio->bi_phys_segments;
+ remaining = raid5_dec_bi_phys_segments(raid_bio);
spin_unlock_irq(&conf->device_lock);
if (remaining == 0)
bio_endio(raid_bio, 0);
sh = __get_priority_stripe(conf);
- if (!sh) {
- async_tx_issue_pending_all();
+ if (!sh)
break;
- }
spin_unlock_irq(&conf->device_lock);
handled++;
spin_unlock_irq(&conf->device_lock);
+ async_tx_issue_pending_all();
unplug_slaves(mddev);
pr_debug("--- raid5d inactive\n");
{
raid5_conf_t *conf = mddev_to_conf(mddev);
unsigned long new;
+ int err;
+
if (len >= PAGE_SIZE)
return -EINVAL;
if (!conf)
else
break;
}
- md_allow_write(mddev);
+ err = md_allow_write(mddev);
+ if (err)
+ return err;
while (new > conf->max_nr_stripes) {
if (grow_one_stripe(conf))
conf->max_nr_stripes++;
int raid_disk, memory;
mdk_rdev_t *rdev;
struct disk_info *disk;
- struct list_head *tmp;
int working_disks = 0;
if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) {
return -EIO;
}
+ if (mddev->chunk_size < PAGE_SIZE) {
+ printk(KERN_ERR "md/raid5: chunk_size must be at least "
+ "PAGE_SIZE but %d < %ld\n",
+ mddev->chunk_size, PAGE_SIZE);
+ return -EINVAL;
+ }
+
if (mddev->reshape_position != MaxSector) {
/* Check that we can continue the reshape.
* Currently only disks can change, it must
pr_debug("raid5: run(%s) called.\n", mdname(mddev));
- rdev_for_each(rdev, tmp, mddev) {
+ list_for_each_entry(rdev, &mddev->disks, same_set) {
raid_disk = rdev->raid_disk;
if (raid_disk >= conf->raid_disks
|| raid_disk < 0)
mddev->queue->backing_dev_info.congested_data = mddev;
mddev->queue->backing_dev_info.congested_fn = raid5_congested;
- mddev->array_size = mddev->size * (conf->previous_raid_disks -
+ mddev->array_sectors = 2 * mddev->size * (conf->previous_raid_disks -
conf->max_degraded);
blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
}
#ifdef DEBUG
-static void print_sh (struct seq_file *seq, struct stripe_head *sh)
+static void print_sh(struct seq_file *seq, struct stripe_head *sh)
{
int i;
seq_printf(seq, "\n");
}
-static void printall (struct seq_file *seq, raid5_conf_t *conf)
+static void printall(struct seq_file *seq, raid5_conf_t *conf)
{
struct stripe_head *sh;
struct hlist_node *hn;
}
#endif
-static void status (struct seq_file *seq, mddev_t *mddev)
+static void status(struct seq_file *seq, mddev_t *mddev)
{
raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
int i;
raid5_conf_t *conf = mddev_to_conf(mddev);
sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
- mddev->array_size = (sectors * (mddev->raid_disks-conf->max_degraded))>>1;
- set_capacity(mddev->gendisk, mddev->array_size << 1);
+ mddev->array_sectors = sectors * (mddev->raid_disks
+ - conf->max_degraded);
+ set_capacity(mddev->gendisk, mddev->array_sectors);
mddev->changed = 1;
if (sectors/2 > mddev->size && mddev->recovery_cp == MaxSector) {
mddev->recovery_cp = mddev->size << 1;
return -EINVAL; /* Cannot shrink array or change level yet */
if (mddev->delta_disks == 0)
return 0; /* nothing to do */
+ if (mddev->bitmap)
+ /* Cannot grow a bitmap yet */
+ return -EBUSY;
/* Can only proceed if there are plenty of stripe_heads.
* We need a minimum of one full stripe,, and for sensible progress
{
raid5_conf_t *conf = mddev_to_conf(mddev);
mdk_rdev_t *rdev;
- struct list_head *rtmp;
int spares = 0;
int added_devices = 0;
unsigned long flags;
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
return -EBUSY;
- rdev_for_each(rdev, rtmp, mddev)
+ list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk < 0 &&
!test_bit(Faulty, &rdev->flags))
spares++;
/* Add some new drives, as many as will fit.
* We know there are enough to make the newly sized array work.
*/
- rdev_for_each(rdev, rtmp, mddev)
+ list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk < 0 &&
!test_bit(Faulty, &rdev->flags)) {
if (raid5_add_disk(mddev, rdev) == 0) {
struct block_device *bdev;
if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
- conf->mddev->array_size = conf->mddev->size *
+ conf->mddev->array_sectors = 2 * conf->mddev->size *
(conf->raid_disks - conf->max_degraded);
- set_capacity(conf->mddev->gendisk, conf->mddev->array_size << 1);
+ set_capacity(conf->mddev->gendisk, conf->mddev->array_sectors);
conf->mddev->changed = 1;
bdev = bdget_disk(conf->mddev->gendisk, 0);
if (bdev) {
mutex_lock(&bdev->bd_inode->i_mutex);
- i_size_write(bdev->bd_inode, (loff_t)conf->mddev->array_size << 10);
+ i_size_write(bdev->bd_inode,
+ (loff_t)conf->mddev->array_sectors << 9);
mutex_unlock(&bdev->bd_inode->i_mutex);
bdput(bdev);
}