#include <linux/async.h>
#include <linux/seq_file.h>
#include <linux/cpu.h>
+#include <linux/slab.h>
#include "md.h"
#include "raid5.h"
#include "bitmap.h"
clear_bit(R5_UPTODATE, &sh->dev[i].flags);
atomic_inc(&rdev->read_errors);
- if (conf->mddev->degraded)
+ if (conf->mddev->degraded >= conf->max_degraded)
printk_rl(KERN_WARNING
"raid5:%s: read error not correctable "
"(sector %llu on %s).\n",
int previous, int *dd_idx,
struct stripe_head *sh)
{
- long stripe;
- unsigned long chunk_number;
+ sector_t stripe, stripe2;
+ sector_t chunk_number;
unsigned int chunk_offset;
int pd_idx, qd_idx;
int ddf_layout = 0;
*/
chunk_offset = sector_div(r_sector, sectors_per_chunk);
chunk_number = r_sector;
- BUG_ON(r_sector != chunk_number);
/*
* Compute the stripe number
*/
- stripe = chunk_number / data_disks;
-
- /*
- * Compute the data disk and parity disk indexes inside the stripe
- */
- *dd_idx = chunk_number % data_disks;
-
+ stripe = chunk_number;
+ *dd_idx = sector_div(stripe, data_disks);
+ stripe2 = stripe;
/*
* Select the parity disk based on the user selected algorithm.
*/
case 5:
switch (algorithm) {
case ALGORITHM_LEFT_ASYMMETRIC:
- pd_idx = data_disks - stripe % raid_disks;
+ pd_idx = data_disks - sector_div(stripe2, raid_disks);
if (*dd_idx >= pd_idx)
(*dd_idx)++;
break;
case ALGORITHM_RIGHT_ASYMMETRIC:
- pd_idx = stripe % raid_disks;
+ pd_idx = sector_div(stripe2, raid_disks);
if (*dd_idx >= pd_idx)
(*dd_idx)++;
break;
case ALGORITHM_LEFT_SYMMETRIC:
- pd_idx = data_disks - stripe % raid_disks;
+ pd_idx = data_disks - sector_div(stripe2, raid_disks);
*dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
break;
case ALGORITHM_RIGHT_SYMMETRIC:
- pd_idx = stripe % raid_disks;
+ pd_idx = sector_div(stripe2, raid_disks);
*dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
break;
case ALGORITHM_PARITY_0:
switch (algorithm) {
case ALGORITHM_LEFT_ASYMMETRIC:
- pd_idx = raid_disks - 1 - (stripe % raid_disks);
+ pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks);
qd_idx = pd_idx + 1;
if (pd_idx == raid_disks-1) {
(*dd_idx)++; /* Q D D D P */
(*dd_idx) += 2; /* D D P Q D */
break;
case ALGORITHM_RIGHT_ASYMMETRIC:
- pd_idx = stripe % raid_disks;
+ pd_idx = sector_div(stripe2, raid_disks);
qd_idx = pd_idx + 1;
if (pd_idx == raid_disks-1) {
(*dd_idx)++; /* Q D D D P */
(*dd_idx) += 2; /* D D P Q D */
break;
case ALGORITHM_LEFT_SYMMETRIC:
- pd_idx = raid_disks - 1 - (stripe % raid_disks);
+ pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks);
qd_idx = (pd_idx + 1) % raid_disks;
*dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks;
break;
case ALGORITHM_RIGHT_SYMMETRIC:
- pd_idx = stripe % raid_disks;
+ pd_idx = sector_div(stripe2, raid_disks);
qd_idx = (pd_idx + 1) % raid_disks;
*dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks;
break;
/* Exactly the same as RIGHT_ASYMMETRIC, but or
* of blocks for computing Q is different.
*/
- pd_idx = stripe % raid_disks;
+ pd_idx = sector_div(stripe2, raid_disks);
qd_idx = pd_idx + 1;
if (pd_idx == raid_disks-1) {
(*dd_idx)++; /* Q D D D P */
* D D D P Q rather than
* Q D D D P
*/
- pd_idx = raid_disks - 1 - ((stripe + 1) % raid_disks);
+ stripe2 += 1;
+ pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks);
qd_idx = pd_idx + 1;
if (pd_idx == raid_disks-1) {
(*dd_idx)++; /* Q D D D P */
case ALGORITHM_ROTATING_N_CONTINUE:
/* Same as left_symmetric but Q is before P */
- pd_idx = raid_disks - 1 - (stripe % raid_disks);
+ pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks);
qd_idx = (pd_idx + raid_disks - 1) % raid_disks;
*dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
ddf_layout = 1;
case ALGORITHM_LEFT_ASYMMETRIC_6:
/* RAID5 left_asymmetric, with Q on last device */
- pd_idx = data_disks - stripe % (raid_disks-1);
+ pd_idx = data_disks - sector_div(stripe2, raid_disks-1);
if (*dd_idx >= pd_idx)
(*dd_idx)++;
qd_idx = raid_disks - 1;
break;
case ALGORITHM_RIGHT_ASYMMETRIC_6:
- pd_idx = stripe % (raid_disks-1);
+ pd_idx = sector_div(stripe2, raid_disks-1);
if (*dd_idx >= pd_idx)
(*dd_idx)++;
qd_idx = raid_disks - 1;
break;
case ALGORITHM_LEFT_SYMMETRIC_6:
- pd_idx = data_disks - stripe % (raid_disks-1);
+ pd_idx = data_disks - sector_div(stripe2, raid_disks-1);
*dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1);
qd_idx = raid_disks - 1;
break;
case ALGORITHM_RIGHT_SYMMETRIC_6:
- pd_idx = stripe % (raid_disks-1);
+ pd_idx = sector_div(stripe2, raid_disks-1);
*dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1);
qd_idx = raid_disks - 1;
break;
: conf->algorithm;
sector_t stripe;
int chunk_offset;
- int chunk_number, dummy1, dd_idx = i;
+ sector_t chunk_number;
+ int dummy1, dd_idx = i;
sector_t r_sector;
struct stripe_head sh2;
chunk_offset = sector_div(new_sector, sectors_per_chunk);
stripe = new_sector;
- BUG_ON(new_sector != stripe);
if (i == sh->pd_idx)
return 0;
}
chunk_number = stripe * data_disks + i;
- r_sector = (sector_t)chunk_number * sectors_per_chunk + chunk_offset;
+ r_sector = chunk_number * sectors_per_chunk + chunk_offset;
check = raid5_compute_sector(conf, r_sector,
previous, &dummy1, &sh2);
struct r5dev *dev;
mdk_rdev_t *blocked_rdev = NULL;
int prexor;
+ int dec_preread_active = 0;
memset(&s, 0, sizeof(s));
pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d check:%d "
set_bit(STRIPE_INSYNC, &sh->state);
}
}
- if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
- atomic_dec(&conf->preread_active_stripes);
- if (atomic_read(&conf->preread_active_stripes) <
- IO_THRESHOLD)
- md_wakeup_thread(conf->mddev->thread);
- }
+ if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+ dec_preread_active = 1;
}
/* Now to consider new write requests and what else, if anything
ops_run_io(sh, &s);
+ if (dec_preread_active) {
+ /* We delay this until after ops_run_io so that if make_request
+ * is waiting on a barrier, it won't continue until the writes
+ * have actually been submitted.
+ */
+ atomic_dec(&conf->preread_active_stripes);
+ if (atomic_read(&conf->preread_active_stripes) <
+ IO_THRESHOLD)
+ md_wakeup_thread(conf->mddev->thread);
+ }
return_io(return_bi);
}
struct r6_state r6s;
struct r5dev *dev, *pdev, *qdev;
mdk_rdev_t *blocked_rdev = NULL;
+ int dec_preread_active = 0;
pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
"pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
set_bit(STRIPE_INSYNC, &sh->state);
}
}
- if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
- atomic_dec(&conf->preread_active_stripes);
- if (atomic_read(&conf->preread_active_stripes) <
- IO_THRESHOLD)
- md_wakeup_thread(conf->mddev->thread);
- }
+ if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+ dec_preread_active = 1;
}
/* Now to consider new write requests and what else, if anything
ops_run_io(sh, &s);
+
+ if (dec_preread_active) {
+ /* We delay this until after ops_run_io so that if make_request
+ * is waiting on a barrier, it won't continue until the writes
+ * have actually been submitted.
+ */
+ atomic_dec(&conf->preread_active_stripes);
+ if (atomic_read(&conf->preread_active_stripes) <
+ IO_THRESHOLD)
+ md_wakeup_thread(conf->mddev->thread);
+ }
+
return_io(return_bi);
}
if ((bi->bi_size>>9) > queue_max_sectors(q))
return 0;
blk_recount_segments(q, bi);
- if (bi->bi_phys_segments > queue_max_phys_segments(q))
+ if (bi->bi_phys_segments > queue_max_segments(q))
return 0;
if (q->merge_bvec_fn)
int cpu, remaining;
if (unlikely(bio_rw_flagged(bi, BIO_RW_BARRIER))) {
- bio_endio(bi, -EOPNOTSUPP);
+ /* Drain all pending writes. We only really need
+ * to ensure they have been submitted, but this is
+ * easier.
+ */
+ mddev->pers->quiesce(mddev, 1);
+ mddev->pers->quiesce(mddev, 0);
+ md_barrier_request(mddev, bi);
return 0;
}
finish_wait(&conf->wait_for_overlap, &w);
set_bit(STRIPE_HANDLE, &sh->state);
clear_bit(STRIPE_DELAYED, &sh->state);
+ if (mddev->barrier &&
+ !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+ atomic_inc(&conf->preread_active_stripes);
release_stripe(sh);
} else {
/* cannot get stripe for read-ahead, just give-up */
bio_endio(bi, 0);
}
+
+ if (mddev->barrier) {
+ /* We need to wait for the stripes to all be handled.
+ * So: wait for preread_active_stripes to drop to 0.
+ */
+ wait_event(mddev->thread->wqueue,
+ atomic_read(&conf->preread_active_stripes) == 0);
+ }
return 0;
}
{
unsigned long cpu;
struct page *spare_page;
- struct raid5_percpu *allcpus;
+ struct raid5_percpu __percpu *allcpus;
void *scribble;
int err;
mddev->thread = NULL;
mddev->queue->backing_dev_info.congested_fn = NULL;
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
- sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
free_conf(conf);
- mddev->private = NULL;
+ mddev->private = &raid5_attrs_group;
return 0;
}
!test_bit(Faulty, &rdev->flags)) {
if (raid5_add_disk(mddev, rdev) == 0) {
char nm[20];
- if (rdev->raid_disk >= conf->previous_raid_disks)
+ if (rdev->raid_disk >= conf->previous_raid_disks) {
set_bit(In_sync, &rdev->flags);
- else
+ added_devices++;
+ } else
rdev->recovery_offset = 0;
- added_devices++;
sprintf(nm, "rd%d", rdev->raid_disk);
if (sysfs_create_link(&mddev->kobj,
&rdev->kobj, nm))
break;
}
+ /* When a reshape changes the number of devices, ->degraded
+ * is measured against the large of the pre and post number of
+ * devices.*/
if (mddev->delta_disks > 0) {
spin_lock_irqsave(&conf->device_lock, flags);
- mddev->degraded = (conf->raid_disks - conf->previous_raid_disks)
+ mddev->degraded += (conf->raid_disks - conf->previous_raid_disks)
- added_devices;
spin_unlock_irqrestore(&conf->device_lock, flags);
}
module_init(raid5_init);
module_exit(raid5_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("RAID4/5/6 (striping with parity) personality for MD");
MODULE_ALIAS("md-personality-4"); /* RAID5 */
MODULE_ALIAS("md-raid5");
MODULE_ALIAS("md-raid4");