X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=drivers%2Fmd%2Fraid0.c;h=77605cdceaf179ce1a5ce15d9bf50d4239efb91e;hb=f653398c86a1c104f0992bd788dd4bb065449be4;hp=99cee51734e55d0dd5425db56b05dfa26d507297;hpb=ed7b00380d957ec770b5e90380d012c6062c13cc;p=safe%2Fjmp%2Flinux-2.6 diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 99cee51..77605cd 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -26,8 +26,8 @@ static void raid0_unplug(struct request_queue *q) { mddev_t *mddev = q->queuedata; - raid0_conf_t *conf = mddev_to_conf(mddev); - mdk_rdev_t **devlist = conf->strip_zone[0].dev; + raid0_conf_t *conf = mddev->private; + mdk_rdev_t **devlist = conf->devlist; int i; for (i=0; iraid_disks; i++) { @@ -40,10 +40,13 @@ static void raid0_unplug(struct request_queue *q) static int raid0_congested(void *data, int bits) { mddev_t *mddev = data; - raid0_conf_t *conf = mddev_to_conf(mddev); - mdk_rdev_t **devlist = conf->strip_zone[0].dev; + raid0_conf_t *conf = mddev->private; + mdk_rdev_t **devlist = conf->devlist; int i, ret = 0; + if (mddev_congested(mddev, bits)) + return 1; + for (i = 0; i < mddev->raid_disks && !ret ; i++) { struct request_queue *q = bdev_get_queue(devlist[i]->bdev); @@ -52,11 +55,43 @@ static int raid0_congested(void *data, int bits) return ret; } +/* + * inform the user of the raid configuration +*/ +static void dump_zones(mddev_t *mddev) +{ + int j, k, h; + sector_t zone_size = 0; + sector_t zone_start = 0; + char b[BDEVNAME_SIZE]; + raid0_conf_t *conf = mddev->private; + printk(KERN_INFO "******* %s configuration *********\n", + mdname(mddev)); + h = 0; + for (j = 0; j < conf->nr_strip_zones; j++) { + printk(KERN_INFO "zone%d=[", j); + for (k = 0; k < conf->strip_zone[j].nb_dev; k++) + printk("%s/", + bdevname(conf->devlist[j*mddev->raid_disks + + k]->bdev, b)); + printk("]\n"); + + zone_size = conf->strip_zone[j].zone_end - zone_start; + printk(KERN_INFO " zone offset=%llukb " + "device offset=%llukb size=%llukb\n", + (unsigned long long)zone_start>>1, + (unsigned long long)conf->strip_zone[j].dev_start>>1, + (unsigned long long)zone_size>>1); + zone_start = conf->strip_zone[j].zone_end; + } + printk(KERN_INFO "**********************************\n\n"); +} + static int create_strip_zones(mddev_t *mddev) { - int i, c, j, err; - sector_t curr_zone_end; - mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; + int i, c, err; + sector_t curr_zone_end, sectors; + mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev; struct strip_zone *zone; int cnt; char b[BDEVNAME_SIZE]; @@ -68,6 +103,12 @@ static int create_strip_zones(mddev_t *mddev) printk(KERN_INFO "raid0: looking at %s\n", bdevname(rdev1->bdev,b)); c = 0; + + /* round size to chunk_size */ + sectors = rdev1->sectors; + sector_div(sectors, mddev->chunk_sectors); + rdev1->sectors = sectors * mddev->chunk_sectors; + list_for_each_entry(rdev2, &mddev->disks, same_set) { printk(KERN_INFO "raid0: comparing %s(%llu)", bdevname(rdev1->bdev,b), @@ -115,7 +156,7 @@ static int create_strip_zones(mddev_t *mddev) zone = &conf->strip_zone[0]; cnt = 0; smallest = NULL; - zone->dev = conf->devlist; + dev = conf->devlist; err = -EINVAL; list_for_each_entry(rdev1, &mddev->disks, same_set) { int j = rdev1->raid_disk; @@ -125,15 +166,15 @@ static int create_strip_zones(mddev_t *mddev) "aborting!\n", j); goto abort; } - if (zone->dev[j]) { + if (dev[j]) { printk(KERN_ERR "raid0: multiple devices for %d - " "aborting!\n", j); goto abort; } - zone->dev[j] = rdev1; + dev[j] = rdev1; - blk_queue_stack_limits(mddev->queue, - rdev1->bdev->bd_disk->queue); + disk_stack_limits(mddev->gendisk, rdev1->bdev, + rdev1->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as * a one page request is never in violation. @@ -153,16 +194,17 @@ static int create_strip_zones(mddev_t *mddev) goto abort; } zone->nb_dev = cnt; - zone->sectors = smallest->sectors * cnt; - zone->zone_end = zone->sectors; + zone->zone_end = smallest->sectors * cnt; - curr_zone_end = zone->sectors; + curr_zone_end = zone->zone_end; /* now do the other zones */ for (i = 1; i < conf->nr_strip_zones; i++) { + int j; + zone = conf->strip_zone + i; - zone->dev = conf->strip_zone[i-1].dev + mddev->raid_disks; + dev = conf->devlist + i * mddev->raid_disks; printk(KERN_INFO "raid0: zone %d\n", i); zone->dev_start = smallest->sectors; @@ -170,8 +212,7 @@ static int create_strip_zones(mddev_t *mddev) c = 0; for (j=0; jstrip_zone[0].dev[j]; + rdev = conf->devlist[j]; printk(KERN_INFO "raid0: checking %s ...", bdevname(rdev->bdev, b)); if (rdev->sectors <= zone->dev_start) { @@ -179,7 +220,7 @@ static int create_strip_zones(mddev_t *mddev) continue; } printk(KERN_INFO " contained as device %d\n", c); - zone->dev[c] = rdev; + dev[c] = rdev; c++; if (!smallest || rdev->sectors < smallest->sectors) { smallest = rdev; @@ -189,11 +230,11 @@ static int create_strip_zones(mddev_t *mddev) } zone->nb_dev = c; - zone->sectors = (smallest->sectors - zone->dev_start) * c; + sectors = (smallest->sectors - zone->dev_start) * c; printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n", - zone->nb_dev, (unsigned long long)zone->sectors); + zone->nb_dev, (unsigned long long)sectors); - curr_zone_end += zone->sectors; + curr_zone_end += sectors; zone->zone_end = curr_zone_end; printk(KERN_INFO "raid0: current zone start: %llu\n", @@ -203,6 +244,21 @@ static int create_strip_zones(mddev_t *mddev) mddev->queue->backing_dev_info.congested_fn = raid0_congested; mddev->queue->backing_dev_info.congested_data = mddev; + /* + * now since we have the hard sector sizes, we can make sure + * chunk size is a multiple of that sector size + */ + if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) { + printk(KERN_ERR "%s chunk_size of %d not valid\n", + mdname(mddev), + mddev->chunk_sectors << 9); + goto abort; + } + + blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); + blk_queue_io_opt(mddev->queue, + (mddev->chunk_sectors << 9) * mddev->raid_disks); + printk(KERN_INFO "raid0: done.\n"); mddev->private = conf; return 0; @@ -229,10 +285,15 @@ static int raid0_mergeable_bvec(struct request_queue *q, mddev_t *mddev = q->queuedata; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; - unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bvm->bi_size >> 9; - max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; + if (is_power_of_2(chunk_sectors)) + max = (chunk_sectors - ((sector & (chunk_sectors-1)) + + bio_sectors)) << 9; + else + max = (chunk_sectors - (sector_div(sector, chunk_sectors) + + bio_sectors)) << 9; if (max < 0) max = 0; /* bio_add cannot handle a negative return */ if (max <= biovec->bv_len && bio_sectors == 0) return biovec->bv_len; @@ -258,16 +319,13 @@ static int raid0_run(mddev_t *mddev) { int ret; - if (mddev->chunk_size == 0) { - printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); + if (mddev->chunk_sectors == 0) { + printk(KERN_ERR "md/raid0: chunk size must be set.\n"); return -EINVAL; } - printk(KERN_INFO "%s: setting max_sectors to %d, segment boundary to %d\n", - mdname(mddev), - mddev->chunk_size >> 9, - (mddev->chunk_size>>1)-1); - blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9); - blk_queue_segment_boundary(mddev->queue, (mddev->chunk_size>>1) - 1); + if (md_check_no_bitmap(mddev)) + return -EINVAL; + blk_queue_max_sectors(mddev->queue, mddev->chunk_sectors); mddev->queue->queue_lock = &mddev->queue->__queue_lock; ret = create_strip_zones(mddev); @@ -289,55 +347,113 @@ static int raid0_run(mddev_t *mddev) * chunksize should be used in that case. */ { - int stripe = mddev->raid_disks * mddev->chunk_size / PAGE_SIZE; + int stripe = mddev->raid_disks * + (mddev->chunk_sectors << 9) / PAGE_SIZE; if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) mddev->queue->backing_dev_info.ra_pages = 2* stripe; } blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); + dump_zones(mddev); + md_integrity_register(mddev); return 0; } -static int raid0_stop (mddev_t *mddev) +static int raid0_stop(mddev_t *mddev) { - raid0_conf_t *conf = mddev_to_conf(mddev); + raid0_conf_t *conf = mddev->private; blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ kfree(conf->strip_zone); - conf->strip_zone = NULL; + kfree(conf->devlist); kfree(conf); mddev->private = NULL; - return 0; } -/* Find the zone which holds a particular offset */ +/* Find the zone which holds a particular offset + * Update *sectorp to be an offset in that zone + */ static struct strip_zone *find_zone(struct raid0_private_data *conf, - sector_t sector) + sector_t *sectorp) { int i; struct strip_zone *z = conf->strip_zone; + sector_t sector = *sectorp; for (i = 0; i < conf->nr_strip_zones; i++) - if (sector < z[i].zone_end) + if (sector < z[i].zone_end) { + if (i) + *sectorp = sector - z[i-1].zone_end; return z + i; + } BUG(); } -static int raid0_make_request (struct request_queue *q, struct bio *bio) +/* + * remaps the bio to the target device. we separate two flows. + * power 2 flow and a general flow for the sake of perfromance +*/ +static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone, + sector_t sector, sector_t *sector_offset) +{ + unsigned int sect_in_chunk; + sector_t chunk; + raid0_conf_t *conf = mddev->private; + unsigned int chunk_sects = mddev->chunk_sectors; + + if (is_power_of_2(chunk_sects)) { + int chunksect_bits = ffz(~chunk_sects); + /* find the sector offset inside the chunk */ + sect_in_chunk = sector & (chunk_sects - 1); + sector >>= chunksect_bits; + /* chunk in zone */ + chunk = *sector_offset; + /* quotient is the chunk in real device*/ + sector_div(chunk, zone->nb_dev << chunksect_bits); + } else{ + sect_in_chunk = sector_div(sector, chunk_sects); + chunk = *sector_offset; + sector_div(chunk, chunk_sects * zone->nb_dev); + } + /* + * position the bio over the real device + * real sector = chunk in device + starting of zone + * + the position in the chunk + */ + *sector_offset = (chunk * chunk_sects) + sect_in_chunk; + return conf->devlist[(zone - conf->strip_zone)*mddev->raid_disks + + sector_div(sector, zone->nb_dev)]; +} + +/* + * Is io distribute over 1 or more chunks ? +*/ +static inline int is_io_in_chunk_boundary(mddev_t *mddev, + unsigned int chunk_sects, struct bio *bio) +{ + if (likely(is_power_of_2(chunk_sects))) { + return chunk_sects >= ((bio->bi_sector & (chunk_sects-1)) + + (bio->bi_size >> 9)); + } else{ + sector_t sector = bio->bi_sector; + return chunk_sects >= (sector_div(sector, chunk_sects) + + (bio->bi_size >> 9)); + } +} + +static int raid0_make_request(struct request_queue *q, struct bio *bio) { mddev_t *mddev = q->queuedata; - unsigned int sect_in_chunk, chunksect_bits, chunk_sects; - raid0_conf_t *conf = mddev_to_conf(mddev); + unsigned int chunk_sects; + sector_t sector_offset; struct strip_zone *zone; mdk_rdev_t *tmp_dev; - sector_t chunk; - sector_t sector, rsect; const int rw = bio_data_dir(bio); int cpu; - if (unlikely(bio_barrier(bio))) { - bio_endio(bio, -EOPNOTSUPP); + if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { + md_barrier_request(mddev, bio); return 0; } @@ -347,11 +463,9 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) bio_sectors(bio)); part_stat_unlock(); - chunk_sects = mddev->chunk_size >> 9; - chunksect_bits = ffz(~chunk_sects); - sector = bio->bi_sector; - - if (unlikely(chunk_sects < (bio->bi_sector & (chunk_sects - 1)) + (bio->bi_size >> 9))) { + chunk_sects = mddev->chunk_sectors; + if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) { + sector_t sector = bio->bi_sector; struct bio_pair *bp; /* Sanity check -- queue functions should prevent this happening */ if (bio->bi_vcnt != 1 || @@ -360,7 +474,12 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. */ - bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1))); + if (likely(is_power_of_2(chunk_sects))) + bp = bio_split(bio, chunk_sects - (sector & + (chunk_sects-1))); + else + bp = bio_split(bio, chunk_sects - + sector_div(sector, chunk_sects)); if (raid0_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); if (raid0_make_request(q, &bp->bio2)) @@ -369,23 +488,14 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) bio_pair_release(bp); return 0; } - zone = find_zone(conf, sector); - sect_in_chunk = bio->bi_sector & (chunk_sects - 1); - { - sector_t x = (zone->sectors + sector - zone->zone_end) - >> chunksect_bits; - - sector_div(x, zone->nb_dev); - chunk = x; - x = sector >> chunksect_bits; - tmp_dev = zone->dev[sector_div(x, zone->nb_dev)]; - } - rsect = (chunk << chunksect_bits) + zone->dev_start + sect_in_chunk; - + sector_offset = bio->bi_sector; + zone = find_zone(mddev->private, §or_offset); + tmp_dev = map_sector(mddev, zone, bio->bi_sector, + §or_offset); bio->bi_bdev = tmp_dev->bdev; - bio->bi_sector = rsect + tmp_dev->data_offset; - + bio->bi_sector = sector_offset + zone->dev_start + + tmp_dev->data_offset; /* * Let the main block layer submit the IO and resolve recursion: */ @@ -400,29 +510,35 @@ bad_map: return 0; } -static void raid0_status (struct seq_file *seq, mddev_t *mddev) +static void raid0_status(struct seq_file *seq, mddev_t *mddev) { #undef MD_DEBUG #ifdef MD_DEBUG int j, k, h; char b[BDEVNAME_SIZE]; - raid0_conf_t *conf = mddev_to_conf(mddev); + raid0_conf_t *conf = mddev->private; + sector_t zone_size; + sector_t zone_start = 0; h = 0; + for (j = 0; j < conf->nr_strip_zones; j++) { seq_printf(seq, " z%d", j); seq_printf(seq, "=["); for (k = 0; k < conf->strip_zone[j].nb_dev; k++) seq_printf(seq, "%s/", bdevname( - conf->strip_zone[j].dev[k]->bdev,b)); - - seq_printf(seq, "] ze=%d ds=%d s=%d\n", - conf->strip_zone[j].zone_end, - conf->strip_zone[j].dev_start, - conf->strip_zone[j].sectors); + conf->devlist[j*mddev->raid_disks + k] + ->bdev, b)); + + zone_size = conf->strip_zone[j].zone_end - zone_start; + seq_printf(seq, "] ze=%lld ds=%lld s=%lld\n", + (unsigned long long)zone_start>>1, + (unsigned long long)conf->strip_zone[j].dev_start>>1, + (unsigned long long)zone_size>>1); + zone_start = conf->strip_zone[j].zone_end; } #endif - seq_printf(seq, " %dk chunks", mddev->chunk_size/1024); + seq_printf(seq, " %dk chunks", mddev->chunk_sectors / 2); return; } @@ -451,6 +567,7 @@ static void raid0_exit (void) module_init(raid0_init); module_exit(raid0_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("RAID0 (striping) personality for MD"); MODULE_ALIAS("md-personality-2"); /* RAID0 */ MODULE_ALIAS("md-raid0"); MODULE_ALIAS("md-level-0");