[PATCH] md: allow hot-add and hot-remove of md intent logging bitmaps
[safe/jmp/linux-2.6] / drivers / md / md.c
index aa72c88..ae65446 100644 (file)
@@ -19,6 +19,9 @@
 
      Neil Brown <neilb@cse.unsw.edu.au>.
 
+   - persistent bitmap code
+     Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.
+
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2, or (at your option)
@@ -33,6 +36,7 @@
 #include <linux/config.h>
 #include <linux/linkage.h>
 #include <linux/raid/md.h>
+#include <linux/raid/bitmap.h>
 #include <linux/sysctl.h>
 #include <linux/devfs_fs_kernel.h>
 #include <linux/buffer_head.h> /* for invalidate_bdev */
@@ -40,6 +44,8 @@
 
 #include <linux/init.h>
 
+#include <linux/file.h>
+
 #ifdef CONFIG_KMOD
 #include <linux/kmod.h>
 #endif
@@ -189,8 +195,7 @@ static mddev_t * mddev_find(dev_t unit)
                if (mddev->unit == unit) {
                        mddev_get(mddev);
                        spin_unlock(&all_mddevs_lock);
-                       if (new)
-                               kfree(new);
+                       kfree(new);
                        return mddev;
                }
 
@@ -218,6 +223,8 @@ static mddev_t * mddev_find(dev_t unit)
        INIT_LIST_HEAD(&new->all_mddevs);
        init_timer(&new->safemode_timer);
        atomic_set(&new->active, 1);
+       spin_lock_init(&new->write_lock);
+       init_waitqueue_head(&new->sb_wait);
 
        new->queue = blk_alloc_queue(GFP_KERNEL);
        if (!new->queue) {
@@ -249,8 +256,7 @@ static inline void mddev_unlock(mddev_t * mddev)
 {
        up(&mddev->reconfig_sem);
 
-       if (mddev->thread)
-               md_wakeup_thread(mddev->thread);
+       md_wakeup_thread(mddev->thread);
 }
 
 mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
@@ -277,7 +283,7 @@ static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
        return NULL;
 }
 
-inline static sector_t calc_dev_sboffset(struct block_device *bdev)
+static inline sector_t calc_dev_sboffset(struct block_device *bdev)
 {
        sector_t size = bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
        return MD_NEW_SIZE_BLOCKS(size);
@@ -320,6 +326,41 @@ static void free_disk_sb(mdk_rdev_t * rdev)
 }
 
 
+/*
+ * bi_end_io handler installed by md_super_write() for superblock writes.
+ *
+ * Returns 1 while bio->bi_size is still non-zero, 0 once the bio has been
+ * dealt with.  On a failed write the owning rdev is reported via md_error().
+ * The array's pending_writes count is dropped and, when it reaches zero,
+ * waiters on sb_wait are woken.  The bio reference is released last.
+ */
+static int super_written(struct bio *bio, unsigned int bytes_done, int error)
+{
+       mdk_rdev_t *rdev;
+       int write_failed;
+
+       /* bi_size still non-zero: nothing to finish yet */
+       if (bio->bi_size != 0)
+               return 1;
+
+       rdev = bio->bi_private;
+       write_failed = error || !test_bit(BIO_UPTODATE, &bio->bi_flags);
+       if (write_failed)
+               md_error(rdev->mddev, rdev);
+
+       /* last outstanding superblock write: wake whoever sleeps on sb_wait */
+       if (atomic_dec_and_test(&rdev->mddev->pending_writes))
+               wake_up(&rdev->mddev->sb_wait);
+
+       bio_put(bio);
+       return 0;
+}
+
+void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
+                  sector_t sector, int size, struct page *page)
+{
+       /* Submit a write of the first 'size' bytes of 'page' to
+        * 'sector' of rdev->bdev.  This function only submits the
+        * bio; it does not itself wait for the write to finish.
+        *
+        * mddev->pending_writes is incremented before returning;
+        * the completion handler (super_written) decrements it and
+        * wakes mddev->sb_wait if zero is reached.
+        * If an error occurred, super_written calls md_error on rdev.
+        *
+        * NOTE(review): the bio_alloc() result is dereferenced without
+        * a NULL check -- confirm it cannot fail with GFP_NOIO here.
+        */
+       struct bio *bio = bio_alloc(GFP_NOIO, 1);
+
+       bio->bi_bdev = rdev->bdev;
+       bio->bi_sector = sector;
+       bio_add_page(bio, page, size, 0);
+       bio->bi_private = rdev; /* super_written reads the rdev back from here */
+       bio->bi_end_io = super_written;
+       atomic_inc(&mddev->pending_writes); /* counted before submit_bio(); dropped in super_written */
+       submit_bio((1<<BIO_RW)|(1<<BIO_RW_SYNC), bio);
+}
+
 static int bi_complete(struct bio *bio, unsigned int bytes_done, int error)
 {
        if (bio->bi_size)
@@ -329,7 +370,7 @@ static int bi_complete(struct bio *bio, unsigned int bytes_done, int error)
        return 0;
 }
 
-static int sync_page_io(struct block_device *bdev, sector_t sector, int size,
+int sync_page_io(struct block_device *bdev, sector_t sector, int size,
                   struct page *page, int rw)
 {
        struct bio *bio = bio_alloc(GFP_NOIO, 1);
@@ -416,11 +457,8 @@ static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
                ret = 1;
 
 abort:
-       if (tmp1)
-               kfree(tmp1);
-       if (tmp2)
-               kfree(tmp2);
-
+       kfree(tmp1);
+       kfree(tmp2);
        return ret;
 }
 
@@ -569,6 +607,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
        mdp_disk_t *desc;
        mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
 
+       rdev->raid_disk = -1;
+       rdev->in_sync = 0;
        if (mddev->raid_disks == 0) {
                mddev->major_version = 0;
                mddev->minor_version = sb->minor_version;
@@ -582,6 +622,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                mddev->raid_disks = sb->raid_disks;
                mddev->size = sb->size;
                mddev->events = md_event(sb);
+               mddev->bitmap_offset = 0;
+               mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
 
                if (sb->state & (1<<MD_SB_CLEAN))
                        mddev->recovery_cp = MaxSector;
@@ -599,16 +641,35 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
 
                mddev->max_disks = MD_SB_DISKS;
-       } else {
-               __u64 ev1;
-               ev1 = md_event(sb);
+
+               if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
+                   mddev->bitmap_file == NULL) {
+                       if (mddev->level != 1) {
+                               /* FIXME use a better test */
+                               printk(KERN_WARNING "md: bitmaps only support for raid1\n");
+                               return -EINVAL;
+                       }
+                       mddev->bitmap_offset = mddev->default_bitmap_offset;
+               }
+
+       } else if (mddev->pers == NULL) {
+               /* Insist on good event counter while assembling */
+               __u64 ev1 = md_event(sb);
                ++ev1;
                if (ev1 < mddev->events) 
                        return -EINVAL;
-       }
+       } else if (mddev->bitmap) {
+               /* if adding to array with a bitmap, then we can accept an
+                * older device ... but not too old.
+                */
+               __u64 ev1 = md_event(sb);
+               if (ev1 < mddev->bitmap->events_cleared)
+                       return 0;
+       } else /* just a hot-add of a new device, leave raid_disk at -1 */
+               return 0;
+
        if (mddev->level != LEVEL_MULTIPATH) {
-               rdev->raid_disk = -1;
-               rdev->in_sync = rdev->faulty = 0;
+               rdev->faulty = 0;
                desc = sb->disks + rdev->desc_nr;
 
                if (desc->state & (1<<MD_DISK_FAULTY))
@@ -618,7 +679,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                        rdev->in_sync = 1;
                        rdev->raid_disk = desc->raid_disk;
                }
-       }
+       } else /* MULTIPATH are always insync */
+               rdev->in_sync = 1;
        return 0;
 }
 
@@ -683,6 +745,9 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
        sb->layout = mddev->layout;
        sb->chunk_size = mddev->chunk_size;
 
+       if (mddev->bitmap && mddev->bitmap_file == NULL)
+               sb->state |= (1<<MD_SB_BITMAP_PRESENT);
+
        sb->disks[0].state = (1<<MD_DISK_REMOVED);
        ITERATE_RDEV(mddev,rdev2,tmp) {
                mdp_disk_t *d;
@@ -780,7 +845,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
        case 0:
                sb_offset = rdev->bdev->bd_inode->i_size >> 9;
                sb_offset -= 8*2;
-               sb_offset &= ~(4*2-1);
+               sb_offset &= ~(sector_t)(4*2-1);
                /* convert from sectors to K */
                sb_offset /= 2;
                break;
@@ -860,6 +925,8 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 {
        struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
 
+       rdev->raid_disk = -1;
+       rdev->in_sync = 0;
        if (mddev->raid_disks == 0) {
                mddev->major_version = 1;
                mddev->patch_version = 0;
@@ -872,18 +939,39 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                mddev->raid_disks = le32_to_cpu(sb->raid_disks);
                mddev->size = le64_to_cpu(sb->size)/2;
                mddev->events = le64_to_cpu(sb->events);
+               mddev->bitmap_offset = 0;
+               mddev->default_bitmap_offset = 0;
+               if (mddev->minor_version == 0)
+                       mddev->default_bitmap_offset = -(64*1024)/512;
                
                mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
                memcpy(mddev->uuid, sb->set_uuid, 16);
 
                mddev->max_disks =  (4096-256)/2;
-       } else {
-               __u64 ev1;
-               ev1 = le64_to_cpu(sb->events);
+
+               if ((le32_to_cpu(sb->feature_map) & 1) &&
+                   mddev->bitmap_file == NULL ) {
+                       if (mddev->level != 1) {
+                               printk(KERN_WARNING "md: bitmaps only supported for raid1\n");
+                               return -EINVAL;
+                       }
+                       mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset);
+               }
+       } else if (mddev->pers == NULL) {
+               /* Insist on good event counter while assembling */
+               __u64 ev1 = le64_to_cpu(sb->events);
                ++ev1;
                if (ev1 < mddev->events)
                        return -EINVAL;
-       }
+       } else if (mddev->bitmap) {
+               /* If adding to array with a bitmap, then we can accept an
+                * older device, but not too old.
+                */
+               __u64 ev1 = le64_to_cpu(sb->events);
+               if (ev1 < mddev->bitmap->events_cleared)
+                       return 0;
+       } else /* just a hot-add of a new device, leave raid_disk at -1 */
+               return 0;
 
        if (mddev->level != LEVEL_MULTIPATH) {
                int role;
@@ -891,14 +979,10 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
                switch(role) {
                case 0xffff: /* spare */
-                       rdev->in_sync = 0;
                        rdev->faulty = 0;
-                       rdev->raid_disk = -1;
                        break;
                case 0xfffe: /* faulty */
-                       rdev->in_sync = 0;
                        rdev->faulty = 1;
-                       rdev->raid_disk = -1;
                        break;
                default:
                        rdev->in_sync = 1;
@@ -906,7 +990,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                        rdev->raid_disk = role;
                        break;
                }
-       }
+       } else /* MULTIPATH are always insync */
+               rdev->in_sync = 1;
+
        return 0;
 }
 
@@ -933,6 +1019,11 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
        else
                sb->resync_offset = cpu_to_le64(0);
 
+       if (mddev->bitmap && mddev->bitmap_file == NULL) {
+               sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
+               sb->feature_map = cpu_to_le32(1);
+       }
+
        max_dev = 0;
        ITERATE_RDEV(mddev,rdev2,tmp)
                if (rdev2->desc_nr+1 > max_dev)
@@ -957,7 +1048,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 }
 
 
-struct super_type super_types[] = {
+static struct super_type super_types[] = {
        [0] = {
                .name   = "0.90.0",
                .owner  = THIS_MODULE,
@@ -1196,8 +1287,11 @@ void md_print_devices(void)
        printk("md:     * <COMPLETE RAID STATE PRINTOUT> *\n");
        printk("md:     **********************************\n");
        ITERATE_MDDEV(mddev,tmp) {
-               printk("%s: ", mdname(mddev));
 
+               if (mddev->bitmap)
+                       bitmap_print_sb(mddev->bitmap);
+               else
+                       printk("%s: ", mdname(mddev));
                ITERATE_RDEV(mddev,rdev,tmp2)
                        printk("<%s>", bdevname(rdev->bdev,b));
                printk("\n");
@@ -1210,30 +1304,6 @@ void md_print_devices(void)
 }
 
 
-static int write_disk_sb(mdk_rdev_t * rdev)
-{
-       char b[BDEVNAME_SIZE];
-       if (!rdev->sb_loaded) {
-               MD_BUG();
-               return 1;
-       }
-       if (rdev->faulty) {
-               MD_BUG();
-               return 1;
-       }
-
-       dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
-               bdevname(rdev->bdev,b),
-              (unsigned long long)rdev->sb_offset);
-  
-       if (sync_page_io(rdev->bdev, rdev->sb_offset<<1, MD_SB_BYTES, rdev->sb_page, WRITE))
-               return 0;
-
-       printk("md: write_disk_sb failed for device %s\n", 
-               bdevname(rdev->bdev,b));
-       return 1;
-}
-
 static void sync_sbs(mddev_t * mddev)
 {
        mdk_rdev_t *rdev;
@@ -1248,12 +1318,14 @@ static void sync_sbs(mddev_t * mddev)
 
 static void md_update_sb(mddev_t * mddev)
 {
-       int err, count = 100;
+       int err;
        struct list_head *tmp;
        mdk_rdev_t *rdev;
+       int sync_req;
 
-       mddev->sb_dirty = 0;
 repeat:
+       spin_lock(&mddev->write_lock);
+       sync_req = mddev->in_sync;
        mddev->utime = get_seconds();
        mddev->events ++;
 
@@ -1266,20 +1338,26 @@ repeat:
                MD_BUG();
                mddev->events --;
        }
+       mddev->sb_dirty = 2;
        sync_sbs(mddev);
 
        /*
         * do not write anything to disk if using
         * nonpersistent superblocks
         */
-       if (!mddev->persistent)
+       if (!mddev->persistent) {
+               mddev->sb_dirty = 0;
+               spin_unlock(&mddev->write_lock);
+               wake_up(&mddev->sb_wait);
                return;
+       }
+       spin_unlock(&mddev->write_lock);
 
        dprintk(KERN_INFO 
                "md: updating %s RAID superblock on device (in sync %d)\n",
                mdname(mddev),mddev->in_sync);
 
-       err = 0;
+       err = bitmap_update_sb(mddev->bitmap);
        ITERATE_RDEV(mddev,rdev,tmp) {
                char b[BDEVNAME_SIZE];
                dprintk(KERN_INFO "md: ");
@@ -1288,22 +1366,32 @@ repeat:
 
                dprintk("%s ", bdevname(rdev->bdev,b));
                if (!rdev->faulty) {
-                       err += write_disk_sb(rdev);
+                       md_super_write(mddev,rdev,
+                                      rdev->sb_offset<<1, MD_SB_BYTES,
+                                      rdev->sb_page);
+                       dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
+                               bdevname(rdev->bdev,b),
+                               (unsigned long long)rdev->sb_offset);
+
                } else
                        dprintk(")\n");
-               if (!err && mddev->level == LEVEL_MULTIPATH)
+               if (mddev->level == LEVEL_MULTIPATH)
                        /* only need to write one superblock... */
                        break;
        }
-       if (err) {
-               if (--count) {
-                       printk(KERN_ERR "md: errors occurred during superblock"
-                               " update, repeating\n");
-                       goto repeat;
-               }
-               printk(KERN_ERR \
-                       "md: excessive errors occurred during superblock update, exiting\n");
+       wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
+       /* if there was a failure, sb_dirty was set to 1, and we re-write super */
+
+       spin_lock(&mddev->write_lock);
+       if (mddev->in_sync != sync_req|| mddev->sb_dirty == 1) {
+               /* have to write it out again */
+               spin_unlock(&mddev->write_lock);
+               goto repeat;
        }
+       mddev->sb_dirty = 0;
+       spin_unlock(&mddev->write_lock);
+       wake_up(&mddev->sb_wait);
+
 }
 
 /*
@@ -1387,7 +1475,7 @@ abort_free:
  */
 
 
-static int analyze_sbs(mddev_t * mddev)
+static void analyze_sbs(mddev_t * mddev)
 {
        int i;
        struct list_head *tmp;
@@ -1441,7 +1529,6 @@ static int analyze_sbs(mddev_t * mddev)
                       " -- starting background reconstruction\n",
                       mdname(mddev));
 
-       return 0;
 }
 
 int mdp_major = 0;
@@ -1508,10 +1595,9 @@ static int do_md_run(mddev_t * mddev)
        struct gendisk *disk;
        char b[BDEVNAME_SIZE];
 
-       if (list_empty(&mddev->disks)) {
-               MD_BUG();
+       if (list_empty(&mddev->disks))
+               /* cannot run an array with no devices.. */
                return -EINVAL;
-       }
 
        if (mddev->pers)
                return -EBUSY;
@@ -1519,10 +1605,8 @@ static int do_md_run(mddev_t * mddev)
        /*
         * Analyze all RAID superblock(s)
         */
-       if (!mddev->raid_disks && analyze_sbs(mddev)) {
-               MD_BUG();
-               return -EINVAL;
-       }
+       if (!mddev->raid_disks)
+               analyze_sbs(mddev);
 
        chunk_size = mddev->chunk_size;
        pnum = level_to_pers(mddev->level);
@@ -1548,7 +1632,7 @@ static int do_md_run(mddev_t * mddev)
                 * chunk-size has to be a power of 2 and multiples of PAGE_SIZE
                 */
                if ( (1 << ffz(~chunk_size)) != chunk_size) {
-                       MD_BUG();
+                       printk(KERN_ERR "chunk_size of %d not valid\n", chunk_size);
                        return -EINVAL;
                }
                if (chunk_size < PAGE_SIZE) {
@@ -1573,11 +1657,6 @@ static int do_md_run(mddev_t * mddev)
                }
        }
 
-       if (pnum >= MAX_PERSONALITY) {
-               MD_BUG();
-               return -EINVAL;
-       }
-
 #ifdef CONFIG_KMOD
        if (!pers[pnum])
        {
@@ -1614,14 +1693,22 @@ static int do_md_run(mddev_t * mddev)
        mddev->pers = pers[pnum];
        spin_unlock(&pers_lock);
 
+       mddev->recovery = 0;
        mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
 
-       err = mddev->pers->run(mddev);
+       /* before we start the array running, initialise the bitmap */
+       err = bitmap_create(mddev);
+       if (err)
+               printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
+                       mdname(mddev), err);
+       else
+               err = mddev->pers->run(mddev);
        if (err) {
                printk(KERN_ERR "md: pers->run() failed ...\n");
                module_put(mddev->pers->owner);
                mddev->pers = NULL;
-               return -EINVAL;
+               bitmap_destroy(mddev);
+               return err;
        }
        atomic_set(&mddev->writes_pending,0);
        mddev->safemode = 0;
@@ -1631,6 +1718,7 @@ static int do_md_run(mddev_t * mddev)
        mddev->in_sync = 1;
        
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+       md_wakeup_thread(mddev->thread);
        
        if (mddev->sb_dirty)
                md_update_sb(mddev);
@@ -1717,6 +1805,8 @@ static int do_md_stop(mddev_t * mddev, int ro)
                                goto out;
                        mddev->ro = 1;
                } else {
+                       bitmap_flush(mddev);
+                       wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
                        if (mddev->ro)
                                set_disk_ro(disk, 0);
                        blk_queue_make_request(mddev->queue, md_fail_request);
@@ -1734,6 +1824,15 @@ static int do_md_stop(mddev_t * mddev, int ro)
                if (ro)
                        set_disk_ro(disk, 1);
        }
+
+       bitmap_destroy(mddev);
+       if (mddev->bitmap_file) {
+               atomic_set(&mddev->bitmap_file->f_dentry->d_inode->i_writecount, 1);
+               fput(mddev->bitmap_file);
+               mddev->bitmap_file = NULL;
+       }
+       mddev->bitmap_offset = 0;
+
        /*
         * Free resources if final stop
         */
@@ -1762,10 +1861,8 @@ static void autorun_array(mddev_t *mddev)
        struct list_head *tmp;
        int err;
 
-       if (list_empty(&mddev->disks)) {
-               MD_BUG();
+       if (list_empty(&mddev->disks))
                return;
-       }
 
        printk(KERN_INFO "md: running: ");
 
@@ -1980,6 +2077,8 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
        info.state         = 0;
        if (mddev->in_sync)
                info.state = (1<<MD_SB_CLEAN);
+       if (mddev->bitmap && mddev->bitmap_offset)
+               info.state = (1<<MD_SB_BITMAP_PRESENT);
        info.active_disks  = active;
        info.working_disks = working;
        info.failed_disks  = failed;
@@ -1994,6 +2093,42 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
        return 0;
 }
 
+/*
+ * GET_BITMAP_FILE helper: copy an mdu_bitmap_file_t describing the array's
+ * bitmap file to the user buffer 'arg'.  The pathname is the empty string
+ * when the array has no bitmap or the bitmap has no backing file.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails or file_path()
+ * returns NULL, and -EFAULT if the copy to user space fails.
+ */
+static int get_bitmap_file(mddev_t * mddev, void __user * arg)
+{
+       mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */
+       char *ptr, *buf = NULL;
+       int err = -ENOMEM;
+
+       file = kmalloc(sizeof(*file), GFP_KERNEL);
+       if (!file)
+               goto out;
+
+       /* The whole structure is copied out below, so clear it first:
+        * otherwise every byte after the pathname's terminating NUL
+        * would expose uninitialised kernel heap to user space.
+        */
+       memset(file, 0, sizeof(*file));
+
+       /* bitmap disabled: pathname is already the empty string */
+       if (!mddev->bitmap || !mddev->bitmap->file)
+               goto copy_out;
+
+       /* scratch buffer, same size as the destination pathname field */
+       buf = kmalloc(sizeof(file->pathname), GFP_KERNEL);
+       if (!buf)
+               goto out;
+
+       ptr = file_path(mddev->bitmap->file, buf, sizeof(file->pathname));
+       if (!ptr)
+               goto out;
+
+       strcpy(file->pathname, ptr);
+
+copy_out:
+       err = 0;
+       if (copy_to_user(arg, file, sizeof(*file)))
+               err = -EFAULT;
+out:
+       kfree(buf);
+       kfree(file);
+       return err;
+}
+
 static int get_disk_info(mddev_t * mddev, void __user * arg)
 {
        mdu_disk_info_t info;
@@ -2089,13 +2224,26 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
                                PTR_ERR(rdev));
                        return PTR_ERR(rdev);
                }
+               /* set save_raid_disk if appropriate */
+               if (!mddev->persistent) {
+                       if (info->state & (1<<MD_DISK_SYNC)  &&
+                           info->raid_disk < mddev->raid_disks)
+                               rdev->raid_disk = info->raid_disk;
+                       else
+                               rdev->raid_disk = -1;
+               } else
+                       super_types[mddev->major_version].
+                               validate_super(mddev, rdev);
+               rdev->saved_raid_disk = rdev->raid_disk;
+
                rdev->in_sync = 0; /* just to be sure */
                rdev->raid_disk = -1;
                err = bind_rdev_to_array(rdev, mddev);
                if (err)
                        export_rdev(rdev);
-               if (mddev->thread)
-                       md_wakeup_thread(mddev->thread);
+
+               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+               md_wakeup_thread(mddev->thread);
                return err;
        }
 
@@ -2267,6 +2415,75 @@ abort_export:
        return err;
 }
 
+/*
+ * Mark the bitmap file's inode write-denied, in the manner of
+ * deny_write_access(), but allowing for the reference to the file
+ * that we hold ourselves.
+ *
+ * Returns 0 after setting i_writecount to -1, or -ETXTBSY (leaving the
+ * inode untouched) when i_writecount is greater than 1.
+ */
+static int deny_bitmap_write_access(struct file * file)
+{
+       struct inode *inode = file->f_mapping->host;
+       int ret = 0;
+
+       spin_lock(&inode->i_lock);
+       if (atomic_read(&inode->i_writecount) <= 1)
+               atomic_set(&inode->i_writecount, -1);
+       else
+               ret = -ETXTBSY;
+       spin_unlock(&inode->i_lock);
+
+       return ret;
+}
+
+/*
+ * SET_BITMAP_FILE helper.
+ *
+ *   fd >= 0: attach the file behind 'fd' as the array's bitmap file.
+ *   fd <  0: remove the array's current bitmap.
+ *
+ * On a running array (mddev->pers set) the personality must provide
+ * ->quiesce and no recovery may be flagged or running; the array is
+ * quiesced around the bitmap_create()/bitmap_destroy() call.
+ *
+ * Returns 0 on success, otherwise a negative errno: -EBUSY, -EEXIST,
+ * -EBADF, -ENOENT, the error from deny_bitmap_write_access(), or the
+ * error from bitmap_create().
+ *
+ * NOTE(review): a second call with fd >= 0 before the array is running
+ * overwrites mddev->bitmap_file without dropping the earlier reference.
+ */
+static int set_bitmap_file(mddev_t *mddev, int fd)
+{
+       int err;
+
+       if (mddev->pers) {
+               if (!mddev->pers->quiesce)
+                       return -EBUSY;
+               if (mddev->recovery || mddev->sync_thread)
+                       return -EBUSY;
+               /* we should be able to change the bitmap.. */
+       }
+
+       if (fd >= 0) {
+               if (mddev->bitmap)
+                       return -EEXIST; /* cannot add when bitmap is present */
+               mddev->bitmap_file = fget(fd);
+
+               if (mddev->bitmap_file == NULL) {
+                       printk(KERN_ERR "%s: error: failed to get bitmap file\n",
+                              mdname(mddev));
+                       return -EBADF;
+               }
+
+               err = deny_bitmap_write_access(mddev->bitmap_file);
+               if (err) {
+                       printk(KERN_ERR "%s: error: bitmap file is already in use\n",
+                              mdname(mddev));
+                       fput(mddev->bitmap_file);
+                       mddev->bitmap_file = NULL;
+                       return err;
+               }
+               mddev->bitmap_offset = 0; /* file overrides offset */
+       } else if (mddev->bitmap == NULL)
+               return -ENOENT; /* cannot remove what isn't there */
+       err = 0;
+       if (mddev->pers) {
+               mddev->pers->quiesce(mddev, 1);
+               if (fd >= 0)
+                       err = bitmap_create(mddev);
+               if (fd < 0 || err) {
+                       bitmap_destroy(mddev);
+                       fd = -1; /* make sure the file is released below */
+               }
+               mddev->pers->quiesce(mddev, 0);
+       }
+       if (fd < 0 && mddev->bitmap_file) {
+               struct inode *inode = mddev->bitmap_file->f_mapping->host;
+
+               /* Removal, or a failed create: undo deny_bitmap_write_access()
+                * and drop the reference taken by fget(), as do_md_stop()
+                * does.  Otherwise the file stays pinned and write-denied,
+                * and mddev->bitmap_file keeps pointing at it.
+                */
+               spin_lock(&inode->i_lock);
+               atomic_set(&inode->i_writecount, 1);
+               spin_unlock(&inode->i_lock);
+               fput(mddev->bitmap_file);
+               mddev->bitmap_file = NULL;
+       }
+
+       return err;
+}
+
 /*
  * set_array_info is used two different ways
  * The original usage is when creating a new array.
@@ -2343,6 +2560,11 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
 {
        int rv = 0;
        int cnt = 0;
+       int state = 0;
+
+       /* calculate expected state,ignoring low bits */
+       if (mddev->bitmap && mddev->bitmap_offset)
+               state |= (1 << MD_SB_BITMAP_PRESENT);
 
        if (mddev->major_version != info->major_version ||
            mddev->minor_version != info->minor_version ||
@@ -2351,12 +2573,16 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
            mddev->level         != info->level         ||
 /*         mddev->layout        != info->layout        || */
            !mddev->persistent   != info->not_persistent||
-           mddev->chunk_size    != info->chunk_size    )
+           mddev->chunk_size    != info->chunk_size    ||
+           /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
+           ((state^info->state) & 0xfffffe00)
+               )
                return -EINVAL;
        /* Check there is only one change */
        if (mddev->size != info->size) cnt++;
        if (mddev->raid_disks != info->raid_disks) cnt++;
        if (mddev->layout != info->layout) cnt++;
+       if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) cnt++;
        if (cnt == 0) return 0;
        if (cnt > 1) return -EINVAL;
 
@@ -2435,6 +2661,35 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
                        }
                }
        }
+       if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
+               if (mddev->pers->quiesce == NULL)
+                       return -EINVAL;
+               if (mddev->recovery || mddev->sync_thread)
+                       return -EBUSY;
+               if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
+                       /* add the bitmap */
+                       if (mddev->bitmap)
+                               return -EEXIST;
+                       if (mddev->default_bitmap_offset == 0)
+                               return -EINVAL;
+                       mddev->bitmap_offset = mddev->default_bitmap_offset;
+                       mddev->pers->quiesce(mddev, 1);
+                       rv = bitmap_create(mddev);
+                       if (rv)
+                               bitmap_destroy(mddev);
+                       mddev->pers->quiesce(mddev, 0);
+               } else {
+                       /* remove the bitmap */
+                       if (!mddev->bitmap)
+                               return -ENOENT;
+                       if (mddev->bitmap->file)
+                               return -EINVAL;
+                       mddev->pers->quiesce(mddev, 1);
+                       bitmap_destroy(mddev);
+                       mddev->pers->quiesce(mddev, 0);
+                       mddev->bitmap_offset = 0;
+               }
+       }
        md_update_sb(mddev);
        return rv;
 }
@@ -2578,8 +2833,10 @@ static int md_ioctl(struct inode *inode, struct file *file,
        /*
         * Commands querying/configuring an existing array:
         */
-       /* if we are initialised yet, only ADD_NEW_DISK or STOP_ARRAY is allowed */
-       if (!mddev->raid_disks && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY && cmd != RUN_ARRAY) {
+       /* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY,
+        * RUN_ARRAY, and SET_BITMAP_FILE are allowed */
+       if (!mddev->raid_disks && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
+                       && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE) {
                err = -ENODEV;
                goto abort_unlock;
        }
@@ -2593,6 +2850,10 @@ static int md_ioctl(struct inode *inode, struct file *file,
                        err = get_array_info(mddev, argp);
                        goto done_unlock;
 
+               case GET_BITMAP_FILE:
+                       err = get_bitmap_file(mddev, argp);
+                       goto done_unlock;
+
                case GET_DISK_INFO:
                        err = get_disk_info(mddev, argp);
                        goto done_unlock;
@@ -2673,6 +2934,10 @@ static int md_ioctl(struct inode *inode, struct file *file,
                        err = do_md_run (mddev);
                        goto done_unlock;
 
+               case SET_BITMAP_FILE:
+                       err = set_bitmap_file(mddev, (int)arg);
+                       goto done_unlock;
+
                default:
                        if (_IOC_TYPE(cmd) == MD_MAJOR)
                                printk(KERN_WARNING "md: %s(pid %d) used"
@@ -2751,7 +3016,7 @@ static struct block_device_operations md_fops =
        .revalidate_disk= md_revalidate,
 };
 
-int md_thread(void * arg)
+static int md_thread(void * arg)
 {
        mdk_thread_t *thread = arg;
 
@@ -2784,10 +3049,10 @@ int md_thread(void * arg)
        while (thread->run) {
                void (*run)(mddev_t *);
 
-               wait_event_interruptible(thread->wqueue,
-                                        test_bit(THREAD_WAKEUP, &thread->flags));
-               if (current->flags & PF_FREEZE)
-                       refrigerator(PF_FREEZE);
+               wait_event_interruptible_timeout(thread->wqueue,
+                                                test_bit(THREAD_WAKEUP, &thread->flags),
+                                                thread->timeout);
+               try_to_freeze();
 
                clear_bit(THREAD_WAKEUP, &thread->flags);
 
@@ -2831,6 +3096,7 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
        thread->run = run;
        thread->mddev = mddev;
        thread->name = name;
+       thread->timeout = MAX_SCHEDULE_TIMEOUT;
        ret = kernel_thread(md_thread, thread, 0);
        if (ret < 0) {
                kfree(thread);
@@ -2840,16 +3106,6 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
        return thread;
 }
 
-static void md_interrupt_thread(mdk_thread_t *thread)
-{
-       if (!thread->tsk) {
-               MD_BUG();
-               return;
-       }
-       dprintk("interrupting MD-thread pid %d\n", thread->tsk->pid);
-       send_sig(SIGKILL, thread->tsk, 1);
-}
-
 void md_unregister_thread(mdk_thread_t *thread)
 {
        struct completion event;
@@ -2857,9 +3113,15 @@ void md_unregister_thread(mdk_thread_t *thread)
        init_completion(&event);
 
        thread->event = &event;
+
+       /* As soon as ->run is set to NULL, the task could disappear,
+        * so we need to hold tasklist_lock until we have sent the signal
+        */
+       dprintk("interrupting MD-thread pid %d\n", thread->tsk->pid);
+       read_lock(&tasklist_lock);
        thread->run = NULL;
-       thread->name = NULL;
-       md_interrupt_thread(thread);
+       send_sig(SIGKILL, thread->tsk, 1);
+       read_unlock(&tasklist_lock);
        wait_for_completion(&event);
        kfree(thread);
 }
@@ -2873,13 +3135,13 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
 
        if (!rdev || rdev->faulty)
                return;
-
+/*
        dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n",
                mdname(mddev),
                MAJOR(rdev->bdev->bd_dev), MINOR(rdev->bdev->bd_dev),
                __builtin_return_address(0),__builtin_return_address(1),
                __builtin_return_address(2),__builtin_return_address(3));
-
+*/
        if (!mddev->pers->error_handler)
                return;
        mddev->pers->error_handler(mddev,rdev);
@@ -3033,6 +3295,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
        struct list_head *tmp2;
        mdk_rdev_t *rdev;
        int i;
+       struct bitmap *bitmap;
 
        if (v == (void*)1) {
                seq_printf(seq, "Personalities : ");
@@ -3085,10 +3348,35 @@ static int md_seq_show(struct seq_file *seq, void *v)
                if (mddev->pers) {
                        mddev->pers->status (seq, mddev);
                        seq_printf(seq, "\n      ");
-                       if (mddev->curr_resync > 2)
+                       if (mddev->curr_resync > 2) {
                                status_resync (seq, mddev);
-                       else if (mddev->curr_resync == 1 || mddev->curr_resync == 2)
-                               seq_printf(seq, "       resync=DELAYED");
+                               seq_printf(seq, "\n      ");
+                       } else if (mddev->curr_resync == 1 || mddev->curr_resync == 2)
+                               seq_printf(seq, "       resync=DELAYED\n      ");
+               } else
+                       seq_printf(seq, "\n       ");
+
+               if ((bitmap = mddev->bitmap)) {
+                       unsigned long chunk_kb;
+                       unsigned long flags;
+                       spin_lock_irqsave(&bitmap->lock, flags);
+                       chunk_kb = bitmap->chunksize >> 10;
+                       seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
+                               "%lu%s chunk",
+                               bitmap->pages - bitmap->missing_pages,
+                               bitmap->pages,
+                               (bitmap->pages - bitmap->missing_pages)
+                                       << (PAGE_SHIFT - 10),
+                               chunk_kb ? chunk_kb : bitmap->chunksize,
+                               chunk_kb ? "KB" : "B");
+                       if (bitmap->file) {
+                               seq_printf(seq, ", file: ");
+                               seq_path(seq, bitmap->file->f_vfsmnt,
+                                        bitmap->file->f_dentry," \t\n");
+                       }
+
+                       seq_printf(seq, "\n");
+                       spin_unlock_irqrestore(&bitmap->lock, flags);
                }
 
                seq_printf(seq, "\n");
@@ -3132,7 +3420,6 @@ int register_md_personality(int pnum, mdk_personality_t *p)
        spin_lock(&pers_lock);
        if (pers[pnum]) {
                spin_unlock(&pers_lock);
-               MD_BUG();
                return -EBUSY;
        }
 
@@ -3144,10 +3431,8 @@ int register_md_personality(int pnum, mdk_personality_t *p)
 
 int unregister_md_personality(int pnum)
 {
-       if (pnum >= MAX_PERSONALITY) {
-               MD_BUG();
+       if (pnum >= MAX_PERSONALITY)
                return -EINVAL;
-       }
 
        printk(KERN_INFO "md: %s personality unregistered\n", pers[pnum]->name);
        spin_lock(&pers_lock);
@@ -3194,19 +3479,28 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
 }
 
 
-void md_write_start(mddev_t *mddev)
+/* md_write_start(mddev, bi)
+ * If we need to update some array metadata (e.g. 'active' flag
+ * in superblock) before writing, schedule a superblock update
+ * and wait for it to complete.
+ */
+void md_write_start(mddev_t *mddev, struct bio *bi)
 {
-       if (!atomic_read(&mddev->writes_pending)) {
-               mddev_lock_uninterruptible(mddev);
+       DEFINE_WAIT(w);
+       if (bio_data_dir(bi) != WRITE)
+               return;
+
+       atomic_inc(&mddev->writes_pending);
+       if (mddev->in_sync) {
+               spin_lock(&mddev->write_lock);
                if (mddev->in_sync) {
                        mddev->in_sync = 0;
-                       del_timer(&mddev->safemode_timer);
-                       md_update_sb(mddev);
+                       mddev->sb_dirty = 1;
+                       md_wakeup_thread(mddev->thread);
                }
-               atomic_inc(&mddev->writes_pending);
-               mddev_unlock(mddev);
-       } else
-               atomic_inc(&mddev->writes_pending);
+               spin_unlock(&mddev->write_lock);
+       }
+       wait_event(mddev->sb_wait, mddev->sb_dirty==0);
 }
 
 void md_write_end(mddev_t *mddev)
@@ -3219,38 +3513,7 @@ void md_write_end(mddev_t *mddev)
        }
 }
 
-static inline void md_enter_safemode(mddev_t *mddev)
-{
-       if (!mddev->safemode) return;
-       if (mddev->safemode == 2 &&
-           (atomic_read(&mddev->writes_pending) || mddev->in_sync ||
-                   mddev->recovery_cp != MaxSector))
-               return; /* avoid the lock */
-       mddev_lock_uninterruptible(mddev);
-       if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
-           !mddev->in_sync && mddev->recovery_cp == MaxSector) {
-               mddev->in_sync = 1;
-               md_update_sb(mddev);
-       }
-       mddev_unlock(mddev);
-
-       if (mddev->safemode == 1)
-               mddev->safemode = 0;
-}
-
-void md_handle_safemode(mddev_t *mddev)
-{
-       if (signal_pending(current)) {
-               printk(KERN_INFO "md: %s in immediate safe mode\n",
-                       mdname(mddev));
-               mddev->safemode = 2;
-               flush_signals(current);
-       }
-       md_enter_safemode(mddev);
-}
-
-
-DECLARE_WAIT_QUEUE_HEAD(resync_wait);
+static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
 
 #define SYNC_MARKS     10
 #define        SYNC_MARK_STEP  (3*HZ)
@@ -3259,12 +3522,13 @@ static void md_do_sync(mddev_t *mddev)
        mddev_t *mddev2;
        unsigned int currspeed = 0,
                 window;
-       sector_t max_sectors,j;
+       sector_t max_sectors,j, io_sectors;
        unsigned long mark[SYNC_MARKS];
        sector_t mark_cnt[SYNC_MARKS];
        int last_mark,m;
        struct list_head *tmp;
        sector_t last_check;
+       int skipped = 0;
 
        /* just incase thread restarts... */
        if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
@@ -3295,7 +3559,6 @@ static void md_do_sync(mddev_t *mddev)
                        goto skip;
                }
                ITERATE_MDDEV(mddev2,tmp) {
-                       printk(".");
                        if (mddev2 == mddev)
                                continue;
                        if (mddev2->curr_resync && 
@@ -3330,7 +3593,7 @@ static void md_do_sync(mddev_t *mddev)
 
        if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
                /* resync follows the size requested by the personality,
-                * which default to physical size, but can be virtual size
+                * which defaults to physical size, but can be virtual size
                 */
                max_sectors = mddev->resync_max_sectors;
        else
@@ -3345,13 +3608,15 @@ static void md_do_sync(mddev_t *mddev)
               sysctl_speed_limit_max);
 
        is_mddev_idle(mddev); /* this also initializes IO event counters */
-       if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
+       /* we don't use the checkpoint if there's a bitmap */
+       if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap)
                j = mddev->recovery_cp;
        else
                j = 0;
+       io_sectors = 0;
        for (m = 0; m < SYNC_MARKS; m++) {
                mark[m] = jiffies;
-               mark_cnt[m] = j;
+               mark_cnt[m] = io_sectors;
        }
        last_mark = 0;
        mddev->resync_mark = mark[last_mark];
@@ -3376,21 +3641,29 @@ static void md_do_sync(mddev_t *mddev)
        }
 
        while (j < max_sectors) {
-               int sectors;
+               sector_t sectors;
 
-               sectors = mddev->pers->sync_request(mddev, j, currspeed < sysctl_speed_limit_min);
-               if (sectors < 0) {
+               skipped = 0;
+               sectors = mddev->pers->sync_request(mddev, j, &skipped,
+                                           currspeed < sysctl_speed_limit_min);
+               if (sectors == 0) {
                        set_bit(MD_RECOVERY_ERR, &mddev->recovery);
                        goto out;
                }
-               atomic_add(sectors, &mddev->recovery_active);
+
+               if (!skipped) { /* actual IO requested */
+                       io_sectors += sectors;
+                       atomic_add(sectors, &mddev->recovery_active);
+               }
+
                j += sectors;
                if (j>1) mddev->curr_resync = j;
 
-               if (last_check + window > j || j == max_sectors)
+
+               if (last_check + window > io_sectors || j == max_sectors)
                        continue;
 
-               last_check = j;
+               last_check = io_sectors;
 
                if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) ||
                    test_bit(MD_RECOVERY_ERR, &mddev->recovery))
@@ -3404,7 +3677,7 @@ static void md_do_sync(mddev_t *mddev)
                        mddev->resync_mark = mark[next];
                        mddev->resync_mark_cnt = mark_cnt[next];
                        mark[next] = jiffies;
-                       mark_cnt[next] = j - atomic_read(&mddev->recovery_active);
+                       mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
                        last_mark = next;
                }
 
@@ -3431,7 +3704,8 @@ static void md_do_sync(mddev_t *mddev)
                mddev->queue->unplug_fn(mddev->queue);
                cond_resched();
 
-               currspeed = ((unsigned long)(j-mddev->resync_mark_cnt))/2/((jiffies-mddev->resync_mark)/HZ +1) +1;
+               currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
+                       /((jiffies-mddev->resync_mark)/HZ +1) +1;
 
                if (currspeed > sysctl_speed_limit_min) {
                        if ((currspeed > sysctl_speed_limit_max) ||
@@ -3451,7 +3725,7 @@ static void md_do_sync(mddev_t *mddev)
        wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
 
        /* tell personality that we are finished */
-       mddev->pers->sync_request(mddev, max_sectors, 1);
+       mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
 
        if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
            mddev->curr_resync > 2 &&
@@ -3465,7 +3739,6 @@ static void md_do_sync(mddev_t *mddev)
                        mddev->recovery_cp = MaxSector;
        }
 
-       md_enter_safemode(mddev);
  skip:
        mddev->curr_resync = 0;
        wake_up(&resync_wait);
@@ -3502,20 +3775,48 @@ void md_check_recovery(mddev_t *mddev)
        struct list_head *rtmp;
 
 
-       dprintk(KERN_INFO "md: recovery thread got woken up ...\n");
+       if (mddev->bitmap)
+               bitmap_daemon_work(mddev->bitmap);
 
        if (mddev->ro)
                return;
+
+       if (signal_pending(current)) {
+               if (mddev->pers->sync_request) {
+                       printk(KERN_INFO "md: %s in immediate safe mode\n",
+                              mdname(mddev));
+                       mddev->safemode = 2;
+               }
+               flush_signals(current);
+       }
+
        if ( ! (
                mddev->sb_dirty ||
                test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
-               test_bit(MD_RECOVERY_DONE, &mddev->recovery)
+               test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
+               (mddev->safemode == 1) ||
+               (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
+                && !mddev->in_sync && mddev->recovery_cp == MaxSector)
                ))
                return;
+
        if (mddev_trylock(mddev)==0) {
                int spares =0;
+
+               spin_lock(&mddev->write_lock);
+               if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
+                   !mddev->in_sync && mddev->recovery_cp == MaxSector) {
+                       mddev->in_sync = 1;
+                       mddev->sb_dirty = 1;
+               }
+               if (mddev->safemode == 1)
+                       mddev->safemode = 0;
+               spin_unlock(&mddev->write_lock);
+
                if (mddev->sb_dirty)
                        md_update_sb(mddev);
+
+
                if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
                    !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
                        /* resync/recovery still happening */
@@ -3533,6 +3834,14 @@ void md_check_recovery(mddev_t *mddev)
                                mddev->pers->spare_active(mddev);
                        }
                        md_update_sb(mddev);
+
+                       /* if array is no longer degraded, then any saved_raid_disk
+                        * information must be scrapped
+                        */
+                       if (!mddev->degraded)
+                               ITERATE_RDEV(mddev,rdev,rtmp)
+                                       rdev->saved_raid_disk = -1;
+
                        mddev->recovery = 0;
                        /* flag recovery needed just to double check */
                        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -3575,6 +3884,13 @@ void md_check_recovery(mddev_t *mddev)
                        set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
                        if (!spares)
                                set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+                       if (spares && mddev->bitmap && ! mddev->bitmap->file) {
+                               /* We are adding a device or devices to an array
+                                * which has the bitmap stored on all devices.
+                                * So make sure all bitmap pages get written
+                                */
+                               bitmap_write_all(mddev->bitmap);
+                       }
                        mddev->sync_thread = md_register_thread(md_do_sync,
                                                                mddev,
                                                                "%s_resync");
@@ -3593,8 +3909,8 @@ void md_check_recovery(mddev_t *mddev)
        }
 }
 
-int md_notify_reboot(struct notifier_block *this,
-                                       unsigned long code, void *x)
+static int md_notify_reboot(struct notifier_block *this,
+                           unsigned long code, void *x)
 {
        struct list_head *tmp;
        mddev_t *mddev;
@@ -3617,7 +3933,7 @@ int md_notify_reboot(struct notifier_block *this,
        return NOTIFY_DONE;
 }
 
-struct notifier_block md_notifier = {
+static struct notifier_block md_notifier = {
        .notifier_call  = md_notify_reboot,
        .next           = NULL,
        .priority       = INT_MAX, /* before any real devices */
@@ -3634,7 +3950,7 @@ static void md_geninit(void)
                p->proc_fops = &md_seq_fops;
 }
 
-int __init md_init(void)
+static int __init md_init(void)
 {
        int minor;
 
@@ -3642,6 +3958,8 @@ int __init md_init(void)
                        " MD_SB_DISKS=%d\n",
                        MD_MAJOR_VERSION, MD_MINOR_VERSION,
                        MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MD_SB_DISKS);
+       printk(KERN_INFO "md: bitmap version %d.%d\n", BITMAP_MAJOR,
+                       BITMAP_MINOR);
 
        if (register_blkdev(MAJOR_NR, "md"))
                return -1;
@@ -3757,10 +4075,11 @@ EXPORT_SYMBOL(md_error);
 EXPORT_SYMBOL(md_done_sync);
 EXPORT_SYMBOL(md_write_start);
 EXPORT_SYMBOL(md_write_end);
-EXPORT_SYMBOL(md_handle_safemode);
 EXPORT_SYMBOL(md_register_thread);
 EXPORT_SYMBOL(md_unregister_thread);
 EXPORT_SYMBOL(md_wakeup_thread);
 EXPORT_SYMBOL(md_print_devices);
 EXPORT_SYMBOL(md_check_recovery);
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("md");
+MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);