[PATCH] md: allow hot-add and hot-remove of md intent logging bitmaps
[safe/jmp/linux-2.6] / drivers / md / md.c
index 65fab74..ae65446 100644 (file)
@@ -195,8 +195,7 @@ static mddev_t * mddev_find(dev_t unit)
                if (mddev->unit == unit) {
                        mddev_get(mddev);
                        spin_unlock(&all_mddevs_lock);
-                       if (new)
-                               kfree(new);
+                       kfree(new);
                        return mddev;
                }
 
@@ -257,8 +256,7 @@ static inline void mddev_unlock(mddev_t * mddev)
 {
        up(&mddev->reconfig_sem);
 
-       if (mddev->thread)
-               md_wakeup_thread(mddev->thread);
+       md_wakeup_thread(mddev->thread);
 }
 
 mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
@@ -285,7 +283,7 @@ static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
        return NULL;
 }
 
-inline static sector_t calc_dev_sboffset(struct block_device *bdev)
+static inline sector_t calc_dev_sboffset(struct block_device *bdev)
 {
        sector_t size = bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
        return MD_NEW_SIZE_BLOCKS(size);
@@ -339,6 +337,7 @@ static int super_written(struct bio *bio, unsigned int bytes_done, int error)
 
        if (atomic_dec_and_test(&rdev->mddev->pending_writes))
                wake_up(&rdev->mddev->sb_wait);
+       bio_put(bio);
        return 0;
 }
 
@@ -458,11 +457,8 @@ static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
                ret = 1;
 
 abort:
-       if (tmp1)
-               kfree(tmp1);
-       if (tmp2)
-               kfree(tmp2);
-
+       kfree(tmp1);
+       kfree(tmp2);
        return ret;
 }
 
@@ -626,6 +622,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                mddev->raid_disks = sb->raid_disks;
                mddev->size = sb->size;
                mddev->events = md_event(sb);
+               mddev->bitmap_offset = 0;
+               mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
 
                if (sb->state & (1<<MD_SB_CLEAN))
                        mddev->recovery_cp = MaxSector;
@@ -651,7 +649,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                                printk(KERN_WARNING "md: bitmaps only support for raid1\n");
                                return -EINVAL;
                        }
-                       mddev->bitmap_offset = (MD_SB_BYTES >> 9);
+                       mddev->bitmap_offset = mddev->default_bitmap_offset;
                }
 
        } else if (mddev->pers == NULL) {
@@ -941,6 +939,10 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                mddev->raid_disks = le32_to_cpu(sb->raid_disks);
                mddev->size = le64_to_cpu(sb->size)/2;
                mddev->events = le64_to_cpu(sb->events);
+               mddev->bitmap_offset = 0;
+               mddev->default_bitmap_offset = 0;
+               if (mddev->minor_version == 0)
+                       mddev->default_bitmap_offset = -(64*1024)/512;
                
                mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
                memcpy(mddev->uuid, sb->set_uuid, 16);
@@ -1691,6 +1693,7 @@ static int do_md_run(mddev_t * mddev)
        mddev->pers = pers[pnum];
        spin_unlock(&pers_lock);
 
+       mddev->recovery = 0;
        mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
 
        /* before we start the array running, initialise the bitmap */
@@ -1715,6 +1718,7 @@ static int do_md_run(mddev_t * mddev)
        mddev->in_sync = 1;
        
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+       md_wakeup_thread(mddev->thread);
        
        if (mddev->sb_dirty)
                md_update_sb(mddev);
@@ -1801,6 +1805,8 @@ static int do_md_stop(mddev_t * mddev, int ro)
                                goto out;
                        mddev->ro = 1;
                } else {
+                       bitmap_flush(mddev);
+                       wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
                        if (mddev->ro)
                                set_disk_ro(disk, 0);
                        blk_queue_make_request(mddev->queue, md_fail_request);
@@ -1825,6 +1831,7 @@ static int do_md_stop(mddev_t * mddev, int ro)
                fput(mddev->bitmap_file);
                mddev->bitmap_file = NULL;
        }
+       mddev->bitmap_offset = 0;
 
        /*
         * Free resources if final stop
@@ -2070,6 +2077,8 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
        info.state         = 0;
        if (mddev->in_sync)
                info.state = (1<<MD_SB_CLEAN);
+       if (mddev->bitmap && mddev->bitmap_offset)
+               info.state = (1<<MD_SB_BITMAP_PRESENT);
        info.active_disks  = active;
        info.working_disks = working;
        info.failed_disks  = failed;
@@ -2084,7 +2093,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
        return 0;
 }
 
-static int get_bitmap_file(mddev_t * mddev, void * arg)
+static int get_bitmap_file(mddev_t * mddev, void __user * arg)
 {
        mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */
        char *ptr, *buf = NULL;
@@ -2234,8 +2243,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
                        export_rdev(rdev);
 
                set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-               if (mddev->thread)
-                       md_wakeup_thread(mddev->thread);
+               md_wakeup_thread(mddev->thread);
                return err;
        }
 
@@ -2428,25 +2436,51 @@ static int set_bitmap_file(mddev_t *mddev, int fd)
 {
        int err;
 
-       if (mddev->pers)
-               return -EBUSY;
+       if (mddev->pers) {
+               if (!mddev->pers->quiesce)
+                       return -EBUSY;
+               if (mddev->recovery || mddev->sync_thread)
+                       return -EBUSY;
+               /* we should be able to change the bitmap.. */
+       }
 
-       mddev->bitmap_file = fget(fd);
 
-       if (mddev->bitmap_file == NULL) {
-               printk(KERN_ERR "%s: error: failed to get bitmap file\n",
-                       mdname(mddev));
-               return -EBADF;
-       }
+       if (fd >= 0) {
+               if (mddev->bitmap)
+                       return -EEXIST; /* cannot add when bitmap is present */
+               mddev->bitmap_file = fget(fd);
 
-       err = deny_bitmap_write_access(mddev->bitmap_file);
-       if (err) {
-               printk(KERN_ERR "%s: error: bitmap file is already in use\n",
-                       mdname(mddev));
-               fput(mddev->bitmap_file);
-               mddev->bitmap_file = NULL;
-       } else
+               if (mddev->bitmap_file == NULL) {
+                       printk(KERN_ERR "%s: error: failed to get bitmap file\n",
+                              mdname(mddev));
+                       return -EBADF;
+               }
+
+               err = deny_bitmap_write_access(mddev->bitmap_file);
+               if (err) {
+                       printk(KERN_ERR "%s: error: bitmap file is already in use\n",
+                              mdname(mddev));
+                       fput(mddev->bitmap_file);
+                       mddev->bitmap_file = NULL;
+                       return err;
+               }
                mddev->bitmap_offset = 0; /* file overrides offset */
+       } else if (mddev->bitmap == NULL)
+               return -ENOENT; /* cannot remove what isn't there */
+       err = 0;
+       if (mddev->pers) {
+               mddev->pers->quiesce(mddev, 1);
+               if (fd >= 0)
+                       err = bitmap_create(mddev);
+               if (fd < 0 || err)
+                       bitmap_destroy(mddev);
+               mddev->pers->quiesce(mddev, 0);
+       } else if (fd < 0) {
+               if (mddev->bitmap_file)
+                       fput(mddev->bitmap_file);
+               mddev->bitmap_file = NULL;
+       }
+
        return err;
 }
 
@@ -2526,6 +2560,11 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
 {
        int rv = 0;
        int cnt = 0;
+       int state = 0;
+
+       /* calculate expected state, ignoring low bits */
+       if (mddev->bitmap && mddev->bitmap_offset)
+               state |= (1 << MD_SB_BITMAP_PRESENT);
 
        if (mddev->major_version != info->major_version ||
            mddev->minor_version != info->minor_version ||
@@ -2534,12 +2573,16 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
            mddev->level         != info->level         ||
 /*         mddev->layout        != info->layout        || */
            !mddev->persistent   != info->not_persistent||
-           mddev->chunk_size    != info->chunk_size    )
+           mddev->chunk_size    != info->chunk_size    ||
+           /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
+           ((state^info->state) & 0xfffffe00)
+               )
                return -EINVAL;
        /* Check there is only one change */
        if (mddev->size != info->size) cnt++;
        if (mddev->raid_disks != info->raid_disks) cnt++;
        if (mddev->layout != info->layout) cnt++;
+       if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) cnt++;
        if (cnt == 0) return 0;
        if (cnt > 1) return -EINVAL;
 
@@ -2618,6 +2661,35 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
                        }
                }
        }
+       if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
+               if (mddev->pers->quiesce == NULL)
+                       return -EINVAL;
+               if (mddev->recovery || mddev->sync_thread)
+                       return -EBUSY;
+               if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
+                       /* add the bitmap */
+                       if (mddev->bitmap)
+                               return -EEXIST;
+                       if (mddev->default_bitmap_offset == 0)
+                               return -EINVAL;
+                       mddev->bitmap_offset = mddev->default_bitmap_offset;
+                       mddev->pers->quiesce(mddev, 1);
+                       rv = bitmap_create(mddev);
+                       if (rv)
+                               bitmap_destroy(mddev);
+                       mddev->pers->quiesce(mddev, 0);
+               } else {
+                       /* remove the bitmap */
+                       if (!mddev->bitmap)
+                               return -ENOENT;
+                       if (mddev->bitmap->file)
+                               return -EINVAL;
+                       mddev->pers->quiesce(mddev, 1);
+                       bitmap_destroy(mddev);
+                       mddev->pers->quiesce(mddev, 0);
+                       mddev->bitmap_offset = 0;
+               }
+       }
        md_update_sb(mddev);
        return rv;
 }
@@ -2779,7 +2851,7 @@ static int md_ioctl(struct inode *inode, struct file *file,
                        goto done_unlock;
 
                case GET_BITMAP_FILE:
-                       err = get_bitmap_file(mddev, (void *)arg);
+                       err = get_bitmap_file(mddev, argp);
                        goto done_unlock;
 
                case GET_DISK_INFO:
@@ -2980,8 +3052,7 @@ static int md_thread(void * arg)
                wait_event_interruptible_timeout(thread->wqueue,
                                                 test_bit(THREAD_WAKEUP, &thread->flags),
                                                 thread->timeout);
-               if (current->flags & PF_FREEZE)
-                       refrigerator(PF_FREEZE);
+               try_to_freeze();
 
                clear_bit(THREAD_WAKEUP, &thread->flags);
 
@@ -3488,7 +3559,6 @@ static void md_do_sync(mddev_t *mddev)
                        goto skip;
                }
                ITERATE_MDDEV(mddev2,tmp) {
-                       printk(".");
                        if (mddev2 == mddev)
                                continue;
                        if (mddev2->curr_resync && 
@@ -4011,3 +4081,5 @@ EXPORT_SYMBOL(md_wakeup_thread);
 EXPORT_SYMBOL(md_print_devices);
 EXPORT_SYMBOL(md_check_recovery);
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("md");
+MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);