md: Turn rdev->sb_offset into a sector-based quantity.
[safe/jmp/linux-2.6] / include / linux / raid / md_k.h
index d5854c2..e37aaa4 100644 (file)
 /* and dm-bio-list.h is not under include/linux because.... ??? */
 #include "../../../drivers/md/dm-bio-list.h"
 
-#define MD_RESERVED       0UL
-#define LINEAR            1UL
-#define RAID0             2UL
-#define RAID1             3UL
-#define RAID5             4UL
-#define TRANSLUCENT       5UL
-#define HSM               6UL
-#define MULTIPATH         7UL
-#define RAID6            8UL
-#define        RAID10            9UL
-#define FAULTY           10UL
-#define MAX_PERSONALITY   11UL
+#ifdef CONFIG_BLOCK
 
 #define        LEVEL_MULTIPATH         (-4)
 #define        LEVEL_LINEAR            (-1)
 #define        LEVEL_FAULTY            (-5)
 
-#define MaxSector (~(sector_t)0)
-#define MD_THREAD_NAME_MAX 14
-
-static inline int pers_to_level (int pers)
-{
-       switch (pers) {
-               case FAULTY:            return LEVEL_FAULTY;
-               case MULTIPATH:         return LEVEL_MULTIPATH;
-               case HSM:               return -3;
-               case TRANSLUCENT:       return -2;
-               case LINEAR:            return LEVEL_LINEAR;
-               case RAID0:             return 0;
-               case RAID1:             return 1;
-               case RAID5:             return 5;
-               case RAID6:             return 6;
-               case RAID10:            return 10;
-       }
-       BUG();
-       return MD_RESERVED;
-}
+/* we need a value for 'no level specified' and 0
+ * means 'raid0', so we need something else.  This is
+ * for internal use only
+ */
+#define        LEVEL_NONE              (-1000000)
 
-static inline int level_to_pers (int level)
-{
-       switch (level) {
-               case LEVEL_FAULTY: return FAULTY;
-               case LEVEL_MULTIPATH: return MULTIPATH;
-               case -3: return HSM;
-               case -2: return TRANSLUCENT;
-               case LEVEL_LINEAR: return LINEAR;
-               case 0: return RAID0;
-               case 1: return RAID1;
-               case 4:
-               case 5: return RAID5;
-               case 6: return RAID6;
-               case 10: return RAID10;
-       }
-       return MD_RESERVED;
-}
+#define MaxSector (~(sector_t)0)
 
 typedef struct mddev_s mddev_t;
 typedef struct mdk_rdev_s mdk_rdev_t;
 
-#define MAX_MD_DEVS  256       /* Max number of md dev */
-
 /*
  * options passed in raidrun:
  */
 
-#define MAX_CHUNK_SIZE (4096*1024)
+/* Currently this must fit in an 'int' */
+#define MAX_CHUNK_SIZE (1<<30)
 
 /*
  * MD's 'extended' device
@@ -94,14 +51,15 @@ struct mdk_rdev_s
 
        sector_t size;                  /* Device size (in blocks) */
        mddev_t *mddev;                 /* RAID array if running */
-       unsigned long last_events;      /* IO event timestamp */
+       long last_events;               /* IO event timestamp */
 
        struct block_device *bdev;      /* block device handle */
 
        struct page     *sb_page;
        int             sb_loaded;
+       __u64           sb_events;
        sector_t        data_offset;    /* start of data in array */
-       sector_t        sb_offset;
+       sector_t        sb_start;       /* offset of the super block (in 512-byte sectors) */
        int             sb_size;        /* bytes in the superblock */
        int             preferred_minor;        /* autorun support */
 
@@ -123,6 +81,16 @@ struct mdk_rdev_s
 #define        In_sync         2               /* device is in_sync with rest of array */
 #define        WriteMostly     4               /* Avoid reading if at all possible */
 #define        BarriersNotsupp 5               /* BIO_RW_BARRIER is not supported */
+#define        AllReserved     6               /* If whole device is reserved for
+                                        * one array */
+#define        AutoDetected    7               /* added by auto-detect */
+#define Blocked                8               /* An error occurred on an externally
+                                        * managed array, don't allow writes
+                                        * until it is cleared */
+#define StateChanged   9               /* Faulty or Blocked has changed during
+                                        * interrupt, so it needs to be
+                                        * notified by the thread */
+       wait_queue_head_t blocked_wait;
 
        int desc_nr;                    /* descriptor index in the superblock */
        int raid_disk;                  /* role of device in array */
@@ -130,6 +98,10 @@ struct mdk_rdev_s
                                         * array and could again if we did a partial
                                         * resync from the bitmap
                                         */
+       sector_t        recovery_offset;/* If this device has been partially
+                                        * recovered, this is where we were
+                                        * up to.
+                                        */
 
        atomic_t        nr_pending;     /* number of pending requests.
                                         * only maintained for arrays that
@@ -138,18 +110,25 @@ struct mdk_rdev_s
        atomic_t        read_errors;    /* number of consecutive read errors that
                                         * we have tried to ignore.
                                         */
+       atomic_t        corrected_errors; /* number of corrected read errors,
+                                          * for reporting to userspace and storing
+                                          * in superblock.
+                                          */
+       struct work_struct del_work;    /* used for delayed sysfs removal */
 };
 
-typedef struct mdk_personality_s mdk_personality_t;
-
 struct mddev_s
 {
        void                            *private;
-       mdk_personality_t               *pers;
+       struct mdk_personality          *pers;
        dev_t                           unit;
        int                             md_minor;
        struct list_head                disks;
-       int                             sb_dirty;
+       unsigned long                   flags;
+#define MD_CHANGE_DEVS 0       /* Some device status has changed */
+#define MD_CHANGE_CLEAN 1      /* transition to or from 'clean' */
+#define MD_CHANGE_PENDING 2    /* superblock update in progress */
+
        int                             ro;
 
        struct gendisk                  *gendisk;
@@ -161,9 +140,13 @@ struct mddev_s
                                        minor_version,
                                        patch_version;
        int                             persistent;
+       int                             external;       /* metadata is
+                                                        * managed externally */
+       char                            metadata_type[17]; /* externally set*/
        int                             chunk_size;
        time_t                          ctime, utime;
        int                             level, layout;
+       char                            clevel[16];
        int                             raid_disks;
        int                             max_disks;
        sector_t                        size; /* used size of component devices */
@@ -172,39 +155,66 @@ struct mddev_s
 
        char                            uuid[16];
 
+       /* If the array is being reshaped, we need to record the
+        * new shape and an indication of where we are up to.
+        * This is written to the superblock.
+        * If reshape_position is MaxSector, then no reshape is happening (yet).
+        */
+       sector_t                        reshape_position;
+       int                             delta_disks, new_level, new_layout, new_chunk;
+
        struct mdk_thread_s             *thread;        /* management thread */
        struct mdk_thread_s             *sync_thread;   /* doing resync or reconstruct */
-       sector_t                        curr_resync;    /* blocks scheduled */
+       sector_t                        curr_resync;    /* last block scheduled */
        unsigned long                   resync_mark;    /* a recent timestamp */
        sector_t                        resync_mark_cnt;/* blocks written at resync_mark */
+       sector_t                        curr_mark_cnt; /* blocks scheduled now */
 
        sector_t                        resync_max_sectors; /* may be set by personality */
 
        sector_t                        resync_mismatches; /* count of sectors where
                                                            * parity/replica mismatch found
                                                            */
+
+       /* allow user-space to request suspension of IO to regions of the array */
+       sector_t                        suspend_lo;
+       sector_t                        suspend_hi;
+       /* if zero, use the system-wide default */
+       int                             sync_speed_min;
+       int                             sync_speed_max;
+
+       /* resync even though the same disks are shared among md-devices */
+       int                             parallel_resync;
+
+       int                             ok_start_degraded;
        /* recovery/resync flags 
         * NEEDED:   we might need to start a resync/recover
         * RUNNING:  a thread is running, or about to be started
         * SYNC:     actually doing a resync, not a recovery
-        * ERR:      and IO error was detected - abort the resync/recovery
-        * INTR:     someone requested a (clean) early abort.
+        * RECOVER:  doing recovery, or need to try it.
+        * INTR:     resync needs to be aborted for some reason
         * DONE:     thread is done and is waiting to be reaped
         * REQUEST:  user-space has requested a sync (used with SYNC)
         * CHECK:    user-space request for for check-only, no repair
+        * RESHAPE:  A reshape is happening
+        *
+        * If neither SYNC nor RESHAPE is set, then it is a recovery.
         */
 #define        MD_RECOVERY_RUNNING     0
 #define        MD_RECOVERY_SYNC        1
-#define        MD_RECOVERY_ERR         2
+#define        MD_RECOVERY_RECOVER     2
 #define        MD_RECOVERY_INTR        3
 #define        MD_RECOVERY_DONE        4
 #define        MD_RECOVERY_NEEDED      5
 #define        MD_RECOVERY_REQUESTED   6
 #define        MD_RECOVERY_CHECK       7
+#define MD_RECOVERY_RESHAPE    8
+#define        MD_RECOVERY_FROZEN      9
+
        unsigned long                   recovery;
 
        int                             in_sync;        /* know to not need resync */
-       struct semaphore                reconfig_sem;
+       struct mutex                    reconfig_mutex;
        atomic_t                        active;
 
        int                             changed;        /* true if we might need to reread partition info */
@@ -222,6 +232,10 @@ struct mddev_s
        atomic_t                        recovery_active; /* blocks scheduled, but not written */
        wait_queue_head_t               recovery_wait;
        sector_t                        recovery_cp;
+       sector_t                        resync_min;     /* user requested sync
+                                                        * starts here */
+       sector_t                        resync_max;     /* resync should pause
+                                                        * when it gets here */
 
        spinlock_t                      write_lock;
        wait_queue_head_t               sb_wait;        /* for waiting on superblock updates */
@@ -233,7 +247,7 @@ struct mddev_s
        unsigned int                    safemode_delay;
        struct timer_list               safemode_timer;
        atomic_t                        writes_pending; 
-       request_queue_t                 *queue; /* for plugging ... */
+       struct request_queue            *queue; /* for plugging ... */
 
        atomic_t                        write_behind; /* outstanding async IO */
        unsigned int                    max_write_behind; /* 0 = sync */
@@ -265,11 +279,13 @@ static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sect
         atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io);
 }
 
-struct mdk_personality_s
+struct mdk_personality
 {
        char *name;
+       int level;
+       struct list_head list;
        struct module *owner;
-       int (*make_request)(request_queue_t *q, struct bio *bio);
+       int (*make_request)(struct request_queue *q, struct bio *bio);
        int (*run)(mddev_t *mddev);
        int (*stop)(mddev_t *mddev);
        void (*status)(struct seq_file *seq, mddev_t *mddev);
@@ -282,7 +298,8 @@ struct mdk_personality_s
        int (*spare_active) (mddev_t *mddev);
        sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster);
        int (*resize) (mddev_t *mddev, sector_t sectors);
-       int (*reshape) (mddev_t *mddev, int raid_disks);
+       int (*check_reshape) (mddev_t *mddev);
+       int (*start_reshape) (mddev_t *mddev);
        int (*reconfig) (mddev_t *mddev, int layout, int chunk_size);
        /* quiesce moves between quiescence states
         * 0 - fully active
@@ -305,39 +322,29 @@ static inline char * mdname (mddev_t * mddev)
        return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
 }
 
-extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr);
-
 /*
  * iterates through some rdev ringlist. It's safe to remove the
  * current 'rdev'. Dont touch 'tmp' though.
  */
-#define ITERATE_RDEV_GENERIC(head,rdev,tmp)                            \
+#define rdev_for_each_list(rdev, tmp, list)                            \
                                                                        \
-       for ((tmp) = (head).next;                                       \
+       for ((tmp) = (list).next;                                       \
                (rdev) = (list_entry((tmp), mdk_rdev_t, same_set)),     \
-                       (tmp) = (tmp)->next, (tmp)->prev != &(head)     \
+                       (tmp) = (tmp)->next, (tmp)->prev != &(list)     \
                ; )
 /*
  * iterates through the 'same array disks' ringlist
  */
-#define ITERATE_RDEV(mddev,rdev,tmp)                                   \
-       ITERATE_RDEV_GENERIC((mddev)->disks,rdev,tmp)
-
-/*
- * Iterates through 'pending RAID disks'
- */
-#define ITERATE_RDEV_PENDING(rdev,tmp)                                 \
-       ITERATE_RDEV_GENERIC(pending_raid_disks,rdev,tmp)
+#define rdev_for_each(rdev, tmp, mddev)                                \
+       rdev_for_each_list(rdev, tmp, (mddev)->disks)
 
 typedef struct mdk_thread_s {
        void                    (*run) (mddev_t *mddev);
        mddev_t                 *mddev;
        wait_queue_head_t       wqueue;
        unsigned long           flags;
-       struct completion       *event;
        struct task_struct      *tsk;
        unsigned long           timeout;
-       const char              *name;
 } mdk_thread_t;
 
 #define THREAD_WAKEUP  0
@@ -368,5 +375,11 @@ do {                                                                       \
        __wait_event_lock_irq(wq, condition, lock, cmd);                \
 } while (0)
 
+static inline void safe_put_page(struct page *p)
+{
+       if (p) put_page(p);     /* NULL-tolerant: put_page() is called only when p is non-NULL */
+}
+
+#endif /* CONFIG_BLOCK */
 #endif