Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[safe/jmp/linux-2.6] / fs / gfs2 / recovery.c
index 6c7e2e8..09fa319 100644 (file)
@@ -1,28 +1,26 @@
 /*
  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License v.2.
+ * of the GNU General Public License version 2.
  */
 
-#include <linux/sched.h>
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
-#include <asm/semaphore.h>
+#include <linux/slow-work.h>
 
 #include "gfs2.h"
-#include "lm_interface.h"
 #include "incore.h"
 #include "bmap.h"
 #include "glock.h"
 #include "glops.h"
-#include "lm.h"
 #include "lops.h"
 #include "meta_io.h"
 #include "recovery.h"
 int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
                           struct buffer_head **bh)
 {
-       struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
+       struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
        struct gfs2_glock *gl = ip->i_gl;
        int new = 0;
-       uint64_t dblock;
-       uint32_t extlen;
+       u64 dblock;
+       u32 extlen;
        int error;
 
-       error = gfs2_block_map(ip, blk, &new, &dblock,
-                              &extlen);
+       error = gfs2_extent_map(&ip->i_inode, blk, &new, &dblock, &extlen);
        if (error)
                return error;
        if (!dblock) {
@@ -49,13 +46,12 @@ int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
                return -EIO;
        }
 
-       gfs2_meta_ra(gl, dblock, extlen);
-       error = gfs2_meta_read(gl, dblock, DIO_START | DIO_WAIT, bh);
+       *bh = gfs2_meta_ra(gl, dblock, extlen);
 
        return error;
 }
 
-int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
+int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where)
 {
        struct list_head *head = &sdp->sd_revoke_list;
        struct gfs2_revoke_replay *rr;
@@ -73,7 +69,7 @@ int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
                return 0;
        }
 
-       rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_KERNEL);
+       rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_NOFS);
        if (!rr)
                return -ENOMEM;
 
@@ -84,7 +80,7 @@ int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
        return 1;
 }
 
-int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
+int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where)
 {
        struct gfs2_revoke_replay *rr;
        int wrap, a, b, revoke;
@@ -120,6 +116,22 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp)
        }
 }
 
+static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
+{
+       const struct gfs2_log_header *str = buf;
+
+       if (str->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
+           str->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH))
+               return 1;
+
+       lh->lh_sequence = be64_to_cpu(str->lh_sequence);
+       lh->lh_flags = be32_to_cpu(str->lh_flags);
+       lh->lh_tail = be32_to_cpu(str->lh_tail);
+       lh->lh_blkno = be32_to_cpu(str->lh_blkno);
+       lh->lh_hash = be32_to_cpu(str->lh_hash);
+       return 0;
+}
+
 /**
  * get_log_header - read the log header for a given segment
  * @jd: the journal
@@ -135,28 +147,26 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp)
  */
 
 static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
-                         struct gfs2_log_header *head)
+                         struct gfs2_log_header_host *head)
 {
        struct buffer_head *bh;
-       struct gfs2_log_header lh;
-       uint32_t hash;
+       struct gfs2_log_header_host uninitialized_var(lh);
+       const u32 nothing = 0;
+       u32 hash;
        int error;
 
        error = gfs2_replay_read_block(jd, blk, &bh);
        if (error)
                return error;
 
-       memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header));
-       lh.lh_hash = 0;
-       hash = gfs2_disk_hash((char *)&lh, sizeof(struct gfs2_log_header));
-       gfs2_log_header_in(&lh, bh->b_data);
-
+       hash = crc32_le((u32)~0, bh->b_data, sizeof(struct gfs2_log_header) -
+                                            sizeof(u32));
+       hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
+       hash ^= (u32)~0;
+       error = gfs2_log_header_in(&lh, bh->b_data);
        brelse(bh);
 
-       if (lh.lh_header.mh_magic != GFS2_MAGIC ||
-           lh.lh_header.mh_type != GFS2_METATYPE_LH ||
-           lh.lh_blkno != blk ||
-           lh.lh_hash != hash)
+       if (error || lh.lh_blkno != blk || lh.lh_hash != hash)
                return 1;
 
        *head = lh;
@@ -178,7 +188,7 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
  */
 
 static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
-                       struct gfs2_log_header *head)
+                       struct gfs2_log_header_host *head)
 {
        unsigned int orig_blk = *blk;
        int error;
@@ -192,7 +202,7 @@ static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
                        *blk = 0;
 
                if (*blk == orig_blk) {
-                       gfs2_consist_inode(jd->jd_inode->u.generic_ip);
+                       gfs2_consist_inode(GFS2_I(jd->jd_inode));
                        return -EIO;
                }
        }
@@ -209,10 +219,10 @@ static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
  * Returns: errno
  */
 
-static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
+static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 {
        unsigned int blk = head->lh_blkno;
-       struct gfs2_log_header lh;
+       struct gfs2_log_header_host lh;
        int error;
 
        for (;;) {
@@ -226,7 +236,7 @@ static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
                        continue;
 
                if (lh.lh_sequence == head->lh_sequence) {
-                       gfs2_consist_inode(jd->jd_inode->u.generic_ip);
+                       gfs2_consist_inode(GFS2_I(jd->jd_inode));
                        return -EIO;
                }
                if (lh.lh_sequence < head->lh_sequence)
@@ -249,10 +259,10 @@ static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
  * Returns: errno
  */
 
-int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
+int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 {
-       struct gfs2_log_header lh_1, lh_m;
-       uint32_t blk_1, blk_2, blk_m;
+       struct gfs2_log_header_host lh_1, lh_m;
+       u32 blk_1, blk_2, blk_m;
        int error;
 
        blk_1 = 0;
@@ -302,16 +312,15 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
 static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
                              unsigned int end, int pass)
 {
-       struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
-       struct gfs2_sbd *sdp = ip->i_sbd;
+       struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
        struct buffer_head *bh;
        struct gfs2_log_descriptor *ld;
        int error = 0;
        u32 length;
        __be64 *ptr;
        unsigned int offset = sizeof(struct gfs2_log_descriptor);
-       offset += (sizeof(__be64)-1);
-       offset &= ~(sizeof(__be64)-1);
+       offset += sizeof(__be64) - 1;
+       offset &= ~(sizeof(__be64) - 1);
 
        while (start != end) {
                error = gfs2_replay_read_block(jd, start, &bh);
@@ -324,15 +333,16 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
                ld = (struct gfs2_log_descriptor *)bh->b_data;
                length = be32_to_cpu(ld->ld_length);
 
-               if (be16_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
-                       struct gfs2_log_header lh;
+               if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
+                       struct gfs2_log_header_host lh;
                        error = get_log_header(jd, start, &lh);
                        if (!error) {
                                gfs2_replay_incr_blk(sdp, &start);
+                               brelse(bh);
                                continue;
                        }
                        if (error == 1) {
-                               gfs2_consist_inode(jd->jd_inode->u.generic_ip);
+                               gfs2_consist_inode(GFS2_I(jd->jd_inode));
                                error = -EIO;
                        }
                        brelse(bh);
@@ -367,29 +377,29 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
  * Returns: errno
  */
 
-static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
+static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 {
-       struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
-       struct gfs2_sbd *sdp = ip->i_sbd;
+       struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
+       struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
        unsigned int lblock;
-       int new = 0;
-       uint64_t dblock;
        struct gfs2_log_header *lh;
-       uint32_t hash;
+       u32 hash;
        struct buffer_head *bh;
        int error;
-       
+       struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
+
        lblock = head->lh_blkno;
        gfs2_replay_incr_blk(sdp, &lblock);
-       error = gfs2_block_map(ip, lblock, &new, &dblock, NULL);
+       bh_map.b_size = 1 << ip->i_inode.i_blkbits;
+       error = gfs2_block_map(&ip->i_inode, lblock, &bh_map, 0);
        if (error)
                return error;
-       if (!dblock) {
+       if (!bh_map.b_blocknr) {
                gfs2_consist_inode(ip);
                return -EIO;
        }
 
-       bh = sb_getblk(sdp->sd_vfs, dblock);
+       bh = sb_getblk(sdp->sd_vfs, bh_map.b_blocknr);
        lock_buffer(bh);
        memset(bh->b_data, 0, bh->b_size);
        set_buffer_uptodate(bh);
@@ -399,8 +409,8 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
        lh = (struct gfs2_log_header *)bh->b_data;
        memset(lh, 0, sizeof(struct gfs2_log_header));
        lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
-       lh->lh_header.mh_type = cpu_to_be16(GFS2_METATYPE_LH);
-       lh->lh_header.mh_format = cpu_to_be16(GFS2_FORMAT_LH);
+       lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
+       lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
        lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
        lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
        lh->lh_blkno = cpu_to_be32(lblock);
@@ -415,54 +425,79 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
        return error;
 }
 
-/**
- * gfs2_recover_journal - recovery a given journal
- * @jd: the struct gfs2_jdesc describing the journal
- * @wait: Don't return until the journal is clean (or an error is encountered)
- *
- * Acquire the journal's lock, check to see if the journal is clean, and
- * do recovery if necessary.
- *
- * Returns: errno
- */
 
-int gfs2_recover_journal(struct gfs2_jdesc *jd, int wait)
+static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
+                               unsigned int message)
 {
-       struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
-       struct gfs2_sbd *sdp = ip->i_sbd;
-       struct gfs2_log_header head;
+       char env_jid[20];
+       char env_status[20];
+       char *envp[] = { env_jid, env_status, NULL };
+       struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+        ls->ls_recover_jid_done = jid;
+        ls->ls_recover_jid_status = message;
+       sprintf(env_jid, "JID=%d", jid);
+       sprintf(env_status, "RECOVERY=%s",
+               message == LM_RD_SUCCESS ? "Done" : "Failed");
+        kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
+}
+
+static int gfs2_recover_get_ref(struct slow_work *work)
+{
+       struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
+       if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
+               return -EBUSY;
+       return 0;
+}
+
+static void gfs2_recover_put_ref(struct slow_work *work)
+{
+       struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
+       clear_bit(JDF_RECOVERY, &jd->jd_flags);
+       smp_mb__after_clear_bit();
+       wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
+}
+
+static void gfs2_recover_work(struct slow_work *work)
+{
+       struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
+       struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
+       struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+       struct gfs2_log_header_host head;
        struct gfs2_holder j_gh, ji_gh, t_gh;
        unsigned long t;
        int ro = 0;
        unsigned int pass;
        int error;
 
-       fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", jd->jd_jid);
+       if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
+               fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
+                       jd->jd_jid);
 
-       /* Aquire the journal lock so we can do recovery */
+               /* Acquire the journal lock so we can do recovery */
 
-       error = gfs2_glock_nq_num(sdp,
-                                 jd->jd_jid, &gfs2_journal_glops,
-                                 LM_ST_EXCLUSIVE,
-                                 LM_FLAG_NOEXP |
-                                 ((wait) ? 0 : LM_FLAG_TRY) |
-                                 GL_NOCACHE, &j_gh);
-       switch (error) {
-       case 0:
-               break;
+               error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
+                                         LM_ST_EXCLUSIVE,
+                                         LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE,
+                                         &j_gh);
+               switch (error) {
+               case 0:
+                       break;
 
-       case GLR_TRYFAILED:
-               fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
-               error = 0;
+               case GLR_TRYFAILED:
+                       fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
+                       error = 0;
 
-       default:
-               goto fail;
-       };
+               default:
+                       goto fail;
+               };
 
-       error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
-                                  LM_FLAG_NOEXP, &ji_gh);
-       if (error)
-               goto fail_gunlock_j;
+               error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
+                                          LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh);
+               if (error)
+                       goto fail_gunlock_j;
+       } else {
+               fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
+       }
 
        fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
 
@@ -482,14 +517,9 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, int wait)
 
                /* Acquire a shared hold on the transaction lock */
 
-               error = gfs2_glock_nq_init(sdp->sd_trans_gl,
-                                          LM_ST_SHARED,
-                                          LM_FLAG_NOEXP |
-                                          LM_FLAG_PRIORITY |
-                                          GL_NEVER_RECURSE |
-                                          GL_NOCANCEL |
-                                          GL_NOCACHE,
-                                          &t_gh);
+               error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
+                                          LM_FLAG_NOEXP | LM_FLAG_PRIORITY |
+                                          GL_NOCACHE, &t_gh);
                if (error)
                        goto fail_gunlock_ji;
 
@@ -497,13 +527,21 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, int wait)
                        if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
                                ro = 1;
                } else {
-                       if (sdp->sd_vfs->s_flags & MS_RDONLY)
-                               ro = 1;
+                       if (sdp->sd_vfs->s_flags & MS_RDONLY) {
+                               /* check if device itself is read-only */
+                               ro = bdev_read_only(sdp->sd_vfs->s_bdev);
+                               if (!ro) {
+                                       fs_info(sdp, "recovery required on "
+                                               "read-only filesystem.\n");
+                                       fs_info(sdp, "write access will be "
+                                               "enabled during recovery.\n");
+                               }
+                       }
                }
 
                if (ro) {
-                       fs_warn(sdp, "jid=%u: Can't replay: read-only FS\n",
-                               jd->jd_jid);
+                       fs_warn(sdp, "jid=%u: Can't replay: read-only block "
+                               "device\n", jd->jd_jid);
                        error = -EROFS;
                        goto fail_gunlock_tr;
                }
@@ -524,57 +562,58 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, int wait)
                        goto fail_gunlock_tr;
 
                gfs2_glock_dq_uninit(&t_gh);
-
                t = DIV_ROUND_UP(jiffies - t, HZ);
-               
                fs_info(sdp, "jid=%u: Journal replayed in %lus\n",
                        jd->jd_jid, t);
        }
 
-       gfs2_glock_dq_uninit(&ji_gh);
+       if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
+               gfs2_glock_dq_uninit(&ji_gh);
 
-       gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
+       gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
 
-       gfs2_glock_dq_uninit(&j_gh);
+       if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
+               gfs2_glock_dq_uninit(&j_gh);
 
        fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
+       return;
 
-       return 0;
-
- fail_gunlock_tr:
+fail_gunlock_tr:
        gfs2_glock_dq_uninit(&t_gh);
-
- fail_gunlock_ji:
-       gfs2_glock_dq_uninit(&ji_gh);
-
- fail_gunlock_j:
-       gfs2_glock_dq_uninit(&j_gh);
+fail_gunlock_ji:
+       if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
+               gfs2_glock_dq_uninit(&ji_gh);
+fail_gunlock_j:
+               gfs2_glock_dq_uninit(&j_gh);
+       }
 
        fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
 
- fail:
-       gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
-
-       return error;
+fail:
+       gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
 }
 
-/**
- * gfs2_check_journals - Recover any dirty journals
- * @sdp: the filesystem
- *
- */
+struct slow_work_ops gfs2_recover_ops = {
+       .owner   = THIS_MODULE,
+       .get_ref = gfs2_recover_get_ref,
+       .put_ref = gfs2_recover_put_ref,
+       .execute = gfs2_recover_work,
+};
 
-void gfs2_check_journals(struct gfs2_sbd *sdp)
-{
-       struct gfs2_jdesc *jd;
 
-       for (;;) {
-               jd = gfs2_jdesc_find_dirty(sdp);
-               if (!jd)
-                       break;
+static int gfs2_recovery_wait(void *word)
+{
+       schedule();
+       return 0;
+}
 
-               if (jd != sdp->sd_jdesc)
-                       gfs2_recover_journal(jd, NO_WAIT);
-       }
+int gfs2_recover_journal(struct gfs2_jdesc *jd)
+{
+       int rv;
+       rv = slow_work_enqueue(&jd->jd_work);
+       if (rv)
+               return rv;
+       wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE);
+       return 0;
 }