logfs: handle powerfail on NAND flash
author Joern Engel <joern@logfs.org>
Fri, 7 May 2010 17:38:40 +0000 (19:38 +0200)
committer Joern Engel <joern@logfs.org>
Fri, 7 May 2010 17:38:40 +0000 (19:38 +0200)
The write buffer may not have been written and may no longer be written
due to an interrupted write in the affected page.

Signed-off-by: Joern Engel <joern@logfs.org>
fs/logfs/dev_bdev.c
fs/logfs/dev_mtd.c
fs/logfs/gc.c
fs/logfs/logfs.h

index 243c000..9bd2ce2 100644 (file)
@@ -303,6 +303,11 @@ static void bdev_put_device(struct super_block *sb)
        close_bdev_exclusive(logfs_super(sb)->s_bdev, FMODE_READ|FMODE_WRITE);
 }
 
+static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
+{
+       return 0; /* always 0: check_area() then skips its forced GC run */
+}
+
 static const struct logfs_device_ops bd_devops = {
        .find_first_sb  = bdev_find_first_sb,
        .find_last_sb   = bdev_find_last_sb,
@@ -310,6 +315,7 @@ static const struct logfs_device_ops bd_devops = {
        .readpage       = bdev_readpage,
        .writeseg       = bdev_writeseg,
        .erase          = bdev_erase,
+       .can_write_buf  = bdev_can_write_buf,
        .sync           = bdev_sync,
        .put_device     = bdev_put_device,
 };
index b02a402..a85d47d 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/completion.h>
 #include <linux/mount.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 
 #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
 
@@ -126,7 +127,8 @@ static int mtd_readpage(void *_sb, struct page *page)
 
        err = mtd_read(sb, page->index << PAGE_SHIFT, PAGE_SIZE,
                        page_address(page));
-       if (err == -EUCLEAN) {
+       if (err == -EUCLEAN || err == -EBADMSG) {
+               /* -EBADMSG happens regularly on power failures */
                err = 0;
                /* FIXME: force GC this segment */
        }
@@ -233,12 +235,32 @@ static void mtd_put_device(struct super_block *sb)
        put_mtd_device(logfs_super(sb)->s_mtd);
 }
 
+/* Return 0 if the s_writesize bytes at @ofs are all 0xff, else a -errno. */
+static int mtd_can_write_buf(struct super_block *sb, u64 ofs)
+{
+       struct logfs_super *super = logfs_super(sb);
+       void *buf;
+       int err;
+
+       buf = kmalloc(super->s_writesize, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+       err = mtd_read(sb, ofs, super->s_writesize, buf);
+       /* A read error is passed through; any non-0xff byte yields -EIO. */
+       if (!err && memchr_inv(buf, 0xff, super->s_writesize))
+               err = -EIO;
+       /* Single exit: buf is freed on read errors too (it used to leak). */
+       kfree(buf);
+       return err;
+}
+
 static const struct logfs_device_ops mtd_devops = {
        .find_first_sb  = mtd_find_first_sb,
        .find_last_sb   = mtd_find_last_sb,
        .readpage       = mtd_readpage,
        .writeseg       = mtd_writeseg,
        .erase          = mtd_erase,
+       .can_write_buf  = mtd_can_write_buf,
        .sync           = mtd_sync,
        .put_device     = mtd_put_device,
 };
index 76c242f..caa4419 100644 (file)
@@ -122,7 +122,7 @@ static void logfs_cleanse_block(struct super_block *sb, u64 ofs, u64 ino,
        logfs_safe_iput(inode, cookie);
 }
 
-static u32 logfs_gc_segment(struct super_block *sb, u32 segno, u8 dist)
+static u32 logfs_gc_segment(struct super_block *sb, u32 segno)
 {
        struct logfs_super *super = logfs_super(sb);
        struct logfs_segment_header sh;
@@ -401,7 +401,7 @@ static int __logfs_gc_once(struct super_block *sb, struct gc_candidate *cand)
                        segno, (u64)segno << super->s_segshift,
                        dist, no_free_segments(sb), valid,
                        super->s_free_bytes);
-       cleaned = logfs_gc_segment(sb, segno, dist);
+       cleaned = logfs_gc_segment(sb, segno);
        log_gc("GC segment #%02x complete - now %x valid\n", segno,
                        valid - cleaned);
        BUG_ON(cleaned != valid);
@@ -632,38 +632,31 @@ static int check_area(struct super_block *sb, int i)
 {
        struct logfs_super *super = logfs_super(sb);
        struct logfs_area *area = super->s_area[i];
-       struct logfs_object_header oh;
+       gc_level_t gc_level;
+       u32 cleaned, valid, ec;
        u32 segno = area->a_segno;
-       u32 ofs = area->a_used_bytes;
-       __be32 crc;
-       int err;
+       u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
 
        if (!area->a_is_open)
                return 0;
 
-       for (ofs = area->a_used_bytes;
-            ofs <= super->s_segsize - sizeof(oh);
-            ofs += (u32)be16_to_cpu(oh.len) + sizeof(oh)) {
-               err = wbuf_read(sb, dev_ofs(sb, segno, ofs), sizeof(oh), &oh);
-               if (err)
-                       return err;
-
-               if (!memchr_inv(&oh, 0xff, sizeof(oh)))
-                       break;
+       if (super->s_devops->can_write_buf(sb, ofs) == 0)
+               return 0;
 
-               crc = logfs_crc32(&oh, sizeof(oh) - 4, 4);
-               if (crc != oh.crc) {
-                       printk(KERN_INFO "interrupted header at %llx\n",
-                                       dev_ofs(sb, segno, ofs));
-                       return 0;
-               }
-       }
-       if (ofs != area->a_used_bytes) {
-               printk(KERN_INFO "%x bytes unaccounted data found at %llx\n",
-                               ofs - area->a_used_bytes,
-                               dev_ofs(sb, segno, area->a_used_bytes));
-               area->a_used_bytes = ofs;
-       }
+       printk(KERN_INFO"LogFS: Possibly incomplete write at %llx\n", ofs);
+       /*
+        * The device cannot write back the write buffer.  Most likely the
+        * wbuf was already written out and the system crashed at some point
+        * before the journal commit happened.  In that case we wouldn't have
+        * to do anything.  But if the crash happened before the wbuf was
+        * written out correctly, we must GC this segment.  So assume the
+        * worst and always do the GC run.
+        */
+       area->a_is_open = 0;
+       valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
+       cleaned = logfs_gc_segment(sb, segno);
+       if (cleaned != valid)
+               return -EIO;
        return 0;
 }
 
index 26a9458..93b55f3 100644 (file)
@@ -144,6 +144,7 @@ struct logfs_area_ops {
  * @erase:                     erase one segment
  * @read:                      read from the device
  * @erase:                     erase part of the device
+ * @can_write_buf:             decide whether wbuf can be written to ofs
  */
 struct logfs_device_ops {
        struct page *(*find_first_sb)(struct super_block *sb, u64 *ofs);
@@ -153,6 +154,7 @@ struct logfs_device_ops {
        void (*writeseg)(struct super_block *sb, u64 ofs, size_t len);
        int (*erase)(struct super_block *sb, loff_t ofs, size_t len,
                        int ensure_write);
+       int (*can_write_buf)(struct super_block *sb, u64 ofs);
        void (*sync)(struct super_block *sb);
        void (*put_device)(struct super_block *sb);
 };