[JFFS2] Build fragtree in reverse order
authorArtem B. Bityutskiy <dedekind@infradead.org>
Mon, 1 Aug 2005 12:05:22 +0000 (13:05 +0100)
committerThomas Gleixner <tglx@mtd.linutronix.de>
Sun, 6 Nov 2005 17:22:17 +0000 (18:22 +0100)
Instead of building fragtree starting from node with the smallest version
number, start from the highest. This helps to avoid reading and checking
obsolete nodes.

Signed-off-by: Artem B. Bityutskiy <dedekind@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
fs/jffs2/nodelist.c
fs/jffs2/nodelist.h
fs/jffs2/readinode.c

index 0cf5e6f..390ce06 100644 (file)
@@ -7,7 +7,7 @@
  *
  * For licensing information, see the file 'LICENCE' in this directory.
  *
- * $Id: nodelist.c,v 1.103 2005/07/31 08:20:44 dedekind Exp $
+ * $Id: nodelist.c,v 1.104 2005/08/01 12:05:19 dedekind Exp $
  *
  */
 
@@ -59,7 +59,7 @@ void jffs2_truncate_fragtree(struct jffs2_sb_info *c, struct rb_root *list, uint
 
        /* We know frag->ofs <= size. That's what lookup does for us */
        if (frag && frag->ofs != size) {
-               if (frag->ofs+frag->size >= size) {
+               if (frag->ofs+frag->size > size) {
                        JFFS2_DBG_FRAGTREE2("truncating frag 0x%08x-0x%08x\n", frag->ofs, frag->ofs+frag->size);
                        frag->size = size - frag->ofs;
                }
@@ -73,6 +73,20 @@ void jffs2_truncate_fragtree(struct jffs2_sb_info *c, struct rb_root *list, uint
                jffs2_obsolete_node_frag(c, frag);
                frag = next;
        }
+
+       if (size == 0)
+               return;
+
+       /* 
+        * If the last fragment starts at the RAM page boundary, it is
+        * REF_PRISTINE irrespective of its size.
+        */
+       frag = frag_last(list);
+       if ((frag->ofs & (PAGE_CACHE_SIZE - 1)) == 0) {
+               JFFS2_DBG_FRAGTREE2("marking the last fragment 0x%08x-0x%08x REF_PRISTINE.\n",
+                       frag->ofs, frag->ofs + frag->size); 
+               frag->node->raw->flash_offset = ref_offset(frag->node->raw) | REF_PRISTINE;
+       }
 }
 
 void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *this)
@@ -120,14 +134,82 @@ static void jffs2_fragtree_insert(struct jffs2_node_frag *newfrag, struct jffs2_
        rb_link_node(&newfrag->rb, &base->rb, link);
 }
 
+/*
+ * Allocate a fragment descriptor and fill in its offset, size and node.
+ *
+ * Returns the new fragment, or NULL (after logging an error) if
+ * jffs2_alloc_node_frag() fails.
+ */
+static inline struct jffs2_node_frag * new_fragment(struct jffs2_full_dnode *fn, uint32_t ofs, uint32_t size)
+{
+       struct jffs2_node_frag *frag = jffs2_alloc_node_frag();
+
+       if (unlikely(!frag)) {
+               JFFS2_ERROR("cannot allocate a jffs2_node_frag object\n");
+               return NULL;
+       }
+
+       frag->ofs = ofs;
+       frag->size = size;
+       frag->node = fn;
+
+       return frag;
+}
+
+/*
+ * Append 'newfrag' to the fragtree when no existing fragment overlaps it.
+ *
+ * 'this' is the fragment the caller's lookup returned (or NULL) and 'lastend'
+ * is the offset at which it ends (0 when 'this' is NULL). If there is a gap
+ * between 'lastend' and the start of the new node, a hole fragment covering
+ * the gap is inserted first.
+ *
+ * Returns 0 on success or -ENOMEM if the hole fragment cannot be allocated.
+ * In the -ENOMEM case 'newfrag' has already been freed here, so the caller
+ * must not free it again.
+ */
+static int no_overlapping_node(struct jffs2_sb_info *c, struct rb_root *root,
+                              struct jffs2_node_frag *newfrag,
+                              struct jffs2_node_frag *this, uint32_t lastend)
+{
+       struct jffs2_node_frag *parent = this;
+
+       if (lastend < newfrag->node->ofs) {
+               /* A gap precedes the new fragment: cover it with a hole */
+               struct jffs2_node_frag *gap;
+
+               gap = new_fragment(NULL, lastend, newfrag->node->ofs - lastend);
+               if (unlikely(!gap)) {
+                       jffs2_free_node_frag(newfrag);
+                       return -ENOMEM;
+               }
+
+               if (!parent) {
+                       JFFS2_DBG_FRAGTREE2("Add hole frag %u-%u to the root of the tree.\n",
+                               gap->ofs, gap->ofs + gap->size);
+                       rb_link_node(&gap->rb, NULL, &root->rb_node);
+               } else {
+                       /*
+                        * By definition 'parent' has no right-hand child,
+                        * because there are no frags with offset greater
+                        * than it. So the hole goes there.
+                        */
+                       JFFS2_DBG_FRAGTREE2("add hole frag %u-%u on the right of the new frag.\n",
+                               gap->ofs, gap->ofs + gap->size);
+                       rb_link_node(&gap->rb, &parent->rb, &parent->rb.rb_right);
+               }
+               rb_insert_color(&gap->rb, root);
+
+               /* The new fragment now hangs off the hole */
+               parent = gap;
+       }
+
+       if (!parent) {
+               JFFS2_DBG_FRAGTREE2("insert the new node at the root of the tree\n");
+               rb_link_node(&newfrag->rb, NULL, &root->rb_node);
+       } else {
+               /*
+                * Same reasoning as for the hole: 'parent' is the right-most
+                * fragment, so its right-hand slot is free.
+                */
+               JFFS2_DBG_FRAGTREE2("add the new node at the right\n");
+               rb_link_node(&newfrag->rb, &parent->rb, &parent->rb.rb_right);
+       }
+       rb_insert_color(&newfrag->rb, root);
+
+       return 0;
+}
+
 /* Doesn't set inode->i_size */
-static int jffs2_add_frag_to_fragtree(struct jffs2_sb_info *c, struct rb_root *list, struct jffs2_node_frag *newfrag)
+static int jffs2_add_frag_to_fragtree(struct jffs2_sb_info *c, struct rb_root *root, struct jffs2_node_frag *newfrag)
 {
        struct jffs2_node_frag *this;
        uint32_t lastend;
 
        /* Skip all the nodes which are completed before this one starts */
-       this = jffs2_lookup_node_frag(list, newfrag->node->ofs);
+       this = jffs2_lookup_node_frag(root, newfrag->node->ofs);
 
        if (this) {
                JFFS2_DBG_FRAGTREE2("lookup gave frag 0x%04x-0x%04x; phys 0x%08x (*%p)\n",
@@ -138,7 +220,7 @@ static int jffs2_add_frag_to_fragtree(struct jffs2_sb_info *c, struct rb_root *l
                lastend = 0;
        }
                          
-       /* See if we ran off the end of the list */
+       /* See if we ran off the end of the fragtree */
        if (lastend <= newfrag->ofs) {
                /* We did */
 
@@ -152,45 +234,16 @@ static int jffs2_add_frag_to_fragtree(struct jffs2_sb_info *c, struct rb_root *l
                        mark_ref_normal(newfrag->node->raw);
                }
 
-               if (lastend < newfrag->node->ofs) {
-                       /* ... and we need to put a hole in before the new node */
-                       struct jffs2_node_frag *holefrag = jffs2_alloc_node_frag();
-                       if (!holefrag) {
-                               jffs2_free_node_frag(newfrag);
-                               return -ENOMEM;
-                       }
-                       holefrag->ofs = lastend;
-                       holefrag->size = newfrag->node->ofs - lastend;
-                       holefrag->node = NULL;
-                       if (this) {
-                               /* By definition, the 'this' node has no right-hand child, 
-                                  because there are no frags with offset greater than it.
-                                  So that's where we want to put the hole */
-                               JFFS2_DBG_FRAGTREE2("adding hole frag (%p) on right of node at (%p)\n", holefrag, this);
-                               rb_link_node(&holefrag->rb, &this->rb, &this->rb.rb_right);
-                       } else {
-                               JFFS2_DBG_FRAGTREE2("adding hole frag (%p) at root of tree\n", holefrag);
-                               rb_link_node(&holefrag->rb, NULL, &list->rb_node);
-                       }
-                       rb_insert_color(&holefrag->rb, list);
-                       this = holefrag;
-               }
-               if (this) {
-                       /* By definition, the 'this' node has no right-hand child, 
-                          because there are no frags with offset greater than it.
-                          So that's where we want to put new fragment */
-                       JFFS2_DBG_FRAGTREE2("adding new frag (%p) on right of node at (%p)\n", newfrag, this);
-                       rb_link_node(&newfrag->rb, &this->rb, &this->rb.rb_right);                      
-               } else {
-                       JFFS2_DBG_FRAGTREE2("adding new frag (%p) at root of tree\n", newfrag);
-                       rb_link_node(&newfrag->rb, NULL, &list->rb_node);
-               }
-               rb_insert_color(&newfrag->rb, list);
-               return 0;
+               return no_overlapping_node(c, root, newfrag, this, lastend);
        }
 
-       JFFS2_DBG_FRAGTREE2("dealing with frag 0x%04x-0x%04x; phys 0x%08x (*%p)\n", 
-                 this->ofs, this->ofs+this->size, this->node?(ref_offset(this->node->raw)):0xffffffff, this);
+       if (this->node)
+               JFFS2_DBG_FRAGTREE2("dealing with frag %u-%u, phys %#08x(%d).\n",
+               this->ofs, this->ofs + this->size,
+               ref_offset(this->node->raw), ref_flags(this->node->raw));
+       else
+               JFFS2_DBG_FRAGTREE2("dealing with hole frag %u-%u.\n",
+               this->ofs, this->ofs + this->size);
 
        /* OK. 'this' is pointing at the first frag that newfrag->ofs at least partially obsoletes,
         * - i.e. newfrag->ofs < this->ofs+this->size && newfrag->ofs >= this->ofs  
@@ -206,11 +259,8 @@ static int jffs2_add_frag_to_fragtree(struct jffs2_sb_info *c, struct rb_root *l
 
                if (this->ofs + this->size > newfrag->ofs + newfrag->size) {
                        /* The new node splits 'this' frag into two */
-                       struct jffs2_node_frag *newfrag2 = jffs2_alloc_node_frag();
-                       if (!newfrag2) {
-                               jffs2_free_node_frag(newfrag);
-                               return -ENOMEM;
-                       }
+                       struct jffs2_node_frag *newfrag2;
+
                        if (this->node)
                                JFFS2_DBG_FRAGTREE2("split old frag 0x%04x-0x%04x, phys 0x%08x\n",
                                        this->ofs, this->ofs+this->size, ref_offset(this->node->raw));
@@ -219,9 +269,10 @@ static int jffs2_add_frag_to_fragtree(struct jffs2_sb_info *c, struct rb_root *l
                                        this->ofs, this->ofs+this->size, ref_offset(this->node->raw));
                        
                        /* New second frag pointing to this's node */
-                       newfrag2->ofs = newfrag->ofs + newfrag->size;
-                       newfrag2->size = (this->ofs+this->size) - newfrag2->ofs;
-                       newfrag2->node = this->node;
+                       newfrag2 = new_fragment(this->node, newfrag->ofs + newfrag->size,
+                                               this->ofs + this->size - newfrag->ofs - newfrag->size);
+                       if (unlikely(!newfrag2))
+                               return -ENOMEM;
                        if (this->node)
                                this->node->frags++;
 
@@ -235,10 +286,10 @@ static int jffs2_add_frag_to_fragtree(struct jffs2_sb_info *c, struct rb_root *l
                           'this' to insert newfrag, and a tree insert
                           from newfrag to insert newfrag2. */
                        jffs2_fragtree_insert(newfrag, this);
-                       rb_insert_color(&newfrag->rb, list);
+                       rb_insert_color(&newfrag->rb, root);
                        
                        jffs2_fragtree_insert(newfrag2, newfrag);
-                       rb_insert_color(&newfrag2->rb, list);
+                       rb_insert_color(&newfrag2->rb, root);
                        
                        return 0;
                }
@@ -247,14 +298,14 @@ static int jffs2_add_frag_to_fragtree(struct jffs2_sb_info *c, struct rb_root *l
 
                /* Again, we know it lives down here in the tree */
                jffs2_fragtree_insert(newfrag, this);
-               rb_insert_color(&newfrag->rb, list);
+               rb_insert_color(&newfrag->rb, root);
        } else {
                /* New frag starts at the same point as 'this' used to. Replace 
                   it in the tree without doing a delete and insertion */
                JFFS2_DBG_FRAGTREE2("inserting newfrag (*%p),%d-%d in before 'this' (*%p),%d-%d\n",
                          newfrag, newfrag->ofs, newfrag->ofs+newfrag->size, this, this->ofs, this->ofs+this->size);
        
-               rb_replace_node(&this->rb, &newfrag->rb, list);
+               rb_replace_node(&this->rb, &newfrag->rb, root);
                
                if (newfrag->ofs + newfrag->size >= this->ofs+this->size) {
                        JFFS2_DBG_FRAGTREE2("obsoleting node frag %p (%x-%x)\n", this, this->ofs, this->ofs+this->size);
@@ -264,7 +315,7 @@ static int jffs2_add_frag_to_fragtree(struct jffs2_sb_info *c, struct rb_root *l
                        this->size -= newfrag->size;
 
                        jffs2_fragtree_insert(this, newfrag);
-                       rb_insert_color(&this->rb, list);
+                       rb_insert_color(&this->rb, root);
                        return 0;
                }
        }
@@ -275,15 +326,15 @@ static int jffs2_add_frag_to_fragtree(struct jffs2_sb_info *c, struct rb_root *l
                /* 'this' frag is obsoleted completely. */
                JFFS2_DBG_FRAGTREE2("obsoleting node frag %p (%x-%x) and removing from tree\n",
                        this, this->ofs, this->ofs+this->size);
-               rb_erase(&this->rb, list);
+               rb_erase(&this->rb, root);
                jffs2_obsolete_node_frag(c, this);
        }
        /* Now we're pointing at the first frag which isn't totally obsoleted by 
           the new frag */
 
-       if (!this || newfrag->ofs + newfrag->size == this->ofs) {
+       if (!this || newfrag->ofs + newfrag->size == this->ofs)
                return 0;
-       }
+
        /* Still some overlap but we don't need to move it in the tree */
        this->size = (this->ofs + this->size) - (newfrag->ofs + newfrag->size);
        this->ofs = newfrag->ofs + newfrag->size;
@@ -296,8 +347,9 @@ static int jffs2_add_frag_to_fragtree(struct jffs2_sb_info *c, struct rb_root *l
        return 0;
 }
 
-/* Given an inode, probably with existing list of fragments, add the new node
- * to the fragment list.
+/* 
+ * Given an inode, probably with existing tree of fragments, add the new node
+ * to the fragment tree.
  */
 int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
 {
@@ -307,18 +359,14 @@ int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_in
        if (unlikely(!fn->size))
                return 0;
 
-       newfrag = jffs2_alloc_node_frag();
+       newfrag = new_fragment(fn, fn->ofs, fn->size);
        if (unlikely(!newfrag))
                return -ENOMEM;
+       newfrag->node->frags = 1;
 
        JFFS2_DBG_FRAGTREE("adding node %#04x-%#04x @0x%08x on flash, newfrag *%p\n",
                  fn->ofs, fn->ofs+fn->size, ref_offset(fn->raw), newfrag);
        
-       newfrag->ofs = fn->ofs;
-       newfrag->size = fn->size;
-       newfrag->node = fn;
-       newfrag->node->frags = 1;
-
        ret = jffs2_add_frag_to_fragtree(c, &f->fragtree, newfrag);
        if (unlikely(ret))
                return ret;
@@ -344,10 +392,465 @@ int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_in
                }
        }
        jffs2_dbg_fragtree_paranoia_check_nolock(f);
-       jffs2_dbg_dump_fragtree_nolock(f);
+
+       return 0;
+}
+
+/*
+ * Check the data CRC of the node.
+ *
+ * tn->partial_crc is used as the running CRC of the leading part of the
+ * data; only the remaining bytes are fetched from flash (through point()
+ * when the MTD device provides it, otherwise into a temporary buffer),
+ * folded into that CRC and the result compared with tn->data_crc. On
+ * success the node's length is moved from the "unchecked" to the "used"
+ * space counters of both the eraseblock and the file system.
+ *
+ * Returns: 0 if the data CRC is correct;
+ *         1 - if incorrect;
+ *         error code if an error occurred.
+ */
+static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn)
+{
+       struct jffs2_raw_node_ref *ref = tn->fn->raw;
+       int err = 0, pointed = 0;
+       struct jffs2_eraseblock *jeb;
+       unsigned char *buffer;
+       uint32_t crc, ofs, data_ofs, retlen, len;
+
+       BUG_ON(tn->csize == 0);
+
+       /*
+        * Calculate how many bytes were already checked: the distance from
+        * the start of the data to the end of the write-buffer page it starts
+        * in (nothing if the data starts exactly on a page boundary).
+        *
+        * The offset inside the page must be used here, not the page base
+        * address: subtracting the base from the page size underflows and
+        * makes every node look "already checked".
+        *
+        * NOTE(review): assumes tn->partial_crc covers exactly these bytes
+        * and that nothing was pre-checked when c->wbuf_pagesize is 0 -
+        * confirm against the code that fills in tn->partial_crc.
+        */
+       data_ofs = ref_offset(ref) + sizeof(struct jffs2_raw_inode);
+       len = 0;
+       if (c->wbuf_pagesize) {
+               len = data_ofs % c->wbuf_pagesize;
+               if (likely(len))
+                       len = c->wbuf_pagesize - len;
+       }
+
+       if (len >= tn->csize) {
+               JFFS2_DBG_READINODE("no need to check node at %#08x, data length %u, data starts at %#08x - it has already been checked.\n",
+                       ref_offset(ref), tn->csize, data_ofs);
+               goto adj_acc;
+       }
+
+       /* From here on 'ofs'/'len' describe the part still to be checked */
+       ofs = data_ofs + len;
+       len = tn->csize - len;
+
+       JFFS2_DBG_READINODE("check node at %#08x, data length %u, partial CRC %#08x, correct CRC %#08x, data starts at %#08x, start checking from %#08x - %u bytes.\n",
+               ref_offset(ref), tn->csize, tn->partial_crc, tn->data_crc, data_ofs, ofs, len);
+
+#ifndef __ECOS
+       /* TODO: instead, encapsulate point() stuff to jffs2_flash_read(),
+        * adding and jffs2_flash_read_end() interface. */
+       if (c->mtd->point) {
+               err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer);
+               /* We asked for 'len' bytes, so that is what must be mapped */
+               if (!err && retlen < len) {
+                       JFFS2_WARNING("MTD point returned len too short: %u instead of %u.\n", retlen, len);
+                       c->mtd->unpoint(c->mtd, buffer, ofs, len);
+               } else if (err)
+                       JFFS2_WARNING("MTD point failed: error code %d.\n", err);
+               else
+                       pointed = 1; /* successfully pointed to device */
+       }
+#endif
+
+       if (!pointed) {
+               /* Fall back to reading the data into a temporary buffer */
+               buffer = kmalloc(len, GFP_KERNEL);
+               if (unlikely(!buffer))
+                       return -ENOMEM;
+
+               /* TODO: this is very frequent pattern, make it a separate
+                * routine */
+               err = jffs2_flash_read(c, ofs, len, &retlen, buffer);
+               if (err) {
+                       JFFS2_ERROR("can not read %d bytes from 0x%08x, error code: %d.\n", len, ofs, err);
+                       goto free_out;
+               }
+
+               if (retlen != len) {
+                       JFFS2_ERROR("short read at %#08x: %d instead of %d.\n", ofs, retlen, len);
+                       err = -EIO;
+                       goto free_out;
+               }
+       }
+
+       /* Continue calculating CRC */
+       crc = crc32(tn->partial_crc, buffer, len);
+       if(!pointed)
+               kfree(buffer);
+#ifndef __ECOS
+       else
+               c->mtd->unpoint(c->mtd, buffer, ofs, len);
+#endif
+
+       if (crc != tn->data_crc) {
+               JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n",
+                       ofs, tn->data_crc, crc);
+               return 1;
+       }
+
+adj_acc:
+       jeb = &c->blocks[ref->flash_offset / c->sector_size];
+       len = ref_totlen(c, jeb, ref);
+
+       /*
+        * Mark the node as having been checked and fix the
+        * accounting accordingly.
+        */
+       spin_lock(&c->erase_completion_lock);
+       jeb->used_size += len;
+       jeb->unchecked_size -= len;
+       c->used_size += len;
+       c->unchecked_size -= len;
+       spin_unlock(&c->erase_completion_lock);
+
        return 0;
+
+free_out:
+       if(!pointed)
+               kfree(buffer);
+#ifndef __ECOS
+       else
+               c->mtd->unpoint(c->mtd, buffer, ofs, len);
+#endif
+       return err;
 }
 
+/*
+ * Helper function for jffs2_add_older_frag_to_fragtree().
+ *
+ * Verifies the data CRC of a node that is still marked REF_UNCHECKED; a node
+ * with any other flag value is accepted without further work.
+ *
+ * Returns 0 if the node needs no check or its CRC is correct, a positive
+ * value if the CRC is wrong (the node is then marked obsolete), or a
+ * negative error code from check_node_data().
+ */
+static inline int check_node(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn)
+{
+       struct jffs2_full_dnode *fn = tn->fn;
+       int status;
+
+       BUG_ON(ref_obsolete(fn->raw));
+
+       /* We only check the data CRC of unchecked nodes */
+       if (ref_flags(fn->raw) != REF_UNCHECKED)
+               return 0;
+
+       JFFS2_DBG_FRAGTREE2("check node %u-%u, phys offs %#08x.\n",
+               fn->ofs, fn->ofs + fn->size,
+               ref_offset(fn->raw));
+
+       status = check_node_data(c, tn);
+       if (likely(status == 0))
+               return 0;
+
+       if (status < 0) {
+               JFFS2_ERROR("check_node_data() returned error: %d.\n",
+                       status);
+               return status;
+       }
+
+       /* Positive result: the data CRC did not match */
+       JFFS2_DBG_FRAGTREE2("CRC error, mark it obsolete.\n");
+       jffs2_mark_node_obsolete(c, fn->raw);
+
+       return status;
+}
+
+/*
+ * Helper function for jffs2_add_older_frag_to_fragtree().
+ *
+ * Inserts 'newfrag' into the area currently covered by the hole fragment
+ * 'hole'. The code handles the new fragment starting at the same offset as
+ * the hole or further to the right, and ending before or at the end of the
+ * hole; whatever part of the hole is left uncovered stays in the tree as a
+ * hole.
+ *
+ * Returns 0 on success or -ENOMEM if a fragment for the right-hand remainder
+ * of the hole cannot be allocated; in that case 'newfrag' has been freed.
+ */
+static int split_hole(struct jffs2_sb_info *c, struct rb_root *root,
+                     struct jffs2_node_frag *newfrag, struct jffs2_node_frag *hole)
+{
+       struct jffs2_node_frag *tail = NULL;
+       uint32_t hole_end = hole->ofs + hole->size;
+       uint32_t frag_end = newfrag->ofs + newfrag->size;
+
+       JFFS2_DBG_FRAGTREE2("fragment %#04x-%#04x splits the hole %#04x-%#04x\n",
+               newfrag->ofs, frag_end, hole->ofs, hole_end);
+
+       if (hole->ofs == newfrag->ofs) {
+               /* Both start at the same offset: nothing is left on the left */
+               if (hole_end <= frag_end) {
+                       /* Same size as the hole: simply take its place */
+                       JFFS2_DBG_FRAGTREE2("insert fragment %#04x-%#04x and overwrite hole\n",
+                               newfrag->ofs, newfrag->ofs + newfrag->size);
+                       rb_replace_node(&hole->rb, &newfrag->rb, root);
+                       jffs2_free_node_frag(hole);
+                       return 0;
+               }
+
+               /*
+                * The new fragment takes over the hole's position in the
+                * tree, and the shortened hole is re-inserted to its right.
+                */
+               JFFS2_DBG_FRAGTREE2("insert fragment %#04x-%#04x and cut the left part of the hole\n",
+                       newfrag->ofs, newfrag->ofs + newfrag->size);
+               rb_replace_node(&hole->rb, &newfrag->rb, root);
+
+               hole->ofs += newfrag->size;
+               hole->size -= newfrag->size;
+
+               jffs2_fragtree_insert(hole, newfrag);
+               rb_insert_color(&hole->rb, root);
+               return 0;
+       }
+
+       /* Part of the hole remains to the left of the new fragment */
+       if (hole_end > frag_end) {
+               /* ... and part of it remains to the right as well */
+               tail = new_fragment(NULL, frag_end, hole_end - frag_end);
+               if (unlikely(!tail)) {
+                       jffs2_free_node_frag(newfrag);
+                       return -ENOMEM;
+               }
+       }
+
+       /* Shrink the existing hole so that it ends where newfrag starts */
+       hole->size = newfrag->ofs - hole->ofs;
+       JFFS2_DBG_FRAGTREE2("left the hole %#04x-%#04x at the left and inserd fragment %#04x-%#04x\n",
+               hole->ofs, hole->ofs + hole->size, newfrag->ofs, newfrag->ofs + newfrag->size);
+
+       jffs2_fragtree_insert(newfrag, hole);
+       rb_insert_color(&newfrag->rb, root);
+
+       if (tail) {
+               JFFS2_DBG_FRAGTREE2("left the hole %#04x-%#04x at the right\n",
+                       tail->ofs, tail->ofs + tail->size);
+               jffs2_fragtree_insert(tail, newfrag);
+               rb_insert_color(&tail->rb, root);
+       }
+
+       return 0;
+}
+
+/*
+ * This function is used when we build inode. It expects the nodes are passed
+ * in the decreasing version order. The whole point of this is to improve the
+ * inodes checking on NAND: we check the nodes' data CRC only when they are not
+ * obsoleted. Previously, add_frag_to_fragtree() function was used and
+ * nodes were passed to it in the increasing version order and CRCs of all
+ * nodes were checked.
+ *
+ * Because every data fragment already in the tree belongs to a newer node,
+ * the new node may only fill holes and the space past the last fragment;
+ * the parts of it that overlap existing data fragments are dropped.
+ *
+ * Note: tn->fn->size shouldn't be zero.
+ *
+ * Returns 0 if the node was inserted
+ *         1 if it wasn't inserted (it is obsolete or failed the data CRC
+ *           check)
+ *         < 0 if an error occurred
+ */
+int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
+                                    struct jffs2_tmp_dnode_info *tn)
+{
+       struct jffs2_node_frag *this, *newfrag;
+       uint32_t lastend;
+       struct jffs2_full_dnode *fn = tn->fn;
+       struct rb_root *root = &f->fragtree;
+       uint32_t fn_size = fn->size, fn_ofs = fn->ofs;
+       int err, checked = 0;
+       int ref_flag;
+
+       JFFS2_DBG_FRAGTREE("insert fragment %#04x-%#04x\n", fn_ofs, fn_ofs + fn_size);
+
+       /* Skip all the nodes which are completed before this one starts */
+       this = jffs2_lookup_node_frag(root, fn_ofs);
+       if (this)
+               JFFS2_DBG_FRAGTREE2("'this' found %#04x-%#04x (%s)\n", this->ofs, this->ofs + this->size, this->node ? "data" : "hole");
+
+       if (this)
+               lastend = this->ofs + this->size;
+       else
+               lastend = 0;
+
+       /* Detect the preliminary type of node */
+       if (fn->size >= PAGE_CACHE_SIZE)
+               ref_flag = REF_PRISTINE;
+       else
+               ref_flag = REF_NORMAL;
+
+       /* See if we ran off the end of the root */
+       if (lastend <= fn_ofs) {
+               /* We did */
+
+               /*
+                * We are going to insert the new node into the
+                * fragment tree, so check it.
+                */
+               err = check_node(c, f, tn);
+               if (err != 0)
+                       return err;
+
+               fn->frags = 1;
+
+               newfrag = new_fragment(fn, fn_ofs, fn_size);
+               if (unlikely(!newfrag))
+                       return -ENOMEM;
+
+               /*
+                * no_overlapping_node() frees 'newfrag' itself when it
+                * fails, so it must not be freed again here (that would be
+                * a double free).
+                */
+               err = no_overlapping_node(c, root, newfrag, this, lastend);
+               if (unlikely(err != 0))
+                       return err;
+
+               goto out_ok;
+       }
+
+       /* Counts the fragments of this node that end up in the tree */
+       fn->frags = 0;
+
+       while (1) {
+               /*
+                * Here we have:
+                * fn_ofs < this->ofs + this->size && fn_ofs >= this->ofs.
+                *
+                * Remember, 'this' has higher version, any non-hole node
+                * which is already in the fragtree is newer than the newly
+                * inserted.
+                */
+               if (!this->node) {
+                       /*
+                        * 'this' is the hole fragment, so at least the
+                        * beginning of the new fragment is valid.
+                        */
+
+                       /*
+                        * We are going to insert the new node into the
+                        * fragment tree, so check it.
+                        */
+                       if (!checked) {
+                               err = check_node(c, f, tn);
+                               if (unlikely(err != 0))
+                                       return err;
+                               checked = 1;
+                       }
+
+                       if (this->ofs + this->size >= fn_ofs + fn_size) {
+                               /* We split the hole on two parts */
+
+                               fn->frags += 1;
+                               newfrag = new_fragment(fn, fn_ofs, fn_size);
+                               if (unlikely(!newfrag))
+                                       return -ENOMEM;
+
+                               /* split_hole() frees 'newfrag' on failure */
+                               err = split_hole(c, root, newfrag, this);
+                               if (unlikely(err))
+                                       return err;
+                               goto out_ok;
+                       }
+
+                       /*
+                        * The beginning of the new fragment is valid since it
+                        * overlaps the hole node.
+                        */
+
+                       ref_flag = REF_NORMAL;
+
+                       fn->frags += 1;
+                       newfrag = new_fragment(fn, fn_ofs,
+                                       this->ofs + this->size - fn_ofs);
+                       if (unlikely(!newfrag))
+                               return -ENOMEM;
+
+                       if (fn_ofs == this->ofs) {
+                               /*
+                                * The new node starts at the same offset as
+                                * the hole and supersedes the hole.
+                                */
+                               JFFS2_DBG_FRAGTREE2("add the new fragment instead of hole %#04x-%#04x, refcnt %d\n",
+                                       fn_ofs, fn_ofs + this->ofs + this->size - fn_ofs, fn->frags);
+
+                               rb_replace_node(&this->rb, &newfrag->rb, root);
+                               jffs2_free_node_frag(this);
+                       } else {
+                               /*
+                                * The hole becomes shorter as its right part
+                                * is superseded by the new fragment.
+                                */
+                               JFFS2_DBG_FRAGTREE2("reduce size of hole %#04x-%#04x to %#04x-%#04x\n",
+                                       this->ofs, this->ofs + this->size, this->ofs, this->ofs + this->size - newfrag->size);
+
+                               JFFS2_DBG_FRAGTREE2("add new fragment %#04x-%#04x, refcnt %d\n", fn_ofs,
+                                       fn_ofs + this->ofs + this->size - fn_ofs, fn->frags);
+
+                               this->size -= newfrag->size;
+                               jffs2_fragtree_insert(newfrag, this);
+                               rb_insert_color(&newfrag->rb, root);
+                       }
+
+                       /* Continue with whatever part of the node is left */
+                       fn_ofs += newfrag->size;
+                       fn_size -= newfrag->size;
+
+                       /*
+                        * NOTE(review): 'this' is dereferenced below without
+                        * a NULL check, i.e. the code relies on a hole never
+                        * being the last fragment of the tree - confirm.
+                        */
+                       this = rb_entry(rb_next(&newfrag->rb),
+                                       struct jffs2_node_frag, rb);
+
+                       JFFS2_DBG_FRAGTREE2("switch to the next 'this' fragment: %#04x-%#04x %s\n",
+                               this->ofs, this->ofs + this->size, this->node ? "(data)" : "(hole)");
+               }
+
+               /*
+                * 'This' node is not the hole so it obsoletes the new fragment
+                * either fully or partially.
+                */
+               if (this->ofs + this->size >= fn_ofs + fn_size) {
+                       /* The new node is obsolete, drop it */
+                       if (fn->frags == 0) {
+                               JFFS2_DBG_FRAGTREE2("%#04x-%#04x is obsolete, mark it obsolete\n", fn_ofs, fn_ofs + fn_size);
+                               ref_flag = REF_OBSOLETE;
+                       }
+                       goto out_ok;
+               } else {
+                       struct jffs2_node_frag *new_this;
+
+                       /* 'This' node obsoletes the beginning of the new node */
+                       JFFS2_DBG_FRAGTREE2("the beginning %#04x-%#04x is obsolete\n", fn_ofs, this->ofs + this->size);
+
+                       ref_flag = REF_NORMAL;
+
+                       fn_size -= this->ofs + this->size - fn_ofs;
+                       fn_ofs = this->ofs + this->size;
+                       JFFS2_DBG_FRAGTREE2("now considering %#04x-%#04x\n", fn_ofs, fn_ofs + fn_size);
+
+                       new_this = rb_entry(rb_next(&this->rb), struct jffs2_node_frag, rb);
+                       if (!new_this) {
+                               /*
+                                * There is no next fragment. Add the rest of
+                                * the new node as the right-hand child.
+                                */
+                               if (!checked) {
+                                       err = check_node(c, f, tn);
+                                       if (unlikely(err != 0))
+                                               return err;
+                                       checked = 1;
+                               }
+
+                               fn->frags += 1;
+                               newfrag = new_fragment(fn, fn_ofs, fn_size);
+                               if (unlikely(!newfrag))
+                                       return -ENOMEM;
+
+                               JFFS2_DBG_FRAGTREE2("there are no more fragments, insert %#04x-%#04x\n",
+                                       newfrag->ofs, newfrag->ofs + newfrag->size);
+                               rb_link_node(&newfrag->rb, &this->rb, &this->rb.rb_right);
+                               rb_insert_color(&newfrag->rb, root);
+                               goto out_ok;
+                       } else {
+                               this = new_this;
+                               JFFS2_DBG_FRAGTREE2("switch to the next 'this' fragment: %#04x-%#04x %s\n",
+                                       this->ofs, this->ofs + this->size, this->node ? "(data)" : "(hole)");
+                       }
+               }
+       }
+
+out_ok:
+       BUG_ON(fn->size < PAGE_CACHE_SIZE && ref_flag == REF_PRISTINE);
+
+       if (ref_flag == REF_OBSOLETE) {
+               JFFS2_DBG_FRAGTREE2("the node is obsolete now\n");
+               /* jffs2_mark_node_obsolete() will adjust space accounting */
+               jffs2_mark_node_obsolete(c, fn->raw);
+               return 1;
+       }
+
+       JFFS2_DBG_FRAGTREE2("the node is \"%s\" now\n", ref_flag == REF_NORMAL ? "REF_NORMAL" : "REF_PRISTINE");
+
+       /* Space accounting was adjusted at check_node_data() */
+       spin_lock(&c->erase_completion_lock);
+       fn->raw->flash_offset = ref_offset(fn->raw) | ref_flag;
+       spin_unlock(&c->erase_completion_lock);
+
+       return 0;
+}
 
 void jffs2_set_inocache_state(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic, int state)
 {
index 53c12e4..adee3c6 100644 (file)
@@ -7,7 +7,7 @@
  *
  * For licensing information, see the file 'LICENCE' in this directory.
  *
- * $Id: nodelist.h,v 1.136 2005/07/31 08:20:44 dedekind Exp $
+ * $Id: nodelist.h,v 1.137 2005/08/01 12:05:19 dedekind Exp $
  *
  */
 
@@ -61,6 +61,9 @@
 #error wibble
 #endif
 
+/* The minimal node header size */
+#define JFFS2_MIN_NODE_HEADER sizeof(struct jffs2_raw_dirent)
+
 /*
   This is all we need to keep in-core for each raw node during normal
   operation. As and when we do read_inode on a particular inode, we can
@@ -148,6 +151,9 @@ struct jffs2_tmp_dnode_info
        struct rb_node rb;
        struct jffs2_full_dnode *fn;
        uint32_t version;
+       uint32_t data_crc;
+       uint32_t partial_crc;
+       uint32_t csize;
 };       
 
 struct jffs2_full_dirent
@@ -311,6 +317,7 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root
 void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *this);
 int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn);
 void jffs2_truncate_fragtree (struct jffs2_sb_info *c, struct rb_root *list, uint32_t size);
+int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn);
 
 /* nodemgmt.c */
 int jffs2_thread_should_wake(struct jffs2_sb_info *c);
index f3b12d7..488787a 100644 (file)
@@ -7,7 +7,7 @@
  *
  * For licensing information, see the file 'LICENCE' in this directory.
  *
- * $Id: readinode.c,v 1.134 2005/07/31 08:20:44 dedekind Exp $
+ * $Id: readinode.c,v 1.135 2005/08/01 12:05:19 dedekind Exp $
  *
  */
 
@@ -21,8 +21,8 @@
 #include <linux/compiler.h>
 #include "nodelist.h"
 
-/* 
- * Put a new tmp_dnode_info into the temporaty RB-tree, keeping the list in 
+/*
+ * Put a new tmp_dnode_info into the temporary RB-tree, keeping the list in
  * order of increasing version.
  */
 static void jffs2_add_tn_to_tree(struct jffs2_tmp_dnode_info *tn, struct rb_root *list)
@@ -38,11 +38,11 @@ static void jffs2_add_tn_to_tree(struct jffs2_tmp_dnode_info *tn, struct rb_root
                /* There may actually be a collision here, but it doesn't
                   actually matter. As long as the two nodes with the same
                   version are together, it's all fine. */
-               if (tn->version < this->version)
+               if (tn->version > this->version)
                        p = &(*p)->rb_left;
                else
                        p = &(*p)->rb_right;
-        }
+       }
 
        rb_link_node(&tn->rb, parent, p);
        rb_insert_color(&tn->rb, list);
@@ -111,14 +111,9 @@ static struct jffs2_raw_node_ref *jffs2_first_valid_node(struct jffs2_raw_node_r
  *         1 if the node should be marked obsolete;
  *         negative error code on failure.
  */
-static inline int
-read_direntry(struct jffs2_sb_info *c,
-             struct jffs2_raw_node_ref *ref,
-             struct jffs2_raw_dirent *rd,
-             uint32_t read,
-             struct jffs2_full_dirent **fdp,
-             int32_t *latest_mctime,
-             uint32_t *mctime_ver)
+static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
+                               struct jffs2_raw_dirent *rd, uint32_t read, struct jffs2_full_dirent **fdp,
+                               uint32_t *latest_mctime, uint32_t *mctime_ver)
 {
        struct jffs2_full_dirent *fd;
        
@@ -196,30 +191,35 @@ read_direntry(struct jffs2_sb_info *c,
  *         1 if the node should be marked obsolete;
  *         negative error code on failure.
  */
-static inline int
-read_dnode(struct jffs2_sb_info *c,
-          struct jffs2_raw_node_ref *ref,
-          struct jffs2_raw_inode *rd,
-          uint32_t read,
-          struct rb_root *tnp,
-          int32_t *latest_mctime,
-          uint32_t *mctime_ver)
+static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
+                            struct jffs2_raw_inode *rd, struct rb_root *tnp, int rdlen,
+                            uint32_t *latest_mctime, uint32_t *mctime_ver)
 {
-       struct jffs2_eraseblock *jeb;
        struct jffs2_tmp_dnode_info *tn;
+       uint32_t len, csize;
+       int ret = 1;
        
        /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
        BUG_ON(ref_obsolete(ref));
 
+       tn = jffs2_alloc_tmp_dnode_info();
+       if (!tn) {
+               JFFS2_ERROR("failed to allocate tn (%d bytes).\n", sizeof(*tn));
+               return -ENOMEM;
+       }
+
+       tn->partial_crc = 0;
+       csize = je32_to_cpu(rd->csize);
+       
        /* If we've never checked the CRCs on this node, check them now */
        if (ref_flags(ref) == REF_UNCHECKED) {
-               uint32_t crc, len;
+               uint32_t crc;
 
                crc = crc32(0, rd, sizeof(*rd) - 8);
                if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
                        JFFS2_NOTICE("header CRC failed on node at %#08x: read %#08x, calculated %#08x\n",
                                        ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
-                       return 1;
+                       goto free_out;
                }
                
                /* Sanity checks */
@@ -227,107 +227,102 @@ read_dnode(struct jffs2_sb_info *c,
                    unlikely(PAD(je32_to_cpu(rd->csize) + sizeof(*rd)) != PAD(je32_to_cpu(rd->totlen)))) {
                                JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref));
                                jffs2_dbg_dump_node(c, ref_offset(ref));
-                       return 1;
+                       goto free_out;
                }
 
-               if (rd->compr != JFFS2_COMPR_ZERO && je32_to_cpu(rd->csize)) {
-                       unsigned char *buf = NULL;
-                       uint32_t pointed = 0;
-                       int err;
-#ifndef __ECOS
-                       if (c->mtd->point) {
-                               err = c->mtd->point (c->mtd, ref_offset(ref) + sizeof(*rd), je32_to_cpu(rd->csize),
-                                                    &read, &buf);
-                               if (unlikely(read < je32_to_cpu(rd->csize)) && likely(!err)) {
-                                       JFFS2_ERROR("MTD point returned len too short: 0x%zx\n", read);
-                                       c->mtd->unpoint(c->mtd, buf, ref_offset(ref) + sizeof(*rd),
-                                                       je32_to_cpu(rd->csize));
-                               } else if (unlikely(err)){
-                                       JFFS2_ERROR("MTD point failed %d\n", err);
-                               } else
-                                       pointed = 1; /* succefully pointed to device */
-                       }
-#endif                                 
-                       if(!pointed){
-                               buf = kmalloc(je32_to_cpu(rd->csize), GFP_KERNEL);
-                               if (!buf)
-                                       return -ENOMEM;
-                               
-                               err = jffs2_flash_read(c, ref_offset(ref) + sizeof(*rd), je32_to_cpu(rd->csize),
-                                                       &read, buf);
-                               if (unlikely(read != je32_to_cpu(rd->csize)) && likely(!err))
-                                       err = -EIO;
-                               if (err) {
-                                       kfree(buf);
-                                       return err;
-                               }
-                       }
-                       crc = crc32(0, buf, je32_to_cpu(rd->csize));
-                       if(!pointed)
-                               kfree(buf);
-#ifndef __ECOS
-                       else
-                               c->mtd->unpoint(c->mtd, buf, ref_offset(ref) + sizeof(*rd), je32_to_cpu(rd->csize));
-#endif
-
-                       if (crc != je32_to_cpu(rd->data_crc)) {
-                               JFFS2_NOTICE("data CRC failed on node at %#08x: read %#08x, calculated %#08x\n",
-                                       ref_offset(ref), je32_to_cpu(rd->data_crc), crc);
-                               return 1;
-                       }
+               if (jffs2_is_writebuffered(c) && csize != 0) {
+                       /* At this point we are supposed to check the data CRC
+                        * of our unchecked node. But thus far, we do not
+                        * know whether the node is valid or obsolete. To
+                        * figure this out, we need to walk all the nodes of
+                        * the inode and build the inode fragtree. We don't
+                        * want to spend time checking data of nodes which may
+                        * later be found to be obsolete. So we put off the full
+                        * data CRC checking until we have read all the inode
+                        * nodes and have started building the fragtree.
+                        *
+                        * The fragtree is being built starting with nodes
+                        * having the highest version number, so we'll be able
+                        * to detect whether a node is valid (i.e., it is not
+                        * overlapped by a node with higher version) or not.
+                        * And we'll be able to check only those nodes, which
+                        * are not obsolete.
+                        *
+                        * Of course, this optimization only makes sense in case
+                        * of NAND flashes (or other flashes with
+                        * !jffs2_can_mark_obsolete()), since on NOR flashes
+                        * nodes are marked obsolete physically.
+                        *
+                        * Since NAND flashes (or other flashes with
+                        * jffs2_is_writebuffered(c)) are anyway read by
+                        * fractions of c->wbuf_pagesize, and we have just read
+                        * the node header, it is likely that the starting part
+                        * of the node data is also read when we read the
+                        * header. So we don't mind checking the CRC of the
+                        * starting part of the data of the node now, and check
+                        * the second part later (in jffs2_check_node_data()).
+                        * Of course, we will not need to re-read and re-check
+                        * the NAND page which we have just read. This is why we
+                        * read the whole NAND page at jffs2_get_inode_nodes(),
+                        * while we needed only the node header.
+                        */
+                       unsigned char *buf;
+
+                       /* 'buf' will point to the start of data */
+                       buf = (unsigned char *)rd + sizeof(*rd);
+                       /* len will be the read data length */
+                       len = min_t(uint32_t, rdlen - sizeof(*rd), csize);
                        
-               }
-
-               /* Mark the node as having been checked and fix the accounting accordingly */
-               jeb = &c->blocks[ref->flash_offset / c->sector_size];
-               len = ref_totlen(c, jeb, ref);
-
-               spin_lock(&c->erase_completion_lock);
-               jeb->used_size += len;
-               jeb->unchecked_size -= len;
-               c->used_size += len;
-               c->unchecked_size -= len;
-
-               /* If node covers at least a whole page, or if it starts at the 
-                  beginning of a page and runs to the end of the file, or if 
-                  it's a hole node, mark it REF_PRISTINE, else REF_NORMAL. 
+                       if (len)
+                               tn->partial_crc = crc = crc32(0, buf, len);
+
+                       /* If we actually calculated the whole data CRC
+                        * and it is wrong, drop the node. */
+                       if (unlikely(tn->partial_crc
+                                       != je32_to_cpu(rd->data_crc)) &&
+                               len == csize)
+                               goto free_out;
 
-                  If it's actually overlapped, it'll get made NORMAL (or OBSOLETE) 
-                  when the overlapping node(s) get added to the tree anyway. 
-               */
-               if ((je32_to_cpu(rd->dsize) >= PAGE_CACHE_SIZE) ||
-                   ( ((je32_to_cpu(rd->offset) & (PAGE_CACHE_SIZE-1))==0) &&
-                     (je32_to_cpu(rd->dsize) + je32_to_cpu(rd->offset) == je32_to_cpu(rd->isize)))) {
-                       JFFS2_DBG_READINODE("marking node at %#08x REF_PRISTINE\n", ref_offset(ref));
-                       ref->flash_offset = ref_offset(ref) | REF_PRISTINE;
-               } else {
-                       JFFS2_DBG_READINODE("marking node at %#08x REF_NORMAL\n", ref_offset(ref));
+               } else if (csize == 0) {
+                       /*
+                        * We checked the header CRC. If the node has no data, adjust
+                        * the space accounting now. For other nodes this will be done
+                        * later either when the node is marked obsolete or when its
+                        * data is checked.
+                        */
+                       struct jffs2_eraseblock *jeb;
+
+                       JFFS2_DBG_READINODE("the node has no data.\n");
+                       jeb = &c->blocks[ref->flash_offset / c->sector_size];
+                       len = ref_totlen(c, jeb, ref);
+
+                       spin_lock(&c->erase_completion_lock);
+                       jeb->used_size += len;
+                       jeb->unchecked_size -= len;
+                       c->used_size += len;
+                       c->unchecked_size -= len;
                        ref->flash_offset = ref_offset(ref) | REF_NORMAL;
+                       spin_unlock(&c->erase_completion_lock);
                }
-               spin_unlock(&c->erase_completion_lock);
-       }
-
-       tn = jffs2_alloc_tmp_dnode_info();
-       if (!tn) {
-               JFFS2_ERROR("alloc tn failed\n");
-               return -ENOMEM;
        }
 
        tn->fn = jffs2_alloc_full_dnode();
        if (!tn->fn) {
                JFFS2_ERROR("alloc fn failed\n");
-               jffs2_free_tmp_dnode_info(tn);
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto free_out;
        }
        
        tn->version = je32_to_cpu(rd->version);
        tn->fn->ofs = je32_to_cpu(rd->offset);
+       tn->data_crc = je32_to_cpu(rd->data_crc);
+       tn->csize = csize;
        tn->fn->raw = ref;
        
        /* There was a bug where we wrote hole nodes out with
           csize/dsize swapped. Deal with it */
-       if (rd->compr == JFFS2_COMPR_ZERO && !je32_to_cpu(rd->dsize) && je32_to_cpu(rd->csize))
-               tn->fn->size = je32_to_cpu(rd->csize);
+       if (rd->compr == JFFS2_COMPR_ZERO && !je32_to_cpu(rd->dsize) && csize)
+               tn->fn->size = csize;
        else // normal case...
                tn->fn->size = je32_to_cpu(rd->dsize);
 
@@ -337,6 +332,10 @@ read_dnode(struct jffs2_sb_info *c,
        jffs2_add_tn_to_tree(tn, tnp);
 
        return 0;
+
+free_out:
+       jffs2_free_tmp_dnode_info(tn);
+       return ret;
 }
 
 /*
@@ -347,11 +346,7 @@ read_dnode(struct jffs2_sb_info *c,
  *         1 if the node should be marked obsolete;
  *         negative error code on failure.
  */
-static inline int
-read_unknown(struct jffs2_sb_info *c,
-            struct jffs2_raw_node_ref *ref,
-            struct jffs2_unknown_node *un,
-            uint32_t read)
+static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, struct jffs2_unknown_node *un)
 {
        /* We don't mark unknown nodes as REF_UNCHECKED */
        BUG_ON(ref_flags(ref) == REF_UNCHECKED);
@@ -394,9 +389,62 @@ read_unknown(struct jffs2_sb_info *c,
        return 0;
 }
 
+/*
+ * Helper function for jffs2_get_inode_nodes(): make sure at least
+ * 'right_size' bytes of the node are in the buffer, reading more if needed.
+ * 'buf' is the temporary buffer, 'bufstart' is where the node begins in it,
+ * '*rdlen' is the number of bytes already read (updated on a further read).
+ * Returns: 0 on success; negative error code on failure.
+ */
+static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
+                    int right_size, int *rdlen, unsigned char *buf, unsigned char *bufstart)
+{
+       int right_len, err, len;
+       size_t retlen;
+       uint32_t offs;
+
+       if (jffs2_is_writebuffered(c)) {
+               /* Whole wbufs are read: one, or two if the header crosses the border */
+               right_len = c->wbuf_pagesize - (bufstart - buf);
+               if (right_size + (int)(bufstart - buf) > c->wbuf_pagesize)
+                       right_len += c->wbuf_pagesize;
+       } else
+               right_len = right_size;
+
+       if (*rdlen == right_len)
+               return 0;
+
+       /* We need to read more data, continuing right after what we already have */
+       offs = ref_offset(ref) + *rdlen;
+       if (jffs2_is_writebuffered(c)) {
+               bufstart = buf + c->wbuf_pagesize;
+               len = c->wbuf_pagesize;
+       } else {
+               bufstart = buf + *rdlen;
+               len = right_size - *rdlen;
+       }
+
+       JFFS2_DBG_READINODE("read more %d bytes.\n", len);
+
+       err = jffs2_flash_read(c, offs, len, &retlen, bufstart);
+       if (err) {
+               JFFS2_ERROR("can not read %d bytes from 0x%08x, "
+                       "error code: %d.\n", len, offs, err);
+               return err;
+       }
+
+       if (retlen < len) {
+               /* retlen is a size_t, so it must be printed with %zu */
+               JFFS2_ERROR("short read at %#08x: %zu instead of %d.\n", offs, retlen, len);
+               return -EIO;
+       }
+
+       *rdlen = right_len;
+       return 0;
+}
+
 /* Get tmp_dnode_info and full_dirent for all non-obsolete nodes associated
    with this ino, returning the former in order of version */
-
 static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
                                 struct rb_root *tnp, struct jffs2_full_dirent **fdp,
                                 uint32_t *highest_version, uint32_t *latest_mctime,
@@ -405,22 +453,47 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
        struct jffs2_raw_node_ref *ref, *valid_ref;
        struct rb_root ret_tn = RB_ROOT;
        struct jffs2_full_dirent *ret_fd = NULL;
-       union jffs2_node_union node;
+       unsigned char *buf = NULL;
+       union jffs2_node_union *node;
        size_t retlen;
-       int err;
+       int len, err;
 
        *mctime_ver = 0;
        
        JFFS2_DBG_READINODE("ino #%u\n", f->inocache->ino);
 
-       spin_lock(&c->erase_completion_lock);
+       if (jffs2_is_writebuffered(c)) {
+               /*
+                * If we have the write buffer, we assume the minimal I/O unit
+                * is c->wbuf_pagesize. We implement some optimizations in
+                * this case, and we need a temporary buffer of size =
+                * 2*c->wbuf_pagesize bytes (see comments in read_dnode()).
+                * Basically, we want to read not only the node header, but the
+                * whole wbuf (NAND page in case of NAND) or 2, if the node
+                * header overlaps the border between the 2 wbufs.
+                */
+               len = 2*c->wbuf_pagesize;
+       } else {
+               /*
+                * When there is no write buffer, the size of the temporary
+                * buffer is the size of the largest node header.
+                */
+               len = sizeof(union jffs2_node_union);
+       }
 
+       /* FIXME: in case of NOR and available ->point() this
+        * needs to be fixed. */
+       buf = kmalloc(len, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+                       
+       spin_lock(&c->erase_completion_lock);
        valid_ref = jffs2_first_valid_node(f->inocache->nodes);
-
-       if (!valid_ref && (f->inocache->ino != 1))
-               JFFS2_WARNING("no valid nodes for ino #%u\n", f->inocache->ino);
-
+       if (!valid_ref && f->inocache->ino != 1)
+               JFFS2_WARNING("Eep. No valid nodes for ino #%u.\n", f->inocache->ino);
        while (valid_ref) {
+               unsigned char *bufstart;
+
                /* We can hold a pointer to a non-obsolete node without the spinlock,
                   but _obsolete_ nodes may disappear at any time, if the block
                   they're in gets erased. So if we mark 'ref' obsolete while we're
@@ -433,70 +506,100 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
 
                cond_resched();
 
+               /*
+                * At this point we don't know the type of the node we're going
+                * to read, so we do not know the size of its header. In order
+                * to minimize the amount of flash IO we assume the node has
+                * size = JFFS2_MIN_NODE_HEADER.
+                */
+               if (jffs2_is_writebuffered(c)) {
+                       /* 
+                        * We treat 'buf' as 2 adjacent wbufs. We want to
+                        * adjust bufstart so that it points to the
+                        * beginning of the node within this wbuf.
+                        */
+                       bufstart = buf + (ref_offset(ref) % c->wbuf_pagesize);
+                       /* We will read either one wbuf or 2 wbufs. */
+                       len = c->wbuf_pagesize - (bufstart - buf);
+                       if (JFFS2_MIN_NODE_HEADER +
+                               (int)(bufstart - buf) > c->wbuf_pagesize) {
+                               /* The header spans the border of the
+                                * first wbuf */
+                               len += c->wbuf_pagesize;
+                       }
+               } else {
+                       bufstart = buf;
+                       len = JFFS2_MIN_NODE_HEADER;
+               }
+
+               JFFS2_DBG_READINODE("read %d bytes at %#08x(%d).\n", len, ref_offset(ref), ref_flags(ref));
+
                /* FIXME: point() */
-               err = jffs2_flash_read(c, (ref_offset(ref)), 
-                                      min_t(uint32_t, ref_totlen(c, NULL, ref), sizeof(node)),
-                                      &retlen, (void *)&node);
+               err = jffs2_flash_read(c, ref_offset(ref), len,
+                                      &retlen, bufstart);
                if (err) {
-                       JFFS2_ERROR("error %d reading node at 0x%08x in get_inode_nodes()\n", err, ref_offset(ref));
+                       JFFS2_ERROR("can not read %d bytes from 0x%08x, " "error code: %d.\n", len, ref_offset(ref), err);
+                       goto free_out;
+               }
+               
+               if (retlen < len) {
+                       JFFS2_ERROR("short read at %#08x: %d instead of %d.\n", ref_offset(ref), retlen, len);
+                       err = -EIO;
                        goto free_out;
                }
+               
+               node = (union jffs2_node_union *)bufstart;
                        
-               switch (je16_to_cpu(node.u.nodetype)) {
+               switch (je16_to_cpu(node->u.nodetype)) {
                        
                case JFFS2_NODETYPE_DIRENT:
-                       JFFS2_DBG_READINODE("node at %08x (%d) is a dirent node\n", ref_offset(ref), ref_flags(ref));
-                       
-                       if (retlen < sizeof(node.d)) {
-                               JFFS2_ERROR("short read dirent at %#08x\n", ref_offset(ref));
-                               err = -EIO;
-                               goto free_out;
-                       }
 
-                       err = read_direntry(c, ref, &node.d, retlen, &ret_fd, latest_mctime, mctime_ver);
+                       if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_dirent)) {
+                               err = read_more(c, ref, sizeof(struct jffs2_raw_dirent), &len, buf, bufstart);
+                               if (unlikely(err))
+                                       goto free_out;
+                       }
+                       
+                       err = read_direntry(c, ref, &node->d, retlen, &ret_fd, latest_mctime, mctime_ver);
                        if (err == 1) {
                                jffs2_mark_node_obsolete(c, ref);
                                break;
                        } else if (unlikely(err))
                                goto free_out;
                        
-                       if (je32_to_cpu(node.d.version) > *highest_version)
-                               *highest_version = je32_to_cpu(node.d.version);
+                       if (je32_to_cpu(node->d.version) > *highest_version)
+                               *highest_version = je32_to_cpu(node->d.version);
 
                        break;
 
                case JFFS2_NODETYPE_INODE:
-                       JFFS2_DBG_READINODE("node at %08x (%d) is a data node\n", ref_offset(ref), ref_flags(ref));
                        
-                       if (retlen < sizeof(node.i)) {
-                               JFFS2_ERROR("short read dnode at %#08x\n", ref_offset(ref));
-                               err = -EIO;
-                               goto free_out;
+                       if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_inode)) {
+                               err = read_more(c, ref, sizeof(struct jffs2_raw_inode), &len, buf, bufstart);
+                               if (unlikely(err))
+                                       goto free_out;
                        }
 
-                       err = read_dnode(c, ref, &node.i, retlen, &ret_tn, latest_mctime, mctime_ver);
+                       err = read_dnode(c, ref, &node->i, &ret_tn, len, latest_mctime, mctime_ver);
                        if (err == 1) {
                                jffs2_mark_node_obsolete(c, ref);
                                break;
                        } else if (unlikely(err))
                                goto free_out;
 
-                       if (je32_to_cpu(node.i.version) > *highest_version)
-                               *highest_version = je32_to_cpu(node.i.version);
+                       if (je32_to_cpu(node->i.version) > *highest_version)
+                               *highest_version = je32_to_cpu(node->i.version);
                        
-                       JFFS2_DBG_READINODE("version %d, highest_version now %d\n",
-                                       je32_to_cpu(node.i.version), *highest_version);
-
                        break;
 
                default:
-                       /* Check we've managed to read at least the common node header */
-                       if (retlen < sizeof(struct jffs2_unknown_node)) {
-                               JFFS2_ERROR("short read unknown node at %#08x\n", ref_offset(ref));
-                               return -EIO;
+                       if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_unknown_node)) {
+                               err = read_more(c, ref, sizeof(struct jffs2_unknown_node), &len, buf, bufstart);
+                               if (unlikely(err))
+                                       goto free_out;
                        }
-
-                       err = read_unknown(c, ref, &node.u, retlen);
+                       
+                       err = read_unknown(c, ref, &node->u);
                        if (err == 1) {
                                jffs2_mark_node_obsolete(c, ref);
                                break;
@@ -505,17 +608,21 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
 
                }
                spin_lock(&c->erase_completion_lock);
-
        }
+
        spin_unlock(&c->erase_completion_lock);
        *tnp = ret_tn;
        *fdp = ret_fd;
+       kfree(buf);
 
+       JFFS2_DBG_READINODE("nodes of inode #%u were read, the highest version is %u, latest_mctime %u, mctime_ver %u.\n",
+                       f->inocache->ino, *highest_version, *latest_mctime, *mctime_ver);
        return 0;
 
  free_out:
        jffs2_free_tmp_dnode_info_list(&ret_tn);
        jffs2_free_full_dirent_list(ret_fd);
+       kfree(buf);
        return err;
 }
 
@@ -523,14 +630,13 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
                                        struct jffs2_inode_info *f,
                                        struct jffs2_raw_inode *latest_node)
 {
-       struct jffs2_tmp_dnode_info *tn = NULL;
+       struct jffs2_tmp_dnode_info *tn;
        struct rb_root tn_list;
        struct rb_node *rb, *repl_rb;
        struct jffs2_full_dirent *fd_list;
-       struct jffs2_full_dnode *fn = NULL;
+       struct jffs2_full_dnode *fn, *first_fn = NULL;
        uint32_t crc;
        uint32_t latest_mctime, mctime_ver;
-       uint32_t mdata_ver = 0;
        size_t retlen;
        int ret;
 
@@ -550,42 +656,33 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
        rb = rb_first(&tn_list);
 
        while (rb) {
+               cond_resched();
                tn = rb_entry(rb, struct jffs2_tmp_dnode_info, rb);
                fn = tn->fn;
-
-               if (f->metadata) {
-                       if (likely(tn->version >= mdata_ver)) {
-                               JFFS2_DBG_READINODE("obsoleting old metadata at 0x%08x\n", ref_offset(f->metadata->raw));
-                               jffs2_mark_node_obsolete(c, f->metadata->raw);
-                               jffs2_free_full_dnode(f->metadata);
-                               f->metadata = NULL;
-                               
-                               mdata_ver = 0;
-                       } else {
-                               /* This should never happen. */
-                               JFFS2_ERROR("Er. New metadata at 0x%08x with ver %d is actually older than previous ver %d at 0x%08x\n",
-                                         ref_offset(fn->raw), tn->version, mdata_ver, ref_offset(f->metadata->raw));
-                               jffs2_mark_node_obsolete(c, fn->raw);
-                               jffs2_free_full_dnode(fn);
-                               /* Fill in latest_node from the metadata, not this one we're about to free... */
-                               fn = f->metadata;
-                               goto next_tn;
-                       }
-               }
+               ret = 1;
+               JFFS2_DBG_READINODE("consider node ver %u, phys offset "
+                       "%#08x(%d), range %u-%u.\n", tn->version,
+                       ref_offset(fn->raw), ref_flags(fn->raw),
+                       fn->ofs, fn->ofs + fn->size);
 
                if (fn->size) {
-                       jffs2_add_full_dnode_to_inode(c, f, fn);
-               } else {
-                       /* Zero-sized node at end of version list. Just a metadata update */
-                       JFFS2_DBG_READINODE("metadata @%08x: ver %d\n", ref_offset(fn->raw), tn->version);
+                       ret = jffs2_add_older_frag_to_fragtree(c, f, tn);
+                       /* TODO: the error code isn't checked, check it */
+                       jffs2_dbg_fragtree_paranoia_check_nolock(f);
+                       BUG_ON(ret < 0);
+                       if (!first_fn && ret == 0)
+                               first_fn = fn;
+               } else if (!first_fn) {
+                       first_fn = fn;
                        f->metadata = fn;
-                       mdata_ver = tn->version;
-               }
-       next_tn:
+                       ret = 0; /* Prevent freeing the metadata update node */
+               } else
+                       jffs2_mark_node_obsolete(c, fn->raw);
+                       
                BUG_ON(rb->rb_left);
                if (rb->rb_parent && rb->rb_parent->rb_left == rb) {
                        /* We were then left-hand child of our parent. We need
-                          to move our own right-hand child into our place. */
+                        * to move our own right-hand child into our place. */
                        repl_rb = rb->rb_right;
                        if (repl_rb)
                                repl_rb->rb_parent = rb->rb_parent;
@@ -595,7 +692,7 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
                rb = rb_next(rb);
 
                /* Remove the spent tn from the tree; don't bother rebalancing
-                  but put our right-hand child in our own place. */
+                * but put our right-hand child in our own place. */
                if (tn->rb.rb_parent) {
                        if (tn->rb.rb_parent->rb_left == &tn->rb)
                                tn->rb.rb_parent->rb_left = repl_rb;
@@ -606,10 +703,18 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
                        tn->rb.rb_right->rb_parent = NULL;
 
                jffs2_free_tmp_dnode_info(tn);
+               if (ret) {
+                       JFFS2_DBG_READINODE("delete dnode %u-%u.\n",
+                               fn->ofs, fn->ofs + fn->size);
+                       jffs2_free_full_dnode(fn);
+               }
        }
        jffs2_dbg_fragtree_paranoia_check_nolock(f);
 
-       if (!fn) {
+       BUG_ON(first_fn && ref_obsolete(first_fn->raw));
+
+       fn = first_fn;
+       if (unlikely(!first_fn)) {
                /* No data nodes for this inode. */
                if (f->inocache->ino != 1) {
                        JFFS2_WARNING("no data nodes found for ino #%u\n", f->inocache->ino);