Merge git://git.infradead.org/jffs2-xattr-2.6
[safe/jmp/linux-2.6] / fs / jffs2 / gc.c
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright (C) 2001-2003 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  * $Id: gc.c,v 1.155 2005/11/07 11:14:39 gleixner Exp $
11  *
12  */
13
14 #include <linux/kernel.h>
15 #include <linux/mtd/mtd.h>
16 #include <linux/slab.h>
17 #include <linux/pagemap.h>
18 #include <linux/crc32.h>
19 #include <linux/compiler.h>
20 #include <linux/stat.h>
21 #include "nodelist.h"
22 #include "compr.h"
23
/*
 * Prototypes for the file-local garbage collection helpers, so that they
 * can be called before their definitions.
 */
static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
                                          struct jffs2_inode_cache *ic,
                                          struct jffs2_raw_node_ref *raw);

static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c,
                                          struct jffs2_eraseblock *jeb,
                                          struct jffs2_inode_info *f,
                                          struct jffs2_full_dnode *fd);

static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c,
                                        struct jffs2_eraseblock *jeb,
                                        struct jffs2_inode_info *f,
                                        struct jffs2_full_dirent *fd);

static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c,
                                                 struct jffs2_eraseblock *jeb,
                                                 struct jffs2_inode_info *f,
                                                 struct jffs2_full_dirent *fd);

static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c,
                                      struct jffs2_eraseblock *jeb,
                                      struct jffs2_inode_info *f,
                                      struct jffs2_full_dnode *fn,
                                      uint32_t start,
                                      uint32_t end);

static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c,
                                       struct jffs2_eraseblock *jeb,
                                       struct jffs2_inode_info *f,
                                       struct jffs2_full_dnode *fn,
                                       uint32_t start,
                                       uint32_t end);

static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,
                                      struct jffs2_eraseblock *jeb,
                                      struct jffs2_raw_node_ref *raw,
                                      struct jffs2_inode_info *f);
41
42 /* Called with erase_completion_lock held */
43 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
44 {
45         struct jffs2_eraseblock *ret;
46         struct list_head *nextlist = NULL;
47         int n = jiffies % 128;
48
49         /* Pick an eraseblock to garbage collect next. This is where we'll
50            put the clever wear-levelling algorithms. Eventually.  */
51         /* We possibly want to favour the dirtier blocks more when the
52            number of free blocks is low. */
53 again:
54         if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
55                 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
56                 nextlist = &c->bad_used_list;
57         } else if (n < 50 && !list_empty(&c->erasable_list)) {
58                 /* Note that most of them will have gone directly to be erased.
59                    So don't favour the erasable_list _too_ much. */
60                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
61                 nextlist = &c->erasable_list;
62         } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
63                 /* Most of the time, pick one off the very_dirty list */
64                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
65                 nextlist = &c->very_dirty_list;
66         } else if (n < 126 && !list_empty(&c->dirty_list)) {
67                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
68                 nextlist = &c->dirty_list;
69         } else if (!list_empty(&c->clean_list)) {
70                 D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
71                 nextlist = &c->clean_list;
72         } else if (!list_empty(&c->dirty_list)) {
73                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
74
75                 nextlist = &c->dirty_list;
76         } else if (!list_empty(&c->very_dirty_list)) {
77                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
78                 nextlist = &c->very_dirty_list;
79         } else if (!list_empty(&c->erasable_list)) {
80                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
81
82                 nextlist = &c->erasable_list;
83         } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
84                 /* There are blocks are wating for the wbuf sync */
85                 D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
86                 spin_unlock(&c->erase_completion_lock);
87                 jffs2_flush_wbuf_pad(c);
88                 spin_lock(&c->erase_completion_lock);
89                 goto again;
90         } else {
91                 /* Eep. All were empty */
92                 D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
93                 return NULL;
94         }
95
96         ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
97         list_del(&ret->list);
98         c->gcblock = ret;
99         ret->gc_node = ret->first_node;
100         if (!ret->gc_node) {
101                 printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
102                 BUG();
103         }
104
105         /* Have we accidentally picked a clean block with wasted space ? */
106         if (ret->wasted_size) {
107                 D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
108                 ret->dirty_size += ret->wasted_size;
109                 c->wasted_size -= ret->wasted_size;
110                 c->dirty_size += ret->wasted_size;
111                 ret->wasted_size = 0;
112         }
113
114         return ret;
115 }
116
117 /* jffs2_garbage_collect_pass
118  * Make a single attempt to progress GC. Move one node, and possibly
119  * start erasing one eraseblock.
120  */
121 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
122 {
123         struct jffs2_inode_info *f;
124         struct jffs2_inode_cache *ic;
125         struct jffs2_eraseblock *jeb;
126         struct jffs2_raw_node_ref *raw;
127         int ret = 0, inum, nlink;
128         int xattr = 0;
129
130         if (down_interruptible(&c->alloc_sem))
131                 return -EINTR;
132
133         for (;;) {
134                 spin_lock(&c->erase_completion_lock);
135                 if (!c->unchecked_size)
136                         break;
137
138                 /* We can't start doing GC yet. We haven't finished checking
139                    the node CRCs etc. Do it now. */
140
141                 /* checked_ino is protected by the alloc_sem */
142                 if (c->checked_ino > c->highest_ino && xattr) {
143                         printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
144                                c->unchecked_size);
145                         jffs2_dbg_dump_block_lists_nolock(c);
146                         spin_unlock(&c->erase_completion_lock);
147                         BUG();
148                 }
149
150                 spin_unlock(&c->erase_completion_lock);
151
152                 if (!xattr)
153                         xattr = jffs2_verify_xattr(c);
154
155                 spin_lock(&c->inocache_lock);
156
157                 ic = jffs2_get_ino_cache(c, c->checked_ino++);
158
159                 if (!ic) {
160                         spin_unlock(&c->inocache_lock);
161                         continue;
162                 }
163
164                 if (!ic->nlink) {
165                         D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
166                                   ic->ino));
167                         spin_unlock(&c->inocache_lock);
168                         continue;
169                 }
170                 switch(ic->state) {
171                 case INO_STATE_CHECKEDABSENT:
172                 case INO_STATE_PRESENT:
173                         D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
174                         spin_unlock(&c->inocache_lock);
175                         continue;
176
177                 case INO_STATE_GC:
178                 case INO_STATE_CHECKING:
179                         printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
180                         spin_unlock(&c->inocache_lock);
181                         BUG();
182
183                 case INO_STATE_READING:
184                         /* We need to wait for it to finish, lest we move on
185                            and trigger the BUG() above while we haven't yet
186                            finished checking all its nodes */
187                         D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
188                         /* We need to come back again for the _same_ inode. We've
189                          made no progress in this case, but that should be OK */
190                         c->checked_ino--;
191
192                         up(&c->alloc_sem);
193                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
194                         return 0;
195
196                 default:
197                         BUG();
198
199                 case INO_STATE_UNCHECKED:
200                         ;
201                 }
202                 ic->state = INO_STATE_CHECKING;
203                 spin_unlock(&c->inocache_lock);
204
205                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
206
207                 ret = jffs2_do_crccheck_inode(c, ic);
208                 if (ret)
209                         printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
210
211                 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
212                 up(&c->alloc_sem);
213                 return ret;
214         }
215
216         /* First, work out which block we're garbage-collecting */
217         jeb = c->gcblock;
218
219         if (!jeb)
220                 jeb = jffs2_find_gc_block(c);
221
222         if (!jeb) {
223                 D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
224                 spin_unlock(&c->erase_completion_lock);
225                 up(&c->alloc_sem);
226                 return -EIO;
227         }
228
229         D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
230         D1(if (c->nextblock)
231            printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
232
233         if (!jeb->used_size) {
234                 up(&c->alloc_sem);
235                 goto eraseit;
236         }
237
238         raw = jeb->gc_node;
239
240         while(ref_obsolete(raw)) {
241                 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
242                 raw = raw->next_phys;
243                 if (unlikely(!raw)) {
244                         printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
245                         printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
246                                jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
247                         jeb->gc_node = raw;
248                         spin_unlock(&c->erase_completion_lock);
249                         up(&c->alloc_sem);
250                         BUG();
251                 }
252         }
253         jeb->gc_node = raw;
254
255         D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
256
257         if (!raw->next_in_ino) {
258                 /* Inode-less node. Clean marker, snapshot or something like that */
259                 /* FIXME: If it's something that needs to be copied, including something
260                    we don't grok that has JFFS2_NODETYPE_RWCOMPAT_COPY, we should do so */
261                 spin_unlock(&c->erase_completion_lock);
262                 jffs2_mark_node_obsolete(c, raw);
263                 up(&c->alloc_sem);
264                 goto eraseit_lock;
265         }
266
267         ic = jffs2_raw_ref_to_ic(raw);
268
269 #ifdef CONFIG_JFFS2_FS_XATTR
270         /* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
271          * We can decide whether this node is inode or xattr by ic->class.     */
272         if (ic->class == RAWNODE_CLASS_XATTR_DATUM
273             || ic->class == RAWNODE_CLASS_XATTR_REF) {
274                 BUG_ON(raw->next_in_ino != (void *)ic);
275                 spin_unlock(&c->erase_completion_lock);
276
277                 if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
278                         ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic);
279                 } else {
280                         ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic);
281                 }
282                 goto release_sem;
283         }
284 #endif
285
286         /* We need to hold the inocache. Either the erase_completion_lock or
287            the inocache_lock are sufficient; we trade down since the inocache_lock
288            causes less contention. */
289         spin_lock(&c->inocache_lock);
290
291         spin_unlock(&c->erase_completion_lock);
292
293         D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
294
295         /* Three possibilities:
296            1. Inode is already in-core. We must iget it and do proper
297               updating to its fragtree, etc.
298            2. Inode is not in-core, node is REF_PRISTINE. We lock the
299               inocache to prevent a read_inode(), copy the node intact.
300            3. Inode is not in-core, node is not pristine. We must iget()
301               and take the slow path.
302         */
303
304         switch(ic->state) {
305         case INO_STATE_CHECKEDABSENT:
306                 /* It's been checked, but it's not currently in-core.
307                    We can just copy any pristine nodes, but have
308                    to prevent anyone else from doing read_inode() while
309                    we're at it, so we set the state accordingly */
310                 if (ref_flags(raw) == REF_PRISTINE)
311                         ic->state = INO_STATE_GC;
312                 else {
313                         D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
314                                   ic->ino));
315                 }
316                 break;
317
318         case INO_STATE_PRESENT:
319                 /* It's in-core. GC must iget() it. */
320                 break;
321
322         case INO_STATE_UNCHECKED:
323         case INO_STATE_CHECKING:
324         case INO_STATE_GC:
325                 /* Should never happen. We should have finished checking
326                    by the time we actually start doing any GC, and since
327                    we're holding the alloc_sem, no other garbage collection
328                    can happen.
329                 */
330                 printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
331                        ic->ino, ic->state);
332                 up(&c->alloc_sem);
333                 spin_unlock(&c->inocache_lock);
334                 BUG();
335
336         case INO_STATE_READING:
337                 /* Someone's currently trying to read it. We must wait for
338                    them to finish and then go through the full iget() route
339                    to do the GC. However, sometimes read_inode() needs to get
340                    the alloc_sem() (for marking nodes invalid) so we must
341                    drop the alloc_sem before sleeping. */
342
343                 up(&c->alloc_sem);
344                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
345                           ic->ino, ic->state));
346                 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
347                 /* And because we dropped the alloc_sem we must start again from the
348                    beginning. Ponder chance of livelock here -- we're returning success
349                    without actually making any progress.
350
351                    Q: What are the chances that the inode is back in INO_STATE_READING
352                    again by the time we next enter this function? And that this happens
353                    enough times to cause a real delay?
354
355                    A: Small enough that I don't care :)
356                 */
357                 return 0;
358         }
359
360         /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
361            node intact, and we don't have to muck about with the fragtree etc.
362            because we know it's not in-core. If it _was_ in-core, we go through
363            all the iget() crap anyway */
364
365         if (ic->state == INO_STATE_GC) {
366                 spin_unlock(&c->inocache_lock);
367
368                 ret = jffs2_garbage_collect_pristine(c, ic, raw);
369
370                 spin_lock(&c->inocache_lock);
371                 ic->state = INO_STATE_CHECKEDABSENT;
372                 wake_up(&c->inocache_wq);
373
374                 if (ret != -EBADFD) {
375                         spin_unlock(&c->inocache_lock);
376                         goto release_sem;
377                 }
378
379                 /* Fall through if it wanted us to, with inocache_lock held */
380         }
381
382         /* Prevent the fairly unlikely race where the gcblock is
383            entirely obsoleted by the final close of a file which had
384            the only valid nodes in the block, followed by erasure,
385            followed by freeing of the ic because the erased block(s)
386            held _all_ the nodes of that inode.... never been seen but
387            it's vaguely possible. */
388
389         inum = ic->ino;
390         nlink = ic->nlink;
391         spin_unlock(&c->inocache_lock);
392
393         f = jffs2_gc_fetch_inode(c, inum, nlink);
394         if (IS_ERR(f)) {
395                 ret = PTR_ERR(f);
396                 goto release_sem;
397         }
398         if (!f) {
399                 ret = 0;
400                 goto release_sem;
401         }
402
403         ret = jffs2_garbage_collect_live(c, jeb, raw, f);
404
405         jffs2_gc_release_inode(c, f);
406
407  release_sem:
408         up(&c->alloc_sem);
409
410  eraseit_lock:
411         /* If we've finished this block, start it erasing */
412         spin_lock(&c->erase_completion_lock);
413
414  eraseit:
415         if (c->gcblock && !c->gcblock->used_size) {
416                 D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
417                 /* We're GC'ing an empty block? */
418                 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
419                 c->gcblock = NULL;
420                 c->nr_erasing_blocks++;
421                 jffs2_erase_pending_trigger(c);
422         }
423         spin_unlock(&c->erase_completion_lock);
424
425         return ret;
426 }
427
428 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
429                                       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
430 {
431         struct jffs2_node_frag *frag;
432         struct jffs2_full_dnode *fn = NULL;
433         struct jffs2_full_dirent *fd;
434         uint32_t start = 0, end = 0, nrfrags = 0;
435         int ret = 0;
436
437         down(&f->sem);
438
439         /* Now we have the lock for this inode. Check that it's still the one at the head
440            of the list. */
441
442         spin_lock(&c->erase_completion_lock);
443
444         if (c->gcblock != jeb) {
445                 spin_unlock(&c->erase_completion_lock);
446                 D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
447                 goto upnout;
448         }
449         if (ref_obsolete(raw)) {
450                 spin_unlock(&c->erase_completion_lock);
451                 D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
452                 /* They'll call again */
453                 goto upnout;
454         }
455         spin_unlock(&c->erase_completion_lock);
456
457         /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
458         if (f->metadata && f->metadata->raw == raw) {
459                 fn = f->metadata;
460                 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
461                 goto upnout;
462         }
463
464         /* FIXME. Read node and do lookup? */
465         for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
466                 if (frag->node && frag->node->raw == raw) {
467                         fn = frag->node;
468                         end = frag->ofs + frag->size;
469                         if (!nrfrags++)
470                                 start = frag->ofs;
471                         if (nrfrags == frag->node->frags)
472                                 break; /* We've found them all */
473                 }
474         }
475         if (fn) {
476                 if (ref_flags(raw) == REF_PRISTINE) {
477                         ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
478                         if (!ret) {
479                                 /* Urgh. Return it sensibly. */
480                                 frag->node->raw = f->inocache->nodes;
481                         }
482                         if (ret != -EBADFD)
483                                 goto upnout;
484                 }
485                 /* We found a datanode. Do the GC */
486                 if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
487                         /* It crosses a page boundary. Therefore, it must be a hole. */
488                         ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
489                 } else {
490                         /* It could still be a hole. But we GC the page this way anyway */
491                         ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
492                 }
493                 goto upnout;
494         }
495
496         /* Wasn't a dnode. Try dirent */
497         for (fd = f->dents; fd; fd=fd->next) {
498                 if (fd->raw == raw)
499                         break;
500         }
501
502         if (fd && fd->ino) {
503                 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
504         } else if (fd) {
505                 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
506         } else {
507                 printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
508                        ref_offset(raw), f->inocache->ino);
509                 if (ref_obsolete(raw)) {
510                         printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
511                 } else {
512                         jffs2_dbg_dump_node(c, ref_offset(raw));
513                         BUG();
514                 }
515         }
516  upnout:
517         up(&f->sem);
518
519         return ret;
520 }
521
522 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
523                                           struct jffs2_inode_cache *ic,
524                                           struct jffs2_raw_node_ref *raw)
525 {
526         union jffs2_node_union *node;
527         struct jffs2_raw_node_ref *nraw;
528         size_t retlen;
529         int ret;
530         uint32_t phys_ofs, alloclen;
531         uint32_t crc, rawlen;
532         int retried = 0;
533
534         D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
535
536         rawlen = ref_totlen(c, c->gcblock, raw);
537
538         /* Ask for a small amount of space (or the totlen if smaller) because we
539            don't want to force wastage of the end of a block if splitting would
540            work. */
541         ret = jffs2_reserve_space_gc(c, min_t(uint32_t, sizeof(struct jffs2_raw_inode) +
542                                 JFFS2_MIN_DATA_LEN, rawlen), &phys_ofs, &alloclen, rawlen);
543                                 /* this is not the exact summary size of it,
544                                         it is only an upper estimation */
545
546         if (ret)
547                 return ret;
548
549         if (alloclen < rawlen) {
550                 /* Doesn't fit untouched. We'll go the old route and split it */
551                 return -EBADFD;
552         }
553
554         node = kmalloc(rawlen, GFP_KERNEL);
555         if (!node)
556                return -ENOMEM;
557
558         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
559         if (!ret && retlen != rawlen)
560                 ret = -EIO;
561         if (ret)
562                 goto out_node;
563
564         crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
565         if (je32_to_cpu(node->u.hdr_crc) != crc) {
566                 printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
567                        ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
568                 goto bail;
569         }
570
571         switch(je16_to_cpu(node->u.nodetype)) {
572         case JFFS2_NODETYPE_INODE:
573                 crc = crc32(0, node, sizeof(node->i)-8);
574                 if (je32_to_cpu(node->i.node_crc) != crc) {
575                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
576                                ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
577                         goto bail;
578                 }
579
580                 if (je32_to_cpu(node->i.dsize)) {
581                         crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
582                         if (je32_to_cpu(node->i.data_crc) != crc) {
583                                 printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
584                                        ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
585                                 goto bail;
586                         }
587                 }
588                 break;
589
590         case JFFS2_NODETYPE_DIRENT:
591                 crc = crc32(0, node, sizeof(node->d)-8);
592                 if (je32_to_cpu(node->d.node_crc) != crc) {
593                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
594                                ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
595                         goto bail;
596                 }
597
598                 if (node->d.nsize) {
599                         crc = crc32(0, node->d.name, node->d.nsize);
600                         if (je32_to_cpu(node->d.name_crc) != crc) {
601                                 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent ode at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
602                                        ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
603                                 goto bail;
604                         }
605                 }
606                 break;
607         default:
608                 printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
609                        ref_offset(raw), je16_to_cpu(node->u.nodetype));
610                 goto bail;
611         }
612
613         nraw = jffs2_alloc_raw_node_ref();
614         if (!nraw) {
615                 ret = -ENOMEM;
616                 goto out_node;
617         }
618
619         /* OK, all the CRCs are good; this node can just be copied as-is. */
620  retry:
621         nraw->flash_offset = phys_ofs;
622         nraw->__totlen = rawlen;
623         nraw->next_phys = NULL;
624
625         ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
626
627         if (ret || (retlen != rawlen)) {
628                 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
629                        rawlen, phys_ofs, ret, retlen);
630                 if (retlen) {
631                         /* Doesn't belong to any inode */
632                         nraw->next_in_ino = NULL;
633
634                         nraw->flash_offset |= REF_OBSOLETE;
635                         jffs2_add_physical_node_ref(c, nraw);
636                         jffs2_mark_node_obsolete(c, nraw);
637                 } else {
638                         printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", nraw->flash_offset);
639                         jffs2_free_raw_node_ref(nraw);
640                 }
641                 if (!retried && (nraw = jffs2_alloc_raw_node_ref())) {
642                         /* Try to reallocate space and retry */
643                         uint32_t dummy;
644                         struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
645
646                         retried = 1;
647
648                         D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
649
650                         jffs2_dbg_acct_sanity_check(c,jeb);
651                         jffs2_dbg_acct_paranoia_check(c, jeb);
652
653                         ret = jffs2_reserve_space_gc(c, rawlen, &phys_ofs, &dummy, rawlen);
654                                                 /* this is not the exact summary size of it,
655                                                         it is only an upper estimation */
656
657                         if (!ret) {
658                                 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
659
660                                 jffs2_dbg_acct_sanity_check(c,jeb);
661                                 jffs2_dbg_acct_paranoia_check(c, jeb);
662
663                                 goto retry;
664                         }
665                         D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
666                         jffs2_free_raw_node_ref(nraw);
667                 }
668
669                 jffs2_free_raw_node_ref(nraw);
670                 if (!ret)
671                         ret = -EIO;
672                 goto out_node;
673         }
674         nraw->flash_offset |= REF_PRISTINE;
675         jffs2_add_physical_node_ref(c, nraw);
676
677         /* Link into per-inode list. This is safe because of the ic
678            state being INO_STATE_GC. Note that if we're doing this
679            for an inode which is in-core, the 'nraw' pointer is then
680            going to be fetched from ic->nodes by our caller. */
681         spin_lock(&c->erase_completion_lock);
682         nraw->next_in_ino = ic->nodes;
683         ic->nodes = nraw;
684         spin_unlock(&c->erase_completion_lock);
685
686         jffs2_mark_node_obsolete(c, raw);
687         D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
688
689  out_node:
690         kfree(node);
691         return ret;
692  bail:
693         ret = -EBADFD;
694         goto out_node;
695 }
696
697 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
698                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
699 {
700         struct jffs2_full_dnode *new_fn;
701         struct jffs2_raw_inode ri;
702         struct jffs2_node_frag *last_frag;
703         union jffs2_device_node dev;
704         char *mdata = NULL, mdatalen = 0;
705         uint32_t alloclen, phys_ofs, ilen;
706         int ret;
707
708         if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
709             S_ISCHR(JFFS2_F_I_MODE(f)) ) {
710                 /* For these, we don't actually need to read the old node */
711                 mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
712                 mdata = (char *)&dev;
713                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
714         } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
715                 mdatalen = fn->size;
716                 mdata = kmalloc(fn->size, GFP_KERNEL);
717                 if (!mdata) {
718                         printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
719                         return -ENOMEM;
720                 }
721                 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
722                 if (ret) {
723                         printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
724                         kfree(mdata);
725                         return ret;
726                 }
727                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
728
729         }
730
731         ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &phys_ofs, &alloclen,
732                                 JFFS2_SUMMARY_INODE_SIZE);
733         if (ret) {
734                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
735                        sizeof(ri)+ mdatalen, ret);
736                 goto out;
737         }
738
739         last_frag = frag_last(&f->fragtree);
740         if (last_frag)
741                 /* Fetch the inode length from the fragtree rather then
742                  * from i_size since i_size may have not been updated yet */
743                 ilen = last_frag->ofs + last_frag->size;
744         else
745                 ilen = JFFS2_F_I_SIZE(f);
746
747         memset(&ri, 0, sizeof(ri));
748         ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
749         ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
750         ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
751         ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
752
753         ri.ino = cpu_to_je32(f->inocache->ino);
754         ri.version = cpu_to_je32(++f->highest_version);
755         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
756         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
757         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
758         ri.isize = cpu_to_je32(ilen);
759         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
760         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
761         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
762         ri.offset = cpu_to_je32(0);
763         ri.csize = cpu_to_je32(mdatalen);
764         ri.dsize = cpu_to_je32(mdatalen);
765         ri.compr = JFFS2_COMPR_NONE;
766         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
767         ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
768
769         new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, phys_ofs, ALLOC_GC);
770
771         if (IS_ERR(new_fn)) {
772                 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
773                 ret = PTR_ERR(new_fn);
774                 goto out;
775         }
776         jffs2_mark_node_obsolete(c, fn->raw);
777         jffs2_free_full_dnode(fn);
778         f->metadata = new_fn;
779  out:
780         if (S_ISLNK(JFFS2_F_I_MODE(f)))
781                 kfree(mdata);
782         return ret;
783 }
784
785 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
786                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
787 {
788         struct jffs2_full_dirent *new_fd;
789         struct jffs2_raw_dirent rd;
790         uint32_t alloclen, phys_ofs;
791         int ret;
792
793         rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
794         rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
795         rd.nsize = strlen(fd->name);
796         rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
797         rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
798
799         rd.pino = cpu_to_je32(f->inocache->ino);
800         rd.version = cpu_to_je32(++f->highest_version);
801         rd.ino = cpu_to_je32(fd->ino);
802         /* If the times on this inode were set by explicit utime() they can be different,
803            so refrain from splatting them. */
804         if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
805                 rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
806         else
807                 rd.mctime = cpu_to_je32(0);
808         rd.type = fd->type;
809         rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
810         rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
811
812         ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &phys_ofs, &alloclen,
813                                 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
814         if (ret) {
815                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
816                        sizeof(rd)+rd.nsize, ret);
817                 return ret;
818         }
819         new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, phys_ofs, ALLOC_GC);
820
821         if (IS_ERR(new_fd)) {
822                 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
823                 return PTR_ERR(new_fd);
824         }
825         jffs2_add_fd_to_list(c, new_fd, &f->dents);
826         return 0;
827 }
828
829 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
830                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
831 {
832         struct jffs2_full_dirent **fdp = &f->dents;
833         int found = 0;
834
835         /* On a medium where we can't actually mark nodes obsolete
836            pernamently, such as NAND flash, we need to work out
837            whether this deletion dirent is still needed to actively
838            delete a 'real' dirent with the same name that's still
839            somewhere else on the flash. */
840         if (!jffs2_can_mark_obsolete(c)) {
841                 struct jffs2_raw_dirent *rd;
842                 struct jffs2_raw_node_ref *raw;
843                 int ret;
844                 size_t retlen;
845                 int name_len = strlen(fd->name);
846                 uint32_t name_crc = crc32(0, fd->name, name_len);
847                 uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
848
849                 rd = kmalloc(rawlen, GFP_KERNEL);
850                 if (!rd)
851                         return -ENOMEM;
852
853                 /* Prevent the erase code from nicking the obsolete node refs while
854                    we're looking at them. I really don't like this extra lock but
855                    can't see any alternative. Suggestions on a postcard to... */
856                 down(&c->erase_free_sem);
857
858                 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
859
860                         /* We only care about obsolete ones */
861                         if (!(ref_obsolete(raw)))
862                                 continue;
863
864                         /* Any dirent with the same name is going to have the same length... */
865                         if (ref_totlen(c, NULL, raw) != rawlen)
866                                 continue;
867
868                         /* Doesn't matter if there's one in the same erase block. We're going to
869                            delete it too at the same time. */
870                         if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
871                                 continue;
872
873                         D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
874
875                         /* This is an obsolete node belonging to the same directory, and it's of the right
876                            length. We need to take a closer look...*/
877                         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
878                         if (ret) {
879                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
880                                 /* If we can't read it, we don't need to continue to obsolete it. Continue */
881                                 continue;
882                         }
883                         if (retlen != rawlen) {
884                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
885                                        retlen, rawlen, ref_offset(raw));
886                                 continue;
887                         }
888
889                         if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
890                                 continue;
891
892                         /* If the name CRC doesn't match, skip */
893                         if (je32_to_cpu(rd->name_crc) != name_crc)
894                                 continue;
895
896                         /* If the name length doesn't match, or it's another deletion dirent, skip */
897                         if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
898                                 continue;
899
900                         /* OK, check the actual name now */
901                         if (memcmp(rd->name, fd->name, name_len))
902                                 continue;
903
904                         /* OK. The name really does match. There really is still an older node on
905                            the flash which our deletion dirent obsoletes. So we have to write out
906                            a new deletion dirent to replace it */
907                         up(&c->erase_free_sem);
908
909                         D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
910                                   ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
911                         kfree(rd);
912
913                         return jffs2_garbage_collect_dirent(c, jeb, f, fd);
914                 }
915
916                 up(&c->erase_free_sem);
917                 kfree(rd);
918         }
919
920         /* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
921            we should update the metadata node with those times accordingly */
922
923         /* No need for it any more. Just mark it obsolete and remove it from the list */
924         while (*fdp) {
925                 if ((*fdp) == fd) {
926                         found = 1;
927                         *fdp = fd->next;
928                         break;
929                 }
930                 fdp = &(*fdp)->next;
931         }
932         if (!found) {
933                 printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
934         }
935         jffs2_mark_node_obsolete(c, fd->raw);
936         jffs2_free_full_dirent(fd);
937         return 0;
938 }
939
940 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
941                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
942                                       uint32_t start, uint32_t end)
943 {
944         struct jffs2_raw_inode ri;
945         struct jffs2_node_frag *frag;
946         struct jffs2_full_dnode *new_fn;
947         uint32_t alloclen, phys_ofs, ilen;
948         int ret;
949
950         D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
951                   f->inocache->ino, start, end));
952
953         memset(&ri, 0, sizeof(ri));
954
955         if(fn->frags > 1) {
956                 size_t readlen;
957                 uint32_t crc;
958                 /* It's partially obsoleted by a later write. So we have to
959                    write it out again with the _same_ version as before */
960                 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
961                 if (readlen != sizeof(ri) || ret) {
962                         printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
963                         goto fill;
964                 }
965                 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
966                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
967                                ref_offset(fn->raw),
968                                je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
969                         return -EIO;
970                 }
971                 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
972                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
973                                ref_offset(fn->raw),
974                                je32_to_cpu(ri.totlen), sizeof(ri));
975                         return -EIO;
976                 }
977                 crc = crc32(0, &ri, sizeof(ri)-8);
978                 if (crc != je32_to_cpu(ri.node_crc)) {
979                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
980                                ref_offset(fn->raw),
981                                je32_to_cpu(ri.node_crc), crc);
982                         /* FIXME: We could possibly deal with this by writing new holes for each frag */
983                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
984                                start, end, f->inocache->ino);
985                         goto fill;
986                 }
987                 if (ri.compr != JFFS2_COMPR_ZERO) {
988                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
989                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
990                                start, end, f->inocache->ino);
991                         goto fill;
992                 }
993         } else {
994         fill:
995                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
996                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
997                 ri.totlen = cpu_to_je32(sizeof(ri));
998                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
999
1000                 ri.ino = cpu_to_je32(f->inocache->ino);
1001                 ri.version = cpu_to_je32(++f->highest_version);
1002                 ri.offset = cpu_to_je32(start);
1003                 ri.dsize = cpu_to_je32(end - start);
1004                 ri.csize = cpu_to_je32(0);
1005                 ri.compr = JFFS2_COMPR_ZERO;
1006         }
1007
1008         frag = frag_last(&f->fragtree);
1009         if (frag)
1010                 /* Fetch the inode length from the fragtree rather then
1011                  * from i_size since i_size may have not been updated yet */
1012                 ilen = frag->ofs + frag->size;
1013         else
1014                 ilen = JFFS2_F_I_SIZE(f);
1015
1016         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1017         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1018         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1019         ri.isize = cpu_to_je32(ilen);
1020         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1021         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1022         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1023         ri.data_crc = cpu_to_je32(0);
1024         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1025
1026         ret = jffs2_reserve_space_gc(c, sizeof(ri), &phys_ofs, &alloclen,
1027                                 JFFS2_SUMMARY_INODE_SIZE);
1028         if (ret) {
1029                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1030                        sizeof(ri), ret);
1031                 return ret;
1032         }
1033         new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, ALLOC_GC);
1034
1035         if (IS_ERR(new_fn)) {
1036                 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1037                 return PTR_ERR(new_fn);
1038         }
1039         if (je32_to_cpu(ri.version) == f->highest_version) {
1040                 jffs2_add_full_dnode_to_inode(c, f, new_fn);
1041                 if (f->metadata) {
1042                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1043                         jffs2_free_full_dnode(f->metadata);
1044                         f->metadata = NULL;
1045                 }
1046                 return 0;
1047         }
1048
1049         /*
1050          * We should only get here in the case where the node we are
1051          * replacing had more than one frag, so we kept the same version
1052          * number as before. (Except in case of error -- see 'goto fill;'
1053          * above.)
1054          */
1055         D1(if(unlikely(fn->frags <= 1)) {
1056                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1057                        fn->frags, je32_to_cpu(ri.version), f->highest_version,
1058                        je32_to_cpu(ri.ino));
1059         });
1060
1061         /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1062         mark_ref_normal(new_fn->raw);
1063
1064         for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1065              frag; frag = frag_next(frag)) {
1066                 if (frag->ofs > fn->size + fn->ofs)
1067                         break;
1068                 if (frag->node == fn) {
1069                         frag->node = new_fn;
1070                         new_fn->frags++;
1071                         fn->frags--;
1072                 }
1073         }
1074         if (fn->frags) {
1075                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1076                 BUG();
1077         }
1078         if (!new_fn->frags) {
1079                 printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1080                 BUG();
1081         }
1082
1083         jffs2_mark_node_obsolete(c, fn->raw);
1084         jffs2_free_full_dnode(fn);
1085
1086         return 0;
1087 }
1088
1089 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1090                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1091                                        uint32_t start, uint32_t end)
1092 {
1093         struct jffs2_full_dnode *new_fn;
1094         struct jffs2_raw_inode ri;
1095         uint32_t alloclen, phys_ofs, offset, orig_end, orig_start;
1096         int ret = 0;
1097         unsigned char *comprbuf = NULL, *writebuf;
1098         unsigned long pg;
1099         unsigned char *pg_ptr;
1100
1101         memset(&ri, 0, sizeof(ri));
1102
1103         D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1104                   f->inocache->ino, start, end));
1105
1106         orig_end = end;
1107         orig_start = start;
1108
1109         if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1110                 /* Attempt to do some merging. But only expand to cover logically
1111                    adjacent frags if the block containing them is already considered
1112                    to be dirty. Otherwise we end up with GC just going round in
1113                    circles dirtying the nodes it already wrote out, especially
1114                    on NAND where we have small eraseblocks and hence a much higher
1115                    chance of nodes having to be split to cross boundaries. */
1116
1117                 struct jffs2_node_frag *frag;
1118                 uint32_t min, max;
1119
1120                 min = start & ~(PAGE_CACHE_SIZE-1);
1121                 max = min + PAGE_CACHE_SIZE;
1122
1123                 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1124
1125                 /* BUG_ON(!frag) but that'll happen anyway... */
1126
1127                 BUG_ON(frag->ofs != start);
1128
1129                 /* First grow down... */
1130                 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1131
1132                         /* If the previous frag doesn't even reach the beginning, there's
1133                            excessive fragmentation. Just merge. */
1134                         if (frag->ofs > min) {
1135                                 D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1136                                           frag->ofs, frag->ofs+frag->size));
1137                                 start = frag->ofs;
1138                                 continue;
1139                         }
1140                         /* OK. This frag holds the first byte of the page. */
1141                         if (!frag->node || !frag->node->raw) {
1142                                 D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1143                                           frag->ofs, frag->ofs+frag->size));
1144                                 break;
1145                         } else {
1146
1147                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1148                                    in a block which is still considered clean? If so, don't obsolete it.
1149                                    If not, cover it anyway. */
1150
1151                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1152                                 struct jffs2_eraseblock *jeb;
1153
1154                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1155
1156                                 if (jeb == c->gcblock) {
1157                                         D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1158                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1159                                         start = frag->ofs;
1160                                         break;
1161                                 }
1162                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1163                                         D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1164                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1165                                         break;
1166                                 }
1167
1168                                 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1169                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1170                                 start = frag->ofs;
1171                                 break;
1172                         }
1173                 }
1174
1175                 /* ... then up */
1176
1177                 /* Find last frag which is actually part of the node we're to GC. */
1178                 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1179
1180                 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1181
1182                         /* If the previous frag doesn't even reach the beginning, there's lots
1183                            of fragmentation. Just merge. */
1184                         if (frag->ofs+frag->size < max) {
1185                                 D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1186                                           frag->ofs, frag->ofs+frag->size));
1187                                 end = frag->ofs + frag->size;
1188                                 continue;
1189                         }
1190
1191                         if (!frag->node || !frag->node->raw) {
1192                                 D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1193                                           frag->ofs, frag->ofs+frag->size));
1194                                 break;
1195                         } else {
1196
1197                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1198                                    in a block which is still considered clean? If so, don't obsolete it.
1199                                    If not, cover it anyway. */
1200
1201                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1202                                 struct jffs2_eraseblock *jeb;
1203
1204                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1205
1206                                 if (jeb == c->gcblock) {
1207                                         D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1208                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1209                                         end = frag->ofs + frag->size;
1210                                         break;
1211                                 }
1212                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1213                                         D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1214                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1215                                         break;
1216                                 }
1217
1218                                 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1219                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1220                                 end = frag->ofs + frag->size;
1221                                 break;
1222                         }
1223                 }
1224                 D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1225                           orig_start, orig_end, start, end));
1226
1227                 D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1228                 BUG_ON(end < orig_end);
1229                 BUG_ON(start > orig_start);
1230         }
1231
1232         /* First, use readpage() to read the appropriate page into the page cache */
1233         /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1234          *    triggered garbage collection in the first place?
1235          * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1236          *    page OK. We'll actually write it out again in commit_write, which is a little
1237          *    suboptimal, but at least we're correct.
1238          */
1239         pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1240
1241         if (IS_ERR(pg_ptr)) {
1242                 printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1243                 return PTR_ERR(pg_ptr);
1244         }
1245
1246         offset = start;
1247         while(offset < orig_end) {
1248                 uint32_t datalen;
1249                 uint32_t cdatalen;
1250                 uint16_t comprtype = JFFS2_COMPR_NONE;
1251
1252                 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN, &phys_ofs,
1253                                         &alloclen, JFFS2_SUMMARY_INODE_SIZE);
1254
1255                 if (ret) {
1256                         printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1257                                sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1258                         break;
1259                 }
1260                 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1261                 datalen = end - offset;
1262
1263                 writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1264
1265                 comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1266
1267                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1268                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1269                 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1270                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1271
1272                 ri.ino = cpu_to_je32(f->inocache->ino);
1273                 ri.version = cpu_to_je32(++f->highest_version);
1274                 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1275                 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1276                 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1277                 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1278                 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1279                 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1280                 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1281                 ri.offset = cpu_to_je32(offset);
1282                 ri.csize = cpu_to_je32(cdatalen);
1283                 ri.dsize = cpu_to_je32(datalen);
1284                 ri.compr = comprtype & 0xff;
1285                 ri.usercompr = (comprtype >> 8) & 0xff;
1286                 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1287                 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1288
1289                 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, phys_ofs, ALLOC_GC);
1290
1291                 jffs2_free_comprbuf(comprbuf, writebuf);
1292
1293                 if (IS_ERR(new_fn)) {
1294                         printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1295                         ret = PTR_ERR(new_fn);
1296                         break;
1297                 }
1298                 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1299                 offset += datalen;
1300                 if (f->metadata) {
1301                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1302                         jffs2_free_full_dnode(f->metadata);
1303                         f->metadata = NULL;
1304                 }
1305         }
1306
1307         jffs2_gc_release_page(c, pg_ptr, &pg);
1308         return ret;
1309 }