[JFFS2] Prevent deadlock when flushing write buffer
[safe/jmp/linux-2.6] / fs / jffs2 / gc.c
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright (C) 2001-2003 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  * $Id: gc.c,v 1.147 2005/03/20 21:43:22 dedekind Exp $
11  *
12  */
13
14 #include <linux/kernel.h>
15 #include <linux/mtd/mtd.h>
16 #include <linux/slab.h>
17 #include <linux/pagemap.h>
18 #include <linux/crc32.h>
19 #include <linux/compiler.h>
20 #include <linux/stat.h>
21 #include "nodelist.h"
22 #include "compr.h"
23
24 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c, 
25                                           struct jffs2_inode_cache *ic,
26                                           struct jffs2_raw_node_ref *raw);
27 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 
28                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
29 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 
30                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
31 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 
32                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
33 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
34                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
35                                       uint32_t start, uint32_t end);
36 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
37                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
38                                        uint32_t start, uint32_t end);
39 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
40                                struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
41
42 /* Called with erase_completion_lock held */
43 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
44 {
45         struct jffs2_eraseblock *ret;
46         struct list_head *nextlist = NULL;
47         int n = jiffies % 128;
48
49         /* Pick an eraseblock to garbage collect next. This is where we'll
50            put the clever wear-levelling algorithms. Eventually.  */
51         /* We possibly want to favour the dirtier blocks more when the
52            number of free blocks is low. */
53 again:
54         if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
55                 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
56                 nextlist = &c->bad_used_list;
57         } else if (n < 50 && !list_empty(&c->erasable_list)) {
58                 /* Note that most of them will have gone directly to be erased. 
59                    So don't favour the erasable_list _too_ much. */
60                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
61                 nextlist = &c->erasable_list;
62         } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
63                 /* Most of the time, pick one off the very_dirty list */
64                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
65                 nextlist = &c->very_dirty_list;
66         } else if (n < 126 && !list_empty(&c->dirty_list)) {
67                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
68                 nextlist = &c->dirty_list;
69         } else if (!list_empty(&c->clean_list)) {
70                 D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
71                 nextlist = &c->clean_list;
72         } else if (!list_empty(&c->dirty_list)) {
73                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
74
75                 nextlist = &c->dirty_list;
76         } else if (!list_empty(&c->very_dirty_list)) {
77                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
78                 nextlist = &c->very_dirty_list;
79         } else if (!list_empty(&c->erasable_list)) {
80                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
81
82                 nextlist = &c->erasable_list;
83         } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
84                 /* There are blocks are wating for the wbuf sync */
85                 D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
86                 spin_unlock(&c->erase_completion_lock);
87                 jffs2_flush_wbuf_pad(c);
88                 spin_lock(&c->erase_completion_lock);
89                 goto again;
90         } else {
91                 /* Eep. All were empty */
92                 D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
93                 return NULL;
94         }
95
96         ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
97         list_del(&ret->list);
98         c->gcblock = ret;
99         ret->gc_node = ret->first_node;
100         if (!ret->gc_node) {
101                 printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
102                 BUG();
103         }
104         
105         /* Have we accidentally picked a clean block with wasted space ? */
106         if (ret->wasted_size) {
107                 D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
108                 ret->dirty_size += ret->wasted_size;
109                 c->wasted_size -= ret->wasted_size;
110                 c->dirty_size += ret->wasted_size;
111                 ret->wasted_size = 0;
112         }
113
114         D2(jffs2_dump_block_lists(c));
115         return ret;
116 }
117
118 /* jffs2_garbage_collect_pass
119  * Make a single attempt to progress GC. Move one node, and possibly
120  * start erasing one eraseblock.
121  */
122 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
123 {
124         struct jffs2_inode_info *f;
125         struct jffs2_inode_cache *ic;
126         struct jffs2_eraseblock *jeb;
127         struct jffs2_raw_node_ref *raw;
128         int ret = 0, inum, nlink;
129
130         if (down_interruptible(&c->alloc_sem))
131                 return -EINTR;
132
133         for (;;) {
134                 spin_lock(&c->erase_completion_lock);
135                 if (!c->unchecked_size)
136                         break;
137
138                 /* We can't start doing GC yet. We haven't finished checking
139                    the node CRCs etc. Do it now. */
140                 
141                 /* checked_ino is protected by the alloc_sem */
142                 if (c->checked_ino > c->highest_ino) {
143                         printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
144                                c->unchecked_size);
145                         D2(jffs2_dump_block_lists(c));
146                         spin_unlock(&c->erase_completion_lock);
147                         BUG();
148                 }
149
150                 spin_unlock(&c->erase_completion_lock);
151
152                 spin_lock(&c->inocache_lock);
153
154                 ic = jffs2_get_ino_cache(c, c->checked_ino++);
155
156                 if (!ic) {
157                         spin_unlock(&c->inocache_lock);
158                         continue;
159                 }
160
161                 if (!ic->nlink) {
162                         D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
163                                   ic->ino));
164                         spin_unlock(&c->inocache_lock);
165                         continue;
166                 }
167                 switch(ic->state) {
168                 case INO_STATE_CHECKEDABSENT:
169                 case INO_STATE_PRESENT:
170                         D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
171                         spin_unlock(&c->inocache_lock);
172                         continue;
173
174                 case INO_STATE_GC:
175                 case INO_STATE_CHECKING:
176                         printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
177                         spin_unlock(&c->inocache_lock);
178                         BUG();
179
180                 case INO_STATE_READING:
181                         /* We need to wait for it to finish, lest we move on
182                            and trigger the BUG() above while we haven't yet 
183                            finished checking all its nodes */
184                         D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
185                         up(&c->alloc_sem);
186                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
187                         return 0;
188
189                 default:
190                         BUG();
191
192                 case INO_STATE_UNCHECKED:
193                         ;
194                 }
195                 ic->state = INO_STATE_CHECKING;
196                 spin_unlock(&c->inocache_lock);
197
198                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
199
200                 ret = jffs2_do_crccheck_inode(c, ic);
201                 if (ret)
202                         printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
203
204                 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
205                 up(&c->alloc_sem);
206                 return ret;
207         }
208
209         /* First, work out which block we're garbage-collecting */
210         jeb = c->gcblock;
211
212         if (!jeb)
213                 jeb = jffs2_find_gc_block(c);
214
215         if (!jeb) {
216                 D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
217                 spin_unlock(&c->erase_completion_lock);
218                 up(&c->alloc_sem);
219                 return -EIO;
220         }
221
222         D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
223         D1(if (c->nextblock)
224            printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
225
226         if (!jeb->used_size) {
227                 up(&c->alloc_sem);
228                 goto eraseit;
229         }
230
231         raw = jeb->gc_node;
232                         
233         while(ref_obsolete(raw)) {
234                 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
235                 raw = raw->next_phys;
236                 if (unlikely(!raw)) {
237                         printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
238                         printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n", 
239                                jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
240                         jeb->gc_node = raw;
241                         spin_unlock(&c->erase_completion_lock);
242                         up(&c->alloc_sem);
243                         BUG();
244                 }
245         }
246         jeb->gc_node = raw;
247
248         D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
249
250         if (!raw->next_in_ino) {
251                 /* Inode-less node. Clean marker, snapshot or something like that */
252                 /* FIXME: If it's something that needs to be copied, including something
253                    we don't grok that has JFFS2_NODETYPE_RWCOMPAT_COPY, we should do so */
254                 spin_unlock(&c->erase_completion_lock);
255                 jffs2_mark_node_obsolete(c, raw);
256                 up(&c->alloc_sem);
257                 goto eraseit_lock;
258         }
259
260         ic = jffs2_raw_ref_to_ic(raw);
261
262         /* We need to hold the inocache. Either the erase_completion_lock or
263            the inocache_lock are sufficient; we trade down since the inocache_lock 
264            causes less contention. */
265         spin_lock(&c->inocache_lock);
266
267         spin_unlock(&c->erase_completion_lock);
268
269         D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
270
271         /* Three possibilities:
272            1. Inode is already in-core. We must iget it and do proper
273               updating to its fragtree, etc.
274            2. Inode is not in-core, node is REF_PRISTINE. We lock the
275               inocache to prevent a read_inode(), copy the node intact.
276            3. Inode is not in-core, node is not pristine. We must iget()
277               and take the slow path.
278         */
279
280         switch(ic->state) {
281         case INO_STATE_CHECKEDABSENT:
282                 /* It's been checked, but it's not currently in-core. 
283                    We can just copy any pristine nodes, but have
284                    to prevent anyone else from doing read_inode() while
285                    we're at it, so we set the state accordingly */
286                 if (ref_flags(raw) == REF_PRISTINE)
287                         ic->state = INO_STATE_GC;
288                 else {
289                         D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n", 
290                                   ic->ino));
291                 }
292                 break;
293
294         case INO_STATE_PRESENT:
295                 /* It's in-core. GC must iget() it. */
296                 break;
297
298         case INO_STATE_UNCHECKED:
299         case INO_STATE_CHECKING:
300         case INO_STATE_GC:
301                 /* Should never happen. We should have finished checking
302                    by the time we actually start doing any GC, and since 
303                    we're holding the alloc_sem, no other garbage collection 
304                    can happen.
305                 */
306                 printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
307                        ic->ino, ic->state);
308                 up(&c->alloc_sem);
309                 spin_unlock(&c->inocache_lock);
310                 BUG();
311
312         case INO_STATE_READING:
313                 /* Someone's currently trying to read it. We must wait for
314                    them to finish and then go through the full iget() route
315                    to do the GC. However, sometimes read_inode() needs to get
316                    the alloc_sem() (for marking nodes invalid) so we must
317                    drop the alloc_sem before sleeping. */
318
319                 up(&c->alloc_sem);
320                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
321                           ic->ino, ic->state));
322                 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
323                 /* And because we dropped the alloc_sem we must start again from the 
324                    beginning. Ponder chance of livelock here -- we're returning success
325                    without actually making any progress.
326
327                    Q: What are the chances that the inode is back in INO_STATE_READING 
328                    again by the time we next enter this function? And that this happens
329                    enough times to cause a real delay?
330
331                    A: Small enough that I don't care :) 
332                 */
333                 return 0;
334         }
335
336         /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
337            node intact, and we don't have to muck about with the fragtree etc. 
338            because we know it's not in-core. If it _was_ in-core, we go through
339            all the iget() crap anyway */
340
341         if (ic->state == INO_STATE_GC) {
342                 spin_unlock(&c->inocache_lock);
343
344                 ret = jffs2_garbage_collect_pristine(c, ic, raw);
345
346                 spin_lock(&c->inocache_lock);
347                 ic->state = INO_STATE_CHECKEDABSENT;
348                 wake_up(&c->inocache_wq);
349
350                 if (ret != -EBADFD) {
351                         spin_unlock(&c->inocache_lock);
352                         goto release_sem;
353                 }
354
355                 /* Fall through if it wanted us to, with inocache_lock held */
356         }
357
358         /* Prevent the fairly unlikely race where the gcblock is
359            entirely obsoleted by the final close of a file which had
360            the only valid nodes in the block, followed by erasure,
361            followed by freeing of the ic because the erased block(s)
362            held _all_ the nodes of that inode.... never been seen but
363            it's vaguely possible. */
364
365         inum = ic->ino;
366         nlink = ic->nlink;
367         spin_unlock(&c->inocache_lock);
368
369         f = jffs2_gc_fetch_inode(c, inum, nlink);
370         if (IS_ERR(f)) {
371                 ret = PTR_ERR(f);
372                 goto release_sem;
373         }
374         if (!f) {
375                 ret = 0;
376                 goto release_sem;
377         }
378
379         ret = jffs2_garbage_collect_live(c, jeb, raw, f);
380
381         jffs2_gc_release_inode(c, f);
382
383  release_sem:
384         up(&c->alloc_sem);
385
386  eraseit_lock:
387         /* If we've finished this block, start it erasing */
388         spin_lock(&c->erase_completion_lock);
389
390  eraseit:
391         if (c->gcblock && !c->gcblock->used_size) {
392                 D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
393                 /* We're GC'ing an empty block? */
394                 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
395                 c->gcblock = NULL;
396                 c->nr_erasing_blocks++;
397                 jffs2_erase_pending_trigger(c);
398         }
399         spin_unlock(&c->erase_completion_lock);
400
401         return ret;
402 }
403
404 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
405                                       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
406 {
407         struct jffs2_node_frag *frag;
408         struct jffs2_full_dnode *fn = NULL;
409         struct jffs2_full_dirent *fd;
410         uint32_t start = 0, end = 0, nrfrags = 0;
411         int ret = 0;
412
413         down(&f->sem);
414
415         /* Now we have the lock for this inode. Check that it's still the one at the head
416            of the list. */
417
418         spin_lock(&c->erase_completion_lock);
419
420         if (c->gcblock != jeb) {
421                 spin_unlock(&c->erase_completion_lock);
422                 D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
423                 goto upnout;
424         }
425         if (ref_obsolete(raw)) {
426                 spin_unlock(&c->erase_completion_lock);
427                 D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
428                 /* They'll call again */
429                 goto upnout;
430         }
431         spin_unlock(&c->erase_completion_lock);
432
433         /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
434         if (f->metadata && f->metadata->raw == raw) {
435                 fn = f->metadata;
436                 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
437                 goto upnout;
438         }
439
440         /* FIXME. Read node and do lookup? */
441         for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
442                 if (frag->node && frag->node->raw == raw) {
443                         fn = frag->node;
444                         end = frag->ofs + frag->size;
445                         if (!nrfrags++)
446                                 start = frag->ofs;
447                         if (nrfrags == frag->node->frags)
448                                 break; /* We've found them all */
449                 }
450         }
451         if (fn) {
452                 if (ref_flags(raw) == REF_PRISTINE) {
453                         ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
454                         if (!ret) {
455                                 /* Urgh. Return it sensibly. */
456                                 frag->node->raw = f->inocache->nodes;
457                         }       
458                         if (ret != -EBADFD)
459                                 goto upnout;
460                 }
461                 /* We found a datanode. Do the GC */
462                 if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
463                         /* It crosses a page boundary. Therefore, it must be a hole. */
464                         ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
465                 } else {
466                         /* It could still be a hole. But we GC the page this way anyway */
467                         ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
468                 }
469                 goto upnout;
470         }
471         
472         /* Wasn't a dnode. Try dirent */
473         for (fd = f->dents; fd; fd=fd->next) {
474                 if (fd->raw == raw)
475                         break;
476         }
477
478         if (fd && fd->ino) {
479                 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
480         } else if (fd) {
481                 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
482         } else {
483                 printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
484                        ref_offset(raw), f->inocache->ino);
485                 if (ref_obsolete(raw)) {
486                         printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
487                 } else {
488                         ret = -EIO;
489                 }
490         }
491  upnout:
492         up(&f->sem);
493
494         return ret;
495 }
496
497 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c, 
498                                           struct jffs2_inode_cache *ic,
499                                           struct jffs2_raw_node_ref *raw)
500 {
501         union jffs2_node_union *node;
502         struct jffs2_raw_node_ref *nraw;
503         size_t retlen;
504         int ret;
505         uint32_t phys_ofs, alloclen;
506         uint32_t crc, rawlen;
507         int retried = 0;
508
509         D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
510
511         rawlen = ref_totlen(c, c->gcblock, raw);
512
513         /* Ask for a small amount of space (or the totlen if smaller) because we
514            don't want to force wastage of the end of a block if splitting would
515            work. */
516         ret = jffs2_reserve_space_gc(c, min_t(uint32_t, sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN, 
517                                               rawlen), &phys_ofs, &alloclen);
518         if (ret)
519                 return ret;
520
521         if (alloclen < rawlen) {
522                 /* Doesn't fit untouched. We'll go the old route and split it */
523                 return -EBADFD;
524         }
525
526         node = kmalloc(rawlen, GFP_KERNEL);
527         if (!node)
528                return -ENOMEM;
529
530         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
531         if (!ret && retlen != rawlen)
532                 ret = -EIO;
533         if (ret)
534                 goto out_node;
535
536         crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
537         if (je32_to_cpu(node->u.hdr_crc) != crc) {
538                 printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
539                        ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
540                 goto bail;
541         }
542
543         switch(je16_to_cpu(node->u.nodetype)) {
544         case JFFS2_NODETYPE_INODE:
545                 crc = crc32(0, node, sizeof(node->i)-8);
546                 if (je32_to_cpu(node->i.node_crc) != crc) {
547                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
548                                ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
549                         goto bail;
550                 }
551
552                 if (je32_to_cpu(node->i.dsize)) {
553                         crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
554                         if (je32_to_cpu(node->i.data_crc) != crc) {
555                                 printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
556                                        ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
557                                 goto bail;
558                         }
559                 }
560                 break;
561
562         case JFFS2_NODETYPE_DIRENT:
563                 crc = crc32(0, node, sizeof(node->d)-8);
564                 if (je32_to_cpu(node->d.node_crc) != crc) {
565                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
566                                ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
567                         goto bail;
568                 }
569
570                 if (node->d.nsize) {
571                         crc = crc32(0, node->d.name, node->d.nsize);
572                         if (je32_to_cpu(node->d.name_crc) != crc) {
573                                 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent ode at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
574                                        ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
575                                 goto bail;
576                         }
577                 }
578                 break;
579         default:
580                 printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n", 
581                        ref_offset(raw), je16_to_cpu(node->u.nodetype));
582                 goto bail;
583         }
584
585         nraw = jffs2_alloc_raw_node_ref();
586         if (!nraw) {
587                 ret = -ENOMEM;
588                 goto out_node;
589         }
590
591         /* OK, all the CRCs are good; this node can just be copied as-is. */
592  retry:
593         nraw->flash_offset = phys_ofs;
594         nraw->__totlen = rawlen;
595         nraw->next_phys = NULL;
596
597         ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
598
599         if (ret || (retlen != rawlen)) {
600                 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
601                        rawlen, phys_ofs, ret, retlen);
602                 if (retlen) {
603                         /* Doesn't belong to any inode */
604                         nraw->next_in_ino = NULL;
605
606                         nraw->flash_offset |= REF_OBSOLETE;
607                         jffs2_add_physical_node_ref(c, nraw);
608                         jffs2_mark_node_obsolete(c, nraw);
609                 } else {
610                         printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", nraw->flash_offset);
611                         jffs2_free_raw_node_ref(nraw);
612                 }
613                 if (!retried && (nraw = jffs2_alloc_raw_node_ref())) {
614                         /* Try to reallocate space and retry */
615                         uint32_t dummy;
616                         struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
617
618                         retried = 1;
619
620                         D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
621                         
622                         ACCT_SANITY_CHECK(c,jeb);
623                         D1(ACCT_PARANOIA_CHECK(jeb));
624
625                         ret = jffs2_reserve_space_gc(c, rawlen, &phys_ofs, &dummy);
626
627                         if (!ret) {
628                                 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
629
630                                 ACCT_SANITY_CHECK(c,jeb);
631                                 D1(ACCT_PARANOIA_CHECK(jeb));
632
633                                 goto retry;
634                         }
635                         D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
636                         jffs2_free_raw_node_ref(nraw);
637                 }
638
639                 jffs2_free_raw_node_ref(nraw);
640                 if (!ret)
641                         ret = -EIO;
642                 goto out_node;
643         }
644         nraw->flash_offset |= REF_PRISTINE;
645         jffs2_add_physical_node_ref(c, nraw);
646
647         /* Link into per-inode list. This is safe because of the ic
648            state being INO_STATE_GC. Note that if we're doing this
649            for an inode which is in-core, the 'nraw' pointer is then
650            going to be fetched from ic->nodes by our caller. */
651         spin_lock(&c->erase_completion_lock);
652         nraw->next_in_ino = ic->nodes;
653         ic->nodes = nraw;
654         spin_unlock(&c->erase_completion_lock);
655
656         jffs2_mark_node_obsolete(c, raw);
657         D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
658
659  out_node:
660         kfree(node);
661         return ret;
662  bail:
663         ret = -EBADFD;
664         goto out_node;
665 }
666
667 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 
668                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
669 {
670         struct jffs2_full_dnode *new_fn;
671         struct jffs2_raw_inode ri;
672         jint16_t dev;
673         char *mdata = NULL, mdatalen = 0;
674         uint32_t alloclen, phys_ofs;
675         int ret;
676
677         if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
678             S_ISCHR(JFFS2_F_I_MODE(f)) ) {
679                 /* For these, we don't actually need to read the old node */
680                 /* FIXME: for minor or major > 255. */
681                 dev = cpu_to_je16(((JFFS2_F_I_RDEV_MAJ(f) << 8) | 
682                         JFFS2_F_I_RDEV_MIN(f)));
683                 mdata = (char *)&dev;
684                 mdatalen = sizeof(dev);
685                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
686         } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
687                 mdatalen = fn->size;
688                 mdata = kmalloc(fn->size, GFP_KERNEL);
689                 if (!mdata) {
690                         printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
691                         return -ENOMEM;
692                 }
693                 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
694                 if (ret) {
695                         printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
696                         kfree(mdata);
697                         return ret;
698                 }
699                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
700
701         }
702         
703         ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &phys_ofs, &alloclen);
704         if (ret) {
705                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
706                        sizeof(ri)+ mdatalen, ret);
707                 goto out;
708         }
709         
710         memset(&ri, 0, sizeof(ri));
711         ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
712         ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
713         ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
714         ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
715
716         ri.ino = cpu_to_je32(f->inocache->ino);
717         ri.version = cpu_to_je32(++f->highest_version);
718         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
719         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
720         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
721         ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
722         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
723         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
724         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
725         ri.offset = cpu_to_je32(0);
726         ri.csize = cpu_to_je32(mdatalen);
727         ri.dsize = cpu_to_je32(mdatalen);
728         ri.compr = JFFS2_COMPR_NONE;
729         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
730         ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
731
732         new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, phys_ofs, ALLOC_GC);
733
734         if (IS_ERR(new_fn)) {
735                 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
736                 ret = PTR_ERR(new_fn);
737                 goto out;
738         }
739         jffs2_mark_node_obsolete(c, fn->raw);
740         jffs2_free_full_dnode(fn);
741         f->metadata = new_fn;
742  out:
743         if (S_ISLNK(JFFS2_F_I_MODE(f)))
744                 kfree(mdata);
745         return ret;
746 }
747
748 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 
749                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
750 {
751         struct jffs2_full_dirent *new_fd;
752         struct jffs2_raw_dirent rd;
753         uint32_t alloclen, phys_ofs;
754         int ret;
755
756         rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
757         rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
758         rd.nsize = strlen(fd->name);
759         rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
760         rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
761
762         rd.pino = cpu_to_je32(f->inocache->ino);
763         rd.version = cpu_to_je32(++f->highest_version);
764         rd.ino = cpu_to_je32(fd->ino);
765         rd.mctime = cpu_to_je32(max(JFFS2_F_I_MTIME(f), JFFS2_F_I_CTIME(f)));
766         rd.type = fd->type;
767         rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
768         rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
769         
770         ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &phys_ofs, &alloclen);
771         if (ret) {
772                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
773                        sizeof(rd)+rd.nsize, ret);
774                 return ret;
775         }
776         new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, phys_ofs, ALLOC_GC);
777
778         if (IS_ERR(new_fd)) {
779                 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
780                 return PTR_ERR(new_fd);
781         }
782         jffs2_add_fd_to_list(c, new_fd, &f->dents);
783         return 0;
784 }
785
786 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 
787                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
788 {
789         struct jffs2_full_dirent **fdp = &f->dents;
790         int found = 0;
791
792         /* On a medium where we can't actually mark nodes obsolete
793            pernamently, such as NAND flash, we need to work out
794            whether this deletion dirent is still needed to actively
795            delete a 'real' dirent with the same name that's still
796            somewhere else on the flash. */
797         if (!jffs2_can_mark_obsolete(c)) {
798                 struct jffs2_raw_dirent *rd;
799                 struct jffs2_raw_node_ref *raw;
800                 int ret;
801                 size_t retlen;
802                 int name_len = strlen(fd->name);
803                 uint32_t name_crc = crc32(0, fd->name, name_len);
804                 uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
805
806                 rd = kmalloc(rawlen, GFP_KERNEL);
807                 if (!rd)
808                         return -ENOMEM;
809
810                 /* Prevent the erase code from nicking the obsolete node refs while
811                    we're looking at them. I really don't like this extra lock but
812                    can't see any alternative. Suggestions on a postcard to... */
813                 down(&c->erase_free_sem);
814
815                 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
816
817                         /* We only care about obsolete ones */
818                         if (!(ref_obsolete(raw)))
819                                 continue;
820
821                         /* Any dirent with the same name is going to have the same length... */
822                         if (ref_totlen(c, NULL, raw) != rawlen)
823                                 continue;
824
825                         /* Doesn't matter if there's one in the same erase block. We're going to 
826                            delete it too at the same time. */
827                         if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
828                                 continue;
829
830                         D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
831
832                         /* This is an obsolete node belonging to the same directory, and it's of the right
833                            length. We need to take a closer look...*/
834                         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
835                         if (ret) {
836                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
837                                 /* If we can't read it, we don't need to continue to obsolete it. Continue */
838                                 continue;
839                         }
840                         if (retlen != rawlen) {
841                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
842                                        retlen, rawlen, ref_offset(raw));
843                                 continue;
844                         }
845
846                         if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
847                                 continue;
848
849                         /* If the name CRC doesn't match, skip */
850                         if (je32_to_cpu(rd->name_crc) != name_crc)
851                                 continue;
852
853                         /* If the name length doesn't match, or it's another deletion dirent, skip */
854                         if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
855                                 continue;
856
857                         /* OK, check the actual name now */
858                         if (memcmp(rd->name, fd->name, name_len))
859                                 continue;
860
861                         /* OK. The name really does match. There really is still an older node on
862                            the flash which our deletion dirent obsoletes. So we have to write out
863                            a new deletion dirent to replace it */
864                         up(&c->erase_free_sem);
865
866                         D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
867                                   ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
868                         kfree(rd);
869
870                         return jffs2_garbage_collect_dirent(c, jeb, f, fd);
871                 }
872
873                 up(&c->erase_free_sem);
874                 kfree(rd);
875         }
876
877         /* No need for it any more. Just mark it obsolete and remove it from the list */
878         while (*fdp) {
879                 if ((*fdp) == fd) {
880                         found = 1;
881                         *fdp = fd->next;
882                         break;
883                 }
884                 fdp = &(*fdp)->next;
885         }
886         if (!found) {
887                 printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
888         }
889         jffs2_mark_node_obsolete(c, fd->raw);
890         jffs2_free_full_dirent(fd);
891         return 0;
892 }
893
894 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
895                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
896                                       uint32_t start, uint32_t end)
897 {
898         struct jffs2_raw_inode ri;
899         struct jffs2_node_frag *frag;
900         struct jffs2_full_dnode *new_fn;
901         uint32_t alloclen, phys_ofs;
902         int ret;
903
904         D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
905                   f->inocache->ino, start, end));
906         
907         memset(&ri, 0, sizeof(ri));
908
909         if(fn->frags > 1) {
910                 size_t readlen;
911                 uint32_t crc;
912                 /* It's partially obsoleted by a later write. So we have to 
913                    write it out again with the _same_ version as before */
914                 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
915                 if (readlen != sizeof(ri) || ret) {
916                         printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
917                         goto fill;
918                 }
919                 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
920                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
921                                ref_offset(fn->raw),
922                                je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
923                         return -EIO;
924                 }
925                 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
926                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
927                                ref_offset(fn->raw),
928                                je32_to_cpu(ri.totlen), sizeof(ri));
929                         return -EIO;
930                 }
931                 crc = crc32(0, &ri, sizeof(ri)-8);
932                 if (crc != je32_to_cpu(ri.node_crc)) {
933                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
934                                ref_offset(fn->raw), 
935                                je32_to_cpu(ri.node_crc), crc);
936                         /* FIXME: We could possibly deal with this by writing new holes for each frag */
937                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n", 
938                                start, end, f->inocache->ino);
939                         goto fill;
940                 }
941                 if (ri.compr != JFFS2_COMPR_ZERO) {
942                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
943                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n", 
944                                start, end, f->inocache->ino);
945                         goto fill;
946                 }
947         } else {
948         fill:
949                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
950                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
951                 ri.totlen = cpu_to_je32(sizeof(ri));
952                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
953
954                 ri.ino = cpu_to_je32(f->inocache->ino);
955                 ri.version = cpu_to_je32(++f->highest_version);
956                 ri.offset = cpu_to_je32(start);
957                 ri.dsize = cpu_to_je32(end - start);
958                 ri.csize = cpu_to_je32(0);
959                 ri.compr = JFFS2_COMPR_ZERO;
960         }
961         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
962         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
963         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
964         ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
965         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
966         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
967         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
968         ri.data_crc = cpu_to_je32(0);
969         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
970
971         ret = jffs2_reserve_space_gc(c, sizeof(ri), &phys_ofs, &alloclen);
972         if (ret) {
973                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
974                        sizeof(ri), ret);
975                 return ret;
976         }
977         new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, ALLOC_GC);
978
979         if (IS_ERR(new_fn)) {
980                 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
981                 return PTR_ERR(new_fn);
982         }
983         if (je32_to_cpu(ri.version) == f->highest_version) {
984                 jffs2_add_full_dnode_to_inode(c, f, new_fn);
985                 if (f->metadata) {
986                         jffs2_mark_node_obsolete(c, f->metadata->raw);
987                         jffs2_free_full_dnode(f->metadata);
988                         f->metadata = NULL;
989                 }
990                 return 0;
991         }
992
993         /* 
994          * We should only get here in the case where the node we are
995          * replacing had more than one frag, so we kept the same version
996          * number as before. (Except in case of error -- see 'goto fill;' 
997          * above.)
998          */
999         D1(if(unlikely(fn->frags <= 1)) {
1000                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1001                        fn->frags, je32_to_cpu(ri.version), f->highest_version,
1002                        je32_to_cpu(ri.ino));
1003         });
1004
1005         /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1006         mark_ref_normal(new_fn->raw);
1007
1008         for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs); 
1009              frag; frag = frag_next(frag)) {
1010                 if (frag->ofs > fn->size + fn->ofs)
1011                         break;
1012                 if (frag->node == fn) {
1013                         frag->node = new_fn;
1014                         new_fn->frags++;
1015                         fn->frags--;
1016                 }
1017         }
1018         if (fn->frags) {
1019                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1020                 BUG();
1021         }
1022         if (!new_fn->frags) {
1023                 printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1024                 BUG();
1025         }
1026                 
1027         jffs2_mark_node_obsolete(c, fn->raw);
1028         jffs2_free_full_dnode(fn);
1029         
1030         return 0;
1031 }
1032
1033 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1034                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1035                                        uint32_t start, uint32_t end)
1036 {
1037         struct jffs2_full_dnode *new_fn;
1038         struct jffs2_raw_inode ri;
1039         uint32_t alloclen, phys_ofs, offset, orig_end, orig_start;      
1040         int ret = 0;
1041         unsigned char *comprbuf = NULL, *writebuf;
1042         unsigned long pg;
1043         unsigned char *pg_ptr;
1044  
1045         memset(&ri, 0, sizeof(ri));
1046
1047         D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1048                   f->inocache->ino, start, end));
1049
1050         orig_end = end;
1051         orig_start = start;
1052
1053         if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1054                 /* Attempt to do some merging. But only expand to cover logically
1055                    adjacent frags if the block containing them is already considered
1056                    to be dirty. Otherwise we end up with GC just going round in 
1057                    circles dirtying the nodes it already wrote out, especially 
1058                    on NAND where we have small eraseblocks and hence a much higher
1059                    chance of nodes having to be split to cross boundaries. */
1060
1061                 struct jffs2_node_frag *frag;
1062                 uint32_t min, max;
1063
1064                 min = start & ~(PAGE_CACHE_SIZE-1);
1065                 max = min + PAGE_CACHE_SIZE;
1066
1067                 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1068
1069                 /* BUG_ON(!frag) but that'll happen anyway... */
1070
1071                 BUG_ON(frag->ofs != start);
1072
1073                 /* First grow down... */
1074                 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1075
1076                         /* If the previous frag doesn't even reach the beginning, there's
1077                            excessive fragmentation. Just merge. */
1078                         if (frag->ofs > min) {
1079                                 D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1080                                           frag->ofs, frag->ofs+frag->size));
1081                                 start = frag->ofs;
1082                                 continue;
1083                         }
1084                         /* OK. This frag holds the first byte of the page. */
1085                         if (!frag->node || !frag->node->raw) {
1086                                 D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1087                                           frag->ofs, frag->ofs+frag->size));
1088                                 break;
1089                         } else {
1090
1091                                 /* OK, it's a frag which extends to the beginning of the page. Does it live 
1092                                    in a block which is still considered clean? If so, don't obsolete it.
1093                                    If not, cover it anyway. */
1094
1095                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1096                                 struct jffs2_eraseblock *jeb;
1097
1098                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1099
1100                                 if (jeb == c->gcblock) {
1101                                         D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1102                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1103                                         start = frag->ofs;
1104                                         break;
1105                                 }
1106                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1107                                         D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1108                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1109                                         break;
1110                                 }
1111
1112                                 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1113                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1114                                 start = frag->ofs;
1115                                 break;
1116                         }
1117                 }
1118
1119                 /* ... then up */
1120
1121                 /* Find last frag which is actually part of the node we're to GC. */
1122                 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1123
1124                 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1125
1126                         /* If the previous frag doesn't even reach the beginning, there's lots
1127                            of fragmentation. Just merge. */
1128                         if (frag->ofs+frag->size < max) {
1129                                 D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1130                                           frag->ofs, frag->ofs+frag->size));
1131                                 end = frag->ofs + frag->size;
1132                                 continue;
1133                         }
1134
1135                         if (!frag->node || !frag->node->raw) {
1136                                 D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1137                                           frag->ofs, frag->ofs+frag->size));
1138                                 break;
1139                         } else {
1140
1141                                 /* OK, it's a frag which extends to the beginning of the page. Does it live 
1142                                    in a block which is still considered clean? If so, don't obsolete it.
1143                                    If not, cover it anyway. */
1144
1145                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1146                                 struct jffs2_eraseblock *jeb;
1147
1148                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1149
1150                                 if (jeb == c->gcblock) {
1151                                         D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1152                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1153                                         end = frag->ofs + frag->size;
1154                                         break;
1155                                 }
1156                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1157                                         D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1158                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1159                                         break;
1160                                 }
1161
1162                                 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1163                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1164                                 end = frag->ofs + frag->size;
1165                                 break;
1166                         }
1167                 }
1168                 D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n", 
1169                           orig_start, orig_end, start, end));
1170
1171                 BUG_ON(end > JFFS2_F_I_SIZE(f));
1172                 BUG_ON(end < orig_end);
1173                 BUG_ON(start > orig_start);
1174         }
1175         
1176         /* First, use readpage() to read the appropriate page into the page cache */
1177         /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1178          *    triggered garbage collection in the first place?
1179          * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1180          *    page OK. We'll actually write it out again in commit_write, which is a little
1181          *    suboptimal, but at least we're correct.
1182          */
1183         pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1184
1185         if (IS_ERR(pg_ptr)) {
1186                 printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1187                 return PTR_ERR(pg_ptr);
1188         }
1189
1190         offset = start;
1191         while(offset < orig_end) {
1192                 uint32_t datalen;
1193                 uint32_t cdatalen;
1194                 uint16_t comprtype = JFFS2_COMPR_NONE;
1195
1196                 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN, &phys_ofs, &alloclen);
1197
1198                 if (ret) {
1199                         printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1200                                sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1201                         break;
1202                 }
1203                 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1204                 datalen = end - offset;
1205
1206                 writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1207
1208                 comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1209
1210                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1211                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1212                 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1213                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1214
1215                 ri.ino = cpu_to_je32(f->inocache->ino);
1216                 ri.version = cpu_to_je32(++f->highest_version);
1217                 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1218                 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1219                 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1220                 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1221                 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1222                 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1223                 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1224                 ri.offset = cpu_to_je32(offset);
1225                 ri.csize = cpu_to_je32(cdatalen);
1226                 ri.dsize = cpu_to_je32(datalen);
1227                 ri.compr = comprtype & 0xff;
1228                 ri.usercompr = (comprtype >> 8) & 0xff;
1229                 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1230                 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1231         
1232                 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, phys_ofs, ALLOC_GC);
1233
1234                 jffs2_free_comprbuf(comprbuf, writebuf);
1235
1236                 if (IS_ERR(new_fn)) {
1237                         printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1238                         ret = PTR_ERR(new_fn);
1239                         break;
1240                 }
1241                 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1242                 offset += datalen;
1243                 if (f->metadata) {
1244                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1245                         jffs2_free_full_dnode(f->metadata);
1246                         f->metadata = NULL;
1247                 }
1248         }
1249
1250         jffs2_gc_release_page(c, pg_ptr, &pg);
1251         return ret;
1252 }
1253