[JFFS2] Fix race in post-mount node checking
[safe/jmp/linux-2.6] / fs / jffs2 / gc.c
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright (C) 2001-2003 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  * $Id: gc.c,v 1.155 2005/11/07 11:14:39 gleixner Exp $
11  *
12  */
13
14 #include <linux/kernel.h>
15 #include <linux/mtd/mtd.h>
16 #include <linux/slab.h>
17 #include <linux/pagemap.h>
18 #include <linux/crc32.h>
19 #include <linux/compiler.h>
20 #include <linux/stat.h>
21 #include "nodelist.h"
22 #include "compr.h"
23
24 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
25                                           struct jffs2_inode_cache *ic,
26                                           struct jffs2_raw_node_ref *raw);
27 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
28                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
29 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
30                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
31 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
32                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
33 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
34                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
35                                       uint32_t start, uint32_t end);
36 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
37                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
38                                        uint32_t start, uint32_t end);
39 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
40                                struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
41
42 /* Called with erase_completion_lock held */
43 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
44 {
45         struct jffs2_eraseblock *ret;
46         struct list_head *nextlist = NULL;
47         int n = jiffies % 128;
48
49         /* Pick an eraseblock to garbage collect next. This is where we'll
50            put the clever wear-levelling algorithms. Eventually.  */
51         /* We possibly want to favour the dirtier blocks more when the
52            number of free blocks is low. */
53 again:
54         if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
55                 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
56                 nextlist = &c->bad_used_list;
57         } else if (n < 50 && !list_empty(&c->erasable_list)) {
58                 /* Note that most of them will have gone directly to be erased.
59                    So don't favour the erasable_list _too_ much. */
60                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
61                 nextlist = &c->erasable_list;
62         } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
63                 /* Most of the time, pick one off the very_dirty list */
64                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
65                 nextlist = &c->very_dirty_list;
66         } else if (n < 126 && !list_empty(&c->dirty_list)) {
67                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
68                 nextlist = &c->dirty_list;
69         } else if (!list_empty(&c->clean_list)) {
70                 D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
71                 nextlist = &c->clean_list;
72         } else if (!list_empty(&c->dirty_list)) {
73                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
74
75                 nextlist = &c->dirty_list;
76         } else if (!list_empty(&c->very_dirty_list)) {
77                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
78                 nextlist = &c->very_dirty_list;
79         } else if (!list_empty(&c->erasable_list)) {
80                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
81
82                 nextlist = &c->erasable_list;
83         } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
84                 /* There are blocks are wating for the wbuf sync */
85                 D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
86                 spin_unlock(&c->erase_completion_lock);
87                 jffs2_flush_wbuf_pad(c);
88                 spin_lock(&c->erase_completion_lock);
89                 goto again;
90         } else {
91                 /* Eep. All were empty */
92                 D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
93                 return NULL;
94         }
95
96         ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
97         list_del(&ret->list);
98         c->gcblock = ret;
99         ret->gc_node = ret->first_node;
100         if (!ret->gc_node) {
101                 printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
102                 BUG();
103         }
104
105         /* Have we accidentally picked a clean block with wasted space ? */
106         if (ret->wasted_size) {
107                 D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
108                 ret->dirty_size += ret->wasted_size;
109                 c->wasted_size -= ret->wasted_size;
110                 c->dirty_size += ret->wasted_size;
111                 ret->wasted_size = 0;
112         }
113
114         return ret;
115 }
116
117 /* jffs2_garbage_collect_pass
118  * Make a single attempt to progress GC. Move one node, and possibly
119  * start erasing one eraseblock.
120  */
121 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
122 {
123         struct jffs2_inode_info *f;
124         struct jffs2_inode_cache *ic;
125         struct jffs2_eraseblock *jeb;
126         struct jffs2_raw_node_ref *raw;
127         int ret = 0, inum, nlink;
128
129         if (down_interruptible(&c->alloc_sem))
130                 return -EINTR;
131
132         for (;;) {
133                 spin_lock(&c->erase_completion_lock);
134                 if (!c->unchecked_size)
135                         break;
136
137                 /* We can't start doing GC yet. We haven't finished checking
138                    the node CRCs etc. Do it now. */
139
140                 /* checked_ino is protected by the alloc_sem */
141                 if (c->checked_ino > c->highest_ino) {
142                         printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
143                                c->unchecked_size);
144                         jffs2_dbg_dump_block_lists_nolock(c);
145                         spin_unlock(&c->erase_completion_lock);
146                         BUG();
147                 }
148
149                 spin_unlock(&c->erase_completion_lock);
150
151                 spin_lock(&c->inocache_lock);
152
153                 ic = jffs2_get_ino_cache(c, c->checked_ino++);
154
155                 if (!ic) {
156                         spin_unlock(&c->inocache_lock);
157                         continue;
158                 }
159
160                 if (!ic->nlink) {
161                         D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
162                                   ic->ino));
163                         spin_unlock(&c->inocache_lock);
164                         continue;
165                 }
166                 switch(ic->state) {
167                 case INO_STATE_CHECKEDABSENT:
168                 case INO_STATE_PRESENT:
169                         D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
170                         spin_unlock(&c->inocache_lock);
171                         continue;
172
173                 case INO_STATE_GC:
174                 case INO_STATE_CHECKING:
175                         printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
176                         spin_unlock(&c->inocache_lock);
177                         BUG();
178
179                 case INO_STATE_READING:
180                         /* We need to wait for it to finish, lest we move on
181                            and trigger the BUG() above while we haven't yet
182                            finished checking all its nodes */
183                         D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
184                         /* We need to come back again for the _same_ inode. We've
185                          made no progress in this case, but that should be OK */
186                         c->checked_ino--;
187
188                         up(&c->alloc_sem);
189                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
190                         return 0;
191
192                 default:
193                         BUG();
194
195                 case INO_STATE_UNCHECKED:
196                         ;
197                 }
198                 ic->state = INO_STATE_CHECKING;
199                 spin_unlock(&c->inocache_lock);
200
201                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
202
203                 ret = jffs2_do_crccheck_inode(c, ic);
204                 if (ret)
205                         printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
206
207                 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
208                 up(&c->alloc_sem);
209                 return ret;
210         }
211
212         /* First, work out which block we're garbage-collecting */
213         jeb = c->gcblock;
214
215         if (!jeb)
216                 jeb = jffs2_find_gc_block(c);
217
218         if (!jeb) {
219                 D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
220                 spin_unlock(&c->erase_completion_lock);
221                 up(&c->alloc_sem);
222                 return -EIO;
223         }
224
225         D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
226         D1(if (c->nextblock)
227            printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
228
229         if (!jeb->used_size) {
230                 up(&c->alloc_sem);
231                 goto eraseit;
232         }
233
234         raw = jeb->gc_node;
235
236         while(ref_obsolete(raw)) {
237                 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
238                 raw = raw->next_phys;
239                 if (unlikely(!raw)) {
240                         printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
241                         printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
242                                jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
243                         jeb->gc_node = raw;
244                         spin_unlock(&c->erase_completion_lock);
245                         up(&c->alloc_sem);
246                         BUG();
247                 }
248         }
249         jeb->gc_node = raw;
250
251         D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
252
253         if (!raw->next_in_ino) {
254                 /* Inode-less node. Clean marker, snapshot or something like that */
255                 /* FIXME: If it's something that needs to be copied, including something
256                    we don't grok that has JFFS2_NODETYPE_RWCOMPAT_COPY, we should do so */
257                 spin_unlock(&c->erase_completion_lock);
258                 jffs2_mark_node_obsolete(c, raw);
259                 up(&c->alloc_sem);
260                 goto eraseit_lock;
261         }
262
263         ic = jffs2_raw_ref_to_ic(raw);
264
265         /* We need to hold the inocache. Either the erase_completion_lock or
266            the inocache_lock are sufficient; we trade down since the inocache_lock
267            causes less contention. */
268         spin_lock(&c->inocache_lock);
269
270         spin_unlock(&c->erase_completion_lock);
271
272         D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
273
274         /* Three possibilities:
275            1. Inode is already in-core. We must iget it and do proper
276               updating to its fragtree, etc.
277            2. Inode is not in-core, node is REF_PRISTINE. We lock the
278               inocache to prevent a read_inode(), copy the node intact.
279            3. Inode is not in-core, node is not pristine. We must iget()
280               and take the slow path.
281         */
282
283         switch(ic->state) {
284         case INO_STATE_CHECKEDABSENT:
285                 /* It's been checked, but it's not currently in-core.
286                    We can just copy any pristine nodes, but have
287                    to prevent anyone else from doing read_inode() while
288                    we're at it, so we set the state accordingly */
289                 if (ref_flags(raw) == REF_PRISTINE)
290                         ic->state = INO_STATE_GC;
291                 else {
292                         D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
293                                   ic->ino));
294                 }
295                 break;
296
297         case INO_STATE_PRESENT:
298                 /* It's in-core. GC must iget() it. */
299                 break;
300
301         case INO_STATE_UNCHECKED:
302         case INO_STATE_CHECKING:
303         case INO_STATE_GC:
304                 /* Should never happen. We should have finished checking
305                    by the time we actually start doing any GC, and since
306                    we're holding the alloc_sem, no other garbage collection
307                    can happen.
308                 */
309                 printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
310                        ic->ino, ic->state);
311                 up(&c->alloc_sem);
312                 spin_unlock(&c->inocache_lock);
313                 BUG();
314
315         case INO_STATE_READING:
316                 /* Someone's currently trying to read it. We must wait for
317                    them to finish and then go through the full iget() route
318                    to do the GC. However, sometimes read_inode() needs to get
319                    the alloc_sem() (for marking nodes invalid) so we must
320                    drop the alloc_sem before sleeping. */
321
322                 up(&c->alloc_sem);
323                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
324                           ic->ino, ic->state));
325                 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
326                 /* And because we dropped the alloc_sem we must start again from the
327                    beginning. Ponder chance of livelock here -- we're returning success
328                    without actually making any progress.
329
330                    Q: What are the chances that the inode is back in INO_STATE_READING
331                    again by the time we next enter this function? And that this happens
332                    enough times to cause a real delay?
333
334                    A: Small enough that I don't care :)
335                 */
336                 return 0;
337         }
338
339         /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
340            node intact, and we don't have to muck about with the fragtree etc.
341            because we know it's not in-core. If it _was_ in-core, we go through
342            all the iget() crap anyway */
343
344         if (ic->state == INO_STATE_GC) {
345                 spin_unlock(&c->inocache_lock);
346
347                 ret = jffs2_garbage_collect_pristine(c, ic, raw);
348
349                 spin_lock(&c->inocache_lock);
350                 ic->state = INO_STATE_CHECKEDABSENT;
351                 wake_up(&c->inocache_wq);
352
353                 if (ret != -EBADFD) {
354                         spin_unlock(&c->inocache_lock);
355                         goto release_sem;
356                 }
357
358                 /* Fall through if it wanted us to, with inocache_lock held */
359         }
360
361         /* Prevent the fairly unlikely race where the gcblock is
362            entirely obsoleted by the final close of a file which had
363            the only valid nodes in the block, followed by erasure,
364            followed by freeing of the ic because the erased block(s)
365            held _all_ the nodes of that inode.... never been seen but
366            it's vaguely possible. */
367
368         inum = ic->ino;
369         nlink = ic->nlink;
370         spin_unlock(&c->inocache_lock);
371
372         f = jffs2_gc_fetch_inode(c, inum, nlink);
373         if (IS_ERR(f)) {
374                 ret = PTR_ERR(f);
375                 goto release_sem;
376         }
377         if (!f) {
378                 ret = 0;
379                 goto release_sem;
380         }
381
382         ret = jffs2_garbage_collect_live(c, jeb, raw, f);
383
384         jffs2_gc_release_inode(c, f);
385
386  release_sem:
387         up(&c->alloc_sem);
388
389  eraseit_lock:
390         /* If we've finished this block, start it erasing */
391         spin_lock(&c->erase_completion_lock);
392
393  eraseit:
394         if (c->gcblock && !c->gcblock->used_size) {
395                 D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
396                 /* We're GC'ing an empty block? */
397                 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
398                 c->gcblock = NULL;
399                 c->nr_erasing_blocks++;
400                 jffs2_erase_pending_trigger(c);
401         }
402         spin_unlock(&c->erase_completion_lock);
403
404         return ret;
405 }
406
407 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
408                                       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
409 {
410         struct jffs2_node_frag *frag;
411         struct jffs2_full_dnode *fn = NULL;
412         struct jffs2_full_dirent *fd;
413         uint32_t start = 0, end = 0, nrfrags = 0;
414         int ret = 0;
415
416         down(&f->sem);
417
418         /* Now we have the lock for this inode. Check that it's still the one at the head
419            of the list. */
420
421         spin_lock(&c->erase_completion_lock);
422
423         if (c->gcblock != jeb) {
424                 spin_unlock(&c->erase_completion_lock);
425                 D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
426                 goto upnout;
427         }
428         if (ref_obsolete(raw)) {
429                 spin_unlock(&c->erase_completion_lock);
430                 D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
431                 /* They'll call again */
432                 goto upnout;
433         }
434         spin_unlock(&c->erase_completion_lock);
435
436         /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
437         if (f->metadata && f->metadata->raw == raw) {
438                 fn = f->metadata;
439                 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
440                 goto upnout;
441         }
442
443         /* FIXME. Read node and do lookup? */
444         for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
445                 if (frag->node && frag->node->raw == raw) {
446                         fn = frag->node;
447                         end = frag->ofs + frag->size;
448                         if (!nrfrags++)
449                                 start = frag->ofs;
450                         if (nrfrags == frag->node->frags)
451                                 break; /* We've found them all */
452                 }
453         }
454         if (fn) {
455                 if (ref_flags(raw) == REF_PRISTINE) {
456                         ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
457                         if (!ret) {
458                                 /* Urgh. Return it sensibly. */
459                                 frag->node->raw = f->inocache->nodes;
460                         }
461                         if (ret != -EBADFD)
462                                 goto upnout;
463                 }
464                 /* We found a datanode. Do the GC */
465                 if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
466                         /* It crosses a page boundary. Therefore, it must be a hole. */
467                         ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
468                 } else {
469                         /* It could still be a hole. But we GC the page this way anyway */
470                         ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
471                 }
472                 goto upnout;
473         }
474
475         /* Wasn't a dnode. Try dirent */
476         for (fd = f->dents; fd; fd=fd->next) {
477                 if (fd->raw == raw)
478                         break;
479         }
480
481         if (fd && fd->ino) {
482                 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
483         } else if (fd) {
484                 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
485         } else {
486                 printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
487                        ref_offset(raw), f->inocache->ino);
488                 if (ref_obsolete(raw)) {
489                         printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
490                 } else {
491                         jffs2_dbg_dump_node(c, ref_offset(raw));
492                         BUG();
493                 }
494         }
495  upnout:
496         up(&f->sem);
497
498         return ret;
499 }
500
501 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
502                                           struct jffs2_inode_cache *ic,
503                                           struct jffs2_raw_node_ref *raw)
504 {
505         union jffs2_node_union *node;
506         struct jffs2_raw_node_ref *nraw;
507         size_t retlen;
508         int ret;
509         uint32_t phys_ofs, alloclen;
510         uint32_t crc, rawlen;
511         int retried = 0;
512
513         D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
514
515         rawlen = ref_totlen(c, c->gcblock, raw);
516
517         /* Ask for a small amount of space (or the totlen if smaller) because we
518            don't want to force wastage of the end of a block if splitting would
519            work. */
520         ret = jffs2_reserve_space_gc(c, min_t(uint32_t, sizeof(struct jffs2_raw_inode) +
521                                 JFFS2_MIN_DATA_LEN, rawlen), &phys_ofs, &alloclen, rawlen);
522                                 /* this is not the exact summary size of it,
523                                         it is only an upper estimation */
524
525         if (ret)
526                 return ret;
527
528         if (alloclen < rawlen) {
529                 /* Doesn't fit untouched. We'll go the old route and split it */
530                 return -EBADFD;
531         }
532
533         node = kmalloc(rawlen, GFP_KERNEL);
534         if (!node)
535                return -ENOMEM;
536
537         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
538         if (!ret && retlen != rawlen)
539                 ret = -EIO;
540         if (ret)
541                 goto out_node;
542
543         crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
544         if (je32_to_cpu(node->u.hdr_crc) != crc) {
545                 printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
546                        ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
547                 goto bail;
548         }
549
550         switch(je16_to_cpu(node->u.nodetype)) {
551         case JFFS2_NODETYPE_INODE:
552                 crc = crc32(0, node, sizeof(node->i)-8);
553                 if (je32_to_cpu(node->i.node_crc) != crc) {
554                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
555                                ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
556                         goto bail;
557                 }
558
559                 if (je32_to_cpu(node->i.dsize)) {
560                         crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
561                         if (je32_to_cpu(node->i.data_crc) != crc) {
562                                 printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
563                                        ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
564                                 goto bail;
565                         }
566                 }
567                 break;
568
569         case JFFS2_NODETYPE_DIRENT:
570                 crc = crc32(0, node, sizeof(node->d)-8);
571                 if (je32_to_cpu(node->d.node_crc) != crc) {
572                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
573                                ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
574                         goto bail;
575                 }
576
577                 if (node->d.nsize) {
578                         crc = crc32(0, node->d.name, node->d.nsize);
579                         if (je32_to_cpu(node->d.name_crc) != crc) {
580                                 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent ode at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
581                                        ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
582                                 goto bail;
583                         }
584                 }
585                 break;
586         default:
587                 printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
588                        ref_offset(raw), je16_to_cpu(node->u.nodetype));
589                 goto bail;
590         }
591
592         nraw = jffs2_alloc_raw_node_ref();
593         if (!nraw) {
594                 ret = -ENOMEM;
595                 goto out_node;
596         }
597
598         /* OK, all the CRCs are good; this node can just be copied as-is. */
599  retry:
600         nraw->flash_offset = phys_ofs;
601         nraw->__totlen = rawlen;
602         nraw->next_phys = NULL;
603
604         ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
605
606         if (ret || (retlen != rawlen)) {
607                 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
608                        rawlen, phys_ofs, ret, retlen);
609                 if (retlen) {
610                         /* Doesn't belong to any inode */
611                         nraw->next_in_ino = NULL;
612
613                         nraw->flash_offset |= REF_OBSOLETE;
614                         jffs2_add_physical_node_ref(c, nraw);
615                         jffs2_mark_node_obsolete(c, nraw);
616                 } else {
617                         printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", nraw->flash_offset);
618                         jffs2_free_raw_node_ref(nraw);
619                 }
620                 if (!retried && (nraw = jffs2_alloc_raw_node_ref())) {
621                         /* Try to reallocate space and retry */
622                         uint32_t dummy;
623                         struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
624
625                         retried = 1;
626
627                         D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
628
629                         jffs2_dbg_acct_sanity_check(c,jeb);
630                         jffs2_dbg_acct_paranoia_check(c, jeb);
631
632                         ret = jffs2_reserve_space_gc(c, rawlen, &phys_ofs, &dummy, rawlen);
633                                                 /* this is not the exact summary size of it,
634                                                         it is only an upper estimation */
635
636                         if (!ret) {
637                                 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
638
639                                 jffs2_dbg_acct_sanity_check(c,jeb);
640                                 jffs2_dbg_acct_paranoia_check(c, jeb);
641
642                                 goto retry;
643                         }
644                         D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
645                         jffs2_free_raw_node_ref(nraw);
646                 }
647
648                 jffs2_free_raw_node_ref(nraw);
649                 if (!ret)
650                         ret = -EIO;
651                 goto out_node;
652         }
653         nraw->flash_offset |= REF_PRISTINE;
654         jffs2_add_physical_node_ref(c, nraw);
655
656         /* Link into per-inode list. This is safe because of the ic
657            state being INO_STATE_GC. Note that if we're doing this
658            for an inode which is in-core, the 'nraw' pointer is then
659            going to be fetched from ic->nodes by our caller. */
660         spin_lock(&c->erase_completion_lock);
661         nraw->next_in_ino = ic->nodes;
662         ic->nodes = nraw;
663         spin_unlock(&c->erase_completion_lock);
664
665         jffs2_mark_node_obsolete(c, raw);
666         D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
667
668  out_node:
669         kfree(node);
670         return ret;
671  bail:
672         ret = -EBADFD;
673         goto out_node;
674 }
675
676 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
677                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
678 {
679         struct jffs2_full_dnode *new_fn;
680         struct jffs2_raw_inode ri;
681         struct jffs2_node_frag *last_frag;
682         jint16_t dev;
683         char *mdata = NULL, mdatalen = 0;
684         uint32_t alloclen, phys_ofs, ilen;
685         int ret;
686
687         if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
688             S_ISCHR(JFFS2_F_I_MODE(f)) ) {
689                 /* For these, we don't actually need to read the old node */
690                 /* FIXME: for minor or major > 255. */
691                 dev = cpu_to_je16(((JFFS2_F_I_RDEV_MAJ(f) << 8) |
692                         JFFS2_F_I_RDEV_MIN(f)));
693                 mdata = (char *)&dev;
694                 mdatalen = sizeof(dev);
695                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
696         } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
697                 mdatalen = fn->size;
698                 mdata = kmalloc(fn->size, GFP_KERNEL);
699                 if (!mdata) {
700                         printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
701                         return -ENOMEM;
702                 }
703                 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
704                 if (ret) {
705                         printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
706                         kfree(mdata);
707                         return ret;
708                 }
709                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
710
711         }
712
713         ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &phys_ofs, &alloclen,
714                                 JFFS2_SUMMARY_INODE_SIZE);
715         if (ret) {
716                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
717                        sizeof(ri)+ mdatalen, ret);
718                 goto out;
719         }
720
721         last_frag = frag_last(&f->fragtree);
722         if (last_frag)
723                 /* Fetch the inode length from the fragtree rather then
724                  * from i_size since i_size may have not been updated yet */
725                 ilen = last_frag->ofs + last_frag->size;
726         else
727                 ilen = JFFS2_F_I_SIZE(f);
728
729         memset(&ri, 0, sizeof(ri));
730         ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
731         ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
732         ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
733         ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
734
735         ri.ino = cpu_to_je32(f->inocache->ino);
736         ri.version = cpu_to_je32(++f->highest_version);
737         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
738         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
739         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
740         ri.isize = cpu_to_je32(ilen);
741         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
742         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
743         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
744         ri.offset = cpu_to_je32(0);
745         ri.csize = cpu_to_je32(mdatalen);
746         ri.dsize = cpu_to_je32(mdatalen);
747         ri.compr = JFFS2_COMPR_NONE;
748         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
749         ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
750
751         new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, phys_ofs, ALLOC_GC);
752
753         if (IS_ERR(new_fn)) {
754                 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
755                 ret = PTR_ERR(new_fn);
756                 goto out;
757         }
758         jffs2_mark_node_obsolete(c, fn->raw);
759         jffs2_free_full_dnode(fn);
760         f->metadata = new_fn;
761  out:
762         if (S_ISLNK(JFFS2_F_I_MODE(f)))
763                 kfree(mdata);
764         return ret;
765 }
766
767 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
768                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
769 {
770         struct jffs2_full_dirent *new_fd;
771         struct jffs2_raw_dirent rd;
772         uint32_t alloclen, phys_ofs;
773         int ret;
774
775         rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
776         rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
777         rd.nsize = strlen(fd->name);
778         rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
779         rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
780
781         rd.pino = cpu_to_je32(f->inocache->ino);
782         rd.version = cpu_to_je32(++f->highest_version);
783         rd.ino = cpu_to_je32(fd->ino);
784         /* If the times on this inode were set by explicit utime() they can be different,
785            so refrain from splatting them. */
786         if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
787                 rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
788         else
789                 rd.mctime = cpu_to_je32(0);
790         rd.type = fd->type;
791         rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
792         rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
793
794         ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &phys_ofs, &alloclen,
795                                 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
796         if (ret) {
797                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
798                        sizeof(rd)+rd.nsize, ret);
799                 return ret;
800         }
801         new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, phys_ofs, ALLOC_GC);
802
803         if (IS_ERR(new_fd)) {
804                 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
805                 return PTR_ERR(new_fd);
806         }
807         jffs2_add_fd_to_list(c, new_fd, &f->dents);
808         return 0;
809 }
810
811 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
812                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
813 {
814         struct jffs2_full_dirent **fdp = &f->dents;
815         int found = 0;
816
817         /* On a medium where we can't actually mark nodes obsolete
818            pernamently, such as NAND flash, we need to work out
819            whether this deletion dirent is still needed to actively
820            delete a 'real' dirent with the same name that's still
821            somewhere else on the flash. */
822         if (!jffs2_can_mark_obsolete(c)) {
823                 struct jffs2_raw_dirent *rd;
824                 struct jffs2_raw_node_ref *raw;
825                 int ret;
826                 size_t retlen;
827                 int name_len = strlen(fd->name);
828                 uint32_t name_crc = crc32(0, fd->name, name_len);
829                 uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
830
831                 rd = kmalloc(rawlen, GFP_KERNEL);
832                 if (!rd)
833                         return -ENOMEM;
834
835                 /* Prevent the erase code from nicking the obsolete node refs while
836                    we're looking at them. I really don't like this extra lock but
837                    can't see any alternative. Suggestions on a postcard to... */
838                 down(&c->erase_free_sem);
839
840                 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
841
842                         /* We only care about obsolete ones */
843                         if (!(ref_obsolete(raw)))
844                                 continue;
845
846                         /* Any dirent with the same name is going to have the same length... */
847                         if (ref_totlen(c, NULL, raw) != rawlen)
848                                 continue;
849
850                         /* Doesn't matter if there's one in the same erase block. We're going to
851                            delete it too at the same time. */
852                         if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
853                                 continue;
854
855                         D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
856
857                         /* This is an obsolete node belonging to the same directory, and it's of the right
858                            length. We need to take a closer look...*/
859                         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
860                         if (ret) {
861                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
862                                 /* If we can't read it, we don't need to continue to obsolete it. Continue */
863                                 continue;
864                         }
865                         if (retlen != rawlen) {
866                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
867                                        retlen, rawlen, ref_offset(raw));
868                                 continue;
869                         }
870
871                         if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
872                                 continue;
873
874                         /* If the name CRC doesn't match, skip */
875                         if (je32_to_cpu(rd->name_crc) != name_crc)
876                                 continue;
877
878                         /* If the name length doesn't match, or it's another deletion dirent, skip */
879                         if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
880                                 continue;
881
882                         /* OK, check the actual name now */
883                         if (memcmp(rd->name, fd->name, name_len))
884                                 continue;
885
886                         /* OK. The name really does match. There really is still an older node on
887                            the flash which our deletion dirent obsoletes. So we have to write out
888                            a new deletion dirent to replace it */
889                         up(&c->erase_free_sem);
890
891                         D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
892                                   ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
893                         kfree(rd);
894
895                         return jffs2_garbage_collect_dirent(c, jeb, f, fd);
896                 }
897
898                 up(&c->erase_free_sem);
899                 kfree(rd);
900         }
901
902         /* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
903            we should update the metadata node with those times accordingly */
904
905         /* No need for it any more. Just mark it obsolete and remove it from the list */
906         while (*fdp) {
907                 if ((*fdp) == fd) {
908                         found = 1;
909                         *fdp = fd->next;
910                         break;
911                 }
912                 fdp = &(*fdp)->next;
913         }
914         if (!found) {
915                 printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
916         }
917         jffs2_mark_node_obsolete(c, fd->raw);
918         jffs2_free_full_dirent(fd);
919         return 0;
920 }
921
922 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
923                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
924                                       uint32_t start, uint32_t end)
925 {
926         struct jffs2_raw_inode ri;
927         struct jffs2_node_frag *frag;
928         struct jffs2_full_dnode *new_fn;
929         uint32_t alloclen, phys_ofs, ilen;
930         int ret;
931
932         D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
933                   f->inocache->ino, start, end));
934
935         memset(&ri, 0, sizeof(ri));
936
937         if(fn->frags > 1) {
938                 size_t readlen;
939                 uint32_t crc;
940                 /* It's partially obsoleted by a later write. So we have to
941                    write it out again with the _same_ version as before */
942                 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
943                 if (readlen != sizeof(ri) || ret) {
944                         printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
945                         goto fill;
946                 }
947                 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
948                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
949                                ref_offset(fn->raw),
950                                je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
951                         return -EIO;
952                 }
953                 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
954                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
955                                ref_offset(fn->raw),
956                                je32_to_cpu(ri.totlen), sizeof(ri));
957                         return -EIO;
958                 }
959                 crc = crc32(0, &ri, sizeof(ri)-8);
960                 if (crc != je32_to_cpu(ri.node_crc)) {
961                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
962                                ref_offset(fn->raw),
963                                je32_to_cpu(ri.node_crc), crc);
964                         /* FIXME: We could possibly deal with this by writing new holes for each frag */
965                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
966                                start, end, f->inocache->ino);
967                         goto fill;
968                 }
969                 if (ri.compr != JFFS2_COMPR_ZERO) {
970                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
971                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
972                                start, end, f->inocache->ino);
973                         goto fill;
974                 }
975         } else {
976         fill:
977                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
978                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
979                 ri.totlen = cpu_to_je32(sizeof(ri));
980                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
981
982                 ri.ino = cpu_to_je32(f->inocache->ino);
983                 ri.version = cpu_to_je32(++f->highest_version);
984                 ri.offset = cpu_to_je32(start);
985                 ri.dsize = cpu_to_je32(end - start);
986                 ri.csize = cpu_to_je32(0);
987                 ri.compr = JFFS2_COMPR_ZERO;
988         }
989
990         frag = frag_last(&f->fragtree);
991         if (frag)
992                 /* Fetch the inode length from the fragtree rather then
993                  * from i_size since i_size may have not been updated yet */
994                 ilen = frag->ofs + frag->size;
995         else
996                 ilen = JFFS2_F_I_SIZE(f);
997
998         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
999         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1000         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1001         ri.isize = cpu_to_je32(ilen);
1002         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1003         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1004         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1005         ri.data_crc = cpu_to_je32(0);
1006         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1007
1008         ret = jffs2_reserve_space_gc(c, sizeof(ri), &phys_ofs, &alloclen,
1009                                 JFFS2_SUMMARY_INODE_SIZE);
1010         if (ret) {
1011                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1012                        sizeof(ri), ret);
1013                 return ret;
1014         }
1015         new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, ALLOC_GC);
1016
1017         if (IS_ERR(new_fn)) {
1018                 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1019                 return PTR_ERR(new_fn);
1020         }
1021         if (je32_to_cpu(ri.version) == f->highest_version) {
1022                 jffs2_add_full_dnode_to_inode(c, f, new_fn);
1023                 if (f->metadata) {
1024                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1025                         jffs2_free_full_dnode(f->metadata);
1026                         f->metadata = NULL;
1027                 }
1028                 return 0;
1029         }
1030
1031         /*
1032          * We should only get here in the case where the node we are
1033          * replacing had more than one frag, so we kept the same version
1034          * number as before. (Except in case of error -- see 'goto fill;'
1035          * above.)
1036          */
1037         D1(if(unlikely(fn->frags <= 1)) {
1038                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1039                        fn->frags, je32_to_cpu(ri.version), f->highest_version,
1040                        je32_to_cpu(ri.ino));
1041         });
1042
1043         /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1044         mark_ref_normal(new_fn->raw);
1045
1046         for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1047              frag; frag = frag_next(frag)) {
1048                 if (frag->ofs > fn->size + fn->ofs)
1049                         break;
1050                 if (frag->node == fn) {
1051                         frag->node = new_fn;
1052                         new_fn->frags++;
1053                         fn->frags--;
1054                 }
1055         }
1056         if (fn->frags) {
1057                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1058                 BUG();
1059         }
1060         if (!new_fn->frags) {
1061                 printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1062                 BUG();
1063         }
1064
1065         jffs2_mark_node_obsolete(c, fn->raw);
1066         jffs2_free_full_dnode(fn);
1067
1068         return 0;
1069 }
1070
1071 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1072                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1073                                        uint32_t start, uint32_t end)
1074 {
1075         struct jffs2_full_dnode *new_fn;
1076         struct jffs2_raw_inode ri;
1077         uint32_t alloclen, phys_ofs, offset, orig_end, orig_start;
1078         int ret = 0;
1079         unsigned char *comprbuf = NULL, *writebuf;
1080         unsigned long pg;
1081         unsigned char *pg_ptr;
1082
1083         memset(&ri, 0, sizeof(ri));
1084
1085         D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1086                   f->inocache->ino, start, end));
1087
1088         orig_end = end;
1089         orig_start = start;
1090
1091         if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1092                 /* Attempt to do some merging. But only expand to cover logically
1093                    adjacent frags if the block containing them is already considered
1094                    to be dirty. Otherwise we end up with GC just going round in
1095                    circles dirtying the nodes it already wrote out, especially
1096                    on NAND where we have small eraseblocks and hence a much higher
1097                    chance of nodes having to be split to cross boundaries. */
1098
1099                 struct jffs2_node_frag *frag;
1100                 uint32_t min, max;
1101
1102                 min = start & ~(PAGE_CACHE_SIZE-1);
1103                 max = min + PAGE_CACHE_SIZE;
1104
1105                 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1106
1107                 /* BUG_ON(!frag) but that'll happen anyway... */
1108
1109                 BUG_ON(frag->ofs != start);
1110
1111                 /* First grow down... */
1112                 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1113
1114                         /* If the previous frag doesn't even reach the beginning, there's
1115                            excessive fragmentation. Just merge. */
1116                         if (frag->ofs > min) {
1117                                 D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1118                                           frag->ofs, frag->ofs+frag->size));
1119                                 start = frag->ofs;
1120                                 continue;
1121                         }
1122                         /* OK. This frag holds the first byte of the page. */
1123                         if (!frag->node || !frag->node->raw) {
1124                                 D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1125                                           frag->ofs, frag->ofs+frag->size));
1126                                 break;
1127                         } else {
1128
1129                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1130                                    in a block which is still considered clean? If so, don't obsolete it.
1131                                    If not, cover it anyway. */
1132
1133                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1134                                 struct jffs2_eraseblock *jeb;
1135
1136                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1137
1138                                 if (jeb == c->gcblock) {
1139                                         D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1140                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1141                                         start = frag->ofs;
1142                                         break;
1143                                 }
1144                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1145                                         D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1146                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1147                                         break;
1148                                 }
1149
1150                                 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1151                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1152                                 start = frag->ofs;
1153                                 break;
1154                         }
1155                 }
1156
1157                 /* ... then up */
1158
1159                 /* Find last frag which is actually part of the node we're to GC. */
1160                 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1161
1162                 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1163
1164                         /* If the previous frag doesn't even reach the beginning, there's lots
1165                            of fragmentation. Just merge. */
1166                         if (frag->ofs+frag->size < max) {
1167                                 D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1168                                           frag->ofs, frag->ofs+frag->size));
1169                                 end = frag->ofs + frag->size;
1170                                 continue;
1171                         }
1172
1173                         if (!frag->node || !frag->node->raw) {
1174                                 D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1175                                           frag->ofs, frag->ofs+frag->size));
1176                                 break;
1177                         } else {
1178
1179                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1180                                    in a block which is still considered clean? If so, don't obsolete it.
1181                                    If not, cover it anyway. */
1182
1183                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1184                                 struct jffs2_eraseblock *jeb;
1185
1186                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1187
1188                                 if (jeb == c->gcblock) {
1189                                         D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1190                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1191                                         end = frag->ofs + frag->size;
1192                                         break;
1193                                 }
1194                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1195                                         D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1196                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1197                                         break;
1198                                 }
1199
1200                                 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1201                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1202                                 end = frag->ofs + frag->size;
1203                                 break;
1204                         }
1205                 }
1206                 D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1207                           orig_start, orig_end, start, end));
1208
1209                 D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1210                 BUG_ON(end < orig_end);
1211                 BUG_ON(start > orig_start);
1212         }
1213
1214         /* First, use readpage() to read the appropriate page into the page cache */
1215         /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1216          *    triggered garbage collection in the first place?
1217          * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1218          *    page OK. We'll actually write it out again in commit_write, which is a little
1219          *    suboptimal, but at least we're correct.
1220          */
1221         pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1222
1223         if (IS_ERR(pg_ptr)) {
1224                 printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1225                 return PTR_ERR(pg_ptr);
1226         }
1227
1228         offset = start;
1229         while(offset < orig_end) {
1230                 uint32_t datalen;
1231                 uint32_t cdatalen;
1232                 uint16_t comprtype = JFFS2_COMPR_NONE;
1233
1234                 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN, &phys_ofs,
1235                                         &alloclen, JFFS2_SUMMARY_INODE_SIZE);
1236
1237                 if (ret) {
1238                         printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1239                                sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1240                         break;
1241                 }
1242                 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1243                 datalen = end - offset;
1244
1245                 writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1246
1247                 comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1248
1249                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1250                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1251                 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1252                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1253
1254                 ri.ino = cpu_to_je32(f->inocache->ino);
1255                 ri.version = cpu_to_je32(++f->highest_version);
1256                 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1257                 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1258                 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1259                 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1260                 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1261                 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1262                 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1263                 ri.offset = cpu_to_je32(offset);
1264                 ri.csize = cpu_to_je32(cdatalen);
1265                 ri.dsize = cpu_to_je32(datalen);
1266                 ri.compr = comprtype & 0xff;
1267                 ri.usercompr = (comprtype >> 8) & 0xff;
1268                 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1269                 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1270
1271                 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, phys_ofs, ALLOC_GC);
1272
1273                 jffs2_free_comprbuf(comprbuf, writebuf);
1274
1275                 if (IS_ERR(new_fn)) {
1276                         printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1277                         ret = PTR_ERR(new_fn);
1278                         break;
1279                 }
1280                 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1281                 offset += datalen;
1282                 if (f->metadata) {
1283                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1284                         jffs2_free_full_dnode(f->metadata);
1285                         f->metadata = NULL;
1286                 }
1287         }
1288
1289         jffs2_gc_release_page(c, pg_ptr, &pg);
1290         return ret;
1291 }