[JFFS2] Support new device nodes
[safe/jmp/linux-2.6] / fs / jffs2 / gc.c
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright (C) 2001-2003 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  * $Id: gc.c,v 1.155 2005/11/07 11:14:39 gleixner Exp $
11  *
12  */
13
14 #include <linux/kernel.h>
15 #include <linux/mtd/mtd.h>
16 #include <linux/slab.h>
17 #include <linux/pagemap.h>
18 #include <linux/crc32.h>
19 #include <linux/compiler.h>
20 #include <linux/stat.h>
21 #include "nodelist.h"
22 #include "compr.h"
23
24 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
25                                           struct jffs2_inode_cache *ic,
26                                           struct jffs2_raw_node_ref *raw);
27 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
28                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
29 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
30                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
31 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
32                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
33 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
34                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
35                                       uint32_t start, uint32_t end);
36 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
37                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
38                                        uint32_t start, uint32_t end);
39 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
40                                struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
41
42 /* Called with erase_completion_lock held */
43 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
44 {
45         struct jffs2_eraseblock *ret;
46         struct list_head *nextlist = NULL;
47         int n = jiffies % 128;
48
49         /* Pick an eraseblock to garbage collect next. This is where we'll
50            put the clever wear-levelling algorithms. Eventually.  */
51         /* We possibly want to favour the dirtier blocks more when the
52            number of free blocks is low. */
53 again:
54         if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
55                 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
56                 nextlist = &c->bad_used_list;
57         } else if (n < 50 && !list_empty(&c->erasable_list)) {
58                 /* Note that most of them will have gone directly to be erased.
59                    So don't favour the erasable_list _too_ much. */
60                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
61                 nextlist = &c->erasable_list;
62         } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
63                 /* Most of the time, pick one off the very_dirty list */
64                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
65                 nextlist = &c->very_dirty_list;
66         } else if (n < 126 && !list_empty(&c->dirty_list)) {
67                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
68                 nextlist = &c->dirty_list;
69         } else if (!list_empty(&c->clean_list)) {
70                 D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
71                 nextlist = &c->clean_list;
72         } else if (!list_empty(&c->dirty_list)) {
73                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
74
75                 nextlist = &c->dirty_list;
76         } else if (!list_empty(&c->very_dirty_list)) {
77                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
78                 nextlist = &c->very_dirty_list;
79         } else if (!list_empty(&c->erasable_list)) {
80                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
81
82                 nextlist = &c->erasable_list;
83         } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
84                 /* There are blocks are wating for the wbuf sync */
85                 D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
86                 spin_unlock(&c->erase_completion_lock);
87                 jffs2_flush_wbuf_pad(c);
88                 spin_lock(&c->erase_completion_lock);
89                 goto again;
90         } else {
91                 /* Eep. All were empty */
92                 D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
93                 return NULL;
94         }
95
96         ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
97         list_del(&ret->list);
98         c->gcblock = ret;
99         ret->gc_node = ret->first_node;
100         if (!ret->gc_node) {
101                 printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
102                 BUG();
103         }
104
105         /* Have we accidentally picked a clean block with wasted space ? */
106         if (ret->wasted_size) {
107                 D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
108                 ret->dirty_size += ret->wasted_size;
109                 c->wasted_size -= ret->wasted_size;
110                 c->dirty_size += ret->wasted_size;
111                 ret->wasted_size = 0;
112         }
113
114         return ret;
115 }
116
117 /* jffs2_garbage_collect_pass
118  * Make a single attempt to progress GC. Move one node, and possibly
119  * start erasing one eraseblock.
120  */
121 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
122 {
123         struct jffs2_inode_info *f;
124         struct jffs2_inode_cache *ic;
125         struct jffs2_eraseblock *jeb;
126         struct jffs2_raw_node_ref *raw;
127         int ret = 0, inum, nlink;
128
129         if (down_interruptible(&c->alloc_sem))
130                 return -EINTR;
131
132         for (;;) {
133                 spin_lock(&c->erase_completion_lock);
134                 if (!c->unchecked_size)
135                         break;
136
137                 /* We can't start doing GC yet. We haven't finished checking
138                    the node CRCs etc. Do it now. */
139
140                 /* checked_ino is protected by the alloc_sem */
141                 if (c->checked_ino > c->highest_ino) {
142                         printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
143                                c->unchecked_size);
144                         jffs2_dbg_dump_block_lists_nolock(c);
145                         spin_unlock(&c->erase_completion_lock);
146                         BUG();
147                 }
148
149                 spin_unlock(&c->erase_completion_lock);
150
151                 spin_lock(&c->inocache_lock);
152
153                 ic = jffs2_get_ino_cache(c, c->checked_ino++);
154
155                 if (!ic) {
156                         spin_unlock(&c->inocache_lock);
157                         continue;
158                 }
159
160                 if (!ic->nlink) {
161                         D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
162                                   ic->ino));
163                         spin_unlock(&c->inocache_lock);
164                         continue;
165                 }
166                 switch(ic->state) {
167                 case INO_STATE_CHECKEDABSENT:
168                 case INO_STATE_PRESENT:
169                         D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
170                         spin_unlock(&c->inocache_lock);
171                         continue;
172
173                 case INO_STATE_GC:
174                 case INO_STATE_CHECKING:
175                         printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
176                         spin_unlock(&c->inocache_lock);
177                         BUG();
178
179                 case INO_STATE_READING:
180                         /* We need to wait for it to finish, lest we move on
181                            and trigger the BUG() above while we haven't yet
182                            finished checking all its nodes */
183                         D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
184                         /* We need to come back again for the _same_ inode. We've
185                          made no progress in this case, but that should be OK */
186                         c->checked_ino--;
187
188                         up(&c->alloc_sem);
189                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
190                         return 0;
191
192                 default:
193                         BUG();
194
195                 case INO_STATE_UNCHECKED:
196                         ;
197                 }
198                 ic->state = INO_STATE_CHECKING;
199                 spin_unlock(&c->inocache_lock);
200
201                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
202
203                 ret = jffs2_do_crccheck_inode(c, ic);
204                 if (ret)
205                         printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
206
207                 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
208                 up(&c->alloc_sem);
209                 return ret;
210         }
211
212         /* First, work out which block we're garbage-collecting */
213         jeb = c->gcblock;
214
215         if (!jeb)
216                 jeb = jffs2_find_gc_block(c);
217
218         if (!jeb) {
219                 D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
220                 spin_unlock(&c->erase_completion_lock);
221                 up(&c->alloc_sem);
222                 return -EIO;
223         }
224
225         D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
226         D1(if (c->nextblock)
227            printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
228
229         if (!jeb->used_size) {
230                 up(&c->alloc_sem);
231                 goto eraseit;
232         }
233
234         raw = jeb->gc_node;
235
236         while(ref_obsolete(raw)) {
237                 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
238                 raw = raw->next_phys;
239                 if (unlikely(!raw)) {
240                         printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
241                         printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
242                                jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
243                         jeb->gc_node = raw;
244                         spin_unlock(&c->erase_completion_lock);
245                         up(&c->alloc_sem);
246                         BUG();
247                 }
248         }
249         jeb->gc_node = raw;
250
251         D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
252
253         if (!raw->next_in_ino) {
254                 /* Inode-less node. Clean marker, snapshot or something like that */
255                 /* FIXME: If it's something that needs to be copied, including something
256                    we don't grok that has JFFS2_NODETYPE_RWCOMPAT_COPY, we should do so */
257                 spin_unlock(&c->erase_completion_lock);
258                 jffs2_mark_node_obsolete(c, raw);
259                 up(&c->alloc_sem);
260                 goto eraseit_lock;
261         }
262
263         ic = jffs2_raw_ref_to_ic(raw);
264
265         /* We need to hold the inocache. Either the erase_completion_lock or
266            the inocache_lock are sufficient; we trade down since the inocache_lock
267            causes less contention. */
268         spin_lock(&c->inocache_lock);
269
270         spin_unlock(&c->erase_completion_lock);
271
272         D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
273
274         /* Three possibilities:
275            1. Inode is already in-core. We must iget it and do proper
276               updating to its fragtree, etc.
277            2. Inode is not in-core, node is REF_PRISTINE. We lock the
278               inocache to prevent a read_inode(), copy the node intact.
279            3. Inode is not in-core, node is not pristine. We must iget()
280               and take the slow path.
281         */
282
283         switch(ic->state) {
284         case INO_STATE_CHECKEDABSENT:
285                 /* It's been checked, but it's not currently in-core.
286                    We can just copy any pristine nodes, but have
287                    to prevent anyone else from doing read_inode() while
288                    we're at it, so we set the state accordingly */
289                 if (ref_flags(raw) == REF_PRISTINE)
290                         ic->state = INO_STATE_GC;
291                 else {
292                         D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
293                                   ic->ino));
294                 }
295                 break;
296
297         case INO_STATE_PRESENT:
298                 /* It's in-core. GC must iget() it. */
299                 break;
300
301         case INO_STATE_UNCHECKED:
302         case INO_STATE_CHECKING:
303         case INO_STATE_GC:
304                 /* Should never happen. We should have finished checking
305                    by the time we actually start doing any GC, and since
306                    we're holding the alloc_sem, no other garbage collection
307                    can happen.
308                 */
309                 printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
310                        ic->ino, ic->state);
311                 up(&c->alloc_sem);
312                 spin_unlock(&c->inocache_lock);
313                 BUG();
314
315         case INO_STATE_READING:
316                 /* Someone's currently trying to read it. We must wait for
317                    them to finish and then go through the full iget() route
318                    to do the GC. However, sometimes read_inode() needs to get
319                    the alloc_sem() (for marking nodes invalid) so we must
320                    drop the alloc_sem before sleeping. */
321
322                 up(&c->alloc_sem);
323                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
324                           ic->ino, ic->state));
325                 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
326                 /* And because we dropped the alloc_sem we must start again from the
327                    beginning. Ponder chance of livelock here -- we're returning success
328                    without actually making any progress.
329
330                    Q: What are the chances that the inode is back in INO_STATE_READING
331                    again by the time we next enter this function? And that this happens
332                    enough times to cause a real delay?
333
334                    A: Small enough that I don't care :)
335                 */
336                 return 0;
337         }
338
339         /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
340            node intact, and we don't have to muck about with the fragtree etc.
341            because we know it's not in-core. If it _was_ in-core, we go through
342            all the iget() crap anyway */
343
344         if (ic->state == INO_STATE_GC) {
345                 spin_unlock(&c->inocache_lock);
346
347                 ret = jffs2_garbage_collect_pristine(c, ic, raw);
348
349                 spin_lock(&c->inocache_lock);
350                 ic->state = INO_STATE_CHECKEDABSENT;
351                 wake_up(&c->inocache_wq);
352
353                 if (ret != -EBADFD) {
354                         spin_unlock(&c->inocache_lock);
355                         goto release_sem;
356                 }
357
358                 /* Fall through if it wanted us to, with inocache_lock held */
359         }
360
361         /* Prevent the fairly unlikely race where the gcblock is
362            entirely obsoleted by the final close of a file which had
363            the only valid nodes in the block, followed by erasure,
364            followed by freeing of the ic because the erased block(s)
365            held _all_ the nodes of that inode.... never been seen but
366            it's vaguely possible. */
367
368         inum = ic->ino;
369         nlink = ic->nlink;
370         spin_unlock(&c->inocache_lock);
371
372         f = jffs2_gc_fetch_inode(c, inum, nlink);
373         if (IS_ERR(f)) {
374                 ret = PTR_ERR(f);
375                 goto release_sem;
376         }
377         if (!f) {
378                 ret = 0;
379                 goto release_sem;
380         }
381
382         ret = jffs2_garbage_collect_live(c, jeb, raw, f);
383
384         jffs2_gc_release_inode(c, f);
385
386  release_sem:
387         up(&c->alloc_sem);
388
389  eraseit_lock:
390         /* If we've finished this block, start it erasing */
391         spin_lock(&c->erase_completion_lock);
392
393  eraseit:
394         if (c->gcblock && !c->gcblock->used_size) {
395                 D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
396                 /* We're GC'ing an empty block? */
397                 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
398                 c->gcblock = NULL;
399                 c->nr_erasing_blocks++;
400                 jffs2_erase_pending_trigger(c);
401         }
402         spin_unlock(&c->erase_completion_lock);
403
404         return ret;
405 }
406
407 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
408                                       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
409 {
410         struct jffs2_node_frag *frag;
411         struct jffs2_full_dnode *fn = NULL;
412         struct jffs2_full_dirent *fd;
413         uint32_t start = 0, end = 0, nrfrags = 0;
414         int ret = 0;
415
416         down(&f->sem);
417
418         /* Now we have the lock for this inode. Check that it's still the one at the head
419            of the list. */
420
421         spin_lock(&c->erase_completion_lock);
422
423         if (c->gcblock != jeb) {
424                 spin_unlock(&c->erase_completion_lock);
425                 D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
426                 goto upnout;
427         }
428         if (ref_obsolete(raw)) {
429                 spin_unlock(&c->erase_completion_lock);
430                 D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
431                 /* They'll call again */
432                 goto upnout;
433         }
434         spin_unlock(&c->erase_completion_lock);
435
436         /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
437         if (f->metadata && f->metadata->raw == raw) {
438                 fn = f->metadata;
439                 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
440                 goto upnout;
441         }
442
443         /* FIXME. Read node and do lookup? */
444         for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
445                 if (frag->node && frag->node->raw == raw) {
446                         fn = frag->node;
447                         end = frag->ofs + frag->size;
448                         if (!nrfrags++)
449                                 start = frag->ofs;
450                         if (nrfrags == frag->node->frags)
451                                 break; /* We've found them all */
452                 }
453         }
454         if (fn) {
455                 if (ref_flags(raw) == REF_PRISTINE) {
456                         ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
457                         if (!ret) {
458                                 /* Urgh. Return it sensibly. */
459                                 frag->node->raw = f->inocache->nodes;
460                         }
461                         if (ret != -EBADFD)
462                                 goto upnout;
463                 }
464                 /* We found a datanode. Do the GC */
465                 if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
466                         /* It crosses a page boundary. Therefore, it must be a hole. */
467                         ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
468                 } else {
469                         /* It could still be a hole. But we GC the page this way anyway */
470                         ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
471                 }
472                 goto upnout;
473         }
474
475         /* Wasn't a dnode. Try dirent */
476         for (fd = f->dents; fd; fd=fd->next) {
477                 if (fd->raw == raw)
478                         break;
479         }
480
481         if (fd && fd->ino) {
482                 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
483         } else if (fd) {
484                 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
485         } else {
486                 printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
487                        ref_offset(raw), f->inocache->ino);
488                 if (ref_obsolete(raw)) {
489                         printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
490                 } else {
491                         jffs2_dbg_dump_node(c, ref_offset(raw));
492                         BUG();
493                 }
494         }
495  upnout:
496         up(&f->sem);
497
498         return ret;
499 }
500
501 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
502                                           struct jffs2_inode_cache *ic,
503                                           struct jffs2_raw_node_ref *raw)
504 {
505         union jffs2_node_union *node;
506         struct jffs2_raw_node_ref *nraw;
507         size_t retlen;
508         int ret;
509         uint32_t phys_ofs, alloclen;
510         uint32_t crc, rawlen;
511         int retried = 0;
512
513         D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
514
515         rawlen = ref_totlen(c, c->gcblock, raw);
516
517         /* Ask for a small amount of space (or the totlen if smaller) because we
518            don't want to force wastage of the end of a block if splitting would
519            work. */
520         ret = jffs2_reserve_space_gc(c, min_t(uint32_t, sizeof(struct jffs2_raw_inode) +
521                                 JFFS2_MIN_DATA_LEN, rawlen), &phys_ofs, &alloclen, rawlen);
522                                 /* this is not the exact summary size of it,
523                                         it is only an upper estimation */
524
525         if (ret)
526                 return ret;
527
528         if (alloclen < rawlen) {
529                 /* Doesn't fit untouched. We'll go the old route and split it */
530                 return -EBADFD;
531         }
532
533         node = kmalloc(rawlen, GFP_KERNEL);
534         if (!node)
535                return -ENOMEM;
536
537         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
538         if (!ret && retlen != rawlen)
539                 ret = -EIO;
540         if (ret)
541                 goto out_node;
542
543         crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
544         if (je32_to_cpu(node->u.hdr_crc) != crc) {
545                 printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
546                        ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
547                 goto bail;
548         }
549
550         switch(je16_to_cpu(node->u.nodetype)) {
551         case JFFS2_NODETYPE_INODE:
552                 crc = crc32(0, node, sizeof(node->i)-8);
553                 if (je32_to_cpu(node->i.node_crc) != crc) {
554                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
555                                ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
556                         goto bail;
557                 }
558
559                 if (je32_to_cpu(node->i.dsize)) {
560                         crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
561                         if (je32_to_cpu(node->i.data_crc) != crc) {
562                                 printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
563                                        ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
564                                 goto bail;
565                         }
566                 }
567                 break;
568
569         case JFFS2_NODETYPE_DIRENT:
570                 crc = crc32(0, node, sizeof(node->d)-8);
571                 if (je32_to_cpu(node->d.node_crc) != crc) {
572                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
573                                ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
574                         goto bail;
575                 }
576
577                 if (node->d.nsize) {
578                         crc = crc32(0, node->d.name, node->d.nsize);
579                         if (je32_to_cpu(node->d.name_crc) != crc) {
580                                 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent ode at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
581                                        ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
582                                 goto bail;
583                         }
584                 }
585                 break;
586         default:
587                 printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
588                        ref_offset(raw), je16_to_cpu(node->u.nodetype));
589                 goto bail;
590         }
591
592         nraw = jffs2_alloc_raw_node_ref();
593         if (!nraw) {
594                 ret = -ENOMEM;
595                 goto out_node;
596         }
597
598         /* OK, all the CRCs are good; this node can just be copied as-is. */
599  retry:
600         nraw->flash_offset = phys_ofs;
601         nraw->__totlen = rawlen;
602         nraw->next_phys = NULL;
603
604         ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
605
606         if (ret || (retlen != rawlen)) {
607                 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
608                        rawlen, phys_ofs, ret, retlen);
609                 if (retlen) {
610                         /* Doesn't belong to any inode */
611                         nraw->next_in_ino = NULL;
612
613                         nraw->flash_offset |= REF_OBSOLETE;
614                         jffs2_add_physical_node_ref(c, nraw);
615                         jffs2_mark_node_obsolete(c, nraw);
616                 } else {
617                         printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", nraw->flash_offset);
618                         jffs2_free_raw_node_ref(nraw);
619                 }
620                 if (!retried && (nraw = jffs2_alloc_raw_node_ref())) {
621                         /* Try to reallocate space and retry */
622                         uint32_t dummy;
623                         struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
624
625                         retried = 1;
626
627                         D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
628
629                         jffs2_dbg_acct_sanity_check(c,jeb);
630                         jffs2_dbg_acct_paranoia_check(c, jeb);
631
632                         ret = jffs2_reserve_space_gc(c, rawlen, &phys_ofs, &dummy, rawlen);
633                                                 /* this is not the exact summary size of it,
634                                                         it is only an upper estimation */
635
636                         if (!ret) {
637                                 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
638
639                                 jffs2_dbg_acct_sanity_check(c,jeb);
640                                 jffs2_dbg_acct_paranoia_check(c, jeb);
641
642                                 goto retry;
643                         }
644                         D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
645                         jffs2_free_raw_node_ref(nraw);
646                 }
647
648                 jffs2_free_raw_node_ref(nraw);
649                 if (!ret)
650                         ret = -EIO;
651                 goto out_node;
652         }
653         nraw->flash_offset |= REF_PRISTINE;
654         jffs2_add_physical_node_ref(c, nraw);
655
656         /* Link into per-inode list. This is safe because of the ic
657            state being INO_STATE_GC. Note that if we're doing this
658            for an inode which is in-core, the 'nraw' pointer is then
659            going to be fetched from ic->nodes by our caller. */
660         spin_lock(&c->erase_completion_lock);
661         nraw->next_in_ino = ic->nodes;
662         ic->nodes = nraw;
663         spin_unlock(&c->erase_completion_lock);
664
665         jffs2_mark_node_obsolete(c, raw);
666         D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
667
668  out_node:
669         kfree(node);
670         return ret;
671  bail:
672         ret = -EBADFD;
673         goto out_node;
674 }
675
676 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
677                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
678 {
679         struct jffs2_full_dnode *new_fn;
680         struct jffs2_raw_inode ri;
681         struct jffs2_node_frag *last_frag;
682         union jffs2_device_node dev;
683         char *mdata = NULL, mdatalen = 0;
684         uint32_t alloclen, phys_ofs, ilen;
685         int ret;
686
687         if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
688             S_ISCHR(JFFS2_F_I_MODE(f)) ) {
689                 /* For these, we don't actually need to read the old node */
690                 mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
691                 mdata = (char *)&dev;
692                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
693         } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
694                 mdatalen = fn->size;
695                 mdata = kmalloc(fn->size, GFP_KERNEL);
696                 if (!mdata) {
697                         printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
698                         return -ENOMEM;
699                 }
700                 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
701                 if (ret) {
702                         printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
703                         kfree(mdata);
704                         return ret;
705                 }
706                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
707
708         }
709
710         ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &phys_ofs, &alloclen,
711                                 JFFS2_SUMMARY_INODE_SIZE);
712         if (ret) {
713                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
714                        sizeof(ri)+ mdatalen, ret);
715                 goto out;
716         }
717
718         last_frag = frag_last(&f->fragtree);
719         if (last_frag)
720                 /* Fetch the inode length from the fragtree rather then
721                  * from i_size since i_size may have not been updated yet */
722                 ilen = last_frag->ofs + last_frag->size;
723         else
724                 ilen = JFFS2_F_I_SIZE(f);
725
726         memset(&ri, 0, sizeof(ri));
727         ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
728         ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
729         ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
730         ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
731
732         ri.ino = cpu_to_je32(f->inocache->ino);
733         ri.version = cpu_to_je32(++f->highest_version);
734         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
735         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
736         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
737         ri.isize = cpu_to_je32(ilen);
738         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
739         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
740         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
741         ri.offset = cpu_to_je32(0);
742         ri.csize = cpu_to_je32(mdatalen);
743         ri.dsize = cpu_to_je32(mdatalen);
744         ri.compr = JFFS2_COMPR_NONE;
745         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
746         ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
747
748         new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, phys_ofs, ALLOC_GC);
749
750         if (IS_ERR(new_fn)) {
751                 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
752                 ret = PTR_ERR(new_fn);
753                 goto out;
754         }
755         jffs2_mark_node_obsolete(c, fn->raw);
756         jffs2_free_full_dnode(fn);
757         f->metadata = new_fn;
758  out:
759         if (S_ISLNK(JFFS2_F_I_MODE(f)))
760                 kfree(mdata);
761         return ret;
762 }
763
764 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
765                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
766 {
767         struct jffs2_full_dirent *new_fd;
768         struct jffs2_raw_dirent rd;
769         uint32_t alloclen, phys_ofs;
770         int ret;
771
772         rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
773         rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
774         rd.nsize = strlen(fd->name);
775         rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
776         rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
777
778         rd.pino = cpu_to_je32(f->inocache->ino);
779         rd.version = cpu_to_je32(++f->highest_version);
780         rd.ino = cpu_to_je32(fd->ino);
781         /* If the times on this inode were set by explicit utime() they can be different,
782            so refrain from splatting them. */
783         if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
784                 rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
785         else
786                 rd.mctime = cpu_to_je32(0);
787         rd.type = fd->type;
788         rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
789         rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
790
791         ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &phys_ofs, &alloclen,
792                                 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
793         if (ret) {
794                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
795                        sizeof(rd)+rd.nsize, ret);
796                 return ret;
797         }
798         new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, phys_ofs, ALLOC_GC);
799
800         if (IS_ERR(new_fd)) {
801                 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
802                 return PTR_ERR(new_fd);
803         }
804         jffs2_add_fd_to_list(c, new_fd, &f->dents);
805         return 0;
806 }
807
808 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
809                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
810 {
811         struct jffs2_full_dirent **fdp = &f->dents;
812         int found = 0;
813
814         /* On a medium where we can't actually mark nodes obsolete
815            pernamently, such as NAND flash, we need to work out
816            whether this deletion dirent is still needed to actively
817            delete a 'real' dirent with the same name that's still
818            somewhere else on the flash. */
819         if (!jffs2_can_mark_obsolete(c)) {
820                 struct jffs2_raw_dirent *rd;
821                 struct jffs2_raw_node_ref *raw;
822                 int ret;
823                 size_t retlen;
824                 int name_len = strlen(fd->name);
825                 uint32_t name_crc = crc32(0, fd->name, name_len);
826                 uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
827
828                 rd = kmalloc(rawlen, GFP_KERNEL);
829                 if (!rd)
830                         return -ENOMEM;
831
832                 /* Prevent the erase code from nicking the obsolete node refs while
833                    we're looking at them. I really don't like this extra lock but
834                    can't see any alternative. Suggestions on a postcard to... */
835                 down(&c->erase_free_sem);
836
837                 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
838
839                         /* We only care about obsolete ones */
840                         if (!(ref_obsolete(raw)))
841                                 continue;
842
843                         /* Any dirent with the same name is going to have the same length... */
844                         if (ref_totlen(c, NULL, raw) != rawlen)
845                                 continue;
846
847                         /* Doesn't matter if there's one in the same erase block. We're going to
848                            delete it too at the same time. */
849                         if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
850                                 continue;
851
852                         D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
853
854                         /* This is an obsolete node belonging to the same directory, and it's of the right
855                            length. We need to take a closer look...*/
856                         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
857                         if (ret) {
858                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
859                                 /* If we can't read it, we don't need to continue to obsolete it. Continue */
860                                 continue;
861                         }
862                         if (retlen != rawlen) {
863                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
864                                        retlen, rawlen, ref_offset(raw));
865                                 continue;
866                         }
867
868                         if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
869                                 continue;
870
871                         /* If the name CRC doesn't match, skip */
872                         if (je32_to_cpu(rd->name_crc) != name_crc)
873                                 continue;
874
875                         /* If the name length doesn't match, or it's another deletion dirent, skip */
876                         if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
877                                 continue;
878
879                         /* OK, check the actual name now */
880                         if (memcmp(rd->name, fd->name, name_len))
881                                 continue;
882
883                         /* OK. The name really does match. There really is still an older node on
884                            the flash which our deletion dirent obsoletes. So we have to write out
885                            a new deletion dirent to replace it */
886                         up(&c->erase_free_sem);
887
888                         D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
889                                   ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
890                         kfree(rd);
891
892                         return jffs2_garbage_collect_dirent(c, jeb, f, fd);
893                 }
894
895                 up(&c->erase_free_sem);
896                 kfree(rd);
897         }
898
899         /* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
900            we should update the metadata node with those times accordingly */
901
902         /* No need for it any more. Just mark it obsolete and remove it from the list */
903         while (*fdp) {
904                 if ((*fdp) == fd) {
905                         found = 1;
906                         *fdp = fd->next;
907                         break;
908                 }
909                 fdp = &(*fdp)->next;
910         }
911         if (!found) {
912                 printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
913         }
914         jffs2_mark_node_obsolete(c, fd->raw);
915         jffs2_free_full_dirent(fd);
916         return 0;
917 }
918
919 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
920                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
921                                       uint32_t start, uint32_t end)
922 {
923         struct jffs2_raw_inode ri;
924         struct jffs2_node_frag *frag;
925         struct jffs2_full_dnode *new_fn;
926         uint32_t alloclen, phys_ofs, ilen;
927         int ret;
928
929         D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
930                   f->inocache->ino, start, end));
931
932         memset(&ri, 0, sizeof(ri));
933
934         if(fn->frags > 1) {
935                 size_t readlen;
936                 uint32_t crc;
937                 /* It's partially obsoleted by a later write. So we have to
938                    write it out again with the _same_ version as before */
939                 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
940                 if (readlen != sizeof(ri) || ret) {
941                         printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
942                         goto fill;
943                 }
944                 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
945                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
946                                ref_offset(fn->raw),
947                                je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
948                         return -EIO;
949                 }
950                 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
951                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
952                                ref_offset(fn->raw),
953                                je32_to_cpu(ri.totlen), sizeof(ri));
954                         return -EIO;
955                 }
956                 crc = crc32(0, &ri, sizeof(ri)-8);
957                 if (crc != je32_to_cpu(ri.node_crc)) {
958                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
959                                ref_offset(fn->raw),
960                                je32_to_cpu(ri.node_crc), crc);
961                         /* FIXME: We could possibly deal with this by writing new holes for each frag */
962                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
963                                start, end, f->inocache->ino);
964                         goto fill;
965                 }
966                 if (ri.compr != JFFS2_COMPR_ZERO) {
967                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
968                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
969                                start, end, f->inocache->ino);
970                         goto fill;
971                 }
972         } else {
973         fill:
974                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
975                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
976                 ri.totlen = cpu_to_je32(sizeof(ri));
977                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
978
979                 ri.ino = cpu_to_je32(f->inocache->ino);
980                 ri.version = cpu_to_je32(++f->highest_version);
981                 ri.offset = cpu_to_je32(start);
982                 ri.dsize = cpu_to_je32(end - start);
983                 ri.csize = cpu_to_je32(0);
984                 ri.compr = JFFS2_COMPR_ZERO;
985         }
986
987         frag = frag_last(&f->fragtree);
988         if (frag)
989                 /* Fetch the inode length from the fragtree rather then
990                  * from i_size since i_size may have not been updated yet */
991                 ilen = frag->ofs + frag->size;
992         else
993                 ilen = JFFS2_F_I_SIZE(f);
994
995         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
996         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
997         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
998         ri.isize = cpu_to_je32(ilen);
999         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1000         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1001         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1002         ri.data_crc = cpu_to_je32(0);
1003         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1004
1005         ret = jffs2_reserve_space_gc(c, sizeof(ri), &phys_ofs, &alloclen,
1006                                 JFFS2_SUMMARY_INODE_SIZE);
1007         if (ret) {
1008                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1009                        sizeof(ri), ret);
1010                 return ret;
1011         }
1012         new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, ALLOC_GC);
1013
1014         if (IS_ERR(new_fn)) {
1015                 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1016                 return PTR_ERR(new_fn);
1017         }
1018         if (je32_to_cpu(ri.version) == f->highest_version) {
1019                 jffs2_add_full_dnode_to_inode(c, f, new_fn);
1020                 if (f->metadata) {
1021                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1022                         jffs2_free_full_dnode(f->metadata);
1023                         f->metadata = NULL;
1024                 }
1025                 return 0;
1026         }
1027
1028         /*
1029          * We should only get here in the case where the node we are
1030          * replacing had more than one frag, so we kept the same version
1031          * number as before. (Except in case of error -- see 'goto fill;'
1032          * above.)
1033          */
1034         D1(if(unlikely(fn->frags <= 1)) {
1035                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1036                        fn->frags, je32_to_cpu(ri.version), f->highest_version,
1037                        je32_to_cpu(ri.ino));
1038         });
1039
1040         /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1041         mark_ref_normal(new_fn->raw);
1042
1043         for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1044              frag; frag = frag_next(frag)) {
1045                 if (frag->ofs > fn->size + fn->ofs)
1046                         break;
1047                 if (frag->node == fn) {
1048                         frag->node = new_fn;
1049                         new_fn->frags++;
1050                         fn->frags--;
1051                 }
1052         }
1053         if (fn->frags) {
1054                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1055                 BUG();
1056         }
1057         if (!new_fn->frags) {
1058                 printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1059                 BUG();
1060         }
1061
1062         jffs2_mark_node_obsolete(c, fn->raw);
1063         jffs2_free_full_dnode(fn);
1064
1065         return 0;
1066 }
1067
1068 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1069                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1070                                        uint32_t start, uint32_t end)
1071 {
1072         struct jffs2_full_dnode *new_fn;
1073         struct jffs2_raw_inode ri;
1074         uint32_t alloclen, phys_ofs, offset, orig_end, orig_start;
1075         int ret = 0;
1076         unsigned char *comprbuf = NULL, *writebuf;
1077         unsigned long pg;
1078         unsigned char *pg_ptr;
1079
1080         memset(&ri, 0, sizeof(ri));
1081
1082         D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1083                   f->inocache->ino, start, end));
1084
1085         orig_end = end;
1086         orig_start = start;
1087
1088         if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1089                 /* Attempt to do some merging. But only expand to cover logically
1090                    adjacent frags if the block containing them is already considered
1091                    to be dirty. Otherwise we end up with GC just going round in
1092                    circles dirtying the nodes it already wrote out, especially
1093                    on NAND where we have small eraseblocks and hence a much higher
1094                    chance of nodes having to be split to cross boundaries. */
1095
1096                 struct jffs2_node_frag *frag;
1097                 uint32_t min, max;
1098
1099                 min = start & ~(PAGE_CACHE_SIZE-1);
1100                 max = min + PAGE_CACHE_SIZE;
1101
1102                 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1103
1104                 /* BUG_ON(!frag) but that'll happen anyway... */
1105
1106                 BUG_ON(frag->ofs != start);
1107
1108                 /* First grow down... */
1109                 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1110
1111                         /* If the previous frag doesn't even reach the beginning, there's
1112                            excessive fragmentation. Just merge. */
1113                         if (frag->ofs > min) {
1114                                 D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1115                                           frag->ofs, frag->ofs+frag->size));
1116                                 start = frag->ofs;
1117                                 continue;
1118                         }
1119                         /* OK. This frag holds the first byte of the page. */
1120                         if (!frag->node || !frag->node->raw) {
1121                                 D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1122                                           frag->ofs, frag->ofs+frag->size));
1123                                 break;
1124                         } else {
1125
1126                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1127                                    in a block which is still considered clean? If so, don't obsolete it.
1128                                    If not, cover it anyway. */
1129
1130                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1131                                 struct jffs2_eraseblock *jeb;
1132
1133                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1134
1135                                 if (jeb == c->gcblock) {
1136                                         D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1137                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1138                                         start = frag->ofs;
1139                                         break;
1140                                 }
1141                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1142                                         D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1143                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1144                                         break;
1145                                 }
1146
1147                                 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1148                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1149                                 start = frag->ofs;
1150                                 break;
1151                         }
1152                 }
1153
1154                 /* ... then up */
1155
1156                 /* Find last frag which is actually part of the node we're to GC. */
1157                 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1158
1159                 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1160
1161                         /* If the previous frag doesn't even reach the beginning, there's lots
1162                            of fragmentation. Just merge. */
1163                         if (frag->ofs+frag->size < max) {
1164                                 D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1165                                           frag->ofs, frag->ofs+frag->size));
1166                                 end = frag->ofs + frag->size;
1167                                 continue;
1168                         }
1169
1170                         if (!frag->node || !frag->node->raw) {
1171                                 D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1172                                           frag->ofs, frag->ofs+frag->size));
1173                                 break;
1174                         } else {
1175
1176                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1177                                    in a block which is still considered clean? If so, don't obsolete it.
1178                                    If not, cover it anyway. */
1179
1180                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1181                                 struct jffs2_eraseblock *jeb;
1182
1183                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1184
1185                                 if (jeb == c->gcblock) {
1186                                         D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1187                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1188                                         end = frag->ofs + frag->size;
1189                                         break;
1190                                 }
1191                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1192                                         D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1193                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1194                                         break;
1195                                 }
1196
1197                                 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1198                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1199                                 end = frag->ofs + frag->size;
1200                                 break;
1201                         }
1202                 }
1203                 D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1204                           orig_start, orig_end, start, end));
1205
1206                 D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1207                 BUG_ON(end < orig_end);
1208                 BUG_ON(start > orig_start);
1209         }
1210
1211         /* First, use readpage() to read the appropriate page into the page cache */
1212         /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1213          *    triggered garbage collection in the first place?
1214          * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1215          *    page OK. We'll actually write it out again in commit_write, which is a little
1216          *    suboptimal, but at least we're correct.
1217          */
1218         pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1219
1220         if (IS_ERR(pg_ptr)) {
1221                 printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1222                 return PTR_ERR(pg_ptr);
1223         }
1224
1225         offset = start;
1226         while(offset < orig_end) {
1227                 uint32_t datalen;
1228                 uint32_t cdatalen;
1229                 uint16_t comprtype = JFFS2_COMPR_NONE;
1230
1231                 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN, &phys_ofs,
1232                                         &alloclen, JFFS2_SUMMARY_INODE_SIZE);
1233
1234                 if (ret) {
1235                         printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1236                                sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1237                         break;
1238                 }
1239                 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1240                 datalen = end - offset;
1241
1242                 writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1243
1244                 comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1245
1246                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1247                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1248                 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1249                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1250
1251                 ri.ino = cpu_to_je32(f->inocache->ino);
1252                 ri.version = cpu_to_je32(++f->highest_version);
1253                 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1254                 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1255                 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1256                 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1257                 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1258                 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1259                 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1260                 ri.offset = cpu_to_je32(offset);
1261                 ri.csize = cpu_to_je32(cdatalen);
1262                 ri.dsize = cpu_to_je32(datalen);
1263                 ri.compr = comprtype & 0xff;
1264                 ri.usercompr = (comprtype >> 8) & 0xff;
1265                 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1266                 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1267
1268                 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, phys_ofs, ALLOC_GC);
1269
1270                 jffs2_free_comprbuf(comprbuf, writebuf);
1271
1272                 if (IS_ERR(new_fn)) {
1273                         printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1274                         ret = PTR_ERR(new_fn);
1275                         break;
1276                 }
1277                 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1278                 offset += datalen;
1279                 if (f->metadata) {
1280                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1281                         jffs2_free_full_dnode(f->metadata);
1282                         f->metadata = NULL;
1283                 }
1284         }
1285
1286         jffs2_gc_release_page(c, pg_ptr, &pg);
1287         return ret;
1288 }