Merge branch 'bkl/ioctl' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic...
[safe/jmp/linux-2.6] / fs / nilfs2 / recovery.c
1 /*
2  * recovery.c - NILFS recovery logic
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Ryusuke Konishi <ryusuke@osrg.net>
21  */
22
23 #include <linux/buffer_head.h>
24 #include <linux/blkdev.h>
25 #include <linux/swap.h>
26 #include <linux/slab.h>
27 #include <linux/crc32.h>
28 #include "nilfs.h"
29 #include "segment.h"
30 #include "sufile.h"
31 #include "page.h"
32 #include "segbuf.h"
33
/*
 * Segment check result
 *
 * NILFS_SEG_VALID is the first enumerator and therefore zero, so any
 * non-zero value identifies the reason a segment check failed.
 */
enum {
	NILFS_SEG_VALID,			/* check passed */
	NILFS_SEG_NO_SUPER_ROOT,		/* no super root in the segment */
	NILFS_SEG_FAIL_IO,			/* I/O error while loading */
	NILFS_SEG_FAIL_MAGIC,			/* bad segment summary magic */
	NILFS_SEG_FAIL_SEQ,			/* sequence number mismatch */
	NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT,	/* super root checksum error */
	NILFS_SEG_FAIL_CHECKSUM_FULL,		/* segment payload checksum error */
	NILFS_SEG_FAIL_CONSISTENCY,		/* inconsistent segment */
};
47
48 /* work structure for recovery */
49 struct nilfs_recovery_block {
50         ino_t ino;              /* Inode number of the file that this block
51                                    belongs to */
52         sector_t blocknr;       /* block number */
53         __u64 vblocknr;         /* virtual block number */
54         unsigned long blkoff;   /* File offset of the data block (per block) */
55         struct list_head list;
56 };
57
58
59 static int nilfs_warn_segment_error(int err)
60 {
61         switch (err) {
62         case NILFS_SEG_FAIL_IO:
63                 printk(KERN_WARNING
64                        "NILFS warning: I/O error on loading last segment\n");
65                 return -EIO;
66         case NILFS_SEG_FAIL_MAGIC:
67                 printk(KERN_WARNING
68                        "NILFS warning: Segment magic number invalid\n");
69                 break;
70         case NILFS_SEG_FAIL_SEQ:
71                 printk(KERN_WARNING
72                        "NILFS warning: Sequence number mismatch\n");
73                 break;
74         case NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT:
75                 printk(KERN_WARNING
76                        "NILFS warning: Checksum error in super root\n");
77                 break;
78         case NILFS_SEG_FAIL_CHECKSUM_FULL:
79                 printk(KERN_WARNING
80                        "NILFS warning: Checksum error in segment payload\n");
81                 break;
82         case NILFS_SEG_FAIL_CONSISTENCY:
83                 printk(KERN_WARNING
84                        "NILFS warning: Inconsistent segment\n");
85                 break;
86         case NILFS_SEG_NO_SUPER_ROOT:
87                 printk(KERN_WARNING
88                        "NILFS warning: No super root in the last segment\n");
89                 break;
90         }
91         return -EINVAL;
92 }
93
94 static void store_segsum_info(struct nilfs_segsum_info *ssi,
95                               struct nilfs_segment_summary *sum,
96                               unsigned int blocksize)
97 {
98         ssi->flags = le16_to_cpu(sum->ss_flags);
99         ssi->seg_seq = le64_to_cpu(sum->ss_seq);
100         ssi->ctime = le64_to_cpu(sum->ss_create);
101         ssi->next = le64_to_cpu(sum->ss_next);
102         ssi->nblocks = le32_to_cpu(sum->ss_nblocks);
103         ssi->nfinfo = le32_to_cpu(sum->ss_nfinfo);
104         ssi->sumbytes = le32_to_cpu(sum->ss_sumbytes);
105
106         ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize);
107         ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi);
108
109         /* need to verify ->ss_bytes field if read ->ss_cno */
110 }
111
112 /**
113  * calc_crc_cont - check CRC of blocks continuously
114  * @sbi: nilfs_sb_info
115  * @bhs: buffer head of start block
116  * @sum: place to store result
117  * @offset: offset bytes in the first block
118  * @check_bytes: number of bytes to be checked
119  * @start: DBN of start block
120  * @nblock: number of blocks to be checked
121  */
122 static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs,
123                          u32 *sum, unsigned long offset, u64 check_bytes,
124                          sector_t start, unsigned long nblock)
125 {
126         unsigned long blocksize = sbi->s_super->s_blocksize;
127         unsigned long size;
128         u32 crc;
129
130         BUG_ON(offset >= blocksize);
131         check_bytes -= offset;
132         size = min_t(u64, check_bytes, blocksize - offset);
133         crc = crc32_le(sbi->s_nilfs->ns_crc_seed,
134                        (unsigned char *)bhs->b_data + offset, size);
135         if (--nblock > 0) {
136                 do {
137                         struct buffer_head *bh
138                                 = sb_bread(sbi->s_super, ++start);
139                         if (!bh)
140                                 return -EIO;
141                         check_bytes -= size;
142                         size = min_t(u64, check_bytes, blocksize);
143                         crc = crc32_le(crc, bh->b_data, size);
144                         brelse(bh);
145                 } while (--nblock > 0);
146         }
147         *sum = crc;
148         return 0;
149 }
150
151 /**
152  * nilfs_read_super_root_block - read super root block
153  * @sb: super_block
154  * @sr_block: disk block number of the super root block
155  * @pbh: address of a buffer_head pointer to return super root buffer
156  * @check: CRC check flag
157  */
158 int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block,
159                                 struct buffer_head **pbh, int check)
160 {
161         struct buffer_head *bh_sr;
162         struct nilfs_super_root *sr;
163         u32 crc;
164         int ret;
165
166         *pbh = NULL;
167         bh_sr = sb_bread(sb, sr_block);
168         if (unlikely(!bh_sr)) {
169                 ret = NILFS_SEG_FAIL_IO;
170                 goto failed;
171         }
172
173         sr = (struct nilfs_super_root *)bh_sr->b_data;
174         if (check) {
175                 unsigned bytes = le16_to_cpu(sr->sr_bytes);
176
177                 if (bytes == 0 || bytes > sb->s_blocksize) {
178                         ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
179                         goto failed_bh;
180                 }
181                 if (calc_crc_cont(NILFS_SB(sb), bh_sr, &crc,
182                                   sizeof(sr->sr_sum), bytes, sr_block, 1)) {
183                         ret = NILFS_SEG_FAIL_IO;
184                         goto failed_bh;
185                 }
186                 if (crc != le32_to_cpu(sr->sr_sum)) {
187                         ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
188                         goto failed_bh;
189                 }
190         }
191         *pbh = bh_sr;
192         return 0;
193
194  failed_bh:
195         brelse(bh_sr);
196
197  failed:
198         return nilfs_warn_segment_error(ret);
199 }
200
201 /**
202  * load_segment_summary - read segment summary of the specified partial segment
203  * @sbi: nilfs_sb_info
204  * @pseg_start: start disk block number of partial segment
205  * @seg_seq: sequence number requested
206  * @ssi: pointer to nilfs_segsum_info struct to store information
207  */
208 static int
209 load_segment_summary(struct nilfs_sb_info *sbi, sector_t pseg_start,
210                      u64 seg_seq, struct nilfs_segsum_info *ssi)
211 {
212         struct buffer_head *bh_sum;
213         struct nilfs_segment_summary *sum;
214         unsigned long nblock;
215         u32 crc;
216         int ret = NILFS_SEG_FAIL_IO;
217
218         bh_sum = sb_bread(sbi->s_super, pseg_start);
219         if (!bh_sum)
220                 goto out;
221
222         sum = (struct nilfs_segment_summary *)bh_sum->b_data;
223
224         /* Check consistency of segment summary */
225         if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC) {
226                 ret = NILFS_SEG_FAIL_MAGIC;
227                 goto failed;
228         }
229         store_segsum_info(ssi, sum, sbi->s_super->s_blocksize);
230         if (seg_seq != ssi->seg_seq) {
231                 ret = NILFS_SEG_FAIL_SEQ;
232                 goto failed;
233         }
234
235         nblock = ssi->nblocks;
236         if (unlikely(nblock == 0 ||
237                      nblock > sbi->s_nilfs->ns_blocks_per_segment)) {
238                 /* This limits the number of blocks read in the CRC check */
239                 ret = NILFS_SEG_FAIL_CONSISTENCY;
240                 goto failed;
241         }
242         if (calc_crc_cont(sbi, bh_sum, &crc, sizeof(sum->ss_datasum),
243                           ((u64)nblock << sbi->s_super->s_blocksize_bits),
244                           pseg_start, nblock)) {
245                 ret = NILFS_SEG_FAIL_IO;
246                 goto failed;
247         }
248         if (crc == le32_to_cpu(sum->ss_datasum))
249                 ret = 0;
250         else
251                 ret = NILFS_SEG_FAIL_CHECKSUM_FULL;
252  failed:
253         brelse(bh_sum);
254  out:
255         return ret;
256 }
257
258 static void *segsum_get(struct super_block *sb, struct buffer_head **pbh,
259                         unsigned int *offset, unsigned int bytes)
260 {
261         void *ptr;
262         sector_t blocknr;
263
264         BUG_ON((*pbh)->b_size < *offset);
265         if (bytes > (*pbh)->b_size - *offset) {
266                 blocknr = (*pbh)->b_blocknr;
267                 brelse(*pbh);
268                 *pbh = sb_bread(sb, blocknr + 1);
269                 if (unlikely(!*pbh))
270                         return NULL;
271                 *offset = 0;
272         }
273         ptr = (*pbh)->b_data + *offset;
274         *offset += bytes;
275         return ptr;
276 }
277
278 static void segsum_skip(struct super_block *sb, struct buffer_head **pbh,
279                         unsigned int *offset, unsigned int bytes,
280                         unsigned long count)
281 {
282         unsigned int rest_item_in_current_block
283                 = ((*pbh)->b_size - *offset) / bytes;
284
285         if (count <= rest_item_in_current_block) {
286                 *offset += bytes * count;
287         } else {
288                 sector_t blocknr = (*pbh)->b_blocknr;
289                 unsigned int nitem_per_block = (*pbh)->b_size / bytes;
290                 unsigned int bcnt;
291
292                 count -= rest_item_in_current_block;
293                 bcnt = DIV_ROUND_UP(count, nitem_per_block);
294                 *offset = bytes * (count - (bcnt - 1) * nitem_per_block);
295
296                 brelse(*pbh);
297                 *pbh = sb_bread(sb, blocknr + bcnt);
298         }
299 }
300
301 static int
302 collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr,
303                            struct nilfs_segsum_info *ssi,
304                            struct list_head *head)
305 {
306         struct buffer_head *bh;
307         unsigned int offset;
308         unsigned long nfinfo = ssi->nfinfo;
309         sector_t blocknr = sum_blocknr + ssi->nsumblk;
310         ino_t ino;
311         int err = -EIO;
312
313         if (!nfinfo)
314                 return 0;
315
316         bh = sb_bread(sbi->s_super, sum_blocknr);
317         if (unlikely(!bh))
318                 goto out;
319
320         offset = le16_to_cpu(
321                 ((struct nilfs_segment_summary *)bh->b_data)->ss_bytes);
322         for (;;) {
323                 unsigned long nblocks, ndatablk, nnodeblk;
324                 struct nilfs_finfo *finfo;
325
326                 finfo = segsum_get(sbi->s_super, &bh, &offset, sizeof(*finfo));
327                 if (unlikely(!finfo))
328                         goto out;
329
330                 ino = le64_to_cpu(finfo->fi_ino);
331                 nblocks = le32_to_cpu(finfo->fi_nblocks);
332                 ndatablk = le32_to_cpu(finfo->fi_ndatablk);
333                 nnodeblk = nblocks - ndatablk;
334
335                 while (ndatablk-- > 0) {
336                         struct nilfs_recovery_block *rb;
337                         struct nilfs_binfo_v *binfo;
338
339                         binfo = segsum_get(sbi->s_super, &bh, &offset,
340                                            sizeof(*binfo));
341                         if (unlikely(!binfo))
342                                 goto out;
343
344                         rb = kmalloc(sizeof(*rb), GFP_NOFS);
345                         if (unlikely(!rb)) {
346                                 err = -ENOMEM;
347                                 goto out;
348                         }
349                         rb->ino = ino;
350                         rb->blocknr = blocknr++;
351                         rb->vblocknr = le64_to_cpu(binfo->bi_vblocknr);
352                         rb->blkoff = le64_to_cpu(binfo->bi_blkoff);
353                         /* INIT_LIST_HEAD(&rb->list); */
354                         list_add_tail(&rb->list, head);
355                 }
356                 if (--nfinfo == 0)
357                         break;
358                 blocknr += nnodeblk; /* always 0 for the data sync segments */
359                 segsum_skip(sbi->s_super, &bh, &offset, sizeof(__le64),
360                             nnodeblk);
361                 if (unlikely(!bh))
362                         goto out;
363         }
364         err = 0;
365  out:
366         brelse(bh);   /* brelse(NULL) is just ignored */
367         return err;
368 }
369
370 static void dispose_recovery_list(struct list_head *head)
371 {
372         while (!list_empty(head)) {
373                 struct nilfs_recovery_block *rb
374                         = list_entry(head->next,
375                                      struct nilfs_recovery_block, list);
376                 list_del(&rb->list);
377                 kfree(rb);
378         }
379 }
380
381 struct nilfs_segment_entry {
382         struct list_head        list;
383         __u64                   segnum;
384 };
385
386 static int nilfs_segment_list_add(struct list_head *head, __u64 segnum)
387 {
388         struct nilfs_segment_entry *ent = kmalloc(sizeof(*ent), GFP_NOFS);
389
390         if (unlikely(!ent))
391                 return -ENOMEM;
392
393         ent->segnum = segnum;
394         INIT_LIST_HEAD(&ent->list);
395         list_add_tail(&ent->list, head);
396         return 0;
397 }
398
399 void nilfs_dispose_segment_list(struct list_head *head)
400 {
401         while (!list_empty(head)) {
402                 struct nilfs_segment_entry *ent
403                         = list_entry(head->next,
404                                      struct nilfs_segment_entry, list);
405                 list_del(&ent->list);
406                 kfree(ent);
407         }
408 }
409
410 static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
411                                               struct nilfs_sb_info *sbi,
412                                               struct nilfs_recovery_info *ri)
413 {
414         struct list_head *head = &ri->ri_used_segments;
415         struct nilfs_segment_entry *ent, *n;
416         struct inode *sufile = nilfs->ns_sufile;
417         __u64 segnum[4];
418         int err;
419         int i;
420
421         segnum[0] = nilfs->ns_segnum;
422         segnum[1] = nilfs->ns_nextnum;
423         segnum[2] = ri->ri_segnum;
424         segnum[3] = ri->ri_nextnum;
425
426         nilfs_attach_writer(nilfs, sbi);
427         /*
428          * Releasing the next segment of the latest super root.
429          * The next segment is invalidated by this recovery.
430          */
431         err = nilfs_sufile_free(sufile, segnum[1]);
432         if (unlikely(err))
433                 goto failed;
434
435         for (i = 1; i < 4; i++) {
436                 err = nilfs_segment_list_add(head, segnum[i]);
437                 if (unlikely(err))
438                         goto failed;
439         }
440
441         /*
442          * Collecting segments written after the latest super root.
443          * These are marked dirty to avoid being reallocated in the next write.
444          */
445         list_for_each_entry_safe(ent, n, head, list) {
446                 if (ent->segnum != segnum[0]) {
447                         err = nilfs_sufile_scrap(sufile, ent->segnum);
448                         if (unlikely(err))
449                                 goto failed;
450                 }
451                 list_del(&ent->list);
452                 kfree(ent);
453         }
454
455         /* Allocate new segments for recovery */
456         err = nilfs_sufile_alloc(sufile, &segnum[0]);
457         if (unlikely(err))
458                 goto failed;
459
460         nilfs->ns_pseg_offset = 0;
461         nilfs->ns_seg_seq = ri->ri_seq + 2;
462         nilfs->ns_nextnum = nilfs->ns_segnum = segnum[0];
463
464  failed:
465         /* No need to recover sufile because it will be destroyed on error */
466         nilfs_detach_writer(nilfs, sbi);
467         return err;
468 }
469
470 static int nilfs_recovery_copy_block(struct nilfs_sb_info *sbi,
471                                      struct nilfs_recovery_block *rb,
472                                      struct page *page)
473 {
474         struct buffer_head *bh_org;
475         void *kaddr;
476
477         bh_org = sb_bread(sbi->s_super, rb->blocknr);
478         if (unlikely(!bh_org))
479                 return -EIO;
480
481         kaddr = kmap_atomic(page, KM_USER0);
482         memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
483         kunmap_atomic(kaddr, KM_USER0);
484         brelse(bh_org);
485         return 0;
486 }
487
488 static int recover_dsync_blocks(struct nilfs_sb_info *sbi,
489                                 struct list_head *head,
490                                 unsigned long *nr_salvaged_blocks)
491 {
492         struct inode *inode;
493         struct nilfs_recovery_block *rb, *n;
494         unsigned blocksize = sbi->s_super->s_blocksize;
495         struct page *page;
496         loff_t pos;
497         int err = 0, err2 = 0;
498
499         list_for_each_entry_safe(rb, n, head, list) {
500                 inode = nilfs_iget(sbi->s_super, rb->ino);
501                 if (IS_ERR(inode)) {
502                         err = PTR_ERR(inode);
503                         inode = NULL;
504                         goto failed_inode;
505                 }
506
507                 pos = rb->blkoff << inode->i_blkbits;
508                 page = NULL;
509                 err = block_write_begin(NULL, inode->i_mapping, pos, blocksize,
510                                         0, &page, NULL, nilfs_get_block);
511                 if (unlikely(err))
512                         goto failed_inode;
513
514                 err = nilfs_recovery_copy_block(sbi, rb, page);
515                 if (unlikely(err))
516                         goto failed_page;
517
518                 err = nilfs_set_file_dirty(sbi, inode, 1);
519                 if (unlikely(err))
520                         goto failed_page;
521
522                 block_write_end(NULL, inode->i_mapping, pos, blocksize,
523                                 blocksize, page, NULL);
524
525                 unlock_page(page);
526                 page_cache_release(page);
527
528                 (*nr_salvaged_blocks)++;
529                 goto next;
530
531  failed_page:
532                 unlock_page(page);
533                 page_cache_release(page);
534
535  failed_inode:
536                 printk(KERN_WARNING
537                        "NILFS warning: error recovering data block "
538                        "(err=%d, ino=%lu, block-offset=%llu)\n",
539                        err, (unsigned long)rb->ino,
540                        (unsigned long long)rb->blkoff);
541                 if (!err2)
542                         err2 = err;
543  next:
544                 iput(inode); /* iput(NULL) is just ignored */
545                 list_del_init(&rb->list);
546                 kfree(rb);
547         }
548         return err2;
549 }
550
551 /**
552  * nilfs_do_roll_forward - salvage logical segments newer than the latest
553  * checkpoint
554  * @sbi: nilfs_sb_info
555  * @nilfs: the_nilfs
556  * @ri: pointer to a nilfs_recovery_info
557  */
558 static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
559                                  struct nilfs_sb_info *sbi,
560                                  struct nilfs_recovery_info *ri)
561 {
562         struct nilfs_segsum_info ssi;
563         sector_t pseg_start;
564         sector_t seg_start, seg_end;  /* Starting/ending DBN of full segment */
565         unsigned long nsalvaged_blocks = 0;
566         u64 seg_seq;
567         __u64 segnum, nextnum = 0;
568         int empty_seg = 0;
569         int err = 0, ret;
570         LIST_HEAD(dsync_blocks);  /* list of data blocks to be recovered */
571         enum {
572                 RF_INIT_ST,
573                 RF_DSYNC_ST,   /* scanning data-sync segments */
574         };
575         int state = RF_INIT_ST;
576
577         nilfs_attach_writer(nilfs, sbi);
578         pseg_start = ri->ri_lsegs_start;
579         seg_seq = ri->ri_lsegs_start_seq;
580         segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);
581         nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
582
583         while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) {
584
585                 ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi);
586                 if (ret) {
587                         if (ret == NILFS_SEG_FAIL_IO) {
588                                 err = -EIO;
589                                 goto failed;
590                         }
591                         goto strayed;
592                 }
593                 if (unlikely(NILFS_SEG_HAS_SR(&ssi)))
594                         goto confused;
595
596                 /* Found a valid partial segment; do recovery actions */
597                 nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next);
598                 empty_seg = 0;
599                 nilfs->ns_ctime = ssi.ctime;
600                 if (!(ssi.flags & NILFS_SS_GC))
601                         nilfs->ns_nongc_ctime = ssi.ctime;
602
603                 switch (state) {
604                 case RF_INIT_ST:
605                         if (!NILFS_SEG_LOGBGN(&ssi) || !NILFS_SEG_DSYNC(&ssi))
606                                 goto try_next_pseg;
607                         state = RF_DSYNC_ST;
608                         /* Fall through */
609                 case RF_DSYNC_ST:
610                         if (!NILFS_SEG_DSYNC(&ssi))
611                                 goto confused;
612
613                         err = collect_blocks_from_segsum(
614                                 sbi, pseg_start, &ssi, &dsync_blocks);
615                         if (unlikely(err))
616                                 goto failed;
617                         if (NILFS_SEG_LOGEND(&ssi)) {
618                                 err = recover_dsync_blocks(
619                                         sbi, &dsync_blocks, &nsalvaged_blocks);
620                                 if (unlikely(err))
621                                         goto failed;
622                                 state = RF_INIT_ST;
623                         }
624                         break; /* Fall through to try_next_pseg */
625                 }
626
627  try_next_pseg:
628                 if (pseg_start == ri->ri_lsegs_end)
629                         break;
630                 pseg_start += ssi.nblocks;
631                 if (pseg_start < seg_end)
632                         continue;
633                 goto feed_segment;
634
635  strayed:
636                 if (pseg_start == ri->ri_lsegs_end)
637                         break;
638
639  feed_segment:
640                 /* Looking to the next full segment */
641                 if (empty_seg++)
642                         break;
643                 seg_seq++;
644                 segnum = nextnum;
645                 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
646                 pseg_start = seg_start;
647         }
648
649         if (nsalvaged_blocks) {
650                 printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n",
651                        sbi->s_super->s_id, nsalvaged_blocks);
652                 ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE;
653         }
654  out:
655         dispose_recovery_list(&dsync_blocks);
656         nilfs_detach_writer(sbi->s_nilfs, sbi);
657         return err;
658
659  confused:
660         err = -EINVAL;
661  failed:
662         printk(KERN_ERR
663                "NILFS (device %s): Error roll-forwarding "
664                "(err=%d, pseg block=%llu). ",
665                sbi->s_super->s_id, err, (unsigned long long)pseg_start);
666         goto out;
667 }
668
669 static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
670                                       struct nilfs_sb_info *sbi,
671                                       struct nilfs_recovery_info *ri)
672 {
673         struct buffer_head *bh;
674         int err;
675
676         if (nilfs_get_segnum_of_block(nilfs, ri->ri_lsegs_start) !=
677             nilfs_get_segnum_of_block(nilfs, ri->ri_super_root))
678                 return;
679
680         bh = sb_getblk(sbi->s_super, ri->ri_lsegs_start);
681         BUG_ON(!bh);
682         memset(bh->b_data, 0, bh->b_size);
683         set_buffer_dirty(bh);
684         err = sync_dirty_buffer(bh);
685         if (unlikely(err))
686                 printk(KERN_WARNING
687                        "NILFS warning: buffer sync write failed during "
688                        "post-cleaning of recovery.\n");
689         brelse(bh);
690 }
691
692 /**
693  * nilfs_recover_logical_segments - salvage logical segments written after
694  * the latest super root
695  * @nilfs: the_nilfs
696  * @sbi: nilfs_sb_info
697  * @ri: pointer to a nilfs_recovery_info struct to store search results.
698  *
699  * Return Value: On success, 0 is returned.  On error, one of the following
700  * negative error code is returned.
701  *
702  * %-EINVAL - Inconsistent filesystem state.
703  *
704  * %-EIO - I/O error
705  *
706  * %-ENOSPC - No space left on device (only in a panic state).
707  *
708  * %-ERESTARTSYS - Interrupted.
709  *
710  * %-ENOMEM - Insufficient memory available.
711  */
712 int nilfs_recover_logical_segments(struct the_nilfs *nilfs,
713                                    struct nilfs_sb_info *sbi,
714                                    struct nilfs_recovery_info *ri)
715 {
716         int err;
717
718         if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0)
719                 return 0;
720
721         err = nilfs_attach_checkpoint(sbi, ri->ri_cno);
722         if (unlikely(err)) {
723                 printk(KERN_ERR
724                        "NILFS: error loading the latest checkpoint.\n");
725                 return err;
726         }
727
728         err = nilfs_do_roll_forward(nilfs, sbi, ri);
729         if (unlikely(err))
730                 goto failed;
731
732         if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) {
733                 err = nilfs_prepare_segment_for_recovery(nilfs, sbi, ri);
734                 if (unlikely(err)) {
735                         printk(KERN_ERR "NILFS: Error preparing segments for "
736                                "recovery.\n");
737                         goto failed;
738                 }
739
740                 err = nilfs_attach_segment_constructor(sbi);
741                 if (unlikely(err))
742                         goto failed;
743
744                 set_nilfs_discontinued(nilfs);
745                 err = nilfs_construct_segment(sbi->s_super);
746                 nilfs_detach_segment_constructor(sbi);
747
748                 if (unlikely(err)) {
749                         printk(KERN_ERR "NILFS: Oops! recovery failed. "
750                                "(err=%d)\n", err);
751                         goto failed;
752                 }
753
754                 nilfs_finish_roll_forward(nilfs, sbi, ri);
755         }
756
757  failed:
758         nilfs_detach_checkpoint(sbi);
759         return err;
760 }
761
762 /**
763  * nilfs_search_super_root - search the latest valid super root
764  * @nilfs: the_nilfs
765  * @sbi: nilfs_sb_info
766  * @ri: pointer to a nilfs_recovery_info struct to store search results.
767  *
768  * nilfs_search_super_root() looks for the latest super-root from a partial
769  * segment pointed by the superblock.  It sets up struct the_nilfs through
770  * this search. It fills nilfs_recovery_info (ri) required for recovery.
771  *
772  * Return Value: On success, 0 is returned.  On error, one of the following
773  * negative error code is returned.
774  *
775  * %-EINVAL - No valid segment found
776  *
777  * %-EIO - I/O error
778  */
779 int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
780                             struct nilfs_recovery_info *ri)
781 {
782         struct nilfs_segsum_info ssi;
783         sector_t pseg_start, pseg_end, sr_pseg_start = 0;
784         sector_t seg_start, seg_end; /* range of full segment (block number) */
785         sector_t b, end;
786         u64 seg_seq;
787         __u64 segnum, nextnum = 0;
788         __u64 cno;
789         LIST_HEAD(segments);
790         int empty_seg = 0, scan_newer = 0;
791         int ret;
792
793         pseg_start = nilfs->ns_last_pseg;
794         seg_seq = nilfs->ns_last_seq;
795         cno = nilfs->ns_last_cno;
796         segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);
797
798         /* Calculate range of segment */
799         nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
800
801         /* Read ahead segment */
802         b = seg_start;
803         while (b <= seg_end)
804                 sb_breadahead(sbi->s_super, b++);
805
806         for (;;) {
807                 /* Load segment summary */
808                 ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi);
809                 if (ret) {
810                         if (ret == NILFS_SEG_FAIL_IO)
811                                 goto failed;
812                         goto strayed;
813                 }
814                 pseg_end = pseg_start + ssi.nblocks - 1;
815                 if (unlikely(pseg_end > seg_end)) {
816                         ret = NILFS_SEG_FAIL_CONSISTENCY;
817                         goto strayed;
818                 }
819
820                 /* A valid partial segment */
821                 ri->ri_pseg_start = pseg_start;
822                 ri->ri_seq = seg_seq;
823                 ri->ri_segnum = segnum;
824                 nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next);
825                 ri->ri_nextnum = nextnum;
826                 empty_seg = 0;
827
828                 if (!NILFS_SEG_HAS_SR(&ssi) && !scan_newer) {
829                         /* This will never happen because a superblock
830                            (last_segment) always points to a pseg
831                            having a super root. */
832                         ret = NILFS_SEG_FAIL_CONSISTENCY;
833                         goto failed;
834                 }
835
836                 if (pseg_start == seg_start) {
837                         nilfs_get_segment_range(nilfs, nextnum, &b, &end);
838                         while (b <= end)
839                                 sb_breadahead(sbi->s_super, b++);
840                 }
841                 if (!NILFS_SEG_HAS_SR(&ssi)) {
842                         if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) {
843                                 ri->ri_lsegs_start = pseg_start;
844                                 ri->ri_lsegs_start_seq = seg_seq;
845                         }
846                         if (NILFS_SEG_LOGEND(&ssi))
847                                 ri->ri_lsegs_end = pseg_start;
848                         goto try_next_pseg;
849                 }
850
851                 /* A valid super root was found. */
852                 ri->ri_cno = cno++;
853                 ri->ri_super_root = pseg_end;
854                 ri->ri_lsegs_start = ri->ri_lsegs_end = 0;
855
856                 nilfs_dispose_segment_list(&segments);
857                 nilfs->ns_pseg_offset = (sr_pseg_start = pseg_start)
858                         + ssi.nblocks - seg_start;
859                 nilfs->ns_seg_seq = seg_seq;
860                 nilfs->ns_segnum = segnum;
861                 nilfs->ns_cno = cno;  /* nilfs->ns_cno = ri->ri_cno + 1 */
862                 nilfs->ns_ctime = ssi.ctime;
863                 nilfs->ns_nextnum = nextnum;
864
865                 if (scan_newer)
866                         ri->ri_need_recovery = NILFS_RECOVERY_SR_UPDATED;
867                 else {
868                         if (nilfs->ns_mount_state & NILFS_VALID_FS)
869                                 goto super_root_found;
870                         scan_newer = 1;
871                 }
872
873                 /* reset region for roll-forward */
874                 pseg_start += ssi.nblocks;
875                 if (pseg_start < seg_end)
876                         continue;
877                 goto feed_segment;
878
879  try_next_pseg:
880                 /* Standing on a course, or met an inconsistent state */
881                 pseg_start += ssi.nblocks;
882                 if (pseg_start < seg_end)
883                         continue;
884                 goto feed_segment;
885
886  strayed:
887                 /* Off the trail */
888                 if (!scan_newer)
889                         /*
890                          * This can happen if a checkpoint was written without
891                          * barriers, or as a result of an I/O failure.
892                          */
893                         goto failed;
894
895  feed_segment:
896                 /* Looking to the next full segment */
897                 if (empty_seg++)
898                         goto super_root_found; /* found a valid super root */
899
900                 ret = nilfs_segment_list_add(&segments, segnum);
901                 if (unlikely(ret))
902                         goto failed;
903
904                 seg_seq++;
905                 segnum = nextnum;
906                 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
907                 pseg_start = seg_start;
908         }
909
910  super_root_found:
911         /* Updating pointers relating to the latest checkpoint */
912         list_splice_tail(&segments, &ri->ri_used_segments);
913         nilfs->ns_last_pseg = sr_pseg_start;
914         nilfs->ns_last_seq = nilfs->ns_seg_seq;
915         nilfs->ns_last_cno = ri->ri_cno;
916         return 0;
917
918  failed:
919         nilfs_dispose_segment_list(&segments);
920         return (ret < 0) ? ret : nilfs_warn_segment_error(ret);
921 }