cf6fe3631554129f500eeb0e9db3bc505fc46042
[safe/jmp/linux-2.6] / fs / gfs2 / lops.c
1 /*
2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3  * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
4  *
5  * This copyrighted material is made available to anyone wishing to use,
6  * modify, copy, or redistribute it subject to the terms and conditions
7  * of the GNU General Public License version 2.
8  */
9
10 #include <linux/sched.h>
11 #include <linux/slab.h>
12 #include <linux/spinlock.h>
13 #include <linux/completion.h>
14 #include <linux/buffer_head.h>
15 #include <linux/gfs2_ondisk.h>
16 #include <linux/lm_interface.h>
17
18 #include "gfs2.h"
19 #include "incore.h"
20 #include "inode.h"
21 #include "glock.h"
22 #include "log.h"
23 #include "lops.h"
24 #include "meta_io.h"
25 #include "recovery.h"
26 #include "rgrp.h"
27 #include "trans.h"
28 #include "util.h"
29
30 /**
31  * gfs2_pin - Pin a buffer in memory
32  * @sdp: The superblock
33  * @bh: The buffer to be pinned
34  *
35  * The log lock must be held when calling this function
36  */
37 static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
38 {
39         struct gfs2_bufdata *bd;
40
41         gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
42
43         clear_buffer_dirty(bh);
44         if (test_set_buffer_pinned(bh))
45                 gfs2_assert_withdraw(sdp, 0);
46         if (!buffer_uptodate(bh))
47                 gfs2_io_error_bh(sdp, bh);
48         bd = bh->b_private;
49         /* If this buffer is in the AIL and it has already been written
50          * to in-place disk block, remove it from the AIL.
51          */
52         if (bd->bd_ail)
53                 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
54         get_bh(bh);
55 }
56
57 /**
58  * gfs2_unpin - Unpin a buffer
59  * @sdp: the filesystem the buffer belongs to
60  * @bh: The buffer to unpin
61  * @ai:
62  *
63  */
64
65 static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
66                        struct gfs2_ail *ai)
67 {
68         struct gfs2_bufdata *bd = bh->b_private;
69
70         gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
71
72         if (!buffer_pinned(bh))
73                 gfs2_assert_withdraw(sdp, 0);
74
75         lock_buffer(bh);
76         mark_buffer_dirty(bh);
77         clear_buffer_pinned(bh);
78
79         gfs2_log_lock(sdp);
80         if (bd->bd_ail) {
81                 list_del(&bd->bd_ail_st_list);
82                 brelse(bh);
83         } else {
84                 struct gfs2_glock *gl = bd->bd_gl;
85                 list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
86                 atomic_inc(&gl->gl_ail_count);
87         }
88         bd->bd_ail = ai;
89         list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
90         gfs2_log_unlock(sdp);
91         unlock_buffer(bh);
92 }
93
94 static void __glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
95 {
96         struct gfs2_glock *gl;
97         struct gfs2_trans *tr = current->journal_info;
98
99         tr->tr_touched = 1;
100
101         gl = container_of(le, struct gfs2_glock, gl_le);
102         if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)))
103                 return;
104
105         if (!list_empty(&le->le_list))
106                 return;
107
108         gfs2_glock_hold(gl);
109         set_bit(GLF_DIRTY, &gl->gl_flags);
110         sdp->sd_log_num_gl++;
111         list_add(&le->le_list, &sdp->sd_log_le_gl);
112 }
113
/*
 * glock_lo_add - locked wrapper around __glock_lo_add()
 * @sdp: the filesystem
 * @le: the glock's log element
 *
 * Takes the log lock for the duration of the call.
 */
static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
        gfs2_log_lock(sdp);

        __glock_lo_add(sdp, le);

        gfs2_log_unlock(sdp);
}
120
121 static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
122 {
123         struct list_head *head = &sdp->sd_log_le_gl;
124         struct gfs2_glock *gl;
125
126         while (!list_empty(head)) {
127                 gl = list_entry(head->next, struct gfs2_glock, gl_le.le_list);
128                 list_del_init(&gl->gl_le.le_list);
129                 sdp->sd_log_num_gl--;
130
131                 gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl));
132                 gfs2_glock_put(gl);
133         }
134         gfs2_assert_warn(sdp, !sdp->sd_log_num_gl);
135 }
136
137 static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
138 {
139         struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
140         struct gfs2_trans *tr;
141
142         lock_buffer(bd->bd_bh);
143         gfs2_log_lock(sdp);
144         if (!list_empty(&bd->bd_list_tr))
145                 goto out;
146         tr = current->journal_info;
147         tr->tr_touched = 1;
148         tr->tr_num_buf++;
149         list_add(&bd->bd_list_tr, &tr->tr_list_buf);
150         if (!list_empty(&le->le_list))
151                 goto out;
152         __glock_lo_add(sdp, &bd->bd_gl->gl_le);
153         gfs2_meta_check(sdp, bd->bd_bh);
154         gfs2_pin(sdp, bd->bd_bh);
155         sdp->sd_log_num_buf++;
156         list_add(&le->le_list, &sdp->sd_log_le_buf);
157         tr->tr_num_buf_new++;
158 out:
159         gfs2_log_unlock(sdp);
160         unlock_buffer(bd->bd_bh);
161 }
162
163 static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
164 {
165         struct list_head *head = &tr->tr_list_buf;
166         struct gfs2_bufdata *bd;
167
168         gfs2_log_lock(sdp);
169         while (!list_empty(head)) {
170                 bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
171                 list_del_init(&bd->bd_list_tr);
172                 tr->tr_num_buf--;
173         }
174         gfs2_log_unlock(sdp);
175         gfs2_assert_warn(sdp, !tr->tr_num_buf);
176 }
177
178 static void buf_lo_before_commit(struct gfs2_sbd *sdp)
179 {
180         struct buffer_head *bh;
181         struct gfs2_log_descriptor *ld;
182         struct gfs2_bufdata *bd1 = NULL, *bd2;
183         unsigned int total;
184         unsigned int offset = BUF_OFFSET;
185         unsigned int limit;
186         unsigned int num;
187         unsigned n;
188         __be64 *ptr;
189
190         limit = buf_limit(sdp);
191         /* for 4k blocks, limit = 503 */
192
193         gfs2_log_lock(sdp);
194         total = sdp->sd_log_num_buf;
195         bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
196         while(total) {
197                 num = total;
198                 if (total > limit)
199                         num = limit;
200                 gfs2_log_unlock(sdp);
201                 bh = gfs2_log_get_buf(sdp);
202                 gfs2_log_lock(sdp);
203                 ld = (struct gfs2_log_descriptor *)bh->b_data;
204                 ptr = (__be64 *)(bh->b_data + offset);
205                 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
206                 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
207                 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
208                 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA);
209                 ld->ld_length = cpu_to_be32(num + 1);
210                 ld->ld_data1 = cpu_to_be32(num);
211                 ld->ld_data2 = cpu_to_be32(0);
212                 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
213
214                 n = 0;
215                 list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
216                                              bd_le.le_list) {
217                         *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
218                         if (++n >= num)
219                                 break;
220                 }
221
222                 gfs2_log_unlock(sdp);
223                 set_buffer_dirty(bh);
224                 ll_rw_block(WRITE, 1, &bh);
225                 gfs2_log_lock(sdp);
226
227                 n = 0;
228                 list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
229                                              bd_le.le_list) {
230                         gfs2_log_unlock(sdp);
231                         bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
232                         set_buffer_dirty(bh);
233                         ll_rw_block(WRITE, 1, &bh);
234                         gfs2_log_lock(sdp);
235                         if (++n >= num)
236                                 break;
237                 }
238
239                 BUG_ON(total < num);
240                 total -= num;
241         }
242         gfs2_log_unlock(sdp);
243 }
244
245 static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
246 {
247         struct list_head *head = &sdp->sd_log_le_buf;
248         struct gfs2_bufdata *bd;
249
250         while (!list_empty(head)) {
251                 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
252                 list_del_init(&bd->bd_le.le_list);
253                 sdp->sd_log_num_buf--;
254
255                 gfs2_unpin(sdp, bd->bd_bh, ai);
256         }
257         gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
258 }
259
260 static void buf_lo_before_scan(struct gfs2_jdesc *jd,
261                                struct gfs2_log_header_host *head, int pass)
262 {
263         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
264
265         if (pass != 0)
266                 return;
267
268         sdp->sd_found_blocks = 0;
269         sdp->sd_replayed_blocks = 0;
270 }
271
272 static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
273                                 struct gfs2_log_descriptor *ld, __be64 *ptr,
274                                 int pass)
275 {
276         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
277         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
278         struct gfs2_glock *gl = ip->i_gl;
279         unsigned int blks = be32_to_cpu(ld->ld_data1);
280         struct buffer_head *bh_log, *bh_ip;
281         u64 blkno;
282         int error = 0;
283
284         if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
285                 return 0;
286
287         gfs2_replay_incr_blk(sdp, &start);
288
289         for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
290                 blkno = be64_to_cpu(*ptr++);
291
292                 sdp->sd_found_blocks++;
293
294                 if (gfs2_revoke_check(sdp, blkno, start))
295                         continue;
296
297                 error = gfs2_replay_read_block(jd, start, &bh_log);
298                 if (error)
299                         return error;
300
301                 bh_ip = gfs2_meta_new(gl, blkno);
302                 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
303
304                 if (gfs2_meta_check(sdp, bh_ip))
305                         error = -EIO;
306                 else
307                         mark_buffer_dirty(bh_ip);
308
309                 brelse(bh_log);
310                 brelse(bh_ip);
311
312                 if (error)
313                         break;
314
315                 sdp->sd_replayed_blocks++;
316         }
317
318         return error;
319 }
320
321 static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
322 {
323         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
324         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
325
326         if (error) {
327                 gfs2_meta_sync(ip->i_gl);
328                 return;
329         }
330         if (pass != 1)
331                 return;
332
333         gfs2_meta_sync(ip->i_gl);
334
335         fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
336                 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
337 }
338
339 static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
340 {
341         struct gfs2_trans *tr;
342
343         tr = current->journal_info;
344         tr->tr_touched = 1;
345         tr->tr_num_revoke++;
346
347         gfs2_log_lock(sdp);
348         sdp->sd_log_num_revoke++;
349         list_add(&le->le_list, &sdp->sd_log_le_revoke);
350         gfs2_log_unlock(sdp);
351 }
352
353 static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
354 {
355         struct gfs2_log_descriptor *ld;
356         struct gfs2_meta_header *mh;
357         struct buffer_head *bh;
358         unsigned int offset;
359         struct list_head *head = &sdp->sd_log_le_revoke;
360         struct gfs2_bufdata *bd;
361
362         if (!sdp->sd_log_num_revoke)
363                 return;
364
365         bh = gfs2_log_get_buf(sdp);
366         ld = (struct gfs2_log_descriptor *)bh->b_data;
367         ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
368         ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
369         ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
370         ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE);
371         ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
372                                                     sizeof(u64)));
373         ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
374         ld->ld_data2 = cpu_to_be32(0);
375         memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
376         offset = sizeof(struct gfs2_log_descriptor);
377
378         while (!list_empty(head)) {
379                 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
380                 list_del_init(&bd->bd_le.le_list);
381                 sdp->sd_log_num_revoke--;
382
383                 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
384                         set_buffer_dirty(bh);
385                         ll_rw_block(WRITE, 1, &bh);
386
387                         bh = gfs2_log_get_buf(sdp);
388                         mh = (struct gfs2_meta_header *)bh->b_data;
389                         mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
390                         mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
391                         mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
392                         offset = sizeof(struct gfs2_meta_header);
393                 }
394
395                 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno);
396                 kfree(bd);
397
398                 offset += sizeof(u64);
399         }
400         gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
401
402         set_buffer_dirty(bh);
403         ll_rw_block(WRITE, 1, &bh);
404 }
405
406 static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
407                                   struct gfs2_log_header_host *head, int pass)
408 {
409         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
410
411         if (pass != 0)
412                 return;
413
414         sdp->sd_found_revokes = 0;
415         sdp->sd_replay_tail = head->lh_tail;
416 }
417
418 static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
419                                    struct gfs2_log_descriptor *ld, __be64 *ptr,
420                                    int pass)
421 {
422         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
423         unsigned int blks = be32_to_cpu(ld->ld_length);
424         unsigned int revokes = be32_to_cpu(ld->ld_data1);
425         struct buffer_head *bh;
426         unsigned int offset;
427         u64 blkno;
428         int first = 1;
429         int error;
430
431         if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
432                 return 0;
433
434         offset = sizeof(struct gfs2_log_descriptor);
435
436         for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
437                 error = gfs2_replay_read_block(jd, start, &bh);
438                 if (error)
439                         return error;
440
441                 if (!first)
442                         gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);
443
444                 while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
445                         blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
446
447                         error = gfs2_revoke_add(sdp, blkno, start);
448                         if (error < 0)
449                                 return error;
450                         else if (error)
451                                 sdp->sd_found_revokes++;
452
453                         if (!--revokes)
454                                 break;
455                         offset += sizeof(u64);
456                 }
457
458                 brelse(bh);
459                 offset = sizeof(struct gfs2_meta_header);
460                 first = 0;
461         }
462
463         return 0;
464 }
465
466 static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
467 {
468         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
469
470         if (error) {
471                 gfs2_revoke_clean(sdp);
472                 return;
473         }
474         if (pass != 1)
475                 return;
476
477         fs_info(sdp, "jid=%u: Found %u revoke tags\n",
478                 jd->jd_jid, sdp->sd_found_revokes);
479
480         gfs2_revoke_clean(sdp);
481 }
482
483 static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
484 {
485         struct gfs2_rgrpd *rgd;
486         struct gfs2_trans *tr = current->journal_info;
487
488         tr->tr_touched = 1;
489
490         rgd = container_of(le, struct gfs2_rgrpd, rd_le);
491
492         gfs2_log_lock(sdp);
493         if (!list_empty(&le->le_list)){
494                 gfs2_log_unlock(sdp);
495                 return;
496         }
497         gfs2_rgrp_bh_hold(rgd);
498         sdp->sd_log_num_rg++;
499         list_add(&le->le_list, &sdp->sd_log_le_rg);
500         gfs2_log_unlock(sdp);
501 }
502
503 static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
504 {
505         struct list_head *head = &sdp->sd_log_le_rg;
506         struct gfs2_rgrpd *rgd;
507
508         while (!list_empty(head)) {
509                 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_le.le_list);
510                 list_del_init(&rgd->rd_le.le_list);
511                 sdp->sd_log_num_rg--;
512
513                 gfs2_rgrp_repolish_clones(rgd);
514                 gfs2_rgrp_bh_put(rgd);
515         }
516         gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
517 }
518
519 /**
520  * databuf_lo_add - Add a databuf to the transaction.
521  *
522  * This is used in two distinct cases:
523  * i) In ordered write mode
524  *    We put the data buffer on a list so that we can ensure that its
525  *    synced to disk at the right time
526  * ii) In journaled data mode
527  *    We need to journal the data block in the same way as metadata in
528  *    the functions above. The difference is that here we have a tag
529  *    which is two __be64's being the block number (as per meta data)
530  *    and a flag which says whether the data block needs escaping or
531  *    not. This means we need a new log entry for each 251 or so data
532  *    blocks, which isn't an enormous overhead but twice as much as
533  *    for normal metadata blocks.
534  */
535 static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
536 {
537         struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
538         struct gfs2_trans *tr = current->journal_info;
539         struct address_space *mapping = bd->bd_bh->b_page->mapping;
540         struct gfs2_inode *ip = GFS2_I(mapping->host);
541
542         lock_buffer(bd->bd_bh);
543         gfs2_log_lock(sdp);
544         if (!list_empty(&bd->bd_list_tr))
545                 goto out;
546         tr->tr_touched = 1;
547         if (gfs2_is_jdata(ip)) {
548                 tr->tr_num_buf++;
549                 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
550         }
551         if (!list_empty(&le->le_list))
552                 goto out;
553
554         __glock_lo_add(sdp, &bd->bd_gl->gl_le);
555         if (gfs2_is_jdata(ip)) {
556                 gfs2_pin(sdp, bd->bd_bh);
557                 tr->tr_num_databuf_new++;
558                 sdp->sd_log_num_databuf++;
559                 list_add(&le->le_list, &sdp->sd_log_le_databuf);
560         } else {
561                 list_add(&le->le_list, &sdp->sd_log_le_ordered);
562         }
563 out:
564         gfs2_log_unlock(sdp);
565         unlock_buffer(bd->bd_bh);
566 }
567
568 static int gfs2_check_magic(struct buffer_head *bh)
569 {
570         struct page *page = bh->b_page;
571         void *kaddr;
572         __be32 *ptr;
573         int rv = 0;
574
575         kaddr = kmap_atomic(page, KM_USER0);
576         ptr = kaddr + bh_offset(bh);
577         if (*ptr == cpu_to_be32(GFS2_MAGIC))
578                 rv = 1;
579         kunmap_atomic(kaddr, KM_USER0);
580
581         return rv;
582 }
583
584 /**
585  * databuf_lo_before_commit - Scan the data buffers, writing as we go
586  *
587  */
588
589 static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
590 {
591         struct gfs2_bufdata *bd1 = NULL, *bd2;
592         struct buffer_head *bh = NULL,*bh1 = NULL;
593         struct gfs2_log_descriptor *ld;
594         unsigned int limit;
595         unsigned int total;
596         unsigned int num, n;
597         __be64 *ptr = NULL;
598         int magic;
599
600
601         limit = databuf_limit(sdp);
602
603         gfs2_log_lock(sdp);
604         total = sdp->sd_log_num_databuf;
605         bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf,
606                                        bd_le.le_list);
607         while(total) {
608                 num = total;
609                 if (num > limit)
610                         num = limit;
611
612                 gfs2_log_unlock(sdp);
613                 bh = gfs2_log_get_buf(sdp);
614                 gfs2_log_lock(sdp);
615
616                 ld = (struct gfs2_log_descriptor *)bh->b_data;
617                 ptr = (__be64 *)(bh->b_data + DATABUF_OFFSET);
618                 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
619                 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
620                 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
621                 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_JDATA);
622                 ld->ld_length = cpu_to_be32(num + 1);
623                 ld->ld_data1 = cpu_to_be32(num);
624                 ld->ld_data2 = cpu_to_be32(0);
625                 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
626
627                 n = 0;
628                 list_for_each_entry_continue(bd1, &sdp->sd_log_le_databuf,
629                                              bd_le.le_list) {
630                         bh1 = bd1->bd_bh;
631
632                         magic = gfs2_check_magic(bh1);
633                         *ptr++ = cpu_to_be64(bh1->b_blocknr);
634                         *ptr++ = cpu_to_be64((__u64)magic);
635                         clear_buffer_escaped(bh1);
636                         if (unlikely(magic != 0))
637                                 set_buffer_escaped(bh1);
638                         if (++n >= num)
639                                 break;
640                 }
641                 gfs2_log_unlock(sdp);
642                 if (bh) {
643                         set_buffer_dirty(bh);
644                         ll_rw_block(WRITE, 1, &bh);
645                         bh = NULL;
646                         ptr = NULL;
647                 }
648                 n = 0;
649                 gfs2_log_lock(sdp);
650                 list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf,
651                                              bd_le.le_list) {
652                         if (!bd2->bd_bh)
653                                 continue;
654                         /* copy buffer if it needs escaping */
655                         gfs2_log_unlock(sdp);
656                         if (unlikely(buffer_escaped(bd2->bd_bh))) {
657                                 void *kaddr;
658                                 struct page *page = bd2->bd_bh->b_page;
659                                 bh = gfs2_log_get_buf(sdp);
660                                 kaddr = kmap_atomic(page, KM_USER0);
661                                 memcpy(bh->b_data,
662                                        kaddr + bh_offset(bd2->bd_bh),
663                                        sdp->sd_sb.sb_bsize);
664                                 kunmap_atomic(kaddr, KM_USER0);
665                                 *(__be32 *)bh->b_data = 0;
666                         } else {
667                                 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
668                         }
669                         set_buffer_dirty(bh);
670                         ll_rw_block(WRITE, 1, &bh);
671                         gfs2_log_lock(sdp);
672                         if (++n >= num)
673                                 break;
674                 }
675                 bh = NULL;
676                 BUG_ON(total < num);
677                 total -= num;
678         }
679         gfs2_log_unlock(sdp);
680 }
681
682 static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
683                                     struct gfs2_log_descriptor *ld,
684                                     __be64 *ptr, int pass)
685 {
686         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
687         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
688         struct gfs2_glock *gl = ip->i_gl;
689         unsigned int blks = be32_to_cpu(ld->ld_data1);
690         struct buffer_head *bh_log, *bh_ip;
691         u64 blkno;
692         u64 esc;
693         int error = 0;
694
695         if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
696                 return 0;
697
698         gfs2_replay_incr_blk(sdp, &start);
699         for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
700                 blkno = be64_to_cpu(*ptr++);
701                 esc = be64_to_cpu(*ptr++);
702
703                 sdp->sd_found_blocks++;
704
705                 if (gfs2_revoke_check(sdp, blkno, start))
706                         continue;
707
708                 error = gfs2_replay_read_block(jd, start, &bh_log);
709                 if (error)
710                         return error;
711
712                 bh_ip = gfs2_meta_new(gl, blkno);
713                 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
714
715                 /* Unescape */
716                 if (esc) {
717                         __be32 *eptr = (__be32 *)bh_ip->b_data;
718                         *eptr = cpu_to_be32(GFS2_MAGIC);
719                 }
720                 mark_buffer_dirty(bh_ip);
721
722                 brelse(bh_log);
723                 brelse(bh_ip);
724                 if (error)
725                         break;
726
727                 sdp->sd_replayed_blocks++;
728         }
729
730         return error;
731 }
732
733 /* FIXME: sort out accounting for log blocks etc. */
734
735 static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
736 {
737         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
738         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
739
740         if (error) {
741                 gfs2_meta_sync(ip->i_gl);
742                 return;
743         }
744         if (pass != 1)
745                 return;
746
747         /* data sync? */
748         gfs2_meta_sync(ip->i_gl);
749
750         fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
751                 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
752 }
753
754 static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
755 {
756         struct list_head *head = &sdp->sd_log_le_databuf;
757         struct gfs2_bufdata *bd;
758
759         while (!list_empty(head)) {
760                 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
761                 list_del_init(&bd->bd_le.le_list);
762                 sdp->sd_log_num_databuf--;
763                 gfs2_unpin(sdp, bd->bd_bh, ai);
764         }
765         gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
766 }
767
768
769 const struct gfs2_log_operations gfs2_glock_lops = {
770         .lo_add = glock_lo_add,
771         .lo_after_commit = glock_lo_after_commit,
772         .lo_name = "glock",
773 };
774
775 const struct gfs2_log_operations gfs2_buf_lops = {
776         .lo_add = buf_lo_add,
777         .lo_incore_commit = buf_lo_incore_commit,
778         .lo_before_commit = buf_lo_before_commit,
779         .lo_after_commit = buf_lo_after_commit,
780         .lo_before_scan = buf_lo_before_scan,
781         .lo_scan_elements = buf_lo_scan_elements,
782         .lo_after_scan = buf_lo_after_scan,
783         .lo_name = "buf",
784 };
785
786 const struct gfs2_log_operations gfs2_revoke_lops = {
787         .lo_add = revoke_lo_add,
788         .lo_before_commit = revoke_lo_before_commit,
789         .lo_before_scan = revoke_lo_before_scan,
790         .lo_scan_elements = revoke_lo_scan_elements,
791         .lo_after_scan = revoke_lo_after_scan,
792         .lo_name = "revoke",
793 };
794
795 const struct gfs2_log_operations gfs2_rg_lops = {
796         .lo_add = rg_lo_add,
797         .lo_after_commit = rg_lo_after_commit,
798         .lo_name = "rg",
799 };
800
801 const struct gfs2_log_operations gfs2_databuf_lops = {
802         .lo_add = databuf_lo_add,
803         .lo_incore_commit = buf_lo_incore_commit,
804         .lo_before_commit = databuf_lo_before_commit,
805         .lo_after_commit = databuf_lo_after_commit,
806         .lo_scan_elements = databuf_lo_scan_elements,
807         .lo_after_scan = databuf_lo_after_scan,
808         .lo_name = "databuf",
809 };
810
811 const struct gfs2_log_operations *gfs2_log_ops[] = {
812         &gfs2_glock_lops,
813         &gfs2_buf_lops,
814         &gfs2_revoke_lops,
815         &gfs2_rg_lops,
816         &gfs2_databuf_lops,
817         NULL,
818 };
819