/*
 * [GFS2] Move pin/unpin into lops.c, clean up locking
 * Source: [safe/jmp/linux-2.6] / fs / gfs2 / lops.c
 */
1 /*
2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3  * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
4  *
5  * This copyrighted material is made available to anyone wishing to use,
6  * modify, copy, or redistribute it subject to the terms and conditions
7  * of the GNU General Public License version 2.
8  */
9
10 #include <linux/sched.h>
11 #include <linux/slab.h>
12 #include <linux/spinlock.h>
13 #include <linux/completion.h>
14 #include <linux/buffer_head.h>
15 #include <linux/gfs2_ondisk.h>
16 #include <linux/lm_interface.h>
17
18 #include "gfs2.h"
19 #include "incore.h"
20 #include "inode.h"
21 #include "glock.h"
22 #include "log.h"
23 #include "lops.h"
24 #include "meta_io.h"
25 #include "recovery.h"
26 #include "rgrp.h"
27 #include "trans.h"
28 #include "util.h"
29
30 /**
31  * gfs2_pin - Pin a buffer in memory
32  * @sdp: The superblock
33  * @bh: The buffer to be pinned
34  *
35  * The log lock must be held when calling this function
36  */
37 static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
38 {
39         struct gfs2_bufdata *bd;
40
41         gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
42
43         clear_buffer_dirty(bh);
44         if (test_set_buffer_pinned(bh))
45                 gfs2_assert_withdraw(sdp, 0);
46         if (!buffer_uptodate(bh))
47                 gfs2_io_error_bh(sdp, bh);
48         bd = bh->b_private;
49         /* If this buffer is in the AIL and it has already been written
50          * to in-place disk block, remove it from the AIL.
51          */
52         if (bd->bd_ail)
53                 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
54         get_bh(bh);
55 }
56
57 /**
58  * gfs2_unpin - Unpin a buffer
59  * @sdp: the filesystem the buffer belongs to
60  * @bh: The buffer to unpin
61  * @ai:
62  *
63  */
64
65 static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
66                        struct gfs2_ail *ai)
67 {
68         struct gfs2_bufdata *bd = bh->b_private;
69
70         gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
71
72         if (!buffer_pinned(bh))
73                 gfs2_assert_withdraw(sdp, 0);
74
75         lock_buffer(bh);
76         mark_buffer_dirty(bh);
77         clear_buffer_pinned(bh);
78
79         gfs2_log_lock(sdp);
80         if (bd->bd_ail) {
81                 list_del(&bd->bd_ail_st_list);
82                 brelse(bh);
83         } else {
84                 struct gfs2_glock *gl = bd->bd_gl;
85                 list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
86                 atomic_inc(&gl->gl_ail_count);
87         }
88         bd->bd_ail = ai;
89         list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
90         gfs2_log_unlock(sdp);
91         unlock_buffer(bh);
92 }
93
94 static void __glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
95 {
96         struct gfs2_glock *gl;
97         struct gfs2_trans *tr = current->journal_info;
98
99         tr->tr_touched = 1;
100
101         gl = container_of(le, struct gfs2_glock, gl_le);
102         if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)))
103                 return;
104
105         if (!list_empty(&le->le_list))
106                 return;
107
108         gfs2_glock_hold(gl);
109         set_bit(GLF_DIRTY, &gl->gl_flags);
110         sdp->sd_log_num_gl++;
111         list_add(&le->le_list, &sdp->sd_log_le_gl);
112 }
113
/* Locked wrapper around __glock_lo_add() for callers that do not
 * already hold the log lock. */
static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	gfs2_log_lock(sdp);
	__glock_lo_add(sdp, le);
	gfs2_log_unlock(sdp);
}
120
121 static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
122 {
123         struct list_head *head = &sdp->sd_log_le_gl;
124         struct gfs2_glock *gl;
125
126         while (!list_empty(head)) {
127                 gl = list_entry(head->next, struct gfs2_glock, gl_le.le_list);
128                 list_del_init(&gl->gl_le.le_list);
129                 sdp->sd_log_num_gl--;
130
131                 gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl));
132                 gfs2_glock_put(gl);
133         }
134         gfs2_assert_warn(sdp, !sdp->sd_log_num_gl);
135 }
136
/*
 * Add a metadata buffer to the current transaction.  Lock ordering is
 * buffer lock first, then log lock.  The buffer is added to both the
 * per-transaction list (bd_list_tr) and, the first time only, the
 * superblock-wide log list (le_list), at which point it is pinned.
 */
static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
	struct gfs2_trans *tr;

	lock_buffer(bd->bd_bh);
	gfs2_log_lock(sdp);
	/* Already in this transaction: nothing to do */
	if (!list_empty(&bd->bd_list_tr))
		goto out;
	tr = current->journal_info;
	tr->tr_touched = 1;
	tr->tr_num_buf++;
	list_add(&bd->bd_list_tr, &tr->tr_list_buf);
	/* Already on the log list (from an earlier transaction in this
	 * flush): don't pin or account it a second time */
	if (!list_empty(&le->le_list))
		goto out;
	/* Make sure the owning glock is logged before its buffers */
	__glock_lo_add(sdp, &bd->bd_gl->gl_le);
	gfs2_meta_check(sdp, bd->bd_bh);
	gfs2_pin(sdp, bd->bd_bh);
	sdp->sd_log_num_buf++;
	list_add(&le->le_list, &sdp->sd_log_le_buf);
	tr->tr_num_buf_new++;
out:
	gfs2_log_unlock(sdp);
	unlock_buffer(bd->bd_bh);
}
162
163 static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
164 {
165         struct list_head *head = &tr->tr_list_buf;
166         struct gfs2_bufdata *bd;
167
168         gfs2_log_lock(sdp);
169         while (!list_empty(head)) {
170                 bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
171                 list_del_init(&bd->bd_list_tr);
172                 tr->tr_num_buf--;
173         }
174         gfs2_log_unlock(sdp);
175         gfs2_assert_warn(sdp, !tr->tr_num_buf);
176 }
177
/*
 * buf_lo_before_commit - write pinned metadata buffers into the log
 * @sdp: the filesystem
 *
 * Walks sd_log_le_buf in chunks of at most buf_limit() entries.  For each
 * chunk a log descriptor block listing the real block numbers is written,
 * followed by one "fake" log buffer per pinned metadata buffer.  The log
 * lock is dropped around every block allocation and submission and then
 * retaken, so two separate cursors are used: bd1 walks the list for the
 * descriptor pass, bd2 for the data pass.  NOTE(review): correctness here
 * relies on entries not being removed from sd_log_le_buf while the lock
 * is dropped mid-walk — confirm against the log-flush callers.
 */
static void buf_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct buffer_head *bh;
	struct gfs2_log_descriptor *ld;
	struct gfs2_bufdata *bd1 = NULL, *bd2;
	unsigned int total;
	unsigned int offset = BUF_OFFSET;
	unsigned int limit;
	unsigned int num;
	unsigned n;
	__be64 *ptr;

	limit = buf_limit(sdp);
	/* for 4k blocks, limit = 503 */

	gfs2_log_lock(sdp);
	total = sdp->sd_log_num_buf;
	/* Prime both cursors so list_for_each_entry_continue() starts at
	 * the head of the list */
	bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
	while(total) {
		num = total;
		if (total > limit)
			num = limit;
		/* Drop the log lock to allocate a log block */
		gfs2_log_unlock(sdp);
		bh = gfs2_log_get_buf(sdp);
		gfs2_log_lock(sdp);
		/* Fill in the descriptor header for this chunk */
		ld = (struct gfs2_log_descriptor *)bh->b_data;
		ptr = (__be64 *)(bh->b_data + offset);
		ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
		ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
		ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
		ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA);
		ld->ld_length = cpu_to_be32(num + 1);	/* descriptor + data blocks */
		ld->ld_data1 = cpu_to_be32(num);
		ld->ld_data2 = cpu_to_be32(0);
		memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));

		/* First pass: record the real block number of each buffer */
		n = 0;
		list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
					     bd_le.le_list) {
			*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
			if (++n >= num)
				break;
		}

		gfs2_log_unlock(sdp);
		set_buffer_dirty(bh);
		ll_rw_block(WRITE, 1, &bh);
		gfs2_log_lock(sdp);

		/* Second pass: submit a fake log buffer aliasing the data of
		 * each pinned buffer in the chunk */
		n = 0;
		list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
					     bd_le.le_list) {
			gfs2_log_unlock(sdp);
			bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
			set_buffer_dirty(bh);
			ll_rw_block(WRITE, 1, &bh);
			gfs2_log_lock(sdp);
			if (++n >= num)
				break;
		}

		BUG_ON(total < num);
		total -= num;
	}
	gfs2_log_unlock(sdp);
}
244
/*
 * After commit: unpin every logged metadata buffer and hand it to the
 * AIL entry @ai for in-place writeback.  Called without the log lock
 * held (gfs2_unpin() takes it internally).
 */
static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *head = &sdp->sd_log_le_buf;
	struct gfs2_bufdata *bd;

	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
		list_del_init(&bd->bd_le.le_list);
		sdp->sd_log_num_buf--;

		gfs2_unpin(sdp, bd->bd_bh, ai);
	}
	/* Count and list must drain together */
	gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
}
259
260 static void buf_lo_before_scan(struct gfs2_jdesc *jd,
261                                struct gfs2_log_header_host *head, int pass)
262 {
263         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
264
265         if (pass != 0)
266                 return;
267
268         sdp->sd_found_blocks = 0;
269         sdp->sd_replayed_blocks = 0;
270 }
271
/*
 * Replay one metadata log descriptor during journal recovery (pass 1).
 * For each block number listed after the descriptor, the logged copy is
 * read from the journal and, unless a newer revoke covers it, copied
 * over the in-place block and marked dirty.
 *
 * Returns 0 on success or a negative errno (-EIO on a metadata check
 * failure, or an error from reading the journal block).
 */
static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				struct gfs2_log_descriptor *ld, __be64 *ptr,
				int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	int error = 0;

	/* Metadata replay happens on pass 1 only */
	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
		return 0;

	/* Skip past the descriptor block itself */
	gfs2_replay_incr_blk(sdp, &start);

	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);

		sdp->sd_found_blocks++;

		/* A later revoke supersedes this logged copy */
		if (gfs2_revoke_check(sdp, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		if (gfs2_meta_check(sdp, bh_ip))
			error = -EIO;
		else
			mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);

		if (error)
			break;

		sdp->sd_replayed_blocks++;
	}

	return error;
}
320
/*
 * Finish metadata replay: sync the replayed blocks to disk and report
 * the statistics.  On error only the sync is performed; on success the
 * summary is printed after pass 1.
 */
static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_meta_sync(ip->i_gl);
		return;
	}
	if (pass != 1)
		return;

	gfs2_meta_sync(ip->i_gl);

	fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
		jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
}
338
339 static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
340 {
341         struct gfs2_trans *tr;
342
343         tr = current->journal_info;
344         tr->tr_touched = 1;
345         tr->tr_num_revoke++;
346
347         gfs2_log_lock(sdp);
348         sdp->sd_log_num_revoke++;
349         list_add(&le->le_list, &sdp->sd_log_le_revoke);
350         gfs2_log_unlock(sdp);
351 }
352
/*
 * Write all queued revokes into the log.  A single descriptor header
 * is written first; the revoke block numbers are then packed as u64s,
 * spilling into continuation blocks (GFS2_METATYPE_LB) whenever the
 * current block is full.  Frees each gfs2_revoke as it is written.
 *
 * NOTE(review): this runs during the log flush; it does not take the
 * log lock itself — presumably no new revokes can be added at this
 * point.  Confirm against the log flush locking.
 */
static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct gfs2_log_descriptor *ld;
	struct gfs2_meta_header *mh;
	struct buffer_head *bh;
	unsigned int offset;
	struct list_head *head = &sdp->sd_log_le_revoke;
	struct gfs2_revoke *rv;

	if (!sdp->sd_log_num_revoke)
		return;

	bh = gfs2_log_get_buf(sdp);
	ld = (struct gfs2_log_descriptor *)bh->b_data;
	ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
	ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
	ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE);
	/* Total log blocks needed for sd_log_num_revoke u64 entries */
	ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
						    sizeof(u64)));
	ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
	ld->ld_data2 = cpu_to_be32(0);
	memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
	offset = sizeof(struct gfs2_log_descriptor);

	while (!list_empty(head)) {
		rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list);
		list_del_init(&rv->rv_le.le_list);
		sdp->sd_log_num_revoke--;

		/* Current block full: submit it and start a continuation
		 * block with a plain meta header */
		if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
			set_buffer_dirty(bh);
			ll_rw_block(WRITE, 1, &bh);

			bh = gfs2_log_get_buf(sdp);
			mh = (struct gfs2_meta_header *)bh->b_data;
			mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
			mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
			mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
			offset = sizeof(struct gfs2_meta_header);
		}

		*(__be64 *)(bh->b_data + offset) = cpu_to_be64(rv->rv_blkno);
		kfree(rv);

		offset += sizeof(u64);
	}
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);

	/* Submit the final (possibly partial) block */
	set_buffer_dirty(bh);
	ll_rw_block(WRITE, 1, &bh);
}
405
406 static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
407                                   struct gfs2_log_header_host *head, int pass)
408 {
409         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
410
411         if (pass != 0)
412                 return;
413
414         sdp->sd_found_revokes = 0;
415         sdp->sd_replay_tail = head->lh_tail;
416 }
417
418 static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
419                                    struct gfs2_log_descriptor *ld, __be64 *ptr,
420                                    int pass)
421 {
422         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
423         unsigned int blks = be32_to_cpu(ld->ld_length);
424         unsigned int revokes = be32_to_cpu(ld->ld_data1);
425         struct buffer_head *bh;
426         unsigned int offset;
427         u64 blkno;
428         int first = 1;
429         int error;
430
431         if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
432                 return 0;
433
434         offset = sizeof(struct gfs2_log_descriptor);
435
436         for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
437                 error = gfs2_replay_read_block(jd, start, &bh);
438                 if (error)
439                         return error;
440
441                 if (!first)
442                         gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);
443
444                 while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
445                         blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
446
447                         error = gfs2_revoke_add(sdp, blkno, start);
448                         if (error < 0)
449                                 return error;
450                         else if (error)
451                                 sdp->sd_found_revokes++;
452
453                         if (!--revokes)
454                                 break;
455                         offset += sizeof(u64);
456                 }
457
458                 brelse(bh);
459                 offset = sizeof(struct gfs2_meta_header);
460                 first = 0;
461         }
462
463         return 0;
464 }
465
/*
 * Tear down the revoke table after replay.  On error the table is
 * cleaned immediately; on success the summary is printed and the table
 * cleaned after pass 1.
 */
static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_revoke_clean(sdp);
		return;
	}
	if (pass != 1)
		return;

	fs_info(sdp, "jid=%u: Found %u revoke tags\n",
		jd->jd_jid, sdp->sd_found_revokes);

	gfs2_revoke_clean(sdp);
}
482
483 static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
484 {
485         struct gfs2_rgrpd *rgd;
486         struct gfs2_trans *tr = current->journal_info;
487
488         tr->tr_touched = 1;
489
490         rgd = container_of(le, struct gfs2_rgrpd, rd_le);
491
492         gfs2_log_lock(sdp);
493         if (!list_empty(&le->le_list)){
494                 gfs2_log_unlock(sdp);
495                 return;
496         }
497         gfs2_rgrp_bh_hold(rgd);
498         sdp->sd_log_num_rg++;
499         list_add(&le->le_list, &sdp->sd_log_le_rg);
500         gfs2_log_unlock(sdp);
501 }
502
/*
 * After commit: refresh each queued resource group's bitmap clones and
 * drop the hold taken in rg_lo_add().
 */
static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *head = &sdp->sd_log_le_rg;
	struct gfs2_rgrpd *rgd;

	while (!list_empty(head)) {
		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_le.le_list);
		list_del_init(&rgd->rd_le.le_list);
		sdp->sd_log_num_rg--;

		gfs2_rgrp_repolish_clones(rgd);
		gfs2_rgrp_bh_put(rgd);
	}
	/* Count and list must drain together */
	gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
}
518
519 /**
520  * databuf_lo_add - Add a databuf to the transaction.
521  *
522  * This is used in two distinct cases:
523  * i) In ordered write mode
524  *    We put the data buffer on a list so that we can ensure that its
525  *    synced to disk at the right time
526  * ii) In journaled data mode
527  *    We need to journal the data block in the same way as metadata in
528  *    the functions above. The difference is that here we have a tag
529  *    which is two __be64's being the block number (as per meta data)
530  *    and a flag which says whether the data block needs escaping or
531  *    not. This means we need a new log entry for each 251 or so data
532  *    blocks, which isn't an enormous overhead but twice as much as
533  *    for normal metadata blocks.
534  */
535 static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
536 {
537         struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
538         struct gfs2_trans *tr = current->journal_info;
539         struct address_space *mapping = bd->bd_bh->b_page->mapping;
540         struct gfs2_inode *ip = GFS2_I(mapping->host);
541
542         lock_buffer(bd->bd_bh);
543         gfs2_log_lock(sdp);
544         if (!list_empty(&bd->bd_list_tr))
545                 goto out;
546         tr->tr_touched = 1;
547         if (gfs2_is_jdata(ip)) {
548                 tr->tr_num_buf++;
549                 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
550         }
551         if (!list_empty(&le->le_list))
552                 goto out;
553
554         __glock_lo_add(sdp, &bd->bd_gl->gl_le);
555         if (gfs2_is_jdata(ip)) {
556                 gfs2_pin(sdp, bd->bd_bh);
557                 tr->tr_num_databuf_new++;
558                 sdp->sd_log_num_jdata++;
559         }
560         sdp->sd_log_num_databuf++;
561         list_add(&le->le_list, &sdp->sd_log_le_databuf);
562 out:
563         gfs2_log_unlock(sdp);
564         unlock_buffer(bd->bd_bh);
565 }
566
567 static int gfs2_check_magic(struct buffer_head *bh)
568 {
569         struct page *page = bh->b_page;
570         void *kaddr;
571         __be32 *ptr;
572         int rv = 0;
573
574         kaddr = kmap_atomic(page, KM_USER0);
575         ptr = kaddr + bh_offset(bh);
576         if (*ptr == cpu_to_be32(GFS2_MAGIC))
577                 rv = 1;
578         kunmap_atomic(kaddr, KM_USER0);
579
580         return rv;
581 }
582
583 /**
584  * databuf_lo_before_commit - Scan the data buffers, writing as we go
585  *
586  * Here we scan through the lists of buffers and make the assumption
587  * that any buffer thats been pinned is being journaled, and that
588  * any unpinned buffer is an ordered write data buffer and therefore
589  * will be written back rather than journaled.
590  */
591 static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
592 {
593         LIST_HEAD(started);
594         struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
595         struct buffer_head *bh = NULL,*bh1 = NULL;
596         struct gfs2_log_descriptor *ld;
597         unsigned int limit;
598         unsigned int total_dbuf;
599         unsigned int total_jdata;
600         unsigned int num, n;
601         __be64 *ptr = NULL;
602
603         limit = databuf_limit(sdp);
604
605         /*
606          * Start writing ordered buffers, write journaled buffers
607          * into the log along with a header
608          */
609         gfs2_log_lock(sdp);
610         total_dbuf = sdp->sd_log_num_databuf;
611         total_jdata = sdp->sd_log_num_jdata;
612         bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf,
613                                        bd_le.le_list);
614         while(total_dbuf) {
615                 num = total_jdata;
616                 if (num > limit)
617                         num = limit;
618                 n = 0;
619                 list_for_each_entry_safe_continue(bd1, bdt,
620                                                   &sdp->sd_log_le_databuf,
621                                                   bd_le.le_list) {
622                         /* store off the buffer head in a local ptr since
623                          * gfs2_bufdata might change when we drop the log lock
624                          */
625                         bh1 = bd1->bd_bh;
626
627                         /* An ordered write buffer */
628                         if (bh1 && !buffer_pinned(bh1)) {
629                                 list_move(&bd1->bd_le.le_list, &started);
630                                 if (bd1 == bd2) {
631                                         bd2 = NULL;
632                                         bd2 = list_prepare_entry(bd2,
633                                                         &sdp->sd_log_le_databuf,
634                                                         bd_le.le_list);
635                                 }
636                                 total_dbuf--;
637                                 if (bh1) {
638                                         if (buffer_dirty(bh1)) {
639                                                 get_bh(bh1);
640
641                                                 gfs2_log_unlock(sdp);
642
643                                                 ll_rw_block(SWRITE, 1, &bh1);
644                                                 brelse(bh1);
645
646                                                 gfs2_log_lock(sdp);
647                                         }
648                                         continue;
649                                 }
650                                 continue;
651                         } else if (bh1) { /* A journaled buffer */
652                                 int magic;
653                                 gfs2_log_unlock(sdp);
654                                 if (!bh) {
655                                         bh = gfs2_log_get_buf(sdp);
656                                         ld = (struct gfs2_log_descriptor *)
657                                              bh->b_data;
658                                         ptr = (__be64 *)(bh->b_data +
659                                                          DATABUF_OFFSET);
660                                         ld->ld_header.mh_magic =
661                                                 cpu_to_be32(GFS2_MAGIC);
662                                         ld->ld_header.mh_type =
663                                                 cpu_to_be32(GFS2_METATYPE_LD);
664                                         ld->ld_header.mh_format =
665                                                 cpu_to_be32(GFS2_FORMAT_LD);
666                                         ld->ld_type =
667                                                 cpu_to_be32(GFS2_LOG_DESC_JDATA);
668                                         ld->ld_length = cpu_to_be32(num + 1);
669                                         ld->ld_data1 = cpu_to_be32(num);
670                                         ld->ld_data2 = cpu_to_be32(0);
671                                         memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
672                                 }
673                                 magic = gfs2_check_magic(bh1);
674                                 *ptr++ = cpu_to_be64(bh1->b_blocknr);
675                                 *ptr++ = cpu_to_be64((__u64)magic);
676                                 clear_buffer_escaped(bh1);
677                                 if (unlikely(magic != 0))
678                                         set_buffer_escaped(bh1);
679                                 gfs2_log_lock(sdp);
680                                 if (++n >= num)
681                                         break;
682                         } else if (!bh1) {
683                                 total_dbuf--;
684                                 sdp->sd_log_num_databuf--;
685                                 list_del_init(&bd1->bd_le.le_list);
686                                 if (bd1 == bd2) {
687                                         bd2 = NULL;
688                                         bd2 = list_prepare_entry(bd2,
689                                                 &sdp->sd_log_le_databuf,
690                                                 bd_le.le_list);
691                                 }
692                                 kmem_cache_free(gfs2_bufdata_cachep, bd1);
693                         }
694                 }
695                 gfs2_log_unlock(sdp);
696                 if (bh) {
697                         set_buffer_dirty(bh);
698                         ll_rw_block(WRITE, 1, &bh);
699                         bh = NULL;
700                         ptr = NULL;
701                 }
702                 n = 0;
703                 gfs2_log_lock(sdp);
704                 list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf,
705                                              bd_le.le_list) {
706                         if (!bd2->bd_bh)
707                                 continue;
708                         /* copy buffer if it needs escaping */
709                         gfs2_log_unlock(sdp);
710                         if (unlikely(buffer_escaped(bd2->bd_bh))) {
711                                 void *kaddr;
712                                 struct page *page = bd2->bd_bh->b_page;
713                                 bh = gfs2_log_get_buf(sdp);
714                                 kaddr = kmap_atomic(page, KM_USER0);
715                                 memcpy(bh->b_data,
716                                        kaddr + bh_offset(bd2->bd_bh),
717                                        sdp->sd_sb.sb_bsize);
718                                 kunmap_atomic(kaddr, KM_USER0);
719                                 *(__be32 *)bh->b_data = 0;
720                         } else {
721                                 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
722                         }
723                         set_buffer_dirty(bh);
724                         ll_rw_block(WRITE, 1, &bh);
725                         gfs2_log_lock(sdp);
726                         if (++n >= num)
727                                 break;
728                 }
729                 bh = NULL;
730                 BUG_ON(total_dbuf < num);
731                 total_dbuf -= num;
732                 total_jdata -= num;
733         }
734         gfs2_log_unlock(sdp);
735
736         /* Wait on all ordered buffers */
737         while (!list_empty(&started)) {
738                 gfs2_log_lock(sdp);
739                 bd1 = list_entry(started.next, struct gfs2_bufdata,
740                                  bd_le.le_list);
741                 list_del_init(&bd1->bd_le.le_list);
742                 sdp->sd_log_num_databuf--;
743                 bh = bd1->bd_bh;
744                 if (bh) {
745                         bh->b_private = NULL;
746                         get_bh(bh);
747                         gfs2_log_unlock(sdp);
748                         wait_on_buffer(bh);
749                         brelse(bh);
750                 } else
751                         gfs2_log_unlock(sdp);
752
753                 kmem_cache_free(gfs2_bufdata_cachep, bd1);
754         }
755
756         /* We've removed all the ordered write bufs here, so only jdata left */
757         gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata);
758 }
759
/*
 * Replay one journaled-data log descriptor during recovery (pass 1).
 * Each entry is a pair of __be64s: the block number and the escape
 * flag.  Escaped blocks had their leading GFS2 magic zeroed before
 * logging; it is restored here before the block is written back.
 *
 * Returns 0 on success or a negative errno from reading a journal block.
 */
static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				    struct gfs2_log_descriptor *ld,
				    __be64 *ptr, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	u64 esc;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
		return 0;

	/* Skip past the descriptor block itself */
	gfs2_replay_incr_blk(sdp, &start);
	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);
		esc = be64_to_cpu(*ptr++);

		sdp->sd_found_blocks++;

		/* A later revoke supersedes this logged copy */
		if (gfs2_revoke_check(sdp, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		/* Unescape */
		if (esc) {
			__be32 *eptr = (__be32 *)bh_ip->b_data;
			*eptr = cpu_to_be32(GFS2_MAGIC);
		}
		mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);
		/* NOTE(review): error is always 0 here (no check between the
		 * read above and this point), so this break is dead code */
		if (error)
			break;

		sdp->sd_replayed_blocks++;
	}

	return error;
}
810
811 /* FIXME: sort out accounting for log blocks etc. */
812
/*
 * Finish journaled-data replay: sync the replayed blocks and report
 * the statistics after pass 1.  Mirrors buf_lo_after_scan().
 */
static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_meta_sync(ip->i_gl);
		return;
	}
	if (pass != 1)
		return;

	/* data sync? */
	gfs2_meta_sync(ip->i_gl);

	fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
		jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
}
831
/*
 * After commit: unpin the journaled data buffers and hand them to the
 * AIL entry @ai.  Ordered-write buffers were already drained in
 * databuf_lo_before_commit(), so only jdata buffers remain on the list.
 */
static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *head = &sdp->sd_log_le_databuf;
	struct gfs2_bufdata *bd;

	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
		list_del_init(&bd->bd_le.le_list);
		sdp->sd_log_num_databuf--;
		sdp->sd_log_num_jdata--;
		gfs2_unpin(sdp, bd->bd_bh, ai);
	}
	/* Both counters must drain with the list */
	gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
	gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata);
}
847
848
/* Log operations for glocks: queued per-transaction, released after commit */
const struct gfs2_log_operations gfs2_glock_lops = {
	.lo_add = glock_lo_add,
	.lo_after_commit = glock_lo_after_commit,
	.lo_name = "glock",
};

/* Log operations for journaled metadata buffers */
const struct gfs2_log_operations gfs2_buf_lops = {
	.lo_add = buf_lo_add,
	.lo_incore_commit = buf_lo_incore_commit,
	.lo_before_commit = buf_lo_before_commit,
	.lo_after_commit = buf_lo_after_commit,
	.lo_before_scan = buf_lo_before_scan,
	.lo_scan_elements = buf_lo_scan_elements,
	.lo_after_scan = buf_lo_after_scan,
	.lo_name = "buf",
};

/* Log operations for revoke tags (no after_commit: freed at write time) */
const struct gfs2_log_operations gfs2_revoke_lops = {
	.lo_add = revoke_lo_add,
	.lo_before_commit = revoke_lo_before_commit,
	.lo_before_scan = revoke_lo_before_scan,
	.lo_scan_elements = revoke_lo_scan_elements,
	.lo_after_scan = revoke_lo_after_scan,
	.lo_name = "revoke",
};

/* Log operations for resource groups (held/released around commit) */
const struct gfs2_log_operations gfs2_rg_lops = {
	.lo_add = rg_lo_add,
	.lo_after_commit = rg_lo_after_commit,
	.lo_name = "rg",
};

/* Log operations for data buffers (ordered and journaled data);
 * shares the incore-commit handler with the metadata lops */
const struct gfs2_log_operations gfs2_databuf_lops = {
	.lo_add = databuf_lo_add,
	.lo_incore_commit = buf_lo_incore_commit,
	.lo_before_commit = databuf_lo_before_commit,
	.lo_after_commit = databuf_lo_after_commit,
	.lo_scan_elements = databuf_lo_scan_elements,
	.lo_after_scan = databuf_lo_after_scan,
	.lo_name = "databuf",
};

/* NULL-terminated table used to iterate over all log operation types */
const struct gfs2_log_operations *gfs2_log_ops[] = {
	&gfs2_glock_lops,
	&gfs2_buf_lops,
	&gfs2_revoke_lops,
	&gfs2_rg_lops,
	&gfs2_databuf_lops,
	NULL,
};
899