ocfs2: Morph the haphazard OCFS2_IS_VALID_GROUP_DESC() checks.
[safe/jmp/linux-2.6] / fs / ocfs2 / suballoc.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * suballoc.c
5  *
6  * metadata alloc and free
7  * Inspired by ext3 block groups.
8  *
9  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
10  *
11  * This program is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU General Public
13  * License as published by the Free Software Foundation; either
14  * version 2 of the License, or (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public
22  * License along with this program; if not, write to the
23  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24  * Boston, MA 02111-1307, USA.
25  */
26
27 #include <linux/fs.h>
28 #include <linux/types.h>
29 #include <linux/slab.h>
30 #include <linux/highmem.h>
31
32 #define MLOG_MASK_PREFIX ML_DISK_ALLOC
33 #include <cluster/masklog.h>
34
35 #include "ocfs2.h"
36
37 #include "alloc.h"
38 #include "dlmglue.h"
39 #include "inode.h"
40 #include "journal.h"
41 #include "localalloc.h"
42 #include "suballoc.h"
43 #include "super.h"
44 #include "sysfile.h"
45 #include "uptodate.h"
46
47 #include "buffer_head_io.h"
48
/*
 * Values for the alloc_new_group argument of
 * ocfs2_reserve_suballoc_bits(): whether a full block-group allocator
 * may be extended with a new group or must fail with -ENOSPC.
 */
#define NOT_ALLOC_NEW_GROUP		0
#define ALLOC_NEW_GROUP			1

/*
 * ocfs2_reserve_new_inode() keeps stealing from other slots only while
 * osb->s_num_inodes_stolen is below this; after that it resets the
 * counter and retries its own slot's inode allocator first.
 */
#define OCFS2_MAX_INODES_TO_STEAL	1024
53
/*
 * Forward declarations for file-local helpers.  Several of these are
 * defined later in this file; the rest are presumably defined further
 * down as well (not all definitions are visible from here).
 */

/* Debug helpers. */
static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);

/* Block group creation. */
static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
static int ocfs2_block_group_fill(handle_t *handle,
				  struct inode *alloc_inode,
				  struct buffer_head *bg_bh,
				  u64 group_blkno,
				  u16 my_chain,
				  struct ocfs2_chain_list *cl);
static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
				   struct inode *alloc_inode,
				   struct buffer_head *bh,
				   u64 max_block);

/*
 * Group search callbacks; these are the functions installed in
 * ac->ac_group_search by the reserve routines below.
 */
static int ocfs2_cluster_group_search(struct inode *inode,
				      struct buffer_head *group_bh,
				      u32 bits_wanted, u32 min_bits,
				      u64 max_block,
				      u16 *bit_off, u16 *bits_found);
static int ocfs2_block_group_search(struct inode *inode,
				    struct buffer_head *group_bh,
				    u32 bits_wanted, u32 min_bits,
				    u64 max_block,
				    u16 *bit_off, u16 *bits_found);

/* Bit claiming and bitmap manipulation. */
static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
				     struct ocfs2_alloc_context *ac,
				     handle_t *handle,
				     u32 bits_wanted,
				     u32 min_bits,
				     u16 *bit_off,
				     unsigned int *num_bits,
				     u64 *bg_blkno);
static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
					 int nr);
static inline int ocfs2_block_group_set_bits(handle_t *handle,
					     struct inode *alloc_inode,
					     struct ocfs2_group_desc *bg,
					     struct buffer_head *group_bh,
					     unsigned int bit_off,
					     unsigned int num_bits);
static inline int ocfs2_block_group_clear_bits(handle_t *handle,
					       struct inode *alloc_inode,
					       struct ocfs2_group_desc *bg,
					       struct buffer_head *group_bh,
					       unsigned int bit_off,
					       unsigned int num_bits);

/* Chain maintenance and offset conversion helpers. */
static int ocfs2_relink_block_group(handle_t *handle,
				    struct inode *alloc_inode,
				    struct buffer_head *fe_bh,
				    struct buffer_head *bg_bh,
				    struct buffer_head *prev_bg_bh,
				    u16 chain);
static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
						     u32 wanted);
static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
						   u64 bg_blkno,
						   u16 bg_bit_off);
static inline void ocfs2_block_to_cluster_group(struct inode *inode,
						u64 data_blkno,
						u64 *bg_blkno,
						u16 *bg_bit_off);

/* Needed by ocfs2_block_group_alloc(), which precedes its definition. */
static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
					     u32 bits_wanted, u64 max_block,
					     struct ocfs2_alloc_context **ac);

119
120 void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
121 {
122         struct inode *inode = ac->ac_inode;
123
124         if (inode) {
125                 if (ac->ac_which != OCFS2_AC_USE_LOCAL)
126                         ocfs2_inode_unlock(inode, 1);
127
128                 mutex_unlock(&inode->i_mutex);
129
130                 iput(inode);
131                 ac->ac_inode = NULL;
132         }
133         brelse(ac->ac_bh);
134         ac->ac_bh = NULL;
135 }
136
/*
 * Tear down an allocation context completely: first drop whatever
 * locks and references it still holds, then free the context memory.
 * @ac must not be NULL; it is dereferenced unconditionally.
 */
void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
{
	/* Locks/references first, while ac is still valid. */
	ocfs2_free_ac_resource(ac);

	kfree(ac);
}
142
143 static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
144 {
145         return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
146 }
147
148 int ocfs2_validate_group_descriptor(struct super_block *sb,
149                                     struct ocfs2_dinode *di,
150                                     struct buffer_head *bh,
151                                     int clean_error)
152 {
153         unsigned int max_bits;
154         struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
155
156 #define do_error(fmt, ...)                                              \
157         do{                                                             \
158                 if (clean_error)                                        \
159                         mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__);        \
160                 else                                                    \
161                         ocfs2_error(sb, fmt, ##__VA_ARGS__);            \
162         } while (0)
163
164         if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
165                 do_error("Group descriptor #%llu has bad signature %.*s",
166                          (unsigned long long)bh->b_blocknr, 7,
167                          gd->bg_signature);
168                 return -EINVAL;
169         }
170
171         if (le64_to_cpu(gd->bg_blkno) != bh->b_blocknr) {
172                 do_error("Group descriptor #%llu has an invalid bg_blkno "
173                          "of %llu",
174                          (unsigned long long)bh->b_blocknr,
175                          (unsigned long long)le64_to_cpu(gd->bg_blkno));
176                 return -EINVAL;
177         }
178
179         if (le32_to_cpu(gd->bg_generation) != OCFS2_SB(sb)->fs_generation) {
180                 do_error("Group descriptor #%llu has an invalid "
181                          "fs_generation of #%u",
182                          (unsigned long long)bh->b_blocknr,
183                          le32_to_cpu(gd->bg_generation));
184                 return -EINVAL;
185         }
186
187         if (di->i_blkno != gd->bg_parent_dinode) {
188                 do_error("Group descriptor #%llu has bad parent "
189                          "pointer (%llu, expected %llu)",
190                          (unsigned long long)bh->b_blocknr,
191                          (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
192                          (unsigned long long)le64_to_cpu(di->i_blkno));
193                 return -EINVAL;
194         }
195
196         max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
197         if (le16_to_cpu(gd->bg_bits) > max_bits) {
198                 do_error("Group descriptor #%llu has bit count of %u",
199                          (unsigned long long)bh->b_blocknr,
200                          le16_to_cpu(gd->bg_bits));
201                 return -EINVAL;
202         }
203
204         if (le16_to_cpu(gd->bg_chain) >=
205             le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
206                 do_error("Group descriptor #%llu has bad chain %u",
207                          (unsigned long long)bh->b_blocknr,
208                          le16_to_cpu(gd->bg_chain));
209                 return -EINVAL;
210         }
211
212         if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
213                 do_error("Group descriptor #%llu has bit count %u but "
214                          "claims that %u are free",
215                          (unsigned long long)bh->b_blocknr,
216                          le16_to_cpu(gd->bg_bits),
217                          le16_to_cpu(gd->bg_free_bits_count));
218                 return -EINVAL;
219         }
220
221         if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
222                 do_error("Group descriptor #%llu has bit count %u but "
223                          "max bitmap bits of %u",
224                          (unsigned long long)bh->b_blocknr,
225                          le16_to_cpu(gd->bg_bits),
226                          8 * le16_to_cpu(gd->bg_size));
227                 return -EINVAL;
228         }
229 #undef do_error
230
231         return 0;
232 }
233
234 int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
235                                 u64 gd_blkno, struct buffer_head **bh)
236 {
237         int rc;
238         struct buffer_head *tmp = *bh;
239
240         rc = ocfs2_read_block(inode, gd_blkno, &tmp);
241         if (rc)
242                 goto out;
243
244         rc = ocfs2_validate_group_descriptor(inode->i_sb, di, tmp, 0);
245         if (rc) {
246                 brelse(tmp);
247                 goto out;
248         }
249
250         /* If ocfs2_read_block() got us a new bh, pass it up. */
251         if (!*bh)
252                 *bh = tmp;
253
254 out:
255         return rc;
256 }
257
258 static int ocfs2_block_group_fill(handle_t *handle,
259                                   struct inode *alloc_inode,
260                                   struct buffer_head *bg_bh,
261                                   u64 group_blkno,
262                                   u16 my_chain,
263                                   struct ocfs2_chain_list *cl)
264 {
265         int status = 0;
266         struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
267         struct super_block * sb = alloc_inode->i_sb;
268
269         mlog_entry_void();
270
271         if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) {
272                 ocfs2_error(alloc_inode->i_sb, "group block (%llu) != "
273                             "b_blocknr (%llu)",
274                             (unsigned long long)group_blkno,
275                             (unsigned long long) bg_bh->b_blocknr);
276                 status = -EIO;
277                 goto bail;
278         }
279
280         status = ocfs2_journal_access(handle,
281                                       alloc_inode,
282                                       bg_bh,
283                                       OCFS2_JOURNAL_ACCESS_CREATE);
284         if (status < 0) {
285                 mlog_errno(status);
286                 goto bail;
287         }
288
289         memset(bg, 0, sb->s_blocksize);
290         strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE);
291         bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
292         bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb));
293         bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl));
294         bg->bg_chain = cpu_to_le16(my_chain);
295         bg->bg_next_group = cl->cl_recs[my_chain].c_blkno;
296         bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno);
297         bg->bg_blkno = cpu_to_le64(group_blkno);
298         /* set the 1st bit in the bitmap to account for the descriptor block */
299         ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap);
300         bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1);
301
302         status = ocfs2_journal_dirty(handle, bg_bh);
303         if (status < 0)
304                 mlog_errno(status);
305
306         /* There is no need to zero out or otherwise initialize the
307          * other blocks in a group - All valid FS metadata in a block
308          * group stores the superblock fs_generation value at
309          * allocation time. */
310
311 bail:
312         mlog_exit(status);
313         return status;
314 }
315
316 static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
317 {
318         u16 curr, best;
319
320         best = curr = 0;
321         while (curr < le16_to_cpu(cl->cl_count)) {
322                 if (le32_to_cpu(cl->cl_recs[best].c_total) >
323                     le32_to_cpu(cl->cl_recs[curr].c_total))
324                         best = curr;
325                 curr++;
326         }
327         return best;
328 }
329
330 /*
331  * We expect the block group allocator to already be locked.
332  */
333 static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
334                                    struct inode *alloc_inode,
335                                    struct buffer_head *bh,
336                                    u64 max_block)
337 {
338         int status, credits;
339         struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
340         struct ocfs2_chain_list *cl;
341         struct ocfs2_alloc_context *ac = NULL;
342         handle_t *handle = NULL;
343         u32 bit_off, num_bits;
344         u16 alloc_rec;
345         u64 bg_blkno;
346         struct buffer_head *bg_bh = NULL;
347         struct ocfs2_group_desc *bg;
348
349         BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode));
350
351         mlog_entry_void();
352
353         cl = &fe->id2.i_chain;
354         status = ocfs2_reserve_clusters_with_limit(osb,
355                                                    le16_to_cpu(cl->cl_cpg),
356                                                    max_block, &ac);
357         if (status < 0) {
358                 if (status != -ENOSPC)
359                         mlog_errno(status);
360                 goto bail;
361         }
362
363         credits = ocfs2_calc_group_alloc_credits(osb->sb,
364                                                  le16_to_cpu(cl->cl_cpg));
365         handle = ocfs2_start_trans(osb, credits);
366         if (IS_ERR(handle)) {
367                 status = PTR_ERR(handle);
368                 handle = NULL;
369                 mlog_errno(status);
370                 goto bail;
371         }
372
373         status = ocfs2_claim_clusters(osb,
374                                       handle,
375                                       ac,
376                                       le16_to_cpu(cl->cl_cpg),
377                                       &bit_off,
378                                       &num_bits);
379         if (status < 0) {
380                 if (status != -ENOSPC)
381                         mlog_errno(status);
382                 goto bail;
383         }
384
385         alloc_rec = ocfs2_find_smallest_chain(cl);
386
387         /* setup the group */
388         bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
389         mlog(0, "new descriptor, record %u, at block %llu\n",
390              alloc_rec, (unsigned long long)bg_blkno);
391
392         bg_bh = sb_getblk(osb->sb, bg_blkno);
393         if (!bg_bh) {
394                 status = -EIO;
395                 mlog_errno(status);
396                 goto bail;
397         }
398         ocfs2_set_new_buffer_uptodate(alloc_inode, bg_bh);
399
400         status = ocfs2_block_group_fill(handle,
401                                         alloc_inode,
402                                         bg_bh,
403                                         bg_blkno,
404                                         alloc_rec,
405                                         cl);
406         if (status < 0) {
407                 mlog_errno(status);
408                 goto bail;
409         }
410
411         bg = (struct ocfs2_group_desc *) bg_bh->b_data;
412
413         status = ocfs2_journal_access(handle, alloc_inode,
414                                       bh, OCFS2_JOURNAL_ACCESS_WRITE);
415         if (status < 0) {
416                 mlog_errno(status);
417                 goto bail;
418         }
419
420         le32_add_cpu(&cl->cl_recs[alloc_rec].c_free,
421                      le16_to_cpu(bg->bg_free_bits_count));
422         le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits));
423         cl->cl_recs[alloc_rec].c_blkno  = cpu_to_le64(bg_blkno);
424         if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
425                 le16_add_cpu(&cl->cl_next_free_rec, 1);
426
427         le32_add_cpu(&fe->id1.bitmap1.i_used, le16_to_cpu(bg->bg_bits) -
428                                         le16_to_cpu(bg->bg_free_bits_count));
429         le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits));
430         le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg));
431
432         status = ocfs2_journal_dirty(handle, bh);
433         if (status < 0) {
434                 mlog_errno(status);
435                 goto bail;
436         }
437
438         spin_lock(&OCFS2_I(alloc_inode)->ip_lock);
439         OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
440         fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb,
441                                              le32_to_cpu(fe->i_clusters)));
442         spin_unlock(&OCFS2_I(alloc_inode)->ip_lock);
443         i_size_write(alloc_inode, le64_to_cpu(fe->i_size));
444         alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);
445
446         status = 0;
447 bail:
448         if (handle)
449                 ocfs2_commit_trans(osb, handle);
450
451         if (ac)
452                 ocfs2_free_alloc_context(ac);
453
454         brelse(bg_bh);
455
456         mlog_exit(status);
457         return status;
458 }
459
460 static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
461                                        struct ocfs2_alloc_context *ac,
462                                        int type,
463                                        u32 slot,
464                                        int alloc_new_group)
465 {
466         int status;
467         u32 bits_wanted = ac->ac_bits_wanted;
468         struct inode *alloc_inode;
469         struct buffer_head *bh = NULL;
470         struct ocfs2_dinode *fe;
471         u32 free_bits;
472
473         mlog_entry_void();
474
475         alloc_inode = ocfs2_get_system_file_inode(osb, type, slot);
476         if (!alloc_inode) {
477                 mlog_errno(-EINVAL);
478                 return -EINVAL;
479         }
480
481         mutex_lock(&alloc_inode->i_mutex);
482
483         status = ocfs2_inode_lock(alloc_inode, &bh, 1);
484         if (status < 0) {
485                 mutex_unlock(&alloc_inode->i_mutex);
486                 iput(alloc_inode);
487
488                 mlog_errno(status);
489                 return status;
490         }
491
492         ac->ac_inode = alloc_inode;
493         ac->ac_alloc_slot = slot;
494
495         fe = (struct ocfs2_dinode *) bh->b_data;
496
497         /* The bh was validated by the inode read inside
498          * ocfs2_inode_lock().  Any corruption is a code bug. */
499         BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
500
501         if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
502                 ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
503                             (unsigned long long)le64_to_cpu(fe->i_blkno));
504                 status = -EIO;
505                 goto bail;
506         }
507
508         free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) -
509                 le32_to_cpu(fe->id1.bitmap1.i_used);
510
511         if (bits_wanted > free_bits) {
512                 /* cluster bitmap never grows */
513                 if (ocfs2_is_cluster_bitmap(alloc_inode)) {
514                         mlog(0, "Disk Full: wanted=%u, free_bits=%u\n",
515                              bits_wanted, free_bits);
516                         status = -ENOSPC;
517                         goto bail;
518                 }
519
520                 if (alloc_new_group != ALLOC_NEW_GROUP) {
521                         mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, "
522                              "and we don't alloc a new group for it.\n",
523                              slot, bits_wanted, free_bits);
524                         status = -ENOSPC;
525                         goto bail;
526                 }
527
528                 status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
529                                                  ac->ac_max_block);
530                 if (status < 0) {
531                         if (status != -ENOSPC)
532                                 mlog_errno(status);
533                         goto bail;
534                 }
535                 atomic_inc(&osb->alloc_stats.bg_extends);
536
537                 /* You should never ask for this much metadata */
538                 BUG_ON(bits_wanted >
539                        (le32_to_cpu(fe->id1.bitmap1.i_total)
540                         - le32_to_cpu(fe->id1.bitmap1.i_used)));
541         }
542
543         get_bh(bh);
544         ac->ac_bh = bh;
545 bail:
546         brelse(bh);
547
548         mlog_exit(status);
549         return status;
550 }
551
552 int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
553                                       int blocks,
554                                       struct ocfs2_alloc_context **ac)
555 {
556         int status;
557         u32 slot;
558
559         *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
560         if (!(*ac)) {
561                 status = -ENOMEM;
562                 mlog_errno(status);
563                 goto bail;
564         }
565
566         (*ac)->ac_bits_wanted = blocks;
567         (*ac)->ac_which = OCFS2_AC_USE_META;
568         slot = osb->slot_num;
569         (*ac)->ac_group_search = ocfs2_block_group_search;
570
571         status = ocfs2_reserve_suballoc_bits(osb, (*ac),
572                                              EXTENT_ALLOC_SYSTEM_INODE,
573                                              slot, ALLOC_NEW_GROUP);
574         if (status < 0) {
575                 if (status != -ENOSPC)
576                         mlog_errno(status);
577                 goto bail;
578         }
579
580         status = 0;
581 bail:
582         if ((status < 0) && *ac) {
583                 ocfs2_free_alloc_context(*ac);
584                 *ac = NULL;
585         }
586
587         mlog_exit(status);
588         return status;
589 }
590
/*
 * Reserve as many metadata blocks as ocfs2_extend_meta_needed()
 * reports for @root_el.  See ocfs2_reserve_new_metadata_blocks() for
 * the return value and the handling of *@ac.
 */
int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
			       struct ocfs2_extent_list *root_el,
			       struct ocfs2_alloc_context **ac)
{
	int blocks_needed = ocfs2_extend_meta_needed(root_el);

	return ocfs2_reserve_new_metadata_blocks(osb, blocks_needed, ac);
}
599
600 static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb,
601                                               struct ocfs2_alloc_context *ac)
602 {
603         int i, status = -ENOSPC;
604         s16 slot = ocfs2_get_inode_steal_slot(osb);
605
606         /* Start to steal inodes from the first slot after ours. */
607         if (slot == OCFS2_INVALID_SLOT)
608                 slot = osb->slot_num + 1;
609
610         for (i = 0; i < osb->max_slots; i++, slot++) {
611                 if (slot == osb->max_slots)
612                         slot = 0;
613
614                 if (slot == osb->slot_num)
615                         continue;
616
617                 status = ocfs2_reserve_suballoc_bits(osb, ac,
618                                                      INODE_ALLOC_SYSTEM_INODE,
619                                                      slot, NOT_ALLOC_NEW_GROUP);
620                 if (status >= 0) {
621                         ocfs2_set_inode_steal_slot(osb, slot);
622                         break;
623                 }
624
625                 ocfs2_free_ac_resource(ac);
626         }
627
628         return status;
629 }
630
631 int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
632                             struct ocfs2_alloc_context **ac)
633 {
634         int status;
635         s16 slot = ocfs2_get_inode_steal_slot(osb);
636
637         *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
638         if (!(*ac)) {
639                 status = -ENOMEM;
640                 mlog_errno(status);
641                 goto bail;
642         }
643
644         (*ac)->ac_bits_wanted = 1;
645         (*ac)->ac_which = OCFS2_AC_USE_INODE;
646
647         (*ac)->ac_group_search = ocfs2_block_group_search;
648
649         /*
650          * stat(2) can't handle i_ino > 32bits, so we tell the
651          * lower levels not to allocate us a block group past that
652          * limit.  The 'inode64' mount option avoids this behavior.
653          */
654         if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64))
655                 (*ac)->ac_max_block = (u32)~0U;
656
657         /*
658          * slot is set when we successfully steal inode from other nodes.
659          * It is reset in 3 places:
660          * 1. when we flush the truncate log
661          * 2. when we complete local alloc recovery.
662          * 3. when we successfully allocate from our own slot.
663          * After it is set, we will go on stealing inodes until we find the
664          * need to check our slots to see whether there is some space for us.
665          */
666         if (slot != OCFS2_INVALID_SLOT &&
667             atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_INODES_TO_STEAL)
668                 goto inode_steal;
669
670         atomic_set(&osb->s_num_inodes_stolen, 0);
671         status = ocfs2_reserve_suballoc_bits(osb, *ac,
672                                              INODE_ALLOC_SYSTEM_INODE,
673                                              osb->slot_num, ALLOC_NEW_GROUP);
674         if (status >= 0) {
675                 status = 0;
676
677                 /*
678                  * Some inodes must be freed by us, so try to allocate
679                  * from our own next time.
680                  */
681                 if (slot != OCFS2_INVALID_SLOT)
682                         ocfs2_init_inode_steal_slot(osb);
683                 goto bail;
684         } else if (status < 0 && status != -ENOSPC) {
685                 mlog_errno(status);
686                 goto bail;
687         }
688
689         ocfs2_free_ac_resource(*ac);
690
691 inode_steal:
692         status = ocfs2_steal_inode_from_other_nodes(osb, *ac);
693         atomic_inc(&osb->s_num_inodes_stolen);
694         if (status < 0) {
695                 if (status != -ENOSPC)
696                         mlog_errno(status);
697                 goto bail;
698         }
699
700         status = 0;
701 bail:
702         if ((status < 0) && *ac) {
703                 ocfs2_free_alloc_context(*ac);
704                 *ac = NULL;
705         }
706
707         mlog_exit(status);
708         return status;
709 }
710
711 /* local alloc code has to do the same thing, so rather than do this
712  * twice.. */
713 int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
714                                       struct ocfs2_alloc_context *ac)
715 {
716         int status;
717
718         ac->ac_which = OCFS2_AC_USE_MAIN;
719         ac->ac_group_search = ocfs2_cluster_group_search;
720
721         status = ocfs2_reserve_suballoc_bits(osb, ac,
722                                              GLOBAL_BITMAP_SYSTEM_INODE,
723                                              OCFS2_INVALID_SLOT,
724                                              ALLOC_NEW_GROUP);
725         if (status < 0 && status != -ENOSPC) {
726                 mlog_errno(status);
727                 goto bail;
728         }
729
730 bail:
731         return status;
732 }
733
734 /* Callers don't need to care which bitmap (local alloc or main) to
735  * use so we figure it out for them, but unfortunately this clutters
736  * things a bit. */
737 static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
738                                              u32 bits_wanted, u64 max_block,
739                                              struct ocfs2_alloc_context **ac)
740 {
741         int status;
742
743         mlog_entry_void();
744
745         *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
746         if (!(*ac)) {
747                 status = -ENOMEM;
748                 mlog_errno(status);
749                 goto bail;
750         }
751
752         (*ac)->ac_bits_wanted = bits_wanted;
753         (*ac)->ac_max_block = max_block;
754
755         status = -ENOSPC;
756         if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
757                 status = ocfs2_reserve_local_alloc_bits(osb,
758                                                         bits_wanted,
759                                                         *ac);
760                 if (status == -EFBIG) {
761                         /* The local alloc window is outside ac_max_block.
762                          * use the main bitmap. */
763                         status = -ENOSPC;
764                 } else if ((status < 0) && (status != -ENOSPC)) {
765                         mlog_errno(status);
766                         goto bail;
767                 }
768         }
769
770         if (status == -ENOSPC) {
771                 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
772                 if (status < 0) {
773                         if (status != -ENOSPC)
774                                 mlog_errno(status);
775                         goto bail;
776                 }
777         }
778
779         status = 0;
780 bail:
781         if ((status < 0) && *ac) {
782                 ocfs2_free_alloc_context(*ac);
783                 *ac = NULL;
784         }
785
786         mlog_exit(status);
787         return status;
788 }
789
790 int ocfs2_reserve_clusters(struct ocfs2_super *osb,
791                            u32 bits_wanted,
792                            struct ocfs2_alloc_context **ac)
793 {
794         return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac);
795 }
796
797 /*
798  * More or less lifted from ext3. I'll leave their description below:
799  *
800  * "For ext3 allocations, we must not reuse any blocks which are
801  * allocated in the bitmap buffer's "last committed data" copy.  This
802  * prevents deletes from freeing up the page for reuse until we have
803  * committed the delete transaction.
804  *
805  * If we didn't do this, then deleting something and reallocating it as
806  * data would allow the old block to be overwritten before the
807  * transaction committed (because we force data to disk before commit).
808  * This would lead to corruption if we crashed between overwriting the
809  * data and committing the delete.
810  *
811  * @@@ We may want to make this allocation behaviour conditional on
812  * data-writes at some point, and disable it for metadata allocations or
813  * sync-data inodes."
814  *
815  * Note: OCFS2 already does this differently for metadata vs data
816  * allocations, as those bitmaps are separate and undo access is never
817  * called on a metadata group descriptor.
818  */
819 static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
820                                          int nr)
821 {
822         struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
823
824         if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
825                 return 0;
826         if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data)
827                 return 1;
828
829         bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
830         return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
831 }
832
833 static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
834                                              struct buffer_head *bg_bh,
835                                              unsigned int bits_wanted,
836                                              unsigned int total_bits,
837                                              u16 *bit_off,
838                                              u16 *bits_found)
839 {
840         void *bitmap;
841         u16 best_offset, best_size;
842         int offset, start, found, status = 0;
843         struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
844
845         /* Callers got this descriptor from
846          * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
847         BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
848
849         found = start = best_offset = best_size = 0;
850         bitmap = bg->bg_bitmap;
851
852         while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
853                 if (offset == total_bits)
854                         break;
855
856                 if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
857                         /* We found a zero, but we can't use it as it
858                          * hasn't been put to disk yet! */
859                         found = 0;
860                         start = offset + 1;
861                 } else if (offset == start) {
862                         /* we found a zero */
863                         found++;
864                         /* move start to the next bit to test */
865                         start++;
866                 } else {
867                         /* got a zero after some ones */
868                         found = 1;
869                         start = offset + 1;
870                 }
871                 if (found > best_size) {
872                         best_size = found;
873                         best_offset = start - found;
874                 }
875                 /* we got everything we needed */
876                 if (found == bits_wanted) {
877                         /* mlog(0, "Found it all!\n"); */
878                         break;
879                 }
880         }
881
882         /* XXX: I think the first clause is equivalent to the second
883          *      - jlbec */
884         if (found == bits_wanted) {
885                 *bit_off = start - found;
886                 *bits_found = found;
887         } else if (best_size) {
888                 *bit_off = best_offset;
889                 *bits_found = best_size;
890         } else {
891                 status = -ENOSPC;
892                 /* No error log here -- see the comment above
893                  * ocfs2_test_bg_bit_allocatable */
894         }
895
896         return status;
897 }
898
899 static inline int ocfs2_block_group_set_bits(handle_t *handle,
900                                              struct inode *alloc_inode,
901                                              struct ocfs2_group_desc *bg,
902                                              struct buffer_head *group_bh,
903                                              unsigned int bit_off,
904                                              unsigned int num_bits)
905 {
906         int status;
907         void *bitmap = bg->bg_bitmap;
908         int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
909
910         mlog_entry_void();
911
912         /* All callers get the descriptor via
913          * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
914         BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
915         BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
916
917         mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
918              num_bits);
919
920         if (ocfs2_is_cluster_bitmap(alloc_inode))
921                 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
922
923         status = ocfs2_journal_access(handle,
924                                       alloc_inode,
925                                       group_bh,
926                                       journal_type);
927         if (status < 0) {
928                 mlog_errno(status);
929                 goto bail;
930         }
931
932         le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
933
934         while(num_bits--)
935                 ocfs2_set_bit(bit_off++, bitmap);
936
937         status = ocfs2_journal_dirty(handle,
938                                      group_bh);
939         if (status < 0) {
940                 mlog_errno(status);
941                 goto bail;
942         }
943
944 bail:
945         mlog_exit(status);
946         return status;
947 }
948
949 /* find the one with the most empty bits */
950 static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl)
951 {
952         u16 curr, best;
953
954         BUG_ON(!cl->cl_next_free_rec);
955
956         best = curr = 0;
957         while (curr < le16_to_cpu(cl->cl_next_free_rec)) {
958                 if (le32_to_cpu(cl->cl_recs[curr].c_free) >
959                     le32_to_cpu(cl->cl_recs[best].c_free))
960                         best = curr;
961                 curr++;
962         }
963
964         BUG_ON(best >= le16_to_cpu(cl->cl_next_free_rec));
965         return best;
966 }
967
968 static int ocfs2_relink_block_group(handle_t *handle,
969                                     struct inode *alloc_inode,
970                                     struct buffer_head *fe_bh,
971                                     struct buffer_head *bg_bh,
972                                     struct buffer_head *prev_bg_bh,
973                                     u16 chain)
974 {
975         int status;
976         /* there is a really tiny chance the journal calls could fail,
977          * but we wouldn't want inconsistent blocks in *any* case. */
978         u64 fe_ptr, bg_ptr, prev_bg_ptr;
979         struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
980         struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
981         struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data;
982
983         /* The caller got these descriptors from
984          * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
985         BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
986         BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg));
987
988         mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
989              (unsigned long long)le64_to_cpu(fe->i_blkno), chain,
990              (unsigned long long)le64_to_cpu(bg->bg_blkno),
991              (unsigned long long)le64_to_cpu(prev_bg->bg_blkno));
992
993         fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno);
994         bg_ptr = le64_to_cpu(bg->bg_next_group);
995         prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group);
996
997         status = ocfs2_journal_access(handle, alloc_inode, prev_bg_bh,
998                                       OCFS2_JOURNAL_ACCESS_WRITE);
999         if (status < 0) {
1000                 mlog_errno(status);
1001                 goto out_rollback;
1002         }
1003
1004         prev_bg->bg_next_group = bg->bg_next_group;
1005
1006         status = ocfs2_journal_dirty(handle, prev_bg_bh);
1007         if (status < 0) {
1008                 mlog_errno(status);
1009                 goto out_rollback;
1010         }
1011
1012         status = ocfs2_journal_access(handle, alloc_inode, bg_bh,
1013                                       OCFS2_JOURNAL_ACCESS_WRITE);
1014         if (status < 0) {
1015                 mlog_errno(status);
1016                 goto out_rollback;
1017         }
1018
1019         bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno;
1020
1021         status = ocfs2_journal_dirty(handle, bg_bh);
1022         if (status < 0) {
1023                 mlog_errno(status);
1024                 goto out_rollback;
1025         }
1026
1027         status = ocfs2_journal_access(handle, alloc_inode, fe_bh,
1028                                       OCFS2_JOURNAL_ACCESS_WRITE);
1029         if (status < 0) {
1030                 mlog_errno(status);
1031                 goto out_rollback;
1032         }
1033
1034         fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno;
1035
1036         status = ocfs2_journal_dirty(handle, fe_bh);
1037         if (status < 0) {
1038                 mlog_errno(status);
1039                 goto out_rollback;
1040         }
1041
1042         status = 0;
1043 out_rollback:
1044         if (status < 0) {
1045                 fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr);
1046                 bg->bg_next_group = cpu_to_le64(bg_ptr);
1047                 prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
1048         }
1049
1050         mlog_exit(status);
1051         return status;
1052 }
1053
1054 static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
1055                                                      u32 wanted)
1056 {
1057         return le16_to_cpu(bg->bg_free_bits_count) > wanted;
1058 }
1059
1060 /* return 0 on success, -ENOSPC to keep searching and any other < 0
1061  * value on error. */
1062 static int ocfs2_cluster_group_search(struct inode *inode,
1063                                       struct buffer_head *group_bh,
1064                                       u32 bits_wanted, u32 min_bits,
1065                                       u64 max_block,
1066                                       u16 *bit_off, u16 *bits_found)
1067 {
1068         int search = -ENOSPC;
1069         int ret;
1070         u64 blkoff;
1071         struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
1072         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1073         u16 tmp_off, tmp_found;
1074         unsigned int max_bits, gd_cluster_off;
1075
1076         BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1077
1078         if (gd->bg_free_bits_count) {
1079                 max_bits = le16_to_cpu(gd->bg_bits);
1080
1081                 /* Tail groups in cluster bitmaps which aren't cpg
1082                  * aligned are prone to partial extention by a failed
1083                  * fs resize. If the file system resize never got to
1084                  * update the dinode cluster count, then we don't want
1085                  * to trust any clusters past it, regardless of what
1086                  * the group descriptor says. */
1087                 gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
1088                                                           le64_to_cpu(gd->bg_blkno));
1089                 if ((gd_cluster_off + max_bits) >
1090                     OCFS2_I(inode)->ip_clusters) {
1091                         max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
1092                         mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n",
1093                              (unsigned long long)le64_to_cpu(gd->bg_blkno),
1094                              le16_to_cpu(gd->bg_bits),
1095                              OCFS2_I(inode)->ip_clusters, max_bits);
1096                 }
1097
1098                 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
1099                                                         group_bh, bits_wanted,
1100                                                         max_bits,
1101                                                         &tmp_off, &tmp_found);
1102                 if (ret)
1103                         return ret;
1104
1105                 if (max_block) {
1106                         blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
1107                                                           gd_cluster_off +
1108                                                           tmp_off + tmp_found);
1109                         mlog(0, "Checking %llu against %llu\n",
1110                              (unsigned long long)blkoff,
1111                              (unsigned long long)max_block);
1112                         if (blkoff > max_block)
1113                                 return -ENOSPC;
1114                 }
1115
1116                 /* ocfs2_block_group_find_clear_bits() might
1117                  * return success, but we still want to return
1118                  * -ENOSPC unless it found the minimum number
1119                  * of bits. */
1120                 if (min_bits <= tmp_found) {
1121                         *bit_off = tmp_off;
1122                         *bits_found = tmp_found;
1123                         search = 0; /* success */
1124                 } else if (tmp_found) {
1125                         /*
1126                          * Don't show bits which we'll be returning
1127                          * for allocation to the local alloc bitmap.
1128                          */
1129                         ocfs2_local_alloc_seen_free_bits(osb, tmp_found);
1130                 }
1131         }
1132
1133         return search;
1134 }
1135
1136 static int ocfs2_block_group_search(struct inode *inode,
1137                                     struct buffer_head *group_bh,
1138                                     u32 bits_wanted, u32 min_bits,
1139                                     u64 max_block,
1140                                     u16 *bit_off, u16 *bits_found)
1141 {
1142         int ret = -ENOSPC;
1143         u64 blkoff;
1144         struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
1145
1146         BUG_ON(min_bits != 1);
1147         BUG_ON(ocfs2_is_cluster_bitmap(inode));
1148
1149         if (bg->bg_free_bits_count) {
1150                 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
1151                                                         group_bh, bits_wanted,
1152                                                         le16_to_cpu(bg->bg_bits),
1153                                                         bit_off, bits_found);
1154                 if (!ret && max_block) {
1155                         blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off +
1156                                 *bits_found;
1157                         mlog(0, "Checking %llu against %llu\n",
1158                              (unsigned long long)blkoff,
1159                              (unsigned long long)max_block);
1160                         if (blkoff > max_block)
1161                                 ret = -ENOSPC;
1162                 }
1163         }
1164
1165         return ret;
1166 }
1167
1168 static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
1169                                        handle_t *handle,
1170                                        struct buffer_head *di_bh,
1171                                        u32 num_bits,
1172                                        u16 chain)
1173 {
1174         int ret;
1175         u32 tmp_used;
1176         struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
1177         struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
1178
1179         ret = ocfs2_journal_access(handle, inode, di_bh,
1180                                    OCFS2_JOURNAL_ACCESS_WRITE);
1181         if (ret < 0) {
1182                 mlog_errno(ret);
1183                 goto out;
1184         }
1185
1186         tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
1187         di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
1188         le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
1189
1190         ret = ocfs2_journal_dirty(handle, di_bh);
1191         if (ret < 0)
1192                 mlog_errno(ret);
1193
1194 out:
1195         return ret;
1196 }
1197
1198 static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1199                                   handle_t *handle,
1200                                   u32 bits_wanted,
1201                                   u32 min_bits,
1202                                   u16 *bit_off,
1203                                   unsigned int *num_bits,
1204                                   u64 gd_blkno,
1205                                   u16 *bits_left)
1206 {
1207         int ret;
1208         u16 found;
1209         struct buffer_head *group_bh = NULL;
1210         struct ocfs2_group_desc *gd;
1211         struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
1212         struct inode *alloc_inode = ac->ac_inode;
1213
1214         ret = ocfs2_read_group_descriptor(alloc_inode, di, gd_blkno,
1215                                           &group_bh);
1216         if (ret < 0) {
1217                 mlog_errno(ret);
1218                 return ret;
1219         }
1220
1221         gd = (struct ocfs2_group_desc *) group_bh->b_data;
1222         ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
1223                                   ac->ac_max_block, bit_off, &found);
1224         if (ret < 0) {
1225                 if (ret != -ENOSPC)
1226                         mlog_errno(ret);
1227                 goto out;
1228         }
1229
1230         *num_bits = found;
1231
1232         ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
1233                                                *num_bits,
1234                                                le16_to_cpu(gd->bg_chain));
1235         if (ret < 0) {
1236                 mlog_errno(ret);
1237                 goto out;
1238         }
1239
1240         ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
1241                                          *bit_off, *num_bits);
1242         if (ret < 0)
1243                 mlog_errno(ret);
1244
1245         *bits_left = le16_to_cpu(gd->bg_free_bits_count);
1246
1247 out:
1248         brelse(group_bh);
1249
1250         return ret;
1251 }
1252
1253 static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1254                               handle_t *handle,
1255                               u32 bits_wanted,
1256                               u32 min_bits,
1257                               u16 *bit_off,
1258                               unsigned int *num_bits,
1259                               u64 *bg_blkno,
1260                               u16 *bits_left)
1261 {
1262         int status;
1263         u16 chain, tmp_bits;
1264         u32 tmp_used;
1265         u64 next_group;
1266         struct inode *alloc_inode = ac->ac_inode;
1267         struct buffer_head *group_bh = NULL;
1268         struct buffer_head *prev_group_bh = NULL;
1269         struct ocfs2_dinode *fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
1270         struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
1271         struct ocfs2_group_desc *bg;
1272
1273         chain = ac->ac_chain;
1274         mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n",
1275              bits_wanted, chain,
1276              (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
1277
1278         status = ocfs2_read_group_descriptor(alloc_inode, fe,
1279                                              le64_to_cpu(cl->cl_recs[chain].c_blkno),
1280                                              &group_bh);
1281         if (status < 0) {
1282                 mlog_errno(status);
1283                 goto bail;
1284         }
1285         bg = (struct ocfs2_group_desc *) group_bh->b_data;
1286
1287         status = -ENOSPC;
1288         /* for now, the chain search is a bit simplistic. We just use
1289          * the 1st group with any empty bits. */
1290         while ((status = ac->ac_group_search(alloc_inode, group_bh,
1291                                              bits_wanted, min_bits,
1292                                              ac->ac_max_block, bit_off,
1293                                              &tmp_bits)) == -ENOSPC) {
1294                 if (!bg->bg_next_group)
1295                         break;
1296
1297                 brelse(prev_group_bh);
1298                 prev_group_bh = NULL;
1299
1300                 next_group = le64_to_cpu(bg->bg_next_group);
1301                 prev_group_bh = group_bh;
1302                 group_bh = NULL;
1303                 status = ocfs2_read_group_descriptor(alloc_inode, fe,
1304                                                      next_group, &group_bh);
1305                 if (status < 0) {
1306                         mlog_errno(status);
1307                         goto bail;
1308                 }
1309                 bg = (struct ocfs2_group_desc *) group_bh->b_data;
1310         }
1311         if (status < 0) {
1312                 if (status != -ENOSPC)
1313                         mlog_errno(status);
1314                 goto bail;
1315         }
1316
1317         mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
1318              tmp_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno));
1319
1320         *num_bits = tmp_bits;
1321
1322         BUG_ON(*num_bits == 0);
1323
1324         /*
1325          * Keep track of previous block descriptor read. When
1326          * we find a target, if we have read more than X
1327          * number of descriptors, and the target is reasonably
1328          * empty, relink him to top of his chain.
1329          *
1330          * We've read 0 extra blocks and only send one more to
1331          * the transaction, yet the next guy to search has a
1332          * much easier time.
1333          *
1334          * Do this *after* figuring out how many bits we're taking out
1335          * of our target group.
1336          */
1337         if (ac->ac_allow_chain_relink &&
1338             (prev_group_bh) &&
1339             (ocfs2_block_group_reasonably_empty(bg, *num_bits))) {
1340                 status = ocfs2_relink_block_group(handle, alloc_inode,
1341                                                   ac->ac_bh, group_bh,
1342                                                   prev_group_bh, chain);
1343                 if (status < 0) {
1344                         mlog_errno(status);
1345                         goto bail;
1346                 }
1347         }
1348
1349         /* Ok, claim our bits now: set the info on dinode, chainlist
1350          * and then the group */
1351         status = ocfs2_journal_access(handle,
1352                                       alloc_inode,
1353                                       ac->ac_bh,
1354                                       OCFS2_JOURNAL_ACCESS_WRITE);
1355         if (status < 0) {
1356                 mlog_errno(status);
1357                 goto bail;
1358         }
1359
1360         tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
1361         fe->id1.bitmap1.i_used = cpu_to_le32(*num_bits + tmp_used);
1362         le32_add_cpu(&cl->cl_recs[chain].c_free, -(*num_bits));
1363
1364         status = ocfs2_journal_dirty(handle,
1365                                      ac->ac_bh);
1366         if (status < 0) {
1367                 mlog_errno(status);
1368                 goto bail;
1369         }
1370
1371         status = ocfs2_block_group_set_bits(handle,
1372                                             alloc_inode,
1373                                             bg,
1374                                             group_bh,
1375                                             *bit_off,
1376                                             *num_bits);
1377         if (status < 0) {
1378                 mlog_errno(status);
1379                 goto bail;
1380         }
1381
1382         mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits,
1383              (unsigned long long)le64_to_cpu(fe->i_blkno));
1384
1385         *bg_blkno = le64_to_cpu(bg->bg_blkno);
1386         *bits_left = le16_to_cpu(bg->bg_free_bits_count);
1387 bail:
1388         brelse(group_bh);
1389         brelse(prev_group_bh);
1390
1391         mlog_exit(status);
1392         return status;
1393 }
1394
1395 /* will give out up to bits_wanted contiguous bits. */
1396 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1397                                      struct ocfs2_alloc_context *ac,
1398                                      handle_t *handle,
1399                                      u32 bits_wanted,
1400                                      u32 min_bits,
1401                                      u16 *bit_off,
1402                                      unsigned int *num_bits,
1403                                      u64 *bg_blkno)
1404 {
1405         int status;
1406         u16 victim, i;
1407         u16 bits_left = 0;
1408         u64 hint_blkno = ac->ac_last_group;
1409         struct ocfs2_chain_list *cl;
1410         struct ocfs2_dinode *fe;
1411
1412         mlog_entry_void();
1413
1414         BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
1415         BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given));
1416         BUG_ON(!ac->ac_bh);
1417
1418         fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
1419
1420         /* The bh was validated by the inode read during
1421          * ocfs2_reserve_suballoc_bits().  Any corruption is a code bug. */
1422         BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
1423
1424         if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
1425             le32_to_cpu(fe->id1.bitmap1.i_total)) {
1426                 ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used "
1427                             "bits but only %u total.",
1428                             (unsigned long long)le64_to_cpu(fe->i_blkno),
1429                             le32_to_cpu(fe->id1.bitmap1.i_used),
1430                             le32_to_cpu(fe->id1.bitmap1.i_total));
1431                 status = -EIO;
1432                 goto bail;
1433         }
1434
1435         if (hint_blkno) {
1436                 /* Attempt to short-circuit the usual search mechanism
1437                  * by jumping straight to the most recently used
1438                  * allocation group. This helps us mantain some
1439                  * contiguousness across allocations. */
1440                 status = ocfs2_search_one_group(ac, handle, bits_wanted,
1441                                                 min_bits, bit_off, num_bits,
1442                                                 hint_blkno, &bits_left);
1443                 if (!status) {
1444                         /* Be careful to update *bg_blkno here as the
1445                          * caller is expecting it to be filled in, and
1446                          * ocfs2_search_one_group() won't do that for
1447                          * us. */
1448                         *bg_blkno = hint_blkno;
1449                         goto set_hint;
1450                 }
1451                 if (status < 0 && status != -ENOSPC) {
1452                         mlog_errno(status);
1453                         goto bail;
1454                 }
1455         }
1456
1457         cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
1458
1459         victim = ocfs2_find_victim_chain(cl);
1460         ac->ac_chain = victim;
1461         ac->ac_allow_chain_relink = 1;
1462
1463         status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, bit_off,
1464                                     num_bits, bg_blkno, &bits_left);
1465         if (!status)
1466                 goto set_hint;
1467         if (status < 0 && status != -ENOSPC) {
1468                 mlog_errno(status);
1469                 goto bail;
1470         }
1471
1472         mlog(0, "Search of victim chain %u came up with nothing, "
1473              "trying all chains now.\n", victim);
1474
1475         /* If we didn't pick a good victim, then just default to
1476          * searching each chain in order. Don't allow chain relinking
1477          * because we only calculate enough journal credits for one
1478          * relink per alloc. */
1479         ac->ac_allow_chain_relink = 0;
1480         for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) {
1481                 if (i == victim)
1482                         continue;
1483                 if (!cl->cl_recs[i].c_free)
1484                         continue;
1485
1486                 ac->ac_chain = i;
1487                 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
1488                                             bit_off, num_bits, bg_blkno,
1489                                             &bits_left);
1490                 if (!status)
1491                         break;
1492                 if (status < 0 && status != -ENOSPC) {
1493                         mlog_errno(status);
1494                         goto bail;
1495                 }
1496         }
1497
1498 set_hint:
1499         if (status != -ENOSPC) {
1500                 /* If the next search of this group is not likely to
1501                  * yield a suitable extent, then we reset the last
1502                  * group hint so as to not waste a disk read */
1503                 if (bits_left < min_bits)
1504                         ac->ac_last_group = 0;
1505                 else
1506                         ac->ac_last_group = *bg_blkno;
1507         }
1508
1509 bail:
1510         mlog_exit(status);
1511         return status;
1512 }
1513
1514 int ocfs2_claim_metadata(struct ocfs2_super *osb,
1515                          handle_t *handle,
1516                          struct ocfs2_alloc_context *ac,
1517                          u32 bits_wanted,
1518                          u16 *suballoc_bit_start,
1519                          unsigned int *num_bits,
1520                          u64 *blkno_start)
1521 {
1522         int status;
1523         u64 bg_blkno;
1524
1525         BUG_ON(!ac);
1526         BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted));
1527         BUG_ON(ac->ac_which != OCFS2_AC_USE_META);
1528
1529         status = ocfs2_claim_suballoc_bits(osb,
1530                                            ac,
1531                                            handle,
1532                                            bits_wanted,
1533                                            1,
1534                                            suballoc_bit_start,
1535                                            num_bits,
1536                                            &bg_blkno);
1537         if (status < 0) {
1538                 mlog_errno(status);
1539                 goto bail;
1540         }
1541         atomic_inc(&osb->alloc_stats.bg_allocs);
1542
1543         *blkno_start = bg_blkno + (u64) *suballoc_bit_start;
1544         ac->ac_bits_given += (*num_bits);
1545         status = 0;
1546 bail:
1547         mlog_exit(status);
1548         return status;
1549 }
1550
1551 int ocfs2_claim_new_inode(struct ocfs2_super *osb,
1552                           handle_t *handle,
1553                           struct ocfs2_alloc_context *ac,
1554                           u16 *suballoc_bit,
1555                           u64 *fe_blkno)
1556 {
1557         int status;
1558         unsigned int num_bits;
1559         u64 bg_blkno;
1560
1561         mlog_entry_void();
1562
1563         BUG_ON(!ac);
1564         BUG_ON(ac->ac_bits_given != 0);
1565         BUG_ON(ac->ac_bits_wanted != 1);
1566         BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
1567
1568         status = ocfs2_claim_suballoc_bits(osb,
1569                                            ac,
1570                                            handle,
1571                                            1,
1572                                            1,
1573                                            suballoc_bit,
1574                                            &num_bits,
1575                                            &bg_blkno);
1576         if (status < 0) {
1577                 mlog_errno(status);
1578                 goto bail;
1579         }
1580         atomic_inc(&osb->alloc_stats.bg_allocs);
1581
1582         BUG_ON(num_bits != 1);
1583
1584         *fe_blkno = bg_blkno + (u64) (*suballoc_bit);
1585         ac->ac_bits_given++;
1586         status = 0;
1587 bail:
1588         mlog_exit(status);
1589         return status;
1590 }
1591
1592 /* translate a group desc. blkno and it's bitmap offset into
1593  * disk cluster offset. */
1594 static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
1595                                                    u64 bg_blkno,
1596                                                    u16 bg_bit_off)
1597 {
1598         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1599         u32 cluster = 0;
1600
1601         BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1602
1603         if (bg_blkno != osb->first_cluster_group_blkno)
1604                 cluster = ocfs2_blocks_to_clusters(inode->i_sb, bg_blkno);
1605         cluster += (u32) bg_bit_off;
1606         return cluster;
1607 }
1608
1609 /* given a cluster offset, calculate which block group it belongs to
1610  * and return that block offset. */
1611 u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster)
1612 {
1613         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1614         u32 group_no;
1615
1616         BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1617
1618         group_no = cluster / osb->bitmap_cpg;
1619         if (!group_no)
1620                 return osb->first_cluster_group_blkno;
1621         return ocfs2_clusters_to_blocks(inode->i_sb,
1622                                         group_no * osb->bitmap_cpg);
1623 }
1624
1625 /* given the block number of a cluster start, calculate which cluster
1626  * group and descriptor bitmap offset that corresponds to. */
1627 static inline void ocfs2_block_to_cluster_group(struct inode *inode,
1628                                                 u64 data_blkno,
1629                                                 u64 *bg_blkno,
1630                                                 u16 *bg_bit_off)
1631 {
1632         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1633         u32 data_cluster = ocfs2_blocks_to_clusters(osb->sb, data_blkno);
1634
1635         BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1636
1637         *bg_blkno = ocfs2_which_cluster_group(inode,
1638                                               data_cluster);
1639
1640         if (*bg_blkno == osb->first_cluster_group_blkno)
1641                 *bg_bit_off = (u16) data_cluster;
1642         else
1643                 *bg_bit_off = (u16) ocfs2_blocks_to_clusters(osb->sb,
1644                                                              data_blkno - *bg_blkno);
1645 }
1646
1647 /*
1648  * min_bits - minimum contiguous chunk from this total allocation we
1649  * can handle. set to what we asked for originally for a full
1650  * contig. allocation, set to '1' to indicate we can deal with extents
1651  * of any size.
1652  */
1653 int __ocfs2_claim_clusters(struct ocfs2_super *osb,
1654                            handle_t *handle,
1655                            struct ocfs2_alloc_context *ac,
1656                            u32 min_clusters,
1657                            u32 max_clusters,
1658                            u32 *cluster_start,
1659                            u32 *num_clusters)
1660 {
1661         int status;
1662         unsigned int bits_wanted = max_clusters;
1663         u64 bg_blkno = 0;
1664         u16 bg_bit_off;
1665
1666         mlog_entry_void();
1667
1668         BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
1669
1670         BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
1671                && ac->ac_which != OCFS2_AC_USE_MAIN);
1672
1673         if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
1674                 status = ocfs2_claim_local_alloc_bits(osb,
1675                                                       handle,
1676                                                       ac,
1677                                                       bits_wanted,
1678                                                       cluster_start,
1679                                                       num_clusters);
1680                 if (!status)
1681                         atomic_inc(&osb->alloc_stats.local_data);
1682         } else {
1683                 if (min_clusters > (osb->bitmap_cpg - 1)) {
1684                         /* The only paths asking for contiguousness
1685                          * should know about this already. */
1686                         mlog(ML_ERROR, "minimum allocation requested %u exceeds "
1687                              "group bitmap size %u!\n", min_clusters,
1688                              osb->bitmap_cpg);
1689                         status = -ENOSPC;
1690                         goto bail;
1691                 }
1692                 /* clamp the current request down to a realistic size. */
1693                 if (bits_wanted > (osb->bitmap_cpg - 1))
1694                         bits_wanted = osb->bitmap_cpg - 1;
1695
1696                 status = ocfs2_claim_suballoc_bits(osb,
1697                                                    ac,
1698                                                    handle,
1699                                                    bits_wanted,
1700                                                    min_clusters,
1701                                                    &bg_bit_off,
1702                                                    num_clusters,
1703                                                    &bg_blkno);
1704                 if (!status) {
1705                         *cluster_start =
1706                                 ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode,
1707                                                                  bg_blkno,
1708                                                                  bg_bit_off);
1709                         atomic_inc(&osb->alloc_stats.bitmap_data);
1710                 }
1711         }
1712         if (status < 0) {
1713                 if (status != -ENOSPC)
1714                         mlog_errno(status);
1715                 goto bail;
1716         }
1717
1718         ac->ac_bits_given += *num_clusters;
1719
1720 bail:
1721         mlog_exit(status);
1722         return status;
1723 }
1724
1725 int ocfs2_claim_clusters(struct ocfs2_super *osb,
1726                          handle_t *handle,
1727                          struct ocfs2_alloc_context *ac,
1728                          u32 min_clusters,
1729                          u32 *cluster_start,
1730                          u32 *num_clusters)
1731 {
1732         unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
1733
1734         return __ocfs2_claim_clusters(osb, handle, ac, min_clusters,
1735                                       bits_wanted, cluster_start, num_clusters);
1736 }
1737
1738 static inline int ocfs2_block_group_clear_bits(handle_t *handle,
1739                                                struct inode *alloc_inode,
1740                                                struct ocfs2_group_desc *bg,
1741                                                struct buffer_head *group_bh,
1742                                                unsigned int bit_off,
1743                                                unsigned int num_bits)
1744 {
1745         int status;
1746         unsigned int tmp;
1747         int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
1748         struct ocfs2_group_desc *undo_bg = NULL;
1749
1750         mlog_entry_void();
1751
1752         /* The caller got this descriptor from
1753          * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
1754         BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
1755
1756         mlog(0, "off = %u, num = %u\n", bit_off, num_bits);
1757
1758         if (ocfs2_is_cluster_bitmap(alloc_inode))
1759                 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
1760
1761         status = ocfs2_journal_access(handle, alloc_inode, group_bh,
1762                                       journal_type);
1763         if (status < 0) {
1764                 mlog_errno(status);
1765                 goto bail;
1766         }
1767
1768         if (ocfs2_is_cluster_bitmap(alloc_inode))
1769                 undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data;
1770
1771         tmp = num_bits;
1772         while(tmp--) {
1773                 ocfs2_clear_bit((bit_off + tmp),
1774                                 (unsigned long *) bg->bg_bitmap);
1775                 if (ocfs2_is_cluster_bitmap(alloc_inode))
1776                         ocfs2_set_bit(bit_off + tmp,
1777                                       (unsigned long *) undo_bg->bg_bitmap);
1778         }
1779         le16_add_cpu(&bg->bg_free_bits_count, num_bits);
1780
1781         status = ocfs2_journal_dirty(handle, group_bh);
1782         if (status < 0)
1783                 mlog_errno(status);
1784 bail:
1785         return status;
1786 }
1787
1788 /*
1789  * expects the suballoc inode to already be locked.
1790  */
1791 int ocfs2_free_suballoc_bits(handle_t *handle,
1792                              struct inode *alloc_inode,
1793                              struct buffer_head *alloc_bh,
1794                              unsigned int start_bit,
1795                              u64 bg_blkno,
1796                              unsigned int count)
1797 {
1798         int status = 0;
1799         u32 tmp_used;
1800         struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data;
1801         struct ocfs2_chain_list *cl = &fe->id2.i_chain;
1802         struct buffer_head *group_bh = NULL;
1803         struct ocfs2_group_desc *group;
1804
1805         mlog_entry_void();
1806
1807         /* The alloc_bh comes from ocfs2_free_dinode() or
1808          * ocfs2_free_clusters().  The callers have all locked the
1809          * allocator and gotten alloc_bh from the lock call.  This
1810          * validates the dinode buffer.  Any corruption that has happended
1811          * is a code bug. */
1812         BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
1813         BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
1814
1815         mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
1816              (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
1817              (unsigned long long)bg_blkno, start_bit);
1818
1819         status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno,
1820                                              &group_bh);
1821         if (status < 0) {
1822                 mlog_errno(status);
1823                 goto bail;
1824         }
1825         group = (struct ocfs2_group_desc *) group_bh->b_data;
1826
1827         BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
1828
1829         status = ocfs2_block_group_clear_bits(handle, alloc_inode,
1830                                               group, group_bh,
1831                                               start_bit, count);
1832         if (status < 0) {
1833                 mlog_errno(status);
1834                 goto bail;
1835         }
1836
1837         status = ocfs2_journal_access(handle, alloc_inode, alloc_bh,
1838                                       OCFS2_JOURNAL_ACCESS_WRITE);
1839         if (status < 0) {
1840                 mlog_errno(status);
1841                 goto bail;
1842         }
1843
1844         le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free,
1845                      count);
1846         tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
1847         fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count);
1848
1849         status = ocfs2_journal_dirty(handle, alloc_bh);
1850         if (status < 0) {
1851                 mlog_errno(status);
1852                 goto bail;
1853         }
1854
1855 bail:
1856         brelse(group_bh);
1857
1858         mlog_exit(status);
1859         return status;
1860 }
1861
1862 int ocfs2_free_dinode(handle_t *handle,
1863                       struct inode *inode_alloc_inode,
1864                       struct buffer_head *inode_alloc_bh,
1865                       struct ocfs2_dinode *di)
1866 {
1867         u64 blk = le64_to_cpu(di->i_blkno);
1868         u16 bit = le16_to_cpu(di->i_suballoc_bit);
1869         u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1870
1871         return ocfs2_free_suballoc_bits(handle, inode_alloc_inode,
1872                                         inode_alloc_bh, bit, bg_blkno, 1);
1873 }
1874
1875 int ocfs2_free_clusters(handle_t *handle,
1876                        struct inode *bitmap_inode,
1877                        struct buffer_head *bitmap_bh,
1878                        u64 start_blk,
1879                        unsigned int num_clusters)
1880 {
1881         int status;
1882         u16 bg_start_bit;
1883         u64 bg_blkno;
1884         struct ocfs2_dinode *fe;
1885
1886         /* You can't ever have a contiguous set of clusters
1887          * bigger than a block group bitmap so we never have to worry
1888          * about looping on them. */
1889
1890         mlog_entry_void();
1891
1892         /* This is expensive. We can safely remove once this stuff has
1893          * gotten tested really well. */
1894         BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk)));
1895
1896         fe = (struct ocfs2_dinode *) bitmap_bh->b_data;
1897
1898         ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno,
1899                                      &bg_start_bit);
1900
1901         mlog(0, "want to free %u clusters starting at block %llu\n",
1902              num_clusters, (unsigned long long)start_blk);
1903         mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n",
1904              (unsigned long long)bg_blkno, bg_start_bit);
1905
1906         status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
1907                                           bg_start_bit, bg_blkno,
1908                                           num_clusters);
1909         if (status < 0) {
1910                 mlog_errno(status);
1911                 goto out;
1912         }
1913
1914         ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb),
1915                                          num_clusters);
1916
1917 out:
1918         mlog_exit(status);
1919         return status;
1920 }
1921
1922 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg)
1923 {
1924         printk("Block Group:\n");
1925         printk("bg_signature:       %s\n", bg->bg_signature);
1926         printk("bg_size:            %u\n", bg->bg_size);
1927         printk("bg_bits:            %u\n", bg->bg_bits);
1928         printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count);
1929         printk("bg_chain:           %u\n", bg->bg_chain);
1930         printk("bg_generation:      %u\n", le32_to_cpu(bg->bg_generation));
1931         printk("bg_next_group:      %llu\n",
1932                (unsigned long long)bg->bg_next_group);
1933         printk("bg_parent_dinode:   %llu\n",
1934                (unsigned long long)bg->bg_parent_dinode);
1935         printk("bg_blkno:           %llu\n",
1936                (unsigned long long)bg->bg_blkno);
1937 }
1938
1939 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
1940 {
1941         int i;
1942
1943         printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno);
1944         printk("i_signature:                  %s\n", fe->i_signature);
1945         printk("i_size:                       %llu\n",
1946                (unsigned long long)fe->i_size);
1947         printk("i_clusters:                   %u\n", fe->i_clusters);
1948         printk("i_generation:                 %u\n",
1949                le32_to_cpu(fe->i_generation));
1950         printk("id1.bitmap1.i_used:           %u\n",
1951                le32_to_cpu(fe->id1.bitmap1.i_used));
1952         printk("id1.bitmap1.i_total:          %u\n",
1953                le32_to_cpu(fe->id1.bitmap1.i_total));
1954         printk("id2.i_chain.cl_cpg:           %u\n", fe->id2.i_chain.cl_cpg);
1955         printk("id2.i_chain.cl_bpc:           %u\n", fe->id2.i_chain.cl_bpc);
1956         printk("id2.i_chain.cl_count:         %u\n", fe->id2.i_chain.cl_count);
1957         printk("id2.i_chain.cl_next_free_rec: %u\n",
1958                fe->id2.i_chain.cl_next_free_rec);
1959         for(i = 0; i < fe->id2.i_chain.cl_next_free_rec; i++) {
1960                 printk("fe->id2.i_chain.cl_recs[%d].c_free:  %u\n", i,
1961                        fe->id2.i_chain.cl_recs[i].c_free);
1962                 printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i,
1963                        fe->id2.i_chain.cl_recs[i].c_total);
1964                 printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i,
1965                        (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
1966         }
1967 }
1968
1969 /*
1970  * For a given allocation, determine which allocators will need to be
1971  * accessed, and lock them, reserving the appropriate number of bits.
1972  *
1973  * Sparse file systems call this from ocfs2_write_begin_nolock()
1974  * and ocfs2_allocate_unwritten_extents().
1975  *
1976  * File systems which don't support holes call this from
1977  * ocfs2_extend_allocation().
1978  */
1979 int ocfs2_lock_allocators(struct inode *inode,
1980                           struct ocfs2_extent_tree *et,
1981                           u32 clusters_to_add, u32 extents_to_split,
1982                           struct ocfs2_alloc_context **data_ac,
1983                           struct ocfs2_alloc_context **meta_ac)
1984 {
1985         int ret = 0, num_free_extents;
1986         unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
1987         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1988
1989         *meta_ac = NULL;
1990         if (data_ac)
1991                 *data_ac = NULL;
1992
1993         BUG_ON(clusters_to_add != 0 && data_ac == NULL);
1994
1995         num_free_extents = ocfs2_num_free_extents(osb, inode, et);
1996         if (num_free_extents < 0) {
1997                 ret = num_free_extents;
1998                 mlog_errno(ret);
1999                 goto out;
2000         }
2001
2002         /*
2003          * Sparse allocation file systems need to be more conservative
2004          * with reserving room for expansion - the actual allocation
2005          * happens while we've got a journal handle open so re-taking
2006          * a cluster lock (because we ran out of room for another
2007          * extent) will violate ordering rules.
2008          *
2009          * Most of the time we'll only be seeing this 1 cluster at a time
2010          * anyway.
2011          *
2012          * Always lock for any unwritten extents - we might want to
2013          * add blocks during a split.
2014          */
2015         if (!num_free_extents ||
2016             (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
2017                 ret = ocfs2_reserve_new_metadata(osb, et->et_root_el, meta_ac);
2018                 if (ret < 0) {
2019                         if (ret != -ENOSPC)
2020                                 mlog_errno(ret);
2021                         goto out;
2022                 }
2023         }
2024
2025         if (clusters_to_add == 0)
2026                 goto out;
2027
2028         ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
2029         if (ret < 0) {
2030                 if (ret != -ENOSPC)
2031                         mlog_errno(ret);
2032                 goto out;
2033         }
2034
2035 out:
2036         if (ret) {
2037                 if (*meta_ac) {
2038                         ocfs2_free_alloc_context(*meta_ac);
2039                         *meta_ac = NULL;
2040                 }
2041
2042                 /*
2043                  * We cannot have an error and a non null *data_ac.
2044                  */
2045         }
2046
2047         return ret;
2048 }