ddba97dc06a0c031214e1bb5f8530ff2d2deea31
[safe/jmp/linux-2.6] / fs / ocfs2 / suballoc.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * suballoc.c
5  *
6  * metadata alloc and free
7  * Inspired by ext3 block groups.
8  *
9  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
10  *
11  * This program is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU General Public
13  * License as published by the Free Software Foundation; either
14  * version 2 of the License, or (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public
22  * License along with this program; if not, write to the
23  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24  * Boston, MA 02111-1307, USA.
25  */
26
27 #include <linux/fs.h>
28 #include <linux/types.h>
29 #include <linux/slab.h>
30 #include <linux/highmem.h>
31
32 #define MLOG_MASK_PREFIX ML_DISK_ALLOC
33 #include <cluster/masklog.h>
34
35 #include "ocfs2.h"
36
37 #include "alloc.h"
38 #include "dlmglue.h"
39 #include "inode.h"
40 #include "journal.h"
41 #include "localalloc.h"
42 #include "suballoc.h"
43 #include "super.h"
44 #include "sysfile.h"
45 #include "uptodate.h"
46
47 #include "buffer_head_io.h"
48
/*
 * Values for the alloc_new_group argument of
 * ocfs2_reserve_suballoc_bits(): when the allocator does not have
 * enough free bits, only ALLOC_NEW_GROUP lets it try to add a new
 * block group; otherwise the request fails with -ENOSPC.
 */
49 #define NOT_ALLOC_NEW_GROUP             0
50 #define ALLOC_NEW_GROUP                 1
51
/*
 * ocfs2_reserve_new_inode() goes straight to stealing from other slots
 * only while osb->s_num_inodes_stolen is below this value; once it is
 * reached, our own slot's inode allocator is tried again first.
 */
52 #define OCFS2_MAX_INODES_TO_STEAL       1024
53
54 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
55 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
56 static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
57 static int ocfs2_block_group_fill(handle_t *handle,
58                                   struct inode *alloc_inode,
59                                   struct buffer_head *bg_bh,
60                                   u64 group_blkno,
61                                   u16 my_chain,
62                                   struct ocfs2_chain_list *cl);
63 static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
64                                    struct inode *alloc_inode,
65                                    struct buffer_head *bh,
66                                    u64 max_block);
67
68 static int ocfs2_cluster_group_search(struct inode *inode,
69                                       struct buffer_head *group_bh,
70                                       u32 bits_wanted, u32 min_bits,
71                                       u64 max_block,
72                                       u16 *bit_off, u16 *bits_found);
73 static int ocfs2_block_group_search(struct inode *inode,
74                                     struct buffer_head *group_bh,
75                                     u32 bits_wanted, u32 min_bits,
76                                     u64 max_block,
77                                     u16 *bit_off, u16 *bits_found);
78 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
79                                      struct ocfs2_alloc_context *ac,
80                                      handle_t *handle,
81                                      u32 bits_wanted,
82                                      u32 min_bits,
83                                      u16 *bit_off,
84                                      unsigned int *num_bits,
85                                      u64 *bg_blkno);
86 static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
87                                          int nr);
88 static inline int ocfs2_block_group_set_bits(handle_t *handle,
89                                              struct inode *alloc_inode,
90                                              struct ocfs2_group_desc *bg,
91                                              struct buffer_head *group_bh,
92                                              unsigned int bit_off,
93                                              unsigned int num_bits);
94 static inline int ocfs2_block_group_clear_bits(handle_t *handle,
95                                                struct inode *alloc_inode,
96                                                struct ocfs2_group_desc *bg,
97                                                struct buffer_head *group_bh,
98                                                unsigned int bit_off,
99                                                unsigned int num_bits);
100
101 static int ocfs2_relink_block_group(handle_t *handle,
102                                     struct inode *alloc_inode,
103                                     struct buffer_head *fe_bh,
104                                     struct buffer_head *bg_bh,
105                                     struct buffer_head *prev_bg_bh,
106                                     u16 chain);
107 static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
108                                                      u32 wanted);
109 static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
110                                                    u64 bg_blkno,
111                                                    u16 bg_bit_off);
112 static inline void ocfs2_block_to_cluster_group(struct inode *inode,
113                                                 u64 data_blkno,
114                                                 u64 *bg_blkno,
115                                                 u16 *bg_bit_off);
116 static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
117                                              u32 bits_wanted, u64 max_block,
118                                              struct ocfs2_alloc_context **ac);
119
120 void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
121 {
122         struct inode *inode = ac->ac_inode;
123
124         if (inode) {
125                 if (ac->ac_which != OCFS2_AC_USE_LOCAL)
126                         ocfs2_inode_unlock(inode, 1);
127
128                 mutex_unlock(&inode->i_mutex);
129
130                 iput(inode);
131                 ac->ac_inode = NULL;
132         }
133         brelse(ac->ac_bh);
134         ac->ac_bh = NULL;
135 }
136
/*
 * Tear down an allocation context: drop every resource it holds via
 * ocfs2_free_ac_resource() and then free the context memory.
 *
 * @ac must not be NULL; it is dereferenced before being freed.
 */
void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
{
	ocfs2_free_ac_resource(ac);
	kfree(ac);
}
142
143 static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
144 {
145         return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
146 }
147
148 /* somewhat more expensive than our other checks, so use sparingly. */
149 int ocfs2_validate_group_descriptor(struct super_block *sb,
150                                     struct ocfs2_dinode *di,
151                                     struct ocfs2_group_desc *gd,
152                                     int clean_error)
153 {
154         unsigned int max_bits;
155
156 #define do_error(fmt, ...)                                              \
157         do{                                                             \
158                 if (clean_error)                                        \
159                         mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__);        \
160                 else                                                    \
161                         ocfs2_error(sb, fmt, ##__VA_ARGS__);            \
162         } while (0)
163
164         if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
165                 do_error("Group Descriptor #%llu has bad signature %.*s",
166                          (unsigned long long)le64_to_cpu(gd->bg_blkno), 7,
167                          gd->bg_signature);
168                 return -EINVAL;
169         }
170
171         if (di->i_blkno != gd->bg_parent_dinode) {
172                 do_error("Group descriptor # %llu has bad parent "
173                          "pointer (%llu, expected %llu)",
174                          (unsigned long long)le64_to_cpu(gd->bg_blkno),
175                          (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
176                          (unsigned long long)le64_to_cpu(di->i_blkno));
177                 return -EINVAL;
178         }
179
180         max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
181         if (le16_to_cpu(gd->bg_bits) > max_bits) {
182                 do_error("Group descriptor # %llu has bit count of %u",
183                          (unsigned long long)le64_to_cpu(gd->bg_blkno),
184                          le16_to_cpu(gd->bg_bits));
185                 return -EINVAL;
186         }
187
188         if (le16_to_cpu(gd->bg_chain) >=
189             le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
190                 do_error("Group descriptor # %llu has bad chain %u",
191                          (unsigned long long)le64_to_cpu(gd->bg_blkno),
192                          le16_to_cpu(gd->bg_chain));
193                 return -EINVAL;
194         }
195
196         if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
197                 do_error("Group descriptor # %llu has bit count %u but "
198                          "claims that %u are free",
199                          (unsigned long long)le64_to_cpu(gd->bg_blkno),
200                          le16_to_cpu(gd->bg_bits),
201                          le16_to_cpu(gd->bg_free_bits_count));
202                 return -EINVAL;
203         }
204
205         if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
206                 do_error("Group descriptor # %llu has bit count %u but "
207                          "max bitmap bits of %u",
208                          (unsigned long long)le64_to_cpu(gd->bg_blkno),
209                          le16_to_cpu(gd->bg_bits),
210                          8 * le16_to_cpu(gd->bg_size));
211                 return -EINVAL;
212         }
213 #undef do_error
214
215         return 0;
216 }
217
218 static int ocfs2_block_group_fill(handle_t *handle,
219                                   struct inode *alloc_inode,
220                                   struct buffer_head *bg_bh,
221                                   u64 group_blkno,
222                                   u16 my_chain,
223                                   struct ocfs2_chain_list *cl)
224 {
225         int status = 0;
226         struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
227         struct super_block * sb = alloc_inode->i_sb;
228
229         mlog_entry_void();
230
231         if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) {
232                 ocfs2_error(alloc_inode->i_sb, "group block (%llu) != "
233                             "b_blocknr (%llu)",
234                             (unsigned long long)group_blkno,
235                             (unsigned long long) bg_bh->b_blocknr);
236                 status = -EIO;
237                 goto bail;
238         }
239
240         status = ocfs2_journal_access(handle,
241                                       alloc_inode,
242                                       bg_bh,
243                                       OCFS2_JOURNAL_ACCESS_CREATE);
244         if (status < 0) {
245                 mlog_errno(status);
246                 goto bail;
247         }
248
249         memset(bg, 0, sb->s_blocksize);
250         strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE);
251         bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
252         bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb));
253         bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl));
254         bg->bg_chain = cpu_to_le16(my_chain);
255         bg->bg_next_group = cl->cl_recs[my_chain].c_blkno;
256         bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno);
257         bg->bg_blkno = cpu_to_le64(group_blkno);
258         /* set the 1st bit in the bitmap to account for the descriptor block */
259         ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap);
260         bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1);
261
262         status = ocfs2_journal_dirty(handle, bg_bh);
263         if (status < 0)
264                 mlog_errno(status);
265
266         /* There is no need to zero out or otherwise initialize the
267          * other blocks in a group - All valid FS metadata in a block
268          * group stores the superblock fs_generation value at
269          * allocation time. */
270
271 bail:
272         mlog_exit(status);
273         return status;
274 }
275
276 static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
277 {
278         u16 curr, best;
279
280         best = curr = 0;
281         while (curr < le16_to_cpu(cl->cl_count)) {
282                 if (le32_to_cpu(cl->cl_recs[best].c_total) >
283                     le32_to_cpu(cl->cl_recs[curr].c_total))
284                         best = curr;
285                 curr++;
286         }
287         return best;
288 }
289
290 /*
291  * We expect the block group allocator to already be locked.
292  */
293 static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
294                                    struct inode *alloc_inode,
295                                    struct buffer_head *bh,
296                                    u64 max_block)
297 {
298         int status, credits;
299         struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
300         struct ocfs2_chain_list *cl;
301         struct ocfs2_alloc_context *ac = NULL;
302         handle_t *handle = NULL;
303         u32 bit_off, num_bits;
304         u16 alloc_rec;
305         u64 bg_blkno;
306         struct buffer_head *bg_bh = NULL;
307         struct ocfs2_group_desc *bg;
308
309         BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode));
310
311         mlog_entry_void();
312
313         cl = &fe->id2.i_chain;
314         status = ocfs2_reserve_clusters_with_limit(osb,
315                                                    le16_to_cpu(cl->cl_cpg),
316                                                    max_block, &ac);
317         if (status < 0) {
318                 if (status != -ENOSPC)
319                         mlog_errno(status);
320                 goto bail;
321         }
322
323         credits = ocfs2_calc_group_alloc_credits(osb->sb,
324                                                  le16_to_cpu(cl->cl_cpg));
325         handle = ocfs2_start_trans(osb, credits);
326         if (IS_ERR(handle)) {
327                 status = PTR_ERR(handle);
328                 handle = NULL;
329                 mlog_errno(status);
330                 goto bail;
331         }
332
333         status = ocfs2_claim_clusters(osb,
334                                       handle,
335                                       ac,
336                                       le16_to_cpu(cl->cl_cpg),
337                                       &bit_off,
338                                       &num_bits);
339         if (status < 0) {
340                 if (status != -ENOSPC)
341                         mlog_errno(status);
342                 goto bail;
343         }
344
345         alloc_rec = ocfs2_find_smallest_chain(cl);
346
347         /* setup the group */
348         bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
349         mlog(0, "new descriptor, record %u, at block %llu\n",
350              alloc_rec, (unsigned long long)bg_blkno);
351
352         bg_bh = sb_getblk(osb->sb, bg_blkno);
353         if (!bg_bh) {
354                 status = -EIO;
355                 mlog_errno(status);
356                 goto bail;
357         }
358         ocfs2_set_new_buffer_uptodate(alloc_inode, bg_bh);
359
360         status = ocfs2_block_group_fill(handle,
361                                         alloc_inode,
362                                         bg_bh,
363                                         bg_blkno,
364                                         alloc_rec,
365                                         cl);
366         if (status < 0) {
367                 mlog_errno(status);
368                 goto bail;
369         }
370
371         bg = (struct ocfs2_group_desc *) bg_bh->b_data;
372
373         status = ocfs2_journal_access(handle, alloc_inode,
374                                       bh, OCFS2_JOURNAL_ACCESS_WRITE);
375         if (status < 0) {
376                 mlog_errno(status);
377                 goto bail;
378         }
379
380         le32_add_cpu(&cl->cl_recs[alloc_rec].c_free,
381                      le16_to_cpu(bg->bg_free_bits_count));
382         le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits));
383         cl->cl_recs[alloc_rec].c_blkno  = cpu_to_le64(bg_blkno);
384         if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
385                 le16_add_cpu(&cl->cl_next_free_rec, 1);
386
387         le32_add_cpu(&fe->id1.bitmap1.i_used, le16_to_cpu(bg->bg_bits) -
388                                         le16_to_cpu(bg->bg_free_bits_count));
389         le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits));
390         le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg));
391
392         status = ocfs2_journal_dirty(handle, bh);
393         if (status < 0) {
394                 mlog_errno(status);
395                 goto bail;
396         }
397
398         spin_lock(&OCFS2_I(alloc_inode)->ip_lock);
399         OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
400         fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb,
401                                              le32_to_cpu(fe->i_clusters)));
402         spin_unlock(&OCFS2_I(alloc_inode)->ip_lock);
403         i_size_write(alloc_inode, le64_to_cpu(fe->i_size));
404         alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);
405
406         status = 0;
407 bail:
408         if (handle)
409                 ocfs2_commit_trans(osb, handle);
410
411         if (ac)
412                 ocfs2_free_alloc_context(ac);
413
414         brelse(bg_bh);
415
416         mlog_exit(status);
417         return status;
418 }
419
420 static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
421                                        struct ocfs2_alloc_context *ac,
422                                        int type,
423                                        u32 slot,
424                                        int alloc_new_group)
425 {
426         int status;
427         u32 bits_wanted = ac->ac_bits_wanted;
428         struct inode *alloc_inode;
429         struct buffer_head *bh = NULL;
430         struct ocfs2_dinode *fe;
431         u32 free_bits;
432
433         mlog_entry_void();
434
435         alloc_inode = ocfs2_get_system_file_inode(osb, type, slot);
436         if (!alloc_inode) {
437                 mlog_errno(-EINVAL);
438                 return -EINVAL;
439         }
440
441         mutex_lock(&alloc_inode->i_mutex);
442
443         status = ocfs2_inode_lock(alloc_inode, &bh, 1);
444         if (status < 0) {
445                 mutex_unlock(&alloc_inode->i_mutex);
446                 iput(alloc_inode);
447
448                 mlog_errno(status);
449                 return status;
450         }
451
452         ac->ac_inode = alloc_inode;
453         ac->ac_alloc_slot = slot;
454
455         fe = (struct ocfs2_dinode *) bh->b_data;
456
457         /* The bh was validated by the inode read inside
458          * ocfs2_inode_lock().  Any corruption is a code bug. */
459         BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
460
461         if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
462                 ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
463                             (unsigned long long)le64_to_cpu(fe->i_blkno));
464                 status = -EIO;
465                 goto bail;
466         }
467
468         free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) -
469                 le32_to_cpu(fe->id1.bitmap1.i_used);
470
471         if (bits_wanted > free_bits) {
472                 /* cluster bitmap never grows */
473                 if (ocfs2_is_cluster_bitmap(alloc_inode)) {
474                         mlog(0, "Disk Full: wanted=%u, free_bits=%u\n",
475                              bits_wanted, free_bits);
476                         status = -ENOSPC;
477                         goto bail;
478                 }
479
480                 if (alloc_new_group != ALLOC_NEW_GROUP) {
481                         mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, "
482                              "and we don't alloc a new group for it.\n",
483                              slot, bits_wanted, free_bits);
484                         status = -ENOSPC;
485                         goto bail;
486                 }
487
488                 status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
489                                                  ac->ac_max_block);
490                 if (status < 0) {
491                         if (status != -ENOSPC)
492                                 mlog_errno(status);
493                         goto bail;
494                 }
495                 atomic_inc(&osb->alloc_stats.bg_extends);
496
497                 /* You should never ask for this much metadata */
498                 BUG_ON(bits_wanted >
499                        (le32_to_cpu(fe->id1.bitmap1.i_total)
500                         - le32_to_cpu(fe->id1.bitmap1.i_used)));
501         }
502
503         get_bh(bh);
504         ac->ac_bh = bh;
505 bail:
506         brelse(bh);
507
508         mlog_exit(status);
509         return status;
510 }
511
512 int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
513                                       int blocks,
514                                       struct ocfs2_alloc_context **ac)
515 {
516         int status;
517         u32 slot;
518
519         *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
520         if (!(*ac)) {
521                 status = -ENOMEM;
522                 mlog_errno(status);
523                 goto bail;
524         }
525
526         (*ac)->ac_bits_wanted = blocks;
527         (*ac)->ac_which = OCFS2_AC_USE_META;
528         slot = osb->slot_num;
529         (*ac)->ac_group_search = ocfs2_block_group_search;
530
531         status = ocfs2_reserve_suballoc_bits(osb, (*ac),
532                                              EXTENT_ALLOC_SYSTEM_INODE,
533                                              slot, ALLOC_NEW_GROUP);
534         if (status < 0) {
535                 if (status != -ENOSPC)
536                         mlog_errno(status);
537                 goto bail;
538         }
539
540         status = 0;
541 bail:
542         if ((status < 0) && *ac) {
543                 ocfs2_free_alloc_context(*ac);
544                 *ac = NULL;
545         }
546
547         mlog_exit(status);
548         return status;
549 }
550
/*
 * Reserve as many metadata blocks as ocfs2_extend_meta_needed() reports
 * for @root_el.  Result and error conventions are those of
 * ocfs2_reserve_new_metadata_blocks().
 */
int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
			       struct ocfs2_extent_list *root_el,
			       struct ocfs2_alloc_context **ac)
{
	int blocks_needed = ocfs2_extend_meta_needed(root_el);

	return ocfs2_reserve_new_metadata_blocks(osb, blocks_needed, ac);
}
559
560 static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb,
561                                               struct ocfs2_alloc_context *ac)
562 {
563         int i, status = -ENOSPC;
564         s16 slot = ocfs2_get_inode_steal_slot(osb);
565
566         /* Start to steal inodes from the first slot after ours. */
567         if (slot == OCFS2_INVALID_SLOT)
568                 slot = osb->slot_num + 1;
569
570         for (i = 0; i < osb->max_slots; i++, slot++) {
571                 if (slot == osb->max_slots)
572                         slot = 0;
573
574                 if (slot == osb->slot_num)
575                         continue;
576
577                 status = ocfs2_reserve_suballoc_bits(osb, ac,
578                                                      INODE_ALLOC_SYSTEM_INODE,
579                                                      slot, NOT_ALLOC_NEW_GROUP);
580                 if (status >= 0) {
581                         ocfs2_set_inode_steal_slot(osb, slot);
582                         break;
583                 }
584
585                 ocfs2_free_ac_resource(ac);
586         }
587
588         return status;
589 }
590
591 int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
592                             struct ocfs2_alloc_context **ac)
593 {
594         int status;
595         s16 slot = ocfs2_get_inode_steal_slot(osb);
596
597         *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
598         if (!(*ac)) {
599                 status = -ENOMEM;
600                 mlog_errno(status);
601                 goto bail;
602         }
603
604         (*ac)->ac_bits_wanted = 1;
605         (*ac)->ac_which = OCFS2_AC_USE_INODE;
606
607         (*ac)->ac_group_search = ocfs2_block_group_search;
608
609         /*
610          * stat(2) can't handle i_ino > 32bits, so we tell the
611          * lower levels not to allocate us a block group past that
612          * limit.  The 'inode64' mount option avoids this behavior.
613          */
614         if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64))
615                 (*ac)->ac_max_block = (u32)~0U;
616
617         /*
618          * slot is set when we successfully steal inode from other nodes.
619          * It is reset in 3 places:
620          * 1. when we flush the truncate log
621          * 2. when we complete local alloc recovery.
622          * 3. when we successfully allocate from our own slot.
623          * After it is set, we will go on stealing inodes until we find the
624          * need to check our slots to see whether there is some space for us.
625          */
626         if (slot != OCFS2_INVALID_SLOT &&
627             atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_INODES_TO_STEAL)
628                 goto inode_steal;
629
630         atomic_set(&osb->s_num_inodes_stolen, 0);
631         status = ocfs2_reserve_suballoc_bits(osb, *ac,
632                                              INODE_ALLOC_SYSTEM_INODE,
633                                              osb->slot_num, ALLOC_NEW_GROUP);
634         if (status >= 0) {
635                 status = 0;
636
637                 /*
638                  * Some inodes must be freed by us, so try to allocate
639                  * from our own next time.
640                  */
641                 if (slot != OCFS2_INVALID_SLOT)
642                         ocfs2_init_inode_steal_slot(osb);
643                 goto bail;
644         } else if (status < 0 && status != -ENOSPC) {
645                 mlog_errno(status);
646                 goto bail;
647         }
648
649         ocfs2_free_ac_resource(*ac);
650
651 inode_steal:
652         status = ocfs2_steal_inode_from_other_nodes(osb, *ac);
653         atomic_inc(&osb->s_num_inodes_stolen);
654         if (status < 0) {
655                 if (status != -ENOSPC)
656                         mlog_errno(status);
657                 goto bail;
658         }
659
660         status = 0;
661 bail:
662         if ((status < 0) && *ac) {
663                 ocfs2_free_alloc_context(*ac);
664                 *ac = NULL;
665         }
666
667         mlog_exit(status);
668         return status;
669 }
670
671 /* local alloc code has to do the same thing, so rather than do this
672  * twice.. */
673 int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
674                                       struct ocfs2_alloc_context *ac)
675 {
676         int status;
677
678         ac->ac_which = OCFS2_AC_USE_MAIN;
679         ac->ac_group_search = ocfs2_cluster_group_search;
680
681         status = ocfs2_reserve_suballoc_bits(osb, ac,
682                                              GLOBAL_BITMAP_SYSTEM_INODE,
683                                              OCFS2_INVALID_SLOT,
684                                              ALLOC_NEW_GROUP);
685         if (status < 0 && status != -ENOSPC) {
686                 mlog_errno(status);
687                 goto bail;
688         }
689
690 bail:
691         return status;
692 }
693
694 /* Callers don't need to care which bitmap (local alloc or main) to
695  * use so we figure it out for them, but unfortunately this clutters
696  * things a bit. */
697 static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
698                                              u32 bits_wanted, u64 max_block,
699                                              struct ocfs2_alloc_context **ac)
700 {
701         int status;
702
703         mlog_entry_void();
704
705         *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
706         if (!(*ac)) {
707                 status = -ENOMEM;
708                 mlog_errno(status);
709                 goto bail;
710         }
711
712         (*ac)->ac_bits_wanted = bits_wanted;
713         (*ac)->ac_max_block = max_block;
714
715         status = -ENOSPC;
716         if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
717                 status = ocfs2_reserve_local_alloc_bits(osb,
718                                                         bits_wanted,
719                                                         *ac);
720                 if (status == -EFBIG) {
721                         /* The local alloc window is outside ac_max_block.
722                          * use the main bitmap. */
723                         status = -ENOSPC;
724                 } else if ((status < 0) && (status != -ENOSPC)) {
725                         mlog_errno(status);
726                         goto bail;
727                 }
728         }
729
730         if (status == -ENOSPC) {
731                 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
732                 if (status < 0) {
733                         if (status != -ENOSPC)
734                                 mlog_errno(status);
735                         goto bail;
736                 }
737         }
738
739         status = 0;
740 bail:
741         if ((status < 0) && *ac) {
742                 ocfs2_free_alloc_context(*ac);
743                 *ac = NULL;
744         }
745
746         mlog_exit(status);
747         return status;
748 }
749
750 int ocfs2_reserve_clusters(struct ocfs2_super *osb,
751                            u32 bits_wanted,
752                            struct ocfs2_alloc_context **ac)
753 {
754         return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac);
755 }
756
757 /*
758  * More or less lifted from ext3. I'll leave their description below:
759  *
760  * "For ext3 allocations, we must not reuse any blocks which are
761  * allocated in the bitmap buffer's "last committed data" copy.  This
762  * prevents deletes from freeing up the page for reuse until we have
763  * committed the delete transaction.
764  *
765  * If we didn't do this, then deleting something and reallocating it as
766  * data would allow the old block to be overwritten before the
767  * transaction committed (because we force data to disk before commit).
768  * This would lead to corruption if we crashed between overwriting the
769  * data and committing the delete.
770  *
771  * @@@ We may want to make this allocation behaviour conditional on
772  * data-writes at some point, and disable it for metadata allocations or
773  * sync-data inodes."
774  *
775  * Note: OCFS2 already does this differently for metadata vs data
776  * allocations, as those bitmaps are separate and undo access is never
777  * called on a metadata group descriptor.
778  */
779 static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
780                                          int nr)
781 {
782         struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
783
784         if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
785                 return 0;
786         if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data)
787                 return 1;
788
789         bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
790         return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
791 }
792
793 static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
794                                              struct buffer_head *bg_bh,
795                                              unsigned int bits_wanted,
796                                              unsigned int total_bits,
797                                              u16 *bit_off,
798                                              u16 *bits_found)
799 {
800         void *bitmap;
801         u16 best_offset, best_size;
802         int offset, start, found, status = 0;
803         struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
804
805         if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
806                 OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg);
807                 return -EIO;
808         }
809
810         found = start = best_offset = best_size = 0;
811         bitmap = bg->bg_bitmap;
812
813         while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
814                 if (offset == total_bits)
815                         break;
816
817                 if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
818                         /* We found a zero, but we can't use it as it
819                          * hasn't been put to disk yet! */
820                         found = 0;
821                         start = offset + 1;
822                 } else if (offset == start) {
823                         /* we found a zero */
824                         found++;
825                         /* move start to the next bit to test */
826                         start++;
827                 } else {
828                         /* got a zero after some ones */
829                         found = 1;
830                         start = offset + 1;
831                 }
832                 if (found > best_size) {
833                         best_size = found;
834                         best_offset = start - found;
835                 }
836                 /* we got everything we needed */
837                 if (found == bits_wanted) {
838                         /* mlog(0, "Found it all!\n"); */
839                         break;
840                 }
841         }
842
843         /* XXX: I think the first clause is equivalent to the second
844          *      - jlbec */
845         if (found == bits_wanted) {
846                 *bit_off = start - found;
847                 *bits_found = found;
848         } else if (best_size) {
849                 *bit_off = best_offset;
850                 *bits_found = best_size;
851         } else {
852                 status = -ENOSPC;
853                 /* No error log here -- see the comment above
854                  * ocfs2_test_bg_bit_allocatable */
855         }
856
857         return status;
858 }
859
860 static inline int ocfs2_block_group_set_bits(handle_t *handle,
861                                              struct inode *alloc_inode,
862                                              struct ocfs2_group_desc *bg,
863                                              struct buffer_head *group_bh,
864                                              unsigned int bit_off,
865                                              unsigned int num_bits)
866 {
867         int status;
868         void *bitmap = bg->bg_bitmap;
869         int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
870
871         mlog_entry_void();
872
873         if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
874                 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
875                 status = -EIO;
876                 goto bail;
877         }
878         BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
879
880         mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
881              num_bits);
882
883         if (ocfs2_is_cluster_bitmap(alloc_inode))
884                 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
885
886         status = ocfs2_journal_access(handle,
887                                       alloc_inode,
888                                       group_bh,
889                                       journal_type);
890         if (status < 0) {
891                 mlog_errno(status);
892                 goto bail;
893         }
894
895         le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
896
897         while(num_bits--)
898                 ocfs2_set_bit(bit_off++, bitmap);
899
900         status = ocfs2_journal_dirty(handle,
901                                      group_bh);
902         if (status < 0) {
903                 mlog_errno(status);
904                 goto bail;
905         }
906
907 bail:
908         mlog_exit(status);
909         return status;
910 }
911
912 /* find the one with the most empty bits */
913 static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl)
914 {
915         u16 curr, best;
916
917         BUG_ON(!cl->cl_next_free_rec);
918
919         best = curr = 0;
920         while (curr < le16_to_cpu(cl->cl_next_free_rec)) {
921                 if (le32_to_cpu(cl->cl_recs[curr].c_free) >
922                     le32_to_cpu(cl->cl_recs[best].c_free))
923                         best = curr;
924                 curr++;
925         }
926
927         BUG_ON(best >= le16_to_cpu(cl->cl_next_free_rec));
928         return best;
929 }
930
931 static int ocfs2_relink_block_group(handle_t *handle,
932                                     struct inode *alloc_inode,
933                                     struct buffer_head *fe_bh,
934                                     struct buffer_head *bg_bh,
935                                     struct buffer_head *prev_bg_bh,
936                                     u16 chain)
937 {
938         int status;
939         /* there is a really tiny chance the journal calls could fail,
940          * but we wouldn't want inconsistent blocks in *any* case. */
941         u64 fe_ptr, bg_ptr, prev_bg_ptr;
942         struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
943         struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
944         struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data;
945
946         if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
947                 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
948                 status = -EIO;
949                 goto out;
950         }
951         if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) {
952                 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg);
953                 status = -EIO;
954                 goto out;
955         }
956
957         mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
958              (unsigned long long)le64_to_cpu(fe->i_blkno), chain,
959              (unsigned long long)le64_to_cpu(bg->bg_blkno),
960              (unsigned long long)le64_to_cpu(prev_bg->bg_blkno));
961
962         fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno);
963         bg_ptr = le64_to_cpu(bg->bg_next_group);
964         prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group);
965
966         status = ocfs2_journal_access(handle, alloc_inode, prev_bg_bh,
967                                       OCFS2_JOURNAL_ACCESS_WRITE);
968         if (status < 0) {
969                 mlog_errno(status);
970                 goto out_rollback;
971         }
972
973         prev_bg->bg_next_group = bg->bg_next_group;
974
975         status = ocfs2_journal_dirty(handle, prev_bg_bh);
976         if (status < 0) {
977                 mlog_errno(status);
978                 goto out_rollback;
979         }
980
981         status = ocfs2_journal_access(handle, alloc_inode, bg_bh,
982                                       OCFS2_JOURNAL_ACCESS_WRITE);
983         if (status < 0) {
984                 mlog_errno(status);
985                 goto out_rollback;
986         }
987
988         bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno;
989
990         status = ocfs2_journal_dirty(handle, bg_bh);
991         if (status < 0) {
992                 mlog_errno(status);
993                 goto out_rollback;
994         }
995
996         status = ocfs2_journal_access(handle, alloc_inode, fe_bh,
997                                       OCFS2_JOURNAL_ACCESS_WRITE);
998         if (status < 0) {
999                 mlog_errno(status);
1000                 goto out_rollback;
1001         }
1002
1003         fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno;
1004
1005         status = ocfs2_journal_dirty(handle, fe_bh);
1006         if (status < 0) {
1007                 mlog_errno(status);
1008                 goto out_rollback;
1009         }
1010
1011         status = 0;
1012 out_rollback:
1013         if (status < 0) {
1014                 fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr);
1015                 bg->bg_next_group = cpu_to_le64(bg_ptr);
1016                 prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
1017         }
1018 out:
1019         mlog_exit(status);
1020         return status;
1021 }
1022
1023 static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
1024                                                      u32 wanted)
1025 {
1026         return le16_to_cpu(bg->bg_free_bits_count) > wanted;
1027 }
1028
1029 /* return 0 on success, -ENOSPC to keep searching and any other < 0
1030  * value on error. */
1031 static int ocfs2_cluster_group_search(struct inode *inode,
1032                                       struct buffer_head *group_bh,
1033                                       u32 bits_wanted, u32 min_bits,
1034                                       u64 max_block,
1035                                       u16 *bit_off, u16 *bits_found)
1036 {
1037         int search = -ENOSPC;
1038         int ret;
1039         u64 blkoff;
1040         struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
1041         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1042         u16 tmp_off, tmp_found;
1043         unsigned int max_bits, gd_cluster_off;
1044
1045         BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1046
1047         if (gd->bg_free_bits_count) {
1048                 max_bits = le16_to_cpu(gd->bg_bits);
1049
1050                 /* Tail groups in cluster bitmaps which aren't cpg
1051                  * aligned are prone to partial extention by a failed
1052                  * fs resize. If the file system resize never got to
1053                  * update the dinode cluster count, then we don't want
1054                  * to trust any clusters past it, regardless of what
1055                  * the group descriptor says. */
1056                 gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
1057                                                           le64_to_cpu(gd->bg_blkno));
1058                 if ((gd_cluster_off + max_bits) >
1059                     OCFS2_I(inode)->ip_clusters) {
1060                         max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
1061                         mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n",
1062                              (unsigned long long)le64_to_cpu(gd->bg_blkno),
1063                              le16_to_cpu(gd->bg_bits),
1064                              OCFS2_I(inode)->ip_clusters, max_bits);
1065                 }
1066
1067                 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
1068                                                         group_bh, bits_wanted,
1069                                                         max_bits,
1070                                                         &tmp_off, &tmp_found);
1071                 if (ret)
1072                         return ret;
1073
1074                 if (max_block) {
1075                         blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
1076                                                           gd_cluster_off +
1077                                                           tmp_off + tmp_found);
1078                         mlog(0, "Checking %llu against %llu\n",
1079                              (unsigned long long)blkoff,
1080                              (unsigned long long)max_block);
1081                         if (blkoff > max_block)
1082                                 return -ENOSPC;
1083                 }
1084
1085                 /* ocfs2_block_group_find_clear_bits() might
1086                  * return success, but we still want to return
1087                  * -ENOSPC unless it found the minimum number
1088                  * of bits. */
1089                 if (min_bits <= tmp_found) {
1090                         *bit_off = tmp_off;
1091                         *bits_found = tmp_found;
1092                         search = 0; /* success */
1093                 } else if (tmp_found) {
1094                         /*
1095                          * Don't show bits which we'll be returning
1096                          * for allocation to the local alloc bitmap.
1097                          */
1098                         ocfs2_local_alloc_seen_free_bits(osb, tmp_found);
1099                 }
1100         }
1101
1102         return search;
1103 }
1104
1105 static int ocfs2_block_group_search(struct inode *inode,
1106                                     struct buffer_head *group_bh,
1107                                     u32 bits_wanted, u32 min_bits,
1108                                     u64 max_block,
1109                                     u16 *bit_off, u16 *bits_found)
1110 {
1111         int ret = -ENOSPC;
1112         u64 blkoff;
1113         struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
1114
1115         BUG_ON(min_bits != 1);
1116         BUG_ON(ocfs2_is_cluster_bitmap(inode));
1117
1118         if (bg->bg_free_bits_count) {
1119                 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
1120                                                         group_bh, bits_wanted,
1121                                                         le16_to_cpu(bg->bg_bits),
1122                                                         bit_off, bits_found);
1123                 if (!ret && max_block) {
1124                         blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off +
1125                                 *bits_found;
1126                         mlog(0, "Checking %llu against %llu\n",
1127                              (unsigned long long)blkoff,
1128                              (unsigned long long)max_block);
1129                         if (blkoff > max_block)
1130                                 ret = -ENOSPC;
1131                 }
1132         }
1133
1134         return ret;
1135 }
1136
1137 static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
1138                                        handle_t *handle,
1139                                        struct buffer_head *di_bh,
1140                                        u32 num_bits,
1141                                        u16 chain)
1142 {
1143         int ret;
1144         u32 tmp_used;
1145         struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
1146         struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
1147
1148         ret = ocfs2_journal_access(handle, inode, di_bh,
1149                                    OCFS2_JOURNAL_ACCESS_WRITE);
1150         if (ret < 0) {
1151                 mlog_errno(ret);
1152                 goto out;
1153         }
1154
1155         tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
1156         di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
1157         le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
1158
1159         ret = ocfs2_journal_dirty(handle, di_bh);
1160         if (ret < 0)
1161                 mlog_errno(ret);
1162
1163 out:
1164         return ret;
1165 }
1166
1167 static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1168                                   handle_t *handle,
1169                                   u32 bits_wanted,
1170                                   u32 min_bits,
1171                                   u16 *bit_off,
1172                                   unsigned int *num_bits,
1173                                   u64 gd_blkno,
1174                                   u16 *bits_left)
1175 {
1176         int ret;
1177         u16 found;
1178         struct buffer_head *group_bh = NULL;
1179         struct ocfs2_group_desc *gd;
1180         struct inode *alloc_inode = ac->ac_inode;
1181
1182         ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh);
1183         if (ret < 0) {
1184                 mlog_errno(ret);
1185                 return ret;
1186         }
1187
1188         gd = (struct ocfs2_group_desc *) group_bh->b_data;
1189         if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
1190                 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd);
1191                 ret = -EIO;
1192                 goto out;
1193         }
1194
1195         ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
1196                                   ac->ac_max_block, bit_off, &found);
1197         if (ret < 0) {
1198                 if (ret != -ENOSPC)
1199                         mlog_errno(ret);
1200                 goto out;
1201         }
1202
1203         *num_bits = found;
1204
1205         ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
1206                                                *num_bits,
1207                                                le16_to_cpu(gd->bg_chain));
1208         if (ret < 0) {
1209                 mlog_errno(ret);
1210                 goto out;
1211         }
1212
1213         ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
1214                                          *bit_off, *num_bits);
1215         if (ret < 0)
1216                 mlog_errno(ret);
1217
1218         *bits_left = le16_to_cpu(gd->bg_free_bits_count);
1219
1220 out:
1221         brelse(group_bh);
1222
1223         return ret;
1224 }
1225
1226 static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1227                               handle_t *handle,
1228                               u32 bits_wanted,
1229                               u32 min_bits,
1230                               u16 *bit_off,
1231                               unsigned int *num_bits,
1232                               u64 *bg_blkno,
1233                               u16 *bits_left)
1234 {
1235         int status;
1236         u16 chain, tmp_bits;
1237         u32 tmp_used;
1238         u64 next_group;
1239         struct inode *alloc_inode = ac->ac_inode;
1240         struct buffer_head *group_bh = NULL;
1241         struct buffer_head *prev_group_bh = NULL;
1242         struct ocfs2_dinode *fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
1243         struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
1244         struct ocfs2_group_desc *bg;
1245
1246         chain = ac->ac_chain;
1247         mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n",
1248              bits_wanted, chain,
1249              (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
1250
1251         status = ocfs2_read_block(alloc_inode,
1252                                   le64_to_cpu(cl->cl_recs[chain].c_blkno),
1253                                   &group_bh);
1254         if (status < 0) {
1255                 mlog_errno(status);
1256                 goto bail;
1257         }
1258         bg = (struct ocfs2_group_desc *) group_bh->b_data;
1259         status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
1260         if (status) {
1261                 mlog_errno(status);
1262                 goto bail;
1263         }
1264
1265         status = -ENOSPC;
1266         /* for now, the chain search is a bit simplistic. We just use
1267          * the 1st group with any empty bits. */
1268         while ((status = ac->ac_group_search(alloc_inode, group_bh,
1269                                              bits_wanted, min_bits,
1270                                              ac->ac_max_block, bit_off,
1271                                              &tmp_bits)) == -ENOSPC) {
1272                 if (!bg->bg_next_group)
1273                         break;
1274
1275                 brelse(prev_group_bh);
1276                 prev_group_bh = NULL;
1277
1278                 next_group = le64_to_cpu(bg->bg_next_group);
1279                 prev_group_bh = group_bh;
1280                 group_bh = NULL;
1281                 status = ocfs2_read_block(alloc_inode,
1282                                           next_group, &group_bh);
1283                 if (status < 0) {
1284                         mlog_errno(status);
1285                         goto bail;
1286                 }
1287                 bg = (struct ocfs2_group_desc *) group_bh->b_data;
1288                 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
1289                 if (status) {
1290                         mlog_errno(status);
1291                         goto bail;
1292                 }
1293         }
1294         if (status < 0) {
1295                 if (status != -ENOSPC)
1296                         mlog_errno(status);
1297                 goto bail;
1298         }
1299
1300         mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
1301              tmp_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno));
1302
1303         *num_bits = tmp_bits;
1304
1305         BUG_ON(*num_bits == 0);
1306
1307         /*
1308          * Keep track of previous block descriptor read. When
1309          * we find a target, if we have read more than X
1310          * number of descriptors, and the target is reasonably
1311          * empty, relink him to top of his chain.
1312          *
1313          * We've read 0 extra blocks and only send one more to
1314          * the transaction, yet the next guy to search has a
1315          * much easier time.
1316          *
1317          * Do this *after* figuring out how many bits we're taking out
1318          * of our target group.
1319          */
1320         if (ac->ac_allow_chain_relink &&
1321             (prev_group_bh) &&
1322             (ocfs2_block_group_reasonably_empty(bg, *num_bits))) {
1323                 status = ocfs2_relink_block_group(handle, alloc_inode,
1324                                                   ac->ac_bh, group_bh,
1325                                                   prev_group_bh, chain);
1326                 if (status < 0) {
1327                         mlog_errno(status);
1328                         goto bail;
1329                 }
1330         }
1331
1332         /* Ok, claim our bits now: set the info on dinode, chainlist
1333          * and then the group */
1334         status = ocfs2_journal_access(handle,
1335                                       alloc_inode,
1336                                       ac->ac_bh,
1337                                       OCFS2_JOURNAL_ACCESS_WRITE);
1338         if (status < 0) {
1339                 mlog_errno(status);
1340                 goto bail;
1341         }
1342
1343         tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
1344         fe->id1.bitmap1.i_used = cpu_to_le32(*num_bits + tmp_used);
1345         le32_add_cpu(&cl->cl_recs[chain].c_free, -(*num_bits));
1346
1347         status = ocfs2_journal_dirty(handle,
1348                                      ac->ac_bh);
1349         if (status < 0) {
1350                 mlog_errno(status);
1351                 goto bail;
1352         }
1353
1354         status = ocfs2_block_group_set_bits(handle,
1355                                             alloc_inode,
1356                                             bg,
1357                                             group_bh,
1358                                             *bit_off,
1359                                             *num_bits);
1360         if (status < 0) {
1361                 mlog_errno(status);
1362                 goto bail;
1363         }
1364
1365         mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits,
1366              (unsigned long long)le64_to_cpu(fe->i_blkno));
1367
1368         *bg_blkno = le64_to_cpu(bg->bg_blkno);
1369         *bits_left = le16_to_cpu(bg->bg_free_bits_count);
1370 bail:
1371         brelse(group_bh);
1372         brelse(prev_group_bh);
1373
1374         mlog_exit(status);
1375         return status;
1376 }
1377
1378 /* will give out up to bits_wanted contiguous bits. */
1379 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1380                                      struct ocfs2_alloc_context *ac,
1381                                      handle_t *handle,
1382                                      u32 bits_wanted,
1383                                      u32 min_bits,
1384                                      u16 *bit_off,
1385                                      unsigned int *num_bits,
1386                                      u64 *bg_blkno)
1387 {
1388         int status;
1389         u16 victim, i;
1390         u16 bits_left = 0;
1391         u64 hint_blkno = ac->ac_last_group;
1392         struct ocfs2_chain_list *cl;
1393         struct ocfs2_dinode *fe;
1394
1395         mlog_entry_void();
1396
1397         BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
1398         BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given));
1399         BUG_ON(!ac->ac_bh);
1400
1401         fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
1402
1403         /* The bh was validated by the inode read during
1404          * ocfs2_reserve_suballoc_bits().  Any corruption is a code bug. */
1405         BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
1406
1407         if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
1408             le32_to_cpu(fe->id1.bitmap1.i_total)) {
1409                 ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used "
1410                             "bits but only %u total.",
1411                             (unsigned long long)le64_to_cpu(fe->i_blkno),
1412                             le32_to_cpu(fe->id1.bitmap1.i_used),
1413                             le32_to_cpu(fe->id1.bitmap1.i_total));
1414                 status = -EIO;
1415                 goto bail;
1416         }
1417
1418         if (hint_blkno) {
1419                 /* Attempt to short-circuit the usual search mechanism
1420                  * by jumping straight to the most recently used
1421                  * allocation group. This helps us mantain some
1422                  * contiguousness across allocations. */
1423                 status = ocfs2_search_one_group(ac, handle, bits_wanted,
1424                                                 min_bits, bit_off, num_bits,
1425                                                 hint_blkno, &bits_left);
1426                 if (!status) {
1427                         /* Be careful to update *bg_blkno here as the
1428                          * caller is expecting it to be filled in, and
1429                          * ocfs2_search_one_group() won't do that for
1430                          * us. */
1431                         *bg_blkno = hint_blkno;
1432                         goto set_hint;
1433                 }
1434                 if (status < 0 && status != -ENOSPC) {
1435                         mlog_errno(status);
1436                         goto bail;
1437                 }
1438         }
1439
1440         cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
1441
1442         victim = ocfs2_find_victim_chain(cl);
1443         ac->ac_chain = victim;
1444         ac->ac_allow_chain_relink = 1;
1445
1446         status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, bit_off,
1447                                     num_bits, bg_blkno, &bits_left);
1448         if (!status)
1449                 goto set_hint;
1450         if (status < 0 && status != -ENOSPC) {
1451                 mlog_errno(status);
1452                 goto bail;
1453         }
1454
1455         mlog(0, "Search of victim chain %u came up with nothing, "
1456              "trying all chains now.\n", victim);
1457
1458         /* If we didn't pick a good victim, then just default to
1459          * searching each chain in order. Don't allow chain relinking
1460          * because we only calculate enough journal credits for one
1461          * relink per alloc. */
1462         ac->ac_allow_chain_relink = 0;
1463         for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) {
1464                 if (i == victim)
1465                         continue;
1466                 if (!cl->cl_recs[i].c_free)
1467                         continue;
1468
1469                 ac->ac_chain = i;
1470                 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
1471                                             bit_off, num_bits, bg_blkno,
1472                                             &bits_left);
1473                 if (!status)
1474                         break;
1475                 if (status < 0 && status != -ENOSPC) {
1476                         mlog_errno(status);
1477                         goto bail;
1478                 }
1479         }
1480
1481 set_hint:
1482         if (status != -ENOSPC) {
1483                 /* If the next search of this group is not likely to
1484                  * yield a suitable extent, then we reset the last
1485                  * group hint so as to not waste a disk read */
1486                 if (bits_left < min_bits)
1487                         ac->ac_last_group = 0;
1488                 else
1489                         ac->ac_last_group = *bg_blkno;
1490         }
1491
1492 bail:
1493         mlog_exit(status);
1494         return status;
1495 }
1496
1497 int ocfs2_claim_metadata(struct ocfs2_super *osb,
1498                          handle_t *handle,
1499                          struct ocfs2_alloc_context *ac,
1500                          u32 bits_wanted,
1501                          u16 *suballoc_bit_start,
1502                          unsigned int *num_bits,
1503                          u64 *blkno_start)
1504 {
1505         int status;
1506         u64 bg_blkno;
1507
1508         BUG_ON(!ac);
1509         BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted));
1510         BUG_ON(ac->ac_which != OCFS2_AC_USE_META);
1511
1512         status = ocfs2_claim_suballoc_bits(osb,
1513                                            ac,
1514                                            handle,
1515                                            bits_wanted,
1516                                            1,
1517                                            suballoc_bit_start,
1518                                            num_bits,
1519                                            &bg_blkno);
1520         if (status < 0) {
1521                 mlog_errno(status);
1522                 goto bail;
1523         }
1524         atomic_inc(&osb->alloc_stats.bg_allocs);
1525
1526         *blkno_start = bg_blkno + (u64) *suballoc_bit_start;
1527         ac->ac_bits_given += (*num_bits);
1528         status = 0;
1529 bail:
1530         mlog_exit(status);
1531         return status;
1532 }
1533
1534 int ocfs2_claim_new_inode(struct ocfs2_super *osb,
1535                           handle_t *handle,
1536                           struct ocfs2_alloc_context *ac,
1537                           u16 *suballoc_bit,
1538                           u64 *fe_blkno)
1539 {
1540         int status;
1541         unsigned int num_bits;
1542         u64 bg_blkno;
1543
1544         mlog_entry_void();
1545
1546         BUG_ON(!ac);
1547         BUG_ON(ac->ac_bits_given != 0);
1548         BUG_ON(ac->ac_bits_wanted != 1);
1549         BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
1550
1551         status = ocfs2_claim_suballoc_bits(osb,
1552                                            ac,
1553                                            handle,
1554                                            1,
1555                                            1,
1556                                            suballoc_bit,
1557                                            &num_bits,
1558                                            &bg_blkno);
1559         if (status < 0) {
1560                 mlog_errno(status);
1561                 goto bail;
1562         }
1563         atomic_inc(&osb->alloc_stats.bg_allocs);
1564
1565         BUG_ON(num_bits != 1);
1566
1567         *fe_blkno = bg_blkno + (u64) (*suballoc_bit);
1568         ac->ac_bits_given++;
1569         status = 0;
1570 bail:
1571         mlog_exit(status);
1572         return status;
1573 }
1574
1575 /* translate a group desc. blkno and it's bitmap offset into
1576  * disk cluster offset. */
1577 static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
1578                                                    u64 bg_blkno,
1579                                                    u16 bg_bit_off)
1580 {
1581         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1582         u32 cluster = 0;
1583
1584         BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1585
1586         if (bg_blkno != osb->first_cluster_group_blkno)
1587                 cluster = ocfs2_blocks_to_clusters(inode->i_sb, bg_blkno);
1588         cluster += (u32) bg_bit_off;
1589         return cluster;
1590 }
1591
1592 /* given a cluster offset, calculate which block group it belongs to
1593  * and return that block offset. */
1594 u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster)
1595 {
1596         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1597         u32 group_no;
1598
1599         BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1600
1601         group_no = cluster / osb->bitmap_cpg;
1602         if (!group_no)
1603                 return osb->first_cluster_group_blkno;
1604         return ocfs2_clusters_to_blocks(inode->i_sb,
1605                                         group_no * osb->bitmap_cpg);
1606 }
1607
1608 /* given the block number of a cluster start, calculate which cluster
1609  * group and descriptor bitmap offset that corresponds to. */
1610 static inline void ocfs2_block_to_cluster_group(struct inode *inode,
1611                                                 u64 data_blkno,
1612                                                 u64 *bg_blkno,
1613                                                 u16 *bg_bit_off)
1614 {
1615         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1616         u32 data_cluster = ocfs2_blocks_to_clusters(osb->sb, data_blkno);
1617
1618         BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1619
1620         *bg_blkno = ocfs2_which_cluster_group(inode,
1621                                               data_cluster);
1622
1623         if (*bg_blkno == osb->first_cluster_group_blkno)
1624                 *bg_bit_off = (u16) data_cluster;
1625         else
1626                 *bg_bit_off = (u16) ocfs2_blocks_to_clusters(osb->sb,
1627                                                              data_blkno - *bg_blkno);
1628 }
1629
1630 /*
1631  * min_bits - minimum contiguous chunk from this total allocation we
1632  * can handle. set to what we asked for originally for a full
1633  * contig. allocation, set to '1' to indicate we can deal with extents
1634  * of any size.
1635  */
1636 int __ocfs2_claim_clusters(struct ocfs2_super *osb,
1637                            handle_t *handle,
1638                            struct ocfs2_alloc_context *ac,
1639                            u32 min_clusters,
1640                            u32 max_clusters,
1641                            u32 *cluster_start,
1642                            u32 *num_clusters)
1643 {
1644         int status;
1645         unsigned int bits_wanted = max_clusters;
1646         u64 bg_blkno = 0;
1647         u16 bg_bit_off;
1648
1649         mlog_entry_void();
1650
1651         BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
1652
1653         BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
1654                && ac->ac_which != OCFS2_AC_USE_MAIN);
1655
1656         if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
1657                 status = ocfs2_claim_local_alloc_bits(osb,
1658                                                       handle,
1659                                                       ac,
1660                                                       bits_wanted,
1661                                                       cluster_start,
1662                                                       num_clusters);
1663                 if (!status)
1664                         atomic_inc(&osb->alloc_stats.local_data);
1665         } else {
1666                 if (min_clusters > (osb->bitmap_cpg - 1)) {
1667                         /* The only paths asking for contiguousness
1668                          * should know about this already. */
1669                         mlog(ML_ERROR, "minimum allocation requested %u exceeds "
1670                              "group bitmap size %u!\n", min_clusters,
1671                              osb->bitmap_cpg);
1672                         status = -ENOSPC;
1673                         goto bail;
1674                 }
1675                 /* clamp the current request down to a realistic size. */
1676                 if (bits_wanted > (osb->bitmap_cpg - 1))
1677                         bits_wanted = osb->bitmap_cpg - 1;
1678
1679                 status = ocfs2_claim_suballoc_bits(osb,
1680                                                    ac,
1681                                                    handle,
1682                                                    bits_wanted,
1683                                                    min_clusters,
1684                                                    &bg_bit_off,
1685                                                    num_clusters,
1686                                                    &bg_blkno);
1687                 if (!status) {
1688                         *cluster_start =
1689                                 ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode,
1690                                                                  bg_blkno,
1691                                                                  bg_bit_off);
1692                         atomic_inc(&osb->alloc_stats.bitmap_data);
1693                 }
1694         }
1695         if (status < 0) {
1696                 if (status != -ENOSPC)
1697                         mlog_errno(status);
1698                 goto bail;
1699         }
1700
1701         ac->ac_bits_given += *num_clusters;
1702
1703 bail:
1704         mlog_exit(status);
1705         return status;
1706 }
1707
1708 int ocfs2_claim_clusters(struct ocfs2_super *osb,
1709                          handle_t *handle,
1710                          struct ocfs2_alloc_context *ac,
1711                          u32 min_clusters,
1712                          u32 *cluster_start,
1713                          u32 *num_clusters)
1714 {
1715         unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
1716
1717         return __ocfs2_claim_clusters(osb, handle, ac, min_clusters,
1718                                       bits_wanted, cluster_start, num_clusters);
1719 }
1720
1721 static inline int ocfs2_block_group_clear_bits(handle_t *handle,
1722                                                struct inode *alloc_inode,
1723                                                struct ocfs2_group_desc *bg,
1724                                                struct buffer_head *group_bh,
1725                                                unsigned int bit_off,
1726                                                unsigned int num_bits)
1727 {
1728         int status;
1729         unsigned int tmp;
1730         int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
1731         struct ocfs2_group_desc *undo_bg = NULL;
1732
1733         mlog_entry_void();
1734
1735         if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
1736                 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
1737                 status = -EIO;
1738                 goto bail;
1739         }
1740
1741         mlog(0, "off = %u, num = %u\n", bit_off, num_bits);
1742
1743         if (ocfs2_is_cluster_bitmap(alloc_inode))
1744                 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
1745
1746         status = ocfs2_journal_access(handle, alloc_inode, group_bh,
1747                                       journal_type);
1748         if (status < 0) {
1749                 mlog_errno(status);
1750                 goto bail;
1751         }
1752
1753         if (ocfs2_is_cluster_bitmap(alloc_inode))
1754                 undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data;
1755
1756         tmp = num_bits;
1757         while(tmp--) {
1758                 ocfs2_clear_bit((bit_off + tmp),
1759                                 (unsigned long *) bg->bg_bitmap);
1760                 if (ocfs2_is_cluster_bitmap(alloc_inode))
1761                         ocfs2_set_bit(bit_off + tmp,
1762                                       (unsigned long *) undo_bg->bg_bitmap);
1763         }
1764         le16_add_cpu(&bg->bg_free_bits_count, num_bits);
1765
1766         status = ocfs2_journal_dirty(handle, group_bh);
1767         if (status < 0)
1768                 mlog_errno(status);
1769 bail:
1770         return status;
1771 }
1772
1773 /*
1774  * expects the suballoc inode to already be locked.
1775  */
1776 int ocfs2_free_suballoc_bits(handle_t *handle,
1777                              struct inode *alloc_inode,
1778                              struct buffer_head *alloc_bh,
1779                              unsigned int start_bit,
1780                              u64 bg_blkno,
1781                              unsigned int count)
1782 {
1783         int status = 0;
1784         u32 tmp_used;
1785         struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data;
1786         struct ocfs2_chain_list *cl = &fe->id2.i_chain;
1787         struct buffer_head *group_bh = NULL;
1788         struct ocfs2_group_desc *group;
1789
1790         mlog_entry_void();
1791
1792         /* The alloc_bh comes from ocfs2_free_dinode() or
1793          * ocfs2_free_clusters().  The callers have all locked the
1794          * allocator and gotten alloc_bh from the lock call.  This
1795          * validates the dinode buffer.  Any corruption that has happended
1796          * is a code bug. */
1797         BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
1798         BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
1799
1800         mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
1801              (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
1802              (unsigned long long)bg_blkno, start_bit);
1803
1804         status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh);
1805         if (status < 0) {
1806                 mlog_errno(status);
1807                 goto bail;
1808         }
1809
1810         group = (struct ocfs2_group_desc *) group_bh->b_data;
1811         status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group);
1812         if (status) {
1813                 mlog_errno(status);
1814                 goto bail;
1815         }
1816         BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
1817
1818         status = ocfs2_block_group_clear_bits(handle, alloc_inode,
1819                                               group, group_bh,
1820                                               start_bit, count);
1821         if (status < 0) {
1822                 mlog_errno(status);
1823                 goto bail;
1824         }
1825
1826         status = ocfs2_journal_access(handle, alloc_inode, alloc_bh,
1827                                       OCFS2_JOURNAL_ACCESS_WRITE);
1828         if (status < 0) {
1829                 mlog_errno(status);
1830                 goto bail;
1831         }
1832
1833         le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free,
1834                      count);
1835         tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
1836         fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count);
1837
1838         status = ocfs2_journal_dirty(handle, alloc_bh);
1839         if (status < 0) {
1840                 mlog_errno(status);
1841                 goto bail;
1842         }
1843
1844 bail:
1845         brelse(group_bh);
1846
1847         mlog_exit(status);
1848         return status;
1849 }
1850
1851 int ocfs2_free_dinode(handle_t *handle,
1852                       struct inode *inode_alloc_inode,
1853                       struct buffer_head *inode_alloc_bh,
1854                       struct ocfs2_dinode *di)
1855 {
1856         u64 blk = le64_to_cpu(di->i_blkno);
1857         u16 bit = le16_to_cpu(di->i_suballoc_bit);
1858         u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1859
1860         return ocfs2_free_suballoc_bits(handle, inode_alloc_inode,
1861                                         inode_alloc_bh, bit, bg_blkno, 1);
1862 }
1863
1864 int ocfs2_free_clusters(handle_t *handle,
1865                        struct inode *bitmap_inode,
1866                        struct buffer_head *bitmap_bh,
1867                        u64 start_blk,
1868                        unsigned int num_clusters)
1869 {
1870         int status;
1871         u16 bg_start_bit;
1872         u64 bg_blkno;
1873         struct ocfs2_dinode *fe;
1874
1875         /* You can't ever have a contiguous set of clusters
1876          * bigger than a block group bitmap so we never have to worry
1877          * about looping on them. */
1878
1879         mlog_entry_void();
1880
1881         /* This is expensive. We can safely remove once this stuff has
1882          * gotten tested really well. */
1883         BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk)));
1884
1885         fe = (struct ocfs2_dinode *) bitmap_bh->b_data;
1886
1887         ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno,
1888                                      &bg_start_bit);
1889
1890         mlog(0, "want to free %u clusters starting at block %llu\n",
1891              num_clusters, (unsigned long long)start_blk);
1892         mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n",
1893              (unsigned long long)bg_blkno, bg_start_bit);
1894
1895         status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
1896                                           bg_start_bit, bg_blkno,
1897                                           num_clusters);
1898         if (status < 0) {
1899                 mlog_errno(status);
1900                 goto out;
1901         }
1902
1903         ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb),
1904                                          num_clusters);
1905
1906 out:
1907         mlog_exit(status);
1908         return status;
1909 }
1910
1911 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg)
1912 {
1913         printk("Block Group:\n");
1914         printk("bg_signature:       %s\n", bg->bg_signature);
1915         printk("bg_size:            %u\n", bg->bg_size);
1916         printk("bg_bits:            %u\n", bg->bg_bits);
1917         printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count);
1918         printk("bg_chain:           %u\n", bg->bg_chain);
1919         printk("bg_generation:      %u\n", le32_to_cpu(bg->bg_generation));
1920         printk("bg_next_group:      %llu\n",
1921                (unsigned long long)bg->bg_next_group);
1922         printk("bg_parent_dinode:   %llu\n",
1923                (unsigned long long)bg->bg_parent_dinode);
1924         printk("bg_blkno:           %llu\n",
1925                (unsigned long long)bg->bg_blkno);
1926 }
1927
1928 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
1929 {
1930         int i;
1931
1932         printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno);
1933         printk("i_signature:                  %s\n", fe->i_signature);
1934         printk("i_size:                       %llu\n",
1935                (unsigned long long)fe->i_size);
1936         printk("i_clusters:                   %u\n", fe->i_clusters);
1937         printk("i_generation:                 %u\n",
1938                le32_to_cpu(fe->i_generation));
1939         printk("id1.bitmap1.i_used:           %u\n",
1940                le32_to_cpu(fe->id1.bitmap1.i_used));
1941         printk("id1.bitmap1.i_total:          %u\n",
1942                le32_to_cpu(fe->id1.bitmap1.i_total));
1943         printk("id2.i_chain.cl_cpg:           %u\n", fe->id2.i_chain.cl_cpg);
1944         printk("id2.i_chain.cl_bpc:           %u\n", fe->id2.i_chain.cl_bpc);
1945         printk("id2.i_chain.cl_count:         %u\n", fe->id2.i_chain.cl_count);
1946         printk("id2.i_chain.cl_next_free_rec: %u\n",
1947                fe->id2.i_chain.cl_next_free_rec);
1948         for(i = 0; i < fe->id2.i_chain.cl_next_free_rec; i++) {
1949                 printk("fe->id2.i_chain.cl_recs[%d].c_free:  %u\n", i,
1950                        fe->id2.i_chain.cl_recs[i].c_free);
1951                 printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i,
1952                        fe->id2.i_chain.cl_recs[i].c_total);
1953                 printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i,
1954                        (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
1955         }
1956 }
1957
1958 /*
1959  * For a given allocation, determine which allocators will need to be
1960  * accessed, and lock them, reserving the appropriate number of bits.
1961  *
1962  * Sparse file systems call this from ocfs2_write_begin_nolock()
1963  * and ocfs2_allocate_unwritten_extents().
1964  *
1965  * File systems which don't support holes call this from
1966  * ocfs2_extend_allocation().
1967  */
1968 int ocfs2_lock_allocators(struct inode *inode,
1969                           struct ocfs2_extent_tree *et,
1970                           u32 clusters_to_add, u32 extents_to_split,
1971                           struct ocfs2_alloc_context **data_ac,
1972                           struct ocfs2_alloc_context **meta_ac)
1973 {
1974         int ret = 0, num_free_extents;
1975         unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
1976         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1977
1978         *meta_ac = NULL;
1979         if (data_ac)
1980                 *data_ac = NULL;
1981
1982         BUG_ON(clusters_to_add != 0 && data_ac == NULL);
1983
1984         num_free_extents = ocfs2_num_free_extents(osb, inode, et);
1985         if (num_free_extents < 0) {
1986                 ret = num_free_extents;
1987                 mlog_errno(ret);
1988                 goto out;
1989         }
1990
1991         /*
1992          * Sparse allocation file systems need to be more conservative
1993          * with reserving room for expansion - the actual allocation
1994          * happens while we've got a journal handle open so re-taking
1995          * a cluster lock (because we ran out of room for another
1996          * extent) will violate ordering rules.
1997          *
1998          * Most of the time we'll only be seeing this 1 cluster at a time
1999          * anyway.
2000          *
2001          * Always lock for any unwritten extents - we might want to
2002          * add blocks during a split.
2003          */
2004         if (!num_free_extents ||
2005             (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
2006                 ret = ocfs2_reserve_new_metadata(osb, et->et_root_el, meta_ac);
2007                 if (ret < 0) {
2008                         if (ret != -ENOSPC)
2009                                 mlog_errno(ret);
2010                         goto out;
2011                 }
2012         }
2013
2014         if (clusters_to_add == 0)
2015                 goto out;
2016
2017         ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
2018         if (ret < 0) {
2019                 if (ret != -ENOSPC)
2020                         mlog_errno(ret);
2021                 goto out;
2022         }
2023
2024 out:
2025         if (ret) {
2026                 if (*meta_ac) {
2027                         ocfs2_free_alloc_context(*meta_ac);
2028                         *meta_ac = NULL;
2029                 }
2030
2031                 /*
2032                  * We cannot have an error and a non null *data_ac.
2033                  */
2034         }
2035
2036         return ret;
2037 }