ocfs2: Wrap group descriptor reads in a dedicated function.
[safe/jmp/linux-2.6] / fs / ocfs2 / suballoc.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * suballoc.c
5  *
6  * metadata alloc and free
7  * Inspired by ext3 block groups.
8  *
9  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
10  *
11  * This program is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU General Public
13  * License as published by the Free Software Foundation; either
14  * version 2 of the License, or (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public
22  * License along with this program; if not, write to the
23  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24  * Boston, MA 02111-1307, USA.
25  */
26
27 #include <linux/fs.h>
28 #include <linux/types.h>
29 #include <linux/slab.h>
30 #include <linux/highmem.h>
31
32 #define MLOG_MASK_PREFIX ML_DISK_ALLOC
33 #include <cluster/masklog.h>
34
35 #include "ocfs2.h"
36
37 #include "alloc.h"
38 #include "dlmglue.h"
39 #include "inode.h"
40 #include "journal.h"
41 #include "localalloc.h"
42 #include "suballoc.h"
43 #include "super.h"
44 #include "sysfile.h"
45 #include "uptodate.h"
46
47 #include "buffer_head_io.h"
48 /* Values passed as the alloc_new_group argument of ocfs2_reserve_suballoc_bits(). */
49 #define NOT_ALLOC_NEW_GROUP             0
50 #define ALLOC_NEW_GROUP                 1
51 /* Once this many inodes were stolen, ocfs2_reserve_new_inode() tries its own slot again first. */
52 #define OCFS2_MAX_INODES_TO_STEAL       1024
53 /* Forward declarations of this file's static helpers. */
54 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
55 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
56 static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
57 static int ocfs2_block_group_fill(handle_t *handle,
58                                   struct inode *alloc_inode,
59                                   struct buffer_head *bg_bh,
60                                   u64 group_blkno,
61                                   u16 my_chain,
62                                   struct ocfs2_chain_list *cl);
63 static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
64                                    struct inode *alloc_inode,
65                                    struct buffer_head *bh,
66                                    u64 max_block);
67
68 static int ocfs2_cluster_group_search(struct inode *inode,
69                                       struct buffer_head *group_bh,
70                                       u32 bits_wanted, u32 min_bits,
71                                       u64 max_block,
72                                       u16 *bit_off, u16 *bits_found);
73 static int ocfs2_block_group_search(struct inode *inode,
74                                     struct buffer_head *group_bh,
75                                     u32 bits_wanted, u32 min_bits,
76                                     u64 max_block,
77                                     u16 *bit_off, u16 *bits_found);
78 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
79                                      struct ocfs2_alloc_context *ac,
80                                      handle_t *handle,
81                                      u32 bits_wanted,
82                                      u32 min_bits,
83                                      u16 *bit_off,
84                                      unsigned int *num_bits,
85                                      u64 *bg_blkno);
86 static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
87                                          int nr);
88 static inline int ocfs2_block_group_set_bits(handle_t *handle,
89                                              struct inode *alloc_inode,
90                                              struct ocfs2_group_desc *bg,
91                                              struct buffer_head *group_bh,
92                                              unsigned int bit_off,
93                                              unsigned int num_bits);
94 static inline int ocfs2_block_group_clear_bits(handle_t *handle,
95                                                struct inode *alloc_inode,
96                                                struct ocfs2_group_desc *bg,
97                                                struct buffer_head *group_bh,
98                                                unsigned int bit_off,
99                                                unsigned int num_bits);
100
101 static int ocfs2_relink_block_group(handle_t *handle,
102                                     struct inode *alloc_inode,
103                                     struct buffer_head *fe_bh,
104                                     struct buffer_head *bg_bh,
105                                     struct buffer_head *prev_bg_bh,
106                                     u16 chain);
107 static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
108                                                      u32 wanted);
109 static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
110                                                    u64 bg_blkno,
111                                                    u16 bg_bit_off);
112 static inline void ocfs2_block_to_cluster_group(struct inode *inode,
113                                                 u64 data_blkno,
114                                                 u64 *bg_blkno,
115                                                 u16 *bg_bit_off);
116 static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
117                                              u32 bits_wanted, u64 max_block,
118                                              struct ocfs2_alloc_context **ac);
119
120 void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
121 {
122         struct inode *inode = ac->ac_inode;
123
124         if (inode) {
125                 if (ac->ac_which != OCFS2_AC_USE_LOCAL)
126                         ocfs2_inode_unlock(inode, 1);
127
128                 mutex_unlock(&inode->i_mutex);
129
130                 iput(inode);
131                 ac->ac_inode = NULL;
132         }
133         brelse(ac->ac_bh);
134         ac->ac_bh = NULL;
135 }
136
/*
 * Tear down an allocation context completely: first give back whatever
 * it holds via ocfs2_free_ac_resource(), then free the structure.
 * @ac must not be used by the caller afterwards.
 */
void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
{
        /* Resources must go before the memory that tracks them. */
        ocfs2_free_ac_resource(ac);

        kfree(ac);
}
142
143 static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
144 {
145         return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
146 }
147
148 int ocfs2_validate_group_descriptor(struct super_block *sb,
149                                     struct ocfs2_dinode *di,
150                                     struct buffer_head *bh,
151                                     int clean_error)
152 {
153         unsigned int max_bits;
154         struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
155
156 #define do_error(fmt, ...)                                              \
157         do{                                                             \
158                 if (clean_error)                                        \
159                         mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__);        \
160                 else                                                    \
161                         ocfs2_error(sb, fmt, ##__VA_ARGS__);            \
162         } while (0)
163
164         if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
165                 do_error("Group descriptor #%llu has bad signature %.*s",
166                          (unsigned long long)bh->b_blocknr, 7,
167                          gd->bg_signature);
168                 return -EINVAL;
169         }
170
171         if (le64_to_cpu(gd->bg_blkno) != bh->b_blocknr) {
172                 do_error("Group descriptor #%llu has an invalid bg_blkno "
173                          "of %llu",
174                          (unsigned long long)bh->b_blocknr,
175                          (unsigned long long)le64_to_cpu(gd->bg_blkno));
176                 return -EINVAL;
177         }
178
179         if (le32_to_cpu(gd->bg_generation) != OCFS2_SB(sb)->fs_generation) {
180                 do_error("Group descriptor #%llu has an invalid "
181                          "fs_generation of #%u",
182                          (unsigned long long)bh->b_blocknr,
183                          le32_to_cpu(gd->bg_generation));
184                 return -EINVAL;
185         }
186
187         if (di->i_blkno != gd->bg_parent_dinode) {
188                 do_error("Group descriptor #%llu has bad parent "
189                          "pointer (%llu, expected %llu)",
190                          (unsigned long long)bh->b_blocknr,
191                          (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
192                          (unsigned long long)le64_to_cpu(di->i_blkno));
193                 return -EINVAL;
194         }
195
196         max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
197         if (le16_to_cpu(gd->bg_bits) > max_bits) {
198                 do_error("Group descriptor #%llu has bit count of %u",
199                          (unsigned long long)bh->b_blocknr,
200                          le16_to_cpu(gd->bg_bits));
201                 return -EINVAL;
202         }
203
204         if (le16_to_cpu(gd->bg_chain) >=
205             le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
206                 do_error("Group descriptor #%llu has bad chain %u",
207                          (unsigned long long)bh->b_blocknr,
208                          le16_to_cpu(gd->bg_chain));
209                 return -EINVAL;
210         }
211
212         if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
213                 do_error("Group descriptor #%llu has bit count %u but "
214                          "claims that %u are free",
215                          (unsigned long long)bh->b_blocknr,
216                          le16_to_cpu(gd->bg_bits),
217                          le16_to_cpu(gd->bg_free_bits_count));
218                 return -EINVAL;
219         }
220
221         if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
222                 do_error("Group descriptor #%llu has bit count %u but "
223                          "max bitmap bits of %u",
224                          (unsigned long long)bh->b_blocknr,
225                          le16_to_cpu(gd->bg_bits),
226                          8 * le16_to_cpu(gd->bg_size));
227                 return -EINVAL;
228         }
229 #undef do_error
230
231         return 0;
232 }
233
234 int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
235                                 u64 gd_blkno, struct buffer_head **bh)
236 {
237         int rc;
238         struct buffer_head *tmp = *bh;
239
240         rc = ocfs2_read_block(inode, gd_blkno, &tmp);
241         if (rc)
242                 goto out;
243
244         rc = ocfs2_validate_group_descriptor(inode->i_sb, di, tmp, 0);
245         if (rc) {
246                 brelse(tmp);
247                 goto out;
248         }
249
250         /* If ocfs2_read_block() got us a new bh, pass it up. */
251         if (!*bh)
252                 *bh = tmp;
253
254 out:
255         return rc;
256 }
257
258 static int ocfs2_block_group_fill(handle_t *handle,
259                                   struct inode *alloc_inode,
260                                   struct buffer_head *bg_bh,
261                                   u64 group_blkno,
262                                   u16 my_chain,
263                                   struct ocfs2_chain_list *cl)
264 {
265         int status = 0;
266         struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
267         struct super_block * sb = alloc_inode->i_sb;
268
269         mlog_entry_void();
270
271         if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) {
272                 ocfs2_error(alloc_inode->i_sb, "group block (%llu) != "
273                             "b_blocknr (%llu)",
274                             (unsigned long long)group_blkno,
275                             (unsigned long long) bg_bh->b_blocknr);
276                 status = -EIO;
277                 goto bail;
278         }
279
280         status = ocfs2_journal_access(handle,
281                                       alloc_inode,
282                                       bg_bh,
283                                       OCFS2_JOURNAL_ACCESS_CREATE);
284         if (status < 0) {
285                 mlog_errno(status);
286                 goto bail;
287         }
288
289         memset(bg, 0, sb->s_blocksize);
290         strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE);
291         bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
292         bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb));
293         bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl));
294         bg->bg_chain = cpu_to_le16(my_chain);
295         bg->bg_next_group = cl->cl_recs[my_chain].c_blkno;
296         bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno);
297         bg->bg_blkno = cpu_to_le64(group_blkno);
298         /* set the 1st bit in the bitmap to account for the descriptor block */
299         ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap);
300         bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1);
301
302         status = ocfs2_journal_dirty(handle, bg_bh);
303         if (status < 0)
304                 mlog_errno(status);
305
306         /* There is no need to zero out or otherwise initialize the
307          * other blocks in a group - All valid FS metadata in a block
308          * group stores the superblock fs_generation value at
309          * allocation time. */
310
311 bail:
312         mlog_exit(status);
313         return status;
314 }
315
316 static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
317 {
318         u16 curr, best;
319
320         best = curr = 0;
321         while (curr < le16_to_cpu(cl->cl_count)) {
322                 if (le32_to_cpu(cl->cl_recs[best].c_total) >
323                     le32_to_cpu(cl->cl_recs[curr].c_total))
324                         best = curr;
325                 curr++;
326         }
327         return best;
328 }
329
330 /*
331  * We expect the block group allocator to already be locked.
332  */
333 static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
334                                    struct inode *alloc_inode,
335                                    struct buffer_head *bh,
336                                    u64 max_block)
337 {
338         int status, credits;
339         struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
340         struct ocfs2_chain_list *cl;
341         struct ocfs2_alloc_context *ac = NULL;
342         handle_t *handle = NULL;
343         u32 bit_off, num_bits;
344         u16 alloc_rec;
345         u64 bg_blkno;
346         struct buffer_head *bg_bh = NULL;
347         struct ocfs2_group_desc *bg;
348
349         BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode));
350
351         mlog_entry_void();
352
353         cl = &fe->id2.i_chain;
354         status = ocfs2_reserve_clusters_with_limit(osb,
355                                                    le16_to_cpu(cl->cl_cpg),
356                                                    max_block, &ac);
357         if (status < 0) {
358                 if (status != -ENOSPC)
359                         mlog_errno(status);
360                 goto bail;
361         }
362
363         credits = ocfs2_calc_group_alloc_credits(osb->sb,
364                                                  le16_to_cpu(cl->cl_cpg));
365         handle = ocfs2_start_trans(osb, credits);
366         if (IS_ERR(handle)) {
367                 status = PTR_ERR(handle);
368                 handle = NULL;
369                 mlog_errno(status);
370                 goto bail;
371         }
372
373         status = ocfs2_claim_clusters(osb,
374                                       handle,
375                                       ac,
376                                       le16_to_cpu(cl->cl_cpg),
377                                       &bit_off,
378                                       &num_bits);
379         if (status < 0) {
380                 if (status != -ENOSPC)
381                         mlog_errno(status);
382                 goto bail;
383         }
384
385         alloc_rec = ocfs2_find_smallest_chain(cl);
386
387         /* setup the group */
388         bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
389         mlog(0, "new descriptor, record %u, at block %llu\n",
390              alloc_rec, (unsigned long long)bg_blkno);
391
392         bg_bh = sb_getblk(osb->sb, bg_blkno);
393         if (!bg_bh) {
394                 status = -EIO;
395                 mlog_errno(status);
396                 goto bail;
397         }
398         ocfs2_set_new_buffer_uptodate(alloc_inode, bg_bh);
399
400         status = ocfs2_block_group_fill(handle,
401                                         alloc_inode,
402                                         bg_bh,
403                                         bg_blkno,
404                                         alloc_rec,
405                                         cl);
406         if (status < 0) {
407                 mlog_errno(status);
408                 goto bail;
409         }
410
411         bg = (struct ocfs2_group_desc *) bg_bh->b_data;
412
413         status = ocfs2_journal_access(handle, alloc_inode,
414                                       bh, OCFS2_JOURNAL_ACCESS_WRITE);
415         if (status < 0) {
416                 mlog_errno(status);
417                 goto bail;
418         }
419
420         le32_add_cpu(&cl->cl_recs[alloc_rec].c_free,
421                      le16_to_cpu(bg->bg_free_bits_count));
422         le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits));
423         cl->cl_recs[alloc_rec].c_blkno  = cpu_to_le64(bg_blkno);
424         if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
425                 le16_add_cpu(&cl->cl_next_free_rec, 1);
426
427         le32_add_cpu(&fe->id1.bitmap1.i_used, le16_to_cpu(bg->bg_bits) -
428                                         le16_to_cpu(bg->bg_free_bits_count));
429         le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits));
430         le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg));
431
432         status = ocfs2_journal_dirty(handle, bh);
433         if (status < 0) {
434                 mlog_errno(status);
435                 goto bail;
436         }
437
438         spin_lock(&OCFS2_I(alloc_inode)->ip_lock);
439         OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
440         fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb,
441                                              le32_to_cpu(fe->i_clusters)));
442         spin_unlock(&OCFS2_I(alloc_inode)->ip_lock);
443         i_size_write(alloc_inode, le64_to_cpu(fe->i_size));
444         alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);
445
446         status = 0;
447 bail:
448         if (handle)
449                 ocfs2_commit_trans(osb, handle);
450
451         if (ac)
452                 ocfs2_free_alloc_context(ac);
453
454         brelse(bg_bh);
455
456         mlog_exit(status);
457         return status;
458 }
459
460 static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
461                                        struct ocfs2_alloc_context *ac,
462                                        int type,
463                                        u32 slot,
464                                        int alloc_new_group)
465 {
466         int status;
467         u32 bits_wanted = ac->ac_bits_wanted;
468         struct inode *alloc_inode;
469         struct buffer_head *bh = NULL;
470         struct ocfs2_dinode *fe;
471         u32 free_bits;
472
473         mlog_entry_void();
474
475         alloc_inode = ocfs2_get_system_file_inode(osb, type, slot);
476         if (!alloc_inode) {
477                 mlog_errno(-EINVAL);
478                 return -EINVAL;
479         }
480
481         mutex_lock(&alloc_inode->i_mutex);
482
483         status = ocfs2_inode_lock(alloc_inode, &bh, 1);
484         if (status < 0) {
485                 mutex_unlock(&alloc_inode->i_mutex);
486                 iput(alloc_inode);
487
488                 mlog_errno(status);
489                 return status;
490         }
491
492         ac->ac_inode = alloc_inode;
493         ac->ac_alloc_slot = slot;
494
495         fe = (struct ocfs2_dinode *) bh->b_data;
496
497         /* The bh was validated by the inode read inside
498          * ocfs2_inode_lock().  Any corruption is a code bug. */
499         BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
500
501         if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
502                 ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
503                             (unsigned long long)le64_to_cpu(fe->i_blkno));
504                 status = -EIO;
505                 goto bail;
506         }
507
508         free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) -
509                 le32_to_cpu(fe->id1.bitmap1.i_used);
510
511         if (bits_wanted > free_bits) {
512                 /* cluster bitmap never grows */
513                 if (ocfs2_is_cluster_bitmap(alloc_inode)) {
514                         mlog(0, "Disk Full: wanted=%u, free_bits=%u\n",
515                              bits_wanted, free_bits);
516                         status = -ENOSPC;
517                         goto bail;
518                 }
519
520                 if (alloc_new_group != ALLOC_NEW_GROUP) {
521                         mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, "
522                              "and we don't alloc a new group for it.\n",
523                              slot, bits_wanted, free_bits);
524                         status = -ENOSPC;
525                         goto bail;
526                 }
527
528                 status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
529                                                  ac->ac_max_block);
530                 if (status < 0) {
531                         if (status != -ENOSPC)
532                                 mlog_errno(status);
533                         goto bail;
534                 }
535                 atomic_inc(&osb->alloc_stats.bg_extends);
536
537                 /* You should never ask for this much metadata */
538                 BUG_ON(bits_wanted >
539                        (le32_to_cpu(fe->id1.bitmap1.i_total)
540                         - le32_to_cpu(fe->id1.bitmap1.i_used)));
541         }
542
543         get_bh(bh);
544         ac->ac_bh = bh;
545 bail:
546         brelse(bh);
547
548         mlog_exit(status);
549         return status;
550 }
551
552 int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
553                                       int blocks,
554                                       struct ocfs2_alloc_context **ac)
555 {
556         int status;
557         u32 slot;
558
559         *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
560         if (!(*ac)) {
561                 status = -ENOMEM;
562                 mlog_errno(status);
563                 goto bail;
564         }
565
566         (*ac)->ac_bits_wanted = blocks;
567         (*ac)->ac_which = OCFS2_AC_USE_META;
568         slot = osb->slot_num;
569         (*ac)->ac_group_search = ocfs2_block_group_search;
570
571         status = ocfs2_reserve_suballoc_bits(osb, (*ac),
572                                              EXTENT_ALLOC_SYSTEM_INODE,
573                                              slot, ALLOC_NEW_GROUP);
574         if (status < 0) {
575                 if (status != -ENOSPC)
576                         mlog_errno(status);
577                 goto bail;
578         }
579
580         status = 0;
581 bail:
582         if ((status < 0) && *ac) {
583                 ocfs2_free_alloc_context(*ac);
584                 *ac = NULL;
585         }
586
587         mlog_exit(status);
588         return status;
589 }
590
/*
 * Reserve as many metadata blocks as ocfs2_extend_meta_needed() reports
 * for the extent list @root_el.  The result and *@ac are exactly those
 * of ocfs2_reserve_new_metadata_blocks().
 */
int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
                               struct ocfs2_extent_list *root_el,
                               struct ocfs2_alloc_context **ac)
{
        int blocks = ocfs2_extend_meta_needed(root_el);

        return ocfs2_reserve_new_metadata_blocks(osb, blocks, ac);
}
599
600 static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb,
601                                               struct ocfs2_alloc_context *ac)
602 {
603         int i, status = -ENOSPC;
604         s16 slot = ocfs2_get_inode_steal_slot(osb);
605
606         /* Start to steal inodes from the first slot after ours. */
607         if (slot == OCFS2_INVALID_SLOT)
608                 slot = osb->slot_num + 1;
609
610         for (i = 0; i < osb->max_slots; i++, slot++) {
611                 if (slot == osb->max_slots)
612                         slot = 0;
613
614                 if (slot == osb->slot_num)
615                         continue;
616
617                 status = ocfs2_reserve_suballoc_bits(osb, ac,
618                                                      INODE_ALLOC_SYSTEM_INODE,
619                                                      slot, NOT_ALLOC_NEW_GROUP);
620                 if (status >= 0) {
621                         ocfs2_set_inode_steal_slot(osb, slot);
622                         break;
623                 }
624
625                 ocfs2_free_ac_resource(ac);
626         }
627
628         return status;
629 }
630
631 int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
632                             struct ocfs2_alloc_context **ac)
633 {
634         int status;
635         s16 slot = ocfs2_get_inode_steal_slot(osb);
636
637         *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
638         if (!(*ac)) {
639                 status = -ENOMEM;
640                 mlog_errno(status);
641                 goto bail;
642         }
643
644         (*ac)->ac_bits_wanted = 1;
645         (*ac)->ac_which = OCFS2_AC_USE_INODE;
646
647         (*ac)->ac_group_search = ocfs2_block_group_search;
648
649         /*
650          * stat(2) can't handle i_ino > 32bits, so we tell the
651          * lower levels not to allocate us a block group past that
652          * limit.  The 'inode64' mount option avoids this behavior.
653          */
654         if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64))
655                 (*ac)->ac_max_block = (u32)~0U;
656
657         /*
658          * slot is set when we successfully steal inode from other nodes.
659          * It is reset in 3 places:
660          * 1. when we flush the truncate log
661          * 2. when we complete local alloc recovery.
662          * 3. when we successfully allocate from our own slot.
663          * After it is set, we will go on stealing inodes until we find the
664          * need to check our slots to see whether there is some space for us.
665          */
666         if (slot != OCFS2_INVALID_SLOT &&
667             atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_INODES_TO_STEAL)
668                 goto inode_steal;
669
670         atomic_set(&osb->s_num_inodes_stolen, 0);
671         status = ocfs2_reserve_suballoc_bits(osb, *ac,
672                                              INODE_ALLOC_SYSTEM_INODE,
673                                              osb->slot_num, ALLOC_NEW_GROUP);
674         if (status >= 0) {
675                 status = 0;
676
677                 /*
678                  * Some inodes must be freed by us, so try to allocate
679                  * from our own next time.
680                  */
681                 if (slot != OCFS2_INVALID_SLOT)
682                         ocfs2_init_inode_steal_slot(osb);
683                 goto bail;
684         } else if (status < 0 && status != -ENOSPC) {
685                 mlog_errno(status);
686                 goto bail;
687         }
688
689         ocfs2_free_ac_resource(*ac);
690
691 inode_steal:
692         status = ocfs2_steal_inode_from_other_nodes(osb, *ac);
693         atomic_inc(&osb->s_num_inodes_stolen);
694         if (status < 0) {
695                 if (status != -ENOSPC)
696                         mlog_errno(status);
697                 goto bail;
698         }
699
700         status = 0;
701 bail:
702         if ((status < 0) && *ac) {
703                 ocfs2_free_alloc_context(*ac);
704                 *ac = NULL;
705         }
706
707         mlog_exit(status);
708         return status;
709 }
710
711 /* local alloc code has to do the same thing, so rather than do this
712  * twice.. */
713 int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
714                                       struct ocfs2_alloc_context *ac)
715 {
716         int status;
717
718         ac->ac_which = OCFS2_AC_USE_MAIN;
719         ac->ac_group_search = ocfs2_cluster_group_search;
720
721         status = ocfs2_reserve_suballoc_bits(osb, ac,
722                                              GLOBAL_BITMAP_SYSTEM_INODE,
723                                              OCFS2_INVALID_SLOT,
724                                              ALLOC_NEW_GROUP);
725         if (status < 0 && status != -ENOSPC) {
726                 mlog_errno(status);
727                 goto bail;
728         }
729
730 bail:
731         return status;
732 }
733
734 /* Callers don't need to care which bitmap (local alloc or main) to
735  * use so we figure it out for them, but unfortunately this clutters
736  * things a bit. */
737 static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
738                                              u32 bits_wanted, u64 max_block,
739                                              struct ocfs2_alloc_context **ac)
740 {
741         int status;
742
743         mlog_entry_void();
744
745         *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
746         if (!(*ac)) {
747                 status = -ENOMEM;
748                 mlog_errno(status);
749                 goto bail;
750         }
751
752         (*ac)->ac_bits_wanted = bits_wanted;
753         (*ac)->ac_max_block = max_block;
754
755         status = -ENOSPC;
756         if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
757                 status = ocfs2_reserve_local_alloc_bits(osb,
758                                                         bits_wanted,
759                                                         *ac);
760                 if (status == -EFBIG) {
761                         /* The local alloc window is outside ac_max_block.
762                          * use the main bitmap. */
763                         status = -ENOSPC;
764                 } else if ((status < 0) && (status != -ENOSPC)) {
765                         mlog_errno(status);
766                         goto bail;
767                 }
768         }
769
770         if (status == -ENOSPC) {
771                 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
772                 if (status < 0) {
773                         if (status != -ENOSPC)
774                                 mlog_errno(status);
775                         goto bail;
776                 }
777         }
778
779         status = 0;
780 bail:
781         if ((status < 0) && *ac) {
782                 ocfs2_free_alloc_context(*ac);
783                 *ac = NULL;
784         }
785
786         mlog_exit(status);
787         return status;
788 }
789
790 int ocfs2_reserve_clusters(struct ocfs2_super *osb,
791                            u32 bits_wanted,
792                            struct ocfs2_alloc_context **ac)
793 {
794         return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac);
795 }
796
797 /*
798  * More or less lifted from ext3. I'll leave their description below:
799  *
800  * "For ext3 allocations, we must not reuse any blocks which are
801  * allocated in the bitmap buffer's "last committed data" copy.  This
802  * prevents deletes from freeing up the page for reuse until we have
803  * committed the delete transaction.
804  *
805  * If we didn't do this, then deleting something and reallocating it as
806  * data would allow the old block to be overwritten before the
807  * transaction committed (because we force data to disk before commit).
808  * This would lead to corruption if we crashed between overwriting the
809  * data and committing the delete.
810  *
811  * @@@ We may want to make this allocation behaviour conditional on
812  * data-writes at some point, and disable it for metadata allocations or
813  * sync-data inodes."
814  *
815  * Note: OCFS2 already does this differently for metadata vs data
816  * allocations, as those bitmaps are separate and undo access is never
817  * called on a metadata group descriptor.
818  */
819 static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
820                                          int nr)
821 {
822         struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
823
824         if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
825                 return 0;
826         if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data)
827                 return 1;
828
829         bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
830         return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
831 }
832
833 static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
834                                              struct buffer_head *bg_bh,
835                                              unsigned int bits_wanted,
836                                              unsigned int total_bits,
837                                              u16 *bit_off,
838                                              u16 *bits_found)
839 {
840         void *bitmap;
841         u16 best_offset, best_size;
842         int offset, start, found, status = 0;
843         struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
844
845         if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
846                 OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg);
847                 return -EIO;
848         }
849
850         found = start = best_offset = best_size = 0;
851         bitmap = bg->bg_bitmap;
852
853         while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
854                 if (offset == total_bits)
855                         break;
856
857                 if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
858                         /* We found a zero, but we can't use it as it
859                          * hasn't been put to disk yet! */
860                         found = 0;
861                         start = offset + 1;
862                 } else if (offset == start) {
863                         /* we found a zero */
864                         found++;
865                         /* move start to the next bit to test */
866                         start++;
867                 } else {
868                         /* got a zero after some ones */
869                         found = 1;
870                         start = offset + 1;
871                 }
872                 if (found > best_size) {
873                         best_size = found;
874                         best_offset = start - found;
875                 }
876                 /* we got everything we needed */
877                 if (found == bits_wanted) {
878                         /* mlog(0, "Found it all!\n"); */
879                         break;
880                 }
881         }
882
883         /* XXX: I think the first clause is equivalent to the second
884          *      - jlbec */
885         if (found == bits_wanted) {
886                 *bit_off = start - found;
887                 *bits_found = found;
888         } else if (best_size) {
889                 *bit_off = best_offset;
890                 *bits_found = best_size;
891         } else {
892                 status = -ENOSPC;
893                 /* No error log here -- see the comment above
894                  * ocfs2_test_bg_bit_allocatable */
895         }
896
897         return status;
898 }
899
900 static inline int ocfs2_block_group_set_bits(handle_t *handle,
901                                              struct inode *alloc_inode,
902                                              struct ocfs2_group_desc *bg,
903                                              struct buffer_head *group_bh,
904                                              unsigned int bit_off,
905                                              unsigned int num_bits)
906 {
907         int status;
908         void *bitmap = bg->bg_bitmap;
909         int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
910
911         mlog_entry_void();
912
913         if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
914                 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
915                 status = -EIO;
916                 goto bail;
917         }
918         BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
919
920         mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
921              num_bits);
922
923         if (ocfs2_is_cluster_bitmap(alloc_inode))
924                 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
925
926         status = ocfs2_journal_access(handle,
927                                       alloc_inode,
928                                       group_bh,
929                                       journal_type);
930         if (status < 0) {
931                 mlog_errno(status);
932                 goto bail;
933         }
934
935         le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
936
937         while(num_bits--)
938                 ocfs2_set_bit(bit_off++, bitmap);
939
940         status = ocfs2_journal_dirty(handle,
941                                      group_bh);
942         if (status < 0) {
943                 mlog_errno(status);
944                 goto bail;
945         }
946
947 bail:
948         mlog_exit(status);
949         return status;
950 }
951
952 /* find the one with the most empty bits */
953 static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl)
954 {
955         u16 curr, best;
956
957         BUG_ON(!cl->cl_next_free_rec);
958
959         best = curr = 0;
960         while (curr < le16_to_cpu(cl->cl_next_free_rec)) {
961                 if (le32_to_cpu(cl->cl_recs[curr].c_free) >
962                     le32_to_cpu(cl->cl_recs[best].c_free))
963                         best = curr;
964                 curr++;
965         }
966
967         BUG_ON(best >= le16_to_cpu(cl->cl_next_free_rec));
968         return best;
969 }
970
971 static int ocfs2_relink_block_group(handle_t *handle,
972                                     struct inode *alloc_inode,
973                                     struct buffer_head *fe_bh,
974                                     struct buffer_head *bg_bh,
975                                     struct buffer_head *prev_bg_bh,
976                                     u16 chain)
977 {
978         int status;
979         /* there is a really tiny chance the journal calls could fail,
980          * but we wouldn't want inconsistent blocks in *any* case. */
981         u64 fe_ptr, bg_ptr, prev_bg_ptr;
982         struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
983         struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
984         struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data;
985
986         if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
987                 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
988                 status = -EIO;
989                 goto out;
990         }
991         if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) {
992                 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg);
993                 status = -EIO;
994                 goto out;
995         }
996
997         mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
998              (unsigned long long)le64_to_cpu(fe->i_blkno), chain,
999              (unsigned long long)le64_to_cpu(bg->bg_blkno),
1000              (unsigned long long)le64_to_cpu(prev_bg->bg_blkno));
1001
1002         fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno);
1003         bg_ptr = le64_to_cpu(bg->bg_next_group);
1004         prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group);
1005
1006         status = ocfs2_journal_access(handle, alloc_inode, prev_bg_bh,
1007                                       OCFS2_JOURNAL_ACCESS_WRITE);
1008         if (status < 0) {
1009                 mlog_errno(status);
1010                 goto out_rollback;
1011         }
1012
1013         prev_bg->bg_next_group = bg->bg_next_group;
1014
1015         status = ocfs2_journal_dirty(handle, prev_bg_bh);
1016         if (status < 0) {
1017                 mlog_errno(status);
1018                 goto out_rollback;
1019         }
1020
1021         status = ocfs2_journal_access(handle, alloc_inode, bg_bh,
1022                                       OCFS2_JOURNAL_ACCESS_WRITE);
1023         if (status < 0) {
1024                 mlog_errno(status);
1025                 goto out_rollback;
1026         }
1027
1028         bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno;
1029
1030         status = ocfs2_journal_dirty(handle, bg_bh);
1031         if (status < 0) {
1032                 mlog_errno(status);
1033                 goto out_rollback;
1034         }
1035
1036         status = ocfs2_journal_access(handle, alloc_inode, fe_bh,
1037                                       OCFS2_JOURNAL_ACCESS_WRITE);
1038         if (status < 0) {
1039                 mlog_errno(status);
1040                 goto out_rollback;
1041         }
1042
1043         fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno;
1044
1045         status = ocfs2_journal_dirty(handle, fe_bh);
1046         if (status < 0) {
1047                 mlog_errno(status);
1048                 goto out_rollback;
1049         }
1050
1051         status = 0;
1052 out_rollback:
1053         if (status < 0) {
1054                 fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr);
1055                 bg->bg_next_group = cpu_to_le64(bg_ptr);
1056                 prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
1057         }
1058 out:
1059         mlog_exit(status);
1060         return status;
1061 }
1062
1063 static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
1064                                                      u32 wanted)
1065 {
1066         return le16_to_cpu(bg->bg_free_bits_count) > wanted;
1067 }
1068
1069 /* return 0 on success, -ENOSPC to keep searching and any other < 0
1070  * value on error. */
1071 static int ocfs2_cluster_group_search(struct inode *inode,
1072                                       struct buffer_head *group_bh,
1073                                       u32 bits_wanted, u32 min_bits,
1074                                       u64 max_block,
1075                                       u16 *bit_off, u16 *bits_found)
1076 {
1077         int search = -ENOSPC;
1078         int ret;
1079         u64 blkoff;
1080         struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
1081         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1082         u16 tmp_off, tmp_found;
1083         unsigned int max_bits, gd_cluster_off;
1084
1085         BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1086
1087         if (gd->bg_free_bits_count) {
1088                 max_bits = le16_to_cpu(gd->bg_bits);
1089
1090                 /* Tail groups in cluster bitmaps which aren't cpg
1091                  * aligned are prone to partial extention by a failed
1092                  * fs resize. If the file system resize never got to
1093                  * update the dinode cluster count, then we don't want
1094                  * to trust any clusters past it, regardless of what
1095                  * the group descriptor says. */
1096                 gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
1097                                                           le64_to_cpu(gd->bg_blkno));
1098                 if ((gd_cluster_off + max_bits) >
1099                     OCFS2_I(inode)->ip_clusters) {
1100                         max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
1101                         mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n",
1102                              (unsigned long long)le64_to_cpu(gd->bg_blkno),
1103                              le16_to_cpu(gd->bg_bits),
1104                              OCFS2_I(inode)->ip_clusters, max_bits);
1105                 }
1106
1107                 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
1108                                                         group_bh, bits_wanted,
1109                                                         max_bits,
1110                                                         &tmp_off, &tmp_found);
1111                 if (ret)
1112                         return ret;
1113
1114                 if (max_block) {
1115                         blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
1116                                                           gd_cluster_off +
1117                                                           tmp_off + tmp_found);
1118                         mlog(0, "Checking %llu against %llu\n",
1119                              (unsigned long long)blkoff,
1120                              (unsigned long long)max_block);
1121                         if (blkoff > max_block)
1122                                 return -ENOSPC;
1123                 }
1124
1125                 /* ocfs2_block_group_find_clear_bits() might
1126                  * return success, but we still want to return
1127                  * -ENOSPC unless it found the minimum number
1128                  * of bits. */
1129                 if (min_bits <= tmp_found) {
1130                         *bit_off = tmp_off;
1131                         *bits_found = tmp_found;
1132                         search = 0; /* success */
1133                 } else if (tmp_found) {
1134                         /*
1135                          * Don't show bits which we'll be returning
1136                          * for allocation to the local alloc bitmap.
1137                          */
1138                         ocfs2_local_alloc_seen_free_bits(osb, tmp_found);
1139                 }
1140         }
1141
1142         return search;
1143 }
1144
1145 static int ocfs2_block_group_search(struct inode *inode,
1146                                     struct buffer_head *group_bh,
1147                                     u32 bits_wanted, u32 min_bits,
1148                                     u64 max_block,
1149                                     u16 *bit_off, u16 *bits_found)
1150 {
1151         int ret = -ENOSPC;
1152         u64 blkoff;
1153         struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
1154
1155         BUG_ON(min_bits != 1);
1156         BUG_ON(ocfs2_is_cluster_bitmap(inode));
1157
1158         if (bg->bg_free_bits_count) {
1159                 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
1160                                                         group_bh, bits_wanted,
1161                                                         le16_to_cpu(bg->bg_bits),
1162                                                         bit_off, bits_found);
1163                 if (!ret && max_block) {
1164                         blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off +
1165                                 *bits_found;
1166                         mlog(0, "Checking %llu against %llu\n",
1167                              (unsigned long long)blkoff,
1168                              (unsigned long long)max_block);
1169                         if (blkoff > max_block)
1170                                 ret = -ENOSPC;
1171                 }
1172         }
1173
1174         return ret;
1175 }
1176
1177 static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
1178                                        handle_t *handle,
1179                                        struct buffer_head *di_bh,
1180                                        u32 num_bits,
1181                                        u16 chain)
1182 {
1183         int ret;
1184         u32 tmp_used;
1185         struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
1186         struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
1187
1188         ret = ocfs2_journal_access(handle, inode, di_bh,
1189                                    OCFS2_JOURNAL_ACCESS_WRITE);
1190         if (ret < 0) {
1191                 mlog_errno(ret);
1192                 goto out;
1193         }
1194
1195         tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
1196         di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
1197         le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
1198
1199         ret = ocfs2_journal_dirty(handle, di_bh);
1200         if (ret < 0)
1201                 mlog_errno(ret);
1202
1203 out:
1204         return ret;
1205 }
1206
1207 static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1208                                   handle_t *handle,
1209                                   u32 bits_wanted,
1210                                   u32 min_bits,
1211                                   u16 *bit_off,
1212                                   unsigned int *num_bits,
1213                                   u64 gd_blkno,
1214                                   u16 *bits_left)
1215 {
1216         int ret;
1217         u16 found;
1218         struct buffer_head *group_bh = NULL;
1219         struct ocfs2_group_desc *gd;
1220         struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
1221         struct inode *alloc_inode = ac->ac_inode;
1222
1223         ret = ocfs2_read_group_descriptor(alloc_inode, di, gd_blkno,
1224                                           &group_bh);
1225         if (ret < 0) {
1226                 mlog_errno(ret);
1227                 return ret;
1228         }
1229
1230         gd = (struct ocfs2_group_desc *) group_bh->b_data;
1231         ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
1232                                   ac->ac_max_block, bit_off, &found);
1233         if (ret < 0) {
1234                 if (ret != -ENOSPC)
1235                         mlog_errno(ret);
1236                 goto out;
1237         }
1238
1239         *num_bits = found;
1240
1241         ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
1242                                                *num_bits,
1243                                                le16_to_cpu(gd->bg_chain));
1244         if (ret < 0) {
1245                 mlog_errno(ret);
1246                 goto out;
1247         }
1248
1249         ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
1250                                          *bit_off, *num_bits);
1251         if (ret < 0)
1252                 mlog_errno(ret);
1253
1254         *bits_left = le16_to_cpu(gd->bg_free_bits_count);
1255
1256 out:
1257         brelse(group_bh);
1258
1259         return ret;
1260 }
1261
1262 static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1263                               handle_t *handle,
1264                               u32 bits_wanted,
1265                               u32 min_bits,
1266                               u16 *bit_off,
1267                               unsigned int *num_bits,
1268                               u64 *bg_blkno,
1269                               u16 *bits_left)
1270 {
1271         int status;
1272         u16 chain, tmp_bits;
1273         u32 tmp_used;
1274         u64 next_group;
1275         struct inode *alloc_inode = ac->ac_inode;
1276         struct buffer_head *group_bh = NULL;
1277         struct buffer_head *prev_group_bh = NULL;
1278         struct ocfs2_dinode *fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
1279         struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
1280         struct ocfs2_group_desc *bg;
1281
1282         chain = ac->ac_chain;
1283         mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n",
1284              bits_wanted, chain,
1285              (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
1286
1287         status = ocfs2_read_group_descriptor(alloc_inode, fe,
1288                                              le64_to_cpu(cl->cl_recs[chain].c_blkno),
1289                                              &group_bh);
1290         if (status < 0) {
1291                 mlog_errno(status);
1292                 goto bail;
1293         }
1294         bg = (struct ocfs2_group_desc *) group_bh->b_data;
1295
1296         status = -ENOSPC;
1297         /* for now, the chain search is a bit simplistic. We just use
1298          * the 1st group with any empty bits. */
1299         while ((status = ac->ac_group_search(alloc_inode, group_bh,
1300                                              bits_wanted, min_bits,
1301                                              ac->ac_max_block, bit_off,
1302                                              &tmp_bits)) == -ENOSPC) {
1303                 if (!bg->bg_next_group)
1304                         break;
1305
1306                 brelse(prev_group_bh);
1307                 prev_group_bh = NULL;
1308
1309                 next_group = le64_to_cpu(bg->bg_next_group);
1310                 prev_group_bh = group_bh;
1311                 group_bh = NULL;
1312                 status = ocfs2_read_group_descriptor(alloc_inode, fe,
1313                                                      next_group, &group_bh);
1314                 if (status < 0) {
1315                         mlog_errno(status);
1316                         goto bail;
1317                 }
1318                 bg = (struct ocfs2_group_desc *) group_bh->b_data;
1319         }
1320         if (status < 0) {
1321                 if (status != -ENOSPC)
1322                         mlog_errno(status);
1323                 goto bail;
1324         }
1325
1326         mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
1327              tmp_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno));
1328
1329         *num_bits = tmp_bits;
1330
1331         BUG_ON(*num_bits == 0);
1332
1333         /*
1334          * Keep track of previous block descriptor read. When
1335          * we find a target, if we have read more than X
1336          * number of descriptors, and the target is reasonably
1337          * empty, relink him to top of his chain.
1338          *
1339          * We've read 0 extra blocks and only send one more to
1340          * the transaction, yet the next guy to search has a
1341          * much easier time.
1342          *
1343          * Do this *after* figuring out how many bits we're taking out
1344          * of our target group.
1345          */
1346         if (ac->ac_allow_chain_relink &&
1347             (prev_group_bh) &&
1348             (ocfs2_block_group_reasonably_empty(bg, *num_bits))) {
1349                 status = ocfs2_relink_block_group(handle, alloc_inode,
1350                                                   ac->ac_bh, group_bh,
1351                                                   prev_group_bh, chain);
1352                 if (status < 0) {
1353                         mlog_errno(status);
1354                         goto bail;
1355                 }
1356         }
1357
1358         /* Ok, claim our bits now: set the info on dinode, chainlist
1359          * and then the group */
1360         status = ocfs2_journal_access(handle,
1361                                       alloc_inode,
1362                                       ac->ac_bh,
1363                                       OCFS2_JOURNAL_ACCESS_WRITE);
1364         if (status < 0) {
1365                 mlog_errno(status);
1366                 goto bail;
1367         }
1368
1369         tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
1370         fe->id1.bitmap1.i_used = cpu_to_le32(*num_bits + tmp_used);
1371         le32_add_cpu(&cl->cl_recs[chain].c_free, -(*num_bits));
1372
1373         status = ocfs2_journal_dirty(handle,
1374                                      ac->ac_bh);
1375         if (status < 0) {
1376                 mlog_errno(status);
1377                 goto bail;
1378         }
1379
1380         status = ocfs2_block_group_set_bits(handle,
1381                                             alloc_inode,
1382                                             bg,
1383                                             group_bh,
1384                                             *bit_off,
1385                                             *num_bits);
1386         if (status < 0) {
1387                 mlog_errno(status);
1388                 goto bail;
1389         }
1390
1391         mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits,
1392              (unsigned long long)le64_to_cpu(fe->i_blkno));
1393
1394         *bg_blkno = le64_to_cpu(bg->bg_blkno);
1395         *bits_left = le16_to_cpu(bg->bg_free_bits_count);
1396 bail:
1397         brelse(group_bh);
1398         brelse(prev_group_bh);
1399
1400         mlog_exit(status);
1401         return status;
1402 }
1403
1404 /* will give out up to bits_wanted contiguous bits. */
1405 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1406                                      struct ocfs2_alloc_context *ac,
1407                                      handle_t *handle,
1408                                      u32 bits_wanted,
1409                                      u32 min_bits,
1410                                      u16 *bit_off,
1411                                      unsigned int *num_bits,
1412                                      u64 *bg_blkno)
1413 {
1414         int status;
1415         u16 victim, i;
1416         u16 bits_left = 0;
1417         u64 hint_blkno = ac->ac_last_group;
1418         struct ocfs2_chain_list *cl;
1419         struct ocfs2_dinode *fe;
1420
1421         mlog_entry_void();
1422
1423         BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
1424         BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given));
1425         BUG_ON(!ac->ac_bh);
1426
1427         fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
1428
1429         /* The bh was validated by the inode read during
1430          * ocfs2_reserve_suballoc_bits().  Any corruption is a code bug. */
1431         BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
1432
1433         if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
1434             le32_to_cpu(fe->id1.bitmap1.i_total)) {
1435                 ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used "
1436                             "bits but only %u total.",
1437                             (unsigned long long)le64_to_cpu(fe->i_blkno),
1438                             le32_to_cpu(fe->id1.bitmap1.i_used),
1439                             le32_to_cpu(fe->id1.bitmap1.i_total));
1440                 status = -EIO;
1441                 goto bail;
1442         }
1443
1444         if (hint_blkno) {
1445                 /* Attempt to short-circuit the usual search mechanism
1446                  * by jumping straight to the most recently used
1447                  * allocation group. This helps us mantain some
1448                  * contiguousness across allocations. */
1449                 status = ocfs2_search_one_group(ac, handle, bits_wanted,
1450                                                 min_bits, bit_off, num_bits,
1451                                                 hint_blkno, &bits_left);
1452                 if (!status) {
1453                         /* Be careful to update *bg_blkno here as the
1454                          * caller is expecting it to be filled in, and
1455                          * ocfs2_search_one_group() won't do that for
1456                          * us. */
1457                         *bg_blkno = hint_blkno;
1458                         goto set_hint;
1459                 }
1460                 if (status < 0 && status != -ENOSPC) {
1461                         mlog_errno(status);
1462                         goto bail;
1463                 }
1464         }
1465
1466         cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
1467
1468         victim = ocfs2_find_victim_chain(cl);
1469         ac->ac_chain = victim;
1470         ac->ac_allow_chain_relink = 1;
1471
1472         status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, bit_off,
1473                                     num_bits, bg_blkno, &bits_left);
1474         if (!status)
1475                 goto set_hint;
1476         if (status < 0 && status != -ENOSPC) {
1477                 mlog_errno(status);
1478                 goto bail;
1479         }
1480
1481         mlog(0, "Search of victim chain %u came up with nothing, "
1482              "trying all chains now.\n", victim);
1483
1484         /* If we didn't pick a good victim, then just default to
1485          * searching each chain in order. Don't allow chain relinking
1486          * because we only calculate enough journal credits for one
1487          * relink per alloc. */
1488         ac->ac_allow_chain_relink = 0;
1489         for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) {
1490                 if (i == victim)
1491                         continue;
1492                 if (!cl->cl_recs[i].c_free)
1493                         continue;
1494
1495                 ac->ac_chain = i;
1496                 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
1497                                             bit_off, num_bits, bg_blkno,
1498                                             &bits_left);
1499                 if (!status)
1500                         break;
1501                 if (status < 0 && status != -ENOSPC) {
1502                         mlog_errno(status);
1503                         goto bail;
1504                 }
1505         }
1506
1507 set_hint:
1508         if (status != -ENOSPC) {
1509                 /* If the next search of this group is not likely to
1510                  * yield a suitable extent, then we reset the last
1511                  * group hint so as to not waste a disk read */
1512                 if (bits_left < min_bits)
1513                         ac->ac_last_group = 0;
1514                 else
1515                         ac->ac_last_group = *bg_blkno;
1516         }
1517
1518 bail:
1519         mlog_exit(status);
1520         return status;
1521 }
1522
1523 int ocfs2_claim_metadata(struct ocfs2_super *osb,
1524                          handle_t *handle,
1525                          struct ocfs2_alloc_context *ac,
1526                          u32 bits_wanted,
1527                          u16 *suballoc_bit_start,
1528                          unsigned int *num_bits,
1529                          u64 *blkno_start)
1530 {
1531         int status;
1532         u64 bg_blkno;
1533
1534         BUG_ON(!ac);
1535         BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted));
1536         BUG_ON(ac->ac_which != OCFS2_AC_USE_META);
1537
1538         status = ocfs2_claim_suballoc_bits(osb,
1539                                            ac,
1540                                            handle,
1541                                            bits_wanted,
1542                                            1,
1543                                            suballoc_bit_start,
1544                                            num_bits,
1545                                            &bg_blkno);
1546         if (status < 0) {
1547                 mlog_errno(status);
1548                 goto bail;
1549         }
1550         atomic_inc(&osb->alloc_stats.bg_allocs);
1551
1552         *blkno_start = bg_blkno + (u64) *suballoc_bit_start;
1553         ac->ac_bits_given += (*num_bits);
1554         status = 0;
1555 bail:
1556         mlog_exit(status);
1557         return status;
1558 }
1559
1560 int ocfs2_claim_new_inode(struct ocfs2_super *osb,
1561                           handle_t *handle,
1562                           struct ocfs2_alloc_context *ac,
1563                           u16 *suballoc_bit,
1564                           u64 *fe_blkno)
1565 {
1566         int status;
1567         unsigned int num_bits;
1568         u64 bg_blkno;
1569
1570         mlog_entry_void();
1571
1572         BUG_ON(!ac);
1573         BUG_ON(ac->ac_bits_given != 0);
1574         BUG_ON(ac->ac_bits_wanted != 1);
1575         BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
1576
1577         status = ocfs2_claim_suballoc_bits(osb,
1578                                            ac,
1579                                            handle,
1580                                            1,
1581                                            1,
1582                                            suballoc_bit,
1583                                            &num_bits,
1584                                            &bg_blkno);
1585         if (status < 0) {
1586                 mlog_errno(status);
1587                 goto bail;
1588         }
1589         atomic_inc(&osb->alloc_stats.bg_allocs);
1590
1591         BUG_ON(num_bits != 1);
1592
1593         *fe_blkno = bg_blkno + (u64) (*suballoc_bit);
1594         ac->ac_bits_given++;
1595         status = 0;
1596 bail:
1597         mlog_exit(status);
1598         return status;
1599 }
1600
1601 /* translate a group desc. blkno and it's bitmap offset into
1602  * disk cluster offset. */
1603 static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
1604                                                    u64 bg_blkno,
1605                                                    u16 bg_bit_off)
1606 {
1607         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1608         u32 cluster = 0;
1609
1610         BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1611
1612         if (bg_blkno != osb->first_cluster_group_blkno)
1613                 cluster = ocfs2_blocks_to_clusters(inode->i_sb, bg_blkno);
1614         cluster += (u32) bg_bit_off;
1615         return cluster;
1616 }
1617
1618 /* given a cluster offset, calculate which block group it belongs to
1619  * and return that block offset. */
1620 u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster)
1621 {
1622         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1623         u32 group_no;
1624
1625         BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1626
1627         group_no = cluster / osb->bitmap_cpg;
1628         if (!group_no)
1629                 return osb->first_cluster_group_blkno;
1630         return ocfs2_clusters_to_blocks(inode->i_sb,
1631                                         group_no * osb->bitmap_cpg);
1632 }
1633
1634 /* given the block number of a cluster start, calculate which cluster
1635  * group and descriptor bitmap offset that corresponds to. */
1636 static inline void ocfs2_block_to_cluster_group(struct inode *inode,
1637                                                 u64 data_blkno,
1638                                                 u64 *bg_blkno,
1639                                                 u16 *bg_bit_off)
1640 {
1641         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1642         u32 data_cluster = ocfs2_blocks_to_clusters(osb->sb, data_blkno);
1643
1644         BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1645
1646         *bg_blkno = ocfs2_which_cluster_group(inode,
1647                                               data_cluster);
1648
1649         if (*bg_blkno == osb->first_cluster_group_blkno)
1650                 *bg_bit_off = (u16) data_cluster;
1651         else
1652                 *bg_bit_off = (u16) ocfs2_blocks_to_clusters(osb->sb,
1653                                                              data_blkno - *bg_blkno);
1654 }
1655
1656 /*
1657  * min_bits - minimum contiguous chunk from this total allocation we
1658  * can handle. set to what we asked for originally for a full
1659  * contig. allocation, set to '1' to indicate we can deal with extents
1660  * of any size.
1661  */
1662 int __ocfs2_claim_clusters(struct ocfs2_super *osb,
1663                            handle_t *handle,
1664                            struct ocfs2_alloc_context *ac,
1665                            u32 min_clusters,
1666                            u32 max_clusters,
1667                            u32 *cluster_start,
1668                            u32 *num_clusters)
1669 {
1670         int status;
1671         unsigned int bits_wanted = max_clusters;
1672         u64 bg_blkno = 0;
1673         u16 bg_bit_off;
1674
1675         mlog_entry_void();
1676
1677         BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
1678
1679         BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
1680                && ac->ac_which != OCFS2_AC_USE_MAIN);
1681
1682         if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
1683                 status = ocfs2_claim_local_alloc_bits(osb,
1684                                                       handle,
1685                                                       ac,
1686                                                       bits_wanted,
1687                                                       cluster_start,
1688                                                       num_clusters);
1689                 if (!status)
1690                         atomic_inc(&osb->alloc_stats.local_data);
1691         } else {
1692                 if (min_clusters > (osb->bitmap_cpg - 1)) {
1693                         /* The only paths asking for contiguousness
1694                          * should know about this already. */
1695                         mlog(ML_ERROR, "minimum allocation requested %u exceeds "
1696                              "group bitmap size %u!\n", min_clusters,
1697                              osb->bitmap_cpg);
1698                         status = -ENOSPC;
1699                         goto bail;
1700                 }
1701                 /* clamp the current request down to a realistic size. */
1702                 if (bits_wanted > (osb->bitmap_cpg - 1))
1703                         bits_wanted = osb->bitmap_cpg - 1;
1704
1705                 status = ocfs2_claim_suballoc_bits(osb,
1706                                                    ac,
1707                                                    handle,
1708                                                    bits_wanted,
1709                                                    min_clusters,
1710                                                    &bg_bit_off,
1711                                                    num_clusters,
1712                                                    &bg_blkno);
1713                 if (!status) {
1714                         *cluster_start =
1715                                 ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode,
1716                                                                  bg_blkno,
1717                                                                  bg_bit_off);
1718                         atomic_inc(&osb->alloc_stats.bitmap_data);
1719                 }
1720         }
1721         if (status < 0) {
1722                 if (status != -ENOSPC)
1723                         mlog_errno(status);
1724                 goto bail;
1725         }
1726
1727         ac->ac_bits_given += *num_clusters;
1728
1729 bail:
1730         mlog_exit(status);
1731         return status;
1732 }
1733
1734 int ocfs2_claim_clusters(struct ocfs2_super *osb,
1735                          handle_t *handle,
1736                          struct ocfs2_alloc_context *ac,
1737                          u32 min_clusters,
1738                          u32 *cluster_start,
1739                          u32 *num_clusters)
1740 {
1741         unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
1742
1743         return __ocfs2_claim_clusters(osb, handle, ac, min_clusters,
1744                                       bits_wanted, cluster_start, num_clusters);
1745 }
1746
1747 static inline int ocfs2_block_group_clear_bits(handle_t *handle,
1748                                                struct inode *alloc_inode,
1749                                                struct ocfs2_group_desc *bg,
1750                                                struct buffer_head *group_bh,
1751                                                unsigned int bit_off,
1752                                                unsigned int num_bits)
1753 {
1754         int status;
1755         unsigned int tmp;
1756         int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
1757         struct ocfs2_group_desc *undo_bg = NULL;
1758
1759         mlog_entry_void();
1760
1761         if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
1762                 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
1763                 status = -EIO;
1764                 goto bail;
1765         }
1766
1767         mlog(0, "off = %u, num = %u\n", bit_off, num_bits);
1768
1769         if (ocfs2_is_cluster_bitmap(alloc_inode))
1770                 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
1771
1772         status = ocfs2_journal_access(handle, alloc_inode, group_bh,
1773                                       journal_type);
1774         if (status < 0) {
1775                 mlog_errno(status);
1776                 goto bail;
1777         }
1778
1779         if (ocfs2_is_cluster_bitmap(alloc_inode))
1780                 undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data;
1781
1782         tmp = num_bits;
1783         while(tmp--) {
1784                 ocfs2_clear_bit((bit_off + tmp),
1785                                 (unsigned long *) bg->bg_bitmap);
1786                 if (ocfs2_is_cluster_bitmap(alloc_inode))
1787                         ocfs2_set_bit(bit_off + tmp,
1788                                       (unsigned long *) undo_bg->bg_bitmap);
1789         }
1790         le16_add_cpu(&bg->bg_free_bits_count, num_bits);
1791
1792         status = ocfs2_journal_dirty(handle, group_bh);
1793         if (status < 0)
1794                 mlog_errno(status);
1795 bail:
1796         return status;
1797 }
1798
1799 /*
1800  * expects the suballoc inode to already be locked.
1801  */
1802 int ocfs2_free_suballoc_bits(handle_t *handle,
1803                              struct inode *alloc_inode,
1804                              struct buffer_head *alloc_bh,
1805                              unsigned int start_bit,
1806                              u64 bg_blkno,
1807                              unsigned int count)
1808 {
1809         int status = 0;
1810         u32 tmp_used;
1811         struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data;
1812         struct ocfs2_chain_list *cl = &fe->id2.i_chain;
1813         struct buffer_head *group_bh = NULL;
1814         struct ocfs2_group_desc *group;
1815
1816         mlog_entry_void();
1817
1818         /* The alloc_bh comes from ocfs2_free_dinode() or
1819          * ocfs2_free_clusters().  The callers have all locked the
1820          * allocator and gotten alloc_bh from the lock call.  This
1821          * validates the dinode buffer.  Any corruption that has happended
1822          * is a code bug. */
1823         BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
1824         BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
1825
1826         mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
1827              (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
1828              (unsigned long long)bg_blkno, start_bit);
1829
1830         status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno,
1831                                              &group_bh);
1832         if (status < 0) {
1833                 mlog_errno(status);
1834                 goto bail;
1835         }
1836         group = (struct ocfs2_group_desc *) group_bh->b_data;
1837
1838         BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
1839
1840         status = ocfs2_block_group_clear_bits(handle, alloc_inode,
1841                                               group, group_bh,
1842                                               start_bit, count);
1843         if (status < 0) {
1844                 mlog_errno(status);
1845                 goto bail;
1846         }
1847
1848         status = ocfs2_journal_access(handle, alloc_inode, alloc_bh,
1849                                       OCFS2_JOURNAL_ACCESS_WRITE);
1850         if (status < 0) {
1851                 mlog_errno(status);
1852                 goto bail;
1853         }
1854
1855         le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free,
1856                      count);
1857         tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
1858         fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count);
1859
1860         status = ocfs2_journal_dirty(handle, alloc_bh);
1861         if (status < 0) {
1862                 mlog_errno(status);
1863                 goto bail;
1864         }
1865
1866 bail:
1867         brelse(group_bh);
1868
1869         mlog_exit(status);
1870         return status;
1871 }
1872
1873 int ocfs2_free_dinode(handle_t *handle,
1874                       struct inode *inode_alloc_inode,
1875                       struct buffer_head *inode_alloc_bh,
1876                       struct ocfs2_dinode *di)
1877 {
1878         u64 blk = le64_to_cpu(di->i_blkno);
1879         u16 bit = le16_to_cpu(di->i_suballoc_bit);
1880         u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1881
1882         return ocfs2_free_suballoc_bits(handle, inode_alloc_inode,
1883                                         inode_alloc_bh, bit, bg_blkno, 1);
1884 }
1885
1886 int ocfs2_free_clusters(handle_t *handle,
1887                        struct inode *bitmap_inode,
1888                        struct buffer_head *bitmap_bh,
1889                        u64 start_blk,
1890                        unsigned int num_clusters)
1891 {
1892         int status;
1893         u16 bg_start_bit;
1894         u64 bg_blkno;
1895         struct ocfs2_dinode *fe;
1896
1897         /* You can't ever have a contiguous set of clusters
1898          * bigger than a block group bitmap so we never have to worry
1899          * about looping on them. */
1900
1901         mlog_entry_void();
1902
1903         /* This is expensive. We can safely remove once this stuff has
1904          * gotten tested really well. */
1905         BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk)));
1906
1907         fe = (struct ocfs2_dinode *) bitmap_bh->b_data;
1908
1909         ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno,
1910                                      &bg_start_bit);
1911
1912         mlog(0, "want to free %u clusters starting at block %llu\n",
1913              num_clusters, (unsigned long long)start_blk);
1914         mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n",
1915              (unsigned long long)bg_blkno, bg_start_bit);
1916
1917         status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
1918                                           bg_start_bit, bg_blkno,
1919                                           num_clusters);
1920         if (status < 0) {
1921                 mlog_errno(status);
1922                 goto out;
1923         }
1924
1925         ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb),
1926                                          num_clusters);
1927
1928 out:
1929         mlog_exit(status);
1930         return status;
1931 }
1932
1933 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg)
1934 {
1935         printk("Block Group:\n");
1936         printk("bg_signature:       %s\n", bg->bg_signature);
1937         printk("bg_size:            %u\n", bg->bg_size);
1938         printk("bg_bits:            %u\n", bg->bg_bits);
1939         printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count);
1940         printk("bg_chain:           %u\n", bg->bg_chain);
1941         printk("bg_generation:      %u\n", le32_to_cpu(bg->bg_generation));
1942         printk("bg_next_group:      %llu\n",
1943                (unsigned long long)bg->bg_next_group);
1944         printk("bg_parent_dinode:   %llu\n",
1945                (unsigned long long)bg->bg_parent_dinode);
1946         printk("bg_blkno:           %llu\n",
1947                (unsigned long long)bg->bg_blkno);
1948 }
1949
1950 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
1951 {
1952         int i;
1953
1954         printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno);
1955         printk("i_signature:                  %s\n", fe->i_signature);
1956         printk("i_size:                       %llu\n",
1957                (unsigned long long)fe->i_size);
1958         printk("i_clusters:                   %u\n", fe->i_clusters);
1959         printk("i_generation:                 %u\n",
1960                le32_to_cpu(fe->i_generation));
1961         printk("id1.bitmap1.i_used:           %u\n",
1962                le32_to_cpu(fe->id1.bitmap1.i_used));
1963         printk("id1.bitmap1.i_total:          %u\n",
1964                le32_to_cpu(fe->id1.bitmap1.i_total));
1965         printk("id2.i_chain.cl_cpg:           %u\n", fe->id2.i_chain.cl_cpg);
1966         printk("id2.i_chain.cl_bpc:           %u\n", fe->id2.i_chain.cl_bpc);
1967         printk("id2.i_chain.cl_count:         %u\n", fe->id2.i_chain.cl_count);
1968         printk("id2.i_chain.cl_next_free_rec: %u\n",
1969                fe->id2.i_chain.cl_next_free_rec);
1970         for(i = 0; i < fe->id2.i_chain.cl_next_free_rec; i++) {
1971                 printk("fe->id2.i_chain.cl_recs[%d].c_free:  %u\n", i,
1972                        fe->id2.i_chain.cl_recs[i].c_free);
1973                 printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i,
1974                        fe->id2.i_chain.cl_recs[i].c_total);
1975                 printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i,
1976                        (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
1977         }
1978 }
1979
1980 /*
1981  * For a given allocation, determine which allocators will need to be
1982  * accessed, and lock them, reserving the appropriate number of bits.
1983  *
1984  * Sparse file systems call this from ocfs2_write_begin_nolock()
1985  * and ocfs2_allocate_unwritten_extents().
1986  *
1987  * File systems which don't support holes call this from
1988  * ocfs2_extend_allocation().
1989  */
1990 int ocfs2_lock_allocators(struct inode *inode,
1991                           struct ocfs2_extent_tree *et,
1992                           u32 clusters_to_add, u32 extents_to_split,
1993                           struct ocfs2_alloc_context **data_ac,
1994                           struct ocfs2_alloc_context **meta_ac)
1995 {
1996         int ret = 0, num_free_extents;
1997         unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
1998         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1999
2000         *meta_ac = NULL;
2001         if (data_ac)
2002                 *data_ac = NULL;
2003
2004         BUG_ON(clusters_to_add != 0 && data_ac == NULL);
2005
2006         num_free_extents = ocfs2_num_free_extents(osb, inode, et);
2007         if (num_free_extents < 0) {
2008                 ret = num_free_extents;
2009                 mlog_errno(ret);
2010                 goto out;
2011         }
2012
2013         /*
2014          * Sparse allocation file systems need to be more conservative
2015          * with reserving room for expansion - the actual allocation
2016          * happens while we've got a journal handle open so re-taking
2017          * a cluster lock (because we ran out of room for another
2018          * extent) will violate ordering rules.
2019          *
2020          * Most of the time we'll only be seeing this 1 cluster at a time
2021          * anyway.
2022          *
2023          * Always lock for any unwritten extents - we might want to
2024          * add blocks during a split.
2025          */
2026         if (!num_free_extents ||
2027             (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
2028                 ret = ocfs2_reserve_new_metadata(osb, et->et_root_el, meta_ac);
2029                 if (ret < 0) {
2030                         if (ret != -ENOSPC)
2031                                 mlog_errno(ret);
2032                         goto out;
2033                 }
2034         }
2035
2036         if (clusters_to_add == 0)
2037                 goto out;
2038
2039         ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
2040         if (ret < 0) {
2041                 if (ret != -ENOSPC)
2042                         mlog_errno(ret);
2043                 goto out;
2044         }
2045
2046 out:
2047         if (ret) {
2048                 if (*meta_ac) {
2049                         ocfs2_free_alloc_context(*meta_ac);
2050                         *meta_ac = NULL;
2051                 }
2052
2053                 /*
2054                  * We cannot have an error and a non null *data_ac.
2055                  */
2056         }
2057
2058         return ret;
2059 }