ext4: Show unwritten extent flag in ext4_ext_show_leaf()
fs/ext4/extents.c
/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 *
 * Architecture independence:
 *   Copyright (c) 2005, Bull S.A.
 *   Written by Pierre Peiffer <pierre.peiffer@bull.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

/*
 * Extents support for EXT4
 *
 * TODO:
 *   - ext4*_error() should be used in some situations
 *   - analyze all BUG()/BUG_ON(), use -EIO where appropriate
 *   - smart tree reduction
 */

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/jbd2.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/falloc.h>
#include <asm/uaccess.h>
#include <linux/fiemap.h>
#include "ext4_jbd2.h"
#include "ext4_extents.h"


/*
 * ext_pblock:
 * combine low and high parts of physical block number into ext4_fsblk_t
 */
ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
{
        ext4_fsblk_t block;

        block = le32_to_cpu(ex->ee_start_lo);
        block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
        return block;
}
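
/*
 * Worked example for the hi:lo helpers above and below: with
 * ee_start_hi = 0x0001 and ee_start_lo = 0x00000800, the combined
 * physical block is (0x0001 << 32) | 0x800 = 0x100000800.  The 16+32
 * bit split gives extents a 48-bit physical block address space.
 */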

/*
 * idx_pblock:
 * combine low and high parts of a leaf physical block number into ext4_fsblk_t
 */
ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
{
        ext4_fsblk_t block;

        block = le32_to_cpu(ix->ei_leaf_lo);
        block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
        return block;
}

/*
 * ext4_ext_store_pblock:
 * stores a large physical block number into an extent struct,
 * breaking it into parts
 */
void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
{
        ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
        ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
}

/*
 * ext4_idx_store_pblock:
 * stores a large physical block number into an index struct,
 * breaking it into parts
 */
static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
{
        ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
        ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
}

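/*
 * Extend-or-restart helper for the function below: if the handle still
 * has more credits than @needed, nothing is done.  Otherwise we try to
 * extend the transaction; ext4_journal_extend() returns 0 on success
 * and a positive value when the handle cannot be extended, in which
 * case the handle is restarted with a fresh set of credits.
 */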
static int ext4_ext_journal_restart(handle_t *handle, int needed)
{
        int err;

        if (!ext4_handle_valid(handle))
                return 0;
        if (handle->h_buffer_credits > needed)
                return 0;
        err = ext4_journal_extend(handle, needed);
        if (err <= 0)
                return err;
        return ext4_journal_restart(handle, needed);
}

/*
 * could return:
 *  - EROFS
 *  - ENOMEM
 */
static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
                                struct ext4_ext_path *path)
{
        if (path->p_bh) {
                /* path points to block */
                return ext4_journal_get_write_access(handle, path->p_bh);
        }
        /* path points to leaf/index in inode body */
        /* we use in-core data, no need to protect them */
        return 0;
}

/*
 * could return:
 *  - EROFS
 *  - ENOMEM
 *  - EIO
 */
static int ext4_ext_dirty(handle_t *handle, struct inode *inode,
                                struct ext4_ext_path *path)
{
        int err;
        if (path->p_bh) {
                /* path points to block */
                err = ext4_handle_dirty_metadata(handle, inode, path->p_bh);
        } else {
                /* path points to leaf/index in inode body */
                err = ext4_mark_inode_dirty(handle, inode);
        }
        return err;
}

static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
                              struct ext4_ext_path *path,
                              ext4_lblk_t block)
{
        struct ext4_inode_info *ei = EXT4_I(inode);
        ext4_fsblk_t bg_start;
        ext4_fsblk_t last_block;
        ext4_grpblk_t colour;
        ext4_group_t block_group;
        int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
        int depth;

        if (path) {
                struct ext4_extent *ex;
                depth = path->p_depth;

                /* try to predict block placement */
                ex = path[depth].p_ext;
                if (ex)
                        return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block));

                /* it looks like index is empty;
                 * try to find starting block from index itself */
                if (path[depth].p_bh)
                        return path[depth].p_bh->b_blocknr;
        }

        /* OK. use inode's group */
        block_group = ei->i_block_group;
        if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
                /*
                 * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
                 * block groups per flexgroup, reserve the first block
                 * group for directories and special files.  Regular
                 * files will start at the second block group.  This
                 * tends to speed up directory access and improves
                 * fsck times.
                 */
                block_group &= ~(flex_size-1);
                if (S_ISREG(inode->i_mode))
                        block_group++;
        }
        bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
                le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
        last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;

        /*
         * If we are doing delayed allocation, we don't need to take
         * colour into account.
         */
        if (test_opt(inode->i_sb, DELALLOC))
                return bg_start;

        if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
                colour = (current->pid % 16) *
                        (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
        else
                colour = (current->pid % 16) * ((last_block - bg_start) / 16);
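        /*
         * Example (assuming 4 KiB blocks, i.e. 32768 blocks per group):
         * a process with pid 35 gets colour (35 % 16) * 2048 = 6144
         * blocks into the group, spreading concurrent allocators apart.
         */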
        return bg_start + colour + block;
}

/*
 * Allocation for a meta data block
 */
static ext4_fsblk_t
ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
                        struct ext4_ext_path *path,
                        struct ext4_extent *ex, int *err)
{
        ext4_fsblk_t goal, newblock;

        goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
        newblock = ext4_new_meta_blocks(handle, inode, goal, NULL, err);
        return newblock;
}

static int ext4_ext_space_block(struct inode *inode)
{
        int size;

        size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
                        / sizeof(struct ext4_extent);
#ifdef AGGRESSIVE_TEST
        if (size > 6)
                size = 6;
#endif
        return size;
}

static int ext4_ext_space_block_idx(struct inode *inode)
{
        int size;

        size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
                        / sizeof(struct ext4_extent_idx);
#ifdef AGGRESSIVE_TEST
        if (size > 5)
                size = 5;
#endif
        return size;
}

static int ext4_ext_space_root(struct inode *inode)
{
        int size;

        size = sizeof(EXT4_I(inode)->i_data);
        size -= sizeof(struct ext4_extent_header);
        size /= sizeof(struct ext4_extent);
#ifdef AGGRESSIVE_TEST
        if (size > 3)
                size = 3;
#endif
        return size;
}

static int ext4_ext_space_root_idx(struct inode *inode)
{
        int size;

        size = sizeof(EXT4_I(inode)->i_data);
        size -= sizeof(struct ext4_extent_header);
        size /= sizeof(struct ext4_extent_idx);
#ifdef AGGRESSIVE_TEST
        if (size > 4)
                size = 4;
#endif
        return size;
}

/*
 * Calculate the number of metadata blocks needed
 * to allocate @blocks
 * Worst case is one block per extent
 */
int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
{
        int lcap, icap, rcap, leafs, idxs, num;
        int newextents = blocks;

        rcap = ext4_ext_space_root_idx(inode);
        lcap = ext4_ext_space_block(inode);
        icap = ext4_ext_space_block_idx(inode);

        /* number of new leaf blocks needed */
        num = leafs = (newextents + lcap - 1) / lcap;

        /*
         * Worst case, we need separate index block(s)
         * to link all new leaf blocks
         */
        idxs = (leafs + icap - 1) / icap;
        do {
                num += idxs;
                idxs = (idxs + icap - 1) / icap;
        } while (idxs > rcap);

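        /*
         * Example (assuming 4 KiB blocks): a leaf or index block holds
         * (4096 - 12) / 12 = 340 entries and the root holds 4, so 1000
         * new extents cost ceil(1000 / 340) = 3 leaves plus one index
         * block to link them: num = 4.
         */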
        return num;
}

static int
ext4_ext_max_entries(struct inode *inode, int depth)
{
        int max;

        if (depth == ext_depth(inode)) {
                if (depth == 0)
                        max = ext4_ext_space_root(inode);
                else
                        max = ext4_ext_space_root_idx(inode);
        } else {
                if (depth == 0)
                        max = ext4_ext_space_block(inode);
                else
                        max = ext4_ext_space_block_idx(inode);
        }

        return max;
}

static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
        ext4_fsblk_t block = ext_pblock(ext);
        int len = ext4_ext_get_actual_len(ext);

        return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
}

static int ext4_valid_extent_idx(struct inode *inode,
                                struct ext4_extent_idx *ext_idx)
{
        ext4_fsblk_t block = idx_pblock(ext_idx);

        return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
}

static int ext4_valid_extent_entries(struct inode *inode,
                                struct ext4_extent_header *eh,
                                int depth)
{
        struct ext4_extent *ext;
        struct ext4_extent_idx *ext_idx;
        unsigned short entries;
        if (eh->eh_entries == 0)
                return 1;

        entries = le16_to_cpu(eh->eh_entries);

        if (depth == 0) {
                /* leaf entries */
                ext = EXT_FIRST_EXTENT(eh);
                while (entries) {
                        if (!ext4_valid_extent(inode, ext))
                                return 0;
                        ext++;
                        entries--;
                }
        } else {
                ext_idx = EXT_FIRST_INDEX(eh);
                while (entries) {
                        if (!ext4_valid_extent_idx(inode, ext_idx))
                                return 0;
                        ext_idx++;
                        entries--;
                }
        }
        return 1;
}

static int __ext4_ext_check(const char *function, struct inode *inode,
                                        struct ext4_extent_header *eh,
                                        int depth)
{
        const char *error_msg;
        int max = 0;

        if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
                error_msg = "invalid magic";
                goto corrupted;
        }
        if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) {
                error_msg = "unexpected eh_depth";
                goto corrupted;
        }
        if (unlikely(eh->eh_max == 0)) {
                error_msg = "invalid eh_max";
                goto corrupted;
        }
        max = ext4_ext_max_entries(inode, depth);
        if (unlikely(le16_to_cpu(eh->eh_max) > max)) {
                error_msg = "too large eh_max";
                goto corrupted;
        }
        if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
                error_msg = "invalid eh_entries";
                goto corrupted;
        }
        if (!ext4_valid_extent_entries(inode, eh, depth)) {
                error_msg = "invalid extent entries";
                goto corrupted;
        }
        return 0;

corrupted:
        ext4_error(inode->i_sb, function,
                        "bad header/extent in inode #%lu: %s - magic %x, "
                        "entries %u, max %u(%u), depth %u(%u)",
                        inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
                        le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
                        max, le16_to_cpu(eh->eh_depth), depth);

        return -EIO;
}

#define ext4_ext_check(inode, eh, depth)        \
        __ext4_ext_check(__func__, inode, eh, depth)

int ext4_ext_check_inode(struct inode *inode)
{
        return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode));
}

#ifdef EXT_DEBUG
static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
{
        int k, l = path->p_depth;

        ext_debug("path:");
        for (k = 0; k <= l; k++, path++) {
                if (path->p_idx) {
                        ext_debug("  %d->%llu", le32_to_cpu(path->p_idx->ei_block),
                                  idx_pblock(path->p_idx));
                } else if (path->p_ext) {
                        ext_debug("  %d:[%d]%d:%llu ",
                                  le32_to_cpu(path->p_ext->ee_block),
                                  ext4_ext_is_uninitialized(path->p_ext),
                                  ext4_ext_get_actual_len(path->p_ext),
                                  ext_pblock(path->p_ext));
                } else
                        ext_debug("  []");
        }
        ext_debug("\n");
}

static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
{
        int depth = ext_depth(inode);
        struct ext4_extent_header *eh;
        struct ext4_extent *ex;
        int i;

        if (!path)
                return;

        eh = path[depth].p_hdr;
        ex = EXT_FIRST_EXTENT(eh);

        ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino);

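        /*
         * Each extent prints as
         * logical_block:[uninitialized_flag]length:physical_block.
         */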
        for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
                ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
                          ext4_ext_is_uninitialized(ex),
                          ext4_ext_get_actual_len(ex), ext_pblock(ex));
        }
        ext_debug("\n");
}
#else
#define ext4_ext_show_path(inode, path)
#define ext4_ext_show_leaf(inode, path)
#endif

void ext4_ext_drop_refs(struct ext4_ext_path *path)
{
        int depth = path->p_depth;
        int i;

        for (i = 0; i <= depth; i++, path++)
                if (path->p_bh) {
                        brelse(path->p_bh);
                        path->p_bh = NULL;
                }
}

/*
 * ext4_ext_binsearch_idx:
 * binary search for the closest index of the given block
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch_idx(struct inode *inode,
                        struct ext4_ext_path *path, ext4_lblk_t block)
{
        struct ext4_extent_header *eh = path->p_hdr;
        struct ext4_extent_idx *r, *l, *m;

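        /*
         * Invariant: find the last index with ei_block <= block.
         * Starting l at EXT_FIRST_INDEX() + 1 keeps l - 1 valid, so
         * the first index is returned when block precedes everything.
         */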
        ext_debug("binsearch for %u(idx):  ", block);

        l = EXT_FIRST_INDEX(eh) + 1;
        r = EXT_LAST_INDEX(eh);
        while (l <= r) {
                m = l + (r - l) / 2;
                if (block < le32_to_cpu(m->ei_block))
                        r = m - 1;
                else
                        l = m + 1;
                ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block),
                                m, le32_to_cpu(m->ei_block),
                                r, le32_to_cpu(r->ei_block));
        }

        path->p_idx = l - 1;
        ext_debug("  -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
                  idx_pblock(path->p_idx));

#ifdef CHECK_BINSEARCH
        {
                struct ext4_extent_idx *chix, *ix;
                int k;

                chix = ix = EXT_FIRST_INDEX(eh);
                for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
                        if (k != 0 &&
                            le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) {
                                printk(KERN_DEBUG "k=%d, ix=0x%p, "
                                       "first=0x%p\n", k,
                                       ix, EXT_FIRST_INDEX(eh));
                                printk(KERN_DEBUG "%u <= %u\n",
                                       le32_to_cpu(ix->ei_block),
                                       le32_to_cpu(ix[-1].ei_block));
                        }
                        BUG_ON(k && le32_to_cpu(ix->ei_block)
                                           <= le32_to_cpu(ix[-1].ei_block));
                        if (block < le32_to_cpu(ix->ei_block))
                                break;
                        chix = ix;
                }
                BUG_ON(chix != path->p_idx);
        }
#endif

}

/*
 * ext4_ext_binsearch:
 * binary search for closest extent of the given block
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch(struct inode *inode,
                struct ext4_ext_path *path, ext4_lblk_t block)
{
        struct ext4_extent_header *eh = path->p_hdr;
        struct ext4_extent *r, *l, *m;

        if (eh->eh_entries == 0) {
                /*
                 * this leaf is empty:
                 * we get such a leaf in split/add case
                 */
                return;
        }

        ext_debug("binsearch for %u:  ", block);

        l = EXT_FIRST_EXTENT(eh) + 1;
        r = EXT_LAST_EXTENT(eh);

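        /*
         * Same invariant as the index search: find the last extent
         * with ee_block <= block; l - 1 is always valid because l
         * starts past the first extent.
         */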
        while (l <= r) {
                m = l + (r - l) / 2;
                if (block < le32_to_cpu(m->ee_block))
                        r = m - 1;
                else
                        l = m + 1;
                ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block),
                                m, le32_to_cpu(m->ee_block),
                                r, le32_to_cpu(r->ee_block));
        }

        path->p_ext = l - 1;
        ext_debug("  -> %d:%llu:[%d]%d ",
                        le32_to_cpu(path->p_ext->ee_block),
                        ext_pblock(path->p_ext),
                        ext4_ext_is_uninitialized(path->p_ext),
                        ext4_ext_get_actual_len(path->p_ext));

#ifdef CHECK_BINSEARCH
        {
                struct ext4_extent *chex, *ex;
                int k;

                chex = ex = EXT_FIRST_EXTENT(eh);
                for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
                        BUG_ON(k && le32_to_cpu(ex->ee_block)
                                          <= le32_to_cpu(ex[-1].ee_block));
                        if (block < le32_to_cpu(ex->ee_block))
                                break;
                        chex = ex;
                }
                BUG_ON(chex != path->p_ext);
        }
#endif

}

int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
{
        struct ext4_extent_header *eh;

        eh = ext_inode_hdr(inode);
        eh->eh_depth = 0;
        eh->eh_entries = 0;
        eh->eh_magic = EXT4_EXT_MAGIC;
        eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode));
        ext4_mark_inode_dirty(handle, inode);
        ext4_ext_invalidate_cache(inode);
        return 0;
}

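/*
 * ext4_ext_find_extent:
 * descends the tree to @block, filling in one ext4_ext_path entry per
 * level.  Callers must release buffer references with
 * ext4_ext_drop_refs() and, if the path was allocated here, kfree() it.
 */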
struct ext4_ext_path *
ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
                                        struct ext4_ext_path *path)
{
        struct ext4_extent_header *eh;
        struct buffer_head *bh;
        short int depth, i, ppos = 0, alloc = 0;

        eh = ext_inode_hdr(inode);
        depth = ext_depth(inode);

        /* account possible depth increase */
        if (!path) {
                path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
                                GFP_NOFS);
                if (!path)
                        return ERR_PTR(-ENOMEM);
                alloc = 1;
        }
        path[0].p_hdr = eh;
        path[0].p_bh = NULL;

        i = depth;
        /* walk through the tree */
        while (i) {
                int need_to_validate = 0;

                ext_debug("depth %d: num %d, max %d\n",
                          ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));

                ext4_ext_binsearch_idx(inode, path + ppos, block);
                path[ppos].p_block = idx_pblock(path[ppos].p_idx);
                path[ppos].p_depth = i;
                path[ppos].p_ext = NULL;

                bh = sb_getblk(inode->i_sb, path[ppos].p_block);
                if (unlikely(!bh))
                        goto err;
                if (!bh_uptodate_or_lock(bh)) {
                        if (bh_submit_read(bh) < 0) {
                                put_bh(bh);
                                goto err;
                        }
                        /* validate the extent entries */
                        need_to_validate = 1;
                }
                eh = ext_block_hdr(bh);
                ppos++;
                BUG_ON(ppos > depth);
                path[ppos].p_bh = bh;
                path[ppos].p_hdr = eh;
                i--;

                if (need_to_validate && ext4_ext_check(inode, eh, i))
                        goto err;
        }

        path[ppos].p_depth = i;
        path[ppos].p_ext = NULL;
        path[ppos].p_idx = NULL;

        /* find extent */
        ext4_ext_binsearch(inode, path + ppos, block);
        /* if not an empty leaf */
        if (path[ppos].p_ext)
                path[ppos].p_block = ext_pblock(path[ppos].p_ext);

        ext4_ext_show_path(inode, path);

        return path;

err:
        ext4_ext_drop_refs(path);
        if (alloc)
                kfree(path);
        return ERR_PTR(-EIO);
}

/*
 * ext4_ext_insert_index:
 * insert new index [@logical;@ptr] into the block at @curp;
 * check where to insert: before @curp or after @curp
 */
static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
                                struct ext4_ext_path *curp,
                                int logical, ext4_fsblk_t ptr)
{
        struct ext4_extent_idx *ix;
        int len, err;

        err = ext4_ext_get_access(handle, inode, curp);
        if (err)
                return err;

        BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block));
        len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
        if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
                /* insert after */
                if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) {
                        len = (len - 1) * sizeof(struct ext4_extent_idx);
                        len = len < 0 ? 0 : len;
                        ext_debug("insert new index %d after: %llu. "
                                        "move %d from 0x%p to 0x%p\n",
                                        logical, ptr, len,
                                        (curp->p_idx + 1), (curp->p_idx + 2));
                        memmove(curp->p_idx + 2, curp->p_idx + 1, len);
                }
                ix = curp->p_idx + 1;
        } else {
                /* insert before */
                len = len * sizeof(struct ext4_extent_idx);
                len = len < 0 ? 0 : len;
                ext_debug("insert new index %d before: %llu. "
                                "move %d from 0x%p to 0x%p\n",
                                logical, ptr, len,
                                curp->p_idx, (curp->p_idx + 1));
                memmove(curp->p_idx + 1, curp->p_idx, len);
                ix = curp->p_idx;
        }

        ix->ei_block = cpu_to_le32(logical);
        ext4_idx_store_pblock(ix, ptr);
        le16_add_cpu(&curp->p_hdr->eh_entries, 1);

        BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
                             > le16_to_cpu(curp->p_hdr->eh_max));
        BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr));

        err = ext4_ext_dirty(handle, inode, curp);
        ext4_std_error(inode->i_sb, err);

        return err;
}

/*
 * ext4_ext_split:
 * inserts new subtree into the path, using free index entry
 * at depth @at:
 * - allocates all needed blocks (new leaf and all intermediate index blocks)
 * - makes decision where to split
 * - moves remaining extents and index entries (right to the split point)
 *   into the newly allocated blocks
 * - initializes subtree
 */
static int ext4_ext_split(handle_t *handle, struct inode *inode,
                                struct ext4_ext_path *path,
                                struct ext4_extent *newext, int at)
{
        struct buffer_head *bh = NULL;
        int depth = ext_depth(inode);
        struct ext4_extent_header *neh;
        struct ext4_extent_idx *fidx;
        struct ext4_extent *ex;
        int i = at, k, m, a;
        ext4_fsblk_t newblock, oldblock;
        __le32 border;
        ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
        int err = 0;

        /* make decision: where to split? */
        /* FIXME: now decision is simplest: at current extent */

        /* if current leaf will be split, then we should use
         * border from split point */
        BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr));
        if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
                border = path[depth].p_ext[1].ee_block;
                ext_debug("leaf will be split."
                                " next leaf starts at %d\n",
                                  le32_to_cpu(border));
        } else {
                border = newext->ee_block;
                ext_debug("leaf will be added."
                                " next leaf starts at %d\n",
                                le32_to_cpu(border));
        }

        /*
         * If an error occurs, we stop processing and mark the
         * filesystem read-only. The index won't be inserted and the
         * tree will be left in a consistent state. The next mount
         * will repair buffers too.
         */

        /*
         * Get array to track all allocated blocks.
         * We need this to handle errors and free blocks
         * upon them.
         */
        ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS);
        if (!ablocks)
                return -ENOMEM;

        /* allocate all needed blocks */
        ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
        for (a = 0; a < depth - at; a++) {
                newblock = ext4_ext_new_meta_block(handle, inode, path,
                                                   newext, &err);
                if (newblock == 0)
                        goto cleanup;
                ablocks[a] = newblock;
        }

        /* initialize new leaf */
        newblock = ablocks[--a];
        BUG_ON(newblock == 0);
        bh = sb_getblk(inode->i_sb, newblock);
        if (!bh) {
                err = -EIO;
                goto cleanup;
        }
        lock_buffer(bh);

        err = ext4_journal_get_create_access(handle, bh);
        if (err)
                goto cleanup;

        neh = ext_block_hdr(bh);
        neh->eh_entries = 0;
        neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode));
        neh->eh_magic = EXT4_EXT_MAGIC;
        neh->eh_depth = 0;
        ex = EXT_FIRST_EXTENT(neh);

        /* move remainder of path[depth] to the new leaf */
        BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max);
        /* start copy from next extent */
        /* TODO: we could do it by single memmove */
        m = 0;
        path[depth].p_ext++;
        while (path[depth].p_ext <=
                        EXT_MAX_EXTENT(path[depth].p_hdr)) {
                ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
                                le32_to_cpu(path[depth].p_ext->ee_block),
                                ext_pblock(path[depth].p_ext),
                                ext4_ext_is_uninitialized(path[depth].p_ext),
                                ext4_ext_get_actual_len(path[depth].p_ext),
                                newblock);
                /*memmove(ex++, path[depth].p_ext++,
                                sizeof(struct ext4_extent));
                neh->eh_entries++;*/
                path[depth].p_ext++;
                m++;
        }
        if (m) {
                memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m);
                le16_add_cpu(&neh->eh_entries, m);
        }

        set_buffer_uptodate(bh);
        unlock_buffer(bh);

        err = ext4_handle_dirty_metadata(handle, inode, bh);
        if (err)
                goto cleanup;
        brelse(bh);
        bh = NULL;

        /* correct old leaf */
        if (m) {
                err = ext4_ext_get_access(handle, inode, path + depth);
                if (err)
                        goto cleanup;
                le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
                err = ext4_ext_dirty(handle, inode, path + depth);
                if (err)
                        goto cleanup;

        }

        /* create intermediate indexes */
        k = depth - at - 1;
        BUG_ON(k < 0);
        if (k)
                ext_debug("create %d intermediate indices\n", k);
        /* insert new index into current index block */
        /* current depth stored in i var */
        i = depth - 1;
        while (k--) {
                oldblock = newblock;
                newblock = ablocks[--a];
                bh = sb_getblk(inode->i_sb, newblock);
                if (!bh) {
                        err = -EIO;
                        goto cleanup;
                }
                lock_buffer(bh);

                err = ext4_journal_get_create_access(handle, bh);
                if (err)
                        goto cleanup;

                neh = ext_block_hdr(bh);
                neh->eh_entries = cpu_to_le16(1);
                neh->eh_magic = EXT4_EXT_MAGIC;
                neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode));
                neh->eh_depth = cpu_to_le16(depth - i);
                fidx = EXT_FIRST_INDEX(neh);
                fidx->ei_block = border;
                ext4_idx_store_pblock(fidx, oldblock);

                ext_debug("int.index at %d (block %llu): %u -> %llu\n",
                                i, newblock, le32_to_cpu(border), oldblock);
                /* copy indexes */
                m = 0;
                path[i].p_idx++;

                ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
                                EXT_MAX_INDEX(path[i].p_hdr));
                BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) !=
                                EXT_LAST_INDEX(path[i].p_hdr));
                while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
                        ext_debug("%d: move %d:%llu in new index %llu\n", i,
                                        le32_to_cpu(path[i].p_idx->ei_block),
                                        idx_pblock(path[i].p_idx),
                                        newblock);
                        /*memmove(++fidx, path[i].p_idx++,
                                        sizeof(struct ext4_extent_idx));
                        neh->eh_entries++;
                        BUG_ON(neh->eh_entries > neh->eh_max);*/
                        path[i].p_idx++;
                        m++;
                }
                if (m) {
                        memmove(++fidx, path[i].p_idx - m,
                                sizeof(struct ext4_extent_idx) * m);
                        le16_add_cpu(&neh->eh_entries, m);
                }
                set_buffer_uptodate(bh);
                unlock_buffer(bh);

                err = ext4_handle_dirty_metadata(handle, inode, bh);
                if (err)
                        goto cleanup;
                brelse(bh);
                bh = NULL;

                /* correct old index */
                if (m) {
                        err = ext4_ext_get_access(handle, inode, path + i);
                        if (err)
                                goto cleanup;
                        le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
                        err = ext4_ext_dirty(handle, inode, path + i);
                        if (err)
                                goto cleanup;
                }

                i--;
        }

        /* insert new index */
        err = ext4_ext_insert_index(handle, inode, path + at,
                                    le32_to_cpu(border), newblock);

cleanup:
        if (bh) {
                if (buffer_locked(bh))
                        unlock_buffer(bh);
                brelse(bh);
        }

        if (err) {
                /* free all allocated blocks in error case */
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
                        ext4_free_blocks(handle, inode, ablocks[i], 1, 1);
                }
        }
        kfree(ablocks);

        return err;
}

/*
 * ext4_ext_grow_indepth:
 * implements tree growing procedure:
 * - allocates new block
 * - moves top-level data (index block or leaf) into the new block
 * - initializes new top-level, creating index that points to the
 *   just created block
 */
static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
                                        struct ext4_ext_path *path,
                                        struct ext4_extent *newext)
{
        struct ext4_ext_path *curp = path;
        struct ext4_extent_header *neh;
        struct ext4_extent_idx *fidx;
        struct buffer_head *bh;
        ext4_fsblk_t newblock;
        int err = 0;

        newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err);
        if (newblock == 0)
                return err;

        bh = sb_getblk(inode->i_sb, newblock);
        if (!bh) {
                err = -EIO;
                ext4_std_error(inode->i_sb, err);
                return err;
        }
        lock_buffer(bh);

        err = ext4_journal_get_create_access(handle, bh);
        if (err) {
                unlock_buffer(bh);
                goto out;
        }

        /* move top-level index/leaf into new block */
        memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data));

        /* set size of new block */
        neh = ext_block_hdr(bh);
        /* old root could have indexes or leaves
         * so calculate eh_max the right way */
        if (ext_depth(inode))
                neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode));
        else
                neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode));
        neh->eh_magic = EXT4_EXT_MAGIC;
        set_buffer_uptodate(bh);
        unlock_buffer(bh);

        err = ext4_handle_dirty_metadata(handle, inode, bh);
        if (err)
                goto out;

        /* create index in new top-level index: num,max,pointer */
        err = ext4_ext_get_access(handle, inode, curp);
        if (err)
                goto out;

        curp->p_hdr->eh_magic = EXT4_EXT_MAGIC;
        curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode));
        curp->p_hdr->eh_entries = cpu_to_le16(1);
        curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);

        if (path[0].p_hdr->eh_depth)
                curp->p_idx->ei_block =
                        EXT_FIRST_INDEX(path[0].p_hdr)->ei_block;
        else
                curp->p_idx->ei_block =
                        EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
        ext4_idx_store_pblock(curp->p_idx, newblock);

        neh = ext_inode_hdr(inode);
        fidx = EXT_FIRST_INDEX(neh);
        ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
                  le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
                  le32_to_cpu(fidx->ei_block), idx_pblock(fidx));

        neh->eh_depth = cpu_to_le16(path->p_depth + 1);
        err = ext4_ext_dirty(handle, inode, curp);
out:
        brelse(bh);

        return err;
}

/*
 * ext4_ext_create_new_leaf:
 * finds an empty index and adds a new leaf.
 * If no free index is found, the tree is grown in depth.
 */
static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
                                        struct ext4_ext_path *path,
                                        struct ext4_extent *newext)
{
        struct ext4_ext_path *curp;
        int depth, i, err = 0;

repeat:
        i = depth = ext_depth(inode);

        /* walk up the tree and look for a free index entry */
        curp = path + depth;
        while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
                i--;
                curp--;
        }

        /* we use an already allocated block as the index block,
         * so subsequent data blocks should be contiguous */
        if (EXT_HAS_FREE_INDEX(curp)) {
                /* if we found an index with a free entry, then use that
                 * entry: create all needed subtree and add new leaf */
                err = ext4_ext_split(handle, inode, path, newext, i);
                if (err)
                        goto out;

                /* refill path */
                ext4_ext_drop_refs(path);
                path = ext4_ext_find_extent(inode,
                                    (ext4_lblk_t)le32_to_cpu(newext->ee_block),
                                    path);
                if (IS_ERR(path))
                        err = PTR_ERR(path);
        } else {
                /* tree is full, time to grow in depth */
                err = ext4_ext_grow_indepth(handle, inode, path, newext);
                if (err)
                        goto out;

                /* refill path */
                ext4_ext_drop_refs(path);
                path = ext4_ext_find_extent(inode,
                                   (ext4_lblk_t)le32_to_cpu(newext->ee_block),
                                    path);
                if (IS_ERR(path)) {
                        err = PTR_ERR(path);
                        goto out;
                }

                /*
                 * only the first grow (depth 0 -> 1) produces free space;
                 * in all other cases we have to split the grown tree
                 */
                depth = ext_depth(inode);
                if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
                        /* now we need to split */
                        goto repeat;
                }
        }

out:
        return err;
}

/*
 * search the closest allocated block to the left of *logical
 * and return it at @logical + its physical address at @phys.
 * If *logical is the smallest allocated block, the function
 * returns 0 at @phys.
 * The return value contains 0 (success) or an error code.
 */
int
ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
                        ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
        struct ext4_extent_idx *ix;
        struct ext4_extent *ex;
        int depth, ee_len;

        BUG_ON(path == NULL);
        depth = path->p_depth;
        *phys = 0;

        if (depth == 0 && path->p_ext == NULL)
                return 0;

        /* usually the extent in the path covers blocks smaller
         * than *logical, but it can be that the extent is the
         * first one in the file */

        ex = path[depth].p_ext;
        ee_len = ext4_ext_get_actual_len(ex);
        if (*logical < le32_to_cpu(ex->ee_block)) {
                BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex);
                while (--depth >= 0) {
                        ix = path[depth].p_idx;
                        BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr));
                }
                return 0;
        }

        BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len));

        *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
        *phys = ext_pblock(ex) + ee_len - 1;
        return 0;
}

/*
 * search the closest allocated block to the right of *logical
 * and return it at @logical + its physical address at @phys.
 * If *logical is the largest allocated block, the function
 * returns 0 at @phys.
 * The return value contains 0 (success) or an error code.
 */
int
ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
                        ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
        struct buffer_head *bh = NULL;
        struct ext4_extent_header *eh;
        struct ext4_extent_idx *ix;
        struct ext4_extent *ex;
        ext4_fsblk_t block;
        int depth;      /* Note, NOT eh_depth; depth from top of tree */
        int ee_len;

        BUG_ON(path == NULL);
        depth = path->p_depth;
        *phys = 0;

        if (depth == 0 && path->p_ext == NULL)
                return 0;

        /* usually the extent in the path covers blocks smaller
         * than *logical, but it can be that the extent is the
         * first one in the file */

        ex = path[depth].p_ext;
        ee_len = ext4_ext_get_actual_len(ex);
        if (*logical < le32_to_cpu(ex->ee_block)) {
                BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex);
                while (--depth >= 0) {
                        ix = path[depth].p_idx;
                        BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr));
                }
                *logical = le32_to_cpu(ex->ee_block);
                *phys = ext_pblock(ex);
                return 0;
        }

        BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len));

        if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
                /* next allocated block in this leaf */
                ex++;
                *logical = le32_to_cpu(ex->ee_block);
                *phys = ext_pblock(ex);
                return 0;
        }

        /* go up and search for index to the right */
        while (--depth >= 0) {
                ix = path[depth].p_idx;
                if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
                        goto got_index;
        }

        /* we've gone up to the root and found no index to the right */
        return 0;

got_index:
        /* we've found index to the right, let's
         * follow it and find the closest allocated
         * block to the right */
        ix++;
        block = idx_pblock(ix);
        while (++depth < path->p_depth) {
                bh = sb_bread(inode->i_sb, block);
                if (bh == NULL)
                        return -EIO;
                eh = ext_block_hdr(bh);
                /* subtract from p_depth to get proper eh_depth */
                if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
                        put_bh(bh);
                        return -EIO;
                }
                ix = EXT_FIRST_INDEX(eh);
                block = idx_pblock(ix);
                put_bh(bh);
        }

        bh = sb_bread(inode->i_sb, block);
        if (bh == NULL)
                return -EIO;
        eh = ext_block_hdr(bh);
        if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
                put_bh(bh);
                return -EIO;
        }
        ex = EXT_FIRST_EXTENT(eh);
        *logical = le32_to_cpu(ex->ee_block);
        *phys = ext_pblock(ex);
        put_bh(bh);
        return 0;
}

/*
 * ext4_ext_next_allocated_block:
 * returns allocated block in subsequent extent or EXT_MAX_BLOCK.
 * NOTE: it considers block number from index entry as
 * allocated block. Thus, index entries have to be consistent
 * with leaves.
 */
static ext4_lblk_t
ext4_ext_next_allocated_block(struct ext4_ext_path *path)
{
        int depth;

        BUG_ON(path == NULL);
        depth = path->p_depth;

        if (depth == 0 && path->p_ext == NULL)
                return EXT_MAX_BLOCK;

        while (depth >= 0) {
                if (depth == path->p_depth) {
                        /* leaf */
                        if (path[depth].p_ext !=
                                        EXT_LAST_EXTENT(path[depth].p_hdr))
                                return le32_to_cpu(path[depth].p_ext[1].ee_block);
                } else {
                        /* index */
                        if (path[depth].p_idx !=
                                        EXT_LAST_INDEX(path[depth].p_hdr))
                                return le32_to_cpu(path[depth].p_idx[1].ei_block);
                }
                depth--;
        }

        return EXT_MAX_BLOCK;
}

/*
 * ext4_ext_next_leaf_block:
 * returns first allocated block from next leaf or EXT_MAX_BLOCK
 */
static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
                                        struct ext4_ext_path *path)
{
        int depth;

        BUG_ON(path == NULL);
        depth = path->p_depth;

        /* a zero-depth tree has no leaf blocks at all */
        if (depth == 0)
                return EXT_MAX_BLOCK;

        /* go to index block */
        depth--;

        while (depth >= 0) {
                if (path[depth].p_idx !=
                                EXT_LAST_INDEX(path[depth].p_hdr))
                        return (ext4_lblk_t)
                                le32_to_cpu(path[depth].p_idx[1].ei_block);
                depth--;
        }

        return EXT_MAX_BLOCK;
}

/*
 * ext4_ext_correct_indexes:
 * if the leaf gets modified and the modified extent is first in the leaf,
 * then we have to correct all indexes above.
 * TODO: do we need to correct the tree in all cases?
 */
static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
                                struct ext4_ext_path *path)
{
        struct ext4_extent_header *eh;
        int depth = ext_depth(inode);
        struct ext4_extent *ex;
        __le32 border;
        int k, err = 0;

        eh = path[depth].p_hdr;
        ex = path[depth].p_ext;
        BUG_ON(ex == NULL);
        BUG_ON(eh == NULL);

        if (depth == 0) {
                /* there is no tree at all */
                return 0;
        }

        if (ex != EXT_FIRST_EXTENT(eh)) {
                /* we correct the tree only if the first extent in the
                 * leaf got modified */
                return 0;
        }

        /*
         * TODO: we need correction if border is smaller than current one
         */
        k = depth - 1;
        border = path[depth].p_ext->ee_block;
        err = ext4_ext_get_access(handle, inode, path + k);
        if (err)
                return err;
        path[k].p_idx->ei_block = border;
        err = ext4_ext_dirty(handle, inode, path + k);
        if (err)
                return err;

        while (k--) {
                /* change all left-side indexes */
                if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
                        break;
                err = ext4_ext_get_access(handle, inode, path + k);
                if (err)
                        break;
                path[k].p_idx->ei_block = border;
                err = ext4_ext_dirty(handle, inode, path + k);
                if (err)
                        break;
        }

        return err;
}

int
ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
                                struct ext4_extent *ex2)
{
        unsigned short ext1_ee_len, ext2_ee_len, max_len;

        /*
         * Make sure that either both extents are uninitialized, or
         * both are _not_.
         */
        if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2))
                return 0;

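        /*
         * The top bit of ee_len marks an uninitialized extent, so an
         * initialized extent can span EXT_INIT_MAX_LEN (32768) blocks
         * while an uninitialized one is limited to one block less.
         */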
        if (ext4_ext_is_uninitialized(ex1))
                max_len = EXT_UNINIT_MAX_LEN;
        else
                max_len = EXT_INIT_MAX_LEN;

        ext1_ee_len = ext4_ext_get_actual_len(ex1);
        ext2_ee_len = ext4_ext_get_actual_len(ex2);

        if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
                        le32_to_cpu(ex2->ee_block))
                return 0;

        /*
         * To allow future support for preallocated extents to be added
         * as an RO_COMPAT feature, refuse to merge two extents if
         * this can result in the top bit of ee_len being set.
         */
        if (ext1_ee_len + ext2_ee_len > max_len)
                return 0;
#ifdef AGGRESSIVE_TEST
        if (ext1_ee_len >= 4)
                return 0;
#endif

        if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2))
                return 1;
        return 0;
}

/*
 * This function tries to merge the "ex" extent to the next extent in the tree.
 * It always tries to merge towards right. If you want to merge towards
 * left, pass "ex - 1" as argument instead of "ex".
 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
 * 1 if they got merged.
 */
int ext4_ext_try_to_merge(struct inode *inode,
                          struct ext4_ext_path *path,
                          struct ext4_extent *ex)
{
        struct ext4_extent_header *eh;
        unsigned int depth, len;
        int merge_done = 0;
        int uninitialized = 0;

        depth = ext_depth(inode);
        BUG_ON(path[depth].p_hdr == NULL);
        eh = path[depth].p_hdr;

        while (ex < EXT_LAST_EXTENT(eh)) {
                if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
                        break;
                /* merge with next extent! */
                if (ext4_ext_is_uninitialized(ex))
                        uninitialized = 1;
                ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
                                + ext4_ext_get_actual_len(ex + 1));
                if (uninitialized)
                        ext4_ext_mark_uninitialized(ex);

                if (ex + 1 < EXT_LAST_EXTENT(eh)) {
                        len = (EXT_LAST_EXTENT(eh) - ex - 1)
                                * sizeof(struct ext4_extent);
                        memmove(ex + 1, ex + 2, len);
                }
                le16_add_cpu(&eh->eh_entries, -1);
                merge_done = 1;
                WARN_ON(eh->eh_entries == 0);
                if (!eh->eh_entries)
                        ext4_error(inode->i_sb, "ext4_ext_try_to_merge",
                           "inode#%lu, eh->eh_entries = 0!", inode->i_ino);
        }

        return merge_done;
}

1515 /*
1516  * check if a portion of the "newext" extent overlaps with an
1517  * existing extent.
1518  *
1519  * If there is an overlap discovered, it updates the length of the newext
1520  * such that there will be no overlap, and then returns 1.
1521  * If there is no overlap found, it returns 0.
1522  */
1523 unsigned int ext4_ext_check_overlap(struct inode *inode,
1524                                     struct ext4_extent *newext,
1525                                     struct ext4_ext_path *path)
1526 {
1527         ext4_lblk_t b1, b2;
1528         unsigned int depth, len1;
1529         unsigned int ret = 0;
1530
1531         b1 = le32_to_cpu(newext->ee_block);
1532         len1 = ext4_ext_get_actual_len(newext);
1533         depth = ext_depth(inode);
1534         if (!path[depth].p_ext)
1535                 goto out;
1536         b2 = le32_to_cpu(path[depth].p_ext->ee_block);
1537
1538         /*
1539          * get the next allocated block if the extent in the path
1540          * is before the requested block(s)
1541          */
1542         if (b2 < b1) {
1543                 b2 = ext4_ext_next_allocated_block(path);
1544                 if (b2 == EXT_MAX_BLOCK)
1545                         goto out;
1546         }
1547
1548         /* check for wrap through zero on extent logical start block */
1549         if (b1 + len1 < b1) {
1550                 len1 = EXT_MAX_BLOCK - b1;
1551                 newext->ee_len = cpu_to_le16(len1);
1552                 ret = 1;
1553         }
1554
1555         /* check for overlap */
1556         if (b1 + len1 > b2) {
1557                 newext->ee_len = cpu_to_le16(b2 - b1);
1558                 ret = 1;
1559         }
1560 out:
1561         return ret;
1562 }
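/*
 * Illustrative sketch (compiled out): a typical caller trims the new
 * extent before insertion and, if an overlap was found, reports back
 * only the shrunken length as allocated.
 */
#if 0
static unsigned int example_trim_before_insert(struct inode *inode,
                                               struct ext4_extent *newext,
                                               struct ext4_ext_path *path,
                                               unsigned int allocated)
{
        if (ext4_ext_check_overlap(inode, newext, path) &&
            allocated > ext4_ext_get_actual_len(newext))
                allocated = ext4_ext_get_actual_len(newext);
        return allocated;
}
#endif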
1563
1564 /*
1565  * ext4_ext_insert_extent:
1566  * tries to merge the requested extent into the existing extent or
1567  * inserts the requested extent as a new one into the tree,
1568  * creating new leaf in the no-space case.
1569  */
1570 int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1571                                 struct ext4_ext_path *path,
1572                                 struct ext4_extent *newext)
1573 {
1574         struct ext4_extent_header *eh;
1575         struct ext4_extent *ex, *fex;
1576         struct ext4_extent *nearex; /* nearest extent */
1577         struct ext4_ext_path *npath = NULL;
1578         int depth, len, err;
1579         ext4_lblk_t next;
1580         unsigned uninitialized = 0;
1581
1582         BUG_ON(ext4_ext_get_actual_len(newext) == 0);
1583         depth = ext_depth(inode);
1584         ex = path[depth].p_ext;
1585         BUG_ON(path[depth].p_hdr == NULL);
1586
1587         /* try to insert block into found extent and return */
1588         if (ex && ext4_can_extents_be_merged(inode, ex, newext)) {
1589                 ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
1590                                 ext4_ext_is_uninitialized(newext),
1591                                 ext4_ext_get_actual_len(newext),
1592                                 le32_to_cpu(ex->ee_block),
1593                                 ext4_ext_is_uninitialized(ex),
1594                                 ext4_ext_get_actual_len(ex), ext_pblock(ex));
1595                 err = ext4_ext_get_access(handle, inode, path + depth);
1596                 if (err)
1597                         return err;
1598
1599                 /*
1600                  * ext4_can_extents_be_merged should have checked that either
1601                  * both extents are uninitialized, or both aren't. Thus we
1602                  * need to check only one of them here.
1603                  */
1604                 if (ext4_ext_is_uninitialized(ex))
1605                         uninitialized = 1;
1606                 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
1607                                         + ext4_ext_get_actual_len(newext));
1608                 if (uninitialized)
1609                         ext4_ext_mark_uninitialized(ex);
1610                 eh = path[depth].p_hdr;
1611                 nearex = ex;
1612                 goto merge;
1613         }
1614
1615 repeat:
1616         depth = ext_depth(inode);
1617         eh = path[depth].p_hdr;
1618         if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
1619                 goto has_space;
1620
1621         /* probably next leaf has space for us? */
1622         fex = EXT_LAST_EXTENT(eh);
1623         next = ext4_ext_next_leaf_block(inode, path);
1624         if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)
1625             && next != EXT_MAX_BLOCK) {
1626                 ext_debug("next leaf block - %d\n", next);
1627                 BUG_ON(npath != NULL);
1628                 npath = ext4_ext_find_extent(inode, next, NULL);
1629                 if (IS_ERR(npath))
1630                         return PTR_ERR(npath);
1631                 BUG_ON(npath->p_depth != path->p_depth);
1632                 eh = npath[depth].p_hdr;
1633                 if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
1634                         ext_debug("next leaf isn't full(%d)\n",
1635                                   le16_to_cpu(eh->eh_entries));
1636                         path = npath;
1637                         goto repeat;
1638                 }
1639                 ext_debug("next leaf has no free space(%d,%d)\n",
1640                           le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
1641         }
1642
1643         /*
1644          * There is no free space in the found leaf.
1645          * We're gonna add a new leaf in the tree.
1646          */
1647         err = ext4_ext_create_new_leaf(handle, inode, path, newext);
1648         if (err)
1649                 goto cleanup;
1650         depth = ext_depth(inode);
1651         eh = path[depth].p_hdr;
1652
1653 has_space:
1654         nearex = path[depth].p_ext;
1655
1656         err = ext4_ext_get_access(handle, inode, path + depth);
1657         if (err)
1658                 goto cleanup;
1659
1660         if (!nearex) {
1661                 /* there is no extent in this leaf, create first one */
1662                 ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n",
1663                                 le32_to_cpu(newext->ee_block),
1664                                 ext_pblock(newext),
1665                                 ext4_ext_is_uninitialized(newext),
1666                                 ext4_ext_get_actual_len(newext));
1667                 path[depth].p_ext = EXT_FIRST_EXTENT(eh);
1668         } else if (le32_to_cpu(newext->ee_block)
1669                            > le32_to_cpu(nearex->ee_block)) {
1670 /*              BUG_ON(newext->ee_block == nearex->ee_block); */
1671                 if (nearex != EXT_LAST_EXTENT(eh)) {
1672                         len = EXT_MAX_EXTENT(eh) - nearex;
1673                         len = (len - 1) * sizeof(struct ext4_extent);
1674                         len = len < 0 ? 0 : len;
1675                         ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
1676                                         "move %d from 0x%p to 0x%p\n",
1677                                         le32_to_cpu(newext->ee_block),
1678                                         ext_pblock(newext),
1679                                         ext4_ext_is_uninitialized(newext),
1680                                         ext4_ext_get_actual_len(newext),
1681                                         nearex, len, nearex + 1, nearex + 2);
1682                         memmove(nearex + 2, nearex + 1, len);
1683                 }
1684                 path[depth].p_ext = nearex + 1;
1685         } else {
1686                 BUG_ON(newext->ee_block == nearex->ee_block);
1687                 len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
1688                 len = len < 0 ? 0 : len;
1689                 ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
1690                                 "move %d from 0x%p to 0x%p\n",
1691                                 le32_to_cpu(newext->ee_block),
1692                                 ext_pblock(newext),
1693                                 ext4_ext_is_uninitialized(newext),
1694                                 ext4_ext_get_actual_len(newext),
1695                                 nearex, len, nearex + 1, nearex + 2);
1696                 memmove(nearex + 1, nearex, len);
1697                 path[depth].p_ext = nearex;
1698         }
1699
1700         le16_add_cpu(&eh->eh_entries, 1);
1701         nearex = path[depth].p_ext;
1702         nearex->ee_block = newext->ee_block;
1703         ext4_ext_store_pblock(nearex, ext_pblock(newext));
1704         nearex->ee_len = newext->ee_len;
1705
1706 merge:
1707         /* try to merge extents to the right */
1708         ext4_ext_try_to_merge(inode, path, nearex);
1709
1710         /* try to merge extents to the left */
1711
1712         /* time to correct all indexes above */
1713         err = ext4_ext_correct_indexes(handle, inode, path);
1714         if (err)
1715                 goto cleanup;
1716
1717         err = ext4_ext_dirty(handle, inode, path + depth);
1718
1719 cleanup:
1720         if (npath) {
1721                 ext4_ext_drop_refs(npath);
1722                 kfree(npath);
1723         }
1724         ext4_ext_invalidate_cache(inode);
1725         return err;
1726 }
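/*
 * Illustrative sketch (compiled out): the usual insertion pattern is
 * find-path, insert, release the path. Error handling is trimmed down
 * for brevity.
 */
#if 0
static int example_insert(handle_t *handle, struct inode *inode,
                          struct ext4_extent *newext)
{
        struct ext4_ext_path *path;
        int err;

        path = ext4_ext_find_extent(inode,
                        le32_to_cpu(newext->ee_block), NULL);
        if (IS_ERR(path))
                return PTR_ERR(path);
        err = ext4_ext_insert_extent(handle, inode, path, newext);
        ext4_ext_drop_refs(path);
        kfree(path);
        return err;
}
#endif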
1727
1728 int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1729                         ext4_lblk_t num, ext_prepare_callback func,
1730                         void *cbdata)
1731 {
1732         struct ext4_ext_path *path = NULL;
1733         struct ext4_ext_cache cbex;
1734         struct ext4_extent *ex;
1735         ext4_lblk_t next, start = 0, end = 0;
1736         ext4_lblk_t last = block + num;
1737         int depth, exists, err = 0;
1738
1739         BUG_ON(func == NULL);
1740         BUG_ON(inode == NULL);
1741
1742         while (block < last && block != EXT_MAX_BLOCK) {
1743                 num = last - block;
1744                 /* find extent for this block */
1745                 path = ext4_ext_find_extent(inode, block, path);
1746                 if (IS_ERR(path)) {
1747                         err = PTR_ERR(path);
1748                         path = NULL;
1749                         break;
1750                 }
1751
1752                 depth = ext_depth(inode);
1753                 BUG_ON(path[depth].p_hdr == NULL);
1754                 ex = path[depth].p_ext;
1755                 next = ext4_ext_next_allocated_block(path);
1756
1757                 exists = 0;
1758                 if (!ex) {
1759                         /* there is no extent yet, so try to allocate
1760                          * all requested space */
1761                         start = block;
1762                         end = block + num;
1763                 } else if (le32_to_cpu(ex->ee_block) > block) {
1764                         /* need to allocate space before found extent */
1765                         start = block;
1766                         end = le32_to_cpu(ex->ee_block);
1767                         if (block + num < end)
1768                                 end = block + num;
1769                 } else if (block >= le32_to_cpu(ex->ee_block)
1770                                         + ext4_ext_get_actual_len(ex)) {
1771                         /* need to allocate space after found extent */
1772                         start = block;
1773                         end = block + num;
1774                         if (end >= next)
1775                                 end = next;
1776                 } else if (block >= le32_to_cpu(ex->ee_block)) {
1777                         /*
1778                          * some part of requested space is covered
1779                          * by found extent
1780                          */
1781                         start = block;
1782                         end = le32_to_cpu(ex->ee_block)
1783                                 + ext4_ext_get_actual_len(ex);
1784                         if (block + num < end)
1785                                 end = block + num;
1786                         exists = 1;
1787                 } else {
1788                         BUG();
1789                 }
1790                 BUG_ON(end <= start);
1791
1792                 if (!exists) {
1793                         cbex.ec_block = start;
1794                         cbex.ec_len = end - start;
1795                         cbex.ec_start = 0;
1796                         cbex.ec_type = EXT4_EXT_CACHE_GAP;
1797                 } else {
1798                         cbex.ec_block = le32_to_cpu(ex->ee_block);
1799                         cbex.ec_len = ext4_ext_get_actual_len(ex);
1800                         cbex.ec_start = ext_pblock(ex);
1801                         cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
1802                 }
1803
1804                 BUG_ON(cbex.ec_len == 0);
1805                 err = func(inode, path, &cbex, ex, cbdata);
1806                 ext4_ext_drop_refs(path);
1807
1808                 if (err < 0)
1809                         break;
1810
1811                 if (err == EXT_REPEAT)
1812                         continue;
1813                 else if (err == EXT_BREAK) {
1814                         err = 0;
1815                         break;
1816                 }
1817
1818                 if (ext_depth(inode) != depth) {
1819                         /* depth was changed. we have to realloc path */
1820                         kfree(path);
1821                         path = NULL;
1822                 }
1823
1824                 block = cbex.ec_block + cbex.ec_len;
1825         }
1826
1827         if (path) {
1828                 ext4_ext_drop_refs(path);
1829                 kfree(path);
1830         }
1831
1832         return err;
1833 }
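/*
 * Illustrative sketch (compiled out): a minimal ext_prepare_callback
 * that counts allocated extents and skips gaps. EXT_CONTINUE keeps the
 * walk going; EXT_BREAK would stop it. The unsigned long counter passed
 * via cbdata is an assumption of this example.
 */
#if 0
static int example_count_cb(struct inode *inode, struct ext4_ext_path *path,
                            struct ext4_ext_cache *cbex,
                            struct ext4_extent *ex, void *cbdata)
{
        unsigned long *nr_extents = cbdata;

        if (cbex->ec_type == EXT4_EXT_CACHE_EXTENT)
                (*nr_extents)++;
        return EXT_CONTINUE;
}
#endif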
1834
1835 static void
1836 ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
1837                         __u32 len, ext4_fsblk_t start, int type)
1838 {
1839         struct ext4_ext_cache *cex;
1840         BUG_ON(len == 0);
1841         spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1842         cex = &EXT4_I(inode)->i_cached_extent;
1843         cex->ec_type = type;
1844         cex->ec_block = block;
1845         cex->ec_len = len;
1846         cex->ec_start = start;
1847         spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1848 }
1849
1850 /*
1851  * ext4_ext_put_gap_in_cache:
1852  * calculate boundaries of the gap that the requested block fits into
1853  * and cache this gap
1854  */
1855 static void
1856 ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
1857                                 ext4_lblk_t block)
1858 {
1859         int depth = ext_depth(inode);
1860         unsigned long len;
1861         ext4_lblk_t lblock;
1862         struct ext4_extent *ex;
1863
1864         ex = path[depth].p_ext;
1865         if (ex == NULL) {
1866                 /* there is no extent yet, so gap is [0;-] */
1867                 lblock = 0;
1868                 len = EXT_MAX_BLOCK;
1869                 ext_debug("cache gap(whole file):");
1870         } else if (block < le32_to_cpu(ex->ee_block)) {
1871                 lblock = block;
1872                 len = le32_to_cpu(ex->ee_block) - block;
1873                 ext_debug("cache gap(before): %u [%u:%u]",
1874                                 block,
1875                                 le32_to_cpu(ex->ee_block),
1876                                  ext4_ext_get_actual_len(ex));
1877         } else if (block >= le32_to_cpu(ex->ee_block)
1878                         + ext4_ext_get_actual_len(ex)) {
1879                 ext4_lblk_t next;
1880                 lblock = le32_to_cpu(ex->ee_block)
1881                         + ext4_ext_get_actual_len(ex);
1882
1883                 next = ext4_ext_next_allocated_block(path);
1884                 ext_debug("cache gap(after): [%u:%u] %u",
1885                                 le32_to_cpu(ex->ee_block),
1886                                 ext4_ext_get_actual_len(ex),
1887                                 block);
1888                 BUG_ON(next == lblock);
1889                 len = next - lblock;
1890         } else {
1891                 lblock = len = 0;
1892                 BUG();
1893         }
1894
1895         ext_debug(" -> %u:%lu\n", lblock, len);
1896         ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP);
1897 }
1898
1899 static int
1900 ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
1901                         struct ext4_extent *ex)
1902 {
1903         struct ext4_ext_cache *cex;
1904         int ret = EXT4_EXT_CACHE_NO;
1905
1906         /* 
1907          * We borrow i_block_reservation_lock to protect i_cached_extent
1908          */
1909         spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1910         cex = &EXT4_I(inode)->i_cached_extent;
1911
1912         /* has cache valid data? */
1913         if (cex->ec_type == EXT4_EXT_CACHE_NO)
1914                 goto errout;
1915
1916         BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
1917                         cex->ec_type != EXT4_EXT_CACHE_EXTENT);
1918         if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) {
1919                 ex->ee_block = cpu_to_le32(cex->ec_block);
1920                 ext4_ext_store_pblock(ex, cex->ec_start);
1921                 ex->ee_len = cpu_to_le16(cex->ec_len);
1922                 ext_debug("%u cached by %u:%u:%llu\n",
1923                                 block,
1924                                 cex->ec_block, cex->ec_len, cex->ec_start);
1925                 ret = cex->ec_type;
1926         }
1927 errout:
1928         spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1929         return ret;
1930 }
1931
1932 /*
1933  * ext4_ext_rm_idx:
1934  * removes index from the index block.
1935  * It's used in truncate case only, thus all requests are for
1936  * last index in the block only.
1937  */
1938 static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1939                         struct ext4_ext_path *path)
1940 {
1941         struct buffer_head *bh;
1942         int err;
1943         ext4_fsblk_t leaf;
1944
1945         /* free index block */
1946         path--;
1947         leaf = idx_pblock(path->p_idx);
1948         BUG_ON(path->p_hdr->eh_entries == 0);
1949         err = ext4_ext_get_access(handle, inode, path);
1950         if (err)
1951                 return err;
1952         le16_add_cpu(&path->p_hdr->eh_entries, -1);
1953         err = ext4_ext_dirty(handle, inode, path);
1954         if (err)
1955                 return err;
1956         ext_debug("index is empty, remove it, free block %llu\n", leaf);
1957         bh = sb_find_get_block(inode->i_sb, leaf);
1958         ext4_forget(handle, 1, inode, bh, leaf);
1959         ext4_free_blocks(handle, inode, leaf, 1, 1);
1960         return err;
1961 }
1962
1963 /*
1964  * ext4_ext_calc_credits_for_single_extent:
1965  * This routine returns the maximum number of credits needed to insert
1966  * an extent into the extent tree.
1967  * When the actual path is passed, the caller should calculate credits
1968  * under i_data_sem.
1969  */
1970 int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
1971                                                 struct ext4_ext_path *path)
1972 {
1973         if (path) {
1974                 int depth = ext_depth(inode);
1975                 int ret = 0;
1976
1977                 /* probably there is space in leaf? */
1978                 if (le16_to_cpu(path[depth].p_hdr->eh_entries)
1979                                 < le16_to_cpu(path[depth].p_hdr->eh_max)) {
1980
1981                         /*
1982                          *  There is some space in the leaf, no
1983                          *  need to account for the leaf block credit.
1984                          *
1985                          *  Bitmaps, block group descriptor blocks
1986                          *  and other metadata blocks still need to be
1987                          *  accounted for.
1988                          */
1989                         /* 1 bitmap, 1 block group descriptor */
1990                         ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
1991                         return ret;
1992                 }
1993         }
1994
1995         return ext4_chunk_trans_blocks(inode, nrblocks);
1996 }
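/*
 * Illustrative sketch (compiled out): without a path in hand, callers
 * size the transaction pessimistically and only then start the handle.
 */
#if 0
static handle_t *example_start_handle(struct inode *inode, int nrblocks)
{
        int credits;

        credits = ext4_ext_calc_credits_for_single_extent(inode,
                                                          nrblocks, NULL);
        return ext4_journal_start(inode, credits);
}
#endif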
1997
1998 /*
1999  * How many index/leaf blocks need to change/allocate to modify nrblocks?
2000  *
2001  * If nrblocks fit in a single extent (the chunk flag is 1), then in
2002  * the worst case each tree level's index/leaf needs to be changed;
2003  * if the tree splits due to the insertion of a new extent, then the
2004  * old index/leaf needs to be updated too.
2005  *
2006  * If the nrblocks are discontiguous, they could cause
2007  * the whole tree to split more than once, but this is really rare.
2008  */
2009 int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
2010 {
2011         int index;
2012         int depth = ext_depth(inode);
2013
2014         if (chunk)
2015                 index = depth * 2;
2016         else
2017                 index = depth * 3;
2018
2019         return index;
2020 }
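/*
 * Worked example (compiled out): with a tree of depth 2, a single
 * contiguous chunk touches at most 2 * 2 = 4 index/leaf blocks, while
 * discontiguous blocks are charged 2 * 3 = 6 to absorb an extra split.
 */
#if 0
static void example_index_cost(struct inode *inode)
{
        int contig    = ext4_ext_index_trans_blocks(inode, 16, 1);
        int scattered = ext4_ext_index_trans_blocks(inode, 16, 0);

        /* with ext_depth(inode) == 2: contig == 4, scattered == 6 */
}
#endif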
2021
2022 static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2023                                 struct ext4_extent *ex,
2024                                 ext4_lblk_t from, ext4_lblk_t to)
2025 {
2026         struct buffer_head *bh;
2027         unsigned short ee_len =  ext4_ext_get_actual_len(ex);
2028         int i, metadata = 0;
2029
2030         if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
2031                 metadata = 1;
2032 #ifdef EXTENTS_STATS
2033         {
2034                 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2035                 spin_lock(&sbi->s_ext_stats_lock);
2036                 sbi->s_ext_blocks += ee_len;
2037                 sbi->s_ext_extents++;
2038                 if (ee_len < sbi->s_ext_min)
2039                         sbi->s_ext_min = ee_len;
2040                 if (ee_len > sbi->s_ext_max)
2041                         sbi->s_ext_max = ee_len;
2042                 if (ext_depth(inode) > sbi->s_depth_max)
2043                         sbi->s_depth_max = ext_depth(inode);
2044                 spin_unlock(&sbi->s_ext_stats_lock);
2045         }
2046 #endif
2047         if (from >= le32_to_cpu(ex->ee_block)
2048             && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
2049                 /* tail removal */
2050                 ext4_lblk_t num;
2051                 ext4_fsblk_t start;
2052
2053                 num = le32_to_cpu(ex->ee_block) + ee_len - from;
2054                 start = ext_pblock(ex) + ee_len - num;
2055                 ext_debug("free last %u blocks starting %llu\n", num, start);
2056                 for (i = 0; i < num; i++) {
2057                         bh = sb_find_get_block(inode->i_sb, start + i);
2058                         ext4_forget(handle, 0, inode, bh, start + i);
2059                 }
2060                 ext4_free_blocks(handle, inode, start, num, metadata);
2061         } else if (from == le32_to_cpu(ex->ee_block)
2062                    && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
2063                 printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
2064                         from, to, le32_to_cpu(ex->ee_block), ee_len);
2065         } else {
2066                 printk(KERN_INFO "strange request: removal(2) "
2067                                 "%u-%u from %u:%u\n",
2068                                 from, to, le32_to_cpu(ex->ee_block), ee_len);
2069         }
2070         return 0;
2071 }
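/*
 * Worked example (compiled out) of the tail-removal arithmetic above:
 * an extent mapping logical 100..109 to physical 5000..5009, truncated
 * from block 106, frees num = 100 + 10 - 106 = 4 blocks starting at
 * start = 5000 + 10 - 4 = 5006.
 */
#if 0
static void example_tail_removal(void)
{
        ext4_lblk_t ee_block = 100, from = 106, num;
        unsigned short ee_len = 10;
        ext4_fsblk_t pblk = 5000, start;

        num = ee_block + ee_len - from;         /* 4 blocks to free */
        start = pblk + ee_len - num;            /* first is 5006 */
}
#endif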
2072
2073 static int
2074 ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2075                 struct ext4_ext_path *path, ext4_lblk_t start)
2076 {
2077         int err = 0, correct_index = 0;
2078         int depth = ext_depth(inode), credits;
2079         struct ext4_extent_header *eh;
2080         ext4_lblk_t a, b, block;
2081         unsigned num;
2082         ext4_lblk_t ex_ee_block;
2083         unsigned short ex_ee_len;
2084         unsigned uninitialized = 0;
2085         struct ext4_extent *ex;
2086
2087         /* the header must be checked already in ext4_ext_remove_space() */
2088         ext_debug("truncate since %u in leaf\n", start);
2089         if (!path[depth].p_hdr)
2090                 path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
2091         eh = path[depth].p_hdr;
2092         BUG_ON(eh == NULL);
2093
2094         /* find where to start removing */
2095         ex = EXT_LAST_EXTENT(eh);
2096
2097         ex_ee_block = le32_to_cpu(ex->ee_block);
2098         ex_ee_len = ext4_ext_get_actual_len(ex);
2099
2100         while (ex >= EXT_FIRST_EXTENT(eh) &&
2101                         ex_ee_block + ex_ee_len > start) {
2102
2103                 if (ext4_ext_is_uninitialized(ex))
2104                         uninitialized = 1;
2105                 else
2106                         uninitialized = 0;
2107
2108                 ext_debug("remove ext %u:[%d]%d\n", ex_ee_block,
2109                          uninitialized, ex_ee_len);
2110                 path[depth].p_ext = ex;
2111
2112                 a = ex_ee_block > start ? ex_ee_block : start;
2113                 b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ?
2114                         ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK;
2115
2116                 ext_debug("  border %u:%u\n", a, b);
2117
2118                 if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) {
2119                         block = 0;
2120                         num = 0;
2121                         BUG();
2122                 } else if (a != ex_ee_block) {
2123                         /* remove tail of the extent */
2124                         block = ex_ee_block;
2125                         num = a - block;
2126                 } else if (b != ex_ee_block + ex_ee_len - 1) {
2127                         /* remove head of the extent */
2128                         block = a;
2129                         num = b - a;
2130                         /* there is no "make a hole" API yet */
2131                         BUG();
2132                 } else {
2133                         /* remove whole extent: excellent! */
2134                         block = ex_ee_block;
2135                         num = 0;
2136                         BUG_ON(a != ex_ee_block);
2137                         BUG_ON(b != ex_ee_block + ex_ee_len - 1);
2138                 }
2139
2140                 /*
2141                  * 3 for leaf, sb, and inode plus 2 (bmap and group
2142                  * descriptor) for each block group; assume two block
2143                  * groups plus ex_ee_len/blocks_per_block_group for
2144                  * the worst case
2145                  */
2146                 credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
2147                 if (ex == EXT_FIRST_EXTENT(eh)) {
2148                         correct_index = 1;
2149                         credits += (ext_depth(inode)) + 1;
2150                 }
2151                 credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
2152
2153                 err = ext4_ext_journal_restart(handle, credits);
2154                 if (err)
2155                         goto out;
2156
2157                 err = ext4_ext_get_access(handle, inode, path + depth);
2158                 if (err)
2159                         goto out;
2160
2161                 err = ext4_remove_blocks(handle, inode, ex, a, b);
2162                 if (err)
2163                         goto out;
2164
2165                 if (num == 0) {
2166                         /* this extent is removed; mark slot entirely unused */
2167                         ext4_ext_store_pblock(ex, 0);
2168                         le16_add_cpu(&eh->eh_entries, -1);
2169                 }
2170
2171                 ex->ee_block = cpu_to_le32(block);
2172                 ex->ee_len = cpu_to_le16(num);
2173                 /*
2174                  * Do not mark uninitialized if all the blocks in the
2175                  * extent have been removed.
2176                  */
2177                 if (uninitialized && num)
2178                         ext4_ext_mark_uninitialized(ex);
2179
2180                 err = ext4_ext_dirty(handle, inode, path + depth);
2181                 if (err)
2182                         goto out;
2183
2184                 ext_debug("new extent: %u:%u:%llu\n", block, num,
2185                                 ext_pblock(ex));
2186                 ex--;
2187                 ex_ee_block = le32_to_cpu(ex->ee_block);
2188                 ex_ee_len = ext4_ext_get_actual_len(ex);
2189         }
2190
2191         if (correct_index && eh->eh_entries)
2192                 err = ext4_ext_correct_indexes(handle, inode, path);
2193
2194         /* if this leaf is free, then we should
2195          * remove it from index block above */
2196         if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
2197                 err = ext4_ext_rm_idx(handle, inode, path + depth);
2198
2199 out:
2200         return err;
2201 }
2202
2203 /*
2204  * ext4_ext_more_to_rm:
2205  * returns 1 if current index has to be freed (even partial)
2206  */
2207 static int
2208 ext4_ext_more_to_rm(struct ext4_ext_path *path)
2209 {
2210         BUG_ON(path->p_idx == NULL);
2211
2212         if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
2213                 return 0;
2214
2215         /*
2216          * if truncate at a deeper level happened, it wasn't partial,
2217          * so we have to consider the current index for truncation
2218          */
2219         if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
2220                 return 0;
2221         return 1;
2222 }
2223
2224 static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
2225 {
2226         struct super_block *sb = inode->i_sb;
2227         int depth = ext_depth(inode);
2228         struct ext4_ext_path *path;
2229         handle_t *handle;
2230         int i = 0, err = 0;
2231
2232         ext_debug("truncate since %u\n", start);
2233
2234         /* probably first extent we're gonna free will be last in block */
2235         handle = ext4_journal_start(inode, depth + 1);
2236         if (IS_ERR(handle))
2237                 return PTR_ERR(handle);
2238
2239         ext4_ext_invalidate_cache(inode);
2240
2241         /*
2242          * We start scanning from right side, freeing all the blocks
2243          * after i_size and walking into the tree depth-wise.
2244          */
2245         path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
2246         if (path == NULL) {
2247                 ext4_journal_stop(handle);
2248                 return -ENOMEM;
2249         }
2250         path[0].p_hdr = ext_inode_hdr(inode);
2251         if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
2252                 err = -EIO;
2253                 goto out;
2254         }
2255         path[0].p_depth = depth;
2256
2257         while (i >= 0 && err == 0) {
2258                 if (i == depth) {
2259                         /* this is leaf block */
2260                         err = ext4_ext_rm_leaf(handle, inode, path, start);
2261                         /* root level has p_bh == NULL, brelse() eats this */
2262                         brelse(path[i].p_bh);
2263                         path[i].p_bh = NULL;
2264                         i--;
2265                         continue;
2266                 }
2267
2268                 /* this is index block */
2269                 if (!path[i].p_hdr) {
2270                         ext_debug("initialize header\n");
2271                         path[i].p_hdr = ext_block_hdr(path[i].p_bh);
2272                 }
2273
2274                 if (!path[i].p_idx) {
2275                         /* this level hasn't been touched yet */
2276                         path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
2277                         path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
2278                         ext_debug("init index ptr: hdr 0x%p, num %d\n",
2279                                   path[i].p_hdr,
2280                                   le16_to_cpu(path[i].p_hdr->eh_entries));
2281                 } else {
2282                         /* we were already here, look at the next index */
2283                         path[i].p_idx--;
2284                 }
2285
2286                 ext_debug("level %d - index, first 0x%p, cur 0x%p\n",
2287                                 i, EXT_FIRST_INDEX(path[i].p_hdr),
2288                                 path[i].p_idx);
2289                 if (ext4_ext_more_to_rm(path + i)) {
2290                         struct buffer_head *bh;
2291                         /* go to the next level */
2292                         ext_debug("move to level %d (block %llu)\n",
2293                                   i + 1, idx_pblock(path[i].p_idx));
2294                         memset(path + i + 1, 0, sizeof(*path));
2295                         bh = sb_bread(sb, idx_pblock(path[i].p_idx));
2296                         if (!bh) {
2297                                 /* should we reset i_size? */
2298                                 err = -EIO;
2299                                 break;
2300                         }
2301                         if (WARN_ON(i + 1 > depth)) {
2302                                 err = -EIO;
2303                                 break;
2304                         }
2305                         if (ext4_ext_check(inode, ext_block_hdr(bh),
2306                                                         depth - i - 1)) {
2307                                 err = -EIO;
2308                                 break;
2309                         }
2310                         path[i + 1].p_bh = bh;
2311
2312                         /* save actual number of indexes since this
2313                          * number is changed at the next iteration */
2314                         path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
2315                         i++;
2316                 } else {
2317                         /* we finished processing this index, go up */
2318                         if (path[i].p_hdr->eh_entries == 0 && i > 0) {
2319                                 /* index is empty, remove it;
2320                                  * handle must already be prepared by the
2321                                  * leaf truncation in ext4_ext_rm_leaf() */
2322                                 err = ext4_ext_rm_idx(handle, inode, path + i);
2323                         }
2324                         /* root level has p_bh == NULL, brelse() eats this */
2325                         brelse(path[i].p_bh);
2326                         path[i].p_bh = NULL;
2327                         i--;
2328                         ext_debug("return to level %d\n", i);
2329                 }
2330         }
2331
2332         /* TODO: flexible tree reduction should be here */
2333         if (path->p_hdr->eh_entries == 0) {
2334                 /*
2335                  * truncate to zero freed all the tree,
2336                  * so we need to correct eh_depth
2337                  */
2338                 err = ext4_ext_get_access(handle, inode, path);
2339                 if (err == 0) {
2340                         ext_inode_hdr(inode)->eh_depth = 0;
2341                         ext_inode_hdr(inode)->eh_max =
2342                                 cpu_to_le16(ext4_ext_space_root(inode));
2343                         err = ext4_ext_dirty(handle, inode, path);
2344                 }
2345         }
2346 out:
2347         ext4_ext_drop_refs(path);
2348         kfree(path);
2349         ext4_journal_stop(handle);
2350
2351         return err;
2352 }
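/*
 * Illustrative sketch (compiled out): truncate computes the first
 * block past the new i_size and hands it to the routine above, roughly
 * the way ext4_ext_truncate() drives it.
 */
#if 0
static int example_truncate_tail(struct inode *inode)
{
        struct super_block *sb = inode->i_sb;
        ext4_lblk_t last_block;

        last_block = (inode->i_size + sb->s_blocksize - 1)
                        >> EXT4_BLOCK_SIZE_BITS(sb);
        return ext4_ext_remove_space(inode, last_block);
}
#endif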
2353
2354 /*
2355  * called at mount time
2356  */
2357 void ext4_ext_init(struct super_block *sb)
2358 {
2359         /*
2360          * possible initialization would be here
2361          */
2362
2363         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
2364                 printk(KERN_INFO "EXT4-fs: file extents enabled");
2365 #ifdef AGGRESSIVE_TEST
2366                 printk(", aggressive tests");
2367 #endif
2368 #ifdef CHECK_BINSEARCH
2369                 printk(", check binsearch");
2370 #endif
2371 #ifdef EXTENTS_STATS
2372                 printk(", stats");
2373 #endif
2374                 printk("\n");
2375 #ifdef EXTENTS_STATS
2376                 spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
2377                 EXT4_SB(sb)->s_ext_min = 1 << 30;
2378                 EXT4_SB(sb)->s_ext_max = 0;
2379 #endif
2380         }
2381 }
2382
2383 /*
2384  * called at umount time
2385  */
2386 void ext4_ext_release(struct super_block *sb)
2387 {
2388         if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
2389                 return;
2390
2391 #ifdef EXTENTS_STATS
2392         if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) {
2393                 struct ext4_sb_info *sbi = EXT4_SB(sb);
2394                 printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n",
2395                         sbi->s_ext_blocks, sbi->s_ext_extents,
2396                         sbi->s_ext_blocks / sbi->s_ext_extents);
2397                 printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n",
2398                         sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max);
2399         }
2400 #endif
2401 }
2402
2403 static void bi_complete(struct bio *bio, int error)
2404 {
2405         complete((struct completion *)bio->bi_private);
2406 }
2407
2408 /* FIXME!! we need to try to merge to left or right after zero-out  */
2409 static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2410 {
2411         int ret = -EIO;
2412         struct bio *bio;
2413         int blkbits, blocksize;
2414         sector_t ee_pblock;
2415         struct completion event;
2416         unsigned int ee_len, len, done, offset;
2417
2418
2419         blkbits   = inode->i_blkbits;
2420         blocksize = inode->i_sb->s_blocksize;
2421         ee_len    = ext4_ext_get_actual_len(ex);
2422         ee_pblock = ext_pblock(ex);
2423
2424         /* convert ee_pblock to 512 byte sectors */
2425         ee_pblock = ee_pblock << (blkbits - 9);
2426
2427         while (ee_len > 0) {
2428
2429                 if (ee_len > BIO_MAX_PAGES)
2430                         len = BIO_MAX_PAGES;
2431                 else
2432                         len = ee_len;
2433
2434                 bio = bio_alloc(GFP_NOIO, len);
2435                 bio->bi_sector = ee_pblock;
2436                 bio->bi_bdev   = inode->i_sb->s_bdev;
2437
2438                 done = 0;
2439                 offset = 0;
2440                 while (done < len) {
2441                         ret = bio_add_page(bio, ZERO_PAGE(0),
2442                                                         blocksize, offset);
2443                         if (ret != blocksize) {
2444                                 /*
2445                                  * We can't add any more pages because of
2446                                  * hardware limitations.  Start a new bio.
2447                                  */
2448                                 break;
2449                         }
2450                         done++;
2451                         offset += blocksize;
2452                         if (offset >= PAGE_CACHE_SIZE)
2453                                 offset = 0;
2454                 }
2455
2456                 init_completion(&event);
2457                 bio->bi_private = &event;
2458                 bio->bi_end_io = bi_complete;
2459                 submit_bio(WRITE, bio);
2460                 wait_for_completion(&event);
2461
2462                 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
2463                         ret = 0;
2464                 else {
2465                         ret = -EIO;
2466                         break;
2467                 }
2468                 bio_put(bio);
2469                 ee_len    -= done;
2470                 ee_pblock += done  << (blkbits - 9);
2471         }
2472         return ret;
2473 }
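/*
 * Worked example (compiled out) of the sector conversion above: with
 * 4 KiB blocks i_blkbits is 12, so a filesystem block number is shifted
 * left by 12 - 9 = 3 to get its 512-byte sector, e.g. block 5000 maps
 * to sector 40000.
 */
#if 0
static sector_t example_block_to_sector(struct inode *inode,
                                        ext4_fsblk_t pblk)
{
        return (sector_t)pblk << (inode->i_blkbits - 9);
}
#endif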
2474
2475 #define EXT4_EXT_ZERO_LEN 7
2476
2477 /*
2478  * This function is called by ext4_ext_get_blocks() if someone tries to write
2479  * to an uninitialized extent. It may result in splitting the uninitialized
2480  * extent into multiple extents (up to three - one initialized and two
2481  * uninitialized).
2482  * There are three possibilities (a worked example follows the function):
2483  *   a> No split required: the entire extent should be initialized
2484  *   b> Splits in two extents: the write happens at either end of the extent
2485  *   c> Splits in three extents: someone is writing in the middle of the extent
2486  */
2487 static int ext4_ext_convert_to_initialized(handle_t *handle,
2488                                                 struct inode *inode,
2489                                                 struct ext4_ext_path *path,
2490                                                 ext4_lblk_t iblock,
2491                                                 unsigned int max_blocks)
2492 {
2493         struct ext4_extent *ex, newex, orig_ex;
2494         struct ext4_extent *ex1 = NULL;
2495         struct ext4_extent *ex2 = NULL;
2496         struct ext4_extent *ex3 = NULL;
2497         struct ext4_extent_header *eh;
2498         ext4_lblk_t ee_block;
2499         unsigned int allocated, ee_len, depth;
2500         ext4_fsblk_t newblock;
2501         int err = 0;
2502         int ret = 0;
2503
2504         depth = ext_depth(inode);
2505         eh = path[depth].p_hdr;
2506         ex = path[depth].p_ext;
2507         ee_block = le32_to_cpu(ex->ee_block);
2508         ee_len = ext4_ext_get_actual_len(ex);
2509         allocated = ee_len - (iblock - ee_block);
2510         newblock = iblock - ee_block + ext_pblock(ex);
2511         ex2 = ex;
2512         orig_ex.ee_block = ex->ee_block;
2513         orig_ex.ee_len   = cpu_to_le16(ee_len);
2514         ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
2515
2516         err = ext4_ext_get_access(handle, inode, path + depth);
2517         if (err)
2518                 goto out;
2519         /* If extent has less than 2*EXT4_EXT_ZERO_LEN blocks, zero out directly */
2520         if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
2521                 err =  ext4_ext_zeroout(inode, &orig_ex);
2522                 if (err)
2523                         goto fix_extent_len;
2524                 /* update the extent length and mark as initialized */
2525                 ex->ee_block = orig_ex.ee_block;
2526                 ex->ee_len   = orig_ex.ee_len;
2527                 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2528                 ext4_ext_dirty(handle, inode, path + depth);
2529                 /* zeroed the full extent */
2530                 return allocated;
2531         }
2532
2533         /* ex1: ee_block to iblock - 1 : uninitialized */
2534         if (iblock > ee_block) {
2535                 ex1 = ex;
2536                 ex1->ee_len = cpu_to_le16(iblock - ee_block);
2537                 ext4_ext_mark_uninitialized(ex1);
2538                 ex2 = &newex;
2539         }
2540         /*
2541          * for sanity, update the length of the ex2 extent before
2542          * we insert ex3, if ex1 is NULL. This is to avoid temporary
2543          * overlap of blocks.
2544          */
2545         if (!ex1 && allocated > max_blocks)
2546                 ex2->ee_len = cpu_to_le16(max_blocks);
2547         /* ex3: to ee_block + ee_len : uninitialized */
2548         if (allocated > max_blocks) {
2549                 unsigned int newdepth;
2550                 /* If extent has less than EXT4_EXT_ZERO_LEN blocks, zero out directly */
2551                 if (allocated <= EXT4_EXT_ZERO_LEN) {
2552                         /*
2553                          * iblock == ee_block is handled by the zeroout
2554                          * at the beginning.
2555                          * Mark first half uninitialized.
2556                          * Mark second half initialized and zero out the
2557                          * initialized extent
2558                          */
2559                         ex->ee_block = orig_ex.ee_block;
2560                         ex->ee_len   = cpu_to_le16(ee_len - allocated);
2561                         ext4_ext_mark_uninitialized(ex);
2562                         ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2563                         ext4_ext_dirty(handle, inode, path + depth);
2564
2565                         ex3 = &newex;
2566                         ex3->ee_block = cpu_to_le32(iblock);
2567                         ext4_ext_store_pblock(ex3, newblock);
2568                         ex3->ee_len = cpu_to_le16(allocated);
2569                         err = ext4_ext_insert_extent(handle, inode, path, ex3);
2570                         if (err == -ENOSPC) {
2571                                 err =  ext4_ext_zeroout(inode, &orig_ex);
2572                                 if (err)
2573                                         goto fix_extent_len;
2574                                 ex->ee_block = orig_ex.ee_block;
2575                                 ex->ee_len   = orig_ex.ee_len;
2576                                 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2577                                 ext4_ext_dirty(handle, inode, path + depth);
2578                                 /* blocks available from iblock */
2579                                 return allocated;
2580
2581                         } else if (err)
2582                                 goto fix_extent_len;
2583
2584                         /*
2585                          * We need to zero out the second half because
2586                          * a fallocate request can update the file size and
2587                          * converting the second half to an initialized extent
2588                          * implies that we can leak some junk data to user
2589                          * space.
2590                          */
2591                         err =  ext4_ext_zeroout(inode, ex3);
2592                         if (err) {
2593                                 /*
2594                                  * We should actually mark the
2595                                  * second half as uninit and return an error;
2596                                  * the insert would have changed the extent
2597                                  */
2598                                 depth = ext_depth(inode);
2599                                 ext4_ext_drop_refs(path);
2600                                 path = ext4_ext_find_extent(inode,
2601                                                                 iblock, path);
2602                                 if (IS_ERR(path)) {
2603                                         err = PTR_ERR(path);
2604                                         return err;
2605                                 }
2606                                 /* get the second half extent details */
2607                                 ex = path[depth].p_ext;
2608                                 err = ext4_ext_get_access(handle, inode,
2609                                                                 path + depth);
2610                                 if (err)
2611                                         return err;
2612                                 ext4_ext_mark_uninitialized(ex);
2613                                 ext4_ext_dirty(handle, inode, path + depth);
2614                                 return err;
2615                         }
2616
2617                         /* zeroed the second half */
2618                         return allocated;
2619                 }
2620                 ex3 = &newex;
2621                 ex3->ee_block = cpu_to_le32(iblock + max_blocks);
2622                 ext4_ext_store_pblock(ex3, newblock + max_blocks);
2623                 ex3->ee_len = cpu_to_le16(allocated - max_blocks);
2624                 ext4_ext_mark_uninitialized(ex3);
2625                 err = ext4_ext_insert_extent(handle, inode, path, ex3);
2626                 if (err == -ENOSPC) {
2627                         err =  ext4_ext_zeroout(inode, &orig_ex);
2628                         if (err)
2629                                 goto fix_extent_len;
2630                         /* update the extent length and mark as initialized */
2631                         ex->ee_block = orig_ex.ee_block;
2632                         ex->ee_len   = orig_ex.ee_len;
2633                         ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2634                         ext4_ext_dirty(handle, inode, path + depth);
2635                         /* zeroed the full extent */
2636                         /* blocks available from iblock */
2637                         return allocated;
2638
2639                 } else if (err)
2640                         goto fix_extent_len;
2641                 /*
2642                  * The depth, and hence eh & ex might change
2643                  * as part of the insert above.
2644                  */
2645                 newdepth = ext_depth(inode);
2646                 /*
2647                  * update the extent length after successful insert of the
2648                  * split extent
2649                  */
2650                 orig_ex.ee_len = cpu_to_le16(ee_len -
2651                                                 ext4_ext_get_actual_len(ex3));
2652                 depth = newdepth;
2653                 ext4_ext_drop_refs(path);
2654                 path = ext4_ext_find_extent(inode, iblock, path);
2655                 if (IS_ERR(path)) {
2656                         err = PTR_ERR(path);
2657                         goto out;
2658                 }
2659                 eh = path[depth].p_hdr;
2660                 ex = path[depth].p_ext;
2661                 if (ex2 != &newex)
2662                         ex2 = ex;
2663
2664                 err = ext4_ext_get_access(handle, inode, path + depth);
2665                 if (err)
2666                         goto out;
2667
2668                 allocated = max_blocks;
2669
2670                 /* If the extent has less than EXT4_EXT_ZERO_LEN blocks and we are
2671                  * trying to insert an extent in the middle, zero out directly;
2672                  * otherwise give the extent a chance to merge to the left
2673                  */
2674                 if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
2675                                                         iblock != ee_block) {
2676                         err =  ext4_ext_zeroout(inode, &orig_ex);
2677                         if (err)
2678                                 goto fix_extent_len;
2679                         /* update the extent length and mark as initialized */
2680                         ex->ee_block = orig_ex.ee_block;
2681                         ex->ee_len   = orig_ex.ee_len;
2682                         ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2683                         ext4_ext_dirty(handle, inode, path + depth);
2684                         /* zero out the first half */
2685                         /* blocks available from iblock */
2686                         return allocated;
2687                 }
2688         }
2689         /*
2690          * If there was a change of depth as part of the
2691          * insertion of ex3 above, we need to update the length
2692          * of the ex1 extent again here
2693          */
2694         if (ex1 && ex1 != ex) {
2695                 ex1 = ex;
2696                 ex1->ee_len = cpu_to_le16(iblock - ee_block);
2697                 ext4_ext_mark_uninitialized(ex1);
2698                 ex2 = &newex;
2699         }
2700         /* ex2: iblock to iblock + max_blocks - 1 : initialized */
2701         ex2->ee_block = cpu_to_le32(iblock);
2702         ext4_ext_store_pblock(ex2, newblock);
2703         ex2->ee_len = cpu_to_le16(allocated);
2704         if (ex2 != ex)
2705                 goto insert;
2706         /*
2707          * New (initialized) extent starts from the first block
2708          * in the current extent. i.e., ex2 == ex
2709          * We have to see if it can be merged with the extent
2710          * on the left.
2711          */
2712         if (ex2 > EXT_FIRST_EXTENT(eh)) {
2713                 /*
2714                  * To merge left, pass "ex2 - 1" to try_to_merge(),
2715                  * since it merges towards right _only_.
2716                  */
2717                 ret = ext4_ext_try_to_merge(inode, path, ex2 - 1);
2718                 if (ret) {
2719                         err = ext4_ext_correct_indexes(handle, inode, path);
2720                         if (err)
2721                                 goto out;
2722                         depth = ext_depth(inode);
2723                         ex2--;
2724                 }
2725         }
2726         /*
2727          * Try to merge towards the right. This might be required
2728          * only when the whole extent is being written to.
2729          * i.e. ex2 == ex and ex3 == NULL.
2730          */
2731         if (!ex3) {
2732                 ret = ext4_ext_try_to_merge(inode, path, ex2);
2733                 if (ret) {
2734                         err = ext4_ext_correct_indexes(handle, inode, path);
2735                         if (err)
2736                                 goto out;
2737                 }
2738         }
2739         /* Mark modified extent as dirty */
2740         err = ext4_ext_dirty(handle, inode, path + depth);
2741         goto out;
2742 insert:
2743         err = ext4_ext_insert_extent(handle, inode, path, &newex);
2744         if (err == -ENOSPC) {
2745                 err =  ext4_ext_zeroout(inode, &orig_ex);
2746                 if (err)
2747                         goto fix_extent_len;
2748                 /* update the extent length and mark as initialized */
2749                 ex->ee_block = orig_ex.ee_block;
2750                 ex->ee_len   = orig_ex.ee_len;
2751                 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2752                 ext4_ext_dirty(handle, inode, path + depth);
2753                 /* zero out the first half */
2754                 return allocated;
2755         } else if (err)
2756                 goto fix_extent_len;
2757 out:
2758         ext4_ext_show_leaf(inode, path);
2759         return err ? err : allocated;
2760
2761 fix_extent_len:
2762         ex->ee_block = orig_ex.ee_block;
2763         ex->ee_len   = orig_ex.ee_len;
2764         ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2765         ext4_ext_mark_uninitialized(ex);
2766         ext4_ext_dirty(handle, inode, path + depth);
2767         return err;
2768 }
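/*
 * Worked example (compiled out) for case c> above: writing 4 blocks at
 * logical 103 into an uninitialized extent covering 100..109 yields
 * three extents. All numbers are hypothetical.
 */
#if 0
static void example_three_way_split(void)
{
        /* before: one uninitialized extent [100, len 10]        */
        /* after:  uninitialized            [100, len 3]  (ex1)  */
        /*         initialized              [103, len 4]  (ex2)  */
        /*         uninitialized            [107, len 3]  (ex3)  */
}
#endif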
2769
2770 /*
2771  * Block allocation/map/preallocation routine for extents-based files
2772  *
2773  *
2774  * Needs to be called with
2775  * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system blocks
2776  * (i.e., create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
2777  *
2778  * return > 0, number of blocks already mapped/allocated
2779  *          if create == 0 and these are pre-allocated blocks
2780  *              buffer head is unmapped
2781  *          otherwise blocks are mapped
2782  *
2783  * return = 0, if plain look up failed (blocks have not been allocated)
2784  *          buffer head is unmapped
2785  *
2786  * return < 0, error case.
2787  */
2788 int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2789                         ext4_lblk_t iblock,
2790                         unsigned int max_blocks, struct buffer_head *bh_result,
2791                         int flags)
2792 {
2793         struct ext4_ext_path *path = NULL;
2794         struct ext4_extent_header *eh;
2795         struct ext4_extent newex, *ex;
2796         ext4_fsblk_t newblock;
2797         int err = 0, depth, ret, cache_type;
2798         unsigned int allocated = 0;
2799         struct ext4_allocation_request ar;
2800
2801         __clear_bit(BH_New, &bh_result->b_state);
2802         ext_debug("blocks %u/%u requested for inode %lu\n",
2803                         iblock, max_blocks, inode->i_ino);
2804
2805         /* check in cache */
2806         cache_type = ext4_ext_in_cache(inode, iblock, &newex);
2807         if (cache_type) {
2808                 if (cache_type == EXT4_EXT_CACHE_GAP) {
2809                         if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
2810                                 /*
2811                                  * block isn't allocated yet and
2812                                  * user doesn't want to allocate it
2813                                  */
2814                                 goto out2;
2815                         }
2816                         /* we should allocate requested block */
2817                 } else if (cache_type == EXT4_EXT_CACHE_EXTENT) {
2818                         /* block is already allocated */
2819                         newblock = iblock
2820                                    - le32_to_cpu(newex.ee_block)
2821                                    + ext_pblock(&newex);
2822                         /* number of remaining blocks in the extent */
2823                         allocated = ext4_ext_get_actual_len(&newex) -
2824                                         (iblock - le32_to_cpu(newex.ee_block));
2825                         goto out;
2826                 } else {
2827                         BUG();
2828                 }
2829         }
2830
2831         /* find extent for this block */
2832         path = ext4_ext_find_extent(inode, iblock, NULL);
2833         if (IS_ERR(path)) {
2834                 err = PTR_ERR(path);
2835                 path = NULL;
2836                 goto out2;
2837         }
2838
2839         depth = ext_depth(inode);
2840
2841         /*
2842          * a consistent leaf must not be empty;
2843          * this situation is possible, though, _during_ tree modification;
2844          * this is why the assertion can't be put in ext4_ext_find_extent()
2845          */
2846         BUG_ON(path[depth].p_ext == NULL && depth != 0);
2847         eh = path[depth].p_hdr;
2848
2849         ex = path[depth].p_ext;
2850         if (ex) {
2851                 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
2852                 ext4_fsblk_t ee_start = ext_pblock(ex);
2853                 unsigned short ee_len;
2854
2855                 /*
2856                  * Uninitialized extents are treated as holes, except that
2857                  * we split out initialized portions during a write.
2858                  */
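                /*
                 * On disk the uninitialized flag lives in the MSB of the
                 * 16-bit ee_len field: e.g. a raw ee_len of 0x8005 denotes
                 * an uninitialized extent of 5 blocks, and the
                 * ext4_ext_get_actual_len() call below masks the flag off
                 * before the length is used.
                 */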
2859                 ee_len = ext4_ext_get_actual_len(ex);
2860                 /* if found extent covers block, simply return it */
2861                 if (iblock >= ee_block && iblock < ee_block + ee_len) {
2862                         newblock = iblock - ee_block + ee_start;
2863                         /* number of remaining blocks in the extent */
2864                         allocated = ee_len - (iblock - ee_block);
2865                         ext_debug("%u fit into %u:%d -> %llu\n", iblock,
2866                                         ee_block, ee_len, newblock);
2867
2868                         /* Do not put uninitialized extent in the cache */
2869                         if (!ext4_ext_is_uninitialized(ex)) {
2870                                 ext4_ext_put_in_cache(inode, ee_block,
2871                                                         ee_len, ee_start,
2872                                                         EXT4_EXT_CACHE_EXTENT);
2873                                 goto out;
2874                         }
2875                         if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)
2876                                 goto out;
2877                         if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
2878                                 if (allocated > max_blocks)
2879                                         allocated = max_blocks;
2880                                 /*
2881                                  * We have blocks reserved already.  We
2882                                  * return allocated blocks so that delalloc
2883                                  * won't do block reservation for us.  But
2884                                  * the buffer head will be unmapped so that
2885                                  * a read from the block returns 0s.
2886                                  */
2887                                 set_buffer_unwritten(bh_result);
2888                                 bh_result->b_bdev = inode->i_sb->s_bdev;
2889                                 bh_result->b_blocknr = newblock;
2890                                 goto out2;
2891                         }
2892
2893                         ret = ext4_ext_convert_to_initialized(handle, inode,
2894                                                                 path, iblock,
2895                                                                 max_blocks);
2896                         if (ret <= 0) {
2897                                 err = ret;
2898                                 goto out2;
2899                         } else
2900                                 allocated = ret;
2901                         goto outnew;
2902                 }
2903         }
2904
2905         /*
2906          * requested block isn't allocated yet;
2907          * we must not try to allocate it if the create flag is zero
2908          */
2909         if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
2910                 /*
2911                  * put the just-found gap into the cache to speed up
2912                  * subsequent requests
2913                  */
2914                 ext4_ext_put_gap_in_cache(inode, path, iblock);
2915                 goto out2;
2916         }
2917         /*
2918          * Okay, we need to do block allocation.
2919          */
2920
2921         /* find neighbour allocated blocks */
2922         ar.lleft = iblock;
2923         err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
2924         if (err)
2925                 goto out2;
2926         ar.lright = iblock;
2927         err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright);
2928         if (err)
2929                 goto out2;
2930
2931         /*
2932          * See if request is beyond maximum number of blocks we can have in
2933          * a single extent. For an initialized extent this limit is
2934          * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is
2935          * EXT_UNINIT_MAX_LEN.
2936          */
2937         if (max_blocks > EXT_INIT_MAX_LEN &&
2938             !(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
2939                 max_blocks = EXT_INIT_MAX_LEN;
2940         else if (max_blocks > EXT_UNINIT_MAX_LEN &&
2941                  (flags & EXT4_GET_BLOCKS_UNINIT_EXT))
2942                 max_blocks = EXT_UNINIT_MAX_LEN;
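        /*
         * Concretely: since the MSB of the 16-bit ee_len field is reserved
         * as the uninitialized flag, EXT_INIT_MAX_LEN is 32768 (1 << 15)
         * blocks while EXT_UNINIT_MAX_LEN is 32767, so e.g. a 100000-block
         * initialized request is clamped to 32768 blocks here.
         */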
2943
2944         /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
2945         newex.ee_block = cpu_to_le32(iblock);
2946         newex.ee_len = cpu_to_le16(max_blocks);
2947         err = ext4_ext_check_overlap(inode, &newex, path);
2948         if (err)
2949                 allocated = ext4_ext_get_actual_len(&newex);
2950         else
2951                 allocated = max_blocks;
2952
2953         /* allocate new block */
2954         ar.inode = inode;
2955         ar.goal = ext4_ext_find_goal(inode, path, iblock);
2956         ar.logical = iblock;
2957         ar.len = allocated;
2958         if (S_ISREG(inode->i_mode))
2959                 ar.flags = EXT4_MB_HINT_DATA;
2960         else
2961                 /* disable in-core preallocation for non-regular files */
2962                 ar.flags = 0;
2963         newblock = ext4_mb_new_blocks(handle, &ar, &err);
2964         if (!newblock)
2965                 goto out2;
2966         ext_debug("allocate new block: goal %llu, found %llu/%u\n",
2967                   ar.goal, newblock, allocated);
2968
2969         /* try to insert new extent into found leaf and return */
2970         ext4_ext_store_pblock(&newex, newblock);
2971         newex.ee_len = cpu_to_le16(ar.len);
2972         if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)  /* Mark uninitialized */
2973                 ext4_ext_mark_uninitialized(&newex);
2974         err = ext4_ext_insert_extent(handle, inode, path, &newex);
2975         if (err) {
2976                 /* free data blocks we just allocated */
2977                 /* not a good idea to call discard here directly,
2978                  * but otherwise we'd need to call it on every free() */
2979                 ext4_discard_preallocations(inode);
2980                 ext4_free_blocks(handle, inode, ext_pblock(&newex),
2981                                         ext4_ext_get_actual_len(&newex), 0);
2982                 goto out2;
2983         }
2984
2985         /* ext4_ext_insert_extent() may have merged our extent, so re-read block and length */
2986         newblock = ext_pblock(&newex);
2987         allocated = ext4_ext_get_actual_len(&newex);
2988 outnew:
2989         set_buffer_new(bh_result);
2990
2991         /* Cache only when it is _not_ an uninitialized extent */
2992         if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
2993                 ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
2994                                                 EXT4_EXT_CACHE_EXTENT);
2995 out:
2996         if (allocated > max_blocks)
2997                 allocated = max_blocks;
2998         ext4_ext_show_leaf(inode, path);
2999         set_buffer_mapped(bh_result);
3000         bh_result->b_bdev = inode->i_sb->s_bdev;
3001         bh_result->b_blocknr = newblock;
3002 out2:
3003         if (path) {
3004                 ext4_ext_drop_refs(path);
3005                 kfree(path);
3006         }
3007         return err ? err : allocated;
3008 }
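/*
 * A minimal sketch of how a caller consumes the contract documented above,
 * assuming i_data_sem is already held for writing; the helper name is
 * hypothetical and for illustration only:
 */
#if 0
static int example_map_one_block(handle_t *handle, struct inode *inode,
                                 ext4_lblk_t lblk, ext4_fsblk_t *pblk)
{
        struct buffer_head bh;
        int ret;

        bh.b_state = 0;
        ret = ext4_ext_get_blocks(handle, inode, lblk, 1, &bh,
                                  EXT4_GET_BLOCKS_CREATE);
        if (ret < 0)
                return ret;             /* error case */
        if (ret == 0)
                return -EIO;            /* lookup failed, nothing allocated */
        *pblk = bh.b_blocknr;           /* ret > 0: block is mapped */
        return 0;
}
#endif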
3009
3010 void ext4_ext_truncate(struct inode *inode)
3011 {
3012         struct address_space *mapping = inode->i_mapping;
3013         struct super_block *sb = inode->i_sb;
3014         ext4_lblk_t last_block;
3015         handle_t *handle;
3016         int err = 0;
3017
3018         /*
3019          * probably the first extent we are going to free will be the last in the block
3020          */
3021         err = ext4_writepage_trans_blocks(inode);
3022         handle = ext4_journal_start(inode, err);
3023         if (IS_ERR(handle))
3024                 return;
3025
3026         if (inode->i_size & (sb->s_blocksize - 1))
3027                 ext4_block_truncate_page(handle, mapping, inode->i_size);
3028
3029         if (ext4_orphan_add(handle, inode))
3030                 goto out_stop;
3031
3032         down_write(&EXT4_I(inode)->i_data_sem);
3033         ext4_ext_invalidate_cache(inode);
3034
3035         ext4_discard_preallocations(inode);
3036
3037         /*
3038          * TODO: optimization is possible here.
3039          * Probably we need not scan at all,
3040          * because page truncation is enough.
3041          */
3042
3043         /* we have to know where to truncate from in the crash case */
3044         EXT4_I(inode)->i_disksize = inode->i_size;
3045         ext4_mark_inode_dirty(handle, inode);
3046
3047         last_block = (inode->i_size + sb->s_blocksize - 1)
3048                         >> EXT4_BLOCK_SIZE_BITS(sb);
3049         err = ext4_ext_remove_space(inode, last_block);
3050
3051         /* In a multi-transaction truncate, we only make the final
3052          * transaction synchronous.
3053          */
3054         if (IS_SYNC(inode))
3055                 ext4_handle_sync(handle);
3056
3057 out_stop:
3058         up_write(&EXT4_I(inode)->i_data_sem);
3059         /*
3060          * If this was a simple ftruncate() and the file will remain alive,
3061          * then we need to clear up the orphan record which we created above.
3062          * However, if this was a real unlink then we were called by
3063          * ext4_delete_inode(), and we allow that function to clean up the
3064          * orphan info for us.
3065          */
3066         if (inode->i_nlink)
3067                 ext4_orphan_del(handle, inode);
3068
3069         inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
3070         ext4_mark_inode_dirty(handle, inode);
3071         ext4_journal_stop(handle);
3072 }
3073
3074 static void ext4_falloc_update_inode(struct inode *inode,
3075                                 int mode, loff_t new_size, int update_ctime)
3076 {
3077         struct timespec now;
3078
3079         if (update_ctime) {
3080                 now = current_fs_time(inode->i_sb);
3081                 if (!timespec_equal(&inode->i_ctime, &now))
3082                         inode->i_ctime = now;
3083         }
3084         /*
3085          * Update only when preallocation was requested beyond
3086          * the file size.
3087          */
3088         if (!(mode & FALLOC_FL_KEEP_SIZE)) {
3089                 if (new_size > i_size_read(inode))
3090                         i_size_write(inode, new_size);
3091                 if (new_size > EXT4_I(inode)->i_disksize)
3092                         ext4_update_i_disksize(inode, new_size);
3093         }
3094
3095 }
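/*
 * Consequence of the FALLOC_FL_KEEP_SIZE check above: preallocating past
 * EOF with that flag set leaves both i_size and i_disksize untouched, so
 * stat() keeps reporting the old size while st_blocks already reflects
 * the newly reserved space.
 */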
3096
3097 /*
3098  * Preallocate space for a file. This implements ext4's fallocate inode
3099  * operation, which gets called from the sys_fallocate system call.
3100  * For block-mapped files, posix_fallocate should fall back to writing
3101  * zeroes to the required new blocks (the same behavior that is expected
3102  * of file systems which do not support the fallocate() system call).
3103  */
3104 long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
3105 {
3106         handle_t *handle;
3107         ext4_lblk_t block;
3108         loff_t new_size;
3109         unsigned int max_blocks;
3110         int ret = 0;
3111         int ret2 = 0;
3112         int retries = 0;
3113         struct buffer_head map_bh;
3114         unsigned int credits, blkbits = inode->i_blkbits;
3115
3116         /*
3117          * currently supporting (pre)allocate mode for extent-based
3118          * files _only_
3119          */
3120         if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
3121                 return -EOPNOTSUPP;
3122
3123         /* preallocation to directories is currently not supported */
3124         if (S_ISDIR(inode->i_mode))
3125                 return -ENODEV;
3126
3127         block = offset >> blkbits;
3128         /*
3129          * We can't just convert len to max_blocks: with blocksize = 4096,
3130          * offset = 3072 and len = 2048 the range spans two blocks, so
3131          * round offset + len up to a block boundary before converting
3132          */
3132         max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
3133                                                         - block;
3134         /*
3135          * credits to insert 1 extent into extent tree
3136          */
3137         credits = ext4_chunk_trans_blocks(inode, max_blocks);
3138         mutex_lock(&inode->i_mutex);
3139 retry:
3140         while (ret >= 0 && ret < max_blocks) {
3141                 block = block + ret;
3142                 max_blocks = max_blocks - ret;
3143                 handle = ext4_journal_start(inode, credits);
3144                 if (IS_ERR(handle)) {
3145                         ret = PTR_ERR(handle);
3146                         break;
3147                 }
3148                 map_bh.b_state = 0;
3149                 ret = ext4_get_blocks(handle, inode, block,
3150                                       max_blocks, &map_bh,
3151                                       EXT4_GET_BLOCKS_CREATE_UNINIT_EXT);
3152                 if (ret <= 0) {
3153 #ifdef EXT4FS_DEBUG
3154                         WARN_ON(ret <= 0);
3155                         printk(KERN_ERR "%s: ext4_get_blocks "
3156                                     "returned error: inode #%lu, block=%u, "
3157                                     "max_blocks=%u\n", __func__,
3158                                     inode->i_ino, block, max_blocks);
3159 #endif
3160                         ext4_mark_inode_dirty(handle, inode);
3161                         ret2 = ext4_journal_stop(handle);
3162                         break;
3163                 }
3164                 if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
3165                                                 blkbits) >> blkbits))
3166                         new_size = offset + len;
3167                 else
3168                         new_size = (block + ret) << blkbits;
3169
3170                 ext4_falloc_update_inode(inode, mode, new_size,
3171                                                 buffer_new(&map_bh));
3172                 ext4_mark_inode_dirty(handle, inode);
3173                 ret2 = ext4_journal_stop(handle);
3174                 if (ret2)
3175                         break;
3176         }
3177         if (ret == -ENOSPC &&
3178                         ext4_should_retry_alloc(inode->i_sb, &retries)) {
3179                 ret = 0;
3180                 goto retry;
3181         }
3182         mutex_unlock(&inode->i_mutex);
3183         return ret > 0 ? ret2 : ret;
3184 }
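/*
 * Userspace reaches the function above via fallocate(2).  A minimal
 * userspace sketch (not kernel code; assumes a libc exposing the
 * fallocate() wrapper and a placeholder path):
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/falloc.h>

int main(void)
{
        int fd = open("/tmp/prealloc-test", O_CREAT | O_WRONLY, 0644);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* reserve 1 MiB of uninitialized extents, keeping i_size at 0 */
        if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20) != 0)
                perror("fallocate");
        close(fd);
        return 0;
}
#endif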
3185
3186 /*
3187  * Callback function called for each extent to gather FIEMAP information.
3188  */
3189 static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3190                        struct ext4_ext_cache *newex, struct ext4_extent *ex,
3191                        void *data)
3192 {
3193         struct fiemap_extent_info *fieinfo = data;
3194         unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
3195         __u64   logical;
3196         __u64   physical;
3197         __u64   length;
3198         __u32   flags = 0;
3199         int     error;
3200
3201         logical =  (__u64)newex->ec_block << blksize_bits;
3202
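        /*
         * A hole in the extent tree may still carry dirty data under
         * delayed allocation, so peek at the page cache: if the first
         * buffer head of the page covering this range is marked delayed,
         * report the range as FIEMAP_EXTENT_DELALLOC.  Note that only the
         * first buffer head is examined, so a partially-delalloc page may
         * be missed.
         */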
3203         if (newex->ec_type == EXT4_EXT_CACHE_GAP) {
3204                 pgoff_t offset;
3205                 struct page *page;
3206                 struct buffer_head *bh = NULL;
3207
3208                 offset = logical >> PAGE_SHIFT;
3209                 page = find_get_page(inode->i_mapping, offset);
3210                 if (!page)
3211                         return EXT_CONTINUE;
3212                 if (!page_has_buffers(page)) {
3213                         /* drop the reference taken by find_get_page() */
3214                         page_cache_release(page);
3215                         return EXT_CONTINUE;
3216                 }
3217                 bh = page_buffers(page);
3217
3218                 if (buffer_delay(bh)) {
3219                         flags |= FIEMAP_EXTENT_DELALLOC;
3220                         page_cache_release(page);
3221                 } else {
3222                         page_cache_release(page);
3223                         return EXT_CONTINUE;
3224                 }
3225         }
3226
3227         physical = (__u64)newex->ec_start << blksize_bits;
3228         length =   (__u64)newex->ec_len << blksize_bits;
3229
3230         if (ex && ext4_ext_is_uninitialized(ex))
3231                 flags |= FIEMAP_EXTENT_UNWRITTEN;
3232
3233         /*
3234          * If this extent reaches EXT_MAX_BLOCK, it must be last.
3235          *
3236          * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
3237          * this also indicates no more allocated blocks.
3238          *
3239          * XXX this might miss a single-block extent at EXT_MAX_BLOCK
3240          */
3241         if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
3242             newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
3243                 loff_t size = i_size_read(inode);
3244                 loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
3245
3246                 flags |= FIEMAP_EXTENT_LAST;
3247                 if ((flags & FIEMAP_EXTENT_DELALLOC) &&
3248                     logical+length > size)
3249                         length = (size - logical + bs - 1) & ~(bs-1);
3250         }
3251
3252         error = fiemap_fill_next_extent(fieinfo, logical, physical,
3253                                         length, flags);
3254         if (error < 0)
3255                 return error;
3256         if (error == 1)
3257                 return EXT_BREAK;
3258
3259         return EXT_CONTINUE;
3260 }
3261
3262 /* fiemap flags we can handle are specified here */
3263 #define EXT4_FIEMAP_FLAGS       (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
3264
3265 static int ext4_xattr_fiemap(struct inode *inode,
3266                                 struct fiemap_extent_info *fieinfo)
3267 {
3268         __u64 physical = 0;
3269         __u64 length;
3270         __u32 flags = FIEMAP_EXTENT_LAST;
3271         int blockbits = inode->i_sb->s_blocksize_bits;
3272         int error = 0;
3273
3274         /* in-inode? */
3275         if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) {
3276                 struct ext4_iloc iloc;
3277                 int offset;     /* offset of xattr in inode */
3278
3279                 error = ext4_get_inode_loc(inode, &iloc);
3280                 if (error)
3281                         return error;
3282                 physical = iloc.bh->b_blocknr << blockbits;
3283                 offset = EXT4_GOOD_OLD_INODE_SIZE +
3284                                 EXT4_I(inode)->i_extra_isize;
3285                 physical += offset;
3286                 length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
3287                 flags |= FIEMAP_EXTENT_DATA_INLINE;
3288         } else { /* external block */
3289                 physical = EXT4_I(inode)->i_file_acl << blockbits;
3290                 length = inode->i_sb->s_blocksize;
3291         }
3292
3293         if (physical)
3294                 error = fiemap_fill_next_extent(fieinfo, 0, physical,
3295                                                 length, flags);
3296         return (error < 0 ? error : 0);
3297 }
3298
3299 int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3300                 __u64 start, __u64 len)
3301 {
3302         ext4_lblk_t start_blk;
3303         ext4_lblk_t len_blks;
3304         int error = 0;
3305
3306         /* fall back to the generic implementation if not in extents format */
3307         if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
3308                 return generic_block_fiemap(inode, fieinfo, start, len,
3309                         ext4_get_block);
3310
3311         if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
3312                 return -EBADR;
3313
3314         if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
3315                 error = ext4_xattr_fiemap(inode, fieinfo);
3316         } else {
3317                 start_blk = start >> inode->i_sb->s_blocksize_bits;
3318                 len_blks = len >> inode->i_sb->s_blocksize_bits;
3319
3320                 /*
3321                  * Walk the extent tree gathering extent information.
3322                  * ext4_ext_fiemap_cb will push extents back to user.
3323                  */
3324                 down_read(&EXT4_I(inode)->i_data_sem);
3325                 error = ext4_ext_walk_space(inode, start_blk, len_blks,
3326                                           ext4_ext_fiemap_cb, fieinfo);
3327                 up_read(&EXT4_I(inode)->i_data_sem);
3328         }
3329
3330         return error;
3331 }
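/*
 * Userspace drives ext4_fiemap() through the FS_IOC_FIEMAP ioctl.  A
 * minimal userspace sketch (not kernel code; the path is a placeholder)
 * dumping up to 16 extents of a file:
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(void)
{
        char buf[sizeof(struct fiemap) + 16 * sizeof(struct fiemap_extent)];
        struct fiemap *fm = (struct fiemap *)buf;
        unsigned int i;
        int fd = open("/tmp/some-file", O_RDONLY);

        if (fd < 0)
                return 1;
        memset(buf, 0, sizeof(buf));
        fm->fm_start = 0;
        fm->fm_length = ~0ULL;          /* map the whole file */
        fm->fm_flags = FIEMAP_FLAG_SYNC;
        fm->fm_extent_count = 16;
        if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0)
                for (i = 0; i < fm->fm_mapped_extents; i++)
                        printf("logical %llu physical %llu len %llu flags 0x%x\n",
                               (unsigned long long)fm->fm_extents[i].fe_logical,
                               (unsigned long long)fm->fm_extents[i].fe_physical,
                               (unsigned long long)fm->fm_extents[i].fe_length,
                               fm->fm_extents[i].fe_flags);
        close(fd);
        return 0;
}
#endif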
3332