ecryptfs: Fix refcnt leak on ecryptfs_follow_link() error path
[safe/jmp/linux-2.6] / fs / ext4 / extents.c
index a38e651..7d7b74e 100644 (file)
@@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
  * to allocate @blocks
  * Worse case is one block per extent
  */
-int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
+int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
 {
-       int lcap, icap, rcap, leafs, idxs, num;
-       int newextents = blocks;
-
-       rcap = ext4_ext_space_root_idx(inode, 0);
-       lcap = ext4_ext_space_block(inode, 0);
-       icap = ext4_ext_space_block_idx(inode, 0);
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       int idxs, num = 0;
 
-       /* number of new leaf blocks needed */
-       num = leafs = (newextents + lcap - 1) / lcap;
+       idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
+               / sizeof(struct ext4_extent_idx));
 
        /*
-        * Worse case, we need separate index block(s)
-        * to link all new leaf blocks
+        * If the new delayed allocation block is contiguous with the
+        * previous da block, it can share index blocks with the
+        * previous block, so we only need to allocate a new index
+        * block every idxs leaf blocks.  At idxs**2 blocks, we need
+        * an additional index block, and at idxs**3 blocks, yet
+        * another index block.
         */
-       idxs = (leafs + icap - 1) / icap;
-       do {
-               num += idxs;
-               idxs = (idxs + icap - 1) / icap;
-       } while (idxs > rcap);
+       if (ei->i_da_metadata_calc_len &&
+           ei->i_da_metadata_calc_last_lblock+1 == lblock) {
+               if ((ei->i_da_metadata_calc_len % idxs) == 0)
+                       num++;
+               if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
+                       num++;
+               if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
+                       num++;
+                       ei->i_da_metadata_calc_len = 0;
+               } else
+                       ei->i_da_metadata_calc_len++;
+               ei->i_da_metadata_calc_last_lblock++;
+               return num;
+       }
 
-       return num;
+       /*
+        * In the worst case we need a new set of index blocks at
+        * every level of the inode's extent tree.
+        */
+       ei->i_da_metadata_calc_len = 1;
+       ei->i_da_metadata_calc_last_lblock = lblock;
+       return ext_depth(inode) + 1;
 }
 
 static int
@@ -1007,7 +1022,8 @@ cleanup:
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
-                       ext4_free_blocks(handle, inode, ablocks[i], 1, 1);
+                       ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
+                                        EXT4_FREE_BLOCKS_METADATA);
                }
        }
        kfree(ablocks);
@@ -1761,7 +1777,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
        while (block < last && block != EXT_MAX_BLOCK) {
                num = last - block;
                /* find extent for this block */
+               down_read(&EXT4_I(inode)->i_data_sem);
                path = ext4_ext_find_extent(inode, block, path);
+               up_read(&EXT4_I(inode)->i_data_sem);
                if (IS_ERR(path)) {
                        err = PTR_ERR(path);
                        path = NULL;
@@ -1957,7 +1975,6 @@ errout:
 static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
                        struct ext4_ext_path *path)
 {
-       struct buffer_head *bh;
        int err;
        ext4_fsblk_t leaf;
 
@@ -1973,9 +1990,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
        if (err)
                return err;
        ext_debug("index is empty, remove it, free block %llu\n", leaf);
-       bh = sb_find_get_block(inode->i_sb, leaf);
-       ext4_forget(handle, 1, inode, bh, leaf);
-       ext4_free_blocks(handle, inode, leaf, 1, 1);
+       ext4_free_blocks(handle, inode, 0, leaf, 1,
+                        EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
        return err;
 }
 
@@ -2042,12 +2058,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
                                struct ext4_extent *ex,
                                ext4_lblk_t from, ext4_lblk_t to)
 {
-       struct buffer_head *bh;
        unsigned short ee_len =  ext4_ext_get_actual_len(ex);
-       int i, metadata = 0;
+       int flags = EXT4_FREE_BLOCKS_FORGET;
 
        if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-               metadata = 1;
+               flags |= EXT4_FREE_BLOCKS_METADATA;
 #ifdef EXTENTS_STATS
        {
                struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2072,11 +2087,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
                num = le32_to_cpu(ex->ee_block) + ee_len - from;
                start = ext_pblock(ex) + ee_len - num;
                ext_debug("free last %u blocks starting %llu\n", num, start);
-               for (i = 0; i < num; i++) {
-                       bh = sb_find_get_block(inode->i_sb, start + i);
-                       ext4_forget(handle, 0, inode, bh, start + i);
-               }
-               ext4_free_blocks(handle, inode, start, num, metadata);
+               ext4_free_blocks(handle, inode, 0, start, num, flags);
        } else if (from == le32_to_cpu(ex->ee_block)
                   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
                printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -2167,7 +2178,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                        correct_index = 1;
                        credits += (ext_depth(inode)) + 1;
                }
-               credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
+               credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
 
                err = ext4_ext_truncate_extend_restart(handle, inode, credits);
                if (err)
@@ -2380,6 +2391,7 @@ void ext4_ext_init(struct super_block *sb)
         */
 
        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
+#if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
                printk(KERN_INFO "EXT4-fs: file extents enabled");
 #ifdef AGGRESSIVE_TEST
                printk(", aggressive tests");
@@ -2391,6 +2403,7 @@ void ext4_ext_init(struct super_block *sb)
                printk(", stats");
 #endif
                printk("\n");
+#endif
 #ifdef EXTENTS_STATS
                spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
                EXT4_SB(sb)->s_ext_min = 1 << 30;
@@ -2805,6 +2818,8 @@ fix_extent_len:
  * into three uninitialized extent(at most). After IO complete, the part
  * being filled will be convert to initialized by the end_io callback function
  * via ext4_convert_unwritten_extents().
+ *
+ * Returns the size of uninitialized extent to be written on success.
  */
 static int ext4_split_unwritten_extents(handle_t *handle,
                                        struct inode *inode,
@@ -2822,7 +2837,6 @@ static int ext4_split_unwritten_extents(handle_t *handle,
        unsigned int allocated, ee_len, depth;
        ext4_fsblk_t newblock;
        int err = 0;
-       int ret = 0;
 
        ext_debug("ext4_split_unwritten_extents: inode %lu,"
                  "iblock %llu, max_blocks %u\n", inode->i_ino,
@@ -2840,12 +2854,12 @@ static int ext4_split_unwritten_extents(handle_t *handle,
        ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
 
        /*
-        * if the entire unintialized extent length less than
-        * the size of extent to write, there is no need to split
-        * uninitialized extent
+        * If the uninitialized extent begins at the same logical
+        * block where the write begins, and the write completely
+        * covers the extent, then we don't need to split it.
         */
-       if (allocated <= max_blocks)
-               return ret;
+       if ((iblock == ee_block) && (allocated <= max_blocks))
+               return allocated;
 
        err = ext4_ext_get_access(handle, inode, path + depth);
        if (err)
@@ -3024,6 +3038,14 @@ out:
        return err;
 }
 
+static void unmap_underlying_metadata_blocks(struct block_device *bdev,
+                       sector_t block, int count)
+{
+       int i;
+       for (i = 0; i < count; i++)
+                unmap_underlying_metadata(bdev, block + i);
+}
+
 static int
 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
                        ext4_lblk_t iblock, unsigned int max_blocks,
@@ -3033,6 +3055,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 {
        int ret = 0;
        int err = 0;
+       ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
 
        ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical"
                  "block %llu, max_blocks %u, flags %d, allocated %u",
@@ -3045,12 +3068,23 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
                ret = ext4_split_unwritten_extents(handle,
                                                inode, path, iblock,
                                                max_blocks, flags);
+               /*
+                * Flag the inode (non-aio case) or end_io struct (aio case)
+                * to record that this IO needs conversion to written when
+                * the IO is completed
+                */
+               if (io)
+                       io->flag = DIO_AIO_UNWRITTEN;
+               else
+                       EXT4_I(inode)->i_state |= EXT4_STATE_DIO_UNWRITTEN;
                goto out;
        }
-       /* DIO end_io complete, convert the filled extent to written */
+       /* async DIO end_io complete, convert the filled extent to written */
        if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
                ret = ext4_convert_unwritten_extents_dio(handle, inode,
                                                        path);
+               if (ret >= 0)
+                       ext4_update_inode_fsync_trans(handle, inode, 1);
                goto out2;
        }
        /* buffered IO case */
@@ -3078,6 +3112,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
        ret = ext4_ext_convert_to_initialized(handle, inode,
                                                path, iblock,
                                                max_blocks);
+       if (ret >= 0)
+               ext4_update_inode_fsync_trans(handle, inode, 1);
 out:
        if (ret <= 0) {
                err = ret;
@@ -3085,6 +3121,18 @@ out:
        } else
                allocated = ret;
        set_buffer_new(bh_result);
+       /*
+        * If we allocated more blocks than requested,
+        * we need to make sure we unmap the extra blocks
+        * allocated. The blocks actually needed will get
+        * unmapped later when we find the buffer_head marked
+        * new.
+        */
+       if (allocated > max_blocks) {
+               unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
+                                       newblock + max_blocks,
+                                       allocated - max_blocks);
+       }
 map_out:
        set_buffer_mapped(bh_result);
 out1:
@@ -3130,6 +3178,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        int err = 0, depth, ret, cache_type;
        unsigned int allocated = 0;
        struct ext4_allocation_request ar;
+       ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
 
        __clear_bit(BH_New, &bh_result->b_state);
        ext_debug("blocks %u/%u requested for inode %lu\n",
@@ -3176,7 +3225,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
         * this situation is possible, though, _during_ tree modification;
         * this is why assert can't be put in ext4_ext_find_extent()
         */
-       BUG_ON(path[depth].p_ext == NULL && depth != 0);
+       if (path[depth].p_ext == NULL && depth != 0) {
+               ext4_error(inode->i_sb, __func__, "bad extent address "
+                          "inode: %lu, iblock: %d, depth: %d",
+                          inode->i_ino, iblock, depth);
+               err = -EIO;
+               goto out2;
+       }
        eh = path[depth].p_hdr;
 
        ex = path[depth].p_ext;
@@ -3279,16 +3334,34 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        /* try to insert new extent into found leaf and return */
        ext4_ext_store_pblock(&newex, newblock);
        newex.ee_len = cpu_to_le16(ar.len);
-       if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)  /* Mark uninitialized */
+       /* Mark uninitialized */
+       if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
                ext4_ext_mark_uninitialized(&newex);
+               /*
+                * An io_end structure is created for every async
+                * direct IO write to the middle of the file.
+                * To avoid unnecessary conversion for every aio dio rewrite
+                * to the middle of the file, here we flag the IO that really
+                * needs the conversion.
+                * For the non-async direct IO case, flag the inode state
+                * that we need to perform conversion when IO is done.
+                */
+               if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) {
+                       if (io)
+                               io->flag = DIO_AIO_UNWRITTEN;
+                       else
+                               EXT4_I(inode)->i_state |=
+                                       EXT4_STATE_DIO_UNWRITTEN;;
+               }
+       }
        err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
        if (err) {
                /* free data blocks we just allocated */
                /* not a good idea to call discard here directly,
                 * but otherwise we'd need to call it every free() */
                ext4_discard_preallocations(inode);
-               ext4_free_blocks(handle, inode, ext_pblock(&newex),
-                                       ext4_ext_get_actual_len(&newex), 0);
+               ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
+                                ext4_ext_get_actual_len(&newex), 0);
                goto out2;
        }
 
@@ -3297,10 +3370,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        allocated = ext4_ext_get_actual_len(&newex);
        set_buffer_new(bh_result);
 
-       /* Cache only when it is _not_ an uninitialized extent */
-       if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
+       /*
+        * Cache the extent and update transaction to commit on fdatasync only
+        * when it is _not_ an uninitialized extent.
+        */
+       if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
                ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
                                                EXT4_EXT_CACHE_EXTENT);
+               ext4_update_inode_fsync_trans(handle, inode, 1);
+       } else
+               ext4_update_inode_fsync_trans(handle, inode, 0);
 out:
        if (allocated > max_blocks)
                allocated = max_blocks;
@@ -3500,6 +3579,7 @@ retry:
  *
  * This function is called from the direct IO end io call back
  * function, to convert the fallocated extents after IO is completed.
+ * Returns 0 on success.
  */
 int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
                                    loff_t len)
@@ -3687,10 +3767,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                 * Walk the extent tree gathering extent information.
                 * ext4_ext_fiemap_cb will push extents back to user.
                 */
-               down_read(&EXT4_I(inode)->i_data_sem);
                error = ext4_ext_walk_space(inode, start_blk, len_blks,
                                          ext4_ext_fiemap_cb, fieinfo);
-               up_read(&EXT4_I(inode)->i_data_sem);
        }
 
        return error;