ecryptfs: Fix refcnt leak on ecryptfs_follow_link() error path
[safe/jmp/linux-2.6] / fs / ext4 / extents.c
index 10539e3..7d7b74e 100644 (file)
@@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
  * to allocate @blocks
  * Worse case is one block per extent
  */
-int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
+int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
 {
-       int lcap, icap, rcap, leafs, idxs, num;
-       int newextents = blocks;
-
-       rcap = ext4_ext_space_root_idx(inode, 0);
-       lcap = ext4_ext_space_block(inode, 0);
-       icap = ext4_ext_space_block_idx(inode, 0);
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       int idxs, num = 0;
 
-       /* number of new leaf blocks needed */
-       num = leafs = (newextents + lcap - 1) / lcap;
+       idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
+               / sizeof(struct ext4_extent_idx));
 
        /*
-        * Worse case, we need separate index block(s)
-        * to link all new leaf blocks
+        * If the new delayed allocation block is contiguous with the
+        * previous da block, it can share index blocks with the
+        * previous block, so we only need to allocate a new index
+        * block every idxs leaf blocks.  At idxs**2 blocks, we need
+        * an additional index block, and at idxs**3 blocks, yet
+        * another index block.
         */
-       idxs = (leafs + icap - 1) / icap;
-       do {
-               num += idxs;
-               idxs = (idxs + icap - 1) / icap;
-       } while (idxs > rcap);
+       if (ei->i_da_metadata_calc_len &&
+           ei->i_da_metadata_calc_last_lblock+1 == lblock) {
+               if ((ei->i_da_metadata_calc_len % idxs) == 0)
+                       num++;
+               if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
+                       num++;
+               if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
+                       num++;
+                       ei->i_da_metadata_calc_len = 0;
+               } else
+                       ei->i_da_metadata_calc_len++;
+               ei->i_da_metadata_calc_last_lblock++;
+               return num;
+       }
 
-       return num;
+       /*
+        * In the worst case we need a new set of index blocks at
+        * every level of the inode's extent tree.
+        */
+       ei->i_da_metadata_calc_len = 1;
+       ei->i_da_metadata_calc_last_lblock = lblock;
+       return ext_depth(inode) + 1;
 }
 
 static int
@@ -1007,7 +1022,8 @@ cleanup:
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
-                       ext4_free_blocks(handle, inode, ablocks[i], 1, 1);
+                       ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
+                                        EXT4_FREE_BLOCKS_METADATA);
                }
        }
        kfree(ablocks);
@@ -1761,7 +1777,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
        while (block < last && block != EXT_MAX_BLOCK) {
                num = last - block;
                /* find extent for this block */
+               down_read(&EXT4_I(inode)->i_data_sem);
                path = ext4_ext_find_extent(inode, block, path);
+               up_read(&EXT4_I(inode)->i_data_sem);
                if (IS_ERR(path)) {
                        err = PTR_ERR(path);
                        path = NULL;
@@ -1957,7 +1975,6 @@ errout:
 static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
                        struct ext4_ext_path *path)
 {
-       struct buffer_head *bh;
        int err;
        ext4_fsblk_t leaf;
 
@@ -1973,9 +1990,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
        if (err)
                return err;
        ext_debug("index is empty, remove it, free block %llu\n", leaf);
-       bh = sb_find_get_block(inode->i_sb, leaf);
-       ext4_forget(handle, 1, inode, bh, leaf);
-       ext4_free_blocks(handle, inode, leaf, 1, 1);
+       ext4_free_blocks(handle, inode, 0, leaf, 1,
+                        EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
        return err;
 }
 
@@ -2042,12 +2058,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
                                struct ext4_extent *ex,
                                ext4_lblk_t from, ext4_lblk_t to)
 {
-       struct buffer_head *bh;
        unsigned short ee_len =  ext4_ext_get_actual_len(ex);
-       int i, metadata = 0;
+       int flags = EXT4_FREE_BLOCKS_FORGET;
 
        if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-               metadata = 1;
+               flags |= EXT4_FREE_BLOCKS_METADATA;
 #ifdef EXTENTS_STATS
        {
                struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2072,11 +2087,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
                num = le32_to_cpu(ex->ee_block) + ee_len - from;
                start = ext_pblock(ex) + ee_len - num;
                ext_debug("free last %u blocks starting %llu\n", num, start);
-               for (i = 0; i < num; i++) {
-                       bh = sb_find_get_block(inode->i_sb, start + i);
-                       ext4_forget(handle, 0, inode, bh, start + i);
-               }
-               ext4_free_blocks(handle, inode, start, num, metadata);
+               ext4_free_blocks(handle, inode, 0, start, num, flags);
        } else if (from == le32_to_cpu(ex->ee_block)
                   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
                printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -2167,7 +2178,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                        correct_index = 1;
                        credits += (ext_depth(inode)) + 1;
                }
-               credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
+               credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
 
                err = ext4_ext_truncate_extend_restart(handle, inode, credits);
                if (err)
@@ -2807,6 +2818,8 @@ fix_extent_len:
  * into three uninitialized extent(at most). After IO complete, the part
  * being filled will be convert to initialized by the end_io callback function
  * via ext4_convert_unwritten_extents().
+ *
+ * Returns the size of uninitialized extent to be written on success.
  */
 static int ext4_split_unwritten_extents(handle_t *handle,
                                        struct inode *inode,
@@ -2824,7 +2837,6 @@ static int ext4_split_unwritten_extents(handle_t *handle,
        unsigned int allocated, ee_len, depth;
        ext4_fsblk_t newblock;
        int err = 0;
-       int ret = 0;
 
        ext_debug("ext4_split_unwritten_extents: inode %lu,"
                  "iblock %llu, max_blocks %u\n", inode->i_ino,
@@ -2842,12 +2854,12 @@ static int ext4_split_unwritten_extents(handle_t *handle,
        ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
 
        /*
-        * if the entire unintialized extent length less than
-        * the size of extent to write, there is no need to split
-        * uninitialized extent
+        * If the uninitialized extent begins at the same logical
+        * block where the write begins, and the write completely
+        * covers the extent, then we don't need to split it.
         */
-       if (allocated <= max_blocks)
-               return ret;
+       if ((iblock == ee_block) && (allocated <= max_blocks))
+               return allocated;
 
        err = ext4_ext_get_access(handle, inode, path + depth);
        if (err)
@@ -3026,6 +3038,14 @@ out:
        return err;
 }
 
+static void unmap_underlying_metadata_blocks(struct block_device *bdev,
+                       sector_t block, int count)
+{
+       int i;
+       for (i = 0; i < count; i++)
+                unmap_underlying_metadata(bdev, block + i);
+}
+
 static int
 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
                        ext4_lblk_t iblock, unsigned int max_blocks,
@@ -3048,15 +3068,23 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
                ret = ext4_split_unwritten_extents(handle,
                                                inode, path, iblock,
                                                max_blocks, flags);
-               /* flag the io_end struct that we need convert when IO done */
+               /*
+                * Flag the inode (non-aio case) or end_io struct (aio case)
+                * that this IO needs conversion to written when IO is
+                * completed.
+                */
                if (io)
                        io->flag = DIO_AIO_UNWRITTEN;
+               else
+                       EXT4_I(inode)->i_state |= EXT4_STATE_DIO_UNWRITTEN;
                goto out;
        }
-       /* DIO end_io complete, convert the filled extent to written */
+       /* async DIO end_io complete, convert the filled extent to written */
        if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
                ret = ext4_convert_unwritten_extents_dio(handle, inode,
                                                        path);
+               if (ret >= 0)
+                       ext4_update_inode_fsync_trans(handle, inode, 1);
                goto out2;
        }
        /* buffered IO case */
@@ -3084,6 +3112,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
        ret = ext4_ext_convert_to_initialized(handle, inode,
                                                path, iblock,
                                                max_blocks);
+       if (ret >= 0)
+               ext4_update_inode_fsync_trans(handle, inode, 1);
 out:
        if (ret <= 0) {
                err = ret;
@@ -3091,6 +3121,18 @@ out:
        } else
                allocated = ret;
        set_buffer_new(bh_result);
+       /*
+        * If we allocated more blocks than requested,
+        * we need to make sure we unmap the extra blocks
+        * allocated. The actually needed block will get
+        * unmapped later when we find the buffer_head marked
+        * new.
+        */
+       if (allocated > max_blocks) {
+               unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
+                                       newblock + max_blocks,
+                                       allocated - max_blocks);
+       }
 map_out:
        set_buffer_mapped(bh_result);
 out1:
@@ -3183,7 +3225,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
         * this situation is possible, though, _during_ tree modification;
         * this is why assert can't be put in ext4_ext_find_extent()
         */
-       BUG_ON(path[depth].p_ext == NULL && depth != 0);
+       if (path[depth].p_ext == NULL && depth != 0) {
+               ext4_error(inode->i_sb, __func__, "bad extent address "
+                          "inode: %lu, iblock: %d, depth: %d",
+                          inode->i_ino, iblock, depth);
+               err = -EIO;
+               goto out2;
+       }
        eh = path[depth].p_hdr;
 
        ex = path[depth].p_ext;
@@ -3295,10 +3343,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                 * To avoid unecessary convertion for every aio dio rewrite
                 * to the mid of file, here we flag the IO that is really
                 * need the convertion.
-                *
+                * For the non-async direct IO case, flag the inode state
+                * that we need to perform conversion when IO is done.
                 */
-               if (io && flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT)
-                       io->flag = DIO_AIO_UNWRITTEN;
+               if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) {
+                       if (io)
+                               io->flag = DIO_AIO_UNWRITTEN;
+                       else
+                               EXT4_I(inode)->i_state |=
+                                       EXT4_STATE_DIO_UNWRITTEN;;
+               }
        }
        err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
        if (err) {
@@ -3306,8 +3360,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                /* not a good idea to call discard here directly,
                 * but otherwise we'd need to call it every free() */
                ext4_discard_preallocations(inode);
-               ext4_free_blocks(handle, inode, ext_pblock(&newex),
-                                       ext4_ext_get_actual_len(&newex), 0);
+               ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
+                                ext4_ext_get_actual_len(&newex), 0);
                goto out2;
        }
 
@@ -3316,10 +3370,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        allocated = ext4_ext_get_actual_len(&newex);
        set_buffer_new(bh_result);
 
-       /* Cache only when it is _not_ an uninitialized extent */
-       if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
+       /*
+        * Cache the extent and update transaction to commit on fdatasync only
+        * when it is _not_ an uninitialized extent.
+        */
+       if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
                ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
                                                EXT4_EXT_CACHE_EXTENT);
+               ext4_update_inode_fsync_trans(handle, inode, 1);
+       } else
+               ext4_update_inode_fsync_trans(handle, inode, 0);
 out:
        if (allocated > max_blocks)
                allocated = max_blocks;
@@ -3519,6 +3579,7 @@ retry:
  *
  * This function is called from the direct IO end io call back
  * function, to convert the fallocated extents after IO is completed.
+ * Returns 0 on success.
  */
 int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
                                    loff_t len)
@@ -3706,10 +3767,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                 * Walk the extent tree gathering extent information.
                 * ext4_ext_fiemap_cb will push extents back to user.
                 */
-               down_read(&EXT4_I(inode)->i_data_sem);
                error = ext4_ext_walk_space(inode, start_blk, len_blks,
                                          ext4_ext_fiemap_cb, fieinfo);
-               up_read(&EXT4_I(inode)->i_data_sem);
        }
 
        return error;