ext4: Avoid data / filesystem corruption when write fails to copy data
[safe/jmp/linux-2.6] / fs / ext4 / migrate.c
index 8c6c685..d641e13 100644 (file)
@@ -13,8 +13,8 @@
  */
 
 #include <linux/module.h>
-#include <linux/ext4_jbd2.h>
-#include <linux/ext4_fs_extents.h>
+#include "ext4_jbd2.h"
+#include "ext4_extents.h"
 
 /*
  * The contiguous blocks details which can be
@@ -43,6 +43,7 @@ static int finish_range(handle_t *handle, struct inode *inode,
 
        if (IS_ERR(path)) {
                retval = PTR_ERR(path);
+               path = NULL;
                goto err_out;
        }
 
@@ -52,12 +53,14 @@ static int finish_range(handle_t *handle, struct inode *inode,
         * credit. But below we try to not accumalate too much
         * of them by restarting the journal.
         */
-       needed = ext4_ext_calc_credits_for_insert(inode, path);
+       needed = ext4_ext_calc_credits_for_single_extent(inode,
+                   lb->last_block - lb->first_block + 1, path);
 
        /*
         * Make sure the credit we accumalated is not really high
         */
-       if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) {
+       if (needed && ext4_handle_has_enough_credits(handle,
+                                               EXT4_RESERVE_TRANS_BLOCKS)) {
                retval = ext4_journal_restart(handle, needed);
                if (retval)
                        goto err_out;
@@ -72,8 +75,12 @@ static int finish_range(handle_t *handle, struct inode *inode,
                                goto err_out;
                }
        }
-       retval = ext4_ext_insert_extent(handle, inode, path, &newext);
+       retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
 err_out:
+       if (path) {
+               ext4_ext_drop_refs(path);
+               kfree(path);
+       }
        lb->first_pblock = 0;
        return retval;
 }
@@ -223,7 +230,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
 {
        int retval = 0, needed;
 
-       if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS)
+       if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
                return 0;
        /*
         * We are freeing a blocks. During this we touch
@@ -255,13 +262,17 @@ static int free_dind_blocks(handle_t *handle,
        for (i = 0; i < max_entries; i++) {
                if (tmp_idata[i]) {
                        extend_credit_for_blkdel(handle, inode);
-                       ext4_free_blocks(handle, inode,
-                                       le32_to_cpu(tmp_idata[i]), 1, 1);
+                       ext4_free_blocks(handle, inode, 0,
+                                        le32_to_cpu(tmp_idata[i]), 1,
+                                        EXT4_FREE_BLOCKS_METADATA |
+                                        EXT4_FREE_BLOCKS_FORGET);
                }
        }
        put_bh(bh);
        extend_credit_for_blkdel(handle, inode);
-       ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
+       ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
+                        EXT4_FREE_BLOCKS_METADATA |
+                        EXT4_FREE_BLOCKS_FORGET);
        return 0;
 }
 
@@ -290,7 +301,9 @@ static int free_tind_blocks(handle_t *handle,
        }
        put_bh(bh);
        extend_credit_for_blkdel(handle, inode);
-       ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
+       ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
+                        EXT4_FREE_BLOCKS_METADATA |
+                        EXT4_FREE_BLOCKS_FORGET);
        return 0;
 }
 
@@ -301,8 +314,10 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
        /* ei->i_data[EXT4_IND_BLOCK] */
        if (i_data[0]) {
                extend_credit_for_blkdel(handle, inode);
-               ext4_free_blocks(handle, inode,
-                               le32_to_cpu(i_data[0]), 1, 1);
+               ext4_free_blocks(handle, inode, 0,
+                               le32_to_cpu(i_data[0]), 1,
+                                EXT4_FREE_BLOCKS_METADATA |
+                                EXT4_FREE_BLOCKS_FORGET);
        }
 
        /* ei->i_data[EXT4_DIND_BLOCK] */
@@ -322,7 +337,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
 }
 
 static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
-                               struct inode *tmp_inode)
+                                               struct inode *tmp_inode)
 {
        int retval;
        __le32  i_data[3];
@@ -334,7 +349,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
         * i_data field of the original inode
         */
        retval = ext4_journal_extend(handle, 1);
-       if (retval != 0) {
+       if (retval) {
                retval = ext4_journal_restart(handle, 1);
                if (retval)
                        goto err_out;
@@ -346,6 +361,17 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
 
        down_write(&EXT4_I(inode)->i_data_sem);
        /*
+        * If EXT4_STATE_EXT_MIGRATE has been cleared, a block
+        * allocation happened after we started the migration, so
+        * we must fail the migration.
+        */
+       if (!(EXT4_I(inode)->i_state & EXT4_STATE_EXT_MIGRATE)) {
+               retval = -EAGAIN;
+               up_write(&EXT4_I(inode)->i_data_sem);
+               goto err_out;
+       } else
+               EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
+       /*
         * We have the extent map build with the tmp inode.
         * Now copy the i_data across
         */
@@ -401,7 +427,8 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
        }
        put_bh(bh);
        extend_credit_for_blkdel(handle, inode);
-       ext4_free_blocks(handle, inode, block, 1, 1);
+       ext4_free_blocks(handle, inode, 0, block, 1,
+                        EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
        return retval;
 }
 
@@ -429,8 +456,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
 
 }
 
-int ext4_ext_migrate(struct inode *inode, struct file *filp,
-                               unsigned int cmd, unsigned long arg)
+int ext4_ext_migrate(struct inode *inode)
 {
        handle_t *handle;
        int retval = 0, i;
@@ -440,14 +466,15 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
        struct inode *tmp_inode = NULL;
        struct list_blocks_struct lb;
        unsigned long max_entries;
+       __u32 goal;
 
-       if (!test_opt(inode->i_sb, EXTENTS))
-               /*
-                * if mounted with noextents we don't allow the migrate
-                */
-               return -EINVAL;
-
-       if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+       /*
+        * If the filesystem does not support extents, or the inode
+        * already is extent-based, error out.
+        */
+       if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
+                                      EXT4_FEATURE_INCOMPAT_EXTENTS) ||
+           (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
                return -EINVAL;
 
        if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
@@ -463,16 +490,16 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
                                        + 1);
        if (IS_ERR(handle)) {
                retval = PTR_ERR(handle);
-               goto err_out;
+               return retval;
        }
-       tmp_inode = ext4_new_inode(handle,
-                               inode->i_sb->s_root->d_inode,
-                               S_IFREG);
+       goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
+               EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
+       tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
+                                  S_IFREG, 0, goal);
        if (IS_ERR(tmp_inode)) {
                retval = -ENOMEM;
                ext4_journal_stop(handle);
-               tmp_inode = NULL;
-               goto err_out;
+               return retval;
        }
        i_size_write(tmp_inode, i_size_read(inode));
        /*
@@ -498,11 +525,17 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
         * when we add extents we extent the journal
         */
        /*
-        * inode_mutex prevent write and truncate on the file. Read still goes
-        * through. We take i_data_sem in ext4_ext_swap_inode_data before we
-        * switch the inode format to prevent read.
+        * Even though we take i_mutex we can still cause block
+        * allocation via mmap write to holes. If we have allocated
+        * new blocks we fail migrate.  New block allocation will
+        * clear EXT4_STATE_EXT_MIGRATE flag.  The flag is updated
+        * with i_data_sem held to prevent racing with block
+        * allocation.
         */
-       mutex_lock(&(inode->i_mutex));
+       down_read((&EXT4_I(inode)->i_data_sem));
+       EXT4_I(inode)->i_state |= EXT4_STATE_EXT_MIGRATE;
+       up_read((&EXT4_I(inode)->i_data_sem));
+
        handle = ext4_journal_start(inode, 1);
 
        ei = EXT4_I(inode);
@@ -554,9 +587,15 @@ err_out:
                 * tmp_inode
                 */
                free_ext_block(handle, tmp_inode);
-       else
-               retval = ext4_ext_swap_inode_data(handle, inode,
-                                                       tmp_inode);
+       else {
+               retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
+               if (retval)
+                       /*
+                        * If swapping the inode data fails, free the
+                        * extent details of the tmp inode.
+                        */
+                       free_ext_block(handle, tmp_inode);
+       }
 
        /* We mark the tmp_inode dirty via ext4_ext_tree_init. */
        if (ext4_journal_extend(handle, 1) != 0)
@@ -588,10 +627,8 @@ err_out:
        tmp_inode->i_nlink = 0;
 
        ext4_journal_stop(handle);
-       mutex_unlock(&(inode->i_mutex));
-
-       if (tmp_inode)
-               iput(tmp_inode);
+       unlock_new_inode(tmp_inode);
+       iput(tmp_inode);
 
        return retval;
 }