ext4: return correct wbc.nr_to_write in ext4_da_writepages
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0e2ea57..3e3b454 100644
@@ -983,6 +983,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
                goto cleanup;
 
        set_buffer_new(bh_result);
+
+       ext4_update_inode_fsync_trans(handle, inode, 1);
 got_it:
        map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
        if (count > blocks_to_boundary)
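This hunk records, at block-allocation time, which journal transaction must
commit before a later fsync()/fdatasync() of this inode may return. The helper
is a static inline in ext4.h; a sketch from memory of the 2.6.33-era code, not
verbatim:

    static inline void ext4_update_inode_fsync_trans(handle_t *handle,
                                                     struct inode *inode,
                                                     int datasync)
    {
            struct ext4_inode_info *ei = EXT4_I(inode);

            /* no-op for no-journal-mode handles */
            if (ext4_handle_valid(handle)) {
                    ei->i_sync_tid = handle->h_transaction->t_tid;
                    /* block allocation changes what fdatasync() must flush */
                    if (datasync)
                            ei->i_datasync_tid = handle->h_transaction->t_tid;
            }
    }

Allocation paths pass datasync=1; ext4_do_update_inode (later in this diff)
passes 0, since a pure inode update only matters to a full fsync().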
@@ -1001,17 +1003,12 @@ out:
        return err;
 }
 
-qsize_t ext4_get_reserved_space(struct inode *inode)
+#ifdef CONFIG_QUOTA
+qsize_t *ext4_get_reserved_space(struct inode *inode)
 {
-       unsigned long long total;
-
-       spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-       total = EXT4_I(inode)->i_reserved_data_blocks +
-               EXT4_I(inode)->i_reserved_meta_blocks;
-       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-
-       return total;
+       return &EXT4_I(inode)->i_reserved_quota;
 }
+#endif
 /*
  * Calculate the number of metadata blocks need to reserve
  * to allocate @blocks for non extent file based file
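Instead of computing a reserved-space total under i_block_reservation_lock,
ext4_get_reserved_space() now hands the generic quota code a pointer to the new
per-inode counter i_reserved_quota (initialized to 0 in ext4_iget() further
down in this diff), letting the quota core read and adjust the reservation
itself. The hookup, assuming the 2.6.33-era generic reserved-quota interface,
looks roughly like this in super.c (sketch, other callbacks omitted):

    static struct dquot_operations ext4_quota_operations = {
            /* the usual alloc/claim/release hooks are omitted here */
            .get_reserved_space = ext4_get_reserved_space,
    };

The #ifdef CONFIG_QUOTA matches that use: without quota support nothing calls
through dq_op, so the helper would be dead code.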
@@ -1049,7 +1046,7 @@ static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
 static void ext4_da_update_reserve_space(struct inode *inode, int used)
 {
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-       int total, mdb, mdb_free;
+       int total, mdb, mdb_free, mdb_claim = 0;
 
        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
        /* recalculate the number of metablocks still need to be reserved */
@@ -1062,7 +1059,9 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
 
        if (mdb_free) {
                /* Account for allocated meta_blocks */
-               mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+               mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks;
+               BUG_ON(mdb_free < mdb_claim);
+               mdb_free -= mdb_claim;
 
                /* update fs dirty blocks counter */
                percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
@@ -1073,8 +1072,11 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
        /* update per-inode reservations */
        BUG_ON(used  > EXT4_I(inode)->i_reserved_data_blocks);
        EXT4_I(inode)->i_reserved_data_blocks -= used;
+       percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim);
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
+       vfs_dq_claim_block(inode, used + mdb_claim);
+
        /*
         * free those over-booking quota for metadata blocks
         */
@@ -1814,19 +1816,17 @@ repeat:
 
        md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
        total = md_needed + nrblocks;
+       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
        /*
         * Make quota reservation here to prevent quota overflow
         * later. Real quota accounting is done at pages writeout
         * time.
         */
-       if (vfs_dq_reserve_block(inode, total)) {
-               spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+       if (vfs_dq_reserve_block(inode, total))
                return -EDQUOT;
-       }
 
        if (ext4_claim_free_blocks(sbi, total)) {
-               spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
                vfs_dq_release_reservation_block(inode, total);
                if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
                        yield();
@@ -1834,10 +1834,11 @@ repeat:
                }
                return -ENOSPC;
        }
+       spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
        EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
-       EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
-
+       EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+
        return 0;       /* success */
 }
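The point of this hunk is lock ordering: vfs_dq_reserve_block() takes the quota
locks internally, so calling it while holding i_block_reservation_lock nests a
quota lock inside the per-inode reservation spinlock and risks inversion
against quota paths that take the two in the opposite order. The reservation
lock is therefore dropped before the quota and free-block claims and retaken to
commit the result, which is also why the counters are now added to (+=) rather
than assigned: another task may have changed them in the unlocked window. The
shape of the corrected pattern, as a sketch (compute_needed() is a hypothetical
stand-in for the md_needed/total arithmetic above):

    static int da_reserve_sketch(struct inode *inode, int nrblocks)
    {
            struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
            int md_needed, total;

            spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
            md_needed = compute_needed(inode, nrblocks);  /* hypothetical helper */
            total = md_needed + nrblocks;
            spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);

            if (vfs_dq_reserve_block(inode, total))   /* quota locks taken inside */
                    return -EDQUOT;
            if (ext4_claim_free_blocks(sbi, total)) {
                    vfs_dq_release_reservation_block(inode, total);
                    return -ENOSPC;                   /* the real code may retry first */
            }

            spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
            EXT4_I(inode)->i_reserved_data_blocks += nrblocks;  /* '+=', not '=' */
            EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
            spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
            return 0;
    }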
 
@@ -2900,7 +2901,7 @@ retry:
                ret = write_cache_pages(mapping, wbc, __mpage_da_writepage,
                                        &mpd);
                /*
-                * If we have a contigous extent of pages and we
+                * If we have a contiguous extent of pages and we
                 * haven't done the I/O yet, map the blocks and submit
                 * them for I/O.
                 */
@@ -2966,8 +2967,7 @@ retry:
 out_writepages:
        if (!no_nrwrite_index_update)
                wbc->no_nrwrite_index_update = 0;
-       if (wbc->nr_to_write > nr_to_writebump)
-               wbc->nr_to_write -= nr_to_writebump;
+       wbc->nr_to_write -= nr_to_writebump;
        wbc->range_start = range_start;
        trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
        return ret;
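This is the change the subject line refers to. Earlier in ext4_da_writepages()
a small request is bumped up to sbi->s_mb_stream_request, with nr_to_writebump
holding the difference; on exit the bump must be subtracted unconditionally, or
the caller sees too many pages remaining and undercounts what was written.
Worked example with illustrative numbers: the caller asks for nr_to_write = 8
and the bump is 8, so the loop runs with 16; it writes 10 pages, leaving 6. The
old guard (6 <= 8) skipped the subtraction and returned 6, so the caller
computed 8 - 6 = 2 pages written instead of 10. The new code returns 6 - 8 =
-2, and 8 - (-2) = 10 comes out right; a negative nr_to_write on return is
harmless because the caller only uses the difference.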
@@ -2992,11 +2992,18 @@ static int ext4_nonda_switch(struct super_block *sb)
        if (2 * free_blocks < 3 * dirty_blocks ||
                free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
                /*
-                * free block count is less that 150% of dirty blocks
-                * or free blocks is less that watermark
+                * free block count is less than 150% of dirty blocks
+                * or free blocks is less than watermark
                 */
                return 1;
        }
+       /*
+        * Even if we don't switch but are nearing capacity,
+        * start pushing delalloc when 1/2 of free blocks are dirty.
+        */
+       if (free_blocks < 2 * dirty_blocks)
+               writeback_inodes_sb_if_idle(sb);
+
        return 0;
 }
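ext4_nonda_switch() now distinguishes two thresholds. As before, the file falls
back to non-delayed allocation when free blocks drop below 150% of dirty blocks
(the 2*free < 3*dirty test) or below the watermark. New is the softer trigger:
once free < 2*dirty, writeback is nudged via writeback_inodes_sb_if_idle(), a
2.6.33-era helper that, as I recall it, starts per-superblock writeback only if
none is already in progress, so repeated calls are cheap. Worked example: with
1000 free and 600 dirty blocks, 2000 >= 1800 so delalloc stays on, but 1000 <
1200 kicks writeback; at 700 dirty (2000 < 2100) the function returns 1 and the
write path switches away from delalloc.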
 
@@ -4031,7 +4038,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth,
        int k, err;
 
        *top = 0;
-       /* Make k index the deepest non-null offest + 1 */
+       /* Make k index the deepest non-null offset + 1 */
        for (k = depth; k > 1 && !offsets[k-1]; k--)
                ;
        partial = ext4_get_branch(inode, k, offsets, chain, &err);
@@ -4738,6 +4745,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        struct ext4_inode *raw_inode;
        struct ext4_inode_info *ei;
        struct inode *inode;
+       journal_t *journal = EXT4_SB(sb)->s_journal;
        long ret;
        int block;
 
@@ -4791,6 +4799,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                        ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
        inode->i_size = ext4_isize(raw_inode);
        ei->i_disksize = inode->i_size;
+#ifdef CONFIG_QUOTA
+       ei->i_reserved_quota = 0;
+#endif
        inode->i_generation = le32_to_cpu(raw_inode->i_generation);
        ei->i_block_group = iloc.block_group;
        ei->i_last_alloc_group = ~0;
@@ -4802,6 +4813,31 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                ei->i_data[block] = raw_inode->i_block[block];
        INIT_LIST_HEAD(&ei->i_orphan);
 
+       /*
+        * Set transaction id's of transactions that have to be committed
+        * to finish f[data]sync. We set them to currently running transaction
+        * as we cannot be sure that the inode or some of its metadata isn't
+        * part of the transaction - the inode could have been reclaimed and
+        * now it is reread from disk.
+        */
+       if (journal) {
+               transaction_t *transaction;
+               tid_t tid;
+
+               spin_lock(&journal->j_state_lock);
+               if (journal->j_running_transaction)
+                       transaction = journal->j_running_transaction;
+               else
+                       transaction = journal->j_committing_transaction;
+               if (transaction)
+                       tid = transaction->t_tid;
+               else
+                       tid = journal->j_commit_sequence;
+               spin_unlock(&journal->j_state_lock);
+               ei->i_sync_tid = tid;
+               ei->i_datasync_tid = tid;
+       }
+
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
                ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
                if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
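Because the inode may have been reclaimed and re-read while its last
modification still sits in the journal, ext4_iget() seeds both tids
conservatively: the running transaction if there is one, else the committing
one, else the last committed sequence, all sampled under j_state_lock so the
pair of transaction pointers is read consistently. The consumer is fsync; an
abbreviated sketch from memory of the 2.6.33-era ext4_sync_file() (the real
function also handles data=journal mode and write barriers):

    int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
    {
            struct inode *inode = dentry->d_inode;
            struct ext4_inode_info *ei = EXT4_I(inode);
            journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
            tid_t commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;

            /* Start the commit if it has not begun, then wait for it.
             * An inode untouched since that tid committed costs nothing. */
            if (jbd2_log_start_commit(journal, commit_tid))
                    jbd2_log_wait_commit(journal, commit_tid);
            return 0;
    }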
@@ -5056,6 +5092,7 @@ static int ext4_do_update_inode(handle_t *handle,
                err = rc;
        ei->i_state &= ~EXT4_STATE_NEW;
 
+       ext4_update_inode_fsync_trans(handle, inode, 0);
 out_brelse:
        brelse(bh);
        ext4_std_error(inode->i_sb, err);
@@ -5175,8 +5212,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 
                /* (user+group)*(old+new) structure, inode write (sb,
                 * inode block, ? - but truncate inode update has it) */
-               handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+
-                                       EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
+               handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
+                                       EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
                if (IS_ERR(handle)) {
                        error = PTR_ERR(handle);
                        goto err_out;
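The hand-rolled 2*(INIT+DEL), counting user plus group quotas, becomes a pair
of macros that scale by MAXQUOTAS, so the credit estimate stays correct if
further quota types are added. As far as I recall, the 2.6.33 definitions in
ext4_jbd2.h are simply (sketch):

    /* Worst case: every quota type needs its quota file touched in
     * this transaction. */
    #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
    #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb)  (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))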
@@ -5324,7 +5361,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
  * worst case, the index blocks spread over different block groups
  *
  * If datablocks are discontiguous, they are possible to spread over
- * different block groups too. If they are contiugous, with flexbg,
+ * different block groups too. If they are contiguous, with flexbg,
  * they could still cross a block group boundary.
  *
  * Also account for superblock, inode, quota and xattr blocks
@@ -5400,7 +5437,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
  * Calculate the journal credits for a chunk of data modification.
  *
  * This is called from DIO, fallocate or whoever calling
- * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks.
+ * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks.
  *
  * journal buffers for data blocks are not included here, as DIO
  * and fallocate do not need to journal data buffers.
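The comment fixed here documents ext4_chunk_trans_blocks(). From memory, the
function itself is a thin wrapper (sketch), passing chunk=1 because the blocks
being mapped are contiguous, and excluding the data buffers themselves from the
credit count since DIO and fallocate do not journal data:

    int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
    {
            /* one contiguous chunk of nrblocks -> chunk flag set */
            return ext4_meta_trans_blocks(inode, nrblocks, 1);
    }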