ext4: return correct wbc.nr_to_write in ext4_da_writepages
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0e2ea57..3e3b454 100644
@@ -983,6 +983,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
                goto cleanup;
 
        set_buffer_new(bh_result);
+
+       ext4_update_inode_fsync_trans(handle, inode, 1);
 got_it:
        map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
        if (count > blocks_to_boundary)
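This hunk records, at block-allocation time, which journal transaction must
commit before a later fsync()/fdatasync() of this inode may return. The helper
is a static inline in ext4.h; a sketch from memory of the 2.6.33-era code, not
verbatim:

    static inline void ext4_update_inode_fsync_trans(handle_t *handle,
                                                     struct inode *inode,
                                                     int datasync)
    {
            struct ext4_inode_info *ei = EXT4_I(inode);

            /* no-op for no-journal-mode handles */
            if (ext4_handle_valid(handle)) {
                    ei->i_sync_tid = handle->h_transaction->t_tid;
                    /* block allocation changes what fdatasync() must flush */
                    if (datasync)
                            ei->i_datasync_tid = handle->h_transaction->t_tid;
            }
    }

Allocation paths pass datasync=1; ext4_do_update_inode (later in this diff)
passes 0, since a pure inode update only matters to a full fsync().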
@@ -1001,17 +1003,12 @@ out:
        return err;
 }
 
-qsize_t ext4_get_reserved_space(struct inode *inode)
+#ifdef CONFIG_QUOTA
+qsize_t *ext4_get_reserved_space(struct inode *inode)
 {
-       unsigned long long total;
-
-       spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-       total = EXT4_I(inode)->i_reserved_data_blocks +
-               EXT4_I(inode)->i_reserved_meta_blocks;
-       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-
-       return total;
+       return &EXT4_I(inode)->i_reserved_quota;
 }
+#endif
 /*
  * Calculate the number of metadata blocks need to reserve
  * to allocate @blocks for non extent file based file
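Instead of computing a reserved-space total under i_block_reservation_lock,
ext4_get_reserved_space() now hands the generic quota code a pointer to the new
per-inode counter i_reserved_quota (initialized to 0 in ext4_iget() further
down in this diff), letting the quota core read and adjust the reservation
itself. The hookup, assuming the 2.6.33-era generic reserved-quota interface,
looks roughly like this in super.c (sketch, other callbacks omitted):

    static struct dquot_operations ext4_quota_operations = {
            /* the usual alloc/claim/release hooks are omitted here */
            .get_reserved_space = ext4_get_reserved_space,
    };

The #ifdef CONFIG_QUOTA matches that use: without quota support nothing calls
through dq_op, so the helper would be dead code.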
@@ -1049,7 +1046,7 @@ static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
 static void ext4_da_update_reserve_space(struct inode *inode, int used)
 {
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-       int total, mdb, mdb_free;
+       int total, mdb, mdb_free, mdb_claim = 0;
 
        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
        /* recalculate the number of metablocks still need to be reserved */
@@ -1062,7 +1059,9 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
 
        if (mdb_free) {
                /* Account for allocated meta_blocks */
-               mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+               mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks;
+               BUG_ON(mdb_free < mdb_claim);
+               mdb_free -= mdb_claim;
 
                /* update fs dirty blocks counter */
                percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
@@ -1073,8 +1072,11 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
        /* update per-inode reservations */
        BUG_ON(used  > EXT4_I(inode)->i_reserved_data_blocks);
        EXT4_I(inode)->i_reserved_data_blocks -= used;
+       percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim);
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
+       vfs_dq_claim_block(inode, used + mdb_claim);
+
        /*
         * free those over-booking quota for metadata blocks
         */
@@ -1814,19 +1816,17 @@ repeat:
 
        md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
        total = md_needed + nrblocks;
+       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
        /*
         * Make quota reservation here to prevent quota overflow
         * later. Real quota accounting is done at pages writeout
         * time.
         */
-       if (vfs_dq_reserve_block(inode, total)) {
-               spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+       if (vfs_dq_reserve_block(inode, total))
                return -EDQUOT;
-       }
 
        if (ext4_claim_free_blocks(sbi, total)) {
-               spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
                vfs_dq_release_reservation_block(inode, total);
                if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
                        yield();
@@ -1834,10 +1834,11 @@ repeat:
                }
                return -ENOSPC;
        }
+       spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
        EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
-       EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
-
+       EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+
        return 0;       /* success */
 }
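The point of this hunk is lock ordering: vfs_dq_reserve_block() takes the quota
locks internally, so calling it while holding i_block_reservation_lock nests a
quota lock inside the per-inode reservation spinlock and risks inversion
against quota paths that take the two in the opposite order. The reservation
lock is therefore dropped before the quota and free-block claims and retaken to
commit the result, which is also why the counters are now added to (+=) rather
than assigned: another task may have changed them in the unlocked window. The
shape of the corrected pattern, as a sketch (compute_needed() is a hypothetical
stand-in for the md_needed/total arithmetic above):

    static int da_reserve_sketch(struct inode *inode, int nrblocks)
    {
            struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
            int md_needed, total;

            spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
            md_needed = compute_needed(inode, nrblocks);  /* hypothetical helper */
            total = md_needed + nrblocks;
            spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);

            if (vfs_dq_reserve_block(inode, total))   /* quota locks taken inside */
                    return -EDQUOT;
            if (ext4_claim_free_blocks(sbi, total)) {
                    vfs_dq_release_reservation_block(inode, total);
                    return -ENOSPC;                   /* the real code may retry first */
            }

            spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
            EXT4_I(inode)->i_reserved_data_blocks += nrblocks;  /* '+=', not '=' */
            EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
            spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
            return 0;
    }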
 
@@ -2900,7 +2901,7 @@ retry:
                ret = write_cache_pages(mapping, wbc, __mpage_da_writepage,
                                        &mpd);
                /*
-                * If we have a contigous extent of pages and we
+                * If we have a contiguous extent of pages and we
                 * haven't done the I/O yet, map the blocks and submit
                 * them for I/O.
                 */
@@ -2966,8 +2967,7 @@ retry:
 out_writepages:
        if (!no_nrwrite_index_update)
                wbc->no_nrwrite_index_update = 0;
-       if (wbc->nr_to_write > nr_to_writebump)
-               wbc->nr_to_write -= nr_to_writebump;
+       wbc->nr_to_write -= nr_to_writebump;
        wbc->range_start = range_start;
        trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
        return ret;
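This is the change the subject line refers to. Earlier in ext4_da_writepages()
a small request is bumped up to sbi->s_mb_stream_request, with nr_to_writebump
holding the difference; on exit the bump must be subtracted unconditionally, or
the caller sees too many pages remaining and undercounts what was written.
Worked example with illustrative numbers: the caller asks for nr_to_write = 8
and the bump is 8, so the loop runs with 16; it writes 10 pages, leaving 6. The
old guard (6 <= 8) skipped the subtraction and returned 6, so the caller
computed 8 - 6 = 2 pages written instead of 10. The new code returns 6 - 8 =
-2, and 8 - (-2) = 10 comes out right; a negative nr_to_write on return is
harmless because the caller only uses the difference.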
@@ -2992,11 +2992,18 @@ static int ext4_nonda_switch(struct super_block *sb)
        if (2 * free_blocks < 3 * dirty_blocks ||
                free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
                /*
-                * free block count is less that 150% of dirty blocks
-                * or free blocks is less that watermark
+                * free block count is less than 150% of dirty blocks
+                * or free blocks is less than watermark
                 */
                return 1;
        }
+       /*
+        * Even if we don't switch but are nearing capacity,
+        * start pushing delalloc when 1/2 of free blocks are dirty.
+        */
+       if (free_blocks < 2 * dirty_blocks)
+               writeback_inodes_sb_if_idle(sb);
+
        return 0;
 }
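ext4_nonda_switch() now distinguishes two thresholds. As before, the file falls
back to non-delayed allocation when free blocks drop below 150% of dirty blocks
(the 2*free < 3*dirty test) or below the watermark. New is the softer trigger:
once free < 2*dirty, writeback is nudged via writeback_inodes_sb_if_idle(), a
2.6.33-era helper that, as I recall it, starts per-superblock writeback only if
none is already in progress, so repeated calls are cheap. Worked example: with
1000 free and 600 dirty blocks, 2000 >= 1800 so delalloc stays on, but 1000 <
1200 kicks writeback; at 700 dirty (2000 < 2100) the function returns 1 and the
write path switches away from delalloc.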
 
@@ -4031,7 +4038,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth,
        int k, err;
 
        *top = 0;
-       /* Make k index the deepest non-null offest + 1 */
+       /* Make k index the deepest non-null offset + 1 */
        for (k = depth; k > 1 && !offsets[k-1]; k--)
                ;
        partial = ext4_get_branch(inode, k, offsets, chain, &err);
@@ -4738,6 +4745,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        struct ext4_inode *raw_inode;
        struct ext4_inode_info *ei;
        struct inode *inode;
+       journal_t *journal = EXT4_SB(sb)->s_journal;
        long ret;
        int block;
 
@@ -4791,6 +4799,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                        ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
        inode->i_size = ext4_isize(raw_inode);
        ei->i_disksize = inode->i_size;
+#ifdef CONFIG_QUOTA
+       ei->i_reserved_quota = 0;
+#endif
        inode->i_generation = le32_to_cpu(raw_inode->i_generation);
        ei->i_block_group = iloc.block_group;
        ei->i_last_alloc_group = ~0;
@@ -4802,6 +4813,31 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                ei->i_data[block] = raw_inode->i_block[block];
        INIT_LIST_HEAD(&ei->i_orphan);
 
+       /*
+        * Set transaction id's of transactions that have to be committed
+        * to finish f[data]sync. We set them to currently running transaction
+        * as we cannot be sure that the inode or some of its metadata isn't
+        * part of the transaction - the inode could have been reclaimed and
+        * now it is reread from disk.
+        */
+       if (journal) {
+               transaction_t *transaction;
+               tid_t tid;
+
+               spin_lock(&journal->j_state_lock);
+               if (journal->j_running_transaction)
+                       transaction = journal->j_running_transaction;
+               else
+                       transaction = journal->j_committing_transaction;
+               if (transaction)
+                       tid = transaction->t_tid;
+               else
+                       tid = journal->j_commit_sequence;
+               spin_unlock(&journal->j_state_lock);
+               ei->i_sync_tid = tid;
+               ei->i_datasync_tid = tid;
+       }
+
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
                ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
                if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
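Because the inode may have been reclaimed and re-read while its last
modification still sits in the journal, ext4_iget() seeds both tids
conservatively: the running transaction if there is one, else the committing
one, else the last committed sequence, all sampled under j_state_lock so the
pair of transaction pointers is read consistently. The consumer is fsync; an
abbreviated sketch from memory of the 2.6.33-era ext4_sync_file() (the real
function also handles data=journal mode and write barriers):

    int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
    {
            struct inode *inode = dentry->d_inode;
            struct ext4_inode_info *ei = EXT4_I(inode);
            journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
            tid_t commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;

            /* Start the commit if it has not begun, then wait for it.
             * An inode untouched since that tid committed costs nothing. */
            if (jbd2_log_start_commit(journal, commit_tid))
                    jbd2_log_wait_commit(journal, commit_tid);
            return 0;
    }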
@@ -5056,6 +5092,7 @@ static int ext4_do_update_inode(handle_t *handle,
                err = rc;
        ei->i_state &= ~EXT4_STATE_NEW;
 
+       ext4_update_inode_fsync_trans(handle, inode, 0);
 out_brelse:
        brelse(bh);
        ext4_std_error(inode->i_sb, err);
@@ -5175,8 +5212,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 
                /* (user+group)*(old+new) structure, inode write (sb,
                 * inode block, ? - but truncate inode update has it) */
-               handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+
-                                       EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
+               handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
+                                       EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
                if (IS_ERR(handle)) {
                        error = PTR_ERR(handle);
                        goto err_out;
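The hand-rolled 2*(INIT+DEL), counting user plus group quotas, becomes a pair
of macros that scale by MAXQUOTAS, so the credit estimate stays correct if
further quota types are added. As far as I recall, the 2.6.33 definitions in
ext4_jbd2.h are simply (sketch):

    /* Worst case: every quota type needs its quota file touched in
     * this transaction. */
    #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
    #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb)  (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))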
@@ -5324,7 +5361,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
  * worst case, the index blocks spread over different block groups
  *
  * If datablocks are discontiguous, they are possible to spread over
- * different block groups too. If they are contiugous, with flexbg,
+ * different block groups too. If they are contiguous, with flexbg,
  * they could still cross a block group boundary.
  *
  * Also account for superblock, inode, quota and xattr blocks
@@ -5400,7 +5437,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
  * Calculate the journal credits for a chunk of data modification.
  *
  * This is called from DIO, fallocate or whoever calling
- * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks.
+ * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks.
  *
  * journal buffers for data blocks are not included here, as DIO
  * and fallocate do not need to journal data buffers.
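The comment fixed here documents ext4_chunk_trans_blocks(). From memory, the
function itself is a thin wrapper (sketch), passing chunk=1 because the blocks
being mapped are contiguous, and excluding the data buffers themselves from the
credit count since DIO and fallocate do not journal data:

    int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
    {
            /* one contiguous chunk of nrblocks -> chunk flag set */
            return ext4_meta_trans_blocks(inode, nrblocks, 1);
    }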