string: factorize skip_spaces and export it to be generally available

[safe/jmp/linux-2.6] / fs / jbd / commit.c
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c

index a003d50..4bd8825 100644 (file)
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -20,6 +20,7 @@
  #include <linux/slab.h>
  #include <linux/mm.h>
  #include <linux/pagemap.h>
+#include <linux/bio.h>
  
  /*
   * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -36,7 +37,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
  
  /*
   * When an ext3-ordered file is truncated, it is possible that many pages are
- * not sucessfully freed, because they are attached to a committing transaction.
+ * not successfully freed, because they are attached to a committing transaction.
   * After the transaction commits, these pages are left on the LRU, with no
   * ->mapping, and with attached buffers.  These pages are trivially reclaimable
   * by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -45,8 +46,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
   * So here, we have a buffer which has just come off the forget list.  Look to
   * see if we can strip all buffers from the backing page.
   *
- * Called under lock_journal(), and possibly under journal_datalist_lock.  The
- * caller provided us with a ref against the buffer, and we drop that here.
+ * Called under journal->j_list_lock.  The caller provided us with a ref
+ * against the buffer, and we drop that here.
   */
  static void release_buffer_page(struct buffer_head *bh)
  {
@@ -63,7 +64,7 @@ static void release_buffer_page(struct buffer_head *bh)
                 goto nope;
  
         /* OK, it's a truncated page */
-       if (TestSetPageLocked(page))
+       if (!trylock_page(page))
                 goto nope;
  
         page_cache_get(page);
@@ -78,6 +79,19 @@ nope:
  }
  
  /*
+ * Decrement reference counter for data buffer. If it has been marked
+ * 'BH_Freed', release it and the page to which it belongs if possible.
+ */
+static void release_data_buffer(struct buffer_head *bh)
+{
+       if (buffer_freed(bh)) {
+               clear_buffer_freed(bh);
+               release_buffer_page(bh);
+       } else
+               put_bh(bh);
+}
+
+/*
   * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
   * held.  For ranking reasons we must trylock.  If we lose, schedule away and
   * return 0.  j_list_lock is dropped in this case.
@@ -104,7 +118,8 @@ static int journal_write_commit_record(journal_t *journal,
  {
         struct journal_head *descriptor;
         struct buffer_head *bh;
-       int i, ret;
+       journal_header_t *header;
+       int ret;
         int barrier_done = 0;
  
         if (is_journal_aborted(journal))
@@ -116,13 +131,10 @@ static int journal_write_commit_record(journal_t *journal,
  
         bh = jh2bh(descriptor);
  
-       /* AKPM: buglet - add `i' to tmp! */
-       for (i = 0; i < bh->b_size; i += 512) {
-               journal_header_t *tmp = (journal_header_t*)bh->b_data;
-               tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
-               tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
-               tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
-       }
+       header = (journal_header_t *)(bh->b_data);
+       header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
+       header->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
+       header->h_sequence = cpu_to_be32(commit_transaction->t_tid);
  
         JBUFFER_TRACE(descriptor, "write commit block");
         set_buffer_dirty(bh);
@@ -131,6 +143,8 @@ static int journal_write_commit_record(journal_t *journal,
                 barrier_done = 1;
         }
         ret = sync_dirty_buffer(bh);
+       if (barrier_done)
+               clear_buffer_ordered(bh);
         /* is it possible for another commit to fail at roughly
          * the same time as this one?  If so, we don't want to
          * trust the barrier flag in the super, but instead want
@@ -148,7 +162,6 @@ static int journal_write_commit_record(journal_t *journal,
                 spin_unlock(&journal->j_state_lock);
  
                 /* And try again, without the barrier */
-               clear_buffer_ordered(bh);
                 set_buffer_uptodate(bh);
                 set_buffer_dirty(bh);
                 ret = sync_dirty_buffer(bh);
@@ -159,28 +172,31 @@ static int journal_write_commit_record(journal_t *journal,
         return (ret == -EIO);
  }
  
-static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
+static void journal_do_submit_data(struct buffer_head **wbuf, int bufs,
+                                  int write_op)
  {
         int i;
  
         for (i = 0; i < bufs; i++) {
                 wbuf[i]->b_end_io = end_buffer_write_sync;
                 /* We use-up our safety reference in submit_bh() */
-               submit_bh(WRITE, wbuf[i]);
+               submit_bh(write_op, wbuf[i]);
         }
  }
  
  /*
   *  Submit all the data buffers to disk
   */
-static void journal_submit_data_buffers(journal_t *journal,
-                               transaction_t *commit_transaction)
+static int journal_submit_data_buffers(journal_t *journal,
+                                      transaction_t *commit_transaction,
+                                      int write_op)
  {
         struct journal_head *jh;
         struct buffer_head *bh;
         int locked;
         int bufs = 0;
         struct buffer_head **wbuf = journal->j_wbuf;
+       int err = 0;
  
         /*
          * Whenever we unlock the journal and sleep, things can get added
@@ -208,11 +224,11 @@ write_out_data:
                  * blocking lock_buffer().
                  */
                 if (buffer_dirty(bh)) {
-                       if (test_set_buffer_locked(bh)) {
+                       if (!trylock_buffer(bh)) {
                                 BUFFER_TRACE(bh, "needs blocking lock");
                                 spin_unlock(&journal->j_list_lock);
                                 /* Write out all data to prevent deadlocks */
-                               journal_do_submit_data(wbuf, bufs);
+                               journal_do_submit_data(wbuf, bufs, write_op);
                                 bufs = 0;
                                 lock_buffer(bh);
                                 spin_lock(&journal->j_list_lock);
@@ -225,14 +241,14 @@ write_out_data:
                         spin_lock(&journal->j_list_lock);
                 }
                 /* Someone already cleaned up the buffer? */
-               if (!buffer_jbd(bh)
+               if (!buffer_jbd(bh) || bh2jh(bh) != jh
                         || jh->b_transaction != commit_transaction
                         || jh->b_jlist != BJ_SyncData) {
                         jbd_unlock_bh_state(bh);
                         if (locked)
                                 unlock_buffer(bh);
                         BUFFER_TRACE(bh, "already cleaned up");
-                       put_bh(bh);
+                       release_data_buffer(bh);
                         continue;
                 }
                 if (locked && test_clear_buffer_dirty(bh)) {
@@ -243,7 +259,7 @@ write_out_data:
                         jbd_unlock_bh_state(bh);
                         if (bufs == journal->j_wbufsize) {
                                 spin_unlock(&journal->j_list_lock);
-                               journal_do_submit_data(wbuf, bufs);
+                               journal_do_submit_data(wbuf, bufs, write_op);
                                 bufs = 0;
                                 goto write_out_data;
                         }
@@ -254,24 +270,28 @@ write_out_data:
                         put_bh(bh);
                 } else {
                         BUFFER_TRACE(bh, "writeout complete: unfile");
+                       if (unlikely(!buffer_uptodate(bh)))
+                               err = -EIO;
                         __journal_unfile_buffer(jh);
                         jbd_unlock_bh_state(bh);
                         if (locked)
                                 unlock_buffer(bh);
                         journal_remove_journal_head(bh);
-                       /* Once for our safety reference, once for
+                       /* One put for our safety reference, the other for
                          * journal_remove_journal_head() */
                         put_bh(bh);
-                       put_bh(bh);
+                       release_data_buffer(bh);
                 }
  
-               if (lock_need_resched(&journal->j_list_lock)) {
+               if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
                         spin_unlock(&journal->j_list_lock);
                         goto write_out_data;
                 }
         }
         spin_unlock(&journal->j_list_lock);
-       journal_do_submit_data(wbuf, bufs);
+       journal_do_submit_data(wbuf, bufs, write_op);
+
+       return err;
  }
  
  /*
@@ -288,7 +308,9 @@ void journal_commit_transaction(journal_t *journal)
         int bufs;
         int flags;
         int err;
-       unsigned long blocknr;
+       unsigned int blocknr;
+       ktime_t start_time;
+       u64 commit_time;
         char *tagp = NULL;
         journal_header_t *header;
         journal_block_tag_t *tag = NULL;
@@ -296,6 +318,7 @@ void journal_commit_transaction(journal_t *journal)
         int first_tag = 0;
         int tag_flag;
         int i;
+       int write_op = WRITE;
  
         /*
          * First job: lock down the current transaction and wait for
@@ -328,6 +351,13 @@ void journal_commit_transaction(journal_t *journal)
         spin_lock(&journal->j_state_lock);
         commit_transaction->t_state = T_LOCKED;
  
+       /*
+        * Use plugged writes here, since we want to submit several before
+        * we unplug the device. We don't do explicit unplugging in here,
+        * instead we rely on sync_buffer() doing the unplug for us.
+        */
+       if (commit_transaction->t_synchronous_commit)
+               write_op = WRITE_SYNC_PLUG;
         spin_lock(&commit_transaction->t_handle_lock);
         while (commit_transaction->t_updates) {
                 DEFINE_WAIT(wait);
@@ -375,7 +405,7 @@ void journal_commit_transaction(journal_t *journal)
                         struct buffer_head *bh = jh2bh(jh);
  
                         jbd_lock_bh_state(bh);
-                       jbd_slab_free(jh->b_committed_data, bh->b_size);
+                       jbd_free(jh->b_committed_data, bh->b_size);
                         jh->b_committed_data = NULL;
                         jbd_unlock_bh_state(bh);
                 }
@@ -401,6 +431,7 @@ void journal_commit_transaction(journal_t *journal)
         commit_transaction->t_state = T_FLUSH;
         journal->j_committing_transaction = commit_transaction;
         journal->j_running_transaction = NULL;
+       start_time = ktime_get();
         commit_transaction->t_log_start = journal->j_head;
         wake_up(&journal->j_wait_transaction_locked);
         spin_unlock(&journal->j_state_lock);
@@ -408,27 +439,11 @@ void journal_commit_transaction(journal_t *journal)
         jbd_debug (3, "JBD: commit phase 2\n");
  
         /*
-        * First, drop modified flag: all accesses to the buffers
-        * will be tracked for a new trasaction only -bzzz
-        */
-       spin_lock(&journal->j_list_lock);
-       if (commit_transaction->t_buffers) {
-               new_jh = jh = commit_transaction->t_buffers->b_tnext;
-               do {
-                       J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
-                                       new_jh->b_modified == 0);
-                       new_jh->b_modified = 0;
-                       new_jh = new_jh->b_tnext;
-               } while (new_jh != jh);
-       }
-       spin_unlock(&journal->j_list_lock);
-
-       /*
          * Now start flushing things to disk, in the order they appear
          * on the transaction lists.  Data blocks go first.
          */
-       err = 0;
-       journal_submit_data_buffers(journal, commit_transaction);
+       err = journal_submit_data_buffers(journal, commit_transaction,
+                                         write_op);
  
         /*
          * Wait for all previously submitted IO to complete.
@@ -443,16 +458,29 @@ void journal_commit_transaction(journal_t *journal)
                 if (buffer_locked(bh)) {
                         spin_unlock(&journal->j_list_lock);
                         wait_on_buffer(bh);
-                       if (unlikely(!buffer_uptodate(bh)))
-                               err = -EIO;
                         spin_lock(&journal->j_list_lock);
                 }
+               if (unlikely(!buffer_uptodate(bh))) {
+                       if (!trylock_page(bh->b_page)) {
+                               spin_unlock(&journal->j_list_lock);
+                               lock_page(bh->b_page);
+                               spin_lock(&journal->j_list_lock);
+                       }
+                       if (bh->b_page->mapping)
+                               set_bit(AS_EIO, &bh->b_page->mapping->flags);
+
+                       unlock_page(bh->b_page);
+                       SetPageError(bh->b_page);
+                       err = -EIO;
+               }
                 if (!inverted_lock(journal, bh)) {
                         put_bh(bh);
                         spin_lock(&journal->j_list_lock);
                         continue;
                 }
-               if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
+               if (buffer_jbd(bh) && bh2jh(bh) == jh &&
+                   jh->b_transaction == commit_transaction &&
+                   jh->b_jlist == BJ_Locked) {
                         __journal_unfile_buffer(jh);
                         jbd_unlock_bh_state(bh);
                         journal_remove_journal_head(bh);
@@ -460,17 +488,23 @@ void journal_commit_transaction(journal_t *journal)
                 } else {
                         jbd_unlock_bh_state(bh);
                 }
-               put_bh(bh);
+               release_data_buffer(bh);
                 cond_resched_lock(&journal->j_list_lock);
         }
         spin_unlock(&journal->j_list_lock);
  
-       if (err)
-               __journal_abort_hard(journal);
+       if (err) {
+               char b[BDEVNAME_SIZE];
  
-       journal_write_revoke_records(journal, commit_transaction);
+               printk(KERN_WARNING
+                       "JBD: Detected IO errors while flushing file data "
+                       "on %s\n", bdevname(journal->j_fs_dev, b));
+               if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR)
+                       journal_abort(journal, err);
+               err = 0;
+       }
  
-       jbd_debug(3, "JBD: commit phase 2\n");
+       journal_write_revoke_records(journal, commit_transaction, write_op);
  
         /*
          * If we found any dirty or locked buffers, then we should have
@@ -487,7 +521,12 @@ void journal_commit_transaction(journal_t *journal)
          * transaction!  Now comes the tricky part: we need to write out
          * metadata.  Loop over the transaction's entire buffer list:
          */
+       spin_lock(&journal->j_state_lock);
         commit_transaction->t_state = T_COMMIT;
+       spin_unlock(&journal->j_state_lock);
+
+       J_ASSERT(commit_transaction->t_nr_buffers <=
+                commit_transaction->t_outstanding_credits);
  
         descriptor = NULL;
         bufs = 0;
@@ -498,9 +537,10 @@ void journal_commit_transaction(journal_t *journal)
                 jh = commit_transaction->t_buffers;
  
                 /* If we're in abort mode, we just un-journal the buffer and
-                  release it for background writing. */
+                  release it. */
  
                 if (is_journal_aborted(journal)) {
+                       clear_buffer_jbddirty(jh2bh(jh));
                         JBUFFER_TRACE(jh, "journal is aborting: refile");
                         journal_refile_buffer(journal, jh);
                         /* If that was the last one, we need to clean up
@@ -524,7 +564,7 @@ void journal_commit_transaction(journal_t *journal)
  
                         descriptor = journal_get_descriptor_buffer(journal);
                         if (!descriptor) {
-                               __journal_abort_hard(journal);
+                               journal_abort(journal, -EIO);
                                 continue;
                         }
  
@@ -557,7 +597,7 @@ void journal_commit_transaction(journal_t *journal)
                    and repeat this loop: we'll fall into the
                    refile-on-abort condition above. */
                 if (err) {
-                       __journal_abort_hard(journal);
+                       journal_abort(journal, err);
                         continue;
                 }
  
@@ -634,7 +674,7 @@ start_journal_io:
                                 clear_buffer_dirty(bh);
                                 set_buffer_uptodate(bh);
                                 bh->b_end_io = journal_end_buffer_io_sync;
-                               submit_bh(WRITE, bh);
+                               submit_bh(write_op, bh);
                         }
                         cond_resched();
  
@@ -742,13 +782,16 @@ wait_for_iobuf:
                 /* AKPM: bforget here */
         }
  
+       if (err)
+               journal_abort(journal, err);
+
         jbd_debug(3, "JBD: commit phase 6\n");
  
         if (journal_write_commit_record(journal, commit_transaction))
                 err = -EIO;
  
         if (err)
-               __journal_abort_hard(journal);
+               journal_abort(journal, err);
  
         /* End of a transaction!  Finally, we can do checkpoint
             processing: any buffers committed as a result of this
@@ -792,14 +835,14 @@ restart_loop:
                  * Otherwise, we can just throw away the frozen data now.
                  */
                 if (jh->b_committed_data) {
-                       jbd_slab_free(jh->b_committed_data, bh->b_size);
+                       jbd_free(jh->b_committed_data, bh->b_size);
                         jh->b_committed_data = NULL;
                         if (jh->b_frozen_data) {
                                 jh->b_committed_data = jh->b_frozen_data;
                                 jh->b_frozen_data = NULL;
                         }
                 } else if (jh->b_frozen_data) {
-                       jbd_slab_free(jh->b_frozen_data, bh->b_size);
+                       jbd_free(jh->b_frozen_data, bh->b_size);
                         jh->b_frozen_data = NULL;
                 }
  
@@ -832,6 +875,8 @@ restart_loop:
                 if (buffer_jbddirty(bh)) {
                         JBUFFER_TRACE(jh, "add to new checkpointing trans");
                         __journal_insert_checkpoint(jh, commit_transaction);
+                       if (is_journal_aborted(journal))
+                               clear_buffer_jbddirty(bh);
                         JBUFFER_TRACE(jh, "refile for checkpoint writeback");
                         __journal_refile_buffer(jh);
                         jbd_unlock_bh_state(bh);
@@ -858,10 +903,10 @@ restart_loop:
         }
         spin_unlock(&journal->j_list_lock);
         /*
-        * This is a bit sleazy.  We borrow j_list_lock to protect
-        * journal->j_committing_transaction in __journal_remove_checkpoint.
-        * Really, __journal_remove_checkpoint should be using j_state_lock but
-        * it's a bit hassle to hold that across __journal_remove_checkpoint
+        * This is a bit sleazy.  We use j_list_lock to protect transition
+        * of a transaction into T_FINISHED state and calling
+        * __journal_drop_transaction(). Otherwise we could race with
+        * other checkpointing code processing the transaction...
          */
         spin_lock(&journal->j_state_lock);
         spin_lock(&journal->j_list_lock);
@@ -885,6 +930,18 @@ restart_loop:
         J_ASSERT(commit_transaction == journal->j_committing_transaction);
         journal->j_commit_sequence = commit_transaction->t_tid;
         journal->j_committing_transaction = NULL;
+       commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
+
+       /*
+        * New average = 3/4 of this commit's time + 1/4 of the old average,
+        * so the estimate follows the most recent commit times closely.
+        */
+       if (likely(journal->j_average_commit_time))
+               journal->j_average_commit_time = (commit_time*3 +
+                               journal->j_average_commit_time) / 4;
+       else
+               journal->j_average_commit_time = commit_time;
+
         spin_unlock(&journal->j_state_lock);
  
         if (commit_transaction->t_checkpoint_list == NULL &&