/*
- * linux/fs/transaction.c
+ * linux/fs/jbd/transaction.c
*
* Written by Stephen C. Tweedie <sct@redhat.com>, 1998
*
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/timer.h>
-#include <linux/smp_lock.h>
#include <linux/mm.h>
#include <linux/highmem.h>
+static void __journal_temp_unlink_buffer(struct journal_head *jh);
+
/*
* get_transaction: obtain a new transaction_t object.
*
spin_lock_init(&transaction->t_handle_lock);
/* Set up the commit timer for the new transaction. */
- journal->j_commit_timer.expires = transaction->t_expires;
+ journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
add_timer(&journal->j_commit_timer);
J_ASSERT(journal->j_running_transaction == NULL);
alloc_transaction:
if (!journal->j_running_transaction) {
- new_transaction = jbd_kmalloc(sizeof(*new_transaction),
- GFP_NOFS);
+ new_transaction = kzalloc(sizeof(*new_transaction),
+ GFP_NOFS|__GFP_NOFAIL);
if (!new_transaction) {
ret = -ENOMEM;
goto out;
}
- memset(new_transaction, 0, sizeof(*new_transaction));
}
jbd_debug(3, "New handle %p going live.\n", handle);
return ret;
}
+static struct lock_class_key jbd_handle_key;
+
/* Allocate a new handle. This should probably be in a slab... */
static handle_t *new_handle(int nblocks)
{
handle->h_buffer_credits = nblocks;
handle->h_ref = 1;
+ lockdep_init_map(&handle->h_lockdep_map, "jbd_handle", &jbd_handle_key, 0);
+
return handle;
}
jbd_free_handle(handle);
current->journal_info = NULL;
handle = ERR_PTR(err);
+ goto out;
}
+
+ lock_map_acquire(&handle->h_lockdep_map);
+
+out:
return handle;
}
/**
- * int journal_restart() - restart a handle .
+ * int journal_restart() - restart a handle.
* @handle: handle to restart
* @nblocks: nr credits requested
*
goto done;
/*
+ * this is the first time this transaction is touching this buffer,
+ * reset the modified flag
+ */
+ jh->b_modified = 0;
+
+ /*
* If there is already a copy-out version of this buffer, then we don't
* need to make another one
*/
JBUFFER_TRACE(jh, "allocate memory for buffer");
jbd_unlock_bh_state(bh);
frozen_buffer =
- jbd_slab_alloc(jh2bh(jh)->b_size,
+ jbd_alloc(jh2bh(jh)->b_size,
GFP_NOFS);
if (!frozen_buffer) {
printk(KERN_EMERG
"%s: OOM for frozen_buffer\n",
- __FUNCTION__);
+ __func__);
JBUFFER_TRACE(jh, "oom!");
error = -ENOMEM;
jbd_lock_bh_state(bh);
out:
if (unlikely(frozen_buffer)) /* It's usually NULL */
- jbd_slab_free(frozen_buffer, bh->b_size);
+ jbd_free(frozen_buffer, bh->b_size);
JBUFFER_TRACE(jh, "exit");
return error;
if (jh->b_transaction == NULL) {
jh->b_transaction = transaction;
+
+ /* first access by this transaction */
+ jh->b_modified = 0;
+
JBUFFER_TRACE(jh, "file as BJ_Reserved");
__journal_file_buffer(jh, transaction, BJ_Reserved);
} else if (jh->b_transaction == journal->j_committing_transaction) {
+ /* first access by this transaction */
+ jh->b_modified = 0;
+
JBUFFER_TRACE(jh, "set next transaction");
jh->b_next_transaction = transaction;
}
}
/**
- * int journal_get_undo_access() - Notify intent to modify metadata with
- * non-rewindable consequences
+ * int journal_get_undo_access() - Notify intent to modify metadata with non-rewindable consequences
* @handle: transaction
* @bh: buffer to undo
* @credits: store the number of taken credits here (if not NULL)
repeat:
if (!jh->b_committed_data) {
- committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
+ committed_data = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS);
if (!committed_data) {
printk(KERN_EMERG "%s: No memory for committed data\n",
- __FUNCTION__);
+ __func__);
err = -ENOMEM;
goto out;
}
out:
journal_put_journal_head(jh);
if (unlikely(committed_data))
- jbd_slab_free(committed_data, bh->b_size);
+ jbd_free(committed_data, bh->b_size);
return err;
}
/**
- * int journal_dirty_data() - mark a buffer as containing dirty data which
- * needs to be flushed before we can commit the
- * current transaction.
+ * int journal_dirty_data() - mark a buffer as containing dirty data to be flushed
* @handle: transaction
* @bh: bufferhead to mark
*
+ * Description:
+ * Mark a buffer as containing dirty data which needs to be flushed before
+ * we can commit the current transaction.
+ *
* The buffer is placed on the transaction's data list and is marked as
* belonging to the transaction.
*
*/
jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
+
+ /* Now that we have bh_state locked, are we really still mapped? */
+ if (!buffer_mapped(bh)) {
+ JBUFFER_TRACE(jh, "unmapped buffer, bailing out");
+ goto no_journal;
+ }
+
if (jh->b_transaction) {
JBUFFER_TRACE(jh, "has transaction");
if (jh->b_transaction != handle->h_transaction) {
sync_dirty_buffer(bh);
jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
+ /* Since we dropped the lock... */
+ if (!buffer_mapped(bh)) {
+ JBUFFER_TRACE(jh, "buffer got unmapped");
+ goto no_journal;
+ }
/* The buffer may become locked again at any
time if it is redirtied */
}
}
/**
- * int journal_dirty_metadata() - mark a buffer as containing dirty metadata
+ * int journal_dirty_metadata() - mark a buffer as containing dirty metadata
* @handle: transaction to add buffer to.
* @bh: buffer to mark
*
- * mark dirty metadata which needs to be journaled as part of the current
+ * Mark dirty metadata which needs to be journaled as part of the current
* transaction.
*
* The buffer is placed on the transaction's metadata list and is marked
}
/* That test should have eliminated the following case: */
- J_ASSERT_JH(jh, jh->b_frozen_data == 0);
+ J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
JBUFFER_TRACE(jh, "file as BJ_Metadata");
spin_lock(&journal->j_list_lock);
struct journal_head *jh;
int drop_reserve = 0;
int err = 0;
+ int was_modified = 0;
BUFFER_TRACE(bh, "entry");
goto not_jbd;
}
+ /* keep track of wether or not this transaction modified us */
+ was_modified = jh->b_modified;
+
/*
* The buffer's going from the transaction, we must drop
* all references -bzzz
JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
- drop_reserve = 1;
+ /*
+ * we only want to drop a reference if this transaction
+ * modified the buffer
+ */
+ if (was_modified)
+ drop_reserve = 1;
/*
* We are no longer going to journal this buffer.
if (jh->b_next_transaction) {
J_ASSERT(jh->b_next_transaction == transaction);
jh->b_next_transaction = NULL;
- drop_reserve = 1;
+
+ /*
+ * only drop a reference if this transaction modified
+ * the buffer
+ */
+ if (was_modified)
+ drop_reserve = 1;
}
}
spin_unlock(&journal->j_state_lock);
}
+ lock_map_release(&handle->h_lockdep_map);
+
jbd_free_handle(handle);
return err;
}
-/**int journal_force_commit() - force any uncommitted transactions
+/**
+ * int journal_force_commit() - force any uncommitted transactions
* @journal: journal to force
*
* For synchronous operations: force any uncommitted transactions
*
* Called under j_list_lock. The journal may not be locked.
*/
-void __journal_temp_unlink_buffer(struct journal_head *jh)
+static void __journal_temp_unlink_buffer(struct journal_head *jh)
{
struct journal_head **list = NULL;
transaction_t *transaction;
J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
if (jh->b_jlist != BJ_None)
- J_ASSERT_JH(jh, transaction != 0);
+ J_ASSERT_JH(jh, transaction != NULL);
switch (jh->b_jlist) {
case BJ_None:
if (buffer_locked(bh) || buffer_dirty(bh))
goto out;
- if (jh->b_next_transaction != 0)
+ if (jh->b_next_transaction != NULL)
goto out;
spin_lock(&journal->j_list_lock);
- if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) {
+ if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) {
if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) {
/* A written-back ordered data buffer */
JBUFFER_TRACE(jh, "release data");
journal_remove_journal_head(bh);
__brelse(bh);
}
- } else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) {
+ } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
/* written-back checkpointed metadata buffer */
if (jh->b_jlist == BJ_None) {
JBUFFER_TRACE(jh, "remove from checkpoint list");
return;
}
+/*
+ * journal_try_to_free_buffers() could race with journal_commit_transaction()
+ * The latter might still hold the a count on buffers when inspecting
+ * them on t_syncdata_list or t_locked_list.
+ *
+ * journal_try_to_free_buffers() will call this function to
+ * wait for the current transaction to finish syncing data buffers, before
+ * tryinf to free that buffer.
+ *
+ * Called with journal->j_state_lock held.
+ */
+static void journal_wait_for_transaction_sync_data(journal_t *journal)
+{
+ transaction_t *transaction = NULL;
+ tid_t tid;
+
+ spin_lock(&journal->j_state_lock);
+ transaction = journal->j_committing_transaction;
+
+ if (!transaction) {
+ spin_unlock(&journal->j_state_lock);
+ return;
+ }
+
+ tid = transaction->t_tid;
+ spin_unlock(&journal->j_state_lock);
+ log_wait_commit(journal, tid);
+}
/**
* int journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
* @page: to try and free
- * @unused_gfp_mask: unused
+ * @gfp_mask: we use the mask to detect how hard should we try to release
+ * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
+ * release the buffers.
*
*
* For all the buffers on this page,
* journal_try_to_free_buffer() is changing its state. But that
* cannot happen because we never reallocate freed data as metadata
* while the data is part of a transaction. Yes?
+ *
+ * Return 0 on failure, 1 on success
*/
int journal_try_to_free_buffers(journal_t *journal,
- struct page *page, gfp_t unused_gfp_mask)
+ struct page *page, gfp_t gfp_mask)
{
struct buffer_head *head;
struct buffer_head *bh;
if (buffer_jbd(bh))
goto busy;
} while ((bh = bh->b_this_page) != head);
+
ret = try_to_free_buffers(page);
+
+ /*
+ * There are a number of places where journal_try_to_free_buffers()
+ * could race with journal_commit_transaction(), the later still
+ * holds the reference to the buffers to free while processing them.
+ * try_to_free_buffers() failed to free those buffers. Some of the
+ * caller of releasepage() request page buffers to be dropped, otherwise
+ * treat the fail-to-free as errors (such as generic_file_direct_IO())
+ *
+ * So, if the caller of try_to_release_page() wants the synchronous
+ * behaviour(i.e make sure buffers are dropped upon return),
+ * let's wait for the current transaction to finish flush of
+ * dirty data buffers, then try to free those buffers again,
+ * with the journal locked.
+ */
+ if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
+ journal_wait_for_transaction_sync_data(journal);
+ ret = try_to_free_buffers(page);
+ }
+
busy:
return ret;
}
}
}
} else if (transaction == journal->j_committing_transaction) {
+ JBUFFER_TRACE(jh, "on committing transaction");
if (jh->b_jlist == BJ_Locked) {
/*
* The buffer is on the committing transaction's locked
* can remove it's next_transaction pointer from the
* running transaction if that is set, but nothing
* else. */
- JBUFFER_TRACE(jh, "on committing transaction");
set_buffer_freed(bh);
if (jh->b_next_transaction) {
J_ASSERT(jh->b_next_transaction ==
* i_size already for this truncate so recovery will not
* expose the disk blocks we are discarding here.) */
J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
+ JBUFFER_TRACE(jh, "on running transaction");
may_free = __dispose_buffer(jh, transaction);
}
}
/**
- * void journal_invalidatepage()
- * @journal: journal to use for flush...
+ * void journal_invalidatepage() - invalidate a journal page
+ * @journal: journal to use for flush
* @page: page to flush
* @offset: length of page to invalidate.
*
* Reap page buffers containing data after offset in page.
- *
*/
void journal_invalidatepage(journal_t *journal,
struct page *page,
J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
J_ASSERT_JH(jh, jh->b_transaction == transaction ||
- jh->b_transaction == 0);
+ jh->b_transaction == NULL);
if (jh->b_transaction && jh->b_jlist == jlist)
return;
jh->b_transaction = jh->b_next_transaction;
jh->b_next_transaction = NULL;
__journal_file_buffer(jh, jh->b_transaction,
- was_dirty ? BJ_Metadata : BJ_Reserved);
+ jh->b_modified ? BJ_Metadata : BJ_Reserved);
J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
if (was_dirty)