** from within kupdate, it will ignore the immediate flag
*/
-#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/workqueue.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
/* gets a struct reiserfs_journal_list * from a list head */
#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
struct reiserfs_journal *journal);
static int dirty_one_transaction(struct super_block *s,
struct reiserfs_journal_list *jl);
-static void flush_async_commits(void *p);
+static void flush_async_commits(struct work_struct *work);
static void queue_log_writer(struct super_block *s);
/* values for join in do_journal_begin_r */
struct reiserfs_bitmap_node *bn;
static int id;
- bn = reiserfs_kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS,
- p_s_sb);
+ bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS);
if (!bn) {
return NULL;
}
- bn->data = reiserfs_kmalloc(p_s_sb->s_blocksize, GFP_NOFS, p_s_sb);
+ bn->data = kzalloc(p_s_sb->s_blocksize, GFP_NOFS);
if (!bn->data) {
- reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb);
+ kfree(bn);
return NULL;
}
bn->id = id++;
- memset(bn->data, 0, p_s_sb->s_blocksize);
INIT_LIST_HEAD(&bn->list);
return bn;
}
struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
journal->j_used_bitmap_nodes--;
if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
- reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb);
- reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb);
+ kfree(bn->data);
+ kfree(bn);
} else {
list_add(&bn->list, &journal->j_bitmap_nodes);
journal->j_free_bitmap_nodes++;
while (next != &journal->j_bitmap_nodes) {
bn = list_entry(next, struct reiserfs_bitmap_node, list);
list_del(next);
- reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb);
- reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb);
+ kfree(bn->data);
+ kfree(bn);
next = journal->j_bitmap_nodes.next;
journal->j_free_bitmap_nodes--;
}
jl->j_trans_id, jl->j_refcount);
}
if (--jl->j_refcount == 0)
- reiserfs_kfree(jl, sizeof(struct reiserfs_journal_list), s);
+ kfree(jl);
}
/*
spinlock_t * lock, void (fn) (struct buffer_chunk *))
{
int ret = 0;
- if (chunk->nr >= CHUNK_SIZE)
- BUG();
+ BUG_ON(chunk->nr >= CHUNK_SIZE);
chunk->bh[chunk->nr++] = bh;
if (chunk->nr >= CHUNK_SIZE) {
ret = 1;
/* buffer must be locked for __add_jh, should be able to have
* two adds at the same time
*/
- if (bh->b_private)
- BUG();
+ BUG_ON(bh->b_private);
jh->bh = bh;
bh->b_private = jh;
}
get_bh(bh);
if (test_set_buffer_locked(bh)) {
if (!buffer_dirty(bh)) {
- list_del_init(&jh->list);
- list_add(&jh->list, &tmp);
+ list_move(&jh->list, &tmp);
goto loop_next;
}
spin_unlock(lock);
spin_lock(lock);
goto loop_next;
}
+ /* in theory, dirty non-uptodate buffers should never get here,
+ * but the upper layer io error paths still have a few quirks.
+ * Handle them here as gracefully as we can
+ */
+ if (!buffer_uptodate(bh) && buffer_dirty(bh)) {
+ clear_buffer_dirty(bh);
+ ret = -EIO;
+ }
if (buffer_dirty(bh)) {
- list_del_init(&jh->list);
- list_add(&jh->list, &tmp);
+ list_move(&jh->list, &tmp);
add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
} else {
reiserfs_free_jh(bh);
if (!buffer_uptodate(bh)) {
ret = -EIO;
}
+ /* ugly interaction with invalidatepage here.
+ * reiserfs_invalidate_page will pin any buffer that has a valid
+ * journal head from an older transaction. If someone else sets
+ * our buffer dirty after we write it in the first loop, and
+ * then someone truncates the page away, nobody will ever write
+ * the buffer. We're safe if we write the page one last time
+ * after freeing the journal header.
+ */
+ if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
+ spin_unlock(lock);
+ ll_rw_block(WRITE, 1, &bh);
+ spin_lock(lock);
+ }
put_bh(bh);
cond_resched_lock(lock);
}
DEFINE_WAIT(wait);
struct reiserfs_journal *j = SB_JOURNAL(s);
if (atomic_read(&j->j_async_throttle))
- blk_congestion_wait(WRITE, HZ / 10);
+ congestion_wait(WRITE, HZ / 10);
return 0;
}
struct reiserfs_journal *journal = SB_JOURNAL(s);
int barrier = 0;
int retval = 0;
+ int write_len;
reiserfs_check_lock_depth(s, "flush_commit_list");
}
if (!list_empty(&jl->j_bh_list)) {
+ int ret;
unlock_kernel();
- write_ordered_buffers(&journal->j_dirty_buffers_lock,
- journal, jl, &jl->j_bh_list);
+ ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
+ journal, jl, &jl->j_bh_list);
+ if (ret < 0 && retval == 0)
+ retval = ret;
lock_kernel();
}
BUG_ON(!list_empty(&jl->j_bh_list));
/*
* for the description block and all the log blocks, submit any buffers
- * that haven't already reached the disk
+ * that haven't already reached the disk. Try to write at least 256
+ * log blocks. Later on, we will only wait on blocks that correspond
+ * to this transaction, but while we're unplugging we might as well
+ * get a chunk of data on there.
*/
atomic_inc(&journal->j_async_throttle);
- for (i = 0; i < (jl->j_len + 1); i++) {
+ write_len = jl->j_len + 1;
+ if (write_len < 256)
+ write_len = 256;
+ for (i = 0 ; i < write_len ; i++) {
bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
SB_ONDISK_JOURNAL_SIZE(s);
tbh = journal_find_get_block(s, bn);
- if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */
- ll_rw_block(SWRITE, 1, &tbh);
- put_bh(tbh);
+ if (tbh) {
+ if (buffer_dirty(tbh))
+ ll_rw_block(WRITE, 1, &tbh) ;
+ put_bh(tbh) ;
+ }
}
atomic_dec(&journal->j_async_throttle);
+ /* We're skipping the commit if there's an error */
+ if (retval || reiserfs_is_journal_aborted(journal))
+ barrier = 0;
+
/* wait on everything written so far before writing the commit
* if we are in barrier mode, send the commit down now
*/
BUG_ON(atomic_read(&(jl->j_commit_left)) != 1);
if (!barrier) {
- if (buffer_dirty(jl->j_commit_bh))
- BUG();
- mark_buffer_dirty(jl->j_commit_bh);
- sync_dirty_buffer(jl->j_commit_bh);
+ /* If there was a write error in the journal - we can't commit
+ * this transaction - it will be invalid and, if successful,
+ * will just end up propagating the write error out to
+ * the file system. */
+ if (likely(!retval && !reiserfs_is_journal_aborted (journal))) {
+ if (buffer_dirty(jl->j_commit_bh))
+ BUG();
+ mark_buffer_dirty(jl->j_commit_bh) ;
+ sync_dirty_buffer(jl->j_commit_bh) ;
+ }
} else
wait_on_buffer(jl->j_commit_bh);
return NULL;
}
+/*
+ * Walk the hprev links starting from cn and look at every other cnode
+ * that refers to the same block (same super block and same block number).
+ *
+ * Returns 0 as soon as one of those cnodes belongs to a journal list
+ * whose j_commit_left counter is still non-zero, and 1 when no such
+ * cnode is found.
+ *
+ * NOTE(review): the name suggests that the hprev direction holds the
+ * newer transactions for this block -- confirm against the code that
+ * inserts cnodes into the hash.
+ */
+static int newer_jl_done(struct reiserfs_journal_cnode *cn)
+{
+ struct super_block *sb = cn->sb;
+ b_blocknr_t blocknr = cn->blocknr;
+
+ /* start with the entry before cn; cn itself is not tested */
+ cn = cn->hprev;
+ while (cn) {
+ if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist &&
+ atomic_read(&cn->jlist->j_commit_left) != 0)
+ return 0;
+ cn = cn->hprev;
+ }
+ return 1;
+}
+
static void remove_journal_hash(struct super_block *,
struct reiserfs_journal_cnode **,
struct reiserfs_journal_list *, unsigned long,
return err;
}
+/*
+ * Inspect the real-block cnodes of jl; the result is used as a
+ * precondition before an old journal list is handed to
+ * flush_used_journal_lists().
+ *
+ * Returns 1 when the list has no blocks (j_len == 0) or its j_nonzerolen
+ * counter is zero.  Returns 0 when a cnode that still has a buffer head
+ * fails the newer_jl_done() check.  The s argument is not used here.
+ *
+ * NOTE(review): falling out of the loop also returns 0, so for a list
+ * with j_len != 0 and j_nonzerolen != 0 this function never returns 1.
+ * Confirm whether the final return was meant to be 1.
+ */
+static int test_transaction(struct super_block *s,
+ struct reiserfs_journal_list *jl)
+{
+ struct reiserfs_journal_cnode *cn;
+
+ if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0)
+ return 1;
+
+ cn = jl->j_realblock;
+ while (cn) {
+ /* if the blocknr == 0, this has been cleared from the hash,
+ ** skip it
+ */
+ if (cn->blocknr == 0) {
+ goto next;
+ }
+ if (cn->bh && !newer_jl_done(cn))
+ return 0;
+ next:
+ cn = cn->next;
+ /* the list may be long, so offer to reschedule on every step */
+ cond_resched();
+ }
+ return 0;
+}
+
static int write_one_transaction(struct super_block *s,
struct reiserfs_journal_list *jl,
struct buffer_chunk *chunk)
static void free_journal_ram(struct super_block *p_s_sb)
{
struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
- reiserfs_kfree(journal->j_current_jl,
- sizeof(struct reiserfs_journal_list), p_s_sb);
+ kfree(journal->j_current_jl);
journal->j_num_lists--;
vfree(journal->j_cnode_free_orig);
}
trans_id = get_desc_trans_id(desc);
/* now we know we've got a good transaction, and it was inside the valid time ranges */
- log_blocks =
- reiserfs_kmalloc(get_desc_trans_len(desc) *
- sizeof(struct buffer_head *), GFP_NOFS, p_s_sb);
- real_blocks =
- reiserfs_kmalloc(get_desc_trans_len(desc) *
- sizeof(struct buffer_head *), GFP_NOFS, p_s_sb);
+ log_blocks = kmalloc(get_desc_trans_len(desc) *
+ sizeof(struct buffer_head *), GFP_NOFS);
+ real_blocks = kmalloc(get_desc_trans_len(desc) *
+ sizeof(struct buffer_head *), GFP_NOFS);
if (!log_blocks || !real_blocks) {
brelse(c_bh);
brelse(d_bh);
- reiserfs_kfree(log_blocks,
- get_desc_trans_len(desc) *
- sizeof(struct buffer_head *), p_s_sb);
- reiserfs_kfree(real_blocks,
- get_desc_trans_len(desc) *
- sizeof(struct buffer_head *), p_s_sb);
+ kfree(log_blocks);
+ kfree(real_blocks);
reiserfs_warning(p_s_sb,
"journal-1169: kmalloc failed, unable to mount FS");
return -1;
brelse_array(real_blocks, i);
brelse(c_bh);
brelse(d_bh);
- reiserfs_kfree(log_blocks,
- get_desc_trans_len(desc) *
- sizeof(struct buffer_head *), p_s_sb);
- reiserfs_kfree(real_blocks,
- get_desc_trans_len(desc) *
- sizeof(struct buffer_head *), p_s_sb);
+ kfree(log_blocks);
+ kfree(real_blocks);
return -1;
}
}
brelse_array(real_blocks, get_desc_trans_len(desc));
brelse(c_bh);
brelse(d_bh);
- reiserfs_kfree(log_blocks,
- get_desc_trans_len(desc) *
- sizeof(struct buffer_head *), p_s_sb);
- reiserfs_kfree(real_blocks,
- get_desc_trans_len(desc) *
- sizeof(struct buffer_head *), p_s_sb);
+ kfree(log_blocks);
+ kfree(real_blocks);
return -1;
}
memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data,
get_desc_trans_len(desc) - i);
brelse(c_bh);
brelse(d_bh);
- reiserfs_kfree(log_blocks,
- get_desc_trans_len(desc) *
- sizeof(struct buffer_head *), p_s_sb);
- reiserfs_kfree(real_blocks,
- get_desc_trans_len(desc) *
- sizeof(struct buffer_head *), p_s_sb);
+ kfree(log_blocks);
+ kfree(real_blocks);
return -1;
}
brelse(real_blocks[i]);
journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
journal->j_last_flush_trans_id = trans_id;
journal->j_trans_id = trans_id + 1;
+ /* check for trans_id overflow */
+ if (journal->j_trans_id == 0)
+ journal->j_trans_id = 10;
brelse(c_bh);
brelse(d_bh);
- reiserfs_kfree(log_blocks,
- le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *),
- p_s_sb);
- reiserfs_kfree(real_blocks,
- le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *),
- p_s_sb);
+ kfree(log_blocks);
+ kfree(real_blocks);
return 0;
}
return 1;
}
jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data);
- if (le32_to_cpu(jh->j_first_unflushed_offset) >= 0 &&
- le32_to_cpu(jh->j_first_unflushed_offset) <
+ if (le32_to_cpu(jh->j_first_unflushed_offset) <
SB_ONDISK_JOURNAL_SIZE(p_s_sb)
&& le32_to_cpu(jh->j_last_flush_trans_id) > 0) {
oldest_start =
journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
journal->j_trans_id =
le32_to_cpu(jh->j_last_flush_trans_id) + 1;
+ /* check for trans_id overflow */
+ if (journal->j_trans_id == 0)
+ journal->j_trans_id = 10;
journal->j_last_flush_trans_id =
le32_to_cpu(jh->j_last_flush_trans_id);
journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
{
struct reiserfs_journal_list *jl;
- retry:
- jl = reiserfs_kmalloc(sizeof(struct reiserfs_journal_list), GFP_NOFS,
- s);
- if (!jl) {
- yield();
- goto retry;
- }
- memset(jl, 0, sizeof(*jl));
+ jl = kzalloc(sizeof(struct reiserfs_journal_list),
+ GFP_NOFS | __GFP_NOFAIL);
INIT_LIST_HEAD(&jl->j_list);
INIT_LIST_HEAD(&jl->j_working_list);
INIT_LIST_HEAD(&jl->j_tail_bh_list);
journal->j_cnode_used = 0;
journal->j_must_wait = 0;
+ if (journal->j_cnode_free == 0) {
+ reiserfs_warning(p_s_sb, "journal-2004: Journal cnode memory "
+ "allocation failed (%ld bytes). Journal is "
+ "too large for available memory. Usually "
+ "this is due to a journal that is too large.",
+ sizeof (struct reiserfs_journal_cnode) * num_cnodes);
+ goto free_and_return;
+ }
+
init_journal_hash(p_s_sb);
jl = journal->j_current_jl;
jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl);
if (reiserfs_mounted_fs_count <= 1)
commit_wq = create_workqueue("reiserfs");
- INIT_WORK(&journal->j_work, flush_async_commits, p_s_sb);
+ INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
+ journal->j_work_sb = p_s_sb;
return 0;
free_and_return:
free_journal_ram(p_s_sb);
journal->j_cnode_free < (journal->j_trans_max * 3)) {
return 1;
}
+ /* protected by the BKL here */
+ journal->j_len_alloc += new_alloc;
+ th->t_blocks_allocated += new_alloc ;
return 0;
}
struct reiserfs_journal *journal = SB_JOURNAL(sb);
unsigned long bcount = journal->j_bcount;
while (1) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(1);
+ schedule_timeout_uninterruptible(1);
journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
while ((atomic_read(&journal->j_wcount) > 0 ||
atomic_read(&journal->j_jlock)) &&
int retval;
reiserfs_check_lock_depth(p_s_sb, "journal_begin");
- if (nblocks > journal->j_trans_max)
- BUG();
+ BUG_ON(nblocks > journal->j_trans_max);
PROC_INFO_INC(p_s_sb, journal.journal_being);
/* set here for journal_join */
if (reiserfs_transaction_running(s)) {
th = current->journal_info;
th->t_refcount++;
- if (th->t_refcount < 2) {
- BUG();
- }
+ BUG_ON(th->t_refcount < 2);
+
return th;
}
- th = reiserfs_kmalloc(sizeof(struct reiserfs_transaction_handle),
- GFP_NOFS, s);
+ th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS);
if (!th)
return NULL;
ret = journal_begin(th, s, nblocks);
if (ret) {
- reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle),
- s);
+ kfree(th);
return NULL;
}
ret = -EIO;
if (th->t_refcount == 0) {
SB_JOURNAL(s)->j_persistent_trans--;
- reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle),
- s);
+ kfree(th);
}
return ret;
}
** pointer
*/
th->t_handle_save = cur_th;
- if (cur_th && cur_th->t_refcount > 1) {
- BUG();
- }
+ BUG_ON(cur_th && cur_th->t_refcount > 1);
return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN);
}
** pointer
*/
th->t_handle_save = cur_th;
- if (cur_th && cur_th->t_refcount > 1) {
- BUG();
- }
+ BUG_ON(cur_th && cur_th->t_refcount > 1);
return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT);
}
current->journal_info = th;
}
ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG);
- if (current->journal_info != th)
- BUG();
+ BUG_ON(current->journal_info != th);
/* I guess this boils down to being the reciprocal of clm-2100 above.
* If do_journal_begin_r fails, we need to put it back, since journal_end
/* we aren't allowed to close a nested transaction on a different
** filesystem from the one in the task struct
*/
- if (cur_th->t_super != th->t_super)
- BUG();
+ BUG_ON(cur_th->t_super != th->t_super);
if (th != cur_th) {
memcpy(current->journal_info, th, sizeof(*th));
BUG_ON(!th->t_trans_id);
/* you can sync while nested, very, very bad */
- if (th->t_refcount > 1) {
- BUG();
- }
+ BUG_ON(th->t_refcount > 1);
if (journal->j_len == 0) {
reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb),
1);
/*
** writeback the pending async commits to disk
*/
-static void flush_async_commits(void *p)
+static void flush_async_commits(struct work_struct *work)
{
- struct super_block *p_s_sb = p;
- struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
+ struct reiserfs_journal *journal =
+ container_of(work, struct reiserfs_journal, j_work.work);
+ struct super_block *p_s_sb = journal->j_work_sb;
struct reiserfs_journal_list *jl;
struct list_head *entry;
flush_commit_list(p_s_sb, jl, 1);
}
unlock_kernel();
- /*
- * this is a little racey, but there's no harm in missing
- * the filemap_fdata_write
- */
- if (!atomic_read(&journal->j_async_throttle)
- && !reiserfs_is_journal_aborted(journal)) {
- atomic_inc(&journal->j_async_throttle);
- filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping);
- atomic_dec(&journal->j_async_throttle);
- }
}
/*
** will be dealt with by next transaction that actually writes something, but should be taken
** care of in this trans
*/
- if (journal->j_len == 0) {
- BUG();
- }
+ BUG_ON(journal->j_len == 0);
+
/* if wcount > 0, and we are called to with flush or commit_now,
** we wait on j_join_wait. We will wake up when the last writer has
** finished the transaction, and started it on its way to the disk.
unlock_journal(p_s_sb);
}
}
- if (journal->j_trans_id == trans_id) {
- BUG();
- }
+ BUG_ON(journal->j_trans_id == trans_id);
+
if (commit_now
&& journal_list_still_alive(p_s_sb, trans_id)
&& wait_on_commit) {
entry = journal->j_journal_list.next;
jl = JOURNAL_LIST_ENTRY(entry);
/* this check should always be run, to send old lists to disk */
- if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) {
+ if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) &&
+ atomic_read(&jl->j_commit_left) == 0 &&
+ test_transaction(s, jl)) {
flush_used_journal_lists(s, jl);
} else {
break;
int cur_write_start = 0; /* start index of current log write */
int old_start;
int i;
- int flush = flags & FLUSH_ALL;
- int wait_on_commit = flags & WAIT;
+ int flush;
+ int wait_on_commit;
struct reiserfs_journal_list *jl, *temp_jl;
struct list_head *entry, *safe;
unsigned long jindex;
BUG_ON(th->t_refcount > 1);
BUG_ON(!th->t_trans_id);
+ /* protect flush_older_commits from making mistakes if the
+ transaction ID counter overflows. */
+ if (th->t_trans_id == ~0UL)
+ flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
+ flush = flags & FLUSH_ALL;
+ wait_on_commit = flags & WAIT;
+
put_fs_excl();
current->journal_info = th->t_handle_save;
reiserfs_check_lock_depth(p_s_sb, "journal end");
flush = 1;
}
#ifdef REISERFS_PREALLOCATE
- /* quota ops might need to nest, setup the journal_info pointer for them */
+ /* quota ops might need to nest, setup the journal_info pointer for them
+ * and raise the refcount so that it is > 0. */
current->journal_info = th;
+ th->t_refcount++;
reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into
* the transaction */
+ th->t_refcount--;
current->journal_info = th->t_handle_save;
#endif
set_commit_trans_len(commit, journal->j_len);
/* special check in case all buffers in the journal were marked for not logging */
- if (journal->j_len == 0) {
- BUG();
- }
+ BUG_ON(journal->j_len == 0);
/* we're about to dirty all the log blocks, mark the description block
* dirty now too. Don't mark the commit block dirty until all the
journal->j_first = NULL;
journal->j_len = 0;
journal->j_trans_start_time = 0;
- journal->j_trans_id++;
+ /* check for trans_id overflow */
+ if (++journal->j_trans_id == 0)
+ journal->j_trans_id = 10;
journal->j_current_jl->j_trans_id = journal->j_trans_id;
journal->j_must_wait = 0;
journal->j_len_alloc = 0;
journal, jl, &jl->j_tail_bh_list);
lock_kernel();
}
- if (!list_empty(&jl->j_tail_bh_list))
- BUG();
+ BUG_ON(!list_empty(&jl->j_tail_bh_list));
up(&jl->j_commit_lock);
/* honor the flush wishes from the caller, simple commits can