[PATCH] remember mode of reiserfs journal
[safe/jmp/linux-2.6] / fs / reiserfs / journal.c
index 47c9f43..9643c3b 100644 (file)
 **                     from within kupdate, it will ignore the immediate flag
 */
 
-#include <linux/config.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
 #include <linux/time.h>
-#include <asm/semaphore.h>
-
+#include <linux/semaphore.h>
 #include <linux/vmalloc.h>
 #include <linux/reiserfs_fs.h>
-
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/fcntl.h>
 #include <linux/workqueue.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
+#include <linux/backing-dev.h>
+#include <linux/uaccess.h>
+
+#include <asm/system.h>
 
 /* gets a struct reiserfs_journal_list * from a list head */
 #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
@@ -104,7 +102,7 @@ static int release_journal_dev(struct super_block *super,
                               struct reiserfs_journal *journal);
 static int dirty_one_transaction(struct super_block *s,
                                 struct reiserfs_journal_list *jl);
-static void flush_async_commits(void *p);
+static void flush_async_commits(struct work_struct *work);
 static void queue_log_writer(struct super_block *s);
 
 /* values for join in do_journal_begin_r */
@@ -219,11 +217,12 @@ static void allocate_bitmap_nodes(struct super_block *p_s_sb)
        }
 }
 
-static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block,
+static int set_bit_in_list_bitmap(struct super_block *p_s_sb,
+                                 b_blocknr_t block,
                                  struct reiserfs_list_bitmap *jb)
 {
-       int bmap_nr = block / (p_s_sb->s_blocksize << 3);
-       int bit_nr = block % (p_s_sb->s_blocksize << 3);
+       unsigned int bmap_nr = block / (p_s_sb->s_blocksize << 3);
+       unsigned int bit_nr = block % (p_s_sb->s_blocksize << 3);
 
        if (!jb->bitmaps[bmap_nr]) {
                jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb);
@@ -239,7 +238,7 @@ static void cleanup_bitmap_list(struct super_block *p_s_sb,
        if (jb->bitmaps == NULL)
                return;
 
-       for (i = 0; i < SB_BMAP_NR(p_s_sb); i++) {
+       for (i = 0; i < reiserfs_bmap_count(p_s_sb); i++) {
                if (jb->bitmaps[i]) {
                        free_bitmap_node(p_s_sb, jb->bitmaps[i]);
                        jb->bitmaps[i] = NULL;
@@ -289,7 +288,7 @@ static int free_bitmap_nodes(struct super_block *p_s_sb)
 */
 int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb,
                                   struct reiserfs_list_bitmap *jb_array,
-                                  int bmap_nr)
+                                  unsigned int bmap_nr)
 {
        int i;
        int failed = 0;
@@ -483,7 +482,7 @@ static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
 **
 */
 int reiserfs_in_journal(struct super_block *p_s_sb,
-                       int bmap_nr, int bit_nr, int search_all,
+                       unsigned int bmap_nr, int bit_nr, int search_all,
                        b_blocknr_t * next_zero_bit)
 {
        struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
@@ -557,13 +556,13 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
 static inline void lock_journal(struct super_block *p_s_sb)
 {
        PROC_INFO_INC(p_s_sb, journal.lock_journal);
-       down(&SB_JOURNAL(p_s_sb)->j_lock);
+       mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex);
 }
 
 /* unlock the current transaction */
 static inline void unlock_journal(struct super_block *p_s_sb)
 {
-       up(&SB_JOURNAL(p_s_sb)->j_lock);
+       mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex);
 }
 
 static inline void get_journal_list(struct reiserfs_journal_list *jl)
@@ -615,6 +614,31 @@ static int journal_list_still_alive(struct super_block *s,
        return 0;
 }
 
+/*
+ * If page->mapping was null, we failed to truncate this page for
+ * some reason.  Most likely because it was truncated after being
+ * logged via data=journal.
+ *
+ * This does a check to see if the buffer belongs to one of these
+ * lost pages before doing the final put_bh.  If page->mapping was
+ * null, it tries to free buffers on the page, which should make the
+ * final page_cache_release drop the page from the lru.
+ */
+static void release_buffer_page(struct buffer_head *bh)
+{
+       struct page *page = bh->b_page;
+       if (!page->mapping && trylock_page(page)) {
+               page_cache_get(page);
+               put_bh(bh);
+               if (!page->mapping)
+                       try_to_free_buffers(page);
+               unlock_page(page);
+               page_cache_release(page);
+       } else {
+               put_bh(bh);
+       }
+}
+
 static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
 {
        char b[BDEVNAME_SIZE];
@@ -628,8 +652,9 @@ static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
                set_buffer_uptodate(bh);
        else
                clear_buffer_uptodate(bh);
+
        unlock_buffer(bh);
-       put_bh(bh);
+       release_buffer_page(bh);
 }
 
 static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate)
@@ -719,8 +744,7 @@ static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
                        spinlock_t * lock, void (fn) (struct buffer_chunk *))
 {
        int ret = 0;
-       if (chunk->nr >= CHUNK_SIZE)
-               BUG();
+       BUG_ON(chunk->nr >= CHUNK_SIZE);
        chunk->bh[chunk->nr++] = bh;
        if (chunk->nr >= CHUNK_SIZE) {
                ret = 1;
@@ -789,8 +813,7 @@ static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
                /* buffer must be locked for __add_jh, should be able to have
                 * two adds at the same time
                 */
-               if (bh->b_private)
-                       BUG();
+               BUG_ON(bh->b_private);
                jh->bh = bh;
                bh->b_private = jh;
        }
@@ -832,10 +855,9 @@ static int write_ordered_buffers(spinlock_t * lock,
                jh = JH_ENTRY(list->next);
                bh = jh->bh;
                get_bh(bh);
-               if (test_set_buffer_locked(bh)) {
+               if (!trylock_buffer(bh)) {
                        if (!buffer_dirty(bh)) {
-                               list_del_init(&jh->list);
-                               list_add(&jh->list, &tmp);
+                               list_move(&jh->list, &tmp);
                                goto loop_next;
                        }
                        spin_unlock(lock);
@@ -855,8 +877,7 @@ static int write_ordered_buffers(spinlock_t * lock,
                        ret = -EIO;
                }
                if (buffer_dirty(bh)) {
-                       list_del_init(&jh->list);
-                       list_add(&jh->list, &tmp);
+                       list_move(&jh->list, &tmp);
                        add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
                } else {
                        reiserfs_free_jh(bh);
@@ -970,12 +991,13 @@ static int flush_older_commits(struct super_block *s,
        }
        return 0;
 }
-int reiserfs_async_progress_wait(struct super_block *s)
+
+static int reiserfs_async_progress_wait(struct super_block *s)
 {
        DEFINE_WAIT(wait);
        struct reiserfs_journal *j = SB_JOURNAL(s);
        if (atomic_read(&j->j_async_throttle))
-               blk_congestion_wait(WRITE, HZ / 10);
+               congestion_wait(WRITE, HZ / 10);
        return 0;
 }
 
@@ -990,7 +1012,7 @@ static int flush_commit_list(struct super_block *s,
                             struct reiserfs_journal_list *jl, int flushall)
 {
        int i;
-       int bn;
+       b_blocknr_t bn;
        struct buffer_head *tbh = NULL;
        unsigned long trans_id = jl->j_trans_id;
        struct reiserfs_journal *journal = SB_JOURNAL(s);
@@ -1021,9 +1043,9 @@ static int flush_commit_list(struct super_block *s,
        }
 
        /* make sure nobody is trying to flush this one at the same time */
-       down(&jl->j_commit_lock);
+       mutex_lock(&jl->j_commit_mutex);
        if (!journal_list_still_alive(s, trans_id)) {
-               up(&jl->j_commit_lock);
+               mutex_unlock(&jl->j_commit_mutex);
                goto put_jl;
        }
        BUG_ON(jl->j_trans_id == 0);
@@ -1033,7 +1055,7 @@ static int flush_commit_list(struct super_block *s,
                if (flushall) {
                        atomic_set(&(jl->j_older_commits_done), 1);
                }
-               up(&jl->j_commit_lock);
+               mutex_unlock(&jl->j_commit_mutex);
                goto put_jl;
        }
 
@@ -1114,7 +1136,7 @@ static int flush_commit_list(struct super_block *s,
        if (!barrier) {
                /* If there was a write error in the journal - we can't commit
                 * this transaction - it will be invalid and, if successful,
-                * will just end up propogating the write error out to
+                * will just end up propagating the write error out to
                 * the file system. */
                if (likely(!retval && !reiserfs_is_journal_aborted (journal))) {
                        if (buffer_dirty(jl->j_commit_bh))
@@ -1129,7 +1151,7 @@ static int flush_commit_list(struct super_block *s,
 
        /* If there was a write error in the journal - we can't commit this
         * transaction - it will be invalid and, if successful, will just end
-        * up propogating the write error out to the filesystem. */
+        * up propagating the write error out to the filesystem. */
        if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
 #ifdef CONFIG_REISERFS_CHECK
                reiserfs_warning(s, "journal-615: buffer write failed");
@@ -1157,13 +1179,13 @@ static int flush_commit_list(struct super_block *s,
        if (flushall) {
                atomic_set(&(jl->j_older_commits_done), 1);
        }
-       up(&jl->j_commit_lock);
+       mutex_unlock(&jl->j_commit_mutex);
       put_jl:
        put_journal_list(s, jl);
 
        if (retval)
                reiserfs_abort(s, retval, "Journal write error in %s",
-                              __FUNCTION__);
+                              __func__);
        put_fs_excl();
        return retval;
 }
@@ -1189,6 +1211,21 @@ static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
        return NULL;
 }
 
+static int newer_jl_done(struct reiserfs_journal_cnode *cn)
+{
+       struct super_block *sb = cn->sb;
+       b_blocknr_t blocknr = cn->blocknr;
+
+       cn = cn->hprev;
+       while (cn) {
+               if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist &&
+                   atomic_read(&cn->jlist->j_commit_left) != 0)
+                                   return 0;
+               cn = cn->hprev;
+       }
+       return 1;
+}
+
 static void remove_journal_hash(struct super_block *,
                                struct reiserfs_journal_cnode **,
                                struct reiserfs_journal_list *, unsigned long,
@@ -1372,8 +1409,8 @@ static int flush_journal_list(struct super_block *s,
 
        /* if flushall == 0, the lock is already held */
        if (flushall) {
-               down(&journal->j_flush_sem);
-       } else if (!down_trylock(&journal->j_flush_sem)) {
+               mutex_lock(&journal->j_flush_mutex);
+       } else if (mutex_trylock(&journal->j_flush_mutex)) {
                BUG();
        }
 
@@ -1453,7 +1490,7 @@ static int flush_journal_list(struct super_block *s,
                }
 
                /* if someone has this block in a newer transaction, just make
-                ** sure they are commited, and don't try writing it to disk
+                ** sure they are committed, and don't try writing it to disk
                 */
                if (pjl) {
                        if (atomic_read(&pjl->j_commit_left))
@@ -1495,7 +1532,7 @@ static int flush_journal_list(struct super_block *s,
                        reiserfs_warning(s,
                                         "clm-2082: Unable to flush buffer %llu in %s",
                                         (unsigned long long)saved_bh->
-                                        b_blocknr, __FUNCTION__);
+                                        b_blocknr, __func__);
                }
              free_cnode:
                last = cn;
@@ -1535,9 +1572,10 @@ static int flush_journal_list(struct super_block *s,
                                BUG_ON(!test_clear_buffer_journal_dirty
                                       (cn->bh));
 
-                               /* undo the inc from journal_mark_dirty */
+                               /* drop one ref for us */
                                put_bh(cn->bh);
-                               brelse(cn->bh);
+                               /* drop one ref for journal_mark_dirty */
+                               release_buffer_page(cn->bh);
                        }
                        cn = cn->next;
                }
@@ -1546,7 +1584,7 @@ static int flush_journal_list(struct super_block *s,
        if (err)
                reiserfs_abort(s, -EIO,
                               "Write error while pushing transaction to disk in %s",
-                              __FUNCTION__);
+                              __func__);
       flush_older_and_return:
 
        /* before we can update the journal header block, we _must_ flush all 
@@ -1576,7 +1614,7 @@ static int flush_journal_list(struct super_block *s,
                if (err)
                        reiserfs_abort(s, -EIO,
                                       "Write error while updating journal header in %s",
-                                      __FUNCTION__);
+                                      __func__);
        }
        remove_all_from_journal_list(s, jl, 0);
        list_del_init(&jl->j_list);
@@ -1602,11 +1640,36 @@ static int flush_journal_list(struct super_block *s,
        jl->j_state = 0;
        put_journal_list(s, jl);
        if (flushall)
-               up(&journal->j_flush_sem);
+               mutex_unlock(&journal->j_flush_mutex);
        put_fs_excl();
        return err;
 }
 
+static int test_transaction(struct super_block *s,
+                            struct reiserfs_journal_list *jl)
+{
+       struct reiserfs_journal_cnode *cn;
+
+       if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0)
+               return 1;
+
+       cn = jl->j_realblock;
+       while (cn) {
+               /* if the blocknr == 0, this has been cleared from the hash,
+                ** skip it
+                */
+               if (cn->blocknr == 0) {
+                       goto next;
+               }
+               if (cn->bh && !newer_jl_done(cn))
+                       return 0;
+             next:
+               cn = cn->next;
+               cond_resched();
+       }
+       return 0;
+}
+
 static int write_one_transaction(struct super_block *s,
                                 struct reiserfs_journal_list *jl,
                                 struct buffer_chunk *chunk)
@@ -1707,12 +1770,12 @@ static int kupdate_transactions(struct super_block *s,
        struct reiserfs_journal *journal = SB_JOURNAL(s);
        chunk.nr = 0;
 
-       down(&journal->j_flush_sem);
+       mutex_lock(&journal->j_flush_mutex);
        if (!journal_list_still_alive(s, orig_trans_id)) {
                goto done;
        }
 
-       /* we've got j_flush_sem held, nobody is going to delete any
+       /* we've got j_flush_mutex held, nobody is going to delete any
         * of these lists out from underneath us
         */
        while ((num_trans && transactions_flushed < num_trans) ||
@@ -1747,7 +1810,7 @@ static int kupdate_transactions(struct super_block *s,
        }
 
       done:
-       up(&journal->j_flush_sem);
+       mutex_unlock(&journal->j_flush_mutex);
        return ret;
 }
 
@@ -2227,6 +2290,9 @@ static int journal_read_transaction(struct super_block *p_s_sb,
        journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
        journal->j_last_flush_trans_id = trans_id;
        journal->j_trans_id = trans_id + 1;
+       /* check for trans_id overflow */
+       if (journal->j_trans_id == 0)
+               journal->j_trans_id = 10;
        brelse(c_bh);
        brelse(d_bh);
        kfree(log_blocks);
@@ -2240,8 +2306,9 @@ static int journal_read_transaction(struct super_block *p_s_sb,
    Right now it is only used from journal code. But later we might use it
    from other places.
    Note: Do not use journal_getblk/sb_getblk functions here! */
-static struct buffer_head *reiserfs_breada(struct block_device *dev, int block,
-                                          int bufsize, unsigned int max_block)
+static struct buffer_head *reiserfs_breada(struct block_device *dev,
+                                          b_blocknr_t block, int bufsize,
+                                          b_blocknr_t max_block)
 {
        struct buffer_head *bhlist[BUFNR];
        unsigned int blocks = BUFNR;
@@ -2319,8 +2386,7 @@ static int journal_read(struct super_block *p_s_sb)
                return 1;
        }
        jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data);
-       if (le32_to_cpu(jh->j_first_unflushed_offset) >= 0 &&
-           le32_to_cpu(jh->j_first_unflushed_offset) <
+       if (le32_to_cpu(jh->j_first_unflushed_offset) <
            SB_ONDISK_JOURNAL_SIZE(p_s_sb)
            && le32_to_cpu(jh->j_last_flush_trans_id) > 0) {
                oldest_start =
@@ -2451,6 +2517,9 @@ static int journal_read(struct super_block *p_s_sb)
                journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
                journal->j_trans_id =
                    le32_to_cpu(jh->j_last_flush_trans_id) + 1;
+               /* check for trans_id overflow */
+               if (journal->j_trans_id == 0)
+                       journal->j_trans_id = 10;
                journal->j_last_flush_trans_id =
                    le32_to_cpu(jh->j_last_flush_trans_id);
                journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
@@ -2485,7 +2554,7 @@ static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
        INIT_LIST_HEAD(&jl->j_working_list);
        INIT_LIST_HEAD(&jl->j_tail_bh_list);
        INIT_LIST_HEAD(&jl->j_bh_list);
-       sema_init(&jl->j_commit_lock, 1);
+       mutex_init(&jl->j_commit_mutex);
        SB_JOURNAL(s)->j_num_lists++;
        get_journal_list(jl);
        return jl;
@@ -2503,12 +2572,10 @@ static int release_journal_dev(struct super_block *super,
 
        result = 0;
 
-       if (journal->j_dev_file != NULL) {
-               result = filp_close(journal->j_dev_file, NULL);
-               journal->j_dev_file = NULL;
-               journal->j_dev_bd = NULL;
-       } else if (journal->j_dev_bd != NULL) {
-               result = blkdev_put(journal->j_dev_bd);
+       if (journal->j_dev_bd != NULL) {
+               if (journal->j_dev_bd->bd_dev != super->s_dev)
+                       bd_release(journal->j_dev_bd);
+               result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode);
                journal->j_dev_bd = NULL;
        }
 
@@ -2526,13 +2593,12 @@ static int journal_init_dev(struct super_block *super,
 {
        int result;
        dev_t jdev;
-       int blkdev_mode = FMODE_READ | FMODE_WRITE;
+       fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE;
        char b[BDEVNAME_SIZE];
 
        result = 0;
 
        journal->j_dev_bd = NULL;
-       journal->j_dev_file = NULL;
        jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
            new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
 
@@ -2542,6 +2608,7 @@ static int journal_init_dev(struct super_block *super,
        /* there is no "jdev" option and journal is on separate device */
        if ((!jdev_name || !jdev_name[0])) {
                journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode);
+               journal->j_dev_mode = blkdev_mode;
                if (IS_ERR(journal->j_dev_bd)) {
                        result = PTR_ERR(journal->j_dev_bd);
                        journal->j_dev_bd = NULL;
@@ -2549,35 +2616,91 @@ static int journal_init_dev(struct super_block *super,
                                         "cannot init journal device '%s': %i",
                                         __bdevname(jdev, b), result);
                        return result;
-               } else if (jdev != super->s_dev)
+               } else if (jdev != super->s_dev) {
+                       result = bd_claim(journal->j_dev_bd, journal);
+                       if (result) {
+                               blkdev_put(journal->j_dev_bd, blkdev_mode);
+                               return result;
+                       }
+
                        set_blocksize(journal->j_dev_bd, super->s_blocksize);
+               }
+
                return 0;
        }
 
-       journal->j_dev_file = filp_open(jdev_name, 0, 0);
-       if (!IS_ERR(journal->j_dev_file)) {
-               struct inode *jdev_inode = journal->j_dev_file->f_mapping->host;
-               if (!S_ISBLK(jdev_inode->i_mode)) {
-                       reiserfs_warning(super, "journal_init_dev: '%s' is "
-                                        "not a block device", jdev_name);
-                       result = -ENOTBLK;
-                       release_journal_dev(super, journal);
-               } else {
-                       /* ok */
-                       journal->j_dev_bd = I_BDEV(jdev_inode);
-                       set_blocksize(journal->j_dev_bd, super->s_blocksize);
-                       reiserfs_info(super,
-                                     "journal_init_dev: journal device: %s\n",
-                                     bdevname(journal->j_dev_bd, b));
-               }
-       } else {
-               result = PTR_ERR(journal->j_dev_file);
-               journal->j_dev_file = NULL;
+       journal->j_dev_mode = blkdev_mode;
+       journal->j_dev_bd = open_bdev_exclusive(jdev_name,
+                                               blkdev_mode, journal);
+       if (IS_ERR(journal->j_dev_bd)) {
+               result = PTR_ERR(journal->j_dev_bd);
+               journal->j_dev_bd = NULL;
                reiserfs_warning(super,
                                 "journal_init_dev: Cannot open '%s': %i",
                                 jdev_name, result);
+               return result;
        }
-       return result;
+
+       set_blocksize(journal->j_dev_bd, super->s_blocksize);
+       reiserfs_info(super,
+                     "journal_init_dev: journal device: %s\n",
+                     bdevname(journal->j_dev_bd, b));
+       return 0;
+}
+
+/**
+ * When creating/tuning a file system user can assign some
+ * journal params within boundaries which depend on the ratio
+ * blocksize/standard_blocksize.
+ *
+ * For blocks >= standard_blocksize transaction size should
+ * be not less then JOURNAL_TRANS_MIN_DEFAULT, and not more
+ * then JOURNAL_TRANS_MAX_DEFAULT.
+ *
+ * For blocks < standard_blocksize these boundaries should be
+ * decreased proportionally.
+ */
+#define REISERFS_STANDARD_BLKSIZE (4096)
+
+static int check_advise_trans_params(struct super_block *p_s_sb,
+                                    struct reiserfs_journal *journal)
+{
+        if (journal->j_trans_max) {
+               /* Non-default journal params.
+                  Do sanity check for them. */
+               int ratio = 1;
+               if (p_s_sb->s_blocksize < REISERFS_STANDARD_BLKSIZE)
+                       ratio = REISERFS_STANDARD_BLKSIZE / p_s_sb->s_blocksize;
+
+               if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio ||
+                   journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio ||
+                   SB_ONDISK_JOURNAL_SIZE(p_s_sb) / journal->j_trans_max <
+                   JOURNAL_MIN_RATIO) {
+                       reiserfs_warning(p_s_sb,
+                                "sh-462: bad transaction max size (%u). FSCK?",
+                                journal->j_trans_max);
+                       return 1;
+               }
+               if (journal->j_max_batch != (journal->j_trans_max) *
+                       JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT) {
+                       reiserfs_warning(p_s_sb,
+                               "sh-463: bad transaction max batch (%u). FSCK?",
+                               journal->j_max_batch);
+                       return 1;
+               }
+       } else {
+               /* Default journal params.
+                   The file system was created by old version
+                  of mkreiserfs, so some fields contain zeros,
+                  and we need to advise proper values for them */
+               if (p_s_sb->s_blocksize != REISERFS_STANDARD_BLKSIZE)
+                       reiserfs_panic(p_s_sb, "sh-464: bad blocksize (%u)",
+                                      p_s_sb->s_blocksize);
+               journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT;
+               journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT;
+               journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE;
+       }
+       return 0;
 }
 
 /*
@@ -2608,7 +2731,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
        journal->j_persistent_trans = 0;
        if (reiserfs_allocate_list_bitmaps(p_s_sb,
                                           journal->j_list_bitmap,
-                                          SB_BMAP_NR(p_s_sb)))
+                                          reiserfs_bmap_count(p_s_sb)))
                goto free_and_return;
        allocate_bitmap_nodes(p_s_sb);
 
@@ -2616,7 +2739,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
        SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ?
                                                 REISERFS_OLD_DISK_OFFSET_IN_BYTES
                                                 / p_s_sb->s_blocksize +
-                                                SB_BMAP_NR(p_s_sb) +
+                                                reiserfs_bmap_count(p_s_sb) +
                                                 1 :
                                                 REISERFS_DISK_OFFSET_IN_BYTES /
                                                 p_s_sb->s_blocksize + 2);
@@ -2675,49 +2798,8 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
            le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age);
        journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE;
 
-       if (journal->j_trans_max) {
-               /* make sure these parameters are available, assign it if they are not */
-               __u32 initial = journal->j_trans_max;
-               __u32 ratio = 1;
-
-               if (p_s_sb->s_blocksize < 4096)
-                       ratio = 4096 / p_s_sb->s_blocksize;
-
-               if (SB_ONDISK_JOURNAL_SIZE(p_s_sb) / journal->j_trans_max <
-                   JOURNAL_MIN_RATIO)
-                       journal->j_trans_max =
-                           SB_ONDISK_JOURNAL_SIZE(p_s_sb) / JOURNAL_MIN_RATIO;
-               if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio)
-                       journal->j_trans_max =
-                           JOURNAL_TRANS_MAX_DEFAULT / ratio;
-               if (journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio)
-                       journal->j_trans_max =
-                           JOURNAL_TRANS_MIN_DEFAULT / ratio;
-
-               if (journal->j_trans_max != initial)
-                       reiserfs_warning(p_s_sb,
-                                        "sh-461: journal_init: wrong transaction max size (%u). Changed to %u",
-                                        initial, journal->j_trans_max);
-
-               journal->j_max_batch = journal->j_trans_max *
-                   JOURNAL_MAX_BATCH_DEFAULT / JOURNAL_TRANS_MAX_DEFAULT;
-       }
-
-       if (!journal->j_trans_max) {
-               /*we have the file system was created by old version of mkreiserfs 
-                  so this field contains zero value */
-               journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT;
-               journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT;
-               journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE;
-
-               /* for blocksize >= 4096 - max transaction size is 1024. For block size < 4096
-                  trans max size is decreased proportionally */
-               if (p_s_sb->s_blocksize < 4096) {
-                       journal->j_trans_max /= (4096 / p_s_sb->s_blocksize);
-                       journal->j_max_batch = (journal->j_trans_max) * 9 / 10;
-               }
-       }
-
+       if (check_advise_trans_params(p_s_sb, journal) != 0)
+               goto free_and_return;
        journal->j_default_max_commit_age = journal->j_max_commit_age;
 
        if (commit_max_age != 0) {
@@ -2756,8 +2838,8 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
        journal->j_last = NULL;
        journal->j_first = NULL;
        init_waitqueue_head(&(journal->j_join_wait));
-       sema_init(&journal->j_lock, 1);
-       sema_init(&journal->j_flush_sem, 1);
+       mutex_init(&journal->j_mutex);
+       mutex_init(&journal->j_flush_mutex);
 
        journal->j_trans_id = 10;
        journal->j_mount_id = 10;
@@ -2795,7 +2877,8 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
        if (reiserfs_mounted_fs_count <= 1)
                commit_wq = create_workqueue("reiserfs");
 
-       INIT_WORK(&journal->j_work, flush_async_commits, p_s_sb);
+       INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
+       journal->j_work_sb = p_s_sb;
        return 0;
       free_and_return:
        free_journal_ram(p_s_sb);
@@ -2823,6 +2906,9 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
            journal->j_cnode_free < (journal->j_trans_max * 3)) {
                return 1;
        }
+       /* protected by the BKL here */
+       journal->j_len_alloc += new_alloc;
+       th->t_blocks_allocated += new_alloc ;
        return 0;
 }
 
@@ -2873,7 +2959,7 @@ static void queue_log_writer(struct super_block *s)
        set_current_state(TASK_UNINTERRUPTIBLE);
        if (test_bit(J_WRITERS_QUEUED, &journal->j_state))
                schedule();
-       current->state = TASK_RUNNING;
+       __set_current_state(TASK_RUNNING);
        remove_wait_queue(&journal->j_join_wait, &wait);
 }
 
@@ -2922,8 +3008,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
        int retval;
 
        reiserfs_check_lock_depth(p_s_sb, "journal_begin");
-       if (nblocks > journal->j_trans_max)
-               BUG();
+       BUG_ON(nblocks > journal->j_trans_max);
 
        PROC_INFO_INC(p_s_sb, journal.journal_being);
        /* set here for journal_join */
@@ -3039,9 +3124,8 @@ struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
        if (reiserfs_transaction_running(s)) {
                th = current->journal_info;
                th->t_refcount++;
-               if (th->t_refcount < 2) {
-                       BUG();
-               }
+               BUG_ON(th->t_refcount < 2);
+               
                return th;
        }
        th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS);
@@ -3081,9 +3165,7 @@ static int journal_join(struct reiserfs_transaction_handle *th,
         ** pointer
         */
        th->t_handle_save = cur_th;
-       if (cur_th && cur_th->t_refcount > 1) {
-               BUG();
-       }
+       BUG_ON(cur_th && cur_th->t_refcount > 1);
        return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN);
 }
 
@@ -3096,9 +3178,7 @@ int journal_join_abort(struct reiserfs_transaction_handle *th,
         ** pointer
         */
        th->t_handle_save = cur_th;
-       if (cur_th && cur_th->t_refcount > 1) {
-               BUG();
-       }
+       BUG_ON(cur_th && cur_th->t_refcount > 1);
        return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT);
 }
 
@@ -3133,8 +3213,7 @@ int journal_begin(struct reiserfs_transaction_handle *th,
                current->journal_info = th;
        }
        ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG);
-       if (current->journal_info != th)
-               BUG();
+       BUG_ON(current->journal_info != th);
 
        /* I guess this boils down to being the reciprocal of clm-2100 above.
         * If do_journal_begin_r fails, we need to put it back, since journal_end
@@ -3279,8 +3358,7 @@ int journal_end(struct reiserfs_transaction_handle *th,
                /* we aren't allowed to close a nested transaction on a different
                 ** filesystem from the one in the task struct
                 */
-               if (cur_th->t_super != th->t_super)
-                       BUG();
+               BUG_ON(cur_th->t_super != th->t_super);
 
                if (th != cur_th) {
                        memcpy(current->journal_info, th, sizeof(*th));
@@ -3348,7 +3426,7 @@ static int remove_from_transaction(struct super_block *p_s_sb,
 
 /*
 ** for any cnode in a journal list, it can only be dirtied of all the
-** transactions that include it are commited to disk.
+** transactions that include it are committed to disk.
 ** this checks through each transaction, and returns 1 if you are allowed to dirty,
 ** and 0 if you aren't
 **
@@ -3390,7 +3468,7 @@ static int can_dirty(struct reiserfs_journal_cnode *cn)
 }
 
 /* syncs the commit blocks, but does not force the real buffers to disk
-** will wait until the current transaction is done/commited before returning 
+** will wait until the current transaction is done/committed before returning 
 */
 int journal_end_sync(struct reiserfs_transaction_handle *th,
                     struct super_block *p_s_sb, unsigned long nblocks)
@@ -3399,9 +3477,7 @@ int journal_end_sync(struct reiserfs_transaction_handle *th,
 
        BUG_ON(!th->t_trans_id);
        /* you can sync while nested, very, very bad */
-       if (th->t_refcount > 1) {
-               BUG();
-       }
+       BUG_ON(th->t_refcount > 1);
        if (journal->j_len == 0) {
                reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb),
                                             1);
@@ -3413,10 +3489,11 @@ int journal_end_sync(struct reiserfs_transaction_handle *th,
 /*
 ** writeback the pending async commits to disk
 */
-static void flush_async_commits(void *p)
+static void flush_async_commits(struct work_struct *work)
 {
-       struct super_block *p_s_sb = p;
-       struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
+       struct reiserfs_journal *journal =
+               container_of(work, struct reiserfs_journal, j_work.work);
+       struct super_block *p_s_sb = journal->j_work_sb;
        struct reiserfs_journal_list *jl;
        struct list_head *entry;
 
@@ -3428,16 +3505,6 @@ static void flush_async_commits(void *p)
                flush_commit_list(p_s_sb, jl, 1);
        }
        unlock_kernel();
-       /*
-        * this is a little racey, but there's no harm in missing
-        * the filemap_fdata_write
-        */
-       if (!atomic_read(&journal->j_async_throttle)
-           && !reiserfs_is_journal_aborted(journal)) {
-               atomic_inc(&journal->j_async_throttle);
-               filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping);
-               atomic_dec(&journal->j_async_throttle);
-       }
 }
 
 /*
@@ -3521,9 +3588,8 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
         ** will be dealt with by next transaction that actually writes something, but should be taken
         ** care of in this trans
         */
-       if (journal->j_len == 0) {
-               BUG();
-       }
+       BUG_ON(journal->j_len == 0);
+
        /* if wcount > 0, and we are called to with flush or commit_now,
         ** we wait on j_join_wait.  We will wake up when the last writer has
         ** finished the transaction, and started it on its way to the disk.
@@ -3557,9 +3623,8 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
                                        unlock_journal(p_s_sb);
                                }
                        }
-                       if (journal->j_trans_id == trans_id) {
-                               BUG();
-                       }
+                       BUG_ON(journal->j_trans_id == trans_id);
+                       
                        if (commit_now
                            && journal_list_still_alive(p_s_sb, trans_id)
                            && wait_on_commit) {
@@ -3684,13 +3749,8 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th,
                }
        }
 
-       if (bh) {
-               put_bh(bh);     /* get_hash grabs the buffer */
-               if (atomic_read(&(bh->b_count)) < 0) {
-                       reiserfs_warning(p_s_sb,
-                                        "journal-2165: bh->b_count < 0");
-               }
-       }
+       if (bh)
+               release_buffer_page(bh); /* get_hash grabs the buffer */
        return 0;
 }
 
@@ -3814,7 +3874,7 @@ int reiserfs_prepare_for_journal(struct super_block *p_s_sb,
 {
        PROC_INFO_INC(p_s_sb, journal.prepare);
 
-       if (test_set_buffer_locked(bh)) {
+       if (!trylock_buffer(bh)) {
                if (!wait)
                        return 0;
                lock_buffer(bh);
@@ -3839,7 +3899,9 @@ static void flush_old_journal_lists(struct super_block *s)
                entry = journal->j_journal_list.next;
                jl = JOURNAL_LIST_ENTRY(entry);
                /* this check should always be run, to send old lists to disk */
-               if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) {
+               if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) &&
+                   atomic_read(&jl->j_commit_left) == 0 &&
+                   test_transaction(s, jl)) {
                        flush_used_journal_lists(s, jl);
                } else {
                        break;
@@ -3871,8 +3933,8 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
        int cur_write_start = 0;        /* start index of current log write */
        int old_start;
        int i;
-       int flush = flags & FLUSH_ALL;
-       int wait_on_commit = flags & WAIT;
+       int flush;
+       int wait_on_commit;
        struct reiserfs_journal_list *jl, *temp_jl;
        struct list_head *entry, *safe;
        unsigned long jindex;
@@ -3882,6 +3944,13 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
        BUG_ON(th->t_refcount > 1);
        BUG_ON(!th->t_trans_id);
 
+       /* protect flush_older_commits from doing mistakes if the
+           transaction ID counter gets overflowed.  */
+       if (th->t_trans_id == ~0UL)
+               flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
+       flush = flags & FLUSH_ALL;
+       wait_on_commit = flags & WAIT;
+
        put_fs_excl();
        current->journal_info = th->t_handle_save;
        reiserfs_check_lock_depth(p_s_sb, "journal end");
@@ -3962,7 +4031,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
         * the new transaction is fully setup, and we've already flushed the
         * ordered bh list
         */
-       down(&jl->j_commit_lock);
+       mutex_lock(&jl->j_commit_mutex);
 
        /* save the transaction id in case we need to commit it later */
        commit_trans_id = jl->j_trans_id;
@@ -4030,9 +4099,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
        set_commit_trans_len(commit, journal->j_len);
 
        /* special check in case all buffers in the journal were marked for not logging */
-       if (journal->j_len == 0) {
-               BUG();
-       }
+       BUG_ON(journal->j_len == 0);
 
        /* we're about to dirty all the log blocks, mark the description block
         * dirty now too.  Don't mark the commit block dirty until all the
@@ -4103,7 +4170,9 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
        journal->j_first = NULL;
        journal->j_len = 0;
        journal->j_trans_start_time = 0;
-       journal->j_trans_id++;
+       /* check for trans_id overflow */
+       if (++journal->j_trans_id == 0)
+               journal->j_trans_id = 10;
        journal->j_current_jl->j_trans_id = journal->j_trans_id;
        journal->j_must_wait = 0;
        journal->j_len_alloc = 0;
@@ -4127,9 +4196,8 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                                      journal, jl, &jl->j_tail_bh_list);
                lock_kernel();
        }
-       if (!list_empty(&jl->j_tail_bh_list))
-               BUG();
-       up(&jl->j_commit_lock);
+       BUG_ON(!list_empty(&jl->j_tail_bh_list));
+       mutex_unlock(&jl->j_commit_mutex);
 
        /* honor the flush wishes from the caller, simple commits can
         ** be done outside the journal lock, they are done below
@@ -4245,5 +4313,5 @@ static void __reiserfs_journal_abort_soft(struct super_block *sb, int errno)
 
 void reiserfs_journal_abort(struct super_block *sb, int errno)
 {
-       return __reiserfs_journal_abort_soft(sb, errno);
+       __reiserfs_journal_abort_soft(sb, errno);
 }