Merge branches 'sched/clock', 'sched/urgent' and 'linus' into sched/core
[safe/jmp/linux-2.6] / include / linux / jbd2.h
index 6856400..4d248b3 100644 (file)
@@ -30,8 +30,6 @@
 #include <linux/bit_spinlock.h>
 #include <linux/mutex.h>
 #include <linux/timer.h>
-
-#include <asm/semaphore.h>
 #endif
 
 #define journal_oom_retry 1
@@ -63,7 +61,7 @@ extern u8 jbd2_journal_enable_debug;
        do {                                                            \
                if ((n) <= jbd2_journal_enable_debug) {                 \
                        printk (KERN_DEBUG "(%s, %d): %s: ",            \
-                               __FILE__, __LINE__, __FUNCTION__);      \
+                               __FILE__, __LINE__, __func__);  \
                        printk (f, ## a);                               \
                }                                                       \
        } while (0)
@@ -149,6 +147,30 @@ typedef struct journal_header_s
        __be32          h_sequence;
 } journal_header_t;
 
+/*
+ * Checksum types.
+ */
+#define JBD2_CRC32_CHKSUM   1
+#define JBD2_MD5_CHKSUM     2
+#define JBD2_SHA1_CHKSUM    3
+
+#define JBD2_CRC32_CHKSUM_SIZE 4
+
+#define JBD2_CHECKSUM_BYTES (32 / sizeof(u32))
+/*
+ * Commit block header for storing transactional checksums:
+ */
+struct commit_header {
+       __be32          h_magic;
+       __be32          h_blocktype;
+       __be32          h_sequence;
+       unsigned char   h_chksum_type;
+       unsigned char   h_chksum_size;
+       unsigned char   h_padding[2];
+       __be32          h_chksum[JBD2_CHECKSUM_BYTES];
+       __be64          h_commit_sec;
+       __be32          h_commit_nsec;
+};
 
 /*
  * The block tag: used to describe a single buffer in the journal.
@@ -242,14 +264,18 @@ typedef struct journal_superblock_s
        ((j)->j_format_version >= 2 &&                                  \
         ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask))))
 
-#define JBD2_FEATURE_INCOMPAT_REVOKE   0x00000001
-#define JBD2_FEATURE_INCOMPAT_64BIT    0x00000002
+#define JBD2_FEATURE_COMPAT_CHECKSUM   0x00000001
+
+#define JBD2_FEATURE_INCOMPAT_REVOKE           0x00000001
+#define JBD2_FEATURE_INCOMPAT_64BIT            0x00000002
+#define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT     0x00000004
 
 /* Features known to this kernel version: */
-#define JBD2_KNOWN_COMPAT_FEATURES     0
+#define JBD2_KNOWN_COMPAT_FEATURES     JBD2_FEATURE_COMPAT_CHECKSUM
 #define JBD2_KNOWN_ROCOMPAT_FEATURES   0
 #define JBD2_KNOWN_INCOMPAT_FEATURES   (JBD2_FEATURE_INCOMPAT_REVOKE | \
-                                        JBD2_FEATURE_INCOMPAT_64BIT)
+                                       JBD2_FEATURE_INCOMPAT_64BIT | \
+                                       JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)
 
 #ifdef __KERNEL__
 
@@ -282,7 +308,8 @@ void buffer_assertion_failure(struct buffer_head *bh);
                int val = (expr);                                            \
                if (!val) {                                                  \
                        printk(KERN_ERR                                      \
-                               "EXT3-fs unexpected failure: %s;\n",# expr); \
+                              "JBD2 unexpected failure: %s: %s;\n",         \
+                              __func__, #expr);                             \
                        printk(KERN_ERR why "\n");                           \
                }                                                            \
                val;                                                         \
@@ -303,6 +330,7 @@ enum jbd_state_bits {
        BH_State,               /* Pins most journal_head state */
        BH_JournalHead,         /* Pins bh->b_private and jh->b_bh */
        BH_Unshadow,            /* Dummy bit, for BJ_Shadow wakeup filtering */
+       BH_JBDPrivateStart,     /* First bit available for private use by FS */
 };
 
 BUFFER_FNS(JBD, jbd)
@@ -355,6 +383,38 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
        bit_spin_unlock(BH_JournalHead, &bh->b_state);
 }
 
+/* Flags in jbd2_inode->i_flags */
+#define __JI_COMMIT_RUNNING 0
+/* Commit of the inode data in progress. We use this flag to protect us from
+ * concurrent deletion of the inode. We cannot use a reference to the inode for
+ * this since we cannot afford doing the last iput() on behalf of kjournald.
+ */
+#define JI_COMMIT_RUNNING (1 << __JI_COMMIT_RUNNING)
+
+/**
+ * struct jbd2_inode is the structure linking inodes in ordered mode
+ *   present in a transaction so that we can sync them during commit.
+ */
+struct jbd2_inode {
+       /* Which transaction does this inode belong to? Either the running
+        * transaction or the committing one. [j_list_lock] */
+       transaction_t *i_transaction;
+
+       /* Pointer to the running transaction modifying inode's data in case
+        * there is already a committing transaction touching it. [j_list_lock] */
+       transaction_t *i_next_transaction;
+
+       /* List of inodes in the i_transaction [j_list_lock] */
+       struct list_head i_list;
+
+       /* VFS inode this inode belongs to [constant during the lifetime
+        * of the structure] */
+       struct inode *i_vfs_inode;
+
+       /* Flags of inode [j_list_lock] */
+       unsigned int i_flags;
+};
+
 struct jbd2_revoke_table_s;
 
 /**
@@ -392,6 +452,10 @@ struct handle_s
        unsigned int    h_sync:         1;      /* sync-on-close */
        unsigned int    h_jdata:        1;      /* force data journaling */
        unsigned int    h_aborted:      1;      /* fatal error on handle */
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       struct lockdep_map      h_lockdep_map;
+#endif
 };
 
 
@@ -481,24 +545,12 @@ struct transaction_s
        struct journal_head     *t_reserved_list;
 
        /*
-        * Doubly-linked circular list of all buffers under writeout during
-        * commit [j_list_lock]
-        */
-       struct journal_head     *t_locked_list;
-
-       /*
         * Doubly-linked circular list of all metadata buffers owned by this
         * transaction [j_list_lock]
         */
        struct journal_head     *t_buffers;
 
        /*
-        * Doubly-linked circular list of all data buffers still to be
-        * flushed before this transaction can be committed [j_list_lock]
-        */
-       struct journal_head     *t_sync_datalist;
-
-       /*
         * Doubly-linked circular list of all forget buffers (superseded
         * buffers which we can un-checkpoint once this transaction commits)
         * [j_list_lock]
@@ -537,6 +589,12 @@ struct transaction_s
        struct journal_head     *t_log_list;
 
        /*
+        * List of inodes whose data we've modified in data=ordered mode.
+        * [j_list_lock]
+        */
+       struct list_head        t_inode_list;
+
+       /*
         * Protects info related to handles
         */
        spinlock_t              t_handle_lock;
@@ -581,10 +639,20 @@ struct transaction_s
        unsigned long           t_expires;
 
        /*
+        * When this transaction started, in nanoseconds [no locking]
+        */
+       ktime_t                 t_start_time;
+
+       /*
         * How many handles used this transaction? [t_handle_lock]
         */
        int t_handle_count;
 
+       /*
+        * For use by the filesystem to store fs-specific data
+        * structures associated with the transaction
+        */
+       struct list_head        t_private_list;
 };
 
 struct transaction_run_stats_s {
@@ -620,6 +688,8 @@ jbd2_time_diff(unsigned long start, unsigned long end)
        return end + (MAX_JIFFY_OFFSET - start);
 }
 
+#define JBD2_NR_BATCH  64
+
 /**
  * struct journal_s - The journal_s type is the concrete type associated with
  *     journal_t.
@@ -764,6 +834,14 @@ struct journal_s
        struct mutex            j_checkpoint_mutex;
 
        /*
+        * List of buffer heads used by the checkpoint routine.  This
+        * was moved from jbd2_log_do_checkpoint() to reduce stack
+        * usage.  Access to this array is controlled by the
+        * j_checkpoint_mutex.  [j_checkpoint_mutex]
+        */
+       struct buffer_head      *j_chkpt_bhs[JBD2_NR_BATCH];
+       
+       /*
         * Journal head: identifies the first unused block in the journal.
         * [j_state_lock]
         */
@@ -794,7 +872,8 @@ struct journal_s
         */
        struct block_device     *j_dev;
        int                     j_blocksize;
-       unsigned long long              j_blk_offset;
+       unsigned long long      j_blk_offset;
+       char                    j_devname[BDEVNAME_SIZE+24];
 
        /*
         * Device which holds the client fs.  For internal journal this will be
@@ -876,9 +955,31 @@ struct journal_s
        struct buffer_head      **j_wbuf;
        int                     j_wbufsize;
 
+       /*
+        * this is the pid of the last person to run a synchronous operation
+        * through the journal
+        */
        pid_t                   j_last_sync_writer;
 
        /*
+        * the average amount of time in nanoseconds it takes to commit a
+        * transaction to disk. [j_state_lock]
+        */
+       u64                     j_average_commit_time;
+
+       /*
+        * minimum and maximum times, in microseconds, that we should
+        * wait for additional filesystem operations to get batched into
+        * a synchronous handle
+        */
+       u32                     j_min_batch_time;
+       u32                     j_max_batch_time;
+
+       /* This function is called when a transaction is closed */
+       void                    (*j_commit_callback)(journal_t *,
+                                                    transaction_t *);
+
+       /*
         * Journal statistics
         */
        struct transaction_stats_s *j_history;
@@ -891,6 +992,9 @@ struct journal_s
        struct proc_dir_entry   *j_proc_entry;
        struct transaction_stats_s j_stats;
 
+       /* Failed journal commit ID */
+       unsigned int            j_failed_commit;
+
        /*
         * An opaque pointer to fs-private information.  ext3 puts its
         * superblock pointer here
@@ -907,6 +1011,9 @@ struct journal_s
 #define JBD2_FLUSHED   0x008   /* The journal superblock has been flushed */
 #define JBD2_LOADED    0x010   /* The journal superblock has been loaded */
 #define JBD2_BARRIER   0x020   /* Use IDE barriers */
+#define JBD2_ABORT_ON_SYNCDATA_ERR     0x040   /* Abort the journal on file
+                                                * data write error in ordered
+                                                * mode */
 
 /*
  * Function declarations for the journaling transaction and buffer
@@ -935,6 +1042,35 @@ int __jbd2_journal_clean_checkpoint_list(journal_t *journal);
 int __jbd2_journal_remove_checkpoint(struct journal_head *);
 void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
+
+/*
+ * Triggers
+ */
+
+struct jbd2_buffer_trigger_type {
+       /*
+        * Fired just before a buffer is written to the journal.
+        * mapped_data is a mapped buffer that is the frozen data for
+        * commit.
+        */
+       void (*t_commit)(struct jbd2_buffer_trigger_type *type,
+                        struct buffer_head *bh, void *mapped_data,
+                        size_t size);
+
+       /*
+        * Fired during journal abort for dirty buffers that will not be
+        * committed.
+        */
+       void (*t_abort)(struct jbd2_buffer_trigger_type *type,
+                       struct buffer_head *bh);
+};
+
+extern void jbd2_buffer_commit_trigger(struct journal_head *jh,
+                                      void *mapped_data,
+                                      struct jbd2_buffer_trigger_type *triggers);
+extern void jbd2_buffer_abort_trigger(struct journal_head *jh,
+                                     struct jbd2_buffer_trigger_type *triggers);
+
 /* Buffer IO */
 extern int
 jbd2_journal_write_metadata_buffer(transaction_t         *transaction,
@@ -973,7 +1109,8 @@ extern int  jbd2_journal_extend (handle_t *, int nblocks);
 extern int      jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
 extern int      jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
 extern int      jbd2_journal_get_undo_access(handle_t *, struct buffer_head *);
-extern int      jbd2_journal_dirty_data (handle_t *, struct buffer_head *);
+void            jbd2_journal_set_triggers(struct buffer_head *,
+                                          struct jbd2_buffer_trigger_type *type);
 extern int      jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
 extern void     jbd2_journal_release_buffer (handle_t *, struct buffer_head *);
 extern int      jbd2_journal_forget (handle_t *, struct buffer_head *);
@@ -997,9 +1134,10 @@ extern int           jbd2_journal_check_available_features
                   (journal_t *, unsigned long, unsigned long, unsigned long);
 extern int        jbd2_journal_set_features
                   (journal_t *, unsigned long, unsigned long, unsigned long);
-extern int        jbd2_journal_create     (journal_t *);
+extern void       jbd2_journal_clear_features
+                  (journal_t *, unsigned long, unsigned long, unsigned long);
 extern int        jbd2_journal_load       (journal_t *journal);
-extern void       jbd2_journal_destroy    (journal_t *);
+extern int        jbd2_journal_destroy    (journal_t *);
 extern int        jbd2_journal_recover    (journal_t *journal);
 extern int        jbd2_journal_wipe       (journal_t *, int);
 extern int        jbd2_journal_skip_recovery   (journal_t *);
@@ -1011,6 +1149,11 @@ extern void         jbd2_journal_ack_err    (journal_t *);
 extern int        jbd2_journal_clear_err  (journal_t *);
 extern int        jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
 extern int        jbd2_journal_force_commit(journal_t *);
+extern int        jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
+extern int        jbd2_journal_begin_ordered_truncate(journal_t *journal,
+                               struct jbd2_inode *inode, loff_t new_size);
+extern void       jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
+extern void       jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode);
 
 /*
  * journal_head management
@@ -1068,14 +1211,14 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid);
 int jbd2_log_do_checkpoint(journal_t *journal);
 
 void __jbd2_log_wait_for_space(journal_t *journal);
-extern void    __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
-extern int     jbd2_cleanup_journal_tail(journal_t *);
+extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
+extern int jbd2_cleanup_journal_tail(journal_t *);
 
 /* Debugging code only: */
 
 #define jbd_ENOSYS() \
 do {                                                                      \
-       printk (KERN_ERR "JBD unimplemented function %s\n", __FUNCTION__); \
+       printk (KERN_ERR "JBD unimplemented function %s\n", __func__); \
        current->state = TASK_UNINTERRUPTIBLE;                             \
        schedule();                                                        \
 } while (1)
@@ -1146,15 +1289,13 @@ static inline int jbd_space_needed(journal_t *journal)
 
 /* journaling buffer types */
 #define BJ_None                0       /* Not journaled */
-#define BJ_SyncData    1       /* Normal data: flush before commit */
-#define BJ_Metadata    2       /* Normal journaled metadata */
-#define BJ_Forget      3       /* Buffer superseded by this transaction */
-#define BJ_IO          4       /* Buffer is for temporary IO use */
-#define BJ_Shadow      5       /* Buffer contents being shadowed to the log */
-#define BJ_LogCtl      6       /* Buffer contains log descriptors */
-#define BJ_Reserved    7       /* Buffer is reserved for access by journal */
-#define BJ_Locked      8       /* Locked for I/O during commit */
-#define BJ_Types       9
+#define BJ_Metadata    1       /* Normal journaled metadata */
+#define BJ_Forget      2       /* Buffer superseded by this transaction */
+#define BJ_IO          3       /* Buffer is for temporary IO use */
+#define BJ_Shadow      4       /* Buffer contents being shadowed to the log */
+#define BJ_LogCtl      5       /* Buffer contains log descriptors */
+#define BJ_Reserved    6       /* Buffer is reserved for access by journal */
+#define BJ_Types       7
 
 extern int jbd_blocks_per_page(struct inode *inode);