#include <linux/bit_spinlock.h>
#include <linux/mutex.h>
#include <linux/timer.h>
-
-#include <asm/semaphore.h>
+#include <linux/slab.h>
#endif
#define journal_oom_retry 1
do { \
if ((n) <= jbd2_journal_enable_debug) { \
printk (KERN_DEBUG "(%s, %d): %s: ", \
- __FILE__, __LINE__, __FUNCTION__); \
+ __FILE__, __LINE__, __func__); \
printk (f, ## a); \
} \
} while (0)
#define jbd_debug(f, a...) /**/
#endif
-static inline void *jbd2_alloc(size_t size, gfp_t flags)
-{
- return (void *)__get_free_pages(flags, get_order(size));
-}
-
-static inline void jbd2_free(void *ptr, size_t size)
-{
- free_pages((unsigned long)ptr, get_order(size));
-};
+extern void *jbd2_alloc(size_t size, gfp_t flags);
+extern void jbd2_free(void *ptr, size_t size);
#define JBD2_MIN_JOURNAL_BLOCKS 1024
__be32 h_sequence;
} journal_header_t;
+/*
+ * Checksum types.
+ */
+#define JBD2_CRC32_CHKSUM 1
+#define JBD2_MD5_CHKSUM 2
+#define JBD2_SHA1_CHKSUM 3
+
+#define JBD2_CRC32_CHKSUM_SIZE 4
+
+#define JBD2_CHECKSUM_BYTES (32 / sizeof(u32))
+/*
+ * Commit block header for storing transactional checksums:
+ */
+struct commit_header {
+ __be32 h_magic;
+ __be32 h_blocktype;
+ __be32 h_sequence;
+ unsigned char h_chksum_type;
+ unsigned char h_chksum_size;
+ unsigned char h_padding[2];
+ __be32 h_chksum[JBD2_CHECKSUM_BYTES];
+ __be64 h_commit_sec;
+ __be32 h_commit_nsec;
+};
/*
* The block tag: used to describe a single buffer in the journal.
((j)->j_format_version >= 2 && \
((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask))))
-#define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001
-#define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002
+#define JBD2_FEATURE_COMPAT_CHECKSUM 0x00000001
+
+#define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001
+#define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002
+#define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004
/* Features known to this kernel version: */
-#define JBD2_KNOWN_COMPAT_FEATURES 0
+#define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM
#define JBD2_KNOWN_ROCOMPAT_FEATURES 0
#define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \
- JBD2_FEATURE_INCOMPAT_64BIT)
+ JBD2_FEATURE_INCOMPAT_64BIT | \
+ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)
#ifdef __KERNEL__
#define J_ASSERT(assert) BUG_ON(!(assert))
-#if defined(CONFIG_BUFFER_DEBUG)
-void buffer_assertion_failure(struct buffer_head *bh);
-#define J_ASSERT_BH(bh, expr) \
- do { \
- if (!(expr)) \
- buffer_assertion_failure(bh); \
- J_ASSERT(expr); \
- } while (0)
-#define J_ASSERT_JH(jh, expr) J_ASSERT_BH(jh2bh(jh), expr)
-#else
#define J_ASSERT_BH(bh, expr) J_ASSERT(expr)
#define J_ASSERT_JH(jh, expr) J_ASSERT(expr)
-#endif
#if defined(JBD2_PARANOID_IOFAIL)
#define J_EXPECT(expr, why...) J_ASSERT(expr)
int val = (expr); \
if (!val) { \
printk(KERN_ERR \
- "EXT3-fs unexpected failure: %s;\n",# expr); \
+ "JBD2 unexpected failure: %s: %s;\n", \
+ __func__, #expr); \
printk(KERN_ERR why "\n"); \
} \
val; \
BH_State, /* Pins most journal_head state */
BH_JournalHead, /* Pins bh->b_private and jh->b_bh */
BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */
+ BH_JBDPrivateStart, /* First bit available for private use by FS */
};
BUFFER_FNS(JBD, jbd)
bit_spin_unlock(BH_JournalHead, &bh->b_state);
}
+/* Flags in jbd_inode->i_flags */
+#define __JI_COMMIT_RUNNING 0
+/* Commit of the inode data in progress. We use this flag to protect us from
+ * concurrent deletion of inode. We cannot use reference to inode for this
+ * since we cannot afford doing last iput() on behalf of kjournald
+ */
+#define JI_COMMIT_RUNNING (1 << __JI_COMMIT_RUNNING)
+
+/**
+ * struct jbd_inode is the structure linking inodes in ordered mode
+ * present in a transaction so that we can sync them during commit.
+ */
+struct jbd2_inode {
+ /* Which transaction does this inode belong to? Either the running
+ * transaction or the committing one. [j_list_lock] */
+ transaction_t *i_transaction;
+
+ /* Pointer to the running transaction modifying inode's data in case
+ * there is already a committing transaction touching it. [j_list_lock] */
+ transaction_t *i_next_transaction;
+
+ /* List of inodes in the i_transaction [j_list_lock] */
+ struct list_head i_list;
+
+ /* VFS inode this inode belongs to [constant during the lifetime
+ * of the structure] */
+ struct inode *i_vfs_inode;
+
+ /* Flags of inode [j_list_lock] */
+ unsigned int i_flags;
+};
+
struct jbd2_revoke_table_s;
/**
unsigned int h_sync: 1; /* sync-on-close */
unsigned int h_jdata: 1; /* force data journaling */
unsigned int h_aborted: 1; /* fatal error on handle */
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map h_lockdep_map;
+#endif
};
+/*
+ * Some stats for checkpoint phase
+ */
+struct transaction_chp_stats_s {
+ unsigned long cs_chp_time;
+ __u32 cs_forced_to_close;
+ __u32 cs_written;
+ __u32 cs_dropped;
+};
+
/* The transaction_t type is the guts of the journaling mechanism. It
* tracks a compound transaction through its various states:
*
/*
* Transaction's current state
* [no locking - only kjournald2 alters this]
+ * [j_list_lock] guards transition of a transaction into T_FINISHED
+ * state and subsequent call of __jbd2_journal_drop_transaction()
* FIXME: needs barriers
* KLUDGE: [use j_state_lock]
*/
struct journal_head *t_reserved_list;
/*
- * Doubly-linked circular list of all buffers under writeout during
- * commit [j_list_lock]
- */
- struct journal_head *t_locked_list;
-
- /*
* Doubly-linked circular list of all metadata buffers owned by this
* transaction [j_list_lock]
*/
struct journal_head *t_buffers;
/*
- * Doubly-linked circular list of all data buffers still to be
- * flushed before this transaction can be committed [j_list_lock]
- */
- struct journal_head *t_sync_datalist;
-
- /*
* Doubly-linked circular list of all forget buffers (superseded
* buffers which we can un-checkpoint once this transaction commits)
* [j_list_lock]
struct journal_head *t_log_list;
/*
+ * List of inodes whose data we've modified in data=ordered mode.
+ * [j_list_lock]
+ */
+ struct list_head t_inode_list;
+
+ /*
* Protects info related to handles
*/
spinlock_t t_handle_lock;
/*
+ * Longest time some handle had to wait for running transaction
+ */
+ unsigned long t_max_wait;
+
+ /*
+ * When transaction started
+ */
+ unsigned long t_start;
+
+ /*
+ * Checkpointing stats [j_checkpoint_sem]
+ */
+ struct transaction_chp_stats_s t_chp_stats;
+
+ /*
* Number of outstanding updates running on this transaction
* [t_handle_lock]
*/
unsigned long t_expires;
/*
+ * When this transaction started, in nanoseconds [no locking]
+ */
+ ktime_t t_start_time;
+
+ /*
* How many handles used this transaction? [t_handle_lock]
*/
int t_handle_count;
+ /*
+ * This transaction is being forced and some process is
+ * waiting for it to finish.
+ */
+ unsigned int t_synchronous_commit:1;
+ unsigned int t_flushed_data_blocks:1;
+
+ /*
+ * For use by the filesystem to store fs-specific data
+ * structures associated with the transaction
+ */
+ struct list_head t_private_list;
+};
+
+struct transaction_run_stats_s {
+ unsigned long rs_wait;
+ unsigned long rs_running;
+ unsigned long rs_locked;
+ unsigned long rs_flushing;
+ unsigned long rs_logging;
+
+ __u32 rs_handle_count;
+ __u32 rs_blocks;
+ __u32 rs_blocks_logged;
+};
+
+struct transaction_stats_s {
+ unsigned long ts_tid;
+ struct transaction_run_stats_s run;
};
+static inline unsigned long
+jbd2_time_diff(unsigned long start, unsigned long end)
+{
+ if (end >= start)
+ return end - start;
+
+ return end + (MAX_JIFFY_OFFSET - start);
+}
+
+#define JBD2_NR_BATCH 64
+
/**
* struct journal_s - The journal_s type is the concrete type associated with
* journal_t.
* @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the
* number that will fit in j_blocksize
* @j_last_sync_writer: most recent pid which did a synchronous write
+ * @j_history: Buffer storing the transactions statistics history
+ * @j_history_max: Maximum number of transactions in the statistics history
+ * @j_history_cur: Current number of transactions in the statistics history
+ * @j_history_lock: Protect the transactions statistics history
+ * @j_proc_entry: procfs entry for the jbd statistics directory
+ * @j_stats: Overall statistics
* @j_private: An opaque pointer to fs-private information.
*/
struct mutex j_checkpoint_mutex;
/*
+ * List of buffer heads used by the checkpoint routine. This
+ * was moved from jbd2_log_do_checkpoint() to reduce stack
+ * usage. Access to this array is controlled by the
+ * j_checkpoint_mutex. [j_checkpoint_mutex]
+ */
+ struct buffer_head *j_chkpt_bhs[JBD2_NR_BATCH];
+
+ /*
* Journal head: identifies the first unused block in the journal.
* [j_state_lock]
*/
*/
struct block_device *j_dev;
int j_blocksize;
- unsigned long long j_blk_offset;
+ unsigned long long j_blk_offset;
+ char j_devname[BDEVNAME_SIZE+24];
/*
* Device which holds the client fs. For internal journal this will be
struct buffer_head **j_wbuf;
int j_wbufsize;
+ /*
+ * this is the pid of hte last person to run a synchronous operation
+ * through the journal
+ */
pid_t j_last_sync_writer;
/*
+ * the average amount of time in nanoseconds it takes to commit a
+ * transaction to disk. [j_state_lock]
+ */
+ u64 j_average_commit_time;
+
+ /*
+ * minimum and maximum times that we should wait for
+ * additional filesystem operations to get batched into a
+ * synchronous handle in microseconds
+ */
+ u32 j_min_batch_time;
+ u32 j_max_batch_time;
+
+ /* This function is called when a transaction is closed */
+ void (*j_commit_callback)(journal_t *,
+ transaction_t *);
+
+ /*
+ * Journal statistics
+ */
+ spinlock_t j_history_lock;
+ struct proc_dir_entry *j_proc_entry;
+ struct transaction_stats_s j_stats;
+
+ /* Failed journal commit ID */
+ unsigned int j_failed_commit;
+
+ /*
* An opaque pointer to fs-private information. ext3 puts its
* superblock pointer here
*/
#define JBD2_FLUSHED 0x008 /* The journal superblock has been flushed */
#define JBD2_LOADED 0x010 /* The journal superblock has been loaded */
#define JBD2_BARRIER 0x020 /* Use IDE barriers */
+#define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file
+ * data write error in ordered
+ * mode */
/*
* Function declarations for the journaling transaction and buffer
int __jbd2_journal_remove_checkpoint(struct journal_head *);
void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
+
+/*
+ * Triggers
+ */
+
+struct jbd2_buffer_trigger_type {
+ /*
+ * Fired just before a buffer is written to the journal.
+ * mapped_data is a mapped buffer that is the frozen data for
+ * commit.
+ */
+ void (*t_commit)(struct jbd2_buffer_trigger_type *type,
+ struct buffer_head *bh, void *mapped_data,
+ size_t size);
+
+ /*
+ * Fired during journal abort for dirty buffers that will not be
+ * committed.
+ */
+ void (*t_abort)(struct jbd2_buffer_trigger_type *type,
+ struct buffer_head *bh);
+};
+
+extern void jbd2_buffer_commit_trigger(struct journal_head *jh,
+ void *mapped_data,
+ struct jbd2_buffer_trigger_type *triggers);
+extern void jbd2_buffer_abort_trigger(struct journal_head *jh,
+ struct jbd2_buffer_trigger_type *triggers);
+
/* Buffer IO */
extern int
jbd2_journal_write_metadata_buffer(transaction_t *transaction,
extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *);
-extern int jbd2_journal_dirty_data (handle_t *, struct buffer_head *);
+void jbd2_journal_set_triggers(struct buffer_head *,
+ struct jbd2_buffer_trigger_type *type);
extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
extern void jbd2_journal_release_buffer (handle_t *, struct buffer_head *);
extern int jbd2_journal_forget (handle_t *, struct buffer_head *);
(journal_t *, unsigned long, unsigned long, unsigned long);
extern int jbd2_journal_set_features
(journal_t *, unsigned long, unsigned long, unsigned long);
-extern int jbd2_journal_create (journal_t *);
+extern void jbd2_journal_clear_features
+ (journal_t *, unsigned long, unsigned long, unsigned long);
extern int jbd2_journal_load (journal_t *journal);
-extern void jbd2_journal_destroy (journal_t *);
+extern int jbd2_journal_destroy (journal_t *);
extern int jbd2_journal_recover (journal_t *journal);
extern int jbd2_journal_wipe (journal_t *, int);
extern int jbd2_journal_skip_recovery (journal_t *);
extern int jbd2_journal_clear_err (journal_t *);
extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
extern int jbd2_journal_force_commit(journal_t *);
+extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
+extern int jbd2_journal_begin_ordered_truncate(journal_t *journal,
+ struct jbd2_inode *inode, loff_t new_size);
+extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
+extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode);
/*
* journal_head management
extern void jbd2_journal_destroy_revoke(journal_t *);
extern int jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *);
extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *);
-extern void jbd2_journal_write_revoke_records(journal_t *, transaction_t *);
+extern void jbd2_journal_write_revoke_records(journal_t *,
+ transaction_t *, int);
/* Recovery revoke support */
extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
int jbd2_log_do_checkpoint(journal_t *journal);
void __jbd2_log_wait_for_space(journal_t *journal);
-extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
-extern int jbd2_cleanup_journal_tail(journal_t *);
+extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
+extern int jbd2_cleanup_journal_tail(journal_t *);
/* Debugging code only: */
#define jbd_ENOSYS() \
do { \
- printk (KERN_ERR "JBD unimplemented function %s\n", __FUNCTION__); \
+ printk (KERN_ERR "JBD unimplemented function %s\n", __func__); \
current->state = TASK_UNINTERRUPTIBLE; \
schedule(); \
} while (1)
/* journaling buffer types */
#define BJ_None 0 /* Not journaled */
-#define BJ_SyncData 1 /* Normal data: flush before commit */
-#define BJ_Metadata 2 /* Normal journaled metadata */
-#define BJ_Forget 3 /* Buffer superseded by this transaction */
-#define BJ_IO 4 /* Buffer is for temporary IO use */
-#define BJ_Shadow 5 /* Buffer contents being shadowed to the log */
-#define BJ_LogCtl 6 /* Buffer contains log descriptors */
-#define BJ_Reserved 7 /* Buffer is reserved for access by journal */
-#define BJ_Locked 8 /* Locked for I/O during commit */
-#define BJ_Types 9
+#define BJ_Metadata 1 /* Normal journaled metadata */
+#define BJ_Forget 2 /* Buffer superseded by this transaction */
+#define BJ_IO 3 /* Buffer is for temporary IO use */
+#define BJ_Shadow 4 /* Buffer contents being shadowed to the log */
+#define BJ_LogCtl 5 /* Buffer contains log descriptors */
+#define BJ_Reserved 6 /* Buffer is reserved for access by journal */
+#define BJ_Types 7
extern int jbd_blocks_per_page(struct inode *inode);
#define BUFFER_TRACE2(bh, bh2, info) do {} while (0)
#define JBUFFER_TRACE(jh, info) do {} while (0)
+/*
+ * jbd2_dev_to_name is a utility function used by the jbd2 and ext4
+ * tracing infrastructure to map a dev_t to a device name.
+ */
+extern const char *jbd2_dev_to_name(dev_t device);
+
#endif /* __KERNEL__ */
#endif /* _LINUX_JBD2_H */