#include "ocfs2.h"
#include "alloc.h"
+#include "dir.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "heartbeat.h"
#include "inode.h"
#include "journal.h"
#include "localalloc.h"
-#include "namei.h"
#include "slot_map.h"
#include "super.h"
-#include "vote.h"
#include "sysfile.h"
#include "buffer_head_io.h"
mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n",
journal->j_trans_id, flushed);
- ocfs2_kick_vote_thread(osb);
+ ocfs2_wake_downconvert_thread(osb);
wake_up(&journal->j_checkpointed);
finally:
mlog_exit(status);
* transaction. extend_trans will either extend the current handle by
* nblocks, or commit it and start a new one with nblocks credits.
*
+ * This might call journal_restart() which will commit dirty buffers
+ * and then restart the transaction. Before calling
+ * ocfs2_extend_trans(), any changed blocks should have been
+ * dirtied. After calling it, all blocks which need to be changed must
+ * go through another set of journal_access/journal_dirty calls.
+ *
* WARNING: This will not release any semaphores or disk locks taken
* during the transaction, so make sure they were taken *before*
* start_trans or we'll have ordering deadlocks.
mlog(0, "Trying to extend transaction by %d blocks\n", nblocks);
+#ifdef OCFS2_DEBUG_FS
+ status = 1;
+#else
status = journal_extend(handle, nblocks);
if (status < 0) {
mlog_errno(status);
goto bail;
}
+#endif
if (status > 0) {
mlog(0, "journal_extend failed, trying journal_restart\n");
return err;
}
-#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * 5)
+#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD_DEFAULT_MAX_COMMIT_AGE)
void ocfs2_set_journal_params(struct ocfs2_super *osb)
{
journal_t *journal = osb->journal->j_journal;
+ unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL;
+
+ if (osb->osb_commit_interval)
+ commit_interval = osb->osb_commit_interval;
spin_lock(&journal->j_state_lock);
- journal->j_commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL;
+ journal->j_commit_interval = commit_interval;
if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
journal->j_flags |= JFS_BARRIER;
else
struct ocfs2_dinode *di = NULL;
struct buffer_head *bh = NULL;
struct ocfs2_super *osb;
- int meta_lock = 0;
+ int inode_lock = 0;
mlog_entry_void();
/* Skip recovery waits here - journal inode metadata never
* changes in a live cluster so it can be considered an
* exception to the rule. */
- status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
+ status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
if (status < 0) {
if (status != -ERESTARTSYS)
mlog(ML_ERROR, "Could not get lock on journal!\n");
goto done;
}
- meta_lock = 1;
+ inode_lock = 1;
di = (struct ocfs2_dinode *)bh->b_data;
if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) {
status = 0;
done:
if (status < 0) {
- if (meta_lock)
- ocfs2_meta_unlock(inode, 1);
+ if (inode_lock)
+ ocfs2_inode_unlock(inode, 1);
if (bh != NULL)
brelse(bh);
if (inode) {
* handle the errors in a specific manner, so no need
* to call ocfs2_error() here. */
mlog(ML_ERROR, "Journal dinode %llu has invalid "
- "signature: %.*s", (unsigned long long)fe->i_blkno, 7,
+ "signature: %.*s",
+ (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
fe->i_signature);
status = -EIO;
goto out;
OCFS2_I(inode)->ip_open_count--;
/* unlock our journal */
- ocfs2_meta_unlock(inode, 1);
+ ocfs2_inode_unlock(inode, 1);
brelse(journal->j_bh);
journal->j_bh = NULL;
container_of(work, struct ocfs2_journal, j_recovery_work);
struct ocfs2_super *osb = journal->j_osb;
struct ocfs2_dinode *la_dinode, *tl_dinode;
- struct ocfs2_la_recovery_item *item;
- struct list_head *p, *n;
+ struct ocfs2_la_recovery_item *item, *n;
LIST_HEAD(tmp_la_list);
mlog_entry_void();
list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
spin_unlock(&journal->j_lock);
- list_for_each_safe(p, n, &tmp_la_list) {
- item = list_entry(p, struct ocfs2_la_recovery_item, lri_list);
+ list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
list_del_init(&item->lri_list);
mlog(0, "Complete recovery for slot %d\n", item->lri_slot);
la_dinode = item->lri_la_dinode;
if (la_dinode) {
mlog(0, "Clean up local alloc %llu\n",
- (unsigned long long)la_dinode->i_blkno);
+ (unsigned long long)le64_to_cpu(la_dinode->i_blkno));
ret = ocfs2_complete_local_alloc_recovery(osb,
la_dinode);
tl_dinode = item->lri_tl_dinode;
if (tl_dinode) {
mlog(0, "Clean up truncate log %llu\n",
- (unsigned long long)tl_dinode->i_blkno);
+ (unsigned long long)le64_to_cpu(tl_dinode->i_blkno));
ret = ocfs2_complete_truncate_log_recovery(osb,
tl_dinode);
ocfs2_super_unlock(osb, 1);
/* We always run recovery on our own orphan dir - the dead
- * node(s) may have voted "no" on an inode delete earlier. A
- * revote is therefore required. */
+ * node(s) may have disallowd a previos inode delete. Re-processing
+ * is therefore required. */
ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
NULL);
}
SET_INODE_JOURNAL(inode);
- status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
+ status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
if (status < 0) {
- mlog(0, "status returned from ocfs2_meta_lock=%d\n", status);
+ mlog(0, "status returned from ocfs2_inode_lock=%d\n", status);
if (status != -ERESTARTSYS)
mlog(ML_ERROR, "Could not lock journal!\n");
goto done;
done:
/* drop the lock on this nodes journal */
if (got_lock)
- ocfs2_meta_unlock(inode, 1);
+ ocfs2_inode_unlock(inode, 1);
if (inode)
iput(inode);
SET_INODE_JOURNAL(inode);
flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE;
- status = ocfs2_meta_lock_full(inode, NULL, 1, flags);
+ status = ocfs2_inode_lock_full(inode, NULL, 1, flags);
if (status < 0) {
if (status != -EAGAIN)
mlog_errno(status);
goto bail;
}
- ocfs2_meta_unlock(inode, 1);
+ ocfs2_inode_unlock(inode, 1);
bail:
if (inode)
iput(inode);
return status;
}
+struct ocfs2_orphan_filldir_priv {
+ struct inode *head;
+ struct ocfs2_super *osb;
+};
+
+static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len,
+ loff_t pos, u64 ino, unsigned type)
+{
+ struct ocfs2_orphan_filldir_priv *p = priv;
+ struct inode *iter;
+
+ if (name_len == 1 && !strncmp(".", name, 1))
+ return 0;
+ if (name_len == 2 && !strncmp("..", name, 2))
+ return 0;
+
+ /* Skip bad inodes so that recovery can continue */
+ iter = ocfs2_iget(p->osb, ino,
+ OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0);
+ if (IS_ERR(iter))
+ return 0;
+
+ mlog(0, "queue orphan %llu\n",
+ (unsigned long long)OCFS2_I(iter)->ip_blkno);
+ /* No locking is required for the next_orphan queue as there
+ * is only ever a single process doing orphan recovery. */
+ OCFS2_I(iter)->ip_next_orphan = p->head;
+ p->head = iter;
+
+ return 0;
+}
+
static int ocfs2_queue_orphans(struct ocfs2_super *osb,
int slot,
struct inode **head)
{
int status;
struct inode *orphan_dir_inode = NULL;
- struct inode *iter;
- unsigned long offset, blk, local;
- struct buffer_head *bh = NULL;
- struct ocfs2_dir_entry *de;
- struct super_block *sb = osb->sb;
+ struct ocfs2_orphan_filldir_priv priv;
+ loff_t pos = 0;
+
+ priv.osb = osb;
+ priv.head = *head;
orphan_dir_inode = ocfs2_get_system_file_inode(osb,
ORPHAN_DIR_SYSTEM_INODE,
}
mutex_lock(&orphan_dir_inode->i_mutex);
- status = ocfs2_meta_lock(orphan_dir_inode, NULL, 0);
+ status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
if (status < 0) {
mlog_errno(status);
goto out;
}
- offset = 0;
- iter = NULL;
- while(offset < i_size_read(orphan_dir_inode)) {
- blk = offset >> sb->s_blocksize_bits;
-
- bh = ocfs2_bread(orphan_dir_inode, blk, &status, 0);
- if (!bh)
- status = -EINVAL;
- if (status < 0) {
- if (bh)
- brelse(bh);
- mlog_errno(status);
- goto out_unlock;
- }
-
- local = 0;
- while(offset < i_size_read(orphan_dir_inode)
- && local < sb->s_blocksize) {
- de = (struct ocfs2_dir_entry *) (bh->b_data + local);
-
- if (!ocfs2_check_dir_entry(orphan_dir_inode,
- de, bh, local)) {
- status = -EINVAL;
- mlog_errno(status);
- brelse(bh);
- goto out_unlock;
- }
-
- local += le16_to_cpu(de->rec_len);
- offset += le16_to_cpu(de->rec_len);
-
- /* I guess we silently fail on no inode? */
- if (!le64_to_cpu(de->inode))
- continue;
- if (de->file_type > OCFS2_FT_MAX) {
- mlog(ML_ERROR,
- "block %llu contains invalid de: "
- "inode = %llu, rec_len = %u, "
- "name_len = %u, file_type = %u, "
- "name='%.*s'\n",
- (unsigned long long)bh->b_blocknr,
- (unsigned long long)le64_to_cpu(de->inode),
- le16_to_cpu(de->rec_len),
- de->name_len,
- de->file_type,
- de->name_len,
- de->name);
- continue;
- }
- if (de->name_len == 1 && !strncmp(".", de->name, 1))
- continue;
- if (de->name_len == 2 && !strncmp("..", de->name, 2))
- continue;
-
- iter = ocfs2_iget(osb, le64_to_cpu(de->inode),
- OCFS2_FI_FLAG_ORPHAN_RECOVERY);
- if (IS_ERR(iter))
- continue;
-
- mlog(0, "queue orphan %llu\n",
- (unsigned long long)OCFS2_I(iter)->ip_blkno);
- /* No locking is required for the next_orphan
- * queue as there is only ever a single
- * process doing orphan recovery. */
- OCFS2_I(iter)->ip_next_orphan = *head;
- *head = iter;
- }
- brelse(bh);
+ status = ocfs2_dir_foreach(orphan_dir_inode, &pos, &priv,
+ ocfs2_orphan_filldir);
+ if (status) {
+ mlog_errno(status);
+ goto out_cluster;
}
-out_unlock:
- ocfs2_meta_unlock(orphan_dir_inode, 0);
+ *head = priv.head;
+
+out_cluster:
+ ocfs2_inode_unlock(orphan_dir_inode, 0);
out:
mutex_unlock(&orphan_dir_inode->i_mutex);
iput(orphan_dir_inode);
iter = oi->ip_next_orphan;
spin_lock(&oi->ip_lock);
- /* Delete voting may have set these on the assumption
- * that the other node would wipe them successfully.
- * If they are still in the node's orphan dir, we need
- * to reset that state. */
+ /* The remote delete code may have set these on the
+ * assumption that the other node would wipe them
+ * successfully. If they are still in the node's
+ * orphan dir, we need to reset that state. */
oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE);
/* Set the proper information to get us going into