* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
+
#include "xfs.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_trans_priv.h"
#include "xfs_filestream.h"
#include "xfs_da_btree.h"
-#include "xfs_dir2_trace.h"
#include "xfs_extfree_item.h"
#include "xfs_mru_cache.h"
#include "xfs_inode_item.h"
#include "xfs_sync.h"
+#include "xfs_trace.h"
#include <linux/namei.h>
#include <linux/init.h>
+#include <linux/slab.h>
#include <linux/mount.h>
#include <linux/mempool.h>
#include <linux/writeback.h>
{
struct xfs_ail *ailp = data;
xfs_lsn_t last_pushed_lsn = 0;
- long tout = 0;
+ long tout = 0; /* milliseconds */
while (!kthread_should_stop()) {
- if (tout)
- schedule_timeout_interruptible(msecs_to_jiffies(tout));
- tout = 1000;
+ schedule_timeout_interruptible(tout ?
+ msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
/* swsusp */
try_to_freeze();
ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
/*
- * If we have nothing to flush with this inode then complete the
- * teardown now, otherwise delay the flush operation.
+ * We always use background reclaim here because even if the
+ * inode is clean, it still may be under IO and hence we have
+ * to take the flush lock. The background reclaim path handles
+ * this more efficiently than we can here, so simply let background
+ * reclaim tear down all inodes.
*/
- if (!xfs_inode_clean(ip)) {
- xfs_inode_set_reclaim_tag(ip);
- return;
- }
-
out_reclaim:
- xfs_ireclaim(ip);
+ xfs_inode_set_reclaim_tag(ip);
}
/*
XFS_I(inode)->i_update_core = 1;
}
-/*
- * Attempt to flush the inode, this will actually fail
- * if the inode is pinned, but we dirty the inode again
- * at the point when it is unpinned after a log write,
- * since this is when the inode itself becomes flushable.
- */
+STATIC int
+xfs_log_inode(
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ int error;
+
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+ error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ /* we need to return with the lock hold shared */
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ return error;
+ }
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+ /*
+ * Note - it's possible that we might have pushed ourselves out of the
+ * way during trans_reserve which would flush the inode. But there's
+ * no guarantee that the inode buffer has actually gone out yet (it's
+ * delwri). Plus the buffer could be pinned anyway if it's part of
+ * an inode in another recent transaction. So we play it safe and
+ * fire off the transaction anyway.
+ */
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_ihold(tp, ip);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ xfs_trans_set_sync(tp);
+ error = xfs_trans_commit(tp, 0);
+ xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+
+ return error;
+}
+
STATIC int
xfs_fs_write_inode(
struct inode *inode,
- int sync)
+ struct writeback_control *wbc)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
- int error = 0;
+ int error = EAGAIN;
xfs_itrace_entry(ip);
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
- if (sync) {
- error = xfs_wait_on_pages(ip, 0, -1);
- if (error)
+ if (wbc->sync_mode == WB_SYNC_ALL) {
+ /*
+ * Make sure the inode has hit stable storage. By using the
+ * log and the fsync transactions we reduce the IOs we have
+ * to do here from two (log and inode) to just the log.
+ *
+ * Note: We still need to do a delwri write of the inode after
+ * this to flush it to the backing buffer so that bulkstat
+ * works properly if this is the first time the inode has been
+ * written. Because we hold the ilock atomically over the
+ * transaction commit and the inode flush we are guaranteed
+ * that the inode is not pinned when it returns. If the flush
+ * lock is already held, then the inode has already been
+ * flushed once and we don't need to flush it again. Hence
+ * the code will only flush the inode if it isn't already
+ * being flushed.
+ */
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ if (ip->i_update_core) {
+ error = xfs_log_inode(ip);
+ if (error)
+ goto out_unlock;
+ }
+ } else {
+ /*
+ * We make this non-blocking if the inode is contended, return
+ * EAGAIN to indicate to the caller that they did not succeed.
+ * This prevents the flush path from blocking on inodes inside
+ * another operation right now, they get caught later by xfs_sync.
+ */
+ if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
goto out;
}
- /*
- * Bypass inodes which have already been cleaned by
- * the inode flush clustering code inside xfs_iflush
- */
- if (xfs_inode_clean(ip))
- goto out;
+ if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
+ goto out_unlock;
/*
- * We make this non-blocking if the inode is contended, return
- * EAGAIN to indicate to the caller that they did not succeed.
- * This prevents the flush path from blocking on inodes inside
- * another operation right now, they get caught later by xfs_sync.
+ * Now we have the flush lock and the inode is not pinned, we can check
+ * if the inode is really clean as we know that there are no pending
+ * transaction completions, it is not waiting on the delayed write
+ * queue and there is no IO in progress.
*/
- if (sync) {
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- xfs_iflock(ip);
-
- error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
- } else {
- error = EAGAIN;
- if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
- goto out;
- if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
- goto out_unlock;
-
- error = xfs_iflush(ip, XFS_IFLUSH_ASYNC_NOBLOCK);
+ if (xfs_inode_clean(ip)) {
+ xfs_ifunlock(ip);
+ error = 0;
+ goto out_unlock;
}
+ error = xfs_iflush(ip, 0);
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_SHARED);
struct super_block *sb)
{
struct xfs_mount *mp = XFS_M(sb);
- struct xfs_inode *rip = mp->m_rootip;
- int unmount_event_flags = 0;
xfs_syncd_stop(mp);
xfs_sync_attr(mp, 0);
}
-#ifdef HAVE_DMAPI
- if (mp->m_flags & XFS_MOUNT_DMAPI) {
- unmount_event_flags =
- (mp->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ?
- 0 : DM_FLAGS_UNWANTED;
- /*
- * Ignore error from dmapi here, first unmount is not allowed
- * to fail anyway, and second we wouldn't want to fail a
- * unmount because of dmapi.
- */
- XFS_SEND_PREUNMOUNT(mp, rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
- NULL, NULL, 0, 0, unmount_event_flags);
- }
-#endif
+ XFS_SEND_PREUNMOUNT(mp);
/*
* Blow away any referenced inode in the filestreams cache.
XFS_bflush(mp->m_ddev_targp);
- if (mp->m_flags & XFS_MOUNT_DMAPI) {
- XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
- unmount_event_flags);
- }
+ XFS_SEND_UNMOUNT(mp);
xfs_unmountfs(mp);
xfs_freesb(mp);
return 0;
}
+STATIC void
+xfs_save_resvblks(struct xfs_mount *mp)
+{
+ __uint64_t resblks = 0;
+
+ mp->m_resblks_save = mp->m_resblks;
+ xfs_reserve_blocks(mp, &resblks, NULL);
+}
+
+STATIC void
+xfs_restore_resvblks(struct xfs_mount *mp)
+{
+ __uint64_t resblks;
+
+ if (mp->m_resblks_save) {
+ resblks = mp->m_resblks_save;
+ mp->m_resblks_save = 0;
+ } else
+ resblks = xfs_default_resblks(mp);
+
+ xfs_reserve_blocks(mp, &resblks, NULL);
+}
+
STATIC int
xfs_fs_remount(
struct super_block *sb,
}
mp->m_update_flags = 0;
}
+
+ /*
+ * Fill out the reserve pool if it is empty. Use the stashed
+ * value if it is non-zero, otherwise go with the default.
+ */
+ xfs_restore_resvblks(mp);
}
/* rw -> ro */
if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
+ /*
+ * After we have synced the data but before we sync the
+ * metadata, we need to free up the reserve block pool so that
+ * the used block count in the superblock on disk is correct at
+ * the end of the remount. Stash the current reserve pool size
+ * so that if we get remounted rw, we can return it to the same
+ * size.
+ */
+
xfs_quiesce_data(mp);
+ xfs_save_resvblks(mp);
xfs_quiesce_attr(mp);
mp->m_flags |= XFS_MOUNT_RDONLY;
}
{
struct xfs_mount *mp = XFS_M(sb);
+ xfs_save_resvblks(mp);
xfs_quiesce_attr(mp);
return -xfs_fs_log_dummy(mp);
}
STATIC int
+xfs_fs_unfreeze(
+ struct super_block *sb)
+{
+ struct xfs_mount *mp = XFS_M(sb);
+
+ xfs_restore_resvblks(mp);
+ return 0;
+}
+
+STATIC int
xfs_fs_show_options(
struct seq_file *m,
struct vfsmount *mnt)
goto fail_vnrele;
kfree(mtpt);
-
- xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
return 0;
out_filestream_unmount:
.put_super = xfs_fs_put_super,
.sync_fs = xfs_fs_sync_fs,
.freeze_fs = xfs_fs_freeze,
+ .unfreeze_fs = xfs_fs_unfreeze,
.statfs = xfs_fs_statfs,
.remount_fs = xfs_fs_remount,
.show_options = xfs_fs_show_options,
};
STATIC int __init
-xfs_alloc_trace_bufs(void)
-{
-#ifdef XFS_ALLOC_TRACE
- xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_MAYFAIL);
- if (!xfs_alloc_trace_buf)
- goto out;
-#endif
-#ifdef XFS_BMAP_TRACE
- xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_MAYFAIL);
- if (!xfs_bmap_trace_buf)
- goto out_free_alloc_trace;
-#endif
-#ifdef XFS_BTREE_TRACE
- xfs_allocbt_trace_buf = ktrace_alloc(XFS_ALLOCBT_TRACE_SIZE,
- KM_MAYFAIL);
- if (!xfs_allocbt_trace_buf)
- goto out_free_bmap_trace;
-
- xfs_inobt_trace_buf = ktrace_alloc(XFS_INOBT_TRACE_SIZE, KM_MAYFAIL);
- if (!xfs_inobt_trace_buf)
- goto out_free_allocbt_trace;
-
- xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
- if (!xfs_bmbt_trace_buf)
- goto out_free_inobt_trace;
-#endif
-#ifdef XFS_ATTR_TRACE
- xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
- if (!xfs_attr_trace_buf)
- goto out_free_bmbt_trace;
-#endif
-#ifdef XFS_DIR2_TRACE
- xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_MAYFAIL);
- if (!xfs_dir2_trace_buf)
- goto out_free_attr_trace;
-#endif
-
- return 0;
-
-#ifdef XFS_DIR2_TRACE
- out_free_attr_trace:
-#endif
-#ifdef XFS_ATTR_TRACE
- ktrace_free(xfs_attr_trace_buf);
- out_free_bmbt_trace:
-#endif
-#ifdef XFS_BTREE_TRACE
- ktrace_free(xfs_bmbt_trace_buf);
- out_free_inobt_trace:
- ktrace_free(xfs_inobt_trace_buf);
- out_free_allocbt_trace:
- ktrace_free(xfs_allocbt_trace_buf);
- out_free_bmap_trace:
-#endif
-#ifdef XFS_BMAP_TRACE
- ktrace_free(xfs_bmap_trace_buf);
- out_free_alloc_trace:
-#endif
-#ifdef XFS_ALLOC_TRACE
- ktrace_free(xfs_alloc_trace_buf);
- out:
-#endif
- return -ENOMEM;
-}
-
-STATIC void
-xfs_free_trace_bufs(void)
-{
-#ifdef XFS_DIR2_TRACE
- ktrace_free(xfs_dir2_trace_buf);
-#endif
-#ifdef XFS_ATTR_TRACE
- ktrace_free(xfs_attr_trace_buf);
-#endif
-#ifdef XFS_BTREE_TRACE
- ktrace_free(xfs_bmbt_trace_buf);
- ktrace_free(xfs_inobt_trace_buf);
- ktrace_free(xfs_allocbt_trace_buf);
-#endif
-#ifdef XFS_BMAP_TRACE
- ktrace_free(xfs_bmap_trace_buf);
-#endif
-#ifdef XFS_ALLOC_TRACE
- ktrace_free(xfs_alloc_trace_buf);
-#endif
-}
-
-STATIC int __init
xfs_init_zones(void)
{
printk(KERN_INFO XFS_VERSION_STRING " with "
XFS_BUILD_OPTIONS " enabled\n");
- ktrace_init(64);
xfs_ioend_init();
xfs_dir_startup();
if (error)
goto out;
- error = xfs_alloc_trace_bufs();
- if (error)
- goto out_destroy_zones;
-
error = xfs_mru_cache_init();
if (error)
- goto out_free_trace_buffers;
+ goto out_destroy_zones;
error = xfs_filestream_init();
if (error)
xfs_filestream_uninit();
out_mru_cache_uninit:
xfs_mru_cache_uninit();
- out_free_trace_buffers:
- xfs_free_trace_bufs();
out_destroy_zones:
xfs_destroy_zones();
out:
xfs_buf_terminate();
xfs_filestream_uninit();
xfs_mru_cache_uninit();
- xfs_free_trace_bufs();
xfs_destroy_zones();
- ktrace_uninit();
}
module_init(init_xfs_fs);