XFS_LINUX := linux-2.6
ifeq ($(CONFIG_XFS_DEBUG),y)
- EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG
- EXTRA_CFLAGS += -DXFS_BUF_LOCK_TRACKING
-endif
-ifeq ($(CONFIG_XFS_TRACE),y)
- EXTRA_CFLAGS += -DXFS_ALLOC_TRACE
- EXTRA_CFLAGS += -DXFS_ATTR_TRACE
- EXTRA_CFLAGS += -DXFS_BLI_TRACE
- EXTRA_CFLAGS += -DXFS_BMAP_TRACE
- EXTRA_CFLAGS += -DXFS_BMBT_TRACE
- EXTRA_CFLAGS += -DXFS_DIR2_TRACE
- EXTRA_CFLAGS += -DXFS_DQUOT_TRACE
- EXTRA_CFLAGS += -DXFS_ILOCK_TRACE
- EXTRA_CFLAGS += -DXFS_LOG_TRACE
- EXTRA_CFLAGS += -DXFS_RW_TRACE
- EXTRA_CFLAGS += -DXFS_BUF_TRACE
- EXTRA_CFLAGS += -DXFS_VNODE_TRACE
+ EXTRA_CFLAGS += -g
endif
obj-$(CONFIG_XFS_FS) += xfs.o
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "xfs.h"
#include <linux/stddef.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/migrate.h>
#include <linux/backing-dev.h>
-#include "xfs_linux.h"
STATIC kmem_zone_t *xfs_buf_zone;
STATIC kmem_shaker_t xfs_buf_shake;
btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */
btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) *
- sizeof(xfs_bufhash_t), KM_SLEEP);
+ sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE);
for (i = 0; i < (1 << btp->bt_hashshift); i++) {
spin_lock_init(&btp->bt_hash[i].bh_lock);
INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
--- /dev/null
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef __XFS_DMAPI_PRIV_H__
+#define __XFS_DMAPI_PRIV_H__
+
+/*
+ * Based on IO_ISDIRECT, decide which i_ flag is set.
+ *
+ * DM_SEM_FLAG_RD(ioflags) yields DM_FLAGS_IMUX when IO_ISDIRECT is set in
+ * ioflags and 0 otherwise. DM_SEM_FLAG_WR is always
+ * DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX.
+ */
+#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
+ DM_FLAGS_IMUX : 0)
+#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
+
+#endif /*__XFS_DMAPI_PRIV_H__*/
put_unused_fd(new_fd);
return -XFS_ERROR(-PTR_ERR(filp));
}
- if (inode->i_mode & S_IFREG)
+ if (inode->i_mode & S_IFREG) {
+ /* invisible operation should not change atime */
+ filp->f_flags |= O_NOATIME;
filp->f_op = &xfs_invis_file_operations;
+ }
fd_install(new_fd, filp);
return new_fd;
xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
xfs_set_inodeops(inode);
- spin_lock(&ip->i_flags_lock);
- ip->i_flags &= ~XFS_INEW;
- spin_unlock(&ip->i_flags_lock);
+ xfs_iflags_clear(ip, XFS_INEW);
barrier();
unlock_new_inode(inode);
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include <xfs.h>
#include "debug.h"
#include "spin.h"
-#include <asm/page.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
static char message[256]; /* keep it off the stack */
static DEFINE_SPINLOCK(xfs_err_lock);
* as we go.
*/
int
-uio_read(caddr_t src, size_t len, struct uio *uio)
+xfs_uio_read(caddr_t src, size_t len, struct uio *uio)
{
size_t count;
typedef struct uio uio_t;
typedef struct iovec iovec_t;
-extern int uio_read (caddr_t, size_t, uio_t *);
+extern int xfs_uio_read (caddr_t, size_t, uio_t *);
#endif /* __XFS_SUPPORT_MOVE_H__ */
*/
#ifndef __XFS_H__
#define __XFS_H__
+
+#ifdef CONFIG_XFS_DEBUG
+#define STATIC
+#define DEBUG 1
+#define XFS_BUF_LOCK_TRACKING 1
+/* #define QUOTADEBUG 1 */
+#endif
+
+#ifdef CONFIG_XFS_TRACE
+#define XFS_ALLOC_TRACE 1
+#define XFS_ATTR_TRACE 1
+#define XFS_BLI_TRACE 1
+#define XFS_BMAP_TRACE 1
+#define XFS_BMBT_TRACE 1
+#define XFS_DIR2_TRACE 1
+#define XFS_DQUOT_TRACE 1
+#define XFS_ILOCK_TRACE 1
+#define XFS_LOG_TRACE 1
+#define XFS_RW_TRACE 1
+#define XFS_BUF_TRACE 1
+#define XFS_VNODE_TRACE 1
+#endif
+
#include <linux-2.6/xfs_linux.h>
#endif /* __XFS_H__ */
idbp->d_off = pa->cook;
idbp->d_name[namelen] = '\0';
memcpy(idbp->d_name, pa->name, namelen);
- rval = uio_read((caddr_t)idbp, reclen, uio);
+ rval = xfs_uio_read((caddr_t)idbp, reclen, uio);
pa->done = (rval == 0);
return rval;
}
#define DM_FLAGS_IALLOCSEM_WR 0x020 /* thread holds i_alloc_sem wr */
/*
- * Based on IO_ISDIRECT, decide which i_ flag is set.
+ * Pull in platform specific event flags defines
*/
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0)
-#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
- DM_FLAGS_IMUX : 0)
-#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
-#endif
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \
- (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22))
-#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
- DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_IMUX)
-#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
-#endif
-
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,21)
-#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
- 0 : DM_FLAGS_IMUX)
-#define DM_SEM_FLAG_WR (DM_FLAGS_IMUX)
-#endif
-
+#include "xfs_dmapi_priv.h"
/*
* Macros to turn caller specified delay/block flags into
* If INEW is set this inode is being set up
* we need to pause and try again.
*/
- if (ip->i_flags & XFS_INEW) {
+ if (xfs_iflags_test(ip, XFS_INEW)) {
read_unlock(&ih->ih_lock);
delay(1);
XFS_STATS_INC(xs_ig_frecycle);
* on its way out of the system,
* we need to pause and try again.
*/
- if (ip->i_flags & XFS_IRECLAIM) {
+ if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
read_unlock(&ih->ih_lock);
delay(1);
XFS_STATS_INC(xs_ig_frecycle);
goto again;
}
+ ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+
+ /*
+ * If lookup is racing with unlink, then we
+ * should return an error immediately so we
+ * don't remove it from the reclaim list and
+ * potentially leak the inode.
+ */
+ if ((ip->i_d.di_mode == 0) &&
+ !(flags & XFS_IGET_CREATE)) {
+ read_unlock(&ih->ih_lock);
+ return ENOENT;
+ }
+
+ /*
+ * There may be transactions sitting in the
+ * incore log buffers or being flushed to disk
+ * at this time. We can't clear the
+ * XFS_IRECLAIMABLE flag until these
+ * transactions have hit the disk, otherwise we
+ * will void the guarantee the flag provides
+ * to xfs_iunpin().
+ */
+ if (xfs_ipincount(ip)) {
+ read_unlock(&ih->ih_lock);
+ xfs_log_force(mp, 0,
+ XFS_LOG_FORCE|XFS_LOG_SYNC);
+ XFS_STATS_INC(xs_ig_frecycle);
+ goto again;
+ }
vn_trace_exit(vp, "xfs_iget.alloc",
(inst_t *)__return_address);
XFS_STATS_INC(xs_ig_found);
- spin_lock(&ip->i_flags_lock);
- ip->i_flags &= ~XFS_IRECLAIMABLE;
- spin_unlock(&ip->i_flags_lock);
+ xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
version = ih->ih_version;
read_unlock(&ih->ih_lock);
xfs_ihash_promote(ih, ip, version);
if (lock_flags != 0)
xfs_ilock(ip, lock_flags);
- spin_lock(&ip->i_flags_lock);
- ip->i_flags &= ~XFS_ISTALE;
- spin_unlock(&ip->i_flags_lock);
-
+ xfs_iflags_clear(ip, XFS_ISTALE);
vn_trace_exit(vp, "xfs_iget.found",
(inst_t *)__return_address);
goto return_ip;
ih->ih_next = ip;
ip->i_udquot = ip->i_gdquot = NULL;
ih->ih_version++;
- spin_lock(&ip->i_flags_lock);
- ip->i_flags |= XFS_INEW;
- spin_unlock(&ip->i_flags_lock);
-
+ xfs_iflags_set(ip, XFS_INEW);
write_unlock(&ih->ih_lock);
/*
vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address);
if ((ip->i_d.di_mode == 0)) {
- ASSERT(!(ip->i_flags & XFS_IRECLAIMABLE));
+ ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
vn_mark_bad(vp);
}
if (inode->i_state & I_NEW)
/*
* Free all memory associated with the inode.
*/
+ xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
xfs_idestroy(ip);
}
/* Inode not in memory or we found it already,
* nothing to do
*/
- if (!ip || (ip->i_flags & XFS_ISTALE)) {
+ if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
read_unlock(&ih->ih_lock);
continue;
}
if (ip == free_ip) {
if (xfs_iflock_nowait(ip)) {
- spin_lock(&ip->i_flags_lock);
- ip->i_flags |= XFS_ISTALE;
- spin_unlock(&ip->i_flags_lock);
-
+ xfs_iflags_set(ip, XFS_ISTALE);
if (xfs_inode_clean(ip)) {
xfs_ifunlock(ip);
} else {
if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
if (xfs_iflock_nowait(ip)) {
- spin_lock(&ip->i_flags_lock);
- ip->i_flags |= XFS_ISTALE;
- spin_unlock(&ip->i_flags_lock);
+ xfs_iflags_set(ip, XFS_ISTALE);
if (xfs_inode_clean(ip)) {
xfs_ifunlock(ip);
AIL_LOCK(mp,s);
iip->ili_flush_lsn = iip->ili_item.li_lsn;
AIL_UNLOCK(mp, s);
- spin_lock(&iip->ili_inode->i_flags_lock);
- iip->ili_inode->i_flags |= XFS_ISTALE;
- spin_unlock(&iip->ili_inode->i_flags_lock);
+ /* mark the inode that owns this log item stale, as the old code did */
+ xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
pre_flushed++;
}
lip = lip->li_bio_list;
{
ASSERT(atomic_read(&ip->i_pincount) > 0);
- if (atomic_dec_and_test(&ip->i_pincount)) {
+ if (atomic_dec_and_lock(&ip->i_pincount, &ip->i_flags_lock)) {
+
/*
- * If the inode is currently being reclaimed, the
- * linux inode _and_ the xfs vnode may have been
- * freed so we cannot reference either of them safely.
- * Hence we should not try to do anything to them
- * if the xfs inode is currently in the reclaim
- * path.
+ * If the inode is currently being reclaimed, the link between
+ * the bhv_vnode and the xfs_inode will be broken after the
+ * XFS_IRECLAIM* flag is set. Hence, if these flags are not
+ * set, then we can move forward and mark the linux inode dirty
+ * knowing that it is still valid as it won't be freed until after
+ * the bhv_vnode<->xfs_inode link is broken in xfs_reclaim. The
+ * i_flags_lock is used to synchronise the setting of the
+ * XFS_IRECLAIM* flags and the breaking of the link, and so we
+ * can execute atomically w.r.t. reclaim by holding this lock
+ * here.
*
- * However, we still need to issue the unpin wakeup
- * call as the inode reclaim may be blocked waiting for
- * the inode to become unpinned.
+ * However, we still need to issue the unpin wakeup call as the
+ * inode reclaim may be blocked waiting for the inode to become
+ * unpinned.
*/
- struct inode *inode = NULL;
- spin_lock(&ip->i_flags_lock);
- if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) {
+ if (!__xfs_iflags_test(ip, XFS_IRECLAIM|XFS_IRECLAIMABLE)) {
bhv_vnode_t *vp = XFS_ITOV_NULL(ip);
+ struct inode *inode = NULL;
+
+ BUG_ON(vp == NULL);
+ inode = vn_to_inode(vp);
+ BUG_ON(inode->i_state & I_CLEAR);
/* make sync come back and flush this inode */
- if (vp) {
- inode = vn_to_inode(vp);
-
- if (!(inode->i_state &
- (I_NEW|I_FREEING|I_CLEAR))) {
- inode = igrab(inode);
- if (inode)
- mark_inode_dirty_sync(inode);
- } else
- inode = NULL;
- }
+ if (!(inode->i_state & (I_NEW|I_FREEING)))
+ mark_inode_dirty_sync(inode);
}
spin_unlock(&ip->i_flags_lock);
wake_up(&ip->i_ipin_wait);
- if (inode)
- iput(inode);
}
}
#endif
} xfs_inode_t;
+
+/*
+ * i_flags helper functions
+ *
+ * xfs_iflags_set(), xfs_iflags_clear() and xfs_iflags_test() take
+ * ip->i_flags_lock around their access to ip->i_flags. The
+ * double-underscore variants __xfs_iflags_set() and __xfs_iflags_test()
+ * access ip->i_flags without taking any lock themselves; they are
+ * intended for callers that already hold ip->i_flags_lock. There is no
+ * unlocked clear variant.
+ *
+ * The test helpers return the masked flag bits (non-zero if any of the
+ * requested flags is set), not a normalised 0/1 value.
+ */
+static inline void
+__xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
+{
+ ip->i_flags |= flags;
+}
+
+static inline void
+xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
+{
+ spin_lock(&ip->i_flags_lock);
+ __xfs_iflags_set(ip, flags);
+ spin_unlock(&ip->i_flags_lock);
+}
+
+static inline void
+xfs_iflags_clear(xfs_inode_t *ip, unsigned short flags)
+{
+ spin_lock(&ip->i_flags_lock);
+ ip->i_flags &= ~flags;
+ spin_unlock(&ip->i_flags_lock);
+}
+
+static inline int
+__xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
+{
+ return (ip->i_flags & flags);
+}
+
+static inline int
+xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
+{
+ int ret;
+ spin_lock(&ip->i_flags_lock);
+ ret = __xfs_iflags_test(ip, flags);
+ spin_unlock(&ip->i_flags_lock);
+ return ret;
+}
#endif /* __KERNEL__ */
pathlen = (int)ip->i_d.di_size;
if (ip->i_df.if_flags & XFS_IFINLINE) {
- error = uio_read(ip->i_df.if_u1.if_data, pathlen, uiop);
+ error = xfs_uio_read(ip->i_df.if_u1.if_data, pathlen, uiop);
}
else {
/*
byte_cnt = pathlen;
pathlen -= byte_cnt;
- error = uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop);
+ error = xfs_uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop);
xfs_buf_relse (bp);
}
*/
xfs_synchronize_atime(ip);
- /* If we have nothing to flush with this inode then complete the
- * teardown now, otherwise break the link between the xfs inode
- * and the linux inode and clean up the xfs inode later. This
- * avoids flushing the inode to disk during the delete operation
- * itself.
+ /*
+ * If we have nothing to flush with this inode then complete the
+ * teardown now, otherwise break the link between the xfs inode and the
+ * linux inode and clean up the xfs inode later. This avoids flushing
+ * the inode to disk during the delete operation itself.
+ *
+ * When breaking the link, we need to set the XFS_IRECLAIMABLE flag
+ * first to ensure that xfs_iunpin() will never see an xfs inode
+ * that has a linux inode being reclaimed. Synchronisation is provided
+ * by the i_flags_lock.
*/
if (!ip->i_update_core && (ip->i_itemp == NULL)) {
xfs_ilock(ip, XFS_ILOCK_EXCL);
} else {
xfs_mount_t *mp = ip->i_mount;
- /* Protect sync from us */
+ /* Protect sync and unpin from us */
XFS_MOUNT_ILOCK(mp);
- vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip));
- list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
spin_lock(&ip->i_flags_lock);
- ip->i_flags |= XFS_IRECLAIMABLE;
+ __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
+ vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip));
spin_unlock(&ip->i_flags_lock);
+ list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
XFS_MOUNT_IUNLOCK(mp);
}
return 0;
*/
write_lock(&ih->ih_lock);
spin_lock(&ip->i_flags_lock);
- if ((ip->i_flags & XFS_IRECLAIM) ||
- (!(ip->i_flags & XFS_IRECLAIMABLE) && vp == NULL)) {
+ if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
+ (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) {
spin_unlock(&ip->i_flags_lock);
write_unlock(&ih->ih_lock);
if (locked) {
}
return 1;
}
- ip->i_flags |= XFS_IRECLAIM;
+ __xfs_iflags_set(ip, XFS_IRECLAIM);
spin_unlock(&ip->i_flags_lock);
write_unlock(&ih->ih_lock);