#include "xfs_buf_item.h"
#include "xfs_inode_item.h"
#include "xfs_btree.h"
+#include "xfs_btree_trace.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_bmap.h"
xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
for (i = 0; i < nex; i++, dp++) {
xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
- ep->l0 = be64_to_cpu(get_unaligned(&dp->l0));
- ep->l1 = be64_to_cpu(get_unaligned(&dp->l1));
+ ep->l0 = get_unaligned_be64(&dp->l0);
+ ep->l1 = get_unaligned_be64(&dp->l1);
}
XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
if (whichfork != XFS_DATA_FORK ||
}
/*
+ * Allocate and initialise an xfs_inode.
+ */
+struct xfs_inode *
+xfs_inode_alloc(
+ struct xfs_mount *mp,
+ xfs_ino_t ino)
+{
+ struct xfs_inode *ip;
+
+ /*
+ * if this didn't occur in transactions, we could use
+ * KM_MAYFAIL and return NULL here on ENOMEM. Set the
+ * code up to do this anyway.
+ */
+ ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
+ if (!ip)
+ return NULL;
+
+ ASSERT(atomic_read(&ip->i_iocount) == 0);
+ ASSERT(atomic_read(&ip->i_pincount) == 0);
+ ASSERT(!spin_is_locked(&ip->i_flags_lock));
+ ASSERT(list_empty(&ip->i_reclaim));
+
+ ip->i_ino = ino;
+ ip->i_mount = mp;
+ ip->i_blkno = 0;
+ ip->i_len = 0;
+ ip->i_boffset =0;
+ ip->i_afp = NULL;
+ memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
+ ip->i_flags = 0;
+ ip->i_update_core = 0;
+ ip->i_update_size = 0;
+ ip->i_delayed_blks = 0;
+ memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
+ ip->i_size = 0;
+ ip->i_new_size = 0;
+
+ /*
+ * Initialize inode's trace buffers.
+ */
+#ifdef XFS_INODE_TRACE
+ ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_BMAP_TRACE
+ ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_BTREE_TRACE
+ ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_RW_TRACE
+ ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_ILOCK_TRACE
+ ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_DIR2_TRACE
+ ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
+#endif
+
+ return ip;
+}
+
+/*
* Given a mount structure and an inode number, return a pointer
* to a newly allocated in-core inode corresponding to the given
* inode number.
xfs_inode_t *ip;
int error;
- ASSERT(xfs_inode_zone != NULL);
-
- ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP);
- ip->i_ino = ino;
- ip->i_mount = mp;
- atomic_set(&ip->i_iocount, 0);
- spin_lock_init(&ip->i_flags_lock);
+ ip = xfs_inode_alloc(mp, ino);
+ if (!ip)
+ return ENOMEM;
/*
* Get pointer's to the on-disk inode and the buffer containing it.
*/
error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK);
if (error) {
- kmem_zone_free(xfs_inode_zone, ip);
+ xfs_idestroy(ip);
return error;
}
/*
- * Initialize inode's trace buffers.
- * Do this before xfs_iformat in case it adds entries.
- */
-#ifdef XFS_INODE_TRACE
- ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_BMAP_TRACE
- ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_BMBT_TRACE
- ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_RW_TRACE
- ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_ILOCK_TRACE
- ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_DIR2_TRACE
- ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_SLEEP);
-#endif
-
- /*
* If we got something that isn't an inode it means someone
* (nfs or dmi) has a stale handle.
*/
if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC) {
- kmem_zone_free(xfs_inode_zone, ip);
+ xfs_idestroy(ip);
xfs_trans_brelse(tp, bp);
#ifdef DEBUG
xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
xfs_dinode_from_disk(&ip->i_d, &dip->di_core);
error = xfs_iformat(ip, dip);
if (error) {
- kmem_zone_free(xfs_inode_zone, ip);
+ xfs_idestroy(ip);
xfs_trans_brelse(tp, bp);
#ifdef DEBUG
xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
}
- INIT_LIST_HEAD(&ip->i_reclaim);
-
/*
* The inode format changed when we moved the link count and
* made it 32 bits long. If this is an old format inode,
{
xfs_ino_t ino;
xfs_inode_t *ip;
- bhv_vnode_t *vp;
uint flags;
int error;
+ timespec_t tv;
/*
* Call the space management code to pick
}
ASSERT(ip != NULL);
- vp = XFS_ITOV(ip);
ip->i_d.di_mode = (__uint16_t)mode;
ip->i_d.di_onlink = 0;
ip->i_d.di_nlink = nlink;
ASSERT(ip->i_d.di_nlink == nlink);
- ip->i_d.di_uid = current_fsuid(cr);
- ip->i_d.di_gid = current_fsgid(cr);
+ ip->i_d.di_uid = current_fsuid();
+ ip->i_d.di_gid = current_fsgid();
ip->i_d.di_projid = prid;
memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
ip->i_size = 0;
ip->i_d.di_nextents = 0;
ASSERT(ip->i_d.di_nblocks == 0);
- xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD);
+
+ nanotime(&tv);
+ ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
+ ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
+ ip->i_d.di_atime = ip->i_d.di_mtime;
+ ip->i_d.di_ctime = ip->i_d.di_mtime;
+
/*
* di_gen will have been taken care of in xfs_iread.
*/
xfs_trans_log_inode(tp, ip, flags);
/* now that we have an i_mode we can setup inode ops and unlock */
- xfs_initialize_vnode(tp->t_mountp, vp, ip);
+ xfs_setup_inode(ip);
*ipp = ip;
return 0;
xfs_fsize_t last_byte;
xfs_off_t toss_start;
xfs_mount_t *mp;
- bhv_vnode_t *vp;
int error = 0;
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
(flags == XFS_ITRUNC_MAYBE));
mp = ip->i_mount;
- vp = XFS_ITOV(ip);
/* wait for the completion of any pending DIOs */
if (new_size < ip->i_size)
#ifdef DEBUG
if (new_size == 0) {
- ASSERT(VN_CACHED(vp) == 0);
+ ASSERT(VN_CACHED(VFS_I(ip)) == 0);
}
#endif
return error;
return 0;
}
-
-/*
- * xfs_igrow_start
- *
- * Do the first part of growing a file: zero any data in the last
- * block that is beyond the old EOF. We need to do this before
- * the inode is joined to the transaction to modify the i_size.
- * That way we can drop the inode lock and call into the buffer
- * cache to get the buffer mapping the EOF.
- */
-int
-xfs_igrow_start(
- xfs_inode_t *ip,
- xfs_fsize_t new_size,
- cred_t *credp)
-{
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
- ASSERT(new_size > ip->i_size);
-
- /*
- * Zero any pages that may have been created by
- * xfs_write_file() beyond the end of the file
- * and any blocks between the old and new file sizes.
- */
- return xfs_zero_eof(ip, new_size, ip->i_size);
-}
-
-/*
- * xfs_igrow_finish
- *
- * This routine is called to extend the size of a file.
- * The inode must have both the iolock and the ilock locked
- * for update and it must be a part of the current transaction.
- * The xfs_igrow_start() function must have been called previously.
- * If the change_flag is not zero, the inode change timestamp will
- * be updated.
- */
-void
-xfs_igrow_finish(
- xfs_trans_t *tp,
- xfs_inode_t *ip,
- xfs_fsize_t new_size,
- int change_flag)
-{
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
- ASSERT(ip->i_transp == tp);
- ASSERT(new_size > ip->i_size);
-
- /*
- * Update the file size. Update the inode change timestamp
- * if change_flag set.
- */
- ip->i_d.di_size = new_size;
- ip->i_size = new_size;
- if (change_flag)
- xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-}
-
-
/*
* This is called when the inode's link count goes to 0.
* We place the on-disk inode on a list in the AGI. It
}
if (ip->i_afp)
xfs_idestroy_fork(ip, XFS_ATTR_FORK);
- mrfree(&ip->i_lock);
- mrfree(&ip->i_iolock);
- freesema(&ip->i_flock);
#ifdef XFS_INODE_TRACE
ktrace_free(ip->i_trace);
#ifdef XFS_BMAP_TRACE
ktrace_free(ip->i_xtrace);
#endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
ktrace_free(ip->i_btrace);
#endif
#ifdef XFS_RW_TRACE
spin_unlock(&mp->m_ail_lock);
}
xfs_inode_item_destroy(ip);
+ ip->i_itemp = NULL;
}
+ /* asserts to verify all state is correct here */
+ ASSERT(atomic_read(&ip->i_iocount) == 0);
+ ASSERT(atomic_read(&ip->i_pincount) == 0);
+ ASSERT(!spin_is_locked(&ip->i_flags_lock));
+ ASSERT(list_empty(&ip->i_reclaim));
kmem_zone_free(xfs_inode_zone, ip);
}
/*
* xfs_iflush() will write a modified inode's changes out to the
* inode's on disk home. The caller must have the inode lock held
- * in at least shared mode and the inode flush semaphore must be
- * held as well. The inode lock will still be held upon return from
+ * in at least shared mode and the inode flush completion must be
+ * active as well. The inode lock will still be held upon return from
* the call and the caller is free to unlock it.
- * The inode flush lock will be unlocked when the inode reaches the disk.
+ * The inode flush will be completed when the inode reaches the disk.
* The flags indicate how the inode's buffer should be written out.
*/
int
XFS_STATS_INC(xs_iflush_count);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
- ASSERT(issemalocked(&(ip->i_flock)));
+ ASSERT(!completion_done(&ip->i_flush));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > ip->i_df.if_ext_max);
* flush lock and do nothing.
*/
if (xfs_inode_clean(ip)) {
- ASSERT((iip != NULL) ?
- !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1);
xfs_ifunlock(ip);
return 0;
}
#endif
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
- ASSERT(issemalocked(&(ip->i_flock)));
+ ASSERT(!completion_done(&ip->i_flush));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > ip->i_df.if_ext_max);
xfs_mount_t *mp)
{
xfs_inode_t *ip;
- bhv_vnode_t *vp;
again:
XFS_MOUNT_ILOCK(mp);
continue;
}
- vp = XFS_ITOV_NULL(ip);
- if (!vp) {
+ if (!VFS_I(ip)) {
XFS_MOUNT_IUNLOCK(mp);
xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC);
goto again;
}
- ASSERT(vn_count(vp) == 0);
+ ASSERT(vn_count(VFS_I(ip)) == 0);
ip = ip->i_mnext;
} while (ip != mp->m_inodes);
* (all extents past */
if (nex2) {
byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
- nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_SLEEP);
+ nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
erp->er_extcount -= nex2;
xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
ifp->if_u1.if_extents =
kmem_realloc(ifp->if_u1.if_extents,
rnew_size,
- ifp->if_real_bytes,
- KM_SLEEP);
+ ifp->if_real_bytes, KM_NOFS);
}
if (rnew_size > ifp->if_real_bytes) {
memset(&ifp->if_u1.if_extents[ifp->if_bytes /
xfs_ifork_t *ifp, /* inode fork pointer */
int new_size) /* number of extents in file */
{
- ifp->if_u1.if_extents = kmem_alloc(new_size, KM_SLEEP);
+ ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
memset(ifp->if_u1.if_extents, 0, new_size);
if (ifp->if_bytes) {
memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
} else {
ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
kmem_realloc(ifp->if_u1.if_ext_irec,
- new_size, size, KM_SLEEP);
+ new_size, size, KM_NOFS);
}
}
ASSERT(nextents <= XFS_LINEAR_EXTS);
size = nextents * sizeof(xfs_bmbt_rec_t);
- xfs_iext_irec_compact_full(ifp);
+ xfs_iext_irec_compact_pages(ifp);
ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
ep = ifp->if_u1.if_ext_irec->er_extbuf;
nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
ASSERT(nextents <= XFS_LINEAR_EXTS);
- erp = (xfs_ext_irec_t *)
- kmem_alloc(sizeof(xfs_ext_irec_t), KM_SLEEP);
+ erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
if (nextents == 0) {
- ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP);
+ ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
} else if (!ifp->if_real_bytes) {
xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
} else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
/* Initialize new extent record */
erp = ifp->if_u1.if_ext_irec;
- erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP);
+ erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
erp[erp_idx].er_extcount = 0;
* compaction policy is as follows:
*
* Full Compaction: Extents fit into a single page (or inline buffer)
- * Full Compaction: Extents occupy less than 10% of allocated space
- * Partial Compaction: Extents occupy > 10% and < 50% of allocated space
+ * Partial Compaction: Extents occupy less than 50% of allocated space
* No Compaction: Extents occupy at least 50% of allocated space
*/
void
xfs_iext_direct_to_inline(ifp, nextents);
} else if (nextents <= XFS_LINEAR_EXTS) {
xfs_iext_indirect_to_direct(ifp);
- } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) {
- xfs_iext_irec_compact_full(ifp);
} else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
xfs_iext_irec_compact_pages(ifp);
}
erp_next = erp + 1;
if (erp_next->er_extcount <=
(XFS_LINEAR_EXTS - erp->er_extcount)) {
- memmove(&erp->er_extbuf[erp->er_extcount],
+ memcpy(&erp->er_extbuf[erp->er_extcount],
erp_next->er_extbuf, erp_next->er_extcount *
sizeof(xfs_bmbt_rec_t));
erp->er_extcount += erp_next->er_extcount;
}
/*
- * Fully compact the extent records managed by the indirection array.
- */
-void
-xfs_iext_irec_compact_full(
- xfs_ifork_t *ifp) /* inode fork pointer */
-{
- xfs_bmbt_rec_host_t *ep, *ep_next; /* extent record pointers */
- xfs_ext_irec_t *erp, *erp_next; /* extent irec pointers */
- int erp_idx = 0; /* extent irec index */
- int ext_avail; /* empty entries in ex list */
- int ext_diff; /* number of exts to add */
- int nlists; /* number of irec's (ex lists) */
-
- ASSERT(ifp->if_flags & XFS_IFEXTIREC);
- nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
- erp = ifp->if_u1.if_ext_irec;
- ep = &erp->er_extbuf[erp->er_extcount];
- erp_next = erp + 1;
- ep_next = erp_next->er_extbuf;
- while (erp_idx < nlists - 1) {
- ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
- ext_diff = MIN(ext_avail, erp_next->er_extcount);
- memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
- erp->er_extcount += ext_diff;
- erp_next->er_extcount -= ext_diff;
- /* Remove next page */
- if (erp_next->er_extcount == 0) {
- /*
- * Free page before removing extent record
- * so er_extoffs don't get modified in
- * xfs_iext_irec_remove.
- */
- kmem_free(erp_next->er_extbuf);
- erp_next->er_extbuf = NULL;
- xfs_iext_irec_remove(ifp, erp_idx + 1);
- erp = &ifp->if_u1.if_ext_irec[erp_idx];
- nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
- /* Update next page */
- } else {
- /* Move rest of page up to become next new page */
- memmove(erp_next->er_extbuf, ep_next,
- erp_next->er_extcount * sizeof(xfs_bmbt_rec_t));
- ep_next = erp_next->er_extbuf;
- memset(&ep_next[erp_next->er_extcount], 0,
- (XFS_LINEAR_EXTS - erp_next->er_extcount) *
- sizeof(xfs_bmbt_rec_t));
- }
- if (erp->er_extcount == XFS_LINEAR_EXTS) {
- erp_idx++;
- if (erp_idx < nlists)
- erp = &ifp->if_u1.if_ext_irec[erp_idx];
- else
- break;
- }
- ep = &erp->er_extbuf[erp->er_extcount];
- erp_next = erp + 1;
- ep_next = erp_next->er_extbuf;
- }
-}
-
-/*
* This is called to update the er_extoff field in the indirection
* array when extents have been added or removed from one of the
* extent lists. erp_idx contains the irec index to begin updating