X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fxfs%2Fxfs_vnodeops.c;h=603459229904e0b6701b4a883b262041967e9a0a;hb=37c42524d6090644206ae6d310d7e830bd3ccb47;hp=de49601919c138e4fcb923191ccbf53d20b251de;hpb=c41564b5af328ea4600b26119f6c9c8e1eb5c28b;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index de49601..6034592 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -16,8 +16,6 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include - #include "xfs.h" #include "xfs_fs.h" #include "xfs_types.h" @@ -27,7 +25,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" @@ -35,13 +32,11 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" -#include "xfs_dir_leaf.h" #include "xfs_itable.h" #include "xfs_btree.h" #include "xfs_ialloc.h" @@ -56,34 +51,16 @@ #include "xfs_refcache.h" #include "xfs_trans_space.h" #include "xfs_log_priv.h" -#include "xfs_mac.h" - - -/* - * The maximum pathlen is 1024 bytes. Since the minimum file system - * blocksize is 512 bytes, we can get a max of 2 extents back from - * bmapi. - */ -#define SYMLINK_MAPS 2 +#include "xfs_filestream.h" -/* - * For xfs, we check that the file isn't too big to be opened by this kernel. - * No other open action is required for regular files. Devices are handled - * through the specfs file system, pipes through fifofs. Device and - * fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively, - * when a new vnode is first looked up or created. - */ STATIC int xfs_open( bhv_desc_t *bdp, cred_t *credp) { int mode; - vnode_t *vp; - xfs_inode_t *ip; - - vp = BHV_TO_VNODE(bdp); - ip = XFS_BHVTOI(bdp); + bhv_vnode_t *vp = BHV_TO_VNODE(bdp); + xfs_inode_t *ip = XFS_BHVTOI(bdp); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return XFS_ERROR(EIO); @@ -101,20 +78,19 @@ xfs_open( return 0; } - /* * xfs_getattr */ STATIC int xfs_getattr( bhv_desc_t *bdp, - vattr_t *vap, + bhv_vattr_t *vap, int flags, cred_t *credp) { xfs_inode_t *ip; xfs_mount_t *mp; - vnode_t *vp; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); @@ -128,7 +104,7 @@ xfs_getattr( if (!(flags & ATTR_LAZY)) xfs_ilock(ip, XFS_ILOCK_SHARED); - vap->va_size = ip->i_d.di_size; + vap->va_size = XFS_ISIZE(ip); if (vap->va_mask == XFS_AT_SIZE) goto all_done; @@ -178,9 +154,8 @@ xfs_getattr( * realtime extent size or the realtime volume's * extent size. */ - vap->va_blocksize = ip->i_d.di_extsize ? - (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) : - (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog); + vap->va_blocksize = + xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; } break; } @@ -241,7 +216,7 @@ xfs_getattr( int xfs_setattr( bhv_desc_t *bdp, - vattr_t *vap, + bhv_vattr_t *vap, int flags, cred_t *credp) { @@ -255,7 +230,7 @@ xfs_setattr( uid_t uid=0, iuid=0; gid_t gid=0, igid=0; int timeflags = 0; - vnode_t *vp; + bhv_vnode_t *vp; xfs_prid_t projid=0, iprojid=0; int mandlock_before, mandlock_after; struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; @@ -347,7 +322,6 @@ xfs_setattr( */ tp = NULL; lock_flags = XFS_ILOCK_EXCL; - ASSERT(flags & ATTR_NOLOCK ? flags & ATTR_DMI : 1); if (flags & ATTR_NOLOCK) need_iolock = 0; if (!(mask & XFS_AT_SIZE)) { @@ -492,7 +466,7 @@ xfs_setattr( if (mask & XFS_AT_SIZE) { /* Short circuit the truncate case for zero length files */ if ((vap->va_size == 0) && - (ip->i_d.di_size == 0) && (ip->i_d.di_nextents == 0)) { + (ip->i_size == 0) && (ip->i_d.di_nextents == 0)) { xfs_iunlock(ip, XFS_ILOCK_EXCL); lock_flags &= ~XFS_ILOCK_EXCL; if (mask & XFS_AT_CTIME) @@ -610,12 +584,35 @@ xfs_setattr( */ if (mask & XFS_AT_SIZE) { code = 0; - if ((vap->va_size > ip->i_d.di_size) && + if ((vap->va_size > ip->i_size) && (flags & ATTR_NOSIZETOK) == 0) { code = xfs_igrow_start(ip, vap->va_size, credp); } xfs_iunlock(ip, XFS_ILOCK_EXCL); - vn_iowait(vp); /* wait for the completion of any pending DIOs */ + + /* + * We are going to log the inode size change in this + * transaction so any previous writes that are beyond the on + * disk EOF and the new EOF that have not been written out need + * to be written here. If we do not write the data out, we + * expose ourselves to the null files problem. + * + * Only flush from the on disk size to the smaller of the in + * memory file size or the new size as that's the range we + * really care about here and prevents waiting for other data + * not within the range we care about here. + */ + if (!code && + (ip->i_size != ip->i_d.di_size) && + (vap->va_size > ip->i_d.di_size)) { + code = bhv_vop_flush_pages(XFS_ITOV(ip), + ip->i_d.di_size, vap->va_size, + XFS_B_ASYNC, FI_NONE); + } + + /* wait for all I/O to complete */ + vn_iowait(vp); + if (!code) code = xfs_itruncate_data(ip, vap->va_size); if (code) { @@ -650,10 +647,10 @@ xfs_setattr( * Truncate file. Must have write permission and not be a directory. */ if (mask & XFS_AT_SIZE) { - if (vap->va_size > ip->i_d.di_size) { + if (vap->va_size > ip->i_size) { xfs_igrow_finish(tp, ip, vap->va_size, !(flags & ATTR_DMI)); - } else if ((vap->va_size <= ip->i_d.di_size) || + } else if ((vap->va_size <= ip->i_size) || ((vap->va_size == 0) && ip->i_d.di_nextents)) { /* * signal a sync transaction unless @@ -666,9 +663,17 @@ xfs_setattr( ((ip->i_d.di_nlink != 0 || !(mp->m_flags & XFS_MOUNT_WSYNC)) ? 1 : 0)); - if (code) { + if (code) goto abort_return; - } + /* + * Truncated "down", so we're removing references + * to old data here - if we now delay flushing for + * a long time, we expose ourselves unduly to the + * notorious NULL files problem. So, we mark this + * vnode and flush it when the file is closed, and + * do not wait the usual (long) time for writeout. + */ + VTRUNCATE(vp); } /* * Have to do this even if the file's size doesn't change. @@ -800,6 +805,10 @@ xfs_setattr( di_flags |= XFS_DIFLAG_NODUMP; if (vap->va_xflags & XFS_XFLAG_PROJINHERIT) di_flags |= XFS_DIFLAG_PROJINHERIT; + if (vap->va_xflags & XFS_XFLAG_NODEFRAG) + di_flags |= XFS_DIFLAG_NODEFRAG; + if (vap->va_xflags & XFS_XFLAG_FILESTREAM) + di_flags |= XFS_DIFLAG_FILESTREAM; if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { if (vap->va_xflags & XFS_XFLAG_RTINHERIT) di_flags |= XFS_DIFLAG_RTINHERIT; @@ -859,7 +868,7 @@ xfs_setattr( if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); - code = xfs_trans_commit(tp, commit_flags, NULL); + code = xfs_trans_commit(tp, commit_flags); } /* @@ -869,7 +878,7 @@ xfs_setattr( */ mandlock_after = MANDLOCK(vp, ip->i_d.di_mode); if (mandlock_before != mandlock_after) { - VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_ENF_LOCKING, + bhv_vop_vnode_change(vp, VCHANGE_FLAGS_ENF_LOCKING, mandlock_after); } @@ -936,6 +945,13 @@ xfs_access( /* + * The maximum pathlen is 1024 bytes. Since the minimum file system + * blocksize is 512 bytes, we can get a max of 2 extents back from + * bmapi. + */ +#define SYMLINK_MAPS 2 + +/* * xfs_readlink * */ @@ -950,7 +966,7 @@ xfs_readlink( int count; xfs_off_t offset; int pathlen; - vnode_t *vp; + bhv_vnode_t *vp; int error = 0; xfs_mount_t *mp; int nmaps; @@ -991,7 +1007,7 @@ xfs_readlink( pathlen = (int)ip->i_d.di_size; if (ip->i_df.if_flags & XFS_IFINLINE) { - error = uio_read(ip->i_df.if_u1.if_data, pathlen, uiop); + error = xfs_uio_read(ip->i_df.if_u1.if_data, pathlen, uiop); } else { /* @@ -1000,7 +1016,7 @@ xfs_readlink( nmaps = SYMLINK_MAPS; error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), - 0, NULL, 0, mval, &nmaps, NULL); + 0, NULL, 0, mval, &nmaps, NULL, NULL); if (error) { goto error_return; @@ -1022,7 +1038,7 @@ xfs_readlink( byte_cnt = pathlen; pathlen -= byte_cnt; - error = uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop); + error = xfs_uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop); xfs_buf_relse (bp); } @@ -1066,6 +1082,9 @@ xfs_fsync( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return XFS_ERROR(EIO); + if (flag & FSYNC_DATA) + filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); + /* * We always need to make sure that the required inode state * is safe on disk. The vnode might be clean but because @@ -1155,7 +1174,7 @@ xfs_fsync( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); if (flag & FSYNC_WAIT) xfs_trans_set_sync(tp); - error = _xfs_trans_commit(tp, 0, NULL, &log_flushed); + error = _xfs_trans_commit(tp, 0, &log_flushed); xfs_iunlock(ip, XFS_ILOCK_EXCL); } @@ -1180,13 +1199,15 @@ xfs_fsync( } /* - * This is called by xfs_inactive to free any blocks beyond eof, - * when the link count isn't zero. + * This is called by xfs_inactive to free any blocks beyond eof + * when the link count isn't zero and by xfs_dm_punch_hole() when + * punching a hole to EOF. */ -STATIC int -xfs_inactive_free_eofblocks( +int +xfs_free_eofblocks( xfs_mount_t *mp, - xfs_inode_t *ip) + xfs_inode_t *ip, + int flags) { xfs_trans_t *tp; int error; @@ -1195,12 +1216,13 @@ xfs_inactive_free_eofblocks( xfs_filblks_t map_len; int nimaps; xfs_bmbt_irec_t imap; + int use_iolock = (flags & XFS_FREE_EOF_LOCK); /* * Figure out if there are any blocks beyond the end * of the file. If not, then there is nothing to do. */ - end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_d.di_size)); + end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size)); last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); map_len = last_fsb - end_fsb; if (map_len <= 0) @@ -1208,8 +1230,8 @@ xfs_inactive_free_eofblocks( nimaps = 1; xfs_ilock(ip, XFS_ILOCK_SHARED); - error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0, - NULL, 0, &imap, &nimaps, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, end_fsb, map_len, 0, + NULL, 0, &imap, &nimaps, NULL, NULL); xfs_iunlock(ip, XFS_ILOCK_SHARED); if (!error && (nimaps != 0) && @@ -1235,9 +1257,16 @@ xfs_inactive_free_eofblocks( * cache and we can't * do that within a transaction. */ - xfs_ilock(ip, XFS_IOLOCK_EXCL); - xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, - ip->i_d.di_size); + if (use_iolock) + xfs_ilock(ip, XFS_IOLOCK_EXCL); + error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, + ip->i_size); + if (error) { + xfs_trans_cancel(tp, 0); + if (use_iolock) + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return error; + } error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), @@ -1257,7 +1286,7 @@ xfs_inactive_free_eofblocks( xfs_trans_ihold(tp, ip); error = xfs_itruncate_finish(&tp, ip, - ip->i_d.di_size, + ip->i_size, XFS_DATA_FORK, 0); /* @@ -1270,10 +1299,10 @@ xfs_inactive_free_eofblocks( XFS_TRANS_ABORT)); } else { error = xfs_trans_commit(tp, - XFS_TRANS_RELEASE_LOG_RES, - NULL); + XFS_TRANS_RELEASE_LOG_RES); } - xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); + xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL) + : XFS_ILOCK_EXCL)); } return error; } @@ -1338,7 +1367,7 @@ xfs_inactive_symlink_rmt( nmaps = ARRAY_SIZE(mval); if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size), XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps, - &free_list))) + &free_list, NULL))) goto error0; /* * Invalidate the block(s). @@ -1353,13 +1382,13 @@ xfs_inactive_symlink_rmt( * Unmap the dead block(s) to the free_list. */ if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, - &first_block, &free_list, &done))) + &first_block, &free_list, NULL, &done))) goto error1; ASSERT(done); /* * Commit the first transaction. This logs the EFI and the inode. */ - if ((error = xfs_bmap_finish(&tp, &free_list, first_block, &committed))) + if ((error = xfs_bmap_finish(&tp, &free_list, &committed))) goto error1; /* * The transaction must have been committed, since there were @@ -1385,7 +1414,7 @@ xfs_inactive_symlink_rmt( * we need to unlock the inode since the new transaction doesn't * have the inode attached. */ - error = xfs_trans_commit(tp, 0, NULL); + error = xfs_trans_commit(tp, 0); tp = ntp; if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); @@ -1469,9 +1498,6 @@ xfs_inactive_symlink_local( return 0; } -/* - * - */ STATIC int xfs_inactive_attrs( xfs_inode_t *ip, @@ -1485,7 +1511,7 @@ xfs_inactive_attrs( tp = *tpp; mp = ip->i_mount; ASSERT(ip->i_d.di_forkoff != 0); - xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(ip, XFS_ILOCK_EXCL); error = xfs_attr_inactive(ip); @@ -1524,37 +1550,61 @@ xfs_release( bhv_desc_t *bdp) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; xfs_mount_t *mp; int error; vp = BHV_TO_VNODE(bdp); ip = XFS_BHVTOI(bdp); + mp = ip->i_mount; - if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) { + if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) return 0; - } /* If this is a read-only mount, don't do this (would generate I/O) */ if (vp->v_vfsp->vfs_flag & VFS_RDONLY) return 0; + if (!XFS_FORCED_SHUTDOWN(mp)) { + /* + * If we are using filestreams, and we have an unlinked + * file that we are processing the last close on, then nothing + * will be able to reopen and write to this file. Purge this + * inode from the filestreams cache so that it doesn't delay + * teardown of the inode. + */ + if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip)) + xfs_filestream_deassociate(ip); + + /* + * If we previously truncated this file and removed old data + * in the process, we want to initiate "early" writeout on + * the last close. This is an attempt to combat the notorious + * NULL files problem which is particularly noticable from a + * truncate down, buffered (re-)write (delalloc), followed by + * a crash. What we are effectively doing here is + * significantly reducing the time window where we'd otherwise + * be exposed to that problem. + */ + if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0) + bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE); + } + #ifdef HAVE_REFCACHE /* If we are in the NFS reference cache then don't do this now */ if (ip->i_refcache) return 0; #endif - mp = ip->i_mount; - if (ip->i_d.di_nlink != 0) { if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && - ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || + ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || ip->i_delayed_blks > 0)) && (ip->i_df.if_flags & XFS_IFEXTENTS)) && (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { - if ((error = xfs_inactive_free_eofblocks(mp, ip))) + error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); + if (error) return error; /* Update linux inode block count after free above */ vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, @@ -1579,8 +1629,8 @@ xfs_inactive( cred_t *credp) { xfs_inode_t *ip; - vnode_t *vp; - xfs_bmap_free_t free_list; + bhv_vnode_t *vp; + xfs_bmap_free_t free_list; xfs_fsblock_t first_block; int committed; xfs_trans_t *tp; @@ -1610,8 +1660,8 @@ xfs_inactive( * only one with a reference to the inode. */ truncate = ((ip->i_d.di_nlink == 0) && - ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0) || - (ip->i_delayed_blks > 0)) && + ((ip->i_d.di_size != 0) || (ip->i_size != 0) || + (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) && ((ip->i_d.di_mode & S_IFMT) == S_IFREG)); mp = ip->i_mount; @@ -1629,13 +1679,14 @@ xfs_inactive( if (ip->i_d.di_nlink != 0) { if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && - ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || + ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || ip->i_delayed_blks > 0)) && (ip->i_df.if_flags & XFS_IFEXTENTS) && (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || (ip->i_delayed_blks != 0)))) { - if ((error = xfs_inactive_free_eofblocks(mp, ip))) + error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); + if (error) return VN_INACTIVE_CACHE; /* Update linux inode block count after free above */ vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, @@ -1659,7 +1710,12 @@ xfs_inactive( */ xfs_ilock(ip, XFS_IOLOCK_EXCL); - xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0); + error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0); + if (error) { + xfs_trans_cancel(tp, 0); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return VN_INACTIVE_CACHE; + } error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), @@ -1760,7 +1816,7 @@ xfs_inactive( cmn_err(CE_NOTE, "xfs_inactive: xfs_ifree() returned an error = %d on %s", error, mp->m_fsname); - xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR); + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); } xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); } else { @@ -1773,9 +1829,8 @@ xfs_inactive( * Just ignore errors at this point. There is * nothing we can do except to try to keep going. */ - (void) xfs_bmap_finish(&tp, &free_list, first_block, - &committed); - (void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + (void) xfs_bmap_finish(&tp, &free_list, &committed); + (void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); } /* * Release the dquots held by inode, if any. @@ -1795,17 +1850,17 @@ xfs_inactive( STATIC int xfs_lookup( bhv_desc_t *dir_bdp, - vname_t *dentry, - vnode_t **vpp, + bhv_vname_t *dentry, + bhv_vnode_t **vpp, int flags, - vnode_t *rdir, + bhv_vnode_t *rdir, cred_t *credp) { xfs_inode_t *dp, *ip; xfs_ino_t e_inum; int error; uint lock_mode; - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; dir_vp = BHV_TO_VNODE(dir_bdp); vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); @@ -1832,15 +1887,15 @@ xfs_lookup( STATIC int xfs_create( bhv_desc_t *dir_bdp, - vname_t *dentry, - vattr_t *vap, - vnode_t **vpp, + bhv_vname_t *dentry, + bhv_vattr_t *vap, + bhv_vnode_t **vpp, cred_t *credp) { char *name = VNAME(dentry); - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; xfs_inode_t *dp, *ip; - vnode_t *vp=NULL; + bhv_vnode_t *vp = NULL; xfs_trans_t *tp; xfs_mount_t *mp; xfs_dev_t rdev; @@ -1925,7 +1980,7 @@ xfs_create( goto error_return; } - xfs_ilock(dp, XFS_ILOCK_EXCL); + xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); XFS_BMAP_INIT(&free_list, &first_block); @@ -1938,8 +1993,7 @@ xfs_create( if (error) goto error_return; - if (resblks == 0 && - (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen))) + if (resblks == 0 && (error = xfs_dir_canenter(tp, dp, name, namelen))) goto error_return; rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0; error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1, @@ -1970,9 +2024,9 @@ xfs_create( xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); dp_joined_to_trans = B_TRUE; - error = XFS_DIR_CREATENAME(mp, tp, dp, name, namelen, ip->i_ino, - &first_block, &free_list, - resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0); + error = xfs_dir_createname(tp, dp, name, namelen, ip->i_ino, + &first_block, &free_list, resblks ? + resblks - XFS_IALLOC_SPACE_RES(mp) : 0); if (error) { ASSERT(error != ENOSPC); goto abort_return; @@ -2006,13 +2060,13 @@ xfs_create( IHOLD(ip); vp = XFS_ITOV(ip); - error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); + error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { xfs_bmap_cancel(&free_list); goto abort_rele; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) { IRELE(ip); tp = NULL; @@ -2026,7 +2080,7 @@ xfs_create( * Propagate the fact that the vnode changed after the * xfs_inode locks have been released. */ - VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_TRUNCATED, 3); + bhv_vop_vnode_change(vp, VCHANGE_FLAGS_TRUNCATED, 3); *vpp = vp; @@ -2107,7 +2161,6 @@ int xfs_rm_attempts; STATIC int xfs_lock_dir_and_entry( xfs_inode_t *dp, - vname_t *dentry, xfs_inode_t *ip) /* inode of entry 'name' */ { int attempts; @@ -2121,7 +2174,7 @@ xfs_lock_dir_and_entry( attempts = 0; again: - xfs_ilock(dp, XFS_ILOCK_EXCL); + xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); e_inum = ip->i_ino; @@ -2190,6 +2243,21 @@ int xfs_lock_delays; #endif /* + * Bump the subclass so xfs_lock_inodes() acquires each lock with + * a different value + */ +static inline int +xfs_lock_inumorder(int lock_mode, int subclass) +{ + if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) + lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; + if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) + lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; + + return lock_mode; +} + +/* * The following routine will lock n inodes in exclusive mode. * We assume the caller calls us with the inodes in i_ino order. * @@ -2256,7 +2324,7 @@ again: * that is in the AIL. */ ASSERT(i != 0); - if (!xfs_ilock_nowait(ips[i], lock_mode)) { + if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) { attempts++; /* @@ -2291,7 +2359,7 @@ again: goto again; } } else { - xfs_ilock(ips[i], lock_mode); + xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i)); } } @@ -2321,10 +2389,10 @@ int remove_which_error_return = 0; STATIC int xfs_remove( bhv_desc_t *dir_bdp, - vname_t *dentry, + bhv_vname_t *dentry, cred_t *credp) { - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; char *name = VNAME(dentry); xfs_inode_t *dp, *ip; xfs_trans_t *tp = NULL; @@ -2350,10 +2418,15 @@ xfs_remove( namelen = VNAMELEN(dentry); + if (!xfs_get_dir_entry(dentry, &ip)) { + dm_di_mode = ip->i_d.di_mode; + IRELE(ip); + } + if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp, DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, - name, NULL, 0, 0, 0); + name, NULL, dm_di_mode, 0, 0); if (error) return error; } @@ -2421,7 +2494,7 @@ xfs_remove( return error; } - error = xfs_lock_dir_and_entry(dp, dentry, ip); + error = xfs_lock_dir_and_entry(dp, ip); if (error) { REMOVE_DEBUG_TRACE(__LINE__); xfs_trans_cancel(tp, cancel_flags); @@ -2448,8 +2521,8 @@ xfs_remove( * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. */ XFS_BMAP_INIT(&free_list, &first_block); - error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, ip->i_ino, - &first_block, &free_list, 0); + error = xfs_dir_removename(tp, dp, name, namelen, ip->i_ino, + &first_block, &free_list, 0); if (error) { ASSERT(error != ENOENT); REMOVE_DEBUG_TRACE(__LINE__); @@ -2486,13 +2559,13 @@ xfs_remove( xfs_trans_set_sync(tp); } - error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); + error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { REMOVE_DEBUG_TRACE(__LINE__); goto error_rele; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) { IRELE(ip); goto std_return; @@ -2506,12 +2579,21 @@ xfs_remove( */ xfs_refcache_purge_ip(ip); + /* + * If we are using filestreams, kill the stream association. + * If the file is still open it may get a new one but that + * will get killed on last close in xfs_close() so we don't + * have to worry about that. + */ + if (link_zero && xfs_inode_is_filestream(ip)) + xfs_filestream_deassociate(ip); + vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); /* * Let interposed file systems know about removed links. */ - VOP_LINK_REMOVED(XFS_ITOV(ip), dir_vp, link_zero); + bhv_vop_link_removed(XFS_ITOV(ip), dir_vp, link_zero); IRELE(ip); @@ -2564,8 +2646,8 @@ xfs_remove( STATIC int xfs_link( bhv_desc_t *target_dir_bdp, - vnode_t *src_vp, - vname_t *dentry, + bhv_vnode_t *src_vp, + bhv_vname_t *dentry, cred_t *credp) { xfs_inode_t *tdp, *sip; @@ -2577,7 +2659,7 @@ xfs_link( xfs_fsblock_t first_block; int cancel_flags; int committed; - vnode_t *target_dir_vp; + bhv_vnode_t *target_dir_vp; int resblks; char *target_name = VNAME(dentry); int target_namelen; @@ -2587,8 +2669,7 @@ xfs_link( vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address); target_namelen = VNAMELEN(dentry); - if (VN_ISDIR(src_vp)) - return XFS_ERROR(EPERM); + ASSERT(!VN_ISDIR(src_vp)); sip = xfs_vtoi(src_vp); tdp = XFS_BHVTOI(target_dir_bdp); @@ -2663,18 +2744,17 @@ xfs_link( */ if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (tdp->i_d.di_projid != sip->i_d.di_projid))) { - error = XFS_ERROR(EPERM); + error = XFS_ERROR(EXDEV); goto error_return; } if (resblks == 0 && - (error = XFS_DIR_CANENTER(mp, tp, tdp, target_name, - target_namelen))) + (error = xfs_dir_canenter(tp, tdp, target_name, target_namelen))) goto error_return; XFS_BMAP_INIT(&free_list, &first_block); - error = XFS_DIR_CREATENAME(mp, tp, tdp, target_name, target_namelen, + error = xfs_dir_createname(tp, tdp, target_name, target_namelen, sip->i_ino, &first_block, &free_list, resblks); if (error) @@ -2684,9 +2764,8 @@ xfs_link( xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); error = xfs_bumplink(tp, sip); - if (error) { + if (error) goto abort_return; - } /* * If this is a synchronous mount, make sure that the @@ -2697,16 +2776,15 @@ xfs_link( xfs_trans_set_sync(tp); } - error = xfs_bmap_finish (&tp, &free_list, first_block, &committed); + error = xfs_bmap_finish (&tp, &free_list, &committed); if (error) { xfs_bmap_cancel(&free_list); goto abort_return; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); - if (error) { + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + if (error) goto std_return; - } /* Fall through to std_return with error = 0. */ std_return: @@ -2727,6 +2805,8 @@ std_return: xfs_trans_cancel(tp, cancel_flags); goto std_return; } + + /* * xfs_mkdir * @@ -2734,15 +2814,15 @@ std_return: STATIC int xfs_mkdir( bhv_desc_t *dir_bdp, - vname_t *dentry, - vattr_t *vap, - vnode_t **vpp, + bhv_vname_t *dentry, + bhv_vattr_t *vap, + bhv_vnode_t **vpp, cred_t *credp) { char *dir_name = VNAME(dentry); xfs_inode_t *dp; xfs_inode_t *cdp; /* inode of created dir */ - vnode_t *cvp; /* vnode of created dir */ + bhv_vnode_t *cvp; /* vnode of created dir */ xfs_trans_t *tp; xfs_mount_t *mp; int cancel_flags; @@ -2750,7 +2830,7 @@ xfs_mkdir( int committed; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; boolean_t dp_joined_to_trans; boolean_t created = B_FALSE; int dm_event_sent = 0; @@ -2822,7 +2902,7 @@ xfs_mkdir( goto error_return; } - xfs_ilock(dp, XFS_ILOCK_EXCL); + xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); /* * Check for directory link count overflow. @@ -2840,7 +2920,7 @@ xfs_mkdir( goto error_return; if (resblks == 0 && - (error = XFS_DIR_CANENTER(mp, tp, dp, dir_name, dir_namelen))) + (error = xfs_dir_canenter(tp, dp, dir_name, dir_namelen))) goto error_return; /* * create the directory inode. @@ -2867,9 +2947,9 @@ xfs_mkdir( XFS_BMAP_INIT(&free_list, &first_block); - error = XFS_DIR_CREATENAME(mp, tp, dp, dir_name, dir_namelen, - cdp->i_ino, &first_block, &free_list, - resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0); + error = xfs_dir_createname(tp, dp, dir_name, dir_namelen, cdp->i_ino, + &first_block, &free_list, resblks ? + resblks - XFS_IALLOC_SPACE_RES(mp) : 0); if (error) { ASSERT(error != ENOSPC); goto error1; @@ -2883,16 +2963,14 @@ xfs_mkdir( */ dp->i_gen++; - error = XFS_DIR_INIT(mp, tp, cdp, dp); - if (error) { + error = xfs_dir_init(tp, cdp, dp); + if (error) goto error2; - } cdp->i_gen = 1; error = xfs_bumplink(tp, dp); - if (error) { + if (error) goto error2; - } cvp = XFS_ITOV(cdp); @@ -2915,13 +2993,13 @@ xfs_mkdir( xfs_trans_set_sync(tp); } - error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); + error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { IRELE(cdp); goto error2; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); XFS_QM_DQRELE(mp, udqp); XFS_QM_DQRELE(mp, gdqp); if (error) { @@ -2969,7 +3047,7 @@ std_return: STATIC int xfs_rmdir( bhv_desc_t *dir_bdp, - vname_t *dentry, + bhv_vname_t *dentry, cred_t *credp) { char *name = VNAME(dentry); @@ -2982,8 +3060,8 @@ xfs_rmdir( xfs_fsblock_t first_block; int cancel_flags; int committed; - vnode_t *dir_vp; - int dm_di_mode = 0; + bhv_vnode_t *dir_vp; + int dm_di_mode = S_IFDIR; int last_cdp_link; int namelen; uint resblks; @@ -2998,11 +3076,16 @@ xfs_rmdir( return XFS_ERROR(EIO); namelen = VNAMELEN(dentry); + if (!xfs_get_dir_entry(dentry, &cdp)) { + dm_di_mode = cdp->i_d.di_mode; + IRELE(cdp); + } + if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp, DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, - name, NULL, 0, 0, 0); + name, NULL, dm_di_mode, 0, 0); if (error) return XFS_ERROR(error); } @@ -3076,7 +3159,7 @@ xfs_rmdir( * that the directory entry for the child directory inode has * not changed while we were obtaining a log reservation. */ - error = xfs_lock_dir_and_entry(dp, dentry, cdp); + error = xfs_lock_dir_and_entry(dp, cdp); if (error) { xfs_trans_cancel(tp, cancel_flags); IRELE(cdp); @@ -3101,16 +3184,15 @@ xfs_rmdir( error = XFS_ERROR(ENOTEMPTY); goto error_return; } - if (!XFS_DIR_ISEMPTY(mp, cdp)) { + if (!xfs_dir_isempty(cdp)) { error = XFS_ERROR(ENOTEMPTY); goto error_return; } - error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, cdp->i_ino, - &first_block, &free_list, resblks); - if (error) { + error = xfs_dir_removename(tp, dp, name, namelen, cdp->i_ino, + &first_block, &free_list, resblks); + if (error) goto error1; - } xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -3162,7 +3244,7 @@ xfs_rmdir( xfs_trans_set_sync(tp); } - error = xfs_bmap_finish (&tp, &free_list, first_block, &committed); + error = xfs_bmap_finish (&tp, &free_list, &committed); if (error) { xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | @@ -3171,7 +3253,7 @@ xfs_rmdir( goto std_return; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) { IRELE(cdp); goto std_return; @@ -3181,7 +3263,7 @@ xfs_rmdir( /* * Let interposed file systems know about removed links. */ - VOP_LINK_REMOVED(XFS_ITOV(cdp), dir_vp, last_cdp_link); + bhv_vop_link_removed(XFS_ITOV(cdp), dir_vp, last_cdp_link); IRELE(cdp); @@ -3209,8 +3291,6 @@ xfs_rmdir( /* - * xfs_readdir - * * Read dp's entries starting at uiop->uio_offset and translate them into * bufsize bytes worth of struct dirents starting at bufbase. */ @@ -3230,28 +3310,23 @@ xfs_readdir( (inst_t *)__return_address); dp = XFS_BHVTOI(dir_bdp); - if (XFS_FORCED_SHUTDOWN(dp->i_mount)) { + if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return XFS_ERROR(EIO); - } lock_mode = xfs_ilock_map_shared(dp); - error = XFS_DIR_GETDENTS(dp->i_mount, tp, dp, uiop, eofp); + error = xfs_dir_getdents(tp, dp, uiop, eofp); xfs_iunlock_map_shared(dp, lock_mode); return error; } -/* - * xfs_symlink - * - */ STATIC int xfs_symlink( bhv_desc_t *dir_bdp, - vname_t *dentry, - vattr_t *vap, + bhv_vname_t *dentry, + bhv_vattr_t *vap, char *target_path, - vnode_t **vpp, + bhv_vnode_t **vpp, cred_t *credp) { xfs_trans_t *tp; @@ -3263,7 +3338,7 @@ xfs_symlink( xfs_bmap_free_t free_list; xfs_fsblock_t first_block; boolean_t dp_joined_to_trans; - vnode_t *dir_vp; + bhv_vnode_t *dir_vp; uint cancel_flags; int committed; xfs_fileoff_t first_fsb; @@ -3308,7 +3383,7 @@ xfs_symlink( int len, total; char *path; - for(total = 0, path = target_path; total < pathlen;) { + for (total = 0, path = target_path; total < pathlen;) { /* * Skip any slashes. */ @@ -3381,7 +3456,7 @@ xfs_symlink( goto error_return; } - xfs_ilock(dp, XFS_ILOCK_EXCL); + xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); /* * Check whether the directory allows new symlinks or not. @@ -3402,7 +3477,7 @@ xfs_symlink( * Check for ability to enter directory entry, if no space reserved. */ if (resblks == 0 && - (error = XFS_DIR_CANENTER(mp, tp, dp, link_name, link_namelen))) + (error = xfs_dir_canenter(tp, dp, link_name, link_namelen))) goto error_return; /* * Initialize the bmap freelist prior to calling either @@ -3457,7 +3532,7 @@ xfs_symlink( error = xfs_bmapi(tp, ip, first_fsb, fs_blocks, XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &first_block, resblks, mval, &nmaps, - &free_list); + &free_list, NULL); if (error) { goto error1; } @@ -3489,11 +3564,10 @@ xfs_symlink( /* * Create the directory entry for the symlink. */ - error = XFS_DIR_CREATENAME(mp, tp, dp, link_name, link_namelen, - ip->i_ino, &first_block, &free_list, resblks); - if (error) { + error = xfs_dir_createname(tp, dp, link_name, link_namelen, ip->i_ino, + &first_block, &free_list, resblks); + if (error) goto error1; - } xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); @@ -3520,11 +3594,11 @@ xfs_symlink( */ IHOLD(ip); - error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); + error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { goto error2; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); XFS_QM_DQRELE(mp, udqp); XFS_QM_DQRELE(mp, gdqp); @@ -3541,7 +3615,7 @@ std_return: } if (!error) { - vnode_t *vp; + bhv_vnode_t *vp; ASSERT(ip); vp = XFS_ITOV(ip); @@ -3606,10 +3680,10 @@ xfs_fid2( int xfs_rwlock( bhv_desc_t *bdp, - vrwlock_t locktype) + bhv_vrwlock_t locktype) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); if (VN_ISDIR(vp)) @@ -3637,10 +3711,10 @@ xfs_rwlock( void xfs_rwunlock( bhv_desc_t *bdp, - vrwlock_t locktype) + bhv_vrwlock_t locktype) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); if (VN_ISDIR(vp)) @@ -3698,12 +3772,16 @@ xfs_inode_flush( sync_lsn = log->l_last_sync_lsn; GRANT_UNLOCK(log, s); - if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0)) - return 0; + if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) { + if (flags & FLUSH_SYNC) + log_flags |= XFS_LOG_SYNC; + error = xfs_log_force(mp, iip->ili_last_lsn, log_flags); + if (error) + return error; + } - if (flags & FLUSH_SYNC) - log_flags |= XFS_LOG_SYNC; - return xfs_log_force(mp, iip->ili_last_lsn, log_flags); + if (ip->i_update_core == 0) + return 0; } } @@ -3717,9 +3795,6 @@ xfs_inode_flush( if (flags & FLUSH_INODE) { int flush_flags; - if (xfs_ipincount(ip)) - return EAGAIN; - if (flags & FLUSH_SYNC) { xfs_ilock(ip, XFS_ILOCK_SHARED); xfs_iflock(ip); @@ -3744,7 +3819,6 @@ xfs_inode_flush( return error; } - int xfs_set_dmattrs ( bhv_desc_t *bdp, @@ -3780,21 +3854,17 @@ xfs_set_dmattrs ( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); IHOLD(ip); - error = xfs_trans_commit(tp, 0, NULL); + error = xfs_trans_commit(tp, 0); return error; } - -/* - * xfs_reclaim - */ STATIC int xfs_reclaim( bhv_desc_t *bdp) { xfs_inode_t *ip; - vnode_t *vp; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); ip = XFS_BHVTOI(bdp); @@ -3819,11 +3889,16 @@ xfs_reclaim( */ xfs_synchronize_atime(ip); - /* If we have nothing to flush with this inode then complete the - * teardown now, otherwise break the link between the xfs inode - * and the linux inode and clean up the xfs inode later. This - * avoids flushing the inode to disk during the delete operation - * itself. + /* + * If we have nothing to flush with this inode then complete the + * teardown now, otherwise break the link between the xfs inode and the + * linux inode and clean up the xfs inode later. This avoids flushing + * the inode to disk during the delete operation itself. + * + * When breaking the link, we need to set the XFS_IRECLAIMABLE flag + * first to ensure that xfs_iunpin() will never see an xfs inode + * that has a linux inode being reclaimed. Synchronisation is provided + * by the i_flags_lock. */ if (!ip->i_update_core && (ip->i_itemp == NULL)) { xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -3832,11 +3907,13 @@ xfs_reclaim( } else { xfs_mount_t *mp = ip->i_mount; - /* Protect sync from us */ + /* Protect sync and unpin from us */ XFS_MOUNT_ILOCK(mp); + spin_lock(&ip->i_flags_lock); + __xfs_iflags_set(ip, XFS_IRECLAIMABLE); vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip)); + spin_unlock(&ip->i_flags_lock); list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); - ip->i_flags |= XFS_IRECLAIMABLE; XFS_MOUNT_IUNLOCK(mp); } return 0; @@ -3849,7 +3926,7 @@ xfs_finish_reclaim( int sync_mode) { xfs_ihash_t *ih = ip->i_hash; - vnode_t *vp = XFS_ITOV_NULL(ip); + bhv_vnode_t *vp = XFS_ITOV_NULL(ip); int error; if (vp && VN_BAD(vp)) @@ -3861,8 +3938,10 @@ xfs_finish_reclaim( * us. */ write_lock(&ih->ih_lock); - if ((ip->i_flags & XFS_IRECLAIM) || - (!(ip->i_flags & XFS_IRECLAIMABLE) && vp == NULL)) { + spin_lock(&ip->i_flags_lock); + if (__xfs_iflags_test(ip, XFS_IRECLAIM) || + (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) { + spin_unlock(&ip->i_flags_lock); write_unlock(&ih->ih_lock); if (locked) { xfs_ifunlock(ip); @@ -3870,7 +3949,8 @@ xfs_finish_reclaim( } return 1; } - ip->i_flags |= XFS_IRECLAIM; + __xfs_iflags_set(ip, XFS_IRECLAIM); + spin_unlock(&ip->i_flags_lock); write_unlock(&ih->ih_lock); /* @@ -4010,22 +4090,16 @@ xfs_alloc_file_space( if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - rt = XFS_IS_REALTIME_INODE(ip); - if (unlikely(rt)) { - if (!(extsz = ip->i_d.di_extsize)) - extsz = mp->m_sb.sb_rextsize; - } else { - extsz = ip->i_d.di_extsize; - } - if ((error = XFS_QM_DQATTACH(mp, ip, 0))) return error; if (len <= 0) return XFS_ERROR(EINVAL); + rt = XFS_IS_REALTIME_INODE(ip); + extsz = xfs_get_extsz_hint(ip); + count = len; - error = 0; imapp = &imaps[0]; nimaps = 1; bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); @@ -4033,14 +4107,14 @@ xfs_alloc_file_space( allocatesize_fsb = XFS_B_TO_FSB(mp, count); /* Generate a DMAPI event if needed. */ - if (alloc_type != 0 && offset < ip->i_d.di_size && + if (alloc_type != 0 && offset < ip->i_size && (attr_flags&ATTR_DMI) == 0 && DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { xfs_off_t end_dmi_offset; end_dmi_offset = offset+len; - if (end_dmi_offset > ip->i_d.di_size) - end_dmi_offset = ip->i_d.di_size; + if (end_dmi_offset > ip->i_size) + end_dmi_offset = ip->i_size; error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip), offset, end_dmi_offset - offset, 0, NULL); @@ -4116,10 +4190,10 @@ retry: * Issue the xfs_bmapi() call to allocate the blocks */ XFS_BMAP_INIT(&free_list, &firstfsb); - error = xfs_bmapi(tp, ip, startoffset_fsb, + error = XFS_BMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, allocatesize_fsb, bmapi_flag, &firstfsb, 0, imapp, &nimaps, - &free_list); + &free_list, NULL); if (error) { goto error0; } @@ -4127,12 +4201,12 @@ retry: /* * Complete the transaction */ - error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); + error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { goto error0; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) { break; @@ -4199,8 +4273,8 @@ xfs_zero_remaining_bytes( for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { offset_fsb = XFS_B_TO_FSBT(mp, offset); nimap = 1; - error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0, NULL, 0, &imap, - &nimap, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, offset_fsb, 1, 0, + NULL, 0, &imap, &nimap, NULL, NULL); if (error || nimap < 1) break; ASSERT(imap.br_blockcount >= 1); @@ -4259,7 +4333,7 @@ xfs_free_file_space( xfs_off_t len, int attr_flags) { - vnode_t *vp; + bhv_vnode_t *vp; int committed; int done; xfs_off_t end_dmi_offset; @@ -4267,14 +4341,13 @@ xfs_free_file_space( int error; xfs_fsblock_t firstfsb; xfs_bmap_free_t free_list; - xfs_off_t ilen; xfs_bmbt_irec_t imap; xfs_off_t ioffset; xfs_extlen_t mod=0; xfs_mount_t *mp; int nimap; uint resblks; - int rounding; + uint rounding; int rt; xfs_fileoff_t startoffset_fsb; xfs_trans_t *tp; @@ -4296,11 +4369,11 @@ xfs_free_file_space( end_dmi_offset = offset + len; endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset); - if (offset < ip->i_d.di_size && + if (offset < ip->i_size && (attr_flags & ATTR_DMI) == 0 && DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { - if (end_dmi_offset > ip->i_d.di_size) - end_dmi_offset = ip->i_d.di_size; + if (end_dmi_offset > ip->i_size) + end_dmi_offset = ip->i_size; error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, offset, end_dmi_offset - offset, AT_DELAY_FLAG(attr_flags), NULL); @@ -4308,7 +4381,6 @@ xfs_free_file_space( return error; } - ASSERT(attr_flags & ATTR_NOLOCK ? attr_flags & ATTR_DMI : 1); if (attr_flags & ATTR_NOLOCK) need_iolock = 0; if (need_iolock) { @@ -4316,18 +4388,16 @@ xfs_free_file_space( vn_iowait(vp); /* wait for the completion of any pending DIOs */ } - rounding = MAX((__uint8_t)(1 << mp->m_sb.sb_blocklog), - (__uint8_t)NBPP); - ilen = len + (offset & (rounding - 1)); + rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, NBPP); ioffset = offset & ~(rounding - 1); - if (ilen & (rounding - 1)) - ilen = (ilen + rounding) & ~(rounding - 1); if (VN_CACHED(vp) != 0) { xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1, ctooff(offtoct(ioffset)), -1); - VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(ioffset)), + error = bhv_vop_flushinval_pages(vp, ctooff(offtoct(ioffset)), -1, FI_REMAPF_LOCKED); + if (error) + goto out_unlock_iolock; } /* @@ -4338,8 +4408,8 @@ xfs_free_file_space( */ if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) { nimap = 1; - error = xfs_bmapi(NULL, ip, startoffset_fsb, 1, 0, NULL, 0, - &imap, &nimap, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, startoffset_fsb, + 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); if (error) goto out_unlock_iolock; ASSERT(nimap == 0 || nimap == 1); @@ -4353,8 +4423,8 @@ xfs_free_file_space( startoffset_fsb += mp->m_sb.sb_rextsize - mod; } nimap = 1; - error = xfs_bmapi(NULL, ip, endoffset_fsb - 1, 1, 0, NULL, 0, - &imap, &nimap, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, endoffset_fsb - 1, + 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); if (error) goto out_unlock_iolock; ASSERT(nimap == 0 || nimap == 1); @@ -4391,9 +4461,12 @@ xfs_free_file_space( while (!error && !done) { /* - * allocate and setup the transaction + * allocate and setup the transaction. Allow this + * transaction to dip into the reserve blocks to ensure + * the freeing of the space succeeds at ENOSPC. */ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); + tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_reserve(tp, resblks, XFS_WRITE_LOG_RES(mp), @@ -4426,9 +4499,9 @@ xfs_free_file_space( * issue the bunmapi() call to free the blocks */ XFS_BMAP_INIT(&free_list, &firstfsb); - error = xfs_bunmapi(tp, ip, startoffset_fsb, + error = XFS_BUNMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, endoffset_fsb - startoffset_fsb, - 0, 2, &firstfsb, &free_list, &done); + 0, 2, &firstfsb, &free_list, NULL, &done); if (error) { goto error0; } @@ -4436,12 +4509,12 @@ xfs_free_file_space( /* * complete the transaction */ - error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); + error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { goto error0; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(ip, XFS_ILOCK_EXCL); } @@ -4488,8 +4561,8 @@ xfs_change_file_space( xfs_off_t startoffset; xfs_off_t llen; xfs_trans_t *tp; - vattr_t va; - vnode_t *vp; + bhv_vattr_t va; + bhv_vnode_t *vp; vp = BHV_TO_VNODE(bdp); vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); @@ -4519,7 +4592,7 @@ xfs_change_file_space( bf->l_start += offset; break; case 2: /*SEEK_END*/ - bf->l_start += ip->i_d.di_size; + bf->l_start += ip->i_size; break; default: return XFS_ERROR(EINVAL); @@ -4536,7 +4609,7 @@ xfs_change_file_space( bf->l_whence = 0; startoffset = bf->l_start; - fsize = ip->i_d.di_size; + fsize = ip->i_size; /* * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve @@ -4635,19 +4708,20 @@ xfs_change_file_space( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0, NULL); + error = xfs_trans_commit(tp, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } -vnodeops_t xfs_vnodeops = { +bhv_vnodeops_t xfs_vnodeops = { BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS), .vop_open = xfs_open, .vop_read = xfs_read, -#ifdef HAVE_SENDFILE - .vop_sendfile = xfs_sendfile, +#ifdef HAVE_SPLICE + .vop_splice_read = xfs_splice_read, + .vop_splice_write = xfs_splice_write, #endif .vop_write = xfs_write, .vop_ioctl = xfs_ioctl,