X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fxfs%2Fxfs_vnodeops.c;h=efd5aff9eaf6aa1d149fa6431ca6355e944c5b1a;hb=c7227e46423a57b4df27a2d75b5869bd3ae654d0;hp=8b5a44fe28654a10ee02ae8e942e0adb443b97e6;hpb=ec86dc02fdc062d0d298814b1e78b482ab38caf7;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 8b5a44f..efd5aff 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -16,8 +16,6 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include - #include "xfs.h" #include "xfs_fs.h" #include "xfs_types.h" @@ -27,7 +25,6 @@ #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" @@ -35,13 +32,11 @@ #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc_btree.h" -#include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" -#include "xfs_dir_leaf.h" #include "xfs_itable.h" #include "xfs_btree.h" #include "xfs_ialloc.h" @@ -56,34 +51,14 @@ #include "xfs_refcache.h" #include "xfs_trans_space.h" #include "xfs_log_priv.h" -#include "xfs_mac.h" - - -/* - * The maximum pathlen is 1024 bytes. Since the minimum file system - * blocksize is 512 bytes, we can get a max of 2 extents back from - * bmapi. - */ -#define SYMLINK_MAPS 2 +#include "xfs_filestream.h" +#include "xfs_vnodeops.h" -/* - * For xfs, we check that the file isn't too big to be opened by this kernel. - * No other open action is required for regular files. Devices are handled - * through the specfs file system, pipes through fifofs. Device and - * fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively, - * when a new vnode is first looked up or created. - */ -STATIC int +int xfs_open( - bhv_desc_t *bdp, - cred_t *credp) + xfs_inode_t *ip) { int mode; - vnode_t *vp; - xfs_inode_t *ip; - - vp = BHV_TO_VNODE(bdp); - ip = XFS_BHVTOI(bdp); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return XFS_ERROR(EIO); @@ -92,7 +67,7 @@ xfs_open( * If it's a directory with any blocks, read-ahead block 0 * as we're almost certain to have the next operation be a read there. */ - if (VN_ISDIR(vp) && ip->i_d.di_nextents > 0) { + if (S_ISDIR(ip->i_d.di_mode) && ip->i_d.di_nextents > 0) { mode = xfs_ilock_map_shared(ip); if (ip->i_d.di_nextents > 0) (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); @@ -101,26 +76,19 @@ xfs_open( return 0; } - /* * xfs_getattr */ -STATIC int +int xfs_getattr( - bhv_desc_t *bdp, - vattr_t *vap, - int flags, - cred_t *credp) + xfs_inode_t *ip, + bhv_vattr_t *vap, + int flags) { - xfs_inode_t *ip; - xfs_mount_t *mp; - vnode_t *vp; + bhv_vnode_t *vp = XFS_ITOV(ip); + xfs_mount_t *mp = ip->i_mount; - vp = BHV_TO_VNODE(bdp); - vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); - - ip = XFS_BHVTOI(bdp); - mp = ip->i_mount; + vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -128,7 +96,7 @@ xfs_getattr( if (!(flags & ATTR_LAZY)) xfs_ilock(ip, XFS_ILOCK_SHARED); - vap->va_size = ip->i_d.di_size; + vap->va_size = XFS_ISIZE(ip); if (vap->va_mask == XFS_AT_SIZE) goto all_done; @@ -178,9 +146,8 @@ xfs_getattr( * realtime extent size or the realtime volume's * extent size. */ - vap->va_blocksize = ip->i_d.di_extsize ? - (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) : - (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog); + vap->va_blocksize = + xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; } break; } @@ -240,14 +207,14 @@ xfs_getattr( */ int xfs_setattr( - bhv_desc_t *bdp, - vattr_t *vap, + xfs_inode_t *ip, + bhv_vattr_t *vap, int flags, cred_t *credp) { - xfs_inode_t *ip; + bhv_vnode_t *vp = XFS_ITOV(ip); + xfs_mount_t *mp = ip->i_mount; xfs_trans_t *tp; - xfs_mount_t *mp; int mask; int code; uint lock_flags; @@ -255,17 +222,15 @@ xfs_setattr( uid_t uid=0, iuid=0; gid_t gid=0, igid=0; int timeflags = 0; - vnode_t *vp; xfs_prid_t projid=0, iprojid=0; int mandlock_before, mandlock_after; struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; int file_owner; int need_iolock = 1; - vp = BHV_TO_VNODE(bdp); - vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); + vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); - if (vp->v_vfsp->vfs_flag & VFS_RDONLY) + if (mp->m_flags & XFS_MOUNT_RDONLY) return XFS_ERROR(EROFS); /* @@ -276,9 +241,6 @@ xfs_setattr( return XFS_ERROR(EINVAL); } - ip = XFS_BHVTOI(bdp); - mp = ip->i_mount; - if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -347,7 +309,6 @@ xfs_setattr( */ tp = NULL; lock_flags = XFS_ILOCK_EXCL; - ASSERT(flags & ATTR_NOLOCK ? flags & ATTR_DMI : 1); if (flags & ATTR_NOLOCK) need_iolock = 0; if (!(mask & XFS_AT_SIZE)) { @@ -363,7 +324,7 @@ xfs_setattr( } } } else { - if (DM_EVENT_ENABLED (vp->v_vfsp, ip, DM_EVENT_TRUNCATE) && + if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && !(flags & ATTR_DMI)) { int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR; code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp, @@ -492,7 +453,7 @@ xfs_setattr( if (mask & XFS_AT_SIZE) { /* Short circuit the truncate case for zero length files */ if ((vap->va_size == 0) && - (ip->i_d.di_size == 0) && (ip->i_d.di_nextents == 0)) { + (ip->i_size == 0) && (ip->i_d.di_nextents == 0)) { xfs_iunlock(ip, XFS_ILOCK_EXCL); lock_flags &= ~XFS_ILOCK_EXCL; if (mask & XFS_AT_CTIME) @@ -610,11 +571,35 @@ xfs_setattr( */ if (mask & XFS_AT_SIZE) { code = 0; - if ((vap->va_size > ip->i_d.di_size) && + if ((vap->va_size > ip->i_size) && (flags & ATTR_NOSIZETOK) == 0) { code = xfs_igrow_start(ip, vap->va_size, credp); } xfs_iunlock(ip, XFS_ILOCK_EXCL); + + /* + * We are going to log the inode size change in this + * transaction so any previous writes that are beyond the on + * disk EOF and the new EOF that have not been written out need + * to be written here. If we do not write the data out, we + * expose ourselves to the null files problem. + * + * Only flush from the on disk size to the smaller of the in + * memory file size or the new size as that's the range we + * really care about here and prevents waiting for other data + * not within the range we care about here. + */ + if (!code && + (ip->i_size != ip->i_d.di_size) && + (vap->va_size > ip->i_d.di_size)) { + code = xfs_flush_pages(ip, + ip->i_d.di_size, vap->va_size, + XFS_B_ASYNC, FI_NONE); + } + + /* wait for all I/O to complete */ + vn_iowait(ip); + if (!code) code = xfs_itruncate_data(ip, vap->va_size); if (code) { @@ -649,10 +634,10 @@ xfs_setattr( * Truncate file. Must have write permission and not be a directory. */ if (mask & XFS_AT_SIZE) { - if (vap->va_size > ip->i_d.di_size) { + if (vap->va_size > ip->i_size) { xfs_igrow_finish(tp, ip, vap->va_size, !(flags & ATTR_DMI)); - } else if ((vap->va_size <= ip->i_d.di_size) || + } else if ((vap->va_size <= ip->i_size) || ((vap->va_size == 0) && ip->i_d.di_nextents)) { /* * signal a sync transaction unless @@ -665,9 +650,17 @@ xfs_setattr( ((ip->i_d.di_nlink != 0 || !(mp->m_flags & XFS_MOUNT_WSYNC)) ? 1 : 0)); - if (code) { + if (code) goto abort_return; - } + /* + * Truncated "down", so we're removing references + * to old data here - if we now delay flushing for + * a long time, we expose ourselves unduly to the + * notorious NULL files problem. So, we mark this + * vnode and flush it when the file is closed, and + * do not wait the usual (long) time for writeout. + */ + xfs_iflags_set(ip, XFS_ITRUNCATED); } /* * Have to do this even if the file's size doesn't change. @@ -799,6 +792,10 @@ xfs_setattr( di_flags |= XFS_DIFLAG_NODUMP; if (vap->va_xflags & XFS_XFLAG_PROJINHERIT) di_flags |= XFS_DIFLAG_PROJINHERIT; + if (vap->va_xflags & XFS_XFLAG_NODEFRAG) + di_flags |= XFS_DIFLAG_NODEFRAG; + if (vap->va_xflags & XFS_XFLAG_FILESTREAM) + di_flags |= XFS_DIFLAG_FILESTREAM; if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { if (vap->va_xflags & XFS_XFLAG_RTINHERIT) di_flags |= XFS_DIFLAG_RTINHERIT; @@ -847,7 +844,7 @@ xfs_setattr( * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. * This is slightly sub-optimal in that truncates require - * two sync transactions instead of one for wsync filesytems. + * two sync transactions instead of one for wsync filesystems. * One for the truncate and one for the timestamps since we * don't want to change the timestamps unless we're sure the * truncate worked. Truncates are less than 1% of the laddis @@ -858,7 +855,7 @@ xfs_setattr( if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); - code = xfs_trans_commit(tp, commit_flags, NULL); + code = xfs_trans_commit(tp, commit_flags); } /* @@ -867,10 +864,6 @@ xfs_setattr( * racing calls to vop_vnode_change. */ mandlock_after = MANDLOCK(vp, ip->i_d.di_mode); - if (mandlock_before != mandlock_after) { - VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_ENF_LOCKING, - mandlock_after); - } xfs_iunlock(ip, lock_flags); @@ -886,7 +879,7 @@ xfs_setattr( return code; } - if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_ATTRIBUTE) && + if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) && !(flags & ATTR_DMI)) { (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, NULL, NULL, @@ -914,19 +907,16 @@ xfs_setattr( * xfs_access * Null conversion from vnode mode bits to inode mode bits, as in efs. */ -STATIC int +int xfs_access( - bhv_desc_t *bdp, + xfs_inode_t *ip, int mode, cred_t *credp) { - xfs_inode_t *ip; int error; - vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__, - (inst_t *)__return_address); + vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); - ip = XFS_BHVTOI(bdp); xfs_ilock(ip, XFS_ILOCK_SHARED); error = xfs_iaccess(ip, mode, credp); xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -935,104 +925,94 @@ xfs_access( /* - * xfs_readlink - * + * The maximum pathlen is 1024 bytes. Since the minimum file system + * blocksize is 512 bytes, we can get a max of 2 extents back from + * bmapi. */ +#define SYMLINK_MAPS 2 + STATIC int -xfs_readlink( - bhv_desc_t *bdp, - uio_t *uiop, - int ioflags, - cred_t *credp) +xfs_readlink_bmap( + xfs_inode_t *ip, + char *link) { - xfs_inode_t *ip; - int count; - xfs_off_t offset; - int pathlen; - vnode_t *vp; - int error = 0; - xfs_mount_t *mp; - int nmaps; + xfs_mount_t *mp = ip->i_mount; + int pathlen = ip->i_d.di_size; + int nmaps = SYMLINK_MAPS; xfs_bmbt_irec_t mval[SYMLINK_MAPS]; xfs_daddr_t d; int byte_cnt; int n; xfs_buf_t *bp; + int error = 0; - vp = BHV_TO_VNODE(bdp); - vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); + error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 0, NULL, 0, + mval, &nmaps, NULL, NULL); + if (error) + goto out; - ip = XFS_BHVTOI(bdp); - mp = ip->i_mount; + for (n = 0; n < nmaps; n++) { + d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); + byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); + error = XFS_BUF_GETERROR(bp); + if (error) { + xfs_ioerror_alert("xfs_readlink", + ip->i_mount, bp, XFS_BUF_ADDR(bp)); + xfs_buf_relse(bp); + goto out; + } + if (pathlen < byte_cnt) + byte_cnt = pathlen; + pathlen -= byte_cnt; - xfs_ilock(ip, XFS_ILOCK_SHARED); + memcpy(link, XFS_BUF_PTR(bp), byte_cnt); + xfs_buf_relse(bp); + } - ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK); + link[ip->i_d.di_size] = '\0'; + error = 0; - offset = uiop->uio_offset; - count = uiop->uio_resid; + out: + return error; +} - if (offset < 0) { - error = XFS_ERROR(EINVAL); - goto error_return; - } - if (count <= 0) { - error = 0; - goto error_return; - } +int +xfs_readlink( + xfs_inode_t *ip, + char *link) +{ + xfs_mount_t *mp = ip->i_mount; + int pathlen; + int error = 0; - /* - * See if the symlink is stored inline. - */ - pathlen = (int)ip->i_d.di_size; + vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); - if (ip->i_df.if_flags & XFS_IFINLINE) { - error = uio_read(ip->i_df.if_u1.if_data, pathlen, uiop); - } - else { - /* - * Symlink not inline. Call bmap to get it in. - */ - nmaps = SYMLINK_MAPS; - - error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), - 0, NULL, 0, mval, &nmaps, NULL); + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); - if (error) { - goto error_return; - } + xfs_ilock(ip, XFS_ILOCK_SHARED); - for (n = 0; n < nmaps; n++) { - d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); - byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); - bp = xfs_buf_read(mp->m_ddev_targp, d, - BTOBB(byte_cnt), 0); - error = XFS_BUF_GETERROR(bp); - if (error) { - xfs_ioerror_alert("xfs_readlink", - ip->i_mount, bp, XFS_BUF_ADDR(bp)); - xfs_buf_relse(bp); - goto error_return; - } - if (pathlen < byte_cnt) - byte_cnt = pathlen; - pathlen -= byte_cnt; + ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK); + ASSERT(ip->i_d.di_size <= MAXPATHLEN); - error = uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop); - xfs_buf_relse (bp); - } + pathlen = ip->i_d.di_size; + if (!pathlen) + goto out; + if (ip->i_df.if_flags & XFS_IFINLINE) { + memcpy(link, ip->i_df.if_u1.if_data, pathlen); + link[pathlen] = '\0'; + } else { + error = xfs_readlink_bmap(ip, link); } -error_return: + out: xfs_iunlock(ip, XFS_ILOCK_SHARED); return error; } - /* * xfs_fsync * @@ -1042,29 +1022,27 @@ error_return: * be held while flushing the data, so acquire after we're done * with that. */ -STATIC int +int xfs_fsync( - bhv_desc_t *bdp, + xfs_inode_t *ip, int flag, - cred_t *credp, xfs_off_t start, xfs_off_t stop) { - xfs_inode_t *ip; xfs_trans_t *tp; int error; int log_flushed = 0, changed = 1; - vn_trace_entry(BHV_TO_VNODE(bdp), - __FUNCTION__, (inst_t *)__return_address); - - ip = XFS_BHVTOI(bdp); + vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); ASSERT(start >= 0 && stop >= -1); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return XFS_ERROR(EIO); + if (flag & FSYNC_DATA) + filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); + /* * We always need to make sure that the required inode state * is safe on disk. The vnode might be clean but because @@ -1154,7 +1132,7 @@ xfs_fsync( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); if (flag & FSYNC_WAIT) xfs_trans_set_sync(tp); - error = _xfs_trans_commit(tp, 0, NULL, &log_flushed); + error = _xfs_trans_commit(tp, 0, &log_flushed); xfs_iunlock(ip, XFS_ILOCK_EXCL); } @@ -1169,7 +1147,7 @@ xfs_fsync( /* * If this inode is on the RT dev we need to flush that - * cache aswell. + * cache as well. */ if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); @@ -1179,13 +1157,15 @@ xfs_fsync( } /* - * This is called by xfs_inactive to free any blocks beyond eof, - * when the link count isn't zero. + * This is called by xfs_inactive to free any blocks beyond eof + * when the link count isn't zero and by xfs_dm_punch_hole() when + * punching a hole to EOF. */ -STATIC int -xfs_inactive_free_eofblocks( +int +xfs_free_eofblocks( xfs_mount_t *mp, - xfs_inode_t *ip) + xfs_inode_t *ip, + int flags) { xfs_trans_t *tp; int error; @@ -1194,12 +1174,13 @@ xfs_inactive_free_eofblocks( xfs_filblks_t map_len; int nimaps; xfs_bmbt_irec_t imap; + int use_iolock = (flags & XFS_FREE_EOF_LOCK); /* * Figure out if there are any blocks beyond the end * of the file. If not, then there is nothing to do. */ - end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_d.di_size)); + end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size)); last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); map_len = last_fsb - end_fsb; if (map_len <= 0) @@ -1207,8 +1188,8 @@ xfs_inactive_free_eofblocks( nimaps = 1; xfs_ilock(ip, XFS_ILOCK_SHARED); - error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0, - NULL, 0, &imap, &nimaps, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, end_fsb, map_len, 0, + NULL, 0, &imap, &nimaps, NULL, NULL); xfs_iunlock(ip, XFS_ILOCK_SHARED); if (!error && (nimaps != 0) && @@ -1234,9 +1215,16 @@ xfs_inactive_free_eofblocks( * cache and we can't * do that within a transaction. */ - xfs_ilock(ip, XFS_IOLOCK_EXCL); - xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, - ip->i_d.di_size); + if (use_iolock) + xfs_ilock(ip, XFS_IOLOCK_EXCL); + error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, + ip->i_size); + if (error) { + xfs_trans_cancel(tp, 0); + if (use_iolock) + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return error; + } error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), @@ -1256,7 +1244,7 @@ xfs_inactive_free_eofblocks( xfs_trans_ihold(tp, ip); error = xfs_itruncate_finish(&tp, ip, - ip->i_d.di_size, + ip->i_size, XFS_DATA_FORK, 0); /* @@ -1269,10 +1257,10 @@ xfs_inactive_free_eofblocks( XFS_TRANS_ABORT)); } else { error = xfs_trans_commit(tp, - XFS_TRANS_RELEASE_LOG_RES, - NULL); + XFS_TRANS_RELEASE_LOG_RES); } - xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); + xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL) + : XFS_ILOCK_EXCL)); } return error; } @@ -1334,10 +1322,10 @@ xfs_inactive_symlink_rmt( */ done = 0; XFS_BMAP_INIT(&free_list, &first_block); - nmaps = sizeof(mval) / sizeof(mval[0]); + nmaps = ARRAY_SIZE(mval); if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size), XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps, - &free_list))) + &free_list, NULL))) goto error0; /* * Invalidate the block(s). @@ -1352,13 +1340,13 @@ xfs_inactive_symlink_rmt( * Unmap the dead block(s) to the free_list. */ if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, - &first_block, &free_list, &done))) + &first_block, &free_list, NULL, &done))) goto error1; ASSERT(done); /* * Commit the first transaction. This logs the EFI and the inode. */ - if ((error = xfs_bmap_finish(&tp, &free_list, first_block, &committed))) + if ((error = xfs_bmap_finish(&tp, &free_list, &committed))) goto error1; /* * The transaction must have been committed, since there were @@ -1379,12 +1367,12 @@ xfs_inactive_symlink_rmt( */ ntp = xfs_trans_dup(tp); /* - * Commit the transaction containing extent freeing and EFD's. + * Commit the transaction containing extent freeing and EFDs. * If we get an error on the commit here or on the reserve below, * we need to unlock the inode since the new transaction doesn't * have the inode attached. */ - error = xfs_trans_commit(tp, 0, NULL); + error = xfs_trans_commit(tp, 0); tp = ntp; if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); @@ -1468,9 +1456,6 @@ xfs_inactive_symlink_local( return 0; } -/* - * - */ STATIC int xfs_inactive_attrs( xfs_inode_t *ip, @@ -1484,7 +1469,7 @@ xfs_inactive_attrs( tp = *tpp; mp = ip->i_mount; ASSERT(ip->i_d.di_forkoff != 0); - xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(ip, XFS_ILOCK_EXCL); error = xfs_attr_inactive(ip); @@ -1518,42 +1503,64 @@ xfs_inactive_attrs( return 0; } -STATIC int +int xfs_release( - bhv_desc_t *bdp) + xfs_inode_t *ip) { - xfs_inode_t *ip; - vnode_t *vp; - xfs_mount_t *mp; + bhv_vnode_t *vp = XFS_ITOV(ip); + xfs_mount_t *mp = ip->i_mount; int error; - vp = BHV_TO_VNODE(bdp); - ip = XFS_BHVTOI(bdp); - - if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) { + if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) return 0; - } /* If this is a read-only mount, don't do this (would generate I/O) */ - if (vp->v_vfsp->vfs_flag & VFS_RDONLY) + if (mp->m_flags & XFS_MOUNT_RDONLY) return 0; + if (!XFS_FORCED_SHUTDOWN(mp)) { + int truncated; + + /* + * If we are using filestreams, and we have an unlinked + * file that we are processing the last close on, then nothing + * will be able to reopen and write to this file. Purge this + * inode from the filestreams cache so that it doesn't delay + * teardown of the inode. + */ + if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip)) + xfs_filestream_deassociate(ip); + + /* + * If we previously truncated this file and removed old data + * in the process, we want to initiate "early" writeout on + * the last close. This is an attempt to combat the notorious + * NULL files problem which is particularly noticable from a + * truncate down, buffered (re-)write (delalloc), followed by + * a crash. What we are effectively doing here is + * significantly reducing the time window where we'd otherwise + * be exposed to that problem. + */ + truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); + if (truncated && VN_DIRTY(vp) && ip->i_delayed_blks > 0) + xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); + } + #ifdef HAVE_REFCACHE /* If we are in the NFS reference cache then don't do this now */ if (ip->i_refcache) return 0; #endif - mp = ip->i_mount; - if (ip->i_d.di_nlink != 0) { if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && - ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || + ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || ip->i_delayed_blks > 0)) && (ip->i_df.if_flags & XFS_IFEXTENTS)) && (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { - if ((error = xfs_inactive_free_eofblocks(mp, ip))) + error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); + if (error) return error; /* Update linux inode block count after free above */ vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, @@ -1572,14 +1579,12 @@ xfs_release( * now be truncated. Also, we clear all of the read-ahead state * kept for the inode here since the file is now closed. */ -STATIC int +int xfs_inactive( - bhv_desc_t *bdp, - cred_t *credp) + xfs_inode_t *ip) { - xfs_inode_t *ip; - vnode_t *vp; - xfs_bmap_free_t free_list; + bhv_vnode_t *vp = XFS_ITOV(ip); + xfs_bmap_free_t free_list; xfs_fsblock_t first_block; int committed; xfs_trans_t *tp; @@ -1587,10 +1592,7 @@ xfs_inactive( int error; int truncate; - vp = BHV_TO_VNODE(bdp); - vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); - - ip = XFS_BHVTOI(bdp); + vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); /* * If the inode is already free, then there can be nothing @@ -1609,32 +1611,32 @@ xfs_inactive( * only one with a reference to the inode. */ truncate = ((ip->i_d.di_nlink == 0) && - ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0) || - (ip->i_delayed_blks > 0)) && + ((ip->i_d.di_size != 0) || (ip->i_size != 0) || + (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) && ((ip->i_d.di_mode & S_IFMT) == S_IFREG)); mp = ip->i_mount; - if (ip->i_d.di_nlink == 0 && - DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_DESTROY)) { + if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY)) { (void) XFS_SEND_DESTROY(mp, vp, DM_RIGHT_NULL); } error = 0; /* If this is a read-only mount, don't do this (would generate I/O) */ - if (vp->v_vfsp->vfs_flag & VFS_RDONLY) + if (mp->m_flags & XFS_MOUNT_RDONLY) goto out; if (ip->i_d.di_nlink != 0) { if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && - ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || + ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || ip->i_delayed_blks > 0)) && (ip->i_df.if_flags & XFS_IFEXTENTS) && (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || (ip->i_delayed_blks != 0)))) { - if ((error = xfs_inactive_free_eofblocks(mp, ip))) + error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); + if (error) return VN_INACTIVE_CACHE; /* Update linux inode block count after free above */ vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp, @@ -1658,7 +1660,12 @@ xfs_inactive( */ xfs_ilock(ip, XFS_IOLOCK_EXCL); - xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0); + error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0); + if (error) { + xfs_trans_cancel(tp, 0); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return VN_INACTIVE_CACHE; + } error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), @@ -1759,7 +1766,7 @@ xfs_inactive( cmn_err(CE_NOTE, "xfs_inactive: xfs_ifree() returned an error = %d on %s", error, mp->m_fsname); - xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR); + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); } xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); } else { @@ -1772,9 +1779,8 @@ xfs_inactive( * Just ignore errors at this point. There is * nothing we can do except to try to keep going. */ - (void) xfs_bmap_finish(&tp, &free_list, first_block, - &committed); - (void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + (void) xfs_bmap_finish(&tp, &free_list, &committed); + (void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); } /* * Release the dquots held by inode, if any. @@ -1788,34 +1794,24 @@ xfs_inactive( } -/* - * xfs_lookup - */ -STATIC int +int xfs_lookup( - bhv_desc_t *dir_bdp, - vname_t *dentry, - vnode_t **vpp, - int flags, - vnode_t *rdir, - cred_t *credp) + xfs_inode_t *dp, + bhv_vname_t *dentry, + bhv_vnode_t **vpp) { - xfs_inode_t *dp, *ip; + xfs_inode_t *ip; xfs_ino_t e_inum; int error; uint lock_mode; - vnode_t *dir_vp; - - dir_vp = BHV_TO_VNODE(dir_bdp); - vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); - dp = XFS_BHVTOI(dir_bdp); + vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return XFS_ERROR(EIO); lock_mode = xfs_ilock_map_shared(dp); - error = xfs_dir_lookup_int(dir_bdp, lock_mode, dentry, &e_inum, &ip); + error = xfs_dir_lookup_int(dp, lock_mode, dentry, &e_inum, &ip); if (!error) { *vpp = XFS_ITOV(ip); ITRACE(ip); @@ -1824,53 +1820,43 @@ xfs_lookup( return error; } - -/* - * xfs_create (create a new file). - */ -STATIC int +int xfs_create( - bhv_desc_t *dir_bdp, - vname_t *dentry, - vattr_t *vap, - vnode_t **vpp, + xfs_inode_t *dp, + bhv_vname_t *dentry, + mode_t mode, + xfs_dev_t rdev, + bhv_vnode_t **vpp, cred_t *credp) { char *name = VNAME(dentry); - vnode_t *dir_vp; - xfs_inode_t *dp, *ip; - vnode_t *vp=NULL; + xfs_mount_t *mp = dp->i_mount; + bhv_vnode_t *dir_vp = XFS_ITOV(dp); + xfs_inode_t *ip; + bhv_vnode_t *vp = NULL; xfs_trans_t *tp; - xfs_mount_t *mp; - xfs_dev_t rdev; int error; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; - boolean_t dp_joined_to_trans; + boolean_t unlock_dp_on_error = B_FALSE; int dm_event_sent = 0; uint cancel_flags; int committed; xfs_prid_t prid; struct xfs_dquot *udqp, *gdqp; uint resblks; - int dm_di_mode; int namelen; ASSERT(!*vpp); - dir_vp = BHV_TO_VNODE(dir_bdp); - vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); + vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); - dp = XFS_BHVTOI(dir_bdp); - mp = dp->i_mount; - - dm_di_mode = vap->va_mode; namelen = VNAMELEN(dentry); - if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { + if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, dir_vp, DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, name, NULL, - dm_di_mode, 0, 0); + mode, 0, 0); if (error) return error; @@ -1885,8 +1871,6 @@ xfs_create( udqp = gdqp = NULL; if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) prid = dp->i_d.di_projid; - else if (vap->va_mask & XFS_AT_PROJID) - prid = (xfs_prid_t)vap->va_projid; else prid = (xfs_prid_t)dfltprid; @@ -1900,7 +1884,6 @@ xfs_create( goto std_return; ip = NULL; - dp_joined_to_trans = B_FALSE; tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); cancel_flags = XFS_TRANS_RELEASE_LOG_RES; @@ -1920,11 +1903,11 @@ xfs_create( } if (error) { cancel_flags = 0; - dp = NULL; goto error_return; } - xfs_ilock(dp, XFS_ILOCK_EXCL); + xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); + unlock_dp_on_error = B_TRUE; XFS_BMAP_INIT(&free_list, &first_block); @@ -1937,11 +1920,9 @@ xfs_create( if (error) goto error_return; - if (resblks == 0 && - (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen))) + if (resblks == 0 && (error = xfs_dir_canenter(tp, dp, name, namelen))) goto error_return; - rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0; - error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1, + error = xfs_dir_ialloc(&tp, dp, mode, 1, rdev, credp, prid, resblks > 0, &ip, &committed); if (error) { @@ -1959,19 +1940,19 @@ xfs_create( ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE)); /* - * Now we join the directory inode to the transaction. - * We do not do it earlier because xfs_dir_ialloc - * might commit the previous transaction (and release - * all the locks). + * Now we join the directory inode to the transaction. We do not do it + * earlier because xfs_dir_ialloc might commit the previous transaction + * (and release all the locks). An error from here on will result in + * the transaction cancel unlocking dp so don't do it explicitly in the + * error path. */ - VN_HOLD(dir_vp); xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); - dp_joined_to_trans = B_TRUE; + unlock_dp_on_error = B_FALSE; - error = XFS_DIR_CREATENAME(mp, tp, dp, name, namelen, ip->i_ino, - &first_block, &free_list, - resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0); + error = xfs_dir_createname(tp, dp, name, namelen, ip->i_ino, + &first_block, &free_list, resblks ? + resblks - XFS_IALLOC_SPACE_RES(mp) : 0); if (error) { ASSERT(error != ENOSPC); goto abort_return; @@ -2005,13 +1986,13 @@ xfs_create( IHOLD(ip); vp = XFS_ITOV(ip); - error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); + error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { xfs_bmap_cancel(&free_list); goto abort_rele; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) { IRELE(ip); tp = NULL; @@ -2021,25 +2002,18 @@ xfs_create( XFS_QM_DQRELE(mp, udqp); XFS_QM_DQRELE(mp, gdqp); - /* - * Propogate the fact that the vnode changed after the - * xfs_inode locks have been released. - */ - VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_TRUNCATED, 3); - *vpp = vp; /* Fallthrough to std_return with error = 0 */ std_return: - if ( (*vpp || (error != 0 && dm_event_sent != 0)) && - DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp), - DM_EVENT_POSTCREATE)) { + if ((*vpp || (error != 0 && dm_event_sent != 0)) && + DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, dir_vp, DM_RIGHT_NULL, *vpp ? vp:NULL, DM_RIGHT_NULL, name, NULL, - dm_di_mode, error, 0); + mode, error, 0); } return error; @@ -2051,11 +2025,12 @@ std_return: if (tp != NULL) xfs_trans_cancel(tp, cancel_flags); - if (!dp_joined_to_trans && (dp != NULL)) - xfs_iunlock(dp, XFS_ILOCK_EXCL); XFS_QM_DQRELE(mp, udqp); XFS_QM_DQRELE(mp, gdqp); + if (unlock_dp_on_error) + xfs_iunlock(dp, XFS_ILOCK_EXCL); + goto std_return; abort_rele: @@ -2106,7 +2081,6 @@ int xfs_rm_attempts; STATIC int xfs_lock_dir_and_entry( xfs_inode_t *dp, - vname_t *dentry, xfs_inode_t *ip) /* inode of entry 'name' */ { int attempts; @@ -2120,7 +2094,7 @@ xfs_lock_dir_and_entry( attempts = 0; again: - xfs_ilock(dp, XFS_ILOCK_EXCL); + xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); e_inum = ip->i_ino; @@ -2189,6 +2163,21 @@ int xfs_lock_delays; #endif /* + * Bump the subclass so xfs_lock_inodes() acquires each lock with + * a different value + */ +static inline int +xfs_lock_inumorder(int lock_mode, int subclass) +{ + if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) + lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; + if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) + lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; + + return lock_mode; +} + +/* * The following routine will lock n inodes in exclusive mode. * We assume the caller calls us with the inodes in i_ino order. * @@ -2255,7 +2244,7 @@ again: * that is in the AIL. */ ASSERT(i != 0); - if (!xfs_ilock_nowait(ips[i], lock_mode)) { + if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) { attempts++; /* @@ -2290,7 +2279,7 @@ again: goto again; } } else { - xfs_ilock(ips[i], lock_mode); + xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i)); } } @@ -2312,22 +2301,16 @@ int remove_which_error_return = 0; #define REMOVE_DEBUG_TRACE(x) #endif /* ! DEBUG */ - -/* - * xfs_remove - * - */ -STATIC int +int xfs_remove( - bhv_desc_t *dir_bdp, - vname_t *dentry, - cred_t *credp) + xfs_inode_t *dp, + bhv_vname_t *dentry) { - vnode_t *dir_vp; + bhv_vnode_t *dir_vp = XFS_ITOV(dp); char *name = VNAME(dentry); - xfs_inode_t *dp, *ip; + xfs_mount_t *mp = dp->i_mount; + xfs_inode_t *ip; xfs_trans_t *tp = NULL; - xfs_mount_t *mp; int error = 0; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; @@ -2338,21 +2321,22 @@ xfs_remove( uint resblks; int namelen; - dir_vp = BHV_TO_VNODE(dir_bdp); - vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); - - dp = XFS_BHVTOI(dir_bdp); - mp = dp->i_mount; + vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); namelen = VNAMELEN(dentry); - if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) { + if (!xfs_get_dir_entry(dentry, &ip)) { + dm_di_mode = ip->i_d.di_mode; + IRELE(ip); + } + + if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp, DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, - name, NULL, 0, 0, 0); + name, NULL, dm_di_mode, 0, 0); if (error) return error; } @@ -2369,7 +2353,7 @@ xfs_remove( * for a log reservation. Since we'll have to wait for the * inactive code to complete before returning from xfs_iget, * we need to make sure that we don't have log space reserved - * when we call xfs_iget. Instead we get an unlocked referece + * when we call xfs_iget. Instead we get an unlocked reference * to the inode before getting our log reservation. */ error = xfs_get_dir_entry(dentry, &ip); @@ -2380,7 +2364,7 @@ xfs_remove( dm_di_mode = ip->i_d.di_mode; - vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); + vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); ITRACE(ip); @@ -2420,7 +2404,7 @@ xfs_remove( return error; } - error = xfs_lock_dir_and_entry(dp, dentry, ip); + error = xfs_lock_dir_and_entry(dp, ip); if (error) { REMOVE_DEBUG_TRACE(__LINE__); xfs_trans_cancel(tp, cancel_flags); @@ -2447,8 +2431,8 @@ xfs_remove( * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. */ XFS_BMAP_INIT(&free_list, &first_block); - error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, ip->i_ino, - &first_block, &free_list, 0); + error = xfs_dir_removename(tp, dp, name, namelen, ip->i_ino, + &first_block, &free_list, 0); if (error) { ASSERT(error != ENOENT); REMOVE_DEBUG_TRACE(__LINE__); @@ -2485,13 +2469,13 @@ xfs_remove( xfs_trans_set_sync(tp); } - error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); + error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { REMOVE_DEBUG_TRACE(__LINE__); goto error_rele; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) { IRELE(ip); goto std_return; @@ -2505,19 +2489,22 @@ xfs_remove( */ xfs_refcache_purge_ip(ip); - vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); - /* - * Let interposed file systems know about removed links. + * If we are using filestreams, kill the stream association. + * If the file is still open it may get a new one but that + * will get killed on last close in xfs_close() so we don't + * have to worry about that. */ - VOP_LINK_REMOVED(XFS_ITOV(ip), dir_vp, link_zero); + if (link_zero && xfs_inode_is_filestream(ip)) + xfs_filestream_deassociate(ip); + + vn_trace_exit(ip, __FUNCTION__, (inst_t *)__return_address); IRELE(ip); /* Fall through to std_return with error = 0 */ std_return: - if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, - DM_EVENT_POSTREMOVE)) { + if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dir_vp, DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, @@ -2555,47 +2542,36 @@ xfs_remove( goto std_return; } - -/* - * xfs_link - * - */ -STATIC int +int xfs_link( - bhv_desc_t *target_dir_bdp, - vnode_t *src_vp, - vname_t *dentry, - cred_t *credp) + xfs_inode_t *tdp, + bhv_vnode_t *src_vp, + bhv_vname_t *dentry) { - xfs_inode_t *tdp, *sip; + bhv_vnode_t *target_dir_vp = XFS_ITOV(tdp); + xfs_mount_t *mp = tdp->i_mount; + xfs_inode_t *sip = xfs_vtoi(src_vp); xfs_trans_t *tp; - xfs_mount_t *mp; xfs_inode_t *ips[2]; int error; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; int cancel_flags; int committed; - vnode_t *target_dir_vp; int resblks; char *target_name = VNAME(dentry); int target_namelen; - target_dir_vp = BHV_TO_VNODE(target_dir_bdp); - vn_trace_entry(target_dir_vp, __FUNCTION__, (inst_t *)__return_address); - vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address); + vn_trace_entry(tdp, __FUNCTION__, (inst_t *)__return_address); + vn_trace_entry(xfs_vtoi(src_vp), __FUNCTION__, (inst_t *)__return_address); target_namelen = VNAMELEN(dentry); - if (VN_ISDIR(src_vp)) - return XFS_ERROR(EPERM); + ASSERT(!VN_ISDIR(src_vp)); - sip = xfs_vtoi(src_vp); - tdp = XFS_BHVTOI(target_dir_bdp); - mp = tdp->i_mount; if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - if (DM_EVENT_ENABLED(src_vp->v_vfsp, tdp, DM_EVENT_LINK)) { + if (DM_EVENT_ENABLED(tdp, DM_EVENT_LINK)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK, target_dir_vp, DM_RIGHT_NULL, src_vp, DM_RIGHT_NULL, @@ -2662,18 +2638,17 @@ xfs_link( */ if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (tdp->i_d.di_projid != sip->i_d.di_projid))) { - error = XFS_ERROR(EPERM); + error = XFS_ERROR(EXDEV); goto error_return; } if (resblks == 0 && - (error = XFS_DIR_CANENTER(mp, tp, tdp, target_name, - target_namelen))) + (error = xfs_dir_canenter(tp, tdp, target_name, target_namelen))) goto error_return; XFS_BMAP_INIT(&free_list, &first_block); - error = XFS_DIR_CREATENAME(mp, tp, tdp, target_name, target_namelen, + error = xfs_dir_createname(tp, tdp, target_name, target_namelen, sip->i_ino, &first_block, &free_list, resblks); if (error) @@ -2683,9 +2658,8 @@ xfs_link( xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); error = xfs_bumplink(tp, sip); - if (error) { + if (error) goto abort_return; - } /* * If this is a synchronous mount, make sure that the @@ -2696,21 +2670,19 @@ xfs_link( xfs_trans_set_sync(tp); } - error = xfs_bmap_finish (&tp, &free_list, first_block, &committed); + error = xfs_bmap_finish (&tp, &free_list, &committed); if (error) { xfs_bmap_cancel(&free_list); goto abort_return; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); - if (error) { + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + if (error) goto std_return; - } /* Fall through to std_return with error = 0. */ std_return: - if (DM_EVENT_ENABLED(src_vp->v_vfsp, sip, - DM_EVENT_POSTLINK)) { + if (DM_EVENT_ENABLED(sip, DM_EVENT_POSTLINK)) { (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK, target_dir_vp, DM_RIGHT_NULL, src_vp, DM_RIGHT_NULL, @@ -2726,57 +2698,45 @@ std_return: xfs_trans_cancel(tp, cancel_flags); goto std_return; } -/* - * xfs_mkdir - * - */ -STATIC int + + +int xfs_mkdir( - bhv_desc_t *dir_bdp, - vname_t *dentry, - vattr_t *vap, - vnode_t **vpp, + xfs_inode_t *dp, + bhv_vname_t *dentry, + mode_t mode, + bhv_vnode_t **vpp, cred_t *credp) { + bhv_vnode_t *dir_vp = XFS_ITOV(dp); char *dir_name = VNAME(dentry); - xfs_inode_t *dp; + int dir_namelen = VNAMELEN(dentry); + xfs_mount_t *mp = dp->i_mount; xfs_inode_t *cdp; /* inode of created dir */ - vnode_t *cvp; /* vnode of created dir */ + bhv_vnode_t *cvp; /* vnode of created dir */ xfs_trans_t *tp; - xfs_mount_t *mp; int cancel_flags; int error; int committed; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; - vnode_t *dir_vp; - boolean_t dp_joined_to_trans; + boolean_t unlock_dp_on_error = B_FALSE; boolean_t created = B_FALSE; int dm_event_sent = 0; xfs_prid_t prid; struct xfs_dquot *udqp, *gdqp; uint resblks; - int dm_di_mode; - int dir_namelen; - - dir_vp = BHV_TO_VNODE(dir_bdp); - dp = XFS_BHVTOI(dir_bdp); - mp = dp->i_mount; if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - dir_namelen = VNAMELEN(dentry); - tp = NULL; - dp_joined_to_trans = B_FALSE; - dm_di_mode = vap->va_mode; - if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) { + if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, dir_vp, DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, dir_name, NULL, - dm_di_mode, 0, 0); + mode, 0, 0); if (error) return error; dm_event_sent = 1; @@ -2784,14 +2744,12 @@ xfs_mkdir( /* Return through std_return after this point. */ - vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); + vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); mp = dp->i_mount; udqp = gdqp = NULL; if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) prid = dp->i_d.di_projid; - else if (vap->va_mask & XFS_AT_PROJID) - prid = (xfs_prid_t)vap->va_projid; else prid = (xfs_prid_t)dfltprid; @@ -2817,11 +2775,11 @@ xfs_mkdir( } if (error) { cancel_flags = 0; - dp = NULL; goto error_return; } - xfs_ilock(dp, XFS_ILOCK_EXCL); + xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); + unlock_dp_on_error = B_TRUE; /* * Check for directory link count overflow. @@ -2839,12 +2797,12 @@ xfs_mkdir( goto error_return; if (resblks == 0 && - (error = XFS_DIR_CANENTER(mp, tp, dp, dir_name, dir_namelen))) + (error = xfs_dir_canenter(tp, dp, dir_name, dir_namelen))) goto error_return; /* * create the directory inode. */ - error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 2, + error = xfs_dir_ialloc(&tp, dp, mode, 2, 0, credp, prid, resblks > 0, &cdp, NULL); if (error) { @@ -2858,17 +2816,19 @@ xfs_mkdir( * Now we add the directory inode to the transaction. * We waited until now since xfs_dir_ialloc might start * a new transaction. Had we joined the transaction - * earlier, the locks might have gotten released. + * earlier, the locks might have gotten released. An error + * from here on will result in the transaction cancel + * unlocking dp so don't do it explicitly in the error path. */ VN_HOLD(dir_vp); xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); - dp_joined_to_trans = B_TRUE; + unlock_dp_on_error = B_FALSE; XFS_BMAP_INIT(&free_list, &first_block); - error = XFS_DIR_CREATENAME(mp, tp, dp, dir_name, dir_namelen, - cdp->i_ino, &first_block, &free_list, - resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0); + error = xfs_dir_createname(tp, dp, dir_name, dir_namelen, cdp->i_ino, + &first_block, &free_list, resblks ? + resblks - XFS_IALLOC_SPACE_RES(mp) : 0); if (error) { ASSERT(error != ENOSPC); goto error1; @@ -2882,16 +2842,14 @@ xfs_mkdir( */ dp->i_gen++; - error = XFS_DIR_INIT(mp, tp, cdp, dp); - if (error) { + error = xfs_dir_init(tp, cdp, dp); + if (error) goto error2; - } cdp->i_gen = 1; error = xfs_bumplink(tp, dp); - if (error) { + if (error) goto error2; - } cvp = XFS_ITOV(cdp); @@ -2914,13 +2872,13 @@ xfs_mkdir( xfs_trans_set_sync(tp); } - error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); + error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { IRELE(cdp); goto error2; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); XFS_QM_DQRELE(mp, udqp); XFS_QM_DQRELE(mp, gdqp); if (error) { @@ -2931,15 +2889,14 @@ xfs_mkdir( * xfs_trans_commit. */ std_return: - if ( (created || (error != 0 && dm_event_sent != 0)) && - DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp), - DM_EVENT_POSTCREATE)) { + if ((created || (error != 0 && dm_event_sent != 0)) && + DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, dir_vp, DM_RIGHT_NULL, created ? XFS_ITOV(cdp):NULL, DM_RIGHT_NULL, dir_name, NULL, - dm_di_mode, error, 0); + mode, error, 0); } return error; @@ -2953,55 +2910,47 @@ std_return: XFS_QM_DQRELE(mp, udqp); XFS_QM_DQRELE(mp, gdqp); - if (!dp_joined_to_trans && (dp != NULL)) { + if (unlock_dp_on_error) xfs_iunlock(dp, XFS_ILOCK_EXCL); - } goto std_return; } - -/* - * xfs_rmdir - * - */ -STATIC int +int xfs_rmdir( - bhv_desc_t *dir_bdp, - vname_t *dentry, - cred_t *credp) + xfs_inode_t *dp, + bhv_vname_t *dentry) { + bhv_vnode_t *dir_vp = XFS_ITOV(dp); char *name = VNAME(dentry); - xfs_inode_t *dp; - xfs_inode_t *cdp; /* child directory */ + int namelen = VNAMELEN(dentry); + xfs_mount_t *mp = dp->i_mount; + xfs_inode_t *cdp; /* child directory */ xfs_trans_t *tp; - xfs_mount_t *mp; int error; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; int cancel_flags; int committed; - vnode_t *dir_vp; - int dm_di_mode = 0; + int dm_di_mode = S_IFDIR; int last_cdp_link; - int namelen; uint resblks; - dir_vp = BHV_TO_VNODE(dir_bdp); - dp = XFS_BHVTOI(dir_bdp); - mp = dp->i_mount; - - vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); + vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); - if (XFS_FORCED_SHUTDOWN(XFS_BHVTOI(dir_bdp)->i_mount)) + if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - namelen = VNAMELEN(dentry); - if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) { + if (!xfs_get_dir_entry(dentry, &cdp)) { + dm_di_mode = cdp->i_d.di_mode; + IRELE(cdp); + } + + if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp, DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, - name, NULL, 0, 0, 0); + name, NULL, dm_di_mode, 0, 0); if (error) return XFS_ERROR(error); } @@ -3019,7 +2968,7 @@ xfs_rmdir( * for a log reservation. Since we'll have to wait for the * inactive code to complete before returning from xfs_iget, * we need to make sure that we don't have log space reserved - * when we call xfs_iget. Instead we get an unlocked referece + * when we call xfs_iget. Instead we get an unlocked reference * to the inode before getting our log reservation. */ error = xfs_get_dir_entry(dentry, &cdp); @@ -3075,7 +3024,7 @@ xfs_rmdir( * that the directory entry for the child directory inode has * not changed while we were obtaining a log reservation. */ - error = xfs_lock_dir_and_entry(dp, dentry, cdp); + error = xfs_lock_dir_and_entry(dp, cdp); if (error) { xfs_trans_cancel(tp, cancel_flags); IRELE(cdp); @@ -3100,16 +3049,15 @@ xfs_rmdir( error = XFS_ERROR(ENOTEMPTY); goto error_return; } - if (!XFS_DIR_ISEMPTY(mp, cdp)) { + if (!xfs_dir_isempty(cdp)) { error = XFS_ERROR(ENOTEMPTY); goto error_return; } - error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, cdp->i_ino, - &first_block, &free_list, resblks); - if (error) { + error = xfs_dir_removename(tp, dp, name, namelen, cdp->i_ino, + &first_block, &free_list, resblks); + if (error) goto error1; - } xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -3161,7 +3109,7 @@ xfs_rmdir( xfs_trans_set_sync(tp); } - error = xfs_bmap_finish (&tp, &free_list, first_block, &committed); + error = xfs_bmap_finish (&tp, &free_list, &committed); if (error) { xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | @@ -3170,24 +3118,19 @@ xfs_rmdir( goto std_return; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) { IRELE(cdp); goto std_return; } - /* - * Let interposed file systems know about removed links. - */ - VOP_LINK_REMOVED(XFS_ITOV(cdp), dir_vp, last_cdp_link); - IRELE(cdp); /* Fall through to std_return with error = 0 or the errno * from xfs_trans_commit. */ -std_return: - if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_POSTREMOVE)) { + std_return: + if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dir_vp, DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, @@ -3196,73 +3139,34 @@ std_return: } return error; -error1: + error1: xfs_bmap_cancel(&free_list); cancel_flags |= XFS_TRANS_ABORT; /* FALLTHROUGH */ -error_return: + error_return: xfs_trans_cancel(tp, cancel_flags); goto std_return; } - -/* - * xfs_readdir - * - * Read dp's entries starting at uiop->uio_offset and translate them into - * bufsize bytes worth of struct dirents starting at bufbase. - */ -STATIC int -xfs_readdir( - bhv_desc_t *dir_bdp, - uio_t *uiop, - cred_t *credp, - int *eofp) -{ - xfs_inode_t *dp; - xfs_trans_t *tp = NULL; - int error = 0; - uint lock_mode; - - vn_trace_entry(BHV_TO_VNODE(dir_bdp), __FUNCTION__, - (inst_t *)__return_address); - dp = XFS_BHVTOI(dir_bdp); - - if (XFS_FORCED_SHUTDOWN(dp->i_mount)) { - return XFS_ERROR(EIO); - } - - lock_mode = xfs_ilock_map_shared(dp); - error = XFS_DIR_GETDENTS(dp->i_mount, tp, dp, uiop, eofp); - xfs_iunlock_map_shared(dp, lock_mode); - return error; -} - - -/* - * xfs_symlink - * - */ -STATIC int +int xfs_symlink( - bhv_desc_t *dir_bdp, - vname_t *dentry, - vattr_t *vap, + xfs_inode_t *dp, + bhv_vname_t *dentry, char *target_path, - vnode_t **vpp, + mode_t mode, + bhv_vnode_t **vpp, cred_t *credp) { + bhv_vnode_t *dir_vp = XFS_ITOV(dp); + xfs_mount_t *mp = dp->i_mount; xfs_trans_t *tp; - xfs_mount_t *mp; - xfs_inode_t *dp; xfs_inode_t *ip; int error; int pathlen; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; - boolean_t dp_joined_to_trans; - vnode_t *dir_vp; + boolean_t unlock_dp_on_error = B_FALSE; uint cancel_flags; int committed; xfs_fileoff_t first_fsb; @@ -3281,16 +3185,12 @@ xfs_symlink( int link_namelen; *vpp = NULL; - dir_vp = BHV_TO_VNODE(dir_bdp); - dp = XFS_BHVTOI(dir_bdp); - dp_joined_to_trans = B_FALSE; error = 0; ip = NULL; tp = NULL; - vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); + vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address); - mp = dp->i_mount; if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -3307,7 +3207,7 @@ xfs_symlink( int len, total; char *path; - for(total = 0, path = target_path; total < pathlen;) { + for (total = 0, path = target_path; total < pathlen;) { /* * Skip any slashes. */ @@ -3329,7 +3229,7 @@ xfs_symlink( } } - if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_SYMLINK)) { + if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dir_vp, DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, link_name, target_path, 0, 0, 0); @@ -3342,8 +3242,6 @@ xfs_symlink( udqp = gdqp = NULL; if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) prid = dp->i_d.di_projid; - else if (vap->va_mask & XFS_AT_PROJID) - prid = (xfs_prid_t)vap->va_projid; else prid = (xfs_prid_t)dfltprid; @@ -3376,11 +3274,11 @@ xfs_symlink( } if (error) { cancel_flags = 0; - dp = NULL; goto error_return; } - xfs_ilock(dp, XFS_ILOCK_EXCL); + xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); + unlock_dp_on_error = B_TRUE; /* * Check whether the directory allows new symlinks or not. @@ -3401,7 +3299,7 @@ xfs_symlink( * Check for ability to enter directory entry, if no space reserved. */ if (resblks == 0 && - (error = XFS_DIR_CANENTER(mp, tp, dp, link_name, link_namelen))) + (error = xfs_dir_canenter(tp, dp, link_name, link_namelen))) goto error_return; /* * Initialize the bmap freelist prior to calling either @@ -3412,7 +3310,7 @@ xfs_symlink( /* * Allocate an inode for the symlink. */ - error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (vap->va_mode&~S_IFMT), + error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, credp, prid, resblks > 0, &ip, NULL); if (error) { if (error == ENOSPC) @@ -3421,9 +3319,14 @@ xfs_symlink( } ITRACE(ip); + /* + * An error after we've joined dp to the transaction will result in the + * transaction cancel unlocking dp so don't do it explicitly in the + * error path. + */ VN_HOLD(dir_vp); xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); - dp_joined_to_trans = B_TRUE; + unlock_dp_on_error = B_FALSE; /* * Also attach the dquot(s) to it, if applicable. @@ -3456,7 +3359,7 @@ xfs_symlink( error = xfs_bmapi(tp, ip, first_fsb, fs_blocks, XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &first_block, resblks, mval, &nmaps, - &free_list); + &free_list, NULL); if (error) { goto error1; } @@ -3488,11 +3391,10 @@ xfs_symlink( /* * Create the directory entry for the symlink. */ - error = XFS_DIR_CREATENAME(mp, tp, dp, link_name, link_namelen, - ip->i_ino, &first_block, &free_list, resblks); - if (error) { + error = xfs_dir_createname(tp, dp, link_name, link_namelen, ip->i_ino, + &first_block, &free_list, resblks); + if (error) goto error1; - } xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); @@ -3519,19 +3421,18 @@ xfs_symlink( */ IHOLD(ip); - error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); + error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { goto error2; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); XFS_QM_DQRELE(mp, udqp); XFS_QM_DQRELE(mp, gdqp); /* Fall through to std_return with error = 0 or errno from * xfs_trans_commit */ std_return: - if (DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp), - DM_EVENT_POSTSYMLINK)) { + if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTSYMLINK)) { (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK, dir_vp, DM_RIGHT_NULL, error ? NULL : XFS_ITOV(ip), @@ -3540,7 +3441,7 @@ std_return: } if (!error) { - vnode_t *vp; + bhv_vnode_t *vp; ASSERT(ip); vp = XFS_ITOV(ip); @@ -3558,35 +3459,21 @@ std_return: XFS_QM_DQRELE(mp, udqp); XFS_QM_DQRELE(mp, gdqp); - if (!dp_joined_to_trans && (dp != NULL)) { + if (unlock_dp_on_error) xfs_iunlock(dp, XFS_ILOCK_EXCL); - } goto std_return; } -/* - * xfs_fid2 - * - * A fid routine that takes a pointer to a previously allocated - * fid structure (like xfs_fast_fid) but uses a 64 bit inode number. - */ -STATIC int +int xfs_fid2( - bhv_desc_t *bdp, - fid_t *fidp) + xfs_inode_t *ip, + xfs_fid_t *xfid) { - xfs_inode_t *ip; - xfs_fid2_t *xfid; + vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); - vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__, - (inst_t *)__return_address); - ASSERT(sizeof(fid_t) >= sizeof(xfs_fid2_t)); - - xfid = (xfs_fid2_t *)fidp; - ip = XFS_BHVTOI(bdp); - xfid->fid_len = sizeof(xfs_fid2_t) - sizeof(xfid->fid_len); + xfid->fid_len = sizeof(xfs_fid_t) - sizeof(xfid->fid_len); xfid->fid_pad = 0; /* * use memcpy because the inode is a long long and there's no @@ -3599,21 +3486,13 @@ xfs_fid2( } -/* - * xfs_rwlock - */ int xfs_rwlock( - bhv_desc_t *bdp, - vrwlock_t locktype) + xfs_inode_t *ip, + bhv_vrwlock_t locktype) { - xfs_inode_t *ip; - vnode_t *vp; - - vp = BHV_TO_VNODE(bdp); - if (VN_ISDIR(vp)) + if (S_ISDIR(ip->i_d.di_mode)) return 1; - ip = XFS_BHVTOI(bdp); if (locktype == VRWLOCK_WRITE) { xfs_ilock(ip, XFS_IOLOCK_EXCL); } else if (locktype == VRWLOCK_TRY_READ) { @@ -3630,21 +3509,13 @@ xfs_rwlock( } -/* - * xfs_rwunlock - */ void xfs_rwunlock( - bhv_desc_t *bdp, - vrwlock_t locktype) + xfs_inode_t *ip, + bhv_vrwlock_t locktype) { - xfs_inode_t *ip; - vnode_t *vp; - - vp = BHV_TO_VNODE(bdp); - if (VN_ISDIR(vp)) - return; - ip = XFS_BHVTOI(bdp); + if (S_ISDIR(ip->i_d.di_mode)) + return; if (locktype == VRWLOCK_WRITE) { /* * In the write case, we may have added a new entry to @@ -3662,20 +3533,16 @@ xfs_rwunlock( return; } -STATIC int + +int xfs_inode_flush( - bhv_desc_t *bdp, + xfs_inode_t *ip, int flags) { - xfs_inode_t *ip; - xfs_mount_t *mp; - xfs_inode_log_item_t *iip; + xfs_mount_t *mp = ip->i_mount; + xfs_inode_log_item_t *iip = ip->i_itemp; int error = 0; - ip = XFS_BHVTOI(bdp); - mp = ip->i_mount; - iip = ip->i_itemp; - if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -3697,12 +3564,16 @@ xfs_inode_flush( sync_lsn = log->l_last_sync_lsn; GRANT_UNLOCK(log, s); - if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0)) - return 0; + if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) { + if (flags & FLUSH_SYNC) + log_flags |= XFS_LOG_SYNC; + error = xfs_log_force(mp, iip->ili_last_lsn, log_flags); + if (error) + return error; + } - if (flags & FLUSH_SYNC) - log_flags |= XFS_LOG_SYNC; - return xfs_log_force(mp, iip->ili_last_lsn, log_flags); + if (ip->i_update_core == 0) + return 0; } } @@ -3716,9 +3587,6 @@ xfs_inode_flush( if (flags & FLUSH_INODE) { int flush_flags; - if (xfs_ipincount(ip)) - return EAGAIN; - if (flags & FLUSH_SYNC) { xfs_ilock(ip, XFS_ILOCK_SHARED); xfs_iflock(ip); @@ -3745,23 +3613,18 @@ xfs_inode_flush( int -xfs_set_dmattrs ( - bhv_desc_t *bdp, +xfs_set_dmattrs( + xfs_inode_t *ip, u_int evmask, - u_int16_t state, - cred_t *credp) + u_int16_t state) { - xfs_inode_t *ip; + xfs_mount_t *mp = ip->i_mount; xfs_trans_t *tp; - xfs_mount_t *mp; int error; if (!capable(CAP_SYS_ADMIN)) return XFS_ERROR(EPERM); - ip = XFS_BHVTOI(bdp); - mp = ip->i_mount; - if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -3779,26 +3642,18 @@ xfs_set_dmattrs ( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); IHOLD(ip); - error = xfs_trans_commit(tp, 0, NULL); + error = xfs_trans_commit(tp, 0); return error; } - -/* - * xfs_reclaim - */ -STATIC int +int xfs_reclaim( - bhv_desc_t *bdp) + xfs_inode_t *ip) { - xfs_inode_t *ip; - vnode_t *vp; - - vp = BHV_TO_VNODE(bdp); - ip = XFS_BHVTOI(bdp); + bhv_vnode_t *vp = XFS_ITOV(ip); - vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); + vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); ASSERT(!VN_MAPPED(vp)); @@ -3808,7 +3663,7 @@ xfs_reclaim( return 0; } - vn_iowait(vp); + vn_iowait(ip); ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); @@ -3818,11 +3673,16 @@ xfs_reclaim( */ xfs_synchronize_atime(ip); - /* If we have nothing to flush with this inode then complete the - * teardown now, otherwise break the link between the xfs inode - * and the linux inode and clean up the xfs inode later. This - * avoids flushing the inode to disk during the delete operation - * itself. + /* + * If we have nothing to flush with this inode then complete the + * teardown now, otherwise break the link between the xfs inode and the + * linux inode and clean up the xfs inode later. This avoids flushing + * the inode to disk during the delete operation itself. + * + * When breaking the link, we need to set the XFS_IRECLAIMABLE flag + * first to ensure that xfs_iunpin() will never see an xfs inode + * that has a linux inode being reclaimed. Synchronisation is provided + * by the i_flags_lock. */ if (!ip->i_update_core && (ip->i_itemp == NULL)) { xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -3831,11 +3691,14 @@ xfs_reclaim( } else { xfs_mount_t *mp = ip->i_mount; - /* Protect sync from us */ + /* Protect sync and unpin from us */ XFS_MOUNT_ILOCK(mp); - vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip)); + spin_lock(&ip->i_flags_lock); + __xfs_iflags_set(ip, XFS_IRECLAIMABLE); + vn_to_inode(vp)->i_private = NULL; + ip->i_vnode = NULL; + spin_unlock(&ip->i_flags_lock); list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); - ip->i_flags |= XFS_IRECLAIMABLE; XFS_MOUNT_IUNLOCK(mp); } return 0; @@ -3847,8 +3710,8 @@ xfs_finish_reclaim( int locked, int sync_mode) { - xfs_ihash_t *ih = ip->i_hash; - vnode_t *vp = XFS_ITOV_NULL(ip); + xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); + bhv_vnode_t *vp = XFS_ITOV_NULL(ip); int error; if (vp && VN_BAD(vp)) @@ -3859,18 +3722,22 @@ xfs_finish_reclaim( * Once we have the XFS_IRECLAIM flag set it will not touch * us. */ - write_lock(&ih->ih_lock); - if ((ip->i_flags & XFS_IRECLAIM) || - (!(ip->i_flags & XFS_IRECLAIMABLE) && vp == NULL)) { - write_unlock(&ih->ih_lock); + write_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + if (__xfs_iflags_test(ip, XFS_IRECLAIM) || + (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) { + spin_unlock(&ip->i_flags_lock); + write_unlock(&pag->pag_ici_lock); if (locked) { xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); } return 1; } - ip->i_flags |= XFS_IRECLAIM; - write_unlock(&ih->ih_lock); + __xfs_iflags_set(ip, XFS_IRECLAIM); + spin_unlock(&ip->i_flags_lock); + write_unlock(&pag->pag_ici_lock); + xfs_put_perag(ip->i_mount, pag); /* * If the inode is still dirty, then flush it out. If the inode @@ -4004,27 +3871,21 @@ xfs_alloc_file_space( int committed; int error; - vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address); + vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - rt = XFS_IS_REALTIME_INODE(ip); - if (unlikely(rt)) { - if (!(extsz = ip->i_d.di_extsize)) - extsz = mp->m_sb.sb_rextsize; - } else { - extsz = ip->i_d.di_extsize; - } - if ((error = XFS_QM_DQATTACH(mp, ip, 0))) return error; if (len <= 0) return XFS_ERROR(EINVAL); + rt = XFS_IS_REALTIME_INODE(ip); + extsz = xfs_get_extsz_hint(ip); + count = len; - error = 0; imapp = &imaps[0]; nimaps = 1; bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); @@ -4032,14 +3893,14 @@ xfs_alloc_file_space( allocatesize_fsb = XFS_B_TO_FSB(mp, count); /* Generate a DMAPI event if needed. */ - if (alloc_type != 0 && offset < ip->i_d.di_size && + if (alloc_type != 0 && offset < ip->i_size && (attr_flags&ATTR_DMI) == 0 && - DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { + DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { xfs_off_t end_dmi_offset; end_dmi_offset = offset+len; - if (end_dmi_offset > ip->i_d.di_size) - end_dmi_offset = ip->i_d.di_size; + if (end_dmi_offset > ip->i_size) + end_dmi_offset = ip->i_size; error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip), offset, end_dmi_offset - offset, 0, NULL); @@ -4115,10 +3976,10 @@ retry: * Issue the xfs_bmapi() call to allocate the blocks */ XFS_BMAP_INIT(&free_list, &firstfsb); - error = xfs_bmapi(tp, ip, startoffset_fsb, + error = XFS_BMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, allocatesize_fsb, bmapi_flag, &firstfsb, 0, imapp, &nimaps, - &free_list); + &free_list, NULL); if (error) { goto error0; } @@ -4126,12 +3987,12 @@ retry: /* * Complete the transaction */ - error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); + error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { goto error0; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) { break; @@ -4148,9 +4009,8 @@ retry: allocatesize_fsb -= allocated_fsb; } dmapi_enospc_check: - if (error == ENOSPC && (attr_flags&ATTR_DMI) == 0 && - DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_NOSPACE)) { - + if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 && + DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE, XFS_ITOV(ip), DM_RIGHT_NULL, XFS_ITOV(ip), DM_RIGHT_NULL, @@ -4198,8 +4058,8 @@ xfs_zero_remaining_bytes( for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { offset_fsb = XFS_B_TO_FSBT(mp, offset); nimap = 1; - error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0, NULL, 0, &imap, - &nimap, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, offset_fsb, 1, 0, + NULL, 0, &imap, &nimap, NULL, NULL); if (error || nimap < 1) break; ASSERT(imap.br_blockcount >= 1); @@ -4258,7 +4118,7 @@ xfs_free_file_space( xfs_off_t len, int attr_flags) { - vnode_t *vp; + bhv_vnode_t *vp; int committed; int done; xfs_off_t end_dmi_offset; @@ -4266,14 +4126,13 @@ xfs_free_file_space( int error; xfs_fsblock_t firstfsb; xfs_bmap_free_t free_list; - xfs_off_t ilen; xfs_bmbt_irec_t imap; xfs_off_t ioffset; xfs_extlen_t mod=0; xfs_mount_t *mp; int nimap; uint resblks; - int rounding; + uint rounding; int rt; xfs_fileoff_t startoffset_fsb; xfs_trans_t *tp; @@ -4282,7 +4141,7 @@ xfs_free_file_space( vp = XFS_ITOV(ip); mp = ip->i_mount; - vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); + vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); if ((error = XFS_QM_DQATTACH(mp, ip, 0))) return error; @@ -4295,11 +4154,10 @@ xfs_free_file_space( end_dmi_offset = offset + len; endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset); - if (offset < ip->i_d.di_size && - (attr_flags & ATTR_DMI) == 0 && - DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { - if (end_dmi_offset > ip->i_d.di_size) - end_dmi_offset = ip->i_d.di_size; + if (offset < ip->i_size && (attr_flags & ATTR_DMI) == 0 && + DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { + if (end_dmi_offset > ip->i_size) + end_dmi_offset = ip->i_size; error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, offset, end_dmi_offset - offset, AT_DELAY_FLAG(attr_flags), NULL); @@ -4307,24 +4165,24 @@ xfs_free_file_space( return error; } - ASSERT(attr_flags & ATTR_NOLOCK ? attr_flags & ATTR_DMI : 1); if (attr_flags & ATTR_NOLOCK) need_iolock = 0; - if (need_iolock) + if (need_iolock) { xfs_ilock(ip, XFS_IOLOCK_EXCL); + vn_iowait(ip); /* wait for the completion of any pending DIOs */ + } - rounding = MAX((__uint8_t)(1 << mp->m_sb.sb_blocklog), - (__uint8_t)NBPP); - ilen = len + (offset & (rounding - 1)); + rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, NBPP); ioffset = offset & ~(rounding - 1); - if (ilen & (rounding - 1)) - ilen = (ilen + rounding) & ~(rounding - 1); if (VN_CACHED(vp) != 0) { xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1, ctooff(offtoct(ioffset)), -1); - VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(ioffset)), + error = xfs_flushinval_pages(ip, + ctooff(offtoct(ioffset)), -1, FI_REMAPF_LOCKED); + if (error) + goto out_unlock_iolock; } /* @@ -4335,8 +4193,8 @@ xfs_free_file_space( */ if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) { nimap = 1; - error = xfs_bmapi(NULL, ip, startoffset_fsb, 1, 0, NULL, 0, - &imap, &nimap, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, startoffset_fsb, + 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); if (error) goto out_unlock_iolock; ASSERT(nimap == 0 || nimap == 1); @@ -4350,8 +4208,8 @@ xfs_free_file_space( startoffset_fsb += mp->m_sb.sb_rextsize - mod; } nimap = 1; - error = xfs_bmapi(NULL, ip, endoffset_fsb - 1, 1, 0, NULL, 0, - &imap, &nimap, NULL); + error = XFS_BMAPI(mp, NULL, &ip->i_iocore, endoffset_fsb - 1, + 1, 0, NULL, 0, &imap, &nimap, NULL, NULL); if (error) goto out_unlock_iolock; ASSERT(nimap == 0 || nimap == 1); @@ -4388,9 +4246,12 @@ xfs_free_file_space( while (!error && !done) { /* - * allocate and setup the transaction + * allocate and setup the transaction. Allow this + * transaction to dip into the reserve blocks to ensure + * the freeing of the space succeeds at ENOSPC. */ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); + tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_reserve(tp, resblks, XFS_WRITE_LOG_RES(mp), @@ -4423,9 +4284,9 @@ xfs_free_file_space( * issue the bunmapi() call to free the blocks */ XFS_BMAP_INIT(&free_list, &firstfsb); - error = xfs_bunmapi(tp, ip, startoffset_fsb, + error = XFS_BUNMAPI(mp, tp, &ip->i_iocore, startoffset_fsb, endoffset_fsb - startoffset_fsb, - 0, 2, &firstfsb, &free_list, &done); + 0, 2, &firstfsb, &free_list, NULL, &done); if (error) { goto error0; } @@ -4433,12 +4294,12 @@ xfs_free_file_space( /* * complete the transaction */ - error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); + error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { goto error0; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(ip, XFS_ILOCK_EXCL); } @@ -4469,35 +4330,29 @@ xfs_free_file_space( */ int xfs_change_file_space( - bhv_desc_t *bdp, + xfs_inode_t *ip, int cmd, xfs_flock64_t *bf, xfs_off_t offset, cred_t *credp, int attr_flags) { + xfs_mount_t *mp = ip->i_mount; int clrprealloc; int error; xfs_fsize_t fsize; - xfs_inode_t *ip; - xfs_mount_t *mp; int setprealloc; xfs_off_t startoffset; xfs_off_t llen; xfs_trans_t *tp; - vattr_t va; - vnode_t *vp; - - vp = BHV_TO_VNODE(bdp); - vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); + bhv_vattr_t va; - ip = XFS_BHVTOI(bdp); - mp = ip->i_mount; + vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address); /* * must be a regular file and have write permission */ - if (!VN_ISREG(vp)) + if (!S_ISREG(ip->i_d.di_mode)) return XFS_ERROR(EINVAL); xfs_ilock(ip, XFS_ILOCK_SHARED); @@ -4516,7 +4371,7 @@ xfs_change_file_space( bf->l_start += offset; break; case 2: /*SEEK_END*/ - bf->l_start += ip->i_d.di_size; + bf->l_start += ip->i_size; break; default: return XFS_ERROR(EINVAL); @@ -4533,7 +4388,7 @@ xfs_change_file_space( bf->l_whence = 0; startoffset = bf->l_start; - fsize = ip->i_d.di_size; + fsize = ip->i_size; /* * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve @@ -4579,7 +4434,7 @@ xfs_change_file_space( va.va_mask = XFS_AT_SIZE; va.va_size = startoffset; - error = xfs_setattr(bdp, &va, attr_flags, credp); + error = xfs_setattr(ip, &va, attr_flags, credp); if (error) return error; @@ -4632,51 +4487,9 @@ xfs_change_file_space( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0, NULL); + error = xfs_trans_commit(tp, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } - -vnodeops_t xfs_vnodeops = { - BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS), - .vop_open = xfs_open, - .vop_read = xfs_read, -#ifdef HAVE_SENDFILE - .vop_sendfile = xfs_sendfile, -#endif - .vop_write = xfs_write, - .vop_ioctl = xfs_ioctl, - .vop_getattr = xfs_getattr, - .vop_setattr = xfs_setattr, - .vop_access = xfs_access, - .vop_lookup = xfs_lookup, - .vop_create = xfs_create, - .vop_remove = xfs_remove, - .vop_link = xfs_link, - .vop_rename = xfs_rename, - .vop_mkdir = xfs_mkdir, - .vop_rmdir = xfs_rmdir, - .vop_readdir = xfs_readdir, - .vop_symlink = xfs_symlink, - .vop_readlink = xfs_readlink, - .vop_fsync = xfs_fsync, - .vop_inactive = xfs_inactive, - .vop_fid2 = xfs_fid2, - .vop_rwlock = xfs_rwlock, - .vop_rwunlock = xfs_rwunlock, - .vop_bmap = xfs_bmap, - .vop_reclaim = xfs_reclaim, - .vop_attr_get = xfs_attr_get, - .vop_attr_set = xfs_attr_set, - .vop_attr_remove = xfs_attr_remove, - .vop_attr_list = xfs_attr_list, - .vop_link_removed = (vop_link_removed_t)fs_noval, - .vop_vnode_change = (vop_vnode_change_t)fs_noval, - .vop_tosspages = fs_tosspages, - .vop_flushinval_pages = fs_flushinval_pages, - .vop_flush_pages = fs_flush_pages, - .vop_release = xfs_release, - .vop_iflush = xfs_inode_flush, -};