X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fxfs%2Flinux-2.6%2Fxfs_lrw.c;h=1bf47f219c971c9da83b6a8208531d47d2f58921;hb=3126c136bc30225d7a43af741778aa50e95e467a;hp=110c038910ff39580cabb45cf52f4f504c0b4332;hpb=721259bce2851893155c6cb88a3f8ecb106b348c;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 110c038..1bf47f2 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -42,14 +42,12 @@ #include "xfs_error.h" #include "xfs_itable.h" #include "xfs_rw.h" -#include "xfs_acl.h" -#include "xfs_cap.h" -#include "xfs_mac.h" #include "xfs_attr.h" #include "xfs_inode_item.h" #include "xfs_buf_item.h" #include "xfs_utils.h" #include "xfs_iomap.h" +#include "xfs_vnodeops.h" #include #include @@ -59,14 +57,12 @@ void xfs_rw_enter_trace( int tag, - xfs_iocore_t *io, + xfs_inode_t *ip, void *data, size_t segs, loff_t offset, int ioflags) { - xfs_inode_t *ip = XFS_IO_INODE(io); - if (ip->i_rwtrace == NULL) return; ktrace_enter(ip->i_rwtrace, @@ -79,8 +75,8 @@ xfs_rw_enter_trace( (void *)((unsigned long)((offset >> 32) & 0xffffffff)), (void *)((unsigned long)(offset & 0xffffffff)), (void *)((unsigned long)ioflags), - (void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)), - (void *)((unsigned long)(io->io_new_size & 0xffffffff)), + (void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)), + (void *)((unsigned long)(ip->i_new_size & 0xffffffff)), (void *)((unsigned long)current_pid()), (void *)NULL, (void *)NULL, @@ -90,13 +86,12 @@ xfs_rw_enter_trace( void xfs_inval_cached_trace( - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_off_t offset, xfs_off_t len, xfs_off_t first, xfs_off_t last) { - xfs_inode_t *ip = XFS_IO_INODE(io); if (ip->i_rwtrace == NULL) return; @@ -132,56 +127,38 @@ xfs_inval_cached_trace( */ STATIC int xfs_iozero( - struct inode *ip, /* inode */ + struct xfs_inode *ip, /* inode */ loff_t pos, /* offset in file */ - size_t count, /* size of data to zero */ - loff_t end_size) /* max file size to set */ + size_t count) /* size of data to zero */ { - unsigned bytes; struct page *page; struct address_space *mapping; - char *kaddr; int status; - mapping = ip->i_mapping; + mapping = VFS_I(ip)->i_mapping; do { - unsigned long index, offset; + unsigned offset, bytes; + void *fsdata; offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ - index = pos >> PAGE_CACHE_SHIFT; bytes = PAGE_CACHE_SIZE - offset; if (bytes > count) bytes = count; - status = -ENOMEM; - page = grab_cache_page(mapping, index); - if (!page) + status = pagecache_write_begin(NULL, mapping, pos, bytes, + AOP_FLAG_UNINTERRUPTIBLE, + &page, &fsdata); + if (status) break; - kaddr = kmap(page); - status = mapping->a_ops->prepare_write(NULL, page, offset, - offset + bytes); - if (status) { - goto unlock; - } - - memset((void *) (kaddr + offset), 0, bytes); - flush_dcache_page(page); - status = mapping->a_ops->commit_write(NULL, page, offset, - offset + bytes); - if (!status) { - pos += bytes; - count -= bytes; - if (pos > i_size_read(ip)) - i_size_write(ip, pos < end_size ? pos : end_size); - } + zero_user(page, offset, bytes); -unlock: - kunmap(page); - unlock_page(page); - page_cache_release(page); - if (status) - break; + status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, + page, fsdata); + WARN_ON(status <= 0); /* can't return less than zero! */ + pos += bytes; + count -= bytes; + status = 0; } while (count); return (-status); @@ -189,27 +166,21 @@ unlock: ssize_t /* bytes read, or (-) error */ xfs_read( - bhv_desc_t *bdp, + xfs_inode_t *ip, struct kiocb *iocb, const struct iovec *iovp, unsigned int segs, loff_t *offset, - int ioflags, - cred_t *credp) + int ioflags) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + xfs_mount_t *mp = ip->i_mount; size_t size = 0; - ssize_t ret; + ssize_t ret = 0; xfs_fsize_t n; - xfs_inode_t *ip; - xfs_mount_t *mp; - bhv_vnode_t *vp; unsigned long seg; - ip = XFS_BHVTOI(bdp); - vp = BHV_TO_VNODE(bdp); - mp = ip->i_mount; XFS_STATS_INC(xs_read_calls); @@ -229,11 +200,11 @@ xfs_read( if (unlikely(ioflags & IO_ISDIRECT)) { xfs_buftarg_t *target = - (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? + XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((*offset & target->bt_smask) || (size & target->bt_smask)) { - if (*offset == ip->i_d.di_size) { + if (*offset == ip->i_size) { return (0); } return -XFS_ERROR(EINVAL); @@ -254,14 +225,12 @@ xfs_read( mutex_lock(&inode->i_mutex); xfs_ilock(ip, XFS_IOLOCK_SHARED); - if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && - !(ioflags & IO_INVIS)) { - bhv_vrwlock_t locktype = VRWLOCK_READ; + if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); + int iolock = XFS_IOLOCK_SHARED; - ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, - BHV_TO_VNODE(bdp), *offset, size, - dmflags, &locktype); + ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size, + dmflags, &iolock); if (ret) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); if (unlikely(ioflags & IO_ISDIRECT)) @@ -270,62 +239,22 @@ xfs_read( } } - if (unlikely((ioflags & IO_ISDIRECT) && VN_CACHED(vp))) - bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)), - -1, FI_REMAPF_LOCKED); - - if (unlikely(ioflags & IO_ISDIRECT)) + if (unlikely(ioflags & IO_ISDIRECT)) { + if (inode->i_mapping->nrpages) + ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), + -1, FI_REMAPF_LOCKED); mutex_unlock(&inode->i_mutex); - - xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, - (void *)iovp, segs, *offset, ioflags); - ret = __generic_file_aio_read(iocb, iovp, segs, offset); - if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) - ret = wait_on_sync_kiocb(iocb); - if (ret > 0) - XFS_STATS_ADD(xs_read_bytes, ret); - - xfs_iunlock(ip, XFS_IOLOCK_SHARED); - return ret; -} - -ssize_t -xfs_sendfile( - bhv_desc_t *bdp, - struct file *filp, - loff_t *offset, - int ioflags, - size_t count, - read_actor_t actor, - void *target, - cred_t *credp) -{ - xfs_inode_t *ip = XFS_BHVTOI(bdp); - xfs_mount_t *mp = ip->i_mount; - ssize_t ret; - - XFS_STATS_INC(xs_read_calls); - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - - xfs_ilock(ip, XFS_IOLOCK_SHARED); - - if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) && - (!(ioflags & IO_INVIS))) { - bhv_vrwlock_t locktype = VRWLOCK_READ; - int error; - - error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), - *offset, count, - FILP_DELAY_FLAG(filp), &locktype); - if (error) { + if (ret) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); - return -error; + return ret; } } - xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore, - (void *)(unsigned long)target, count, *offset, ioflags); - ret = generic_file_sendfile(filp, offset, count, actor, target); + + xfs_rw_enter_trace(XFS_READ_ENTER, ip, + (void *)iovp, segs, *offset, ioflags); + + iocb->ki_pos = *offset; + ret = generic_file_aio_read(iocb, iovp, segs, *offset); if (ret > 0) XFS_STATS_ADD(xs_read_bytes, ret); @@ -335,16 +264,14 @@ xfs_sendfile( ssize_t xfs_splice_read( - bhv_desc_t *bdp, + xfs_inode_t *ip, struct file *infilp, loff_t *ppos, struct pipe_inode_info *pipe, size_t count, int flags, - int ioflags, - cred_t *credp) + int ioflags) { - xfs_inode_t *ip = XFS_BHVTOI(bdp); xfs_mount_t *mp = ip->i_mount; ssize_t ret; @@ -354,20 +281,18 @@ xfs_splice_read( xfs_ilock(ip, XFS_IOLOCK_SHARED); - if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) && - (!(ioflags & IO_INVIS))) { - bhv_vrwlock_t locktype = VRWLOCK_READ; + if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { + int iolock = XFS_IOLOCK_SHARED; int error; - error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), - *ppos, count, - FILP_DELAY_FLAG(infilp), &locktype); + error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count, + FILP_DELAY_FLAG(infilp), &iolock); if (error) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); return -error; } } - xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, &ip->i_iocore, + xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, ip, pipe, count, *ppos, ioflags); ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); if (ret > 0) @@ -379,18 +304,18 @@ xfs_splice_read( ssize_t xfs_splice_write( - bhv_desc_t *bdp, + xfs_inode_t *ip, struct pipe_inode_info *pipe, struct file *outfilp, loff_t *ppos, size_t count, int flags, - int ioflags, - cred_t *credp) + int ioflags) { - xfs_inode_t *ip = XFS_BHVTOI(bdp); xfs_mount_t *mp = ip->i_mount; ssize_t ret; + struct inode *inode = outfilp->f_mapping->host; + xfs_fsize_t isize, new_size; XFS_STATS_INC(xs_write_calls); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) @@ -398,25 +323,49 @@ xfs_splice_write( xfs_ilock(ip, XFS_IOLOCK_EXCL); - if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_WRITE) && - (!(ioflags & IO_INVIS))) { - bhv_vrwlock_t locktype = VRWLOCK_WRITE; + if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) { + int iolock = XFS_IOLOCK_EXCL; int error; - error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, BHV_TO_VNODE(bdp), - *ppos, count, - FILP_DELAY_FLAG(outfilp), &locktype); + error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count, + FILP_DELAY_FLAG(outfilp), &iolock); if (error) { xfs_iunlock(ip, XFS_IOLOCK_EXCL); return -error; } } - xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore, + + new_size = *ppos + count; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + if (new_size > ip->i_size) + ip->i_new_size = new_size; + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, ip, pipe, count, *ppos, ioflags); ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); if (ret > 0) XFS_STATS_ADD(xs_write_bytes, ret); + isize = i_size_read(inode); + if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize)) + *ppos = isize; + + if (*ppos > ip->i_size) { + xfs_ilock(ip, XFS_ILOCK_EXCL); + if (*ppos > ip->i_size) + ip->i_size = *ppos; + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } + + if (ip->i_new_size) { + xfs_ilock(ip, XFS_ILOCK_EXCL); + ip->i_new_size = 0; + if (ip->i_d.di_size > ip->i_size) + ip->i_d.di_size = ip->i_size; + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } xfs_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } @@ -429,21 +378,19 @@ xfs_splice_write( */ STATIC int /* error (positive) */ xfs_zero_last_block( - struct inode *ip, - xfs_iocore_t *io, - xfs_fsize_t isize, - xfs_fsize_t end_size) + xfs_inode_t *ip, + xfs_fsize_t offset, + xfs_fsize_t isize) { xfs_fileoff_t last_fsb; - xfs_mount_t *mp = io->io_mount; + xfs_mount_t *mp = ip->i_mount; int nimaps; int zero_offset; int zero_len; int error = 0; xfs_bmbt_irec_t imap; - loff_t loff; - ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); zero_offset = XFS_B_FSB_OFFSET(mp, isize); if (zero_offset == 0) { @@ -456,7 +403,7 @@ xfs_zero_last_block( last_fsb = XFS_B_TO_FSBT(mp, isize); nimaps = 1; - error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap, + error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap, &nimaps, NULL, NULL); if (error) { return error; @@ -474,13 +421,14 @@ xfs_zero_last_block( * out sync. We need to drop the ilock while we do this so we * don't deadlock when the buffer cache calls back to us. */ - XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); + xfs_iunlock(ip, XFS_ILOCK_EXCL); - loff = XFS_FSB_TO_B(mp, last_fsb); zero_len = mp->m_sb.sb_blocksize - zero_offset; - error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size); + if (isize + zero_len > offset) + zero_len = offset - isize; + error = xfs_iozero(ip, isize, zero_len); - XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + xfs_ilock(ip, XFS_ILOCK_EXCL); ASSERT(error >= 0); return error; } @@ -498,34 +446,31 @@ xfs_zero_last_block( int /* error (positive) */ xfs_zero_eof( - bhv_vnode_t *vp, - xfs_iocore_t *io, + xfs_inode_t *ip, xfs_off_t offset, /* starting I/O offset */ - xfs_fsize_t isize, /* current inode size */ - xfs_fsize_t end_size) /* terminal inode size */ + xfs_fsize_t isize) /* current inode size */ { - struct inode *ip = vn_to_inode(vp); + xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t start_zero_fsb; xfs_fileoff_t end_zero_fsb; xfs_fileoff_t zero_count_fsb; xfs_fileoff_t last_fsb; - xfs_mount_t *mp = io->io_mount; + xfs_fileoff_t zero_off; + xfs_fsize_t zero_len; int nimaps; int error = 0; xfs_bmbt_irec_t imap; - ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); - ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); ASSERT(offset > isize); /* * First handle zeroing the block on which isize resides. * We only zero a part of that block so it is handled specially. */ - error = xfs_zero_last_block(ip, io, isize, end_size); + error = xfs_zero_last_block(ip, offset, isize); if (error) { - ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); - ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); return error; } @@ -553,11 +498,10 @@ xfs_zero_eof( while (start_zero_fsb <= end_zero_fsb) { nimaps = 1; zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; - error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb, + error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, 0, NULL, 0, &imap, &nimaps, NULL, NULL); if (error) { - ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); - ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); return error; } ASSERT(nimaps > 0); @@ -581,12 +525,15 @@ xfs_zero_eof( * Drop the inode lock while we're doing the I/O. * We'll still have the iolock to protect us. */ - XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); + zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); + + if ((zero_off + zero_len) > offset) + zero_len = offset - zero_off; - error = xfs_iozero(ip, - XFS_FSB_TO_B(mp, start_zero_fsb), - XFS_FSB_TO_B(mp, imap.br_blockcount), - end_size); + error = xfs_iozero(ip, zero_off, zero_len); if (error) { goto out_lock; } @@ -594,69 +541,44 @@ xfs_zero_eof( start_zero_fsb = imap.br_startoff + imap.br_blockcount; ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); - XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + xfs_ilock(ip, XFS_ILOCK_EXCL); } return 0; out_lock: - - XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); + xfs_ilock(ip, XFS_ILOCK_EXCL); ASSERT(error >= 0); return error; } ssize_t /* bytes written, or (-) error */ xfs_write( - bhv_desc_t *bdp, + struct xfs_inode *xip, struct kiocb *iocb, const struct iovec *iovp, unsigned int nsegs, loff_t *offset, - int ioflags, - cred_t *credp) + int ioflags) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; unsigned long segs = nsegs; - xfs_inode_t *xip; xfs_mount_t *mp; ssize_t ret = 0, error = 0; xfs_fsize_t isize, new_size; - xfs_iocore_t *io; - bhv_vnode_t *vp; - unsigned long seg; int iolock; int eventsent = 0; - bhv_vrwlock_t locktype; size_t ocount = 0, count; loff_t pos; - int need_i_mutex = 1, need_flush = 0; + int need_i_mutex; XFS_STATS_INC(xs_write_calls); - vp = BHV_TO_VNODE(bdp); - xip = XFS_BHVTOI(bdp); - - for (seg = 0; seg < segs; seg++) { - const struct iovec *iv = &iovp[seg]; - - /* - * If any segment has a negative length, or the cumulative - * length ever wraps negative then return -EINVAL. - */ - ocount += iv->iov_len; - if (unlikely((ssize_t)(ocount|iv->iov_len) < 0)) - return -EINVAL; - if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len)) - continue; - if (seg == 0) - return -EFAULT; - segs = seg; - ocount -= iv->iov_len; /* This segment is no good */ - break; - } + error = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ); + if (error) + return error; count = ocount; pos = *offset; @@ -664,47 +586,25 @@ xfs_write( if (count == 0) return 0; - io = &xip->i_iocore; - mp = io->io_mount; + mp = xip->i_mount; - vfs_wait_for_freeze(vp->v_vfsp, SB_FREEZE_WRITE); + xfs_wait_for_freeze(mp, SB_FREEZE_WRITE); if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if (ioflags & IO_ISDIRECT) { - xfs_buftarg_t *target = - (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? - mp->m_rtdev_targp : mp->m_ddev_targp; - - if ((pos & target->bt_smask) || (count & target->bt_smask)) - return XFS_ERROR(-EINVAL); - - if (!VN_CACHED(vp) && pos < i_size_read(inode)) - need_i_mutex = 0; - - if (VN_CACHED(vp)) - need_flush = 1; - } - relock: - if (need_i_mutex) { + if (ioflags & IO_ISDIRECT) { + iolock = XFS_IOLOCK_SHARED; + need_i_mutex = 0; + } else { iolock = XFS_IOLOCK_EXCL; - locktype = VRWLOCK_WRITE; - + need_i_mutex = 1; mutex_lock(&inode->i_mutex); - } else { - iolock = XFS_IOLOCK_SHARED; - locktype = VRWLOCK_WRITE_DIRECT; } xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); - isize = i_size_read(inode); - - if (file->f_flags & O_APPEND) - *offset = isize; - start: error = -generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); @@ -713,25 +613,18 @@ start: goto out_unlock_mutex; } - new_size = pos + count; - if (new_size > isize) - io->io_new_size = new_size; - - if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && + if ((DM_EVENT_ENABLED(xip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS) && !eventsent)) { - loff_t savedsize = pos; int dmflags = FILP_DELAY_FLAG(file); if (need_i_mutex) dmflags |= DM_FLAGS_IMUX; xfs_iunlock(xip, XFS_ILOCK_EXCL); - error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, - pos, count, - dmflags, &locktype); + error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip, + pos, count, dmflags, &iolock); if (error) { - xfs_iunlock(xip, iolock); - goto out_unlock_mutex; + goto out_unlock_internal; } xfs_ilock(xip, XFS_ILOCK_EXCL); eventsent = 1; @@ -743,17 +636,36 @@ start: * event prevents another call to XFS_SEND_DATA, which is * what allows the size to change in the first place. */ - if ((file->f_flags & O_APPEND) && savedsize != isize) { - pos = isize = xip->i_d.di_size; + if ((file->f_flags & O_APPEND) && pos != xip->i_size) + goto start; + } + + if (ioflags & IO_ISDIRECT) { + xfs_buftarg_t *target = + XFS_IS_REALTIME_INODE(xip) ? + mp->m_rtdev_targp : mp->m_ddev_targp; + + if ((pos & target->bt_smask) || (count & target->bt_smask)) { + xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); + return XFS_ERROR(-EINVAL); + } + + if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) { + xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); + iolock = XFS_IOLOCK_EXCL; + need_i_mutex = 1; + mutex_lock(&inode->i_mutex); + xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); goto start; } } - if (likely(!(ioflags & IO_INVIS))) { + new_size = pos + count; + if (new_size > xip->i_size) + xip->i_new_size = new_size; + + if (likely(!(ioflags & IO_INVIS))) file_update_time(file); - xfs_ichgtime_fast(xip, inode, - XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - } /* * If the offset is beyond the size of the file, we have a couple @@ -764,12 +676,11 @@ start: * to zero it out up to the new size. */ - if (pos > isize) { - error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos, - isize, pos + count); + if (pos > xip->i_size) { + error = xfs_zero_eof(xip, pos, xip->i_size); if (error) { - xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); - goto out_unlock_mutex; + xfs_iunlock(xip, XFS_ILOCK_EXCL); + goto out_unlock_internal; } } xfs_iunlock(xip, XFS_ILOCK_EXCL); @@ -787,36 +698,37 @@ start: !capable(CAP_FSETID)) { error = xfs_write_clear_setuid(xip); if (likely(!error)) - error = -remove_suid(file->f_dentry); + error = -file_remove_suid(file); if (unlikely(error)) { - xfs_iunlock(xip, iolock); - goto out_unlock_mutex; + goto out_unlock_internal; } } -retry: /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; if ((ioflags & IO_ISDIRECT)) { - if (need_flush) { - xfs_inval_cached_trace(io, pos, -1, - ctooff(offtoct(pos)), -1); - bhv_vop_flushinval_pages(vp, ctooff(offtoct(pos)), + if (mapping->nrpages) { + WARN_ON(need_i_mutex == 0); + xfs_inval_cached_trace(xip, pos, -1, + (pos & PAGE_CACHE_MASK), -1); + error = xfs_flushinval_pages(xip, + (pos & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); + if (error) + goto out_unlock_internal; } if (need_i_mutex) { /* demote the lock now the cached pages are gone */ - XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL); + xfs_ilock_demote(xip, XFS_IOLOCK_EXCL); mutex_unlock(&inode->i_mutex); iolock = XFS_IOLOCK_SHARED; - locktype = VRWLOCK_WRITE_DIRECT; need_i_mutex = 0; } - xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs, + xfs_rw_enter_trace(XFS_DIOWR_ENTER, xip, (void *)iovp, segs, *offset, ioflags); ret = generic_file_direct_write(iocb, iovp, &segs, pos, offset, count, ocount); @@ -831,56 +743,60 @@ retry: pos += ret; count -= ret; - need_i_mutex = 1; ioflags &= ~IO_ISDIRECT; xfs_iunlock(xip, iolock); goto relock; } } else { - xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs, + int enospc = 0; + ssize_t ret2 = 0; + +write_retry: + xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs, *offset, ioflags); - ret = generic_file_buffered_write(iocb, iovp, segs, + ret2 = generic_file_buffered_write(iocb, iovp, segs, pos, offset, count, ret); + /* + * if we just got an ENOSPC, flush the inode now we + * aren't holding any page locks and retry *once* + */ + if (ret2 == -ENOSPC && !enospc) { + error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE); + if (error) + goto out_unlock_internal; + enospc = 1; + goto write_retry; + } + ret = ret2; } current->backing_dev_info = NULL; - if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) - ret = wait_on_sync_kiocb(iocb); + isize = i_size_read(inode); + if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize)) + *offset = isize; - if ((ret == -ENOSPC) && - DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && - !(ioflags & IO_INVIS)) { + if (*offset > xip->i_size) { + xfs_ilock(xip, XFS_ILOCK_EXCL); + if (*offset > xip->i_size) + xip->i_size = *offset; + xfs_iunlock(xip, XFS_ILOCK_EXCL); + } - xfs_rwunlock(bdp, locktype); + if (ret == -ENOSPC && + DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { + xfs_iunlock(xip, iolock); if (need_i_mutex) mutex_unlock(&inode->i_mutex); - error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, - DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, + error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip, + DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ - if (error) - goto out_nounlocks; if (need_i_mutex) mutex_lock(&inode->i_mutex); - xfs_rwlock(bdp, locktype); - pos = xip->i_d.di_size; - ret = 0; - goto retry; - } - - isize = i_size_read(inode); - if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize)) - *offset = isize; - - if (*offset > xip->i_d.di_size) { - xfs_ilock(xip, XFS_ILOCK_EXCL); - if (*offset > xip->i_d.di_size) { - xip->i_d.di_size = *offset; - i_size_write(inode, *offset); - xip->i_update_core = 1; - xip->i_update_size = 1; - } - xfs_iunlock(xip, XFS_ILOCK_EXCL); + xfs_ilock(xip, iolock); + if (error) + goto out_unlock_internal; + goto start; } error = -ret; @@ -890,27 +806,45 @@ retry: XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ - if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { - error = xfs_write_sync_logforce(mp, xip); - if (error) - goto out_unlock_internal; + if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { + loff_t end = pos + ret - 1; + int error2; - xfs_rwunlock(bdp, locktype); + xfs_iunlock(xip, iolock); if (need_i_mutex) mutex_unlock(&inode->i_mutex); - error = sync_page_range(inode, mapping, pos, ret); + error2 = filemap_write_and_wait_range(mapping, pos, end); if (!error) - error = ret; - return error; + error = error2; + if (need_i_mutex) + mutex_lock(&inode->i_mutex); + xfs_ilock(xip, iolock); + + error2 = xfs_fsync(xip); + if (!error) + error = error2; } out_unlock_internal: - xfs_rwunlock(bdp, locktype); + if (xip->i_new_size) { + xfs_ilock(xip, XFS_ILOCK_EXCL); + xip->i_new_size = 0; + /* + * If this was a direct or synchronous I/O that failed (such + * as ENOSPC) then part of the I/O may have been written to + * disk before the error occured. In this case the on-disk + * file size may have been adjusted beyond the in-memory file + * size and now needs to be truncated back. + */ + if (xip->i_d.di_size > xip->i_size) + xip->i_d.di_size = xip->i_size; + xfs_iunlock(xip, XFS_ILOCK_EXCL); + } + xfs_iunlock(xip, iolock); out_unlock_mutex: if (need_i_mutex) mutex_unlock(&inode->i_mutex); - out_nounlocks: return -error; } @@ -923,13 +857,7 @@ retry: int xfs_bdstrat_cb(struct xfs_buf *bp) { - xfs_mount_t *mp; - - mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *); - if (!XFS_FORCED_SHUTDOWN(mp)) { - xfs_buf_iorequest(bp); - return 0; - } else { + if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { xfs_buftrace("XFS__BDSTRAT IOERROR", bp); /* * Metadata write that didn't get logged but @@ -942,50 +870,29 @@ xfs_bdstrat_cb(struct xfs_buf *bp) else return (xfs_bioerror(bp)); } -} - -int -xfs_bmap(bhv_desc_t *bdp, - xfs_off_t offset, - ssize_t count, - int flags, - xfs_iomap_t *iomapp, - int *niomaps) -{ - xfs_inode_t *ip = XFS_BHVTOI(bdp); - xfs_iocore_t *io = &ip->i_iocore; - - ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); - ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) == - ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0)); - - return xfs_iomap(io, offset, count, flags, iomapp, niomaps); + xfs_buf_iorequest(bp); + return 0; } /* - * Wrapper around bdstrat so that we can stop data - * from going to disk in case we are shutting down the filesystem. - * Typically user data goes thru this path; one of the exceptions - * is the superblock. + * Wrapper around bdstrat so that we can stop data from going to disk in case + * we are shutting down the filesystem. Typically user data goes thru this + * path; one of the exceptions is the superblock. */ -int +void xfsbdstrat( struct xfs_mount *mp, struct xfs_buf *bp) { ASSERT(mp); if (!XFS_FORCED_SHUTDOWN(mp)) { - /* Grio redirection would go here - * if (XFS_BUF_IS_GRIO(bp)) { - */ - xfs_buf_iorequest(bp); - return 0; + return; } xfs_buftrace("XFSBDSTRAT IOERROR", bp); - return (xfs_bioerror_relse(bp)); + xfs_bioerror_relse(bp); } /*