X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fxfs%2Flinux-2.6%2Fxfs_aops.c;h=c2e30eea74dc2cd6344dd6710806465b029db422;hb=a372bf8b6a12f23f68e716113ccaea4bf646dd0f;hp=80a3214623152802e0f726c6eac9a3153384eeb3;hpb=e927af90aaa7d75543edbbd9c2810e6963d0443f;p=safe%2Fjmp%2Flinux-2.6

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 80a3214..c2e30ee 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -37,10 +37,45 @@
 #include "xfs_error.h"
 #include "xfs_rw.h"
 #include "xfs_iomap.h"
+#include "xfs_vnodeops.h"
 #include <linux/mpage.h>
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
 
+
+/*
+ * Prime number of hash buckets since address is used as the key.
+ */
+#define NVSYNC		37
+#define to_ioend_wq(v)	(&xfs_ioend_wq[((unsigned long)v) % NVSYNC])
+static wait_queue_head_t xfs_ioend_wq[NVSYNC];
+
+void __init
+xfs_ioend_init(void)
+{
+	int i;
+
+	for (i = 0; i < NVSYNC; i++)
+		init_waitqueue_head(&xfs_ioend_wq[i]);
+}
+
+void
+xfs_ioend_wait(
+	xfs_inode_t	*ip)
+{
+	wait_queue_head_t *wq = to_ioend_wq(ip);
+
+	wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
+}
+
+STATIC void
+xfs_ioend_wake(
+	xfs_inode_t	*ip)
+{
+	if (atomic_dec_and_test(&ip->i_iocount))
+		wake_up(to_ioend_wq(ip));
+}
+
 STATIC void
 xfs_count_page_state(
 	struct page		*page,
@@ -72,7 +107,6 @@ xfs_page_trace(
 	unsigned long	pgoff)
 {
 	xfs_inode_t	*ip;
-	bhv_vnode_t	*vp = vn_from_inode(inode);
 	loff_t		isize = i_size_read(inode);
 	loff_t		offset = page_offset(page);
 	int		delalloc = -1, unmapped = -1, unwritten = -1;
@@ -80,7 +114,7 @@ xfs_page_trace(
 	if (page_has_buffers(page))
 		xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
 
-	ip = xfs_vtoi(vp);
+	ip = XFS_I(inode);
 	if (!ip->i_rwtrace)
 		return;
 
@@ -106,21 +140,16 @@ xfs_page_trace(
 #define xfs_page_trace(tag, inode, page, pgoff)
 #endif
 
-/*
- * Schedule IO completion handling on a xfsdatad if this was
- * the final hold on this ioend. If we are asked to wait,
- * flush the workqueue.
- */
-STATIC void
-xfs_finish_ioend(
-	xfs_ioend_t	*ioend,
-	int		wait)
+STATIC struct block_device *
+xfs_find_bdev_for_inode(
+	struct xfs_inode	*ip)
 {
-	if (atomic_dec_and_test(&ioend->io_remaining)) {
-		queue_work(xfsdatad_workqueue, &ioend->io_work);
-		if (wait)
-			flush_workqueue(xfsdatad_workqueue);
-	}
+	struct xfs_mount	*mp = ip->i_mount;
+
+	if (XFS_IS_REALTIME_INODE(ip))
+		return mp->m_rtdev_targp->bt_bdev;
+	else
+		return mp->m_ddev_targp->bt_bdev;
 }
 
 /*
@@ -134,33 +163,60 @@ xfs_destroy_ioend(
 	xfs_ioend_t		*ioend)
 {
 	struct buffer_head	*bh, *next;
+	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
 
 	for (bh = ioend->io_buffer_head; bh; bh = next) {
 		next = bh->b_private;
 		bh->b_end_io(bh, !ioend->io_error);
 	}
-	if (unlikely(ioend->io_error))
-		vn_ioerror(ioend->io_vnode, ioend->io_error, __FILE__,__LINE__);
-	vn_iowake(ioend->io_vnode);
+
+	/*
+	 * Volume managers supporting multiple paths can send back ENODEV
+	 * when the final path disappears. In this case continuing to fill
+	 * the page cache with dirty data which cannot be written out is
+	 * evil, so prevent that.
+	 */
+	if (unlikely(ioend->io_error == -ENODEV)) {
+		xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ,
+				      __FILE__, __LINE__);
+	}
+
+	xfs_ioend_wake(ip);
 	mempool_free(ioend, xfs_ioend_pool);
 }
 
 /*
+ * If the end of the current ioend is beyond the current EOF,
+ * return the new EOF value, otherwise zero.
+ */
+STATIC xfs_fsize_t
+xfs_ioend_new_eof(
+	xfs_ioend_t		*ioend)
+{
+	xfs_inode_t		*ip = XFS_I(ioend->io_inode);
+	xfs_fsize_t		isize;
+	xfs_fsize_t		bsize;
+
+	bsize = ioend->io_offset + ioend->io_size;
+	isize = MAX(ip->i_size, ip->i_new_size);
+	isize = MIN(isize, bsize);
+	return isize > ip->i_d.di_size ? isize : 0;
+}
+
+/*
  * Update on-disk file size now that data has been written to disk.
  * The current in-memory file size is i_size. If a write is beyond
- * eof io_new_size will be the intended file size until i_size is
+ * eof i_new_size will be the intended file size until i_size is
  * updated. If this write does not extend all the way to the valid
  * file size then restrict this update to the end of the write.
  */
+
 STATIC void
 xfs_setfilesize(
 	xfs_ioend_t		*ioend)
 {
-	xfs_inode_t		*ip;
+	xfs_inode_t		*ip = XFS_I(ioend->io_inode);
 	xfs_fsize_t		isize;
-	xfs_fsize_t		bsize;
-
-	ip = xfs_vtoi(ioend->io_vnode);
 
 	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
 	ASSERT(ioend->io_type != IOMAP_READ);
@@ -168,17 +224,11 @@ xfs_setfilesize(
 	if (unlikely(ioend->io_error))
 		return;
 
-	bsize = ioend->io_offset + ioend->io_size;
-
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	isize = MAX(ip->i_size, ip->i_iocore.io_new_size);
-	isize = MIN(isize, bsize);
-
-	if (ip->i_d.di_size < isize) {
+	isize = xfs_ioend_new_eof(ioend);
+	if (isize) {
 		ip->i_d.di_size = isize;
-		ip->i_update_core = 1;
-		ip->i_update_size = 1;
+		xfs_mark_inode_dirty_sync(ip);
 	}
 
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -224,12 +274,17 @@ xfs_end_bio_unwritten(
 {
 	xfs_ioend_t		*ioend =
 		container_of(work, xfs_ioend_t, io_work);
-	bhv_vnode_t		*vp = ioend->io_vnode;
+	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
 	xfs_off_t		offset = ioend->io_offset;
 	size_t			size = ioend->io_size;
 
 	if (likely(!ioend->io_error)) {
-		bhv_vop_bmap(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL);
+		if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+			int error;
+			error = xfs_iomap_write_unwritten(ip, offset, size);
+			if (error)
+				ioend->io_error = error;
+		}
 		xfs_setfilesize(ioend);
 	}
 	xfs_destroy_ioend(ioend);
@@ -249,6 +304,27 @@ xfs_end_bio_read(
 }
 
 /*
+ * Schedule IO completion handling on a xfsdatad if this was
+ * the final hold on this ioend. If we are asked to wait,
+ * flush the workqueue.
+ */
+STATIC void
+xfs_finish_ioend(
+	xfs_ioend_t	*ioend,
+	int		wait)
+{
+	if (atomic_dec_and_test(&ioend->io_remaining)) {
+		struct workqueue_struct *wq = xfsdatad_workqueue;
+		if (ioend->io_work.func == xfs_end_bio_unwritten)
+			wq = xfsconvertd_workqueue;
+
+		queue_work(wq, &ioend->io_work);
+		if (wait)
+			flush_workqueue(wq);
+	}
+}
+
+/*
  * Allocate and initialise an IO completion structure.
  * We need to track unwritten extent write completion here initially.
 * We'll need to extend this for updating the ondisk inode size later
@@ -272,10 +348,10 @@ xfs_alloc_ioend(
 	ioend->io_error = 0;
 	ioend->io_list = NULL;
 	ioend->io_type = type;
-	ioend->io_vnode = vn_from_inode(inode);
+	ioend->io_inode = inode;
 	ioend->io_buffer_head = NULL;
 	ioend->io_buffer_tail = NULL;
-	atomic_inc(&ioend->io_vnode->v_iocount);
+	atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
 	ioend->io_offset = 0;
 	ioend->io_size = 0;
 
@@ -299,13 +375,9 @@ xfs_map_blocks(
 	xfs_iomap_t		*mapp,
 	int			flags)
 {
-	bhv_vnode_t		*vp = vn_from_inode(inode);
-	int			error, nmaps = 1;
+	int			nmaps = 1;
 
-	error = bhv_vop_bmap(vp, offset, count, flags, mapp, &nmaps);
-	if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)))
-		VMODIFY(vp);
-	return -error;
+	return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps);
 }
 
 STATIC_INLINE int
@@ -320,17 +392,13 @@ xfs_iomap_valid(
 /*
  * BIO completion handler for buffered IO.
  */
-STATIC int
+STATIC void
 xfs_end_bio(
 	struct bio		*bio,
-	unsigned int		bytes_done,
 	int			error)
 {
 	xfs_ioend_t		*ioend = bio->bi_private;
 
-	if (bio->bi_size)
-		return 1;
-
 	ASSERT(atomic_read(&bio->bi_cnt) >= 1);
 	ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
 
@@ -340,7 +408,6 @@ xfs_end_bio(
 	bio_put(bio);
 
 	xfs_finish_ioend(ioend, 0);
-	return 0;
 }
 
 STATIC void
@@ -349,10 +416,16 @@ xfs_submit_ioend_bio(
 	struct bio	*bio)
 {
 	atomic_inc(&ioend->io_remaining);
-
 	bio->bi_private = ioend;
 	bio->bi_end_io = xfs_end_bio;
 
+	/*
+	 * If the I/O is beyond EOF we mark the inode dirty immediately
+	 * but don't update the inode size until I/O completion.
+	 */
+	if (xfs_ioend_new_eof(ioend))
+		xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode));
+
 	submit_bio(WRITE, bio);
 	ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
 	bio_put(bio);
@@ -394,7 +467,6 @@ xfs_start_buffer_writeback(
 STATIC void
 xfs_start_page_writeback(
 	struct page		*page,
-	struct writeback_control *wbc,
 	int			clear_dirty,
 	int			buffers)
 {
@@ -404,10 +476,9 @@ xfs_start_page_writeback(
 	clear_page_dirty_for_io(page);
 	set_page_writeback(page);
 	unlock_page(page);
-	if (!buffers) {
+	/* If no buffers on the page are to be written, finish it here */
+	if (!buffers)
 		end_page_writeback(page);
-		wbc->pages_skipped++;	/* We didn't write this page */
-	}
 }
 
 static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
@@ -500,7 +571,7 @@ xfs_cancel_ioend(
 			unlock_buffer(bh);
 		} while ((bh = next_bh) != NULL);
 
-		vn_iowake(ioend->io_vnode);
+		xfs_ioend_wake(XFS_I(ioend->io_inode));
 		mempool_free(ioend, xfs_ioend_pool);
 	} while ((ioend = next) != NULL);
 }
@@ -650,7 +721,7 @@ xfs_probe_cluster(
 
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page	*page = pvec.pages[i];
-			size_t		pg_offset, len = 0;
+			size_t		pg_offset, pg_len = 0;
 
 			if (tindex == tlast) {
 				pg_offset =
@@ -662,17 +733,17 @@ xfs_probe_cluster(
 			} else
 				pg_offset = PAGE_CACHE_SIZE;
 
-			if (page->index == tindex && !TestSetPageLocked(page)) {
-				len = xfs_probe_page(page, pg_offset, mapped);
+			if (page->index == tindex && trylock_page(page)) {
+				pg_len = xfs_probe_page(page, pg_offset, mapped);
 				unlock_page(page);
 			}
 
-			if (!len) {
+			if (!pg_len) {
 				done = 1;
 				break;
 			}
 
-			total += len;
+			total += pg_len;
 			tindex++;
 		}
 
@@ -746,7 +817,7 @@ xfs_convert_page(
 
 	if (page->index != tindex)
 		goto fail;
-	if (TestSetPageLocked(page))
+	if (!trylock_page(page))
 		goto fail;
 	if (PageWriteback(page))
 		goto fail_unlock_page;
@@ -844,7 +915,7 @@ xfs_convert_page(
 				done = 1;
 			}
 		}
-		xfs_start_page_writeback(page, wbc, !page_dirty, count);
+		xfs_start_page_writeback(page, !page_dirty, count);
 	}
 
 	return done;
@@ -1008,6 +1079,8 @@ xfs_page_state_convert(
 		if (buffer_unwritten(bh) || buffer_delay(bh) ||
 		    ((buffer_uptodate(bh) || PageUptodate(page)) &&
 		     !buffer_mapped(bh) && (unmapped || startio))) {
+			int new_ioend = 0;
+
 			/*
 			 * Make sure we don't use a read-only iomap
 			 */
@@ -1026,6 +1099,15 @@ xfs_page_state_convert(
 			}
 
 			if (!iomap_valid) {
+				/*
+				 * if we didn't have a valid mapping then we
+				 * need to ensure that we put the new mapping
+				 * in a new ioend structure. This needs to be
+				 * done to ensure that the ioends correctly
+				 * reflect the block mappings at io completion
+				 * for unwritten extent conversion.
+				 */
+				new_ioend = 1;
 				if (type == IOMAP_NEW) {
 					size = xfs_probe_cluster(inode,
 							page, bh, head, 0);
@@ -1045,7 +1127,7 @@ xfs_page_state_convert(
 				if (startio) {
 					xfs_add_to_ioend(inode, bh, offset,
 							type, &ioend,
-							!iomap_valid);
+							new_ioend);
 				} else {
 					set_buffer_dirty(bh);
 					unlock_buffer(bh);
@@ -1080,7 +1162,7 @@ xfs_page_state_convert(
 			 * that we are writing into for the first time.
 			 */
 			type = IOMAP_NEW;
-			if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
+			if (trylock_buffer(bh)) {
 				ASSERT(buffer_mapped(bh));
 				if (iomap_valid)
 					all_bh = 1;
@@ -1105,7 +1187,7 @@ xfs_page_state_convert(
 		SetPageUptodate(page);
 
 	if (startio)
-		xfs_start_page_writeback(page, wbc, 1, count);
+		xfs_start_page_writeback(page, 1, count);
 
 	if (ioend && iomap_valid) {
 		offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
@@ -1203,6 +1285,14 @@ xfs_vm_writepage(
 	if (!page_has_buffers(page))
 		create_empty_buffers(page, 1 << inode->i_blkbits, 0);
 
+
+	/*
+	 * VM calculation for nr_to_write seems off. Bump it way
+	 * up, this gets simple streaming writes zippy again.
+	 * To be reviewed again after Jens' writeback changes.
+	 */
+	wbc->nr_to_write *= 4;
+
 	/*
 	 * Convert delayed allocate, unwritten or unmapped space
 	 * to real space and flush out to disk.
@@ -1229,10 +1319,7 @@ xfs_vm_writepages(
 	struct address_space	*mapping,
 	struct writeback_control *wbc)
 {
-	struct bhv_vnode	*vp = vn_from_inode(mapping->host);
-
-	if (VN_TRUNC(vp))
-		VUNTRUNCATE(vp);
+	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
 	return generic_writepages(mapping, wbc);
 }
 
@@ -1309,7 +1396,6 @@ __xfs_get_blocks(
 	int			direct,
 	bmapi_flags_t		flags)
 {
-	bhv_vnode_t		*vp = vn_from_inode(inode);
 	xfs_iomap_t		iomap;
 	xfs_off_t		offset;
 	ssize_t			size;
@@ -1319,7 +1405,11 @@ __xfs_get_blocks(
 	offset = (xfs_off_t)iblock << inode->i_blkbits;
 	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
 	size = bh_result->b_size;
-	error = bhv_vop_bmap(vp, offset, size,
+
+	if (!create && direct && offset >= i_size_read(inode))
+		return 0;
+
+	error = xfs_iomap(XFS_I(inode), offset, size,
 		       create ? flags : BMAPI_READ, &iomap, &niomap);
 	if (error)
 		return -error;
@@ -1467,28 +1557,21 @@ xfs_vm_direct_IO(
 {
 	struct file	*file = iocb->ki_filp;
 	struct inode	*inode = file->f_mapping->host;
-	bhv_vnode_t	*vp = vn_from_inode(inode);
-	xfs_iomap_t	iomap;
-	int		maps = 1;
-	int		error;
+	struct block_device *bdev;
 	ssize_t		ret;
 
-	error = bhv_vop_bmap(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps);
-	if (error)
-		return -error;
+	bdev = xfs_find_bdev_for_inode(XFS_I(inode));
 
 	if (rw == WRITE) {
 		iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
 		ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
-			iomap.iomap_target->bt_bdev,
-			iov, offset, nr_segs,
+			bdev, iov, offset, nr_segs,
 			xfs_get_blocks_direct,
 			xfs_end_io_direct);
 	} else {
 		iocb->private = xfs_alloc_ioend(inode, IOMAP_READ);
 		ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
-			iomap.iomap_target->bt_bdev,
-			iov, offset, nr_segs,
+			bdev, iov, offset, nr_segs,
 			xfs_get_blocks_direct,
 			xfs_end_io_direct);
 	}
@@ -1499,13 +1582,18 @@ xfs_vm_direct_IO(
 }
 
 STATIC int
-xfs_vm_prepare_write(
+xfs_vm_write_begin(
 	struct file		*file,
-	struct page		*page,
-	unsigned int		from,
-	unsigned int		to)
+	struct address_space	*mapping,
+	loff_t			pos,
+	unsigned		len,
+	unsigned		flags,
+	struct page		**pagep,
+	void			**fsdata)
 {
-	return block_prepare_write(page, from, to, xfs_get_blocks);
+	*pagep = NULL;
+	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				xfs_get_blocks);
 }
 
 STATIC sector_t
@@ -1514,12 +1602,12 @@ xfs_vm_bmap(
 	sector_t		block)
 {
 	struct inode		*inode = (struct inode *)mapping->host;
-	bhv_vnode_t		*vp = vn_from_inode(inode);
+	struct xfs_inode	*ip = XFS_I(inode);
 
-	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
-	bhv_vop_rwlock(vp, VRWLOCK_READ);
-	bhv_vop_flush_pages(vp, (xfs_off_t)0, -1, 0, FI_REMAPF);
-	bhv_vop_rwunlock(vp, VRWLOCK_READ);
+	xfs_itrace_entry(XFS_I(inode));
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
+	xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 	return generic_block_bmap(mapping, block, xfs_get_blocks);
 }
 
@@ -1559,9 +1647,11 @@ const struct address_space_operations xfs_address_space_operations = {
 	.sync_page		= block_sync_page,
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
-	.prepare_write		= xfs_vm_prepare_write,
-	.commit_write		= generic_commit_write,
+	.write_begin		= xfs_vm_write_begin,
+	.write_end		= generic_write_end,
 	.bmap			= xfs_vm_bmap,
 	.direct_IO		= xfs_vm_direct_IO,
 	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate	= block_is_partially_uptodate,
+	.error_remove_page	= generic_error_remove_page,
 };
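
The hashed wait queue added at the top of this patch is worth a note: rather than embedding a wait queue in every inode, xfs_ioend_wait() and xfs_ioend_wake() hash the inode's address into a small prime-sized table (NVSYNC buckets) of shared queues. Hash collisions only cause spurious wakeups, which are harmless because each waiter re-checks its own condition (i_iocount == 0). Below is a minimal userspace sketch of the same pattern using POSIX threads; it is illustrative only, not part of the patch, and all names in it are hypothetical.

/*
 * Userspace sketch (not from the patch) of the hashed wait-queue
 * pattern behind xfs_ioend_wait()/xfs_ioend_wake().  All names are
 * hypothetical; build with -lpthread.
 */
#include <pthread.h>
#include <stdatomic.h>

#define NBUCKETS 37			/* prime, like the patch's NVSYNC */

static struct waitq {
	pthread_mutex_t	lock;
	pthread_cond_t	cond;
} wq_table[NBUCKETS];

static struct waitq *to_waitq(const void *obj)
{
	return &wq_table[(unsigned long)obj % NBUCKETS];
}

static void waitq_init(void)		/* analogous to xfs_ioend_init() */
{
	for (int i = 0; i < NBUCKETS; i++) {
		pthread_mutex_init(&wq_table[i].lock, NULL);
		pthread_cond_init(&wq_table[i].cond, NULL);
	}
}

struct object {
	atomic_int	iocount;	/* outstanding I/Os, like i_iocount */
};

/* Analogous to xfs_ioend_wait(): block until all I/O has drained. */
static void object_iowait(struct object *obj)
{
	struct waitq *wq = to_waitq(obj);

	pthread_mutex_lock(&wq->lock);
	while (atomic_load(&obj->iocount) != 0)
		pthread_cond_wait(&wq->cond, &wq->lock);
	pthread_mutex_unlock(&wq->lock);
}

/* Analogous to xfs_ioend_wake(): drop one reference, wake on the last. */
static void object_iodone(struct object *obj)
{
	if (atomic_fetch_sub(&obj->iocount, 1) == 1) {
		struct waitq *wq = to_waitq(obj);

		/*
		 * Broadcast under the lock so a waiter sitting between its
		 * iocount check and pthread_cond_wait() cannot miss the wakeup.
		 */
		pthread_mutex_lock(&wq->lock);
		pthread_cond_broadcast(&wq->cond);
		pthread_mutex_unlock(&wq->lock);
	}
}

A prime bucket count helps because object addresses share alignment in their low-order bits; reducing modulo a prime spreads them across all buckets instead of clustering on a few. The trade-off is constant memory for any number of objects at the cost of the occasional spurious wakeup when unrelated objects collide in the table.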
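
The other subtle piece is xfs_ioend_new_eof(), which decides whether a completed ioend may move the on-disk file size: it clamps the intended size (the larger of the in-core size and i_new_size) to the end of this particular I/O, and reports a new EOF only if that exceeds what is already recorded on disk. A standalone sketch of that clamp, with hypothetical names:

/* Sketch (not from the patch) of the xfs_ioend_new_eof() clamp logic. */
#include <stdint.h>

typedef int64_t fsize_t;

static fsize_t max64(fsize_t a, fsize_t b) { return a > b ? a : b; }
static fsize_t min64(fsize_t a, fsize_t b) { return a < b ? a : b; }

/*
 * in_core:  current in-memory file size (ip->i_size)
 * intended: size a write beyond EOF will eventually reach (i_new_size)
 * on_disk:  file size currently recorded on disk (ip->i_d.di_size)
 * io_end:   where this ioend stops (io_offset + io_size)
 *
 * Returns the new on-disk EOF, or 0 if this I/O does not extend it.
 */
static fsize_t new_eof(fsize_t in_core, fsize_t intended,
		       fsize_t on_disk, fsize_t io_end)
{
	fsize_t isize = min64(max64(in_core, intended), io_end);

	return isize > on_disk ? isize : 0;
}

For example, new_eof(4096, 8192, 4096, 6144) returns 6144: the file is being extended to 8192 eventually, but this ioend only covers data up to 6144, so it is only safe to advance the on-disk size that far.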