X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fnfs%2Fwrite.c;h=3aea3ca98ab788c5dae67c0769ff723eb9b94217;hb=0ca1f7ceb1991099ed5273885ebcf4323948c72e;hp=754066cc9146dcbbae2f62294fab3e0e0ccdc2ae;hpb=9fd367f0f376ccfb2592eed9be0eece70429894f;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 754066c..3aea3ca 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -25,6 +26,8 @@ #include "delegation.h" #include "internal.h" #include "iostat.h" +#include "nfs4_fs.h" +#include "fscache.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -34,11 +37,9 @@ /* * Local function declarations */ -static struct nfs_page * nfs_update_request(struct nfs_open_context*, - struct page *, - unsigned int, unsigned int); static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc, struct inode *inode, int ioflags); +static void nfs_redirty_request(struct nfs_page *req); static const struct rpc_call_ops nfs_write_partial_ops; static const struct rpc_call_ops nfs_write_full_ops; static const struct rpc_call_ops nfs_commit_ops; @@ -47,30 +48,25 @@ static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; static mempool_t *nfs_commit_mempool; -struct nfs_write_data *nfs_commit_alloc(void) +struct nfs_write_data *nfs_commitdata_alloc(void) { struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); + p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; } return p; } -static void nfs_commit_rcu_free(struct rcu_head *head) +void nfs_commit_free(struct nfs_write_data *p) { - struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu); if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); mempool_free(p, nfs_commit_mempool); } -void nfs_commit_free(struct nfs_write_data *wdata) -{ - call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free); -} - struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) { struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); @@ -79,6 +75,7 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); p->npages = pagecount; + p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; if (pagecount <= ARRAY_SIZE(p->page_array)) p->pagevec = p->page_array; else { @@ -92,22 +89,24 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) return p; } -static void nfs_writedata_rcu_free(struct rcu_head *head) +void nfs_writedata_free(struct nfs_write_data *p) { - struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu); if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); mempool_free(p, nfs_wdata_mempool); } -static void nfs_writedata_free(struct nfs_write_data *wdata) +static void nfs_writedata_release(struct nfs_write_data *wdata) { - call_rcu_bh(&wdata->task.u.tk_rcu, nfs_writedata_rcu_free); + put_nfs_open_context(wdata->args.context); + nfs_writedata_free(wdata); } -void nfs_writedata_release(void *wdata) +static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) { - nfs_writedata_free(wdata); + ctx->error = error; + smp_wmb(); + set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); } static struct nfs_page *nfs_page_find_request_locked(struct page *page) @@ -124,12 +123,12 @@ static struct nfs_page *nfs_page_find_request_locked(struct page *page) static struct nfs_page *nfs_page_find_request(struct page *page) { + struct inode *inode = page->mapping->host; struct nfs_page *req = NULL; - spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; - spin_lock(req_lock); + spin_lock(&inode->i_lock); req = nfs_page_find_request_locked(page); - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); return req; } @@ -137,16 +136,21 @@ static struct nfs_page *nfs_page_find_request(struct page *page) static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) { struct inode *inode = page->mapping->host; - loff_t end, i_size = i_size_read(inode); - pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; + loff_t end, i_size; + pgoff_t end_index; + spin_lock(&inode->i_lock); + i_size = i_size_read(inode); + end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; if (i_size > 0 && page->index < end_index) - return; + goto out; end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); if (i_size >= end) - return; - nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); + goto out; i_size_write(inode, end); + nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); +out: + spin_unlock(&inode->i_lock); } /* A writeback failed: mark the page as bad, and invalidate the page cache */ @@ -167,39 +171,14 @@ static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int return; if (count != nfs_page_length(page)) return; - if (count != PAGE_CACHE_SIZE) - zero_user_page(page, count, PAGE_CACHE_SIZE - count, KM_USER0); SetPageUptodate(page); } -static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, - unsigned int offset, unsigned int count) -{ - struct nfs_page *req; - int ret; - - for (;;) { - req = nfs_update_request(ctx, page, offset, count); - if (!IS_ERR(req)) - break; - ret = PTR_ERR(req); - if (ret != -EBUSY) - return ret; - ret = nfs_wb_page(page->mapping->host, page); - if (ret != 0) - return ret; - } - /* Update file length */ - nfs_grow_file(page, offset, count); - nfs_unlock_request(req); - return 0; -} - static int wb_priority(struct writeback_control *wbc) { if (wbc->for_reclaim) return FLUSH_HIGHPRI | FLUSH_STABLE; - if (wbc->for_kupdate) + if (wbc->for_kupdate || wbc->for_background) return FLUSH_LOWPRI; return 0; } @@ -222,9 +201,12 @@ static int nfs_set_page_writeback(struct page *page) struct inode *inode = page->mapping->host; struct nfs_server *nfss = NFS_SERVER(inode); + page_cache_get(page); if (atomic_long_inc_return(&nfss->writeback) > - NFS_CONGESTION_ON_THRESH) - set_bdi_congested(&nfss->backing_dev_info, WRITE); + NFS_CONGESTION_ON_THRESH) { + set_bdi_congested(&nfss->backing_dev_info, + BLK_RW_ASYNC); + } } return ret; } @@ -235,144 +217,147 @@ static void nfs_end_page_writeback(struct page *page) struct nfs_server *nfss = NFS_SERVER(inode); end_page_writeback(page); - if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) { - clear_bdi_congested(&nfss->backing_dev_info, WRITE); - congestion_end(WRITE); - } + page_cache_release(page); + if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) + clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); } -/* - * Find an associated nfs write request, and prepare to flush it out - * Returns 1 if there was no write request, or if the request was - * already tagged by nfs_set_page_dirty.Returns 0 if the request - * was not tagged. - * May also return an error if the user signalled nfs_wait_on_request(). - */ -static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, - struct page *page) +static struct nfs_page *nfs_find_and_lock_request(struct page *page) { + struct inode *inode = page->mapping->host; struct nfs_page *req; - struct nfs_inode *nfsi = NFS_I(page->mapping->host); - spinlock_t *req_lock = &nfsi->req_lock; int ret; - spin_lock(req_lock); - for(;;) { + spin_lock(&inode->i_lock); + for (;;) { req = nfs_page_find_request_locked(page); - if (req == NULL) { - spin_unlock(req_lock); - return 1; - } - if (nfs_lock_request_dontget(req)) + if (req == NULL) + break; + if (nfs_set_page_tag_locked(req)) break; /* Note: If we hold the page lock, as is the case in nfs_writepage, - * then the call to nfs_lock_request_dontget() will always + * then the call to nfs_set_page_tag_locked() will always * succeed provided that someone hasn't already marked the * request as dirty (in which case we don't care). */ - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); ret = nfs_wait_on_request(req); nfs_release_request(req); if (ret != 0) - return ret; - spin_lock(req_lock); - } - if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { - /* This request is marked for commit */ - spin_unlock(req_lock); - nfs_unlock_request(req); - nfs_pageio_complete(pgio); - return 1; + return ERR_PTR(ret); + spin_lock(&inode->i_lock); } - if (nfs_set_page_writeback(page) != 0) { - spin_unlock(req_lock); - BUG(); - } - radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, - NFS_PAGE_TAG_LOCKED); - ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); - spin_unlock(req_lock); - nfs_pageio_add_request(pgio, req); - return ret; + spin_unlock(&inode->i_lock); + return req; } /* - * Write an mmapped page to the server. + * Find an associated nfs write request, and prepare to flush it out + * May return an error if the user signalled nfs_wait_on_request(). */ -static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) +static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, + struct page *page) { - struct nfs_pageio_descriptor mypgio, *pgio; - struct nfs_open_context *ctx; - struct inode *inode = page->mapping->host; - unsigned offset; - int err; + struct nfs_page *req; + int ret = 0; - nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); - nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); + req = nfs_find_and_lock_request(page); + if (!req) + goto out; + ret = PTR_ERR(req); + if (IS_ERR(req)) + goto out; - if (wbc->for_writepages) - pgio = wbc->fs_private; - else { - nfs_pageio_init_write(&mypgio, inode, wb_priority(wbc)); - pgio = &mypgio; + ret = nfs_set_page_writeback(page); + BUG_ON(ret != 0); + BUG_ON(test_bit(PG_CLEAN, &req->wb_flags)); + + if (!nfs_pageio_add_request(pgio, req)) { + nfs_redirty_request(req); + ret = pgio->pg_error; } +out: + return ret; +} - nfs_pageio_cond_complete(pgio, page->index); +static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) +{ + struct inode *inode = page->mapping->host; - err = nfs_page_async_flush(pgio, page); - if (err <= 0) - goto out; - err = 0; - offset = nfs_page_length(page); - if (!offset) - goto out; + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); + nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); nfs_pageio_cond_complete(pgio, page->index); + return nfs_page_async_flush(pgio, page); +} - ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE); - if (ctx == NULL) { - err = -EBADF; - goto out; - } - err = nfs_writepage_setup(ctx, page, 0, offset); - put_nfs_open_context(ctx); - if (err != 0) - goto out; - err = nfs_page_async_flush(pgio, page); - if (err > 0) - err = 0; -out: - if (!wbc->for_writepages) - nfs_pageio_complete(pgio); - return err; +/* + * Write an mmapped page to the server. + */ +static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) +{ + struct nfs_pageio_descriptor pgio; + int err; + + nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc)); + err = nfs_do_writepage(page, wbc, &pgio); + nfs_pageio_complete(&pgio); + if (err < 0) + return err; + if (pgio.pg_error < 0) + return pgio.pg_error; + return 0; } int nfs_writepage(struct page *page, struct writeback_control *wbc) { - int err; + int ret; + + ret = nfs_writepage_locked(page, wbc); + unlock_page(page); + return ret; +} + +static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data) +{ + int ret; - err = nfs_writepage_locked(page, wbc); + ret = nfs_do_writepage(page, wbc, data); unlock_page(page); - return err; + return ret; } int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct inode *inode = mapping->host; + unsigned long *bitlock = &NFS_I(inode)->flags; struct nfs_pageio_descriptor pgio; int err; + /* Stop dirtying of new pages while we sync */ + err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING, + nfs_wait_bit_killable, TASK_KILLABLE); + if (err) + goto out_err; + nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); - wbc->fs_private = &pgio; - err = generic_writepages(mapping, wbc); + err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); nfs_pageio_complete(&pgio); - if (err) - return err; - if (pgio.pg_error) - return pgio.pg_error; + + clear_bit_unlock(NFS_INO_FLUSHING, bitlock); + smp_mb__after_clear_bit(); + wake_up_bit(bitlock, NFS_INO_FLUSHING); + + if (err < 0) + goto out_err; + err = pgio.pg_error; + if (err < 0) + goto out_err; return 0; +out_err: + return err; } /* @@ -383,23 +368,31 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) struct nfs_inode *nfsi = NFS_I(inode); int error; + error = radix_tree_preload(GFP_NOFS); + if (error != 0) + goto out; + + /* Lock the request! */ + nfs_lock_request_dontget(req); + + spin_lock(&inode->i_lock); error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); - BUG_ON(error == -EEXIST); - if (error) - return error; + BUG_ON(error); if (!nfsi->npages) { igrab(inode); - nfs_begin_data_update(inode); if (nfs_have_delegation(inode, FMODE_WRITE)) nfsi->change_attr++; } SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); - if (PageDirty(req->wb_page)) - set_bit(PG_NEED_FLUSH, &req->wb_flags); nfsi->npages++; kref_get(&req->wb_kref); - return 0; + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, + NFS_PAGE_TAG_LOCKED); + spin_unlock(&inode->i_lock); + radix_tree_preload_end(); +out: + return error; } /* @@ -412,40 +405,25 @@ static void nfs_inode_remove_request(struct nfs_page *req) BUG_ON (!NFS_WBACK_BUSY(req)); - spin_lock(&nfsi->req_lock); + spin_lock(&inode->i_lock); set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); - if (test_and_clear_bit(PG_NEED_FLUSH, &req->wb_flags)) - __set_page_dirty_nobuffers(req->wb_page); nfsi->npages--; if (!nfsi->npages) { - spin_unlock(&nfsi->req_lock); - nfs_end_data_update(inode); + spin_unlock(&inode->i_lock); iput(inode); } else - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); nfs_clear_request(req); nfs_release_request(req); } static void -nfs_redirty_request(struct nfs_page *req) +nfs_mark_request_dirty(struct nfs_page *req) { __set_page_dirty_nobuffers(req->wb_page); -} - -/* - * Check if a request is dirty - */ -static inline int -nfs_dirty_request(struct nfs_page *req) -{ - struct page *page = req->wb_page; - - if (page == NULL || test_bit(PG_NEED_COMMIT, &req->wb_flags)) - return 0; - return !PageWriteback(req->wb_page); + __mark_inode_dirty(req->wb_page->mapping->host, I_DIRTY_DATASYNC); } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) @@ -458,15 +436,31 @@ nfs_mark_request_commit(struct nfs_page *req) struct inode *inode = req->wb_context->path.dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); - spin_lock(&nfsi->req_lock); - nfs_list_add_request(req, &nfsi->commit); + spin_lock(&inode->i_lock); + set_bit(PG_CLEAN, &(req)->wb_flags); + radix_tree_tag_set(&nfsi->nfs_page_tree, + req->wb_index, + NFS_PAGE_TAG_COMMIT); nfsi->ncommit++; - set_bit(PG_NEED_COMMIT, &(req)->wb_flags); - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); } +static int +nfs_clear_request_commit(struct nfs_page *req) +{ + struct page *page = req->wb_page; + + if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) { + dec_zone_page_state(page, NR_UNSTABLE_NFS); + dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); + return 1; + } + return 0; +} + static inline int nfs_write_need_commit(struct nfs_write_data *data) { @@ -476,12 +470,12 @@ int nfs_write_need_commit(struct nfs_write_data *data) static inline int nfs_reschedule_unstable_write(struct nfs_page *req) { - if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { + if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { nfs_mark_request_commit(req); return 1; } if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { - nfs_redirty_request(req); + nfs_mark_request_dirty(req); return 1; } return 0; @@ -492,6 +486,12 @@ nfs_mark_request_commit(struct nfs_page *req) { } +static inline int +nfs_clear_request_commit(struct nfs_page *req) +{ + return 0; +} + static inline int nfs_write_need_commit(struct nfs_write_data *data) { @@ -505,59 +505,13 @@ int nfs_reschedule_unstable_write(struct nfs_page *req) } #endif -/* - * Wait for a request to complete. - * - * Interruptible by signals only if mounted with intr flag. - */ -static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages) -{ - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_page *req; - pgoff_t idx_end, next; - unsigned int res = 0; - int error; - - if (npages == 0) - idx_end = ~0; - else - idx_end = idx_start + npages - 1; - - next = idx_start; - while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) { - if (req->wb_index > idx_end) - break; - - next = req->wb_index + 1; - BUG_ON(!NFS_WBACK_BUSY(req)); - - kref_get(&req->wb_kref); - spin_unlock(&nfsi->req_lock); - error = nfs_wait_on_request(req); - nfs_release_request(req); - spin_lock(&nfsi->req_lock); - if (error < 0) - return error; - res++; - } - return res; -} - -static void nfs_cancel_commit_list(struct list_head *head) +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +static int +nfs_need_commit(struct nfs_inode *nfsi) { - struct nfs_page *req; - - while(!list_empty(head)) { - req = nfs_list_entry(head->next); - dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - nfs_list_remove_request(req); - clear_bit(PG_NEED_COMMIT, &(req)->wb_flags); - nfs_inode_remove_request(req); - nfs_unlock_request(req); - } + return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT); } -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) /* * nfs_scan_commit - Scan an inode for commit requests * @inode: NFS inode to scan @@ -572,17 +526,24 @@ static int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); - int res = 0; + int ret; - if (nfsi->ncommit != 0) { - res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages); - nfsi->ncommit -= res; - if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); - } - return res; + if (!nfs_need_commit(nfsi)) + return 0; + + ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); + if (ret > 0) + nfsi->ncommit -= ret; + if (nfs_need_commit(NFS_I(inode))) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + return ret; } #else +static inline int nfs_need_commit(struct nfs_inode *nfsi) +{ + return 0; +} + static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) { return 0; @@ -590,99 +551,130 @@ static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pg #endif /* - * Try to update any existing write request, or create one if there is none. - * In order to match, the request's credentials must match those of - * the calling process. + * Search for an existing write request, and attempt to update + * it to reflect a new dirty region on a given page. * - * Note: Should always be called with the Page Lock held! + * If the attempt fails, then the existing request is flushed out + * to disk. */ -static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, - struct page *page, unsigned int offset, unsigned int bytes) +static struct nfs_page *nfs_try_to_update_request(struct inode *inode, + struct page *page, + unsigned int offset, + unsigned int bytes) { - struct address_space *mapping = page->mapping; - struct inode *inode = mapping->host; - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_page *req, *new = NULL; - pgoff_t rqend, end; + struct nfs_page *req; + unsigned int rqend; + unsigned int end; + int error; + + if (!PagePrivate(page)) + return NULL; end = offset + bytes; + spin_lock(&inode->i_lock); for (;;) { - /* Loop over all inode entries and see if we find - * A request for the page we wish to update - */ - spin_lock(&nfsi->req_lock); req = nfs_page_find_request_locked(page); - if (req) { - if (!nfs_lock_request_dontget(req)) { - int error; - - spin_unlock(&nfsi->req_lock); - error = nfs_wait_on_request(req); - nfs_release_request(req); - if (error < 0) { - if (new) - nfs_release_request(new); - return ERR_PTR(error); - } - continue; - } - spin_unlock(&nfsi->req_lock); - if (new) - nfs_release_request(new); - break; - } + if (req == NULL) + goto out_unlock; + + rqend = req->wb_offset + req->wb_bytes; + /* + * Tell the caller to flush out the request if + * the offsets are non-contiguous. + * Note: nfs_flush_incompatible() will already + * have flushed out requests having wrong owners. + */ + if (offset > rqend + || end < req->wb_offset) + goto out_flushme; - if (new) { - int error; - nfs_lock_request_dontget(new); - error = nfs_inode_add_request(inode, new); - if (error) { - spin_unlock(&nfsi->req_lock); - nfs_unlock_request(new); - return ERR_PTR(error); - } - spin_unlock(&nfsi->req_lock); - return new; - } - spin_unlock(&nfsi->req_lock); + if (nfs_set_page_tag_locked(req)) + break; - new = nfs_create_request(ctx, inode, page, offset, bytes); - if (IS_ERR(new)) - return new; + /* The request is locked, so wait and then retry */ + spin_unlock(&inode->i_lock); + error = nfs_wait_on_request(req); + nfs_release_request(req); + if (error != 0) + goto out_err; + spin_lock(&inode->i_lock); } - /* We have a request for our page. - * If the creds don't match, or the - * page addresses don't match, - * tell the caller to wait on the conflicting - * request. - */ - rqend = req->wb_offset + req->wb_bytes; - if (req->wb_context != ctx - || req->wb_page != page - || !nfs_dirty_request(req) - || offset > rqend || end < req->wb_offset) { - nfs_unlock_request(req); - return ERR_PTR(-EBUSY); - } + if (nfs_clear_request_commit(req) && + radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) + NFS_I(inode)->ncommit--; /* Okay, the request matches. Update the region */ if (offset < req->wb_offset) { req->wb_offset = offset; req->wb_pgbase = offset; - req->wb_bytes = rqend - req->wb_offset; } - if (end > rqend) req->wb_bytes = end - req->wb_offset; + else + req->wb_bytes = rqend - req->wb_offset; +out_unlock: + spin_unlock(&inode->i_lock); + return req; +out_flushme: + spin_unlock(&inode->i_lock); + nfs_release_request(req); + error = nfs_wb_page(inode, page); +out_err: + return ERR_PTR(error); +} +/* + * Try to update an existing write request, or create one if there is none. + * + * Note: Should always be called with the Page Lock held to prevent races + * if we have to add a new request. Also assumes that the caller has + * already called nfs_flush_incompatible() if necessary. + */ +static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, + struct page *page, unsigned int offset, unsigned int bytes) +{ + struct inode *inode = page->mapping->host; + struct nfs_page *req; + int error; + + req = nfs_try_to_update_request(inode, page, offset, bytes); + if (req != NULL) + goto out; + req = nfs_create_request(ctx, inode, page, offset, bytes); + if (IS_ERR(req)) + goto out; + error = nfs_inode_add_request(inode, req); + if (error != 0) { + nfs_release_request(req); + req = ERR_PTR(error); + } +out: return req; } +static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, + unsigned int offset, unsigned int count) +{ + struct nfs_page *req; + + req = nfs_setup_write_request(ctx, page, offset, count); + if (IS_ERR(req)) + return PTR_ERR(req); + nfs_mark_request_dirty(req); + /* Update file length */ + nfs_grow_file(page, offset, count); + nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); + nfs_mark_request_dirty(req); + nfs_clear_page_tag_locked(req); + return 0; +} + int nfs_flush_incompatible(struct file *file, struct page *page) { - struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct nfs_page *req; int do_flush, status; /* @@ -697,8 +689,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) req = nfs_page_find_request(page); if (req == NULL) return 0; - do_flush = req->wb_page != page || req->wb_context != ctx - || !nfs_dirty_request(req); + do_flush = req->wb_page != page || req->wb_context != ctx; nfs_release_request(req); if (!do_flush) return 0; @@ -708,6 +699,17 @@ int nfs_flush_incompatible(struct file *file, struct page *page) } /* + * If the page cache is marked as unsafe or invalid, then we can't rely on + * the PageUptodate() flag. In this case, we will need to turn off + * write optimisations that depend on the page contents being correct. + */ +static int nfs_write_pageuptodate(struct page *page, struct inode *inode) +{ + return PageUptodate(page) && + !(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA)); +} + +/* * Update and possibly write a cached page of an NFS file. * * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad @@ -716,53 +718,49 @@ int nfs_flush_incompatible(struct file *file, struct page *page) int nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count) { - struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = page->mapping->host; int status = 0; nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); - dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n", + dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, count, - (long long)(page_offset(page) +offset)); + (long long)(page_offset(page) + offset)); /* If we're not using byte range locks, and we know the page - * is entirely in cache, it may be more efficient to avoid - * fragmenting write requests. + * is up to date, it may be more efficient to extend the write + * to cover the entire page in order to avoid fragmentation + * inefficiencies. */ - if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) { + if (nfs_write_pageuptodate(page, inode) && + inode->i_flock == NULL && + !(file->f_flags & O_DSYNC)) { count = max(count + offset, nfs_page_length(page)); offset = 0; } status = nfs_writepage_setup(ctx, page, offset, count); - __set_page_dirty_nobuffers(page); - - dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", - status, (long long)i_size_read(inode)); if (status < 0) nfs_set_pageerror(page); + + dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", + status, (long long)i_size_read(inode)); return status; } static void nfs_writepage_release(struct nfs_page *req) { + struct page *page = req->wb_page; - if (PageError(req->wb_page)) { - nfs_end_page_writeback(req->wb_page); + if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) nfs_inode_remove_request(req); - } else if (!nfs_reschedule_unstable_write(req)) { - /* Set the PG_uptodate flag */ - nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes); - nfs_end_page_writeback(req->wb_page); - nfs_inode_remove_request(req); - } else - nfs_end_page_writeback(req->wb_page); nfs_clear_page_tag_locked(req); + nfs_end_page_writeback(page); } -static inline int flush_task_priority(int how) +static int flush_task_priority(int how) { switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) { case FLUSH_HIGHPRI: @@ -776,28 +774,51 @@ static inline int flush_task_priority(int how) /* * Set up the argument/result storage required for the RPC call. */ -static void nfs_write_rpcsetup(struct nfs_page *req, +static int nfs_write_rpcsetup(struct nfs_page *req, struct nfs_write_data *data, const struct rpc_call_ops *call_ops, unsigned int count, unsigned int offset, int how) { - struct inode *inode; - int flags; + struct inode *inode = req->wb_context->path.dentry->d_inode; + int priority = flush_task_priority(how); + struct rpc_task *task; + struct rpc_message msg = { + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = req->wb_context->cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = NFS_CLIENT(inode), + .task = &data->task, + .rpc_message = &msg, + .callback_ops = call_ops, + .callback_data = data, + .workqueue = nfsiod_workqueue, + .flags = RPC_TASK_ASYNC, + .priority = priority, + }; + int ret = 0; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ data->req = req; data->inode = inode = req->wb_context->path.dentry->d_inode; - data->cred = req->wb_context->cred; + data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; data->args.pgbase = req->wb_pgbase + offset; data->args.pages = data->pagevec; data->args.count = count; - data->args.context = req->wb_context; + data->args.context = get_nfs_open_context(req->wb_context); + data->args.stable = NFS_UNSTABLE; + if (how & FLUSH_STABLE) { + data->args.stable = NFS_DATA_SYNC; + if (!nfs_need_commit(NFS_I(inode))) + data->args.stable = NFS_FILE_SYNC; + } data->res.fattr = &data->fattr; data->res.count = count; @@ -805,30 +826,42 @@ static void nfs_write_rpcsetup(struct nfs_page *req, nfs_fattr_init(&data->fattr); /* Set up the initial task struct. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; - rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data); - NFS_PROTO(inode)->write_setup(data, how); - - data->task.tk_priority = flush_task_priority(how); - data->task.tk_cookie = (unsigned long)inode; + NFS_PROTO(inode)->write_setup(data, &msg); dprintk("NFS: %5u initiated write call " - "(req %s/%Ld, %u bytes @ offset %Lu)\n", + "(req %s/%lld, %u bytes @ offset %llu)\n", data->task.tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), count, (unsigned long long)data->args.offset); + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) { + ret = PTR_ERR(task); + goto out; + } + if (how & FLUSH_SYNC) { + ret = rpc_wait_for_completion_task(task); + if (ret == 0) + ret = task->tk_status; + } + rpc_put_task(task); +out: + return ret; } -static void nfs_execute_write(struct nfs_write_data *data) +/* If a nfs_flush_* function fails, it should remove reqs from @head and + * call this on each, which will prepare them to be retried on next + * writeback using standard nfs. + */ +static void nfs_redirty_request(struct nfs_page *req) { - struct rpc_clnt *clnt = NFS_CLIENT(data->inode); - sigset_t oldset; + struct page *page = req->wb_page; - rpc_clnt_sigmask(clnt, &oldset); - rpc_execute(&data->task); - rpc_clnt_sigunmask(clnt, &oldset); + nfs_mark_request_dirty(req); + nfs_clear_page_tag_locked(req); + nfs_end_page_writeback(page); } /* @@ -843,6 +876,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned size_t wsize = NFS_SERVER(inode)->wsize, nbytes; unsigned int offset; int requests = 0; + int ret = 0; LIST_HEAD(list); nfs_list_remove_request(req); @@ -864,6 +898,8 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned offset = 0; nbytes = count; do { + int ret2; + data = list_entry(list.next, struct nfs_write_data, pages); list_del_init(&data->pages); @@ -871,14 +907,15 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned if (nbytes < wsize) wsize = nbytes; - nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, + ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, wsize, offset, how); + if (ret == 0) + ret = ret2; offset += wsize; nbytes -= wsize; - nfs_execute_write(data); } while (nbytes != 0); - return 0; + return ret; out_bad: while (!list_empty(&list)) { @@ -887,8 +924,6 @@ out_bad: nfs_writedata_release(data); } nfs_redirty_request(req); - nfs_end_page_writeback(req->wb_page); - nfs_clear_page_tag_locked(req); return -ENOMEM; } @@ -921,17 +956,12 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i req = nfs_list_entry(data->pages.next); /* Set up the argument struct */ - nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); - - nfs_execute_write(data); - return 0; + return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); out_bad: while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_redirty_request(req); - nfs_end_page_writeback(req->wb_page); - nfs_clear_page_tag_locked(req); } return -ENOMEM; } @@ -939,7 +969,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) { - int wsize = NFS_SERVER(inode)->wsize; + size_t wsize = NFS_SERVER(inode)->wsize; if (wsize < PAGE_CACHE_SIZE) nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); @@ -953,29 +983,35 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - struct nfs_page *req = data->req; - struct page *page = req->wb_page; - dprintk("NFS: write (%s/%Ld %d@%Ld)", - req->wb_context->path.dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), - req->wb_bytes, - (long long)req_offset(req)); + dprintk("NFS: %5u write(%s/%lld %d@%lld)", + task->tk_pid, + data->req->wb_context->path.dentry->d_inode->i_sb->s_id, + (long long) + NFS_FILEID(data->req->wb_context->path.dentry->d_inode), + data->req->wb_bytes, (long long)req_offset(data->req)); - if (nfs_writeback_done(task, data) != 0) - return; + nfs_writeback_done(task, data); +} + +static void nfs_writeback_release_partial(void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_page *req = data->req; + struct page *page = req->wb_page; + int status = data->task.tk_status; - if (task->tk_status < 0) { + if (status < 0) { nfs_set_pageerror(page); - req->wb_context->error = task->tk_status; - dprintk(", error = %d\n", task->tk_status); + nfs_context_set_write_error(req->wb_context, status); + dprintk(", error = %d\n", status); goto out; } if (nfs_write_need_commit(data)) { - spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; + struct inode *inode = page->mapping->host; - spin_lock(req_lock); + spin_lock(&inode->i_lock); if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) { /* Do nothing we need to resend the writes */ } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) { @@ -986,18 +1022,35 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) clear_bit(PG_NEED_COMMIT, &req->wb_flags); dprintk(" server reboot detected\n"); } - spin_unlock(req_lock); + spin_unlock(&inode->i_lock); } else dprintk(" OK\n"); out: if (atomic_dec_and_test(&req->wb_complete)) nfs_writepage_release(req); + nfs_writedata_release(calldata); +} + +#if defined(CONFIG_NFS_V4_1) +void nfs_write_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_client *clp = (NFS_SERVER(data->inode))->nfs_client; + + if (nfs4_setup_sequence(clp, &data->args.seq_args, + &data->res.seq_res, 1, task)) + return; + rpc_call_start(task); } +#endif /* CONFIG_NFS_V4_1 */ static const struct rpc_call_ops nfs_write_partial_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_writeback_done_partial, - .rpc_release = nfs_writedata_release, + .rpc_release = nfs_writeback_release_partial, }; /* @@ -1010,52 +1063,58 @@ static const struct rpc_call_ops nfs_write_partial_ops = { static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - struct nfs_page *req; - struct page *page; - if (nfs_writeback_done(task, data) != 0) - return; + nfs_writeback_done(task, data); +} + +static void nfs_writeback_release_full(void *calldata) +{ + struct nfs_write_data *data = calldata; + int status = data->task.tk_status; /* Update attributes as result of writeback. */ while (!list_empty(&data->pages)) { - req = nfs_list_entry(data->pages.next); + struct nfs_page *req = nfs_list_entry(data->pages.next); + struct page *page = req->wb_page; + nfs_list_remove_request(req); - page = req->wb_page; - dprintk("NFS: write (%s/%Ld %d@%Ld)", + dprintk("NFS: %5u write (%s/%lld %d@%lld)", + data->task.tk_pid, req->wb_context->path.dentry->d_inode->i_sb->s_id, (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); - if (task->tk_status < 0) { + if (status < 0) { nfs_set_pageerror(page); - req->wb_context->error = task->tk_status; - dprintk(", error = %d\n", task->tk_status); + nfs_context_set_write_error(req->wb_context, status); + dprintk(", error = %d\n", status); goto remove_request; } if (nfs_write_need_commit(data)) { memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); nfs_mark_request_commit(req); - nfs_end_page_writeback(page); dprintk(" marked for commit\n"); goto next; } - /* Set the PG_uptodate flag? */ - nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); dprintk(" OK\n"); remove_request: - nfs_end_page_writeback(page); nfs_inode_remove_request(req); next: nfs_clear_page_tag_locked(req); + nfs_end_page_writeback(page); } + nfs_writedata_release(calldata); } static const struct rpc_call_ops nfs_write_full_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_writeback_done_full, - .rpc_release = nfs_writedata_release, + .rpc_release = nfs_writeback_release_full, }; @@ -1066,6 +1125,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { struct nfs_writeargs *argp = &data->args; struct nfs_writeres *resp = &data->res; + struct nfs_server *server = NFS_SERVER(data->inode); int status; dprintk("NFS: %5u nfs_writeback_done (status %d)\n", @@ -1096,9 +1156,9 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) static unsigned long complain; if (time_before(complain, jiffies)) { - dprintk("NFS: faulty NFS server %s:" + dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", - NFS_SERVER(data->inode)->nfs_client->cl_hostname, + server->nfs_client->cl_hostname, resp->verf->committed, argp->stable); complain = jiffies + 300 * HZ; } @@ -1124,7 +1184,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) */ argp->stable = NFS_FILE_SYNC; } - rpc_restart_call(task); + nfs_restart_rpc(task, server->nfs_client); return -EAGAIN; } if (time_before(complain, jiffies)) { @@ -1141,50 +1201,88 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -void nfs_commit_release(void *wdata) +static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) { + if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) + return 1; + if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags, + NFS_INO_COMMIT, nfs_wait_bit_killable, + TASK_KILLABLE)) + return 1; + return 0; +} + +static void nfs_commit_clear_lock(struct nfs_inode *nfsi) +{ + clear_bit(NFS_INO_COMMIT, &nfsi->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); +} + + +static void nfs_commitdata_release(void *data) +{ + struct nfs_write_data *wdata = data; + + put_nfs_open_context(wdata->args.context); nfs_commit_free(wdata); } /* * Set up the argument/result storage required for the RPC call. */ -static void nfs_commit_rpcsetup(struct list_head *head, +static int nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data, int how) { - struct nfs_page *first; - struct inode *inode; - int flags; + struct nfs_page *first = nfs_list_entry(head->next); + struct inode *inode = first->wb_context->path.dentry->d_inode; + int priority = flush_task_priority(how); + struct rpc_task *task; + struct rpc_message msg = { + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = first->wb_context->cred, + }; + struct rpc_task_setup task_setup_data = { + .task = &data->task, + .rpc_client = NFS_CLIENT(inode), + .rpc_message = &msg, + .callback_ops = &nfs_commit_ops, + .callback_data = data, + .workqueue = nfsiod_workqueue, + .flags = RPC_TASK_ASYNC, + .priority = priority, + }; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ list_splice_init(head, &data->pages); - first = nfs_list_entry(data->pages.next); - inode = first->wb_context->path.dentry->d_inode; data->inode = inode; - data->cred = first->wb_context->cred; + data->cred = msg.rpc_cred; data->args.fh = NFS_FH(data->inode); /* Note: we always request a commit of the entire inode */ data->args.offset = 0; data->args.count = 0; + data->args.context = get_nfs_open_context(first->wb_context); data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); /* Set up the initial task struct. */ - flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; - rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data); - NFS_PROTO(inode)->commit_setup(data, how); + NFS_PROTO(inode)->commit_setup(data, &msg); - data->task.tk_priority = flush_task_priority(how); - data->task.tk_cookie = (unsigned long)inode; - dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; } /* @@ -1196,24 +1294,24 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) struct nfs_write_data *data; struct nfs_page *req; - data = nfs_commit_alloc(); + data = nfs_commitdata_alloc(); if (!data) goto out_bad; /* Set up the argument struct */ - nfs_commit_rpcsetup(head, data, how); - - nfs_execute_write(data); - return 0; + return nfs_commit_rpcsetup(head, data, how); out_bad: while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_mark_request_commit(req); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + dec_bdi_stat(req->wb_page->mapping->backing_dev_info, + BDI_RECLAIMABLE); nfs_clear_page_tag_locked(req); } + nfs_commit_clear_lock(NFS_I(inode)); return -ENOMEM; } @@ -1223,7 +1321,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) static void nfs_commit_done(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - struct nfs_page *req; dprintk("NFS: %5u nfs_commit_done (status %d)\n", task->tk_pid, task->tk_status); @@ -1231,22 +1328,28 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) /* Call the NFS version-specific code */ if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) return; +} + +static void nfs_commit_release(void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_page *req; + int status = data->task.tk_status; while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); - clear_bit(PG_NEED_COMMIT, &(req)->wb_flags); - dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + nfs_clear_request_commit(req); - dprintk("NFS: commit (%s/%Ld %d@%Ld)", + dprintk("NFS: commit (%s/%lld %d@%lld)", req->wb_context->path.dentry->d_inode->i_sb->s_id, (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); - if (task->tk_status < 0) { - req->wb_context->error = task->tk_status; + if (status < 0) { + nfs_context_set_write_error(req->wb_context, status); nfs_inode_remove_request(req); - dprintk(", error = %d\n", task->tk_status); + dprintk(", error = %d\n", status); goto next; } @@ -1254,98 +1357,97 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) * returned by the server against all stored verfs. */ if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { /* We have a match */ - /* Set the PG_uptodate flag */ - nfs_mark_uptodate(req->wb_page, req->wb_pgbase, - req->wb_bytes); nfs_inode_remove_request(req); dprintk(" OK\n"); goto next; } /* We have a mismatch. Write the page again */ dprintk(" mismatch\n"); - nfs_redirty_request(req); + nfs_mark_request_dirty(req); next: nfs_clear_page_tag_locked(req); } + nfs_commit_clear_lock(NFS_I(data->inode)); + nfs_commitdata_release(calldata); } static const struct rpc_call_ops nfs_commit_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_commit_done, .rpc_release = nfs_commit_release, }; -int nfs_commit_inode(struct inode *inode, int how) +static int nfs_commit_inode(struct inode *inode, int how) { - struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); - int res; + int may_wait = how & FLUSH_SYNC; + int res = 0; - spin_lock(&nfsi->req_lock); + if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) + goto out; + spin_lock(&inode->i_lock); res = nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&nfsi->req_lock); + spin_unlock(&inode->i_lock); if (res) { int error = nfs_commit_list(inode, &head, how); if (error < 0) return error; - } + if (may_wait) + wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT, + nfs_wait_bit_killable, + TASK_KILLABLE); + } else + nfs_commit_clear_lock(NFS_I(inode)); +out: return res; } + +static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) +{ + struct nfs_inode *nfsi = NFS_I(inode); + int flags = FLUSH_SYNC; + int ret = 0; + + /* Don't commit yet if this is a non-blocking flush and there are + * lots of outstanding writes for this mapping. + */ + if (wbc->sync_mode == WB_SYNC_NONE && + nfsi->ncommit <= (nfsi->npages >> 1)) + goto out_mark_dirty; + + if (wbc->nonblocking || wbc->for_background) + flags = 0; + ret = nfs_commit_inode(inode, flags); + if (ret >= 0) { + if (wbc->sync_mode == WB_SYNC_NONE) { + if (ret < wbc->nr_to_write) + wbc->nr_to_write -= ret; + else + wbc->nr_to_write = 0; + } + return 0; + } +out_mark_dirty: + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + return ret; +} #else -static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how) +static int nfs_commit_inode(struct inode *inode, int how) +{ + return 0; +} + +static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) { return 0; } #endif -long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) +int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { - struct inode *inode = mapping->host; - struct nfs_inode *nfsi = NFS_I(inode); - pgoff_t idx_start, idx_end; - unsigned int npages = 0; - LIST_HEAD(head); - int nocommit = how & FLUSH_NOCOMMIT; - long pages, ret; - - /* FIXME */ - if (wbc->range_cyclic) - idx_start = 0; - else { - idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; - idx_end = wbc->range_end >> PAGE_CACHE_SHIFT; - if (idx_end > idx_start) { - pgoff_t l_npages = 1 + idx_end - idx_start; - npages = l_npages; - if (sizeof(npages) != sizeof(l_npages) && - (pgoff_t)npages != l_npages) - npages = 0; - } - } - how &= ~FLUSH_NOCOMMIT; - spin_lock(&nfsi->req_lock); - do { - ret = nfs_wait_on_requests_locked(inode, idx_start, npages); - if (ret != 0) - continue; - if (nocommit) - break; - pages = nfs_scan_commit(inode, &head, idx_start, npages); - if (pages == 0) - break; - if (how & FLUSH_INVALIDATE) { - spin_unlock(&nfsi->req_lock); - nfs_cancel_commit_list(&head); - ret = pages; - spin_lock(&nfsi->req_lock); - continue; - } - pages += nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&nfsi->req_lock); - ret = nfs_commit_list(inode, &head, how); - spin_lock(&nfsi->req_lock); - } while (ret >= 0); - spin_unlock(&nfsi->req_lock); - return ret; + return nfs_commit_unstable_pages(inode, wbc); } /* @@ -1353,124 +1455,117 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr */ int nfs_wb_all(struct inode *inode) { - struct address_space *mapping = inode->i_mapping; struct writeback_control wbc = { - .bdi = mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, - .for_writepages = 1, - .range_cyclic = 1, + .range_start = 0, + .range_end = LLONG_MAX, }; - int ret; - ret = nfs_writepages(mapping, &wbc); - if (ret < 0) - goto out; - ret = nfs_sync_mapping_wait(mapping, &wbc, 0); - if (ret >= 0) - return 0; -out: - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - return ret; + return sync_inode(inode, &wbc); } -int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, loff_t range_end, int how) +int nfs_wb_page_cancel(struct inode *inode, struct page *page) { - struct writeback_control wbc = { - .bdi = mapping->backing_dev_info, - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .range_start = range_start, - .range_end = range_end, - .for_writepages = 1, - }; - int ret; + struct nfs_page *req; + int ret = 0; - ret = nfs_writepages(mapping, &wbc); - if (ret < 0) - goto out; - ret = nfs_sync_mapping_wait(mapping, &wbc, how); - if (ret >= 0) - return 0; -out: - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + BUG_ON(!PageLocked(page)); + for (;;) { + wait_on_page_writeback(page); + req = nfs_page_find_request(page); + if (req == NULL) + break; + if (nfs_lock_request_dontget(req)) { + nfs_inode_remove_request(req); + /* + * In case nfs_inode_remove_request has marked the + * page as being dirty + */ + cancel_dirty_page(page, PAGE_CACHE_SIZE); + nfs_unlock_request(req); + break; + } + ret = nfs_wait_on_request(req); + nfs_release_request(req); + if (ret < 0) + break; + } return ret; } -int nfs_wb_page_priority(struct inode *inode, struct page *page, int how) +/* + * Write back all requests on one page - we do this before reading it. + */ +int nfs_wb_page(struct inode *inode, struct page *page) { loff_t range_start = page_offset(page); loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); struct writeback_control wbc = { - .bdi = page->mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, + .nr_to_write = 0, .range_start = range_start, .range_end = range_end, }; int ret; - BUG_ON(!PageLocked(page)); - if (clear_page_dirty_for_io(page)) { - ret = nfs_writepage_locked(page, &wbc); + while(PagePrivate(page)) { + wait_on_page_writeback(page); + if (clear_page_dirty_for_io(page)) { + ret = nfs_writepage_locked(page, &wbc); + if (ret < 0) + goto out_error; + } + ret = sync_inode(inode, &wbc); if (ret < 0) - goto out; + goto out_error; } - if (!PagePrivate(page)) - return 0; - ret = nfs_sync_mapping_wait(page->mapping, &wbc, how); - if (ret >= 0) - return 0; -out: - __mark_inode_dirty(inode, I_DIRTY_PAGES); + return 0; +out_error: return ret; } -/* - * Write back all requests on one page - we do this before reading it. - */ -int nfs_wb_page(struct inode *inode, struct page* page) +#ifdef CONFIG_MIGRATION +int nfs_migrate_page(struct address_space *mapping, struct page *newpage, + struct page *page) { - return nfs_wb_page_priority(inode, page, FLUSH_STABLE); -} - -int nfs_set_page_dirty(struct page *page) -{ - struct address_space *mapping = page->mapping; - struct inode *inode; - spinlock_t *req_lock; struct nfs_page *req; int ret; - if (!mapping) - goto out_raced; - inode = mapping->host; - if (!inode) - goto out_raced; - req_lock = &NFS_I(inode)->req_lock; - spin_lock(req_lock); - req = nfs_page_find_request_locked(page); - if (req != NULL) { - /* Mark any existing write requests for flushing */ - ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags); - spin_unlock(req_lock); - nfs_release_request(req); - return ret; - } - ret = __set_page_dirty_nobuffers(page); - spin_unlock(req_lock); + nfs_fscache_release_page(page, GFP_KERNEL); + + req = nfs_find_and_lock_request(page); + ret = PTR_ERR(req); + if (IS_ERR(req)) + goto out; + + ret = migrate_page(mapping, newpage, page); + if (!req) + goto out; + if (ret) + goto out_unlock; + page_cache_get(newpage); + spin_lock(&mapping->host->i_lock); + req->wb_page = newpage; + SetPagePrivate(newpage); + set_page_private(newpage, (unsigned long)req); + ClearPagePrivate(page); + set_page_private(page, 0); + spin_unlock(&mapping->host->i_lock); + page_cache_release(page); +out_unlock: + nfs_clear_page_tag_locked(req); +out: return ret; -out_raced: - return !TestSetPageDirty(page); } - +#endif int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", sizeof(struct nfs_write_data), 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + NULL); if (nfs_wdata_cachep == NULL) return -ENOMEM;