NFS: Replace atomic_t variables in nfs_direct_req with a single spin lock
/*
 * linux/fs/nfs/direct.c
 *
 * Copyright (C) 2003 by Chuck Lever <cel@netapp.com>
 *
 * High-performance uncached I/O for the Linux NFS client
 *
 * There are important applications whose performance or correctness
 * depends on uncached access to file data.  Database clusters
 * (multiple copies of the same instance running on separate hosts)
 * implement their own cache coherency protocol that subsumes file
 * system cache protocols.  Applications that process datasets
 * considerably larger than the client's memory do not always benefit
 * from a local cache.  A streaming video server, for instance, has no
 * need to cache the contents of a file.
 *
 * When an application requests uncached I/O, all read and write requests
 * are made directly to the server; data stored or fetched via these
 * requests is not cached in the Linux page cache.  The client does not
 * correct unaligned requests from applications.  All requested bytes are
 * held on permanent storage before a direct write system call returns to
 * an application.
 *
 * Solaris implements an uncached I/O facility called directio() that
 * is used for backups and sequential I/O to very large files.  Solaris
 * also supports uncaching whole NFS partitions with "-o forcedirectio,"
 * an undocumented mount option.
 *
 * Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust, with
 * help from Andrew Morton.
 *
 * 18 Dec 2001  Initial implementation for 2.4  --cel
 * 08 Jul 2002  Version for 2.4.19, with bug fixes --trondmy
 * 08 Jun 2003  Port to 2.5 APIs  --cel
 * 31 Mar 2004  Handle direct I/O without VFS support  --cel
 * 15 Sep 2004  Parallel async reads  --cel
 * 04 May 2005  support O_DIRECT with aio  --cel
 *
 */

#include <linux/config.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/smp_lock.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/kref.h>

#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/sunrpc/clnt.h>

#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/atomic.h>

#include "iostat.h"

#define NFSDBG_FACILITY         NFSDBG_VFS

static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty);
static kmem_cache_t *nfs_direct_cachep;

/*
 * This represents a set of asynchronous requests that we're waiting on
 */
struct nfs_direct_req {
        struct kref             kref;           /* release manager */

        /* I/O parameters */
        struct list_head        list;           /* nfs_read/write_data structs */
        struct file *           filp;           /* file descriptor */
        struct kiocb *          iocb;           /* controlling i/o request */
        wait_queue_head_t       wait;           /* wait for i/o completion */
        struct inode *          inode;          /* target file of i/o */
        struct page **          pages;          /* pages in our buffer */
        unsigned int            npages;         /* count of pages */

        /* completion state */
        spinlock_t              lock;           /* protect completion state */
        int                     outstanding;    /* i/os we're waiting for */
        ssize_t                 count,          /* bytes actually processed */
                                error;          /* any reported error */
};
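
/*
 * Design note: a single spinlock now guards all three pieces of
 * completion state above.  Before this change, the fields were kept as
 * separate atomic_t counters, roughly along these lines (an
 * illustrative sketch only; the names are approximations, not the
 * original code):
 *
 *      atomic_add(data->res.count, &dreq->count);
 *      if (atomic_dec_and_test(&dreq->complete))
 *              nfs_direct_complete(dreq);
 *
 * Each atomic operation is safe in isolation, but the group of fields
 * could be observed in a mixed state (count updated while error was
 * not yet recorded, for example).  Holding dreq->lock makes the
 * count/error/outstanding update in the completion path one atomic
 * transition; see nfs_direct_read_result() below.
 */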

/**
 * nfs_direct_IO - NFS address space operation for direct I/O
 * @rw: direction (read or write)
 * @iocb: target I/O control block
 * @iov: array of vectors that define I/O buffer
 * @pos: offset in file to begin the operation
 * @nr_segs: size of iovec array
 *
 * The presence of this routine in the address space ops vector means
 * the NFS client supports direct I/O.  However, we shunt off direct
 * read and write requests before the VFS gets them, so this method
 * should never be called.
 */
ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
{
        struct dentry *dentry = iocb->ki_filp->f_dentry;

        dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
                        dentry->d_name.name, (long long) pos, nr_segs);

        return -EINVAL;
}

static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages)
{
        int result = -ENOMEM;
        unsigned long page_count;
        size_t array_size;

        page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        page_count -= user_addr >> PAGE_SHIFT;

        array_size = (page_count * sizeof(struct page *));
        *pages = kmalloc(array_size, GFP_KERNEL);
        if (*pages) {
                down_read(&current->mm->mmap_sem);
                result = get_user_pages(current, current->mm, user_addr,
                                        page_count, (rw == READ), 0,
                                        *pages, NULL);
                up_read(&current->mm->mmap_sem);
                /*
                 * If we got fewer pages than expected from get_user_pages(),
                 * the user buffer runs off the end of a mapping; return EFAULT.
                 */
                if (result >= 0 && result < page_count) {
                        nfs_free_user_pages(*pages, result, 0);
                        *pages = NULL;
                        result = -EFAULT;
                }
        }
        return result;
}
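
/*
 * Worked example of the page_count arithmetic above, assuming a 4KB
 * PAGE_SIZE: for user_addr 0x10ff8 and size 16 the buffer ends at
 * 0x11008, so (0x10ff8 + 16 + 4095) >> 12 = 0x12 while
 * 0x10ff8 >> 12 = 0x10, giving page_count = 2.  Even a tiny buffer
 * pins two pages when it straddles a page boundary.
 */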

static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
{
        int i;
        for (i = 0; i < npages; i++) {
                struct page *page = pages[i];
                if (do_dirty && !PageCompound(page))
                        set_page_dirty_lock(page);
                page_cache_release(page);
        }
        kfree(pages);
}

static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
{
        struct nfs_direct_req *dreq;

        dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
        if (!dreq)
                return NULL;

        kref_init(&dreq->kref);
        init_waitqueue_head(&dreq->wait);
        INIT_LIST_HEAD(&dreq->list);
        dreq->iocb = NULL;
        spin_lock_init(&dreq->lock);
        dreq->outstanding = 0;
        dreq->count = 0;
        dreq->error = 0;

        return dreq;
}

static void nfs_direct_req_release(struct kref *kref)
{
        struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
        kmem_cache_free(nfs_direct_cachep, dreq);
}

/*
 * Collects and returns the final error value/byte-count.
 */
static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
{
        ssize_t result = -EIOCBQUEUED;

        /* Async requests don't wait here */
        if (dreq->iocb)
                goto out;

        result = wait_event_interruptible(dreq->wait, (dreq->outstanding == 0));

        if (!result)
                result = dreq->error;
        if (!result)
                result = dreq->count;

out:
        kref_put(&dreq->kref, nfs_direct_req_release);
        return result;
}

/*
 * We must hold a reference to all the pages in this direct read request
 * until the RPCs complete.  This could be long *after* we are woken up in
 * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
 *
 * In addition, synchronous I/O uses a stack-allocated iocb.  Thus we
 * can't trust the iocb is still valid here if this is a synchronous
 * request.  If the waiter is woken prematurely, the iocb is long gone.
 */
static void nfs_direct_complete(struct nfs_direct_req *dreq)
{
        nfs_free_user_pages(dreq->pages, dreq->npages, 1);

        if (dreq->iocb) {
                long res = (long) dreq->error;
                if (!res)
                        res = (long) dreq->count;
                aio_complete(dreq->iocb, res, 0);
        } else
                wake_up(&dreq->wait);

        kref_put(&dreq->kref, nfs_direct_req_release);
}
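
/*
 * Reference lifetime, as implemented in this file: kref_init() in
 * nfs_direct_req_alloc() plus the kref_get() at the end of
 * nfs_direct_read_alloc()/nfs_direct_write_alloc() give each dreq two
 * references.  One is dropped by the waiter in nfs_direct_wait(), the
 * other by I/O completion here, so the dreq remains valid no matter
 * which side finishes first.
 */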

/*
 * Note we record the number of requests in the dreq before any of them
 * are dispatched.  This prevents races with I/O completion: the waiter
 * cannot be woken until every request allocated here has been both
 * dispatched and completed.
 */
static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
{
        struct list_head *list;
        struct nfs_direct_req *dreq;
        unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

        dreq = nfs_direct_req_alloc();
        if (!dreq)
                return NULL;

        list = &dreq->list;
        for (;;) {
                struct nfs_read_data *data = nfs_readdata_alloc(rpages);

                if (unlikely(!data)) {
                        while (!list_empty(list)) {
                                data = list_entry(list->next,
                                                  struct nfs_read_data, pages);
                                list_del(&data->pages);
                                nfs_readdata_free(data);
                        }
                        kref_put(&dreq->kref, nfs_direct_req_release);
                        return NULL;
                }

                INIT_LIST_HEAD(&data->pages);
                list_add(&data->pages, list);

                data->req = (struct nfs_page *) dreq;
                dreq->outstanding++;
                if (nbytes <= rsize)
                        break;
                nbytes -= rsize;
        }
        kref_get(&dreq->kref);
        return dreq;
}
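
/*
 * For example, a 100KB read against a server rsize of 32KB makes the
 * loop above allocate four nfs_read_data structs (covering 32 + 32 +
 * 32 + 4 KB) and leave dreq->outstanding at 4 before a single RPC has
 * been sent.
 */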

static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
{
        struct nfs_read_data *data = calldata;
        struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;

        if (nfs_readpage_result(task, data) != 0)
                return;

        spin_lock(&dreq->lock);

        if (likely(task->tk_status >= 0))
                dreq->count += data->res.count;
        else
                dreq->error = task->tk_status;

        if (--dreq->outstanding) {
                spin_unlock(&dreq->lock);
                return;
        }

        spin_unlock(&dreq->lock);
        nfs_direct_complete(dreq);
}

static const struct rpc_call_ops nfs_read_direct_ops = {
        .rpc_call_done = nfs_direct_read_result,
        .rpc_release = nfs_readdata_release,
};

/*
 * For each nfs_read_data struct that was allocated on the list, dispatch
 * an NFS READ operation
 */
static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
{
        struct file *file = dreq->filp;
        struct inode *inode = file->f_mapping->host;
        struct nfs_open_context *ctx = (struct nfs_open_context *)
                                                        file->private_data;
        struct list_head *list = &dreq->list;
        struct page **pages = dreq->pages;
        size_t rsize = NFS_SERVER(inode)->rsize;
        unsigned int curpage, pgbase;

        curpage = 0;
        pgbase = user_addr & ~PAGE_MASK;
        do {
                struct nfs_read_data *data;
                size_t bytes;

                bytes = rsize;
                if (count < rsize)
                        bytes = count;

                data = list_entry(list->next, struct nfs_read_data, pages);
                list_del_init(&data->pages);

                data->inode = inode;
                data->cred = ctx->cred;
                data->args.fh = NFS_FH(inode);
                data->args.context = ctx;
                data->args.offset = pos;
                data->args.pgbase = pgbase;
                data->args.pages = &pages[curpage];
                data->args.count = bytes;
                data->res.fattr = &data->fattr;
                data->res.eof = 0;
                data->res.count = bytes;

                rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
                                &nfs_read_direct_ops, data);
                NFS_PROTO(inode)->read_setup(data);

                data->task.tk_cookie = (unsigned long) inode;

                lock_kernel();
                rpc_execute(&data->task);
                unlock_kernel();

                dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
                                data->task.tk_pid,
                                inode->i_sb->s_id,
                                (long long)NFS_FILEID(inode),
                                bytes,
                                (unsigned long long)data->args.offset);

                pos += bytes;
                pgbase += bytes;
                curpage += pgbase >> PAGE_SHIFT;
                pgbase &= ~PAGE_MASK;

                count -= bytes;
        } while (count != 0);
}
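
/*
 * The pgbase/curpage advance at the bottom of the loop above keeps the
 * page index and intra-page offset in step for unaligned buffers.  A
 * worked example (4KB pages, rsize 16KB, buffer starting 0x200 into
 * its first page): the first READ uses pages[0..4] with pgbase 0x200;
 * afterwards pgbase becomes 0x4200, so curpage advances by 4 and
 * pgbase wraps back to 0x200, and the second READ correctly starts at
 * pages[4], offset 0x200.
 */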

static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages)
{
        ssize_t result;
        sigset_t oldset;
        struct inode *inode = iocb->ki_filp->f_mapping->host;
        struct rpc_clnt *clnt = NFS_CLIENT(inode);
        struct nfs_direct_req *dreq;

        dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
        if (!dreq)
                return -ENOMEM;

        dreq->pages = pages;
        dreq->npages = nr_pages;
        dreq->inode = inode;
        dreq->filp = iocb->ki_filp;
        if (!is_sync_kiocb(iocb))
                dreq->iocb = iocb;

        nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
        rpc_clnt_sigmask(clnt, &oldset);
        nfs_direct_read_schedule(dreq, user_addr, count, pos);
        result = nfs_direct_wait(dreq);
        rpc_clnt_sigunmask(clnt, &oldset);

        return result;
}

static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
{
        struct list_head *list;
        struct nfs_direct_req *dreq;
        unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

        dreq = nfs_direct_req_alloc();
        if (!dreq)
                return NULL;

        list = &dreq->list;
        for (;;) {
                struct nfs_write_data *data = nfs_writedata_alloc(wpages);

                if (unlikely(!data)) {
                        while (!list_empty(list)) {
                                data = list_entry(list->next,
                                                  struct nfs_write_data, pages);
                                list_del(&data->pages);
                                nfs_writedata_free(data);
                        }
                        kref_put(&dreq->kref, nfs_direct_req_release);
                        return NULL;
                }

                INIT_LIST_HEAD(&data->pages);
                list_add(&data->pages, list);

                data->req = (struct nfs_page *) dreq;
                dreq->outstanding++;
                if (nbytes <= wsize)
                        break;
                nbytes -= wsize;
        }
        kref_get(&dreq->kref);
        return dreq;
}

/*
 * NB: If several of the WRITE calls fail, the error reported back to
 *     the application is whichever status was recorded last; byte
 *     counts are not returned once any error has been seen.
 */
static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
{
        struct nfs_write_data *data = calldata;
        struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
        int status = task->tk_status;

        if (nfs_writeback_done(task, data) != 0)
                return;
        /* If the server fell back to an UNSTABLE write, it's an error. */
        if (unlikely(data->res.verf->committed != NFS_FILE_SYNC))
                status = -EIO;

        spin_lock(&dreq->lock);

        if (likely(status >= 0))
                dreq->count += data->res.count;
        else
                dreq->error = status;

        if (--dreq->outstanding) {
                spin_unlock(&dreq->lock);
                return;
        }

        spin_unlock(&dreq->lock);

        nfs_end_data_update(data->inode);
        nfs_direct_complete(dreq);
}

static const struct rpc_call_ops nfs_write_direct_ops = {
        .rpc_call_done = nfs_direct_write_result,
        .rpc_release = nfs_writedata_release,
};

/*
 * For each nfs_write_data struct that was allocated on the list, dispatch
 * an NFS WRITE operation
 *
 * XXX: For now, support only FILE_SYNC writes.  Later we may add
 *      support for UNSTABLE + COMMIT.
 */
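/*
 * Background on that XXX (NFS protocol facts, not specific to this
 * file): an NFSv3 server may complete a WRITE as UNSTABLE, meaning the
 * data is not yet on stable storage and the client must later send a
 * COMMIT and check the write verifier.  Requesting FILE_SYNC (via
 * FLUSH_STABLE below) obliges the server to commit each WRITE before
 * replying, so no COMMIT step or verifier tracking is needed here.
 */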
static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
{
        struct file *file = dreq->filp;
        struct inode *inode = file->f_mapping->host;
        struct nfs_open_context *ctx = (struct nfs_open_context *)
                                                        file->private_data;
        struct list_head *list = &dreq->list;
        struct page **pages = dreq->pages;
        size_t wsize = NFS_SERVER(inode)->wsize;
        unsigned int curpage, pgbase;

        curpage = 0;
        pgbase = user_addr & ~PAGE_MASK;
        do {
                struct nfs_write_data *data;
                size_t bytes;

                bytes = wsize;
                if (count < wsize)
                        bytes = count;

                data = list_entry(list->next, struct nfs_write_data, pages);
                list_del_init(&data->pages);

                data->inode = inode;
                data->cred = ctx->cred;
                data->args.fh = NFS_FH(inode);
                data->args.context = ctx;
                data->args.offset = pos;
                data->args.pgbase = pgbase;
                data->args.pages = &pages[curpage];
                data->args.count = bytes;
                data->res.fattr = &data->fattr;
                data->res.count = bytes;
                data->res.verf = &data->verf;

                rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
                                &nfs_write_direct_ops, data);
                NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE);

                data->task.tk_priority = RPC_PRIORITY_NORMAL;
                data->task.tk_cookie = (unsigned long) inode;

                lock_kernel();
                rpc_execute(&data->task);
                unlock_kernel();

                dfprintk(VFS, "NFS: %4d initiated direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
                                data->task.tk_pid,
                                inode->i_sb->s_id,
                                (long long)NFS_FILEID(inode),
                                bytes,
                                (unsigned long long)data->args.offset);

                pos += bytes;
                pgbase += bytes;
                curpage += pgbase >> PAGE_SHIFT;
                pgbase &= ~PAGE_MASK;

                count -= bytes;
        } while (count != 0);
}

static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages)
{
        ssize_t result;
        sigset_t oldset;
        struct inode *inode = iocb->ki_filp->f_mapping->host;
        struct rpc_clnt *clnt = NFS_CLIENT(inode);
        struct nfs_direct_req *dreq;

        dreq = nfs_direct_write_alloc(count, NFS_SERVER(inode)->wsize);
        if (!dreq)
                return -ENOMEM;

        dreq->pages = pages;
        dreq->npages = nr_pages;
        dreq->inode = inode;
        dreq->filp = iocb->ki_filp;
        if (!is_sync_kiocb(iocb))
                dreq->iocb = iocb;

        nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count);

        nfs_begin_data_update(inode);

        rpc_clnt_sigmask(clnt, &oldset);
        nfs_direct_write_schedule(dreq, user_addr, count, pos);
        result = nfs_direct_wait(dreq);
        rpc_clnt_sigunmask(clnt, &oldset);

        return result;
}

/**
 * nfs_file_direct_read - file direct read operation for NFS files
 * @iocb: target I/O control block
 * @buf: user's buffer into which to read data
 * @count: number of bytes to read
 * @pos: byte offset in file where reading starts
 *
 * We use this function for direct reads instead of calling
 * generic_file_aio_read() in order to avoid gfar's check to see if
 * the request starts before the end of the file.  For that check
 * to work, we must generate a GETATTR before each direct read, and
 * even then there is a window between the GETATTR and the subsequent
 * READ where the file size could change.  Our preference is simply
 * to do all reads the application wants, and the server will take
 * care of managing the end of file boundary.
 *
 * This function also eliminates unnecessarily updating the file's
 * atime locally, as the NFS server sets the file's atime, and this
 * client must read the updated atime from the server back into its
 * cache.
 */
ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
{
        ssize_t retval = -EINVAL;
        int page_count;
        struct page **pages;
        struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;

        dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
                file->f_dentry->d_parent->d_name.name,
                file->f_dentry->d_name.name,
                (unsigned long) count, (long long) pos);

        /* count is unsigned; cast so a negative length is actually caught,
         * as the write path below already does */
        if ((ssize_t) count < 0)
                goto out;
        retval = -EFAULT;
        if (!access_ok(VERIFY_WRITE, buf, count))
                goto out;
        retval = 0;
        if (!count)
                goto out;

        retval = nfs_sync_mapping(mapping);
        if (retval)
                goto out;

        page_count = nfs_get_user_pages(READ, (unsigned long) buf,
                                                count, &pages);
        if (page_count < 0) {
                nfs_free_user_pages(pages, 0, 0);
                retval = page_count;
                goto out;
        }

        retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos,
                                                pages, page_count);
        if (retval > 0)
                iocb->ki_pos = pos + retval;

out:
        return retval;
}

/**
 * nfs_file_direct_write - file direct write operation for NFS files
 * @iocb: target I/O control block
 * @buf: user's buffer from which to write data
 * @count: number of bytes to write
 * @pos: byte offset in file where writing starts
 *
 * We use this function for direct writes instead of calling
 * generic_file_aio_write() in order to avoid taking the inode
 * semaphore and updating the i_size.  The NFS server will set
 * the new i_size and this client must read the updated size
 * back into its cache.  We let the server do generic write
 * parameter checking and report problems.
 *
 * We also avoid an unnecessary invocation of generic_osync_inode(),
 * as it is fairly meaningless to sync the metadata of an NFS file.
 *
 * We eliminate local atime updates, see direct read above.
 *
 * We avoid unnecessary page cache invalidations for normal cached
 * readers of this file.
 *
 * Note that O_APPEND is not supported for NFS direct writes, as there
 * is no atomic O_APPEND write facility in the NFS protocol.
 */
ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
{
        ssize_t retval;
        int page_count;
        struct page **pages;
        struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;

        dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
                file->f_dentry->d_parent->d_name.name,
                file->f_dentry->d_name.name,
                (unsigned long) count, (long long) pos);

        retval = generic_write_checks(file, &pos, &count, 0);
        if (retval)
                goto out;

        retval = -EINVAL;
        if ((ssize_t) count < 0)
                goto out;
        retval = 0;
        if (!count)
                goto out;

        retval = -EFAULT;
        if (!access_ok(VERIFY_READ, buf, count))
                goto out;

        retval = nfs_sync_mapping(mapping);
        if (retval)
                goto out;

        page_count = nfs_get_user_pages(WRITE, (unsigned long) buf,
                                                count, &pages);
        if (page_count < 0) {
                nfs_free_user_pages(pages, 0, 0);
                retval = page_count;
                goto out;
        }

        retval = nfs_direct_write(iocb, (unsigned long) buf, count,
                                        pos, pages, page_count);

        /*
         * XXX: nfs_end_data_update() already ensures this file's
         *      cached data is subsequently invalidated.  Do we really
         *      need to call invalidate_inode_pages2() again here?
         *
         *      For aio writes, this invalidation will almost certainly
         *      occur before the writes complete.  Kind of racey.
         */
        if (mapping->nrpages)
                invalidate_inode_pages2(mapping);

        if (retval > 0)
                iocb->ki_pos = pos + retval;

out:
        return retval;
}

/**
 * nfs_init_directcache - create a slab cache for nfs_direct_req structures
 *
 */
int nfs_init_directcache(void)
{
        nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
                                                sizeof(struct nfs_direct_req),
                                                0, SLAB_RECLAIM_ACCOUNT,
                                                NULL, NULL);
        if (nfs_direct_cachep == NULL)
                return -ENOMEM;

        return 0;
}

/**
 * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures
 *
 */
void nfs_destroy_directcache(void)
{
        if (kmem_cache_destroy(nfs_direct_cachep))
                printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n");
}