#include <linux/uio.h>
#include <linux/hash.h>
#include <linux/writeback.h>
+#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/cpuset.h>
-#include "filemap.h"
+#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
+#include <linux/memcontrol.h>
#include "internal.h"
/*
{
struct address_space *mapping = page->mapping;
+ mem_cgroup_uncharge_page(page);
radix_tree_delete(&mapping->page_tree, page->index);
page->mapping = NULL;
mapping->nrpages--;
__dec_zone_page_state(page, NR_FILE_PAGES);
BUG_ON(page_mapped(page));
+
+ /*
+ * Some filesystems seem to re-dirty the page even after
+ * the VM has canceled the dirty bit (eg ext3 journaling).
+ *
+ * Fix it up by doing a final dirty accounting check after
+ * having removed the page entirely.
+ */
+ if (PageDirty(page) && mapping_cap_account_dirty(mapping)) {
+ dec_zone_page_state(page, NR_FILE_DIRTY);
+ dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+ }
}
void remove_from_page_cache(struct page *page)
return 0;
}
+/*
+ * Wait action used by __lock_page_killable(): run sync_page() on @word,
+ * then report -EINTR if the current task has a fatal signal pending and
+ * 0 otherwise.
+ */
+static int sync_page_killable(void *word)
+{
+	sync_page(word);
+	if (fatal_signal_pending(current))
+		return -EINTR;
+	return 0;
+}
+
/**
* __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
* @mapping: address space structure to write
EXPORT_SYMBOL(sync_page_range);
/**
- * sync_page_range_nolock
+ * sync_page_range_nolock - write & wait on all pages in the passed range without locking
* @inode: target inode
* @mapping: target address_space
* @pos: beginning offset in pages to write
int add_to_page_cache(struct page *page, struct address_space *mapping,
pgoff_t offset, gfp_t gfp_mask)
{
- int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
+ int error = mem_cgroup_cache_charge(page, current->mm,
+ gfp_mask & ~__GFP_HIGHMEM);
+ if (error)
+ goto out;
+ error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
if (error == 0) {
write_lock_irq(&mapping->tree_lock);
error = radix_tree_insert(&mapping->page_tree, offset, page);
page->index = offset;
mapping->nrpages++;
__inc_zone_page_state(page, NR_FILE_PAGES);
- }
+ } else
+ mem_cgroup_uncharge_page(page);
+
write_unlock_irq(&mapping->tree_lock);
radix_tree_preload_end();
- }
+ } else
+ mem_cgroup_uncharge_page(page);
+out:
return error;
}
EXPORT_SYMBOL(add_to_page_cache);
__wake_up_bit(page_waitqueue(page), &page->flags, bit);
}
-void fastcall wait_on_page_bit(struct page *page, int bit_nr)
+void wait_on_page_bit(struct page *page, int bit_nr)
{
DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
* the clear_bit and the read of the waitqueue (to avoid SMP races with a
* parallel wait_on_page_locked()).
*/
-void fastcall unlock_page(struct page *page)
+void unlock_page(struct page *page)
{
smp_mb__before_clear_bit();
if (!TestClearPageLocked(page))
* chances are that on the second loop, the block layer's plug list is empty,
* so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
*/
-void fastcall __lock_page(struct page *page)
+void __lock_page(struct page *page)
{
DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
}
EXPORT_SYMBOL(__lock_page);
-/*
+/*
+ * Take the PG_locked bit of @page, sleeping in TASK_KILLABLE state with
+ * sync_page_killable() as the wait action.  The result of
+ * __wait_on_bit_lock() is handed back to the caller unchanged.
+ */
+int __lock_page_killable(struct page *page)
+{
+	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
+	int err;
+
+	err = __wait_on_bit_lock(page_waitqueue(page), &wait,
+				 sync_page_killable, TASK_KILLABLE);
+	return err;
+}
+
+/**
+ * __lock_page_nosync - get a lock on the page, without calling sync_page()
+ * @page: the page to lock
+ *
* Variant of lock_page that does not require the caller to hold a reference
* on the page's mapping.
*/
-void fastcall __lock_page_nosync(struct page *page)
+void __lock_page_nosync(struct page *page)
{
DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
__wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
* Is there a pagecache struct page at the given (mapping, offset) tuple?
* If yes, increment its refcount and return it; if no, return NULL.
*/
-struct page * find_get_page(struct address_space *mapping, unsigned long offset)
+struct page * find_get_page(struct address_space *mapping, pgoff_t offset)
{
struct page *page;
* Returns zero if the page was not present. find_lock_page() may sleep.
*/
struct page *find_lock_page(struct address_space *mapping,
- unsigned long offset)
+ pgoff_t offset)
{
struct page *page;
- read_lock_irq(&mapping->tree_lock);
repeat:
+ read_lock_irq(&mapping->tree_lock);
page = radix_tree_lookup(&mapping->page_tree, offset);
if (page) {
page_cache_get(page);
if (TestSetPageLocked(page)) {
read_unlock_irq(&mapping->tree_lock);
__lock_page(page);
- read_lock_irq(&mapping->tree_lock);
/* Has the page been truncated while we slept? */
- if (unlikely(page->mapping != mapping ||
- page->index != offset)) {
+ if (unlikely(page->mapping != mapping)) {
unlock_page(page);
page_cache_release(page);
goto repeat;
}
+ VM_BUG_ON(page->index != offset);
+ goto out;
}
}
read_unlock_irq(&mapping->tree_lock);
+out:
return page;
}
EXPORT_SYMBOL(find_lock_page);
* memory exhaustion.
*/
struct page *find_or_create_page(struct address_space *mapping,
- unsigned long index, gfp_t gfp_mask)
+ pgoff_t index, gfp_t gfp_mask)
{
- struct page *page, *cached_page = NULL;
+ struct page *page;
int err;
repeat:
page = find_lock_page(mapping, index);
if (!page) {
- if (!cached_page) {
- cached_page =
- __page_cache_alloc(gfp_mask);
- if (!cached_page)
- return NULL;
+ page = __page_cache_alloc(gfp_mask);
+ if (!page)
+ return NULL;
+ err = add_to_page_cache_lru(page, mapping, index, gfp_mask);
+ if (unlikely(err)) {
+ page_cache_release(page);
+ page = NULL;
+ if (err == -EEXIST)
+ goto repeat;
}
- err = add_to_page_cache_lru(cached_page, mapping,
- index, gfp_mask);
- if (!err) {
- page = cached_page;
- cached_page = NULL;
- } else if (err == -EEXIST)
- goto repeat;
}
- if (cached_page)
- page_cache_release(cached_page);
return page;
}
EXPORT_SYMBOL(find_or_create_page);
* and deadlock against the caller's locked page.
*/
struct page *
-grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
+grab_cache_page_nowait(struct address_space *mapping, pgoff_t index)
{
struct page *page = find_get_page(mapping, index);
}
/**
- * do_generic_mapping_read - generic file read routine
- * @mapping: address_space to be read
- * @_ra: file's readahead state
+ * do_generic_file_read - generic file read routine
* @filp: the file to read
* @ppos: current file position
* @desc: read_descriptor
*
* This is really ugly. But the goto's actually try to clarify some
* of the logic when it comes to error handling etc.
- *
- * Note the struct file* is only passed for the use of readpage.
- * It may be NULL.
*/
-void do_generic_mapping_read(struct address_space *mapping,
- struct file_ra_state *_ra,
- struct file *filp,
- loff_t *ppos,
- read_descriptor_t *desc,
- read_actor_t actor)
+static void do_generic_file_read(struct file *filp, loff_t *ppos,
+ read_descriptor_t *desc, read_actor_t actor)
{
+ struct address_space *mapping = filp->f_mapping;
struct inode *inode = mapping->host;
- unsigned long index;
- unsigned long offset;
- unsigned long last_index;
- unsigned long next_index;
- unsigned long prev_index;
+ struct file_ra_state *ra = &filp->f_ra;
+ pgoff_t index;
+ pgoff_t last_index;
+ pgoff_t prev_index;
+ unsigned long offset; /* offset into pagecache page */
unsigned int prev_offset;
- struct page *cached_page;
int error;
- struct file_ra_state ra = *_ra;
- cached_page = NULL;
index = *ppos >> PAGE_CACHE_SHIFT;
- next_index = index;
- prev_index = ra.prev_index;
- prev_offset = ra.prev_offset;
+ prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
+ prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
offset = *ppos & ~PAGE_CACHE_MASK;
for (;;) {
struct page *page;
- unsigned long end_index;
+ pgoff_t end_index;
loff_t isize;
unsigned long nr, ret;
cond_resched();
- if (index == next_index)
- next_index = page_cache_readahead(mapping, &ra, filp,
- index, last_index - index);
-
find_page:
page = find_get_page(mapping, index);
- if (unlikely(page == NULL)) {
- handle_ra_miss(mapping, &ra, index);
- goto no_cached_page;
+ if (!page) {
+ page_cache_sync_readahead(mapping,
+ ra, filp,
+ index, last_index - index);
+ page = find_get_page(mapping, index);
+ if (unlikely(page == NULL))
+ goto no_cached_page;
+ }
+ if (PageReadahead(page)) {
+ page_cache_async_readahead(mapping,
+ ra, filp, page,
+ index, last_index - index);
}
if (!PageUptodate(page))
goto page_not_up_to_date;
index += offset >> PAGE_CACHE_SHIFT;
offset &= ~PAGE_CACHE_MASK;
prev_offset = offset;
- ra.prev_offset = offset;
page_cache_release(page);
if (ret == nr && desc->count)
page_not_up_to_date:
/* Get exclusive access to the page ... */
- lock_page(page);
+ if (lock_page_killable(page))
+ goto readpage_eio;
/* Did it get truncated before we got the lock? */
if (!page->mapping) {
}
if (!PageUptodate(page)) {
- lock_page(page);
+ if (lock_page_killable(page))
+ goto readpage_eio;
if (!PageUptodate(page)) {
if (page->mapping == NULL) {
/*
goto find_page;
}
unlock_page(page);
- error = -EIO;
- shrink_readahead_size_eio(filp, &ra);
- goto readpage_error;
+ shrink_readahead_size_eio(filp, ra);
+ goto readpage_eio;
}
unlock_page(page);
}
goto page_ok;
+readpage_eio:
+ error = -EIO;
readpage_error:
/* UHHUH! A synchronous read error occurred. Report it */
desc->error = error;
* Ok, it wasn't cached, so we need to create a new
* page..
*/
- if (!cached_page) {
- cached_page = page_cache_alloc_cold(mapping);
- if (!cached_page) {
- desc->error = -ENOMEM;
- goto out;
- }
+ page = page_cache_alloc_cold(mapping);
+ if (!page) {
+ desc->error = -ENOMEM;
+ goto out;
}
- error = add_to_page_cache_lru(cached_page, mapping,
+ error = add_to_page_cache_lru(page, mapping,
index, GFP_KERNEL);
if (error) {
+ page_cache_release(page);
if (error == -EEXIST)
goto find_page;
desc->error = error;
goto out;
}
- page = cached_page;
- cached_page = NULL;
goto readpage;
}
out:
- *_ra = ra;
+ ra->prev_pos = prev_index;
+ ra->prev_pos <<= PAGE_CACHE_SHIFT;
+ ra->prev_pos |= prev_offset;
- *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
- if (cached_page)
- page_cache_release(cached_page);
+ *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
if (filp)
file_accessed(filp);
}
-EXPORT_SYMBOL(do_generic_mapping_read);
int file_read_actor(read_descriptor_t *desc, struct page *page,
unsigned long offset, unsigned long size)
}
EXPORT_SYMBOL(generic_file_aio_read);
-int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
-{
- ssize_t written;
- unsigned long count = desc->count;
- struct file *file = desc->arg.data;
-
- if (size > count)
- size = count;
-
- written = file->f_op->sendpage(file, page, offset,
- size, &file->f_pos, size<count);
- if (written < 0) {
- desc->error = written;
- written = 0;
- }
- desc->count = count - written;
- desc->written += written;
- return written;
-}
-
static ssize_t
do_readahead(struct address_space *mapping, struct file *filp,
- unsigned long index, unsigned long nr)
+ pgoff_t index, unsigned long nr)
{
if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage)
return -EINVAL;
if (file) {
if (file->f_mode & FMODE_READ) {
struct address_space *mapping = file->f_mapping;
- unsigned long start = offset >> PAGE_CACHE_SHIFT;
- unsigned long end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
+ pgoff_t start = offset >> PAGE_CACHE_SHIFT;
+ pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
unsigned long len = end - start + 1;
ret = do_readahead(mapping, file, start, len);
}
}
#ifdef CONFIG_MMU
-static int FASTCALL(page_cache_read(struct file * file, unsigned long offset));
/**
* page_cache_read - adds requested page to the page cache if not already there
* @file: file to read
* This adds the requested page to the page cache if it isn't already there,
* and schedules an I/O to read in its contents from disk.
*/
-static int fastcall page_cache_read(struct file * file, unsigned long offset)
+static int page_cache_read(struct file *file, pgoff_t offset)
{
struct address_space *mapping = file->f_mapping;
struct page *page;
/**
* filemap_fault - read in file data for page fault handling
- * @vma: user vma (not used)
- * @fdata: the applicable fault_data
+ * @vma: vma in which the fault was taken
+ * @vmf: struct vm_fault containing details of the fault
*
* filemap_fault() is invoked via the vma operations vector for a
* mapped memory region to read in file data during a page fault.
* it in the page cache, and handles the special cases reasonably without
* having a lot of duplicated code.
*/
-struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
+int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
int error;
struct file *file = vma->vm_file;
struct file_ra_state *ra = &file->f_ra;
struct inode *inode = mapping->host;
struct page *page;
- unsigned long size;
+ pgoff_t size;
int did_readaround = 0;
-
- fdata->type = VM_FAULT_MINOR;
-
- BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
+ int ret = 0;
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- if (fdata->pgoff >= size)
- goto outside_data_content;
+ if (vmf->pgoff >= size)
+ return VM_FAULT_SIGBUS;
/* If we don't want any read-ahead, don't bother */
if (VM_RandomReadHint(vma))
goto no_cached_page;
/*
- * The readahead code wants to be told about each and every page
- * so it can build and shrink its windows appropriately
- *
- * For sequential accesses, we use the generic readahead logic.
- */
- if (VM_SequentialReadHint(vma))
- page_cache_readahead(mapping, ra, file, fdata->pgoff, 1);
-
- /*
* Do we have something in the page cache already?
*/
retry_find:
- page = find_lock_page(mapping, fdata->pgoff);
+ page = find_lock_page(mapping, vmf->pgoff);
+ /*
+ * For sequential accesses, we use the generic readahead logic.
+ */
+ if (VM_SequentialReadHint(vma)) {
+ if (!page) {
+ page_cache_sync_readahead(mapping, ra, file,
+ vmf->pgoff, 1);
+ page = find_lock_page(mapping, vmf->pgoff);
+ if (!page)
+ goto no_cached_page;
+ }
+ if (PageReadahead(page)) {
+ page_cache_async_readahead(mapping, ra, file, page,
+ vmf->pgoff, 1);
+ }
+ }
+
if (!page) {
unsigned long ra_pages;
- if (VM_SequentialReadHint(vma)) {
- handle_ra_miss(mapping, ra, fdata->pgoff);
- goto no_cached_page;
- }
ra->mmap_miss++;
/*
* Do we miss much more than hit in this file? If so,
* stop bothering with read-ahead. It will only hurt.
*/
- if (ra->mmap_miss > ra->mmap_hit + MMAP_LOTSAMISS)
+ if (ra->mmap_miss > MMAP_LOTSAMISS)
goto no_cached_page;
/*
* check did_readaround, as this is an inner loop.
*/
if (!did_readaround) {
- fdata->type = VM_FAULT_MAJOR;
+ ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
}
did_readaround = 1;
if (ra_pages) {
pgoff_t start = 0;
- if (fdata->pgoff > ra_pages / 2)
- start = fdata->pgoff - ra_pages / 2;
+ if (vmf->pgoff > ra_pages / 2)
+ start = vmf->pgoff - ra_pages / 2;
do_page_cache_readahead(mapping, file, start, ra_pages);
}
- page = find_lock_page(mapping, fdata->pgoff);
+ page = find_lock_page(mapping, vmf->pgoff);
if (!page)
goto no_cached_page;
}
if (!did_readaround)
- ra->mmap_hit++;
+ ra->mmap_miss--;
/*
* We have a locked page in the page cache, now we need to check
/* Must recheck i_size under page lock */
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- if (unlikely(fdata->pgoff >= size)) {
+ if (unlikely(vmf->pgoff >= size)) {
unlock_page(page);
- goto outside_data_content;
+ page_cache_release(page);
+ return VM_FAULT_SIGBUS;
}
/*
* Found the page and have a reference on it.
*/
mark_page_accessed(page);
- return page;
+ ra->prev_pos = (loff_t)page->index << PAGE_CACHE_SHIFT;
+ vmf->page = page;
+ return ret | VM_FAULT_LOCKED;
-outside_data_content:
- /*
- * An external ptracer can access pages that normally aren't
- * accessible..
- */
- if (vma->vm_mm == current->mm) {
- fdata->type = VM_FAULT_SIGBUS;
- return NULL;
- }
- /* Fall through to the non-read-ahead case */
no_cached_page:
/*
* We're only likely to ever get here if MADV_RANDOM is in
* effect.
*/
- error = page_cache_read(file, fdata->pgoff);
+ error = page_cache_read(file, vmf->pgoff);
/*
* The page we want has now been added to the page cache.
* to schedule I/O.
*/
if (error == -ENOMEM)
- fdata->type = VM_FAULT_OOM;
- else
- fdata->type = VM_FAULT_SIGBUS;
- return NULL;
+ return VM_FAULT_OOM;
+ return VM_FAULT_SIGBUS;
page_not_uptodate:
/* IO error path */
if (!did_readaround) {
- fdata->type = VM_FAULT_MAJOR;
+ ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
}
/* Things didn't work out. Return zero to tell the mm layer so. */
shrink_readahead_size_eio(file, ra);
- fdata->type = VM_FAULT_SIGBUS;
- return NULL;
+ return VM_FAULT_SIGBUS;
}
EXPORT_SYMBOL(filemap_fault);
-/*
- * filemap_nopage and filemap_populate are legacy exports that are not used
- * in tree. Scheduled for removal.
- */
-struct page *filemap_nopage(struct vm_area_struct *area,
- unsigned long address, int *type)
-{
- struct page *page;
- struct fault_data fdata;
- fdata.address = address;
- fdata.pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
- + area->vm_pgoff;
- fdata.flags = 0;
-
- page = filemap_fault(area, &fdata);
- if (type)
- *type = fdata.type;
-
- return page;
-}
-EXPORT_SYMBOL(filemap_nopage);
-
-static struct page * filemap_getpage(struct file *file, unsigned long pgoff,
- int nonblock)
-{
- struct address_space *mapping = file->f_mapping;
- struct page *page;
- int error;
-
- /*
- * Do we have something in the page cache already?
- */
-retry_find:
- page = find_get_page(mapping, pgoff);
- if (!page) {
- if (nonblock)
- return NULL;
- goto no_cached_page;
- }
-
- /*
- * Ok, found a page in the page cache, now we need to check
- * that it's up-to-date.
- */
- if (!PageUptodate(page)) {
- if (nonblock) {
- page_cache_release(page);
- return NULL;
- }
- goto page_not_uptodate;
- }
-
-success:
- /*
- * Found the page and have a reference on it.
- */
- mark_page_accessed(page);
- return page;
-
-no_cached_page:
- error = page_cache_read(file, pgoff);
-
- /*
- * The page we want has now been added to the page cache.
- * In the unlikely event that someone removed it in the
- * meantime, we'll just come back here and read it again.
- */
- if (error >= 0)
- goto retry_find;
-
- /*
- * An error return from page_cache_read can result if the
- * system is low on memory, or a problem occurs while trying
- * to schedule I/O.
- */
- return NULL;
-
-page_not_uptodate:
- lock_page(page);
-
- /* Did it get truncated while we waited for it? */
- if (!page->mapping) {
- unlock_page(page);
- goto err;
- }
-
- /* Did somebody else get it up-to-date? */
- if (PageUptodate(page)) {
- unlock_page(page);
- goto success;
- }
-
- error = mapping->a_ops->readpage(file, page);
- if (!error) {
- wait_on_page_locked(page);
- if (PageUptodate(page))
- goto success;
- } else if (error == AOP_TRUNCATED_PAGE) {
- page_cache_release(page);
- goto retry_find;
- }
-
- /*
- * Umm, take care of errors if the page isn't up-to-date.
- * Try to re-read it _once_. We do this synchronously,
- * because there really aren't any performance issues here
- * and we need to check for errors.
- */
- lock_page(page);
-
- /* Somebody truncated the page on us? */
- if (!page->mapping) {
- unlock_page(page);
- goto err;
- }
- /* Somebody else successfully read it in? */
- if (PageUptodate(page)) {
- unlock_page(page);
- goto success;
- }
-
- ClearPageError(page);
- error = mapping->a_ops->readpage(file, page);
- if (!error) {
- wait_on_page_locked(page);
- if (PageUptodate(page))
- goto success;
- } else if (error == AOP_TRUNCATED_PAGE) {
- page_cache_release(page);
- goto retry_find;
- }
-
- /*
- * Things didn't work out. Return zero to tell the
- * mm layer so, possibly freeing the page cache page first.
- */
-err:
- page_cache_release(page);
-
- return NULL;
-}
-
-int filemap_populate(struct vm_area_struct *vma, unsigned long addr,
- unsigned long len, pgprot_t prot, unsigned long pgoff,
- int nonblock)
-{
- struct file *file = vma->vm_file;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- unsigned long size;
- struct mm_struct *mm = vma->vm_mm;
- struct page *page;
- int err;
-
- if (!nonblock)
- force_page_cache_readahead(mapping, vma->vm_file,
- pgoff, len >> PAGE_CACHE_SHIFT);
-
-repeat:
- size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- if (pgoff + (len >> PAGE_CACHE_SHIFT) > size)
- return -EINVAL;
-
- page = filemap_getpage(file, pgoff, nonblock);
-
- /* XXX: This is wrong, a filesystem I/O error may have happened. Fix that as
- * done in shmem_populate calling shmem_getpage */
- if (!page && !nonblock)
- return -ENOMEM;
-
- if (page) {
- err = install_page(mm, vma, addr, page, prot);
- if (err) {
- page_cache_release(page);
- return err;
- }
- } else if (vma->vm_flags & VM_NONLINEAR) {
- /* No page was found just because we can't read it in now (being
- * here implies nonblock != 0), but the page may exist, so set
- * the PTE to fault it in later. */
- err = install_file_pte(mm, vma, addr, pgoff, prot);
- if (err)
- return err;
- }
-
- len -= PAGE_SIZE;
- addr += PAGE_SIZE;
- pgoff++;
- if (len)
- goto repeat;
-
- return 0;
-}
-EXPORT_SYMBOL(filemap_populate);
-
struct vm_operations_struct generic_file_vm_ops = {
.fault = filemap_fault,
};
return -ENOEXEC;
file_accessed(file);
vma->vm_ops = &generic_file_vm_ops;
- vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
+ vma->vm_flags |= VM_CAN_NONLINEAR;
return 0;
}
EXPORT_SYMBOL(generic_file_readonly_mmap);
static struct page *__read_cache_page(struct address_space *mapping,
- unsigned long index,
+ pgoff_t index,
int (*filler)(void *,struct page*),
void *data)
{
- struct page *page, *cached_page = NULL;
+ struct page *page;
int err;
repeat:
page = find_get_page(mapping, index);
if (!page) {
- if (!cached_page) {
- cached_page = page_cache_alloc_cold(mapping);
- if (!cached_page)
- return ERR_PTR(-ENOMEM);
- }
- err = add_to_page_cache_lru(cached_page, mapping,
- index, GFP_KERNEL);
- if (err == -EEXIST)
- goto repeat;
- if (err < 0) {
+ page = page_cache_alloc_cold(mapping);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+ err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+ if (unlikely(err)) {
+ page_cache_release(page);
+ if (err == -EEXIST)
+ goto repeat;
/* Presumably ENOMEM for radix tree node */
- page_cache_release(cached_page);
return ERR_PTR(err);
}
- page = cached_page;
- cached_page = NULL;
err = filler(data, page);
if (err < 0) {
page_cache_release(page);
page = ERR_PTR(err);
}
}
- if (cached_page)
- page_cache_release(cached_page);
return page;
}
-/*
+/**
+ * read_cache_page_async - read into page cache, fill it if needed
+ * @mapping: the page's address_space
+ * @index: the page index
+ * @filler: function to perform the read
+ * @data: destination for read data
+ *
* Same as read_cache_page, but don't wait for page to become unlocked
* after submitting it to the filler.
+ *
+ * Read into the page cache. If a page already exists, and PageUptodate() is
+ * not set, try to fill the page but don't wait for it to become unlocked.
+ *
+ * If the page does not get brought uptodate, return -EIO.
*/
struct page *read_cache_page_async(struct address_space *mapping,
- unsigned long index,
+ pgoff_t index,
int (*filler)(void *,struct page*),
void *data)
{
* If the page does not get brought uptodate, return -EIO.
*/
struct page *read_cache_page(struct address_space *mapping,
- unsigned long index,
+ pgoff_t index,
int (*filler)(void *,struct page*),
void *data)
{
EXPORT_SYMBOL(read_cache_page);
/*
- * If the page was newly created, increment its refcount and add it to the
- * caller's lru-buffering pagevec. This function is specifically for
- * generic_file_write().
- */
-static inline struct page *
-__grab_cache_page(struct address_space *mapping, unsigned long index,
- struct page **cached_page, struct pagevec *lru_pvec)
-{
- int err;
- struct page *page;
-repeat:
- page = find_lock_page(mapping, index);
- if (!page) {
- if (!*cached_page) {
- *cached_page = page_cache_alloc(mapping);
- if (!*cached_page)
- return NULL;
- }
- err = add_to_page_cache(*cached_page, mapping,
- index, GFP_KERNEL);
- if (err == -EEXIST)
- goto repeat;
- if (err == 0) {
- page = *cached_page;
- page_cache_get(page);
- if (!pagevec_add(lru_pvec, page))
- __pagevec_lru_add(lru_pvec);
- *cached_page = NULL;
- }
- }
- return page;
-}
-
-/*
* The logic we want is
*
* if suid or (sgid and xgrp)
int remove_suid(struct dentry *dentry)
{
- int kill = should_remove_suid(dentry);
+ int killsuid = should_remove_suid(dentry);
+ int killpriv = security_inode_need_killpriv(dentry);
+ int error = 0;
- if (unlikely(kill))
- return __remove_suid(dentry, kill);
+ if (killpriv < 0)
+ return killpriv;
+ if (killpriv)
+ error = security_inode_killpriv(dentry);
+ if (!error && killsuid)
+ error = __remove_suid(dentry, killsuid);
- return 0;
+ return error;
}
EXPORT_SYMBOL(remove_suid);
-size_t
-__filemap_copy_from_user_iovec_inatomic(char *vaddr,
+static size_t __iovec_copy_from_user_inatomic(char *vaddr,
const struct iovec *iov, size_t base, size_t bytes)
{
size_t copied = 0, left = 0;
}
/*
+ * Copy as much as we can into the page and return the number of bytes which
+ * were successfully copied.  If a fault is encountered then return the number of
+ * bytes which were copied.
+ */
+size_t iov_iter_copy_from_user_atomic(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+	size_t copied;
+	char *kaddr;
+
+	/* Only legal from atomic context: the page is mapped with kmap_atomic(). */
+	BUG_ON(!in_atomic());
+	kaddr = kmap_atomic(page, KM_USER0);
+	if (unlikely(i->nr_segs != 1)) {
+		/* Multi-segment iterator: walk the iovec array. */
+		copied = __iovec_copy_from_user_inatomic(kaddr + offset,
+						i->iov, i->iov_offset, bytes);
+	} else {
+		/* Single segment: one direct copy from the user buffer. */
+		char __user *buf = i->iov->iov_base + i->iov_offset;
+		int left = __copy_from_user_inatomic_nocache(kaddr + offset,
+							buf, bytes);
+
+		/* @left is the number of bytes that were NOT copied. */
+		copied = bytes - left;
+	}
+	kunmap_atomic(kaddr, KM_USER0);
+
+	return copied;
+}
+EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
+
+/*
+ * This has the same sideeffects and return value as
+ * iov_iter_copy_from_user_atomic().
+ * The difference is that it attempts to resolve faults.
+ * Page must not be locked.
+ */
+size_t iov_iter_copy_from_user(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+	size_t copied;
+	char *kaddr;
+
+	/* kmap() rather than kmap_atomic(): this variant is not restricted to atomic context. */
+	kaddr = kmap(page);
+	if (unlikely(i->nr_segs != 1)) {
+		/* Multi-segment iterator: walk the iovec array. */
+		copied = __iovec_copy_from_user_inatomic(kaddr + offset,
+						i->iov, i->iov_offset, bytes);
+	} else {
+		/* Single segment: one direct copy from the user buffer. */
+		char __user *buf = i->iov->iov_base + i->iov_offset;
+		int left = __copy_from_user_nocache(kaddr + offset, buf, bytes);
+
+		/* @left is the number of bytes that were NOT copied. */
+		copied = bytes - left;
+	}
+	kunmap(page);
+	return copied;
+}
+EXPORT_SYMBOL(iov_iter_copy_from_user);
+
+/*
+ * iov_iter_advance - consume @bytes from the front of the iterator @i.
+ *
+ * Decrements i->count by @bytes and moves i->iov / i->iov_offset forward,
+ * stepping over fully consumed and zero-length segments.  BUGs if @bytes
+ * exceeds i->count.
+ */
+void iov_iter_advance(struct iov_iter *i, size_t bytes)
+{
+	BUG_ON(i->count < bytes);
+
+	if (likely(i->nr_segs == 1)) {
+		i->iov_offset += bytes;
+		i->count -= bytes;
+	} else {
+		const struct iovec *iov = i->iov;
+		size_t base = i->iov_offset;
+
+		/*
+		 * The !iov->iov_len check ensures we skip over unlikely
+		 * zero-length segments.  i->count is tested first so that
+		 * iov is never dereferenced once the iterator is exhausted
+		 * (at that point it may point one past the end of the iovec
+		 * array, and reading iov_len there would overrun it).
+		 */
+		while (bytes || unlikely(i->count && !iov->iov_len)) {
+			/* size_t rather than int: segment lengths are size_t. */
+			size_t copy;
+
+			copy = min(bytes, iov->iov_len - base);
+			BUG_ON(!i->count || i->count < copy);
+			i->count -= copy;
+			bytes -= copy;
+			base += copy;
+			/* Segment fully consumed: move on to the next one. */
+			if (iov->iov_len == base) {
+				iov++;
+				base = 0;
+			}
+		}
+		i->iov = iov;
+		i->iov_offset = base;
+	}
+}
+EXPORT_SYMBOL(iov_iter_advance);
+
+/*
+ * Fault in the first iovec of the given iov_iter, to a maximum length
+ * of bytes. Returns 0 on success, or non-zero if the memory could not be
+ * accessed (ie. because it is an invalid address).
+ *
+ * writev-intensive code may want this to prefault several iovecs -- that
+ * would be possible (callers must not rely on the fact that _only_ the
+ * first iovec will be faulted with the current implementation).
+ */
+int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
+{
+	const struct iovec *seg = i->iov;
+	char __user *buf = seg->iov_base + i->iov_offset;
+
+	/* Never fault in more than what is left of the current segment. */
+	bytes = min(bytes, seg->iov_len - i->iov_offset);
+
+	return fault_in_pages_readable(buf, bytes);
+}
+EXPORT_SYMBOL(iov_iter_fault_in_readable);
+
+/*
+ * Return the count of just the current iov_iter segment.
+ */
+size_t iov_iter_single_seg_count(struct iov_iter *i)
+{
+	if (i->nr_segs != 1) {
+		const struct iovec *iov = i->iov;
+
+		/* Remainder of the current segment, capped by the total count. */
+		return min(i->count, iov->iov_len - i->iov_offset);
+	}
+
+	/* A single segment: everything that is left lives in it. */
+	return i->count;
+}
+EXPORT_SYMBOL(iov_iter_single_seg_count);
+
+/*
* Performs necessary checks before doing a write
*
* Can adjust writing position or amount of bytes to write.
}
EXPORT_SYMBOL(generic_write_checks);
+/*
+ * pagecache_write_begin - start a pagecache write of @len bytes at @pos.
+ *
+ * Delegates to ->write_begin when the address space provides it.  Otherwise
+ * falls back to the legacy path: grab the page with __grab_cache_page(),
+ * store it in *@pagep and call ->prepare_write on it.  Returns 0 or a
+ * negative/AOP error code from the underlying operation, -ENOMEM if no page
+ * could be obtained.
+ */
+int pagecache_write_begin(struct file *file, struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned flags,
+				struct page **pagep, void **fsdata)
+{
+	const struct address_space_operations *aops = mapping->a_ops;
+	struct inode *inode = mapping->host;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+	struct page *page;
+	int ret;
+
+	if (aops->write_begin)
+		return aops->write_begin(file, mapping, pos, len, flags,
+							pagep, fsdata);
+
+	for (;;) {
+		page = __grab_cache_page(mapping, index);
+		*pagep = page;
+		if (!page)
+			return -ENOMEM;
+
+		if (!(flags & AOP_FLAG_UNINTERRUPTIBLE) || PageUptodate(page))
+			break;
+
+		/*
+		 * A short write into a !Uptodate page cannot be resolved
+		 * (other than by the double copy done in
+		 * generic_perform_write_2copy), so bring the page uptodate
+		 * here and start over with a fresh lookup.
+		 */
+		ret = aops->readpage(file, page);
+		page_cache_release(page);
+		if (ret && ret != AOP_TRUNCATED_PAGE)
+			return ret;
+	}
+
+	ret = aops->prepare_write(file, page, offset, offset+len);
+	if (ret) {
+		unlock_page(page);
+		page_cache_release(page);
+		/* Trim anything instantiated beyond the current i_size. */
+		if (pos + len > inode->i_size)
+			vmtruncate(inode, inode->i_size);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(pagecache_write_begin);
+
+/*
+ * pagecache_write_end - finish a write started by pagecache_write_begin().
+ *
+ * Delegates to ->write_end when available.  Otherwise runs the legacy
+ * ->commit_write path, then unlocks and releases @page.  Returns a negative
+ * error from the underlying operation, or a byte count: @copied when
+ * ->commit_write returned 0, else the smaller of @copied and its return value.
+ */
+int pagecache_write_end(struct file *file, struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata)
+{
+	const struct address_space_operations *aops = mapping->a_ops;
+	struct inode *inode = mapping->host;
+	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+	int ret;
+
+	if (aops->write_end) {
+		mark_page_accessed(page);
+		return aops->write_end(file, mapping, pos, len, copied,
+							page, fsdata);
+	}
+
+	flush_dcache_page(page);
+	ret = aops->commit_write(file, page, offset, offset+len);
+	unlock_page(page);
+	mark_page_accessed(page);
+	page_cache_release(page);
+
+	if (ret < 0) {
+		/* Trim anything instantiated beyond the current i_size. */
+		if (pos + len > inode->i_size)
+			vmtruncate(inode, inode->i_size);
+		return ret;
+	}
+	if (ret == 0)
+		return copied;
+
+	/* A positive return is clamped to @copied. */
+	ret = min_t(size_t, copied, ret);
+	return ret;
+}
+EXPORT_SYMBOL(pagecache_write_end);
+
ssize_t
generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long *nr_segs, loff_t pos, loff_t *ppos,
}
EXPORT_SYMBOL(generic_file_direct_write);
-ssize_t
-generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos, loff_t *ppos,
- size_t count, ssize_t written)
+/*
+ * Find or create a page at the given pagecache position. Return the locked
+ * page. This function is specifically for buffered writes.
+ *
+ * @mapping: address space to look the page up in, or add it to
+ * @index: pagecache index of the wanted page
+ *
+ * If find_lock_page() finds nothing, a new page is allocated with
+ * page_cache_alloc() and inserted with add_to_page_cache_lru(). When the
+ * insertion returns -EEXIST the new page is dropped and the lookup is
+ * retried (presumably another task inserted a page at @index meanwhile).
+ *
+ * Returns NULL if the allocation fails or if the insertion fails with any
+ * error other than -EEXIST.
+ */
+struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index)
{
- struct file *file = iocb->ki_filp;
- struct address_space * mapping = file->f_mapping;
- const struct address_space_operations *a_ops = mapping->a_ops;
- struct inode *inode = mapping->host;
- long status = 0;
- struct page *page;
- struct page *cached_page = NULL;
- size_t bytes;
- struct pagevec lru_pvec;
- const struct iovec *cur_iov = iov; /* current iovec */
- size_t iov_base = 0; /* offset in the current iovec */
- char __user *buf;
-
- pagevec_init(&lru_pvec, 0);
+ int status;
+ struct page *page;
+repeat:
+ page = find_lock_page(mapping, index);
+ if (likely(page))
+ return page;
- /*
- * handle partial DIO write. Adjust cur_iov if needed.
- */
- if (likely(nr_segs == 1))
- buf = iov->iov_base + written;
- else {
- filemap_set_next_iovec(&cur_iov, &iov_base, written);
- buf = cur_iov->iov_base + iov_base;
+ page = page_cache_alloc(mapping);
+ if (!page)
+ return NULL;
+ status = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+ if (unlikely(status)) {
+ page_cache_release(page);
+ if (status == -EEXIST)
+ goto repeat;
+ return NULL;
}
+ return page;
+}
+EXPORT_SYMBOL(__grab_cache_page);
+/*
+ * generic_perform_write_2copy - buffered write loop built on
+ * ->prepare_write and ->commit_write.
+ * @file: file being written
+ * @i: iterator over the source data; advanced by the bytes written
+ * @pos: byte position in the file to start writing at
+ *
+ * Handles at most one pagecache page per iteration, until @i is drained
+ * or an error occurs. When the destination page is not uptodate and the
+ * source is not in kernel address space, the data is first copied into a
+ * temporary page with the destination unlocked, and then copied from
+ * there into the destination under the page lock.
+ *
+ * Returns the number of bytes written if that is non-zero, otherwise the
+ * last status value.
+ */
+static ssize_t generic_perform_write_2copy(struct file *file,
+ struct iov_iter *i, loff_t pos)
+{
+ struct address_space *mapping = file->f_mapping;
+ const struct address_space_operations *a_ops = mapping->a_ops;
+ struct inode *inode = mapping->host;
+ long status = 0;
+ ssize_t written = 0;
do {
- unsigned long index;
- unsigned long offset;
- size_t copied;
+ struct page *src_page;
+ struct page *page;
+ pgoff_t index; /* Pagecache index for current page */
+ unsigned long offset; /* Offset into pagecache page */
+ unsigned long bytes; /* Bytes to write to page */
+ size_t copied; /* Bytes copied from user */
- offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
+ offset = (pos & (PAGE_CACHE_SIZE - 1));
index = pos >> PAGE_CACHE_SHIFT;
- bytes = PAGE_CACHE_SIZE - offset;
-
- /* Limit the size of the copy to the caller's write size */
- bytes = min(bytes, count);
+ bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
+ iov_iter_count(i));
- /* We only need to worry about prefaulting when writes are from
- * user-space. NFSd uses vfs_writev with several non-aligned
- * segments in the vector, and limiting to one segment a time is
- * a noticeable performance for re-write
+ /*
+ * a non-NULL src_page indicates that the user data was first
+ * copied into a temporary page (allocated below), which is
+ * then copied into the pagecache page via kmap_atomic.
*/
- if (!segment_eq(get_fs(), KERNEL_DS)) {
- /*
- * Limit the size of the copy to that of the current
- * segment, because fault_in_pages_readable() doesn't
- * know how to walk segments.
- */
- bytes = min(bytes, cur_iov->iov_len - iov_base);
+ src_page = NULL;
- /*
- * Bring in the user page that we will copy from
- * _first_. Otherwise there's a nasty deadlock on
- * copying from the same page as we're writing to,
- * without it being marked up-to-date.
- */
- fault_in_pages_readable(buf, bytes);
+ /*
+ * Bring in the user page that we will copy from _first_.
+ * Otherwise there's a nasty deadlock on copying from the
+ * same page as we're writing to, without it being marked
+ * up-to-date.
+ *
+ * Not only is this an optimisation, but it is also required
+ * to check that the address is actually valid, when atomic
+ * usercopies are used, below.
+ */
+ if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
+ status = -EFAULT;
+ break;
}
- page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
+
+ page = __grab_cache_page(mapping, index);
if (!page) {
status = -ENOMEM;
break;
}
- if (unlikely(bytes == 0)) {
- status = 0;
- copied = 0;
- goto zero_length_segment;
- }
+ /*
+ * non-uptodate pages cannot cope with short copies, and we
+ * cannot take a pagefault with the destination page locked.
+ * So copy the user data into a temporary page first, with
+ * the destination page unlocked.
+ */
+ if (!PageUptodate(page) && !segment_eq(get_fs(), KERNEL_DS)) {
+ unlock_page(page);
- status = a_ops->prepare_write(file, page, offset, offset+bytes);
- if (unlikely(status)) {
- loff_t isize = i_size_read(inode);
+ src_page = alloc_page(GFP_KERNEL);
+ if (!src_page) {
+ page_cache_release(page);
+ status = -ENOMEM;
+ break;
+ }
+
+ /*
+ * Do the usercopy into src_page while the destination
+ * page is unlocked: we can't take a page fault with a
+ * page locked (as explained below).
+ */
+ copied = iov_iter_copy_from_user(src_page, i,
+ offset, bytes);
+ if (unlikely(copied == 0)) {
+ status = -EFAULT;
+ page_cache_release(page);
+ page_cache_release(src_page);
+ break;
+ }
+ bytes = copied; /* a short copy shrinks this iteration's write */
- if (status != AOP_TRUNCATED_PAGE)
+ lock_page(page);
+ /*
+ * Can't handle the page going uptodate here, because
+ * that means we would use non-atomic usercopies, which
+ * zero out the tail of the page, which can cause
+ * zeroes to become transiently visible. We could just
+ * use a non-zeroing copy, but the APIs aren't too
+ * consistent.
+ *
+ * Also retry when page->mapping is NULL (presumably
+ * the page was removed from the mapping while it was
+ * unlocked).
+ */
+ if (unlikely(!page->mapping || PageUptodate(page))) {
unlock_page(page);
- page_cache_release(page);
- if (status == AOP_TRUNCATED_PAGE)
+ page_cache_release(page);
+ page_cache_release(src_page);
continue;
+ }
+ }
+
+ status = a_ops->prepare_write(file, page, offset, offset+bytes);
+ if (unlikely(status))
+ goto fs_write_aop_error;
+
+ if (!src_page) {
/*
- * prepare_write() may have instantiated a few blocks
- * outside i_size. Trim these off again.
+ * Must not enter the pagefault handler here, because
+ * we hold the page lock, so we might recursively
+ * deadlock on the same lock, or get an ABBA deadlock
+ * against a different lock, or against the mmap_sem
+ * (which nests outside the page lock). So increment
+ * preempt count, and use _atomic usercopies.
+ *
+ * The page is uptodate so we are OK to encounter a
+ * short copy: if unmodified parts of the page are
+ * marked dirty and written out to disk, it doesn't
+ * really matter.
*/
- if (pos + bytes > isize)
- vmtruncate(inode, isize);
- break;
+ pagefault_disable();
+ copied = iov_iter_copy_from_user_atomic(page, i,
+ offset, bytes);
+ pagefault_enable();
+ } else {
+ void *src, *dst;
+ src = kmap_atomic(src_page, KM_USER0);
+ dst = kmap_atomic(page, KM_USER1);
+ memcpy(dst + offset, src + offset, bytes);
+ kunmap_atomic(dst, KM_USER1);
+ kunmap_atomic(src, KM_USER0);
+ copied = bytes;
}
- if (likely(nr_segs == 1))
- copied = filemap_copy_from_user(page, offset,
- buf, bytes);
- else
- copied = filemap_copy_from_user_iovec(page, offset,
- cur_iov, iov_base, bytes);
flush_dcache_page(page);
+
status = a_ops->commit_write(file, page, offset, offset+bytes);
- if (status == AOP_TRUNCATED_PAGE) {
- page_cache_release(page);
- continue;
- }
-zero_length_segment:
- if (likely(copied >= 0)) {
- if (!status)
- status = copied;
-
- if (status >= 0) {
- written += status;
- count -= status;
- pos += status;
- buf += status;
- if (unlikely(nr_segs > 1)) {
- filemap_set_next_iovec(&cur_iov,
- &iov_base, status);
- if (count)
- buf = cur_iov->iov_base +
- iov_base;
- } else {
- iov_base += status;
- }
- }
- }
- if (unlikely(copied != bytes))
- if (status >= 0)
- status = -EFAULT;
+ if (unlikely(status < 0))
+ goto fs_write_aop_error;
+ if (unlikely(status > 0)) /* filesystem did partial write */
+ copied = min_t(size_t, copied, status);
+
unlock_page(page);
mark_page_accessed(page);
page_cache_release(page);
- if (status < 0)
- break;
+ if (src_page)
+ page_cache_release(src_page);
+
+ iov_iter_advance(i, copied);
+ pos += copied;
+ written += copied;
+
balance_dirty_pages_ratelimited(mapping);
cond_resched();
- } while (count);
- *ppos = pos;
+ continue;
+
+fs_write_aop_error:
+ unlock_page(page);
+ page_cache_release(page);
+ if (src_page)
+ page_cache_release(src_page);
- if (cached_page)
- page_cache_release(cached_page);
+ /*
+ * prepare_write() may have instantiated a few blocks
+ * outside i_size. Trim these off again. Don't need
+ * i_size_read because we hold i_mutex.
+ */
+ if (pos + bytes > inode->i_size)
+ vmtruncate(inode, inode->i_size);
+ break;
+ } while (iov_iter_count(i));
+
+ return written ? written : status;
+}
+/*
+ * generic_perform_write - buffered write loop built on ->write_begin and
+ * ->write_end.
+ * @file: file being written
+ * @i: iterator over the source data; advanced by the bytes written
+ * @pos: byte position in the file to start writing at
+ *
+ * Each iteration handles at most one pagecache page: fault in the source,
+ * call ->write_begin, copy with pagefaults disabled, call ->write_end and
+ * advance by the count it returns. AOP_FLAG_UNINTERRUPTIBLE is passed to
+ * ->write_begin when the source is in kernel address space.
+ *
+ * Returns the number of bytes written if that is non-zero, otherwise the
+ * last status value.
+ */
+static ssize_t generic_perform_write(struct file *file,
+ struct iov_iter *i, loff_t pos)
+{
+ struct address_space *mapping = file->f_mapping;
+ const struct address_space_operations *a_ops = mapping->a_ops;
+ long status = 0;
+ ssize_t written = 0;
+ unsigned int flags = 0;
/*
- * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC
+ * Copies from kernel address space cannot fail (NFSD is a big user).
*/
+ if (segment_eq(get_fs(), KERNEL_DS))
+ flags |= AOP_FLAG_UNINTERRUPTIBLE;
+
+ do {
+ struct page *page;
+ pgoff_t index; /* Pagecache index for current page */
+ unsigned long offset; /* Offset into pagecache page */
+ unsigned long bytes; /* Bytes to write to page */
+ size_t copied; /* Bytes copied from user */
+ void *fsdata;
+
+ offset = (pos & (PAGE_CACHE_SIZE - 1));
+ index = pos >> PAGE_CACHE_SHIFT;
+ bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
+ iov_iter_count(i));
+
+again:
+
+ /*
+ * Bring in the user page that we will copy from _first_.
+ * Otherwise there's a nasty deadlock on copying from the
+ * same page as we're writing to, without it being marked
+ * up-to-date.
+ *
+ * Not only is this an optimisation, but it is also required
+ * to check that the address is actually valid, when atomic
+ * usercopies are used, below.
+ */
+ if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
+ status = -EFAULT;
+ break;
+ }
+
+ status = a_ops->write_begin(file, mapping, pos, bytes, flags,
+ &page, &fsdata);
+ if (unlikely(status))
+ break;
+
+ pagefault_disable();
+ copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
+ pagefault_enable();
+ flush_dcache_page(page);
+
+ status = a_ops->write_end(file, mapping, pos, bytes, copied,
+ page, fsdata);
+ if (unlikely(status < 0))
+ break;
+ copied = status; /* continue with the count ->write_end returned */
+
+ cond_resched();
+
+ iov_iter_advance(i, copied);
+ if (unlikely(copied == 0)) {
+ /*
+ * If we were unable to copy any data at all, we must
+ * fall back to a single segment length write.
+ *
+ * If we didn't fallback here, we could livelock
+ * because not all segments in the iov can be copied at
+ * once without a pagefault.
+ */
+ bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
+ iov_iter_single_seg_count(i));
+ goto again;
+ }
+ pos += copied;
+ written += copied;
+
+ balance_dirty_pages_ratelimited(mapping);
+
+ } while (iov_iter_count(i));
+
+ return written ? written : status;
+}
+/**
+ * generic_file_buffered_write - write data through the pagecache
+ * @iocb: kiocb whose ki_filp is the file being written
+ * @iov: source segments
+ * @nr_segs: number of entries in @iov
+ * @pos: byte position in the file to start writing at
+ * @ppos: set to @pos plus the bytes written here when the write loop
+ * returns a non-negative status
+ * @count: byte count handed to iov_iter_init()
+ * @written: handed to iov_iter_init() and used as the starting value of
+ * the returned total (presumably bytes the caller already wrote)
+ *
+ * Uses generic_perform_write() when the mapping provides ->write_begin
+ * and generic_perform_write_2copy() otherwise.
+ *
+ * Returns the accumulated @written when it is non-zero, otherwise the
+ * last status value.
+ */
+ssize_t
+generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos, loff_t *ppos,
+ size_t count, ssize_t written)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ const struct address_space_operations *a_ops = mapping->a_ops;
+ struct inode *inode = mapping->host;
+ ssize_t status;
+ struct iov_iter i;
+
+ iov_iter_init(&i, iov, nr_segs, count, written);
+ if (a_ops->write_begin)
+ status = generic_perform_write(file, &i, pos);
+ else
+ status = generic_perform_write_2copy(file, &i, pos);
+
if (likely(status >= 0)) {
+ written += status;
+ *ppos = pos + status;
+
+ /*
+ * For now, when the user asks for O_SYNC, we'll actually give
+ * O_DSYNC
+ */
if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
if (!a_ops->writepage || !is_sync_kiocb(iocb))
status = generic_osync_inode(inode, mapping,
if (unlikely(file->f_flags & O_DIRECT) && written)
status = filemap_write_and_wait(mapping);
- pagevec_lru_add(&lru_pvec);
return written ? written : status;
}
EXPORT_SYMBOL(generic_file_buffered_write);
}
retval = mapping->a_ops->direct_IO(rw, iocb, iov, offset, nr_segs);
- if (retval)
- goto out;
/*
* Finally, try again to invalidate clean pages which might have been
- * faulted in by get_user_pages() if the source of the write was an
- * mmap()ed region of the file we're writing. That's a pretty crazy
- * thing to do, so we don't support it 100%. If this invalidation
- * fails and we have -EIOCBQUEUED we ignore the failure.
+ * cached by non-direct readahead, or faulted in by get_user_pages()
+ * if the source of the write was an mmap'ed region of the file
+ * we're writing. Either one is a pretty crazy thing to do,
+ * so we don't support it 100%. If this invalidation
+ * fails, tough, the write still worked...
*/
if (rw == WRITE && mapping->nrpages) {
- int err = invalidate_inode_pages2_range(mapping,
- offset >> PAGE_CACHE_SHIFT, end);
- if (err && retval >= 0)
- retval = err;
+ invalidate_inode_pages2_range(mapping, offset >> PAGE_CACHE_SHIFT, end);
}
out:
return retval;