X-Git-Url: http://ftp.safe.ca/?p=safe%2Fjmp%2Flinux-2.6;a=blobdiff_plain;f=mm%2Ffilemap_xip.c;h=83364df74a33811ea7aea971412bfe91d4fe9c17;hp=9354ee279b1345051bf686a336b58447b4810686;hb=4434ade8c9334a3ab975d8993de456f06841899e;hpb=b5810039a54e5babf428e9a1e89fc1940fabff11 diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 9354ee2..83364df 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -13,41 +13,71 @@ #include #include #include +#include +#include +#include +#include +#include #include -#include "filemap.h" +#include + +/* + * We do use our own empty page to avoid interference with other users + * of ZERO_PAGE(), such as /dev/zero + */ +static DEFINE_MUTEX(xip_sparse_mutex); +static seqcount_t xip_sparse_seq = SEQCNT_ZERO; +static struct page *__xip_sparse_page; + +/* called under xip_sparse_mutex */ +static struct page *xip_sparse_page(void) +{ + if (!__xip_sparse_page) { + struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO); + + if (page) + __xip_sparse_page = page; + } + return __xip_sparse_page; +} /* * This is a file read routine for execute in place files, and uses - * the mapping->a_ops->get_xip_page() function for the actual low-level + * the mapping->a_ops->get_xip_mem() function for the actual low-level * stuff. * * Note the struct file* is not used at all. It may be NULL. */ -static void +static ssize_t do_xip_mapping_read(struct address_space *mapping, struct file_ra_state *_ra, struct file *filp, - loff_t *ppos, - read_descriptor_t *desc, - read_actor_t actor) + char __user *buf, + size_t len, + loff_t *ppos) { struct inode *inode = mapping->host; - unsigned long index, end_index, offset; - loff_t isize; + pgoff_t index, end_index; + unsigned long offset; + loff_t isize, pos; + size_t copied = 0, error = 0; - BUG_ON(!mapping->a_ops->get_xip_page); + BUG_ON(!mapping->a_ops->get_xip_mem); - index = *ppos >> PAGE_CACHE_SHIFT; - offset = *ppos & ~PAGE_CACHE_MASK; + pos = *ppos; + index = pos >> PAGE_CACHE_SHIFT; + offset = pos & ~PAGE_CACHE_MASK; isize = i_size_read(inode); if (!isize) goto out; end_index = (isize - 1) >> PAGE_CACHE_SHIFT; - for (;;) { - struct page *page; - unsigned long nr, ret; + do { + unsigned long nr, left; + void *xip_mem; + unsigned long xip_pfn; + int zero = 0; /* nr is the maximum number of bytes to copy from this page */ nr = PAGE_CACHE_SIZE; @@ -60,19 +90,17 @@ do_xip_mapping_read(struct address_space *mapping, } } nr = nr - offset; + if (nr > len - copied) + nr = len - copied; - page = mapping->a_ops->get_xip_page(mapping, - index*(PAGE_SIZE/512), 0); - if (!page) - goto no_xip_page; - if (unlikely(IS_ERR(page))) { - if (PTR_ERR(page) == -ENODATA) { + error = mapping->a_ops->get_xip_mem(mapping, index, 0, + &xip_mem, &xip_pfn); + if (unlikely(error)) { + if (error == -ENODATA) { /* sparse */ - page = ZERO_PAGE(0); - } else { - desc->error = PTR_ERR(page); + zero = 1; + } else goto out; - } } /* If users can be writing to this page using arbitrary @@ -80,10 +108,10 @@ do_xip_mapping_read(struct address_space *mapping, * before reading the page on the kernel side. */ if (mapping_writably_mapped(mapping)) - flush_dcache_page(page); + /* address based flush */ ; /* - * Ok, we have the page, so now we can copy it to user space... + * Ok, we have the mem, so now we can copy it to user space... * * The actor routine returns how many bytes were actually used.. * NOTE! This may not be the same as how much of a user buffer @@ -91,78 +119,47 @@ do_xip_mapping_read(struct address_space *mapping, * "pos" here (the actor routine has to update the user buffer * pointers and the remaining count). */ - ret = actor(desc, page, offset, nr); - offset += ret; - index += offset >> PAGE_CACHE_SHIFT; - offset &= ~PAGE_CACHE_MASK; + if (!zero) + left = __copy_to_user(buf+copied, xip_mem+offset, nr); + else + left = __clear_user(buf + copied, nr); - if (ret == nr && desc->count) - continue; - goto out; + if (left) { + error = -EFAULT; + goto out; + } -no_xip_page: - /* Did not get the page. Report it */ - desc->error = -EIO; - goto out; - } + copied += (nr - left); + offset += (nr - left); + index += offset >> PAGE_CACHE_SHIFT; + offset &= ~PAGE_CACHE_MASK; + } while (copied < len); out: - *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; + *ppos = pos + copied; if (filp) file_accessed(filp); + + return (copied ? copied : error); } ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) { - read_descriptor_t desc; - if (!access_ok(VERIFY_WRITE, buf, len)) return -EFAULT; - desc.written = 0; - desc.arg.buf = buf; - desc.count = len; - desc.error = 0; - - do_xip_mapping_read(filp->f_mapping, &filp->f_ra, filp, - ppos, &desc, file_read_actor); - - if (desc.written) - return desc.written; - else - return desc.error; + return do_xip_mapping_read(filp->f_mapping, &filp->f_ra, filp, + buf, len, ppos); } EXPORT_SYMBOL_GPL(xip_file_read); -ssize_t -xip_file_sendfile(struct file *in_file, loff_t *ppos, - size_t count, read_actor_t actor, void *target) -{ - read_descriptor_t desc; - - if (!count) - return 0; - - desc.written = 0; - desc.count = count; - desc.arg.data = target; - desc.error = 0; - - do_xip_mapping_read(in_file->f_mapping, &in_file->f_ra, in_file, - ppos, &desc, actor); - if (desc.written) - return desc.written; - return desc.error; -} -EXPORT_SYMBOL_GPL(xip_file_sendfile); - /* * __xip_unmap is invoked from xip_unmap and * xip_write * * This function walks all vmas of the address_space and unmaps the - * ZERO_PAGE when found at pgoff. Should it go in rmap.c? + * __xip_sparse_page when found at pgoff. */ static void __xip_unmap (struct address_space * mapping, @@ -174,99 +171,143 @@ __xip_unmap (struct address_space * mapping, unsigned long address; pte_t *pte; pte_t pteval; - struct page *page = ZERO_PAGE(address); + spinlock_t *ptl; + struct page *page; + unsigned count; + int locked = 0; + + count = read_seqcount_begin(&xip_sparse_seq); + + page = __xip_sparse_page; + if (!page) + return; +retry: spin_lock(&mapping->i_mmap_lock); vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { mm = vma->vm_mm; address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); BUG_ON(address < vma->vm_start || address >= vma->vm_end); - /* - * We need the page_table_lock to protect us from page faults, - * munmap, fork, etc... - */ - pte = page_check_address(page, mm, address); - if (!IS_ERR(pte)) { + pte = page_check_address(page, mm, address, &ptl, 1); + if (pte) { /* Nuke the page table entry. */ flush_cache_page(vma, address, pte_pfn(*pte)); - pteval = ptep_clear_flush(vma, address, pte); + pteval = ptep_clear_flush_notify(vma, address, pte); page_remove_rmap(page); - dec_mm_counter(mm, file_rss); + dec_mm_counter(mm, MM_FILEPAGES); BUG_ON(pte_dirty(pteval)); - pte_unmap(pte); - spin_unlock(&mm->page_table_lock); + pte_unmap_unlock(pte, ptl); page_cache_release(page); } } spin_unlock(&mapping->i_mmap_lock); + + if (locked) { + mutex_unlock(&xip_sparse_mutex); + } else if (read_seqcount_retry(&xip_sparse_seq, count)) { + mutex_lock(&xip_sparse_mutex); + locked = 1; + goto retry; + } } /* - * xip_nopage() is invoked via the vma operations vector for a + * xip_fault() is invoked via the vma operations vector for a * mapped memory region to read in file data during a page fault. * - * This function is derived from filemap_nopage, but used for execute in place + * This function is derived from filemap_fault, but used for execute in place */ -static struct page * -xip_file_nopage(struct vm_area_struct * area, - unsigned long address, - int *type) +static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { - struct file *file = area->vm_file; + struct file *file = vma->vm_file; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; + pgoff_t size; + void *xip_mem; + unsigned long xip_pfn; struct page *page; - unsigned long size, pgoff, endoff; - - pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) - + area->vm_pgoff; - endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT) - + area->vm_pgoff; + int error; + /* XXX: are VM_FAULT_ codes OK? */ +again: size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (pgoff >= size) { - return NULL; - } + if (vmf->pgoff >= size) + return VM_FAULT_SIGBUS; - page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0); - if (!IS_ERR(page)) { - goto out; - } - if (PTR_ERR(page) != -ENODATA) - return NULL; + error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0, + &xip_mem, &xip_pfn); + if (likely(!error)) + goto found; + if (error != -ENODATA) + return VM_FAULT_OOM; /* sparse block */ - if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) && - (area->vm_flags & (VM_SHARED| VM_MAYSHARE)) && + if ((vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) && + (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) && (!(mapping->host->i_sb->s_flags & MS_RDONLY))) { + int err; + /* maybe shared writable, allocate new block */ - page = mapping->a_ops->get_xip_page (mapping, - pgoff*(PAGE_SIZE/512), 1); - if (IS_ERR(page)) - return NULL; - /* unmap page at pgoff from all other vmas */ - __xip_unmap(mapping, pgoff); + mutex_lock(&xip_sparse_mutex); + error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 1, + &xip_mem, &xip_pfn); + mutex_unlock(&xip_sparse_mutex); + if (error) + return VM_FAULT_SIGBUS; + /* unmap sparse mappings at pgoff from all other vmas */ + __xip_unmap(mapping, vmf->pgoff); + +found: + err = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, + xip_pfn); + if (err == -ENOMEM) + return VM_FAULT_OOM; + BUG_ON(err); + return VM_FAULT_NOPAGE; } else { - /* not shared and writable, use ZERO_PAGE() */ - page = ZERO_PAGE(address); - } + int err, ret = VM_FAULT_OOM; + + mutex_lock(&xip_sparse_mutex); + write_seqcount_begin(&xip_sparse_seq); + error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0, + &xip_mem, &xip_pfn); + if (unlikely(!error)) { + write_seqcount_end(&xip_sparse_seq); + mutex_unlock(&xip_sparse_mutex); + goto again; + } + if (error != -ENODATA) + goto out; + /* not shared and writable, use xip_sparse_page() */ + page = xip_sparse_page(); + if (!page) + goto out; + err = vm_insert_page(vma, (unsigned long)vmf->virtual_address, + page); + if (err == -ENOMEM) + goto out; + ret = VM_FAULT_NOPAGE; out: - page_cache_get(page); - return page; + write_seqcount_end(&xip_sparse_seq); + mutex_unlock(&xip_sparse_mutex); + + return ret; + } } -static struct vm_operations_struct xip_file_vm_ops = { - .nopage = xip_file_nopage, +static const struct vm_operations_struct xip_file_vm_ops = { + .fault = xip_file_fault, }; int xip_file_mmap(struct file * file, struct vm_area_struct * vma) { - BUG_ON(!file->f_mapping->a_ops->get_xip_page); + BUG_ON(!file->f_mapping->a_ops->get_xip_mem); file_accessed(file); vma->vm_ops = &xip_file_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR | VM_MIXEDMAP; return 0; } EXPORT_SYMBOL_GPL(xip_file_mmap); @@ -276,19 +317,20 @@ __xip_file_write(struct file *filp, const char __user *buf, size_t count, loff_t pos, loff_t *ppos) { struct address_space * mapping = filp->f_mapping; - struct address_space_operations *a_ops = mapping->a_ops; + const struct address_space_operations *a_ops = mapping->a_ops; struct inode *inode = mapping->host; long status = 0; - struct page *page; size_t bytes; ssize_t written = 0; - BUG_ON(!mapping->a_ops->get_xip_page); + BUG_ON(!mapping->a_ops->get_xip_mem); do { unsigned long index; unsigned long offset; size_t copied; + void *xip_mem; + unsigned long xip_pfn; offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ index = pos >> PAGE_CACHE_SHIFT; @@ -296,32 +338,25 @@ __xip_file_write(struct file *filp, const char __user *buf, if (bytes > count) bytes = count; - /* - * Bring in the user page that we will copy from _first_. - * Otherwise there's a nasty deadlock on copying from the - * same page as we're writing to, without it being marked - * up-to-date. - */ - fault_in_pages_readable(buf, bytes); - - page = a_ops->get_xip_page(mapping, - index*(PAGE_SIZE/512), 0); - if (IS_ERR(page) && (PTR_ERR(page) == -ENODATA)) { + status = a_ops->get_xip_mem(mapping, index, 0, + &xip_mem, &xip_pfn); + if (status == -ENODATA) { /* we allocate a new page unmap it */ - page = a_ops->get_xip_page(mapping, - index*(PAGE_SIZE/512), 1); - if (!IS_ERR(page)) + mutex_lock(&xip_sparse_mutex); + status = a_ops->get_xip_mem(mapping, index, 1, + &xip_mem, &xip_pfn); + mutex_unlock(&xip_sparse_mutex); + if (!status) /* unmap page at pgoff from all other vmas */ __xip_unmap(mapping, index); } - if (IS_ERR(page)) { - status = PTR_ERR(page); + if (status) break; - } - copied = filemap_copy_from_user(page, offset, buf, bytes); - flush_dcache_page(page); + copied = bytes - + __copy_from_user_nocache(xip_mem + offset, buf, bytes); + if (likely(copied > 0)) { status = copied; @@ -341,7 +376,7 @@ __xip_file_write(struct file *filp, const char __user *buf, *ppos = pos; /* * No need to use i_size_read() here, the i_size - * cannot change under us because we hold i_sem. + * cannot change under us because we hold i_mutex. */ if (pos > inode->i_size) { i_size_write(inode, pos); @@ -361,7 +396,7 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len, loff_t pos; ssize_t ret; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (!access_ok(VERIFY_READ, buf, len)) { ret=-EFAULT; @@ -382,25 +417,25 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len, if (count == 0) goto out_backing; - ret = remove_suid(filp->f_dentry); + ret = file_remove_suid(filp); if (ret) goto out_backing; - inode_update_time(inode, 1); + file_update_time(filp); ret = __xip_file_write (filp, buf, count, pos, ppos); out_backing: current->backing_dev_info = NULL; out_up: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return ret; } EXPORT_SYMBOL_GPL(xip_file_write); /* * truncate a page used for execute in place - * functionality is analog to block_truncate_page but does use get_xip_page + * functionality is analog to block_truncate_page but does use get_xip_mem * to get the page instead of page cache */ int @@ -410,10 +445,11 @@ xip_truncate_page(struct address_space *mapping, loff_t from) unsigned offset = from & (PAGE_CACHE_SIZE-1); unsigned blocksize; unsigned length; - struct page *page; - void *kaddr; + void *xip_mem; + unsigned long xip_pfn; + int err; - BUG_ON(!mapping->a_ops->get_xip_page); + BUG_ON(!mapping->a_ops->get_xip_mem); blocksize = 1 << mapping->host->i_blkbits; length = offset & (blocksize - 1); @@ -424,22 +460,16 @@ xip_truncate_page(struct address_space *mapping, loff_t from) length = blocksize - length; - page = mapping->a_ops->get_xip_page(mapping, - index*(PAGE_SIZE/512), 0); - if (!page) - return -ENOMEM; - if (unlikely(IS_ERR(page))) { - if (PTR_ERR(page) == -ENODATA) + err = mapping->a_ops->get_xip_mem(mapping, index, 0, + &xip_mem, &xip_pfn); + if (unlikely(err)) { + if (err == -ENODATA) /* Hole? No need to truncate */ return 0; else - return PTR_ERR(page); + return err; } - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, length); - kunmap_atomic(kaddr, KM_USER0); - - flush_dcache_page(page); + memset(xip_mem + offset, 0, length); return 0; } EXPORT_SYMBOL_GPL(xip_truncate_page);