EXPORT_SYMBOL(high_memory);
EXPORT_SYMBOL(vmalloc_earlyreserve);
+int randomize_va_space __read_mostly = 1;
+
+static int __init disable_randmaps(char *s)
+{
+ randomize_va_space = 0;
+ return 1;
+}
+__setup("norandmaps", disable_randmaps);
+
+
/*
* If a p?d_bad entry is found while walking page tables, report
* the error, before resetting entry to p?d_none. Usually (but
anon_vma_unlink(vma);
unlink_file_vma(vma);
- if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) {
+ if (is_vm_hugetlb_page(vma)) {
hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
floor, next? next->vm_start: ceiling);
} else {
* Optimization: gather nearby vmas into one call down
*/
while (next && next->vm_start <= vma->vm_end + PMD_SIZE
- && !is_hugepage_only_range(vma->vm_mm, next->vm_start,
- HPAGE_SIZE)) {
+ && !is_vm_hugetlb_page(next)) {
vma = next;
next = vma->vm_next;
anon_vma_unlink(vma);
dump_stack();
}
+static inline int is_cow_mapping(unsigned int flags)
+{
+ return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+}
+
/*
* This function gets the "struct page" associated with a pte.
*
{
unsigned long pfn = pte_pfn(pte);
- if (vma->vm_flags & VM_PFNMAP) {
+ if (unlikely(vma->vm_flags & VM_PFNMAP)) {
unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;
if (pfn == vma->vm_pgoff + off)
return NULL;
+ if (!is_cow_mapping(vma->vm_flags))
+ return NULL;
}
/*
* we should just do "return pfn_to_page(pfn)", but
* in the meantime we check that we get a valid pfn,
* and that the resulting page looks ok.
- *
- * Remove this test eventually!
*/
if (unlikely(!pfn_valid(pfn))) {
print_bad_pte(vma, pte, addr);
* If it's a COW mapping, write protect it both
* in the parent and the child
*/
- if ((vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) {
+ if (is_cow_mapping(vm_flags)) {
ptep_set_wrprotect(src_mm, addr, src_pte);
pte = *src_pte;
}
* readonly mappings. The tradeoff is that copy_page_range is more
* efficient than faulting.
*/
- if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP))) {
+ if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP|VM_INSERTPAGE))) {
if (!vma->anon_vma)
return 0;
}
(*zap_work)--;
continue;
}
+
+ (*zap_work) -= PAGE_SIZE;
+
if (pte_present(ptent)) {
struct page *page;
- (*zap_work) -= PAGE_SIZE;
-
page = vm_normal_page(vma, addr, ptent);
if (unlikely(details) && page) {
/*
continue;
}
- if (!vma || (vma->vm_flags & VM_IO)
+ if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
|| !(vm_flags & vma->vm_flags))
return i ? : -EFAULT;
}
if (pages) {
pages[i] = page;
+
+ flush_anon_page(page, start);
flush_dcache_page(page);
}
if (vmas)
* The page has to be a nice clean _individual_ kernel allocation.
* If you allocate a compound page, you need to have marked it as
* such (__GFP_COMP), or manually just split the page up yourself
- * (which is mainly an issue of doing "set_page_count(page, 1)" for
- * each sub-page, and then freeing them one by one when you free
- * them rather than freeing it as a compound page).
+ * (see split_page()).
*
* NOTE! Traditionally this was done with "remap_pfn_range()" which
* took an arbitrary page protection parameter. This doesn't allow
return -EFAULT;
if (!page_count(page))
return -EINVAL;
+ vma->vm_flags |= VM_INSERTPAGE;
return insert_page(vma->vm_mm, addr, page, vma->vm_page_prot);
}
-EXPORT_SYMBOL_GPL(vm_insert_page);
-
-/*
- * Somebody does a pfn remapping that doesn't actually work as a vma.
- *
- * Do it as individual pages instead, and warn about it. It's bad form,
- * and very inefficient.
- */
-static int incomplete_pfn_remap(struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- unsigned long pfn, pgprot_t prot)
-{
- static int warn = 10;
- struct page *page;
- int retval;
-
- if (!(vma->vm_flags & VM_INCOMPLETE)) {
- if (warn) {
- warn--;
- printk("%s does an incomplete pfn remapping", current->comm);
- dump_stack();
- }
- }
- vma->vm_flags |= VM_INCOMPLETE | VM_IO | VM_RESERVED;
-
- if (start < vma->vm_start || end > vma->vm_end)
- return -EINVAL;
-
- if (!pfn_valid(pfn))
- return -EINVAL;
-
- page = pfn_to_page(pfn);
- if (!PageReserved(page))
- return -EINVAL;
-
- retval = 0;
- while (start < end) {
- retval = insert_page(vma->vm_mm, start, page, prot);
- if (retval < 0)
- break;
- start += PAGE_SIZE;
- page++;
- }
- return retval;
-}
+EXPORT_SYMBOL(vm_insert_page);
/*
* maps a range of physical memory into the requested pages. the old
struct mm_struct *mm = vma->vm_mm;
int err;
- if (addr != vma->vm_start || end != vma->vm_end)
- return incomplete_pfn_remap(vma, addr, end, pfn, prot);
-
/*
* Physically remapped pages are special. Tell the
* rest of the world about it:
* VM_PFNMAP tells the core MM that the base pages are just
* raw PFN mappings, and do not have a "struct page" associated
* with them.
+ *
+ * There's a horrible special case to handle copy-on-write
+ * behaviour that some programs depend on. We mark the "original"
+ * un-COW'ed pages by matching them up with "vma->vm_pgoff".
*/
+ if (is_cow_mapping(vma->vm_flags)) {
+ if (addr != vma->vm_start || end != vma->vm_end)
+ return -EINVAL;
+ vma->vm_pgoff = pfn;
+ }
+
vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
- vma->vm_pgoff = pfn;
BUG_ON(addr >= end);
pfn -= addr >> PAGE_SHIFT;
update_mmu_cache(vma, address, entry);
lazy_mmu_prot_update(entry);
lru_cache_add_active(new_page);
- page_add_anon_rmap(new_page, vma, address);
+ page_add_new_anon_rmap(new_page, vma, address);
/* Free the old page.. */
new_page = old_page;
out_busy:
return -ETXTBSY;
}
-
EXPORT_SYMBOL(vmtruncate);
+int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
+{
+ struct address_space *mapping = inode->i_mapping;
+
+ /*
+ * If the underlying filesystem is not going to provide
+ * a way to truncate a range of blocks (punch a hole) -
+ * we should return failure right now.
+ */
+ if (!inode->i_op || !inode->i_op->truncate_range)
+ return -ENOSYS;
+
+ mutex_lock(&inode->i_mutex);
+ down_write(&inode->i_alloc_sem);
+ unmap_mapping_range(mapping, offset, (end - offset), 1);
+ truncate_inode_pages_range(mapping, offset, end);
+ inode->i_op->truncate_range(inode, offset, end);
+ up_write(&inode->i_alloc_sem);
+ mutex_unlock(&inode->i_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL(vmtruncate_range);
+
/*
* Primitive swap readahead code. We simply read an aligned block of
* (1 << page_cluster) entries in the swap area. This method is chosen
goto out;
entry = pte_to_swp_entry(orig_pte);
+again:
page = lookup_swap_cache(entry);
if (!page) {
swapin_readahead(entry, address, vma);
mark_page_accessed(page);
lock_page(page);
+ if (!PageSwapCache(page)) {
+ /* Page migration has occured */
+ unlock_page(page);
+ page_cache_release(page);
+ goto again;
+ }
/*
* Back out if somebody else already faulted in this pte.
goto release;
inc_mm_counter(mm, anon_rss);
lru_cache_add_active(page);
- SetPageReferenced(page);
- page_add_anon_rmap(page, vma, address);
+ page_add_new_anon_rmap(page, vma, address);
} else {
/* Map the ZERO_PAGE - vm_page_prot is readonly */
page = ZERO_PAGE(address);
if (anon) {
inc_mm_counter(mm, anon_rss);
lru_cache_add_active(new_page);
- page_add_anon_rmap(new_page, vma, address);
+ page_add_new_anon_rmap(new_page, vma, address);
} else {
inc_mm_counter(mm, file_rss);
page_add_file_rmap(new_page);
return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
}
+EXPORT_SYMBOL_GPL(__handle_mm_fault);
+
#ifndef __PAGETABLE_PUD_FOLDED
/*
* Allocate page upper directory.
if (!vma)
return -1;
write = (vma->vm_flags & VM_WRITE) != 0;
- if (addr >= end)
- BUG();
- if (end > vma->vm_end)
- BUG();
+ BUG_ON(addr >= end);
+ BUG_ON(end > vma->vm_end);
len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE;
ret = get_user_pages(current, current->mm, addr,
len, write, 0, NULL, NULL);