[SPARC64]: Don't clobber alt-global %g4 on window fixups.
diff --git a/mm/memory.c b/mm/memory.c
index 745b348..85e80a5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -82,6 +82,16 @@ EXPORT_SYMBOL(num_physpages);
 EXPORT_SYMBOL(high_memory);
 EXPORT_SYMBOL(vmalloc_earlyreserve);
 
+int randomize_va_space __read_mostly = 1;
+
+static int __init disable_randmaps(char *s)
+{
+       randomize_va_space = 0;
+       return 0;
+}
+__setup("norandmaps", disable_randmaps);
+
+
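The block added above introduces the randomize_va_space knob and a "norandmaps" boot parameter for switching address-space layout randomization off. As a hedged userspace illustration (not part of the patch): run the program below twice; the stack and mmap() addresses normally differ between runs, and become repeatable when the kernel is booted with norandmaps (or the knob is cleared via proc on kernels that expose it).

/*
 * Hedged demo, not from the patch: make the randomization visible
 * from userspace.  Run twice and compare the printed addresses.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	int on_stack;
	void *mapped = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	printf("stack %p  mmap %p\n", (void *)&on_stack, mapped);
	munmap(mapped, 4096);
	return 0;
}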
 /*
  * If a p?d_bad entry is found while walking page tables, report
  * the error, before resetting entry to p?d_none.  Usually (but
@@ -349,6 +359,11 @@ void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
        dump_stack();
 }
 
+static inline int is_cow_mapping(unsigned int flags)
+{
+       return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+}
+
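is_cow_mapping() classifies a vma as copy-on-write exactly when it is privately writable: VM_MAYWRITE set and VM_SHARED clear. A minimal standalone sketch (flag values are illustrative, not taken from the kernel headers) walking the four combinations:

/*
 * Hedged sketch, not from the patch: the same predicate exercised
 * over the four flag combinations.
 */
#include <assert.h>

#define VM_SHARED	0x08
#define VM_MAYWRITE	0x20

static int is_cow_mapping(unsigned int flags)
{
	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
}

int main(void)
{
	assert(!is_cow_mapping(0));				/* private, not writable */
	assert(is_cow_mapping(VM_MAYWRITE));			/* private, writable: COW */
	assert(!is_cow_mapping(VM_SHARED));			/* shared, not writable */
	assert(!is_cow_mapping(VM_SHARED | VM_MAYWRITE));	/* shared, writable */
	return 0;
}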
 /*
  * This function gets the "struct page" associated with a pte.
  *
@@ -377,6 +392,8 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
                unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;
                if (pfn == vma->vm_pgoff + off)
                        return NULL;
+               if (!is_cow_mapping(vma->vm_flags))
+                       return NULL;
        }
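For a VM_PFNMAP vma the two checks above decide whether a pte still refers to the raw remapped frame (no struct page) or to a page COW'ed on top of it, relying on the convention, set up later in this patch, that vm_pgoff holds the base pfn of a copy-on-write pfn mapping. A hedged restatement of that decision as a standalone predicate:

/*
 * Hedged sketch, not from the patch: the VM_PFNMAP branch of
 * vm_normal_page() restated as a standalone predicate.  Returns
 * nonzero when the pte refers to a normal page (one with a
 * struct page behind it).
 */
static int pfnmap_pte_is_normal(unsigned long pte_pfn,
				unsigned long vm_pgoff,		/* base pfn of the mapping */
				unsigned long page_index,	/* (addr - vm_start) >> PAGE_SHIFT */
				int cow_mapping)		/* is_cow_mapping(vm_flags) */
{
	if (pte_pfn == vm_pgoff + page_index)
		return 0;	/* still the original, un-COW'ed frame */
	if (!cow_mapping)
		return 0;	/* shared pfnmap: every pte is a raw pfn */
	return 1;		/* private COW copy: a normal anonymous page */
}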
 
        /*
@@ -437,7 +454,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
         * If it's a COW mapping, write protect it both
         * in the parent and the child
         */
-       if ((vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) {
+       if (is_cow_mapping(vm_flags)) {
                ptep_set_wrprotect(src_mm, addr, src_pte);
                pte = *src_pte;
        }
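Write-protecting both copies here is what makes private mappings diverge lazily after fork(): the first write by either side takes a fault and gets its own page. A hedged userspace demonstration of that behaviour (not part of the patch):

/*
 * Hedged demo, not from the patch: copy-on-write across fork() on a
 * MAP_PRIVATE mapping.  The child's write faults and gets a private
 * copy, so the parent's view is unchanged.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	char *buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	strcpy(buf, "parent");
	if (fork() == 0) {
		strcpy(buf, "child");	/* triggers the COW fault */
		_exit(0);
	}
	wait(NULL);
	printf("parent still sees: %s\n", buf);	/* "parent" */
	munmap(buf, 4096);
	return 0;
}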
@@ -567,7 +584,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
         * readonly mappings. The tradeoff is that copy_page_range is more
         * efficient than faulting.
         */
-       if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP))) {
+       if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP|VM_INSERTPAGE))) {
                if (!vma->anon_vma)
                        return 0;
        }
@@ -606,11 +623,12 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
                        (*zap_work)--;
                        continue;
                }
+
+               (*zap_work) -= PAGE_SIZE;
+
                if (pte_present(ptent)) {
                        struct page *page;
 
-                       (*zap_work) -= PAGE_SIZE;
-
                        page = vm_normal_page(vma, addr, ptent);
                        if (unlikely(details) && page) {
                                /*
@@ -1002,7 +1020,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                        continue;
                }
 
-               if (!vma || (vma->vm_flags & VM_IO)
+               if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
                                || !(vm_flags & vma->vm_flags))
                        return i ? : -EFAULT;
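get_user_pages() now refuses VM_PFNMAP areas as well as VM_IO ones, since a pure pfn mapping has no struct pages to hand back. A hedged sketch of the caller side, roughly how a driver of this era might pin one user page (the function and names are invented; the eight-argument signature is the one current at the time):

/*
 * Hedged sketch, not from the patch: pinning a single user page.
 * Fails with -EFAULT if "uaddr" lies in a VM_IO or VM_PFNMAP vma,
 * which is exactly the test above.
 */
#include <linux/mm.h>
#include <linux/sched.h>

static int mydrv_pin_user_page(unsigned long uaddr, struct page **page)
{
	int ret;

	down_read(&current->mm->mmap_sem);
	ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK,
			     1, 1 /* write */, 0 /* force */, page, NULL);
	up_read(&current->mm->mmap_sem);

	/* Drop the reference with page_cache_release(*page) when done. */
	return ret == 1 ? 0 : -EFAULT;
}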
 
@@ -1146,12 +1164,12 @@ int zeromap_page_range(struct vm_area_struct *vma,
        return err;
 }
 
-pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)
+pte_t * fastcall get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)
 {
        pgd_t * pgd = pgd_offset(mm, addr);
        pud_t * pud = pud_alloc(mm, pgd, addr);
        if (pud) {
-               pmd_t * pmd = pmd_alloc(mm, pgd, addr);
+               pmd_t * pmd = pmd_alloc(mm, pud, addr);
                if (pmd)
                        return pte_alloc_map_lock(mm, pmd, addr, ptl);
        }
@@ -1172,7 +1190,7 @@ static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *pa
        spinlock_t *ptl;  
 
        retval = -EINVAL;
-       if (PageAnon(page) || !PageReserved(page))
+       if (PageAnon(page))
                goto out;
        retval = -ENOMEM;
        flush_dcache_page(page);
@@ -1197,45 +1215,34 @@ out:
 }
 
 /*
- * Somebody does a pfn remapping that doesn't actually work as a vma.
+ * This allows drivers to insert individual pages they've allocated
+ * into a user vma.
+ *
+ * The page has to be a nice clean _individual_ kernel allocation.
+ * If you allocate a compound page, you need to have marked it as
+ * such (__GFP_COMP), or manually just split the page up yourself
+ * (which is mainly an issue of doing "set_page_count(page, 1)" for
+ * each sub-page, and then freeing them one by one when you free
+ * them rather than freeing it as a compound page).
+ *
+ * NOTE! Traditionally this was done with "remap_pfn_range()" which
+ * took an arbitrary page protection parameter. This doesn't allow
+ * that. Your vma protection will have to be set up correctly, which
+ * means that if you want a shared writable mapping, you'd better
+ * ask for a shared writable mapping!
  *
- * Do it as individual pages instead, and warn about it. It's bad form,
- * and very inefficient.
+ * The page does not need to be reserved.
  */
-static int incomplete_pfn_remap(struct vm_area_struct *vma,
-               unsigned long start, unsigned long end,
-               unsigned long pfn, pgprot_t prot)
+int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page)
 {
-       static int warn = 10;
-       struct page *page;
-       int retval;
-
-       if (!(vma->vm_flags & VM_INCOMPLETE)) {
-               if (warn) {
-                       warn--;
-                       printk("%s does an incomplete pfn remapping", current->comm);
-                       dump_stack();
-               }
-       }
-       vma->vm_flags |= VM_INCOMPLETE | VM_IO | VM_RESERVED;
-
-       if (start < vma->vm_start || end > vma->vm_end)
+       if (addr < vma->vm_start || addr >= vma->vm_end)
+               return -EFAULT;
+       if (!page_count(page))
                return -EINVAL;
-
-       if (!pfn_valid(pfn))
-               return -EINVAL;
-
-       retval = 0;
-       page = pfn_to_page(pfn);
-       while (start < end) {
-               retval = insert_page(vma->vm_mm, start, page, prot);
-               if (retval < 0)
-                       break;
-               start += PAGE_SIZE;
-               page++;
-       }
-       return retval;
+       vma->vm_flags |= VM_INSERTPAGE;
+       return insert_page(vma->vm_mm, addr, page, vma->vm_page_prot);
 }
+EXPORT_SYMBOL(vm_insert_page);
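As a hedged illustration of the interface documented in the comment above (all names below are invented, not from the patch), a driver ->mmap method could hand out individually allocated kernel pages one at a time. Note that the vma protection is used as-is, per the comment: a shared writable mapping has to be requested as such by userspace.

/*
 * Hedged sketch, not from the patch: a driver ->mmap built on
 * vm_insert_page().  MYDRV_NPAGES and mydrv_pages[] are hypothetical;
 * the pages are assumed to be plain alloc_page(GFP_KERNEL)
 * allocations, i.e. individual pages as required above.
 */
#include <linux/fs.h>
#include <linux/mm.h>

#define MYDRV_NPAGES 16
static struct page *mydrv_pages[MYDRV_NPAGES];	/* filled elsewhere */

static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long addr;
	unsigned long i = 0;
	int err;

	if (vma->vm_end - vma->vm_start > MYDRV_NPAGES * PAGE_SIZE)
		return -EINVAL;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
		err = vm_insert_page(vma, addr, mydrv_pages[i++]);
		if (err)
			return err;
	}
	return 0;
}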
 
 /*
  * maps a range of physical memory into the requested pages. the old
@@ -1311,9 +1318,6 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
        struct mm_struct *mm = vma->vm_mm;
        int err;
 
-       if (addr != vma->vm_start || end != vma->vm_end)
-               return incomplete_pfn_remap(vma, addr, end, pfn, prot);
-
        /*
         * Physically remapped pages are special. Tell the
         * rest of the world about it:
@@ -1327,9 +1331,18 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
         *   VM_PFNMAP tells the core MM that the base pages are just
         *      raw PFN mappings, and do not have a "struct page" associated
         *      with them.
+        *
+        * There's a horrible special case to handle copy-on-write
+        * behaviour that some programs depend on. We mark the "original"
+        * un-COW'ed pages by matching them up with "vma->vm_pgoff".
         */
+       if (is_cow_mapping(vma->vm_flags)) {
+               if (addr != vma->vm_start || end != vma->vm_end)
+                       return -EINVAL;
+               vma->vm_pgoff = pfn;
+       }
+
        vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
-       vma->vm_pgoff = pfn;
 
        BUG_ON(addr >= end);
        pfn -= addr >> PAGE_SHIFT;
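For the common non-COW case nothing changes for callers: a driver still remaps its whole window with the vma's own protection. A hedged sketch of that usual pattern (mydev_phys_base is a made-up device address):

/*
 * Hedged sketch, not from the patch: the usual whole-vma use of
 * remap_pfn_range() from a driver ->mmap.
 */
#include <linux/fs.h>
#include <linux/mm.h>

static unsigned long mydev_phys_base;	/* hypothetical device window */

static int mydev_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;
	unsigned long pfn = (mydev_phys_base >> PAGE_SHIFT) + vma->vm_pgoff;

	return remap_pfn_range(vma, vma->vm_start, pfn, size,
			       vma->vm_page_prot);
}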
@@ -1433,12 +1446,11 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                unsigned long address, pte_t *page_table, pmd_t *pmd,
                spinlock_t *ptl, pte_t orig_pte)
 {
-       struct page *old_page, *src_page, *new_page;
+       struct page *old_page, *new_page;
        pte_t entry;
        int ret = VM_FAULT_MINOR;
 
        old_page = vm_normal_page(vma, address, orig_pte);
-       src_page = old_page;
        if (!old_page)
                goto gotten;
 
@@ -1466,7 +1478,7 @@ gotten:
 
        if (unlikely(anon_vma_prepare(vma)))
                goto oom;
-       if (src_page == ZERO_PAGE(address)) {
+       if (old_page == ZERO_PAGE(address)) {
                new_page = alloc_zeroed_user_highpage(vma, address);
                if (!new_page)
                        goto oom;
@@ -1474,7 +1486,7 @@ gotten:
                new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
                if (!new_page)
                        goto oom;
-               cow_user_page(new_page, src_page, address);
+               cow_user_page(new_page, old_page, address);
        }
 
        /*
@@ -1497,7 +1509,7 @@ gotten:
                update_mmu_cache(vma, address, entry);
                lazy_mmu_prot_update(entry);
                lru_cache_add_active(new_page);
-               page_add_anon_rmap(new_page, vma, address);
+               page_add_new_anon_rmap(new_page, vma, address);
 
                /* Free the old page.. */
                new_page = old_page;
@@ -1769,9 +1781,32 @@ out_big:
 out_busy:
        return -ETXTBSY;
 }
-
 EXPORT_SYMBOL(vmtruncate);
 
+int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
+{
+       struct address_space *mapping = inode->i_mapping;
+
+       /*
+        * If the underlying filesystem is not going to provide
+        * a way to truncate a range of blocks (punch a hole) -
+        * we should return failure right now.
+        */
+       if (!inode->i_op || !inode->i_op->truncate_range)
+               return -ENOSYS;
+
+       mutex_lock(&inode->i_mutex);
+       down_write(&inode->i_alloc_sem);
+       unmap_mapping_range(mapping, offset, (end - offset), 1);
+       truncate_inode_pages_range(mapping, offset, end);
+       inode->i_op->truncate_range(inode, offset, end);
+       up_write(&inode->i_alloc_sem);
+       mutex_unlock(&inode->i_mutex);
+
+       return 0;
+}
+EXPORT_SYMBOL(vmtruncate_range);
+
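The caller this was written for is madvise(MADV_REMOVE), which punches holes in tmpfs-backed shared mappings by ending up in vmtruncate_range() when the filesystem supplies ->truncate_range. A hedged userspace demo of that path (requires a kernel and filesystem that support MADV_REMOVE):

/*
 * Hedged demo, not from the patch: punch a hole in a tmpfs-backed
 * shared mapping with madvise(MADV_REMOVE); reads of the hole return
 * zeroes afterwards and the backing pages are freed.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t len = 16 * 4096;
	int fd = open("/dev/shm/holepunch-demo", O_RDWR | O_CREAT, 0600);
	char *map;

	if (fd < 0 || ftruncate(fd, len) < 0)
		return 1;
	map = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED)
		return 1;

	memset(map, 'x', len);			/* instantiate backing pages */
	if (madvise(map + 4 * 4096, 8 * 4096, MADV_REMOVE))
		perror("madvise(MADV_REMOVE)");
	printf("first byte of the hole: %d\n", map[4 * 4096]);	/* 0 */

	munmap(map, len);
	close(fd);
	unlink("/dev/shm/holepunch-demo");
	return 0;
}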
 /* 
  * Primitive swap readahead code. We simply read an aligned block of
  * (1 << page_cluster) entries in the swap area. This method is chosen
@@ -1847,6 +1882,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                goto out;
 
        entry = pte_to_swp_entry(orig_pte);
+again:
        page = lookup_swap_cache(entry);
        if (!page) {
                swapin_readahead(entry, address, vma);
@@ -1870,6 +1906,12 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
        mark_page_accessed(page);
        lock_page(page);
+       if (!PageSwapCache(page)) {
+               /* Page migration has occurred */
+               unlock_page(page);
+               page_cache_release(page);
+               goto again;
+       }
 
        /*
         * Back out if somebody else already faulted in this pte.
@@ -1953,8 +1995,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        goto release;
                inc_mm_counter(mm, anon_rss);
                lru_cache_add_active(page);
-               SetPageReferenced(page);
-               page_add_anon_rmap(page, vma, address);
+               page_add_new_anon_rmap(page, vma, address);
        } else {
                /* Map the ZERO_PAGE - vm_page_prot is readonly */
                page = ZERO_PAGE(address);
@@ -2010,6 +2051,8 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
        int anon = 0;
 
        pte_unmap(page_table);
+       BUG_ON(vma->vm_flags & VM_PFNMAP);
+
        if (vma->vm_file) {
                mapping = vma->vm_file->f_mapping;
                sequence = mapping->truncate_count;
@@ -2042,7 +2085,7 @@ retry:
                page = alloc_page_vma(GFP_HIGHUSER, vma, address);
                if (!page)
                        goto oom;
-               cow_user_page(page, new_page, address);
+               copy_user_highpage(page, new_page, address);
                page_cache_release(new_page);
                new_page = page;
                anon = 1;
@@ -2083,7 +2126,7 @@ retry:
                if (anon) {
                        inc_mm_counter(mm, anon_rss);
                        lru_cache_add_active(new_page);
-                       page_add_anon_rmap(new_page, vma, address);
+                       page_add_new_anon_rmap(new_page, vma, address);
                } else {
                        inc_mm_counter(mm, file_rss);
                        page_add_file_rmap(new_page);
@@ -2242,6 +2285,8 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
 }
 
+EXPORT_SYMBOL_GPL(__handle_mm_fault);
+
 #ifndef __PAGETABLE_PUD_FOLDED
 /*
  * Allocate page upper directory.