Merge branch 'master' into for-2.6.35
[safe/jmp/linux-2.6] / mm / hugetlb.c
index 450493d..ffbdfc8 100644 (file)
@@ -2,7 +2,6 @@
  * Generic hugetlb support.
  * (C) William Irwin, April 2004
  */
-#include <linux/gfp.h>
 #include <linux/list.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -18,6 +17,7 @@
 #include <linux/mutex.h>
 #include <linux/bootmem.h>
 #include <linux/sysfs.h>
+#include <linux/slab.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -402,7 +402,7 @@ static void clear_huge_page(struct page *page,
 {
        int i;
 
-       if (unlikely(sz > MAX_ORDER_NR_PAGES)) {
+       if (unlikely(sz/PAGE_SIZE > MAX_ORDER_NR_PAGES)) {
                clear_gigantic_page(page, addr, sz);
                return;
        }
@@ -546,6 +546,7 @@ static void free_huge_page(struct page *page)
 
        mapping = (struct address_space *) page_private(page);
        set_page_private(page, 0);
+       page->mapping = NULL;
        BUG_ON(page_count(page));
        INIT_LIST_HEAD(&page->lru);
 
@@ -1278,6 +1279,9 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
                if (!ret)
                        goto out;
 
+               /* Bail for signals. Probably ctrl-c from user */
+               if (signal_pending(current))
+                       goto out;
        }
 
        /*
@@ -1512,10 +1516,9 @@ static struct attribute_group hstate_attr_group = {
        .attrs = hstate_attrs,
 };
 
-static int __init hugetlb_sysfs_add_hstate(struct hstate *h,
-                               struct kobject *parent,
-                               struct kobject **hstate_kobjs,
-                               struct attribute_group *hstate_attr_group)
+static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent,
+                                   struct kobject **hstate_kobjs,
+                                   struct attribute_group *hstate_attr_group)
 {
        int retval;
        int hi = h - hstates;
@@ -2085,7 +2088,7 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma,
 
        entry = pte_mkwrite(pte_mkdirty(huge_ptep_get(ptep)));
        if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1)) {
-               update_mmu_cache(vma, address, entry);
+               update_mmu_cache(vma, address, ptep);
        }
 }
 
@@ -2237,6 +2240,12 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
                + (vma->vm_pgoff >> PAGE_SHIFT);
        mapping = (struct address_space *)page_private(page);
 
+       /*
+        * Take the mapping lock for the duration of the table walk, as
+        * this mapping should be shared between all the VMAs. Since the
+        * lock is already held, __unmap_hugepage_range() is called directly.
+        */
+       spin_lock(&mapping->i_mmap_lock);
        vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
                /* Do not unmap the current VMA */
                if (iter_vma == vma)
@@ -2250,10 +2259,11 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
                 * from the time of fork. This would look like data corruption
                 */
                if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
-                       unmap_hugepage_range(iter_vma,
+                       __unmap_hugepage_range(iter_vma,
                                address, address + huge_page_size(h),
                                page);
        }
+       spin_unlock(&mapping->i_mmap_lock);
 
        return 1;
 }
@@ -2293,6 +2303,9 @@ retry_avoidcopy:
                outside_reserve = 1;
 
        page_cache_get(old_page);
+
+       /* Drop page_table_lock as buddy allocator may be called */
+       spin_unlock(&mm->page_table_lock);
        new_page = alloc_huge_page(vma, address, outside_reserve);
 
        if (IS_ERR(new_page)) {
@@ -2310,19 +2323,25 @@ retry_avoidcopy:
                        if (unmap_ref_private(mm, vma, old_page, address)) {
                                BUG_ON(page_count(old_page) != 1);
                                BUG_ON(huge_pte_none(pte));
+                               spin_lock(&mm->page_table_lock);
                                goto retry_avoidcopy;
                        }
                        WARN_ON_ONCE(1);
                }
 
+               /* Caller expects lock to be held */
+               spin_lock(&mm->page_table_lock);
                return -PTR_ERR(new_page);
        }
 
-       spin_unlock(&mm->page_table_lock);
        copy_huge_page(new_page, old_page, address, vma);
        __SetPageUptodate(new_page);
-       spin_lock(&mm->page_table_lock);
 
+       /*
+        * Retake the page_table_lock to check for racing updates
+        * before the page tables are altered
+        */
+       spin_lock(&mm->page_table_lock);
        ptep = huge_pte_offset(mm, address & huge_page_mask(h));
        if (likely(pte_same(huge_ptep_get(ptep), pte))) {
                /* Break COW */
@@ -2429,8 +2448,10 @@ retry:
                        spin_lock(&inode->i_lock);
                        inode->i_blocks += blocks_per_huge_page(h);
                        spin_unlock(&inode->i_lock);
-               } else
+               } else {
                        lock_page(page);
+                       page->mapping = HUGETLB_POISON;
+               }
        }
 
        /*
@@ -2540,7 +2561,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        entry = pte_mkyoung(entry);
        if (huge_ptep_set_access_flags(vma, address, ptep, entry,
                                                flags & FAULT_FLAG_WRITE))
-               update_mmu_cache(vma, address, entry);
+               update_mmu_cache(vma, address, ptep);
 
 out_page_table_lock:
        spin_unlock(&mm->page_table_lock);