nommu: fix malloc performance by adding uninitialized flag
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 544f7bc..65f38c2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -942,14 +942,14 @@ static void return_unused_surplus_pages(struct hstate *h,
 
        /*
         * We want to release as many surplus pages as possible, spread
-        * evenly across all nodes. Iterate across all nodes until we
-        * can no longer free unreserved surplus pages. This occurs when
-        * the nodes with surplus pages have no free pages.
-        * free_pool_huge_page() will balance the the frees across the
-        * on-line nodes for us and will handle the hstate accounting.
+        * evenly across all nodes with memory. Iterate across these nodes
+        * until we can no longer free unreserved surplus pages. This occurs
+        * when the nodes with surplus pages have no free pages.
+        * free_pool_huge_page() will balance the freed pages across the
+        * on-line nodes with memory and will handle the hstate accounting.
         */
        while (nr_pages--) {
-               if (!free_pool_huge_page(h, &node_online_map, 1))
+               if (!free_pool_huge_page(h, &node_states[N_HIGH_MEMORY], 1))
                        break;
        }
 }
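
The switch from node_online_map to node_states[N_HIGH_MEMORY] matters on systems with memoryless nodes: a node can be on-line without owning any usable pages, so walking only the nodes that actually have memory skips pointless iterations. A minimal sketch, not part of this patch, of how the two masks are walked:

#include <linux/kernel.h>
#include <linux/nodemask.h>

/* Illustrative only: contrast the two node masks used above. */
static void compare_node_masks(void)
{
        int nid;

        /* Every on-line node, including memoryless ones. */
        for_each_online_node(nid)
                pr_info("node %d is on-line\n", nid);

        /* Only nodes that actually have memory to allocate from. */
        for_each_node_state(nid, N_HIGH_MEMORY)
                pr_info("node %d has memory\n", nid);
}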
@@ -1053,14 +1053,14 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 int __weak alloc_bootmem_huge_page(struct hstate *h)
 {
        struct huge_bootmem_page *m;
-       int nr_nodes = nodes_weight(node_online_map);
+       int nr_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
 
        while (nr_nodes) {
                void *addr;
 
                addr = __alloc_bootmem_node_nopanic(
                                NODE_DATA(hstate_next_node_to_alloc(h,
-                                                       &node_online_map)),
+                                               &node_states[N_HIGH_MEMORY])),
                                huge_page_size(h), huge_page_size(h), 0);
 
                if (addr) {
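
The bootmem path above interleaves gigantic page allocations across the nodes in the mask via hstate_next_node_to_alloc(). A hypothetical helper, named by me rather than taken from the kernel API, sketching the round-robin step over a nodemask that such interleaving relies on:

#include <linux/nodemask.h>

/*
 * Hypothetical: advance a node cursor round-robin over *mask, wrapping
 * from the last set node back to the first one.
 */
static int next_nid_round_robin(int nid, const nodemask_t *mask)
{
        nid = next_node(nid, *mask);
        if (nid == MAX_NUMNODES)
                nid = first_node(*mask);
        return nid;
}

The same wrap-around is what keeps the next_nid_to_alloc and next_nid_to_free cursors, initialized further down with first_node(node_states[N_HIGH_MEMORY]), inside the nodes-with-memory mask.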
@@ -1115,7 +1115,8 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
                if (h->order >= MAX_ORDER) {
                        if (!alloc_bootmem_huge_page(h))
                                break;
-               } else if (!alloc_fresh_huge_page(h, &node_online_map))
+               } else if (!alloc_fresh_huge_page(h,
+                                        &node_states[N_HIGH_MEMORY]))
                        break;
        }
        h->max_huge_pages = i;
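
The two branches above exist because huge pages at or above MAX_ORDER are too large for the buddy allocator and can only be carved out of bootmem during early boot, while smaller huge page sizes can still be allocated at runtime. A minimal sketch of that distinction (the helper name is illustrative, not from this patch):

#include <linux/hugetlb.h>
#include <linux/mmzone.h>

/* Illustrative: "gigantic" hstates cannot be served by the buddy allocator. */
static int hstate_needs_bootmem(struct hstate *h)
{
        return h->order >= MAX_ORDER;
}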
@@ -1277,6 +1278,9 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
                if (!ret)
                        goto out;
 
+               /* Bail for signals. Probably ctrl-c from user */
+               if (signal_pending(current))
+                       goto out;
        }
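
Shrinking the pool can spin for a long time when huge pages are still in use, so the signal_pending() check above lets a write to nr_hugepages be interrupted instead of appearing to hang. A generic sketch of the same cooperative-bailout pattern (do_one_unit() is only a placeholder comment, not a kernel function):

#include <linux/sched.h>

/* Illustrative: long-running loop that bails out once a signal is pending. */
static long work_until_signalled(long nr_units)
{
        long done = 0;

        while (nr_units--) {
                if (signal_pending(current))
                        break;          /* e.g. the user hit ctrl-c */
                /* do_one_unit();  placeholder for one unit of real work */
                done++;
                cond_resched();         /* give other tasks a chance to run */
        }
        return done;
}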
 
        /*
@@ -1360,7 +1364,7 @@ static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
        int nid;
        unsigned long count;
        struct hstate *h;
-       NODEMASK_ALLOC(nodemask_t, nodes_allowed);
+       NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
 
        err = strict_strtoul(buf, 10, &count);
        if (err)
@@ -1388,7 +1392,7 @@ static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
 
        h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed);
 
-       if (nodes_allowed != &node_online_map)
+       if (nodes_allowed != &node_states[N_HIGH_MEMORY])
                NODEMASK_FREE(nodes_allowed);
 
        return len;
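
Two details in the last two hunks are worth spelling out: NODEMASK_ALLOC() may allocate the mask with kmalloc() on configurations with many possible nodes, which is why it now takes explicit gfp flags, and __GFP_NORETRY keeps a failed allocation from stalling the writer; and because nodes_allowed can end up pointing at the global node_states[] array rather than an allocated mask, it is freed only when it is not that static mask. A hedged usage sketch of the pattern (error handling trimmed, not the exact hugetlb code):

#include <linux/nodemask.h>
#include <linux/slab.h>

/* Illustrative: scratch nodemask that defaults to the nodes with memory. */
static void nodemask_scratch_pattern(void)
{
        NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);

        if (!nodes_allowed)
                nodes_allowed = &node_states[N_HIGH_MEMORY];

        /* ... use *nodes_allowed ... */

        /* Never free the static node_states[] mask. */
        if (nodes_allowed != &node_states[N_HIGH_MEMORY])
                NODEMASK_FREE(nodes_allowed);
}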
@@ -1610,7 +1614,7 @@ void hugetlb_unregister_node(struct node *node)
        struct node_hstate *nhs = &node_hstates[node->sysdev.id];
 
        if (!nhs->hugepages_kobj)
-               return;
+               return;         /* no hstate attributes */
 
        for_each_hstate(h)
                if (nhs->hstate_kobjs[h - hstates]) {
@@ -1675,15 +1679,15 @@ void hugetlb_register_node(struct node *node)
 }
 
 /*
- * hugetlb init time:  register hstate attributes for all registered
- * node sysdevs.  All on-line nodes should have registered their
- * associated sysdev by the time the hugetlb module initializes.
+ * hugetlb init time:  register hstate attributes for all registered node
+ * sysdevs of nodes that have memory.  All on-line nodes should have
+ * registered their associated sysdev by this time.
  */
 static void hugetlb_register_all_nodes(void)
 {
        int nid;
 
-       for (nid = 0; nid < nr_node_ids; nid++) {
+       for_each_node_state(nid, N_HIGH_MEMORY) {
                struct node *node = &node_devices[nid];
                if (node->sysdev.id == nid)
                        hugetlb_register_node(node);
@@ -1777,8 +1781,8 @@ void __init hugetlb_add_hstate(unsigned order)
        h->free_huge_pages = 0;
        for (i = 0; i < MAX_NUMNODES; ++i)
                INIT_LIST_HEAD(&h->hugepage_freelists[i]);
-       h->next_nid_to_alloc = first_node(node_online_map);
-       h->next_nid_to_free = first_node(node_online_map);
+       h->next_nid_to_alloc = first_node(node_states[N_HIGH_MEMORY]);
+       h->next_nid_to_free = first_node(node_states[N_HIGH_MEMORY]);
        snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
                                        huge_page_size(h)/1024);
 
@@ -1856,7 +1860,8 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
        proc_doulongvec_minmax(table, write, buffer, length, ppos);
 
        if (write) {
-               NODEMASK_ALLOC(nodemask_t, nodes_allowed);
+               NODEMASK_ALLOC(nodemask_t, nodes_allowed,
+                                               GFP_KERNEL | __GFP_NORETRY);
                if (!(obey_mempolicy &&
                               init_nodemask_of_mempolicy(nodes_allowed))) {
                        NODEMASK_FREE(nodes_allowed);
@@ -2235,6 +2240,12 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
                + (vma->vm_pgoff >> PAGE_SHIFT);
        mapping = (struct address_space *)page_private(page);
 
+       /*
+        * Take the mapping lock for the duration of the table walk. Since
+        * this mapping should be shared between all the VMAs,
+        * __unmap_hugepage_range() is called with the lock already held.
+        */
+       spin_lock(&mapping->i_mmap_lock);
        vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
                /* Do not unmap the current VMA */
                if (iter_vma == vma)
@@ -2248,10 +2259,11 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
                 * from the time of fork. This would look like data corruption
                 */
                if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
-                       unmap_hugepage_range(iter_vma,
+                       __unmap_hugepage_range(iter_vma,
                                address, address + huge_page_size(h),
                                page);
        }
+       spin_unlock(&mapping->i_mmap_lock);
 
        return 1;
 }
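
The change above holds mapping->i_mmap_lock across the whole prio tree walk, so the loop body has to call __unmap_hugepage_range(), the variant that expects its caller to already hold the lock, rather than the locking wrapper. A hedged sketch of that general locked/__locked calling convention (the names below are placeholders, not the hugetlb API):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);

/* Double-underscore variant: assumes example_lock is already held. */
static void __do_work_locked(int item)
{
        /* ... touch shared state protected by example_lock ... */
}

/* Public variant: takes the lock around a single item. */
static void do_work(int item)
{
        spin_lock(&example_lock);
        __do_work_locked(item);
        spin_unlock(&example_lock);
}

/* Walker: hold the lock once for the whole walk, call the __locked variant. */
static void do_work_over_range(int first, int last)
{
        int item;

        spin_lock(&example_lock);
        for (item = first; item <= last; item++)
                __do_work_locked(item);
        spin_unlock(&example_lock);
}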
@@ -2291,6 +2303,9 @@ retry_avoidcopy:
                outside_reserve = 1;
 
        page_cache_get(old_page);
+
+       /* Drop page_table_lock as buddy allocator may be called */
+       spin_unlock(&mm->page_table_lock);
        new_page = alloc_huge_page(vma, address, outside_reserve);
 
        if (IS_ERR(new_page)) {
@@ -2308,19 +2323,25 @@ retry_avoidcopy:
                        if (unmap_ref_private(mm, vma, old_page, address)) {
                                BUG_ON(page_count(old_page) != 1);
                                BUG_ON(huge_pte_none(pte));
+                               spin_lock(&mm->page_table_lock);
                                goto retry_avoidcopy;
                        }
                        WARN_ON_ONCE(1);
                }
 
+               /* Caller expects lock to be held */
+               spin_lock(&mm->page_table_lock);
                return -PTR_ERR(new_page);
        }
 
-       spin_unlock(&mm->page_table_lock);
        copy_huge_page(new_page, old_page, address, vma);
        __SetPageUptodate(new_page);
-       spin_lock(&mm->page_table_lock);
 
+       /*
+        * Retake the page_table_lock to check for racing updates
+        * before the page tables are altered
+        */
+       spin_lock(&mm->page_table_lock);
        ptep = huge_pte_offset(mm, address & huge_page_mask(h));
        if (likely(pte_same(huge_ptep_get(ptep), pte))) {
                /* Break COW */
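
The last two hunks follow a common pattern: drop the page_table_lock before alloc_huge_page(), which may sleep in the buddy allocator, then retake it and revalidate the PTE with pte_same() before the page tables are altered, retrying or bailing out if another thread changed the entry in the meantime, and always returning with the lock held as the caller expects. A generic sketch of that drop, retake and revalidate shape (the structure and names are placeholders, not the hugetlb code):

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct guarded_state {
        spinlock_t      lock;
        unsigned long   generation;     /* bumped on every committed update */
};

/*
 * Illustrative: sleepable allocation outside the lock, recheck on return.
 * Every path returns with s->lock held, mirroring the contract above.
 */
static int update_with_sleepable_alloc(struct guarded_state *s)
{
        unsigned long seen;
        void *scratch;

        spin_lock(&s->lock);
        seen = s->generation;

        /* Drop the lock: GFP_KERNEL allocations may sleep. */
        spin_unlock(&s->lock);
        scratch = kmalloc(64, GFP_KERNEL);
        if (!scratch) {
                spin_lock(&s->lock);    /* caller expects the lock held */
                return -ENOMEM;
        }

        spin_lock(&s->lock);
        if (s->generation != seen) {
                /* Raced with another updater while the lock was dropped. */
                kfree(scratch);
                return -EAGAIN;
        }

        /* Nothing changed: safe to commit under the lock. */
        s->generation++;
        kfree(scratch);
        return 0;
}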