#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
+#include <linux/seq_file.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
+#include <linux/mmu_notifier.h>
#include <linux/nodemask.h>
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
#include <asm/page.h>
#include <asm/pgtable.h>
+#include <asm/io.h>
#include <linux/hugetlb.h>
#include "internal.h"
}
/*
+ * Return the size of the pages allocated when backing a VMA. In the majority
+ * of cases this will be the same size as used by the page table entries.
+ *
+ * A VMA that is not a hugetlb mapping reports PAGE_SIZE. A hugetlb VMA
+ * reports 1UL << (order + PAGE_SHIFT), where order is taken from the hstate
+ * associated with the VMA.
+ */
+unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
+{
+ struct hstate *hstate;
+
+ if (!is_vm_hugetlb_page(vma))
+ return PAGE_SIZE;
+
+ hstate = hstate_vma(vma);
+
+ return 1UL << (hstate->order + PAGE_SHIFT);
+}
+EXPORT_SYMBOL_GPL(vma_kernel_pagesize);
+
+/*
+ * Return the page size being used by the MMU to back a VMA. In the majority
+ * of cases, the page size used by the kernel matches the MMU size. On
+ * architectures where it differs, an architecture-specific version of this
+ * function is required.
+ *
+ * This generic version simply forwards to vma_kernel_pagesize(). It is only
+ * compiled when vma_mmu_pagesize is not already defined as a preprocessor
+ * macro.
+ */
+#ifndef vma_mmu_pagesize
+unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
+{
+ return vma_kernel_pagesize(vma);
+}
+#endif
+
+/*
* Flags for MAP_PRIVATE reservations. These are stored in the bottom
* bits of the reservation map pointer, which are always clear due to
* alignment.
struct list_head regions;
};
-struct resv_map *resv_map_alloc(void)
+static struct resv_map *resv_map_alloc(void)
{
struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL);
if (!resv_map)
return resv_map;
}
-void resv_map_release(struct kref *ref)
+static void resv_map_release(struct kref *ref)
{
struct resv_map *resv_map = container_of(ref, struct resv_map, refs);
static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
{
VM_BUG_ON(!is_vm_hugetlb_page(vma));
- if (!(vma->vm_flags & VM_SHARED))
+ if (!(vma->vm_flags & VM_MAYSHARE)) /* map pointer lives in the private data with the HPAGE_RESV_MASK bits masked off */
return (struct resv_map *)(get_vma_private_data(vma) &
~HPAGE_RESV_MASK);
- return 0;
+ return NULL; /* VM_MAYSHARE VMAs yield no per-VMA reservation map */
}
static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
{
VM_BUG_ON(!is_vm_hugetlb_page(vma));
- VM_BUG_ON(vma->vm_flags & VM_SHARED);
+ VM_BUG_ON(vma->vm_flags & VM_MAYSHARE);
set_vma_private_data(vma, (get_vma_private_data(vma) &
HPAGE_RESV_MASK) | (unsigned long)map);
static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
{
VM_BUG_ON(!is_vm_hugetlb_page(vma));
- VM_BUG_ON(vma->vm_flags & VM_SHARED);
+ VM_BUG_ON(vma->vm_flags & VM_MAYSHARE); /* flags may only be OR-ed into the private data of a non-VM_MAYSHARE VMA */
set_vma_private_data(vma, get_vma_private_data(vma) | flags);
}
if (vma->vm_flags & VM_NORESERVE)
return;
- if (vma->vm_flags & VM_SHARED) {
+ if (vma->vm_flags & VM_MAYSHARE) {
/* Shared mappings always use reserves */
h->resv_huge_pages--;
} else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
{
VM_BUG_ON(!is_vm_hugetlb_page(vma));
- if (!(vma->vm_flags & VM_SHARED))
+ if (!(vma->vm_flags & VM_MAYSHARE)) /* vm_private_data is cleared only for non-VM_MAYSHARE VMAs */
vma->vm_private_data = (void *)0;
}
/* Returns true if the VMA has associated reserve pages */
static int vma_has_reserves(struct vm_area_struct *vma)
{
- if (vma->vm_flags & VM_SHARED)
+ if (vma->vm_flags & VM_MAYSHARE) /* VM_MAYSHARE mappings are always reported as having reserves */
return 1;
if (is_vma_resv_set(vma, HPAGE_RESV_OWNER))
return 1;
return 0;
}
+static void clear_gigantic_page(struct page *page,
+ unsigned long addr, unsigned long sz)
+{ /*
+ * Clear the sz/PAGE_SIZE base pages that start at @page, one base page
+ * per iteration, passing addr + i * PAGE_SIZE as the matching address to
+ * clear_user_highpage(). The page cursor is advanced with mem_map_next()
+ * instead of plain pointer arithmetic; presumably the struct pages of
+ * such a large page need not be contiguous -- confirm against
+ * mem_map_next().
+ */
+ int i;
+ struct page *p = page;
+
+ might_sleep();
+ for (i = 0; i < sz/PAGE_SIZE; i++, p = mem_map_next(p, page, i)) {
+ cond_resched(); /* called once per base page */
+ clear_user_highpage(p, addr + i * PAGE_SIZE);
+ }
+}
static void clear_huge_page(struct page *page,
unsigned long addr, unsigned long sz)
{
int i;
+ if (unlikely(sz > MAX_ORDER_NR_PAGES)) {
+ clear_gigantic_page(page, addr, sz);
+ return;
+ }
+
might_sleep();
for (i = 0; i < sz/PAGE_SIZE; i++) {
cond_resched();
}
}
+static void copy_gigantic_page(struct page *dst, struct page *src,
+ unsigned long addr, struct vm_area_struct *vma)
+{ /*
+ * Copy a huge page from @src to @dst one base page at a time with
+ * copy_user_highpage(). The number of base pages is
+ * pages_per_huge_page() of the hstate belonging to @vma. Both page
+ * cursors are advanced with mem_map_next() relative to their starting
+ * pages rather than with plain pointer arithmetic.
+ */
+ int i;
+ struct hstate *h = hstate_vma(vma);
+ struct page *dst_base = dst;
+ struct page *src_base = src;
+ might_sleep();
+ for (i = 0; i < pages_per_huge_page(h); ) {
+ cond_resched();
+ copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma);
+
+ i++; /* bumped first so the new index is what mem_map_next() receives */
+ dst = mem_map_next(dst, dst_base, i);
+ src = mem_map_next(src, src_base, i);
+ }
+}
static void copy_huge_page(struct page *dst, struct page *src,
unsigned long addr, struct vm_area_struct *vma)
{
int i;
struct hstate *h = hstate_vma(vma);
+ if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) {
+ copy_gigantic_page(dst, src, addr, vma);
+ return;
+ }
+
might_sleep();
for (i = 0; i < pages_per_huge_page(h); i++) {
cond_resched();
{
int i;
+ VM_BUG_ON(h->order >= MAX_ORDER);
+
h->nr_huge_pages--;
h->nr_huge_pages_node[page_to_nid(page)]--;
for (i = 0; i < pages_per_huge_page(h); i++) {
hugetlb_put_quota(mapping, 1);
}
-/*
- * Increment or decrement surplus_huge_pages. Keep node-specific counters
- * balanced by operating on them in a round-robin fashion.
- * Returns 1 if an adjustment was made.
- */
-static int adjust_pool_surplus(struct hstate *h, int delta)
-{
- static int prev_nid;
- int nid = prev_nid;
- int ret = 0;
-
- VM_BUG_ON(delta != -1 && delta != 1);
- do {
- nid = next_node(nid, node_online_map);
- if (nid == MAX_NUMNODES)
- nid = first_node(node_online_map);
-
- /* To shrink on this node, there must be a surplus page */
- if (delta < 0 && !h->surplus_huge_pages_node[nid])
- continue;
- /* Surplus cannot exceed the total number of pages */
- if (delta > 0 && h->surplus_huge_pages_node[nid] >=
- h->nr_huge_pages_node[nid])
- continue;
-
- h->surplus_huge_pages += delta;
- h->surplus_huge_pages_node[nid] += delta;
- ret = 1;
- break;
- } while (nid != prev_nid);
-
- prev_nid = nid;
- return ret;
-}
-
static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
{
set_compound_page_dtor(page, free_huge_page);
put_page(page); /* free it into the hugepage allocator */
}
+static void prep_compound_gigantic_page(struct page *page, unsigned long order)
+{
+ int i;
+ int nr_pages = 1 << order;
+ struct page *p = page + 1;
+
+ /*
+ * Record the order on @page and mark it as the compound head, then mark
+ * each of the remaining (1 << order) - 1 base pages as a tail whose
+ * first_page points back at @page. The tail cursor is advanced with
+ * mem_map_next(). We rely on prep_new_huge_page to set the destructor.
+ */
+ set_compound_order(page, order);
+ __SetPageHead(page);
+ for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
+ __SetPageTail(p);
+ p->first_page = page;
+ }
+}
+
+int PageHuge(struct page *page)
+{ /*
+ * Report whether @page belongs to a hugetlb page: it must be part of a
+ * compound page whose head has free_huge_page as its compound
+ * destructor. Returns nonzero if so and 0 otherwise.
+ */
+ compound_page_dtor *dtor;
+
+ if (!PageCompound(page))
+ return 0;
+
+ page = compound_head(page); /* the destructor is read from the head page */
+ dtor = get_compound_page_dtor(page);
+
+ return dtor == free_huge_page;
+}
+
static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
{
struct page *page;
if (h->order >= MAX_ORDER)
return NULL;
- page = alloc_pages_node(nid,
+ page = alloc_pages_exact_node(nid,
htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
__GFP_REPEAT|__GFP_NOWARN,
huge_page_order(h));
if (page) {
if (arch_prepare_hugepage(page)) {
- __free_pages(page, HUGETLB_PAGE_ORDER);
+ __free_pages(page, huge_page_order(h));
return NULL;
}
prep_new_huge_page(h, page, nid);
* Use a helper variable to find the next node and then
* copy it back to hugetlb_next_nid afterwards:
* otherwise there's a window in which a racer might
- * pass invalid nid MAX_NUMNODES to alloc_pages_node.
+ * pass invalid nid MAX_NUMNODES to alloc_pages_exact_node.
* But we don't need to use a spin_lock here: it really
* doesn't matter if occasionally a racer chooses the
* same nid as we do. Move nid forward in the mask even
__GFP_REPEAT|__GFP_NOWARN,
huge_page_order(h));
+ if (page && arch_prepare_hugepage(page)) {
+ __free_pages(page, huge_page_order(h));
+ return NULL;
+ }
+
spin_lock(&hugetlb_lock);
if (page) {
/*
* can no longer free unreserved surplus pages. This occurs when
* the nodes with surplus pages have no free pages.
*/
- unsigned long remaining_iterations = num_online_nodes();
+ unsigned long remaining_iterations = nr_online_nodes;
/* Uncommit the reservation */
h->resv_huge_pages -= unused_resv_pages;
h->surplus_huge_pages--;
h->surplus_huge_pages_node[nid]--;
nr_pages--;
- remaining_iterations = num_online_nodes();
+ remaining_iterations = nr_online_nodes;
}
}
}
* an instantiated the change should be committed via vma_commit_reservation.
* No action is required on failure.
*/
-static int vma_needs_reservation(struct hstate *h,
+static long vma_needs_reservation(struct hstate *h,
struct vm_area_struct *vma, unsigned long addr)
{
struct address_space *mapping = vma->vm_file->f_mapping;
struct inode *inode = mapping->host;
- if (vma->vm_flags & VM_SHARED) {
+ if (vma->vm_flags & VM_MAYSHARE) {
pgoff_t idx = vma_hugecache_offset(h, vma, addr);
return region_chg(&inode->i_mapping->private_list,
idx, idx + 1);
return 1;
} else {
- int err;
+ long err;
pgoff_t idx = vma_hugecache_offset(h, vma, addr);
struct resv_map *reservations = vma_resv_map(vma);
struct address_space *mapping = vma->vm_file->f_mapping;
struct inode *inode = mapping->host;
- if (vma->vm_flags & VM_SHARED) {
+ if (vma->vm_flags & VM_MAYSHARE) {
pgoff_t idx = vma_hugecache_offset(h, vma, addr);
region_add(&inode->i_mapping->private_list, idx, idx + 1);
struct page *page;
struct address_space *mapping = vma->vm_file->f_mapping;
struct inode *inode = mapping->host;
- unsigned int chg;
+ long chg;
/*
* Processes that did not create the mapping will have no reserves and
return page;
}
-__attribute__((weak)) int alloc_bootmem_huge_page(struct hstate *h)
+int __weak alloc_bootmem_huge_page(struct hstate *h)
{
struct huge_bootmem_page *m;
int nr_nodes = nodes_weight(node_online_map);
* puts them into the mem_map).
*/
m = addr;
- if (m)
- goto found;
+ goto found;
}
hstate_next_node(h);
nr_nodes--;
return 1;
}
+static void prep_compound_huge_page(struct page *page, int order)
+{ /*
+ * Set up @page as a compound page of the given order. Orders above
+ * MAX_ORDER - 1 go through prep_compound_gigantic_page(); all others
+ * use prep_compound_page().
+ */
+ if (unlikely(order > (MAX_ORDER - 1)))
+ prep_compound_gigantic_page(page, order);
+ else
+ prep_compound_page(page, order);
+}
+
/* Put bootmem huge pages into the standard lists after mem_map is up */
static void __init gather_bootmem_prealloc(void)
{
struct hstate *h = m->hstate;
__ClearPageReserved(page);
WARN_ON(page_count(page) != 1);
- prep_compound_page(page, h->order);
+ prep_compound_huge_page(page, h->order);
prep_new_huge_page(h, page, page_to_nid(page));
}
}
}
#endif
+/*
+ * Increment or decrement surplus_huge_pages. Keep node-specific counters
+ * balanced by operating on them in a round-robin fashion.
+ * Returns 1 if an adjustment was made.
+ *
+ * @h: hstate whose global and per-node surplus counters are adjusted
+ * @delta: must be -1 or +1 (enforced with VM_BUG_ON)
+ *
+ * The scan starts at the online node following the one remembered from the
+ * previous call and wraps around to the first online node. The first node
+ * that can take the adjustment is used and remembered for the next call.
+ * Returns 0 if the scan arrives back at its starting node without finding a
+ * candidate.
+ */
+static int adjust_pool_surplus(struct hstate *h, int delta)
+{
+ static int prev_nid; /* node chosen by the previous call; persists across calls */
+ int nid = prev_nid;
+ int ret = 0;
+
+ VM_BUG_ON(delta != -1 && delta != 1);
+ do {
+ nid = next_node(nid, node_online_map);
+ if (nid == MAX_NUMNODES)
+ nid = first_node(node_online_map);
+
+ /* To shrink on this node, there must be a surplus page */
+ if (delta < 0 && !h->surplus_huge_pages_node[nid])
+ continue;
+ /* Surplus cannot exceed the total number of pages */
+ if (delta > 0 && h->surplus_huge_pages_node[nid] >=
+ h->nr_huge_pages_node[nid])
+ continue;
+
+ h->surplus_huge_pages += delta;
+ h->surplus_huge_pages_node[nid] += delta;
+ ret = 1;
+ break;
+ } while (nid != prev_nid);
+
+ prev_nid = nid;
+ return ret;
+}
+
#define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
{
static int __init hugetlb_init(void)
{
- BUILD_BUG_ON(HPAGE_SHIFT == 0);
+ /* Some platforms decide whether they support huge pages at boot
+ * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when
+ * there is no such support.
+ */
+ if (HPAGE_SHIFT == 0)
+ return 0;
if (!size_to_hstate(default_hstate_size)) {
default_hstate_size = HPAGE_SIZE;
#endif /* CONFIG_SYSCTL */
-int hugetlb_report_meminfo(char *buf)
+void hugetlb_report_meminfo(struct seq_file *m)
{
struct hstate *h = &default_hstate;
- return sprintf(buf,
- "HugePages_Total: %5lu\n"
- "HugePages_Free: %5lu\n"
- "HugePages_Rsvd: %5lu\n"
- "HugePages_Surp: %5lu\n"
- "Hugepagesize: %5lu kB\n",
+ seq_printf(m,
+ "HugePages_Total: %5lu\n"
+ "HugePages_Free: %5lu\n"
+ "HugePages_Rsvd: %5lu\n"
+ "HugePages_Surp: %5lu\n"
+ "Hugepagesize: %8lu kB\n",
h->nr_huge_pages,
h->free_huge_pages,
h->resv_huge_pages,
BUG_ON(start & ~huge_page_mask(h));
BUG_ON(end & ~huge_page_mask(h));
+ mmu_notifier_invalidate_range_start(mm, start, end);
spin_lock(&mm->page_table_lock);
for (address = start; address < end; address += sz) {
ptep = huge_pte_offset(mm, address);
}
spin_unlock(&mm->page_table_lock);
flush_tlb_range(vma, start, end);
+ mmu_notifier_invalidate_range_end(mm, start, end);
list_for_each_entry_safe(page, tmp, &page_list, lru) {
list_del(&page->lru);
put_page(page);
* from other VMAs and let the children be SIGKILLed if they are faulting the
* same region.
*/
-int unmap_ref_private(struct mm_struct *mm,
- struct vm_area_struct *vma,
- struct page *page,
- unsigned long address)
+static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
+ struct page *page, unsigned long address)
{
+ struct hstate *h = hstate_vma(vma);
struct vm_area_struct *iter_vma;
struct address_space *mapping;
struct prio_tree_iter iter;
* vm_pgoff is in PAGE_SIZE units, hence the different calculation
* from page cache lookup which is in HPAGE_SIZE units.
*/
- address = address & huge_page_mask(hstate_vma(vma));
+ address = address & huge_page_mask(h);
pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
+ (vma->vm_pgoff >> PAGE_SHIFT);
mapping = (struct address_space *)page_private(page);
*/
if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
unmap_hugepage_range(iter_vma,
- address, address + HPAGE_SIZE,
+ address, address + huge_page_size(h),
page);
}
* at the time of fork() could consume its reserves on COW instead
* of the full address range.
*/
- if (!(vma->vm_flags & VM_SHARED) &&
+ if (!(vma->vm_flags & VM_MAYSHARE) &&
is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
old_page != pagecache_page)
outside_reserve = 1;
}
static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep, int write_access)
+ unsigned long address, pte_t *ptep, unsigned int flags)
{
struct hstate *h = hstate_vma(vma);
int ret = VM_FAULT_SIGBUS;
clear_huge_page(page, address, huge_page_size(h));
__SetPageUptodate(page);
- if (vma->vm_flags & VM_SHARED) {
+ if (vma->vm_flags & VM_MAYSHARE) {
int err;
struct inode *inode = mapping->host;
lock_page(page);
}
+ /*
+ * If we are going to COW a private mapping later, we examine the
+ * pending reservations for this page now. This will ensure that
+ * any allocations necessary to record that reservation occur outside
+ * the spinlock.
+ */
+ if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED))
+ if (vma_needs_reservation(h, vma, address) < 0) {
+ ret = VM_FAULT_OOM;
+ goto backout_unlocked;
+ }
+
spin_lock(&mm->page_table_lock);
size = i_size_read(mapping->host) >> huge_page_shift(h);
if (idx >= size)
&& (vma->vm_flags & VM_SHARED)));
set_huge_pte_at(mm, address, ptep, new_pte);
- if (write_access && !(vma->vm_flags & VM_SHARED)) {
+ if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
/* Optimization, do the COW without a second fault */
ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page);
}
backout:
spin_unlock(&mm->page_table_lock);
+backout_unlocked:
unlock_page(page);
put_page(page);
goto out;
}
int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, int write_access)
+ unsigned long address, unsigned int flags)
{
pte_t *ptep;
pte_t entry;
int ret;
+ struct page *pagecache_page = NULL;
static DEFINE_MUTEX(hugetlb_instantiation_mutex);
struct hstate *h = hstate_vma(vma);
mutex_lock(&hugetlb_instantiation_mutex);
entry = huge_ptep_get(ptep);
if (huge_pte_none(entry)) {
- ret = hugetlb_no_page(mm, vma, address, ptep, write_access);
- mutex_unlock(&hugetlb_instantiation_mutex);
- return ret;
+ ret = hugetlb_no_page(mm, vma, address, ptep, flags);
+ goto out_mutex;
}
ret = 0;
+ /*
+ * If we are going to COW the mapping later, we examine the pending
+ * reservations for this page now. This will ensure that any
+ * allocations necessary to record that reservation occur outside the
+ * spinlock. For private mappings, we also look up the pagecache
+ * page now as it is used to determine if a reservation has been
+ * consumed.
+ */
+ if ((flags & FAULT_FLAG_WRITE) && !pte_write(entry)) {
+ if (vma_needs_reservation(h, vma, address) < 0) {
+ ret = VM_FAULT_OOM;
+ goto out_mutex;
+ }
+
+ if (!(vma->vm_flags & VM_MAYSHARE))
+ pagecache_page = hugetlbfs_pagecache_page(h,
+ vma, address);
+ }
+
spin_lock(&mm->page_table_lock);
/* Check for a racing update before calling hugetlb_cow */
- if (likely(pte_same(entry, huge_ptep_get(ptep))))
- if (write_access && !pte_write(entry)) {
- struct page *page;
- page = hugetlbfs_pagecache_page(h, vma, address);
- ret = hugetlb_cow(mm, vma, address, ptep, entry, page);
- if (page) {
- unlock_page(page);
- put_page(page);
- }
+ if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
+ goto out_page_table_lock;
+
+
+ if (flags & FAULT_FLAG_WRITE) {
+ if (!pte_write(entry)) {
+ ret = hugetlb_cow(mm, vma, address, ptep, entry,
+ pagecache_page);
+ goto out_page_table_lock;
}
+ entry = pte_mkdirty(entry);
+ }
+ entry = pte_mkyoung(entry);
+ if (huge_ptep_set_access_flags(vma, address, ptep, entry,
+ flags & FAULT_FLAG_WRITE))
+ update_mmu_cache(vma, address, entry);
+
+out_page_table_lock:
spin_unlock(&mm->page_table_lock);
+
+ if (pagecache_page) {
+ unlock_page(pagecache_page);
+ put_page(pagecache_page);
+ }
+
+out_mutex:
mutex_unlock(&hugetlb_instantiation_mutex);
return ret;
return NULL;
}
+static int huge_zeropage_ok(pte_t *ptep, int write, int shared)
+{ /*
+ * Returns nonzero only when @ptep is non-NULL, neither @write nor
+ * @shared is set, and the entry read through @ptep is none. Returns 0
+ * in every other case.
+ */
+ if (!ptep || write || shared)
+ return 0;
+ else
+ return huge_pte_none(huge_ptep_get(ptep));
+}
+
int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page **pages, struct vm_area_struct **vmas,
unsigned long *position, int *length, int i,
unsigned long vaddr = *position;
int remainder = *length;
struct hstate *h = hstate_vma(vma);
+ int zeropage_ok = 0;
+ int shared = vma->vm_flags & VM_SHARED;
spin_lock(&mm->page_table_lock);
while (vaddr < vma->vm_end && remainder) {
* first, for the page indexing below to work.
*/
pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
+ if (huge_zeropage_ok(pte, write, shared))
+ zeropage_ok = 1;
- if (!pte || huge_pte_none(huge_ptep_get(pte)) ||
+ if (!pte ||
+ (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) ||
(write && !pte_write(huge_ptep_get(pte)))) {
int ret;
page = pte_page(huge_ptep_get(pte));
same_page:
if (pages) {
- get_page(page);
- pages[i] = page + pfn_offset;
+ if (zeropage_ok)
+ pages[i] = ZERO_PAGE(0);
+ else
+ pages[i] = mem_map_offset(page, pfn_offset);
+ get_page(pages[i]);
}
if (vmas)
int hugetlb_reserve_pages(struct inode *inode,
long from, long to,
- struct vm_area_struct *vma)
+ struct vm_area_struct *vma,
+ int acctflag)
{
long ret, chg;
struct hstate *h = hstate_inode(inode);
- if (vma && vma->vm_flags & VM_NORESERVE)
+ /*
+ * Only apply hugepage reservation if asked. At fault time, an
+ * attempt will be made for VM_NORESERVE to allocate a page
+ * and filesystem quota without using reserves
+ */
+ if (acctflag & VM_NORESERVE)
return 0;
/*
* to reserve the full area even if read-only as mprotect() may be
* called to make the mapping read-write. Assume !vma is a shm mapping
*/
- if (!vma || vma->vm_flags & VM_SHARED)
+ if (!vma || vma->vm_flags & VM_MAYSHARE)
chg = region_chg(&inode->i_mapping->private_list, from, to);
else {
struct resv_map *resv_map = resv_map_alloc();
if (chg < 0)
return chg;
+ /* There must be enough filesystem quota for the mapping */
if (hugetlb_get_quota(inode->i_mapping, chg))
return -ENOSPC;
+
+ /*
+ * Check enough hugepages are available for the reservation.
+ * Hand back the quota if there are not
+ */
ret = hugetlb_acct_memory(h, chg);
if (ret < 0) {
hugetlb_put_quota(inode->i_mapping, chg);
return ret;
}
- if (!vma || vma->vm_flags & VM_SHARED)
+
+ /*
+ * Account for the reservations made. Shared mappings record regions
+ * that have reservations as they are shared by multiple VMAs.
+ * When the last VMA disappears, the region map says how much
+ * the reservation was and the page cache tells how much of
+ * the reservation was consumed. Private mappings are per-VMA and
+ * only the consumed reservations are tracked. When the VMA
+ * disappears, the original reservation is the VMA size and the
+ * consumed reservations are stored in the map. Hence, nothing
+ * else has to be done for private mappings here
+ */
+ if (!vma || vma->vm_flags & VM_MAYSHARE)
region_add(&inode->i_mapping->private_list, from, to);
return 0;
}
long chg = region_truncate(&inode->i_mapping->private_list, offset);
spin_lock(&inode->i_lock);
- inode->i_blocks -= blocks_per_huge_page(h);
+ inode->i_blocks -= (blocks_per_huge_page(h) * freed);
spin_unlock(&inode->i_lock);
hugetlb_put_quota(inode->i_mapping, (chg - freed));