X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Fnommu.c;h=8687973462bbea8becddfe0941eb94fab60ce304;hb=9b9a29ecd75e310f75a9243e1c3538ad34598fcb;hp=8cee8c8ff0f26d0f3381385215ef38f5f9b6feb8;hpb=6a6160a7b5c27b3c38651baef92a14fa7072b3c1;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/nommu.c b/mm/nommu.c index 8cee8c8..8687973 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -10,7 +10,7 @@ * Copyright (c) 2000-2003 David McCullough * Copyright (c) 2000-2001 D Jeff Dionne * Copyright (c) 2002 Greg Ungerer - * Copyright (c) 2007-2008 Paul Mundt + * Copyright (c) 2007-2009 Paul Mundt */ #include @@ -33,6 +33,7 @@ #include #include #include +#include #include "internal.h" static inline __attribute__((format(printf, 1, 2))) @@ -56,20 +57,19 @@ void no_printk(const char *fmt, ...) no_printk(KERN_DEBUG FMT"\n", ##__VA_ARGS__) #endif -#include "internal.h" - void *high_memory; struct page *mem_map; unsigned long max_mapnr; unsigned long num_physpages; -atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); +unsigned long highest_memmap_pfn; +struct percpu_counter vm_committed_as; int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; -int sysctl_nr_trim_pages = 1; /* page trimming behaviour */ +int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; int heap_stack_gap = 0; -atomic_t mmap_pages_allocated; +atomic_long_t mmap_pages_allocated; EXPORT_SYMBOL(mem_map); EXPORT_SYMBOL(num_physpages); @@ -79,50 +79,10 @@ static struct kmem_cache *vm_region_jar; struct rb_root nommu_region_tree = RB_ROOT; DECLARE_RWSEM(nommu_region_sem); -struct vm_operations_struct generic_file_vm_ops = { +const struct vm_operations_struct generic_file_vm_ops = { }; /* - * Handle all mappings that got truncated by a "truncate()" - * system call. - * - * NOTE! We have to be ready to update the memory sharing - * between the file and the memory map for a potential last - * incomplete page. Ugly, but necessary. - */ -int vmtruncate(struct inode *inode, loff_t offset) -{ - struct address_space *mapping = inode->i_mapping; - unsigned long limit; - - if (inode->i_size < offset) - goto do_expand; - i_size_write(inode, offset); - - truncate_inode_pages(mapping, offset); - goto out_truncate; - -do_expand: - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) - goto out_sig; - if (offset > inode->i_sb->s_maxbytes) - goto out; - i_size_write(inode, offset); - -out_truncate: - if (inode->i_op->truncate) - inode->i_op->truncate(inode); - return 0; -out_sig: - send_sig(SIGXFSZ, current, 0); -out: - return -EFBIG; -} - -EXPORT_SYMBOL(vmtruncate); - -/* * Return the total memory allocated for this pointer, not * just what the caller asked for. * @@ -170,30 +130,29 @@ unsigned int kobjsize(const void *objp) } int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int len, int flags, - struct page **pages, struct vm_area_struct **vmas) + unsigned long start, int nr_pages, unsigned int foll_flags, + struct page **pages, struct vm_area_struct **vmas) { struct vm_area_struct *vma; unsigned long vm_flags; int i; - int write = !!(flags & GUP_FLAGS_WRITE); - int force = !!(flags & GUP_FLAGS_FORCE); - int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS); /* calculate required read or write permissions. - * - if 'force' is set, we only require the "MAY" flags. + * If FOLL_FORCE is set, we only require the "MAY" flags. */ - vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); - vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); + vm_flags = (foll_flags & FOLL_WRITE) ? + (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); + vm_flags &= (foll_flags & FOLL_FORCE) ? + (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); - for (i = 0; i < len; i++) { + for (i = 0; i < nr_pages; i++) { vma = find_vma(mm, start); if (!vma) goto finish_or_fault; /* protect what we can, including chardevs */ - if (vma->vm_flags & (VM_IO | VM_PFNMAP) || - (!ignore && !(vm_flags & vma->vm_flags))) + if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) || + !(vm_flags & vma->vm_flags)) goto finish_or_fault; if (pages) { @@ -212,7 +171,6 @@ finish_or_fault: return i ? : -EFAULT; } - /* * get a list of pages in an address range belonging to the specified process * and indicate the VMA that covers each page @@ -221,22 +179,41 @@ finish_or_fault: * - don't permit access to VMAs that don't support it, such as I/O mappings */ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int len, int write, int force, + unsigned long start, int nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas) { int flags = 0; if (write) - flags |= GUP_FLAGS_WRITE; + flags |= FOLL_WRITE; if (force) - flags |= GUP_FLAGS_FORCE; + flags |= FOLL_FORCE; - return __get_user_pages(tsk, mm, - start, len, flags, - pages, vmas); + return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas); } EXPORT_SYMBOL(get_user_pages); +/** + * follow_pfn - look up PFN at a user virtual address + * @vma: memory mapping + * @address: user virtual address + * @pfn: location to store found PFN + * + * Only IO mappings and raw PFN mappings are allowed. + * + * Returns zero and the pfn at @pfn on success, -ve otherwise. + */ +int follow_pfn(struct vm_area_struct *vma, unsigned long address, + unsigned long *pfn) +{ + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + return -EINVAL; + + *pfn = address >> PAGE_SHIFT; + return 0; +} +EXPORT_SYMBOL(follow_pfn); + DEFINE_RWLOCK(vmlist_lock); struct vm_struct *vmlist; @@ -394,6 +371,24 @@ void vunmap(const void *addr) } EXPORT_SYMBOL(vunmap); +void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) +{ + BUG(); + return NULL; +} +EXPORT_SYMBOL(vm_map_ram); + +void vm_unmap_ram(const void *mem, unsigned int count) +{ + BUG(); +} +EXPORT_SYMBOL(vm_unmap_ram); + +void vm_unmap_aliases(void) +{ +} +EXPORT_SYMBOL_GPL(vm_unmap_aliases); + /* * Implement a stub for vmalloc_sync_all() if the architecture chose not to * have one. @@ -445,12 +440,11 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) */ void __init mmap_init(void) { - vm_region_jar = kmem_cache_create("vm_region_jar", - sizeof(struct vm_region), 0, - SLAB_PANIC, NULL); - vm_area_cachep = kmem_cache_create("vm_area_struct", - sizeof(struct vm_area_struct), 0, - SLAB_PANIC, NULL); + int ret; + + ret = percpu_counter_init(&vm_committed_as, 0); + VM_BUG_ON(ret); + vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC); } /* @@ -468,27 +462,24 @@ static noinline void validate_nommu_regions(void) return; last = rb_entry(lastp, struct vm_region, vm_rb); - if (unlikely(last->vm_end <= last->vm_start)) - BUG(); - if (unlikely(last->vm_top < last->vm_end)) - BUG(); + BUG_ON(unlikely(last->vm_end <= last->vm_start)); + BUG_ON(unlikely(last->vm_top < last->vm_end)); while ((p = rb_next(lastp))) { region = rb_entry(p, struct vm_region, vm_rb); last = rb_entry(lastp, struct vm_region, vm_rb); - if (unlikely(region->vm_end <= region->vm_start)) - BUG(); - if (unlikely(region->vm_top < region->vm_end)) - BUG(); - if (unlikely(region->vm_start < last->vm_top)) - BUG(); + BUG_ON(unlikely(region->vm_end <= region->vm_start)); + BUG_ON(unlikely(region->vm_top < region->vm_end)); + BUG_ON(unlikely(region->vm_start < last->vm_top)); lastp = p; } } #else -#define validate_nommu_regions() do {} while(0) +static void validate_nommu_regions(void) +{ +} #endif /* @@ -501,8 +492,6 @@ static void add_nommu_region(struct vm_region *region) validate_nommu_regions(); - BUG_ON(region->vm_start & ~PAGE_MASK); - parent = NULL; p = &nommu_region_tree.rb_node; while (*p) { @@ -545,16 +534,17 @@ static void free_page_series(unsigned long from, unsigned long to) struct page *page = virt_to_page(from); kdebug("- free %lx", from); - atomic_dec(&mmap_pages_allocated); + atomic_long_dec(&mmap_pages_allocated); if (page_count(page) != 1) - kdebug("free page %p [%d]", page, page_count(page)); + kdebug("free page %p: refcount not one: %d", + page, page_count(page)); put_page(page); } } /* * release a reference to a region - * - the caller must hold the region semaphore, which this releases + * - the caller must hold the region semaphore for writing, which this releases * - the region may not have been added to the tree yet, in which case vm_top * will equal vm_start */ @@ -595,6 +585,22 @@ static void put_nommu_region(struct vm_region *region) } /* + * update protection on a vma + */ +static void protect_vma(struct vm_area_struct *vma, unsigned long flags) +{ +#ifdef CONFIG_MPU + struct mm_struct *mm = vma->vm_mm; + long start = vma->vm_start & PAGE_MASK; + while (start < vma->vm_end) { + protect_page(mm, start, flags); + start += PAGE_SIZE; + } + update_protections(mm); +#endif +} + +/* * add a VMA into a process's mm_struct in the appropriate place in the list * and tree and add to the address space's page tree also if not an anonymous * page @@ -613,6 +619,8 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) mm->map_count++; vma->vm_mm = mm; + protect_vma(vma, vma->vm_flags); + /* add the VMA to the mapping */ if (vma->vm_file) { mapping = vma->vm_file->f_mapping; @@ -675,6 +683,8 @@ static void delete_vma_from_mm(struct vm_area_struct *vma) kenter("%p", vma); + protect_vma(vma, 0); + mm->map_count--; if (mm->mmap_cache == vma) mm->mmap_cache = NULL; @@ -816,7 +826,7 @@ static int validate_mmap_request(struct file *file, int ret; /* do the simple checks first */ - if (flags & MAP_FIXED || addr) { + if (flags & MAP_FIXED) { printk(KERN_DEBUG "%d: Can't do fixed-address/overlay mmap of RAM\n", current->pid); @@ -887,6 +897,10 @@ static int validate_mmap_request(struct file *file, if (!file->f_op->read) capabilities &= ~BDI_CAP_MAP_COPY; + /* The file shall have been opened with read permission. */ + if (!(file->f_mode & FMODE_READ)) + return -EACCES; + if (flags & MAP_SHARED) { /* do checks for writing, appending and locking */ if ((prot & PROT_WRITE) && @@ -1020,7 +1034,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma) ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); if (ret == 0) { vma->vm_region->vm_top = vma->vm_region->vm_end; - return ret; + return 0; } if (ret != -ENOSYS) return ret; @@ -1037,7 +1051,8 @@ static int do_mmap_shared_file(struct vm_area_struct *vma) */ static int do_mmap_private(struct vm_area_struct *vma, struct vm_region *region, - unsigned long len) + unsigned long len, + unsigned long capabilities) { struct page *pages; unsigned long total, point, n, rlen; @@ -1048,13 +1063,13 @@ static int do_mmap_private(struct vm_area_struct *vma, * shared mappings on devices or memory * - VM_MAYSHARE will be set if it may attempt to share */ - if (vma->vm_file) { + if (capabilities & BDI_CAP_MAP_DIRECT) { ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); if (ret == 0) { /* shouldn't return success if we're not sharing */ BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); vma->vm_region->vm_top = vma->vm_region->vm_end; - return ret; + return 0; } if (ret != -ENOSYS) return ret; @@ -1078,7 +1093,7 @@ static int do_mmap_private(struct vm_area_struct *vma, goto enomem; total = 1 << order; - atomic_add(total, &mmap_pages_allocated); + atomic_long_add(total, &mmap_pages_allocated); point = rlen >> PAGE_SHIFT; @@ -1089,7 +1104,7 @@ static int do_mmap_private(struct vm_area_struct *vma, order = ilog2(total - point); n = 1 << order; kdebug("shave %lu/%lu @%lu", n, total - point, total); - atomic_sub(n, &mmap_pages_allocated); + atomic_long_sub(n, &mmap_pages_allocated); total -= n; set_page_refcounted(pages + total); __free_pages(pages + total, order); @@ -1128,9 +1143,6 @@ static int do_mmap_private(struct vm_area_struct *vma, if (ret < rlen) memset(base + ret, 0, rlen - ret); - } else { - /* if it's an anonymous mapping, then just clear it */ - memset(base, 0, rlen); } return 0; @@ -1143,8 +1155,8 @@ error_free: return ret; enomem: - printk("Allocation of length %lu from process %d failed\n", - len, current->pid); + printk("Allocation of length %lu from process %d (%s) failed\n", + len, current->pid, current->comm); show_free_areas(); return -ENOMEM; } @@ -1167,9 +1179,6 @@ unsigned long do_mmap_pgoff(struct file *file, kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff); - if (!(flags & MAP_FIXED)) - addr = round_hint_to_min(addr); - /* decide whether we should attempt the mapping, and if so what sort of * mapping */ ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, @@ -1179,6 +1188,9 @@ unsigned long do_mmap_pgoff(struct file *file, return ret; } + /* we ignore the address hint */ + addr = 0; + /* we've determined that we can make the mapping, now translate what we * now know into VMA flags */ vm_flags = determine_vm_flags(file, prot, flags, capabilities); @@ -1292,7 +1304,7 @@ unsigned long do_mmap_pgoff(struct file *file, * - this is the hook for quasi-memory character devices to * tell us the location of a shared mapping */ - if (file && file->f_op->get_unmapped_area) { + if (capabilities & BDI_CAP_MAP_DIRECT) { addr = file->f_op->get_unmapped_area(file, addr, len, pgoff, flags); if (IS_ERR((void *) addr)) { @@ -1317,16 +1329,22 @@ unsigned long do_mmap_pgoff(struct file *file, vma->vm_region = region; - /* set up the mapping */ + /* set up the mapping + * - the region is filled in if BDI_CAP_MAP_DIRECT is still set + */ if (file && vma->vm_flags & VM_SHARED) ret = do_mmap_shared_file(vma); else - ret = do_mmap_private(vma, region, len); + ret = do_mmap_private(vma, region, len, capabilities); if (ret < 0) - goto error_put_region; - + goto error_just_free; add_nommu_region(region); + /* clear anonymous mappings that don't ask for uninitialized data */ + if (!vma->vm_file && !(flags & MAP_UNINITIALIZED)) + memset((void *)region->vm_start, 0, + region->vm_end - region->vm_start); + /* okay... we have a mapping; now we have to register it */ result = vma->vm_start; @@ -1343,25 +1361,14 @@ share: kleave(" = %lx", result); return result; -error_put_region: - __put_nommu_region(region); - if (vma) { - if (vma->vm_file) { - fput(vma->vm_file); - if (vma->vm_flags & VM_EXECUTABLE) - removed_exe_file_vma(vma->vm_mm); - } - kmem_cache_free(vm_area_cachep, vma); - } - kleave(" = %d [pr]", ret); - return ret; - error_just_free: up_write(&nommu_region_sem); error: - fput(region->vm_file); + if (region->vm_file) + fput(region->vm_file); kmem_cache_free(vm_region_jar, region); - fput(vma->vm_file); + if (vma->vm_file) + fput(vma->vm_file); if (vma->vm_flags & VM_EXECUTABLE) removed_exe_file_vma(vma->vm_mm); kmem_cache_free(vm_area_cachep, vma); @@ -1518,10 +1525,15 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) /* find the first potentially overlapping VMA */ vma = find_vma(mm, start); if (!vma) { - printk(KERN_WARNING - "munmap of memory not mmapped by process %d (%s):" - " 0x%lx-0x%lx\n", - current->pid, current->comm, start, start + len - 1); + static int limit = 0; + if (limit < 5) { + printk(KERN_WARNING + "munmap of memory not mmapped by process %d" + " (%s): 0x%lx-0x%lx\n", + current->pid, current->comm, + start, start + len - 1); + limit++; + } return -EINVAL; } @@ -1831,12 +1843,9 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) if (mm) allowed -= mm->total_vm / 32; - /* - * cast `allowed' as a signed long because vm_committed_space - * sometimes has a negative value - */ - if (atomic_long_read(&vm_committed_space) < (long)allowed) + if (percpu_counter_read_positive(&vm_committed_as) < allowed) return 0; + error: vm_unacct_memory(pages);