1 /*
2  *  linux/mm/nommu.c
3  *
4  *  Replacement code for mm functions to support CPUs that don't
5  *  have any form of memory management unit (thus no virtual memory).
6  *
7  *  See Documentation/nommu-mmap.txt
8  *
9  *  Copyright (c) 2004-2005 David Howells <dhowells@redhat.com>
10  *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
11  *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
12  *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
13  */
14
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/swap.h>
18 #include <linux/file.h>
19 #include <linux/highmem.h>
20 #include <linux/pagemap.h>
21 #include <linux/slab.h>
22 #include <linux/vmalloc.h>
23 #include <linux/ptrace.h>
24 #include <linux/blkdev.h>
25 #include <linux/backing-dev.h>
26 #include <linux/mount.h>
27 #include <linux/personality.h>
28 #include <linux/security.h>
29 #include <linux/syscalls.h>
30
31 #include <asm/uaccess.h>
32 #include <asm/tlb.h>
33 #include <asm/tlbflush.h>
34
35 void *high_memory;
36 struct page *mem_map;
37 unsigned long max_mapnr;
38 unsigned long num_physpages;
39 unsigned long askedalloc, realalloc;
40 atomic_t vm_committed_space = ATOMIC_INIT(0);
41 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
42 int sysctl_overcommit_ratio = 50; /* default is 50% */
43 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
44 int heap_stack_gap = 0;
45
46 EXPORT_SYMBOL(mem_map);
47 EXPORT_SYMBOL(sysctl_max_map_count);
48 EXPORT_SYMBOL(sysctl_overcommit_memory);
49 EXPORT_SYMBOL(sysctl_overcommit_ratio);
50 EXPORT_SYMBOL(vm_committed_space);
51 EXPORT_SYMBOL(__vm_enough_memory);
52
53 /* list of shareable VMAs */
54 struct rb_root nommu_vma_tree = RB_ROOT;
55 DECLARE_RWSEM(nommu_vma_sem);
56
57 struct vm_operations_struct generic_file_vm_ops = {
58 };
59
60 /*
61  * Handle all mappings that got truncated by a "truncate()"
62  * system call.
63  *
64  * NOTE! We have to be ready to update the memory sharing
65  * between the file and the memory map for a potential last
66  * incomplete page.  Ugly, but necessary.
67  */
68 int vmtruncate(struct inode *inode, loff_t offset)
69 {
70         struct address_space *mapping = inode->i_mapping;
71         unsigned long limit;
72
73         if (inode->i_size < offset)
74                 goto do_expand;
75         i_size_write(inode, offset);
76
77         truncate_inode_pages(mapping, offset);
78         goto out_truncate;
79
80 do_expand:
81         limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
82         if (limit != RLIM_INFINITY && offset > limit)
83                 goto out_sig;
84         if (offset > inode->i_sb->s_maxbytes)
85                 goto out;
86         i_size_write(inode, offset);
87
88 out_truncate:
89         if (inode->i_op && inode->i_op->truncate)
90                 inode->i_op->truncate(inode);
91         return 0;
92 out_sig:
93         send_sig(SIGXFSZ, current, 0);
94 out:
95         return -EFBIG;
96 }
97
98 EXPORT_SYMBOL(vmtruncate);
99
100 /*
101  * Return the total memory allocated for this pointer, not
102  * just what the caller asked for.
103  *
104  * Doesn't have to be accurate, i.e. may have races.
105  */
106 unsigned int kobjsize(const void *objp)
107 {
108         struct page *page;
109
110         if (!objp || !((page = virt_to_page(objp))))
111                 return 0;
112
113         if (PageSlab(page))
114                 return ksize(objp);
115
116         BUG_ON(page->index < 0);
117         BUG_ON(page->index >= MAX_ORDER);
118
119         return (PAGE_SIZE << page->index);
120 }
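/*
 * Example: a minimal sketch of what kobjsize() reports.  kobjsize_demo() is a
 * hypothetical helper written purely for illustration; it assumes a typical
 * slab configuration in which a 100-byte kmalloc() is served from a larger
 * bucket, so the value printed exceeds the size that was asked for.
 */
static inline void kobjsize_demo(void)
{
        void *p = kmalloc(100, GFP_KERNEL);

        if (p) {
                /* expect the slab bucket size (e.g. 128), not 100 */
                printk(KERN_DEBUG "asked for 100, backed by %u bytes\n",
                       kobjsize(p));
                kfree(p);
        }
}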
121
122 /*
123  * The nommu dodgy version :-)
124  */
125 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
126         unsigned long start, int len, int write, int force,
127         struct page **pages, struct vm_area_struct **vmas)
128 {
129         int i;
130         static struct vm_area_struct dummy_vma;
131
132         for (i = 0; i < len; i++) {
133                 if (pages) {
134                         pages[i] = virt_to_page(start);
135                         if (pages[i])
136                                 page_cache_get(pages[i]);
137                 }
138                 if (vmas)
139                         vmas[i] = &dummy_vma;
140                 start += PAGE_SIZE;
141         }
142         return(i);
143 }
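/*
 * Example: a minimal sketch of how a driver might pin a user buffer with the
 * routine above.  pin_user_buffer() is a hypothetical helper for illustration
 * only; the calling convention (take mmap_sem, pass current->mm) is the same
 * as on an MMU kernel even though no faulting can happen here.
 */
static inline int pin_user_buffer(unsigned long uaddr, int nr_pages,
                                  struct page **pages)
{
        int got;

        down_read(&current->mm->mmap_sem);
        got = get_user_pages(current, current->mm, uaddr, nr_pages,
                             1 /* write */, 0 /* force */, pages, NULL);
        up_read(&current->mm->mmap_sem);

        return got;
}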
144
145 DEFINE_RWLOCK(vmlist_lock);
146 struct vm_struct *vmlist;
147
148 void vfree(void *addr)
149 {
150         kfree(addr);
151 }
152
153 void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot)
154 {
155         /*
156          * kmalloc doesn't like __GFP_HIGHMEM for some reason
157          */
158         return kmalloc(size, gfp_mask & ~__GFP_HIGHMEM);
159 }
160
161 struct page * vmalloc_to_page(void *addr)
162 {
163         return virt_to_page(addr);
164 }
165
166 unsigned long vmalloc_to_pfn(void *addr)
167 {
168         return page_to_pfn(virt_to_page(addr));
169 }
170
171
172 long vread(char *buf, char *addr, unsigned long count)
173 {
174         memcpy(buf, addr, count);
175         return count;
176 }
177
178 long vwrite(char *buf, char *addr, unsigned long count)
179 {
180         /* Don't allow overflow */
181         if ((unsigned long) addr + count < count)
182                 count = -(unsigned long) addr;
183
184         memcpy(addr, buf, count);
185         return(count);
186 }
187
188 /*
189  *      vmalloc  -  allocate virtually contiguous memory
190  *
191  *      @size:          allocation size
192  *
193  *      Allocate enough pages to cover @size from the page level
194  *      allocator and map them into contiguous kernel virtual space.
195  *
196  *      For tight control over page level allocator and protection flags
197  *      use __vmalloc() instead.
198  */
199 void *vmalloc(unsigned long size)
200 {
201        return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
202 }
203
204 /*
205  *      vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
206  *
207  *      @size:          allocation size
208  *
209  *      Allocate enough 32-bit addressable pages to cover @size from the
210  *      page level allocator and map them into contiguous kernel virtual space.
211  */
212 void *vmalloc_32(unsigned long size)
213 {
214         return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
215 }
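/*
 * Example: a minimal sketch of the practical consequence of the wrappers
 * above.  vmalloc_zeroed_demo() is a hypothetical helper for illustration: on
 * !MMU the buffer it returns is really a kmalloc() allocation, so it is
 * physically contiguous and bounded by the maximum kmalloc size, yet it is
 * still released with vfree() just as on an MMU kernel.
 */
static inline void *vmalloc_zeroed_demo(unsigned long size)
{
        void *buf = vmalloc(size);

        if (buf)
                memset(buf, 0, size);   /* directly addressable, no page tables */

        return buf;                     /* caller frees with vfree() */
}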
216
217 void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
218 {
219         BUG();
220         return NULL;
221 }
222
223 void vunmap(void *addr)
224 {
225         BUG();
226 }
227
228 /*
229  *  sys_brk() for the most part doesn't need the global kernel
230  *  lock, except when an application is doing something nasty
231  *  like trying to un-brk an area that has already been mapped
232  *  to a regular file.  in this case, the unmapping will need
233  *  to invoke file system routines that need the global lock.
234  */
235 asmlinkage unsigned long sys_brk(unsigned long brk)
236 {
237         struct mm_struct *mm = current->mm;
238
239         if (brk < mm->start_brk || brk > mm->context.end_brk)
240                 return mm->brk;
241
242         if (mm->brk == brk)
243                 return mm->brk;
244
245         /*
246          * Always allow shrinking brk
247          */
248         if (brk <= mm->brk) {
249                 mm->brk = brk;
250                 return brk;
251         }
252
253         /*
254          * Ok, looks good - let it rip.
255          */
256         return mm->brk = brk;
257 }
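/*
 * Example: a minimal userspace sketch (guarded out of the kernel build) of the
 * policy enforced above - the break may always move downwards, but can only
 * grow while it stays inside the window reserved between mm->start_brk and
 * mm->context.end_brk at exec time.
 */
#if 0   /* userspace illustration, not kernel code */
#include <unistd.h>
#include <stdio.h>

int main(void)
{
        void *old = sbrk(0);

        if (sbrk(4096) == (void *) -1)  /* growth fails once the window is used up */
                perror("sbrk");
        else
                brk(old);               /* shrinking back is always permitted */
        return 0;
}
#endif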
258
259 #ifdef DEBUG
260 static void show_process_blocks(void)
261 {
262         struct vm_list_struct *vml;
263
264         printk("Process blocks %d:", current->pid);
265
266         for (vml = &current->mm->context.vmlist; vml; vml = vml->next) {
267                 printk(" %p: %p", vml, vml->vma);
268                 if (vml->vma)
269                         printk(" (%d @%lx #%d)",
270                                kobjsize((void *) vml->vma->vm_start),
271                                vml->vma->vm_start,
272                                atomic_read(&vml->vma->vm_usage));
273                 printk(vml->next ? " ->" : ".\n");
274         }
275 }
276 #endif /* DEBUG */
277
278 static inline struct vm_area_struct *find_nommu_vma(unsigned long start)
279 {
280         struct vm_area_struct *vma;
281         struct rb_node *n = nommu_vma_tree.rb_node;
282
283         while (n) {
284                 vma = rb_entry(n, struct vm_area_struct, vm_rb);
285
286                 if (start < vma->vm_start)
287                         n = n->rb_left;
288                 else if (start > vma->vm_start)
289                         n = n->rb_right;
290                 else
291                         return vma;
292         }
293
294         return NULL;
295 }
296
297 static void add_nommu_vma(struct vm_area_struct *vma)
298 {
299         struct vm_area_struct *pvma;
300         struct address_space *mapping;
301         struct rb_node **p = &nommu_vma_tree.rb_node;
302         struct rb_node *parent = NULL;
303
304         /* add the VMA to the mapping */
305         if (vma->vm_file) {
306                 mapping = vma->vm_file->f_mapping;
307
308                 flush_dcache_mmap_lock(mapping);
309                 vma_prio_tree_insert(vma, &mapping->i_mmap);
310                 flush_dcache_mmap_unlock(mapping);
311         }
312
313         /* add the VMA to the master list */
314         while (*p) {
315                 parent = *p;
316                 pvma = rb_entry(parent, struct vm_area_struct, vm_rb);
317
318                 if (vma->vm_start < pvma->vm_start) {
319                         p = &(*p)->rb_left;
320                 }
321                 else if (vma->vm_start > pvma->vm_start) {
322                         p = &(*p)->rb_right;
323                 }
324                 else {
325                         /* mappings are at the same address - this can only
326                          * happen for shared-mem chardevs and shared file
327                          * mappings backed by ramfs/tmpfs */
328                         BUG_ON(!(pvma->vm_flags & VM_SHARED));
329
330                         if (vma < pvma)
331                                 p = &(*p)->rb_left;
332                         else if (vma > pvma)
333                                 p = &(*p)->rb_right;
334                         else
335                                 BUG();
336                 }
337         }
338
339         rb_link_node(&vma->vm_rb, parent, p);
340         rb_insert_color(&vma->vm_rb, &nommu_vma_tree);
341 }
342
343 static void delete_nommu_vma(struct vm_area_struct *vma)
344 {
345         struct address_space *mapping;
346
347         /* remove the VMA from the mapping */
348         if (vma->vm_file) {
349                 mapping = vma->vm_file->f_mapping;
350
351                 flush_dcache_mmap_lock(mapping);
352                 vma_prio_tree_remove(vma, &mapping->i_mmap);
353                 flush_dcache_mmap_unlock(mapping);
354         }
355
356         /* remove from the master list */
357         rb_erase(&vma->vm_rb, &nommu_vma_tree);
358 }
359
360 /*
361  * determine whether a mapping should be permitted and, if so, what sort of
362  * mapping we're capable of supporting
363  */
364 static int validate_mmap_request(struct file *file,
365                                  unsigned long addr,
366                                  unsigned long len,
367                                  unsigned long prot,
368                                  unsigned long flags,
369                                  unsigned long pgoff,
370                                  unsigned long *_capabilities)
371 {
372         unsigned long capabilities;
373         unsigned long reqprot = prot;
374         int ret;
375
376         /* do the simple checks first */
377         if (flags & MAP_FIXED || addr) {
378                 printk(KERN_DEBUG
379                        "%d: Can't do fixed-address/overlay mmap of RAM\n",
380                        current->pid);
381                 return -EINVAL;
382         }
383
384         if ((flags & MAP_TYPE) != MAP_PRIVATE &&
385             (flags & MAP_TYPE) != MAP_SHARED)
386                 return -EINVAL;
387
388         if (PAGE_ALIGN(len) == 0)
389                 return addr;
390
391         if (len > TASK_SIZE)
392                 return -EINVAL;
393
394         /* offset overflow? */
395         if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
396                 return -EINVAL;
397
398         if (file) {
399                 /* validate file mapping requests */
400                 struct address_space *mapping;
401
402                 /* files must support mmap */
403                 if (!file->f_op || !file->f_op->mmap)
404                         return -ENODEV;
405
406                 /* work out if what we've got could possibly be shared
407                  * - we support chardevs that provide their own "memory"
408                  * - we support files/blockdevs that are memory backed
409                  */
410                 mapping = file->f_mapping;
411                 if (!mapping)
412                         mapping = file->f_dentry->d_inode->i_mapping;
413
414                 capabilities = 0;
415                 if (mapping && mapping->backing_dev_info)
416                         capabilities = mapping->backing_dev_info->capabilities;
417
418                 if (!capabilities) {
419                         /* no explicit capabilities set, so assume some
420                          * defaults */
421                         switch (file->f_dentry->d_inode->i_mode & S_IFMT) {
422                         case S_IFREG:
423                         case S_IFBLK:
424                                 capabilities = BDI_CAP_MAP_COPY;
425                                 break;
426
427                         case S_IFCHR:
428                                 capabilities =
429                                         BDI_CAP_MAP_DIRECT |
430                                         BDI_CAP_READ_MAP |
431                                         BDI_CAP_WRITE_MAP;
432                                 break;
433
434                         default:
435                                 return -EINVAL;
436                         }
437                 }
438
439                 /* eliminate any capabilities that we can't support on this
440                  * device */
441                 if (!file->f_op->get_unmapped_area)
442                         capabilities &= ~BDI_CAP_MAP_DIRECT;
443                 if (!file->f_op->read)
444                         capabilities &= ~BDI_CAP_MAP_COPY;
445
446                 if (flags & MAP_SHARED) {
447                         /* do checks for writing, appending and locking */
448                         if ((prot & PROT_WRITE) &&
449                             !(file->f_mode & FMODE_WRITE))
450                                 return -EACCES;
451
452                         if (IS_APPEND(file->f_dentry->d_inode) &&
453                             (file->f_mode & FMODE_WRITE))
454                                 return -EACCES;
455
456                         if (locks_verify_locked(file->f_dentry->d_inode))
457                                 return -EAGAIN;
458
459                         if (!(capabilities & BDI_CAP_MAP_DIRECT))
460                                 return -ENODEV;
461
462                         if (((prot & PROT_READ)  && !(capabilities & BDI_CAP_READ_MAP))  ||
463                             ((prot & PROT_WRITE) && !(capabilities & BDI_CAP_WRITE_MAP)) ||
464                             ((prot & PROT_EXEC)  && !(capabilities & BDI_CAP_EXEC_MAP))
465                             ) {
466                                 printk("MAP_SHARED not completely supported on !MMU\n");
467                                 return -EINVAL;
468                         }
469
470                         /* we mustn't privatise shared mappings */
471                         capabilities &= ~BDI_CAP_MAP_COPY;
472                 }
473                 else {
474                         /* we're going to read the file into private memory we
475                          * allocate */
476                         if (!(capabilities & BDI_CAP_MAP_COPY))
477                                 return -ENODEV;
478
479                         /* we don't permit a private writable mapping to be
480                          * shared with the backing device */
481                         if (prot & PROT_WRITE)
482                                 capabilities &= ~BDI_CAP_MAP_DIRECT;
483                 }
484
485                 /* handle executable mappings and implied executable
486                  * mappings */
487                 if (file->f_vfsmnt->mnt_flags & MNT_NOEXEC) {
488                         if (prot & PROT_EXEC)
489                                 return -EPERM;
490                 }
491                 else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) {
492                         /* handle implication of PROT_EXEC by PROT_READ */
493                         if (current->personality & READ_IMPLIES_EXEC) {
494                                 if (capabilities & BDI_CAP_EXEC_MAP)
495                                         prot |= PROT_EXEC;
496                         }
497                 }
498                 else if ((prot & PROT_READ) &&
499                          (prot & PROT_EXEC) &&
500                          !(capabilities & BDI_CAP_EXEC_MAP)
501                          ) {
502                         /* backing file is not executable, try to copy */
503                         capabilities &= ~BDI_CAP_MAP_DIRECT;
504                 }
505         }
506         else {
507                 /* anonymous mappings are always memory backed and can be
508                  * privately mapped
509                  */
510                 capabilities = BDI_CAP_MAP_COPY;
511
512                 /* handle PROT_EXEC implication by PROT_READ */
513                 if ((prot & PROT_READ) &&
514                     (current->personality & READ_IMPLIES_EXEC))
515                         prot |= PROT_EXEC;
516         }
517
518         /* allow the security API to have its say */
519         ret = security_file_mmap(file, reqprot, prot, flags);
520         if (ret < 0)
521                 return ret;
522
523         /* looks okay */
524         *_capabilities = capabilities;
525         return 0;
526 }
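/*
 * Example: a minimal userspace sketch (guarded out of the kernel build) of a
 * mapping request that passes the checks above - no fixed address, MAP_PRIVATE
 * of a regular file, which will be satisfied by a kmalloc'd copy of the file
 * contents, so stores through it are never written back to the file.
 */
#if 0   /* userspace illustration, not kernel code */
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

static void *map_file_copy(const char *path, size_t len)
{
        int fd = open(path, O_RDONLY);
        void *p;

        if (fd < 0)
                return NULL;
        /* NULL address, no MAP_FIXED: acceptable to validate_mmap_request() */
        p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
        close(fd);
        return p == MAP_FAILED ? NULL : p;
}
#endif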
527
528 /*
529  * we've determined that we can make the mapping, now translate what we
530  * now know into VMA flags
531  */
532 static unsigned long determine_vm_flags(struct file *file,
533                                         unsigned long prot,
534                                         unsigned long flags,
535                                         unsigned long capabilities)
536 {
537         unsigned long vm_flags;
538
539         vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags);
540         vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
541         /* vm_flags |= mm->def_flags; */
542
543         if (!(capabilities & BDI_CAP_MAP_DIRECT)) {
544                 /* attempt to share read-only copies of mapped file chunks */
545                 if (file && !(prot & PROT_WRITE))
546                         vm_flags |= VM_MAYSHARE;
547         }
548         else {
549                 /* overlay a shareable mapping on the backing device or inode
550                  * if possible - used for chardevs, ramfs/tmpfs/shmfs and
551                  * romfs/cramfs */
552                 if (flags & MAP_SHARED)
553                         vm_flags |= VM_MAYSHARE | VM_SHARED;
554                 else if ((((vm_flags & capabilities) ^ vm_flags) & BDI_CAP_VMFLAGS) == 0)
555                         vm_flags |= VM_MAYSHARE;
556         }
557
558         /* refuse to let anyone share private mappings with this process if
559          * it's being traced - otherwise breakpoints set in it may interfere
560          * with another untraced process
561          */
562         if ((flags & MAP_PRIVATE) && (current->ptrace & PT_PTRACED))
563                 vm_flags &= ~VM_MAYSHARE;
564
565         return vm_flags;
566 }
567
568 /*
569  * set up a shared mapping on a file
570  */
571 static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len)
572 {
573         int ret;
574
575         ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
576         if (ret != -ENOSYS)
577                 return ret;
578
579         /* getting an ENOSYS error indicates that direct mmap isn't
580          * possible (as opposed to tried but failed) so we'll fall
581          * through to making a private copy of the data and mapping
582          * that if we can */
583         return -ENODEV;
584 }
585
586 /*
587  * set up a private mapping or an anonymous shared mapping
588  */
589 static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
590 {
591         void *base;
592         int ret;
593
594         /* invoke the file's mapping function so that it can keep track of
595          * shared mappings on devices or memory
596          * - VM_MAYSHARE will be set if it may attempt to share
597          */
598         if (vma->vm_file) {
599                 ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
600                 if (ret != -ENOSYS) {
601                         /* shouldn't return success if we're not sharing */
602                         BUG_ON(ret == 0 && !(vma->vm_flags & VM_MAYSHARE));
603                         return ret; /* success or a real error */
604                 }
605
606                 /* getting an ENOSYS error indicates that direct mmap isn't
607                  * possible (as opposed to tried but failed) so we'll try to
608                  * make a private copy of the data and map that instead */
609         }
610
611         /* allocate some memory to hold the mapping
612          * - note that this may not return a page-aligned address if the object
613          *   we're allocating is smaller than a page
614          */
615         base = kmalloc(len, GFP_KERNEL);
616         if (!base)
617                 goto enomem;
618
619         vma->vm_start = (unsigned long) base;
620         vma->vm_end = vma->vm_start + len;
621         vma->vm_flags |= VM_MAPPED_COPY;
622
623 #ifdef WARN_ON_SLACK
624         if (len + WARN_ON_SLACK <= kobjsize(base))
625                 printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n",
626                        len, current->pid, kobjsize(base) - len);
627 #endif
628
629         if (vma->vm_file) {
630                 /* read the contents of a file into the copy */
631                 mm_segment_t old_fs;
632                 loff_t fpos;
633
634                 fpos = vma->vm_pgoff;
635                 fpos <<= PAGE_SHIFT;
636
637                 old_fs = get_fs();
638                 set_fs(KERNEL_DS);
639                 ret = vma->vm_file->f_op->read(vma->vm_file, base, len, &fpos);
640                 set_fs(old_fs);
641
642                 if (ret < 0)
643                         goto error_free;
644
645                 /* clear the last little bit */
646                 if (ret < len)
647                         memset(base + ret, 0, len - ret);
648
649         } else {
650                 /* if it's an anonymous mapping, then just clear it */
651                 memset(base, 0, len);
652         }
653
654         return 0;
655
656 error_free:
657         kfree(base);
658         vma->vm_start = 0;
659         return ret;
660
661 enomem:
662         printk("Allocation of length %lu from process %d failed\n",
663                len, current->pid);
664         show_free_areas();
665         return -ENOMEM;
666 }
667
668 /*
669  * handle mapping creation for uClinux
670  */
671 unsigned long do_mmap_pgoff(struct file *file,
672                             unsigned long addr,
673                             unsigned long len,
674                             unsigned long prot,
675                             unsigned long flags,
676                             unsigned long pgoff)
677 {
678         struct vm_list_struct *vml = NULL;
679         struct vm_area_struct *vma = NULL;
680         struct rb_node *rb;
681         unsigned long capabilities, vm_flags;
682         void *result;
683         int ret;
684
685         /* decide whether we should attempt the mapping, and if so what sort of
686          * mapping */
687         ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
688                                     &capabilities);
689         if (ret < 0)
690                 return ret;
691
692         /* we've determined that we can make the mapping, now translate what we
693          * now know into VMA flags */
694         vm_flags = determine_vm_flags(file, prot, flags, capabilities);
695
696         /* we're going to need to record the mapping if it works */
697         vml = kmalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
698         if (!vml)
699                 goto error_getting_vml;
700         memset(vml, 0, sizeof(*vml));
701
702         down_write(&nommu_vma_sem);
703
704         /* if we want to share, we need to check for VMAs created by other
705          * mmap() calls that overlap with our proposed mapping
706          * - we can only share with an exact match on most regular files
707          * - shared mappings on character devices and memory backed files are
708          *   permitted to overlap inexactly as far as we are concerned, for in
709          *   these cases sharing is handled in the driver or filesystem rather
710          *   than here
711          */
712         if (vm_flags & VM_MAYSHARE) {
713                 unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
714                 unsigned long vmpglen;
715
716                 for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) {
717                         vma = rb_entry(rb, struct vm_area_struct, vm_rb);
718
719                         if (!(vma->vm_flags & VM_MAYSHARE))
720                                 continue;
721
722                         /* search for overlapping mappings on the same file */
723                         if (vma->vm_file->f_dentry->d_inode != file->f_dentry->d_inode)
724                                 continue;
725
726                         if (vma->vm_pgoff >= pgoff + pglen)
727                                 continue;
728
729                         vmpglen = vma->vm_end - vma->vm_start + PAGE_SIZE - 1;
730                         vmpglen >>= PAGE_SHIFT;
731                         if (pgoff >= vma->vm_pgoff + vmpglen)
732                                 continue;
733
734                         /* handle inexactly overlapping matches between mappings */
735                         if (vma->vm_pgoff != pgoff || vmpglen != pglen) {
736                                 if (!(capabilities & BDI_CAP_MAP_DIRECT))
737                                         goto sharing_violation;
738                                 continue;
739                         }
740
741                         /* we've found a VMA we can share */
742                         atomic_inc(&vma->vm_usage);
743
744                         vml->vma = vma;
745                         result = (void *) vma->vm_start;
746                         goto shared;
747                 }
748
749                 vma = NULL;
750
751                 /* obtain the address at which to make a shared mapping
752                  * - this is the hook for quasi-memory character devices to
753                  *   tell us the location of a shared mapping
754                  */
755                 if (file && file->f_op->get_unmapped_area) {
756                         addr = file->f_op->get_unmapped_area(file, addr, len,
757                                                              pgoff, flags);
758                         if (IS_ERR((void *) addr)) {
759                                 ret = addr;
760                                 if (ret != (unsigned long) -ENOSYS)
761                                         goto error;
762
763                                 /* the driver refused to tell us where to site
764                                  * the mapping so we'll have to attempt to copy
765                                  * it */
766                                 ret = (unsigned long) -ENODEV;
767                                 if (!(capabilities & BDI_CAP_MAP_COPY))
768                                         goto error;
769
770                                 capabilities &= ~BDI_CAP_MAP_DIRECT;
771                         }
772                 }
773         }
774
775         /* we're going to need a VMA struct as well */
776         vma = kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
777         if (!vma)
778                 goto error_getting_vma;
779
780         memset(vma, 0, sizeof(*vma));
781         INIT_LIST_HEAD(&vma->anon_vma_node);
782         atomic_set(&vma->vm_usage, 1);
783         if (file)
784                 get_file(file);
785         vma->vm_file    = file;
786         vma->vm_flags   = vm_flags;
787         vma->vm_start   = addr;
788         vma->vm_end     = addr + len;
789         vma->vm_pgoff   = pgoff;
790
791         vml->vma = vma;
792
793         /* set up the mapping */
794         if (file && vma->vm_flags & VM_SHARED)
795                 ret = do_mmap_shared_file(vma, len);
796         else
797                 ret = do_mmap_private(vma, len);
798         if (ret < 0)
799                 goto error;
800
801         /* okay... we have a mapping; now we have to register it */
802         result = (void *) vma->vm_start;
803
804         if (vma->vm_flags & VM_MAPPED_COPY) {
805                 realalloc += kobjsize(result);
806                 askedalloc += len;
807         }
808
809         realalloc += kobjsize(vma);
810         askedalloc += sizeof(*vma);
811
812         current->mm->total_vm += len >> PAGE_SHIFT;
813
814         add_nommu_vma(vma);
815
816  shared:
817         realalloc += kobjsize(vml);
818         askedalloc += sizeof(*vml);
819
820         vml->next = current->mm->context.vmlist;
821         current->mm->context.vmlist = vml;
822
823         up_write(&nommu_vma_sem);
824
825         if (prot & PROT_EXEC)
826                 flush_icache_range((unsigned long) result,
827                                    (unsigned long) result + len);
828
829 #ifdef DEBUG
830         printk("do_mmap:\n");
831         show_process_blocks();
832 #endif
833
834         return (unsigned long) result;
835
836  error:
837         up_write(&nommu_vma_sem);
838         kfree(vml);
839         if (vma) {
840                 if (vma->vm_file) fput(vma->vm_file);
841                 kfree(vma);
842         }
843         return ret;
844
845  sharing_violation:
846         up_write(&nommu_vma_sem);
847         printk("Attempt to share mismatched mappings\n");
848         kfree(vml);
849         return -EINVAL;
850
851  error_getting_vma:
852         up_write(&nommu_vma_sem);
853         kfree(vml);
854         printk("Allocation of vml for %lu byte allocation from process %d failed\n",
855                len, current->pid);
856         show_free_areas();
857         return -ENOMEM;
858
859  error_getting_vml:
860         printk("Allocation of vml for %lu byte allocation from process %d failed\n",
861                len, current->pid);
862         show_free_areas();
863         return -ENOMEM;
864 }
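/*
 * Example: a minimal userspace sketch (guarded out of the kernel build) of the
 * sharing path above.  On a backing store that can map directly (a chardev or
 * memory-backed file providing get_unmapped_area), a second MAP_SHARED request
 * that matches exactly is satisfied by bumping vm_usage on the existing VMA,
 * so both mappings may come back at the very same address.
 */
#if 0   /* userspace illustration, not kernel code */
#include <sys/mman.h>

static int share_twice(int fd, size_t len)
{
        char *a = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        char *b = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

        if (a == MAP_FAILED || b == MAP_FAILED)
                return -1;

        a[0] = 'x';             /* immediately visible through b */
        return a == b;          /* often true here: the same VMA is reused */
}
#endif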
865
866 /*
867  * handle mapping disposal for uClinux
868  */
869 static void put_vma(struct vm_area_struct *vma)
870 {
871         if (vma) {
872                 down_write(&nommu_vma_sem);
873
874                 if (atomic_dec_and_test(&vma->vm_usage)) {
875                         delete_nommu_vma(vma);
876
877                         if (vma->vm_ops && vma->vm_ops->close)
878                                 vma->vm_ops->close(vma);
879
880                         /* IO memory and memory shared directly out of the pagecache from
881                          * ramfs/tmpfs mustn't be released here */
882                         if (vma->vm_flags & VM_MAPPED_COPY) {
883                                 realalloc -= kobjsize((void *) vma->vm_start);
884                                 askedalloc -= vma->vm_end - vma->vm_start;
885                                 kfree((void *) vma->vm_start);
886                         }
887
888                         realalloc -= kobjsize(vma);
889                         askedalloc -= sizeof(*vma);
890
891                         if (vma->vm_file)
892                                 fput(vma->vm_file);
893                         kfree(vma);
894                 }
895
896                 up_write(&nommu_vma_sem);
897         }
898 }
899
900 int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
901 {
902         struct vm_list_struct *vml, **parent;
903         unsigned long end = addr + len;
904
905 #ifdef DEBUG
906         printk("do_munmap:\n");
907 #endif
908
909         for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next)
910                 if ((*parent)->vma->vm_start == addr &&
911                     (*parent)->vma->vm_end == end)
912                         goto found;
913
914         printk("munmap of non-mmaped memory by process %d (%s): %p\n",
915                current->pid, current->comm, (void *) addr);
916         return -EINVAL;
917
918  found:
919         vml = *parent;
920
921         put_vma(vml->vma);
922
923         *parent = vml->next;
924         realalloc -= kobjsize(vml);
925         askedalloc -= sizeof(*vml);
926         kfree(vml);
927         mm->total_vm -= len >> PAGE_SHIFT;
928
929 #ifdef DEBUG
930         show_process_blocks();
931 #endif
932
933         return 0;
934 }
935
936 /* Release all mmaps. */
937 void exit_mmap(struct mm_struct * mm)
938 {
939         struct vm_list_struct *tmp;
940
941         if (mm) {
942 #ifdef DEBUG
943                 printk("Exit_mmap:\n");
944 #endif
945
946                 mm->total_vm = 0;
947
948                 while ((tmp = mm->context.vmlist)) {
949                         mm->context.vmlist = tmp->next;
950                         put_vma(tmp->vma);
951
952                         realalloc -= kobjsize(tmp);
953                         askedalloc -= sizeof(*tmp);
954                         kfree(tmp);
955                 }
956
957 #ifdef DEBUG
958                 show_process_blocks();
959 #endif
960         }
961 }
962
963 asmlinkage long sys_munmap(unsigned long addr, size_t len)
964 {
965         int ret;
966         struct mm_struct *mm = current->mm;
967
968         down_write(&mm->mmap_sem);
969         ret = do_munmap(mm, addr, len);
970         up_write(&mm->mmap_sem);
971         return ret;
972 }
973
974 unsigned long do_brk(unsigned long addr, unsigned long len)
975 {
976         return -ENOMEM;
977 }
978
979 /*
980  * Expand (or shrink) an existing mapping, potentially moving it at the
981  * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
982  *
983  * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
984  * This option implies MREMAP_MAYMOVE.
985  *
986  * on uClinux, we only permit changing a mapping's size, and only as long as
987  * it stays within the hole kmalloc()'d by do_mmap_pgoff() and isn't shareable
988  */
989 unsigned long do_mremap(unsigned long addr,
990                         unsigned long old_len, unsigned long new_len,
991                         unsigned long flags, unsigned long new_addr)
992 {
993         struct vm_list_struct *vml = NULL;
994
995         /* insanity checks first */
996         if (new_len == 0)
997                 return (unsigned long) -EINVAL;
998
999         if (flags & MREMAP_FIXED && new_addr != addr)
1000                 return (unsigned long) -EINVAL;
1001
1002         for (vml = current->mm->context.vmlist; vml; vml = vml->next)
1003                 if (vml->vma->vm_start == addr)
1004                         goto found;
1005
1006         return (unsigned long) -EINVAL;
1007
1008  found:
1009         if (vml->vma->vm_end != vml->vma->vm_start + old_len)
1010                 return (unsigned long) -EFAULT;
1011
1012         if (vml->vma->vm_flags & VM_MAYSHARE)
1013                 return (unsigned long) -EPERM;
1014
1015         if (new_len > kobjsize((void *) addr))
1016                 return (unsigned long) -ENOMEM;
1017
1018         /* all checks complete - do it */
1019         vml->vma->vm_end = vml->vma->vm_start + new_len;
1020
1021         askedalloc -= old_len;
1022         askedalloc += new_len;
1023
1024         return vml->vma->vm_start;
1025 }
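/*
 * Example: a minimal userspace sketch (guarded out of the kernel build) of a
 * resize that do_mremap() above can honour - in place only, never moved, and
 * only within the slack of the original kmalloc() allocation, so a caller
 * should be prepared for even small growth requests to fail.
 */
#if 0   /* userspace illustration, not kernel code */
#define _GNU_SOURCE
#include <sys/mman.h>

static void *resize_in_place(void *base, size_t old_len, size_t new_len)
{
        /* no MREMAP_MAYMOVE: the block can only be resized where it sits */
        void *p = mremap(base, old_len, new_len, 0);

        return p == MAP_FAILED ? NULL : p;
}
#endif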
1026
1027 /*
1028  * look up the VMA which holds a given address - NULL if the address is unmapped
1029  */
1030 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
1031 {
1032         struct vm_list_struct *vml;
1033
1034         for (vml = mm->context.vmlist; vml; vml = vml->next)
1035                 if (addr >= vml->vma->vm_start && addr < vml->vma->vm_end)
1036                         return vml->vma;
1037
1038         return NULL;
1039 }
1040
1041 EXPORT_SYMBOL(find_vma);
1042
1043 struct page * follow_page(struct mm_struct *mm, unsigned long addr, int write)
1044 {
1045         return NULL;
1046 }
1047
1048 struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
1049 {
1050         return NULL;
1051 }
1052
1053 int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
1054                 unsigned long to, unsigned long size, pgprot_t prot)
1055 {
1056         return -EPERM;
1057 }
1058
1059 void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1060 {
1061 }
1062
1063 unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
1064         unsigned long len, unsigned long pgoff, unsigned long flags)
1065 {
1066         return -ENOMEM;
1067 }
1068
1069 void arch_unmap_area(struct vm_area_struct *area)
1070 {
1071 }
1072
1073 void update_mem_hiwater(struct task_struct *tsk)
1074 {
1075         if (likely(tsk->mm)) {
1076                 unsigned long rss = get_mm_counter(tsk->mm, rss);
1077
1078                 if (tsk->mm->hiwater_rss < rss)
1079                         tsk->mm->hiwater_rss = rss;
1080                 if (tsk->mm->hiwater_vm < tsk->mm->total_vm)
1081                         tsk->mm->hiwater_vm = tsk->mm->total_vm;
1082         }
1083 }
1084
1085 void unmap_mapping_range(struct address_space *mapping,
1086                          loff_t const holebegin, loff_t const holelen,
1087                          int even_cows)
1088 {
1089 }
1090
1091 /*
1092  * Check that a process has enough memory to allocate a new virtual
1093  * mapping. 0 means there is enough memory for the allocation to
1094  * succeed and -ENOMEM implies there is not.
1095  *
1096  * We currently support three overcommit policies, which are set via the
1097  * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting
1098  *
1099  * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
1100  * Additional code 2002 Jul 20 by Robert Love.
1101  *
1102  * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
1103  *
1104  * Note this is a helper function intended to be used by LSMs which
1105  * wish to use this logic.
1106  */
1107 int __vm_enough_memory(long pages, int cap_sys_admin)
1108 {
1109         unsigned long free, allowed;
1110
1111         vm_acct_memory(pages);
1112
1113         /*
1114          * Sometimes we want to use more memory than we have
1115          */
1116         if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
1117                 return 0;
1118
1119         if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
1120                 unsigned long n;
1121
1122                 free = get_page_cache_size();
1123                 free += nr_swap_pages;
1124
1125                 /*
1126                  * Any slabs which are created with the
1127                  * SLAB_RECLAIM_ACCOUNT flag claim to have contents
1128                  * which are reclaimable, under pressure.  The dentry
1129                  * cache and most inode caches should fall into this
1130                  */
1131                 free += atomic_read(&slab_reclaim_pages);
1132
1133                 /*
1134                  * Leave the last 3% for root
1135                  */
1136                 if (!cap_sys_admin)
1137                         free -= free / 32;
1138
1139                 if (free > pages)
1140                         return 0;
1141
1142                 /*
1143                  * nr_free_pages() is very expensive on large systems,
1144                  * only call if we're about to fail.
1145                  */
1146                 n = nr_free_pages();
1147                 if (!cap_sys_admin)
1148                         n -= n / 32;
1149                 free += n;
1150
1151                 if (free > pages)
1152                         return 0;
1153                 vm_unacct_memory(pages);
1154                 return -ENOMEM;
1155         }
1156
1157         allowed = totalram_pages * sysctl_overcommit_ratio / 100;
1158         /*
1159          * Leave the last 3% for root
1160          */
1161         if (!cap_sys_admin)
1162                 allowed -= allowed / 32;
1163         allowed += total_swap_pages;
1164
1165         /* Don't let a single process grow too big:
1166            leave 3% of the size of this process for other processes */
1167         allowed -= current->mm->total_vm / 32;
1168
1169         if (atomic_read(&vm_committed_space) < allowed)
1170                 return 0;
1171
1172         vm_unacct_memory(pages);
1173
1174         return -ENOMEM;
1175 }
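/*
 * Worked example (hypothetical numbers, purely illustrative): with the strict
 * OVERCOMMIT_NEVER policy, 16384 pages of RAM, no swap, the default 50% ratio,
 * no CAP_SYS_ADMIN and a process already holding total_vm = 2048 pages, the
 * arithmetic above gives
 *
 *      allowed  = 16384 * 50 / 100      =  8192 pages
 *      allowed -= 8192 / 32             ->  7936 pages   (3% kept for root)
 *      allowed += total_swap_pages (0)  ->  7936 pages
 *      allowed -= 2048 / 32             ->  7872 pages
 *
 * so a new charge succeeds only while vm_committed_space stays below 7872
 * pages (about 30 MiB with 4 KiB pages).
 */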
1176
1177 int in_gate_area_no_task(unsigned long addr)
1178 {
1179         return 0;
1180 }