[PATCH] mm: pte_offset_map_lock loops
[safe/jmp/linux-2.6] / mm / mprotect.c
1 /*
2  *  mm/mprotect.c
3  *
4  *  (C) Copyright 1994 Linus Torvalds
5  *  (C) Copyright 2002 Christoph Hellwig
6  *
7  *  Address space accounting code       <alan@redhat.com>
8  *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
9  */
10
11 #include <linux/mm.h>
12 #include <linux/hugetlb.h>
13 #include <linux/slab.h>
14 #include <linux/shm.h>
15 #include <linux/mman.h>
16 #include <linux/fs.h>
17 #include <linux/highmem.h>
18 #include <linux/security.h>
19 #include <linux/mempolicy.h>
20 #include <linux/personality.h>
21 #include <linux/syscalls.h>
22
23 #include <asm/uaccess.h>
24 #include <asm/pgtable.h>
25 #include <asm/cacheflush.h>
26 #include <asm/tlbflush.h>
27
28 static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
29                 unsigned long addr, unsigned long end, pgprot_t newprot)
30 {
31         pte_t *pte;
32         spinlock_t *ptl;
33
34         pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
35         do {
36                 if (pte_present(*pte)) {
37                         pte_t ptent;
38
39                         /* Avoid an SMP race with hardware updated dirty/clean
40                          * bits by wiping the pte and then setting the new pte
41                          * into place.
42                          */
43                         ptent = pte_modify(ptep_get_and_clear(mm, addr, pte), newprot);
44                         set_pte_at(mm, addr, pte, ptent);
45                         lazy_mmu_prot_update(ptent);
46                 }
47         } while (pte++, addr += PAGE_SIZE, addr != end);
48         pte_unmap_unlock(pte - 1, ptl);
49 }
50
51 static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud,
52                 unsigned long addr, unsigned long end, pgprot_t newprot)
53 {
54         pmd_t *pmd;
55         unsigned long next;
56
57         pmd = pmd_offset(pud, addr);
58         do {
59                 next = pmd_addr_end(addr, end);
60                 if (pmd_none_or_clear_bad(pmd))
61                         continue;
62                 change_pte_range(mm, pmd, addr, next, newprot);
63         } while (pmd++, addr = next, addr != end);
64 }
65
66 static inline void change_pud_range(struct mm_struct *mm, pgd_t *pgd,
67                 unsigned long addr, unsigned long end, pgprot_t newprot)
68 {
69         pud_t *pud;
70         unsigned long next;
71
72         pud = pud_offset(pgd, addr);
73         do {
74                 next = pud_addr_end(addr, end);
75                 if (pud_none_or_clear_bad(pud))
76                         continue;
77                 change_pmd_range(mm, pud, addr, next, newprot);
78         } while (pud++, addr = next, addr != end);
79 }
80
81 static void change_protection(struct vm_area_struct *vma,
82                 unsigned long addr, unsigned long end, pgprot_t newprot)
83 {
84         struct mm_struct *mm = vma->vm_mm;
85         pgd_t *pgd;
86         unsigned long next;
87         unsigned long start = addr;
88
89         BUG_ON(addr >= end);
90         pgd = pgd_offset(mm, addr);
91         flush_cache_range(vma, addr, end);
92         do {
93                 next = pgd_addr_end(addr, end);
94                 if (pgd_none_or_clear_bad(pgd))
95                         continue;
96                 change_pud_range(mm, pgd, addr, next, newprot);
97         } while (pgd++, addr = next, addr != end);
98         flush_tlb_range(vma, start, end);
99 }
100
101 static int
102 mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
103         unsigned long start, unsigned long end, unsigned long newflags)
104 {
105         struct mm_struct *mm = vma->vm_mm;
106         unsigned long oldflags = vma->vm_flags;
107         long nrpages = (end - start) >> PAGE_SHIFT;
108         unsigned long charged = 0;
109         pgprot_t newprot;
110         pgoff_t pgoff;
111         int error;
112
113         if (newflags == oldflags) {
114                 *pprev = vma;
115                 return 0;
116         }
117
118         /*
119          * If we make a private mapping writable we increase our commit;
120          * but (without finer accounting) cannot reduce our commit if we
121          * make it unwritable again.
122          *
123          * FIXME? We haven't defined a VM_NORESERVE flag, so mprotecting
124          * a MAP_NORESERVE private mapping to writable will now reserve.
125          */
126         if (newflags & VM_WRITE) {
127                 if (oldflags & VM_RESERVED) {
128                         BUG_ON(oldflags & VM_WRITE);
129                         printk(KERN_WARNING "program %s is using MAP_PRIVATE, "
130                                 "PROT_WRITE mprotect of VM_RESERVED memory, "
131                                 "which is deprecated. Please report this to "
132                                 "linux-kernel@vger.kernel.org\n",current->comm);
133                         return -EACCES;
134                 }
135                 if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) {
136                         charged = nrpages;
137                         if (security_vm_enough_memory(charged))
138                                 return -ENOMEM;
139                         newflags |= VM_ACCOUNT;
140                 }
141         }
142
143         newprot = protection_map[newflags & 0xf];
144
145         /*
146          * First try to merge with previous and/or next vma.
147          */
148         pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
149         *pprev = vma_merge(mm, *pprev, start, end, newflags,
150                         vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
151         if (*pprev) {
152                 vma = *pprev;
153                 goto success;
154         }
155
156         *pprev = vma;
157
158         if (start != vma->vm_start) {
159                 error = split_vma(mm, vma, start, 1);
160                 if (error)
161                         goto fail;
162         }
163
164         if (end != vma->vm_end) {
165                 error = split_vma(mm, vma, end, 0);
166                 if (error)
167                         goto fail;
168         }
169
170 success:
171         /*
172          * vm_flags and vm_page_prot are protected by the mmap_sem
173          * held in write mode.
174          */
175         vma->vm_flags = newflags;
176         vma->vm_page_prot = newprot;
177         change_protection(vma, start, end, newprot);
178         vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
179         vm_stat_account(mm, newflags, vma->vm_file, nrpages);
180         return 0;
181
182 fail:
183         vm_unacct_memory(charged);
184         return error;
185 }
186
187 asmlinkage long
188 sys_mprotect(unsigned long start, size_t len, unsigned long prot)
189 {
190         unsigned long vm_flags, nstart, end, tmp, reqprot;
191         struct vm_area_struct *vma, *prev;
192         int error = -EINVAL;
193         const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
194         prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
195         if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
196                 return -EINVAL;
197
198         if (start & ~PAGE_MASK)
199                 return -EINVAL;
200         if (!len)
201                 return 0;
202         len = PAGE_ALIGN(len);
203         end = start + len;
204         if (end <= start)
205                 return -ENOMEM;
206         if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM))
207                 return -EINVAL;
208
209         reqprot = prot;
210         /*
211          * Does the application expect PROT_READ to imply PROT_EXEC:
212          */
213         if (unlikely((prot & PROT_READ) &&
214                         (current->personality & READ_IMPLIES_EXEC)))
215                 prot |= PROT_EXEC;
216
217         vm_flags = calc_vm_prot_bits(prot);
218
219         down_write(&current->mm->mmap_sem);
220
221         vma = find_vma_prev(current->mm, start, &prev);
222         error = -ENOMEM;
223         if (!vma)
224                 goto out;
225         if (unlikely(grows & PROT_GROWSDOWN)) {
226                 if (vma->vm_start >= end)
227                         goto out;
228                 start = vma->vm_start;
229                 error = -EINVAL;
230                 if (!(vma->vm_flags & VM_GROWSDOWN))
231                         goto out;
232         }
233         else {
234                 if (vma->vm_start > start)
235                         goto out;
236                 if (unlikely(grows & PROT_GROWSUP)) {
237                         end = vma->vm_end;
238                         error = -EINVAL;
239                         if (!(vma->vm_flags & VM_GROWSUP))
240                                 goto out;
241                 }
242         }
243         if (start > vma->vm_start)
244                 prev = vma;
245
246         for (nstart = start ; ; ) {
247                 unsigned long newflags;
248
249                 /* Here we know that  vma->vm_start <= nstart < vma->vm_end. */
250
251                 if (is_vm_hugetlb_page(vma)) {
252                         error = -EACCES;
253                         goto out;
254                 }
255
256                 newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
257
258                 /* newflags >> 4 shift VM_MAY% in place of VM_% */
259                 if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
260                         error = -EACCES;
261                         goto out;
262                 }
263
264                 error = security_file_mprotect(vma, reqprot, prot);
265                 if (error)
266                         goto out;
267
268                 tmp = vma->vm_end;
269                 if (tmp > end)
270                         tmp = end;
271                 error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
272                 if (error)
273                         goto out;
274                 nstart = tmp;
275
276                 if (nstart < prev->vm_end)
277                         nstart = prev->vm_end;
278                 if (nstart >= end)
279                         goto out;
280
281                 vma = prev->vm_next;
282                 if (!vma || vma->vm_start != nstart) {
283                         error = -ENOMEM;
284                         goto out;
285                 }
286         }
287 out:
288         up_write(&current->mm->mmap_sem);
289         return error;
290 }