pid namespaces: define is_global_init() and is_container_init()
[safe/jmp/linux-2.6] arch/s390/lib/uaccess_pt.c
/*
 *  arch/s390/lib/uaccess_pt.c
 *
 *  User access functions based on page table walks for enhanced
 *  system layout without hardware support.
 *
 *    Copyright IBM Corp. 2006
 *    Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com)
 */

#include <linux/errno.h>
#include <linux/hardirq.h>
#include <linux/mm.h>
#include <asm/uaccess.h>
#include <asm/futex.h>
#include "uaccess.h"

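/*
 * Resolve a fault on a user address by walking the VMA tree and calling
 * handle_mm_fault(), much like the architecture's regular fault handler.
 * Returns 0 on success and -EFAULT if the address cannot be made
 * accessible (or if we are in atomic context and may not sleep).
 */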
static int __handle_fault(struct mm_struct *mm, unsigned long address,
                          int write_access)
{
        struct vm_area_struct *vma;
        int ret = -EFAULT;
        int fault;

        if (in_atomic())
                return ret;
        down_read(&mm->mmap_sem);
        vma = find_vma(mm, address);
        if (unlikely(!vma))
                goto out;
        if (unlikely(vma->vm_start > address)) {
                if (!(vma->vm_flags & VM_GROWSDOWN))
                        goto out;
                if (expand_stack(vma, address))
                        goto out;
        }

        if (!write_access) {
                /* page not present, check vm flags */
                if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
                        goto out;
        } else {
                if (!(vma->vm_flags & VM_WRITE))
                        goto out;
        }

survive:
        fault = handle_mm_fault(mm, vma, address, write_access);
        if (unlikely(fault & VM_FAULT_ERROR)) {
                if (fault & VM_FAULT_OOM)
                        goto out_of_memory;
                else if (fault & VM_FAULT_SIGBUS)
                        goto out_sigbus;
                BUG();
        }
        if (fault & VM_FAULT_MAJOR)
                current->maj_flt++;
        else
                current->min_flt++;
        ret = 0;
out:
        up_read(&mm->mmap_sem);
        return ret;

out_of_memory:
        up_read(&mm->mmap_sem);
        if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
        }
        printk("VM: killing process %s\n", current->comm);
        return ret;

out_sigbus:
        up_read(&mm->mmap_sem);
        current->thread.prot_addr = address;
        current->thread.trap_no = 0x11;
        force_sig(SIGBUS, current);
        return ret;
}

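/*
 * Copy "n" bytes between kernel space and user space by walking the
 * page tables of the current process under mm->page_table_lock.  If a
 * page is not mapped (or not writable when writing to user space), the
 * lock is dropped, __handle_fault() resolves the fault and the walk is
 * retried.  Returns the number of bytes that could not be copied.
 */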
static size_t __user_copy_pt(unsigned long uaddr, void *kptr,
                             size_t n, int write_user)
{
        struct mm_struct *mm = current->mm;
        unsigned long offset, pfn, done, size;
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;
        void *from, *to;

        done = 0;
retry:
        spin_lock(&mm->page_table_lock);
        do {
                pgd = pgd_offset(mm, uaddr);
                if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
                        goto fault;

                pmd = pmd_offset(pgd, uaddr);
                if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
                        goto fault;

                pte = pte_offset_map(pmd, uaddr);
                if (!pte || !pte_present(*pte) ||
                    (write_user && !pte_write(*pte)))
                        goto fault;

                pfn = pte_pfn(*pte);
                if (!pfn_valid(pfn))
                        goto out;

                offset = uaddr & (PAGE_SIZE - 1);
                size = min(n - done, PAGE_SIZE - offset);
                if (write_user) {
                        to = (void *)((pfn << PAGE_SHIFT) + offset);
                        from = kptr + done;
                } else {
                        from = (void *)((pfn << PAGE_SHIFT) + offset);
                        to = kptr + done;
                }
                memcpy(to, from, size);
                done += size;
                uaddr += size;
        } while (done < n);
out:
        spin_unlock(&mm->page_table_lock);
        return n - done;
fault:
        spin_unlock(&mm->page_table_lock);
        if (__handle_fault(mm, uaddr, write_user))
                return n - done;
        goto retry;
}

/*
 * Do DAT for user address by page table walk, return kernel address.
 * This function needs to be called with current->mm->page_table_lock held.
 */
static unsigned long __dat_user_addr(unsigned long uaddr)
{
        struct mm_struct *mm = current->mm;
        unsigned long pfn, ret;
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;
        int rc;

        ret = 0;
retry:
        pgd = pgd_offset(mm, uaddr);
        if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
                goto fault;

        pmd = pmd_offset(pgd, uaddr);
        if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
                goto fault;

        pte = pte_offset_map(pmd, uaddr);
        if (!pte || !pte_present(*pte))
                goto fault;

        pfn = pte_pfn(*pte);
        if (!pfn_valid(pfn))
                goto out;

        ret = (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1));
out:
        return ret;
fault:
        spin_unlock(&mm->page_table_lock);
        rc = __handle_fault(mm, uaddr, 0);
        spin_lock(&mm->page_table_lock);
        if (rc)
                goto out;
        goto retry;
}

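/*
 * copy_from_user_pt() and copy_to_user_pt() take a shortcut for
 * KERNEL_DS (the "user" pointer is really a kernel address) and
 * otherwise go through the page table walk above.  On a partial copy
 * from user space the remaining destination bytes are zeroed, as the
 * generic copy_from_user() semantics require.
 */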
size_t copy_from_user_pt(size_t n, const void __user *from, void *to)
{
        size_t rc;

        if (segment_eq(get_fs(), KERNEL_DS)) {
                memcpy(to, (void __kernel __force *) from, n);
                return 0;
        }
        rc = __user_copy_pt((unsigned long) from, to, n, 0);
        if (unlikely(rc))
                memset(to + n - rc, 0, rc);
        return rc;
}

size_t copy_to_user_pt(size_t n, void __user *to, const void *from)
{
        if (segment_eq(get_fs(), KERNEL_DS)) {
                memcpy((void __kernel __force *) to, from, n);
                return 0;
        }
        return __user_copy_pt((unsigned long) to, (void *) from, n, 1);
}

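/*
 * Clear "n" bytes of user memory by repeatedly copying from
 * empty_zero_page, one page at a time.  Returns the number of bytes
 * that could not be cleared.
 */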
static size_t clear_user_pt(size_t n, void __user *to)
{
        long done, size, ret;

        if (segment_eq(get_fs(), KERNEL_DS)) {
                memset((void __kernel __force *) to, 0, n);
                return 0;
        }
        done = 0;
        do {
                if (n - done > PAGE_SIZE)
                        size = PAGE_SIZE;
                else
                        size = n - done;
                ret = __user_copy_pt((unsigned long) to + done,
                                      &empty_zero_page, size, 1);
                done += size;
                if (ret)
                        return ret + n - done;
        } while (done < n);
        return 0;
}

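/*
 * Determine the length of a user space string (including the
 * terminating '\0') with a page table walk, faulting pages in as
 * needed.  Returns 0 if a fault could not be resolved.
 */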
static size_t strnlen_user_pt(size_t count, const char __user *src)
{
        char *addr;
        unsigned long uaddr = (unsigned long) src;
        struct mm_struct *mm = current->mm;
        unsigned long offset, pfn, done, len;
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;
        size_t len_str;

        if (segment_eq(get_fs(), KERNEL_DS))
                return strnlen((const char __kernel __force *) src, count) + 1;
        done = 0;
retry:
        spin_lock(&mm->page_table_lock);
        do {
                pgd = pgd_offset(mm, uaddr);
                if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
                        goto fault;

                pmd = pmd_offset(pgd, uaddr);
                if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
                        goto fault;

                pte = pte_offset_map(pmd, uaddr);
                if (!pte || !pte_present(*pte))
                        goto fault;

                pfn = pte_pfn(*pte);
                if (!pfn_valid(pfn)) {
                        done = -1;
                        goto out;
                }

                offset = uaddr & (PAGE_SIZE-1);
                addr = (char *)(pfn << PAGE_SHIFT) + offset;
                len = min(count - done, PAGE_SIZE - offset);
                len_str = strnlen(addr, len);
                done += len_str;
                uaddr += len_str;
        } while ((len_str == len) && (done < count));
out:
        spin_unlock(&mm->page_table_lock);
        return done + 1;
fault:
        spin_unlock(&mm->page_table_lock);
        if (__handle_fault(mm, uaddr, 0)) {
                return 0;
        }
        goto retry;
}

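/*
 * Copy a NUL-terminated string from user space.  Returns the string
 * length (excluding the terminating '\0') if the terminator was copied,
 * the number of bytes copied otherwise, or -EFAULT on an unresolvable
 * fault.
 */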
static size_t strncpy_from_user_pt(size_t count, const char __user *src,
                                   char *dst)
{
        size_t n = strnlen_user_pt(count, src);

        if (!n)
                return -EFAULT;
        if (n > count)
                n = count;
        if (segment_eq(get_fs(), KERNEL_DS)) {
                memcpy(dst, (const char __kernel __force *) src, n);
                if (dst[n-1] == '\0')
                        return n-1;
                else
                        return n;
        }
        if (__user_copy_pt((unsigned long) src, dst, n, 0))
                return -EFAULT;
        if (dst[n-1] == '\0')
                return n-1;
        else
                return n;
}

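/*
 * Copy between two user space buffers.  Both the source and the
 * destination page table entries are resolved before each chunk is
 * copied; whichever lookup fails determines the address and access
 * type passed to __handle_fault().  Returns the number of bytes that
 * could not be copied.
 */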
static size_t copy_in_user_pt(size_t n, void __user *to,
                              const void __user *from)
{
        struct mm_struct *mm = current->mm;
        unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to,
                      uaddr, done, size;
        unsigned long uaddr_from = (unsigned long) from;
        unsigned long uaddr_to = (unsigned long) to;
        pgd_t *pgd_from, *pgd_to;
        pmd_t *pmd_from, *pmd_to;
        pte_t *pte_from, *pte_to;
        int write_user;

        done = 0;
retry:
        spin_lock(&mm->page_table_lock);
        do {
                pgd_from = pgd_offset(mm, uaddr_from);
                if (pgd_none(*pgd_from) || unlikely(pgd_bad(*pgd_from))) {
                        uaddr = uaddr_from;
                        write_user = 0;
                        goto fault;
                }
                pgd_to = pgd_offset(mm, uaddr_to);
                if (pgd_none(*pgd_to) || unlikely(pgd_bad(*pgd_to))) {
                        uaddr = uaddr_to;
                        write_user = 1;
                        goto fault;
                }

                pmd_from = pmd_offset(pgd_from, uaddr_from);
                if (pmd_none(*pmd_from) || unlikely(pmd_bad(*pmd_from))) {
                        uaddr = uaddr_from;
                        write_user = 0;
                        goto fault;
                }
                pmd_to = pmd_offset(pgd_to, uaddr_to);
                if (pmd_none(*pmd_to) || unlikely(pmd_bad(*pmd_to))) {
                        uaddr = uaddr_to;
                        write_user = 1;
                        goto fault;
                }

                pte_from = pte_offset_map(pmd_from, uaddr_from);
                if (!pte_from || !pte_present(*pte_from)) {
                        uaddr = uaddr_from;
                        write_user = 0;
                        goto fault;
                }
                pte_to = pte_offset_map(pmd_to, uaddr_to);
                if (!pte_to || !pte_present(*pte_to) || !pte_write(*pte_to)) {
                        uaddr = uaddr_to;
                        write_user = 1;
                        goto fault;
                }

                pfn_from = pte_pfn(*pte_from);
                if (!pfn_valid(pfn_from))
                        goto out;
                pfn_to = pte_pfn(*pte_to);
                if (!pfn_valid(pfn_to))
                        goto out;

                offset_from = uaddr_from & (PAGE_SIZE-1);
                offset_to = uaddr_to & (PAGE_SIZE-1);
                offset_max = max(offset_from, offset_to);
                size = min(n - done, PAGE_SIZE - offset_max);

                memcpy((void *)(pfn_to << PAGE_SHIFT) + offset_to,
                       (void *)(pfn_from << PAGE_SHIFT) + offset_from, size);
                done += size;
                uaddr_from += size;
                uaddr_to += size;
        } while (done < n);
out:
        spin_unlock(&mm->page_table_lock);
        return n - done;
fault:
        spin_unlock(&mm->page_table_lock);
        if (__handle_fault(mm, uaddr, write_user))
                return n - done;
        goto retry;
}

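/*
 * Atomic read-modify-write on a futex word: load the old value, apply
 * the operation given by "insn", and retry the compare-and-swap until
 * it succeeds.  The exception table entries turn a fault inside the
 * sequence into -EFAULT.
 */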
#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg)      \
        asm volatile("0: l   %1,0(%6)\n"                                \
                     "1: " insn                                         \
                     "2: cs  %1,%2,0(%6)\n"                             \
                     "3: jl  1b\n"                                      \
                     "   lhi %0,0\n"                                    \
                     "4:\n"                                             \
                     EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b)    \
                     : "=d" (ret), "=&d" (oldval), "=&d" (newval),      \
                       "=m" (*uaddr)                                    \
                     : "0" (-EFAULT), "d" (oparg), "a" (uaddr),         \
                       "m" (*uaddr) : "cc" );

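/*
 * Futex "atomic op" handler for the page table walk variant: translate
 * the user address under page_table_lock, pin the page and perform the
 * requested operation on the kernel mapping.  Returns -EFAULT if the
 * address cannot be translated and -ENOSYS for unknown operations.
 */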
int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
{
        int oldval = 0, newval, ret;

        spin_lock(&current->mm->page_table_lock);
        uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
        if (!uaddr) {
                spin_unlock(&current->mm->page_table_lock);
                return -EFAULT;
        }
        get_page(virt_to_page(uaddr));
        spin_unlock(&current->mm->page_table_lock);
        switch (op) {
        case FUTEX_OP_SET:
                __futex_atomic_op("lr %2,%5\n",
                                  ret, oldval, newval, uaddr, oparg);
                break;
        case FUTEX_OP_ADD:
                __futex_atomic_op("lr %2,%1\nar %2,%5\n",
                                  ret, oldval, newval, uaddr, oparg);
                break;
        case FUTEX_OP_OR:
                __futex_atomic_op("lr %2,%1\nor %2,%5\n",
                                  ret, oldval, newval, uaddr, oparg);
                break;
        case FUTEX_OP_ANDN:
                __futex_atomic_op("lr %2,%1\nnr %2,%5\n",
                                  ret, oldval, newval, uaddr, oparg);
                break;
        case FUTEX_OP_XOR:
                __futex_atomic_op("lr %2,%1\nxr %2,%5\n",
                                  ret, oldval, newval, uaddr, oparg);
                break;
        default:
                ret = -ENOSYS;
        }
        put_page(virt_to_page(uaddr));
        *old = oldval;
        return ret;
}

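/*
 * Futex compare-and-exchange for the page table walk variant: translate
 * and pin the futex word, then do a single "cs" on the kernel mapping.
 * Returns the current value of the futex word (equal to oldval on
 * success), or -EFAULT if the word is inaccessible.
 */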
int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
{
        int ret;

        spin_lock(&current->mm->page_table_lock);
        uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
        if (!uaddr) {
                spin_unlock(&current->mm->page_table_lock);
                return -EFAULT;
        }
        get_page(virt_to_page(uaddr));
        spin_unlock(&current->mm->page_table_lock);
        asm volatile("   cs   %1,%4,0(%5)\n"
                     "0: lr   %0,%1\n"
                     "1:\n"
                     EX_TABLE(0b,1b)
                     : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
                     : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
                     : "cc", "memory" );
        put_page(virt_to_page(uaddr));
        return ret;
}

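/*
 * Ops vector for the page table walk based implementation.  The
 * "small" copy callbacks simply reuse the generic routines above; the
 * active uaccess_ops vector is selected elsewhere at boot, with this
 * one covering system layouts without hardware support (see the file
 * header).
 */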
struct uaccess_ops uaccess_pt = {
        .copy_from_user         = copy_from_user_pt,
        .copy_from_user_small   = copy_from_user_pt,
        .copy_to_user           = copy_to_user_pt,
        .copy_to_user_small     = copy_to_user_pt,
        .copy_in_user           = copy_in_user_pt,
        .clear_user             = clear_user_pt,
        .strnlen_user           = strnlen_user_pt,
        .strncpy_from_user      = strncpy_from_user_pt,
        .futex_atomic_op        = futex_atomic_op_pt,
        .futex_atomic_cmpxchg   = futex_atomic_cmpxchg_pt,
};
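
/*
 * A minimal usage sketch (not part of this file): callers reach these
 * routines through the s390 uaccess ops vector.  Assuming the extern
 * "uaccess" instance declared in the architecture's uaccess header
 * (that name is an assumption, it is not shown here), a read of one
 * int from user space would look roughly like:
 *
 *      int val;
 *
 *      if (uaccess.copy_from_user(sizeof(val), uptr, &val))
 *              return -EFAULT;
 */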