diff --git a/mm/mlock.c b/mm/mlock.c
index 1ada366..cbe9e05 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -173,12 +173,13 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
                  (atomic_read(&mm->mm_users) != 0));
 
        /*
-        * mlock:   don't page populate if page has PROT_NONE permission.
-        * munlock: the pages always do munlock althrough
-        *          its has PROT_NONE permission.
+        * mlock:   don't page populate if vma has PROT_NONE permission.
+        * munlock: always do munlock although the vma has PROT_NONE
+        *          permission, or SIGKILL is pending.
         */
        if (!mlock)
-               gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS;
+               gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS |
+                            GUP_FLAGS_IGNORE_SIGKILL;
 
        if (vma->vm_flags & VM_WRITE)
                gup_flags |= GUP_FLAGS_WRITE;
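
This hunk does two things: the comment is corrected to say the PROT_NONE
check is per-vma rather than per-page, and munlock now also ignores a
pending SIGKILL, so a task that has been killed can still undo its mlocks
instead of bailing out half way. A minimal user-space model of the flag
selection (the GUP_FLAGS_* bit values below are made up for illustration;
the real ones live in mm/internal.h):

#include <stdio.h>

/* Illustrative stand-ins for the kernel's gup_flags bits; these
 * particular values are assumptions, not the kernel's. */
enum {
        GUP_FLAGS_WRITE                  = 0x01,
        GUP_FLAGS_IGNORE_VMA_PERMISSIONS = 0x04,
        GUP_FLAGS_IGNORE_SIGKILL         = 0x08,
};

static int pick_gup_flags(int mlock, int vm_write)
{
        int gup_flags = 0;

        /* munlock must always make progress: ignore PROT_NONE and a
         * pending SIGKILL */
        if (!mlock)
                gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS |
                             GUP_FLAGS_IGNORE_SIGKILL;

        if (vm_write)
                gup_flags |= GUP_FLAGS_WRITE;

        return gup_flags;
}

int main(void)
{
        printf("mlock, writable vma:    %#x\n", pick_gup_flags(1, 1));
        printf("munlock, PROT_NONE vma: %#x\n", pick_gup_flags(0, 0));
        return 0;
}
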
@@ -293,14 +294,10 @@ static inline int __mlock_posix_error_return(long retval)
  *
  * return number of pages [> 0] to be removed from locked_vm on success
  * of "special" vmas.
- *
- * return negative error if vma spanning @start-@range disappears while
- * mmap semaphore is dropped.  Unlikely?
  */
 long mlock_vma_pages_range(struct vm_area_struct *vma,
                        unsigned long start, unsigned long end)
 {
-       struct mm_struct *mm = vma->vm_mm;
        int nr_pages = (end - start) / PAGE_SIZE;
        BUG_ON(!(vma->vm_flags & VM_LOCKED));
 
@@ -313,20 +310,11 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
        if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
                        is_vm_hugetlb_page(vma) ||
                        vma == get_gate_vma(current))) {
-               long error;
-               downgrade_write(&mm->mmap_sem);
-
-               error = __mlock_vma_pages_range(vma, start, end, 1);
 
-               up_read(&mm->mmap_sem);
-               /* vma can change or disappear */
-               down_write(&mm->mmap_sem);
-               vma = find_vma(mm, start);
-               /* non-NULL vma must contain @start, but need to check @end */
-               if (!vma ||  end > vma->vm_end)
-                       return -ENOMEM;
+               __mlock_vma_pages_range(vma, start, end, 1);
 
-               return 0;       /* hide other errors from mmap(), et al */
+               /* Hide errors from mmap() and other callers */
+               return 0;
        }
 
        /*
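
The deleted lines are the "downgrade, fault in, re-acquire, revalidate"
dance: once mmap_sem had been given up, another thread could unmap or
resize the vma, so the code had to find_vma() again and return -ENOMEM if
the range no longer fit (which is also why the "return negative error if
vma ... disappears" note was dropped from the function comment in the
previous hunk). After this change mmap_sem simply stays write-held across
__mlock_vma_pages_range(), so the vma pointer remains valid throughout.
A small user-space analogy of the hazard the old code defended against
(a pthread rwlock stands in for mmap_sem; the names and types here are
mine, not the kernel's):

#include <pthread.h>
#include <stdio.h>

struct vma { unsigned long vm_start, vm_end; };

static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER;
static struct vma *map;         /* stands in for the mm's vma list */

/* Shape of the removed code: give up the write lock around the slow
 * fault-in, then re-acquire and revalidate, because the mapping may
 * have changed or disappeared in the window. */
static long lock_range_old(unsigned long start, unsigned long end)
{
        pthread_rwlock_unlock(&mmap_sem);       /* write lock dropped */
        /* ... slow fault-in ran here under a read lock ... */
        pthread_rwlock_wrlock(&mmap_sem);       /* re-acquire */
        if (!map || start < map->vm_start || end > map->vm_end)
                return -1;                      /* -ENOMEM in the kernel */
        return 0;
}

int main(void)
{
        struct vma v = { 0x1000, 0x9000 };

        map = &v;
        pthread_rwlock_wrlock(&mmap_sem);
        printf("revalidation says: %s\n",
               lock_range_old(0x1000, 0x9000) ? "vma gone" : "still there");
        pthread_rwlock_unlock(&mmap_sem);
        return 0;
}

The price of the simplification is that a large mlock() now holds
mmap_sem for write for the whole populate; the next hunk applies the same
change to mlock_fixup().
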
@@ -437,41 +425,14 @@ success:
        vma->vm_flags = newflags;
 
        if (lock) {
-               /*
-                * mmap_sem is currently held for write.  Downgrade the write
-                * lock to a read lock so that other faults, mmap scans, ...
-                * while we fault in all pages.
-                */
-               downgrade_write(&mm->mmap_sem);
-
                ret = __mlock_vma_pages_range(vma, start, end, 1);
 
-               /*
-                * Need to reacquire mmap sem in write mode, as our callers
-                * expect this.  We have no support for atomically upgrading
-                * a sem to write, so we need to check for ranges while sem
-                * is unlocked.
-                */
-               up_read(&mm->mmap_sem);
-               /* vma can change or disappear */
-               down_write(&mm->mmap_sem);
-               *prev = find_vma(mm, start);
-               /* non-NULL *prev must contain @start, but need to check @end */
-               if (!(*prev) || end > (*prev)->vm_end)
-                       ret = -ENOMEM;
-               else if (ret > 0) {
+               if (ret > 0) {
                        mm->locked_vm -= ret;
                        ret = 0;
                } else
                        ret = __mlock_posix_error_return(ret); /* translate if needed */
        } else {
-               /*
-                * TODO:  for unlocking, pages will already be resident, so
-                * we don't need to wait for allocations/reclaim/pagein, ...
-                * However, unlocking a very large region can still take a
-                * while.  Should we downgrade the semaphore for both lock
-                * AND unlock ?
-                */
                __mlock_vma_pages_range(vma, start, end, 0);
        }
 
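
With the lock dance gone here too, the result handling becomes direct: a
positive return from __mlock_vma_pages_range() is a page count to remove
from mm->locked_vm (per the function comment in the second hunk), and
anything else is translated for userspace by __mlock_posix_error_return().
A stand-alone sketch of that translation, assuming the mapping follows
the POSIX error codes for mlock() (ENOMEM for unmapped addresses, EAGAIN
when the pages could not all be locked):

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Assumed behaviour of __mlock_posix_error_return() */
static long mlock_posix_error_return(long retval)
{
        if (retval == -EFAULT)
                return -ENOMEM;         /* address range not mapped */
        if (retval == -ENOMEM)
                return -EAGAIN;         /* pages could not be locked */
        return retval;
}

int main(void)
{
        printf("-EFAULT becomes %s\n",
               strerror((int)-mlock_posix_error_return(-EFAULT)));
        printf("-ENOMEM becomes %s\n",
               strerror((int)-mlock_posix_error_return(-ENOMEM)));
        return 0;
}
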
@@ -529,7 +490,7 @@ static int do_mlock(unsigned long start, size_t len, int on)
        return error;
 }
 
-asmlinkage long sys_mlock(unsigned long start, size_t len)
+SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
 {
        unsigned long locked;
        unsigned long lock_limit;
@@ -557,7 +518,7 @@ asmlinkage long sys_mlock(unsigned long start, size_t len)
        return error;
 }
 
-asmlinkage long sys_munlock(unsigned long start, size_t len)
+SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
 {
        int ret;
 
@@ -594,7 +555,7 @@ out:
        return 0;
 }
 
-asmlinkage long sys_mlockall(int flags)
+SYSCALL_DEFINE1(mlockall, int, flags)
 {
        unsigned long lock_limit;
        int ret = -EINVAL;
@@ -622,7 +583,7 @@ out:
        return ret;
 }
 
-asmlinkage long sys_munlockall(void)
+SYSCALL_DEFINE0(munlockall)
 {
        int ret;
 
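
The four hunks above are part of the tree-wide conversion from open-coded
asmlinkage prototypes to the SYSCALL_DEFINEn macros, which build the
sys_* definition from alternating (type, name) pairs so that a single
definition site can also emit the sign-extension wrappers some 64-bit
architectures need for 32-bit syscall arguments. A toy user-space model
of the macro shape (the MY_/my_ names are made up for illustration):

#include <stdio.h>

/* Toy model of SYSCALL_DEFINE2: stamp out a function header from
 * alternating type/name arguments. The real macro additionally
 * generates per-architecture wrappers and metadata. */
#define MY_SYSCALL_DEFINE2(name, t1, a1, t2, a2) \
        long my_sys_##name(t1 a1, t2 a2)

MY_SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
{
        printf("mlock(start=%#lx, len=%zu)\n", start, len);
        return 0;
}

int main(void)
{
        return (int)my_sys_mlock(0x1000, 4096);
}
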
@@ -667,3 +628,53 @@ void user_shm_unlock(size_t size, struct user_struct *user)
        spin_unlock(&shmlock_user_lock);
        free_uid(user);
 }
+
+void *alloc_locked_buffer(size_t size)
+{
+       unsigned long rlim, vm, pgsz;
+       void *buffer = NULL;
+
+       pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+       down_write(&current->mm->mmap_sem);
+
+       rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+       vm   = current->mm->total_vm + pgsz;
+       if (rlim < vm)
+               goto out;
+
+       rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+       vm   = current->mm->locked_vm + pgsz;
+       if (rlim < vm)
+               goto out;
+
+       buffer = kzalloc(size, GFP_KERNEL);
+       if (!buffer)
+               goto out;
+
+       current->mm->total_vm  += pgsz;
+       current->mm->locked_vm += pgsz;
+
+ out:
+       up_write(&current->mm->mmap_sem);
+       return buffer;
+}
+
+void release_locked_buffer(void *buffer, size_t size)
+{
+       unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+       down_write(&current->mm->mmap_sem);
+
+       current->mm->total_vm  -= pgsz;
+       current->mm->locked_vm -= pgsz;
+
+       up_write(&current->mm->mmap_sem);
+}
+
+void free_locked_buffer(void *buffer, size_t size)
+{
+       release_locked_buffer(buffer, size);
+
+       kfree(buffer);
+}
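
The new helpers give kernel code a way to allocate a buffer that is
charged against the caller's rlimits: the size is rounded up to whole
pages, both RLIMIT_AS and RLIMIT_MEMLOCK are checked under mmap_sem, and
on success total_vm and locked_vm are bumped so the usage is visible to
the rest of mm. release_locked_buffer() undoes only the accounting, while
free_locked_buffer() also kfree()s the memory. A user-space model of the
same accounting idea (only the RLIMIT_MEMLOCK check is modelled, in bytes
rather than pages, and the _model names are mine):

#include <stdio.h>
#include <stdlib.h>
#include <sys/resource.h>

static size_t locked_bytes;     /* stands in for mm->locked_vm */

/* Refuse the allocation when it would push the simulated locked
 * counter past RLIMIT_MEMLOCK, mirroring alloc_locked_buffer(). */
static void *alloc_locked_buffer_model(size_t size)
{
        struct rlimit rlim;
        void *buffer;

        if (getrlimit(RLIMIT_MEMLOCK, &rlim) != 0)
                return NULL;
        if (rlim.rlim_cur != RLIM_INFINITY &&
            locked_bytes + size > rlim.rlim_cur)
                return NULL;            /* over the limit */

        buffer = calloc(1, size);       /* kzalloc() in the kernel */
        if (buffer)
                locked_bytes += size;
        return buffer;
}

static void free_locked_buffer_model(void *buffer, size_t size)
{
        locked_bytes -= size;           /* undo the accounting */
        free(buffer);
}

int main(void)
{
        void *buf = alloc_locked_buffer_model(4096);

        if (!buf)
                return 1;
        printf("locked_bytes after alloc: %zu\n", locked_bytes);
        free_locked_buffer_model(buf, 4096);
        printf("locked_bytes after free:  %zu\n", locked_bytes);
        return 0;
}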