X-Git-Url: http://ftp.safe.ca/?p=safe%2Fjmp%2Flinux-2.6;a=blobdiff_plain;f=ipc%2Fshm.c;h=1a314c89f93cfbffa46d14e00641708147b1a918;hp=8098968519026406191014b4cb64ee870e2a721c;hb=4434ade8c9334a3ab975d8993de456f06841899e;hpb=073115d6b29c7910feaa08241c6484637f5ca958 diff --git a/ipc/shm.c b/ipc/shm.c index 8098968..1a314c8 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -15,9 +15,12 @@ * * support for audit of ipc object properties and permission changes * Dustin Kirkland + * + * namespaces support + * OpenVZ, SWsoft Inc. + * Pavel Emelianov */ -#include #include #include #include @@ -32,98 +35,147 @@ #include #include #include -#include +#include +#include +#include +#include #include #include "util.h" -static struct file_operations shm_file_operations; -static struct vm_operations_struct shm_vm_ops; +struct shm_file_data { + int id; + struct ipc_namespace *ns; + struct file *file; + const struct vm_operations_struct *vm_ops; +}; + +#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data)) -static struct ipc_ids shm_ids; +static const struct file_operations shm_file_operations; +static const struct vm_operations_struct shm_vm_ops; -#define shm_lock(id) ((struct shmid_kernel*)ipc_lock(&shm_ids,id)) -#define shm_unlock(shp) ipc_unlock(&(shp)->shm_perm) -#define shm_get(id) ((struct shmid_kernel*)ipc_get(&shm_ids,id)) -#define shm_buildid(id, seq) \ - ipc_buildid(&shm_ids, id, seq) +#define shm_ids(ns) ((ns)->ids[IPC_SHM_IDS]) -static int newseg (key_t key, int shmflg, size_t size); -static void shm_open (struct vm_area_struct *shmd); -static void shm_close (struct vm_area_struct *shmd); +#define shm_unlock(shp) \ + ipc_unlock(&(shp)->shm_perm) + +static int newseg(struct ipc_namespace *, struct ipc_params *); +static void shm_open(struct vm_area_struct *vma); +static void shm_close(struct vm_area_struct *vma); +static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp); #ifdef CONFIG_PROC_FS static int sysvipc_shm_proc_show(struct seq_file *s, void *it); #endif -size_t shm_ctlmax = SHMMAX; -size_t shm_ctlall = SHMALL; -int shm_ctlmni = SHMMNI; +void shm_init_ns(struct ipc_namespace *ns) +{ + ns->shm_ctlmax = SHMMAX; + ns->shm_ctlall = SHMALL; + ns->shm_ctlmni = SHMMNI; + ns->shm_tot = 0; + ipc_init_ids(&shm_ids(ns)); +} -static int shm_tot; /* total number of shared memory pages */ +/* + * Called with shm_ids.rw_mutex (writer) and the shp structure locked. + * Only shm_ids.rw_mutex remains locked on exit. + */ +static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) +{ + struct shmid_kernel *shp; + shp = container_of(ipcp, struct shmid_kernel, shm_perm); + + if (shp->shm_nattch){ + shp->shm_perm.mode |= SHM_DEST; + /* Do not find it any more */ + shp->shm_perm.key = IPC_PRIVATE; + shm_unlock(shp); + } else + shm_destroy(ns, shp); +} + +#ifdef CONFIG_IPC_NS +void shm_exit_ns(struct ipc_namespace *ns) +{ + free_ipcs(ns, &shm_ids(ns), do_shm_rmid); + idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr); +} +#endif void __init shm_init (void) { - ipc_init_ids(&shm_ids, 1); + shm_init_ns(&init_ipc_ns); ipc_init_proc_interface("sysvipc/shm", " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime\n", - &shm_ids, - sysvipc_shm_proc_show); + IPC_SHM_IDS, sysvipc_shm_proc_show); } -static inline int shm_checkid(struct shmid_kernel *s, int id) +/* + * shm_lock_(check_) routines are called in the paths where the rw_mutex + * is not necessarily held. + */ +static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) { - if (ipc_checkid(&shm_ids,&s->shm_perm,id)) - return -EIDRM; - return 0; + struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id); + + if (IS_ERR(ipcp)) + return (struct shmid_kernel *)ipcp; + + return container_of(ipcp, struct shmid_kernel, shm_perm); } -static inline struct shmid_kernel *shm_rmid(int id) +static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns, + int id) { - return (struct shmid_kernel *)ipc_rmid(&shm_ids,id); + struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id); + + if (IS_ERR(ipcp)) + return (struct shmid_kernel *)ipcp; + + return container_of(ipcp, struct shmid_kernel, shm_perm); } -static inline int shm_addid(struct shmid_kernel *shp) +static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) { - return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni); + ipc_rmid(&shm_ids(ns), &s->shm_perm); } - -static inline void shm_inc (int id) { +/* This is called by fork, once for every shm attach. */ +static void shm_open(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); struct shmid_kernel *shp; - shp = shm_lock(id); - BUG_ON(!shp); + shp = shm_lock(sfd->ns, sfd->id); + BUG_ON(IS_ERR(shp)); shp->shm_atim = get_seconds(); - shp->shm_lprid = current->tgid; + shp->shm_lprid = task_tgid_vnr(current); shp->shm_nattch++; shm_unlock(shp); } -/* This is called by fork, once for every shm attach. */ -static void shm_open (struct vm_area_struct *shmd) -{ - shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino); -} - /* * shm_destroy - free the struct shmid_kernel * + * @ns: namespace * @shp: struct to free * - * It has to be called with shp and shm_ids.mutex locked, + * It has to be called with shp and shm_ids.rw_mutex (writer) locked, * but returns with shp unlocked and freed. */ -static void shm_destroy (struct shmid_kernel *shp) +static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) { - shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; - shm_rmid (shp->id); + ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; + shm_rmid(ns, shp); shm_unlock(shp); if (!is_file_hugepages(shp->shm_file)) shmem_lock(shp->shm_file, 0, shp->mlock_user); - else - user_shm_unlock(shp->shm_file->f_dentry->d_inode->i_size, + else if (shp->mlock_user) + user_shm_unlock(shp->shm_file->f_path.dentry->d_inode->i_size, shp->mlock_user); fput (shp->shm_file); security_shm_free(shp); @@ -131,77 +183,173 @@ static void shm_destroy (struct shmid_kernel *shp) } /* - * remove the attach descriptor shmd. + * remove the attach descriptor vma. * free memory for segment if it is marked destroyed. * The descriptor has already been removed from the current->mm->mmap list * and will later be kfree()d. */ -static void shm_close (struct vm_area_struct *shmd) +static void shm_close(struct vm_area_struct *vma) { - struct file * file = shmd->vm_file; - int id = file->f_dentry->d_inode->i_ino; + struct file * file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); struct shmid_kernel *shp; + struct ipc_namespace *ns = sfd->ns; - mutex_lock(&shm_ids.mutex); + down_write(&shm_ids(ns).rw_mutex); /* remove from the list of attaches of the shm segment */ - shp = shm_lock(id); - BUG_ON(!shp); - shp->shm_lprid = current->tgid; + shp = shm_lock(ns, sfd->id); + BUG_ON(IS_ERR(shp)); + shp->shm_lprid = task_tgid_vnr(current); shp->shm_dtim = get_seconds(); shp->shm_nattch--; if(shp->shm_nattch == 0 && shp->shm_perm.mode & SHM_DEST) - shm_destroy (shp); + shm_destroy(ns, shp); else shm_unlock(shp); - mutex_unlock(&shm_ids.mutex); + up_write(&shm_ids(ns).rw_mutex); +} + +static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + + return sfd->vm_ops->fault(vma, vmf); } +#ifdef CONFIG_NUMA +static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new) +{ + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + int err = 0; + if (sfd->vm_ops->set_policy) + err = sfd->vm_ops->set_policy(vma, new); + return err; +} + +static struct mempolicy *shm_get_policy(struct vm_area_struct *vma, + unsigned long addr) +{ + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + struct mempolicy *pol = NULL; + + if (sfd->vm_ops->get_policy) + pol = sfd->vm_ops->get_policy(vma, addr); + else if (vma->vm_policy) + pol = vma->vm_policy; + + return pol; +} +#endif + static int shm_mmap(struct file * file, struct vm_area_struct * vma) { + struct shm_file_data *sfd = shm_file_data(file); int ret; - ret = shmem_mmap(file, vma); - if (ret == 0) { - vma->vm_ops = &shm_vm_ops; - if (!(vma->vm_flags & VM_WRITE)) - vma->vm_flags &= ~VM_MAYWRITE; - shm_inc(file->f_dentry->d_inode->i_ino); - } + ret = sfd->file->f_op->mmap(sfd->file, vma); + if (ret != 0) + return ret; + sfd->vm_ops = vma->vm_ops; +#ifdef CONFIG_MMU + BUG_ON(!sfd->vm_ops->fault); +#endif + vma->vm_ops = &shm_vm_ops; + shm_open(vma); return ret; } -static struct file_operations shm_file_operations = { - .mmap = shm_mmap, +static int shm_release(struct inode *ino, struct file *file) +{ + struct shm_file_data *sfd = shm_file_data(file); + + put_ipc_ns(sfd->ns); + shm_file_data(file) = NULL; + kfree(sfd); + return 0; +} + +static int shm_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + int (*fsync) (struct file *, struct dentry *, int datasync); + struct shm_file_data *sfd = shm_file_data(file); + int ret = -EINVAL; + + fsync = sfd->file->f_op->fsync; + if (fsync) + ret = fsync(sfd->file, sfd->file->f_path.dentry, datasync); + return ret; +} + +static unsigned long shm_get_unmapped_area(struct file *file, + unsigned long addr, unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + struct shm_file_data *sfd = shm_file_data(file); + return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len, + pgoff, flags); +} + +static const struct file_operations shm_file_operations = { + .mmap = shm_mmap, + .fsync = shm_fsync, + .release = shm_release, #ifndef CONFIG_MMU - .get_unmapped_area = shmem_get_unmapped_area, + .get_unmapped_area = shm_get_unmapped_area, #endif }; -static struct vm_operations_struct shm_vm_ops = { +static const struct file_operations shm_file_operations_huge = { + .mmap = shm_mmap, + .fsync = shm_fsync, + .release = shm_release, + .get_unmapped_area = shm_get_unmapped_area, +}; + +int is_file_shm_hugepages(struct file *file) +{ + return file->f_op == &shm_file_operations_huge; +} + +static const struct vm_operations_struct shm_vm_ops = { .open = shm_open, /* callback for a new vm-area open */ .close = shm_close, /* callback for when the vm-area is released */ - .nopage = shmem_nopage, -#if defined(CONFIG_NUMA) && defined(CONFIG_SHMEM) - .set_policy = shmem_set_policy, - .get_policy = shmem_get_policy, + .fault = shm_fault, +#if defined(CONFIG_NUMA) + .set_policy = shm_set_policy, + .get_policy = shm_get_policy, #endif }; -static int newseg (key_t key, int shmflg, size_t size) +/** + * newseg - Create a new shared memory segment + * @ns: namespace + * @params: ptr to the structure that contains key, size and shmflg + * + * Called with shm_ids.rw_mutex held as a writer. + */ + +static int newseg(struct ipc_namespace *ns, struct ipc_params *params) { + key_t key = params->key; + int shmflg = params->flg; + size_t size = params->u.size; int error; struct shmid_kernel *shp; int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT; struct file * file; char name[13]; int id; + int acctflag = 0; - if (size < SHMMIN || size > shm_ctlmax) + if (size < SHMMIN || size > ns->shm_ctlmax) return -EINVAL; - if (shm_tot + numpages >= shm_ctlall) + if (ns->shm_tot + numpages > ns->shm_ctlall) return -ENOSPC; shp = ipc_rcu_alloc(sizeof(*shp)); @@ -219,50 +367,54 @@ static int newseg (key_t key, int shmflg, size_t size) return error; } + sprintf (name, "SYSV%08x", key); if (shmflg & SHM_HUGETLB) { - /* hugetlb_zero_setup takes care of mlock user accounting */ - file = hugetlb_zero_setup(size); - shp->mlock_user = current->user; + /* hugetlb_file_setup applies strict accounting */ + if (shmflg & SHM_NORESERVE) + acctflag = VM_NORESERVE; + file = hugetlb_file_setup(name, size, acctflag, + &shp->mlock_user, HUGETLB_SHMFS_INODE); } else { - int acctflag = VM_ACCOUNT; /* * Do not allow no accounting for OVERCOMMIT_NEVER, even * if it's asked for. */ if ((shmflg & SHM_NORESERVE) && sysctl_overcommit_memory != OVERCOMMIT_NEVER) - acctflag = 0; - sprintf (name, "SYSV%08x", key); + acctflag = VM_NORESERVE; file = shmem_file_setup(name, size, acctflag); } error = PTR_ERR(file); if (IS_ERR(file)) goto no_file; - error = -ENOSPC; - id = shm_addid(shp); - if(id == -1) + id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); + if (id < 0) { + error = id; goto no_id; + } - shp->shm_cprid = current->tgid; + shp->shm_cprid = task_tgid_vnr(current); shp->shm_lprid = 0; shp->shm_atim = shp->shm_dtim = 0; shp->shm_ctim = get_seconds(); shp->shm_segsz = size; shp->shm_nattch = 0; - shp->id = shm_buildid(id,shp->shm_perm.seq); shp->shm_file = file; - file->f_dentry->d_inode->i_ino = shp->id; - - /* Hugetlb ops would have already been assigned. */ - if (!(shmflg & SHM_HUGETLB)) - file->f_op = &shm_file_operations; + /* + * shmid gets reported as "inode#" in /proc/pid/maps. + * proc-ps tools use this. Changing this will break them. + */ + file->f_dentry->d_inode->i_ino = shp->shm_perm.id; - shm_tot += numpages; + ns->shm_tot += numpages; + error = shp->shm_perm.id; shm_unlock(shp); - return shp->id; + return error; no_id: + if (is_file_hugepages(file) && shp->mlock_user) + user_shm_unlock(size, shp->mlock_user); fput(file); no_file: security_shm_free(shp); @@ -270,39 +422,49 @@ no_file: return error; } -asmlinkage long sys_shmget (key_t key, size_t size, int shmflg) +/* + * Called with shm_ids.rw_mutex and ipcp locked. + */ +static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg) { struct shmid_kernel *shp; - int err, id = 0; - - mutex_lock(&shm_ids.mutex); - if (key == IPC_PRIVATE) { - err = newseg(key, shmflg, size); - } else if ((id = ipc_findkey(&shm_ids, key)) == -1) { - if (!(shmflg & IPC_CREAT)) - err = -ENOENT; - else - err = newseg(key, shmflg, size); - } else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) { - err = -EEXIST; - } else { - shp = shm_lock(id); - BUG_ON(shp==NULL); - if (shp->shm_segsz < size) - err = -EINVAL; - else if (ipcperms(&shp->shm_perm, shmflg)) - err = -EACCES; - else { - int shmid = shm_buildid(id, shp->shm_perm.seq); - err = security_shm_associate(shp, shmflg); - if (!err) - err = shmid; - } - shm_unlock(shp); - } - mutex_unlock(&shm_ids.mutex); - return err; + shp = container_of(ipcp, struct shmid_kernel, shm_perm); + return security_shm_associate(shp, shmflg); +} + +/* + * Called with shm_ids.rw_mutex and ipcp locked. + */ +static inline int shm_more_checks(struct kern_ipc_perm *ipcp, + struct ipc_params *params) +{ + struct shmid_kernel *shp; + + shp = container_of(ipcp, struct shmid_kernel, shm_perm); + if (shp->shm_segsz < params->u.size) + return -EINVAL; + + return 0; +} + +SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg) +{ + struct ipc_namespace *ns; + struct ipc_ops shm_ops; + struct ipc_params shm_params; + + ns = current->nsproxy->ipc_ns; + + shm_ops.getnew = newseg; + shm_ops.associate = shm_security; + shm_ops.more_checks = shm_more_checks; + + shm_params.key = key; + shm_params.flg = shmflg; + shm_params.u.size = size; + + return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params); } static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version) @@ -330,28 +492,14 @@ static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ } } -struct shm_setbuf { - uid_t uid; - gid_t gid; - mode_t mode; -}; - -static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __user *buf, int version) +static inline unsigned long +copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version) { switch(version) { case IPC_64: - { - struct shmid64_ds tbuf; - - if (copy_from_user(&tbuf, buf, sizeof(tbuf))) + if (copy_from_user(out, buf, sizeof(*out))) return -EFAULT; - - out->uid = tbuf.shm_perm.uid; - out->gid = tbuf.shm_perm.gid; - out->mode = tbuf.shm_perm.mode; - return 0; - } case IPC_OLD: { struct shmid_ds tbuf_old; @@ -359,9 +507,9 @@ static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __ if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) return -EFAULT; - out->uid = tbuf_old.shm_perm.uid; - out->gid = tbuf_old.shm_perm.gid; - out->mode = tbuf_old.shm_perm.mode; + out->shm_perm.uid = tbuf_old.shm_perm.uid; + out->shm_perm.gid = tbuf_old.shm_perm.gid; + out->shm_perm.mode = tbuf_old.shm_perm.mode; return 0; } @@ -396,41 +544,102 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf } } -static void shm_get_stat(unsigned long *rss, unsigned long *swp) +/* + * Called with shm_ids.rw_mutex held as a reader + */ +static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, + unsigned long *swp) { - int i; + int next_id; + int total, in_use; *rss = 0; *swp = 0; - for (i = 0; i <= shm_ids.max_id; i++) { + in_use = shm_ids(ns).in_use; + + for (total = 0, next_id = 0; total < in_use; next_id++) { + struct kern_ipc_perm *ipc; struct shmid_kernel *shp; struct inode *inode; - shp = shm_get(i); - if(!shp) + ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id); + if (ipc == NULL) continue; + shp = container_of(ipc, struct shmid_kernel, shm_perm); - inode = shp->shm_file->f_dentry->d_inode; + inode = shp->shm_file->f_path.dentry->d_inode; if (is_file_hugepages(shp->shm_file)) { struct address_space *mapping = inode->i_mapping; - *rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages; + struct hstate *h = hstate_file(shp->shm_file); + *rss += pages_per_huge_page(h) * mapping->nrpages; } else { +#ifdef CONFIG_SHMEM struct shmem_inode_info *info = SHMEM_I(inode); spin_lock(&info->lock); *rss += inode->i_mapping->nrpages; *swp += info->swapped; spin_unlock(&info->lock); +#else + *rss += inode->i_mapping->nrpages; +#endif } + + total++; + } +} + +/* + * This function handles some shmctl commands which require the rw_mutex + * to be held in write mode. + * NOTE: no locks must be held, the rw_mutex is taken inside this function. + */ +static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, + struct shmid_ds __user *buf, int version) +{ + struct kern_ipc_perm *ipcp; + struct shmid64_ds shmid64; + struct shmid_kernel *shp; + int err; + + if (cmd == IPC_SET) { + if (copy_shmid_from_user(&shmid64, buf, version)) + return -EFAULT; } + + ipcp = ipcctl_pre_down(&shm_ids(ns), shmid, cmd, &shmid64.shm_perm, 0); + if (IS_ERR(ipcp)) + return PTR_ERR(ipcp); + + shp = container_of(ipcp, struct shmid_kernel, shm_perm); + + err = security_shm_shmctl(shp, cmd); + if (err) + goto out_unlock; + switch (cmd) { + case IPC_RMID: + do_shm_rmid(ns, ipcp); + goto out_up; + case IPC_SET: + ipc_update_perm(&shmid64.shm_perm, ipcp); + shp->shm_ctim = get_seconds(); + break; + default: + err = -EINVAL; + } +out_unlock: + shm_unlock(shp); +out_up: + up_write(&shm_ids(ns).rw_mutex); + return err; } -asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) +SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) { - struct shm_setbuf setbuf; struct shmid_kernel *shp; int err, version; + struct ipc_namespace *ns; if (cmd < 0 || shmid < 0) { err = -EINVAL; @@ -438,6 +647,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) } version = ipc_parse_version(&cmd); + ns = current->nsproxy->ipc_ns; switch (cmd) { /* replace with proc interface ? */ case IPC_INFO: @@ -448,16 +658,19 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) if (err) return err; - memset(&shminfo,0,sizeof(shminfo)); - shminfo.shmmni = shminfo.shmseg = shm_ctlmni; - shminfo.shmmax = shm_ctlmax; - shminfo.shmall = shm_ctlall; + memset(&shminfo, 0, sizeof(shminfo)); + shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni; + shminfo.shmmax = ns->shm_ctlmax; + shminfo.shmall = ns->shm_ctlall; shminfo.shmmin = SHMMIN; if(copy_shminfo_to_user (buf, &shminfo, version)) return -EFAULT; - /* reading a integer is always atomic */ - err= shm_ids.max_id; + + down_read(&shm_ids(ns).rw_mutex); + err = ipc_get_maxid(&shm_ids(ns)); + up_read(&shm_ids(ns).rw_mutex); + if(err<0) err = 0; goto out; @@ -470,16 +683,16 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) if (err) return err; - memset(&shm_info,0,sizeof(shm_info)); - mutex_lock(&shm_ids.mutex); - shm_info.used_ids = shm_ids.in_use; - shm_get_stat (&shm_info.shm_rss, &shm_info.shm_swp); - shm_info.shm_tot = shm_tot; + memset(&shm_info, 0, sizeof(shm_info)); + down_read(&shm_ids(ns).rw_mutex); + shm_info.used_ids = shm_ids(ns).in_use; + shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp); + shm_info.shm_tot = ns->shm_tot; shm_info.swap_attempts = 0; shm_info.swap_successes = 0; - err = shm_ids.max_id; - mutex_unlock(&shm_ids.mutex); - if(copy_to_user (buf, &shm_info, sizeof(shm_info))) { + err = ipc_get_maxid(&shm_ids(ns)); + up_read(&shm_ids(ns).rw_mutex); + if (copy_to_user(buf, &shm_info, sizeof(shm_info))) { err = -EFAULT; goto out; } @@ -492,28 +705,29 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) { struct shmid64_ds tbuf; int result; - memset(&tbuf, 0, sizeof(tbuf)); - shp = shm_lock(shmid); - if(shp==NULL) { - err = -EINVAL; - goto out; - } else if(cmd==SHM_STAT) { - err = -EINVAL; - if (shmid > shm_ids.max_id) - goto out_unlock; - result = shm_buildid(shmid, shp->shm_perm.seq); + + if (cmd == SHM_STAT) { + shp = shm_lock(ns, shmid); + if (IS_ERR(shp)) { + err = PTR_ERR(shp); + goto out; + } + result = shp->shm_perm.id; } else { - err = shm_checkid(shp,shmid); - if(err) - goto out_unlock; + shp = shm_lock_check(ns, shmid); + if (IS_ERR(shp)) { + err = PTR_ERR(shp); + goto out; + } result = 0; } - err=-EACCES; + err = -EACCES; if (ipcperms (&shp->shm_perm, S_IRUGO)) goto out_unlock; err = security_shm_shmctl(shp, cmd); if (err) goto out_unlock; + memset(&tbuf, 0, sizeof(tbuf)); kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm); tbuf.shm_segsz = shp->shm_segsz; tbuf.shm_atime = shp->shm_atim; @@ -521,10 +735,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) tbuf.shm_ctime = shp->shm_ctim; tbuf.shm_cpid = shp->shm_cprid; tbuf.shm_lpid = shp->shm_lprid; - if (!is_file_hugepages(shp->shm_file)) - tbuf.shm_nattch = shp->shm_nattch; - else - tbuf.shm_nattch = file_count(shp->shm_file) - 1; + tbuf.shm_nattch = shp->shm_nattch; shm_unlock(shp); if(copy_shmid_to_user (buf, &tbuf, version)) err = -EFAULT; @@ -535,26 +746,25 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) case SHM_LOCK: case SHM_UNLOCK: { - shp = shm_lock(shmid); - if(shp==NULL) { - err = -EINVAL; + struct file *uninitialized_var(shm_file); + + lru_add_drain_all(); /* drain pagevecs to lru lists */ + + shp = shm_lock_check(ns, shmid); + if (IS_ERR(shp)) { + err = PTR_ERR(shp); goto out; } - err = shm_checkid(shp,shmid); - if(err) - goto out_unlock; - err = audit_ipc_obj(&(shp->shm_perm)); - if (err) - goto out_unlock; + audit_ipc_obj(&(shp->shm_perm)); if (!capable(CAP_IPC_LOCK)) { + uid_t euid = current_euid(); err = -EPERM; - if (current->euid != shp->shm_perm.uid && - current->euid != shp->shm_perm.cuid) + if (euid != shp->shm_perm.uid && + euid != shp->shm_perm.cuid) goto out_unlock; - if (cmd == SHM_LOCK && - !current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) + if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) goto out_unlock; } @@ -563,10 +773,10 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) goto out_unlock; if(cmd==SHM_LOCK) { - struct user_struct * user = current->user; + struct user_struct *user = current_user(); if (!is_file_hugepages(shp->shm_file)) { err = shmem_lock(shp->shm_file, 1, user); - if (!err) { + if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){ shp->shm_perm.mode |= SHM_LOCKED; shp->mlock_user = user; } @@ -580,102 +790,13 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) goto out; } case IPC_RMID: - { - /* - * We cannot simply remove the file. The SVID states - * that the block remains until the last person - * detaches from it, then is deleted. A shmat() on - * an RMID segment is legal in older Linux and if - * we change it apps break... - * - * Instead we set a destroyed flag, and then blow - * the name away when the usage hits zero. - */ - mutex_lock(&shm_ids.mutex); - shp = shm_lock(shmid); - err = -EINVAL; - if (shp == NULL) - goto out_up; - err = shm_checkid(shp, shmid); - if(err) - goto out_unlock_up; - - err = audit_ipc_obj(&(shp->shm_perm)); - if (err) - goto out_unlock_up; - - if (current->euid != shp->shm_perm.uid && - current->euid != shp->shm_perm.cuid && - !capable(CAP_SYS_ADMIN)) { - err=-EPERM; - goto out_unlock_up; - } - - err = security_shm_shmctl(shp, cmd); - if (err) - goto out_unlock_up; - - if (shp->shm_nattch){ - shp->shm_perm.mode |= SHM_DEST; - /* Do not find it any more */ - shp->shm_perm.key = IPC_PRIVATE; - shm_unlock(shp); - } else - shm_destroy (shp); - mutex_unlock(&shm_ids.mutex); - goto out; - } - case IPC_SET: - { - if (copy_shmid_from_user (&setbuf, buf, version)) { - err = -EFAULT; - goto out; - } - mutex_lock(&shm_ids.mutex); - shp = shm_lock(shmid); - err=-EINVAL; - if(shp==NULL) - goto out_up; - err = shm_checkid(shp,shmid); - if(err) - goto out_unlock_up; - err = audit_ipc_obj(&(shp->shm_perm)); - if (err) - goto out_unlock_up; - err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode, &(shp->shm_perm)); - if (err) - goto out_unlock_up; - err=-EPERM; - if (current->euid != shp->shm_perm.uid && - current->euid != shp->shm_perm.cuid && - !capable(CAP_SYS_ADMIN)) { - goto out_unlock_up; - } - - err = security_shm_shmctl(shp, cmd); - if (err) - goto out_unlock_up; - - shp->shm_perm.uid = setbuf.uid; - shp->shm_perm.gid = setbuf.gid; - shp->shm_perm.mode = (shp->shm_perm.mode & ~S_IRWXUGO) - | (setbuf.mode & S_IRWXUGO); - shp->shm_ctim = get_seconds(); - break; - } - + err = shmctl_down(ns, shmid, cmd, buf, version); + return err; default: - err = -EINVAL; - goto out; + return -EINVAL; } - err = 0; -out_unlock_up: - shm_unlock(shp); -out_up: - mutex_unlock(&shm_ids.mutex); - goto out; out_unlock: shm_unlock(shp); out: @@ -698,14 +819,17 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) int err; unsigned long flags; unsigned long prot; - unsigned long o_flags; int acc_mode; - void *user_addr; - - if (shmid < 0) { - err = -EINVAL; + unsigned long user_addr; + struct ipc_namespace *ns; + struct shm_file_data *sfd; + struct path path; + fmode_t f_mode; + + err = -EINVAL; + if (shmid < 0) goto out; - } else if ((addr = (ulong)shmaddr)) { + else if ((addr = (ulong)shmaddr)) { if (addr & (SHMLBA-1)) { if (shmflg & SHM_RND) addr &= ~(SHMLBA-1); /* round down */ @@ -713,24 +837,24 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) #ifndef __ARCH_FORCE_SHMLBA if (addr & ~PAGE_MASK) #endif - return -EINVAL; + goto out; } flags = MAP_SHARED | MAP_FIXED; } else { if ((shmflg & SHM_REMAP)) - return -EINVAL; + goto out; flags = MAP_SHARED; } if (shmflg & SHM_RDONLY) { prot = PROT_READ; - o_flags = O_RDONLY; acc_mode = S_IRUGO; + f_mode = FMODE_READ; } else { prot = PROT_READ | PROT_WRITE; - o_flags = O_RDWR; acc_mode = S_IRUGO | S_IWUGO; + f_mode = FMODE_READ | FMODE_WRITE; } if (shmflg & SHM_EXEC) { prot |= PROT_EXEC; @@ -741,36 +865,49 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) * We cannot rely on the fs check since SYSV IPC does have an * additional creator id... */ - shp = shm_lock(shmid); - if(shp == NULL) { - err = -EINVAL; - goto out; - } - err = shm_checkid(shp,shmid); - if (err) { - shm_unlock(shp); - goto out; - } - if (ipcperms(&shp->shm_perm, acc_mode)) { - shm_unlock(shp); - err = -EACCES; + ns = current->nsproxy->ipc_ns; + shp = shm_lock_check(ns, shmid); + if (IS_ERR(shp)) { + err = PTR_ERR(shp); goto out; } + err = -EACCES; + if (ipcperms(&shp->shm_perm, acc_mode)) + goto out_unlock; + err = security_shm_shmat(shp, shmaddr, shmflg); - if (err) { - shm_unlock(shp); - return err; - } - - file = shp->shm_file; - size = i_size_read(file->f_dentry->d_inode); + if (err) + goto out_unlock; + + path = shp->shm_file->f_path; + path_get(&path); shp->shm_nattch++; + size = i_size_read(path.dentry->d_inode); shm_unlock(shp); + err = -ENOMEM; + sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); + if (!sfd) + goto out_put_dentry; + + file = alloc_file(&path, f_mode, + is_file_hugepages(shp->shm_file) ? + &shm_file_operations_huge : + &shm_file_operations); + if (!file) + goto out_free; + + file->private_data = sfd; + file->f_mapping = shp->shm_file->f_mapping; + sfd->id = shp->shm_perm.id; + sfd->ns = get_ipc_ns(ns); + sfd->file = shp->shm_file; + sfd->vm_ops = NULL; + down_write(¤t->mm->mmap_sem); if (addr && !(shmflg & SHM_REMAP)) { - user_addr = ERR_PTR(-EINVAL); + err = -EINVAL; if (find_vma_intersection(current->mm, addr, addr + size)) goto invalid; /* @@ -782,31 +919,43 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) goto invalid; } - user_addr = (void*) do_mmap (file, addr, size, prot, flags, 0); - + user_addr = do_mmap (file, addr, size, prot, flags, 0); + *raddr = user_addr; + err = 0; + if (IS_ERR_VALUE(user_addr)) + err = (long)user_addr; invalid: up_write(¤t->mm->mmap_sem); - mutex_lock(&shm_ids.mutex); - shp = shm_lock(shmid); - BUG_ON(!shp); + fput(file); + +out_nattch: + down_write(&shm_ids(ns).rw_mutex); + shp = shm_lock(ns, shmid); + BUG_ON(IS_ERR(shp)); shp->shm_nattch--; if(shp->shm_nattch == 0 && shp->shm_perm.mode & SHM_DEST) - shm_destroy (shp); + shm_destroy(ns, shp); else shm_unlock(shp); - mutex_unlock(&shm_ids.mutex); + up_write(&shm_ids(ns).rw_mutex); - *raddr = (unsigned long) user_addr; - err = 0; - if (IS_ERR(user_addr)) - err = PTR_ERR(user_addr); out: return err; + +out_unlock: + shm_unlock(shp); + goto out; + +out_free: + kfree(sfd); +out_put_dentry: + path_put(&path); + goto out_nattch; } -asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg) +SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) { unsigned long ret; long err; @@ -822,13 +971,16 @@ asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg) * detach and kill segment if marked destroyed. * The work is done in shm_close. */ -asmlinkage long sys_shmdt(char __user *shmaddr) +SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) { struct mm_struct *mm = current->mm; - struct vm_area_struct *vma, *next; + struct vm_area_struct *vma; unsigned long addr = (unsigned long)shmaddr; - loff_t size = 0; int retval = -EINVAL; +#ifdef CONFIG_MMU + loff_t size = 0; + struct vm_area_struct *next; +#endif if (addr & ~PAGE_MASK) return retval; @@ -857,6 +1009,7 @@ asmlinkage long sys_shmdt(char __user *shmaddr) */ vma = find_vma(mm, addr); +#ifdef CONFIG_MMU while (vma) { next = vma->vm_next; @@ -865,11 +1018,11 @@ asmlinkage long sys_shmdt(char __user *shmaddr) * a fragment created by mprotect() and/or munmap(), or it * otherwise it starts at this address with no hassles. */ - if ((vma->vm_ops == &shm_vm_ops || is_vm_hugetlb_page(vma)) && + if ((vma->vm_ops == &shm_vm_ops) && (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) { - size = vma->vm_file->f_dentry->d_inode->i_size; + size = vma->vm_file->f_path.dentry->d_inode->i_size; do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); /* * We discovered the size of the shm segment, so @@ -894,13 +1047,24 @@ asmlinkage long sys_shmdt(char __user *shmaddr) next = vma->vm_next; /* finding a matching vma now does not alter retval */ - if ((vma->vm_ops == &shm_vm_ops || is_vm_hugetlb_page(vma)) && + if ((vma->vm_ops == &shm_vm_ops) && (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); vma = next; } +#else /* CONFIG_MMU */ + /* under NOMMU conditions, the exact address to be destroyed must be + * given */ + retval = -EINVAL; + if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { + do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); + retval = 0; + } + +#endif + up_write(&mm->mmap_sem); return retval; } @@ -909,23 +1073,23 @@ asmlinkage long sys_shmdt(char __user *shmaddr) static int sysvipc_shm_proc_show(struct seq_file *s, void *it) { struct shmid_kernel *shp = it; - char *format; -#define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" -#define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" +#if BITS_PER_LONG <= 32 +#define SIZE_SPEC "%10lu" +#else +#define SIZE_SPEC "%21lu" +#endif - if (sizeof(size_t) <= sizeof(int)) - format = SMALL_STRING; - else - format = BIG_STRING; - return seq_printf(s, format, + return seq_printf(s, + "%10d %10d %4o " SIZE_SPEC " %5u %5u " + "%5lu %5u %5u %5u %5u %10lu %10lu %10lu\n", shp->shm_perm.key, - shp->id, + shp->shm_perm.id, shp->shm_perm.mode, shp->shm_segsz, shp->shm_cprid, shp->shm_lprid, - is_file_hugepages(shp->shm_file) ? (file_count(shp->shm_file) - 1) : shp->shm_nattch, + shp->shm_nattch, shp->shm_perm.uid, shp->shm_perm.gid, shp->shm_perm.cuid,