/*
* NET4: Implementation of BSD Unix domain sockets.
*
- * Authors: Alan Cox, <alan.cox@linux.org>
+ * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Version: $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
- *
* Fixes:
* Linus Torvalds : Assorted bug cures.
* Niibe Yutaka : async I/O support.
#include <net/checksum.h>
#include <linux/security.h>
-int sysctl_unix_max_dgram_qlen __read_mostly = 10;
-
static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
static DEFINE_SPINLOCK(unix_table_lock);
static atomic_t unix_nr_socks = ATOMIC_INIT(0);
#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
-static struct sock *first_unix_socket(int *i)
-{
- for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
- if (!hlist_empty(&unix_socket_table[*i]))
- return __sk_head(&unix_socket_table[*i]);
- }
- return NULL;
-}
-
-static struct sock *next_unix_socket(int *i, struct sock *s)
-{
- struct sock *next = sk_next(s);
- /* More in this chain? */
- if (next)
- return next;
- /* Look for next non-empty chain. */
- for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
- if (!hlist_empty(&unix_socket_table[*i]))
- return __sk_head(&unix_socket_table[*i]);
- }
- return NULL;
-}
-
-#define forall_unix_sockets(i, s) \
- for (s = first_unix_socket(&(i)); s; s = next_unix_socket(&(i),(s)))
-
#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
/*
* SMP locking strategy:
* hash table is protected with spinlock unix_table_lock
- * each socket state is protected by separate rwlock.
+ * each socket state is protected by separate spin lock.
*/
static inline unsigned unix_hash_fold(__wsum n)
/* Nonzero when osk has no peer set, or when unix_our_peer(sk, osk) holds. */
static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
- return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
+ return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
+}
+
/*
 * Nonzero when sk's receive queue currently holds more entries than
 * sk->sk_max_ack_backlog allows.
 */
+static inline int unix_recvq_full(struct sock const *sk)
+{
+ return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}
static struct sock *unix_peer_get(struct sock *s)
* - if it starts with a zero byte, it is an abstract name.
*/
-static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
+static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
{
if (len <= sizeof(short) || len > sizeof(*sunaddr))
return -EINVAL;
* we are guaranteed that it is a valid memory location in our
* kernel address buffer.
*/
- ((char *)sunaddr)[len]=0;
+ ((char *)sunaddr)[len] = 0;
len = strlen(sunaddr->sun_path)+1+sizeof(short);
return len;
}
- *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
+ *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
return len;
}
/*
 * Link sk into the hash chain 'list'.  A warning is raised if sk does not
 * look unhashed beforehand.
 * NOTE(review): the double-underscore name suggests the caller is expected
 * to hold unix_table_lock already -- confirm against the callers.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
- BUG_TRAP(sk_unhashed(sk));
+ WARN_ON(!sk_unhashed(sk));
sk_add_node(sk, list);
}
spin_unlock(&unix_table_lock);
}
-static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
+static struct sock *__unix_find_socket_byname(struct net *net,
+ struct sockaddr_un *sunname,
int len, int type, unsigned hash)
{
struct sock *s;
sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
struct unix_sock *u = unix_sk(s);
+ if (!net_eq(sock_net(s), net))
+ continue;
+
if (u->addr->len == len &&
!memcmp(u->addr->name, sunname, len))
goto found;
return s;
}
/*
 * Locked lookup by name: runs __unix_find_socket_byname() under
 * unix_table_lock and, when it yields a socket, takes a reference with
 * sock_hold() before the lock is dropped.  The helper's result is returned
 * unchanged, so a non-NULL return carries a reference taken here.
 */
-static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
+static inline struct sock *unix_find_socket_byname(struct net *net,
+ struct sockaddr_un *sunname,
int len, int type,
unsigned hash)
{
struct sock *s;
spin_lock(&unix_table_lock);
- s = __unix_find_socket_byname(sunname, len, type, hash);
+ s = __unix_find_socket_byname(net, sunname, len, type, hash);
if (s)
sock_hold(s); /* pin the match while the table lock is still held */
spin_unlock(&unix_table_lock);
return s;
}
-static struct sock *unix_find_socket_byinode(struct inode *i)
+static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
{
struct sock *s;
struct hlist_node *node;
&unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
struct dentry *dentry = unix_sk(s)->dentry;
- if(dentry && dentry->d_inode == i)
- {
+ if (!net_eq(sock_net(s), net))
+ continue;
+
+ if (dentry && dentry->d_inode == i) {
sock_hold(s);
goto found;
}
{
read_lock(&sk->sk_callback_lock);
if (unix_writable(sk)) {
- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible(sk->sk_sleep);
- sk_wake_async(sk, 2, POLL_OUT);
+ if (sk_has_sleeper(sk))
+ wake_up_interruptible_sync(sk->sk_sleep);
+ sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
read_unlock(&sk->sk_callback_lock);
}
skb_queue_purge(&sk->sk_receive_queue);
- BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
- BUG_TRAP(sk_unhashed(sk));
- BUG_TRAP(!sk->sk_socket);
+ WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+ WARN_ON(!sk_unhashed(sk));
+ WARN_ON(sk->sk_socket);
if (!sock_flag(sk, SOCK_DEAD)) {
- printk("Attempt to release alive unix socket: %p\n", sk);
+ printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
return;
}
unix_release_addr(u->addr);
atomic_dec(&unix_nr_socks);
+ local_bh_disable();
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
- printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
+ printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
+ atomic_read(&unix_nr_socks));
#endif
}
-static int unix_release_sock (struct sock *sk, int embrion)
+static int unix_release_sock(struct sock *sk, int embrion)
{
struct unix_sock *u = unix_sk(sk);
struct dentry *dentry;
wake_up_interruptible_all(&u->peer_wait);
- skpair=unix_peer(sk);
+ skpair = unix_peer(sk);
- if (skpair!=NULL) {
+ if (skpair != NULL) {
if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
unix_state_lock(skpair);
/* No more writes */
unix_state_unlock(skpair);
skpair->sk_state_change(skpair);
read_lock(&skpair->sk_callback_lock);
- sk_wake_async(skpair,1,POLL_HUP);
+ sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
read_unlock(&skpair->sk_callback_lock);
}
sock_put(skpair); /* It may now die */
/* Try to flush out this socket. Throw out buffers at least */
while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
- if (state==TCP_LISTEN)
+ if (state == TCP_LISTEN)
unix_release_sock(skb->sk, 1);
/* passed fds are erased in the kfree_skb hook */
kfree_skb(skb);
* What the above comment does talk about? --ANK(980817)
*/
- if (atomic_read(&unix_tot_inflight))
+ if (unix_tot_inflight)
unix_gc(); /* Garbage collect fds */
return 0;
struct unix_sock *u = unix_sk(sk);
err = -EOPNOTSUPP;
- if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
- goto out; /* Only stream/seqpacket sockets accept */
+ if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
+ goto out; /* Only stream/seqpacket sockets accept */
err = -EINVAL;
if (!u->addr)
- goto out; /* No listens on an unbound socket */
+ goto out; /* No listens on an unbound socket */
unix_state_lock(sk);
if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
goto out_unlock;
sk->sk_max_ack_backlog = backlog;
sk->sk_state = TCP_LISTEN;
/* set credentials so connect can copy them */
- sk->sk_peercred.pid = current->tgid;
- sk->sk_peercred.uid = current->euid;
- sk->sk_peercred.gid = current->egid;
+ sk->sk_peercred.pid = task_tgid_vnr(current);
+ current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
err = 0;
out_unlock:
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
+static unsigned int unix_dgram_poll(struct file *, struct socket *,
+ poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
.socketpair = unix_socketpair,
.accept = sock_no_accept,
.getname = unix_getname,
- .poll = datagram_poll,
+ .poll = unix_dgram_poll,
.ioctl = unix_ioctl,
.listen = sock_no_listen,
.shutdown = unix_shutdown,
.socketpair = unix_socketpair,
.accept = unix_accept,
.getname = unix_getname,
- .poll = datagram_poll,
+ .poll = unix_dgram_poll,
.ioctl = unix_ioctl,
.listen = unix_listen,
.shutdown = unix_shutdown,
};
/*
 * Protocol descriptor for AF_UNIX: handed to proto_register() at init time
 * and to sk_alloc() for every new socket.  obj_size is the size of
 * struct unix_sock.
 */
static struct proto unix_proto = {
- .name = "UNIX",
- .owner = THIS_MODULE,
- .obj_size = sizeof(struct unix_sock),
+ .name = "UNIX",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct unix_sock),
};
/*
*/
static struct lock_class_key af_unix_sk_receive_queue_lock_key;
/*
 * Allocate and initialise one AF_UNIX sock in network namespace 'net'.
 * 'sock' is forwarded to sock_init_data(); unix_stream_connect() calls this
 * with sock == NULL.
 *
 * unix_nr_socks is incremented before the limit check and decremented again
 * at out: whenever no sock is returned.  On success the per-namespace in-use
 * counter for the protocol is incremented and the new sock, already linked
 * into unix_sockets_unbound, is returned.  Returns NULL on failure.
 */
-static struct sock * unix_create1(struct net *net, struct socket *sock)
+static struct sock *unix_create1(struct net *net, struct socket *sock)
{
struct sock *sk = NULL;
struct unix_sock *u;
- if (atomic_read(&unix_nr_socks) >= 2*get_max_files())
+ atomic_inc(&unix_nr_socks); /* counted up front; undone at out: on failure */
+ if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
goto out; /* limit: twice get_max_files() sockets */
- sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, 1);
+ sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
if (!sk)
goto out;
- atomic_inc(&unix_nr_socks);
-
- sock_init_data(sock,sk);
+ sock_init_data(sock, sk);
lockdep_set_class(&sk->sk_receive_queue.lock,
&af_unix_sk_receive_queue_lock_key);
sk->sk_write_space = unix_write_space;
- sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen;
+ sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; /* per-namespace default */
sk->sk_destruct = unix_sock_destructor;
u = unix_sk(sk);
u->dentry = NULL;
u->mnt = NULL;
spin_lock_init(&u->lock);
- atomic_set(&u->inflight, 0);
+ atomic_long_set(&u->inflight, 0);
INIT_LIST_HEAD(&u->link);
mutex_init(&u->readlock); /* single task reading lock */
init_waitqueue_head(&u->peer_wait);
unix_insert_socket(unix_sockets_unbound, sk);
out:
/* sk is still NULL here on both failure paths (limit hit, sk_alloc failed) */
+ if (sk == NULL)
+ atomic_dec(&unix_nr_socks);
+ else {
+ local_bh_disable();
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ local_bh_enable();
+ }
return sk;
}
-static int unix_create(struct net *net, struct socket *sock, int protocol)
+static int unix_create(struct net *net, struct socket *sock, int protocol,
+ int kern)
{
- if (net != &init_net)
- return -EAFNOSUPPORT;
-
if (protocol && protocol != PF_UNIX)
return -EPROTONOSUPPORT;
* nothing uses it.
*/
case SOCK_RAW:
- sock->type=SOCK_DGRAM;
+ sock->type = SOCK_DGRAM;
case SOCK_DGRAM:
sock->ops = &unix_dgram_ops;
break;
sock->sk = NULL;
- return unix_release_sock (sk, 0);
+ return unix_release_sock(sk, 0);
}
static int unix_autobind(struct socket *sock)
{
struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
struct unix_sock *u = unix_sk(sk);
static u32 ordernum = 1;
- struct unix_address * addr;
+ struct unix_address *addr;
int err;
mutex_lock(&u->readlock);
retry:
addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
- addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
+ addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
spin_lock(&unix_table_lock);
ordernum = (ordernum+1)&0xFFFFF;
- if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
+ if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
addr->hash)) {
spin_unlock(&unix_table_lock);
/* Sanity yield. It is unusual case, but yet... */
return err;
}
-static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
+static struct sock *unix_find_other(struct net *net,
+ struct sockaddr_un *sunname, int len,
int type, unsigned hash, int *error)
{
struct sock *u;
- struct nameidata nd;
+ struct path path;
int err = 0;
if (sunname->sun_path[0]) {
- err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
+ struct inode *inode;
+ err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
if (err)
goto fail;
- err = vfs_permission(&nd, MAY_WRITE);
+ inode = path.dentry->d_inode;
+ err = inode_permission(inode, MAY_WRITE);
if (err)
goto put_fail;
err = -ECONNREFUSED;
- if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
+ if (!S_ISSOCK(inode->i_mode))
goto put_fail;
- u=unix_find_socket_byinode(nd.dentry->d_inode);
+ u = unix_find_socket_byinode(net, inode);
if (!u)
goto put_fail;
if (u->sk_type == type)
- touch_atime(nd.mnt, nd.dentry);
+ touch_atime(path.mnt, path.dentry);
- path_release(&nd);
+ path_put(&path);
- err=-EPROTOTYPE;
+ err = -EPROTOTYPE;
if (u->sk_type != type) {
sock_put(u);
goto fail;
}
} else {
err = -ECONNREFUSED;
- u=unix_find_socket_byname(sunname, len, type, hash);
+ u = unix_find_socket_byname(net, sunname, len, type, hash);
if (u) {
struct dentry *dentry;
dentry = unix_sk(u)->dentry;
return u;
put_fail:
- path_release(&nd);
+ path_put(&path);
fail:
- *error=err;
+ *error = err;
return NULL;
}
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
struct unix_sock *u = unix_sk(sk);
- struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
- struct dentry * dentry = NULL;
+ struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
+ struct dentry *dentry = NULL;
struct nameidata nd;
int err;
unsigned hash;
if (sunaddr->sun_family != AF_UNIX)
goto out;
- if (addr_len==sizeof(short)) {
+ if (addr_len == sizeof(short)) {
err = unix_autobind(sock);
goto out;
}
* All right, let's create it.
*/
mode = S_IFSOCK |
- (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
- err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
+ (SOCK_INODE(sock)->i_mode & ~current_umask());
+ err = mnt_want_write(nd.path.mnt);
+ if (err)
+ goto out_mknod_dput;
+ err = security_path_mknod(&nd.path, dentry, mode, 0);
+ if (err)
+ goto out_mknod_drop_write;
+ err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
+out_mknod_drop_write:
+ mnt_drop_write(nd.path.mnt);
if (err)
goto out_mknod_dput;
- mutex_unlock(&nd.dentry->d_inode->i_mutex);
- dput(nd.dentry);
- nd.dentry = dentry;
+ mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+ dput(nd.path.dentry);
+ nd.path.dentry = dentry;
addr->hash = UNIX_HASH_SIZE;
}
if (!sunaddr->sun_path[0]) {
err = -EADDRINUSE;
- if (__unix_find_socket_byname(sunaddr, addr_len,
+ if (__unix_find_socket_byname(net, sunaddr, addr_len,
sk->sk_type, hash)) {
unix_release_addr(addr);
goto out_unlock;
list = &unix_socket_table[addr->hash];
} else {
list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
- u->dentry = nd.dentry;
- u->mnt = nd.mnt;
+ u->dentry = nd.path.dentry;
+ u->mnt = nd.path.mnt;
}
err = 0;
out_mknod_dput:
dput(dentry);
out_mknod_unlock:
- mutex_unlock(&nd.dentry->d_inode->i_mutex);
- path_release(&nd);
+ mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+ path_put(&nd.path);
out_mknod_parent:
- if (err==-EEXIST)
- err=-EADDRINUSE;
+ if (err == -EEXIST)
+ err = -EADDRINUSE;
unix_release_addr(addr);
goto out_up;
}
int alen, int flags)
{
struct sock *sk = sock->sk;
- struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
+ struct net *net = sock_net(sk);
+ struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
struct sock *other;
unsigned hash;
int err;
goto out;
restart:
- other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
+ other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
if (!other)
goto out;
*/
if (unix_peer(sk)) {
struct sock *old_peer = unix_peer(sk);
- unix_peer(sk)=other;
+ unix_peer(sk) = other;
unix_state_double_unlock(sk, other);
if (other != old_peer)
unix_dgram_disconnected(sk, old_peer);
sock_put(old_peer);
} else {
- unix_peer(sk)=other;
+ unix_peer(sk) = other;
unix_state_double_unlock(sk, other);
}
return 0;
sched = !sock_flag(other, SOCK_DEAD) &&
!(other->sk_shutdown & RCV_SHUTDOWN) &&
- (skb_queue_len(&other->sk_receive_queue) >
- other->sk_max_ack_backlog);
+ unix_recvq_full(other);
unix_state_unlock(other);
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
- struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
+ struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
struct unix_sock *u = unix_sk(sk), *newu, *otheru;
struct sock *newsk = NULL;
struct sock *other = NULL;
goto out;
addr_len = err;
- if (test_bit(SOCK_PASSCRED, &sock->flags)
- && !u->addr && (err = unix_autobind(sock)) != 0)
+ if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
+ (err = unix_autobind(sock)) != 0)
goto out;
timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
err = -ENOMEM;
/* create new sock for complete connection */
- newsk = unix_create1(sk->sk_net, NULL);
+ newsk = unix_create1(sock_net(sk), NULL);
if (newsk == NULL)
goto out;
restart:
/* Find listening sock. */
- other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
+ other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
if (!other)
goto out;
err = -ECONNREFUSED;
if (other->sk_state != TCP_LISTEN)
goto out_unlock;
+ if (other->sk_shutdown & RCV_SHUTDOWN)
+ goto out_unlock;
- if (skb_queue_len(&other->sk_receive_queue) >
- other->sk_max_ack_backlog) {
+ if (unix_recvq_full(other)) {
err = -EAGAIN;
if (!timeo)
goto out_unlock;
unix_peer(newsk) = sk;
newsk->sk_state = TCP_ESTABLISHED;
newsk->sk_type = sk->sk_type;
- newsk->sk_peercred.pid = current->tgid;
- newsk->sk_peercred.uid = current->euid;
- newsk->sk_peercred.gid = current->egid;
+ newsk->sk_peercred.pid = task_tgid_vnr(current);
+ current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
newu = unix_sk(newsk);
newsk->sk_sleep = &newu->peer_wait;
otheru = unix_sk(other);
unix_state_unlock(other);
out:
- if (skb)
- kfree_skb(skb);
+ kfree_skb(skb);
if (newsk)
unix_release_sock(newsk, 0);
if (other)
static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
- struct sock *ska=socka->sk, *skb = sockb->sk;
+ struct sock *ska = socka->sk, *skb = sockb->sk;
/* Join our sockets back to back */
sock_hold(ska);
sock_hold(skb);
- unix_peer(ska)=skb;
- unix_peer(skb)=ska;
- ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
- ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
- ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
+ unix_peer(ska) = skb;
+ unix_peer(skb) = ska;
+ ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
+ current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
+ ska->sk_peercred.uid = skb->sk_peercred.uid;
+ ska->sk_peercred.gid = skb->sk_peercred.gid;
if (ska->sk_type != SOCK_DGRAM) {
ska->sk_state = TCP_ESTABLISHED;
int err;
err = -EOPNOTSUPP;
- if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
+ if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
goto out;
err = -EINVAL;
{
struct sock *sk = sock->sk;
struct unix_sock *u;
- struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
+ DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
int err = 0;
if (peer) {
skb->destructor = sock_wfree;
UNIXCB(skb).fp = NULL;
- for (i=scm->fp->count-1; i>=0; i--)
+ for (i = scm->fp->count-1; i >= 0; i--)
unix_notinflight(scm->fp->fp[i]);
}
sock_wfree(skb);
}
/*
 * Attach the files carried in scm->fp to skb.
 *
 * A duplicate of the fp list (scm_fp_dup()) is stored in UNIXCB(skb).fp,
 * unix_inflight() is called for every file in scm->fp, and
 * unix_destruct_fds is installed as the skb destructor.  scm->fp itself is
 * left in place.
 *
 * Returns 0 on success, or -ENOMEM when scm_fp_dup() returns NULL; in that
 * case no file has been marked in flight and the destructor is not set.
 */
-static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
int i;
- for (i=scm->fp->count-1; i>=0; i--)
+
+ /*
+ * Need to duplicate file references for the sake of garbage
+ * collection. Otherwise a socket in the fps might become a
+ * candidate for GC while the skb is not yet queued.
+ */
+ UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+ if (!UNIXCB(skb).fp)
+ return -ENOMEM;
+
+ for (i = scm->fp->count-1; i >= 0; i--)
unix_inflight(scm->fp->fp[i]);
- UNIXCB(skb).fp = scm->fp;
skb->destructor = unix_destruct_fds;
- scm->fp = NULL;
+ return 0;
}
/*
{
struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
struct unix_sock *u = unix_sk(sk);
- struct sockaddr_un *sunaddr=msg->msg_name;
+ struct sockaddr_un *sunaddr = msg->msg_name;
struct sock *other = NULL;
int namelen = 0; /* fake GCC */
int err;
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
+ wait_for_unix_gc();
err = scm_send(sock, msg, siocb->scm);
if (err < 0)
return err;
goto out;
}
- if (test_bit(SOCK_PASSCRED, &sock->flags)
- && !u->addr && (err = unix_autobind(sock)) != 0)
+ if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
+ && (err = unix_autobind(sock)) != 0)
goto out;
err = -EMSGSIZE;
goto out;
skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
- if (skb==NULL)
+ if (skb == NULL)
goto out;
memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
- if (siocb->scm->fp)
- unix_attach_fds(siocb->scm, skb);
+ if (siocb->scm->fp) {
+ err = unix_attach_fds(siocb->scm, skb);
+ if (err)
+ goto out_free;
+ }
unix_get_secdata(siocb->scm, skb);
skb_reset_transport_header(skb);
- err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
+ err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
if (err)
goto out_free;
if (sunaddr == NULL)
goto out_free;
- other = unix_find_other(sunaddr, namelen, sk->sk_type,
+ other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
hash, &err);
- if (other==NULL)
+ if (other == NULL)
goto out_free;
}
err = 0;
unix_state_lock(sk);
if (unix_peer(sk) == other) {
- unix_peer(sk)=NULL;
+ unix_peer(sk) = NULL;
unix_state_unlock(sk);
unix_dgram_disconnected(sk, other);
goto out_unlock;
}
- if (unix_peer(other) != sk &&
- (skb_queue_len(&other->sk_receive_queue) >
- other->sk_max_ack_backlog)) {
+ if (unix_peer(other) != sk && unix_recvq_full(other)) {
if (!timeo) {
err = -EAGAIN;
goto out_unlock;
struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
struct sock *sk = sock->sk;
struct sock *other = NULL;
- struct sockaddr_un *sunaddr=msg->msg_name;
- int err,size;
+ struct sockaddr_un *sunaddr = msg->msg_name;
+ int err, size;
struct sk_buff *skb;
- int sent=0;
+ int sent = 0;
struct scm_cookie tmp_scm;
+ bool fds_sent = false;
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
+ wait_for_unix_gc();
err = scm_send(sock, msg, siocb->scm);
if (err < 0)
return err;
if (sk->sk_shutdown & SEND_SHUTDOWN)
goto pipe_err;
- while(sent < len)
- {
+ while (sent < len) {
/*
* Optimisation for the fact that under 0.01% of X
* messages typically need breaking up.
* Grab a buffer
*/
- skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
+ skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
+ &err);
- if (skb==NULL)
+ if (skb == NULL)
goto out_err;
/*
size = min_t(int, size, skb_tailroom(skb));
memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
- if (siocb->scm->fp)
- unix_attach_fds(siocb->scm, skb);
+ /* Only send the fds in the first buffer */
+ if (siocb->scm->fp && !fds_sent) {
+ err = unix_attach_fds(siocb->scm, skb);
+ if (err) {
+ kfree_skb(skb);
+ goto out_err;
+ }
+ fds_sent = true;
+ }
- if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
+ err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
+ if (err) {
kfree_skb(skb);
goto out_err;
}
skb_queue_tail(&other->sk_receive_queue, skb);
unix_state_unlock(other);
other->sk_data_ready(other, size);
- sent+=size;
+ sent += size;
}
scm_destroy(siocb->scm);
unix_state_unlock(other);
kfree_skb(skb);
pipe_err:
- if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
- send_sig(SIGPIPE,current,0);
+ if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
+ send_sig(SIGPIPE, current, 0);
err = -EPIPE;
out_err:
scm_destroy(siocb->scm);
mutex_lock(&u->readlock);
skb = skb_recv_datagram(sk, flags, noblock, &err);
- if (!skb)
+ if (!skb) {
+ unix_state_lock(sk);
+ /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
+ if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
+ (sk->sk_shutdown & RCV_SHUTDOWN))
+ err = 0;
+ unix_state_unlock(sk);
goto out_unlock;
+ }
- wake_up_interruptible(&u->peer_wait);
+ wake_up_interruptible_sync(&u->peer_wait);
if (msg->msg_name)
unix_copy_addr(msg, skb->sk);
siocb->scm->creds = *UNIXCREDS(skb);
unix_set_secdata(siocb->scm, skb);
- if (!(flags & MSG_PEEK))
- {
+ if (!(flags & MSG_PEEK)) {
if (UNIXCB(skb).fp)
unix_detach_fds(siocb->scm, skb);
- }
- else
- {
+ } else {
/* It is questionable: on PEEK we could:
- do not return fds - good, but too simple 8)
- return fds, and do not return them on read (old strategy,
scm_recv(sock, msg, siocb->scm, flags);
out_free:
- skb_free_datagram(sk,skb);
+ skb_free_datagram(sk, skb);
out_unlock:
mutex_unlock(&u->readlock);
out:
* Sleep until data has arrived. But check for races..
*/
-static long unix_stream_data_wait(struct sock * sk, long timeo)
+static long unix_stream_data_wait(struct sock *sk, long timeo)
{
DEFINE_WAIT(wait);
struct scm_cookie tmp_scm;
struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk);
- struct sockaddr_un *sunaddr=msg->msg_name;
+ struct sockaddr_un *sunaddr = msg->msg_name;
int copied = 0;
int check_creds = 0;
int target;
mutex_lock(&u->readlock);
- do
- {
+ do {
int chunk;
struct sk_buff *skb;
unix_state_lock(sk);
skb = skb_dequeue(&sk->sk_receive_queue);
- if (skb==NULL)
- {
+ if (skb == NULL) {
if (copied >= target)
goto unlock;
* POSIX 1003.1g mandates this order.
*/
- if ((err = sock_error(sk)) != 0)
+ err = sock_error(sk);
+ if (err)
goto unlock;
if (sk->sk_shutdown & RCV_SHUTDOWN)
goto unlock;
if (check_creds) {
/* Never glue messages from different writers */
- if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
+ if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
+ sizeof(siocb->scm->creds)) != 0) {
skb_queue_head(&sk->sk_receive_queue, skb);
break;
}
}
/* Copy address just once */
- if (sunaddr)
- {
+ if (sunaddr) {
unix_copy_addr(msg, skb->sk);
sunaddr = NULL;
}
size -= chunk;
/* Mark read part of skb as used */
- if (!(flags & MSG_PEEK))
- {
+ if (!(flags & MSG_PEEK)) {
skb_pull(skb, chunk);
if (UNIXCB(skb).fp)
unix_detach_fds(siocb->scm, skb);
/* put the skb back if we didn't use it up.. */
- if (skb->len)
- {
+ if (skb->len) {
skb_queue_head(&sk->sk_receive_queue, skb);
break;
}
if (siocb->scm->fp)
break;
- }
- else
- {
+ } else {
/* It is questionable, see note in unix_dgram_recvmsg.
*/
if (UNIXCB(skb).fp)
if (mode) {
unix_state_lock(sk);
sk->sk_shutdown |= mode;
- other=unix_peer(sk);
+ other = unix_peer(sk);
if (other)
sock_hold(other);
unix_state_unlock(sk);
other->sk_state_change(other);
read_lock(&other->sk_callback_lock);
if (peer_mode == SHUTDOWN_MASK)
- sk_wake_async(other,1,POLL_HUP);
+ sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
else if (peer_mode & RCV_SHUTDOWN)
- sk_wake_async(other,1,POLL_IN);
+ sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
read_unlock(&other->sk_callback_lock);
}
if (other)
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct sock *sk = sock->sk;
- long amount=0;
+ long amount = 0;
int err;
- switch(cmd)
- {
- case SIOCOUTQ:
- amount = atomic_read(&sk->sk_wmem_alloc);
- err = put_user(amount, (int __user *)arg);
- break;
- case SIOCINQ:
+ switch (cmd) {
+ case SIOCOUTQ:
+ amount = sk_wmem_alloc_get(sk);
+ err = put_user(amount, (int __user *)arg);
+ break;
+ case SIOCINQ:
{
struct sk_buff *skb;
} else {
skb = skb_peek(&sk->sk_receive_queue);
if (skb)
- amount=skb->len;
+ amount = skb->len;
}
spin_unlock(&sk->sk_receive_queue.lock);
err = put_user(amount, (int __user *)arg);
break;
}
- default:
- err = -ENOIOCTLCMD;
- break;
+ default:
+ err = -ENOIOCTLCMD;
+ break;
}
return err;
}
-static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
+static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
unsigned int mask;
- poll_wait(file, sk->sk_sleep, wait);
+ sock_poll_wait(file, sk->sk_sleep, wait);
mask = 0;
/* exceptional events? */
mask |= POLLIN | POLLRDNORM;
/* Connection-based need to check for termination and startup */
- if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
+ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
+ sk->sk_state == TCP_CLOSE)
mask |= POLLHUP;
/*
return mask;
}
/*
 * Poll handler for the socket ops tables that set .poll = unix_dgram_poll.
 *
 * Registers on sk->sk_sleep and builds the POLL* event mask from the error
 * state, the shutdown flags, the receive queue and, for SOCK_SEQPACKET, the
 * connection state.  Write readiness also takes the peer's receive queue
 * into account (see the "writable?" section).  Returns the event mask.
 */
+static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
+{
+ struct sock *sk = sock->sk, *other;
+ unsigned int mask, writable;
+
+ sock_poll_wait(file, sk->sk_sleep, wait);
+ mask = 0;
+
+ /* exceptional events? */
+ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ mask |= POLLERR;
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ mask |= POLLRDHUP;
+ if (sk->sk_shutdown == SHUTDOWN_MASK)
+ mask |= POLLHUP;
+
+ /* readable? (queued data, or the receive side has been shut down) */
+ if (!skb_queue_empty(&sk->sk_receive_queue) ||
+ (sk->sk_shutdown & RCV_SHUTDOWN))
+ mask |= POLLIN | POLLRDNORM;
+
+ /* Connection-based need to check for termination and startup */
+ if (sk->sk_type == SOCK_SEQPACKET) {
+ if (sk->sk_state == TCP_CLOSE)
+ mask |= POLLHUP;
+ /* connection hasn't started yet? then report no write bits */
+ if (sk->sk_state == TCP_SYN_SENT)
+ return mask;
+ }
+
+ /*
+ * writable?
+ * unix_writable() must hold.  In addition, when sk has a peer that
+ * is not connected back to sk, the poll also waits on the peer's
+ * peer_wait queue, and sk counts as not writable while the peer's
+ * receive queue is full.
+ */
+ writable = unix_writable(sk);
+ if (writable) {
+ other = unix_peer_get(sk);
+ if (other) {
+ if (unix_peer(other) != sk) {
+ sock_poll_wait(file, &unix_sk(other)->peer_wait,
+ wait);
+ if (unix_recvq_full(other))
+ writable = 0;
+ }
+
+ sock_put(other);
+ }
+ }
+
+ if (writable)
+ mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ else
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); /* flag the lack of space on the socket */
+
+ return mask;
+}
#ifdef CONFIG_PROC_FS
-static struct sock *unix_seq_idx(int *iter, loff_t pos)
/*
 * Scan unix_socket_table from chain 0 through chain UNIX_HASH_SIZE and
 * return the head socket of the first non-empty chain, leaving that chain's
 * index in *i.  Returns NULL (with *i past UNIX_HASH_SIZE) when every chain
 * is empty.
 */
+static struct sock *first_unix_socket(int *i)
+{
+ for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
+ if (!hlist_empty(&unix_socket_table[*i]))
+ return __sk_head(&unix_socket_table[*i]);
+ }
+ return NULL;
+}
+
/*
 * Successor of s in table order: the next socket on s's chain if there is
 * one, otherwise the head of the next non-empty chain after index *i (which
 * is advanced accordingly).  Returns NULL once the table is exhausted.
 */
+static struct sock *next_unix_socket(int *i, struct sock *s)
{
+ struct sock *next = sk_next(s);
+ /* More in this chain? */
+ if (next)
+ return next;
+ /* Look for next non-empty chain. */
+ for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
+ if (!hlist_empty(&unix_socket_table[*i]))
+ return __sk_head(&unix_socket_table[*i]);
+ }
+ return NULL;
+}
+
/*
 * Private state of the seq_file iterator; unix_seq_open() passes its size
 * to seq_open_net() and the iterator reads it back from seq->private.
 */
+struct unix_iter_state {
+ struct seq_net_private p; /* presumably must stay first for seq_file_net() -- confirm */
+ int i; /* current unix_socket_table chain index used by first_/next_unix_socket() */
+};
+
+static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
+{
+ struct unix_iter_state *iter = seq->private;
loff_t off = 0;
struct sock *s;
- for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
+ for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
+ if (sock_net(s) != seq_file_net(seq))
+ continue;
if (off == pos)
return s;
++off;
return NULL;
}
-
/*
 * seq_file start hook.  Takes unix_table_lock, which unix_seq_stop()
 * releases.  Position 0 yields the start token (unix_seq_show() prints the
 * header line for it); position N > 0 yields the socket at index N - 1 as
 * found by unix_seq_idx(), or NULL when there is none.
 */
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(unix_table_lock)
{
spin_lock(&unix_table_lock);
- return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
+ return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
/*
 * seq_file next hook.  Advances *pos and returns the socket after v: the
 * first socket in the table when v is the start token, otherwise v's
 * successor.  Sockets whose namespace differs from the seq_file's are
 * skipped.  Returns NULL at the end of the table.
 */
static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct unix_iter_state *iter = seq->private;
+ struct sock *sk = v;
++*pos;
- if (v == (void *)1)
- return first_unix_socket(seq->private);
- return next_unix_socket(seq->private, v);
+ if (v == SEQ_START_TOKEN)
+ sk = first_unix_socket(&iter->i);
+ else
+ sk = next_unix_socket(&iter->i, sk);
+ while (sk && (sock_net(sk) != seq_file_net(seq))) /* skip other namespaces' sockets */
+ sk = next_unix_socket(&iter->i, sk);
+ return sk;
}
/* seq_file stop hook: drops the unix_table_lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
+ __releases(unix_table_lock)
{
spin_unlock(&unix_table_lock);
}
static int unix_seq_show(struct seq_file *seq, void *v)
{
- if (v == (void *)1)
+ if (v == SEQ_START_TOKEN)
seq_puts(seq, "Num RefCount Protocol Flags Type St "
"Inode Path\n");
else {
.show = unix_seq_show,
};
-
/*
 * Open hook for the proc file: sets up a seq_file over unix_seq_ops through
 * seq_open_net(), with a struct unix_iter_state as the private state.
 * Returns seq_open_net()'s result.
 */
static int unix_seq_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &unix_seq_ops, sizeof(int));
+ return seq_open_net(inode, file, &unix_seq_ops,
+ sizeof(struct unix_iter_state));
}
/*
 * File operations handed to proc_net_fops_create() for the "unix" entry.
 * The release hook is the counterpart of the seq_open_net() call made in
 * unix_seq_open().
 */
static const struct file_operations unix_seq_fops = {
.open = unix_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
#endif
/*
 * Address-family record given to sock_register(): socket creation for
 * PF_UNIX is routed to unix_create().
 */
-static struct net_proto_family unix_family_ops = {
+static const struct net_proto_family unix_family_ops = {
.family = PF_UNIX,
.create = unix_create,
.owner = THIS_MODULE,
};
+
/*
 * Per-network-namespace setup: defaults the namespace's max_dgram_qlen
 * sysctl value to 10, registers the namespace's sysctl table and, with
 * CONFIG_PROC_FS, creates its "unix" proc entry.
 *
 * Returns 0 on success, or -ENOMEM when either registration fails; the
 * sysctl registration is undone if the proc entry cannot be created.
 */
+static int __net_init unix_net_init(struct net *net)
+{
+ int error = -ENOMEM;
+
+ net->unx.sysctl_max_dgram_qlen = 10;
+ if (unix_sysctl_register(net))
+ goto out;
+
+#ifdef CONFIG_PROC_FS
+ if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
+ unix_sysctl_unregister(net);
+ goto out;
+ }
+#endif
+ error = 0;
+out:
+ return error;
+}
+
/*
 * Per-network-namespace teardown: unregisters the namespace's sysctl table
 * and removes its "unix" proc entry.
 */
+static void __net_exit unix_net_exit(struct net *net)
+{
+ unix_sysctl_unregister(net);
+ proc_net_remove(net, "unix");
+}
+
/*
 * Per-namespace hooks; registered with register_pernet_subsys() in
 * af_unix_init() and unregistered in af_unix_exit().
 */
+static struct pernet_operations unix_net_ops = {
+ .init = unix_net_init,
+ .exit = unix_net_exit,
+};
+
/*
 * Subsystem initialisation: registers unix_proto, then the PF_UNIX socket
 * family, then the per-namespace operations (which take over the proc and
 * sysctl setup this function used to do directly).
 *
 * Returns the result of proto_register(); a failure there is logged and
 * nothing else is registered.
 * NOTE(review): the return values of sock_register() and
 * register_pernet_subsys() are not checked here, so rc reflects
 * proto_register() only.
 */
static int __init af_unix_init(void)
{
int rc = -1;
rc = proto_register(&unix_proto, 1);
if (rc != 0) {
printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
- __FUNCTION__);
+ __func__);
goto out;
}
sock_register(&unix_family_ops);
-#ifdef CONFIG_PROC_FS
- proc_net_fops_create(&init_net, "unix", 0, &unix_seq_fops);
-#endif
- unix_sysctl_register();
+ register_pernet_subsys(&unix_net_ops);
out:
return rc;
}
/*
 * Module exit: unregisters the PF_UNIX family, the protocol descriptor and
 * the per-namespace operations, in that order.
 */
static void __exit af_unix_exit(void)
{
sock_unregister(PF_UNIX);
- unix_sysctl_unregister();
- proc_net_remove(&init_net, "unix");
proto_unregister(&unix_proto);
+ unregister_pernet_subsys(&unix_net_ops);
}
-module_init(af_unix_init);
+/* Earlier than device_initcall() so that other drivers invoking
+ request_module() don't end up in a loop when modprobe tries
+ to use a UNIX socket. But later than subsys_initcall() because
+ we depend on stuff initialised there */
+fs_initcall(af_unix_init);
module_exit(af_unix_exit);
MODULE_LICENSE("GPL");