/*
* NET4: Implementation of BSD Unix domain sockets.
*
- * Authors: Alan Cox, <alan.cox@linux.org>
+ * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Version: $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
- *
* Fixes:
* Linus Torvalds : Assorted bug cures.
* Niibe Yutaka : async I/O support.
return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
}
+static inline int unix_recvq_full(struct sock const *sk)
+{
+ return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
+}
+
static struct sock *unix_peer_get(struct sock *s)
{
struct sock *peer;
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
- BUG_TRAP(sk_unhashed(sk));
+ WARN_ON(!sk_unhashed(sk));
sk_add_node(sk, list);
}
skb_queue_purge(&sk->sk_receive_queue);
- BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
- BUG_TRAP(sk_unhashed(sk));
- BUG_TRAP(!sk->sk_socket);
+ WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+ WARN_ON(!sk_unhashed(sk));
+ WARN_ON(sk->sk_socket);
if (!sock_flag(sk, SOCK_DEAD)) {
printk("Attempt to release alive unix socket: %p\n", sk);
return;
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
+static unsigned int unix_dgram_poll(struct file *, struct socket *,
+ poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
.socketpair = unix_socketpair,
.accept = sock_no_accept,
.getname = unix_getname,
- .poll = datagram_poll,
+ .poll = unix_dgram_poll,
.ioctl = unix_ioctl,
.listen = sock_no_listen,
.shutdown = unix_shutdown,
.socketpair = unix_socketpair,
.accept = unix_accept,
.getname = unix_getname,
- .poll = datagram_poll,
+ .poll = unix_dgram_poll,
.ioctl = unix_ioctl,
.listen = unix_listen,
.shutdown = unix_shutdown,
u->dentry = NULL;
u->mnt = NULL;
spin_lock_init(&u->lock);
- atomic_set(&u->inflight, 0);
+ atomic_long_set(&u->inflight, 0);
INIT_LIST_HEAD(&u->link);
mutex_init(&u->readlock); /* single task reading lock */
init_waitqueue_head(&u->peer_wait);
int type, unsigned hash, int *error)
{
struct sock *u;
- struct nameidata nd;
+ struct path path;
int err = 0;
if (sunname->sun_path[0]) {
- err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
+ struct inode *inode;
+ err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
if (err)
goto fail;
- err = vfs_permission(&nd, MAY_WRITE);
+ inode = path.dentry->d_inode;
+ err = inode_permission(inode, MAY_WRITE);
if (err)
goto put_fail;
err = -ECONNREFUSED;
- if (!S_ISSOCK(nd.path.dentry->d_inode->i_mode))
+ if (!S_ISSOCK(inode->i_mode))
goto put_fail;
- u = unix_find_socket_byinode(net, nd.path.dentry->d_inode);
+ u = unix_find_socket_byinode(net, inode);
if (!u)
goto put_fail;
if (u->sk_type == type)
- touch_atime(nd.path.mnt, nd.path.dentry);
+ touch_atime(path.mnt, path.dentry);
- path_put(&nd.path);
+ path_put(&path);
err=-EPROTOTYPE;
if (u->sk_type != type) {
return u;
put_fail:
- path_put(&nd.path);
+ path_put(&path);
fail:
*error=err;
return NULL;
*/
mode = S_IFSOCK |
(SOCK_INODE(sock)->i_mode & ~current->fs->umask);
+ err = mnt_want_write(nd.path.mnt);
+ if (err)
+ goto out_mknod_dput;
err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
+ mnt_drop_write(nd.path.mnt);
if (err)
goto out_mknod_dput;
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
sched = !sock_flag(other, SOCK_DEAD) &&
!(other->sk_shutdown & RCV_SHUTDOWN) &&
- (skb_queue_len(&other->sk_receive_queue) >
- other->sk_max_ack_backlog);
+ unix_recvq_full(other);
unix_state_unlock(other);
if (other->sk_state != TCP_LISTEN)
goto out_unlock;
- if (skb_queue_len(&other->sk_receive_queue) >
- other->sk_max_ack_backlog) {
+ if (unix_recvq_full(other)) {
err = -EAGAIN;
if (!timeo)
goto out_unlock;
sock_wfree(skb);
}
-static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
int i;
+
+ /*
+ * Need to duplicate file references for the sake of garbage
+ * collection. Otherwise a socket in the fps might become a
+ * candidate for GC while the skb is not yet queued.
+ */
+ UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+ if (!UNIXCB(skb).fp)
+ return -ENOMEM;
+
for (i=scm->fp->count-1; i>=0; i--)
unix_inflight(scm->fp->fp[i]);
- UNIXCB(skb).fp = scm->fp;
skb->destructor = unix_destruct_fds;
- scm->fp = NULL;
+ return 0;
}
/*
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
+ wait_for_unix_gc();
err = scm_send(sock, msg, siocb->scm);
if (err < 0)
return err;
goto out;
memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
- if (siocb->scm->fp)
- unix_attach_fds(siocb->scm, skb);
+ if (siocb->scm->fp) {
+ err = unix_attach_fds(siocb->scm, skb);
+ if (err)
+ goto out_free;
+ }
unix_get_secdata(siocb->scm, skb);
skb_reset_transport_header(skb);
goto out_unlock;
}
- if (unix_peer(other) != sk &&
- (skb_queue_len(&other->sk_receive_queue) >
- other->sk_max_ack_backlog)) {
+ if (unix_peer(other) != sk && unix_recvq_full(other)) {
if (!timeo) {
err = -EAGAIN;
goto out_unlock;
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
+ wait_for_unix_gc();
err = scm_send(sock, msg, siocb->scm);
if (err < 0)
return err;
size = min_t(int, size, skb_tailroom(skb));
memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
- if (siocb->scm->fp)
- unix_attach_fds(siocb->scm, skb);
+ if (siocb->scm->fp) {
+ err = unix_attach_fds(siocb->scm, skb);
+ if (err) {
+ kfree_skb(skb);
+ goto out_err;
+ }
+ }
if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
kfree_skb(skb);
return mask;
}
+static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
+{
+ struct sock *sk = sock->sk, *other;
+ unsigned int mask, writable;
+
+ poll_wait(file, sk->sk_sleep, wait);
+ mask = 0;
+
+ /* exceptional events? */
+ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ mask |= POLLERR;
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ mask |= POLLRDHUP;
+ if (sk->sk_shutdown == SHUTDOWN_MASK)
+ mask |= POLLHUP;
+
+ /* readable? */
+ if (!skb_queue_empty(&sk->sk_receive_queue) ||
+ (sk->sk_shutdown & RCV_SHUTDOWN))
+ mask |= POLLIN | POLLRDNORM;
+
+ /* Connection-based need to check for termination and startup */
+ if (sk->sk_type == SOCK_SEQPACKET) {
+ if (sk->sk_state == TCP_CLOSE)
+ mask |= POLLHUP;
+ /* connection hasn't started yet? */
+ if (sk->sk_state == TCP_SYN_SENT)
+ return mask;
+ }
+
+ /* writable? */
+ writable = unix_writable(sk);
+ if (writable) {
+ other = unix_peer_get(sk);
+ if (other) {
+ if (unix_peer(other) != sk) {
+ poll_wait(file, &unix_sk(other)->peer_wait,
+ wait);
+ if (unix_recvq_full(other))
+ writable = 0;
+ }
+
+ sock_put(other);
+ }
+ }
+
+ if (writable)
+ mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ else
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+ return mask;
+}
#ifdef CONFIG_PROC_FS
static struct sock *first_unix_socket(int *i)
__acquires(unix_table_lock)
{
spin_lock(&unix_table_lock);
- return *pos ? unix_seq_idx(seq, *pos - 1) : ((void *) 1);
+ return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
struct sock *sk = v;
++*pos;
- if (v == (void *)1)
+ if (v == SEQ_START_TOKEN)
sk = first_unix_socket(&iter->i);
else
sk = next_unix_socket(&iter->i, sk);
static int unix_seq_show(struct seq_file *seq, void *v)
{
- if (v == (void *)1)
+ if (v == SEQ_START_TOKEN)
seq_puts(seq, "Num RefCount Protocol Flags Type St "
"Inode Path\n");
else {
#endif
error = 0;
out:
- return 0;
+ return error;
}
static void unix_net_exit(struct net *net)
unregister_pernet_subsys(&unix_net_ops);
}
-module_init(af_unix_init);
+/* Earlier than device_initcall() so that other drivers invoking
+ request_module() don't end up in a loop when modprobe tries
+ to use a UNIX socket. But later than subsys_initcall() because
+ we depend on stuff initialised there */
+fs_initcall(af_unix_init);
module_exit(af_unix_exit);
MODULE_LICENSE("GPL");