AF_UNIX: Fix deadlock on connecting to shutdown socket
[safe/jmp/linux-2.6] / net / unix / af_unix.c
1 /*
2  * NET4:        Implementation of BSD Unix domain sockets.
3  *
4  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  *              Linus Torvalds  :       Assorted bug cures.
13  *              Niibe Yutaka    :       async I/O support.
14  *              Carsten Paeth   :       PF_UNIX check, address fixes.
15  *              Alan Cox        :       Limit size of allocated blocks.
16  *              Alan Cox        :       Fixed the stupid socketpair bug.
17  *              Alan Cox        :       BSD compatibility fine tuning.
18  *              Alan Cox        :       Fixed a bug in connect when interrupted.
19  *              Alan Cox        :       Sorted out a proper draft version of
20  *                                      file descriptor passing hacked up from
21  *                                      Mike Shaver's work.
22  *              Marty Leisner   :       Fixes to fd passing
23  *              Nick Nevin      :       recvmsg bugfix.
24  *              Alan Cox        :       Started proper garbage collector
25  *              Heiko EiBfeldt  :       Missing verify_area check
26  *              Alan Cox        :       Started POSIXisms
27  *              Andreas Schwab  :       Replace inode by dentry for proper
28  *                                      reference counting
29  *              Kirk Petersen   :       Made this a module
30  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
31  *                                      Lots of bug fixes.
32  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
33  *                                      by the above two patches.
34  *           Andrea Arcangeli   :       If possible we block in connect(2)
35  *                                      if the max backlog of the listen socket
36  *                                      has been reached. This won't break
37  *                                      old apps and it will avoid a huge amount
38  *                                      of socks hashed (this is for unix_gc()
39  *                                      performance reasons).
40  *                                      Security fix that limits the max
41  *                                      number of socks to 2*max_files and
42  *                                      the number of skb queueable in the
43  *                                      dgram receiver.
44  *              Artur Skawina   :       Hash function optimizations
45  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
46  *            Malcolm Beattie   :       Set peercred for socketpair
47  *           Michal Ostrowski   :       Module initialization cleanup.
48  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
49  *                                      the core infrastructure is doing that
50  *                                      for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  *      [TO FIX]
56  *      ECONNREFUSED is not returned from one end of a connected() socket to the
57  *              other the moment one end closes.
58  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
59  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
60  *      [NOT TO FIX]
61  *      accept() returns a path name even if the connecting socket has closed
62  *              in the meantime (BSD loses the path and gives up).
63  *      accept() returns 0 length path for an unbound connector. BSD returns 16
64  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  *      BSD af_unix apparently has connect forgetting to block properly.
67  *              (need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  *      Bug fixes and improvements.
71  *              - client shutdown killed server socket.
72  *              - removed all useless cli/sti pairs.
73  *
74  *      Semantic changes/extensions.
75  *              - generic control message passing.
76  *              - SCM_CREDENTIALS control message.
77  *              - "Abstract" (not FS based) socket bindings.
78  *                Abstract names are sequences of bytes (not zero terminated)
79  *                started by 0, so that this name space does not intersect
80  *                with BSD names.
81  */
82
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
93 #include <linux/un.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
98 #include <linux/in.h>
99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117
/*
 * Global socket table. Slots 0..UNIX_HASH_SIZE-1 are hash chains: sockets
 * bound to abstract names are chained by (name hash ^ socket type), and
 * sockets bound to filesystem names by the low bits of the inode number.
 * The extra last slot, UNIX_HASH_SIZE, collects sockets that are not bound
 * yet (see unix_sockets_unbound).
 */
118 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
/* Protects unix_socket_table and the sockets' membership in it. */
119 static DEFINE_SPINLOCK(unix_table_lock);
/* Number of unix socks currently accounted for; capped in unix_create1(). */
120 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
121
/* Chain holding freshly created, not-yet-bound sockets. */
122 #define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])
123
/*
 * True when sk's bound address is an abstract name: unix_bind() stores
 * UNIX_HASH_SIZE as the hash of filesystem-bound addresses. Dereferences
 * ->addr, so sk must already have an address.
 */
124 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
125
126 #ifdef CONFIG_SECURITY_NETWORK
/*
 * Copy the security ID held in the scm cookie into the skb, at the
 * location UNIXSID(skb) designates. Note the direction: despite the "get"
 * in its name this helper writes to the skb.
 */
127 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
128 {
129         memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
130 }
131
/*
 * Load the security ID stored with the skb (at UNIXSID(skb)) into the scm
 * cookie. Note the direction: despite the "set" in its name this helper
 * reads from the skb.
 */
132 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
133 {
134         scm->secid = *UNIXSID(skb);
135 }
136 #else
/* Built without CONFIG_SECURITY_NETWORK: nothing to copy, so a no-op. */
137 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
138 { }
139
/* Built without CONFIG_SECURITY_NETWORK: nothing to copy, so a no-op. */
140 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
141 { }
142 #endif /* CONFIG_SECURITY_NETWORK */
143
144 /*
145  *  SMP locking strategy:
146  *    hash table is protected with spinlock unix_table_lock
147  *    each socket state is protected by separate spin lock.
148  */
149
/*
 * Reduce a checksum to a hash-table index: XOR the value with itself
 * shifted right by 16 and then by 8 bits, and keep only the bits selected
 * by UNIX_HASH_SIZE - 1.
 */
150 static inline unsigned unix_hash_fold(__wsum n)
151 {
152         unsigned folded = (__force unsigned)n;
153         folded = folded ^ (folded >> 16);
154         folded = (folded >> 8) ^ folded;
155         return (UNIX_HASH_SIZE - 1) & folded;
156 }
157
/* The peer pointer stored in sk's unix_sock; also used as an lvalue. */
158 #define unix_peer(sk) (unix_sk(sk)->peer)
159
/* Nonzero when the peer recorded on osk is sk. */
160 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
161 {
162         return sk == unix_peer(osk);
163 }
164
/*
 * sk may send to osk when osk has no peer at all, or when the peer it has
 * is sk itself.
 */
165 static inline int unix_may_send(struct sock *sk, struct sock *osk)
166 {
167         return unix_peer(osk) ? unix_our_peer(sk, osk) : 1;
168 }
169
/* Nonzero once sk's receive queue holds more skbs than sk_max_ack_backlog. */
170 static inline int unix_recvq_full(struct sock const *sk)
171 {
172         return sk->sk_max_ack_backlog < skb_queue_len(&sk->sk_receive_queue);
173 }
174
/*
 * Return s's peer with a reference taken through sock_hold(), or NULL when
 * s has no peer. Both the read of the peer pointer and the hold happen
 * under s's state lock.
 */
175 static struct sock *unix_peer_get(struct sock *s)
176 {
177         struct sock *other;
178
179         unix_state_lock(s);
180         other = unix_peer(s);
181         if (other != NULL)
182                 sock_hold(other);
183         unix_state_unlock(s);
184         return other;
185 }
186
/* Drop one reference on addr and free it when that was the last one. */
187 static inline void unix_release_addr(struct unix_address *addr)
188 {
189         if (!atomic_dec_and_test(&addr->refcnt))
190                 return;
            kfree(addr);
191 }
192
193 /*
194  *      Check unix socket name:
195  *              - should be not zero length.
196  *              - if started by not zero, should be NULL terminated (FS object)
197  *              - if started by zero, it is abstract name.
     *
     *      Returns the address length to use (recomputed from the string
     *      for filesystem names) or -EINVAL. *hashp is written only for
     *      abstract names; for filesystem names it is left untouched.
198  */
199
200 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
201 {
            /*
             * Need more than sizeof(short) bytes (presumably the sun_family
             * field) and at most a whole sockaddr_un. len is an int compared
             * against an unsigned sizeof: a negative len converts to a huge
             * unsigned value and is rejected by the second test.
             */
202         if (len <= sizeof(short) || len > sizeof(*sunaddr))
203                 return -EINVAL;
204         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
205                 return -EINVAL;
206         if (sunaddr->sun_path[0]) {
207                 /*
208                  * This may look like an off by one error but it is a bit more
209                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
210                  * sun_path[108] doesnt as such exist.  However in kernel space
211                  * we are guaranteed that it is a valid memory location in our
212                  * kernel address buffer.
213                  */
214                 ((char *)sunaddr)[len] = 0;
                    /* New length: sizeof(short) + path + terminating NUL. */
215                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
216                 return len;
217         }
218
            /* Abstract name: hash all len bytes of the address. */
219         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
220         return len;
221 }
222
/*
 * Unlink sk from whatever table chain it is on. Takes no lock itself; the
 * callers in this file hold unix_table_lock around it.
 */
223 static void __unix_remove_socket(struct sock *sk)
224 {
225         sk_del_node_init(sk);
226 }
227
/*
 * Add sk to the chain @list, warning if sk is still hashed somewhere.
 * Takes no lock itself; the callers in this file hold unix_table_lock.
 */
228 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
229 {
230         WARN_ON(!sk_unhashed(sk));
231         sk_add_node(sk, list);
232 }
233
/* Locked variant: unlink sk from the table under unix_table_lock. */
234 static inline void unix_remove_socket(struct sock *sk)
235 {
236         spin_lock(&unix_table_lock);
237         __unix_remove_socket(sk);
238         spin_unlock(&unix_table_lock);
239 }
240
/* Locked variant: add sk to chain @list under unix_table_lock. */
241 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
242 {
243         spin_lock(&unix_table_lock);
244         __unix_insert_socket(list, sk);
245         spin_unlock(&unix_table_lock);
246 }
247
/*
 * Scan chain (hash ^ type) for a socket in namespace @net whose bound
 * address is exactly the first @len bytes of @sunname. Takes no lock and
 * no reference; returns the matching sock or NULL.
 */
248 static struct sock *__unix_find_socket_byname(struct net *net,
249                                               struct sockaddr_un *sunname,
250                                               int len, int type, unsigned hash)
251 {
252         struct sock *s;
253         struct hlist_node *node;
254
255         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
256                 struct unix_address *bound;
257
258                 /* Sockets of other network namespaces never match. */
259                 if (!net_eq(sock_net(s), net))
260                         continue;
261
262                 bound = unix_sk(s)->addr;
263                 if (bound->len == len &&
264                     memcmp(bound->name, sunname, len) == 0)
265                         return s;
266         }
267         return NULL;
268 }
269
/*
 * Locked lookup by name: runs __unix_find_socket_byname() under
 * unix_table_lock and, on a hit, takes a reference before the lock is
 * dropped. Returns the held sock or NULL.
 */
270 static inline struct sock *unix_find_socket_byname(struct net *net,
271                                                    struct sockaddr_un *sunname,
272                                                    int len, int type,
273                                                    unsigned hash)
274 {
275         struct sock *found;
276
277         spin_lock(&unix_table_lock);
278         found = __unix_find_socket_byname(net, sunname, len, type, hash);
279         if (found != NULL)
280                 sock_hold(found);
281         spin_unlock(&unix_table_lock);
282         return found;
283 }
284
/*
 * Find the socket in namespace @net that is bound to inode @i. The chain
 * is picked from the low bits of the inode number and walked under
 * unix_table_lock. Returns the sock with a reference held, or NULL.
 */
285 static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
286 {
287         struct sock *s;
288         struct sock *match = NULL;
289         struct hlist_node *node;
290
291         spin_lock(&unix_table_lock);
292         sk_for_each(s, node,
293                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
294                 struct dentry *dentry = unix_sk(s)->dentry;
295
296                 if (!net_eq(sock_net(s), net))
297                         continue;
298                 if (!dentry || dentry->d_inode != i)
299                         continue;
300
301                 /* Take the reference while the table lock is still held. */
302                 sock_hold(s);
303                 match = s;
304                 break;
305         }
306         spin_unlock(&unix_table_lock);
307         return match;
    }
308
/*
 * Writable as long as four times the write memory currently allocated
 * still fits within sk_sndbuf.
 */
309 static inline int unix_writable(struct sock *sk)
310 {
311         return sk->sk_sndbuf >= (atomic_read(&sk->sk_wmem_alloc) << 2);
312 }
313
/*
 * sk_write_space callback for unix socks (installed by unix_create1()).
 * If the socket counts as writable, wake synchronous sleepers, if there
 * are any, and post an async SOCK_WAKE_SPACE/POLL_OUT notification. Both
 * happen with sk_callback_lock held for reading.
 */
314 static void unix_write_space(struct sock *sk)
315 {
316         read_lock(&sk->sk_callback_lock);
317         if (unix_writable(sk)) {
318                 if (sk_has_sleeper(sk))
319                         wake_up_interruptible_sync(sk->sk_sleep);
320                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
321         }
322         read_unlock(&sk->sk_callback_lock);
323 }
324
325 /* A dgram socket that disconnects (or switches peer) drops whatever is still
326  * queued from the old peer. This keeps flow control a matter of wmem_alloc
327  * alone, and ensures a socket connected to a peer only ever receives
328  * messages from that peer. */
329 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
330 {
331         if (skb_queue_empty(&sk->sk_receive_queue))
332                 return;
333         skb_queue_purge(&sk->sk_receive_queue);
334         wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
335
336         /* The purged messages are lost. When the old peer is alive and was
337          * itself connected to us (one link of a bidirectional dgram pipe),
338          * tell it so with ECONNRESET; an unconnected peer is left alone.
339          */
340         if (sock_flag(other, SOCK_DEAD) || unix_peer(other) != sk)
341                 return;
342         other->sk_err = ECONNRESET;
343         other->sk_error_report(other);
344 }
345
/*
 * sk_destruct hook (set in unix_create1()): final cleanup of a unix sock.
 * Drops anything left on the receive queue, warns if the sock still has
 * write memory allocated, is still hashed or still has a struct socket,
 * and stops early if the sock was never marked SOCK_DEAD. Otherwise it
 * releases the bound address and undoes the socket accounting.
 */
346 static void unix_sock_destructor(struct sock *sk)
347 {
348         struct unix_sock *u = unix_sk(sk);
349
350         skb_queue_purge(&sk->sk_receive_queue);
351
352         WARN_ON(atomic_read(&sk->sk_wmem_alloc));
353         WARN_ON(!sk_unhashed(sk));
354         WARN_ON(sk->sk_socket);
            /* A sock that is not dead is only reported: neither the address
             * nor the counters below are touched in that case. */
355         if (!sock_flag(sk, SOCK_DEAD)) {
356                 printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
357                 return;
358         }
359
360         if (u->addr)
361                 unix_release_addr(u->addr);
362
            /* Mirror of the accounting done in unix_create1(). */
363         atomic_dec(&unix_nr_socks);
364         local_bh_disable();
365         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
366         local_bh_enable();
367 #ifdef UNIX_REFCNT_DEBUG
368         printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
369                 atomic_read(&unix_nr_socks));
370 #endif
371 }
372
/*
 * Tear down a unix sock: unhash it, orphan it and mark it closed, notify
 * and drop its peer, flush its receive queue, release the bound
 * dentry/mount and finally drop a reference on sk.
 *
 * @sk:      sock being released.
 * @embrion: nonzero in the recursive call made for each skb->sk found
 *           queued on a listening socket; it forces ECONNRESET on the peer.
 *
 * Always returns 0.
 */
373 static int unix_release_sock(struct sock *sk, int embrion)
374 {
375         struct unix_sock *u = unix_sk(sk);
376         struct dentry *dentry;
377         struct vfsmount *mnt;
378         struct sock *skpair;
379         struct sk_buff *skb;
380         int state;
381
382         unix_remove_socket(sk);
383
384         /* Clear state */
385         unix_state_lock(sk);
386         sock_orphan(sk);
387         sk->sk_shutdown = SHUTDOWN_MASK;
            /* Take over the dentry/mount so they can be put after unlocking. */
388         dentry       = u->dentry;
389         u->dentry    = NULL;
390         mnt          = u->mnt;
391         u->mnt       = NULL;
            /* Remember the old state: TCP_LISTEN changes how the queue is flushed. */
392         state = sk->sk_state;
393         sk->sk_state = TCP_CLOSE;
394         unix_state_unlock(sk);
395
396         wake_up_interruptible_all(&u->peer_wait);
397
398         skpair = unix_peer(sk);
399
400         if (skpair != NULL) {
401                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
402                         unix_state_lock(skpair);
403                         /* No more writes */
404                         skpair->sk_shutdown = SHUTDOWN_MASK;
                            /* Unread data left on our queue, or an embryo
                             * being dropped, is reported as ECONNRESET. */
405                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
406                                 skpair->sk_err = ECONNRESET;
407                         unix_state_unlock(skpair);
408                         skpair->sk_state_change(skpair);
409                         read_lock(&skpair->sk_callback_lock);
410                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
411                         read_unlock(&skpair->sk_callback_lock);
412                 }
413                 sock_put(skpair); /* It may now die */
414                 unix_peer(sk) = NULL;
415         }
416
417         /* Try to flush out this socket. Throw out buffers at least */
418
419         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                    /* On a listener each queued skb presumably stands for a
                     * pending connection; release that sock as an embryo. */
420                 if (state == TCP_LISTEN)
421                         unix_release_sock(skb->sk, 1);
422                 /* passed fds are erased in the kfree_skb hook        */
423                 kfree_skb(skb);
424         }
425
426         if (dentry) {
427                 dput(dentry);
428                 mntput(mnt);
429         }
430
431         sock_put(sk);
432
433         /* ---- Socket is dead now and most probably destroyed ---- */
434
435         /*
436          * Fixme: BSD difference: In BSD all sockets connected to us get
437          *        ECONNRESET and we die on the spot. In Linux we behave
438          *        like files and pipes do and wait for the last
439          *        dereference.
440          *
441          * Can't we simply set sock->err?
442          *
443          *        What the above comment does talk about? --ANK(980817)
444          */
445
446         if (unix_tot_inflight)
447                 unix_gc();              /* Garbage collect fds */
448
449         return 0;
450 }
451
/*
 * listen() for stream and seqpacket sockets.
 *
 * Puts a bound socket that is in TCP_CLOSE or TCP_LISTEN into TCP_LISTEN,
 * records @backlog as sk_max_ack_backlog and stores the calling task's
 * pid/uid/gid in sk_peercred.
 *
 * Returns 0 on success, -EOPNOTSUPP for other socket types, and -EINVAL
 * when the socket has no address or is in any other state.
 */
452 static int unix_listen(struct socket *sock, int backlog)
453 {
454         int err;
455         struct sock *sk = sock->sk;
456         struct unix_sock *u = unix_sk(sk);
457
458         err = -EOPNOTSUPP;
459         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
460                 goto out;       /* Only stream/seqpacket sockets accept */
461         err = -EINVAL;
462         if (!u->addr)
463                 goto out;       /* No listens on an unbound socket */
464         unix_state_lock(sk);
465         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
466                 goto out_unlock;
            /* The backlog grew: wake everybody sleeping on peer_wait
             * (presumably connectors blocked on a full queue - confirm in
             * unix_stream_connect()). */
467         if (backlog > sk->sk_max_ack_backlog)
468                 wake_up_interruptible_all(&u->peer_wait);
469         sk->sk_max_ack_backlog  = backlog;
470         sk->sk_state            = TCP_LISTEN;
471         /* set credentials so connect can copy them */
472         sk->sk_peercred.pid     = task_tgid_vnr(current);
473         current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
474         err = 0;
475
476 out_unlock:
477         unix_state_unlock(sk);
478 out:
479         return err;
480 }
481
/*
 * Forward declarations of the socket operations referenced by the
 * proto_ops tables that follow.
 */
482 static int unix_release(struct socket *);
483 static int unix_bind(struct socket *, struct sockaddr *, int);
484 static int unix_stream_connect(struct socket *, struct sockaddr *,
485                                int addr_len, int flags);
486 static int unix_socketpair(struct socket *, struct socket *);
487 static int unix_accept(struct socket *, struct socket *, int);
488 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
489 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
490 static unsigned int unix_dgram_poll(struct file *, struct socket *,
491                                     poll_table *);
492 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
493 static int unix_shutdown(struct socket *, int);
494 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
495                                struct msghdr *, size_t);
496 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
497                                struct msghdr *, size_t, int);
498 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
499                               struct msghdr *, size_t);
500 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
501                               struct msghdr *, size_t, int);
502 static int unix_dgram_connect(struct socket *, struct sockaddr *,
503                               int, int);
504 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
505                                   struct msghdr *, size_t);
506
/*
 * Operations for SOCK_STREAM sockets (selected in unix_create()).
 * Connection-oriented: stream connect/accept/listen and stream
 * send/receive. Socket options, mmap and sendpage are routed to the generic
 * sock_no_* handlers.
 */
507 static const struct proto_ops unix_stream_ops = {
508         .family =       PF_UNIX,
509         .owner =        THIS_MODULE,
510         .release =      unix_release,
511         .bind =         unix_bind,
512         .connect =      unix_stream_connect,
513         .socketpair =   unix_socketpair,
514         .accept =       unix_accept,
515         .getname =      unix_getname,
516         .poll =         unix_poll,
517         .ioctl =        unix_ioctl,
518         .listen =       unix_listen,
519         .shutdown =     unix_shutdown,
520         .setsockopt =   sock_no_setsockopt,
521         .getsockopt =   sock_no_getsockopt,
522         .sendmsg =      unix_stream_sendmsg,
523         .recvmsg =      unix_stream_recvmsg,
524         .mmap =         sock_no_mmap,
525         .sendpage =     sock_no_sendpage,
526 };
527
/*
 * Operations for SOCK_DGRAM sockets (unix_create() also maps SOCK_RAW
 * here). Datagram connect, poll, send and receive; accept and listen, like
 * socket options, mmap and sendpage, go to the generic sock_no_* handlers.
 */
528 static const struct proto_ops unix_dgram_ops = {
529         .family =       PF_UNIX,
530         .owner =        THIS_MODULE,
531         .release =      unix_release,
532         .bind =         unix_bind,
533         .connect =      unix_dgram_connect,
534         .socketpair =   unix_socketpair,
535         .accept =       sock_no_accept,
536         .getname =      unix_getname,
537         .poll =         unix_dgram_poll,
538         .ioctl =        unix_ioctl,
539         .listen =       sock_no_listen,
540         .shutdown =     unix_shutdown,
541         .setsockopt =   sock_no_setsockopt,
542         .getsockopt =   sock_no_getsockopt,
543         .sendmsg =      unix_dgram_sendmsg,
544         .recvmsg =      unix_dgram_recvmsg,
545         .mmap =         sock_no_mmap,
546         .sendpage =     sock_no_sendpage,
547 };
548
/*
 * Operations for SOCK_SEQPACKET sockets. A hybrid of the two tables above:
 * it connects, accepts and listens with the stream handlers, but polls and
 * receives with the datagram handlers and has a sendmsg of its own.
 */
549 static const struct proto_ops unix_seqpacket_ops = {
550         .family =       PF_UNIX,
551         .owner =        THIS_MODULE,
552         .release =      unix_release,
553         .bind =         unix_bind,
554         .connect =      unix_stream_connect,
555         .socketpair =   unix_socketpair,
556         .accept =       unix_accept,
557         .getname =      unix_getname,
558         .poll =         unix_dgram_poll,
559         .ioctl =        unix_ioctl,
560         .listen =       unix_listen,
561         .shutdown =     unix_shutdown,
562         .setsockopt =   sock_no_setsockopt,
563         .getsockopt =   sock_no_getsockopt,
564         .sendmsg =      unix_seqpacket_sendmsg,
565         .recvmsg =      unix_dgram_recvmsg,
566         .mmap =         sock_no_mmap,
567         .sendpage =     sock_no_sendpage,
568 };
569
/*
 * Protocol descriptor handed to sk_alloc() in unix_create1(); obj_size is
 * the size of a full struct unix_sock.
 */
570 static struct proto unix_proto = {
571         .name                   = "UNIX",
572         .owner                  = THIS_MODULE,
573         .obj_size               = sizeof(struct unix_sock),
574 };
575
576 /*
577  * AF_UNIX sockets do not interact with hardware, hence they
578  * don't trigger interrupts - so it's safe for them to have
579  * bh-unsafe locking for their sk_receive_queue.lock. Split off
580  * this special lock-class by reinitializing the spinlock key:
     * (the key is applied to each new sock in unix_create1()).
581  */
582 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
583
/*
 * Allocate and initialise a unix sock for @sock in namespace @net and put
 * it on the unbound chain. Returns the new sock, or NULL when the global
 * socket count would exceed 2 * get_max_files() or sk_alloc() fails.
 */
584 static struct sock *unix_create1(struct net *net, struct socket *sock)
585 {
586         struct sock *sk = NULL;
587         struct unix_sock *u;
588
            /* Count first, then check the limit; the out: path takes the
             * count back when no sock was created. */
589         atomic_inc(&unix_nr_socks);
590         if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
591                 goto out;
592
593         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
594         if (!sk)
595                 goto out;
596
597         sock_init_data(sock, sk);
598         lockdep_set_class(&sk->sk_receive_queue.lock,
599                                 &af_unix_sk_receive_queue_lock_key);
600
601         sk->sk_write_space      = unix_write_space;
            /* Initial queue limit comes from the per-namespace sysctl. */
602         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
603         sk->sk_destruct         = unix_sock_destructor;
604         u         = unix_sk(sk);
605         u->dentry = NULL;
606         u->mnt    = NULL;
607         spin_lock_init(&u->lock);
608         atomic_long_set(&u->inflight, 0);
609         INIT_LIST_HEAD(&u->link);
610         mutex_init(&u->readlock); /* single task reading lock */
611         init_waitqueue_head(&u->peer_wait);
            /* New socks start out on the unbound chain of the table. */
612         unix_insert_socket(unix_sockets_unbound, sk);
613 out:
614         if (sk == NULL)
615                 atomic_dec(&unix_nr_socks);
616         else {
617                 local_bh_disable();
618                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
619                 local_bh_enable();
620         }
621         return sk;
622 }
623
/*
 * Create hook for PF_UNIX sockets: validate @protocol, pick the proto_ops
 * table matching sock->type and allocate the underlying sock.
 *
 * Returns 0 on success, -EPROTONOSUPPORT for a protocol other than 0 or
 * PF_UNIX, -ESOCKTNOSUPPORT for an unsupported socket type, and -ENOMEM
 * when unix_create1() yields no sock.
 */
624 static int unix_create(struct net *net, struct socket *sock, int protocol)
625 {
626         const struct proto_ops *ops;
627
628         if (protocol != 0 && protocol != PF_UNIX)
629                 return -EPROTONOSUPPORT;
630         sock->state = SS_UNCONNECTED;
631         switch (sock->type) {
632         case SOCK_STREAM:
633                 ops = &unix_stream_ops;
634                 break;
635         case SOCK_RAW:
636                 /* BSD has AF_UNIX SOCK_RAW too; serve it as a datagram socket. */
637                 sock->type = SOCK_DGRAM;
638                 /* fall through */
639         case SOCK_DGRAM:
640                 ops = &unix_dgram_ops;
641                 break;
642         case SOCK_SEQPACKET:
643                 ops = &unix_seqpacket_ops;
644                 break;
645         default:
646                 return -ESOCKTNOSUPPORT;
647         }
648         sock->ops = ops;
649         if (unix_create1(net, sock) == NULL)
650                 return -ENOMEM;
651         return 0;
652 }
653
/*
 * release hook: detach the sock from @sock and tear it down. A socket
 * with no sock attached is a no-op. Returns 0 or whatever
 * unix_release_sock() returns.
 */
654 static int unix_release(struct socket *sock)
655 {
656         struct sock *dying = sock->sk;
657
658         if (dying != NULL) {
659                 /* Detach from the struct socket before tearing down. */
660                 sock->sk = NULL;
661                 return unix_release_sock(dying, 0);
662         }
663         return 0;
664 }
665
/*
 * Bind @sock to an automatically generated abstract name: a leading zero
 * byte followed by five hex digits taken from a shared counter. The
 * counter is advanced until a name is found that no socket of the same
 * type in this namespace is using.
 *
 * Returns 0 on success or when the socket already has an address, -ENOMEM
 * when the address cannot be allocated, and -ENOSPC when every candidate
 * name turned out to be in use (this case used to loop forever).
 */
666 static int unix_autobind(struct socket *sock)
667 {
668         struct sock *sk = sock->sk;
669         struct net *net = sock_net(sk);
670         struct unix_sock *u = unix_sk(sk);
671         static u32 ordernum = 1;
672         struct unix_address *addr;
673         int err;
            unsigned int retries = 0;
674
675         mutex_lock(&u->readlock);
676
677         err = 0;
678         if (u->addr)
679                 goto out;
680
681         err = -ENOMEM;
682         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
683         if (!addr)
684                 goto out;
685
686         addr->name->sun_family = AF_UNIX;
687         atomic_set(&addr->refcnt, 1);
688
689 retry:
            /* sun_path[0] stays 0 from kzalloc(): that makes the name abstract. */
690         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
691         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
692
693         spin_lock(&unix_table_lock);
694         ordernum = (ordernum+1)&0xFFFFF;
695
696         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
697                                       addr->hash)) {
698                 spin_unlock(&unix_table_lock);
                    /*
                     * The counter only spans 0x100000 names. Once that many
                     * candidates have been rejected give up instead of
                     * spinning forever; addr was never published, so it can
                     * simply be freed.
                     */
                    if (retries++ == 0xFFFFF) {
                            err = -ENOSPC;
                            kfree(addr);
                            goto out;
                    }
699                 /* Sanity yield. It is unusual case, but yet... */
700                 if (!(ordernum&0xFF))
701                         yield();
702                 goto retry;
703         }
            /* Abstract sockets are chained by (name hash ^ socket type). */
704         addr->hash ^= sk->sk_type;
705
            /* Move from the unbound chain to the chain for the new name. */
706         __unix_remove_socket(sk);
707         u->addr = addr;
708         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
709         spin_unlock(&unix_table_lock);
710         err = 0;
711
712 out:    mutex_unlock(&u->readlock);
713         return err;
714 }
715
/*
 * Look up the socket addressed by @sunname.
 *
 * Filesystem names (sun_path[0] != 0) are resolved with kern_path(); the
 * caller needs write permission on the inode, which must be a socket, and
 * the sock is then found by inode. Abstract names are looked up in the
 * socket table by name, type and hash.
 *
 * On success returns the sock with a reference held. On failure returns
 * NULL and stores the error in *error: the kern_path() or
 * inode_permission() error, -ECONNREFUSED when no such socket exists, or
 * -EPROTOTYPE when a filesystem socket is of a different type than @type.
 */
716 static struct sock *unix_find_other(struct net *net,
717                                     struct sockaddr_un *sunname, int len,
718                                     int type, unsigned hash, int *error)
719 {
720         struct sock *u;
721         struct path path;
722         int err = 0;
723
724         if (sunname->sun_path[0]) {
725                 struct inode *inode;
726                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
727                 if (err)
728                         goto fail;
729                 inode = path.dentry->d_inode;
730                 err = inode_permission(inode, MAY_WRITE);
731                 if (err)
732                         goto put_fail;
733
734                 err = -ECONNREFUSED;
735                 if (!S_ISSOCK(inode->i_mode))
736                         goto put_fail;
737                 u = unix_find_socket_byinode(net, inode);
738                 if (!u)
739                         goto put_fail;
740
                    /* Only a lookup that will succeed updates the atime. */
741                 if (u->sk_type == type)
742                         touch_atime(path.mnt, path.dentry);
743
                    /* The path is not needed past this point; u is kept
                     * alive by the reference the lookup took. */
744                 path_put(&path);
745
746                 err = -EPROTOTYPE;
747                 if (u->sk_type != type) {
748                         sock_put(u);
749                         goto fail;
750                 }
751         } else {
752                 err = -ECONNREFUSED;
753                 u = unix_find_socket_byname(net, sunname, len, type, hash);
754                 if (u) {
755                         struct dentry *dentry;
756                         dentry = unix_sk(u)->dentry;
757                         if (dentry)
758                                 touch_atime(unix_sk(u)->mnt, dentry);
759                 } else
760                         goto fail;
761         }
762         return u;
763
764 put_fail:
765         path_put(&path);
766 fail:
767         *error = err;
768         return NULL;
769 }
770
771
/*
 * bind() for unix sockets.
 *
 *  - addr_len == sizeof(short): nothing beyond the family was supplied,
 *    so an abstract name is chosen by unix_autobind().
 *  - abstract name (sun_path[0] == 0): the name must not already be in the
 *    socket table for this type and namespace.
 *  - filesystem name: a socket inode is created with vfs_mknod() and the
 *    sock is chained by that inode's number.
 *
 * Returns 0 on success or a negative errno: -EINVAL for a wrong family, a
 * name rejected by unix_mkname() or an already bound socket, -ENOMEM when
 * the address cannot be allocated, -EADDRINUSE when the name is taken
 * (including -EEXIST from the filesystem), or the error of the path
 * lookup / mknod steps.
 */
772 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
773 {
774         struct sock *sk = sock->sk;
775         struct net *net = sock_net(sk);
776         struct unix_sock *u = unix_sk(sk);
777         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
778         struct dentry *dentry = NULL;
779         struct nameidata nd;
780         int err;
781         unsigned hash;
782         struct unix_address *addr;
783         struct hlist_head *list;
784
785         err = -EINVAL;
786         if (sunaddr->sun_family != AF_UNIX)
787                 goto out;
788
789         if (addr_len == sizeof(short)) {
790                 err = unix_autobind(sock);
791                 goto out;
792         }
793
794         err = unix_mkname(sunaddr, addr_len, &hash);
795         if (err < 0)
796                 goto out;
797         addr_len = err;
798
799         mutex_lock(&u->readlock);
800
801         err = -EINVAL;
802         if (u->addr)
803                 goto out_up;
804
805         err = -ENOMEM;
806         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
807         if (!addr)
808                 goto out_up;
809
810         memcpy(addr->name, sunaddr, addr_len);
811         addr->len = addr_len;
            /*
             * NOTE(review): unix_mkname() writes hash only for abstract
             * names, so for a filesystem name this reads an indeterminate
             * value; addr->hash is overwritten with UNIX_HASH_SIZE below
             * before addr is published.
             */
812         addr->hash = hash ^ sk->sk_type;
813         atomic_set(&addr->refcnt, 1);
814
815         if (sunaddr->sun_path[0]) {
816                 unsigned int mode;
817                 err = 0;
818                 /*
819                  * Get the parent directory, calculate the hash for last
820                  * component.
821                  */
822                 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
823                 if (err)
824                         goto out_mknod_parent;
825
                    /* Presumably returns with the parent's i_mutex held: it
                     * is unlocked on both the success and error paths below. */
826                 dentry = lookup_create(&nd, 0);
827                 err = PTR_ERR(dentry);
828                 if (IS_ERR(dentry))
829                         goto out_mknod_unlock;
830
831                 /*
832                  * All right, let's create it.
833                  */
834                 mode = S_IFSOCK |
835                        (SOCK_INODE(sock)->i_mode & ~current_umask());
836                 err = mnt_want_write(nd.path.mnt);
837                 if (err)
838                         goto out_mknod_dput;
839                 err = security_path_mknod(&nd.path, dentry, mode, 0);
840                 if (err)
841                         goto out_mknod_drop_write;
842                 err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
                    /* Reached both by falling through after vfs_mknod() and by
                     * the goto above: write access is dropped either way and
                     * err decides what happens next. */
843 out_mknod_drop_write:
844                 mnt_drop_write(nd.path.mnt);
845                 if (err)
846                         goto out_mknod_dput;
847                 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
                    /* Swap the parent dentry in nd.path for the new socket's
                     * dentry; nd.path is what the sock keeps below. */
848                 dput(nd.path.dentry);
849                 nd.path.dentry = dentry;
850
                    /* Marks the address as filesystem-bound (see UNIX_ABSTRACT). */
851                 addr->hash = UNIX_HASH_SIZE;
852         }
853
854         spin_lock(&unix_table_lock);
855
856         if (!sunaddr->sun_path[0]) {
857                 err = -EADDRINUSE;
858                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
859                                               sk->sk_type, hash)) {
860                         unix_release_addr(addr);
861                         goto out_unlock;
862                 }
863
864                 list = &unix_socket_table[addr->hash];
865         } else {
                    /* Filesystem sockets are chained by inode number, matching
                     * the lookup in unix_find_socket_byinode(). */
866                 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
867                 u->dentry = nd.path.dentry;
868                 u->mnt    = nd.path.mnt;
869         }
870
871         err = 0;
            /* Move the sock from its current chain to the one for the name. */
872         __unix_remove_socket(sk);
873         u->addr = addr;
874         __unix_insert_socket(list, sk);
875
876 out_unlock:
877         spin_unlock(&unix_table_lock);
878 out_up:
879         mutex_unlock(&u->readlock);
880 out:
881         return err;
882
        /* Error unwinding for the filesystem branch, innermost step first. */
883 out_mknod_dput:
884         dput(dentry);
885 out_mknod_unlock:
886         mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
887         path_put(&nd.path);
888 out_mknod_parent:
            /* An existing filesystem object means the address is taken. */
889         if (err == -EEXIST)
890                 err = -EADDRINUSE;
891         unix_release_addr(addr);
892         goto out_up;
893 }
894
/*
 * Take the state lock of @sk1 and, when it is a distinct non-NULL socket,
 * of @sk2 as well.
 *
 * When both locks are needed they are always acquired in ascending
 * pointer order, so two tasks locking the same pair take them in the
 * same sequence.  The second acquisition uses the _nested variant.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first;
        struct sock *second;

        /* Only one lock to take: no peer, or the peer is ourselves. */
        if (!sk2 || unlikely(sk1 == sk2)) {
                unix_state_lock(sk1);
                return;
        }

        if (sk1 < sk2) {
                first = sk1;
                second = sk2;
        } else {
                first = sk2;
                second = sk1;
        }

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
909
/*
 * Counterpart of unix_state_double_lock(): release the state lock of
 * @sk1 and, when @sk2 is a distinct non-NULL socket, of @sk2 too.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        const int single = unlikely(sk1 == sk2) || !sk2;

        unix_state_unlock(sk1);
        if (!single)
                unix_state_unlock(sk2);
}
919
920 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
921                               int alen, int flags)
922 {
923         struct sock *sk = sock->sk;
924         struct net *net = sock_net(sk);
925         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
926         struct sock *other;
927         unsigned hash;
928         int err;
929
930         if (addr->sa_family != AF_UNSPEC) {
931                 err = unix_mkname(sunaddr, alen, &hash);
932                 if (err < 0)
933                         goto out;
934                 alen = err;
935
936                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
937                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
938                         goto out;
939
940 restart:
941                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
942                 if (!other)
943                         goto out;
944
945                 unix_state_double_lock(sk, other);
946
947                 /* Apparently VFS overslept socket death. Retry. */
948                 if (sock_flag(other, SOCK_DEAD)) {
949                         unix_state_double_unlock(sk, other);
950                         sock_put(other);
951                         goto restart;
952                 }
953
954                 err = -EPERM;
955                 if (!unix_may_send(sk, other))
956                         goto out_unlock;
957
958                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
959                 if (err)
960                         goto out_unlock;
961
962         } else {
963                 /*
964                  *      1003.1g breaking connected state with AF_UNSPEC
965                  */
966                 other = NULL;
967                 unix_state_double_lock(sk, other);
968         }
969
970         /*
971          * If it was connected, reconnect.
972          */
973         if (unix_peer(sk)) {
974                 struct sock *old_peer = unix_peer(sk);
975                 unix_peer(sk) = other;
976                 unix_state_double_unlock(sk, other);
977
978                 if (other != old_peer)
979                         unix_dgram_disconnected(sk, old_peer);
980                 sock_put(old_peer);
981         } else {
982                 unix_peer(sk) = other;
983                 unix_state_double_unlock(sk, other);
984         }
985         return 0;
986
987 out_unlock:
988         unix_state_double_unlock(sk, other);
989         sock_put(other);
990 out:
991         return err;
992 }
993
994 static long unix_wait_for_peer(struct sock *other, long timeo)
995 {
996         struct unix_sock *u = unix_sk(other);
997         int sched;
998         DEFINE_WAIT(wait);
999
1000         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1001
1002         sched = !sock_flag(other, SOCK_DEAD) &&
1003                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1004                 unix_recvq_full(other);
1005
1006         unix_state_unlock(other);
1007
1008         if (sched)
1009                 timeo = schedule_timeout(timeo);
1010
1011         finish_wait(&u->peer_wait, &wait);
1012         return timeo;
1013 }
1014
/*
 * Connect a connection-oriented AF_UNIX socket to a listening socket.
 *
 * @sock:     connecting socket
 * @uaddr:    address of the listener
 * @addr_len: length of @uaddr
 * @flags:    (flags & O_NONBLOCK) is handed to sock_sndtimeo() to pick
 *            the timeout used while the listener's queue is full
 *
 * A fresh sock (the endpoint the listener will later accept) and a small
 * skb charged to it are allocated before any lock is taken.  On success
 * the two endpoints are cross-linked as peers and the skb is queued on
 * the listener's receive queue.
 *
 * Returns 0 on success or a negative errno: -ENOMEM on allocation
 * failure, -ECONNREFUSED when the target is not listening or has shut
 * down reception, -EAGAIN when its queue is full and the timeout is
 * zero, -EISCONN / -EINVAL depending on our own state, the
 * sock_intr_errno() value when a signal interrupts the wait, or an error
 * from unix_mkname(), unix_autobind(), unix_find_other() or the security
 * hook.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
{
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
        struct sock *newsk = NULL;
        struct sock *other = NULL;
        struct sk_buff *skb = NULL;
        unsigned hash;
        int own_state;
        int err;
        long timeo;

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        /* Credential passing needs a bound source address. */
        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
                err = unix_autobind(sock);
                if (err != 0)
                        goto out;
        }

        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

        /*
         * Allocate everything first.  Doing it once the state is locked
         * would force us to recheck all conditions afterwards anyway.
         */
        err = -ENOMEM;

        /* The sock that will represent the far end of the connection. */
        newsk = unix_create1(net, NULL);
        if (!newsk)
                goto out;

        /* The skb that gets queued on the listening sock. */
        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
        if (!skb)
                goto out;

restart:
        /* Look up the listening sock. */
        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
        if (!other)
                goto out;

        /* Freeze the peer's state while we examine it. */
        unix_state_lock(other);

        /* Apparently VFS overslept socket death. Retry. */
        if (sock_flag(other, SOCK_DEAD)) {
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = -ECONNREFUSED;
        if (other->sk_state != TCP_LISTEN)
                goto out_unlock;
        if (other->sk_shutdown & RCV_SHUTDOWN)
                goto out_unlock;

        if (unix_recvq_full(other)) {
                err = -EAGAIN;
                if (!timeo)
                        goto out_unlock;

                /* Drops the state lock on other. */
                timeo = unix_wait_for_peer(other, timeo);

                err = sock_intr_errno(timeo);
                if (signal_pending(current))
                        goto out;
                sock_put(other);
                goto restart;
        }

        /*
         * Now freeze our own state.
         *
         * This is the delicate part: our lock must be taken while the
         * peer's lock is still held, which could deadlock.  Connecting to
         * ourselves and simultaneous connect attempts are ruled out by
         * looking at the socket state: other is TCP_LISTEN, and if sk
         * were TCP_LISTEN too we bail out before touching its lock.
         *
         * The state is rechecked once the lock is actually held.
         */
        own_state = sk->sk_state;

        if (own_state == TCP_ESTABLISHED) {
                /* Socket is already connected */
                err = -EISCONN;
                goto out_unlock;
        }
        if (own_state != TCP_CLOSE) {
                err = -EINVAL;
                goto out_unlock;
        }
        /* TCP_CLOSE: fine, carry on with the connect. */

        unix_state_lock_nested(sk);

        if (sk->sk_state != own_state) {
                unix_state_unlock(sk);
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = security_unix_stream_connect(sock, other->sk_socket, newsk);
        if (err) {
                unix_state_unlock(sk);
                goto out_unlock;
        }

        /* All checks passed: wire up the new endpoint quickly. */

        sock_hold(sk);
        unix_peer(newsk)        = sk;
        newsk->sk_state         = TCP_ESTABLISHED;
        newsk->sk_type          = sk->sk_type;
        newsk->sk_peercred.pid  = task_tgid_vnr(current);
        current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
        newu = unix_sk(newsk);
        newsk->sk_sleep         = &newu->peer_wait;
        otheru = unix_sk(other);

        /* The new sock inherits the listener's address ... */
        if (otheru->addr) {
                atomic_inc(&otheru->addr->refcnt);
                newu->addr = otheru->addr;
        }
        /* ... and its filesystem binding, each with its own reference. */
        if (otheru->dentry) {
                newu->dentry    = dget(otheru->dentry);
                newu->mnt       = mntget(otheru->mnt);
        }

        /* Our view of the peer's credentials is the listener's. */
        sk->sk_peercred = other->sk_peercred;

        sock->state     = SS_CONNECTED;
        sk->sk_state    = TCP_ESTABLISHED;
        sock_hold(newsk);

        smp_mb__after_atomic_inc();     /* sock_hold() does an atomic_inc() */
        unix_peer(sk)   = newsk;

        unix_state_unlock(sk);

        /* Hand the connection request over to the listening sock. */
        spin_lock(&other->sk_receive_queue.lock);
        __skb_queue_tail(&other->sk_receive_queue, skb);
        spin_unlock(&other->sk_receive_queue.lock);
        unix_state_unlock(other);
        other->sk_data_ready(other, 0);
        sock_put(other);
        return 0;

out_unlock:
        if (other)
                unix_state_unlock(other);

out:
        kfree_skb(skb);
        if (newsk)
                unix_release_sock(newsk, 0);
        if (other)
                sock_put(other);
        return err;
}
1190
1191 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1192 {
1193         struct sock *ska = socka->sk, *skb = sockb->sk;
1194
1195         /* Join our sockets back to back */
1196         sock_hold(ska);
1197         sock_hold(skb);
1198         unix_peer(ska) = skb;
1199         unix_peer(skb) = ska;
1200         ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1201         current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
1202         ska->sk_peercred.uid = skb->sk_peercred.uid;
1203         ska->sk_peercred.gid = skb->sk_peercred.gid;
1204
1205         if (ska->sk_type != SOCK_DGRAM) {
1206                 ska->sk_state = TCP_ESTABLISHED;
1207                 skb->sk_state = TCP_ESTABLISHED;
1208                 socka->state  = SS_CONNECTED;
1209                 sockb->state  = SS_CONNECTED;
1210         }
1211         return 0;
1212 }
1213
1214 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1215 {
1216         struct sock *sk = sock->sk;
1217         struct sock *tsk;
1218         struct sk_buff *skb;
1219         int err;
1220
1221         err = -EOPNOTSUPP;
1222         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1223                 goto out;
1224
1225         err = -EINVAL;
1226         if (sk->sk_state != TCP_LISTEN)
1227                 goto out;
1228
1229         /* If socket state is TCP_LISTEN it cannot change (for now...),
1230          * so that no locks are necessary.
1231          */
1232
1233         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1234         if (!skb) {
1235                 /* This means receive shutdown. */
1236                 if (err == 0)
1237                         err = -EINVAL;
1238                 goto out;
1239         }
1240
1241         tsk = skb->sk;
1242         skb_free_datagram(sk, skb);
1243         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1244
1245         /* attach accepted sock to socket */
1246         unix_state_lock(tsk);
1247         newsock->state = SS_CONNECTED;
1248         sock_graft(tsk, newsock);
1249         unix_state_unlock(tsk);
1250         return 0;
1251
1252 out:
1253         return err;
1254 }
1255
1256
1257 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1258 {
1259         struct sock *sk = sock->sk;
1260         struct unix_sock *u;
1261         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1262         int err = 0;
1263
1264         if (peer) {
1265                 sk = unix_peer_get(sk);
1266
1267                 err = -ENOTCONN;
1268                 if (!sk)
1269                         goto out;
1270                 err = 0;
1271         } else {
1272                 sock_hold(sk);
1273         }
1274
1275         u = unix_sk(sk);
1276         unix_state_lock(sk);
1277         if (!u->addr) {
1278                 sunaddr->sun_family = AF_UNIX;
1279                 sunaddr->sun_path[0] = 0;
1280                 *uaddr_len = sizeof(short);
1281         } else {
1282                 struct unix_address *addr = u->addr;
1283
1284                 *uaddr_len = addr->len;
1285                 memcpy(sunaddr, addr->name, *uaddr_len);
1286         }
1287         unix_state_unlock(sk);
1288         sock_put(sk);
1289 out:
1290         return err;
1291 }
1292
1293 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1294 {
1295         int i;
1296
1297         scm->fp = UNIXCB(skb).fp;
1298         skb->destructor = sock_wfree;
1299         UNIXCB(skb).fp = NULL;
1300
1301         for (i = scm->fp->count-1; i >= 0; i--)
1302                 unix_notinflight(scm->fp->fp[i]);
1303 }
1304
1305 static void unix_destruct_fds(struct sk_buff *skb)
1306 {
1307         struct scm_cookie scm;
1308         memset(&scm, 0, sizeof(scm));
1309         unix_detach_fds(&scm, skb);
1310
1311         /* Alas, it calls VFS */
1312         /* So fscking what? fput() had been SMP-safe since the last Summer */
1313         scm_destroy(&scm);
1314         sock_wfree(skb);
1315 }
1316
1317 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1318 {
1319         int i;
1320
1321         /*
1322          * Need to duplicate file references for the sake of garbage
1323          * collection.  Otherwise a socket in the fps might become a
1324          * candidate for GC while the skb is not yet queued.
1325          */
1326         UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1327         if (!UNIXCB(skb).fp)
1328                 return -ENOMEM;
1329
1330         for (i = scm->fp->count-1; i >= 0; i--)
1331                 unix_inflight(scm->fp->fp[i]);
1332         skb->destructor = unix_destruct_fds;
1333         return 0;
1334 }
1335
1336 /*
1337  *      Send AF_UNIX data.
1338  */
1339
1340 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1341                               struct msghdr *msg, size_t len)
1342 {
1343         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1344         struct sock *sk = sock->sk;
1345         struct net *net = sock_net(sk);
1346         struct unix_sock *u = unix_sk(sk);
1347         struct sockaddr_un *sunaddr = msg->msg_name;
1348         struct sock *other = NULL;
1349         int namelen = 0; /* fake GCC */
1350         int err;
1351         unsigned hash;
1352         struct sk_buff *skb;
1353         long timeo;
1354         struct scm_cookie tmp_scm;
1355
1356         if (NULL == siocb->scm)
1357                 siocb->scm = &tmp_scm;
1358         wait_for_unix_gc();
1359         err = scm_send(sock, msg, siocb->scm);
1360         if (err < 0)
1361                 return err;
1362
1363         err = -EOPNOTSUPP;
1364         if (msg->msg_flags&MSG_OOB)
1365                 goto out;
1366
1367         if (msg->msg_namelen) {
1368                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1369                 if (err < 0)
1370                         goto out;
1371                 namelen = err;
1372         } else {
1373                 sunaddr = NULL;
1374                 err = -ENOTCONN;
1375                 other = unix_peer_get(sk);
1376                 if (!other)
1377                         goto out;
1378         }
1379
1380         if (test_bit(SOCK_PASSCRED, &sock->flags)
1381                 && !u->addr && (err = unix_autobind(sock)) != 0)
1382                 goto out;
1383
1384         err = -EMSGSIZE;
1385         if (len > sk->sk_sndbuf - 32)
1386                 goto out;
1387
1388         skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1389         if (skb == NULL)
1390                 goto out;
1391
1392         memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1393         if (siocb->scm->fp) {
1394                 err = unix_attach_fds(siocb->scm, skb);
1395                 if (err)
1396                         goto out_free;
1397         }
1398         unix_get_secdata(siocb->scm, skb);
1399
1400         skb_reset_transport_header(skb);
1401         err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1402         if (err)
1403                 goto out_free;
1404
1405         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1406
1407 restart:
1408         if (!other) {
1409                 err = -ECONNRESET;
1410                 if (sunaddr == NULL)
1411                         goto out_free;
1412
1413                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1414                                         hash, &err);
1415                 if (other == NULL)
1416                         goto out_free;
1417         }
1418
1419         unix_state_lock(other);
1420         err = -EPERM;
1421         if (!unix_may_send(sk, other))
1422                 goto out_unlock;
1423
1424         if (sock_flag(other, SOCK_DEAD)) {
1425                 /*
1426                  *      Check with 1003.1g - what should
1427                  *      datagram error
1428                  */
1429                 unix_state_unlock(other);
1430                 sock_put(other);
1431
1432                 err = 0;
1433                 unix_state_lock(sk);
1434                 if (unix_peer(sk) == other) {
1435                         unix_peer(sk) = NULL;
1436                         unix_state_unlock(sk);
1437
1438                         unix_dgram_disconnected(sk, other);
1439                         sock_put(other);
1440                         err = -ECONNREFUSED;
1441                 } else {
1442                         unix_state_unlock(sk);
1443                 }
1444
1445                 other = NULL;
1446                 if (err)
1447                         goto out_free;
1448                 goto restart;
1449         }
1450
1451         err = -EPIPE;
1452         if (other->sk_shutdown & RCV_SHUTDOWN)
1453                 goto out_unlock;
1454
1455         if (sk->sk_type != SOCK_SEQPACKET) {
1456                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1457                 if (err)
1458                         goto out_unlock;
1459         }
1460
1461         if (unix_peer(other) != sk && unix_recvq_full(other)) {
1462                 if (!timeo) {
1463                         err = -EAGAIN;
1464                         goto out_unlock;
1465                 }
1466
1467                 timeo = unix_wait_for_peer(other, timeo);
1468
1469                 err = sock_intr_errno(timeo);
1470                 if (signal_pending(current))
1471                         goto out_free;
1472
1473                 goto restart;
1474         }
1475
1476         skb_queue_tail(&other->sk_receive_queue, skb);
1477         unix_state_unlock(other);
1478         other->sk_data_ready(other, len);
1479         sock_put(other);
1480         scm_destroy(siocb->scm);
1481         return len;
1482
1483 out_unlock:
1484         unix_state_unlock(other);
1485 out_free:
1486         kfree_skb(skb);
1487 out:
1488         if (other)
1489                 sock_put(other);
1490         scm_destroy(siocb->scm);
1491         return err;
1492 }
1493
1494
1495 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1496                                struct msghdr *msg, size_t len)
1497 {
1498         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1499         struct sock *sk = sock->sk;
1500         struct sock *other = NULL;
1501         struct sockaddr_un *sunaddr = msg->msg_name;
1502         int err, size;
1503         struct sk_buff *skb;
1504         int sent = 0;
1505         struct scm_cookie tmp_scm;
1506         bool fds_sent = false;
1507
1508         if (NULL == siocb->scm)
1509                 siocb->scm = &tmp_scm;
1510         wait_for_unix_gc();
1511         err = scm_send(sock, msg, siocb->scm);
1512         if (err < 0)
1513                 return err;
1514
1515         err = -EOPNOTSUPP;
1516         if (msg->msg_flags&MSG_OOB)
1517                 goto out_err;
1518
1519         if (msg->msg_namelen) {
1520                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1521                 goto out_err;
1522         } else {
1523                 sunaddr = NULL;
1524                 err = -ENOTCONN;
1525                 other = unix_peer(sk);
1526                 if (!other)
1527                         goto out_err;
1528         }
1529
1530         if (sk->sk_shutdown & SEND_SHUTDOWN)
1531                 goto pipe_err;
1532
1533         while (sent < len) {
1534                 /*
1535                  *      Optimisation for the fact that under 0.01% of X
1536                  *      messages typically need breaking up.
1537                  */
1538
1539                 size = len-sent;
1540
1541                 /* Keep two messages in the pipe so it schedules better */
1542                 if (size > ((sk->sk_sndbuf >> 1) - 64))
1543                         size = (sk->sk_sndbuf >> 1) - 64;
1544
1545                 if (size > SKB_MAX_ALLOC)
1546                         size = SKB_MAX_ALLOC;
1547
1548                 /*
1549                  *      Grab a buffer
1550                  */
1551
1552                 skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1553                                           &err);
1554
1555                 if (skb == NULL)
1556                         goto out_err;
1557
1558                 /*
1559                  *      If you pass two values to the sock_alloc_send_skb
1560                  *      it tries to grab the large buffer with GFP_NOFS
1561                  *      (which can fail easily), and if it fails grab the
1562                  *      fallback size buffer which is under a page and will
1563                  *      succeed. [Alan]
1564                  */
1565                 size = min_t(int, size, skb_tailroom(skb));
1566
1567                 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1568                 /* Only send the fds in the first buffer */
1569                 if (siocb->scm->fp && !fds_sent) {
1570                         err = unix_attach_fds(siocb->scm, skb);
1571                         if (err) {
1572                                 kfree_skb(skb);
1573                                 goto out_err;
1574                         }
1575                         fds_sent = true;
1576                 }
1577
1578                 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1579                 if (err) {
1580                         kfree_skb(skb);
1581                         goto out_err;
1582                 }
1583
1584                 unix_state_lock(other);
1585
1586                 if (sock_flag(other, SOCK_DEAD) ||
1587                     (other->sk_shutdown & RCV_SHUTDOWN))
1588                         goto pipe_err_free;
1589
1590                 skb_queue_tail(&other->sk_receive_queue, skb);
1591                 unix_state_unlock(other);
1592                 other->sk_data_ready(other, size);
1593                 sent += size;
1594         }
1595
1596         scm_destroy(siocb->scm);
1597         siocb->scm = NULL;
1598
1599         return sent;
1600
1601 pipe_err_free:
1602         unix_state_unlock(other);
1603         kfree_skb(skb);
1604 pipe_err:
1605         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1606                 send_sig(SIGPIPE, current, 0);
1607         err = -EPIPE;
1608 out_err:
1609         scm_destroy(siocb->scm);
1610         siocb->scm = NULL;
1611         return sent ? : err;
1612 }
1613
1614 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1615                                   struct msghdr *msg, size_t len)
1616 {
1617         int err;
1618         struct sock *sk = sock->sk;
1619
1620         err = sock_error(sk);
1621         if (err)
1622                 return err;
1623
1624         if (sk->sk_state != TCP_ESTABLISHED)
1625                 return -ENOTCONN;
1626
1627         if (msg->msg_namelen)
1628                 msg->msg_namelen = 0;
1629
1630         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1631 }
1632
1633 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1634 {
1635         struct unix_sock *u = unix_sk(sk);
1636
1637         msg->msg_namelen = 0;
1638         if (u->addr) {
1639                 msg->msg_namelen = u->addr->len;
1640                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1641         }
1642 }
1643
1644 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1645                               struct msghdr *msg, size_t size,
1646                               int flags)
1647 {
1648         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1649         struct scm_cookie tmp_scm;
1650         struct sock *sk = sock->sk;
1651         struct unix_sock *u = unix_sk(sk);
1652         int noblock = flags & MSG_DONTWAIT;
1653         struct sk_buff *skb;
1654         int err;
1655
1656         err = -EOPNOTSUPP;
1657         if (flags&MSG_OOB)
1658                 goto out;
1659
1660         msg->msg_namelen = 0;
1661
1662         mutex_lock(&u->readlock);
1663
1664         skb = skb_recv_datagram(sk, flags, noblock, &err);
1665         if (!skb) {
1666                 unix_state_lock(sk);
1667                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1668                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1669                     (sk->sk_shutdown & RCV_SHUTDOWN))
1670                         err = 0;
1671                 unix_state_unlock(sk);
1672                 goto out_unlock;
1673         }
1674
1675         wake_up_interruptible_sync(&u->peer_wait);
1676
1677         if (msg->msg_name)
1678                 unix_copy_addr(msg, skb->sk);
1679
1680         if (size > skb->len)
1681                 size = skb->len;
1682         else if (size < skb->len)
1683                 msg->msg_flags |= MSG_TRUNC;
1684
1685         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1686         if (err)
1687                 goto out_free;
1688
1689         if (!siocb->scm) {
1690                 siocb->scm = &tmp_scm;
1691                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1692         }
1693         siocb->scm->creds = *UNIXCREDS(skb);
1694         unix_set_secdata(siocb->scm, skb);
1695
1696         if (!(flags & MSG_PEEK)) {
1697                 if (UNIXCB(skb).fp)
1698                         unix_detach_fds(siocb->scm, skb);
1699         } else {
1700                 /* It is questionable: on PEEK we could:
1701                    - do not return fds - good, but too simple 8)
1702                    - return fds, and do not return them on read (old strategy,
1703                      apparently wrong)
1704                    - clone fds (I chose it for now, it is the most universal
1705                      solution)
1706
1707                    POSIX 1003.1g does not actually define this clearly
1708                    at all. POSIX 1003.1g doesn't define a lot of things
1709                    clearly however!
1710
1711                 */
1712                 if (UNIXCB(skb).fp)
1713                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1714         }
1715         err = size;
1716
1717         scm_recv(sock, msg, siocb->scm, flags);
1718
1719 out_free:
1720         skb_free_datagram(sk, skb);
1721 out_unlock:
1722         mutex_unlock(&u->readlock);
1723 out:
1724         return err;
1725 }
1726
1727 /*
1728  *      Sleep until data has arrive. But check for races..
1729  */
1730
1731 static long unix_stream_data_wait(struct sock *sk, long timeo)
1732 {
1733         DEFINE_WAIT(wait);
1734
1735         unix_state_lock(sk);
1736
1737         for (;;) {
1738                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1739
1740                 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1741                     sk->sk_err ||
1742                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
1743                     signal_pending(current) ||
1744                     !timeo)
1745                         break;
1746
1747                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1748                 unix_state_unlock(sk);
1749                 timeo = schedule_timeout(timeo);
1750                 unix_state_lock(sk);
1751                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1752         }
1753
1754         finish_wait(sk->sk_sleep, &wait);
1755         unix_state_unlock(sk);
1756         return timeo;
1757 }
1758
1759
1760
1761 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1762                                struct msghdr *msg, size_t size,
1763                                int flags)
1764 {
1765         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1766         struct scm_cookie tmp_scm;
1767         struct sock *sk = sock->sk;
1768         struct unix_sock *u = unix_sk(sk);
1769         struct sockaddr_un *sunaddr = msg->msg_name;
1770         int copied = 0;
1771         int check_creds = 0;
1772         int target;
1773         int err = 0;
1774         long timeo;
1775
1776         err = -EINVAL;
1777         if (sk->sk_state != TCP_ESTABLISHED)
1778                 goto out;
1779
1780         err = -EOPNOTSUPP;
1781         if (flags&MSG_OOB)
1782                 goto out;
1783
1784         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1785         timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1786
1787         msg->msg_namelen = 0;
1788
1789         /* Lock the socket to prevent queue disordering
1790          * while sleeps in memcpy_tomsg
1791          */
1792
1793         if (!siocb->scm) {
1794                 siocb->scm = &tmp_scm;
1795                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1796         }
1797
1798         mutex_lock(&u->readlock);
1799
1800         do {
1801                 int chunk;
1802                 struct sk_buff *skb;
1803
1804                 unix_state_lock(sk);
1805                 skb = skb_dequeue(&sk->sk_receive_queue);
1806                 if (skb == NULL) {
1807                         if (copied >= target)
1808                                 goto unlock;
1809
1810                         /*
1811                          *      POSIX 1003.1g mandates this order.
1812                          */
1813
1814                         err = sock_error(sk);
1815                         if (err)
1816                                 goto unlock;
1817                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1818                                 goto unlock;
1819
1820                         unix_state_unlock(sk);
1821                         err = -EAGAIN;
1822                         if (!timeo)
1823                                 break;
1824                         mutex_unlock(&u->readlock);
1825
1826                         timeo = unix_stream_data_wait(sk, timeo);
1827
1828                         if (signal_pending(current)) {
1829                                 err = sock_intr_errno(timeo);
1830                                 goto out;
1831                         }
1832                         mutex_lock(&u->readlock);
1833                         continue;
1834  unlock:
1835                         unix_state_unlock(sk);
1836                         break;
1837                 }
1838                 unix_state_unlock(sk);
1839
1840                 if (check_creds) {
1841                         /* Never glue messages from different writers */
1842                         if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
1843                                    sizeof(siocb->scm->creds)) != 0) {
1844                                 skb_queue_head(&sk->sk_receive_queue, skb);
1845                                 break;
1846                         }
1847                 } else {
1848                         /* Copy credentials */
1849                         siocb->scm->creds = *UNIXCREDS(skb);
1850                         check_creds = 1;
1851                 }
1852
1853                 /* Copy address just once */
1854                 if (sunaddr) {
1855                         unix_copy_addr(msg, skb->sk);
1856                         sunaddr = NULL;
1857                 }
1858
1859                 chunk = min_t(unsigned int, skb->len, size);
1860                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1861                         skb_queue_head(&sk->sk_receive_queue, skb);
1862                         if (copied == 0)
1863                                 copied = -EFAULT;
1864                         break;
1865                 }
1866                 copied += chunk;
1867                 size -= chunk;
1868
1869                 /* Mark read part of skb as used */
1870                 if (!(flags & MSG_PEEK)) {
1871                         skb_pull(skb, chunk);
1872
1873                         if (UNIXCB(skb).fp)
1874                                 unix_detach_fds(siocb->scm, skb);
1875
1876                         /* put the skb back if we didn't use it up.. */
1877                         if (skb->len) {
1878                                 skb_queue_head(&sk->sk_receive_queue, skb);
1879                                 break;
1880                         }
1881
1882                         kfree_skb(skb);
1883
1884                         if (siocb->scm->fp)
1885                                 break;
1886                 } else {
1887                         /* It is questionable, see note in unix_dgram_recvmsg.
1888                          */
1889                         if (UNIXCB(skb).fp)
1890                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1891
1892                         /* put message back and return */
1893                         skb_queue_head(&sk->sk_receive_queue, skb);
1894                         break;
1895                 }
1896         } while (size);
1897
1898         mutex_unlock(&u->readlock);
1899         scm_recv(sock, msg, siocb->scm, flags);
1900 out:
1901         return copied ? : err;
1902 }
1903
1904 static int unix_shutdown(struct socket *sock, int mode)
1905 {
1906         struct sock *sk = sock->sk;
1907         struct sock *other;
1908
1909         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1910
1911         if (mode) {
1912                 unix_state_lock(sk);
1913                 sk->sk_shutdown |= mode;
1914                 other = unix_peer(sk);
1915                 if (other)
1916                         sock_hold(other);
1917                 unix_state_unlock(sk);
1918                 sk->sk_state_change(sk);
1919
1920                 if (other &&
1921                         (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1922
1923                         int peer_mode = 0;
1924
1925                         if (mode&RCV_SHUTDOWN)
1926                                 peer_mode |= SEND_SHUTDOWN;
1927                         if (mode&SEND_SHUTDOWN)
1928                                 peer_mode |= RCV_SHUTDOWN;
1929                         unix_state_lock(other);
1930                         other->sk_shutdown |= peer_mode;
1931                         unix_state_unlock(other);
1932                         other->sk_state_change(other);
1933                         read_lock(&other->sk_callback_lock);
1934                         if (peer_mode == SHUTDOWN_MASK)
1935                                 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1936                         else if (peer_mode & RCV_SHUTDOWN)
1937                                 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1938                         read_unlock(&other->sk_callback_lock);
1939                 }
1940                 if (other)
1941                         sock_put(other);
1942         }
1943         return 0;
1944 }
1945
1946 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1947 {
1948         struct sock *sk = sock->sk;
1949         long amount = 0;
1950         int err;
1951
1952         switch (cmd) {
1953         case SIOCOUTQ:
1954                 amount = sk_wmem_alloc_get(sk);
1955                 err = put_user(amount, (int __user *)arg);
1956                 break;
1957         case SIOCINQ:
1958                 {
1959                         struct sk_buff *skb;
1960
1961                         if (sk->sk_state == TCP_LISTEN) {
1962                                 err = -EINVAL;
1963                                 break;
1964                         }
1965
1966                         spin_lock(&sk->sk_receive_queue.lock);
1967                         if (sk->sk_type == SOCK_STREAM ||
1968                             sk->sk_type == SOCK_SEQPACKET) {
1969                                 skb_queue_walk(&sk->sk_receive_queue, skb)
1970                                         amount += skb->len;
1971                         } else {
1972                                 skb = skb_peek(&sk->sk_receive_queue);
1973                                 if (skb)
1974                                         amount = skb->len;
1975                         }
1976                         spin_unlock(&sk->sk_receive_queue.lock);
1977                         err = put_user(amount, (int __user *)arg);
1978                         break;
1979                 }
1980
1981         default:
1982                 err = -ENOIOCTLCMD;
1983                 break;
1984         }
1985         return err;
1986 }
1987
1988 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1989 {
1990         struct sock *sk = sock->sk;
1991         unsigned int mask;
1992
1993         sock_poll_wait(file, sk->sk_sleep, wait);
1994         mask = 0;
1995
1996         /* exceptional events? */
1997         if (sk->sk_err)
1998                 mask |= POLLERR;
1999         if (sk->sk_shutdown == SHUTDOWN_MASK)
2000                 mask |= POLLHUP;
2001         if (sk->sk_shutdown & RCV_SHUTDOWN)
2002                 mask |= POLLRDHUP;
2003
2004         /* readable? */
2005         if (!skb_queue_empty(&sk->sk_receive_queue) ||
2006             (sk->sk_shutdown & RCV_SHUTDOWN))
2007                 mask |= POLLIN | POLLRDNORM;
2008
2009         /* Connection-based need to check for termination and startup */
2010         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2011             sk->sk_state == TCP_CLOSE)
2012                 mask |= POLLHUP;
2013
2014         /*
2015          * we set writable also when the other side has shut down the
2016          * connection. This prevents stuck sockets.
2017          */
2018         if (unix_writable(sk))
2019                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2020
2021         return mask;
2022 }
2023
2024 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2025                                     poll_table *wait)
2026 {
2027         struct sock *sk = sock->sk, *other;
2028         unsigned int mask, writable;
2029
2030         sock_poll_wait(file, sk->sk_sleep, wait);
2031         mask = 0;
2032
2033         /* exceptional events? */
2034         if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2035                 mask |= POLLERR;
2036         if (sk->sk_shutdown & RCV_SHUTDOWN)
2037                 mask |= POLLRDHUP;
2038         if (sk->sk_shutdown == SHUTDOWN_MASK)
2039                 mask |= POLLHUP;
2040
2041         /* readable? */
2042         if (!skb_queue_empty(&sk->sk_receive_queue) ||
2043             (sk->sk_shutdown & RCV_SHUTDOWN))
2044                 mask |= POLLIN | POLLRDNORM;
2045
2046         /* Connection-based need to check for termination and startup */
2047         if (sk->sk_type == SOCK_SEQPACKET) {
2048                 if (sk->sk_state == TCP_CLOSE)
2049                         mask |= POLLHUP;
2050                 /* connection hasn't started yet? */
2051                 if (sk->sk_state == TCP_SYN_SENT)
2052                         return mask;
2053         }
2054
2055         /* writable? */
2056         writable = unix_writable(sk);
2057         if (writable) {
2058                 other = unix_peer_get(sk);
2059                 if (other) {
2060                         if (unix_peer(other) != sk) {
2061                                 sock_poll_wait(file, &unix_sk(other)->peer_wait,
2062                                           wait);
2063                                 if (unix_recvq_full(other))
2064                                         writable = 0;
2065                         }
2066
2067                         sock_put(other);
2068                 }
2069         }
2070
2071         if (writable)
2072                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2073         else
2074                 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2075
2076         return mask;
2077 }
2078
2079 #ifdef CONFIG_PROC_FS
2080 static struct sock *first_unix_socket(int *i)
2081 {
2082         for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
2083                 if (!hlist_empty(&unix_socket_table[*i]))
2084                         return __sk_head(&unix_socket_table[*i]);
2085         }
2086         return NULL;
2087 }
2088
2089 static struct sock *next_unix_socket(int *i, struct sock *s)
2090 {
2091         struct sock *next = sk_next(s);
2092         /* More in this chain? */
2093         if (next)
2094                 return next;
2095         /* Look for next non-empty chain. */
2096         for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2097                 if (!hlist_empty(&unix_socket_table[*i]))
2098                         return __sk_head(&unix_socket_table[*i]);
2099         }
2100         return NULL;
2101 }
2102
/*
 * Private iterator state of the seq_file walk over unix_socket_table;
 * unix_seq_open() asks seq_open_net() to allocate one per open file.
 */
struct unix_iter_state {
	struct seq_net_private p;	/* presumably must stay first for seq_open_net() - confirm */
	int i;				/* index of the unix_socket_table chain being walked */
};
2107
2108 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2109 {
2110         struct unix_iter_state *iter = seq->private;
2111         loff_t off = 0;
2112         struct sock *s;
2113
2114         for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2115                 if (sock_net(s) != seq_file_net(seq))
2116                         continue;
2117                 if (off == pos)
2118                         return s;
2119                 ++off;
2120         }
2121         return NULL;
2122 }
2123
2124 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2125         __acquires(unix_table_lock)
2126 {
2127         spin_lock(&unix_table_lock);
2128         return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2129 }
2130
2131 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2132 {
2133         struct unix_iter_state *iter = seq->private;
2134         struct sock *sk = v;
2135         ++*pos;
2136
2137         if (v == SEQ_START_TOKEN)
2138                 sk = first_unix_socket(&iter->i);
2139         else
2140                 sk = next_unix_socket(&iter->i, sk);
2141         while (sk && (sock_net(sk) != seq_file_net(seq)))
2142                 sk = next_unix_socket(&iter->i, sk);
2143         return sk;
2144 }
2145
/* seq_file ->stop: drops unix_table_lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2151
2152 static int unix_seq_show(struct seq_file *seq, void *v)
2153 {
2154
2155         if (v == SEQ_START_TOKEN)
2156                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2157                          "Inode Path\n");
2158         else {
2159                 struct sock *s = v;
2160                 struct unix_sock *u = unix_sk(s);
2161                 unix_state_lock(s);
2162
2163                 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2164                         s,
2165                         atomic_read(&s->sk_refcnt),
2166                         0,
2167                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2168                         s->sk_type,
2169                         s->sk_socket ?
2170                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2171                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2172                         sock_i_ino(s));
2173
2174                 if (u->addr) {
2175                         int i, len;
2176                         seq_putc(seq, ' ');
2177
2178                         i = 0;
2179                         len = u->addr->len - sizeof(short);
2180                         if (!UNIX_ABSTRACT(s))
2181                                 len--;
2182                         else {
2183                                 seq_putc(seq, '@');
2184                                 i++;
2185                         }
2186                         for ( ; i < len; i++)
2187                                 seq_putc(seq, u->addr->name->sun_path[i]);
2188                 }
2189                 unix_state_unlock(s);
2190                 seq_putc(seq, '\n');
2191         }
2192
2193         return 0;
2194 }
2195
2196 static const struct seq_operations unix_seq_ops = {
2197         .start  = unix_seq_start,
2198         .next   = unix_seq_next,
2199         .stop   = unix_seq_stop,
2200         .show   = unix_seq_show,
2201 };
2202
2203 static int unix_seq_open(struct inode *inode, struct file *file)
2204 {
2205         return seq_open_net(inode, file, &unix_seq_ops,
2206                             sizeof(struct unix_iter_state));
2207 }
2208
2209 static const struct file_operations unix_seq_fops = {
2210         .owner          = THIS_MODULE,
2211         .open           = unix_seq_open,
2212         .read           = seq_read,
2213         .llseek         = seq_lseek,
2214         .release        = seq_release_net,
2215 };
2216
2217 #endif
2218
2219 static struct net_proto_family unix_family_ops = {
2220         .family = PF_UNIX,
2221         .create = unix_create,
2222         .owner  = THIS_MODULE,
2223 };
2224
2225
2226 static int unix_net_init(struct net *net)
2227 {
2228         int error = -ENOMEM;
2229
2230         net->unx.sysctl_max_dgram_qlen = 10;
2231         if (unix_sysctl_register(net))
2232                 goto out;
2233
2234 #ifdef CONFIG_PROC_FS
2235         if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2236                 unix_sysctl_unregister(net);
2237                 goto out;
2238         }
2239 #endif
2240         error = 0;
2241 out:
2242         return error;
2243 }
2244
/*
 * Per-namespace teardown: unregisters the sysctls and removes the
 * "unix" proc entry set up by unix_net_init().
 */
static void unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	proc_net_remove(net, "unix");
}
2250
/* Per-namespace init/exit hooks, registered in af_unix_init(). */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2255
2256 static int __init af_unix_init(void)
2257 {
2258         int rc = -1;
2259         struct sk_buff *dummy_skb;
2260
2261         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2262
2263         rc = proto_register(&unix_proto, 1);
2264         if (rc != 0) {
2265                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2266                        __func__);
2267                 goto out;
2268         }
2269
2270         sock_register(&unix_family_ops);
2271         register_pernet_subsys(&unix_net_ops);
2272 out:
2273         return rc;
2274 }
2275
/*
 * Module unload: unregisters the PF_UNIX family, the unix proto and the
 * per-namespace operations registered by af_unix_init().
 */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2282
/*
 * Run earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries to use a
 * UNIX socket, but later than subsys_initcall() because we depend on
 * things initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);