[NET]: Move sock_valbool_flag to sock.c
net/core/sock.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              Generic socket support routines. Memory allocators, socket lock/release
7  *              handler for protocols to use and generic option handler.
8  *
9  *
10  * Version:     $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
11  *
12  * Authors:     Ross Biro
13  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *              Florian La Roche, <flla@stud.uni-sb.de>
15  *              Alan Cox, <A.Cox@swansea.ac.uk>
16  *
17  * Fixes:
18  *              Alan Cox        :       Numerous verify_area() problems
19  *              Alan Cox        :       Connecting on a connecting socket
20  *                                      now returns an error for tcp.
21  *              Alan Cox        :       sock->protocol is set correctly.
22  *                                      and is not sometimes left as 0.
23  *              Alan Cox        :       connect handles icmp errors on a
24  *                                      connect properly. Unfortunately there
25  *                                      is a restart syscall nasty there. I
26  *                                      can't match BSD without hacking the C
27  *                                      library. Ideas urgently sought!
28  *              Alan Cox        :       Disallow bind() to addresses that are
29  *                                      not ours - especially broadcast ones!!
30  *              Alan Cox        :       Socket 1024 _IS_ ok for users. (fencepost)
31  *              Alan Cox        :       sock_wfree/sock_rfree don't destroy sockets,
32  *                                      instead they leave that for the DESTROY timer.
33  *              Alan Cox        :       Clean up error flag in accept
34  *              Alan Cox        :       TCP ack handling is buggy, the DESTROY timer
35  *                                      was buggy. Put a remove_sock() in the handler
36  *                                      for memory when we hit 0. Also altered the timer
37  *                                      code. The ACK stuff can wait and needs major
38  *                                      TCP layer surgery.
39  *              Alan Cox        :       Fixed TCP ack bug, removed remove sock
40  *                                      and fixed timer/inet_bh race.
41  *              Alan Cox        :       Added zapped flag for TCP
42  *              Alan Cox        :       Move kfree_skb into skbuff.c and tidied up surplus code
43  *              Alan Cox        :       for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
44  *              Alan Cox        :       kfree_s calls now are kfree_skbmem so we can track skb resources
45  *              Alan Cox        :       Supports socket option broadcast now as does udp. Packet and raw need fixing.
46  *              Alan Cox        :       Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
47  *              Rick Sladkey    :       Relaxed UDP rules for matching packets.
48  *              C.E.Hawkins     :       IFF_PROMISC/SIOCGHWADDR support
49  *              Pauline Middelink :     identd support
50  *              Alan Cox        :       Fixed connect() taking signals I think.
51  *              Alan Cox        :       SO_LINGER supported
52  *              Alan Cox        :       Error reporting fixes
53  *              Anonymous       :       inet_create tidied up (sk->reuse setting)
54  *              Alan Cox        :       inet sockets don't set sk->type!
55  *              Alan Cox        :       Split socket option code
56  *              Alan Cox        :       Callbacks
57  *              Alan Cox        :       Nagle flag for Charles & Johannes stuff
58  *              Alex            :       Removed restriction on inet fioctl
59  *              Alan Cox        :       Splitting INET from NET core
60  *              Alan Cox        :       Fixed bogus SO_TYPE handling in getsockopt()
61  *              Adam Caldwell   :       Missing return in SO_DONTROUTE/SO_DEBUG code
62  *              Alan Cox        :       Split IP from generic code
63  *              Alan Cox        :       New kfree_skbmem()
64  *              Alan Cox        :       Make SO_DEBUG superuser only.
65  *              Alan Cox        :       Allow anyone to clear SO_DEBUG
66  *                                      (compatibility fix)
67  *              Alan Cox        :       Added optimistic memory grabbing for AF_UNIX throughput.
68  *              Alan Cox        :       Allocator for a socket is settable.
69  *              Alan Cox        :       SO_ERROR includes soft errors.
70  *              Alan Cox        :       Allow NULL arguments on some SO_ opts
71  *              Alan Cox        :       Generic socket allocation to make hooks
72  *                                      easier (suggested by Craig Metz).
73  *              Michael Pall    :       SO_ERROR returns positive errno again
74  *              Steve Whitehouse:       Added default destructor to free
75  *                                      protocol private data.
76  *              Steve Whitehouse:       Added various other default routines
77  *                                      common to several socket families.
78  *              Chris Evans     :       Call suser() check last on F_SETOWN
79  *              Jay Schulist    :       Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
80  *              Andi Kleen      :       Add sock_kmalloc()/sock_kfree_s()
81  *              Andi Kleen      :       Fix write_space callback
82  *              Chris Evans     :       Security fixes - signedness again
83  *              Arnaldo C. Melo :       cleanups, use skb_queue_purge
84  *
85  * To Fix:
86  *
87  *
88  *              This program is free software; you can redistribute it and/or
89  *              modify it under the terms of the GNU General Public License
90  *              as published by the Free Software Foundation; either version
91  *              2 of the License, or (at your option) any later version.
92  */
93
94 #include <linux/capability.h>
95 #include <linux/errno.h>
96 #include <linux/types.h>
97 #include <linux/socket.h>
98 #include <linux/in.h>
99 #include <linux/kernel.h>
100 #include <linux/module.h>
101 #include <linux/proc_fs.h>
102 #include <linux/seq_file.h>
103 #include <linux/sched.h>
104 #include <linux/timer.h>
105 #include <linux/string.h>
106 #include <linux/sockios.h>
107 #include <linux/net.h>
108 #include <linux/mm.h>
109 #include <linux/slab.h>
110 #include <linux/interrupt.h>
111 #include <linux/poll.h>
112 #include <linux/tcp.h>
113 #include <linux/init.h>
114 #include <linux/highmem.h>
115
116 #include <asm/uaccess.h>
117 #include <asm/system.h>
118
119 #include <linux/netdevice.h>
120 #include <net/protocol.h>
121 #include <linux/skbuff.h>
122 #include <net/net_namespace.h>
123 #include <net/request_sock.h>
124 #include <net/sock.h>
125 #include <net/xfrm.h>
126 #include <linux/ipsec.h>
127
128 #include <linux/filter.h>
129
130 #ifdef CONFIG_INET
131 #include <net/tcp.h>
132 #endif
133
134 /*
135  * Each address family might have different locking rules, so we have
136  * one slock key per address family:
137  */
138 static struct lock_class_key af_family_keys[AF_MAX];
139 static struct lock_class_key af_family_slock_keys[AF_MAX];
140
141 #ifdef CONFIG_DEBUG_LOCK_ALLOC
142 /*
143  * Make lock validator output more readable. (we pre-construct these
144  * strings build-time, so that runtime initialization of socket
145  * locks is fast):
146  */
147 static const char *af_family_key_strings[AF_MAX+1] = {
148   "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
149   "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
150   "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
151   "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
152   "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
153   "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
154   "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
155   "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
156   "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
157   "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
158   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
159   "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
160 };
161 static const char *af_family_slock_key_strings[AF_MAX+1] = {
162   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
163   "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
164   "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
165   "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
166   "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
167   "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
168   "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
169   "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
170   "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
171   "slock-27"       , "slock-28"          , "slock-29"          ,
172   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
173   "slock-AF_RXRPC" , "slock-AF_MAX"
174 };
175 static const char *af_family_clock_key_strings[AF_MAX+1] = {
176   "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
177   "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
178   "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
179   "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
180   "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
181   "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
182   "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
183   "clock-21"       , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
184   "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
185   "clock-27"       , "clock-28"          , "clock-29"          ,
186   "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
187   "clock-AF_RXRPC" , "clock-AF_MAX"
188 };
189 #endif
190
191 /*
192  * sk_callback_lock locking rules are per-address-family,
193  * so split the lock classes by using a per-AF key:
194  */
195 static struct lock_class_key af_callback_keys[AF_MAX];
196
197 /* Take into consideration the size of the struct sk_buff overhead in the
198  * determination of these values, since that is non-constant across
199  * platforms.  This makes socket queueing behavior and performance
200  * not depend upon such differences.
201  */
202 #define _SK_MEM_PACKETS         256
203 #define _SK_MEM_OVERHEAD        (sizeof(struct sk_buff) + 256)
204 #define SK_WMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
205 #define SK_RMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
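/*
 * Worked example (illustrative only; sizeof(struct sk_buff) differs by
 * architecture and config): with a hypothetical 240-byte sk_buff, each
 * queued packet is charged 240 + 256 = 496 bytes of overhead, so
 *
 *	SK_WMEM_MAX = SK_RMEM_MAX = 496 * 256 = 126976 bytes (~124 KiB)
 *
 * and platforms with different sk_buff sizes still get comparable
 * queue depths.
 */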
206
207 /* Run time adjustable parameters. */
208 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
209 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
210 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
211 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
212
213  * Maximal space eaten by iovec or ancillary data plus some space */
214 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
215
216 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
217 {
218         struct timeval tv;
219
220         if (optlen < sizeof(tv))
221                 return -EINVAL;
222         if (copy_from_user(&tv, optval, sizeof(tv)))
223                 return -EFAULT;
224         if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
225                 return -EDOM;
226
227         if (tv.tv_sec < 0) {
228                 static int warned __read_mostly;
229
230                 *timeo_p = 0;
231                 if (warned < 10 && net_ratelimit()) {
232                         warned++;
233                         printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) tries to set negative timeout\n",
234                                current->comm, task_pid_nr(current));
235                 }
236                 return 0;
237         }
238         *timeo_p = MAX_SCHEDULE_TIMEOUT;
239         if (tv.tv_sec == 0 && tv.tv_usec == 0)
240                 return 0;
241         if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
242                 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
243         return 0;
244 }
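/*
 * Example (userspace sketch, not part of this file): an SO_RCVTIMEO
 * setsockopt reaches this helper via sock_setsockopt(); the timeval
 * below becomes roughly 2*HZ + HZ/2 jiffies in *timeo_p.
 *
 *	struct timeval tv = { .tv_sec = 2, .tv_usec = 500000 };
 *
 *	setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
 */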
245
246 static void sock_warn_obsolete_bsdism(const char *name)
247 {
248         static int warned;
249         static char warncomm[TASK_COMM_LEN];
250         if (strcmp(warncomm, current->comm) && warned < 5) {
251                 strcpy(warncomm,  current->comm);
252                 printk(KERN_WARNING "process `%s' is using obsolete "
253                        "%s SO_BSDCOMPAT\n", warncomm, name);
254                 warned++;
255         }
256 }
257
258 static void sock_disable_timestamp(struct sock *sk)
259 {
260         if (sock_flag(sk, SOCK_TIMESTAMP)) {
261                 sock_reset_flag(sk, SOCK_TIMESTAMP);
262                 net_disable_timestamp();
263         }
264 }
265
266
267 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
268 {
269         int err = 0;
270         int skb_len;
271
272         /* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
273            the number of warnings when compiling with -W --ANK
274          */
275         if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
276             (unsigned)sk->sk_rcvbuf) {
277                 err = -ENOMEM;
278                 goto out;
279         }
280
281         err = sk_filter(sk, skb);
282         if (err)
283                 goto out;
284
285         skb->dev = NULL;
286         skb_set_owner_r(skb, sk);
287
288         /* Cache the SKB length before we tack it onto the receive
289          * queue.  Once it is added it no longer belongs to us and
290          * may be freed by other threads of control pulling packets
291          * from the queue.
292          */
293         skb_len = skb->len;
294
295         skb_queue_tail(&sk->sk_receive_queue, skb);
296
297         if (!sock_flag(sk, SOCK_DEAD))
298                 sk->sk_data_ready(sk, skb_len);
299 out:
300         return err;
301 }
302 EXPORT_SYMBOL(sock_queue_rcv_skb);
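/*
 * Example caller (a sketch modelled on typical datagram receive paths;
 * the surrounding function is hypothetical): on failure the skb still
 * belongs to the caller and must be freed by it.
 *
 *	static int example_proto_rcv(struct sock *sk, struct sk_buff *skb)
 *	{
 *		int err = sock_queue_rcv_skb(sk, skb);
 *
 *		if (err < 0)
 *			kfree_skb(skb);
 *		return err;
 *	}
 */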
303
304 int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
305 {
306         int rc = NET_RX_SUCCESS;
307
308         if (sk_filter(sk, skb))
309                 goto discard_and_relse;
310
311         skb->dev = NULL;
312
313         if (nested)
314                 bh_lock_sock_nested(sk);
315         else
316                 bh_lock_sock(sk);
317         if (!sock_owned_by_user(sk)) {
318                 /*
319                  * trylock + unlock semantics:
320                  */
321                 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
322
323                 rc = sk->sk_backlog_rcv(sk, skb);
324
325                 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
326         } else
327                 sk_add_backlog(sk, skb);
328         bh_unlock_sock(sk);
329 out:
330         sock_put(sk);
331         return rc;
332 discard_and_relse:
333         kfree_skb(skb);
334         goto out;
335 }
336 EXPORT_SYMBOL(sk_receive_skb);
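/*
 * Note for callers (illustrative sketch): sk_receive_skb() consumes a
 * reference on the sock via sock_put(), so it must be called with the
 * refcount held, e.g. from a lookup that returned the sock held:
 *
 *	sk = example_lookup_sock(net, ...);	(hypothetical, returns held sock)
 *	if (sk)
 *		return sk_receive_skb(sk, skb, 0);
 *	kfree_skb(skb);
 */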
337
338 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
339 {
340         struct dst_entry *dst = sk->sk_dst_cache;
341
342         if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
343                 sk->sk_dst_cache = NULL;
344                 dst_release(dst);
345                 return NULL;
346         }
347
348         return dst;
349 }
350 EXPORT_SYMBOL(__sk_dst_check);
351
352 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
353 {
354         struct dst_entry *dst = sk_dst_get(sk);
355
356         if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
357                 sk_dst_reset(sk);
358                 dst_release(dst);
359                 return NULL;
360         }
361
362         return dst;
363 }
364 EXPORT_SYMBOL(sk_dst_check);
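/*
 * Example use (sketch of a transmit path; example_route_lookup() is a
 * hypothetical protocol-specific helper): revalidate the cached route
 * and fall back to a fresh lookup once it has gone stale.
 *
 *	struct dst_entry *dst = sk_dst_check(sk, 0);
 *
 *	if (dst == NULL) {
 *		dst = example_route_lookup(sk);
 *		if (dst == NULL)
 *			return -EHOSTUNREACH;
 *		sk_dst_set(sk, dst);
 *	}
 */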
365
366 static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
367 {
368         int ret = -ENOPROTOOPT;
369 #ifdef CONFIG_NETDEVICES
370         struct net *net = sk->sk_net;
371         char devname[IFNAMSIZ];
372         int index;
373
374         /* Sorry... */
375         ret = -EPERM;
376         if (!capable(CAP_NET_RAW))
377                 goto out;
378
379         ret = -EINVAL;
380         if (optlen < 0)
381                 goto out;
382
383         /* Bind this socket to a particular device like "eth0",
384          * as specified in the passed interface name. If the
385          * name is "" or the option length is zero the socket
386          * is not bound.
387          */
388         if (optlen > IFNAMSIZ - 1)
389                 optlen = IFNAMSIZ - 1;
390         memset(devname, 0, sizeof(devname));
391
392         ret = -EFAULT;
393         if (copy_from_user(devname, optval, optlen))
394                 goto out;
395
396         if (devname[0] == '\0') {
397                 index = 0;
398         } else {
399                 struct net_device *dev = dev_get_by_name(net, devname);
400
401                 ret = -ENODEV;
402                 if (!dev)
403                         goto out;
404
405                 index = dev->ifindex;
406                 dev_put(dev);
407         }
408
409         lock_sock(sk);
410         sk->sk_bound_dev_if = index;
411         sk_dst_reset(sk);
412         release_sock(sk);
413
414         ret = 0;
415
416 out:
417 #endif
418
419         return ret;
420 }
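/*
 * Example (userspace sketch; requires CAP_NET_RAW): bind the socket to
 * eth0, then clear the binding again with an empty name.
 *
 *	setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "eth0", 4);
 *	setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "", 0);
 */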
421
422 static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
423 {
424         if (valbool)
425                 sock_set_flag(sk, bit);
426         else
427                 sock_reset_flag(sk, bit);
428 }
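/*
 * This helper collapses the set/reset pattern that used to be spelled
 * out per option, so a case such as
 *
 *	case SO_BROADCAST:
 *		if (valbool)
 *			sock_set_flag(sk, SOCK_BROADCAST);
 *		else
 *			sock_reset_flag(sk, SOCK_BROADCAST);
 *		break;
 *
 * becomes the one-liner used throughout sock_setsockopt() below:
 *
 *	case SO_BROADCAST:
 *		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
 *		break;
 */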
429
430 /*
431  *      This is meant for all protocols to use and covers goings on
432  *      at the socket level. Everything here is generic.
433  */
434
435 int sock_setsockopt(struct socket *sock, int level, int optname,
436                     char __user *optval, int optlen)
437 {
438         struct sock *sk=sock->sk;
439         int val;
440         int valbool;
441         struct linger ling;
442         int ret = 0;
443
444         /*
445          *      Options without arguments
446          */
447
448 #ifdef SO_DONTLINGER            /* Compatibility item... */
449         if (optname == SO_DONTLINGER) {
450                 lock_sock(sk);
451                 sock_reset_flag(sk, SOCK_LINGER);
452                 release_sock(sk);
453                 return 0;
454         }
455 #endif
456
457         if (optname == SO_BINDTODEVICE)
458                 return sock_bindtodevice(sk, optval, optlen);
459
460         if (optlen < sizeof(int))
461                 return -EINVAL;
462
463         if (get_user(val, (int __user *)optval))
464                 return -EFAULT;
465
466         valbool = val?1:0;
467
468         lock_sock(sk);
469
470         switch(optname) {
471         case SO_DEBUG:
472                 if (val && !capable(CAP_NET_ADMIN)) {
473                         ret = -EACCES;
474                 } else
475                         sock_valbool_flag(sk, SOCK_DBG, valbool);
476                 break;
477         case SO_REUSEADDR:
478                 sk->sk_reuse = valbool;
479                 break;
480         case SO_TYPE:
481         case SO_ERROR:
482                 ret = -ENOPROTOOPT;
483                 break;
484         case SO_DONTROUTE:
485                 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
486                 break;
487         case SO_BROADCAST:
488                 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
489                 break;
490         case SO_SNDBUF:
491                 /* Don't error on this: BSD doesn't, and if you think
492                    about it, this is right. Otherwise apps have to
493                    play 'guess the biggest size' games. RCVBUF/SNDBUF
494                    are treated in BSD as hints */
495
496                 if (val > sysctl_wmem_max)
497                         val = sysctl_wmem_max;
498 set_sndbuf:
499                 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
500                 if ((val * 2) < SOCK_MIN_SNDBUF)
501                         sk->sk_sndbuf = SOCK_MIN_SNDBUF;
502                 else
503                         sk->sk_sndbuf = val * 2;
504
505                 /*
506                  *      Wake up sending tasks if we
507                  *      upped the value.
508                  */
509                 sk->sk_write_space(sk);
510                 break;
511
512         case SO_SNDBUFFORCE:
513                 if (!capable(CAP_NET_ADMIN)) {
514                         ret = -EPERM;
515                         break;
516                 }
517                 goto set_sndbuf;
518
519         case SO_RCVBUF:
520                 /* Don't error on this: BSD doesn't, and if you think
521                    about it, this is right. Otherwise apps have to
522                    play 'guess the biggest size' games. RCVBUF/SNDBUF
523                    are treated in BSD as hints */
524
525                 if (val > sysctl_rmem_max)
526                         val = sysctl_rmem_max;
527 set_rcvbuf:
528                 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
529                 /*
530                  * We double it on the way in to account for
531                  * "struct sk_buff" etc. overhead.   Applications
532                  * assume that the SO_RCVBUF setting they make will
533                  * allow that much actual data to be received on that
534                  * socket.
535                  *
536                  * Applications are unaware that "struct sk_buff" and
537                  * other overheads allocate from the receive buffer
538                  * during socket buffer allocation.
539                  *
540                  * And after considering the possible alternatives,
541                  * returning the value we actually used in getsockopt
542                  * is the most desirable behavior.
543                  */
544                 if ((val * 2) < SOCK_MIN_RCVBUF)
545                         sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
546                 else
547                         sk->sk_rcvbuf = val * 2;
548                 break;
549
550         case SO_RCVBUFFORCE:
551                 if (!capable(CAP_NET_ADMIN)) {
552                         ret = -EPERM;
553                         break;
554                 }
555                 goto set_rcvbuf;
556
557         case SO_KEEPALIVE:
558 #ifdef CONFIG_INET
559                 if (sk->sk_protocol == IPPROTO_TCP)
560                         tcp_set_keepalive(sk, valbool);
561 #endif
562                 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
563                 break;
564
565         case SO_OOBINLINE:
566                 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
567                 break;
568
569         case SO_NO_CHECK:
570                 sk->sk_no_check = valbool;
571                 break;
572
573         case SO_PRIORITY:
574                 if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
575                         sk->sk_priority = val;
576                 else
577                         ret = -EPERM;
578                 break;
579
580         case SO_LINGER:
581                 if (optlen < sizeof(ling)) {
582                         ret = -EINVAL;  /* 1003.1g */
583                         break;
584                 }
585                 if (copy_from_user(&ling,optval,sizeof(ling))) {
586                         ret = -EFAULT;
587                         break;
588                 }
589                 if (!ling.l_onoff)
590                         sock_reset_flag(sk, SOCK_LINGER);
591                 else {
592 #if (BITS_PER_LONG == 32)
593                         if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
594                                 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
595                         else
596 #endif
597                                 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
598                         sock_set_flag(sk, SOCK_LINGER);
599                 }
600                 break;
601
602         case SO_BSDCOMPAT:
603                 sock_warn_obsolete_bsdism("setsockopt");
604                 break;
605
606         case SO_PASSCRED:
607                 if (valbool)
608                         set_bit(SOCK_PASSCRED, &sock->flags);
609                 else
610                         clear_bit(SOCK_PASSCRED, &sock->flags);
611                 break;
612
613         case SO_TIMESTAMP:
614         case SO_TIMESTAMPNS:
615                 if (valbool)  {
616                         if (optname == SO_TIMESTAMP)
617                                 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
618                         else
619                                 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
620                         sock_set_flag(sk, SOCK_RCVTSTAMP);
621                         sock_enable_timestamp(sk);
622                 } else {
623                         sock_reset_flag(sk, SOCK_RCVTSTAMP);
624                         sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
625                 }
626                 break;
627
628         case SO_RCVLOWAT:
629                 if (val < 0)
630                         val = INT_MAX;
631                 sk->sk_rcvlowat = val ? : 1;
632                 break;
633
634         case SO_RCVTIMEO:
635                 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
636                 break;
637
638         case SO_SNDTIMEO:
639                 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
640                 break;
641
642         case SO_ATTACH_FILTER:
643                 ret = -EINVAL;
644                 if (optlen == sizeof(struct sock_fprog)) {
645                         struct sock_fprog fprog;
646
647                         ret = -EFAULT;
648                         if (copy_from_user(&fprog, optval, sizeof(fprog)))
649                                 break;
650
651                         ret = sk_attach_filter(&fprog, sk);
652                 }
653                 break;
654
655         case SO_DETACH_FILTER:
656                 ret = sk_detach_filter(sk);
657                 break;
658
659         case SO_PASSSEC:
660                 if (valbool)
661                         set_bit(SOCK_PASSSEC, &sock->flags);
662                 else
663                         clear_bit(SOCK_PASSSEC, &sock->flags);
664                 break;
665
666                 /* We implement SO_SNDLOWAT etc. as not
667                    settable (1003.1g 5.3) */
668         default:
669                 ret = -ENOPROTOOPT;
670                 break;
671         }
672         release_sock(sk);
673         return ret;
674 }
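/*
 * Example (userspace sketch) of the SO_RCVBUF doubling documented
 * above: the kernel stores twice the requested value to cover sk_buff
 * overhead, and getsockopt() reports the doubled figure (assuming
 * sysctl_rmem_max permits the request).
 *
 *	int val = 65536, out;
 *	socklen_t len = sizeof(out);
 *
 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
 *	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &out, &len);
 *
 * out is now 131072, twice what was asked for.
 */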
675
676
677 int sock_getsockopt(struct socket *sock, int level, int optname,
678                     char __user *optval, int __user *optlen)
679 {
680         struct sock *sk = sock->sk;
681
682         union {
683                 int val;
684                 struct linger ling;
685                 struct timeval tm;
686         } v;
687
688         unsigned int lv = sizeof(int);
689         int len;
690
691         if (get_user(len, optlen))
692                 return -EFAULT;
693         if (len < 0)
694                 return -EINVAL;
695
696         switch(optname) {
697         case SO_DEBUG:
698                 v.val = sock_flag(sk, SOCK_DBG);
699                 break;
700
701         case SO_DONTROUTE:
702                 v.val = sock_flag(sk, SOCK_LOCALROUTE);
703                 break;
704
705         case SO_BROADCAST:
706                 v.val = !!sock_flag(sk, SOCK_BROADCAST);
707                 break;
708
709         case SO_SNDBUF:
710                 v.val = sk->sk_sndbuf;
711                 break;
712
713         case SO_RCVBUF:
714                 v.val = sk->sk_rcvbuf;
715                 break;
716
717         case SO_REUSEADDR:
718                 v.val = sk->sk_reuse;
719                 break;
720
721         case SO_KEEPALIVE:
722                 v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
723                 break;
724
725         case SO_TYPE:
726                 v.val = sk->sk_type;
727                 break;
728
729         case SO_ERROR:
730                 v.val = -sock_error(sk);
731                 if (v.val==0)
732                         v.val = xchg(&sk->sk_err_soft, 0);
733                 break;
734
735         case SO_OOBINLINE:
736                 v.val = !!sock_flag(sk, SOCK_URGINLINE);
737                 break;
738
739         case SO_NO_CHECK:
740                 v.val = sk->sk_no_check;
741                 break;
742
743         case SO_PRIORITY:
744                 v.val = sk->sk_priority;
745                 break;
746
747         case SO_LINGER:
748                 lv              = sizeof(v.ling);
749                 v.ling.l_onoff  = !!sock_flag(sk, SOCK_LINGER);
750                 v.ling.l_linger = sk->sk_lingertime / HZ;
751                 break;
752
753         case SO_BSDCOMPAT:
754                 sock_warn_obsolete_bsdism("getsockopt");
755                 break;
756
757         case SO_TIMESTAMP:
758                 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
759                                 !sock_flag(sk, SOCK_RCVTSTAMPNS);
760                 break;
761
762         case SO_TIMESTAMPNS:
763                 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
764                 break;
765
766         case SO_RCVTIMEO:
767                 lv=sizeof(struct timeval);
768                 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
769                         v.tm.tv_sec = 0;
770                         v.tm.tv_usec = 0;
771                 } else {
772                         v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
773                         v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
774                 }
775                 break;
776
777         case SO_SNDTIMEO:
778                 lv=sizeof(struct timeval);
779                 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
780                         v.tm.tv_sec = 0;
781                         v.tm.tv_usec = 0;
782                 } else {
783                         v.tm.tv_sec = sk->sk_sndtimeo / HZ;
784                         v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
785                 }
786                 break;
787
788         case SO_RCVLOWAT:
789                 v.val = sk->sk_rcvlowat;
790                 break;
791
792         case SO_SNDLOWAT:
793                 v.val=1;
794                 break;
795
796         case SO_PASSCRED:
797                 v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
798                 break;
799
800         case SO_PEERCRED:
801                 if (len > sizeof(sk->sk_peercred))
802                         len = sizeof(sk->sk_peercred);
803                 if (copy_to_user(optval, &sk->sk_peercred, len))
804                         return -EFAULT;
805                 goto lenout;
806
807         case SO_PEERNAME:
808         {
809                 char address[128];
810
811                 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
812                         return -ENOTCONN;
813                 if (lv < len)
814                         return -EINVAL;
815                 if (copy_to_user(optval, address, len))
816                         return -EFAULT;
817                 goto lenout;
818         }
819
820         /* Dubious BSD thing... Probably nobody even uses it, but
821          * the UNIX standard wants it for whatever reason... -DaveM
822          */
823         case SO_ACCEPTCONN:
824                 v.val = sk->sk_state == TCP_LISTEN;
825                 break;
826
827         case SO_PASSSEC:
828                 v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
829                 break;
830
831         case SO_PEERSEC:
832                 return security_socket_getpeersec_stream(sock, optval, optlen, len);
833
834         default:
835                 return -ENOPROTOOPT;
836         }
837
838         if (len > lv)
839                 len = lv;
840         if (copy_to_user(optval, &v, len))
841                 return -EFAULT;
842 lenout:
843         if (put_user(len, optlen))
844                 return -EFAULT;
845         return 0;
846 }
847
848 /*
849  * Initialize an sk_lock.
850  *
851  * (We also register the sk_lock with the lock validator.)
852  */
853 static inline void sock_lock_init(struct sock *sk)
854 {
855         sock_lock_init_class_and_name(sk,
856                         af_family_slock_key_strings[sk->sk_family],
857                         af_family_slock_keys + sk->sk_family,
858                         af_family_key_strings[sk->sk_family],
859                         af_family_keys + sk->sk_family);
860 }
861
862 static void sock_copy(struct sock *nsk, const struct sock *osk)
863 {
864 #ifdef CONFIG_SECURITY_NETWORK
865         void *sptr = nsk->sk_security;
866 #endif
867
868         memcpy(nsk, osk, osk->sk_prot->obj_size);
869 #ifdef CONFIG_SECURITY_NETWORK
870         nsk->sk_security = sptr;
871         security_sk_clone(osk, nsk);
872 #endif
873 }
874
875 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
876                 int family)
877 {
878         struct sock *sk;
879         struct kmem_cache *slab;
880
881         slab = prot->slab;
882         if (slab != NULL)
883                 sk = kmem_cache_alloc(slab, priority);
884         else
885                 sk = kmalloc(prot->obj_size, priority);
886
887         if (sk != NULL) {
888                 if (security_sk_alloc(sk, family, priority))
889                         goto out_free;
890
891                 if (!try_module_get(prot->owner))
892                         goto out_free_sec;
893         }
894
895         return sk;
896
897 out_free_sec:
898         security_sk_free(sk);
899 out_free:
900         if (slab != NULL)
901                 kmem_cache_free(slab, sk);
902         else
903                 kfree(sk);
904         return NULL;
905 }
906
907 static void sk_prot_free(struct proto *prot, struct sock *sk)
908 {
909         struct kmem_cache *slab;
910         struct module *owner;
911
912         owner = prot->owner;
913         slab = prot->slab;
914
915         security_sk_free(sk);
916         if (slab != NULL)
917                 kmem_cache_free(slab, sk);
918         else
919                 kfree(sk);
920         module_put(owner);
921 }
922
923 /**
924  *      sk_alloc - All socket objects are allocated here
925  *      @net: the applicable net namespace
926  *      @family: protocol family
927  *      @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
928  *      @prot: struct proto associated with this new sock instance
929  *      (the new sock is zeroed at allocation via __GFP_ZERO)
930  */
931 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
932                       struct proto *prot)
933 {
934         struct sock *sk;
935
936         sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
937         if (sk) {
938                 sk->sk_family = family;
939                 /*
940                  * See comment in struct sock definition to understand
941                  * why we need sk_prot_creator -acme
942                  */
943                 sk->sk_prot = sk->sk_prot_creator = prot;
944                 sock_lock_init(sk);
945                 sk->sk_net = get_net(net);
946         }
947
948         return sk;
949 }
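/*
 * Example caller (sketch of a protocol family's create routine;
 * PF_EXAMPLE and example_proto are hypothetical):
 *
 *	static int example_create(struct net *net, struct socket *sock)
 *	{
 *		struct sock *sk;
 *
 *		sk = sk_alloc(net, PF_EXAMPLE, GFP_KERNEL, &example_proto);
 *		if (!sk)
 *			return -ENOMEM;
 *		sock_init_data(sock, sk);
 *		return 0;
 *	}
 */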
950
951 void sk_free(struct sock *sk)
952 {
953         struct sk_filter *filter;
954
955         if (sk->sk_destruct)
956                 sk->sk_destruct(sk);
957
958         filter = rcu_dereference(sk->sk_filter);
959         if (filter) {
960                 sk_filter_uncharge(sk, filter);
961                 rcu_assign_pointer(sk->sk_filter, NULL);
962         }
963
964         sock_disable_timestamp(sk);
965
966         if (atomic_read(&sk->sk_omem_alloc))
967                 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
968                        __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
969
970         put_net(sk->sk_net);
971         sk_prot_free(sk->sk_prot_creator, sk);
972 }
973
974 struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
975 {
976         struct sock *newsk;
977
978         newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
979         if (newsk != NULL) {
980                 struct sk_filter *filter;
981
982                 sock_copy(newsk, sk);
983
984                 /* SANITY */
985                 get_net(newsk->sk_net);
986                 sk_node_init(&newsk->sk_node);
987                 sock_lock_init(newsk);
988                 bh_lock_sock(newsk);
989                 newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;
990
991                 atomic_set(&newsk->sk_rmem_alloc, 0);
992                 atomic_set(&newsk->sk_wmem_alloc, 0);
993                 atomic_set(&newsk->sk_omem_alloc, 0);
994                 skb_queue_head_init(&newsk->sk_receive_queue);
995                 skb_queue_head_init(&newsk->sk_write_queue);
996 #ifdef CONFIG_NET_DMA
997                 skb_queue_head_init(&newsk->sk_async_wait_queue);
998 #endif
999
1000                 rwlock_init(&newsk->sk_dst_lock);
1001                 rwlock_init(&newsk->sk_callback_lock);
1002                 lockdep_set_class_and_name(&newsk->sk_callback_lock,
1003                                 af_callback_keys + newsk->sk_family,
1004                                 af_family_clock_key_strings[newsk->sk_family]);
1005
1006                 newsk->sk_dst_cache     = NULL;
1007                 newsk->sk_wmem_queued   = 0;
1008                 newsk->sk_forward_alloc = 0;
1009                 newsk->sk_send_head     = NULL;
1010                 newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1011
1012                 sock_reset_flag(newsk, SOCK_DONE);
1013                 skb_queue_head_init(&newsk->sk_error_queue);
1014
1015                 filter = newsk->sk_filter;
1016                 if (filter != NULL)
1017                         sk_filter_charge(newsk, filter);
1018
1019                 if (unlikely(xfrm_sk_clone_policy(newsk))) {
1020                         /* It is still raw copy of parent, so invalidate
1021                          * destructor and make plain sk_free() */
1022                         newsk->sk_destruct = NULL;
1023                         sk_free(newsk);
1024                         newsk = NULL;
1025                         goto out;
1026                 }
1027
1028                 newsk->sk_err      = 0;
1029                 newsk->sk_priority = 0;
1030                 atomic_set(&newsk->sk_refcnt, 2);
1031
1032                 /*
1033                  * Increment the counter in the same struct proto as the master
1034                  * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
1035                  * is the same as sk->sk_prot->socks, as this field was copied
1036                  * with memcpy).
1037                  *
1038                  * This _changes_ the previous behaviour, where
1039                  * tcp_create_openreq_child always was incrementing the
1040                  * equivalent to tcp_prot->socks (inet_sock_nr), so this have
1041                  * to be taken into account in all callers. -acme
1042                  */
1043                 sk_refcnt_debug_inc(newsk);
1044                 newsk->sk_socket = NULL;
1045                 newsk->sk_sleep  = NULL;
1046
1047                 if (newsk->sk_prot->sockets_allocated)
1048                         atomic_inc(newsk->sk_prot->sockets_allocated);
1049         }
1050 out:
1051         return newsk;
1052 }
1053
1054 EXPORT_SYMBOL_GPL(sk_clone);
1055
1056 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1057 {
1058         __sk_dst_set(sk, dst);
1059         sk->sk_route_caps = dst->dev->features;
1060         if (sk->sk_route_caps & NETIF_F_GSO)
1061                 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1062         if (sk_can_gso(sk)) {
1063                 if (dst->header_len)
1064                         sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1065                 else
1066                         sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1067         }
1068 }
1069 EXPORT_SYMBOL_GPL(sk_setup_caps);
1070
1071 void __init sk_init(void)
1072 {
1073         if (num_physpages <= 4096) {
1074                 sysctl_wmem_max = 32767;
1075                 sysctl_rmem_max = 32767;
1076                 sysctl_wmem_default = 32767;
1077                 sysctl_rmem_default = 32767;
1078         } else if (num_physpages >= 131072) {
1079                 sysctl_wmem_max = 131071;
1080                 sysctl_rmem_max = 131071;
1081         }
1082 }
1083
1084 /*
1085  *      Simple resource managers for sockets.
1086  */
1087
1088
1089 /*
1090  * Write buffer destructor automatically called from kfree_skb.
1091  */
1092 void sock_wfree(struct sk_buff *skb)
1093 {
1094         struct sock *sk = skb->sk;
1095
1096         /* In case it might be waiting for more memory. */
1097         atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
1098         if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
1099                 sk->sk_write_space(sk);
1100         sock_put(sk);
1101 }
1102
1103 /*
1104  * Read buffer destructor automatically called from kfree_skb.
1105  */
1106 void sock_rfree(struct sk_buff *skb)
1107 {
1108         struct sock *sk = skb->sk;
1109
1110         atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
1111 }
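/*
 * These destructors are installed by skb_set_owner_w() and
 * skb_set_owner_r() in <net/sock.h>, which also charge skb->truesize
 * to the matching counter. A sketch of the write-side pairing:
 *
 *	sock_hold(sk);				(dropped again by sock_wfree)
 *	skb->sk = sk;
 *	skb->destructor = sock_wfree;
 *	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 */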
1112
1113
1114 int sock_i_uid(struct sock *sk)
1115 {
1116         int uid;
1117
1118         read_lock(&sk->sk_callback_lock);
1119         uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
1120         read_unlock(&sk->sk_callback_lock);
1121         return uid;
1122 }
1123
1124 unsigned long sock_i_ino(struct sock *sk)
1125 {
1126         unsigned long ino;
1127
1128         read_lock(&sk->sk_callback_lock);
1129         ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1130         read_unlock(&sk->sk_callback_lock);
1131         return ino;
1132 }
1133
1134 /*
1135  * Allocate a skb from the socket's send buffer.
1136  */
1137 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1138                              gfp_t priority)
1139 {
1140         if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1141                 struct sk_buff * skb = alloc_skb(size, priority);
1142                 if (skb) {
1143                         skb_set_owner_w(skb, sk);
1144                         return skb;
1145                 }
1146         }
1147         return NULL;
1148 }
1149
1150 /*
1151  * Allocate a skb from the socket's receive buffer.
1152  */
1153 struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
1154                              gfp_t priority)
1155 {
1156         if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1157                 struct sk_buff *skb = alloc_skb(size, priority);
1158                 if (skb) {
1159                         skb_set_owner_r(skb, sk);
1160                         return skb;
1161                 }
1162         }
1163         return NULL;
1164 }
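/*
 * Example (illustrative): a protocol emitting a control packet charged
 * to its own send buffer; with force == 0 the allocation is refused
 * once sk_wmem_alloc already exceeds sk_sndbuf.
 *
 *	skb = sock_wmalloc(sk, len, 0, GFP_ATOMIC);
 *	if (skb == NULL)
 *		return -ENOBUFS;
 */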
1165
1166 /*
1167  * Allocate a memory block from the socket's option memory buffer.
1168  */
1169 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1170 {
1171         if ((unsigned)size <= sysctl_optmem_max &&
1172             atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1173                 void *mem;
1174                 /* First do the add, to avoid the race if kmalloc
1175                  * might sleep.
1176                  */
1177                 atomic_add(size, &sk->sk_omem_alloc);
1178                 mem = kmalloc(size, priority);
1179                 if (mem)
1180                         return mem;
1181                 atomic_sub(size, &sk->sk_omem_alloc);
1182         }
1183         return NULL;
1184 }
1185
1186 /*
1187  * Free an option memory block.
1188  */
1189 void sock_kfree_s(struct sock *sk, void *mem, int size)
1190 {
1191         kfree(mem);
1192         atomic_sub(size, &sk->sk_omem_alloc);
1193 }
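/*
 * Example pairing (sketch): the caller must pass sock_kfree_s() the
 * same size it gave sock_kmalloc(), since sk_omem_alloc is only a
 * counter and kfree() does the actual freeing.
 *
 *	opt = sock_kmalloc(sk, sizeof(*opt), GFP_KERNEL);
 *	if (opt == NULL)
 *		return -ENOBUFS;
 *	...
 *	sock_kfree_s(sk, opt, sizeof(*opt));
 */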
1194
1195 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1196    I think, these locks should be removed for datagram sockets.
1197  */
1198 static long sock_wait_for_wmem(struct sock * sk, long timeo)
1199 {
1200         DEFINE_WAIT(wait);
1201
1202         clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1203         for (;;) {
1204                 if (!timeo)
1205                         break;
1206                 if (signal_pending(current))
1207                         break;
1208                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1209                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1210                 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1211                         break;
1212                 if (sk->sk_shutdown & SEND_SHUTDOWN)
1213                         break;
1214                 if (sk->sk_err)
1215                         break;
1216                 timeo = schedule_timeout(timeo);
1217         }
1218         finish_wait(sk->sk_sleep, &wait);
1219         return timeo;
1220 }
1221
1222
1223 /*
1224  *      Generic send/receive buffer handlers
1225  */
1226
1227 static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
1228                                             unsigned long header_len,
1229                                             unsigned long data_len,
1230                                             int noblock, int *errcode)
1231 {
1232         struct sk_buff *skb;
1233         gfp_t gfp_mask;
1234         long timeo;
1235         int err;
1236
1237         gfp_mask = sk->sk_allocation;
1238         if (gfp_mask & __GFP_WAIT)
1239                 gfp_mask |= __GFP_REPEAT;
1240
1241         timeo = sock_sndtimeo(sk, noblock);
1242         while (1) {
1243                 err = sock_error(sk);
1244                 if (err != 0)
1245                         goto failure;
1246
1247                 err = -EPIPE;
1248                 if (sk->sk_shutdown & SEND_SHUTDOWN)
1249                         goto failure;
1250
1251                 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1252                         skb = alloc_skb(header_len, gfp_mask);
1253                         if (skb) {
1254                                 int npages;
1255                                 int i;
1256
1257                                 /* No pages, we're done... */
1258                                 if (!data_len)
1259                                         break;
1260
1261                                 npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1262                                 skb->truesize += data_len;
1263                                 skb_shinfo(skb)->nr_frags = npages;
1264                                 for (i = 0; i < npages; i++) {
1265                                         struct page *page;
1266                                         skb_frag_t *frag;
1267
1268                                         page = alloc_pages(sk->sk_allocation, 0);
1269                                         if (!page) {
1270                                                 err = -ENOBUFS;
1271                                                 skb_shinfo(skb)->nr_frags = i;
1272                                                 kfree_skb(skb);
1273                                                 goto failure;
1274                                         }
1275
1276                                         frag = &skb_shinfo(skb)->frags[i];
1277                                         frag->page = page;
1278                                         frag->page_offset = 0;
1279                                         frag->size = (data_len >= PAGE_SIZE ?
1280                                                       PAGE_SIZE :
1281                                                       data_len);
1282                                         data_len -= PAGE_SIZE;
1283                                 }
1284
1285                                 /* Full success... */
1286                                 break;
1287                         }
1288                         err = -ENOBUFS;
1289                         goto failure;
1290                 }
1291                 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1292                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1293                 err = -EAGAIN;
1294                 if (!timeo)
1295                         goto failure;
1296                 if (signal_pending(current))
1297                         goto interrupted;
1298                 timeo = sock_wait_for_wmem(sk, timeo);
1299         }
1300
1301         skb_set_owner_w(skb, sk);
1302         return skb;
1303
1304 interrupted:
1305         err = sock_intr_errno(timeo);
1306 failure:
1307         *errcode = err;
1308         return NULL;
1309 }
1310
1311 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1312                                     int noblock, int *errcode)
1313 {
1314         return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
1315 }
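/*
 * Example caller (sketch of a datagram sendmsg path; len and reserve
 * are caller-computed): noblock is normally derived from MSG_DONTWAIT,
 * and *errcode distinguishes -EAGAIN (send timeout elapsed) from
 * signal and socket errors.
 *
 *	skb = sock_alloc_send_skb(sk, len + reserve,
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (skb == NULL)
 *		goto out_err;
 */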
1316
1317 static void __lock_sock(struct sock *sk)
1318 {
1319         DEFINE_WAIT(wait);
1320
1321         for (;;) {
1322                 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1323                                         TASK_UNINTERRUPTIBLE);
1324                 spin_unlock_bh(&sk->sk_lock.slock);
1325                 schedule();
1326                 spin_lock_bh(&sk->sk_lock.slock);
1327                 if (!sock_owned_by_user(sk))
1328                         break;
1329         }
1330         finish_wait(&sk->sk_lock.wq, &wait);
1331 }
1332
1333 static void __release_sock(struct sock *sk)
1334 {
1335         struct sk_buff *skb = sk->sk_backlog.head;
1336
1337         do {
1338                 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1339                 bh_unlock_sock(sk);
1340
1341                 do {
1342                         struct sk_buff *next = skb->next;
1343
1344                         skb->next = NULL;
1345                         sk->sk_backlog_rcv(sk, skb);
1346
1347                         /*
1348                          * We are in process context here with softirqs
1349                          * disabled, use cond_resched_softirq() to preempt.
1350                          * This is safe to do because we've taken the backlog
1351                          * queue private:
1352                          */
1353                         cond_resched_softirq();
1354
1355                         skb = next;
1356                 } while (skb != NULL);
1357
1358                 bh_lock_sock(sk);
1359         } while ((skb = sk->sk_backlog.head) != NULL);
1360 }
1361
1362 /**
1363  * sk_wait_data - wait for data to arrive at sk_receive_queue
1364  * @sk:    sock to wait on
1365  * @timeo: for how long
1366  *
1367  * Now socket state including sk->sk_err is changed only under lock,
1368  * hence we may omit checks after joining wait queue.
1369  * We check receive queue before schedule() only as optimization;
1370  * it is very likely that release_sock() added new data.
1371  */
1372 int sk_wait_data(struct sock *sk, long *timeo)
1373 {
1374         int rc;
1375         DEFINE_WAIT(wait);
1376
1377         prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1378         set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1379         rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1380         clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1381         finish_wait(sk->sk_sleep, &wait);
1382         return rc;
1383 }
1384
1385 EXPORT_SYMBOL(sk_wait_data);
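/*
 * Typical use (sketch, in the style of recvmsg loops): call with the
 * socket locked; sk_wait_event() drops and retakes the lock around
 * schedule_timeout(), so the queue must be rechecked afterwards.
 *
 *	while (skb_queue_empty(&sk->sk_receive_queue)) {
 *		if (!timeo)
 *			return -EAGAIN;
 *		if (signal_pending(current))
 *			return sock_intr_errno(timeo);
 *		sk_wait_data(sk, &timeo);
 *	}
 */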
1386
1387 /*
1388  * Set of default routines for initialising struct proto_ops when
1389  * the protocol does not support a particular function. In certain
1390  * cases where it makes no sense for a protocol to have a "do nothing"
1391  * function, some default processing is provided.
1392  */
1393
1394 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
1395 {
1396         return -EOPNOTSUPP;
1397 }
1398
1399 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1400                     int len, int flags)
1401 {
1402         return -EOPNOTSUPP;
1403 }
1404
1405 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
1406 {
1407         return -EOPNOTSUPP;
1408 }
1409
1410 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
1411 {
1412         return -EOPNOTSUPP;
1413 }
1414
1415 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1416                     int *len, int peer)
1417 {
1418         return -EOPNOTSUPP;
1419 }
1420
1421 unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
1422 {
1423         return 0;
1424 }
1425
1426 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1427 {
1428         return -EOPNOTSUPP;
1429 }
1430
1431 int sock_no_listen(struct socket *sock, int backlog)
1432 {
1433         return -EOPNOTSUPP;
1434 }
1435
1436 int sock_no_shutdown(struct socket *sock, int how)
1437 {
1438         return -EOPNOTSUPP;
1439 }
1440
1441 int sock_no_setsockopt(struct socket *sock, int level, int optname,
1442                     char __user *optval, int optlen)
1443 {
1444         return -EOPNOTSUPP;
1445 }
1446
1447 int sock_no_getsockopt(struct socket *sock, int level, int optname,
1448                     char __user *optval, int __user *optlen)
1449 {
1450         return -EOPNOTSUPP;
1451 }
1452
1453 int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1454                     size_t len)
1455 {
1456         return -EOPNOTSUPP;
1457 }
1458
1459 int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1460                     size_t len, int flags)
1461 {
1462         return -EOPNOTSUPP;
1463 }
1464
1465 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1466 {
1467         /* Mirror missing mmap method error code */
1468         return -ENODEV;
1469 }
1470
1471 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1472 {
1473         ssize_t res;
1474         struct msghdr msg = {.msg_flags = flags};
1475         struct kvec iov;
1476         char *kaddr = kmap(page);
1477         iov.iov_base = kaddr + offset;
1478         iov.iov_len = size;
1479         res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1480         kunmap(page);
1481         return res;
1482 }
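/*
 * Example (sketch): a minimal protocol wiring these defaults into its
 * proto_ops, pointing only the operations it really supports at real
 * handlers (all example_* names are hypothetical).
 *
 *	static const struct proto_ops example_ops = {
 *		.family		= PF_EXAMPLE,
 *		.owner		= THIS_MODULE,
 *		.release	= example_release,
 *		.bind		= sock_no_bind,
 *		.connect	= sock_no_connect,
 *		.socketpair	= sock_no_socketpair,
 *		.accept		= sock_no_accept,
 *		.getname	= example_getname,
 *		.poll		= datagram_poll,
 *		.ioctl		= sock_no_ioctl,
 *		.listen		= sock_no_listen,
 *		.shutdown	= sock_no_shutdown,
 *		.setsockopt	= sock_no_setsockopt,
 *		.getsockopt	= sock_no_getsockopt,
 *		.sendmsg	= example_sendmsg,
 *		.recvmsg	= example_recvmsg,
 *		.mmap		= sock_no_mmap,
 *		.sendpage	= sock_no_sendpage,
 *	};
 */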
1483
1484 /*
1485  *      Default Socket Callbacks
1486  */
1487
1488 static void sock_def_wakeup(struct sock *sk)
1489 {
1490         read_lock(&sk->sk_callback_lock);
1491         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1492                 wake_up_interruptible_all(sk->sk_sleep);
1493         read_unlock(&sk->sk_callback_lock);
1494 }
1495
1496 static void sock_def_error_report(struct sock *sk)
1497 {
1498         read_lock(&sk->sk_callback_lock);
1499         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1500                 wake_up_interruptible(sk->sk_sleep);
1501         sk_wake_async(sk,0,POLL_ERR);
1502         read_unlock(&sk->sk_callback_lock);
1503 }
1504
1505 static void sock_def_readable(struct sock *sk, int len)
1506 {
1507         read_lock(&sk->sk_callback_lock);
1508         if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1509                 wake_up_interruptible(sk->sk_sleep);
1510         sk_wake_async(sk,1,POLL_IN);
1511         read_unlock(&sk->sk_callback_lock);
1512 }
1513
1514 static void sock_def_write_space(struct sock *sk)
1515 {
1516         read_lock(&sk->sk_callback_lock);
1517
1518         /* Do not wake up a writer until he can make "significant"
1519          * progress.  --DaveM
1520          */
1521         if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1522                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1523                         wake_up_interruptible(sk->sk_sleep);
1524
1525                 /* Should agree with poll, otherwise some programs break */
1526                 if (sock_writeable(sk))
1527                         sk_wake_async(sk, 2, POLL_OUT);
1528         }
1529
1530         read_unlock(&sk->sk_callback_lock);
1531 }
1532
1533 static void sock_def_destruct(struct sock *sk)
1534 {
1535         kfree(sk->sk_protinfo);
1536 }
1537
1538 void sk_send_sigurg(struct sock *sk)
1539 {
1540         if (sk->sk_socket && sk->sk_socket->file)
1541                 if (send_sigurg(&sk->sk_socket->file->f_owner))
1542                         sk_wake_async(sk, 3, POLL_PRI);
1543 }
1544
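/*
 * Timer helpers that keep the sock refcount in step with the timer:
 * mod_timer() returns 0 when the timer was not already pending, so
 * sk_reset_timer() takes a reference exactly once per armed timer,
 * and sk_stop_timer() drops it only when del_timer() really cancelled
 * a pending timer (the timer_pending() pre-check merely avoids the
 * call in the common idle case).  The handler itself is expected to
 * drop its reference when the timer fires.
 */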
void sk_reset_timer(struct sock *sk, struct timer_list *timer,
1546                     unsigned long expires)
1547 {
1548         if (!mod_timer(timer, expires))
1549                 sock_hold(sk);
1550 }
1551
1552 EXPORT_SYMBOL(sk_reset_timer);
1553
void sk_stop_timer(struct sock *sk, struct timer_list *timer)
1555 {
1556         if (timer_pending(timer) && del_timer(timer))
1557                 __sock_put(sk);
1558 }
1559
1560 EXPORT_SYMBOL(sk_stop_timer);
1561
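/*
 * sock_init_data() gives a freshly allocated sock sane defaults:
 * empty queues, sysctl-derived buffer sizes, TCP_CLOSE state, the
 * default callbacks above and an initial refcount of one.  @sock may
 * be NULL for kernel-internal sockets that have no struct socket
 * attached; sk_sleep is then left NULL as well.
 */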
1562 void sock_init_data(struct socket *sock, struct sock *sk)
1563 {
1564         skb_queue_head_init(&sk->sk_receive_queue);
1565         skb_queue_head_init(&sk->sk_write_queue);
1566         skb_queue_head_init(&sk->sk_error_queue);
1567 #ifdef CONFIG_NET_DMA
1568         skb_queue_head_init(&sk->sk_async_wait_queue);
1569 #endif
1570
1571         sk->sk_send_head        =       NULL;
1572
1573         init_timer(&sk->sk_timer);
1574
1575         sk->sk_allocation       =       GFP_KERNEL;
1576         sk->sk_rcvbuf           =       sysctl_rmem_default;
1577         sk->sk_sndbuf           =       sysctl_wmem_default;
1578         sk->sk_state            =       TCP_CLOSE;
1579         sk->sk_socket           =       sock;
1580
1581         sock_set_flag(sk, SOCK_ZAPPED);
1582
1583         if (sock) {
1584                 sk->sk_type     =       sock->type;
1585                 sk->sk_sleep    =       &sock->wait;
1586                 sock->sk        =       sk;
1587         } else
1588                 sk->sk_sleep    =       NULL;
1589
1590         rwlock_init(&sk->sk_dst_lock);
1591         rwlock_init(&sk->sk_callback_lock);
1592         lockdep_set_class_and_name(&sk->sk_callback_lock,
1593                         af_callback_keys + sk->sk_family,
1594                         af_family_clock_key_strings[sk->sk_family]);
1595
1596         sk->sk_state_change     =       sock_def_wakeup;
1597         sk->sk_data_ready       =       sock_def_readable;
1598         sk->sk_write_space      =       sock_def_write_space;
1599         sk->sk_error_report     =       sock_def_error_report;
1600         sk->sk_destruct         =       sock_def_destruct;
1601
1602         sk->sk_sndmsg_page      =       NULL;
1603         sk->sk_sndmsg_off       =       0;
1604
1605         sk->sk_peercred.pid     =       0;
1606         sk->sk_peercred.uid     =       -1;
1607         sk->sk_peercred.gid     =       -1;
1608         sk->sk_write_pending    =       0;
1609         sk->sk_rcvlowat         =       1;
1610         sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
1611         sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;
1612
1613         sk->sk_stamp = ktime_set(-1L, -1L);
1614
1615         atomic_set(&sk->sk_refcnt, 1);
1616         atomic_set(&sk->sk_drops, 0);
1617 }
1618
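/*
 * The socket lock is two-level: sk_lock.slock is a spinlock guarding
 * the "owned" flag and the backlog, while "owned" itself acts as a
 * mutex for process context.  lock_sock_nested() only spins long
 * enough to mark the lock owned; release_sock() first processes any
 * backlog that softirqs queued while the lock was held, then wakes up
 * waiters.
 */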
1619 void fastcall lock_sock_nested(struct sock *sk, int subclass)
1620 {
1621         might_sleep();
1622         spin_lock_bh(&sk->sk_lock.slock);
1623         if (sk->sk_lock.owned)
1624                 __lock_sock(sk);
1625         sk->sk_lock.owned = 1;
1626         spin_unlock(&sk->sk_lock.slock);
1627         /*
1628          * The sk_lock has mutex_lock() semantics here:
1629          */
1630         mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
1631         local_bh_enable();
1632 }
1633
1634 EXPORT_SYMBOL(lock_sock_nested);
1635
1636 void fastcall release_sock(struct sock *sk)
1637 {
1638         /*
1639          * The sk_lock has mutex_unlock() semantics:
1640          */
1641         mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
1642
1643         spin_lock_bh(&sk->sk_lock.slock);
1644         if (sk->sk_backlog.tail)
1645                 __release_sock(sk);
1646         sk->sk_lock.owned = 0;
1647         if (waitqueue_active(&sk->sk_lock.wq))
1648                 wake_up(&sk->sk_lock.wq);
1649         spin_unlock_bh(&sk->sk_lock.slock);
1650 }
1651 EXPORT_SYMBOL(release_sock);
1652
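/*
 * SIOCGSTAMP/SIOCGSTAMPNS support.  Timestamping is enabled lazily on
 * the first query; until a packet has actually been stamped, sk_stamp
 * still holds the -1/-1 sentinel set by sock_init_data() and -ENOENT
 * is returned.  A zero timestamp is refreshed with the current time
 * before being copied out.
 */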
1653 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1654 {
        struct timeval tv;

1656         if (!sock_flag(sk, SOCK_TIMESTAMP))
1657                 sock_enable_timestamp(sk);
1658         tv = ktime_to_timeval(sk->sk_stamp);
1659         if (tv.tv_sec == -1)
1660                 return -ENOENT;
1661         if (tv.tv_sec == 0) {
1662                 sk->sk_stamp = ktime_get_real();
1663                 tv = ktime_to_timeval(sk->sk_stamp);
1664         }
1665         return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
1666 }
1667 EXPORT_SYMBOL(sock_get_timestamp);
1668
1669 int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1670 {
        struct timespec ts;

1672         if (!sock_flag(sk, SOCK_TIMESTAMP))
1673                 sock_enable_timestamp(sk);
1674         ts = ktime_to_timespec(sk->sk_stamp);
1675         if (ts.tv_sec == -1)
1676                 return -ENOENT;
1677         if (ts.tv_sec == 0) {
1678                 sk->sk_stamp = ktime_get_real();
1679                 ts = ktime_to_timespec(sk->sk_stamp);
1680         }
1681         return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
1682 }
1683 EXPORT_SYMBOL(sock_get_timestampns);
1684
1685 void sock_enable_timestamp(struct sock *sk)
1686 {
1687         if (!sock_flag(sk, SOCK_TIMESTAMP)) {
1688                 sock_set_flag(sk, SOCK_TIMESTAMP);
1689                 net_enable_timestamp();
1690         }
1691 }
1692
/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it).
 */
1700 int sock_common_getsockopt(struct socket *sock, int level, int optname,
1701                            char __user *optval, int __user *optlen)
1702 {
1703         struct sock *sk = sock->sk;
1704
1705         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1706 }
1707
1708 EXPORT_SYMBOL(sock_common_getsockopt);
1709
1710 #ifdef CONFIG_COMPAT
1711 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
1712                                   char __user *optval, int __user *optlen)
1713 {
1714         struct sock *sk = sock->sk;
1715
1716         if (sk->sk_prot->compat_getsockopt != NULL)
1717                 return sk->sk_prot->compat_getsockopt(sk, level, optname,
1718                                                       optval, optlen);
1719         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1720 }
1721 EXPORT_SYMBOL(compat_sock_common_getsockopt);
1722 #endif
1723
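/*
 * Generic recvmsg glue: MSG_DONTWAIT is split out into the protocol's
 * separate "noblock" argument and masked from the remaining flags, and
 * the source address length reported by the protocol is copied back
 * into msg_namelen on success.
 */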
1724 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1725                         struct msghdr *msg, size_t size, int flags)
1726 {
1727         struct sock *sk = sock->sk;
1728         int addr_len = 0;
1729         int err;
1730
1731         err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1732                                    flags & ~MSG_DONTWAIT, &addr_len);
1733         if (err >= 0)
1734                 msg->msg_namelen = addr_len;
1735         return err;
1736 }
1737
1738 EXPORT_SYMBOL(sock_common_recvmsg);
1739
1740 /*
1741  *      Set socket options on an inet socket.
1742  */
1743 int sock_common_setsockopt(struct socket *sock, int level, int optname,
1744                            char __user *optval, int optlen)
1745 {
1746         struct sock *sk = sock->sk;
1747
1748         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1749 }
1750
1751 EXPORT_SYMBOL(sock_common_setsockopt);
1752
1753 #ifdef CONFIG_COMPAT
1754 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
1755                                   char __user *optval, int optlen)
1756 {
1757         struct sock *sk = sock->sk;
1758
1759         if (sk->sk_prot->compat_setsockopt != NULL)
1760                 return sk->sk_prot->compat_setsockopt(sk, level, optname,
1761                                                       optval, optlen);
1762         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1763 }
1764 EXPORT_SYMBOL(compat_sock_common_setsockopt);
1765 #endif
1766
1767 void sk_common_release(struct sock *sk)
1768 {
1769         if (sk->sk_prot->destroy)
1770                 sk->sk_prot->destroy(sk);
1771
        /*
         * Observation: when sk_common_release() is called, userspace
         * processes no longer have access to the socket, but the network
         * stack still does.
         * Step one, detach it from networking:
         *
         * A. Remove from hash tables.
         */
1779
1780         sk->sk_prot->unhash(sk);
1781
        /*
         * At this point the socket cannot receive new packets, but it is
         * possible that some packets are still in flight, because some CPU
         * is running the receive path and did its hash-table lookup before
         * we unhashed the socket. Those packets will reach the receive
         * queue and be purged by the socket destructor.
         *
         * Also, we may still have packets pending on the receive queue and,
         * probably, our own packets waiting in device queues. sock_destroy()
         * will drain the receive queue, but transmitted packets will delay
         * socket destruction until the last reference is released.
         */
1793
1794         sock_orphan(sk);
1795
1796         xfrm_sk_free_policy(sk);
1797
1798         sk_refcnt_debug_release(sk);
1799         sock_put(sk);
1800 }
1801
1802 EXPORT_SYMBOL(sk_common_release);
1803
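/*
 * Every registered protocol sits on proto_list, protected by
 * proto_list_lock and walked by the /proc/net/protocols seq_file
 * code below.
 */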
1804 static DEFINE_RWLOCK(proto_list_lock);
1805 static LIST_HEAD(proto_list);
1806
1807 #ifdef CONFIG_SMP
/*
 * Default functions to keep track of in-use sockets per protocol.
 * Note that frequently used protocols supply dedicated functions for
 * speed (see DEFINE_PROTO_INUSE/REF_PROTO_INUSE); inuse_init() only
 * installs the generic per-cpu counters below when a protocol has not
 * supplied its own hooks.
 */
1813 static void inuse_add(struct proto *prot, int inc)
1814 {
1815         per_cpu_ptr(prot->inuse_ptr, smp_processor_id())[0] += inc;
1816 }
1817
1818 static int inuse_get(const struct proto *prot)
1819 {
        int res = 0, cpu;

1821         for_each_possible_cpu(cpu)
1822                 res += per_cpu_ptr(prot->inuse_ptr, cpu)[0];
1823         return res;
1824 }
1825
1826 static int inuse_init(struct proto *prot)
1827 {
1828         if (!prot->inuse_getval || !prot->inuse_add) {
1829                 prot->inuse_ptr = alloc_percpu(int);
1830                 if (prot->inuse_ptr == NULL)
1831                         return -ENOBUFS;
1832
1833                 prot->inuse_getval = inuse_get;
1834                 prot->inuse_add = inuse_add;
1835         }
1836         return 0;
1837 }
1838
1839 static void inuse_fini(struct proto *prot)
1840 {
1841         if (prot->inuse_ptr != NULL) {
1842                 free_percpu(prot->inuse_ptr);
1843                 prot->inuse_ptr = NULL;
1844                 prot->inuse_getval = NULL;
1845                 prot->inuse_add = NULL;
1846         }
1847 }
1848 #else
1849 static inline int inuse_init(struct proto *prot)
1850 {
1851         return 0;
1852 }
1853
1854 static inline void inuse_fini(struct proto *prot)
1855 {
1856 }
1857 #endif
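
/*
 * On uniprocessor builds the generic per-cpu counters are compiled
 * out: inuse_init()/inuse_fini() become no-ops, and any dedicated
 * hooks a protocol supplies are used unchanged.
 */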
1858
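/*
 * proto_register() optionally creates the slab caches a protocol needs
 * (for its socks, request socks and timewait socks) and links it onto
 * proto_list.  Any failure unwinds everything allocated so far and
 * returns -ENOBUFS.
 */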
1859 int proto_register(struct proto *prot, int alloc_slab)
1860 {
1861         char *request_sock_slab_name = NULL;
1862         char *timewait_sock_slab_name;
1863
1864         if (inuse_init(prot))
1865                 goto out;
1866
1867         if (alloc_slab) {
1868                 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1869                                                SLAB_HWCACHE_ALIGN, NULL);
1870
1871                 if (prot->slab == NULL) {
1872                         printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1873                                prot->name);
1874                         goto out_free_inuse;
1875                 }
1876
1877                 if (prot->rsk_prot != NULL) {
1878                         static const char mask[] = "request_sock_%s";
1879
1880                         request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1881                         if (request_sock_slab_name == NULL)
1882                                 goto out_free_sock_slab;
1883
1884                         sprintf(request_sock_slab_name, mask, prot->name);
1885                         prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1886                                                                  prot->rsk_prot->obj_size, 0,
1887                                                                  SLAB_HWCACHE_ALIGN, NULL);
1888
1889                         if (prot->rsk_prot->slab == NULL) {
1890                                 printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1891                                        prot->name);
1892                                 goto out_free_request_sock_slab_name;
1893                         }
1894                 }
1895
1896                 if (prot->twsk_prot != NULL) {
1897                         static const char mask[] = "tw_sock_%s";
1898
1899                         timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1900
1901                         if (timewait_sock_slab_name == NULL)
1902                                 goto out_free_request_sock_slab;
1903
1904                         sprintf(timewait_sock_slab_name, mask, prot->name);
1905                         prot->twsk_prot->twsk_slab =
1906                                 kmem_cache_create(timewait_sock_slab_name,
1907                                                   prot->twsk_prot->twsk_obj_size,
1908                                                   0, SLAB_HWCACHE_ALIGN,
1909                                                   NULL);
1910                         if (prot->twsk_prot->twsk_slab == NULL)
1911                                 goto out_free_timewait_sock_slab_name;
1912                 }
1913         }
1914
1915         write_lock(&proto_list_lock);
1916         list_add(&prot->node, &proto_list);
1917         write_unlock(&proto_list_lock);
1918         return 0;
1919
1920 out_free_timewait_sock_slab_name:
1921         kfree(timewait_sock_slab_name);
1922 out_free_request_sock_slab:
1923         if (prot->rsk_prot && prot->rsk_prot->slab) {
1924                 kmem_cache_destroy(prot->rsk_prot->slab);
1925                 prot->rsk_prot->slab = NULL;
1926         }
1927 out_free_request_sock_slab_name:
1928         kfree(request_sock_slab_name);
1929 out_free_sock_slab:
1930         kmem_cache_destroy(prot->slab);
1931         prot->slab = NULL;
1932 out_free_inuse:
1933         inuse_fini(prot);
1934 out:
1935         return -ENOBUFS;
1936 }
1937
1938 EXPORT_SYMBOL(proto_register);
1939
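/*
 * The request_sock/timewait slab names were kmalloc()ed by
 * proto_register(); they are recovered via kmem_cache_name() and
 * freed here once the caches themselves have been destroyed.
 */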
1940 void proto_unregister(struct proto *prot)
1941 {
1942         write_lock(&proto_list_lock);
1943         list_del(&prot->node);
1944         write_unlock(&proto_list_lock);
1945
1946         inuse_fini(prot);
1947         if (prot->slab != NULL) {
1948                 kmem_cache_destroy(prot->slab);
1949                 prot->slab = NULL;
1950         }
1951
1952         if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
1953                 const char *name = kmem_cache_name(prot->rsk_prot->slab);
1954
1955                 kmem_cache_destroy(prot->rsk_prot->slab);
1956                 kfree(name);
1957                 prot->rsk_prot->slab = NULL;
1958         }
1959
1960         if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1961                 const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
1962
1963                 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
1964                 kfree(name);
1965                 prot->twsk_prot->twsk_slab = NULL;
1966         }
1967 }
1968
1969 EXPORT_SYMBOL(proto_unregister);
1970
1971 #ifdef CONFIG_PROC_FS
1972 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1973 {
1974         read_lock(&proto_list_lock);
1975         return seq_list_start_head(&proto_list, *pos);
1976 }
1977
1978 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1979 {
1980         return seq_list_next(v, &proto_list, pos);
1981 }
1982
1983 static void proto_seq_stop(struct seq_file *seq, void *v)
1984 {
1985         read_unlock(&proto_list_lock);
1986 }
1987
1988 static char proto_method_implemented(const void *method)
1989 {
1990         return method == NULL ? 'n' : 'y';
1991 }
1992
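/*
 * One row per protocol.  The trailing y/n flags follow the header
 * printed by proto_seq_show() below: cl=close co=connect di=disconnect
 * ac=accept io=ioctl in=init de=destroy sh=shutdown ss=setsockopt
 * gs=getsockopt se=sendmsg re=recvmsg sp=sendpage bi=bind
 * br=backlog_rcv ha=hash uh=unhash gp=get_port
 * em=enter_memory_pressure.
 */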
1993 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
1994 {
1995         seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
1996                         "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
1997                    proto->name,
1998                    proto->obj_size,
1999                    proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
2000                    proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
2001                    proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
2002                    proto->max_header,
2003                    proto->slab == NULL ? "no" : "yes",
2004                    module_name(proto->owner),
2005                    proto_method_implemented(proto->close),
2006                    proto_method_implemented(proto->connect),
2007                    proto_method_implemented(proto->disconnect),
2008                    proto_method_implemented(proto->accept),
2009                    proto_method_implemented(proto->ioctl),
2010                    proto_method_implemented(proto->init),
2011                    proto_method_implemented(proto->destroy),
2012                    proto_method_implemented(proto->shutdown),
2013                    proto_method_implemented(proto->setsockopt),
2014                    proto_method_implemented(proto->getsockopt),
2015                    proto_method_implemented(proto->sendmsg),
2016                    proto_method_implemented(proto->recvmsg),
2017                    proto_method_implemented(proto->sendpage),
2018                    proto_method_implemented(proto->bind),
2019                    proto_method_implemented(proto->backlog_rcv),
2020                    proto_method_implemented(proto->hash),
2021                    proto_method_implemented(proto->unhash),
2022                    proto_method_implemented(proto->get_port),
2023                    proto_method_implemented(proto->enter_memory_pressure));
2024 }
2025
2026 static int proto_seq_show(struct seq_file *seq, void *v)
2027 {
2028         if (v == &proto_list)
2029                 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
2030                            "protocol",
2031                            "size",
2032                            "sockets",
2033                            "memory",
2034                            "press",
2035                            "maxhdr",
2036                            "slab",
2037                            "module",
2038                            "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
2039         else
2040                 proto_seq_printf(seq, list_entry(v, struct proto, node));
2041         return 0;
2042 }
2043
2044 static const struct seq_operations proto_seq_ops = {
2045         .start  = proto_seq_start,
2046         .next   = proto_seq_next,
2047         .stop   = proto_seq_stop,
2048         .show   = proto_seq_show,
2049 };
2050
2051 static int proto_seq_open(struct inode *inode, struct file *file)
2052 {
2053         return seq_open(file, &proto_seq_ops);
2054 }
2055
2056 static const struct file_operations proto_seq_fops = {
2057         .owner          = THIS_MODULE,
2058         .open           = proto_seq_open,
2059         .read           = seq_read,
2060         .llseek         = seq_lseek,
2061         .release        = seq_release,
2062 };
2063
2064 static int __init proto_init(void)
2065 {
2066         /* register /proc/net/protocols */
2067         return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
2068 }
2069
2070 subsys_initcall(proto_init);
2071
2072 #endif /* PROC_FS */
2073
2074 EXPORT_SYMBOL(sk_alloc);
2075 EXPORT_SYMBOL(sk_free);
2076 EXPORT_SYMBOL(sk_send_sigurg);
2077 EXPORT_SYMBOL(sock_alloc_send_skb);
2078 EXPORT_SYMBOL(sock_init_data);
2079 EXPORT_SYMBOL(sock_kfree_s);
2080 EXPORT_SYMBOL(sock_kmalloc);
2081 EXPORT_SYMBOL(sock_no_accept);
2082 EXPORT_SYMBOL(sock_no_bind);
2083 EXPORT_SYMBOL(sock_no_connect);
2084 EXPORT_SYMBOL(sock_no_getname);
2085 EXPORT_SYMBOL(sock_no_getsockopt);
2086 EXPORT_SYMBOL(sock_no_ioctl);
2087 EXPORT_SYMBOL(sock_no_listen);
2088 EXPORT_SYMBOL(sock_no_mmap);
2089 EXPORT_SYMBOL(sock_no_poll);
2090 EXPORT_SYMBOL(sock_no_recvmsg);
2091 EXPORT_SYMBOL(sock_no_sendmsg);
2092 EXPORT_SYMBOL(sock_no_sendpage);
2093 EXPORT_SYMBOL(sock_no_setsockopt);
2094 EXPORT_SYMBOL(sock_no_shutdown);
2095 EXPORT_SYMBOL(sock_no_socketpair);
2096 EXPORT_SYMBOL(sock_rfree);
2097 EXPORT_SYMBOL(sock_setsockopt);
2098 EXPORT_SYMBOL(sock_wfree);
2099 EXPORT_SYMBOL(sock_wmalloc);
2100 EXPORT_SYMBOL(sock_i_uid);
2101 EXPORT_SYMBOL(sock_i_ino);
2102 EXPORT_SYMBOL(sysctl_optmem_max);