af_packet: Check return of dev_set_promiscuity/allmulti
net/packet/af_packet.c
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              PACKET - implements raw packet sockets.
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *              Alan Cox        :       verify_area() now used correctly
 *              Alan Cox        :       new skbuff lists, look ma no backlogs!
 *              Alan Cox        :       tidied skbuff lists.
 *              Alan Cox        :       Now uses generic datagram routines I
 *                                      added. Also fixed the peek/read crash
 *                                      from all old Linux datagram code.
 *              Alan Cox        :       Uses the improved datagram code.
 *              Alan Cox        :       Added NULL's for socket options.
 *              Alan Cox        :       Re-commented the code.
 *              Alan Cox        :       Use new kernel side addressing
 *              Rob Janssen     :       Correct MTU usage.
 *              Dave Platt      :       Counter leaks caused by incorrect
 *                                      interrupt locking and some slightly
 *                                      dubious gcc output. Can you read
 *                                      compiler: it said _VOLATILE_
 *      Richard Kooijman        :       Timestamp fixes.
 *              Alan Cox        :       New buffers. Use sk->mac.raw.
 *              Alan Cox        :       sendmsg/recvmsg support.
 *              Alan Cox        :       Protocol setting support
 *      Alexey Kuznetsov        :       Untied from IPv4 stack.
 *      Cyrus Durgin            :       Fixed kerneld for kmod.
 *      Michal Ostrowski        :       Module initialization cleanup.
 *         Ulises Alonso        :       Frame number limit removal and
 *                                      packet_set_ring memory leak.
 *              Eric Biederman  :       Allow for > 8 byte hardware addresses.
 *                                      The convention is that longer addresses
 *                                      will simply extend the hardware address
 *                                      byte arrays at the end of sockaddr_ll
 *                                      and packet_mreq.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

/*
   Assumptions:
   - if device has no dev->hard_header routine, it adds and removes ll header
     inside itself. In this case ll header is invisible outside of device,
     but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     will not fit into the reserved space (tunnel); others are not
     (PPP).
   - packet socket receives packets with pulled ll header,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It very likely points to the ll
                 header.  PPP does this, which is wrong, because it
                 introduces asymmetry between the rx and tx paths.
   data       -> data

Outgoing, dev->hard_header==NULL
   mac_header -> data. ll header is still not built!
   data       -> data

Summary:
  If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position;
   the packet classifier depends on it.
 */
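
/*
 * To make the layout rules above concrete, a minimal userspace sketch
 * (illustrative only, not part of this file) of receiving on the two
 * flavours this module implements; with SOCK_RAW the ll header is part
 * of the returned data, with SOCK_DGRAM it is stripped and described
 * by the sockaddr_ll instead:
 *
 *      int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *      char buf[2048];
 *      struct sockaddr_ll from;
 *      socklen_t fromlen = sizeof(from);
 *      ssize_t n = recvfrom(fd, buf, sizeof(buf), 0,
 *                           (struct sockaddr *)&from, &fromlen);
 *      // buf[0..n) now starts at the link-layer header
 */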

/* Private packet socket structures. */

struct packet_mclist
{
        struct packet_mclist    *next;
        int                     ifindex;
        int                     count;
        unsigned short          type;
        unsigned short          alen;
        unsigned char           addr[MAX_ADDR_LEN];
};
/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max
{
        int             mr_ifindex;
        unsigned short  mr_type;
        unsigned short  mr_alen;
        unsigned char   mr_address[MAX_ADDR_LEN];
};
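/*
 * These structures back the PACKET_ADD_MEMBERSHIP/PACKET_DROP_MEMBERSHIP
 * socket options.  A hedged userspace sketch of how a packet_mreq reaches
 * packet_mc_add() below (PACKET_MR_PROMISC ends up in dev_set_promiscuity()
 * via packet_dev_mc()):
 *
 *      struct packet_mreq mreq = {
 *              .mr_ifindex = ifindex,          // e.g. from if_nametoindex()
 *              .mr_type    = PACKET_MR_PROMISC,
 *      };
 *      setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
 */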

#ifdef CONFIG_PACKET_MMAP
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
#endif

static void packet_flush_mclist(struct sock *sk);

struct packet_sock {
        /* struct sock has to be the first member of packet_sock */
        struct sock             sk;
        struct tpacket_stats    stats;
#ifdef CONFIG_PACKET_MMAP
        char *                  *pg_vec;
        unsigned int            head;
        unsigned int            frames_per_block;
        unsigned int            frame_size;
        unsigned int            frame_max;
        int                     copy_thresh;
#endif
        struct packet_type      prot_hook;
        spinlock_t              bind_lock;
        unsigned int            running:1,      /* prot_hook is attached*/
                                auxdata:1,
                                origdev:1;
        int                     ifindex;        /* bound device         */
        __be16                  num;
        struct packet_mclist    *mclist;
#ifdef CONFIG_PACKET_MMAP
        atomic_t                mapped;
        unsigned int            pg_vec_order;
        unsigned int            pg_vec_pages;
        unsigned int            pg_vec_len;
#endif
};

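/* Per-skb state kept in skb->cb while a packet waits on the receive
 * queue: the original length plus the address recvmsg() will copy out.
 * The BUILD_BUG_ON() in packet_rcv() verifies that this, grown by a
 * worst-case hardware address, still fits into skb->cb.
 */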
struct packet_skb_cb {
        unsigned int origlen;
        union {
                struct sockaddr_pkt pkt;
                struct sockaddr_ll ll;
        } sa;
};

#define PACKET_SKB_CB(__skb)    ((struct packet_skb_cb *)((__skb)->cb))

#ifdef CONFIG_PACKET_MMAP

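/* Map a frame number to its address in the rx ring: frames are packed
 * frames_per_block to a block, so position / frames_per_block selects
 * the block in pg_vec[] and position % frames_per_block the frame
 * within that block.
 */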
static inline struct tpacket_hdr *packet_lookup_frame(struct packet_sock *po, unsigned int position)
{
        unsigned int pg_vec_pos, frame_offset;

        pg_vec_pos = position / po->frames_per_block;
        frame_offset = position % po->frames_per_block;

        return (struct tpacket_hdr *)(po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size));
}
#endif

static inline struct packet_sock *pkt_sk(struct sock *sk)
{
        return (struct packet_sock *)sk;
}

static void packet_sock_destruct(struct sock *sk)
{
        BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
        BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));

        if (!sock_flag(sk, SOCK_DEAD)) {
                printk("Attempt to release alive packet socket: %p\n", sk);
                return;
        }

        sk_refcnt_debug_dec(sk);
}


static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt, struct net_device *orig_dev)
{
        struct sock *sk;
        struct sockaddr_pkt *spkt;

        /*
         *      When we registered the protocol we saved the socket in the data
         *      field for just this event.
         */

        sk = pt->af_packet_priv;

        /*
         *      Yank back the headers [hope the device set this
         *      right or kerboom...]
         *
         *      Incoming packets have ll header pulled,
         *      push it back.
         *
         *      For outgoing ones skb->data == skb_mac_header(skb)
         *      so that this procedure is a noop.
         */

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto out;

        if (dev_net(dev) != sock_net(sk))
                goto out;

        if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
                goto oom;

        /* drop any routing info */
        dst_release(skb->dst);
        skb->dst = NULL;

        /* drop conntrack reference */
        nf_reset(skb);

        spkt = &PACKET_SKB_CB(skb)->sa.pkt;

        skb_push(skb, skb->data - skb_mac_header(skb));

        /*
         *      The SOCK_PACKET socket receives _all_ frames.
         */

        spkt->spkt_family = dev->type;
        strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
        spkt->spkt_protocol = skb->protocol;

        /*
         *      Charge the memory to the socket. This is done specifically
         *      to prevent sockets using all the memory up.
         */

        if (sock_queue_rcv_skb(sk, skb) == 0)
                return 0;

out:
        kfree_skb(skb);
oom:
        return 0;
}


/*
 *      Output a raw packet to a device layer. This bypasses all the other
 *      protocol layers and you must therefore supply it with a complete frame
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
                               struct msghdr *msg, size_t len)
{
        struct sock *sk = sock->sk;
        struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
        struct sk_buff *skb;
        struct net_device *dev;
        __be16 proto=0;
        int err;

        /*
         *      Get and verify the address.
         */

        if (saddr)
        {
                if (msg->msg_namelen < sizeof(struct sockaddr))
                        return(-EINVAL);
                if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
                        proto=saddr->spkt_protocol;
        }
        else
                return(-ENOTCONN);      /* SOCK_PACKET must be sent giving an address */

        /*
         *      Find the device first to size check it
         */

        saddr->spkt_device[13] = 0;
        dev = dev_get_by_name(sock_net(sk), saddr->spkt_device);
        err = -ENODEV;
        if (dev == NULL)
                goto out_unlock;

        err = -ENETDOWN;
        if (!(dev->flags & IFF_UP))
                goto out_unlock;

        /*
         *      You may not queue a frame bigger than the mtu. This is the lowest level
         *      raw protocol and you must do your own fragmentation at this level.
         */

        err = -EMSGSIZE;
        if (len > dev->mtu + dev->hard_header_len)
                goto out_unlock;

        err = -ENOBUFS;
        skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);

        /*
         *      If the write buffer is full, then tough. At this level the user gets to
         *      deal with the problem - do your own algorithmic backoffs. That's far
         *      more flexible.
         */

        if (skb == NULL)
                goto out_unlock;

        /*
         *      Fill it in
         */

        /* FIXME: Save some space for broken drivers that write a
         * hard header at transmission time by themselves. PPP is the
         * notable one here. This should really be fixed at the driver level.
         */
        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        skb_reset_network_header(skb);

        /* Try to align data part correctly */
        if (dev->header_ops) {
                skb->data -= dev->hard_header_len;
                skb->tail -= dev->hard_header_len;
                if (len < dev->hard_header_len)
                        skb_reset_network_header(skb);
        }

        /* Returns -EFAULT on error */
        err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
        skb->protocol = proto;
        skb->dev = dev;
        skb->priority = sk->sk_priority;
        if (err)
                goto out_free;

        /*
         *      Now send it
         */

        dev_queue_xmit(skb);
        dev_put(dev);
        return(len);

out_free:
        kfree_skb(skb);
out_unlock:
        if (dev)
                dev_put(dev);
        return err;
}

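/* Run the socket's attached BPF filter (if any) on the skb under
 * rcu_read_lock_bh() and return the number of bytes to keep: 0 means
 * drop, a value smaller than the packet truncates the snapshot.
 */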
static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
                                      unsigned int res)
{
        struct sk_filter *filter;

        rcu_read_lock_bh();
        filter = rcu_dereference(sk->sk_filter);
        if (filter != NULL)
                res = sk_run_filter(skb, filter->insns, filter->len);
        rcu_read_unlock_bh();

        return res;
}

/*
   This function does lazy skb cloning, in the hope that most packets
   are discarded by BPF.

   Note the tricky part: we DO mangle a shared skb! skb->data, skb->len
   and skb->cb are mangled. It works because (and until) packets
   falling here are owned by the current CPU. Output packets are cloned
   by dev_queue_xmit_nit(), input packets are processed by net_bh
   sequentially, so that if we return the skb to its original state on
   exit, we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
        struct sock *sk;
        struct sockaddr_ll *sll;
        struct packet_sock *po;
        u8 * skb_head = skb->data;
        int skb_len = skb->len;
        unsigned int snaplen, res;

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto drop;

        sk = pt->af_packet_priv;
        po = pkt_sk(sk);

        if (dev_net(dev) != sock_net(sk))
                goto drop;

        skb->dev = dev;

        if (dev->header_ops) {
                /* The device has an explicit notion of ll header,
                   exported to higher levels.

                   Otherwise, the device hides details of its frame
                   structure, so that the corresponding packet head is
                   never delivered to the user.
                 */
                if (sk->sk_type != SOCK_DGRAM)
                        skb_push(skb, skb->data - skb_mac_header(skb));
                else if (skb->pkt_type == PACKET_OUTGOING) {
                        /* Special case: outgoing packets have ll header at head */
                        skb_pull(skb, skb_network_offset(skb));
                }
        }

        snaplen = skb->len;

        res = run_filter(skb, sk, snaplen);
        if (!res)
                goto drop_n_restore;
        if (snaplen > res)
                snaplen = res;

        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
            (unsigned)sk->sk_rcvbuf)
                goto drop_n_acct;

        if (skb_shared(skb)) {
                struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
                if (nskb == NULL)
                        goto drop_n_acct;

                if (skb_head != skb->data) {
                        skb->data = skb_head;
                        skb->len = skb_len;
                }
                kfree_skb(skb);
                skb = nskb;
        }

        BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
                     sizeof(skb->cb));

        sll = &PACKET_SKB_CB(skb)->sa.ll;
        sll->sll_family = AF_PACKET;
        sll->sll_hatype = dev->type;
        sll->sll_protocol = skb->protocol;
        sll->sll_pkttype = skb->pkt_type;
        if (unlikely(po->origdev))
                sll->sll_ifindex = orig_dev->ifindex;
        else
                sll->sll_ifindex = dev->ifindex;

        sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

        PACKET_SKB_CB(skb)->origlen = skb->len;

        if (pskb_trim(skb, snaplen))
                goto drop_n_acct;

        skb_set_owner_r(skb, sk);
        skb->dev = NULL;
        dst_release(skb->dst);
        skb->dst = NULL;

        /* drop conntrack reference */
        nf_reset(skb);

        spin_lock(&sk->sk_receive_queue.lock);
        po->stats.tp_packets++;
        __skb_queue_tail(&sk->sk_receive_queue, skb);
        spin_unlock(&sk->sk_receive_queue.lock);
        sk->sk_data_ready(sk, skb->len);
        return 0;

drop_n_acct:
        spin_lock(&sk->sk_receive_queue.lock);
        po->stats.tp_drops++;
        spin_unlock(&sk->sk_receive_queue.lock);

drop_n_restore:
        if (skb_head != skb->data && skb_shared(skb)) {
                skb->data = skb_head;
                skb->len = skb_len;
        }
drop:
        kfree_skb(skb);
        return 0;
}

#ifdef CONFIG_PACKET_MMAP
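/* Receive path for mmap()ed sockets: instead of queueing the skb, copy
 * up to frame_size bytes into the next free frame of the shared ring
 * and publish it by setting tp_status, so that userspace can consume
 * packets without further syscalls.
 */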
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
        struct sock *sk;
        struct packet_sock *po;
        struct sockaddr_ll *sll;
        struct tpacket_hdr *h;
        u8 * skb_head = skb->data;
        int skb_len = skb->len;
        unsigned int snaplen, res;
        unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
        unsigned short macoff, netoff;
        struct sk_buff *copy_skb = NULL;
        struct timeval tv;

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto drop;

        sk = pt->af_packet_priv;
        po = pkt_sk(sk);

        if (dev_net(dev) != sock_net(sk))
                goto drop;

        if (dev->header_ops) {
                if (sk->sk_type != SOCK_DGRAM)
                        skb_push(skb, skb->data - skb_mac_header(skb));
                else if (skb->pkt_type == PACKET_OUTGOING) {
                        /* Special case: outgoing packets have ll header at head */
                        skb_pull(skb, skb_network_offset(skb));
                }
        }

        if (skb->ip_summed == CHECKSUM_PARTIAL)
                status |= TP_STATUS_CSUMNOTREADY;

        snaplen = skb->len;

        res = run_filter(skb, sk, snaplen);
        if (!res)
                goto drop_n_restore;
        if (snaplen > res)
                snaplen = res;

        if (sk->sk_type == SOCK_DGRAM) {
                macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
        } else {
                unsigned maclen = skb_network_offset(skb);
                netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
                macoff = netoff - maclen;
        }

        if (macoff + snaplen > po->frame_size) {
                if (po->copy_thresh &&
                    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
                    (unsigned)sk->sk_rcvbuf) {
                        if (skb_shared(skb)) {
                                copy_skb = skb_clone(skb, GFP_ATOMIC);
                        } else {
                                copy_skb = skb_get(skb);
                                skb_head = skb->data;
                        }
                        if (copy_skb)
                                skb_set_owner_r(copy_skb, sk);
                }
                snaplen = po->frame_size - macoff;
                if ((int)snaplen < 0)
                        snaplen = 0;
        }

        spin_lock(&sk->sk_receive_queue.lock);
        h = packet_lookup_frame(po, po->head);

        if (h->tp_status)
                goto ring_is_full;
        po->head = po->head != po->frame_max ? po->head+1 : 0;
        po->stats.tp_packets++;
        if (copy_skb) {
                status |= TP_STATUS_COPY;
                __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
        }
        if (!po->stats.tp_drops)
                status &= ~TP_STATUS_LOSING;
        spin_unlock(&sk->sk_receive_queue.lock);

        skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen);

        h->tp_len = skb->len;
        h->tp_snaplen = snaplen;
        h->tp_mac = macoff;
        h->tp_net = netoff;
        if (skb->tstamp.tv64)
                tv = ktime_to_timeval(skb->tstamp);
        else
                do_gettimeofday(&tv);
        h->tp_sec = tv.tv_sec;
        h->tp_usec = tv.tv_usec;

        sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
        sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
        sll->sll_family = AF_PACKET;
        sll->sll_hatype = dev->type;
        sll->sll_protocol = skb->protocol;
        sll->sll_pkttype = skb->pkt_type;
        if (unlikely(po->origdev))
                sll->sll_ifindex = orig_dev->ifindex;
        else
                sll->sll_ifindex = dev->ifindex;

        h->tp_status = status;
        smp_mb();

        {
                struct page *p_start, *p_end;
                u8 *h_end = (u8 *)h + macoff + snaplen - 1;

                p_start = virt_to_page(h);
                p_end = virt_to_page(h_end);
                while (p_start <= p_end) {
                        flush_dcache_page(p_start);
                        p_start++;
                }
        }

        sk->sk_data_ready(sk, 0);

drop_n_restore:
        if (skb_head != skb->data && skb_shared(skb)) {
                skb->data = skb_head;
                skb->len = skb_len;
        }
drop:
        kfree_skb(skb);
        return 0;

ring_is_full:
        po->stats.tp_drops++;
        spin_unlock(&sk->sk_receive_queue.lock);

        sk->sk_data_ready(sk, 0);
        if (copy_skb)
                kfree_skb(copy_skb);
        goto drop_n_restore;
}

#endif


static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
                          struct msghdr *msg, size_t len)
{
        struct sock *sk = sock->sk;
        struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
        struct sk_buff *skb;
        struct net_device *dev;
        __be16 proto;
        unsigned char *addr;
        int ifindex, err, reserve = 0;

        /*
         *      Get and verify the address.
         */

        if (saddr == NULL) {
                struct packet_sock *po = pkt_sk(sk);

                ifindex = po->ifindex;
                proto   = po->num;
                addr    = NULL;
        } else {
                err = -EINVAL;
                if (msg->msg_namelen < sizeof(struct sockaddr_ll))
                        goto out;
                if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
                        goto out;
                ifindex = saddr->sll_ifindex;
                proto   = saddr->sll_protocol;
                addr    = saddr->sll_addr;
        }


        dev = dev_get_by_index(sock_net(sk), ifindex);
        err = -ENXIO;
        if (dev == NULL)
                goto out_unlock;
        if (sock->type == SOCK_RAW)
                reserve = dev->hard_header_len;

        err = -ENETDOWN;
        if (!(dev->flags & IFF_UP))
                goto out_unlock;

        err = -EMSGSIZE;
        if (len > dev->mtu+reserve)
                goto out_unlock;

        skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev),
                                msg->msg_flags & MSG_DONTWAIT, &err);
        if (skb==NULL)
                goto out_unlock;

        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        skb_reset_network_header(skb);

        err = -EINVAL;
        if (sock->type == SOCK_DGRAM &&
            dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0)
                goto out_free;

        /* Returns -EFAULT on error */
        err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
        if (err)
                goto out_free;

        skb->protocol = proto;
        skb->dev = dev;
        skb->priority = sk->sk_priority;

        /*
         *      Now send it
         */

        err = dev_queue_xmit(skb);
        if (err > 0 && (err = net_xmit_errno(err)) != 0)
                goto out_unlock;

        dev_put(dev);

        return(len);

out_free:
        kfree_skb(skb);
out_unlock:
        if (dev)
                dev_put(dev);
out:
        return err;
}

/*
 *      Close a PACKET socket. This is fairly simple. We immediately go
 *      to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct packet_sock *po;
        struct net *net;

        if (!sk)
                return 0;

        net = sock_net(sk);
        po = pkt_sk(sk);

        write_lock_bh(&net->packet.sklist_lock);
        sk_del_node_init(sk);
        write_unlock_bh(&net->packet.sklist_lock);

        /*
         *      Unhook packet receive handler.
         */

        if (po->running) {
                /*
                 *      Remove the protocol hook
                 */
                dev_remove_pack(&po->prot_hook);
                po->running = 0;
                po->num = 0;
                __sock_put(sk);
        }

        packet_flush_mclist(sk);

#ifdef CONFIG_PACKET_MMAP
        if (po->pg_vec) {
                struct tpacket_req req;
                memset(&req, 0, sizeof(req));
                packet_set_ring(sk, &req, 1);
        }
#endif

        /*
         *      Now the socket is dead. No more input will appear.
         */

        sock_orphan(sk);
        sock->sk = NULL;

        /* Purge queues */

        skb_queue_purge(&sk->sk_receive_queue);
        sk_refcnt_debug_release(sk);

        sock_put(sk);
        return 0;
}

/*
 *      Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
        struct packet_sock *po = pkt_sk(sk);
        /*
         *      Detach an existing hook if present.
         */

        lock_sock(sk);

        spin_lock(&po->bind_lock);
        if (po->running) {
                __sock_put(sk);
                po->running = 0;
                po->num = 0;
                spin_unlock(&po->bind_lock);
                dev_remove_pack(&po->prot_hook);
                spin_lock(&po->bind_lock);
        }

        po->num = protocol;
        po->prot_hook.type = protocol;
        po->prot_hook.dev = dev;

        po->ifindex = dev ? dev->ifindex : 0;

        if (protocol == 0)
                goto out_unlock;

        if (!dev || (dev->flags & IFF_UP)) {
                dev_add_pack(&po->prot_hook);
                sock_hold(sk);
                po->running = 1;
        } else {
                sk->sk_err = ENETDOWN;
                if (!sock_flag(sk, SOCK_DEAD))
                        sk->sk_error_report(sk);
        }

out_unlock:
        spin_unlock(&po->bind_lock);
        release_sock(sk);
        return 0;
}

/*
 *      Bind a packet socket to a device
 */

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sock *sk=sock->sk;
        char name[15];
        struct net_device *dev;
        int err = -ENODEV;

        /*
         *      Check legality
         */

        if (addr_len != sizeof(struct sockaddr))
                return -EINVAL;
        strlcpy(name,uaddr->sa_data,sizeof(name));

        dev = dev_get_by_name(sock_net(sk), name);
        if (dev) {
                err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
                dev_put(dev);
        }
        return err;
}

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
        struct sock *sk=sock->sk;
        struct net_device *dev = NULL;
        int err;


        /*
         *      Check legality
         */

        if (addr_len < sizeof(struct sockaddr_ll))
                return -EINVAL;
        if (sll->sll_family != AF_PACKET)
                return -EINVAL;

        if (sll->sll_ifindex) {
                err = -ENODEV;
                dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
                if (dev == NULL)
                        goto out;
        }
        err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
        if (dev)
                dev_put(dev);

out:
        return err;
}

static struct proto packet_proto = {
        .name     = "PACKET",
        .owner    = THIS_MODULE,
        .obj_size = sizeof(struct packet_sock),
};

/*
 *      Create a packet of type SOCK_PACKET.
 */

static int packet_create(struct net *net, struct socket *sock, int protocol)
{
        struct sock *sk;
        struct packet_sock *po;
        __be16 proto = (__force __be16)protocol; /* weird, but documented */
        int err;

        if (!capable(CAP_NET_RAW))
                return -EPERM;
        if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
            sock->type != SOCK_PACKET)
                return -ESOCKTNOSUPPORT;

        sock->state = SS_UNCONNECTED;

        err = -ENOBUFS;
        sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
        if (sk == NULL)
                goto out;

        sock->ops = &packet_ops;
        if (sock->type == SOCK_PACKET)
                sock->ops = &packet_ops_spkt;

        sock_init_data(sock, sk);

        po = pkt_sk(sk);
        sk->sk_family = PF_PACKET;
        po->num = proto;

        sk->sk_destruct = packet_sock_destruct;
        sk_refcnt_debug_inc(sk);

        /*
         *      Attach a protocol block
         */

        spin_lock_init(&po->bind_lock);
        po->prot_hook.func = packet_rcv;

        if (sock->type == SOCK_PACKET)
                po->prot_hook.func = packet_rcv_spkt;

        po->prot_hook.af_packet_priv = sk;

        if (proto) {
                po->prot_hook.type = proto;
                dev_add_pack(&po->prot_hook);
                sock_hold(sk);
                po->running = 1;
        }

        write_lock_bh(&net->packet.sklist_lock);
        sk_add_node(sk, &net->packet.sklist);
        write_unlock_bh(&net->packet.sklist_lock);
        return(0);
out:
        return err;
}

/*
 *      Pull a packet from our receive queue and hand it to the user.
 *      If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
                          struct msghdr *msg, size_t len, int flags)
{
        struct sock *sk = sock->sk;
        struct sk_buff *skb;
        int copied, err;
        struct sockaddr_ll *sll;

        err = -EINVAL;
        if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
                goto out;

#if 0
        /* What error should we return now? EUNATTACH? */
        if (pkt_sk(sk)->ifindex < 0)
                return -ENODEV;
#endif

        /*
         *      Call the generic datagram receiver. This handles all sorts
         *      of horrible races and re-entrancy so we can forget about it
         *      in the protocol layers.
         *
         *      Now it will return ENETDOWN, if the device has just gone down,
         *      but then it will block.
         */

        skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);

        /*
         *      An error occurred so return it. Because skb_recv_datagram()
         *      handles the blocking for us, we don't need to see or worry
         *      about blocking retries.
         */

        if (skb == NULL)
                goto out;

        /*
         *      If the address length field is there to be filled in, we fill
         *      it in now.
         */

        sll = &PACKET_SKB_CB(skb)->sa.ll;
        if (sock->type == SOCK_PACKET)
                msg->msg_namelen = sizeof(struct sockaddr_pkt);
        else
                msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

        /*
         *      You lose any data beyond the buffer you gave. If it worries a
         *      user program they can ask the device for its MTU anyway.
         */

        copied = skb->len;
        if (copied > len)
        {
                copied=len;
                msg->msg_flags|=MSG_TRUNC;
        }

        err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
        if (err)
                goto out_free;

        sock_recv_timestamp(msg, sk, skb);

        if (msg->msg_name)
                memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
                       msg->msg_namelen);

        if (pkt_sk(sk)->auxdata) {
                struct tpacket_auxdata aux;

                aux.tp_status = TP_STATUS_USER;
                if (skb->ip_summed == CHECKSUM_PARTIAL)
                        aux.tp_status |= TP_STATUS_CSUMNOTREADY;
                aux.tp_len = PACKET_SKB_CB(skb)->origlen;
                aux.tp_snaplen = skb->len;
                aux.tp_mac = 0;
                aux.tp_net = skb_network_offset(skb);

                put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
        }

        /*
         *      Free or return the buffer as appropriate. Again this
         *      hides all the races and re-entrancy issues from us.
         */
        err = (flags&MSG_TRUNC) ? skb->len : copied;

out_free:
        skb_free_datagram(sk, skb);
out:
        return err;
}

static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
                               int *uaddr_len, int peer)
{
        struct net_device *dev;
        struct sock *sk = sock->sk;

        if (peer)
                return -EOPNOTSUPP;

        uaddr->sa_family = AF_PACKET;
        dev = dev_get_by_index(sock_net(sk), pkt_sk(sk)->ifindex);
        if (dev) {
                strlcpy(uaddr->sa_data, dev->name, 15);
                dev_put(dev);
        } else
                memset(uaddr->sa_data, 0, 14);
        *uaddr_len = sizeof(*uaddr);

        return 0;
}

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
                          int *uaddr_len, int peer)
{
        struct net_device *dev;
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
        struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;

        if (peer)
                return -EOPNOTSUPP;

        sll->sll_family = AF_PACKET;
        sll->sll_ifindex = po->ifindex;
        sll->sll_protocol = po->num;
        dev = dev_get_by_index(sock_net(sk), po->ifindex);
        if (dev) {
                sll->sll_hatype = dev->type;
                sll->sll_halen = dev->addr_len;
                memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
                dev_put(dev);
        } else {
                sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
                sll->sll_halen = 0;
        }
        *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

        return 0;
}

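/* Apply one membership entry to the device; 'what' is the reference
 * delta (+1 on add, -1 on drop).  For PACKET_MR_PROMISC and
 * PACKET_MR_ALLMULTI the return value of dev_set_promiscuity()/
 * dev_set_allmulti() is propagated, so callers can now detect a failed
 * increment -- the check this patch adds.
 */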
static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
                         int what)
{
        switch (i->type) {
        case PACKET_MR_MULTICAST:
                if (what > 0)
                        dev_mc_add(dev, i->addr, i->alen, 0);
                else
                        dev_mc_delete(dev, i->addr, i->alen, 0);
                break;
        case PACKET_MR_PROMISC:
                return dev_set_promiscuity(dev, what);
                break;
        case PACKET_MR_ALLMULTI:
                return dev_set_allmulti(dev, what);
                break;
        default:;
        }
        return 0;
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
        for ( ; i; i=i->next) {
                if (i->ifindex == dev->ifindex)
                        packet_dev_mc(dev, i, what);
        }
}

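/* Add a membership under rtnl_lock: bump the refcount of an existing
 * identical entry, or link a new one and apply it to the device;
 * if packet_dev_mc() fails, the new entry is unlinked and freed again.
 */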
static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
        struct packet_sock *po = pkt_sk(sk);
        struct packet_mclist *ml, *i;
        struct net_device *dev;
        int err;

        rtnl_lock();

        err = -ENODEV;
        dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
        if (!dev)
                goto done;

        err = -EINVAL;
        if (mreq->mr_alen > dev->addr_len)
                goto done;

        err = -ENOBUFS;
        i = kmalloc(sizeof(*i), GFP_KERNEL);
        if (i == NULL)
                goto done;

        err = 0;
        for (ml = po->mclist; ml; ml = ml->next) {
                if (ml->ifindex == mreq->mr_ifindex &&
                    ml->type == mreq->mr_type &&
                    ml->alen == mreq->mr_alen &&
                    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
                        ml->count++;
                        /* Free the new element ... */
                        kfree(i);
                        goto done;
                }
        }

        i->type = mreq->mr_type;
        i->ifindex = mreq->mr_ifindex;
        i->alen = mreq->mr_alen;
        memcpy(i->addr, mreq->mr_address, i->alen);
        i->count = 1;
        i->next = po->mclist;
        po->mclist = i;
        err = packet_dev_mc(dev, i, 1);
        if (err) {
                po->mclist = i->next;
                kfree(i);
        }

done:
        rtnl_unlock();
        return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
        struct packet_mclist *ml, **mlp;

        rtnl_lock();

        for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
                if (ml->ifindex == mreq->mr_ifindex &&
                    ml->type == mreq->mr_type &&
                    ml->alen == mreq->mr_alen &&
                    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
                        if (--ml->count == 0) {
                                struct net_device *dev;
                                *mlp = ml->next;
                                dev = dev_get_by_index(sock_net(sk), ml->ifindex);
                                if (dev) {
                                        packet_dev_mc(dev, ml, -1);
                                        dev_put(dev);
                                }
                                kfree(ml);
                        }
                        rtnl_unlock();
                        return 0;
                }
        }
        rtnl_unlock();
        return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
        struct packet_sock *po = pkt_sk(sk);
        struct packet_mclist *ml;

        if (!po->mclist)
                return;

        rtnl_lock();
        while ((ml = po->mclist) != NULL) {
                struct net_device *dev;

                po->mclist = ml->next;
                if ((dev = dev_get_by_index(sock_net(sk), ml->ifindex)) != NULL) {
                        packet_dev_mc(dev, ml, -1);
                        dev_put(dev);
                }
                kfree(ml);
        }
        rtnl_unlock();
}

static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
{
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
        int ret;

        if (level != SOL_PACKET)
                return -ENOPROTOOPT;

        switch(optname) {
        case PACKET_ADD_MEMBERSHIP:
        case PACKET_DROP_MEMBERSHIP:
        {
                struct packet_mreq_max mreq;
                int len = optlen;
                memset(&mreq, 0, sizeof(mreq));
                if (len < sizeof(struct packet_mreq))
                        return -EINVAL;
                if (len > sizeof(mreq))
                        len = sizeof(mreq);
                if (copy_from_user(&mreq,optval,len))
                        return -EFAULT;
                if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
                        return -EINVAL;
                if (optname == PACKET_ADD_MEMBERSHIP)
                        ret = packet_mc_add(sk, &mreq);
                else
                        ret = packet_mc_drop(sk, &mreq);
                return ret;
        }

#ifdef CONFIG_PACKET_MMAP
        case PACKET_RX_RING:
        {
                struct tpacket_req req;

                if (optlen<sizeof(req))
                        return -EINVAL;
                if (copy_from_user(&req,optval,sizeof(req)))
                        return -EFAULT;
                return packet_set_ring(sk, &req, 0);
        }
        case PACKET_COPY_THRESH:
        {
                int val;

                if (optlen!=sizeof(val))
                        return -EINVAL;
                if (copy_from_user(&val,optval,sizeof(val)))
                        return -EFAULT;

                pkt_sk(sk)->copy_thresh = val;
                return 0;
        }
#endif
        case PACKET_AUXDATA:
        {
                int val;

                if (optlen < sizeof(val))
                        return -EINVAL;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;

                po->auxdata = !!val;
                return 0;
        }
        case PACKET_ORIGDEV:
        {
                int val;

                if (optlen < sizeof(val))
                        return -EINVAL;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;

                po->origdev = !!val;
                return 0;
        }
        default:
                return -ENOPROTOOPT;
        }
}

static int packet_getsockopt(struct socket *sock, int level, int optname,
                             char __user *optval, int __user *optlen)
{
        int len;
        int val;
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
        void *data;
        struct tpacket_stats st;

        if (level != SOL_PACKET)
                return -ENOPROTOOPT;

        if (get_user(len, optlen))
                return -EFAULT;

        if (len < 0)
                return -EINVAL;

        switch(optname) {
        case PACKET_STATISTICS:
                if (len > sizeof(struct tpacket_stats))
                        len = sizeof(struct tpacket_stats);
                spin_lock_bh(&sk->sk_receive_queue.lock);
                st = po->stats;
                memset(&po->stats, 0, sizeof(st));
                spin_unlock_bh(&sk->sk_receive_queue.lock);
                st.tp_packets += st.tp_drops;

                data = &st;
                break;
        case PACKET_AUXDATA:
                if (len > sizeof(int))
                        len = sizeof(int);
                val = po->auxdata;

                data = &val;
                break;
        case PACKET_ORIGDEV:
                if (len > sizeof(int))
                        len = sizeof(int);
                val = po->origdev;

                data = &val;
                break;
        default:
                return -ENOPROTOOPT;
        }

        if (put_user(len, optlen))
                return -EFAULT;
        if (copy_to_user(optval, data, len))
                return -EFAULT;
        return 0;
}
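/* netdevice notifier: on NETDEV_UNREGISTER drop our memberships and
 * detach the prot_hook from the dying device, on NETDEV_DOWN just
 * detach and raise ENETDOWN, and on NETDEV_UP re-attach sockets that
 * are still bound to this ifindex.
 */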
static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
        struct sock *sk;
        struct hlist_node *node;
        struct net_device *dev = data;
        struct net *net = dev_net(dev);

        read_lock(&net->packet.sklist_lock);
        sk_for_each(sk, node, &net->packet.sklist) {
                struct packet_sock *po = pkt_sk(sk);

                switch (msg) {
                case NETDEV_UNREGISTER:
                        if (po->mclist)
                                packet_dev_mclist(dev, po->mclist, -1);
                        /* fallthrough */

                case NETDEV_DOWN:
                        if (dev->ifindex == po->ifindex) {
                                spin_lock(&po->bind_lock);
                                if (po->running) {
                                        __dev_remove_pack(&po->prot_hook);
                                        __sock_put(sk);
                                        po->running = 0;
                                        sk->sk_err = ENETDOWN;
                                        if (!sock_flag(sk, SOCK_DEAD))
                                                sk->sk_error_report(sk);
                                }
                                if (msg == NETDEV_UNREGISTER) {
                                        po->ifindex = -1;
                                        po->prot_hook.dev = NULL;
                                }
                                spin_unlock(&po->bind_lock);
                        }
                        break;
                case NETDEV_UP:
                        spin_lock(&po->bind_lock);
                        if (dev->ifindex == po->ifindex && po->num &&
                            !po->running) {
                                dev_add_pack(&po->prot_hook);
                                sock_hold(sk);
                                po->running = 1;
                        }
                        spin_unlock(&po->bind_lock);
                        break;
                }
        }
        read_unlock(&net->packet.sklist_lock);
        return NOTIFY_DONE;
}


static int packet_ioctl(struct socket *sock, unsigned int cmd,
                        unsigned long arg)
{
        struct sock *sk = sock->sk;

        switch(cmd) {
                case SIOCOUTQ:
                {
                        int amount = atomic_read(&sk->sk_wmem_alloc);
                        return put_user(amount, (int __user *)arg);
                }
                case SIOCINQ:
                {
                        struct sk_buff *skb;
                        int amount = 0;

                        spin_lock_bh(&sk->sk_receive_queue.lock);
                        skb = skb_peek(&sk->sk_receive_queue);
                        if (skb)
                                amount = skb->len;
                        spin_unlock_bh(&sk->sk_receive_queue.lock);
                        return put_user(amount, (int __user *)arg);
                }
                case SIOCGSTAMP:
                        return sock_get_timestamp(sk, (struct timeval __user *)arg);
                case SIOCGSTAMPNS:
                        return sock_get_timestampns(sk, (struct timespec __user *)arg);

#ifdef CONFIG_INET
                case SIOCADDRT:
                case SIOCDELRT:
                case SIOCDARP:
                case SIOCGARP:
                case SIOCSARP:
                case SIOCGIFADDR:
                case SIOCSIFADDR:
                case SIOCGIFBRDADDR:
                case SIOCSIFBRDADDR:
                case SIOCGIFNETMASK:
                case SIOCSIFNETMASK:
                case SIOCGIFDSTADDR:
                case SIOCSIFDSTADDR:
                case SIOCSIFFLAGS:
                        if (sock_net(sk) != &init_net)
                                return -ENOIOCTLCMD;
                        return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

                default:
                        return -ENOIOCTLCMD;
        }
        return 0;
}

#ifndef CONFIG_PACKET_MMAP
#define packet_mmap sock_no_mmap
#define packet_poll datagram_poll
#else

static unsigned int packet_poll(struct file * file, struct socket *sock,
                                poll_table *wait)
{
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
        unsigned int mask = datagram_poll(file, sock, wait);

        spin_lock_bh(&sk->sk_receive_queue.lock);
        if (po->pg_vec) {
                unsigned last = po->head ? po->head-1 : po->frame_max;
                struct tpacket_hdr *h;

                h = packet_lookup_frame(po, last);

                if (h->tp_status)
                        mask |= POLLIN | POLLRDNORM;
        }
        spin_unlock_bh(&sk->sk_receive_queue.lock);
        return mask;
}


/* Dirty? Well, I still did not learn a better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
        struct file *file = vma->vm_file;
        struct socket * sock = file->private_data;
        struct sock *sk = sock->sk;

        if (sk)
                atomic_inc(&pkt_sk(sk)->mapped);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
        struct file *file = vma->vm_file;
        struct socket * sock = file->private_data;
        struct sock *sk = sock->sk;

        if (sk)
                atomic_dec(&pkt_sk(sk)->mapped);
}

static struct vm_operations_struct packet_mmap_ops = {
        .open = packet_mm_open,
        .close = packet_mm_close,
};

static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
        int i;

        for (i = 0; i < len; i++) {
                if (likely(pg_vec[i]))
                        free_pages((unsigned long) pg_vec[i], order);
        }
        kfree(pg_vec);
}

static inline char *alloc_one_pg_vec_page(unsigned long order)
{
        return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
                                         order);
}

static char **alloc_pg_vec(struct tpacket_req *req, int order)
{
        unsigned int block_nr = req->tp_block_nr;
        char **pg_vec;
        int i;

        pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
        if (unlikely(!pg_vec))
                goto out;

        for (i = 0; i < block_nr; i++) {
                pg_vec[i] = alloc_one_pg_vec_page(order);
                if (unlikely(!pg_vec[i]))
                        goto out_free_pgvec;
        }

out:
        return pg_vec;

out_free_pgvec:
        free_pg_vec(pg_vec, order, block_nr);
        pg_vec = NULL;
        goto out;
}

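/*
 * The kernel side below pairs with a userspace sequence roughly like the
 * following (a minimal sketch assuming 4 KiB pages and an already created
 * PF_PACKET socket fd; this file implements the original TPACKET format):
 *
 *      struct tpacket_req req = {
 *              .tp_block_size = 4096,                  // multiple of PAGE_SIZE
 *              .tp_block_nr   = 64,
 *              .tp_frame_size = 2048,                  // TPACKET_ALIGNMENT aligned
 *              .tp_frame_nr   = 64 * (4096 / 2048),    // blocks * frames/block
 *      };
 *      setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *      void *ring = mmap(NULL, req.tp_block_size * req.tp_block_nr,
 *                        PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 */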
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
{
        char **pg_vec = NULL;
        struct packet_sock *po = pkt_sk(sk);
        int was_running, order = 0;
        __be16 num;
        int err = 0;

        if (req->tp_block_nr) {
                int i;

                /* Sanity tests and some calculations */

                if (unlikely(po->pg_vec))
                        return -EBUSY;

                if (unlikely((int)req->tp_block_size <= 0))
                        return -EINVAL;
                if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
                        return -EINVAL;
                if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
                        return -EINVAL;
                if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
                        return -EINVAL;

                po->frames_per_block = req->tp_block_size/req->tp_frame_size;
                if (unlikely(po->frames_per_block <= 0))
                        return -EINVAL;
                if (unlikely((po->frames_per_block * req->tp_block_nr) !=
                             req->tp_frame_nr))
                        return -EINVAL;

                err = -ENOMEM;
                order = get_order(req->tp_block_size);
                pg_vec = alloc_pg_vec(req, order);
                if (unlikely(!pg_vec))
                        goto out;

                for (i = 0; i < req->tp_block_nr; i++) {
                        char *ptr = pg_vec[i];
                        struct tpacket_hdr *header;
                        int k;

                        for (k = 0; k < po->frames_per_block; k++) {
                                header = (struct tpacket_hdr *) ptr;
                                header->tp_status = TP_STATUS_KERNEL;
                                ptr += req->tp_frame_size;
                        }
                }
                /* Done */
        } else {
                if (unlikely(req->tp_frame_nr))
                        return -EINVAL;
        }

        lock_sock(sk);

        /* Detach socket from network */
        spin_lock(&po->bind_lock);
        was_running = po->running;
        num = po->num;
        if (was_running) {
                __dev_remove_pack(&po->prot_hook);
                po->num = 0;
                po->running = 0;
                __sock_put(sk);
        }
        spin_unlock(&po->bind_lock);

        synchronize_net();

        err = -EBUSY;
        if (closing || atomic_read(&po->mapped) == 0) {
                err = 0;
1730 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1731
1732                 spin_lock_bh(&sk->sk_receive_queue.lock);
1733                 pg_vec = XC(po->pg_vec, pg_vec);
1734                 po->frame_max = (req->tp_frame_nr - 1);
1735                 po->head = 0;
1736                 po->frame_size = req->tp_frame_size;
1737                 spin_unlock_bh(&sk->sk_receive_queue.lock);
1738
1739                 order = XC(po->pg_vec_order, order);
1740                 req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1741
1742                 po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
1743                 po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
1744                 skb_queue_purge(&sk->sk_receive_queue);
1745 #undef XC
1746                 if (atomic_read(&po->mapped))
1747                         printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
1748         }
1749
1750         spin_lock(&po->bind_lock);
1751         if (was_running && !po->running) {
1752                 sock_hold(sk);
1753                 po->running = 1;
1754                 po->num = num;
1755                 dev_add_pack(&po->prot_hook);
1756         }
1757         spin_unlock(&po->bind_lock);
1758
1759         release_sock(sk);
1760
1761         if (pg_vec)
1762                 free_pg_vec(pg_vec, order, req->tp_block_nr);
1763 out:
1764         return err;
1765 }
1766
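/*
 * Map every page of every ring block, in order, into the caller's VMA.
 * The mapping must start at offset zero and cover the ring exactly.
 */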
static int packet_mmap(struct file *file, struct socket *sock,
                       struct vm_area_struct *vma)
{
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
        unsigned long size;
        unsigned long start;
        int err = -EINVAL;
        int i;

        if (vma->vm_pgoff)
                return -EINVAL;

        size = vma->vm_end - vma->vm_start;

        lock_sock(sk);
        if (po->pg_vec == NULL)
                goto out;
        if (size != po->pg_vec_len * po->pg_vec_pages * PAGE_SIZE)
                goto out;

        start = vma->vm_start;
        for (i = 0; i < po->pg_vec_len; i++) {
                struct page *page = virt_to_page(po->pg_vec[i]);
                int pg_num;

                for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
                        err = vm_insert_page(vma, start, page);
                        if (unlikely(err))
                                goto out;
                        start += PAGE_SIZE;
                }
        }
        atomic_inc(&po->mapped);
        vma->vm_ops = &packet_mmap_ops;
        err = 0;

out:
        release_sock(sk);
        return err;
}
#endif
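
/*
 * Illustrative ring setup from user space (a sketch; the sizes are just
 * one self-consistent example).  tp_block_size must be a multiple of
 * PAGE_SIZE, tp_frame_size a multiple of TPACKET_ALIGNMENT, and
 * tp_frame_nr must equal tp_block_nr times the frames per block:
 *
 *      struct tpacket_req req = {
 *              .tp_block_size = 4096,
 *              .tp_block_nr   = 64,
 *              .tp_frame_size = 2048,
 *              .tp_frame_nr   = 128,
 *      };
 *
 *      setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *      char *ring = mmap(NULL, req.tp_block_nr * req.tp_block_size,
 *                        PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 */
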
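/*
 * Two ops vectors: packet_ops_spkt serves the obsolete SOCK_PACKET
 * interface with the generic stubs, while packet_ops serves regular
 * PF_PACKET sockets and wires up packet_poll and packet_mmap (the
 * ring-aware handlers above, or the generic fallbacks when
 * CONFIG_PACKET_MMAP is not set).
 */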
static const struct proto_ops packet_ops_spkt = {
        .family =       PF_PACKET,
        .owner =        THIS_MODULE,
        .release =      packet_release,
        .bind =         packet_bind_spkt,
        .connect =      sock_no_connect,
        .socketpair =   sock_no_socketpair,
        .accept =       sock_no_accept,
        .getname =      packet_getname_spkt,
        .poll =         datagram_poll,
        .ioctl =        packet_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     sock_no_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      packet_sendmsg_spkt,
        .recvmsg =      packet_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
};

static const struct proto_ops packet_ops = {
        .family =       PF_PACKET,
        .owner =        THIS_MODULE,
        .release =      packet_release,
        .bind =         packet_bind,
        .connect =      sock_no_connect,
        .socketpair =   sock_no_socketpair,
        .accept =       sock_no_accept,
        .getname =      packet_getname,
        .poll =         packet_poll,
        .ioctl =        packet_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     sock_no_shutdown,
        .setsockopt =   packet_setsockopt,
        .getsockopt =   packet_getsockopt,
        .sendmsg =      packet_sendmsg,
        .recvmsg =      packet_recvmsg,
        .mmap =         packet_mmap,
        .sendpage =     sock_no_sendpage,
};

static struct net_proto_family packet_family_ops = {
        .family =       PF_PACKET,
        .create =       packet_create,
        .owner =        THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
        .notifier_call = packet_notifier,
};

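/*
 * /proc/net/packet support: walk the per-namespace socket list under
 * its read lock and emit one line per packet socket.
 */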
#ifdef CONFIG_PROC_FS
static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
{
        struct sock *s;
        struct hlist_node *node;

        sk_for_each(s, node, &net->packet.sklist) {
                if (!off--)
                        return s;
        }
        return NULL;
}

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(seq_file_net(seq)->packet.sklist_lock)
{
        struct net *net = seq_file_net(seq);
        read_lock(&net->packet.sklist_lock);
        return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct net *net = seq_file_net(seq);
        ++*pos;
        return (v == SEQ_START_TOKEN)
                ? sk_head(&net->packet.sklist)
                : sk_next((struct sock *)v);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
        __releases(seq_file_net(seq)->packet.sklist_lock)
{
        struct net *net = seq_file_net(seq);
        read_unlock(&net->packet.sklist_lock);
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
        if (v == SEQ_START_TOKEN)
                seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
        else {
                struct sock *s = v;
                const struct packet_sock *po = pkt_sk(s);

                seq_printf(seq,
                           "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
                           s,
                           atomic_read(&s->sk_refcnt),
                           s->sk_type,
                           ntohs(po->num),
                           po->ifindex,
                           po->running,
                           atomic_read(&s->sk_rmem_alloc),
                           sock_i_uid(s),
                           sock_i_ino(s));
        }

        return 0;
}
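
/*
 * One line per socket: kernel address, refcount, socket type, bound
 * protocol (the ETH_P_* value, converted from network byte order),
 * interface index, running flag, receive-queue bytes, owner uid and
 * inode number.
 */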

static const struct seq_operations packet_seq_ops = {
        .start  = packet_seq_start,
        .next   = packet_seq_next,
        .stop   = packet_seq_stop,
        .show   = packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &packet_seq_ops,
                            sizeof(struct seq_net_private));
}

static const struct file_operations packet_seq_fops = {
        .owner          = THIS_MODULE,
        .open           = packet_seq_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release_net,
};

#endif

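/*
 * Per-namespace state: each network namespace gets its own socket list,
 * list lock and /proc/net/packet entry.
 */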
static int packet_net_init(struct net *net)
{
        rwlock_init(&net->packet.sklist_lock);
        INIT_HLIST_HEAD(&net->packet.sklist);

        if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
                return -ENOMEM;

        return 0;
}

static void packet_net_exit(struct net *net)
{
        proc_net_remove(net, "packet");
}

static struct pernet_operations packet_net_ops = {
        .init = packet_net_init,
        .exit = packet_net_exit,
};

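/*
 * packet_init() backs out any partial registration on failure;
 * packet_exit() tears everything down in the reverse order of
 * registration.
 */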
static void __exit packet_exit(void)
{
        unregister_netdevice_notifier(&packet_netdev_notifier);
        unregister_pernet_subsys(&packet_net_ops);
        sock_unregister(PF_PACKET);
        proto_unregister(&packet_proto);
}

static int __init packet_init(void)
{
        int rc = proto_register(&packet_proto, 0);

        if (rc != 0)
                goto out;
        rc = sock_register(&packet_family_ops);
        if (rc != 0)
                goto out_proto;
        rc = register_pernet_subsys(&packet_net_ops);
        if (rc != 0)
                goto out_sock;
        rc = register_netdevice_notifier(&packet_netdev_notifier);
        if (rc != 0)
                goto out_pernet;

        return 0;

out_pernet:
        unregister_pernet_subsys(&packet_net_ops);
out_sock:
        sock_unregister(PF_PACKET);
out_proto:
        proto_unregister(&packet_proto);
out:
        return rc;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);