[AF_PACKET]: Fix minor code duplication
net/packet/af_packet.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              PACKET - implements raw packet sockets.
7  *
8  * Version:     $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
9  *
10  * Authors:     Ross Biro
11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
13  *
14  * Fixes:
15  *              Alan Cox        :       verify_area() now used correctly
16  *              Alan Cox        :       new skbuff lists, look ma no backlogs!
17  *              Alan Cox        :       tidied skbuff lists.
18  *              Alan Cox        :       Now uses generic datagram routines I
19  *                                      added. Also fixed the peek/read crash
20  *                                      from all old Linux datagram code.
21  *              Alan Cox        :       Uses the improved datagram code.
22  *              Alan Cox        :       Added NULL's for socket options.
23  *              Alan Cox        :       Re-commented the code.
24  *              Alan Cox        :       Use new kernel side addressing
25  *              Rob Janssen     :       Correct MTU usage.
26  *              Dave Platt      :       Counter leaks caused by incorrect
27  *                                      interrupt locking and some slightly
28  *                                      dubious gcc output. Can you read
29  *                                      compiler: it said _VOLATILE_
30  *      Richard Kooijman        :       Timestamp fixes.
31  *              Alan Cox        :       New buffers. Use sk->mac.raw.
32  *              Alan Cox        :       sendmsg/recvmsg support.
33  *              Alan Cox        :       Protocol setting support
34  *      Alexey Kuznetsov        :       Untied from IPv4 stack.
35  *      Cyrus Durgin            :       Fixed kerneld for kmod.
36  *      Michal Ostrowski        :       Module initialization cleanup.
37  *         Ulises Alonso        :       Frame number limit removal and
38  *                                      packet_set_ring memory leak.
39  *              Eric Biederman  :       Allow for > 8 byte hardware addresses.
40  *                                      The convention is that longer addresses
41  *                                      will simply extend the hardware address
42  *                                      byte arrays at the end of sockaddr_ll
43  *                                      and packet_mreq.
44  *
45  *              This program is free software; you can redistribute it and/or
46  *              modify it under the terms of the GNU General Public License
47  *              as published by the Free Software Foundation; either version
48  *              2 of the License, or (at your option) any later version.
49  *
50  */
51
52 #include <linux/types.h>
53 #include <linux/mm.h>
54 #include <linux/capability.h>
55 #include <linux/fcntl.h>
56 #include <linux/socket.h>
57 #include <linux/in.h>
58 #include <linux/inet.h>
59 #include <linux/netdevice.h>
60 #include <linux/if_packet.h>
61 #include <linux/wireless.h>
62 #include <linux/kernel.h>
63 #include <linux/kmod.h>
64 #include <net/net_namespace.h>
65 #include <net/ip.h>
66 #include <net/protocol.h>
67 #include <linux/skbuff.h>
68 #include <net/sock.h>
69 #include <linux/errno.h>
70 #include <linux/timer.h>
71 #include <asm/system.h>
72 #include <asm/uaccess.h>
73 #include <asm/ioctls.h>
74 #include <asm/page.h>
75 #include <asm/cacheflush.h>
76 #include <asm/io.h>
77 #include <linux/proc_fs.h>
78 #include <linux/seq_file.h>
79 #include <linux/poll.h>
80 #include <linux/module.h>
81 #include <linux/init.h>
82
83 #ifdef CONFIG_INET
84 #include <net/inet_common.h>
85 #endif
86
87 /*
88    Assumptions:
89    - if device has no dev->hard_header routine, it adds and removes ll header
90      inside itself. In this case ll header is invisible outside of device,
91      but higher levels still should reserve dev->hard_header_len.
92      Some devices are clever enough to reallocate the skb when the
93      header will not fit into the reserved space (tunnels); others are
94      silly (PPP).
95    - packet socket receives packets with pulled ll header,
96      so that SOCK_RAW should push it back.
97
98 On receive:
99 -----------
100
101 Incoming, dev->hard_header!=NULL
102    mac_header -> ll header
103    data       -> data
104
105 Outgoing, dev->hard_header!=NULL
106    mac_header -> ll header
107    data       -> ll header
108
109 Incoming, dev->hard_header==NULL
110    mac_header -> UNKNOWN position. It very likely points to the ll
111                  header.  PPP does this, which is wrong, because it
112                  introduces asymmetry between the rx and tx paths.
113    data       -> data
114
115 Outgoing, dev->hard_header==NULL
116    mac_header -> data. ll header is still not built!
117    data       -> data
118
119 Summary
120   If dev->hard_header==NULL we are unlikely to restore a sensible ll header.
121
122
123 On transmit:
124 ------------
125
126 dev->hard_header != NULL
127    mac_header -> ll header
128    data       -> ll header
129
130 dev->hard_header == NULL (ll header is added by device, we cannot control it)
131    mac_header -> data
132    data       -> data
133
134    We should set nh.raw on output to the correct position;
135    the packet classifier depends on it.
136  */
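/*
   A minimal userspace sketch of the conventions above (illustrative only,
   not part of the kernel build; error handling omitted). Creating a packet
   socket requires CAP_NET_RAW, as packet_create() below enforces:

	#include <sys/socket.h>
	#include <linux/if_packet.h>
	#include <linux/if_ether.h>
	#include <arpa/inet.h>

	int raw   = socket(PF_PACKET, SOCK_RAW,   htons(ETH_P_ALL));
	int dgram = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_ALL));

   Reads on 'raw' yield frames starting at the ll (e.g. Ethernet) header;
   on 'dgram' the ll header has already been pulled, per the rules above.
 */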
137
138 /* List of all packet sockets. */
139 static HLIST_HEAD(packet_sklist);
140 static DEFINE_RWLOCK(packet_sklist_lock);
141
142 /* Private packet socket structures. */
143
144 struct packet_mclist
145 {
146         struct packet_mclist    *next;
147         int                     ifindex;
148         int                     count;
149         unsigned short          type;
150         unsigned short          alen;
151         unsigned char           addr[MAX_ADDR_LEN];
152 };
153 /* identical to struct packet_mreq except it has
154  * a longer address field.
155  */
156 struct packet_mreq_max
157 {
158         int             mr_ifindex;
159         unsigned short  mr_type;
160         unsigned short  mr_alen;
161         unsigned char   mr_address[MAX_ADDR_LEN];
162 };
163
164 #ifdef CONFIG_PACKET_MMAP
165 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
166 #endif
167
168 static void packet_flush_mclist(struct sock *sk);
169
170 struct packet_sock {
171         /* struct sock has to be the first member of packet_sock */
172         struct sock             sk;
173         struct tpacket_stats    stats;
174 #ifdef CONFIG_PACKET_MMAP
175         char *                  *pg_vec;
176         unsigned int            head;
177         unsigned int            frames_per_block;
178         unsigned int            frame_size;
179         unsigned int            frame_max;
180         int                     copy_thresh;
181 #endif
182         struct packet_type      prot_hook;
183         spinlock_t              bind_lock;
184         unsigned int            running:1,      /* prot_hook is attached*/
185                                 auxdata:1,
186                                 origdev:1;
187         int                     ifindex;        /* bound device         */
188         __be16                  num;
189         struct packet_mclist    *mclist;
190 #ifdef CONFIG_PACKET_MMAP
191         atomic_t                mapped;
192         unsigned int            pg_vec_order;
193         unsigned int            pg_vec_pages;
194         unsigned int            pg_vec_len;
195 #endif
196 };
197
198 struct packet_skb_cb {
199         unsigned int origlen;
200         union {
201                 struct sockaddr_pkt pkt;
202                 struct sockaddr_ll ll;
203         } sa;
204 };
205
206 #define PACKET_SKB_CB(__skb)    ((struct packet_skb_cb *)((__skb)->cb))
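/* Note: the hardware address stored in sa may legally extend past the
 * declared sockaddr_ll, since addresses longer than the 8-byte sll_addr
 * array simply run off the end of the struct (see the changelog above).
 * The BUILD_BUG_ON() in packet_rcv() verifies that even a
 * MAX_ADDR_LEN-byte address still fits within skb->cb.
 */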
207
208 #ifdef CONFIG_PACKET_MMAP
209
210 static inline struct tpacket_hdr *packet_lookup_frame(struct packet_sock *po, unsigned int position)
211 {
212         unsigned int pg_vec_pos, frame_offset;
213
214         pg_vec_pos = position / po->frames_per_block;
215         frame_offset = position % po->frames_per_block;
216
217         return (struct tpacket_hdr *)(po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size));
218 }
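/* Worked example: with tp_block_size 8192 and tp_frame_size 2048,
 * frames_per_block is 4; frame 6 then maps to pg_vec_pos = 6 / 4 = 1 and
 * frame_offset = 6 % 4 = 2, i.e. the frame lives at pg_vec[1] + 2 * 2048.
 */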
219 #endif
220
221 static inline struct packet_sock *pkt_sk(struct sock *sk)
222 {
223         return (struct packet_sock *)sk;
224 }
225
226 static void packet_sock_destruct(struct sock *sk)
227 {
228         BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
229         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
230
231         if (!sock_flag(sk, SOCK_DEAD)) {
232                 printk("Attempt to release alive packet socket: %p\n", sk);
233                 return;
234         }
235
236         sk_refcnt_debug_dec(sk);
237 }
238
239
240 static const struct proto_ops packet_ops;
241
242 static const struct proto_ops packet_ops_spkt;
243
244 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt, struct net_device *orig_dev)
245 {
246         struct sock *sk;
247         struct sockaddr_pkt *spkt;
248
249         if (dev->nd_net != &init_net)
250                 goto out;
251
252         /*
253          *      When we registered the protocol we saved the socket in the data
254          *      field for just this event.
255          */
256
257         sk = pt->af_packet_priv;
258
259         /*
260          *      Yank back the headers [hope the device set this
261          *      right or kerboom...]
262          *
263          *      Incoming packets have ll header pulled,
264          *      push it back.
265          *
266          *      For outgoing ones skb->data == skb_mac_header(skb)
267          *      so that this procedure is a noop.
268          */
269
270         if (skb->pkt_type == PACKET_LOOPBACK)
271                 goto out;
272
273         if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
274                 goto oom;
275
276         /* drop any routing info */
277         dst_release(skb->dst);
278         skb->dst = NULL;
279
280         /* drop conntrack reference */
281         nf_reset(skb);
282
283         spkt = &PACKET_SKB_CB(skb)->sa.pkt;
284
285         skb_push(skb, skb->data - skb_mac_header(skb));
286
287         /*
288          *      The SOCK_PACKET socket receives _all_ frames.
289          */
290
291         spkt->spkt_family = dev->type;
292         strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
293         spkt->spkt_protocol = skb->protocol;
294
295         /*
296          *      Charge the memory to the socket. This is done specifically
297          *      to prevent sockets using all the memory up.
298          */
299
300         if (sock_queue_rcv_skb(sk,skb) == 0)
301                 return 0;
302
303 out:
304         kfree_skb(skb);
305 oom:
306         return 0;
307 }
308
309
310 /*
311  *      Output a raw packet to a device layer. This bypasses all the other
312  *      protocol layers and you must therefore supply it with a complete frame
313  */
314
315 static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
316                                struct msghdr *msg, size_t len)
317 {
318         struct sock *sk = sock->sk;
319         struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
320         struct sk_buff *skb;
321         struct net_device *dev;
322         __be16 proto=0;
323         int err;
324
325         /*
326          *      Get and verify the address.
327          */
328
329         if (saddr)
330         {
331                 if (msg->msg_namelen < sizeof(struct sockaddr))
332                         return(-EINVAL);
333                 if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
334                         proto=saddr->spkt_protocol;
335         }
336         else
337                 return(-ENOTCONN);      /* SOCK_PACKET must be sent giving an address */
338
339         /*
340          *      Find the device first to size check it
341          */
342
343         saddr->spkt_device[13] = 0;
344         dev = dev_get_by_name(&init_net, saddr->spkt_device);
345         err = -ENODEV;
346         if (dev == NULL)
347                 goto out_unlock;
348
349         err = -ENETDOWN;
350         if (!(dev->flags & IFF_UP))
351                 goto out_unlock;
352
353         /*
354          *      You may not queue a frame bigger than the mtu. This is the lowest level
355          *      raw protocol and you must do your own fragmentation at this level.
356          */
357
358         err = -EMSGSIZE;
359         if (len > dev->mtu + dev->hard_header_len)
360                 goto out_unlock;
361
362         err = -ENOBUFS;
363         skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
364
365         /*
366          *      If the write buffer is full, then tough. At this level the user gets to
367          *      deal with the problem - do your own algorithmic backoffs. That's far
368          *      more flexible.
369          */
370
371         if (skb == NULL)
372                 goto out_unlock;
373
374         /*
375          *      Fill it in
376          */
377
378         /* FIXME: Save some space for broken drivers that write a
379          * hard header at transmission time by themselves. PPP is the
380          * notable one here. This should really be fixed at the driver level.
381          */
382         skb_reserve(skb, LL_RESERVED_SPACE(dev));
383         skb_reset_network_header(skb);
384
385         /* Try to align data part correctly */
386         if (dev->header_ops) {
387                 skb->data -= dev->hard_header_len;
388                 skb->tail -= dev->hard_header_len;
389                 if (len < dev->hard_header_len)
390                         skb_reset_network_header(skb);
391         }
392
393         /* Returns -EFAULT on error */
394         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
395         skb->protocol = proto;
396         skb->dev = dev;
397         skb->priority = sk->sk_priority;
398         if (err)
399                 goto out_free;
400
401         /*
402          *      Now send it
403          */
404
405         dev_queue_xmit(skb);
406         dev_put(dev);
407         return(len);
408
409 out_free:
410         kfree_skb(skb);
411 out_unlock:
412         if (dev)
413                 dev_put(dev);
414         return err;
415 }
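/* A userspace sketch of the contract above (hypothetical code, error
 * handling omitted; 'fd' is a SOCK_PACKET socket and 'frame' a buffer
 * already holding the complete frame, ll header included):
 *
 *	struct sockaddr_pkt spkt = { .spkt_family = AF_PACKET };
 *	strncpy(spkt.spkt_device, "eth0", sizeof(spkt.spkt_device));
 *	spkt.spkt_protocol = htons(ETH_P_IP);
 *	sendto(fd, frame, frame_len, 0,
 *	       (struct sockaddr *)&spkt, sizeof(spkt));
 *
 * frame_len may not exceed dev->mtu + dev->hard_header_len, per the
 * check above.
 */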
416
417 static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
418                                       unsigned int res)
419 {
420         struct sk_filter *filter;
421
422         rcu_read_lock_bh();
423         filter = rcu_dereference(sk->sk_filter);
424         if (filter != NULL)
425                 res = sk_run_filter(skb, filter->insns, filter->len);
426         rcu_read_unlock_bh();
427
428         return res;
429 }
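/* The filter consulted here is installed from userspace with
 * SO_ATTACH_FILTER. A minimal sketch (hypothetical code): a one-insn
 * classic BPF program that accepts every packet but caps the snapshot
 * at 96 bytes, which packet_rcv()/tpacket_rcv() below then honour:
 *
 *	#include <linux/filter.h>	// struct sock_filter, sock_fprog
 *
 *	struct sock_filter code[] = {
 *		{ BPF_RET | BPF_K, 0, 0, 96 },
 *	};
 *	struct sock_fprog prog = { .len = 1, .filter = code };
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
 *
 * A filter return value of 0 drops the packet; a nonzero value bounds
 * snaplen.
 */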
430
431 /*
432    This function performs lazy skb cloning in the hope that most
433    packets are discarded by BPF.
434
435    Note the tricky part: we DO mangle a shared skb! skb->data, skb->len
436    and skb->cb are mangled. It works because (and until) packets
437    falling here are owned by the current CPU. Output packets are cloned
438    by dev_queue_xmit_nit(), input packets are processed by net_bh
439    sequentially, so that if we return the skb to its original state on
440    exit, we will not harm anyone.
441  */
442
443 static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
444 {
445         struct sock *sk;
446         struct sockaddr_ll *sll;
447         struct packet_sock *po;
448         u8 * skb_head = skb->data;
449         int skb_len = skb->len;
450         unsigned int snaplen, res;
451
452         if (dev->nd_net != &init_net)
453                 goto drop;
454
455         if (skb->pkt_type == PACKET_LOOPBACK)
456                 goto drop;
457
458         sk = pt->af_packet_priv;
459         po = pkt_sk(sk);
460
461         skb->dev = dev;
462
463         if (dev->header_ops) {
464                 /* The device has an explicit notion of ll header,
465                    exported to higher levels.
466
467                    Otherwise, the device hides details of its frame
468                    structure, so that the corresponding packet head is
469                    never delivered to the user.
470                  */
471                 if (sk->sk_type != SOCK_DGRAM)
472                         skb_push(skb, skb->data - skb_mac_header(skb));
473                 else if (skb->pkt_type == PACKET_OUTGOING) {
474                         /* Special case: outgoing packets have ll header at head */
475                         skb_pull(skb, skb_network_offset(skb));
476                 }
477         }
478
479         snaplen = skb->len;
480
481         res = run_filter(skb, sk, snaplen);
482         if (!res)
483                 goto drop_n_restore;
484         if (snaplen > res)
485                 snaplen = res;
486
487         if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
488             (unsigned)sk->sk_rcvbuf)
489                 goto drop_n_acct;
490
491         if (skb_shared(skb)) {
492                 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
493                 if (nskb == NULL)
494                         goto drop_n_acct;
495
496                 if (skb_head != skb->data) {
497                         skb->data = skb_head;
498                         skb->len = skb_len;
499                 }
500                 kfree_skb(skb);
501                 skb = nskb;
502         }
503
504         BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
505                      sizeof(skb->cb));
506
507         sll = &PACKET_SKB_CB(skb)->sa.ll;
508         sll->sll_family = AF_PACKET;
509         sll->sll_hatype = dev->type;
510         sll->sll_protocol = skb->protocol;
511         sll->sll_pkttype = skb->pkt_type;
512         if (unlikely(po->origdev))
513                 sll->sll_ifindex = orig_dev->ifindex;
514         else
515                 sll->sll_ifindex = dev->ifindex;
516
517         sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
518
519         PACKET_SKB_CB(skb)->origlen = skb->len;
520
521         if (pskb_trim(skb, snaplen))
522                 goto drop_n_acct;
523
524         skb_set_owner_r(skb, sk);
525         skb->dev = NULL;
526         dst_release(skb->dst);
527         skb->dst = NULL;
528
529         /* drop conntrack reference */
530         nf_reset(skb);
531
532         spin_lock(&sk->sk_receive_queue.lock);
533         po->stats.tp_packets++;
534         __skb_queue_tail(&sk->sk_receive_queue, skb);
535         spin_unlock(&sk->sk_receive_queue.lock);
536         sk->sk_data_ready(sk, skb->len);
537         return 0;
538
539 drop_n_acct:
540         spin_lock(&sk->sk_receive_queue.lock);
541         po->stats.tp_drops++;
542         spin_unlock(&sk->sk_receive_queue.lock);
543
544 drop_n_restore:
545         if (skb_head != skb->data && skb_shared(skb)) {
546                 skb->data = skb_head;
547                 skb->len = skb_len;
548         }
549 drop:
550         kfree_skb(skb);
551         return 0;
552 }
553
554 #ifdef CONFIG_PACKET_MMAP
555 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
556 {
557         struct sock *sk;
558         struct packet_sock *po;
559         struct sockaddr_ll *sll;
560         struct tpacket_hdr *h;
561         u8 * skb_head = skb->data;
562         int skb_len = skb->len;
563         unsigned int snaplen, res;
564         unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
565         unsigned short macoff, netoff;
566         struct sk_buff *copy_skb = NULL;
567         struct timeval tv;
568
569         if (dev->nd_net != &init_net)
570                 goto drop;
571
572         if (skb->pkt_type == PACKET_LOOPBACK)
573                 goto drop;
574
575         sk = pt->af_packet_priv;
576         po = pkt_sk(sk);
577
578         if (dev->header_ops) {
579                 if (sk->sk_type != SOCK_DGRAM)
580                         skb_push(skb, skb->data - skb_mac_header(skb));
581                 else if (skb->pkt_type == PACKET_OUTGOING) {
582                         /* Special case: outgoing packets have ll header at head */
583                         skb_pull(skb, skb_network_offset(skb));
584                 }
585         }
586
587         if (skb->ip_summed == CHECKSUM_PARTIAL)
588                 status |= TP_STATUS_CSUMNOTREADY;
589
590         snaplen = skb->len;
591
592         res = run_filter(skb, sk, snaplen);
593         if (!res)
594                 goto drop_n_restore;
595         if (snaplen > res)
596                 snaplen = res;
597
598         if (sk->sk_type == SOCK_DGRAM) {
599                 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
600         } else {
601                 unsigned maclen = skb_network_offset(skb);
602                 netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
603                 macoff = netoff - maclen;
604         }
605
606         if (macoff + snaplen > po->frame_size) {
607                 if (po->copy_thresh &&
608                     atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
609                     (unsigned)sk->sk_rcvbuf) {
610                         if (skb_shared(skb)) {
611                                 copy_skb = skb_clone(skb, GFP_ATOMIC);
612                         } else {
613                                 copy_skb = skb_get(skb);
614                                 skb_head = skb->data;
615                         }
616                         if (copy_skb)
617                                 skb_set_owner_r(copy_skb, sk);
618                 }
619                 snaplen = po->frame_size - macoff;
620                 if ((int)snaplen < 0)
621                         snaplen = 0;
622         }
623
624         spin_lock(&sk->sk_receive_queue.lock);
625         h = packet_lookup_frame(po, po->head);
626
627         if (h->tp_status)
628                 goto ring_is_full;
629         po->head = po->head != po->frame_max ? po->head+1 : 0;
630         po->stats.tp_packets++;
631         if (copy_skb) {
632                 status |= TP_STATUS_COPY;
633                 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
634         }
635         if (!po->stats.tp_drops)
636                 status &= ~TP_STATUS_LOSING;
637         spin_unlock(&sk->sk_receive_queue.lock);
638
639         skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen);
640
641         h->tp_len = skb->len;
642         h->tp_snaplen = snaplen;
643         h->tp_mac = macoff;
644         h->tp_net = netoff;
645         if (skb->tstamp.tv64)
646                 tv = ktime_to_timeval(skb->tstamp);
647         else
648                 do_gettimeofday(&tv);
649         h->tp_sec = tv.tv_sec;
650         h->tp_usec = tv.tv_usec;
651
652         sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
653         sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
654         sll->sll_family = AF_PACKET;
655         sll->sll_hatype = dev->type;
656         sll->sll_protocol = skb->protocol;
657         sll->sll_pkttype = skb->pkt_type;
658         if (unlikely(po->origdev))
659                 sll->sll_ifindex = orig_dev->ifindex;
660         else
661                 sll->sll_ifindex = dev->ifindex;
662
663         h->tp_status = status;
664         smp_mb();
665
666         {
667                 struct page *p_start, *p_end;
668                 u8 *h_end = (u8 *)h + macoff + snaplen - 1;
669
670                 p_start = virt_to_page(h);
671                 p_end = virt_to_page(h_end);
672                 while (p_start <= p_end) {
673                         flush_dcache_page(p_start);
674                         p_start++;
675                 }
676         }
677
678         sk->sk_data_ready(sk, 0);
679
680 drop_n_restore:
681         if (skb_head != skb->data && skb_shared(skb)) {
682                 skb->data = skb_head;
683                 skb->len = skb_len;
684         }
685 drop:
686         kfree_skb(skb);
687         return 0;
688
689 ring_is_full:
690         po->stats.tp_drops++;
691         spin_unlock(&sk->sk_receive_queue.lock);
692
693         sk->sk_data_ready(sk, 0);
694         if (copy_skb)
695                 kfree_skb(copy_skb);
696         goto drop_n_restore;
697 }
698
699 #endif
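/* A userspace sketch of draining the ring filled by tpacket_rcv()
 * (hypothetical code; needs <sys/mman.h> and <linux/if_packet.h>, error
 * handling omitted). The sizes obey the packet_set_ring() checks below:
 * 8192 / 2048 = 4 frames per block, 32 * 4 = 128 frames in total:
 *
 *	struct tpacket_req req = {
 *		.tp_block_size	= 8192,
 *		.tp_block_nr	= 32,
 *		.tp_frame_size	= 2048,
 *		.tp_frame_nr	= 128,
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *	char *ring = mmap(NULL, req.tp_block_size * req.tp_block_nr,
 *			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 *	struct tpacket_hdr *h = (struct tpacket_hdr *)frame; // current frame
 *	if (h->tp_status & TP_STATUS_USER) {
 *		// packet data starts at (char *)h + h->tp_mac
 *		h->tp_status = TP_STATUS_KERNEL; // hand the frame back
 *	}
 *
 * 'frame' walks the ring in the same order as po->head above.
 */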
700
701
702 static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
703                           struct msghdr *msg, size_t len)
704 {
705         struct sock *sk = sock->sk;
706         struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
707         struct sk_buff *skb;
708         struct net_device *dev;
709         __be16 proto;
710         unsigned char *addr;
711         int ifindex, err, reserve = 0;
712
713         /*
714          *      Get and verify the address.
715          */
716
717         if (saddr == NULL) {
718                 struct packet_sock *po = pkt_sk(sk);
719
720                 ifindex = po->ifindex;
721                 proto   = po->num;
722                 addr    = NULL;
723         } else {
724                 err = -EINVAL;
725                 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
726                         goto out;
727                 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
728                         goto out;
729                 ifindex = saddr->sll_ifindex;
730                 proto   = saddr->sll_protocol;
731                 addr    = saddr->sll_addr;
732         }
733
734
735         dev = dev_get_by_index(&init_net, ifindex);
736         err = -ENXIO;
737         if (dev == NULL)
738                 goto out_unlock;
739         if (sock->type == SOCK_RAW)
740                 reserve = dev->hard_header_len;
741
742         err = -ENETDOWN;
743         if (!(dev->flags & IFF_UP))
744                 goto out_unlock;
745
746         err = -EMSGSIZE;
747         if (len > dev->mtu+reserve)
748                 goto out_unlock;
749
750         skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
751                                 msg->msg_flags & MSG_DONTWAIT, &err);
752         if (skb==NULL)
753                 goto out_unlock;
754
755         skb_reserve(skb, LL_RESERVED_SPACE(dev));
756         skb_reset_network_header(skb);
757
758         err = -EINVAL;
759         if (sock->type == SOCK_DGRAM &&
760             dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0)
761                 goto out_free;
762
763         /* Returns -EFAULT on error */
764         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
765         if (err)
766                 goto out_free;
767
768         skb->protocol = proto;
769         skb->dev = dev;
770         skb->priority = sk->sk_priority;
771
772         /*
773          *      Now send it
774          */
775
776         err = dev_queue_xmit(skb);
777         if (err > 0 && (err = net_xmit_errno(err)) != 0)
778                 goto out_unlock;
779
780         dev_put(dev);
781
782         return(len);
783
784 out_free:
785         kfree_skb(skb);
786 out_unlock:
787         if (dev)
788                 dev_put(dev);
789 out:
790         return err;
791 }
792
793 /*
794  *      Close a PACKET socket. This is fairly simple. We immediately go
795  *      to 'closed' state and remove our protocol entry in the device list.
796  */
797
798 static int packet_release(struct socket *sock)
799 {
800         struct sock *sk = sock->sk;
801         struct packet_sock *po;
802
803         if (!sk)
804                 return 0;
805
806         po = pkt_sk(sk);
807
808         write_lock_bh(&packet_sklist_lock);
809         sk_del_node_init(sk);
810         write_unlock_bh(&packet_sklist_lock);
811
812         /*
813          *      Unhook packet receive handler.
814          */
815
816         if (po->running) {
817                 /*
818                  *      Remove the protocol hook
819                  */
820                 dev_remove_pack(&po->prot_hook);
821                 po->running = 0;
822                 po->num = 0;
823                 __sock_put(sk);
824         }
825
826         packet_flush_mclist(sk);
827
828 #ifdef CONFIG_PACKET_MMAP
829         if (po->pg_vec) {
830                 struct tpacket_req req;
831                 memset(&req, 0, sizeof(req));
832                 packet_set_ring(sk, &req, 1);
833         }
834 #endif
835
836         /*
837          *      Now the socket is dead. No more input will appear.
838          */
839
840         sock_orphan(sk);
841         sock->sk = NULL;
842
843         /* Purge queues */
844
845         skb_queue_purge(&sk->sk_receive_queue);
846         sk_refcnt_debug_release(sk);
847
848         sock_put(sk);
849         return 0;
850 }
851
852 /*
853  *      Attach a packet hook.
854  */
855
856 static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
857 {
858         struct packet_sock *po = pkt_sk(sk);
859         /*
860          *      Detach an existing hook if present.
861          */
862
863         lock_sock(sk);
864
865         spin_lock(&po->bind_lock);
866         if (po->running) {
867                 __sock_put(sk);
868                 po->running = 0;
869                 po->num = 0;
870                 spin_unlock(&po->bind_lock);
871                 dev_remove_pack(&po->prot_hook);
872                 spin_lock(&po->bind_lock);
873         }
874
875         po->num = protocol;
876         po->prot_hook.type = protocol;
877         po->prot_hook.dev = dev;
878
879         po->ifindex = dev ? dev->ifindex : 0;
880
881         if (protocol == 0)
882                 goto out_unlock;
883
884         if (!dev || (dev->flags & IFF_UP)) {
885                 dev_add_pack(&po->prot_hook);
886                 sock_hold(sk);
887                 po->running = 1;
888         } else {
889                 sk->sk_err = ENETDOWN;
890                 if (!sock_flag(sk, SOCK_DEAD))
891                         sk->sk_error_report(sk);
892         }
893
894 out_unlock:
895         spin_unlock(&po->bind_lock);
896         release_sock(sk);
897         return 0;
898 }
899
900 /*
901  *      Bind a packet socket to a device
902  */
903
904 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
905 {
906         struct sock *sk=sock->sk;
907         char name[15];
908         struct net_device *dev;
909         int err = -ENODEV;
910
911         /*
912          *      Check legality
913          */
914
915         if (addr_len != sizeof(struct sockaddr))
916                 return -EINVAL;
917         strlcpy(name,uaddr->sa_data,sizeof(name));
918
919         dev = dev_get_by_name(&init_net, name);
920         if (dev) {
921                 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
922                 dev_put(dev);
923         }
924         return err;
925 }
926
927 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
928 {
929         struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
930         struct sock *sk=sock->sk;
931         struct net_device *dev = NULL;
932         int err;
933
934
935         /*
936          *      Check legality
937          */
938
939         if (addr_len < sizeof(struct sockaddr_ll))
940                 return -EINVAL;
941         if (sll->sll_family != AF_PACKET)
942                 return -EINVAL;
943
944         if (sll->sll_ifindex) {
945                 err = -ENODEV;
946                 dev = dev_get_by_index(&init_net, sll->sll_ifindex);
947                 if (dev == NULL)
948                         goto out;
949         }
950         err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
951         if (dev)
952                 dev_put(dev);
953
954 out:
955         return err;
956 }
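/* A userspace sketch of binding (hypothetical code, error handling
 * omitted):
 *
 *	struct sockaddr_ll sll = {
 *		.sll_family	= AF_PACKET,
 *		.sll_protocol	= htons(ETH_P_IP),
 *		.sll_ifindex	= if_nametoindex("eth0"),
 *	};
 *	bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 *
 * A zero sll_ifindex binds to all devices; a zero sll_protocol keeps the
 * protocol the socket was created with, as packet_bind() shows.
 */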
957
958 static struct proto packet_proto = {
959         .name     = "PACKET",
960         .owner    = THIS_MODULE,
961         .obj_size = sizeof(struct packet_sock),
962 };
963
964 /*
965  *      Create a packet socket (SOCK_RAW, SOCK_DGRAM or SOCK_PACKET).
966  */
967
968 static int packet_create(struct net *net, struct socket *sock, int protocol)
969 {
970         struct sock *sk;
971         struct packet_sock *po;
972         __be16 proto = (__force __be16)protocol; /* weird, but documented */
973         int err;
974
975         if (net != &init_net)
976                 return -EAFNOSUPPORT;
977
978         if (!capable(CAP_NET_RAW))
979                 return -EPERM;
980         if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
981             sock->type != SOCK_PACKET)
982                 return -ESOCKTNOSUPPORT;
983
984         sock->state = SS_UNCONNECTED;
985
986         err = -ENOBUFS;
987         sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
988         if (sk == NULL)
989                 goto out;
990
991         sock->ops = &packet_ops;
992         if (sock->type == SOCK_PACKET)
993                 sock->ops = &packet_ops_spkt;
994
995         sock_init_data(sock, sk);
996
997         po = pkt_sk(sk);
998         sk->sk_family = PF_PACKET;
999         po->num = proto;
1000
1001         sk->sk_destruct = packet_sock_destruct;
1002         sk_refcnt_debug_inc(sk);
1003
1004         /*
1005          *      Attach a protocol block
1006          */
1007
1008         spin_lock_init(&po->bind_lock);
1009         po->prot_hook.func = packet_rcv;
1010
1011         if (sock->type == SOCK_PACKET)
1012                 po->prot_hook.func = packet_rcv_spkt;
1013
1014         po->prot_hook.af_packet_priv = sk;
1015
1016         if (proto) {
1017                 po->prot_hook.type = proto;
1018                 dev_add_pack(&po->prot_hook);
1019                 sock_hold(sk);
1020                 po->running = 1;
1021         }
1022
1023         write_lock_bh(&packet_sklist_lock);
1024         sk_add_node(sk, &packet_sklist);
1025         write_unlock_bh(&packet_sklist_lock);
1026         return(0);
1027 out:
1028         return err;
1029 }
1030
1031 /*
1032  *      Pull a packet from our receive queue and hand it to the user.
1033  *      If necessary we block.
1034  */
1035
1036 static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1037                           struct msghdr *msg, size_t len, int flags)
1038 {
1039         struct sock *sk = sock->sk;
1040         struct sk_buff *skb;
1041         int copied, err;
1042         struct sockaddr_ll *sll;
1043
1044         err = -EINVAL;
1045         if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1046                 goto out;
1047
1048 #if 0
1049         /* What error should we return now? EUNATTACH? */
1050         if (pkt_sk(sk)->ifindex < 0)
1051                 return -ENODEV;
1052 #endif
1053
1054         /*
1055          *      Call the generic datagram receiver. This handles all sorts
1056          *      of horrible races and re-entrancy so we can forget about it
1057          *      in the protocol layers.
1058          *
1059          *      Now it will return ENETDOWN if the device has just gone down,
1060          *      but then it will block.
1061          */
1062
1063         skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
1064
1065         /*
1066          *      An error occurred so return it. Because skb_recv_datagram()
1067          *      handles the blocking, we don't need to see or worry
1068          *      about blocking retries.
1069          */
1070
1071         if (skb == NULL)
1072                 goto out;
1073
1074         /*
1075          *      If the address length field is there to be filled in, we fill
1076          *      it in now.
1077          */
1078
1079         sll = &PACKET_SKB_CB(skb)->sa.ll;
1080         if (sock->type == SOCK_PACKET)
1081                 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1082         else
1083                 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1084
1085         /*
1086          *      You lose any data beyond the buffer you gave. If it worries a
1087          *      user program they can ask the device for its MTU anyway.
1088          */
1089
1090         copied = skb->len;
1091         if (copied > len)
1092         {
1093                 copied=len;
1094                 msg->msg_flags|=MSG_TRUNC;
1095         }
1096
1097         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1098         if (err)
1099                 goto out_free;
1100
1101         sock_recv_timestamp(msg, sk, skb);
1102
1103         if (msg->msg_name)
1104                 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1105                        msg->msg_namelen);
1106
1107         if (pkt_sk(sk)->auxdata) {
1108                 struct tpacket_auxdata aux;
1109
1110                 aux.tp_status = TP_STATUS_USER;
1111                 if (skb->ip_summed == CHECKSUM_PARTIAL)
1112                         aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1113                 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1114                 aux.tp_snaplen = skb->len;
1115                 aux.tp_mac = 0;
1116                 aux.tp_net = skb_network_offset(skb);
1117
1118                 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
1119         }
1120
1121         /*
1122          *      Free or return the buffer as appropriate. Again this
1123          *      hides all the races and re-entrancy issues from us.
1124          */
1125         err = (flags&MSG_TRUNC) ? skb->len : copied;
1126
1127 out_free:
1128         skb_free_datagram(sk, skb);
1129 out:
1130         return err;
1131 }
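/* A userspace sketch of consuming the PACKET_AUXDATA cmsg built above
 * (hypothetical code; 'buf' is the payload buffer, error handling
 * omitted). The option itself is enabled with an int via setsockopt():
 *
 *	char cbuf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
 *	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *	};
 *	recvmsg(fd, &msg, 0);
 *	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
 *	if (cmsg && cmsg->cmsg_level == SOL_PACKET &&
 *	    cmsg->cmsg_type == PACKET_AUXDATA) {
 *		struct tpacket_auxdata *aux = (void *)CMSG_DATA(cmsg);
 *		// aux->tp_len: wire length, aux->tp_snaplen: delivered
 *	}
 */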
1132
1133 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1134                                int *uaddr_len, int peer)
1135 {
1136         struct net_device *dev;
1137         struct sock *sk = sock->sk;
1138
1139         if (peer)
1140                 return -EOPNOTSUPP;
1141
1142         uaddr->sa_family = AF_PACKET;
1143         dev = dev_get_by_index(&init_net, pkt_sk(sk)->ifindex);
1144         if (dev) {
1145                 strlcpy(uaddr->sa_data, dev->name, 15);
1146                 dev_put(dev);
1147         } else
1148                 memset(uaddr->sa_data, 0, 14);
1149         *uaddr_len = sizeof(*uaddr);
1150
1151         return 0;
1152 }
1153
1154 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1155                           int *uaddr_len, int peer)
1156 {
1157         struct net_device *dev;
1158         struct sock *sk = sock->sk;
1159         struct packet_sock *po = pkt_sk(sk);
1160         struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
1161
1162         if (peer)
1163                 return -EOPNOTSUPP;
1164
1165         sll->sll_family = AF_PACKET;
1166         sll->sll_ifindex = po->ifindex;
1167         sll->sll_protocol = po->num;
1168         dev = dev_get_by_index(&init_net, po->ifindex);
1169         if (dev) {
1170                 sll->sll_hatype = dev->type;
1171                 sll->sll_halen = dev->addr_len;
1172                 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1173                 dev_put(dev);
1174         } else {
1175                 sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
1176                 sll->sll_halen = 0;
1177         }
1178         *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
1179
1180         return 0;
1181 }
1182
1183 static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
1184 {
1185         switch (i->type) {
1186         case PACKET_MR_MULTICAST:
1187                 if (what > 0)
1188                         dev_mc_add(dev, i->addr, i->alen, 0);
1189                 else
1190                         dev_mc_delete(dev, i->addr, i->alen, 0);
1191                 break;
1192         case PACKET_MR_PROMISC:
1193                 dev_set_promiscuity(dev, what);
1194                 break;
1195         case PACKET_MR_ALLMULTI:
1196                 dev_set_allmulti(dev, what);
1197                 break;
1198         default:;
1199         }
1200 }
1201
1202 static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1203 {
1204         for ( ; i; i=i->next) {
1205                 if (i->ifindex == dev->ifindex)
1206                         packet_dev_mc(dev, i, what);
1207         }
1208 }
1209
1210 static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
1211 {
1212         struct packet_sock *po = pkt_sk(sk);
1213         struct packet_mclist *ml, *i;
1214         struct net_device *dev;
1215         int err;
1216
1217         rtnl_lock();
1218
1219         err = -ENODEV;
1220         dev = __dev_get_by_index(&init_net, mreq->mr_ifindex);
1221         if (!dev)
1222                 goto done;
1223
1224         err = -EINVAL;
1225         if (mreq->mr_alen > dev->addr_len)
1226                 goto done;
1227
1228         err = -ENOBUFS;
1229         i = kmalloc(sizeof(*i), GFP_KERNEL);
1230         if (i == NULL)
1231                 goto done;
1232
1233         err = 0;
1234         for (ml = po->mclist; ml; ml = ml->next) {
1235                 if (ml->ifindex == mreq->mr_ifindex &&
1236                     ml->type == mreq->mr_type &&
1237                     ml->alen == mreq->mr_alen &&
1238                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1239                         ml->count++;
1240                         /* Free the new element ... */
1241                         kfree(i);
1242                         goto done;
1243                 }
1244         }
1245
1246         i->type = mreq->mr_type;
1247         i->ifindex = mreq->mr_ifindex;
1248         i->alen = mreq->mr_alen;
1249         memcpy(i->addr, mreq->mr_address, i->alen);
1250         i->count = 1;
1251         i->next = po->mclist;
1252         po->mclist = i;
1253         packet_dev_mc(dev, i, +1);
1254
1255 done:
1256         rtnl_unlock();
1257         return err;
1258 }
1259
1260 static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
1261 {
1262         struct packet_mclist *ml, **mlp;
1263
1264         rtnl_lock();
1265
1266         for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1267                 if (ml->ifindex == mreq->mr_ifindex &&
1268                     ml->type == mreq->mr_type &&
1269                     ml->alen == mreq->mr_alen &&
1270                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1271                         if (--ml->count == 0) {
1272                                 struct net_device *dev;
1273                                 *mlp = ml->next;
1274                                 dev = dev_get_by_index(&init_net, ml->ifindex);
1275                                 if (dev) {
1276                                         packet_dev_mc(dev, ml, -1);
1277                                         dev_put(dev);
1278                                 }
1279                                 kfree(ml);
1280                         }
1281                         rtnl_unlock();
1282                         return 0;
1283                 }
1284         }
1285         rtnl_unlock();
1286         return -EADDRNOTAVAIL;
1287 }
1288
1289 static void packet_flush_mclist(struct sock *sk)
1290 {
1291         struct packet_sock *po = pkt_sk(sk);
1292         struct packet_mclist *ml;
1293
1294         if (!po->mclist)
1295                 return;
1296
1297         rtnl_lock();
1298         while ((ml = po->mclist) != NULL) {
1299                 struct net_device *dev;
1300
1301                 po->mclist = ml->next;
1302                 if ((dev = dev_get_by_index(&init_net, ml->ifindex)) != NULL) {
1303                         packet_dev_mc(dev, ml, -1);
1304                         dev_put(dev);
1305                 }
1306                 kfree(ml);
1307         }
1308         rtnl_unlock();
1309 }
1310
1311 static int
1312 packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
1313 {
1314         struct sock *sk = sock->sk;
1315         struct packet_sock *po = pkt_sk(sk);
1316         int ret;
1317
1318         if (level != SOL_PACKET)
1319                 return -ENOPROTOOPT;
1320
1321         switch(optname) {
1322         case PACKET_ADD_MEMBERSHIP:
1323         case PACKET_DROP_MEMBERSHIP:
1324         {
1325                 struct packet_mreq_max mreq;
1326                 int len = optlen;
1327                 memset(&mreq, 0, sizeof(mreq));
1328                 if (len < sizeof(struct packet_mreq))
1329                         return -EINVAL;
1330                 if (len > sizeof(mreq))
1331                         len = sizeof(mreq);
1332                 if (copy_from_user(&mreq,optval,len))
1333                         return -EFAULT;
1334                 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1335                         return -EINVAL;
1336                 if (optname == PACKET_ADD_MEMBERSHIP)
1337                         ret = packet_mc_add(sk, &mreq);
1338                 else
1339                         ret = packet_mc_drop(sk, &mreq);
1340                 return ret;
1341         }
1342
1343 #ifdef CONFIG_PACKET_MMAP
1344         case PACKET_RX_RING:
1345         {
1346                 struct tpacket_req req;
1347
1348                 if (optlen<sizeof(req))
1349                         return -EINVAL;
1350                 if (copy_from_user(&req,optval,sizeof(req)))
1351                         return -EFAULT;
1352                 return packet_set_ring(sk, &req, 0);
1353         }
1354         case PACKET_COPY_THRESH:
1355         {
1356                 int val;
1357
1358                 if (optlen!=sizeof(val))
1359                         return -EINVAL;
1360                 if (copy_from_user(&val,optval,sizeof(val)))
1361                         return -EFAULT;
1362
1363                 pkt_sk(sk)->copy_thresh = val;
1364                 return 0;
1365         }
1366 #endif
1367         case PACKET_AUXDATA:
1368         {
1369                 int val;
1370
1371                 if (optlen < sizeof(val))
1372                         return -EINVAL;
1373                 if (copy_from_user(&val, optval, sizeof(val)))
1374                         return -EFAULT;
1375
1376                 po->auxdata = !!val;
1377                 return 0;
1378         }
1379         case PACKET_ORIGDEV:
1380         {
1381                 int val;
1382
1383                 if (optlen < sizeof(val))
1384                         return -EINVAL;
1385                 if (copy_from_user(&val, optval, sizeof(val)))
1386                         return -EFAULT;
1387
1388                 po->origdev = !!val;
1389                 return 0;
1390         }
1391         default:
1392                 return -ENOPROTOOPT;
1393         }
1394 }
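/* A userspace sketch of the membership options above (hypothetical
 * code): PACKET_MR_PROMISC reaches dev_set_promiscuity() via
 * packet_dev_mc():
 *
 *	struct packet_mreq mreq = {
 *		.mr_ifindex	= if_nametoindex("eth0"),
 *		.mr_type	= PACKET_MR_PROMISC,
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
 *		   &mreq, sizeof(mreq));
 *
 * The reference is dropped on the socket's behalf at close time by
 * packet_flush_mclist().
 */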
1395
1396 static int packet_getsockopt(struct socket *sock, int level, int optname,
1397                              char __user *optval, int __user *optlen)
1398 {
1399         int len;
1400         int val;
1401         struct sock *sk = sock->sk;
1402         struct packet_sock *po = pkt_sk(sk);
1403         void *data;
1404         struct tpacket_stats st;
1405
1406         if (level != SOL_PACKET)
1407                 return -ENOPROTOOPT;
1408
1409         if (get_user(len, optlen))
1410                 return -EFAULT;
1411
1412         if (len < 0)
1413                 return -EINVAL;
1414
1415         switch(optname) {
1416         case PACKET_STATISTICS:
1417                 if (len > sizeof(struct tpacket_stats))
1418                         len = sizeof(struct tpacket_stats);
1419                 spin_lock_bh(&sk->sk_receive_queue.lock);
1420                 st = po->stats;
1421                 memset(&po->stats, 0, sizeof(st));
1422                 spin_unlock_bh(&sk->sk_receive_queue.lock);
1423                 st.tp_packets += st.tp_drops;
1424
1425                 data = &st;
1426                 break;
1427         case PACKET_AUXDATA:
1428                 if (len > sizeof(int))
1429                         len = sizeof(int);
1430                 val = po->auxdata;
1431
1432                 data = &val;
1433                 break;
1434         case PACKET_ORIGDEV:
1435                 if (len > sizeof(int))
1436                         len = sizeof(int);
1437                 val = po->origdev;
1438
1439                 data = &val;
1440                 break;
1441         default:
1442                 return -ENOPROTOOPT;
1443         }
1444
1445         if (put_user(len, optlen))
1446                 return -EFAULT;
1447         if (copy_to_user(optval, data, len))
1448                 return -EFAULT;
1449         return 0;
1450 }
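/* A userspace sketch of reading the counters snapshotted above
 * (hypothetical code). Note that tp_packets is returned as received
 * plus dropped, and that the call zeroes the counters, so successive
 * reads give per-interval figures:
 *
 *	struct tpacket_stats st;
 *	socklen_t len = sizeof(st);
 *	getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len);
 */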
1451
1452
1453 static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1454 {
1455         struct sock *sk;
1456         struct hlist_node *node;
1457         struct net_device *dev = data;
1458
1459         if (dev->nd_net != &init_net)
1460                 return NOTIFY_DONE;
1461
1462         read_lock(&packet_sklist_lock);
1463         sk_for_each(sk, node, &packet_sklist) {
1464                 struct packet_sock *po = pkt_sk(sk);
1465
1466                 switch (msg) {
1467                 case NETDEV_UNREGISTER:
1468                         if (po->mclist)
1469                                 packet_dev_mclist(dev, po->mclist, -1);
1470                         /* fallthrough */
1471
1472                 case NETDEV_DOWN:
1473                         if (dev->ifindex == po->ifindex) {
1474                                 spin_lock(&po->bind_lock);
1475                                 if (po->running) {
1476                                         __dev_remove_pack(&po->prot_hook);
1477                                         __sock_put(sk);
1478                                         po->running = 0;
1479                                         sk->sk_err = ENETDOWN;
1480                                         if (!sock_flag(sk, SOCK_DEAD))
1481                                                 sk->sk_error_report(sk);
1482                                 }
1483                                 if (msg == NETDEV_UNREGISTER) {
1484                                         po->ifindex = -1;
1485                                         po->prot_hook.dev = NULL;
1486                                 }
1487                                 spin_unlock(&po->bind_lock);
1488                         }
1489                         break;
1490                 case NETDEV_UP:
1491                         spin_lock(&po->bind_lock);
1492                         if (dev->ifindex == po->ifindex && po->num &&
1493                             !po->running) {
1494                                 dev_add_pack(&po->prot_hook);
1495                                 sock_hold(sk);
1496                                 po->running = 1;
1497                         }
1498                         spin_unlock(&po->bind_lock);
1499                         break;
1500                 }
1501         }
1502         read_unlock(&packet_sklist_lock);
1503         return NOTIFY_DONE;
1504 }
1505
1506
1507 static int packet_ioctl(struct socket *sock, unsigned int cmd,
1508                         unsigned long arg)
1509 {
1510         struct sock *sk = sock->sk;
1511
1512         switch(cmd) {
1513                 case SIOCOUTQ:
1514                 {
1515                         int amount = atomic_read(&sk->sk_wmem_alloc);
1516                         return put_user(amount, (int __user *)arg);
1517                 }
1518                 case SIOCINQ:
1519                 {
1520                         struct sk_buff *skb;
1521                         int amount = 0;
1522
1523                         spin_lock_bh(&sk->sk_receive_queue.lock);
1524                         skb = skb_peek(&sk->sk_receive_queue);
1525                         if (skb)
1526                                 amount = skb->len;
1527                         spin_unlock_bh(&sk->sk_receive_queue.lock);
1528                         return put_user(amount, (int __user *)arg);
1529                 }
1530                 case SIOCGSTAMP:
1531                         return sock_get_timestamp(sk, (struct timeval __user *)arg);
1532                 case SIOCGSTAMPNS:
1533                         return sock_get_timestampns(sk, (struct timespec __user *)arg);
1534
1535 #ifdef CONFIG_INET
1536                 case SIOCADDRT:
1537                 case SIOCDELRT:
1538                 case SIOCDARP:
1539                 case SIOCGARP:
1540                 case SIOCSARP:
1541                 case SIOCGIFADDR:
1542                 case SIOCSIFADDR:
1543                 case SIOCGIFBRDADDR:
1544                 case SIOCSIFBRDADDR:
1545                 case SIOCGIFNETMASK:
1546                 case SIOCSIFNETMASK:
1547                 case SIOCGIFDSTADDR:
1548                 case SIOCSIFDSTADDR:
1549                 case SIOCSIFFLAGS:
1550                         return inet_dgram_ops.ioctl(sock, cmd, arg);
1551 #endif
1552
1553                 default:
1554                         return -ENOIOCTLCMD;
1555         }
1556         return 0;
1557 }
1558
1559 #ifndef CONFIG_PACKET_MMAP
1560 #define packet_mmap sock_no_mmap
1561 #define packet_poll datagram_poll
1562 #else
1563
1564 static unsigned int packet_poll(struct file * file, struct socket *sock,
1565                                 poll_table *wait)
1566 {
1567         struct sock *sk = sock->sk;
1568         struct packet_sock *po = pkt_sk(sk);
1569         unsigned int mask = datagram_poll(file, sock, wait);
1570
1571         spin_lock_bh(&sk->sk_receive_queue.lock);
1572         if (po->pg_vec) {
1573                 unsigned last = po->head ? po->head-1 : po->frame_max;
1574                 struct tpacket_hdr *h;
1575
1576                 h = packet_lookup_frame(po, last);
1577
1578                 if (h->tp_status)
1579                         mask |= POLLIN | POLLRDNORM;
1580         }
1581         spin_unlock_bh(&sk->sk_receive_queue.lock);
1582         return mask;
1583 }
1584
1585
1586 /* Dirty? Well, I still have not learned a better way to account
1587  * for user mmaps.
1588  */
1589
1590 static void packet_mm_open(struct vm_area_struct *vma)
1591 {
1592         struct file *file = vma->vm_file;
1593         struct socket * sock = file->private_data;
1594         struct sock *sk = sock->sk;
1595
1596         if (sk)
1597                 atomic_inc(&pkt_sk(sk)->mapped);
1598 }
1599
1600 static void packet_mm_close(struct vm_area_struct *vma)
1601 {
1602         struct file *file = vma->vm_file;
1603         struct socket * sock = file->private_data;
1604         struct sock *sk = sock->sk;
1605
1606         if (sk)
1607                 atomic_dec(&pkt_sk(sk)->mapped);
1608 }
1609
1610 static struct vm_operations_struct packet_mmap_ops = {
1611         .open = packet_mm_open,
1612         .close =packet_mm_close,
1613 };
1614
1615 static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
1616 {
1617         int i;
1618
1619         for (i = 0; i < len; i++) {
1620                 if (likely(pg_vec[i]))
1621                         free_pages((unsigned long) pg_vec[i], order);
1622         }
1623         kfree(pg_vec);
1624 }
1625
1626 static inline char *alloc_one_pg_vec_page(unsigned long order)
1627 {
1628         return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
1629                                          order);
1630 }
1631
1632 static char **alloc_pg_vec(struct tpacket_req *req, int order)
1633 {
1634         unsigned int block_nr = req->tp_block_nr;
1635         char **pg_vec;
1636         int i;
1637
1638         pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
1639         if (unlikely(!pg_vec))
1640                 goto out;
1641
1642         for (i = 0; i < block_nr; i++) {
1643                 pg_vec[i] = alloc_one_pg_vec_page(order);
1644                 if (unlikely(!pg_vec[i]))
1645                         goto out_free_pgvec;
1646         }
1647
1648 out:
1649         return pg_vec;
1650
1651 out_free_pgvec:
1652         free_pg_vec(pg_vec, order, block_nr);
1653         pg_vec = NULL;
1654         goto out;
1655 }
1656
1657 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1658 {
1659         char **pg_vec = NULL;
1660         struct packet_sock *po = pkt_sk(sk);
1661         int was_running, order = 0;
1662         __be16 num;
1663         int err = 0;
1664
1665         if (req->tp_block_nr) {
1666                 int i;
1667
1668                 /* Sanity tests and some calculations */
1669
1670                 if (unlikely(po->pg_vec))
1671                         return -EBUSY;
1672
1673                 if (unlikely((int)req->tp_block_size <= 0))
1674                         return -EINVAL;
1675                 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
1676                         return -EINVAL;
1677                 if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
1678                         return -EINVAL;
1679                 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
1680                         return -EINVAL;
1681
1682                 po->frames_per_block = req->tp_block_size / req->tp_frame_size;
1683                 if (unlikely(po->frames_per_block <= 0))
1684                         return -EINVAL;
1685                 if (unlikely((po->frames_per_block * req->tp_block_nr) !=
1686                              req->tp_frame_nr))
1687                         return -EINVAL;
1688
1689                 err = -ENOMEM;
1690                 order = get_order(req->tp_block_size);
1691                 pg_vec = alloc_pg_vec(req, order);
1692                 if (unlikely(!pg_vec))
1693                         goto out;
1694
1696                 for (i = 0; i < req->tp_block_nr; i++) {
1697                         char *ptr = pg_vec[i];
1698                         struct tpacket_hdr *header;
1699                         int k;
1700
1701                         for (k = 0; k < po->frames_per_block; k++) {
1702                                 header = (struct tpacket_hdr *) ptr;
1703                                 header->tp_status = TP_STATUS_KERNEL;
1704                                 ptr += req->tp_frame_size;
1705                         }
1706                 }
1707                 /* Done */
1708         } else {
1709                 if (unlikely(req->tp_frame_nr))
1710                         return -EINVAL;
1711         }
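/*
 * A worked example of the checks above, assuming PAGE_SIZE is 4096: a
 * request of tp_block_size = 8192 and tp_frame_size = 2048 yields
 * frames_per_block = 4, so tp_block_nr = 16 requires tp_frame_nr = 64
 * exactly; get_order(8192) below is then 1.  Any mismatch is rejected with
 * -EINVAL before any memory is allocated.
 */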
1712
1713         lock_sock(sk);
1714
1715         /* Detach socket from network */
1716         spin_lock(&po->bind_lock);
1717         was_running = po->running;
1718         num = po->num;
1719         if (was_running) {
1720                 __dev_remove_pack(&po->prot_hook);
1721                 po->num = 0;
1722                 po->running = 0;
1723                 __sock_put(sk);
1724         }
1725         spin_unlock(&po->bind_lock);
1726
1727         synchronize_net();
1728
1729         err = -EBUSY;
1730         if (closing || atomic_read(&po->mapped) == 0) {
1731                 err = 0;
1732 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; }) /* set a = b, return old a */
1733
1734                 spin_lock_bh(&sk->sk_receive_queue.lock);
1735                 pg_vec = XC(po->pg_vec, pg_vec);
1736                 po->frame_max = (req->tp_frame_nr - 1);
1737                 po->head = 0;
1738                 po->frame_size = req->tp_frame_size;
1739                 spin_unlock_bh(&sk->sk_receive_queue.lock);
1740
1741                 order = XC(po->pg_vec_order, order);
1742                 req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1743
1744                 po->pg_vec_pages = req->tp_block_size / PAGE_SIZE;
1745                 po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
1746                 skb_queue_purge(&sk->sk_receive_queue);
1747 #undef XC
1748                 if (atomic_read(&po->mapped))
1749                         printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
1750         }
1751
1752         spin_lock(&po->bind_lock);
1753         if (was_running && !po->running) {
1754                 sock_hold(sk);
1755                 po->running = 1;
1756                 po->num = num;
1757                 dev_add_pack(&po->prot_hook);
1758         }
1759         spin_unlock(&po->bind_lock);
1760
1761         release_sock(sk);
1762
1763         if (pg_vec)
1764                 free_pg_vec(pg_vec, order, req->tp_block_nr);
1765 out:
1766         return err;
1767 }
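/*
 * packet_set_ring() is reached via setsockopt(PACKET_RX_RING).  A minimal
 * user-space setup matching the worked example above (an illustrative
 * sketch, error handling omitted; needs <sys/socket.h>, <sys/mman.h>,
 * <linux/if_packet.h> and <linux/if_ether.h>):
 *
 *	struct tpacket_req req = {
 *		.tp_block_size	= 8192,
 *		.tp_block_nr	= 16,
 *		.tp_frame_size	= 2048,
 *		.tp_frame_nr	= 64,
 *	};
 *	int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *	void *ring = mmap(NULL, req.tp_block_nr * req.tp_block_size,
 *			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 */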
1768
1769 static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1770 {
1771         struct sock *sk = sock->sk;
1772         struct packet_sock *po = pkt_sk(sk);
1773         unsigned long size;
1774         unsigned long start;
1775         int err = -EINVAL;
1776         int i;
1777
1778         if (vma->vm_pgoff)
1779                 return -EINVAL;
1780
1781         size = vma->vm_end - vma->vm_start;
1782
1783         lock_sock(sk);
1784         if (po->pg_vec == NULL)
1785                 goto out;
1786         if (size != po->pg_vec_len * po->pg_vec_pages * PAGE_SIZE)
1787                 goto out;
1788
1789         start = vma->vm_start;
1790         for (i = 0; i < po->pg_vec_len; i++) {
1791                 struct page *page = virt_to_page(po->pg_vec[i]);
1792                 int pg_num;
1793
1794                 for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
1795                         err = vm_insert_page(vma, start, page);
1796                         if (unlikely(err))
1797                                 goto out;
1798                         start += PAGE_SIZE;
1799                 }
1800         }
1801         atomic_inc(&po->mapped);
1802         vma->vm_ops = &packet_mmap_ops;
1803         err = 0;
1804
1805 out:
1806         release_sock(sk);
1807         return err;
1808 }
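/*
 * The mapping must start at page offset 0 and cover the whole ring
 * (tp_block_nr * tp_block_size bytes); anything else fails with -EINVAL.
 * Each block is then installed one page at a time with vm_insert_page(),
 * which is what the compound pages from alloc_one_pg_vec_page() are for.
 */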
1809 #endif
1810
1811
1812 static const struct proto_ops packet_ops_spkt = {
1813         .family =       PF_PACKET,
1814         .owner =        THIS_MODULE,
1815         .release =      packet_release,
1816         .bind =         packet_bind_spkt,
1817         .connect =      sock_no_connect,
1818         .socketpair =   sock_no_socketpair,
1819         .accept =       sock_no_accept,
1820         .getname =      packet_getname_spkt,
1821         .poll =         datagram_poll,
1822         .ioctl =        packet_ioctl,
1823         .listen =       sock_no_listen,
1824         .shutdown =     sock_no_shutdown,
1825         .setsockopt =   sock_no_setsockopt,
1826         .getsockopt =   sock_no_getsockopt,
1827         .sendmsg =      packet_sendmsg_spkt,
1828         .recvmsg =      packet_recvmsg,
1829         .mmap =         sock_no_mmap,
1830         .sendpage =     sock_no_sendpage,
1831 };
1832
1833 static const struct proto_ops packet_ops = {
1834         .family =       PF_PACKET,
1835         .owner =        THIS_MODULE,
1836         .release =      packet_release,
1837         .bind =         packet_bind,
1838         .connect =      sock_no_connect,
1839         .socketpair =   sock_no_socketpair,
1840         .accept =       sock_no_accept,
1841         .getname =      packet_getname,
1842         .poll =         packet_poll,
1843         .ioctl =        packet_ioctl,
1844         .listen =       sock_no_listen,
1845         .shutdown =     sock_no_shutdown,
1846         .setsockopt =   packet_setsockopt,
1847         .getsockopt =   packet_getsockopt,
1848         .sendmsg =      packet_sendmsg,
1849         .recvmsg =      packet_recvmsg,
1850         .mmap =         packet_mmap,
1851         .sendpage =     sock_no_sendpage,
1852 };
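/*
 * packet_ops_spkt above backs the obsolete SOCK_PACKET sockets; this table
 * backs SOCK_RAW and SOCK_DGRAM.  When CONFIG_PACKET_MMAP is not set, the
 * #defines near packet_poll() alias packet_mmap to sock_no_mmap and
 * packet_poll to datagram_poll, so the initializers here still resolve.
 */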
1853
1854 static struct net_proto_family packet_family_ops = {
1855         .family =       PF_PACKET,
1856         .create =       packet_create,
1857         .owner  =       THIS_MODULE,
1858 };
1859
1860 static struct notifier_block packet_netdev_notifier = {
1861         .notifier_call = packet_notifier,
1862 };
1863
1864 #ifdef CONFIG_PROC_FS
1865 static inline struct sock *packet_seq_idx(loff_t off)
1866 {
1867         struct sock *s;
1868         struct hlist_node *node;
1869
1870         sk_for_each(s, node, &packet_sklist) {
1871                 if (!off--)
1872                         return s;
1873         }
1874         return NULL;
1875 }
1876
1877 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
1878 {
1879         read_lock(&packet_sklist_lock);
1880         return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
1881 }
1882
1883 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1884 {
1885         ++*pos;
1886         return (v == SEQ_START_TOKEN)
1887                 ? sk_head(&packet_sklist)
1888                 : sk_next((struct sock *)v);
1889 }
1890
1891 static void packet_seq_stop(struct seq_file *seq, void *v)
1892 {
1893         read_unlock(&packet_sklist_lock);
1894 }
1895
1896 static int packet_seq_show(struct seq_file *seq, void *v)
1897 {
1898         if (v == SEQ_START_TOKEN)
1899                 seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
1900         else {
1901                 struct sock *s = v;
1902                 const struct packet_sock *po = pkt_sk(s);
1903
1904                 seq_printf(seq,
1905                            "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
1906                            s,
1907                            atomic_read(&s->sk_refcnt),
1908                            s->sk_type,
1909                            ntohs(po->num),
1910                            po->ifindex,
1911                            po->running,
1912                            atomic_read(&s->sk_rmem_alloc),
1913                            sock_i_uid(s),
1914                            sock_i_ino(s));
1915         }
1916
1917         return 0;
1918 }
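/*
 * One line is emitted per packet socket in /proc/net/packet.  An
 * illustrative entry (values invented) under the header above:
 *
 *	sk       RefCnt Type Proto  Iface R Rmem   User   Inode
 *	f6a8c800 2      3    0003   2     1 0      0      7181
 *
 * i.e. socket address, refcount, SOCK_RAW (3), protocol 0x0003
 * (ETH_P_ALL), ifindex, running flag, receive-queue bytes, owning uid and
 * inode number.
 */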
1919
1920 static const struct seq_operations packet_seq_ops = {
1921         .start  = packet_seq_start,
1922         .next   = packet_seq_next,
1923         .stop   = packet_seq_stop,
1924         .show   = packet_seq_show,
1925 };
1926
1927 static int packet_seq_open(struct inode *inode, struct file *file)
1928 {
1929         return seq_open(file, &packet_seq_ops);
1930 }
1931
1932 static const struct file_operations packet_seq_fops = {
1933         .owner          = THIS_MODULE,
1934         .open           = packet_seq_open,
1935         .read           = seq_read,
1936         .llseek         = seq_lseek,
1937         .release        = seq_release,
1938 };
1939
1940 #endif
1941
1942 static void __exit packet_exit(void)
1943 {
1944         proc_net_remove(&init_net, "packet");
1945         unregister_netdevice_notifier(&packet_netdev_notifier);
1946         sock_unregister(PF_PACKET);
1947         proto_unregister(&packet_proto);
1948 }
1949
1950 static int __init packet_init(void)
1951 {
1952         int rc = proto_register(&packet_proto, 0);
1953
1954         if (rc != 0)
1955                 goto out;
1956
1957         sock_register(&packet_family_ops);
1958         register_netdevice_notifier(&packet_netdev_notifier);
1959         proc_net_fops_create(&init_net, "packet", 0, &packet_seq_fops);
1960 out:
1961         return rc;
1962 }
1963
1964 module_init(packet_init);
1965 module_exit(packet_exit);
1966 MODULE_LICENSE("GPL");
1967 MODULE_ALIAS_NETPROTO(PF_PACKET);