net: spread __net_init, __net_exit
[safe/jmp/linux-2.6] net/packet/af_packet.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              PACKET - implements raw packet sockets.
7  *
8  * Authors:     Ross Biro
9  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
11  *
12  * Fixes:
13  *              Alan Cox        :       verify_area() now used correctly
14  *              Alan Cox        :       new skbuff lists, look ma no backlogs!
15  *              Alan Cox        :       tidied skbuff lists.
16  *              Alan Cox        :       Now uses generic datagram routines I
17  *                                      added. Also fixed the peek/read crash
18  *                                      from all old Linux datagram code.
19  *              Alan Cox        :       Uses the improved datagram code.
20  *              Alan Cox        :       Added NULL's for socket options.
21  *              Alan Cox        :       Re-commented the code.
22  *              Alan Cox        :       Use new kernel side addressing
23  *              Rob Janssen     :       Correct MTU usage.
24  *              Dave Platt      :       Counter leaks caused by incorrect
25  *                                      interrupt locking and some slightly
26  *                                      dubious gcc output. Can you read
27  *                                      compiler: it said _VOLATILE_
28  *      Richard Kooijman        :       Timestamp fixes.
29  *              Alan Cox        :       New buffers. Use sk->mac.raw.
30  *              Alan Cox        :       sendmsg/recvmsg support.
31  *              Alan Cox        :       Protocol setting support
32  *      Alexey Kuznetsov        :       Untied from IPv4 stack.
33  *      Cyrus Durgin            :       Fixed kerneld for kmod.
34  *      Michal Ostrowski        :       Module initialization cleanup.
35  *         Ulises Alonso        :       Frame number limit removal and
36  *                                      packet_set_ring memory leak.
37  *              Eric Biederman  :       Allow for > 8 byte hardware addresses.
38  *                                      The convention is that longer addresses
39  *                                      will simply extend the hardware address
40  *                                      byte arrays at the end of sockaddr_ll
41  *                                      and packet_mreq.
42  *              Johann Baudy    :       Added TX RING.
43  *
44  *              This program is free software; you can redistribute it and/or
45  *              modify it under the terms of the GNU General Public License
46  *              as published by the Free Software Foundation; either version
47  *              2 of the License, or (at your option) any later version.
48  *
49  */
50
51 #include <linux/types.h>
52 #include <linux/mm.h>
53 #include <linux/capability.h>
54 #include <linux/fcntl.h>
55 #include <linux/socket.h>
56 #include <linux/in.h>
57 #include <linux/inet.h>
58 #include <linux/netdevice.h>
59 #include <linux/if_packet.h>
60 #include <linux/wireless.h>
61 #include <linux/kernel.h>
62 #include <linux/kmod.h>
63 #include <net/net_namespace.h>
64 #include <net/ip.h>
65 #include <net/protocol.h>
66 #include <linux/skbuff.h>
67 #include <net/sock.h>
68 #include <linux/errno.h>
69 #include <linux/timer.h>
70 #include <asm/system.h>
71 #include <asm/uaccess.h>
72 #include <asm/ioctls.h>
73 #include <asm/page.h>
74 #include <asm/cacheflush.h>
75 #include <asm/io.h>
76 #include <linux/proc_fs.h>
77 #include <linux/seq_file.h>
78 #include <linux/poll.h>
79 #include <linux/module.h>
80 #include <linux/init.h>
81 #include <linux/mutex.h>
82 #include <linux/if_vlan.h>
83
84 #ifdef CONFIG_INET
85 #include <net/inet_common.h>
86 #endif
87
88 /*
89    Assumptions:
90    - if a device has no dev->hard_header routine, it adds and removes the ll
91      header inside itself. In this case the ll header is invisible outside of
92      the device, but higher levels still should reserve dev->hard_header_len.
93      Some devices are clever enough to reallocate the skb when the header
94      will not fit in the reserved space (tunnels); others are not
95      (PPP).
96    - a packet socket receives packets with the ll header pulled,
97      so SOCK_RAW should push it back.
98
99 On receive:
100 -----------
101
102 Incoming, dev->hard_header!=NULL
103    mac_header -> ll header
104    data       -> data
105
106 Outgoing, dev->hard_header!=NULL
107    mac_header -> ll header
108    data       -> ll header
109
110 Incoming, dev->hard_header==NULL
111    mac_header -> UNKNOWN position. It very likely points to the ll
112                  header. PPP does this, which is wrong, because it introduces
113                  asymmetry between the rx and tx paths.
114    data       -> data
115
116 Outgoing, dev->hard_header==NULL
117    mac_header -> data. ll header is still not built!
118    data       -> data
119
120 Summary
121   If dev->hard_header==NULL we are unlikely to restore a sensible ll header.
122
123
124 On transmit:
125 ------------
126
127 dev->hard_header != NULL
128    mac_header -> ll header
129    data       -> ll header
130
131 dev->hard_header == NULL (ll header is added by device, we cannot control it)
132    mac_header -> data
133    data       -> data
134
135    We should set nh.raw on output to the correct position;
136    the packet classifier depends on it.
137  */
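
/*
 * Example: a minimal userspace sketch of the layout rules above. With
 * SOCK_RAW the ll header is part of the data a packet socket delivers;
 * with SOCK_DGRAM it is stripped. (The buffer size here is illustrative.)
 *
 *   int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *   unsigned char buf[2048];
 *   ssize_t n = recv(fd, buf, sizeof(buf), 0);
 *   // for SOCK_RAW, buf starts with the link-level (e.g. Ethernet) header
 */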
138
139 /* Private packet socket structures. */
140
141 struct packet_mclist {
142         struct packet_mclist    *next;
143         int                     ifindex;
144         int                     count;
145         unsigned short          type;
146         unsigned short          alen;
147         unsigned char           addr[MAX_ADDR_LEN];
148 };
149 /* identical to struct packet_mreq except it has
150  * a longer address field.
151  */
152 struct packet_mreq_max {
153         int             mr_ifindex;
154         unsigned short  mr_type;
155         unsigned short  mr_alen;
156         unsigned char   mr_address[MAX_ADDR_LEN];
157 };
158
159 #ifdef CONFIG_PACKET_MMAP
160 static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
161                 int closing, int tx_ring);
162
163 struct packet_ring_buffer {
164         char                    **pg_vec;
165         unsigned int            head;
166         unsigned int            frames_per_block;
167         unsigned int            frame_size;
168         unsigned int            frame_max;
169
170         unsigned int            pg_vec_order;
171         unsigned int            pg_vec_pages;
172         unsigned int            pg_vec_len;
173
174         atomic_t                pending;
175 };
176
177 struct packet_sock;
178 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
179 #endif
180
181 static void packet_flush_mclist(struct sock *sk);
182
183 struct packet_sock {
184         /* struct sock has to be the first member of packet_sock */
185         struct sock             sk;
186         struct tpacket_stats    stats;
187 #ifdef CONFIG_PACKET_MMAP
188         struct packet_ring_buffer       rx_ring;
189         struct packet_ring_buffer       tx_ring;
190         int                     copy_thresh;
191 #endif
192         spinlock_t              bind_lock;
193         struct mutex            pg_vec_lock;
194         unsigned int            running:1,      /* prot_hook is attached*/
195                                 auxdata:1,
196                                 origdev:1;
197         int                     ifindex;        /* bound device         */
198         __be16                  num;
199         struct packet_mclist    *mclist;
200 #ifdef CONFIG_PACKET_MMAP
201         atomic_t                mapped;
202         enum tpacket_versions   tp_version;
203         unsigned int            tp_hdrlen;
204         unsigned int            tp_reserve;
205         unsigned int            tp_loss:1;
206 #endif
207         struct packet_type      prot_hook ____cacheline_aligned_in_smp;
208 };
209
210 struct packet_skb_cb {
211         unsigned int origlen;
212         union {
213                 struct sockaddr_pkt pkt;
214                 struct sockaddr_ll ll;
215         } sa;
216 };
217
218 #define PACKET_SKB_CB(__skb)    ((struct packet_skb_cb *)((__skb)->cb))
219
220 #ifdef CONFIG_PACKET_MMAP
221
222 static void __packet_set_status(struct packet_sock *po, void *frame, int status)
223 {
224         union {
225                 struct tpacket_hdr *h1;
226                 struct tpacket2_hdr *h2;
227                 void *raw;
228         } h;
229
230         h.raw = frame;
231         switch (po->tp_version) {
232         case TPACKET_V1:
233                 h.h1->tp_status = status;
234                 flush_dcache_page(virt_to_page(&h.h1->tp_status));
235                 break;
236         case TPACKET_V2:
237                 h.h2->tp_status = status;
238                 flush_dcache_page(virt_to_page(&h.h2->tp_status));
239                 break;
240         default:
241                 pr_err("TPACKET version not supported\n");
242                 BUG();
243         }
244
245         smp_wmb();
246 }
247
248 static int __packet_get_status(struct packet_sock *po, void *frame)
249 {
250         union {
251                 struct tpacket_hdr *h1;
252                 struct tpacket2_hdr *h2;
253                 void *raw;
254         } h;
255
256         smp_rmb();
257
258         h.raw = frame;
259         switch (po->tp_version) {
260         case TPACKET_V1:
261                 flush_dcache_page(virt_to_page(&h.h1->tp_status));
262                 return h.h1->tp_status;
263         case TPACKET_V2:
264                 flush_dcache_page(virt_to_page(&h.h2->tp_status));
265                 return h.h2->tp_status;
266         default:
267                 pr_err("TPACKET version not supported\n");
268                 BUG();
269                 return 0;
270         }
271 }
272
273 static void *packet_lookup_frame(struct packet_sock *po,
274                 struct packet_ring_buffer *rb,
275                 unsigned int position,
276                 int status)
277 {
278         unsigned int pg_vec_pos, frame_offset;
279         union {
280                 struct tpacket_hdr *h1;
281                 struct tpacket2_hdr *h2;
282                 void *raw;
283         } h;
284
285         pg_vec_pos = position / rb->frames_per_block;
286         frame_offset = position % rb->frames_per_block;
287
288         h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);
289
290         if (status != __packet_get_status(po, h.raw))
291                 return NULL;
292
293         return h.raw;
294 }
295
296 static inline void *packet_current_frame(struct packet_sock *po,
297                 struct packet_ring_buffer *rb,
298                 int status)
299 {
300         return packet_lookup_frame(po, rb, rb->head, status);
301 }
302
303 static inline void *packet_previous_frame(struct packet_sock *po,
304                 struct packet_ring_buffer *rb,
305                 int status)
306 {
307         unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
308         return packet_lookup_frame(po, rb, previous, status);
309 }
310
311 static inline void packet_increment_head(struct packet_ring_buffer *buff)
312 {
313         buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
314 }
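
/*
 * Worked example of the geometry used by the lookup helpers above: with
 * tp_block_size = 4096 and tp_frame_size = 2048 (illustrative values),
 * frames_per_block = 4096 / 2048 = 2, so frame number 3 lives in
 * pg_vec[3 / 2] = pg_vec[1] at offset (3 % 2) * 2048 = 2048.
 */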
315
316 #endif
317
318 static inline struct packet_sock *pkt_sk(struct sock *sk)
319 {
320         return (struct packet_sock *)sk;
321 }
322
323 static void packet_sock_destruct(struct sock *sk)
324 {
325         WARN_ON(atomic_read(&sk->sk_rmem_alloc));
326         WARN_ON(atomic_read(&sk->sk_wmem_alloc));
327
328         if (!sock_flag(sk, SOCK_DEAD)) {
329                 pr_err("Attempt to release alive packet socket: %p\n", sk);
330                 return;
331         }
332
333         sk_refcnt_debug_dec(sk);
334 }
335
336
337 static const struct proto_ops packet_ops;
338
339 static const struct proto_ops packet_ops_spkt;
340
341 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
342                            struct packet_type *pt, struct net_device *orig_dev)
343 {
344         struct sock *sk;
345         struct sockaddr_pkt *spkt;
346
347         /*
348          *      When we registered the protocol we saved the socket in the data
349          *      field for just this event.
350          */
351
352         sk = pt->af_packet_priv;
353
354         /*
355          *      Yank back the headers [hope the device set this
356          *      right or kerboom...]
357          *
358          *      Incoming packets have the ll header pulled;
359          *      push it back.
360          *
361          *      For outgoing ones skb->data == skb_mac_header(skb),
362          *      so this procedure is a no-op.
363          */
364
365         if (skb->pkt_type == PACKET_LOOPBACK)
366                 goto out;
367
368         if (!net_eq(dev_net(dev), sock_net(sk)))
369                 goto out;
370
371         skb = skb_share_check(skb, GFP_ATOMIC);
372         if (skb == NULL)
373                 goto oom;
374
375         /* drop any routing info */
376         skb_dst_drop(skb);
377
378         /* drop conntrack reference */
379         nf_reset(skb);
380
381         spkt = &PACKET_SKB_CB(skb)->sa.pkt;
382
383         skb_push(skb, skb->data - skb_mac_header(skb));
384
385         /*
386          *      The SOCK_PACKET socket receives _all_ frames.
387          */
388
389         spkt->spkt_family = dev->type;
390         strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
391         spkt->spkt_protocol = skb->protocol;
392
393         /*
394          *      Charge the memory to the socket. This is done specifically
395          *      to prevent a socket from using up all of the memory.
396          */
397
398         if (sock_queue_rcv_skb(sk, skb) == 0)
399                 return 0;
400
401 out:
402         kfree_skb(skb);
403 oom:
404         return 0;
405 }
406
407
408 /*
409  *      Output a raw packet to a device layer. This bypasses all the other
410  *      protocol layers and you must therefore supply it with a complete frame
411  */
412
413 static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
414                                struct msghdr *msg, size_t len)
415 {
416         struct sock *sk = sock->sk;
417         struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
418         struct sk_buff *skb = NULL;
419         struct net_device *dev;
420         __be16 proto = 0;
421         int err;
422
423         /*
424          *      Get and verify the address.
425          */
426
427         if (saddr) {
428                 if (msg->msg_namelen < sizeof(struct sockaddr))
429                         return -EINVAL;
430                 if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
431                         proto = saddr->spkt_protocol;
432         } else
433                 return -ENOTCONN;       /* SOCK_PACKET must be sent giving an address */
434
435         /*
436          *      Find the device first to size check it
437          */
438
439         saddr->spkt_device[13] = 0;
440 retry:
441         rcu_read_lock();
442         dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
443         err = -ENODEV;
444         if (dev == NULL)
445                 goto out_unlock;
446
447         err = -ENETDOWN;
448         if (!(dev->flags & IFF_UP))
449                 goto out_unlock;
450
451         /*
452          * You may not queue a frame bigger than the MTU. This is the lowest-level
453          * raw protocol, and you must do your own fragmentation at this level.
454          */
455
456         err = -EMSGSIZE;
457         if (len > dev->mtu + dev->hard_header_len)
458                 goto out_unlock;
459
460         if (!skb) {
461                 size_t reserved = LL_RESERVED_SPACE(dev);
462                 unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
463
464                 rcu_read_unlock();
465                 skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
466                 if (skb == NULL)
467                         return -ENOBUFS;
468                 /* FIXME: Save some space for broken drivers that write a hard
469                  * header at transmission time by themselves. PPP is the notable
470                  * one here. This should really be fixed at the driver level.
471                  */
472                 skb_reserve(skb, reserved);
473                 skb_reset_network_header(skb);
474
475                 /* Try to align data part correctly */
476                 if (hhlen) {
477                         skb->data -= hhlen;
478                         skb->tail -= hhlen;
479                         if (len < hhlen)
480                                 skb_reset_network_header(skb);
481                 }
482                 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
483                 if (err)
484                         goto out_free;
485                 goto retry;
486         }
487
488
489         skb->protocol = proto;
490         skb->dev = dev;
491         skb->priority = sk->sk_priority;
492         skb->mark = sk->sk_mark;
493
494         dev_queue_xmit(skb);
495         rcu_read_unlock();
496         return len;
497
498 out_unlock:
499         rcu_read_unlock();
500 out_free:
501         kfree_skb(skb);
502         return err;
503 }
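
/*
 * Example: sending a raw frame over the legacy SOCK_PACKET interface
 * handled above. A minimal sketch; "eth0" and the frame buffer are
 * illustrative:
 *
 *   struct sockaddr_pkt spkt;
 *   memset(&spkt, 0, sizeof(spkt));
 *   strncpy((char *)spkt.spkt_device, "eth0", sizeof(spkt.spkt_device));
 *   spkt.spkt_protocol = htons(ETH_P_ALL);
 *   sendto(fd, frame, frame_len, 0, (struct sockaddr *)&spkt, sizeof(spkt));
 */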
504
505 static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
506                                       unsigned int res)
507 {
508         struct sk_filter *filter;
509
510         rcu_read_lock_bh();
511         filter = rcu_dereference(sk->sk_filter);
512         if (filter != NULL)
513                 res = sk_run_filter(skb, filter->insns, filter->len);
514         rcu_read_unlock_bh();
515
516         return res;
517 }
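
/*
 * Example: the filter consulted by run_filter() is attached from userspace
 * with SO_ATTACH_FILTER. A minimal sketch of a classic BPF program that
 * accepts every packet with a 64-byte snaplen (the length is illustrative):
 *
 *   struct sock_filter code[] = {
 *           BPF_STMT(BPF_RET | BPF_K, 64),
 *   };
 *   struct sock_fprog prog = { .len = 1, .filter = code };
 *   setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
 */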
518
519 /*
520    This function performs lazy skb cloning in the hope that most packets
521    are discarded by BPF.
522
523    Note the tricky part: we DO mangle a shared skb! skb->data, skb->len
524    and skb->cb are mangled. It works because (and for as long as) packets
525    falling here are owned by the current CPU. Output packets are cloned
526    by dev_queue_xmit_nit(), and input packets are processed by net_bh
527    sequentially, so if we return the skb to its original state on exit,
528    we will not harm anyone.
529  */
530
531 static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
532                       struct packet_type *pt, struct net_device *orig_dev)
533 {
534         struct sock *sk;
535         struct sockaddr_ll *sll;
536         struct packet_sock *po;
537         u8 *skb_head = skb->data;
538         int skb_len = skb->len;
539         unsigned int snaplen, res;
540
541         if (skb->pkt_type == PACKET_LOOPBACK)
542                 goto drop;
543
544         sk = pt->af_packet_priv;
545         po = pkt_sk(sk);
546
547         if (!net_eq(dev_net(dev), sock_net(sk)))
548                 goto drop;
549
550         skb->dev = dev;
551
552         if (dev->header_ops) {
553                 /* The device has an explicit notion of ll header,
554                    exported to higher levels.
555
556                    Otherwise, the device hides the details of its frame
557                    structure, so that the corresponding packet head is
558                    never delivered to the user.
559                  */
560                 if (sk->sk_type != SOCK_DGRAM)
561                         skb_push(skb, skb->data - skb_mac_header(skb));
562                 else if (skb->pkt_type == PACKET_OUTGOING) {
563                         /* Special case: outgoing packets have ll header at head */
564                         skb_pull(skb, skb_network_offset(skb));
565                 }
566         }
567
568         snaplen = skb->len;
569
570         res = run_filter(skb, sk, snaplen);
571         if (!res)
572                 goto drop_n_restore;
573         if (snaplen > res)
574                 snaplen = res;
575
576         if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
577             (unsigned)sk->sk_rcvbuf)
578                 goto drop_n_acct;
579
580         if (skb_shared(skb)) {
581                 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
582                 if (nskb == NULL)
583                         goto drop_n_acct;
584
585                 if (skb_head != skb->data) {
586                         skb->data = skb_head;
587                         skb->len = skb_len;
588                 }
589                 kfree_skb(skb);
590                 skb = nskb;
591         }
592
593         BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
594                      sizeof(skb->cb));
595
596         sll = &PACKET_SKB_CB(skb)->sa.ll;
597         sll->sll_family = AF_PACKET;
598         sll->sll_hatype = dev->type;
599         sll->sll_protocol = skb->protocol;
600         sll->sll_pkttype = skb->pkt_type;
601         if (unlikely(po->origdev))
602                 sll->sll_ifindex = orig_dev->ifindex;
603         else
604                 sll->sll_ifindex = dev->ifindex;
605
606         sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
607
608         PACKET_SKB_CB(skb)->origlen = skb->len;
609
610         if (pskb_trim(skb, snaplen))
611                 goto drop_n_acct;
612
613         skb_set_owner_r(skb, sk);
614         skb->dev = NULL;
615         skb_dst_drop(skb);
616
617         /* drop conntrack reference */
618         nf_reset(skb);
619
620         spin_lock(&sk->sk_receive_queue.lock);
621         po->stats.tp_packets++;
622         skb->dropcount = atomic_read(&sk->sk_drops);
623         __skb_queue_tail(&sk->sk_receive_queue, skb);
624         spin_unlock(&sk->sk_receive_queue.lock);
625         sk->sk_data_ready(sk, skb->len);
626         return 0;
627
628 drop_n_acct:
629         po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);
630
631 drop_n_restore:
632         if (skb_head != skb->data && skb_shared(skb)) {
633                 skb->data = skb_head;
634                 skb->len = skb_len;
635         }
636 drop:
637         consume_skb(skb);
638         return 0;
639 }
640
641 #ifdef CONFIG_PACKET_MMAP
642 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
643                        struct packet_type *pt, struct net_device *orig_dev)
644 {
645         struct sock *sk;
646         struct packet_sock *po;
647         struct sockaddr_ll *sll;
648         union {
649                 struct tpacket_hdr *h1;
650                 struct tpacket2_hdr *h2;
651                 void *raw;
652         } h;
653         u8 *skb_head = skb->data;
654         int skb_len = skb->len;
655         unsigned int snaplen, res;
656         unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
657         unsigned short macoff, netoff, hdrlen;
658         struct sk_buff *copy_skb = NULL;
659         struct timeval tv;
660         struct timespec ts;
661
662         if (skb->pkt_type == PACKET_LOOPBACK)
663                 goto drop;
664
665         sk = pt->af_packet_priv;
666         po = pkt_sk(sk);
667
668         if (!net_eq(dev_net(dev), sock_net(sk)))
669                 goto drop;
670
671         if (dev->header_ops) {
672                 if (sk->sk_type != SOCK_DGRAM)
673                         skb_push(skb, skb->data - skb_mac_header(skb));
674                 else if (skb->pkt_type == PACKET_OUTGOING) {
675                         /* Special case: outgoing packets have ll header at head */
676                         skb_pull(skb, skb_network_offset(skb));
677                 }
678         }
679
680         if (skb->ip_summed == CHECKSUM_PARTIAL)
681                 status |= TP_STATUS_CSUMNOTREADY;
682
683         snaplen = skb->len;
684
685         res = run_filter(skb, sk, snaplen);
686         if (!res)
687                 goto drop_n_restore;
688         if (snaplen > res)
689                 snaplen = res;
690
691         if (sk->sk_type == SOCK_DGRAM) {
692                 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
693                                   po->tp_reserve;
694         } else {
695                 unsigned maclen = skb_network_offset(skb);
696                 netoff = TPACKET_ALIGN(po->tp_hdrlen +
697                                        (maclen < 16 ? 16 : maclen)) +
698                         po->tp_reserve;
699                 macoff = netoff - maclen;
700         }
701
702         if (macoff + snaplen > po->rx_ring.frame_size) {
703                 if (po->copy_thresh &&
704                     atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
705                     (unsigned)sk->sk_rcvbuf) {
706                         if (skb_shared(skb)) {
707                                 copy_skb = skb_clone(skb, GFP_ATOMIC);
708                         } else {
709                                 copy_skb = skb_get(skb);
710                                 skb_head = skb->data;
711                         }
712                         if (copy_skb)
713                                 skb_set_owner_r(copy_skb, sk);
714                 }
715                 snaplen = po->rx_ring.frame_size - macoff;
716                 if ((int)snaplen < 0)
717                         snaplen = 0;
718         }
719
720         spin_lock(&sk->sk_receive_queue.lock);
721         h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
722         if (!h.raw)
723                 goto ring_is_full;
724         packet_increment_head(&po->rx_ring);
725         po->stats.tp_packets++;
726         if (copy_skb) {
727                 status |= TP_STATUS_COPY;
728                 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
729         }
730         if (!po->stats.tp_drops)
731                 status &= ~TP_STATUS_LOSING;
732         spin_unlock(&sk->sk_receive_queue.lock);
733
734         skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
735
736         switch (po->tp_version) {
737         case TPACKET_V1:
738                 h.h1->tp_len = skb->len;
739                 h.h1->tp_snaplen = snaplen;
740                 h.h1->tp_mac = macoff;
741                 h.h1->tp_net = netoff;
742                 if (skb->tstamp.tv64)
743                         tv = ktime_to_timeval(skb->tstamp);
744                 else
745                         do_gettimeofday(&tv);
746                 h.h1->tp_sec = tv.tv_sec;
747                 h.h1->tp_usec = tv.tv_usec;
748                 hdrlen = sizeof(*h.h1);
749                 break;
750         case TPACKET_V2:
751                 h.h2->tp_len = skb->len;
752                 h.h2->tp_snaplen = snaplen;
753                 h.h2->tp_mac = macoff;
754                 h.h2->tp_net = netoff;
755                 if (skb->tstamp.tv64)
756                         ts = ktime_to_timespec(skb->tstamp);
757                 else
758                         getnstimeofday(&ts);
759                 h.h2->tp_sec = ts.tv_sec;
760                 h.h2->tp_nsec = ts.tv_nsec;
761                 h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
762                 hdrlen = sizeof(*h.h2);
763                 break;
764         default:
765                 BUG();
766         }
767
768         sll = h.raw + TPACKET_ALIGN(hdrlen);
769         sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
770         sll->sll_family = AF_PACKET;
771         sll->sll_hatype = dev->type;
772         sll->sll_protocol = skb->protocol;
773         sll->sll_pkttype = skb->pkt_type;
774         if (unlikely(po->origdev))
775                 sll->sll_ifindex = orig_dev->ifindex;
776         else
777                 sll->sll_ifindex = dev->ifindex;
778
779         __packet_set_status(po, h.raw, status);
780         smp_mb();
781         {
782                 struct page *p_start, *p_end;
783                 u8 *h_end = h.raw + macoff + snaplen - 1;
784
785                 p_start = virt_to_page(h.raw);
786                 p_end = virt_to_page(h_end);
787                 while (p_start <= p_end) {
788                         flush_dcache_page(p_start);
789                         p_start++;
790                 }
791         }
792
793         sk->sk_data_ready(sk, 0);
794
795 drop_n_restore:
796         if (skb_head != skb->data && skb_shared(skb)) {
797                 skb->data = skb_head;
798                 skb->len = skb_len;
799         }
800 drop:
801         kfree_skb(skb);
802         return 0;
803
804 ring_is_full:
805         po->stats.tp_drops++;
806         spin_unlock(&sk->sk_receive_queue.lock);
807
808         sk->sk_data_ready(sk, 0);
809         kfree_skb(copy_skb);
810         goto drop_n_restore;
811 }
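
/*
 * Example: the userspace consumer of the RX ring filled in above. A minimal
 * sketch for TPACKET_V1 (ring setup via PACKET_RX_RING and mmap() omitted):
 *
 *   struct tpacket_hdr *hdr = (struct tpacket_hdr *)frame;
 *   while (!(hdr->tp_status & TP_STATUS_USER))
 *           poll(&pfd, 1, -1);
 *   // packet data: frame + hdr->tp_mac, hdr->tp_snaplen bytes long
 *   hdr->tp_status = TP_STATUS_KERNEL;  // hand the frame back to the kernel
 */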
812
813 static void tpacket_destruct_skb(struct sk_buff *skb)
814 {
815         struct packet_sock *po = pkt_sk(skb->sk);
816         void *ph;
817
818         BUG_ON(skb == NULL);
819
820         if (likely(po->tx_ring.pg_vec)) {
821                 ph = skb_shinfo(skb)->destructor_arg;
822                 BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
823                 BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
824                 atomic_dec(&po->tx_ring.pending);
825                 __packet_set_status(po, ph, TP_STATUS_AVAILABLE);
826         }
827
828         sock_wfree(skb);
829 }
830
831 static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
832                 void *frame, struct net_device *dev, int size_max,
833                 __be16 proto, unsigned char *addr)
834 {
835         union {
836                 struct tpacket_hdr *h1;
837                 struct tpacket2_hdr *h2;
838                 void *raw;
839         } ph;
840         int to_write, offset, len, tp_len, nr_frags, len_max;
841         struct socket *sock = po->sk.sk_socket;
842         struct page *page;
843         void *data;
844         int err;
845
846         ph.raw = frame;
847
848         skb->protocol = proto;
849         skb->dev = dev;
850         skb->priority = po->sk.sk_priority;
851         skb->mark = po->sk.sk_mark;
852         skb_shinfo(skb)->destructor_arg = ph.raw;
853
854         switch (po->tp_version) {
855         case TPACKET_V2:
856                 tp_len = ph.h2->tp_len;
857                 break;
858         default:
859                 tp_len = ph.h1->tp_len;
860                 break;
861         }
862         if (unlikely(tp_len > size_max)) {
863                 pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
864                 return -EMSGSIZE;
865         }
866
867         skb_reserve(skb, LL_RESERVED_SPACE(dev));
868         skb_reset_network_header(skb);
869
870         data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
871         to_write = tp_len;
872
873         if (sock->type == SOCK_DGRAM) {
874                 err = dev_hard_header(skb, dev, ntohs(proto), addr,
875                                 NULL, tp_len);
876                 if (unlikely(err < 0))
877                         return -EINVAL;
878         } else if (dev->hard_header_len) {
879                 /* net device doesn't like empty head */
880                 if (unlikely(tp_len <= dev->hard_header_len)) {
881                         pr_err("packet size is too short (%d < %d)\n",
882                                tp_len, dev->hard_header_len);
883                         return -EINVAL;
884                 }
885
886                 skb_push(skb, dev->hard_header_len);
887                 err = skb_store_bits(skb, 0, data,
888                                 dev->hard_header_len);
889                 if (unlikely(err))
890                         return err;
891
892                 data += dev->hard_header_len;
893                 to_write -= dev->hard_header_len;
894         }
895
896         err = -EFAULT;
897         page = virt_to_page(data);
898         offset = offset_in_page(data);
899         len_max = PAGE_SIZE - offset;
900         len = ((to_write > len_max) ? len_max : to_write);
901
902         skb->data_len = to_write;
903         skb->len += to_write;
904         skb->truesize += to_write;
905         atomic_add(to_write, &po->sk.sk_wmem_alloc);
906
907         while (likely(to_write)) {
908                 nr_frags = skb_shinfo(skb)->nr_frags;
909
910                 if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
911                         pr_err("Packet exceed the number of skb frags(%lu)\n",
912                                MAX_SKB_FRAGS);
913                         return -EFAULT;
914                 }
915
916                 flush_dcache_page(page);
917                 get_page(page);
918                 skb_fill_page_desc(skb,
919                                 nr_frags,
920                                 page++, offset, len);
921                 to_write -= len;
922                 offset = 0;
923                 len_max = PAGE_SIZE;
924                 len = ((to_write > len_max) ? len_max : to_write);
925         }
926
927         return tp_len;
928 }
929
930 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
931 {
932         struct socket *sock;
933         struct sk_buff *skb;
934         struct net_device *dev;
935         __be16 proto;
936         int ifindex, err, reserve = 0;
937         void *ph;
938         struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
939         int tp_len, size_max;
940         unsigned char *addr;
941         int len_sum = 0;
942         int status = 0;
943
944         sock = po->sk.sk_socket;
945
946         mutex_lock(&po->pg_vec_lock);
947
948         err = -EBUSY;
949         if (saddr == NULL) {
950                 ifindex = po->ifindex;
951                 proto   = po->num;
952                 addr    = NULL;
953         } else {
954                 err = -EINVAL;
955                 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
956                         goto out;
957                 if (msg->msg_namelen < (saddr->sll_halen
958                                         + offsetof(struct sockaddr_ll,
959                                                 sll_addr)))
960                         goto out;
961                 ifindex = saddr->sll_ifindex;
962                 proto   = saddr->sll_protocol;
963                 addr    = saddr->sll_addr;
964         }
965
966         dev = dev_get_by_index(sock_net(&po->sk), ifindex);
967         err = -ENXIO;
968         if (unlikely(dev == NULL))
969                 goto out;
970
971         reserve = dev->hard_header_len;
972
973         err = -ENETDOWN;
974         if (unlikely(!(dev->flags & IFF_UP)))
975                 goto out_put;
976
977         size_max = po->tx_ring.frame_size
978                 - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
979
980         if (size_max > dev->mtu + reserve)
981                 size_max = dev->mtu + reserve;
982
983         do {
984                 ph = packet_current_frame(po, &po->tx_ring,
985                                 TP_STATUS_SEND_REQUEST);
986
987                 if (unlikely(ph == NULL)) {
988                         schedule();
989                         continue;
990                 }
991
992                 status = TP_STATUS_SEND_REQUEST;
993                 skb = sock_alloc_send_skb(&po->sk,
994                                 LL_ALLOCATED_SPACE(dev)
995                                 + sizeof(struct sockaddr_ll),
996                                 0, &err);
997
998                 if (unlikely(skb == NULL))
999                         goto out_status;
1000
1001                 tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
1002                                 addr);
1003
1004                 if (unlikely(tp_len < 0)) {
1005                         if (po->tp_loss) {
1006                                 __packet_set_status(po, ph,
1007                                                 TP_STATUS_AVAILABLE);
1008                                 packet_increment_head(&po->tx_ring);
1009                                 kfree_skb(skb);
1010                                 continue;
1011                         } else {
1012                                 status = TP_STATUS_WRONG_FORMAT;
1013                                 err = tp_len;
1014                                 goto out_status;
1015                         }
1016                 }
1017
1018                 skb->destructor = tpacket_destruct_skb;
1019                 __packet_set_status(po, ph, TP_STATUS_SENDING);
1020                 atomic_inc(&po->tx_ring.pending);
1021
1022                 status = TP_STATUS_SEND_REQUEST;
1023                 err = dev_queue_xmit(skb);
1024                 if (unlikely(err > 0 && (err = net_xmit_errno(err)) != 0))
1025                         goto out_xmit;
1026                 packet_increment_head(&po->tx_ring);
1027                 len_sum += tp_len;
1028         } while (likely((ph != NULL) ||
1029                         ((!(msg->msg_flags & MSG_DONTWAIT)) &&
1030                          (atomic_read(&po->tx_ring.pending))))
1031                 );
1032
1033         err = len_sum;
1034         goto out_put;
1035
1036 out_xmit:
1037         skb->destructor = sock_wfree;
1038         atomic_dec(&po->tx_ring.pending);
1039 out_status:
1040         __packet_set_status(po, ph, status);
1041         kfree_skb(skb);
1042 out_put:
1043         dev_put(dev);
1044 out:
1045         mutex_unlock(&po->pg_vec_lock);
1046         return err;
1047 }
1048 #endif
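
/*
 * Example: the userspace producer feeding the TX ring drained by
 * tpacket_snd(). A minimal sketch for TPACKET_V1 (ring setup via
 * PACKET_TX_RING and mmap() omitted; pkt/pkt_len are illustrative):
 *
 *   struct tpacket_hdr *hdr = (struct tpacket_hdr *)frame;
 *   if (hdr->tp_status == TP_STATUS_AVAILABLE) {
 *           memcpy(frame + TPACKET_HDRLEN - sizeof(struct sockaddr_ll),
 *                  pkt, pkt_len);
 *           hdr->tp_len = pkt_len;
 *           hdr->tp_status = TP_STATUS_SEND_REQUEST;
 *           send(fd, NULL, 0, 0);  // kick the kernel
 *   }
 */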
1049
1050 static int packet_snd(struct socket *sock,
1051                           struct msghdr *msg, size_t len)
1052 {
1053         struct sock *sk = sock->sk;
1054         struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
1055         struct sk_buff *skb;
1056         struct net_device *dev;
1057         __be16 proto;
1058         unsigned char *addr;
1059         int ifindex, err, reserve = 0;
1060
1061         /*
1062          *      Get and verify the address.
1063          */
1064
1065         if (saddr == NULL) {
1066                 struct packet_sock *po = pkt_sk(sk);
1067
1068                 ifindex = po->ifindex;
1069                 proto   = po->num;
1070                 addr    = NULL;
1071         } else {
1072                 err = -EINVAL;
1073                 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
1074                         goto out;
1075                 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
1076                         goto out;
1077                 ifindex = saddr->sll_ifindex;
1078                 proto   = saddr->sll_protocol;
1079                 addr    = saddr->sll_addr;
1080         }
1081
1082
1083         dev = dev_get_by_index(sock_net(sk), ifindex);
1084         err = -ENXIO;
1085         if (dev == NULL)
1086                 goto out_unlock;
1087         if (sock->type == SOCK_RAW)
1088                 reserve = dev->hard_header_len;
1089
1090         err = -ENETDOWN;
1091         if (!(dev->flags & IFF_UP))
1092                 goto out_unlock;
1093
1094         err = -EMSGSIZE;
1095         if (len > dev->mtu+reserve)
1096                 goto out_unlock;
1097
1098         skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev),
1099                                 msg->msg_flags & MSG_DONTWAIT, &err);
1100         if (skb == NULL)
1101                 goto out_unlock;
1102
1103         skb_reserve(skb, LL_RESERVED_SPACE(dev));
1104         skb_reset_network_header(skb);
1105
1106         err = -EINVAL;
1107         if (sock->type == SOCK_DGRAM &&
1108             dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0)
1109                 goto out_free;
1110
1111         /* Returns -EFAULT on error */
1112         err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1113         if (err)
1114                 goto out_free;
1115
1116         skb->protocol = proto;
1117         skb->dev = dev;
1118         skb->priority = sk->sk_priority;
1119         skb->mark = sk->sk_mark;
1120
1121         /*
1122          *      Now send it
1123          */
1124
1125         err = dev_queue_xmit(skb);
1126         if (err > 0 && (err = net_xmit_errno(err)) != 0)
1127                 goto out_unlock;
1128
1129         dev_put(dev);
1130
1131         return len;
1132
1133 out_free:
1134         kfree_skb(skb);
1135 out_unlock:
1136         if (dev)
1137                 dev_put(dev);
1138 out:
1139         return err;
1140 }
1141
1142 static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
1143                 struct msghdr *msg, size_t len)
1144 {
1145 #ifdef CONFIG_PACKET_MMAP
1146         struct sock *sk = sock->sk;
1147         struct packet_sock *po = pkt_sk(sk);
1148         if (po->tx_ring.pg_vec)
1149                 return tpacket_snd(po, msg);
1150         else
1151 #endif
1152                 return packet_snd(sock, msg, len);
1153 }
1154
1155 /*
1156  *      Close a PACKET socket. This is fairly simple. We immediately go
1157  *      to 'closed' state and remove our protocol entry in the device list.
1158  */
1159
1160 static int packet_release(struct socket *sock)
1161 {
1162         struct sock *sk = sock->sk;
1163         struct packet_sock *po;
1164         struct net *net;
1165 #ifdef CONFIG_PACKET_MMAP
1166         struct tpacket_req req;
1167 #endif
1168
1169         if (!sk)
1170                 return 0;
1171
1172         net = sock_net(sk);
1173         po = pkt_sk(sk);
1174
1175         write_lock_bh(&net->packet.sklist_lock);
1176         sk_del_node_init(sk);
1177         sock_prot_inuse_add(net, sk->sk_prot, -1);
1178         write_unlock_bh(&net->packet.sklist_lock);
1179
1180         /*
1181          *      Unhook packet receive handler.
1182          */
1183
1184         if (po->running) {
1185                 /*
1186                  *      Remove the protocol hook
1187                  */
1188                 dev_remove_pack(&po->prot_hook);
1189                 po->running = 0;
1190                 po->num = 0;
1191                 __sock_put(sk);
1192         }
1193
1194         packet_flush_mclist(sk);
1195
1196 #ifdef CONFIG_PACKET_MMAP
1197         memset(&req, 0, sizeof(req));
1198
1199         if (po->rx_ring.pg_vec)
1200                 packet_set_ring(sk, &req, 1, 0);
1201
1202         if (po->tx_ring.pg_vec)
1203                 packet_set_ring(sk, &req, 1, 1);
1204 #endif
1205
1206         /*
1207          *      Now the socket is dead. No more input will appear.
1208          */
1209
1210         sock_orphan(sk);
1211         sock->sk = NULL;
1212
1213         /* Purge queues */
1214
1215         skb_queue_purge(&sk->sk_receive_queue);
1216         sk_refcnt_debug_release(sk);
1217
1218         sock_put(sk);
1219         return 0;
1220 }
1221
1222 /*
1223  *      Attach a packet hook.
1224  */
1225
1226 static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
1227 {
1228         struct packet_sock *po = pkt_sk(sk);
1229         /*
1230          *      Detach an existing hook if present.
1231          */
1232
1233         lock_sock(sk);
1234
1235         spin_lock(&po->bind_lock);
1236         if (po->running) {
1237                 __sock_put(sk);
1238                 po->running = 0;
1239                 po->num = 0;
1240                 spin_unlock(&po->bind_lock);
1241                 dev_remove_pack(&po->prot_hook);
1242                 spin_lock(&po->bind_lock);
1243         }
1244
1245         po->num = protocol;
1246         po->prot_hook.type = protocol;
1247         po->prot_hook.dev = dev;
1248
1249         po->ifindex = dev ? dev->ifindex : 0;
1250
1251         if (protocol == 0)
1252                 goto out_unlock;
1253
1254         if (!dev || (dev->flags & IFF_UP)) {
1255                 dev_add_pack(&po->prot_hook);
1256                 sock_hold(sk);
1257                 po->running = 1;
1258         } else {
1259                 sk->sk_err = ENETDOWN;
1260                 if (!sock_flag(sk, SOCK_DEAD))
1261                         sk->sk_error_report(sk);
1262         }
1263
1264 out_unlock:
1265         spin_unlock(&po->bind_lock);
1266         release_sock(sk);
1267         return 0;
1268 }
1269
1270 /*
1271  *      Bind a packet socket to a device
1272  */
1273
1274 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
1275                             int addr_len)
1276 {
1277         struct sock *sk = sock->sk;
1278         char name[15];
1279         struct net_device *dev;
1280         int err = -ENODEV;
1281
1282         /*
1283          *      Check legality
1284          */
1285
1286         if (addr_len != sizeof(struct sockaddr))
1287                 return -EINVAL;
1288         strlcpy(name, uaddr->sa_data, sizeof(name));
1289
1290         dev = dev_get_by_name(sock_net(sk), name);
1291         if (dev) {
1292                 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
1293                 dev_put(dev);
1294         }
1295         return err;
1296 }
1297
1298 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1299 {
1300         struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
1301         struct sock *sk = sock->sk;
1302         struct net_device *dev = NULL;
1303         int err;
1304
1305
1306         /*
1307          *      Check legality
1308          */
1309
1310         if (addr_len < sizeof(struct sockaddr_ll))
1311                 return -EINVAL;
1312         if (sll->sll_family != AF_PACKET)
1313                 return -EINVAL;
1314
1315         if (sll->sll_ifindex) {
1316                 err = -ENODEV;
1317                 dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
1318                 if (dev == NULL)
1319                         goto out;
1320         }
1321         err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
1322         if (dev)
1323                 dev_put(dev);
1324
1325 out:
1326         return err;
1327 }
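
/*
 * Example: binding a packet socket from userspace (a sketch; "eth0" is
 * illustrative):
 *
 *   struct sockaddr_ll sll;
 *   memset(&sll, 0, sizeof(sll));
 *   sll.sll_family   = AF_PACKET;
 *   sll.sll_protocol = htons(ETH_P_ALL);
 *   sll.sll_ifindex  = if_nametoindex("eth0");
 *   bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 */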
1328
1329 static struct proto packet_proto = {
1330         .name     = "PACKET",
1331         .owner    = THIS_MODULE,
1332         .obj_size = sizeof(struct packet_sock),
1333 };
1334
1335 /*
1336  *      Create a packet of type SOCK_PACKET.
1337  */
1338
1339 static int packet_create(struct net *net, struct socket *sock, int protocol,
1340                          int kern)
1341 {
1342         struct sock *sk;
1343         struct packet_sock *po;
1344         __be16 proto = (__force __be16)protocol; /* weird, but documented */
1345         int err;
1346
1347         if (!capable(CAP_NET_RAW))
1348                 return -EPERM;
1349         if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
1350             sock->type != SOCK_PACKET)
1351                 return -ESOCKTNOSUPPORT;
1352
1353         sock->state = SS_UNCONNECTED;
1354
1355         err = -ENOBUFS;
1356         sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
1357         if (sk == NULL)
1358                 goto out;
1359
1360         sock->ops = &packet_ops;
1361         if (sock->type == SOCK_PACKET)
1362                 sock->ops = &packet_ops_spkt;
1363
1364         sock_init_data(sock, sk);
1365
1366         po = pkt_sk(sk);
1367         sk->sk_family = PF_PACKET;
1368         po->num = proto;
1369
1370         sk->sk_destruct = packet_sock_destruct;
1371         sk_refcnt_debug_inc(sk);
1372
1373         /*
1374          *      Attach a protocol block
1375          */
1376
1377         spin_lock_init(&po->bind_lock);
1378         mutex_init(&po->pg_vec_lock);
1379         po->prot_hook.func = packet_rcv;
1380
1381         if (sock->type == SOCK_PACKET)
1382                 po->prot_hook.func = packet_rcv_spkt;
1383
1384         po->prot_hook.af_packet_priv = sk;
1385
1386         if (proto) {
1387                 po->prot_hook.type = proto;
1388                 dev_add_pack(&po->prot_hook);
1389                 sock_hold(sk);
1390                 po->running = 1;
1391         }
1392
1393         write_lock_bh(&net->packet.sklist_lock);
1394         sk_add_node(sk, &net->packet.sklist);
1395         sock_prot_inuse_add(net, &packet_proto, 1);
1396         write_unlock_bh(&net->packet.sklist_lock);
1397         return 0;
1398 out:
1399         return err;
1400 }
1401
1402 /*
1403  *      Pull a packet from our receive queue and hand it to the user.
1404  *      If necessary we block.
1405  */
1406
1407 static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1408                           struct msghdr *msg, size_t len, int flags)
1409 {
1410         struct sock *sk = sock->sk;
1411         struct sk_buff *skb;
1412         int copied, err;
1413         struct sockaddr_ll *sll;
1414
1415         err = -EINVAL;
1416         if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1417                 goto out;
1418
1419 #if 0
1420         /* What error should we return now? EUNATTACH? */
1421         if (pkt_sk(sk)->ifindex < 0)
1422                 return -ENODEV;
1423 #endif
1424
1425         /*
1426          *      Call the generic datagram receiver. This handles all sorts
1427          *      of horrible races and re-entrancy so we can forget about it
1428          *      in the protocol layers.
1429          *
1430          *      Now it will return ENETDOWN, if device have just gone down,
1431          *      but then it will block.
1432          */
1433
1434         skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
1435
1436         /*
1437          *      An error occurred so return it. Because skb_recv_datagram()
1438          *      handles the blocking, we don't need to see or worry about
1439          *      blocking retries.
1440          */
1441
1442         if (skb == NULL)
1443                 goto out;
1444
1445         /*
1446          *      If the address length field is there to be filled in, we fill
1447          *      it in now.
1448          */
1449
1450         sll = &PACKET_SKB_CB(skb)->sa.ll;
1451         if (sock->type == SOCK_PACKET)
1452                 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1453         else
1454                 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1455
1456         /*
1457          *      You lose any data beyond the buffer you gave. If this worries a
1458          *      user program, it can ask the device for its MTU anyway.
1459          */
1460
1461         copied = skb->len;
1462         if (copied > len) {
1463                 copied = len;
1464                 msg->msg_flags |= MSG_TRUNC;
1465         }
1466
1467         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1468         if (err)
1469                 goto out_free;
1470
1471         sock_recv_ts_and_drops(msg, sk, skb);
1472
1473         if (msg->msg_name)
1474                 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1475                        msg->msg_namelen);
1476
1477         if (pkt_sk(sk)->auxdata) {
1478                 struct tpacket_auxdata aux;
1479
1480                 aux.tp_status = TP_STATUS_USER;
1481                 if (skb->ip_summed == CHECKSUM_PARTIAL)
1482                         aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1483                 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1484                 aux.tp_snaplen = skb->len;
1485                 aux.tp_mac = 0;
1486                 aux.tp_net = skb_network_offset(skb);
1487                 aux.tp_vlan_tci = vlan_tx_tag_get(skb);
1488
1489                 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
1490         }
1491
1492         /*
1493          *      Free or return the buffer as appropriate. Again this
1494          *      hides all the races and re-entrancy issues from us.
1495          */
1496         err = (flags&MSG_TRUNC) ? skb->len : copied;
1497
1498 out_free:
1499         skb_free_datagram(sk, skb);
1500 out:
1501         return err;
1502 }
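
/*
 * Example: consuming the auxdata cmsg emitted above. A sketch, assuming the
 * option has been enabled first:
 *
 *   int one = 1;
 *   struct cmsghdr *cmsg;
 *   struct tpacket_auxdata *aux;
 *   setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &one, sizeof(one));
 *   // ... recvmsg(fd, &msg, 0) ...
 *   for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
 *           if (cmsg->cmsg_level == SOL_PACKET &&
 *               cmsg->cmsg_type == PACKET_AUXDATA)
 *                   aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
 */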
1503
1504 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1505                                int *uaddr_len, int peer)
1506 {
1507         struct net_device *dev;
1508         struct sock *sk = sock->sk;
1509
1510         if (peer)
1511                 return -EOPNOTSUPP;
1512
1513         uaddr->sa_family = AF_PACKET;
1514         rcu_read_lock();
1515         dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
1516         if (dev)
1517                 strlcpy(uaddr->sa_data, dev->name, 15);
1518         else
1519                 memset(uaddr->sa_data, 0, 14);
1520         rcu_read_unlock();
1521         *uaddr_len = sizeof(*uaddr);
1522
1523         return 0;
1524 }
1525
1526 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1527                           int *uaddr_len, int peer)
1528 {
1529         struct net_device *dev;
1530         struct sock *sk = sock->sk;
1531         struct packet_sock *po = pkt_sk(sk);
1532         DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
1533
1534         if (peer)
1535                 return -EOPNOTSUPP;
1536
1537         sll->sll_family = AF_PACKET;
1538         sll->sll_ifindex = po->ifindex;
1539         sll->sll_protocol = po->num;
1540         rcu_read_lock();
1541         dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
1542         if (dev) {
1543                 sll->sll_hatype = dev->type;
1544                 sll->sll_halen = dev->addr_len;
1545                 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1546         } else {
1547                 sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
1548                 sll->sll_halen = 0;
1549         }
1550         rcu_read_unlock();
1551         *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
1552
1553         return 0;
1554 }
1555
1556 static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
1557                          int what)
1558 {
1559         switch (i->type) {
1560         case PACKET_MR_MULTICAST:
1561                 if (what > 0)
1562                         return dev_mc_add(dev, i->addr, i->alen, 0);
1563                 else
1564                         return dev_mc_delete(dev, i->addr, i->alen, 0);
1565                 break;
1566         case PACKET_MR_PROMISC:
1567                 return dev_set_promiscuity(dev, what);
1568                 break;
1569         case PACKET_MR_ALLMULTI:
1570                 return dev_set_allmulti(dev, what);
1571                 break;
1572         case PACKET_MR_UNICAST:
1573                 if (what > 0)
1574                         return dev_unicast_add(dev, i->addr);
1575                 else
1576                         return dev_unicast_delete(dev, i->addr);
1577                 break;
1578         default:
1579                 break;
1580         }
1581         return 0;
1582 }
1583
1584 static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1585 {
1586         for ( ; i; i = i->next) {
1587                 if (i->ifindex == dev->ifindex)
1588                         packet_dev_mc(dev, i, what);
1589         }
1590 }
1591
1592 static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
1593 {
1594         struct packet_sock *po = pkt_sk(sk);
1595         struct packet_mclist *ml, *i;
1596         struct net_device *dev;
1597         int err;
1598
1599         rtnl_lock();
1600
1601         err = -ENODEV;
1602         dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
1603         if (!dev)
1604                 goto done;
1605
1606         err = -EINVAL;
1607         if (mreq->mr_alen > dev->addr_len)
1608                 goto done;
1609
1610         err = -ENOBUFS;
1611         i = kmalloc(sizeof(*i), GFP_KERNEL);
1612         if (i == NULL)
1613                 goto done;
1614
1615         err = 0;
1616         for (ml = po->mclist; ml; ml = ml->next) {
1617                 if (ml->ifindex == mreq->mr_ifindex &&
1618                     ml->type == mreq->mr_type &&
1619                     ml->alen == mreq->mr_alen &&
1620                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1621                         ml->count++;
1622                         /* Free the new element ... */
1623                         kfree(i);
1624                         goto done;
1625                 }
1626         }
1627
1628         i->type = mreq->mr_type;
1629         i->ifindex = mreq->mr_ifindex;
1630         i->alen = mreq->mr_alen;
1631         memcpy(i->addr, mreq->mr_address, i->alen);
1632         i->count = 1;
1633         i->next = po->mclist;
1634         po->mclist = i;
1635         err = packet_dev_mc(dev, i, 1);
1636         if (err) {
1637                 po->mclist = i->next;
1638                 kfree(i);
1639         }
1640
1641 done:
1642         rtnl_unlock();
1643         return err;
1644 }
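/*
 * Usage sketch (user space, illustrative only): the list above is fed
 * by the PACKET_ADD_MEMBERSHIP socket option.  Taking promiscuity as a
 * membership, rather than via SIOCSIFFLAGS, has the advantage that the
 * reference is dropped automatically when the socket is closed.
 * Assumes <sys/socket.h>, <linux/if_packet.h> and <net/if.h>; "eth0"
 * is an example name.
 *
 *	struct packet_mreq mreq = {
 *		.mr_ifindex = if_nametoindex("eth0"),
 *		.mr_type    = PACKET_MR_PROMISC,
 *	};
 *
 *	if (setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
 *		       &mreq, sizeof(mreq)) < 0)
 *		perror("PACKET_ADD_MEMBERSHIP");
 */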
1645
1646 static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
1647 {
1648         struct packet_mclist *ml, **mlp;
1649
1650         rtnl_lock();
1651
1652         for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1653                 if (ml->ifindex == mreq->mr_ifindex &&
1654                     ml->type == mreq->mr_type &&
1655                     ml->alen == mreq->mr_alen &&
1656                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1657                         if (--ml->count == 0) {
1658                                 struct net_device *dev;
1659                                 *mlp = ml->next;
1660                                 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
1661                                 if (dev)
1662                                         packet_dev_mc(dev, ml, -1);
1663                                 kfree(ml);
1664                         }
1665                         rtnl_unlock();
1666                         return 0;
1667                 }
1668         }
1669         rtnl_unlock();
1670         return -EADDRNOTAVAIL;
1671 }
1672
1673 static void packet_flush_mclist(struct sock *sk)
1674 {
1675         struct packet_sock *po = pkt_sk(sk);
1676         struct packet_mclist *ml;
1677
1678         if (!po->mclist)
1679                 return;
1680
1681         rtnl_lock();
1682         while ((ml = po->mclist) != NULL) {
1683                 struct net_device *dev;
1684
1685                 po->mclist = ml->next;
1686                 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
1687                 if (dev != NULL)
1688                         packet_dev_mc(dev, ml, -1);
1689                 kfree(ml);
1690         }
1691         rtnl_unlock();
1692 }
1693
1694 static int
packet_setsockopt(struct socket *sock, int level, int optname,
		  char __user *optval, unsigned int optlen)
1696 {
1697         struct sock *sk = sock->sk;
1698         struct packet_sock *po = pkt_sk(sk);
1699         int ret;
1700
1701         if (level != SOL_PACKET)
1702                 return -ENOPROTOOPT;
1703
1704         switch (optname) {
1705         case PACKET_ADD_MEMBERSHIP:
1706         case PACKET_DROP_MEMBERSHIP:
1707         {
1708                 struct packet_mreq_max mreq;
1709                 int len = optlen;
1710                 memset(&mreq, 0, sizeof(mreq));
1711                 if (len < sizeof(struct packet_mreq))
1712                         return -EINVAL;
1713                 if (len > sizeof(mreq))
1714                         len = sizeof(mreq);
1715                 if (copy_from_user(&mreq, optval, len))
1716                         return -EFAULT;
1717                 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1718                         return -EINVAL;
1719                 if (optname == PACKET_ADD_MEMBERSHIP)
1720                         ret = packet_mc_add(sk, &mreq);
1721                 else
1722                         ret = packet_mc_drop(sk, &mreq);
1723                 return ret;
1724         }
1725
1726 #ifdef CONFIG_PACKET_MMAP
1727         case PACKET_RX_RING:
1728         case PACKET_TX_RING:
1729         {
1730                 struct tpacket_req req;
1731
1732                 if (optlen < sizeof(req))
1733                         return -EINVAL;
1734                 if (copy_from_user(&req, optval, sizeof(req)))
1735                         return -EFAULT;
1736                 return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
1737         }
1738         case PACKET_COPY_THRESH:
1739         {
1740                 int val;
1741
1742                 if (optlen != sizeof(val))
1743                         return -EINVAL;
1744                 if (copy_from_user(&val, optval, sizeof(val)))
1745                         return -EFAULT;
1746
1747                 pkt_sk(sk)->copy_thresh = val;
1748                 return 0;
1749         }
1750         case PACKET_VERSION:
1751         {
1752                 int val;
1753
1754                 if (optlen != sizeof(val))
1755                         return -EINVAL;
1756                 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
1757                         return -EBUSY;
1758                 if (copy_from_user(&val, optval, sizeof(val)))
1759                         return -EFAULT;
1760                 switch (val) {
1761                 case TPACKET_V1:
1762                 case TPACKET_V2:
1763                         po->tp_version = val;
1764                         return 0;
1765                 default:
1766                         return -EINVAL;
1767                 }
1768         }
1769         case PACKET_RESERVE:
1770         {
1771                 unsigned int val;
1772
1773                 if (optlen != sizeof(val))
1774                         return -EINVAL;
1775                 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
1776                         return -EBUSY;
1777                 if (copy_from_user(&val, optval, sizeof(val)))
1778                         return -EFAULT;
1779                 po->tp_reserve = val;
1780                 return 0;
1781         }
1782         case PACKET_LOSS:
1783         {
1784                 unsigned int val;
1785
1786                 if (optlen != sizeof(val))
1787                         return -EINVAL;
1788                 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
1789                         return -EBUSY;
1790                 if (copy_from_user(&val, optval, sizeof(val)))
1791                         return -EFAULT;
1792                 po->tp_loss = !!val;
1793                 return 0;
1794         }
1795 #endif
1796         case PACKET_AUXDATA:
1797         {
1798                 int val;
1799
1800                 if (optlen < sizeof(val))
1801                         return -EINVAL;
1802                 if (copy_from_user(&val, optval, sizeof(val)))
1803                         return -EFAULT;
1804
1805                 po->auxdata = !!val;
1806                 return 0;
1807         }
1808         case PACKET_ORIGDEV:
1809         {
1810                 int val;
1811
1812                 if (optlen < sizeof(val))
1813                         return -EINVAL;
1814                 if (copy_from_user(&val, optval, sizeof(val)))
1815                         return -EFAULT;
1816
1817                 po->origdev = !!val;
1818                 return 0;
1819         }
1820         default:
1821                 return -ENOPROTOOPT;
1822         }
1823 }
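/*
 * Usage sketch (user space, illustrative only): configuring a
 * TPACKET_V2 receive ring.  Note the ordering the code above enforces:
 * PACKET_VERSION and PACKET_RESERVE fail with EBUSY once a ring
 * exists, so they must precede PACKET_RX_RING.  The geometry below is
 * an example that satisfies packet_set_ring() on 4 KiB pages.
 *
 *	int ver = TPACKET_V2;
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096,		(multiple of PAGE_SIZE)
 *		.tp_block_nr   = 64,
 *		.tp_frame_size = 2048,		(multiple of TPACKET_ALIGNMENT)
 *		.tp_frame_nr   = 64 * (4096 / 2048),
 *	};
 *
 *	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 */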
1824
1825 static int packet_getsockopt(struct socket *sock, int level, int optname,
1826                              char __user *optval, int __user *optlen)
1827 {
1828         int len;
1829         int val;
1830         struct sock *sk = sock->sk;
1831         struct packet_sock *po = pkt_sk(sk);
1832         void *data;
1833         struct tpacket_stats st;
1834
1835         if (level != SOL_PACKET)
1836                 return -ENOPROTOOPT;
1837
1838         if (get_user(len, optlen))
1839                 return -EFAULT;
1840
1841         if (len < 0)
1842                 return -EINVAL;
1843
1844         switch (optname) {
1845         case PACKET_STATISTICS:
1846                 if (len > sizeof(struct tpacket_stats))
1847                         len = sizeof(struct tpacket_stats);
1848                 spin_lock_bh(&sk->sk_receive_queue.lock);
1849                 st = po->stats;
1850                 memset(&po->stats, 0, sizeof(st));
1851                 spin_unlock_bh(&sk->sk_receive_queue.lock);
1852                 st.tp_packets += st.tp_drops;
1853
1854                 data = &st;
1855                 break;
1856         case PACKET_AUXDATA:
1857                 if (len > sizeof(int))
1858                         len = sizeof(int);
1859                 val = po->auxdata;
1860
1861                 data = &val;
1862                 break;
1863         case PACKET_ORIGDEV:
1864                 if (len > sizeof(int))
1865                         len = sizeof(int);
1866                 val = po->origdev;
1867
1868                 data = &val;
1869                 break;
1870 #ifdef CONFIG_PACKET_MMAP
1871         case PACKET_VERSION:
1872                 if (len > sizeof(int))
1873                         len = sizeof(int);
1874                 val = po->tp_version;
1875                 data = &val;
1876                 break;
	case PACKET_HDRLEN:
		if (len > sizeof(int))
			len = sizeof(int);
		/* A short optval would leave val partially uninitialized */
		if (len < sizeof(int))
			return -EINVAL;
		if (copy_from_user(&val, optval, len))
			return -EFAULT;
1882                 switch (val) {
1883                 case TPACKET_V1:
1884                         val = sizeof(struct tpacket_hdr);
1885                         break;
1886                 case TPACKET_V2:
1887                         val = sizeof(struct tpacket2_hdr);
1888                         break;
1889                 default:
1890                         return -EINVAL;
1891                 }
1892                 data = &val;
1893                 break;
1894         case PACKET_RESERVE:
1895                 if (len > sizeof(unsigned int))
1896                         len = sizeof(unsigned int);
1897                 val = po->tp_reserve;
1898                 data = &val;
1899                 break;
1900         case PACKET_LOSS:
1901                 if (len > sizeof(unsigned int))
1902                         len = sizeof(unsigned int);
1903                 val = po->tp_loss;
1904                 data = &val;
1905                 break;
1906 #endif
1907         default:
1908                 return -ENOPROTOOPT;
1909         }
1910
1911         if (put_user(len, optlen))
1912                 return -EFAULT;
1913         if (copy_to_user(optval, data, len))
1914                 return -EFAULT;
1915         return 0;
1916 }
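/*
 * Usage sketch (user space, illustrative only): PACKET_HDRLEN is the
 * one option here whose optval is also an input.  The caller passes a
 * TPACKET version in and gets that version's header length back, so it
 * knows how much of each mapped frame the kernel header occupies.
 *
 *	int hdrlen = TPACKET_V2;
 *	socklen_t len = sizeof(hdrlen);
 *
 *	if (getsockopt(fd, SOL_PACKET, PACKET_HDRLEN, &hdrlen, &len) == 0)
 *		printf("tpacket2_hdr occupies %d bytes\n", hdrlen);
 */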
1917
1918
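/*
 * Device notifier: NETDEV_UNREGISTER releases any multicast/promiscuous
 * references held on the dying device and unbinds sockets from it for
 * good (ifindex is cleared, so a later NETDEV_UP cannot rebind them);
 * NETDEV_DOWN only parks bound sockets and flags ENETDOWN, so a
 * subsequent NETDEV_UP quietly rebinds them.
 */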
1919 static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1920 {
1921         struct sock *sk;
1922         struct hlist_node *node;
1923         struct net_device *dev = data;
1924         struct net *net = dev_net(dev);
1925
1926         read_lock(&net->packet.sklist_lock);
1927         sk_for_each(sk, node, &net->packet.sklist) {
1928                 struct packet_sock *po = pkt_sk(sk);
1929
1930                 switch (msg) {
1931                 case NETDEV_UNREGISTER:
1932                         if (po->mclist)
1933                                 packet_dev_mclist(dev, po->mclist, -1);
1934                         /* fallthrough */
1935
1936                 case NETDEV_DOWN:
1937                         if (dev->ifindex == po->ifindex) {
1938                                 spin_lock(&po->bind_lock);
1939                                 if (po->running) {
1940                                         __dev_remove_pack(&po->prot_hook);
1941                                         __sock_put(sk);
1942                                         po->running = 0;
1943                                         sk->sk_err = ENETDOWN;
1944                                         if (!sock_flag(sk, SOCK_DEAD))
1945                                                 sk->sk_error_report(sk);
1946                                 }
1947                                 if (msg == NETDEV_UNREGISTER) {
1948                                         po->ifindex = -1;
1949                                         po->prot_hook.dev = NULL;
1950                                 }
1951                                 spin_unlock(&po->bind_lock);
1952                         }
1953                         break;
1954                 case NETDEV_UP:
1955                         spin_lock(&po->bind_lock);
1956                         if (dev->ifindex == po->ifindex && po->num &&
1957                             !po->running) {
1958                                 dev_add_pack(&po->prot_hook);
1959                                 sock_hold(sk);
1960                                 po->running = 1;
1961                         }
1962                         spin_unlock(&po->bind_lock);
1963                         break;
1964                 }
1965         }
1966         read_unlock(&net->packet.sklist_lock);
1967         return NOTIFY_DONE;
1968 }
1969
1970
1971 static int packet_ioctl(struct socket *sock, unsigned int cmd,
1972                         unsigned long arg)
1973 {
1974         struct sock *sk = sock->sk;
1975
1976         switch (cmd) {
1977         case SIOCOUTQ:
1978         {
1979                 int amount = sk_wmem_alloc_get(sk);
1980
1981                 return put_user(amount, (int __user *)arg);
1982         }
1983         case SIOCINQ:
1984         {
1985                 struct sk_buff *skb;
1986                 int amount = 0;
1987
1988                 spin_lock_bh(&sk->sk_receive_queue.lock);
1989                 skb = skb_peek(&sk->sk_receive_queue);
1990                 if (skb)
1991                         amount = skb->len;
1992                 spin_unlock_bh(&sk->sk_receive_queue.lock);
1993                 return put_user(amount, (int __user *)arg);
1994         }
1995         case SIOCGSTAMP:
1996                 return sock_get_timestamp(sk, (struct timeval __user *)arg);
1997         case SIOCGSTAMPNS:
1998                 return sock_get_timestampns(sk, (struct timespec __user *)arg);
1999
2000 #ifdef CONFIG_INET
2001         case SIOCADDRT:
2002         case SIOCDELRT:
2003         case SIOCDARP:
2004         case SIOCGARP:
2005         case SIOCSARP:
2006         case SIOCGIFADDR:
2007         case SIOCSIFADDR:
2008         case SIOCGIFBRDADDR:
2009         case SIOCSIFBRDADDR:
2010         case SIOCGIFNETMASK:
2011         case SIOCSIFNETMASK:
2012         case SIOCGIFDSTADDR:
2013         case SIOCSIFDSTADDR:
2014         case SIOCSIFFLAGS:
2015                 if (!net_eq(sock_net(sk), &init_net))
2016                         return -ENOIOCTLCMD;
2017                 return inet_dgram_ops.ioctl(sock, cmd, arg);
2018 #endif
2019
2020         default:
2021                 return -ENOIOCTLCMD;
2022         }
2023         return 0;
2024 }
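/*
 * Usage sketch (user space, illustrative only): per the code above,
 * SIOCINQ reports the length of the packet at the head of the receive
 * queue (not the total bytes queued) and SIOCOUTQ the bytes still held
 * in transmit buffers.  Assumes <sys/ioctl.h> and <linux/sockios.h>.
 *
 *	int next_len = 0, unsent = 0;
 *
 *	if (ioctl(fd, SIOCINQ, &next_len) == 0 &&
 *	    ioctl(fd, SIOCOUTQ, &unsent) == 0)
 *		printf("next rx packet %d bytes, %d tx bytes pending\n",
 *		       next_len, unsent);
 */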
2025
2026 #ifndef CONFIG_PACKET_MMAP
2027 #define packet_mmap sock_no_mmap
2028 #define packet_poll datagram_poll
2029 #else
2030
2031 static unsigned int packet_poll(struct file *file, struct socket *sock,
2032                                 poll_table *wait)
2033 {
2034         struct sock *sk = sock->sk;
2035         struct packet_sock *po = pkt_sk(sk);
2036         unsigned int mask = datagram_poll(file, sock, wait);
2037
2038         spin_lock_bh(&sk->sk_receive_queue.lock);
2039         if (po->rx_ring.pg_vec) {
2040                 if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
2041                         mask |= POLLIN | POLLRDNORM;
2042         }
2043         spin_unlock_bh(&sk->sk_receive_queue.lock);
2044         spin_lock_bh(&sk->sk_write_queue.lock);
2045         if (po->tx_ring.pg_vec) {
2046                 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
2047                         mask |= POLLOUT | POLLWRNORM;
2048         }
2049         spin_unlock_bh(&sk->sk_write_queue.lock);
2050         return mask;
2051 }
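/*
 * Usage sketch (user space, illustrative only): consuming the RX ring
 * that the poll logic above signals.  POLLIN is raised once the frame
 * at the reader's position has left TP_STATUS_KERNEL.  Assumes
 * TPACKET_V2, <poll.h> and <linux/if_packet.h>, a char *ring obtained
 * from mmap(), the tpacket_req used to create the ring in "req", and
 * tp_block_size an exact multiple of tp_frame_size so frames sit back
 * to back.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	struct tpacket2_hdr *hdr;
 *
 *	hdr = (struct tpacket2_hdr *)(ring + idx * req.tp_frame_size);
 *	while (!(hdr->tp_status & TP_STATUS_USER))
 *		poll(&pfd, 1, -1);
 *	... read hdr->tp_snaplen bytes at (char *)hdr + hdr->tp_mac ...
 *	hdr->tp_status = TP_STATUS_KERNEL;	(hand the frame back)
 *	idx = (idx + 1) % req.tp_frame_nr;
 */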
2052
2053
/* Dirty? Well, I still have not learned a better way to account
 * for user mmaps.
 */
2057
2058 static void packet_mm_open(struct vm_area_struct *vma)
2059 {
2060         struct file *file = vma->vm_file;
2061         struct socket *sock = file->private_data;
2062         struct sock *sk = sock->sk;
2063
2064         if (sk)
2065                 atomic_inc(&pkt_sk(sk)->mapped);
2066 }
2067
2068 static void packet_mm_close(struct vm_area_struct *vma)
2069 {
2070         struct file *file = vma->vm_file;
2071         struct socket *sock = file->private_data;
2072         struct sock *sk = sock->sk;
2073
2074         if (sk)
2075                 atomic_dec(&pkt_sk(sk)->mapped);
2076 }
2077
2078 static const struct vm_operations_struct packet_mmap_ops = {
2079         .open   =       packet_mm_open,
2080         .close  =       packet_mm_close,
2081 };
2082
2083 static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
2084 {
2085         int i;
2086
2087         for (i = 0; i < len; i++) {
2088                 if (likely(pg_vec[i]))
2089                         free_pages((unsigned long) pg_vec[i], order);
2090         }
2091         kfree(pg_vec);
2092 }
2093
2094 static inline char *alloc_one_pg_vec_page(unsigned long order)
2095 {
2096         gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;
2097
2098         return (char *) __get_free_pages(gfp_flags, order);
2099 }
2100
2101 static char **alloc_pg_vec(struct tpacket_req *req, int order)
2102 {
2103         unsigned int block_nr = req->tp_block_nr;
2104         char **pg_vec;
2105         int i;
2106
	/* kcalloc checks block_nr * sizeof(char *) for overflow */
	pg_vec = kcalloc(block_nr, sizeof(char *), GFP_KERNEL);
2108         if (unlikely(!pg_vec))
2109                 goto out;
2110
2111         for (i = 0; i < block_nr; i++) {
2112                 pg_vec[i] = alloc_one_pg_vec_page(order);
2113                 if (unlikely(!pg_vec[i]))
2114                         goto out_free_pgvec;
2115         }
2116
2117 out:
2118         return pg_vec;
2119
2120 out_free_pgvec:
2121         free_pg_vec(pg_vec, order, block_nr);
2122         pg_vec = NULL;
2123         goto out;
2124 }
2125
2126 static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2127                 int closing, int tx_ring)
2128 {
2129         char **pg_vec = NULL;
2130         struct packet_sock *po = pkt_sk(sk);
2131         int was_running, order = 0;
2132         struct packet_ring_buffer *rb;
2133         struct sk_buff_head *rb_queue;
2134         __be16 num;
2135         int err;
2136
2137         rb = tx_ring ? &po->tx_ring : &po->rx_ring;
2138         rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
2139
2140         err = -EBUSY;
2141         if (!closing) {
2142                 if (atomic_read(&po->mapped))
2143                         goto out;
2144                 if (atomic_read(&rb->pending))
2145                         goto out;
2146         }
2147
2148         if (req->tp_block_nr) {
2149                 /* Sanity tests and some calculations */
2150                 err = -EBUSY;
2151                 if (unlikely(rb->pg_vec))
2152                         goto out;
2153
2154                 switch (po->tp_version) {
2155                 case TPACKET_V1:
2156                         po->tp_hdrlen = TPACKET_HDRLEN;
2157                         break;
2158                 case TPACKET_V2:
2159                         po->tp_hdrlen = TPACKET2_HDRLEN;
2160                         break;
2161                 }
2162
2163                 err = -EINVAL;
2164                 if (unlikely((int)req->tp_block_size <= 0))
2165                         goto out;
2166                 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
2167                         goto out;
2168                 if (unlikely(req->tp_frame_size < po->tp_hdrlen +
2169                                         po->tp_reserve))
2170                         goto out;
2171                 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
2172                         goto out;
2173
2174                 rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
2175                 if (unlikely(rb->frames_per_block <= 0))
2176                         goto out;
2177                 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
2178                                         req->tp_frame_nr))
2179                         goto out;
2180
2181                 err = -ENOMEM;
2182                 order = get_order(req->tp_block_size);
2183                 pg_vec = alloc_pg_vec(req, order);
2184                 if (unlikely(!pg_vec))
2185                         goto out;
	} else {
		/* tp_block_nr == 0 requests teardown; a non-zero frame
		 * count makes no sense without blocks.
		 */
2189                 err = -EINVAL;
2190                 if (unlikely(req->tp_frame_nr))
2191                         goto out;
2192         }
2193
2194         lock_sock(sk);
2195
2196         /* Detach socket from network */
2197         spin_lock(&po->bind_lock);
2198         was_running = po->running;
2199         num = po->num;
2200         if (was_running) {
2201                 __dev_remove_pack(&po->prot_hook);
2202                 po->num = 0;
2203                 po->running = 0;
2204                 __sock_put(sk);
2205         }
2206         spin_unlock(&po->bind_lock);
2207
2208         synchronize_net();
2209
2210         err = -EBUSY;
2211         mutex_lock(&po->pg_vec_lock);
2212         if (closing || atomic_read(&po->mapped) == 0) {
2213                 err = 0;
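/* XC(a, b): store b into a and return a's previous value */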
2214 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
2215                 spin_lock_bh(&rb_queue->lock);
2216                 pg_vec = XC(rb->pg_vec, pg_vec);
2217                 rb->frame_max = (req->tp_frame_nr - 1);
2218                 rb->head = 0;
2219                 rb->frame_size = req->tp_frame_size;
2220                 spin_unlock_bh(&rb_queue->lock);
2221
2222                 order = XC(rb->pg_vec_order, order);
2223                 req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);
2224
2225                 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
2226                 po->prot_hook.func = (po->rx_ring.pg_vec) ?
2227                                                 tpacket_rcv : packet_rcv;
2228                 skb_queue_purge(rb_queue);
2229 #undef XC
2230                 if (atomic_read(&po->mapped))
2231                         pr_err("packet_mmap: vma is busy: %d\n",
2232                                atomic_read(&po->mapped));
2233         }
2234         mutex_unlock(&po->pg_vec_lock);
2235
2236         spin_lock(&po->bind_lock);
2237         if (was_running && !po->running) {
2238                 sock_hold(sk);
2239                 po->running = 1;
2240                 po->num = num;
2241                 dev_add_pack(&po->prot_hook);
2242         }
2243         spin_unlock(&po->bind_lock);
2244
2245         release_sock(sk);
2246
2247         if (pg_vec)
2248                 free_pg_vec(pg_vec, order, req->tp_block_nr);
2249 out:
2250         return err;
2251 }
2252
2253 static int packet_mmap(struct file *file, struct socket *sock,
2254                 struct vm_area_struct *vma)
2255 {
2256         struct sock *sk = sock->sk;
2257         struct packet_sock *po = pkt_sk(sk);
2258         unsigned long size, expected_size;
2259         struct packet_ring_buffer *rb;
2260         unsigned long start;
2261         int err = -EINVAL;
2262         int i;
2263
2264         if (vma->vm_pgoff)
2265                 return -EINVAL;
2266
2267         mutex_lock(&po->pg_vec_lock);
2268
2269         expected_size = 0;
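	/* rx_ring and tx_ring are adjacent members of struct packet_sock,
	 * so this pointer walk visits exactly the two rings.
	 */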
2270         for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2271                 if (rb->pg_vec) {
2272                         expected_size += rb->pg_vec_len
2273                                                 * rb->pg_vec_pages
2274                                                 * PAGE_SIZE;
2275                 }
2276         }
2277
2278         if (expected_size == 0)
2279                 goto out;
2280
2281         size = vma->vm_end - vma->vm_start;
2282         if (size != expected_size)
2283                 goto out;
2284
2285         start = vma->vm_start;
2286         for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2287                 if (rb->pg_vec == NULL)
2288                         continue;
2289
2290                 for (i = 0; i < rb->pg_vec_len; i++) {
2291                         struct page *page = virt_to_page(rb->pg_vec[i]);
2292                         int pg_num;
2293
2294                         for (pg_num = 0; pg_num < rb->pg_vec_pages;
2295                                         pg_num++, page++) {
2296                                 err = vm_insert_page(vma, start, page);
2297                                 if (unlikely(err))
2298                                         goto out;
2299                                 start += PAGE_SIZE;
2300                         }
2301                 }
2302         }
2303
2304         atomic_inc(&po->mapped);
2305         vma->vm_ops = &packet_mmap_ops;
2306         err = 0;
2307
2308 out:
2309         mutex_unlock(&po->pg_vec_lock);
2310         return err;
2311 }
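/*
 * Usage sketch (user space, illustrative only): the size check above
 * means the mapping must cover both rings in one call, RX first then
 * TX, with vm_pgoff zero.  Assumes <sys/mman.h> and that "rx" and "tx"
 * are the tpacket_req structures used to create the rings (tx_sz is
 * simply zero if no TX ring was requested).
 *
 *	size_t rx_sz = (size_t)rx.tp_block_size * rx.tp_block_nr;
 *	size_t tx_sz = (size_t)tx.tp_block_size * tx.tp_block_nr;
 *	char *ring = mmap(NULL, rx_sz + tx_sz, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 *
 *	if (ring == MAP_FAILED)
 *		perror("mmap");
 *	RX frames start at ring, TX frames at ring + rx_sz.
 */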
2312 #endif
2313
2314
2315 static const struct proto_ops packet_ops_spkt = {
2316         .family =       PF_PACKET,
2317         .owner =        THIS_MODULE,
2318         .release =      packet_release,
2319         .bind =         packet_bind_spkt,
2320         .connect =      sock_no_connect,
2321         .socketpair =   sock_no_socketpair,
2322         .accept =       sock_no_accept,
2323         .getname =      packet_getname_spkt,
2324         .poll =         datagram_poll,
2325         .ioctl =        packet_ioctl,
2326         .listen =       sock_no_listen,
2327         .shutdown =     sock_no_shutdown,
2328         .setsockopt =   sock_no_setsockopt,
2329         .getsockopt =   sock_no_getsockopt,
2330         .sendmsg =      packet_sendmsg_spkt,
2331         .recvmsg =      packet_recvmsg,
2332         .mmap =         sock_no_mmap,
2333         .sendpage =     sock_no_sendpage,
2334 };
2335
2336 static const struct proto_ops packet_ops = {
2337         .family =       PF_PACKET,
2338         .owner =        THIS_MODULE,
2339         .release =      packet_release,
2340         .bind =         packet_bind,
2341         .connect =      sock_no_connect,
2342         .socketpair =   sock_no_socketpair,
2343         .accept =       sock_no_accept,
2344         .getname =      packet_getname,
2345         .poll =         packet_poll,
2346         .ioctl =        packet_ioctl,
2347         .listen =       sock_no_listen,
2348         .shutdown =     sock_no_shutdown,
2349         .setsockopt =   packet_setsockopt,
2350         .getsockopt =   packet_getsockopt,
2351         .sendmsg =      packet_sendmsg,
2352         .recvmsg =      packet_recvmsg,
2353         .mmap =         packet_mmap,
2354         .sendpage =     sock_no_sendpage,
2355 };
2356
2357 static const struct net_proto_family packet_family_ops = {
2358         .family =       PF_PACKET,
2359         .create =       packet_create,
2360         .owner  =       THIS_MODULE,
2361 };
2362
2363 static struct notifier_block packet_netdev_notifier = {
2364         .notifier_call =        packet_notifier,
2365 };
2366
2367 #ifdef CONFIG_PROC_FS
2368 static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
2369 {
2370         struct sock *s;
2371         struct hlist_node *node;
2372
2373         sk_for_each(s, node, &net->packet.sklist) {
2374                 if (!off--)
2375                         return s;
2376         }
2377         return NULL;
2378 }
2379
2380 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
2381         __acquires(seq_file_net(seq)->packet.sklist_lock)
2382 {
2383         struct net *net = seq_file_net(seq);
2384         read_lock(&net->packet.sklist_lock);
2385         return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
2386 }
2387
2388 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2389 {
2390         struct net *net = seq_file_net(seq);
2391         ++*pos;
	return (v == SEQ_START_TOKEN)
		? sk_head(&net->packet.sklist)
		: sk_next((struct sock *)v);
2395 }
2396
2397 static void packet_seq_stop(struct seq_file *seq, void *v)
2398         __releases(seq_file_net(seq)->packet.sklist_lock)
2399 {
2400         struct net *net = seq_file_net(seq);
2401         read_unlock(&net->packet.sklist_lock);
2402 }
2403
2404 static int packet_seq_show(struct seq_file *seq, void *v)
2405 {
2406         if (v == SEQ_START_TOKEN)
2407                 seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
2408         else {
2409                 struct sock *s = v;
2410                 const struct packet_sock *po = pkt_sk(s);
2411
2412                 seq_printf(seq,
2413                            "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
2414                            s,
2415                            atomic_read(&s->sk_refcnt),
2416                            s->sk_type,
2417                            ntohs(po->num),
2418                            po->ifindex,
2419                            po->running,
2420                            atomic_read(&s->sk_rmem_alloc),
2421                            sock_i_uid(s),
2422                            sock_i_ino(s));
2423         }
2424
2425         return 0;
2426 }
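/*
 * Example (illustrative, values invented): with the header and format
 * string above, a /proc/net/packet entry reads like
 *
 *	sk       RefCnt Type Proto  Iface R Rmem   User   Inode
 *	f6188c00 3      3    0003   2     1 0      0      7962
 *
 * where Type 3 is SOCK_RAW, Proto is the bound ethertype in hex
 * (0003 == ETH_P_ALL) and R is the po->running flag.
 */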
2427
2428 static const struct seq_operations packet_seq_ops = {
2429         .start  = packet_seq_start,
2430         .next   = packet_seq_next,
2431         .stop   = packet_seq_stop,
2432         .show   = packet_seq_show,
2433 };
2434
2435 static int packet_seq_open(struct inode *inode, struct file *file)
2436 {
2437         return seq_open_net(inode, file, &packet_seq_ops,
2438                             sizeof(struct seq_net_private));
2439 }
2440
2441 static const struct file_operations packet_seq_fops = {
2442         .owner          = THIS_MODULE,
2443         .open           = packet_seq_open,
2444         .read           = seq_read,
2445         .llseek         = seq_lseek,
2446         .release        = seq_release_net,
2447 };
2448
2449 #endif
2450
2451 static int __net_init packet_net_init(struct net *net)
2452 {
2453         rwlock_init(&net->packet.sklist_lock);
2454         INIT_HLIST_HEAD(&net->packet.sklist);
2455
2456         if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
2457                 return -ENOMEM;
2458
2459         return 0;
2460 }
2461
2462 static void __net_exit packet_net_exit(struct net *net)
2463 {
2464         proc_net_remove(net, "packet");
2465 }
2466
2467 static struct pernet_operations packet_net_ops = {
2468         .init = packet_net_init,
2469         .exit = packet_net_exit,
2470 };
2471
2472
2473 static void __exit packet_exit(void)
2474 {
2475         unregister_netdevice_notifier(&packet_netdev_notifier);
2476         unregister_pernet_subsys(&packet_net_ops);
2477         sock_unregister(PF_PACKET);
2478         proto_unregister(&packet_proto);
2479 }
2480
2481 static int __init packet_init(void)
2482 {
2483         int rc = proto_register(&packet_proto, 0);
2484
2485         if (rc != 0)
2486                 goto out;
2487
2488         sock_register(&packet_family_ops);
2489         register_pernet_subsys(&packet_net_ops);
2490         register_netdevice_notifier(&packet_netdev_notifier);
2491 out:
2492         return rc;
2493 }
2494
2495 module_init(packet_init);
2496 module_exit(packet_exit);
2497 MODULE_LICENSE("GPL");
2498 MODULE_ALIAS_NETPROTO(PF_PACKET);