[AF_PACKET]: Don't enable global timestamps.
[safe/jmp/linux-2.6] / net / packet / af_packet.c
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              PACKET - implements raw packet sockets.
 *
 * Version:     $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *              Alan Cox        :       verify_area() now used correctly
 *              Alan Cox        :       new skbuff lists, look ma no backlogs!
 *              Alan Cox        :       tidied skbuff lists.
 *              Alan Cox        :       Now uses generic datagram routines I
 *                                      added. Also fixed the peek/read crash
 *                                      from all old Linux datagram code.
 *              Alan Cox        :       Uses the improved datagram code.
 *              Alan Cox        :       Added NULL's for socket options.
 *              Alan Cox        :       Re-commented the code.
 *              Alan Cox        :       Use new kernel side addressing
 *              Rob Janssen     :       Correct MTU usage.
 *              Dave Platt      :       Counter leaks caused by incorrect
 *                                      interrupt locking and some slightly
 *                                      dubious gcc output. Can you read
 *                                      compiler: it said _VOLATILE_
 *      Richard Kooijman        :       Timestamp fixes.
 *              Alan Cox        :       New buffers. Use sk->mac.raw.
 *              Alan Cox        :       sendmsg/recvmsg support.
 *              Alan Cox        :       Protocol setting support
 *      Alexey Kuznetsov        :       Untied from IPv4 stack.
 *      Cyrus Durgin            :       Fixed kerneld for kmod.
 *      Michal Ostrowski        :       Module initialization cleanup.
 *         Ulises Alonso        :       Frame number limit removal and
 *                                      packet_set_ring memory leak.
 *              Eric Biederman  :       Allow for > 8 byte hardware addresses.
 *                                      The convention is that longer addresses
 *                                      will simply extend the hardware address
 *                                      byte arrays at the end of sockaddr_ll
 *                                      and packet_mreq.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

/*
   Assumptions:
   - if device has no dev->hard_header routine, it adds and removes ll header
     inside itself. In this case ll header is invisible outside of device,
     but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     will not fit into the reserved space (tunnel); others are silly
     (PPP).
   - packet socket receives packets with pulled ll header,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It is very likely that it points to the ll
                 header.  PPP does this, which is wrong, because it introduces
                 asymmetry between the rx and tx paths.
   data       -> data

Outgoing, dev->hard_header==NULL
   mac_header -> data. ll header is still not built!
   data       -> data

Summary
  If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position;
   the packet classifier depends on it.
 */
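
/*
 * Illustrative userspace sketch (an assumption for exposition, not part of
 * this file): the layout rules above are what a user observes.  A SOCK_RAW
 * packet socket delivers frames with the ll header in place, while
 * SOCK_DGRAM delivers payload only and describes the ll header through
 * sockaddr_ll.  "eth0" is a placeholder; CAP_NET_RAW is required (see
 * packet_create() below).
 *
 *	#include <sys/socket.h>
 *	#include <linux/if_packet.h>
 *	#include <linux/if_ether.h>
 *	#include <net/if.h>
 *	#include <arpa/inet.h>
 *
 *	int raw = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *	struct sockaddr_ll sll = {
 *		.sll_family   = AF_PACKET,
 *		.sll_protocol = htons(ETH_P_ALL),
 *		.sll_ifindex  = if_nametoindex("eth0"),
 *	};
 *	bind(raw, (struct sockaddr *)&sll, sizeof(sll));
 */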

/* List of all packet sockets. */
static HLIST_HEAD(packet_sklist);
static DEFINE_RWLOCK(packet_sklist_lock);

static atomic_t packet_socks_nr;


/* Private packet socket structures. */

struct packet_mclist
{
        struct packet_mclist    *next;
        int                     ifindex;
        int                     count;
        unsigned short          type;
        unsigned short          alen;
        unsigned char           addr[MAX_ADDR_LEN];
};
/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max
{
        int             mr_ifindex;
        unsigned short  mr_type;
        unsigned short  mr_alen;
        unsigned char   mr_address[MAX_ADDR_LEN];
};

#ifdef CONFIG_PACKET_MMAP
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
#endif

static void packet_flush_mclist(struct sock *sk);

struct packet_sock {
        /* struct sock has to be the first member of packet_sock */
        struct sock             sk;
        struct tpacket_stats    stats;
#ifdef CONFIG_PACKET_MMAP
        char *                  *pg_vec;
        unsigned int            head;
        unsigned int            frames_per_block;
        unsigned int            frame_size;
        unsigned int            frame_max;
        int                     copy_thresh;
#endif
        struct packet_type      prot_hook;
        spinlock_t              bind_lock;
        unsigned int            running:1,      /* prot_hook is attached */
                                auxdata:1,
                                origdev:1;
        int                     ifindex;        /* bound device         */
        __be16                  num;
        struct packet_mclist    *mclist;
#ifdef CONFIG_PACKET_MMAP
        atomic_t                mapped;
        unsigned int            pg_vec_order;
        unsigned int            pg_vec_pages;
        unsigned int            pg_vec_len;
#endif
};

struct packet_skb_cb {
        unsigned int origlen;
        union {
                struct sockaddr_pkt pkt;
                struct sockaddr_ll ll;
        } sa;
};

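/*
 * Address info for each queued skb is stashed in skb->cb via this macro; the
 * BUILD_BUG_ON() in packet_rcv() checks that packet_skb_cb, extended by the
 * long hardware-address tail, still fits inside the cb area.
 */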
#define PACKET_SKB_CB(__skb)    ((struct packet_skb_cb *)((__skb)->cb))

#ifdef CONFIG_PACKET_MMAP

static inline struct tpacket_hdr *packet_lookup_frame(struct packet_sock *po, unsigned int position)
{
        unsigned int pg_vec_pos, frame_offset;

        pg_vec_pos = position / po->frames_per_block;
        frame_offset = position % po->frames_per_block;

        return (struct tpacket_hdr *)(po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size));
}
#endif

static inline struct packet_sock *pkt_sk(struct sock *sk)
{
        return (struct packet_sock *)sk;
}

static void packet_sock_destruct(struct sock *sk)
{
        BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
        BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));

        if (!sock_flag(sk, SOCK_DEAD)) {
                printk("Attempt to release alive packet socket: %p\n", sk);
                return;
        }

        atomic_dec(&packet_socks_nr);
#ifdef PACKET_REFCNT_DEBUG
        printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
#endif
}


static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
        struct sock *sk;
        struct sockaddr_pkt *spkt;

        /*
         *      When we registered the protocol we saved the socket in the data
         *      field for just this event.
         */

        sk = pt->af_packet_priv;

        /*
         *      Yank back the headers [hope the device set this
         *      right or kerboom...]
         *
         *      Incoming packets have ll header pulled,
         *      push it back.
         *
         *      For outgoing ones skb->data == skb_mac_header(skb)
         *      so that this procedure is noop.
         */

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto out;

        if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
                goto oom;

        /* drop any routing info */
        dst_release(skb->dst);
        skb->dst = NULL;

        /* drop conntrack reference */
        nf_reset(skb);

        spkt = &PACKET_SKB_CB(skb)->sa.pkt;

        skb_push(skb, skb->data - skb_mac_header(skb));

        /*
         *      The SOCK_PACKET socket receives _all_ frames.
         */

        spkt->spkt_family = dev->type;
        strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
        spkt->spkt_protocol = skb->protocol;

        /*
         *      Charge the memory to the socket. This is done specifically
         *      to prevent sockets using all the memory up.
         */

        if (sock_queue_rcv_skb(sk,skb) == 0)
                return 0;

out:
        kfree_skb(skb);
oom:
        return 0;
}


/*
 *      Output a raw packet to a device layer. This bypasses all the other
 *      protocol layers and you must therefore supply it with a complete frame
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
                               struct msghdr *msg, size_t len)
{
        struct sock *sk = sock->sk;
        struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
        struct sk_buff *skb;
        struct net_device *dev;
        __be16 proto=0;
        int err;

        /*
         *      Get and verify the address.
         */

        if (saddr)
        {
                if (msg->msg_namelen < sizeof(struct sockaddr))
                        return(-EINVAL);
                if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
                        proto=saddr->spkt_protocol;
        }
        else
                return(-ENOTCONN);      /* SOCK_PACKET must be sent giving an address */

        /*
         *      Find the device first to size check it
         */

        saddr->spkt_device[13] = 0;
        dev = dev_get_by_name(saddr->spkt_device);
        err = -ENODEV;
        if (dev == NULL)
                goto out_unlock;

        err = -ENETDOWN;
        if (!(dev->flags & IFF_UP))
                goto out_unlock;

        /*
         *      You may not queue a frame bigger than the mtu. This is the lowest level
         *      raw protocol and you must do your own fragmentation at this level.
         */

        err = -EMSGSIZE;
        if (len > dev->mtu + dev->hard_header_len)
                goto out_unlock;

        err = -ENOBUFS;
        skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);

        /*
         *      If the write buffer is full, then tough. At this level the user gets to
         *      deal with the problem - do your own algorithmic backoffs. That's far
         *      more flexible.
         */

        if (skb == NULL)
                goto out_unlock;

        /*
         *      Fill it in
         */

        /* FIXME: Save some space for broken drivers that write a
         * hard header at transmission time by themselves. PPP is the
         * notable one here. This should really be fixed at the driver level.
         */
        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        skb_reset_network_header(skb);

        /* Try to align data part correctly */
        if (dev->hard_header) {
                skb->data -= dev->hard_header_len;
                skb->tail -= dev->hard_header_len;
                if (len < dev->hard_header_len)
                        skb_reset_network_header(skb);
        }

        /* Returns -EFAULT on error */
        err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
        skb->protocol = proto;
        skb->dev = dev;
        skb->priority = sk->sk_priority;
        if (err)
                goto out_free;

        /*
         *      Now send it
         */

        dev_queue_xmit(skb);
        dev_put(dev);
        return(len);

out_free:
        kfree_skb(skb);
out_unlock:
        if (dev)
                dev_put(dev);
        return err;
}
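
/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * sending through the obsolete SOCK_PACKET interface above.  The buffer
 * must already hold a complete ll frame; "eth0" is a placeholder.
 *
 *	struct sockaddr_pkt spkt = {
 *		.spkt_family   = AF_PACKET,
 *		.spkt_protocol = htons(ETH_P_ALL),
 *	};
 *	int fd = socket(AF_PACKET, SOCK_PACKET, htons(ETH_P_ALL));
 *
 *	strncpy((char *)spkt.spkt_device, "eth0", sizeof(spkt.spkt_device));
 *	sendto(fd, frame, frame_len, 0,
 *	       (struct sockaddr *)&spkt, sizeof(spkt));
 */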

static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
                                      unsigned int res)
{
        struct sk_filter *filter;

        rcu_read_lock_bh();
        filter = rcu_dereference(sk->sk_filter);
        if (filter != NULL)
                res = sk_run_filter(skb, filter->insns, filter->len);
        rcu_read_unlock_bh();

        return res;
}
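
/*
 * Illustrative userspace sketch (not part of this file): the sk_filter
 * consulted above is installed with SO_ATTACH_FILTER.  A classic BPF
 * program returning 0 drops the packet; a non-zero return becomes the snap
 * length.  This one-instruction filter keeps only the first 96 bytes of
 * each packet.
 *
 *	#include <linux/filter.h>
 *
 *	struct sock_filter code[] = {
 *		{ BPF_RET | BPF_K, 0, 0, 96 },	// return 96: snap to 96 bytes
 *	};
 *	struct sock_fprog prog = { .len = 1, .filter = code };
 *
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
 */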

/*
   This function does lazy skb cloning in the hope that most of the packets
   are discarded by BPF.

   Note the tricky part: we DO mangle the shared skb! skb->data, skb->len
   and skb->cb are mangled. It works because (and until) packets
   falling here are owned by the current CPU. Output packets are cloned
   by dev_queue_xmit_nit(), input packets are processed by net_bh
   sequentially, so that if we return the skb to its original state on exit,
   we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
        struct sock *sk;
        struct sockaddr_ll *sll;
        struct packet_sock *po;
        u8 * skb_head = skb->data;
        int skb_len = skb->len;
        unsigned int snaplen, res;

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto drop;

        sk = pt->af_packet_priv;
        po = pkt_sk(sk);

        skb->dev = dev;

        if (dev->hard_header) {
                /* The device has an explicit notion of ll header,
                   exported to higher levels.

                   Otherwise, the device hides the details of its frame
                   structure, so that the corresponding packet header is
                   never delivered to the user.
                 */
                if (sk->sk_type != SOCK_DGRAM)
                        skb_push(skb, skb->data - skb_mac_header(skb));
                else if (skb->pkt_type == PACKET_OUTGOING) {
                        /* Special case: outgoing packets have ll header at head */
                        skb_pull(skb, skb_network_offset(skb));
                }
        }

        snaplen = skb->len;

        res = run_filter(skb, sk, snaplen);
        if (!res)
                goto drop_n_restore;
        if (snaplen > res)
                snaplen = res;

        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
            (unsigned)sk->sk_rcvbuf)
                goto drop_n_acct;

        if (skb_shared(skb)) {
                struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
                if (nskb == NULL)
                        goto drop_n_acct;

                if (skb_head != skb->data) {
                        skb->data = skb_head;
                        skb->len = skb_len;
                }
                kfree_skb(skb);
                skb = nskb;
        }

        BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
                     sizeof(skb->cb));

        sll = &PACKET_SKB_CB(skb)->sa.ll;
        sll->sll_family = AF_PACKET;
        sll->sll_hatype = dev->type;
        sll->sll_protocol = skb->protocol;
        sll->sll_pkttype = skb->pkt_type;
        if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST)
                sll->sll_ifindex = orig_dev->ifindex;
        else
                sll->sll_ifindex = dev->ifindex;
        sll->sll_halen = 0;

        if (dev->hard_header_parse)
                sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);

        PACKET_SKB_CB(skb)->origlen = skb->len;

        if (pskb_trim(skb, snaplen))
                goto drop_n_acct;

        skb_set_owner_r(skb, sk);
        skb->dev = NULL;
        dst_release(skb->dst);
        skb->dst = NULL;

        /* drop conntrack reference */
        nf_reset(skb);

        spin_lock(&sk->sk_receive_queue.lock);
        po->stats.tp_packets++;
        __skb_queue_tail(&sk->sk_receive_queue, skb);
        spin_unlock(&sk->sk_receive_queue.lock);
        sk->sk_data_ready(sk, skb->len);
        return 0;

drop_n_acct:
        spin_lock(&sk->sk_receive_queue.lock);
        po->stats.tp_drops++;
        spin_unlock(&sk->sk_receive_queue.lock);

drop_n_restore:
        if (skb_head != skb->data && skb_shared(skb)) {
                skb->data = skb_head;
                skb->len = skb_len;
        }
drop:
        kfree_skb(skb);
        return 0;
}

#ifdef CONFIG_PACKET_MMAP
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
        struct sock *sk;
        struct packet_sock *po;
        struct sockaddr_ll *sll;
        struct tpacket_hdr *h;
        u8 * skb_head = skb->data;
        int skb_len = skb->len;
        unsigned int snaplen, res;
        unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
        unsigned short macoff, netoff;
        struct sk_buff *copy_skb = NULL;
        struct timeval tv;

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto drop;

        sk = pt->af_packet_priv;
        po = pkt_sk(sk);

        if (dev->hard_header) {
                if (sk->sk_type != SOCK_DGRAM)
                        skb_push(skb, skb->data - skb_mac_header(skb));
                else if (skb->pkt_type == PACKET_OUTGOING) {
                        /* Special case: outgoing packets have ll header at head */
                        skb_pull(skb, skb_network_offset(skb));
                }
        }

        if (skb->ip_summed == CHECKSUM_PARTIAL)
                status |= TP_STATUS_CSUMNOTREADY;

        snaplen = skb->len;

        res = run_filter(skb, sk, snaplen);
        if (!res)
                goto drop_n_restore;
        if (snaplen > res)
                snaplen = res;

        if (sk->sk_type == SOCK_DGRAM) {
                macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
        } else {
                unsigned maclen = skb_network_offset(skb);
                netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
                macoff = netoff - maclen;
        }

        if (macoff + snaplen > po->frame_size) {
                if (po->copy_thresh &&
                    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
                    (unsigned)sk->sk_rcvbuf) {
                        if (skb_shared(skb)) {
                                copy_skb = skb_clone(skb, GFP_ATOMIC);
                        } else {
                                copy_skb = skb_get(skb);
                                skb_head = skb->data;
                        }
                        if (copy_skb)
                                skb_set_owner_r(copy_skb, sk);
                }
                snaplen = po->frame_size - macoff;
                if ((int)snaplen < 0)
                        snaplen = 0;
        }

        spin_lock(&sk->sk_receive_queue.lock);
        h = packet_lookup_frame(po, po->head);

        if (h->tp_status)
                goto ring_is_full;
        po->head = po->head != po->frame_max ? po->head+1 : 0;
        po->stats.tp_packets++;
        if (copy_skb) {
                status |= TP_STATUS_COPY;
                __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
        }
        if (!po->stats.tp_drops)
                status &= ~TP_STATUS_LOSING;
        spin_unlock(&sk->sk_receive_queue.lock);

        skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen);

        h->tp_len = skb->len;
        h->tp_snaplen = snaplen;
        h->tp_mac = macoff;
        h->tp_net = netoff;
        if (skb->tstamp.tv64)
                tv = ktime_to_timeval(skb->tstamp);
        else
                do_gettimeofday(&tv);
        h->tp_sec = tv.tv_sec;
        h->tp_usec = tv.tv_usec;

        sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
        sll->sll_halen = 0;
        if (dev->hard_header_parse)
                sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
        sll->sll_family = AF_PACKET;
        sll->sll_hatype = dev->type;
        sll->sll_protocol = skb->protocol;
        sll->sll_pkttype = skb->pkt_type;
        if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST)
                sll->sll_ifindex = orig_dev->ifindex;
        else
                sll->sll_ifindex = dev->ifindex;

        h->tp_status = status;
        smp_mb();

        {
                struct page *p_start, *p_end;
                u8 *h_end = (u8 *)h + macoff + snaplen - 1;

                p_start = virt_to_page(h);
                p_end = virt_to_page(h_end);
                while (p_start <= p_end) {
                        flush_dcache_page(p_start);
                        p_start++;
                }
        }

        sk->sk_data_ready(sk, 0);

drop_n_restore:
        if (skb_head != skb->data && skb_shared(skb)) {
                skb->data = skb_head;
                skb->len = skb_len;
        }
drop:
        kfree_skb(skb);
        return 0;

ring_is_full:
        po->stats.tp_drops++;
        spin_unlock(&sk->sk_receive_queue.lock);

        sk->sk_data_ready(sk, 0);
        if (copy_skb)
                kfree_skb(copy_skb);
        goto drop_n_restore;
}

#endif
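
/*
 * Illustrative userspace sketch (not part of this file): consuming the
 * mmap()ed RX ring that tpacket_rcv() fills.  The sizes are placeholder
 * assumptions; tp_block_size must be a multiple of PAGE_SIZE and
 * tp_frame_nr must equal frames-per-block times tp_block_nr (see the
 * sanity checks in packet_set_ring() below).
 *
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096,
 *		.tp_block_nr   = 64,
 *		.tp_frame_size = 2048,
 *		.tp_frame_nr   = 128,
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *
 *	char *ring = mmap(NULL, req.tp_block_size * req.tp_block_nr,
 *			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 *	// For frame i, poll tp_status and hand the slot back when done:
 *	struct tpacket_hdr *hdr =
 *		(struct tpacket_hdr *)(ring + i * req.tp_frame_size);
 *	if (hdr->tp_status & TP_STATUS_USER) {
 *		handle((char *)hdr + hdr->tp_mac, hdr->tp_snaplen);
 *		hdr->tp_status = TP_STATUS_KERNEL;
 *	}
 */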


static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
                          struct msghdr *msg, size_t len)
{
        struct sock *sk = sock->sk;
        struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
        struct sk_buff *skb;
        struct net_device *dev;
        __be16 proto;
        unsigned char *addr;
        int ifindex, err, reserve = 0;

        /*
         *      Get and verify the address.
         */

        if (saddr == NULL) {
                struct packet_sock *po = pkt_sk(sk);

                ifindex = po->ifindex;
                proto   = po->num;
                addr    = NULL;
        } else {
                err = -EINVAL;
                if (msg->msg_namelen < sizeof(struct sockaddr_ll))
                        goto out;
                if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
                        goto out;
                ifindex = saddr->sll_ifindex;
                proto   = saddr->sll_protocol;
                addr    = saddr->sll_addr;
        }


        dev = dev_get_by_index(ifindex);
        err = -ENXIO;
        if (dev == NULL)
                goto out_unlock;
        if (sock->type == SOCK_RAW)
                reserve = dev->hard_header_len;

        err = -ENETDOWN;
        if (!(dev->flags & IFF_UP))
                goto out_unlock;

        err = -EMSGSIZE;
        if (len > dev->mtu+reserve)
                goto out_unlock;

        skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
                                msg->msg_flags & MSG_DONTWAIT, &err);
        if (skb==NULL)
                goto out_unlock;

        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        skb_reset_network_header(skb);

        if (dev->hard_header) {
                int res;
                err = -EINVAL;
                res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
                if (sock->type != SOCK_DGRAM) {
                        skb_reset_tail_pointer(skb);
                        skb->len = 0;
                } else if (res < 0)
                        goto out_free;
        }

        /* Returns -EFAULT on error */
        err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
        if (err)
                goto out_free;

        skb->protocol = proto;
        skb->dev = dev;
        skb->priority = sk->sk_priority;

        /*
         *      Now send it
         */

        err = dev_queue_xmit(skb);
        if (err > 0 && (err = net_xmit_errno(err)) != 0)
                goto out_unlock;

        dev_put(dev);

        return(len);

out_free:
        kfree_skb(skb);
out_unlock:
        if (dev)
                dev_put(dev);
out:
        return err;
}
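
/*
 * Illustrative userspace sketch (not part of this file): transmitting via
 * the path above from a SOCK_DGRAM packet socket, letting dev->hard_header()
 * build the ll header from the sockaddr_ll.  The destination MAC address and
 * interface name are placeholder assumptions.
 *
 *	struct sockaddr_ll dst = {
 *		.sll_family   = AF_PACKET,
 *		.sll_protocol = htons(ETH_P_IP),
 *		.sll_ifindex  = if_nametoindex("eth0"),
 *		.sll_halen    = ETH_ALEN,
 *		.sll_addr     = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
 *	};
 *	sendto(fd, payload, payload_len, 0,
 *	       (struct sockaddr *)&dst, sizeof(dst));
 */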

/*
 *      Close a PACKET socket. This is fairly simple. We immediately go
 *      to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct packet_sock *po;

        if (!sk)
                return 0;

        po = pkt_sk(sk);

        write_lock_bh(&packet_sklist_lock);
        sk_del_node_init(sk);
        write_unlock_bh(&packet_sklist_lock);

        /*
         *      Unhook packet receive handler.
         */

        if (po->running) {
                /*
                 *      Remove the protocol hook
                 */
                dev_remove_pack(&po->prot_hook);
                po->running = 0;
                po->num = 0;
                __sock_put(sk);
        }

        packet_flush_mclist(sk);

#ifdef CONFIG_PACKET_MMAP
        if (po->pg_vec) {
                struct tpacket_req req;
                memset(&req, 0, sizeof(req));
                packet_set_ring(sk, &req, 1);
        }
#endif

        /*
         *      Now the socket is dead. No more input will appear.
         */

        sock_orphan(sk);
        sock->sk = NULL;

        /* Purge queues */

        skb_queue_purge(&sk->sk_receive_queue);

        sock_put(sk);
        return 0;
}

/*
 *      Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
        struct packet_sock *po = pkt_sk(sk);
        /*
         *      Detach an existing hook if present.
         */

        lock_sock(sk);

        spin_lock(&po->bind_lock);
        if (po->running) {
                __sock_put(sk);
                po->running = 0;
                po->num = 0;
                spin_unlock(&po->bind_lock);
                dev_remove_pack(&po->prot_hook);
                spin_lock(&po->bind_lock);
        }

        po->num = protocol;
        po->prot_hook.type = protocol;
        po->prot_hook.dev = dev;

        po->ifindex = dev ? dev->ifindex : 0;

        if (protocol == 0)
                goto out_unlock;

        if (dev) {
                if (dev->flags&IFF_UP) {
                        dev_add_pack(&po->prot_hook);
                        sock_hold(sk);
                        po->running = 1;
                } else {
                        sk->sk_err = ENETDOWN;
                        if (!sock_flag(sk, SOCK_DEAD))
                                sk->sk_error_report(sk);
                }
        } else {
                dev_add_pack(&po->prot_hook);
                sock_hold(sk);
                po->running = 1;
        }

out_unlock:
        spin_unlock(&po->bind_lock);
        release_sock(sk);
        return 0;
}

/*
 *      Bind a packet socket to a device
 */

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sock *sk=sock->sk;
        char name[15];
        struct net_device *dev;
        int err = -ENODEV;

        /*
         *      Check legality
         */

        if (addr_len != sizeof(struct sockaddr))
                return -EINVAL;
        strlcpy(name,uaddr->sa_data,sizeof(name));

        dev = dev_get_by_name(name);
        if (dev) {
                err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
                dev_put(dev);
        }
        return err;
}

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
        struct sock *sk=sock->sk;
        struct net_device *dev = NULL;
        int err;


        /*
         *      Check legality
         */

        if (addr_len < sizeof(struct sockaddr_ll))
                return -EINVAL;
        if (sll->sll_family != AF_PACKET)
                return -EINVAL;

        if (sll->sll_ifindex) {
                err = -ENODEV;
                dev = dev_get_by_index(sll->sll_ifindex);
                if (dev == NULL)
                        goto out;
        }
        err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
        if (dev)
                dev_put(dev);

out:
        return err;
}

static struct proto packet_proto = {
        .name     = "PACKET",
        .owner    = THIS_MODULE,
        .obj_size = sizeof(struct packet_sock),
};

/*
 *      Create a packet socket.
 */

static int packet_create(struct socket *sock, int protocol)
{
        struct sock *sk;
        struct packet_sock *po;
        __be16 proto = (__force __be16)protocol; /* weird, but documented */
        int err;

        if (!capable(CAP_NET_RAW))
                return -EPERM;
        if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
            sock->type != SOCK_PACKET)
                return -ESOCKTNOSUPPORT;

        sock->state = SS_UNCONNECTED;

        err = -ENOBUFS;
        sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
        if (sk == NULL)
                goto out;

        sock->ops = &packet_ops;
        if (sock->type == SOCK_PACKET)
                sock->ops = &packet_ops_spkt;

        sock_init_data(sock, sk);

        po = pkt_sk(sk);
        sk->sk_family = PF_PACKET;
        po->num = proto;

        sk->sk_destruct = packet_sock_destruct;
        atomic_inc(&packet_socks_nr);

        /*
         *      Attach a protocol block
         */

        spin_lock_init(&po->bind_lock);
        po->prot_hook.func = packet_rcv;

        if (sock->type == SOCK_PACKET)
                po->prot_hook.func = packet_rcv_spkt;

        po->prot_hook.af_packet_priv = sk;

        if (proto) {
                po->prot_hook.type = proto;
                dev_add_pack(&po->prot_hook);
                sock_hold(sk);
                po->running = 1;
        }

        write_lock_bh(&packet_sklist_lock);
        sk_add_node(sk, &packet_sklist);
        write_unlock_bh(&packet_sklist_lock);
        return(0);
out:
        return err;
}

/*
 *      Pull a packet from our receive queue and hand it to the user.
 *      If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
                          struct msghdr *msg, size_t len, int flags)
{
        struct sock *sk = sock->sk;
        struct sk_buff *skb;
        int copied, err;
        struct sockaddr_ll *sll;

        err = -EINVAL;
        if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
                goto out;

#if 0
        /* What error should we return now? EUNATTACH? */
        if (pkt_sk(sk)->ifindex < 0)
                return -ENODEV;
#endif

        /*
         *      Call the generic datagram receiver. This handles all sorts
         *      of horrible races and re-entrancy so we can forget about it
         *      in the protocol layers.
         *
         *      Now it will return ENETDOWN if the device has just gone down,
         *      but then it will block.
         */

        skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);

        /*
         *      An error occurred, so return it. Because skb_recv_datagram()
         *      handles the blocking we don't see, we need not worry about
         *      blocking retries.
         */

        if (skb == NULL)
                goto out;

        /*
         *      If the address length field is there to be filled in, we fill
         *      it in now.
         */

        sll = &PACKET_SKB_CB(skb)->sa.ll;
        if (sock->type == SOCK_PACKET)
                msg->msg_namelen = sizeof(struct sockaddr_pkt);
        else
                msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

        /*
         *      You lose any data beyond the buffer you gave. If it worries a
         *      user program they can ask the device for its MTU anyway.
         */

        copied = skb->len;
        if (copied > len)
        {
                copied=len;
                msg->msg_flags|=MSG_TRUNC;
        }

        err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
        if (err)
                goto out_free;

        sock_recv_timestamp(msg, sk, skb);

        if (msg->msg_name)
                memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
                       msg->msg_namelen);

        if (pkt_sk(sk)->auxdata) {
                struct tpacket_auxdata aux;

                aux.tp_status = TP_STATUS_USER;
                if (skb->ip_summed == CHECKSUM_PARTIAL)
                        aux.tp_status |= TP_STATUS_CSUMNOTREADY;
                aux.tp_len = PACKET_SKB_CB(skb)->origlen;
                aux.tp_snaplen = skb->len;
                aux.tp_mac = 0;
                aux.tp_net = skb_network_offset(skb);

                put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
        }

        /*
         *      Free or return the buffer as appropriate. Again this
         *      hides all the races and re-entrancy issues from us.
         */
        err = (flags&MSG_TRUNC) ? skb->len : copied;

out_free:
        skb_free_datagram(sk, skb);
out:
        return err;
}
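
/*
 * Illustrative userspace sketch (not part of this file): reading the
 * PACKET_AUXDATA control message emitted above.  The option must first be
 * enabled with setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, ...); the msghdr
 * setup (iov and control buffer) is assumed to be done by the caller.
 *
 *	struct msghdr msg;	// msg_iov and msg_control filled in elsewhere
 *	struct cmsghdr *cmsg;
 *
 *	recvmsg(fd, &msg, 0);
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
 *		if (cmsg->cmsg_level == SOL_PACKET &&
 *		    cmsg->cmsg_type == PACKET_AUXDATA) {
 *			struct tpacket_auxdata *aux =
 *				(struct tpacket_auxdata *)CMSG_DATA(cmsg);
 *			// aux->tp_len is the original length before snapping
 *		}
 *	}
 */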

static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
                               int *uaddr_len, int peer)
{
        struct net_device *dev;
        struct sock *sk = sock->sk;

        if (peer)
                return -EOPNOTSUPP;

        uaddr->sa_family = AF_PACKET;
        dev = dev_get_by_index(pkt_sk(sk)->ifindex);
        if (dev) {
                strlcpy(uaddr->sa_data, dev->name, 15);
                dev_put(dev);
        } else
                memset(uaddr->sa_data, 0, 14);
        *uaddr_len = sizeof(*uaddr);

        return 0;
}

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
                          int *uaddr_len, int peer)
{
        struct net_device *dev;
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
        struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;

        if (peer)
                return -EOPNOTSUPP;

        sll->sll_family = AF_PACKET;
        sll->sll_ifindex = po->ifindex;
        sll->sll_protocol = po->num;
        dev = dev_get_by_index(po->ifindex);
        if (dev) {
                sll->sll_hatype = dev->type;
                sll->sll_halen = dev->addr_len;
                memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
                dev_put(dev);
        } else {
                sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
                sll->sll_halen = 0;
        }
        *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

        return 0;
}

static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
{
        switch (i->type) {
        case PACKET_MR_MULTICAST:
                if (what > 0)
                        dev_mc_add(dev, i->addr, i->alen, 0);
                else
                        dev_mc_delete(dev, i->addr, i->alen, 0);
                break;
        case PACKET_MR_PROMISC:
                dev_set_promiscuity(dev, what);
                break;
        case PACKET_MR_ALLMULTI:
                dev_set_allmulti(dev, what);
                break;
        default:;
        }
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
        for ( ; i; i=i->next) {
                if (i->ifindex == dev->ifindex)
                        packet_dev_mc(dev, i, what);
        }
}

static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
        struct packet_sock *po = pkt_sk(sk);
        struct packet_mclist *ml, *i;
        struct net_device *dev;
        int err;

        rtnl_lock();

        err = -ENODEV;
        dev = __dev_get_by_index(mreq->mr_ifindex);
        if (!dev)
                goto done;

        err = -EINVAL;
        if (mreq->mr_alen > dev->addr_len)
                goto done;

        err = -ENOBUFS;
        i = kmalloc(sizeof(*i), GFP_KERNEL);
        if (i == NULL)
                goto done;

        err = 0;
        for (ml = po->mclist; ml; ml = ml->next) {
                if (ml->ifindex == mreq->mr_ifindex &&
                    ml->type == mreq->mr_type &&
                    ml->alen == mreq->mr_alen &&
                    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
                        ml->count++;
                        /* Free the new element ... */
                        kfree(i);
                        goto done;
                }
        }

        i->type = mreq->mr_type;
        i->ifindex = mreq->mr_ifindex;
        i->alen = mreq->mr_alen;
        memcpy(i->addr, mreq->mr_address, i->alen);
        i->count = 1;
        i->next = po->mclist;
        po->mclist = i;
        packet_dev_mc(dev, i, +1);

done:
        rtnl_unlock();
        return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
        struct packet_mclist *ml, **mlp;

        rtnl_lock();

        for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
                if (ml->ifindex == mreq->mr_ifindex &&
                    ml->type == mreq->mr_type &&
                    ml->alen == mreq->mr_alen &&
                    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
                        if (--ml->count == 0) {
                                struct net_device *dev;
                                *mlp = ml->next;
                                dev = dev_get_by_index(ml->ifindex);
                                if (dev) {
                                        packet_dev_mc(dev, ml, -1);
                                        dev_put(dev);
                                }
                                kfree(ml);
                        }
                        rtnl_unlock();
                        return 0;
                }
        }
        rtnl_unlock();
        return -EADDRNOTAVAIL;
}
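
/*
 * Illustrative userspace sketch (not part of this file): the membership
 * machinery above is driven via setsockopt.  Putting the bound interface
 * into promiscuous mode, for example ("eth0" is a placeholder):
 *
 *	struct packet_mreq mreq = {
 *		.mr_ifindex = if_nametoindex("eth0"),
 *		.mr_type    = PACKET_MR_PROMISC,
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
 *		   &mreq, sizeof(mreq));
 *
 * A matching PACKET_DROP_MEMBERSHIP undoes it; memberships are reference
 * counted per (ifindex, type, address) tuple, as packet_mc_add() and
 * packet_mc_drop() show.
 */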

static void packet_flush_mclist(struct sock *sk)
{
        struct packet_sock *po = pkt_sk(sk);
        struct packet_mclist *ml;

        if (!po->mclist)
                return;

        rtnl_lock();
        while ((ml = po->mclist) != NULL) {
                struct net_device *dev;

                po->mclist = ml->next;
                if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
                        packet_dev_mc(dev, ml, -1);
                        dev_put(dev);
                }
                kfree(ml);
        }
        rtnl_unlock();
}

static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
{
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
        int ret;

        if (level != SOL_PACKET)
                return -ENOPROTOOPT;

        switch(optname) {
        case PACKET_ADD_MEMBERSHIP:
        case PACKET_DROP_MEMBERSHIP:
        {
                struct packet_mreq_max mreq;
                int len = optlen;
                memset(&mreq, 0, sizeof(mreq));
                if (len < sizeof(struct packet_mreq))
                        return -EINVAL;
                if (len > sizeof(mreq))
                        len = sizeof(mreq);
                if (copy_from_user(&mreq,optval,len))
                        return -EFAULT;
                if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
                        return -EINVAL;
                if (optname == PACKET_ADD_MEMBERSHIP)
                        ret = packet_mc_add(sk, &mreq);
                else
                        ret = packet_mc_drop(sk, &mreq);
                return ret;
        }

#ifdef CONFIG_PACKET_MMAP
        case PACKET_RX_RING:
        {
                struct tpacket_req req;

                if (optlen<sizeof(req))
                        return -EINVAL;
                if (copy_from_user(&req,optval,sizeof(req)))
                        return -EFAULT;
                return packet_set_ring(sk, &req, 0);
        }
        case PACKET_COPY_THRESH:
        {
                int val;

                if (optlen!=sizeof(val))
                        return -EINVAL;
                if (copy_from_user(&val,optval,sizeof(val)))
                        return -EFAULT;

                pkt_sk(sk)->copy_thresh = val;
                return 0;
        }
#endif
        case PACKET_AUXDATA:
        {
                int val;

                if (optlen < sizeof(val))
                        return -EINVAL;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;

                po->auxdata = !!val;
                return 0;
        }
        case PACKET_ORIGDEV:
        {
                int val;

                if (optlen < sizeof(val))
                        return -EINVAL;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;

                po->origdev = !!val;
                return 0;
        }
        default:
                return -ENOPROTOOPT;
        }
}

static int packet_getsockopt(struct socket *sock, int level, int optname,
                             char __user *optval, int __user *optlen)
{
        int len;
        int val;
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
        void *data;
        struct tpacket_stats st;

        if (level != SOL_PACKET)
                return -ENOPROTOOPT;

        if (get_user(len, optlen))
                return -EFAULT;

        if (len < 0)
                return -EINVAL;

        switch(optname) {
        case PACKET_STATISTICS:
                if (len > sizeof(struct tpacket_stats))
                        len = sizeof(struct tpacket_stats);
                spin_lock_bh(&sk->sk_receive_queue.lock);
                st = po->stats;
                memset(&po->stats, 0, sizeof(st));
                spin_unlock_bh(&sk->sk_receive_queue.lock);
                st.tp_packets += st.tp_drops;

                data = &st;
                break;
        case PACKET_AUXDATA:
                if (len > sizeof(int))
                        len = sizeof(int);
                val = po->auxdata;

                data = &val;
                break;
        case PACKET_ORIGDEV:
                if (len > sizeof(int))
                        len = sizeof(int);
                val = po->origdev;

                data = &val;
                break;
        default:
                return -ENOPROTOOPT;
        }

        if (put_user(len, optlen))
                return -EFAULT;
        if (copy_to_user(optval, data, len))
                return -EFAULT;
        return 0;
}
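
/*
 * Illustrative userspace sketch (not part of this file): reading the
 * counters maintained in packet_rcv()/tpacket_rcv().  As the code above
 * shows, the counters are reset on every read and tp_packets includes
 * the drops.
 *
 *	struct tpacket_stats st;
 *	socklen_t stlen = sizeof(st);
 *
 *	getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &stlen);
 *	printf("seen %u, dropped %u\n", st.tp_packets, st.tp_drops);
 */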


static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
        struct sock *sk;
        struct hlist_node *node;
        struct net_device *dev = data;

        read_lock(&packet_sklist_lock);
        sk_for_each(sk, node, &packet_sklist) {
                struct packet_sock *po = pkt_sk(sk);

                switch (msg) {
                case NETDEV_UNREGISTER:
                        if (po->mclist)
                                packet_dev_mclist(dev, po->mclist, -1);
                        /* fallthrough */

                case NETDEV_DOWN:
                        if (dev->ifindex == po->ifindex) {
                                spin_lock(&po->bind_lock);
                                if (po->running) {
                                        __dev_remove_pack(&po->prot_hook);
                                        __sock_put(sk);
                                        po->running = 0;
                                        sk->sk_err = ENETDOWN;
                                        if (!sock_flag(sk, SOCK_DEAD))
                                                sk->sk_error_report(sk);
                                }
                                if (msg == NETDEV_UNREGISTER) {
                                        po->ifindex = -1;
                                        po->prot_hook.dev = NULL;
                                }
                                spin_unlock(&po->bind_lock);
                        }
                        break;
                case NETDEV_UP:
                        spin_lock(&po->bind_lock);
                        if (dev->ifindex == po->ifindex && po->num &&
                            !po->running) {
                                dev_add_pack(&po->prot_hook);
                                sock_hold(sk);
                                po->running = 1;
                        }
                        spin_unlock(&po->bind_lock);
                        break;
                }
        }
        read_unlock(&packet_sklist_lock);
        return NOTIFY_DONE;
}


static int packet_ioctl(struct socket *sock, unsigned int cmd,
                        unsigned long arg)
{
        struct sock *sk = sock->sk;

        switch(cmd) {
                case SIOCOUTQ:
                {
                        int amount = atomic_read(&sk->sk_wmem_alloc);
                        return put_user(amount, (int __user *)arg);
                }
                case SIOCINQ:
                {
                        struct sk_buff *skb;
                        int amount = 0;

                        spin_lock_bh(&sk->sk_receive_queue.lock);
                        skb = skb_peek(&sk->sk_receive_queue);
                        if (skb)
                                amount = skb->len;
                        spin_unlock_bh(&sk->sk_receive_queue.lock);
                        return put_user(amount, (int __user *)arg);
                }
                case SIOCGSTAMP:
                        return sock_get_timestamp(sk, (struct timeval __user *)arg);
                case SIOCGSTAMPNS:
                        return sock_get_timestampns(sk, (struct timespec __user *)arg);

#ifdef CONFIG_INET
                case SIOCADDRT:
                case SIOCDELRT:
                case SIOCDARP:
                case SIOCGARP:
                case SIOCSARP:
                case SIOCGIFADDR:
                case SIOCSIFADDR:
                case SIOCGIFBRDADDR:
                case SIOCSIFBRDADDR:
                case SIOCGIFNETMASK:
                case SIOCSIFNETMASK:
                case SIOCGIFDSTADDR:
                case SIOCSIFDSTADDR:
                case SIOCSIFFLAGS:
                        return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

                default:
                        return -ENOIOCTLCMD;
        }
        return 0;
}
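
/*
 * Illustrative userspace sketch (not part of this file): fetching the
 * timestamp of the last received packet through the SIOCGSTAMP case above.
 * Timestamps are taken on demand rather than enabled globally (hence the
 * commit title), so the query can fail before any stamped packet has been
 * seen; check the return value.
 *
 *	struct timeval tv;
 *
 *	recv(fd, buf, sizeof(buf), 0);
 *	if (ioctl(fd, SIOCGSTAMP, &tv) == 0)
 *		printf("rx at %ld.%06ld\n",
 *		       (long)tv.tv_sec, (long)tv.tv_usec);
 */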

#ifndef CONFIG_PACKET_MMAP
#define packet_mmap sock_no_mmap
#define packet_poll datagram_poll
#else

static unsigned int packet_poll(struct file * file, struct socket *sock,
                                poll_table *wait)
{
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
        unsigned int mask = datagram_poll(file, sock, wait);

        spin_lock_bh(&sk->sk_receive_queue.lock);
        if (po->pg_vec) {
                unsigned last = po->head ? po->head-1 : po->frame_max;
                struct tpacket_hdr *h;

                h = packet_lookup_frame(po, last);

                if (h->tp_status)
                        mask |= POLLIN | POLLRDNORM;
        }
        spin_unlock_bh(&sk->sk_receive_queue.lock);
        return mask;
}


/* Dirty? Well, I still have not learned a better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
        struct file *file = vma->vm_file;
        struct socket * sock = file->private_data;
        struct sock *sk = sock->sk;

        if (sk)
                atomic_inc(&pkt_sk(sk)->mapped);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
        struct file *file = vma->vm_file;
        struct socket * sock = file->private_data;
        struct sock *sk = sock->sk;

        if (sk)
                atomic_dec(&pkt_sk(sk)->mapped);
}

static struct vm_operations_struct packet_mmap_ops = {
        .open = packet_mm_open,
        .close = packet_mm_close,
};
1619
1620 static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
1621 {
1622         return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
1623 }
1624
1625 static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
1626 {
1627         int i;
1628
1629         for (i = 0; i < len; i++) {
1630                 if (likely(pg_vec[i]))
1631                         free_pages((unsigned long) pg_vec[i], order);
1632         }
1633         kfree(pg_vec);
1634 }
1635
1636 static inline char *alloc_one_pg_vec_page(unsigned long order)
1637 {
1638         return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
1639                                          order);
1640 }
1641
static char **alloc_pg_vec(struct tpacket_req *req, int order)
{
	unsigned int block_nr = req->tp_block_nr;
	char **pg_vec;
	int i;

	pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
	if (unlikely(!pg_vec))
		goto out;

	for (i = 0; i < block_nr; i++) {
		pg_vec[i] = alloc_one_pg_vec_page(order);
		if (unlikely(!pg_vec[i]))
			goto out_free_pgvec;
	}

out:
	return pg_vec;

out_free_pgvec:
	free_pg_vec(pg_vec, order, block_nr);
	pg_vec = NULL;
	goto out;
}

static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
{
	char **pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	__be16 num;
	int err = 0;

	if (req->tp_block_nr) {
		int i;

		/* Sanity tests and some calculations */

		if (unlikely(po->pg_vec))
			return -EBUSY;

		if (unlikely((int)req->tp_block_size <= 0))
			return -EINVAL;
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
			return -EINVAL;
		if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
			return -EINVAL;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			return -EINVAL;

		po->frames_per_block = req->tp_block_size / req->tp_frame_size;
		if (unlikely(po->frames_per_block <= 0))
			return -EINVAL;
		if (unlikely((po->frames_per_block * req->tp_block_nr) !=
			     req->tp_frame_nr))
			return -EINVAL;

		err = -ENOMEM;
		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;

		/* Hand every frame of the new ring to the kernel side */
		for (i = 0; i < req->tp_block_nr; i++) {
			char *ptr = pg_vec[i];
			struct tpacket_hdr *header;
			int k;

			for (k = 0; k < po->frames_per_block; k++) {
				header = (struct tpacket_hdr *) ptr;
				header->tp_status = TP_STATUS_KERNEL;
				ptr += req->tp_frame_size;
			}
		}
		/* Done */
	} else {
		if (unlikely(req->tp_frame_nr))
			return -EINVAL;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		__dev_remove_pack(&po->prot_hook);
		po->num = 0;
		po->running = 0;
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	synchronize_net();

	err = -EBUSY;
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
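/*
 * XC(a, b) stores b in a and evaluates to a's previous value; it swaps
 * the new ring into the socket and drops the old ring state into the
 * locals so it can be freed after the locks are released below.
 */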
#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })

		spin_lock_bh(&sk->sk_receive_queue.lock);
		pg_vec = XC(po->pg_vec, pg_vec);
		po->frame_max = (req->tp_frame_nr - 1);
		po->head = 0;
		po->frame_size = req->tp_frame_size;
		spin_unlock_bh(&sk->sk_receive_queue.lock);

		order = XC(po->pg_vec_order, order);
		req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);

		po->pg_vec_pages = req->tp_block_size / PAGE_SIZE;
		po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
		skb_queue_purge(&sk->sk_receive_queue);
#undef XC
		if (atomic_read(&po->mapped))
			printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n",
			       atomic_read(&po->mapped));
	}

	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		sock_hold(sk);
		po->running = 1;
		po->num = num;
		dev_add_pack(&po->prot_hook);
	}
	spin_unlock(&po->bind_lock);

	release_sock(sk);

	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}
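
/*
 * For illustration only, and not part of the original file: the sanity
 * checks above translate into concrete constraints on the userspace
 * tpacket_req -- tp_block_size must be a multiple of the page size,
 * tp_frame_size must be at least TPACKET_HDRLEN and a multiple of
 * TPACKET_ALIGNMENT, and tp_frame_nr must equal frames-per-block times
 * tp_block_nr.  A minimal sketch of a request that packet_set_ring()
 * accepts (the sizes chosen here are illustrative, not mandated):
 */
#if 0	/* example userspace code, never compiled as part of this file */
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <unistd.h>

static int setup_rx_ring(int fd, struct tpacket_req *req)
{
	req->tp_block_size = 4 * getpagesize();	/* page-aligned by construction */
	req->tp_block_nr   = 64;
	req->tp_frame_size = 2048;		/* multiple of TPACKET_ALIGNMENT */
	/* packet_set_ring() insists on exactly this relationship */
	req->tp_frame_nr   = (req->tp_block_size / req->tp_frame_size)
			     * req->tp_block_nr;

	return setsockopt(fd, SOL_PACKET, PACKET_RX_RING, req, sizeof(*req));
}
#endif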

static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	size = vma->vm_end - vma->vm_start;

	lock_sock(sk);
	if (po->pg_vec == NULL)
		goto out;
	if (size != po->pg_vec_len * po->pg_vec_pages * PAGE_SIZE)
		goto out;

	start = vma->vm_start;
	for (i = 0; i < po->pg_vec_len; i++) {
		struct page *page = virt_to_page(po->pg_vec[i]);
		int pg_num;

		for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
			err = vm_insert_page(vma, start, page);
			if (unlikely(err))
				goto out;
			start += PAGE_SIZE;
		}
	}
	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	release_sock(sk);
	return err;
}
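
/*
 * For illustration only, and not part of the original file: since
 * packet_mmap() rejects a nonzero pgoff and any length other than the
 * whole ring, userspace maps the ring in a single call.  A minimal
 * sketch, reusing the hypothetical setup_rx_ring() request from the
 * previous example:
 */
#if 0	/* example userspace code, never compiled as part of this file */
#include <sys/mman.h>
#include <linux/if_packet.h>

static char *map_rx_ring(int fd, const struct tpacket_req *req)
{
	size_t len = (size_t)req->tp_block_nr * req->tp_block_size;
	void *ring = mmap(NULL, len, PROT_READ | PROT_WRITE,
			  MAP_SHARED, fd, 0);

	return ring == MAP_FAILED ? NULL : ring;
}
#endif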
#endif


static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};
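
/*
 * For illustration only, and not part of the original file: a userspace
 * socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)) is serviced by the
 * packet_ops table above, so bind(2) lands in packet_bind() and
 * recvmsg(2) in packet_recvmsg().  A minimal sketch that binds such a
 * socket to one interface:
 */
#if 0	/* example userspace code, never compiled as part of this file */
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <net/if.h>
#include <arpa/inet.h>
#include <string.h>
#include <unistd.h>

static int open_packet_socket(const char *ifname)
{
	struct sockaddr_ll sll;
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

	if (fd < 0)
		return -1;

	memset(&sll, 0, sizeof(sll));
	sll.sll_family   = AF_PACKET;
	sll.sll_protocol = htons(ETH_P_ALL);
	sll.sll_ifindex  = if_nametoindex(ifname);

	if (bind(fd, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}
#endif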

static struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner	=	THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
	.notifier_call = packet_notifier,
};

#ifdef CONFIG_PROC_FS
static inline struct sock *packet_seq_idx(loff_t off)
{
	struct sock *s;
	struct hlist_node *node;

	sk_for_each(s, node, &packet_sklist) {
		if (!off--)
			return s;
	}
	return NULL;
}

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&packet_sklist_lock);
	return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return (v == SEQ_START_TOKEN)
		? sk_head(&packet_sklist)
		: sk_next((struct sock *)v);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&packet_sklist_lock);
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
	else {
		struct sock *s = v;
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   atomic_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   sock_i_uid(s),
			   sock_i_ino(s));
	}

	return 0;
}
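
/*
 * A /proc/net/packet line produced by the format string above looks like,
 * for example (the pointer and inode values are made up for illustration):
 *
 *   sk       RefCnt Type Proto  Iface R Rmem   User   Inode
 *   c1a2b3c0 2      3    0003   2     1 0      0      9207
 *
 * Type is the socket type (SOCK_RAW == 3), Proto the link-level protocol
 * (0x0003 == ETH_P_ALL), and R is set while the prot_hook is registered.
 */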

static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &packet_seq_ops);
}

static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

#endif

static void __exit packet_exit(void)
{
	proc_net_remove("packet");
	unregister_netdevice_notifier(&packet_netdev_notifier);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}

static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
	proc_net_fops_create("packet", 0, &packet_seq_fops);
out:
	return rc;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);