X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fpacket%2Faf_packet.c;h=f546e81acc450278589f0b5025cbabf73cd5a432;hb=9d2a3f31d6d7832cd441eeda08bc2266cdd5d972;hp=bf2699074774dc436d53d5276c2375721df58cf1;hpb=cd354f1ae75e6466a7e31b727faede57a1f89ca5;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index bf26990..f546e81 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -5,8 +5,6 @@ * * PACKET - implements raw packet sockets. * - * Version: $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, * Alan Cox, @@ -61,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -78,27 +77,12 @@ #include #include #include +#include #ifdef CONFIG_INET #include #endif -#define CONFIG_SOCK_PACKET 1 - -/* - Proposed replacement for SIOC{ADD,DEL}MULTI and - IFF_PROMISC, IFF_ALLMULTI flags. - - It is more expensive, but I believe, - it is really correct solution: reentereble, safe and fault tolerant. - - IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping - reference count and global flag, so that real status is - (gflag|(count != 0)), so that we can use obsolete faulty interface - not harming clever users. - */ -#define CONFIG_PACKET_MULTICAST 1 - /* Assumptions: - if device has no dev->hard_header routine, it adds and removes ll header @@ -114,22 +98,22 @@ On receive: ----------- Incoming, dev->hard_header!=NULL - mac.raw -> ll header - data -> data + mac_header -> ll header + data -> data Outgoing, dev->hard_header!=NULL - mac.raw -> ll header - data -> ll header + mac_header -> ll header + data -> ll header Incoming, dev->hard_header==NULL - mac.raw -> UNKNOWN position. It is very likely, that it points to ll header. - PPP makes it, that is wrong, because introduce assymetry - between rx and tx paths. - data -> data + mac_header -> UNKNOWN position. It is very likely, that it points to ll + header. PPP makes it, that is wrong, because introduce + assymetry between rx and tx paths. + data -> data Outgoing, dev->hard_header==NULL - mac.raw -> data. ll header is still not built! - data -> data + mac_header -> data. ll header is still not built! + data -> data Resume If dev->hard_header==NULL we are unlikely to restore sensible ll header. @@ -139,27 +123,19 @@ On transmit: ------------ dev->hard_header != NULL - mac.raw -> ll header - data -> ll header + mac_header -> ll header + data -> ll header dev->hard_header == NULL (ll header is added by device, we cannot control it) - mac.raw -> data - data -> data + mac_header -> data + data -> data We should set nh.raw on output to correct posistion, packet classifier depends on it. */ -/* List of all packet sockets. */ -static HLIST_HEAD(packet_sklist); -static DEFINE_RWLOCK(packet_sklist_lock); - -static atomic_t packet_socks_nr; - - /* Private packet socket structures. */ -#ifdef CONFIG_PACKET_MULTICAST struct packet_mclist { struct packet_mclist *next; @@ -179,7 +155,7 @@ struct packet_mreq_max unsigned short mr_alen; unsigned char mr_address[MAX_ADDR_LEN]; }; -#endif + #ifdef CONFIG_PACKET_MMAP static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing); #endif @@ -200,18 +176,21 @@ struct packet_sock { #endif struct packet_type prot_hook; spinlock_t bind_lock; + struct mutex pg_vec_lock; unsigned int running:1, /* prot_hook is attached*/ - auxdata:1; + auxdata:1, + origdev:1; int ifindex; /* bound device */ __be16 num; -#ifdef CONFIG_PACKET_MULTICAST struct packet_mclist *mclist; -#endif #ifdef CONFIG_PACKET_MMAP atomic_t mapped; unsigned int pg_vec_order; unsigned int pg_vec_pages; unsigned int pg_vec_len; + enum tpacket_versions tp_version; + unsigned int tp_hdrlen; + unsigned int tp_reserve; #endif }; @@ -227,17 +206,52 @@ struct packet_skb_cb { #ifdef CONFIG_PACKET_MMAP -static inline char *packet_lookup_frame(struct packet_sock *po, unsigned int position) +static void *packet_lookup_frame(struct packet_sock *po, unsigned int position, + int status) { unsigned int pg_vec_pos, frame_offset; - char *frame; + union { + struct tpacket_hdr *h1; + struct tpacket2_hdr *h2; + void *raw; + } h; pg_vec_pos = position / po->frames_per_block; frame_offset = position % po->frames_per_block; - frame = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size); + h.raw = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size); + switch (po->tp_version) { + case TPACKET_V1: + if (status != (h.h1->tp_status ? TP_STATUS_USER : + TP_STATUS_KERNEL)) + return NULL; + break; + case TPACKET_V2: + if (status != (h.h2->tp_status ? TP_STATUS_USER : + TP_STATUS_KERNEL)) + return NULL; + break; + } + return h.raw; +} - return frame; +static void __packet_set_status(struct packet_sock *po, void *frame, int status) +{ + union { + struct tpacket_hdr *h1; + struct tpacket2_hdr *h2; + void *raw; + } h; + + h.raw = frame; + switch (po->tp_version) { + case TPACKET_V1: + h.h1->tp_status = status; + break; + case TPACKET_V2: + h.h2->tp_status = status; + break; + } } #endif @@ -248,24 +262,20 @@ static inline struct packet_sock *pkt_sk(struct sock *sk) static void packet_sock_destruct(struct sock *sk) { - BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(atomic_read(&sk->sk_rmem_alloc)); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); if (!sock_flag(sk, SOCK_DEAD)) { printk("Attempt to release alive packet socket: %p\n", sk); return; } - atomic_dec(&packet_socks_nr); -#ifdef PACKET_REFCNT_DEBUG - printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr)); -#endif + sk_refcnt_debug_dec(sk); } static const struct proto_ops packet_ops; -#ifdef CONFIG_SOCK_PACKET static const struct proto_ops packet_ops_spkt; static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) @@ -287,13 +297,16 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct * Incoming packets have ll header pulled, * push it back. * - * For outgoing ones skb->data == skb->mac.raw + * For outgoing ones skb->data == skb_mac_header(skb) * so that this procedure is noop. */ if (skb->pkt_type == PACKET_LOOPBACK) goto out; + if (dev_net(dev) != sock_net(sk)) + goto out; + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) goto oom; @@ -306,7 +319,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct spkt = &PACKET_SKB_CB(skb)->sa.pkt; - skb_push(skb, skb->data-skb->mac.raw); + skb_push(skb, skb->data - skb_mac_header(skb)); /* * The SOCK_PACKET socket receives _all_ frames. @@ -365,7 +378,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, */ saddr->spkt_device[13] = 0; - dev = dev_get_by_name(saddr->spkt_device); + dev = dev_get_by_name(sock_net(sk), saddr->spkt_device); err = -ENODEV; if (dev == NULL) goto out_unlock; @@ -404,14 +417,14 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, * notable one here. This should really be fixed at the driver level. */ skb_reserve(skb, LL_RESERVED_SPACE(dev)); - skb->nh.raw = skb->data; + skb_reset_network_header(skb); /* Try to align data part correctly */ - if (dev->hard_header) { + if (dev->header_ops) { skb->data -= dev->hard_header_len; skb->tail -= dev->hard_header_len; if (len < dev->hard_header_len) - skb->nh.raw = skb->data; + skb_reset_network_header(skb); } /* Returns -EFAULT on error */ @@ -437,7 +450,6 @@ out_unlock: dev_put(dev); return err; } -#endif static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, unsigned int res) @@ -480,9 +492,12 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet sk = pt->af_packet_priv; po = pkt_sk(sk); + if (dev_net(dev) != sock_net(sk)) + goto drop; + skb->dev = dev; - if (dev->hard_header) { + if (dev->header_ops) { /* The device has an explicit notion of ll header, exported to higher levels. @@ -491,10 +506,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet never delivered to user. */ if (sk->sk_type != SOCK_DGRAM) - skb_push(skb, skb->data - skb->mac.raw); + skb_push(skb, skb->data - skb_mac_header(skb)); else if (skb->pkt_type == PACKET_OUTGOING) { /* Special case: outgoing packets have ll header at head */ - skb_pull(skb, skb->nh.raw - skb->data); + skb_pull(skb, skb_network_offset(skb)); } } @@ -531,11 +546,12 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet sll->sll_hatype = dev->type; sll->sll_protocol = skb->protocol; sll->sll_pkttype = skb->pkt_type; - sll->sll_ifindex = dev->ifindex; - sll->sll_halen = 0; + if (unlikely(po->origdev)) + sll->sll_ifindex = orig_dev->ifindex; + else + sll->sll_ifindex = dev->ifindex; - if (dev->hard_header_parse) - sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr); + sll->sll_halen = dev_parse_header(skb, sll->sll_addr); PACKET_SKB_CB(skb)->origlen = skb->len; @@ -568,7 +584,7 @@ drop_n_restore: skb->len = skb_len; } drop: - kfree_skb(skb); + consume_skb(skb); return 0; } @@ -578,13 +594,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe struct sock *sk; struct packet_sock *po; struct sockaddr_ll *sll; - struct tpacket_hdr *h; + union { + struct tpacket_hdr *h1; + struct tpacket2_hdr *h2; + void *raw; + } h; u8 * skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER; - unsigned short macoff, netoff; + unsigned short macoff, netoff, hdrlen; struct sk_buff *copy_skb = NULL; + struct timeval tv; + struct timespec ts; if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -592,12 +614,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe sk = pt->af_packet_priv; po = pkt_sk(sk); - if (dev->hard_header) { + if (dev_net(dev) != sock_net(sk)) + goto drop; + + if (dev->header_ops) { if (sk->sk_type != SOCK_DGRAM) - skb_push(skb, skb->data - skb->mac.raw); + skb_push(skb, skb->data - skb_mac_header(skb)); else if (skb->pkt_type == PACKET_OUTGOING) { /* Special case: outgoing packets have ll header at head */ - skb_pull(skb, skb->nh.raw - skb->data); + skb_pull(skb, skb_network_offset(skb)); } } @@ -613,10 +638,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe snaplen = res; if (sk->sk_type == SOCK_DGRAM) { - macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; + macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 + + po->tp_reserve; } else { - unsigned maclen = skb->nh.raw - skb->data; - netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen)); + unsigned maclen = skb_network_offset(skb); + netoff = TPACKET_ALIGN(po->tp_hdrlen + + (maclen < 16 ? 16 : maclen)) + + po->tp_reserve; macoff = netoff - maclen; } @@ -639,9 +667,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe } spin_lock(&sk->sk_receive_queue.lock); - h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head); - - if (h->tp_status) + h.raw = packet_lookup_frame(po, po->head, TP_STATUS_KERNEL); + if (!h.raw) goto ring_is_full; po->head = po->head != po->frame_max ? po->head+1 : 0; po->stats.tp_packets++; @@ -653,37 +680,59 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe status &= ~TP_STATUS_LOSING; spin_unlock(&sk->sk_receive_queue.lock); - skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen); + skb_copy_bits(skb, 0, h.raw + macoff, snaplen); - h->tp_len = skb->len; - h->tp_snaplen = snaplen; - h->tp_mac = macoff; - h->tp_net = netoff; - if (skb->tstamp.off_sec == 0) { - __net_timestamp(skb); - sock_enable_timestamp(sk); + switch (po->tp_version) { + case TPACKET_V1: + h.h1->tp_len = skb->len; + h.h1->tp_snaplen = snaplen; + h.h1->tp_mac = macoff; + h.h1->tp_net = netoff; + if (skb->tstamp.tv64) + tv = ktime_to_timeval(skb->tstamp); + else + do_gettimeofday(&tv); + h.h1->tp_sec = tv.tv_sec; + h.h1->tp_usec = tv.tv_usec; + hdrlen = sizeof(*h.h1); + break; + case TPACKET_V2: + h.h2->tp_len = skb->len; + h.h2->tp_snaplen = snaplen; + h.h2->tp_mac = macoff; + h.h2->tp_net = netoff; + if (skb->tstamp.tv64) + ts = ktime_to_timespec(skb->tstamp); + else + getnstimeofday(&ts); + h.h2->tp_sec = ts.tv_sec; + h.h2->tp_nsec = ts.tv_nsec; + h.h2->tp_vlan_tci = skb->vlan_tci; + hdrlen = sizeof(*h.h2); + break; + default: + BUG(); } - h->tp_sec = skb->tstamp.off_sec; - h->tp_usec = skb->tstamp.off_usec; - sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); - sll->sll_halen = 0; - if (dev->hard_header_parse) - sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr); + sll = h.raw + TPACKET_ALIGN(hdrlen); + sll->sll_halen = dev_parse_header(skb, sll->sll_addr); sll->sll_family = AF_PACKET; sll->sll_hatype = dev->type; sll->sll_protocol = skb->protocol; sll->sll_pkttype = skb->pkt_type; - sll->sll_ifindex = dev->ifindex; + if (unlikely(po->origdev)) + sll->sll_ifindex = orig_dev->ifindex; + else + sll->sll_ifindex = dev->ifindex; - h->tp_status = status; + __packet_set_status(po, h.raw, status); smp_mb(); { struct page *p_start, *p_end; - u8 *h_end = (u8 *)h + macoff + snaplen - 1; + u8 *h_end = h.raw + macoff + snaplen - 1; - p_start = virt_to_page(h); + p_start = virt_to_page(h.raw); p_end = virt_to_page(h_end); while (p_start <= p_end) { flush_dcache_page(p_start); @@ -707,8 +756,7 @@ ring_is_full: spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk, 0); - if (copy_skb) - kfree_skb(copy_skb); + kfree_skb(copy_skb); goto drop_n_restore; } @@ -748,7 +796,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, } - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(sock_net(sk), ifindex); err = -ENXIO; if (dev == NULL) goto out_unlock; @@ -763,24 +811,18 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, if (len > dev->mtu+reserve) goto out_unlock; - skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev), + skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev), msg->msg_flags & MSG_DONTWAIT, &err); if (skb==NULL) goto out_unlock; skb_reserve(skb, LL_RESERVED_SPACE(dev)); - skb->nh.raw = skb->data; + skb_reset_network_header(skb); - if (dev->hard_header) { - int res; - err = -EINVAL; - res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len); - if (sock->type != SOCK_DGRAM) { - skb->tail = skb->data; - skb->len = 0; - } else if (res < 0) - goto out_free; - } + err = -EINVAL; + if (sock->type == SOCK_DGRAM && + dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0) + goto out_free; /* Returns -EFAULT on error */ err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); @@ -821,15 +863,18 @@ static int packet_release(struct socket *sock) { struct sock *sk = sock->sk; struct packet_sock *po; + struct net *net; if (!sk) return 0; + net = sock_net(sk); po = pkt_sk(sk); - write_lock_bh(&packet_sklist_lock); + write_lock_bh(&net->packet.sklist_lock); sk_del_node_init(sk); - write_unlock_bh(&packet_sklist_lock); + sock_prot_inuse_add(net, sk->sk_prot, -1); + write_unlock_bh(&net->packet.sklist_lock); /* * Unhook packet receive handler. @@ -845,9 +890,7 @@ static int packet_release(struct socket *sock) __sock_put(sk); } -#ifdef CONFIG_PACKET_MULTICAST packet_flush_mclist(sk); -#endif #ifdef CONFIG_PACKET_MMAP if (po->pg_vec) { @@ -867,6 +910,7 @@ static int packet_release(struct socket *sock) /* Purge queues */ skb_queue_purge(&sk->sk_receive_queue); + sk_refcnt_debug_release(sk); sock_put(sk); return 0; @@ -904,20 +948,14 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc if (protocol == 0) goto out_unlock; - if (dev) { - if (dev->flags&IFF_UP) { - dev_add_pack(&po->prot_hook); - sock_hold(sk); - po->running = 1; - } else { - sk->sk_err = ENETDOWN; - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_error_report(sk); - } - } else { + if (!dev || (dev->flags & IFF_UP)) { dev_add_pack(&po->prot_hook); sock_hold(sk); po->running = 1; + } else { + sk->sk_err = ENETDOWN; + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_error_report(sk); } out_unlock: @@ -930,8 +968,6 @@ out_unlock: * Bind a packet socket to a device */ -#ifdef CONFIG_SOCK_PACKET - static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk=sock->sk; @@ -947,14 +983,13 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int add return -EINVAL; strlcpy(name,uaddr->sa_data,sizeof(name)); - dev = dev_get_by_name(name); + dev = dev_get_by_name(sock_net(sk), name); if (dev) { err = packet_do_bind(sk, dev, pkt_sk(sk)->num); dev_put(dev); } return err; } -#endif static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { @@ -975,7 +1010,7 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len if (sll->sll_ifindex) { err = -ENODEV; - dev = dev_get_by_index(sll->sll_ifindex); + dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex); if (dev == NULL) goto out; } @@ -997,7 +1032,7 @@ static struct proto packet_proto = { * Create a packet of type SOCK_PACKET. */ -static int packet_create(struct socket *sock, int protocol) +static int packet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct packet_sock *po; @@ -1006,25 +1041,21 @@ static int packet_create(struct socket *sock, int protocol) if (!capable(CAP_NET_RAW)) return -EPERM; - if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW -#ifdef CONFIG_SOCK_PACKET - && sock->type != SOCK_PACKET -#endif - ) + if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && + sock->type != SOCK_PACKET) return -ESOCKTNOSUPPORT; sock->state = SS_UNCONNECTED; err = -ENOBUFS; - sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1); + sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto); if (sk == NULL) goto out; sock->ops = &packet_ops; -#ifdef CONFIG_SOCK_PACKET if (sock->type == SOCK_PACKET) sock->ops = &packet_ops_spkt; -#endif + sock_init_data(sock, sk); po = pkt_sk(sk); @@ -1032,18 +1063,19 @@ static int packet_create(struct socket *sock, int protocol) po->num = proto; sk->sk_destruct = packet_sock_destruct; - atomic_inc(&packet_socks_nr); + sk_refcnt_debug_inc(sk); /* * Attach a protocol block */ spin_lock_init(&po->bind_lock); + mutex_init(&po->pg_vec_lock); po->prot_hook.func = packet_rcv; -#ifdef CONFIG_SOCK_PACKET + if (sock->type == SOCK_PACKET) po->prot_hook.func = packet_rcv_spkt; -#endif + po->prot_hook.af_packet_priv = sk; if (proto) { @@ -1053,9 +1085,10 @@ static int packet_create(struct socket *sock, int protocol) po->running = 1; } - write_lock_bh(&packet_sklist_lock); - sk_add_node(sk, &packet_sklist); - write_unlock_bh(&packet_sklist_lock); + write_lock_bh(&net->packet.sklist_lock); + sk_add_node(sk, &net->packet.sklist); + sock_prot_inuse_add(net, &packet_proto, 1); + write_unlock_bh(&net->packet.sklist_lock); return(0); out: return err; @@ -1146,7 +1179,8 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_len = PACKET_SKB_CB(skb)->origlen; aux.tp_snaplen = skb->len; aux.tp_mac = 0; - aux.tp_net = skb->nh.raw - skb->data; + aux.tp_net = skb_network_offset(skb); + aux.tp_vlan_tci = skb->vlan_tci; put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } @@ -1163,7 +1197,6 @@ out: return err; } -#ifdef CONFIG_SOCK_PACKET static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { @@ -1174,7 +1207,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; - dev = dev_get_by_index(pkt_sk(sk)->ifindex); + dev = dev_get_by_index(sock_net(sk), pkt_sk(sk)->ifindex); if (dev) { strlcpy(uaddr->sa_data, dev->name, 15); dev_put(dev); @@ -1184,7 +1217,6 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return 0; } -#endif static int packet_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) @@ -1200,7 +1232,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, sll->sll_family = AF_PACKET; sll->sll_ifindex = po->ifindex; sll->sll_protocol = po->num; - dev = dev_get_by_index(po->ifindex); + dev = dev_get_by_index(sock_net(sk), po->ifindex); if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; @@ -1215,8 +1247,8 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, return 0; } -#ifdef CONFIG_PACKET_MULTICAST -static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what) +static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, + int what) { switch (i->type) { case PACKET_MR_MULTICAST: @@ -1226,13 +1258,14 @@ static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int w dev_mc_delete(dev, i->addr, i->alen, 0); break; case PACKET_MR_PROMISC: - dev_set_promiscuity(dev, what); + return dev_set_promiscuity(dev, what); break; case PACKET_MR_ALLMULTI: - dev_set_allmulti(dev, what); + return dev_set_allmulti(dev, what); break; default:; } + return 0; } static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what) @@ -1253,7 +1286,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) rtnl_lock(); err = -ENODEV; - dev = __dev_get_by_index(mreq->mr_ifindex); + dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex); if (!dev) goto done; @@ -1286,7 +1319,11 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) i->count = 1; i->next = po->mclist; po->mclist = i; - packet_dev_mc(dev, i, +1); + err = packet_dev_mc(dev, i, 1); + if (err) { + po->mclist = i->next; + kfree(i); + } done: rtnl_unlock(); @@ -1307,7 +1344,7 @@ static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) if (--ml->count == 0) { struct net_device *dev; *mlp = ml->next; - dev = dev_get_by_index(ml->ifindex); + dev = dev_get_by_index(sock_net(sk), ml->ifindex); if (dev) { packet_dev_mc(dev, ml, -1); dev_put(dev); @@ -1335,7 +1372,7 @@ static void packet_flush_mclist(struct sock *sk) struct net_device *dev; po->mclist = ml->next; - if ((dev = dev_get_by_index(ml->ifindex)) != NULL) { + if ((dev = dev_get_by_index(sock_net(sk), ml->ifindex)) != NULL) { packet_dev_mc(dev, ml, -1); dev_put(dev); } @@ -1343,7 +1380,6 @@ static void packet_flush_mclist(struct sock *sk) } rtnl_unlock(); } -#endif static int packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) @@ -1356,7 +1392,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return -ENOPROTOOPT; switch(optname) { -#ifdef CONFIG_PACKET_MULTICAST case PACKET_ADD_MEMBERSHIP: case PACKET_DROP_MEMBERSHIP: { @@ -1377,7 +1412,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv ret = packet_mc_drop(sk, &mreq); return ret; } -#endif + #ifdef CONFIG_PACKET_MMAP case PACKET_RX_RING: { @@ -1401,6 +1436,38 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv pkt_sk(sk)->copy_thresh = val; return 0; } + case PACKET_VERSION: + { + int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (po->pg_vec) + return -EBUSY; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + switch (val) { + case TPACKET_V1: + case TPACKET_V2: + po->tp_version = val; + return 0; + default: + return -EINVAL; + } + } + case PACKET_RESERVE: + { + unsigned int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (po->pg_vec) + return -EBUSY; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + po->tp_reserve = val; + return 0; + } #endif case PACKET_AUXDATA: { @@ -1414,6 +1481,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv po->auxdata = !!val; return 0; } + case PACKET_ORIGDEV: + { + int val; + + if (optlen < sizeof(val)) + return -EINVAL; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + + po->origdev = !!val; + return 0; + } default: return -ENOPROTOOPT; } @@ -1457,6 +1536,44 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, data = &val; break; + case PACKET_ORIGDEV: + if (len > sizeof(int)) + len = sizeof(int); + val = po->origdev; + + data = &val; + break; +#ifdef CONFIG_PACKET_MMAP + case PACKET_VERSION: + if (len > sizeof(int)) + len = sizeof(int); + val = po->tp_version; + data = &val; + break; + case PACKET_HDRLEN: + if (len > sizeof(int)) + len = sizeof(int); + if (copy_from_user(&val, optval, len)) + return -EFAULT; + switch (val) { + case TPACKET_V1: + val = sizeof(struct tpacket_hdr); + break; + case TPACKET_V2: + val = sizeof(struct tpacket2_hdr); + break; + default: + return -EINVAL; + } + data = &val; + break; + case PACKET_RESERVE: + if (len > sizeof(unsigned int)) + len = sizeof(unsigned int); + val = po->tp_reserve; + data = &val; + break; +#endif default: return -ENOPROTOOPT; } @@ -1473,19 +1590,19 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void { struct sock *sk; struct hlist_node *node; - struct net_device *dev = (struct net_device*)data; + struct net_device *dev = data; + struct net *net = dev_net(dev); - read_lock(&packet_sklist_lock); - sk_for_each(sk, node, &packet_sklist) { + read_lock(&net->packet.sklist_lock); + sk_for_each(sk, node, &net->packet.sklist) { struct packet_sock *po = pkt_sk(sk); switch (msg) { case NETDEV_UNREGISTER: -#ifdef CONFIG_PACKET_MULTICAST if (po->mclist) packet_dev_mclist(dev, po->mclist, -1); - // fallthrough -#endif + /* fallthrough */ + case NETDEV_DOWN: if (dev->ifindex == po->ifindex) { spin_lock(&po->bind_lock); @@ -1516,7 +1633,7 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void break; } } - read_unlock(&packet_sklist_lock); + read_unlock(&net->packet.sklist_lock); return NOTIFY_DONE; } @@ -1546,6 +1663,8 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, } case SIOCGSTAMP: return sock_get_timestamp(sk, (struct timeval __user *)arg); + case SIOCGSTAMPNS: + return sock_get_timestampns(sk, (struct timespec __user *)arg); #ifdef CONFIG_INET case SIOCADDRT: @@ -1562,6 +1681,8 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: case SIOCSIFFLAGS: + if (!net_eq(sock_net(sk), &init_net)) + return -ENOIOCTLCMD; return inet_dgram_ops.ioctl(sock, cmd, arg); #endif @@ -1586,11 +1707,8 @@ static unsigned int packet_poll(struct file * file, struct socket *sock, spin_lock_bh(&sk->sk_receive_queue.lock); if (po->pg_vec) { unsigned last = po->head ? po->head-1 : po->frame_max; - struct tpacket_hdr *h; - - h = (struct tpacket_hdr *)packet_lookup_frame(po, last); - if (h->tp_status) + if (packet_lookup_frame(po, last, TP_STATUS_USER)) mask |= POLLIN | POLLRDNORM; } spin_unlock_bh(&sk->sk_receive_queue.lock); @@ -1627,11 +1745,6 @@ static struct vm_operations_struct packet_mmap_ops = { .close =packet_mm_close, }; -static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order) -{ - return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1); -} - static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len) { int i; @@ -1645,8 +1758,9 @@ static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len) static inline char *alloc_one_pg_vec_page(unsigned long order) { - return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO, - order); + gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN; + + return (char *) __get_free_pages(gfp_flags, order); } static char **alloc_pg_vec(struct tpacket_req *req, int order) @@ -1683,18 +1797,28 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing int err = 0; if (req->tp_block_nr) { - int i, l; + int i; /* Sanity tests and some calculations */ if (unlikely(po->pg_vec)) return -EBUSY; + switch (po->tp_version) { + case TPACKET_V1: + po->tp_hdrlen = TPACKET_HDRLEN; + break; + case TPACKET_V2: + po->tp_hdrlen = TPACKET2_HDRLEN; + break; + } + if (unlikely((int)req->tp_block_size <= 0)) return -EINVAL; if (unlikely(req->tp_block_size & (PAGE_SIZE - 1))) return -EINVAL; - if (unlikely(req->tp_frame_size < TPACKET_HDRLEN)) + if (unlikely(req->tp_frame_size < po->tp_hdrlen + + po->tp_reserve)) return -EINVAL; if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) return -EINVAL; @@ -1712,15 +1836,12 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing if (unlikely(!pg_vec)) goto out; - l = 0; for (i = 0; i < req->tp_block_nr; i++) { - char *ptr = pg_vec[i]; - struct tpacket_hdr *header; + void *ptr = pg_vec[i]; int k; for (k = 0; k < po->frames_per_block; k++) { - header = (struct tpacket_hdr *) ptr; - header->tp_status = TP_STATUS_KERNEL; + __packet_set_status(po, ptr, TP_STATUS_KERNEL); ptr += req->tp_frame_size; } } @@ -1747,6 +1868,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing synchronize_net(); err = -EBUSY; + mutex_lock(&po->pg_vec_lock); if (closing || atomic_read(&po->mapped) == 0) { err = 0; #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; }) @@ -1768,6 +1890,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing if (atomic_read(&po->mapped)) printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped)); } + mutex_unlock(&po->pg_vec_lock); spin_lock(&po->bind_lock); if (was_running && !po->running) { @@ -1800,7 +1923,7 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st size = vma->vm_end - vma->vm_start; - lock_sock(sk); + mutex_lock(&po->pg_vec_lock); if (po->pg_vec == NULL) goto out; if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE) @@ -1823,13 +1946,12 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st err = 0; out: - release_sock(sk); + mutex_unlock(&po->pg_vec_lock); return err; } #endif -#ifdef CONFIG_SOCK_PACKET static const struct proto_ops packet_ops_spkt = { .family = PF_PACKET, .owner = THIS_MODULE, @@ -1850,7 +1972,6 @@ static const struct proto_ops packet_ops_spkt = { .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; -#endif static const struct proto_ops packet_ops = { .family = PF_PACKET, @@ -1884,12 +2005,12 @@ static struct notifier_block packet_netdev_notifier = { }; #ifdef CONFIG_PROC_FS -static inline struct sock *packet_seq_idx(loff_t off) +static inline struct sock *packet_seq_idx(struct net *net, loff_t off) { struct sock *s; struct hlist_node *node; - sk_for_each(s, node, &packet_sklist) { + sk_for_each(s, node, &net->packet.sklist) { if (!off--) return s; } @@ -1897,22 +2018,27 @@ static inline struct sock *packet_seq_idx(loff_t off) } static void *packet_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(seq_file_net(seq)->packet.sklist_lock) { - read_lock(&packet_sklist_lock); - return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN; + struct net *net = seq_file_net(seq); + read_lock(&net->packet.sklist_lock); + return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN; } static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net *net = seq_file_net(seq); ++*pos; return (v == SEQ_START_TOKEN) - ? sk_head(&packet_sklist) + ? sk_head(&net->packet.sklist) : sk_next((struct sock*)v) ; } static void packet_seq_stop(struct seq_file *seq, void *v) + __releases(seq_file_net(seq)->packet.sklist_lock) { - read_unlock(&packet_sklist_lock); + struct net *net = seq_file_net(seq); + read_unlock(&net->packet.sklist_lock); } static int packet_seq_show(struct seq_file *seq, void *v) @@ -1939,7 +2065,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations packet_seq_ops = { +static const struct seq_operations packet_seq_ops = { .start = packet_seq_start, .next = packet_seq_next, .stop = packet_seq_stop, @@ -1948,7 +2074,8 @@ static struct seq_operations packet_seq_ops = { static int packet_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &packet_seq_ops); + return seq_open_net(inode, file, &packet_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations packet_seq_fops = { @@ -1956,15 +2083,37 @@ static const struct file_operations packet_seq_fops = { .open = packet_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; #endif +static int packet_net_init(struct net *net) +{ + rwlock_init(&net->packet.sklist_lock); + INIT_HLIST_HEAD(&net->packet.sklist); + + if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) + return -ENOMEM; + + return 0; +} + +static void packet_net_exit(struct net *net) +{ + proc_net_remove(net, "packet"); +} + +static struct pernet_operations packet_net_ops = { + .init = packet_net_init, + .exit = packet_net_exit, +}; + + static void __exit packet_exit(void) { - proc_net_remove("packet"); unregister_netdevice_notifier(&packet_netdev_notifier); + unregister_pernet_subsys(&packet_net_ops); sock_unregister(PF_PACKET); proto_unregister(&packet_proto); } @@ -1977,8 +2126,8 @@ static int __init packet_init(void) goto out; sock_register(&packet_family_ops); + register_pernet_subsys(&packet_net_ops); register_netdevice_notifier(&packet_netdev_notifier); - proc_net_fops_create("packet", 0, &packet_seq_fops); out: return rc; }