* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Alan Cox, <gw4pts@gw4pts.ampr.org>
*
- * Fixes:
+ * Fixes:
* Alan Cox : verify_area() now used correctly
* Alan Cox : new skbuff lists, look ma no backlogs!
* Alan Cox : tidied skbuff lists.
* Alexey Kuznetsov : Untied from IPv4 stack.
* Cyrus Durgin : Fixed kerneld for kmod.
* Michal Ostrowski : Module initialization cleanup.
- * Ulises Alonso : Frame number limit removal and
+ * Ulises Alonso : Frame number limit removal and
* packet_set_ring memory leak.
* Eric Biederman : Allow for > 8 byte hardware addresses.
* The convention is that longer addresses
* will simply extend the hardware address
- * byte arrays at the end of sockaddr_ll
+ * byte arrays at the end of sockaddr_ll
* and packet_mreq.
*
* This program is free software; you can redistribute it and/or
* 2 of the License, or (at your option) any later version.
*
*/
-
-#include <linux/config.h>
+
#include <linux/types.h>
-#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
+#include <linux/kernel.h>
#include <linux/kmod.h>
+#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
+#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/inet_common.h>
#endif
-#define CONFIG_SOCK_PACKET 1
-
-/*
- Proposed replacement for SIOC{ADD,DEL}MULTI and
- IFF_PROMISC, IFF_ALLMULTI flags.
-
- It is more expensive, but I believe,
- it is really correct solution: reentereble, safe and fault tolerant.
-
- IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping
- reference count and global flag, so that real status is
- (gflag|(count != 0)), so that we can use obsolete faulty interface
- not harming clever users.
- */
-#define CONFIG_PACKET_MULTICAST 1
-
/*
Assumptions:
- if device has no dev->hard_header routine, it adds and removes ll header
-----------
Incoming, dev->hard_header!=NULL
- mac.raw -> ll header
- data -> data
+ mac_header -> ll header
+ data -> data
Outgoing, dev->hard_header!=NULL
- mac.raw -> ll header
- data -> ll header
+ mac_header -> ll header
+ data -> ll header
Incoming, dev->hard_header==NULL
- mac.raw -> UNKNOWN position. It is very likely, that it points to ll header.
- PPP makes it, that is wrong, because introduce assymetry
- between rx and tx paths.
- data -> data
+ mac_header -> UNKNOWN position. It is very likely, that it points to ll
+ header. PPP makes it, that is wrong, because introduce
+ assymetry between rx and tx paths.
+ data -> data
Outgoing, dev->hard_header==NULL
- mac.raw -> data. ll header is still not built!
- data -> data
+ mac_header -> data. ll header is still not built!
+ data -> data
Resume
If dev->hard_header==NULL we are unlikely to restore sensible ll header.
------------
dev->hard_header != NULL
- mac.raw -> ll header
- data -> ll header
+ mac_header -> ll header
+ data -> ll header
dev->hard_header == NULL (ll header is added by device, we cannot control it)
- mac.raw -> data
- data -> data
+ mac_header -> data
+ data -> data
We should set nh.raw on output to correct posistion,
packet classifier depends on it.
*/
-/* List of all packet sockets. */
-static HLIST_HEAD(packet_sklist);
-static DEFINE_RWLOCK(packet_sklist_lock);
-
-static atomic_t packet_socks_nr;
-
-
/* Private packet socket structures. */
-#ifdef CONFIG_PACKET_MULTICAST
struct packet_mclist
{
struct packet_mclist *next;
unsigned short mr_alen;
unsigned char mr_address[MAX_ADDR_LEN];
};
-#endif
+
#ifdef CONFIG_PACKET_MMAP
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
#endif
#endif
struct packet_type prot_hook;
spinlock_t bind_lock;
- char running; /* prot_hook is attached*/
+ unsigned int running:1, /* prot_hook is attached*/
+ auxdata:1,
+ origdev:1;
int ifindex; /* bound device */
- unsigned short num;
-#ifdef CONFIG_PACKET_MULTICAST
+ __be16 num;
struct packet_mclist *mclist;
-#endif
#ifdef CONFIG_PACKET_MMAP
atomic_t mapped;
unsigned int pg_vec_order;
#endif
};
+struct packet_skb_cb {
+ unsigned int origlen;
+ union {
+ struct sockaddr_pkt pkt;
+ struct sockaddr_ll ll;
+ } sa;
+};
+
+#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
+
#ifdef CONFIG_PACKET_MMAP
-static inline char *packet_lookup_frame(struct packet_sock *po, unsigned int position)
+static inline struct tpacket_hdr *packet_lookup_frame(struct packet_sock *po, unsigned int position)
{
unsigned int pg_vec_pos, frame_offset;
- char *frame;
pg_vec_pos = position / po->frames_per_block;
frame_offset = position % po->frames_per_block;
- frame = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
-
- return frame;
+ return (struct tpacket_hdr *)(po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size));
}
#endif
return;
}
- atomic_dec(&packet_socks_nr);
-#ifdef PACKET_REFCNT_DEBUG
- printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
-#endif
+ sk_refcnt_debug_dec(sk);
}
static const struct proto_ops packet_ops;
-#ifdef CONFIG_SOCK_PACKET
static const struct proto_ops packet_ops_spkt;
static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
*/
sk = pt->af_packet_priv;
-
+
/*
* Yank back the headers [hope the device set this
* right or kerboom...]
* Incoming packets have ll header pulled,
* push it back.
*
- * For outgoing ones skb->data == skb->mac.raw
+ * For outgoing ones skb->data == skb_mac_header(skb)
* so that this procedure is noop.
*/
if (skb->pkt_type == PACKET_LOOPBACK)
goto out;
+ if (dev->nd_net != sk->sk_net)
+ goto out;
+
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
goto oom;
/* drop conntrack reference */
nf_reset(skb);
- spkt = (struct sockaddr_pkt*)skb->cb;
+ spkt = &PACKET_SKB_CB(skb)->sa.pkt;
- skb_push(skb, skb->data-skb->mac.raw);
+ skb_push(skb, skb->data - skb_mac_header(skb));
/*
* The SOCK_PACKET socket receives _all_ frames.
* Output a raw packet to a device layer. This bypasses all the other
* protocol layers and you must therefore supply it with a complete frame
*/
-
+
static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t len)
{
struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
struct sk_buff *skb;
struct net_device *dev;
- unsigned short proto=0;
+ __be16 proto=0;
int err;
-
+
/*
- * Get and verify the address.
+ * Get and verify the address.
*/
if (saddr)
return(-ENOTCONN); /* SOCK_PACKET must be sent giving an address */
/*
- * Find the device first to size check it
+ * Find the device first to size check it
*/
saddr->spkt_device[13] = 0;
- dev = dev_get_by_name(saddr->spkt_device);
+ dev = dev_get_by_name(sk->sk_net, saddr->spkt_device);
err = -ENODEV;
if (dev == NULL)
goto out_unlock;
-
+
+ err = -ENETDOWN;
+ if (!(dev->flags & IFF_UP))
+ goto out_unlock;
+
/*
* You may not queue a frame bigger than the mtu. This is the lowest level
* raw protocol and you must do your own fragmentation at this level.
*/
-
+
err = -EMSGSIZE;
if (len > dev->mtu + dev->hard_header_len)
goto out_unlock;
* deal with the problem - do your own algorithmic backoffs. That's far
* more flexible.
*/
-
- if (skb == NULL)
+
+ if (skb == NULL)
goto out_unlock;
/*
- * Fill it in
+ * Fill it in
*/
-
+
/* FIXME: Save some space for broken drivers that write a
* hard header at transmission time by themselves. PPP is the
* notable one here. This should really be fixed at the driver level.
*/
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
/* Try to align data part correctly */
- if (dev->hard_header) {
+ if (dev->header_ops) {
skb->data -= dev->hard_header_len;
skb->tail -= dev->hard_header_len;
if (len < dev->hard_header_len)
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
}
/* Returns -EFAULT on error */
if (err)
goto out_free;
- err = -ENETDOWN;
- if (!(dev->flags & IFF_UP))
- goto out_free;
-
/*
* Now send it
*/
dev_put(dev);
return err;
}
-#endif
-static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res)
+static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
+ unsigned int res)
{
struct sk_filter *filter;
- bh_lock_sock(sk);
- filter = sk->sk_filter;
- /*
- * Our caller already checked that filter != NULL but we need to
- * verify that under bh_lock_sock() to be safe
- */
- if (likely(filter != NULL))
+ rcu_read_lock_bh();
+ filter = rcu_dereference(sk->sk_filter);
+ if (filter != NULL)
res = sk_run_filter(skb, filter->insns, filter->len);
- bh_unlock_sock(sk);
+ rcu_read_unlock_bh();
return res;
}
struct packet_sock *po;
u8 * skb_head = skb->data;
int skb_len = skb->len;
- unsigned snaplen;
+ unsigned int snaplen, res;
if (skb->pkt_type == PACKET_LOOPBACK)
goto drop;
sk = pt->af_packet_priv;
po = pkt_sk(sk);
+ if (dev->nd_net != sk->sk_net)
+ goto drop;
+
skb->dev = dev;
- if (dev->hard_header) {
+ if (dev->header_ops) {
/* The device has an explicit notion of ll header,
exported to higher levels.
never delivered to user.
*/
if (sk->sk_type != SOCK_DGRAM)
- skb_push(skb, skb->data - skb->mac.raw);
+ skb_push(skb, skb->data - skb_mac_header(skb));
else if (skb->pkt_type == PACKET_OUTGOING) {
/* Special case: outgoing packets have ll header at head */
- skb_pull(skb, skb->nh.raw - skb->data);
+ skb_pull(skb, skb_network_offset(skb));
}
}
snaplen = skb->len;
- if (sk->sk_filter) {
- unsigned res = run_filter(skb, sk, snaplen);
- if (res == 0)
- goto drop_n_restore;
- if (snaplen > res)
- snaplen = res;
- }
+ res = run_filter(skb, sk, snaplen);
+ if (!res)
+ goto drop_n_restore;
+ if (snaplen > res)
+ snaplen = res;
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
(unsigned)sk->sk_rcvbuf)
skb = nskb;
}
- sll = (struct sockaddr_ll*)skb->cb;
+ BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
+ sizeof(skb->cb));
+
+ sll = &PACKET_SKB_CB(skb)->sa.ll;
sll->sll_family = AF_PACKET;
sll->sll_hatype = dev->type;
sll->sll_protocol = skb->protocol;
sll->sll_pkttype = skb->pkt_type;
- sll->sll_ifindex = dev->ifindex;
- sll->sll_halen = 0;
+ if (unlikely(po->origdev))
+ sll->sll_ifindex = orig_dev->ifindex;
+ else
+ sll->sll_ifindex = dev->ifindex;
- if (dev->hard_header_parse)
- sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
+ sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
+
+ PACKET_SKB_CB(skb)->origlen = skb->len;
if (pskb_trim(skb, snaplen))
goto drop_n_acct;
struct tpacket_hdr *h;
u8 * skb_head = skb->data;
int skb_len = skb->len;
- unsigned snaplen;
+ unsigned int snaplen, res;
unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
unsigned short macoff, netoff;
struct sk_buff *copy_skb = NULL;
+ struct timeval tv;
if (skb->pkt_type == PACKET_LOOPBACK)
goto drop;
sk = pt->af_packet_priv;
po = pkt_sk(sk);
- if (dev->hard_header) {
+ if (dev->nd_net != sk->sk_net)
+ goto drop;
+
+ if (dev->header_ops) {
if (sk->sk_type != SOCK_DGRAM)
- skb_push(skb, skb->data - skb->mac.raw);
+ skb_push(skb, skb->data - skb_mac_header(skb));
else if (skb->pkt_type == PACKET_OUTGOING) {
/* Special case: outgoing packets have ll header at head */
- skb_pull(skb, skb->nh.raw - skb->data);
- if (skb->ip_summed == CHECKSUM_HW)
- status |= TP_STATUS_CSUMNOTREADY;
+ skb_pull(skb, skb_network_offset(skb));
}
}
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ status |= TP_STATUS_CSUMNOTREADY;
+
snaplen = skb->len;
- if (sk->sk_filter) {
- unsigned res = run_filter(skb, sk, snaplen);
- if (res == 0)
- goto drop_n_restore;
- if (snaplen > res)
- snaplen = res;
- }
+ res = run_filter(skb, sk, snaplen);
+ if (!res)
+ goto drop_n_restore;
+ if (snaplen > res)
+ snaplen = res;
if (sk->sk_type == SOCK_DGRAM) {
macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
} else {
- unsigned maclen = skb->nh.raw - skb->data;
+ unsigned maclen = skb_network_offset(skb);
netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
macoff = netoff - maclen;
}
if ((int)snaplen < 0)
snaplen = 0;
}
- if (snaplen > skb->len-skb->data_len)
- snaplen = skb->len-skb->data_len;
spin_lock(&sk->sk_receive_queue.lock);
- h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
-
+ h = packet_lookup_frame(po, po->head);
+
if (h->tp_status)
goto ring_is_full;
po->head = po->head != po->frame_max ? po->head+1 : 0;
status &= ~TP_STATUS_LOSING;
spin_unlock(&sk->sk_receive_queue.lock);
- memcpy((u8*)h + macoff, skb->data, snaplen);
+ skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen);
h->tp_len = skb->len;
h->tp_snaplen = snaplen;
h->tp_mac = macoff;
h->tp_net = netoff;
- if (skb->tstamp.off_sec == 0) {
- __net_timestamp(skb);
- sock_enable_timestamp(sk);
- }
- h->tp_sec = skb->tstamp.off_sec;
- h->tp_usec = skb->tstamp.off_usec;
+ if (skb->tstamp.tv64)
+ tv = ktime_to_timeval(skb->tstamp);
+ else
+ do_gettimeofday(&tv);
+ h->tp_sec = tv.tv_sec;
+ h->tp_usec = tv.tv_usec;
sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
- sll->sll_halen = 0;
- if (dev->hard_header_parse)
- sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
+ sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
sll->sll_family = AF_PACKET;
sll->sll_hatype = dev->type;
sll->sll_protocol = skb->protocol;
sll->sll_pkttype = skb->pkt_type;
- sll->sll_ifindex = dev->ifindex;
+ if (unlikely(po->origdev))
+ sll->sll_ifindex = orig_dev->ifindex;
+ else
+ sll->sll_ifindex = dev->ifindex;
h->tp_status = status;
- mb();
+ smp_mb();
{
struct page *p_start, *p_end;
skb->len = skb_len;
}
drop:
- kfree_skb(skb);
+ kfree_skb(skb);
return 0;
ring_is_full:
struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
struct sk_buff *skb;
struct net_device *dev;
- unsigned short proto;
+ __be16 proto;
unsigned char *addr;
int ifindex, err, reserve = 0;
/*
- * Get and verify the address.
+ * Get and verify the address.
*/
-
+
if (saddr == NULL) {
struct packet_sock *po = pkt_sk(sk);
}
- dev = dev_get_by_index(ifindex);
+ dev = dev_get_by_index(sk->sk_net, ifindex);
err = -ENXIO;
if (dev == NULL)
goto out_unlock;
if (sock->type == SOCK_RAW)
reserve = dev->hard_header_len;
+ err = -ENETDOWN;
+ if (!(dev->flags & IFF_UP))
+ goto out_unlock;
+
err = -EMSGSIZE;
if (len > dev->mtu+reserve)
goto out_unlock;
goto out_unlock;
skb_reserve(skb, LL_RESERVED_SPACE(dev));
- skb->nh.raw = skb->data;
+ skb_reset_network_header(skb);
- if (dev->hard_header) {
- int res;
- err = -EINVAL;
- res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
- if (sock->type != SOCK_DGRAM) {
- skb->tail = skb->data;
- skb->len = 0;
- } else if (res < 0)
- goto out_free;
- }
+ err = -EINVAL;
+ if (sock->type == SOCK_DGRAM &&
+ dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0)
+ goto out_free;
/* Returns -EFAULT on error */
err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
skb->dev = dev;
skb->priority = sk->sk_priority;
- err = -ENETDOWN;
- if (!(dev->flags & IFF_UP))
- goto out_free;
-
/*
* Now send it
*/
{
struct sock *sk = sock->sk;
struct packet_sock *po;
+ struct net *net;
if (!sk)
return 0;
+ net = sk->sk_net;
po = pkt_sk(sk);
- write_lock_bh(&packet_sklist_lock);
+ write_lock_bh(&net->packet.sklist_lock);
sk_del_node_init(sk);
- write_unlock_bh(&packet_sklist_lock);
+ write_unlock_bh(&net->packet.sklist_lock);
/*
* Unhook packet receive handler.
__sock_put(sk);
}
-#ifdef CONFIG_PACKET_MULTICAST
packet_flush_mclist(sk);
-#endif
#ifdef CONFIG_PACKET_MMAP
if (po->pg_vec) {
/* Purge queues */
skb_queue_purge(&sk->sk_receive_queue);
+ sk_refcnt_debug_release(sk);
sock_put(sk);
return 0;
* Attach a packet hook.
*/
-static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
+static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
struct packet_sock *po = pkt_sk(sk);
/*
if (protocol == 0)
goto out_unlock;
- if (dev) {
- if (dev->flags&IFF_UP) {
- dev_add_pack(&po->prot_hook);
- sock_hold(sk);
- po->running = 1;
- } else {
- sk->sk_err = ENETDOWN;
- if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
- }
- } else {
+ if (!dev || (dev->flags & IFF_UP)) {
dev_add_pack(&po->prot_hook);
sock_hold(sk);
po->running = 1;
+ } else {
+ sk->sk_err = ENETDOWN;
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_error_report(sk);
}
out_unlock:
* Bind a packet socket to a device
*/
-#ifdef CONFIG_SOCK_PACKET
-
static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sock *sk=sock->sk;
char name[15];
struct net_device *dev;
int err = -ENODEV;
-
+
/*
* Check legality
*/
-
+
if (addr_len != sizeof(struct sockaddr))
return -EINVAL;
strlcpy(name,uaddr->sa_data,sizeof(name));
- dev = dev_get_by_name(name);
+ dev = dev_get_by_name(sk->sk_net, name);
if (dev) {
err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
dev_put(dev);
}
return err;
}
-#endif
static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
/*
* Check legality
*/
-
+
if (addr_len < sizeof(struct sockaddr_ll))
return -EINVAL;
if (sll->sll_family != AF_PACKET)
if (sll->sll_ifindex) {
err = -ENODEV;
- dev = dev_get_by_index(sll->sll_ifindex);
+ dev = dev_get_by_index(sk->sk_net, sll->sll_ifindex);
if (dev == NULL)
goto out;
}
};
/*
- * Create a packet of type SOCK_PACKET.
+ * Create a packet of type SOCK_PACKET.
*/
-static int packet_create(struct socket *sock, int protocol)
+static int packet_create(struct net *net, struct socket *sock, int protocol)
{
struct sock *sk;
struct packet_sock *po;
+ __be16 proto = (__force __be16)protocol; /* weird, but documented */
int err;
if (!capable(CAP_NET_RAW))
return -EPERM;
- if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
-#ifdef CONFIG_SOCK_PACKET
- && sock->type != SOCK_PACKET
-#endif
- )
+ if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
+ sock->type != SOCK_PACKET)
return -ESOCKTNOSUPPORT;
sock->state = SS_UNCONNECTED;
err = -ENOBUFS;
- sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
+ sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
if (sk == NULL)
goto out;
sock->ops = &packet_ops;
-#ifdef CONFIG_SOCK_PACKET
if (sock->type == SOCK_PACKET)
sock->ops = &packet_ops_spkt;
-#endif
+
sock_init_data(sock, sk);
po = pkt_sk(sk);
sk->sk_family = PF_PACKET;
- po->num = protocol;
+ po->num = proto;
sk->sk_destruct = packet_sock_destruct;
- atomic_inc(&packet_socks_nr);
+ sk_refcnt_debug_inc(sk);
/*
* Attach a protocol block
spin_lock_init(&po->bind_lock);
po->prot_hook.func = packet_rcv;
-#ifdef CONFIG_SOCK_PACKET
+
if (sock->type == SOCK_PACKET)
po->prot_hook.func = packet_rcv_spkt;
-#endif
+
po->prot_hook.af_packet_priv = sk;
- if (protocol) {
- po->prot_hook.type = protocol;
+ if (proto) {
+ po->prot_hook.type = proto;
dev_add_pack(&po->prot_hook);
sock_hold(sk);
po->running = 1;
}
- write_lock_bh(&packet_sklist_lock);
- sk_add_node(sk, &packet_sklist);
- write_unlock_bh(&packet_sklist_lock);
+ write_lock_bh(&net->packet.sklist_lock);
+ sk_add_node(sk, &net->packet.sklist);
+ write_unlock_bh(&net->packet.sklist_lock);
return(0);
out:
return err;
skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
/*
- * An error occurred so return it. Because skb_recv_datagram()
+ * An error occurred so return it. Because skb_recv_datagram()
* handles the blocking we don't see and worry about blocking
* retries.
*/
* it in now.
*/
- sll = (struct sockaddr_ll*)skb->cb;
+ sll = &PACKET_SKB_CB(skb)->sa.ll;
if (sock->type == SOCK_PACKET)
msg->msg_namelen = sizeof(struct sockaddr_pkt);
else
sock_recv_timestamp(msg, sk, skb);
if (msg->msg_name)
- memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
+ memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
+ msg->msg_namelen);
+
+ if (pkt_sk(sk)->auxdata) {
+ struct tpacket_auxdata aux;
+
+ aux.tp_status = TP_STATUS_USER;
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ aux.tp_status |= TP_STATUS_CSUMNOTREADY;
+ aux.tp_len = PACKET_SKB_CB(skb)->origlen;
+ aux.tp_snaplen = skb->len;
+ aux.tp_mac = 0;
+ aux.tp_net = skb_network_offset(skb);
+
+ put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
+ }
/*
* Free or return the buffer as appropriate. Again this
return err;
}
-#ifdef CONFIG_SOCK_PACKET
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
int *uaddr_len, int peer)
{
return -EOPNOTSUPP;
uaddr->sa_family = AF_PACKET;
- dev = dev_get_by_index(pkt_sk(sk)->ifindex);
+ dev = dev_get_by_index(sk->sk_net, pkt_sk(sk)->ifindex);
if (dev) {
strlcpy(uaddr->sa_data, dev->name, 15);
dev_put(dev);
return 0;
}
-#endif
static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
int *uaddr_len, int peer)
sll->sll_family = AF_PACKET;
sll->sll_ifindex = po->ifindex;
sll->sll_protocol = po->num;
- dev = dev_get_by_index(po->ifindex);
+ dev = dev_get_by_index(sk->sk_net, po->ifindex);
if (dev) {
sll->sll_hatype = dev->type;
sll->sll_halen = dev->addr_len;
return 0;
}
-#ifdef CONFIG_PACKET_MULTICAST
static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
{
switch (i->type) {
rtnl_lock();
err = -ENODEV;
- dev = __dev_get_by_index(mreq->mr_ifindex);
+ dev = __dev_get_by_index(sk->sk_net, mreq->mr_ifindex);
if (!dev)
goto done;
if (--ml->count == 0) {
struct net_device *dev;
*mlp = ml->next;
- dev = dev_get_by_index(ml->ifindex);
+ dev = dev_get_by_index(sk->sk_net, ml->ifindex);
if (dev) {
packet_dev_mc(dev, ml, -1);
dev_put(dev);
struct net_device *dev;
po->mclist = ml->next;
- if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
+ if ((dev = dev_get_by_index(sk->sk_net, ml->ifindex)) != NULL) {
packet_dev_mc(dev, ml, -1);
dev_put(dev);
}
}
rtnl_unlock();
}
-#endif
static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
{
struct sock *sk = sock->sk;
+ struct packet_sock *po = pkt_sk(sk);
int ret;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
switch(optname) {
-#ifdef CONFIG_PACKET_MULTICAST
- case PACKET_ADD_MEMBERSHIP:
+ case PACKET_ADD_MEMBERSHIP:
case PACKET_DROP_MEMBERSHIP:
{
struct packet_mreq_max mreq;
ret = packet_mc_drop(sk, &mreq);
return ret;
}
-#endif
+
#ifdef CONFIG_PACKET_MMAP
case PACKET_RX_RING:
{
return 0;
}
#endif
+ case PACKET_AUXDATA:
+ {
+ int val;
+
+ if (optlen < sizeof(val))
+ return -EINVAL;
+ if (copy_from_user(&val, optval, sizeof(val)))
+ return -EFAULT;
+
+ po->auxdata = !!val;
+ return 0;
+ }
+ case PACKET_ORIGDEV:
+ {
+ int val;
+
+ if (optlen < sizeof(val))
+ return -EINVAL;
+ if (copy_from_user(&val, optval, sizeof(val)))
+ return -EFAULT;
+
+ po->origdev = !!val;
+ return 0;
+ }
default:
return -ENOPROTOOPT;
}
char __user *optval, int __user *optlen)
{
int len;
+ int val;
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
+ void *data;
+ struct tpacket_stats st;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
if (len < 0)
return -EINVAL;
-
+
switch(optname) {
case PACKET_STATISTICS:
- {
- struct tpacket_stats st;
-
if (len > sizeof(struct tpacket_stats))
len = sizeof(struct tpacket_stats);
spin_lock_bh(&sk->sk_receive_queue.lock);
spin_unlock_bh(&sk->sk_receive_queue.lock);
st.tp_packets += st.tp_drops;
- if (copy_to_user(optval, &st, len))
- return -EFAULT;
+ data = &st;
+ break;
+ case PACKET_AUXDATA:
+ if (len > sizeof(int))
+ len = sizeof(int);
+ val = po->auxdata;
+
+ data = &val;
+ break;
+ case PACKET_ORIGDEV:
+ if (len > sizeof(int))
+ len = sizeof(int);
+ val = po->origdev;
+
+ data = &val;
break;
- }
default:
return -ENOPROTOOPT;
}
if (put_user(len, optlen))
return -EFAULT;
+ if (copy_to_user(optval, data, len))
+ return -EFAULT;
return 0;
}
{
struct sock *sk;
struct hlist_node *node;
- struct net_device *dev = (struct net_device*)data;
+ struct net_device *dev = data;
+ struct net *net = dev->nd_net;
- read_lock(&packet_sklist_lock);
- sk_for_each(sk, node, &packet_sklist) {
+ read_lock(&net->packet.sklist_lock);
+ sk_for_each(sk, node, &net->packet.sklist) {
struct packet_sock *po = pkt_sk(sk);
switch (msg) {
case NETDEV_UNREGISTER:
-#ifdef CONFIG_PACKET_MULTICAST
if (po->mclist)
packet_dev_mclist(dev, po->mclist, -1);
- // fallthrough
-#endif
+ /* fallthrough */
+
case NETDEV_DOWN:
if (dev->ifindex == po->ifindex) {
spin_lock(&po->bind_lock);
break;
}
}
- read_unlock(&packet_sklist_lock);
+ read_unlock(&net->packet.sklist_lock);
return NOTIFY_DONE;
}
}
case SIOCGSTAMP:
return sock_get_timestamp(sk, (struct timeval __user *)arg);
-
+ case SIOCGSTAMPNS:
+ return sock_get_timestampns(sk, (struct timespec __user *)arg);
+
#ifdef CONFIG_INET
case SIOCADDRT:
case SIOCDELRT:
case SIOCGIFDSTADDR:
case SIOCSIFDSTADDR:
case SIOCSIFFLAGS:
+ if (sk->sk_net != &init_net)
+ return -ENOIOCTLCMD;
return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif
unsigned last = po->head ? po->head-1 : po->frame_max;
struct tpacket_hdr *h;
- h = (struct tpacket_hdr *)packet_lookup_frame(po, last);
+ h = packet_lookup_frame(po, last);
if (h->tp_status)
mask |= POLLIN | POLLRDNORM;
struct file *file = vma->vm_file;
struct socket * sock = file->private_data;
struct sock *sk = sock->sk;
-
+
if (sk)
atomic_inc(&pkt_sk(sk)->mapped);
}
struct file *file = vma->vm_file;
struct socket * sock = file->private_data;
struct sock *sk = sock->sk;
-
+
if (sk)
atomic_dec(&pkt_sk(sk)->mapped);
}
.close =packet_mm_close,
};
-static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
-{
- return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
-}
-
static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
int i;
{
char **pg_vec = NULL;
struct packet_sock *po = pkt_sk(sk);
- int was_running, num, order = 0;
+ int was_running, order = 0;
+ __be16 num;
int err = 0;
-
+
if (req->tp_block_nr) {
int i, l;
__sock_put(sk);
}
spin_unlock(&po->bind_lock);
-
+
synchronize_net();
err = -EBUSY;
#endif
-#ifdef CONFIG_SOCK_PACKET
static const struct proto_ops packet_ops_spkt = {
.family = PF_PACKET,
.owner = THIS_MODULE,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};
-#endif
static const struct proto_ops packet_ops = {
.family = PF_PACKET,
.connect = sock_no_connect,
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
- .getname = packet_getname,
+ .getname = packet_getname,
.poll = packet_poll,
.ioctl = packet_ioctl,
.listen = sock_no_listen,
};
#ifdef CONFIG_PROC_FS
-static inline struct sock *packet_seq_idx(loff_t off)
+static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
{
struct sock *s;
struct hlist_node *node;
- sk_for_each(s, node, &packet_sklist) {
+ sk_for_each(s, node, &net->packet.sklist) {
if (!off--)
return s;
}
}
static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(seq_file_net(seq)->packet.sklist_lock)
{
- read_lock(&packet_sklist_lock);
- return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
+ struct net *net = seq_file_net(seq);
+ read_lock(&net->packet.sklist_lock);
+ return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
}
static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net *net = seq_file_net(seq);
++*pos;
- return (v == SEQ_START_TOKEN)
- ? sk_head(&packet_sklist)
+ return (v == SEQ_START_TOKEN)
+ ? sk_head(&net->packet.sklist)
: sk_next((struct sock*)v) ;
}
static void packet_seq_stop(struct seq_file *seq, void *v)
+ __releases(seq_file_net(seq)->packet.sklist_lock)
{
- read_unlock(&packet_sklist_lock);
+ struct net *net = seq_file_net(seq);
+ read_unlock(&net->packet.sklist_lock);
}
-static int packet_seq_show(struct seq_file *seq, void *v)
+static int packet_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN)
seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
return 0;
}
-static struct seq_operations packet_seq_ops = {
+static const struct seq_operations packet_seq_ops = {
.start = packet_seq_start,
.next = packet_seq_next,
.stop = packet_seq_stop,
static int packet_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &packet_seq_ops);
+ return seq_open_net(inode, file, &packet_seq_ops,
+ sizeof(struct seq_net_private));
}
-static struct file_operations packet_seq_fops = {
+static const struct file_operations packet_seq_fops = {
.owner = THIS_MODULE,
.open = packet_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
};
#endif
+static int packet_net_init(struct net *net)
+{
+ rwlock_init(&net->packet.sklist_lock);
+ INIT_HLIST_HEAD(&net->packet.sklist);
+
+ if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void packet_net_exit(struct net *net)
+{
+ proc_net_remove(net, "packet");
+}
+
+static struct pernet_operations packet_net_ops = {
+ .init = packet_net_init,
+ .exit = packet_net_exit,
+};
+
+
static void __exit packet_exit(void)
{
- proc_net_remove("packet");
unregister_netdevice_notifier(&packet_netdev_notifier);
+ unregister_pernet_subsys(&packet_net_ops);
sock_unregister(PF_PACKET);
proto_unregister(&packet_proto);
}
goto out;
sock_register(&packet_family_ops);
+ register_pernet_subsys(&packet_net_ops);
register_netdevice_notifier(&packet_netdev_notifier);
- proc_net_fops_create("packet", 0, &packet_seq_fops);
out:
return rc;
}