#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
-#include <linux/smp_lock.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/audit.h>
-#include <linux/selinux.h>
#include <linux/mutex.h>
+#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>
#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
+#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
struct netlink_sock {
/* struct sock has to be the first member of netlink_sock */
struct netlink_callback *cb;
struct mutex *cb_mutex;
struct mutex cb_def_mutex;
- void (*data_ready)(struct sock *sk, int bytes);
+ void (*netlink_rcv)(struct sk_buff *skb);
struct module *module;
};
static inline struct netlink_sock *nlk_sk(struct sock *sk)
{
- return (struct netlink_sock *)sk;
+ return container_of(sk, struct netlink_sock, sk);
+}
+
+static inline int netlink_is_kernel(struct sock *sk)
+{
+ return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
}
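/*
 * A pid of 0 by itself does not identify a kernel socket: a user
 * socket also carries pid 0 until netlink_autobind() assigns one.
 * The NETLINK_KERNEL_SOCKET flag, set only on sockets made by
 * netlink_kernel_create(), is the unambiguous test.
 */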
struct nl_pid_hash {
static void netlink_sock_destruct(struct sock *sk)
{
+ struct netlink_sock *nlk = nlk_sk(sk);
+
+ if (nlk->cb) {
+ if (nlk->cb->done)
+ nlk->cb->done(nlk->cb);
+ netlink_destroy_callback(nlk->cb);
+ }
+
skb_queue_purge(&sk->sk_receive_queue);
if (!sock_flag(sk, SOCK_DEAD)) {
- printk("Freeing alive netlink socket %p\n", sk);
+ printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
return;
}
BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
- BUG_TRAP(!nlk_sk(sk)->cb);
BUG_TRAP(!nlk_sk(sk)->groups);
}
-/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on SMP.
- * Look, when several writers sleep and reader wakes them up, all but one
+/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
+ * SMP. Look, when several writers sleep and a reader wakes them up, all but one
* immediately hit write lock and grab all the cpus. Exclusive sleep solves
* this, _but_ remember, it adds useless work on UP machines.
*/
static void netlink_table_grab(void)
+ __acquires(nl_table_lock)
{
write_lock_irq(&nl_table_lock);
DECLARE_WAITQUEUE(wait, current);
add_wait_queue_exclusive(&nl_table_wait, &wait);
- for(;;) {
+ for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (atomic_read(&nl_table_users) == 0)
break;
}
}
-static __inline__ void netlink_table_ungrab(void)
+static void netlink_table_ungrab(void)
+ __releases(nl_table_lock)
{
write_unlock_irq(&nl_table_lock);
wake_up(&nl_table_wait);
}
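/*
 * Writer-side usage, as a minimal sketch: any update to nl_table
 * state is bracketed by the pair above, e.g.
 *
 *	netlink_table_grab();
 *	nl_table[protocol].groups = groups;
 *	netlink_table_ungrab();
 *
 * netlink_table_grab() sleeps until every reader that entered via
 * netlink_lock_table() has dropped out (nl_table_users reaches 0).
 */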
-static __inline__ void
+static inline void
netlink_lock_table(void)
{
/* read_lock() synchronizes us to netlink_table_grab */
read_unlock(&nl_table_lock);
}
-static __inline__ void
+static inline void
netlink_unlock_table(void)
{
if (atomic_dec_and_test(&nl_table_users))
wake_up(&nl_table_wait);
}
-static __inline__ struct sock *netlink_lookup(int protocol, u32 pid)
+static inline struct sock *netlink_lookup(struct net *net, int protocol,
+ u32 pid)
{
struct nl_pid_hash *hash = &nl_table[protocol].hash;
struct hlist_head *head;
read_lock(&nl_table_lock);
head = nl_pid_hashfn(hash, pid);
sk_for_each(sk, node, head) {
- if (nlk_sk(sk)->pid == pid) {
+ if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) {
sock_hold(sk);
goto found;
}
return sk;
}
-static inline struct hlist_head *nl_pid_hash_alloc(size_t size)
+static inline struct hlist_head *nl_pid_hash_zalloc(size_t size)
{
if (size <= PAGE_SIZE)
- return kmalloc(size, GFP_ATOMIC);
+ return kzalloc(size, GFP_ATOMIC);
else
return (struct hlist_head *)
- __get_free_pages(GFP_ATOMIC, get_order(size));
+ __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
+ get_order(size));
}
static inline void nl_pid_hash_free(struct hlist_head *table, size_t size)
size *= 2;
}
- table = nl_pid_hash_alloc(size);
+ table = nl_pid_hash_zalloc(size);
if (!table)
return 0;
- memset(table, 0, size);
otable = hash->table;
hash->table = table;
hash->mask = mask;
unsigned long mask;
unsigned int i;
- for (i = 0; i < NLGRPSZ(tbl->groups)/sizeof(unsigned long); i++) {
+ for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
mask = 0;
- sk_for_each_bound(sk, node, &tbl->mc_list)
- mask |= nlk_sk(sk)->groups[i];
+ sk_for_each_bound(sk, node, &tbl->mc_list) {
+ if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
+ mask |= nlk_sk(sk)->groups[i];
+ }
tbl->listeners[i] = mask;
}
/* this function is only called with the netlink table "grabbed", which
* makes sure updates are visible before bind or setsockopt return. */
}
-static int netlink_insert(struct sock *sk, u32 pid)
+static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
{
struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
struct hlist_head *head;
head = nl_pid_hashfn(hash, pid);
len = 0;
sk_for_each(osk, node, head) {
- if (nlk_sk(osk)->pid == pid)
+ if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid))
break;
len++;
}
.obj_size = sizeof(struct netlink_sock),
};
-static int __netlink_create(struct socket *sock, struct mutex *cb_mutex,
- int protocol)
+static int __netlink_create(struct net *net, struct socket *sock,
+ struct mutex *cb_mutex, int protocol)
{
struct sock *sk;
struct netlink_sock *nlk;
sock->ops = &netlink_ops;
- sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1);
+ sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
if (!sk)
return -ENOMEM;
return 0;
}
-static int netlink_create(struct socket *sock, int protocol)
+static int netlink_create(struct net *net, struct socket *sock, int protocol)
{
struct module *module = NULL;
struct mutex *cb_mutex;
if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
return -ESOCKTNOSUPPORT;
- if (protocol<0 || protocol >= MAX_LINKS)
+ if (protocol < 0 || protocol >= MAX_LINKS)
return -EPROTONOSUPPORT;
netlink_lock_table();
cb_mutex = nl_table[protocol].cb_mutex;
netlink_unlock_table();
- if ((err = __netlink_create(sock, cb_mutex, protocol)) < 0)
+ err = __netlink_create(net, sock, cb_mutex, protocol);
+ if (err < 0)
goto out_module;
nlk = nlk_sk(sock->sk);
sock_orphan(sk);
nlk = nlk_sk(sk);
- mutex_lock(nlk->cb_mutex);
- if (nlk->cb) {
- if (nlk->cb->done)
- nlk->cb->done(nlk->cb);
- netlink_destroy_callback(nlk->cb);
- nlk->cb = NULL;
- }
- mutex_unlock(nlk->cb_mutex);
-
- /* OK. Socket is unlinked, and, therefore,
- no new packets will arrive */
+ /*
+ * OK. Socket is unlinked, any packets that arrive now
+ * will be purged.
+ */
sock->sk = NULL;
wake_up_interruptible_all(&nlk->wait);
if (nlk->pid && !nlk->subscriptions) {
struct netlink_notify n = {
+ .net = sock_net(sk),
.protocol = sk->sk_protocol,
.pid = nlk->pid,
};
module_put(nlk->module);
netlink_table_grab();
- if (nlk->flags & NETLINK_KERNEL_SOCKET) {
- kfree(nl_table[sk->sk_protocol].listeners);
- nl_table[sk->sk_protocol].module = NULL;
- nl_table[sk->sk_protocol].registered = 0;
+ if (netlink_is_kernel(sk)) {
+ BUG_ON(nl_table[sk->sk_protocol].registered == 0);
+ if (--nl_table[sk->sk_protocol].registered == 0) {
+ kfree(nl_table[sk->sk_protocol].listeners);
+ nl_table[sk->sk_protocol].module = NULL;
+ nl_table[sk->sk_protocol].registered = 0;
+ }
} else if (nlk->subscriptions)
netlink_update_listeners(sk);
netlink_table_ungrab();
static int netlink_autobind(struct socket *sock)
{
struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
struct hlist_head *head;
struct sock *osk;
netlink_table_grab();
head = nl_pid_hashfn(hash, pid);
sk_for_each(osk, node, head) {
+ if (!net_eq(sock_net(osk), net))
+ continue;
if (nlk_sk(osk)->pid == pid) {
/* Bind collision, search negative pid values. */
pid = rover--;
}
netlink_table_ungrab();
- err = netlink_insert(sk, pid);
+ err = netlink_insert(sk, net, pid);
if (err == -EADDRINUSE)
goto retry;
nlk->subscriptions = subscriptions;
}
-static int netlink_alloc_groups(struct sock *sk)
+static int netlink_realloc_groups(struct sock *sk)
{
struct netlink_sock *nlk = nlk_sk(sk);
unsigned int groups;
+ unsigned long *new_groups;
int err = 0;
- netlink_lock_table();
+ netlink_table_grab();
+
groups = nl_table[sk->sk_protocol].groups;
- if (!nl_table[sk->sk_protocol].registered)
+ if (!nl_table[sk->sk_protocol].registered) {
err = -ENOENT;
- netlink_unlock_table();
+ goto out_unlock;
+ }
- if (err)
- return err;
+ if (nlk->ngroups >= groups)
+ goto out_unlock;
- nlk->groups = kzalloc(NLGRPSZ(groups), GFP_KERNEL);
- if (nlk->groups == NULL)
- return -ENOMEM;
+ new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
+ if (new_groups == NULL) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+ memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
+ NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));
+
+ nlk->groups = new_groups;
nlk->ngroups = groups;
- return 0;
+ out_unlock:
+ netlink_table_ungrab();
+ return err;
}
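/*
 * Note on the GFP_ATOMIC above: the reallocation runs with the
 * netlink table grabbed, i.e. under write_lock_irq(&nl_table_lock),
 * so it must not sleep. Only the newly grown tail of the bitmap is
 * zeroed; existing membership bits survive the krealloc().
 */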
-static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+static int netlink_bind(struct socket *sock, struct sockaddr *addr,
+ int addr_len)
{
struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
struct netlink_sock *nlk = nlk_sk(sk);
struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
int err;
if (nladdr->nl_groups) {
if (!netlink_capable(sock, NL_NONROOT_RECV))
return -EPERM;
- if (nlk->groups == NULL) {
- err = netlink_alloc_groups(sk);
- if (err)
- return err;
- }
+ err = netlink_realloc_groups(sk);
+ if (err)
+ return err;
}
if (nlk->pid) {
return -EINVAL;
} else {
err = nladdr->nl_pid ?
- netlink_insert(sk, nladdr->nl_pid) :
+ netlink_insert(sk, net, nladdr->nl_pid) :
netlink_autobind(sock);
if (err)
return err;
int err = 0;
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
- struct sockaddr_nl *nladdr=(struct sockaddr_nl*)addr;
+ struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
if (addr->sa_family == AF_UNSPEC) {
sk->sk_state = NETLINK_UNCONNECTED;
return err;
}
-static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr_len, int peer)
+static int netlink_getname(struct socket *sock, struct sockaddr *addr,
+ int *addr_len, int peer)
{
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
- struct sockaddr_nl *nladdr=(struct sockaddr_nl *)addr;
+ struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
nladdr->nl_family = AF_NETLINK;
nladdr->nl_pad = 0;
static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
{
- int protocol = ssk->sk_protocol;
struct sock *sock;
struct netlink_sock *nlk;
- sock = netlink_lookup(protocol, pid);
+ sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid);
if (!sock)
return ERR_PTR(-ECONNREFUSED);
/* Don't bother queuing skb if kernel socket has no input function */
nlk = nlk_sk(sock);
- if ((nlk->pid == 0 && !nlk->data_ready) ||
- (sock->sk_state == NETLINK_CONNECTED &&
- nlk->dst_pid != nlk_sk(ssk)->pid)) {
+ if (sock->sk_state == NETLINK_CONNECTED &&
+ nlk->dst_pid != nlk_sk(ssk)->pid) {
sock_put(sock);
return ERR_PTR(-ECONNREFUSED);
}
* 0: continue
* 1: repeat lookup - reference dropped while waiting for socket memory.
*/
-int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock,
- long timeo, struct sock *ssk)
+int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
+ long *timeo, struct sock *ssk)
{
struct netlink_sock *nlk;
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
test_bit(0, &nlk->state)) {
DECLARE_WAITQUEUE(wait, current);
- if (!timeo) {
- if (!ssk || nlk_sk(ssk)->pid == 0)
+ if (!*timeo) {
+ if (!ssk || netlink_is_kernel(ssk))
netlink_overrun(sk);
sock_put(sk);
kfree_skb(skb);
if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
test_bit(0, &nlk->state)) &&
!sock_flag(sk, SOCK_DEAD))
- timeo = schedule_timeout(timeo);
+ *timeo = schedule_timeout(*timeo);
__set_current_state(TASK_RUNNING);
remove_wait_queue(&nlk->wait, &wait);
if (signal_pending(current)) {
kfree_skb(skb);
- return sock_intr_errno(timeo);
+ return sock_intr_errno(*timeo);
}
return 1;
}
return 0;
}
-int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol)
+int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
int len = skb->len;
return skb;
}
-int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock)
+static inline void netlink_rcv_wake(struct sock *sk)
+{
+ struct netlink_sock *nlk = nlk_sk(sk);
+
+ if (skb_queue_empty(&sk->sk_receive_queue))
+ clear_bit(0, &nlk->state);
+ if (!test_bit(0, &nlk->state))
+ wake_up_interruptible(&nlk->wait);
+}
+
+static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb)
+{
+ int ret;
+ struct netlink_sock *nlk = nlk_sk(sk);
+
+ ret = -ECONNREFUSED;
+ if (nlk->netlink_rcv != NULL) {
+ ret = skb->len;
+ skb_set_owner_r(skb, sk);
+ nlk->netlink_rcv(skb);
+ }
+ kfree_skb(skb);
+ sock_put(sk);
+ return ret;
+}
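/*
 * The netlink_rcv hook runs synchronously in the sender's context.
 * A kernel socket's input function therefore usually just walks and
 * dispatches the messages, e.g. (a sketch; my_rcv and my_msg are
 * hypothetical names):
 *
 *	static int my_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 *	{
 *		return 0;
 *	}
 *
 *	static void my_rcv(struct sk_buff *skb)
 *	{
 *		netlink_rcv_skb(skb, &my_msg);
 *	}
 */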
+
+int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
+ u32 pid, int nonblock)
{
struct sock *sk;
int err;
kfree_skb(skb);
return PTR_ERR(sk);
}
- err = netlink_attachskb(sk, skb, nonblock, timeo, ssk);
+ if (netlink_is_kernel(sk))
+ return netlink_unicast_kernel(sk, skb);
+
+ if (sk_filter(sk, skb)) {
+ int err = skb->len;
+ kfree_skb(skb);
+ sock_put(sk);
+ return err;
+ }
+
+ err = netlink_attachskb(sk, skb, &timeo, ssk);
if (err == 1)
goto retry;
if (err)
return err;
- return netlink_sendskb(sk, skb, ssk->sk_protocol);
+ return netlink_sendskb(sk, skb);
}
+EXPORT_SYMBOL(netlink_unicast);
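/*
 * Caller-side sketch (names hypothetical): a subsystem replying to
 * the sender of a request, with reply_skb already built:
 *
 *	err = netlink_unicast(nlsk, reply_skb,
 *			      NETLINK_CB(req_skb).pid, MSG_DONTWAIT);
 *
 * A positive return is the number of bytes delivered; on failure the
 * skb has already been consumed and a negative errno comes back.
 */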
int netlink_has_listeners(struct sock *sk, unsigned int group)
{
int res = 0;
+ unsigned long *listeners;
+
- BUG_ON(!(nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET));
+ BUG_ON(!netlink_is_kernel(sk));
+
+ rcu_read_lock();
+ listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);
if (group - 1 < nl_table[sk->sk_protocol].groups)
- res = test_bit(group - 1, nl_table[sk->sk_protocol].listeners);
+ res = test_bit(group - 1, listeners);
+
+ rcu_read_unlock();
+
return res;
}
EXPORT_SYMBOL_GPL(netlink_has_listeners);
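/*
 * This lets a subsystem skip building a notification when nobody has
 * subscribed to the group, roughly (MY_GRP is hypothetical):
 *
 *	if (!netlink_has_listeners(nlsk, MY_GRP))
 *		return;
 *	...build skb...
 *	netlink_broadcast(nlsk, skb, 0, MY_GRP, GFP_KERNEL);
 */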
-static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
+static inline int netlink_broadcast_deliver(struct sock *sk,
+ struct sk_buff *skb)
{
struct netlink_sock *nlk = nlk_sk(sk);
struct netlink_broadcast_data {
struct sock *exclude_sk;
+ struct net *net;
u32 pid;
u32 group;
int failure;
!test_bit(p->group - 1, nlk->groups))
goto out;
+ if (!net_eq(sock_net(sk), p->net))
+ goto out;
+
if (p->failure) {
netlink_overrun(sk);
goto out;
netlink_overrun(sk);
/* Clone failed. Notify ALL listeners. */
p->failure = 1;
+ } else if (sk_filter(sk, p->skb2)) {
+ kfree_skb(p->skb2);
+ p->skb2 = NULL;
} else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
netlink_overrun(sk);
} else {
int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
u32 group, gfp_t allocation)
{
+ struct net *net = sock_net(ssk);
struct netlink_broadcast_data info;
struct hlist_node *node;
struct sock *sk;
skb = netlink_trim(skb, allocation);
info.exclude_sk = ssk;
+ info.net = net;
info.pid = pid;
info.group = group;
info.failure = 0;
return -ENOBUFS;
return -ESRCH;
}
+EXPORT_SYMBOL(netlink_broadcast);
struct netlink_set_err_data {
struct sock *exclude_sk;
if (sk == p->exclude_sk)
goto out;
+ if (sock_net(sk) != sock_net(p->exclude_sk))
+ goto out;
+
if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
!test_bit(p->group - 1, nlk->groups))
goto out;
read_unlock(&nl_table_lock);
}
+/* must be called with netlink table grabbed */
+static void netlink_update_socket_mc(struct netlink_sock *nlk,
+ unsigned int group,
+ int is_new)
+{
+ int old, new = !!is_new, subscriptions;
+
+ old = test_bit(group - 1, nlk->groups);
+ subscriptions = nlk->subscriptions - old + new;
+ if (new)
+ __set_bit(group - 1, nlk->groups);
+ else
+ __clear_bit(group - 1, nlk->groups);
+ netlink_update_subscriptions(&nlk->sk, subscriptions);
+ netlink_update_listeners(&nlk->sk);
+}
+
static int netlink_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, int optlen)
{
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
- int val = 0, err;
+ unsigned int val = 0;
+ int err;
if (level != SOL_NETLINK)
return -ENOPROTOOPT;
if (optlen >= sizeof(int) &&
- get_user(val, (int __user *)optval))
+ get_user(val, (unsigned int __user *)optval))
return -EFAULT;
switch (optname) {
break;
case NETLINK_ADD_MEMBERSHIP:
case NETLINK_DROP_MEMBERSHIP: {
- unsigned int subscriptions;
- int old, new = optname == NETLINK_ADD_MEMBERSHIP ? 1 : 0;
-
if (!netlink_capable(sock, NL_NONROOT_RECV))
return -EPERM;
- if (nlk->groups == NULL) {
- err = netlink_alloc_groups(sk);
- if (err)
- return err;
- }
+ err = netlink_realloc_groups(sk);
+ if (err)
+ return err;
if (!val || val - 1 >= nlk->ngroups)
return -EINVAL;
netlink_table_grab();
- old = test_bit(val - 1, nlk->groups);
- subscriptions = nlk->subscriptions - old + new;
- if (new)
- __set_bit(val - 1, nlk->groups);
- else
- __clear_bit(val - 1, nlk->groups);
- netlink_update_subscriptions(sk, subscriptions);
- netlink_update_listeners(sk);
+ netlink_update_socket_mc(nlk, val,
+ optname == NETLINK_ADD_MEMBERSHIP);
netlink_table_ungrab();
err = 0;
break;
put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
}
-static inline void netlink_rcv_wake(struct sock *sk)
-{
- struct netlink_sock *nlk = nlk_sk(sk);
-
- if (skb_queue_empty(&sk->sk_receive_queue))
- clear_bit(0, &nlk->state);
- if (!test_bit(0, &nlk->state))
- wake_up_interruptible(&nlk->wait);
-}
-
static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
struct msghdr *msg, size_t len)
{
struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
- struct sockaddr_nl *addr=msg->msg_name;
+ struct sockaddr_nl *addr = msg->msg_name;
u32 dst_pid;
u32 dst_group;
struct sk_buff *skb;
goto out;
err = -ENOBUFS;
skb = alloc_skb(len, GFP_KERNEL);
- if (skb==NULL)
+ if (skb == NULL)
goto out;
NETLINK_CB(skb).pid = nlk->pid;
NETLINK_CB(skb).dst_group = dst_group;
- NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context);
- selinux_get_task_sid(current, &(NETLINK_CB(skb).sid));
+ NETLINK_CB(skb).loginuid = audit_get_loginuid(current);
+ NETLINK_CB(skb).sessionid = audit_get_sessionid(current);
+ security_task_getsecid(current, &(NETLINK_CB(skb).sid));
memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
/* What can I do? Netlink is asynchronous, so that
*/
err = -EFAULT;
- if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) {
+ if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
kfree_skb(skb);
goto out;
}
copied = 0;
- skb = skb_recv_datagram(sk,flags,noblock,&err);
- if (skb==NULL)
+ skb = skb_recv_datagram(sk, flags, noblock, &err);
+ if (skb == NULL)
goto out;
msg->msg_namelen = 0;
err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
if (msg->msg_name) {
- struct sockaddr_nl *addr = (struct sockaddr_nl*)msg->msg_name;
+ struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
addr->nl_family = AF_NETLINK;
addr->nl_pad = 0;
addr->nl_pid = NETLINK_CB(skb).pid;
siocb->scm = &scm;
}
siocb->scm->creds = *NETLINK_CREDS(skb);
+ if (flags & MSG_TRUNC)
+ copied = skb->len;
skb_free_datagram(sk, skb);
if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
netlink_dump(sk);
scm_recv(sock, msg, siocb->scm, flags);
-
- if (flags & MSG_TRUNC)
- copied = skb->len;
-
out:
netlink_rcv_wake(sk);
return err ? : copied;
static void netlink_data_ready(struct sock *sk, int len)
{
- struct netlink_sock *nlk = nlk_sk(sk);
-
- if (nlk->data_ready)
- nlk->data_ready(sk, len);
- netlink_rcv_wake(sk);
+ BUG();
}
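/*
 * Kernel sockets now receive their traffic synchronously through
 * nlk->netlink_rcv (see netlink_unicast_kernel()), so nothing should
 * ever be queued to their receive queue; reaching this sk_data_ready
 * hook indicates a bug.
 */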
/*
*/
struct sock *
-netlink_kernel_create(int unit, unsigned int groups,
- void (*input)(struct sock *sk, int len),
+netlink_kernel_create(struct net *net, int unit, unsigned int groups,
+ void (*input)(struct sk_buff *skb),
struct mutex *cb_mutex, struct module *module)
{
struct socket *sock;
BUG_ON(!nl_table);
- if (unit<0 || unit>=MAX_LINKS)
+ if (unit < 0 || unit >= MAX_LINKS)
return NULL;
if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
return NULL;
- if (__netlink_create(sock, cb_mutex, unit) < 0)
- goto out_sock_release;
+ /*
+ * We only need a reference on the net from sk, but must not
+ * get_net() it: we cannot get and then put the net here.
+ * So we create the socket inside init_net and then move it to net.
+ */
+
+ if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
+ goto out_sock_release_nosk;
+
+ sk = sock->sk;
+ sk_change_net(sk, net);
if (groups < 32)
groups = 32;
if (!listeners)
goto out_sock_release;
- sk = sock->sk;
sk->sk_data_ready = netlink_data_ready;
if (input)
- nlk_sk(sk)->data_ready = input;
+ nlk_sk(sk)->netlink_rcv = input;
- if (netlink_insert(sk, 0))
+ if (netlink_insert(sk, net, 0))
goto out_sock_release;
nlk = nlk_sk(sk);
nlk->flags |= NETLINK_KERNEL_SOCKET;
netlink_table_grab();
- nl_table[unit].groups = groups;
- nl_table[unit].listeners = listeners;
- nl_table[unit].cb_mutex = cb_mutex;
- nl_table[unit].module = module;
- nl_table[unit].registered = 1;
+ if (!nl_table[unit].registered) {
+ nl_table[unit].groups = groups;
+ nl_table[unit].listeners = listeners;
+ nl_table[unit].cb_mutex = cb_mutex;
+ nl_table[unit].module = module;
+ nl_table[unit].registered = 1;
+ } else {
+ kfree(listeners);
+ nl_table[unit].registered++;
+ }
netlink_table_ungrab();
-
return sk;
out_sock_release:
kfree(listeners);
+ netlink_kernel_release(sk);
+ return NULL;
+
+out_sock_release_nosk:
sock_release(sock);
return NULL;
}
+EXPORT_SYMBOL(netlink_kernel_create);
+
+
+void
+netlink_kernel_release(struct sock *sk)
+{
+ sk_release_kernel(sk);
+}
+EXPORT_SYMBOL(netlink_kernel_release);
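/*
 * Lifecycle sketch (names hypothetical): a subsystem creates its
 * socket once, usually at init time, and releases it on exit:
 *
 *	nlsk = netlink_kernel_create(net, NETLINK_USERSOCK, 0,
 *				     my_rcv, NULL, THIS_MODULE);
 *	if (nlsk == NULL)
 *		return -ENOMEM;
 *	...
 *	netlink_kernel_release(nlsk);
 *
 * The error path above uses the same release once the socket has
 * been moved into its target namespace.
 */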
+
+
+/**
+ * netlink_change_ngroups - change number of multicast groups
+ *
+ * This changes the number of multicast groups that are available
+ * on a certain netlink family. Note that it is not possible to
+ * change the number of groups to below 32. Also note that it does
+ * not implicitly call netlink_clear_multicast_users() when the
+ * number of groups is reduced.
+ *
+ * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
+ * @groups: The new number of groups.
+ */
+int netlink_change_ngroups(struct sock *sk, unsigned int groups)
+{
+ unsigned long *listeners, *old = NULL;
+ struct netlink_table *tbl = &nl_table[sk->sk_protocol];
+ int err = 0;
+
+ if (groups < 32)
+ groups = 32;
+
+ netlink_table_grab();
+ if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
+ listeners = kzalloc(NLGRPSZ(groups), GFP_ATOMIC);
+ if (!listeners) {
+ err = -ENOMEM;
+ goto out_ungrab;
+ }
+ old = tbl->listeners;
+ memcpy(listeners, old, NLGRPSZ(tbl->groups));
+ rcu_assign_pointer(tbl->listeners, listeners);
+ }
+ tbl->groups = groups;
+
+ out_ungrab:
+ netlink_table_ungrab();
+ synchronize_rcu();
+ kfree(old);
+ return err;
+}
+EXPORT_SYMBOL(netlink_change_ngroups);
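/*
 * The listener bitmap grows via the usual RCU publish pattern: copy
 * into a larger buffer, rcu_assign_pointer() the new bitmap in, wait
 * out current readers with synchronize_rcu(), then free the old one.
 * netlink_has_listeners() is the read side: it dereferences the
 * pointer under rcu_read_lock() and so sees either the old or the
 * new bitmap, never freed memory.
 */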
+
+/**
+ * netlink_clear_multicast_users - kick off multicast listeners
+ *
+ * This function removes all listeners from the given group.
+ * @ksk: The kernel netlink socket, as returned by
+ * netlink_kernel_create().
+ * @group: The multicast group to clear.
+ */
+void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
+{
+ struct sock *sk;
+ struct hlist_node *node;
+ struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
+
+ netlink_table_grab();
+
+ sk_for_each_bound(sk, node, &tbl->mc_list)
+ netlink_update_socket_mc(nlk_sk(sk), group, 0);
+
+ netlink_table_ungrab();
+}
+EXPORT_SYMBOL(netlink_clear_multicast_users);
void netlink_set_nonroot(int protocol, unsigned int flags)
{
if ((unsigned int)protocol < MAX_LINKS)
nl_table[protocol].nl_nonroot = flags;
}
+EXPORT_SYMBOL(netlink_set_nonroot);
static void netlink_destroy_callback(struct netlink_callback *cb)
{
if (len > 0) {
mutex_unlock(nlk->cb_mutex);
- skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, len);
+
+ if (sk_filter(sk, skb))
+ kfree_skb(skb);
+ else {
+ skb_queue_tail(&sk->sk_receive_queue, skb);
+ sk->sk_data_ready(sk, skb->len);
+ }
return 0;
}
memcpy(nlmsg_data(nlh), &len, sizeof(len));
- skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
+ if (sk_filter(sk, skb))
+ kfree_skb(skb);
+ else {
+ skb_queue_tail(&sk->sk_receive_queue, skb);
+ sk->sk_data_ready(sk, skb->len);
+ }
if (cb->done)
cb->done(cb);
int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
struct nlmsghdr *nlh,
- int (*dump)(struct sk_buff *skb, struct netlink_callback*),
- int (*done)(struct netlink_callback*))
+ int (*dump)(struct sk_buff *skb,
+ struct netlink_callback *),
+ int (*done)(struct netlink_callback *))
{
struct netlink_callback *cb;
struct sock *sk;
atomic_inc(&skb->users);
cb->skb = skb;
- sk = netlink_lookup(ssk->sk_protocol, NETLINK_CB(skb).pid);
+ sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid);
if (sk == NULL) {
netlink_destroy_callback(cb);
return -ECONNREFUSED;
}
nlk = nlk_sk(sk);
- /* A dump or destruction is in progress... */
+ /* A dump is in progress... */
mutex_lock(nlk->cb_mutex);
- if (nlk->cb || sock_flag(sk, SOCK_DEAD)) {
+ if (nlk->cb) {
mutex_unlock(nlk->cb_mutex);
netlink_destroy_callback(cb);
sock_put(sk);
sock_put(sk);
/* We successfully started a dump, by returning -EINTR we
- * signal the queue mangement to interrupt processing of
- * any netlink messages so userspace gets a chance to read
- * the results. */
+ * signal not to send ACK even if it was requested.
+ */
return -EINTR;
}
+EXPORT_SYMBOL(netlink_dump_start);
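/*
 * Caller sketch: a request handler starts a dump when userspace asks
 * for one (my_dump and my_done are hypothetical):
 *
 *	if (nlh->nlmsg_flags & NLM_F_DUMP)
 *		return netlink_dump_start(nlsk, skb, nlh,
 *					  my_dump, my_done);
 *
 * The -EINTR then propagates to netlink_rcv_skb(), which skips the
 * ACK for this message; the dump output itself is the reply.
 */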
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
{
if (!skb) {
struct sock *sk;
- sk = netlink_lookup(in_skb->sk->sk_protocol,
+ sk = netlink_lookup(sock_net(in_skb->sk),
+ in_skb->sk->sk_protocol,
NETLINK_CB(in_skb).pid);
if (sk) {
sk->sk_err = ENOBUFS;
memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
}
+EXPORT_SYMBOL(netlink_ack);
-static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
+int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
struct nlmsghdr *))
{
struct nlmsghdr *nlh;
int err;
while (skb->len >= nlmsg_total_size(0)) {
+ int msglen;
+
nlh = nlmsg_hdr(skb);
err = 0;
/* Only requests are handled by the kernel */
if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
- goto skip;
+ goto ack;
/* Skip control messages */
if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
- goto skip;
+ goto ack;
err = cb(skb, nlh);
- if (err == -EINTR) {
- /* Not an error, but we interrupt processing */
- netlink_queue_skip(nlh, skb);
- return err;
- }
-skip:
+ if (err == -EINTR)
+ goto skip;
+
+ack:
if (nlh->nlmsg_flags & NLM_F_ACK || err)
netlink_ack(skb, nlh, err);
- netlink_queue_skip(nlh, skb);
+skip:
+ msglen = NLMSG_ALIGN(nlh->nlmsg_len);
+ if (msglen > skb->len)
+ msglen = skb->len;
+ skb_pull(skb, msglen);
}
return 0;
}
-
-/**
- * nelink_run_queue - Process netlink receive queue.
- * @sk: Netlink socket containing the queue
- * @qlen: Place to store queue length upon entry
- * @cb: Callback function invoked for each netlink message found
- *
- * Processes as much as there was in the queue upon entry and invokes
- * a callback function for each netlink message found. The callback
- * function may refuse a message by returning a negative error code
- * but setting the error pointer to 0 in which case this function
- * returns with a qlen != 0.
- *
- * qlen must be initialized to 0 before the initial entry, afterwards
- * the function may be called repeatedly until qlen reaches 0.
- *
- * The callback function may return -EINTR to signal that processing
- * of netlink messages shall be interrupted. In this case the message
- * currently being processed will NOT be requeued onto the receive
- * queue.
- */
-void netlink_run_queue(struct sock *sk, unsigned int *qlen,
- int (*cb)(struct sk_buff *, struct nlmsghdr *))
-{
- struct sk_buff *skb;
-
- if (!*qlen || *qlen > skb_queue_len(&sk->sk_receive_queue))
- *qlen = skb_queue_len(&sk->sk_receive_queue);
-
- for (; *qlen; (*qlen)--) {
- skb = skb_dequeue(&sk->sk_receive_queue);
- if (netlink_rcv_skb(skb, cb)) {
- if (skb->len)
- skb_queue_head(&sk->sk_receive_queue, skb);
- else {
- kfree_skb(skb);
- (*qlen)--;
- }
- break;
- }
-
- kfree_skb(skb);
- }
-}
-
-/**
- * netlink_queue_skip - Skip netlink message while processing queue.
- * @nlh: Netlink message to be skipped
- * @skb: Socket buffer containing the netlink messages.
- *
- * Pulls the given netlink message off the socket buffer so the next
- * call to netlink_queue_run() will not reconsider the message.
- */
-void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
-{
- int msglen = NLMSG_ALIGN(nlh->nlmsg_len);
-
- if (msglen > skb->len)
- msglen = skb->len;
-
- skb_pull(skb, msglen);
-}
+EXPORT_SYMBOL(netlink_rcv_skb);
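/*
 * netlink_rcv_skb() walks every message in the skb: headers advance
 * by NLMSG_ALIGN(nlmsg_len), the callback only ever sees
 * NLM_F_REQUEST messages of non-control type, and an ACK is sent
 * whenever NLM_F_ACK was requested or the callback failed. Clamping
 * msglen to skb->len keeps a malformed nlmsg_len from running past
 * the end of the buffer.
 */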
/**
* nlmsg_notify - send a notification netlink message
return err;
}
+EXPORT_SYMBOL(nlmsg_notify);
#ifdef CONFIG_PROC_FS
struct nl_seq_iter {
+ struct seq_net_private p;
int link;
int hash_idx;
};
struct hlist_node *node;
loff_t off = 0;
- for (i=0; i<MAX_LINKS; i++) {
+ for (i = 0; i < MAX_LINKS; i++) {
struct nl_pid_hash *hash = &nl_table[i].hash;
for (j = 0; j <= hash->mask; j++) {
sk_for_each(s, node, &hash->table[j]) {
+ if (sock_net(s) != seq_file_net(seq))
+ continue;
if (off == pos) {
iter->link = i;
iter->hash_idx = j;
}
static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(nl_table_lock)
{
read_lock(&nl_table_lock);
return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
if (v == SEQ_START_TOKEN)
return netlink_seq_socket_idx(seq, 0);
- s = sk_next(v);
+ iter = seq->private;
+ s = v;
+ do {
+ s = sk_next(s);
+ } while (s && sock_net(s) != seq_file_net(seq));
if (s)
return s;
- iter = seq->private;
i = iter->link;
j = iter->hash_idx + 1;
for (; j <= hash->mask; j++) {
s = sk_head(&hash->table[j]);
+ while (s && sock_net(s) != seq_file_net(seq))
+ s = sk_next(s);
if (s) {
iter->link = i;
iter->hash_idx = j;
}
static void netlink_seq_stop(struct seq_file *seq, void *v)
+ __releases(nl_table_lock)
{
read_unlock(&nl_table_lock);
}
return 0;
}
-static struct seq_operations netlink_seq_ops = {
+static const struct seq_operations netlink_seq_ops = {
.start = netlink_seq_start,
.next = netlink_seq_next,
.stop = netlink_seq_stop,
static int netlink_seq_open(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
- struct nl_seq_iter *iter;
- int err;
-
- iter = kzalloc(sizeof(*iter), GFP_KERNEL);
- if (!iter)
- return -ENOMEM;
-
- err = seq_open(file, &netlink_seq_ops);
- if (err) {
- kfree(iter);
- return err;
- }
-
- seq = file->private_data;
- seq->private = iter;
- return 0;
+ return seq_open_net(inode, file, &netlink_seq_ops,
+ sizeof(struct nl_seq_iter));
}
static const struct file_operations netlink_seq_fops = {
.open = netlink_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
#endif
{
return atomic_notifier_chain_register(&netlink_chain, nb);
}
+EXPORT_SYMBOL(netlink_register_notifier);
int netlink_unregister_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_unregister(&netlink_chain, nb);
}
+EXPORT_SYMBOL(netlink_unregister_notifier);
static const struct proto_ops netlink_ops = {
.family = PF_NETLINK,
.owner = THIS_MODULE, /* for consistency 8) */
};
+static int __net_init netlink_net_init(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops))
+ return -ENOMEM;
+#endif
+ return 0;
+}
+
+static void __net_exit netlink_net_exit(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ proc_net_remove(net, "netlink");
+#endif
+}
+
+static struct pernet_operations __net_initdata netlink_net_ops = {
+ .init = netlink_net_init,
+ .exit = netlink_net_exit,
+};
+
static int __init netlink_proto_init(void)
{
struct sk_buff *dummy_skb;
int i;
- unsigned long max;
+ unsigned long limit;
unsigned int order;
int err = proto_register(&netlink_proto, 0);
goto panic;
if (num_physpages >= (128 * 1024))
- max = num_physpages >> (21 - PAGE_SHIFT);
+ limit = num_physpages >> (21 - PAGE_SHIFT);
else
- max = num_physpages >> (23 - PAGE_SHIFT);
+ limit = num_physpages >> (23 - PAGE_SHIFT);
- order = get_bitmask_order(max) - 1 + PAGE_SHIFT;
- max = (1UL << order) / sizeof(struct hlist_head);
- order = get_bitmask_order(max > UINT_MAX ? UINT_MAX : max) - 1;
+ order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
+ limit = (1UL << order) / sizeof(struct hlist_head);
+ order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
for (i = 0; i < MAX_LINKS; i++) {
struct nl_pid_hash *hash = &nl_table[i].hash;
- hash->table = nl_pid_hash_alloc(1 * sizeof(*hash->table));
+ hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table));
if (!hash->table) {
while (i-- > 0)
nl_pid_hash_free(nl_table[i].hash.table,
kfree(nl_table);
goto panic;
}
- memset(hash->table, 0, 1 * sizeof(*hash->table));
hash->max_shift = order;
hash->shift = 0;
hash->mask = 0;
}
sock_register(&netlink_family_ops);
-#ifdef CONFIG_PROC_FS
- proc_net_fops_create("netlink", 0, &netlink_seq_fops);
-#endif
+ register_pernet_subsys(&netlink_net_ops);
/* The netlink device handler may be needed early. */
rtnetlink_init();
out:
}
core_initcall(netlink_proto_init);
-
-EXPORT_SYMBOL(netlink_ack);
-EXPORT_SYMBOL(netlink_run_queue);
-EXPORT_SYMBOL(netlink_queue_skip);
-EXPORT_SYMBOL(netlink_broadcast);
-EXPORT_SYMBOL(netlink_dump_start);
-EXPORT_SYMBOL(netlink_kernel_create);
-EXPORT_SYMBOL(netlink_register_notifier);
-EXPORT_SYMBOL(netlink_set_nonroot);
-EXPORT_SYMBOL(netlink_unicast);
-EXPORT_SYMBOL(netlink_unregister_notifier);
-EXPORT_SYMBOL(nlmsg_notify);