struct module *module;
};
+struct listeners_rcu_head {
+ struct rcu_head rcu_head;
+ void *ptr;
+};
+
#define NETLINK_KERNEL_SOCKET 0x1
#define NETLINK_RECV_PKTINFO 0x2
#define NETLINK_BROADCAST_SEND_ERROR 0x4
* this, _but_ remember, it adds useless work on UP machines.
*/
-static void netlink_table_grab(void)
+void netlink_table_grab(void)
__acquires(nl_table_lock)
{
+ might_sleep();
+
write_lock_irq(&nl_table_lock);
if (atomic_read(&nl_table_users)) {
}
}
-static void netlink_table_ungrab(void)
+void netlink_table_ungrab(void)
__releases(nl_table_lock)
{
write_unlock_irq(&nl_table_lock);
return 0;
}
-static int netlink_create(struct net *net, struct socket *sock, int protocol)
+static int netlink_create(struct net *net, struct socket *sock, int protocol,
+ int kern)
{
struct module *module = NULL;
struct mutex *cb_mutex;
if (nl_table[protocol].registered &&
try_module_get(nl_table[protocol].module))
module = nl_table[protocol].module;
+ else
+ err = -EPROTONOSUPPORT;
cb_mutex = nl_table[protocol].cb_mutex;
netlink_unlock_table();
+ if (err < 0)
+ goto out;
+
err = __netlink_create(net, sock, cb_mutex, protocol);
if (err < 0)
goto out_module;
skb_queue_purge(&sk->sk_write_queue);
- if (nlk->pid && !nlk->subscriptions) {
+ if (nlk->pid) {
struct netlink_notify n = {
.net = sock_net(sk),
.protocol = sk->sk_protocol,
struct hlist_head *head;
struct sock *osk;
struct hlist_node *node;
- s32 pid = current->tgid;
+ s32 pid = task_tgid_vnr(current);
int err;
static s32 rover = -4097;
struct netlink_sock *nlk = nlk_sk(sk);
struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
+ if (alen < sizeof(addr->sa_family))
+ return -EINVAL;
+
if (addr->sa_family == AF_UNSPEC) {
sk->sk_state = NETLINK_UNCONNECTED;
nlk->dst_pid = 0;
{
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
- struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
+ DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);
nladdr->nl_family = AF_NETLINK;
nladdr->nl_pad = 0;
int delivered;
gfp_t allocation;
struct sk_buff *skb, *skb2;
+ int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
+ void *tx_data;
};
static inline int do_one_broadcast(struct sock *sk,
p->failure = 1;
if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
p->delivery_failure = 1;
+ } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
+ kfree_skb(p->skb2);
+ p->skb2 = NULL;
} else if (sk_filter(sk, p->skb2)) {
kfree_skb(p->skb2);
p->skb2 = NULL;
return 0;
}
-int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
- u32 group, gfp_t allocation)
+int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
+ u32 group, gfp_t allocation,
+ int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
+ void *filter_data)
{
struct net *net = sock_net(ssk);
struct netlink_broadcast_data info;
info.allocation = allocation;
info.skb = skb;
info.skb2 = NULL;
+ info.tx_filter = filter;
+ info.tx_data = filter_data;
/* While we sleep in clone, do not allow to change socket list */
}
return -ESRCH;
}
+EXPORT_SYMBOL(netlink_broadcast_filtered);
+
+int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
+ u32 group, gfp_t allocation)
+{
+ return netlink_broadcast_filtered(ssk, skb, pid, group, allocation,
+ NULL, NULL);
+}
EXPORT_SYMBOL(netlink_broadcast);
struct netlink_set_err_data {
struct netlink_set_err_data *p)
{
struct netlink_sock *nlk = nlk_sk(sk);
+ int ret = 0;
if (sk == p->exclude_sk)
goto out;
- if (sock_net(sk) != sock_net(p->exclude_sk))
+ if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
goto out;
if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
!test_bit(p->group - 1, nlk->groups))
goto out;
+ if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
+ ret = 1;
+ goto out;
+ }
+
sk->sk_err = p->code;
sk->sk_error_report(sk);
out:
- return 0;
+ return ret;
}
/**
* @pid: the PID of a process that we want to skip (if any)
* @groups: the broadcast group that will notice the error
* @code: error code, must be negative (as usual in kernelspace)
+ *
+ * This function returns the number of broadcast listeners that have set the
+ * NETLINK_RECV_NO_ENOBUFS socket option.
*/
-void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
+int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
{
struct netlink_set_err_data info;
struct hlist_node *node;
struct sock *sk;
+ int ret = 0;
info.exclude_sk = ssk;
info.pid = pid;
read_lock(&nl_table_lock);
sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
- do_one_set_err(sk, &info);
+ ret += do_one_set_err(sk, &info);
read_unlock(&nl_table_lock);
+ return ret;
}
EXPORT_SYMBOL(netlink_set_err);
}
static int netlink_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
struct netlink_sock *nlk = nlk_sk(sk);
int noblock = flags&MSG_DONTWAIT;
size_t copied;
- struct sk_buff *skb;
+ struct sk_buff *skb, *frag __maybe_unused = NULL;
int err;
if (flags&MSG_OOB)
if (skb == NULL)
goto out;
+#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
+ if (unlikely(skb_shinfo(skb)->frag_list)) {
+ bool need_compat = !!(flags & MSG_CMSG_COMPAT);
+
+ /*
+ * If this skb has a frag_list, then here that means that
+ * we will have to use the frag_list skb for compat tasks
+ * and the regular skb for non-compat tasks.
+ *
+ * The skb might (and likely will) be cloned, so we can't
+ * just reset frag_list and go on with things -- we need to
+ * keep that. For the compat case that's easy -- simply get
+ * a reference to the compat skb and free the regular one
+ * including the frag. For the non-compat case, we need to
+ * avoid sending the frag to the user -- so assign NULL but
+ * restore it below before freeing the skb.
+ */
+ if (need_compat) {
+ struct sk_buff *compskb = skb_shinfo(skb)->frag_list;
+ skb_get(compskb);
+ kfree_skb(skb);
+ skb = compskb;
+ } else {
+ frag = skb_shinfo(skb)->frag_list;
+ skb_shinfo(skb)->frag_list = NULL;
+ }
+ }
+#endif
+
msg->msg_namelen = 0;
copied = skb->len;
siocb->scm->creds = *NETLINK_CREDS(skb);
if (flags & MSG_TRUNC)
copied = skb->len;
+
+#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
+ skb_shinfo(skb)->frag_list = frag;
+#endif
+
skb_free_datagram(sk, skb);
if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
if (groups < 32)
groups = 32;
- listeners = kzalloc(NLGRPSZ(groups), GFP_KERNEL);
+ listeners = kzalloc(NLGRPSZ(groups) + sizeof(struct listeners_rcu_head),
+ GFP_KERNEL);
if (!listeners)
goto out_sock_release;
EXPORT_SYMBOL(netlink_kernel_release);
+static void netlink_free_old_listeners(struct rcu_head *rcu_head)
+{
+ struct listeners_rcu_head *lrh;
+
+ lrh = container_of(rcu_head, struct listeners_rcu_head, rcu_head);
+ kfree(lrh->ptr);
+}
+
+int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
+{
+ unsigned long *listeners, *old = NULL;
+ struct listeners_rcu_head *old_rcu_head;
+ struct netlink_table *tbl = &nl_table[sk->sk_protocol];
+
+ if (groups < 32)
+ groups = 32;
+
+ if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
+ listeners = kzalloc(NLGRPSZ(groups) +
+ sizeof(struct listeners_rcu_head),
+ GFP_ATOMIC);
+ if (!listeners)
+ return -ENOMEM;
+ old = tbl->listeners;
+ memcpy(listeners, old, NLGRPSZ(tbl->groups));
+ rcu_assign_pointer(tbl->listeners, listeners);
+ /*
+ * Free the old memory after an RCU grace period so we
+ * don't leak it. We use call_rcu() here in order to be
+ * able to call this function from atomic contexts. The
+ * allocation of this memory will have reserved enough
+ * space for struct listeners_rcu_head at the end.
+ */
+ old_rcu_head = (void *)(tbl->listeners +
+ NLGRPLONGS(tbl->groups));
+ old_rcu_head->ptr = old;
+ call_rcu(&old_rcu_head->rcu_head, netlink_free_old_listeners);
+ }
+ tbl->groups = groups;
+
+ return 0;
+}
+
/**
* netlink_change_ngroups - change number of multicast groups
*
*/
int netlink_change_ngroups(struct sock *sk, unsigned int groups)
{
- unsigned long *listeners, *old = NULL;
- struct netlink_table *tbl = &nl_table[sk->sk_protocol];
- int err = 0;
-
- if (groups < 32)
- groups = 32;
+ int err;
netlink_table_grab();
- if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
- listeners = kzalloc(NLGRPSZ(groups), GFP_ATOMIC);
- if (!listeners) {
- err = -ENOMEM;
- goto out_ungrab;
- }
- old = tbl->listeners;
- memcpy(listeners, old, NLGRPSZ(tbl->groups));
- rcu_assign_pointer(tbl->listeners, listeners);
- }
- tbl->groups = groups;
-
- out_ungrab:
+ err = __netlink_change_ngroups(sk, groups);
netlink_table_ungrab();
- synchronize_rcu();
- kfree(old);
+
return err;
}
-EXPORT_SYMBOL(netlink_change_ngroups);
+
+void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
+{
+ struct sock *sk;
+ struct hlist_node *node;
+ struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
+
+ sk_for_each_bound(sk, node, &tbl->mc_list)
+ netlink_update_socket_mc(nlk_sk(sk), group, 0);
+}
/**
* netlink_clear_multicast_users - kick off multicast listeners
*/
void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
- struct sock *sk;
- struct hlist_node *node;
- struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
-
netlink_table_grab();
-
- sk_for_each_bound(sk, node, &tbl->mc_list)
- netlink_update_socket_mc(nlk_sk(sk), group, 0);
-
+ __netlink_clear_multicast_users(ksk, group);
netlink_table_ungrab();
}
-EXPORT_SYMBOL(netlink_clear_multicast_users);
void netlink_set_nonroot(int protocol, unsigned int flags)
{
}
int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
- struct nlmsghdr *nlh,
+ const struct nlmsghdr *nlh,
int (*dump)(struct sk_buff *skb,
struct netlink_callback *),
int (*done)(struct netlink_callback *))
}
rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
- NLMSG_ERROR, sizeof(struct nlmsgerr), 0);
+ NLMSG_ERROR, payload, 0);
errmsg = nlmsg_data(rep);
errmsg->error = err;
memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
if (v == SEQ_START_TOKEN)
seq_puts(seq,
"sk Eth Pid Groups "
- "Rmem Wmem Dump Locks Drops\n");
+ "Rmem Wmem Dump Locks Drops Inode\n");
else {
struct sock *s = v;
struct netlink_sock *nlk = nlk_sk(s);
- seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %-8d %-8d\n",
+ seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %-8d %-8d %-8lu\n",
s,
s->sk_protocol,
nlk->pid,
nlk->groups ? (u32)nlk->groups[0] : 0,
- atomic_read(&s->sk_rmem_alloc),
- atomic_read(&s->sk_wmem_alloc),
+ sk_rmem_alloc_get(s),
+ sk_wmem_alloc_get(s),
nlk->cb,
atomic_read(&s->sk_refcnt),
- atomic_read(&s->sk_drops)
+ atomic_read(&s->sk_drops),
+ sock_i_ino(s)
);
}
.sendpage = sock_no_sendpage,
};
-static struct net_proto_family netlink_family_ops = {
+static const struct net_proto_family netlink_family_ops = {
.family = PF_NETLINK,
.create = netlink_create,
.owner = THIS_MODULE, /* for consistency 8) */
if (!nl_table)
goto panic;
- if (num_physpages >= (128 * 1024))
- limit = num_physpages >> (21 - PAGE_SHIFT);
+ if (totalram_pages >= (128 * 1024))
+ limit = totalram_pages >> (21 - PAGE_SHIFT);
else
- limit = num_physpages >> (23 - PAGE_SHIFT);
+ limit = totalram_pages >> (23 - PAGE_SHIFT);
order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
limit = (1UL << order) / sizeof(struct hlist_head);