* strings build-time, so that runtime initialization of socket
* locks is fast):
*/
-static const char *af_family_key_strings[AF_MAX+1] = {
+static const char *const af_family_key_strings[AF_MAX+1] = {
"sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" ,
"sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK",
"sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" ,
"sk_lock-AF_IEEE802154",
"sk_lock-AF_MAX"
};
-static const char *af_family_slock_key_strings[AF_MAX+1] = {
+static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
"slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK",
"slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" ,
"slock-AF_IEEE802154",
"slock-AF_MAX"
};
-static const char *af_family_clock_key_strings[AF_MAX+1] = {
+static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
"clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK",
"clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" ,
int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
- int err = 0;
+ int err;
int skb_len;
+ unsigned long flags;
+ struct sk_buff_head *list = &sk->sk_receive_queue;
/* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
number of warnings when compiling with -W --ANK
*/
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
(unsigned)sk->sk_rcvbuf) {
- err = -ENOMEM;
- goto out;
+ atomic_inc(&sk->sk_drops);
+ return -ENOMEM;
}
err = sk_filter(sk, skb);
if (err)
- goto out;
+ return err;
if (!sk_rmem_schedule(sk, skb->truesize)) {
- err = -ENOBUFS;
- goto out;
+ atomic_inc(&sk->sk_drops);
+ return -ENOBUFS;
}
skb->dev = NULL;
*/
skb_len = skb->len;
- skb_queue_tail(&sk->sk_receive_queue, skb);
+ spin_lock_irqsave(&list->lock, flags);
+ skb->dropcount = atomic_read(&sk->sk_drops);
+ __skb_queue_tail(list, skb);
+ spin_unlock_irqrestore(&list->lock, flags);
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk, skb_len);
-out:
- return err;
+ return 0;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);
}
EXPORT_SYMBOL(sk_receive_skb);
+void sk_reset_txq(struct sock *sk)
+{
+ sk_tx_queue_clear(sk);
+}
+EXPORT_SYMBOL(sk_reset_txq);
+
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
struct dst_entry *dst = sk->sk_dst_cache;
if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
+ sk_tx_queue_clear(sk);
sk->sk_dst_cache = NULL;
dst_release(dst);
return NULL;
if (copy_from_user(devname, optval, optlen))
goto out;
- if (devname[0] == '\0') {
- index = 0;
- } else {
- struct net_device *dev = dev_get_by_name(net, devname);
+ index = 0;
+ if (devname[0] != '\0') {
+ struct net_device *dev;
+ rcu_read_lock();
+ dev = dev_get_by_name_rcu(net, devname);
+ if (dev)
+ index = dev->ifindex;
+ rcu_read_unlock();
ret = -ENODEV;
if (!dev)
goto out;
-
- index = dev->ifindex;
- dev_put(dev);
}
lock_sock(sk);
*/
int sock_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
int val;
sk->sk_reuse = valbool;
break;
case SO_TYPE:
+ case SO_PROTOCOL:
+ case SO_DOMAIN:
case SO_ERROR:
ret = -ENOPROTOOPT;
break;
case SO_TIMESTAMPING:
if (val & ~SOF_TIMESTAMPING_MASK) {
- ret = EINVAL;
+ ret = -EINVAL;
break;
}
sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
/* We implement the SO_SNDLOWAT etc to
not be settable (1003.1g 5.3) */
+ case SO_RXQ_OVFL:
+ if (valbool)
+ sock_set_flag(sk, SOCK_RXQ_OVFL);
+ else
+ sock_reset_flag(sk, SOCK_RXQ_OVFL);
+ break;
default:
ret = -ENOPROTOOPT;
break;
struct timeval tm;
} v;
- unsigned int lv = sizeof(int);
+ int lv = sizeof(int);
int len;
if (get_user(len, optlen))
v.val = sk->sk_type;
break;
+ case SO_PROTOCOL:
+ v.val = sk->sk_protocol;
+ break;
+
+ case SO_DOMAIN:
+ v.val = sk->sk_family;
+ break;
+
case SO_ERROR:
v.val = -sock_error(sk);
if (v.val == 0)
v.val = sk->sk_mark;
break;
+ case SO_RXQ_OVFL:
+ v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
+ break;
+
default:
return -ENOPROTOOPT;
}
af_family_keys + sk->sk_family);
}
+/*
+ * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
+ * even temporarly, because of RCU lookups. sk_node should also be left as is.
+ */
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
#ifdef CONFIG_SECURITY_NETWORK
void *sptr = nsk->sk_security;
#endif
-
- memcpy(nsk, osk, osk->sk_prot->obj_size);
+ BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) !=
+ sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) +
+ sizeof(osk->sk_tx_queue_mapping));
+ memcpy(&nsk->sk_copy_start, &osk->sk_copy_start,
+ osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start));
#ifdef CONFIG_SECURITY_NETWORK
nsk->sk_security = sptr;
security_sk_clone(osk, nsk);
struct kmem_cache *slab;
slab = prot->slab;
- if (slab != NULL)
- sk = kmem_cache_alloc(slab, priority);
+ if (slab != NULL) {
+ sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
+ if (!sk)
+ return sk;
+ if (priority & __GFP_ZERO) {
+ /*
+ * caches using SLAB_DESTROY_BY_RCU should let
+ * sk_node.next un-modified. Special care is taken
+ * when initializing object to zero.
+ */
+ if (offsetof(struct sock, sk_node.next) != 0)
+ memset(sk, 0, offsetof(struct sock, sk_node.next));
+ memset(&sk->sk_node.pprev, 0,
+ prot->obj_size - offsetof(struct sock,
+ sk_node.pprev));
+ }
+ }
else
sk = kmalloc(prot->obj_size, priority);
if (sk != NULL) {
+ kmemcheck_annotate_bitfield(sk, flags);
+
if (security_sk_alloc(sk, family, priority))
goto out_free;
if (!try_module_get(prot->owner))
goto out_free_sec;
+ sk_tx_queue_clear(sk);
}
return sk;
sk->sk_prot = sk->sk_prot_creator = prot;
sock_lock_init(sk);
sock_net_set(sk, get_net(net));
+ atomic_set(&sk->sk_wmem_alloc, 1);
}
return sk;
}
EXPORT_SYMBOL(sk_alloc);
-void sk_free(struct sock *sk)
+static void __sk_free(struct sock *sk)
{
struct sk_filter *filter;
put_net(sock_net(sk));
sk_prot_free(sk->sk_prot_creator, sk);
}
+
+void sk_free(struct sock *sk)
+{
+ /*
+ * We substract one from sk_wmem_alloc and can know if
+ * some packets are still in some tx queue.
+ * If not null, sock_wfree() will call __sk_free(sk) later
+ */
+ if (atomic_dec_and_test(&sk->sk_wmem_alloc))
+ __sk_free(sk);
+}
EXPORT_SYMBOL(sk_free);
/*
newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
atomic_set(&newsk->sk_rmem_alloc, 0);
- atomic_set(&newsk->sk_wmem_alloc, 0);
+ /*
+ * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
+ */
+ atomic_set(&newsk->sk_wmem_alloc, 1);
atomic_set(&newsk->sk_omem_alloc, 0);
skb_queue_head_init(&newsk->sk_receive_queue);
skb_queue_head_init(&newsk->sk_write_queue);
newsk->sk_err = 0;
newsk->sk_priority = 0;
+ /*
+ * Before updating sk_refcnt, we must commit prior changes to memory
+ * (Documentation/RCU/rculist_nulls.txt for details)
+ */
+ smp_wmb();
atomic_set(&newsk->sk_refcnt, 2);
/*
if (newsk->sk_prot->sockets_allocated)
percpu_counter_inc(newsk->sk_prot->sockets_allocated);
+
+ if (sock_flag(newsk, SOCK_TIMESTAMP) ||
+ sock_flag(newsk, SOCK_TIMESTAMPING_RX_SOFTWARE))
+ net_enable_timestamp();
}
out:
return newsk;
void __init sk_init(void)
{
- if (num_physpages <= 4096) {
+ if (totalram_pages <= 4096) {
sysctl_wmem_max = 32767;
sysctl_rmem_max = 32767;
sysctl_wmem_default = 32767;
sysctl_rmem_default = 32767;
- } else if (num_physpages >= 131072) {
+ } else if (totalram_pages >= 131072) {
sysctl_wmem_max = 131071;
sysctl_rmem_max = 131071;
}
void sock_wfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
+ unsigned int len = skb->truesize;
- /* In case it might be waiting for more memory. */
- atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
- if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
+ if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
+ /*
+ * Keep a reference on sk_wmem_alloc, this will be released
+ * after sk_write_space() call
+ */
+ atomic_sub(len - 1, &sk->sk_wmem_alloc);
sk->sk_write_space(sk);
- sock_put(sk);
+ len = 1;
+ }
+ /*
+ * if sk_wmem_alloc reaches 0, we must finish what sk_free()
+ * could not do because of in-flight packets
+ */
+ if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
+ __sk_free(sk);
}
EXPORT_SYMBOL(sock_wfree);
EXPORT_SYMBOL(sock_no_shutdown);
int sock_no_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
return -EOPNOTSUPP;
}
static void sock_def_wakeup(struct sock *sk)
{
read_lock(&sk->sk_callback_lock);
- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+ if (sk_has_sleeper(sk))
wake_up_interruptible_all(sk->sk_sleep);
read_unlock(&sk->sk_callback_lock);
}
static void sock_def_error_report(struct sock *sk)
{
read_lock(&sk->sk_callback_lock);
- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+ if (sk_has_sleeper(sk))
wake_up_interruptible_poll(sk->sk_sleep, POLLERR);
sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
read_unlock(&sk->sk_callback_lock);
static void sock_def_readable(struct sock *sk, int len)
{
read_lock(&sk->sk_callback_lock);
- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+ if (sk_has_sleeper(sk))
wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN |
POLLRDNORM | POLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
* progress. --DaveM
*/
if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+ if (sk_has_sleeper(sk))
wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
POLLWRNORM | POLLWRBAND);
sk->sk_stamp = ktime_set(-1L, 0);
+ /*
+ * Before updating sk_refcnt, we must commit prior changes to memory
+ * (Documentation/RCU/rculist_nulls.txt for details)
+ */
+ smp_wmb();
atomic_set(&sk->sk_refcnt, 1);
atomic_set(&sk->sk_drops, 0);
}
* Set socket options on an inet socket.
*/
int sock_common_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
#ifdef CONFIG_COMPAT
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
-static int sock_inuse_init_net(struct net *net)
+static int __net_init sock_inuse_init_net(struct net *net)
{
net->core.inuse = alloc_percpu(struct prot_inuse);
return net->core.inuse ? 0 : -ENOMEM;
}
-static void sock_inuse_exit_net(struct net *net)
+static void __net_exit sock_inuse_exit_net(struct net *net)
{
free_percpu(net->core.inuse);
}