* handler for protocols to use and generic option handler.
*
*
- * Version: $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Florian La Roche, <flla@stud.uni-sb.de>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
+#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* Make lock validator output more readable. (we pre-construct these
* strings build-time, so that runtime initialization of socket
* locks is fast):
*/
-static const char *af_family_key_strings[AF_MAX+1] = {
+static const char *const af_family_key_strings[AF_MAX+1] = {
"sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" ,
"sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK",
"sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" ,
"sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
"sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
"sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
- "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
+ "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
"sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
"sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
"sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
- "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
+ "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
+ "sk_lock-AF_IEEE802154",
+ "sk_lock-AF_MAX"
};
-static const char *af_family_slock_key_strings[AF_MAX+1] = {
+static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
"slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK",
"slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" ,
"slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
"slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
"slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
- "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" ,
+ "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" ,
"slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
"slock-27" , "slock-28" , "slock-AF_CAN" ,
"slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
- "slock-AF_RXRPC" , "slock-AF_MAX"
+ "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
+ "slock-AF_IEEE802154",
+ "slock-AF_MAX"
};
-static const char *af_family_clock_key_strings[AF_MAX+1] = {
+static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
"clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK",
"clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" ,
"clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" ,
"clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" ,
"clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" ,
- "clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" ,
+ "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" ,
"clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" ,
- "clock-27" , "clock-28" , "clock-29" ,
+ "clock-27" , "clock-28" , "clock-AF_CAN" ,
"clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
- "clock-AF_RXRPC" , "clock-AF_MAX"
+ "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
+ "clock-AF_IEEE802154",
+ "clock-AF_MAX"
};
-#endif
/*
* sk_callback_lock locking rules are per-address-family,
/* Maximal space eaten by iovec or ancilliary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
+EXPORT_SYMBOL(sysctl_optmem_max);
static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
}
}
-static void sock_disable_timestamp(struct sock *sk)
+static void sock_disable_timestamp(struct sock *sk, int flag)
{
- if (sock_flag(sk, SOCK_TIMESTAMP)) {
- sock_reset_flag(sk, SOCK_TIMESTAMP);
- net_disable_timestamp();
+ if (sock_flag(sk, flag)) {
+ sock_reset_flag(sk, flag);
+ if (!sock_flag(sk, SOCK_TIMESTAMP) &&
+ !sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) {
+ net_disable_timestamp();
+ }
}
}
int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
- int err = 0;
+ int err;
int skb_len;
+ unsigned long flags;
+ struct sk_buff_head *list = &sk->sk_receive_queue;
/* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
number of warnings when compiling with -W --ANK
*/
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
(unsigned)sk->sk_rcvbuf) {
- err = -ENOMEM;
- goto out;
+ atomic_inc(&sk->sk_drops);
+ return -ENOMEM;
}
err = sk_filter(sk, skb);
if (err)
- goto out;
+ return err;
if (!sk_rmem_schedule(sk, skb->truesize)) {
- err = -ENOBUFS;
- goto out;
+ atomic_inc(&sk->sk_drops);
+ return -ENOBUFS;
}
skb->dev = NULL;
*/
skb_len = skb->len;
- skb_queue_tail(&sk->sk_receive_queue, skb);
+ spin_lock_irqsave(&list->lock, flags);
+ skb->dropcount = atomic_read(&sk->sk_drops);
+ __skb_queue_tail(list, skb);
+ spin_unlock_irqrestore(&list->lock, flags);
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk, skb_len);
-out:
- return err;
+ return 0;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);
*/
mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
- rc = sk->sk_backlog_rcv(sk, skb);
+ rc = sk_backlog_rcv(sk, skb);
mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
- } else
- sk_add_backlog(sk, skb);
+ } else if (sk_add_backlog(sk, skb)) {
+ bh_unlock_sock(sk);
+ atomic_inc(&sk->sk_drops);
+ goto discard_and_relse;
+ }
+
bh_unlock_sock(sk);
out:
sock_put(sk);
}
EXPORT_SYMBOL(sk_receive_skb);
+void sk_reset_txq(struct sock *sk)
+{
+ sk_tx_queue_clear(sk);
+}
+EXPORT_SYMBOL(sk_reset_txq);
+
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
struct dst_entry *dst = sk->sk_dst_cache;
if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
+ sk_tx_queue_clear(sk);
sk->sk_dst_cache = NULL;
dst_release(dst);
return NULL;
if (copy_from_user(devname, optval, optlen))
goto out;
- if (devname[0] == '\0') {
- index = 0;
- } else {
- struct net_device *dev = dev_get_by_name(net, devname);
+ index = 0;
+ if (devname[0] != '\0') {
+ struct net_device *dev;
+ rcu_read_lock();
+ dev = dev_get_by_name_rcu(net, devname);
+ if (dev)
+ index = dev->ifindex;
+ rcu_read_unlock();
ret = -ENODEV;
if (!dev)
goto out;
-
- index = dev->ifindex;
- dev_put(dev);
}
lock_sock(sk);
*/
int sock_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
- struct sock *sk=sock->sk;
+ struct sock *sk = sock->sk;
int val;
int valbool;
struct linger ling;
if (get_user(val, (int __user *)optval))
return -EFAULT;
- valbool = val?1:0;
+ valbool = val ? 1 : 0;
lock_sock(sk);
- switch(optname) {
+ switch (optname) {
case SO_DEBUG:
- if (val && !capable(CAP_NET_ADMIN)) {
+ if (val && !capable(CAP_NET_ADMIN))
ret = -EACCES;
- } else
+ else
sock_valbool_flag(sk, SOCK_DBG, valbool);
break;
case SO_REUSEADDR:
sk->sk_reuse = valbool;
break;
case SO_TYPE:
+ case SO_PROTOCOL:
+ case SO_DOMAIN:
case SO_ERROR:
ret = -ENOPROTOOPT;
break;
ret = -EINVAL; /* 1003.1g */
break;
}
- if (copy_from_user(&ling,optval,sizeof(ling))) {
+ if (copy_from_user(&ling, optval, sizeof(ling))) {
ret = -EFAULT;
break;
}
else
sock_set_flag(sk, SOCK_RCVTSTAMPNS);
sock_set_flag(sk, SOCK_RCVTSTAMP);
- sock_enable_timestamp(sk);
+ sock_enable_timestamp(sk, SOCK_TIMESTAMP);
} else {
sock_reset_flag(sk, SOCK_RCVTSTAMP);
sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
}
break;
+ case SO_TIMESTAMPING:
+ if (val & ~SOF_TIMESTAMPING_MASK) {
+ ret = -EINVAL;
+ break;
+ }
+ sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
+ val & SOF_TIMESTAMPING_TX_HARDWARE);
+ sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
+ val & SOF_TIMESTAMPING_TX_SOFTWARE);
+ sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
+ val & SOF_TIMESTAMPING_RX_HARDWARE);
+ if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
+ sock_enable_timestamp(sk,
+ SOCK_TIMESTAMPING_RX_SOFTWARE);
+ else
+ sock_disable_timestamp(sk,
+ SOCK_TIMESTAMPING_RX_SOFTWARE);
+ sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
+ val & SOF_TIMESTAMPING_SOFTWARE);
+ sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
+ val & SOF_TIMESTAMPING_SYS_HARDWARE);
+ sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
+ val & SOF_TIMESTAMPING_RAW_HARDWARE);
+ break;
+
case SO_RCVLOWAT:
if (val < 0)
val = INT_MAX;
case SO_MARK:
if (!capable(CAP_NET_ADMIN))
ret = -EPERM;
- else {
+ else
sk->sk_mark = val;
- }
break;
/* We implement the SO_SNDLOWAT etc to
not be settable (1003.1g 5.3) */
+ case SO_RXQ_OVFL:
+ if (valbool)
+ sock_set_flag(sk, SOCK_RXQ_OVFL);
+ else
+ sock_reset_flag(sk, SOCK_RXQ_OVFL);
+ break;
default:
ret = -ENOPROTOOPT;
break;
release_sock(sk);
return ret;
}
+EXPORT_SYMBOL(sock_setsockopt);
int sock_getsockopt(struct socket *sock, int level, int optname,
struct timeval tm;
} v;
- unsigned int lv = sizeof(int);
+ int lv = sizeof(int);
int len;
if (get_user(len, optlen))
if (len < 0)
return -EINVAL;
- switch(optname) {
+ memset(&v, 0, sizeof(v));
+
+ switch (optname) {
case SO_DEBUG:
v.val = sock_flag(sk, SOCK_DBG);
break;
v.val = sk->sk_type;
break;
+ case SO_PROTOCOL:
+ v.val = sk->sk_protocol;
+ break;
+
+ case SO_DOMAIN:
+ v.val = sk->sk_family;
+ break;
+
case SO_ERROR:
v.val = -sock_error(sk);
- if (v.val==0)
+ if (v.val == 0)
v.val = xchg(&sk->sk_err_soft, 0);
break;
v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
break;
+ case SO_TIMESTAMPING:
+ v.val = 0;
+ if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
+ v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
+ if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
+ v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
+ if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
+ v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
+ if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
+ v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
+ if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
+ v.val |= SOF_TIMESTAMPING_SOFTWARE;
+ if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
+ v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
+ if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
+ v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
+ break;
+
case SO_RCVTIMEO:
- lv=sizeof(struct timeval);
+ lv = sizeof(struct timeval);
if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
v.tm.tv_sec = 0;
v.tm.tv_usec = 0;
break;
case SO_SNDTIMEO:
- lv=sizeof(struct timeval);
+ lv = sizeof(struct timeval);
if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
v.tm.tv_sec = 0;
v.tm.tv_usec = 0;
break;
case SO_SNDLOWAT:
- v.val=1;
+ v.val = 1;
break;
case SO_PASSCRED:
v.val = sk->sk_mark;
break;
+ case SO_RXQ_OVFL:
+ v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
+ break;
+
default:
return -ENOPROTOOPT;
}
af_family_keys + sk->sk_family);
}
+/*
+ * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
+ * even temporarly, because of RCU lookups. sk_node should also be left as is.
+ */
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
#ifdef CONFIG_SECURITY_NETWORK
void *sptr = nsk->sk_security;
#endif
-
- memcpy(nsk, osk, osk->sk_prot->obj_size);
+ BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) !=
+ sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) +
+ sizeof(osk->sk_tx_queue_mapping));
+ memcpy(&nsk->sk_copy_start, &osk->sk_copy_start,
+ osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start));
#ifdef CONFIG_SECURITY_NETWORK
nsk->sk_security = sptr;
security_sk_clone(osk, nsk);
struct kmem_cache *slab;
slab = prot->slab;
- if (slab != NULL)
- sk = kmem_cache_alloc(slab, priority);
+ if (slab != NULL) {
+ sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
+ if (!sk)
+ return sk;
+ if (priority & __GFP_ZERO) {
+ /*
+ * caches using SLAB_DESTROY_BY_RCU should let
+ * sk_node.next un-modified. Special care is taken
+ * when initializing object to zero.
+ */
+ if (offsetof(struct sock, sk_node.next) != 0)
+ memset(sk, 0, offsetof(struct sock, sk_node.next));
+ memset(&sk->sk_node.pprev, 0,
+ prot->obj_size - offsetof(struct sock,
+ sk_node.pprev));
+ }
+ }
else
sk = kmalloc(prot->obj_size, priority);
if (sk != NULL) {
+ kmemcheck_annotate_bitfield(sk, flags);
+
if (security_sk_alloc(sk, family, priority))
goto out_free;
if (!try_module_get(prot->owner))
goto out_free_sec;
+ sk_tx_queue_clear(sk);
}
return sk;
sk->sk_prot = sk->sk_prot_creator = prot;
sock_lock_init(sk);
sock_net_set(sk, get_net(net));
+ atomic_set(&sk->sk_wmem_alloc, 1);
}
return sk;
}
+EXPORT_SYMBOL(sk_alloc);
-void sk_free(struct sock *sk)
+static void __sk_free(struct sock *sk)
{
struct sk_filter *filter;
if (sk->sk_destruct)
sk->sk_destruct(sk);
- filter = rcu_dereference(sk->sk_filter);
+ filter = rcu_dereference_check(sk->sk_filter,
+ atomic_read(&sk->sk_wmem_alloc) == 0);
if (filter) {
sk_filter_uncharge(sk, filter);
rcu_assign_pointer(sk->sk_filter, NULL);
}
- sock_disable_timestamp(sk);
+ sock_disable_timestamp(sk, SOCK_TIMESTAMP);
+ sock_disable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE);
if (atomic_read(&sk->sk_omem_alloc))
printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
sk_prot_free(sk->sk_prot_creator, sk);
}
+void sk_free(struct sock *sk)
+{
+ /*
+ * We substract one from sk_wmem_alloc and can know if
+ * some packets are still in some tx queue.
+ * If not null, sock_wfree() will call __sk_free(sk) later
+ */
+ if (atomic_dec_and_test(&sk->sk_wmem_alloc))
+ __sk_free(sk);
+}
+EXPORT_SYMBOL(sk_free);
+
/*
* Last sock_put should drop referrence to sk->sk_net. It has already
* been dropped in sk_change_net. Taking referrence to stopping namespace
sock_lock_init(newsk);
bh_lock_sock(newsk);
newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
+ newsk->sk_backlog.len = 0;
atomic_set(&newsk->sk_rmem_alloc, 0);
- atomic_set(&newsk->sk_wmem_alloc, 0);
+ /*
+ * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
+ */
+ atomic_set(&newsk->sk_wmem_alloc, 1);
atomic_set(&newsk->sk_omem_alloc, 0);
skb_queue_head_init(&newsk->sk_receive_queue);
skb_queue_head_init(&newsk->sk_write_queue);
newsk->sk_err = 0;
newsk->sk_priority = 0;
+ /*
+ * Before updating sk_refcnt, we must commit prior changes to memory
+ * (Documentation/RCU/rculist_nulls.txt for details)
+ */
+ smp_wmb();
atomic_set(&newsk->sk_refcnt, 2);
/*
* to be taken into account in all callers. -acme
*/
sk_refcnt_debug_inc(newsk);
- newsk->sk_socket = NULL;
+ sk_set_socket(newsk, NULL);
newsk->sk_sleep = NULL;
if (newsk->sk_prot->sockets_allocated)
- atomic_inc(newsk->sk_prot->sockets_allocated);
+ percpu_counter_inc(newsk->sk_prot->sockets_allocated);
+
+ if (sock_flag(newsk, SOCK_TIMESTAMP) ||
+ sock_flag(newsk, SOCK_TIMESTAMPING_RX_SOFTWARE))
+ net_enable_timestamp();
}
out:
return newsk;
}
-
EXPORT_SYMBOL_GPL(sk_clone);
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
void __init sk_init(void)
{
- if (num_physpages <= 4096) {
+ if (totalram_pages <= 4096) {
sysctl_wmem_max = 32767;
sysctl_rmem_max = 32767;
sysctl_wmem_default = 32767;
sysctl_rmem_default = 32767;
- } else if (num_physpages >= 131072) {
+ } else if (totalram_pages >= 131072) {
sysctl_wmem_max = 131071;
sysctl_rmem_max = 131071;
}
void sock_wfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
+ unsigned int len = skb->truesize;
- /* In case it might be waiting for more memory. */
- atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
- if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
+ if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
+ /*
+ * Keep a reference on sk_wmem_alloc, this will be released
+ * after sk_write_space() call
+ */
+ atomic_sub(len - 1, &sk->sk_wmem_alloc);
sk->sk_write_space(sk);
- sock_put(sk);
+ len = 1;
+ }
+ /*
+ * if sk_wmem_alloc reaches 0, we must finish what sk_free()
+ * could not do because of in-flight packets
+ */
+ if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
+ __sk_free(sk);
}
+EXPORT_SYMBOL(sock_wfree);
/*
* Read buffer destructor automatically called from kfree_skb.
{
struct sock *sk = skb->sk;
- skb_truesize_check(skb);
atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
sk_mem_uncharge(skb->sk, skb->truesize);
}
+EXPORT_SYMBOL(sock_rfree);
int sock_i_uid(struct sock *sk)
read_unlock(&sk->sk_callback_lock);
return uid;
}
+EXPORT_SYMBOL(sock_i_uid);
unsigned long sock_i_ino(struct sock *sk)
{
read_unlock(&sk->sk_callback_lock);
return ino;
}
+EXPORT_SYMBOL(sock_i_ino);
/*
* Allocate a skb from the socket's send buffer.
gfp_t priority)
{
if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
- struct sk_buff * skb = alloc_skb(size, priority);
+ struct sk_buff *skb = alloc_skb(size, priority);
if (skb) {
skb_set_owner_w(skb, sk);
return skb;
}
return NULL;
}
+EXPORT_SYMBOL(sock_wmalloc);
/*
* Allocate a skb from the socket's receive buffer.
}
return NULL;
}
+EXPORT_SYMBOL(sock_kmalloc);
/*
* Free an option memory block.
kfree(mem);
atomic_sub(size, &sk->sk_omem_alloc);
}
+EXPORT_SYMBOL(sock_kfree_s);
/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
I think, these locks should be removed for datagram sockets.
*/
-static long sock_wait_for_wmem(struct sock * sk, long timeo)
+static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
DEFINE_WAIT(wait);
* Generic send/receive buffer handlers
*/
-static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
- unsigned long header_len,
- unsigned long data_len,
- int noblock, int *errcode)
+struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
+ unsigned long data_len, int noblock,
+ int *errcode)
{
struct sk_buff *skb;
gfp_t gfp_mask;
*errcode = err;
return NULL;
}
+EXPORT_SYMBOL(sock_alloc_send_pskb);
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
int noblock, int *errcode)
{
return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
+EXPORT_SYMBOL(sock_alloc_send_skb);
static void __lock_sock(struct sock *sk)
{
struct sk_buff *next = skb->next;
skb->next = NULL;
- sk->sk_backlog_rcv(sk, skb);
+ sk_backlog_rcv(sk, skb);
/*
* We are in process context here with softirqs
bh_lock_sock(sk);
} while ((skb = sk->sk_backlog.head) != NULL);
+
+ /*
+ * Doing the zeroing here guarantee we can not loop forever
+ * while a wild producer attempts to flood us.
+ */
+ sk->sk_backlog.len = 0;
}
/**
finish_wait(sk->sk_sleep, &wait);
return rc;
}
-
EXPORT_SYMBOL(sk_wait_data);
/**
/* Under pressure. */
if (allocated > prot->sysctl_mem[1])
if (prot->enter_memory_pressure)
- prot->enter_memory_pressure();
+ prot->enter_memory_pressure(sk);
/* Over hard limit. */
if (allocated > prot->sysctl_mem[2])
}
if (prot->memory_pressure) {
- if (!*prot->memory_pressure ||
- prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) *
+ int alloc;
+
+ if (!*prot->memory_pressure)
+ return 1;
+ alloc = percpu_counter_read_positive(prot->sockets_allocated);
+ if (prot->sysctl_mem[2] > alloc *
sk_mem_pages(sk->sk_wmem_queued +
atomic_read(&sk->sk_rmem_alloc) +
sk->sk_forward_alloc))
atomic_sub(amt, prot->memory_allocated);
return 0;
}
-
EXPORT_SYMBOL(__sk_mem_schedule);
/**
(atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
*prot->memory_pressure = 0;
}
-
EXPORT_SYMBOL(__sk_mem_reclaim);
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_bind);
int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
int len, int flags)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_connect);
int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_socketpair);
int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_accept);
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
int *len, int peer)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_getname);
-unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
+unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
return 0;
}
+EXPORT_SYMBOL(sock_no_poll);
int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_ioctl);
int sock_no_listen(struct socket *sock, int backlog)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_listen);
int sock_no_shutdown(struct socket *sock, int how)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_shutdown);
int sock_no_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_setsockopt);
int sock_no_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_getsockopt);
int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
size_t len)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_sendmsg);
int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
size_t len, int flags)
{
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL(sock_no_recvmsg);
int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
/* Mirror missing mmap method error code */
return -ENODEV;
}
+EXPORT_SYMBOL(sock_no_mmap);
ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
kunmap(page);
return res;
}
+EXPORT_SYMBOL(sock_no_sendpage);
/*
* Default Socket Callbacks
static void sock_def_wakeup(struct sock *sk)
{
read_lock(&sk->sk_callback_lock);
- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+ if (sk_has_sleeper(sk))
wake_up_interruptible_all(sk->sk_sleep);
read_unlock(&sk->sk_callback_lock);
}
static void sock_def_error_report(struct sock *sk)
{
read_lock(&sk->sk_callback_lock);
- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible(sk->sk_sleep);
+ if (sk_has_sleeper(sk))
+ wake_up_interruptible_poll(sk->sk_sleep, POLLERR);
sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
read_unlock(&sk->sk_callback_lock);
}
static void sock_def_readable(struct sock *sk, int len)
{
read_lock(&sk->sk_callback_lock);
- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible_sync(sk->sk_sleep);
+ if (sk_has_sleeper(sk))
+ wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN |
+ POLLRDNORM | POLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
read_unlock(&sk->sk_callback_lock);
}
* progress. --DaveM
*/
if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible_sync(sk->sk_sleep);
+ if (sk_has_sleeper(sk))
+ wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
+ POLLWRNORM | POLLWRBAND);
/* Should agree with poll, otherwise some programs break */
if (sock_writeable(sk))
if (send_sigurg(&sk->sk_socket->file->f_owner))
sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
}
+EXPORT_SYMBOL(sk_send_sigurg);
void sk_reset_timer(struct sock *sk, struct timer_list* timer,
unsigned long expires)
if (!mod_timer(timer, expires))
sock_hold(sk);
}
-
EXPORT_SYMBOL(sk_reset_timer);
void sk_stop_timer(struct sock *sk, struct timer_list* timer)
if (timer_pending(timer) && del_timer(timer))
__sock_put(sk);
}
-
EXPORT_SYMBOL(sk_stop_timer);
void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_allocation = GFP_KERNEL;
sk->sk_rcvbuf = sysctl_rmem_default;
sk->sk_sndbuf = sysctl_wmem_default;
+ sk->sk_backlog.limit = sk->sk_rcvbuf << 1;
sk->sk_state = TCP_CLOSE;
- sk->sk_socket = sock;
+ sk_set_socket(sk, sock);
sock_set_flag(sk, SOCK_ZAPPED);
sk->sk_stamp = ktime_set(-1L, 0);
+ /*
+ * Before updating sk_refcnt, we must commit prior changes to memory
+ * (Documentation/RCU/rculist_nulls.txt for details)
+ */
+ smp_wmb();
atomic_set(&sk->sk_refcnt, 1);
atomic_set(&sk->sk_drops, 0);
}
+EXPORT_SYMBOL(sock_init_data);
void lock_sock_nested(struct sock *sk, int subclass)
{
mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
local_bh_enable();
}
-
EXPORT_SYMBOL(lock_sock_nested);
void release_sock(struct sock *sk)
{
struct timeval tv;
if (!sock_flag(sk, SOCK_TIMESTAMP))
- sock_enable_timestamp(sk);
+ sock_enable_timestamp(sk, SOCK_TIMESTAMP);
tv = ktime_to_timeval(sk->sk_stamp);
if (tv.tv_sec == -1)
return -ENOENT;
{
struct timespec ts;
if (!sock_flag(sk, SOCK_TIMESTAMP))
- sock_enable_timestamp(sk);
+ sock_enable_timestamp(sk, SOCK_TIMESTAMP);
ts = ktime_to_timespec(sk->sk_stamp);
if (ts.tv_sec == -1)
return -ENOENT;
}
EXPORT_SYMBOL(sock_get_timestampns);
-void sock_enable_timestamp(struct sock *sk)
+void sock_enable_timestamp(struct sock *sk, int flag)
{
- if (!sock_flag(sk, SOCK_TIMESTAMP)) {
- sock_set_flag(sk, SOCK_TIMESTAMP);
- net_enable_timestamp();
+ if (!sock_flag(sk, flag)) {
+ sock_set_flag(sk, flag);
+ /*
+ * we just set one of the two flags which require net
+ * time stamping, but time stamping might have been on
+ * already because of the other one
+ */
+ if (!sock_flag(sk,
+ flag == SOCK_TIMESTAMP ?
+ SOCK_TIMESTAMPING_RX_SOFTWARE :
+ SOCK_TIMESTAMP))
+ net_enable_timestamp();
}
}
return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
-
EXPORT_SYMBOL(sock_common_getsockopt);
#ifdef CONFIG_COMPAT
msg->msg_namelen = addr_len;
return err;
}
-
EXPORT_SYMBOL(sock_common_recvmsg);
/*
* Set socket options on an inet socket.
*/
int sock_common_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
-
EXPORT_SYMBOL(sock_common_setsockopt);
#ifdef CONFIG_COMPAT
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
sk_refcnt_debug_release(sk);
sock_put(sk);
}
-
EXPORT_SYMBOL(sk_common_release);
static DEFINE_RWLOCK(proto_list_lock);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
-static int sock_inuse_init_net(struct net *net)
+static int __net_init sock_inuse_init_net(struct net *net)
{
net->core.inuse = alloc_percpu(struct prot_inuse);
return net->core.inuse ? 0 : -ENOMEM;
}
-static void sock_inuse_exit_net(struct net *net)
+static void __net_exit sock_inuse_exit_net(struct net *net)
{
free_percpu(net->core.inuse);
}
int proto_register(struct proto *prot, int alloc_slab)
{
- char *request_sock_slab_name = NULL;
- char *timewait_sock_slab_name;
-
if (alloc_slab) {
prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ SLAB_HWCACHE_ALIGN | prot->slab_flags,
+ NULL);
if (prot->slab == NULL) {
printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
}
if (prot->rsk_prot != NULL) {
- static const char mask[] = "request_sock_%s";
-
- request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
- if (request_sock_slab_name == NULL)
+ prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
+ if (prot->rsk_prot->slab_name == NULL)
goto out_free_sock_slab;
- sprintf(request_sock_slab_name, mask, prot->name);
- prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
+ prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
prot->rsk_prot->obj_size, 0,
SLAB_HWCACHE_ALIGN, NULL);
}
if (prot->twsk_prot != NULL) {
- static const char mask[] = "tw_sock_%s";
+ prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
- timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
-
- if (timewait_sock_slab_name == NULL)
+ if (prot->twsk_prot->twsk_slab_name == NULL)
goto out_free_request_sock_slab;
- sprintf(timewait_sock_slab_name, mask, prot->name);
prot->twsk_prot->twsk_slab =
- kmem_cache_create(timewait_sock_slab_name,
+ kmem_cache_create(prot->twsk_prot->twsk_slab_name,
prot->twsk_prot->twsk_obj_size,
- 0, SLAB_HWCACHE_ALIGN,
+ 0,
+ SLAB_HWCACHE_ALIGN |
+ prot->slab_flags,
NULL);
if (prot->twsk_prot->twsk_slab == NULL)
goto out_free_timewait_sock_slab_name;
return 0;
out_free_timewait_sock_slab_name:
- kfree(timewait_sock_slab_name);
+ kfree(prot->twsk_prot->twsk_slab_name);
out_free_request_sock_slab:
if (prot->rsk_prot && prot->rsk_prot->slab) {
kmem_cache_destroy(prot->rsk_prot->slab);
prot->rsk_prot->slab = NULL;
}
out_free_request_sock_slab_name:
- kfree(request_sock_slab_name);
+ if (prot->rsk_prot)
+ kfree(prot->rsk_prot->slab_name);
out_free_sock_slab:
kmem_cache_destroy(prot->slab);
prot->slab = NULL;
out:
return -ENOBUFS;
}
-
EXPORT_SYMBOL(proto_register);
void proto_unregister(struct proto *prot)
}
if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
- const char *name = kmem_cache_name(prot->rsk_prot->slab);
-
kmem_cache_destroy(prot->rsk_prot->slab);
- kfree(name);
+ kfree(prot->rsk_prot->slab_name);
prot->rsk_prot->slab = NULL;
}
if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
- const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
-
kmem_cache_destroy(prot->twsk_prot->twsk_slab);
- kfree(name);
+ kfree(prot->twsk_prot->twsk_slab_name);
prot->twsk_prot->twsk_slab = NULL;
}
}
-
EXPORT_SYMBOL(proto_unregister);
#ifdef CONFIG_PROC_FS
"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
proto->name,
proto->obj_size,
- proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
+ sock_prot_inuse_get(seq_file_net(seq), proto),
proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
proto->max_header,
static int proto_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &proto_seq_ops);
+ return seq_open_net(inode, file, &proto_seq_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations proto_seq_fops = {
.open = proto_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
+};
+
+static __net_init int proto_init_net(struct net *net)
+{
+ if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static __net_exit void proto_exit_net(struct net *net)
+{
+ proc_net_remove(net, "protocols");
+}
+
+
+static __net_initdata struct pernet_operations proto_net_ops = {
+ .init = proto_init_net,
+ .exit = proto_exit_net,
};
static int __init proto_init(void)
{
- /* register /proc/net/protocols */
- return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
+ return register_pernet_subsys(&proto_net_ops);
}
subsys_initcall(proto_init);
#endif /* PROC_FS */
-
-EXPORT_SYMBOL(sk_alloc);
-EXPORT_SYMBOL(sk_free);
-EXPORT_SYMBOL(sk_send_sigurg);
-EXPORT_SYMBOL(sock_alloc_send_skb);
-EXPORT_SYMBOL(sock_init_data);
-EXPORT_SYMBOL(sock_kfree_s);
-EXPORT_SYMBOL(sock_kmalloc);
-EXPORT_SYMBOL(sock_no_accept);
-EXPORT_SYMBOL(sock_no_bind);
-EXPORT_SYMBOL(sock_no_connect);
-EXPORT_SYMBOL(sock_no_getname);
-EXPORT_SYMBOL(sock_no_getsockopt);
-EXPORT_SYMBOL(sock_no_ioctl);
-EXPORT_SYMBOL(sock_no_listen);
-EXPORT_SYMBOL(sock_no_mmap);
-EXPORT_SYMBOL(sock_no_poll);
-EXPORT_SYMBOL(sock_no_recvmsg);
-EXPORT_SYMBOL(sock_no_sendmsg);
-EXPORT_SYMBOL(sock_no_sendpage);
-EXPORT_SYMBOL(sock_no_setsockopt);
-EXPORT_SYMBOL(sock_no_shutdown);
-EXPORT_SYMBOL(sock_no_socketpair);
-EXPORT_SYMBOL(sock_rfree);
-EXPORT_SYMBOL(sock_setsockopt);
-EXPORT_SYMBOL(sock_wfree);
-EXPORT_SYMBOL(sock_wmalloc);
-EXPORT_SYMBOL(sock_i_uid);
-EXPORT_SYMBOL(sock_i_ino);
-EXPORT_SYMBOL(sysctl_optmem_max);