* handler for protocols to use and generic option handler.
*
*
- * Version: $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Florian La Roche, <flla@stud.uni-sb.de>
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* Make lock validator output more readable. (we pre-construct these
* strings build-time, so that runtime initialization of socket
"sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
"sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
"sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
- "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
+ "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
+ "sk_lock-AF_MAX"
};
static const char *af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
"slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
"slock-27" , "slock-28" , "slock-AF_CAN" ,
"slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
- "slock-AF_RXRPC" , "slock-AF_MAX"
+ "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
+ "slock-AF_MAX"
};
static const char *af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
"clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" ,
"clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" ,
"clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" ,
- "clock-27" , "clock-28" , "clock-29" ,
+ "clock-27" , "clock-28" , "clock-AF_CAN" ,
"clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
- "clock-AF_RXRPC" , "clock-AF_MAX"
+ "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
+ "clock-AF_MAX"
};
-#endif
/*
* sk_callback_lock locking rules are per-address-family,
static int warned __read_mostly;
*timeo_p = 0;
- if (warned < 10 && net_ratelimit())
+ if (warned < 10 && net_ratelimit()) {
warned++;
printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
"tries to set negative timeout\n",
current->comm, task_pid_nr(current));
+ }
return 0;
}
*timeo_p = MAX_SCHEDULE_TIMEOUT;
int err = 0;
int skb_len;
- /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
+ /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
number of warnings when compiling with -W --ANK
*/
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
*/
mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
- rc = sk->sk_backlog_rcv(sk, skb);
+ rc = sk_backlog_rcv(sk, skb);
mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
} else
{
int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
- struct net *net = sk->sk_net;
+ struct net *net = sock_net(sk);
char devname[IFNAMSIZ];
int index;
* Options without arguments
*/
-#ifdef SO_DONTLINGER /* Compatibility item... */
- if (optname == SO_DONTLINGER) {
- lock_sock(sk);
- sock_reset_flag(sk, SOCK_LINGER);
- release_sock(sk);
- return 0;
- }
-#endif
-
if (optname == SO_BINDTODEVICE)
return sock_bindtodevice(sk, optval, optlen);
if (len < 0)
return -EINVAL;
+ v.val = 0;
+
switch(optname) {
case SO_DEBUG:
v.val = sock_flag(sk, SOCK_DBG);
* @family: protocol family
* @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
* @prot: struct proto associated with this new sock instance
- * @zero_it: if we should zero the newly allocated sock
*/
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
struct proto *prot)
*/
sk->sk_prot = sk->sk_prot_creator = prot;
sock_lock_init(sk);
- sk->sk_net = get_net(net);
+ sock_net_set(sk, get_net(net));
}
return sk;
if (atomic_read(&sk->sk_omem_alloc))
printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
- __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
+ __func__, atomic_read(&sk->sk_omem_alloc));
- put_net(sk->sk_net);
+ put_net(sock_net(sk));
sk_prot_free(sk->sk_prot_creator, sk);
}
sock_hold(sk);
sock_release(sk->sk_socket);
- sk->sk_net = get_net(&init_net);
+ release_net(sock_net(sk));
+ sock_net_set(sk, get_net(&init_net));
sock_put(sk);
}
EXPORT_SYMBOL(sk_release_kernel);
sock_copy(newsk, sk);
/* SANITY */
- get_net(newsk->sk_net);
+ get_net(sock_net(newsk));
sk_node_init(&newsk->sk_node);
sock_lock_init(newsk);
bh_lock_sock(newsk);
* to be taken into account in all callers. -acme
*/
sk_refcnt_debug_inc(newsk);
- newsk->sk_socket = NULL;
+ sk_set_socket(newsk, NULL);
newsk->sk_sleep = NULL;
if (newsk->sk_prot->sockets_allocated)
- atomic_inc(newsk->sk_prot->sockets_allocated);
+ percpu_counter_inc(newsk->sk_prot->sockets_allocated);
}
out:
return newsk;
if (sk->sk_route_caps & NETIF_F_GSO)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
if (sk_can_gso(sk)) {
- if (dst->header_len)
+ if (dst->header_len) {
sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
- else
+ } else {
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
+ sk->sk_gso_max_size = dst->dev->gso_max_size;
+ }
}
}
EXPORT_SYMBOL_GPL(sk_setup_caps);
struct sk_buff *next = skb->next;
skb->next = NULL;
- sk->sk_backlog_rcv(sk, skb);
+ sk_backlog_rcv(sk, skb);
/*
* We are in process context here with softirqs
/* Under pressure. */
if (allocated > prot->sysctl_mem[1])
if (prot->enter_memory_pressure)
- prot->enter_memory_pressure();
+ prot->enter_memory_pressure(sk);
/* Over hard limit. */
if (allocated > prot->sysctl_mem[2])
}
if (prot->memory_pressure) {
- if (!*prot->memory_pressure ||
- prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) *
+ int alloc;
+
+ if (!*prot->memory_pressure)
+ return 1;
+ alloc = percpu_counter_read_positive(prot->sockets_allocated);
+ if (prot->sysctl_mem[2] > alloc *
sk_mem_pages(sk->sk_wmem_queued +
atomic_read(&sk->sk_rmem_alloc) +
sk->sk_forward_alloc))
{
read_lock(&sk->sk_callback_lock);
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible(sk->sk_sleep);
+ wake_up_interruptible_sync(sk->sk_sleep);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
read_unlock(&sk->sk_callback_lock);
}
*/
if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible(sk->sk_sleep);
+ wake_up_interruptible_sync(sk->sk_sleep);
/* Should agree with poll, otherwise some programs break */
if (sock_writeable(sk))
sk->sk_rcvbuf = sysctl_rmem_default;
sk->sk_sndbuf = sysctl_wmem_default;
sk->sk_state = TCP_CLOSE;
- sk->sk_socket = sock;
+ sk_set_socket(sk, sock);
sock_set_flag(sk, SOCK_ZAPPED);
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
- sk->sk_stamp = ktime_set(-1L, -1L);
+ sk->sk_stamp = ktime_set(-1L, 0);
atomic_set(&sk->sk_refcnt, 1);
atomic_set(&sk->sk_drops, 0);
static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);
-int proto_register(struct proto *prot, int alloc_slab)
+#ifdef CONFIG_PROC_FS
+#define PROTO_INUSE_NR 64 /* should be enough for the first time */
+/* In-use counters, one slot per protocol; slots are indexed by proto->inuse_idx. */
+struct prot_inuse {
+ int val[PROTO_INUSE_NR];
+};
+
+/* Bitmap of counter slots: assign_proto_idx() sets a bit, release_proto_idx() clears it. */
+static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
+
+#ifdef CONFIG_NET_NS
+/*
+ * Add @val to the in-use counter of @prot inside @net. Only the counter
+ * table of the CPU reported by smp_processor_id() is touched; use
+ * sock_prot_inuse_get() to obtain the total across CPUs.
+ * NOTE(review): presumably callers run with preemption disabled - confirm.
+ */
+void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
+{
+ int cpu = smp_processor_id();
+ per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val;
+}
+EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
+
+/*
+ * Return the in-use count of @prot inside @net: the sum of the protocol's
+ * slot over the counter tables of all possible CPUs, never less than 0.
+ */
+int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
- char *request_sock_slab_name = NULL;
- char *timewait_sock_slab_name;
+ int cpu, idx = prot->inuse_idx;
+ int res = 0;
- if (sock_prot_inuse_init(prot) != 0) {
- printk(KERN_CRIT "%s: Can't alloc inuse counters!\n", prot->name);
- goto out;
+ for_each_possible_cpu(cpu)
+ res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
+
+ /* a negative sum is reported as 0 */
+ return res >= 0 ? res : 0;
+}
+EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
+
+/*
+ * Per-namespace init hook: allocate the per-CPU in-use counter table of @net.
+ * Returns 0 on success or -ENOMEM when alloc_percpu() fails.
+ */
+static int sock_inuse_init_net(struct net *net)
+{
+ net->core.inuse = alloc_percpu(struct prot_inuse);
+ return net->core.inuse ? 0 : -ENOMEM;
+}
+
+/* Per-namespace exit hook: free the per-CPU in-use counter table of @net. */
+static void sock_inuse_exit_net(struct net *net)
+{
+ free_percpu(net->core.inuse);
+}
+
+/*
+ * Hooks that allocate and free the in-use counter table of a network
+ * namespace; registered by net_inuse_init().
+ */
+static struct pernet_operations net_inuse_ops = {
+ .init = sock_inuse_init_net,
+ .exit = sock_inuse_exit_net,
+};
+
+/*
+ * Initcall that registers net_inuse_ops as a pernet subsystem.
+ * A registration failure is treated as fatal (panic); otherwise returns 0.
+ */
+static __init int net_inuse_init(void)
+{
+ if (register_pernet_subsys(&net_inuse_ops))
+ panic("Cannot initialize net inuse counters");
+
+ return 0;
+}
+
+core_initcall(net_inuse_init);
+#else
+/* Without CONFIG_NET_NS there is a single, statically defined per-CPU counter table. */
+static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
+
+/*
+ * Add @val to the in-use counter of @prot in the current CPU's table.
+ * @net is not used in this variant.
+ */
+void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
+{
+ __get_cpu_var(prot_inuse).val[prot->inuse_idx] += val;
+}
+EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
+
+/*
+ * Return the in-use count of @prot: the sum of the protocol's slot over the
+ * counter tables of all possible CPUs, never less than 0.
+ * @net is not used in this variant.
+ */
+int sock_prot_inuse_get(struct net *net, struct proto *prot)
+{
+ int cpu, idx = prot->inuse_idx;
+ int res = 0;
+
+ for_each_possible_cpu(cpu)
+ res += per_cpu(prot_inuse, cpu).val[idx];
+
+ /* a negative sum is reported as 0 */
+ return res >= 0 ? res : 0;
+}
+EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
+#endif
+
+/*
+ * Choose the counter slot for @prot: the lowest clear bit of proto_inuse_idx
+ * is stored in prot->inuse_idx and marked as taken.
+ *
+ * The last slot (PROTO_INUSE_NR - 1) is special: when it is the first free
+ * one an error is logged and its bit is left clear, so it is never marked as
+ * taken and the search above always finds a slot.
+ * NOTE(review): this looks like a shared overflow slot for every protocol
+ * registered after the table is full - confirm.
+ */
+static void assign_proto_idx(struct proto *prot)
+{
+ prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
+
+ if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
+ printk(KERN_ERR "PROTO_INUSE_NR exhausted\n");
+ return;
}
+ set_bit(prot->inuse_idx, proto_inuse_idx);
+}
+
+/*
+ * Give back the counter slot of @prot by clearing its bit in proto_inuse_idx.
+ * The last slot (PROTO_INUSE_NR - 1) is skipped because assign_proto_idx()
+ * never sets its bit.
+ */
+static void release_proto_idx(struct proto *prot)
+{
+ if (prot->inuse_idx != PROTO_INUSE_NR - 1)
+ clear_bit(prot->inuse_idx, proto_inuse_idx);
+}
+#else
+/* No-op variant used when CONFIG_PROC_FS is not set: no counter slot is assigned. */
+static inline void assign_proto_idx(struct proto *prot)
+{
+}
+
+/* No-op variant used when CONFIG_PROC_FS is not set: there is no slot to release. */
+static inline void release_proto_idx(struct proto *prot)
+{
+}
+#endif
+
+int proto_register(struct proto *prot, int alloc_slab)
+{
if (alloc_slab) {
prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ SLAB_HWCACHE_ALIGN | prot->slab_flags,
+ NULL);
if (prot->slab == NULL) {
printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
prot->name);
- goto out_free_inuse;
+ goto out;
}
if (prot->rsk_prot != NULL) {
static const char mask[] = "request_sock_%s";
- request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
- if (request_sock_slab_name == NULL)
+ prot->rsk_prot->slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
+ if (prot->rsk_prot->slab_name == NULL)
goto out_free_sock_slab;
- sprintf(request_sock_slab_name, mask, prot->name);
- prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
+ sprintf(prot->rsk_prot->slab_name, mask, prot->name);
+ prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
prot->rsk_prot->obj_size, 0,
SLAB_HWCACHE_ALIGN, NULL);
if (prot->twsk_prot != NULL) {
static const char mask[] = "tw_sock_%s";
- timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
+ prot->twsk_prot->twsk_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
- if (timewait_sock_slab_name == NULL)
+ if (prot->twsk_prot->twsk_slab_name == NULL)
goto out_free_request_sock_slab;
- sprintf(timewait_sock_slab_name, mask, prot->name);
+ sprintf(prot->twsk_prot->twsk_slab_name, mask, prot->name);
prot->twsk_prot->twsk_slab =
- kmem_cache_create(timewait_sock_slab_name,
+ kmem_cache_create(prot->twsk_prot->twsk_slab_name,
prot->twsk_prot->twsk_obj_size,
- 0, SLAB_HWCACHE_ALIGN,
+ 0,
+ SLAB_HWCACHE_ALIGN |
+ prot->slab_flags,
NULL);
if (prot->twsk_prot->twsk_slab == NULL)
goto out_free_timewait_sock_slab_name;
write_lock(&proto_list_lock);
list_add(&prot->node, &proto_list);
+ assign_proto_idx(prot);
write_unlock(&proto_list_lock);
return 0;
out_free_timewait_sock_slab_name:
- kfree(timewait_sock_slab_name);
+ kfree(prot->twsk_prot->twsk_slab_name);
out_free_request_sock_slab:
if (prot->rsk_prot && prot->rsk_prot->slab) {
kmem_cache_destroy(prot->rsk_prot->slab);
prot->rsk_prot->slab = NULL;
}
out_free_request_sock_slab_name:
- kfree(request_sock_slab_name);
+ kfree(prot->rsk_prot->slab_name);
out_free_sock_slab:
kmem_cache_destroy(prot->slab);
prot->slab = NULL;
-out_free_inuse:
- sock_prot_inuse_free(prot);
out:
return -ENOBUFS;
}
+/*
+ * Unregister @prot: release its in-use counter slot and unlink it from
+ * proto_list under proto_list_lock, then destroy the sock, request_sock and
+ * timewait_sock slab caches that exist, together with the slab name strings
+ * kept in rsk_prot->slab_name and twsk_prot->twsk_slab_name.
+ */
void proto_unregister(struct proto *prot)
{
write_lock(&proto_list_lock);
+ release_proto_idx(prot);
list_del(&prot->node);
write_unlock(&proto_list_lock);
- sock_prot_inuse_free(prot);
-
if (prot->slab != NULL) {
kmem_cache_destroy(prot->slab);
prot->slab = NULL;
}
if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
- const char *name = kmem_cache_name(prot->rsk_prot->slab);
-
kmem_cache_destroy(prot->rsk_prot->slab);
- kfree(name);
+ kfree(prot->rsk_prot->slab_name);
prot->rsk_prot->slab = NULL;
}
if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
- const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
-
kmem_cache_destroy(prot->twsk_prot->twsk_slab);
- kfree(name);
+ kfree(prot->twsk_prot->twsk_slab_name);
prot->twsk_prot->twsk_slab = NULL;
}
}
"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
proto->name,
proto->obj_size,
- proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
+ sock_prot_inuse_get(seq_file_net(seq), proto),
proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
proto->max_header,
+/*
+ * open() handler of the protocols seq_file: opens it with seq_open_net()
+ * using proto_seq_ops and a struct seq_net_private sized private area.
+ */
static int proto_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &proto_seq_ops);
+ return seq_open_net(inode, file, &proto_seq_ops,
+ sizeof(struct seq_net_private));
}
+/* File operations of the "protocols" proc entry created by proto_init_net(). */
static const struct file_operations proto_seq_fops = {
.open = proto_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
+};
+
+/*
+ * Per-namespace init hook: create the read-only "protocols" proc entry of
+ * @net. Returns 0 on success or -ENOMEM when the entry cannot be created.
+ */
+static __net_init int proto_init_net(struct net *net)
+{
+ if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops))
+ return -ENOMEM;
+
+ return 0;
+}
+
+/* Per-namespace exit hook: remove the "protocols" proc entry of @net. */
+static __net_exit void proto_exit_net(struct net *net)
+{
+ proc_net_remove(net, "protocols");
+}
+
+
+/*
+ * Hooks that create and remove the "protocols" proc entry of a network
+ * namespace; registered by proto_init().
+ */
+static __net_initdata struct pernet_operations proto_net_ops = {
+ .init = proto_init_net,
+ .exit = proto_exit_net,
};
+/*
+ * Initcall that registers proto_net_ops as a pernet subsystem and returns
+ * the result of register_pernet_subsys().
+ */
static int __init proto_init(void)
{
- /* register /proc/net/protocols */
- return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
+ return register_pernet_subsys(&proto_net_ops);
}
subsys_initcall(proto_init);