X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fipv4%2Faf_inet.c;h=24eca23c2db3159c91898bd7fefe8bbb501f1233;hb=a6604471db5e7a33474a7f16c64d6b118fae3e74;hp=6e5575b0abef4eec4670f9730d169e5e19e481ce;hpb=132adf54639cf7dd9315e8df89c2faa59f6e46d9;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6e5575b..24eca23 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -87,11 +87,11 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -126,6 +126,10 @@ extern void ip_mc_drop_socket(struct sock *sk); static struct list_head inetsw[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw_lock); +struct ipv4_config ipv4_config; + +EXPORT_SYMBOL(ipv4_config); + /* New destruction routine */ void inet_sock_destruct(struct sock *sk) @@ -135,6 +139,8 @@ void inet_sock_destruct(struct sock *sk) __skb_queue_purge(&sk->sk_receive_queue); __skb_queue_purge(&sk->sk_error_queue); + sk_mem_reclaim(sk); + if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) { printk("Attempt to release TCP socket in state %d %p\n", sk->sk_state, sk); @@ -217,11 +223,48 @@ out: return err; } +u32 inet_ehash_secret __read_mostly; +EXPORT_SYMBOL(inet_ehash_secret); + +/* + * inet_ehash_secret must be set exactly once + * Instead of using a dedicated spinlock, we (ab)use inetsw_lock + */ +void build_ehash_secret(void) +{ + u32 rnd; + do { + get_random_bytes(&rnd, sizeof(rnd)); + } while (rnd == 0); + spin_lock_bh(&inetsw_lock); + if (!inet_ehash_secret) + inet_ehash_secret = rnd; + spin_unlock_bh(&inetsw_lock); +} +EXPORT_SYMBOL(build_ehash_secret); + +static inline int inet_netns_ok(struct net *net, int protocol) +{ + int hash; + struct net_protocol *ipprot; + + if (net == &init_net) + return 1; + + hash = protocol & (MAX_INET_PROTOS - 1); + ipprot = rcu_dereference(inet_protos[hash]); + + if (ipprot == NULL) + /* raw IP is OK */ + return 1; + return ipprot->netns_ok; +} + /* * Create an inet socket. */ -static int inet_create(struct socket *sock, int protocol) +static int inet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct list_head *p; @@ -233,6 +276,11 @@ static int inet_create(struct socket *sock, int protocol) int try_loading_module = 0; int err; + if (sock->type != SOCK_RAW && + sock->type != SOCK_DGRAM && + !inet_ehash_secret) + build_ehash_secret(); + sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ @@ -286,6 +334,10 @@ lookup_protocol: if (answer->capability > 0 && !capable(answer->capability)) goto out_rcu_unlock; + err = -EAFNOSUPPORT; + if (!inet_netns_ok(net, protocol)) + goto out_rcu_unlock; + sock->ops = answer->ops; answer_prot = answer->prot; answer_no_check = answer->no_check; @@ -295,7 +347,7 @@ lookup_protocol: BUG_TRAP(answer_prot->slab != NULL); err = -ENOBUFS; - sk = sk_alloc(PF_INET, GFP_KERNEL, answer_prot, 1); + sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot); if (sk == NULL) goto out; @@ -412,7 +464,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) goto out; - chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); + chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); /* Not specified by any standard per-se, however it breaks too * many applications when removed. It is unfortunate since @@ -424,7 +476,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = -EADDRNOTAVAIL; if (!sysctl_ip_nonlocal_bind && !inet->freebind && - addr->sin_addr.s_addr != INADDR_ANY && + addr->sin_addr.s_addr != htonl(INADDR_ANY) && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) @@ -750,6 +802,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; int err = 0; + struct net *net = sock_net(sk); switch (cmd) { case SIOCGSTAMP: @@ -761,12 +814,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCADDRT: case SIOCDELRT: case SIOCRTMSG: - err = ip_rt_ioctl(cmd, (void __user *)arg); + err = ip_rt_ioctl(net, cmd, (void __user *)arg); break; case SIOCDARP: case SIOCGARP: case SIOCSARP: - err = arp_ioctl(cmd, (void __user *)arg); + err = arp_ioctl(net, cmd, (void __user *)arg); break; case SIOCGIFADDR: case SIOCSIFADDR: @@ -779,7 +832,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFPFLAGS: case SIOCGIFPFLAGS: case SIOCSIFFLAGS: - err = devinet_ioctl(cmd, (void __user *)arg); + err = devinet_ioctl(net, cmd, (void __user *)arg); break; default: if (sk->sk_prot->ioctl) @@ -806,10 +859,11 @@ const struct proto_ops inet_stream_ops = { .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, - .sendmsg = inet_sendmsg, + .sendmsg = tcp_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, .sendpage = tcp_sendpage, + .splice_read = tcp_splice_read, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, @@ -914,7 +968,7 @@ static struct inet_protosw inetsw_array[] = } }; -#define INETSW_ARRAY_LEN (sizeof(inetsw_array) / sizeof(struct inet_protosw)) +#define INETSW_ARRAY_LEN ARRAY_SIZE(inetsw_array) void inet_register_protosw(struct inet_protosw *p) { @@ -1023,8 +1077,8 @@ static int inet_sk_reselect_saddr(struct sock *sk) if (sysctl_ip_dynaddr > 1) { printk(KERN_INFO "%s(): shifting inet->" - "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", - __FUNCTION__, + "saddr from " NIPQUAD_FMT " to " NIPQUAD_FMT "\n", + __func__, NIPQUAD(old_saddr), NIPQUAD(new_saddr)); } @@ -1078,7 +1132,7 @@ int inet_sk_rebuild_header(struct sock *sk) }; security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(&rt, &fl, sk, 0); + err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0); } if (!err) sk_setup_caps(sk, &rt->u.dst); @@ -1112,7 +1166,7 @@ static int inet_gso_send_check(struct sk_buff *skb) if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) goto out; - iph = skb->nh.iph; + iph = ip_hdr(skb); ihl = iph->ihl * 4; if (ihl < sizeof(*iph)) goto out; @@ -1120,8 +1174,9 @@ static int inet_gso_send_check(struct sk_buff *skb) if (unlikely(!pskb_may_pull(skb, ihl))) goto out; - skb->h.raw = __skb_pull(skb, ihl); - iph = skb->nh.iph; + __skb_pull(skb, ihl); + skb_reset_transport_header(skb); + iph = ip_hdr(skb); proto = iph->protocol & (MAX_INET_PROTOS - 1); err = -EPROTONOSUPPORT; @@ -1144,6 +1199,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) int ihl; int id; + if (!(features & NETIF_F_V4_CSUM)) + features &= ~NETIF_F_SG; + if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_TCPV4 | SKB_GSO_UDP | @@ -1155,7 +1213,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) goto out; - iph = skb->nh.iph; + iph = ip_hdr(skb); ihl = iph->ihl * 4; if (ihl < sizeof(*iph)) goto out; @@ -1163,8 +1221,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) if (unlikely(!pskb_may_pull(skb, ihl))) goto out; - skb->h.raw = __skb_pull(skb, ihl); - iph = skb->nh.iph; + __skb_pull(skb, ihl); + skb_reset_transport_header(skb); + iph = ip_hdr(skb); id = ntohs(iph->id); proto = iph->protocol & (MAX_INET_PROTOS - 1); segs = ERR_PTR(-EPROTONOSUPPORT); @@ -1175,22 +1234,85 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) segs = ops->gso_segment(skb, features); rcu_read_unlock(); - if (!segs || unlikely(IS_ERR(segs))) + if (!segs || IS_ERR(segs)) goto out; skb = segs; do { - iph = skb->nh.iph; + iph = ip_hdr(skb); iph->id = htons(id++); iph->tot_len = htons(skb->len - skb->mac_len); iph->check = 0; - iph->check = ip_fast_csum(skb->nh.raw, iph->ihl); + iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); } while ((skb = skb->next)); out: return segs; } +int inet_ctl_sock_create(struct sock **sk, unsigned short family, + unsigned short type, unsigned char protocol, + struct net *net) +{ + struct socket *sock; + int rc = sock_create_kern(family, type, protocol, &sock); + + if (rc == 0) { + *sk = sock->sk; + (*sk)->sk_allocation = GFP_ATOMIC; + /* + * Unhash it so that IP input processing does not even see it, + * we do not wish this socket to see incoming packets. + */ + (*sk)->sk_prot->unhash(*sk); + + sk_change_net(*sk, net); + } + return rc; +} + +EXPORT_SYMBOL_GPL(inet_ctl_sock_create); + +unsigned long snmp_fold_field(void *mib[], int offt) +{ + unsigned long res = 0; + int i; + + for_each_possible_cpu(i) { + res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt); + res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt); + } + return res; +} +EXPORT_SYMBOL_GPL(snmp_fold_field); + +int snmp_mib_init(void *ptr[2], size_t mibsize) +{ + BUG_ON(ptr == NULL); + ptr[0] = __alloc_percpu(mibsize); + if (!ptr[0]) + goto err0; + ptr[1] = __alloc_percpu(mibsize); + if (!ptr[1]) + goto err1; + return 0; +err1: + free_percpu(ptr[0]); + ptr[0] = NULL; +err0: + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(snmp_mib_init); + +void snmp_mib_free(void *ptr[2]) +{ + BUG_ON(ptr == NULL); + free_percpu(ptr[0]); + free_percpu(ptr[1]); + ptr[0] = ptr[1] = NULL; +} +EXPORT_SYMBOL_GPL(snmp_mib_free); + #ifdef CONFIG_IP_MULTICAST static struct net_protocol igmp_protocol = { .handler = igmp_rcv, @@ -1203,42 +1325,64 @@ static struct net_protocol tcp_protocol = { .gso_send_check = tcp_v4_gso_send_check, .gso_segment = tcp_tso_segment, .no_policy = 1, + .netns_ok = 1, }; static struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, .no_policy = 1, + .netns_ok = 1, }; static struct net_protocol icmp_protocol = { .handler = icmp_rcv, + .no_policy = 1, + .netns_ok = 1, }; static int __init init_ipv4_mibs(void) { - net_statistics[0] = alloc_percpu(struct linux_mib); - net_statistics[1] = alloc_percpu(struct linux_mib); - ip_statistics[0] = alloc_percpu(struct ipstats_mib); - ip_statistics[1] = alloc_percpu(struct ipstats_mib); - icmp_statistics[0] = alloc_percpu(struct icmp_mib); - icmp_statistics[1] = alloc_percpu(struct icmp_mib); - tcp_statistics[0] = alloc_percpu(struct tcp_mib); - tcp_statistics[1] = alloc_percpu(struct tcp_mib); - udp_statistics[0] = alloc_percpu(struct udp_mib); - udp_statistics[1] = alloc_percpu(struct udp_mib); - udplite_statistics[0] = alloc_percpu(struct udp_mib); - udplite_statistics[1] = alloc_percpu(struct udp_mib); - if (! - (net_statistics[0] && net_statistics[1] && ip_statistics[0] - && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1] - && udp_statistics[0] && udp_statistics[1] - && udplite_statistics[0] && udplite_statistics[1] ) ) - return -ENOMEM; - - (void) tcp_mib_init(); + if (snmp_mib_init((void **)net_statistics, + sizeof(struct linux_mib)) < 0) + goto err_net_mib; + if (snmp_mib_init((void **)ip_statistics, + sizeof(struct ipstats_mib)) < 0) + goto err_ip_mib; + if (snmp_mib_init((void **)icmp_statistics, + sizeof(struct icmp_mib)) < 0) + goto err_icmp_mib; + if (snmp_mib_init((void **)icmpmsg_statistics, + sizeof(struct icmpmsg_mib)) < 0) + goto err_icmpmsg_mib; + if (snmp_mib_init((void **)tcp_statistics, + sizeof(struct tcp_mib)) < 0) + goto err_tcp_mib; + if (snmp_mib_init((void **)udp_statistics, + sizeof(struct udp_mib)) < 0) + goto err_udp_mib; + if (snmp_mib_init((void **)udplite_statistics, + sizeof(struct udp_mib)) < 0) + goto err_udplite_mib; + + tcp_mib_init(); return 0; + +err_udplite_mib: + snmp_mib_free((void **)udp_statistics); +err_udp_mib: + snmp_mib_free((void **)tcp_statistics); +err_tcp_mib: + snmp_mib_free((void **)icmpmsg_statistics); +err_icmpmsg_mib: + snmp_mib_free((void **)icmp_statistics); +err_icmp_mib: + snmp_mib_free((void **)ip_statistics); +err_ip_mib: + snmp_mib_free((void **)net_statistics); +err_net_mib: + return -ENOMEM; } static int ipv4_proc_init(void); @@ -1315,11 +1459,14 @@ static int __init inet_init(void) ip_init(); - tcp_v4_init(&inet_family_ops); + tcp_v4_init(); /* Setup TCP slab cache for open requests. */ tcp_init(); + /* Setup UDP memory threshold */ + udp_init(); + /* Add UDP-Lite (RFC 3828) */ udplite4_register(); @@ -1327,7 +1474,8 @@ static int __init inet_init(void) * Set the ICMP layer up */ - icmp_init(&inet_family_ops); + if (icmp_init() < 0) + panic("Failed to create the ICMP control socket.\n"); /* * Initialise the multicast router @@ -1373,15 +1521,11 @@ static int __init ipv4_proc_init(void) goto out_tcp; if (udp4_proc_init()) goto out_udp; - if (fib_proc_init()) - goto out_fib; if (ip_misc_proc_init()) goto out_misc; out: return rc; out_misc: - fib_proc_exit(); -out_fib: udp4_proc_exit(); out_udp: tcp4_proc_exit();