X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=net%2Fipv6%2Faf_inet6.c;h=37d14e735c273438bdd6a3361f551be7b82c4de8;hb=e213e9cf707c51808e372dabd1070a61af17e77b;hp=60461ad7fa6ff0deaad6d6d27f463594ae0d7825;hpb=696adfe84c11c571a1e0863460ff0ec142b4e5a9;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 60461ad..37d14e7 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -71,6 +72,22 @@ MODULE_LICENSE("GPL"); static struct list_head inetsw6[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw6_lock); +struct ipv6_params ipv6_defaults = { + .disable_ipv6 = 0, + .autoconf = 1, +}; + +static int disable_ipv6_mod = 0; + +module_param_named(disable, disable_ipv6_mod, int, 0444); +MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional"); + +module_param_named(disable_ipv6, ipv6_defaults.disable_ipv6, int, 0444); +MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces"); + +module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444); +MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces"); + static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) { const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo); @@ -78,7 +95,8 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } -static int inet6_create(struct net *net, struct socket *sock, int protocol) +static int inet6_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct inet_sock *inet; struct ipv6_pinfo *np; @@ -141,7 +159,7 @@ lookup_protocol: } err = -EPERM; - if (answer->capability > 0 && !capable(answer->capability)) + if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) goto out_rcu_unlock; sock->ops = answer->ops; @@ -150,7 +168,7 @@ lookup_protocol: answer_flags = answer->flags; rcu_read_unlock(); - BUG_TRAP(answer_prot->slab != NULL); + WARN_ON(answer_prot->slab == NULL); err = -ENOBUFS; sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot); @@ -168,7 +186,7 @@ lookup_protocol: inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; if (SOCK_RAW == sock->type) { - inet->num = protocol; + inet->inet_num = protocol; if (IPPROTO_RAW == protocol) inet->hdrincl = 1; } @@ -211,12 +229,12 @@ lookup_protocol: */ sk_refcnt_debug_inc(sk); - if (inet->num) { + if (inet->inet_num) { /* It assumes that any protocol which allows * the user to assign a number at socket * creation time automatically shares. */ - inet->sport = htons(inet->num); + inet->inet_sport = htons(inet->inet_num); sk->sk_prot->hash(sk); } if (sk->sk_prot->init) { @@ -264,15 +282,32 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) lock_sock(sk); /* Check these errors (active socket, double bind). */ - if (sk->sk_state != TCP_CLOSE || inet->num) { + if (sk->sk_state != TCP_CLOSE || inet->inet_num) { err = -EINVAL; goto out; } /* Check if the address belongs to the host. */ if (addr_type == IPV6_ADDR_MAPPED) { + int chk_addr_ret; + + /* Binding to v4-mapped address on a v6-only socket + * makes no sense + */ + if (np->ipv6only) { + err = -EINVAL; + goto out; + } + + /* Reproduce AF_INET checks to make the bindings consitant */ v4addr = addr->sin6_addr.s6_addr32[3]; - if (inet_addr_type(net, v4addr) != RTN_LOCAL) { + chk_addr_ret = inet_addr_type(net, v4addr); + if (!sysctl_ip_nonlocal_bind && + !(inet->freebind || inet->transparent) && + v4addr != htonl(INADDR_ANY) && + chk_addr_ret != RTN_LOCAL && + chk_addr_ret != RTN_MULTICAST && + chk_addr_ret != RTN_BROADCAST) { err = -EADDRNOTAVAIL; goto out; } @@ -280,6 +315,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_type != IPV6_ADDR_ANY) { struct net_device *dev = NULL; + rcu_read_lock(); if (addr_type & IPV6_ADDR_LINKLOCAL) { if (addr_len >= sizeof(struct sockaddr_in6) && addr->sin6_scope_id) { @@ -292,12 +328,12 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Binding to link-local address requires an interface */ if (!sk->sk_bound_dev_if) { err = -EINVAL; - goto out; + goto out_unlock; } - dev = dev_get_by_index(net, sk->sk_bound_dev_if); + dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; - goto out; + goto out_unlock; } } @@ -308,19 +344,16 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!(addr_type & IPV6_ADDR_MULTICAST)) { if (!ipv6_chk_addr(net, &addr->sin6_addr, dev, 0)) { - if (dev) - dev_put(dev); err = -EADDRNOTAVAIL; - goto out; + goto out_unlock; } } - if (dev) - dev_put(dev); + rcu_read_unlock(); } } - inet->rcv_saddr = v4addr; - inet->saddr = v4addr; + inet->inet_rcv_saddr = v4addr; + inet->inet_saddr = v4addr; ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr); @@ -334,16 +367,22 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } - if (addr_type != IPV6_ADDR_ANY) + if (addr_type != IPV6_ADDR_ANY) { sk->sk_userlocks |= SOCK_BINDADDR_LOCK; + if (addr_type != IPV6_ADDR_MAPPED) + np->ipv6only = 1; + } if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; - inet->sport = htons(inet->num); - inet->dport = 0; - inet->daddr = 0; + inet->inet_sport = htons(inet->inet_num); + inet->inet_dport = 0; + inet->inet_daddr = 0; out: release_sock(sk); return err; +out_unlock: + rcu_read_unlock(); + goto out; } EXPORT_SYMBOL(inet6_bind); @@ -404,12 +443,12 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin6_flowinfo = 0; sin->sin6_scope_id = 0; if (peer) { - if (!inet->dport) + if (!inet->inet_dport) return -ENOTCONN; if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && peer == 1) return -ENOTCONN; - sin->sin6_port = inet->dport; + sin->sin6_port = inet->inet_dport; ipv6_addr_copy(&sin->sin6_addr, &np->daddr); if (np->sndflow) sin->sin6_flowinfo = np->flow_label; @@ -419,7 +458,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, else ipv6_addr_copy(&sin->sin6_addr, &np->rcv_saddr); - sin->sin6_port = inet->sport; + sin->sin6_port = inet->inet_sport; } if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin->sin6_scope_id = sk->sk_bound_dev_if; @@ -515,7 +554,7 @@ const struct proto_ops inet6_dgram_ops = { #endif }; -static struct net_proto_family inet6_family_ops = { +static const struct net_proto_family inet6_family_ops = { .family = PF_INET6, .create = inet6_create, .owner = THIS_MODULE, @@ -617,8 +656,9 @@ int inet6_sk_rebuild_header(struct sock *sk) ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.fl6_flowlabel = np->flow_label; fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = inet->dport; - fl.fl_ip_sport = inet->sport; + fl.mark = sk->sk_mark; + fl.fl_ip_dport = inet->inet_dport; + fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); if (np->opt && np->opt->srcrt) { @@ -636,7 +676,7 @@ int inet6_sk_rebuild_header(struct sock *sk) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = -err; return err; } @@ -671,10 +711,9 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) EXPORT_SYMBOL_GPL(ipv6_opt_accepted); -static struct inet6_protocol *ipv6_gso_pull_exthdrs(struct sk_buff *skb, - int proto) +static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto) { - struct inet6_protocol *ops = NULL; + const struct inet6_protocol *ops = NULL; for (;;) { struct ipv6_opt_hdr *opth; @@ -703,13 +742,13 @@ static struct inet6_protocol *ipv6_gso_pull_exthdrs(struct sk_buff *skb, __skb_pull(skb, len); } - return ops; + return proto; } static int ipv6_gso_send_check(struct sk_buff *skb) { struct ipv6hdr *ipv6h; - struct inet6_protocol *ops; + const struct inet6_protocol *ops; int err = -EINVAL; if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) @@ -720,7 +759,9 @@ static int ipv6_gso_send_check(struct sk_buff *skb) err = -EPROTONOSUPPORT; rcu_read_lock(); - ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + ops = rcu_dereference(inet6_protos[ + ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]); + if (likely(ops && ops->gso_send_check)) { skb_reset_transport_header(skb); err = ops->gso_send_check(skb); @@ -735,7 +776,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct ipv6hdr *ipv6h; - struct inet6_protocol *ops; + const struct inet6_protocol *ops; + int proto; + struct frag_hdr *fptr; + unsigned int unfrag_ip6hlen; + u8 *prevhdr; + int offset = 0; if (!(features & NETIF_F_V6_CSUM)) features &= ~NETIF_F_SG; @@ -755,8 +801,9 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) __skb_pull(skb, sizeof(*ipv6h)); segs = ERR_PTR(-EPROTONOSUPPORT); + proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); rcu_read_lock(); - ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + ops = rcu_dereference(inet6_protos[proto]); if (likely(ops && ops->gso_segment)) { skb_reset_transport_header(skb); segs = ops->gso_segment(skb, features); @@ -770,17 +817,145 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) ipv6h = ipv6_hdr(skb); ipv6h->payload_len = htons(skb->len - skb->mac_len - sizeof(*ipv6h)); + if (proto == IPPROTO_UDP) { + unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); + fptr = (struct frag_hdr *)(skb_network_header(skb) + + unfrag_ip6hlen); + fptr->frag_off = htons(offset); + if (skb->next != NULL) + fptr->frag_off |= htons(IP6_MF); + offset += (ntohs(ipv6h->payload_len) - + sizeof(struct frag_hdr)); + } } out: return segs; } -static struct packet_type ipv6_packet_type = { - .type = __constant_htons(ETH_P_IPV6), +struct ipv6_gro_cb { + struct napi_gro_cb napi; + int proto; +}; + +#define IPV6_GRO_CB(skb) ((struct ipv6_gro_cb *)(skb)->cb) + +static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + const struct inet6_protocol *ops; + struct sk_buff **pp = NULL; + struct sk_buff *p; + struct ipv6hdr *iph; + unsigned int nlen; + unsigned int hlen; + unsigned int off; + int flush = 1; + int proto; + __wsum csum; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*iph); + iph = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + iph = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!iph)) + goto out; + } + + skb_gro_pull(skb, sizeof(*iph)); + skb_set_transport_header(skb, skb_gro_offset(skb)); + + flush += ntohs(iph->payload_len) != skb_gro_len(skb); + + rcu_read_lock(); + proto = iph->nexthdr; + ops = rcu_dereference(inet6_protos[proto]); + if (!ops || !ops->gro_receive) { + __pskb_pull(skb, skb_gro_offset(skb)); + proto = ipv6_gso_pull_exthdrs(skb, proto); + skb_gro_pull(skb, -skb_transport_offset(skb)); + skb_reset_transport_header(skb); + __skb_push(skb, skb_gro_offset(skb)); + + if (!ops || !ops->gro_receive) + goto out_unlock; + + iph = ipv6_hdr(skb); + } + + IPV6_GRO_CB(skb)->proto = proto; + + flush--; + nlen = skb_network_header_len(skb); + + for (p = *head; p; p = p->next) { + struct ipv6hdr *iph2; + + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + iph2 = ipv6_hdr(p); + + /* All fields must match except length. */ + if (nlen != skb_network_header_len(p) || + memcmp(iph, iph2, offsetof(struct ipv6hdr, payload_len)) || + memcmp(&iph->nexthdr, &iph2->nexthdr, + nlen - offsetof(struct ipv6hdr, nexthdr))) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + NAPI_GRO_CB(p)->flush |= flush; + } + + NAPI_GRO_CB(skb)->flush |= flush; + + csum = skb->csum; + skb_postpull_rcsum(skb, iph, skb_network_header_len(skb)); + + pp = ops->gro_receive(head, skb); + + skb->csum = csum; + +out_unlock: + rcu_read_unlock(); + +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +static int ipv6_gro_complete(struct sk_buff *skb) +{ + const struct inet6_protocol *ops; + struct ipv6hdr *iph = ipv6_hdr(skb); + int err = -ENOSYS; + + iph->payload_len = htons(skb->len - skb_network_offset(skb) - + sizeof(*iph)); + + rcu_read_lock(); + ops = rcu_dereference(inet6_protos[IPV6_GRO_CB(skb)->proto]); + if (WARN_ON(!ops || !ops->gro_complete)) + goto out_unlock; + + err = ops->gro_complete(skb); + +out_unlock: + rcu_read_unlock(); + + return err; +} + +static struct packet_type ipv6_packet_type __read_mostly = { + .type = cpu_to_be16(ETH_P_IPV6), .func = ipv6_rcv, .gso_send_check = ipv6_gso_send_check, .gso_segment = ipv6_gso_segment, + .gro_receive = ipv6_gro_receive, + .gro_complete = ipv6_gro_complete, }; static int __init ipv6_packet_init(void) @@ -794,61 +969,55 @@ static void ipv6_packet_cleanup(void) dev_remove_pack(&ipv6_packet_type); } -static int __init init_ipv6_mibs(void) +static int __net_init ipv6_init_mibs(struct net *net) { - if (snmp_mib_init((void **)ipv6_statistics, + if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6, + sizeof (struct udp_mib)) < 0) + return -ENOMEM; + if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6, + sizeof (struct udp_mib)) < 0) + goto err_udplite_mib; + if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics, sizeof(struct ipstats_mib)) < 0) goto err_ip_mib; - if (snmp_mib_init((void **)icmpv6_statistics, + if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics, sizeof(struct icmpv6_mib)) < 0) goto err_icmp_mib; - if (snmp_mib_init((void **)icmpv6msg_statistics, + if (snmp_mib_init((void __percpu **)net->mib.icmpv6msg_statistics, sizeof(struct icmpv6msg_mib)) < 0) goto err_icmpmsg_mib; - if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib)) < 0) - goto err_udp_mib; - if (snmp_mib_init((void **)udplite_stats_in6, - sizeof (struct udp_mib)) < 0) - goto err_udplite_mib; return 0; -err_udplite_mib: - snmp_mib_free((void **)udp_stats_in6); -err_udp_mib: - snmp_mib_free((void **)icmpv6msg_statistics); err_icmpmsg_mib: - snmp_mib_free((void **)icmpv6_statistics); + snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics); err_icmp_mib: - snmp_mib_free((void **)ipv6_statistics); + snmp_mib_free((void __percpu **)net->mib.ipv6_statistics); err_ip_mib: + snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6); +err_udplite_mib: + snmp_mib_free((void __percpu **)net->mib.udp_stats_in6); return -ENOMEM; - } -static void cleanup_ipv6_mibs(void) +static void ipv6_cleanup_mibs(struct net *net) { - snmp_mib_free((void **)ipv6_statistics); - snmp_mib_free((void **)icmpv6_statistics); - snmp_mib_free((void **)icmpv6msg_statistics); - snmp_mib_free((void **)udp_stats_in6); - snmp_mib_free((void **)udplite_stats_in6); + snmp_mib_free((void __percpu **)net->mib.udp_stats_in6); + snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6); + snmp_mib_free((void __percpu **)net->mib.ipv6_statistics); + snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics); + snmp_mib_free((void __percpu **)net->mib.icmpv6msg_statistics); } -static int inet6_net_init(struct net *net) +static int __net_init inet6_net_init(struct net *net) { int err = 0; net->ipv6.sysctl.bindv6only = 0; - net->ipv6.sysctl.flush_delay = 0; - net->ipv6.sysctl.ip6_rt_max_size = 4096; - net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; - net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; - net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; - net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; - net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; - net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; net->ipv6.sysctl.icmpv6_time = 1*HZ; + err = ipv6_init_mibs(net); + if (err) + return err; #ifdef CONFIG_PROC_FS err = udp6_proc_init(net); if (err) @@ -859,7 +1028,6 @@ static int inet6_net_init(struct net *net) err = ac6_proc_init(net); if (err) goto proc_ac6_fail; -out: #endif return err; @@ -868,17 +1036,20 @@ proc_ac6_fail: tcp6_proc_exit(net); proc_tcp6_fail: udp6_proc_exit(net); - goto out; +out: + ipv6_cleanup_mibs(net); + return err; #endif } -static void inet6_net_exit(struct net *net) +static void __net_exit inet6_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS udp6_proc_exit(net); tcp6_proc_exit(net); ac6_proc_exit(net); #endif + ipv6_cleanup_mibs(net); } static struct pernet_operations inet6_net_ops = { @@ -890,10 +1061,21 @@ static int __init inet6_init(void) { struct sk_buff *dummy_skb; struct list_head *r; - int err; + int err = 0; BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)); + /* Register the socket-side information for inet6_create. */ + for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) + INIT_LIST_HEAD(r); + + if (disable_ipv6_mod) { + printk(KERN_INFO + "IPv6: Loaded, but administratively disabled, " + "reboot required to enable\n"); + goto out; + } + err = proto_register(&tcpv6_prot, 1); if (err) goto out; @@ -911,10 +1093,6 @@ static int __init inet6_init(void) goto out_unregister_udplite_proto; - /* Register the socket-side information for inet6_create. */ - for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) - INIT_LIST_HEAD(r); - /* We MUST register RAW sockets before we create the ICMP6, * IGMP6, or NDISC control sockets. */ @@ -929,11 +1107,11 @@ static int __init inet6_init(void) if (err) goto out_sock_register_fail; - /* Initialise ipv6 mibs */ - err = init_ipv6_mibs(); +#ifdef CONFIG_SYSCTL + err = ipv6_static_sysctl_register(); if (err) - goto out_unregister_sock; - + goto static_sysctl_fail; +#endif /* * ipngwg API draft makes clear that the correct semantics * for TCP and UDP is to consider one TCP and UDP instance @@ -1058,8 +1236,10 @@ ipmr_fail: icmp_fail: unregister_pernet_subsys(&inet6_net_ops); register_pernet_fail: - cleanup_ipv6_mibs(); -out_unregister_sock: +#ifdef CONFIG_SYSCTL + ipv6_static_sysctl_unregister(); +static_sysctl_fail: +#endif sock_unregister(PF_INET6); rtnl_unregister_all(PF_INET6); out_sock_register_fail: @@ -1078,6 +1258,9 @@ module_init(inet6_init); static void __exit inet6_exit(void) { + if (disable_ipv6_mod) + return; + /* First of all disallow new sockets creation. */ sock_unregister(PF_INET6); /* Disallow any further netlink messages */ @@ -1113,11 +1296,15 @@ static void __exit inet6_exit(void) rawv6_exit(); unregister_pernet_subsys(&inet6_net_ops); - cleanup_ipv6_mibs(); +#ifdef CONFIG_SYSCTL + ipv6_static_sysctl_unregister(); +#endif proto_unregister(&rawv6_prot); proto_unregister(&udplitev6_prot); proto_unregister(&udpv6_prot); proto_unregister(&tcpv6_prot); + + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } module_exit(inet6_exit);