X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;ds=sidebyside;f=net%2Fipv4%2Fip_gre.c;h=7631b20490f52d34dce1686ff48f2391812129b7;hb=d088dde7b19628f386c486f16cd471f5b5682ca8;hp=4a43739c9035df34a6ec0344662a2d8dab9c6325;hpb=749c10f931923451a4c59b4435d182aa9ae27a4f;p=safe%2Fjmp%2Flinux-2.6 diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 4a43739..7631b20 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -66,10 +66,7 @@ solution, but it supposes maintaing new variable in ALL skb, even if no tunneling is used. - Current solution: t->recursion lock breaks dead loops. It looks - like dev->tbusy flag, but I preferred new variable, because - the semantics is different. One day, when hard_start_xmit - will be multithreaded we will have to use skb->encapsulation. + Current solution: HARD_TX_LOCK lock breaks dead loops. @@ -128,7 +125,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev); #define HASH_SIZE 16 -static int ipgre_net_id; +static int ipgre_net_id __read_mostly; struct ipgre_net { struct ip_tunnel *tunnels[4][HASH_SIZE]; @@ -159,8 +156,13 @@ struct ipgre_net { #define tunnels_r tunnels[2] #define tunnels_l tunnels[1] #define tunnels_wc tunnels[0] +/* + * Locking : hash tables are protected by RCU and a spinlock + */ +static DEFINE_SPINLOCK(ipgre_lock); -static DEFINE_RWLOCK(ipgre_lock); +#define for_each_ip_tunnel_rcu(start) \ + for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) /* Given src, dst and key, find appropriate for input tunnel. */ @@ -172,13 +174,13 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, int link = dev->ifindex; unsigned h0 = HASH(remote); unsigned h1 = HASH(key); - struct ip_tunnel *t, *sel[4] = { NULL, NULL, NULL, NULL }; + struct ip_tunnel *t, *cand = NULL; struct ipgre_net *ign = net_generic(net, ipgre_net_id); int dev_type = (gre_proto == htons(ETH_P_TEB)) ? 
ARPHRD_ETHER : ARPHRD_IPGRE; - int idx; + int score, cand_score = 4; - for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { + for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) { if (local != t->parms.iph.saddr || remote != t->parms.iph.daddr || key != t->parms.i_key || @@ -189,18 +191,21 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, t->dev->type != dev_type) continue; - idx = 0; + score = 0; if (t->parms.link != link) - idx |= 1; + score |= 1; if (t->dev->type != dev_type) - idx |= 2; - if (idx == 0) + score |= 2; + if (score == 0) return t; - if (sel[idx] == NULL) - sel[idx] = t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } } - for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { + for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) { if (remote != t->parms.iph.daddr || key != t->parms.i_key || !(t->dev->flags & IFF_UP)) @@ -210,18 +215,21 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, t->dev->type != dev_type) continue; - idx = 0; + score = 0; if (t->parms.link != link) - idx |= 1; + score |= 1; if (t->dev->type != dev_type) - idx |= 2; - if (idx == 0) + score |= 2; + if (score == 0) return t; - if (sel[idx] == NULL) - sel[idx] = t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } } - for (t = ign->tunnels_l[h1]; t; t = t->next) { + for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) { if ((local != t->parms.iph.saddr && (local != t->parms.iph.daddr || !ipv4_is_multicast(local))) || @@ -233,18 +241,21 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, t->dev->type != dev_type) continue; - idx = 0; + score = 0; if (t->parms.link != link) - idx |= 1; + score |= 1; if (t->dev->type != dev_type) - idx |= 2; - if (idx == 0) + score |= 2; + if (score == 0) return t; - if (sel[idx] == NULL) - sel[idx] = t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } } - for (t = ign->tunnels_wc[h1]; t; t = t->next) { + for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) { if (t->parms.i_key != key || !(t->dev->flags & IFF_UP)) continue; @@ -253,23 +264,26 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, t->dev->type != dev_type) continue; - idx = 0; + score = 0; if (t->parms.link != link) - idx |= 1; + score |= 1; if (t->dev->type != dev_type) - idx |= 2; - if (idx == 0) + score |= 2; + if (score == 0) return t; - if (sel[idx] == NULL) - sel[idx] = t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } } - for (idx = 1; idx < ARRAY_SIZE(sel); idx++) - if (sel[idx] != NULL) - return sel[idx]; + if (cand != NULL) + return cand; - if (ign->fb_tunnel_dev->flags & IFF_UP) - return netdev_priv(ign->fb_tunnel_dev); + dev = ign->fb_tunnel_dev; + if (dev->flags & IFF_UP) + return netdev_priv(dev); return NULL; } @@ -303,10 +317,10 @@ static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) { struct ip_tunnel **tp = ipgre_bucket(ign, t); + spin_lock_bh(&ipgre_lock); t->next = *tp; - write_lock_bh(&ipgre_lock); - *tp = t; - write_unlock_bh(&ipgre_lock); + rcu_assign_pointer(*tp, t); + spin_unlock_bh(&ipgre_lock); } static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) @@ -315,9 +329,9 @@ static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) { if (t == *tp) { - write_lock_bh(&ipgre_lock); + spin_lock_bh(&ipgre_lock); *tp = t->next; - write_unlock_bh(&ipgre_lock); + spin_unlock_bh(&ipgre_lock); break; } } @@ -468,7 +482,7 @@ static void ipgre_err(struct 
sk_buff *skb, u32 info) break; } - read_lock(&ipgre_lock); + rcu_read_lock(); t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, flags & GRE_KEY ? *(((__be32 *)p) + (grehlen / 4) - 1) : 0, @@ -480,13 +494,13 @@ static void ipgre_err(struct sk_buff *skb, u32 info) if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) goto out; - if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) + if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) t->err_count++; else t->err_count = 1; t->err_time = jiffies; out: - read_unlock(&ipgre_lock); + rcu_read_unlock(); return; } @@ -565,7 +579,7 @@ static int ipgre_rcv(struct sk_buff *skb) gre_proto = *(__be16 *)(h + 2); - read_lock(&ipgre_lock); + rcu_read_lock(); if ((tunnel = ipgre_tunnel_lookup(skb->dev, iph->saddr, iph->daddr, key, gre_proto))) { @@ -591,7 +605,7 @@ static int ipgre_rcv(struct sk_buff *skb) #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { /* Looped back packet, drop it! */ - if (skb->rtable->fl.iif == 0) + if (skb_rtable(skb)->fl.iif == 0) goto drop; stats->multicast++; skb->pkt_type = PACKET_BROADCAST; @@ -632,30 +646,30 @@ static int ipgre_rcv(struct sk_buff *skb) stats->rx_packets++; stats->rx_bytes += len; skb->dev = tunnel->dev; - dst_release(skb->dst); - skb->dst = NULL; + skb_dst_drop(skb); nf_reset(skb); skb_reset_network_header(skb); ipgre_ecn_decapsulate(iph, skb); netif_rx(skb); - read_unlock(&ipgre_lock); + rcu_read_unlock(); return(0); } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); drop: - read_unlock(&ipgre_lock); + rcu_read_unlock(); drop_nolock: kfree_skb(skb); return(0); } -static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &tunnel->dev->stats; + struct net_device_stats *stats = &dev->stats; + struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); struct iphdr *old_iph = ip_hdr(skb); struct iphdr *tiph; u8 tos; @@ -668,11 +682,6 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) __be32 dst; int mtu; - if (tunnel->recursion++) { - stats->collisions++; - goto tx_error; - } - if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; @@ -687,13 +696,13 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if ((dst = tiph->daddr) == 0) { /* NBMA tunnel */ - if (skb->dst == NULL) { + if (skb_dst(skb) == NULL) { stats->tx_fifo_errors++; goto tx_error; } if (skb->protocol == htons(ETH_P_IP)) { - rt = skb->rtable; + rt = skb_rtable(skb); if ((dst = rt->rt_gateway) == 0) goto tx_error_icmp; } @@ -701,7 +710,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) else if (skb->protocol == htons(ETH_P_IPV6)) { struct in6_addr *addr6; int addr_type; - struct neighbour *neigh = skb->dst->neighbour; + struct neighbour *neigh = skb_dst(skb)->neighbour; if (neigh == NULL) goto tx_error; @@ -725,10 +734,10 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) } tos = tiph->tos; - if (tos&1) { + if (tos == 1) { + tos = 0; if (skb->protocol == htons(ETH_P_IP)) tos = old_iph->tos; - tos &= ~1; } { @@ -755,10 +764,10 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (df) mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen; else - mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; + mtu = skb_dst(skb) ? 
dst_mtu(skb_dst(skb)) : dev->mtu; - if (skb->dst) - skb->dst->ops->update_pmtu(skb->dst, mtu); + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); if (skb->protocol == htons(ETH_P_IP)) { df |= (old_iph->frag_off&htons(IP_DF)); @@ -772,14 +781,14 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) } #ifdef CONFIG_IPV6 else if (skb->protocol == htons(ETH_P_IPV6)) { - struct rt6_info *rt6 = (struct rt6_info *)skb->dst; + struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); - if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) { + if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) { if ((tunnel->parms.iph.daddr && !ipv4_is_multicast(tunnel->parms.iph.daddr)) || rt6->rt6i_dst.plen == 128) { rt6->rt6i_flags |= RTF_MODIFIED; - skb->dst->metrics[RTAX_MTU-1] = mtu; + skb_dst(skb)->metrics[RTAX_MTU-1] = mtu; } } @@ -792,7 +801,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) #endif if (tunnel->err_count > 0) { - if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { + if (time_before(jiffies, + tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { tunnel->err_count--; dst_link_failure(skb); @@ -807,10 +817,9 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); - stats->tx_dropped++; + txq->tx_dropped++; dev_kfree_skb(skb); - tunnel->recursion--; - return 0; + return NETDEV_TX_OK; } if (skb->sk) skb_set_owner_w(new_skb, skb->sk); @@ -825,8 +834,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* * Push down and install the IPIP header. 
@@ -877,8 +886,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) nf_reset(skb); IPTUNNEL_XMIT(); - tunnel->recursion--; - return 0; + return NETDEV_TX_OK; tx_error_icmp: dst_link_failure(skb); @@ -886,8 +894,7 @@ tx_error_icmp: tx_error: stats->tx_errors++; dev_kfree_skb(skb); - tunnel->recursion--; - return 0; + return NETDEV_TX_OK; } static int ipgre_tunnel_bind_dev(struct net_device *dev) @@ -940,7 +947,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) addend += 4; } dev->needed_headroom = addend + hlen; - mtu -= dev->hard_header_len - addend; + mtu -= dev->hard_header_len + addend; if (mtu < 68) mtu = 68; @@ -1226,6 +1233,7 @@ static void ipgre_tunnel_setup(struct net_device *dev) dev->iflink = 0; dev->addr_len = 4; dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; } static int ipgre_tunnel_init(struct net_device *dev) @@ -1276,39 +1284,33 @@ static void ipgre_fb_tunnel_init(struct net_device *dev) } -static struct net_protocol ipgre_protocol = { +static const struct net_protocol ipgre_protocol = { .handler = ipgre_rcv, .err_handler = ipgre_err, .netns_ok = 1, }; -static void ipgre_destroy_tunnels(struct ipgre_net *ign) +static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) { int prio; for (prio = 0; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t; - while ((t = ign->tunnels[prio][h]) != NULL) - unregister_netdevice(t->dev); + struct ip_tunnel *t = ign->tunnels[prio][h]; + + while (t != NULL) { + unregister_netdevice_queue(t->dev, head); + t = t->next; + } } } } -static int ipgre_init_net(struct net *net) +static int __net_init ipgre_init_net(struct net *net) { + struct ipgre_net *ign = net_generic(net, ipgre_net_id); int err; - struct ipgre_net *ign; - - err = -ENOMEM; - ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL); - if (ign == NULL) - goto err_alloc; - - err = net_assign_generic(net, ipgre_net_id, ign); - if (err < 0) - goto err_assign; ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", ipgre_tunnel_setup); @@ -1329,27 +1331,26 @@ static int ipgre_init_net(struct net *net) err_reg_dev: free_netdev(ign->fb_tunnel_dev); err_alloc_dev: - /* nothing */ -err_assign: - kfree(ign); -err_alloc: return err; } -static void ipgre_exit_net(struct net *net) +static void __net_exit ipgre_exit_net(struct net *net) { struct ipgre_net *ign; + LIST_HEAD(list); ign = net_generic(net, ipgre_net_id); rtnl_lock(); - ipgre_destroy_tunnels(ign); + ipgre_destroy_tunnels(ign, &list); + unregister_netdevice_many(&list); rtnl_unlock(); - kfree(ign); } static struct pernet_operations ipgre_net_ops = { .init = ipgre_init_net, .exit = ipgre_exit_net, + .id = &ipgre_net_id, + .size = sizeof(struct ipgre_net), }; static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -1463,14 +1464,14 @@ static void ipgre_tap_setup(struct net_device *dev) ether_setup(dev); - dev->netdev_ops = &ipgre_netdev_ops; + dev->netdev_ops = &ipgre_tap_netdev_ops; dev->destructor = free_netdev; dev->iflink = 0; dev->features |= NETIF_F_NETNS_LOCAL; } -static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[], +static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct ip_tunnel *nt; @@ -1524,25 +1525,29 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], if (t->dev != dev) return -EEXIST; } else { - unsigned nflags = 0; - t = nt; - if 
(ipv4_is_multicast(p.iph.daddr)) - nflags = IFF_BROADCAST; - else if (p.iph.daddr) - nflags = IFF_POINTOPOINT; + if (dev->type != ARPHRD_ETHER) { + unsigned nflags = 0; - if ((dev->flags ^ nflags) & - (IFF_POINTOPOINT | IFF_BROADCAST)) - return -EINVAL; + if (ipv4_is_multicast(p.iph.daddr)) + nflags = IFF_BROADCAST; + else if (p.iph.daddr) + nflags = IFF_POINTOPOINT; + + if ((dev->flags ^ nflags) & + (IFF_POINTOPOINT | IFF_BROADCAST)) + return -EINVAL; + } ipgre_tunnel_unlink(ign, t); t->parms.iph.saddr = p.iph.saddr; t->parms.iph.daddr = p.iph.daddr; t->parms.i_key = p.i_key; - memcpy(dev->dev_addr, &p.iph.saddr, 4); - memcpy(dev->broadcast, &p.iph.daddr, 4); + if (dev->type != ARPHRD_ETHER) { + memcpy(dev->dev_addr, &p.iph.saddr, 4); + memcpy(dev->broadcast, &p.iph.daddr, 4); + } ipgre_tunnel_link(ign, t); netdev_state_change(dev); } @@ -1665,7 +1670,7 @@ static int __init ipgre_init(void) return -EAGAIN; } - err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops); + err = register_pernet_device(&ipgre_net_ops); if (err < 0) goto gen_device_failed; @@ -1683,7 +1688,7 @@ out: tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: - unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); + unregister_pernet_device(&ipgre_net_ops); gen_device_failed: inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); goto out; @@ -1693,7 +1698,7 @@ static void __exit ipgre_fini(void) { rtnl_link_unregister(&ipgre_tap_ops); rtnl_link_unregister(&ipgre_link_ops); - unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); + unregister_pernet_device(&ipgre_net_ops); if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) printk(KERN_INFO "ipgre close: can't remove protocol\n"); }
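
The core of the locking change above is the switch from the ipgre_lock rwlock to RCU for the lookup path (ipgre_rcv(), ipgre_err()) with a spinlock serializing only the writers (ipgre_tunnel_link()/ipgre_tunnel_unlink()). As a minimal illustrative sketch of that pattern, not part of the patch itself (the names tnl, tnl_bucket, tnl_lock and tnl_lookup are made up for the example), the reader/writer split looks roughly like this:

/* Minimal sketch of the RCU + spinlock hash-chain pattern adopted above.
 * All identifiers here are illustrative, not taken from ip_gre.c.
 */
#include <linux/types.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct tnl {
	struct tnl	*next;	/* singly linked hash chain, RCU-protected */
	__be32		key;
};

static struct tnl *tnl_bucket;		/* one hash bucket */
static DEFINE_SPINLOCK(tnl_lock);	/* serializes writers only */

/* Reader side: caller must hold rcu_read_lock(), just as ipgre_rcv()
 * and ipgre_err() do around ipgre_tunnel_lookup() in the patch.
 */
static struct tnl *tnl_lookup(__be32 key)
{
	struct tnl *t;

	for (t = rcu_dereference(tnl_bucket); t; t = rcu_dereference(t->next))
		if (t->key == key)
			return t;
	return NULL;
}

/* Writer side: link a new entry; rcu_assign_pointer() publishes the
 * fully initialised node so concurrent readers never see a half-built one.
 */
static void tnl_link(struct tnl *t)
{
	spin_lock_bh(&tnl_lock);
	t->next = tnl_bucket;
	rcu_assign_pointer(tnl_bucket, t);
	spin_unlock_bh(&tnl_lock);
}

Unlinking follows the same shape as ipgre_tunnel_unlink() above: take the spinlock, splice the entry out of the chain with a plain pointer assignment, and drop the lock; readers traversing concurrently either still see the old entry or skip past it, both of which are safe under RCU.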