solution, but it supposes maintaing new variable in ALL
skb, even if no tunneling is used.
- Current solution: t->recursion lock breaks dead loops. It looks
- like dev->tbusy flag, but I preferred new variable, because
- the semantics is different. One day, when hard_start_xmit
- will be multithreaded we will have to use skb->encapsulation.
+ Current solution: HARD_TX_LOCK lock breaks dead loops.
/* Given src, dst and key, find appropriate for input tunnel. */
-static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
+static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
__be32 remote, __be32 local,
__be32 key, __be16 gre_proto)
{
+ struct net *net = dev_net(dev);
+ int link = dev->ifindex;
unsigned h0 = HASH(remote);
unsigned h1 = HASH(key);
- struct ip_tunnel *t;
- struct ip_tunnel *t2 = NULL;
+ struct ip_tunnel *t, *cand = NULL;
struct ipgre_net *ign = net_generic(net, ipgre_net_id);
int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
ARPHRD_ETHER : ARPHRD_IPGRE;
+ int score, cand_score = 4;
for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
- if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
- if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
- if (t->dev->type == dev_type)
- return t;
- if (t->dev->type == ARPHRD_IPGRE && !t2)
- t2 = t;
- }
+ if (local != t->parms.iph.saddr ||
+ remote != t->parms.iph.daddr ||
+ key != t->parms.i_key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IPGRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
}
}
for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
- if (remote == t->parms.iph.daddr) {
- if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
- if (t->dev->type == dev_type)
- return t;
- if (t->dev->type == ARPHRD_IPGRE && !t2)
- t2 = t;
- }
+ if (remote != t->parms.iph.daddr ||
+ key != t->parms.i_key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IPGRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
}
}
for (t = ign->tunnels_l[h1]; t; t = t->next) {
- if (local == t->parms.iph.saddr ||
- (local == t->parms.iph.daddr &&
- ipv4_is_multicast(local))) {
- if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
- if (t->dev->type == dev_type)
- return t;
- if (t->dev->type == ARPHRD_IPGRE && !t2)
- t2 = t;
- }
+ if ((local != t->parms.iph.saddr &&
+ (local != t->parms.iph.daddr ||
+ !ipv4_is_multicast(local))) ||
+ key != t->parms.i_key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IPGRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
}
}
for (t = ign->tunnels_wc[h1]; t; t = t->next) {
- if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
- if (t->dev->type == dev_type)
- return t;
- if (t->dev->type == ARPHRD_IPGRE && !t2)
- t2 = t;
+ if (t->parms.i_key != key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IPGRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
}
}
- if (t2)
- return t2;
+ if (cand != NULL)
+ return cand;
- if (ign->fb_tunnel_dev->flags&IFF_UP)
+ if (ign->fb_tunnel_dev->flags & IFF_UP)
return netdev_priv(ign->fb_tunnel_dev);
+
return NULL;
}
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
__be32 key = parms->i_key;
+ int link = parms->link;
struct ip_tunnel *t, **tp;
struct ipgre_net *ign = net_generic(net, ipgre_net_id);
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
key == t->parms.i_key &&
+ link == t->parms.link &&
type == t->dev->type)
break;
}
read_lock(&ipgre_lock);
- t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
+ t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
flags & GRE_KEY ?
*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
p[1]);
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
goto out;
- if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
+ if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
t->err_count++;
else
t->err_count = 1;
gre_proto = *(__be16 *)(h + 2);
read_lock(&ipgre_lock);
- if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
+ if ((tunnel = ipgre_tunnel_lookup(skb->dev,
iph->saddr, iph->daddr, key,
gre_proto))) {
struct net_device_stats *stats = &tunnel->dev->stats;
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(iph->daddr)) {
/* Looped back packet, drop it! */
- if (skb->rtable->fl.iif == 0)
+ if (skb_rtable(skb)->fl.iif == 0)
goto drop;
stats->multicast++;
skb->pkt_type = PACKET_BROADCAST;
stats->rx_packets++;
stats->rx_bytes += len;
skb->dev = tunnel->dev;
- dst_release(skb->dst);
- skb->dst = NULL;
+ skb_dst_drop(skb);
nf_reset(skb);
skb_reset_network_header(skb);
return(0);
}
-static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct net_device_stats *stats = &tunnel->dev->stats;
__be32 dst;
int mtu;
- if (tunnel->recursion++) {
- stats->collisions++;
- goto tx_error;
- }
-
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
if ((dst = tiph->daddr) == 0) {
/* NBMA tunnel */
- if (skb->dst == NULL) {
+ if (skb_dst(skb) == NULL) {
stats->tx_fifo_errors++;
goto tx_error;
}
if (skb->protocol == htons(ETH_P_IP)) {
- rt = skb->rtable;
+ rt = skb_rtable(skb);
if ((dst = rt->rt_gateway) == 0)
goto tx_error_icmp;
}
else if (skb->protocol == htons(ETH_P_IPV6)) {
struct in6_addr *addr6;
int addr_type;
- struct neighbour *neigh = skb->dst->neighbour;
+ struct neighbour *neigh = skb_dst(skb)->neighbour;
if (neigh == NULL)
goto tx_error;
}
tos = tiph->tos;
- if (tos&1) {
+ if (tos == 1) {
+ tos = 0;
if (skb->protocol == htons(ETH_P_IP))
tos = old_iph->tos;
- tos &= ~1;
}
{
if (df)
mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
else
- mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
+ mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
- if (skb->dst)
- skb->dst->ops->update_pmtu(skb->dst, mtu);
+ if (skb_dst(skb))
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
if (skb->protocol == htons(ETH_P_IP)) {
df |= (old_iph->frag_off&htons(IP_DF));
}
#ifdef CONFIG_IPV6
else if (skb->protocol == htons(ETH_P_IPV6)) {
- struct rt6_info *rt6 = (struct rt6_info *)skb->dst;
+ struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
- if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
+ if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
if ((tunnel->parms.iph.daddr &&
!ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
rt6->rt6i_dst.plen == 128) {
rt6->rt6i_flags |= RTF_MODIFIED;
- skb->dst->metrics[RTAX_MTU-1] = mtu;
+ skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
}
}
#endif
if (tunnel->err_count > 0) {
- if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
+ if (time_before(jiffies,
+ tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
tunnel->err_count--;
dst_link_failure(skb);
ip_rt_put(rt);
stats->tx_dropped++;
dev_kfree_skb(skb);
- tunnel->recursion--;
- return 0;
+ return NETDEV_TX_OK;
}
if (skb->sk)
skb_set_owner_w(new_skb, skb->sk);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
IPSKB_REROUTED);
- dst_release(skb->dst);
- skb->dst = &rt->u.dst;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->u.dst);
/*
* Push down and install the IPIP header.
nf_reset(skb);
IPTUNNEL_XMIT();
- tunnel->recursion--;
- return 0;
+ return NETDEV_TX_OK;
tx_error_icmp:
dst_link_failure(skb);
tx_error:
stats->tx_errors++;
dev_kfree_skb(skb);
- tunnel->recursion--;
- return 0;
+ return NETDEV_TX_OK;
}
static int ipgre_tunnel_bind_dev(struct net_device *dev)
addend += 4;
}
dev->needed_headroom = addend + hlen;
- mtu -= dev->hard_header_len - addend;
+ mtu -= dev->hard_header_len + addend;
if (mtu < 68)
mtu = 68;
dev->iflink = 0;
dev->addr_len = 4;
dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
}
static int ipgre_tunnel_init(struct net_device *dev)
}
-static struct net_protocol ipgre_protocol = {
+static const struct net_protocol ipgre_protocol = {
.handler = ipgre_rcv,
.err_handler = ipgre_err,
.netns_ok = 1,
err = -ENOMEM;
goto err_alloc_dev;
}
+ dev_net_set(ign->fb_tunnel_dev, net);
ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
- dev_net_set(ign->fb_tunnel_dev, net);
ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
if ((err = register_netdev(ign->fb_tunnel_dev)))
ether_setup(dev);
- dev->netdev_ops = &ipgre_netdev_ops;
+ dev->netdev_ops = &ipgre_tap_netdev_ops;
dev->destructor = free_netdev;
dev->iflink = 0;
if (t->dev != dev)
return -EEXIST;
} else {
- unsigned nflags = 0;
-
t = nt;
- if (ipv4_is_multicast(p.iph.daddr))
- nflags = IFF_BROADCAST;
- else if (p.iph.daddr)
- nflags = IFF_POINTOPOINT;
+ if (dev->type != ARPHRD_ETHER) {
+ unsigned nflags = 0;
- if ((dev->flags ^ nflags) &
- (IFF_POINTOPOINT | IFF_BROADCAST))
- return -EINVAL;
+ if (ipv4_is_multicast(p.iph.daddr))
+ nflags = IFF_BROADCAST;
+ else if (p.iph.daddr)
+ nflags = IFF_POINTOPOINT;
+
+ if ((dev->flags ^ nflags) &
+ (IFF_POINTOPOINT | IFF_BROADCAST))
+ return -EINVAL;
+ }
ipgre_tunnel_unlink(ign, t);
t->parms.iph.saddr = p.iph.saddr;
t->parms.iph.daddr = p.iph.daddr;
t->parms.i_key = p.i_key;
- memcpy(dev->dev_addr, &p.iph.saddr, 4);
- memcpy(dev->broadcast, &p.iph.daddr, 4);
+ if (dev->type != ARPHRD_ETHER) {
+ memcpy(dev->dev_addr, &p.iph.saddr, 4);
+ memcpy(dev->broadcast, &p.iph.daddr, 4);
+ }
ipgre_tunnel_link(ign, t);
netdev_state_change(dev);
}