gre: Add netlink interface
[safe/jmp/linux-2.6] / net / ipv4 / ip_gre.c
1 /*
2  *      Linux NET3:     GRE over IP protocol decoder.
3  *
4  *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *      This program is free software; you can redistribute it and/or
7  *      modify it under the terms of the GNU General Public License
8  *      as published by the Free Software Foundation; either version
9  *      2 of the License, or (at your option) any later version.
10  *
11  */
12
13 #include <linux/capability.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <asm/uaccess.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/tcp.h>
22 #include <linux/udp.h>
23 #include <linux/if_arp.h>
24 #include <linux/mroute.h>
25 #include <linux/init.h>
26 #include <linux/in6.h>
27 #include <linux/inetdevice.h>
28 #include <linux/igmp.h>
29 #include <linux/netfilter_ipv4.h>
30 #include <linux/if_ether.h>
31
32 #include <net/sock.h>
33 #include <net/ip.h>
34 #include <net/icmp.h>
35 #include <net/protocol.h>
36 #include <net/ipip.h>
37 #include <net/arp.h>
38 #include <net/checksum.h>
39 #include <net/dsfield.h>
40 #include <net/inet_ecn.h>
41 #include <net/xfrm.h>
42 #include <net/net_namespace.h>
43 #include <net/netns/generic.h>
44 #include <net/rtnetlink.h>
45
46 #ifdef CONFIG_IPV6
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #endif
51
52 /*
53    Problems & solutions
54    --------------------
55
56    1. The most important issue is detecting local dead loops.
57    They would cause complete host lockup in transmit, which
58    would be "resolved" by stack overflow or, if queueing is enabled,
59    with infinite looping in net_bh.
60
61    We cannot track such dead loops during route installation,
62    it is infeasible task. The most general solutions would be
63    to keep skb->encapsulation counter (sort of local ttl),
64    and silently drop packet when it expires. It is the best
65    solution, but it supposes maintaining a new variable in ALL
66    skb, even if no tunneling is used.
67
68    Current solution: t->recursion lock breaks dead loops. It looks
69    like dev->tbusy flag, but I preferred new variable, because
70    the semantics is different. One day, when hard_start_xmit
71    will be multithreaded we will have to use skb->encapsulation.
72
73
74
75    2. Networking dead loops would not kill routers, but would really
76    kill network. IP hop limit plays role of "t->recursion" in this case,
77    if we copy it from packet being encapsulated to upper header.
78    It is very good solution, but it introduces two problems:
79
80    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
81      do not work over tunnels.
82    - traceroute does not work. I planned to relay ICMP from tunnel,
83      so that this problem would be solved and traceroute output
84      would even more informative. This idea appeared to be wrong:
85      only Linux complies to rfc1812 now (yes, guys, Linux is the only
86      true router now :-)), all routers (at least, in neighbourhood of mine)
87      return only 8 bytes of payload. It is the end.
88
89    Hence, if we want that OSPF worked or traceroute said something reasonable,
90    we should search for another solution.
91
92    One of them is to parse packet trying to detect inner encapsulation
93    made by our node. It is difficult or even impossible, especially,
94    taking into account fragmentation. To be short, it is not a solution at all.
95
96    Current solution: The solution was UNEXPECTEDLY SIMPLE.
97    We force DF flag on tunnels with preconfigured hop limit,
98    that is ALL. :-) Well, it does not remove the problem completely,
99    but exponential growth of network traffic is changed to linear
100    (branches, that exceed pmtu are pruned) and tunnel mtu
101    quickly degrades to a value <68, where looping stops.
102    Yes, it is not good if there exists a router in the loop,
103    which does not force DF, even when encapsulating packets have DF set.
104    But it is not our problem! Nobody could accuse us, we made
105    all that we could make. Even if it is your gated who injected
106    fatal route to network, even if it were you who configured
107    fatal static route: you are innocent. :-)
108
109
110
111    3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
112    practically identical code. It would be good to glue them
113    together, but it is not very evident, how to make them modular.
114    sit is integral part of IPv6, ipip and gre are naturally modular.
115    We could extract common parts (hash table, ioctl etc)
116    to a separate module (ip_tunnel.c).
117
118    Alexey Kuznetsov.
119  */
120
121 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
122 static int ipgre_tunnel_init(struct net_device *dev);
123 static void ipgre_tunnel_setup(struct net_device *dev);
124 static int ipgre_tunnel_bind_dev(struct net_device *dev);
125
126 /* Fallback tunnel: no source, no destination, no key, no options */
127
128 static int ipgre_fb_tunnel_init(struct net_device *dev);
129
130 #define HASH_SIZE  16
131
132 static int ipgre_net_id;
133 struct ipgre_net {
134         struct ip_tunnel *tunnels[4][HASH_SIZE];
135
136         struct net_device *fb_tunnel_dev;
137 };
138
139 /* Tunnel hash table */
140
141 /*
142    4 hash tables:
143
144    3: (remote,local)
145    2: (remote,*)
146    1: (*,local)
147    0: (*,*)
148
149    We require exact key match i.e. if a key is present in packet
150    it will match only tunnel with the same key; if it is not present,
151    it will match only keyless tunnel.
152
153    All keyless packets, if not matched to a configured keyless tunnel,
154    will match the fallback tunnel.
155  */
156
157 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
158
159 #define tunnels_r_l     tunnels[3]
160 #define tunnels_r       tunnels[2]
161 #define tunnels_l       tunnels[1]
162 #define tunnels_wc      tunnels[0]
163
164 static DEFINE_RWLOCK(ipgre_lock);
165
166 /* Given src, dst and key, find appropriate for input tunnel. */
167
168 static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
169                 __be32 remote, __be32 local, __be32 key)
170 {
171         unsigned h0 = HASH(remote);
172         unsigned h1 = HASH(key);
173         struct ip_tunnel *t;
174         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
175
176         for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
177                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
178                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
179                                 return t;
180                 }
181         }
182         for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
183                 if (remote == t->parms.iph.daddr) {
184                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
185                                 return t;
186                 }
187         }
188         for (t = ign->tunnels_l[h1]; t; t = t->next) {
189                 if (local == t->parms.iph.saddr ||
190                      (local == t->parms.iph.daddr &&
191                       ipv4_is_multicast(local))) {
192                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
193                                 return t;
194                 }
195         }
196         for (t = ign->tunnels_wc[h1]; t; t = t->next) {
197                 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
198                         return t;
199         }
200
201         if (ign->fb_tunnel_dev->flags&IFF_UP)
202                 return netdev_priv(ign->fb_tunnel_dev);
203         return NULL;
204 }
205
206 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
207                 struct ip_tunnel_parm *parms)
208 {
209         __be32 remote = parms->iph.daddr;
210         __be32 local = parms->iph.saddr;
211         __be32 key = parms->i_key;
212         unsigned h = HASH(key);
213         int prio = 0;
214
215         if (local)
216                 prio |= 1;
217         if (remote && !ipv4_is_multicast(remote)) {
218                 prio |= 2;
219                 h ^= HASH(remote);
220         }
221
222         return &ign->tunnels[prio][h];
223 }
224
225 static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
226                 struct ip_tunnel *t)
227 {
228         return __ipgre_bucket(ign, &t->parms);
229 }
230
231 static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
232 {
233         struct ip_tunnel **tp = ipgre_bucket(ign, t);
234
235         t->next = *tp;
236         write_lock_bh(&ipgre_lock);
237         *tp = t;
238         write_unlock_bh(&ipgre_lock);
239 }
240
241 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
242 {
243         struct ip_tunnel **tp;
244
245         for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
246                 if (t == *tp) {
247                         write_lock_bh(&ipgre_lock);
248                         *tp = t->next;
249                         write_unlock_bh(&ipgre_lock);
250                         break;
251                 }
252         }
253 }
254
255 static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
256                 struct ip_tunnel_parm *parms, int create)
257 {
258         __be32 remote = parms->iph.daddr;
259         __be32 local = parms->iph.saddr;
260         __be32 key = parms->i_key;
261         struct ip_tunnel *t, **tp, *nt;
262         struct net_device *dev;
263         char name[IFNAMSIZ];
264         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
265
266         for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
267                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
268                         if (key == t->parms.i_key)
269                                 return t;
270                 }
271         }
272         if (!create)
273                 return NULL;
274
275         if (parms->name[0])
276                 strlcpy(name, parms->name, IFNAMSIZ);
277         else
278                 sprintf(name, "gre%%d");
279
280         dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
281         if (!dev)
282           return NULL;
283
284         dev_net_set(dev, net);
285
286         if (strchr(name, '%')) {
287                 if (dev_alloc_name(dev, name) < 0)
288                         goto failed_free;
289         }
290
291         nt = netdev_priv(dev);
292         nt->parms = *parms;
293         dev->rtnl_link_ops = &ipgre_link_ops;
294
295         dev->mtu = ipgre_tunnel_bind_dev(dev);
296
297         if (register_netdevice(dev) < 0)
298                 goto failed_free;
299
300         dev_hold(dev);
301         ipgre_tunnel_link(ign, nt);
302         return nt;
303
304 failed_free:
305         free_netdev(dev);
306         return NULL;
307 }
308
309 static void ipgre_tunnel_uninit(struct net_device *dev)
310 {
311         struct net *net = dev_net(dev);
312         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
313
314         ipgre_tunnel_unlink(ign, netdev_priv(dev));
315         dev_put(dev);
316 }
317
318
319 static void ipgre_err(struct sk_buff *skb, u32 info)
320 {
321
322 /* All the routers (except for Linux) return only
323    8 bytes of packet payload. It means, that precise relaying of
324    ICMP in the real Internet is absolutely infeasible.
325
326    Moreover, Cisco "wise men" put GRE key to the third word
327    in GRE header. It makes impossible maintaining even soft state for keyed
328    GRE tunnels with enabled checksum. Tell them "thank you".
329
330    Well, I wonder, rfc1812 was written by Cisco employee,
331    what the hell these idiots break standrads established
332    by themself???
333  */
334
335         struct iphdr *iph = (struct iphdr*)skb->data;
336         __be16       *p = (__be16*)(skb->data+(iph->ihl<<2));
337         int grehlen = (iph->ihl<<2) + 4;
338         const int type = icmp_hdr(skb)->type;
339         const int code = icmp_hdr(skb)->code;
340         struct ip_tunnel *t;
341         __be16 flags;
342
343         flags = p[0];
344         if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
345                 if (flags&(GRE_VERSION|GRE_ROUTING))
346                         return;
347                 if (flags&GRE_KEY) {
348                         grehlen += 4;
349                         if (flags&GRE_CSUM)
350                                 grehlen += 4;
351                 }
352         }
353
354         /* If only 8 bytes returned, keyed message will be dropped here */
355         if (skb_headlen(skb) < grehlen)
356                 return;
357
358         switch (type) {
359         default:
360         case ICMP_PARAMETERPROB:
361                 return;
362
363         case ICMP_DEST_UNREACH:
364                 switch (code) {
365                 case ICMP_SR_FAILED:
366                 case ICMP_PORT_UNREACH:
367                         /* Impossible event. */
368                         return;
369                 case ICMP_FRAG_NEEDED:
370                         /* Soft state for pmtu is maintained by IP core. */
371                         return;
372                 default:
373                         /* All others are translated to HOST_UNREACH.
374                            rfc2003 contains "deep thoughts" about NET_UNREACH,
375                            I believe they are just ether pollution. --ANK
376                          */
377                         break;
378                 }
379                 break;
380         case ICMP_TIME_EXCEEDED:
381                 if (code != ICMP_EXC_TTL)
382                         return;
383                 break;
384         }
385
386         read_lock(&ipgre_lock);
387         t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
388                         (flags&GRE_KEY) ?
389                         *(((__be32*)p) + (grehlen>>2) - 1) : 0);
390         if (t == NULL || t->parms.iph.daddr == 0 ||
391             ipv4_is_multicast(t->parms.iph.daddr))
392                 goto out;
393
394         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
395                 goto out;
396
397         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
398                 t->err_count++;
399         else
400                 t->err_count = 1;
401         t->err_time = jiffies;
402 out:
403         read_unlock(&ipgre_lock);
404         return;
405 }
406
407 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
408 {
409         if (INET_ECN_is_ce(iph->tos)) {
410                 if (skb->protocol == htons(ETH_P_IP)) {
411                         IP_ECN_set_ce(ip_hdr(skb));
412                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
413                         IP6_ECN_set_ce(ipv6_hdr(skb));
414                 }
415         }
416 }
417
418 static inline u8
419 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
420 {
421         u8 inner = 0;
422         if (skb->protocol == htons(ETH_P_IP))
423                 inner = old_iph->tos;
424         else if (skb->protocol == htons(ETH_P_IPV6))
425                 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
426         return INET_ECN_encapsulate(tos, inner);
427 }
428
429 static int ipgre_rcv(struct sk_buff *skb)
430 {
431         struct iphdr *iph;
432         u8     *h;
433         __be16    flags;
434         __sum16   csum = 0;
435         __be32 key = 0;
436         u32    seqno = 0;
437         struct ip_tunnel *tunnel;
438         int    offset = 4;
439
440         if (!pskb_may_pull(skb, 16))
441                 goto drop_nolock;
442
443         iph = ip_hdr(skb);
444         h = skb->data;
445         flags = *(__be16*)h;
446
447         if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
448                 /* - Version must be 0.
449                    - We do not support routing headers.
450                  */
451                 if (flags&(GRE_VERSION|GRE_ROUTING))
452                         goto drop_nolock;
453
454                 if (flags&GRE_CSUM) {
455                         switch (skb->ip_summed) {
456                         case CHECKSUM_COMPLETE:
457                                 csum = csum_fold(skb->csum);
458                                 if (!csum)
459                                         break;
460                                 /* fall through */
461                         case CHECKSUM_NONE:
462                                 skb->csum = 0;
463                                 csum = __skb_checksum_complete(skb);
464                                 skb->ip_summed = CHECKSUM_COMPLETE;
465                         }
466                         offset += 4;
467                 }
468                 if (flags&GRE_KEY) {
469                         key = *(__be32*)(h + offset);
470                         offset += 4;
471                 }
472                 if (flags&GRE_SEQ) {
473                         seqno = ntohl(*(__be32*)(h + offset));
474                         offset += 4;
475                 }
476         }
477
478         read_lock(&ipgre_lock);
479         if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
480                                         iph->saddr, iph->daddr, key)) != NULL) {
481                 struct net_device_stats *stats = &tunnel->dev->stats;
482
483                 secpath_reset(skb);
484
485                 skb->protocol = *(__be16*)(h + 2);
486                 /* WCCP version 1 and 2 protocol decoding.
487                  * - Change protocol to IP
488                  * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
489                  */
490                 if (flags == 0 &&
491                     skb->protocol == htons(ETH_P_WCCP)) {
492                         skb->protocol = htons(ETH_P_IP);
493                         if ((*(h + offset) & 0xF0) != 0x40)
494                                 offset += 4;
495                 }
496
497                 skb->mac_header = skb->network_header;
498                 __pskb_pull(skb, offset);
499                 skb_reset_network_header(skb);
500                 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
501                 skb->pkt_type = PACKET_HOST;
502 #ifdef CONFIG_NET_IPGRE_BROADCAST
503                 if (ipv4_is_multicast(iph->daddr)) {
504                         /* Looped back packet, drop it! */
505                         if (skb->rtable->fl.iif == 0)
506                                 goto drop;
507                         stats->multicast++;
508                         skb->pkt_type = PACKET_BROADCAST;
509                 }
510 #endif
511
512                 if (((flags&GRE_CSUM) && csum) ||
513                     (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
514                         stats->rx_crc_errors++;
515                         stats->rx_errors++;
516                         goto drop;
517                 }
518                 if (tunnel->parms.i_flags&GRE_SEQ) {
519                         if (!(flags&GRE_SEQ) ||
520                             (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
521                                 stats->rx_fifo_errors++;
522                                 stats->rx_errors++;
523                                 goto drop;
524                         }
525                         tunnel->i_seqno = seqno + 1;
526                 }
527                 stats->rx_packets++;
528                 stats->rx_bytes += skb->len;
529                 skb->dev = tunnel->dev;
530                 dst_release(skb->dst);
531                 skb->dst = NULL;
532                 nf_reset(skb);
533                 ipgre_ecn_decapsulate(iph, skb);
534                 netif_rx(skb);
535                 read_unlock(&ipgre_lock);
536                 return(0);
537         }
538         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
539
540 drop:
541         read_unlock(&ipgre_lock);
542 drop_nolock:
543         kfree_skb(skb);
544         return(0);
545 }
546
547 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
548 {
549         struct ip_tunnel *tunnel = netdev_priv(dev);
550         struct net_device_stats *stats = &tunnel->dev->stats;
551         struct iphdr  *old_iph = ip_hdr(skb);
552         struct iphdr  *tiph;
553         u8     tos;
554         __be16 df;
555         struct rtable *rt;                      /* Route to the other host */
556         struct net_device *tdev;                        /* Device to other host */
557         struct iphdr  *iph;                     /* Our new IP header */
558         unsigned int max_headroom;              /* The extra header space needed */
559         int    gre_hlen;
560         __be32 dst;
561         int    mtu;
562
563         if (tunnel->recursion++) {
564                 stats->collisions++;
565                 goto tx_error;
566         }
567
568         if (dev->header_ops) {
569                 gre_hlen = 0;
570                 tiph = (struct iphdr*)skb->data;
571         } else {
572                 gre_hlen = tunnel->hlen;
573                 tiph = &tunnel->parms.iph;
574         }
575
576         if ((dst = tiph->daddr) == 0) {
577                 /* NBMA tunnel */
578
579                 if (skb->dst == NULL) {
580                         stats->tx_fifo_errors++;
581                         goto tx_error;
582                 }
583
584                 if (skb->protocol == htons(ETH_P_IP)) {
585                         rt = skb->rtable;
586                         if ((dst = rt->rt_gateway) == 0)
587                                 goto tx_error_icmp;
588                 }
589 #ifdef CONFIG_IPV6
590                 else if (skb->protocol == htons(ETH_P_IPV6)) {
591                         struct in6_addr *addr6;
592                         int addr_type;
593                         struct neighbour *neigh = skb->dst->neighbour;
594
595                         if (neigh == NULL)
596                                 goto tx_error;
597
598                         addr6 = (struct in6_addr*)&neigh->primary_key;
599                         addr_type = ipv6_addr_type(addr6);
600
601                         if (addr_type == IPV6_ADDR_ANY) {
602                                 addr6 = &ipv6_hdr(skb)->daddr;
603                                 addr_type = ipv6_addr_type(addr6);
604                         }
605
606                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
607                                 goto tx_error_icmp;
608
609                         dst = addr6->s6_addr32[3];
610                 }
611 #endif
612                 else
613                         goto tx_error;
614         }
615
616         tos = tiph->tos;
617         if (tos&1) {
618                 if (skb->protocol == htons(ETH_P_IP))
619                         tos = old_iph->tos;
620                 tos &= ~1;
621         }
622
623         {
624                 struct flowi fl = { .oif = tunnel->parms.link,
625                                     .nl_u = { .ip4_u =
626                                               { .daddr = dst,
627                                                 .saddr = tiph->saddr,
628                                                 .tos = RT_TOS(tos) } },
629                                     .proto = IPPROTO_GRE };
630                 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
631                         stats->tx_carrier_errors++;
632                         goto tx_error;
633                 }
634         }
635         tdev = rt->u.dst.dev;
636
637         if (tdev == dev) {
638                 ip_rt_put(rt);
639                 stats->collisions++;
640                 goto tx_error;
641         }
642
643         df = tiph->frag_off;
644         if (df)
645                 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
646         else
647                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
648
649         if (skb->dst)
650                 skb->dst->ops->update_pmtu(skb->dst, mtu);
651
652         if (skb->protocol == htons(ETH_P_IP)) {
653                 df |= (old_iph->frag_off&htons(IP_DF));
654
655                 if ((old_iph->frag_off&htons(IP_DF)) &&
656                     mtu < ntohs(old_iph->tot_len)) {
657                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
658                         ip_rt_put(rt);
659                         goto tx_error;
660                 }
661         }
662 #ifdef CONFIG_IPV6
663         else if (skb->protocol == htons(ETH_P_IPV6)) {
664                 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
665
666                 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
667                         if ((tunnel->parms.iph.daddr &&
668                              !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
669                             rt6->rt6i_dst.plen == 128) {
670                                 rt6->rt6i_flags |= RTF_MODIFIED;
671                                 skb->dst->metrics[RTAX_MTU-1] = mtu;
672                         }
673                 }
674
675                 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
676                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
677                         ip_rt_put(rt);
678                         goto tx_error;
679                 }
680         }
681 #endif
682
683         if (tunnel->err_count > 0) {
684                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
685                         tunnel->err_count--;
686
687                         dst_link_failure(skb);
688                 } else
689                         tunnel->err_count = 0;
690         }
691
692         max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
693
694         if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
695             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
696                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
697                 if (!new_skb) {
698                         ip_rt_put(rt);
699                         stats->tx_dropped++;
700                         dev_kfree_skb(skb);
701                         tunnel->recursion--;
702                         return 0;
703                 }
704                 if (skb->sk)
705                         skb_set_owner_w(new_skb, skb->sk);
706                 dev_kfree_skb(skb);
707                 skb = new_skb;
708                 old_iph = ip_hdr(skb);
709         }
710
711         skb->transport_header = skb->network_header;
712         skb_push(skb, gre_hlen);
713         skb_reset_network_header(skb);
714         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
715         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
716                               IPSKB_REROUTED);
717         dst_release(skb->dst);
718         skb->dst = &rt->u.dst;
719
720         /*
721          *      Push down and install the IPIP header.
722          */
723
724         iph                     =       ip_hdr(skb);
725         iph->version            =       4;
726         iph->ihl                =       sizeof(struct iphdr) >> 2;
727         iph->frag_off           =       df;
728         iph->protocol           =       IPPROTO_GRE;
729         iph->tos                =       ipgre_ecn_encapsulate(tos, old_iph, skb);
730         iph->daddr              =       rt->rt_dst;
731         iph->saddr              =       rt->rt_src;
732
733         if ((iph->ttl = tiph->ttl) == 0) {
734                 if (skb->protocol == htons(ETH_P_IP))
735                         iph->ttl = old_iph->ttl;
736 #ifdef CONFIG_IPV6
737                 else if (skb->protocol == htons(ETH_P_IPV6))
738                         iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
739 #endif
740                 else
741                         iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
742         }
743
744         ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
745         ((__be16*)(iph+1))[1] = skb->protocol;
746
747         if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
748                 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
749
750                 if (tunnel->parms.o_flags&GRE_SEQ) {
751                         ++tunnel->o_seqno;
752                         *ptr = htonl(tunnel->o_seqno);
753                         ptr--;
754                 }
755                 if (tunnel->parms.o_flags&GRE_KEY) {
756                         *ptr = tunnel->parms.o_key;
757                         ptr--;
758                 }
759                 if (tunnel->parms.o_flags&GRE_CSUM) {
760                         *ptr = 0;
761                         *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
762                 }
763         }
764
765         nf_reset(skb);
766
767         IPTUNNEL_XMIT();
768         tunnel->recursion--;
769         return 0;
770
771 tx_error_icmp:
772         dst_link_failure(skb);
773
774 tx_error:
775         stats->tx_errors++;
776         dev_kfree_skb(skb);
777         tunnel->recursion--;
778         return 0;
779 }
780
781 static int ipgre_tunnel_bind_dev(struct net_device *dev)
782 {
783         struct net_device *tdev = NULL;
784         struct ip_tunnel *tunnel;
785         struct iphdr *iph;
786         int hlen = LL_MAX_HEADER;
787         int mtu = ETH_DATA_LEN;
788         int addend = sizeof(struct iphdr) + 4;
789
790         tunnel = netdev_priv(dev);
791         iph = &tunnel->parms.iph;
792
793         /* Guess output device to choose reasonable mtu and needed_headroom */
794
795         if (iph->daddr) {
796                 struct flowi fl = { .oif = tunnel->parms.link,
797                                     .nl_u = { .ip4_u =
798                                               { .daddr = iph->daddr,
799                                                 .saddr = iph->saddr,
800                                                 .tos = RT_TOS(iph->tos) } },
801                                     .proto = IPPROTO_GRE };
802                 struct rtable *rt;
803                 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
804                         tdev = rt->u.dst.dev;
805                         ip_rt_put(rt);
806                 }
807                 dev->flags |= IFF_POINTOPOINT;
808         }
809
810         if (!tdev && tunnel->parms.link)
811                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
812
813         if (tdev) {
814                 hlen = tdev->hard_header_len + tdev->needed_headroom;
815                 mtu = tdev->mtu;
816         }
817         dev->iflink = tunnel->parms.link;
818
819         /* Precalculate GRE options length */
820         if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
821                 if (tunnel->parms.o_flags&GRE_CSUM)
822                         addend += 4;
823                 if (tunnel->parms.o_flags&GRE_KEY)
824                         addend += 4;
825                 if (tunnel->parms.o_flags&GRE_SEQ)
826                         addend += 4;
827         }
828         dev->needed_headroom = addend + hlen;
829         mtu -= dev->hard_header_len - addend;
830
831         if (mtu < 68)
832                 mtu = 68;
833
834         tunnel->hlen = addend;
835
836         return mtu;
837 }
838
/*
 * ioctl handler for the SIOC{GET,ADD,CHG,DEL}TUNNEL tunnel-management
 * interface.  Requests arrive either on a specific tunnel device or on
 * the per-namespace fallback device ("gre0"), in which case the target
 * tunnel is identified by the ip_tunnel_parm block copied from userspace.
 */
static int
ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		/* On the fallback device the user selects a tunnel by the
		 * parameters passed in; on a real tunnel device the request
		 * always refers to that device itself. */
		t = NULL;
		if (dev == ign->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Sanity-check the request: plain IPv4 outer header (no
		 * options) carrying GRE, no frag bits other than DF, and
		 * none of the unsupported GRE version/routing bits. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		/* A fixed TTL implies path-MTU discovery (DF set). */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		/* Keys are only meaningful when the matching flag is set. */
		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Parameters already belong to another device. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned nflags=0;

				t = netdev_priv(dev);

				/* The link mode (broadcast vs. point-to-point)
				 * is fixed at creation time; reject changes
				 * that would flip it. */
				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				/* Endpoints/keys are hash inputs: unlink,
				 * update, then re-link under the new hash. */
				ipgre_tunnel_unlink(ign, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(ign, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				/* A new underlying link may change the
				 * usable MTU; re-bind to recompute it. */
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					dev->mtu = ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			/* Hand the (possibly adjusted) parameters back. */
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ign->fb_tunnel_dev) {
			/* Deleting via the fallback device: look the victim
			 * up by parameters; the fallback itself may not be
			 * deleted this way. */
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(ign->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
968
969 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
970 {
971         struct ip_tunnel *tunnel = netdev_priv(dev);
972         if (new_mtu < 68 ||
973             new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
974                 return -EINVAL;
975         dev->mtu = new_mtu;
976         return 0;
977 }
978
979 /* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.
982
983
   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).
988
989    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
990    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
991
992    ping -t 255 224.66.66.66
993
994    If nobody answers, mbone does not work.
995
996    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
997    ip addr add 10.66.66.<somewhat>/24 dev Universe
998    ifconfig Universe up
999    ifconfig Universe add fe80::<Your_real_addr>/10
1000    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1001    ftp 10.66.66.66
1002    ...
1003    ftp fec0:6666:6666::193.233.7.65
1004    ...
1005
1006  */
1007
1008 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1009                         unsigned short type,
1010                         const void *daddr, const void *saddr, unsigned len)
1011 {
1012         struct ip_tunnel *t = netdev_priv(dev);
1013         struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1014         __be16 *p = (__be16*)(iph+1);
1015
1016         memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1017         p[0]            = t->parms.o_flags;
1018         p[1]            = htons(type);
1019
1020         /*
1021          *      Set the source hardware address.
1022          */
1023
1024         if (saddr)
1025                 memcpy(&iph->saddr, saddr, 4);
1026
1027         if (daddr) {
1028                 memcpy(&iph->daddr, daddr, 4);
1029                 return t->hlen;
1030         }
1031         if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1032                 return t->hlen;
1033
1034         return -t->hlen;
1035 }
1036
1037 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1038 {
1039         struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1040         memcpy(haddr, &iph->saddr, 4);
1041         return 4;
1042 }
1043
/* header_ops used when the endpoint address may be supplied per packet
 * (NBMA mode and multicast/broadcast mode tunnels). */
static const struct header_ops ipgre_header_ops = {
	.create = ipgre_header,
	.parse  = ipgre_header_parse,
};
1048
1049 #ifdef CONFIG_NET_IPGRE_BROADCAST
/*
 * open callback for broadcast-mode tunnels: join the tunnel's multicast
 * group on whatever underlying device the route to the group resolves
 * to, so encapsulated broadcast traffic can be received.
 */
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi fl = { .oif = t->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = t->parms.iph.daddr,
						.saddr = t->parms.iph.saddr,
						.tos = RT_TOS(t->parms.iph.tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (ip_route_output_key(dev_net(dev), &rt, &fl))
			return -EADDRNOTAVAIL;
		/* NB: from here on `dev' is the underlying device, not the
		 * tunnel device this was called on. */
		dev = rt->u.dst.dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		/* Remember which ifindex we joined on, for ipgre_close(). */
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}
1073
1074 static int ipgre_close(struct net_device *dev)
1075 {
1076         struct ip_tunnel *t = netdev_priv(dev);
1077         if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1078                 struct in_device *in_dev;
1079                 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1080                 if (in_dev) {
1081                         ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1082                         in_dev_put(in_dev);
1083                 }
1084         }
1085         return 0;
1086 }
1087
1088 #endif
1089
1090 static void ipgre_tunnel_setup(struct net_device *dev)
1091 {
1092         dev->init               = ipgre_tunnel_init;
1093         dev->uninit             = ipgre_tunnel_uninit;
1094         dev->destructor         = free_netdev;
1095         dev->hard_start_xmit    = ipgre_tunnel_xmit;
1096         dev->do_ioctl           = ipgre_tunnel_ioctl;
1097         dev->change_mtu         = ipgre_tunnel_change_mtu;
1098
1099         dev->type               = ARPHRD_IPGRE;
1100         dev->needed_headroom    = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1101         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1102         dev->flags              = IFF_NOARP;
1103         dev->iflink             = 0;
1104         dev->addr_len           = 4;
1105         dev->features           |= NETIF_F_NETNS_LOCAL;
1106 }
1107
/*
 * init callback for regular (non-fallback) GRE devices: publish the
 * outer endpoints as the device's link-layer addresses and pick
 * header_ops / open / stop handlers based on the addressing mode.
 */
static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	/* dev_addr/broadcast carry the outer IPv4 endpoints (addr_len == 4). */
	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Broadcast mode needs a fixed local address so
			 * ipgre_open() can join the multicast group. */
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
			dev->open = ipgre_open;
			dev->stop = ipgre_close;
		}
#endif
	} else
		/* NBMA mode: destination is supplied per packet. */
		dev->header_ops = &ipgre_header_ops;

	return 0;
}
1138
/*
 * init callback for the per-namespace fallback device "gre0", which
 * catches GRE traffic that matches no configured tunnel.
 */
static int ipgre_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	/* Outer-header template: plain IPv4 carrying GRE, no options. */
	iph->version		= 4;
	iph->protocol		= IPPROTO_GRE;
	iph->ihl		= 5;
	/* Outer IP header plus the 4 mandatory GRE bytes. */
	tunnel->hlen		= sizeof(struct iphdr) + 4;

	/* Pin the fallback device and install it as the wildcard entry. */
	dev_hold(dev);
	ign->tunnels_wc[0]	= tunnel;
	return 0;
}
1157
1158
/* Receive and ICMP-error hooks for IP protocol 47 (GRE). */
static struct net_protocol ipgre_protocol = {
	.handler	=	ipgre_rcv,
	.err_handler	=	ipgre_err,
	.netns_ok	=	1,
};
1164
1165 static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1166 {
1167         int prio;
1168
1169         for (prio = 0; prio < 4; prio++) {
1170                 int h;
1171                 for (h = 0; h < HASH_SIZE; h++) {
1172                         struct ip_tunnel *t;
1173                         while ((t = ign->tunnels[prio][h]) != NULL)
1174                                 unregister_netdevice(t->dev);
1175                 }
1176         }
1177 }
1178
/*
 * Per-namespace init: allocate the ipgre_net state, publish it in the
 * generic pernet array, then create and register the fallback "gre0"
 * device.  Unwinds in reverse order on failure.
 */
static int ipgre_init_net(struct net *net)
{
	int err;
	struct ipgre_net *ign;

	err = -ENOMEM;
	ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
	if (ign == NULL)
		goto err_alloc;

	err = net_assign_generic(net, ipgre_net_id, ign);
	if (err < 0)
		goto err_assign;

	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
					   ipgre_tunnel_setup);
	if (!ign->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}

	/* The fallback device has its own init routine and carries the
	 * link ops so rtnl dumps identify it as a GRE device. */
	ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
	dev_net_set(ign->fb_tunnel_dev, net);
	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;

	if ((err = register_netdev(ign->fb_tunnel_dev)))
		goto err_reg_dev;

	return 0;

err_reg_dev:
	free_netdev(ign->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
err_assign:
	kfree(ign);
err_alloc:
	return err;
}
1218
1219 static void ipgre_exit_net(struct net *net)
1220 {
1221         struct ipgre_net *ign;
1222
1223         ign = net_generic(net, ipgre_net_id);
1224         rtnl_lock();
1225         ipgre_destroy_tunnels(ign);
1226         rtnl_unlock();
1227         kfree(ign);
1228 }
1229
/* Per-network-namespace setup/teardown hooks. */
static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
};
1234
1235 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1236 {
1237         __be16 flags;
1238
1239         if (!data)
1240                 return 0;
1241
1242         flags = 0;
1243         if (data[IFLA_GRE_IFLAGS])
1244                 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1245         if (data[IFLA_GRE_OFLAGS])
1246                 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1247         if (flags & (GRE_VERSION|GRE_ROUTING))
1248                 return -EINVAL;
1249
1250         return 0;
1251 }
1252
1253 static void ipgre_netlink_parms(struct nlattr *data[],
1254                                 struct ip_tunnel_parm *parms)
1255 {
1256         memset(parms, 0, sizeof(parms));
1257
1258         parms->iph.protocol = IPPROTO_GRE;
1259
1260         if (!data)
1261                 return;
1262
1263         if (data[IFLA_GRE_LINK])
1264                 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1265
1266         if (data[IFLA_GRE_IFLAGS])
1267                 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1268
1269         if (data[IFLA_GRE_OFLAGS])
1270                 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1271
1272         if (data[IFLA_GRE_IKEY])
1273                 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1274
1275         if (data[IFLA_GRE_OKEY])
1276                 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1277
1278         if (data[IFLA_GRE_LOCAL])
1279                 memcpy(&parms->iph.saddr, nla_data(data[IFLA_GRE_LOCAL]), 4);
1280
1281         if (data[IFLA_GRE_REMOTE])
1282                 memcpy(&parms->iph.daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1283
1284         if (data[IFLA_GRE_TTL])
1285                 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1286
1287         if (data[IFLA_GRE_TOS])
1288                 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1289
1290         if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1291                 parms->iph.frag_off = htons(IP_DF);
1292 }
1293
1294 static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1295                          struct nlattr *data[])
1296 {
1297         struct ip_tunnel *nt;
1298         struct net *net = dev_net(dev);
1299         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1300         int mtu;
1301         int err;
1302
1303         nt = netdev_priv(dev);
1304         ipgre_netlink_parms(data, &nt->parms);
1305
1306         if (ipgre_tunnel_locate(net, &nt->parms, 0))
1307                 return -EEXIST;
1308
1309         mtu = ipgre_tunnel_bind_dev(dev);
1310         if (!tb[IFLA_MTU])
1311                 dev->mtu = mtu;
1312
1313         err = register_netdevice(dev);
1314         if (err)
1315                 goto out;
1316
1317         dev_hold(dev);
1318         ipgre_tunnel_link(ign, nt);
1319
1320 out:
1321         return err;
1322 }
1323
/*
 * rtnl changelink callback: update an existing GRE device from netlink
 * attributes.  Endpoint/input-key changes require re-hashing; the
 * broadcast vs. point-to-point nature of the device is fixed at
 * creation time.
 *
 * NOTE(review): i_flags/o_flags parsed by ipgre_netlink_parms() are not
 * applied anywhere below, so csum/key/seq flags appear unchangeable via
 * netlink — confirm whether this is intentional.
 */
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[])
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	struct ip_tunnel_parm p;
	int mtu;

	/* The fallback device cannot be reconfigured. */
	if (dev == ign->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &p);

	t = ipgre_tunnel_locate(net, &p, 0);

	if (t) {
		/* The requested parameters already belong to another device. */
		if (t->dev != dev)
			return -EEXIST;
	} else {
		unsigned nflags = 0;

		t = nt;

		if (ipv4_is_multicast(p.iph.daddr))
			nflags = IFF_BROADCAST;
		else if (p.iph.daddr)
			nflags = IFF_POINTOPOINT;

		/* Refuse changes that would flip broadcast <-> ptp mode. */
		if ((dev->flags ^ nflags) &
		    (IFF_POINTOPOINT | IFF_BROADCAST))
			return -EINVAL;

		/* Endpoints and input key are hash inputs: unlink, update,
		 * then re-link under the new hash bucket. */
		ipgre_tunnel_unlink(ign, t);
		t->parms.iph.saddr = p.iph.saddr;
		t->parms.iph.daddr = p.iph.daddr;
		t->parms.i_key = p.i_key;
		memcpy(dev->dev_addr, &p.iph.saddr, 4);
		memcpy(dev->broadcast, &p.iph.daddr, 4);
		ipgre_tunnel_link(ign, t);
		netdev_state_change(dev);
	}

	/* Output key and outer-header fields are not hash inputs and can
	 * be updated in place. */
	t->parms.o_key = p.o_key;
	t->parms.iph.ttl = p.iph.ttl;
	t->parms.iph.tos = p.iph.tos;
	t->parms.iph.frag_off = p.iph.frag_off;

	/* A new underlying link may change the usable MTU; an explicit
	 * IFLA_MTU from userspace takes precedence. */
	if (t->parms.link != p.link) {
		t->parms.link = p.link;
		mtu = ipgre_tunnel_bind_dev(dev);
		if (!tb[IFLA_MTU])
			dev->mtu = mtu;
		netdev_state_change(dev);
	}

	return 0;
}
1383
1384 static size_t ipgre_get_size(const struct net_device *dev)
1385 {
1386         return
1387                 /* IFLA_GRE_LINK */
1388                 nla_total_size(4) +
1389                 /* IFLA_GRE_IFLAGS */
1390                 nla_total_size(2) +
1391                 /* IFLA_GRE_OFLAGS */
1392                 nla_total_size(2) +
1393                 /* IFLA_GRE_IKEY */
1394                 nla_total_size(4) +
1395                 /* IFLA_GRE_OKEY */
1396                 nla_total_size(4) +
1397                 /* IFLA_GRE_LOCAL */
1398                 nla_total_size(4) +
1399                 /* IFLA_GRE_REMOTE */
1400                 nla_total_size(4) +
1401                 /* IFLA_GRE_TTL */
1402                 nla_total_size(1) +
1403                 /* IFLA_GRE_TOS */
1404                 nla_total_size(1) +
1405                 /* IFLA_GRE_PMTUDISC */
1406                 nla_total_size(1) +
1407                 0;
1408 }
1409
1410 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1411 {
1412         struct ip_tunnel *t = netdev_priv(dev);
1413         struct ip_tunnel_parm *p = &t->parms;
1414
1415         NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1416         NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1417         NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1418         NLA_PUT_BE32(skb, IFLA_GRE_IFLAGS, p->i_flags);
1419         NLA_PUT_BE32(skb, IFLA_GRE_OFLAGS, p->o_flags);
1420         NLA_PUT(skb, IFLA_GRE_LOCAL, 4, &p->iph.saddr);
1421         NLA_PUT(skb, IFLA_GRE_REMOTE, 4, &p->iph.daddr);
1422         NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1423         NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1424         NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1425
1426         return 0;
1427
1428 nla_put_failure:
1429         return -EMSGSIZE;
1430 }
1431
/* Attribute validation policy; LOCAL/REMOTE are raw 4-byte IPv4 addresses. */
static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = 4 },
	[IFLA_GRE_REMOTE]	= { .len = 4 },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
};
1444
/* rtnl_link_ops implementing "ip link add ... type gre". */
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};
1457
1458 /*
1459  *      And now the modules code and kernel interface.
1460  */
1461
/*
 * Module init: register the GRE protocol handler, the pernet device
 * state, and the rtnl link ops — in that order, unwound in reverse on
 * failure.
 */
static int __init ipgre_init(void)
{
	int err;

	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");

	if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
		printk(KERN_INFO "ipgre init: can't add protocol\n");
		return -EAGAIN;
	}

	err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
	if (err < 0)
		goto gen_device_failed;

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

out:
	return err;

rtnl_link_failed:
	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
gen_device_failed:
	inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
	goto out;
}
1490
/* Module exit: undo the ipgre_init() registrations in reverse order. */
static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_link_ops);
	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
		printk(KERN_INFO "ipgre close: can't remove protocol\n");
}
1498
module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
/* Lets "ip link add ... type gre" autoload this module by alias. */
MODULE_ALIAS("rtnl-link-gre");