aa973812edc161d30ba8d9d40b5372aab8c465e5
[safe/jmp/linux-2.6] / net / ipv4 / ip_gre.c
1 /*
2  *      Linux NET3:     GRE over IP protocol decoder.
3  *
4  *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *      This program is free software; you can redistribute it and/or
7  *      modify it under the terms of the GNU General Public License
8  *      as published by the Free Software Foundation; either version
9  *      2 of the License, or (at your option) any later version.
10  *
11  */
12
13 #include <linux/capability.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <asm/uaccess.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/tcp.h>
22 #include <linux/udp.h>
23 #include <linux/if_arp.h>
24 #include <linux/mroute.h>
25 #include <linux/init.h>
26 #include <linux/in6.h>
27 #include <linux/inetdevice.h>
28 #include <linux/igmp.h>
29 #include <linux/netfilter_ipv4.h>
30 #include <linux/if_ether.h>
31
32 #include <net/sock.h>
33 #include <net/ip.h>
34 #include <net/icmp.h>
35 #include <net/protocol.h>
36 #include <net/ipip.h>
37 #include <net/arp.h>
38 #include <net/checksum.h>
39 #include <net/dsfield.h>
40 #include <net/inet_ecn.h>
41 #include <net/xfrm.h>
42 #include <net/net_namespace.h>
43 #include <net/netns/generic.h>
44
45 #ifdef CONFIG_IPV6
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #endif
50
51 /*
52    Problems & solutions
53    --------------------
54
55    1. The most important issue is detecting local dead loops.
56    They would cause complete host lockup in transmit, which
57    would be "resolved" by stack overflow or, if queueing is enabled,
58    with infinite looping in net_bh.
59
60    We cannot track such dead loops during route installation,
61    it is infeasible task. The most general solutions would be
62    to keep skb->encapsulation counter (sort of local ttl),
63    and silently drop packet when it expires. It is the best
64    solution, but it supposes maintaining new variable in ALL
65    skb, even if no tunneling is used.
66
67    Current solution: t->recursion lock breaks dead loops. It looks
68    like dev->tbusy flag, but I preferred new variable, because
69    the semantics is different. One day, when hard_start_xmit
70    will be multithreaded we will have to use skb->encapsulation.
71
72
73
74    2. Networking dead loops would not kill routers, but would really
75    kill network. IP hop limit plays role of "t->recursion" in this case,
76    if we copy it from packet being encapsulated to upper header.
77    It is very good solution, but it introduces two problems:
78
79    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
80      do not work over tunnels.
81    - traceroute does not work. I planned to relay ICMP from tunnel,
82      so that this problem would be solved and traceroute output
83      would even more informative. This idea appeared to be wrong:
84      only Linux complies to rfc1812 now (yes, guys, Linux is the only
85      true router now :-)), all routers (at least, in neighbourhood of mine)
86      return only 8 bytes of payload. It is the end.
87
88    Hence, if we want that OSPF worked or traceroute said something reasonable,
89    we should search for another solution.
90
91    One of them is to parse packet trying to detect inner encapsulation
92    made by our node. It is difficult or even impossible, especially,
93    taking into account fragmentation. To be short, it is not a solution at all.
94
95    Current solution: The solution was UNEXPECTEDLY SIMPLE.
96    We force DF flag on tunnels with preconfigured hop limit,
97    that is ALL. :-) Well, it does not remove the problem completely,
98    but exponential growth of network traffic is changed to linear
99    (branches, that exceed pmtu are pruned) and tunnel mtu
100    quickly degrades to a value <68, where looping stops.
101    Yes, it is not good if there exists a router in the loop,
102    which does not force DF, even when encapsulating packets have DF set.
103    But it is not our problem! Nobody could accuse us, we made
104    all that we could make. Even if it is your gated who injected
105    fatal route to network, even if it were you who configured
106    fatal static route: you are innocent. :-)
107
108
109
110    3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
111    practically identical code. It would be good to glue them
112    together, but it is not very evident, how to make them modular.
113    sit is integral part of IPv6, ipip and gre are naturally modular.
114    We could extract common parts (hash table, ioctl etc)
115    to a separate module (ip_tunnel.c).
116
117    Alexey Kuznetsov.
118  */
119
120 static int ipgre_tunnel_init(struct net_device *dev);
121 static void ipgre_tunnel_setup(struct net_device *dev);
122
123 /* Fallback tunnel: no source, no destination, no key, no options */
124
125 static int ipgre_fb_tunnel_init(struct net_device *dev);
126
/* Number of buckets in each of the four tunnel hash tables. */
#define HASH_SIZE  16

/* Id handed to net_generic() to fetch this file's per-namespace state. */
static int ipgre_net_id;
/* GRE state kept once per network namespace. */
struct ipgre_net {
        /* Tunnel hash chains, indexed [table][bucket]; see tunnels_* below. */
        struct ip_tunnel *tunnels[4][HASH_SIZE];

        /* Fallback device returned by the lookup when no tunnel matches. */
        struct net_device *fb_tunnel_dev;
};
135
136 /* Tunnel hash table */
137
138 /*
139    4 hash tables:
140
141    3: (remote,local)
142    2: (remote,*)
143    1: (*,local)
144    0: (*,*)
145
146    We require exact key match i.e. if a key is present in packet
147    it will match only tunnel with the same key; if it is not present,
148    it will match only keyless tunnel.
149
150    All keyless packets that do not match a configured keyless tunnel
151    will match the fallback tunnel.
152  */
153
/*
 * Fold a 32-bit address or key into a 4-bit bucket index (0..15).
 * The argument is fully parenthesised so that an expression such as
 * HASH(a ^ b) hashes the whole value; note it is evaluated twice, so
 * do not pass an expression with side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)

/* Named views of the four hash tables, most specific first. */
#define tunnels_r_l     tunnels[3]      /* (remote,local) */
#define tunnels_r       tunnels[2]      /* (remote,*)     */
#define tunnels_l       tunnels[1]      /* (*,local)      */
#define tunnels_wc      tunnels[0]      /* (*,*)          */
160
161 static DEFINE_RWLOCK(ipgre_lock);
162
163 /* Given src, dst and key, find the appropriate tunnel for input. */
164
165 static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
166                 __be32 remote, __be32 local, __be32 key)
167 {
168         unsigned h0 = HASH(remote);
169         unsigned h1 = HASH(key);
170         struct ip_tunnel *t;
171         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
172
173         for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
174                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
175                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
176                                 return t;
177                 }
178         }
179         for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
180                 if (remote == t->parms.iph.daddr) {
181                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
182                                 return t;
183                 }
184         }
185         for (t = ign->tunnels_l[h1]; t; t = t->next) {
186                 if (local == t->parms.iph.saddr ||
187                      (local == t->parms.iph.daddr &&
188                       ipv4_is_multicast(local))) {
189                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
190                                 return t;
191                 }
192         }
193         for (t = ign->tunnels_wc[h1]; t; t = t->next) {
194                 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
195                         return t;
196         }
197
198         if (ign->fb_tunnel_dev->flags&IFF_UP)
199                 return netdev_priv(ign->fb_tunnel_dev);
200         return NULL;
201 }
202
203 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
204                 struct ip_tunnel_parm *parms)
205 {
206         __be32 remote = parms->iph.daddr;
207         __be32 local = parms->iph.saddr;
208         __be32 key = parms->i_key;
209         unsigned h = HASH(key);
210         int prio = 0;
211
212         if (local)
213                 prio |= 1;
214         if (remote && !ipv4_is_multicast(remote)) {
215                 prio |= 2;
216                 h ^= HASH(remote);
217         }
218
219         return &ign->tunnels[prio][h];
220 }
221
222 static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
223                 struct ip_tunnel *t)
224 {
225         return __ipgre_bucket(ign, &t->parms);
226 }
227
228 static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
229 {
230         struct ip_tunnel **tp = ipgre_bucket(ign, t);
231
232         t->next = *tp;
233         write_lock_bh(&ipgre_lock);
234         *tp = t;
235         write_unlock_bh(&ipgre_lock);
236 }
237
238 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
239 {
240         struct ip_tunnel **tp;
241
242         for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
243                 if (t == *tp) {
244                         write_lock_bh(&ipgre_lock);
245                         *tp = t->next;
246                         write_unlock_bh(&ipgre_lock);
247                         break;
248                 }
249         }
250 }
251
252 static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
253                 struct ip_tunnel_parm *parms, int create)
254 {
255         __be32 remote = parms->iph.daddr;
256         __be32 local = parms->iph.saddr;
257         __be32 key = parms->i_key;
258         struct ip_tunnel *t, **tp, *nt;
259         struct net_device *dev;
260         char name[IFNAMSIZ];
261         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
262
263         for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
264                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
265                         if (key == t->parms.i_key)
266                                 return t;
267                 }
268         }
269         if (!create)
270                 return NULL;
271
272         if (parms->name[0])
273                 strlcpy(name, parms->name, IFNAMSIZ);
274         else
275                 sprintf(name, "gre%%d");
276
277         dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
278         if (!dev)
279           return NULL;
280
281         if (strchr(name, '%')) {
282                 if (dev_alloc_name(dev, name) < 0)
283                         goto failed_free;
284         }
285
286         dev->init = ipgre_tunnel_init;
287         nt = netdev_priv(dev);
288         nt->parms = *parms;
289
290         if (register_netdevice(dev) < 0)
291                 goto failed_free;
292
293         dev_hold(dev);
294         ipgre_tunnel_link(ign, nt);
295         return nt;
296
297 failed_free:
298         free_netdev(dev);
299         return NULL;
300 }
301
302 static void ipgre_tunnel_uninit(struct net_device *dev)
303 {
304         struct net *net = dev_net(dev);
305         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
306
307         ipgre_tunnel_unlink(ign, netdev_priv(dev));
308         dev_put(dev);
309 }
310
311
312 static void ipgre_err(struct sk_buff *skb, u32 info)
313 {
314 #ifndef I_WISH_WORLD_WERE_PERFECT
315
316 /* It is not :-( All the routers (except for Linux) return only
317    8 bytes of packet payload. It means, that precise relaying of
318    ICMP in the real Internet is absolutely infeasible.
319
320    Moreover, Cisco "wise men" put GRE key to the third word
321    in GRE header. It makes impossible maintaining even soft state for keyed
322    GRE tunnels with enabled checksum. Tell them "thank you".
323
324    Well, I wonder, rfc1812 was written by Cisco employee,
325    what the hell these idiots break standrads established
326    by themself???
327  */
328
329         struct iphdr *iph = (struct iphdr*)skb->data;
330         __be16       *p = (__be16*)(skb->data+(iph->ihl<<2));
331         int grehlen = (iph->ihl<<2) + 4;
332         const int type = icmp_hdr(skb)->type;
333         const int code = icmp_hdr(skb)->code;
334         struct ip_tunnel *t;
335         __be16 flags;
336
337         flags = p[0];
338         if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
339                 if (flags&(GRE_VERSION|GRE_ROUTING))
340                         return;
341                 if (flags&GRE_KEY) {
342                         grehlen += 4;
343                         if (flags&GRE_CSUM)
344                                 grehlen += 4;
345                 }
346         }
347
348         /* If only 8 bytes returned, keyed message will be dropped here */
349         if (skb_headlen(skb) < grehlen)
350                 return;
351
352         switch (type) {
353         default:
354         case ICMP_PARAMETERPROB:
355                 return;
356
357         case ICMP_DEST_UNREACH:
358                 switch (code) {
359                 case ICMP_SR_FAILED:
360                 case ICMP_PORT_UNREACH:
361                         /* Impossible event. */
362                         return;
363                 case ICMP_FRAG_NEEDED:
364                         /* Soft state for pmtu is maintained by IP core. */
365                         return;
366                 default:
367                         /* All others are translated to HOST_UNREACH.
368                            rfc2003 contains "deep thoughts" about NET_UNREACH,
369                            I believe they are just ether pollution. --ANK
370                          */
371                         break;
372                 }
373                 break;
374         case ICMP_TIME_EXCEEDED:
375                 if (code != ICMP_EXC_TTL)
376                         return;
377                 break;
378         }
379
380         read_lock(&ipgre_lock);
381         t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
382                         (flags&GRE_KEY) ?
383                         *(((__be32*)p) + (grehlen>>2) - 1) : 0);
384         if (t == NULL || t->parms.iph.daddr == 0 ||
385             ipv4_is_multicast(t->parms.iph.daddr))
386                 goto out;
387
388         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
389                 goto out;
390
391         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
392                 t->err_count++;
393         else
394                 t->err_count = 1;
395         t->err_time = jiffies;
396 out:
397         read_unlock(&ipgre_lock);
398         return;
399 #else
400         struct iphdr *iph = (struct iphdr*)dp;
401         struct iphdr *eiph;
402         __be16       *p = (__be16*)(dp+(iph->ihl<<2));
403         const int type = icmp_hdr(skb)->type;
404         const int code = icmp_hdr(skb)->code;
405         int rel_type = 0;
406         int rel_code = 0;
407         __be32 rel_info = 0;
408         __u32 n = 0;
409         __be16 flags;
410         int grehlen = (iph->ihl<<2) + 4;
411         struct sk_buff *skb2;
412         struct flowi fl;
413         struct rtable *rt;
414
415         if (p[1] != htons(ETH_P_IP))
416                 return;
417
418         flags = p[0];
419         if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
420                 if (flags&(GRE_VERSION|GRE_ROUTING))
421                         return;
422                 if (flags&GRE_CSUM)
423                         grehlen += 4;
424                 if (flags&GRE_KEY)
425                         grehlen += 4;
426                 if (flags&GRE_SEQ)
427                         grehlen += 4;
428         }
429         if (len < grehlen + sizeof(struct iphdr))
430                 return;
431         eiph = (struct iphdr*)(dp + grehlen);
432
433         switch (type) {
434         default:
435                 return;
436         case ICMP_PARAMETERPROB:
437                 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
438                 if (n < (iph->ihl<<2))
439                         return;
440
441                 /* So... This guy found something strange INSIDE encapsulated
442                    packet. Well, he is fool, but what can we do ?
443                  */
444                 rel_type = ICMP_PARAMETERPROB;
445                 n -= grehlen;
446                 rel_info = htonl(n << 24);
447                 break;
448
449         case ICMP_DEST_UNREACH:
450                 switch (code) {
451                 case ICMP_SR_FAILED:
452                 case ICMP_PORT_UNREACH:
453                         /* Impossible event. */
454                         return;
455                 case ICMP_FRAG_NEEDED:
456                         /* And it is the only really necessary thing :-) */
457                         n = ntohs(icmp_hdr(skb)->un.frag.mtu);
458                         if (n < grehlen+68)
459                                 return;
460                         n -= grehlen;
461                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
462                         if (n > ntohs(eiph->tot_len))
463                                 return;
464                         rel_info = htonl(n);
465                         break;
466                 default:
467                         /* All others are translated to HOST_UNREACH.
468                            rfc2003 contains "deep thoughts" about NET_UNREACH,
469                            I believe, it is just ether pollution. --ANK
470                          */
471                         rel_type = ICMP_DEST_UNREACH;
472                         rel_code = ICMP_HOST_UNREACH;
473                         break;
474                 }
475                 break;
476         case ICMP_TIME_EXCEEDED:
477                 if (code != ICMP_EXC_TTL)
478                         return;
479                 break;
480         }
481
482         /* Prepare fake skb to feed it to icmp_send */
483         skb2 = skb_clone(skb, GFP_ATOMIC);
484         if (skb2 == NULL)
485                 return;
486         dst_release(skb2->dst);
487         skb2->dst = NULL;
488         skb_pull(skb2, skb->data - (u8*)eiph);
489         skb_reset_network_header(skb2);
490
491         /* Try to guess incoming interface */
492         memset(&fl, 0, sizeof(fl));
493         fl.fl4_dst = eiph->saddr;
494         fl.fl4_tos = RT_TOS(eiph->tos);
495         fl.proto = IPPROTO_GRE;
496         if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) {
497                 kfree_skb(skb2);
498                 return;
499         }
500         skb2->dev = rt->u.dst.dev;
501
502         /* route "incoming" packet */
503         if (rt->rt_flags&RTCF_LOCAL) {
504                 ip_rt_put(rt);
505                 rt = NULL;
506                 fl.fl4_dst = eiph->daddr;
507                 fl.fl4_src = eiph->saddr;
508                 fl.fl4_tos = eiph->tos;
509                 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
510                     rt->u.dst.dev->type != ARPHRD_IPGRE) {
511                         ip_rt_put(rt);
512                         kfree_skb(skb2);
513                         return;
514                 }
515         } else {
516                 ip_rt_put(rt);
517                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
518                     skb2->dst->dev->type != ARPHRD_IPGRE) {
519                         kfree_skb(skb2);
520                         return;
521                 }
522         }
523
524         /* change mtu on this route */
525         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
526                 if (n > dst_mtu(skb2->dst)) {
527                         kfree_skb(skb2);
528                         return;
529                 }
530                 skb2->dst->ops->update_pmtu(skb2->dst, n);
531         } else if (type == ICMP_TIME_EXCEEDED) {
532                 struct ip_tunnel *t = netdev_priv(skb2->dev);
533                 if (t->parms.iph.ttl) {
534                         rel_type = ICMP_DEST_UNREACH;
535                         rel_code = ICMP_HOST_UNREACH;
536                 }
537         }
538
539         icmp_send(skb2, rel_type, rel_code, rel_info);
540         kfree_skb(skb2);
541 #endif
542 }
543
544 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
545 {
546         if (INET_ECN_is_ce(iph->tos)) {
547                 if (skb->protocol == htons(ETH_P_IP)) {
548                         IP_ECN_set_ce(ip_hdr(skb));
549                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
550                         IP6_ECN_set_ce(ipv6_hdr(skb));
551                 }
552         }
553 }
554
555 static inline u8
556 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
557 {
558         u8 inner = 0;
559         if (skb->protocol == htons(ETH_P_IP))
560                 inner = old_iph->tos;
561         else if (skb->protocol == htons(ETH_P_IPV6))
562                 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
563         return INET_ECN_encapsulate(tos, inner);
564 }
565
566 static int ipgre_rcv(struct sk_buff *skb)
567 {
568         struct iphdr *iph;
569         u8     *h;
570         __be16    flags;
571         __sum16   csum = 0;
572         __be32 key = 0;
573         u32    seqno = 0;
574         struct ip_tunnel *tunnel;
575         int    offset = 4;
576
577         if (!pskb_may_pull(skb, 16))
578                 goto drop_nolock;
579
580         iph = ip_hdr(skb);
581         h = skb->data;
582         flags = *(__be16*)h;
583
584         if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
585                 /* - Version must be 0.
586                    - We do not support routing headers.
587                  */
588                 if (flags&(GRE_VERSION|GRE_ROUTING))
589                         goto drop_nolock;
590
591                 if (flags&GRE_CSUM) {
592                         switch (skb->ip_summed) {
593                         case CHECKSUM_COMPLETE:
594                                 csum = csum_fold(skb->csum);
595                                 if (!csum)
596                                         break;
597                                 /* fall through */
598                         case CHECKSUM_NONE:
599                                 skb->csum = 0;
600                                 csum = __skb_checksum_complete(skb);
601                                 skb->ip_summed = CHECKSUM_COMPLETE;
602                         }
603                         offset += 4;
604                 }
605                 if (flags&GRE_KEY) {
606                         key = *(__be32*)(h + offset);
607                         offset += 4;
608                 }
609                 if (flags&GRE_SEQ) {
610                         seqno = ntohl(*(__be32*)(h + offset));
611                         offset += 4;
612                 }
613         }
614
615         read_lock(&ipgre_lock);
616         if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
617                                         iph->saddr, iph->daddr, key)) != NULL) {
618                 secpath_reset(skb);
619
620                 skb->protocol = *(__be16*)(h + 2);
621                 /* WCCP version 1 and 2 protocol decoding.
622                  * - Change protocol to IP
623                  * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
624                  */
625                 if (flags == 0 &&
626                     skb->protocol == htons(ETH_P_WCCP)) {
627                         skb->protocol = htons(ETH_P_IP);
628                         if ((*(h + offset) & 0xF0) != 0x40)
629                                 offset += 4;
630                 }
631
632                 skb->mac_header = skb->network_header;
633                 __pskb_pull(skb, offset);
634                 skb_reset_network_header(skb);
635                 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
636                 skb->pkt_type = PACKET_HOST;
637 #ifdef CONFIG_NET_IPGRE_BROADCAST
638                 if (ipv4_is_multicast(iph->daddr)) {
639                         /* Looped back packet, drop it! */
640                         if (skb->rtable->fl.iif == 0)
641                                 goto drop;
642                         tunnel->stat.multicast++;
643                         skb->pkt_type = PACKET_BROADCAST;
644                 }
645 #endif
646
647                 if (((flags&GRE_CSUM) && csum) ||
648                     (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
649                         tunnel->stat.rx_crc_errors++;
650                         tunnel->stat.rx_errors++;
651                         goto drop;
652                 }
653                 if (tunnel->parms.i_flags&GRE_SEQ) {
654                         if (!(flags&GRE_SEQ) ||
655                             (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
656                                 tunnel->stat.rx_fifo_errors++;
657                                 tunnel->stat.rx_errors++;
658                                 goto drop;
659                         }
660                         tunnel->i_seqno = seqno + 1;
661                 }
662                 tunnel->stat.rx_packets++;
663                 tunnel->stat.rx_bytes += skb->len;
664                 skb->dev = tunnel->dev;
665                 dst_release(skb->dst);
666                 skb->dst = NULL;
667                 nf_reset(skb);
668                 ipgre_ecn_decapsulate(iph, skb);
669                 netif_rx(skb);
670                 read_unlock(&ipgre_lock);
671                 return(0);
672         }
673         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
674
675 drop:
676         read_unlock(&ipgre_lock);
677 drop_nolock:
678         kfree_skb(skb);
679         return(0);
680 }
681
682 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
683 {
684         struct ip_tunnel *tunnel = netdev_priv(dev);
685         struct net_device_stats *stats = &tunnel->stat;
686         struct iphdr  *old_iph = ip_hdr(skb);
687         struct iphdr  *tiph;
688         u8     tos;
689         __be16 df;
690         struct rtable *rt;                      /* Route to the other host */
691         struct net_device *tdev;                        /* Device to other host */
692         struct iphdr  *iph;                     /* Our new IP header */
693         unsigned int max_headroom;              /* The extra header space needed */
694         int    gre_hlen;
695         __be32 dst;
696         int    mtu;
697
698         if (tunnel->recursion++) {
699                 tunnel->stat.collisions++;
700                 goto tx_error;
701         }
702
703         if (dev->header_ops) {
704                 gre_hlen = 0;
705                 tiph = (struct iphdr*)skb->data;
706         } else {
707                 gre_hlen = tunnel->hlen;
708                 tiph = &tunnel->parms.iph;
709         }
710
711         if ((dst = tiph->daddr) == 0) {
712                 /* NBMA tunnel */
713
714                 if (skb->dst == NULL) {
715                         tunnel->stat.tx_fifo_errors++;
716                         goto tx_error;
717                 }
718
719                 if (skb->protocol == htons(ETH_P_IP)) {
720                         rt = skb->rtable;
721                         if ((dst = rt->rt_gateway) == 0)
722                                 goto tx_error_icmp;
723                 }
724 #ifdef CONFIG_IPV6
725                 else if (skb->protocol == htons(ETH_P_IPV6)) {
726                         struct in6_addr *addr6;
727                         int addr_type;
728                         struct neighbour *neigh = skb->dst->neighbour;
729
730                         if (neigh == NULL)
731                                 goto tx_error;
732
733                         addr6 = (struct in6_addr*)&neigh->primary_key;
734                         addr_type = ipv6_addr_type(addr6);
735
736                         if (addr_type == IPV6_ADDR_ANY) {
737                                 addr6 = &ipv6_hdr(skb)->daddr;
738                                 addr_type = ipv6_addr_type(addr6);
739                         }
740
741                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
742                                 goto tx_error_icmp;
743
744                         dst = addr6->s6_addr32[3];
745                 }
746 #endif
747                 else
748                         goto tx_error;
749         }
750
751         tos = tiph->tos;
752         if (tos&1) {
753                 if (skb->protocol == htons(ETH_P_IP))
754                         tos = old_iph->tos;
755                 tos &= ~1;
756         }
757
758         {
759                 struct flowi fl = { .oif = tunnel->parms.link,
760                                     .nl_u = { .ip4_u =
761                                               { .daddr = dst,
762                                                 .saddr = tiph->saddr,
763                                                 .tos = RT_TOS(tos) } },
764                                     .proto = IPPROTO_GRE };
765                 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
766                         tunnel->stat.tx_carrier_errors++;
767                         goto tx_error;
768                 }
769         }
770         tdev = rt->u.dst.dev;
771
772         if (tdev == dev) {
773                 ip_rt_put(rt);
774                 tunnel->stat.collisions++;
775                 goto tx_error;
776         }
777
778         df = tiph->frag_off;
779         if (df)
780                 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
781         else
782                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
783
784         if (skb->dst)
785                 skb->dst->ops->update_pmtu(skb->dst, mtu);
786
787         if (skb->protocol == htons(ETH_P_IP)) {
788                 df |= (old_iph->frag_off&htons(IP_DF));
789
790                 if ((old_iph->frag_off&htons(IP_DF)) &&
791                     mtu < ntohs(old_iph->tot_len)) {
792                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
793                         ip_rt_put(rt);
794                         goto tx_error;
795                 }
796         }
797 #ifdef CONFIG_IPV6
798         else if (skb->protocol == htons(ETH_P_IPV6)) {
799                 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
800
801                 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
802                         if ((tunnel->parms.iph.daddr &&
803                              !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
804                             rt6->rt6i_dst.plen == 128) {
805                                 rt6->rt6i_flags |= RTF_MODIFIED;
806                                 skb->dst->metrics[RTAX_MTU-1] = mtu;
807                         }
808                 }
809
810                 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
811                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
812                         ip_rt_put(rt);
813                         goto tx_error;
814                 }
815         }
816 #endif
817
818         if (tunnel->err_count > 0) {
819                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
820                         tunnel->err_count--;
821
822                         dst_link_failure(skb);
823                 } else
824                         tunnel->err_count = 0;
825         }
826
827         max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
828
829         if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
830             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
831                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
832                 if (!new_skb) {
833                         ip_rt_put(rt);
834                         stats->tx_dropped++;
835                         dev_kfree_skb(skb);
836                         tunnel->recursion--;
837                         return 0;
838                 }
839                 if (skb->sk)
840                         skb_set_owner_w(new_skb, skb->sk);
841                 dev_kfree_skb(skb);
842                 skb = new_skb;
843                 old_iph = ip_hdr(skb);
844         }
845
846         skb->transport_header = skb->network_header;
847         skb_push(skb, gre_hlen);
848         skb_reset_network_header(skb);
849         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
850         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
851                               IPSKB_REROUTED);
852         dst_release(skb->dst);
853         skb->dst = &rt->u.dst;
854
855         /*
856          *      Push down and install the IPIP header.
857          */
858
859         iph                     =       ip_hdr(skb);
860         iph->version            =       4;
861         iph->ihl                =       sizeof(struct iphdr) >> 2;
862         iph->frag_off           =       df;
863         iph->protocol           =       IPPROTO_GRE;
864         iph->tos                =       ipgre_ecn_encapsulate(tos, old_iph, skb);
865         iph->daddr              =       rt->rt_dst;
866         iph->saddr              =       rt->rt_src;
867
868         if ((iph->ttl = tiph->ttl) == 0) {
869                 if (skb->protocol == htons(ETH_P_IP))
870                         iph->ttl = old_iph->ttl;
871 #ifdef CONFIG_IPV6
872                 else if (skb->protocol == htons(ETH_P_IPV6))
873                         iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
874 #endif
875                 else
876                         iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
877         }
878
879         ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
880         ((__be16*)(iph+1))[1] = skb->protocol;
881
882         if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
883                 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
884
885                 if (tunnel->parms.o_flags&GRE_SEQ) {
886                         ++tunnel->o_seqno;
887                         *ptr = htonl(tunnel->o_seqno);
888                         ptr--;
889                 }
890                 if (tunnel->parms.o_flags&GRE_KEY) {
891                         *ptr = tunnel->parms.o_key;
892                         ptr--;
893                 }
894                 if (tunnel->parms.o_flags&GRE_CSUM) {
895                         *ptr = 0;
896                         *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
897                 }
898         }
899
900         nf_reset(skb);
901
902         IPTUNNEL_XMIT();
903         tunnel->recursion--;
904         return 0;
905
906 tx_error_icmp:
907         dst_link_failure(skb);
908
909 tx_error:
910         stats->tx_errors++;
911         dev_kfree_skb(skb);
912         tunnel->recursion--;
913         return 0;
914 }
915
916 static void ipgre_tunnel_bind_dev(struct net_device *dev)
917 {
918         struct net_device *tdev = NULL;
919         struct ip_tunnel *tunnel;
920         struct iphdr *iph;
921         int hlen = LL_MAX_HEADER;
922         int mtu = ETH_DATA_LEN;
923         int addend = sizeof(struct iphdr) + 4;
924
925         tunnel = netdev_priv(dev);
926         iph = &tunnel->parms.iph;
927
928         /* Guess output device to choose reasonable mtu and hard_header_len */
929
930         if (iph->daddr) {
931                 struct flowi fl = { .oif = tunnel->parms.link,
932                                     .nl_u = { .ip4_u =
933                                               { .daddr = iph->daddr,
934                                                 .saddr = iph->saddr,
935                                                 .tos = RT_TOS(iph->tos) } },
936                                     .proto = IPPROTO_GRE };
937                 struct rtable *rt;
938                 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
939                         tdev = rt->u.dst.dev;
940                         ip_rt_put(rt);
941                 }
942                 dev->flags |= IFF_POINTOPOINT;
943         }
944
945         if (!tdev && tunnel->parms.link)
946                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
947
948         if (tdev) {
949                 hlen = tdev->hard_header_len;
950                 mtu = tdev->mtu;
951         }
952         dev->iflink = tunnel->parms.link;
953
954         /* Precalculate GRE options length */
955         if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
956                 if (tunnel->parms.o_flags&GRE_CSUM)
957                         addend += 4;
958                 if (tunnel->parms.o_flags&GRE_KEY)
959                         addend += 4;
960                 if (tunnel->parms.o_flags&GRE_SEQ)
961                         addend += 4;
962         }
963         dev->hard_header_len = hlen + addend;
964         dev->mtu = mtu - addend;
965         tunnel->hlen = addend;
966
967 }
968
969 static int
970 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
971 {
972         int err = 0;
973         struct ip_tunnel_parm p;
974         struct ip_tunnel *t;
975         struct net *net = dev_net(dev);
976         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
977
978         switch (cmd) {
979         case SIOCGETTUNNEL:
980                 t = NULL;
981                 if (dev == ign->fb_tunnel_dev) {
982                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
983                                 err = -EFAULT;
984                                 break;
985                         }
986                         t = ipgre_tunnel_locate(net, &p, 0);
987                 }
988                 if (t == NULL)
989                         t = netdev_priv(dev);
990                 memcpy(&p, &t->parms, sizeof(p));
991                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
992                         err = -EFAULT;
993                 break;
994
995         case SIOCADDTUNNEL:
996         case SIOCCHGTUNNEL:
997                 err = -EPERM;
998                 if (!capable(CAP_NET_ADMIN))
999                         goto done;
1000
1001                 err = -EFAULT;
1002                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1003                         goto done;
1004
1005                 err = -EINVAL;
1006                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1007                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1008                     ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1009                         goto done;
1010                 if (p.iph.ttl)
1011                         p.iph.frag_off |= htons(IP_DF);
1012
1013                 if (!(p.i_flags&GRE_KEY))
1014                         p.i_key = 0;
1015                 if (!(p.o_flags&GRE_KEY))
1016                         p.o_key = 0;
1017
1018                 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1019
1020                 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1021                         if (t != NULL) {
1022                                 if (t->dev != dev) {
1023                                         err = -EEXIST;
1024                                         break;
1025                                 }
1026                         } else {
1027                                 unsigned nflags=0;
1028
1029                                 t = netdev_priv(dev);
1030
1031                                 if (ipv4_is_multicast(p.iph.daddr))
1032                                         nflags = IFF_BROADCAST;
1033                                 else if (p.iph.daddr)
1034                                         nflags = IFF_POINTOPOINT;
1035
1036                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1037                                         err = -EINVAL;
1038                                         break;
1039                                 }
1040                                 ipgre_tunnel_unlink(ign, t);
1041                                 t->parms.iph.saddr = p.iph.saddr;
1042                                 t->parms.iph.daddr = p.iph.daddr;
1043                                 t->parms.i_key = p.i_key;
1044                                 t->parms.o_key = p.o_key;
1045                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1046                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
1047                                 ipgre_tunnel_link(ign, t);
1048                                 netdev_state_change(dev);
1049                         }
1050                 }
1051
1052                 if (t) {
1053                         err = 0;
1054                         if (cmd == SIOCCHGTUNNEL) {
1055                                 t->parms.iph.ttl = p.iph.ttl;
1056                                 t->parms.iph.tos = p.iph.tos;
1057                                 t->parms.iph.frag_off = p.iph.frag_off;
1058                                 if (t->parms.link != p.link) {
1059                                         t->parms.link = p.link;
1060                                         ipgre_tunnel_bind_dev(dev);
1061                                         netdev_state_change(dev);
1062                                 }
1063                         }
1064                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1065                                 err = -EFAULT;
1066                 } else
1067                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1068                 break;
1069
1070         case SIOCDELTUNNEL:
1071                 err = -EPERM;
1072                 if (!capable(CAP_NET_ADMIN))
1073                         goto done;
1074
1075                 if (dev == ign->fb_tunnel_dev) {
1076                         err = -EFAULT;
1077                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1078                                 goto done;
1079                         err = -ENOENT;
1080                         if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1081                                 goto done;
1082                         err = -EPERM;
1083                         if (t == netdev_priv(ign->fb_tunnel_dev))
1084                                 goto done;
1085                         dev = t->dev;
1086                 }
1087                 unregister_netdevice(dev);
1088                 err = 0;
1089                 break;
1090
1091         default:
1092                 err = -EINVAL;
1093         }
1094
1095 done:
1096         return err;
1097 }
1098
1099 static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1100 {
1101         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
1102 }
1103
1104 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1105 {
1106         struct ip_tunnel *tunnel = netdev_priv(dev);
1107         if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
1108                 return -EINVAL;
1109         dev->mtu = new_mtu;
1110         return 0;
1111 }
1112
1113 /* Nice toy. Unfortunately, useless in real life :-)
1114    It allows constructing a virtual multiprotocol broadcast "LAN"
1115    over the Internet, provided multicast routing is tuned.
1116
1117
1118    I have no idea whether this bicycle was invented before me,
1119    so I had to set ARPHRD_IPGRE to a random value.
1120    I have the impression that Cisco could have made something similar,
1121    but this feature is apparently missing in IOS<=11.2(8).
1122
1123    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1124    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1125
1126    ping -t 255 224.66.66.66
1127
1128    If nobody answers, mbone does not work.
1129
1130    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1131    ip addr add 10.66.66.<somewhat>/24 dev Universe
1132    ifconfig Universe up
1133    ifconfig Universe add fe80::<Your_real_addr>/10
1134    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1135    ftp 10.66.66.66
1136    ...
1137    ftp fec0:6666:6666::193.233.7.65
1138    ...
1139
1140  */
1141
1142 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1143                         unsigned short type,
1144                         const void *daddr, const void *saddr, unsigned len)
1145 {
1146         struct ip_tunnel *t = netdev_priv(dev);
1147         struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1148         __be16 *p = (__be16*)(iph+1);
1149
1150         memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1151         p[0]            = t->parms.o_flags;
1152         p[1]            = htons(type);
1153
1154         /*
1155          *      Set the source hardware address.
1156          */
1157
1158         if (saddr)
1159                 memcpy(&iph->saddr, saddr, 4);
1160
1161         if (daddr) {
1162                 memcpy(&iph->daddr, daddr, 4);
1163                 return t->hlen;
1164         }
1165         if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1166                 return t->hlen;
1167
1168         return -t->hlen;
1169 }
1170
1171 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1172 {
1173         struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1174         memcpy(haddr, &iph->saddr, 4);
1175         return 4;
1176 }
1177
1178 static const struct header_ops ipgre_header_ops = {
1179         .create = ipgre_header,
1180         .parse  = ipgre_header_parse,
1181 };
1182
1183 #ifdef CONFIG_NET_IPGRE_BROADCAST
1184 static int ipgre_open(struct net_device *dev)
1185 {
1186         struct ip_tunnel *t = netdev_priv(dev);
1187
1188         if (ipv4_is_multicast(t->parms.iph.daddr)) {
1189                 struct flowi fl = { .oif = t->parms.link,
1190                                     .nl_u = { .ip4_u =
1191                                               { .daddr = t->parms.iph.daddr,
1192                                                 .saddr = t->parms.iph.saddr,
1193                                                 .tos = RT_TOS(t->parms.iph.tos) } },
1194                                     .proto = IPPROTO_GRE };
1195                 struct rtable *rt;
1196                 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1197                         return -EADDRNOTAVAIL;
1198                 dev = rt->u.dst.dev;
1199                 ip_rt_put(rt);
1200                 if (__in_dev_get_rtnl(dev) == NULL)
1201                         return -EADDRNOTAVAIL;
1202                 t->mlink = dev->ifindex;
1203                 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1204         }
1205         return 0;
1206 }
1207
1208 static int ipgre_close(struct net_device *dev)
1209 {
1210         struct ip_tunnel *t = netdev_priv(dev);
1211         if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1212                 struct in_device *in_dev;
1213                 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1214                 if (in_dev) {
1215                         ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1216                         in_dev_put(in_dev);
1217                 }
1218         }
1219         return 0;
1220 }
1221
1222 #endif
1223
1224 static void ipgre_tunnel_setup(struct net_device *dev)
1225 {
1226         dev->uninit             = ipgre_tunnel_uninit;
1227         dev->destructor         = free_netdev;
1228         dev->hard_start_xmit    = ipgre_tunnel_xmit;
1229         dev->get_stats          = ipgre_tunnel_get_stats;
1230         dev->do_ioctl           = ipgre_tunnel_ioctl;
1231         dev->change_mtu         = ipgre_tunnel_change_mtu;
1232
1233         dev->type               = ARPHRD_IPGRE;
1234         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1235         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1236         dev->flags              = IFF_NOARP;
1237         dev->iflink             = 0;
1238         dev->addr_len           = 4;
1239 }
1240
1241 static int ipgre_tunnel_init(struct net_device *dev)
1242 {
1243         struct ip_tunnel *tunnel;
1244         struct iphdr *iph;
1245
1246         tunnel = netdev_priv(dev);
1247         iph = &tunnel->parms.iph;
1248
1249         tunnel->dev = dev;
1250         strcpy(tunnel->parms.name, dev->name);
1251
1252         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1253         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1254
1255         ipgre_tunnel_bind_dev(dev);
1256
1257         if (iph->daddr) {
1258 #ifdef CONFIG_NET_IPGRE_BROADCAST
1259                 if (ipv4_is_multicast(iph->daddr)) {
1260                         if (!iph->saddr)
1261                                 return -EINVAL;
1262                         dev->flags = IFF_BROADCAST;
1263                         dev->header_ops = &ipgre_header_ops;
1264                         dev->open = ipgre_open;
1265                         dev->stop = ipgre_close;
1266                 }
1267 #endif
1268         } else
1269                 dev->header_ops = &ipgre_header_ops;
1270
1271         return 0;
1272 }
1273
1274 static int ipgre_fb_tunnel_init(struct net_device *dev)
1275 {
1276         struct ip_tunnel *tunnel = netdev_priv(dev);
1277         struct iphdr *iph = &tunnel->parms.iph;
1278         struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1279
1280         tunnel->dev = dev;
1281         strcpy(tunnel->parms.name, dev->name);
1282
1283         iph->version            = 4;
1284         iph->protocol           = IPPROTO_GRE;
1285         iph->ihl                = 5;
1286         tunnel->hlen            = sizeof(struct iphdr) + 4;
1287
1288         dev_hold(dev);
1289         ign->tunnels_wc[0]      = tunnel;
1290         return 0;
1291 }
1292
1293
1294 static struct net_protocol ipgre_protocol = {
1295         .handler        =       ipgre_rcv,
1296         .err_handler    =       ipgre_err,
1297 };
1298
1299 static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1300 {
1301         int prio;
1302
1303         for (prio = 0; prio < 4; prio++) {
1304                 int h;
1305                 for (h = 0; h < HASH_SIZE; h++) {
1306                         struct ip_tunnel *t;
1307                         while ((t = ign->tunnels[prio][h]) != NULL)
1308                                 unregister_netdevice(t->dev);
1309                 }
1310         }
1311 }
1312
1313 static int ipgre_init_net(struct net *net)
1314 {
1315         int err;
1316         struct ipgre_net *ign;
1317
1318         err = -ENOMEM;
1319         ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
1320         if (ign == NULL)
1321                 goto err_alloc;
1322
1323         err = net_assign_generic(net, ipgre_net_id, ign);
1324         if (err < 0)
1325                 goto err_assign;
1326
1327         ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1328                                            ipgre_tunnel_setup);
1329         if (!ign->fb_tunnel_dev) {
1330                 err = -ENOMEM;
1331                 goto err_alloc_dev;
1332         }
1333
1334         ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1335         dev_net_set(ign->fb_tunnel_dev, net);
1336
1337         if ((err = register_netdev(ign->fb_tunnel_dev)))
1338                 goto err_reg_dev;
1339
1340         return 0;
1341
1342 err_reg_dev:
1343         free_netdev(ign->fb_tunnel_dev);
1344 err_alloc_dev:
1345         /* nothing */
1346 err_assign:
1347         kfree(ign);
1348 err_alloc:
1349         return err;
1350 }
1351
1352 static void ipgre_exit_net(struct net *net)
1353 {
1354         struct ipgre_net *ign;
1355
1356         ign = net_generic(net, ipgre_net_id);
1357         rtnl_lock();
1358         ipgre_destroy_tunnels(ign);
1359         rtnl_unlock();
1360         kfree(ign);
1361 }
1362
1363 static struct pernet_operations ipgre_net_ops = {
1364         .init = ipgre_init_net,
1365         .exit = ipgre_exit_net,
1366 };
1367
1368 /*
1369  *      And now the modules code and kernel interface.
1370  */
1371
1372 static int __init ipgre_init(void)
1373 {
1374         int err;
1375
1376         printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1377
1378         if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1379                 printk(KERN_INFO "ipgre init: can't add protocol\n");
1380                 return -EAGAIN;
1381         }
1382
1383         err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1384         if (err < 0)
1385                 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1386
1387         return err;
1388 }
1389
1390 static void __exit ipgre_fini(void)
1391 {
1392         if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1393                 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1394
1395         unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1396 }
1397
1398 module_init(ipgre_init);
1399 module_exit(ipgre_fini);
1400 MODULE_LICENSE("GPL");