[GRE]: Add net/gre_net argument to some functions.
[safe/jmp/linux-2.6] / net / ipv4 / ip_gre.c
1 /*
2  *      Linux NET3:     GRE over IP protocol decoder.
3  *
4  *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *      This program is free software; you can redistribute it and/or
7  *      modify it under the terms of the GNU General Public License
8  *      as published by the Free Software Foundation; either version
9  *      2 of the License, or (at your option) any later version.
10  *
11  */
12
13 #include <linux/capability.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <asm/uaccess.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/tcp.h>
22 #include <linux/udp.h>
23 #include <linux/if_arp.h>
24 #include <linux/mroute.h>
25 #include <linux/init.h>
26 #include <linux/in6.h>
27 #include <linux/inetdevice.h>
28 #include <linux/igmp.h>
29 #include <linux/netfilter_ipv4.h>
30 #include <linux/if_ether.h>
31
32 #include <net/sock.h>
33 #include <net/ip.h>
34 #include <net/icmp.h>
35 #include <net/protocol.h>
36 #include <net/ipip.h>
37 #include <net/arp.h>
38 #include <net/checksum.h>
39 #include <net/dsfield.h>
40 #include <net/inet_ecn.h>
41 #include <net/xfrm.h>
42 #include <net/net_namespace.h>
43 #include <net/netns/generic.h>
44
45 #ifdef CONFIG_IPV6
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #endif
50
51 /*
52    Problems & solutions
53    --------------------
54
55    1. The most important issue is detecting local dead loops.
56    They would cause complete host lockup in transmit, which
57    would be "resolved" by stack overflow or, if queueing is enabled,
58    with infinite looping in net_bh.
59
60    We cannot track such dead loops during route installation,
61    it is infeasible task. The most general solutions would be
62    to keep skb->encapsulation counter (sort of local ttl),
63    and silently drop packet when it expires. It is the best
64    solution, but it supposes maintaining a new variable in ALL
65    skb, even if no tunneling is used.
66
67    Current solution: t->recursion lock breaks dead loops. It looks
68    like dev->tbusy flag, but I preferred new variable, because
69    the semantics is different. One day, when hard_start_xmit
70    will be multithreaded we will have to use skb->encapsulation.
71
72
73
74    2. Networking dead loops would not kill routers, but would really
75    kill network. IP hop limit plays role of "t->recursion" in this case,
76    if we copy it from packet being encapsulated to upper header.
77    It is very good solution, but it introduces two problems:
78
79    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
80      do not work over tunnels.
81    - traceroute does not work. I planned to relay ICMP from tunnel,
82      so that this problem would be solved and traceroute output
83      would even more informative. This idea appeared to be wrong:
84      only Linux complies to rfc1812 now (yes, guys, Linux is the only
85      true router now :-)), all routers (at least, in neighbourhood of mine)
86      return only 8 bytes of payload. It is the end.
87
88    Hence, if we want OSPF to work or traceroute to say something reasonable,
89    we should search for another solution.
90
91    One of them is to parse packet trying to detect inner encapsulation
92    made by our node. It is difficult or even impossible, especially,
93    taking into account fragmentation. To be short, it is not a solution at all.
94
95    Current solution: The solution was UNEXPECTEDLY SIMPLE.
96    We force DF flag on tunnels with preconfigured hop limit,
97    that is ALL. :-) Well, it does not remove the problem completely,
98    but exponential growth of network traffic is changed to linear
99    (branches, that exceed pmtu are pruned) and tunnel mtu
100    quickly degrades to a value <68, where looping stops.
101    Yes, it is not good if there exists a router in the loop,
102    which does not force DF, even when encapsulating packets have DF set.
103    But it is not our problem! Nobody could accuse us, we made
104    all that we could make. Even if it is your gated who injected
105    fatal route to network, even if it were you who configured
106    fatal static route: you are innocent. :-)
107
108
109
110    3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
111    practically identical code. It would be good to glue them
112    together, but it is not very evident, how to make them modular.
113    sit is integral part of IPv6, ipip and gre are naturally modular.
114    We could extract common parts (hash table, ioctl etc)
115    to a separate module (ip_tunnel.c).
116
117    Alexey Kuznetsov.
118  */
119
120 static int ipgre_tunnel_init(struct net_device *dev);
121 static void ipgre_tunnel_setup(struct net_device *dev);
122
123 /* Fallback tunnel: no source, no destination, no key, no options */
124
125 static int ipgre_fb_tunnel_init(struct net_device *dev);
126
127 static int ipgre_net_id;
128 struct ipgre_net {
129 };
130
131 static struct net_device *ipgre_fb_tunnel_dev;
132
133 /* Tunnel hash table */
134
135 /*
136    4 hash tables:
137
138    3: (remote,local)
139    2: (remote,*)
140    1: (*,local)
141    0: (*,*)
142
143    We require exact key match i.e. if a key is present in packet
144    it will match only tunnel with the same key; if it is not present,
145    it will match only keyless tunnel.
146
147    All keysless packets, if not matched configured keyless tunnels
148    will match fallback tunnel.
149  */
150
151 #define HASH_SIZE  16
152 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
153
154 static struct ip_tunnel *tunnels[4][HASH_SIZE];
155
156 #define tunnels_r_l     (tunnels[3])
157 #define tunnels_r       (tunnels[2])
158 #define tunnels_l       (tunnels[1])
159 #define tunnels_wc      (tunnels[0])
160
161 static DEFINE_RWLOCK(ipgre_lock);
162
163 /* Given src, dst and key, find appropriate for input tunnel. */
164
165 static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
166                 __be32 remote, __be32 local, __be32 key)
167 {
168         unsigned h0 = HASH(remote);
169         unsigned h1 = HASH(key);
170         struct ip_tunnel *t;
171
172         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
173                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
174                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
175                                 return t;
176                 }
177         }
178         for (t = tunnels_r[h0^h1]; t; t = t->next) {
179                 if (remote == t->parms.iph.daddr) {
180                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
181                                 return t;
182                 }
183         }
184         for (t = tunnels_l[h1]; t; t = t->next) {
185                 if (local == t->parms.iph.saddr ||
186                      (local == t->parms.iph.daddr &&
187                       ipv4_is_multicast(local))) {
188                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
189                                 return t;
190                 }
191         }
192         for (t = tunnels_wc[h1]; t; t = t->next) {
193                 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
194                         return t;
195         }
196
197         if (ipgre_fb_tunnel_dev->flags&IFF_UP)
198                 return netdev_priv(ipgre_fb_tunnel_dev);
199         return NULL;
200 }
201
202 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
203                 struct ip_tunnel_parm *parms)
204 {
205         __be32 remote = parms->iph.daddr;
206         __be32 local = parms->iph.saddr;
207         __be32 key = parms->i_key;
208         unsigned h = HASH(key);
209         int prio = 0;
210
211         if (local)
212                 prio |= 1;
213         if (remote && !ipv4_is_multicast(remote)) {
214                 prio |= 2;
215                 h ^= HASH(remote);
216         }
217
218         return &tunnels[prio][h];
219 }
220
221 static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
222                 struct ip_tunnel *t)
223 {
224         return __ipgre_bucket(ign, &t->parms);
225 }
226
227 static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
228 {
229         struct ip_tunnel **tp = ipgre_bucket(ign, t);
230
231         t->next = *tp;
232         write_lock_bh(&ipgre_lock);
233         *tp = t;
234         write_unlock_bh(&ipgre_lock);
235 }
236
237 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
238 {
239         struct ip_tunnel **tp;
240
241         for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
242                 if (t == *tp) {
243                         write_lock_bh(&ipgre_lock);
244                         *tp = t->next;
245                         write_unlock_bh(&ipgre_lock);
246                         break;
247                 }
248         }
249 }
250
251 static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
252                 struct ip_tunnel_parm *parms, int create)
253 {
254         __be32 remote = parms->iph.daddr;
255         __be32 local = parms->iph.saddr;
256         __be32 key = parms->i_key;
257         struct ip_tunnel *t, **tp, *nt;
258         struct net_device *dev;
259         char name[IFNAMSIZ];
260         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
261
262         for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
263                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
264                         if (key == t->parms.i_key)
265                                 return t;
266                 }
267         }
268         if (!create)
269                 return NULL;
270
271         if (parms->name[0])
272                 strlcpy(name, parms->name, IFNAMSIZ);
273         else
274                 sprintf(name, "gre%%d");
275
276         dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
277         if (!dev)
278           return NULL;
279
280         if (strchr(name, '%')) {
281                 if (dev_alloc_name(dev, name) < 0)
282                         goto failed_free;
283         }
284
285         dev->init = ipgre_tunnel_init;
286         nt = netdev_priv(dev);
287         nt->parms = *parms;
288
289         if (register_netdevice(dev) < 0)
290                 goto failed_free;
291
292         dev_hold(dev);
293         ipgre_tunnel_link(ign, nt);
294         return nt;
295
296 failed_free:
297         free_netdev(dev);
298         return NULL;
299 }
300
301 static void ipgre_tunnel_uninit(struct net_device *dev)
302 {
303         struct net *net = dev_net(dev);
304         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
305
306         ipgre_tunnel_unlink(ign, netdev_priv(dev));
307         dev_put(dev);
308 }
309
310
311 static void ipgre_err(struct sk_buff *skb, u32 info)
312 {
313 #ifndef I_WISH_WORLD_WERE_PERFECT
314
315 /* It is not :-( All the routers (except for Linux) return only
316    8 bytes of packet payload. It means, that precise relaying of
317    ICMP in the real Internet is absolutely infeasible.
318
319    Moreover, Cisco "wise men" put GRE key to the third word
320    in GRE header. It makes impossible maintaining even soft state for keyed
321    GRE tunnels with enabled checksum. Tell them "thank you".
322
323    Well, I wonder, rfc1812 was written by Cisco employee,
324    what the hell these idiots break standrads established
325    by themself???
326  */
327
328         struct iphdr *iph = (struct iphdr*)skb->data;
329         __be16       *p = (__be16*)(skb->data+(iph->ihl<<2));
330         int grehlen = (iph->ihl<<2) + 4;
331         const int type = icmp_hdr(skb)->type;
332         const int code = icmp_hdr(skb)->code;
333         struct ip_tunnel *t;
334         __be16 flags;
335
336         flags = p[0];
337         if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
338                 if (flags&(GRE_VERSION|GRE_ROUTING))
339                         return;
340                 if (flags&GRE_KEY) {
341                         grehlen += 4;
342                         if (flags&GRE_CSUM)
343                                 grehlen += 4;
344                 }
345         }
346
347         /* If only 8 bytes returned, keyed message will be dropped here */
348         if (skb_headlen(skb) < grehlen)
349                 return;
350
351         switch (type) {
352         default:
353         case ICMP_PARAMETERPROB:
354                 return;
355
356         case ICMP_DEST_UNREACH:
357                 switch (code) {
358                 case ICMP_SR_FAILED:
359                 case ICMP_PORT_UNREACH:
360                         /* Impossible event. */
361                         return;
362                 case ICMP_FRAG_NEEDED:
363                         /* Soft state for pmtu is maintained by IP core. */
364                         return;
365                 default:
366                         /* All others are translated to HOST_UNREACH.
367                            rfc2003 contains "deep thoughts" about NET_UNREACH,
368                            I believe they are just ether pollution. --ANK
369                          */
370                         break;
371                 }
372                 break;
373         case ICMP_TIME_EXCEEDED:
374                 if (code != ICMP_EXC_TTL)
375                         return;
376                 break;
377         }
378
379         read_lock(&ipgre_lock);
380         t = ipgre_tunnel_lookup(&init_net, iph->daddr, iph->saddr,
381                         (flags&GRE_KEY) ?
382                         *(((__be32*)p) + (grehlen>>2) - 1) : 0);
383         if (t == NULL || t->parms.iph.daddr == 0 ||
384             ipv4_is_multicast(t->parms.iph.daddr))
385                 goto out;
386
387         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
388                 goto out;
389
390         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
391                 t->err_count++;
392         else
393                 t->err_count = 1;
394         t->err_time = jiffies;
395 out:
396         read_unlock(&ipgre_lock);
397         return;
398 #else
399         struct iphdr *iph = (struct iphdr*)dp;
400         struct iphdr *eiph;
401         __be16       *p = (__be16*)(dp+(iph->ihl<<2));
402         const int type = icmp_hdr(skb)->type;
403         const int code = icmp_hdr(skb)->code;
404         int rel_type = 0;
405         int rel_code = 0;
406         __be32 rel_info = 0;
407         __u32 n = 0;
408         __be16 flags;
409         int grehlen = (iph->ihl<<2) + 4;
410         struct sk_buff *skb2;
411         struct flowi fl;
412         struct rtable *rt;
413
414         if (p[1] != htons(ETH_P_IP))
415                 return;
416
417         flags = p[0];
418         if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
419                 if (flags&(GRE_VERSION|GRE_ROUTING))
420                         return;
421                 if (flags&GRE_CSUM)
422                         grehlen += 4;
423                 if (flags&GRE_KEY)
424                         grehlen += 4;
425                 if (flags&GRE_SEQ)
426                         grehlen += 4;
427         }
428         if (len < grehlen + sizeof(struct iphdr))
429                 return;
430         eiph = (struct iphdr*)(dp + grehlen);
431
432         switch (type) {
433         default:
434                 return;
435         case ICMP_PARAMETERPROB:
436                 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
437                 if (n < (iph->ihl<<2))
438                         return;
439
440                 /* So... This guy found something strange INSIDE encapsulated
441                    packet. Well, he is fool, but what can we do ?
442                  */
443                 rel_type = ICMP_PARAMETERPROB;
444                 n -= grehlen;
445                 rel_info = htonl(n << 24);
446                 break;
447
448         case ICMP_DEST_UNREACH:
449                 switch (code) {
450                 case ICMP_SR_FAILED:
451                 case ICMP_PORT_UNREACH:
452                         /* Impossible event. */
453                         return;
454                 case ICMP_FRAG_NEEDED:
455                         /* And it is the only really necessary thing :-) */
456                         n = ntohs(icmp_hdr(skb)->un.frag.mtu);
457                         if (n < grehlen+68)
458                                 return;
459                         n -= grehlen;
460                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
461                         if (n > ntohs(eiph->tot_len))
462                                 return;
463                         rel_info = htonl(n);
464                         break;
465                 default:
466                         /* All others are translated to HOST_UNREACH.
467                            rfc2003 contains "deep thoughts" about NET_UNREACH,
468                            I believe, it is just ether pollution. --ANK
469                          */
470                         rel_type = ICMP_DEST_UNREACH;
471                         rel_code = ICMP_HOST_UNREACH;
472                         break;
473                 }
474                 break;
475         case ICMP_TIME_EXCEEDED:
476                 if (code != ICMP_EXC_TTL)
477                         return;
478                 break;
479         }
480
481         /* Prepare fake skb to feed it to icmp_send */
482         skb2 = skb_clone(skb, GFP_ATOMIC);
483         if (skb2 == NULL)
484                 return;
485         dst_release(skb2->dst);
486         skb2->dst = NULL;
487         skb_pull(skb2, skb->data - (u8*)eiph);
488         skb_reset_network_header(skb2);
489
490         /* Try to guess incoming interface */
491         memset(&fl, 0, sizeof(fl));
492         fl.fl4_dst = eiph->saddr;
493         fl.fl4_tos = RT_TOS(eiph->tos);
494         fl.proto = IPPROTO_GRE;
495         if (ip_route_output_key(&init_net, &rt, &fl)) {
496                 kfree_skb(skb2);
497                 return;
498         }
499         skb2->dev = rt->u.dst.dev;
500
501         /* route "incoming" packet */
502         if (rt->rt_flags&RTCF_LOCAL) {
503                 ip_rt_put(rt);
504                 rt = NULL;
505                 fl.fl4_dst = eiph->daddr;
506                 fl.fl4_src = eiph->saddr;
507                 fl.fl4_tos = eiph->tos;
508                 if (ip_route_output_key(&init_net, &rt, &fl) ||
509                     rt->u.dst.dev->type != ARPHRD_IPGRE) {
510                         ip_rt_put(rt);
511                         kfree_skb(skb2);
512                         return;
513                 }
514         } else {
515                 ip_rt_put(rt);
516                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
517                     skb2->dst->dev->type != ARPHRD_IPGRE) {
518                         kfree_skb(skb2);
519                         return;
520                 }
521         }
522
523         /* change mtu on this route */
524         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
525                 if (n > dst_mtu(skb2->dst)) {
526                         kfree_skb(skb2);
527                         return;
528                 }
529                 skb2->dst->ops->update_pmtu(skb2->dst, n);
530         } else if (type == ICMP_TIME_EXCEEDED) {
531                 struct ip_tunnel *t = netdev_priv(skb2->dev);
532                 if (t->parms.iph.ttl) {
533                         rel_type = ICMP_DEST_UNREACH;
534                         rel_code = ICMP_HOST_UNREACH;
535                 }
536         }
537
538         icmp_send(skb2, rel_type, rel_code, rel_info);
539         kfree_skb(skb2);
540 #endif
541 }
542
543 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
544 {
545         if (INET_ECN_is_ce(iph->tos)) {
546                 if (skb->protocol == htons(ETH_P_IP)) {
547                         IP_ECN_set_ce(ip_hdr(skb));
548                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
549                         IP6_ECN_set_ce(ipv6_hdr(skb));
550                 }
551         }
552 }
553
554 static inline u8
555 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
556 {
557         u8 inner = 0;
558         if (skb->protocol == htons(ETH_P_IP))
559                 inner = old_iph->tos;
560         else if (skb->protocol == htons(ETH_P_IPV6))
561                 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
562         return INET_ECN_encapsulate(tos, inner);
563 }
564
565 static int ipgre_rcv(struct sk_buff *skb)
566 {
567         struct iphdr *iph;
568         u8     *h;
569         __be16    flags;
570         __sum16   csum = 0;
571         __be32 key = 0;
572         u32    seqno = 0;
573         struct ip_tunnel *tunnel;
574         int    offset = 4;
575
576         if (!pskb_may_pull(skb, 16))
577                 goto drop_nolock;
578
579         iph = ip_hdr(skb);
580         h = skb->data;
581         flags = *(__be16*)h;
582
583         if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
584                 /* - Version must be 0.
585                    - We do not support routing headers.
586                  */
587                 if (flags&(GRE_VERSION|GRE_ROUTING))
588                         goto drop_nolock;
589
590                 if (flags&GRE_CSUM) {
591                         switch (skb->ip_summed) {
592                         case CHECKSUM_COMPLETE:
593                                 csum = csum_fold(skb->csum);
594                                 if (!csum)
595                                         break;
596                                 /* fall through */
597                         case CHECKSUM_NONE:
598                                 skb->csum = 0;
599                                 csum = __skb_checksum_complete(skb);
600                                 skb->ip_summed = CHECKSUM_COMPLETE;
601                         }
602                         offset += 4;
603                 }
604                 if (flags&GRE_KEY) {
605                         key = *(__be32*)(h + offset);
606                         offset += 4;
607                 }
608                 if (flags&GRE_SEQ) {
609                         seqno = ntohl(*(__be32*)(h + offset));
610                         offset += 4;
611                 }
612         }
613
614         read_lock(&ipgre_lock);
615         if ((tunnel = ipgre_tunnel_lookup(&init_net,
616                                         iph->saddr, iph->daddr, key)) != NULL) {
617                 secpath_reset(skb);
618
619                 skb->protocol = *(__be16*)(h + 2);
620                 /* WCCP version 1 and 2 protocol decoding.
621                  * - Change protocol to IP
622                  * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
623                  */
624                 if (flags == 0 &&
625                     skb->protocol == htons(ETH_P_WCCP)) {
626                         skb->protocol = htons(ETH_P_IP);
627                         if ((*(h + offset) & 0xF0) != 0x40)
628                                 offset += 4;
629                 }
630
631                 skb->mac_header = skb->network_header;
632                 __pskb_pull(skb, offset);
633                 skb_reset_network_header(skb);
634                 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
635                 skb->pkt_type = PACKET_HOST;
636 #ifdef CONFIG_NET_IPGRE_BROADCAST
637                 if (ipv4_is_multicast(iph->daddr)) {
638                         /* Looped back packet, drop it! */
639                         if (skb->rtable->fl.iif == 0)
640                                 goto drop;
641                         tunnel->stat.multicast++;
642                         skb->pkt_type = PACKET_BROADCAST;
643                 }
644 #endif
645
646                 if (((flags&GRE_CSUM) && csum) ||
647                     (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
648                         tunnel->stat.rx_crc_errors++;
649                         tunnel->stat.rx_errors++;
650                         goto drop;
651                 }
652                 if (tunnel->parms.i_flags&GRE_SEQ) {
653                         if (!(flags&GRE_SEQ) ||
654                             (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
655                                 tunnel->stat.rx_fifo_errors++;
656                                 tunnel->stat.rx_errors++;
657                                 goto drop;
658                         }
659                         tunnel->i_seqno = seqno + 1;
660                 }
661                 tunnel->stat.rx_packets++;
662                 tunnel->stat.rx_bytes += skb->len;
663                 skb->dev = tunnel->dev;
664                 dst_release(skb->dst);
665                 skb->dst = NULL;
666                 nf_reset(skb);
667                 ipgre_ecn_decapsulate(iph, skb);
668                 netif_rx(skb);
669                 read_unlock(&ipgre_lock);
670                 return(0);
671         }
672         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
673
674 drop:
675         read_unlock(&ipgre_lock);
676 drop_nolock:
677         kfree_skb(skb);
678         return(0);
679 }
680
681 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
682 {
683         struct ip_tunnel *tunnel = netdev_priv(dev);
684         struct net_device_stats *stats = &tunnel->stat;
685         struct iphdr  *old_iph = ip_hdr(skb);
686         struct iphdr  *tiph;
687         u8     tos;
688         __be16 df;
689         struct rtable *rt;                      /* Route to the other host */
690         struct net_device *tdev;                        /* Device to other host */
691         struct iphdr  *iph;                     /* Our new IP header */
692         unsigned int max_headroom;              /* The extra header space needed */
693         int    gre_hlen;
694         __be32 dst;
695         int    mtu;
696
697         if (tunnel->recursion++) {
698                 tunnel->stat.collisions++;
699                 goto tx_error;
700         }
701
702         if (dev->header_ops) {
703                 gre_hlen = 0;
704                 tiph = (struct iphdr*)skb->data;
705         } else {
706                 gre_hlen = tunnel->hlen;
707                 tiph = &tunnel->parms.iph;
708         }
709
710         if ((dst = tiph->daddr) == 0) {
711                 /* NBMA tunnel */
712
713                 if (skb->dst == NULL) {
714                         tunnel->stat.tx_fifo_errors++;
715                         goto tx_error;
716                 }
717
718                 if (skb->protocol == htons(ETH_P_IP)) {
719                         rt = skb->rtable;
720                         if ((dst = rt->rt_gateway) == 0)
721                                 goto tx_error_icmp;
722                 }
723 #ifdef CONFIG_IPV6
724                 else if (skb->protocol == htons(ETH_P_IPV6)) {
725                         struct in6_addr *addr6;
726                         int addr_type;
727                         struct neighbour *neigh = skb->dst->neighbour;
728
729                         if (neigh == NULL)
730                                 goto tx_error;
731
732                         addr6 = (struct in6_addr*)&neigh->primary_key;
733                         addr_type = ipv6_addr_type(addr6);
734
735                         if (addr_type == IPV6_ADDR_ANY) {
736                                 addr6 = &ipv6_hdr(skb)->daddr;
737                                 addr_type = ipv6_addr_type(addr6);
738                         }
739
740                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
741                                 goto tx_error_icmp;
742
743                         dst = addr6->s6_addr32[3];
744                 }
745 #endif
746                 else
747                         goto tx_error;
748         }
749
750         tos = tiph->tos;
751         if (tos&1) {
752                 if (skb->protocol == htons(ETH_P_IP))
753                         tos = old_iph->tos;
754                 tos &= ~1;
755         }
756
757         {
758                 struct flowi fl = { .oif = tunnel->parms.link,
759                                     .nl_u = { .ip4_u =
760                                               { .daddr = dst,
761                                                 .saddr = tiph->saddr,
762                                                 .tos = RT_TOS(tos) } },
763                                     .proto = IPPROTO_GRE };
764                 if (ip_route_output_key(&init_net, &rt, &fl)) {
765                         tunnel->stat.tx_carrier_errors++;
766                         goto tx_error;
767                 }
768         }
769         tdev = rt->u.dst.dev;
770
771         if (tdev == dev) {
772                 ip_rt_put(rt);
773                 tunnel->stat.collisions++;
774                 goto tx_error;
775         }
776
777         df = tiph->frag_off;
778         if (df)
779                 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
780         else
781                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
782
783         if (skb->dst)
784                 skb->dst->ops->update_pmtu(skb->dst, mtu);
785
786         if (skb->protocol == htons(ETH_P_IP)) {
787                 df |= (old_iph->frag_off&htons(IP_DF));
788
789                 if ((old_iph->frag_off&htons(IP_DF)) &&
790                     mtu < ntohs(old_iph->tot_len)) {
791                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
792                         ip_rt_put(rt);
793                         goto tx_error;
794                 }
795         }
796 #ifdef CONFIG_IPV6
797         else if (skb->protocol == htons(ETH_P_IPV6)) {
798                 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
799
800                 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
801                         if ((tunnel->parms.iph.daddr &&
802                              !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
803                             rt6->rt6i_dst.plen == 128) {
804                                 rt6->rt6i_flags |= RTF_MODIFIED;
805                                 skb->dst->metrics[RTAX_MTU-1] = mtu;
806                         }
807                 }
808
809                 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
810                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
811                         ip_rt_put(rt);
812                         goto tx_error;
813                 }
814         }
815 #endif
816
817         if (tunnel->err_count > 0) {
818                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
819                         tunnel->err_count--;
820
821                         dst_link_failure(skb);
822                 } else
823                         tunnel->err_count = 0;
824         }
825
826         max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
827
828         if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
829             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
830                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
831                 if (!new_skb) {
832                         ip_rt_put(rt);
833                         stats->tx_dropped++;
834                         dev_kfree_skb(skb);
835                         tunnel->recursion--;
836                         return 0;
837                 }
838                 if (skb->sk)
839                         skb_set_owner_w(new_skb, skb->sk);
840                 dev_kfree_skb(skb);
841                 skb = new_skb;
842                 old_iph = ip_hdr(skb);
843         }
844
845         skb->transport_header = skb->network_header;
846         skb_push(skb, gre_hlen);
847         skb_reset_network_header(skb);
848         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
849         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
850                               IPSKB_REROUTED);
851         dst_release(skb->dst);
852         skb->dst = &rt->u.dst;
853
854         /*
855          *      Push down and install the IPIP header.
856          */
857
858         iph                     =       ip_hdr(skb);
859         iph->version            =       4;
860         iph->ihl                =       sizeof(struct iphdr) >> 2;
861         iph->frag_off           =       df;
862         iph->protocol           =       IPPROTO_GRE;
863         iph->tos                =       ipgre_ecn_encapsulate(tos, old_iph, skb);
864         iph->daddr              =       rt->rt_dst;
865         iph->saddr              =       rt->rt_src;
866
867         if ((iph->ttl = tiph->ttl) == 0) {
868                 if (skb->protocol == htons(ETH_P_IP))
869                         iph->ttl = old_iph->ttl;
870 #ifdef CONFIG_IPV6
871                 else if (skb->protocol == htons(ETH_P_IPV6))
872                         iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
873 #endif
874                 else
875                         iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
876         }
877
878         ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
879         ((__be16*)(iph+1))[1] = skb->protocol;
880
881         if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
882                 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
883
884                 if (tunnel->parms.o_flags&GRE_SEQ) {
885                         ++tunnel->o_seqno;
886                         *ptr = htonl(tunnel->o_seqno);
887                         ptr--;
888                 }
889                 if (tunnel->parms.o_flags&GRE_KEY) {
890                         *ptr = tunnel->parms.o_key;
891                         ptr--;
892                 }
893                 if (tunnel->parms.o_flags&GRE_CSUM) {
894                         *ptr = 0;
895                         *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
896                 }
897         }
898
899         nf_reset(skb);
900
901         IPTUNNEL_XMIT();
902         tunnel->recursion--;
903         return 0;
904
905 tx_error_icmp:
906         dst_link_failure(skb);
907
908 tx_error:
909         stats->tx_errors++;
910         dev_kfree_skb(skb);
911         tunnel->recursion--;
912         return 0;
913 }
914
915 static void ipgre_tunnel_bind_dev(struct net_device *dev)
916 {
917         struct net_device *tdev = NULL;
918         struct ip_tunnel *tunnel;
919         struct iphdr *iph;
920         int hlen = LL_MAX_HEADER;
921         int mtu = ETH_DATA_LEN;
922         int addend = sizeof(struct iphdr) + 4;
923
924         tunnel = netdev_priv(dev);
925         iph = &tunnel->parms.iph;
926
927         /* Guess output device to choose reasonable mtu and hard_header_len */
928
929         if (iph->daddr) {
930                 struct flowi fl = { .oif = tunnel->parms.link,
931                                     .nl_u = { .ip4_u =
932                                               { .daddr = iph->daddr,
933                                                 .saddr = iph->saddr,
934                                                 .tos = RT_TOS(iph->tos) } },
935                                     .proto = IPPROTO_GRE };
936                 struct rtable *rt;
937                 if (!ip_route_output_key(&init_net, &rt, &fl)) {
938                         tdev = rt->u.dst.dev;
939                         ip_rt_put(rt);
940                 }
941                 dev->flags |= IFF_POINTOPOINT;
942         }
943
944         if (!tdev && tunnel->parms.link)
945                 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
946
947         if (tdev) {
948                 hlen = tdev->hard_header_len;
949                 mtu = tdev->mtu;
950         }
951         dev->iflink = tunnel->parms.link;
952
953         /* Precalculate GRE options length */
954         if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
955                 if (tunnel->parms.o_flags&GRE_CSUM)
956                         addend += 4;
957                 if (tunnel->parms.o_flags&GRE_KEY)
958                         addend += 4;
959                 if (tunnel->parms.o_flags&GRE_SEQ)
960                         addend += 4;
961         }
962         dev->hard_header_len = hlen + addend;
963         dev->mtu = mtu - addend;
964         tunnel->hlen = addend;
965
966 }
967
/*
 * ioctl handler for GRE tunnel devices: SIOCGETTUNNEL, SIOCADDTUNNEL,
 * SIOCCHGTUNNEL and SIOCDELTUNNEL, exchanging a struct ip_tunnel_parm
 * with userspace through ifr->ifr_ifru.ifru_data.
 * Returns 0 on success or a negative errno.
 */
static int
ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		/* On the fallback device, look up the tunnel described by
		 * the user-supplied parameters; otherwise report this dev. */
		if (dev == ipgre_fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Validate the outer header template: IPv4, GRE, no IP
		 * options, at most DF in frag_off; the GRE VERSION and
		 * ROUTING flags are unsupported. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		/* Keys are meaningful only when the GRE_KEY flag is set. */
		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Parameters already belong to a different
				 * device: refuse to alias it. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned nflags=0;

				t = netdev_priv(dev);

				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* Changing between broadcast and ptp mode
				 * is not permitted on a live device. */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				/* Re-hash the tunnel under its new
				 * addresses and keys. */
				ipgre_tunnel_unlink(ign, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(ign, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					/* A link change may alter the
					 * underlying mtu/headroom. */
					ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipgre_fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself may not be deleted. */
			if (t == netdev_priv(ipgre_fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
1097
1098 static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1099 {
1100         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
1101 }
1102
1103 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1104 {
1105         struct ip_tunnel *tunnel = netdev_priv(dev);
1106         if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
1107                 return -EINVAL;
1108         dev->mtu = new_mtu;
1109         return 0;
1110 }
1111
1112 /* Nice toy. Unfortunately, useless in real life :-)
1113    It allows to construct virtual multiprotocol broadcast "LAN"
1114    over the Internet, provided multicast routing is tuned.
1115
1116
   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).
1121
1122    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1123    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1124
1125    ping -t 255 224.66.66.66
1126
1127    If nobody answers, mbone does not work.
1128
1129    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1130    ip addr add 10.66.66.<somewhat>/24 dev Universe
1131    ifconfig Universe up
1132    ifconfig Universe add fe80::<Your_real_addr>/10
1133    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1134    ftp 10.66.66.66
1135    ...
1136    ftp fec0:6666:6666::193.233.7.65
1137    ...
1138
1139  */
1140
1141 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1142                         unsigned short type,
1143                         const void *daddr, const void *saddr, unsigned len)
1144 {
1145         struct ip_tunnel *t = netdev_priv(dev);
1146         struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1147         __be16 *p = (__be16*)(iph+1);
1148
1149         memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1150         p[0]            = t->parms.o_flags;
1151         p[1]            = htons(type);
1152
1153         /*
1154          *      Set the source hardware address.
1155          */
1156
1157         if (saddr)
1158                 memcpy(&iph->saddr, saddr, 4);
1159
1160         if (daddr) {
1161                 memcpy(&iph->daddr, daddr, 4);
1162                 return t->hlen;
1163         }
1164         if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1165                 return t->hlen;
1166
1167         return -t->hlen;
1168 }
1169
1170 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1171 {
1172         struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1173         memcpy(haddr, &iph->saddr, 4);
1174         return 4;
1175 }
1176
/* Link-layer header ops: build/parse the prebuilt GRE-over-IP header. */
static const struct header_ops ipgre_header_ops = {
	.create = ipgre_header,
	.parse  = ipgre_header_parse,
};
1181
1182 #ifdef CONFIG_NET_IPGRE_BROADCAST
1183 static int ipgre_open(struct net_device *dev)
1184 {
1185         struct ip_tunnel *t = netdev_priv(dev);
1186
1187         if (ipv4_is_multicast(t->parms.iph.daddr)) {
1188                 struct flowi fl = { .oif = t->parms.link,
1189                                     .nl_u = { .ip4_u =
1190                                               { .daddr = t->parms.iph.daddr,
1191                                                 .saddr = t->parms.iph.saddr,
1192                                                 .tos = RT_TOS(t->parms.iph.tos) } },
1193                                     .proto = IPPROTO_GRE };
1194                 struct rtable *rt;
1195                 if (ip_route_output_key(&init_net, &rt, &fl))
1196                         return -EADDRNOTAVAIL;
1197                 dev = rt->u.dst.dev;
1198                 ip_rt_put(rt);
1199                 if (__in_dev_get_rtnl(dev) == NULL)
1200                         return -EADDRNOTAVAIL;
1201                 t->mlink = dev->ifindex;
1202                 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1203         }
1204         return 0;
1205 }
1206
1207 static int ipgre_close(struct net_device *dev)
1208 {
1209         struct ip_tunnel *t = netdev_priv(dev);
1210         if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1211                 struct in_device *in_dev;
1212                 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1213                 if (in_dev) {
1214                         ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1215                         in_dev_put(in_dev);
1216                 }
1217         }
1218         return 0;
1219 }
1220
1221 #endif
1222
/* alloc_netdev() setup callback: wire device ops and link-level defaults
 * for a GRE tunnel device. */
static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->uninit		= ipgre_tunnel_uninit;
	dev->destructor		= free_netdev;
	dev->hard_start_xmit	= ipgre_tunnel_xmit;
	dev->get_stats		= ipgre_tunnel_get_stats;
	dev->do_ioctl		= ipgre_tunnel_ioctl;
	dev->change_mtu		= ipgre_tunnel_change_mtu;

	dev->type		= ARPHRD_IPGRE;
	/* Outer IPv4 header plus the 4-byte basic GRE header. */
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;	/* addresses are IPv4 endpoints */
}
1239
/*
 * Per-device init: mirror the tunnel endpoints into dev_addr/broadcast,
 * bind to an underlying device, and select header_ops (and open/stop for
 * broadcast-mode tunnels).  Returns 0 or -EINVAL for a multicast
 * destination without a fixed source.
 */
static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	/* dev_addr/broadcast hold the outer IPv4 source/destination. */
	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	ipgre_tunnel_bind_dev(dev);

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Group membership needs a fixed local source. */
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
			dev->open = ipgre_open;
			dev->stop = ipgre_close;
		}
#endif
	} else
		/* NBMA mode: header built per-packet via header_ops. */
		dev->header_ops = &ipgre_header_ops;

	return 0;
}
1272
1273 static int __init ipgre_fb_tunnel_init(struct net_device *dev)
1274 {
1275         struct ip_tunnel *tunnel = netdev_priv(dev);
1276         struct iphdr *iph = &tunnel->parms.iph;
1277
1278         tunnel->dev = dev;
1279         strcpy(tunnel->parms.name, dev->name);
1280
1281         iph->version            = 4;
1282         iph->protocol           = IPPROTO_GRE;
1283         iph->ihl                = 5;
1284         tunnel->hlen            = sizeof(struct iphdr) + 4;
1285
1286         dev_hold(dev);
1287         tunnels_wc[0]           = tunnel;
1288         return 0;
1289 }
1290
1291
/* Hooks registered for IP protocol 47 (GRE): receive and ICMP error. */
static struct net_protocol ipgre_protocol = {
	.handler	=	ipgre_rcv,
	.err_handler	=	ipgre_err,
};
1296
1297 static int ipgre_init_net(struct net *net)
1298 {
1299         int err;
1300         struct ipgre_net *ign;
1301
1302         err = -ENOMEM;
1303         ign = kmalloc(sizeof(struct ipgre_net), GFP_KERNEL);
1304         if (ign == NULL)
1305                 goto err_alloc;
1306
1307         err = net_assign_generic(net, ipgre_net_id, ign);
1308         if (err < 0)
1309                 goto err_assign;
1310
1311         return 0;
1312
1313 err_assign:
1314         kfree(ign);
1315 err_alloc:
1316         return err;
1317 }
1318
1319 static void ipgre_exit_net(struct net *net)
1320 {
1321         struct ipgre_net *ign;
1322
1323         ign = net_generic(net, ipgre_net_id);
1324         kfree(ign);
1325 }
1326
/* Per-network-namespace setup/teardown for this module. */
static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
};
1331
1332 /*
1333  *      And now the modules code and kernel interface.
1334  */
1335
1336 static int __init ipgre_init(void)
1337 {
1338         int err;
1339
1340         printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1341
1342         if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1343                 printk(KERN_INFO "ipgre init: can't add protocol\n");
1344                 return -EAGAIN;
1345         }
1346
1347         ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1348                                            ipgre_tunnel_setup);
1349         if (!ipgre_fb_tunnel_dev) {
1350                 err = -ENOMEM;
1351                 goto err1;
1352         }
1353
1354         ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1355
1356         if ((err = register_netdev(ipgre_fb_tunnel_dev)))
1357                 goto err2;
1358
1359         err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1360         if (err < 0)
1361                 goto err3;
1362 out:
1363         return err;
1364 err2:
1365         free_netdev(ipgre_fb_tunnel_dev);
1366 err1:
1367         inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1368         goto out;
1369 err3:
1370         unregister_netdevice(ipgre_fb_tunnel_dev);
1371         goto err1;
1372 }
1373
/*
 * Unregister every tunnel device still linked into the hash tables.
 * Caller must hold RTNL (see ipgre_fini()); each unregister unlinks the
 * entry via the uninit path, so every list head drains to NULL.
 */
static void __exit ipgre_destroy_tunnels(void)
{
	int prio;

	/* NOTE(review): 4 appears to be the number of hash tables in
	 * tunnels[] (keyed by which endpoint addresses are set) — confirm
	 * against the table definitions earlier in this file. */
	for (prio = 0; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;
			while ((t = tunnels[prio][h]) != NULL)
				unregister_netdevice(t->dev);
		}
	}
}
1387
/* Module exit: remove the protocol handler, destroy all tunnel devices
 * under RTNL, then unregister the per-namespace operations. */
static void __exit ipgre_fini(void)
{
	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
		printk(KERN_INFO "ipgre close: can't remove protocol\n");

	/* unregister_netdevice() in the teardown path requires RTNL. */
	rtnl_lock();
	ipgre_destroy_tunnels();
	rtnl_unlock();

	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
}
1399
1400 module_init(ipgre_init);
1401 module_exit(ipgre_fini);
1402 MODULE_LICENSE("GPL");