ipgre: Use on-device stats instead of private ones.
[safe/jmp/linux-2.6] / net / ipv4 / ip_gre.c
1 /*
2  *      Linux NET3:     GRE over IP protocol decoder.
3  *
4  *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *      This program is free software; you can redistribute it and/or
7  *      modify it under the terms of the GNU General Public License
8  *      as published by the Free Software Foundation; either version
9  *      2 of the License, or (at your option) any later version.
10  *
11  */
12
13 #include <linux/capability.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <asm/uaccess.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/tcp.h>
22 #include <linux/udp.h>
23 #include <linux/if_arp.h>
24 #include <linux/mroute.h>
25 #include <linux/init.h>
26 #include <linux/in6.h>
27 #include <linux/inetdevice.h>
28 #include <linux/igmp.h>
29 #include <linux/netfilter_ipv4.h>
30 #include <linux/if_ether.h>
31
32 #include <net/sock.h>
33 #include <net/ip.h>
34 #include <net/icmp.h>
35 #include <net/protocol.h>
36 #include <net/ipip.h>
37 #include <net/arp.h>
38 #include <net/checksum.h>
39 #include <net/dsfield.h>
40 #include <net/inet_ecn.h>
41 #include <net/xfrm.h>
42 #include <net/net_namespace.h>
43 #include <net/netns/generic.h>
44
45 #ifdef CONFIG_IPV6
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #endif
50
51 /*
52    Problems & solutions
53    --------------------
54
55    1. The most important issue is detecting local dead loops.
56    They would cause complete host lockup in transmit, which
57    would be "resolved" by stack overflow or, if queueing is enabled,
58    with infinite looping in net_bh.
59
60    We cannot track such dead loops during route installation,
61    it is infeasible task. The most general solutions would be
62    to keep skb->encapsulation counter (sort of local ttl),
63    and silently drop packet when it expires. It is the best
   solution, but it supposes maintaining a new variable in ALL
65    skb, even if no tunneling is used.
66
67    Current solution: t->recursion lock breaks dead loops. It looks
68    like dev->tbusy flag, but I preferred new variable, because
69    the semantics is different. One day, when hard_start_xmit
70    will be multithreaded we will have to use skb->encapsulation.
71
72
73
74    2. Networking dead loops would not kill routers, but would really
75    kill network. IP hop limit plays role of "t->recursion" in this case,
76    if we copy it from packet being encapsulated to upper header.
77    It is very good solution, but it introduces two problems:
78
79    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
80      do not work over tunnels.
81    - traceroute does not work. I planned to relay ICMP from tunnel,
82      so that this problem would be solved and traceroute output
83      would even more informative. This idea appeared to be wrong:
84      only Linux complies to rfc1812 now (yes, guys, Linux is the only
85      true router now :-)), all routers (at least, in neighbourhood of mine)
86      return only 8 bytes of payload. It is the end.
87
88    Hence, if we want that OSPF worked or traceroute said something reasonable,
89    we should search for another solution.
90
91    One of them is to parse packet trying to detect inner encapsulation
92    made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, it is not a solution at all.
94
95    Current solution: The solution was UNEXPECTEDLY SIMPLE.
96    We force DF flag on tunnels with preconfigured hop limit,
97    that is ALL. :-) Well, it does not remove the problem completely,
98    but exponential growth of network traffic is changed to linear
99    (branches, that exceed pmtu are pruned) and tunnel mtu
   quickly degrades to a value <68, where looping stops.
101    Yes, it is not good if there exists a router in the loop,
102    which does not force DF, even when encapsulating packets have DF set.
103    But it is not our problem! Nobody could accuse us, we made
104    all that we could make. Even if it is your gated who injected
105    fatal route to network, even if it were you who configured
106    fatal static route: you are innocent. :-)
107
108
109
110    3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
111    practically identical code. It would be good to glue them
112    together, but it is not very evident, how to make them modular.
113    sit is integral part of IPv6, ipip and gre are naturally modular.
114    We could extract common parts (hash table, ioctl etc)
115    to a separate module (ip_tunnel.c).
116
117    Alexey Kuznetsov.
118  */
119
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

static int ipgre_fb_tunnel_init(struct net_device *dev);

/* Number of hash chains per table; HASH() below folds into 4 bits. */
#define HASH_SIZE  16

/* Id handed out by the netns core; used with net_generic() to find our
 * per-namespace state. */
static int ipgre_net_id;

/* Per-network-namespace GRE state. */
struct ipgre_net {
	/* 4 tables indexed by match specificity (see comment below),
	 * each with HASH_SIZE singly-linked chains of tunnels. */
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	/* The always-present fallback device ("gre0"): no src, no dst,
	 * no key. */
	struct net_device *fb_tunnel_dev;
};
135
136 /* Tunnel hash table */
137
138 /*
139    4 hash tables:
140
141    3: (remote,local)
142    2: (remote,*)
143    1: (*,local)
144    0: (*,*)
145
146    We require exact key match i.e. if a key is present in packet
147    it will match only tunnel with the same key; if it is not present,
148    it will match only keyless tunnel.
149
   All keyless packets, if not matching a configured keyless tunnel,
   will match the fallback tunnel.
152  */
153
/* Fold a 32-bit address or key down to a 4-bit chain index. */
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

#define tunnels_r_l     tunnels[3]	/* (remote, local) */
#define tunnels_r       tunnels[2]	/* (remote, *) */
#define tunnels_l       tunnels[1]	/* (*, local) */
#define tunnels_wc      tunnels[0]	/* (*, *) wildcard */

/* Protects all four hash tables; readers are the rx/err paths,
 * writers are link/unlink (serialized by RTNL). */
static DEFINE_RWLOCK(ipgre_lock);
162
163 /* Given src, dst and key, find appropriate for input tunnel. */
164
165 static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
166                 __be32 remote, __be32 local, __be32 key)
167 {
168         unsigned h0 = HASH(remote);
169         unsigned h1 = HASH(key);
170         struct ip_tunnel *t;
171         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
172
173         for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
174                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
175                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
176                                 return t;
177                 }
178         }
179         for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
180                 if (remote == t->parms.iph.daddr) {
181                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
182                                 return t;
183                 }
184         }
185         for (t = ign->tunnels_l[h1]; t; t = t->next) {
186                 if (local == t->parms.iph.saddr ||
187                      (local == t->parms.iph.daddr &&
188                       ipv4_is_multicast(local))) {
189                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
190                                 return t;
191                 }
192         }
193         for (t = ign->tunnels_wc[h1]; t; t = t->next) {
194                 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
195                         return t;
196         }
197
198         if (ign->fb_tunnel_dev->flags&IFF_UP)
199                 return netdev_priv(ign->fb_tunnel_dev);
200         return NULL;
201 }
202
203 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
204                 struct ip_tunnel_parm *parms)
205 {
206         __be32 remote = parms->iph.daddr;
207         __be32 local = parms->iph.saddr;
208         __be32 key = parms->i_key;
209         unsigned h = HASH(key);
210         int prio = 0;
211
212         if (local)
213                 prio |= 1;
214         if (remote && !ipv4_is_multicast(remote)) {
215                 prio |= 2;
216                 h ^= HASH(remote);
217         }
218
219         return &ign->tunnels[prio][h];
220 }
221
/* Hash-chain head for an existing tunnel, keyed on its own parameters. */
static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
		struct ip_tunnel *t)
{
	return __ipgre_bucket(ign, &t->parms);
}
227
/* Insert tunnel @t at the head of its hash chain.
 * t->next may be set before taking the lock: @t is not reachable by
 * readers until the *tp store below publishes it. Writers are
 * serialized externally (RTNL). */
static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel **tp = ipgre_bucket(ign, t);

	t->next = *tp;
	write_lock_bh(&ipgre_lock);
	*tp = t;
	write_unlock_bh(&ipgre_lock);
}
237
238 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
239 {
240         struct ip_tunnel **tp;
241
242         for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
243                 if (t == *tp) {
244                         write_lock_bh(&ipgre_lock);
245                         *tp = t->next;
246                         write_unlock_bh(&ipgre_lock);
247                         break;
248                 }
249         }
250 }
251
252 static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
253                 struct ip_tunnel_parm *parms, int create)
254 {
255         __be32 remote = parms->iph.daddr;
256         __be32 local = parms->iph.saddr;
257         __be32 key = parms->i_key;
258         struct ip_tunnel *t, **tp, *nt;
259         struct net_device *dev;
260         char name[IFNAMSIZ];
261         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
262
263         for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
264                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
265                         if (key == t->parms.i_key)
266                                 return t;
267                 }
268         }
269         if (!create)
270                 return NULL;
271
272         if (parms->name[0])
273                 strlcpy(name, parms->name, IFNAMSIZ);
274         else
275                 sprintf(name, "gre%%d");
276
277         dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
278         if (!dev)
279           return NULL;
280
281         dev_net_set(dev, net);
282
283         if (strchr(name, '%')) {
284                 if (dev_alloc_name(dev, name) < 0)
285                         goto failed_free;
286         }
287
288         dev->init = ipgre_tunnel_init;
289         nt = netdev_priv(dev);
290         nt->parms = *parms;
291
292         if (register_netdevice(dev) < 0)
293                 goto failed_free;
294
295         dev_hold(dev);
296         ipgre_tunnel_link(ign, nt);
297         return nt;
298
299 failed_free:
300         free_netdev(dev);
301         return NULL;
302 }
303
304 static void ipgre_tunnel_uninit(struct net_device *dev)
305 {
306         struct net *net = dev_net(dev);
307         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
308
309         ipgre_tunnel_unlink(ign, netdev_priv(dev));
310         dev_put(dev);
311 }
312
313
/*
 * ICMP error handler for the GRE protocol.  @skb points at the ICMP
 * payload, i.e. the outer IP header of the GRE packet we originally sent;
 * @info carries the ICMP auxiliary data (e.g. the MTU for FRAG_NEEDED).
 * The compiled-in branch only records soft error state (err_count /
 * err_time) on the matching tunnel; PMTU soft state is handled by the
 * IP core itself.
 */
static void ipgre_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, Cisco "wise men" put GRE key to the third word
   in GRE header. It makes impossible maintaining even soft state for keyed
   GRE tunnels with enabled checksum. Tell them "thank you".

   Well, I wonder, rfc1812 was written by Cisco employee,
   what the hell these idiots break standards established
   by themselves???
 */

	/* iph/p/grehlen describe the packet WE sent, echoed in the ICMP
	 * payload: iph is the outer IP header, p the GRE header after it. */
	struct iphdr *iph = (struct iphdr*)skb->data;
	__be16       *p = (__be16*)(skb->data+(iph->ihl<<2));
	int grehlen = (iph->ihl<<2) + 4;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	__be16 flags;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		/* Account for the optional key (and checksum before it) so
		 * the key can be located at the end of grehlen below. */
		if (flags&GRE_KEY) {
			grehlen += 4;
			if (flags&GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes returned, keyed message will be dropped here */
	if (skb_headlen(skb) < grehlen)
		return;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	/* The echoed header's daddr is our tunnel's remote and its saddr
	 * our local, hence the (daddr, saddr) argument order. */
	read_lock(&ipgre_lock);
	t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
			(flags&GRE_KEY) ?
			*(((__be32*)p) + (grehlen>>2) - 1) : 0);
	if (t == NULL || t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		goto out;

	/* ttl inherited from the inner packet: TTL expiry is expected. */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* Record/refresh soft error state, consumed by the xmit path. */
	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipgre_lock);
	return;
#else
	/* NOTE(review): this branch is dead code (the guard macro is never
	 * defined) and references `dp` and `len`, which are not defined in
	 * this function — it would not compile if enabled. Kept for
	 * historical reference only. */
	struct iphdr *iph = (struct iphdr*)dp;
	struct iphdr *eiph;
	__be16       *p = (__be16*)(dp+(iph->ihl<<2));
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	int rel_type = 0;
	int rel_code = 0;
	__be32 rel_info = 0;
	__u32 n = 0;
	__be16 flags;
	int grehlen = (iph->ihl<<2) + 4;
	struct sk_buff *skb2;
	struct flowi fl;
	struct rtable *rt;

	if (p[1] != htons(ETH_P_IP))
		return;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		if (flags&GRE_CSUM)
			grehlen += 4;
		if (flags&GRE_KEY)
			grehlen += 4;
		if (flags&GRE_SEQ)
			grehlen += 4;
	}
	if (len < grehlen + sizeof(struct iphdr))
		return;
	eiph = (struct iphdr*)(dp + grehlen);

	switch (type) {
	default:
		return;
	case ICMP_PARAMETERPROB:
		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
		if (n < (iph->ihl<<2))
			return;

		/* So... This guy found something strange INSIDE encapsulated
		   packet. Well, he is fool, but what can we do ?
		 */
		rel_type = ICMP_PARAMETERPROB;
		n -= grehlen;
		rel_info = htonl(n << 24);
		break;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* And it is the only really necessary thing :-) */
			n = ntohs(icmp_hdr(skb)->un.frag.mtu);
			if (n < grehlen+68)
				return;
			n -= grehlen;
			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
			if (n > ntohs(eiph->tot_len))
				return;
			rel_info = htonl(n);
			break;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe, it is just ether pollution. --ANK
			 */
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	/* Prepare fake skb to feed it to icmp_send */
	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL)
		return;
	dst_release(skb2->dst);
	skb2->dst = NULL;
	skb_pull(skb2, skb->data - (u8*)eiph);
	skb_reset_network_header(skb2);

	/* Try to guess incoming interface */
	memset(&fl, 0, sizeof(fl));
	fl.fl4_dst = eiph->saddr;
	fl.fl4_tos = RT_TOS(eiph->tos);
	fl.proto = IPPROTO_GRE;
	if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) {
		kfree_skb(skb2);
		return;
	}
	skb2->dev = rt->u.dst.dev;

	/* route "incoming" packet */
	if (rt->rt_flags&RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		fl.fl4_dst = eiph->daddr;
		fl.fl4_src = eiph->saddr;
		fl.fl4_tos = eiph->tos;
		if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
		    rt->u.dst.dev->type != ARPHRD_IPGRE) {
			ip_rt_put(rt);
			kfree_skb(skb2);
			return;
		}
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
		    skb2->dst->dev->type != ARPHRD_IPGRE) {
			kfree_skb(skb2);
			return;
		}
	}

	/* change mtu on this route */
	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		if (n > dst_mtu(skb2->dst)) {
			kfree_skb(skb2);
			return;
		}
		skb2->dst->ops->update_pmtu(skb2->dst, n);
	} else if (type == ICMP_TIME_EXCEEDED) {
		struct ip_tunnel *t = netdev_priv(skb2->dev);
		if (t->parms.iph.ttl) {
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
		}
	}

	icmp_send(skb2, rel_type, rel_code, rel_info);
	kfree_skb(skb2);
#endif
}
545
546 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
547 {
548         if (INET_ECN_is_ce(iph->tos)) {
549                 if (skb->protocol == htons(ETH_P_IP)) {
550                         IP_ECN_set_ce(ip_hdr(skb));
551                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
552                         IP6_ECN_set_ce(ipv6_hdr(skb));
553                 }
554         }
555 }
556
557 static inline u8
558 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
559 {
560         u8 inner = 0;
561         if (skb->protocol == htons(ETH_P_IP))
562                 inner = old_iph->tos;
563         else if (skb->protocol == htons(ETH_P_IPV6))
564                 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
565         return INET_ECN_encapsulate(tos, inner);
566 }
567
/*
 * GRE receive handler.  Parses the GRE header (flags, optional checksum,
 * key, sequence number), finds the owning tunnel, verifies checksum and
 * sequence requirements, accounts on-device stats, strips the header and
 * re-injects the inner packet via netif_rx().  Consumes @skb in all
 * paths; always returns 0.
 */
static int ipgre_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	u8     *h;
	__be16    flags;
	__sum16   csum = 0;
	__be32 key = 0;
	u32    seqno = 0;
	struct ip_tunnel *tunnel;
	int    offset = 4;	/* base GRE header: flags + protocol */

	/* 16 = outer GRE base header plus worst-case option fields needed
	 * below (csum/key/seq are each validated by offset arithmetic). */
	if (!pskb_may_pull(skb, 16))
		goto drop_nolock;

	iph = ip_hdr(skb);
	h = skb->data;
	flags = *(__be16*)h;

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop_nolock;

		if (flags&GRE_CSUM) {
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				/* Hardware sum available: a zero fold means
				 * the packet checksums to zero, i.e. valid. */
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
			key = *(__be32*)(h + offset);
			offset += 4;
		}
		if (flags&GRE_SEQ) {
			seqno = ntohl(*(__be32*)(h + offset));
			offset += 4;
		}
	}

	read_lock(&ipgre_lock);
	if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
					iph->saddr, iph->daddr, key)) != NULL) {
		/* On-device stats, as per the commit this file implements. */
		struct net_device_stats *stats = &tunnel->dev->stats;

		secpath_reset(skb);

		skb->protocol = *(__be16*)(h + 2);
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
		 */
		if (flags == 0 &&
		    skb->protocol == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			/* WCCPv2 carries a redirect header; 0x4x would be the
			 * start of an IPv4 header, anything else is skipped. */
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		/* Strip outer IP + GRE headers and repoint header offsets
		 * at the inner packet. */
		skb->mac_header = skb->network_header;
		__pskb_pull(skb, offset);
		skb_reset_network_header(skb);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Looped back packet, drop it! */
			if (skb->rtable->fl.iif == 0)
				goto drop;
			stats->multicast++;
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		/* Drop on bad checksum, or when the tunnel requires a
		 * checksum the packet did not carry. */
		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			stats->rx_crc_errors++;
			stats->rx_errors++;
			goto drop;
		}
		/* Enforce in-order delivery when sequencing is configured. */
		if (tunnel->parms.i_flags&GRE_SEQ) {
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
				stats->rx_fifo_errors++;
				stats->rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}
		stats->rx_packets++;
		stats->rx_bytes += skb->len;
		skb->dev = tunnel->dev;
		dst_release(skb->dst);
		skb->dst = NULL;
		nf_reset(skb);
		ipgre_ecn_decapsulate(iph, skb);
		netif_rx(skb);
		read_unlock(&ipgre_lock);
		return(0);
	}
	/* No tunnel claimed the packet: tell the sender. */
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
	read_unlock(&ipgre_lock);
drop_nolock:
	kfree_skb(skb);
	return(0);
}
685
686 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
687 {
688         struct ip_tunnel *tunnel = netdev_priv(dev);
689         struct net_device_stats *stats = &tunnel->dev->stats;
690         struct iphdr  *old_iph = ip_hdr(skb);
691         struct iphdr  *tiph;
692         u8     tos;
693         __be16 df;
694         struct rtable *rt;                      /* Route to the other host */
695         struct net_device *tdev;                        /* Device to other host */
696         struct iphdr  *iph;                     /* Our new IP header */
697         unsigned int max_headroom;              /* The extra header space needed */
698         int    gre_hlen;
699         __be32 dst;
700         int    mtu;
701
702         if (tunnel->recursion++) {
703                 stats->collisions++;
704                 goto tx_error;
705         }
706
707         if (dev->header_ops) {
708                 gre_hlen = 0;
709                 tiph = (struct iphdr*)skb->data;
710         } else {
711                 gre_hlen = tunnel->hlen;
712                 tiph = &tunnel->parms.iph;
713         }
714
715         if ((dst = tiph->daddr) == 0) {
716                 /* NBMA tunnel */
717
718                 if (skb->dst == NULL) {
719                         stats->tx_fifo_errors++;
720                         goto tx_error;
721                 }
722
723                 if (skb->protocol == htons(ETH_P_IP)) {
724                         rt = skb->rtable;
725                         if ((dst = rt->rt_gateway) == 0)
726                                 goto tx_error_icmp;
727                 }
728 #ifdef CONFIG_IPV6
729                 else if (skb->protocol == htons(ETH_P_IPV6)) {
730                         struct in6_addr *addr6;
731                         int addr_type;
732                         struct neighbour *neigh = skb->dst->neighbour;
733
734                         if (neigh == NULL)
735                                 goto tx_error;
736
737                         addr6 = (struct in6_addr*)&neigh->primary_key;
738                         addr_type = ipv6_addr_type(addr6);
739
740                         if (addr_type == IPV6_ADDR_ANY) {
741                                 addr6 = &ipv6_hdr(skb)->daddr;
742                                 addr_type = ipv6_addr_type(addr6);
743                         }
744
745                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
746                                 goto tx_error_icmp;
747
748                         dst = addr6->s6_addr32[3];
749                 }
750 #endif
751                 else
752                         goto tx_error;
753         }
754
755         tos = tiph->tos;
756         if (tos&1) {
757                 if (skb->protocol == htons(ETH_P_IP))
758                         tos = old_iph->tos;
759                 tos &= ~1;
760         }
761
762         {
763                 struct flowi fl = { .oif = tunnel->parms.link,
764                                     .nl_u = { .ip4_u =
765                                               { .daddr = dst,
766                                                 .saddr = tiph->saddr,
767                                                 .tos = RT_TOS(tos) } },
768                                     .proto = IPPROTO_GRE };
769                 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
770                         stats->tx_carrier_errors++;
771                         goto tx_error;
772                 }
773         }
774         tdev = rt->u.dst.dev;
775
776         if (tdev == dev) {
777                 ip_rt_put(rt);
778                 stats->collisions++;
779                 goto tx_error;
780         }
781
782         df = tiph->frag_off;
783         if (df)
784                 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
785         else
786                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
787
788         if (skb->dst)
789                 skb->dst->ops->update_pmtu(skb->dst, mtu);
790
791         if (skb->protocol == htons(ETH_P_IP)) {
792                 df |= (old_iph->frag_off&htons(IP_DF));
793
794                 if ((old_iph->frag_off&htons(IP_DF)) &&
795                     mtu < ntohs(old_iph->tot_len)) {
796                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
797                         ip_rt_put(rt);
798                         goto tx_error;
799                 }
800         }
801 #ifdef CONFIG_IPV6
802         else if (skb->protocol == htons(ETH_P_IPV6)) {
803                 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
804
805                 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
806                         if ((tunnel->parms.iph.daddr &&
807                              !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
808                             rt6->rt6i_dst.plen == 128) {
809                                 rt6->rt6i_flags |= RTF_MODIFIED;
810                                 skb->dst->metrics[RTAX_MTU-1] = mtu;
811                         }
812                 }
813
814                 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
815                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
816                         ip_rt_put(rt);
817                         goto tx_error;
818                 }
819         }
820 #endif
821
822         if (tunnel->err_count > 0) {
823                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
824                         tunnel->err_count--;
825
826                         dst_link_failure(skb);
827                 } else
828                         tunnel->err_count = 0;
829         }
830
831         max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
832
833         if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
834             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
835                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
836                 if (!new_skb) {
837                         ip_rt_put(rt);
838                         stats->tx_dropped++;
839                         dev_kfree_skb(skb);
840                         tunnel->recursion--;
841                         return 0;
842                 }
843                 if (skb->sk)
844                         skb_set_owner_w(new_skb, skb->sk);
845                 dev_kfree_skb(skb);
846                 skb = new_skb;
847                 old_iph = ip_hdr(skb);
848         }
849
850         skb->transport_header = skb->network_header;
851         skb_push(skb, gre_hlen);
852         skb_reset_network_header(skb);
853         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
854         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
855                               IPSKB_REROUTED);
856         dst_release(skb->dst);
857         skb->dst = &rt->u.dst;
858
859         /*
860          *      Push down and install the IPIP header.
861          */
862
863         iph                     =       ip_hdr(skb);
864         iph->version            =       4;
865         iph->ihl                =       sizeof(struct iphdr) >> 2;
866         iph->frag_off           =       df;
867         iph->protocol           =       IPPROTO_GRE;
868         iph->tos                =       ipgre_ecn_encapsulate(tos, old_iph, skb);
869         iph->daddr              =       rt->rt_dst;
870         iph->saddr              =       rt->rt_src;
871
872         if ((iph->ttl = tiph->ttl) == 0) {
873                 if (skb->protocol == htons(ETH_P_IP))
874                         iph->ttl = old_iph->ttl;
875 #ifdef CONFIG_IPV6
876                 else if (skb->protocol == htons(ETH_P_IPV6))
877                         iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
878 #endif
879                 else
880                         iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
881         }
882
883         ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
884         ((__be16*)(iph+1))[1] = skb->protocol;
885
886         if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
887                 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
888
889                 if (tunnel->parms.o_flags&GRE_SEQ) {
890                         ++tunnel->o_seqno;
891                         *ptr = htonl(tunnel->o_seqno);
892                         ptr--;
893                 }
894                 if (tunnel->parms.o_flags&GRE_KEY) {
895                         *ptr = tunnel->parms.o_key;
896                         ptr--;
897                 }
898                 if (tunnel->parms.o_flags&GRE_CSUM) {
899                         *ptr = 0;
900                         *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
901                 }
902         }
903
904         nf_reset(skb);
905
906         IPTUNNEL_XMIT();
907         tunnel->recursion--;
908         return 0;
909
910 tx_error_icmp:
911         dst_link_failure(skb);
912
913 tx_error:
914         stats->tx_errors++;
915         dev_kfree_skb(skb);
916         tunnel->recursion--;
917         return 0;
918 }
919
920 static void ipgre_tunnel_bind_dev(struct net_device *dev)
921 {
922         struct net_device *tdev = NULL;
923         struct ip_tunnel *tunnel;
924         struct iphdr *iph;
925         int hlen = LL_MAX_HEADER;
926         int mtu = ETH_DATA_LEN;
927         int addend = sizeof(struct iphdr) + 4;
928
929         tunnel = netdev_priv(dev);
930         iph = &tunnel->parms.iph;
931
932         /* Guess output device to choose reasonable mtu and hard_header_len */
933
934         if (iph->daddr) {
935                 struct flowi fl = { .oif = tunnel->parms.link,
936                                     .nl_u = { .ip4_u =
937                                               { .daddr = iph->daddr,
938                                                 .saddr = iph->saddr,
939                                                 .tos = RT_TOS(iph->tos) } },
940                                     .proto = IPPROTO_GRE };
941                 struct rtable *rt;
942                 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
943                         tdev = rt->u.dst.dev;
944                         ip_rt_put(rt);
945                 }
946                 dev->flags |= IFF_POINTOPOINT;
947         }
948
949         if (!tdev && tunnel->parms.link)
950                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
951
952         if (tdev) {
953                 hlen = tdev->hard_header_len;
954                 mtu = tdev->mtu;
955         }
956         dev->iflink = tunnel->parms.link;
957
958         /* Precalculate GRE options length */
959         if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
960                 if (tunnel->parms.o_flags&GRE_CSUM)
961                         addend += 4;
962                 if (tunnel->parms.o_flags&GRE_KEY)
963                         addend += 4;
964                 if (tunnel->parms.o_flags&GRE_SEQ)
965                         addend += 4;
966         }
967         dev->hard_header_len = hlen + addend;
968         dev->mtu = mtu - addend;
969         tunnel->hlen = addend;
970
971 }
972
/*
 * ipgre_tunnel_ioctl - handle SIOC{GET,ADD,CHG,DEL}TUNNEL for a GRE device.
 *
 * Called from the dev_ioctl path, so the RTNL lock is held; the hash
 * chains touched via ipgre_tunnel_(un)link need no further locking here.
 * Returns 0 on success or a negative errno.
 */
static int
ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ign->fb_tunnel_dev) {
			/* On the fallback device the caller names the tunnel
			 * to query via the parm block; otherwise report the
			 * device's own parameters. */
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Sanity: plain IPv4/GRE header without IP options, and no
		 * GRE version/routing bits (neither is implemented). */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		/* Key values are only meaningful when GRE_KEY is set. */
		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* The requested address/key tuple already
				 * belongs to another device. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned nflags=0;

				t = netdev_priv(dev);

				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* Switching a device between broadcast and
				 * point-to-point mode is not supported. */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				/* Re-hash the tunnel under its new endpoints
				 * and keys. */
				ipgre_tunnel_unlink(ign, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(ign, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					/* A new underlying link may change
					 * mtu and required headroom. */
					ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			/* Return the (possibly normalized) parameters. */
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ign->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			/* The fallback device itself cannot be deleted. */
			err = -EPERM;
			if (t == netdev_priv(ign->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
1102
1103 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1104 {
1105         struct ip_tunnel *tunnel = netdev_priv(dev);
1106         if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
1107                 return -EINVAL;
1108         dev->mtu = new_mtu;
1109         return 0;
1110 }
1111
1112 /* Nice toy. Unfortunately, useless in real life :-)
1113    It allows to construct virtual multiprotocol broadcast "LAN"
1114    over the Internet, provided multicast routing is tuned.
1115
1116
   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
1119    I have an impression, that Cisco could make something similar,
1120    but this feature is apparently missing in IOS<=11.2(8).
1121
1122    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1123    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1124
1125    ping -t 255 224.66.66.66
1126
1127    If nobody answers, mbone does not work.
1128
1129    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1130    ip addr add 10.66.66.<somewhat>/24 dev Universe
1131    ifconfig Universe up
1132    ifconfig Universe add fe80::<Your_real_addr>/10
1133    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1134    ftp 10.66.66.66
1135    ...
1136    ftp fec0:6666:6666::193.233.7.65
1137    ...
1138
1139  */
1140
1141 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1142                         unsigned short type,
1143                         const void *daddr, const void *saddr, unsigned len)
1144 {
1145         struct ip_tunnel *t = netdev_priv(dev);
1146         struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1147         __be16 *p = (__be16*)(iph+1);
1148
1149         memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1150         p[0]            = t->parms.o_flags;
1151         p[1]            = htons(type);
1152
1153         /*
1154          *      Set the source hardware address.
1155          */
1156
1157         if (saddr)
1158                 memcpy(&iph->saddr, saddr, 4);
1159
1160         if (daddr) {
1161                 memcpy(&iph->daddr, daddr, 4);
1162                 return t->hlen;
1163         }
1164         if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1165                 return t->hlen;
1166
1167         return -t->hlen;
1168 }
1169
1170 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1171 {
1172         struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1173         memcpy(haddr, &iph->saddr, 4);
1174         return 4;
1175 }
1176
/* Header ops used by NBMA-style and broadcast-mode GRE devices. */
static const struct header_ops ipgre_header_ops = {
	.create = ipgre_header,
	.parse	= ipgre_header_parse,
};
1181
1182 #ifdef CONFIG_NET_IPGRE_BROADCAST
/*
 * ipgre_open - open hook for broadcast-mode (multicast daddr) tunnels.
 *
 * Resolves the multicast route to find the real egress device and joins
 * the group on it, recording its ifindex in t->mlink so ipgre_close()
 * can leave the group again.  Runs under RTNL (hence __in_dev_get_rtnl).
 */
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi fl = { .oif = t->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = t->parms.iph.daddr,
						.saddr = t->parms.iph.saddr,
						.tos = RT_TOS(t->parms.iph.tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (ip_route_output_key(dev_net(dev), &rt, &fl))
			return -EADDRNOTAVAIL;
		/* NB: from here on "dev" refers to the underlying egress
		 * device, not the tunnel device this hook was called for. */
		dev = rt->u.dst.dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}
1206
1207 static int ipgre_close(struct net_device *dev)
1208 {
1209         struct ip_tunnel *t = netdev_priv(dev);
1210         if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1211                 struct in_device *in_dev;
1212                 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1213                 if (in_dev) {
1214                         ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1215                         in_dev_put(in_dev);
1216                 }
1217         }
1218         return 0;
1219 }
1220
1221 #endif
1222
/*
 * ipgre_tunnel_setup - alloc_netdev() callback initializing a GRE device.
 *
 * Installs the device methods and conservative defaults; header length
 * and mtu assume the minimal 4-byte GRE header and are refined later by
 * ipgre_tunnel_bind_dev().
 */
static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->uninit		= ipgre_tunnel_uninit;
	dev->destructor 	= free_netdev;
	dev->hard_start_xmit	= ipgre_tunnel_xmit;
	dev->do_ioctl		= ipgre_tunnel_ioctl;
	dev->change_mtu		= ipgre_tunnel_change_mtu;

	dev->type		= ARPHRD_IPGRE;
	/* Room for the outer IP header plus the basic 4-byte GRE header. */
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
1239
/*
 * ipgre_tunnel_init - dev->init hook for regular (non-fallback) tunnels.
 *
 * Copies the configured endpoints into the device addresses, binds
 * mtu/headroom to the underlying device, and selects header_ops:
 * NBMA-style devices (no daddr) and multicast "broadcast LAN" devices
 * both need an on-the-wire header built per packet.
 */
static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	/* Tunnel endpoints double as the 4-byte device addresses. */
	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	ipgre_tunnel_bind_dev(dev);

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Broadcast mode needs a fixed local address. */
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
			dev->open = ipgre_open;
			dev->stop = ipgre_close;
		}
#endif
	} else
		dev->header_ops = &ipgre_header_ops;

	return 0;
}
1272
1273 static int ipgre_fb_tunnel_init(struct net_device *dev)
1274 {
1275         struct ip_tunnel *tunnel = netdev_priv(dev);
1276         struct iphdr *iph = &tunnel->parms.iph;
1277         struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1278
1279         tunnel->dev = dev;
1280         strcpy(tunnel->parms.name, dev->name);
1281
1282         iph->version            = 4;
1283         iph->protocol           = IPPROTO_GRE;
1284         iph->ihl                = 5;
1285         tunnel->hlen            = sizeof(struct iphdr) + 4;
1286
1287         dev_hold(dev);
1288         ign->tunnels_wc[0]      = tunnel;
1289         return 0;
1290 }
1291
1292
/* IPPROTO_GRE handler registered with the IPv4 stack in ipgre_init(). */
static struct net_protocol ipgre_protocol = {
	.handler	=	ipgre_rcv,
	.err_handler	=	ipgre_err,
	.netns_ok	=	1,
};
1298
1299 static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1300 {
1301         int prio;
1302
1303         for (prio = 0; prio < 4; prio++) {
1304                 int h;
1305                 for (h = 0; h < HASH_SIZE; h++) {
1306                         struct ip_tunnel *t;
1307                         while ((t = ign->tunnels[prio][h]) != NULL)
1308                                 unregister_netdevice(t->dev);
1309                 }
1310         }
1311 }
1312
/*
 * ipgre_init_net - per-namespace init: allocate the ipgre_net state and
 * register the "gre0" fallback device.  Unwinds in reverse order via the
 * goto ladder on failure and returns a negative errno.
 */
static int ipgre_init_net(struct net *net)
{
	int err;
	struct ipgre_net *ign;

	err = -ENOMEM;
	ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
	if (ign == NULL)
		goto err_alloc;

	err = net_assign_generic(net, ipgre_net_id, ign);
	if (err < 0)
		goto err_assign;

	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
					   ipgre_tunnel_setup);
	if (!ign->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}

	ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
	dev_net_set(ign->fb_tunnel_dev, net);

	if ((err = register_netdev(ign->fb_tunnel_dev)))
		goto err_reg_dev;

	return 0;

err_reg_dev:
	/* Never registered, so plain free_netdev() is the right teardown. */
	free_netdev(ign->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
err_assign:
	kfree(ign);
err_alloc:
	return err;
}
1351
1352 static void ipgre_exit_net(struct net *net)
1353 {
1354         struct ipgre_net *ign;
1355
1356         ign = net_generic(net, ipgre_net_id);
1357         rtnl_lock();
1358         ipgre_destroy_tunnels(ign);
1359         rtnl_unlock();
1360         kfree(ign);
1361 }
1362
/* Per-network-namespace setup/teardown hooks. */
static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
};
1367
1368 /*
1369  *      And now the modules code and kernel interface.
1370  */
1371
1372 static int __init ipgre_init(void)
1373 {
1374         int err;
1375
1376         printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1377
1378         if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1379                 printk(KERN_INFO "ipgre init: can't add protocol\n");
1380                 return -EAGAIN;
1381         }
1382
1383         err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1384         if (err < 0)
1385                 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1386
1387         return err;
1388 }
1389
1390 static void __exit ipgre_fini(void)
1391 {
1392         if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1393                 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1394
1395         unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1396 }
1397
/* Module entry/exit points and license. */
module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");