[IPIP]: Make the fallback tunnel device per-net.
net/ipv4/ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder.
3  *
4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5  *
6  *      Authors:
7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
8  *
9  *      Fixes:
10  *              Alan Cox        :       Merged and made usable non modular (it's so tiny it's silly as
11  *                                      a module taking up 2 pages).
12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13  *                                      to keep ip_forward happy.
14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
16  *              David Woodhouse :       Perform some basic ICMP handling.
17  *                                      IPIP Routing without decapsulation.
18  *              Carlos Picoto   :       GRE over IP support
19  *              Alexey Kuznetsov:       Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
20  *                                      I do not want to merge them together.
21  *
22  *      This program is free software; you can redistribute it and/or
23  *      modify it under the terms of the GNU General Public License
24  *      as published by the Free Software Foundation; either version
25  *      2 of the License, or (at your option) any later version.
26  *
27  */
28
29 /* tunnel.c: an IP tunnel driver
30
31         The purpose of this driver is to provide an IP tunnel through
32         which you can tunnel network traffic transparently across subnets.
33
34         This was written by looking at Nick Holloway's dummy driver
35         Thanks for the great code!
36
37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
38
39         Minor tweaks:
40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41                 dev->hard_header/hard_header_len changed to use no headers.
42                 Comments/bracketing tweaked.
43                 Made the tunnels use dev->name rather than "tunnel:" when reporting errors.
44                 Added tx_dropped stat
45
46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
47
48         Reworked:
49                 Changed to tunnel to the destination gateway in addition to the
50                         tunnel's pointopoint address
51                 Almost completely rewritten
52                 Note:  There is currently no firewall or ICMP handling done.
53
54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
55
56 */
57
58 /* Things I wish I had known when writing the tunnel driver:
59
60         When the tunnel_xmit() function is called, the skb contains the
61         packet to be sent (plus a great deal of extra info), and dev
62         contains the tunnel device that _we_ are.
63
64         When we are passed a packet, we are expected to fill in the
65         source address with our source IP address.
66
67         What is the proper way to allocate, copy and free a buffer?
68         After you allocate it, it is a "0 length" chunk of memory
69         starting at zero.  If you want to add headers to the buffer
70         later, you'll have to call "skb_reserve(skb, amount)" with
71         the amount of memory you want reserved.  Then, you call
72         "skb_put(skb, amount)" with the amount of space you want in
73         the buffer.  skb_put() returns a pointer to the top (#0) of
74         that buffer.  skb->len is set to the amount of space you have
75         "allocated" with skb_put().  You can then write up to skb->len
76         bytes to that buffer.  If you need more, you can call skb_put()
77         again with the additional amount of space you need.  You can
78         find out how much more space you can allocate by calling
79         "skb_tailroom(skb)".
80         Now, to add header space, call "skb_push(skb, header_len)".
81         This creates space at the beginning of the buffer and returns
82         a pointer to this new space.  If later you need to strip a
83         header from a buffer, call "skb_pull(skb, header_len)".
84         skb_headroom() will return how much space is left at the top
85         of the buffer (before the main data).  Remember, this headroom
86         space must be reserved before the skb_put() function is called.
87         */
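
/* A minimal sketch of the pattern described above, using hypothetical
   lengths and buffers (hdr_len, data_len, payload, header) that are not
   part of this driver: reserve the headroom first, skb_put() the payload,
   then skb_push() the header in front of it.

	struct sk_buff *skb = alloc_skb(hdr_len + data_len, GFP_ATOMIC);
	if (skb != NULL) {
		skb_reserve(skb, hdr_len);
		memcpy(skb_put(skb, data_len), payload, data_len);
		memcpy(skb_push(skb, hdr_len), header, hdr_len);
	}
 */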
88
89 /*
90    This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c
91
92    For comments look at net/ipv4/ip_gre.c --ANK
93  */
94
95
96 #include <linux/capability.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/kernel.h>
100 #include <asm/uaccess.h>
101 #include <linux/skbuff.h>
102 #include <linux/netdevice.h>
103 #include <linux/in.h>
104 #include <linux/tcp.h>
105 #include <linux/udp.h>
106 #include <linux/if_arp.h>
107 #include <linux/mroute.h>
108 #include <linux/init.h>
109 #include <linux/netfilter_ipv4.h>
110 #include <linux/if_ether.h>
111
112 #include <net/sock.h>
113 #include <net/ip.h>
114 #include <net/icmp.h>
115 #include <net/ipip.h>
116 #include <net/inet_ecn.h>
117 #include <net/xfrm.h>
118 #include <net/net_namespace.h>
119 #include <net/netns/generic.h>
120
121 #define HASH_SIZE  16
122 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
123
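/*
 * Per-network-namespace state.  ipip_net_id indexes the generic per-net
 * pointer array, and fb_tunnel_dev is the fallback "tunl0" device, which
 * this patch creates once per namespace instead of once globally.
 */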
124 static int ipip_net_id;
125 struct ipip_net {
126         struct net_device *fb_tunnel_dev;
127 };
128
129 static int ipip_fb_tunnel_init(struct net_device *dev);
130 static int ipip_tunnel_init(struct net_device *dev);
131 static void ipip_tunnel_setup(struct net_device *dev);
132
133 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
134 static struct ip_tunnel *tunnels_r[HASH_SIZE];
135 static struct ip_tunnel *tunnels_l[HASH_SIZE];
136 static struct ip_tunnel *tunnels_wc[1];
137 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
138
139 static DEFINE_RWLOCK(ipip_lock);
140
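/*
 * Look up a tunnel by the addresses of the outer header, preferring the
 * most specific match: remote+local, then remote only, then local only,
 * and finally the wildcard (fallback) tunnel.  Callers hold ipip_lock
 * for reading.
 */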
141 static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
142 {
143         unsigned h0 = HASH(remote);
144         unsigned h1 = HASH(local);
145         struct ip_tunnel *t;
146
147         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
148                 if (local == t->parms.iph.saddr &&
149                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
150                         return t;
151         }
152         for (t = tunnels_r[h0]; t; t = t->next) {
153                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
154                         return t;
155         }
156         for (t = tunnels_l[h1]; t; t = t->next) {
157                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
158                         return t;
159         }
160         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
161                 return t;
162         return NULL;
163 }
164
165 static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
166 {
167         __be32 remote = parms->iph.daddr;
168         __be32 local = parms->iph.saddr;
169         unsigned h = 0;
170         int prio = 0;
171
172         if (remote) {
173                 prio |= 2;
174                 h ^= HASH(remote);
175         }
176         if (local) {
177                 prio |= 1;
178                 h ^= HASH(local);
179         }
180         return &tunnels[prio][h];
181 }
182
183 static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
184 {
185         return __ipip_bucket(&t->parms);
186 }
187
188 static void ipip_tunnel_unlink(struct ip_tunnel *t)
189 {
190         struct ip_tunnel **tp;
191
192         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
193                 if (t == *tp) {
194                         write_lock_bh(&ipip_lock);
195                         *tp = t->next;
196                         write_unlock_bh(&ipip_lock);
197                         break;
198                 }
199         }
200 }
201
202 static void ipip_tunnel_link(struct ip_tunnel *t)
203 {
204         struct ip_tunnel **tp = ipip_bucket(t);
205
206         t->next = *tp;
207         write_lock_bh(&ipip_lock);
208         *tp = t;
209         write_unlock_bh(&ipip_lock);
210 }
211
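/*
 * Find a tunnel whose endpoints match @parms exactly.  If none exists
 * and @create is set, allocate and register a new tunnel device, named
 * from parms->name or auto-numbered as tunl%d.
 */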
212 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
213 {
214         __be32 remote = parms->iph.daddr;
215         __be32 local = parms->iph.saddr;
216         struct ip_tunnel *t, **tp, *nt;
217         struct net_device *dev;
218         char name[IFNAMSIZ];
219
220         for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
221                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
222                         return t;
223         }
224         if (!create)
225                 return NULL;
226
227         if (parms->name[0])
228                 strlcpy(name, parms->name, IFNAMSIZ);
229         else
230                 sprintf(name, "tunl%%d");
231
232         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
233         if (dev == NULL)
234                 return NULL;
235
236         if (strchr(name, '%')) {
237                 if (dev_alloc_name(dev, name) < 0)
238                         goto failed_free;
239         }
240
241         nt = netdev_priv(dev);
242         dev->init = ipip_tunnel_init;
243         nt->parms = *parms;
244
245         if (register_netdevice(dev) < 0)
246                 goto failed_free;
247
248         dev_hold(dev);
249         ipip_tunnel_link(nt);
250         return nt;
251
252 failed_free:
253         free_netdev(dev);
254         return NULL;
255 }
256
257 static void ipip_tunnel_uninit(struct net_device *dev)
258 {
259         struct net *net = dev_net(dev);
260         struct ipip_net *ipn = net_generic(net, ipip_net_id);
261
262         if (dev == ipn->fb_tunnel_dev) {
263                 write_lock_bh(&ipip_lock);
264                 tunnels_wc[0] = NULL;
265                 write_unlock_bh(&ipip_lock);
266         } else
267                 ipip_tunnel_unlink(netdev_priv(dev));
268         dev_put(dev);
269 }
270
271 static int ipip_err(struct sk_buff *skb, u32 info)
272 {
273 #ifndef I_WISH_WORLD_WERE_PERFECT
274
275 /* It is not :-( All the routers (except for Linux) return only
276    8 bytes of packet payload. It means that precise relaying of
277    ICMP in the real Internet is absolutely infeasible.
278  */
279         struct iphdr *iph = (struct iphdr*)skb->data;
280         const int type = icmp_hdr(skb)->type;
281         const int code = icmp_hdr(skb)->code;
282         struct ip_tunnel *t;
283         int err;
284
285         switch (type) {
286         default:
287         case ICMP_PARAMETERPROB:
288                 return 0;
289
290         case ICMP_DEST_UNREACH:
291                 switch (code) {
292                 case ICMP_SR_FAILED:
293                 case ICMP_PORT_UNREACH:
294                         /* Impossible event. */
295                         return 0;
296                 case ICMP_FRAG_NEEDED:
297                         /* Soft state for pmtu is maintained by IP core. */
298                         return 0;
299                 default:
300                         /* All others are translated to HOST_UNREACH.
301                            rfc2003 contains "deep thoughts" about NET_UNREACH,
302                            I believe they are just ether pollution. --ANK
303                          */
304                         break;
305                 }
306                 break;
307         case ICMP_TIME_EXCEEDED:
308                 if (code != ICMP_EXC_TTL)
309                         return 0;
310                 break;
311         }
312
313         err = -ENOENT;
314
315         read_lock(&ipip_lock);
316         t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
317         if (t == NULL || t->parms.iph.daddr == 0)
318                 goto out;
319
320         err = 0;
321         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
322                 goto out;
323
324         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
325                 t->err_count++;
326         else
327                 t->err_count = 1;
328         t->err_time = jiffies;
329 out:
330         read_unlock(&ipip_lock);
331         return err;
332 #else
333         struct iphdr *iph = (struct iphdr*)skb->data;
334         int hlen = iph->ihl<<2;
335         struct iphdr *eiph;
336         const int type = icmp_hdr(skb)->type;
337         const int code = icmp_hdr(skb)->code;
338         int rel_type = 0;
339         int rel_code = 0;
340         __be32 rel_info = 0;
341         __u32 n = 0;
342         struct sk_buff *skb2;
343         struct flowi fl;
344         struct rtable *rt;
345
346         if (skb->len < hlen + sizeof(struct iphdr))
347                 return 0;
348         eiph = (struct iphdr*)(skb->data + hlen);
349
350         switch (type) {
351         default:
352                 return 0;
353         case ICMP_PARAMETERPROB:
354                 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
355                 if (n < hlen)
356                         return 0;
357
358                 /* So... This guy found something strange INSIDE the encapsulated
359                    packet. Well, he is a fool, but what can we do?
360                  */
361                 rel_type = ICMP_PARAMETERPROB;
362                 rel_info = htonl((n - hlen) << 24);
363                 break;
364
365         case ICMP_DEST_UNREACH:
366                 switch (code) {
367                 case ICMP_SR_FAILED:
368                 case ICMP_PORT_UNREACH:
369                         /* Impossible event. */
370                         return 0;
371                 case ICMP_FRAG_NEEDED:
372                         /* And it is the only really necessary thing :-) */
373                         n = ntohs(icmp_hdr(skb)->un.frag.mtu);
374                         if (n < hlen+68)
375                                 return 0;
376                         n -= hlen;
377                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
378                         if (n > ntohs(eiph->tot_len))
379                                 return 0;
380                         rel_info = htonl(n);
381                         break;
382                 default:
383                         /* All others are translated to HOST_UNREACH.
384                            rfc2003 contains "deep thoughts" about NET_UNREACH,
385                            I believe it is just ether pollution. --ANK
386                          */
387                         rel_type = ICMP_DEST_UNREACH;
388                         rel_code = ICMP_HOST_UNREACH;
389                         break;
390                 }
391                 break;
392         case ICMP_TIME_EXCEEDED:
393                 if (code != ICMP_EXC_TTL)
394                         return 0;
395                 break;
396         }
397
398         /* Prepare fake skb to feed it to icmp_send */
399         skb2 = skb_clone(skb, GFP_ATOMIC);
400         if (skb2 == NULL)
401                 return 0;
402         dst_release(skb2->dst);
403         skb2->dst = NULL;
404         skb_pull(skb2, skb->data - (u8*)eiph);
405         skb_reset_network_header(skb2);
406
407         /* Try to guess incoming interface */
408         memset(&fl, 0, sizeof(fl));
409         fl.fl4_daddr = eiph->saddr;
410         fl.fl4_tos = RT_TOS(eiph->tos);
411         fl.proto = IPPROTO_IPIP;
412         if (ip_route_output_key(&init_net, &rt, &fl)) {
413                 kfree_skb(skb2);
414                 return 0;
415         }
416         skb2->dev = rt->u.dst.dev;
417
418         /* route "incoming" packet */
419         if (rt->rt_flags&RTCF_LOCAL) {
420                 ip_rt_put(rt);
421                 rt = NULL;
422                 fl.fl4_daddr = eiph->daddr;
423                 fl.fl4_src = eiph->saddr;
424                 fl.fl4_tos = eiph->tos;
425                 if (ip_route_output_key(&init_net, &rt, &fl) ||
426                     rt->u.dst.dev->type != ARPHRD_TUNNEL) {
427                         ip_rt_put(rt);
428                         kfree_skb(skb2);
429                         return 0;
430                 }
431         } else {
432                 ip_rt_put(rt);
433                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
434                     skb2->dst->dev->type != ARPHRD_TUNNEL) {
435                         kfree_skb(skb2);
436                         return 0;
437                 }
438         }
439
440         /* change mtu on this route */
441         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
442                 if (n > dst_mtu(skb2->dst)) {
443                         kfree_skb(skb2);
444                         return 0;
445                 }
446                 skb2->dst->ops->update_pmtu(skb2->dst, n);
447         } else if (type == ICMP_TIME_EXCEEDED) {
448                 struct ip_tunnel *t = netdev_priv(skb2->dev);
449                 if (t->parms.iph.ttl) {
450                         rel_type = ICMP_DEST_UNREACH;
451                         rel_code = ICMP_HOST_UNREACH;
452                 }
453         }
454
455         icmp_send(skb2, rel_type, rel_code, rel_info);
456         kfree_skb(skb2);
457         return 0;
458 #endif
459 }
460
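/*
 * Propagate a Congestion Experienced mark from the outer header to the
 * inner header before the packet is handed back to the stack.
 */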
461 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
462                                         struct sk_buff *skb)
463 {
464         struct iphdr *inner_iph = ip_hdr(skb);
465
466         if (INET_ECN_is_ce(outer_iph->tos))
467                 IP_ECN_set_ce(inner_iph);
468 }
469
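/*
 * Receive path: the IP layer has already pulled the outer header, but
 * ip_hdr() still points at it, so use it to find the owning tunnel.
 * Then reset the header pointers so the inner packet re-enters the
 * stack via netif_rx() on the tunnel device.
 */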
470 static int ipip_rcv(struct sk_buff *skb)
471 {
472         struct ip_tunnel *tunnel;
473         const struct iphdr *iph = ip_hdr(skb);
474
475         read_lock(&ipip_lock);
476         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
477                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
478                         read_unlock(&ipip_lock);
479                         kfree_skb(skb);
480                         return 0;
481                 }
482
483                 secpath_reset(skb);
484
485                 skb->mac_header = skb->network_header;
486                 skb_reset_network_header(skb);
487                 skb->protocol = htons(ETH_P_IP);
488                 skb->pkt_type = PACKET_HOST;
489
490                 tunnel->stat.rx_packets++;
491                 tunnel->stat.rx_bytes += skb->len;
492                 skb->dev = tunnel->dev;
493                 dst_release(skb->dst);
494                 skb->dst = NULL;
495                 nf_reset(skb);
496                 ipip_ecn_decapsulate(iph, skb);
497                 netif_rx(skb);
498                 read_unlock(&ipip_lock);
499                 return 0;
500         }
501         read_unlock(&ipip_lock);
502
503         return -1;
504 }
505
506 /*
507  *      This function assumes it is being called from dev_queue_xmit()
508  *      and that skb is filled properly by that function.
509  */
510
511 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
512 {
513         struct ip_tunnel *tunnel = netdev_priv(dev);
514         struct net_device_stats *stats = &tunnel->stat;
515         struct iphdr  *tiph = &tunnel->parms.iph;
516         u8     tos = tunnel->parms.iph.tos;
517         __be16 df = tiph->frag_off;
518         struct rtable *rt;                      /* Route to the other host */
519         struct net_device *tdev;                        /* Device to other host */
520         struct iphdr  *old_iph = ip_hdr(skb);
521         struct iphdr  *iph;                     /* Our new IP header */
522         unsigned int max_headroom;              /* The extra header space needed */
523         __be32 dst = tiph->daddr;
524         int    mtu;
525
526         if (tunnel->recursion++) {
527                 tunnel->stat.collisions++;
528                 goto tx_error;
529         }
530
531         if (skb->protocol != htons(ETH_P_IP))
532                 goto tx_error;
533
534         if (tos&1)
535                 tos = old_iph->tos;
536
537         if (!dst) {
538                 /* NBMA tunnel */
539                 if ((rt = skb->rtable) == NULL) {
540                         tunnel->stat.tx_fifo_errors++;
541                         goto tx_error;
542                 }
543                 if ((dst = rt->rt_gateway) == 0)
544                         goto tx_error_icmp;
545         }
546
547         {
548                 struct flowi fl = { .oif = tunnel->parms.link,
549                                     .nl_u = { .ip4_u =
550                                               { .daddr = dst,
551                                                 .saddr = tiph->saddr,
552                                                 .tos = RT_TOS(tos) } },
553                                     .proto = IPPROTO_IPIP };
554                 if (ip_route_output_key(&init_net, &rt, &fl)) {
555                         tunnel->stat.tx_carrier_errors++;
556                         goto tx_error_icmp;
557                 }
558         }
559         tdev = rt->u.dst.dev;
560
561         if (tdev == dev) {
562                 ip_rt_put(rt);
563                 tunnel->stat.collisions++;
564                 goto tx_error;
565         }
566
567         if (tiph->frag_off)
568                 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
569         else
570                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
571
572         if (mtu < 68) {
573                 tunnel->stat.collisions++;
574                 ip_rt_put(rt);
575                 goto tx_error;
576         }
577         if (skb->dst)
578                 skb->dst->ops->update_pmtu(skb->dst, mtu);
579
580         df |= (old_iph->frag_off&htons(IP_DF));
581
582         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
583                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
584                 ip_rt_put(rt);
585                 goto tx_error;
586         }
587
588         if (tunnel->err_count > 0) {
589                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
590                         tunnel->err_count--;
591                         dst_link_failure(skb);
592                 } else
593                         tunnel->err_count = 0;
594         }
595
596         /*
597          * Okay, now see if we can stuff it in the buffer as-is.
598          */
599         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
600
601         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
602             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
603                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
604                 if (!new_skb) {
605                         ip_rt_put(rt);
606                         stats->tx_dropped++;
607                         dev_kfree_skb(skb);
608                         tunnel->recursion--;
609                         return 0;
610                 }
611                 if (skb->sk)
612                         skb_set_owner_w(new_skb, skb->sk);
613                 dev_kfree_skb(skb);
614                 skb = new_skb;
615                 old_iph = ip_hdr(skb);
616         }
617
618         skb->transport_header = skb->network_header;
619         skb_push(skb, sizeof(struct iphdr));
620         skb_reset_network_header(skb);
621         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
622         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
623                               IPSKB_REROUTED);
624         dst_release(skb->dst);
625         skb->dst = &rt->u.dst;
626
627         /*
628          *      Push down and install the IPIP header.
629          */
630
631         iph                     =       ip_hdr(skb);
632         iph->version            =       4;
633         iph->ihl                =       sizeof(struct iphdr)>>2;
634         iph->frag_off           =       df;
635         iph->protocol           =       IPPROTO_IPIP;
636         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
637         iph->daddr              =       rt->rt_dst;
638         iph->saddr              =       rt->rt_src;
639
640         if ((iph->ttl = tiph->ttl) == 0)
641                 iph->ttl        =       old_iph->ttl;
642
643         nf_reset(skb);
644
645         IPTUNNEL_XMIT();
646         tunnel->recursion--;
647         return 0;
648
649 tx_error_icmp:
650         dst_link_failure(skb);
651 tx_error:
652         stats->tx_errors++;
653         dev_kfree_skb(skb);
654         tunnel->recursion--;
655         return 0;
656 }
657
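/*
 * Work out which underlying device this tunnel will transmit through
 * (via a route lookup on the remote endpoint, or parms.link if given)
 * and derive the tunnel's hard_header_len and MTU from it.
 */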
658 static void ipip_tunnel_bind_dev(struct net_device *dev)
659 {
660         struct net_device *tdev = NULL;
661         struct ip_tunnel *tunnel;
662         struct iphdr *iph;
663
664         tunnel = netdev_priv(dev);
665         iph = &tunnel->parms.iph;
666
667         if (iph->daddr) {
668                 struct flowi fl = { .oif = tunnel->parms.link,
669                                     .nl_u = { .ip4_u =
670                                               { .daddr = iph->daddr,
671                                                 .saddr = iph->saddr,
672                                                 .tos = RT_TOS(iph->tos) } },
673                                     .proto = IPPROTO_IPIP };
674                 struct rtable *rt;
675                 if (!ip_route_output_key(&init_net, &rt, &fl)) {
676                         tdev = rt->u.dst.dev;
677                         ip_rt_put(rt);
678                 }
679                 dev->flags |= IFF_POINTOPOINT;
680         }
681
682         if (!tdev && tunnel->parms.link)
683                 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
684
685         if (tdev) {
686                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
687                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
688         }
689         dev->iflink = tunnel->parms.link;
690 }
691
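/*
 * Tunnel configuration ioctls: SIOCGETTUNNEL reads parameters,
 * SIOCADDTUNNEL/SIOCCHGTUNNEL create or update a tunnel, and
 * SIOCDELTUNNEL removes one.  The modifying operations require
 * CAP_NET_ADMIN, and the fallback device itself cannot be deleted.
 */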
692 static int
693 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
694 {
695         int err = 0;
696         struct ip_tunnel_parm p;
697         struct ip_tunnel *t;
698         struct net *net = dev_net(dev);
699         struct ipip_net *ipn = net_generic(net, ipip_net_id);
700
701         switch (cmd) {
702         case SIOCGETTUNNEL:
703                 t = NULL;
704                 if (dev == ipn->fb_tunnel_dev) {
705                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
706                                 err = -EFAULT;
707                                 break;
708                         }
709                         t = ipip_tunnel_locate(&p, 0);
710                 }
711                 if (t == NULL)
712                         t = netdev_priv(dev);
713                 memcpy(&p, &t->parms, sizeof(p));
714                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
715                         err = -EFAULT;
716                 break;
717
718         case SIOCADDTUNNEL:
719         case SIOCCHGTUNNEL:
720                 err = -EPERM;
721                 if (!capable(CAP_NET_ADMIN))
722                         goto done;
723
724                 err = -EFAULT;
725                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
726                         goto done;
727
728                 err = -EINVAL;
729                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
730                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
731                         goto done;
732                 if (p.iph.ttl)
733                         p.iph.frag_off |= htons(IP_DF);
734
735                 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
736
737                 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
738                         if (t != NULL) {
739                                 if (t->dev != dev) {
740                                         err = -EEXIST;
741                                         break;
742                                 }
743                         } else {
744                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
745                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
746                                         err = -EINVAL;
747                                         break;
748                                 }
749                                 t = netdev_priv(dev);
750                                 ipip_tunnel_unlink(t);
751                                 t->parms.iph.saddr = p.iph.saddr;
752                                 t->parms.iph.daddr = p.iph.daddr;
753                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
754                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
755                                 ipip_tunnel_link(t);
756                                 netdev_state_change(dev);
757                         }
758                 }
759
760                 if (t) {
761                         err = 0;
762                         if (cmd == SIOCCHGTUNNEL) {
763                                 t->parms.iph.ttl = p.iph.ttl;
764                                 t->parms.iph.tos = p.iph.tos;
765                                 t->parms.iph.frag_off = p.iph.frag_off;
766                                 if (t->parms.link != p.link) {
767                                         t->parms.link = p.link;
768                                         ipip_tunnel_bind_dev(dev);
769                                         netdev_state_change(dev);
770                                 }
771                         }
772                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
773                                 err = -EFAULT;
774                 } else
775                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
776                 break;
777
778         case SIOCDELTUNNEL:
779                 err = -EPERM;
780                 if (!capable(CAP_NET_ADMIN))
781                         goto done;
782
783                 if (dev == ipn->fb_tunnel_dev) {
784                         err = -EFAULT;
785                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
786                                 goto done;
787                         err = -ENOENT;
788                         if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
789                                 goto done;
790                         err = -EPERM;
791                         if (t->dev == ipn->fb_tunnel_dev)
792                                 goto done;
793                         dev = t->dev;
794                 }
795                 unregister_netdevice(dev);
796                 err = 0;
797                 break;
798
799         default:
800                 err = -EINVAL;
801         }
802
803 done:
804         return err;
805 }
806
807 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
808 {
809         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
810 }
811
812 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
813 {
814         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
815                 return -EINVAL;
816         dev->mtu = new_mtu;
817         return 0;
818 }
819
820 static void ipip_tunnel_setup(struct net_device *dev)
821 {
822         dev->uninit             = ipip_tunnel_uninit;
823         dev->hard_start_xmit    = ipip_tunnel_xmit;
824         dev->get_stats          = ipip_tunnel_get_stats;
825         dev->do_ioctl           = ipip_tunnel_ioctl;
826         dev->change_mtu         = ipip_tunnel_change_mtu;
827         dev->destructor         = free_netdev;
828
829         dev->type               = ARPHRD_TUNNEL;
830         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
831         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
832         dev->flags              = IFF_NOARP;
833         dev->iflink             = 0;
834         dev->addr_len           = 4;
835 }
836
837 static int ipip_tunnel_init(struct net_device *dev)
838 {
839         struct ip_tunnel *tunnel;
840
841         tunnel = netdev_priv(dev);
842
843         tunnel->dev = dev;
844         strcpy(tunnel->parms.name, dev->name);
845
846         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
847         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
848
849         ipip_tunnel_bind_dev(dev);
850
851         return 0;
852 }
853
854 static int ipip_fb_tunnel_init(struct net_device *dev)
855 {
856         struct ip_tunnel *tunnel = netdev_priv(dev);
857         struct iphdr *iph = &tunnel->parms.iph;
858
859         tunnel->dev = dev;
860         strcpy(tunnel->parms.name, dev->name);
861
862         iph->version            = 4;
863         iph->protocol           = IPPROTO_IPIP;
864         iph->ihl                = 5;
865
866         dev_hold(dev);
867         tunnels_wc[0]           = tunnel;
868         return 0;
869 }
870
871 static struct xfrm_tunnel ipip_handler = {
872         .handler        =       ipip_rcv,
873         .err_handler    =       ipip_err,
874         .priority       =       1,
875 };
876
877 static char banner[] __initdata =
878         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
879
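/*
 * Per-namespace init: allocate the ipip_net state and register the
 * namespace's own fallback "tunl0" device.
 */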
880 static int ipip_init_net(struct net *net)
881 {
882         int err;
883         struct ipip_net *ipn;
884
885         err = -ENOMEM;
886         ipn = kmalloc(sizeof(struct ipip_net), GFP_KERNEL);
887         if (ipn == NULL)
888                 goto err_alloc;
889
890         err = net_assign_generic(net, ipip_net_id, ipn);
891         if (err < 0)
892                 goto err_assign;
893
894         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
895                                            "tunl0",
896                                            ipip_tunnel_setup);
897         if (!ipn->fb_tunnel_dev) {
898                 err = -ENOMEM;
899                 goto err_alloc_dev;
900         }
901
902         ipn->fb_tunnel_dev->init = ipip_fb_tunnel_init;
903         dev_net_set(ipn->fb_tunnel_dev, net);
904
905         if ((err = register_netdev(ipn->fb_tunnel_dev)))
906                 goto err_reg_dev;
907
908         return 0;
909
910 err_reg_dev:
911         free_netdev(ipn->fb_tunnel_dev);
912 err_alloc_dev:
913         /* nothing */
914 err_assign:
915         kfree(ipn);
916 err_alloc:
917         return err;
918 }
919
920 static void ipip_exit_net(struct net *net)
921 {
922         struct ipip_net *ipn;
923
924         ipn = net_generic(net, ipip_net_id);
925         rtnl_lock();
926         unregister_netdevice(ipn->fb_tunnel_dev);
927         rtnl_unlock();
928         kfree(ipn);
929 }
930
931 static struct pernet_operations ipip_net_ops = {
932         .init = ipip_init_net,
933         .exit = ipip_exit_net,
934 };
935
936 static int __init ipip_init(void)
937 {
938         int err;
939
940         printk(banner);
941
942         if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
943                 printk(KERN_INFO "ipip init: can't register tunnel\n");
944                 return -EAGAIN;
945         }
946
947         err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops);
948         if (err)
949                 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
950
951         return err;
952 }
953
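/*
 * Module unload: unregister every tunnel in the hash tables.  Priority 0
 * (the wildcard slot) is skipped because the fallback device is released
 * by the per-net exit handler.
 */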
954 static void __exit ipip_destroy_tunnels(void)
955 {
956         int prio;
957
958         for (prio = 1; prio < 4; prio++) {
959                 int h;
960                 for (h = 0; h < HASH_SIZE; h++) {
961                         struct ip_tunnel *t;
962                         while ((t = tunnels[prio][h]) != NULL)
963                                 unregister_netdevice(t->dev);
964                 }
965         }
966 }
967
968 static void __exit ipip_fini(void)
969 {
970         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
971                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
972
973         rtnl_lock();
974         ipip_destroy_tunnels();
975         rtnl_unlock();
976
977         unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops);
978 }
979
980 module_init(ipip_init);
981 module_exit(ipip_fini);
982 MODULE_LICENSE("GPL");