[IPV4]: Add 'rtable' field in struct sk_buff to alias 'dst' and avoid casts
[safe/jmp/linux-2.6] / net / ipv4 / ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder.
3  *
4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5  *
6  *      Authors:
7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
8  *
9  *      Fixes:
10  *              Alan Cox        :       Merged and made usable non modular (it's so tiny it's silly as
11  *                                      a module taking up 2 pages).
12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13  *                                      to keep ip_forward happy.
14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
16  *              David Woodhouse :       Perform some basic ICMP handling.
17  *                                      IPIP Routing without decapsulation.
18  *              Carlos Picoto   :       GRE over IP support
19  *              Alexey Kuznetsov:       Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
20  *                                      I do not want to merge them together.
21  *
22  *      This program is free software; you can redistribute it and/or
23  *      modify it under the terms of the GNU General Public License
24  *      as published by the Free Software Foundation; either version
25  *      2 of the License, or (at your option) any later version.
26  *
27  */
28
29 /* tunnel.c: an IP tunnel driver
30
31         The purpose of this driver is to provide an IP tunnel through
32         which you can tunnel network traffic transparently across subnets.
33
34         This was written by looking at Nick Holloway's dummy driver
35         Thanks for the great code!
36
37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
38
39         Minor tweaks:
40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41                 dev->hard_header/hard_header_len changed to use no headers.
42                 Comments/bracketing tweaked.
43                 Made the tunnels use dev->name not tunnel: when error reporting.
44                 Added tx_dropped stat
45
46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
47
48         Reworked:
49                 Changed to tunnel to destination gateway in addition to the
50                         tunnel's pointopoint address
51                 Almost completely rewritten
52                 Note:  There is currently no firewall or ICMP handling done.
53
54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
55
56 */
57
58 /* Things I wish I had known when writing the tunnel driver:
59
60         When the tunnel_xmit() function is called, the skb contains the
61         packet to be sent (plus a great deal of extra info), and dev
62         contains the tunnel device that _we_ are.
63
64         When we are passed a packet, we are expected to fill in the
65         source address with our source IP address.
66
67         What is the proper way to allocate, copy and free a buffer?
68         After you allocate it, it is a "0 length" chunk of memory
69         starting at zero.  If you want to add headers to the buffer
70         later, you'll have to call "skb_reserve(skb, amount)" with
71         the amount of memory you want reserved.  Then, you call
72         "skb_put(skb, amount)" with the amount of space you want in
73         the buffer.  skb_put() returns a pointer to the top (#0) of
74         that buffer.  skb->len is set to the amount of space you have
75         "allocated" with skb_put().  You can then write up to skb->len
76         bytes to that buffer.  If you need more, you can call skb_put()
77         again with the additional amount of space you need.  You can
78         find out how much more space you can allocate by calling
79         "skb_tailroom(skb)".
80         Now, to add header space, call "skb_push(skb, header_len)".
81         This creates space at the beginning of the buffer and returns
82         a pointer to this new space.  If later you need to strip a
83         header from a buffer, call "skb_pull(skb, header_len)".
84         skb_headroom() will return how much space is left at the top
85         of the buffer (before the main data).  Remember, this headroom
86         space must be reserved before the skb_put() function is called.
87         */
88
89 /*
90    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
91
92    For comments look at net/ipv4/ip_gre.c --ANK
93  */
94
95
96 #include <linux/capability.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/kernel.h>
100 #include <asm/uaccess.h>
101 #include <linux/skbuff.h>
102 #include <linux/netdevice.h>
103 #include <linux/in.h>
104 #include <linux/tcp.h>
105 #include <linux/udp.h>
106 #include <linux/if_arp.h>
107 #include <linux/mroute.h>
108 #include <linux/init.h>
109 #include <linux/netfilter_ipv4.h>
110 #include <linux/if_ether.h>
111
112 #include <net/sock.h>
113 #include <net/ip.h>
114 #include <net/icmp.h>
115 #include <net/ipip.h>
116 #include <net/inet_ecn.h>
117 #include <net/xfrm.h>
118
/* Number of buckets in each hashed tunnel table. */
#define HASH_SIZE  16

/*
 * Fold an address into a bucket index in the range [0, HASH_SIZE).
 *
 * The argument is fully parenthesized so that an expression argument
 * (for example HASH(a ^ b)) is cast and shifted as a whole instead of
 * having the cast and the shift bind to only part of it.
 * Note that the argument is still evaluated twice, so it must not have
 * side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
121
/* Forward declarations for functions referenced before their definitions. */
static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);

/* The fallback tunnel device ("tunl0"); allocated in ipip_init(). */
static struct net_device *ipip_fb_tunnel_dev;

/*
 * Tunnel tables.  A tunnel is filed according to which of its endpoint
 * addresses are set (see __ipip_bucket()):
 *   tunnels_r_l - both remote and local set, hashed on HASH(remote)^HASH(local)
 *   tunnels_r   - only remote set, hashed on HASH(remote)
 *   tunnels_l   - only local set, hashed on HASH(local)
 *   tunnels_wc  - neither set; single slot, filled by ipip_fb_tunnel_init()
 * tunnels[] indexes these by "prio": bit 1 = remote set, bit 0 = local set.
 */
static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
static struct ip_tunnel *tunnels_r[HASH_SIZE];
static struct ip_tunnel *tunnels_l[HASH_SIZE];
static struct ip_tunnel *tunnels_wc[1];
static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };

/*
 * Taken for writing around updates to the chain pointers above and for
 * reading around lookups in ipip_rcv() and ipip_err().
 */
static DEFINE_RWLOCK(ipip_lock);
135
136 static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
137 {
138         unsigned h0 = HASH(remote);
139         unsigned h1 = HASH(local);
140         struct ip_tunnel *t;
141
142         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
143                 if (local == t->parms.iph.saddr &&
144                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
145                         return t;
146         }
147         for (t = tunnels_r[h0]; t; t = t->next) {
148                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
149                         return t;
150         }
151         for (t = tunnels_l[h1]; t; t = t->next) {
152                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
153                         return t;
154         }
155         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
156                 return t;
157         return NULL;
158 }
159
160 static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
161 {
162         __be32 remote = parms->iph.daddr;
163         __be32 local = parms->iph.saddr;
164         unsigned h = 0;
165         int prio = 0;
166
167         if (remote) {
168                 prio |= 2;
169                 h ^= HASH(remote);
170         }
171         if (local) {
172                 prio |= 1;
173                 h ^= HASH(local);
174         }
175         return &tunnels[prio][h];
176 }
177
178 static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
179 {
180         return __ipip_bucket(&t->parms);
181 }
182
183 static void ipip_tunnel_unlink(struct ip_tunnel *t)
184 {
185         struct ip_tunnel **tp;
186
187         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
188                 if (t == *tp) {
189                         write_lock_bh(&ipip_lock);
190                         *tp = t->next;
191                         write_unlock_bh(&ipip_lock);
192                         break;
193                 }
194         }
195 }
196
197 static void ipip_tunnel_link(struct ip_tunnel *t)
198 {
199         struct ip_tunnel **tp = ipip_bucket(t);
200
201         t->next = *tp;
202         write_lock_bh(&ipip_lock);
203         *tp = t;
204         write_unlock_bh(&ipip_lock);
205 }
206
207 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
208 {
209         __be32 remote = parms->iph.daddr;
210         __be32 local = parms->iph.saddr;
211         struct ip_tunnel *t, **tp, *nt;
212         struct net_device *dev;
213         char name[IFNAMSIZ];
214
215         for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
216                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
217                         return t;
218         }
219         if (!create)
220                 return NULL;
221
222         if (parms->name[0])
223                 strlcpy(name, parms->name, IFNAMSIZ);
224         else
225                 sprintf(name, "tunl%%d");
226
227         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
228         if (dev == NULL)
229                 return NULL;
230
231         if (strchr(name, '%')) {
232                 if (dev_alloc_name(dev, name) < 0)
233                         goto failed_free;
234         }
235
236         nt = netdev_priv(dev);
237         dev->init = ipip_tunnel_init;
238         nt->parms = *parms;
239
240         if (register_netdevice(dev) < 0)
241                 goto failed_free;
242
243         dev_hold(dev);
244         ipip_tunnel_link(nt);
245         return nt;
246
247 failed_free:
248         free_netdev(dev);
249         return NULL;
250 }
251
252 static void ipip_tunnel_uninit(struct net_device *dev)
253 {
254         if (dev == ipip_fb_tunnel_dev) {
255                 write_lock_bh(&ipip_lock);
256                 tunnels_wc[0] = NULL;
257                 write_unlock_bh(&ipip_lock);
258         } else
259                 ipip_tunnel_unlink(netdev_priv(dev));
260         dev_put(dev);
261 }
262
/*
 * ICMP error handler for IPIP (installed as ipip_handler.err_handler).
 *
 * In the branch that is compiled by default (I_WISH_WORLD_WERE_PERFECT
 * not defined) the handler only does error accounting: after filtering
 * on ICMP type/code it looks up the tunnel and updates its err_count and
 * err_time.  It returns 0 when the error is ignored or accounted, and
 * -ENOENT when no tunnel matches or the matching tunnel has no
 * destination address configured.
 *
 * The #else branch attempts to relay the error to the sender of the
 * inner packet via icmp_send().
 * NOTE(review): that branch references `dp`, `len` and `key`, none of
 * which is declared in this function, so it cannot build as shown.
 *
 * @info is not used by either branch.
 */
static int ipip_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
        /* skb->data is interpreted as an IP header; its addresses key the lookup below. */
        struct iphdr *iph = (struct iphdr*)skb->data;
        const int type = icmp_hdr(skb)->type;
        const int code = icmp_hdr(skb)->code;
        struct ip_tunnel *t;
        int err;

        /* Decide whether this ICMP type/code is worth accounting at all. */
        switch (type) {
        default:
        case ICMP_PARAMETERPROB:
                return 0;

        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return 0;
                case ICMP_FRAG_NEEDED:
                        /* Soft state for pmtu is maintained by IP core. */
                        return 0;
                default:
                        /* All others are translated to HOST_UNREACH.
                           rfc2003 contains "deep thoughts" about NET_UNREACH,
                           I believe they are just ether pollution. --ANK
                         */
                        break;
                }
                break;
        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
                        return 0;
                break;
        }

        err = -ENOENT;

        read_lock(&ipip_lock);
        /* Arguments are (remote, local): daddr of the header at skb->data is the remote. */
        t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
        if (t == NULL || t->parms.iph.daddr == 0)
                goto out;

        err = 0;
        /* With ttl 0 configured, TIME_EXCEEDED is not counted against the tunnel. */
        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
                goto out;

        /* Count errors arriving within IPTUNNEL_ERR_TIMEO of the last one; otherwise restart at 1. */
        if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
                t->err_count++;
        else
                t->err_count = 1;
        t->err_time = jiffies;
out:
        read_unlock(&ipip_lock);
        return err;
#else
        /* NOTE(review): `dp` is not declared anywhere in this function. */
        struct iphdr *iph = (struct iphdr*)dp;
        int hlen = iph->ihl<<2;
        struct iphdr *eiph;
        const int type = icmp_hdr(skb)->type;
        const int code = icmp_hdr(skb)->code;
        int rel_type = 0;
        int rel_code = 0;
        __be32 rel_info = 0;
        __u32 n = 0;
        struct sk_buff *skb2;
        struct flowi fl;
        struct rtable *rt;

        /* NOTE(review): `len` is not declared anywhere in this function. */
        if (len < hlen + sizeof(struct iphdr))
                return 0;
        /* eiph: the IP header that follows the outer header of length hlen. */
        eiph = (struct iphdr*)(dp + hlen);

        switch (type) {
        default:
                return 0;
        case ICMP_PARAMETERPROB:
                /* Top byte of the ICMP word is used as the offset of the problem. */
                n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
                if (n < hlen)
                        return 0;

                /* So... This guy found something strange INSIDE encapsulated
                   packet. Well, he is fool, but what can we do ?
                 */
                rel_type = ICMP_PARAMETERPROB;
                /* Re-base the offset so it is relative to the inner header. */
                rel_info = htonl((n - hlen) << 24);
                break;

        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return 0;
                case ICMP_FRAG_NEEDED:
                        /* And it is the only really necessary thing :-) */
                        n = ntohs(icmp_hdr(skb)->un.frag.mtu);
                        if (n < hlen+68)
                                return 0;
                        /* Reported MTU minus the outer header length. */
                        n -= hlen;
                        /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
                        if (n > ntohs(eiph->tot_len))
                                return 0;
                        rel_info = htonl(n);
                        break;
                default:
                        /* All others are translated to HOST_UNREACH.
                           rfc2003 contains "deep thoughts" about NET_UNREACH,
                           I believe, it is just ether pollution. --ANK
                         */
                        rel_type = ICMP_DEST_UNREACH;
                        rel_code = ICMP_HOST_UNREACH;
                        break;
                }
                break;
        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
                        return 0;
                break;
        }

        /* Prepare fake skb to feed it to icmp_send */
        skb2 = skb_clone(skb, GFP_ATOMIC);
        if (skb2 == NULL)
                return 0;
        dst_release(skb2->dst);
        skb2->dst = NULL;
        skb_pull(skb2, skb->data - (u8*)eiph);
        skb_reset_network_header(skb2);

        /* Try to guess incoming interface */
        memset(&fl, 0, sizeof(fl));
        fl.fl4_daddr = eiph->saddr;
        fl.fl4_tos = RT_TOS(eiph->tos);
        fl.proto = IPPROTO_IPIP;
        /* NOTE(review): `key` is not declared; the flow set up just above is `fl`. */
        if (ip_route_output_key(&init_net, &rt, &key)) {
                kfree_skb(skb2);
                return 0;
        }
        skb2->dev = rt->u.dst.dev;

        /* route "incoming" packet */
        if (rt->rt_flags&RTCF_LOCAL) {
                ip_rt_put(rt);
                rt = NULL;
                fl.fl4_daddr = eiph->daddr;
                fl.fl4_src = eiph->saddr;
                fl.fl4_tos = eiph->tos;
                /*
                 * NOTE(review): ip_rt_put(rt) below also runs when the route
                 * lookup itself failed; rt was reset to NULL just above.
                 * Confirm that ip_rt_put() tolerates that.
                 */
                if (ip_route_output_key(&init_net, &rt, &fl) ||
                    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
                        ip_rt_put(rt);
                        kfree_skb(skb2);
                        return 0;
                }
        } else {
                ip_rt_put(rt);
                if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
                    skb2->dst->dev->type != ARPHRD_TUNNEL) {
                        kfree_skb(skb2);
                        return 0;
                }
        }

        /* change mtu on this route */
        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
                if (n > dst_mtu(skb2->dst)) {
                        kfree_skb(skb2);
                        return 0;
                }
                skb2->dst->ops->update_pmtu(skb2->dst, n);
        } else if (type == ICMP_TIME_EXCEEDED) {
                struct ip_tunnel *t = netdev_priv(skb2->dev);
                /* Only a tunnel with a fixed ttl turns TIME_EXCEEDED into HOST_UNREACH. */
                if (t->parms.iph.ttl) {
                        rel_type = ICMP_DEST_UNREACH;
                        rel_code = ICMP_HOST_UNREACH;
                }
        }

        icmp_send(skb2, rel_type, rel_code, rel_info);
        kfree_skb(skb2);
        return 0;
#endif
}
452
453 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
454                                         struct sk_buff *skb)
455 {
456         struct iphdr *inner_iph = ip_hdr(skb);
457
458         if (INET_ECN_is_ce(outer_iph->tos))
459                 IP_ECN_set_ce(inner_iph);
460 }
461
462 static int ipip_rcv(struct sk_buff *skb)
463 {
464         struct ip_tunnel *tunnel;
465         const struct iphdr *iph = ip_hdr(skb);
466
467         read_lock(&ipip_lock);
468         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
469                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
470                         read_unlock(&ipip_lock);
471                         kfree_skb(skb);
472                         return 0;
473                 }
474
475                 secpath_reset(skb);
476
477                 skb->mac_header = skb->network_header;
478                 skb_reset_network_header(skb);
479                 skb->protocol = htons(ETH_P_IP);
480                 skb->pkt_type = PACKET_HOST;
481
482                 tunnel->stat.rx_packets++;
483                 tunnel->stat.rx_bytes += skb->len;
484                 skb->dev = tunnel->dev;
485                 dst_release(skb->dst);
486                 skb->dst = NULL;
487                 nf_reset(skb);
488                 ipip_ecn_decapsulate(iph, skb);
489                 netif_rx(skb);
490                 read_unlock(&ipip_lock);
491                 return 0;
492         }
493         read_unlock(&ipip_lock);
494
495         return -1;
496 }
497
/*
 *      Transmit hook of the tunnel device (dev->hard_start_xmit).
 *
 *      This function assumes it is being called from dev_queue_xmit()
 *      and that skb is filled properly by that function.
 *
 *      It routes towards the tunnel endpoint, checks the MTU, makes sure
 *      there is writable headroom, prepends a new IP header and hands the
 *      packet on via IPTUNNEL_XMIT().  Every path returns 0; on the error
 *      paths the skb is freed and a statistics counter is bumped.
 */
static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct net_device_stats *stats = &tunnel->stat;
        struct iphdr  *tiph = &tunnel->parms.iph;
        u8     tos = tunnel->parms.iph.tos;
        __be16 df = tiph->frag_off;
        struct rtable *rt;                      /* Route to the other host */
        struct net_device *tdev;                        /* Device to other host */
        struct iphdr  *old_iph = ip_hdr(skb);
        struct iphdr  *iph;                     /* Our new IP header */
        unsigned int max_headroom;              /* The extra header space needed */
        __be32 dst = tiph->daddr;
        int    mtu;

        /* Re-entry guard: a non-zero counter means we are already inside this tunnel. */
        if (tunnel->recursion++) {
                tunnel->stat.collisions++;
                goto tx_error;
        }

        /* Only IPv4 payloads are encapsulated. */
        if (skb->protocol != htons(ETH_P_IP))
                goto tx_error;

        /* Low bit of the configured tos selects "copy tos from the inner header". */
        if (tos&1)
                tos = old_iph->tos;

        if (!dst) {
                /* NBMA tunnel */
                /* No fixed endpoint: use the gateway of the route attached to the skb. */
                if ((rt = skb->rtable) == NULL) {
                        tunnel->stat.tx_fifo_errors++;
                        goto tx_error;
                }
                if ((dst = rt->rt_gateway) == 0)
                        goto tx_error_icmp;
        }

        /* Route the outer packet to the endpoint. */
        {
                struct flowi fl = { .oif = tunnel->parms.link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = dst,
                                                .saddr = tiph->saddr,
                                                .tos = RT_TOS(tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(&init_net, &rt, &fl)) {
                        tunnel->stat.tx_carrier_errors++;
                        goto tx_error_icmp;
                }
        }
        tdev = rt->u.dst.dev;

        /* The route leads back into this very tunnel device. */
        if (tdev == dev) {
                ip_rt_put(rt);
                tunnel->stat.collisions++;
                goto tx_error;
        }

        /*
         * With a non-zero frag_off configured, the usable MTU is the
         * route MTU minus the new header; otherwise take it from the
         * skb's current dst, or from the device.
         */
        if (tiph->frag_off)
                mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
        else
                mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

        if (mtu < 68) {
                tunnel->stat.collisions++;
                ip_rt_put(rt);
                goto tx_error;
        }
        if (skb->dst)
                skb->dst->ops->update_pmtu(skb->dst, mtu);

        /* Inherit DF from the inner packet in addition to the configured value. */
        df |= (old_iph->frag_off&htons(IP_DF));

        /* Inner packet has DF set and does not fit: tell the sender and drop. */
        if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                ip_rt_put(rt);
                goto tx_error;
        }

        /*
         * Errors recorded by ipip_err(): while they are recent, report one
         * link failure per transmitted packet; once stale, forget them.
         */
        if (tunnel->err_count > 0) {
                if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
                        tunnel->err_count--;
                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        /*
         * Okay, now see if we can stuff it in the buffer as-is.
         */
        max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

        /* Reallocate when headroom is short or the buffer must not be written in place. */
        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
                if (!new_skb) {
                        ip_rt_put(rt);
                        stats->tx_dropped++;
                        dev_kfree_skb(skb);
                        tunnel->recursion--;
                        return 0;
                }
                if (skb->sk)
                        skb_set_owner_w(new_skb, skb->sk);
                dev_kfree_skb(skb);
                skb = new_skb;
                /* The header pointer must be re-read from the new buffer. */
                old_iph = ip_hdr(skb);
        }

        /* The inner IP header becomes the transport header of the outer packet. */
        skb->transport_header = skb->network_header;
        skb_push(skb, sizeof(struct iphdr));
        skb_reset_network_header(skb);
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
                              IPSKB_REROUTED);
        /* Replace the skb's dst with the route to the endpoint; the skb now owns rt. */
        dst_release(skb->dst);
        skb->dst = &rt->u.dst;

        /*
         *      Push down and install the IPIP header.
         */

        iph                     =       ip_hdr(skb);
        iph->version            =       4;
        iph->ihl                =       sizeof(struct iphdr)>>2;
        iph->frag_off           =       df;
        iph->protocol           =       IPPROTO_IPIP;
        iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
        iph->daddr              =       rt->rt_dst;
        iph->saddr              =       rt->rt_src;

        /* A configured ttl of 0 means "inherit the ttl of the inner packet". */
        if ((iph->ttl = tiph->ttl) == 0)
                iph->ttl        =       old_iph->ttl;

        nf_reset(skb);

        /*
         * NOTE(review): IPTUNNEL_XMIT() is a macro defined outside this
         * file; presumably it sends the packet and updates counters using
         * the locals skb/iph/stats - confirm before renaming any of them.
         */
        IPTUNNEL_XMIT();
        tunnel->recursion--;
        return 0;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        stats->tx_errors++;
        dev_kfree_skb(skb);
        tunnel->recursion--;
        return 0;
}
649
650 static void ipip_tunnel_bind_dev(struct net_device *dev)
651 {
652         struct net_device *tdev = NULL;
653         struct ip_tunnel *tunnel;
654         struct iphdr *iph;
655
656         tunnel = netdev_priv(dev);
657         iph = &tunnel->parms.iph;
658
659         if (iph->daddr) {
660                 struct flowi fl = { .oif = tunnel->parms.link,
661                                     .nl_u = { .ip4_u =
662                                               { .daddr = iph->daddr,
663                                                 .saddr = iph->saddr,
664                                                 .tos = RT_TOS(iph->tos) } },
665                                     .proto = IPPROTO_IPIP };
666                 struct rtable *rt;
667                 if (!ip_route_output_key(&init_net, &rt, &fl)) {
668                         tdev = rt->u.dst.dev;
669                         ip_rt_put(rt);
670                 }
671                 dev->flags |= IFF_POINTOPOINT;
672         }
673
674         if (!tdev && tunnel->parms.link)
675                 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
676
677         if (tdev) {
678                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
679                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
680         }
681         dev->iflink = tunnel->parms.link;
682 }
683
/*
 * dev->do_ioctl hook: get, add, change and delete tunnels.
 *
 * The tunnel parameters are exchanged with user space as a
 * struct ip_tunnel_parm at ifr->ifr_ifru.ifru_data.
 *
 *   SIOCGETTUNNEL  - copy out the parameters of @dev, or, when issued on
 *                    the fallback device, of the tunnel matching the
 *                    addresses passed in (falling back to @dev itself).
 *   SIOCADDTUNNEL  - locate or create a tunnel (needs CAP_NET_ADMIN).
 *   SIOCCHGTUNNEL  - update an existing tunnel (needs CAP_NET_ADMIN).
 *   SIOCDELTUNNEL  - unregister a tunnel device (needs CAP_NET_ADMIN);
 *                    the fallback device itself cannot be deleted this way.
 *
 * Returns 0 on success or a negative errno.
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
        int err = 0;
        struct ip_tunnel_parm p;
        struct ip_tunnel *t;

        switch (cmd) {
        case SIOCGETTUNNEL:
                t = NULL;
                /* On the fallback device the caller names the tunnel by its addresses. */
                if (dev == ipip_fb_tunnel_dev) {
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
                                err = -EFAULT;
                                break;
                        }
                        t = ipip_tunnel_locate(&p, 0);
                }
                if (t == NULL)
                        t = netdev_priv(dev);
                memcpy(&p, &t->parms, sizeof(p));
                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
                        err = -EFAULT;
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                /* err is preset before each check so "goto done" returns the right code. */
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                err = -EFAULT;
                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                        goto done;

                /* Only a plain IPv4 IPIP header template is accepted; DF is the only frag_off bit allowed. */
                err = -EINVAL;
                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
                    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
                        goto done;
                /* A fixed ttl forces DF on the outer header. */
                if (p.iph.ttl)
                        p.iph.frag_off |= htons(IP_DF);

                t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);

                if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
                        if (t != NULL) {
                                /* The requested addresses already belong to a different tunnel. */
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                /* Having or lacking a destination must agree with IFF_POINTOPOINT. */
                                if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
                                    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
                                        err = -EINVAL;
                                        break;
                                }
                                /* Re-file the tunnel: its chain depends on the addresses. */
                                t = netdev_priv(dev);
                                ipip_tunnel_unlink(t);
                                t->parms.iph.saddr = p.iph.saddr;
                                t->parms.iph.daddr = p.iph.daddr;
                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
                                memcpy(dev->broadcast, &p.iph.daddr, 4);
                                ipip_tunnel_link(t);
                                netdev_state_change(dev);
                        }
                }

                if (t) {
                        err = 0;
                        if (cmd == SIOCCHGTUNNEL) {
                                t->parms.iph.ttl = p.iph.ttl;
                                t->parms.iph.tos = p.iph.tos;
                                t->parms.iph.frag_off = p.iph.frag_off;
                                /*
                                 * NOTE(review): the rebind and notification use
                                 * `dev`, which may differ from t->dev when the
                                 * ioctl was issued on the fallback device -
                                 * confirm this is intended.
                                 */
                                if (t->parms.link != p.link) {
                                        t->parms.link = p.link;
                                        ipip_tunnel_bind_dev(dev);
                                        netdev_state_change(dev);
                                }
                        }
                        /* Report the resulting parameters back to the caller. */
                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
                                err = -EFAULT;
                } else
                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                /* On the fallback device the victim is named by its addresses. */
                if (dev == ipip_fb_tunnel_dev) {
                        err = -EFAULT;
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                                goto done;
                        err = -ENOENT;
                        if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
                                goto done;
                        /* Refuse to delete the fallback device itself. */
                        err = -EPERM;
                        if (t->dev == ipip_fb_tunnel_dev)
                                goto done;
                        dev = t->dev;
                }
                unregister_netdevice(dev);
                err = 0;
                break;

        default:
                err = -EINVAL;
        }

done:
        return err;
}
796
797 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
798 {
799         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
800 }
801
802 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
803 {
804         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
805                 return -EINVAL;
806         dev->mtu = new_mtu;
807         return 0;
808 }
809
810 static void ipip_tunnel_setup(struct net_device *dev)
811 {
812         dev->uninit             = ipip_tunnel_uninit;
813         dev->hard_start_xmit    = ipip_tunnel_xmit;
814         dev->get_stats          = ipip_tunnel_get_stats;
815         dev->do_ioctl           = ipip_tunnel_ioctl;
816         dev->change_mtu         = ipip_tunnel_change_mtu;
817         dev->destructor         = free_netdev;
818
819         dev->type               = ARPHRD_TUNNEL;
820         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
821         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
822         dev->flags              = IFF_NOARP;
823         dev->iflink             = 0;
824         dev->addr_len           = 4;
825 }
826
827 static int ipip_tunnel_init(struct net_device *dev)
828 {
829         struct ip_tunnel *tunnel;
830
831         tunnel = netdev_priv(dev);
832
833         tunnel->dev = dev;
834         strcpy(tunnel->parms.name, dev->name);
835
836         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
837         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
838
839         ipip_tunnel_bind_dev(dev);
840
841         return 0;
842 }
843
844 static int __init ipip_fb_tunnel_init(struct net_device *dev)
845 {
846         struct ip_tunnel *tunnel = netdev_priv(dev);
847         struct iphdr *iph = &tunnel->parms.iph;
848
849         tunnel->dev = dev;
850         strcpy(tunnel->parms.name, dev->name);
851
852         iph->version            = 4;
853         iph->protocol           = IPPROTO_IPIP;
854         iph->ihl                = 5;
855
856         dev_hold(dev);
857         tunnels_wc[0]           = tunnel;
858         return 0;
859 }
860
/*
 * Receive and ICMP-error callbacks for IPIP; registered with
 * xfrm4_tunnel_register() in ipip_init() and removed again in
 * ipip_fini().
 */
static struct xfrm_tunnel ipip_handler = {
        .handler        =       ipip_rcv,
        .err_handler    =       ipip_err,
        .priority       =       1,
};

/* Message printed by ipip_init() when the driver is initialised. */
static char banner[] __initdata =
        KERN_INFO "IPv4 over IPv4 tunneling driver\n";
869
870 static int __init ipip_init(void)
871 {
872         int err;
873
874         printk(banner);
875
876         if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
877                 printk(KERN_INFO "ipip init: can't register tunnel\n");
878                 return -EAGAIN;
879         }
880
881         ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
882                                            "tunl0",
883                                            ipip_tunnel_setup);
884         if (!ipip_fb_tunnel_dev) {
885                 err = -ENOMEM;
886                 goto err1;
887         }
888
889         ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
890
891         if ((err = register_netdev(ipip_fb_tunnel_dev)))
892                 goto err2;
893  out:
894         return err;
895  err2:
896         free_netdev(ipip_fb_tunnel_dev);
897  err1:
898         xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
899         goto out;
900 }
901
902 static void __exit ipip_destroy_tunnels(void)
903 {
904         int prio;
905
906         for (prio = 1; prio < 4; prio++) {
907                 int h;
908                 for (h = 0; h < HASH_SIZE; h++) {
909                         struct ip_tunnel *t;
910                         while ((t = tunnels[prio][h]) != NULL)
911                                 unregister_netdevice(t->dev);
912                 }
913         }
914 }
915
916 static void __exit ipip_fini(void)
917 {
918         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
919                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
920
921         rtnl_lock();
922         ipip_destroy_tunnels();
923         unregister_netdevice(ipip_fb_tunnel_dev);
924         rtnl_unlock();
925 }
926
/* Module entry point, exit point and license declaration. */
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");